{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "notebook-title",
   "metadata": {},
   "source": [
    "# Single-syllable noun list\n",
    "\n",
    "Starts from a hand-written list of candidate nouns, writes it to `nouns.csv`, uses the CMU Pronouncing Dictionary (via NLTK) to find candidates with more than one syllable, and writes the remaining words to `cleaned_nouns.csv`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "77365834-52b5-4443-8cc8-4ffdf0a847ba",
   "metadata": {},
   "outputs": [],
   "source": [
    "import csv\n",
    "\n",
    "import nltk\n",
    "from nltk.corpus import cmudict"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "write-word-column-helper",
   "metadata": {},
   "outputs": [],
   "source": [
    "def write_word_column(path, words):\n",
    "    \"\"\"Write ``words`` to ``path`` as a single-column CSV, one word per row.\n",
    "\n",
    "    Args:\n",
    "        path: Destination file path; an existing file is overwritten.\n",
    "        words: Iterable of strings to write.\n",
    "    \"\"\"\n",
    "    with open(path, 'w', newline='', encoding='utf-8') as csvfile:\n",
    "        writer = csv.writer(csvfile)\n",
    "        # writerows() expects an iterable of rows. Passing bare strings makes csv\n",
    "        # iterate each string, writing one character per column (\"art\" -> a,r,t),\n",
    "        # so wrap every word in its own one-element row.\n",
    "        writer.writerows([word] for word in words)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d1c44d5e-ed2f-4122-b2ac-fcb9dca2358f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Candidate nouns. Despite the name, this raw list still contains multi-syllable\n",
    "# words; they are detected and removed further down.\n",
    "single_syllable_nouns = [\n",
    "    \"art\", \"ash\", \"axe\", \"bag\", \"ball\", \"bar\", \"bat\", \"bay\", \"bed\", \"bee\",\n",
    "    \"bell\", \"belt\", \"bench\", \"bird\", \"boat\", \"book\", \"boot\", \"bow\", \"box\", \"boy\",\n",
    "    \"branch\", \"bread\", \"bridge\", \"brush\", \"bucket\", \"bus\", \"bush\", \"cake\", \"can\",\n",
    "    \"cap\", \"car\", \"card\", \"cart\", \"cat\", \"chain\", \"chair\", \"chalk\", \"cheese\", \"chest\",\n",
    "    \"chicken\", \"child\", \"church\", \"city\", \"class\", \"clock\", \"cloud\", \"coat\", \"code\",\n",
    "    \"coin\", \"couch\", \"court\", \"cow\", \"crab\", \"cream\", \"crow\", \"cup\", \"curtain\", \"dad\",\n",
    "    \"day\", \"deck\", \"desk\", \"dog\", \"door\", \"dress\", \"drink\", \"drop\", \"duck\", \"dust\",\n",
    "    \"ear\", \"earth\", \"egg\", \"eye\", \"face\", \"fact\", \"farm\", \"field\", \"file\", \"film\",\n",
    "    \"fire\", \"fish\", \"flag\", \"floor\", \"flower\", \"fly\", \"fog\", \"food\", \"foot\", \"fork\",\n",
    "    \"fox\", \"friend\", \"frog\", \"fruit\", \"game\", \"gate\", \"girl\", \"glass\", \"glove\", \"goat\",\n",
    "    \"god\", \"gold\", \"grass\", \"grave\", \"green\", \"ground\", \"group\", \"gum\", \"gun\", \"hair\",\n",
    "    \"hand\", \"hat\", \"head\", \"heart\", \"heat\", \"hill\", \"hole\", \"home\", \"horse\", \"house\",\n",
    "    \"ice\", \"ink\", \"jacket\", \"jam\", \"jar\", \"job\", \"key\", \"king\", \"kiss\", \"kite\",\n",
    "    \"knife\", \"lady\", \"lake\", \"lamp\", \"land\", \"law\", \"leaf\", \"leg\", \"letter\", \"light\",\n",
    "    \"line\", \"lion\", \"list\", \"lock\", \"log\", \"love\", \"lunch\", \"man\", \"map\", \"mask\",\n",
    "    \"meal\", \"meat\", \"men\", \"milk\", \"mind\", \"mine\", \"moon\", \"morning\", \"mother\", \"mouse\",\n",
    "    \"mouth\", \"name\", \"neck\", \"night\", \"noise\", \"nose\", \"note\", \"ocean\", \"office\", \"oil\",\n",
    "    \"orange\", \"page\", \"pain\", \"paint\", \"pan\", \"paper\", \"park\", \"part\", \"party\", \"path\",\n",
    "    \"peace\", \"pear\", \"pen\", \"pencil\", \"people\", \"phone\", \"photo\", \"pie\", \"pig\", \"pin\",\n",
    "    \"pipe\", \"place\", \"plane\", \"plant\", \"plate\", \"play\", \"point\", \"pole\", \"pool\", \"port\",\n",
    "    \"post\", \"pot\", \"price\", \"prince\", \"queen\", \"race\", \"rain\", \"rat\", \"ring\", \"river\",\n",
    "    \"road\", \"rock\", \"room\", \"root\", \"rose\", \"rule\", \"run\", \"sail\", \"salt\", \"sand\",\n",
    "    \"school\", \"sea\", \"seat\", \"seed\", \"shade\", \"shape\", \"sheep\", \"shelf\", \"ship\", \"shirt\",\n",
    "    \"shoe\", \"shop\", \"shot\", \"side\", \"sign\", \"silk\", \"sister\", \"size\", \"sky\", \"sleep\",\n",
    "    \"smile\", \"smoke\", \"snake\", \"snow\", \"sock\", \"son\", \"song\", \"sound\", \"soup\", \"space\",\n",
    "    \"speech\", \"spoon\", \"sport\", \"spring\", \"square\", \"star\", \"state\", \"steam\", \"steel\",\n",
    "    \"step\", \"stick\", \"stone\", \"stop\", \"store\", \"storm\", \"street\", \"string\", \"student\", \"sun\",\n",
    "    \"table\", \"tail\", \"tea\", \"teacher\", \"team\", \"test\", \"text\", \"thread\", \"throne\", \"time\",\n",
    "    \"toe\", \"town\", \"toy\", \"train\", \"tree\", \"trip\", \"truck\", \"truth\", \"tube\", \"turn\",\n",
    "    \"wall\", \"war\", \"watch\", \"water\", \"wave\", \"way\", \"week\", \"weight\", \"well\", \"wheel\",\n",
    "    \"wind\", \"window\", \"wine\", \"wing\", \"winter\", \"wire\", \"wish\", \"woman\", \"wood\", \"word\",\n",
    "    \"work\", \"world\", \"year\", \"youth\"\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c7b08367-f320-459e-9dda-6392e533e979",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save the raw candidate list, one word per row.\n",
    "write_word_column('nouns.csv', single_syllable_nouns)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "86f80604-fb54-46bd-ab2a-5331ec7e5411",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Download the CMU Pronouncing Dictionary (a no-op when it is already installed).\n",
    "# quiet=True keeps the machine-specific download path out of the saved output.\n",
    "nltk.download('cmudict', quiet=True)\n",
    "\n",
    "# Keyed by lowercase word; each value is a list of pronunciations, and each\n",
    "# pronunciation is a list of phoneme strings.\n",
    "d = cmudict.dict()\n",
    "\n",
    "\n",
    "def count_syllables(word):\n",
    "    \"\"\"Return the syllable count of ``word`` from its first CMUdict pronunciation.\n",
    "\n",
    "    Args:\n",
    "        word: Word to look up; the lookup is case-insensitive.\n",
    "\n",
    "    Returns:\n",
    "        Number of phonemes in the first listed pronunciation that end in a digit,\n",
    "        or 0 when the word is not in the dictionary.\n",
    "    \"\"\"\n",
    "    pronunciations = d.get(word.lower())\n",
    "    if not pronunciations:\n",
    "        # Unknown word: keep the original contract of reporting 0 instead of raising.\n",
    "        return 0\n",
    "    # CMUdict tags every vowel phoneme with a trailing stress digit (e.g. 'AE1'),\n",
    "    # so counting digit-suffixed phonemes counts syllables. Only the first\n",
    "    # pronunciation is used; alternative pronunciations are ignored.\n",
    "    return sum(1 for phoneme in pronunciations[0] if phoneme[-1].isdigit())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ff7f7385-d2b4-439b-9079-6de0775b9435",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sanity-check the counter on a few words\n",
    "test_words = [\"computer\", \"cat\", \"elephant\", \"dog\", \"important\"]\n",
    "for word in test_words:\n",
    "    print(f\"'{word}': {count_syllables(word)} syllables\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "51222d43-baaa-48ed-8b9f-58fc22bbe769",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "'bucket': 2 syllables\n",
      "'chicken': 2 syllables\n",
      "'city': 2 syllables\n",
      "'curtain': 2 syllables\n",
      "'fire': 2 syllables\n",
      "'flower': 2 syllables\n",
      "'jacket': 2 syllables\n",
      "'lady': 2 syllables\n",
      "'letter': 2 syllables\n",
      "'lion': 2 syllables\n",
      "'morning': 2 syllables\n",
      "'mother': 2 syllables\n",
      "'ocean': 2 syllables\n",
      "'office': 2 syllables\n",
      "'orange': 2 syllables\n",
      "'paper': 2 syllables\n",
      "'party': 2 syllables\n",
      "'pencil': 2 syllables\n",
      "'people': 2 syllables\n",
      "'photo': 2 syllables\n",
      "'river': 2 syllables\n",
      "'sister': 2 syllables\n",
      "'student': 2 syllables\n",
      "'table': 2 syllables\n",
      "'teacher': 2 syllables\n",
      "'water': 2 syllables\n",
      "'window': 2 syllables\n",
      "'winter': 2 syllables\n",
      "'wire': 2 syllables\n",
      "'woman': 2 syllables\n"
     ]
    }
   ],
   "source": [
    "# Report every candidate that CMUdict counts as having more than one syllable\n",
    "for word in single_syllable_nouns:\n",
    "    count = count_syllables(word)\n",
    "    if count > 1:\n",
    "        print(f\"'{word}': {count} syllables\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "218a5ad4-33b7-4e73-af1b-ba8c6303f012",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['bucket', 'chicken', 'city', 'curtain', 'fire', 'flower', 'jacket', 'lady', 'letter', 'lion', 'morning', 'mother', 'ocean', 'office', 'orange', 'paper', 'party', 'pencil', 'people', 'photo', 'river', 'sister', 'student', 'table', 'teacher', 'water', 'window', 'winter', 'wire', 'woman']\n"
     ]
    }
   ],
   "source": [
    "# Collect the multi-syllable candidates so they can be removed below\n",
    "not_single_syllable = [\n",
    "    word for word in single_syllable_nouns if count_syllables(word) > 1\n",
    "]\n",
    "print(not_single_syllable)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "unknown-words-check",
   "metadata": {},
   "outputs": [],
   "source": [
    "# count_syllables() returns 0 for words missing from CMUdict, and the `count > 1`\n",
    "# filter above lets those through unverified. List them so they can be checked by hand.\n",
    "unknown_words = [word for word in single_syllable_nouns if count_syllables(word) == 0]\n",
    "unknown_words"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "629d364b-9120-4615-8e04-8704a9ccddf6",
   "metadata": {},
   "source": [
    "Reference pattern for removing the items of one list from another (used in the next cell):\n",
    "\n",
    "```python\n",
    "list_1 = ['apple', 'banana', 'orange', 'grape', 'kiwi']\n",
    "list_2 = ['banana', 'kiwi']\n",
    "\n",
    "# Remove items from list_1 that are in list_2\n",
    "list_1 = [item for item in list_1 if item not in list_2]\n",
    "\n",
    "print(list_1)  # Output: ['apple', 'orange', 'grape']\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5c2a24e2-f027-40c9-aca2-ddb8a1a4d969",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['art', 'ash', 'axe', 'bag', 'ball', 'bar', 'bat', 'bay', 'bed', 'bee', 'bell', 'belt', 'bench', 'bird', 'boat', 'book', 'boot', 'bow', 'box', 'boy', 'branch', 'bread', 'bridge', 'brush', 'bus', 'bush', 'cake', 'can', 'cap', 'car', 'card', 'cart', 'cat', 'chain', 'chair', 'chalk', 'cheese', 'chest', 'child', 'church', 'class', 'clock', 'cloud', 'coat', 'code', 'coin', 'couch', 'court', 'cow', 'crab', 'cream', 'crow', 'cup', 'dad', 'day', 'deck', 'desk', 'dog', 'door', 'dress', 'drink', 'drop', 'duck', 'dust', 'ear', 'earth', 'egg', 'eye', 'face', 'fact', 'farm', 'field', 'file', 'film', 'fish', 'flag', 'floor', 'fly', 'fog', 'food', 'foot', 'fork', 'fox', 'friend', 'frog', 'fruit', 'game', 'gate', 'girl', 'glass', 'glove', 'goat', 'god', 'gold', 'grass', 'grave', 'green', 'ground', 'group', 'gum', 'gun', 'hair', 'hand', 'hat', 'head', 'heart', 'heat', 'hill', 'hole', 'home', 'horse', 'house', 'ice', 'ink', 'jam', 'jar', 'job', 'key', 'king', 'kiss', 'kite', 'knife', 'lake', 'lamp', 'land', 'law', 'leaf', 'leg', 'light', 'line', 'list', 'lock', 'log', 'love', 'lunch', 'man', 'map', 'mask', 'meal', 'meat', 'men', 'milk', 'mind', 'mine', 'moon', 'mouse', 'mouth', 'name', 'neck', 'night', 'noise', 'nose', 'note', 'oil', 'page', 'pain', 'paint', 'pan', 'park', 'part', 'path', 'peace', 'pear', 'pen', 'phone', 'pie', 'pig', 'pin', 'pipe', 'place', 'plane', 'plant', 'plate', 'play', 'point', 'pole', 'pool', 'port', 'post', 'pot', 'price', 'prince', 'queen', 'race', 'rain', 'rat', 'ring', 'road', 'rock', 'room', 'root', 'rose', 'rule', 'run', 'sail', 'salt', 'sand', 'school', 'sea', 'seat', 'seed', 'shade', 'shape', 'sheep', 'shelf', 'ship', 'shirt', 'shoe', 'shop', 'shot', 'side', 'sign', 'silk', 'size', 'sky', 'sleep', 'smile', 'smoke', 'snake', 'snow', 'sock', 'son', 'song', 'sound', 'soup', 'space', 'speech', 'spoon', 'sport', 'spring', 'square', 'star', 'state', 'steam', 'steel', 'step', 'stick', 'stone', 'stop', 'store', 'storm', 'street', 'string', 'sun', 'tail', 'tea', 'team', 'test', 'text', 'thread', 'throne', 'time', 'toe', 'town', 'toy', 'train', 'tree', 'trip', 'truck', 'truth', 'tube', 'turn', 'wall', 'war', 'watch', 'wave', 'way', 'week', 'weight', 'well', 'wheel', 'wind', 'wine', 'wing', 'wish', 'wood', 'word', 'work', 'world', 'year', 'youth']\n"
     ]
    }
   ],
   "source": [
    "# Keep only the candidates that were not flagged as multi-syllable\n",
    "single_syllable_nouns_cleaned = [item for item in single_syllable_nouns if item not in not_single_syllable]\n",
    "print(single_syllable_nouns_cleaned)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9701af0e-440e-4cf7-886d-815fc720eb68",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save the cleaned list, one word per row.\n",
    "write_word_column('cleaned_nouns.csv', single_syllable_nouns_cleaned)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}