13 KiB
13 KiB
In [11]:
import csv
In [1]:
# Candidate list of short, common English nouns (lowercase, roughly alphabetical).
# NOTE: despite the name, this raw list still contains multi-syllable entries
# (e.g. "bucket", "chicken", "city"). Later cells look each word up with
# count_syllables() and drop the offenders to build single_syllable_nouns_cleaned.
single_syllable_nouns = [
"art", "ash", "axe", "bag", "ball", "bar", "bat", "bay", "bed", "bee",
"bell", "belt", "bench", "bird", "boat", "book", "boot", "bow", "box", "boy",
"branch", "bread", "bridge", "brush", "bucket", "bus", "bush", "cake", "can",
"cap", "car", "card", "cart", "cat", "chain", "chair", "chalk", "cheese", "chest",
"chicken", "child", "church", "city", "class", "clock", "cloud", "coat", "code",
"coin", "couch", "court", "cow", "crab", "cream", "crow", "cup", "curtain", "dad",
"day", "deck", "desk", "dog", "door", "dress", "drink", "drop", "duck", "dust",
"ear", "earth", "egg", "eye", "face", "fact", "farm", "field", "file", "film",
"fire", "fish", "flag", "floor", "flower", "fly", "fog", "food", "foot", "fork",
"fox", "friend", "frog", "fruit", "game", "gate", "girl", "glass", "glove", "goat",
"god", "gold", "grass", "grave", "green", "ground", "group", "gum", "gun", "hair",
"hand", "hat", "head", "heart", "heat", "hill", "hole", "home", "horse", "house",
"ice", "ink", "jacket", "jam", "jar", "job", "key", "king", "kiss", "kite",
"knife", "lady", "lake", "lamp", "land", "law", "leaf", "leg", "letter", "light",
"line", "lion", "list", "lock", "log", "love", "lunch", "man", "map", "mask",
"meal", "meat", "men", "milk", "mind", "mine", "moon", "morning", "mother", "mouse",
"mouth", "name", "neck", "night", "noise", "nose", "note", "ocean", "office", "oil",
"orange", "page", "pain", "paint", "pan", "paper", "park", "part", "party", "path",
"peace", "pear", "pen", "pencil", "people", "phone", "photo", "pie", "pig", "pin",
"pipe", "place", "plane", "plant", "plate", "play", "point", "pole", "pool", "port",
"post", "pot", "price", "prince", "queen", "race", "rain", "rat", "ring", "river",
"road", "rock", "room", "root", "rose", "rule", "run", "sail", "salt", "sand",
"school", "sea", "seat", "seed", "shade", "shape", "sheep", "shelf", "ship", "shirt",
"shoe", "shop", "shot", "side", "sign", "silk", "sister", "size", "sky", "sleep",
"smile", "smoke", "snake", "snow", "sock", "son", "song", "sound", "soup", "space",
"speech", "spoon", "sport", "spring", "square", "star", "state", "steam", "steel",
"step", "stick", "stone", "stop", "store", "storm", "street", "string", "student", "sun",
"table", "tail", "tea", "teacher", "team", "test", "text", "thread", "throne", "time",
"toe", "town", "toy", "train", "tree", "trip", "truck", "truth", "tube", "turn",
"wall", "war", "watch", "water", "wave", "way", "week", "weight", "well", "wheel",
"wind", "window", "wine", "wing", "winter", "wire", "wish", "woman", "wood", "word",
"work", "world", "year", "youth"
]
In [ ]:
# Persist the raw candidate list to nouns.csv, one noun per row.
# csv.writer.writerows() expects an iterable of *rows*, and a bare string is
# itself an iterable of characters, so passing the list of words directly
# would write "art" as the three-column row "a,r,t". Wrapping each word in a
# one-element list yields a proper single-column CSV.
with open('nouns.csv', 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerows([word] for word in single_syllable_nouns)
In [2]:
import nltk
from nltk.corpus import cmudict
In [3]:
# Download the CMU Pronouncing Dictionary corpus through NLTK's downloader.
nltk.download('cmudict')
# `d` is the lookup table used by count_syllables(): it is indexed by a
# lowercase word and yields a list of pronunciations, each of which is a
# sequence of phoneme strings (count_syllables treats phonemes ending in a
# digit as syllable nuclei).
d = cmudict.dict()
def count_syllables(word, pronunciations=None):
    """Return the syllable count of ``word`` using its first listed pronunciation.

    A syllable is counted for every phoneme whose last character is a digit
    (in the CMU Pronouncing Dictionary, vowel phonemes carry a trailing
    stress digit such as ``AE1``).

    Args:
        word: The word to look up; it is lowercased before the lookup.
        pronunciations: Optional mapping of lowercase word -> list of
            pronunciations, each a sequence of phoneme strings. When omitted,
            the notebook-level dictionary ``d`` is used.

    Returns:
        The number of syllables in the first pronunciation, or 0 when the
        word is not in the dictionary or has no pronunciations listed.
        Callers that test ``count > 1`` therefore treat unknown words as
        "not multi-syllable".
    """
    lookup = d if pronunciations is None else pronunciations
    try:
        variants = lookup[word.lower()]
    except KeyError:
        return 0
    if not variants:
        # Defensive: an entry with no pronunciations is treated like a miss.
        return 0
    # Only the first pronunciation is consulted, so there is no need to count
    # syllables for the alternatives. The [-1:] slice keeps an empty phoneme
    # string from raising IndexError.
    return sum(1 for phoneme in variants[0] if phoneme[-1:].isdigit())
In [ ]:
# Smoke-test the syllable counter on a handful of words of varying length.
test_words = ["computer", "cat", "elephant", "dog", "important"]
for sample in test_words:
    syllable_total = count_syllables(sample)
    print(f"'{sample}': {syllable_total} syllables")
In [6]:
# Report every entry of the candidate list that the dictionary counts as
# having more than one syllable.
for noun in single_syllable_nouns:
    syllable_total = count_syllables(noun)
    if syllable_total <= 1:
        continue
    print(f"'{noun}': {syllable_total} syllables")
In [7]:
# Gather the candidate nouns that count_syllables() reports as multi-syllable,
# in their original order.
not_single_syllable = [
    noun for noun in single_syllable_nouns if count_syllables(noun) > 1
]
print(not_single_syllable)
# Toy example of the "remove everything that appears in another list" pattern
# that is applied to the noun list in the next cell.
list_1 = ['apple', 'banana', 'orange', 'grape', 'kiwi']
list_2 = ['banana', 'kiwi']
# Keep only the entries of list_1 that do not occur in list_2
list_1 = list(filter(lambda fruit: fruit not in list_2, list_1))
print(list_1)  # Output: ['apple', 'orange', 'grape']
In [9]:
# Drop every noun that was flagged as multi-syllable, preserving list order.
single_syllable_nouns_cleaned = list(
    filter(lambda noun: noun not in not_single_syllable, single_syllable_nouns)
)
print(single_syllable_nouns_cleaned)
In [12]:
# Persist the cleaned list to cleaned_nouns.csv, one noun per row.
# csv.writer.writerows() expects an iterable of *rows*; a bare string would be
# iterated character by character (e.g. "art" -> "a,r,t"), so each word is
# wrapped in a one-element list to produce a single-column CSV.
with open('cleaned_nouns.csv', 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerows([word] for word in single_syllable_nouns_cleaned)
In [14]:
from textblob import TextBlob
# Alternative method using sentiment analysis (requires: pip install textblob)
def is_negative(word, threshold=-0.1):
    """Return True when TextBlob scores ``word`` as negatively polarised.

    Args:
        word: The text to score (a single word in this notebook).
        threshold: Polarity cutoff; the word is considered negative when its
            polarity is strictly below this value. Defaults to -0.1, so
            neutral and only faintly negative words are not flagged.

    Returns:
        bool: ``True`` if ``TextBlob(word).sentiment.polarity < threshold``.
    """
    # Simple sentiment check - compare the polarity score against the cutoff
    polarity = TextBlob(word).sentiment.polarity
    return polarity < threshold
In [20]:
# List the cleaned nouns that the sentiment check would reject.
for noun in filter(is_negative, single_syllable_nouns_cleaned):
    print(noun)
In [15]:
# Final list: cleaned nouns minus those flagged as negative, order preserved.
sentiment_filtered_nouns = list(
    filter(lambda noun: not is_negative(noun), single_syllable_nouns_cleaned)
)