remove not single syllable

This commit is contained in:
Chang CL
2025-08-24 13:47:23 +08:00
parent 52683bacdd
commit cbeb11d0f1
4 changed files with 856 additions and 0 deletions

281
cleaned_nouns.csv Normal file
View File

@@ -0,0 +1,281 @@
a,r,t
a,s,h
a,x,e
b,a,g
b,a,l,l
b,a,r
b,a,t
b,a,y
b,e,d
b,e,e
b,e,l,l
b,e,l,t
b,e,n,c,h
b,i,r,d
b,o,a,t
b,o,o,k
b,o,o,t
b,o,w
b,o,x
b,o,y
b,r,a,n,c,h
b,r,e,a,d
b,r,i,d,g,e
b,r,u,s,h
b,u,s
b,u,s,h
c,a,k,e
c,a,n
c,a,p
c,a,r
c,a,r,d
c,a,r,t
c,a,t
c,h,a,i,n
c,h,a,i,r
c,h,a,l,k
c,h,e,e,s,e
c,h,e,s,t
c,h,i,l,d
c,h,u,r,c,h
c,l,a,s,s
c,l,o,c,k
c,l,o,u,d
c,o,a,t
c,o,d,e
c,o,i,n
c,o,u,c,h
c,o,u,r,t
c,o,w
c,r,a,b
c,r,e,a,m
c,r,o,w
c,u,p
d,a,d
d,a,y
d,e,c,k
d,e,s,k
d,o,g
d,o,o,r
d,r,e,s,s
d,r,i,n,k
d,r,o,p
d,u,c,k
d,u,s,t
e,a,r
e,a,r,t,h
e,g,g
e,y,e
f,a,c,e
f,a,c,t
f,a,r,m
f,i,e,l,d
f,i,l,e
f,i,l,m
f,i,s,h
f,l,a,g
f,l,o,o,r
f,l,y
f,o,g
f,o,o,d
f,o,o,t
f,o,r,k
f,o,x
f,r,i,e,n,d
f,r,o,g
f,r,u,i,t
g,a,m,e
g,a,t,e
g,i,r,l
g,l,a,s,s
g,l,o,v,e
g,o,a,t
g,o,d
g,o,l,d
g,r,a,s,s
g,r,a,v,e
g,r,e,e,n
g,r,o,u,n,d
g,r,o,u,p
g,u,m
g,u,n
h,a,i,r
h,a,n,d
h,a,t
h,e,a,d
h,e,a,r,t
h,e,a,t
h,i,l,l
h,o,l,e
h,o,m,e
h,o,r,s,e
h,o,u,s,e
i,c,e
i,n,k
j,a,m
j,a,r
j,o,b
k,e,y
k,i,n,g
k,i,s,s
k,i,t,e
k,n,i,f,e
l,a,k,e
l,a,m,p
l,a,n,d
l,a,w
l,e,a,f
l,e,g
l,i,g,h,t
l,i,n,e
l,i,s,t
l,o,c,k
l,o,g
l,o,v,e
l,u,n,c,h
m,a,n
m,a,p
m,a,s,k
m,e,a,l
m,e,a,t
m,e,n
m,i,l,k
m,i,n,d
m,i,n,e
m,o,o,n
m,o,u,s,e
m,o,u,t,h
n,a,m,e
n,e,c,k
n,i,g,h,t
n,o,i,s,e
n,o,s,e
n,o,t,e
o,i,l
p,a,g,e
p,a,i,n
p,a,i,n,t
p,a,n
p,a,r,k
p,a,r,t
p,a,t,h
p,e,a,c,e
p,e,a,r
p,e,n
p,h,o,n,e
p,i,e
p,i,g
p,i,n
p,i,p,e
p,l,a,c,e
p,l,a,n,e
p,l,a,n,t
p,l,a,t,e
p,l,a,y
p,o,i,n,t
p,o,l,e
p,o,o,l
p,o,r,t
p,o,s,t
p,o,t
p,r,i,c,e
p,r,i,n,c,e
q,u,e,e,n
r,a,c,e
r,a,i,n
r,a,t
r,i,n,g
r,o,a,d
r,o,c,k
r,o,o,m
r,o,o,t
r,o,s,e
r,u,l,e
r,u,n
s,a,i,l
s,a,l,t
s,a,n,d
s,c,h,o,o,l
s,e,a
s,e,a,t
s,e,e,d
s,h,a,d,e
s,h,a,p,e
s,h,e,e,p
s,h,e,l,f
s,h,i,p
s,h,i,r,t
s,h,o,e
s,h,o,p
s,h,o,t
s,i,d,e
s,i,g,n
s,i,l,k
s,i,z,e
s,k,y
s,l,e,e,p
s,m,i,l,e
s,m,o,k,e
s,n,a,k,e
s,n,o,w
s,o,c,k
s,o,n
s,o,n,g
s,o,u,n,d
s,o,u,p
s,p,a,c,e
s,p,e,e,c,h
s,p,o,o,n
s,p,o,r,t
s,p,r,i,n,g
s,q,u,a,r,e
s,t,a,r
s,t,a,t,e
s,t,e,a,m
s,t,e,e,l
s,t,e,p
s,t,i,c,k
s,t,o,n,e
s,t,o,p
s,t,o,r,e
s,t,o,r,m
s,t,r,e,e,t
s,t,r,i,n,g
s,u,n
t,a,i,l
t,e,a
t,e,a,m
t,e,s,t
t,e,x,t
t,h,r,e,a,d
t,h,r,o,n,e
t,i,m,e
t,o,e
t,o,w,n
t,o,y
t,r,a,i,n
t,r,e,e
t,r,i,p
t,r,u,c,k
t,r,u,t,h
t,u,b,e
t,u,r,n
w,a,l,l
w,a,r
w,a,t,c,h
w,a,v,e
w,a,y
w,e,e,k
w,e,i,g,h,t
w,e,l,l
w,h,e,e,l
w,i,n,d
w,i,n,e
w,i,n,g
w,i,s,h
w,o,o,d
w,o,r,d
w,o,r,k
w,o,r,l,d
y,e,a,r
y,o,u,t,h
1 a,r,t
2 a,s,h
3 a,x,e
4 b,a,g
5 b,a,l,l
6 b,a,r
7 b,a,t
8 b,a,y
9 b,e,d
10 b,e,e
11 b,e,l,l
12 b,e,l,t
13 b,e,n,c,h
14 b,i,r,d
15 b,o,a,t
16 b,o,o,k
17 b,o,o,t
18 b,o,w
19 b,o,x
20 b,o,y
21 b,r,a,n,c,h
22 b,r,e,a,d
23 b,r,i,d,g,e
24 b,r,u,s,h
25 b,u,s
26 b,u,s,h
27 c,a,k,e
28 c,a,n
29 c,a,p
30 c,a,r
31 c,a,r,d
32 c,a,r,t
33 c,a,t
34 c,h,a,i,n
35 c,h,a,i,r
36 c,h,a,l,k
37 c,h,e,e,s,e
38 c,h,e,s,t
39 c,h,i,l,d
40 c,h,u,r,c,h
41 c,l,a,s,s
42 c,l,o,c,k
43 c,l,o,u,d
44 c,o,a,t
45 c,o,d,e
46 c,o,i,n
47 c,o,u,c,h
48 c,o,u,r,t
49 c,o,w
50 c,r,a,b
51 c,r,e,a,m
52 c,r,o,w
53 c,u,p
54 d,a,d
55 d,a,y
56 d,e,c,k
57 d,e,s,k
58 d,o,g
59 d,o,o,r
60 d,r,e,s,s
61 d,r,i,n,k
62 d,r,o,p
63 d,u,c,k
64 d,u,s,t
65 e,a,r
66 e,a,r,t,h
67 e,g,g
68 e,y,e
69 f,a,c,e
70 f,a,c,t
71 f,a,r,m
72 f,i,e,l,d
73 f,i,l,e
74 f,i,l,m
75 f,i,s,h
76 f,l,a,g
77 f,l,o,o,r
78 f,l,y
79 f,o,g
80 f,o,o,d
81 f,o,o,t
82 f,o,r,k
83 f,o,x
84 f,r,i,e,n,d
85 f,r,o,g
86 f,r,u,i,t
87 g,a,m,e
88 g,a,t,e
89 g,i,r,l
90 g,l,a,s,s
91 g,l,o,v,e
92 g,o,a,t
93 g,o,d
94 g,o,l,d
95 g,r,a,s,s
96 g,r,a,v,e
97 g,r,e,e,n
98 g,r,o,u,n,d
99 g,r,o,u,p
100 g,u,m
101 g,u,n
102 h,a,i,r
103 h,a,n,d
104 h,a,t
105 h,e,a,d
106 h,e,a,r,t
107 h,e,a,t
108 h,i,l,l
109 h,o,l,e
110 h,o,m,e
111 h,o,r,s,e
112 h,o,u,s,e
113 i,c,e
114 i,n,k
115 j,a,m
116 j,a,r
117 j,o,b
118 k,e,y
119 k,i,n,g
120 k,i,s,s
121 k,i,t,e
122 k,n,i,f,e
123 l,a,k,e
124 l,a,m,p
125 l,a,n,d
126 l,a,w
127 l,e,a,f
128 l,e,g
129 l,i,g,h,t
130 l,i,n,e
131 l,i,s,t
132 l,o,c,k
133 l,o,g
134 l,o,v,e
135 l,u,n,c,h
136 m,a,n
137 m,a,p
138 m,a,s,k
139 m,e,a,l
140 m,e,a,t
141 m,e,n
142 m,i,l,k
143 m,i,n,d
144 m,i,n,e
145 m,o,o,n
146 m,o,u,s,e
147 m,o,u,t,h
148 n,a,m,e
149 n,e,c,k
150 n,i,g,h,t
151 n,o,i,s,e
152 n,o,s,e
153 n,o,t,e
154 o,i,l
155 p,a,g,e
156 p,a,i,n
157 p,a,i,n,t
158 p,a,n
159 p,a,r,k
160 p,a,r,t
161 p,a,t,h
162 p,e,a,c,e
163 p,e,a,r
164 p,e,n
165 p,h,o,n,e
166 p,i,e
167 p,i,g
168 p,i,n
169 p,i,p,e
170 p,l,a,c,e
171 p,l,a,n,e
172 p,l,a,n,t
173 p,l,a,t,e
174 p,l,a,y
175 p,o,i,n,t
176 p,o,l,e
177 p,o,o,l
178 p,o,r,t
179 p,o,s,t
180 p,o,t
181 p,r,i,c,e
182 p,r,i,n,c,e
183 q,u,e,e,n
184 r,a,c,e
185 r,a,i,n
186 r,a,t
187 r,i,n,g
188 r,o,a,d
189 r,o,c,k
190 r,o,o,m
191 r,o,o,t
192 r,o,s,e
193 r,u,l,e
194 r,u,n
195 s,a,i,l
196 s,a,l,t
197 s,a,n,d
198 s,c,h,o,o,l
199 s,e,a
200 s,e,a,t
201 s,e,e,d
202 s,h,a,d,e
203 s,h,a,p,e
204 s,h,e,e,p
205 s,h,e,l,f
206 s,h,i,p
207 s,h,i,r,t
208 s,h,o,e
209 s,h,o,p
210 s,h,o,t
211 s,i,d,e
212 s,i,g,n
213 s,i,l,k
214 s,i,z,e
215 s,k,y
216 s,l,e,e,p
217 s,m,i,l,e
218 s,m,o,k,e
219 s,n,a,k,e
220 s,n,o,w
221 s,o,c,k
222 s,o,n
223 s,o,n,g
224 s,o,u,n,d
225 s,o,u,p
226 s,p,a,c,e
227 s,p,e,e,c,h
228 s,p,o,o,n
229 s,p,o,r,t
230 s,p,r,i,n,g
231 s,q,u,a,r,e
232 s,t,a,r
233 s,t,a,t,e
234 s,t,e,a,m
235 s,t,e,e,l
236 s,t,e,p
237 s,t,i,c,k
238 s,t,o,n,e
239 s,t,o,p
240 s,t,o,r,e
241 s,t,o,r,m
242 s,t,r,e,e,t
243 s,t,r,i,n,g
244 s,u,n
245 t,a,i,l
246 t,e,a
247 t,e,a,m
248 t,e,s,t
249 t,e,x,t
250 t,h,r,e,a,d
251 t,h,r,o,n,e
252 t,i,m,e
253 t,o,e
254 t,o,w,n
255 t,o,y
256 t,r,a,i,n
257 t,r,e,e
258 t,r,i,p
259 t,r,u,c,k
260 t,r,u,t,h
261 t,u,b,e
262 t,u,r,n
263 w,a,l,l
264 w,a,r
265 w,a,t,c,h
266 w,a,v,e
267 w,a,y
268 w,e,e,k
269 w,e,i,g,h,t
270 w,e,l,l
271 w,h,e,e,l
272 w,i,n,d
273 w,i,n,e
274 w,i,n,g
275 w,i,s,h
276 w,o,o,d
277 w,o,r,d
278 w,o,r,k
279 w,o,r,l,d
280 y,e,a,r
281 y,o,u,t,h

311
nouns.csv Normal file
View File

@@ -0,0 +1,311 @@
a,r,t
a,s,h
a,x,e
b,a,g
b,a,l,l
b,a,r
b,a,t
b,a,y
b,e,d
b,e,e
b,e,l,l
b,e,l,t
b,e,n,c,h
b,i,r,d
b,o,a,t
b,o,o,k
b,o,o,t
b,o,w
b,o,x
b,o,y
b,r,a,n,c,h
b,r,e,a,d
b,r,i,d,g,e
b,r,u,s,h
b,u,c,k,e,t
b,u,s
b,u,s,h
c,a,k,e
c,a,n
c,a,p
c,a,r
c,a,r,d
c,a,r,t
c,a,t
c,h,a,i,n
c,h,a,i,r
c,h,a,l,k
c,h,e,e,s,e
c,h,e,s,t
c,h,i,c,k,e,n
c,h,i,l,d
c,h,u,r,c,h
c,i,t,y
c,l,a,s,s
c,l,o,c,k
c,l,o,u,d
c,o,a,t
c,o,d,e
c,o,i,n
c,o,u,c,h
c,o,u,r,t
c,o,w
c,r,a,b
c,r,e,a,m
c,r,o,w
c,u,p
c,u,r,t,a,i,n
d,a,d
d,a,y
d,e,c,k
d,e,s,k
d,o,g
d,o,o,r
d,r,e,s,s
d,r,i,n,k
d,r,o,p
d,u,c,k
d,u,s,t
e,a,r
e,a,r,t,h
e,g,g
e,y,e
f,a,c,e
f,a,c,t
f,a,r,m
f,i,e,l,d
f,i,l,e
f,i,l,m
f,i,r,e
f,i,s,h
f,l,a,g
f,l,o,o,r
f,l,o,w,e,r
f,l,y
f,o,g
f,o,o,d
f,o,o,t
f,o,r,k
f,o,x
f,r,i,e,n,d
f,r,o,g
f,r,u,i,t
g,a,m,e
g,a,t,e
g,i,r,l
g,l,a,s,s
g,l,o,v,e
g,o,a,t
g,o,d
g,o,l,d
g,r,a,s,s
g,r,a,v,e
g,r,e,e,n
g,r,o,u,n,d
g,r,o,u,p
g,u,m
g,u,n
h,a,i,r
h,a,n,d
h,a,t
h,e,a,d
h,e,a,r,t
h,e,a,t
h,i,l,l
h,o,l,e
h,o,m,e
h,o,r,s,e
h,o,u,s,e
i,c,e
i,n,k
j,a,c,k,e,t
j,a,m
j,a,r
j,o,b
k,e,y
k,i,n,g
k,i,s,s
k,i,t,e
k,n,i,f,e
l,a,d,y
l,a,k,e
l,a,m,p
l,a,n,d
l,a,w
l,e,a,f
l,e,g
l,e,t,t,e,r
l,i,g,h,t
l,i,n,e
l,i,o,n
l,i,s,t
l,o,c,k
l,o,g
l,o,v,e
l,u,n,c,h
m,a,n
m,a,p
m,a,s,k
m,e,a,l
m,e,a,t
m,e,n
m,i,l,k
m,i,n,d
m,i,n,e
m,o,o,n
m,o,r,n,i,n,g
m,o,t,h,e,r
m,o,u,s,e
m,o,u,t,h
n,a,m,e
n,e,c,k
n,i,g,h,t
n,o,i,s,e
n,o,s,e
n,o,t,e
o,c,e,a,n
o,f,f,i,c,e
o,i,l
o,r,a,n,g,e
p,a,g,e
p,a,i,n
p,a,i,n,t
p,a,n
p,a,p,e,r
p,a,r,k
p,a,r,t
p,a,r,t,y
p,a,t,h
p,e,a,c,e
p,e,a,r
p,e,n
p,e,n,c,i,l
p,e,o,p,l,e
p,h,o,n,e
p,h,o,t,o
p,i,e
p,i,g
p,i,n
p,i,p,e
p,l,a,c,e
p,l,a,n,e
p,l,a,n,t
p,l,a,t,e
p,l,a,y
p,o,i,n,t
p,o,l,e
p,o,o,l
p,o,r,t
p,o,s,t
p,o,t
p,r,i,c,e
p,r,i,n,c,e
q,u,e,e,n
r,a,c,e
r,a,i,n
r,a,t
r,i,n,g
r,i,v,e,r
r,o,a,d
r,o,c,k
r,o,o,m
r,o,o,t
r,o,s,e
r,u,l,e
r,u,n
s,a,i,l
s,a,l,t
s,a,n,d
s,c,h,o,o,l
s,e,a
s,e,a,t
s,e,e,d
s,h,a,d,e
s,h,a,p,e
s,h,e,e,p
s,h,e,l,f
s,h,i,p
s,h,i,r,t
s,h,o,e
s,h,o,p
s,h,o,t
s,i,d,e
s,i,g,n
s,i,l,k
s,i,s,t,e,r
s,i,z,e
s,k,y
s,l,e,e,p
s,m,i,l,e
s,m,o,k,e
s,n,a,k,e
s,n,o,w
s,o,c,k
s,o,n
s,o,n,g
s,o,u,n,d
s,o,u,p
s,p,a,c,e
s,p,e,e,c,h
s,p,o,o,n
s,p,o,r,t
s,p,r,i,n,g
s,q,u,a,r,e
s,t,a,r
s,t,a,t,e
s,t,e,a,m
s,t,e,e,l
s,t,e,p
s,t,i,c,k
s,t,o,n,e
s,t,o,p
s,t,o,r,e
s,t,o,r,m
s,t,r,e,e,t
s,t,r,i,n,g
s,t,u,d,e,n,t
s,u,n
t,a,b,l,e
t,a,i,l
t,e,a
t,e,a,c,h,e,r
t,e,a,m
t,e,s,t
t,e,x,t
t,h,r,e,a,d
t,h,r,o,n,e
t,i,m,e
t,o,e
t,o,w,n
t,o,y
t,r,a,i,n
t,r,e,e
t,r,i,p
t,r,u,c,k
t,r,u,t,h
t,u,b,e
t,u,r,n
w,a,l,l
w,a,r
w,a,t,c,h
w,a,t,e,r
w,a,v,e
w,a,y
w,e,e,k
w,e,i,g,h,t
w,e,l,l
w,h,e,e,l
w,i,n,d
w,i,n,d,o,w
w,i,n,e
w,i,n,g
w,i,n,t,e,r
w,i,r,e
w,i,s,h
w,o,m,a,n
w,o,o,d
w,o,r,d
w,o,r,k
w,o,r,l,d
y,e,a,r
y,o,u,t,h
1 a,r,t
2 a,s,h
3 a,x,e
4 b,a,g
5 b,a,l,l
6 b,a,r
7 b,a,t
8 b,a,y
9 b,e,d
10 b,e,e
11 b,e,l,l
12 b,e,l,t
13 b,e,n,c,h
14 b,i,r,d
15 b,o,a,t
16 b,o,o,k
17 b,o,o,t
18 b,o,w
19 b,o,x
20 b,o,y
21 b,r,a,n,c,h
22 b,r,e,a,d
23 b,r,i,d,g,e
24 b,r,u,s,h
25 b,u,c,k,e,t
26 b,u,s
27 b,u,s,h
28 c,a,k,e
29 c,a,n
30 c,a,p
31 c,a,r
32 c,a,r,d
33 c,a,r,t
34 c,a,t
35 c,h,a,i,n
36 c,h,a,i,r
37 c,h,a,l,k
38 c,h,e,e,s,e
39 c,h,e,s,t
40 c,h,i,c,k,e,n
41 c,h,i,l,d
42 c,h,u,r,c,h
43 c,i,t,y
44 c,l,a,s,s
45 c,l,o,c,k
46 c,l,o,u,d
47 c,o,a,t
48 c,o,d,e
49 c,o,i,n
50 c,o,u,c,h
51 c,o,u,r,t
52 c,o,w
53 c,r,a,b
54 c,r,e,a,m
55 c,r,o,w
56 c,u,p
57 c,u,r,t,a,i,n
58 d,a,d
59 d,a,y
60 d,e,c,k
61 d,e,s,k
62 d,o,g
63 d,o,o,r
64 d,r,e,s,s
65 d,r,i,n,k
66 d,r,o,p
67 d,u,c,k
68 d,u,s,t
69 e,a,r
70 e,a,r,t,h
71 e,g,g
72 e,y,e
73 f,a,c,e
74 f,a,c,t
75 f,a,r,m
76 f,i,e,l,d
77 f,i,l,e
78 f,i,l,m
79 f,i,r,e
80 f,i,s,h
81 f,l,a,g
82 f,l,o,o,r
83 f,l,o,w,e,r
84 f,l,y
85 f,o,g
86 f,o,o,d
87 f,o,o,t
88 f,o,r,k
89 f,o,x
90 f,r,i,e,n,d
91 f,r,o,g
92 f,r,u,i,t
93 g,a,m,e
94 g,a,t,e
95 g,i,r,l
96 g,l,a,s,s
97 g,l,o,v,e
98 g,o,a,t
99 g,o,d
100 g,o,l,d
101 g,r,a,s,s
102 g,r,a,v,e
103 g,r,e,e,n
104 g,r,o,u,n,d
105 g,r,o,u,p
106 g,u,m
107 g,u,n
108 h,a,i,r
109 h,a,n,d
110 h,a,t
111 h,e,a,d
112 h,e,a,r,t
113 h,e,a,t
114 h,i,l,l
115 h,o,l,e
116 h,o,m,e
117 h,o,r,s,e
118 h,o,u,s,e
119 i,c,e
120 i,n,k
121 j,a,c,k,e,t
122 j,a,m
123 j,a,r
124 j,o,b
125 k,e,y
126 k,i,n,g
127 k,i,s,s
128 k,i,t,e
129 k,n,i,f,e
130 l,a,d,y
131 l,a,k,e
132 l,a,m,p
133 l,a,n,d
134 l,a,w
135 l,e,a,f
136 l,e,g
137 l,e,t,t,e,r
138 l,i,g,h,t
139 l,i,n,e
140 l,i,o,n
141 l,i,s,t
142 l,o,c,k
143 l,o,g
144 l,o,v,e
145 l,u,n,c,h
146 m,a,n
147 m,a,p
148 m,a,s,k
149 m,e,a,l
150 m,e,a,t
151 m,e,n
152 m,i,l,k
153 m,i,n,d
154 m,i,n,e
155 m,o,o,n
156 m,o,r,n,i,n,g
157 m,o,t,h,e,r
158 m,o,u,s,e
159 m,o,u,t,h
160 n,a,m,e
161 n,e,c,k
162 n,i,g,h,t
163 n,o,i,s,e
164 n,o,s,e
165 n,o,t,e
166 o,c,e,a,n
167 o,f,f,i,c,e
168 o,i,l
169 o,r,a,n,g,e
170 p,a,g,e
171 p,a,i,n
172 p,a,i,n,t
173 p,a,n
174 p,a,p,e,r
175 p,a,r,k
176 p,a,r,t
177 p,a,r,t,y
178 p,a,t,h
179 p,e,a,c,e
180 p,e,a,r
181 p,e,n
182 p,e,n,c,i,l
183 p,e,o,p,l,e
184 p,h,o,n,e
185 p,h,o,t,o
186 p,i,e
187 p,i,g
188 p,i,n
189 p,i,p,e
190 p,l,a,c,e
191 p,l,a,n,e
192 p,l,a,n,t
193 p,l,a,t,e
194 p,l,a,y
195 p,o,i,n,t
196 p,o,l,e
197 p,o,o,l
198 p,o,r,t
199 p,o,s,t
200 p,o,t
201 p,r,i,c,e
202 p,r,i,n,c,e
203 q,u,e,e,n
204 r,a,c,e
205 r,a,i,n
206 r,a,t
207 r,i,n,g
208 r,i,v,e,r
209 r,o,a,d
210 r,o,c,k
211 r,o,o,m
212 r,o,o,t
213 r,o,s,e
214 r,u,l,e
215 r,u,n
216 s,a,i,l
217 s,a,l,t
218 s,a,n,d
219 s,c,h,o,o,l
220 s,e,a
221 s,e,a,t
222 s,e,e,d
223 s,h,a,d,e
224 s,h,a,p,e
225 s,h,e,e,p
226 s,h,e,l,f
227 s,h,i,p
228 s,h,i,r,t
229 s,h,o,e
230 s,h,o,p
231 s,h,o,t
232 s,i,d,e
233 s,i,g,n
234 s,i,l,k
235 s,i,s,t,e,r
236 s,i,z,e
237 s,k,y
238 s,l,e,e,p
239 s,m,i,l,e
240 s,m,o,k,e
241 s,n,a,k,e
242 s,n,o,w
243 s,o,c,k
244 s,o,n
245 s,o,n,g
246 s,o,u,n,d
247 s,o,u,p
248 s,p,a,c,e
249 s,p,e,e,c,h
250 s,p,o,o,n
251 s,p,o,r,t
252 s,p,r,i,n,g
253 s,q,u,a,r,e
254 s,t,a,r
255 s,t,a,t,e
256 s,t,e,a,m
257 s,t,e,e,l
258 s,t,e,p
259 s,t,i,c,k
260 s,t,o,n,e
261 s,t,o,p
262 s,t,o,r,e
263 s,t,o,r,m
264 s,t,r,e,e,t
265 s,t,r,i,n,g
266 s,t,u,d,e,n,t
267 s,u,n
268 t,a,b,l,e
269 t,a,i,l
270 t,e,a
271 t,e,a,c,h,e,r
272 t,e,a,m
273 t,e,s,t
274 t,e,x,t
275 t,h,r,e,a,d
276 t,h,r,o,n,e
277 t,i,m,e
278 t,o,e
279 t,o,w,n
280 t,o,y
281 t,r,a,i,n
282 t,r,e,e
283 t,r,i,p
284 t,r,u,c,k
285 t,r,u,t,h
286 t,u,b,e
287 t,u,r,n
288 w,a,l,l
289 w,a,r
290 w,a,t,c,h
291 w,a,t,e,r
292 w,a,v,e
293 w,a,y
294 w,e,e,k
295 w,e,i,g,h,t
296 w,e,l,l
297 w,h,e,e,l
298 w,i,n,d
299 w,i,n,d,o,w
300 w,i,n,e
301 w,i,n,g
302 w,i,n,t,e,r
303 w,i,r,e
304 w,i,s,h
305 w,o,m,a,n
306 w,o,o,d
307 w,o,r,d
308 w,o,r,k
309 w,o,r,l,d
310 y,e,a,r
311 y,o,u,t,h

264
words_syllables.ipynb Normal file
View File

@@ -0,0 +1,264 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 11,
"id": "77365834-52b5-4443-8cc8-4ffdf0a847ba",
"metadata": {},
"outputs": [],
"source": [
"import csv"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "d1c44d5e-ed2f-4122-b2ac-fcb9dca2358f",
"metadata": {},
"outputs": [],
"source": [
"single_syllable_nouns = [\n",
" \"art\", \"ash\", \"axe\", \"bag\", \"ball\", \"bar\", \"bat\", \"bay\", \"bed\", \"bee\",\n",
" \"bell\", \"belt\", \"bench\", \"bird\", \"boat\", \"book\", \"boot\", \"bow\", \"box\", \"boy\",\n",
" \"branch\", \"bread\", \"bridge\", \"brush\", \"bucket\", \"bus\", \"bush\", \"cake\", \"can\",\n",
" \"cap\", \"car\", \"card\", \"cart\", \"cat\", \"chain\", \"chair\", \"chalk\", \"cheese\", \"chest\",\n",
" \"chicken\", \"child\", \"church\", \"city\", \"class\", \"clock\", \"cloud\", \"coat\", \"code\",\n",
" \"coin\", \"couch\", \"court\", \"cow\", \"crab\", \"cream\", \"crow\", \"cup\", \"curtain\", \"dad\",\n",
" \"day\", \"deck\", \"desk\", \"dog\", \"door\", \"dress\", \"drink\", \"drop\", \"duck\", \"dust\",\n",
" \"ear\", \"earth\", \"egg\", \"eye\", \"face\", \"fact\", \"farm\", \"field\", \"file\", \"film\",\n",
" \"fire\", \"fish\", \"flag\", \"floor\", \"flower\", \"fly\", \"fog\", \"food\", \"foot\", \"fork\",\n",
" \"fox\", \"friend\", \"frog\", \"fruit\", \"game\", \"gate\", \"girl\", \"glass\", \"glove\", \"goat\",\n",
" \"god\", \"gold\", \"grass\", \"grave\", \"green\", \"ground\", \"group\", \"gum\", \"gun\", \"hair\",\n",
" \"hand\", \"hat\", \"head\", \"heart\", \"heat\", \"hill\", \"hole\", \"home\", \"horse\", \"house\",\n",
" \"ice\", \"ink\", \"jacket\", \"jam\", \"jar\", \"job\", \"key\", \"king\", \"kiss\", \"kite\",\n",
" \"knife\", \"lady\", \"lake\", \"lamp\", \"land\", \"law\", \"leaf\", \"leg\", \"letter\", \"light\",\n",
" \"line\", \"lion\", \"list\", \"lock\", \"log\", \"love\", \"lunch\", \"man\", \"map\", \"mask\",\n",
" \"meal\", \"meat\", \"men\", \"milk\", \"mind\", \"mine\", \"moon\", \"morning\", \"mother\", \"mouse\",\n",
" \"mouth\", \"name\", \"neck\", \"night\", \"noise\", \"nose\", \"note\", \"ocean\", \"office\", \"oil\",\n",
" \"orange\", \"page\", \"pain\", \"paint\", \"pan\", \"paper\", \"park\", \"part\", \"party\", \"path\",\n",
" \"peace\", \"pear\", \"pen\", \"pencil\", \"people\", \"phone\", \"photo\", \"pie\", \"pig\", \"pin\",\n",
" \"pipe\", \"place\", \"plane\", \"plant\", \"plate\", \"play\", \"point\", \"pole\", \"pool\", \"port\",\n",
" \"post\", \"pot\", \"price\", \"prince\", \"queen\", \"race\", \"rain\", \"rat\", \"ring\", \"river\",\n",
" \"road\", \"rock\", \"room\", \"root\", \"rose\", \"rule\", \"run\", \"sail\", \"salt\", \"sand\",\n",
" \"school\", \"sea\", \"seat\", \"seed\", \"shade\", \"shape\", \"sheep\", \"shelf\", \"ship\", \"shirt\",\n",
" \"shoe\", \"shop\", \"shot\", \"side\", \"sign\", \"silk\", \"sister\", \"size\", \"sky\", \"sleep\",\n",
" \"smile\", \"smoke\", \"snake\", \"snow\", \"sock\", \"son\", \"song\", \"sound\", \"soup\", \"space\",\n",
" \"speech\", \"spoon\", \"sport\", \"spring\", \"square\", \"star\", \"state\", \"steam\", \"steel\",\n",
" \"step\", \"stick\", \"stone\", \"stop\", \"store\", \"storm\", \"street\", \"string\", \"student\", \"sun\",\n",
" \"table\", \"tail\", \"tea\", \"teacher\", \"team\", \"test\", \"text\", \"thread\", \"throne\", \"time\",\n",
" \"toe\", \"town\", \"toy\", \"train\", \"tree\", \"trip\", \"truck\", \"truth\", \"tube\", \"turn\",\n",
" \"wall\", \"war\", \"watch\", \"water\", \"wave\", \"way\", \"week\", \"weight\", \"well\", \"wheel\",\n",
" \"wind\", \"window\", \"wine\", \"wing\", \"winter\", \"wire\", \"wish\", \"woman\", \"wood\", \"word\",\n",
" \"work\", \"world\", \"year\", \"youth\"\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c7b08367-f320-459e-9dda-6392e533e979",
"metadata": {},
"outputs": [],
"source": [
"# Write the candidate nouns to nouns.csv, one word per row.\n",
"with open('nouns.csv', 'w', newline='') as csvfile:\n",
"    writer = csv.writer(csvfile)\n",
"    # writerows() expects an iterable of rows, and a bare string is itself an\n",
"    # iterable of characters, so passing the word list directly would emit\n",
"    # 'a,r,t' instead of 'art'. Wrap each word in a one-field row.\n",
"    writer.writerows([word] for word in single_syllable_nouns)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "1dcce87e-6b3f-4e45-b28c-499bbe1d33c9",
"metadata": {},
"outputs": [],
"source": [
"import nltk\n",
"from nltk.corpus import cmudict"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "86f80604-fb54-46bd-ab2a-5331ec7e5411",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"[nltk_data] Downloading package cmudict to /home/changcl/nltk_data...\n",
"[nltk_data] Package cmudict is already up-to-date!\n"
]
}
],
"source": [
"# Download the CMU Pronouncing Dictionary\n",
"nltk.download('cmudict')\n",
"\n",
"d = cmudict.dict()\n",
"\n",
"def count_syllables(word):\n",
" try:\n",
" return [len(list(y for y in x if y[-1].isdigit())) for x in d[word.lower()]][0]\n",
" except KeyError:\n",
" return 0"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ff7f7385-d2b4-439b-9079-6de0775b9435",
"metadata": {},
"outputs": [],
"source": [
"# Spot-check count_syllables on a handful of sample words\n",
"test_words = [\"computer\", \"cat\", \"elephant\", \"dog\", \"important\"]\n",
"for word in test_words:\n",
"    syllables = count_syllables(word)\n",
"    print(f\"'{word}': {syllables} syllables\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "51222d43-baaa-48ed-8b9f-58fc22bbe769",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"'bucket': 2 syllables\n",
"'chicken': 2 syllables\n",
"'city': 2 syllables\n",
"'curtain': 2 syllables\n",
"'fire': 2 syllables\n",
"'flower': 2 syllables\n",
"'jacket': 2 syllables\n",
"'lady': 2 syllables\n",
"'letter': 2 syllables\n",
"'lion': 2 syllables\n",
"'morning': 2 syllables\n",
"'mother': 2 syllables\n",
"'ocean': 2 syllables\n",
"'office': 2 syllables\n",
"'orange': 2 syllables\n",
"'paper': 2 syllables\n",
"'party': 2 syllables\n",
"'pencil': 2 syllables\n",
"'people': 2 syllables\n",
"'photo': 2 syllables\n",
"'river': 2 syllables\n",
"'sister': 2 syllables\n",
"'student': 2 syllables\n",
"'table': 2 syllables\n",
"'teacher': 2 syllables\n",
"'water': 2 syllables\n",
"'window': 2 syllables\n",
"'winter': 2 syllables\n",
"'wire': 2 syllables\n",
"'woman': 2 syllables\n"
]
}
],
"source": [
"# Print every candidate noun that count_syllables reports as having more than one syllable\n",
"for word in single_syllable_nouns:\n",
"    count = count_syllables(word)\n",
"    if count <= 1:\n",
"        # One syllable, or not found in the dictionary (count 0): nothing to report\n",
"        continue\n",
"    print(f\"'{word}': {count} syllables\")"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "218a5ad4-33b7-4e73-af1b-ba8c6303f012",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['bucket', 'chicken', 'city', 'curtain', 'fire', 'flower', 'jacket', 'lady', 'letter', 'lion', 'morning', 'mother', 'ocean', 'office', 'orange', 'paper', 'party', 'pencil', 'people', 'photo', 'river', 'sister', 'student', 'table', 'teacher', 'water', 'window', 'winter', 'wire', 'woman']\n"
]
}
],
"source": [
"# Collect, in list order, the words count_syllables reports as having more than one syllable\n",
"not_single_syllable = [\n",
"    candidate\n",
"    for candidate in single_syllable_nouns\n",
"    if count_syllables(candidate) > 1\n",
"]\n",
"print(not_single_syllable)"
]
},
{
"cell_type": "markdown",
"id": "629d364b-9120-4615-8e04-8704a9ccddf6",
"metadata": {},
"source": [
"```\n",
"list_1 = ['apple', 'banana', 'orange', 'grape', 'kiwi']\n",
"list_2 = ['banana', 'kiwi']\n",
"\n",
"# Remove items from list_1 that are in list_2\n",
"list_1 = [item for item in list_1 if item not in list_2]\n",
"\n",
"print(list_1) # Output: ['apple', 'orange', 'grape']\n",
"```"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "5c2a24e2-f027-40c9-aca2-ddb8a1a4d969",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['art', 'ash', 'axe', 'bag', 'ball', 'bar', 'bat', 'bay', 'bed', 'bee', 'bell', 'belt', 'bench', 'bird', 'boat', 'book', 'boot', 'bow', 'box', 'boy', 'branch', 'bread', 'bridge', 'brush', 'bus', 'bush', 'cake', 'can', 'cap', 'car', 'card', 'cart', 'cat', 'chain', 'chair', 'chalk', 'cheese', 'chest', 'child', 'church', 'class', 'clock', 'cloud', 'coat', 'code', 'coin', 'couch', 'court', 'cow', 'crab', 'cream', 'crow', 'cup', 'dad', 'day', 'deck', 'desk', 'dog', 'door', 'dress', 'drink', 'drop', 'duck', 'dust', 'ear', 'earth', 'egg', 'eye', 'face', 'fact', 'farm', 'field', 'file', 'film', 'fish', 'flag', 'floor', 'fly', 'fog', 'food', 'foot', 'fork', 'fox', 'friend', 'frog', 'fruit', 'game', 'gate', 'girl', 'glass', 'glove', 'goat', 'god', 'gold', 'grass', 'grave', 'green', 'ground', 'group', 'gum', 'gun', 'hair', 'hand', 'hat', 'head', 'heart', 'heat', 'hill', 'hole', 'home', 'horse', 'house', 'ice', 'ink', 'jam', 'jar', 'job', 'key', 'king', 'kiss', 'kite', 'knife', 'lake', 'lamp', 'land', 'law', 'leaf', 'leg', 'light', 'line', 'list', 'lock', 'log', 'love', 'lunch', 'man', 'map', 'mask', 'meal', 'meat', 'men', 'milk', 'mind', 'mine', 'moon', 'mouse', 'mouth', 'name', 'neck', 'night', 'noise', 'nose', 'note', 'oil', 'page', 'pain', 'paint', 'pan', 'park', 'part', 'path', 'peace', 'pear', 'pen', 'phone', 'pie', 'pig', 'pin', 'pipe', 'place', 'plane', 'plant', 'plate', 'play', 'point', 'pole', 'pool', 'port', 'post', 'pot', 'price', 'prince', 'queen', 'race', 'rain', 'rat', 'ring', 'road', 'rock', 'room', 'root', 'rose', 'rule', 'run', 'sail', 'salt', 'sand', 'school', 'sea', 'seat', 'seed', 'shade', 'shape', 'sheep', 'shelf', 'ship', 'shirt', 'shoe', 'shop', 'shot', 'side', 'sign', 'silk', 'size', 'sky', 'sleep', 'smile', 'smoke', 'snake', 'snow', 'sock', 'son', 'song', 'sound', 'soup', 'space', 'speech', 'spoon', 'sport', 'spring', 'square', 'star', 'state', 'steam', 'steel', 'step', 'stick', 'stone', 'stop', 'store', 'storm', 'street', 'string', 'sun', 'tail', 
'tea', 'team', 'test', 'text', 'thread', 'throne', 'time', 'toe', 'town', 'toy', 'train', 'tree', 'trip', 'truck', 'truth', 'tube', 'turn', 'wall', 'war', 'watch', 'wave', 'way', 'week', 'weight', 'well', 'wheel', 'wind', 'wine', 'wing', 'wish', 'wood', 'word', 'work', 'world', 'year', 'youth']\n"
]
}
],
"source": [
"# Keep, in their original order, the nouns that were not flagged as multi-syllable\n",
"excluded = set(not_single_syllable)\n",
"single_syllable_nouns_cleaned = [\n",
"    item for item in single_syllable_nouns if item not in excluded\n",
"]\n",
"print(single_syllable_nouns_cleaned)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "9701af0e-440e-4cf7-886d-815fc720eb68",
"metadata": {},
"outputs": [],
"source": [
"# Write the filtered nouns to cleaned_nouns.csv, one word per row.\n",
"with open('cleaned_nouns.csv', 'w', newline='') as csvfile:\n",
"    writer = csv.writer(csvfile)\n",
"    # writerows() expects an iterable of rows, and a bare string is itself an\n",
"    # iterable of characters, so passing the word list directly would emit\n",
"    # 'a,r,t' instead of 'art'. Wrap each word in a one-field row.\n",
"    writer.writerows([word] for word in single_syllable_nouns_cleaned)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.11"
}
},
"nbformat": 4,
"nbformat_minor": 5
}