0%

FastText模型

训练模型

使用gensim自带的Lee语料库来训练模型

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
from pprint import pprint as print
from gensim.models.fasttext import FastText as FT_gensim
from gensim.test.utils import datapath

# Path of the Lee background corpus that ships with gensim.
corpus_file = datapath('lee_background.cor')

# Only the embedding size is set explicitly; nothing else is overridden.
model = FT_gensim(size=100)

# Build the vocabulary from the corpus file.
model.build_vocab(corpus_file=corpus_file)

# Train on the same file, passing the model's own epoch count and the
# corpus counters it exposes.
model.train(
    corpus_file=corpus_file,
    epochs=model.epochs,
    total_examples=model.corpus_count,
    total_words=model.corpus_total_words,
)

print(model)
1
<gensim.models.fasttext.FastText object at 0x000001B960B8D400>

训练超参数

用于训练模型的超参数遵循与Word2Vec相同的模式。FastText支持来自原始word2vec的以下参数:

  • model: Training architecture. Allowed values: cbow, skipgram (Default cbow)
  • size: Size of embeddings to be learnt (Default 100)
  • alpha: Initial learning rate (Default 0.025)
  • window: Context window size (Default 5)
  • min_count: Ignore words with number of occurrences below this (Default 5)
  • loss: Training objective. Allowed values: ns, hs, softmax (Default ns)
  • sample: Threshold for downsampling higher-frequency words (Default 0.001)
  • negative: Number of negative words to sample, for ns (Default 5)
  • iter: Number of epochs (Default 5)
  • sorted_vocab: Sort vocab by descending frequency (Default 1)
  • threads: Number of threads to use (Default 12)

此外,FastText还有三个附加参数:

  • min_n: min length of char ngrams (Default 3)
  • max_n: max length of char ngrams (Default 6)
  • bucket: number of buckets used for hashing ngrams (Default 2000000)

保存、加载模型

可以通过save和load方法保存和加载模型

1
2
3
4
5
6
7
8
9
10
# Save the trained model with save() and restore it with load().
import tempfile
import os

# delete=False keeps the file on disk after the `with` block closes the
# handle, so it can still be opened by name for loading below.
with tempfile.NamedTemporaryFile(prefix='saved_model_gensim-', delete=False) as tmp:
    model.save(tmp.name, separately=[])

loaded_model = FT_gensim.load(tmp.name)
print(loaded_model)

# The temporary file is not removed automatically (delete=False), so clean it up.
os.unlink(tmp.name)
1
<gensim.models.fasttext.FastText object at 0x000001B993579198>

词向量查找

FastText模型通过对属于单词的字符ngram向量求和来支持词汇表外单词的向量查找

1
2
# Vocabulary membership for a word and its plural form.
for word in ('night', 'nights'):
    print(word in model.wv.vocab)
1
2
True
False
1
# Look the vector up through model.wv: indexing the model object directly is
# deprecated and scheduled for removal in gensim 4.0.0.
print(model.wv['night'])
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
array([ 0.0953326 ,  0.00227786, -0.63099104,  0.48497984,  0.61077046,
-0.20671842, -0.2864202 , -0.00851544, 0.36318386, 0.3499794 ,
-0.67137724, -0.05059881, -0.75054985, 0.39179742, 0.23554917,
-0.00135564, -0.16808018, 0.27810544, 0.28773504, -0.42447615,
-0.21546099, 0.39643624, -0.35647205, 0.11427329, -0.8091079 ,
0.68502676, 0.21528108, 0.13505526, 0.44624037, 0.09882572,
-0.6161567 , 0.27545458, -0.00874986, -0.47377393, 0.404326 ,
0.04802122, -0.18317538, -0.09473048, 0.45228255, 0.22224163,
-0.00220438, -0.05910338, 0.3433522 , 0.01136153, 0.12559271,
0.15975659, -0.13288054, 0.24637105, 0.03247422, -0.38466278,
-0.5199409 , -0.56950164, 0.04317402, 0.01742636, 0.37354326,
-0.857144 , -0.13501711, -0.2867473 , -0.0336968 , -0.05738431,
0.2521108 , -0.03732477, -0.53491974, -0.07037822, -0.564876 ,
0.21968082, 0.10176989, 0.09708861, -0.01507892, 0.44874096,
-0.603698 , -0.5272684 , 0.04438765, -0.11443205, -0.37223595,
0.13290597, 0.30462274, 0.1901602 , -0.07255627, 0.08489503,
0.48211902, -0.0182712 , -0.1890541 , 0.4491388 , -0.29322693,
-0.33450714, 0.11029933, 0.24498533, 0.377997 , 0.33057505,
-0.23820789, 0.10019816, 0.00383096, -0.17659418, -0.25245324,
0.5648699 , 0.29140565, 0.5146172 , 0.5881857 , 0.38764346],
dtype=float32)
1
# 'nights' is absent from the vocabulary, yet a vector is still returned.
# Use model.wv: indexing the model object directly is deprecated and
# scheduled for removal in gensim 4.0.0.
print(model.wv['nights'])
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
array([ 0.08310547,  0.00255469, -0.5438613 ,  0.41711542,  0.52543885,
-0.17962681, -0.24739201, -0.00658811, 0.3127566 , 0.30214837,
-0.58069485, -0.04493077, -0.64692456, 0.33895043, 0.19910343,
-0.00081991, -0.14691554, 0.23774558, 0.24622767, -0.366245 ,
-0.18553475, 0.3425331 , -0.30774567, 0.09903311, -0.69780546,
0.59015656, 0.18548216, 0.1167931 , 0.38456377, 0.084683 ,
-0.5286803 , 0.23848796, -0.00879034, -0.41040844, 0.34827596,
0.04112032, -0.15626806, -0.08284618, 0.38985735, 0.19218934,
-0.0009631 , -0.05129346, 0.29532096, 0.01136862, 0.10713599,
0.1378806 , -0.11797399, 0.21411985, 0.02733744, -0.32998133,
-0.45055205, -0.4899713 , 0.03825624, 0.01596748, 0.32250437,
-0.7385598 , -0.11651613, -0.24604279, -0.03137114, -0.04910181,
0.21684855, -0.0329251 , -0.4606339 , -0.0611559 , -0.487312 ,
0.18886864, 0.08784177, 0.08357754, -0.01117994, 0.38745624,
-0.52011096, -0.453825 , 0.03824352, -0.09949207, -0.32059684,
0.11355332, 0.2626802 , 0.16410257, -0.06057531, 0.07242386,
0.4162787 , -0.01578102, -0.16374995, 0.3860768 , -0.25174987,
-0.28764334, 0.09705427, 0.21252516, 0.3250878 , 0.28572875,
-0.2055463 , 0.08591434, 0.00200712, -0.15187213, -0.21731909,
0.4869826 , 0.25010216, 0.44192523, 0.5064299 , 0.33416855],
dtype=float32)

测试单词是否出现在词汇表中

1
# Check the training vocabulary itself for the literal token "word".
word_in_vocab = "word" in model.wv.vocab
print(word_in_vocab)
1
False

测试单词是否有对应的词向量

1
# Membership on model.wv asks whether a vector is available for the word,
# which can be True even when the word is not in the vocabulary. The bare
# `in model` form is deprecated and scheduled for removal in gensim 4.0.0.
print("word" in model.wv)
1
True

相似操作

相似性操作的工作方式与word2vec相同。也可以使用词汇表外的单词,前提是它们至少有一个字符ngram出现在训练数据中

1
2
# Vocabulary membership for the plural form first, then the base word.
for candidate in ("nights", "night"):
    print(candidate in model.wv.vocab)
1
2
False
True
1
# Call similarity() on model.wv: the model-level wrapper is deprecated and
# scheduled for removal in gensim 4.0.0.
print(model.wv.similarity("night", "nights"))
1
0.99999285

其他相似操作

1
# Call most_similar() on model.wv: the model-level wrapper is deprecated and
# scheduled for removal in gensim 4.0.0.
print(model.wv.most_similar("nights"))
1
2
3
4
5
6
7
8
9
10
[('night', 0.999992847442627),
('rights', 0.9999881982803345),
('flights', 0.9999872446060181),
('fighting', 0.9999870657920837),
('starting', 0.9999867677688599),
('hearing', 0.9999866485595703),
('expressed', 0.9999860525131226),
('overnight', 0.9999858736991882),
('negotiating', 0.9999858140945435),
('stand', 0.9999856948852539)]
1
# Similarity between two word sets. Call n_similarity() on model.wv: the
# model-level wrapper is deprecated and scheduled for removal in gensim 4.0.0.
print(model.wv.n_similarity(['sushi', 'shop'], ['japanese', 'restaurant']))
1
0.9999506
1
# Pick the odd word out. Call doesnt_match() on model.wv: the model-level
# wrapper is deprecated and scheduled for removal in gensim 4.0.0.
print(model.wv.doesnt_match("breakfast cereal dinner lunch".split()))
1
'lunch'
1
# Analogy-style query with positive and negative words. Call most_similar()
# on model.wv: the model-level wrapper is deprecated and scheduled for
# removal in gensim 4.0.0.
print(model.wv.most_similar(positive=['baghdad', 'england'], negative=['london']))
1
2
3
4
5
6
7
8
9
10
[('temporary', 0.9996868371963501),
('Island', 0.9996848106384277),
('Anthony', 0.9996838569641113),
('only', 0.9996838569641113),
('last', 0.9996830821037292),
('Hamas', 0.9996793270111084),
('everything', 0.9996790885925293),
('met', 0.9996784329414368),
('Pentagon', 0.9996781349182129),
('campaign', 0.9996780157089233)]
1
# Evaluate on the analogy questions file bundled with gensim. Call accuracy()
# on model.wv: the model-level wrapper is deprecated and scheduled for
# removal in gensim 4.0.0.
print(model.wv.accuracy(questions=datapath('questions-words.txt')))
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
Jupyter Notebook
FastText
(自动保存)
Current Kernel Logo
Python 3
File
Edit
View
Insert
Cell
Kernel
Widgets
Help

代码
from pprint import pprint as print
from gensim.models.fasttext import FastText as FT_gensim
from gensim.test.utils import datapath

#设置文件名
corpus_file = datapath('lee_background.cor')

model = FT_gensim(size = 100)

#建立词汇表
model.build_vocab(corpus_file = corpus_file)

#训练模型
model.train(
corpus_file = corpus_file,epochs = model.epochs,
total_examples = model.corpus_count,total_words = model.corpus_total_words
)

print(model)
from pprint import pprint as print
from gensim.models.fasttext import FastText as FT_gensim
from gensim.test.utils import datapath

#设置文件名
corpus_file = datapath('lee_background.cor')

model = FT_gensim(size = 100)

#建立词汇表
model.build_vocab(corpus_file = corpus_file)

#训练模型
model.train(
corpus_file = corpus_file,epochs = model.epochs,
total_examples = model.corpus_count,total_words = model.corpus_total_words
)

print(model)
<gensim.models.fasttext.FastText object at 0x000001B960B8D400>
# Save the trained model with save() and restore it with load().
import tempfile
import os

# delete=False keeps the file on disk after the `with` block closes the
# handle, so it can still be opened by name for loading below.
with tempfile.NamedTemporaryFile(prefix='saved_model_gensim-', delete=False) as tmp:
    model.save(tmp.name, separately=[])

loaded_model = FT_gensim.load(tmp.name)
print(loaded_model)

# The temporary file is not removed automatically (delete=False), so clean it up.
os.unlink(tmp.name)
# Save the trained model with save() and restore it with load().
import tempfile
import os

# delete=False keeps the file on disk after the `with` block closes the
# handle, so it can still be opened by name for loading below.
with tempfile.NamedTemporaryFile(prefix='saved_model_gensim-', delete=False) as tmp:
    model.save(tmp.name, separately=[])

loaded_model = FT_gensim.load(tmp.name)
print(loaded_model)

# The temporary file is not removed automatically (delete=False), so clean it up.
os.unlink(tmp.name)
<gensim.models.fasttext.FastText object at 0x000001B993579198>
print('night' in model.wv.vocab)
True
print('nights' in model.wv.vocab)
False
print(model['night'])
array([ 0.0953326 , 0.00227786, -0.63099104, 0.48497984, 0.61077046,
-0.20671842, -0.2864202 , -0.00851544, 0.36318386, 0.3499794 ,
-0.67137724, -0.05059881, -0.75054985, 0.39179742, 0.23554917,
-0.00135564, -0.16808018, 0.27810544, 0.28773504, -0.42447615,
-0.21546099, 0.39643624, -0.35647205, 0.11427329, -0.8091079 ,
0.68502676, 0.21528108, 0.13505526, 0.44624037, 0.09882572,
-0.6161567 , 0.27545458, -0.00874986, -0.47377393, 0.404326 ,
0.04802122, -0.18317538, -0.09473048, 0.45228255, 0.22224163,
-0.00220438, -0.05910338, 0.3433522 , 0.01136153, 0.12559271,
0.15975659, -0.13288054, 0.24637105, 0.03247422, -0.38466278,
-0.5199409 , -0.56950164, 0.04317402, 0.01742636, 0.37354326,
-0.857144 , -0.13501711, -0.2867473 , -0.0336968 , -0.05738431,
0.2521108 , -0.03732477, -0.53491974, -0.07037822, -0.564876 ,
0.21968082, 0.10176989, 0.09708861, -0.01507892, 0.44874096,
-0.603698 , -0.5272684 , 0.04438765, -0.11443205, -0.37223595,
0.13290597, 0.30462274, 0.1901602 , -0.07255627, 0.08489503,
0.48211902, -0.0182712 , -0.1890541 , 0.4491388 , -0.29322693,
-0.33450714, 0.11029933, 0.24498533, 0.377997 , 0.33057505,
-0.23820789, 0.10019816, 0.00383096, -0.17659418, -0.25245324,
0.5648699 , 0.29140565, 0.5146172 , 0.5881857 , 0.38764346],
dtype=float32)
D:\anaconda\anaconda3\lib\site-packages\ipykernel_launcher.py:1: DeprecationWarning: Call to deprecated `__getitem__` (Method will be removed in 4.0.0, use self.wv.__getitem__() instead).
"""Entry point for launching an IPython kernel.
print(model['nights'])
array([ 0.08310547, 0.00255469, -0.5438613 , 0.41711542, 0.52543885,
-0.17962681, -0.24739201, -0.00658811, 0.3127566 , 0.30214837,
-0.58069485, -0.04493077, -0.64692456, 0.33895043, 0.19910343,
-0.00081991, -0.14691554, 0.23774558, 0.24622767, -0.366245 ,
-0.18553475, 0.3425331 , -0.30774567, 0.09903311, -0.69780546,
0.59015656, 0.18548216, 0.1167931 , 0.38456377, 0.084683 ,
-0.5286803 , 0.23848796, -0.00879034, -0.41040844, 0.34827596,
0.04112032, -0.15626806, -0.08284618, 0.38985735, 0.19218934,
-0.0009631 , -0.05129346, 0.29532096, 0.01136862, 0.10713599,
0.1378806 , -0.11797399, 0.21411985, 0.02733744, -0.32998133,
-0.45055205, -0.4899713 , 0.03825624, 0.01596748, 0.32250437,
-0.7385598 , -0.11651613, -0.24604279, -0.03137114, -0.04910181,
0.21684855, -0.0329251 , -0.4606339 , -0.0611559 , -0.487312 ,
0.18886864, 0.08784177, 0.08357754, -0.01117994, 0.38745624,
-0.52011096, -0.453825 , 0.03824352, -0.09949207, -0.32059684,
0.11355332, 0.2626802 , 0.16410257, -0.06057531, 0.07242386,
0.4162787 , -0.01578102, -0.16374995, 0.3860768 , -0.25174987,
-0.28764334, 0.09705427, 0.21252516, 0.3250878 , 0.28572875,
-0.2055463 , 0.08591434, 0.00200712, -0.15187213, -0.21731909,
0.4869826 , 0.25010216, 0.44192523, 0.5064299 , 0.33416855],
dtype=float32)
D:\anaconda\anaconda3\lib\site-packages\ipykernel_launcher.py:1: DeprecationWarning: Call to deprecated `__getitem__` (Method will be removed in 4.0.0, use self.wv.__getitem__() instead).
"""Entry point for launching an IPython kernel.
print("word" in model.wv.vocab)
False
n model
print("word" in model)
True
D:\anaconda\anaconda3\lib\site-packages\ipykernel_launcher.py:1: DeprecationWarning: Call to deprecated `__contains__` (Method will be removed in 4.0.0, use self.wv.__contains__() instead).
"""Entry point for launching an IPython kernel.
print("nights" in model.wv.vocab)
False
print("night" in model.wv.vocab)
True
print(model.similarity("night","nights"))
0.99999285
D:\anaconda\anaconda3\lib\site-packages\ipykernel_launcher.py:1: DeprecationWarning: Call to deprecated `similarity` (Method will be removed in 4.0.0, use self.wv.similarity() instead).
"""Entry point for launching an IPython kernel.
print(model.most_similar("nights"))
print(model.most_similar("nights"))
D:\anaconda\anaconda3\lib\site-packages\ipykernel_launcher.py:1: DeprecationWarning: Call to deprecated `most_similar` (Method will be removed in 4.0.0, use self.wv.most_similar() instead).
"""Entry point for launching an IPython kernel.
[('night', 0.999992847442627),
('rights', 0.9999881982803345),
('flights', 0.9999872446060181),
('fighting', 0.9999870657920837),
('starting', 0.9999867677688599),
('hearing', 0.9999866485595703),
('expressed', 0.9999860525131226),
('overnight', 0.9999858736991882),
('negotiating', 0.9999858140945435),
('stand', 0.9999856948852539)]
print(model.n_similarity(['sushi','shop'],['japanese','restaurant']))
0.9999506
D:\anaconda\anaconda3\lib\site-packages\ipykernel_launcher.py:1: DeprecationWarning: Call to deprecated `n_similarity` (Method will be removed in 4.0.0, use self.wv.n_similarity() instead).
"""Entry point for launching an IPython kernel.
print(model.doesnt_match("breakfast cereal dinner lunch".split()))
print(model.doesnt_match("breakfast cereal dinner lunch".split()))
D:\anaconda\anaconda3\lib\site-packages\ipykernel_launcher.py:1: DeprecationWarning: Call to deprecated `doesnt_match` (Method will be removed in 4.0.0, use self.wv.doesnt_match() instead).
"""Entry point for launching an IPython kernel.
'lunch'
D:\anaconda\anaconda3\lib\site-packages\gensim\models\keyedvectors.py:877: FutureWarning: arrays to stack must be passed as a "sequence" type such as list or tuple. Support for non-sequence iterables such as generators is deprecated as of NumPy 1.16 and will raise an error in the future.
vectors = vstack(self.word_vec(word, use_norm=True) for word in used_words).astype(REAL)
print(model.most_similar(positive = ['baghdad','england'],negative = ['london']))
print(model.most_similar(positive = ['baghdad','england'],negative = ['london']))
[('temporary', 0.9996868371963501),
('Island', 0.9996848106384277),
('Anthony', 0.9996838569641113),
('only', 0.9996838569641113),
('last', 0.9996830821037292),
('Hamas', 0.9996793270111084),
('everything', 0.9996790885925293),
('met', 0.9996784329414368),
('Pentagon', 0.9996781349182129),
('campaign', 0.9996780157089233)]
D:\anaconda\anaconda3\lib\site-packages\ipykernel_launcher.py:1: DeprecationWarning: Call to deprecated `most_similar` (Method will be removed in 4.0.0, use self.wv.most_similar() instead).
"""Entry point for launching an IPython kernel.
print(model.accuracy(questions = datapath('questions-words.txt')))
print(model.accuracy(questions = datapath('questions-words.txt')))
D:\anaconda\anaconda3\lib\site-packages\ipykernel_launcher.py:1: DeprecationWarning: Call to deprecated `accuracy` (Method will be removed in 4.0.0, use self.wv.accuracy() instead).
"""Entry point for launching an IPython kernel.
[{'correct': [], 'incorrect': [], 'section': 'capital-common-countries'},
{'correct': [], 'incorrect': [], 'section': 'capital-world'},
{'correct': [], 'incorrect': [], 'section': 'currency'},
{'correct': [], 'incorrect': [], 'section': 'city-in-state'},
{'correct': [],
'incorrect': [('HE', 'SHE', 'HIS', 'HER'), ('HIS', 'HER', 'HE', 'SHE')],
'section': 'family'},
{'correct': [], 'incorrect': [], 'section': 'gram1-adjective-to-adverb'},
{'correct': [], 'incorrect': [], 'section': 'gram2-opposite'},
{'correct': [('GOOD', 'BETTER', 'GREAT', 'GREATER'),
('GREAT', 'GREATER', 'LOW', 'LOWER'),
('LONG', 'LONGER', 'GREAT', 'GREATER')],
'incorrect': [('GOOD', 'BETTER', 'LONG', 'LONGER'),
('GOOD', 'BETTER', 'LOW', 'LOWER'),
('GREAT', 'GREATER', 'LONG', 'LONGER'),
('GREAT', 'GREATER', 'GOOD', 'BETTER'),
('LONG', 'LONGER', 'LOW', 'LOWER'),
('LONG', 'LONGER', 'GOOD', 'BETTER'),
('LOW', 'LOWER', 'GOOD', 'BETTER'),
('LOW', 'LOWER', 'GREAT', 'GREATER'),
('LOW', 'LOWER', 'LONG', 'LONGER')],
'section': 'gram3-comparative'},
{'correct': [('GREAT', 'GREATEST', 'LARGE', 'LARGEST')],
'incorrect': [('BIG', 'BIGGEST', 'GOOD', 'BEST'),
('BIG', 'BIGGEST', 'GREAT', 'GREATEST'),
('BIG', 'BIGGEST', 'LARGE', 'LARGEST'),
('GOOD', 'BEST', 'GREAT', 'GREATEST'),
('GOOD', 'BEST', 'LARGE', 'LARGEST'),
('GOOD', 'BEST', 'BIG', 'BIGGEST'),
('GREAT', 'GREATEST', 'BIG', 'BIGGEST'),
('GREAT', 'GREATEST', 'GOOD', 'BEST'),
('LARGE', 'LARGEST', 'BIG', 'BIGGEST'),
('LARGE', 'LARGEST', 'GOOD', 'BEST'),
('LARGE', 'LARGEST', 'GREAT', 'GREATEST')],
'section': 'gram4-superlative'},
{'correct': [('LOOK', 'LOOKING', 'SAY', 'SAYING'),
('PLAY', 'PLAYING', 'SAY', 'SAYING'),
('PLAY', 'PLAYING', 'LOOK', 'LOOKING'),
('SAY', 'SAYING', 'LOOK', 'LOOKING'),
('SAY', 'SAYING', 'PLAY', 'PLAYING')],
'incorrect': [('GO', 'GOING', 'LOOK', 'LOOKING'),
('GO', 'GOING', 'PLAY', 'PLAYING'),
('GO', 'GOING', 'RUN', 'RUNNING'),
('GO', 'GOING', 'SAY', 'SAYING'),
('LOOK', 'LOOKING', 'PLAY', 'PLAYING'),
('LOOK', 'LOOKING', 'RUN', 'RUNNING'),
('LOOK', 'LOOKING', 'GO', 'GOING'),
('PLAY', 'PLAYING', 'RUN', 'RUNNING'),
('PLAY', 'PLAYING', 'GO', 'GOING'),
('RUN', 'RUNNING', 'SAY', 'SAYING'),
('RUN', 'RUNNING', 'GO', 'GOING'),
('RUN', 'RUNNING', 'LOOK', 'LOOKING'),
('RUN', 'RUNNING', 'PLAY', 'PLAYING'),
('SAY', 'SAYING', 'GO', 'GOING'),
('SAY', 'SAYING', 'RUN', 'RUNNING')],
'section': 'gram5-present-participle'},
{'correct': [('AUSTRALIA', 'AUSTRALIAN', 'INDIA', 'INDIAN'),
('AUSTRALIA', 'AUSTRALIAN', 'ISRAEL', 'ISRAELI'),
('FRANCE', 'FRENCH', 'INDIA', 'INDIAN'),
('FRANCE', 'FRENCH', 'ISRAEL', 'ISRAELI'),
('INDIA', 'INDIAN', 'ISRAEL', 'ISRAELI'),
('INDIA', 'INDIAN', 'AUSTRALIA', 'AUSTRALIAN'),
('ISRAEL', 'ISRAELI', 'AUSTRALIA', 'AUSTRALIAN'),
('ISRAEL', 'ISRAELI', 'INDIA', 'INDIAN')],
'incorrect': [('AUSTRALIA', 'AUSTRALIAN', 'FRANCE', 'FRENCH'),
('AUSTRALIA', 'AUSTRALIAN', 'SWITZERLAND', 'SWISS'),
('FRANCE', 'FRENCH', 'SWITZERLAND', 'SWISS'),
('FRANCE', 'FRENCH', 'AUSTRALIA', 'AUSTRALIAN'),
('INDIA', 'INDIAN', 'SWITZERLAND', 'SWISS'),
('INDIA', 'INDIAN', 'FRANCE', 'FRENCH'),
('ISRAEL', 'ISRAELI', 'SWITZERLAND', 'SWISS'),
('ISRAEL', 'ISRAELI', 'FRANCE', 'FRENCH'),
('SWITZERLAND', 'SWISS', 'AUSTRALIA', 'AUSTRALIAN'),
('SWITZERLAND', 'SWISS', 'FRANCE', 'FRENCH'),
('SWITZERLAND', 'SWISS', 'INDIA', 'INDIAN'),
('SWITZERLAND', 'SWISS', 'ISRAEL', 'ISRAELI')],
'section': 'gram6-nationality-adjective'},
{'correct': [('PAYING', 'PAID', 'SAYING', 'SAID')],
'incorrect': [('GOING', 'WENT', 'PAYING', 'PAID'),
('GOING', 'WENT', 'PLAYING', 'PLAYED'),
('GOING', 'WENT', 'SAYING', 'SAID'),
('GOING', 'WENT', 'TAKING', 'TOOK'),
('PAYING', 'PAID', 'PLAYING', 'PLAYED'),
('PAYING', 'PAID', 'TAKING', 'TOOK'),
('PAYING', 'PAID', 'GOING', 'WENT'),
('PLAYING', 'PLAYED', 'SAYING', 'SAID'),
('PLAYING', 'PLAYED', 'TAKING', 'TOOK'),
('PLAYING', 'PLAYED', 'GOING', 'WENT'),
('PLAYING', 'PLAYED', 'PAYING', 'PAID'),
('SAYING', 'SAID', 'TAKING', 'TOOK'),
('SAYING', 'SAID', 'GOING', 'WENT'),
('SAYING', 'SAID', 'PAYING', 'PAID'),
('SAYING', 'SAID', 'PLAYING', 'PLAYED'),
('TAKING', 'TOOK', 'GOING', 'WENT'),
('TAKING', 'TOOK', 'PAYING', 'PAID'),
('TAKING', 'TOOK', 'PLAYING', 'PLAYED'),
('TAKING', 'TOOK', 'SAYING', 'SAID')],
'section': 'gram7-past-tense'},
{'correct': [('MAN', 'MEN', 'CHILD', 'CHILDREN')],
'incorrect': [('BUILDING', 'BUILDINGS', 'CAR', 'CARS'),
('BUILDING', 'BUILDINGS', 'CHILD', 'CHILDREN'),
('BUILDING', 'BUILDINGS', 'MAN', 'MEN'),
('CAR', 'CARS', 'CHILD', 'CHILDREN'),
('CAR', 'CARS', 'MAN', 'MEN'),
('CAR', 'CARS', 'BUILDING', 'BUILDINGS'),
('CHILD', 'CHILDREN', 'MAN', 'MEN'),
('CHILD', 'CHILDREN', 'BUILDING', 'BUILDINGS'),
('CHILD', 'CHILDREN', 'CAR', 'CARS'),
('MAN', 'MEN', 'BUILDING', 'BUILDINGS'),
('MAN', 'MEN', 'CAR', 'CARS')],
'section': 'gram8-plural'},
{'correct': [], 'incorrect': [], 'section': 'gram9-plural-verbs'},
{'correct': [('GOOD', 'BETTER', 'GREAT', 'GREATER'),
('GREAT', 'GREATER', 'LOW', 'LOWER'),
('LONG', 'LONGER', 'GREAT', 'GREATER'),
('GREAT', 'GREATEST', 'LARGE', 'LARGEST'),
('LOOK', 'LOOKING', 'SAY', 'SAYING'),
('PLAY', 'PLAYING', 'SAY', 'SAYING'),
('PLAY', 'PLAYING', 'LOOK', 'LOOKING'),
('SAY', 'SAYING', 'LOOK', 'LOOKING'),
('SAY', 'SAYING', 'PLAY', 'PLAYING'),
('AUSTRALIA', 'AUSTRALIAN', 'INDIA', 'INDIAN'),
('AUSTRALIA', 'AUSTRALIAN', 'ISRAEL', 'ISRAELI'),
('FRANCE', 'FRENCH', 'INDIA', 'INDIAN'),
('FRANCE', 'FRENCH', 'ISRAEL', 'ISRAELI'),
('INDIA', 'INDIAN', 'ISRAEL', 'ISRAELI'),
('INDIA', 'INDIAN', 'AUSTRALIA', 'AUSTRALIAN'),
('ISRAEL', 'ISRAELI', 'AUSTRALIA', 'AUSTRALIAN'),
('ISRAEL', 'ISRAELI', 'INDIA', 'INDIAN'),
('PAYING', 'PAID', 'SAYING', 'SAID'),
('MAN', 'MEN', 'CHILD', 'CHILDREN')],
'incorrect': [('HE', 'SHE', 'HIS', 'HER'),
('HIS', 'HER', 'HE', 'SHE'),
('GOOD', 'BETTER', 'LONG', 'LONGER'),
('GOOD', 'BETTER', 'LOW', 'LOWER'),
('GREAT', 'GREATER', 'LONG', 'LONGER'),
('GREAT', 'GREATER', 'GOOD', 'BETTER'),
('LONG', 'LONGER', 'LOW', 'LOWER'),
('LONG', 'LONGER', 'GOOD', 'BETTER'),
('LOW', 'LOWER', 'GOOD', 'BETTER'),
('LOW', 'LOWER', 'GREAT', 'GREATER'),
('LOW', 'LOWER', 'LONG', 'LONGER'),
('BIG', 'BIGGEST', 'GOOD', 'BEST'),
('BIG', 'BIGGEST', 'GREAT', 'GREATEST'),
('BIG', 'BIGGEST', 'LARGE', 'LARGEST'),
('GOOD', 'BEST', 'GREAT', 'GREATEST'),
('GOOD', 'BEST', 'LARGE', 'LARGEST'),
('GOOD', 'BEST', 'BIG', 'BIGGEST'),
('GREAT', 'GREATEST', 'BIG', 'BIGGEST'),
('GREAT', 'GREATEST', 'GOOD', 'BEST'),
('LARGE', 'LARGEST', 'BIG', 'BIGGEST'),
('LARGE', 'LARGEST', 'GOOD', 'BEST'),
('LARGE', 'LARGEST', 'GREAT', 'GREATEST'),
('GO', 'GOING', 'LOOK', 'LOOKING'),
('GO', 'GOING', 'PLAY', 'PLAYING'),
('GO', 'GOING', 'RUN', 'RUNNING'),
('GO', 'GOING', 'SAY', 'SAYING'),
('LOOK', 'LOOKING', 'PLAY', 'PLAYING'),
('LOOK', 'LOOKING', 'RUN', 'RUNNING'),
('LOOK', 'LOOKING', 'GO', 'GOING'),
('PLAY', 'PLAYING', 'RUN', 'RUNNING'),
('PLAY', 'PLAYING', 'GO', 'GOING'),
('RUN', 'RUNNING', 'SAY', 'SAYING'),
('RUN', 'RUNNING', 'GO', 'GOING'),
('RUN', 'RUNNING', 'LOOK', 'LOOKING'),
('RUN', 'RUNNING', 'PLAY', 'PLAYING'),
('SAY', 'SAYING', 'GO', 'GOING'),
('SAY', 'SAYING', 'RUN', 'RUNNING'),
('AUSTRALIA', 'AUSTRALIAN', 'FRANCE', 'FRENCH'),
('AUSTRALIA', 'AUSTRALIAN', 'SWITZERLAND', 'SWISS'),
('FRANCE', 'FRENCH', 'SWITZERLAND', 'SWISS'),
('FRANCE', 'FRENCH', 'AUSTRALIA', 'AUSTRALIAN'),
('INDIA', 'INDIAN', 'SWITZERLAND', 'SWISS'),
('INDIA', 'INDIAN', 'FRANCE', 'FRENCH'),
('ISRAEL', 'ISRAELI', 'SWITZERLAND', 'SWISS'),
('ISRAEL', 'ISRAELI', 'FRANCE', 'FRENCH'),
('SWITZERLAND', 'SWISS', 'AUSTRALIA', 'AUSTRALIAN'),
('SWITZERLAND', 'SWISS', 'FRANCE', 'FRENCH'),
('SWITZERLAND', 'SWISS', 'INDIA', 'INDIAN'),
('SWITZERLAND', 'SWISS', 'ISRAEL', 'ISRAELI'),
('GOING', 'WENT', 'PAYING', 'PAID'),
('GOING', 'WENT', 'PLAYING', 'PLAYED'),
('GOING', 'WENT', 'SAYING', 'SAID'),
('GOING', 'WENT', 'TAKING', 'TOOK'),
('PAYING', 'PAID', 'PLAYING', 'PLAYED'),
('PAYING', 'PAID', 'TAKING', 'TOOK'),
('PAYING', 'PAID', 'GOING', 'WENT'),
('PLAYING', 'PLAYED', 'SAYING', 'SAID'),
('PLAYING', 'PLAYED', 'TAKING', 'TOOK'),
('PLAYING', 'PLAYED', 'GOING', 'WENT'),
('PLAYING', 'PLAYED', 'PAYING', 'PAID'),
('SAYING', 'SAID', 'TAKING', 'TOOK'),
('SAYING', 'SAID', 'GOING', 'WENT'),
('SAYING', 'SAID', 'PAYING', 'PAID'),
('SAYING', 'SAID', 'PLAYING', 'PLAYED'),
('TAKING', 'TOOK', 'GOING', 'WENT'),
('TAKING', 'TOOK', 'PAYING', 'PAID'),
('TAKING', 'TOOK', 'PLAYING', 'PLAYED'),
('TAKING', 'TOOK', 'SAYING', 'SAID'),
('BUILDING', 'BUILDINGS', 'CAR', 'CARS'),
('BUILDING', 'BUILDINGS', 'CHILD', 'CHILDREN'),
('BUILDING', 'BUILDINGS', 'MAN', 'MEN'),
('CAR', 'CARS', 'CHILD', 'CHILDREN'),
('CAR', 'CARS', 'MAN', 'MEN'),
('CAR', 'CARS', 'BUILDING', 'BUILDINGS'),
('CHILD', 'CHILDREN', 'MAN', 'MEN'),
('CHILD', 'CHILDREN', 'BUILDING', 'BUILDINGS'),
('CHILD', 'CHILDREN', 'CAR', 'CARS'),
('MAN', 'MEN', 'BUILDING', 'BUILDINGS'),
('MAN', 'MEN', 'CAR', 'CARS')],
'section': 'total'}]