Tom Aarsen
committed on
Commit
·
f9c9b72
1
Parent(s):
6e99faa
Reflect that JW300 was removed
Browse files
train.py
CHANGED
@@ -66,13 +66,6 @@ def main():
|
|
66 |
global_voices_eval_dataset: Dataset = global_voices_dataset_dict["test"]
|
67 |
print("Loaded global voices dataset.")
|
68 |
|
69 |
-
print("Loading jw300 dataset...")
|
70 |
-
jw300_dataset = load_dataset("sentence-transformers/parallel-sentences-jw300", "all", split="train")
|
71 |
-
jw300_dataset_dict = jw300_dataset.train_test_split(test_size=10_000, seed=12)
|
72 |
-
jw300_train_dataset: Dataset = jw300_dataset_dict["train"]
|
73 |
-
jw300_eval_dataset: Dataset = jw300_dataset_dict["test"]
|
74 |
-
print("Loaded jw300 dataset.")
|
75 |
-
|
76 |
print("Loading muse dataset...")
|
77 |
muse_dataset = load_dataset("sentence-transformers/parallel-sentences-muse", split="train")
|
78 |
muse_dataset_dict = muse_dataset.train_test_split(test_size=10_000, seed=12)
|
@@ -168,7 +161,6 @@ def main():
|
|
168 |
"talks": talks_train_dataset,
|
169 |
"europarl": europarl_train_dataset,
|
170 |
"global_voices": global_voices_train_dataset,
|
171 |
-
"jw300": jw300_train_dataset,
|
172 |
"muse": muse_train_dataset,
|
173 |
"wikimatrix": wikimatrix_train_dataset,
|
174 |
"opensubtitles": opensubtitles_train_dataset,
|
@@ -189,7 +181,6 @@ def main():
|
|
189 |
"talks": talks_eval_dataset,
|
190 |
"europarl": europarl_eval_dataset,
|
191 |
"global_voices": global_voices_eval_dataset,
|
192 |
-
"jw300": jw300_eval_dataset,
|
193 |
"muse": muse_eval_dataset,
|
194 |
"wikimatrix": wikimatrix_eval_dataset,
|
195 |
"opensubtitles": opensubtitles_eval_dataset,
|
|
|
66 |
global_voices_eval_dataset: Dataset = global_voices_dataset_dict["test"]
|
67 |
print("Loaded global voices dataset.")
|
68 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
69 |
print("Loading muse dataset...")
|
70 |
muse_dataset = load_dataset("sentence-transformers/parallel-sentences-muse", split="train")
|
71 |
muse_dataset_dict = muse_dataset.train_test_split(test_size=10_000, seed=12)
|
|
|
161 |
"talks": talks_train_dataset,
|
162 |
"europarl": europarl_train_dataset,
|
163 |
"global_voices": global_voices_train_dataset,
|
|
|
164 |
"muse": muse_train_dataset,
|
165 |
"wikimatrix": wikimatrix_train_dataset,
|
166 |
"opensubtitles": opensubtitles_train_dataset,
|
|
|
181 |
"talks": talks_eval_dataset,
|
182 |
"europarl": europarl_eval_dataset,
|
183 |
"global_voices": global_voices_eval_dataset,
|
|
|
184 |
"muse": muse_eval_dataset,
|
185 |
"wikimatrix": wikimatrix_eval_dataset,
|
186 |
"opensubtitles": opensubtitles_eval_dataset,
|