Tom Aarsen committed on
Commit
f9c9b72
·
1 Parent(s): 6e99faa

Reflect that JW300 was removed

Browse files
Files changed (1) hide show
  1. train.py +0 -9
train.py CHANGED
@@ -66,13 +66,6 @@ def main():
66
  global_voices_eval_dataset: Dataset = global_voices_dataset_dict["test"]
67
  print("Loaded global voices dataset.")
68
 
69
- print("Loading jw300 dataset...")
70
- jw300_dataset = load_dataset("sentence-transformers/parallel-sentences-jw300", "all", split="train")
71
- jw300_dataset_dict = jw300_dataset.train_test_split(test_size=10_000, seed=12)
72
- jw300_train_dataset: Dataset = jw300_dataset_dict["train"]
73
- jw300_eval_dataset: Dataset = jw300_dataset_dict["test"]
74
- print("Loaded jw300 dataset.")
75
-
76
  print("Loading muse dataset...")
77
  muse_dataset = load_dataset("sentence-transformers/parallel-sentences-muse", split="train")
78
  muse_dataset_dict = muse_dataset.train_test_split(test_size=10_000, seed=12)
@@ -168,7 +161,6 @@ def main():
168
  "talks": talks_train_dataset,
169
  "europarl": europarl_train_dataset,
170
  "global_voices": global_voices_train_dataset,
171
- "jw300": jw300_train_dataset,
172
  "muse": muse_train_dataset,
173
  "wikimatrix": wikimatrix_train_dataset,
174
  "opensubtitles": opensubtitles_train_dataset,
@@ -189,7 +181,6 @@ def main():
189
  "talks": talks_eval_dataset,
190
  "europarl": europarl_eval_dataset,
191
  "global_voices": global_voices_eval_dataset,
192
- "jw300": jw300_eval_dataset,
193
  "muse": muse_eval_dataset,
194
  "wikimatrix": wikimatrix_eval_dataset,
195
  "opensubtitles": opensubtitles_eval_dataset,
 
66
  global_voices_eval_dataset: Dataset = global_voices_dataset_dict["test"]
67
  print("Loaded global voices dataset.")
68
 
 
 
 
 
 
 
 
69
  print("Loading muse dataset...")
70
  muse_dataset = load_dataset("sentence-transformers/parallel-sentences-muse", split="train")
71
  muse_dataset_dict = muse_dataset.train_test_split(test_size=10_000, seed=12)
 
161
  "talks": talks_train_dataset,
162
  "europarl": europarl_train_dataset,
163
  "global_voices": global_voices_train_dataset,
 
164
  "muse": muse_train_dataset,
165
  "wikimatrix": wikimatrix_train_dataset,
166
  "opensubtitles": opensubtitles_train_dataset,
 
181
  "talks": talks_eval_dataset,
182
  "europarl": europarl_eval_dataset,
183
  "global_voices": global_voices_eval_dataset,
 
184
  "muse": muse_eval_dataset,
185
  "wikimatrix": wikimatrix_eval_dataset,
186
  "opensubtitles": opensubtitles_eval_dataset,