Update run_speech_recognition_ctc.py
Browse files
run_speech_recognition_ctc.py
CHANGED
@@ -401,8 +401,7 @@ def main():
|
|
401 |
data_args.dataset_name,
|
402 |
data_args.dataset_config_name,
|
403 |
split=data_args.train_split_name,
|
404 |
-
use_auth_token=data_args.use_auth_token
|
405 |
-
cache_dir="../downloaded_data/"
|
406 |
)
|
407 |
|
408 |
if data_args.audio_column_name not in raw_datasets["train"].column_names:
|
@@ -428,7 +427,6 @@ def main():
|
|
428 |
data_args.dataset_config_name,
|
429 |
split=data_args.eval_split_name,
|
430 |
use_auth_token=data_args.use_auth_token,
|
431 |
-
cache_dir="../downloaded_data/"
|
432 |
)
|
433 |
|
434 |
if data_args.max_eval_samples is not None:
|
@@ -609,7 +607,7 @@ def main():
|
|
609 |
prepare_dataset,
|
610 |
remove_columns=next(iter(raw_datasets.values())).column_names,
|
611 |
batch_size=-1,
|
612 |
-
desc="preprocess datasets"
|
613 |
)
|
614 |
|
615 |
def is_audio_in_length_range(length):
|
|
|
401 |
data_args.dataset_name,
|
402 |
data_args.dataset_config_name,
|
403 |
split=data_args.train_split_name,
|
404 |
+
use_auth_token=data_args.use_auth_token
|
|
|
405 |
)
|
406 |
|
407 |
if data_args.audio_column_name not in raw_datasets["train"].column_names:
|
|
|
427 |
data_args.dataset_config_name,
|
428 |
split=data_args.eval_split_name,
|
429 |
use_auth_token=data_args.use_auth_token,
|
|
|
430 |
)
|
431 |
|
432 |
if data_args.max_eval_samples is not None:
|
|
|
607 |
prepare_dataset,
|
608 |
remove_columns=next(iter(raw_datasets.values())).column_names,
|
609 |
batch_size=-1,
|
610 |
+
desc="preprocess datasets"
|
611 |
)
|
612 |
|
613 |
def is_audio_in_length_range(length):
|