up (#14008)
This commit is contained in:
parent
7604557e44
commit
7fb2a8b3d9
|
@ -66,7 +66,7 @@ be installed as follows: ``apt install libsndfile1-dev``
|
||||||
... batch["speech"] = speech
|
... batch["speech"] = speech
|
||||||
... return batch
|
... return batch
|
||||||
|
|
||||||
>>> ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation")
|
>>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
|
||||||
>>> ds = ds.map(map_to_array)
|
>>> ds = ds.map(map_to_array)
|
||||||
|
|
||||||
>>> inputs = processor(ds["speech"][0], sampling_rate=16_000, return_tensors="pt")
|
>>> inputs = processor(ds["speech"][0], sampling_rate=16_000, return_tensors="pt")
|
||||||
|
@ -98,7 +98,7 @@ be installed as follows: ``apt install libsndfile1-dev``
|
||||||
... batch["speech"] = speech
|
... batch["speech"] = speech
|
||||||
... return batch
|
... return batch
|
||||||
|
|
||||||
>>> ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation")
|
>>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
|
||||||
>>> ds = ds.map(map_to_array)
|
>>> ds = ds.map(map_to_array)
|
||||||
|
|
||||||
>>> inputs = processor(ds["speech"][0], sampling_rate=16_000, return_tensors="pt")
|
>>> inputs = processor(ds["speech"][0], sampling_rate=16_000, return_tensors="pt")
|
||||||
|
|
|
@ -68,7 +68,7 @@ predicted token ids.
|
||||||
... batch["speech"] = speech
|
... batch["speech"] = speech
|
||||||
... return batch
|
... return batch
|
||||||
|
|
||||||
>>> ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation")
|
>>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
|
||||||
>>> ds = ds.map(map_to_array)
|
>>> ds = ds.map(map_to_array)
|
||||||
|
|
||||||
>>> inputs = processor(ds["speech"][0], sampling_rate=16_000, return_tensors="pt")
|
>>> inputs = processor(ds["speech"][0], sampling_rate=16_000, return_tensors="pt")
|
||||||
|
@ -86,7 +86,7 @@ predicted token ids.
|
||||||
>>> from datasets import load_dataset
|
>>> from datasets import load_dataset
|
||||||
>>> from transformers import pipeline
|
>>> from transformers import pipeline
|
||||||
|
|
||||||
>>> librispeech_en = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation")
|
>>> librispeech_en = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
|
||||||
>>> asr = pipeline("automatic-speech-recognition", model="facebook/s2t-wav2vec2-large-en-de", feature_extractor="facebook/s2t-wav2vec2-large-en-de")
|
>>> asr = pipeline("automatic-speech-recognition", model="facebook/s2t-wav2vec2-large-en-de", feature_extractor="facebook/s2t-wav2vec2-large-en-de")
|
||||||
|
|
||||||
>>> translation_de = asr(librispeech_en[0]["file"])
|
>>> translation_de = asr(librispeech_en[0]["file"])
|
||||||
|
|
|
@ -391,7 +391,7 @@ class ExamplesTests(TestCasePlus):
|
||||||
run_speech_recognition_ctc.py
|
run_speech_recognition_ctc.py
|
||||||
--output_dir {tmp_dir}
|
--output_dir {tmp_dir}
|
||||||
--model_name_or_path hf-internal-testing/tiny-random-wav2vec2
|
--model_name_or_path hf-internal-testing/tiny-random-wav2vec2
|
||||||
--dataset_name patrickvonplaten/librispeech_asr_dummy
|
--dataset_name hf-internal-testing/librispeech_asr_dummy
|
||||||
--dataset_config_name clean
|
--dataset_config_name clean
|
||||||
--train_split_name validation
|
--train_split_name validation
|
||||||
--eval_split_name validation
|
--eval_split_name validation
|
||||||
|
@ -460,7 +460,7 @@ class ExamplesTests(TestCasePlus):
|
||||||
run_wav2vec2_pretraining_no_trainer.py
|
run_wav2vec2_pretraining_no_trainer.py
|
||||||
--output_dir {tmp_dir}
|
--output_dir {tmp_dir}
|
||||||
--model_name_or_path hf-internal-testing/tiny-random-wav2vec2
|
--model_name_or_path hf-internal-testing/tiny-random-wav2vec2
|
||||||
--dataset_name patrickvonplaten/librispeech_asr_dummy
|
--dataset_name hf-internal-testing/librispeech_asr_dummy
|
||||||
--dataset_config_names clean
|
--dataset_config_names clean
|
||||||
--dataset_split_names validation
|
--dataset_split_names validation
|
||||||
--learning_rate 1e-4
|
--learning_rate 1e-4
|
||||||
|
|
|
@ -155,7 +155,7 @@ run_asr.py \
|
||||||
--per_device_eval_batch_size=2 --evaluation_strategy=steps --save_steps=500 --eval_steps=100 \
|
--per_device_eval_batch_size=2 --evaluation_strategy=steps --save_steps=500 --eval_steps=100 \
|
||||||
--logging_steps=5 --learning_rate=5e-4 --warmup_steps=3000 \
|
--logging_steps=5 --learning_rate=5e-4 --warmup_steps=3000 \
|
||||||
--model_name_or_path=patrickvonplaten/wav2vec2_tiny_random_robust \
|
--model_name_or_path=patrickvonplaten/wav2vec2_tiny_random_robust \
|
||||||
--dataset_name=patrickvonplaten/librispeech_asr_dummy --dataset_config_name=clean \
|
--dataset_name=hf-internal-testing/librispeech_asr_dummy --dataset_config_name=clean \
|
||||||
--train_split_name=validation --validation_split_name=validation --orthography=timit \
|
--train_split_name=validation --validation_split_name=validation --orthography=timit \
|
||||||
--preprocessing_num_workers=1 --group_by_length --freeze_feature_extractor --verbose_logging \
|
--preprocessing_num_workers=1 --group_by_length --freeze_feature_extractor --verbose_logging \
|
||||||
--deepspeed ds_config_wav2vec2_zero2.json
|
--deepspeed ds_config_wav2vec2_zero2.json
|
||||||
|
@ -179,7 +179,7 @@ run_asr.py \
|
||||||
--per_device_eval_batch_size=2 --evaluation_strategy=steps --save_steps=500 --eval_steps=100 \
|
--per_device_eval_batch_size=2 --evaluation_strategy=steps --save_steps=500 --eval_steps=100 \
|
||||||
--logging_steps=5 --learning_rate=5e-4 --warmup_steps=3000 \
|
--logging_steps=5 --learning_rate=5e-4 --warmup_steps=3000 \
|
||||||
--model_name_or_path=patrickvonplaten/wav2vec2_tiny_random_robust \
|
--model_name_or_path=patrickvonplaten/wav2vec2_tiny_random_robust \
|
||||||
--dataset_name=patrickvonplaten/librispeech_asr_dummy --dataset_config_name=clean \
|
--dataset_name=hf-internal-testing/librispeech_asr_dummy --dataset_config_name=clean \
|
||||||
--train_split_name=validation --validation_split_name=validation --orthography=timit \
|
--train_split_name=validation --validation_split_name=validation --orthography=timit \
|
||||||
--preprocessing_num_workers=1 --group_by_length --freeze_feature_extractor --verbose_logging \
|
--preprocessing_num_workers=1 --group_by_length --freeze_feature_extractor --verbose_logging \
|
||||||
--deepspeed ds_config_wav2vec2_zero3.json
|
--deepspeed ds_config_wav2vec2_zero3.json
|
||||||
|
|
|
@ -155,7 +155,7 @@ class TestDeepSpeedWav2Vec2(TestCasePlus):
|
||||||
output_dir = self.get_auto_remove_tmp_dir("./xxx", after=False)
|
output_dir = self.get_auto_remove_tmp_dir("./xxx", after=False)
|
||||||
args = f"""
|
args = f"""
|
||||||
--model_name_or_path {model_name}
|
--model_name_or_path {model_name}
|
||||||
--dataset_name patrickvonplaten/librispeech_asr_dummy
|
--dataset_name hf-internal-testing/librispeech_asr_dummy
|
||||||
--dataset_config_name clean
|
--dataset_config_name clean
|
||||||
--train_split_name validation
|
--train_split_name validation
|
||||||
--validation_split_name validation
|
--validation_split_name validation
|
||||||
|
|
|
@ -953,7 +953,7 @@ class HubertModel(HubertPreTrainedModel):
|
||||||
... batch["speech"] = speech
|
... batch["speech"] = speech
|
||||||
... return batch
|
... return batch
|
||||||
|
|
||||||
>>> ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation")
|
>>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
|
||||||
>>> ds = ds.map(map_to_array)
|
>>> ds = ds.map(map_to_array)
|
||||||
|
|
||||||
>>> input_values = processor(ds["speech"][0], return_tensors="pt").input_values # Batch size 1
|
>>> input_values = processor(ds["speech"][0], return_tensors="pt").input_values # Batch size 1
|
||||||
|
@ -1059,7 +1059,7 @@ class HubertForCTC(HubertPreTrainedModel):
|
||||||
... batch["speech"] = speech
|
... batch["speech"] = speech
|
||||||
... return batch
|
... return batch
|
||||||
|
|
||||||
>>> ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation")
|
>>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
|
||||||
>>> ds = ds.map(map_to_array)
|
>>> ds = ds.map(map_to_array)
|
||||||
|
|
||||||
>>> input_values = processor(ds["speech"][0], return_tensors="pt").input_values # Batch size 1
|
>>> input_values = processor(ds["speech"][0], return_tensors="pt").input_values # Batch size 1
|
||||||
|
|
|
@ -1412,7 +1412,7 @@ class TFHubertModel(TFHubertPreTrainedModel):
|
||||||
... batch["speech"] = speech
|
... batch["speech"] = speech
|
||||||
... return batch
|
... return batch
|
||||||
|
|
||||||
>>> ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation")
|
>>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
|
||||||
>>> ds = ds.map(map_to_array)
|
>>> ds = ds.map(map_to_array)
|
||||||
|
|
||||||
>>> input_values = processor(ds["speech"][0], return_tensors="tf").input_values # Batch size 1
|
>>> input_values = processor(ds["speech"][0], return_tensors="tf").input_values # Batch size 1
|
||||||
|
@ -1522,7 +1522,7 @@ class TFHubertForCTC(TFHubertPreTrainedModel):
|
||||||
... batch["speech"] = speech
|
... batch["speech"] = speech
|
||||||
... return batch
|
... return batch
|
||||||
|
|
||||||
>>> ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation")
|
>>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
|
||||||
>>> ds = ds.map(map_to_array)
|
>>> ds = ds.map(map_to_array)
|
||||||
|
|
||||||
>>> input_values = processor(ds["speech"][0], return_tensors="tf").input_values # Batch size 1
|
>>> input_values = processor(ds["speech"][0], return_tensors="tf").input_values # Batch size 1
|
||||||
|
|
|
@ -414,7 +414,7 @@ class SpeechEncoderDecoderModel(PreTrainedModel):
|
||||||
>>> batch["speech"] = speech
|
>>> batch["speech"] = speech
|
||||||
>>> return batch
|
>>> return batch
|
||||||
|
|
||||||
>>> ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation")
|
>>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
|
||||||
>>> ds = ds.map(map_to_array)
|
>>> ds = ds.map(map_to_array)
|
||||||
|
|
||||||
>>> input_values = processor(ds["speech"][0], return_tensors="pt").input_values # Batch size 1
|
>>> input_values = processor(ds["speech"][0], return_tensors="pt").input_values # Batch size 1
|
||||||
|
|
|
@ -1306,7 +1306,7 @@ class Speech2TextForConditionalGeneration(Speech2TextPreTrainedModel):
|
||||||
>>> batch["speech"] = speech
|
>>> batch["speech"] = speech
|
||||||
>>> return batch
|
>>> return batch
|
||||||
|
|
||||||
>>> ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation")
|
>>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
|
||||||
>>> ds = ds.map(map_to_array)
|
>>> ds = ds.map(map_to_array)
|
||||||
|
|
||||||
>>> input_features = processor(ds["speech"][0], sampling_rate=16000, return_tensors="pt").input_features # Batch size 1
|
>>> input_features = processor(ds["speech"][0], sampling_rate=16000, return_tensors="pt").input_features # Batch size 1
|
||||||
|
|
|
@ -944,7 +944,7 @@ FLAX_WAV2VEC2_MODEL_DOCSTRING = """
|
||||||
>>> batch["speech"] = speech
|
>>> batch["speech"] = speech
|
||||||
>>> return batch
|
>>> return batch
|
||||||
|
|
||||||
>>> ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation")
|
>>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
|
||||||
>>> ds = ds.map(map_to_array)
|
>>> ds = ds.map(map_to_array)
|
||||||
|
|
||||||
>>> input_values = processor(ds["speech"][0], sampling_rate=16_000, return_tensors="np").input_values # Batch size 1
|
>>> input_values = processor(ds["speech"][0], sampling_rate=16_000, return_tensors="np").input_values # Batch size 1
|
||||||
|
@ -1045,7 +1045,7 @@ FLAX_WAV2VEC2_FOR_CTC_DOCSTRING = """
|
||||||
>>> batch["speech"] = speech
|
>>> batch["speech"] = speech
|
||||||
>>> return batch
|
>>> return batch
|
||||||
|
|
||||||
>>> ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation")
|
>>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
|
||||||
>>> ds = ds.map(map_to_array)
|
>>> ds = ds.map(map_to_array)
|
||||||
|
|
||||||
>>> input_values = processor(ds["speech"][0], sampling_rate=16_000, return_tensors="np").input_values # Batch size 1
|
>>> input_values = processor(ds["speech"][0], sampling_rate=16_000, return_tensors="np").input_values # Batch size 1
|
||||||
|
@ -1233,7 +1233,7 @@ FLAX_WAV2VEC2_FOR_PRETRAINING_DOCSTRING = """
|
||||||
... return batch
|
... return batch
|
||||||
|
|
||||||
|
|
||||||
>>> ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation")
|
>>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
|
||||||
>>> ds = ds.map(map_to_array)
|
>>> ds = ds.map(map_to_array)
|
||||||
|
|
||||||
>>> input_values = feature_extractor(ds["speech"][0], return_tensors="np").input_values # Batch size 1
|
>>> input_values = feature_extractor(ds["speech"][0], return_tensors="np").input_values # Batch size 1
|
||||||
|
|
|
@ -1406,7 +1406,7 @@ class TFWav2Vec2Model(TFWav2Vec2PreTrainedModel):
|
||||||
>>> batch["speech"] = speech
|
>>> batch["speech"] = speech
|
||||||
>>> return batch
|
>>> return batch
|
||||||
|
|
||||||
>>> ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation")
|
>>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
|
||||||
>>> ds = ds.map(map_to_array)
|
>>> ds = ds.map(map_to_array)
|
||||||
|
|
||||||
>>> input_values = processor(ds["speech"][0], return_tensors="tf").input_values # Batch size 1
|
>>> input_values = processor(ds["speech"][0], return_tensors="tf").input_values # Batch size 1
|
||||||
|
@ -1516,7 +1516,7 @@ class TFWav2Vec2ForCTC(TFWav2Vec2PreTrainedModel):
|
||||||
>>> batch["speech"] = speech
|
>>> batch["speech"] = speech
|
||||||
>>> return batch
|
>>> return batch
|
||||||
|
|
||||||
>>> ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation")
|
>>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
|
||||||
>>> ds = ds.map(map_to_array)
|
>>> ds = ds.map(map_to_array)
|
||||||
|
|
||||||
>>> input_values = processor(ds["speech"][0], return_tensors="tf").input_values # Batch size 1
|
>>> input_values = processor(ds["speech"][0], return_tensors="tf").input_values # Batch size 1
|
||||||
|
|
|
@ -1146,7 +1146,7 @@ class Wav2Vec2Model(Wav2Vec2PreTrainedModel):
|
||||||
>>> batch["speech"] = speech
|
>>> batch["speech"] = speech
|
||||||
>>> return batch
|
>>> return batch
|
||||||
|
|
||||||
>>> ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation")
|
>>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
|
||||||
>>> ds = ds.map(map_to_array)
|
>>> ds = ds.map(map_to_array)
|
||||||
|
|
||||||
>>> input_values = processor(ds["speech"][0], return_tensors="pt").input_values # Batch size 1
|
>>> input_values = processor(ds["speech"][0], return_tensors="pt").input_values # Batch size 1
|
||||||
|
@ -1280,7 +1280,7 @@ class Wav2Vec2ForPreTraining(Wav2Vec2PreTrainedModel):
|
||||||
... return batch
|
... return batch
|
||||||
|
|
||||||
|
|
||||||
>>> ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation")
|
>>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
|
||||||
>>> ds = ds.map(map_to_array)
|
>>> ds = ds.map(map_to_array)
|
||||||
|
|
||||||
>>> input_values = feature_extractor(ds["speech"][0], return_tensors="pt").input_values # Batch size 1
|
>>> input_values = feature_extractor(ds["speech"][0], return_tensors="pt").input_values # Batch size 1
|
||||||
|
@ -1442,7 +1442,7 @@ class Wav2Vec2ForMaskedLM(Wav2Vec2PreTrainedModel):
|
||||||
>>> batch["speech"] = speech
|
>>> batch["speech"] = speech
|
||||||
>>> return batch
|
>>> return batch
|
||||||
|
|
||||||
>>> ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation")
|
>>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
|
||||||
>>> ds = ds.map(map_to_array)
|
>>> ds = ds.map(map_to_array)
|
||||||
|
|
||||||
>>> input_values = processor(ds["speech"][0], return_tensors="pt").input_values # Batch size 1
|
>>> input_values = processor(ds["speech"][0], return_tensors="pt").input_values # Batch size 1
|
||||||
|
@ -1536,7 +1536,7 @@ class Wav2Vec2ForCTC(Wav2Vec2PreTrainedModel):
|
||||||
>>> batch["speech"] = speech
|
>>> batch["speech"] = speech
|
||||||
>>> return batch
|
>>> return batch
|
||||||
|
|
||||||
>>> ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation")
|
>>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
|
||||||
>>> ds = ds.map(map_to_array)
|
>>> ds = ds.map(map_to_array)
|
||||||
|
|
||||||
>>> input_values = processor(ds["speech"][0], return_tensors="pt").input_values # Batch size 1
|
>>> input_values = processor(ds["speech"][0], return_tensors="pt").input_values # Batch size 1
|
||||||
|
|
|
@ -366,7 +366,7 @@ class FlaxWav2Vec2ModelIntegrationTest(unittest.TestCase):
|
||||||
batch["speech"] = speech
|
batch["speech"] = speech
|
||||||
return batch
|
return batch
|
||||||
|
|
||||||
ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation")
|
ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
|
||||||
|
|
||||||
ds = ds.filter(lambda x: x["id"] in ids).sort("id").map(map_to_array)
|
ds = ds.filter(lambda x: x["id"] in ids).sort("id").map(map_to_array)
|
||||||
|
|
||||||
|
|
|
@ -623,7 +623,7 @@ class HubertModelIntegrationTest(unittest.TestCase):
|
||||||
batch["speech"] = speech
|
batch["speech"] = speech
|
||||||
return batch
|
return batch
|
||||||
|
|
||||||
ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation")
|
ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
|
||||||
|
|
||||||
ds = ds.filter(lambda x: x["id"] in ids).sort("id").map(map_to_array)
|
ds = ds.filter(lambda x: x["id"] in ids).sort("id").map(map_to_array)
|
||||||
|
|
||||||
|
|
|
@ -723,7 +723,7 @@ class Speech2TextModelIntegrationTests(unittest.TestCase):
|
||||||
batch["speech"] = speech
|
batch["speech"] = speech
|
||||||
return batch
|
return batch
|
||||||
|
|
||||||
ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation")
|
ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
|
||||||
ds = ds.sort("id").select(range(num_samples)).map(map_to_array)
|
ds = ds.sort("id").select(range(num_samples)).map(map_to_array)
|
||||||
|
|
||||||
return ds["speech"][:num_samples]
|
return ds["speech"][:num_samples]
|
||||||
|
|
|
@ -489,7 +489,7 @@ class TFHubertModelIntegrationTest(unittest.TestCase):
|
||||||
batch["speech"] = speech
|
batch["speech"] = speech
|
||||||
return batch
|
return batch
|
||||||
|
|
||||||
ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation")
|
ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
|
||||||
|
|
||||||
ds = ds.filter(lambda x: x["id"] in ids).sort("id").map(map_to_array)
|
ds = ds.filter(lambda x: x["id"] in ids).sort("id").map(map_to_array)
|
||||||
|
|
||||||
|
|
|
@ -489,7 +489,7 @@ class TFWav2Vec2ModelIntegrationTest(unittest.TestCase):
|
||||||
batch["speech"] = speech
|
batch["speech"] = speech
|
||||||
return batch
|
return batch
|
||||||
|
|
||||||
ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation")
|
ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
|
||||||
|
|
||||||
ds = ds.filter(lambda x: x["id"] in ids).sort("id").map(map_to_array)
|
ds = ds.filter(lambda x: x["id"] in ids).sort("id").map(map_to_array)
|
||||||
|
|
||||||
|
|
|
@ -910,7 +910,7 @@ class Wav2Vec2ModelIntegrationTest(unittest.TestCase):
|
||||||
batch["speech"] = speech
|
batch["speech"] = speech
|
||||||
return batch
|
return batch
|
||||||
|
|
||||||
ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation")
|
ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
|
||||||
|
|
||||||
ds = ds.filter(lambda x: x["id"] in ids).sort("id").map(map_to_array)
|
ds = ds.filter(lambda x: x["id"] in ids).sort("id").map(map_to_array)
|
||||||
|
|
||||||
|
|
|
@ -62,7 +62,7 @@ class AudioClassificationPipelineTests(unittest.TestCase, metaclass=PipelineTest
|
||||||
)
|
)
|
||||||
|
|
||||||
# test with a local file
|
# test with a local file
|
||||||
dataset = datasets.load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation")
|
dataset = datasets.load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
|
||||||
filename = dataset[0]["file"]
|
filename = dataset[0]["file"]
|
||||||
output = audio_classifier(filename)
|
output = audio_classifier(filename)
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
|
|
|
@ -74,7 +74,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
|
||||||
|
|
||||||
from datasets import load_dataset
|
from datasets import load_dataset
|
||||||
|
|
||||||
ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation").sort("id")
|
ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
|
||||||
filename = ds[40]["file"]
|
filename = ds[40]["file"]
|
||||||
output = speech_recognizer(filename)
|
output = speech_recognizer(filename)
|
||||||
self.assertEqual(output, {"text": "A MAN SAID TO THE UNIVERSE SIR I EXIST"})
|
self.assertEqual(output, {"text": "A MAN SAID TO THE UNIVERSE SIR I EXIST"})
|
||||||
|
@ -92,7 +92,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
|
||||||
|
|
||||||
from datasets import load_dataset
|
from datasets import load_dataset
|
||||||
|
|
||||||
ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation").sort("id")
|
ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
|
||||||
filename = ds[40]["file"]
|
filename = ds[40]["file"]
|
||||||
output = speech_recognizer(filename)
|
output = speech_recognizer(filename)
|
||||||
self.assertEqual(output, {"text": 'Ein Mann sagte zum Universum : " Sir, ich existiert! "'})
|
self.assertEqual(output, {"text": 'Ein Mann sagte zum Universum : " Sir, ich existiert! "'})
|
||||||
|
@ -114,7 +114,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
|
||||||
output = asr(waveform)
|
output = asr(waveform)
|
||||||
self.assertEqual(output, {"text": ""})
|
self.assertEqual(output, {"text": ""})
|
||||||
|
|
||||||
ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation").sort("id")
|
ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
|
||||||
filename = ds[40]["file"]
|
filename = ds[40]["file"]
|
||||||
output = asr(filename)
|
output = asr(filename)
|
||||||
self.assertEqual(output, {"text": "A MAN SAID TO THE UNIVERSE SIR I EXIST"})
|
self.assertEqual(output, {"text": "A MAN SAID TO THE UNIVERSE SIR I EXIST"})
|
||||||
|
@ -144,7 +144,7 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase):
|
||||||
output = asr(waveform)
|
output = asr(waveform)
|
||||||
self.assertEqual(output, {"text": "(Applausi)"})
|
self.assertEqual(output, {"text": "(Applausi)"})
|
||||||
|
|
||||||
ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation").sort("id")
|
ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation").sort("id")
|
||||||
filename = ds[40]["file"]
|
filename = ds[40]["file"]
|
||||||
output = asr(filename)
|
output = asr(filename)
|
||||||
self.assertEqual(output, {"text": "Un uomo disse all'universo: \"Signore, io esisto."})
|
self.assertEqual(output, {"text": "Un uomo disse all'universo: \"Signore, io esisto."})
|
||||||
|
|
Loading…
Reference in New Issue