Fixed: Better names for nlp variables in pipelines' tests and docs. (#11752)

* Fixed: Better names for nlp variables in pipelines' tests and docs.

* Fixed: Better variable names
Authored by Vyom Pathak on 2021-05-18 19:17:28 +05:30, committed by GitHub
parent cebb96f53a
commit fd3b12e8c3
12 changed files with 163 additions and 159 deletions
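
A minimal sketch of the renaming convention this commit applies (illustrative only, not an excerpt from the diff below; it assumes nothing beyond a working transformers install and whatever default model pipeline() selects per task):

    from transformers import pipeline

    # Before this commit, examples and tests named every pipeline object `nlp`,
    # regardless of the task:
    #     nlp = pipeline("sentiment-analysis")
    # After it, the variable name says what the pipeline does:
    classifier = pipeline("sentiment-analysis")
    unmasker = pipeline("fill-mask")
    question_answerer = pipeline("question-answering")

    print(classifier("I love you")[0]["label"])  # expected: POSITIVE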

View File

@@ -69,13 +69,13 @@ This returns a label ("POSITIVE" or "NEGATIVE") alongside a score, as follows:
>>> from transformers import pipeline
>>> nlp = pipeline("sentiment-analysis")
>>> classifier = pipeline("sentiment-analysis")
>>> result = nlp("I hate you")[0]
>>> result = classifier("I hate you")[0]
>>> print(f"label: {result['label']}, with score: {round(result['score'], 4)}")
label: NEGATIVE, with score: 0.9991
>>> result = nlp("I love you")[0]
>>> result = classifier("I love you")[0]
>>> print(f"label: {result['label']}, with score: {round(result['score'], 4)}")
label: POSITIVE, with score: 0.9999
@@ -182,7 +182,7 @@ leverages a fine-tuned model on SQuAD.
>>> from transformers import pipeline
>>> nlp = pipeline("question-answering")
>>> question_answerer = pipeline("question-answering")
>>> context = r"""
... Extractive Question Answering is the task of extracting an answer from a text given a question. An example of a
@@ -195,11 +195,11 @@ positions of the extracted answer in the text.
.. code-block::
>>> result = nlp(question="What is extractive question answering?", context=context)
>>> result = question_answerer(question="What is extractive question answering?", context=context)
>>> print(f"Answer: '{result['answer']}', score: {round(result['score'], 4)}, start: {result['start']}, end: {result['end']}")
Answer: 'the task of extracting an answer from a text given a question.', score: 0.6226, start: 34, end: 96
>>> result = nlp(question="What is a good example of a question answering dataset?", context=context)
>>> result = question_answerer(question="What is a good example of a question answering dataset?", context=context)
>>> print(f"Answer: '{result['answer']}', score: {round(result['score'], 4)}, start: {result['start']}, end: {result['end']}")
Answer: 'SQuAD dataset,', score: 0.5053, start: 147, end: 161
@@ -336,14 +336,14 @@ Here is an example of using pipelines to replace a mask from a sequence:
>>> from transformers import pipeline
>>> nlp = pipeline("fill-mask")
>>> unmasker = pipeline("fill-mask")
This outputs the sequences with the mask filled, the confidence score, and the token id in the tokenizer vocabulary:
.. code-block::
>>> from pprint import pprint
>>> pprint(nlp(f"HuggingFace is creating a {nlp.tokenizer.mask_token} that the community uses to solve NLP tasks."))
>>> pprint(unmasker(f"HuggingFace is creating a {unmasker.tokenizer.mask_token} that the community uses to solve NLP tasks."))
[{'score': 0.1792745739221573,
'sequence': '<s>HuggingFace is creating a tool that the community uses to '
'solve NLP tasks.</s>',
@@ -627,7 +627,7 @@ It leverages a fine-tuned model on CoNLL-2003, fine-tuned by `@stefan-it <https:
>>> from transformers import pipeline
>>> nlp = pipeline("ner")
>>> ner_pipe = pipeline("ner")
>>> sequence = """Hugging Face Inc. is a company based in New York City. Its headquarters are in DUMBO,
... therefore very close to the Manhattan Bridge which is visible from the window."""
@@ -638,7 +638,7 @@ Here are the expected results:
.. code-block::
>>> print(nlp(sequence))
>>> print(ner_pipe(sequence))
[
{'word': 'Hu', 'score': 0.9995632767677307, 'entity': 'I-ORG'},
{'word': '##gging', 'score': 0.9915938973426819, 'entity': 'I-ORG'},

View File

@@ -138,10 +138,10 @@ class OnnxExportTestCase(unittest.TestCase):
self._test_infer_dynamic_axis(model, tokenizer, "tf")
def _test_infer_dynamic_axis(self, model, tokenizer, framework):
nlp = FeatureExtractionPipeline(model, tokenizer)
feature_extractor = FeatureExtractionPipeline(model, tokenizer)
variable_names = ["input_ids", "token_type_ids", "attention_mask", "output_0", "output_1"]
input_vars, output_vars, shapes, tokens = infer_shapes(nlp, framework)
input_vars, output_vars, shapes, tokens = infer_shapes(feature_extractor, framework)
# Assert all variables are present
self.assertEqual(len(shapes), len(variable_names))

View File

@@ -73,60 +73,60 @@ class CustomInputPipelineCommonMixin:
@require_torch
def test_torch_small(self):
for model_name in self.small_models:
nlp = pipeline(
pipe_small = pipeline(
task=self.pipeline_task,
model=model_name,
tokenizer=model_name,
framework="pt",
**self.pipeline_loading_kwargs,
)
self._test_pipeline(nlp)
self._test_pipeline(pipe_small)
@require_tf
def test_tf_small(self):
for model_name in self.small_models:
nlp = pipeline(
pipe_small = pipeline(
task=self.pipeline_task,
model=model_name,
tokenizer=model_name,
framework="tf",
**self.pipeline_loading_kwargs,
)
self._test_pipeline(nlp)
self._test_pipeline(pipe_small)
@require_torch
@slow
def test_torch_large(self):
for model_name in self.large_models:
nlp = pipeline(
pipe_large = pipeline(
task=self.pipeline_task,
model=model_name,
tokenizer=model_name,
framework="pt",
**self.pipeline_loading_kwargs,
)
self._test_pipeline(nlp)
self._test_pipeline(pipe_large)
@require_tf
@slow
def test_tf_large(self):
for model_name in self.large_models:
nlp = pipeline(
pipe_large = pipeline(
task=self.pipeline_task,
model=model_name,
tokenizer=model_name,
framework="tf",
**self.pipeline_loading_kwargs,
)
self._test_pipeline(nlp)
self._test_pipeline(pipe_large)
def _test_pipeline(self, nlp: Pipeline):
def _test_pipeline(self, pipe: Pipeline):
raise NotImplementedError
@require_torch
def test_compare_slow_fast_torch(self):
for model_name in self.small_models:
nlp_slow = pipeline(
pipe_slow = pipeline(
task=self.pipeline_task,
model=model_name,
tokenizer=model_name,
@@ -134,7 +134,7 @@ class CustomInputPipelineCommonMixin:
use_fast=False,
**self.pipeline_loading_kwargs,
)
nlp_fast = pipeline(
pipe_fast = pipeline(
task=self.pipeline_task,
model=model_name,
tokenizer=model_name,
@@ -142,12 +142,12 @@ class CustomInputPipelineCommonMixin:
use_fast=True,
**self.pipeline_loading_kwargs,
)
self._compare_slow_fast_pipelines(nlp_slow, nlp_fast, method="forward")
self._compare_slow_fast_pipelines(pipe_slow, pipe_fast, method="forward")
@require_tf
def test_compare_slow_fast_tf(self):
for model_name in self.small_models:
nlp_slow = pipeline(
pipe_slow = pipeline(
task=self.pipeline_task,
model=model_name,
tokenizer=model_name,
@@ -155,7 +155,7 @@ class CustomInputPipelineCommonMixin:
use_fast=False,
**self.pipeline_loading_kwargs,
)
nlp_fast = pipeline(
pipe_fast = pipeline(
task=self.pipeline_task,
model=model_name,
tokenizer=model_name,
@@ -163,23 +163,25 @@ class CustomInputPipelineCommonMixin:
use_fast=True,
**self.pipeline_loading_kwargs,
)
self._compare_slow_fast_pipelines(nlp_slow, nlp_fast, method="call")
self._compare_slow_fast_pipelines(pipe_slow, pipe_fast, method="call")
def _compare_slow_fast_pipelines(self, nlp_slow: Pipeline, nlp_fast: Pipeline, method: str):
def _compare_slow_fast_pipelines(self, pipe_slow: Pipeline, pipe_fast: Pipeline, method: str):
"""We check that the inputs to the models forward passes are identical for
slow and fast tokenizers.
"""
with mock.patch.object(
nlp_slow.model, method, wraps=getattr(nlp_slow.model, method)
) as mock_slow, mock.patch.object(nlp_fast.model, method, wraps=getattr(nlp_fast.model, method)) as mock_fast:
pipe_slow.model, method, wraps=getattr(pipe_slow.model, method)
) as mock_slow, mock.patch.object(
pipe_fast.model, method, wraps=getattr(pipe_fast.model, method)
) as mock_fast:
for inputs in self.valid_inputs:
if isinstance(inputs, dict):
inputs.update(self.pipeline_running_kwargs)
_ = nlp_slow(**inputs)
_ = nlp_fast(**inputs)
_ = pipe_slow(**inputs)
_ = pipe_fast(**inputs)
else:
_ = nlp_slow(inputs, **self.pipeline_running_kwargs)
_ = nlp_fast(inputs, **self.pipeline_running_kwargs)
_ = pipe_slow(inputs, **self.pipeline_running_kwargs)
_ = pipe_fast(inputs, **self.pipeline_running_kwargs)
mock_slow.assert_called()
mock_fast.assert_called()
@@ -209,10 +211,10 @@ class MonoInputPipelineCommonMixin(CustomInputPipelineCommonMixin):
expected_multi_result: Optional[List] = None
expected_check_keys: Optional[List[str]] = None
def _test_pipeline(self, nlp: Pipeline):
self.assertIsNotNone(nlp)
def _test_pipeline(self, pipe: Pipeline):
self.assertIsNotNone(pipe)
mono_result = nlp(self.valid_inputs[0], **self.pipeline_running_kwargs)
mono_result = pipe(self.valid_inputs[0], **self.pipeline_running_kwargs)
self.assertIsInstance(mono_result, list)
self.assertIsInstance(mono_result[0], (dict, list))
@@ -222,7 +224,7 @@ class MonoInputPipelineCommonMixin(CustomInputPipelineCommonMixin):
for key in self.mandatory_keys:
self.assertIn(key, mono_result[0])
multi_result = [nlp(input, **self.pipeline_running_kwargs) for input in self.valid_inputs]
multi_result = [pipe(input, **self.pipeline_running_kwargs) for input in self.valid_inputs]
self.assertIsInstance(multi_result, list)
self.assertIsInstance(multi_result[0], (dict, list))
@@ -241,4 +243,4 @@ class MonoInputPipelineCommonMixin(CustomInputPipelineCommonMixin):
for key in self.mandatory_keys:
self.assertIn(key, result)
self.assertRaises(Exception, nlp, self.invalid_inputs)
self.assertRaises(Exception, pipe, self.invalid_inputs)

View File

@@ -128,41 +128,41 @@ class ConversationalPipelineTests(MonoInputPipelineCommonMixin, unittest.TestCas
invalid_inputs = ["Hi there!", Conversation()]
def _test_pipeline(
self, nlp
self, conversation_agent
): # override the default test method to check that the output is a `Conversation` object
self.assertIsNotNone(nlp)
self.assertIsNotNone(conversation_agent)
# We need to recreate conversation for successive tests to pass as
# Conversation objects get *consumed* by the pipeline
conversation = Conversation("Hi there!")
mono_result = nlp(conversation)
mono_result = conversation_agent(conversation)
self.assertIsInstance(mono_result, Conversation)
conversations = [Conversation("Hi there!"), Conversation("How are you?")]
multi_result = nlp(conversations)
multi_result = conversation_agent(conversations)
self.assertIsInstance(multi_result, list)
self.assertIsInstance(multi_result[0], Conversation)
# Conversation have been consumed and are not valid anymore
# Inactive conversations passed to the pipeline raise a ValueError
self.assertRaises(ValueError, nlp, conversation)
self.assertRaises(ValueError, nlp, conversations)
self.assertRaises(ValueError, conversation_agent, conversation)
self.assertRaises(ValueError, conversation_agent, conversations)
for bad_input in self.invalid_inputs:
self.assertRaises(Exception, nlp, bad_input)
self.assertRaises(Exception, nlp, self.invalid_inputs)
self.assertRaises(Exception, conversation_agent, bad_input)
self.assertRaises(Exception, conversation_agent, self.invalid_inputs)
@require_torch
@slow
def test_integration_torch_conversation(self):
# When
nlp = pipeline(task="conversational", device=DEFAULT_DEVICE_NUM)
conversation_agent = pipeline(task="conversational", device=DEFAULT_DEVICE_NUM)
conversation_1 = Conversation("Going to the movies tonight - any suggestions?")
conversation_2 = Conversation("What's the last book you have read?")
# Then
self.assertEqual(len(conversation_1.past_user_inputs), 0)
self.assertEqual(len(conversation_2.past_user_inputs), 0)
# When
result = nlp([conversation_1, conversation_2], do_sample=False, max_length=1000)
result = conversation_agent([conversation_1, conversation_2], do_sample=False, max_length=1000)
# Then
self.assertEqual(result, [conversation_1, conversation_2])
self.assertEqual(len(result[0].past_user_inputs), 1)
@@ -175,7 +175,7 @@ class ConversationalPipelineTests(MonoInputPipelineCommonMixin, unittest.TestCas
self.assertEqual(result[1].generated_responses[0], "The Last Question")
# When
conversation_2.add_user_input("Why do you recommend it?")
result = nlp(conversation_2, do_sample=False, max_length=1000)
result = conversation_agent(conversation_2, do_sample=False, max_length=1000)
# Then
self.assertEqual(result, conversation_2)
self.assertEqual(len(result.past_user_inputs), 2)
@@ -187,12 +187,12 @@ class ConversationalPipelineTests(MonoInputPipelineCommonMixin, unittest.TestCas
@slow
def test_integration_torch_conversation_truncated_history(self):
# When
nlp = pipeline(task="conversational", min_length_for_response=24, device=DEFAULT_DEVICE_NUM)
conversation_agent = pipeline(task="conversational", min_length_for_response=24, device=DEFAULT_DEVICE_NUM)
conversation_1 = Conversation("Going to the movies tonight - any suggestions?")
# Then
self.assertEqual(len(conversation_1.past_user_inputs), 0)
# When
result = nlp(conversation_1, do_sample=False, max_length=36)
result = conversation_agent(conversation_1, do_sample=False, max_length=36)
# Then
self.assertEqual(result, conversation_1)
self.assertEqual(len(result.past_user_inputs), 1)
@@ -201,7 +201,7 @@ class ConversationalPipelineTests(MonoInputPipelineCommonMixin, unittest.TestCas
self.assertEqual(result.generated_responses[0], "The Big Lebowski")
# When
conversation_1.add_user_input("Is it an action movie?")
result = nlp(conversation_1, do_sample=False, max_length=36)
result = conversation_agent(conversation_1, do_sample=False, max_length=36)
# Then
self.assertEqual(result, conversation_1)
self.assertEqual(len(result.past_user_inputs), 2)
@@ -214,19 +214,19 @@ class ConversationalPipelineTests(MonoInputPipelineCommonMixin, unittest.TestCas
def test_integration_torch_conversation_dialogpt_input_ids(self):
tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-small")
model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-small")
nlp = ConversationalPipeline(model=model, tokenizer=tokenizer)
conversation_agent = ConversationalPipeline(model=model, tokenizer=tokenizer)
conversation_1 = Conversation("hello")
inputs = nlp._parse_and_tokenize([conversation_1])
inputs = conversation_agent._parse_and_tokenize([conversation_1])
self.assertEqual(inputs["input_ids"].tolist(), [[31373, 50256]])
conversation_2 = Conversation("how are you ?", past_user_inputs=["hello"], generated_responses=["Hi there!"])
inputs = nlp._parse_and_tokenize([conversation_2])
inputs = conversation_agent._parse_and_tokenize([conversation_2])
self.assertEqual(
inputs["input_ids"].tolist(), [[31373, 50256, 17250, 612, 0, 50256, 4919, 389, 345, 5633, 50256]]
)
inputs = nlp._parse_and_tokenize([conversation_1, conversation_2])
inputs = conversation_agent._parse_and_tokenize([conversation_1, conversation_2])
self.assertEqual(
inputs["input_ids"].tolist(),
[
@@ -240,11 +240,11 @@ class ConversationalPipelineTests(MonoInputPipelineCommonMixin, unittest.TestCas
def test_integration_torch_conversation_blenderbot_400M_input_ids(self):
tokenizer = AutoTokenizer.from_pretrained("facebook/blenderbot-400M-distill")
model = AutoModelForSeq2SeqLM.from_pretrained("facebook/blenderbot-400M-distill")
nlp = ConversationalPipeline(model=model, tokenizer=tokenizer)
conversation_agent = ConversationalPipeline(model=model, tokenizer=tokenizer)
# test1
conversation_1 = Conversation("hello")
inputs = nlp._parse_and_tokenize([conversation_1])
inputs = conversation_agent._parse_and_tokenize([conversation_1])
self.assertEqual(inputs["input_ids"].tolist(), [[1710, 86, 2]])
# test2
@@ -255,7 +255,7 @@ class ConversationalPipelineTests(MonoInputPipelineCommonMixin, unittest.TestCas
" Do you like lasagne? It is a traditional Italian dish consisting of a shepherd's pie."
],
)
inputs = nlp._parse_and_tokenize([conversation_1])
inputs = conversation_agent._parse_and_tokenize([conversation_1])
self.assertEqual(
inputs["input_ids"].tolist(),
[
@@ -310,10 +310,10 @@ class ConversationalPipelineTests(MonoInputPipelineCommonMixin, unittest.TestCas
def test_integration_torch_conversation_blenderbot_400M(self):
tokenizer = AutoTokenizer.from_pretrained("facebook/blenderbot-400M-distill")
model = AutoModelForSeq2SeqLM.from_pretrained("facebook/blenderbot-400M-distill")
nlp = ConversationalPipeline(model=model, tokenizer=tokenizer)
conversation_agent = ConversationalPipeline(model=model, tokenizer=tokenizer)
conversation_1 = Conversation("hello")
result = nlp(
result = conversation_agent(
conversation_1,
)
self.assertEqual(
@@ -325,7 +325,7 @@ class ConversationalPipelineTests(MonoInputPipelineCommonMixin, unittest.TestCas
)
conversation_1 = Conversation("Lasagne hello")
result = nlp(conversation_1, encoder_no_repeat_ngram_size=3)
result = conversation_agent(conversation_1, encoder_no_repeat_ngram_size=3)
self.assertEqual(
result.generated_responses[0],
" Do you like lasagne? It is a traditional Italian dish consisting of a shepherd's pie.",
@@ -334,7 +334,7 @@ class ConversationalPipelineTests(MonoInputPipelineCommonMixin, unittest.TestCas
conversation_1 = Conversation(
"Lasagne hello Lasagne is my favorite Italian dish. Do you like lasagne? I like lasagne."
)
result = nlp(
result = conversation_agent(
conversation_1,
encoder_no_repeat_ngram_size=3,
)
@@ -349,7 +349,7 @@ class ConversationalPipelineTests(MonoInputPipelineCommonMixin, unittest.TestCas
# When
tokenizer = AutoTokenizer.from_pretrained("facebook/blenderbot_small-90M")
model = AutoModelForSeq2SeqLM.from_pretrained("facebook/blenderbot_small-90M")
nlp = ConversationalPipeline(model=model, tokenizer=tokenizer, device=DEFAULT_DEVICE_NUM)
conversation_agent = ConversationalPipeline(model=model, tokenizer=tokenizer, device=DEFAULT_DEVICE_NUM)
conversation_1 = Conversation("My name is Sarah and I live in London")
conversation_2 = Conversation("Going to the movies tonight, What movie would you recommend? ")
@@ -357,7 +357,7 @@ class ConversationalPipelineTests(MonoInputPipelineCommonMixin, unittest.TestCas
self.assertEqual(len(conversation_1.past_user_inputs), 0)
self.assertEqual(len(conversation_2.past_user_inputs), 0)
# When
result = nlp([conversation_1, conversation_2], do_sample=False, max_length=1000)
result = conversation_agent([conversation_1, conversation_2], do_sample=False, max_length=1000)
# Then
self.assertEqual(result, [conversation_1, conversation_2])
self.assertEqual(len(result[0].past_user_inputs), 1)
@@ -378,7 +378,7 @@ class ConversationalPipelineTests(MonoInputPipelineCommonMixin, unittest.TestCas
# When
conversation_1.add_user_input("Not yet, what about you?")
conversation_2.add_user_input("What's your name?")
result = nlp([conversation_1, conversation_2], do_sample=False, max_length=1000)
result = conversation_agent([conversation_1, conversation_2], do_sample=False, max_length=1000)
# Then
self.assertEqual(result, [conversation_1, conversation_2])
self.assertEqual(len(result[0].past_user_inputs), 2)

View File

@@ -63,16 +63,16 @@ class FillMaskPipelineTests(MonoInputPipelineCommonMixin, unittest.TestCase):
@require_torch
def test_torch_fill_mask(self):
valid_inputs = "My name is <mask>"
nlp = pipeline(task="fill-mask", model=self.small_models[0])
outputs = nlp(valid_inputs)
unmasker = pipeline(task="fill-mask", model=self.small_models[0])
outputs = unmasker(valid_inputs)
self.assertIsInstance(outputs, list)
# This passes
outputs = nlp(valid_inputs, targets=[" Patrick", " Clara"])
outputs = unmasker(valid_inputs, targets=[" Patrick", " Clara"])
self.assertIsInstance(outputs, list)
# This used to fail with `cannot mix args and kwargs`
outputs = nlp(valid_inputs, something=False)
outputs = unmasker(valid_inputs, something=False)
self.assertIsInstance(outputs, list)
@require_torch
@@ -81,13 +81,13 @@ class FillMaskPipelineTests(MonoInputPipelineCommonMixin, unittest.TestCase):
valid_targets = [[" Teven", " Patrick", " Clara"], [" Sam"]]
invalid_targets = [[], [""], ""]
for model_name in self.small_models:
nlp = pipeline(task="fill-mask", model=model_name, tokenizer=model_name, framework="pt")
unmasker = pipeline(task="fill-mask", model=model_name, tokenizer=model_name, framework="pt")
for targets in valid_targets:
outputs = nlp(valid_inputs, targets=targets)
outputs = unmasker(valid_inputs, targets=targets)
self.assertIsInstance(outputs, list)
self.assertEqual(len(outputs), len(targets))
for targets in invalid_targets:
self.assertRaises(ValueError, nlp, valid_inputs, targets=targets)
self.assertRaises(ValueError, unmasker, valid_inputs, targets=targets)
@require_tf
def test_tf_fill_mask_with_targets(self):
@@ -95,13 +95,13 @@ class FillMaskPipelineTests(MonoInputPipelineCommonMixin, unittest.TestCase):
valid_targets = [[" Teven", " Patrick", " Clara"], [" Sam"]]
invalid_targets = [[], [""], ""]
for model_name in self.small_models:
nlp = pipeline(task="fill-mask", model=model_name, tokenizer=model_name, framework="tf")
unmasker = pipeline(task="fill-mask", model=model_name, tokenizer=model_name, framework="tf")
for targets in valid_targets:
outputs = nlp(valid_inputs, targets=targets)
outputs = unmasker(valid_inputs, targets=targets)
self.assertIsInstance(outputs, list)
self.assertEqual(len(outputs), len(targets))
for targets in invalid_targets:
self.assertRaises(ValueError, nlp, valid_inputs, targets=targets)
self.assertRaises(ValueError, unmasker, valid_inputs, targets=targets)
@require_torch
@slow
@@ -113,7 +113,7 @@ class FillMaskPipelineTests(MonoInputPipelineCommonMixin, unittest.TestCase):
]
valid_targets = [" Patrick", " Clara"]
for model_name in self.large_models:
nlp = pipeline(
unmasker = pipeline(
task="fill-mask",
model=model_name,
tokenizer=model_name,
@@ -121,14 +121,14 @@ class FillMaskPipelineTests(MonoInputPipelineCommonMixin, unittest.TestCase):
top_k=2,
)
mono_result = nlp(valid_inputs[0], targets=valid_targets)
mono_result = unmasker(valid_inputs[0], targets=valid_targets)
self.assertIsInstance(mono_result, list)
self.assertIsInstance(mono_result[0], dict)
for mandatory_key in mandatory_keys:
self.assertIn(mandatory_key, mono_result[0])
multi_result = [nlp(valid_input) for valid_input in valid_inputs]
multi_result = [unmasker(valid_input) for valid_input in valid_inputs]
self.assertIsInstance(multi_result, list)
self.assertIsInstance(multi_result[0], (dict, list))
@@ -146,17 +146,17 @@ class FillMaskPipelineTests(MonoInputPipelineCommonMixin, unittest.TestCase):
for key in mandatory_keys:
self.assertIn(key, result)
self.assertRaises(Exception, nlp, [None])
self.assertRaises(Exception, unmasker, [None])
valid_inputs = valid_inputs[:1]
mono_result = nlp(valid_inputs[0], targets=valid_targets)
mono_result = unmasker(valid_inputs[0], targets=valid_targets)
self.assertIsInstance(mono_result, list)
self.assertIsInstance(mono_result[0], dict)
for mandatory_key in mandatory_keys:
self.assertIn(mandatory_key, mono_result[0])
multi_result = [nlp(valid_input) for valid_input in valid_inputs]
multi_result = [unmasker(valid_input) for valid_input in valid_inputs]
self.assertIsInstance(multi_result, list)
self.assertIsInstance(multi_result[0], (dict, list))
@@ -174,7 +174,7 @@ class FillMaskPipelineTests(MonoInputPipelineCommonMixin, unittest.TestCase):
for key in mandatory_keys:
self.assertIn(key, result)
self.assertRaises(Exception, nlp, [None])
self.assertRaises(Exception, unmasker, [None])
@require_tf
@slow
@@ -186,16 +186,16 @@ class FillMaskPipelineTests(MonoInputPipelineCommonMixin, unittest.TestCase):
]
valid_targets = [" Patrick", " Clara"]
for model_name in self.large_models:
nlp = pipeline(task="fill-mask", model=model_name, tokenizer=model_name, framework="tf", top_k=2)
unmasker = pipeline(task="fill-mask", model=model_name, tokenizer=model_name, framework="tf", top_k=2)
mono_result = nlp(valid_inputs[0], targets=valid_targets)
mono_result = unmasker(valid_inputs[0], targets=valid_targets)
self.assertIsInstance(mono_result, list)
self.assertIsInstance(mono_result[0], dict)
for mandatory_key in mandatory_keys:
self.assertIn(mandatory_key, mono_result[0])
multi_result = [nlp(valid_input) for valid_input in valid_inputs]
multi_result = [unmasker(valid_input) for valid_input in valid_inputs]
self.assertIsInstance(multi_result, list)
self.assertIsInstance(multi_result[0], (dict, list))
@@ -213,17 +213,17 @@ class FillMaskPipelineTests(MonoInputPipelineCommonMixin, unittest.TestCase):
for key in mandatory_keys:
self.assertIn(key, result)
self.assertRaises(Exception, nlp, [None])
self.assertRaises(Exception, unmasker, [None])
valid_inputs = valid_inputs[:1]
mono_result = nlp(valid_inputs[0], targets=valid_targets)
mono_result = unmasker(valid_inputs[0], targets=valid_targets)
self.assertIsInstance(mono_result, list)
self.assertIsInstance(mono_result[0], dict)
for mandatory_key in mandatory_keys:
self.assertIn(mandatory_key, mono_result[0])
multi_result = [nlp(valid_input) for valid_input in valid_inputs]
multi_result = [unmasker(valid_input) for valid_input in valid_inputs]
self.assertIsInstance(multi_result, list)
self.assertIsInstance(multi_result[0], (dict, list))
@@ -241,4 +241,4 @@ class FillMaskPipelineTests(MonoInputPipelineCommonMixin, unittest.TestCase):
for key in mandatory_keys:
self.assertIn(key, result)
self.assertRaises(Exception, nlp, [None])
self.assertRaises(Exception, unmasker, [None])

View File

@@ -70,16 +70,16 @@ class QAPipelineTests(CustomInputPipelineCommonMixin, unittest.TestCase):
valid_inputs = [
{"question": "Where was HuggingFace founded ?", "context": "Paris"},
]
nlps = self.get_pipelines()
question_answering_pipelines = self.get_pipelines()
output_keys = {"score", "answer", "start", "end"}
for nlp in nlps:
result = nlp(valid_inputs, **self.pipeline_running_kwargs)
for question_answering_pipeline in question_answering_pipelines:
result = question_answering_pipeline(valid_inputs, **self.pipeline_running_kwargs)
self.assertIsInstance(result, dict)
for key in output_keys:
self.assertIn(key, result)
def _test_pipeline(self, nlp: Pipeline):
def _test_pipeline(self, question_answering_pipeline: Pipeline):
output_keys = {"score", "answer", "start", "end"}
valid_inputs = [
{"question": "Where was HuggingFace founded ?", "context": "HuggingFace was founded in Paris."},
@@ -94,15 +94,15 @@ class QAPipelineTests(CustomInputPipelineCommonMixin, unittest.TestCase):
{"question": "What is does with empty context ?", "context": ""},
{"question": "What is does with empty context ?", "context": None},
]
self.assertIsNotNone(nlp)
self.assertIsNotNone(question_answering_pipeline)
mono_result = nlp(valid_inputs[0])
mono_result = question_answering_pipeline(valid_inputs[0])
self.assertIsInstance(mono_result, dict)
for key in output_keys:
self.assertIn(key, mono_result)
multi_result = nlp(valid_inputs)
multi_result = question_answering_pipeline(valid_inputs)
self.assertIsInstance(multi_result, list)
self.assertIsInstance(multi_result[0], dict)
@@ -110,8 +110,8 @@ class QAPipelineTests(CustomInputPipelineCommonMixin, unittest.TestCase):
for key in output_keys:
self.assertIn(key, result)
for bad_input in invalid_inputs:
self.assertRaises(ValueError, nlp, bad_input)
self.assertRaises(ValueError, nlp, invalid_inputs)
self.assertRaises(ValueError, question_answering_pipeline, bad_input)
self.assertRaises(ValueError, question_answering_pipeline, invalid_inputs)
def test_argument_handler(self):
qa = QuestionAnsweringArgumentHandler()

View File

@@ -70,13 +70,13 @@ class SimpleSummarizationPipelineTests(unittest.TestCase):
# real_tokenizer._tokenizer.save("tokenizer.json")
# # + add missing config.json with albert as model_type
tokenizer = AutoTokenizer.from_pretrained("Narsil/small_summarization_test")
nlp = pipeline(task="summarization", model=model, tokenizer=tokenizer)
summarizer = pipeline(task="summarization", model=model, tokenizer=tokenizer)
with self.assertLogs("transformers", level="WARNING"):
with self.assertRaises(IndexError):
_ = nlp("This is a test")
_ = summarizer("This is a test")
output = nlp("This is a test", truncation=TruncationStrategy.ONLY_FIRST)
output = summarizer("This is a test", truncation=TruncationStrategy.ONLY_FIRST)
# 2 is default BOS from Bart.
self.assertEqual(output, [{"summary_text": "\x02 L L L"}])
@@ -95,8 +95,8 @@ class SummarizationPipelineTests(MonoInputPipelineCommonMixin, unittest.TestCase
@require_torch
@slow
def test_integration_torch_summarization(self):
nlp = pipeline(task="summarization", device=DEFAULT_DEVICE_NUM)
summarizer = pipeline(task="summarization", device=DEFAULT_DEVICE_NUM)
cnn_article = ' (CNN)The Palestinian Authority officially became the 123rd member of the International Criminal Court on Wednesday, a step that gives the court jurisdiction over alleged crimes in Palestinian territories. The formal accession was marked with a ceremony at The Hague, in the Netherlands, where the court is based. The Palestinians signed the ICC\'s founding Rome Statute in January, when they also accepted its jurisdiction over alleged crimes committed "in the occupied Palestinian territory, including East Jerusalem, since June 13, 2014." Later that month, the ICC opened a preliminary examination into the situation in Palestinian territories, paving the way for possible war crimes investigations against Israelis. As members of the court, Palestinians may be subject to counter-charges as well. Israel and the United States, neither of which is an ICC member, opposed the Palestinians\' efforts to join the body. But Palestinian Foreign Minister Riad al-Malki, speaking at Wednesday\'s ceremony, said it was a move toward greater justice. "As Palestine formally becomes a State Party to the Rome Statute today, the world is also a step closer to ending a long era of impunity and injustice," he said, according to an ICC news release. "Indeed, today brings us closer to our shared goals of justice and peace." Judge Kuniko Ozaki, a vice president of the ICC, said acceding to the treaty was just the first step for the Palestinians. "As the Rome Statute today enters into force for the State of Palestine, Palestine acquires all the rights as well as responsibilities that come with being a State Party to the Statute. These are substantive commitments, which cannot be taken lightly," she said. Rights group Human Rights Watch welcomed the development. "Governments seeking to penalize Palestine for joining the ICC should immediately end their pressure, and countries that support universal acceptance of the court\'s treaty should speak out to welcome its membership," said Balkees Jarrah, international justice counsel for the group. "What\'s objectionable is the attempts to undermine international justice, not Palestine\'s decision to join a treaty to which over 100 countries around the world are members." In January, when the preliminary ICC examination was opened, Israeli Prime Minister Benjamin Netanyahu described it as an outrage, saying the court was overstepping its boundaries. The United States also said it "strongly" disagreed with the court\'s decision. "As we have said repeatedly, we do not believe that Palestine is a state and therefore we do not believe that it is eligible to join the ICC," the State Department said in a statement. It urged the warring sides to resolve their differences through direct negotiations. "We will continue to oppose actions against Israel at the ICC as counterproductive to the cause of peace," it said. But the ICC begs to differ with the definition of a state for its purposes and refers to the territories as "Palestine." While a preliminary examination is not a formal investigation, it allows the court to review evidence and determine whether to investigate suspects on both sides. Prosecutor Fatou Bensouda said her office would "conduct its analysis in full independence and impartiality." The war between Israel and Hamas militants in Gaza last summer left more than 2,000 people dead. The inquiry will include alleged war crimes committed since June. The International Criminal Court was set up in 2002 to prosecute genocide, crimes against humanity and war crimes. 
CNN\'s Vasco Cotovio, Kareem Khadder and Faith Karimi contributed to this report.'
expected_cnn_summary = " The Palestinian Authority becomes the 123rd member of the International Criminal Court . The move gives the court jurisdiction over alleged crimes in Palestinian territories . Israel and the United States opposed the Palestinians' efforts to join the court . Rights group Human Rights Watch welcomes the move, says governments seeking to penalize Palestine should end pressure ."
result = nlp(cnn_article)
result = summarizer(cnn_article)
self.assertEqual(result[0]["summary_text"], expected_cnn_summary)

View File

@@ -214,7 +214,7 @@ class TQAPipelineTests(CustomInputPipelineCommonMixin, unittest.TestCase):
@slow
def test_integration_wtq(self):
tqa_pipeline = pipeline("table-question-answering")
table_querier = pipeline("table-question-answering")
data = {
"Repository": ["Transformers", "Datasets", "Tokenizers"],
@@ -230,7 +230,7 @@ class TQAPipelineTests(CustomInputPipelineCommonMixin, unittest.TestCase):
"What is the total amount of stars?",
]
results = tqa_pipeline(data, queries)
results = table_querier(data, queries)
expected_results = [
{"answer": "Transformers", "coordinates": [(0, 0)], "cells": ["Transformers"], "aggregator": "NONE"},
@@ -258,7 +258,7 @@ class TQAPipelineTests(CustomInputPipelineCommonMixin, unittest.TestCase):
@slow
def test_integration_sqa(self):
tqa_pipeline = pipeline(
table_querier = pipeline(
"table-question-answering",
model="google/tapas-base-finetuned-sqa",
tokenizer="google/tapas-base-finetuned-sqa",
@@ -270,7 +270,7 @@ class TQAPipelineTests(CustomInputPipelineCommonMixin, unittest.TestCase):
"Date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"],
}
queries = ["How many movies has George Clooney played in?", "How old is he?", "What's his date of birth?"]
results = tqa_pipeline(data, queries, sequential=True)
results = table_querier(data, queries, sequential=True)
expected_results = [
{"answer": "69", "coordinates": [(2, 2)], "cells": ["69"]},

View File

@@ -27,16 +27,16 @@ class TextGenerationPipelineTests(MonoInputPipelineCommonMixin, unittest.TestCas
large_models = [] # Models tested with the @slow decorator
def test_simple_generation(self):
nlp = pipeline(task="text-generation", model=self.small_models[0])
text_generator = pipeline(task="text-generation", model=self.small_models[0])
# text-generation is non-deterministic by nature, we can't fully test the output
outputs = nlp("This is a test")
outputs = text_generator("This is a test")
self.assertEqual(len(outputs), 1)
self.assertEqual(list(outputs[0].keys()), ["generated_text"])
self.assertEqual(type(outputs[0]["generated_text"]), str)
outputs = nlp(["This is a test", "This is a second test"])
outputs = text_generator(["This is a test", "This is a second test"])
self.assertEqual(len(outputs[0]), 1)
self.assertEqual(list(outputs[0][0].keys()), ["generated_text"])
self.assertEqual(type(outputs[0][0]["generated_text"]), str)

View File

@@ -33,14 +33,14 @@ class TokenClassificationPipelineTests(CustomInputPipelineCommonMixin, unittest.
] # Default model - Models tested without the @slow decorator
large_models = [] # Models tested with the @slow decorator
def _test_pipeline(self, nlp: Pipeline):
def _test_pipeline(self, token_classifier: Pipeline):
output_keys = {"entity", "word", "score", "start", "end", "index"}
if nlp.aggregation_strategy != AggregationStrategy.NONE:
if token_classifier.aggregation_strategy != AggregationStrategy.NONE:
output_keys = {"entity_group", "word", "score", "start", "end"}
self.assertIsNotNone(nlp)
self.assertIsNotNone(token_classifier)
mono_result = nlp(VALID_INPUTS[0])
mono_result = token_classifier(VALID_INPUTS[0])
self.assertIsInstance(mono_result, list)
self.assertIsInstance(mono_result[0], (dict, list))
@@ -50,7 +50,7 @@ class TokenClassificationPipelineTests(CustomInputPipelineCommonMixin, unittest.
for key in output_keys:
self.assertIn(key, mono_result[0])
multi_result = [nlp(input) for input in VALID_INPUTS]
multi_result = [token_classifier(input) for input in VALID_INPUTS]
self.assertIsInstance(multi_result, list)
self.assertIsInstance(multi_result[0], (dict, list))
@@ -328,7 +328,7 @@ class TokenClassificationPipelineTests(CustomInputPipelineCommonMixin, unittest.
model_name = self.small_models[0]
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
nlp = pipeline(task="ner", model=model_name, tokenizer=tokenizer, framework="pt")
token_classifier = pipeline(task="ner", model=model_name, tokenizer=tokenizer, framework="pt")
sentence = "Hello there"
@@ -346,7 +346,9 @@ class TokenClassificationPipelineTests(CustomInputPipelineCommonMixin, unittest.
# First element in [CLS]
scores = np.array([[1, 0, 0], [0.1, 0.3, 0.6], [0.8, 0.1, 0.1]])
pre_entities = nlp.gather_pre_entities(sentence, input_ids, scores, offset_mapping, special_tokens_mask)
pre_entities = token_classifier.gather_pre_entities(
sentence, input_ids, scores, offset_mapping, special_tokens_mask
)
self.assertEqual(
nested_simplify(pre_entities),
[
@@ -366,39 +368,39 @@ class TokenClassificationPipelineTests(CustomInputPipelineCommonMixin, unittest.
def test_tf_only(self):
model_name = "Narsil/small" # This model only has a TensorFlow version
# We test that if we don't specificy framework='tf', it gets detected automatically
nlp = pipeline(task="ner", model=model_name)
self._test_pipeline(nlp)
token_classifier = pipeline(task="ner", model=model_name)
self._test_pipeline(token_classifier)
@require_tf
def test_tf_defaults(self):
for model_name in self.small_models:
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
nlp = pipeline(task="ner", model=model_name, tokenizer=tokenizer, framework="tf")
self._test_pipeline(nlp)
token_classifier = pipeline(task="ner", model=model_name, tokenizer=tokenizer, framework="tf")
self._test_pipeline(token_classifier)
@require_tf
def test_tf_small_ignore_subwords_available_for_fast_tokenizers(self):
for model_name in self.small_models:
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
nlp = pipeline(
token_classifier = pipeline(
task="ner",
model=model_name,
tokenizer=tokenizer,
framework="tf",
aggregation_strategy=AggregationStrategy.FIRST,
)
self._test_pipeline(nlp)
self._test_pipeline(token_classifier)
for model_name in self.small_models:
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
nlp = pipeline(
token_classifier = pipeline(
task="ner",
model=model_name,
tokenizer=tokenizer,
framework="tf",
aggregation_strategy=AggregationStrategy.SIMPLE,
)
self._test_pipeline(nlp)
self._test_pipeline(token_classifier)
@require_torch
def test_pt_ignore_subwords_slow_tokenizer_raises(self):
@@ -418,14 +420,14 @@ class TokenClassificationPipelineTests(CustomInputPipelineCommonMixin, unittest.
def test_pt_defaults_slow_tokenizer(self):
for model_name in self.small_models:
tokenizer = AutoTokenizer.from_pretrained(model_name)
nlp = pipeline(task="ner", model=model_name, tokenizer=tokenizer)
self._test_pipeline(nlp)
token_classifier = pipeline(task="ner", model=model_name, tokenizer=tokenizer)
self._test_pipeline(token_classifier)
@require_torch
def test_pt_defaults(self):
for model_name in self.small_models:
nlp = pipeline(task="ner", model=model_name)
self._test_pipeline(nlp)
token_classifier = pipeline(task="ner", model=model_name)
self._test_pipeline(token_classifier)
@slow
@require_torch
@@ -442,10 +444,10 @@ class TokenClassificationPipelineTests(CustomInputPipelineCommonMixin, unittest.
@slow
@require_torch
def test_simple(self):
nlp = pipeline(task="ner", model="dslim/bert-base-NER", aggregation_strategy=AggregationStrategy.SIMPLE)
token_classifier = pipeline(task="ner", model="dslim/bert-base-NER", grouped_entities=True)
sentence = "Hello Sarah Jessica Parker who Jessica lives in New York"
sentence2 = "This is a simple test"
output = nlp(sentence)
output = token_classifier(sentence)
output_ = nested_simplify(output)
@@ -464,7 +466,7 @@ class TokenClassificationPipelineTests(CustomInputPipelineCommonMixin, unittest.
],
)
output = nlp([sentence, sentence2])
output = token_classifier([sentence, sentence2])
output_ = nested_simplify(output)
self.assertEqual(
@@ -483,17 +485,17 @@ class TokenClassificationPipelineTests(CustomInputPipelineCommonMixin, unittest.
def test_pt_small_ignore_subwords_available_for_fast_tokenizers(self):
for model_name in self.small_models:
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
nlp = pipeline(
task="ner", model=model_name, tokenizer=tokenizer, aggregation_strategy=AggregationStrategy.FIRST
token_classifier = pipeline(
task="ner", model=model_name, tokenizer=tokenizer, grouped_entities=True, ignore_subwords=True
)
self._test_pipeline(nlp)
self._test_pipeline(token_classifier)
for model_name in self.small_models:
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
nlp = pipeline(
task="ner", model=model_name, tokenizer=tokenizer, aggregation_strategy=AggregationStrategy.SIMPLE
token_classifier = pipeline(
task="ner", model=model_name, tokenizer=tokenizer, grouped_entities=True, ignore_subwords=False
)
self._test_pipeline(nlp)
self._test_pipeline(token_classifier)
class TokenClassificationArgumentHandlerTestCase(unittest.TestCase):

View File

@@ -89,10 +89,10 @@ class TranslationNewFormatPipelineTests(unittest.TestCase):
def test_translation_default_language_selection(self):
model = "patrickvonplaten/t5-tiny-random"
with pytest.warns(UserWarning, match=r".*translation_en_to_de.*"):
nlp = pipeline(task="translation", model=model)
self.assertEqual(nlp.task, "translation_en_to_de")
self.assertEquals(nlp.src_lang, "en")
self.assertEquals(nlp.tgt_lang, "de")
translator = pipeline(task="translation", model=model)
self.assertEqual(translator.task, "translation_en_to_de")
self.assertEquals(translator.src_lang, "en")
self.assertEquals(translator.tgt_lang, "de")
@require_torch
def test_translation_with_no_language_no_model_fails(self):

View File

@@ -45,25 +45,25 @@ class ZeroShotClassificationPipelineTests(CustomInputPipelineCommonMixin, unitte
sum += score
self.assertAlmostEqual(sum, 1.0, places=5)
def _test_entailment_id(self, nlp: Pipeline):
config = nlp.model.config
def _test_entailment_id(self, zero_shot_classifier: Pipeline):
config = zero_shot_classifier.model.config
original_config = deepcopy(config)
config.label2id = {"LABEL_0": 0, "LABEL_1": 1, "LABEL_2": 2}
self.assertEqual(nlp.entailment_id, -1)
self.assertEqual(zero_shot_classifier.entailment_id, -1)
config.label2id = {"entailment": 0, "neutral": 1, "contradiction": 2}
self.assertEqual(nlp.entailment_id, 0)
self.assertEqual(zero_shot_classifier.entailment_id, 0)
config.label2id = {"ENTAIL": 0, "NON-ENTAIL": 1}
self.assertEqual(nlp.entailment_id, 0)
self.assertEqual(zero_shot_classifier.entailment_id, 0)
config.label2id = {"ENTAIL": 2, "NEUTRAL": 1, "CONTR": 0}
self.assertEqual(nlp.entailment_id, 2)
self.assertEqual(zero_shot_classifier.entailment_id, 2)
nlp.model.config = original_config
zero_shot_classifier.model.config = original_config
def _test_pipeline(self, nlp: Pipeline):
def _test_pipeline(self, zero_shot_classifier: Pipeline):
output_keys = {"sequence", "labels", "scores"}
valid_mono_inputs = [
{"sequences": "Who are you voting for in 2020?", "candidate_labels": "politics"},
@@ -102,12 +102,12 @@ class ZeroShotClassificationPipelineTests(CustomInputPipelineCommonMixin, unitte
"hypothesis_template": "Template without formatting syntax.",
},
]
self.assertIsNotNone(nlp)
self.assertIsNotNone(zero_shot_classifier)
self._test_entailment_id(nlp)
self._test_entailment_id(zero_shot_classifier)
for mono_input in valid_mono_inputs:
mono_result = nlp(**mono_input)
mono_result = zero_shot_classifier(**mono_input)
self.assertIsInstance(mono_result, dict)
if len(mono_result["labels"]) > 1:
self._test_scores_sum_to_one(mono_result)
@@ -115,7 +115,7 @@ class ZeroShotClassificationPipelineTests(CustomInputPipelineCommonMixin, unitte
for key in output_keys:
self.assertIn(key, mono_result)
multi_result = nlp(**valid_multi_input)
multi_result = zero_shot_classifier(**valid_multi_input)
self.assertIsInstance(multi_result, list)
self.assertIsInstance(multi_result[0], dict)
self.assertEqual(len(multi_result), len(valid_multi_input["sequences"]))
@@ -128,9 +128,9 @@ class ZeroShotClassificationPipelineTests(CustomInputPipelineCommonMixin, unitte
self._test_scores_sum_to_one(result)
for bad_input in invalid_inputs:
self.assertRaises(Exception, nlp, **bad_input)
self.assertRaises(Exception, zero_shot_classifier, **bad_input)
if nlp.model.name_or_path in self.large_models:
if zero_shot_classifier.model.name_or_path in self.large_models:
# We also check the outputs for the large models
inputs = [
{
@@ -158,7 +158,7 @@ class ZeroShotClassificationPipelineTests(CustomInputPipelineCommonMixin, unitte
]
for input, expected_output in zip(inputs, expected_outputs):
output = nlp(**input)
output = zero_shot_classifier(**input)
for key in output:
if key == "scores":
for output_score, expected_score in zip(output[key], expected_output[key]):