From 21b0ad05a0cba74a6de2341b521db26d26a1feff Mon Sep 17 00:00:00 2001
From: Nicolas Patry
Date: Wed, 16 Nov 2022 17:17:40 +0100
Subject: [PATCH] Adding doctest for `image-to-text` pipeline. (#20257)

* Adding `zero-shot-object-detection` pipeline doctest.

* Adding doctest for `image-to-text` pipeline.

* Remove nested_simplify.
---
 src/transformers/pipelines/image_to_text.py      | 12 ++++++++++
 .../pipelines/zero_shot_object_detection.py      | 23 +++++++++++++++++++
 2 files changed, 35 insertions(+)

diff --git a/src/transformers/pipelines/image_to_text.py b/src/transformers/pipelines/image_to_text.py
index a4dc052b13..f55d253d79 100644
--- a/src/transformers/pipelines/image_to_text.py
+++ b/src/transformers/pipelines/image_to_text.py
@@ -30,6 +30,18 @@ class ImageToTextPipeline(Pipeline):
     """
     Image To Text pipeline using an `AutoModelForVision2Seq`. This pipeline predicts a caption for a given image.
 
+    Example:
+
+    ```python
+    >>> from transformers import pipeline
+
+    >>> captioner = pipeline(model="ydshieh/vit-gpt2-coco-en")
+    >>> captioner("https://huggingface.co/datasets/Narsil/image_dummy/raw/main/parrots.png")
+    [{'generated_text': 'two birds are standing next to each other '}]
+    ```
+
+    Learn more about the basics of using a pipeline in the [pipeline tutorial](../pipeline_tutorial)
+
     This image to text pipeline can currently be loaded from pipeline() using the following task identifier:
     "image-to-text".
 
diff --git a/src/transformers/pipelines/zero_shot_object_detection.py b/src/transformers/pipelines/zero_shot_object_detection.py
index 8c18bd502e..a24b7201c8 100644
--- a/src/transformers/pipelines/zero_shot_object_detection.py
+++ b/src/transformers/pipelines/zero_shot_object_detection.py
@@ -33,6 +33,27 @@ class ZeroShotObjectDetectionPipeline(Pipeline):
     Zero shot object detection pipeline using `OwlViTForObjectDetection`. This pipeline predicts bounding boxes of
     objects when you provide an image and a set of `candidate_labels`.
 
+    Example:
+
+    ```python
+    >>> from transformers import pipeline
+
+    >>> detector = pipeline(model="google/owlvit-base-patch32", task="zero-shot-object-detection")
+    >>> detector(
+    ...     "http://images.cocodataset.org/val2017/000000039769.jpg",
+    ...     candidate_labels=["cat", "couch"],
+    ... )
+    [[{'score': 0.287, 'label': 'cat', 'box': {'xmin': 324, 'ymin': 20, 'xmax': 640, 'ymax': 373}}, {'score': 0.254, 'label': 'cat', 'box': {'xmin': 1, 'ymin': 55, 'xmax': 315, 'ymax': 472}}, {'score': 0.121, 'label': 'couch', 'box': {'xmin': 4, 'ymin': 0, 'xmax': 642, 'ymax': 476}}]]
+
+    >>> detector(
+    ...     "https://huggingface.co/datasets/Narsil/image_dummy/raw/main/parrots.png",
+    ...     candidate_labels=["head", "bird"],
+    ... )
+    [[{'score': 0.119, 'label': 'bird', 'box': {'xmin': 71, 'ymin': 170, 'xmax': 410, 'ymax': 508}}]]
+    ```
+
+    Learn more about the basics of using a pipeline in the [pipeline tutorial](../pipeline_tutorial)
+
     This object detection pipeline can currently be loaded from [`pipeline`] using the following task identifier:
     `"zero-shot-object-detection"`.
 
@@ -87,6 +108,8 @@ class ZeroShotObjectDetectionPipeline(Pipeline):
             - **box** (`Dict[str,int]`) -- Bounding box of the detected object in image's original size. It is a
               dictionary with `x_min`, `x_max`, `y_min`, `y_max` keys.
""" + if "candidate_labels" in kwargs: + text_queries = kwargs.pop("candidate_labels") if isinstance(text_queries, str) or (isinstance(text_queries, List) and not isinstance(text_queries[0], List)): if isinstance(images, (str, Image.Image)): inputs = {"images": images, "text_queries": text_queries}