parent
54d0b1c278
commit
67b1335cb9
|
@ -146,7 +146,7 @@ As a summary, consider the following table:
|
|||
| **Model** | [`~transformers.DetrForObjectDetection`] | [`~transformers.DetrForSegmentation`] | [`~transformers.DetrForSegmentation`] |
|
||||
| **Example dataset** | COCO detection | COCO detection, COCO panoptic | COCO panoptic |
|
||||
| **Format of annotations to provide to** [`~transformers.DetrImageProcessor`] | {'image_id': `int`, 'annotations': `List[Dict]`} each Dict being a COCO object annotation | {'image_id': `int`, 'annotations': `List[Dict]`} (in case of COCO detection) or {'file_name': `str`, 'image_id': `int`, 'segments_info': `List[Dict]`} (in case of COCO panoptic) | {'file_name': `str`, 'image_id': `int`, 'segments_info': `List[Dict]`} and masks_path (path to directory containing PNG files of the masks) |
|
||||
| **Postprocessing** (i.e. converting the output of the model to COCO API) | [`~transformers.DetrImageProcessor.post_process`] | [`~transformers.DetrImageProcessor.post_process_segmentation`] | [`~transformers.DetrImageProcessor.post_process_segmentation`], [`~transformers.DetrImageProcessor.post_process_panoptic`] |
|
||||
| **Postprocessing** (i.e. converting the output of the model to Pascal VOC format) | [`~transformers.DetrImageProcessor.post_process`] | [`~transformers.DetrImageProcessor.post_process_segmentation`] | [`~transformers.DetrImageProcessor.post_process_segmentation`], [`~transformers.DetrImageProcessor.post_process_panoptic`] |
|
||||
| **evaluators** | `CocoEvaluator` with `iou_types="bbox"` | `CocoEvaluator` with `iou_types="bbox"` or `"segm"` | `CocoEvaluator` with `iou_types="bbox"` or `"segm"`, `PanopticEvaluator` |
|
||||
|
||||
In short, one should prepare the data either in COCO detection or COCO panoptic format, then use
|
||||
|
|
|
@ -56,7 +56,7 @@ OWLv2 is, just like its predecessor [OWL-ViT](owlvit), a zero-shot text-conditio
|
|||
|
||||
>>> # Target image sizes (height, width) to rescale box predictions [batch_size, 2]
|
||||
>>> target_sizes = torch.Tensor([image.size[::-1]])
|
||||
>>> # Convert outputs (bounding boxes and class logits) to COCO API
|
||||
>>> # Convert outputs (bounding boxes and class logits) to Pascal VOC format (xmin, ymin, xmax, ymax)
|
||||
>>> results = processor.post_process_object_detection(outputs=outputs, target_sizes=target_sizes, threshold=0.1)
|
||||
>>> i = 0 # Retrieve predictions for the first image for the corresponding text queries
|
||||
>>> text = texts[i]
|
||||
|
|
|
@ -55,7 +55,7 @@ OWL-ViT is a zero-shot text-conditioned object detection model. OWL-ViT uses [CL
|
|||
|
||||
>>> # Target image sizes (height, width) to rescale box predictions [batch_size, 2]
|
||||
>>> target_sizes = torch.Tensor([image.size[::-1]])
|
||||
>>> # Convert outputs (bounding boxes and class logits) to COCO API
|
||||
>>> # Convert outputs (bounding boxes and class logits) to Pascal VOC format (xmin, ymin, xmax, ymax)
|
||||
>>> results = processor.post_process_object_detection(outputs=outputs, target_sizes=target_sizes, threshold=0.1)
|
||||
>>> i = 0 # Retrieve predictions for the first image for the corresponding text queries
|
||||
>>> text = texts[i]
|
||||
|
|
|
@ -512,7 +512,7 @@ Finally, load the metrics and run the evaluation.
|
|||
... outputs = model(pixel_values=pixel_values, pixel_mask=pixel_mask)
|
||||
|
||||
... orig_target_sizes = torch.stack([target["orig_size"] for target in labels], dim=0)
|
||||
... results = im_processor.post_process(outputs, orig_target_sizes) # convert outputs of model to COCO api
|
||||
... results = im_processor.post_process(outputs, orig_target_sizes) # convert outputs of model to Pascal VOC format (xmin, ymin, xmax, ymax)
|
||||
|
||||
... module.add(prediction=results, reference=labels)
|
||||
... del batch
|
||||
|
|
|
@ -518,7 +518,7 @@ DETR モデルをトレーニングできる「ラベル」。画像プロセッ
|
|||
... outputs = model(pixel_values=pixel_values, pixel_mask=pixel_mask)
|
||||
|
||||
... orig_target_sizes = torch.stack([target["orig_size"] for target in labels], dim=0)
|
||||
... results = im_processor.post_process(outputs, orig_target_sizes) # convert outputs of model to COCO api
|
||||
... results = im_processor.post_process(outputs, orig_target_sizes) # convert outputs of model to Pascal VOC format (xmin, ymin, xmax, ymax)
|
||||
|
||||
... module.add(prediction=results, reference=labels)
|
||||
... del batch
|
||||
|
|
|
@ -504,7 +504,7 @@ COCO 데이터 세트를 빌드하는 API는 데이터를 특정 형식으로
|
|||
... outputs = model(pixel_values=pixel_values, pixel_mask=pixel_mask)
|
||||
|
||||
... orig_target_sizes = torch.stack([target["orig_size"] for target in labels], dim=0)
|
||||
... results = im_processor.post_process(outputs, orig_target_sizes) # convert outputs of model to COCO api
|
||||
... results = im_processor.post_process(outputs, orig_target_sizes) # convert outputs of model to Pascal VOC format (xmin, ymin, xmax, ymax)
|
||||
|
||||
... module.add(prediction=results, reference=labels)
|
||||
... del batch
|
||||
|
|
|
@ -1330,8 +1330,8 @@ class ConditionalDetrImageProcessor(BaseImageProcessor):
|
|||
# POSTPROCESSING METHODS - TODO: add support for other frameworks
|
||||
def post_process(self, outputs, target_sizes):
|
||||
"""
|
||||
Converts the output of [`ConditionalDetrForObjectDetection`] into the format expected by the COCO api. Only
|
||||
supports PyTorch.
|
||||
Converts the output of [`ConditionalDetrForObjectDetection`] into Pascal VOC format (xmin, ymin, xmax, ymax).
|
||||
Only supports PyTorch.
|
||||
|
||||
Args:
|
||||
outputs ([`ConditionalDetrObjectDetectionOutput`]):
|
||||
|
|
|
@ -1805,7 +1805,7 @@ class ConditionalDetrForObjectDetection(ConditionalDetrPreTrainedModel):
|
|||
|
||||
>>> outputs = model(**inputs)
|
||||
|
||||
>>> # convert outputs (bounding boxes and class logits) to COCO API
|
||||
>>> # convert outputs (bounding boxes and class logits) to Pascal VOC format (xmin, ymin, xmax, ymax)
|
||||
>>> target_sizes = torch.tensor([image.size[::-1]])
|
||||
>>> results = image_processor.post_process_object_detection(outputs, threshold=0.5, target_sizes=target_sizes)[
|
||||
... 0
|
||||
|
|
|
@ -1900,7 +1900,7 @@ class DeformableDetrForObjectDetection(DeformableDetrPreTrainedModel):
|
|||
>>> inputs = image_processor(images=image, return_tensors="pt")
|
||||
>>> outputs = model(**inputs)
|
||||
|
||||
>>> # convert outputs (bounding boxes and class logits) to COCO API
|
||||
>>> # convert outputs (bounding boxes and class logits) to Pascal VOC format (xmin, ymin, xmax, ymax)
|
||||
>>> target_sizes = torch.tensor([image.size[::-1]])
|
||||
>>> results = image_processor.post_process_object_detection(outputs, threshold=0.5, target_sizes=target_sizes)[
|
||||
... 0
|
||||
|
|
|
@ -1851,7 +1851,7 @@ class DetaForObjectDetection(DetaPreTrainedModel):
|
|||
>>> inputs = image_processor(images=image, return_tensors="pt")
|
||||
>>> outputs = model(**inputs)
|
||||
|
||||
>>> # convert outputs (bounding boxes and class logits) to COCO API
|
||||
>>> # convert outputs (bounding boxes and class logits) to Pascal VOC format (xmin, ymin, xmax, ymax)
|
||||
>>> target_sizes = torch.tensor([image.size[::-1]])
|
||||
>>> results = image_processor.post_process_object_detection(outputs, threshold=0.5, target_sizes=target_sizes)[
|
||||
... 0
|
||||
|
|
|
@ -1535,7 +1535,7 @@ class DetrForObjectDetection(DetrPreTrainedModel):
|
|||
>>> inputs = image_processor(images=image, return_tensors="pt")
|
||||
>>> outputs = model(**inputs)
|
||||
|
||||
>>> # convert outputs (bounding boxes and class logits) to COCO API
|
||||
>>> # convert outputs (bounding boxes and class logits) to Pascal VOC format (xmin, ymin, xmax, ymax)
|
||||
>>> target_sizes = torch.tensor([image.size[::-1]])
|
||||
>>> results = image_processor.post_process_object_detection(outputs, threshold=0.9, target_sizes=target_sizes)[
|
||||
... 0
|
||||
|
|
|
@ -1576,7 +1576,7 @@ class Owlv2ForObjectDetection(Owlv2PreTrainedModel):
|
|||
|
||||
>>> target_sizes = torch.Tensor([unnormalized_image.size[::-1]])
|
||||
|
||||
>>> # Convert outputs (bounding boxes and class logits) to COCO API
|
||||
>>> # Convert outputs (bounding boxes and class logits) to Pascal VOC format (xmin, ymin, xmax, ymax)
|
||||
>>> results = processor.post_process_image_guided_detection(
|
||||
... outputs=outputs, threshold=0.9, nms_threshold=0.3, target_sizes=target_sizes
|
||||
... )
|
||||
|
|
|
@ -1517,7 +1517,7 @@ class OwlViTForObjectDetection(OwlViTPreTrainedModel):
|
|||
... outputs = model.image_guided_detection(**inputs)
|
||||
>>> # Target image sizes (height, width) to rescale box predictions [batch_size, 2]
|
||||
>>> target_sizes = torch.Tensor([image.size[::-1]])
|
||||
>>> # Convert outputs (bounding boxes and class logits) to COCO API
|
||||
>>> # Convert outputs (bounding boxes and class logits) to Pascal VOC format (xmin, ymin, xmax, ymax)
|
||||
>>> results = processor.post_process_image_guided_detection(
|
||||
... outputs=outputs, threshold=0.6, nms_threshold=0.3, target_sizes=target_sizes
|
||||
... )
|
||||
|
|
|
@ -1431,7 +1431,7 @@ class TableTransformerForObjectDetection(TableTransformerPreTrainedModel):
|
|||
>>> inputs = image_processor(images=image, return_tensors="pt")
|
||||
>>> outputs = model(**inputs)
|
||||
|
||||
>>> # convert outputs (bounding boxes and class logits) to COCO API
|
||||
>>> # convert outputs (bounding boxes and class logits) to Pascal VOC format (xmin, ymin, xmax, ymax)
|
||||
>>> target_sizes = torch.tensor([image.size[::-1]])
|
||||
>>> results = image_processor.post_process_object_detection(outputs, threshold=0.9, target_sizes=target_sizes)[
|
||||
... 0
|
||||
|
|
|
@ -756,7 +756,7 @@ class YolosForObjectDetection(YolosPreTrainedModel):
|
|||
>>> inputs = image_processor(images=image, return_tensors="pt")
|
||||
>>> outputs = model(**inputs)
|
||||
|
||||
>>> # convert outputs (bounding boxes and class logits) to COCO API
|
||||
>>> # convert outputs (bounding boxes and class logits) to Pascal VOC format (xmin, ymin, xmax, ymax)
|
||||
>>> target_sizes = torch.tensor([image.size[::-1]])
|
||||
>>> results = image_processor.post_process_object_detection(outputs, threshold=0.9, target_sizes=target_sizes)[
|
||||
... 0
|
||||
|
|
Loading…
Reference in New Issue