Detr preprocessor fix (#19007)

* Fix preprocessing so that inputs are no longer modified in place
2022-09-23 18:49:31 +03:00 · 2022-09-23 18:49:31 +03:00 · fe01ec343b
parent 7e84723fe4
commit fe01ec343b
3 changed files with 24 additions and 0 deletions
--- a/src/transformers/models/conditional_detr/feature_extraction_conditional_detr.py
+++ b/src/transformers/models/conditional_detr/feature_extraction_conditional_detr.py
@ -555,6 +555,12 @@ class ConditionalDetrFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtrac
            if annotations is not None:
                annotations = [annotations]

+        # Make shallow copies of the input lists so the caller's lists are not edited in place
+        images = [image for image in images]
+
+        if annotations is not None:
+            annotations = [annotation for annotation in annotations]
+
        # prepare (COCO annotations as a list of Dict -> ConditionalDETR target as a single Dict per image)
        if annotations is not None:
            for idx, (image, target) in enumerate(zip(images, annotations)):
@ -587,6 +593,8 @@ class ConditionalDetrFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtrac
                images = [
                    self._normalize(image=image, mean=self.image_mean, std=self.image_std)[0] for image in images
                ]
+        else:
+            images = [np.array(image) for image in images]

        if pad_and_return_pixel_mask:
            # pad images up to largest image in batch and create pixel_mask
--- a/src/transformers/models/deformable_detr/feature_extraction_deformable_detr.py
+++ b/src/transformers/models/deformable_detr/feature_extraction_deformable_detr.py
@ -555,6 +555,12 @@ class DeformableDetrFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtract
            if annotations is not None:
                annotations = [annotations]

+        # Make shallow copies of the input lists so the caller's lists are not edited in place
+        images = [image for image in images]
+
+        if annotations is not None:
+            annotations = [annotation for annotation in annotations]
+
        # prepare (COCO annotations as a list of Dict -> DETR target as a single Dict per image)
        if annotations is not None:
            for idx, (image, target) in enumerate(zip(images, annotations)):
@ -587,6 +593,8 @@ class DeformableDetrFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtract
                images = [
                    self._normalize(image=image, mean=self.image_mean, std=self.image_std)[0] for image in images
                ]
+        else:
+            images = [np.array(image) for image in images]

        if pad_and_return_pixel_mask:
            # pad images up to largest image in batch and create pixel_mask
--- a/src/transformers/models/detr/feature_extraction_detr.py
+++ b/src/transformers/models/detr/feature_extraction_detr.py
@ -547,6 +547,12 @@ class DetrFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin):
            if annotations is not None:
                annotations = [annotations]

+        # Make shallow copies of the input lists so the caller's lists are not edited in place
+        images = [image for image in images]
+
+        if annotations is not None:
+            annotations = [annotation for annotation in annotations]
+
        # prepare (COCO annotations as a list of Dict -> DETR target as a single Dict per image)
        if annotations is not None:
            for idx, (image, target) in enumerate(zip(images, annotations)):
@ -579,6 +585,8 @@ class DetrFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin):
                images = [
                    self._normalize(image=image, mean=self.image_mean, std=self.image_std)[0] for image in images
                ]
+        else:
+            images = [np.array(image) for image in images]

        if pad_and_return_pixel_mask:
            # pad images up to largest image in batch and create pixel_mask