Fix YOLOS image processor resizing (#30436)

* Add test for square image that fails

* Fix for square images

* Extend test cases

* Fix resizing in tests

* Style fixup
Pavel Iakubovskii 2024-04-24 09:50:17 +01:00 committed by GitHub
parent 89c510d842
commit 767e351840
2 changed files with 24 additions and 7 deletions

src/transformers/models/yolos/image_processing_yolos.py

@@ -120,7 +120,7 @@ def get_size_with_aspect_ratio(image_size, size, max_size=None) -> Tuple[int, int]:
         if max_original_size / min_original_size * size > max_size:
             size = int(round(max_size * min_original_size / max_original_size))
 
-    if width < height and width != size:
+    if width <= height and width != size:
         height = int(size * height / width)
         width = size
     elif height < width and height != size:
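Note: the one-character change above is the whole fix. With the old `<` comparison a square image (width == height) matched neither branch, so it was never rescaled to satisfy shortest_edge / longest_edge. A standalone sketch of the corrected rule (illustrative only, not the library source) for reference:

# Standalone sketch mirroring the patched helper; names are illustrative.
from typing import Optional, Tuple


def size_with_aspect_ratio(image_size: Tuple[int, int], size: int, max_size: Optional[int] = None) -> Tuple[int, int]:
    height, width = image_size
    if max_size is not None:
        min_original_size = float(min(image_size))
        max_original_size = float(max(image_size))
        # Shrink the target short side if scaling would push the long side past max_size.
        if max_original_size / min_original_size * size > max_size:
            size = int(round(max_size * min_original_size / max_original_size))

    # "<=" (rather than "<") lets a square image fall into the first branch;
    # with "<" neither branch fired and the image kept its original size.
    if width <= height and width != size:
        height = int(size * height / width)
        width = size
    elif height < width and height != size:
        width = int(size * width / height)
        height = size
    return height, width


# Square 1500x1500 input with shortest_edge=800, longest_edge=1333:
# previously returned unchanged as (1500, 1500), now resized to (800, 800).
print(size_with_aspect_ratio((1500, 1500), size=800, max_size=1333))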

tests/models/yolos/test_image_processing_yolos.py

@@ -18,6 +18,8 @@
 import json
 import pathlib
 import unittest
 
+from parameterized import parameterized
+
 from transformers.testing_utils import require_torch, require_vision, slow
 from transformers.utils import is_torch_available, is_vision_available
@@ -98,7 +100,7 @@ class YolosImageProcessingTester(unittest.TestCase):
                 if max_original_size / min_original_size * size > max_size:
                     size = int(round(max_size * min_original_size / max_original_size))
 
-            if width < height and width != size:
+            if width <= height and width != size:
                 height = int(size * height / width)
                 width = size
             elif height < width and height != size:
@@ -183,17 +185,32 @@ class YolosImageProcessingTest(AnnotationFormatTestMixin, ImageProcessingTestMixin, unittest.TestCase):
             torch.allclose(encoded_images_with_method["pixel_values"], encoded_images["pixel_values"], atol=1e-4)
         )
 
-    def test_resize_max_size_respected(self):
+    @parameterized.expand(
+        [
+            ((3, 100, 1500), 1333, 800),
+            ((3, 400, 400), 1333, 800),
+            ((3, 1500, 1500), 1333, 800),
+            ((3, 800, 1333), 1333, 800),
+            ((3, 1333, 800), 1333, 800),
+            ((3, 800, 800), 400, 400),
+        ]
+    )
+    def test_resize_max_size_respected(self, image_size, longest_edge, shortest_edge):
         image_processor = self.image_processing_class(**self.image_processor_dict)
         # create torch tensors as image
-        image = torch.randint(0, 256, (3, 100, 1500), dtype=torch.uint8)
+        image = torch.randint(0, 256, image_size, dtype=torch.uint8)
 
         processed_image = image_processor(
-            image, size={"longest_edge": 1333, "shortest_edge": 800}, do_pad=False, return_tensors="pt"
+            image,
+            size={"longest_edge": longest_edge, "shortest_edge": shortest_edge},
+            do_pad=False,
+            return_tensors="pt",
         )["pixel_values"]
 
-        self.assertTrue(processed_image.shape[-1] <= 1333)
-        self.assertTrue(processed_image.shape[-2] <= 800)
+        shape = list(processed_image.shape[-2:])
+        max_size, min_size = max(shape), min(shape)
+        self.assertTrue(max_size <= 1333, f"Expected max_size <= 1333, got image shape {shape}")
+        self.assertTrue(min_size <= 800, f"Expected min_size <= 800, got image shape {shape}")
 
     @slow
     def test_call_pytorch_with_coco_detection_annotations(self):
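For a quick manual check mirroring the new parameterized cases, the processor can also be exercised directly; the checkpoint name below is only an example and loading it requires network access:

import torch
from transformers import YolosImageProcessor

# Example checkpoint; any YOLOS image processor configuration behaves the same way here.
processor = YolosImageProcessor.from_pretrained("hustvl/yolos-small")

# Square image: the case that previously skipped resizing altogether.
image = torch.randint(0, 256, (3, 1500, 1500), dtype=torch.uint8)
pixel_values = processor(
    image,
    size={"longest_edge": 1333, "shortest_edge": 800},
    do_pad=False,
    return_tensors="pt",
)["pixel_values"]

# Both edges must now respect the requested size constraints.
height, width = pixel_values.shape[-2:]
assert max(height, width) <= 1333 and min(height, width) <= 800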