tensorlayer3/tensorlayer/vision/paddle_vision.py

609 lines
20 KiB
Python

#!/usr/bin/env python
# -*- coding: utf-8 -*-
import paddle
from . import functional_cv2 as F_cv2
from . import functional_pil as F_pil
import sys
import math
import numbers
import warnings
import collections
import numpy as np
from PIL import Image
from numpy import sin, cos, tan
import paddle
import random
# Public API of this module: the names exported by ``from ... import *``.
# Helper functions whose names start with an underscore, as well as
# ``random_factor``, are intentionally not listed here.
__all__ = [
'central_crop',
'to_tensor',
'crop',
'pad',
'resize',
'transpose',
'hwc_to_chw',
'chw_to_hwc',
'rgb_to_hsv',
'hsv_to_rgb',
'rgb_to_gray',
'adjust_brightness',
'adjust_contrast',
'adjust_hue',
'adjust_saturation',
'normalize',
'hflip',
'vflip',
'padtoboundingbox',
'standardize',
'random_brightness',
'random_contrast',
'random_saturation',
'random_hue',
'random_crop',
'random_resized_crop',
'random_vflip',
'random_hflip',
'random_rotation',
'random_shear',
'random_shift',
'random_zoom',
'random_affine',
]
def _is_pil_image(img):
    '''Return True when ``img`` is an instance of ``PIL.Image.Image``.'''
    pil_image_type = Image.Image
    return isinstance(img, pil_image_type)
def _is_tensor_image(img):
    '''Return True when ``img`` is an instance of ``paddle.Tensor``.'''
    tensor_type = paddle.Tensor
    return isinstance(img, tensor_type)
def _is_numpy_image(img):
return isinstance(img, np.ndarray) and (img.ndim in {2, 3})
def to_tensor(img, data_format='HWC'):
    '''
    Convert ``img`` by delegating to ``paddle.vision.functional.to_tensor``.

    Parameters
    ----------
    img:
        Input image, forwarded unchanged to the paddle function.
    data_format:
        Forwarded as the ``data_format`` keyword argument (default 'HWC').

    Returns
    -------
        Whatever ``paddle.vision.functional.to_tensor`` returns.
    '''
    convert = paddle.vision.functional.to_tensor
    return convert(img, data_format=data_format)
def _get_image_size(img):
    '''
    Return the spatial size of ``img``.

    For a PIL image the ``size`` attribute is returned in reversed order
    (PIL reports ``(width, height)``, so the result is ``(height, width)``).
    For a 2-D/3-D numpy array the first two entries of ``shape`` are returned.

    Raises
    ------
    TypeError
        If ``img`` is neither a PIL image nor a 2-D/3-D numpy array.
    '''
    if _is_pil_image(img):
        return img.size[::-1]
    if _is_numpy_image(img):
        return img.shape[:2]
    raise TypeError("Unexpected type {}".format(type(img)))
def random_factor(factor, name, center=1, bound=(0, float('inf')), non_negative=True):
    '''
    Draw a random adjustment factor from a range described by ``factor``.

    Parameters
    ----------
    factor:
        Either a single non-negative number ``f``, which describes the range
        ``[center - f, center + f]``, or a list/tuple ``[min, max]`` that must
        satisfy ``bound[0] <= min <= max <= bound[1]``.
    name:
        Name of the quantity, used only in error messages.
    center:
        Midpoint of the range when ``factor`` is a single number.
    bound:
        Allowed ``(lower, upper)`` limits; checked only for list/tuple input.
    non_negative:
        If True and ``factor`` is a single number, the lower end of the range
        is clipped to 0.

    Returns
    -------
        A value sampled with ``np.random.uniform`` from the resulting range.

    Raises
    ------
    ValueError
        If a single-number ``factor`` is negative, or a list/tuple is not an
        ordered pair inside ``bound``.
    TypeError
        If ``factor`` is neither a number nor a list/tuple of length 2.
    '''
    if isinstance(factor, numbers.Number):
        if factor < 0:
            raise ValueError('The input value of {} cannot be negative.'.format(name))
        low, high = center - factor, center + factor
        if non_negative:
            low = max(0, low)
    elif isinstance(factor, (tuple, list)) and len(factor) == 2:
        low, high = factor[0], factor[1]
        if not bound[0] <= low <= high <= bound[1]:
            raise ValueError(
                "Please check your value range of {} is valid and "
                "within the bound {}.".format(name, bound)
            )
    else:
        raise TypeError("Input of {} should be either a single value, or a list/tuple of " "length 2.".format(name))
    return np.random.uniform(low, high)
def central_crop(image, size=None, central_fraction=None):
    '''
    Crop the central region of an image via the PIL or cv2 backend.

    Parameters
    ----------
    image:
        PIL Image or numpy ndarray with 2 or 3 dimensions.
    size:
        Forwarded to the backend ``center_crop``; may be None if
        ``central_fraction`` is given.
    central_fraction:
        Forwarded to the backend ``center_crop``; may be None if ``size``
        is given.

    Returns
    -------
        The result of ``F_pil.center_crop`` or ``F_cv2.center_crop``.

    Raises
    ------
    ValueError
        If both ``size`` and ``central_fraction`` are None.
    TypeError
        If ``image`` is neither a PIL image nor a 2-D/3-D ndarray.
    '''
    if size is None and central_fraction is None:
        raise ValueError('central_fraction and size can not be both None')
    is_pil = _is_pil_image(image)
    if not is_pil and not _is_numpy_image(image):
        raise TypeError('image should be PIL Image or ndarray with dim=[2 or 3]. Got {}'.format(type(image)))
    backend = F_pil if is_pil else F_cv2
    return backend.center_crop(image, size, central_fraction)
def crop(image, offset_height, offset_width, target_height, target_width):
    '''
    Crop a region from an image via the PIL or cv2 backend.

    Parameters
    ----------
    image:
        PIL Image or numpy ndarray with 2 or 3 dimensions.
    offset_height, offset_width, target_height, target_width:
        Forwarded unchanged to the backend ``crop``.

    Returns
    -------
        The result of ``F_pil.crop`` or ``F_cv2.crop``.

    Raises
    ------
    TypeError
        If ``image`` is neither a PIL image nor a 2-D/3-D ndarray.
    '''
    is_pil = _is_pil_image(image)
    if not is_pil and not _is_numpy_image(image):
        raise TypeError('image should be PIL Image or ndarray with dim=[2 or 3]. Got {}'.format(type(image)))
    backend = F_pil if is_pil else F_cv2
    return backend.crop(image, offset_height, offset_width, target_height, target_width)
def pad(image, padding, padding_value, mode):
    '''
    Pad an image via the PIL or cv2 backend.

    Parameters
    ----------
    image:
        PIL Image or numpy ndarray with 2 or 3 dimensions.
    padding, padding_value, mode:
        Forwarded unchanged to the backend ``pad``.

    Returns
    -------
        The result of ``F_pil.pad`` or ``F_cv2.pad``.

    Raises
    ------
    TypeError
        If ``image`` is neither a PIL image nor a 2-D/3-D ndarray.
    '''
    is_pil = _is_pil_image(image)
    if not is_pil and not _is_numpy_image(image):
        raise TypeError('image should be PIL Image or ndarray with dim=[2 or 3]. Got {}'.format(type(image)))
    backend = F_pil if is_pil else F_cv2
    return backend.pad(image, padding, padding_value, mode)
def resize(image, size, method):
    '''
    Resize an image via the PIL or cv2 backend.

    Parameters
    ----------
    image:
        PIL Image or numpy ndarray with 2 or 3 dimensions.
    size, method:
        Forwarded unchanged to the backend ``resize``.

    Returns
    -------
        The result of ``F_pil.resize`` or ``F_cv2.resize``.

    Raises
    ------
    TypeError
        If ``image`` is neither a PIL image nor a 2-D/3-D ndarray.
    '''
    is_pil = _is_pil_image(image)
    if not is_pil and not _is_numpy_image(image):
        raise TypeError('image should be PIL Image or ndarray with dim=[2 or 3]. Got {}'.format(type(image)))
    backend = F_pil if is_pil else F_cv2
    return backend.resize(image, size, method)
def transpose(image, order):
    '''
    Transpose an image's axes via the PIL or cv2 backend.

    Parameters
    ----------
    image:
        PIL Image or numpy ndarray with 2 or 3 dimensions.
    order:
        Forwarded unchanged to the backend ``transpose``.

    Returns
    -------
        The result of ``F_pil.transpose`` or ``F_cv2.transpose``.

    Raises
    ------
    TypeError
        If ``image`` is neither a PIL image nor a 2-D/3-D ndarray.
    '''
    is_pil = _is_pil_image(image)
    if not is_pil and not _is_numpy_image(image):
        raise TypeError('image should be PIL Image or ndarray with dim=[2 or 3]. Got {}'.format(type(image)))
    backend = F_pil if is_pil else F_cv2
    return backend.transpose(image, order)
def hwc_to_chw(image):
    '''
    Convert an image from HWC to CHW layout via the PIL or cv2 backend.

    Parameters
    ----------
    image:
        PIL Image or numpy ndarray with 2 or 3 dimensions.

    Returns
    -------
        The result of ``F_pil.hwc_to_chw`` or ``F_cv2.hwc_to_chw``.

    Raises
    ------
    TypeError
        If ``image`` is neither a PIL image nor a 2-D/3-D ndarray.
    '''
    is_pil = _is_pil_image(image)
    if not is_pil and not _is_numpy_image(image):
        raise TypeError('image should be PIL Image or ndarray with dim=[2 or 3]. Got {}'.format(type(image)))
    backend = F_pil if is_pil else F_cv2
    return backend.hwc_to_chw(image)
def chw_to_hwc(image):
    '''
    Convert an image from CHW to HWC layout via the PIL or cv2 backend.

    Parameters
    ----------
    image:
        PIL Image or numpy ndarray with 2 or 3 dimensions.

    Returns
    -------
        The result of ``F_pil.chw_to_hwc`` or ``F_cv2.chw_to_hwc``.

    Raises
    ------
    TypeError
        If ``image`` is neither a PIL image nor a 2-D/3-D ndarray.
    '''
    is_pil = _is_pil_image(image)
    if not is_pil and not _is_numpy_image(image):
        raise TypeError('image should be PIL Image or ndarray with dim=[2 or 3]. Got {}'.format(type(image)))
    backend = F_pil if is_pil else F_cv2
    return backend.chw_to_hwc(image)
def rgb_to_hsv(image):
    '''
    Convert an RGB image to HSV via the PIL or cv2 backend.

    Parameters
    ----------
    image:
        PIL Image or numpy ndarray with exactly 3 dimensions.

    Returns
    -------
        The result of ``F_pil.rgb_to_hsv`` or ``F_cv2.rgb_to_hsv``.

    Raises
    ------
    TypeError
        If ``image`` is neither a PIL image nor a 3-D ndarray.
    '''
    is_pil = _is_pil_image(image)
    is_3d_array = isinstance(image, np.ndarray) and (image.ndim == 3)
    if not (is_pil or is_3d_array):
        raise TypeError('image should be PIL Image or ndarray with dim=3. Got {}'.format(type(image)))
    backend = F_pil if is_pil else F_cv2
    return backend.rgb_to_hsv(image)
def hsv_to_rgb(image):
    '''
    Convert an HSV image to RGB via the PIL or cv2 backend.

    Parameters
    ----------
    image:
        PIL Image or numpy ndarray with exactly 3 dimensions.

    Returns
    -------
        The result of ``F_pil.hsv_to_rgb`` or ``F_cv2.hsv_to_rgb``.

    Raises
    ------
    TypeError
        If ``image`` is neither a PIL image nor a 3-D ndarray.
    '''
    is_pil = _is_pil_image(image)
    is_3d_array = isinstance(image, np.ndarray) and (image.ndim == 3)
    if not (is_pil or is_3d_array):
        raise TypeError('image should be PIL Image or ndarray with dim=3. Got {}'.format(type(image)))
    backend = F_pil if is_pil else F_cv2
    return backend.hsv_to_rgb(image)
def rgb_to_gray(image, num_output_channels):
    '''
    Convert an RGB image to grayscale via the PIL or cv2 backend.

    Parameters
    ----------
    image:
        PIL Image or numpy ndarray with exactly 3 dimensions.
    num_output_channels:
        Forwarded unchanged to the backend ``rgb_to_gray``.

    Returns
    -------
        The result of ``F_pil.rgb_to_gray`` or ``F_cv2.rgb_to_gray``.

    Raises
    ------
    TypeError
        If ``image`` is neither a PIL image nor a 3-D ndarray.
    '''
    is_pil = _is_pil_image(image)
    is_3d_array = isinstance(image, np.ndarray) and (image.ndim == 3)
    if not (is_pil or is_3d_array):
        raise TypeError('image should be PIL Image or ndarray with dim=3. Got {}'.format(type(image)))
    backend = F_pil if is_pil else F_cv2
    return backend.rgb_to_gray(image, num_output_channels)
def adjust_brightness(image, brightness_factor):
    '''
    Adjust image brightness via the PIL or cv2 backend.

    Parameters
    ----------
    image:
        PIL Image or numpy ndarray with 2 or 3 dimensions.
    brightness_factor:
        Forwarded unchanged to the backend ``adjust_brightness``.

    Returns
    -------
        The result of ``F_pil.adjust_brightness`` or ``F_cv2.adjust_brightness``.

    Raises
    ------
    TypeError
        If ``image`` is neither a PIL image nor a 2-D/3-D ndarray.
    '''
    is_pil = _is_pil_image(image)
    if not is_pil and not _is_numpy_image(image):
        raise TypeError('image should be PIL Image or ndarray with dim=[2 or 3]. Got {}'.format(type(image)))
    backend = F_pil if is_pil else F_cv2
    return backend.adjust_brightness(image, brightness_factor)
def adjust_contrast(image, contrast_factor):
    '''
    Adjust image contrast via the PIL or cv2 backend.

    Parameters
    ----------
    image:
        PIL Image or numpy ndarray with 2 or 3 dimensions.
    contrast_factor:
        Forwarded unchanged to the backend ``adjust_contrast``.

    Returns
    -------
        The result of ``F_pil.adjust_contrast`` or ``F_cv2.adjust_contrast``.

    Raises
    ------
    TypeError
        If ``image`` is neither a PIL image nor a 2-D/3-D ndarray.
    '''
    is_pil = _is_pil_image(image)
    if not is_pil and not _is_numpy_image(image):
        raise TypeError('image should be PIL Image or ndarray with dim=[2 or 3]. Got {}'.format(type(image)))
    backend = F_pil if is_pil else F_cv2
    return backend.adjust_contrast(image, contrast_factor)
def adjust_hue(image, hue_factor):
    '''
    Adjust image hue via the PIL or cv2 backend.

    Parameters
    ----------
    image:
        PIL Image or numpy ndarray with 2 or 3 dimensions.
    hue_factor:
        Forwarded unchanged to the backend ``adjust_hue``.

    Returns
    -------
        The result of ``F_pil.adjust_hue`` or ``F_cv2.adjust_hue``.

    Raises
    ------
    TypeError
        If ``image`` is neither a PIL image nor a 2-D/3-D ndarray.
    '''
    is_pil = _is_pil_image(image)
    if not is_pil and not _is_numpy_image(image):
        raise TypeError('image should be PIL Image or ndarray with dim=[2 or 3]. Got {}'.format(type(image)))
    backend = F_pil if is_pil else F_cv2
    return backend.adjust_hue(image, hue_factor)
def adjust_saturation(image, saturation_factor):
    '''
    Adjust image saturation via the PIL or cv2 backend.

    Parameters
    ----------
    image:
        PIL Image or numpy ndarray with 2 or 3 dimensions.
    saturation_factor:
        Forwarded unchanged to the backend ``adjust_saturation``.

    Returns
    -------
        The result of ``F_pil.adjust_saturation`` or ``F_cv2.adjust_saturation``.

    Raises
    ------
    TypeError
        If ``image`` is neither a PIL image nor a 2-D/3-D ndarray.
    '''
    is_pil = _is_pil_image(image)
    if not is_pil and not _is_numpy_image(image):
        raise TypeError('image should be PIL Image or ndarray with dim=[2 or 3]. Got {}'.format(type(image)))
    backend = F_pil if is_pil else F_cv2
    return backend.adjust_saturation(image, saturation_factor)
def hflip(image):
    '''
    Flip an image horizontally via the PIL or cv2 backend.

    Parameters
    ----------
    image:
        PIL Image or numpy ndarray with 2 or 3 dimensions.

    Returns
    -------
        The result of ``F_pil.hflip`` or ``F_cv2.hflip``.

    Raises
    ------
    TypeError
        If ``image`` is neither a PIL image nor a 2-D/3-D ndarray.
    '''
    is_pil = _is_pil_image(image)
    if not is_pil and not _is_numpy_image(image):
        raise TypeError('image should be PIL Image or ndarray with dim=[2 or 3]. Got {}'.format(type(image)))
    backend = F_pil if is_pil else F_cv2
    return backend.hflip(image)
def vflip(image):
    '''
    Flip an image vertically via the PIL or cv2 backend.

    Parameters
    ----------
    image:
        PIL Image or numpy ndarray with 2 or 3 dimensions.

    Returns
    -------
        The result of ``F_pil.vflip`` or ``F_cv2.vflip``.

    Raises
    ------
    TypeError
        If ``image`` is neither a PIL image nor a 2-D/3-D ndarray.
    '''
    is_pil = _is_pil_image(image)
    if not is_pil and not _is_numpy_image(image):
        raise TypeError('image should be PIL Image or ndarray with dim=[2 or 3]. Got {}'.format(type(image)))
    backend = F_pil if is_pil else F_cv2
    return backend.vflip(image)
def padtoboundingbox(image, offset_height, offset_width, target_height, target_width, padding_value):
    '''
    Pad an image to a target bounding box via the PIL or cv2 backend.

    Parameters
    ----------
    image:
        PIL Image or numpy ndarray with 2 or 3 dimensions.
    offset_height, offset_width, target_height, target_width, padding_value:
        Forwarded unchanged to the backend ``padtoboundingbox``.

    Returns
    -------
        The result of ``F_pil.padtoboundingbox`` or ``F_cv2.padtoboundingbox``.

    Raises
    ------
    TypeError
        If ``image`` is neither a PIL image nor a 2-D/3-D ndarray.
    '''
    is_pil = _is_pil_image(image)
    if not is_pil and not _is_numpy_image(image):
        raise TypeError('image should be PIL Image or ndarray with dim=[2 or 3]. Got {}'.format(type(image)))
    backend = F_pil if is_pil else F_cv2
    return backend.padtoboundingbox(
        image, offset_height, offset_width, target_height, target_width, padding_value
    )
def normalize(image, mean, std, data_format):
    '''
    Normalize an image as ``(image - mean) / std`` and return a paddle tensor.

    Parameters
    ----------
    image:
        A paddle Tensor, a PIL Image (converted through ``np.asarray``), or
        any other input accepted by ``paddle.to_tensor``. It is cast to
        float32 before the arithmetic.
    mean:
        A single number (repeated for every channel) or a list/tuple whose
        length equals the number of channels. Other types are used as given,
        without a length check.
    std:
        Same conventions as ``mean``.
    data_format:
        'CHW' or 'HWC'. Selects which axis of ``image.shape`` is the channel
        axis (0 or 2) and whether mean/std are reshaped to ``(-1, 1, 1)`` so
        that they broadcast over the trailing axes.

    Returns
    -------
        The float32 tensor ``(image - mean) / std``.

    Raises
    ------
    ValueError
        If ``data_format`` is not 'CHW' or 'HWC', or if a list/tuple ``mean``
        or ``std`` has a length different from the number of channels.
    '''
    # Reject unsupported layouts up front; otherwise the channel count below
    # would never be assigned and the failure would surface as an unrelated
    # UnboundLocalError (or silently fall through to HWC-style broadcasting).
    if data_format not in ('CHW', 'HWC'):
        raise ValueError("data_format should be 'CHW' or 'HWC'. But got {}".format(data_format))

    if not _is_tensor_image(image):
        if _is_pil_image(image):
            image = np.asarray(image)
        image = paddle.to_tensor(image)
    image = image.astype('float32')

    channel_axis = 0 if data_format == 'CHW' else 2
    num_channels = image.shape[channel_axis]

    if isinstance(mean, numbers.Number):
        mean = (mean, ) * num_channels
    elif isinstance(mean, (list, tuple)):
        if len(mean) != num_channels:
            raise ValueError("Length of mean must be 1 or equal to the number of channels({0}).".format(num_channels))

    if isinstance(std, numbers.Number):
        std = (std, ) * num_channels
    elif isinstance(std, (list, tuple)):
        if len(std) != num_channels:
            raise ValueError("Length of std must be 1 or equal to the number of channels({0}).".format(num_channels))

    if data_format == 'CHW':
        # Shape (C, 1, 1) so the statistics broadcast across the two trailing axes.
        std = np.array(std).reshape((-1, 1, 1))
        mean = np.array(mean).reshape((-1, 1, 1))
    else:
        # HWC: a (C,) array already broadcasts along the last axis.
        mean = np.array(mean)
        std = np.array(std)

    mean = paddle.to_tensor(mean).astype('float32')
    std = paddle.to_tensor(std).astype('float32')
    return (image - mean) / std
def standardize(image):
    '''
    Standardize a single image (modelled on tf.image.per_image_standardization()).

    The image is converted to a float32 paddle tensor, its overall mean is
    subtracted, and the result is divided by
    ``max(std(image), 1 / sqrt(number of elements))``; the lower limit on the
    divisor avoids dividing by a very small standard deviation.

    Parameters
    ----------
    image:
        A paddle Tensor, a PIL Image (converted through ``np.asarray``), or
        any other input accepted by ``paddle.to_tensor``.

    Returns
    -------
        The standardized float32 tensor.
    '''
    # A PIL image goes through numpy first; anything that is not yet a tensor
    # is then wrapped by paddle.
    if _is_pil_image(image):
        image = np.asarray(image)
    if not _is_tensor_image(image):
        image = paddle.to_tensor(image)

    pixels = image.astype('float32')
    element_count = paddle.to_tensor(pixels.size, dtype='float32')
    # Lower limit for the divisor.
    stddev_floor = 1.0 / paddle.sqrt(element_count)
    divisor = paddle.maximum(paddle.std(pixels), stddev_floor)
    centered = pixels - paddle.mean(pixels)
    return centered / divisor
def random_brightness(image, brightness_factor):
    '''
    Adjust the brightness of an image by a randomly drawn factor.

    Parameters
    ----------
    image:
        PIL Image or numpy ndarray with 2 or 3 dimensions.
    brightness_factor:
        Describes the sampling range, as interpreted by ``random_factor``.
        A single non-negative number ``f`` gives the range
        ``[max(0, 1 - f), 1 + f]``; a list/tuple ``[min, max]`` gives that
        range and must satisfy ``0 <= min <= max``.

    Returns
    -------
        The result of the backend ``adjust_brightness`` called with the
        sampled factor.

    Raises
    ------
    TypeError
        If ``image`` is neither a PIL image nor a 2-D/3-D ndarray.
    '''
    is_pil = _is_pil_image(image)
    if not is_pil and not _is_numpy_image(image):
        raise TypeError('image should be PIL Image or ndarray with dim=[2 or 3]. Got {}'.format(type(image)))
    sampled = random_factor(brightness_factor, name='brightness')
    backend = F_pil if is_pil else F_cv2
    return backend.adjust_brightness(image, sampled)
def random_contrast(image, contrast_factor):
    '''
    Adjust the contrast of an image by a randomly drawn factor.

    Parameters
    ----------
    image:
        PIL Image or numpy ndarray with 2 or 3 dimensions.
    contrast_factor:
        Describes the sampling range, as interpreted by ``random_factor``.
        A single non-negative number ``f`` gives the range
        ``[max(0, 1 - f), 1 + f]``; a list/tuple ``[min, max]`` gives that
        range and must satisfy ``0 <= min <= max``.

    Returns
    -------
        The result of the backend ``adjust_contrast`` called with the
        sampled factor.

    Raises
    ------
    TypeError
        If ``image`` is neither a PIL image nor a 2-D/3-D ndarray.
    '''
    is_pil = _is_pil_image(image)
    if not is_pil and not _is_numpy_image(image):
        raise TypeError('image should be PIL Image or ndarray with dim=[2 or 3]. Got {}'.format(type(image)))
    sampled = random_factor(contrast_factor, name='contrast')
    backend = F_pil if is_pil else F_cv2
    return backend.adjust_contrast(image, sampled)
def random_saturation(image, saturation_factor):
    '''
    Adjust the saturation of an image by a randomly drawn factor.

    Parameters
    ----------
    image:
        PIL Image or numpy ndarray with 2 or 3 dimensions.
    saturation_factor:
        Describes the sampling range, as interpreted by ``random_factor``.
        A single non-negative number ``f`` gives the range
        ``[max(0, 1 - f), 1 + f]``; a list/tuple ``[min, max]`` gives that
        range and must satisfy ``0 <= min <= max``.

    Returns
    -------
        The result of the backend ``adjust_saturation`` called with the
        sampled factor.

    Raises
    ------
    TypeError
        If ``image`` is neither a PIL image nor a 2-D/3-D ndarray.
    '''
    is_pil = _is_pil_image(image)
    if not is_pil and not _is_numpy_image(image):
        raise TypeError('image should be PIL Image or ndarray with dim=[2 or 3]. Got {}'.format(type(image)))
    sampled = random_factor(saturation_factor, name='saturation')
    backend = F_pil if is_pil else F_cv2
    return backend.adjust_saturation(image, sampled)
def random_hue(image, hue_factor):
    '''
    Adjust the hue of an image by a randomly drawn factor.

    Parameters
    ----------
    image:
        PIL Image or numpy ndarray with 2 or 3 dimensions.
    hue_factor:
        Describes the sampling range, as interpreted by ``random_factor``
        with ``center=0`` and no clipping at zero. A single non-negative
        number ``f`` gives the range ``[-f, f]``; a list/tuple ``[min, max]``
        gives that range and must satisfy ``-0.5 <= min <= max <= 0.5``.

    Returns
    -------
        The result of the backend ``adjust_hue`` called with the sampled factor.

    Raises
    ------
    TypeError
        If ``image`` is neither a PIL image nor a 2-D/3-D ndarray.
    '''
    is_pil = _is_pil_image(image)
    if not is_pil and not _is_numpy_image(image):
        raise TypeError('image should be PIL Image or ndarray with dim=[2 or 3]. Got {}'.format(type(image)))
    sampled = random_factor(hue_factor, name='hue', center=0, bound=(-0.5, 0.5), non_negative=False)
    backend = F_pil if is_pil else F_cv2
    return backend.adjust_hue(image, sampled)
def random_crop(image, size, padding, pad_if_needed, fill, padding_mode):
    '''
    Crop a randomly positioned window of the requested size from an image.

    Parameters
    ----------
    image:
        PIL Image or numpy ndarray with 2 or 3 dimensions.
    size:
        Target ``(height, width)``; an int ``s`` means ``(s, s)``.
    padding:
        If not None, the image is first padded with ``pad(image, padding,
        fill, padding_mode)``.
    pad_if_needed:
        If truthy, the image is additionally padded along any axis on which
        it is smaller than the target size.
    fill, padding_mode:
        Forwarded to ``pad`` as the padding value and mode.

    Returns
    -------
        The result of ``crop`` at a random offset.

    Raises
    ------
    TypeError
        If ``image`` is neither a PIL image nor a 2-D/3-D ndarray.
    ValueError
        If ``size`` is malformed, or the (possibly padded) image is smaller
        than the crop size.
    '''
    if not (_is_pil_image(image) or _is_numpy_image(image)):
        raise TypeError('image should be PIL Image or ndarray with dim=[2 or 3]. Got {}'.format(type(image)))

    if isinstance(size, int):
        size = (size, size)
    elif not (isinstance(size, (tuple, list)) and len(size) == 2):
        raise ValueError('Size should be a int or a list/tuple with length of 2. ' 'But got {}'.format(size))
    target_height, target_width = size

    if padding is not None:
        image = pad(image, padding, fill, padding_mode)

    height, width = _get_image_size(image)
    if pad_if_needed:
        # Pad the width first, then the height, each only when too small.
        if width < target_width:
            image = pad(image, (target_width - width, 0), fill, padding_mode)
        if height < target_height:
            image = pad(image, (0, target_height - height), fill, padding_mode)

    # Re-read the size because padding may have changed it.
    height, width = _get_image_size(image)
    if height < target_height or width < target_width:
        raise ValueError(
            'Crop size {} should be smaller than input image size {}. '.format(
                (target_height, target_width), (height, width)
            )
        )

    # random.randint is inclusive on both ends, so the window can touch the border.
    top = random.randint(0, height - target_height)
    left = random.randint(0, width - target_width)
    return crop(image, top, left, target_height, target_width)
def random_resized_crop(image, size, scale, ratio, interpolation):
    '''
    Crop a random region of an image and resize it to ``size``.

    Parameters
    ----------
    image:
        PIL Image or numpy ndarray with 2 or 3 dimensions.
    size:
        Output size passed to ``resize``; an int ``s`` means ``(s, s)``.
    scale:
        List/tuple ``(min, max)``: range of the crop area as a fraction of
        the input image area.
    ratio:
        List/tuple ``(min, max)``: range of the crop aspect ratio
        (width / height). The values are passed to ``math.log``.
    interpolation:
        Forwarded to ``resize`` as the resampling method.

    Returns
    -------
        The cropped and resized image.

    Raises
    ------
    TypeError
        If ``image`` has an unsupported type, or ``size``, ``scale`` or
        ``ratio`` is malformed.
    ValueError
        If ``scale`` or ``ratio`` is not ordered as ``(min, max)``.
    '''
    if not (_is_pil_image(image) or _is_numpy_image(image)):
        raise TypeError('image should be PIL Image or ndarray with dim=[2 or 3]. Got {}'.format(type(image)))

    if isinstance(size, int):
        size = (size, size)
    elif not (isinstance(size, (list, tuple)) and len(size) == 2):
        raise TypeError('Size should be a int or a list/tuple with length of 2.' 'But got {}.'.format(size))

    if not (isinstance(scale, (list, tuple)) and len(scale) == 2):
        raise TypeError('Scale should be a list/tuple with length of 2.' 'But got {}.'.format(scale))
    if not (isinstance(ratio, (list, tuple)) and len(ratio) == 2):
        # The message names "Ratio"; it previously said "Scale" by mistake.
        raise TypeError('Ratio should be a list/tuple with length of 2.' 'But got {}.'.format(ratio))

    if (scale[0] > scale[1]) or (ratio[0] > ratio[1]):
        raise ValueError("Scale and ratio should be of kind (min, max)")

    def _get_param(image, scale, ratio):
        '''Pick the crop window; return (top, left, crop_height, crop_width).'''
        height, width = _get_image_size(image)
        area = height * width
        # Sample the aspect ratio uniformly in log space.
        log_ratio = tuple(math.log(x) for x in ratio)
        # Try up to 10 random windows and keep the first that fits in the image.
        for _ in range(10):
            target_area = np.random.uniform(*scale) * area
            aspect_ratio = math.exp(np.random.uniform(*log_ratio))
            w = int(round(math.sqrt(target_area * aspect_ratio)))
            h = int(round(math.sqrt(target_area / aspect_ratio)))
            if 0 < w <= width and 0 < h <= height:
                i = random.randint(0, height - h)
                j = random.randint(0, width - w)
                return i, j, h, w

        # Fallback to a central crop whose aspect ratio is clamped to ``ratio``.
        in_ratio = float(width) / float(height)
        if in_ratio < min(ratio):
            w = width
            h = int(round(w / min(ratio)))
        elif in_ratio > max(ratio):
            h = height
            w = int(round(h * max(ratio)))
        else:
            # The image's own ratio is already allowed: return the whole image.
            w = width
            h = height
        i = (height - h) // 2
        j = (width - w) // 2
        return i, j, h, w

    offset_height, offset_width, target_height, target_width = _get_param(image, scale, ratio)
    image = crop(image, offset_height, offset_width, target_height, target_width)
    image = resize(image, size, interpolation)
    return image
def random_vflip(image, prob):
    '''
    Flip an image vertically with probability ``prob``.

    Parameters
    ----------
    image:
        Input image; passed to ``vflip`` when the flip is applied.
    prob:
        The flip is applied when ``random.random() < prob``.

    Returns
    -------
        ``vflip(image)`` when the flip is applied, otherwise ``image`` unchanged.
    '''
    should_flip = random.random() < prob
    return vflip(image) if should_flip else image
def random_hflip(image, prob):
    '''
    Flip an image horizontally with probability ``prob``.

    Parameters
    ----------
    image:
        Input image; passed to ``hflip`` when the flip is applied.
    prob:
        The flip is applied when ``random.random() < prob``.

    Returns
    -------
        ``hflip(image)`` when the flip is applied, otherwise ``image`` unchanged.
    '''
    should_flip = random.random() < prob
    return hflip(image) if should_flip else image
def random_rotation(image, degrees, interpolation, expand, center, fill):
    '''
    Rotate an image by an angle drawn uniformly from a range.

    Parameters
    ----------
    image:
        PIL Image or numpy ndarray with 2 or 3 dimensions.
    degrees:
        A single non-negative number ``d`` (range ``[-d, d]``) or a
        list/tuple ``(min, max)``.
    interpolation, expand, center, fill:
        Forwarded unchanged to the backend ``rotate``.

    Returns
    -------
        The result of ``F_pil.rotate`` or ``F_cv2.rotate`` for the sampled angle.

    Raises
    ------
    TypeError
        If ``image`` is neither a PIL image nor a 2-D/3-D ndarray.
    ValueError
        If ``degrees`` is a negative number, is not a number or a sequence
        of length 2, or is not ordered as ``(min, max)``.
    '''
    is_pil = _is_pil_image(image)
    if not is_pil and not _is_numpy_image(image):
        raise TypeError('image should be PIL Image or ndarray with dim=[2 or 3]. Got {}'.format(type(image)))

    if isinstance(degrees, numbers.Number):
        if degrees < 0:
            raise ValueError('If degrees is a single number, it must be positive.' 'But got {}'.format(degrees))
        min_angle, max_angle = -degrees, degrees
    elif isinstance(degrees, (list, tuple)) and len(degrees) == 2:
        min_angle, max_angle = degrees[0], degrees[1]
        if min_angle > max_angle:
            raise ValueError('if degrees is a list/tuple, it should be (min, max).')
    else:
        raise ValueError('If degrees is a list/tuple, it must be length of 2.' 'But got {}'.format(degrees))

    angle = np.random.uniform(min_angle, max_angle)
    backend = F_pil if is_pil else F_cv2
    return backend.rotate(image, angle, interpolation, expand, center, fill)
def random_shear(image, degrees, interpolation, fill):
    '''
    Apply a random shear to an image via the PIL or cv2 backend.

    Parameters
    ----------
    image:
        PIL Image or numpy ndarray with 2 or 3 dimensions.
    degrees:
        A single number ``d`` (expanded to ``(-d, d, 0, 0)``), a list/tuple
        of length 2 (expanded to ``(degrees[0], degrees[1], 0, 0)``), or a
        list/tuple of length 4 (passed through unchanged).
    interpolation, fill:
        Forwarded unchanged to the backend ``random_shear``.

    Returns
    -------
        The result of ``F_pil.random_shear`` or ``F_cv2.random_shear``.

    Raises
    ------
    TypeError
        If ``image`` is neither a PIL image nor a 2-D/3-D ndarray.
    ValueError
        If ``degrees`` is neither a number nor a list/tuple of length 2 or 4.
    '''
    is_pil = _is_pil_image(image)
    if not is_pil and not _is_numpy_image(image):
        raise TypeError('image should be PIL Image or ndarray with dim=[2 or 3]. Got {}'.format(type(image)))

    if isinstance(degrees, numbers.Number):
        degrees = (-degrees, degrees, 0, 0)
    elif isinstance(degrees, (list, tuple)) and len(degrees) in (2, 4):
        # A pair is expanded with two trailing zeros; a 4-sequence is used as given.
        if len(degrees) == 2:
            degrees = (degrees[0], degrees[1], 0, 0)
    else:
        raise ValueError(
            'degrees should be a single number or a list/tuple with length in (2 ,4).'
            'But got {}'.format(degrees)
        )

    backend = F_pil if is_pil else F_cv2
    return backend.random_shear(image, degrees, interpolation, fill)
def random_shift(image, shift, interpolation, fill):
    '''
    Apply a random shift to an image via the PIL or cv2 backend.

    Parameters
    ----------
    image:
        PIL Image or numpy ndarray with 2 or 3 dimensions.
    shift:
        List/tuple of length 2, forwarded to the backend ``random_shift``.
    interpolation, fill:
        Forwarded unchanged to the backend ``random_shift``.

    Returns
    -------
        The result of ``F_pil.random_shift`` or ``F_cv2.random_shift``.

    Raises
    ------
    TypeError
        If ``image`` is neither a PIL image nor a 2-D/3-D ndarray.
    ValueError
        If ``shift`` is not a list/tuple of length 2.
    '''
    is_pil = _is_pil_image(image)
    if not is_pil and not _is_numpy_image(image):
        raise TypeError('image should be PIL Image or ndarray with dim=[2 or 3]. Got {}'.format(type(image)))

    shift_is_pair = isinstance(shift, (tuple, list)) and len(shift) == 2
    if not shift_is_pair:
        raise ValueError('Shift should be a list/tuple with length of 2.' 'But got {}'.format(shift))

    backend = F_pil if is_pil else F_cv2
    return backend.random_shift(image, shift, interpolation, fill)
def random_zoom(image, zoom, interpolation, fill):
    '''
    Apply a random zoom to an image via the PIL or cv2 backend.

    Parameters
    ----------
    image:
        PIL Image or numpy ndarray with 2 or 3 dimensions.
    zoom:
        List/tuple ``(min, max)`` satisfying ``0 <= min <= max``, forwarded
        to the backend ``random_zoom``.
    interpolation, fill:
        Forwarded unchanged to the backend ``random_zoom``.

    Returns
    -------
        The result of ``F_pil.random_zoom`` or ``F_cv2.random_zoom``.

    Raises
    ------
    TypeError
        If ``image`` is neither a PIL image nor a 2-D/3-D ndarray.
    ValueError
        If ``zoom`` is not a list/tuple of length 2, or does not satisfy
        ``0 <= zoom[0] <= zoom[1]``.
    '''
    is_pil = _is_pil_image(image)
    if not is_pil and not _is_numpy_image(image):
        raise TypeError('image should be PIL Image or ndarray with dim=[2 or 3]. Got {}'.format(type(image)))

    zoom_is_pair = isinstance(zoom, (tuple, list)) and len(zoom) == 2
    if not zoom_is_pair:
        raise ValueError('Zoom should be a list/tuple with length of 2.' 'But got {}'.format(zoom))

    zoom_min, zoom_max = zoom[0], zoom[1]
    if not (0 <= zoom_min <= zoom_max):
        raise ValueError('Zoom values should be positive, and zoom[1] should be greater than zoom[0].')

    backend = F_pil if is_pil else F_cv2
    return backend.random_zoom(image, zoom, interpolation, fill)
def random_affine(image, degrees, shift, zoom, shear, interpolation, fill):
    '''
    Apply a random affine transform to an image via the PIL or cv2 backend.

    Parameters
    ----------
    image:
        PIL Image or numpy ndarray with 2 or 3 dimensions.
    degrees, shift, zoom, shear, interpolation, fill:
        Forwarded unchanged to the backend ``random_affine``; they are not
        validated here.

    Returns
    -------
        The result of ``F_pil.random_affine`` or ``F_cv2.random_affine``.

    Raises
    ------
    TypeError
        If ``image`` is neither a PIL image nor a 2-D/3-D ndarray.
    '''
    is_pil = _is_pil_image(image)
    if not is_pil and not _is_numpy_image(image):
        raise TypeError('image should be PIL Image or ndarray with dim=[2 or 3]. Got {}'.format(type(image)))
    backend = F_pil if is_pil else F_cv2
    return backend.random_affine(image, degrees, shift, zoom, shear, interpolation, fill)