#! /usr/bin/python
# -*- coding: utf-8 -*-

import tensorflow as tf
from tensorflow.python.framework import ops
from tensorflow.python.ops import math_ops
from tensorflow.python.training import moving_averages
from math import floor, ceil
import numpy as np

# loss function
sparse_softmax_cross_entropy_with_logits = tf.nn.sparse_softmax_cross_entropy_with_logits
sigmoid_cross_entropy_with_logits = tf.nn.sigmoid_cross_entropy_with_logits


def padding_format(padding):
    """
    Checks that the padding format is supported and returns its canonical form.

    Parameters
    ----------
    padding : str
        Must be one of the following: "same", "SAME", "VALID", "valid"

    Returns
    -------
    str "SAME" or "VALID"
    """
    if padding in ["SAME", "same"]:
        padding = "SAME"
    elif padding in ["VALID", "valid"]:
        padding = "VALID"
    elif padding is None:
        padding = None
    else:
        raise Exception("Unsupported padding: " + str(padding))
    return padding


def preprocess_1d_format(data_format, padding):
    """
    Checks that the 1-D data format is supported and returns the canonical data format and padding.

    Parameters
    ----------
    data_format : str
        Must be one of the following: "channels_last", "NWC", "NCW", "channels_first"
    padding : str
        Must be one of the following: "same", "valid", "SAME", "VALID"

    Returns
    -------
    str "NWC" or "NCW" and "SAME" or "VALID"
    """
    if data_format in ["channels_last", "NWC"]:
        data_format = "NWC"
    elif data_format in ["channels_first", "NCW"]:
        data_format = "NCW"
    elif data_format is None:
        data_format = None
    else:
        raise Exception("Unsupported data format: " + str(data_format))
    padding = padding_format(padding)
    return data_format, padding


def preprocess_2d_format(data_format, padding):
    """
    Checks that the 2-D data format is supported and returns the canonical data format and padding.

    Parameters
    ----------
    data_format : str
        Must be one of the following: "channels_last", "NHWC", "NCHW", "channels_first"
    padding : str
        Must be one of the following: "same", "valid", "SAME", "VALID"

    Returns
    -------
    str "NHWC" or "NCHW" and "SAME" or "VALID"
    """
    if data_format in ["channels_last", "NHWC"]:
        data_format = "NHWC"
    elif data_format in ["channels_first", "NCHW"]:
        data_format = "NCHW"
    elif data_format is None:
        data_format = None
    else:
        raise Exception("Unsupported data format: " + str(data_format))
    padding = padding_format(padding)
    return data_format, padding


def preprocess_3d_format(data_format, padding):
    """
    Checks that the 3-D data format is supported and returns the canonical data format and padding.

    Parameters
    ----------
    data_format : str
        Must be one of the following: "channels_last", "NDHWC", "NCDHW", "channels_first"
    padding : str
        Must be one of the following: "same", "valid", "SAME", "VALID"

    Returns
    -------
    str "NDHWC" or "NCDHW" and "SAME" or "VALID"
    """
    if data_format in ['channels_last', 'NDHWC']:
        data_format = 'NDHWC'
    elif data_format in ['channels_first', 'NCDHW']:
        data_format = 'NCDHW'
    elif data_format is None:
        data_format = None
    else:
        raise Exception("Unsupported data format: " + str(data_format))
    padding = padding_format(padding)
    return data_format, padding


def nchw_to_nhwc(x):
    """
    Channels first to channels last

    Parameters
    ----------
    x : tensor
        channels first tensor data

    Returns
    -------
    channels last tensor data
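
    Examples
    --------
    A minimal shape sketch; sizes are illustrative:

    >>> x = tf.ones([8, 3, 32, 32])   # NCHW
    >>> nchw_to_nhwc(x).shape         # -> TensorShape([8, 32, 32, 3])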
    """
    if len(x.shape) == 3:
        x = tf.transpose(x, (0, 2, 1))
    elif len(x.shape) == 4:
        x = tf.transpose(x, (0, 2, 3, 1))
    elif len(x.shape) == 5:
        x = tf.transpose(x, (0, 2, 3, 4, 1))
    else:
        raise Exception("Unsupported dimensions")
    return x


def nhwc_to_nchw(x):
    """
    Channels last to channels first

    Parameters
    ----------
    x : tensor
        channels last tensor data

    Returns
    -------
    channels first tensor data
    """
    if len(x.shape) == 3:
        x = tf.transpose(x, (0, 2, 1))
    elif len(x.shape) == 4:
        x = tf.transpose(x, (0, 3, 1, 2))
    elif len(x.shape) == 5:
        x = tf.transpose(x, (0, 4, 1, 2, 3))
    else:
        raise Exception("Unsupported dimensions")
    return x


class ReLU(object):

    def __init__(self):
        pass

    def __call__(self, x):
        return tf.nn.relu(x)


def relu(x):
    """
    Computes rectified linear: max(features, 0).

    Parameters
    ----------
    x : tensor
        Must be one of the following types: float32, float64, int32, uint8, int16,
        int8, int64, bfloat16, uint16, half, uint32, uint64, qint8.

    Returns
    -------
    A Tensor. Has the same type as features.
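
    Examples
    --------
    A minimal sketch; values are illustrative:

    >>> relu(tf.constant([-2.0, 0.0, 3.0]))  # -> [0., 0., 3.]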
    """
    return tf.nn.relu(x)


class ReLU6(object):

    def __init__(self):
        pass

    def __call__(self, x):
        return tf.nn.relu6(x)


def relu6(x):
    """
    Computes Rectified Linear 6: min(max(features, 0), 6).

    Parameters
    ----------
    x : tensor
        Must be one of the following types: float32, float64, int32, uint8, int16,
        int8, int64, bfloat16, uint16, half, uint32, uint64, qint8.

    Returns
    -------
    A Tensor with the same type as features.
    """
    return tf.nn.relu6(x)


class LeakyReLU(object):

    def __init__(self, alpha=0.2):
        self.alpha = alpha

    def __call__(self, x):
        return tf.nn.leaky_relu(x, alpha=self.alpha)


def leaky_relu(x, alpha=0.2):
    """
    Compute the Leaky ReLU activation function.

    Parameters
    ----------
    x : tensor
        representing preactivation values. Must be one of the following types:
        float16, float32, float64, int32, int64.
    alpha : float
        Slope of the activation function at x < 0. Defaults to 0.2.

    Returns
    -------
    The activation value.
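
    Examples
    --------
    A minimal sketch; values are illustrative:

    >>> leaky_relu(tf.constant([-1.0, 2.0]), alpha=0.2)  # -> [-0.2, 2.]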
    """
    return tf.nn.leaky_relu(x, alpha=alpha)


class Softplus(object):

    def __init__(self):
        pass

    def __call__(self, x):
        return tf.nn.softplus(x)


def softplus(x):
    """
    Computes softplus: log(exp(features) + 1).

    Parameters
    ----------
    x : tensor
        Must be one of the following types: half, bfloat16, float32, float64.

    Returns
    -------
    A Tensor. Has the same type as features.
    """
    return tf.nn.softplus(x)


class Tanh(object):

    def __init__(self):
        pass

    def __call__(self, x):
        return tf.nn.tanh(x)


def tanh(x):
    """
    Computes hyperbolic tangent of x element-wise.

    Parameters
    ----------
    x : tensor
        Must be one of the following types: bfloat16, half, float32, float64, complex64, complex128.

    Returns
    -------
    A Tensor. Has the same type as x.
    """
    return tf.nn.tanh(x)


class Sigmoid(object):

    def __init__(self):
        pass

    def __call__(self, x):
        return tf.nn.sigmoid(x)


def sigmoid(x):
    """
    Computes sigmoid of x element-wise.

    Parameters
    ----------
    x : tensor
        A Tensor with type float16, float32, float64, complex64, or complex128.

    Returns
    -------
    A Tensor with the same type as x.
    """
    return tf.nn.sigmoid(x)


class Softmax(object):

    def __init__(self):
        pass

    def __call__(self, x):
        return tf.nn.softmax(x)


def softmax(logits, axis=None):
    """
    Computes softmax activations.

    Parameters
    ----------
    logits : tensor
        Must be one of the following types: half, float32, float64.
    axis : int
        The dimension softmax would be performed on. The default is -1 which indicates the last dimension.

    Returns
    -------
    A Tensor. Has the same type and shape as logits.
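
    Examples
    --------
    A minimal sketch; logits are illustrative:

    >>> softmax(tf.constant([[1.0, 2.0, 3.0]]))  # each row sums to 1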
    """
    return tf.nn.softmax(logits, axis)


class Dropout(object):

    def __init__(self, keep, seed=0):
        self.keep = keep
        self.seed = seed

    def __call__(self, inputs, *args, **kwargs):
        # `keep` is the probability of keeping a unit, so the drop rate is 1 - keep.
        outputs = tf.nn.dropout(inputs, rate=1 - self.keep, seed=self.seed)
        return outputs


class BiasAdd(object):
    """
    Adds bias to value.

    Parameters
    ----------
    x : tensor
        A Tensor with type float, double, int64, int32, uint8, int16, int8, complex64, or complex128.
    bias : tensor
        Must be the same type as value unless value is a quantized type,
        in which case a different quantized type may be used.

    Returns
    -------
    A Tensor with the same type as value.
    """

    def __init__(self, data_format=None):
        self.data_format = data_format

    def __call__(self, x, bias):
        return tf.nn.bias_add(x, bias, data_format=self.data_format)


def bias_add(x, bias, data_format=None, name=None):
    """
    Adds bias to value.

    Parameters
    ----------
    x : tensor
        A Tensor with type float, double, int64, int32, uint8, int16, int8, complex64, or complex128.
    bias : tensor
        Must be the same type as value unless value is a quantized type,
        in which case a different quantized type may be used.
    data_format : str
        'N...C' and 'NC...' are supported.
    name : str
        A name for the operation (optional).

    Returns
    -------
    A Tensor with the same type as value.
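
    Examples
    --------
    A minimal sketch; shapes are illustrative:

    >>> x = tf.zeros([4, 10, 3])
    >>> bias_add(x, tf.constant([1.0, 2.0, 3.0]))  # bias broadcasts over the channel axis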
    """
    x = tf.nn.bias_add(x, bias, data_format=data_format, name=name)
    return x


class Conv1D(object):

    def __init__(self, stride, padding, data_format='NWC', dilations=None, out_channel=None, k_size=None):
        self.stride = stride
        self.dilations = dilations
        self.data_format, self.padding = preprocess_1d_format(data_format, padding)

    def __call__(self, input, filters):
        outputs = tf.nn.conv1d(
            input=input,
            filters=filters,
            stride=self.stride,
            padding=self.padding,
            data_format=self.data_format,
            dilations=self.dilations,
        )
        return outputs


def conv1d(input, filters, stride, padding, data_format='NWC', dilations=None):
    """
    Computes a 1-D convolution given 3-D input and filter tensors.

    Parameters
    ----------
    input : tensor
        A 3D Tensor. Must be of type float16, float32, or float64.
    filters : tensor
        A 3D Tensor. Must have the same type as input.
    stride : int or list of ints
        An int or list of ints that has length 1 or 3. The number of entries by which the filter is moved right at each step.
    padding : string
        'SAME' or 'VALID'
    data_format : string
        An optional string from "NWC", "NCW". Defaults to "NWC", the data is stored in the order of
        [batch, in_width, in_channels]. The "NCW" format stores data as [batch, in_channels, in_width].
    dilations : int or list of ints
        An int or list of ints that has length 1 or 3 which defaults to 1.
        The dilation factor for each dimension of input. If set to k > 1,
        there will be k-1 skipped cells between each filter element on that dimension.
        Dilations in the batch and depth dimensions must be 1.

    Returns
    -------
    A Tensor. Has the same type as input.
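
    Examples
    --------
    A minimal shape sketch; all sizes are illustrative:

    >>> x = tf.random.normal([8, 100, 16])  # [batch, width, in_channels]
    >>> w = tf.random.normal([5, 16, 32])   # [filter_width, in_channels, out_channels]
    >>> conv1d(x, w, stride=1, padding='SAME').shape  # -> TensorShape([8, 100, 32])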
    """
    data_format, padding = preprocess_1d_format(data_format, padding)
    outputs = tf.nn.conv1d(
        input=input,
        filters=filters,
        stride=stride,
        padding=padding,
        data_format=data_format,
        dilations=dilations,
    )
    return outputs


class Conv2D(object):

    def __init__(self, strides, padding, data_format='NHWC', dilations=None, out_channel=None, k_size=None):
        self.strides = strides
        self.dilations = dilations
        self.data_format, self.padding = preprocess_2d_format(data_format, padding)

    def __call__(self, input, filters):
        outputs = tf.nn.conv2d(
            input=input,
            filters=filters,
            strides=self.strides,
            padding=self.padding,
            data_format=self.data_format,
            dilations=self.dilations,
        )
        return outputs


def conv2d(input, filters, strides, padding, data_format='NHWC', dilations=None):
    """
    Computes a 2-D convolution given 4-D input and filters tensors.

    Parameters
    ----------
    input : tensor
        Must be one of the following types: half, bfloat16, float32, float64. A 4-D tensor.
        The dimension order is interpreted according to the value of data_format, see below for details.
    filters : tensor
        Must have the same type as input. A 4-D tensor of shape [filter_height, filter_width, in_channels, out_channels]
    strides : int or list of ints
        The stride of the sliding window for each dimension of input. If a single value is given it is replicated in the H and W dimension.
        By default the N and C dimensions are set to 1. The dimension order is determined by the value of data_format, see below for details.
    padding : string
        "SAME" or "VALID"
    data_format : string
        "NHWC", "NCHW". Defaults to "NHWC".
    dilations : int or list of ints
        An int or list of ints that has length 1, 2 or 4, defaults to 1. The dilation factor for each dimension of input.

    Returns
    -------
    A Tensor. Has the same type as input.
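
    Examples
    --------
    A minimal shape sketch; all sizes are illustrative:

    >>> x = tf.random.normal([8, 32, 32, 3])  # NHWC
    >>> w = tf.random.normal([3, 3, 3, 16])   # [h, w, in_channels, out_channels]
    >>> conv2d(x, w, strides=[1, 1, 1, 1], padding='SAME').shape  # -> TensorShape([8, 32, 32, 16])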
    """
    data_format, padding = preprocess_2d_format(data_format, padding)
    outputs = tf.nn.conv2d(
        input=input,
        filters=filters,
        strides=strides,
        padding=padding,
        data_format=data_format,
        dilations=dilations,
    )
    return outputs


class Conv3D(object):

    def __init__(self, strides, padding, data_format='NDHWC', dilations=None, out_channel=None, k_size=None):
        self.strides = strides
        self.dilations = dilations
        self.data_format, self.padding = preprocess_3d_format(data_format, padding)

    def __call__(self, input, filters):
        outputs = tf.nn.conv3d(
            input=input,
            filters=filters,
            strides=self.strides,
            padding=self.padding,
            data_format=self.data_format,
            dilations=self.dilations,
        )
        return outputs


def conv3d(input, filters, strides, padding, data_format='NDHWC', dilations=None):
    """
    Computes a 3-D convolution given 5-D input and filters tensors.

    Parameters
    ----------
    input : tensor
        Must be one of the following types: half, bfloat16, float32, float64.
        Shape [batch, in_depth, in_height, in_width, in_channels].
    filters : tensor
        Must have the same type as input. Shape [filter_depth, filter_height, filter_width, in_channels, out_channels].
        in_channels must match between input and filters.
    strides : list of ints
        A list of ints of length 5. The stride of the sliding window for each dimension of input.
        Must have strides[0] = strides[4] = 1.
    padding : string
        A string from: "SAME", "VALID". The type of padding algorithm to use.
    data_format : string
        An optional string from: "NDHWC", "NCDHW". Defaults to "NDHWC". The data format of the input and output data.
        With the default format "NDHWC", the data is stored in the order of: [batch, in_depth, in_height, in_width, in_channels].
        Alternatively, the format could be "NCDHW", the data storage order is: [batch, in_channels, in_depth, in_height, in_width].
    dilations : list of ints
        Defaults to [1, 1, 1, 1, 1]. 1-D tensor of length 5. The dilation factor for each dimension of input.
        If set to k > 1, there will be k-1 skipped cells between each filter element on that dimension.
        The dimension order is determined by the value of data_format, see above for details.
        Dilations in the batch and depth dimensions must be 1.

    Returns
    -------
    A Tensor. Has the same type as input.
    """
    data_format, padding = preprocess_3d_format(data_format, padding)
    outputs = tf.nn.conv3d(
        input=input,
        filters=filters,
        strides=strides,
        padding=padding,
        data_format=data_format,  # 'NDHWC',
        dilations=dilations,  # [1, 1, 1, 1, 1],
    )
    return outputs


def lrn(inputs, depth_radius, bias, alpha, beta):
    """
    Local Response Normalization.

    Parameters
    ----------
    inputs : tensor
        Must be one of the following types: half, bfloat16, float32. 4-D.
    depth_radius : int
        Defaults to 5. 0-D. Half-width of the 1-D normalization window.
    bias : float
        Defaults to 1. An offset (usually positive to avoid dividing by 0).
    alpha : float
        Defaults to 1. A scale factor, usually positive.
    beta : float
        Defaults to 0.5. An exponent.

    Returns
    -------
    A Tensor. Has the same type as input.
    """
    outputs = tf.nn.lrn(inputs, depth_radius=depth_radius, bias=bias, alpha=alpha, beta=beta)
    return outputs


def moments(x, axes, shift=None, keepdims=False):
    """
    Calculates the mean and variance of x.

    Parameters
    ----------
    x : tensor
        A Tensor
    axes : list of ints
        Axes along which to compute mean and variance.
    shift : int
        Not used in the current implementation.
    keepdims : bool
        produce moments with the same dimensionality as the input.

    Returns
    -------
    Two Tensor objects: mean and variance.
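
    Examples
    --------
    A minimal sketch; the shape is illustrative:

    >>> x = tf.random.normal([8, 32, 32, 3])
    >>> mean, var = moments(x, axes=[0, 1, 2])  # per-channel statistics, shape [3]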
    """
    outputs = tf.nn.moments(x, axes, shift, keepdims)
    return outputs


class MaxPool1d(object):

    def __init__(self, ksize, strides, padding, data_format=None):
        self.data_format, self.padding = preprocess_1d_format(data_format=data_format, padding=padding)
        self.ksize = ksize
        self.strides = strides

    def __call__(self, inputs):
        outputs = tf.nn.max_pool(
            input=inputs, ksize=self.ksize, strides=self.strides, padding=self.padding, data_format=self.data_format
        )
        return outputs


class MaxPool(object):

    def __init__(self, ksize, strides, padding, data_format=None):
        self.ksize = ksize
        self.strides = strides
        self.data_format = data_format
        self.padding = padding

    def __call__(self, inputs):
        if inputs.ndim == 3:
            self.data_format, self.padding = preprocess_1d_format(data_format=self.data_format, padding=self.padding)
        elif inputs.ndim == 4:
            self.data_format, self.padding = preprocess_2d_format(data_format=self.data_format, padding=self.padding)
        elif inputs.ndim == 5:
            self.data_format, self.padding = preprocess_3d_format(data_format=self.data_format, padding=self.padding)

        outputs = tf.nn.max_pool(
            input=inputs, ksize=self.ksize, strides=self.strides, padding=self.padding, data_format=self.data_format
        )
        return outputs


def max_pool(input, ksize, strides, padding, data_format=None):
    """
    Performs the max pooling on the input.

    Parameters
    ----------
    input : tensor
        Tensor of rank N+2, of shape [batch_size] + input_spatial_shape + [num_channels] if data_format does not start
        with "NC" (default), or [batch_size, num_channels] + input_spatial_shape if data_format starts with "NC".
        Pooling happens over the spatial dimensions only.
    ksize : int or list of ints
        An int or list of ints that has length 1, N or N+2.
        The size of the window for each dimension of the input tensor.
    strides : int or list of ints
        An int or list of ints that has length 1, N or N+2.
        The stride of the sliding window for each dimension of the input tensor.
    padding : string
        'VALID' or 'SAME'. The padding algorithm. See the "returns" section of tf.ops.convolution for details.
    data_format : string
        Specifies the channel dimension; canonicalized according to the rank of the input
        (see preprocess_1d_format, preprocess_2d_format and preprocess_3d_format).

    Returns
    -------
    A Tensor of format specified by data_format. The max pooled output tensor.
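
    Examples
    --------
    A minimal sketch; the shape is illustrative:

    >>> x = tf.random.normal([8, 32, 32, 3])
    >>> max_pool(x, ksize=2, strides=2, padding='VALID').shape  # -> TensorShape([8, 16, 16, 3])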
    """
    if input.ndim == 3:
        data_format, padding = preprocess_1d_format(data_format=data_format, padding=padding)
    elif input.ndim == 4:
        data_format, padding = preprocess_2d_format(data_format=data_format, padding=padding)
    elif input.ndim == 5:
        data_format, padding = preprocess_3d_format(data_format=data_format, padding=padding)

    outputs = tf.nn.max_pool(input=input, ksize=ksize, strides=strides, padding=padding, data_format=data_format)
    return outputs


class AvgPool1d(object):

    def __init__(self, ksize, strides, padding, data_format=None):
        self.data_format, self.padding = preprocess_1d_format(data_format=data_format, padding=padding)
        self.ksize = ksize
        self.strides = strides

    def __call__(self, inputs):
        outputs = tf.nn.pool(
            input=inputs,
            window_shape=self.ksize,
            pooling_type="AVG",
            strides=self.strides,
            padding=self.padding,
            data_format=self.data_format,
        )
        return outputs


class AvgPool(object):

    def __init__(self, ksize, strides, padding, data_format=None):
        self.ksize = ksize
        self.strides = strides
        self.data_format = data_format
        self.padding = padding_format(padding)

    def __call__(self, inputs):
        outputs = tf.nn.avg_pool(
            input=inputs, ksize=self.ksize, strides=self.strides, padding=self.padding, data_format=self.data_format
        )
        return outputs


def avg_pool(input, ksize, strides, padding):
    """
    Performs the avg pooling on the input.

    Parameters
    ----------
    input : tensor
        Tensor of rank N+2, of shape [batch_size] + input_spatial_shape + [num_channels]
        if data_format does not start with "NC" (default), or [batch_size, num_channels] + input_spatial_shape
        if data_format starts with "NC". Pooling happens over the spatial dimensions only.
    ksize : int or list of ints
        An int or list of ints that has length 1, N or N+2.
        The size of the window for each dimension of the input tensor.
    strides : int or list of ints
        An int or list of ints that has length 1, N or N+2.
        The stride of the sliding window for each dimension of the input tensor.
    padding : string
        'VALID' or 'SAME'. The padding algorithm. See the "returns" section of tf.ops.convolution for details.

    Returns
    -------
    A Tensor of format specified by data_format. The average pooled output tensor.
    """
    padding = padding_format(padding)
    outputs = tf.nn.avg_pool(
        input=input,
        ksize=ksize,
        strides=strides,
        padding=padding,
    )
    return outputs


class MaxPool3d(object):

    def __init__(self, ksize, strides, padding, data_format=None):
        self.data_format, self.padding = preprocess_3d_format(data_format, padding)
        self.ksize = ksize
        self.strides = strides

    def __call__(self, inputs):
        outputs = tf.nn.max_pool3d(
            input=inputs,
            ksize=self.ksize,
            strides=self.strides,
            padding=self.padding,
            data_format=self.data_format,
        )
        return outputs


def max_pool3d(input, ksize, strides, padding, data_format=None):
    """
    Performs the max pooling on the input.

    Parameters
    ----------
    input : tensor
        A 5-D Tensor of the format specified by data_format.
    ksize : int or list of ints
        An int or list of ints that has length 1, 3 or 5.
        The size of the window for each dimension of the input tensor.
    strides : int or list of ints
        An int or list of ints that has length 1, 3 or 5.
        The stride of the sliding window for each dimension of the input tensor.
    padding : string
        'VALID' or 'SAME'. The padding algorithm. See the "returns" section of tf.ops.convolution for details.
    data_format : string
        "NDHWC", "NCDHW". Defaults to "NDHWC". The data format of the input and output data.
        With the default format "NDHWC", the data is stored in the order of: [batch, in_depth, in_height, in_width, in_channels].
        Alternatively, the format could be "NCDHW", the data storage order is: [batch, in_channels, in_depth, in_height, in_width].

    Returns
    -------
    A Tensor of format specified by data_format. The max pooled output tensor.
    """
    data_format, padding = preprocess_3d_format(data_format, padding)
    outputs = tf.nn.max_pool3d(
        input=input,
        ksize=ksize,
        strides=strides,
        padding=padding,
        data_format=data_format,
    )
    return outputs


class AvgPool3d(object):

    def __init__(self, ksize, strides, padding, data_format=None):
        self.data_format, self.padding = preprocess_3d_format(data_format, padding)
        self.ksize = ksize
        self.strides = strides

    def __call__(self, inputs):
        outputs = tf.nn.avg_pool3d(
            input=inputs,
            ksize=self.ksize,
            strides=self.strides,
            padding=self.padding,
            data_format=self.data_format,
        )
        return outputs


def avg_pool3d(input, ksize, strides, padding, data_format=None):
    """
    Performs the average pooling on the input.

    Parameters
    ----------
    input : tensor
        A 5-D Tensor of shape [batch, depth, height, width, channels] and type float32, float64, qint8, quint8, or qint32.
    ksize : int or list of ints
        An int or list of ints that has length 1, 3 or 5. The size of the window for each dimension of the input tensor.
    strides : int or list of ints
        An int or list of ints that has length 1, 3 or 5.
        The stride of the sliding window for each dimension of the input tensor.
    padding : string
        'VALID' or 'SAME'. The padding algorithm. See the "returns" section of tf.ops.convolution for details.
    data_format : string
        'NDHWC' and 'NCDHW' are supported.

    Returns
    -------
    A Tensor with the same type as value. The average pooled output tensor.
    """
    data_format, padding = preprocess_3d_format(data_format, padding)
    outputs = tf.nn.avg_pool3d(
        input=input,
        ksize=ksize,
        strides=strides,
        padding=padding,
        data_format=data_format,
    )
    return outputs


def pool(input, window_shape, pooling_type, strides=None, padding='VALID', data_format=None, dilations=None, name=None):
    """
    Performs an N-D pooling operation.

    Parameters
    ----------
    input : tensor
        Tensor of rank N+2, of shape [batch_size] + input_spatial_shape + [num_channels]
        if data_format does not start with "NC" (default), or [batch_size, num_channels] + input_spatial_shape
        if data_format starts with "NC". Pooling happens over the spatial dimensions only.
    window_shape : list of ints
        Sequence of N ints >= 1.
    pooling_type : string
        Specifies pooling operation, must be "AVG" or "MAX".
    strides : list of ints
        Sequence of N ints >= 1. Defaults to [1]*N. If any value of strides is > 1, then all values of dilation_rate must be 1.
    padding : string
        The padding algorithm, must be "SAME" or "VALID". Defaults to "VALID".
        See the "returns" section of tf.ops.convolution for details.
    data_format : string
        Specifies whether the channel dimension of the input and output is the last dimension (default, or if data_format does not start with "NC"),
        or the second dimension (if data_format starts with "NC").
        For N=1, the valid values are "NWC" (default) and "NCW". For N=2, the valid values are "NHWC" (default) and "NCHW".
        For N=3, the valid values are "NDHWC" (default) and "NCDHW".
    dilations : list of ints
        Dilation rate. List of N ints >= 1. Defaults to [1]*N. If any value of dilation_rate is > 1, then all values of strides must be 1.
    name : string
        Optional. Name of the op.

    Returns
    -------
    Tensor of rank N+2, of shape [batch_size] + output_spatial_shape + [num_channels]
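
    Examples
    --------
    A minimal sketch; the shape is illustrative:

    >>> x = tf.random.normal([8, 32, 32, 3])
    >>> pool(x, window_shape=[2, 2], pooling_type='AVG', strides=[2, 2], padding='VALID').shape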
    """
    if pooling_type in ["MAX", "max"]:
        pooling_type = "MAX"
    elif pooling_type in ["AVG", "avg"]:
        pooling_type = "AVG"
    else:
        raise ValueError('Unsupported pool_mode: ' + str(pooling_type))
    padding = padding_format(padding)
    outputs = tf.nn.pool(
        input=input,
        window_shape=window_shape,
        pooling_type=pooling_type,
        strides=strides,
        padding=padding,
        data_format=data_format,
        dilations=dilations,
        name=name,
    )
    return outputs


class DepthwiseConv2d(object):

    def __init__(self, strides, padding, data_format=None, dilations=None, ksize=None, channel_multiplier=1):
        self.data_format, self.padding = preprocess_2d_format(data_format, padding)
        self.strides = strides
        self.dilations = dilations

    def __call__(self, input, filter):
        outputs = tf.nn.depthwise_conv2d(
            input=input,
            filter=filter,
            strides=self.strides,
            padding=self.padding,
            data_format=self.data_format,
            dilations=self.dilations,
        )
        return outputs


def depthwise_conv2d(input, filter, strides, padding, data_format=None, dilations=None, name=None):
    """
    Depthwise 2-D convolution.

    Parameters
    ----------
    input : tensor
        4-D with shape according to data_format.
    filter : tensor
        4-D with shape [filter_height, filter_width, in_channels, channel_multiplier].
    strides : list
        1-D of size 4. The stride of the sliding window for each dimension of input.
    padding : string
        'VALID' or 'SAME'. The padding algorithm. See the "returns" section of tf.ops.convolution for details.
    data_format : string
        The data format for input. Either "NHWC" (default) or "NCHW".
    dilations : list
        1-D of size 2. The dilation rate in which we sample input values across the height and width dimensions in atrous convolution.
        If it is greater than 1, then all values of strides must be 1.
    name : string
        A name for this operation (optional).

    Returns
    -------
    A 4-D Tensor with shape according to data_format.
    E.g., for "NHWC" format, shape is [batch, out_height, out_width, in_channels * channel_multiplier].
    """
    data_format, padding = preprocess_2d_format(data_format, padding)
    outputs = tf.nn.depthwise_conv2d(
        input=input,
        filter=filter,
        strides=strides,
        padding=padding,
        data_format=data_format,
        dilations=dilations,
        name=name,
    )
    return outputs


class Conv1d_transpose(object):

    def __init__(
        self, stride, padding, data_format='NWC', dilations=None, out_channel=None, k_size=None, in_channels=None
    ):
        self.stride = stride
        self.dilations = dilations
        self.data_format, self.padding = preprocess_1d_format(data_format, padding)

    def __call__(self, input, filters):
        batch_size = input.shape[0]
        if self.data_format == 'NWC':
            w_axis, c_axis = 1, 2
        else:
            w_axis, c_axis = 2, 1

        input_shape = input.shape.as_list()
        filters_shape = filters.shape.as_list()
        input_w = input_shape[w_axis]
        filters_w = filters_shape[0]
        output_channels = filters_shape[1]
        dilations_w = 1

        if isinstance(self.stride, int):
            strides_w = self.stride
        else:
            strides_list = list(self.stride)
            strides_w = strides_list[w_axis]

        if self.dilations is not None:
            if isinstance(self.dilations, int):
                dilations_w = self.dilations
            else:
                dilations_list = list(self.dilations)
                dilations_w = dilations_list[w_axis]

        # effective filter width after dilation
        filters_w = filters_w + (filters_w - 1) * (dilations_w - 1)
        assert self.padding in {'SAME', 'VALID'}
        if self.padding == 'VALID':
            output_w = input_w * strides_w + max(filters_w - strides_w, 0)
        elif self.padding == 'SAME':
            output_w = input_w * strides_w

        if self.data_format == 'NCW':
            output_shape = (batch_size, output_channels, output_w)
        else:
            output_shape = (batch_size, output_w, output_channels)
        output_shape = tf.stack(output_shape)
        outputs = tf.nn.conv1d_transpose(
            input=input,
            filters=filters,
            output_shape=output_shape,
            strides=self.stride,
            padding=self.padding,
            data_format=self.data_format,
            dilations=self.dilations,
        )
        return outputs


def conv1d_transpose(
    input, filters, output_shape, strides, padding='SAME', data_format='NWC', dilations=None, name=None
):
    """
    The transpose of conv1d.

    Parameters
    ----------
    input : tensor
        A 3-D Tensor of type float and shape [batch, in_width, in_channels]
        for NWC data format or [batch, in_channels, in_width] for NCW data format.
    filters : tensor
        A 3-D Tensor with the same type as value and shape [filter_width, output_channels, in_channels].
        filter's in_channels dimension must match that of value.
    output_shape : tensor
        A 1-D Tensor, containing three elements, representing the output shape of the deconvolution op.
    strides : int or list of ints
        An int or list of ints that has length 1 or 3. The number of entries by which the filter is moved right at each step.
    padding : string
        'VALID' or 'SAME'. The padding algorithm. See the "returns" section of tf.ops.convolution for details.
    data_format : string
        'NWC' and 'NCW' are supported.
    dilations : int or list of ints
        An int or list of ints that has length 1 or 3 which defaults to 1.
        The dilation factor for each dimension of input. If set to k > 1,
        there will be k-1 skipped cells between each filter element on that dimension.
        Dilations in the batch and depth dimensions must be 1.
    name : string
        Optional name for the returned tensor.

    Returns
    -------
    A Tensor with the same type as value.
    """
    data_format, padding = preprocess_1d_format(data_format, padding)
    outputs = tf.nn.conv1d_transpose(
        input=input,
        filters=filters,
        output_shape=output_shape,
        strides=strides,
        padding=padding,
        data_format=data_format,
        dilations=dilations,
        name=name,
    )
    return outputs


class Conv2d_transpose(object):

    def __init__(
        self, strides, padding, data_format='NHWC', dilations=None, name=None, out_channel=None, k_size=None,
        in_channels=None
    ):
        self.strides = strides
        self.dilations = dilations
        self.name = name
        self.data_format, self.padding = preprocess_2d_format(data_format, padding)

    def __call__(self, input, filters):
        if self.data_format == 'NHWC':
            h_axis, w_axis = 1, 2
        else:
            h_axis, w_axis = 2, 3

        input_shape = input.shape.as_list()
        filters_shape = filters.shape.as_list()
        batch_size = input.shape[0]
        input_h, input_w = input_shape[h_axis], input_shape[w_axis]
        kernel_h, kernel_w = filters_shape[0], filters_shape[1]
        output_channels = filters_shape[2]
        dilations_h, dilations_w = 1, 1

        if isinstance(self.strides, int):
            strides_h = self.strides
            strides_w = self.strides
        else:
            strides_list = list(self.strides)
            if len(strides_list) == 2:
                strides_h = strides_list[0]
                strides_w = strides_list[1]
            elif len(strides_list) == 4:
                strides_h = strides_list[h_axis]
                strides_w = strides_list[w_axis]

        if self.dilations is not None:
            if isinstance(self.dilations, int):
                dilations_h = self.dilations
                dilations_w = self.dilations
            else:
                dilations_list = list(self.dilations)
                if len(dilations_list) == 2:
                    dilations_h = dilations_list[0]
                    dilations_w = dilations_list[1]
                elif len(dilations_list) == 4:
                    dilations_h = dilations_list[h_axis]
                    dilations_w = dilations_list[w_axis]

        # effective kernel size after dilation
        kernel_h = kernel_h + (kernel_h - 1) * (dilations_h - 1)
        kernel_w = kernel_w + (kernel_w - 1) * (dilations_w - 1)

        assert self.padding in {'SAME', 'VALID'}
        if self.padding == 'VALID':
            output_h = input_h * strides_h + max(kernel_h - strides_h, 0)
            output_w = input_w * strides_w + max(kernel_w - strides_w, 0)
        elif self.padding == 'SAME':
            output_h = input_h * strides_h
            output_w = input_w * strides_w

        if self.data_format == 'NCHW':
            out_shape = (batch_size, output_channels, output_h, output_w)
        else:
            out_shape = (batch_size, output_h, output_w, output_channels)

        output_shape = tf.stack(out_shape)

        outputs = tf.nn.conv2d_transpose(
            input=input, filters=filters, output_shape=output_shape, strides=self.strides, padding=self.padding,
            data_format=self.data_format, dilations=self.dilations, name=self.name
        )
        return outputs


def conv2d_transpose(
    input, filters, output_shape, strides, padding='SAME', data_format='NHWC', dilations=None, name=None
):
    """
    The transpose of conv2d.

    Parameters
    ----------
    input : tensor
        A 4-D Tensor of type float and shape [batch, height, width, in_channels]
        for NHWC data format or [batch, in_channels, height, width] for NCHW data format.
    filters : tensor
        A 4-D Tensor with the same type as input and shape [height, width,
        output_channels, in_channels]. filter's in_channels dimension must match that of input.
    output_shape : tensor
        A 1-D Tensor representing the output shape of the deconvolution op.
    strides : int or list of ints
        An int or list of ints that has length 1, 2 or 4. The stride of the sliding window for each dimension of input.
        If a single value is given it is replicated in the H and W dimension.
        By default the N and C dimensions are set to 1.
        The dimension order is determined by the value of data_format, see below for details.
    padding : string
        'VALID' or 'SAME'. The padding algorithm. See the "returns" section of tf.ops.convolution for details.
    data_format : string
        'NHWC' and 'NCHW' are supported.
    dilations : int or list of ints
        An int or list of ints that has length 1, 2 or 4, defaults to 1.
    name : string
        Optional name for the returned tensor.

    Returns
    -------
    A Tensor with the same type as input.
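
    Examples
    --------
    A minimal shape sketch; all sizes are illustrative:

    >>> x = tf.random.normal([8, 16, 16, 32])
    >>> w = tf.random.normal([3, 3, 16, 32])  # [h, w, out_channels, in_channels]
    >>> conv2d_transpose(x, w, output_shape=[8, 32, 32, 16], strides=[1, 2, 2, 1]).shape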
    """
    data_format, padding = preprocess_2d_format(data_format, padding)
    outputs = tf.nn.conv2d_transpose(
        input=input,
        filters=filters,
        output_shape=output_shape,
        strides=strides,
        padding=padding,
        data_format=data_format,
        dilations=dilations,
        name=name,
    )
    return outputs


class Conv3d_transpose(object):

    def __init__(
        self, strides, padding, data_format='NDHWC', dilations=None, name=None, out_channel=None, k_size=None,
        in_channels=None
    ):
        self.strides = strides
        self.dilations = dilations
        self.name = name
        self.out_channel = out_channel
        self.data_format, self.padding = preprocess_3d_format(data_format, padding)

    def __call__(self, input, filters):
        if self.data_format == 'NDHWC':
            d_axis, h_axis, w_axis = 1, 2, 3
        else:
            d_axis, h_axis, w_axis = 2, 3, 4

        input_shape = input.shape.as_list()
        filters_shape = filters.shape.as_list()
        batch_size = input_shape[0]
        input_d, input_h, input_w = input_shape[d_axis], input_shape[h_axis], input_shape[w_axis]
        kernel_d, kernel_h, kernel_w = filters_shape[0], filters_shape[1], filters_shape[2]
        dilations_d, dilations_h, dilations_w = 1, 1, 1

        if isinstance(self.strides, int):
            # a single int is replicated across the three spatial dimensions
            strides_d = strides_h = strides_w = self.strides
        else:
            strides_list = list(self.strides)
            if len(strides_list) == 3:
                strides_d, strides_h, strides_w = \
                    strides_list[0], \
                    strides_list[1], \
                    strides_list[2]
            elif len(strides_list) == 5:
                strides_d, strides_h, strides_w = \
                    strides_list[d_axis], \
                    strides_list[h_axis], \
                    strides_list[w_axis]

        if self.dilations is not None:
            if isinstance(self.dilations, int):
                dilations_d = dilations_h = dilations_w = self.dilations
            else:
                dilations_list = list(self.dilations)
                if len(dilations_list) == 3:
                    dilations_d, dilations_h, dilations_w = \
                        dilations_list[0], \
                        dilations_list[1], \
                        dilations_list[2]
                elif len(dilations_list) == 5:
                    dilations_d, dilations_h, dilations_w = \
                        dilations_list[d_axis], \
                        dilations_list[h_axis], \
                        dilations_list[w_axis]

        assert self.padding in {'VALID', 'SAME'}

        # effective kernel size after dilation
        kernel_d = kernel_d + (kernel_d - 1) * (dilations_d - 1)
        kernel_h = kernel_h + (kernel_h - 1) * (dilations_h - 1)
        kernel_w = kernel_w + (kernel_w - 1) * (dilations_w - 1)

        if self.padding == 'VALID':
            output_d = input_d * strides_d + max(kernel_d - strides_d, 0)
            output_h = input_h * strides_h + max(kernel_h - strides_h, 0)
            output_w = input_w * strides_w + max(kernel_w - strides_w, 0)
        elif self.padding == 'SAME':
            output_d = input_d * strides_d
            output_h = input_h * strides_h
            output_w = input_w * strides_w

        if self.data_format == 'NDHWC':
            output_shape = (batch_size, output_d, output_h, output_w, self.out_channel)
        else:
            output_shape = (batch_size, self.out_channel, output_d, output_h, output_w)

        output_shape = tf.stack(output_shape)
        outputs = tf.nn.conv3d_transpose(
            input=input, filters=filters, output_shape=output_shape, strides=self.strides, padding=self.padding,
            data_format=self.data_format, dilations=self.dilations, name=self.name
        )

        return outputs


def conv3d_transpose(
    input, filters, output_shape, strides, padding='SAME', data_format='NDHWC', dilations=None, name=None
):
    """
    The transpose of conv3d.

    Parameters
    ----------
    input : tensor
        A 5-D Tensor of type float and shape [batch, depth, height, width, in_channels] for
        NDHWC data format or [batch, in_channels, depth, height, width] for NCDHW data format.
    filters : tensor
        A 5-D Tensor with the same type as value and shape [depth, height, width, output_channels, in_channels].
        filter's in_channels dimension must match that of value.
    output_shape : tensor
        A 1-D Tensor representing the output shape of the deconvolution op.
    strides : int or list of ints
        An int or list of ints that has length 1, 3 or 5.
    padding : string
        'VALID' or 'SAME'. The padding algorithm. See the "returns" section of tf.ops.convolution for details.
    data_format : string
        'NDHWC' and 'NCDHW' are supported.
    dilations : int or list of ints
        An int or list of ints that has length 1, 3 or 5, defaults to 1.
    name : string
        Optional name for the returned tensor.

    Returns
    -------
    A Tensor with the same type as value.
    """
    data_format, padding = preprocess_3d_format(data_format, padding)
    outputs = tf.nn.conv3d_transpose(
        input=input, filters=filters, output_shape=output_shape, strides=strides, padding=padding,
        data_format=data_format, dilations=dilations, name=name
    )
    return outputs


def depthwise_conv2d(input, filters, strides, padding='SAME', data_format='NHWC', dilations=None, name=None):
    """
    Depthwise 2-D convolution.

    Parameters
    ----------
    input : tensor
        4-D with shape according to data_format.
    filters : tensor
        4-D with shape [filter_height, filter_width, in_channels, channel_multiplier].
    strides : tuple
        1-D of size 4. The stride of the sliding window for each dimension of input.
    padding : string
        'VALID' or 'SAME'
    data_format : string
        "NHWC" (default) or "NCHW".
    dilations : tuple
        The dilation rate in which we sample input values across the height and width dimensions in atrous convolution.
        If it is greater than 1, then all values of strides must be 1.
    name : string
        A name for this operation (optional).

    Returns
    -------
    A 4-D Tensor with shape according to data_format.
    """
    data_format, padding = preprocess_2d_format(data_format, padding)
    outputs = tf.nn.depthwise_conv2d(
        input=input,
        filter=filters,
        strides=strides,
        padding=padding,
        data_format=data_format,
        dilations=dilations,
        name=name,
    )
    return outputs


def _to_channel_first_bias(b):
    """Reshape [c] to [c, 1, 1]."""
    channel_size = int(b.shape[0])
    new_shape = (channel_size, 1, 1)
    return tf.reshape(b, new_shape)


def _bias_scale(x, b, data_format):
    """The multiplication counterpart of tf.nn.bias_add."""
    if data_format == 'NHWC':
        return x * b
    elif data_format == 'NCHW':
        return x * _to_channel_first_bias(b)
    else:
        raise ValueError('invalid data_format: %s' % data_format)


def _bias_add(x, b, data_format):
    """Alternative implementation of tf.nn.bias_add which is compatible with TensorRT."""
    if data_format == 'NHWC':
        return tf.add(x, b)
    elif data_format == 'NCHW':
        return tf.add(x, _to_channel_first_bias(b))
    else:
        raise ValueError('invalid data_format: %s' % data_format)


def batch_normalization(x, mean, variance, offset, scale, variance_epsilon, data_format, name=None):
    """Data format aware version of tf.nn.batch_normalization."""
    if data_format == 'channels_last':
        mean = tf.reshape(mean, [1] * (len(x.shape) - 1) + [-1])
        variance = tf.reshape(variance, [1] * (len(x.shape) - 1) + [-1])
        offset = tf.reshape(offset, [1] * (len(x.shape) - 1) + [-1])
        scale = tf.reshape(scale, [1] * (len(x.shape) - 1) + [-1])
    elif data_format == 'channels_first':
        mean = tf.reshape(mean, [1] + [-1] + [1] * (len(x.shape) - 2))
        variance = tf.reshape(variance, [1] + [-1] + [1] * (len(x.shape) - 2))
        offset = tf.reshape(offset, [1] + [-1] + [1] * (len(x.shape) - 2))
        scale = tf.reshape(scale, [1] + [-1] + [1] * (len(x.shape) - 2))
    else:
        raise ValueError('invalid data_format: %s' % data_format)

    with ops.name_scope(name, 'batchnorm', [x, mean, variance, scale, offset]):
        inv = math_ops.rsqrt(variance + variance_epsilon)
        if scale is not None:
            inv *= scale

        a = math_ops.cast(inv, x.dtype)
        b = math_ops.cast(offset - mean * inv if offset is not None else -mean * inv, x.dtype)
        # Return a * x + b with a customized data_format.
        # Currently TF doesn't have a bias_scale op, and TensorRT has a bug converting tf.nn.bias_add,
        # so we reimplemented them to make the model work with TensorRT.
        # See https://github.com/tensorlayer/openpose-plus/issues/75 for more details.
        # df = {'channels_first': 'NCHW', 'channels_last': 'NHWC'}
        # return _bias_add(_bias_scale(x, a, df[data_format]), b, df[data_format])
        return a * x + b


class BatchNorm(object):
    """
    The :class:`BatchNorm` is a batch normalization layer for both fully-connected and convolution outputs.
    See ``tf.nn.batch_normalization`` and ``tf.nn.moments``.

    Parameters
    ----------
    decay : float
        A decay factor for `ExponentialMovingAverage`.
        A larger value is suggested for larger datasets.
    epsilon : float
        Epsilon.
    act : activation function
        The activation function of this layer.
    is_train : boolean
        Is being used for training or inference.
    beta_init : initializer or None
        The initializer for initializing beta, if None, skip beta.
        Usually you should not skip beta unless you know what you are doing.
    gamma_init : initializer or None
        The initializer for initializing gamma, if None, skip gamma.
        When the batch normalization layer is used instead of 'biases', or the next layer is linear, this can be
        disabled since the scaling can be done by the next layer. see `Inception-ResNet-v2 <https://github.com/tensorflow/models/blob/master/research/slim/nets/inception_resnet_v2.py>`__
    moving_mean_init : initializer or None
        The initializer for initializing moving mean, if None, skip moving mean.
    moving_var_init : initializer or None
        The initializer for initializing moving var, if None, skip moving var.
    num_features : int
        Number of features for input tensor. Useful to build layer if using BatchNorm1d, BatchNorm2d or BatchNorm3d,
        but should be left as None if using BatchNorm. Default None.
    data_format : str
        'channels_last' (default) or 'channels_first'.
    name : None or str
        A unique layer name.

    Examples
    ---------
    With TensorLayer

    >>> net = tl.layers.Input([None, 50, 50, 32], name='input')
    >>> net = tl.layers.BatchNorm()(net)

    Notes
    -----
    The :class:`BatchNorm` is universally suitable for 3D/4D/5D input in static models, but should not be used
    in dynamic models where the layer is built upon class initialization. So the argument 'num_features' should only
    be used for the subclasses :class:`BatchNorm1d`, :class:`BatchNorm2d` and :class:`BatchNorm3d`. All three
    subclasses are suitable under all kinds of conditions.

    References
    ----------
    - `Source <https://github.com/ry/tensorflow-resnet/blob/master/resnet.py>`__
    - `stackoverflow <http://stackoverflow.com/questions/38312668/how-does-one-do-inference-with-batch-normalization-with-tensor-flow>`__

    """

    def __init__(
        self, decay=0.9, epsilon=0.00001, beta=None, gamma=None, moving_mean=None, moving_var=None, num_features=None,
        data_format='channels_last', is_train=False
    ):
        self.decay = decay
        self.epsilon = epsilon
        self.data_format = data_format
        self.beta = beta
        self.gamma = gamma
        self.moving_mean = moving_mean
        self.moving_var = moving_var
        self.num_features = num_features
        self.is_train = is_train
        self.axes = None

        if self.decay < 0.0 or 1.0 < self.decay:
            raise ValueError("decay should be between 0 and 1")

    def _get_param_shape(self, inputs_shape):
        if self.data_format == 'channels_last':
            axis = -1
        elif self.data_format == 'channels_first':
            axis = 1
        else:
            raise ValueError('data_format should be either %s or %s' % ('channels_last', 'channels_first'))

        channels = inputs_shape[axis]
        params_shape = [channels]

        return params_shape

    def _check_input_shape(self, inputs):
        if inputs.ndim <= 1:
            raise ValueError('expected input at least 2D, but got {}D input'.format(inputs.ndim))

    def __call__(self, inputs):
        self._check_input_shape(inputs)
        self.channel_axis = len(inputs.shape) - 1 if self.data_format == 'channels_last' else 1
        if self.axes is None:
            self.axes = [i for i in range(len(inputs.shape)) if i != self.channel_axis]

        mean, var = tf.nn.moments(inputs, self.axes, keepdims=False)
        if self.is_train:
            # update moving_mean and moving_var
            self.moving_mean = moving_averages.assign_moving_average(
                self.moving_mean, mean, self.decay, zero_debias=False
            )
            self.moving_var = moving_averages.assign_moving_average(self.moving_var, var, self.decay, zero_debias=False)
            outputs = batch_normalization(inputs, mean, var, self.beta, self.gamma, self.epsilon, self.data_format)
        else:
            outputs = batch_normalization(
                inputs, self.moving_mean, self.moving_var, self.beta, self.gamma, self.epsilon, self.data_format
            )

        return outputs


class GroupConv2D(object):

    def __init__(self, strides, padding, data_format, dilations, out_channel, k_size, groups):
        self.data_format, self.padding = preprocess_2d_format(data_format, padding)
        self.strides = strides
        self.dilations = dilations
        self.groups = groups
        if self.data_format == 'NHWC':
            self.channels_axis = 3
        else:
            self.channels_axis = 1

    def __call__(self, input, filters):
        if self.groups == 1:
            outputs = tf.nn.conv2d(
                input=input,
                filters=filters,
                strides=self.strides,
                padding=self.padding,
                data_format=self.data_format,
                dilations=self.dilations,
            )
        else:
            inputgroups = tf.split(input, num_or_size_splits=self.groups, axis=self.channels_axis)
            weightsgroups = tf.split(filters, num_or_size_splits=self.groups, axis=self.channels_axis)
            convgroups = []
            for i, k in zip(inputgroups, weightsgroups):
                convgroups.append(
                    tf.nn.conv2d(
                        input=i,
                        filters=k,
                        strides=self.strides,
                        padding=self.padding,
                        data_format=self.data_format,
                        dilations=self.dilations,
                    )
                )
            outputs = tf.concat(axis=self.channels_axis, values=convgroups)

        return outputs
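
# A minimal usage sketch for GroupConv2D; shapes below are illustrative and
# assume NHWC, where the filters are split along their last (output-channel)
# axis so that each group sees in_channels // groups input channels:
#
#   conv = GroupConv2D(strides=[1, 1, 1, 1], padding='SAME', data_format='NHWC',
#                      dilations=[1, 1, 1, 1], out_channel=8, k_size=3, groups=2)
#   x = tf.random.normal([2, 16, 16, 4])
#   w = tf.random.normal([3, 3, 2, 8])   # [k, k, in_channels // groups, out_channels]
#   y = conv(x, w)                       # -> shape [2, 16, 16, 8]
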
class SeparableConv1D(object):

    def __init__(self, stride, padding, data_format, dilations, out_channel, k_size, in_channel, depth_multiplier):
        self.data_format, self.padding = preprocess_1d_format(data_format, padding)

        if self.data_format == 'NWC':
            self.spatial_start_dim = 1
            self.strides = (1, stride, stride, 1)
            self.data_format = 'NHWC'
        else:
            self.spatial_start_dim = 2
            self.strides = (1, 1, stride, stride)
            self.data_format = 'NCHW'
        self.dilation_rate = (1, dilations)

    def __call__(self, inputs, depthwise_filters, pointwise_filters):
        # lift the 1-D case to 2-D, run separable_conv2d, then squeeze back
        inputs = tf.expand_dims(inputs, axis=self.spatial_start_dim)
        depthwise_filters = tf.expand_dims(depthwise_filters, 0)
        pointwise_filters = tf.expand_dims(pointwise_filters, 0)

        outputs = tf.nn.separable_conv2d(
            inputs, depthwise_filters, pointwise_filters, strides=self.strides, padding=self.padding,
            dilations=self.dilation_rate, data_format=self.data_format
        )

        outputs = tf.squeeze(outputs, axis=self.spatial_start_dim)

        return outputs


class SeparableConv2D(object):

    def __init__(self, strides, padding, data_format, dilations, out_channel, k_size, in_channel, depth_multiplier):
        self.data_format, self.padding = preprocess_2d_format(data_format, padding)
        self.strides = strides
        self.dilations = (dilations[2], dilations[2])

    def __call__(self, inputs, depthwise_filters, pointwise_filters):
        outputs = tf.nn.separable_conv2d(
            inputs, depthwise_filters, pointwise_filters, strides=self.strides, padding=self.padding,
            dilations=self.dilations, data_format=self.data_format
        )

        return outputs


class AdaptiveMeanPool1D(object):

    def __init__(self, output_size, data_format):
        self.data_format, _ = preprocess_1d_format(data_format, None)
        self.output_size = output_size

    def __call__(self, input):
        if self.data_format == 'NWC':
            n, w, c = input.shape
        else:
            n, c, w = input.shape

        # choose stride and kernel so that exactly `output_size` windows cover the input
        stride = floor(w / self.output_size)
        kernel = w - (self.output_size - 1) * stride
        output = tf.nn.avg_pool1d(input, ksize=kernel, strides=stride, data_format=self.data_format, padding='VALID')

        return output


class AdaptiveMeanPool2D(object):

    def __init__(self, output_size, data_format):
        self.data_format, _ = preprocess_2d_format(data_format, None)
        self.output_size = output_size

    def __call__(self, inputs):
        if self.data_format == 'NHWC':
            n, h, w, c = inputs.shape
        else:
            n, c, h, w = inputs.shape

        out_h, out_w = self.output_size
        stride_h = floor(h / out_h)
        kernel_h = h - (out_h - 1) * stride_h
        stride_w = floor(w / out_w)
        kernel_w = w - (out_w - 1) * stride_w

        outputs = tf.nn.avg_pool2d(
            inputs, ksize=(kernel_h, kernel_w), strides=(stride_h, stride_w), data_format=self.data_format,
            padding='VALID'
        )

        return outputs
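
# Worked sizing example for the adaptive pooling classes above (illustrative):
# for an input width w = 7 and a target output width out_w = 3:
#   stride = floor(7 / 3) = 2
#   kernel = 7 - (3 - 1) * 2 = 3
# so three windows of size 3 with stride 2 cover positions 0-2, 2-4 and 4-6,
# yielding exactly 3 outputs.
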
class AdaptiveMeanPool3D(object):

    def __init__(self, output_size, data_format):
        self.data_format, _ = preprocess_3d_format(data_format, None)
        self.output_size = output_size

    def __call__(self, inputs):
        if self.data_format == 'NDHWC':
            n, d, h, w, c = inputs.shape
        else:
            n, c, d, h, w = inputs.shape

        out_d, out_h, out_w = self.output_size
        stride_d = floor(d / out_d)
        kernel_d = d - (out_d - 1) * stride_d
        stride_h = floor(h / out_h)
        kernel_h = h - (out_h - 1) * stride_h
        stride_w = floor(w / out_w)
        kernel_w = w - (out_w - 1) * stride_w

        outputs = tf.nn.avg_pool3d(
            inputs, ksize=(kernel_d, kernel_h, kernel_w), strides=(stride_d, stride_h, stride_w),
            data_format=self.data_format, padding='VALID'
        )

        return outputs


class AdaptiveMaxPool1D(object):

    def __init__(self, output_size, data_format):
        self.data_format, _ = preprocess_1d_format(data_format, None)
        self.output_size = output_size

    def __call__(self, input):
        if self.data_format == 'NWC':
            n, w, c = input.shape
        else:
            n, c, w = input.shape

        stride = floor(w / self.output_size)
        kernel = w - (self.output_size - 1) * stride
        output = tf.nn.max_pool1d(input, ksize=kernel, strides=stride, data_format=self.data_format, padding='VALID')

        return output


class AdaptiveMaxPool2D(object):

    def __init__(self, output_size, data_format):
        self.data_format, _ = preprocess_2d_format(data_format, None)
        self.output_size = output_size

    def __call__(self, inputs):
        if self.data_format == 'NHWC':
            n, h, w, c = inputs.shape
        else:
            n, c, h, w = inputs.shape

        out_h, out_w = self.output_size
        stride_h = floor(h / out_h)
        kernel_h = h - (out_h - 1) * stride_h
        stride_w = floor(w / out_w)
        kernel_w = w - (out_w - 1) * stride_w

        outputs = tf.nn.max_pool2d(
            inputs, ksize=(kernel_h, kernel_w), strides=(stride_h, stride_w), data_format=self.data_format,
            padding='VALID'
        )

        return outputs


class AdaptiveMaxPool3D(object):

    def __init__(self, output_size, data_format):
        self.data_format, _ = preprocess_3d_format(data_format, None)
        self.output_size = output_size

    def __call__(self, inputs):
        if self.data_format == 'NDHWC':
            n, d, h, w, c = inputs.shape
        else:
            n, c, d, h, w = inputs.shape

        out_d, out_h, out_w = self.output_size
        stride_d = floor(d / out_d)
        kernel_d = d - (out_d - 1) * stride_d
        stride_h = floor(h / out_h)
        kernel_h = h - (out_h - 1) * stride_h
        stride_w = floor(w / out_w)
        kernel_w = w - (out_w - 1) * stride_w

        outputs = tf.nn.max_pool3d(
            inputs, ksize=(kernel_d, kernel_h, kernel_w), strides=(stride_d, stride_h, stride_w),
            data_format=self.data_format, padding='VALID'
        )

        return outputs


class BinaryConv2D(object):

    def __init__(self, strides, padding, data_format, dilations, out_channel, k_size, in_channel):
        self.data_format, self.padding = preprocess_2d_format(data_format, padding)
        self.strides = strides
        self.dilations = dilations

    # The "TL_Sign_QuantizeGrad" gradient referenced in quantize() must be
    # registered once per process before gradients are taken, e.g.:
    # @tf.RegisterGradient("TL_Sign_QuantizeGrad")
    # def _quantize_grad(op, grad):
    #     """Clip and binarize tensor using the straight through estimator (STE) for the gradient."""
    #     return tf.clip_by_value(grad, -1, 1)

    def quantize(self, x):
        # ref: https://github.com/AngusG/tensorflow-xnor-bnn/blob/master/models/binary_net.py#L70
        # https://github.com/itayhubara/BinaryNet.tf/blob/master/nnUtils.py
        with tf.compat.v1.get_default_graph().gradient_override_map({"Sign": "TL_Sign_QuantizeGrad"}):
            return tf.sign(x)

    def __call__(self, inputs, filters):
        filters = self.quantize(filters)

        outputs = tf.nn.conv2d(
            input=inputs, filters=filters, strides=self.strides, padding=self.padding, data_format=self.data_format,
            dilations=self.dilations
        )

        return outputs


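# A hedged sketch mirroring the commented-out registration above: the demo
# function name is illustrative, not part of the original module.
# tf.RegisterGradient raises if the same name is registered twice, so call
# this exactly once per process.
def _register_tl_sign_quantize_grad():
    @tf.RegisterGradient("TL_Sign_QuantizeGrad")
    def _quantize_grad(op, grad):
        # Straight-through estimator: pass the incoming gradient through
        # sign(), clipped to [-1, 1].
        return tf.clip_by_value(grad, -1, 1)

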
class DorefaConv2D(object):

    def __init__(self, bitW, bitA, strides, padding, data_format, dilations, out_channel, k_size, in_channel):
        self.data_format, self.padding = preprocess_2d_format(data_format, padding)
        self.strides = strides
        self.dilations = dilations
        self.bitW = bitW
        self.bitA = bitA

    def _quantize_dorefa(self, x, k):
        # Uniform k-bit quantization with an identity (straight-through) gradient.
        G = tf.compat.v1.get_default_graph()
        n = float(2**k - 1)
        with G.gradient_override_map({"Round": "Identity"}):
            return tf.round(x * n) / n

    def cabs(self, x):
        # Clip activations into [0, 1] before quantization.
        return tf.minimum(1.0, tf.abs(x), name='cabs')

    def quantize_active(self, x, bitA):
        if bitA == 32:
            return x
        return self._quantize_dorefa(x, bitA)

    def quantize_weight(self, x, bitW, force_quantization=False):
        G = tf.compat.v1.get_default_graph()
        if bitW == 32 and not force_quantization:
            return x
        if bitW == 1:  # BWN: binarize to +/- E, the mean absolute weight
            with G.gradient_override_map({"Sign": "Identity"}):
                E = tf.stop_gradient(tf.reduce_mean(input_tensor=tf.abs(x)))
                return tf.sign(x / E) * E
        # Most weights fall within the [-1, 1] region in practice, so map them
        # to [0, 1], quantize, and map back.
        x = tf.clip_by_value(x * 0.5 + 0.5, 0.0, 1.0)
        return 2 * self._quantize_dorefa(x, bitW) - 1

    def __call__(self, inputs, filters):
        inputs = self.quantize_active(self.cabs(inputs), self.bitA)
        filters = self.quantize_weight(filters, self.bitW)

        outputs = tf.nn.conv2d(
            input=inputs,
            filters=filters,
            strides=self.strides,
            padding=self.padding,
            data_format=self.data_format,
            dilations=self.dilations,
        )

        return outputs


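# A minimal sketch (not in the original file): the quantizer maps x in [0, 1]
# to round(x * n) / n with n = 2**k - 1, e.g. k = 2 gives n = 3 and
# 0.4 -> round(1.2) / 3 = 1/3. Shapes and arguments below are illustrative.
def _demo_dorefa_conv2d():
    conv = DorefaConv2D(
        bitW=1, bitA=2, strides=(1, 1, 1, 1), padding='SAME', data_format='NHWC',
        dilations=(1, 1, 1, 1), out_channel=8, k_size=(3, 3), in_channel=3
    )
    x = tf.random.uniform(shape=(1, 8, 8, 3))
    filters = tf.random.normal(shape=(3, 3, 3, 8))  # (kh, kw, in, out)
    y = conv(x, filters)
    print(y.shape)  # (1, 8, 8, 8)

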
class rnncell(object):

    def __init__(self, weight_ih, weight_hh, bias_ih, bias_hh, act):
        self.weight_ih = weight_ih
        self.weight_hh = weight_hh
        self.bias_ih = bias_ih
        self.bias_hh = bias_hh
        self.act_fn = tf.nn.relu if act == 'relu' else tf.nn.tanh

    def __call__(self, input, h, c=None):
        i2h = tf.matmul(input, self.weight_ih, transpose_b=True)
        if self.bias_ih is not None:
            i2h += self.bias_ih
        h2h = tf.matmul(h, self.weight_hh, transpose_b=True)
        if self.bias_hh is not None:
            h2h += self.bias_hh
        h = self.act_fn(i2h + h2h)
        return h, h


class lstmcell(object):

    def __init__(self, weight_ih, weight_hh, bias_ih, bias_hh, act=None):
        self.weight_ih = weight_ih
        self.weight_hh = weight_hh
        self.bias_ih = bias_ih
        self.bias_hh = bias_hh
        self.gate_act_fn = tf.sigmoid
        self.act_fn = tf.tanh

    def __call__(self, input, h, c):
        gates = tf.matmul(input, self.weight_ih, transpose_b=True)
        if self.bias_ih is not None:
            gates = gates + self.bias_ih
        gates += tf.matmul(h, self.weight_hh, transpose_b=True)
        if self.bias_hh is not None:
            gates += self.bias_hh

        # Gate layout along the last axis: input, forget, cell, output.
        gate_slices = tf.split(gates, num_or_size_splits=4, axis=-1)
        i = self.gate_act_fn(gate_slices[0])
        f = self.gate_act_fn(gate_slices[1])
        o = self.gate_act_fn(gate_slices[3])
        c = f * c + i * self.act_fn(gate_slices[2])
        h = o * self.act_fn(c)

        return h, h, c


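# A minimal sketch (not in the original file): weight_ih stacks the four gate
# weights along the first axis, so its shape is (4 * hidden_size, input_size)
# and weight_hh is (4 * hidden_size, hidden_size).
def _demo_lstmcell():
    batch, input_size, hidden_size = 2, 4, 5
    cell = lstmcell(
        weight_ih=tf.random.normal((4 * hidden_size, input_size)),
        weight_hh=tf.random.normal((4 * hidden_size, hidden_size)),
        bias_ih=tf.zeros((4 * hidden_size,)),
        bias_hh=tf.zeros((4 * hidden_size,)),
    )
    x = tf.random.normal((batch, input_size))
    h = tf.zeros((batch, hidden_size))
    c = tf.zeros((batch, hidden_size))
    out, h, c = cell(x, h, c)
    print(out.shape, h.shape, c.shape)  # (2, 5) (2, 5) (2, 5)

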
class grucell(object):

    def __init__(self, weight_ih, weight_hh, bias_ih, bias_hh, act=None):
        self.weight_ih = weight_ih
        self.weight_hh = weight_hh
        self.bias_ih = bias_ih
        self.bias_hh = bias_hh
        self.gate_act_fn = tf.sigmoid
        self.act_fn = tf.tanh

    def __call__(self, input, h, c=None):
        x_gates = tf.matmul(input, self.weight_ih, transpose_b=True)
        if self.bias_ih is not None:
            x_gates = x_gates + self.bias_ih
        h_gates = tf.matmul(h, self.weight_hh, transpose_b=True)
        if self.bias_hh is not None:
            h_gates = h_gates + self.bias_hh

        # Gate layout along the last axis: reset, update, candidate.
        x_r, x_z, x_c = tf.split(x_gates, num_or_size_splits=3, axis=-1)
        h_r, h_z, h_c = tf.split(h_gates, num_or_size_splits=3, axis=-1)

        r = self.gate_act_fn(x_r + h_r)
        z = self.gate_act_fn(x_z + h_z)
        c = self.act_fn(x_c + r * h_c)
        h = (h - c) * z + c  # i.e. z * h + (1 - z) * c

        return h, h


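# A minimal sketch (not in the original file): GRU weights stack three gates,
# so weight_ih is (3 * hidden_size, input_size) and weight_hh is
# (3 * hidden_size, hidden_size).
def _demo_grucell():
    batch, input_size, hidden_size = 2, 4, 5
    cell = grucell(
        weight_ih=tf.random.normal((3 * hidden_size, input_size)),
        weight_hh=tf.random.normal((3 * hidden_size, hidden_size)),
        bias_ih=tf.zeros((3 * hidden_size,)),
        bias_hh=tf.zeros((3 * hidden_size,)),
    )
    x = tf.random.normal((batch, input_size))
    h = tf.zeros((batch, hidden_size))
    out, h = cell(x, h)
    print(out.shape, h.shape)  # (2, 5) (2, 5)

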
class rnnbase(object):

    def __init__(
        self,
        mode,
        input_size,
        hidden_size,
        num_layers,
        bias,
        batch_first,
        dropout,
        bidirectional,
        is_train,
        weights_fw,
        weights_bw,
        bias_fw,
        bias_bw,
    ):
        self.mode = mode
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.bias = bias
        self.batch_first = batch_first
        self.dropout = float(dropout)
        self.train = is_train
        if not 0 <= dropout < 1:
            raise ValueError("dropout should be a number in range [0, 1).")
        if dropout > 0 and num_layers == 1:
            raise ValueError(
                "dropout option adds dropout after all but last "
                "recurrent layer, so non-zero dropout expects "
                "num_layers greater than 1, but got dropout={} and "
                "num_layers={}".format(dropout, num_layers)
            )
        self.bidirect = 2 if bidirectional else 1

        # Per-layer parameters are created by the caller and passed in:
        # weights_fw and bias_fw hold [weight_ih, weight_hh] and
        # [bias_ih, bias_hh] pairs per layer; weights_bw and bias_bw hold the
        # reverse-direction parameters when bidirectional.
        self.weights_fw = weights_fw
        self.bias_fw = bias_fw
        self.weights_bw = weights_bw
        self.bias_bw = bias_bw

        self.act_fn = None
        if mode == 'LSTM':
            self.rnn_cell = lstmcell
        elif mode == 'GRU':
            self.rnn_cell = grucell
        elif mode == 'RNN_TANH':
            self.rnn_cell = rnncell
            self.act_fn = 'tanh'
        elif mode == 'RNN_RELU':
            self.rnn_cell = rnncell
            self.act_fn = 'relu'

    def _bi_rnn_forward(self, x, h, c=None):
        time_step, batch_size, input_size = x.shape
        h_out = []
        c_out = []
        pre_layer = x
        for i in range(self.num_layers):
            weight_ih_fw = self.weights_fw[2 * i]
            weight_hh_fw = self.weights_fw[2 * i + 1]
            weight_ih_bw = self.weights_bw[2 * i]
            weight_hh_bw = self.weights_bw[2 * i + 1]
            if self.bias:
                bias_ih_fw = self.bias_fw[2 * i]
                bias_hh_fw = self.bias_fw[2 * i + 1]
                bias_ih_bw = self.bias_bw[2 * i]
                bias_hh_bw = self.bias_bw[2 * i + 1]
            else:
                bias_ih_fw = None
                bias_hh_fw = None
                bias_ih_bw = None
                bias_hh_bw = None
            # Layer i owns rows 2*i (forward) and 2*i + 1 (backward) of the state.
            h_i_fw = h[2 * i, :, :]
            h_i_bw = h[2 * i + 1, :, :]
            if i != 0 and self.train:
                pre_layer = tf.nn.dropout(pre_layer, rate=self.dropout)
            cell_fw = self.rnn_cell(weight_ih_fw, weight_hh_fw, bias_ih_fw, bias_hh_fw, self.act_fn)
            cell_bw = self.rnn_cell(weight_ih_bw, weight_hh_bw, bias_ih_bw, bias_hh_bw, self.act_fn)
            y_fw = []
            y_bw = []
            if c is not None:
                c_i_fw = c[2 * i, :, :]
                c_i_bw = c[2 * i + 1, :, :]
                for j in range(time_step):
                    step_out_fw, h_i_fw, c_i_fw = cell_fw(pre_layer[j, :, :], h_i_fw, c_i_fw)
                    # The backward cell consumes the sequence in reverse time order.
                    step_out_bw, h_i_bw, c_i_bw = cell_bw(pre_layer[time_step - 1 - j, :, :], h_i_bw, c_i_bw)
                    y_fw.append(step_out_fw)
                    y_bw.append(step_out_bw)
                c_out.append(c_i_fw)
                c_out.append(c_i_bw)
            else:
                for j in range(time_step):
                    step_out_fw, h_i_fw = cell_fw(pre_layer[j, :, :], h_i_fw)
                    step_out_bw, h_i_bw = cell_bw(pre_layer[time_step - 1 - j, :, :], h_i_bw)
                    y_fw.append(step_out_fw)
                    y_bw.append(step_out_bw)
            h_out.append(h_i_fw)
            h_out.append(h_i_bw)
            # Realign the backward outputs with forward time before concatenating.
            y_bw.reverse()
            y = [tf.concat([out_fw, out_bw], axis=-1) for out_fw, out_bw in zip(y_fw, y_bw)]
            pre_layer = tf.stack(y)
        h_out = tf.stack(h_out)
        c_out = tf.stack(c_out) if c is not None else None

        return pre_layer, h_out, c_out

    def _rnn_forward(self, x, h, c=None):
        pre_layer = x
        h_out = []
        c_out = []
        time_step, batch_size, input_size = x.shape
        for i in range(self.num_layers):
            weight_ih = self.weights_fw[2 * i]
            weight_hh = self.weights_fw[2 * i + 1]
            if self.bias:
                bias_ih = self.bias_fw[2 * i]
                bias_hh = self.bias_fw[2 * i + 1]
            else:
                bias_ih = None
                bias_hh = None
            h_i = h[i, :, :]
            if i != 0 and self.train:
                pre_layer = tf.nn.dropout(pre_layer, rate=self.dropout)
            cell = self.rnn_cell(weight_ih, weight_hh, bias_ih, bias_hh, self.act_fn)
            y = []
            if c is not None:
                c_i = c[i, :, :]
                for j in range(time_step):
                    step_out, h_i, c_i = cell(pre_layer[j, :, :], h_i, c_i)
                    y.append(step_out)
                c_out.append(c_i)
            else:
                for j in range(time_step):
                    step_out, h_i = cell(pre_layer[j, :, :], h_i)
                    y.append(step_out)
            h_out.append(h_i)
            pre_layer = tf.stack(y)
        h_out = tf.stack(h_out)
        c_out = tf.stack(c_out) if c is not None else None

        return pre_layer, h_out, c_out

    def check_input(self, input_shape):
        if len(input_shape) != 3:
            raise ValueError("input must have 3 dimensions, but got {}.".format(len(input_shape)))
        if self.input_size != input_shape[-1]:
            raise ValueError(
                "The last dimension of input should be equal to input_size {}, but got {}.".format(
                    self.input_size, input_shape[-1]
                )
            )

    def check_hidden(self, h, batch_size):
        expected_hidden_size = (self.num_layers * self.bidirect, batch_size, self.hidden_size)
        if h.shape != expected_hidden_size:
            raise ValueError('Expected hidden size {}, got {}.'.format(expected_hidden_size, h.shape))

    def __call__(self, input, states):
        if self.batch_first:
            # (batch, time, feature) -> (time, batch, feature)
            input = tf.transpose(input, perm=(1, 0, 2))
        input_dtype = input.dtype
        input_shape = input.shape
        time_step, batch_size, input_size = input_shape
        self.check_input(input_shape)
        if self.mode == "LSTM":
            if states is not None:
                h, c = states
                self.check_hidden(h, batch_size)
                self.check_hidden(c, batch_size)
            else:
                h = tf.zeros(shape=(self.num_layers * self.bidirect, batch_size, self.hidden_size), dtype=input_dtype)
                c = tf.zeros(shape=(self.num_layers * self.bidirect, batch_size, self.hidden_size), dtype=input_dtype)
            if self.bidirect == 1:
                y, new_h, new_c = self._rnn_forward(input, h, c)
            else:
                y, new_h, new_c = self._bi_rnn_forward(input, h, c)
            new_states = (new_h, new_c)
        else:
            if states is not None:
                h = states
                self.check_hidden(h, batch_size)
            else:
                h = tf.zeros(shape=(self.num_layers * self.bidirect, batch_size, self.hidden_size), dtype=input_dtype)
            if self.bidirect == 1:
                y, new_h, _ = self._rnn_forward(input, h)
            else:
                y, new_h, _ = self._bi_rnn_forward(input, h)
            new_states = new_h
        if self.batch_first:
            y = tf.transpose(y, perm=(1, 0, 2))
        return y, new_states
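

# A minimal sketch (not in the original file): a single-layer, unidirectional
# tanh RNN. The caller supplies the parameter lists; shapes follow the
# [weight_ih, weight_hh] / [bias_ih, bias_hh] layout documented in __init__.
def _demo_rnnbase():
    input_size, hidden_size = 4, 5
    rnn = rnnbase(
        mode='RNN_TANH', input_size=input_size, hidden_size=hidden_size,
        num_layers=1, bias=True, batch_first=False, dropout=0,
        bidirectional=False, is_train=False,
        weights_fw=[
            tf.random.normal((hidden_size, input_size)),
            tf.random.normal((hidden_size, hidden_size)),
        ],
        weights_bw=[],
        bias_fw=[tf.zeros((hidden_size,)), tf.zeros((hidden_size,))],
        bias_bw=[],
    )
    x = tf.random.normal((3, 2, input_size))  # (time, batch, feature)
    y, h = rnn(x, None)
    print(y.shape, h.shape)  # (3, 2, 5) (1, 2, 5)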