#! /usr/bin/python
# -*- coding: utf-8 -*-

import numpy as np
import tensorlayer as tl
from tensorlayer import logging
from tensorlayer.backend.ops.load_backend import BACKEND
from tensorlayer.layers.core import Module

__all__ = [
    'RNN',
    'RNNCell',
    'GRU',
    'LSTM',
    'GRUCell',
    'LSTMCell',
]


class RNNCell(Module):
    """An Elman RNN cell with tanh or ReLU non-linearity.

    Parameters
    ----------
    input_size : int
        The number of expected features in the input `x`
    hidden_size : int
        The number of features in the hidden state `h`
    bias : bool
        If ``False``, then the layer does not use bias weights `b_ih` and `b_hh`. Default: ``True``
    act : activation function
        The non-linearity to use. Can be either 'tanh' or 'relu'. Default: 'tanh'
    name : None or str
        A unique layer name

    --------------------------------------------------------
    inputs : tensor
        A tensor with shape `[batch_size, input_size]`.
    states : tensor or None
        A tensor with shape `[batch_size, hidden_size]`. When states is None, zero state is used. Default: None.

    Returns
    ----------
    outputs : tensor
        A tensor with shape `[batch_size, hidden_size]`.
    states : tensor
        A tensor with shape `[batch_size, hidden_size]`.
        Tensor containing the next hidden state for each element in the batch.

    Examples
    --------
    With TensorLayer

    >>> input = tl.layers.Input([4, 16], name='input')
    >>> prev_h = tl.layers.Input([4, 32])
    >>> cell = tl.layers.RNNCell(input_size=16, hidden_size=32, bias=True, act='tanh', name='rnncell_1')
    >>> y, h = cell(input, prev_h)
    >>> print(y.shape)
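    >>> # y has shape [batch_size, hidden_size], i.e. (4, 32) here; h has the same shape.
    >>> # A minimal sketch (not part of the original example) of unrolling the cell over a
    >>> # sequence: assuming `seq` is a [seq_len, 4, 16] tensor that supports time-step indexing,
    >>> # h = None
    >>> # for t in range(seq_len):
    >>> #     y, h = cell(seq[t], h)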

    """

    def __init__(
        self,
        input_size,
        hidden_size,
        bias=True,
        act='tanh',
        name=None,
    ):
        super(RNNCell, self).__init__(name)
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.bias = bias
        if act not in ('relu', 'tanh'):
            raise ValueError("Activation should be 'tanh' or 'relu'.")
        self.act = act
        self.build(None)
        logging.info("RNNCell %s: input_size: %d hidden_size: %d act: %s" % (self.name, input_size, hidden_size, act))

    def __repr__(self):
        actstr = self.act
        s = ('{classname}(input_size={input_size}, hidden_size={hidden_size}')
        s += ', bias=True' if self.bias else ', bias=False'
        s += ', act=' + actstr
        if self.name is not None:
            s += ', name=\'{name}\''
        s += ')'
        return s.format(classname=self.__class__.__name__, **self.__dict__)

    def check_input(self, input_shape):
        if input_shape[1] != self.input_size:
            raise ValueError(
                'input should have consistent input_size. But got {}, expected {}.'.format(
                    input_shape[1], self.input_size
                )
            )

    def check_hidden(self, input_shape, h_shape, hidden_label):
        if input_shape[0] != h_shape[0]:
            raise ValueError(
                'input batch size {} should match hidden {} batch size {}.'.format(
                    input_shape[0], hidden_label, h_shape[0]
                )
            )
        if h_shape[1] != self.hidden_size:
            raise ValueError(
                'hidden {} should have consistent hidden_size. But got {}, expected {}.'.format(
                    hidden_label, h_shape[1], self.hidden_size
                )
            )

    def build(self, inputs_shape):
        stdv = 1.0 / np.sqrt(self.hidden_size)
        _init = tl.initializers.RandomUniform(minval=-stdv, maxval=stdv)
        self.weight_ih_shape = (self.hidden_size, self.input_size)
        self.weight_hh_shape = (self.hidden_size, self.hidden_size)
        self.weight_ih = self._get_weights("weight_ih", shape=self.weight_ih_shape, init=_init)
        self.weight_hh = self._get_weights("weight_hh", shape=self.weight_hh_shape, init=_init)

        if self.bias:
            self.bias_ih_shape = (self.hidden_size, )
            self.bias_hh_shape = (self.hidden_size, )
            self.bias_ih = self._get_weights('bias_ih', shape=self.bias_ih_shape, init=_init)
            self.bias_hh = self._get_weights('bias_hh', shape=self.bias_hh_shape, init=_init)
        else:
            self.bias_ih = None
            self.bias_hh = None
        self.rnncell = tl.ops.rnncell(
            weight_ih=self.weight_ih, weight_hh=self.weight_hh, bias_ih=self.bias_ih, bias_hh=self.bias_hh, act=self.act
        )

    def forward(self, inputs, states=None):
        input_shape = tl.get_tensor_shape(inputs)
        self.check_input(input_shape)
        if states is None:
            states = tl.zeros(shape=(input_shape[0], self.hidden_size), dtype=inputs.dtype)
        states_shape = tl.get_tensor_shape(states)
        self.check_hidden(input_shape, states_shape, hidden_label='h')
        output, states = self.rnncell(inputs, states)
        return output, states


class LSTMCell(Module):
    """A long short-term memory (LSTM) cell.

    Parameters
    ----------
    input_size : int
        The number of expected features in the input `x`
    hidden_size : int
        The number of features in the hidden state `h`
    bias : bool
        If ``False``, then the layer does not use bias weights `b_ih` and `b_hh`. Default: ``True``
    name : None or str
        A unique layer name

    --------------------------------------------------------
    inputs : tensor
        A tensor with shape `[batch_size, input_size]`.
    states : tuple or None
        A tuple of two tensors `(h, c)`, each of shape `[batch_size, hidden_size]`. When states is None, zero states are used. Default: None.

    Returns
    ----------
    outputs : tensor
        A tensor with shape `[batch_size, hidden_size]`.
    states : tuple
        A tuple of two tensors `(h, c)`, each of shape `[batch_size, hidden_size]`.
        Tensors containing the next hidden state and next cell state for each element in the batch.

    Examples
    --------
    With TensorLayer

    >>> input = tl.layers.Input([4, 16], name='input')
    >>> prev_h = tl.layers.Input([4, 32])
    >>> prev_c = tl.layers.Input([4, 32])
    >>> cell = tl.layers.LSTMCell(input_size=16, hidden_size=32, bias=True, name='lstmcell_1')
    >>> y, (h, c) = cell(input, (prev_h, prev_c))
    >>> print(y.shape)
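    >>> # y, h and c all have shape [batch_size, hidden_size], i.e. (4, 32) here.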

    """

    def __init__(
        self,
        input_size,
        hidden_size,
        bias=True,
        name=None,
    ):
        super(LSTMCell, self).__init__(name)
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.bias = bias
        self.build(None)
        logging.info("LSTMCell %s: input_size: %d hidden_size: %d " % (self.name, input_size, hidden_size))

    def __repr__(self):
        s = ('{classname}(input_size={input_size}, hidden_size={hidden_size}')
        s += ', bias=True' if self.bias else ', bias=False'
        if self.name is not None:
            s += ', name=\'{name}\''
        s += ')'
        return s.format(classname=self.__class__.__name__, **self.__dict__)

    def check_input(self, input_shape):
        if input_shape[1] != self.input_size:
            raise ValueError(
                'input should have consistent input_size. But got {}, expected {}.'.format(
                    input_shape[1], self.input_size
                )
            )

    def check_hidden(self, input_shape, h_shape, hidden_label):
        if input_shape[0] != h_shape[0]:
            raise ValueError(
                'input batch size {} should match hidden {} batch size {}.'.format(
                    input_shape[0], hidden_label, h_shape[0]
                )
            )
        if h_shape[1] != self.hidden_size:
            raise ValueError(
                'hidden {} should have consistent hidden_size. But got {}, expected {}.'.format(
                    hidden_label, h_shape[1], self.hidden_size
                )
            )

    def build(self, inputs_shape):
        stdv = 1.0 / np.sqrt(self.hidden_size)
        _init = tl.initializers.RandomUniform(minval=-stdv, maxval=stdv)
        # The four LSTM gates are stacked along the first axis, hence 4 * hidden_size.
        self.weight_ih_shape = (4 * self.hidden_size, self.input_size)
        self.weight_hh_shape = (4 * self.hidden_size, self.hidden_size)
        self.weight_ih = self._get_weights("weight_ih", shape=self.weight_ih_shape, init=_init)
        self.weight_hh = self._get_weights("weight_hh", shape=self.weight_hh_shape, init=_init)

        if self.bias:
            self.bias_ih_shape = (4 * self.hidden_size, )
            self.bias_hh_shape = (4 * self.hidden_size, )
            self.bias_ih = self._get_weights('bias_ih', shape=self.bias_ih_shape, init=_init)
            self.bias_hh = self._get_weights('bias_hh', shape=self.bias_hh_shape, init=_init)
        else:
            self.bias_ih = None
            self.bias_hh = None

        self.lstmcell = tl.ops.lstmcell(
            weight_ih=self.weight_ih, weight_hh=self.weight_hh, bias_ih=self.bias_ih, bias_hh=self.bias_hh
        )

    def forward(self, inputs, states=None):
        input_shape = tl.get_tensor_shape(inputs)
        self.check_input(input_shape)
        if states is not None:
            h, c = states
        else:
            h = tl.zeros(shape=(input_shape[0], self.hidden_size), dtype=inputs.dtype)
            c = tl.zeros(shape=(input_shape[0], self.hidden_size), dtype=inputs.dtype)
        h_shape = tl.get_tensor_shape(h)
        c_shape = tl.get_tensor_shape(c)
        self.check_hidden(input_shape, h_shape, hidden_label='h')
        self.check_hidden(input_shape, c_shape, hidden_label='c')
        output, new_h, new_c = self.lstmcell(inputs, h, c)
        return output, (new_h, new_c)


class GRUCell(Module):
    """A gated recurrent unit (GRU) cell.

    Parameters
    ----------
    input_size : int
        The number of expected features in the input `x`
    hidden_size : int
        The number of features in the hidden state `h`
    bias : bool
        If ``False``, then the layer does not use bias weights `b_ih` and `b_hh`. Default: ``True``
    name : None or str
        A unique layer name

    --------------------------------------------------------
    inputs : tensor
        A tensor with shape `[batch_size, input_size]`.
    states : tensor or None
        A tensor with shape `[batch_size, hidden_size]`. When states is None, zero state is used. Default: None.

    Returns
    ----------
    outputs : tensor
        A tensor with shape `[batch_size, hidden_size]`.
    states : tensor
        A tensor with shape `[batch_size, hidden_size]`.
        Tensor containing the next hidden state for each element in the batch.

    Examples
    --------
    With TensorLayer

    >>> input = tl.layers.Input([4, 16], name='input')
    >>> prev_h = tl.layers.Input([4, 32])
    >>> cell = tl.layers.GRUCell(input_size=16, hidden_size=32, bias=True, name='grucell_1')
    >>> y, h = cell(input, prev_h)
    >>> print(y.shape)
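    >>> # y and h both have shape [batch_size, hidden_size], i.e. (4, 32) here.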

    """

    def __init__(
        self,
        input_size,
        hidden_size,
        bias=True,
        name=None,
    ):
        super(GRUCell, self).__init__(name)
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.bias = bias
        self.build(None)
        logging.info("GRUCell %s: input_size: %d hidden_size: %d " % (self.name, input_size, hidden_size))

    def __repr__(self):
        s = ('{classname}(input_size={input_size}, hidden_size={hidden_size}')
        s += ', bias=True' if self.bias else ', bias=False'
        if self.name is not None:
            s += ', name=\'{name}\''
        s += ')'
        return s.format(classname=self.__class__.__name__, **self.__dict__)

    def check_input(self, input_shape):
        if input_shape[1] != self.input_size:
            raise ValueError(
                'input should have consistent input_size. But got {}, expected {}.'.format(
                    input_shape[1], self.input_size
                )
            )

    def check_hidden(self, input_shape, h_shape, hidden_label):
        if input_shape[0] != h_shape[0]:
            raise ValueError(
                'input batch size {} should match hidden {} batch size {}.'.format(
                    input_shape[0], hidden_label, h_shape[0]
                )
            )
        if h_shape[1] != self.hidden_size:
            raise ValueError(
                'hidden {} should have consistent hidden_size. But got {}, expected {}.'.format(
                    hidden_label, h_shape[1], self.hidden_size
                )
            )

    def build(self, inputs_shape):
        stdv = 1.0 / np.sqrt(self.hidden_size)
        _init = tl.initializers.RandomUniform(minval=-stdv, maxval=stdv)
        # The reset, update and candidate gates are stacked along the first axis, hence 3 * hidden_size.
        self.weight_ih_shape = (3 * self.hidden_size, self.input_size)
        self.weight_hh_shape = (3 * self.hidden_size, self.hidden_size)
        self.weight_ih = self._get_weights("weight_ih", shape=self.weight_ih_shape, init=_init)
        self.weight_hh = self._get_weights("weight_hh", shape=self.weight_hh_shape, init=_init)

        if self.bias:
            self.bias_ih_shape = (3 * self.hidden_size, )
            self.bias_hh_shape = (3 * self.hidden_size, )
            self.bias_ih = self._get_weights('bias_ih', shape=self.bias_ih_shape, init=_init)
            self.bias_hh = self._get_weights('bias_hh', shape=self.bias_hh_shape, init=_init)
        else:
            self.bias_ih = None
            self.bias_hh = None

        self.grucell = tl.ops.grucell(
            weight_ih=self.weight_ih, weight_hh=self.weight_hh, bias_ih=self.bias_ih, bias_hh=self.bias_hh
        )

    def forward(self, inputs, states=None):
        input_shape = tl.get_tensor_shape(inputs)
        self.check_input(input_shape)
        if states is None:
            states = tl.zeros(shape=(input_shape[0], self.hidden_size), dtype=inputs.dtype)
        states_shape = tl.get_tensor_shape(states)
        self.check_hidden(input_shape, states_shape, hidden_label='h')
        output, states = self.grucell(inputs, states)
        return output, states


class RNNBase(Module):
    """
    RNNBase class for RNN networks. It provides `forward` and other common methods for RNN, LSTM and GRU.
    """

    def __init__(
        self,
        mode,
        input_size,
        hidden_size,
        num_layers=1,
        bias=True,
        batch_first=False,
        dropout=0.0,
        bidirectional=False,
        name=None,
    ):
        super(RNNBase, self).__init__(name)
        self.mode = mode
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.bias = bias
        self.batch_first = batch_first
        self.dropout = dropout
        self.bidirectional = bidirectional
        self.build(None)

        logging.info(
            "%s: %s: input_size: %d hidden_size: %d num_layers: %d " %
            (self.mode, self.name, input_size, hidden_size, num_layers)
        )

    def __repr__(self):
        s = (
            '{classname}(input_size={input_size}, hidden_size={hidden_size}, num_layers={num_layers}'
            ', dropout={dropout}'
        )
        s += ', bias=True' if self.bias else ', bias=False'
        s += ', bidirectional=True' if self.bidirectional else ', bidirectional=False'
        if self.name is not None:
            s += ', name=\'{name}\''
        s += ')'
        return s.format(classname=self.__class__.__name__, **self.__dict__)

    def build(self, inputs_shape):
        if BACKEND == 'tensorflow':
            # For the TensorFlow backend, the per-layer, per-direction weights are created here
            # and passed to `tl.ops.rnnbase`; other backends create their weights internally.
            bidirect = 2 if self.bidirectional else 1
            self.weights_fw = []
            self.bias_fw = []
            self.weights_bw = []
            self.bias_bw = []
            stdv = 1.0 / np.sqrt(self.hidden_size)
            _init = tl.initializers.RandomUniform(minval=-stdv, maxval=stdv)
            if self.mode == 'LSTM':
                gate_size = 4 * self.hidden_size
            elif self.mode == 'GRU':
                gate_size = 3 * self.hidden_size
            else:
                gate_size = self.hidden_size
            for layer in range(self.num_layers):
                for direction in range(bidirect):
                    layer_input_size = self.input_size if layer == 0 else self.hidden_size * bidirect
                    if direction == 0:
                        self.w_ih = self._get_weights(
                            'weight_ih_l' + str(layer), shape=(gate_size, layer_input_size), init=_init
                        )
                        self.w_hh = self._get_weights(
                            'weight_hh_l' + str(layer), shape=(gate_size, self.hidden_size), init=_init
                        )
                        self.weights_fw.append(self.w_ih)
                        self.weights_fw.append(self.w_hh)
                        if self.bias:
                            self.b_ih = self._get_weights('bias_ih_l' + str(layer), shape=(gate_size, ), init=_init)
                            self.b_hh = self._get_weights('bias_hh_l' + str(layer), shape=(gate_size, ), init=_init)
                            self.bias_fw.append(self.b_ih)
                            self.bias_fw.append(self.b_hh)
                    else:
                        self.w_ih = self._get_weights(
                            'weight_ih_l' + str(layer) + '_reverse', shape=(gate_size, layer_input_size), init=_init
                        )
                        self.w_hh = self._get_weights(
                            'weight_hh_l' + str(layer) + '_reverse', shape=(gate_size, self.hidden_size), init=_init
                        )
                        self.weights_bw.append(self.w_ih)
                        self.weights_bw.append(self.w_hh)
                        if self.bias:
                            self.b_ih = self._get_weights(
                                'bias_ih_l' + str(layer) + '_reverse', shape=(gate_size, ), init=_init
                            )
                            self.b_hh = self._get_weights(
                                'bias_hh_l' + str(layer) + '_reverse', shape=(gate_size, ), init=_init
                            )
                            self.bias_bw.append(self.b_ih)
                            self.bias_bw.append(self.b_hh)

            self.rnn = tl.ops.rnnbase(
                mode=self.mode, input_size=self.input_size, hidden_size=self.hidden_size, num_layers=self.num_layers,
                bias=self.bias, batch_first=self.batch_first, dropout=self.dropout, bidirectional=self.bidirectional,
                is_train=self.is_train, weights_fw=self.weights_fw, weights_bw=self.weights_bw, bias_fw=self.bias_fw,
                bias_bw=self.bias_bw
            )
        else:
            self.rnn = tl.ops.rnnbase(
                mode=self.mode,
                input_size=self.input_size,
                hidden_size=self.hidden_size,
                num_layers=self.num_layers,
                bias=self.bias,
                batch_first=self.batch_first,
                dropout=self.dropout,
                bidirectional=self.bidirectional,
                is_train=self.is_train,
            )

    def forward(self, input, states=None):
        output, new_states = self.rnn(input, states)
        return output, new_states


class RNN(RNNBase):
    """Multilayer Elman network (RNN). It takes input sequences and initial
    states as inputs, and returns the output sequences and the final states.

    Parameters
    ----------
    input_size : int
        The number of expected features in the input `x`
    hidden_size : int
        The number of features in the hidden state `h`
    num_layers : int
        Number of recurrent layers. Default: 1
    bias : bool
        If ``False``, then the layer does not use bias weights `b_ih` and `b_hh`. Default: ``True``
    batch_first : bool
        If ``True``, then the input and output tensors are provided as `[batch_size, seq, input_size]`. Default: ``False``
    dropout : float
        If non-zero, introduces a `Dropout` layer on the outputs of each RNN layer except the last layer,
        with dropout probability equal to `dropout`. Default: 0
    bidirectional : bool
        If ``True``, becomes a bidirectional RNN. Default: ``False``
    act : activation function
        The non-linearity to use. Can be either 'tanh' or 'relu'. Default: 'tanh'
    name : None or str
        A unique layer name

    --------------------------------------------------------
    inputs : tensor
        the input sequence. If `batch_first` is True, the shape is `[batch_size, seq, input_size]`, else the shape is `[seq, batch_size, input_size]`.
    initial_states : tensor or None
        the initial states. The shape is `[num_layers * num_directions, batch_size, hidden_size]`. If initial_states is not given, zero initial states are used.
        If the RNN is bidirectional, num_directions should be 2, else it should be 1. Default: None.

    Returns
    ----------
    outputs : tensor
        the output sequence. If `batch_first` is True, the shape is `[batch_size, seq, num_directions * hidden_size]`,
        else the shape is `[seq, batch_size, num_directions * hidden_size]`.
    final_states : tensor
        final states. The shape is `[num_layers * num_directions, batch_size, hidden_size]`. Note that if the RNN is bidirectional, the forward states are (0, 2, 4, 6, ...) and
        the backward states are (1, 3, 5, 7, ...).

    Examples
    --------
    With TensorLayer

    >>> input = tl.layers.Input([23, 32, 16], name='input')
    >>> prev_h = tl.layers.Input([4, 32, 32])
    >>> cell = tl.layers.RNN(input_size=16, hidden_size=32, bias=True, num_layers=2, bidirectional=True, act='tanh', batch_first=False, dropout=0, name='rnn_1')
    >>> y, h = cell(input, prev_h)
    >>> print(y.shape)
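    >>> # With seq=23, batch_size=32, num_layers=2 and bidirectional=True, y has shape
    >>> # [seq, batch_size, num_directions * hidden_size], i.e. (23, 32, 64), and h has shape
    >>> # [num_layers * num_directions, batch_size, hidden_size], i.e. (4, 32, 32).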

    """

    def __init__(
        self,
        input_size,
        hidden_size,
        num_layers=1,
        bias=True,
        batch_first=False,
        dropout=0.0,
        bidirectional=False,
        act='tanh',
        name=None,
    ):
        if act == 'tanh':
            mode = 'RNN_TANH'
        elif act == 'relu':
            mode = 'RNN_RELU'
        else:
            raise ValueError("act should be in ['tanh', 'relu'], but got {}.".format(act))
        super(RNN, self).__init__(
            mode, input_size, hidden_size, num_layers, bias, batch_first, dropout, bidirectional, name
        )


class LSTM(RNNBase):
    """Applies a multi-layer long short-term memory (LSTM) RNN to an input sequence.

    Parameters
    ----------
    input_size : int
        The number of expected features in the input `x`
    hidden_size : int
        The number of features in the hidden state `h`
    num_layers : int
        Number of recurrent layers. Default: 1
    bias : bool
        If ``False``, then the layer does not use bias weights `b_ih` and `b_hh`. Default: ``True``
    batch_first : bool
        If ``True``, then the input and output tensors are provided as `[batch_size, seq, input_size]`. Default: ``False``
    dropout : float
        If non-zero, introduces a `Dropout` layer on the outputs of each LSTM layer except the last layer,
        with dropout probability equal to `dropout`. Default: 0
    bidirectional : bool
        If ``True``, becomes a bidirectional LSTM. Default: ``False``
    name : None or str
        A unique layer name

    --------------------------------------------------------
    inputs : tensor
        the input sequence. If `batch_first` is True, the shape is `[batch_size, seq, input_size]`, else the shape is `[seq, batch_size, input_size]`.
    initial_states : tuple or None
        the initial states. A tuple of two tensors `(h, c)`, the shape of each is `[num_layers * num_directions, batch_size, hidden_size]`. If initial_states is not given, zero initial states are used.
        If the LSTM is bidirectional, num_directions should be 2, else it should be 1. Default: None.

    Returns
    ----------
    outputs : tensor
        the output sequence. If `batch_first` is True, the shape is `[batch_size, seq, num_directions * hidden_size]`,
        else the shape is `[seq, batch_size, num_directions * hidden_size]`.
    final_states : tuple
        final states. A tuple of two tensors. The shape of each is `[num_layers * num_directions, batch_size, hidden_size]`. Note that if the LSTM is bidirectional, the forward states are (0, 2, 4, 6, ...) and
        the backward states are (1, 3, 5, 7, ...).

    Examples
    --------
    With TensorLayer

    >>> input = tl.layers.Input([23, 32, 16], name='input')
    >>> prev_h = tl.layers.Input([4, 32, 32])
    >>> prev_c = tl.layers.Input([4, 32, 32])
    >>> cell = tl.layers.LSTM(input_size=16, hidden_size=32, bias=True, num_layers=2, bidirectional=True, batch_first=False, dropout=0, name='lstm_1')
    >>> y, (h, c) = cell(input, (prev_h, prev_c))
    >>> print(y.shape)
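    >>> # As in the RNN example above, y has shape (23, 32, 64); h and c each have shape (4, 32, 32).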

    """

    def __init__(
        self,
        input_size,
        hidden_size,
        num_layers=1,
        bias=True,
        batch_first=False,
        dropout=0.0,
        bidirectional=False,
        name=None,
    ):
        super(LSTM, self).__init__(
            'LSTM', input_size, hidden_size, num_layers, bias, batch_first, dropout, bidirectional, name
        )


class GRU(RNNBase):
    """Applies a multi-layer gated recurrent unit (GRU) RNN to an input sequence.

    Parameters
    ----------
    input_size : int
        The number of expected features in the input `x`
    hidden_size : int
        The number of features in the hidden state `h`
    num_layers : int
        Number of recurrent layers. Default: 1
    bias : bool
        If ``False``, then the layer does not use bias weights `b_ih` and `b_hh`. Default: ``True``
    batch_first : bool
        If ``True``, then the input and output tensors are provided as `[batch_size, seq, input_size]`. Default: ``False``
    dropout : float
        If non-zero, introduces a `Dropout` layer on the outputs of each GRU layer except the last layer,
        with dropout probability equal to `dropout`. Default: 0
    bidirectional : bool
        If ``True``, becomes a bidirectional GRU. Default: ``False``
    name : None or str
        A unique layer name

    --------------------------------------------------------
    inputs : tensor
        the input sequence. If `batch_first` is True, the shape is `[batch_size, seq, input_size]`, else the shape is `[seq, batch_size, input_size]`.
    initial_states : tensor or None
        the initial states. A tensor with shape `[num_layers * num_directions, batch_size, hidden_size]`. If initial_states is not given, zero initial states are used.
        If the GRU is bidirectional, num_directions should be 2, else it should be 1. Default: None.

    Returns
    ----------
    outputs : tensor
        the output sequence. If `batch_first` is True, the shape is `[batch_size, seq, num_directions * hidden_size]`,
        else the shape is `[seq, batch_size, num_directions * hidden_size]`.
    final_states : tensor
        final states. A tensor with shape `[num_layers * num_directions, batch_size, hidden_size]`. Note that if the GRU is bidirectional, the forward states are (0, 2, 4, 6, ...) and
        the backward states are (1, 3, 5, 7, ...).

    Examples
    --------
    With TensorLayer

    >>> input = tl.layers.Input([23, 32, 16], name='input')
    >>> prev_h = tl.layers.Input([4, 32, 32])
    >>> cell = tl.layers.GRU(input_size=16, hidden_size=32, bias=True, num_layers=2, bidirectional=True, batch_first=False, dropout=0, name='GRU_1')
    >>> y, h = cell(input, prev_h)
    >>> print(y.shape)
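    >>> # As in the RNN example above, y has shape (23, 32, 64) and h has shape (4, 32, 32).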

    """

    def __init__(
        self,
        input_size,
        hidden_size,
        num_layers=1,
        bias=True,
        batch_first=False,
        dropout=0.0,
        bidirectional=False,
        name=None,
    ):
        super(GRU, self).__init__(
            'GRU', input_size, hidden_size, num_layers, bias, batch_first, dropout, bidirectional, name
        )