tensorlayer3/tensorlayer/optimizers/paddle_optimizers.py

#! /usr/bin/python
# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function

import paddle
from paddle.optimizer import Optimizer

__all__ = ['Adadelta', 'Adagrad', 'Adam', 'Adamax', 'Ftrl', 'Nadam', 'RMSprop', 'SGD', 'Momentum', 'Lamb', 'LARS']
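
# Each wrapper below exposes the same two-step interface:
#   1. gradient(loss, weights) lazily builds the underlying paddle optimizer
#      (so its internal state persists across training steps), runs
#      loss.backward(), and returns a list of (parameter, gradient) pairs.
#   2. apply_gradients(weights_and_grads) applies those pairs through the
#      paddle optimizer and then clears the accumulated gradients.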


class Adadelta(Optimizer):

    def __init__(self, learning_rate=0.001, epsilon=1.0e-6, rho=0.95):
        if learning_rate is None:
            raise ValueError('learning_rate is not set.')
        if epsilon is None:
            raise ValueError('epsilon is not set.')
        if rho is None:
            raise ValueError('rho is not set.')
        self.learning_rate = learning_rate
        self.epsilon = epsilon
        self.rho = rho
        self.adadelta = None

    def gradient(self, loss, weights):
        if loss is None:
            raise ValueError('loss is not set.')
        if weights is None:
            raise ValueError('weights is not set.')
        if self.adadelta is None:
            # Build the paddle optimizer once so its accumulators persist across steps.
            self.adadelta = paddle.optimizer.Adadelta(
                learning_rate=self.learning_rate, epsilon=self.epsilon, rho=self.rho, parameters=weights
            )
        loss.backward()
        weights_and_grads = self.adadelta.backward(loss=loss, parameters=weights)
        return weights_and_grads

    def apply_gradients(self, weights_and_grads):
        if weights_and_grads is None:
            raise ValueError('weights_and_grads is not set.')
        self.adadelta._apply_optimize(loss=None, startup_program=None, params_grads=weights_and_grads)
        self.adadelta.clear_grad()


class Adagrad(Optimizer):

    def __init__(self, learning_rate, initial_accumulator_value=0.0, epsilon=1.0e-6):
        if learning_rate is None:
            raise ValueError('learning_rate is not set.')
        if initial_accumulator_value is None:
            raise ValueError('initial_accumulator_value is not set.')
        if epsilon is None:
            raise ValueError('epsilon is not set.')
        self.learning_rate = learning_rate
        self.initial_accumulator_value = initial_accumulator_value
        self.epsilon = epsilon
        self.adagrad = None

    def gradient(self, loss, weights):
        if loss is None:
            raise ValueError('loss is not set.')
        if weights is None:
            raise ValueError('weights is not set.')
        if self.adagrad is None:
            self.adagrad = paddle.optimizer.Adagrad(
                learning_rate=self.learning_rate, epsilon=self.epsilon,
                initial_accumulator_value=self.initial_accumulator_value, parameters=weights
            )
        loss.backward()
        weights_and_grads = self.adagrad.backward(loss=loss, parameters=weights)
        return weights_and_grads

    def apply_gradients(self, weights_and_grads):
        if weights_and_grads is None:
            raise ValueError('weights_and_grads is not set.')
        self.adagrad._apply_optimize(loss=None, startup_program=None, params_grads=weights_and_grads)
        self.adagrad.clear_grad()


class Adam(Optimizer):

    def __init__(self, learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1.0e-8):
        if learning_rate is None:
            raise ValueError('learning_rate is not set.')
        if beta_1 is None:
            raise ValueError('beta_1 is not set.')
        if beta_2 is None:
            raise ValueError('beta_2 is not set.')
        if epsilon is None:
            raise ValueError('epsilon is not set.')
        if not 0 <= beta_1 < 1:
            raise ValueError("Invalid value of beta_1, expect beta_1 in [0,1).")
        if not 0 <= beta_2 < 1:
            raise ValueError("Invalid value of beta_2, expect beta_2 in [0,1).")
        self.learning_rate = learning_rate
        self.beta_1 = beta_1
        self.beta_2 = beta_2
        self.epsilon = epsilon
        self.adam = None

    def gradient(self, loss, weights):
        if loss is None:
            raise ValueError('loss is not set.')
        if weights is None:
            raise ValueError('weights is not set.')
        if self.adam is None:
            self.adam = paddle.optimizer.Adam(
                learning_rate=self.learning_rate, beta1=self.beta_1, beta2=self.beta_2, epsilon=self.epsilon,
                parameters=weights
            )
        loss.backward()
        weights_and_grads = self.adam.backward(loss=loss, parameters=weights)
        return weights_and_grads

    def apply_gradients(self, weights_and_grads):
        if weights_and_grads is None:
            raise ValueError('weights_and_grads is not set.')
        self.adam._apply_optimize(loss=None, startup_program=None, params_grads=weights_and_grads)
        self.adam.clear_grad()
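

# A minimal usage sketch of the two-step interface above. The toy
# paddle.nn.Linear model, the random data, and the mse_loss objective are
# illustrative assumptions (not part of this module); the sketch is kept in a
# function so importing this file does not execute it.
def _adam_usage_sketch():
    model = paddle.nn.Linear(4, 1)                      # hypothetical toy model
    x = paddle.randn([8, 4])
    y = paddle.randn([8, 1])
    optimizer = Adam(learning_rate=0.01)
    loss = paddle.nn.functional.mse_loss(model(x), y)   # scalar loss tensor
    params_grads = optimizer.gradient(loss, model.parameters())
    optimizer.apply_gradients(params_grads)             # update weights, clear grads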


class Adamax(Optimizer):

    def __init__(self, learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1.0e-8):
        if learning_rate is None:
            raise ValueError('learning_rate is not set.')
        if beta_1 is None:
            raise ValueError('beta_1 is not set.')
        if beta_2 is None:
            raise ValueError('beta_2 is not set.')
        if epsilon is None:
            raise ValueError('epsilon is not set.')
        if not 0 <= beta_1 < 1:
            raise ValueError("Invalid value of beta_1, expect beta_1 in [0,1).")
        if not 0 <= beta_2 < 1:
            raise ValueError("Invalid value of beta_2, expect beta_2 in [0,1).")
        self.learning_rate = learning_rate
        self.beta_1 = beta_1
        self.beta_2 = beta_2
        self.epsilon = epsilon
        self.adamax = None

    def gradient(self, loss, weights):
        if loss is None:
            raise ValueError('loss is not set.')
        if weights is None:
            raise ValueError('weights is not set.')
        if self.adamax is None:
            self.adamax = paddle.optimizer.Adamax(
                learning_rate=self.learning_rate, beta1=self.beta_1, beta2=self.beta_2, epsilon=self.epsilon,
                parameters=weights
            )
        loss.backward()
        weights_and_grads = self.adamax.backward(loss=loss, parameters=weights)
        return weights_and_grads

    def apply_gradients(self, weights_and_grads):
        if weights_and_grads is None:
            raise ValueError('weights_and_grads is not set.')
        self.adamax._apply_optimize(loss=None, startup_program=None, params_grads=weights_and_grads)
        self.adamax.clear_grad()


class Ftrl(Optimizer):

    def __init__(self):
        raise NotImplementedError('Ftrl optimizer is not implemented for the paddle backend.')


class Nadam(Optimizer):

    def __init__(self):
        raise NotImplementedError('Nadam optimizer is not implemented for the paddle backend.')


class RMSprop(Optimizer):

    def __init__(self, learning_rate=0.001, rho=0.95, epsilon=1.0e-6, momentum=0.0, centered=False):
        if learning_rate is None:
            raise ValueError("learning_rate is not set.")
        if rho is None:
            raise ValueError("rho is not set.")
        if epsilon is None:
            raise ValueError("epsilon is not set.")
        if momentum is None:
            raise ValueError("momentum is not set.")
        if not 0.0 <= epsilon:
            raise ValueError("Invalid value of epsilon, expect epsilon >= 0.")
        if not 0.0 <= momentum:
            raise ValueError("Invalid value of momentum, expect momentum >= 0.")
        if not 0.0 <= rho:
            raise ValueError("Invalid value of rho, expect rho >= 0.")
        self.learning_rate = learning_rate
        self.epsilon = epsilon
        self.rho = rho
        self.momentum = momentum
        self.centered = centered
        self.rmsprop = None

    def gradient(self, loss, weights):
        if loss is None:
            raise ValueError('loss is not set.')
        if weights is None:
            raise ValueError('weights is not set.')
        if self.rmsprop is None:
            # Forward `centered` as well so the centered variant is honoured.
            self.rmsprop = paddle.optimizer.RMSProp(
                learning_rate=self.learning_rate, epsilon=self.epsilon, rho=self.rho, momentum=self.momentum,
                centered=self.centered, parameters=weights
            )
        loss.backward()
        weights_and_grads = self.rmsprop.backward(loss=loss, parameters=weights)
        return weights_and_grads

    def apply_gradients(self, weights_and_grads):
        if weights_and_grads is None:
            raise ValueError('weights_and_grads is not set.')
        self.rmsprop._apply_optimize(loss=None, startup_program=None, params_grads=weights_and_grads)
        self.rmsprop.clear_grad()


class SGD(Optimizer):

    def __init__(self, learning_rate=0.001):
        if learning_rate is None:
            raise ValueError("learning_rate is not set.")
        self.learning_rate = learning_rate
        self.sgd = None

    def gradient(self, loss, weights):
        if loss is None:
            raise ValueError('loss is not set.')
        if weights is None:
            raise ValueError('weights is not set.')
        if self.sgd is None:
            self.sgd = paddle.optimizer.SGD(learning_rate=self.learning_rate, parameters=weights)
        loss.backward()
        weights_and_grads = self.sgd.backward(loss=loss, parameters=weights)
        return weights_and_grads

    def apply_gradients(self, weights_and_grads):
        if weights_and_grads is None:
            raise ValueError('weights_and_grads is not set.')
        self.sgd._apply_optimize(loss=None, startup_program=None, params_grads=weights_and_grads)
        self.sgd.clear_grad()


class Momentum(Optimizer):

    def __init__(self, learning_rate=0.001, momentum=0.9, nesterov=False):
        if learning_rate is None:
            raise ValueError("learning_rate is not set.")
        if momentum is None:
            raise ValueError("momentum is not set.")
        self.learning_rate = learning_rate
        self.momentum = momentum
        self.nesterov = nesterov
        self.moment = None

    def gradient(self, loss, weights):
        if loss is None:
            raise ValueError('loss is not set.')
        if weights is None:
            raise ValueError('weights is not set.')
        if self.moment is None:
            self.moment = paddle.optimizer.Momentum(
                learning_rate=self.learning_rate, momentum=self.momentum, parameters=weights,
                use_nesterov=self.nesterov
            )
        loss.backward()
        weights_and_grads = self.moment.backward(loss=loss, parameters=weights)
        return weights_and_grads

    def apply_gradients(self, weights_and_grads):
        if weights_and_grads is None:
            raise ValueError('weights_and_grads is not set.')
        self.moment._apply_optimize(loss=None, startup_program=None, params_grads=weights_and_grads)
        self.moment.clear_grad()
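

# A hedged sketch of a short training loop with the Momentum wrapper, assuming
# the same kind of toy paddle.nn.Linear model and random data as above
# (illustrative assumptions). Because gradient() builds the paddle optimizer
# only once, the velocity accumulators carry over between iterations, and
# apply_gradients() clears the gradients after each update.
def _momentum_loop_sketch(num_steps=5):
    model = paddle.nn.Linear(4, 1)
    optimizer = Momentum(learning_rate=0.01, momentum=0.9)
    for _ in range(num_steps):
        x = paddle.randn([8, 4])
        y = paddle.randn([8, 1])
        loss = paddle.nn.functional.mse_loss(model(x), y)
        params_grads = optimizer.gradient(loss, model.parameters())
        optimizer.apply_gradients(params_grads)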


class Lamb(Optimizer):

    def __init__(self, learning_rate=0.001, lamb_weight_decay=0.01, beta_1=0.9, beta_2=0.999, epsilon=1.0e-6):
        if learning_rate is None:
            raise ValueError('learning_rate is not set.')
        if lamb_weight_decay is None:
            raise ValueError('lamb_weight_decay is not set.')
        if beta_1 is None:
            raise ValueError('beta_1 is not set.')
        if beta_2 is None:
            raise ValueError('beta_2 is not set.')
        if epsilon is None:
            raise ValueError('epsilon is not set.')
        if not 0 <= beta_1 < 1:
            raise ValueError("Invalid value of beta_1, expect beta_1 in [0,1).")
        if not 0 <= beta_2 < 1:
            raise ValueError("Invalid value of beta_2, expect beta_2 in [0,1).")
        self.learning_rate = learning_rate
        self.lamb_weight_decay = lamb_weight_decay
        self.beta_1 = beta_1
        self.beta_2 = beta_2
        self.epsilon = epsilon
        self.lamb = None

    def gradient(self, loss, weights):
        if loss is None:
            raise ValueError('loss is not set.')
        if weights is None:
            raise ValueError('weights is not set.')
        if self.lamb is None:
            self.lamb = paddle.optimizer.Lamb(
                learning_rate=self.learning_rate, lamb_weight_decay=self.lamb_weight_decay, beta1=self.beta_1,
                beta2=self.beta_2, epsilon=self.epsilon, parameters=weights
            )
        loss.backward()
        weights_and_grads = self.lamb.backward(loss=loss, parameters=weights)
        return weights_and_grads

    def apply_gradients(self, weights_and_grads):
        if weights_and_grads is None:
            raise ValueError('weights_and_grads is not set.')
        self.lamb._apply_optimize(loss=None, startup_program=None, params_grads=weights_and_grads)
        self.lamb.clear_grad()


class LARS(Optimizer):

    def __init__(self):
        pass

    def gradient(self, loss, weights):
        pass

    def apply_gradients(self, weights_and_grads):
        raise NotImplementedError('LARS optimizer is not implemented for the paddle backend.')