#! /usr/bin/python
# -*- coding: utf-8 -*-
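"""TensorLayer-style optimizer wrappers for the PaddlePaddle backend.

Each implemented wrapper follows the same two-step protocol:

* ``gradient(loss, weights)`` builds the underlying ``paddle.optimizer`` instance,
  calls ``loss.backward()`` and returns the parameter/gradient pairs.
* ``apply_gradients(weights_and_grads)`` applies those pairs and clears the
  accumulated gradients.

Minimal usage sketch (``model``, ``compute_loss``, ``x`` and ``y`` are assumed here
for illustration only and are not defined in this module)::

    optimizer = Adam(learning_rate=0.001)
    loss = compute_loss(model(x), y)  # assumed user-defined forward pass and loss
    grads = optimizer.gradient(loss, model.trainable_weights)
    optimizer.apply_gradients(grads)
"""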
from __future__ import absolute_import, division, print_function

import paddle
from paddle.optimizer import Optimizer

__all__ = ['Adadelta', 'Adagrad', 'Adam', 'Adamax', 'Ftrl', 'Nadam', 'RMSprop', 'SGD', 'Momentum', 'Lamb', 'LARS']

class Adadelta(Optimizer):
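    """Adadelta optimizer, wrapping ``paddle.optimizer.Adadelta``."""
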
    def __init__(self, learning_rate=0.001, epsilon=1.0e-6, rho=0.95):
        if learning_rate is None:
            raise ValueError('learning_rate is not set.')
        if epsilon is None:
            raise ValueError('epsilon is not set.')
        if rho is None:
            raise ValueError('rho is not set.')

        self.learning_rate = learning_rate
        self.epsilon = epsilon
        self.rho = rho

    def gradient(self, loss, weights):
        if loss is None:
            raise ValueError('loss is not set.')
        if weights is None:
            raise ValueError('weights is not set.')

        self.adadelta = paddle.optimizer.Adadelta(
            learning_rate=self.learning_rate, epsilon=self.epsilon, rho=self.rho, parameters=weights
        )
        loss.backward()
        weights_and_grads = self.adadelta.backward(loss=loss, parameters=weights)

        return weights_and_grads

    def apply_gradients(self, weights_and_grads):
        if weights_and_grads is None:
            raise ValueError('weights_and_grads is not set.')
        self.adadelta._apply_optimize(loss=None, startup_program=None, params_grads=weights_and_grads)
        self.adadelta.clear_grad()


class Adagrad(Optimizer):
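    """Adagrad optimizer, wrapping ``paddle.optimizer.Adagrad``.

    Note that ``learning_rate`` has no default and must be supplied.
    """
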
    def __init__(self, learning_rate, initial_accumulator_value=0.0, epsilon=1.0e-6):
        if learning_rate is None:
            raise ValueError('learning_rate is not set.')
        if initial_accumulator_value is None:
            raise ValueError('initial_accumulator_value is not set.')
        if epsilon is None:
            raise ValueError('epsilon is not set.')

        self.learning_rate = learning_rate
        self.initial_accumulator_value = initial_accumulator_value
        self.epsilon = epsilon

    def gradient(self, loss, weights):
        if loss is None:
            raise ValueError('loss is not set.')
        if weights is None:
            raise ValueError('weights is not set.')

        self.adagrad = paddle.optimizer.Adagrad(
            learning_rate=self.learning_rate, epsilon=self.epsilon,
            initial_accumulator_value=self.initial_accumulator_value, parameters=weights
        )
        loss.backward()
        weights_and_grads = self.adagrad.backward(loss=loss, parameters=weights)

        return weights_and_grads

    def apply_gradients(self, weights_and_grads):
        if weights_and_grads is None:
            raise ValueError('weights_and_grads is not set.')
        self.adagrad._apply_optimize(loss=None, startup_program=None, params_grads=weights_and_grads)
        self.adagrad.clear_grad()


class Adam(Optimizer):
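    """Adam optimizer, wrapping ``paddle.optimizer.Adam``.

    ``beta_1`` and ``beta_2`` must lie in [0, 1).
    """
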
    def __init__(self, learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1.0e-8):
        if learning_rate is None:
            raise ValueError('learning_rate is not set.')
        if beta_1 is None:
            raise ValueError('beta_1 is not set.')
        if beta_2 is None:
            raise ValueError('beta_2 is not set.')
        if epsilon is None:
            raise ValueError('epsilon is not set.')

        if not 0 <= beta_1 < 1:
            raise ValueError("Invalid value of beta_1, expect beta_1 in [0,1).")
        if not 0 <= beta_2 < 1:
            raise ValueError("Invalid value of beta_2, expect beta_2 in [0,1).")

        self.learning_rate = learning_rate
        self.beta_1 = beta_1
        self.beta_2 = beta_2
        self.epsilon = epsilon

    def gradient(self, loss, weights):
        if loss is None:
            raise ValueError('loss is not set.')
        if weights is None:
            raise ValueError('weights is not set.')

        self.adam = paddle.optimizer.Adam(
            learning_rate=self.learning_rate, beta1=self.beta_1, beta2=self.beta_2, epsilon=self.epsilon,
            parameters=weights
        )
        loss.backward()
        weights_and_grads = self.adam.backward(loss=loss, parameters=weights)

        return weights_and_grads

    def apply_gradients(self, weights_and_grads):
        if weights_and_grads is None:
            raise ValueError('weights_and_grads is not set.')
        self.adam._apply_optimize(loss=None, startup_program=None, params_grads=weights_and_grads)
        self.adam.clear_grad()


class Adamax(Optimizer):
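    """Adamax optimizer, wrapping ``paddle.optimizer.Adamax``.

    ``beta_1`` and ``beta_2`` must lie in [0, 1).
    """
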
    def __init__(self, learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1.0e-8):
        if learning_rate is None:
            raise ValueError('learning_rate is not set.')
        if beta_1 is None:
            raise ValueError('beta_1 is not set.')
        if beta_2 is None:
            raise ValueError('beta_2 is not set.')
        if epsilon is None:
            raise ValueError('epsilon is not set.')

        if not 0 <= beta_1 < 1:
            raise ValueError("Invalid value of beta_1, expect beta_1 in [0,1).")
        if not 0 <= beta_2 < 1:
            raise ValueError("Invalid value of beta_2, expect beta_2 in [0,1).")

        self.learning_rate = learning_rate
        self.beta_1 = beta_1
        self.beta_2 = beta_2
        self.epsilon = epsilon

    def gradient(self, loss, weights):
        if loss is None:
            raise ValueError('loss is not set.')
        if weights is None:
            raise ValueError('weights is not set.')

        self.adamax = paddle.optimizer.Adamax(
            learning_rate=self.learning_rate, beta1=self.beta_1, beta2=self.beta_2, epsilon=self.epsilon,
            parameters=weights
        )
        loss.backward()
        weights_and_grads = self.adamax.backward(loss=loss, parameters=weights)

        return weights_and_grads

    def apply_gradients(self, weights_and_grads):
        if weights_and_grads is None:
            raise ValueError('weights_and_grads is not set.')
        self.adamax._apply_optimize(loss=None, startup_program=None, params_grads=weights_and_grads)
        self.adamax.clear_grad()


class Ftrl(Optimizer):
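    """Ftrl optimizer placeholder; not implemented for the PaddlePaddle backend."""
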
    def __init__(self):
        raise Exception('Ftrl optimizer function not implemented')


class Nadam(Optimizer):
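    """Nadam optimizer placeholder; not implemented for the PaddlePaddle backend."""
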
    def __init__(self):
        raise Exception('Nadam optimizer function not implemented')


class RMSprop(Optimizer):
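    """RMSprop optimizer, wrapping ``paddle.optimizer.RMSProp``."""
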
    def __init__(self, learning_rate=0.001, rho=0.95, epsilon=1.0e-6, momentum=0.0, centered=False):
        if learning_rate is None:
            raise ValueError("learning_rate is not set.")
        if rho is None:
            raise ValueError("rho is not set.")
        if epsilon is None:
            raise ValueError("epsilon is not set.")
        if momentum is None:
            raise ValueError("momentum is not set.")
        if not 0.0 <= epsilon:
            raise ValueError("Invalid value of epsilon, expect epsilon >= 0.")
        if not 0.0 <= momentum:
            raise ValueError("Invalid value of momentum, expect momentum >= 0.")
        if not 0.0 <= rho:
            raise ValueError("Invalid value of rho, expect rho >= 0.")

        self.learning_rate = learning_rate
        self.epsilon = epsilon
        self.rho = rho
        self.momentum = momentum
        self.centered = centered

    def gradient(self, loss, weights):
        if loss is None:
            raise ValueError('loss is not set.')
        if weights is None:
            raise ValueError('weights is not set.')

        # Forward the constructor's `centered` flag so it is not silently ignored.
        self.rmsprop = paddle.optimizer.RMSProp(
            learning_rate=self.learning_rate, epsilon=self.epsilon, rho=self.rho, momentum=self.momentum,
            centered=self.centered, parameters=weights
        )
        loss.backward()
        weights_and_grads = self.rmsprop.backward(loss=loss, parameters=weights)

        return weights_and_grads

    def apply_gradients(self, weights_and_grads):
        if weights_and_grads is None:
            raise ValueError('weights_and_grads is not set.')
        self.rmsprop._apply_optimize(loss=None, startup_program=None, params_grads=weights_and_grads)
        self.rmsprop.clear_grad()


class SGD(Optimizer):
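    """Vanilla stochastic gradient descent, wrapping ``paddle.optimizer.SGD``."""
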
    def __init__(self, learning_rate=0.001):
        if learning_rate is None:
            raise ValueError("learning_rate is not set.")

        self.learning_rate = learning_rate

    def gradient(self, loss, weights):
        if loss is None:
            raise ValueError('loss is not set.')
        if weights is None:
            raise ValueError('weights is not set.')

        self.sgd = paddle.optimizer.SGD(learning_rate=self.learning_rate, parameters=weights)
        loss.backward()
        weights_and_grads = self.sgd.backward(loss=loss, parameters=weights)

        return weights_and_grads

    def apply_gradients(self, weights_and_grads):
        if weights_and_grads is None:
            raise ValueError('weights_and_grads is not set.')
        self.sgd._apply_optimize(loss=None, startup_program=None, params_grads=weights_and_grads)
        self.sgd.clear_grad()


class Momentum(Optimizer):
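    """SGD with momentum, wrapping ``paddle.optimizer.Momentum``.

    Set ``nesterov=True`` to use Nesterov momentum (forwarded as ``use_nesterov``).
    """
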
    def __init__(self, learning_rate=0.001, momentum=0.9, nesterov=False):
        if learning_rate is None:
            raise ValueError("learning_rate is not set.")
        if momentum is None:
            raise ValueError("momentum is not set.")

        self.learning_rate = learning_rate
        self.momentum = momentum
        self.nesterov = nesterov

    def gradient(self, loss, weights):
        if loss is None:
            raise ValueError('loss is not set.')
        if weights is None:
            raise ValueError('weights is not set.')

        self.moment = paddle.optimizer.Momentum(
            learning_rate=self.learning_rate, momentum=self.momentum, parameters=weights, use_nesterov=self.nesterov
        )
        loss.backward()
        weights_and_grads = self.moment.backward(loss=loss, parameters=weights)

        return weights_and_grads

    def apply_gradients(self, weights_and_grads):
        if weights_and_grads is None:
            raise ValueError('weights_and_grads is not set.')
        self.moment._apply_optimize(loss=None, startup_program=None, params_grads=weights_and_grads)
        self.moment.clear_grad()


class Lamb(Optimizer):
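    """LAMB optimizer, wrapping ``paddle.optimizer.Lamb``.

    ``beta_1`` and ``beta_2`` must lie in [0, 1).
    """
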
    def __init__(self, learning_rate=0.001, lamb_weight_decay=0.01, beta_1=0.9, beta_2=0.999, epsilon=1.0e-6):
        if learning_rate is None:
            raise ValueError('learning_rate is not set.')
        if lamb_weight_decay is None:
            raise ValueError('lamb_weight_decay is not set.')
        if beta_1 is None:
            raise ValueError('beta_1 is not set.')
        if beta_2 is None:
            raise ValueError('beta_2 is not set.')
        if epsilon is None:
            raise ValueError('epsilon is not set.')

        if not 0 <= beta_1 < 1:
            raise ValueError("Invalid value of beta_1, expect beta_1 in [0,1).")
        if not 0 <= beta_2 < 1:
            raise ValueError("Invalid value of beta_2, expect beta_2 in [0,1).")

        self.learning_rate = learning_rate
        self.lamb_weight_decay = lamb_weight_decay
        self.beta_1 = beta_1
        self.beta_2 = beta_2
        self.epsilon = epsilon

    def gradient(self, loss, weights):
        if loss is None:
            raise ValueError('loss is not set.')
        if weights is None:
            raise ValueError('weights is not set.')

        self.lamb = paddle.optimizer.Lamb(
            learning_rate=self.learning_rate, lamb_weight_decay=self.lamb_weight_decay, beta1=self.beta_1,
            beta2=self.beta_2, epsilon=self.epsilon, parameters=weights
        )
        loss.backward()
        weights_and_grads = self.lamb.backward(loss=loss, parameters=weights)

        return weights_and_grads

    def apply_gradients(self, weights_and_grads):
        if weights_and_grads is None:
            raise ValueError('weights_and_grads is not set.')
        self.lamb._apply_optimize(loss=None, startup_program=None, params_grads=weights_and_grads)
        self.lamb.clear_grad()


class LARS(Optimizer):
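    """LARS optimizer placeholder; gradient application is not implemented for the PaddlePaddle backend."""
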
    def __init__(self):
        pass

    def gradient(self):
        pass

    def apply_gradients(self, weights_and_grads):
        raise Exception('LARS optimizer function not implemented')