335 lines
12 KiB
C
335 lines
12 KiB
C
#include "backprop.h"
|
|
|
|
void soft_max(TYPE net_outputs[possible_outputs],
|
|
TYPE activations[possible_outputs]) {
|
|
int i;
|
|
TYPE sum;
|
|
sum = (TYPE)0.0;
|
|
|
|
for (i = 0; i < possible_outputs; i++) {
|
|
sum += exp(-activations[i]);
|
|
}
|
|
for (i = 0; i < possible_outputs; i++) {
|
|
net_outputs[i] = exp(-activations[i]) / sum;
|
|
}
|
|
}
|
|
|
|
void RELU_hidden(TYPE activations[nodes_per_layer],
|
|
TYPE dactivations[nodes_per_layer]) {
|
|
int i;
|
|
for (i = 0; i < nodes_per_layer; i++) {
|
|
dactivations[i] = activations[i] * (1.0 - activations[i]);
|
|
activations[i] = 1.0 / (1.0 + exp(-activations[i]));
|
|
}
|
|
}
|
|
|
|
void RELU_output(TYPE activations[possible_outputs],
|
|
TYPE dactivations[possible_outputs]) {
|
|
int i;
|
|
for (i = 0; i < possible_outputs; i++) {
|
|
dactivations[i] = activations[i] * (1.0 - activations[i]);
|
|
activations[i] = 1.0 / (1.0 + exp(-activations[i]));
|
|
}
|
|
}
|
|
|
|
void add_bias_to_activations_hidden(TYPE biases[nodes_per_layer],
|
|
TYPE activations[nodes_per_layer]) {
|
|
int i;
|
|
for (i = 0; i < nodes_per_layer; i++) {
|
|
activations[i] = activations[i] + biases[i];
|
|
}
|
|
}
|
|
|
|
void add_bias_to_activations_output(TYPE biases[possible_outputs],
|
|
TYPE activations[possible_outputs]) {
|
|
int i;
|
|
for (i = 0; i < possible_outputs; i++) {
|
|
activations[i] = activations[i] + biases[i];
|
|
}
|
|
}
|
|
|
|
void matrix_vector_product_with_bias_input_layer(
|
|
TYPE biases[nodes_per_layer],
|
|
TYPE weights[input_dimension * nodes_per_layer],
|
|
TYPE activations[nodes_per_layer], TYPE input_sample[input_dimension]) {
|
|
int i, j;
|
|
for (j = 0; j < nodes_per_layer; j++) {
|
|
activations[j] = (TYPE)0.0;
|
|
for (i = 0; i < input_dimension; i++) {
|
|
activations[j] += weights[j * input_dimension + i] * input_sample[i];
|
|
}
|
|
}
|
|
add_bias_to_activations_hidden(biases, activations);
|
|
}
|
|
|
|
void matrix_vector_product_with_bias_second_layer(
|
|
TYPE biases[nodes_per_layer],
|
|
TYPE weights[nodes_per_layer * nodes_per_layer],
|
|
TYPE activations[nodes_per_layer],
|
|
TYPE input_activations[nodes_per_layer]) {
|
|
int i, j;
|
|
for (i = 0; i < nodes_per_layer; i++) {
|
|
activations[i] = (TYPE)0.0;
|
|
for (j = 0; j < nodes_per_layer; j++) {
|
|
activations[i] += weights[i * nodes_per_layer + j] * input_activations[j];
|
|
}
|
|
}
|
|
add_bias_to_activations_hidden(biases, activations);
|
|
}
|
|
|
|
void matrix_vector_product_with_bias_output_layer(
|
|
TYPE biases[possible_outputs],
|
|
TYPE weights[nodes_per_layer * possible_outputs],
|
|
TYPE activations[possible_outputs],
|
|
TYPE input_activations[nodes_per_layer]) {
|
|
int i, j;
|
|
for (j = 0; j < possible_outputs; j++) {
|
|
activations[j] = (TYPE)0.0;
|
|
for (i = 0; i < nodes_per_layer; i++) {
|
|
activations[j] += weights[j * nodes_per_layer + i] * input_activations[i];
|
|
}
|
|
}
|
|
add_bias_to_activations_output(biases, activations);
|
|
}
|
|
|
|
void take_difference(TYPE net_outputs[possible_outputs],
|
|
TYPE solutions[possible_outputs],
|
|
TYPE output_difference[possible_outputs],
|
|
TYPE dactivations[possible_outputs]) {
|
|
int i;
|
|
for (i = 0; i < possible_outputs; i++) {
|
|
output_difference[i] =
|
|
(((net_outputs[i]) - solutions[i]) * -1.0) * dactivations[i];
|
|
}
|
|
}
|
|
|
|
void get_delta_matrix_weights3(
|
|
TYPE delta_weights3[nodes_per_layer * possible_outputs],
|
|
TYPE output_difference[possible_outputs],
|
|
TYPE last_activations[nodes_per_layer]) {
|
|
int i, j;
|
|
for (i = 0; i < nodes_per_layer; i++) {
|
|
for (j = 0; j < possible_outputs; j++) {
|
|
delta_weights3[i * possible_outputs + j] =
|
|
last_activations[i] * output_difference[j];
|
|
}
|
|
}
|
|
}
|
|
|
|
void get_oracle_activations2(TYPE weights3[nodes_per_layer * possible_outputs],
|
|
TYPE output_differences[possible_outputs],
|
|
TYPE oracle_activations[nodes_per_layer],
|
|
TYPE dactivations[nodes_per_layer]) {
|
|
int i, j;
|
|
for (i = 0; i < nodes_per_layer; i++) {
|
|
oracle_activations[i] = (TYPE)0.0;
|
|
for (j = 0; j < possible_outputs; j++) {
|
|
oracle_activations[i] +=
|
|
output_differences[j] * weights3[i * possible_outputs + j];
|
|
}
|
|
oracle_activations[i] = oracle_activations[i] * dactivations[i];
|
|
}
|
|
}
|
|
|
|
void get_delta_matrix_weights2(
|
|
TYPE delta_weights2[nodes_per_layer * nodes_per_layer],
|
|
TYPE output_difference[nodes_per_layer],
|
|
TYPE last_activations[nodes_per_layer]) {
|
|
int i, j;
|
|
for (i = 0; i < nodes_per_layer; i++) {
|
|
for (j = 0; j < nodes_per_layer; j++) {
|
|
delta_weights2[i * nodes_per_layer + j] =
|
|
last_activations[i] * output_difference[j];
|
|
}
|
|
}
|
|
}
|
|
|
|
void get_oracle_activations1(TYPE weights2[nodes_per_layer * nodes_per_layer],
|
|
TYPE output_differences[nodes_per_layer],
|
|
TYPE oracle_activations[nodes_per_layer],
|
|
TYPE dactivations[nodes_per_layer]) {
|
|
int i, j;
|
|
for (i = 0; i < nodes_per_layer; i++) {
|
|
oracle_activations[i] = (TYPE)0.0;
|
|
for (j = 0; j < nodes_per_layer; j++) {
|
|
oracle_activations[i] +=
|
|
output_differences[j] * weights2[i * nodes_per_layer + j];
|
|
}
|
|
oracle_activations[i] = oracle_activations[i] * dactivations[i];
|
|
}
|
|
}
|
|
|
|
void get_delta_matrix_weights1(
|
|
TYPE delta_weights1[input_dimension * nodes_per_layer],
|
|
TYPE output_difference[nodes_per_layer],
|
|
TYPE last_activations[input_dimension]) {
|
|
int i, j;
|
|
for (i = 0; i < input_dimension; i++) {
|
|
for (j = 0; j < nodes_per_layer; j++) {
|
|
delta_weights1[i * nodes_per_layer + j] =
|
|
last_activations[i] * output_difference[j];
|
|
}
|
|
}
|
|
}
|
|
|
|
void update_weights(TYPE weights1[input_dimension * nodes_per_layer],
|
|
TYPE weights2[nodes_per_layer * nodes_per_layer],
|
|
TYPE weights3[nodes_per_layer * possible_outputs],
|
|
TYPE d_weights1[input_dimension * nodes_per_layer],
|
|
TYPE d_weights2[nodes_per_layer * nodes_per_layer],
|
|
TYPE d_weights3[nodes_per_layer * possible_outputs],
|
|
TYPE biases1[nodes_per_layer],
|
|
TYPE biases2[nodes_per_layer],
|
|
TYPE biases3[possible_outputs],
|
|
TYPE d_biases1[nodes_per_layer],
|
|
TYPE d_biases2[nodes_per_layer],
|
|
TYPE d_biases3[possible_outputs]) {
|
|
int i, j;
|
|
double norm, bias_norm;
|
|
norm = 0.0;
|
|
bias_norm = 0.0;
|
|
|
|
for (i = 0; i < input_dimension; i++) {
|
|
for (j = 0; j < nodes_per_layer; j++) {
|
|
weights1[i * nodes_per_layer + j] -=
|
|
(d_weights1[i * nodes_per_layer + j] * learning_rate);
|
|
norm +=
|
|
weights1[i * nodes_per_layer + j] * weights1[i * nodes_per_layer + j];
|
|
}
|
|
}
|
|
for (i = 0; i < nodes_per_layer; i++) {
|
|
biases1[i] -= (d_biases1[i] * learning_rate);
|
|
bias_norm += biases1[i] * biases1[i];
|
|
}
|
|
|
|
norm = sqrt(norm);
|
|
bias_norm = sqrt(bias_norm);
|
|
|
|
for (i = 0; i < input_dimension; i++) {
|
|
for (j = 0; j < nodes_per_layer; j++) {
|
|
weights1[i * nodes_per_layer + j] =
|
|
(weights1[i * nodes_per_layer + j] / norm);
|
|
}
|
|
}
|
|
for (i = 0; i < nodes_per_layer; i++) {
|
|
biases1[i] = (biases1[i] / bias_norm);
|
|
}
|
|
|
|
norm = (double)0.0;
|
|
bias_norm = (double)0.0;
|
|
|
|
for (i = 0; i < nodes_per_layer; i++) {
|
|
for (j = 0; j < nodes_per_layer; j++) {
|
|
weights2[i * nodes_per_layer + j] -=
|
|
(d_weights2[i * nodes_per_layer + j] * learning_rate);
|
|
norm +=
|
|
weights2[i * nodes_per_layer + j] * weights2[i * nodes_per_layer + j];
|
|
}
|
|
}
|
|
for (i = 0; i < nodes_per_layer; i++) {
|
|
biases2[i] -= (d_biases2[i] * learning_rate);
|
|
bias_norm += biases2[i] * biases2[i];
|
|
}
|
|
|
|
norm = sqrt(norm);
|
|
bias_norm = sqrt(bias_norm);
|
|
|
|
for (i = 0; i < nodes_per_layer; i++) {
|
|
for (j = 0; j < nodes_per_layer; j++) {
|
|
weights2[i * nodes_per_layer + j] =
|
|
(weights2[i * nodes_per_layer + j] / norm);
|
|
}
|
|
}
|
|
for (i = 0; i < nodes_per_layer; i++) {
|
|
biases2[i] = (biases2[i] / bias_norm);
|
|
}
|
|
|
|
norm = (double)0.0;
|
|
bias_norm = (double)0.0;
|
|
|
|
for (i = 0; i < nodes_per_layer; i++) {
|
|
for (j = 0; j < possible_outputs; j++) {
|
|
weights3[i * possible_outputs + j] -=
|
|
(d_weights3[i * possible_outputs + j] * learning_rate);
|
|
norm += weights3[i * possible_outputs + j] *
|
|
weights3[i * possible_outputs + j];
|
|
}
|
|
}
|
|
for (i = 0; i < possible_outputs; i++) {
|
|
biases3[i] -= d_biases3[i] * learning_rate;
|
|
bias_norm += biases3[i] * biases3[i];
|
|
}
|
|
|
|
norm = sqrt(norm);
|
|
bias_norm = sqrt(bias_norm);
|
|
|
|
for (i = 0; i < nodes_per_layer; i++) {
|
|
for (j = 0; j < possible_outputs; j++) {
|
|
weights3[i * possible_outputs + j] =
|
|
(weights3[i * possible_outputs + j] / norm);
|
|
}
|
|
}
|
|
for (i = 0; i < possible_outputs; i++) {
|
|
biases3[i] = (biases3[i] / bias_norm);
|
|
}
|
|
}
|
|
|
|
void backprop(TYPE weights1[input_dimension * nodes_per_layer],
|
|
TYPE weights2[nodes_per_layer * nodes_per_layer],
|
|
TYPE weights3[nodes_per_layer * possible_outputs],
|
|
TYPE biases1[nodes_per_layer], TYPE biases2[nodes_per_layer],
|
|
TYPE biases3[possible_outputs],
|
|
TYPE training_data[training_sets * input_dimension],
|
|
TYPE training_targets[training_sets * possible_outputs]) {
|
|
int i, j;
|
|
// Forward and training structures
|
|
TYPE activations1[nodes_per_layer];
|
|
TYPE activations2[nodes_per_layer];
|
|
TYPE activations3[possible_outputs];
|
|
TYPE dactivations1[nodes_per_layer];
|
|
TYPE dactivations2[nodes_per_layer];
|
|
TYPE dactivations3[possible_outputs];
|
|
TYPE net_outputs[possible_outputs];
|
|
// Training structure
|
|
TYPE output_difference[possible_outputs];
|
|
TYPE delta_weights1[input_dimension * nodes_per_layer];
|
|
TYPE delta_weights2[nodes_per_layer * nodes_per_layer];
|
|
TYPE delta_weights3[nodes_per_layer * possible_outputs];
|
|
TYPE oracle_activations1[nodes_per_layer];
|
|
TYPE oracle_activations2[nodes_per_layer];
|
|
|
|
for (i = 0; i < training_sets; i++) {
|
|
for (j = 0; j < nodes_per_layer; j++) {
|
|
activations1[j] = (TYPE)0.0;
|
|
activations2[j] = (TYPE)0.0;
|
|
if (j < possible_outputs) {
|
|
activations3[j] = (TYPE)0.0;
|
|
}
|
|
}
|
|
matrix_vector_product_with_bias_input_layer(
|
|
biases1, weights1, activations1, &training_data[i * input_dimension]);
|
|
RELU_hidden(activations1, dactivations1);
|
|
matrix_vector_product_with_bias_second_layer(biases2, weights2,
|
|
activations2, activations1);
|
|
RELU_hidden(activations2, dactivations2);
|
|
matrix_vector_product_with_bias_output_layer(biases3, weights3,
|
|
activations3, activations2);
|
|
RELU_output(activations3, dactivations3);
|
|
soft_max(net_outputs, activations3);
|
|
take_difference(net_outputs, &training_targets[i * possible_outputs],
|
|
output_difference, dactivations3);
|
|
get_delta_matrix_weights3(delta_weights3, output_difference, activations2);
|
|
get_oracle_activations2(weights3, output_difference, oracle_activations2,
|
|
dactivations2);
|
|
get_delta_matrix_weights2(delta_weights2, oracle_activations2,
|
|
activations1);
|
|
get_oracle_activations1(weights2, oracle_activations2, oracle_activations1,
|
|
dactivations1);
|
|
get_delta_matrix_weights1(delta_weights1, oracle_activations1,
|
|
&training_data[i * input_dimension]);
|
|
update_weights(weights1, weights2, weights3, delta_weights1, delta_weights2,
|
|
delta_weights3, biases1, biases2, biases3,
|
|
oracle_activations1, oracle_activations2, output_difference);
|
|
}
|
|
}
|