#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <math.h>
#define ADEPT_FLOATING_POINT_TYPE float
#include <adept_source.h>
#include <adept_arrays.h>
using adept::adouble;
using adept::aMatrix;
using adept::aVector;
using adept::Vector;
using adept::aReal;
//extern "C" {
// from https://github.com/AndrewCarterUK/mnist-neural-network-plain-c
#define MNIST_LABEL_MAGIC 0x00000801
#define MNIST_IMAGE_MAGIC 0x00000803
#define MNIST_IMAGE_WIDTH 28
#define MNIST_IMAGE_HEIGHT 28
#define MNIST_IMAGE_SIZE (MNIST_IMAGE_WIDTH * MNIST_IMAGE_HEIGHT)
#define MNIST_LABELS 10
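// The IDX files store a big-endian header followed by raw data: the label file
// is [magic, count] then one unsigned byte per label, and the image file is
// [magic, count, rows, cols] then 28x28 pixels per image, row-major.
// map_uint32() below byte-swaps the header words on little-endian hosts.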
typedef struct mnist_label_file_header_t_ {
uint32_t magic_number;
uint32_t number_of_labels;
} __attribute__((packed)) mnist_label_file_header_t;
typedef struct mnist_image_file_header_t_ {
uint32_t magic_number;
uint32_t number_of_images;
uint32_t number_of_rows;
uint32_t number_of_columns;
} __attribute__((packed)) mnist_image_file_header_t;
typedef struct mnist_image_t_ {
uint8_t pixels[MNIST_IMAGE_SIZE];
} __attribute__((packed)) mnist_image_t;
typedef struct mnist_dataset_t_ {
mnist_image_t * images;
uint8_t * labels;
uint32_t size;
} mnist_dataset_t;
typedef struct neural_network_t_ {
float b[MNIST_LABELS];
float W[MNIST_LABELS][MNIST_IMAGE_SIZE];
} neural_network_t;
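// Adept-taped copy of the network parameters: the `true` template argument to
// adept::FixedArray marks these fixed-size arrays as active, so arithmetic on
// them is recorded on the Adept stack for reverse-mode differentiation.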
typedef struct aneural_network_t_ {
adept::FixedArray<float,true,MNIST_LABELS> b;
adept::FixedArray<float,true,MNIST_LABELS,MNIST_IMAGE_SIZE> W;
} aneural_network_t;
typedef struct neural_network_gradient_t_ {
float b_grad[MNIST_LABELS];
float W_grad[MNIST_LABELS][MNIST_IMAGE_SIZE];
} neural_network_gradient_t;
/**
* Convert from the big endian format in the dataset if we're on a little endian
* machine.
*/
uint32_t map_uint32(uint32_t in)
{
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
return (
((in & 0xFF000000) >> 24) |
((in & 0x00FF0000) >> 8) |
((in & 0x0000FF00) << 8) |
((in & 0x000000FF) << 24)
);
#else
return in;
#endif
}
/**
* Read labels from file.
*
* File format: http://yann.lecun.com/exdb/mnist/
*/
uint8_t * get_labels(const char * path, uint32_t * number_of_labels)
{
FILE * stream;
mnist_label_file_header_t header;
uint8_t * labels;
stream = fopen(path, "rb");
if (NULL == stream) {
fprintf(stderr, "Could not open file: %s\n", path);
return NULL;
}
if (1 != fread(&header, sizeof(mnist_label_file_header_t), 1, stream)) {
fprintf(stderr, "Could not read label file header from: %s\n", path);
fclose(stream);
return NULL;
}
header.magic_number = map_uint32(header.magic_number);
header.number_of_labels = map_uint32(header.number_of_labels);
if (MNIST_LABEL_MAGIC != header.magic_number) {
fprintf(stderr, "Invalid header read from label file: %s (%08X not %08X)\n", path, header.magic_number, MNIST_LABEL_MAGIC);
fclose(stream);
return NULL;
}
*number_of_labels = header.number_of_labels;
labels = (uint8_t*)malloc(*number_of_labels * sizeof(uint8_t));
if (labels == NULL) {
fprintf(stderr, "Could not allocated memory for %d labels\n", *number_of_labels);
fclose(stream);
return NULL;
}
if (*number_of_labels != fread(labels, 1, *number_of_labels, stream)) {
fprintf(stderr, "Could not read %d labels from: %s\n", *number_of_labels, path);
free(labels);
fclose(stream);
return NULL;
}
fclose(stream);
return labels;
}
/**
* Read images from file.
*
* File format: http://yann.lecun.com/exdb/mnist/
*/
mnist_image_t * get_images(const char * path, uint32_t * number_of_images)
{
FILE * stream;
mnist_image_file_header_t header;
mnist_image_t * images;
stream = fopen(path, "rb");
if (NULL == stream) {
fprintf(stderr, "Could not open file: %s\n", path);
return NULL;
}
if (1 != fread(&header, sizeof(mnist_image_file_header_t), 1, stream)) {
fprintf(stderr, "Could not read image file header from: %s\n", path);
fclose(stream);
return NULL;
}
header.magic_number = map_uint32(header.magic_number);
header.number_of_images = map_uint32(header.number_of_images);
header.number_of_rows = map_uint32(header.number_of_rows);
header.number_of_columns = map_uint32(header.number_of_columns);
if (MNIST_IMAGE_MAGIC != header.magic_number) {
fprintf(stderr, "Invalid header read from image file: %s (%08X not %08X)\n", path, header.magic_number, MNIST_IMAGE_MAGIC);
fclose(stream);
return NULL;
}
if (MNIST_IMAGE_WIDTH != header.number_of_rows) {
fprintf(stderr, "Invalid number of image rows in image file %s (%d not %d)\n", path, header.number_of_rows, MNIST_IMAGE_WIDTH);
}
if (MNIST_IMAGE_HEIGHT != header.number_of_columns) {
fprintf(stderr, "Invalid number of image columns in image file %s (%d not %d)\n", path, header.number_of_columns, MNIST_IMAGE_HEIGHT);
}
*number_of_images = header.number_of_images;
images = (mnist_image_t*)malloc(*number_of_images * sizeof(mnist_image_t));
if (images == NULL) {
fprintf(stderr, "Could not allocated memory for %d images\n", *number_of_images);
fclose(stream);
return NULL;
}
if (*number_of_images != fread(images, sizeof(mnist_image_t), *number_of_images, stream)) {
fprintf(stderr, "Could not read %d images from: %s\n", *number_of_images, path);
free(images);
fclose(stream);
return NULL;
}
fclose(stream);
return images;
}
/**
* Free all the memory allocated in a dataset. This should not be used on a
* batched dataset as the memory is allocated to the parent.
*/
void mnist_free_dataset(mnist_dataset_t * dataset)
{
free(dataset->images);
free(dataset->labels);
free(dataset);
}
mnist_dataset_t * mnist_get_dataset(const char * image_path, const char * label_path)
{
mnist_dataset_t * dataset;
uint32_t number_of_images, number_of_labels;
dataset = (mnist_dataset_t*)calloc(1, sizeof(mnist_dataset_t));
if (NULL == dataset) {
return NULL;
}
dataset->images = get_images(image_path, &number_of_images);
if (NULL == dataset->images) {
mnist_free_dataset(dataset);
return NULL;
}
dataset->labels = get_labels(label_path, &number_of_labels);
if (NULL == dataset->labels) {
mnist_free_dataset(dataset);
return NULL;
}
if (number_of_images != number_of_labels) {
fprintf(stderr, "Number of images does not match number of labels (%d != %d)\n", number_of_images, number_of_labels);
mnist_free_dataset(dataset);
return NULL;
}
dataset->size = number_of_images;
return dataset;
}
/**
* Fills the batch dataset with a subset of the parent dataset.
*/
int mnist_batch(mnist_dataset_t * dataset, mnist_dataset_t * batch, int size, int number)
{
int start_offset;
start_offset = size * number;
if (start_offset >= dataset->size) {
return 0;
}
batch->images = &dataset->images[start_offset];
batch->labels = &dataset->labels[start_offset];
batch->size = size;
if (start_offset + batch->size > dataset->size) {
batch->size = dataset->size - start_offset;
}
return 1;
}
#define STEPS 1000
#define BATCH_SIZE 100
#include <string.h>
// Convert a pixel value from 0-255 to one from 0 to 1
#define PIXEL_SCALE(x) (((float) (x)) / 255.0f)
// Returns a random value between 0 and 1
#define RAND_FLOAT() (((float) rand()) / ((float) RAND_MAX))
/**
* Initialise the weights and bias vectors with values between 0 and 1
*/
void neural_network_random_weights(neural_network_t * network)
{
int i, j;
for (i = 0; i < MNIST_LABELS; i++) {
network->b[i] = RAND_FLOAT();
for (j = 0; j < MNIST_IMAGE_SIZE; j++) {
network->W[i][j] = RAND_FLOAT();
}
}
}
/**
* Calculate the softmax vector from the activations. This uses a more
* numerically stable algorithm that normalises the activations to prevent
* large exponents.
*/
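// Numerically this computes softmax(a)_i = exp(a_i - max_j a_j) / sum_k exp(a_k - max_j a_j);
// subtracting the maximum activation leaves the result unchanged but keeps the
// exponents non-positive, avoiding overflow for large activations.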
void neural_network_softmax(float * activations, int length)
{
int i;
float sum, max;
for (i = 1, max = activations[0]; i < length; i++) {
if (activations[i] > max) {
max = activations[i];
}
}
for (i = 0, sum = 0; i < length; i++) {
activations[i] = exp(activations[i] - max);
sum += activations[i];
}
for (i = 0; i < length; i++) {
activations[i] /= sum;
}
}
/**
* Calculate the softmax vector from the activations. This uses a more
* numerically stable algorithm that normalises the activations to prevent
* large exponents.
*/
void aneural_network_softmax(adept::FixedArray<float,true,MNIST_LABELS> &activations, int length)
{
int i;
aReal sum, max;
for (i = 1, max = activations[0]; i < length; i++) {
if (activations[i] > max) {
max = activations[i];
}
}
for (i = 0, sum = 0; i < length; i++) {
activations[i] = exp(activations[i] - max);
sum += activations[i];
}
for (i = 0; i < length; i++) {
activations[i] /= sum;
}
}
static double maxval(const float *activations, int length) {
float max = activations[0];
for (int i = 1; i < length; i++) {
if (activations[i] > max) {
max = activations[i];
}
}
return max;
}
static double sumval(const float *activations, int length) {
float sum = 0;
for (int i = 0; i < length; i++) {
sum += activations[i];
}
return sum;
}
static void makeexps(float* exps, const float* activations, int length, double max) {
for (int i = 0; i < length; i++) {
exps[i] = exp(activations[i] - max);
}
}
/**
* Calculate the softmax vector from the activations. This uses a more
* numerically stable algorithm that normalises the activations to prevent
* large exponents.
*/
static void neural_network_softmax_v2(const float * activations, float* outp, int length)
{
int i;
float sum, max;
for (i = 1, max = activations[0]; i < length; i++) {
if (activations[i] > max) {
max = activations[i];
}
}
for (i = 0, sum = 0; i < length; i++) {
sum += exp(activations[i] - max);
}
for (i = 0; i < length; i++) {
outp[i] = exp(activations[i] - max) / sum;
}
#if 0
float max = maxval(activations, length);
float exps[length];
makeexps(exps,activations, length, max);
float sum = sumval(exps,length);
/*
for (int i = 0; i < length; i++) {
double tmp = exps[i];//exp(activations[i] - max);
sum += tmp;
}
*/
for (int i = 0; i < length; i++) {
outp[i] = exps[i] / sum;
}
#endif
}
/**
 * Use the weights and bias vector to forward propagate through the neural
* network and calculate the activations.
*/
void neural_network_hypothesis(const mnist_image_t * image, const neural_network_t * network, float activations[MNIST_LABELS])
{
int i, j;
for (i = 0; i < MNIST_LABELS; i++) {
activations[i] = network->b[i];
for (j = 0; j < MNIST_IMAGE_SIZE; j++) {
activations[i] += network->W[i][j] * PIXEL_SCALE(image->pixels[j]);
}
}
neural_network_softmax(activations, MNIST_LABELS);
}
/**
 * Use the weights and bias vector to forward propagate through the neural
* network and calculate the activations.
*/
static float neural_network_hypothesis_v2(const mnist_image_t * image, const neural_network_t * network, uint8_t label)
{
float activations[MNIST_LABELS] = {0};
int i, j;
for (i = 0; i < MNIST_LABELS; i++) {
activations[i] = network->b[i];
for (j = 0; j < MNIST_IMAGE_SIZE; j++) {
activations[i] += network->W[i][j] * PIXEL_SCALE(image->pixels[j]);
}
}
float activations2[MNIST_LABELS] = { 0 };
neural_network_softmax_v2(activations, activations2, MNIST_LABELS);
return -log(activations2[label]);
}
static aReal neural_network_hypothesis_adept(const mnist_image_t * image, const aneural_network_t * network, uint8_t label)
{
adept::FixedArray<float,true,MNIST_LABELS> activations = network->b;
int i, j;
for (i = 0; i < MNIST_LABELS; i++) {
for (j = 0; j < MNIST_IMAGE_SIZE; j++) {
activations(i) += network->W(i,j) * PIXEL_SCALE(image->pixels[j]);
}
}
aneural_network_softmax(activations, MNIST_LABELS);
return -log(activations[label]);
}
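// Adept reverse-mode workflow: new_recording() clears the tape, the forward
// pass records every aReal operation, set_gradient(1.0) seeds the adjoint of
// the loss, stack.reverse() propagates adjoints back through the tape, and
// get_gradient() on each parameter then reads off dLoss/dparameter.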
static void calculateDerivatives_adept(mnist_image_t * image, bool run, adept::Stack& stack, const aneural_network_t * anetwork, neural_network_t* gradient, uint8_t label) {
if (!run) {
stack.new_recording();
// run = true;
} else
stack.continue_recording();
auto resa = neural_network_hypothesis_adept(image, anetwork, label);
resa.set_gradient(1.0);
stack.reverse();
stack.pause_recording();
for (int i = 0; i < MNIST_LABELS; i++) {
gradient->b[i] = anetwork->b(i).get_gradient();
for (int j = 0; j < MNIST_IMAGE_SIZE; j++) {
gradient->W[i][j] = anetwork->W(i,j).get_gradient();
}
}
}
extern "C" {
#include <adBuffer.h>
}
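// The routines below were generated by Tapenade in reverse (adjoint) mode.
// pushReal4()/popReal4() save and restore intermediates that the forward sweep
// overwrites, pushControl1b()/popControl1b() record which branch was taken so
// it can be replayed in reverse, and the `b`-suffixed variables hold the
// adjoints of their primal counterparts.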
void neural_network_softmax_b(float *activations, float *activationsb, int
length) {
float sum, max;
float sumb, maxb;
int branch;
max = activations[0];
for (int i = 1; i < length; ++i)
if (activations[i] > max) {
max = activations[i];
pushControl1b(1);
} else
pushControl1b(0);
sum = 0;
for (int i = 0; i < length; ++i) {
pushReal4(activations[i]);
activations[i] = (float)exp(activations[i] - max);
sum = sum + activations[i];
}
for (int i = 0; i < length; ++i) {
pushReal4(activations[i]);
activations[i] = activations[i]/sum;
}
sumb = 0.0;
for (int i = length-1; i > -1; --i) {
popReal4(&(activations[i]));
sumb = sumb - activations[i]*activationsb[i]/(sum*sum);
activationsb[i] = activationsb[i]/sum;
}
{
float tempb;
maxb = 0.0;
for (int i = length-1; i > -1; --i) {
activationsb[i] = activationsb[i] + sumb;
popReal4(&(activations[i]));
tempb = exp(activations[i]-max)*activationsb[i];
maxb = maxb - tempb;
activationsb[i] = tempb;
}
}
for (int i = length-1; i > 0; --i) {
popControl1b(&branch);
if (branch != 0) {
activationsb[i] = activationsb[i] + maxb;
maxb = 0.0;
}
}
activationsb[0] = activationsb[0] + maxb;
}
/**
* Calculate the softmax vector from the activations. This uses a more
* numerically stable algorithm that normalises the activations to prevent
* large exponents.
*/
void neural_network_softmax_c(float *activations, int length) {
float sum, max;
max = activations[0];
int i;
for (i = 1; i < length; ++i)
if (activations[i] > max)
max = activations[i];
sum = 0;
for (i = 0; i < length; ++i) {
activations[i] = (float)exp(activations[i] - max);
sum += activations[i];
}
for (i = 0; i < length; ++i)
activations[i] /= sum;
}
/*
Differentiation of neural_network_hypothesis_tapenadesource in reverse (adjoint) mode:
gradient of useful results: neural_network_hypothesis_tapenadesource
*network.b[0:10-1] *network.W[0:10-1][0:28*28-1]
with respect to varying inputs: *network.b[0:10-1] *network.W[0:10-1][0:28*28-1]
RW status of diff variables: neural_network_hypothesis_tapenadesource:in-killed
*network.b[0:10-1]:incr *network.W[0:10-1][0:28*28-1]:incr
Plus diff mem management of: network:in *network.b:in *network.W:in
*network.W[0:10-1]:in
*/
static void neural_network_hypothesis_tapenadesource_b(const mnist_image_t *
image, const neural_network_t *network, neural_network_t *networkb,
uint8_t label, float neural_network_hypothesis_tapenadesourceb) {
float activations[10];
float activationsb[10];
int ii1;
float neural_network_hypothesis_tapenadesource;
for (int i = 0; i < 10; ++i) {
activations[i] = network->b[i];
for (int j = 0; j < 784; ++j)
activations[i] = activations[i] + network->W[i][j]*((float)image->
pixels[j]/255.0f);
}
pushReal4Array(activations, 10);
neural_network_softmax_c(activations, 10);
for (ii1 = 0; ii1 < 10; ++ii1)
activationsb[ii1] = 0.0;
activationsb[(int)label] = activationsb[(int)label] -
neural_network_hypothesis_tapenadesourceb/activations[(int)label];
popReal4Array(activations, 10);
neural_network_softmax_b(activations, activationsb, 10);
for (int i = 9; i > -1; --i) {
for (int j = 783; j > -1; --j)
networkb->W[i][j] = networkb->W[i][j] + (float)image->pixels[j]*
activationsb[i]/255.0f;
networkb->b[i] = networkb->b[i] + activationsb[i];
activationsb[i] = 0.0;
}
}
/**
* Update the gradients for this step of gradient descent using the gradient
* contributions from a single training example (image).
*
 * This function returns the loss contribution from this training example.
*/
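// For softmax outputs p with cross-entropy loss L = -log(p[label]), the
// gradient with respect to the pre-softmax activation z_i is p_i - [i == label],
// so each weight gradient is (p_i - [i == label]) * pixel_j and each bias
// gradient is p_i - [i == label]; that is exactly what the loop below accumulates.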
float neural_network_gradient_update(mnist_image_t * image, const neural_network_t * network, neural_network_gradient_t * gradient, uint8_t label)
{
float activations[MNIST_LABELS];
float b_grad, W_grad;
int i, j;
// First forward propagate through the network to calculate activations
neural_network_hypothesis(image, network, activations);
for (i = 0; i < MNIST_LABELS; i++) {
// This is the gradient for a softmax bias input
b_grad = (i == label) ? activations[i] - 1 : activations[i];
for (j = 0; j < MNIST_IMAGE_SIZE; j++) {
// The gradient for the neuron weight is the bias multiplied by the input weight
W_grad = b_grad * PIXEL_SCALE(image->pixels[j]);
// Update the weight gradient
gradient->W_grad[i][j] += W_grad;
}
// Update the bias gradient
gradient->b_grad[i] += b_grad;
}
// Cross entropy loss
return 0.0f - log(activations[label]);
}
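// Enzyme calling convention: __enzyme_autodiff takes the function to
// differentiate followed by its arguments; enzyme_const marks the next
// argument as inactive, and an unannotated pointer argument is treated as
// duplicated, i.e. the primal pointer is immediately followed by a shadow
// pointer into which the gradient is accumulated (here `network` then `gradient`).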
extern int enzyme_const;
template<typename Return, typename... T>
Return __enzyme_autodiff(T...);
static void calculateDerivatives(mnist_image_t * image, const neural_network_t * network, neural_network_t* gradient, uint8_t label) {
__enzyme_autodiff<void>(neural_network_hypothesis_v2, enzyme_const, image, network, gradient, enzyme_const, label);
}
/**
* Run one step of gradient descent and update the neural network.
*/
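// One SGD step: accumulate gradients over the whole mini-batch, then update
// each parameter as theta <- theta - learning_rate * grad / batch_size.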
float neural_network_training_step(mnist_dataset_t * dataset, neural_network_t * network, float learning_rate)
{
neural_network_t gradient = {0};
neural_network_t gradient2 = {0};
/*
adept::Stack stack;
aneural_network_t anetwork;
for (int i = 0; i < MNIST_LABELS; i++) {
anetwork.b[i] = network->b[i];
for (int j = 0; j < MNIST_IMAGE_SIZE; j++) {
anetwork.W[i][j] = network->W[i][j];
}
}*/
float total_loss;
int i, j;
// Calculate the gradient and the loss by looping through the training set
for (i = 0, total_loss = 0; i < dataset->size; i++) {
mnist_image_t* image = &dataset->images[i];
uint8_t label = dataset->labels[i];
// First forward propagate through the network to calculate activations
//calculateDerivatives(image, network, &gradient, label);
//calculateDerivatives_adept(image, i != 0, stack, &anetwork, &gradient, label);
//neural_network_hypothesis_tapenadesource_b(image, network, &gradient, label, 1.0);
total_loss += neural_network_gradient_update(image, network, (neural_network_gradient_t*)&gradient, label);
//total_loss +=neural_network_gradient_update(image, network, (neural_network_gradient_t*)&gradient2, label);
//float activations[MNIST_LABELS];
//neural_network_hypothesis(image, network, activations);
//total_loss -= log(activations[label]);
}
// Apply gradient descent to the network
for (i = 0; i < MNIST_LABELS; i++) {
//printf("b'[i] %f %f\n", gradient.b[i], gradient2.b[i]);
network->b[i] -= learning_rate * gradient.b[i] / ((float) dataset->size);
for (j = 0; j < MNIST_IMAGE_SIZE; j++) {
network->W[i][j] -= learning_rate * gradient.W[i][j] / ((float) dataset->size);
}
}
return total_loss;
}
/**
* Run one step of gradient descent and update the neural network.
*/
float neural_network_training_step_enzyme(mnist_dataset_t * dataset, neural_network_t * network, float learning_rate)
{
neural_network_t gradient = {0};
neural_network_t gradient2 = {0};
float total_loss;
int i, j;
// Calculate the gradient and the loss by looping through the training set
for (i = 0, total_loss = 0; i < dataset->size; i++) {
mnist_image_t* image = &dataset->images[i];
uint8_t label = dataset->labels[i];
// First forward propagate through the network to calculate activations
calculateDerivatives(image, network, &gradient, label);
//calculateDerivatives_adept(image, i != 0, stack, &anetwork, &gradient, label);
//neural_network_hypothesis_tapenadesource_b(image, network, &gradient, label, 1.0);
//total_loss +=neural_network_gradient_update(image, network, (neural_network_gradient_t*)&gradient2, label);
float activations[MNIST_LABELS];
neural_network_hypothesis(image, network, activations);
total_loss -= log(activations[label]);
}
// Apply gradient descent to the network
for (i = 0; i < MNIST_LABELS; i++) {
//printf("b'[i] %f %f\n", gradient.b[i], gradient2.b[i]);
network->b[i] -= learning_rate * gradient.b[i] / ((float) dataset->size);
for (j = 0; j < MNIST_IMAGE_SIZE; j++) {
network->W[i][j] -= learning_rate * gradient.W[i][j] / ((float) dataset->size);
}
}
return total_loss;
}
/**
* Run one step of gradient descent and update the neural network.
*/
float neural_network_training_step_adept(mnist_dataset_t * dataset, neural_network_t * network, float learning_rate)
{
neural_network_t gradient = {0};
neural_network_t gradient2 = {0};
adept::Stack stack;
aneural_network_t anetwork;
for (int i = 0; i < MNIST_LABELS; i++) {
anetwork.b[i] = network->b[i];
for (int j = 0; j < MNIST_IMAGE_SIZE; j++) {
anetwork.W[i][j] = network->W[i][j];
}
}
float total_loss;
int i, j;
// Calculate the gradient and the loss by looping through the training set
for (i = 0, total_loss = 0; i < dataset->size; i++) {
mnist_image_t* image = &dataset->images[i];
uint8_t label = dataset->labels[i];
// First forward propagate through the network to calculate activations
calculateDerivatives_adept(image, i != 0, stack, &anetwork, &gradient, label);
//total_loss +=neural_network_gradient_update(image, network, (neural_network_gradient_t*)&gradient2, label);
float activations[MNIST_LABELS];
neural_network_hypothesis(image, network, activations);
total_loss -= log(activations[label]);
}
// Apply gradient descent to the network
for (i = 0; i < MNIST_LABELS; i++) {
//printf("b'[i] %f %f\n", gradient.b[i], gradient2.b[i]);
network->b[i] -= learning_rate * gradient.b[i] / ((float) dataset->size);
for (j = 0; j < MNIST_IMAGE_SIZE; j++) {
network->W[i][j] -= learning_rate * gradient.W[i][j] / ((float) dataset->size);
}
}
return total_loss;
}
/**
* Run one step of gradient descent and update the neural network.
*/
float neural_network_training_step_tapenade(mnist_dataset_t * dataset, neural_network_t * network, float learning_rate)
{
neural_network_t gradient = {0};
neural_network_t gradient2 = {0};
float total_loss;
int i, j;
// Calculate the gradient and the loss by looping through the training set
for (i = 0, total_loss = 0; i < dataset->size; i++) {
mnist_image_t* image = &dataset->images[i];
uint8_t label = dataset->labels[i];
// First forward propagate through the network to calculate activations
neural_network_hypothesis_tapenadesource_b(image, network, &gradient, label, 1.0);
//total_loss +=neural_network_gradient_update(image, network, (neural_network_gradient_t*)&gradient2, label);
float activations[MNIST_LABELS];
neural_network_hypothesis(image, network, activations);
total_loss -= log(activations[label]);
}
// Apply gradient descent to the network
for (i = 0; i < MNIST_LABELS; i++) {
//printf("b'[i] %f %f\n", gradient.b[i], gradient2.b[i]);
network->b[i] -= learning_rate * gradient.b[i] / ((float) dataset->size);
for (j = 0; j < MNIST_IMAGE_SIZE; j++) {
network->W[i][j] -= learning_rate * gradient.W[i][j] / ((float) dataset->size);
}
}
return total_loss;
}
/**
* Downloaded from: http://yann.lecun.com/exdb/mnist/
*/
const char * train_images_file = "data/train-images-idx3-ubyte";
const char * train_labels_file = "data/train-labels-idx1-ubyte";
const char * test_images_file = "data/t10k-images-idx3-ubyte";
const char * test_labels_file = "data/t10k-labels-idx1-ubyte";
/**
* Calculate the accuracy of the predictions of a neural network on a dataset.
*/
float calculate_accuracy(mnist_dataset_t * dataset, neural_network_t * network) {
float activations[MNIST_LABELS], max_activation;
int i, j, correct, predict;
// Loop through the dataset
for (i = 0, correct = 0; i < dataset->size; i++) {
// Calculate the activations for each image using the neural network
neural_network_hypothesis(&dataset->images[i], network, activations);
// Set predict to the index of the greatest activation
for (j = 0, predict = 0, max_activation = activations[0]; j < MNIST_LABELS; j++) {
if (max_activation < activations[j]) {
max_activation = activations[j];
predict = j;
}
}
// Increment the correct count if we predicted the right label
if (predict == dataset->labels[i]) {
correct++;
}
}
// Return the percentage we predicted correctly as the accuracy
return ((float) correct) / ((float) dataset->size);
}
#include <sys/time.h>
#include <inttypes.h>
float tdiff(struct timeval *start, struct timeval *end) {
return (end->tv_sec-start->tv_sec) + 1e-6*(end->tv_usec-start->tv_usec);
}
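// Benchmark driver: trains the same single-layer softmax model with whichever
// gradient routine is passed in (hand-written, Enzyme, Adept or Tapenade),
// printing per-step loss/accuracy and the total wall-clock time so the four
// approaches can be compared.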
void run(float (*fn)(mnist_dataset_t*, neural_network_t*, float)) {
mnist_dataset_t * train_dataset, * test_dataset;
mnist_dataset_t batch;
neural_network_t network;
float loss, accuracy;
int i, batches;
// Read the datasets from the files
train_dataset = mnist_get_dataset(train_images_file, train_labels_file);
test_dataset = mnist_get_dataset(test_images_file, test_labels_file);
// Initialise weights and biases with random values
neural_network_random_weights(&network);
// Calculate how many batches (so we know when to wrap around)
batches = train_dataset->size / BATCH_SIZE;
struct timeval start, end;
gettimeofday(&start, NULL);
for (i = 0; i < STEPS; i++) {
// Initialise a new batch
mnist_batch(train_dataset, &batch, BATCH_SIZE, i % batches);
// Run one step of gradient descent and calculate the loss
loss = fn(&batch, &network, 0.5);
// Calculate the accuracy using the whole test dataset
accuracy = calculate_accuracy(test_dataset, &network);
printf("Step %04d\tAverage Loss: %.2f\tAccuracy: %.3f\n", i, loss / batch.size, accuracy);
}
gettimeofday(&end, NULL);
printf("%0.6f\n", tdiff(&start, &end));
// Cleanup
mnist_free_dataset(train_dataset);
mnist_free_dataset(test_dataset);
}
int main(int argc, char *argv[])
{
printf("Regular\n");
run(neural_network_training_step);
printf("Enzyme\n");
run(neural_network_training_step_enzyme);
printf("Adept\n");
run(neural_network_training_step_adept);
printf("Tapenade\n");
run(neural_network_training_step_tapenade);
return 0;
}