#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <math.h>
| |
#define ADEPT_FLOATING_POINT_TYPE float
| #include <adept_source.h> |
| #include <adept_arrays.h> |
using adept::adouble;
using adept::aReal;
using adept::aMatrix;
using adept::aVector;
using adept::Vector;
| |
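// Softmax-regression MNIST classifier used to compare a hand-written
// gradient ("Regular") against three automatic-differentiation tools:
// Enzyme, Adept, and Tapenade. Each approach gets its own training-step
// function, and main() runs and times all four.
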
| // from https://github.com/AndrewCarterUK/mnist-neural-network-plain-c |
| |
| #define MNIST_LABEL_MAGIC 0x00000801 |
| #define MNIST_IMAGE_MAGIC 0x00000803 |
| #define MNIST_IMAGE_WIDTH 28 |
| #define MNIST_IMAGE_HEIGHT 28 |
#define MNIST_IMAGE_SIZE (MNIST_IMAGE_WIDTH * MNIST_IMAGE_HEIGHT)
| #define MNIST_LABELS 10 |
| |
| typedef struct mnist_label_file_header_t_ { |
| uint32_t magic_number; |
| uint32_t number_of_labels; |
| } __attribute__((packed)) mnist_label_file_header_t; |
| |
| typedef struct mnist_image_file_header_t_ { |
| uint32_t magic_number; |
| uint32_t number_of_images; |
| uint32_t number_of_rows; |
| uint32_t number_of_columns; |
| } __attribute__((packed)) mnist_image_file_header_t; |
| |
| typedef struct mnist_image_t_ { |
| uint8_t pixels[MNIST_IMAGE_SIZE]; |
| } __attribute__((packed)) mnist_image_t; |
| |
| typedef struct mnist_dataset_t_ { |
| mnist_image_t * images; |
| uint8_t * labels; |
| uint32_t size; |
| } mnist_dataset_t; |
| |
| typedef struct neural_network_t_ { |
| float b[MNIST_LABELS]; |
| float W[MNIST_LABELS][MNIST_IMAGE_SIZE]; |
| } neural_network_t; |
| |
| typedef struct aneural_network_t_ { |
| adept::FixedArray<float,true,MNIST_LABELS> b; |
| adept::FixedArray<float,true,MNIST_LABELS,MNIST_IMAGE_SIZE> W; |
| } aneural_network_t; |
| |
| typedef struct neural_network_gradient_t_ { |
| float b_grad[MNIST_LABELS]; |
| float W_grad[MNIST_LABELS][MNIST_IMAGE_SIZE]; |
| } neural_network_gradient_t; |
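
// Note: neural_network_t and neural_network_gradient_t share the same layout
// (b followed by W), which the training code relies on when casting between
// them.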
| |
| /** |
| * Convert from the big endian format in the dataset if we're on a little endian |
| * machine. |
| */ |
| uint32_t map_uint32(uint32_t in) |
| { |
| #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ |
| return ( |
| ((in & 0xFF000000) >> 24) | |
| ((in & 0x00FF0000) >> 8) | |
| ((in & 0x0000FF00) << 8) | |
| ((in & 0x000000FF) << 24) |
| ); |
| #else |
| return in; |
| #endif |
| } |
| |
| /** |
| * Read labels from file. |
| * |
| * File format: http://yann.lecun.com/exdb/mnist/ |
| */ |
| uint8_t * get_labels(const char * path, uint32_t * number_of_labels) |
| { |
| FILE * stream; |
| mnist_label_file_header_t header; |
| uint8_t * labels; |
| |
| stream = fopen(path, "rb"); |
| |
| if (NULL == stream) { |
| fprintf(stderr, "Could not open file: %s\n", path); |
| return NULL; |
| } |
| |
| if (1 != fread(&header, sizeof(mnist_label_file_header_t), 1, stream)) { |
| fprintf(stderr, "Could not read label file header from: %s\n", path); |
| fclose(stream); |
| return NULL; |
| } |
| |
| header.magic_number = map_uint32(header.magic_number); |
| header.number_of_labels = map_uint32(header.number_of_labels); |
| |
| if (MNIST_LABEL_MAGIC != header.magic_number) { |
| fprintf(stderr, "Invalid header read from label file: %s (%08X not %08X)\n", path, header.magic_number, MNIST_LABEL_MAGIC); |
| fclose(stream); |
| return NULL; |
| } |
| |
| *number_of_labels = header.number_of_labels; |
| |
| labels = (uint8_t*)malloc(*number_of_labels * sizeof(uint8_t)); |
| |
| if (labels == NULL) { |
| fprintf(stderr, "Could not allocated memory for %d labels\n", *number_of_labels); |
| fclose(stream); |
| return NULL; |
| } |
| |
| if (*number_of_labels != fread(labels, 1, *number_of_labels, stream)) { |
| fprintf(stderr, "Could not read %d labels from: %s\n", *number_of_labels, path); |
| free(labels); |
| fclose(stream); |
| return NULL; |
| } |
| |
| fclose(stream); |
| |
| return labels; |
| } |
| |
| /** |
| * Read images from file. |
| * |
| * File format: http://yann.lecun.com/exdb/mnist/ |
| */ |
| mnist_image_t * get_images(const char * path, uint32_t * number_of_images) |
| { |
| FILE * stream; |
| mnist_image_file_header_t header; |
| mnist_image_t * images; |
| |
| stream = fopen(path, "rb"); |
| |
| if (NULL == stream) { |
| fprintf(stderr, "Could not open file: %s\n", path); |
| return NULL; |
| } |
| |
| if (1 != fread(&header, sizeof(mnist_image_file_header_t), 1, stream)) { |
| fprintf(stderr, "Could not read image file header from: %s\n", path); |
| fclose(stream); |
| return NULL; |
| } |
| |
| header.magic_number = map_uint32(header.magic_number); |
| header.number_of_images = map_uint32(header.number_of_images); |
| header.number_of_rows = map_uint32(header.number_of_rows); |
| header.number_of_columns = map_uint32(header.number_of_columns); |
| |
| if (MNIST_IMAGE_MAGIC != header.magic_number) { |
| fprintf(stderr, "Invalid header read from image file: %s (%08X not %08X)\n", path, header.magic_number, MNIST_IMAGE_MAGIC); |
| fclose(stream); |
| return NULL; |
| } |
| |
| if (MNIST_IMAGE_WIDTH != header.number_of_rows) { |
| fprintf(stderr, "Invalid number of image rows in image file %s (%d not %d)\n", path, header.number_of_rows, MNIST_IMAGE_WIDTH); |
| } |
| |
| if (MNIST_IMAGE_HEIGHT != header.number_of_columns) { |
| fprintf(stderr, "Invalid number of image columns in image file %s (%d not %d)\n", path, header.number_of_columns, MNIST_IMAGE_HEIGHT); |
| } |
| |
| *number_of_images = header.number_of_images; |
| images = (mnist_image_t*)malloc(*number_of_images * sizeof(mnist_image_t)); |
| |
| if (images == NULL) { |
| fprintf(stderr, "Could not allocated memory for %d images\n", *number_of_images); |
| fclose(stream); |
| return NULL; |
| } |
| |
| if (*number_of_images != fread(images, sizeof(mnist_image_t), *number_of_images, stream)) { |
| fprintf(stderr, "Could not read %d images from: %s\n", *number_of_images, path); |
| free(images); |
| fclose(stream); |
| return NULL; |
| } |
| |
| fclose(stream); |
| |
| return images; |
| } |
| |
| /** |
| * Free all the memory allocated in a dataset. This should not be used on a |
| * batched dataset as the memory is allocated to the parent. |
| */ |
| void mnist_free_dataset(mnist_dataset_t * dataset) |
| { |
| free(dataset->images); |
| free(dataset->labels); |
| free(dataset); |
| } |
| |
| mnist_dataset_t * mnist_get_dataset(const char * image_path, const char * label_path) |
| { |
| mnist_dataset_t * dataset; |
| uint32_t number_of_images, number_of_labels; |
| |
| dataset = (mnist_dataset_t*)calloc(1, sizeof(mnist_dataset_t)); |
| |
| if (NULL == dataset) { |
| return NULL; |
| } |
| |
| dataset->images = get_images(image_path, &number_of_images); |
| |
| if (NULL == dataset->images) { |
| mnist_free_dataset(dataset); |
| return NULL; |
| } |
| |
| dataset->labels = get_labels(label_path, &number_of_labels); |
| |
| if (NULL == dataset->labels) { |
| mnist_free_dataset(dataset); |
| return NULL; |
| } |
| |
| if (number_of_images != number_of_labels) { |
| fprintf(stderr, "Number of images does not match number of labels (%d != %d)\n", number_of_images, number_of_labels); |
| mnist_free_dataset(dataset); |
| return NULL; |
| } |
| |
| dataset->size = number_of_images; |
| |
| return dataset; |
| } |
| |
| /** |
| * Fills the batch dataset with a subset of the parent dataset. |
| */ |
| int mnist_batch(mnist_dataset_t * dataset, mnist_dataset_t * batch, int size, int number) |
| { |
| int start_offset; |
| |
| start_offset = size * number; |
| |
| if (start_offset >= dataset->size) { |
| return 0; |
| } |
| |
| batch->images = &dataset->images[start_offset]; |
| batch->labels = &dataset->labels[start_offset]; |
| batch->size = size; |
| |
| if (start_offset + batch->size > dataset->size) { |
| batch->size = dataset->size - start_offset; |
| } |
| |
| return 1; |
| } |
| |
| |
| |
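// Training hyperparameters: the number of gradient-descent steps and the
// number of images per mini-batch.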
| #define STEPS 1000 |
| #define BATCH_SIZE 100 |
| |
| |
| |
| // Convert a pixel value from 0-255 to one from 0 to 1 |
| #define PIXEL_SCALE(x) (((float) (x)) / 255.0f) |
| |
| // Returns a random value between 0 and 1 |
| #define RAND_FLOAT() (((float) rand()) / ((float) RAND_MAX)) |
| |
| /** |
| * Initialise the weights and bias vectors with values between 0 and 1 |
| */ |
| void neural_network_random_weights(neural_network_t * network) |
| { |
| int i, j; |
| |
| for (i = 0; i < MNIST_LABELS; i++) { |
| network->b[i] = RAND_FLOAT(); |
| |
| for (j = 0; j < MNIST_IMAGE_SIZE; j++) { |
| network->W[i][j] = RAND_FLOAT(); |
| } |
| } |
| } |
| |
| /** |
| * Calculate the softmax vector from the activations. This uses a more |
| * numerically stable algorithm that normalises the activations to prevent |
| * large exponents. |
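 *
 * Subtracting the maximum leaves the result unchanged:
 * exp(a_i - m) / sum_j exp(a_j - m) == exp(a_i) / sum_j exp(a_j).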
| */ |
| void neural_network_softmax(float * activations, int length) |
| { |
| int i; |
| float sum, max; |
| |
| for (i = 1, max = activations[0]; i < length; i++) { |
| if (activations[i] > max) { |
| max = activations[i]; |
| } |
| } |
| |
| for (i = 0, sum = 0; i < length; i++) { |
| activations[i] = exp(activations[i] - max); |
| sum += activations[i]; |
| } |
| |
| for (i = 0; i < length; i++) { |
| activations[i] /= sum; |
| } |
| } |
| |
| /** |
| * Calculate the softmax vector from the activations. This uses a more |
| * numerically stable algorithm that normalises the activations to prevent |
| * large exponents. |
| */ |
| void aneural_network_softmax(adept::FixedArray<float,true,MNIST_LABELS> &activations, int length) |
| { |
| int i; |
| aReal sum, max; |
| |
| for (i = 1, max = activations[0]; i < length; i++) { |
| if (activations[i] > max) { |
| max = activations[i]; |
| } |
| } |
| |
| for (i = 0, sum = 0; i < length; i++) { |
| activations[i] = exp(activations[i] - max); |
| sum += activations[i]; |
| } |
| |
| for (i = 0; i < length; i++) { |
| activations[i] /= sum; |
| } |
| } |
| /** |
| * Calculate the softmax vector from the activations. This uses a more |
| * numerically stable algorithm that normalises the activations to prevent |
| * large exponents. |
| */ |
| static void neural_network_softmax_v2(const float * activations, float* outp, int length) |
| { |
| int i; |
| float sum, max; |
| |
| for (i = 1, max = activations[0]; i < length; i++) { |
| if (activations[i] > max) { |
| max = activations[i]; |
| } |
| } |
| |
| for (i = 0, sum = 0; i < length; i++) { |
| sum += exp(activations[i] - max); |
| } |
| |
| for (i = 0; i < length; i++) { |
| outp[i] = exp(activations[i] - max) / sum; |
| } |
| } |
| /** |
 * Use the weights and bias vector to forward propagate through the neural
| * network and calculate the activations. |
| */ |
| void neural_network_hypothesis(const mnist_image_t * image, const neural_network_t * network, float activations[MNIST_LABELS]) |
| { |
| int i, j; |
| |
| for (i = 0; i < MNIST_LABELS; i++) { |
| activations[i] = network->b[i]; |
| |
| for (j = 0; j < MNIST_IMAGE_SIZE; j++) { |
| activations[i] += network->W[i][j] * PIXEL_SCALE(image->pixels[j]); |
| } |
| } |
| |
| neural_network_softmax(activations, MNIST_LABELS); |
| } |
| |
| /** |
 * Use the weights and bias vector to forward propagate through the neural
| * network and calculate the activations. |
| */ |
| static float neural_network_hypothesis_v2(const mnist_image_t * image, const neural_network_t * network, uint8_t label) |
| { |
| float activations[MNIST_LABELS] = {0}; |
| int i, j; |
| |
| for (i = 0; i < MNIST_LABELS; i++) { |
| activations[i] = network->b[i]; |
| |
| for (j = 0; j < MNIST_IMAGE_SIZE; j++) { |
| activations[i] += network->W[i][j] * PIXEL_SCALE(image->pixels[j]); |
| } |
| } |
| |
| float activations2[MNIST_LABELS] = { 0 }; |
| neural_network_softmax_v2(activations, activations2, MNIST_LABELS); |
| return -log(activations2[label]); |
| } |
| |
| |
| static aReal neural_network_hypothesis_adept(const mnist_image_t * image, const aneural_network_t * network, uint8_t label) |
| { |
| adept::FixedArray<float,true,MNIST_LABELS> activations = network->b; |
| int i, j; |
| |
| for (i = 0; i < MNIST_LABELS; i++) { |
| for (j = 0; j < MNIST_IMAGE_SIZE; j++) { |
| activations(i) += network->W(i,j) * PIXEL_SCALE(image->pixels[j]); |
| } |
| } |
| |
| aneural_network_softmax(activations, MNIST_LABELS); |
| return -log(activations[label]); |
| } |
| |
| |
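// Adept reverse pass: the forward computation is recorded on `stack`, the
// scalar loss is seeded with set_gradient(1.0), and stack.reverse()
// propagates adjoints back to the active parameters, which are read out
// with get_gradient().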
static void calculateDerivatives_adept(mnist_image_t * image, bool run, adept::Stack& stack, const aneural_network_t * anetwork, neural_network_t* gradient, uint8_t label) {
    // Start a fresh tape on the first call; afterwards resume the paused one.
    if (!run) {
        stack.new_recording();
    } else {
        stack.continue_recording();
    }
    aReal resa = neural_network_hypothesis_adept(image, anetwork, label);
    resa.set_gradient(1.0);
    stack.reverse();
    stack.pause_recording();
| |
| for (int i = 0; i < MNIST_LABELS; i++) { |
| gradient->b[i] = anetwork->b(i).get_gradient(); |
| for (int j = 0; j < MNIST_IMAGE_SIZE; j++) { |
| gradient->W[i][j] = anetwork->W(i,j).get_gradient(); |
| } |
| } |
| } |
| |
| extern "C" { |
| #include <adBuffer.h> |
| } |
| |
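// Reverse-mode adjoint of neural_network_softmax_c below, as generated by
// Tapenade. The pushReal4/popReal4 and pushControl1b/popControl1b calls come
// from Tapenade's adBuffer support library and implement the value and
// control-flow tapes.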
| void neural_network_softmax_b(float *activations, float *activationsb, int |
| length) { |
| float sum, max; |
| float sumb, maxb; |
| int branch; |
| max = activations[0]; |
| for (int i = 1; i < length; ++i) |
| if (activations[i] > max) { |
| max = activations[i]; |
| pushControl1b(1); |
| } else |
| pushControl1b(0); |
| sum = 0; |
| for (int i = 0; i < length; ++i) { |
| pushReal4(activations[i]); |
| activations[i] = (float)exp(activations[i] - max); |
| sum = sum + activations[i]; |
| } |
| for (int i = 0; i < length; ++i) { |
| pushReal4(activations[i]); |
| activations[i] = activations[i]/sum; |
| } |
| sumb = 0.0; |
| for (int i = length-1; i > -1; --i) { |
| popReal4(&(activations[i])); |
| sumb = sumb - activations[i]*activationsb[i]/(sum*sum); |
| activationsb[i] = activationsb[i]/sum; |
| } |
| { |
| float tempb; |
| maxb = 0.0; |
| for (int i = length-1; i > -1; --i) { |
| activationsb[i] = activationsb[i] + sumb; |
| popReal4(&(activations[i])); |
| tempb = exp(activations[i]-max)*activationsb[i]; |
| maxb = maxb - tempb; |
| activationsb[i] = tempb; |
| } |
| } |
| for (int i = length-1; i > 0; --i) { |
| popControl1b(&branch); |
| if (branch != 0) { |
| activationsb[i] = activationsb[i] + maxb; |
| maxb = 0.0; |
| } |
| } |
| activationsb[0] = activationsb[0] + maxb; |
| } |
| |
| /** |
| * Calculate the softmax vector from the activations. This uses a more |
| * numerically stable algorithm that normalises the activations to prevent |
| * large exponents. |
| */ |
| void neural_network_softmax_c(float *activations, int length) { |
| float sum, max; |
| max = activations[0]; |
| int i; |
| for (i = 1; i < length; ++i) |
| if (activations[i] > max) |
| max = activations[i]; |
| sum = 0; |
| for (i = 0; i < length; ++i) { |
| activations[i] = (float)exp(activations[i] - max); |
| sum += activations[i]; |
| } |
| for (i = 0; i < length; ++i) |
| activations[i] /= sum; |
| } |
| |
| /* |
| Differentiation of neural_network_hypothesis_tapenadesource in reverse (adjoint) mode: |
| gradient of useful results: neural_network_hypothesis_tapenadesource |
| *network.b[0:10-1] *network.W[0:10-1][0:28*28-1] |
| with respect to varying inputs: *network.b[0:10-1] *network.W[0:10-1][0:28*28-1] |
| RW status of diff variables: neural_network_hypothesis_tapenadesource:in-killed |
| *network.b[0:10-1]:incr *network.W[0:10-1][0:28*28-1]:incr |
| Plus diff mem management of: network:in *network.b:in *network.W:in |
| *network.W[0:10-1]:in |
| */ |
| static void neural_network_hypothesis_tapenadesource_b(const mnist_image_t * |
| image, const neural_network_t *network, neural_network_t *networkb, |
| uint8_t label, float neural_network_hypothesis_tapenadesourceb) { |
| float activations[10]; |
| float activationsb[10]; |
| int ii1; |
| float neural_network_hypothesis_tapenadesource; |
| for (int i = 0; i < 10; ++i) { |
| activations[i] = network->b[i]; |
| for (int j = 0; j < 784; ++j) |
| activations[i] = activations[i] + network->W[i][j]*((float)image-> |
| pixels[j]/255.0f); |
| } |
| pushReal4Array(activations, 10); |
| neural_network_softmax_c(activations, 10); |
| for (ii1 = 0; ii1 < 10; ++ii1) |
| activationsb[ii1] = 0.0; |
| activationsb[(int)label] = activationsb[(int)label] - |
| neural_network_hypothesis_tapenadesourceb/activations[(int)label]; |
| popReal4Array(activations, 10); |
| neural_network_softmax_b(activations, activationsb, 10); |
| for (int i = 9; i > -1; --i) { |
| for (int j = 783; j > -1; --j) |
| networkb->W[i][j] = networkb->W[i][j] + (float)image->pixels[j]* |
| activationsb[i]/255.0f; |
| networkb->b[i] = networkb->b[i] + activationsb[i]; |
| activationsb[i] = 0.0; |
| } |
| } |
| |
| /** |
| * Update the gradients for this step of gradient descent using the gradient |
| * contributions from a single training example (image). |
| * |
 * This function returns the loss contribution from this training example.
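 *
 * For softmax outputs p and cross-entropy loss L = -log(p[label]), the
 * gradient with respect to the pre-softmax activation z_i is
 * p_i - 1{i == label}; the weight and bias gradients then follow from
 * z = W x + b by the chain rule.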
| */ |
| float neural_network_gradient_update(mnist_image_t * image, const neural_network_t * network, neural_network_gradient_t * gradient, uint8_t label) |
| { |
| float activations[MNIST_LABELS]; |
| float b_grad, W_grad; |
| int i, j; |
| |
| // First forward propagate through the network to calculate activations |
| neural_network_hypothesis(image, network, activations); |
| |
| for (i = 0; i < MNIST_LABELS; i++) { |
        // Gradient of the loss w.r.t. activation i (and hence bias i): p_i - 1{i == label}
| b_grad = (i == label) ? activations[i] - 1 : activations[i]; |
| |
| for (j = 0; j < MNIST_IMAGE_SIZE; j++) { |
            // The weight gradient is the bias gradient scaled by the input pixel
| W_grad = b_grad * PIXEL_SCALE(image->pixels[j]); |
| |
| // Update the weight gradient |
| gradient->W_grad[i][j] += W_grad; |
| } |
| |
| // Update the bias gradient |
| gradient->b_grad[i] += b_grad; |
| } |
| |
| // Cross entropy loss |
| return 0.0f - log(activations[label]); |
| } |
| |
| |
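// Enzyme calling convention: enzyme_const marks an argument as inactive,
// while a differentiable pointer argument (network) is immediately followed
// by its shadow (gradient), into which Enzyme accumulates the derivatives.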
| extern int enzyme_const; |
| template<typename Return, typename... T> |
| Return __enzyme_autodiff(T...); |
| |
| static void calculateDerivatives(mnist_image_t * image, const neural_network_t * network, neural_network_t* gradient, uint8_t label) { |
| __enzyme_autodiff<void>(neural_network_hypothesis_v2, enzyme_const, image, network, gradient, enzyme_const, label); |
| } |
| |
| /** |
| * Run one step of gradient descent and update the neural network. |
| */ |
| float neural_network_training_step(mnist_dataset_t * dataset, neural_network_t * network, float learning_rate) |
| { |
| neural_network_t gradient = {0}; |
| |
| float total_loss; |
| int i, j; |
| |
| // Calculate the gradient and the loss by looping through the training set |
| for (i = 0, total_loss = 0; i < dataset->size; i++) { |
| mnist_image_t* image = &dataset->images[i]; |
| uint8_t label = dataset->labels[i]; |
| |
        // Accumulate the hand-derived gradient and the loss for this example
        // (neural_network_t is cast to the layout-compatible gradient struct,
        // see the note at the struct definitions).
        total_loss += neural_network_gradient_update(image, network, (neural_network_gradient_t*)&gradient, label);
| |
| } |
| |
| // Apply gradient descent to the network |
| for (i = 0; i < MNIST_LABELS; i++) { |
| //printf("b'[i] %f %f\n", gradient.b[i], gradient2.b[i]); |
| network->b[i] -= learning_rate * gradient.b[i] / ((float) dataset->size); |
| |
| for (j = 0; j < MNIST_IMAGE_SIZE + 1; j++) { |
| network->W[i][j] -= learning_rate * gradient.W[i][j] / ((float) dataset->size); |
| } |
| } |
| |
| return total_loss; |
| } |
| |
| /** |
| * Run one step of gradient descent and update the neural network. |
| */ |
| float neural_network_training_step_enzyme(mnist_dataset_t * dataset, neural_network_t * network, float learning_rate) |
| { |
| neural_network_t gradient = {0}; |
| |
| float total_loss; |
| int i, j; |
| |
| // Calculate the gradient and the loss by looping through the training set |
| for (i = 0, total_loss = 0; i < dataset->size; i++) { |
| mnist_image_t* image = &dataset->images[i]; |
| uint8_t label = dataset->labels[i]; |
| |
        // Compute this example's gradient contribution with Enzyme; the
        // derivatives are accumulated into `gradient` through its shadow
        calculateDerivatives(image, network, &gradient, label);

        // Recompute the forward pass to report the loss
        float activations[MNIST_LABELS];
        neural_network_hypothesis(image, network, activations);
        total_loss -= log(activations[label]);
| |
| } |
| |
| // Apply gradient descent to the network |
| for (i = 0; i < MNIST_LABELS; i++) { |
| //printf("b'[i] %f %f\n", gradient.b[i], gradient2.b[i]); |
| network->b[i] -= learning_rate * gradient.b[i] / ((float) dataset->size); |
| |
| for (j = 0; j < MNIST_IMAGE_SIZE + 1; j++) { |
| network->W[i][j] -= learning_rate * gradient.W[i][j] / ((float) dataset->size); |
| } |
| } |
| |
| return total_loss; |
| } |
| |
| /** |
| * Run one step of gradient descent and update the neural network. |
| */ |
| float neural_network_training_step_adept(mnist_dataset_t * dataset, neural_network_t * network, float learning_rate) |
| { |
| neural_network_t gradient = {0}; |
| |
| adept::Stack stack; |
| aneural_network_t anetwork; |
| |
| for (int i = 0; i < MNIST_LABELS; i++) { |
| anetwork.b[i] = network->b[i]; |
| for (int j = 0; j < MNIST_IMAGE_SIZE; j++) { |
| anetwork.W[i][j] = network->W[i][j]; |
| } |
| } |
| |
| float total_loss; |
| int i, j; |
| |
| // Calculate the gradient and the loss by looping through the training set |
| for (i = 0, total_loss = 0; i < dataset->size; i++) { |
| mnist_image_t* image = &dataset->images[i]; |
| uint8_t label = dataset->labels[i]; |
| |
        // Record the forward pass on the Adept stack and run the reverse
        // sweep to obtain the parameter gradients
        calculateDerivatives_adept(image, i != 0, stack, &anetwork, &gradient, label);

        // Recompute the forward pass to report the loss
        float activations[MNIST_LABELS];
        neural_network_hypothesis(image, network, activations);
        total_loss -= log(activations[label]);
| |
| } |
| |
| // Apply gradient descent to the network |
| for (i = 0; i < MNIST_LABELS; i++) { |
| //printf("b'[i] %f %f\n", gradient.b[i], gradient2.b[i]); |
| network->b[i] -= learning_rate * gradient.b[i] / ((float) dataset->size); |
| |
| for (j = 0; j < MNIST_IMAGE_SIZE + 1; j++) { |
| network->W[i][j] -= learning_rate * gradient.W[i][j] / ((float) dataset->size); |
| } |
| } |
| |
| return total_loss; |
| } |
| |
| /** |
| * Run one step of gradient descent and update the neural network. |
| */ |
| float neural_network_training_step_tapenade(mnist_dataset_t * dataset, neural_network_t * network, float learning_rate) |
| { |
| neural_network_t gradient = {0}; |
| |
| float total_loss; |
| int i, j; |
| |
| // Calculate the gradient and the loss by looping through the training set |
| for (i = 0, total_loss = 0; i < dataset->size; i++) { |
| mnist_image_t* image = &dataset->images[i]; |
| uint8_t label = dataset->labels[i]; |
| |
        // Accumulate gradients with the Tapenade-generated adjoint, seeding
        // the output adjoint with 1.0
        neural_network_hypothesis_tapenadesource_b(image, network, &gradient, label, 1.0);

        // Recompute the forward pass to report the loss
        float activations[MNIST_LABELS];
        neural_network_hypothesis(image, network, activations);
        total_loss -= log(activations[label]);
| } |
| |
| // Apply gradient descent to the network |
| for (i = 0; i < MNIST_LABELS; i++) { |
| //printf("b'[i] %f %f\n", gradient.b[i], gradient2.b[i]); |
| network->b[i] -= learning_rate * gradient.b[i] / ((float) dataset->size); |
| |
| for (j = 0; j < MNIST_IMAGE_SIZE + 1; j++) { |
| network->W[i][j] -= learning_rate * gradient.W[i][j] / ((float) dataset->size); |
| } |
| } |
| |
| return total_loss; |
| } |
| |
| /** |
| * Downloaded from: http://yann.lecun.com/exdb/mnist/ |
| */ |
| const char * train_images_file = "data/train-images-idx3-ubyte"; |
| const char * train_labels_file = "data/train-labels-idx1-ubyte"; |
| const char * test_images_file = "data/t10k-images-idx3-ubyte"; |
| const char * test_labels_file = "data/t10k-labels-idx1-ubyte"; |
| |
| /** |
| * Calculate the accuracy of the predictions of a neural network on a dataset. |
| */ |
| float calculate_accuracy(mnist_dataset_t * dataset, neural_network_t * network) { |
| float activations[MNIST_LABELS], max_activation; |
| int i, j, correct, predict; |
| |
| // Loop through the dataset |
| for (i = 0, correct = 0; i < dataset->size; i++) { |
| // Calculate the activations for each image using the neural network |
| neural_network_hypothesis(&dataset->images[i], network, activations); |
| |
| // Set predict to the index of the greatest activation |
| for (j = 0, predict = 0, max_activation = activations[0]; j < MNIST_LABELS; j++) { |
| if (max_activation < activations[j]) { |
| max_activation = activations[j]; |
| predict = j; |
| } |
| } |
| |
| // Increment the correct count if we predicted the right label |
| if (predict == dataset->labels[i]) { |
| correct++; |
| } |
| } |
| |
| // Return the percentage we predicted correctly as the accuracy |
| return ((float) correct) / ((float) dataset->size); |
| } |
| |
#include <sys/time.h>
| |
| float tdiff(struct timeval *start, struct timeval *end) { |
| return (end->tv_sec-start->tv_sec) + 1e-6*(end->tv_usec-start->tv_usec); |
| } |
| |
| void run(float (*fn)(mnist_dataset_t*, neural_network_t*, float)) { |
| mnist_dataset_t * train_dataset, * test_dataset; |
| mnist_dataset_t batch; |
| neural_network_t network; |
| float loss, accuracy; |
| int i, batches; |
| |
    // Read the datasets from the files
    train_dataset = mnist_get_dataset(train_images_file, train_labels_file);
    test_dataset = mnist_get_dataset(test_images_file, test_labels_file);

    if (NULL == train_dataset || NULL == test_dataset) {
        fprintf(stderr, "Could not load the MNIST dataset\n");
        exit(1);
    }

    // Initialise weights and biases with random values
    neural_network_random_weights(&network);
| |
| // Calculate how many batches (so we know when to wrap around) |
| batches = train_dataset->size / BATCH_SIZE; |
| |
| struct timeval start, end; |
| gettimeofday(&start, NULL); |
| |
| for (i = 0; i < STEPS; i++) { |
| // Initialise a new batch |
        mnist_batch(train_dataset, &batch, BATCH_SIZE, i % batches);
| |
| // Run one step of gradient descent and calculate the loss |
| loss = fn(&batch, &network, 0.5); |
| |
| // Calculate the accuracy using the whole test dataset |
| accuracy = calculate_accuracy(test_dataset, &network); |
| |
| printf("Step %04d\tAverage Loss: %.2f\tAccuracy: %.3f\n", i, loss / batch.size, accuracy); |
| } |
| |
| gettimeofday(&end, NULL); |
| printf("%0.6f\n", tdiff(&start, &end)); |
| |
| // Cleanup |
| mnist_free_dataset(train_dataset); |
| mnist_free_dataset(test_dataset); |
| |
| } |
| |
| int main(int argc, char *argv[]) |
| { |
| printf("Regular\n"); |
| run(neural_network_training_step); |
| printf("Enzyme\n"); |
| run(neural_network_training_step_enzyme); |
| printf("Adept\n"); |
| run(neural_network_training_step_adept); |
| printf("Tapenade\n"); |
| run(neural_network_training_step_tapenade); |
| return 0; |
| } |