Neural Network With Eigen Lib

Here is the code for a small C++ neural network trained on an XOR-style example. The only dependency is the Eigen library, which is a very lightweight linear-algebra library.
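Eigen is header-only, so there is nothing to link against; on Linux, for example, a command along the lines of g++ -O2 -I /usr/include/eigen3 MakeYourOwnNeuralNetwork.cpp -o xor_nn should be enough (adjust the -I path to wherever the Eigen headers are installed on your system).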

//
// MakeYourOwnNeuralNetwork.cpp : Defines the entry point for the console application.
//

#ifdef __linux__
#elif _WIN32
#include "stdafx.h"
#else
#endif

#include <iostream>
#include <vector>
#include <random>
#include <cmath>
#include <cstdio>   // FILE, fopen/fopen_s, fprintf
#include <chrono>
#include <Eigen/Dense>
#include <Eigen/Core>

using namespace Eigen;

/*
Quick diagram of how the layers are organized.

  |Layer zero| Layer One
i1|--------N1|--------N1----- O1
  |\______/  | \______/
  |/      \  | /      \
i2|--------N2|--------N2----- O2
  |          |

Error[0] vector is the error of the last/output layer.
Errors are stored in reverse order: Error[n - 1] is the error of the layer closest to the inputs, where n is the number of layers; in the diagram above n = 2, so that error is Error[1].

*/
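
/*
For reference, these are the formulas the functions below implement (sigmoid
activations, no bias terms). For layer l with weight matrix W_l, soma s_l,
activation a_l and error E_l:

  forward pass  : s_l = W_l * a_(l-1)          (a_(l-1) is the previous layer's activation,
                                                or the input vector for the first layer)
  activation    : a_l = 1 / (1 + exp(-s_l))
  output error  : E_out = target - a_out
  hidden error  : E_(l-1) = W_l^T * E_l
  gradient      : D_l = -(E_l o a_l o (1 - a_l)) * a_(l-1)^T    (o = element-wise product)
  update        : W_l = W_l - learning_rate * D_l
*/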

//#define _WEIGHT_DEBUG
//#define _SOMA_DEBUG
//#define _ACTIVATION_DEBUG
//#define _ERROR_DEBUG

void forward_calculation(const std::vector<MatrixXd> &weights, std::vector<VectorXd> &somas, std::vector<VectorXd> &activations, const VectorXd &inputs)
{
/*
* Forward feeding through the network.
*/
  for (int i = 0; i < weights.size(); i++) { // For every layer
    if (i == 0) // Calculate the forward summation for the input layer
      somas[i] = (weights[i] * inputs);
    else // Calculate the forward summation for every other layer
      somas[i] = (weights[i] * activations[i - 1]);

    // Calculate the sigmoid activation of the layer: a = 1 / (1 + exp(-soma))
    activations[i] = 1.0 / (1.0 + (-somas[i].array()).exp());

#ifdef _WEIGHT_DEBUG
  std::cout << "weights : " << i << "\n" << weights[i] << std::endl;
#endif
#ifdef _SOMA_DEBUG
  std::cout << "Soma : " << i << "\n" << somas[i] << std::endl;
#endif
#ifdef _ACTIVATION_DEBUG
  std::cout << "Activations : " << i << "\n" << activations[i] << std::endl;
#endif
  }
}
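
// Note: the somas, activations and errors vectors must already contain one entry per
// layer; they are first sized and filled during the initialisation code in main().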

void backpropagation(const std::vector<MatrixXd> &weights, const std::vector<VectorXd> &activations, std::vector<VectorXd> &errors, const VectorXd &targets)
{
/*
* Calculate Errors
*/
  int err_count = 0;
  for (int i = (weights.size() - 1); i >= 0; i--) { // For every layer   starting from the last one

    if (i == (weights.size() - 1)) { // For the last layer, calculate the error from the targets (only (target - activation) is needed here; the rest of the gradient-descent derivative is applied in gradient_descent())
      VectorXd e(activations[i].size());
      e = (targets - activations[i]);
      errors[0] = e;
    }
    else { // Calculate the error of all other layers E = W^t * E.
      VectorXd e(activations[i].size());
      e = weights[i + 1].transpose()*errors[err_count - 1];
      errors[err_count] = e;
    }
#ifdef _ERROR_DEBUG
  std::cout << "errors: " << err_count << "\n" << errors[err_count] << std::endl;
#endif
  err_count++;
  }
}

void gradient_descent(std::vector<MatrixXd> &weights, const std::vector<VectorXd> &activations, const std::vector<VectorXd> &errors, const VectorXd &inputs, const VectorXd &targets, double learning_rate)
{
  for (int i = weights.size() - 1; i >= 0; i--) { // For every layer in the network, starting from the last/output layer.
    MatrixXd delta;
    Eigen::Index n = activations[i].size();

    if (i != 0) { // Calculate the delta (gradient) of every layer but the first one: D = (El o Ol o (1-Ol)) * O^t(l-1), where o denotes the Hadamard (element-wise) product
      delta = -((VectorXd)(errors[(weights.size() - 1)-i].array()*activations[i].array()*(VectorXd::Ones(n) - activations[i]).array()))*activations[i - 1].transpose();
    }
    else { // Calculate the delta (gradient) of the first layer, which reads the network inputs: D = (El o Ol o (1-Ol)) * I^t
      delta = -((VectorXd)(errors[(weights.size() - 1)-i].array()*activations[i].array()*(VectorXd::Ones(n) - activations[i]).array()))*inputs.transpose();
    }

    weights[i] = weights[i] - (learning_rate * delta); // Update the weights Wn = W - (learningrate*D)
  }
}

int main()
{
  Eigen::initParallel();

  //std::cout << "Number of threads" << Eigen::nbThreads() << std::endl;
  //getchar();
/*
* Used for random.
*/
  std::default_random_engine generator;
  generator.seed(std::chrono::system_clock::now().time_since_epoch().count());
  std::normal_distribution<double> distribution(0, 1);

/*
* Network description.
*/
  int number_of_inputs = 2;
  int number_of_layers = 2;
  std::vector<int> number_neurons_in_layer(number_of_layers); // std::vector instead of a raw new[], so nothing has to be deleted by hand

  number_neurons_in_layer[0] = 10;
  number_neurons_in_layer[1] = 2;
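  // i.e. a fully connected 2-10-2 network: two inputs, a hidden layer of 10 neurons
  // and an output layer of 2 neurons; no bias terms are used anywhere in this example.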

/*
* Network and training information.
*/
  std::vector<VectorXd> inputs;
  std::vector<VectorXd> targets;
  std::vector<MatrixXd> weights;
  std::vector<VectorXd> somas;
  std::vector<VectorXd> activations;
  std::vector<VectorXd> errors;
  double totalError = 10;

/*
* Setting training parameters.
*/
  inputs.push_back(Vector2d(0.99, 0.99));
  inputs.push_back(Vector2d(0.99, 0.01));
  inputs.push_back(Vector2d(0.01, 0.99));
  inputs.push_back(Vector2d(0.01, 0.01));

  targets.push_back(Vector2d(0.01, 0.01));
  targets.push_back(Vector2d(0.01, 0.99));
  targets.push_back(Vector2d(0.99, 0.01));
  targets.push_back(Vector2d(0.01, 0.01));
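  // 0.01 and 0.99 stand in for logical 0 and 1: a sigmoid output only approaches 0 and 1
  // asymptotically, so exact 0/1 targets could never be reached.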

/*
* Initialize network.
*/
  for (int i = 0; i < number_of_layers; i++) {
    MatrixXd t;
    if (i == 0)
      t = MatrixXd(number_neurons_in_layer[i], number_of_inputs);
    else
      t = MatrixXd(number_neurons_in_layer[i], number_neurons_in_layer[i - 1]);

    for (int c = 0; c < t.cols(); c++)
      for (int r = 0; r < t.rows(); r++)
        t(r, c) = distribution(generator);

    weights.push_back(t);

    if (i == 0)
      somas.push_back((-1)*(weights[i] * inputs[0]));
    else
      somas.push_back((-1)*(weights[i] * activations[activations.size() - 1]));

    VectorXd tt = 1 + somas[i].array().exp();
    activations.push_back(tt.array().inverse());
#ifdef _MYDEBUG2
    std::cout << "weights : " << i << "\n" << weights[i] << std::endl;
    std::cout << "Soma : " << i << "\n" << somas[i] << std::endl;
    std::cout << "Activations : " << i << "\n" << activations[i] << std::endl;
#endif
  }

/*
* Initialize Errors
*/
  for (int i = weights.size(); i > 0; i--) {
    if (i == (weights.size())) {
      VectorXd e(activations[i - 1].size());
      for (int o = 0; o < activations[i - 1].size(); o++) {
        e(o) = (targets[0](o) - activations[i - 1](o));
      }
      errors.push_back(e);
    }
    else {
      VectorXd e(activations[i - 1].size());
      e = weights[i].transpose()*errors[errors.size() - 1];
      errors.push_back(e);
    }
  }

#ifdef _WIN32
  FILE *ofile = nullptr;
  fopen_s(&ofile, "errors.txt", "w+"); // error log, one total-error value per pass
#else
  FILE *ofile = fopen("errors.txt", "w+"); // fopen_s is MSVC-specific, so use plain fopen elsewhere
#endif
  double learning_rate = 0.1;
  int batch = 10;
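
  // Training: each of the four patterns is presented 'batch' times in a row, with a full
  // forward pass, error calculation and weight update on every presentation. Training
  // stops once the error accumulated over a whole pass, divided by 'batch', drops below 0.1.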
  do {
    totalError = 0;

    for (int i = 0; i < targets.size(); i++) {
      for (int k = 0; k < batch; k++) {
        forward_calculation(weights, somas, activations, inputs[i]);
        backpropagation(weights, activations, errors, targets[i]);
        totalError += fabs(errors[0].sum());
        gradient_descent(weights, activations, errors, inputs[i], targets[i], learning_rate);
      }
    }
    std::cout << "Total Error: " << totalError << std::endl;
    fprintf(ofile, "%lf\n", totalError);
  }while(totalError / batch > 0.1);

  fclose(ofile);

  // Simple interactive test: enter a pattern index (0-3) to run a forward pass on that input; any other number exits.
  while (true) {
    int selected;
    std::cin >> selected;
    if (selected >= 0 && selected <= 3) {
      forward_calculation(weights, somas, activations, inputs[selected]);
      std::cout << "Selected: " << selected << std::endl;
      for (int i = 0; i < inputs[selected].size(); i++)
        std::cout << "In: " << inputs[selected](i) << " ; Output: " << activations[activations.size() - 1](i) << std::endl;
    }
    else break;
  }

  return 0;
}