SNABSuite 0.x
Spiking Neural Architecture Benchmark Suite
mnist_mlp.hpp
/*
 * SNABSuite -- Spiking Neural Architecture Benchmark Suite
 * Copyright (C) 2019 Christoph Ostrau
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

#pragma once

#include <algorithm>
#include <cassert>
#include <chrono>
#include <cmath>
#include <cstdint>
#include <random>
#include <string>
#include <vector>

#include <cypress/cypress.hpp>

#include "helper_functions.hpp"

namespace MNIST {
using cypress::Json;
using cypress::Matrix;
using cypress::Real;

/**
 * Root Mean Squared Error loss.
 */
class MSE {
public:
    static inline Real calc_loss(const uint16_t label,
                                 const std::vector<Real> &output)
    {
        Real res = 0.0;
        for (size_t neuron = 0; neuron < output.size(); neuron++) {
            if (label == neuron) {
                res += (output[neuron] - 1.0) * (output[neuron] - 1.0);
            }
            else {
                res += output[neuron] * output[neuron];
            }
        }
        res = sqrt(res / Real(output.size()));
        return res;
    }

    static inline std::vector<Real> calc_error(const uint16_t label,
                                               const std::vector<Real> &output)
    {
        std::vector<Real> res(output.size(), 0.0);
        for (size_t neuron = 0; neuron < output.size(); neuron++) {
            if (label == neuron) {
                res[neuron] = output[neuron] - 1.0;
            }
            else {
                res[neuron] = output[neuron] - 0.0;
            }
        }
        return res;
    }
};
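
// Illustration: for label = 1 and output = {0.2, 0.7, 0.1}, calc_error
// returns {0.2, -0.3, 0.1} -- proportional to the gradient of the squared
// error, pulling the label neuron towards 1 and all others towards 0.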

/**
 * Categorical hinge loss. Use if weights are restricted to be > 0.
 */
class CatHinge {
public:
    static inline Real calc_loss(const uint16_t label,
                                 const std::vector<Real> &output)
    {
        Real res = 0.0;
        for (size_t neuron = 0; neuron < output.size(); neuron++) {
            if (label == neuron) {
                res += std::max(0.0, 1.0 - Real(output[neuron]));
            }
            else {
                res += std::max(0.0, 1.0 + Real(output[neuron]));
            }
        }
        return res;
    }

    static inline std::vector<Real> calc_error(const uint16_t label,
                                               const std::vector<Real> &output)
    {
        // Real pos = output[label];
        std::vector<Real> vec = output;
        vec[label] = -0.0;
        auto neg_elem = std::max_element(vec.begin(), vec.end());
        auto index = std::distance(vec.begin(), neg_elem);
        auto res = std::vector<Real>(output.size(), 0.0);

        // Require that the label neuron and the next most active neuron
        // differ by at least 1
        if ((*neg_elem) - output[label] + 1 >= 0.0) {
            res[label] = -1.0;
            if (label != index) {
                res[index] = +1.0;
            }
        }
        return res;
    }
};
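
// Illustration: for label = 1 and output = {0.5, 0.7, 0.1}, the strongest
// competitor is neuron 0 with activation 0.5; since 0.5 - 0.7 + 1 >= 0 the
// required margin of 1 is violated and calc_error returns {+1, -1, 0}.
// update_mat below subtracts the error, so the label neuron is pushed up
// and the competitor is pushed down.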

/**
 * ActivationFunction ReLU: Rectified Linear Unit.
 */
class ReLU {
public:
    static inline std::vector<Real> function(std::vector<Real> input)
    {
        for (auto &i : input) {
            i = std::max(0.0, i);
        }
        return input;
    }

    static inline std::vector<Real> derivative(std::vector<Real> input)
    {
        for (auto &i : input) {
            i = i >= 0 ? 1.0 : 0.0;
        }
        return input;
    }
};

/**
 * Constraint for weights in neural network: No constraint.
 */
class NoConstraint {
public:
    static inline void constrain_weights(std::vector<cypress::Matrix<Real>> &)
    {
    }

    void setup(std::vector<cypress::Matrix<Real>> &) {}
};

/**
 * Constraint for weights in neural network: Only weights > 0.
 */
class PositiveWeights {
public:
    void setup(std::vector<cypress::Matrix<Real>> &) {}

    inline void constrain_weights(std::vector<cypress::Matrix<Real>> &layers)
    {
        for (auto &i : layers) {
            for (auto &j : i) {
                if (j < 0.0) {
                    j = 0.0;
                }
            }
        }
    }
};

/**
 * Constraint for weights in neural network: Only weights > 0, clipped to the
 * largest weight found at setup time.
 */
class PositiveLimitedweights {
public:
    Real m_max = 0.0;

    void setup(std::vector<cypress::Matrix<Real>> &layers)
    {
        for (auto &layer : layers) {
            auto w = mnist_helper::max_weight(layer);
            if (w > m_max)
                m_max = w;
        }
    }

    inline void constrain_weights(std::vector<cypress::Matrix<Real>> &layers)
    {
        for (auto &i : layers) {
            for (auto &j : i) {
                if (j < 0.0) {
                    j = 0.0;
                }
                if (j > m_max) {
                    j = m_max;
                }
            }
        }
    }
};
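
// A constraint is selected via the third template parameter of the MLP class
// below: setup() runs once on the freshly initialized weights, and
// constrain_weights() runs after every weight update during training.
// A minimal sketch (the alias name is hypothetical):
//
//   using PositiveMLP = MLP<CatHinge, ReLU, PositiveWeights>;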

/**
 * Base class for Multi Layer Networks (-> currently Perceptron only).
 * Allows us to use polymorphism with the templated MLP class below.
 */
class MLPBase {
public:
    virtual Real max_weight() const = 0;
    virtual Real min_weight() const = 0;
    virtual Real max_weight_abs() const = 0;
    virtual Real conv_max_weight(size_t layer_id = 0) const = 0;
    virtual const size_t &epochs() const = 0;
    virtual const size_t &batchsize() const = 0;
    virtual const Real &learnrate() const = 0;
    virtual const mnist_helper::MNIST_DATA &mnist_train_set() = 0;
    virtual const mnist_helper::MNIST_DATA &mnist_test_set() = 0;
    virtual const std::vector<cypress::Matrix<Real>> &get_weights() = 0;
    virtual const std::vector<mnist_helper::CONVOLUTION_LAYER>
        &get_conv_layers() = 0;
    virtual const std::vector<mnist_helper::POOLING_LAYER>
        &get_pooling_layers() = 0;
    virtual const std::vector<size_t> &get_layer_sizes() = 0;
    virtual const std::vector<mnist_helper::LAYER_TYPE> &get_layer_types() = 0;
    virtual void scale_down_images(size_t pooling_size = 3) = 0;
    virtual bool correct(const uint16_t label,
                         const std::vector<Real> &output) const = 0;
    virtual std::vector<std::vector<std::vector<Real>>> forward_path(
        const std::vector<size_t> &indices, const size_t start) const = 0;
    virtual Real forward_path_test() const = 0;
    virtual void backward_path(
        const std::vector<size_t> &indices, const size_t start,
        const std::vector<std::vector<std::vector<Real>>> &activations,
        bool last_only) = 0;
    virtual void backward_path_2(
        const std::vector<uint16_t> &labels,
        const std::vector<std::vector<std::vector<Real>>> &activations,
        bool last_only = false) = 0;
    virtual size_t accuracy(
        const std::vector<std::vector<std::vector<Real>>> &activations,
        const std::vector<size_t> &indices, const size_t start) = 0;
    virtual void train(unsigned seed = 0) = 0;
    virtual ~MLPBase() {}
};

/**
 * The standard densely connected multilayer Perceptron. Template arguments
 * provide the loss function, the activation function and the constraint
 * applied to the weights.
 */
template <typename Loss = MSE, typename ActivationFunction = ReLU,
          typename Constraint = NoConstraint>
class MLP : public MLPBase {
protected:
    std::vector<cypress::Matrix<Real>> m_layers;
    std::vector<size_t> m_layer_sizes;
    std::vector<mnist_helper::CONVOLUTION_LAYER> m_filters;
    std::vector<mnist_helper::POOLING_LAYER> m_pools;
    std::vector<mnist_helper::LAYER_TYPE> m_layer_types;
    size_t m_epochs = 20;
    size_t m_batchsize = 100;
    Real learn_rate = 0.01;
    mnist_helper::MNIST_DATA m_mnist;
    mnist_helper::MNIST_DATA m_mnist_test;

    void load_data(std::string path)
    {
        m_mnist = mnist_helper::loadMnistData(60000, path + "train");
        m_mnist_test = mnist_helper::loadMnistData(10000, path + "t10k");
    }

    Constraint m_constraint;

public:
    /**
     * Constructor for random init.
     */
    MLP(std::vector<size_t> layer_sizes, size_t epochs = 20,
        size_t batchsize = 100, Real learn_rate = 0.01)
        : m_layer_sizes(layer_sizes),
          m_epochs(epochs),
          m_batchsize(batchsize),
          learn_rate(learn_rate)
    {
        for (size_t i = 0; i < layer_sizes.size() - 1; i++) {
            m_layers.emplace_back(
                Matrix<Real>(layer_sizes[i], layer_sizes[i + 1]));
        }

        int seed = std::chrono::system_clock::now().time_since_epoch().count();
        auto rng = std::default_random_engine(seed);
        std::normal_distribution<Real> distribution(0.0, 1.0);
        for (auto &layer : m_layers) {
            // Kaiming init, best suited for ReLU activation functions
            auto scale = std::sqrt(2.0 / double(layer.rows()));
            for (size_t i = 0; i < layer.size(); i++) {
                layer[i] = distribution(rng) * scale;
            }
        }

        // Glorot uniform
        /*for (auto &layer : m_layers) {
            auto limit = std::sqrt(6.0 / Real(layer.rows() + layer.cols()));
            std::uniform_real_distribution<Real> distribution(0, limit);
            for (size_t i = 0; i < layer.size(); i++) {
                layer[i] = distribution(rng);
            }
        }*/

        try {
            load_data("");
        }
        catch (...) {
            load_data("../");
        }
        m_constraint.setup(m_layers);
    }

    /**
     * Constructs the network from a json file. The repo provides python
     * scripts to create those from a Keras model.
     */
    MLP(Json &data, size_t epochs = 20, size_t batchsize = 100,
        Real learn_rate = 0.01, bool random = false,
        Constraint constraint = Constraint())
        : m_epochs(epochs),
          m_batchsize(batchsize),
          learn_rate(learn_rate),
          m_constraint(constraint)
    {
        int seed = std::chrono::system_clock::now().time_since_epoch().count();
        auto rng = std::default_random_engine(seed);
        std::normal_distribution<Real> distribution(0.0, 1.0);
        for (auto &layer : data["netw"]) {
            if (layer["class_name"].get<std::string>() == "Dense") {
                auto &json = layer["weights"];
                m_layers.emplace_back(
                    Matrix<Real>(json.size(), json[0].size()));
                auto &weights = m_layers.back();
                auto scale = std::sqrt(2.0 / double(weights.rows()));
                for (size_t i = 0; i < json.size(); i++) {
                    for (size_t j = 0; j < json[i].size(); j++) {
                        if (!random) {
                            weights(i, j) = json[i][j].get<Real>();
                        }
                        else {
                            weights(i, j) = distribution(rng) * scale;
                        }
                    }
                }
                m_layer_sizes.emplace_back(m_layers.back().rows());
                m_layer_types.push_back(mnist_helper::LAYER_TYPE::Dense);
                cypress::global_logger().debug(
                    "MNIST", "Dense layer detected with size " +
                                 std::to_string(weights.rows()) + " times " +
                                 std::to_string(weights.cols()));
            }
            else if (layer["class_name"].get<std::string>() == "Conv2D") {
                auto &json = layer["weights"];
                size_t kernel_x = json.size();
                size_t kernel_y = json[0].size();
                size_t kernel_z = json[0][0].size();
                size_t output = json[0][0][0].size();
                size_t stride = layer["stride"];
                size_t padding = layer["padding"] == "valid" ? 0 : 1;
                std::vector<size_t> input_sizes;
                std::vector<size_t> output_sizes;
                if (!layer["input_shape_x"].empty()) {
                    input_sizes.push_back(layer["input_shape_x"]);
                    input_sizes.push_back(layer["input_shape_y"]);
                    input_sizes.push_back(layer["input_shape_z"]);
                }
                else {
                    if (m_layer_types.back() ==
                        mnist_helper::LAYER_TYPE::Conv) {
                        input_sizes.push_back(m_filters.back().output_sizes[0]);
                        input_sizes.push_back(m_filters.back().output_sizes[1]);
                        input_sizes.push_back(m_filters.back().output_sizes[2]);
                    }
                    else if (m_layer_types.back() ==
                             mnist_helper::LAYER_TYPE::Pooling) {
                        input_sizes.push_back(m_pools.back().output_sizes[0]);
                        input_sizes.push_back(m_pools.back().output_sizes[1]);
                        input_sizes.push_back(m_pools.back().output_sizes[2]);
                    }
                    else if (m_layer_types.back() ==
                             mnist_helper::LAYER_TYPE::Dense) {
                        throw std::runtime_error(
                            "Conv after Dense layer not implemented!");
                    }
                }
                output_sizes.push_back(
                    (input_sizes[0] - kernel_x + 2 * padding) / stride + 1);
                output_sizes.push_back(
                    (input_sizes[1] - kernel_y + 2 * padding) / stride + 1);
                output_sizes.push_back(output);
                mnist_helper::CONVOLUTION_FILTER conv_filter(
                    kernel_x,
                    std::vector<std::vector<std::vector<Real>>>(
                        kernel_y,
                        std::vector<std::vector<Real>>(
                            kernel_z, std::vector<Real>(output))));
                mnist_helper::CONVOLUTION_LAYER conv = {conv_filter,
                                                        input_sizes,
                                                        output_sizes,
                                                        stride,
                                                        padding};
                m_filters.emplace_back(conv);
                auto &weights = m_filters.back().filter;
                // auto scale = std::sqrt(2.0 / double(weights.rows()));
                for (size_t i = 0; i < json.size(); i++) {
                    for (size_t j = 0; j < json[i].size(); j++) {
                        for (size_t k = 0; k < json[i][j].size(); k++) {
                            for (size_t l = 0; l < json[i][j][k].size(); l++) {
                                weights[i][j][k][l] =
                                    json[i][j][k][l].get<Real>();
                            }
                        }
                    }
                }
                m_layer_sizes.emplace_back(input_sizes[0] * input_sizes[1] *
                                           input_sizes[2]);
                m_layer_types.push_back(mnist_helper::LAYER_TYPE::Conv);
                cypress::global_logger().debug(
                    "MNIST",
                    "Conv layer detected with size (" +
                        std::to_string(json.size()) + "," +
                        std::to_string(json[0].size()) + "," +
                        std::to_string(json[0][0].size()) + "," +
                        std::to_string(json[0][0][0].size()) + ")");
            }
            else if (layer["class_name"].get<std::string>() ==
                     "MaxPooling2D") {
                std::vector<size_t> size = layer["size"];
                size_t stride = layer["stride"];
                std::vector<size_t> input_sizes;
                std::vector<size_t> output_sizes;
                if (m_layer_types.empty()) {
                    throw std::runtime_error(
                        "Pooling layer must not be the first layer!");
                }
                if (m_layer_types.back() == mnist_helper::LAYER_TYPE::Conv) {
                    input_sizes.push_back(m_filters.back().output_sizes[0]);
                    input_sizes.push_back(m_filters.back().output_sizes[1]);
                    input_sizes.push_back(m_filters.back().output_sizes[2]);
                }
                else if (m_layer_types.back() ==
                         mnist_helper::LAYER_TYPE::Pooling) {
                    input_sizes.push_back(m_pools.back().output_sizes[0]);
                    input_sizes.push_back(m_pools.back().output_sizes[1]);
                    input_sizes.push_back(m_pools.back().output_sizes[2]);
                }
                else if (m_layer_types.back() ==
                         mnist_helper::LAYER_TYPE::Dense) {
                    throw std::runtime_error(
                        "Pooling after Dense not implemented!");
                }
                output_sizes.push_back((input_sizes[0] - size[0]) / stride + 1);
                output_sizes.push_back((input_sizes[1] - size[1]) / stride + 1);
                output_sizes.push_back(input_sizes[2]);
                mnist_helper::POOLING_LAYER pool = {input_sizes, output_sizes,
                                                    size, stride};
                m_pools.emplace_back(pool);
                m_layer_sizes.emplace_back(input_sizes[0] * input_sizes[1] *
                                           input_sizes[2]);
                m_layer_types.emplace_back(mnist_helper::LAYER_TYPE::Pooling);
                cypress::global_logger().debug(
                    "MNIST",
                    "Pooling layer detected with size (" +
                        std::to_string(size[0]) + ", " +
                        std::to_string(size[1]) + ") and stride " +
                        std::to_string(stride));
            }
            else {
                throw std::runtime_error("Unknown layer type");
            }
        }
        m_layer_sizes.push_back(m_layers.back().cols());
        // for (auto &layer : m_layers) {
        //     m_layer_sizes.emplace_back(layer.cols());
        // }

        m_mnist = mnist_helper::loadMnistData(60000, "train");
        m_mnist_test = mnist_helper::loadMnistData(10000, "t10k");
        m_constraint.setup(m_layers);
    }
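
    /* A sketch of the JSON layout the constructor above expects, inferred
     * from the parsing code (field values are illustrative only):
     *
     * {
     *   "netw": [
     *     {"class_name": "Conv2D", "weights": [[[[...]]]], "stride": 1,
     *      "padding": "valid", "input_shape_x": 28, "input_shape_y": 28,
     *      "input_shape_z": 1},
     *     {"class_name": "MaxPooling2D", "size": [2, 2], "stride": 2},
     *     {"class_name": "Dense", "weights": [[...], [...]]}
     *   ]
     * }
     */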

    /**
     * Return the largest weight in the network.
     */
    Real max_weight() const override
    {
        Real max = 0.0;
        for (auto &layer : m_layers) {
            auto w = mnist_helper::max_weight(layer);
            if (w > max)
                max = w;
        }
        return max;
    }

    /**
     * Return the largest weight of the given convolution layer.
     */
    Real conv_max_weight(size_t layer_id) const override
    {
        Real max = 0.0;
        auto layer = m_filters[layer_id];
        auto filter = layer.filter;
        for (size_t f = 0; f < layer.output_sizes[2]; f++) {
            for (size_t x = 0; x < filter.size(); x++) {
                for (size_t y = 0; y < filter[0].size(); y++) {
                    for (size_t z = 0; z < filter[0][0].size(); z++) {
                        max = filter[x][y][z][f] > max ? filter[x][y][z][f]
                                                       : max;
                    }
                }
            }
        }
        return max;
    }

    /**
     * Return the smallest weight in the network.
     */
    Real min_weight() const override
    {
        Real min = 0.0;
        for (auto &layer : m_layers) {
            auto w = mnist_helper::min_weight(layer);
            if (w < min)
                min = w;
        }
        return min;
    }

    /**
     * Return the largest absolute weight in the network.
     */
    Real max_weight_abs() const override
    {
        Real max = 0.0;
        for (auto &layer : m_layers) {
            auto w = mnist_helper::max_weight_abs(layer);
            if (w > max)
                max = w;
        }
        return max;
    }

    const size_t &epochs() const override { return m_epochs; }
    const size_t &batchsize() const override { return m_batchsize; }
    const Real &learnrate() const override { return learn_rate; }

    /**
     * Returns reference to the train data.
     */
    const mnist_helper::MNIST_DATA &mnist_train_set() override
    {
        return m_mnist;
    }

    /**
     * Returns reference to the test data.
     */
    const mnist_helper::MNIST_DATA &mnist_test_set() override
    {
        return m_mnist_test;
    }

    /**
     * Return all weights in the form of weights[layer](src, tar).
     */
    const std::vector<cypress::Matrix<Real>> &get_weights() override
    {
        return m_layers;
    }

    /**
     * Return all filter weights in the form of weights[x][y][depth][filter].
     */
    // TODO: return whole conv struct? mh
    const std::vector<mnist_helper::CONVOLUTION_LAYER> &get_conv_layers()
        override
    {
        return m_filters;
    }

    const std::vector<mnist_helper::POOLING_LAYER> &get_pooling_layers()
        override
    {
        return m_pools;
    }

    /**
     * Return the number of neurons per layer.
     */
    const std::vector<size_t> &get_layer_sizes() override
    {
        return m_layer_sizes;
    }

    const std::vector<mnist_helper::LAYER_TYPE> &get_layer_types() override
    {
        return m_layer_types;
    }

    /**
     * Scale down the whole data set, reduces the image by a given factor in
     * every dimension.
     */
    void scale_down_images(size_t pooling_size = 3) override
    {
        m_mnist = mnist_helper::scale_mnist(m_mnist, pooling_size);
        m_mnist_test = mnist_helper::scale_mnist(m_mnist_test, pooling_size);
    }

    /**
     * Implements matrix vector multiplication.
     */
    static inline std::vector<Real> mat_X_vec(const Matrix<Real> &mat,
                                              const std::vector<Real> &vec)
    {
#ifndef NDEBUG
        assert(mat.cols() == vec.size());
#endif
        std::vector<Real> res(mat.rows(), 0.0);
        for (size_t i = 0; i < mat.rows(); i++) {
            for (size_t j = 0; j < mat.cols(); j++) {
                res[i] += mat(i, j) * vec[j];
            }
        }
        return res;
    }

    /**
     * Implements transposed matrix vector multiplication.
     */
    static inline std::vector<Real> mat_trans_X_vec(
        const Matrix<Real> &mat, const std::vector<Real> &vec)
    {
#ifndef NDEBUG
        assert(mat.rows() == vec.size());
#endif
        std::vector<Real> res(mat.cols(), 0.0);
        for (size_t i = 0; i < mat.cols(); i++) {
            for (size_t j = 0; j < mat.rows(); j++) {
                res[i] += mat(j, i) * vec[j];
            }
        }
        return res;
    }
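
    // Note on conventions: weights are stored as weights[layer](src, tar)
    // (cf. get_weights()), so the forward path uses the transposed product
    // mat_trans_X_vec (a_{l+1} = W_l^T * a_l), while backpropagation uses
    // mat_X_vec to push errors back towards the input (error_l depends on
    // W_{l+1} * error_{l+1}).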

    /**
     * Vector vector multiplication, component-wise.
     */
    static inline std::vector<Real> vec_X_vec_comp(
        const std::vector<Real> &vec1, const std::vector<Real> &vec2)
    {
#ifndef NDEBUG
        assert(vec1.size() == vec2.size());
#endif
        std::vector<Real> res(vec1.size());
        for (size_t i = 0; i < vec1.size(); i++) {
            res[i] = vec1[i] * vec2[i];
        }
        return res;
    }

    /**
     * Checks if the output of the network was correct.
     */
    inline bool correct(const uint16_t label,
                        const std::vector<Real> &output) const override
    {
        auto it = std::max_element(output.begin(), output.end());
        auto out = std::distance(output.begin(), it);
        return out == label;
    }

    /**
     * Updates the weight matrix based on the error in this layer and the
     * output of the previous layer.
     */
    static inline void update_mat(Matrix<Real> &mat,
                                  const std::vector<Real> &errors,
                                  const std::vector<Real> &pre_output,
                                  const size_t sample_num,
                                  const Real learn_rate)
    {
#ifndef NDEBUG
        assert(mat.rows() == pre_output.size());
        assert(mat.cols() == errors.size());
#endif
        Real sample_num_r(sample_num);
        for (size_t i = 0; i < mat.rows(); i++) {
            for (size_t j = 0; j < mat.cols(); j++) {
                mat(i, j) = mat(i, j) - learn_rate * pre_output[i] *
                                            errors[j] / sample_num_r;
            }
        }
    }

    /**
     * Forward path of the network (-> inference).
     */
    virtual std::vector<std::vector<std::vector<Real>>> forward_path(
        const std::vector<size_t> &indices, const size_t start) const override
    {
        if (!m_filters.empty()) {
            throw std::runtime_error(
                "Conv layer not supported in forward_path function!");
        }
        if (!m_pools.empty()) {
            throw std::runtime_error(
                "Pooling layer not supported in forward_path function!");
        }
        auto &input = std::get<0>(m_mnist);
        std::vector<std::vector<std::vector<Real>>> res;
        std::vector<std::vector<Real>> activations;
        for (auto size : m_layer_sizes) {
            activations.emplace_back(std::vector<Real>(size, 0.0));
        }
        for (size_t sample = 0; sample < m_batchsize; sample++) {
            res.emplace_back(activations);
        }

        for (size_t sample = 0; sample < m_batchsize; sample++) {
            if (start + sample >= indices.size())
                break;
            res[sample][0] = input[indices[start + sample]];
            for (size_t layer = 0; layer < m_layers.size(); layer++) {
                res[sample][layer + 1] = ActivationFunction::function(
                    mat_trans_X_vec(m_layers[layer], res[sample][layer]));
            }
        }
        return res;
    }

    /**
     * Forward path of the test data, returns the accuracy on the test set.
     */
    virtual Real forward_path_test() const override
    {
        if (!m_filters.empty()) {
            throw std::runtime_error(
                "Conv layer not supported in forward_path_test function!");
        }
        if (!m_pools.empty()) {
            throw std::runtime_error(
                "Pooling layer not supported in forward_path_test function!");
        }
        auto &input = std::get<0>(m_mnist_test);
        auto &labels = std::get<1>(m_mnist_test);
        std::vector<std::vector<Real>> activations;
        for (auto size : m_layer_sizes) {
            activations.emplace_back(std::vector<Real>(size, 0.0));
        }
        size_t sum = 0;
        for (size_t sample = 0; sample < input.size(); sample++) {
            activations[0] = input[sample];
            for (size_t layer = 0; layer < m_layers.size(); layer++) {
                activations[layer + 1] = ActivationFunction::function(
                    mat_trans_X_vec(m_layers[layer], activations[layer]));
            }
            if (correct(labels[sample], activations.back()))
                sum++;
        }

        return Real(sum) / Real(labels.size());
    }

    /**
     * Implementation of backpropagation.
     */
    virtual void backward_path(
        const std::vector<size_t> &indices, const size_t start,
        const std::vector<std::vector<std::vector<Real>>> &activations,
        bool last_only = false) override
    {
#ifndef NDEBUG
        assert(m_batchsize == activations.size());
#endif
        if (!m_filters.empty()) {
            throw std::runtime_error(
                "Conv layer not supported in backward_path function!");
        }
        if (!m_pools.empty()) {
            throw std::runtime_error(
                "Pooling layer not supported in backward_path function!");
        }
        const auto &labels = std::get<1>(m_mnist);
        const std::vector<cypress::Matrix<cypress::Real>> orig_weights =
            m_layers;
        for (size_t sample = 0; sample < m_batchsize; sample++) {
            if (start + sample >= indices.size())
                break;
            const auto &activ = activations[sample];
            auto error = vec_X_vec_comp(
                Loss::calc_error(labels[indices[start + sample]],
                                 activ.back()),
                ActivationFunction::derivative(activ.back()));
            // TODO works for ReLU only
            update_mat(m_layers.back(), error, activ[activ.size() - 2],
                       m_batchsize, learn_rate);
            if (!last_only) {
                for (size_t inv_layer = 1; inv_layer < m_layers.size();
                     inv_layer++) {
                    size_t layer_id = m_layers.size() - inv_layer - 1;

                    error = vec_X_vec_comp(
                        mat_X_vec(orig_weights[layer_id + 1], error),
                        ActivationFunction::derivative(activ[layer_id + 1]));
                    update_mat(m_layers[layer_id], error, activ[layer_id],
                               m_batchsize, learn_rate);
                }
            }
        }
        m_constraint.constrain_weights(m_layers);
    }

    /**
     * Implementation of backprop, adapted for usage in SNNs: activations are
     * indexed as activations[layer][sample].
     */
    virtual void backward_path_2(
        const std::vector<uint16_t> &labels,
        const std::vector<std::vector<std::vector<Real>>> &activations,
        bool last_only = false) override
    {
#ifndef NDEBUG
        assert(m_batchsize == activations.back().size());
#endif
        if (!m_filters.empty()) {
            throw std::runtime_error(
                "Conv layer not supported in backward_path_2 function!");
        }
        if (!m_pools.empty()) {
            throw std::runtime_error(
                "Pooling layer not supported in backward_path_2 function!");
        }
        const auto orig_weights = m_layers;
        for (size_t sample = 0; sample < m_batchsize; sample++) {
            auto error = vec_X_vec_comp(
                Loss::calc_error(labels[sample], activations.back()[sample]),
                ActivationFunction::derivative(activations.back()[sample]));
            // TODO works for ReLU only
            update_mat(m_layers.back(), error,
                       activations[activations.size() - 2][sample],
                       m_batchsize, learn_rate);
            if (!last_only) {
                for (size_t inv_layer = 1; inv_layer < m_layers.size();
                     inv_layer++) {
                    size_t layer_id = m_layers.size() - inv_layer - 1;

                    error = vec_X_vec_comp(
                        mat_X_vec(orig_weights[layer_id + 1], error),
                        ActivationFunction::derivative(
                            activations[layer_id + 1][sample]));
                    update_mat(m_layers[layer_id], error,
                               activations[layer_id][sample], m_batchsize,
                               learn_rate);
                }
            }
            m_constraint.constrain_weights(m_layers);
        }
    }
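
    /* Both backward_path variants implement mini-batch stochastic gradient
     * descent. Written out for one sample (a_l is the activation vector with
     * index l, ".*" the component-wise product from vec_X_vec_comp, f' the
     * ActivationFunction derivative):
     *
     *   error_last = Loss::calc_error(label, a_last) .* f'(a_last)
     *   error_l    = (W_{l+1} * error_{l+1}) .* f'(a_{l+1})
     *   W_l(i, j) -= learn_rate * a_l[i] * error_l[j] / batchsize
     */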

    /**
     * Calculate the overall accuracy from the given neural network output.
     */
    size_t accuracy(
        const std::vector<std::vector<std::vector<Real>>> &activations,
        const std::vector<size_t> &indices, const size_t start) override
    {
#ifndef NDEBUG
        assert(activations.size() == m_batchsize);
#endif
        auto &labels = std::get<1>(m_mnist);
        size_t sum = 0;

        for (size_t sample = 0; sample < m_batchsize; sample++) {
            if (start + sample >= indices.size())
                break;
            if (correct(labels[indices[start + sample]],
                        activations[sample].back()))
                sum++;
        }
        return sum;
    }

    /**
     * Starts the full training process.
     */
    void train(unsigned seed = 0) override
    {
        std::vector<size_t> indices(std::get<0>(m_mnist).size());
        m_constraint.constrain_weights(m_layers);
        for (size_t i = 0; i < indices.size(); i++) {
            indices[i] = i;
        }
        if (seed == 0) {
            seed = std::chrono::system_clock::now().time_since_epoch().count();
        }
        auto rng = std::default_random_engine{seed};

        for (size_t epoch = 0; epoch < m_epochs; epoch++) {
            size_t correct = 0;
            std::shuffle(indices.begin(), indices.end(), rng);
            for (size_t current_idx = 0;
                 current_idx < std::get<1>(m_mnist).size();
                 current_idx += m_batchsize) {
                auto activations = forward_path(indices, current_idx);
                correct += accuracy(activations, indices, current_idx);
                backward_path(indices, current_idx, activations);
                m_constraint.constrain_weights(m_layers);
            }
            cypress::global_logger().info(
                "MLP", "Accuracy of epoch " + std::to_string(epoch) + ": " +
                           std::to_string(Real(correct) /
                                          Real(std::get<1>(m_mnist).size())));
        }
    }
};
}  // namespace MNIST