#include <cypress/cypress.hpp>

#include <algorithm>
#include <cassert>
#include <chrono>
#include <cmath>
#include <random>
#include <string>
#include <vector>

using cypress::Json;
using cypress::Matrix;
using cypress::Real;
// Mean squared error loss.
class MSE {
public:
    static Real calc_loss(const uint16_t label,
                          const std::vector<Real> &output)
    {
        Real res = 0.0;
        for (size_t neuron = 0; neuron < output.size(); neuron++) {
            if (label == neuron) {
                res += (output[neuron] - 1.0) * (output[neuron] - 1.0);
            } else {
                res += output[neuron] * output[neuron];
            }
        }
        res = sqrt(res / Real(output.size()));
        return res;
    }

    static std::vector<Real> calc_error(const uint16_t label,
                                        const std::vector<Real> &output)
    {
        std::vector<Real> res(output.size(), 0.0);
        for (size_t neuron = 0; neuron < output.size(); neuron++) {
            if (label == neuron) {
                res[neuron] = output[neuron] - 1.0;
            } else {
                res[neuron] = output[neuron] - 0.0;
            }
        }
        return res;
    }
};
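// Usage sketch (hypothetical helper, not part of the original header): for a
// three-neuron output and target label 1, calc_loss measures the RMS
// distance to the one-hot target and calc_error gives the per-neuron error.
inline void example_mse_usage()
{
    std::vector<Real> output = {0.1, 0.9, 0.0};
    Real loss = MSE::calc_loss(1, output);
    // loss == sqrt((0.1 * 0.1 + (-0.1) * (-0.1) + 0.0) / 3.0)
    std::vector<Real> error = MSE::calc_error(1, output);
    // error == {0.1, -0.1, 0.0}
}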
// Categorical hinge loss. Use if weights are restricted to be > 0.
class CatHinge {
public:
    static Real calc_loss(const uint16_t label,
                          const std::vector<Real> &output)
    {
        Real res = 0.0;
        for (size_t neuron = 0; neuron < output.size(); neuron++) {
            if (label == neuron) {
                res += std::max(0.0, 1.0 - Real(output[neuron]));
            } else {
                res += std::max(0.0, 1.0 + Real(output[neuron]));
            }
        }
        return res;
    }

    static std::vector<Real> calc_error(const uint16_t label,
                                        const std::vector<Real> &output)
    {
        // Find the largest output that does not belong to the correct label
        std::vector<Real> vec = output;
        vec[label] = -0.0;
        auto neg_elem = std::max_element(vec.begin(), vec.end());
        auto index = std::distance(vec.begin(), neg_elem);
        auto res = std::vector<Real>(output.size(), 0.0);
        // The gradient is non-zero only if the margin of 1 is violated
        if ((*neg_elem) - output[label] + 1 >= 0.0) {
            res[label] = -1.0;
            if (label != index) {
                res[index] = +1.0;
            }
        }
        return res;
    }
};
// ActivationFunction ReLU: Rectified Linear Unit.
class ReLU {
public:
    static inline std::vector<Real> function(std::vector<Real> input)
    {
        for (auto &i : input) {
            i = i > 0.0 ? i : 0.0;
        }
        return input;
    }

    static inline std::vector<Real> derivative(std::vector<Real> input)
    {
        for (auto &i : input) {
            i = i >= 0 ? 1.0 : 0.0;
        }
        return input;
    }
};
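// Usage sketch (hypothetical helper): ReLU::function clamps negative
// activations to zero; ReLU::derivative yields the 0/1 mask that backprop
// multiplies into the error signal.
inline void example_relu_usage()
{
    std::vector<Real> pre = {-0.5, 0.0, 2.0};
    auto act = ReLU::function(pre);    // {0.0, 0.0, 2.0}
    auto grad = ReLU::derivative(pre); // {0.0, 1.0, 1.0}
}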
// Constraint for weights in neural network: No constraint.
class NoConstraint {
public:
    void setup(std::vector<cypress::Matrix<Real>> &) {}
    static void constrain_weights(std::vector<cypress::Matrix<Real>> &) {}
};
// Constraint for weights in neural network: Only weights > 0.
class PositiveWeights {
public:
    void setup(std::vector<cypress::Matrix<Real>> &) {}
    void constrain_weights(std::vector<cypress::Matrix<Real>> &layers)
    {
        for (auto &i : layers) {
            for (size_t j = 0; j < i.size(); j++) {
                if (i[j] < 0.0) {
                    i[j] = 0.0;
                }
            }
        }
    }
};
// Constraint keeping weights positive and bounded (reconstructed from the
// surrounding documentation; the exact limit logic was elided).
class PositiveLimitedWeights {
public:
    Real m_max = 0.0;

    void setup(std::vector<cypress::Matrix<Real>> &layers)
    {
        // Remember the largest weight found at setup time
        for (auto &layer : layers) {
            for (size_t i = 0; i < layer.size(); i++) {
                if (layer[i] > m_max) {
                    m_max = layer[i];
                }
            }
        }
    }

    void constrain_weights(std::vector<cypress::Matrix<Real>> &layers)
    {
        for (auto &i : layers) {
            for (size_t j = 0; j < i.size(); j++) {
                if (i[j] < 0.0) {
                    i[j] = 0.0;
                }
                if (i[j] > m_max) {
                    i[j] = m_max;
                }
            }
        }
    }
};
// Base class for Multi Layer Networks (currently Perceptron only).
// Allows us to use polymorphism with the templated MLP class below.
class MLPBase {
public:
    virtual Real max_weight() const = 0;
    virtual Real min_weight() const = 0;
    virtual Real max_weight_abs() const = 0;
    virtual Real conv_max_weight(size_t layer_id = 0) const = 0;
    virtual const size_t &epochs() const = 0;
    virtual const size_t &batchsize() const = 0;
    virtual const Real &learnrate() const = 0;
    virtual const mnist_helper::MNIST_DATA &mnist_train_set() = 0;
    virtual const mnist_helper::MNIST_DATA &mnist_test_set() = 0;
    virtual const std::vector<cypress::Matrix<Real>> &get_weights() = 0;
    virtual const std::vector<mnist_helper::CONVOLUTION_LAYER>
        &get_conv_layers() = 0;
    virtual const std::vector<mnist_helper::POOLING_LAYER>
        &get_pooling_layers() = 0;
    virtual const std::vector<size_t> &get_layer_sizes() = 0;
    virtual const std::vector<mnist_helper::LAYER_TYPE> &get_layer_types() = 0;
    virtual void scale_down_images(size_t pooling_size = 3) = 0;
    virtual inline bool correct(const uint16_t label,
                                const std::vector<Real> &output) const = 0;
    virtual std::vector<std::vector<std::vector<Real>>> forward_path(
        const std::vector<size_t> &indices, const size_t start) const = 0;
    virtual Real forward_path_test() const = 0;
    virtual void backward_path(
        const std::vector<size_t> &indices, const size_t start,
        const std::vector<std::vector<std::vector<Real>>> &activations,
        bool last_only = false) = 0;
    virtual void backward_path_2(
        const std::vector<uint16_t> &labels,
        const std::vector<std::vector<std::vector<Real>>> &activations,
        bool last_only = false) = 0;
    virtual size_t accuracy(
        const std::vector<std::vector<std::vector<Real>>> &activations,
        const std::vector<size_t> &indices, const size_t start) = 0;
    virtual void train(unsigned seed = 0) = 0;
    virtual ~MLPBase() = default;
};
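// Usage sketch: the point of MLPBase is that differently templated MLP
// instantiations can be driven through a single pointer type, e.g.
// (hypothetical, using the loss and activation classes above and the MLP
// template below):
//
//   std::shared_ptr<MLPBase> net =
//       std::make_shared<MLP<MSE, ReLU>>(std::vector<size_t>{784, 100, 10});
//   net->train();
//   Real test_accuracy = net->forward_path_test();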
// The standard densely connected multilayer Perceptron. Template arguments
// provide the loss function, the activation function of the neurons, and a
// constraint applied to the weights.
template <typename Loss = MSE, typename ActivationFunction = ReLU,
          typename Constraint = NoConstraint>
class MLP : public MLPBase {
protected:
    std::vector<cypress::Matrix<Real>> m_layers;
    std::vector<size_t> m_layer_sizes;
    std::vector<mnist_helper::LAYER_TYPE> m_layer_types;
    std::vector<mnist_helper::CONVOLUTION_LAYER> m_filters;
    std::vector<mnist_helper::POOLING_LAYER> m_pools;
    size_t m_epochs = 20;
    size_t m_batchsize = 100;
    Real learn_rate = 0.01;
    mnist_helper::MNIST_DATA m_mnist;
    mnist_helper::MNIST_DATA m_mnist_test;
    Constraint m_constraint;

public:
    // Constructor for random initialization.
    MLP(std::vector<size_t> layer_sizes, size_t epochs = 20,
        size_t batchsize = 100, Real learn_rate = 0.01)
        : m_layer_sizes(layer_sizes),
          m_epochs(epochs),
          m_batchsize(batchsize),
          learn_rate(learn_rate)
    {
        for (size_t i = 0; i < layer_sizes.size() - 1; i++) {
            m_layers.emplace_back(
                Matrix<Real>(layer_sizes[i], layer_sizes[i + 1]));
        }
        int seed = std::chrono::system_clock::now().time_since_epoch().count();
        auto rng = std::default_random_engine(seed);
        std::normal_distribution<Real> distribution(0.0, 1.0);
        for (auto &layer : m_layers) {
            // He initialization: scale the normal draw by sqrt(2 / fan_in)
            auto scale = std::sqrt(2.0 / double(layer.rows()));
            for (size_t i = 0; i < layer.size(); i++) {
                layer[i] = distribution(rng) * scale;
            }
        }
        // ...
        m_constraint.setup(m_layers);
    }
    // Constructs the network from a Json object. The repository provides
    // Python scripts to create these from a Keras model.
    MLP(Json &data, size_t epochs = 20, size_t batchsize = 100,
        Real learn_rate = 0.01, bool random = false,
        Constraint constraint = Constraint())
        : m_epochs(epochs),
          m_batchsize(batchsize),
          learn_rate(learn_rate),
          m_constraint(constraint)
    {
        int seed = std::chrono::system_clock::now().time_since_epoch().count();
        auto rng = std::default_random_engine(seed);
        std::normal_distribution<Real> distribution(0.0, 1.0);
        for (auto &layer : data["netw"]) {
            if (layer["class_name"].get<std::string>() == "Dense") {
                auto &json = layer["weights"];
                m_layers.emplace_back(
                    Matrix<Real>(json.size(), json[0].size()));
                auto &weights = m_layers.back();
                auto scale = std::sqrt(2.0 / double(weights.rows()));
                for (size_t i = 0; i < json.size(); i++) {
                    for (size_t j = 0; j < json[i].size(); j++) {
                        if (!random) {
                            weights(i, j) = json[i][j].get<Real>();
                        } else {
                            // Re-initialize randomly instead of loading
                            weights(i, j) = distribution(rng) * scale;
                        }
                    }
                }
                m_layer_sizes.emplace_back(m_layers.back().rows());
                // (enumerator names assumed)
                m_layer_types.emplace_back(mnist_helper::LAYER_TYPE::Dense);
                cypress::global_logger().debug(
                    "MNIST", "Dense layer detected with size " +
                                 std::to_string(weights.rows()) + " times " +
                                 std::to_string(weights.cols()));
            }
            else if (layer["class_name"].get<std::string>() == "Conv2D") {
                auto &json = layer["weights"];
                size_t kernel_x = json.size();
                size_t kernel_y = json[0].size();
                size_t kernel_z = json[0][0].size();
                size_t output = json[0][0][0].size();
                size_t stride = layer["stride"];
                size_t padding = layer["padding"] == "valid" ? 0 : 1;
                std::vector<size_t> input_sizes;
                std::vector<size_t> output_sizes;
                if (!layer["input_shape_x"].empty()) {
                    input_sizes.push_back(layer["input_shape_x"]);
                    input_sizes.push_back(layer["input_shape_y"]);
                    input_sizes.push_back(layer["input_shape_z"]);
                }
                else if (m_layer_types.back() ==
                         mnist_helper::LAYER_TYPE::Conv2D) {
                    input_sizes.push_back(m_filters.back().output_sizes[0]);
                    input_sizes.push_back(m_filters.back().output_sizes[1]);
                    input_sizes.push_back(m_filters.back().output_sizes[2]);
                }
                else if (m_layer_types.back() ==
                         mnist_helper::LAYER_TYPE::Pooling) {
                    input_sizes.push_back(m_pools.back().output_sizes[0]);
                    input_sizes.push_back(m_pools.back().output_sizes[1]);
                    input_sizes.push_back(m_pools.back().output_sizes[2]);
                }
                else {
                    throw std::runtime_error(
                        "Conv after Dense layer not implemented!");
                }
                // Output size per spatial dim: (in - kernel + 2*pad)/stride + 1
                output_sizes.push_back(
                    (input_sizes[0] - kernel_x + 2 * padding) / stride + 1);
                output_sizes.push_back(
                    (input_sizes[1] - kernel_y + 2 * padding) / stride + 1);
                output_sizes.push_back(output);
                mnist_helper::CONVOLUTION_FILTER filter(
                    kernel_x,
                    std::vector<std::vector<std::vector<Real>>>(
                        kernel_y,
                        std::vector<std::vector<Real>>(
                            kernel_z, std::vector<Real>(output))));
                // (field order assumed)
                mnist_helper::CONVOLUTION_LAYER conv = {
                    filter, input_sizes, output_sizes, stride, padding};
                m_filters.emplace_back(conv);
                auto &weights = m_filters.back().filter;
                for (size_t i = 0; i < json.size(); i++) {
                    for (size_t j = 0; j < json[i].size(); j++) {
                        for (size_t k = 0; k < json[i][j].size(); k++) {
                            for (size_t l = 0; l < json[i][j][k].size(); l++) {
                                weights[i][j][k][l] =
                                    json[i][j][k][l].get<Real>();
                            }
                        }
                    }
                }
                m_layer_sizes.emplace_back(
                    input_sizes[0] * input_sizes[1] * input_sizes[2]);
                m_layer_types.emplace_back(mnist_helper::LAYER_TYPE::Conv2D);
                cypress::global_logger().debug(
                    "MNIST", "Conv layer detected with size (" +
                                 std::to_string(json.size()) + "," +
                                 std::to_string(json[0].size()) + "," +
                                 std::to_string(json[0][0].size()) + "," +
                                 std::to_string(json[0][0][0].size()) + ")");
            }
            else if (layer["class_name"].get<std::string>() ==
                     "MaxPooling2D") {
                std::vector<size_t> size = layer["size"];
                size_t stride = layer["stride"];
                std::vector<size_t> input_sizes;
                std::vector<size_t> output_sizes;
                if (m_layer_types.empty()) {
                    throw std::runtime_error(
                        "Pooling layer must not be the first layer!");
                }
                else if (m_layer_types.back() ==
                         mnist_helper::LAYER_TYPE::Conv2D) {
                    input_sizes.push_back(m_filters.back().output_sizes[0]);
                    input_sizes.push_back(m_filters.back().output_sizes[1]);
                    input_sizes.push_back(m_filters.back().output_sizes[2]);
                }
                else if (m_layer_types.back() ==
                         mnist_helper::LAYER_TYPE::Pooling) {
                    input_sizes.push_back(m_pools.back().output_sizes[0]);
                    input_sizes.push_back(m_pools.back().output_sizes[1]);
                    input_sizes.push_back(m_pools.back().output_sizes[2]);
                }
                else {
                    throw std::runtime_error(
                        "Pooling after Dense not implemented!");
                }
                // Pooling uses no padding: (in - size) / stride + 1
                output_sizes.push_back((input_sizes[0] - size[0]) / stride + 1);
                output_sizes.push_back((input_sizes[1] - size[1]) / stride + 1);
                output_sizes.push_back(input_sizes[2]);
                // (field order assumed)
                mnist_helper::POOLING_LAYER pool = {input_sizes, output_sizes,
                                                    size, stride};
                m_pools.emplace_back(pool);
                m_layer_sizes.emplace_back(
                    input_sizes[0] * input_sizes[1] * input_sizes[2]);
                m_layer_types.emplace_back(mnist_helper::LAYER_TYPE::Pooling);
                cypress::global_logger().debug(
                    "MNIST", "Pooling layer detected with size (" +
                                 std::to_string(size[0]) + ", " +
                                 std::to_string(size[1]) + ") and stride " +
                                 std::to_string(stride));
            }
            else {
                throw std::runtime_error("Unknown layer type");
            }
        }
        m_layer_sizes.push_back(m_layers.back().cols());
        // ...
        m_constraint.setup(m_layers);
    }
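    // Sketch of the Json layout the constructor above expects (inferred from
    // the parsing code; the exporting Python scripts define the
    // authoritative format):
    //
    //   { "netw": [
    //       { "class_name": "Conv2D", "weights": [[[[...]]]],
    //         "stride": 1, "padding": "valid",
    //         "input_shape_x": 28, "input_shape_y": 28, "input_shape_z": 1 },
    //       { "class_name": "MaxPooling2D", "size": [2, 2], "stride": 2 },
    //       { "class_name": "Dense", "weights": [[...], ...] }
    //   ] }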
    // Return the largest weight in the network.
    Real max_weight() const override
    {
        Real max = 0.0;
        for (auto &layer : m_layers) {
            for (size_t i = 0; i < layer.size(); i++) {
                max = layer[i] > max ? layer[i] : max;
            }
        }
        return max;
    }
    // Return the largest weight in the requested convolution layer.
    Real conv_max_weight(size_t layer_id = 0) const override
    {
        Real max = 0.0;
        auto layer = m_filters[layer_id];
        auto filter = layer.filter;
        for (size_t f = 0; f < layer.output_sizes[2]; f++) {
            for (size_t x = 0; x < filter.size(); x++) {
                for (size_t y = 0; y < filter[0].size(); y++) {
                    for (size_t z = 0; z < filter[0][0].size(); z++) {
                        max = filter[x][y][z][f] > max ? filter[x][y][z][f]
                                                       : max;
                    }
                }
            }
        }
        return max;
    }
    // Return the smallest weight in the network.
    Real min_weight() const override
    {
        Real min = 0.0;
        for (auto &layer : m_layers) {
            for (size_t i = 0; i < layer.size(); i++) {
                min = layer[i] < min ? layer[i] : min;
            }
        }
        return min;
    }
    // Return the largest absolute weight in the network.
    Real max_weight_abs() const override
    {
        Real max = 0.0;
        for (auto &layer : m_layers) {
            for (size_t i = 0; i < layer.size(); i++) {
                max = std::abs(layer[i]) > max ? std::abs(layer[i]) : max;
            }
        }
        return max;
    }
    const size_t &epochs() const override { return m_epochs; }
    const size_t &batchsize() const override { return m_batchsize; }
    const Real &learnrate() const override { return learn_rate; }
    // ...

    // Return the number of neurons per layer.
    const std::vector<size_t> &get_layer_sizes() override
    {
        return m_layer_sizes;
    }

    const std::vector<mnist_helper::LAYER_TYPE> &get_layer_types() override
    {
        return m_layer_types;
    }
    // Implements matrix-vector multiplication: res = mat * vec.
    static inline std::vector<Real> mat_X_vec(const Matrix<Real> &mat,
                                              const std::vector<Real> &vec)
    {
        assert(mat.cols() == vec.size());
        std::vector<Real> res(mat.rows(), 0.0);
        for (size_t i = 0; i < mat.rows(); i++) {
            for (size_t j = 0; j < mat.cols(); j++) {
                res[i] += mat(i, j) * vec[j];
            }
        }
        return res;
    }
    // Implements transposed matrix-vector multiplication: res = mat^T * vec.
    static inline std::vector<Real> mat_trans_X_vec(
        const Matrix<Real> &mat, const std::vector<Real> &vec)
    {
        assert(mat.rows() == vec.size());
        std::vector<Real> res(mat.cols(), 0.0);
        for (size_t i = 0; i < mat.cols(); i++) {
            for (size_t j = 0; j < mat.rows(); j++) {
                res[i] += mat(j, i) * vec[j];
            }
        }
        return res;
    }
    // Vector-vector multiplication, component-wise.
    static inline std::vector<Real> vec_X_vec_comp(
        const std::vector<Real> &vec1, const std::vector<Real> &vec2)
    {
        assert(vec1.size() == vec2.size());
        std::vector<Real> res(vec1.size());
        for (size_t i = 0; i < vec1.size(); i++) {
            res[i] = vec1[i] * vec2[i];
        }
        return res;
    }
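    // Quick numeric check (hypothetical helper, not in the original header):
    // for M = [[1, 2], [3, 4]] and v = {1, 1}, mat_X_vec yields M*v = {3, 7},
    // mat_trans_X_vec yields M^T*v = {4, 6}, and vec_X_vec_comp multiplies
    // the two results element-wise.
    static inline void example_matvec_usage()
    {
        Matrix<Real> m(2, 2);
        m(0, 0) = 1.0;
        m(0, 1) = 2.0;
        m(1, 0) = 3.0;
        m(1, 1) = 4.0;
        std::vector<Real> v = {1.0, 1.0};
        auto mv = mat_X_vec(m, v);         // {3.0, 7.0}
        auto mtv = mat_trans_X_vec(m, v);  // {4.0, 6.0}
        auto cw = vec_X_vec_comp(mv, mtv); // {12.0, 42.0}
    }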
    // Checks whether the output of the network was correct, i.e. whether the
    // neuron with the highest activation matches the label.
    inline bool correct(const uint16_t label,
                        const std::vector<Real> &output) const override
    {
        auto it = std::max_element(output.begin(), output.end());
        auto out = std::distance(output.begin(), it);
        return size_t(out) == size_t(label);
    }
    // Updates the weight matrix based on the error in this layer and the
    // output of the previous layer.
    static void update_mat(Matrix<Real> &mat, const std::vector<Real> &errors,
                           const std::vector<Real> &pre_output,
                           const size_t sample_num, const Real learn_rate)
    {
        assert(mat.rows() == pre_output.size());
        assert(mat.cols() == errors.size());
        Real sample_num_r(sample_num);
        for (size_t i = 0; i < mat.rows(); i++) {
            for (size_t j = 0; j < mat.cols(); j++) {
                mat(i, j) = mat(i, j) - learn_rate * pre_output[i] *
                                            errors[j] / sample_num_r;
            }
        }
    }
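    // A worked view of the update rule above (mini-batch SGD):
    //   w_ij <- w_ij - learn_rate * pre_output_i * error_j / sample_num
    // i.e. each weight moves against the outer product of the previous
    // layer's activation and the back-propagated error, averaged over the
    // batch size.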
    // Forward path of the network (inference).
    virtual std::vector<std::vector<std::vector<Real>>> forward_path(
        const std::vector<size_t> &indices, const size_t start) const override
    {
        if (!m_filters.empty()) {
            throw std::runtime_error(
                "Conv layer not supported in forward_path function!");
        }
        if (!m_pools.empty()) {
            throw std::runtime_error(
                "Pooling layer not supported in forward_path function!");
        }
        auto &input = std::get<0>(m_mnist);
        std::vector<std::vector<std::vector<Real>>> res;
        std::vector<std::vector<Real>> activations;
        for (auto size : m_layer_sizes) {
            activations.emplace_back(std::vector<Real>(size, 0.0));
        }
        for (size_t sample = 0; sample < m_batchsize; sample++) {
            res.emplace_back(activations);
        }
        for (size_t sample = 0; sample < m_batchsize; sample++) {
            if (start + sample >= indices.size()) {
                break;
            }
            res[sample][0] = input[indices[start + sample]];
            for (size_t layer = 0; layer < m_layers.size(); layer++) {
                res[sample][layer + 1] = ActivationFunction::function(
                    mat_trans_X_vec(m_layers[layer], res[sample][layer]));
            }
        }
        return res;
    }
    // Forward path of the test data; returns the accuracy on the test set.
    virtual Real forward_path_test() const override
    {
        if (!m_filters.empty()) {
            throw std::runtime_error(
                "Conv layer not supported in forward_path_test function!");
        }
        if (!m_pools.empty()) {
            throw std::runtime_error(
                "Pooling layer not supported in forward_path_test function!");
        }
        auto &input = std::get<0>(m_mnist_test);
        auto &labels = std::get<1>(m_mnist_test);
        std::vector<std::vector<Real>> activations;
        for (auto size : m_layer_sizes) {
            activations.emplace_back(std::vector<Real>(size, 0.0));
        }
        size_t sum = 0;
        for (size_t sample = 0; sample < input.size(); sample++) {
            activations[0] = input[sample];
            for (size_t layer = 0; layer < m_layers.size(); layer++) {
                activations[layer + 1] = ActivationFunction::function(
                    mat_trans_X_vec(m_layers[layer], activations[layer]));
            }
            if (correct(labels[sample], activations.back())) {
                sum++;
            }
        }
        return Real(sum) / Real(labels.size());
    }
    // Implementation of backpropagation.
    virtual void backward_path(
        const std::vector<size_t> &indices, const size_t start,
        const std::vector<std::vector<std::vector<Real>>> &activations,
        bool last_only = false) override
    {
        assert(m_batchsize == activations.size());
        if (!m_filters.empty()) {
            throw std::runtime_error(
                "Conv layer not supported in backward_path function!");
        }
        if (!m_pools.empty()) {
            throw std::runtime_error(
                "Pooling layer not supported in backward_path function!");
        }
        const auto &labels = std::get<1>(m_mnist);
        const std::vector<cypress::Matrix<cypress::Real>> orig_weights =
            m_layers;
        for (size_t sample = 0; sample < m_batchsize; sample++) {
            if (start + sample >= indices.size()) {
                break;
            }
            const auto &activ = activations[sample];
            auto error = vec_X_vec_comp(
                Loss::calc_error(labels[indices[start + sample]],
                                 activ.back()),
                ActivationFunction::derivative(activ.back()));
            update_mat(m_layers.back(), error, activ[activ.size() - 2],
                       m_batchsize, learn_rate);
            if (!last_only) {
                for (size_t inv_layer = 1; inv_layer < m_layers.size();
                     inv_layer++) {
                    size_t layer_id = m_layers.size() - inv_layer - 1;
                    error = vec_X_vec_comp(
                        mat_X_vec(orig_weights[layer_id + 1], error),
                        ActivationFunction::derivative(activ[layer_id + 1]));
                    update_mat(m_layers[layer_id], error, activ[layer_id],
                               m_batchsize, learn_rate);
                }
            }
        }
        m_constraint.constrain_weights(m_layers);
    }
    // Implementation of backprop, adapted for usage in SNNs: activations are
    // indexed [layer][sample] instead of [sample][layer].
    virtual void backward_path_2(
        const std::vector<uint16_t> &labels,
        const std::vector<std::vector<std::vector<Real>>> &activations,
        bool last_only = false) override
    {
        assert(m_batchsize == activations.back().size());
        if (!m_filters.empty()) {
            throw std::runtime_error(
                "Conv layer not supported in backward_path_2 function!");
        }
        if (!m_pools.empty()) {
            throw std::runtime_error(
                "Pooling layer not supported in backward_path_2 function!");
        }
        const auto orig_weights = m_layers;
        for (size_t sample = 0; sample < m_batchsize; sample++) {
            auto error = vec_X_vec_comp(
                Loss::calc_error(labels[sample], activations.back()[sample]),
                ActivationFunction::derivative(activations.back()[sample]));
            update_mat(m_layers.back(), error,
                       activations[activations.size() - 2][sample],
                       m_batchsize, learn_rate);
            if (!last_only) {
                for (size_t inv_layer = 1; inv_layer < m_layers.size();
                     inv_layer++) {
                    size_t layer_id = m_layers.size() - inv_layer - 1;
                    error = vec_X_vec_comp(
                        mat_X_vec(orig_weights[layer_id + 1], error),
                        ActivationFunction::derivative(
                            activations[layer_id + 1][sample]));
                    update_mat(m_layers[layer_id], error,
                               activations[layer_id][sample], m_batchsize,
                               learn_rate);
                }
            }
        }
        m_constraint.constrain_weights(m_layers);
    }
    // Calculate the number of correct classifications from the given network
    // output.
    virtual size_t accuracy(
        const std::vector<std::vector<std::vector<Real>>> &activations,
        const std::vector<size_t> &indices, const size_t start) override
    {
        assert(activations.size() == m_batchsize);
        auto &labels = std::get<1>(m_mnist);
        size_t sum = 0;
        for (size_t sample = 0; sample < m_batchsize; sample++) {
            if (start + sample >= indices.size()) {
                break;
            }
            if (correct(labels[indices[start + sample]],
                        activations[sample].back())) {
                sum++;
            }
        }
        return sum;
    }
    // Starts the full training process.
    void train(unsigned seed = 0) override
    {
        std::vector<size_t> indices(std::get<0>(m_mnist).size());
        m_constraint.constrain_weights(m_layers);
        for (size_t i = 0; i < indices.size(); i++) {
            indices[i] = i;
        }
        if (seed == 0) {
            seed = std::chrono::system_clock::now().time_since_epoch().count();
        }
        auto rng = std::default_random_engine{seed};
        for (size_t epoch = 0; epoch < m_epochs; epoch++) {
            size_t correct = 0;
            std::shuffle(indices.begin(), indices.end(), rng);
            for (size_t current_idx = 0;
                 current_idx < std::get<1>(m_mnist).size();
                 current_idx += m_batchsize) {
                auto activations = forward_path(indices, current_idx);
                correct += accuracy(activations, indices, current_idx);
                backward_path(indices, current_idx, activations);
                m_constraint.constrain_weights(m_layers);
            }
            cypress::global_logger().info(
                "MLP", "Accuracy of epoch " + std::to_string(epoch) + ": " +
                           std::to_string(Real(correct) /
                                          Real(std::get<1>(m_mnist).size())));
        }
    }
};
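// End-to-end usage sketch (hypothetical; assumes the MNIST files are located
// where load_data expects them): train a 784-100-10 perceptron with the
// defaults and log the test accuracy.
//
//   MLP<MSE, ReLU> net({784, 100, 10}, /*epochs=*/20, /*batchsize=*/100,
//                      /*learn_rate=*/0.01);
//   net.train(1234);  // fixed seed for a reproducible shuffle
//   cypress::global_logger().info(
//       "MLP", "Test accuracy: " + std::to_string(net.forward_path_test()));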