SNABSuite  0.x
Spiking Neural Architecture Benchmark Suite
MNIST::MLP< Loss, ActivationFunction, Constraint > Class Template Reference

The standard densely connected multilayer perceptron (MLP). Template arguments provide the loss function, the activation function of the neurons (experimental), and an optional constraint on the weights.

#include <mnist_mlp.hpp>

Inheritance diagram for MNIST::MLP< Loss, ActivationFunction, Constraint >:
MNIST::MLPBase

Public Member Functions

 MLP (std::vector< size_t > layer_sizes, size_t epochs=20, size_t batchsize=100, Real learn_rate=0.01)
 	Constructor for random initialization.
 
 MLP (Json &data, size_t epochs=20, size_t batchsize=100, Real learn_rate=0.01, bool random=false, Constraint constraint=Constraint())
 	Constructs the network from a JSON file. The repository provides Python scripts to create such files from a Keras network.
 
Real max_weight () const override
 	Return the largest weight in the network.
 
Real conv_max_weight (size_t layer_id) const override
 
Real min_weight () const override
 	Return the smallest weight in the network.
 
Real max_weight_abs () const override
 	Return the largest absolute weight in the network.
 
const size_t & epochs () const override
 
const size_t & batchsize () const override
 
const Real & learnrate () const override
 
const mnist_helper::MNIST_DATA & mnist_train_set () override
 	Returns a reference to the training data.
 
const mnist_helper::MNIST_DATA & mnist_test_set () override
 	Returns a reference to the test data.
 
const std::vector< cypress::Matrix< Real > > & get_weights () override
 	Return all weights in the form of weights[layer](src, tar).
 
const std::vector< mnist_helper::CONVOLUTION_LAYER > & get_conv_layers () override
 	Return all filter weights in the form of weights[x][y][depth][filter].
 
const std::vector< mnist_helper::POOLING_LAYER > & get_pooling_layers () override
 
const std::vector< size_t > & get_layer_sizes () override
 	Return the number of neurons per layer.
 
const std::vector< mnist_helper::LAYER_TYPE > & get_layer_types () override
 
void scale_down_images (size_t pooling_size=3) override
 	Scale down the whole data set; reduces each image by the given factor in every dimension.
 
bool correct (const uint16_t label, const std::vector< Real > &output) const override
 	Checks whether the output of the network was correct.
 
virtual std::vector< std::vector< std::vector< Real > > > forward_path (const std::vector< size_t > &indices, const size_t start) const override
 	Forward path of the network (-> inference).
 
virtual Real forward_path_test () const override
 	Forward path of the test data.
 
virtual void backward_path (const std::vector< size_t > &indices, const size_t start, const std::vector< std::vector< std::vector< Real >>> &activations, bool last_only=false) override
 	Implementation of backprop.
 
virtual void backward_path_2 (const std::vector< uint16_t > &labels, const std::vector< std::vector< std::vector< Real >>> &activations, bool last_only=false) override
 	Implementation of backprop, adapted for usage in SNNs.
 
size_t accuracy (const std::vector< std::vector< std::vector< Real >>> &activations, const std::vector< size_t > &indices, const size_t start) override
 	Calculate the overall accuracy from the given neural network output.
 
void train (unsigned seed=0) override
 	Starts the full training process.
 

Static Public Member Functions

static std::vector< Real > mat_X_vec (const Matrix< Real > &mat, const std::vector< Real > &vec)
 	Implements matrix-vector multiplication.
 
static std::vector< Real > mat_trans_X_vec (const Matrix< Real > &mat, const std::vector< Real > &vec)
 	Implements transposed matrix-vector multiplication.
 
static std::vector< Real > vec_X_vec_comp (const std::vector< Real > &vec1, const std::vector< Real > &vec2)
 	Vector-vector multiplication, component-wise.
 
static void update_mat (Matrix< Real > &mat, const std::vector< Real > &errors, const std::vector< Real > &pre_output, const size_t sample_num, const Real learn_rate)
 	Updates the weight matrix based on the error in this layer and the output of the previous layer.
 

Protected Member Functions

void load_data (std::string path)
 

Protected Attributes

std::vector< cypress::Matrix< Real > > m_layers
 
std::vector< size_t > m_layer_sizes
 
std::vector< mnist_helper::CONVOLUTION_LAYER > m_filters
 
std::vector< mnist_helper::POOLING_LAYER > m_pools
 
std::vector< mnist_helper::LAYER_TYPE > m_layer_types
 
size_t m_epochs = 20
 
size_t m_batchsize = 100
 
Real learn_rate = 0.01
 
mnist_helper::MNIST_DATA m_mnist
 
mnist_helper::MNIST_DATA m_mnist_test
 
Constraint m_constraint
 

Detailed Description

template<typename Loss = MSE, typename ActivationFunction = ReLU, typename Constraint = NoConstraint>
class MNIST::MLP< Loss, ActivationFunction, Constraint >

The standard densely connected multilayer perceptron (MLP). Template arguments provide the loss function, the activation function of the neurons (experimental), and an optional constraint on the weights.

Definition at line 241 of file mnist_mlp.hpp.
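
A minimal usage sketch; the layer sizes and hyperparameters below are illustrative, the template defaults MSE, ReLU, and NoConstraint are taken from the declaration above (assumed to live in the MNIST namespace, as the unqualified defaults suggest), and the MNIST data files ("train"/"t10k" prefixes) must be reachable from the working directory or its parent:

    #include "mnist_mlp.hpp"

    int main()
    {
        // 784 inputs (28x28 pixels), one hidden layer, 10 output classes.
        MNIST::MLP<MNIST::MSE, MNIST::ReLU, MNIST::NoConstraint> mlp(
            {784, 100, 10}, /*epochs=*/5, /*batchsize=*/100, /*learn_rate=*/0.01);
        mlp.train();                               // full training loop
        auto accuracy = mlp.forward_path_test();   // accuracy on the test set
        return accuracy > 0.9 ? 0 : 1;
    }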

Constructor & Destructor Documentation

template<typename Loss = MSE, typename ActivationFunction = ReLU, typename Constraint = NoConstraint>
MNIST::MLP< Loss, ActivationFunction, Constraint >::MLP ( std::vector< size_t >  layer_sizes,
size_t  epochs = 20,
size_t  batchsize = 100,
Real  learn_rate = 0.01 
)
inline

Constructor for random initialization.

Parameters
    layer_sizes: list of #neurons per layer, beginning with the input and ending with the output layer
    epochs: number of epochs to train
    batchsize: mini-batch size before updating the weights
    learn_rate: gradients are multiplied with this rate

Definition at line 274 of file mnist_mlp.hpp.

    : m_layer_sizes(layer_sizes),
      m_epochs(epochs),
      m_batchsize(batchsize),
      learn_rate(learn_rate)
    {
        for (size_t i = 0; i < layer_sizes.size() - 1; i++) {
            m_layers.emplace_back(
                Matrix<Real>(layer_sizes[i], layer_sizes[i + 1]));
        }

        int seed = std::chrono::system_clock::now().time_since_epoch().count();
        auto rng = std::default_random_engine(seed);
        std::normal_distribution<Real> distribution(0.0, 1.0);
        for (auto &layer : m_layers) {
            // Kaiming init, best suited for ReLU activation functions
            auto scale = std::sqrt(2.0 / double(layer.rows()));
            for (size_t i = 0; i < layer.size(); i++) {
                layer[i] = distribution(rng) * scale;
            }
        }

        // Glorot uniform
        /*for (auto &layer : m_layers) {
            auto limit = std::sqrt(6.0 / Real(layer.rows() + layer.cols()));
            std::uniform_real_distribution<Real> distribution(0, limit);
            for (size_t i = 0; i < layer.size(); i++) {
                layer[i] = distribution(rng);
            }
        }*/
        try {
            load_data("");
        }
        catch (...) {
            load_data("../");
        }
        m_constraint.setup(m_layers);
    }
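As the comment in the snippet says, the random path uses Kaiming (He) initialization: a standard normal sample is scaled by sqrt(2 / n_in), i.e. for a layer with n_in = layer.rows() inputs

    w_{ij} \sim \mathcal{N}\!\left(0,\; \tfrac{2}{n_{\mathrm{in}}}\right),

which keeps the variance of ReLU pre-activations roughly constant across layers.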
template<typename Loss = MSE, typename ActivationFunction = ReLU, typename Constraint = NoConstraint>
MNIST::MLP< Loss, ActivationFunction, Constraint >::MLP ( Json &  data,
size_t  epochs = 20,
size_t  batchsize = 100,
Real  learn_rate = 0.01,
bool  random = false,
Constraint  constraint = Constraint() 
)
inline

Constructs the network from a JSON file. The repository provides Python scripts to create such files from a Keras network.

Parameters
    data: JSON object containing the network information
    epochs: number of epochs to train
    batchsize: mini-batch size before updating the weights
    learn_rate: gradients are multiplied with this rate
    random: use the structure from the JSON object, but initialize the weights randomly if true
    constraint: constrains the weights during training, defaults to no constraint

Definition at line 326 of file mnist_mlp.hpp.

    : m_epochs(epochs),
      m_batchsize(batchsize),
      learn_rate(learn_rate),
      m_constraint(constraint)
    {
        int seed = std::chrono::system_clock::now().time_since_epoch().count();
        auto rng = std::default_random_engine(seed);
        std::normal_distribution<Real> distribution(0.0, 1.0);
        for (auto &layer : data["netw"]) {
            if (layer["class_name"].get<std::string>() == "Dense") {
                auto &json = layer["weights"];
                m_layers.emplace_back(
                    Matrix<Real>(json.size(), json[0].size()));
                auto &weights = m_layers.back();
                auto scale = std::sqrt(2.0 / double(weights.rows()));
                for (size_t i = 0; i < json.size(); i++) {
                    for (size_t j = 0; j < json[i].size(); j++) {
                        if (!random) {
                            weights(i, j) = json[i][j].get<Real>();
                        }
                        else {
                            weights(i, j) = distribution(rng) * scale;
                        }
                    }
                }
                m_layer_sizes.emplace_back(m_layers.back().rows());
                m_layer_types.emplace_back(mnist_helper::LAYER_TYPE::Dense);
                cypress::global_logger().debug(
                    "MNIST", "Dense layer detected with size " +
                                 std::to_string(weights.rows()) + " times " +
                                 std::to_string(weights.cols()));
            }
            else if (layer["class_name"].get<std::string>() == "Conv2D") {
                auto &json = layer["weights"];
                size_t kernel_x = json.size();
                size_t kernel_y = json[0].size();
                size_t kernel_z = json[0][0].size();
                size_t output = json[0][0][0].size();
                size_t stride = layer["stride"];
                size_t padding = layer["padding"] == "valid" ? 0 : 1;
                std::vector<size_t> input_sizes;
                std::vector<size_t> output_sizes;
                if (!layer["input_shape_x"].empty()) {
                    input_sizes.push_back(layer["input_shape_x"]);
                    input_sizes.push_back(layer["input_shape_y"]);
                    input_sizes.push_back(layer["input_shape_z"]);
                }
                else {
                    if (m_layer_types.back() == mnist_helper::LAYER_TYPE::Conv) {
                        input_sizes.push_back(m_filters.back().output_sizes[0]);
                        input_sizes.push_back(m_filters.back().output_sizes[1]);
                        input_sizes.push_back(m_filters.back().output_sizes[2]);
                    }
                    else if (m_layer_types.back() == mnist_helper::LAYER_TYPE::Pooling) {
                        input_sizes.push_back(m_pools.back().output_sizes[0]);
                        input_sizes.push_back(m_pools.back().output_sizes[1]);
                        input_sizes.push_back(m_pools.back().output_sizes[2]);
                    }
                    else if (m_layer_types.back() == mnist_helper::LAYER_TYPE::Dense) {
                        throw std::runtime_error("Conv after Dense layer not implemented!");
                    }
                }
                output_sizes.push_back((input_sizes[0] - kernel_x + 2 * padding) / stride + 1);
                output_sizes.push_back((input_sizes[1] - kernel_y + 2 * padding) / stride + 1);
                output_sizes.push_back(output);
                mnist_helper::CONVOLUTION_FILTER conv_filter(
                    kernel_x,
                    std::vector<std::vector<std::vector<Real>>>(kernel_y,
                        std::vector<std::vector<Real>>(kernel_z,
                            std::vector<Real>(output))));
                mnist_helper::CONVOLUTION_LAYER conv = {conv_filter,
                                                        input_sizes,
                                                        output_sizes,
                                                        stride,
                                                        padding};
                m_filters.emplace_back(conv);
                auto &weights = m_filters.back().filter;
                // auto scale = std::sqrt(2.0 / double(weights.rows()));
                for (size_t i = 0; i < json.size(); i++) {
                    for (size_t j = 0; j < json[i].size(); j++) {
                        for (size_t k = 0; k < json[i][j].size(); k++) {
                            for (size_t l = 0; l < json[i][j][k].size(); l++) {
                                weights[i][j][k][l] = json[i][j][k][l].get<Real>();
                            }
                        }
                    }
                }
                m_layer_sizes.emplace_back(input_sizes[0] * input_sizes[1] * input_sizes[2]);
                m_layer_types.emplace_back(mnist_helper::LAYER_TYPE::Conv);
                cypress::global_logger().debug(
                    "MNIST", "Conv layer detected with size (" +
                                 std::to_string(json.size()) + "," + std::to_string(json[0].size()) +
                                 "," + std::to_string(json[0][0].size()) + "," +
                                 std::to_string(json[0][0][0].size()) + ")");
            }
            else if (layer["class_name"].get<std::string>() == "MaxPooling2D") {
                std::vector<size_t> size = layer["size"];
                size_t stride = layer["stride"];
                std::vector<size_t> input_sizes;
                std::vector<size_t> output_sizes;
                if (m_layer_types.empty()) {
                    throw std::runtime_error("Pooling layer must not be the first layer!");
                }
                if (m_layer_types.back() == mnist_helper::LAYER_TYPE::Conv) {
                    input_sizes.push_back(m_filters.back().output_sizes[0]);
                    input_sizes.push_back(m_filters.back().output_sizes[1]);
                    input_sizes.push_back(m_filters.back().output_sizes[2]);
                }
                else if (m_layer_types.back() == mnist_helper::LAYER_TYPE::Pooling) {
                    input_sizes.push_back(m_pools.back().output_sizes[0]);
                    input_sizes.push_back(m_pools.back().output_sizes[1]);
                    input_sizes.push_back(m_pools.back().output_sizes[2]);
                }
                else if (m_layer_types.back() == mnist_helper::LAYER_TYPE::Dense) {
                    throw std::runtime_error("Pooling after Dense not implemented!");
                }
                output_sizes.push_back((input_sizes[0] - size[0]) / stride + 1);
                output_sizes.push_back((input_sizes[1] - size[1]) / stride + 1);
                output_sizes.push_back(input_sizes[2]);
                mnist_helper::POOLING_LAYER pool = {input_sizes, output_sizes, size, stride};
                m_pools.emplace_back(pool);
                m_layer_sizes.emplace_back(input_sizes[0] * input_sizes[1] * input_sizes[2]);
                m_layer_types.emplace_back(mnist_helper::LAYER_TYPE::Pooling);
                cypress::global_logger().debug(
                    "MNIST", "Pooling layer detected with size (" +
                                 std::to_string(size[0]) + ", " + std::to_string(size[1]) +
                                 ") and stride " + std::to_string(stride));
            }
            else {
                throw std::runtime_error("Unknown layer type");
            }
        }
        m_layer_sizes.push_back(m_layers.back().cols());
        // for (auto &layer : m_layers) {
        //     m_layer_sizes.emplace_back(layer.cols());
        // }

        m_mnist = mnist_helper::loadMnistData(60000, "train");
        m_mnist_test = mnist_helper::loadMnistData(10000, "t10k");
        m_constraint.setup(m_layers);
    }
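A hedged sketch of loading an exported network; the file name is a placeholder, and streaming into the Json object assumes the nlohmann-style operator that cypress's Json alias provides:

    #include <fstream>

    #include "mnist_mlp.hpp"

    int main()
    {
        std::ifstream file("network.json");  // created by the repo's Python export scripts
        Json data;
        file >> data;

        // Keep the exported weights (random = false) and fine-tune for 5 epochs.
        MNIST::MLP<> mlp(data, /*epochs=*/5);
        mlp.train();
    }

For Conv2D entries, the constructor derives each spatial output size as (W - K + 2P)/S + 1 from input width W, kernel size K, padding P (0 for "valid"), and stride S.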

Member Function Documentation

template<typename Loss = MSE, typename ActivationFunction = ReLU, typename Constraint = NoConstraint>
size_t MNIST::MLP< Loss, ActivationFunction, Constraint >::accuracy ( const std::vector< std::vector< std::vector< Real >>> &  activations,
const std::vector< size_t > &  indices,
const size_t  start 
)
inline override virtual

Calculate the overall accuracy from the given neural network output.

Parameters
    activations: result of the forward path
    indices: list of (shuffled) indices
    start: the start index; uses images indices[start] until indices[start + batchsize - 1]
Returns
    the number of correctly classified images

Implements MNIST::MLPBase.

Definition at line 900 of file mnist_mlp.hpp.

    {
#ifndef NDEBUG
        assert(activations.size() == m_batchsize);
#endif

        auto &labels = std::get<1>(m_mnist);
        size_t sum = 0;

        for (size_t sample = 0; sample < m_batchsize; sample++) {
            if (start + sample >= indices.size())
                break;
            if (correct(labels[indices[start + sample]],
                        activations[sample].back()))
                sum++;
        }
        return sum;
    }
template<typename Loss = MSE, typename ActivationFunction = ReLU, typename Constraint = NoConstraint>
virtual void MNIST::MLP< Loss, ActivationFunction, Constraint >::backward_path ( const std::vector< size_t > &  indices,
const size_t  start,
const std::vector< std::vector< std::vector< Real >>> &  activations,
bool  last_only = false 
)
inline override virtual

Implementation of backprop.

Parameters
    indices: list of (shuffled) indices
    start: the start index; uses images indices[start] until indices[start + batchsize - 1]
    activations: result of the forward path
    last_only: true for last-layer-only training (perceptron learning rule)

Implements MNIST::MLPBase.

Definition at line 798 of file mnist_mlp.hpp.

    {
#ifndef NDEBUG
        assert(m_batchsize == activations.size());
#endif
        if (!m_filters.empty()) {
            throw std::runtime_error("Conv layer not supported in backward_path function!");
        }
        if (!m_pools.empty()) {
            throw std::runtime_error("Pooling layer not supported in backward_path function!");
        }
        const auto &labels = std::get<1>(m_mnist);
        const std::vector<cypress::Matrix<cypress::Real>> orig_weights =
            m_layers;
        for (size_t sample = 0; sample < m_batchsize; sample++) {
            if (start + sample >= indices.size())
                break;
            const auto &activ = activations[sample];
            auto error = vec_X_vec_comp(
                Loss::calc_error(labels[indices[start + sample]], activ.back()),
                ActivationFunction::derivative(activ.back()));
            // TODO works for ReLU only
            update_mat(m_layers.back(), error, activ[activ.size() - 2],
                       m_batchsize, learn_rate);
            if (!last_only) {
                for (size_t inv_layer = 1; inv_layer < m_layers.size();
                     inv_layer++) {
                    size_t layer_id = m_layers.size() - inv_layer - 1;

                    error = vec_X_vec_comp(
                        mat_X_vec(orig_weights[layer_id + 1], error),
                        ActivationFunction::derivative(activ[layer_id + 1]));
                    update_mat(m_layers[layer_id], error, activ[layer_id],
                               m_batchsize, learn_rate);
                }
            }
        }
        m_constraint.constrain_weights(m_layers);
    }
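In symbols, with batch size B, learn rate \eta, layer activations a_l, and the weights frozen to their values at the start of the batch (orig_weights), the loop implements

    \delta_L = \nabla_{a_L}\mathcal{L} \odot f'(a_L), \qquad
    \delta_l = \left(W_{l+1}\,\delta_{l+1}\right) \odot f'(a_{l+1}), \qquad
    \Delta W_l = -\frac{\eta}{B}\, a_l\, \delta_l^{\top},

where \odot is the component-wise product computed by vec_X_vec_comp.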
template<typename Loss = MSE, typename ActivationFunction = ReLU, typename Constraint = NoConstraint>
virtual void MNIST::MLP< Loss, ActivationFunction, Constraint >::backward_path_2 ( const std::vector< uint16_t > &  labels,
const std::vector< std::vector< std::vector< Real >>> &  activations,
bool  last_only = false 
)
inline override virtual

Implementation of backprop, adapted for usage in SNNs.

Parameters
    labels: vector containing the labels of the given batch
    activations: activations in the form [layer][sample][neuron]
    last_only: true for last-layer-only training (perceptron learning rule)

Implements MNIST::MLPBase.

Definition at line 848 of file mnist_mlp.hpp.

    {
#ifndef NDEBUG
        assert(m_batchsize == activations.back().size());
#endif
        if (!m_filters.empty()) {
            throw std::runtime_error("Conv layer not supported in backward_path_2 function!");
        }
        if (!m_pools.empty()) {
            throw std::runtime_error("Pooling layer not supported in backward_path_2 function!");
        }
        const auto orig_weights = m_layers;
        for (size_t sample = 0; sample < m_batchsize; sample++) {

            auto error = vec_X_vec_comp(
                Loss::calc_error(labels[sample], activations.back()[sample]),
                ActivationFunction::derivative(activations.back()[sample]));
            // TODO works for ReLU only
            update_mat(m_layers.back(), error,
                       activations[activations.size() - 2][sample], m_batchsize,
                       learn_rate);
            if (!last_only) {
                for (size_t inv_layer = 1; inv_layer < m_layers.size();
                     inv_layer++) {
                    size_t layer_id = m_layers.size() - inv_layer - 1;

                    error = vec_X_vec_comp(
                        mat_X_vec(orig_weights[layer_id + 1], error),
                        ActivationFunction::derivative(
                            activations[layer_id + 1][sample]));
                    update_mat(m_layers[layer_id], error,
                               activations[layer_id][sample], m_batchsize,
                               learn_rate);
                }
            }
            m_constraint.constrain_weights(m_layers);
        }
    }
template<typename Loss = MSE, typename ActivationFunction = ReLU, typename Constraint = NoConstraint>
const size_t& MNIST::MLP< Loss, ActivationFunction, Constraint >::batchsize ( ) const
inline override virtual

Implements MNIST::MLPBase.

Definition at line 532 of file mnist_mlp.hpp.

{ return m_batchsize; }
template<typename Loss = MSE, typename ActivationFunction = ReLU, typename Constraint = NoConstraint>
Real MNIST::MLP< Loss, ActivationFunction, Constraint >::conv_max_weight ( size_t  layer_id) const
inline override virtual

Implements MNIST::MLPBase.

Definition at line 482 of file mnist_mlp.hpp.

    {
        Real max = 0.0;
        auto layer = m_filters[layer_id];
        auto filter = layer.filter;
        for (size_t f = 0; f < layer.output_sizes[2]; f++) {
            for (size_t x = 0; x < filter.size(); x++) {
                for (size_t y = 0; y < filter[0].size(); y++) {
                    for (size_t z = 0; z < filter[0][0].size(); z++) {
                        max = filter[x][y][z][f] > max ? filter[x][y][z][f] : max;
                    }
                }
            }
        }
        return max;
    }
template<typename Loss = MSE, typename ActivationFunction = ReLU, typename Constraint = NoConstraint>
bool MNIST::MLP< Loss, ActivationFunction, Constraint >::correct ( const uint16_t  label,
const std::vector< Real > &  output 
) const
inline override virtual

Checks if the output of the network was correct.

Parameters
    label: the correct neuron
    output: the output of the layer
Returns
    true for a correct classification

Implements MNIST::MLPBase.

Definition at line 678 of file mnist_mlp.hpp.

    {
        auto it = std::max_element(output.begin(), output.end());
        auto out = std::distance(output.begin(), it);
        return out == label;
    }
template<typename Loss = MSE, typename ActivationFunction = ReLU, typename Constraint = NoConstraint>
const size_t& MNIST::MLP< Loss, ActivationFunction, Constraint >::epochs ( ) const
inline override virtual

Implements MNIST::MLPBase.

Definition at line 531 of file mnist_mlp.hpp.

{ return m_epochs; }
template<typename Loss = MSE, typename ActivationFunction = ReLU, typename Constraint = NoConstraint>
virtual std::vector<std::vector<std::vector<Real> > > MNIST::MLP< Loss, ActivationFunction, Constraint >::forward_path ( const std::vector< size_t > &  indices,
const size_t  start 
) const
inline override virtual

Forward path of the network (-> inference).

Parameters
    indices: list of (shuffled) indices
    start: the start index; uses images indices[start] until indices[start + batchsize - 1]
Returns
    std::vector< std::vector< std::vector< cypress::Real > > > the outputs of all layers, given as output[sample][layer][neuron]

Implements MNIST::MLPBase.

Definition at line 724 of file mnist_mlp.hpp.

    {
        if (!m_filters.empty()) {
            throw std::runtime_error("Conv layer not supported in forward_path function!");
        }
        if (!m_pools.empty()) {
            throw std::runtime_error("Pooling layer not supported in forward_path function!");
        }
        auto &input = std::get<0>(m_mnist);
        std::vector<std::vector<std::vector<Real>>> res;
        std::vector<std::vector<Real>> activations;
        for (auto size : m_layer_sizes) {
            activations.emplace_back(std::vector<Real>(size, 0.0));
        }
        for (size_t sample = 0; sample < m_batchsize; sample++) {
            res.emplace_back(activations);
        }

        for (size_t sample = 0; sample < m_batchsize; sample++) {
            if (start + sample >= indices.size())
                break;
            res[sample][0] = input[indices[start + sample]];
            for (size_t layer = 0; layer < m_layers.size(); layer++) {
                res[sample][layer + 1] = ActivationFunction::function(
                    mat_trans_X_vec(m_layers[layer], res[sample][layer]));
            }
        }
        return res;
    }
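Since each weight matrix is stored as weights[layer](src, tar), the per-layer step above is the transposed product

    a_{l+1} = f\!\left(W_l^{\top} a_l\right),

with f the activation function applied component-wise.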
template<typename Loss = MSE, typename ActivationFunction = ReLU, typename Constraint = NoConstraint>
virtual Real MNIST::MLP< Loss, ActivationFunction, Constraint >::forward_path_test ( ) const
inline override virtual

Forward path of test data.

Returns
cypress::Real the accuracy on test data

Implements MNIST::MLPBase.

Definition at line 760 of file mnist_mlp.hpp.

    {
        if (!m_filters.empty()) {
            throw std::runtime_error("Conv layer not supported in forward_path_test function!");
        }
        if (!m_pools.empty()) {
            throw std::runtime_error("Pooling layer not supported in forward_path_test function!");
        }
        auto &input = std::get<0>(m_mnist_test);
        auto &labels = std::get<1>(m_mnist_test);
        std::vector<std::vector<Real>> activations;
        for (auto size : m_layer_sizes) {
            activations.emplace_back(std::vector<Real>(size, 0.0));
        }
        size_t sum = 0;
        for (size_t sample = 0; sample < input.size(); sample++) {
            activations[0] = input[sample];
            for (size_t layer = 0; layer < m_layers.size(); layer++) {
                activations[layer + 1] = ActivationFunction::function(
                    mat_trans_X_vec(m_layers[layer], activations[layer]));
            }
            if (correct(labels[sample], activations.back()))
                sum++;
        }

        return Real(sum) / Real(labels.size());
    }
template<typename Loss = MSE, typename ActivationFunction = ReLU, typename Constraint = NoConstraint>
const std::vector<mnist_helper::CONVOLUTION_LAYER>& MNIST::MLP< Loss, ActivationFunction, Constraint >::get_conv_layers ( )
inline override virtual

Return all filter weights in the form of weights[x][y][depth][filter].

Returns
const std::vector< mnist_helper::CONVOLUTION_LAYER >&

Implements MNIST::MLPBase.

Definition at line 570 of file mnist_mlp.hpp.

    {
        return m_filters;
    }
template<typename Loss = MSE, typename ActivationFunction = ReLU, typename Constraint = NoConstraint>
const std::vector<size_t>& MNIST::MLP< Loss, ActivationFunction, Constraint >::get_layer_sizes ( )
inline override virtual

Return the number of neurons per layer.

Returns
const std::vector< size_t >&

Implements MNIST::MLPBase.

Definition at line 585 of file mnist_mlp.hpp.

    {
        return m_layer_sizes;
    }
template<typename Loss = MSE, typename ActivationFunction = ReLU, typename Constraint = NoConstraint>
const std::vector<mnist_helper::LAYER_TYPE>& MNIST::MLP< Loss, ActivationFunction, Constraint >::get_layer_types ( )
inline override virtual

Implements MNIST::MLPBase.

Definition at line 590 of file mnist_mlp.hpp.

    {
        return m_layer_types;
    }
template<typename Loss = MSE, typename ActivationFunction = ReLU, typename Constraint = NoConstraint>
const std::vector<mnist_helper::POOLING_LAYER>& MNIST::MLP< Loss, ActivationFunction, Constraint >::get_pooling_layers ( )
inline override virtual

Implements MNIST::MLPBase.

Definition at line 575 of file mnist_mlp.hpp.

    {
        return m_pools;
    }
template<typename Loss = MSE, typename ActivationFunction = ReLU, typename Constraint = NoConstraint>
const std::vector<cypress::Matrix<Real> >& MNIST::MLP< Loss, ActivationFunction, Constraint >::get_weights ( )
inline override virtual

Return all weights in the form of weights[layer](src,tar)

Returns
const std::vector< cypress::Matrix< cypress::Real > >&

Implements MNIST::MLPBase.

Definition at line 559 of file mnist_mlp.hpp.

    {
        return m_layers;
    }
template<typename Loss = MSE, typename ActivationFunction = ReLU, typename Constraint = NoConstraint>
const Real& MNIST::MLP< Loss, ActivationFunction, Constraint >::learnrate ( ) const
inline override virtual

Implements MNIST::MLPBase.

Definition at line 533 of file mnist_mlp.hpp.

{ return learn_rate; }
template<typename Loss = MSE, typename ActivationFunction = ReLU, typename Constraint = NoConstraint>
void MNIST::MLP< Loss, ActivationFunction, Constraint >::load_data ( std::string  path)
inline protected

Definition at line 254 of file mnist_mlp.hpp.

    {
        m_mnist = mnist_helper::loadMnistData(60000, path + "train");
        m_mnist_test = mnist_helper::loadMnistData(10000, path + "t10k");
    }
template<typename Loss = MSE, typename ActivationFunction = ReLU, typename Constraint = NoConstraint>
static std::vector<Real> MNIST::MLP< Loss, ActivationFunction, Constraint >::mat_trans_X_vec ( const Matrix< Real > &  mat,
const std::vector< Real > &  vec 
)
inline static

Implements transposed matrix vector multiplication.

Parameters
    mat: the matrix to transpose; mat.rows() == vec.size()
    vec: the vector
Returns
    std::vector< cypress::Real > resulting vector

Definition at line 636 of file mnist_mlp.hpp.

    {
#ifndef NDEBUG
        assert(mat.rows() == vec.size());
#endif
        std::vector<Real> res(mat.cols(), 0.0);
        for (size_t i = 0; i < mat.cols(); i++) {
            for (size_t j = 0; j < mat.rows(); j++) {
                res[i] += mat(j, i) * vec[j];
            }
        }
        return res;
    }
template<typename Loss = MSE, typename ActivationFunction = ReLU, typename Constraint = NoConstraint>
static std::vector<Real> MNIST::MLP< Loss, ActivationFunction, Constraint >::mat_X_vec ( const Matrix< Real > &  mat,
const std::vector< Real > &  vec 
)
inline static

Implements matrix vector multiplication.

Parameters
    mat: the matrix; mat.cols() == vec.size()
    vec: the vector
Returns
    std::vector< cypress::Real > resulting vector

Definition at line 614 of file mnist_mlp.hpp.

    {
#ifndef NDEBUG
        assert(mat.cols() == vec.size());
#endif
        std::vector<Real> res(mat.rows(), 0.0);
        for (size_t i = 0; i < mat.rows(); i++) {
            for (size_t j = 0; j < mat.cols(); j++) {
                res[i] += mat(i, j) * vec[j];
            }
        }
        return res;
    }
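
A small sketch of the two helpers side by side; the 2x3 matrix is illustrative, and linear indexing mat[i] is assumed to be row-major, as the original initialization code suggests:

    Matrix<Real> mat(2, 3);           // 2 rows, 3 columns
    for (size_t i = 0; i < mat.size(); i++) {
        mat[i] = Real(i);             // rows: {0, 1, 2} and {3, 4, 5}
    }
    std::vector<Real> ones3 = {1.0, 1.0, 1.0};
    std::vector<Real> ones2 = {1.0, 1.0};

    // mat * ones3: requires mat.cols() == vec.size(), yields {3, 12}
    auto a = MNIST::MLP<>::mat_X_vec(mat, ones3);

    // mat^T * ones2: requires mat.rows() == vec.size(), yields {3, 5, 7}
    auto b = MNIST::MLP<>::mat_trans_X_vec(mat, ones2);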
template<typename Loss = MSE, typename ActivationFunction = ReLU, typename Constraint = NoConstraint>
Real MNIST::MLP< Loss, ActivationFunction, Constraint >::max_weight ( ) const
inline override virtual

Return the largest weight in the network.

Returns
value of the weight

Implements MNIST::MLPBase.

Definition at line 471 of file mnist_mlp.hpp.

    {
        Real max = 0.0;
        for (auto &layer : m_layers) {
            auto w = mnist_helper::max_weight(layer);
            if (w > max)
                max = w;
        }
        return max;
    }
template<typename Loss = MSE, typename ActivationFunction = ReLU, typename Constraint = NoConstraint>
Real MNIST::MLP< Loss, ActivationFunction, Constraint >::max_weight_abs ( ) const
inline override virtual

Return the largest absolute weight in the network.

Returns
value of the weight

Implements MNIST::MLPBase.

Definition at line 520 of file mnist_mlp.hpp.

    {
        Real max = 0.0;
        for (auto &layer : m_layers) {
            auto w = mnist_helper::max_weight_abs(layer);
            if (w > max)
                max = w;
        }
        return max;
    }
template<typename Loss = MSE, typename ActivationFunction = ReLU, typename Constraint = NoConstraint>
Real MNIST::MLP< Loss, ActivationFunction, Constraint >::min_weight ( ) const
inline override virtual

Return the smallest weight in the network.

Returns
value of the weight

Implements MNIST::MLPBase.

Definition at line 504 of file mnist_mlp.hpp.

    {
        Real min = 0.0;
        for (auto &layer : m_layers) {
            auto w = mnist_helper::min_weight(layer);
            if (w < min)
                min = w;
        }
        return min;
    }
template<typename Loss = MSE, typename ActivationFunction = ReLU, typename Constraint = NoConstraint>
const mnist_helper::MNIST_DATA& MNIST::MLP< Loss, ActivationFunction, Constraint >::mnist_test_set ( )
inline override virtual

Returns reference to the test data.

Returns
const mnist_helper::MNIST_DATA&

Implements MNIST::MLPBase.

Definition at line 549 of file mnist_mlp.hpp.

    {
        return m_mnist_test;
    }
template<typename Loss = MSE, typename ActivationFunction = ReLU, typename Constraint = NoConstraint>
const mnist_helper::MNIST_DATA& MNIST::MLP< Loss, ActivationFunction, Constraint >::mnist_train_set ( )
inline override virtual

Returns reference to the train data.

Returns
const mnist_helper::MNIST_DATA&

Implements MNIST::MLPBase.

Definition at line 540 of file mnist_mlp.hpp.

    {
        return m_mnist;
    }
template<typename Loss = MSE, typename ActivationFunction = ReLU, typename Constraint = NoConstraint>
void MNIST::MLP< Loss, ActivationFunction, Constraint >::scale_down_images ( size_t  pooling_size = 3)
inline override virtual

Scale down the whole data set; reduces each image by the given factor in every dimension.

Parameters
    pooling_size: the factor by which each image dimension is reduced

Implements MNIST::MLPBase.

Definition at line 601 of file mnist_mlp.hpp.

    {
        m_mnist = mnist_helper::scale_mnist(m_mnist, pooling_size);
        m_mnist_test = mnist_helper::scale_mnist(m_mnist_test, pooling_size);
    }
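For example, the default factor of 3 shrinks the 28x28 MNIST images to 9x9 (assuming scale_mnist floors the resulting dimensions), so the input layer should be sized to match:

    MNIST::MLP<> mlp({81, 40, 10});  // 9 * 9 = 81 inputs after downscaling
    mlp.scale_down_images(3);        // 28x28 -> 9x9, applied to train and test data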
template<typename Loss = MSE, typename ActivationFunction = ReLU, typename Constraint = NoConstraint>
void MNIST::MLP< Loss, ActivationFunction, Constraint >::train ( unsigned  seed = 0)
inline override virtual

Starts the full training process.

Parameters
    seed: sets up the random number generator for image shuffling

Implements MNIST::MLPBase.

Definition at line 926 of file mnist_mlp.hpp.

    {
        std::vector<size_t> indices(std::get<0>(m_mnist).size());
        m_constraint.constrain_weights(m_layers);
        for (size_t i = 0; i < indices.size(); i++) {
            indices[i] = i;
        }
        if (seed == 0) {
            seed = std::chrono::system_clock::now().time_since_epoch().count();
        }
        auto rng = std::default_random_engine{seed};

        for (size_t epoch = 0; epoch < m_epochs; epoch++) {
            size_t correct = 0;
            std::shuffle(indices.begin(), indices.end(), rng);
            for (size_t current_idx = 0;
                 current_idx < std::get<1>(m_mnist).size();
                 current_idx += m_batchsize) {
                auto activations = forward_path(indices, current_idx);
                correct += accuracy(activations, indices, current_idx);
                backward_path(indices, current_idx, activations);
                m_constraint.constrain_weights(m_layers);
            }
            cypress::global_logger().info(
                "MLP", "Accuracy of epoch " + std::to_string(epoch) + ": " +
                           std::to_string(Real(correct) /
                                          Real(std::get<1>(m_mnist).size())));
        }
    }
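A short usage sketch; a non-zero seed makes the shuffling reproducible, while seed = 0 falls back to a time-based seed:

    MNIST::MLP<> mlp({784, 100, 10}, /*epochs=*/10);
    mlp.train(/*seed=*/1234);                    // logs the accuracy per epoch
    auto test_accuracy = mlp.forward_path_test();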
template<typename Loss = MSE, typename ActivationFunction = ReLU, typename Constraint = NoConstraint>
static void MNIST::MLP< Loss, ActivationFunction, Constraint >::update_mat ( Matrix< Real > &  mat,
const std::vector< Real > &  errors,
const std::vector< Real > &  pre_output,
const size_t  sample_num,
const Real  learn_rate 
)
inline static

Updates the weight matrix based on the error in this layer and the output of the previous layer.

Parameters
    mat: the weight matrix
    errors: error vector in this layer
    pre_output: output rates of the previous layer
    sample_num: number of samples in this batch == mini-batch size
    learn_rate: the rate the gradient is multiplied with

Definition at line 696 of file mnist_mlp.hpp.

    {
#ifndef NDEBUG
        assert(mat.rows() == pre_output.size());
        assert(mat.cols() == errors.size());
#endif
        Real sample_num_r(sample_num);
        for (size_t i = 0; i < mat.rows(); i++) {
            for (size_t j = 0; j < mat.cols(); j++) {
                mat(i, j) = mat(i, j) - learn_rate * pre_output[i] * errors[j] /
                                            sample_num_r;
            }
        }
    }
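
Element-wise, the loop applies the scaled outer-product update

    w_{ij} \leftarrow w_{ij} - \frac{\eta}{N}\, a_i\, \delta_j,

with learn rate \eta, mini-batch size N = sample_num, pre-synaptic output a_i, and error \delta_j.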
template<typename Loss = MSE, typename ActivationFunction = ReLU, typename Constraint = NoConstraint>
static std::vector<Real> MNIST::MLP< Loss, ActivationFunction, Constraint >::vec_X_vec_comp ( const std::vector< Real > &  vec1,
const std::vector< Real > &  vec2 
)
inline static

Vector vector multiplication, component-wise.

Parameters
    vec1: first vector
    vec2: second vector of size vec1.size()
Returns
    std::vector< cypress::Real > resulting vector

Definition at line 658 of file mnist_mlp.hpp.

    {
#ifndef NDEBUG
        assert(vec1.size() == vec2.size());
#endif
        std::vector<Real> res(vec1.size());
        for (size_t i = 0; i < vec1.size(); i++) {
            res[i] = vec1[i] * vec2[i];
        }
        return res;
    }

Member Data Documentation

template<typename Loss = MSE, typename ActivationFunction = ReLU, typename Constraint = NoConstraint>
Real MNIST::MLP< Loss, ActivationFunction, Constraint >::learn_rate = 0.01
protected

Definition at line 250 of file mnist_mlp.hpp.

template<typename Loss = MSE, typename ActivationFunction = ReLU, typename Constraint = NoConstraint>
size_t MNIST::MLP< Loss, ActivationFunction, Constraint >::m_batchsize = 100
protected

Definition at line 249 of file mnist_mlp.hpp.

template<typename Loss = MSE, typename ActivationFunction = ReLU, typename Constraint = NoConstraint>
Constraint MNIST::MLP< Loss, ActivationFunction, Constraint >::m_constraint
protected

Definition at line 260 of file mnist_mlp.hpp.

template<typename Loss = MSE, typename ActivationFunction = ReLU, typename Constraint = NoConstraint>
size_t MNIST::MLP< Loss, ActivationFunction, Constraint >::m_epochs = 20
protected

Definition at line 248 of file mnist_mlp.hpp.

template<typename Loss = MSE, typename ActivationFunction = ReLU, typename Constraint = NoConstraint>
std::vector<mnist_helper::CONVOLUTION_LAYER> MNIST::MLP< Loss, ActivationFunction, Constraint >::m_filters
protected

Definition at line 245 of file mnist_mlp.hpp.

template<typename Loss = MSE, typename ActivationFunction = ReLU, typename Constraint = NoConstraint>
std::vector<size_t> MNIST::MLP< Loss, ActivationFunction, Constraint >::m_layer_sizes
protected

Definition at line 244 of file mnist_mlp.hpp.

template<typename Loss = MSE, typename ActivationFunction = ReLU, typename Constraint = NoConstraint>
std::vector<mnist_helper::LAYER_TYPE> MNIST::MLP< Loss, ActivationFunction, Constraint >::m_layer_types
protected

Definition at line 247 of file mnist_mlp.hpp.

template<typename Loss = MSE, typename ActivationFunction = ReLU, typename Constraint = NoConstraint>
std::vector<cypress::Matrix<Real> > MNIST::MLP< Loss, ActivationFunction, Constraint >::m_layers
protected

Definition at line 243 of file mnist_mlp.hpp.

template<typename Loss = MSE, typename ActivationFunction = ReLU, typename Constraint = NoConstraint>
mnist_helper::MNIST_DATA MNIST::MLP< Loss, ActivationFunction, Constraint >::m_mnist
protected

Definition at line 251 of file mnist_mlp.hpp.

template<typename Loss = MSE, typename ActivationFunction = ReLU, typename Constraint = NoConstraint>
mnist_helper::MNIST_DATA MNIST::MLP< Loss, ActivationFunction, Constraint >::m_mnist_test
protected

Definition at line 252 of file mnist_mlp.hpp.

template<typename Loss = MSE, typename ActivationFunction = ReLU, typename Constraint = NoConstraint>
std::vector<mnist_helper::POOLING_LAYER> MNIST::MLP< Loss, ActivationFunction, Constraint >::m_pools
protected

Definition at line 246 of file mnist_mlp.hpp.


The documentation for this class was generated from the following file: mnist_mlp.hpp