/* File name: mlp.h Created by: Ljubomir Buturovic Created: 03/11/2001 Purpose: structure and function declarations for multi-layer perceptron (MLP) learning. */ /* Copyright 2004 Ljubomir J. Buturovic Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #ifndef _MLP_H_ #define _MLP_H_ /* Target values for MLP learning. */ #define MLP_TARGET_HIGH 0.9 #define MLP_TARGET_LOW 0.1 /* Characteristic string in .mlp files. */ #define MLP_SIGNATURE_STRING "iterations:" /* Write/append MLP file constants. */ #define MLP_MODE_WRITE 0 #define MLP_MODE_APPEND 1 /* Supported MLP optimization methods. The methods are simplex (Nelder-Mead), Gilbert & Nocedal cgfam() (CG+), gradient descent, WNLIB and NR. */ #define MLP_OPT_CGPLUS 1 #define MLP_OPT_GRADIENT_DESCENT 2 #define MLP_OPT_WNLIB 3 #define MLP_OPT_SIMPLEX 5 /* User-friendly names of the above. */ #define MLP_STR_CGPLUS "CG+ conjugate gradient" #define MLP_STR_GRADIENT_DESCENT "gradient descent" #define MLP_STR_WNLIB "WNLIB gradient descent" #define MLP_STR_UNSPECIFIED "unspecified" #include #include "pcp.h" /* Struct mlp describes a multi-layer perceptron neural network with 'd' inputs, 'nlayers' layers and 'npl[i]' nodes in layer i, and weights 'w'. 'nlayers' counts hidden layers and output layer, so for a network with two hidden layers 'nlayers' is 3. 'wlen' is total number of weights in the network. 'nodes' is total number of nodes. It counts hidden nodes and output nodes. 'a' is vector of node activations. 'z' is vector of node outputs. 'delta' is vector of errors as defined in Bishop, equation (4.32). The length of 'a', 'z' and 'delta' equals 'nodes'. 'noff' is offset of the first (left-most) output node in the arrays. 'seed' is seed for pseudo-random number generator rand() used to initialize the neural network weights. 'range' is amplitude of initial pseudo-random weights (i.e., the initial weights are assigned pseudo-randomly within [-range, range]). 'iterations' is the number of iterations so far in the current training session. 'p_iter' is the number of iterations previously applied to this perceptron. 'itmax' is maximum number of iterations allowed in the current training session. 'error' is average output deviation per node and per input sample (MSE). 'mce' is number of misclassified samples. 'fname' is network output file name. The nodes are numbered from 0, starting from left. So, for a two-layer network the nodes in first hidden layer are numbered 0 to npl[0]-1. Output nodes are numbered npl[0] to npl[0]+npl[1]-1. In general, the number of output nodes equals npl[nl-1], where nl is the number of layers. Weights numbered 0 to d go from bias value (w[0]) and the d inputs (w[1] to w[d]) to node 0. Weights numbered d+1 to 2*d-1 go from the bias value to node 1, etc. Thus, weights going into a node are sequentially numbered. 'method' is optimization method. */ struct mlp { int d; int nlayers; int *npl; float *w; int nodes; int wlen; float *a; float *z; float *delta; int noff; int method; float range; int iterations; int p_iter; int itmax; float error; int mce; char *fname; unsigned int seed; }; /* This structure is used to pass MLP-specific parameters to adaboost() and bagging(). */ struct mlp_options { int nlayers; int *npl; int itmax; float range; float opt_method; float eta; float mu; float alpha; }; /* Free mlp struct. Returns (struct mlp *) 0. */ struct mlp *mlp_free(struct mlp *perceptron); /* Return deep copy of 'perceptron'. */ struct mlp *mlp_clone(struct mlp *perceptron); /* Calculate outputs for MLP defined in 'perceptron' for a given input 'x'. The function allocates space for the returned vector - it is the caller's responsiblity to free() it. */ float *mlp_output(struct mlp *perceptron, float *x); /* Generate target outputs for MLP, given input categorized in 'nc' classes with 'nd[i]' samples in class i. In case of malloc() failure, return NULL and set errno. */ float **mlp_target(int nc, int *nd); /* The function calculates the value of error function for the perceptron described by 'perceptron' at weight vector 'w[1..wlen]'. 'w' is 1-based, as prescribed by Numerical Recipes optimization routines API. 'nsamples' is the number of input samples in 'x', 't' are target output values. 'nd[i]' is cardinality of class 'i'. The function is specific to NR optimizer. The other optimizers use mlp_function(). */ float mlp_criterion(float *w, struct mlp *perceptron, float **x, int nsamples, int *nd, float **t, int *errc, FILE *outdev, FILE *fdbg); /* The function sets the derivative vector 'dw' of MLP criterion function for MLP defined by 'perceptron' and 'w'. */ void mlp_derivative(float *w, float *dw, struct mlp *perceptron, float **x, int nsamples, float **t, int *errc, FILE *outdev, FILE *fdbg); /* Optimize weights of an MLP neural network using up to 'itmax' iterations of 'opt_method' optimization method. 'x' are 'nsamples' input vectors of length 'd' with 'nd[i]' samples per class 'i'. 't' are 'nsamples' target output vectors. 'eta' and 'mu' are the step size and momentum parameters for gradient descent optimization (therefore used only if 'opt_method' is MLP_OPT_GRADIENT_DESCENT). 'mlp_continue' is 0 to begin learning, 1 to continue starting with network in 'fname'. The function calculates the set of weights which optimize the square error and stores them in 'fname'. It periodically reports results on 'outdev'. The possible values for opt_method are defined in mlp.h. In case of success, the function returns 'mlp' structure which contains the trained neural network and sets 'errc' to 0. In case of error, return NULL and set 'errc', except for LERR_LNSEARCH (line search failure in optimization routine) and LERR_DESCENT (optimization procedure unable to find descent direction) errors. In those cases, the function returns the trained network, but sets 'errc' to LERR_LNSEARCH or LERR_DESCENT. This is done because optimization function often reports these errors, and yet finds a decent minimum. The caller may choose to ignore these two error codes. */ struct mlp *mlp_learn(int opt_method, float **x, int nsamples, int *nd, int d, float **t, int nlayers, int *npl, int itmax, float range, float eta, float mu, FILE *outdev, int mlp_continue, char *fname, unsigned int seed, int *errc, FILE *fdbg); /* Optimize weights of an MLP neural network using modified Polak-Ribiere conjugate gradient method. 't' are 'dset->nv' target output vectors. The function calculates the set of weights which optimize the square error and stores them in mlp->w, and saves them in mlp->fname. It periodically reports results on 'outdev'. In case of success, return 0, otherwise return -1 and set error code in 'errc'. */ int mlp_optimize(struct dataset *dset, float **t, struct mlp *perceptron, FILE *outdev, int *errc, FILE *fdbg); /* Optimize weights of an MLP neural network using gradient descent algorithm defined by step size 'eta' and using momentum term defined by 'mu'. 't' are 'dset->nv' target output vectors. The function calculates the set of weights which optimize the square error and stores them in mlp->w, and saves them in mlp->fname. It periodically reports results on 'outdev'. In case of success, return 0, otherwise return -1 and set error code in 'errc'. */ int mlp_optimize_gradient_descent(struct dataset *dset, float **t, struct mlp *perceptron, float eta, float mu, FILE *outdev, int *errc, FILE *fdbg); /* Optimize weights of an MLP neural network using modified Polak-Ribiere conjugate gradient method. 't' are 'dset->nv' target output vectors. The function calculates the set of weights which optimize the square error and stores them in mlp->w, and saves them in mlp->fname. It periodically reports results on 'outdev'. In case of success, return 0, otherwise return -1 and set error code in 'errc'. */ int mlp_optimize(struct dataset *dset, float **t, struct mlp *perceptron, FILE *outdev, int *errc, FILE *fdbg); /* Save multi-layer perceptron model described by 'perceptron' in perceptron->fname. The model is written (mode == MLP_MODE_WRITE) or appended (mode == MLP_MODE_APPEND) to the perceptron->fname file. If mode == MLP_MODE_APPEND, prepend 'index'/'weight' to the model saved, otherwise prepend the default values (1/1.0). In case of success, return 0. In case of failure, return -1 and set errno. */ int mlp_save(struct mlp *perceptron, int mode, int index, float weight); /* File-pointer-based version of mlp_save(). In case of success, return 0. In case of failure, return -1 and set errno. */ int mlp_write(FILE *fptr, struct mlp *perceptron, int mode, int index, float weight); typedef float (*mlp_func)(float [], struct mlp *, float **, int, int *, float **, int *, FILE *, FILE *); typedef void (*mlp_dfunc)(float [], float [], struct mlp *, float **, int, float **, int *, FILE *, FILE *); typedef float (*mlp_f1dim)(float, struct mlp *, float **, int, int *, float **, int *, FILE *, FILE *); typedef float (*mlp_df1dim)(float, struct mlp *, float **, int, int *, float **, int *, FILE *, FILE *); void frprmn(float p[], int n, float ftol, int *iter, float *fret, mlp_func func, mlp_dfunc dfunc, struct mlp *perceptron, float **x, int nsamples, int *nd, float **t, int *errc, FILE *outdev, FILE *fdbg); void linmin(float p[], float xi[], int n, float *fret, mlp_func func, struct mlp *perceptron, float **x, int nsamples, int *nd, float **t, int *errc, FILE *outdev, FILE *fdbg); float brent(float ax, float bx, float cx, mlp_f1dim f, float tol, float *xmin, struct mlp *perceptron, float **_x, int nsamples, int *nd, float **t, int *errc, FILE *outdev, FILE *fdbg); float f1dim(float x, struct mlp *perceptron, float **_x, int nsamples, int *nd, float **t, int *errc, FILE *outdev, FILE *fdbg); void mnbrak(float *ax, float *bx, float *cx, float *fa, float *fb, float *fc, mlp_f1dim func, struct mlp *perceptron, float **x, int nsamples, int *nd, float **t, int *errc, FILE *outdev, FILE *fdbg); void dlinmin(float p[], float xi[], int n, float *fret, mlp_func func, mlp_dfunc dfunc, struct mlp *perceptron, float **x, int nsamples, int *nd, float **t, int *errc, FILE *outdev, FILE *fdbg); float dbrent(float ax, float bx, float cx, mlp_f1dim f, mlp_df1dim df, float tol, float *xmin, struct mlp *perceptron, float **_x, int nsamples, int *nd, float **t, int *errc, FILE *outdev, FILE *fdbg); float df1dim(float x, struct mlp *perceptron, float **_x, int nsamples, int *nd, float **t, int *errc, FILE *outdev, FILE *fdbg); /* Classify dataset 'dset' using MLP defined in 'perceptrons'. The MLP outputs are placed in the returned struct mlp, and the classification results are placed in dset->cx. The function assigns each vector to the class corresponding to the node with the largest output, unless 'thd' is > 0. In that case, the assignment is performed only if the largest node output exceeds 'thd'. 'perceptrons' is an array of 'struct mlp' objects. It can be created, for example, from a file using calling mlp_load(). In case of error, return NULL and place error code in 'errc'. */ struct mlp *mlp_predict(struct dataset *dset, struct mlp **perceptrons, float thd, int *errc); /* Load one or more multi-layer perceptrons described in 'fname' into (struct mlp **) array. The last element in the array is NULL. In case of success, returns 0. In case of failure, returns -1 and sets errno. */ struct mlp **mlp_load(char *fname); #endif /* _MLP_H_ */