/*
  File name: mlp.h
  Created by: Ljubomir Buturovic
  Created: 03/11/2001
  Purpose: structure and function declarations for multi-layer
  perceptron (MLP) learning.
*/

/*
  Copyright 2004 Ljubomir J. Buturovic

  Permission is hereby granted, free of charge, to any person
  obtaining a copy of this software and associated documentation files
  (the "Software"), to deal in the Software without restriction,
  including without limitation the rights to use, copy, modify, merge,
  publish, distribute, sublicense, and/or sell copies of the Software,
  and to permit persons to whom the Software is furnished to do so,
  subject to the following conditions:

  The above copyright notice and this permission notice shall be
  included in all copies or substantial portions of the Software.

  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  SOFTWARE.
*/

/*
  Include guard: the contents of this header are processed at most
  once per translation unit.

  NOTE(review): identifiers that begin with an underscore followed by
  an uppercase letter are reserved for the C implementation, so
  _MLP_H_ is not strictly available to user code. Consider renaming
  the guard (together with the comment on the closing #endif at the
  end of this file).
*/
#ifndef _MLP_H_
#define _MLP_H_

/*
  High and low target output values for MLP learning.

  NOTE(review): presumably MLP_TARGET_HIGH is the desired output of
  the node that corresponds to a sample's own class and MLP_TARGET_LOW
  the desired output of every other node - confirm against
  mlp_target().
*/
#define MLP_TARGET_HIGH         0.9
#define MLP_TARGET_LOW          0.1

/*
  Characteristic string that appears in .mlp files.

  NOTE(review): presumably searched for when deciding whether a file
  is an .mlp model file - confirm against the code that reads these
  files.
*/
#define MLP_SIGNATURE_STRING    "iterations:"

/*
  Values of the 'mode' argument of mlp_save() and mlp_write():
  MLP_MODE_WRITE writes the model to the output file, MLP_MODE_APPEND
  appends it.
*/
#define MLP_MODE_WRITE          0
#define MLP_MODE_APPEND         1

/*
  Supported MLP optimization methods; these are the possible values of
  the 'opt_method' argument of mlp_learn(). The constants defined here
  cover Gilbert & Nocedal cgfam() (CG+), gradient descent, WNLIB and
  simplex (Nelder-Mead).

  NOTE(review): earlier documentation also listed NR as a supported
  method, but this header defines no MLP_OPT_* constant for it and the
  value 4 is unassigned. Confirm whether NR support was removed before
  reusing that value.
*/
#define MLP_OPT_CGPLUS            1
#define MLP_OPT_GRADIENT_DESCENT  2
#define MLP_OPT_WNLIB             3
#define MLP_OPT_SIMPLEX           5

/*
  User-friendly names of the MLP_OPT_* optimization methods, one per
  method constant. MLP_STR_UNSPECIFIED is the name to report when no
  method has been specified.
*/
#define MLP_STR_CGPLUS            "CG+ conjugate gradient"
#define MLP_STR_GRADIENT_DESCENT  "gradient descent"
#define MLP_STR_WNLIB             "WNLIB gradient descent"
#define MLP_STR_SIMPLEX           "simplex (Nelder-Mead)"
#define MLP_STR_UNSPECIFIED       "unspecified"

#include <stdio.h>
#include "pcp.h"

/*
  Struct mlp describes a multi-layer perceptron neural network.

  Network topology and per-node work arrays:

    d        number of inputs.
    nlayers  number of layers. It counts hidden layers and the output
             layer, so for a network with two hidden layers 'nlayers'
             is 3.
    npl      npl[i] is the number of nodes in layer i.
    w        the weights.
    nodes    total number of nodes; it counts hidden nodes and output
             nodes.
    wlen     total number of weights in the network.
    a        vector of node activations.
    z        vector of node outputs.
    delta    vector of errors as defined in Bishop, equation (4.32).
    noff     offset of the first (left-most) output node in the
             arrays.

  The length of 'a', 'z' and 'delta' equals 'nodes'.

  Training parameters and state:

    method      optimization method.
    range       amplitude of the initial pseudo-random weights (i.e.,
                the initial weights are assigned pseudo-randomly
                within [-range, range]).
    iterations  number of iterations so far in the current training
                session.
    p_iter      number of iterations previously applied to this
                perceptron.
    itmax       maximum number of iterations allowed in the current
                training session.
    error       average output deviation per node and per input
                sample (MSE).
    mce         number of misclassified samples.
    fname       network output file name.
    seed        seed for the pseudo-random number generator rand()
                used to initialize the neural network weights.

  Node numbering: the nodes are numbered from 0, starting from the
  left. So, for a two-layer network the nodes in the first hidden
  layer are numbered 0 to npl[0]-1, and the output nodes are numbered
  npl[0] to npl[0]+npl[1]-1. In general, the number of output nodes
  equals npl[nlayers-1].

  Weight numbering: weights going into a node are numbered
  sequentially. Weights numbered 0 to d go from the bias value (w[0])
  and the d inputs (w[1] to w[d]) to node 0. Weights numbered d+1 to
  2*d+1 go from the bias value (w[d+1]) and the d inputs (w[d+2] to
  w[2*d+1]) to node 1, etc.
*/
struct mlp 
{
  int   d;
  int   nlayers;
  int   *npl;
  float *w;
  int   nodes;
  int   wlen;
  float *a;
  float *z;
  float *delta;
  int   noff;
  int   method;
  float range;
  int   iterations;
  int   p_iter;
  int   itmax;
  float error;
  int   mce;
  char  *fname;
  unsigned int seed;
};

/*
  This structure is used to pass MLP-specific parameters to adaboost()
  and bagging().

  'nlayers', 'npl', 'itmax', 'range', 'opt_method', 'eta' and 'mu'
  carry the same names as arguments of mlp_learn(); presumably they
  are forwarded to it unchanged - confirm against adaboost() and
  bagging().

  NOTE(review): 'opt_method' is declared float here, whereas
  mlp_learn() takes 'int opt_method' and the MLP_OPT_* values are
  integer constants. Confirm whether the float type is intentional.

  NOTE(review): 'alpha' is not described anywhere in this header;
  check its users for the meaning.
*/
struct mlp_options
{
  int   nlayers;
  int   *npl;
  int   itmax;
  float range;
  float opt_method;
  float eta;
  float mu;
  float alpha;
};

/*
  Free the mlp struct 'perceptron'.

  Returns (struct mlp *) 0, so the result can be assigned back to the
  caller's pointer.
*/
struct mlp *mlp_free(struct mlp *perceptron);

/*
  Return a deep copy of 'perceptron'.

  NOTE(review): the behavior on failure (for example, whether NULL is
  returned when memory cannot be allocated) is not documented here -
  check the implementation.
*/
struct mlp *mlp_clone(struct mlp *perceptron);

/*
  Calculate the outputs of the MLP defined in 'perceptron' for a given
  input 'x'.

  The function allocates space for the returned vector - it is the
  caller's responsibility to free() it.

  NOTE(review): presumably 'x' holds perceptron->d values and the
  returned vector has one element per output node - confirm against
  the implementation.
*/
float *mlp_output(struct mlp *perceptron, float *x);

/*
  Generate target outputs for MLP, given input categorized in 'nc'
  classes with 'nd[i]' samples in class i.

  Returns the generated target outputs. In case of malloc() failure,
  return NULL and set errno.
*/
float **mlp_target(int nc, int *nd);

/*
  Calculate the value of the error function for the perceptron
  described by 'perceptron' at weight vector 'w[1..wlen]'.

  'w' is 1-based, as prescribed by the Numerical Recipes optimization
  routines API. 'x' holds the 'nsamples' input samples, 't' are the
  target output values and 'nd[i]' is the cardinality of class 'i'.

  NOTE(review): 'errc', 'outdev' and 'fdbg' are not described here;
  presumably they are the error-code output, report stream and debug
  stream used by the other functions in this header - confirm.

  The function is specific to the NR optimizer. The other optimizers
  use mlp_function().
*/
float mlp_criterion(float *w, struct mlp *perceptron, float **x, int nsamples,
		    int *nd, float **t, int *errc, FILE *outdev, FILE *fdbg);

/*
  Set the derivative vector 'dw' of the MLP criterion function for the
  MLP defined by 'perceptron' and 'w'.

  NOTE(review): 'x', 'nsamples', 't', 'errc', 'outdev' and 'fdbg' are
  presumably used as in mlp_criterion(), and 'w'/'dw' are presumably
  1-based like the 'w' argument of mlp_criterion() - confirm against
  the implementation.
*/
void mlp_derivative(float *w, float *dw, struct mlp *perceptron, float **x,
		    int nsamples, float **t, int *errc, FILE *outdev, 
		    FILE *fdbg);

/*
  Optimize the weights of an MLP neural network using up to 'itmax'
  iterations of the 'opt_method' optimization method. The function
  calculates the set of weights which optimize the square error and
  stores them in 'fname'. It periodically reports results on 'outdev'.

  Arguments:

    opt_method    optimization method; the possible values are the
                  MLP_OPT_* constants defined in mlp.h.
    x             'nsamples' input vectors of length 'd'.
    nd            nd[i] is the number of samples in class 'i'.
    t             'nsamples' target output vectors.
    eta, mu       step size and momentum parameters for gradient
                  descent optimization (therefore used only if
                  'opt_method' is MLP_OPT_GRADIENT_DESCENT).
    mlp_continue  0 to begin learning, 1 to continue starting with
                  the network in 'fname'.

  NOTE(review): 'nlayers', 'npl', 'range' and 'seed' are not described
  here; presumably they have the meaning documented for the fields of
  the same name in struct mlp - confirm.

  In case of success, the function returns an 'mlp' structure which
  contains the trained neural network and sets 'errc' to 0. In case of
  error, it returns NULL and sets 'errc', except for LERR_LNSEARCH
  (line search failure in the optimization routine) and LERR_DESCENT
  (optimization procedure unable to find a descent direction) errors.
  In those cases, the function returns the trained network, but sets
  'errc' to LERR_LNSEARCH or LERR_DESCENT. This is done because the
  optimization function often reports these errors, and yet finds a
  decent minimum. The caller may choose to ignore these two error
  codes.
*/
struct mlp *mlp_learn(int opt_method, float **x, int nsamples, int *nd, int d, 
		      float **t, int nlayers, int *npl, int itmax, float range, 
		      float eta, float mu, FILE *outdev, int mlp_continue, 
		      char *fname, unsigned int seed, int *errc, FILE *fdbg);

/*
  Optimize the weights of an MLP neural network using the modified
  Polak-Ribiere conjugate gradient method. 't' are 'dset->nv' target
  output vectors. The function calculates the set of weights which
  optimize the square error, stores them in perceptron->w, and saves
  them in perceptron->fname. It periodically reports results on
  'outdev'.

  In case of success, return 0, otherwise return -1 and set the error
  code in 'errc'.
*/
int mlp_optimize(struct dataset *dset, float **t, struct mlp *perceptron,
		 FILE *outdev, int *errc, FILE *fdbg);

/*
  Optimize the weights of an MLP neural network using the gradient
  descent algorithm defined by step size 'eta' and the momentum term
  defined by 'mu'. 't' are 'dset->nv' target output vectors. The
  function calculates the set of weights which optimize the square
  error, stores them in perceptron->w, and saves them in
  perceptron->fname. It periodically reports results on 'outdev'.

  In case of success, return 0, otherwise return -1 and set the error
  code in 'errc'.
*/
int mlp_optimize_gradient_descent(struct dataset *dset, float **t, struct mlp *perceptron,
				  float eta, float mu, FILE *outdev, int *errc, FILE *fdbg);

/*
  Second declaration of mlp_optimize() (modified Polak-Ribiere
  conjugate gradient optimization of the MLP weights; returns 0 on
  success, -1 with the error code in 'errc' otherwise).

  NOTE(review): this prototype is identical to the mlp_optimize()
  prototype that precedes mlp_optimize_gradient_descent() in this
  header. Repeating a compatible declaration is legal C, but this copy
  is redundant and is a candidate for removal.
*/
int mlp_optimize(struct dataset *dset, float **t, struct mlp *perceptron,
		 FILE *outdev, int *errc, FILE *fdbg);

/*
  Save the multi-layer perceptron model described by 'perceptron' in
  the file perceptron->fname.

  'mode' selects how the file is updated: the model is written
  (mode == MLP_MODE_WRITE) or appended (mode == MLP_MODE_APPEND) to
  the file. If mode == MLP_MODE_APPEND, 'index'/'weight' are prepended
  to the saved model, otherwise the default values (1/1.0) are
  prepended.

  In case of success, return 0. In case of failure, return -1 and set
  errno.
*/
int mlp_save(struct mlp *perceptron, int mode, int index, float weight);

/*
  File-pointer-based version of mlp_save(): takes the destination as
  the stream 'fptr' in addition to the mlp_save() arguments
  'perceptron', 'mode', 'index' and 'weight'.

  In case of success, return 0. In case of failure, return -1 and set
  errno.
*/
int mlp_write(FILE *fptr, struct mlp *perceptron, int mode, int index, float weight);

/*
  Pointer to a criterion (error) function. The parameter list is the
  same as that of mlp_criterion(): weight vector, perceptron, input
  samples, number of samples, class cardinalities, target outputs,
  error code, report stream and debug stream.
*/
typedef float (*mlp_func)(float [], struct mlp *, float **, int, int *, float **,
			  int *, FILE *, FILE *);

/*
  Pointer to a function that computes the derivative of the criterion
  function. The parameter list is the same as that of
  mlp_derivative(): weight vector, derivative vector, perceptron,
  input samples, number of samples, target outputs, error code, report
  stream and debug stream.
*/
typedef void (*mlp_dfunc)(float [], float [], struct mlp *, float **, int, 
			  float **, int *, FILE *, FILE *);

/*
  Pointer to a function of a single float variable that also receives
  the MLP context. The parameter list is the same as that of f1dim().
*/
typedef float (*mlp_f1dim)(float, struct mlp *, float **, int, int *, float **,
			   int *, FILE *, FILE *);

/*
  Pointer to a function with the same parameter list as df1dim(). The
  type is structurally identical to mlp_f1dim.

  NOTE(review): presumably denotes the derivative of an mlp_f1dim
  function along the search line - confirm against dbrent().
*/
typedef float (*mlp_df1dim)(float, struct mlp *, float **, int, int *, float **,
			    int *, FILE *, FILE *);

/*
  Minimization routine driven by the criterion function 'func' and its
  derivative 'dfunc'. 'perceptron', 'x', 'nsamples', 'nd', 't',
  'errc', 'outdev' and 'fdbg' have the same names and types as the
  arguments of mlp_criterion().

  NOTE(review): the name and the leading parameters (p, n, ftol, iter,
  fret, func, dfunc) match the Numerical Recipes conjugate gradient
  minimizer frprmn(); presumably this is that routine extended with
  the MLP context, with 'p' 1-based - confirm against the
  implementation.
*/
void frprmn(float p[], int n, float ftol, int *iter, float *fret, mlp_func func,
	    mlp_dfunc dfunc, struct mlp *perceptron, float **x, int nsamples, 
	    int *nd, float **t, int *errc, FILE *outdev, FILE *fdbg);

/*
  Line minimization helper that uses the criterion function 'func'
  only (no derivative). The trailing arguments have the same names and
  types as those of mlp_criterion().

  NOTE(review): name and leading parameters (p, xi, n, fret, func)
  match the Numerical Recipes routine linmin(), which minimizes along
  direction 'xi' starting from point 'p'; presumably the same contract
  applies here - confirm against the implementation.
*/
void linmin(float p[], float xi[], int n, float *fret, mlp_func func,
	    struct mlp *perceptron, float **x, int nsamples, int *nd, float **t,
	    int *errc, FILE *outdev, FILE *fdbg);

/*
  One-dimensional minimization of the function 'f' (an mlp_f1dim)
  without derivative information. The trailing arguments have the same
  types as those of f1dim().

  NOTE(review): name and leading parameters (ax, bx, cx, f, tol, xmin)
  match the Numerical Recipes routine brent(), which takes a
  bracketing triplet, stores the abscissa of the minimum in '*xmin'
  and returns the minimum function value; presumably the same contract
  applies here - confirm against the implementation.
*/
float brent(float ax, float bx, float cx, mlp_f1dim f, float tol, float *xmin,
	    struct mlp *perceptron, float **_x, int nsamples, int *nd, float **t,
	    int *errc, FILE *outdev, FILE *fdbg);

/*
  Function of the single variable 'x' with the MLP context as
  additional arguments; its signature matches the mlp_f1dim type.

  NOTE(review): presumably, as in Numerical Recipes f1dim(), this
  evaluates the criterion function at distance 'x' along the current
  line-search direction - confirm against the implementation.
*/
float f1dim(float x, struct mlp *perceptron, float **_x, int nsamples, int *nd, 
	    float **t, int *errc, FILE *outdev, FILE *fdbg);

/*
  Helper operating on three abscissas ('ax', 'bx', 'cx') and the
  corresponding function values ('fa', 'fb', 'fc') of the
  one-dimensional function 'func'; all six are passed by pointer. The
  trailing arguments have the same names and types as those of
  mlp_criterion().

  NOTE(review): name and leading parameters match the Numerical
  Recipes routine mnbrak(), which brackets a minimum starting from
  '*ax' and '*bx'; presumably the same contract applies here - confirm
  against the implementation.
*/
void mnbrak(float *ax, float *bx, float *cx, float *fa, float *fb,
	    float *fc, mlp_f1dim func, struct mlp *perceptron, float **x,
	    int nsamples, int *nd, float **t, int *errc, FILE *outdev,
	    FILE *fdbg);

/*
  Variant of linmin() that additionally receives the derivative
  function 'dfunc'. The trailing arguments have the same names and
  types as those of mlp_criterion().

  NOTE(review): name and leading parameters match the Numerical
  Recipes routine dlinmin() (line minimization using derivative
  information); presumably the same contract applies here - confirm
  against the implementation.
*/
void dlinmin(float p[], float xi[], int n, float *fret, mlp_func func, mlp_dfunc dfunc,
	     struct mlp *perceptron, float **x, int nsamples, int *nd, float **t,
	     int *errc, FILE *outdev, FILE *fdbg);

/*
  Variant of brent() that additionally receives 'df' (an mlp_df1dim).
  The trailing arguments have the same types as those of f1dim().

  NOTE(review): name and leading parameters match the Numerical
  Recipes routine dbrent() (one-dimensional minimization using first
  derivatives, storing the abscissa of the minimum in '*xmin' and
  returning the minimum function value); presumably the same contract
  applies here - confirm against the implementation.
*/
float dbrent(float ax, float bx, float cx, mlp_f1dim f, mlp_df1dim df,
	     float tol, float *xmin, struct mlp *perceptron, float **_x, 
	     int nsamples, int *nd, float **t, int *errc, FILE *outdev, FILE *fdbg);

/*
  Function of the single variable 'x' with the MLP context as
  additional arguments; its signature matches the mlp_df1dim type.

  NOTE(review): presumably, as in Numerical Recipes df1dim(), this
  evaluates the derivative of the criterion function at distance 'x'
  along the current line-search direction - confirm against the
  implementation.
*/
float df1dim(float x, struct mlp *perceptron, float **_x, int nsamples, int *nd, 
	     float **t, int *errc, FILE *outdev, FILE *fdbg);

/*
  Classify dataset 'dset' using the MLP defined in 'perceptrons'. The
  MLP outputs are placed in the returned struct mlp, and the
  classification results are placed in dset->cx.

  The function assigns each vector to the class corresponding to the
  node with the largest output, unless 'thd' is > 0. In that case, the
  assignment is performed only if the largest node output exceeds
  'thd'.

  'perceptrons' is an array of 'struct mlp' objects. It can be
  created, for example, from a file by calling mlp_load().

  In case of error, return NULL and place the error code in 'errc'.
*/
struct mlp *mlp_predict(struct dataset *dset, struct mlp **perceptrons, float thd, int *errc);

/*
  Load one or more multi-layer perceptrons described in 'fname' into a
  (struct mlp **) array and return that array. The last element in the
  array is NULL.

  NOTE(review): earlier documentation stated "returns 0 on success,
  -1 on failure and sets errno", which does not match the pointer
  return type. Presumably failure is reported by returning NULL with
  errno set - confirm against the implementation.
*/
struct mlp **mlp_load(char *fname);

#endif /* _MLP_H_ */