/*
  Copyright 2004 Ljubomir J. Buturovic

  Permission is hereby granted, free of charge, to any person
  obtaining a copy of this software and associated documentation files
  (the "Software"), to deal in the Software without restriction,
  including without limitation the rights to use, copy, modify, merge,
  publish, distribute, sublicense, and/or sell copies of the Software,
  and to permit persons to whom the Software is furnished to do so,
  subject to the following conditions:

  The above copyright notice and this permission notice shall be
  included in all copies or substantial portions of the Software.

  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  SOFTWARE.
*/

#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include "lau.h"
#include "lmat.h"
#include "pcl_svm.h"

static struct svm_model **free_models(struct svm_model **models, int nmodels)
{
  int j;

  for (j = 0; j < nmodels; j++)
    free(models[j]);
  return (struct svm_model **) 0;
}

/*
  Load SVM models from 'fname', potentially in Adaboost format. Return
  number of models in 'nmodels' and their respective weights in
  'weights'.

  In case of error, return NULL and set errno. Possible errors are
  malloc() and file access errors.
*/
struct svm_model **pcl_svm_load(char *svm_fname, int *nmodels, float **weights)
{
  int   nl;
  int   llen;
  int   nmd;
  int   status;
  int   adaboost_file;
  int   done;
  int   capacity;
  char  *temp_fname;
  char  *line;
  char  *str;
  float *w;
  char  **tokens;
  struct svm_model **models = (struct svm_model **) 0;
  struct svm_model *model;
  FILE *fptr;
  FILE *temp_fptr;

  nmd = 0;
  temp_fname = (char *) 0;
  temp_fptr = (FILE *) 0;
  /*
    Check if this is an Adaboost model file.
  */
  fptr = fopen(svm_fname, "r");
  if (fptr)
    {
      nl = file_info(svm_fname, &llen, (int *) 0, '\0');
      if (nl >= 0)
	{
	  line = malloc((llen+2)*sizeof(char));
	  if (line)
	    {
	      adaboost_file = 0;
	      fgets(line, llen+2, fptr);
	      if (strstr(line, "combined"))
		adaboost_file = 1;
	      fclose(fptr);
	      capacity = INIT_NO_MODELS;
	      /*
		Read the file.
	      */
	      w = malloc(capacity*sizeof(float));
	      if (w)
		{
		  fptr = fopen(svm_fname, "r");
		  if (fptr)
		    {
		      models = malloc(capacity*sizeof(struct svm_model *));
		      if (models)
			{
			  status = 0;
			  w[0] = 1.0;
			  nmd = 0;
			  done = 0;
			  temp_fname = malloc((L_tmpnam+1)*sizeof(char));
			  if (temp_fname)
			    {
			      tmpnam(temp_fname);
			      temp_fptr = fopen(temp_fname, "w");
			      if (!temp_fptr)
				{
				  status = -1;
				  string_free(temp_fname);
				}
			    }
			  else
			    status = -1;
			  while ((done == 0) && (status == 0))
			    {
			      str = fgets(line, llen+2, fptr);
			      if (!str || (*line == '\n'))
				{
				  fclose(temp_fptr);
				  if (!str)
				    done = 1;
				  if ((adaboost_file == 0) || ((adaboost_file == 1) && (done == 0)))
				    {
				      model = svm_load_model(temp_fname);
				      if (model == (struct svm_model *) 0)
					{
					  models = free_models(models, nmd);
					  status = -1;
					}
				      else
					models[nmd++] = model;
				      if (adaboost_file == 1)
					temp_fptr = fopen(temp_fname, "w");
				      if (nmd >= capacity)
					{
					  capacity += capacity;
					  models = realloc(models, capacity*sizeof(struct svm_model *));
					  w = realloc(w, capacity*sizeof(float));
					}
				    }
				}
			      else if (strstr(line, "combined"))
				{
				  tokens = str_tokenize(line, " ");
				  w[nmd] = atof(tokens[6]);
				  str_free(tokens);
				}
			      else
				fprintf(temp_fptr, line);
			    }
			  if (status == 0)
			    *weights = w;
			  *nmodels = nmd;
			}
		      fclose(fptr);
		      status = remove(temp_fname);
		      if (status == -1)
			models = free_models(models, nmd);
		    }
		  *weights = w;
		}
	    }
	  string_free(line);
	}
    }
  return models;
}

/*
  Save 'models' in 'svm_fname'. 

  Return 0 in case of success, -1 in case of error and set errno. The
  possible errors are file access errors or EINVAL. errno is set to
  EINVAL if svm_fname is NULL, svm_model is NULL, or nmodels <= 0.
*/
int pcl_svm_save(char *svm_fname, struct svm_model **models, int nmodels)
{
  int  status;
  int  i;
  FILE *fptr;
	      
  status = 0;
  if (!models || (nmodels <= 0) || !svm_fname)
    status = -1;
  else
    {
      fptr = fopen(svm_fname, "w");
      if (fptr)
	{
	  if (nmodels > 1)
	    {
	      for (i = 0; (i < nmodels) && !status; i++)
		status = save_svm(fptr, models[i], i+1, 1.0);
	    }
	  else
	    status = svm_save_model(svm_fname, models[0]);
	  if (!status)
	    status = fclose(fptr);
	}
      else
	status = -1;
    }
  return status;
}

static struct svm_parameter *clone_parameters(struct pcl_svm_parameters *parameters)
{
  struct svm_parameter *svm_parameters;
  
  svm_parameters = calloc(1, sizeof(struct svm_parameter));
  svm_parameters->svm_type = C_SVC;
  svm_parameters->cache_size = 40;
  svm_parameters->eps = 0.001;
  svm_parameters->shrinking = 1;

  svm_parameters->kernel_type = parameters->kernel_type;
  svm_parameters->degree = parameters->degree;
  svm_parameters->gamma = parameters->gamma;
  svm_parameters->coef0 = parameters->coef0;
  svm_parameters->C = parameters->C;
  return svm_parameters;
}

/*
  Train a committee of 'nmodels' Support Vector Machines using 'dset'
  and 'parameters'.

  Return the combined model represented by an array of (struct
  svm_model *) structures. The array can be used for prediction with
  pcl_svm_predict().

  In case of error, return NULL.
*/
struct svm_model **pcl_svm_learn(struct dataset *dset, int nmodels, 
				 struct pcl_svm_parameters *parameters, int *errc)
{
  int    i;
  int    nvec;
  int    status;
  int    bag_size;
  int    *bnd;
  float  **bag = (float **) 0;
  struct svm_model *model; /* individual model */
  /* array of individual models, comprising the bagging classifier */
  struct svm_model **models = (struct svm_model **) 0; 
  struct dataset *bag_set;
  struct svm_problem *problem;
  struct svm_problem **probs;
  struct svm_parameter *svm_parameters;
  FILE   *fdbg;

  fdbg = (FILE *) 0;
  /*
    NOTE: In November 2002, decided to fix bag_size to number of
    vectors in the training dataset. Perhaps make it a parameter.
  */
  bag_size = dset->nv;
  probs = malloc(nmodels*sizeof(struct svm_problem *));
  bag = malloc(bag_size*sizeof(float *));
  bnd = calloc(dset->c, sizeof(int));
  status = 0;
  if (status == 0)
    models = calloc(nmodels+1, sizeof((void **) 0));
  svm_parameters = clone_parameters(parameters);
  for (i = 0; (i < nmodels) && (status == 0); i++)
    {
      /*
	By design, we don't perform resampling if only one model is
	requested.
      */
      if (nmodels == 1)
	{
	  nvec = dset->nv;
	  ivec_copy(bnd, dset->nd, dset->c);
	  bag = dset->x;
	}
      else
	nvec = resample(i, dset, bag_size, bag, bnd, fdbg);
      bag_set = dataset_lt(dset->d, dset->c, bnd, nvec, (char **) 0, bag);
      problem = create_problem(bag_set); 
      if (!problem)
	status = -1;
      else
	{
	  free(bag_set);
	  probs[i] = problem;
	  model = svm_train(problem, svm_parameters);
	  models[i] = model;
	}
    }
  if (status == -1)
    {
      free(models);
      models = (struct svm_model **) 0;
      *errc = errno;
    }
  free(bag);
  free(bnd);
  free(svm_parameters);
  return models;
}

/*
  Check Support Vector Machine 'parameters' for 'dset'.

  Return NULL in case of success; otherwise return string containing
  the error message.
*/
char *pcl_svm_check(struct dataset *dset, struct pcl_svm_parameters *parameters)
{
  char *pcheck;
  struct svm_problem *problem;
  struct svm_parameter *svm_parameters;

  pcheck = (char *) 0;
  if (dset && parameters)
    {
      problem = create_problem(dset); 
      svm_parameters = clone_parameters(parameters);
      pcheck = (char *) svm_check_parameter(problem, svm_parameters);
      free(svm_parameters);
      free_problem(problem);
      /*
	Additional checks.
      */
      if (!pcheck)
	{
	  if ((svm_parameters->kernel_type == POLY) &&
	      (svm_parameters->degree < 0))
	    pcheck = strdup("negative degree for POLY kernel");
	}
    }
  return pcheck;
}

/*
  Classify 'dset' using a committee of 'nmodels' Support Vector
  Machine classifiers stored in 'models'. Predicted classes are stored
  in dset->prediction.

  The input models can be built using pcl_svm_learn().
*/
void pcl_svm_predict(struct dataset *dset, struct svm_model **models, int nmodels,
		     float *weights, int *errc)
{
  int i;
  int j;
  int predicted_class;
  float *predictions; 
  struct svm_node *svm_vector;
  struct svm_model *model;

  predictions = calloc(dset->c, sizeof(float));
  dset->prediction = malloc(dset->nv*sizeof(int));
  for (i = 0; i < dset->nv; i++)
    {
      svm_vector = create_svm_vector(dset->x[i], dset->d);
      fvec_set(predictions, dset->c, 0.0);
      for (j = 0; j < nmodels; j++)
	{
	  model = models[j];
	  predicted_class = svm_predict(model, svm_vector)-1;
	  predictions[predicted_class] += weights[j];
	}
      predicted_class = fvec_argmax(predictions, dset->c);
      free(svm_vector);
      dset->prediction[i] = predicted_class;
    }
}

/*
  Build a SVM model (if 'nmodels' is > 1, build a bagging model) using
  'training_dset' and 'parameters', and test it using 'test_dset'. If
  'normalize' is 1, normalize the training set to zero mean/one
  standard deviation. If 'dr_method' is not NONE, reduce the dimension
  to 'idr' before learning.

  The function returns array of false-negative counts per class, for
  the 'test_dset'. Return NULL in case of error, and set error code in
  'errc'. Possible error codes are EINVAL and memory allocation error
  codes.
*/
static int *pcl_xlearn(struct dataset *training_dset, struct dataset *test_dset, 
		       void *parameters, int nmodels, int normalize, int dr_method,
		       int idr, int *errc)
{
  int   i;
  int   j;
  int   c;
  int   tcl;
  int   tcum;
  int   sdr;
  int   predicted_class;
  int   *fnc;
  float *combined_output;
  struct svm_parameters *svm_parameters;
  struct svm_node *svm_vector;
  struct svm_model **models;

  fnc = (int *) 0;
  if (training_dset && test_dset)
    {
      c = training_dset->c;
      sdr = training_dset->d;
      *errc = 0;
#if 0
      test_set = preprocess(&training_set, testing_set, dr_method, idr, &sdr,
			    select_dist, normalize, errc);
#endif
      if (test_dset)
	{
	  svm_parameters = (struct svm_parameters *) parameters;
	  models = pcl_svm_learn(training_dset, nmodels, parameters, errc);
	  /*
	    Classify using the built models.
	  */
	  if (models)
	    {
	      fnc = calloc(c, sizeof(int));
	      combined_output = malloc(c*sizeof(float));
	      tcl = 0; /* correct class of vector i */
	      tcum = test_dset->nd[0]; /* cumulative cardinality of classes 0..tcl */
	      for (i = 0; i < test_dset->nv; i++)
		{
		  /*
		    For each test set vector, generate predictions for all
		    models. combined_output[n] has weighted number of votes
		    for class n.
		  */
		  if (i == tcum)
		    tcum += test_dset->nd[++tcl];
		  fvec_set(combined_output, c, 0.0);
		  svm_vector = create_svm_vector(test_dset->x[i], sdr);
		  for (j = 0; j < nmodels; j++)
		    {
		      predicted_class = svm_predict(models[j], svm_vector)-1;
		      combined_output[predicted_class] += 1.0;
		    }
		  predicted_class = fvec_argmax(combined_output, c);
		  if (predicted_class != tcl)
		    fnc[tcl]++;
		  free(svm_vector);
		}
	      free(combined_output);
	    }
	}
#if 0
      else
	*errc = PERR_DR;
#endif
    }
  else
    {
      if (errc)
	*errc = EINVAL;
    }
  return fnc;
}

/*
  Perform cross-validation results for Support Vector Machine on
  'dset'.  The function calculates FN (false negative) counts per
  class/experiment.

  If 'normalize' is 1, normalize the data sets before applying the
  learning method.

  Optionally reduce dimension of the training/testing data sets to
  'idr' using 'dr_method'.

  'nmodels' is the number of models for bagging/Adaboost methods.

  In case of success, the function returns the array of false negative
  counts per experiment and class and experiment. In case of failure,
  it returns -1 and sets 'errc'.
*/
int **pcl_svm_xlearn(struct dataset *dset, int dr_method, int idr, int normalize,
		     int nexp, unsigned int seed, int nmodels, 
		     struct pcl_svm_parameters *parameters, int *errc)
{
  int j;
  int status;
  int ex;
  int nxval;
  int idx;
  int *fnc; /* number of false negatives per class */
  int **fn; /* number of false negatives per class/experiment */
  struct dataset *training_dset;
  struct dataset *test_dset;
  struct subset  **xset;
  
  status = 0;
  srand(seed);
  nxval = 10; /* TBD: fixed; make an argument? */
  fn = imx_alloc(nexp, dset->c);
  if (fn)
    {
      imx_set(fn, nexp, dset->c, 0);
      for (ex = 0; (ex < nexp) && !status; ex++)
	{
	  xset = dataset_partition(dset, nxval);
	  if (xset)
	    {
	      for (idx = 0; idx < nxval && !status; idx++)
		{
		  training_dset = dataset_subset(dset, xset[idx], 1);
		  test_dset = dataset_subset(dset, xset[idx], 0);
		  fnc = pcl_xlearn(training_dset, test_dset, parameters, nmodels, 
				   normalize, dr_method, idr, errc);
		  if (fnc)
		    {
		      for (j = 0; j < dset->c; j++)
			fn[ex][j] += fnc[j];
		      free(fnc);
		    }
		}
	    }
	  else
	    {
	      status = -1;
	      /* TBD: free */
	    }
	}
    }
  return fn;
}

/*
  Create LIBSVM representation of feature vector 'x' of length 'd'.
*/
struct svm_node *create_svm_vector(float *x, int d)
{
  int i;
  struct svm_node *svm_vector = (struct svm_node *) 0;
  
  if (x && (d > 0))
    {
      svm_vector = malloc((d+1)*sizeof(struct svm_node));
      if (svm_vector)
	{
	  for (i = 0; i < d; i++)
	    {
	      svm_vector[i].index = i+1;
	      svm_vector[i].value = x[i];
	    }
	  svm_vector[d].index = -1;
	}
    }
  return svm_vector;
}

/*
  Convert 'dset' to LIBSVM-compatible structure. Don't use the
  'sparse' representation. See LIBSVM README file for more information
  on the 'svm_node' structure.
*/
struct svm_problem *create_problem(struct dataset *dset)
{
  int    i;
  int    j;
  int    cdx;
  int    offset;
  int    status = 0;
  double *y;
  struct svm_node **x = (struct svm_node **) 0;
  struct svm_problem *problem;

  problem = calloc(1, sizeof(struct svm_problem));
  if (problem)
    {
      y = malloc(dset->nv*sizeof(double));
      if (y)
	{
	  cdx = 1;
	  offset = dset->nd[0];
	  for (i = 0; i < dset->nv; i++)
	    {
	      if (i == offset)
		offset += dset->nd[cdx++];
	      y[i] = cdx;
	    }
	  problem->y = y;
	  problem->l = dset->nv;
	  x = malloc(dset->nv*sizeof(struct svm_node *));
	  if (x)
	    {
	      for (i = 0; (i < dset->nv) && (status == 0); i++) 

		{
		  x[i] = create_svm_vector(dset->x[i], dset->d);
		  if (!x[i])
		    {
		      for (j = 0; j < i; j++)
			free(x[j]);
		      free(x);
		      free(problem->y);
		      free(problem);
		      problem = (struct svm_problem *) 0;
		      status = -1;
		    }
		}
	      if (status == 0)
		problem->x = x;
	    }
	  else
	    {
	      free(problem->y);
	      free(problem);
	      problem = (struct svm_problem *) 0;
	    }
	}
      else
	{
	  free(problem);
	  problem = (struct svm_problem *) 0;
	}
    }
  return problem;
}

/*
  Convert 'dset' to LIBSVM-compatible two-class structure. The first
  class is `cx', the second class is everything else. `cx' is 0-based
  index.

  Don't use the 'sparse' representation. See LIBSVM README file for
  more information on the 'svm_node' structure.

  TBD-indet: placeholder, to be implemented.
*/
struct svm_problem *create_binary_problem(struct dataset *dset, int cx)
{
  int    i;
  int    j;
  int    cdx;
  int    offset;
  int    status = 0;
  double *y;
  struct svm_node **x = (struct svm_node **) 0;
  struct svm_problem *problem;

  problem = calloc(1, sizeof(struct svm_problem));
  if (problem)
    {
      y = malloc(dset->nv*sizeof(double));
      if (y)
	{
	  cdx = 1;
	  offset = dset->nd[0];
	  for (i = 0; i < dset->nv; i++)
	    {
	      if (i == offset)
		offset += dset->nd[cdx++];
	      y[i] = cdx;
	    }
	  problem->y = y;
	  problem->l = dset->nv;
	  x = malloc(dset->nv*sizeof(struct svm_node *));
	  if (x)
	    {
	      for (i = 0; (i < dset->nv) && (status == 0); i++) 

		{
		  x[i] = create_svm_vector(dset->x[i], dset->d);
		  if (!x[i])
		    {
		      for (j = 0; j < i; j++)
			free(x[j]);
		      free(x);
		      free(problem->y);
		      free(problem);
		      problem = (struct svm_problem *) 0;
		      status = -1;
		    }
		}
	      if (status == 0)
		problem->x = x;
	    }
	  else
	    {
	      free(problem->y);
	      free(problem);
	      problem = (struct svm_problem *) 0;
	    }
	}
      else
	{
	  free(problem);
	  problem = (struct svm_problem *) 0;
	}
    }
  return problem;
}

/*
  Free 'problem'. 
*/
void free_problem(struct svm_problem *problem)
{
  int i;

  if (problem)
    {
      free(problem->y);
      for (i = 0; i < problem->l; i++)
	free(problem->x[i]);
      free(problem->x);
      free(problem);
    }
}

/*
  This function is used to append an individual SVM model to a file
  containing a SVM bagging (committee) classifier. Each model in the
  file has an index and weight. The function appends 'model', whose
  model index number is 'mdx' and weight 'weight', to 'fptr'.

  In case of success, return 0. In case of failure, return -1 and set
  errno.
*/
int save_svm(FILE *fptr, void *model, int mdx, float weight)
{
  int  status;
  int  errno_save;
  char *temp_fname;

  status = -1;
  temp_fname = malloc((L_tmpnam+1)*sizeof(char));
  if (temp_fname)
    {
      if (tmpnam(temp_fname))
	{
	  if ((status = svm_save_model(temp_fname, model)) == 0)
	    {
	      fprintf(fptr, "combined model %d / combined weight: %g\n", 
		      mdx, weight);
	      if ((status = fcat(fptr, temp_fname)) == 0)
		{
		  fprintf(fptr, "\n");
		  remove(temp_fname);
		}
	    }
	}
      errno_save = errno;
      free(temp_fname);
      errno = errno_save;
    }
  return status;
}



syntax highlighted by Code2HTML, v. 0.9.1