/*
  Module name: lau.h
  Created by: Ljubomir Buturovic
  Created: 05/12/2001
  Purpose: declarations and macros for utilities in lau.c.
*/

/*
  Copyright 2004 Ljubomir J. Buturovic

  Permission is hereby granted, free of charge, to any person
  obtaining a copy of this software and associated documentation files
  (the "Software"), to deal in the Software without restriction,
  including without limitation the rights to use, copy, modify, merge,
  publish, distribute, sublicense, and/or sell copies of the Software,
  and to permit persons to whom the Software is furnished to do so,
  subject to the following conditions:

  The above copyright notice and this permission notice shall be
  included in all copies or substantial portions of the Software.

  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
  BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
  ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  SOFTWARE.
*/

#include <stdio.h>

#define INIT_LENGTH            100
#define WHITESPACE             " \t\r\n"

/*
  File formats for function load_file(). LAU_FF_RAW is tab-delimited
  lines of numbers; LAU_FF_ROW assumes first column is an arbitrary
  string (normally the row ID); LAU_FF_COL means first row has column
  names; LAU_FF_COLROW means first row has column names and first
  column has row IDs.
*/

#define LAU_FF_RAW             0
#define LAU_FF_ROW             1
#define LAU_FF_COL             2
#define LAU_FF_COLROW          3

/*
  Extract tokens (separated by characters in 'delimiter' string) from
  'str'. Return NULL-terminated array of tokens.
  
  The function allocates the space for the returned array. It is the
  caller's responsibility to free it.
*/
char **str_tokenize(char *str, char *delimiter);

/*
  Remove blanks from end of 'str'.
*/
void str_trim(char *str);

/*
  Return 'str' extended enough to accommodate 'current_length' plus
  'added_length' characters. 'capacity' points to the currently
  allocated space for 'str', excluding the null terminator;
  'current_length' is the current length of the string; 'added_length'
  is the number of characters to be concatenated to the string. If the
  capacity is sufficient to accommodate 'current_length' plus
  'added_length', the function returns unmodified 'str'. Otherwise the
  space is reallocated to accommodate the desired length.

  Typical usage:

  str = str_extend(str, &capacity, current_length, added_length);

  In case of realloc() error, return NULL and set errno.
*/
char *str_extend(char *str, int *capacity, int current_length, int added_length);

/*
  Return clone of NULL-terminated array `nta'. 

  Return NULL and set errno in case of failure.
 */
char **nta_clone(char **nta);

/*
  Return length of NULL-terminated (char **) array 'nta'.
*/
int str_length(char **nta);

/*
  Free NULL-terminated (char **) array 'nta'. The function preserves
  errno.
*/
void str_free(char **nta);

/*
  Create string of length 'len' populated with 'ch'.
*/
char *str_create(int len, char ch);

/*
  Insert strings in 'str' before character at position 'pos' ('pos' is
  0-based). The function has variable number of arguments. 'nargs' is
  the number of arguments following 'nargs'. The following arguments
  are 'str' (char *) and 'pos' (int), followed by the insert strings.

  The function returns the composite string.

  For example, if the function is called as 

  str_insert(4, "abc.dat", 3, "_d", "_e");

  it returns "abc_d_e.dat".

  If nargs is less than 3, or 'pos' is negative, or any of the insert
  strings is NULL, the function returns NULL and sets errno to
  EINVAL. In case of malloc() error, the function returns NULL and
  sets errno.
*/
char *str_insert(int nargs, ...);

/*
  Free non-NULL-terminated (char **) array 'str' of length
  'length'. The function does not change the value of `errno'.
*/
void strlen_free(char **str, int length);

/*
  Clone non-NULL-terminated (char **) array 'str' of length 'length'.

  Return NULL in case of malloc() failure and set errno.
*/
char **str_clone(char **str, int length);

/*
  Copy integer vector 'src' into 'dest'.
*/
void ivec_copy(int *dest, int *src, int len);

/*
  Copy floating point vector 'src' into 'dist'.
*/
void fvec_copy(float *dist, float *src, int len);

/*
  Return 'double' clone (duplicate) of float vector 'vec'.
*/
double *double_clone(float *vec, int len);

/*
  Return 'double' clone (duplicate) of double vector 'vec'.
*/
double *dvec_clone(double *vec, int len);

/*
  Return clone (duplicate) of float vector 'vec'.
*/
float *fvec_clone(float *vec, int len);

/*
  Return 'len'-long 'index' subset of 'vector' of length 'vlen'. In
  case of improper arguments, or if 'index' points outside of
  'vector', return NULL.

  In case of malloc() error, return NULL and set errno.
*/
float *fvec_subset(float *vector, int vlen, int *index, int len);

/*
  Return matrix whose rows are fvec_subset() of rows in 'mx'. In case
  of improper arguments, or if 'index' points outside of 'columns',
  return NULL.

  In case of malloc() error, return NULL and set errno.

  TBD: untested function.
*/
float **fmx_subset(float **mx, int rows, int columns, int *index, int len);

/*
  Return sum of elements in 'vec'.
*/
int ivec_sum(int *vec, int len);

/*
  Set elements of 'vec' to 'value'.
*/
void ivec_set(int *vec, int len, int value);

/*
  Return clone (duplicate) of integer vector 'ivec'.
*/
int *ivec_clone(int *ivec, int len);

/*
  Return pseudo-random integer in [min, max] range (i.e., including
  'min' and 'max').
*/
int rand_int(int min, int max);

/*
  Return minimum element in 'vec'.
*/
int ivec_min(int *vec, int len);

/*
  Return max. element in 'vec'.
*/
int ivec_max(int *vec, int len);

/*
  Return index of max. element in 'vec'. In case of ties, return the
  lowest index. Return -1 for empty vector.
*/
int ivec_argmax(int *vec, int len);

/*
  Return index of max. element in 'vec'. Resolve ties pseudo-randomly,
  using rand_int(). Return -1 for empty vector. In case of malloc()
  failure, return -1 and set errno.
*/
int ivec_rand_argmax(int *vec, int len);

/*
  Allocate floating point matrix 'rows' by 'columns'. Return (float
  **) 0 in case of malloc() error and set errno.
*/
float **fmx_alloc(int rows, int columns);

/*
  Allocate integer matrix 'rows' by 'columns'. Return NULL in case of
  malloc() error and set errno.
*/
int **imx_alloc(int rows, int columns);

/*
  Set 'fmx' to 'value'.
*/
void fmx_set(float **fmx, int rows, int columns, float value);

/*
  Set 'imx' to 'value'.
*/
void imx_set(int **imx, int rows, int columns, int value);

/*
  Clone floating point matrix 'fmx', with dimensions 'rows' by
  'columns'. Return (float **) 0 in case of malloc() error and set
  errno.
*/
float **fmx_clone(float **fmx, int rows, int columns);

/*
  Clone double matrix `matrix', with dimensions `rows' by
  `columns'. Return (double **) 0 in case of malloc() error and set
  errno.
*/
double **dmx_clone(double **matrix, int rows, int columns);

/*
  Normalize matrix 'fmx'. The normalization is: reduce to zero mean
  and divide each column by its standard deviation. If the standard
  deviation is zero, just shift the columns.
*/
void fmx_norm(float **fmx, int rows, int columns);

/*
  Normalize matrix 'fmx' using precomputed 'xmean', 'std'. The
  normalization is: for each column, subtract 'xmean' and, if the
  column 'std' is non-zero, divide by 'std'.
*/
void fmx_prenorm(float **fmx, int rows, int columns, float *xmean, float *std);

/*
  Return normalized version of 'fmx'. The normalization is: reduce to
  zero mean and divide each column by its standard deviation.

  Return (float **) 0 in case of malloc() error and set
  errno.
*/
float **fmx_normalize(float **fmx, int rows, int columns);

/*
  Return mean values of columns in 'fmx'.
*/
float *fmx_mean(float **fmx, int rows, int columns);

/*
  Return standard deviations of columns in 'fmx'.

  Return (float *) 0 in case of malloc() error and set
  errno.
*/
float *fmx_std(float **fmx, int rows, int columns);

/*
  Free matrix 'mx' with 'rows' rows. Return (void **) 0. This function
  ignores all free() errors.
*/
void **mx_free(void **mx, int rows);

/*
  Free 'vector'. Return (void *) 0. This function ignores all free()
  errors.
*/
void *vx_free(void *vector);

/*
  Multiply 'amatrix' with 'bmatrix'. 'amatrix' is 'arows' by
  'acolumns'. 'bmatrix' must be 'acolumns' by 'bdim'. The product is
  'arows' by 'bdim'.

  If 'tflag' is 1, multiply 'amatrix' with transpose of 'bmatrix'. In
  this case 'bmatrix' must be 'bdim' by 'acolumns' (meaning that the
  transpose is 'acolumns' by 'bdim'). The result is again 'arows' by
  'bdim'.

  The function allocates space for the product.

  In case of error, return (float **) 0 and set errno. The only
  possible errors are memory allocation errors.
*/
float **fmx_mult(float **amatrix, int arows, int acolumns, float **bmatrix, 
		 int bdim, int tflag);

/*
  Return number of lines in 'fname' and optionally additional file
  information.

  If 'llen' is not NULL, store the length of the longest line in
  *llen. If 'ntok' is not NULL, store number of tokens in each line in
  *ntok, and change semantics of return value: ignore blank
  (whitespace-only) lines. If lines have varying number of tokens,
  *ntok is set to -1. This behavior is consistent with MATLAB load()
  function.

  Tokens are assumed to be separated by a single 'delimiter'
  character. If 'delimiter' is null character ('\0'), the tokens are
  assumed to be separated by a string of WHITESPACE characters.

  The number of lines and max. line length are consistent with values
  returned by UNIX command 'wc' - in particular, llen does not count
  the newline character.

  Return -1 in case of failure and set errno.  
*/
int file_info(char *fname, int *llen, int *ntok, char delimiter);

/*
  Return max. length of a filename in the current filesystem (the
  current filesystem is the filesystem in which the current directory
  resides).

  In case of failure, return -1 and set errno.
*/
long get_namelen(void);

/*
  Return basic name (i.e., basename without suffix). For example, for
  path '/dir/fname.dat' bname() returns string 'fname'. The function
  allocates memory for the returned string, and it is the caller's
  responsibility to free() it.

  In case of error, return (char *) 0 and set errno.
*/
char *bname(char *path);

/*
  Safe version of basename() (i.e., it does not modify the input
  string).

  The difference from bname() is that this one includes the extension.

  In case of error, return (char *) 0 and set errno.
*/
char *base_name(char *path);

/*
  Allocate double floating point matrix 'rows' by 'columns'. Return
  (double **) 0 in case of malloc() error and set errno.
*/
double **dmx_alloc(int rows, int columns);

/*
  Allocate string matrix 'rows' by 'columns'. Each element of the
  matrix is (char *) and is initialized to (char *) 0. Return (char
  ***) 0 in case of malloc() error and set errno.
*/
char ***cmx_alloc(int rows, int columns);

/*
  Save 'matrix' ('transpose' == 0) or transpose of 'matrix'
  ('transpose' == 1) in 'fname'. Return -1 in case of error and set
  errno.
*/
int fmx_save(float **matrix, int rows, int columns, char *fname, int transpose);

/*
  Write 'matrix' ('transpose' == 0) or transpose of 'matrix'
  ('transpose' == 1) to 'fptr'. In case of success, return 0,
  otherwise return -1 and set errno.
*/
int fmx_write(FILE *fptr, float **matrix, int rows, int columns, int transpose);

/*
  Write 'matrix' to 'fptr'. Store 'xnames[i]' in the first column of
  i-th row.

  In case of success, return 0, otherwise return -1 and set errno.
*/
int fmx_nwrite(FILE *fptr, float **matrix, char **xnames, int rows, int columns);

/*
  Set elements of 'fvec' to 'value'.
*/
void fvec_set(float *fvec, int len, float value);

/*
  Return sum of elements in 'vec'.
*/
float fvec_sum(float *vec, int len);

/*
  Load matrix from 'fname'. Skip lines which begin with 'skip'
  character.

  In case of error, return NULL and set errno.
*/
float **fmx_load(char *fname, int *rows, int *columns, char skip);

/*
  Return sum of elements in 'vec'.
*/
double dvec_sum(double *vec, int len);

/*
  Copy double vector 'src' into 'dest'.
*/
void dvec_copy(double *dest, double *src, int len);

/*
  Return index 'j' within 'vector' such that vector[j] <= r < vector[j+1].

  Return -1 if 'r' is outside range.
*/
int dloc(double r, double *vector, int len);

/*
  Return string copy of 'fname'.

  Return -1 in case of error and set errno. errno can be stat(),
  open(), mmap() or malloc() error.

  NOTE(review): the return type is (char *), so "-1" cannot be taken
  literally. Confirm against lau.c whether the failure value is NULL
  or (char *) -1, and check callers accordingly.
*/
char *str_file(char *fname);

/*
  free(str) without changing errno. Return (char *) 0.
*/
char *string_free(char *str);

/*
  Return index 'j' within 'vector' such that vector[j] <= r < vector[j+1].

  Return -1 if 'r' is outside range.
*/
int floc(float r, float *vector, int len);

/*
  Return clone of array of strings of length 'length'.
*/
char **string_copy(char **str, int length);

/*
  Return clone of NULL-terminated 'str'.
*/
char **string_clone(char **str);

/*
  Like unlink(), but preserves errno.
*/
void remove_file(char *fname);

/*
  Like fclose(), preserving errno.
*/
void fptr_close(FILE *fptr);

/*
  Reverse elements in 'vec'.
*/
void ivec_reverse(int *vec, int len);

/*
  Calculate mean of integer vector 'vec' of length 'len'.
*/
float ivec_mean(int *vec, int len);

/*
  Calculate standard deviation of integer vector 'vec' of length
  'len'.
*/
float ivec_sd(int *vec, int len);

/*
  TBD: the following three functions untested.
*/

/*
  Calculate mean of vector 'vec' of length 'len'.
*/
float fvec_mean(float *vec, int len);

/*
  Calculate standard deviation of vector 'vec' of length 'len'.
*/
float fvec_sd(float *vec, int len);

/*
  Calculate covariance between 'len'-dimensional vectors 'vec1' and
  'vec2' with mean values 'mean1' and 'mean2'.
*/
float fvec_cov(float *vec1, float *vec2, int len, float mean1, float mean2);

/*
  Reverse elements in 'vec'.
*/
void fvec_reverse(float *vec, int len);

/*
  Return min(int1, int2).
*/
int int_min(int int1, int int2);

/*
  Append 'fname' to 'fptr'. Return -1 in case of error and set errno.
*/
int fcat(FILE *fptr, char *fname);

/*
  In-place integer sorting function based on algorithm by
  R. Sedgewick. 

  BIG NOTE: 'vector' needs to have len+1 elements. The first len
  elements are the numbers to be sorted. The last element is used
  internally by sedgesort() function (which is called by ssort()) as a
  'sentinel'.
*/
void intsort(int *vector, int len);

/*
  Load 'nx' vectors from file 'fname' into 'x'. Each vector (line) is
  assumed to have 'd' elements, unless the file is empty. Matrix 'x'
  has to be preallocated sufficient space to hold the data.

  The function understands four formats. All formats contain
  whitespace-delimited rows of equal number of columns:

  LAU_FF_RAW         raw data, no row or column labels
  LAU_FF_COL         first row has column labels (names)
  LAU_FF_ROW         first column has row labels
  LAU_FF_COLROW      first row has column labels, first column has row labels
  
  If format equals LAU_FF_RAW, all file content is loaded into `x'.

  If format equals LAU_FF_COL, first line is loaded into pre-allocated
  array `clab', the rest is loaded into `x'.

  If format equals LAU_FF_ROW, first column in each row is loaded into
  pre-allocated array `rlab', the rest is loaded into `x'.

  If format equals LAU_FF_COLROW, first column of first row is loaded
  into `clab0'. The remaining columns of the first row are loaded into
  pre-allocated array `clab'. The first column in each row except
  first is loaded into pre-allocated array `rlab'. The rest of content
  is loaded into `x'.

  In all cases, `d' and `nx' refer to the actual numeric (float) data
  content of the file, not labels.

  Return -1 in case of error and set 'errc'. The error codes are
  EINVAL, for invalid arguments, any of the file operations errno
  codes, LERR_FILE_FORMAT, for unrecognized file format, and
  LERR_VAR_NCOL, for variable number of columns in `fname'.

  If `fname' is an empty file, return 0.
*/
int load_file(char *fname, float **x, int format, char **rlab, char **clab0,
	      char **clab, int d, int nx, int *errc);

/*
  Return diff between 'vec1' and 'vec2'; i.e., vector of integers
  present in 'vec1' and not in 'vec2'. Return the length of the diff
  vector in 'length'.

  In case of error, return NULL and set errno.

  NOTE: because the function uses hash program from a library called
  'Kazlib', and in which INT_MAX has a special meaning, vec2 may not
  contain value INT_MAX.
*/
int *ivec_diff(int *vec1, int len1, int *vec2, int len2, int *length);

/*
  Return 0 if `vec1' equals 'vec2', otherwise return -1.
*/
int ivec_cmp(int *vec1, int *vec2, int len);

/*
  Return version of `str' exactly `len' characters long (plus 1 for
  null terminator).

  If `str' is shorter than `len' the returned string is padded with
  space characters.
*/
char *lau_str_lim(char *str, int len);


/* syntax highlighted by Code2HTML, v. 0.9.1 */