/*
Module name: lau.c
Created by: Ljubomir Buturovic
Created: 05/12/2001
Purpose: assorted utilities.
*/
/*
Copyright 2004 Ljubomir J. Buturovic
Permission is hereby granted, free of charge, to any person
obtaining a copy of this software and associated documentation files
(the "Software"), to deal in the Software without restriction,
including without limitation the rights to use, copy, modify, merge,
publish, distribute, sublicense, and/or sell copies of the Software,
and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <math.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
#include <stdarg.h>
#include "lerr.h"
#include "lau.h"
#include "hash_util.h"
static char rcsid[] = "$Id: lau.c,v 1.114 2006/03/27 17:46:40 ljubomir Exp $";
/*
Extract tokens (separated by characters in 'delimiter' string) from
'str'. Return NULL-terminated array of tokens.
The function allocates the space for the returned array. It is the
caller's responsibility to free it.
*/
char **str_tokenize(char *str, char *delimiter)
{
int len;
int i;
int rlen;
int cntr;
int capacity;
char *xtr;
char *ptr;
char **tokens;
char **xtokens;
tokens = (char **) 0;
cntr = 0;
capacity = 0;
if ((str != (char *) 0) && (delimiter != (char *) 0))
{
rlen = strlen(str);
if (rlen > 0)
{
tokens = malloc(INIT_LENGTH*sizeof(char *));
capacity = INIT_LENGTH;
}
xtr = str;
while (rlen > 0)
{
len = strspn(xtr, delimiter);
xtr += len;
rlen -= len;
len = strcspn(xtr, delimiter);
rlen -= len;
if (len > 0)
{
ptr = malloc(len+1);
memcpy(ptr, xtr, len);
ptr[len] = '\0';
xtr += len;
tokens[cntr++] = ptr;
if (cntr >= capacity)
{
capacity += capacity;
xtokens = malloc(capacity*sizeof(char *));
for (i = 0; i < cntr; i++)
{
xtokens[i] = strdup(tokens[i]);
free(tokens[i]);
}
free(tokens);
tokens = xtokens;
}
}
}
if (tokens)
tokens[cntr] = (char *) 0;
}
return tokens;
}
/*
Remove blanks from end of 'str'.
*/
void str_trim(char *str)
{
int len;
if (str && *str)
{
len = strlen(str)-1;
while (str[len] == ' ')
{
str[len] = '\0';
len--;
}
}
}
/*
Return 'str' extended enough to accomodate
length+added_length. 'capacity' is currently allocated space for
'str', excluding the null terminator; 'current_length' is the current
length of the string; 'added_length' is the number of characters to be
concatenated to the string. If 'capacity' is sufficient to accomodate
'current_length' plus 'added_length', the function returns unmodified
'str'. Otherwise the space is reallocated to accomodate the desired
length.
Typical usage:
str = str_extend(str, &capacity, current_length, added_length);
In case of realloc() error, return NULL and set errno.
*/
char *str_extend(char *str, int *capacity, int current_length, int added_length)
{
int new_length;
char *xstr;
xstr = str;
new_length = current_length+added_length;
if (*capacity && (new_length > *capacity))
{
while (*capacity <= new_length)
*capacity += *capacity;
xstr = realloc(str, *capacity+1);
}
return xstr;
}
/*
Return clone of NULL-terminated array `str'.
Return NULL and set errno in case of failure.
*/
char **nta_clone(char **nta)
{
int i;
int length;
int status;
char **clone;
status = 0;
clone = (char **) 0;
length = str_length(nta);
if (length > 0)
{
clone = calloc(length+1, sizeof(char *));
if (clone)
{
for (i = 0; (i < length) && !status; i++)
{
clone[i] = strdup(nta[i]);
if (!clone[i])
{
mx_free((void **) clone, i);
status = -1;
}
}
}
}
return clone;
}
/*
Return length of NULL-terminated (char **) array 'nta'.
*/
int str_length(char **nta)
{
int length;
char **atr;
length = 0;
if (nta)
{
atr = nta;
while (*atr)
{
length++;
atr++;
}
}
return length;
}
/*
Free NULL-terminated (char **) array 'nta'. The function preserves
errno.
*/
void str_free(char **nta)
{
int i;
int errno_save;
errno_save = errno;
i = 0;
if (nta)
{
while (nta[i])
{
free(nta[i]);
i++;
}
free(nta);
}
errno = errno_save;
}
/*
Free non-NULL-terminated (char **) array 'str' of length
'length'. The function does not change the value of `errno'.
*/
void strlen_free(char **str, int length)
{
int i;
if (str)
{
for (i = 0; i < length; i++)
vx_free(str[i]);
vx_free(str);
}
}
/*
Clone non-NULL-terminated (char **) array 'str' of length 'length'.
Return NULL in case of malloc() failure and set errno.
*/
char **str_clone(char **str, int length)
{
int i;
int status;
char **clone;
status = 0;
clone = (char **) 0;
if (str && (length > 0))
{
clone = malloc(length*sizeof(char *));
if (clone)
{
for (i = 0; i < length; i++)
{
clone[i] = strdup(str[i]);
if (!clone[i])
{
status = -1;
strlen_free(clone, i);
}
}
}
}
return clone;
}
/*
Create string of length 'len' populated with 'ch'.
*/
char *str_create(int len, char ch)
{
char *str;
str = (char *) 0;
if (len >= 0)
{
str = malloc((len+1)*sizeof(char));
if (str)
{
memset(str, ch, len);
str[len] = '\0';
}
}
return str;
}
/*
Insert strings in 'str' before character at position 'pos' ('pos' is
0-based). The function has variable number of arguments. 'nargs' is
the number of arguments following 'nargs'. The following arguments
are 'str' (char *) and 'pos' (int), followed by the insert strings.
The function returns the composite string.
For example, if the function is called as
str_insert(4, "abc.dat", 3, "_d", "_e");
it returns "abc_d_e.dat".
If nargs is less than 3, or 'pos' is negative, or any of the insert
strings is NULL, the function returns NULL and sets errno to
EINVAL. In case of malloc() error, the function returns NULL and
sets errno.
*/
char *str_insert(int nargs, ...)
{
int i;
int pos;
int len;
int rlen;
int slen;
int status;
char *str;
char *xtr;
char *itr;
char *ptr;
char **strings;
va_list VariableArgsPtr;
itr = (char *) 0;
pos = 0;
str = (char *) 0;
va_start(VariableArgsPtr, nargs);
if (nargs < 3)
errno = EINVAL;
else
{
strings = calloc(nargs-1, sizeof(char *));
if (strings)
{
status = 0;
for (i = 0; (i < nargs) && !status; i++)
{
if (i == 0)
str = va_arg(VariableArgsPtr, char *);
else if (i == 1)
pos = va_arg(VariableArgsPtr, int);
else
{
xtr = va_arg(VariableArgsPtr, char *);
if (!xtr)
{
errno = EINVAL;
status = -1;
}
else
{
strings[i-2] = strdup(xtr);
if (!strings[i-2])
status = -1;
}
}
}
if (!status)
{
len = strlen(str);
rlen = len-pos;
slen = 0;
for (i = 0; i < nargs-2; i++)
slen += strlen(strings[i]);
itr = malloc((len+slen+1)*sizeof(char));
if (itr)
{
itr[len+slen] = '\0';
ptr = itr;
memcpy(itr, str, pos);
ptr += pos;
for (i = 0; i < nargs-2; i++)
{
len = strlen(strings[i]);
memcpy(ptr, strings[i], len);
ptr += len;
}
memcpy(ptr, str+pos, rlen);
}
}
str_free(strings);
}
}
return itr;
}
/*
Copy integer vector 'src' into 'dest'.
*/
void ivec_copy(int *dest, int *src, int len)
{
int i;
if ((len > 0) && dest && src)
for (i = 0; i < len; i++)
dest[i] = src[i];
}
/*
Copy floating point vector 'src' into 'dest'.
*/
void fvec_copy(float *dest, float *src, int len)
{
int i;
if ((len > 0) && (dest != (float *) 0) && (src != (float *) 0))
for (i = 0; i < len; i++)
dest[i] = src[i];
}
/*
Return 'len'-long 'index' subset of 'vector' of length 'vlen'. In
case of improper arguments, or if 'index' points outside of
'vector', return NULL.
In case of malloc() error, return NULL and set errno.
*/
float *fvec_subset(float *vector, int vlen, int *index, int len)
{
int i;
int idx;
int status;
float *fsub;
status = 0;
fsub = (float *) 0;
if (vector && (vlen >= len) && index && (len > 0))
{
fsub = malloc(len*sizeof(float));
if (fsub)
for (i = 0; (i < len) && !status; i++)
{
idx = index[i];
if (idx >= vlen)
{
fsub = vx_free(fsub);
status = -1;
}
else
fsub[i] = vector[idx];
}
}
return fsub;
}
/*
Return matrix whose rows are fvec_subset() of rows in 'mx'. In case
of improper arguments, or if 'index' points outside of 'columns',
return NULL.
In case of malloc() error, return NULL and set errno.
TBD: untested function.
*/
float **fmx_subset(float **mx, int rows, int columns, int *index, int len)
{
int i;
int status;
float **fmx;
i = 0;
status = 0;
fmx = (float **) 0;
if (mx && (columns >= len) && index && (len > 0))
{
fmx = fmx_alloc(rows, len);
if (fmx)
for (i = 0; (i < len) && !status; i++)
{
fmx[i] = fvec_subset(mx[i], columns, index, len);
if (!fmx[i])
status = -1;
}
if (status == -1)
fmx = (float **) mx_free((void **) fmx, i);
}
return fmx;
}
/*
Copy double vector 'src' into 'dest'.
*/
void dvec_copy(double *dest, double *src, int len)
{
int i;
if ((len > 0) && dest && src)
for (i = 0; i < len; i++)
dest[i] = src[i];
}
/*
Return clone (duplicate) of float vector 'vec'.
*/
float *fvec_clone(float *vec, int len)
{
int i;
float *clone;
clone = (float *) 0;
if ((len > 0) && (vec != (float *) 0))
{
clone = malloc(len*sizeof(float));
if (clone != (float *) 0)
for (i = 0; i < len; i++)
clone[i] = vec[i];
}
return clone;
}
/*
Return 'double' clone (duplicate) of float vector 'vec'.
*/
double *double_clone(float *vec, int len)
{
int i;
double *clone;
clone = (double *) 0;
if ((len > 0) && vec)
{
clone = malloc(len*sizeof(double));
if (clone)
for (i = 0; i < len; i++)
clone[i] = vec[i];
}
return clone;
}
/*
Return 'double' clone (duplicate) of double vector 'vec'.
*/
double *dvec_clone(double *vec, int len)
{
int i;
double *clone;
clone = (double *) 0;
if ((len > 0) && vec)
{
clone = malloc(len*sizeof(double));
if (clone)
for (i = 0; i < len; i++)
clone[i] = vec[i];
}
return clone;
}
/*
Return clone (duplicate) of integer vector 'ivec'.
*/
int *ivec_clone(int *ivec, int len)
{
int i;
int *clone;
clone = (int *) 0;
if ((len > 0) && (ivec != (int *) 0))
{
clone = malloc(len*sizeof(int));
if (clone != (int *) 0)
for (i = 0; i < len; i++)
clone[i] = ivec[i];
}
return clone;
}
/*
Return sum of elements in 'vec'.
*/
int ivec_sum(int *vec, int len)
{
int i;
int sum = 0;
if ((vec != (int *) 0) && len > 0)
for (i = 0; i < len; i++)
sum += vec[i];
return sum;
}
/*
Return minimum element in 'vec'.
*/
int ivec_min(int *vec, int len)
{
int i;
int min = 0;
if ((vec != (int *) 0) && len > 0)
{
min = vec[0];
for (i = 1; i < len; i++)
if (vec[i] < min)
min = vec[i];
}
return min;
}
/*
Return max. element in 'vec'.
*/
int ivec_max(int *vec, int len)
{
int i;
int max = 0;
if ((vec != (int *) 0) && len > 0)
{
max = vec[0];
for (i = 1; i < len; i++)
if (vec[i] > max)
max = vec[i];
}
return max;
}
/*
Return index of max. element in 'vec'. In case of ties, return the
lowest index. Return -1 for empty vector.
*/
int ivec_argmax(int *vec, int len)
{
int i;
int max;
int argmax = -1;
if (vec && (len > 0))
{
max = vec[0];
argmax = 0;
for (i = 1; i < len; i++)
if (vec[i] > max)
{
max = vec[i];
argmax = i;
}
}
return argmax;
}
/*
Return index of max. element in 'vec'. Resolve ties pseudo-randomly,
using rand_int(). Return -1 for empty vector. In case of malloc()
failure, return -1 and set errno.
*/
int ivec_rand_argmax(int *vec, int len)
{
int i;
int cntr;
int max;
int argmax = -1;
int tie;
int idx;
int *ties;
if (vec && (len > 0))
{
max = vec[0];
argmax = 0;
for (i = 1; i < len; i++)
if (vec[i] > max)
{
max = vec[i];
argmax = i;
}
if (len > 1)
{
cntr = 0;
for (i = 0; i < len; i++)
if (vec[i] == max)
cntr++;
if (cntr > 1)
{
ties = malloc(cntr*sizeof(int));
if (ties)
{
idx = 0;
for (i = 0; i < len; i++)
if (vec[i] == max)
ties[idx++] = i;
tie = rand_int(0, cntr-1);
argmax = ties[tie];
free(ties);
}
else
argmax = -1;
}
}
}
return argmax;
}
/*
Set elements of 'vec' to 'value'.
*/
void ivec_set(int *vec, int len, int value)
{
int i;
if ((vec != (int *) 0) && len > 0)
for (i = 0; i < len; i++)
vec[i] = value;
}
/*
Set elements of 'vec' to 'value'.
*/
void fvec_set(float *vec, int len, float value)
{
int i;
if ((vec != (float *) 0) && len > 0)
for (i = 0; i < len; i++)
vec[i] = value;
}
/*
Return sum of elements in 'vec'.
*/
float fvec_sum(float *vec, int len)
{
int i;
double sum = 0.0;
if ((vec != (float *) 0) && len > 0)
for (i = 0; i < len; i++)
sum += vec[i];
return (float) sum;
}
/*
Load matrix from 'fname'. Skip lines which begin with 'skip'
character.
In case of error, return NULL and set errno.
*/
float **fmx_load(char *fname, int *rows, int *columns, char skip)
{
int status = 0;
int ivec;
int ift;
int maxlen;
int lcnt;
int len;
int dnx;
char *str;
char *line;
char ftr[31]; /* we assume the longest float won't exceed 30 characters */
float **fmx;
FILE *fptr;
line = (char *) 0;
dnx = 0;
lcnt = 0;
fmx = (float **) 0;
fptr = fopen(fname, "r");
if (fptr)
{
lcnt = file_info(fname, &maxlen, &dnx, '\0');
if ((lcnt != -1) && (dnx != -1))
{
fmx = fmx_alloc(lcnt, dnx);
line = malloc(maxlen+2);
if (line && fmx)
{
ivec = 0;
while (fgets(line, maxlen+2, fptr) && (status == 0))
{
if (*line != skip)
{
str = line;
str += strspn(str, WHITESPACE);
ift = 0;
while ((str && *str) && (ift < dnx))
{
len = strcspn(str, WHITESPACE);
memcpy(ftr, str, len);
ftr[len] = '\0';
fmx[ivec][ift++] = atof(ftr);
str += len;
str += strspn(str, WHITESPACE);
}
if (ift > 0)
ivec++;
if (str && *str)
{
status = -1;
errno = EINVAL;
}
}
}
if (rows)
*rows = ivec;
if (columns)
*columns = dnx;
free(line);
}
else
status = -1;
}
else
{
status = -1;
if (dnx == -1)
errno = EINVAL;
}
fclose(fptr);
}
else
status = -1;
if (status == -1)
fmx = (float **) mx_free((void **) fmx, lcnt);
return fmx;
}
/*
Return sum of elements in 'vec'.
*/
double dvec_sum(double *vec, int len)
{
int i;
double sum = 0.0;
if (vec && (len > 0))
for (i = 0; i < len; i++)
sum += vec[i];
return sum;
}
/*
Return pseudo-random integer in [min, max] range (i.e., including
'min' and 'max').
*/
int rand_int(int min, int max)
{
int irand;
double ftcx; /* `double' ensures consistent results between optimized and debug versions of the function */
ftcx = max-min+1;
irand = min+(int) (ftcx*rand()/(RAND_MAX+1.0));
return irand;
}
/*
Allocate floating point matrix 'rows' by 'columns'. Return (float
**) 0 in case of malloc() error and set errno.
*/
float **fmx_alloc(int rows, int columns)
{
int i;
int j;
float **mx;
mx = malloc(rows*sizeof(float *));
for (i = 0; (i < rows) && mx; i++)
{
mx[i] = malloc(columns*sizeof(float));
if (!mx[i])
{
for (j = 0; j < i; j++)
free(mx[j]);
free(mx);
mx = (float **) 0;
}
}
return mx;
}
/*
Allocate integer matrix 'rows' by 'columns'. Return NULL in case of
malloc() error and set errno.
*/
int **imx_alloc(int rows, int columns)
{
int i;
int j;
int **mx;
mx = malloc(rows*sizeof(int *));
for (i = 0; (i < rows) && mx; i++)
{
mx[i] = malloc(columns*sizeof(int));
if (!mx[i])
{
for (j = 0; j < i; j++)
free(mx[j]);
free(mx);
mx = (int **) 0;
}
}
return mx;
}
/*
Set 'fmx' to 'value'.
*/
void fmx_set(float **fmx, int rows, int columns, float value)
{
int i;
int j;
if (fmx)
{
for (i = 0; i < rows; i++)
for (j = 0; j < columns; j++)
fmx[i][j] = value;
}
}
/*
Set 'imx' to 'value'.
*/
void imx_set(int **imx, int rows, int columns, int value)
{
int i;
int j;
if (imx)
{
for (i = 0; i < rows; i++)
for (j = 0; j < columns; j++)
imx[i][j] = value;
}
}
/*
Clone floating point matrix 'fmx', with dimensions'rows' by
'columns'. Return (float **) 0 in case of malloc() error and set
errno.
*/
float **fmx_clone(float **fmx, int rows, int columns)
{
int i;
int j;
float **mx;
mx = malloc(rows*sizeof(float *));
for (i = 0; (i < rows) && mx; i++)
{
mx[i] = malloc(columns*sizeof(float));
if (mx[i])
fvec_copy(mx[i], fmx[i], columns);
else
{
for (j = 0; j < i; j++)
free(mx[j]);
free(mx);
mx = (float **) 0;
}
}
return mx;
}
/*
Clone double matrix `dmx', with dimensions `rows' by
`columns'. Return (double **) 0 in case of malloc() error and set
errno.
*/
double **dmx_clone(double **matrix, int rows, int columns)
{
int i;
int j;
double **rx;
rx = malloc(rows*sizeof(double *));
for (i = 0; (i < rows) && rx; i++)
{
rx[i] = malloc(columns*sizeof(double));
if (rx[i])
dvec_copy(rx[i], matrix[i], columns);
else
{
for (j = 0; j < i; j++)
free(rx[j]);
free(rx);
rx = (double **) 0;
}
}
return rx;
}
/*
Return normalized version of 'fmx'. The normalization is: reduce to
zero mean and divide each column by its' standard deviation.
Return (float **) 0 in case of malloc() error and set
errno.
*/
float **fmx_normalize(float **fmx, int rows, int columns)
{
float **mx;
mx = (float **) 0;
if (fmx && (rows > 0) && (columns > 0))
{
mx = fmx_clone(fmx, rows, columns);
fmx_norm(mx, rows, columns);
}
return mx;
}
/*
Normalize matrix 'fmx'. The normalization is: reduce to zero mean
and divide each column by its' standard deviation. If the standard
deviation is zero, just shift the columns.
*/
void fmx_norm(float **fmx, int rows, int columns)
{
int i;
int j;
float xmean;
double s;
if (fmx && (rows > 0) && (columns > 0))
{
for (i = 0; i < columns; i++)
{
s = 0.0;
for (j = 0; j < rows; j++)
s += fmx[j][i];
xmean = s/rows;
for (j = 0; j < rows; j++)
fmx[j][i] -= xmean;
if (rows > 1)
{
s = 0.0;
for (j = 0; j < rows; j++)
s += fmx[j][i]*fmx[j][i];
if (s > 0.0)
{
s = sqrt(s/(rows-1));
for (j = 0; j < rows; j++)
fmx[j][i] /= s;
}
}
}
}
}
/*
Normalize matrix 'fmx' using precomputed 'xmean', 'std'. The
normalization is: for each column, subtract 'xmean' and, if the
column 'std' is non-zero, divide by 'std'.
*/
void fmx_prenorm(float **fmx, int rows, int columns, float *xmean, float *std)
{
int i;
int j;
float f;
if (fmx && (rows > 0) && (columns > 0))
for (i = 0; i < columns; i++)
for (j = 0; j < rows; j++)
{
f = fmx[j][i];
if (xmean && std)
{
if (std[i] != 0)
fmx[j][i] = (f-xmean[i])/std[i];
else
fmx[j][i] = f-xmean[i];
}
else if (xmean)
fmx[j][i] = f-xmean[i];
else if (std && (std[i] != 0))
fmx[j][i] = f/std[i];
}
}
/*
Return mean values of columns in 'fmx'.
*/
float *fmx_mean(float **fmx, int rows, int columns)
{
int i;
int j;
double s;
float *xmean;
xmean = (float *) 0;
if (fmx && (rows > 0) && (columns > 0))
{
xmean = malloc(columns*sizeof(float));
if (xmean)
{
if (rows > 1)
{
for (i = 0; i < columns; i++)
{
s = 0.0;
for (j = 0; j < rows; j++)
s += fmx[j][i];
xmean[i] = s/rows;
}
}
else
{
for (i = 0; i < columns; i++)
xmean[i] = fmx[0][i];
}
}
}
return xmean;
}
/*
Return standard deviations of columns in 'fmx'.
Return (float **) 0 in case of malloc() error and set
errno.
*/
float *fmx_std(float **fmx, int rows, int columns)
{
int i;
int j;
float xmean;
double s;
float *std;
std = (float *) 0;
if (fmx && (rows > 0) && (columns > 0))
{
std = malloc(columns*sizeof(float));
if (rows > 1)
{
for (i = 0; i < columns; i++)
{
s = 0.0;
for (j = 0; j < rows; j++)
s += fmx[j][i];
xmean = s/rows;
s = 0.0;
for (j = 0; j < rows; j++)
s += (fmx[j][i]-xmean)*(fmx[j][i]-xmean);
std[i] = sqrt(s/(rows-1));
}
}
else
fvec_set(std, columns, 0.0);
}
return std;
}
/*
Allocate double floating point matrix 'rows' by 'columns'. Return
(double **) 0 in case of malloc() error and set errno.
*/
double **dmx_alloc(int rows, int columns)
{
int i;
int j;
double **mx;
mx = malloc(rows*sizeof(double *));
for (i = 0; (i < rows) && (mx != (double **) 0); i++)
{
mx[i] = malloc(columns*sizeof(double));
if (mx[i] == (double *) 0)
{
for (j = 0; j < i; j++)
free(mx[j]);
free(mx);
mx = (double **) 0;
}
}
return mx;
}
/*
Allocate string matrix 'rows' by 'columns'. Each element of the
matrix is (char *) and is initialized to (char *) 0. Return (char
***) 0 in case of malloc() error and set errno.
*/
char ***cmx_alloc(int rows, int columns)
{
int i;
char ***mx;
mx = malloc(rows*sizeof(char **));
for (i = 0; (i < rows) && (mx != (char ***) 0); i++)
{
mx[i] = malloc(columns*sizeof(char **));
if (mx[i] == (char **) 0)
mx = (char ***) mx_free((void **) mx, i);
}
return mx;
}
/*
Free matrix 'mx' with 'rows' rows. Return (void **) 0. This function
ignores all free() errors.
*/
void **mx_free(void **mx, int rows)
{
int j;
int errno_save;
if (mx && (rows >= 0))
{
errno_save = errno;
for (j = 0; j < rows; j++)
free(mx[j]);
free(mx);
errno = errno_save;
}
return (void **) 0;
}
/*
Free 'vector'. Return (void *) 0. This function ignores all free()
errors.
*/
void *vx_free(void *vector)
{
int errno_save;
errno_save = errno;
free(vector);
errno = errno_save;
return (void *) 0;
}
/*
Initial memory allocation for file_info().
*/
#define INIT_CAPACITY 100
/*
Return number of lines in 'fname' and optionally additional file
information.
If 'llen' is not NULL, store the length of the longest line in
*llen. If 'ntok' is not NULL, store number of tokens in each line in
*ntok, and change semantics of return value: ignore blank
(whitespace-only) lines. If lines have varying number of tokens,
*ntok is set to -1. This behavior is consistent with MATLAB load()
function.
Tokens are assumed to be separated by a single 'delimiter'
character. If 'delimiter' is null character ('\0'), the tokens are
assumed to be separated by a string of WHITESPACE characters.
The number of lines and max. line length are consistent with values
returned by UNIX command 'wc' - in particular, llen does not count
the newline character.
Return -1 in case of failure and set errno.
*/
int file_info(char *fname, int *llen, int *ntok, char delimiter)
{
int lc;
int i;
int size;
int fd;
int status;
int first_line;
int maxlen;
int capacity;
int nctr;
int cctr;
int line_flag; /* 0 indicates empty line */
int linlen;
int len;
int errno_save;
char *fstr;
char *xtr; /* current pointer in the file */
char *lstr; /* pointer to beginning of line */
char *line;
char *dstring;
char **tokens;
struct stat buf;
cctr = 0;
errno_save = errno;
if (delimiter == '\0')
dstring = strdup(WHITESPACE);
else
{
dstring = malloc(2);
dstring[0] = delimiter;
dstring[1] = '\0';
}
lc = 0;
nctr = 0;
capacity = INIT_CAPACITY;
maxlen = 0;
if (fname && *fname)
{
status = stat(fname, &buf);
if (status == 0)
{
size = buf.st_size;
if (size > 0)
{
fd = open(fname, O_RDONLY);
if (fd != -1)
{
fstr = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
if (fstr == MAP_FAILED)
{
lc = -1;
errno_save = errno;
}
else
{
xtr = fstr;
lstr = fstr;
first_line = 1;
line = malloc((capacity+1)*sizeof(char));
if (!line)
{
lc = -1;
errno_save = errno;
}
for (i = 0; (i < size) && (lc != -1); i++)
{
if ((*xtr == '\n') || /* UNIX files */
((*xtr == '\r') && (*(xtr+1) == '\n'))) /* DOS files */
{
line_flag = 1;
len = xtr-lstr;
if (len > maxlen)
maxlen = len;
if (len > capacity)
{
while (capacity <= len)
capacity += capacity;
line = realloc(line, (capacity+1)*sizeof(char));
if (!line)
{
lc = -1;
errno_save = errno;
}
}
if (ntok && (lc != -1))
{
memcpy(line, lstr, len);
line[len] = '\0';
linlen = strlen(line);
if ((linlen > 0) && (linlen != strspn(line, dstring)))
{
tokens = str_tokenize(line, dstring);
if (tokens)
{
nctr = str_length(tokens);
str_free(tokens);
if (first_line == 1)
cctr = nctr;
else if (nctr != cctr)
{
nctr = -1;
cctr = -1;
}
}
else
{
lc = -1;
errno_save = errno;
}
first_line = 0;
}
else
line_flag = 0;
}
if (lc != -1)
{
if (line_flag)
lc++;
lstr = xtr;
lstr++;
}
}
if ((*xtr == '\r') && (*(xtr+1) == '\n'))
{
xtr++;
i++;
}
xtr++;
}
free(line);
status = munmap(fstr, size);
if (status == -1)
{
lc = -1;
errno_save = errno;
}
}
status = close(fd);
if (status == -1)
lc = -1;
}
else
lc = -1;
}
else
{
lc = 0;
if (llen)
*llen = 0;
if (ntok)
*ntok = 0;
}
}
else
{
lc = -1;
errno_save = errno;
}
}
else
lc = -1;
if (lc == -1)
errno = errno_save;
else
{
if (llen)
*llen = maxlen;
if (ntok)
*ntok = nctr;
}
free(dstring);
return lc;
}
/*
Return max. length of a filename in the current filesystem (the
current filesystem is the filesystem in which the current directory
resides).
In case of failure, return -1 and set errno.
*/
long get_namelen(void)
{
long length;
length = pathconf(".", _PC_NAME_MAX);
return length;
}
/*
Save 'matrix' ('transpose' == 0) or transpose of 'matrix'
('transpose' == 1) in 'fname'. Return -1 in case of error and set
errno.
*/
int fmx_save(float **matrix, int rows, int columns, char *fname, int transpose)
{
int status = 0;
FILE *fptr;
if ((rows > 0) && (columns > 0))
{
fptr = fopen(fname, "w");
if (fptr)
{
status = fmx_write(fptr, matrix, rows, columns, transpose);
if (!status)
status = fclose(fptr);
}
else
status = -1;
}
return status;
}
/*
Write 'matrix' ('transpose' == 0) or transpose of 'matrix'
('transpose' == 1) to 'fptr'. In case of success, return 0,
otherwise return -1 and set errno.
*/
int fmx_write(FILE *fptr, float **matrix, int rows, int columns, int transpose)
{
int i;
int j;
int status = 0;
if ((rows > 0) && (columns > 0) && fptr)
{
if (transpose == 1)
{
for (i = 0; (i < columns) && (status >= 0); i++)
for (j = 0; (j < rows) && (status >= 0); j++)
{
if (j == rows-1)
status = fprintf(fptr, "%g\n", matrix[j][i]);
else
status = fprintf(fptr, "%g\t", matrix[j][i]);
}
}
else
{
for (i = 0; (i < rows) && (status >= 0); i++)
for (j = 0; (j < columns) && (status >= 0); j++)
{
if (j == columns-1)
status = fprintf(fptr, "%g\n", matrix[i][j]);
else
status = fprintf(fptr, "%g\t", matrix[i][j]);
}
}
}
if (status < 0)
status = -1;
else
status = 0;
return status;
}
/*
Write 'matrix' to 'fptr'. Store 'xnames[i]', if not NULL, in the
first column of i-th row.
In case of success, return 0, otherwise return -1 and set errno.
*/
int fmx_nwrite(FILE *fptr, float **matrix, char **xnames, int rows, int columns)
{
int i;
int j;
int status = 0;
if ((rows > 0) && (columns > 0) && fptr)
{
for (i = 0; (i < rows) && (status >= 0); i++)
{
if (xnames)
status = fprintf(fptr, "%s\t", xnames[i]);
for (j = 0; (j < columns) && (status >= 0); j++)
{
if (j == columns-1)
status = fprintf(fptr, "%g\n", matrix[i][j]);
else
status = fprintf(fptr, "%g\t", matrix[i][j]);
}
}
}
if (status < 0)
status = -1;
else
status = 0;
return status;
}
/*
Return basic name (i.e., basename without suffix). For example, for
path '/dir/fname.dat' bname() returns string 'fname'. The function
allocates memory for the returned string, and it is the caller's
responsibility to free() it.
In case of error, return (char *) 0 and set errno.
*/
char *bname(char *path)
{
int len;
char *str;
char *name = (char *) 0;
if (path && *path)
{
if ((str = strrchr(path, '/')) != (char *) 0)
str++;
else
str = path;
len = strcspn(str, ".");
name = malloc((len+1)*sizeof(char));
if (name != (char *) 0)
{
memcpy(name, str, len);
name[len] = '\0';
}
}
return name;
}
/*
Safe version of basename() (i.e., it does not modify the input
string).
The difference from bname() is that this one includes the extension.
In case of error, return (char *) 0 and set errno.
*/
char *base_name(char *path)
{
int len;
char *str;
char *name = (char *) 0;
if (path && *path)
{
if ((str = strrchr(path, '/')))
str++;
else
str = path;
len = strlen(str);
name = malloc((len+1)*sizeof(char));
if (name)
{
memcpy(name, str, len);
name[len] = '\0';
}
}
return name;
}
/*
Return string copy of 'fname'. If the file is zero bytes long,
return empty string.
Return NULL in case of error and set errno. errno can be stat(),
open(), mmap() or malloc() error.
*/
char *str_file(char *fname)
{
int lc;
int size;
int fd;
int status;
int maxlen;
int nctr;
char *fstr;
char *string = (char *) 0;
struct stat buf;
lc = 0;
nctr = 0;
maxlen = 0;
if (fname && *fname)
{
status = stat(fname, &buf);
if (status == 0)
{
size = buf.st_size;
if (size > 0)
{
fd = open(fname, O_RDONLY);
if (fd != -1)
{
fstr = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
if (fstr != MAP_FAILED)
{
string = malloc((size+1)*sizeof(char));
if (string != (char *) 0)
{
memcpy(string, fstr, size);
string[size] = '\0';
}
if (munmap(fstr, size))
string = string_free(string);
}
if (close(fd))
string = string_free(string);
}
}
else
string = strdup("");
}
}
return string;
}
/*
free(str) without changing errno. Return (char *) 0.
*/
char *string_free(char *str)
{
int errno_save;
errno_save = errno;
free(str);
errno = errno_save;
return (char *) 0;
}
/*
Return index 'j' within 'vector' such that vector[j] <= r < vector[j+1].
Return -1 if 'r' is outside range.
*/
int floc(float r, float *vector, int len)
{
int loc;
int ileft;
int iright;
int imiddle;
int done = 0;
loc = -1;
if (vector && len > 0)
{
ileft = 0;
iright = len-1;
if ((r < vector[ileft]) || (r > vector[iright]))
loc = -1;
else
{
imiddle = len/2;
while (done == 0)
{
if (vector[imiddle] < r)
ileft = imiddle;
else
iright = imiddle;
imiddle = ileft+iright;
imiddle = imiddle/2;
if (iright-ileft <=1)
{
if (r == vector[ileft])
loc = ileft;
else if (r == vector[iright])
loc = iright;
else
loc = ileft;
done = 1;
}
}
}
}
return loc;
}
/*
Return index 'j' within 'vector' such that vector[j] <= r < vector[j+1].
Return -1 if 'r' is outside range.
*/
int dloc(double r, double *vector, int len)
{
int loc;
int ileft;
int iright;
int imiddle;
int done = 0;
loc = -1;
if (vector && len > 0)
{
ileft = 0;
iright = len-1;
if ((r < vector[ileft]) || (r > vector[iright]))
loc = -1;
else
{
imiddle = len/2;
while (done == 0)
{
if (vector[imiddle] < r)
ileft = imiddle;
else
iright = imiddle;
imiddle = ileft+iright;
imiddle = imiddle/2;
if (iright-ileft <=1)
{
if (r == vector[ileft])
loc = ileft;
else if (r == vector[iright])
loc = iright;
else
loc = ileft;
done = 1;
}
}
}
}
return loc;
}
/*
Return clone of NULL-terminated 'str'.
*/
char **string_clone(char **str)
{
int len;
char **ptr;
char **string = (char **) 0;
len = 0;
if (str)
{
ptr = str;
while (ptr)
{
ptr++;
len++;
}
string = string_copy(str, len);
}
return string;
}
/*
Return clone of array of strings of length 'length'.
*/
char **string_copy(char **str, int length)
{
int i;
int j;
char **string = (char **) 0;
if (str)
{
string = malloc(length*sizeof(char *));
for (i = 0; (i < length) && string; i++)
{
string[i] = strdup(str[i]);
if (!string[i])
{
for (j = 0; j < i; j++)
free(string[j]);
free(string);
string = (char **) 0;
}
}
}
return string;
}
/*
Like unlink(), but preserves errno.
*/
void remove_file(char *fname)
{
int errno_save;
errno_save = errno;
if (fname && *fname)
unlink(fname);
errno = errno_save;
}
/*
Like fclose(), preserving errno.
*/
void fptr_close(FILE *fptr)
{
int errno_save;
errno_save = errno;
if (fptr)
fclose(fptr);
errno = errno_save;
}
/*
Reverse elements in 'vec'.
*/
void ivec_reverse(int *vec, int len)
{
int i;
int tmp;
if (vec && (len > 1))
{
for (i = 0; i < len/2; i++)
{
tmp = vec[len-1-i];
vec[len-1-i] = vec[i];
vec[i] = tmp;
}
}
}
/*
Reverse elements in 'vec'.
*/
void fvec_reverse(float *vec, int len)
{
int i;
float tmp;
if (vec && (len > 1))
{
for (i = 0; i < len/2; i++)
{
tmp = vec[len-1-i];
vec[len-1-i] = vec[i];
vec[i] = tmp;
}
}
}
/*
Calculate standard deviation of integer vector 'vec' of length
'len'.
*/
float ivec_mean(int *vec, int len)
{
int i;
float mean;
double dsum;
mean = 0.0;
dsum = 0.0;
if (vec && len > 0)
{
for (i = 0; i < len; i++)
dsum += vec[i];
mean = dsum/len;
}
return mean;
}
/*
Calculate standard deviation of integer vector 'vec' of length
'len'.
*/
float ivec_sd(int *vec, int len)
{
int i;
float mean;
double diff;
double dsum;
float sd;
sd = 0.0;
dsum = 0.0;
if (vec && (len > 1))
{
mean = ivec_mean(vec, len);
for (i = 0; i < len; i++)
{
diff = vec[i]-mean;
dsum += diff*diff;
}
sd = sqrt(dsum/(len-1));
}
return sd;
}
/*
TBD: the following three functions untested.
*/
/*
Calculate mean of vector 'vec' of length 'len'.
*/
float fvec_mean(float *vec, int len)
{
int i;
float mean;
double dsum;
mean = 0.0;
dsum = 0.0;
if (vec && len > 0)
{
for (i = 0; i < len; i++)
dsum += vec[i];
mean = dsum/len;
}
return mean;
}
/*
Calculate standard deviation of vector 'vec' of length 'len'.
*/
float fvec_sd(float *vec, int len)
{
int i;
float mean;
double diff;
double dsum;
float sd;
sd = 0.0;
dsum = 0.0;
if (vec && (len > 1))
{
mean = fvec_mean(vec, len);
for (i = 0; i < len; i++)
{
diff = vec[i]-mean;
dsum += diff*diff;
}
sd = sqrt(dsum/(len-1));
}
return sd;
}
/*
Calculate covariance between 'len'-dimensional
vectors 'vec1' and 'vec2' with mean values 'mean1' and 'mean2'.
*/
float fvec_cov(float *vec1, float *vec2, int len, float mean1, float mean2)
{
int i;
float cov;
double dsum;
dsum = 0.0;
for (i = 0; i < len; i++)
dsum += vec1[i]*vec2[i];
cov = dsum/(len-1)-mean1*len*mean2/(len-1);
return cov;
}
/*
Return min(int1, int2).
*/
int int_min(int int1, int int2)
{
int imin;
if (int1 <= int2)
imin = int1;
else
imin = int2;
return imin;
}
/*
Append 'fname' to 'fptr'. Return -1 in case of error and set errno.
*/
int fcat(FILE *fptr, char *fname)
{
int llen;
int nl;
int status = 0;
char *line;
FILE *fileptr;
if (fptr && fname && *fname)
{
nl = file_info(fname, &llen, (int *) 0, '\0');
if (nl >= 0)
{
line = malloc((llen+2)*sizeof(char));
if (line != (char *) 0)
{
fileptr = fopen(fname, "r");
if (fileptr)
{
while (fgets(line, llen+2, fileptr))
fprintf(fptr, "%s", line);
fclose(fileptr);
}
else
status = -1;
string_free(line);
}
else
status = -1;
}
else
status = -1;
}
return status;
}
/*
Load 'nx' vectors from file 'fname' into 'x'. Each vector (line) is
assumed to have 'd' elements, unless the file is empty. Matrix 'x'
has to be preallocated sufficient space to hold the data.
The function understands four formats. All formats contain
whitespace-delimited rows of equal number of columns:
LAU_FF_RAW raw data, no row or column labels
LAU_FF_COL first row has column labels (names)
LAU_FF_ROW first column has row labels
LAU_FF_COLROW first row has column labels, first column has row labels
If format equals LAU_FF_RAW, all file content is loaded into `x'.
If format equals LAU_FF_COL, first line is loaded into pre-allocated
array `clab', the rest is loaded into `x'.
If format equals LAU_FF_ROW, first column in each row is loaded into
pre-allocated array `rlab', the rest is loaded into `x'.
If format equals LAU_FF_COLROW, first column of first row is loaded
into `clab0'. The remaining columns of the first row are loaded into
pre-allocated array `clab'. The first column in each row except
first is loaded into pre-allocated array `rlab'. The rest of content
is loaded into `x'.
If all cases, `d' and `nx' refer to the actual numeric (float) data
content of the file, not labels.
Return -1 in case of error and set 'errc'. The error codes are
EINVAL, for invalid arguments, any of the file operations errno
codes, LERR_FILE_FORMAT, for unrecognized file format, and
LERR_VAR_NCOL, for variable number of columns in `fname'.
If `fname' is an empty file, return 0.
*/
int load_file(char *fname, float **x, int format, char **rlab, char **clab0,
char **clab, int d, int nx, int *errc)
{
int status = 0;
int ivec;
int ift;
int idx;
int maxlen;
int lcnt;
int len;
int dnx;
int first_line;
char *str;
char *line;
char ftr[31]; /* we assume the longest float won't exceed 30 characters */
char **tokens;
FILE *fptr;
line = (char *) 0;
dnx = 0;
fptr = fopen(fname, "r");
*errc = 0;
if (fptr)
{
lcnt = file_info(fname, &maxlen, &dnx, '\0');
if ((lcnt != -1) && (dnx != -1))
{
if (format == LAU_FF_ROW)
dnx--;
else if (format == LAU_FF_COL)
lcnt--;
else if (format == LAU_FF_COLROW)
{
dnx--;
lcnt--;
}
}
if ((lcnt != -1) && ((dnx == d) || (lcnt == 0)))
{
line = malloc(maxlen+2);
if (line)
{
ivec = 0;
first_line = 1;
while (fgets(line, maxlen+2, fptr) && (status == 0))
{
/*
Get column names from the first line.
*/
if (((format == LAU_FF_COL) || (format == LAU_FF_COLROW)) && (first_line == 1) && clab)
{
tokens = str_tokenize(line, WHITESPACE);
if (format == LAU_FF_COLROW)
{
*clab0 = strdup(tokens[0]);
for (idx = 1; idx < str_length(tokens); idx++)
clab[idx-1] = strdup(tokens[idx]);
}
else
{
for (idx = 0; idx < str_length(tokens); idx++)
clab[idx] = strdup(tokens[idx]);
}
str_free(tokens);
}
else if ((format == LAU_FF_RAW) || (format == LAU_FF_ROW) || (first_line == 0))
{
str = line;
str += strspn(str, WHITESPACE);
ift = 0;
idx = 0;
while ((str && *str) && (idx < d))
{
len = strcspn(str, WHITESPACE);
memcpy(ftr, str, len);
ftr[len] = '\0';
if ((ift == 0) && ((format == LAU_FF_ROW) || (format == LAU_FF_COLROW)) && rlab)
rlab[ivec] = strdup(ftr);
else
x[ivec][idx++] = atof(ftr);
ift++;
str += len;
str += strspn(str, WHITESPACE);
}
if (ift > 0)
ivec++;
if (str && *str)
{
status = -1;
if (errc)
*errc = EINVAL;
}
}
first_line = 0;
}
/*
'nx' vectors requested, 'ivec' found. Report
LERR_FILE_FORMAT.
*/
if (ivec != nx)
{
status = -1;
if (errc)
*errc = LERR_FILE_FORMAT;
}
free(line);
}
else
status = -1;
}
else
{
status = -1;
if (errc)
{
if (dnx == -1)
*errc = LERR_VAR_NCOL;
else if (dnx != d)
*errc = LERR_FILE_FORMAT;
}
}
if (!status)
status = fclose(fptr);
else
fclose(fptr);
}
else
status = -1;
if (errc && (status == -1) && !*errc)
*errc = errno;
return status;
}
/*
Return diff between 'vec1' and 'vec2'; i.e., vector of integers
present in 'vec1' and not in 'vec2'. Return the length of the diff
vector in 'length'.
In case of error, return NULL and set errno.
NOTE: because the function uses hash program from a library called
'Kazlib', and in which INT_MAX has a special meaning, vec2 may not
contain value INT_MAX.
*/
int *ivec_diff(int *vec1, int len1, int *vec2, int len2, int *length)
{
int i;
int j;
int *diff;
hash_t *xhash;
diff = (int *) 0;
xhash = hashCreate();
if (xhash)
{
diff = malloc(len1*sizeof(int));
if (diff)
{
for (i = 0; i < len2; i++)
hashPutInt(xhash, vec2[i], vec2[i]);
j = 0;
for (i = 0; i < len1; i++)
if (hashGetInt(xhash, vec1[i]) == INT_MAX)
diff[j++] = vec1[i];
if (length)
*length = j;
hashDelete(xhash);
}
}
return diff;
}
/*
Return 0 if `vec1' equals 'vec2', otherwise return -1.
*/
int ivec_cmp(int *vec1, int *vec2, int len)
{
int i;
int retval;
retval = 0;
if (vec1 && vec2)
{
for (i = 0; (i < len) && !retval; i++)
{
if (vec1[i] != vec2[i])
retval = -1;
}
}
else if (vec1 && !vec2)
retval = -1;
else if (!vec1 && vec2)
retval = -1;
return retval;
}
/*
Return version of `str' exactly `len' characters long (plus 1 for
null terminator).
If `str' is shorter than `len' the returned string is padded with
space characters.
*/
char *lau_str_lim(char *str, int len)
{
int icntr;
char *lim;
char *ptr;
char *xtr;
lim = (char *) 0;
if (str)
{
lim = calloc(len+1, sizeof(char));
if (lim)
{
icntr = 0;
ptr = str;
xtr = lim;
while ((*ptr) && (icntr < len))
{
*xtr = *ptr;
icntr++;
xtr++;
ptr++;
}
if (icntr < len)
memset(xtr, ' ', len-icntr);
}
}
return lim;
}
syntax highlighted by Code2HTML, v. 0.9.1