/* Module name: lau.c Created by: Ljubomir Buturovic Created: 05/12/2001 Purpose: assorted utilities. */ /* Copyright 2004 Ljubomir J. Buturovic Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ #include #include #include #include #include #include #include #include #include #include #include "lerr.h" #include "lau.h" #include "hash_util.h" static char rcsid[] = "$Id: lau.c,v 1.114 2006/03/27 17:46:40 ljubomir Exp $"; /* Extract tokens (separated by characters in 'delimiter' string) from 'str'. Return NULL-terminated array of tokens. The function allocates the space for the returned array. It is the caller's responsibility to free it. */ char **str_tokenize(char *str, char *delimiter) { int len; int i; int rlen; int cntr; int capacity; char *xtr; char *ptr; char **tokens; char **xtokens; tokens = (char **) 0; cntr = 0; capacity = 0; if ((str != (char *) 0) && (delimiter != (char *) 0)) { rlen = strlen(str); if (rlen > 0) { tokens = malloc(INIT_LENGTH*sizeof(char *)); capacity = INIT_LENGTH; } xtr = str; while (rlen > 0) { len = strspn(xtr, delimiter); xtr += len; rlen -= len; len = strcspn(xtr, delimiter); rlen -= len; if (len > 0) { ptr = malloc(len+1); memcpy(ptr, xtr, len); ptr[len] = '\0'; xtr += len; tokens[cntr++] = ptr; if (cntr >= capacity) { capacity += capacity; xtokens = malloc(capacity*sizeof(char *)); for (i = 0; i < cntr; i++) { xtokens[i] = strdup(tokens[i]); free(tokens[i]); } free(tokens); tokens = xtokens; } } } if (tokens) tokens[cntr] = (char *) 0; } return tokens; } /* Remove blanks from end of 'str'. */ void str_trim(char *str) { int len; if (str && *str) { len = strlen(str)-1; while (str[len] == ' ') { str[len] = '\0'; len--; } } } /* Return 'str' extended enough to accomodate length+added_length. 'capacity' is currently allocated space for 'str', excluding the null terminator; 'current_length' is the current length of the string; 'added_length' is the number of characters to be concatenated to the string. If 'capacity' is sufficient to accomodate 'current_length' plus 'added_length', the function returns unmodified 'str'. Otherwise the space is reallocated to accomodate the desired length. Typical usage: str = str_extend(str, &capacity, current_length, added_length); In case of realloc() error, return NULL and set errno. */ char *str_extend(char *str, int *capacity, int current_length, int added_length) { int new_length; char *xstr; xstr = str; new_length = current_length+added_length; if (*capacity && (new_length > *capacity)) { while (*capacity <= new_length) *capacity += *capacity; xstr = realloc(str, *capacity+1); } return xstr; } /* Return clone of NULL-terminated array `str'. Return NULL and set errno in case of failure. */ char **nta_clone(char **nta) { int i; int length; int status; char **clone; status = 0; clone = (char **) 0; length = str_length(nta); if (length > 0) { clone = calloc(length+1, sizeof(char *)); if (clone) { for (i = 0; (i < length) && !status; i++) { clone[i] = strdup(nta[i]); if (!clone[i]) { mx_free((void **) clone, i); status = -1; } } } } return clone; } /* Return length of NULL-terminated (char **) array 'nta'. */ int str_length(char **nta) { int length; char **atr; length = 0; if (nta) { atr = nta; while (*atr) { length++; atr++; } } return length; } /* Free NULL-terminated (char **) array 'nta'. The function preserves errno. */ void str_free(char **nta) { int i; int errno_save; errno_save = errno; i = 0; if (nta) { while (nta[i]) { free(nta[i]); i++; } free(nta); } errno = errno_save; } /* Free non-NULL-terminated (char **) array 'str' of length 'length'. The function does not change the value of `errno'. */ void strlen_free(char **str, int length) { int i; if (str) { for (i = 0; i < length; i++) vx_free(str[i]); vx_free(str); } } /* Clone non-NULL-terminated (char **) array 'str' of length 'length'. Return NULL in case of malloc() failure and set errno. */ char **str_clone(char **str, int length) { int i; int status; char **clone; status = 0; clone = (char **) 0; if (str && (length > 0)) { clone = malloc(length*sizeof(char *)); if (clone) { for (i = 0; i < length; i++) { clone[i] = strdup(str[i]); if (!clone[i]) { status = -1; strlen_free(clone, i); } } } } return clone; } /* Create string of length 'len' populated with 'ch'. */ char *str_create(int len, char ch) { char *str; str = (char *) 0; if (len >= 0) { str = malloc((len+1)*sizeof(char)); if (str) { memset(str, ch, len); str[len] = '\0'; } } return str; } /* Insert strings in 'str' before character at position 'pos' ('pos' is 0-based). The function has variable number of arguments. 'nargs' is the number of arguments following 'nargs'. The following arguments are 'str' (char *) and 'pos' (int), followed by the insert strings. The function returns the composite string. For example, if the function is called as str_insert(4, "abc.dat", 3, "_d", "_e"); it returns "abc_d_e.dat". If nargs is less than 3, or 'pos' is negative, or any of the insert strings is NULL, the function returns NULL and sets errno to EINVAL. In case of malloc() error, the function returns NULL and sets errno. */ char *str_insert(int nargs, ...) { int i; int pos; int len; int rlen; int slen; int status; char *str; char *xtr; char *itr; char *ptr; char **strings; va_list VariableArgsPtr; itr = (char *) 0; pos = 0; str = (char *) 0; va_start(VariableArgsPtr, nargs); if (nargs < 3) errno = EINVAL; else { strings = calloc(nargs-1, sizeof(char *)); if (strings) { status = 0; for (i = 0; (i < nargs) && !status; i++) { if (i == 0) str = va_arg(VariableArgsPtr, char *); else if (i == 1) pos = va_arg(VariableArgsPtr, int); else { xtr = va_arg(VariableArgsPtr, char *); if (!xtr) { errno = EINVAL; status = -1; } else { strings[i-2] = strdup(xtr); if (!strings[i-2]) status = -1; } } } if (!status) { len = strlen(str); rlen = len-pos; slen = 0; for (i = 0; i < nargs-2; i++) slen += strlen(strings[i]); itr = malloc((len+slen+1)*sizeof(char)); if (itr) { itr[len+slen] = '\0'; ptr = itr; memcpy(itr, str, pos); ptr += pos; for (i = 0; i < nargs-2; i++) { len = strlen(strings[i]); memcpy(ptr, strings[i], len); ptr += len; } memcpy(ptr, str+pos, rlen); } } str_free(strings); } } return itr; } /* Copy integer vector 'src' into 'dest'. */ void ivec_copy(int *dest, int *src, int len) { int i; if ((len > 0) && dest && src) for (i = 0; i < len; i++) dest[i] = src[i]; } /* Copy floating point vector 'src' into 'dest'. */ void fvec_copy(float *dest, float *src, int len) { int i; if ((len > 0) && (dest != (float *) 0) && (src != (float *) 0)) for (i = 0; i < len; i++) dest[i] = src[i]; } /* Return 'len'-long 'index' subset of 'vector' of length 'vlen'. In case of improper arguments, or if 'index' points outside of 'vector', return NULL. In case of malloc() error, return NULL and set errno. */ float *fvec_subset(float *vector, int vlen, int *index, int len) { int i; int idx; int status; float *fsub; status = 0; fsub = (float *) 0; if (vector && (vlen >= len) && index && (len > 0)) { fsub = malloc(len*sizeof(float)); if (fsub) for (i = 0; (i < len) && !status; i++) { idx = index[i]; if (idx >= vlen) { fsub = vx_free(fsub); status = -1; } else fsub[i] = vector[idx]; } } return fsub; } /* Return matrix whose rows are fvec_subset() of rows in 'mx'. In case of improper arguments, or if 'index' points outside of 'columns', return NULL. In case of malloc() error, return NULL and set errno. TBD: untested function. */ float **fmx_subset(float **mx, int rows, int columns, int *index, int len) { int i; int status; float **fmx; i = 0; status = 0; fmx = (float **) 0; if (mx && (columns >= len) && index && (len > 0)) { fmx = fmx_alloc(rows, len); if (fmx) for (i = 0; (i < len) && !status; i++) { fmx[i] = fvec_subset(mx[i], columns, index, len); if (!fmx[i]) status = -1; } if (status == -1) fmx = (float **) mx_free((void **) fmx, i); } return fmx; } /* Copy double vector 'src' into 'dest'. */ void dvec_copy(double *dest, double *src, int len) { int i; if ((len > 0) && dest && src) for (i = 0; i < len; i++) dest[i] = src[i]; } /* Return clone (duplicate) of float vector 'vec'. */ float *fvec_clone(float *vec, int len) { int i; float *clone; clone = (float *) 0; if ((len > 0) && (vec != (float *) 0)) { clone = malloc(len*sizeof(float)); if (clone != (float *) 0) for (i = 0; i < len; i++) clone[i] = vec[i]; } return clone; } /* Return 'double' clone (duplicate) of float vector 'vec'. */ double *double_clone(float *vec, int len) { int i; double *clone; clone = (double *) 0; if ((len > 0) && vec) { clone = malloc(len*sizeof(double)); if (clone) for (i = 0; i < len; i++) clone[i] = vec[i]; } return clone; } /* Return 'double' clone (duplicate) of double vector 'vec'. */ double *dvec_clone(double *vec, int len) { int i; double *clone; clone = (double *) 0; if ((len > 0) && vec) { clone = malloc(len*sizeof(double)); if (clone) for (i = 0; i < len; i++) clone[i] = vec[i]; } return clone; } /* Return clone (duplicate) of integer vector 'ivec'. */ int *ivec_clone(int *ivec, int len) { int i; int *clone; clone = (int *) 0; if ((len > 0) && (ivec != (int *) 0)) { clone = malloc(len*sizeof(int)); if (clone != (int *) 0) for (i = 0; i < len; i++) clone[i] = ivec[i]; } return clone; } /* Return sum of elements in 'vec'. */ int ivec_sum(int *vec, int len) { int i; int sum = 0; if ((vec != (int *) 0) && len > 0) for (i = 0; i < len; i++) sum += vec[i]; return sum; } /* Return minimum element in 'vec'. */ int ivec_min(int *vec, int len) { int i; int min = 0; if ((vec != (int *) 0) && len > 0) { min = vec[0]; for (i = 1; i < len; i++) if (vec[i] < min) min = vec[i]; } return min; } /* Return max. element in 'vec'. */ int ivec_max(int *vec, int len) { int i; int max = 0; if ((vec != (int *) 0) && len > 0) { max = vec[0]; for (i = 1; i < len; i++) if (vec[i] > max) max = vec[i]; } return max; } /* Return index of max. element in 'vec'. In case of ties, return the lowest index. Return -1 for empty vector. */ int ivec_argmax(int *vec, int len) { int i; int max; int argmax = -1; if (vec && (len > 0)) { max = vec[0]; argmax = 0; for (i = 1; i < len; i++) if (vec[i] > max) { max = vec[i]; argmax = i; } } return argmax; } /* Return index of max. element in 'vec'. Resolve ties pseudo-randomly, using rand_int(). Return -1 for empty vector. In case of malloc() failure, return -1 and set errno. */ int ivec_rand_argmax(int *vec, int len) { int i; int cntr; int max; int argmax = -1; int tie; int idx; int *ties; if (vec && (len > 0)) { max = vec[0]; argmax = 0; for (i = 1; i < len; i++) if (vec[i] > max) { max = vec[i]; argmax = i; } if (len > 1) { cntr = 0; for (i = 0; i < len; i++) if (vec[i] == max) cntr++; if (cntr > 1) { ties = malloc(cntr*sizeof(int)); if (ties) { idx = 0; for (i = 0; i < len; i++) if (vec[i] == max) ties[idx++] = i; tie = rand_int(0, cntr-1); argmax = ties[tie]; free(ties); } else argmax = -1; } } } return argmax; } /* Set elements of 'vec' to 'value'. */ void ivec_set(int *vec, int len, int value) { int i; if ((vec != (int *) 0) && len > 0) for (i = 0; i < len; i++) vec[i] = value; } /* Set elements of 'vec' to 'value'. */ void fvec_set(float *vec, int len, float value) { int i; if ((vec != (float *) 0) && len > 0) for (i = 0; i < len; i++) vec[i] = value; } /* Return sum of elements in 'vec'. */ float fvec_sum(float *vec, int len) { int i; double sum = 0.0; if ((vec != (float *) 0) && len > 0) for (i = 0; i < len; i++) sum += vec[i]; return (float) sum; } /* Load matrix from 'fname'. Skip lines which begin with 'skip' character. In case of error, return NULL and set errno. */ float **fmx_load(char *fname, int *rows, int *columns, char skip) { int status = 0; int ivec; int ift; int maxlen; int lcnt; int len; int dnx; char *str; char *line; char ftr[31]; /* we assume the longest float won't exceed 30 characters */ float **fmx; FILE *fptr; line = (char *) 0; dnx = 0; lcnt = 0; fmx = (float **) 0; fptr = fopen(fname, "r"); if (fptr) { lcnt = file_info(fname, &maxlen, &dnx, '\0'); if ((lcnt != -1) && (dnx != -1)) { fmx = fmx_alloc(lcnt, dnx); line = malloc(maxlen+2); if (line && fmx) { ivec = 0; while (fgets(line, maxlen+2, fptr) && (status == 0)) { if (*line != skip) { str = line; str += strspn(str, WHITESPACE); ift = 0; while ((str && *str) && (ift < dnx)) { len = strcspn(str, WHITESPACE); memcpy(ftr, str, len); ftr[len] = '\0'; fmx[ivec][ift++] = atof(ftr); str += len; str += strspn(str, WHITESPACE); } if (ift > 0) ivec++; if (str && *str) { status = -1; errno = EINVAL; } } } if (rows) *rows = ivec; if (columns) *columns = dnx; free(line); } else status = -1; } else { status = -1; if (dnx == -1) errno = EINVAL; } fclose(fptr); } else status = -1; if (status == -1) fmx = (float **) mx_free((void **) fmx, lcnt); return fmx; } /* Return sum of elements in 'vec'. */ double dvec_sum(double *vec, int len) { int i; double sum = 0.0; if (vec && (len > 0)) for (i = 0; i < len; i++) sum += vec[i]; return sum; } /* Return pseudo-random integer in [min, max] range (i.e., including 'min' and 'max'). */ int rand_int(int min, int max) { int irand; double ftcx; /* `double' ensures consistent results between optimized and debug versions of the function */ ftcx = max-min+1; irand = min+(int) (ftcx*rand()/(RAND_MAX+1.0)); return irand; } /* Allocate floating point matrix 'rows' by 'columns'. Return (float **) 0 in case of malloc() error and set errno. */ float **fmx_alloc(int rows, int columns) { int i; int j; float **mx; mx = malloc(rows*sizeof(float *)); for (i = 0; (i < rows) && mx; i++) { mx[i] = malloc(columns*sizeof(float)); if (!mx[i]) { for (j = 0; j < i; j++) free(mx[j]); free(mx); mx = (float **) 0; } } return mx; } /* Allocate integer matrix 'rows' by 'columns'. Return NULL in case of malloc() error and set errno. */ int **imx_alloc(int rows, int columns) { int i; int j; int **mx; mx = malloc(rows*sizeof(int *)); for (i = 0; (i < rows) && mx; i++) { mx[i] = malloc(columns*sizeof(int)); if (!mx[i]) { for (j = 0; j < i; j++) free(mx[j]); free(mx); mx = (int **) 0; } } return mx; } /* Set 'fmx' to 'value'. */ void fmx_set(float **fmx, int rows, int columns, float value) { int i; int j; if (fmx) { for (i = 0; i < rows; i++) for (j = 0; j < columns; j++) fmx[i][j] = value; } } /* Set 'imx' to 'value'. */ void imx_set(int **imx, int rows, int columns, int value) { int i; int j; if (imx) { for (i = 0; i < rows; i++) for (j = 0; j < columns; j++) imx[i][j] = value; } } /* Clone floating point matrix 'fmx', with dimensions'rows' by 'columns'. Return (float **) 0 in case of malloc() error and set errno. */ float **fmx_clone(float **fmx, int rows, int columns) { int i; int j; float **mx; mx = malloc(rows*sizeof(float *)); for (i = 0; (i < rows) && mx; i++) { mx[i] = malloc(columns*sizeof(float)); if (mx[i]) fvec_copy(mx[i], fmx[i], columns); else { for (j = 0; j < i; j++) free(mx[j]); free(mx); mx = (float **) 0; } } return mx; } /* Clone double matrix `dmx', with dimensions `rows' by `columns'. Return (double **) 0 in case of malloc() error and set errno. */ double **dmx_clone(double **matrix, int rows, int columns) { int i; int j; double **rx; rx = malloc(rows*sizeof(double *)); for (i = 0; (i < rows) && rx; i++) { rx[i] = malloc(columns*sizeof(double)); if (rx[i]) dvec_copy(rx[i], matrix[i], columns); else { for (j = 0; j < i; j++) free(rx[j]); free(rx); rx = (double **) 0; } } return rx; } /* Return normalized version of 'fmx'. The normalization is: reduce to zero mean and divide each column by its' standard deviation. Return (float **) 0 in case of malloc() error and set errno. */ float **fmx_normalize(float **fmx, int rows, int columns) { float **mx; mx = (float **) 0; if (fmx && (rows > 0) && (columns > 0)) { mx = fmx_clone(fmx, rows, columns); fmx_norm(mx, rows, columns); } return mx; } /* Normalize matrix 'fmx'. The normalization is: reduce to zero mean and divide each column by its' standard deviation. If the standard deviation is zero, just shift the columns. */ void fmx_norm(float **fmx, int rows, int columns) { int i; int j; float xmean; double s; if (fmx && (rows > 0) && (columns > 0)) { for (i = 0; i < columns; i++) { s = 0.0; for (j = 0; j < rows; j++) s += fmx[j][i]; xmean = s/rows; for (j = 0; j < rows; j++) fmx[j][i] -= xmean; if (rows > 1) { s = 0.0; for (j = 0; j < rows; j++) s += fmx[j][i]*fmx[j][i]; if (s > 0.0) { s = sqrt(s/(rows-1)); for (j = 0; j < rows; j++) fmx[j][i] /= s; } } } } } /* Normalize matrix 'fmx' using precomputed 'xmean', 'std'. The normalization is: for each column, subtract 'xmean' and, if the column 'std' is non-zero, divide by 'std'. */ void fmx_prenorm(float **fmx, int rows, int columns, float *xmean, float *std) { int i; int j; float f; if (fmx && (rows > 0) && (columns > 0)) for (i = 0; i < columns; i++) for (j = 0; j < rows; j++) { f = fmx[j][i]; if (xmean && std) { if (std[i] != 0) fmx[j][i] = (f-xmean[i])/std[i]; else fmx[j][i] = f-xmean[i]; } else if (xmean) fmx[j][i] = f-xmean[i]; else if (std && (std[i] != 0)) fmx[j][i] = f/std[i]; } } /* Return mean values of columns in 'fmx'. */ float *fmx_mean(float **fmx, int rows, int columns) { int i; int j; double s; float *xmean; xmean = (float *) 0; if (fmx && (rows > 0) && (columns > 0)) { xmean = malloc(columns*sizeof(float)); if (xmean) { if (rows > 1) { for (i = 0; i < columns; i++) { s = 0.0; for (j = 0; j < rows; j++) s += fmx[j][i]; xmean[i] = s/rows; } } else { for (i = 0; i < columns; i++) xmean[i] = fmx[0][i]; } } } return xmean; } /* Return standard deviations of columns in 'fmx'. Return (float **) 0 in case of malloc() error and set errno. */ float *fmx_std(float **fmx, int rows, int columns) { int i; int j; float xmean; double s; float *std; std = (float *) 0; if (fmx && (rows > 0) && (columns > 0)) { std = malloc(columns*sizeof(float)); if (rows > 1) { for (i = 0; i < columns; i++) { s = 0.0; for (j = 0; j < rows; j++) s += fmx[j][i]; xmean = s/rows; s = 0.0; for (j = 0; j < rows; j++) s += (fmx[j][i]-xmean)*(fmx[j][i]-xmean); std[i] = sqrt(s/(rows-1)); } } else fvec_set(std, columns, 0.0); } return std; } /* Allocate double floating point matrix 'rows' by 'columns'. Return (double **) 0 in case of malloc() error and set errno. */ double **dmx_alloc(int rows, int columns) { int i; int j; double **mx; mx = malloc(rows*sizeof(double *)); for (i = 0; (i < rows) && (mx != (double **) 0); i++) { mx[i] = malloc(columns*sizeof(double)); if (mx[i] == (double *) 0) { for (j = 0; j < i; j++) free(mx[j]); free(mx); mx = (double **) 0; } } return mx; } /* Allocate string matrix 'rows' by 'columns'. Each element of the matrix is (char *) and is initialized to (char *) 0. Return (char ***) 0 in case of malloc() error and set errno. */ char ***cmx_alloc(int rows, int columns) { int i; char ***mx; mx = malloc(rows*sizeof(char **)); for (i = 0; (i < rows) && (mx != (char ***) 0); i++) { mx[i] = malloc(columns*sizeof(char **)); if (mx[i] == (char **) 0) mx = (char ***) mx_free((void **) mx, i); } return mx; } /* Free matrix 'mx' with 'rows' rows. Return (void **) 0. This function ignores all free() errors. */ void **mx_free(void **mx, int rows) { int j; int errno_save; if (mx && (rows >= 0)) { errno_save = errno; for (j = 0; j < rows; j++) free(mx[j]); free(mx); errno = errno_save; } return (void **) 0; } /* Free 'vector'. Return (void *) 0. This function ignores all free() errors. */ void *vx_free(void *vector) { int errno_save; errno_save = errno; free(vector); errno = errno_save; return (void *) 0; } /* Initial memory allocation for file_info(). */ #define INIT_CAPACITY 100 /* Return number of lines in 'fname' and optionally additional file information. If 'llen' is not NULL, store the length of the longest line in *llen. If 'ntok' is not NULL, store number of tokens in each line in *ntok, and change semantics of return value: ignore blank (whitespace-only) lines. If lines have varying number of tokens, *ntok is set to -1. This behavior is consistent with MATLAB load() function. Tokens are assumed to be separated by a single 'delimiter' character. If 'delimiter' is null character ('\0'), the tokens are assumed to be separated by a string of WHITESPACE characters. The number of lines and max. line length are consistent with values returned by UNIX command 'wc' - in particular, llen does not count the newline character. Return -1 in case of failure and set errno. */ int file_info(char *fname, int *llen, int *ntok, char delimiter) { int lc; int i; int size; int fd; int status; int first_line; int maxlen; int capacity; int nctr; int cctr; int line_flag; /* 0 indicates empty line */ int linlen; int len; int errno_save; char *fstr; char *xtr; /* current pointer in the file */ char *lstr; /* pointer to beginning of line */ char *line; char *dstring; char **tokens; struct stat buf; cctr = 0; errno_save = errno; if (delimiter == '\0') dstring = strdup(WHITESPACE); else { dstring = malloc(2); dstring[0] = delimiter; dstring[1] = '\0'; } lc = 0; nctr = 0; capacity = INIT_CAPACITY; maxlen = 0; if (fname && *fname) { status = stat(fname, &buf); if (status == 0) { size = buf.st_size; if (size > 0) { fd = open(fname, O_RDONLY); if (fd != -1) { fstr = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0); if (fstr == MAP_FAILED) { lc = -1; errno_save = errno; } else { xtr = fstr; lstr = fstr; first_line = 1; line = malloc((capacity+1)*sizeof(char)); if (!line) { lc = -1; errno_save = errno; } for (i = 0; (i < size) && (lc != -1); i++) { if ((*xtr == '\n') || /* UNIX files */ ((*xtr == '\r') && (*(xtr+1) == '\n'))) /* DOS files */ { line_flag = 1; len = xtr-lstr; if (len > maxlen) maxlen = len; if (len > capacity) { while (capacity <= len) capacity += capacity; line = realloc(line, (capacity+1)*sizeof(char)); if (!line) { lc = -1; errno_save = errno; } } if (ntok && (lc != -1)) { memcpy(line, lstr, len); line[len] = '\0'; linlen = strlen(line); if ((linlen > 0) && (linlen != strspn(line, dstring))) { tokens = str_tokenize(line, dstring); if (tokens) { nctr = str_length(tokens); str_free(tokens); if (first_line == 1) cctr = nctr; else if (nctr != cctr) { nctr = -1; cctr = -1; } } else { lc = -1; errno_save = errno; } first_line = 0; } else line_flag = 0; } if (lc != -1) { if (line_flag) lc++; lstr = xtr; lstr++; } } if ((*xtr == '\r') && (*(xtr+1) == '\n')) { xtr++; i++; } xtr++; } free(line); status = munmap(fstr, size); if (status == -1) { lc = -1; errno_save = errno; } } status = close(fd); if (status == -1) lc = -1; } else lc = -1; } else { lc = 0; if (llen) *llen = 0; if (ntok) *ntok = 0; } } else { lc = -1; errno_save = errno; } } else lc = -1; if (lc == -1) errno = errno_save; else { if (llen) *llen = maxlen; if (ntok) *ntok = nctr; } free(dstring); return lc; } /* Return max. length of a filename in the current filesystem (the current filesystem is the filesystem in which the current directory resides). In case of failure, return -1 and set errno. */ long get_namelen(void) { long length; length = pathconf(".", _PC_NAME_MAX); return length; } /* Save 'matrix' ('transpose' == 0) or transpose of 'matrix' ('transpose' == 1) in 'fname'. Return -1 in case of error and set errno. */ int fmx_save(float **matrix, int rows, int columns, char *fname, int transpose) { int status = 0; FILE *fptr; if ((rows > 0) && (columns > 0)) { fptr = fopen(fname, "w"); if (fptr) { status = fmx_write(fptr, matrix, rows, columns, transpose); if (!status) status = fclose(fptr); } else status = -1; } return status; } /* Write 'matrix' ('transpose' == 0) or transpose of 'matrix' ('transpose' == 1) to 'fptr'. In case of success, return 0, otherwise return -1 and set errno. */ int fmx_write(FILE *fptr, float **matrix, int rows, int columns, int transpose) { int i; int j; int status = 0; if ((rows > 0) && (columns > 0) && fptr) { if (transpose == 1) { for (i = 0; (i < columns) && (status >= 0); i++) for (j = 0; (j < rows) && (status >= 0); j++) { if (j == rows-1) status = fprintf(fptr, "%g\n", matrix[j][i]); else status = fprintf(fptr, "%g\t", matrix[j][i]); } } else { for (i = 0; (i < rows) && (status >= 0); i++) for (j = 0; (j < columns) && (status >= 0); j++) { if (j == columns-1) status = fprintf(fptr, "%g\n", matrix[i][j]); else status = fprintf(fptr, "%g\t", matrix[i][j]); } } } if (status < 0) status = -1; else status = 0; return status; } /* Write 'matrix' to 'fptr'. Store 'xnames[i]', if not NULL, in the first column of i-th row. In case of success, return 0, otherwise return -1 and set errno. */ int fmx_nwrite(FILE *fptr, float **matrix, char **xnames, int rows, int columns) { int i; int j; int status = 0; if ((rows > 0) && (columns > 0) && fptr) { for (i = 0; (i < rows) && (status >= 0); i++) { if (xnames) status = fprintf(fptr, "%s\t", xnames[i]); for (j = 0; (j < columns) && (status >= 0); j++) { if (j == columns-1) status = fprintf(fptr, "%g\n", matrix[i][j]); else status = fprintf(fptr, "%g\t", matrix[i][j]); } } } if (status < 0) status = -1; else status = 0; return status; } /* Return basic name (i.e., basename without suffix). For example, for path '/dir/fname.dat' bname() returns string 'fname'. The function allocates memory for the returned string, and it is the caller's responsibility to free() it. In case of error, return (char *) 0 and set errno. */ char *bname(char *path) { int len; char *str; char *name = (char *) 0; if (path && *path) { if ((str = strrchr(path, '/')) != (char *) 0) str++; else str = path; len = strcspn(str, "."); name = malloc((len+1)*sizeof(char)); if (name != (char *) 0) { memcpy(name, str, len); name[len] = '\0'; } } return name; } /* Safe version of basename() (i.e., it does not modify the input string). The difference from bname() is that this one includes the extension. In case of error, return (char *) 0 and set errno. */ char *base_name(char *path) { int len; char *str; char *name = (char *) 0; if (path && *path) { if ((str = strrchr(path, '/'))) str++; else str = path; len = strlen(str); name = malloc((len+1)*sizeof(char)); if (name) { memcpy(name, str, len); name[len] = '\0'; } } return name; } /* Return string copy of 'fname'. If the file is zero bytes long, return empty string. Return NULL in case of error and set errno. errno can be stat(), open(), mmap() or malloc() error. */ char *str_file(char *fname) { int lc; int size; int fd; int status; int maxlen; int nctr; char *fstr; char *string = (char *) 0; struct stat buf; lc = 0; nctr = 0; maxlen = 0; if (fname && *fname) { status = stat(fname, &buf); if (status == 0) { size = buf.st_size; if (size > 0) { fd = open(fname, O_RDONLY); if (fd != -1) { fstr = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0); if (fstr != MAP_FAILED) { string = malloc((size+1)*sizeof(char)); if (string != (char *) 0) { memcpy(string, fstr, size); string[size] = '\0'; } if (munmap(fstr, size)) string = string_free(string); } if (close(fd)) string = string_free(string); } } else string = strdup(""); } } return string; } /* free(str) without changing errno. Return (char *) 0. */ char *string_free(char *str) { int errno_save; errno_save = errno; free(str); errno = errno_save; return (char *) 0; } /* Return index 'j' within 'vector' such that vector[j] <= r < vector[j+1]. Return -1 if 'r' is outside range. */ int floc(float r, float *vector, int len) { int loc; int ileft; int iright; int imiddle; int done = 0; loc = -1; if (vector && len > 0) { ileft = 0; iright = len-1; if ((r < vector[ileft]) || (r > vector[iright])) loc = -1; else { imiddle = len/2; while (done == 0) { if (vector[imiddle] < r) ileft = imiddle; else iright = imiddle; imiddle = ileft+iright; imiddle = imiddle/2; if (iright-ileft <=1) { if (r == vector[ileft]) loc = ileft; else if (r == vector[iright]) loc = iright; else loc = ileft; done = 1; } } } } return loc; } /* Return index 'j' within 'vector' such that vector[j] <= r < vector[j+1]. Return -1 if 'r' is outside range. */ int dloc(double r, double *vector, int len) { int loc; int ileft; int iright; int imiddle; int done = 0; loc = -1; if (vector && len > 0) { ileft = 0; iright = len-1; if ((r < vector[ileft]) || (r > vector[iright])) loc = -1; else { imiddle = len/2; while (done == 0) { if (vector[imiddle] < r) ileft = imiddle; else iright = imiddle; imiddle = ileft+iright; imiddle = imiddle/2; if (iright-ileft <=1) { if (r == vector[ileft]) loc = ileft; else if (r == vector[iright]) loc = iright; else loc = ileft; done = 1; } } } } return loc; } /* Return clone of NULL-terminated 'str'. */ char **string_clone(char **str) { int len; char **ptr; char **string = (char **) 0; len = 0; if (str) { ptr = str; while (ptr) { ptr++; len++; } string = string_copy(str, len); } return string; } /* Return clone of array of strings of length 'length'. */ char **string_copy(char **str, int length) { int i; int j; char **string = (char **) 0; if (str) { string = malloc(length*sizeof(char *)); for (i = 0; (i < length) && string; i++) { string[i] = strdup(str[i]); if (!string[i]) { for (j = 0; j < i; j++) free(string[j]); free(string); string = (char **) 0; } } } return string; } /* Like unlink(), but preserves errno. */ void remove_file(char *fname) { int errno_save; errno_save = errno; if (fname && *fname) unlink(fname); errno = errno_save; } /* Like fclose(), preserving errno. */ void fptr_close(FILE *fptr) { int errno_save; errno_save = errno; if (fptr) fclose(fptr); errno = errno_save; } /* Reverse elements in 'vec'. */ void ivec_reverse(int *vec, int len) { int i; int tmp; if (vec && (len > 1)) { for (i = 0; i < len/2; i++) { tmp = vec[len-1-i]; vec[len-1-i] = vec[i]; vec[i] = tmp; } } } /* Reverse elements in 'vec'. */ void fvec_reverse(float *vec, int len) { int i; float tmp; if (vec && (len > 1)) { for (i = 0; i < len/2; i++) { tmp = vec[len-1-i]; vec[len-1-i] = vec[i]; vec[i] = tmp; } } } /* Calculate standard deviation of integer vector 'vec' of length 'len'. */ float ivec_mean(int *vec, int len) { int i; float mean; double dsum; mean = 0.0; dsum = 0.0; if (vec && len > 0) { for (i = 0; i < len; i++) dsum += vec[i]; mean = dsum/len; } return mean; } /* Calculate standard deviation of integer vector 'vec' of length 'len'. */ float ivec_sd(int *vec, int len) { int i; float mean; double diff; double dsum; float sd; sd = 0.0; dsum = 0.0; if (vec && (len > 1)) { mean = ivec_mean(vec, len); for (i = 0; i < len; i++) { diff = vec[i]-mean; dsum += diff*diff; } sd = sqrt(dsum/(len-1)); } return sd; } /* TBD: the following three functions untested. */ /* Calculate mean of vector 'vec' of length 'len'. */ float fvec_mean(float *vec, int len) { int i; float mean; double dsum; mean = 0.0; dsum = 0.0; if (vec && len > 0) { for (i = 0; i < len; i++) dsum += vec[i]; mean = dsum/len; } return mean; } /* Calculate standard deviation of vector 'vec' of length 'len'. */ float fvec_sd(float *vec, int len) { int i; float mean; double diff; double dsum; float sd; sd = 0.0; dsum = 0.0; if (vec && (len > 1)) { mean = fvec_mean(vec, len); for (i = 0; i < len; i++) { diff = vec[i]-mean; dsum += diff*diff; } sd = sqrt(dsum/(len-1)); } return sd; } /* Calculate covariance between 'len'-dimensional vectors 'vec1' and 'vec2' with mean values 'mean1' and 'mean2'. */ float fvec_cov(float *vec1, float *vec2, int len, float mean1, float mean2) { int i; float cov; double dsum; dsum = 0.0; for (i = 0; i < len; i++) dsum += vec1[i]*vec2[i]; cov = dsum/(len-1)-mean1*len*mean2/(len-1); return cov; } /* Return min(int1, int2). */ int int_min(int int1, int int2) { int imin; if (int1 <= int2) imin = int1; else imin = int2; return imin; } /* Append 'fname' to 'fptr'. Return -1 in case of error and set errno. */ int fcat(FILE *fptr, char *fname) { int llen; int nl; int status = 0; char *line; FILE *fileptr; if (fptr && fname && *fname) { nl = file_info(fname, &llen, (int *) 0, '\0'); if (nl >= 0) { line = malloc((llen+2)*sizeof(char)); if (line != (char *) 0) { fileptr = fopen(fname, "r"); if (fileptr) { while (fgets(line, llen+2, fileptr)) fprintf(fptr, "%s", line); fclose(fileptr); } else status = -1; string_free(line); } else status = -1; } else status = -1; } return status; } /* Load 'nx' vectors from file 'fname' into 'x'. Each vector (line) is assumed to have 'd' elements, unless the file is empty. Matrix 'x' has to be preallocated sufficient space to hold the data. The function understands four formats. All formats contain whitespace-delimited rows of equal number of columns: LAU_FF_RAW raw data, no row or column labels LAU_FF_COL first row has column labels (names) LAU_FF_ROW first column has row labels LAU_FF_COLROW first row has column labels, first column has row labels If format equals LAU_FF_RAW, all file content is loaded into `x'. If format equals LAU_FF_COL, first line is loaded into pre-allocated array `clab', the rest is loaded into `x'. If format equals LAU_FF_ROW, first column in each row is loaded into pre-allocated array `rlab', the rest is loaded into `x'. If format equals LAU_FF_COLROW, first column of first row is loaded into `clab0'. The remaining columns of the first row are loaded into pre-allocated array `clab'. The first column in each row except first is loaded into pre-allocated array `rlab'. The rest of content is loaded into `x'. If all cases, `d' and `nx' refer to the actual numeric (float) data content of the file, not labels. Return -1 in case of error and set 'errc'. The error codes are EINVAL, for invalid arguments, any of the file operations errno codes, LERR_FILE_FORMAT, for unrecognized file format, and LERR_VAR_NCOL, for variable number of columns in `fname'. If `fname' is an empty file, return 0. */ int load_file(char *fname, float **x, int format, char **rlab, char **clab0, char **clab, int d, int nx, int *errc) { int status = 0; int ivec; int ift; int idx; int maxlen; int lcnt; int len; int dnx; int first_line; char *str; char *line; char ftr[31]; /* we assume the longest float won't exceed 30 characters */ char **tokens; FILE *fptr; line = (char *) 0; dnx = 0; fptr = fopen(fname, "r"); *errc = 0; if (fptr) { lcnt = file_info(fname, &maxlen, &dnx, '\0'); if ((lcnt != -1) && (dnx != -1)) { if (format == LAU_FF_ROW) dnx--; else if (format == LAU_FF_COL) lcnt--; else if (format == LAU_FF_COLROW) { dnx--; lcnt--; } } if ((lcnt != -1) && ((dnx == d) || (lcnt == 0))) { line = malloc(maxlen+2); if (line) { ivec = 0; first_line = 1; while (fgets(line, maxlen+2, fptr) && (status == 0)) { /* Get column names from the first line. */ if (((format == LAU_FF_COL) || (format == LAU_FF_COLROW)) && (first_line == 1) && clab) { tokens = str_tokenize(line, WHITESPACE); if (format == LAU_FF_COLROW) { *clab0 = strdup(tokens[0]); for (idx = 1; idx < str_length(tokens); idx++) clab[idx-1] = strdup(tokens[idx]); } else { for (idx = 0; idx < str_length(tokens); idx++) clab[idx] = strdup(tokens[idx]); } str_free(tokens); } else if ((format == LAU_FF_RAW) || (format == LAU_FF_ROW) || (first_line == 0)) { str = line; str += strspn(str, WHITESPACE); ift = 0; idx = 0; while ((str && *str) && (idx < d)) { len = strcspn(str, WHITESPACE); memcpy(ftr, str, len); ftr[len] = '\0'; if ((ift == 0) && ((format == LAU_FF_ROW) || (format == LAU_FF_COLROW)) && rlab) rlab[ivec] = strdup(ftr); else x[ivec][idx++] = atof(ftr); ift++; str += len; str += strspn(str, WHITESPACE); } if (ift > 0) ivec++; if (str && *str) { status = -1; if (errc) *errc = EINVAL; } } first_line = 0; } /* 'nx' vectors requested, 'ivec' found. Report LERR_FILE_FORMAT. */ if (ivec != nx) { status = -1; if (errc) *errc = LERR_FILE_FORMAT; } free(line); } else status = -1; } else { status = -1; if (errc) { if (dnx == -1) *errc = LERR_VAR_NCOL; else if (dnx != d) *errc = LERR_FILE_FORMAT; } } if (!status) status = fclose(fptr); else fclose(fptr); } else status = -1; if (errc && (status == -1) && !*errc) *errc = errno; return status; } /* Return diff between 'vec1' and 'vec2'; i.e., vector of integers present in 'vec1' and not in 'vec2'. Return the length of the diff vector in 'length'. In case of error, return NULL and set errno. NOTE: because the function uses hash program from a library called 'Kazlib', and in which INT_MAX has a special meaning, vec2 may not contain value INT_MAX. */ int *ivec_diff(int *vec1, int len1, int *vec2, int len2, int *length) { int i; int j; int *diff; hash_t *xhash; diff = (int *) 0; xhash = hashCreate(); if (xhash) { diff = malloc(len1*sizeof(int)); if (diff) { for (i = 0; i < len2; i++) hashPutInt(xhash, vec2[i], vec2[i]); j = 0; for (i = 0; i < len1; i++) if (hashGetInt(xhash, vec1[i]) == INT_MAX) diff[j++] = vec1[i]; if (length) *length = j; hashDelete(xhash); } } return diff; } /* Return 0 if `vec1' equals 'vec2', otherwise return -1. */ int ivec_cmp(int *vec1, int *vec2, int len) { int i; int retval; retval = 0; if (vec1 && vec2) { for (i = 0; (i < len) && !retval; i++) { if (vec1[i] != vec2[i]) retval = -1; } } else if (vec1 && !vec2) retval = -1; else if (!vec1 && vec2) retval = -1; return retval; } /* Return version of `str' exactly `len' characters long (plus 1 for null terminator). If `str' is shorter than `len' the returned string is padded with space characters. */ char *lau_str_lim(char *str, int len) { int icntr; char *lim; char *ptr; char *xtr; lim = (char *) 0; if (str) { lim = calloc(len+1, sizeof(char)); if (lim) { icntr = 0; ptr = str; xtr = lim; while ((*ptr) && (icntr < len)) { *xtr = *ptr; icntr++; xtr++; ptr++; } if (icntr < len) memset(xtr, ' ', len-icntr); } } return lim; }