/*
  File name: lin.h
  Created by: Ljubomir Buturovic
  Created: 08/04/2004
  Purpose: declarations for common linear classifier functions. Some of
  the code also used for quadratic classifier.
*/

/*
  Copyright 2004 Ljubomir J. Buturovic

  Permission is hereby granted, free of charge, to any person obtaining a
  copy of this software and associated documentation files (the
  "Software"), to deal in the Software without restriction, including
  without limitation the rights to use, copy, modify, merge, publish,
  distribute, sublicense, and/or sell copies of the Software, and to
  permit persons to whom the Software is furnished to do so, subject to
  the following conditions:

  The above copyright notice and this permission notice shall be included
  in all copies or substantial portions of the Software.

  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
  CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/

#ifndef LIN_H_INCLUDED
#define LIN_H_INCLUDED

/* Needed for the FILE type used by lin_write(). */
#include <stdio.h>

/*
  File types of linear and quadratic classifier models. A MODEL_SINGLE
  file contains a single model, a MODEL_MULTI file has multiple models.
  See function detect_model() for more details.
*/
#define MODEL_SINGLE 1
#define MODEL_MULTI 2

/*
  Heuristic method to detect the type of a classifier stored in 'fname'.
  The file types are MODEL_SINGLE and MODEL_MULTI.

  A MODEL_SINGLE file contains a single classifier and looks like this
  (the row breaks shown here are illustrative):

  0.0836344 0.253315 -0.219745 -0.0778078 0.00201759
  0.236757 -0.330904 0.121814 -0.452632 0.0100153
  -0.173435 0.166277 0.0747342 0.484082 -0.00424457

  A MODEL_MULTI file contains a committee of classifiers and looks like
  this:

  10 1 1 -23.683 24.7274 -40.1631
  10 1 1 13.7798 18.8408 -22.4262
  10 1 1 28.6913 22.7929 -51.1134
  10 2 1 -28.2378 33.2956 -50.2484
  10 2 1 17.2896 27.7555 -30.9856
  10 2 1 36.7149 32.92 -67.6445
  10 3 1 -20.7337 28.2182 -41.2056
  10 3 1 14.6608 24.2857 -26.3482
  10 3 1 30.7893 30.0967 -59.885
  10 4 1 -26.5747 32.7901 -50.5292
  10 4 1 14.4997 26.1508 -28.0794
  10 4 1 32.2912 30.2114 -61.0424
  ...

  The first column is total number of models in the file. The second
  column is model index, the third is model weight, followed by the model
  coefficients.

  The algorithm for heuristic detection of the model type goes through
  the file and reads the first two columns. If neither has a dot
  character (meaning that they are most likely integers), and the first
  column is constant, and the second column has a range of 1 through the
  value in the first column, the file is considered to be MODEL_MULTI.
  Otherwise it is MODEL_SINGLE. This is not 100% foolproof, but _very_
  unlikely to fail.

  The function returns the model type and sets the number of models in
  'nmd' (obviously, if the type is MODEL_SINGLE, 'nmd' is set to 1).

  In case of file or malloc() error, return -1 and set errno.
*/
int detect_model(char *fname, int *nmd);

/*
  Return predicted classification of 'vector' by linear classifier stored
  in 'model'.

  The model is a 'c' (number of classes) by 'ad' matrix. It is assumed
  that the length of 'vector' is ad-1, and that the last column of
  'model' contains the bias term.

  Return -1 in case of bad arguments. Otherwise, the result is in the
  [0..c-1] interval.
*/
int lin_predict(float **model, int c, int ad, float *vector);

/*
  Calculate 'model' output for 'vector'.

  The model is a 'c' (number of classes) by 'ad' matrix. It is assumed
  that vector is ad-1 long, and that the last column of 'model' contains
  the bias term. The output is a 'c' long vector.

  The function allocates space for the output vector, and it is the
  caller's responsibility to free() it.

  Return NULL in case of memory error or bad arguments. In case of memory
  error, set errno.
*/
float *lin_output(float **model, int c, int ad, float *vector);

/*
  Load linear classifiers from 'fname'.

  The function automatically recognizes two types of model files: single
  model (MODEL_SINGLE) or multiple model (MODEL_MULTI). Single model file
  contains one linear classifier, while multiple model contains multiple
  classifiers.

  Single model file has following format: c rows by d+1 columns (d
  features plus one bias term), where c is the number of classes and d
  the number of features.

  Multiple model file has number of models in the first column, followed
  by the model ID and model weight. The rest of the information is the
  same as in single model file.

  Note that in both cases, 'rows' and 'columns' contain information for
  one model.

  The function sets the model file type in 'type'. It returns an array
  'models', where models[i] is the i-th model. Each model is a 'rows' by
  'columns' floating point matrix. For multiple model file, the number of
  models is returned in 'nmodels', and model weights are returned in
  'weights'.

  In case of failure, return NULL and set 'errc'. Possible errors are
  memory allocation and file access errors.
*/
float ***lin_load_models(char *fname, int *type, int *nmodels,
                         float **weights, int *rows, int *columns,
                         int *errc);

/*
  Write linear classifier stored in 'model' in 'fptr'.

  The first column is number of models 'nmodels', followed by the model
  index 'mdx' and model weight 'weight'. 'model' is presumably a 'rows'
  by 'columns' matrix, as for the models returned by lin_load_models()
  (confirm against the implementation).
*/
void lin_write(FILE *fptr, int nmodels, float **model, int rows,
               int columns, int mdx, float weight);

#endif /* LIN_H_INCLUDED */