// Copyright (C) 2001 Jean-Marc Valin
#include "FeatureMap.h"
#include <string>
#include "ObjectParser.h"
#include "misc.h"
#include <algorithm>
#ifdef HAVE_VALUES_H
#include <values.h>
#endif
#ifdef HAVE_FLOAT_H
#include <float.h>
#endif
using namespace std;
namespace FD {
DECLARE_TYPE(FeatureMap)
//@implements FeatureMap
void FeatureMap::recursiveSplit (const vector<float *> &inData, const vector<float *> &outData, int level)
{
if (level <= 0)
//if (data.size() < 50)
{
DYN_VEC(float, outDimension, mean);
//float mean[outDimension];
for (int i=0;i<outDimension;i++)
mean[i]=0;
//mapData[i]=0;
for (int k=0;k<outData.size();k++)
for (int i=0;i<outDimension;i++)
mean[i]+=outData[k][i];
//mapData[i]+=outData[k][i];
if (outData.size() != 0)
{
float sum_dist=0;
for (int i=0;i<outDimension;i++)
mean[i]/=outData.size();
//mapData[i]/=outData.size();
float mindist = FLT_MAX;
int closest = 0;
for (int k=0;k<outData.size();k++)
{
float dist=0;
for (int i=0;i<outDimension;i++)
dist += (outData[k][i]-mean[i]) * (outData[k][i]-mean[i]);
if (dist < mindist)
{
closest=k;
mindist=dist;
}
sum_dist+=dist;
}
cout << "average dist = " << sum_dist/outData.size()/outDimension << endl;
//cerr << "closest = " << closest << "\t dist = " << mindist << endl;
for (int i=0;i<outDimension;i++)
mapData[i] = outData[closest][i];
} else {
for (int i=0;i<outDimension;i++)
mapData[i] = 0;
}
cout << "LEAF: " << inData.size() << endl;
return;
}
int dim;
float thresh;
split(inData, outData, dim, thresh);
vector<float *> firstInData;
vector<float *> secondInData;
vector<float *> firstOutData;
vector<float *> secondOutData;
for (int i=0;i<inData.size();i++)
{
bool less=false;
if (inData[i][dim] < thresh)
less=true;
if (inData[i][dim] == thresh)
less=rand()&1;
if (less)
{
firstInData.insert(firstInData.end(), inData[i]);
firstOutData.insert(firstOutData.end(), outData[i]);
} else {
if (inData[i][dim] == thresh)
cout << "equal: " << dim << " " << thresh << endl;
secondInData.insert(secondInData.end(), inData[i]);
secondOutData.insert(secondOutData.end(), outData[i]);
}
}
splitDimension = dim;
threshold = thresh;
//cout << dimension << endl;
first = new FeatureMap (inDimension, outDimension);
second = new FeatureMap (inDimension, outDimension);
terminal = false;
first->recursiveSplit(firstInData, firstOutData, level-1);
second->recursiveSplit(secondInData, secondOutData, level-1);
}
void FeatureMap::split(const vector<float *> &inData, const vector<float *> &outData, int &bestDim, float &bestThreshold)
{
bestDim=0;
int nbEqual=0;
bestThreshold=0;
float bestMutual = -FLT_MAX;
for (int i=0;i<inDimension;i++)
{
float threshold;
float currentMutual;
findThreshold(inData, outData, i, threshold, currentMutual);
//cerr << "threshold: " << threshold << " currentMutual: " << currentMutual << endl;
bool isBest = false;
if (currentMutual > bestMutual)
{
isBest=true;
nbEqual=0;
}
if (currentMutual == bestMutual)
{
nbEqual++;
if (rand()%nbEqual==0)
isBest=true;
}
if (isBest)
{
bestMutual=currentMutual;
bestDim=i;
bestThreshold=threshold;
}
}
cerr << "bestDim: " << bestDim << " bestThreshold: " << bestThreshold << endl;
//if (some condition on bestMutual) don't perform the split
//splitWithThreshold(data, bestDim, bestThreshold);
}
static int float_less(const void *a, const void *b)
{
return *((float *)a) < *((float *)b);
}
//find threshold using split at median and mutual information
void FeatureMap::findThreshold(const vector<float *> &inData, const vector<float *> &outData, int dim, float &thresh, float &score)
{
float sum = 0;
int i,k;
if (inData.size()==0) thresh=0;
else {
//FIXME: Allocation for sorted should be done faster, but with risk of
//smashing the stack
//float sorted[inData.size()];
float *sorted = new float [inData.size()];
for (i=0;i<inData.size();i++)
sorted[i] = inData[i][dim];
//qsort(sorted,data.size(),sizeof(float), float_less);
sort (sorted,sorted+inData.size());
thresh=sorted[inData.size()/2];
delete [] sorted;
}
//float sumA[outDimension];
//float sumB[outDimension];
//float s2A[outDimension];
//float s2B[outDimension];
DYN_VEC(float, outDimension, sumA);
DYN_VEC(float, outDimension, sumB);
DYN_VEC(float, outDimension, s2A);
DYN_VEC(float, outDimension, s2B);
for (i=0;i<outDimension;i++)
{
sumA[i]=sumB[i]=s2A[i]=s2B[i]=0;
}
for (k=0;k<inData.size();k++)
{
bool greater=false;
if (inData[k][dim] > thresh)
greater=true;
if (inData[k][dim] == thresh)
greater=rand()&1;
if (greater)
{
for (i=0;i<outDimension;i++)
{
sumA[i]+=outData[k][i];
s2A[i]+=outData[k][i]*outData[k][i];
}
} else {
for (i=0;i<outDimension;i++)
{
sumB[i]+=outData[k][i];
s2B[i]+=outData[k][i]*outData[k][i];
}
}
}
score = 0;
for (i=0;i<outDimension;i++)
{
score += s2A[i] - (sumA[i]*sumA[i]/inData.size());
score += s2B[i] - (sumB[i]*sumB[i]/inData.size());
}
score = -score;
}
int FeatureMap::setNumbering(int start)
{
if (terminal)
{
cellID=start;
//cerr << start << endl;
return start+1;
} else {
return second->setNumbering(first->setNumbering(start));
}
}
void FeatureMap::calc(const float *in, float *out)
{
if (terminal)
{
//cerr << "found!\n";
//cerr << mapData.size() << endl;
for (int i=0;i<outDimension;i++)
out[i]=mapData[i];
return;
}
//cerr << "+";
if (in[splitDimension] < threshold)
first->calc(in, out);
else
second->calc(in, out);
}
/*
int FeatureMap::belongs(float *vect) const
{
if (terminal) return cellID;
if (vect[splitDimension] < threshold)
return first->belongs(vect);
else
return second->belongs(vect);
}
void FeatureMap::calcTemplate (const vector<float *> &features, vector<int> &templ) const
{
for (vector<float *>::const_iterator feature = features.begin();
feature < features.end(); feature++)
{
//cerr << "(" << (*feature)[0] << "," << (*feature)[1] << "): " << belongs(*feature) << endl;
templ[belongs(*feature)]++;
}
}
*/
void FeatureMap::printOn(ostream &out) const
{
out << "<FeatureMap " << endl;
out << "<inDimension " << inDimension << ">" << endl;
out << "<outDimension " << outDimension << ">" << endl;
//out << "<numberClasses " << numberClasses << ">" << endl;
out << "<terminal " << terminal << ">" << endl;
if (terminal)
{
out << "<cellID " << cellID << ">" << endl;
out << "<mapData " << mapData << ">" << endl;
} else {
out << "<threshold " << threshold << ">" << endl;
out << "<splitDimension " << splitDimension << ">" << endl;
out << "<first " << *first << ">" << endl;;
out << "<second " << *second << ">" << endl;;
}
out << ">\n";
}
ostream &operator << (ostream &out, const FeatureMap &cell)
{
cell.printOn(out);
return out;
}
void FeatureMap::readFrom (istream &in)
{
string tag;
while (1)
{
char ch;
in >> ch;
if (ch == '>') break;
in >> tag;
if (tag == "inDimension")
in >> inDimension;
else if (tag == "outDimension")
in >> outDimension;
//else if (tag == "numberClasses")
// in >> numberClasses;
else if (tag == "terminal")
in >> terminal;
else if (tag == "cellID")
in >> cellID;
else if (tag == "threshold")
in >> threshold;
else if (tag == "mapData")
in >> mapData;
else if (tag == "splitDimension")
in >> splitDimension;
else if (tag == "first")
{
FeatureMap *tmp = new FeatureMap;
in >> *tmp;
first = tmp;
}
else if (tag == "second")
{
FeatureMap *tmp = new FeatureMap;
in >> *tmp;
second = tmp;
} else
throw new ParsingException ("FeatureMap::readFrom : unknown argument: " + tag);
if (!in) throw new ParsingException ("FeatureMap::readFrom : Parse error trying to build " + tag);
in >> tag;
if (tag != ">") throw new ParsingException ("FeatureMap::readFrom : Parse error: '>' expected ");
}
}
istream &operator >> (istream &in, FeatureMap &cell)
{
if (!isValidType(in, "FeatureMap"))
return in;
cell.readFrom(in);
return in;
}
}//namespace FD
syntax highlighted by Code2HTML, v. 0.9.1