// osbf-util.c - utility for munging css files, version X0.1
// Copyright 2001-2006 William S. Yerazunis, all rights reserved.
//
// This software is licensed to the public under the Free Software
// Foundation's GNU GPL, version 2.0. You may obtain a copy of the
// GPL by visiting the Free Software Foundation's web site at
// www.fsf.org . Other licenses may be negotiated; contact the
// author for details.
//
// OBS: This program is a modified version of the original cssutil,
// specific for the new osbf format. It is not compatible with
// the original css format. -- Fidelis Assis
//
// include some standard files
#include "crm114_sysincludes.h"
// include any local crm114 configuration file
#include "crm114_config.h"
// include the crm114 data structures file
#include "crm114_structs.h"
// and include the routine declarations file
#include "crm114.h"
#include "crm114_osbf.h"
// Version string of this utility; printed by the -v option in main().
char version[] = "1.1";
// helptext - print the command-line usage summary to stdout.
//
// Takes no arguments and returns nothing.  The version shown in the
// first line comes from the VERSION macro, which is not defined in this
// file (presumably supplied by the crm114 configuration headers).
void
helptext ()
{
  fprintf (stdout,
	   "osbf-util version %s - generic osbf file utility.\n"
	   "Usage: osbf-util [options]... css-filename\n"
	   " -b - brief; print only summary\n"
	   " -h - print this help\n"
	   " -q - quiet mode; no warning messages\n"
	   " -r - report then exit (no menu)\n"
	   " -s css-size - if no css file found, create new\n"
	   " one with this many buckets.\n"
	   " -S css-size - same as -s, but round up to next\n"
	   " 2^n + 1 boundary.\n"
	   " -v - print version and exit\n"
	   " -D - dump css file to stdout in CSV format.\n"
	   " -R csv-file - create and restore css from CSV.\n"
	   " Options -s and -S are ignored when"
	   " restoring.\n", VERSION);
}
int
main (int argc, char **argv)
{
long i, k; // some random counters, when we need a loop
long v;
long sparse_spectrum_file_length = OSBF_DEFAULT_SPARSE_SPECTRUM_FILE_LENGTH;
long user_set_css_length = 0;
long hfsize;
long long sum; // sum of the hits... can be _big_.
int brief = 0, quiet = 0, dump = 0, restore = 0;
int opt, fields;
int report_only = 0;
long *bcounts;
long maxchain;
long curchain;
long totchain;
long fbuckets;
long nchains;
long ofbins;
char cmdstr[255];
char cssfile[255];
char csvfile[255];
unsigned char cmdchr[2];
char crapchr[2];
float cmdval;
int zloop, cmdloop, version_index;
// the following for crm114.h's happiness
char *newinputbuf;
newinputbuf = (char *) &hfsize;
bcounts = malloc (sizeof (unsigned long) * OSBF_FEATUREBUCKET_VALUE_MAX);
{
struct stat statbuf; // filestat buffer
OSBF_FEATURE_HEADER_STRUCT *header; // the header of the hash file
OSBF_FEATUREBUCKET_STRUCT *hashes; // the text of the hash file
// parse cmdline options
while ((opt = getopt (argc, argv, "bDhR:rqs:S:v")) != -1)
{
switch (opt)
{
case 'b':
brief = 1; // brief, no 'bin value ...' lines
break;
case 'D':
dump = 1; // dump css file, no cmd menu
break;
case 'q':
quiet = 1; // quiet mode, no warning messages
break;
case 'R':
{
FILE *f;
unsigned long key, hash, value;
OSBF_FEATURE_HEADER_STRUCT h;
// count lines to determine the number of buckets and check CSV format
if (user_trace)
fprintf (stderr, "Opening OSBF file %s for read\n", optarg);
if ((f = fopen (optarg, "rb")) != NULL)
{
// try to find the header reading first 2 "buckets"
if (fscanf
(f, "%lu;%lu;%lu\n", (unsigned long *) h.version,
&(h.flags), &(h.buckets_start)) != 3)
{
fprintf (stderr,
"\n %s is not in the right CSV format.\n",
optarg);
exit (EXIT_FAILURE);
}
if (*((unsigned long *) h.version) != OSBF_VERSION)
{
fprintf (stderr,
"\n %s is not an OSBF CSV file.\n", optarg);
fclose (f);
exit (EXIT_FAILURE);
}
if (fscanf (f, "%lu;%lu;%lu\n", &(h.buckets), &hash, &value)
!= 3)
{
fprintf (stderr,
"\n %s is not in the right CSV format.\n",
optarg);
exit (EXIT_FAILURE);
}
// start with -headersize buckets, discounting 2 "buckets" alread read
sparse_spectrum_file_length = 2 - h.buckets_start;
while (!feof (f))
if (fscanf (f, "%lu;%lu;%lu\n", &key, &hash, &value) == 3)
sparse_spectrum_file_length++;
else
{
fprintf (stderr,
"\n %s is not in the right CSV format.\n",
optarg);
exit (EXIT_FAILURE);
}
fclose (f);
// check the number of buckets
if (sparse_spectrum_file_length != h.buckets)
{
fprintf (stderr,
"\n Wrong number of buckets! %s is not in the right CSV format.\n",
optarg);
exit (EXIT_FAILURE);
}
strcpy (csvfile, optarg);
}
else
{
fprintf (stderr,
"\n Couldn't open csv file %s; errno=%d.\n",
optarg, errno);
exit (EXIT_FAILURE);
}
}
restore = 1; // restore css file, no cmd menu
break;
case 'r':
report_only = 1; // print stats only, no cmd menu.
break;
case 's': // set css size to option value
case 'S': // same as above but round up to next 2^n+1
if (restore)
{
fprintf (stderr,
"\nOptions -s, -S ignored when restoring.\n");
break;
}
if (sscanf (optarg, "%ld", &sparse_spectrum_file_length))
{
if (!quiet)
fprintf (stderr,
"\nOverride css creation length to %ld\n",
sparse_spectrum_file_length);
user_set_css_length = 1;
}
else
{
fprintf (stderr,
"On -%c flag: Missing or incomprehensible number of buckets.\n",
opt);
exit (EXIT_FAILURE);
}
if (opt == 'S') // round up to next 2^n+1
{
int k;
k = (long) floor (log10 (sparse_spectrum_file_length - 1)
/ log10 (2.0));
while ((2 << k) + 1 < sparse_spectrum_file_length)
k++;
sparse_spectrum_file_length = (2 << k) + 1;
user_set_css_length = 1;
}
break;
case 'v':
fprintf (stderr, " This is osbf-util, version %s\n", version);
fprintf (stderr, " Copyright 2004-2006 William S. Yerazunis.\n");
fprintf (stderr,
" This software is licensed under the GPL with ABSOLUTELY NO WARRANTY\n");
exit (EXIT_SUCCESS);
default:
helptext ();
exit (EXIT_SUCCESS);
break;
}
}
if (optind < argc)
strncpy (cssfile, argv[optind], sizeof (cssfile));
else
{
helptext ();
exit (EXIT_SUCCESS);
}
// and stat it to get it's length
k = stat (cssfile, &statbuf);
// quick check- does the file even exist?
if (k == 0)
{
if (restore)
{
fprintf (stderr,
"\n.CSS file %s exists! Restore operation aborted.\n",
cssfile);
exit (EXIT_FAILURE);
}
hfsize = statbuf.st_size;
if (!quiet && user_set_css_length)
fprintf (stderr,
"\n.CSS file %s exists; -s, -S options ignored.\n",
cssfile);
}
else
{
// file didn't exist... create it
if (!quiet && !restore)
fprintf (stdout, "\nHad to create .CSS file %s with %lu buckets\n",
cssfile, sparse_spectrum_file_length);
if (crm_osbf_create_cssfile
(cssfile, sparse_spectrum_file_length, OSBF_VERSION, 0,
OSBF_CSS_SPECTRA_START) != EXIT_SUCCESS)
exit (EXIT_FAILURE);
k = stat (cssfile, &statbuf);
hfsize = statbuf.st_size;
}
//
// mmap the hash file into memory so we can bitwhack it
header = crm_mmap_file ( cssfile,
0, hfsize,
PROT_READ | PROT_WRITE,
MAP_SHARED,
NULL);
if (header == MAP_FAILED)
{
fprintf (stderr,
"\n Couldn't mmap file %s into memory; errno=%d .\n",
cssfile, errno);
exit (EXIT_FAILURE);
}
if (*((unsigned long *) (header->version)) != OSBF_VERSION)
{
fprintf (stderr,
"\n %s is the wrong version. We're expecting a %s css file.\n",
cssfile, CSS_version_name[OSBF_VERSION]);
crm_munmap_file ((void *) header);
exit (EXIT_FAILURE);
}
hashes = (OSBF_FEATUREBUCKET_STRUCT *) header + header->buckets_start;
if (hashes == MAP_FAILED)
{
fprintf (stderr,
"\n Couldn't open RW file %s; errno=%d .\n", cssfile, errno);
exit (EXIT_FAILURE);
}
// from now on, hfsize is buckets, not bytes.
hfsize = statbuf.st_size / sizeof (OSBF_FEATUREBUCKET_STRUCT);
if (dump)
{
/* dump the css file */
OSBF_FEATUREBUCKET_STRUCT *bucket;
unsigned long *p;
bucket = (OSBF_FEATUREBUCKET_STRUCT *) header;
for (i = 0; i < hfsize; i++)
{
p = (unsigned long *) &bucket[i];
printf ("%lu;%lu;%lu\n", p[0], p[1], p[2]);
}
}
if (restore)
{
FILE *f;
OSBF_FEATUREBUCKET_STRUCT *bucket;
unsigned long *p;
// restore the css file - note that if we DIDN'T create
// it already, then this will fail.
//
if ((f = fopen (csvfile, "rb")) == NULL)
{
fprintf (stderr, "\n Couldn't open csv file %s; errno=%d.\n",
csvfile, errno);
exit (EXIT_FAILURE);
}
bucket = (OSBF_FEATUREBUCKET_STRUCT *) header;
for (i = 0; i < hfsize; i++)
{
p = (unsigned long *) &bucket[i];
fscanf (f, "%lu;%lu;%lu\n", &p[0], &p[1], &p[2]);
}
fclose (f);
}
zloop = 1;
while (zloop == 1 && !restore && !dump)
{
zloop = 0;
crm_osbf_packcss (header, 0, header->buckets - 1);
sum = 0;
maxchain = 0;
curchain = 0;
totchain = 0;
fbuckets = 0;
nchains = 0;
ofbins = 0;
for (i = 0; i < header->buckets; i++)
{
sum += GET_BUCKET_VALUE(hashes[i]);
if (GET_BUCKET_VALUE(hashes[i]) != 0)
{
fbuckets++;
curchain++;
if (GET_BUCKET_VALUE(hashes[i]) >= OSBF_FEATUREBUCKET_VALUE_MAX)
ofbins++;
}
else
{
if (curchain > 0)
{
totchain += curchain;
nchains++;
if (curchain > maxchain)
maxchain = curchain;
curchain = 0;
}
}
}
version_index = *((unsigned long *) header->version);
if (version_index < 0 || version_index > UNKNOWN_VERSION)
version_index = UNKNOWN_VERSION;
fprintf (stdout, "\n Sparse spectra file %s statistics: \n", cssfile);
fprintf (stdout, "\n CSS file version : %12s",
CSS_version_name[version_index]);
fprintf (stdout, "\n Header size (bytes) : %12ld",
header->buckets_start * sizeof (OSBF_FEATUREBUCKET_STRUCT));
fprintf (stdout, "\n Bucket size (bytes) : %12d",
sizeof (OSBF_FEATUREBUCKET_STRUCT));
fprintf (stdout, "\n Total available buckets : %12ld",
header->buckets);
fprintf (stdout, "\n Total buckets in use : %12ld",
fbuckets);
fprintf (stdout, "\n Number of trainings : %12lu",
header->learnings);
fprintf (stdout, "\n Total buckets with value >= max : %12ld",
ofbins);
fprintf (stdout, "\n Total hashed datums in file : %12lld", sum);
fprintf (stdout, "\n Average datums per bucket : %12.2f",
(fbuckets > 0) ? (sum * 1.0) / (fbuckets * 1.0) : 0);
fprintf (stdout, "\n Number of chains : %12ld",
nchains);
fprintf (stdout, "\n Maximum length of overflow chain : %12ld",
maxchain);
fprintf (stdout, "\n Average length of overflow chain : %12.2f",
nchains > 0 ? (totchain * 1.0) / (nchains * 1.0) : 0);
fprintf (stdout, "\n Average packing density : %12.2f\n",
(fbuckets * 1.0) / (header->buckets * 1.0));
for (i = 0; i < OSBF_FEATUREBUCKET_VALUE_MAX; i++)
bcounts[i] = 0;
for (v = 0; v < header->buckets; v++)
{
if (GET_BUCKET_VALUE(hashes[v]) < OSBF_FEATUREBUCKET_VALUE_MAX)
bcounts[GET_BUCKET_VALUE(hashes[v])]++;
}
if (!brief)
for (i = 0; i < OSBF_FEATUREBUCKET_VALUE_MAX; i++)
{
if (bcounts[i] > 0)
{
fprintf (stdout, "\n bin value %8ld found %9ld times",
i, bcounts[i]);
}
}
fprintf (stdout, "\n");
cmdloop = 1;
while (!report_only && cmdloop)
{
// clear command buffer
cmdchr[0] = '\0';
fprintf (stdout, "Options:\n");
fprintf (stdout, " Z n - zero bins at or below a value\n");
fprintf (stdout, " S n - subtract a constant from all bins\n");
fprintf (stdout, " D n - divide all bins by a constant\n");
fprintf (stdout, " R - rescan\n");
fprintf (stdout, " P - pack\n");
fprintf (stdout, " Q - quit\n");
fprintf (stdout, ">>> ");
clearerr (stdin);
fscanf (stdin, "%[^\n]", cmdstr);
fscanf (stdin, "%c", crapchr);
fields = sscanf (cmdstr, "%s %f", cmdchr, &cmdval);
if (strlen ( (char *)cmdchr) != 1)
{
fprintf (stdout, "Unknown command: %s\n", cmdchr);
continue;
}
switch (tolower ((int)cmdchr[0]))
{
case 'z':
if (fields != 2)
fprintf (stdout,
"Z command requires a numeric argument!\n");
else
{
fprintf (stdout, "Working...");
for (i = 0; i < header->buckets; i++)
if (GET_BUCKET_VALUE(hashes[i]) <= cmdval)
BUCKET_RAW_VALUE(hashes[i]) = 0;
fprintf (stdout, "done.\n");
}
break;
case 's':
if (fields != 2)
fprintf (stdout,
"S command requires a numeric argument!\n");
else
{
fprintf (stdout, "Working...");
for (i = 0; i < header->buckets; i++)
{
if (GET_BUCKET_VALUE(hashes[i]) > (int) cmdval)
{
BUCKET_RAW_VALUE(hashes[i]) =
GET_BUCKET_VALUE(hashes[i]) - cmdval;
}
else
{
BUCKET_RAW_VALUE(hashes[i]) = 0;
}
}
fprintf (stdout, "done.\n");
}
break;
case 'd':
if (fields != 2)
fprintf (stdout,
"D command requires a numeric argument!\n");
else if (cmdval == 0)
fprintf (stdout, "You can't divide by zero, nimrod!\n");
else
{
fprintf (stdout, "Working...");
for (i = 0; i < header->buckets; i++)
BUCKET_RAW_VALUE(hashes[i]) =
GET_BUCKET_VALUE(hashes[i]) / cmdval;
fprintf (stdout, "done.\n");
}
break;
case 'r':
zloop = 1;
cmdloop = 0;
break;
case 'p':
fprintf (stdout, "Working...");
crm_osbf_packcss (header, 0, header->buckets - 1);
zloop = 1;
cmdloop = 0;
break;
case 'q':
fprintf (stdout, "Bye! \n");
cmdloop = 0;
break;
default:
fprintf (stdout, "Unknown command: %c\n", cmdchr[0]);
break;
}
}
}
}
return 0;
}
// syntax highlighted by Code2HTML, v. 0.9.1