/* ** Copyright (C) 2004-2007 by Carnegie Mellon University. ** ** @OPENSOURCE_HEADER_START@ ** ** Use of the SILK system and related source code is subject to the terms ** of the following licenses: ** ** GNU Public License (GPL) Rights pursuant to Version 2, June 1991 ** Government Purpose License Rights (GPLR) pursuant to DFARS 252.225-7013 ** ** NO WARRANTY ** ** ANY INFORMATION, MATERIALS, SERVICES, INTELLECTUAL PROPERTY OR OTHER ** PROPERTY OR RIGHTS GRANTED OR PROVIDED BY CARNEGIE MELLON UNIVERSITY ** PURSUANT TO THIS LICENSE (HEREINAFTER THE "DELIVERABLES") ARE ON AN ** "AS-IS" BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY ** KIND, EITHER EXPRESS OR IMPLIED AS TO ANY MATTER INCLUDING, BUT NOT ** LIMITED TO, WARRANTY OF FITNESS FOR A PARTICULAR PURPOSE, ** MERCHANTABILITY, INFORMATIONAL CONTENT, NONINFRINGEMENT, OR ERROR-FREE ** OPERATION. CARNEGIE MELLON UNIVERSITY SHALL NOT BE LIABLE FOR INDIRECT, ** SPECIAL OR CONSEQUENTIAL DAMAGES, SUCH AS LOSS OF PROFITS OR INABILITY ** TO USE SAID INTELLECTUAL PROPERTY, UNDER THIS LICENSE, REGARDLESS OF ** WHETHER SUCH PARTY WAS AWARE OF THE POSSIBILITY OF SUCH DAMAGES. ** LICENSEE AGREES THAT IT WILL NOT MAKE ANY WARRANTY ON BEHALF OF ** CARNEGIE MELLON UNIVERSITY, EXPRESS OR IMPLIED, TO ANY PERSON ** CONCERNING THE APPLICATION OF OR THE RESULTS TO BE OBTAINED WITH THE ** DELIVERABLES UNDER THIS LICENSE. 
** ** Licensee hereby agrees to defend, indemnify, and hold harmless Carnegie ** Mellon University, its trustees, officers, employees, and agents from ** all claims or demands made against them (and any related losses, ** expenses, or attorney's fees) arising out of, or relating to Licensee's ** and/or its sub licensees' negligent use or willful misuse of or ** negligent conduct or willful misconduct regarding the Software, ** facilities, or other rights or assistance granted by Carnegie Mellon ** University under this License, including, but not limited to, any ** claims of product liability, personal injury, death, damage to ** property, or violation of any laws or regulations. ** ** Carnegie Mellon University Software Engineering Institute authored ** documents are sponsored by the U.S. Department of Defense under ** Contract F19628-00-C-0003. Carnegie Mellon University retains ** copyrights in all material produced under this contract. The U.S. ** Government retains a non-exclusive, royalty-free license to publish or ** reproduce these documents, or allow others to do so, for U.S. ** Government purposes only pursuant to the copyright license under the ** contract clause at 252.227.7013. ** ** @OPENSOURCE_HEADER_END@ */ /* ** rwbagcat reads a binary bag, converts it to text, and outputs it ** to stdout. It can also print various statistics and summary ** information about the bag. It attempts to read the bag(s) from ** stdin or from any arguments. 
**
*/

#include "silk.h"

RCSIDENT("$SiLK: rwbagcat.c 8269 2007-08-03 18:54:48Z mthomas $");

#include "bagtree.h"
#include "iptree.h"
#include "utils.h"
#include "skprintnets.h"
#include "skstream.h"


/* LOCAL DEFINES AND TYPEDEFS */

/* where to write --help output */
#define USAGE_FH stdout

/* width of count fields in columnar output */
#define COUNT_WIDTH 20

/* return TRUE if a record's 'key' and 'counter' values are within the
 * global limits.  Both bounds are inclusive.  The macro reads the
 * file-scope variables minkey, maxkey, mincounter, and maxcounter,
 * and evaluates each argument twice. */
#define IS_RECORD_WITHIN_LIMITS(k, c)               \
    (((k) >= minkey) && ((k) <= maxkey) &&          \
     ((c) >= mincounter) && ((c) <= maxcounter))

/* how --bin-ips groups counters when the bag is inverted;
 * BINSCHEME_NONE means the bag is not inverted */
typedef enum _binScheme {
    BINSCHEME_NONE=0,
    BINSCHEME_LINEAR=1,
    BINSCHEME_BINARY=2,
    BINSCHEME_DECIMAL=3
} binScheme_en;


/* LOCAL FUNCTIONS */

static void appUsageLong(void);                      /* never returns */
static void appTeardown(void);
static void appSetup(int argc, char **argv); /* never returns when error */
static int  appOptionsHandler(clientData, int, char *);
static int addInput(const char* filename);
static void removeInputs(void);
static int setOutput(const char* filename, skstream_t **stream_out);
static int printStats(
    skBag_header_t *bag,
    skstream_t     *s_out);


/* LOCAL VARIABLES */

/* global I/O state */

/* NULL-terminated array of all input files/streams/pipes */
static skstream_t **inputs = NULL;

/* where the textual bag contents are written */
static skstream_t *output;

/* where --stats output is written; may be the same stream as 'output' */
static skstream_t *stats;

/* where --tree-stats output is written; may be the same stream as
 * 'output' */
static skstream_t *treestats;

/* non-zero when --stats was given */
static int f_print_stats = 0;

/* non-zero when --tree-stats was given */
static int f_print_treestats = 0;

/* non-zero when --network-structure was given, or when no other
 * output mode was requested */
static int f_print_network = 0;

/* binning scheme selected with --bin-ips */
static binScheme_en bin_scheme = BINSCHEME_NONE;

/* argument to --network-structure, handed to netStructureParse() */
static const char *net_structure = NULL;

/* delimiter between output columns for hosts/counts */
static char output_delimiter = '|';

/* whether key/counter output is in columns or scrunched together */
static int output_is_columnar = 1;

/* whether to print keys as integers or IP addresses */
static int f_print_integer_keys = 0;

/* zero pad ips */
static int f_zero_pad_ips = 0;

/* print out keys whose counter is zero (provided that minkey and
 * maxkey are both set) */
static int f_print_zero_counts = 0;

/* initialize these to their opposites to know when they have been set * by the user */ static skBag_counter_t mincounter = SKBAG_COUNTER_MAX; static skBag_counter_t maxcounter = SKBAG_COUNTER_MIN; static skBag_key_t minkey = SKBAG_KEY_MAX; static skBag_key_t maxkey = SKBAG_KEY_MIN; /* name of program to run to page output */ static char *pager = NULL; /* OPTIONS SETUP */ typedef enum _appOptionsEnum { OPT_NETWORK_STRUCTURE, OPT_BIN_IPS, OPT_STATS, OPT_TREE_STATS, OPT_MINCOUNTER, OPT_MAXCOUNTER, OPT_MINKEY, OPT_MAXKEY, OPT_ZERO_COUNTS, OPT_OUTPUT, OPT_INTEGER_KEYS, OPT_ZERO_PAD_IPS, OPT_NO_COLUMNS, OPT_COLUMN_SEPARATOR, OPT_DELIMITED, OPT_PAGER } appOptionsEnum; static struct option appOptions[] = { {"network-structure", OPTIONAL_ARG, 0, OPT_NETWORK_STRUCTURE}, {"bin-ips", OPTIONAL_ARG, 0, OPT_BIN_IPS}, {"stats", OPTIONAL_ARG, 0, OPT_STATS}, {"tree-stats", OPTIONAL_ARG, 0, OPT_TREE_STATS}, {"mincounter", REQUIRED_ARG, 0, OPT_MINCOUNTER}, {"maxcounter", REQUIRED_ARG, 0, OPT_MAXCOUNTER}, {"minkey", REQUIRED_ARG, 0, OPT_MINKEY}, {"maxkey", REQUIRED_ARG, 0, OPT_MAXKEY}, {"zero-counts", NO_ARG, 0, OPT_ZERO_COUNTS}, {"output", REQUIRED_ARG, 0, OPT_OUTPUT}, {"integer-keys", NO_ARG, 0, OPT_INTEGER_KEYS}, {"zero-pad-ips", NO_ARG, 0, OPT_ZERO_PAD_IPS}, {"no-columns", NO_ARG, 0, OPT_NO_COLUMNS}, {"column-separator", REQUIRED_ARG, 0, OPT_COLUMN_SEPARATOR}, {"delimited", OPTIONAL_ARG, 0, OPT_DELIMITED}, {"pager", REQUIRED_ARG, 0, OPT_PAGER}, {0,0,0,0 } /* sentinel entry */ }; static const char *appHelp[] = { ("Print the network structure around the keys.\n" "\tChoose from TABCXHS for grand total, /8, /16, /24, /27, \n" "\t/32 headers and summary information"), ("Invert the bag and count by distinct volume values.\n" "\tlinear: volume => count(IP)\n" "\tbinary: log2(volume) => count(IP)\n" "\tdecimal: variation on log10(volume) => count(IP)"), ("Print general statistics about the bag to named stream.\n" "\tSend output to optional stream, or to stderr if none given"), ("Print bag 
performance data to named stream.\n" "\tSend output to optional stream, or to stdout if none given"), ("Output records whose counter is at least VALUE, an integer\n" "\tfrom 1 to 18446744073709551615. Def. Records with non-zero counters"), ("Output records whose counter is not more than VALUE, an\n" "\tinteger from 1 to 18446744073709551615"), ("Output records whose key is at least VALUE, an integer from 0\n" "\tto 4294967295 or an IP address. Def. Records with non-zero counters"), ("Output records whose key is not more than VALUE, an integer\n" "\tor an IP address. Def. Records with non-zero counters"), ("Print keys with a counter of zero. Def. No\n" "\t(requires --minkey and --maxkey)"), "Write output to named stream. Def. stdout", "Print keys as integers. Def. dotted decimal", "Print keys as dotted-decimal with 3 digits per octet", "Disable fixed-width columnar output. Def. Columnar", "Use specified character between columns. Def. '|'", "Shortcut for --no-columns --column-sep=CHAR", "Program to invoke to page output. Def. $SILK_PAGER or $PAGER", (char *) NULL }; /* FUNCTION DEFINITIONS */ /* * appUsageLong(); * * Print complete usage information to USAGE_FH. Pass this * function to skOptionsSetUsageCallback(); optionsParse() will * call this funciton and then exit the program when the --help * option is given. */ static void appUsageLong(void) { #define USAGE_MSG \ ("[SWITCHES] [BAG_FILES]\n" \ "\tPrint binary Bag files as text.\n") FILE *fh = USAGE_FH; skAppStandardUsage(fh, USAGE_MSG, appOptions, appHelp); } /* * appTeardown() * * Teardown all modules, close all files, and tidy up all * application state. * * This function is idempotent. 
*/ static void appTeardown(void) { static int teardownFlag = 0; if (teardownFlag) { return; } teardownFlag = 1; /* local teardown segment */ /* Close open files */ removeInputs(); if (treestats != output) { skStreamDestroy(&treestats); } if (stats != output) { skStreamDestroy(&stats); } skStreamDestroy(&output); treestats = stats = NULL; skAppUnregister(); } /* * appSetup(argc, argv); * * Perform all the setup for this application include setting up * required modules, parsing options, etc. This function should be * passed the same arguments that were passed into main(). * * Returns to the caller if all setup succeeds. If anything fails, * this function will cause the application to exit with a FAILURE * exit status. */ static void appSetup(int argc, char **argv) { int arg_index; int using_pager = 0; /* verify same number of options and help strings */ assert((sizeof(appHelp)/sizeof(char *)) == (sizeof(appOptions)/sizeof(struct option))); /* register the application */ skAppRegister(argv[0]); skOptionsSetUsageCallback(&appUsageLong); /* default output streams */ output = NULL; stats = NULL; treestats = NULL; /* register the options */ if (optionsRegister(appOptions, (optHandler)appOptionsHandler, NULL)) { skAppPrintErr("unable to register options"); exit(EXIT_FAILURE); } /* parse options */ arg_index = optionsParse(argc, argv); assert(arg_index <= argc); if (arg_index < 0) { /* options parsing should print error */ skAppUsage(); /* never returns */ } if (f_print_network == 1 && bin_scheme != BINSCHEME_NONE) { skAppPrintErr("Cannot have both --%s and --%s", appOptions[OPT_NETWORK_STRUCTURE].name, appOptions[OPT_BIN_IPS].name); skAppUsage(); /* never returns */ } /* default to printing network hosts */ if (!f_print_stats && !f_print_treestats && !f_print_network && bin_scheme == BINSCHEME_NONE) { f_print_network = 1; net_structure = "H"; } /* if minkey and maxkey aren't set, error if zero-count printing * requested */ if (f_print_zero_counts && (minkey == 
SKBAG_KEY_MAX || maxkey == SKBAG_KEY_MIN)) { skAppPrintErr("To use --%s, --%s and --%s must both be specified", appOptions[OPT_ZERO_COUNTS].name, appOptions[OPT_MINKEY].name, appOptions[OPT_MAXKEY].name); skAppUsage(); /* never returns */ } /* set the minima and maxima */ if (mincounter == SKBAG_COUNTER_MAX) { mincounter = SKBAG_COUNTER_MIN; } if (maxcounter == SKBAG_COUNTER_MIN) { maxcounter = SKBAG_COUNTER_MAX; } if (minkey == SKBAG_KEY_MAX) { minkey = SKBAG_KEY_MIN; } if (maxkey == SKBAG_KEY_MIN) { maxkey = SKBAG_KEY_MAX; } /* error if a minimum is greater than a maximum */ if (mincounter > maxcounter) { skAppPrintErr(("Minimum counter greater than maximum: " "%" PRIu64 " > %" PRIu64), mincounter, maxcounter); exit(EXIT_FAILURE); } if (minkey > maxkey) { skAppPrintErr(("Minimum key greater than maximum: " "%" PRIu32 " > %" PRIu32), minkey, maxkey); exit(EXIT_FAILURE); } /* add one for "stdin" and add one for sentinel */ inputs = calloc((2 + argc - arg_index), sizeof(skstream_t*)); if (!inputs) { skAppPrintErr("Out of memory"); exit(EXIT_FAILURE); } if (arg_index < argc) { /* get filenames from command line */ for ( ; arg_index < argc; ++arg_index) { if (addInput(argv[arg_index])) { exit(EXIT_FAILURE); } } } else if ( !FILEIsATty(stdin)) { /* no files on command line, try to read from stdin */ if (addInput("stdin")) { exit(EXIT_FAILURE); } } else { skAppPrintErr("No files given on command line and" " stdin is connected to a terminal"); skAppUsage(); } if (inputs[0] == NULL) { skAppPrintErr("No inputs to process"); exit(EXIT_FAILURE); } /* Set the default output if none was set */ if (output == NULL) { if (setOutput("stdout", &output)) { skAppPrintErr("Unable to print to stdout"); exit(EXIT_FAILURE); } } /* Allow paging of the output, and see if paging is active */ skStreamPageOutput(output, pager); using_pager = (NULL != skStreamGetPager(output)); /* If stats/treestats was requested but its output stream hasn't * been set, set it to stderr/stdout unless 
stderr/stdout is a * terminal and a pager is being used, in which case use the * pager. */ if (f_print_stats && stats == NULL) { if (using_pager && FILEIsATty(stderr)) { stats = output; } else { if (setOutput("stderr", &stats)) { skAppPrintErr("Unable to print to stderr"); exit(EXIT_FAILURE); } } } if (f_print_treestats && treestats == NULL) { if (using_pager || (0==strcmp("stdout", skStreamGetPathname(output)))){ treestats = output; } else { if (setOutput("stdout", &treestats)) { skAppPrintErr("Unable to print to stdout"); exit(EXIT_FAILURE); } } } if (atexit(appTeardown) < 0) { skAppPrintErr("unable to register appTeardown() with atexit()"); appTeardown(); exit(EXIT_FAILURE); } return; /* OK */ } /* * status = appOptionsHandler(cData, opt_index, opt_arg); * * Called by optionsParse(), this handles a user-specified switch * that the application has registered, typically by setting global * variables. Returns 1 if the switch processing failed or 0 if it * succeeded. Returning a non-zero from from the handler causes * optionsParse() to return a negative value. * * The clientData in 'cData' is typically ignored; 'opt_index' is * the index number that was specified as the last value for each * struct option in appOptions[]; 'opt_arg' is the user's argument * to the switch for options that have a REQUIRED_ARG or an * OPTIONAL_ARG. 
*/ static int appOptionsHandler( clientData UNUSED(cData), int opt_index, char *opt_arg) { uint32_t val32; uint64_t val64; int rv; switch ((appOptionsEnum)opt_index) { case OPT_NETWORK_STRUCTURE: net_structure = opt_arg; f_print_network = 1; break; case OPT_BIN_IPS: if (opt_arg == NULL) { bin_scheme = BINSCHEME_LINEAR; } else { size_t len = strlen(opt_arg); if (len == 0) { skAppPrintErr("The --%s switch requires an argument", appOptions[opt_index].name); return 1; } if (strncmp(opt_arg, "linear", len) == 0) { bin_scheme = BINSCHEME_LINEAR; } else if (strncmp(opt_arg, "binary", len) == 0) { bin_scheme = BINSCHEME_BINARY; } else if (strncmp(opt_arg, "decimal", len) == 0) { bin_scheme = BINSCHEME_DECIMAL; } else { skAppPrintErr("Illegal bin scheme. " "Should be one of: linear, binary, decimal."); return 1; } } break; case OPT_STATS: if (opt_arg != NULL) { if (stats) { skAppPrintErr("The --%s switch was given multiple times", appOptions[opt_index].name); return 1; } if (setOutput(opt_arg, &stats)) { skAppPrintErr("Error with %s value '%s'", appOptions[opt_index].name, opt_arg); return 1; } } f_print_stats = 1; break; case OPT_TREE_STATS: if (opt_arg != NULL) { if (treestats) { skAppPrintErr("The --%s switch was given multiple times", appOptions[opt_index].name); return 1; } if (setOutput(opt_arg, &treestats)) { skAppPrintErr("Error with %s value '%s'", appOptions[opt_index].name, opt_arg); return 1; } } f_print_treestats = 1; break; case OPT_MINCOUNTER: rv = skStringParseUint64(&val64, opt_arg, 1, 0); if (rv == -11) { skAppPrintErr(("Smallest allowable --%s value is 1.\n" "\tUse --%s to write records whose counters are 0"), appOptions[opt_index].name, appOptions[OPT_ZERO_COUNTS].name); return 1; } if (rv) { skAppPrintErr("Unable to parse %s value '%s'", appOptions[opt_index].name, opt_arg); return 1; } mincounter = (skBag_counter_t)val64; break; case OPT_MAXCOUNTER: rv = skStringParseUint64(&val64, opt_arg, 1, 0); if (rv == -11) { skAppPrintErr("Smallest allowable --%s 
value is 1", appOptions[opt_index].name); return 1; } if (rv) { skAppPrintErr("Unable to parse %s value '%s'", appOptions[opt_index].name, opt_arg); return 1; } maxcounter = (skBag_counter_t)val64; break; case OPT_MINKEY: if (skStringParseIP(&val32, opt_arg)) { skAppPrintErr("Unable to parse %s value '%s'", appOptions[opt_index].name, opt_arg); return 1; } minkey = (skBag_key_t)val32; break; case OPT_MAXKEY: if (skStringParseIP(&val32, opt_arg)) { skAppPrintErr("Unable to parse %s value '%s'", appOptions[opt_index].name, opt_arg); return 1; } maxkey = (skBag_key_t)val32; break; case OPT_OUTPUT: if (output) { skAppPrintErr("The --%s switch was given multiple times", appOptions[opt_index].name); return 1; } if (setOutput(opt_arg, &output)) { skAppPrintErr("Error with %s value '%s'", appOptions[opt_index].name, opt_arg); return 1; } break; case OPT_NO_COLUMNS: output_is_columnar = 0; break; case OPT_COLUMN_SEPARATOR: output_delimiter = opt_arg[0]; break; case OPT_DELIMITED: if (opt_arg) { output_delimiter = opt_arg[0]; } output_is_columnar = 0; break; case OPT_INTEGER_KEYS: f_print_integer_keys = 1; break; case OPT_ZERO_PAD_IPS: f_zero_pad_ips = 1; break; case OPT_ZERO_COUNTS: f_print_zero_counts = 1; break; case OPT_PAGER: pager = opt_arg; break; } return 0; /* OK */ } /* * status = addInput(filename); * * Add 'filename' as an entry to the array of inputs. 
*/ static int addInput(const char *filename) { static int input_count = 0; static int stdin_active = 0; skstream_t *s = NULL; int rv; assert(inputs); if (strcmp(filename, "stdin") == 0) { if (stdin_active == 1) { skAppPrintErr("Can only read from stdin one time"); return -1; } stdin_active = 1; } if ((rv = skStreamCreate(&s, SK_IO_READ, SK_CONTENT_SILK)) || (rv = skStreamBind(s, filename)) || (rv = skStreamOpen(s))) { skStreamPrintLastErr(s, rv, &skAppPrintErr); skStreamDestroy(&s); return -1; } inputs[input_count] = s; ++input_count; return 0; } /* * removeInputs(); * * Destroy all the streams in the global 'inputs' array. */ static void removeInputs(void) { int i; if (inputs == NULL) { return; } for (i = 0; inputs[i]; ++i) { skStreamDestroy(&inputs[i]); } free(inputs); inputs = NULL; } /* * status = setOutput(name, &stream); * * Set stream's output to 'name'. Return 0 on success, -1 * otherwise. */ static int setOutput(const char *filename, skstream_t **stream) { int rv; if (stream == NULL) { skAppPrintErr("Not a valid skstream"); return -1; } if (filename == NULL || filename[0] == '\0') { skAppPrintErr("Empty filename"); return -1; } if ((rv = skStreamCreate(stream, SK_IO_WRITE, SK_CONTENT_TEXT)) || (rv = skStreamBind(*stream, filename)) || (rv = skStreamOpen(*stream))) { skStreamPrintLastErr(*stream, rv, &skAppPrintErr); skStreamDestroy(stream); return -1; } return 0; } static int printInvertedBag(skBag_header_t *bag) { char s_label[64]; skBag_iterator_t *iter; skBag_key_t key; skBag_counter_t counter; if (skBag_allocIterator(bag, &iter) != SKBAG_OK) { return 1; } while (skBag_getNext(iter, &key, &counter) == SKBAG_OK) { switch (bin_scheme) { case BINSCHEME_LINEAR: /* label is just bin number */ snprintf(s_label, sizeof(s_label), "%u", key); break; case BINSCHEME_BINARY: /* label is range of values "2^03 to 2^04-1" */ snprintf(s_label, sizeof(s_label), "2^%02u to 2^%02u-1", key, key + 1); break; case BINSCHEME_DECIMAL: /* label is the median value of possible 
keys in that bin */ if (key < (skBag_key_t)100) { snprintf(s_label, sizeof(s_label), "%u", key); } else { double min, max, mid; min = ceil(pow(10, (((double) key / 100.0) + 1.0))); max = floor(pow(10, ((((double) key + 1.0) / 100.0) + 1.0))); mid = floor((min + max) / 2.0); snprintf(s_label, sizeof(s_label), "%.0f", mid); } break; case BINSCHEME_NONE: assert(bin_scheme != BINSCHEME_NONE); abort(); } if (output_is_columnar) { skStreamPrint(output, ("%*s%c%*" PRIu64 "%c\n"), COUNT_WIDTH, s_label, output_delimiter, COUNT_WIDTH, counter, output_delimiter); } else { skStreamPrint(output, ("%s%c%" PRIu64 "%c\n"), s_label, output_delimiter, counter,output_delimiter); } } if (skBag_freeIterator(iter) != SKBAG_OK) { return 1; } return 0; } static int bagcatInvertBag(skBag_header_t *bag) { skBag_iterator_t *iter = NULL; skBag_key_t key; skBag_counter_t counter; int rv = 1; /* Create an inverted bag */ skBag_header_t *inverted_bag = NULL; if (skBag_create(&inverted_bag) != SKBAG_OK) { goto END; } if (skBag_allocIterator(bag, &iter) != SKBAG_OK) { goto END; } /* add inverted entry to bag */ while (skBag_getNext(iter, &key, &counter) == SKBAG_OK) { if ( !IS_RECORD_WITHIN_LIMITS(key, counter)) { continue; } switch (bin_scheme) { case BINSCHEME_LINEAR: key = (skBag_key_t)((counter < UINT32_MAX) ? 
counter : UINT32_MAX); break; case BINSCHEME_BINARY: key = (skBag_key_t)floor(log10((double)counter) / log10((double)2)); break; case BINSCHEME_DECIMAL: if (counter < (skBag_counter_t)100) { key = (skBag_key_t)counter; } else { key = (skBag_key_t)floor((log10((double)counter) - 1.0) * 100.0); } break; case BINSCHEME_NONE: assert(bin_scheme != BINSCHEME_NONE); abort(); } if (skBag_incrCounter(inverted_bag, &key) != SKBAG_OK) { goto END; } } if (printInvertedBag(inverted_bag)) { goto END; } rv = 0; END: if (inverted_bag) { skBag_free(inverted_bag); } if (iter) { skBag_freeIterator(iter); } return rv; } static int printNetwork(skBag_header_t *bag) { skBag_key_t key; skBag_counter_t counter; skBag_iterator_t *iter; skBag_err_t rv; skBag_key_t cur_key; netStruct_t *ns; /* Set up the netStruct */ if (netStructureCreate(&ns, 1)) { skAppPrintErr("Error creating network-structure"); return 1; } netStructureSetCountWidth(ns, COUNT_WIDTH); if (netStructureParse(ns, net_structure)) { return 1; } netStructureSetOutputStream(ns, output); netStructureSetDelimiter(ns, output_delimiter); if ( !output_is_columnar) { netStructureSetNoColumns(ns); } if (f_zero_pad_ips) { netStructureSetIpFormat(ns, SKIP_IPF_ZERO); } else if (f_print_integer_keys) { netStructureSetIpFormat(ns, SKIP_IPF_DEC); } if (f_print_zero_counts) { /* * if we are to print zero counts, then we just loop through * all values, printing them. 
*/ for (cur_key = minkey; cur_key <= maxkey; ++cur_key) { rv = skBag_getCounter(bag, &cur_key, &counter); switch (rv) { case SKBAG_ERR_KEY_NOT_FOUND: counter = 0; /* FALLTHROUGH */ case SKBAG_OK: if (counter <= maxcounter) { netStructurePrintIP((uint32_t)cur_key, (uint64_t*)&counter, ns); } break; default: skAppPrintErr("Error reading key %u from bag: %s", cur_key, skBag_strerror(rv)); } /* handle potential roller */ if (cur_key == SKBAG_KEY_MAX) { break; } } netStructurePrintFinalize(ns); } else { /* * otherwise, only pull the values we will actually print */ if (skBag_allocIterator(bag, &iter) != SKBAG_OK) { return 1; } while (SKBAG_OK == (rv = skBag_getNext(iter, &key, &counter))) { /* Skip if key or counter is not in range */ if ( !IS_RECORD_WITHIN_LIMITS(key, counter)) { continue; } netStructurePrintIP((uint32_t)key, (uint64_t*)&counter, ns); } if (rv == SKBAG_ERR_KEY_NOT_FOUND) { /* Run one more time in order to close blocks and print * the final footers. */ netStructurePrintFinalize(ns); } else { /* unexpected error from bag */ skAppPrintErr("Error reading from bag: %s", skBag_strerror(rv)); } skBag_freeIterator(iter); } netStructureDestroy(&ns); return 0; } static int printStats( skBag_header_t *bag, skstream_t *stream_out) { double counter_temp = 0.0; double counter_mult = 0.0; double sum = 0.0; /* straight sum */ double sum2 = 0.0; /* sum of squares */ double sum3 = 0.0; /* sum of cubes */ double key_count = 0.0; double mean = 0.0; double stddev = 0.0; double temp = 0.0; double variance = 0.0; double skew = 0.0; double kurtosis = 0.0; skBag_iterator_t *iter; skBag_key_t key; skBag_counter_t counter; skBag_key_t min_seen_key, max_seen_key; skBag_counter_t min_seen_counter, max_seen_counter; char min_seen_key_st[SK_NUM2DOT_STRLEN]; char max_seen_key_st[SK_NUM2DOT_STRLEN]; assert(bag != NULL); assert(stream_out != NULL); min_seen_key = max_seen_key = SKBAG_KEY_MIN; min_seen_counter = max_seen_counter = SKBAG_COUNTER_MAX; if (skBag_allocIterator(bag, &iter) 
!= SKBAG_OK) { return 1; } while (skBag_getNext(iter, &key, &counter) == SKBAG_OK) { if ( !IS_RECORD_WITHIN_LIMITS(key, counter)) { continue; } if (key_count < 1.0) { /* first entry */ min_seen_key = max_seen_key = key; min_seen_counter = max_seen_counter = counter; } else { max_seen_key = key; if (counter < min_seen_counter) { min_seen_counter = counter; } else if (counter > max_seen_counter) { max_seen_counter = counter; } } /* straight sum */ counter_temp = (double) counter; sum += counter_temp; /* sum of squares */ counter_mult = counter_temp * counter_temp; sum2 += counter_mult; /* sum of cubes */ counter_mult *= counter_temp; sum3 += counter_mult; ++key_count; } if (skBag_freeIterator(iter) != SKBAG_OK) { return 1; } skStreamPrint(stream_out, "\nStatistics\n"); if (key_count < 1.0) { skStreamPrint(stream_out, " No entries in bag.\n"); return 0; } if (f_print_integer_keys) { snprintf(min_seen_key_st, sizeof(min_seen_key_st), "%u", min_seen_key); snprintf(max_seen_key_st, sizeof(max_seen_key_st), "%u", max_seen_key); } else if (f_zero_pad_ips) { num2dot0_r(min_seen_key, min_seen_key_st); num2dot0_r(max_seen_key, max_seen_key_st); } else { num2dot_r(min_seen_key, min_seen_key_st); num2dot_r(max_seen_key, max_seen_key_st); } /* formulae derived from HyperStat Online - David M. 
Lane */ /* http://davidmlane.com/hyperstat/A15885.html (mean) */ mean = sum / key_count; /* http://davidmlane.com/hyperstat/A16252.html (variance) */ temp = sum2 - (2.0 * mean * sum) + (key_count * mean * mean); variance = temp / (key_count - 1.0); /* http://davidmlane.com/hyperstat/A16252.html (standard deviation) */ stddev = sqrt(variance); /* http://davidmlane.com/hyperstat/A11284.html (skew) */ skew = ((sum3 - (3.0 * mean * sum2) + (3.0 * mean * mean * sum) - (key_count * mean * mean * mean)) / (key_count * variance * stddev)); /* http://davidmlane.com/hyperstat/A53638.html (kurtosis) */ kurtosis = (temp * temp) / (key_count * variance * variance); skStreamPrint(stream_out, ("%18s: %" PRIu64 "\n%18s: %" PRIu64 "\n" "%18s: %s\n%18s: %s\n" "%18s: %" PRIu64 "\n%18s: %" PRIu64 "\n" "%18s: %.4g\n%18s: %.4g\n%18s: %.4g\n" "%18s: %.4g\n%18s: %.4g\n"), "keys", (uint64_t)key_count, "sum of counters", (uint64_t)sum, "minimum key", min_seen_key_st, "maximum key", max_seen_key_st, "minimum counter", (uint64_t)min_seen_counter, "maximum counter", (uint64_t)max_seen_counter, "mean", mean, "variance", variance, "standard deviation", stddev, "skew", skew, "kurtosis", kurtosis); return 0; } /* * Output bag using current state of options */ static int processBag(skBag_header_t *bag) { if (f_print_network != 0) { if (printNetwork(bag) != 0) { skAppPrintErr("Cannot print network structure"); exit(EXIT_FAILURE); } } if (bin_scheme != BINSCHEME_NONE) { bagcatInvertBag(bag); } if (f_print_stats) { printStats(bag, stats); } if (f_print_treestats) { skBag_printTreeStats(bag, treestats); } return 0; } int main(int argc, char **argv) { skBag_err_t err; skBag_header_t *bag = NULL; int i; appSetup(argc, argv); /* never returns on error */ for (i = 0; inputs[i]; ++i) { err = skBag_readBinary(&bag, inputs[i]); if (err != SKBAG_OK) { skAppPrintErr("Error reading bag from input stream '%s'", skStreamGetPathname(inputs[i])); exit(EXIT_FAILURE); } if (processBag(bag)) { skAppPrintErr("Error 
processing bag '%s'", skStreamGetPathname(inputs[i])); exit(EXIT_FAILURE); } err = skBag_free(bag); if (err != SKBAG_OK) { skAppPrintErr("Error deallocating bag '%s'", skStreamGetPathname(inputs[i])); exit(EXIT_FAILURE); } } /* done */ appTeardown(); return 0; } /* ** Local Variables: ** mode:c ** indent-tabs-mode:nil ** c-basic-offset:4 ** End: */