/* ** Copyright (C) 2001-2007 by Carnegie Mellon University. ** ** @OPENSOURCE_HEADER_START@ ** ** Use of the SILK system and related source code is subject to the terms ** of the following licenses: ** ** GNU Public License (GPL) Rights pursuant to Version 2, June 1991 ** Government Purpose License Rights (GPLR) pursuant to DFARS 252.225-7013 ** ** NO WARRANTY ** ** ANY INFORMATION, MATERIALS, SERVICES, INTELLECTUAL PROPERTY OR OTHER ** PROPERTY OR RIGHTS GRANTED OR PROVIDED BY CARNEGIE MELLON UNIVERSITY ** PURSUANT TO THIS LICENSE (HEREINAFTER THE "DELIVERABLES") ARE ON AN ** "AS-IS" BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY ** KIND, EITHER EXPRESS OR IMPLIED AS TO ANY MATTER INCLUDING, BUT NOT ** LIMITED TO, WARRANTY OF FITNESS FOR A PARTICULAR PURPOSE, ** MERCHANTABILITY, INFORMATIONAL CONTENT, NONINFRINGEMENT, OR ERROR-FREE ** OPERATION. CARNEGIE MELLON UNIVERSITY SHALL NOT BE LIABLE FOR INDIRECT, ** SPECIAL OR CONSEQUENTIAL DAMAGES, SUCH AS LOSS OF PROFITS OR INABILITY ** TO USE SAID INTELLECTUAL PROPERTY, UNDER THIS LICENSE, REGARDLESS OF ** WHETHER SUCH PARTY WAS AWARE OF THE POSSIBILITY OF SUCH DAMAGES. ** LICENSEE AGREES THAT IT WILL NOT MAKE ANY WARRANTY ON BEHALF OF ** CARNEGIE MELLON UNIVERSITY, EXPRESS OR IMPLIED, TO ANY PERSON ** CONCERNING THE APPLICATION OF OR THE RESULTS TO BE OBTAINED WITH THE ** DELIVERABLES UNDER THIS LICENSE. 
** ** Licensee hereby agrees to defend, indemnify, and hold harmless Carnegie ** Mellon University, its trustees, officers, employees, and agents from ** all claims or demands made against them (and any related losses, ** expenses, or attorney's fees) arising out of, or relating to Licensee's ** and/or its sub licensees' negligent use or willful misuse of or ** negligent conduct or willful misconduct regarding the Software, ** facilities, or other rights or assistance granted by Carnegie Mellon ** University under this License, including, but not limited to, any ** claims of product liability, personal injury, death, damage to ** property, or violation of any laws or regulations. ** ** Carnegie Mellon University Software Engineering Institute authored ** documents are sponsored by the U.S. Department of Defense under ** Contract F19628-00-C-0003. Carnegie Mellon University retains ** copyrights in all material produced under this contract. The U.S. ** Government retains a non-exclusive, royalty-free license to publish or ** reproduce these documents, or allow others to do so, for U.S. ** Government purposes only pursuant to the copyright license under the ** contract clause at 252.227.7013. ** ** @OPENSOURCE_HEADER_END@ */ /* ** rwstats.c ** ** Implementation of the rwstats suite application. 
** ** Reads packed files or reads the output from rwfilter and can ** compute a battery of characterizations and statistics: ** ** -- Top N or Bottom N SIPs with counts; count of unique SIPs ** -- Top N or Bottom N DIPs with counts; count of unique DIPs ** -- Top N or Bottom N SIP/DIP pairs with counts; count of unique ** SIP/DIP pairs (for a limited number of records) ** -- Top N or Bottom N Src Ports with counts; count of unique Src Ports ** -- Top N or Bottom N Dest Ports with counts; count of unique Dest Ports ** -- Top N or Bottom N Protocols with counts; count of unique protocols ** -- For more continuous variables (bytes, packets, bytes/packet) ** provide statistics such as min, max, quartiles, and intervals ** ** Instead of specifying a Top N or Bottom N as an absolute number N, ** the user may specify a cutoff threshold. In this case, the Top N ** or Bottom N required to print all counts meeting the threshold is ** computed by the application. ** ** Instead of specifying the threshold as an absolute count, the user ** may specify the threshold as percentage of all input records. For ** this case, the absolute threshold is calculated and then that is ** used to calculate the Top N or Bottom N. ** ** The application will only do calculations and produce output when ** asked to do so. At least one argument is required to tell the ** application what to do. ** ** Ideas for expansion ** -- Similarly for other variables, e.g., country code. 
** -- Output each type of data to its own file ** -- Save intermediate data in files for faster reprocessing by this ** application ** -- Save intermediate data in files for processing by other ** applications ** */ /* ** IMPLEMENTATION NOTES ** ** For each input type (source ip, dest ip, source port, proto, etc), ** there are two globals: g_limit_ contains the value the user ** entered for the input type, and g_wanted_stat_ is a member ** of the wanted_stat_type and says what the g_limit_ value ** represents---e.g., the Top N, the bottom threshold percentage, etc. ** ** The application takes input (either from stdin or as files on ** command line) and calls processFile() on each. A count of each ** unique source IP addresses is stored in the IpCounter hash table ** g_counter_src_ip; Destinations IPs in g_counter_dest_ip; data for ** flow between a Source IP and Destination IP pair are stored in ** g_counter_pair_ip. ** ** Since there are relatively few ports and protocols, two ** 65536-elements arrays, g_src_port_array and g_dest_port_array are ** used to store a count of the records for each source and ** destination port, respectively, and a 256-element array, ** g_proto_array, is used to store a count of each protocol. ** ** Minima, maxima, quartile, and interval data are stored for each of ** bytes, packets, and bytes-per-packet for all flows--regardless of ** protocol--and detailed for a limited number (RWSTATS_NUM_PROTO-1) ** of protocols.. The minima and maxima are each stored in arrays ** for each of bytes, packets, bpp. For example g_bytes_min[0] ** stores the smallest byte count regardless of protocol (ie, over ** all protocols), and g_pkts_max[1] stores the largest packet count ** for the first protocol the user specified. The mapping from ** protocol to array index is given by g_proto_to_stats_idx[], where ** the index into g_proto_to_stats_idx[] returns an integer that is ** the index into g_bytes_min[]. 
Data for the intervals is stored in
**  two-dimensional arrays, where the first dimension is the same as
**  for the minima and maxima, and the second dimension is the number
**  of intervals, NUM_INTERVALS.
**
**  Once data is collected, it is processed.
**
**  For the IPs, the user is interested in the number of unique IPs and
**  the IPs with the topN counts (things are similar for the bottomN,
**  but we use topN in this discussion to keep things more clear).  In
**  the printTopIps() function, an array with 2*topN elements is
**  created and passed to calcTopIps(); that array will be the result
**  array and it will hold the topN IpAddr and IpCount pairs in sorted
**  order.  In calcTopIps(), a working array of 2*topN elements and a
**  Heap data structure with topN nodes are created.  The topN
**  IpCounts seen are stored as IpCount/IpAddr pairs in the
**  2*topN-element array (but not in sorted order), and the heap
**  stores pointers into that array with the lowest IpCount at the
**  root of the heap.  As the function iterates over the hash table,
**  it compares the IpCount of the current hash-table element with the
**  IpCount at the root of the heap.  When the IpCount of the
**  hash-table element is larger, the root of the heap is removed, the
**  IpCount/IpAddr pair pointed to by the former heap-root is removed
**  from the 2*topN-element array and replaced with the new
**  IpCount/IpAddr pair, and finally a new node is added to the heap
**  that points to the new IpCount/IpAddr pair.  This continues until
**  all hash-table entries are processed.  To get the list of topN IPs
**  from highest to lowest, calcTopIps() removes elements from the
**  heap and stores them in the result array from position N-1 to
**  position 0.
**
**  Finding the topN source ports, topN destination ports, and topN
**  protocols is similar to finding the topN IPs, except the ports
**  and protocols are already stored in an array, so pointers directly
**  into the g_src_port_array, g_dest_port_array, and g_proto_array
**  are stored in the heap.  When generating output, the number of the
**  port or protocol is determined by the difference between the pointer
**  into the g_*_port_array or g_proto_array and its start.
**
**  Instead of specifying a topN, the user may specify a cutoff
**  threshold.  In this case, the topN required to print all counts
**  meeting the threshold is computed by looping over the IP
**  hash-table or port/protocol arrays and finding all entries with at
**  least threshold hits.
**
**  The user may specify a percentage threshold instead of an absolute
**  threshold.  Once all records are read, the total record count is
**  multiplied by the percentage threshold to get the absolute
**  threshold cutoff, and that is used to calculate the topN as
**  described in the preceding paragraph.
**
**  For the continuous variables bytes, packets, bpp, most of the work
**  was done while reading the data, so processing is minimal.  Only
**  the quartiles must be calculated.
*/

#include "silk.h"

RCSIDENT("$SiLK: rwstats.c 8269 2007-08-03 18:54:48Z mthomas $");

#include "rwstats.h"
#include "heaplib.h"


/* TYPEDEFS and DEFINES */

/* Initial size of hash table */
#define HASH_INITIAL_SIZE 500000 /* 500,000 */

/* Value returned by processFileTopn() when the hash table cannot grow;
 * main() checks for it and then prints partial results. */
#define RWSTATS_NO_MEMORY_EXIT_CODE 16


/* EXPORTED VARIABLES */

/* Next four variables determine what type of "top-n" stat to compute;
 * e.g., top-percentage-dport-bytes */
stat_bt_type_t top_or_btm;
stat_stats_type_t stats_type;
stat_key_type_t key_type;
stat_val_type_t val_type;

/* user limit for this stat: N if top N or bottom N, threshold, or
 * percentage */
uint64_t g_limit = 0;

/* When non-zero, main() runs the protocol-statistics path
 * (processFileProtoStats/printResultsProtoStats) instead of the
 * Top-N/Bottom-N path implemented in this file. */
int g_proto_stats = 0;

/* Whether to print input filenames */
int8_t g_print_filenames = 0;

/* Whether to print column and section titles */
int8_t g_print_titles = 1;

/* output column widths.  mapped to width_type; this file indexes it
 * with WIDTH_KEY, WIDTH_VAL and WIDTH_PCT */
int g_width[6];

/* The delimiter character to print between columns */
char g_delim = '|';

/* Whether to print IP addrs as integers */
int8_t g_integer_ips = 0;

/* CIDR block mask for src and dest ips.  If 0, use all bits;
 * otherwise, the IP address should be bitwise ANDed with this
 * value. */
uint32_t g_cidr_src = 0;
uint32_t g_cidr_dest = 0;

/*
 * Index into argv[].  This is set by optionsParse() to the first
 * file name to open and read.  When this equals argc, the app should
 * read records from the stdin.
 */
int arg_index = 0;

/* Where to copy the input to */
rwIOStruct_t *g_copy_input;

/* Where to write the ASCII output */
skstream_t *outstream;


/* LOCAL VARIABLES */

/* Total number of records read */
static uint64_t g_record_count;

/* Summation of whatever value (bytes, packets, flows) we are using.
 * When counting flows, this will be equal to g_record_count.
 */
static uint64_t g_value_total;

/* hash table mapping each key to its running value */
static HashTable *g_hash_counter;

/* Array that will hold the N tuples; allocated and released by
 * printResultsTopn() */
static uint8_t *tuple_array;

/* Byte length of key and value for each entry in hash table and tuple
 * array.  setupTopn() assigns these; entry_len is key_len + val_len. */
static size_t val_len = 0;
static size_t key_len = 0;
static size_t entry_len = 0;

/*
 * Accessors for the entries stored in tuple_array.  Each entry is
 * entry_len bytes laid out as
 *
 *      [ value : val_len bytes ][ key : key_len bytes ]
 *
 * i.e. the value comes FIRST and the key follows it.  All access goes
 * through memcpy(), so entries need no particular alignment.
 */

/* Address of the i'th entry in tuple_array */
#define TUPLE_GET_ENTRY(i)                      \
    (&(tuple_array[((i) * entry_len)]))

/* Copy the key out of 'entry' into the buffer at 'key_ptr' */
#define ENTRY_GET_KEY(entry, key_ptr)                   \
    memcpy((key_ptr), (entry) + val_len, key_len)

/* Copy the key at 'key_ptr' into 'entry' */
#define ENTRY_SET_KEY(entry, key_ptr)                   \
    memcpy((entry) + val_len, (key_ptr), key_len)

/* Copy the value out of 'entry' into the buffer at 'val_ptr' */
#define ENTRY_GET_VAL(entry, val_ptr)           \
    memcpy((val_ptr), (entry), val_len)

/* Copy the value at 'val_ptr' into 'entry' */
#define ENTRY_SET_VAL(entry, val_ptr)           \
    memcpy((entry), (val_ptr), val_len)


/* FUNCTION DEFINITIONS */

/*
 *  status = processFileTopn(rwIOS);
 *
 *    Read SiLK flow records from the 'rwIOS' stream and update the
 *    counters in the 'g_hash_counter' global table, as well as the
 *    global record count 'g_record_count' and totals 'g_value_total'.
 *
 *    Returns 0 on success, or RWSTATS_NO_MEMORY_EXIT_CODE if the hash
 *    table runs out of memory.
 *
 *    Will exit the program if the hashlib returns an unexpected
 *    value.
*/ int processFileTopn(rwIOStruct_t *rwIOS) { rwRec rwrec; uint8_t *key_ptr = NULL; uint8_t *val_ptr = NULL; uint8_t *hash_val_ptr; uint32_t val32; uint64_t val64; uint32_t ip[2]; uint16_t port[2]; uint8_t proto[4]; int rv = 0; switch (key_type) { case KEY_SIP: case KEY_DIP: case KEY_IP_PAIR: key_ptr = (uint8_t*)ip; break; case KEY_SPORT: case KEY_DPORT: case KEY_PORT_PAIR: case KEY_ICMP: memset(port, 0, sizeof(port)); key_ptr = (uint8_t*)port; break; case KEY_PROTO: memset(proto, 0, sizeof(proto)); key_ptr = proto; break; } switch (val_type) { case VAL_BYTES: val_ptr = (uint8_t*)&val64; break; case VAL_PACKETS: case VAL_FLOWS: val_ptr = (uint8_t*)&val32; break; } while (rwRead(rwIOS, &rwrec)) { switch (key_type) { case KEY_SIP: if (g_cidr_src) { rwrec.sIP.ipnum &= g_cidr_src; } ip[0] = rwrec.sIP.ipnum; break; case KEY_DIP: if (g_cidr_dest) { rwrec.dIP.ipnum &= g_cidr_dest; } ip[0] = rwrec.dIP.ipnum; break; case KEY_IP_PAIR: if (g_cidr_src) { rwrec.sIP.ipnum &= g_cidr_src; } if (g_cidr_dest) { rwrec.dIP.ipnum &= g_cidr_dest; } ip[0] = rwrec.sIP.ipnum; ip[1] = rwrec.dIP.ipnum; break; case KEY_SPORT: port[0] = rwrec.sPort; break; case KEY_DPORT: case KEY_ICMP: port[0] = rwrec.dPort; break; case KEY_PORT_PAIR: port[0] = rwrec.sPort; port[1] = rwrec.dPort; break; case KEY_PROTO: proto[0] = rwrec.proto; break; } rv = hashlib_insert(g_hash_counter, key_ptr, &hash_val_ptr); switch (rv) { case OK: /* new value */ memset(hash_val_ptr, 0, val_len); /* FALLTHROUGH */ case OK_DUPLICATE: memcpy(val_ptr, hash_val_ptr, val_len); switch(val_type) { case VAL_BYTES: val64 += rwrec.bytes; g_value_total += rwrec.bytes; break; case VAL_PACKETS: val32 += rwrec.pkts; g_value_total += rwrec.pkts; break; case VAL_FLOWS: ++val32; ++g_value_total; break; } memcpy(hash_val_ptr, val_ptr, val_len); break; case ERR_OUTOFMEMORY: case ERR_NOMOREBLOCKS: return RWSTATS_NO_MEMORY_EXIT_CODE; default: skAppPrintErr("Bad return code '%d' from hash table insert", rv); exit(EXIT_FAILURE); } 
++g_record_count; } /* while rwRead() */ return 0; } int setupTopn(void) { uint8_t *no_val_ptr; /* Set up the key */ switch (key_type) { case KEY_SIP: case KEY_DIP: key_len = sizeof(uint32_t); break; case KEY_IP_PAIR: key_len = 2 * sizeof(uint32_t); break; case KEY_SPORT: case KEY_DPORT: case KEY_ICMP: key_len = sizeof(uint16_t); break; case KEY_PORT_PAIR: key_len = 2 * sizeof(uint16_t); break; case KEY_PROTO: key_len = sizeof(uint8_t); break; } /* align keys on 32bits */ if ((key_len % sizeof(uint32_t)) != 0) { key_len += sizeof(uint32_t) - (key_len % sizeof(uint32_t)); } switch (val_type) { case VAL_BYTES: val_len = sizeof(uint64_t); break; case VAL_PACKETS: case VAL_FLOWS: val_len = sizeof(uint32_t); break; } entry_len = key_len + val_len; /* Create table */ no_val_ptr = malloc(val_len * sizeof(uint8_t)); if (!no_val_ptr) { skAppPrintErr("Out of memory"); exit(EXIT_FAILURE); } memset(no_val_ptr, 0xFF, val_len); g_hash_counter = hashlib_create_table(key_len, val_len, HTT_INPLACE, no_val_ptr, NULL, 0, HASH_INITIAL_SIZE, DEFAULT_LOAD_FACTOR); free(no_val_ptr); if (g_hash_counter == NULL) { skAppPrintErr("Unable to create hash table"); exit(EXIT_FAILURE); } return 0; } void teardownTopn(void) { if (g_hash_counter) { hashlib_free_table(g_hash_counter); } } /* rwstatsCompareCountsTop * Called by heap library to compare Counts for the topN * Arguments: * node1 - pointer to a uint32_t representing a count * node2 - pointer to a uint32_t representing a count * Results: * Returns 1 if the Count for node1 < that for node2; * -1 if Count for node1 > that for node2. * Side effects: NONE. 
*/ static int rwstatsCompareCountsTop32(HeapNode node1, HeapNode node2) { uint32_t a; uint32_t b; ENTRY_GET_VAL(node1, &a); ENTRY_GET_VAL(node2, &b); if (a > b) { return -1; } if (a < b) { return 1; } return 0; } /* rwstatsCompareCountsBtm * Called by heap library to compare Counts for the bottomN * Arguments: * node1 - pointer to a uint32_t representing a count * node2 - pointer to a uint32_t representing a count * Results: * Returns -1 if the Count for node1 < that for node2; * 1 if Count for node1 > that for node2. * Side effects: NONE. */ static int rwstatsCompareCountsBtm32(HeapNode node1, HeapNode node2) { uint32_t a; uint32_t b; ENTRY_GET_VAL(node1, &a); ENTRY_GET_VAL(node2, &b); if (a < b) { return -1; } if (a > b) { return 1; } return 0; } static int rwstatsCompareCountsTop64(HeapNode node1, HeapNode node2) { uint64_t a; uint64_t b; ENTRY_GET_VAL(node1, &a); ENTRY_GET_VAL(node2, &b); if (a > b) { return -1; } if (a < b) { return 1; } return 0; } static int rwstatsCompareCountsBtm64(HeapNode node1, HeapNode node2) { uint64_t a; uint64_t b; ENTRY_GET_VAL(node1, &a); ENTRY_GET_VAL(node2, &b); if (a < b) { return -1; } if (a > b) { return 1; } return 0; } static uint32_t calcTopUint32( const uint32_t topn) { Heap *heap; uint8_t *key; uint8_t *heap_ptr; HeapNodeCompFunc cmp_fn = NULL; HASH_ITER iter; uint8_t *val_ptr = NULL; uint8_t *hash_val_ptr = NULL; uint32_t hash_val32 = 0; uint64_t hash_val64 = 0; uint8_t *heap_val_ptr = NULL; uint32_t heap_val32 = 0; uint64_t heap_val64 = 0; uint32_t heap_num_entries; size_t *index_array = NULL; uint32_t i; uint8_t cache[16]; size_t src, tgt; /* Initialize the value pointers to point at the 32bit or 64bit * values as appropriate. 
*/ switch (val_type) { case VAL_BYTES: heap_val_ptr = (uint8_t*)(&heap_val64); val_ptr = (uint8_t*)(&hash_val64); if (top_or_btm == BT_TOP) { cmp_fn = &rwstatsCompareCountsTop64; } else { cmp_fn = &rwstatsCompareCountsBtm64; } break; case VAL_PACKETS: case VAL_FLOWS: heap_val_ptr = (uint8_t*)(&heap_val32); val_ptr = (uint8_t*)(&hash_val32); if (top_or_btm == BT_TOP) { cmp_fn = &rwstatsCompareCountsTop32; } else { cmp_fn = &rwstatsCompareCountsBtm32; } break; } /* Create the heap */ heap = heapCreate(topn, cmp_fn); if (NULL == heap) { skAppPrintErr("Heap creation failed"); exit(EXIT_FAILURE); } /* The number of nodes in the heap. */ heap_num_entries = 0; /* Iterate over the hash-table. */ iter = hashlib_create_iterator(g_hash_counter); /* load the heap with the first topn entries in the hash table */ while ((heap_num_entries < topn) && (hashlib_iterate(g_hash_counter, &iter, &key, &hash_val_ptr) != ERR_NOMOREENTRIES)) { heap_ptr = TUPLE_GET_ENTRY(heap_num_entries); ENTRY_SET_KEY(heap_ptr, key); ENTRY_SET_VAL(heap_ptr, hash_val_ptr); heapInsert(heap, heap_ptr); ++heap_num_entries; } if (heap_num_entries == 0) { heapFree(heap); return 0; } /* Get the node at the top of heap and its value */ heapGetTop(heap, (HeapNode*)&heap_ptr); ENTRY_GET_VAL(heap_ptr, heap_val_ptr); /* Process entries that remain in the hash table */ while (hashlib_iterate(g_hash_counter, &iter, &key, &hash_val_ptr) != ERR_NOMOREENTRIES) { memcpy(val_ptr, hash_val_ptr, val_len); if (val_type == VAL_BYTES) { if (top_or_btm == BT_TOP) { if (heap_val64 >= hash_val64) { continue; } } else { if (heap_val64 <= hash_val64) { continue; } } } else { if (top_or_btm == BT_TOP) { if (heap_val32 >= hash_val32) { continue; } } else { if (heap_val32 <= hash_val32) { continue; } } } /* The hash-table element we just read is "better" (for topN, * higher than current heap-root's value; for bottomN, lower * than current heap-root's value). 
Remove the heap's root to * make room for the new node */ heapExtractTop(heap, NULL); /* heap_ptr points into the tuple_array. replace the IP * addresses and count in the tuple_array with this new value */ ENTRY_SET_KEY(heap_ptr, key); ENTRY_SET_VAL(heap_ptr, hash_val_ptr); /* insert this new value into the heap */ heapInsert(heap, heap_ptr); /* the top may have changed; get the new top and its IpCount */ heapGetTop(heap, (HeapNode*)&heap_ptr); ENTRY_GET_VAL(heap_ptr, heap_val_ptr); } index_array = calloc(heap_num_entries, sizeof(size_t)); /* Remove the entries from the heap one at a time and put them into the tuple_array. They will come off from the lowest of the topN to the highest--or highest of the bottomN to the lowest. */ for (i = heap_num_entries; i > 0; --i) { heapExtractTop(heap, (HeapNode*)&heap_ptr); index_array[i - 1] = ((heap_ptr - tuple_array) / entry_len); } for (i = 0; i < heap_num_entries; ++i) { if (i == index_array[i]) { /* entry is in correct position. goto next. */ continue; } /* cache it */ memcpy(&cache, TUPLE_GET_ENTRY(i), entry_len); tgt = i; src = index_array[tgt]; while (i != src) { memcpy(TUPLE_GET_ENTRY(tgt), TUPLE_GET_ENTRY(src), entry_len); index_array[tgt] = tgt; tgt = src; src = index_array[tgt]; } memcpy(TUPLE_GET_ENTRY(tgt), &cache, entry_len); index_array[tgt] = tgt; } /* Clean up */ heapFree(heap); free(index_array); return heap_num_entries; } /* * thresholdToCount * Return the topN/bottomN required to print all * SrcXX+DestXX/PairCount key/value pairs in the tuple_counter * hash-table whose PairCount is at-least/no-more-than threshold. * XX can be either IpAddr or Port. * Arguments: * top_or_btm -whether to compute for topN(1) or bottomN(0) * threshold -find PairCounts with at-least/no-more-than this * many hits * Returns: * number of SrcXX+DestXX/PairCount pairs whose PairCount was * at-least/no-more than threshold * Side Affects: NONE. 
*/ static uint32_t thresholdToCount( const uint64_t threshold) { HASH_ITER iter; uint8_t *key; uint8_t *hash_val_ptr; uint8_t *val_ptr = NULL; uint32_t val32; uint64_t val64; uint32_t count = 0; switch (val_type) { case VAL_BYTES: val_ptr = (uint8_t*)&val64; break; case VAL_PACKETS: case VAL_FLOWS: val_ptr = (uint8_t*)&val32; break; } /* Iterate over the hash-table. */ iter = hashlib_create_iterator(g_hash_counter); while (hashlib_iterate(g_hash_counter, &iter, &key, &hash_val_ptr) != ERR_NOMOREENTRIES) { memcpy(val_ptr, hash_val_ptr, val_len); if (val_type != VAL_BYTES) { val64 = val32; } if (top_or_btm == BT_TOP) { if (val64 >= threshold) { ++count; } } else { if (val64 <= threshold) { ++count; } } } return count; } static uint32_t wantedStatToN(void) { const char *bt_name = ""; const char *above_below = ""; const char *key_name = ""; const char *val_name = ""; uint32_t unique_entries; uint32_t topn = 0; uint64_t threshold; switch (top_or_btm) { case BT_TOP: bt_name = "Top"; above_below = "above"; break; case BT_BOTTOM: bt_name = "Bottom"; above_below = "below"; break; } switch (key_type) { case KEY_SIP: key_name = "SOURCE IP"; break; case KEY_DIP: key_name = "DESTINATION IP"; break; case KEY_IP_PAIR: key_name = "SIP/DIP PAIR"; break; case KEY_SPORT: key_name = "SOURCE PORT"; break; case KEY_DPORT: key_name = "DESTINATION PORT"; break; case KEY_PORT_PAIR: key_name = "SPORT/DPORT PAIR"; break; case KEY_PROTO: key_name = "PROTOCOL"; break; case KEY_ICMP: key_name = "ICMP TYPE/CODE"; break; } switch (val_type) { case VAL_BYTES: val_name = "byte"; break; case VAL_PACKETS: val_name = "packet"; break; case VAL_FLOWS: val_name = "flow"; break; } /* Get a count of unique flows */ unique_entries = hashlib_count_entries(g_hash_counter); if (g_print_titles) { skStreamPrint(outstream, ("INPUT SIZE: %" PRIu64 " records for %u unique keys\n"), g_record_count, unique_entries); } /* Given the statistic the user wants, convert the "limit" value * to actual topN or bottomN */ 
switch (stats_type) { case STATS_COUNT: /* user gave a count, we are set */ topn = g_limit; if (g_print_titles) { skStreamPrint(outstream, "%s Key: %s %u %s counts\n", key_name, bt_name, topn, val_name); } break; case STATS_THRESHOLD: /* Convert number of records to topN */ topn = thresholdToCount(g_limit); if (topn < 1) { skStreamPrint(outstream, ("%s Key: No %s counts %s threshold of %" PRIu64 "\n"), key_name, val_name, above_below, g_limit); return 0; } if (g_print_titles) { skStreamPrint(outstream, ("%s Key: %s %u %s counts (threshold %" PRIu64 ")\n"), key_name, bt_name, topn, val_name, g_limit); } break; case STATS_PERCENTAGE: /* Convert percertage of records to topN */ threshold = (double)g_value_total * (double)g_limit / 100.0; topn = thresholdToCount(threshold); if (topn < 1) { skStreamPrint(outstream, ("%s Key: No %s counts %s threshold of %" PRIu64 "%% (%" PRIu64 " %ss)\n"), key_name, val_name, above_below, g_limit, threshold, val_name); return 0; } if (g_print_titles) { skStreamPrint(outstream, ("%s Key: %s %u %s counts" " (%" PRIu64 "%% == %" PRIu64 ")\n"), key_name, bt_name, topn, val_name,g_limit,threshold); } break; } return topn; } void printResultsTopn(void) { uint32_t topn; uint32_t topn_found; uint8_t *entry; uint8_t *key_ptr = NULL; uint8_t *val_ptr = NULL; uint32_t val32; uint64_t value; uint32_t i; double percent; double cumul_pct; uint32_t ip[2]; uint16_t port[2]; uint8_t proto[4]; /* Given the statistic the user wants, convert the "limit" value * to an actual topN or bottomN */ topn = wantedStatToN(); if (topn < 1) { return; } /* Create an array to hold the topN or btmN tuples and their * counts */ tuple_array = calloc(topn * entry_len, sizeof(uint8_t)); if (NULL == tuple_array) { skAppPrintErr("Cannot malloc ip array"); exit(EXIT_FAILURE); } /* Call the function to do the actual topN/btmN and fill the * tuple_array */ topn_found = calcTopUint32(topn); switch (key_type) { case KEY_SIP: case KEY_DIP: case KEY_IP_PAIR: key_ptr = (uint8_t*)ip; 
break; case KEY_SPORT: case KEY_DPORT: case KEY_PORT_PAIR: case KEY_ICMP: key_ptr = (uint8_t*)port; break; case KEY_PROTO: key_ptr = proto; break; } switch (val_type) { case VAL_BYTES: val_ptr = (uint8_t*)&value; break; case VAL_PACKETS: case VAL_FLOWS: val_ptr = (uint8_t*)&val32; break; } /* Print results */ if (g_print_titles) { switch (key_type) { case KEY_SIP: skStreamPrint(outstream, "%*s%c", g_width[WIDTH_KEY], "sIP", g_delim); break; case KEY_DIP: skStreamPrint(outstream, "%*s%c", g_width[WIDTH_KEY], "dIP", g_delim); break; case KEY_IP_PAIR: skStreamPrint(outstream, "%*s%c%*s%c", g_width[WIDTH_KEY], "sIP", g_delim, g_width[WIDTH_KEY], "dIP", g_delim); break; case KEY_SPORT: skStreamPrint(outstream, "%*s%c", g_width[WIDTH_KEY], "sPort", g_delim); break; case KEY_DPORT: skStreamPrint(outstream, "%*s%c", g_width[WIDTH_KEY], "dPort", g_delim); break; case KEY_PORT_PAIR: skStreamPrint(outstream, "%*s%c%*s%c", g_width[WIDTH_KEY], "sPort", g_delim, g_width[WIDTH_KEY], "dPort", g_delim); break; case KEY_PROTO: skStreamPrint(outstream, "%*s%c", g_width[WIDTH_KEY], "protocol", g_delim); break; case KEY_ICMP: skStreamPrint(outstream, "%*s%c%*s%c", g_width[WIDTH_KEY], "icmpType", g_delim, g_width[WIDTH_KEY], "icmpCode", g_delim); break; } switch (val_type) { case VAL_BYTES: skStreamPrint(outstream, "%*s%c", g_width[WIDTH_VAL], "Bytes", g_delim); break; case VAL_PACKETS: skStreamPrint(outstream, "%*s%c", g_width[WIDTH_VAL], "Packets", g_delim); break; case VAL_FLOWS: skStreamPrint(outstream, "%*s%c", g_width[WIDTH_VAL], "Records", g_delim); break; } skStreamPrint(outstream, "%*s%c%*s%c\n", g_width[WIDTH_PCT], "%_of_total", g_delim, g_width[WIDTH_PCT], "cumul_%", g_delim); } /* end of titles */ cumul_pct = 0.0; for (i = 0; i < topn_found; ++i) { entry = TUPLE_GET_ENTRY(i); ENTRY_GET_KEY(entry, key_ptr); ENTRY_GET_VAL(entry, val_ptr); if (val_type != VAL_BYTES) { value = val32; } switch (key_type) { case KEY_SIP: case KEY_DIP: if (g_integer_ips == 0) { 
skStreamPrint(outstream, "%*s%c", g_width[WIDTH_KEY], num2dot(ip[0]), g_delim); } else { skStreamPrint(outstream, "%*u%c", g_width[WIDTH_KEY], ip[0], g_delim); } break; case KEY_IP_PAIR: if (g_integer_ips == 0) { /* use two printf's since num2dot uses static buffer */ skStreamPrint(outstream, "%*s%c", g_width[WIDTH_KEY], num2dot(ip[0]), g_delim); skStreamPrint(outstream, "%*s%c", g_width[WIDTH_KEY], num2dot(ip[1]), g_delim); } else { skStreamPrint(outstream, "%*u%c%*u%c", g_width[WIDTH_KEY], ip[0], g_delim, g_width[WIDTH_KEY], ip[1], g_delim); } break; case KEY_SPORT: case KEY_DPORT: skStreamPrint(outstream, "%*u%c", g_width[WIDTH_KEY], (unsigned int)port[0], g_delim); break; case KEY_PORT_PAIR: skStreamPrint(outstream, "%*u%c%*u%c", g_width[WIDTH_KEY], (unsigned int)port[0], g_delim, g_width[WIDTH_KEY], (unsigned int)port[1], g_delim); break; case KEY_PROTO: skStreamPrint(outstream, "%*u%c", g_width[WIDTH_KEY], (unsigned int)proto[0], g_delim); break; case KEY_ICMP: skStreamPrint(outstream, "%*u%c%*u%c", g_width[WIDTH_KEY], ((port[0] >> 8) & 0xFF), g_delim, g_width[WIDTH_KEY], (port[0] & 0xFF), g_delim); break; } percent = 100.0 * (double)value / g_value_total; cumul_pct += percent; skStreamPrint(outstream, ("%*" PRIu64 "%c%*.6f%c%*.6f%c\n"), g_width[WIDTH_VAL], value, g_delim, g_width[WIDTH_PCT], percent, g_delim, g_width[WIDTH_PCT], cumul_pct, g_delim); } /* Clean up */ free(tuple_array); } static int appNextFile(rwIOStruct_t **rwIOS, int argc, char **argv) { static int initialized = 0; const char *fname = NULL; *rwIOS = NULL; if (arg_index < argc) { /* get current file and prepare to get next */ fname = argv[arg_index]; ++arg_index; } else { /* either all input has been processed, or this is the first * time into this function and we should read from stdin */ if (initialized) { /* no more input */ return 0; } /* input is from stdin */ fname = "stdin"; } initialized = 1; /* open file */ *rwIOS = rwOpenFile(fname, g_copy_input); if (*rwIOS == NULL) { 
skAppPrintErr("Error opening file '%s'; file ignored.", fname); return appNextFile(rwIOS, argc, argv); } if (g_print_filenames) { fprintf(PRINT_FILENAMES_FH, "%s\n", rwGetFileName(*rwIOS)); } return 1; } int main(int argc, char **argv) { rwIOStruct_t *rwIOS; int rv = 0; /* Global setup */ appSetup(argc, argv); if (g_proto_stats) { while (appNextFile(&rwIOS, argc, argv)) { rv = processFileProtoStats(rwIOS); rwCloseFile(rwIOS); } /* Generate output */ printResultsProtoStats(); } else { while (appNextFile(&rwIOS, argc, argv)) { rv = processFileTopn(rwIOS); rwCloseFile(rwIOS); /* if we got an out-of-memory error, break out of the input loop */ if (RWSTATS_NO_MEMORY_EXIT_CODE == rv) { skAppPrintErr(("Out of memory after processing %" PRIu64 " records for\n\t\t%u hash entries." " Printing partial results"), g_record_count, hashlib_count_entries(g_hash_counter)); break; } } /* Generate output */ printResultsTopn(); } appTeardown(); return rv; } /* ** Local Variables: ** mode:c ** indent-tabs-mode:nil ** c-basic-offset:4 ** End: */