/* ** Copyright (C) 2006-2007 by Carnegie Mellon University. ** ** @OPENSOURCE_HEADER_START@ ** ** Use of the SILK system and related source code is subject to the terms ** of the following licenses: ** ** GNU Public License (GPL) Rights pursuant to Version 2, June 1991 ** Government Purpose License Rights (GPLR) pursuant to DFARS 252.225-7013 ** ** NO WARRANTY ** ** ANY INFORMATION, MATERIALS, SERVICES, INTELLECTUAL PROPERTY OR OTHER ** PROPERTY OR RIGHTS GRANTED OR PROVIDED BY CARNEGIE MELLON UNIVERSITY ** PURSUANT TO THIS LICENSE (HEREINAFTER THE "DELIVERABLES") ARE ON AN ** "AS-IS" BASIS. CARNEGIE MELLON UNIVERSITY MAKES NO WARRANTIES OF ANY ** KIND, EITHER EXPRESS OR IMPLIED AS TO ANY MATTER INCLUDING, BUT NOT ** LIMITED TO, WARRANTY OF FITNESS FOR A PARTICULAR PURPOSE, ** MERCHANTABILITY, INFORMATIONAL CONTENT, NONINFRINGEMENT, OR ERROR-FREE ** OPERATION. CARNEGIE MELLON UNIVERSITY SHALL NOT BE LIABLE FOR INDIRECT, ** SPECIAL OR CONSEQUENTIAL DAMAGES, SUCH AS LOSS OF PROFITS OR INABILITY ** TO USE SAID INTELLECTUAL PROPERTY, UNDER THIS LICENSE, REGARDLESS OF ** WHETHER SUCH PARTY WAS AWARE OF THE POSSIBILITY OF SUCH DAMAGES. ** LICENSEE AGREES THAT IT WILL NOT MAKE ANY WARRANTY ON BEHALF OF ** CARNEGIE MELLON UNIVERSITY, EXPRESS OR IMPLIED, TO ANY PERSON ** CONCERNING THE APPLICATION OF OR THE RESULTS TO BE OBTAINED WITH THE ** DELIVERABLES UNDER THIS LICENSE. ** ** Licensee hereby agrees to defend, indemnify, and hold harmless Carnegie ** Mellon University, its trustees, officers, employees, and agents from ** all claims or demands made against them (and any related losses, ** expenses, or attorney's fees) arising out of, or relating to Licensee's ** and/or its sub licensees' negligent use or willful misuse of or ** negligent conduct or willful misconduct regarding the Software, ** facilities, or other rights or assistance granted by Carnegie Mellon ** University under this License, including, but not limited to, any ** claims of product liability, personal injury, death, damage to ** property, or violation of any laws or regulations. ** ** Carnegie Mellon University Software Engineering Institute authored ** documents are sponsored by the U.S. Department of Defense under ** Contract F19628-00-C-0003. Carnegie Mellon University retains ** copyrights in all material produced under this contract. The U.S. ** Government retains a non-exclusive, royalty-free license to publish or ** reproduce these documents, or allow others to do so, for U.S. ** Government purposes only pursuant to the copyright license under the ** contract clause at 252.227.7013. ** ** @OPENSOURCE_HEADER_END@ */ #include "silk.h" RCSIDENT("$SiLK: rwscan_utils.c 7600 2007-06-20 19:05:24Z tonyc $"); #include "rwscan.h" /* TYPEDEFS AND DEFINES */ /* file handle for --help output */ #define USAGE_FH stdout trw_data_t trw_data; /* OPTIONS */ typedef enum { RWSCAN_SCAN_MODEL, RWSCAN_TRW_SIP_SET, RWSCAN_OUTPUT_FILE, RWSCAN_NO_TITLES, RWSCAN_NO_COLUMNS, RWSCAN_COLUMN_SEPARATOR, RWSCAN_DELIMITED, RWSCAN_MODEL_FIELDS, RWSCAN_SCANDB, RWSCAN_WORKER_THREADS, RWSCAN_WORK_QUEUE_DEPTH, RWSCAN_VERBOSE_FLOWS, RWSCAN_VERBOSE_PROGRESS } appOptionsEnum; static struct option appOptions[] = { {"scan-model", REQUIRED_ARG, 0, RWSCAN_SCAN_MODEL}, {"trw-sip-set", REQUIRED_ARG, 0, RWSCAN_TRW_SIP_SET}, {"output-file", REQUIRED_ARG, 0, RWSCAN_OUTPUT_FILE}, {"no-titles", NO_ARG, 0, RWSCAN_NO_TITLES}, {"no-columns", NO_ARG, 0, RWSCAN_NO_COLUMNS}, {"column-separator", REQUIRED_ARG, 0, RWSCAN_COLUMN_SEPARATOR}, {"delimited", OPTIONAL_ARG, 0, RWSCAN_DELIMITED}, {"model-fields", NO_ARG, 0, RWSCAN_MODEL_FIELDS}, {"scandb", NO_ARG, 0, RWSCAN_SCANDB}, {"threads", REQUIRED_ARG, 0, RWSCAN_WORKER_THREADS}, {"queue-depth", REQUIRED_ARG, 0, RWSCAN_WORK_QUEUE_DEPTH}, {"verbose-flows", NO_ARG, 0, RWSCAN_VERBOSE_FLOWS}, {"verbose-progress", REQUIRED_ARG, 0, RWSCAN_VERBOSE_PROGRESS}, {0, 0, 0, 0} /* sentinel entry */ }; static char *appHelp[] = { ("Specify scan model to use. Available scan models:\n" "\t0 - hybrid TRW + BLR (default)\n" "\t1 - Threshold Random Walk (TRW) only\n" "\t2 - Bayesian Logistic Regression (BLR) only"), ("Specify IPset file containing ALL valid internal IPs.\n" "\tThe TRW model requires a list of targetted IPs."), "Write scan records to this file. Def. 'scans.dat'", "Do not print column headers. Def. Print titles.", "Disable fixed-width columnar output. Def. Columnar", "Use specified character between columns. Def. '|'", "Shortcut for --no-columns --column-sep=CHAR", "Show scan model detail fields. Def. No", ("Produce output suitable for loading into a RDBMS. Def. No.\n" "\t(Shortcut for --no-titles --no-columns --model-fields)"), "Set number of worker threads to specified value. Def. 1", "Set the work queue depth to the specified value", ("Write individual flows for events. This produces\n" "\ta lot of output, mostly useful for debugging. Def. No"), ("Report progress as rwscan processes each CIDR\n" "\tblock of the specified size. Def. No"), (char *)NULL }; /* FUNCTION DEFINITIONS */ /* * appUsageLong(); * * Print complete usage information to USAGE_FH. Pass this * function to skOptionsSetUsageCallback(); optionsParse() will * call this funciton and then exit the program when the --help * option is given. */ static void appUsageLong( void) { #define USAGE_MSG \ ("[SWITCHES] [FILES]\n" \ "\tDetects scanning activity in SiLK Flow records. The output\n" \ "\tis a pipe-delimited textual file suitable for loading into a\n" \ "\trelational database. The input records should be pre-sorted\n" \ "\twith rwsort(1) by sip, proto, and dip.\n") FILE *fh = USAGE_FH; int i; fprintf(fh, "%s %s", skAppName(), USAGE_MSG); fprintf(fh, "\nSWITCHES:\n"); skOptionsDefaultUsage(fh); for (i = 0; appOptions[i].name; i++) { fprintf(fh, "--%s %s. %s\n", appOptions[i].name, SK_OPTION_HAS_ARG(appOptions[i]), appHelp[i]); } } /* * status = appOptionsHandler(cData, opt_index, opt_arg); * * This function is passed to optionsRegister(); it will be called * by optionsParse() for each user-specified switch that the * application has registered; it should handle the switch as * required---typically by setting global variables---and return 1 * if the switch processing failed or 0 if it succeeded. Returning * a non-zero from from the handler causes optionsParse() to return * a negative value. * * The clientData in 'cData' is typically ignored; 'opt_index' is * the index number that was specified as the last value for each * struct option in appOptions[]; 'opt_arg' is the user's argument * to the switch for options that have a REQUIRED_ARG or an * OPTIONAL_ARG. */ static int appOptionsHandler( clientData UNUSED (cData), int opt_index, char *opt_arg) { uint32_t i, tmpval; switch ((appOptionsEnum)opt_index) { case RWSCAN_SCAN_MODEL: if (skStringParseUint32(&options.scan_model, opt_arg, 0, 0)) { skAppPrintErr("Invalid argument for --%s: '%s'", appOptions[opt_index].name, opt_arg); return 1; } break; case RWSCAN_TRW_SIP_SET: options.trw_sip_set_file = opt_arg; break; case RWSCAN_OUTPUT_FILE: options.output_file = opt_arg; break; case RWSCAN_MODEL_FIELDS: options.model_fields = 1; break; case RWSCAN_NO_TITLES: /* no titles */ options.no_titles = 1; break; case RWSCAN_NO_COLUMNS: options.no_columns = 1; break; case RWSCAN_COLUMN_SEPARATOR: options.delimiter = opt_arg[0]; break; case RWSCAN_DELIMITED: /* dump as delimited text */ options.no_columns = 1; if (opt_arg) { options.delimiter = opt_arg[0]; } break; case RWSCAN_SCANDB: options.no_titles = 1; options.no_columns = 1; options.model_fields = 1; break; case RWSCAN_VERBOSE_FLOWS: options.verbose_flows = 1; break; case RWSCAN_VERBOSE_PROGRESS: if (skStringParseUint32(&tmpval, opt_arg, 0, 0)) { skAppPrintErr("Invalid argument for --%s: '%s'", appOptions[opt_index].name, opt_arg); return 1; } for (i = 0; i < tmpval; i++) { options.verbose_progress >>= 1; options.verbose_progress |= 0x80000000; } break; case RWSCAN_WORKER_THREADS: if (skStringParseUint32(&options.worker_threads, opt_arg, 0, 0)) { skAppPrintErr("Invalid argument for --%s: '%s'", appOptions[opt_index].name, opt_arg); return 1; } break; case RWSCAN_WORK_QUEUE_DEPTH: if (skStringParseUint32(&options.work_queue_depth, opt_arg, 0, 0)) { skAppPrintErr("Invalid argument for --%s: '%s'", appOptions[opt_index].name, opt_arg); return 1; } break; } return 0; /* OK */ } /* * appSetup(argc, argv); * * Perform all the setup for this application include setting up * required modules, parsing options, etc. This function should be * passed the same arguments that were passed into main(). * * Returns to the caller if all setup succeeds. If anything fails, * this function will cause the application to exit with a FAILURE * exit status. */ void appSetup( int argc, char **argv) { /* verify same number of options and help strings */ assert((sizeof(appHelp) / sizeof(char *)) == (sizeof(appOptions) / sizeof(struct option))); /* register the application */ skAppRegister(argv[0]); skOptionsSetUsageCallback(&appUsageLong); /* initialize globals */ memset(&options, 0, sizeof(options_t)); options.worker_threads = 0; options.work_queue_depth = 0; options.no_titles = 0; options.no_columns = 0; options.delimiter = '|'; memset(&trw_data, 0, sizeof(trw_data_t)); pthread_mutex_init(&trw_data.mutex, NULL); memset(&summary_metrics, 0, sizeof(summary_metrics)); /* register the options */ if (optionsRegister(appOptions, (optHandler)appOptionsHandler, NULL)) { skAppPrintErr("unable to register options"); exit(EXIT_FAILURE); } ioISP = iochecksSetup(0, 0, argc, argv); /* parse options; print usage if error */ ioISP->firstFile = optionsParse(argc, argv); if (ioISP->firstFile < 0) { skAppUsage(); } /* Use STDIN as an input stream if it is not a TTY; make certain * we have some input and we are either reading from STDIN or * using files listed the command line, but not both. */ if (iochecksAcceptFromStdin(ioISP) || iochecksInputs(ioISP, 0)) { skAppUsage(); } if (options.worker_threads == 0) { /* if no thread options were specified, use defaults */ options.worker_threads = 1; options.work_queue_depth = 1; } else if (options.work_queue_depth == 0) { /* if threads was specified but queue depth wasn't, set the queue * depth to the number of threads */ options.work_queue_depth = options.worker_threads; } if (options.output_file == NULL) { options.output_file = "scans.dat"; } if (options.scan_model == 0 || options.scan_model == 1) { if (options.trw_sip_set_file == NULL) { skAppPrintErr("TRW scan model enabled, but --%s not specified", appOptions[RWSCAN_TRW_SIP_SET].name); exit(EXIT_FAILURE); } if (skIPTreeLoad(options.trw_sip_set_file, &(trw_data.existing)) != 0) { skAppPrintErr("Error reading binary IPset from '%s'", options.trw_sip_set_file); exit(EXIT_FAILURE); } skIPTreeCreate(&(trw_data.benign)); skIPTreeCreate(&(trw_data.scanners)); } /* open the output */ out_scans = fopen(options.output_file, "w"); if (out_scans == (FILE *) NULL) { skAppPrintErr("Cannot open %s for writing", options.output_file); exit(EXIT_FAILURE); } return; /* OK */ } /* * appTeardown() * * Teardown all modules, close all files, and tidy up all * application state. * * This function is idempotent. */ void appTeardown( void) { static uint8_t teardownFlag = 0; if (teardownFlag) { return; } teardownFlag = 1; if (out_scans != NULL) { fclose(out_scans); } if (trw_data.benign != NULL) { skIPTreeDelete(&(trw_data.benign)); } if (trw_data.scanners != NULL) { skIPTreeDelete(&(trw_data.scanners)); } if (trw_data.existing != NULL) { skIPTreeDelete(&(trw_data.existing)); } if (ioISP != NULL) { iochecksTeardown(ioISP); } skAppUnregister(); } int rwrec_compare_proto_stime( const void *a, const void *b) { rwRec *pa = (rwRec *) a; rwRec *pb = (rwRec *) b; if (pa->proto > pb->proto) { return 1; } else if (pa->proto < pb->proto) { return -1; } else if (pa->sTime > pb->sTime) { return 1; } else if (pa->sTime < pb->sTime) { return -1; } else if (pa->sTime_msec < pb->sTime_msec) { return 1; } else if (pa->sTime_msec > pb->sTime_msec) { return -1; } else { return 0; } } int rwrec_compare_dip( const void *a, const void *b) { rwRec *pa = (rwRec *) a; rwRec *pb = (rwRec *) b; if (pa->dIP.ipnum > pb->dIP.ipnum) { return 1; } else if (pa->dIP.ipnum < pb->dIP.ipnum) { return -1; } else { return 0; } } int rwrec_compare_dip_sport( const void *a, const void *b) { rwRec *pa = (rwRec *) a; rwRec *pb = (rwRec *) b; if (pa->dIP.ipnum > pb->dIP.ipnum) { return 1; } else if (pa->dIP.ipnum < pb->dIP.ipnum) { return -1; } else if (!(pa->proto == IPPROTO_TCP) || (pa->proto == IPPROTO_UDP)) { return 0; } else if (pa->sPort > pb->sPort) { return 1; } else if (pa->sPort < pb->sPort) { return -1; } else { return 0; } } void calculate_shared_metrics( rwRec *event_flows, event_metrics_t *metrics) { uint32_t last_dip = 0xffffffff; uint32_t last_sp = 0xffffffff; uint32_t last_dp = 0xffffffff; uint32_t i = 0; rwRec *rwcurr = NULL; rwRec *rwprev = NULL; metrics->sp_count = 1; metrics->unique_dips = 1; metrics->unique_dsts = 0; last_dip = event_flows[0].dIP.ipnum; last_sp = event_flows[0].sPort; for (i = 0; i < metrics->event_size; i++) { rwcurr = &(event_flows[i]); metrics->pkts += rwcurr->pkts; metrics->bytes += rwcurr->bytes; if (rwcurr->dIP.ipnum == last_dip) { if ((rwcurr->sPort != last_sp)) { metrics->sp_count++; } } else { metrics->sp_count = 1; metrics->unique_dips++; } /* FIXME: should "unique_dsts be unique dips, or unique dip+dport ? */ if ((rwcurr->dIP.ipnum != last_dip) || (rwcurr->dPort != last_dp)) { metrics->unique_dsts++; } last_sp = rwcurr->sPort; last_dp = rwcurr->dPort; last_dip = rwcurr->dIP.ipnum; rwprev = rwcurr; } } void print_flow( rwRec *rwcurr) { char sipstr[16]; char dipstr[16]; char timestr[SK_TIMESTAMP_STRLEN]; struct timeval tv; num2dot_r(rwcurr->sIP.ipnum, sipstr); num2dot_r(rwcurr->dIP.ipnum, dipstr); tv.tv_usec = rwcurr->sTime_msec * 1000; tv.tv_sec = rwcurr->sTime; sktimestamp_r(timestr, &tv, 0); switch (rwcurr->proto) { case IPPROTO_ICMP: { uint8_t type = 0, code = 0; if (rwcurr->dPort > 0) { /* ICMP type is in the most significant byte of dPort */ type = ((rwcurr->dPort >> 8) & 0xFF); /* ICMP code is in the least significant byte of dPort */ code = (rwcurr->dPort & 0xFF); } else { /* ICMP type is in the least significant byte of sPort */ type = rwcurr->sPort & 0xFF; /* ICMP code is in the most significant byte of sPort */ code = ((rwcurr->sPort >> 8) & 0xFF); } printf("%-4d %16s -> %16s icmp(%03u,%03u) %-24s %6d %3d %6d %8s\n", rwcurr->proto, sipstr, dipstr, type, code, timestr, rwcurr->bytes, rwcurr->pkts, (rwcurr->bytes / rwcurr->pkts), tcpflags_string(rwcurr->flags)); } break; case IPPROTO_TCP: case IPPROTO_UDP: printf("%-4d %16s:%5d -> %16s:%5d %-24s %6d %3d %6d %8s\n", rwcurr->proto, sipstr, rwcurr->sPort, dipstr, rwcurr->dPort, timestr, rwcurr->bytes, rwcurr->pkts, (rwcurr->bytes / rwcurr->pkts), tcpflags_string(rwcurr->flags)); break; default: break; } } /* ** Local Variables: ** mode:c ** indent-tabs-mode:nil ** c-basic-offset:4 ** End: */