/* $Id: surblhost.c 154 2007-04-09 15:10:13Z cslsublevel3org $ * * Copyright (C) 2006, 2007 Christian Stigen Larsen * Distributed under the GNU General Public License v2. * * http://csl.sublevel3.org * */ #if HAVE_CONFIG_H #include "config.h" #endif #include #include #include #if HAVE_NETDB_H #include #endif #if HAVE_SYS_TYPES_H #include #endif #if HAVE_SYS_SOCKET_H #include #endif #if HAVE_NETINET_IN_H #include #endif #if HAVE_ARPA_INET_H #include #endif #if HAVE_ARPA_NAMESER_H #include #endif #if HAVE_RESOLV_H #include #endif #if HAVE_NAMESER8_COMPAT_H #include #endif static struct options_ { const char* surbl; const char* tlds_file; const char* wlist_file; const char** ptlds; const char** pwhitelist; int check_whitelist; int hosts; int print_tlds; int print_whitelist; int quiet; int read_stdin; int recursive_strip; int run_test; int skip_whitelist; int strip_subdomains; int tlds_add; int verbose; int wlist_add; } global_options; static const char version[] = PACKAGE_STRING; static const char copyright[] = "Copyright (C) 2006, 2007 Christian Stigen Larsen "; static const char license[] = "Distributed under the GNU General Public License v2"; extern int h_errno; extern const char* whitelist[]; // whitelist.c extern const char* two_level_tlds[]; // two-level-tlds.c static int is_twolevel_tld(const char* host, const char** tldlist); static void help() { fprintf(stderr, "%s\n%s\n%s", version, copyright, license); fputs("\n\n" "USAGE: surblhost [option(s)] [hostname(s)]\n" "\n" "Checks if given hostnames are blocklisted by surbl.org,\n" "meaning that spam mail have links to the given site.\n" "\n" "OPTIONS\n" " - read hostnames from standard input, one per line\n" " --check-whitelist checks if the whitelist is indeed ok\n" " -h, --help print help\n" " --no-surbl do not add .multi.surbl.org to checked hostnames\n" " --print-tlds print current two-level tld list\n" " --print-whitelist print current whitelist\n" " -q, --quiet run silently\n" " -r, --recursive check all subdomains, e.g. foo.bar.com and bar.com\n" " -f, --skip-whitelist ignore whitelist (force check)\n" " -s, --strip-sub strip subdomains, e.g. abc.def.com -> def.com\n" " --surbl=... use another dns suffix than multi.surbl.org\n" " --test check permanent test point, should be blocklisted\n" " --tlds-add=... as --tlds=... but adds entries to precompiled list\n" " --tlds=... read two-level tld from file, one entry per line\n" " -v, --verbose verbose output\n" " -vv more verbose output\n" " -V --version print program version\n" " --whitelist-add=... as --whitelist but adds entries to precompiled list\n" " --whitelist=... read whitelist from given file, one entry per line\n" "\n" "NOTES\n" " You should only check the base site name and TLD, i.e., if you want to\n" " investigate http://some.place.com, then you will just submit `place.com'\n" " to surblhost. For more information, see http://www.surbl.org\n" "\n" "EXAMPLES\n" " surblhost -v somesite.com\n" " surblhost --test --verbose\n" " surblhost evilsite1.com evilsite2.com goodside1.com\n" " surblhost foo.com -q && echo whitelisted site || echo blocklisted site\n" "\n" "RETURN VALUES\n" " 0 = no sites blocklisted\n" " 1 = error\n" " 2 = one or more site blocklisted\n" "\n" "Report bugs to <" PACKAGE_BUGREPORT ">\n" , stderr); } static int check_blocklist(const int code) { const int v = global_options.verbose; if ( code >= 2 ) printf(v? "is blocklisted by " : "is blocklisted"); if ( v ) { if ( code & 64 ) fputs(v>1? "prolocation/jwspamspy " : "[jp]", stdout); if ( code & 32 ) fputs(v>1? "abusebutler " : "[ab]", stdout); if ( code & 16 ) fputs(v>1? "outblaze " : "[ob]", stdout); if ( code & 8 ) fputs(v>1? "phising " : "[ph]", stdout); if ( code & 2 ) fputs(v>1? "spamcop " : "[sc]", stdout); if ( code & 4 ) fputs(v>1? "w.stearns " : "[ws]", stdout); } printf("\n"); return code >= 2; } inline static int is_ipaddr(const char* s) { int a,b,c,d; return sscanf(s, "%d.%d.%d.%d", &a, &b, &c, &d) == 4; } static void check_hostname(const char* s) { int dots = 0; int maxdots; const char* host = s; if ( *global_options.surbl==0 || is_ipaddr(s) ) return; if ( global_options.recursive_strip ) return; maxdots = is_twolevel_tld(s, global_options.ptlds) ? 2 : 1; while ( *s ) { if ( *s == '.' ) if ( ++dots > maxdots ) { fprintf(stderr, "warning: hostname contains subdomains: %s (try option -r)\n", host); return; } ++s; } } // Example: num_domains("a.b.c") == 3, num_domains("a.b") == 2 static int num_domains(const char* s) { int r = *s? 1 : 0; while ( *s ) { if ( *s++=='.' ) ++r; } return r; } static const char* remove_subdomains(const char* orig, const int output_domains) { const char* s = orig + strlen(orig); int dots=0; while ( s > orig ) { if ( *s == '.') ++dots; if ( dots == output_domains ) { ++s; break; } --s; } return s; } /* * This function is taken from the freshclam Clam Antivirus * package, Copyright (C) 2004 Tomasz Kojm , * and released under the GPL v2. * */ static char* txtquery(const char* domain, unsigned int* ttl) { #if ENABLE_DNSTXT #if HAS_WORKING_RESOLV static int res_initialized = 0; #ifndef HAVE_MEMSET int nn; #endif unsigned char answer[PACKETSZ], *pt; char *txt; char host[128]; int len, exp, cttl, size, txtlen, type; if ( !res_initialized ) { if ( res_init() < 0 ) { printf("res_init failed\n"); return NULL; } res_initialized = 1; } #if HAVE_MEMSET memset(answer, 0, PACKETSZ); #else for ( nn=0; nn= size || !txtlen) { printf("broken TXT record (txtlen = %d, size = %d)\n", txtlen, size); return NULL; } if(!(txt = (char*)malloc(txtlen + 1))) return NULL; pt++; strncpy(txt, (char*)pt, txtlen); txt[txtlen] = 0; return txt; #else // ENABLE_DNSTXT not set return NULL; #endif #endif } static int compmi(const void *m1, const void *m2) { return strcmp(*(const char**)m1, *(const char**)m2); } static size_t listsize(const char** p) { size_t n = 0; while ( *p++ ) ++n; return n; } /* * Returns 1 if `host' is in `wlist' * Returns 0 if not found * * Will automatically remove subdomains, example: * * www.google.com -> google.com -> white (returns 1) * w.googl.com -> google.com -> white * google.com.jp -> google.com -> white * evil.spam.com -> spam.com -> ?? * */ static int in_whitelist(const char* host, const char** wlist) { int n; const int wlen = listsize(wlist); for ( n=num_domains(host); n>1; --n ) { const char* h = remove_subdomains(host, n); if ( bsearch((char*)&h, (char*)wlist, wlen, sizeof(const char*), compmi) != NULL ) { return 1; } } return 0; } static int is_twolevel_tld(const char* host, const char** tldlist) { const int size = listsize(tldlist); if ( num_domains(host) >= 2 ) { const char* host_tld = remove_subdomains(host, 2); if ( bsearch((char*)&host_tld, (char*)tldlist, size, sizeof(const char*), compmi) != NULL ) { return 1; } } if ( global_options.tlds_add && tldlist!=two_level_tlds ) return is_twolevel_tld(host, two_level_tlds); // host is not a two-level TLD return 0; } inline static void print_list(const char** wlist) { while ( *wlist ) puts(*wlist++); } static int lookup(const char* host) { char *lookup; int adr[4]; struct hostent *p, h; int res, bl; if ( !global_options.skip_whitelist ) { int res = in_whitelist(host, global_options.pwhitelist); if ( global_options.wlist_add ) // check user-specified wlist as well? res |= in_whitelist(host, whitelist); if ( res ) { fprintf(stdout, "%s is whitelisted\n", host); return 0; } } check_hostname(host); lookup = (char*)malloc(strlen(host) + strlen(global_options.surbl) + 2); if ( sscanf(host, "%d.%d.%d.%d", &adr[0], &adr[1], &adr[2], &adr[3]) == 4 ) { // reverse numeric addresses sprintf(lookup, "%d.%d.%d.%d", adr[3], adr[2], adr[1], adr[0]); } else strcpy(lookup, host); if ( *global_options.surbl ) { strcat(lookup, "."); strcat(lookup, global_options.surbl); } fprintf(stdout, "%s ", global_options.verbose? lookup : host); if ( (p = gethostbyname(lookup)) ) memcpy(&h, p, sizeof(struct hostent)); res = 0; if ( !p ) { // TODO: check for explicit NXDOMAIN fputs("is not blocklisted\n", stdout); } else { while ( *h.h_addr_list != NULL ) { #if HAVE_INET_NTOA if ( global_options.verbose > 1 ) // print ip address fprintf(stdout, "%s ", inet_ntoa(*(struct in_addr*) *h.h_addr_list)); #endif bl = check_blocklist( (int) (*h.h_addr_list)[3] ); #if ENABLE_DNSTXT if ( bl>0 && global_options.verbose>1 ) { unsigned int ttl; char *p = txtquery(lookup, &ttl); if ( p ) { fprintf(stdout, "%s txt record: %s\n", lookup, p); free(p); } } #endif res += bl; ++h.h_addr_list; } } free(lookup); return res; } // perform lookup on hostname with all parameters in effect // returns 0 if not in blocklist, or number of hits in blocklist static int full_lookup(const char* s) { int blockhits = 0; if ( !global_options.recursive_strip ) { const char* host = s; if ( global_options.strip_subdomains ) host = remove_subdomains(s, is_twolevel_tld(s, global_options.ptlds) ? 3 : 2); blockhits += lookup(host); } else { int domains = num_domains(s); int min = !is_twolevel_tld(s, global_options.ptlds) ? 1 : 2; for ( ; domains > min; --domains ) blockhits += lookup( remove_subdomains(s, domains) ); } return blockhits; } inline static char* remove_newline(char* s) { const int l = strlen(s); if ( s[l-1] == '\n' ) s[l-1] = '\0'; return s; } static const char** read_list(const char* file) { int lsize = 100; // initial list size const char **plist; FILE *f; int count = 0; int l; char *s; char buf[512]; if ( (f = fopen(file, "rt")) == NULL ) return NULL; if ( !(plist = (const char**) malloc( sizeof(char*) * (lsize+1) )) ) { fputs("error: not enough memory to hold list\n", stderr), exit(1); } plist[0] = NULL; while ( !feof(f) ) { if ( !fgets(buf, sizeof(buf)/sizeof(char), f) ) break; remove_newline(buf); l = strlen(buf); s = (char*) malloc(l+1); strcpy(s, buf); plist[count] = s; plist[count+1] = NULL; if ( ++count >= lsize ) { lsize *= 2; if ( !(plist = (const char**) realloc( plist, sizeof(char*) * (lsize+1) )) ) { fputs("error: not enough memory to grow list\n", stderr); exit(1); } } } if ( global_options.verbose ) { fprintf(stderr, "%s %d lines from %s\n", (lsize? "read" : "warning: read"), count, file); } return lsize? plist : NULL; } static void set_defaults(struct options_ *p) { p->surbl = "multi.surbl.org"; p->verbose = p->run_test = p->quiet = p->strip_subdomains = p->recursive_strip = p->skip_whitelist = p->read_stdin = p->print_whitelist = p->print_tlds = p->hosts = p->wlist_add = p->tlds_add = p->check_whitelist = 0; p->pwhitelist = whitelist; // hardcoded p->ptlds = two_level_tlds; // hardcoded p->wlist_file = p->tlds_file = NULL; } static void parse_options(int argc, char** argv, struct options_ *p) { #define IF_OPT(str) if ( !strcmp(argv[n], str) ) #define IF_OPTS(str1, str2) if ( !strcmp(argv[n], str1) || !strcmp(argv[n], str2) ) #define IF_ARG(str, len) if ( !strncmp(argv[n], str, len) ) int n; for ( n=1; nhosts; continue; } if ( argv[n][0] == '-' && argv[n][1] == 0 ) { p->read_stdin = 1; continue; } IF_ARG("--surbl=", 8) { p->surbl = argv[n] + 8; continue; } IF_OPTS("-f", "--skip-whitelist") { p->skip_whitelist = 1; continue; } IF_OPT("--print-tlds") { p->print_tlds = 1; continue; } IF_OPT("--print-whitelist") { p->print_whitelist = 1; continue; } IF_ARG("--whitelist=", 12) { p->wlist_file = argv[n] + 12; continue; } IF_ARG("--whitelist-add=", 16) { p->wlist_file = argv[n] + 16; p->wlist_add = 1; continue; } IF_ARG("--tlds=", 7) { p->tlds_file = argv[n] + 7; continue; } IF_ARG("--tlds-add=", 11) { p->tlds_file = argv[n] + 11; p->tlds_add = 1; continue; } IF_OPT("--no-surbl") { p->surbl = ""; continue; } IF_OPT("--check-whitelist") { p->check_whitelist = 1; continue; } IF_OPTS("-h", "--help") { help(); exit(0); } IF_OPTS("-V", "--version") { fprintf(stderr, "%s\n%s\n%s\n", version, copyright, license); exit(0); } IF_OPTS("-r", "--recursive" ) { p->recursive_strip= 1; continue; } IF_OPTS("-s", "--strip-sub") { p->strip_subdomains = 1; continue; } IF_OPTS("-v", "--verbose") { ++ p->verbose; continue; } IF_OPT("-vv") { p->verbose += 2; continue; } IF_OPTS("-q", "--quiet") { p->quiet = 1; continue; } IF_OPT("--test") { p->run_test = 1; continue; } fprintf(stderr, "error: Unknown option %s\n\n", argv[n]); help(); exit(1); } } static void check_options(struct options_* p) { if ( p->quiet ) { p->verbose = 0; fclose(stdout); } // sort hardcoded lists qsort((char*)whitelist, listsize(whitelist), sizeof(char*), compmi); qsort((char*)two_level_tlds, listsize(two_level_tlds), sizeof(char*), compmi); if ( p->wlist_file ) { if ( (p->pwhitelist = read_list(p->wlist_file)) == NULL ) { fprintf(stderr, "error: could not read whitelist from %s\n", p->wlist_file); exit(1); } // sort for bsearch qsort((char*)p->pwhitelist, listsize(p->pwhitelist), sizeof(char*), compmi); // we do not free allocated memory explicitly (the lists), because // that is a waste of time; your OS will erase the entire process memory // faster. If you don't believe that, ask the glibc developer Ulrich Drepper // (search his page on the web). } if ( p->tlds_file ) { if ( (p->ptlds = read_list(p->tlds_file)) == NULL ) { fprintf(stderr, "error: could not read two-level TLD list from %s\n", p->tlds_file); exit(1); } qsort((char*)p->ptlds, listsize(p->ptlds), sizeof(char*), compmi); } if ( p->print_tlds ) { fputs("two-level TLDs:\n", stderr); print_list(p->ptlds); if ( p->tlds_add && p->ptlds!=two_level_tlds ) print_list(two_level_tlds); } if ( p->print_whitelist ) { fputs("whitelist:\n", stderr); print_list(p->pwhitelist); if ( p->wlist_add && p->pwhitelist!=whitelist ) print_list(whitelist); } if ( p->print_whitelist || p->print_tlds ) exit(0); if ( p->run_test ) { p->surbl = ""; exit( lookup("test.sc.surbl.org.sc.surbl.org") > 0 ? 2 : 0 ); } if ( p->hosts==0 && !p->read_stdin && !p->check_whitelist ) { fputs("error: no host(s) specified\n\n", stderr); help(); exit(1); } } // returns number of hostnames in whitelist that are actually blocklisted static int check_whitelist(const char** wlist) { int hits = 0; // modify some parameters for our check: int skip_whitelist = global_options.skip_whitelist; int recursive_strip = global_options.recursive_strip; global_options.skip_whitelist = 1; global_options.recursive_strip = 1; for ( ; *wlist; ++wlist ) hits += full_lookup(*wlist); global_options.skip_whitelist = skip_whitelist; global_options.recursive_strip = recursive_strip; return hits; } int main(int argc, char** argv) { int hits, blockhits; struct options_ *p = &global_options; set_defaults(p); parse_options(argc, argv, p); check_options(p); if ( *p->surbl && p->verbose ) { fprintf(stderr, "checking against %s%s\n", p->surbl, p->recursive_strip? " recursively" : ""); } if ( p->check_whitelist ) { fputs("checking if whitelist is indeed ok (this should be done rarely)\n\n", stderr); hits = check_whitelist(global_options.pwhitelist); if ( global_options.wlist_add) hits += check_whitelist(whitelist); if ( !hits ) { fputs("\nsummary: the whitelist is ok\n", stderr); return 0; } else { fprintf(stderr, "\nsummary: the whitelist is not ok -- found %d blocked hosts\n", hits); return 2; } } blockhits = 0; if ( p->read_stdin ) { char buf[2048]; while ( fgets(buf, sizeof(buf)/sizeof(char), stdin) ) blockhits += full_lookup( remove_newline(buf) ); } else { int n; for ( n=1; nverbose && blockhits ) fputs("\nSee http://www.surbl.org/lists.html for more information on the blocklists\n", stdout); return blockhits? 2 : 0; }