/* $Id: surblhost.c 154 2007-04-09 15:10:13Z cslsublevel3org $
*
* Copyright (C) 2006, 2007 Christian Stigen Larsen <csl@sublevel3.org>
* Distributed under the GNU General Public License v2.
*
* http://csl.sublevel3.org
*
*/
#if HAVE_CONFIG_H
#include "config.h"
#endif
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#if HAVE_NETDB_H
#include <netdb.h>
#endif
#if HAVE_SYS_TYPES_H
#include <sys/types.h>
#endif
#if HAVE_SYS_SOCKET_H
#include <sys/socket.h>
#endif
#if HAVE_NETINET_IN_H
#include <netinet/in.h>
#endif
#if HAVE_ARPA_INET_H
#include <arpa/inet.h>
#endif
#if HAVE_ARPA_NAMESER_H
#include <arpa/nameser.h>
#endif
#if HAVE_RESOLV_H
#include <resolv.h>
#endif
#if HAVE_NAMESER8_COMPAT_H
#include <nameser8_compat.h>
#endif
/*
 * Program-wide settings. main() fills this with set_defaults(), updates it
 * from the command line with parse_options() and finalizes it with
 * check_options().
 */
static struct options_ {
// DNS suffix appended to every looked-up name; "" means append nothing
const char* surbl;
// file name given with --tlds=... or --tlds-add=..., NULL if none
const char* tlds_file;
// file name given with --whitelist=... or --whitelist-add=..., NULL if none
const char* wlist_file;
// two-level TLD list in use (NULL-terminated; sorted in check_options)
const char** ptlds;
// whitelist in use (NULL-terminated; sorted in check_options)
const char** pwhitelist;
// set by --check-whitelist
int check_whitelist;
// number of command line arguments that do not start with '-'
int hosts;
// set by --print-tlds
int print_tlds;
// set by --print-whitelist
int print_whitelist;
// set by -q / --quiet
int quiet;
// set by a lone "-" argument: read hostnames from standard input
int read_stdin;
// set by -r / --recursive
int recursive_strip;
// set by --test
int run_test;
// set by -f / --skip-whitelist
int skip_whitelist;
// set by -s / --strip-sub
int strip_subdomains;
// set by --tlds-add=...: the precompiled TLD list is consulted as well
int tlds_add;
// verbosity level: incremented by -v / --verbose, raised by 2 by -vv
int verbose;
// set by --whitelist-add=...: the precompiled whitelist is consulted as well
int wlist_add;
} global_options;
// Banner strings printed by help() and by the -V / --version option
static const char version[] = PACKAGE_STRING;
static const char copyright[] = "Copyright (C) 2006, 2007 Christian Stigen Larsen <csl@sublevel3.org>";
static const char license[] = "Distributed under the GNU General Public License v2";

// Some C libraries provide h_errno as a macro in <netdb.h>. Redeclaring it
// as a plain variable would then expand into a conflicting declaration, so
// only declare it ourselves when no such macro exists.
#ifndef h_errno
extern int h_errno;
#endif

// Precompiled, NULL-terminated lists
extern const char* whitelist[]; // whitelist.c
extern const char* two_level_tlds[]; // two-level-tlds.c

// Defined further down; check_hostname() needs it before that point
static int is_twolevel_tld(const char* host, const char** tldlist);
static void help()
{
fprintf(stderr, "%s\n%s\n%s", version, copyright, license);
fputs("\n\n"
"USAGE: surblhost [option(s)] [hostname(s)]\n"
"\n"
"Checks if given hostnames are blocklisted by surbl.org,\n"
"meaning that spam mail have links to the given site.\n"
"\n"
"OPTIONS\n"
" - read hostnames from standard input, one per line\n"
" --check-whitelist checks if the whitelist is indeed ok\n"
" -h, --help print help\n"
" --no-surbl do not add .multi.surbl.org to checked hostnames\n"
" --print-tlds print current two-level tld list\n"
" --print-whitelist print current whitelist\n"
" -q, --quiet run silently\n"
" -r, --recursive check all subdomains, e.g. foo.bar.com and bar.com\n"
" -f, --skip-whitelist ignore whitelist (force check)\n"
" -s, --strip-sub strip subdomains, e.g. abc.def.com -> def.com\n"
" --surbl=... use another dns suffix than multi.surbl.org\n"
" --test check permanent test point, should be blocklisted\n"
" --tlds-add=... as --tlds=... but adds entries to precompiled list\n"
" --tlds=... read two-level tld from file, one entry per line\n"
" -v, --verbose verbose output\n"
" -vv more verbose output\n"
" -V --version print program version\n"
" --whitelist-add=... as --whitelist but adds entries to precompiled list\n"
" --whitelist=... read whitelist from given file, one entry per line\n"
"\n"
"NOTES\n"
" You should only check the base site name and TLD, i.e., if you want to\n"
" investigate http://some.place.com, then you will just submit `place.com'\n"
" to surblhost. For more information, see http://www.surbl.org\n"
"\n"
"EXAMPLES\n"
" surblhost -v somesite.com\n"
" surblhost --test --verbose\n"
" surblhost evilsite1.com evilsite2.com goodside1.com\n"
" surblhost foo.com -q && echo whitelisted site || echo blocklisted site\n"
"\n"
"RETURN VALUES\n"
" 0 = no sites blocklisted\n"
" 1 = error\n"
" 2 = one or more site blocklisted\n"
"\n"
"Report bugs to <" PACKAGE_BUGREPORT ">\n"
, stderr);
}
/*
 * Prints the verdict for one answer code to stdout and ends the line.
 *
 * `code' is the value lookup() takes from the last octet of a returned
 * address. Each bit tested below stands for one blocklist.
 *
 * Output depends on global_options.verbose:
 *   0   "is blocklisted" (only when code >= 2)
 *   1   "is blocklisted by " followed by short tags such as "[sc]"
 *   >1  "is blocklisted by " followed by the long list names
 *
 * Returns 1 if code >= 2, otherwise 0.
 */
static int check_blocklist(const int code)
{
  // kept in the order the lists have always been printed
  static const struct {
    int bit;
    const char* name; // long form, verbose > 1
    const char* tag;  // short form, verbose == 1
  } lists[] = {
    { 64, "prolocation/jwspamspy ", "[jp]" },
    { 32, "abusebutler ",           "[ab]" },
    { 16, "outblaze ",              "[ob]" },
    {  8, "phishing ",              "[ph]" },
    {  2, "spamcop ",               "[sc]" },
    {  4, "w.stearns ",             "[ws]" }
  };
  const int v = global_options.verbose;
  const int listed = code >= 2;
  size_t i;

  if ( listed )
    fputs(v? "is blocklisted by " : "is blocklisted", stdout);

  if ( v ) {
    for ( i = 0; i < sizeof(lists)/sizeof(lists[0]); ++i ) {
      if ( code & lists[i].bit )
        fputs(v>1? lists[i].name : lists[i].tag, stdout);
    }
  }

  fputs("\n", stdout);
  return listed;
}
/*
 * Returns 1 if `s' consists of exactly four dot-separated decimal numbers
 * in the range 0..255, otherwise 0.
 *
 * The whole string must match: a name that merely starts with something
 * that looks like an address (for example "1.2.3.4.example.com") is a
 * hostname, not an address.
 */
inline
static int is_ipaddr(const char* s)
{
  int a, b, c, d;
  char tail;

  // %c only converts if something follows the fourth number, so a result
  // of exactly 4 means the string ended right after it
  if ( sscanf(s, "%d.%d.%d.%d%c", &a, &b, &c, &d, &tail) != 4 )
    return 0;

  return a >= 0 && a <= 255
      && b >= 0 && b <= 255
      && c >= 0 && c <= 255
      && d >= 0 && d <= 255;
}
/*
 * Warns on stderr when `s' has more labels than a registrable domain needs,
 * i.e. when it seems to contain subdomains.
 *
 * Nothing is checked when no suffix is appended (surbl is ""), when `s' is
 * a numeric address, or when recursive stripping is active.
 */
static void check_hostname(const char* s)
{
  const char* dot;
  int allowed;
  int seen = 0;

  if ( *global_options.surbl == 0 || is_ipaddr(s) || global_options.recursive_strip )
    return;

  // names under a two-level TLD may carry one dot more
  allowed = is_twolevel_tld(s, global_options.ptlds) ? 2 : 1;

  for ( dot = strchr(s, '.'); dot != NULL; dot = strchr(dot + 1, '.') ) {
    if ( ++seen > allowed ) {
      fprintf(stderr, "warning: hostname contains subdomains: %s (try option -r)\n", s);
      return;
    }
  }
}
// Counts the dot-separated parts of `s': one more than the number of dots,
// or 0 for the empty string.
// Example: num_domains("a.b.c") == 3, num_domains("a.b") == 2
static int num_domains(const char* s)
{
  int count;

  if ( *s == '\0' )
    return 0;

  count = 1;
  for ( ; *s != '\0'; ++s ) {
    if ( *s == '.' )
      ++count;
  }
  return count;
}
/*
 * Returns a pointer into `orig' at which the last `output_domains'
 * dot-separated labels begin.
 *
 *   remove_subdomains("www.example.com", 2) -> "example.com"
 *   remove_subdomains("example.com", 3)     -> "example.com"
 *
 * If `orig' has fewer labels than requested, `orig' itself is returned.
 * For output_domains <= 0 the result is the empty string at the end of
 * `orig'. No memory is allocated; the result shares storage with `orig'.
 */
static const char* remove_subdomains(const char* orig, const int output_domains)
{
  const char* s = orig + strlen(orig);
  int dots = 0;

  // Asking for no labels must not step past the terminating NUL
  if ( output_domains <= 0 )
    return s;

  // Walk backwards; the first character of `orig' is never inspected
  while ( s > orig ) {
    if ( *s == '.' && ++dots == output_domains )
      return s + 1;
    --s;
  }
  return orig;
}
/*
 * This function is taken from the freshclam Clam Antivirus
 * package, Copyright (C) 2004 Tomasz Kojm <tkojm@clamav.net>,
 * and released under the GPL v2.
 *
 * Queries the DNS TXT record of `domain'.
 *
 * On success returns a malloc'ed, NUL-terminated copy of the first
 * character string of the record (the caller frees it) and stores the
 * record's TTL in `*ttl'. Returns NULL on any failure; a diagnostic is
 * printed to stdout for resolver and parse errors.
 *
 * Always returns NULL when the program is built without ENABLE_DNSTXT or
 * without HAS_WORKING_RESOLV.
 */
static char* txtquery(const char* domain, unsigned int* ttl)
{
#if ENABLE_DNSTXT && HAS_WORKING_RESOLV
  static int res_initialized = 0;
  unsigned char answer[PACKETSZ] = { 0 };
  unsigned char *pt;
  const unsigned char *end;
  char *txt;
  char host[128];
  int len, exp, cttl, size, txtlen, type;

  if ( !res_initialized ) {
    if ( res_init() < 0 ) {
      printf("res_init failed\n");
      return NULL;
    }
    res_initialized = 1;
  }

  if ( (len = res_query(domain, ns_c_in, ns_t_txt, answer, PACKETSZ)) < 0 ) {
    printf("can't query %s\n", domain);
    return NULL;
  }

  // defensive: never let the end-of-message pointer leave the buffer
  if ( len > (int)sizeof(answer) )
    len = (int)sizeof(answer);
  end = answer + len;

  if ( len < (int)sizeof(HEADER) ) {
    printf("broken DNS reply.\n");
    return NULL;
  }

  // question section: name, type, class
  pt = answer + sizeof(HEADER);
  if ( (exp = dn_expand(answer, end, pt, host, sizeof(host))) < 0 ) {
    printf("dn_expand failed\n");
    return NULL;
  }
  pt += exp;

  if ( end - pt < 2*INT16SZ ) {
    printf("broken DNS reply.\n");
    return NULL;
  }

  GETSHORT(type, pt);
  if ( type != T_TXT ) {
    printf("broken DNS reply.\n");
    return NULL;
  }
  pt += INT16SZ; // class

  // first answer record: name, type, class, ttl, data length, data
  if ( (exp = dn_expand(answer, end, pt, host, sizeof(host))) < 0 ) {
    printf("second dn_expand failed\n");
    return NULL;
  }
  pt += exp;

  // type + class + ttl + data length + the text length byte
  if ( end - pt < 3*INT16SZ + INT32SZ + 1 ) {
    printf("broken DNS reply.\n");
    return NULL;
  }

  GETSHORT(type, pt);
  if ( type != T_TXT ) {
    printf("not a TXT record\n");
    return NULL;
  }
  pt += INT16SZ; // class

  GETLONG(cttl, pt);
  *ttl = cttl;
  GETSHORT(size, pt);

  txtlen = *pt;
  if ( txtlen >= size || !txtlen ) {
    printf("broken TXT record (txtlen = %d, size = %d)\n", txtlen, size);
    return NULL;
  }
  pt++;

  // the text itself must lie inside what was actually received
  if ( txtlen > end - pt ) {
    printf("broken TXT record (txtlen = %d, size = %d)\n", txtlen, size);
    return NULL;
  }

  if ( !(txt = malloc(txtlen + 1)) )
    return NULL;

  memcpy(txt, pt, txtlen);
  txt[txtlen] = 0;
  return txt;
#else
  // DNS TXT support is not compiled in
  (void)domain;
  (void)ttl;
  return NULL;
#endif
}
/*
 * Comparison callback for qsort() and bsearch() over arrays of C strings:
 * both arguments point at array elements (i.e. at `const char*' values),
 * which are compared with strcmp().
 */
static int compmi(const void *m1, const void *m2)
{
  const char* const* lhs = m1;
  const char* const* rhs = m2;

  return strcmp(*lhs, *rhs);
}
/*
 * Returns the number of entries in the NULL-terminated string list `p',
 * not counting the terminating NULL.
 */
static size_t listsize(const char** p)
{
  const char** cur = p;

  while ( *cur != NULL )
    ++cur;

  return (size_t)(cur - p);
}
/*
 * Returns 1 if `host', or `host' with leading labels removed, is found in
 * `wlist'; returns 0 otherwise.
 *
 * Candidates are tried from the full name down to its last two labels:
 *
 *   www.google.com  ->  "www.google.com", then "google.com"
 *   evil.spam.com   ->  "evil.spam.com", then "spam.com"
 *
 * A single label is never tried, so a host without any dot never matches.
 *
 * `wlist' must be NULL-terminated and sorted in compmi() order, because it
 * is searched with bsearch().
 */
static int in_whitelist(const char* host, const char** wlist)
{
  const int entries = listsize(wlist);
  int labels = num_domains(host);

  while ( labels > 1 ) {
    const char* candidate = remove_subdomains(host, labels);
    const void* hit = bsearch(&candidate, wlist, entries,
                              sizeof(const char*), compmi);
    if ( hit != NULL )
      return 1;
    --labels;
  }
  return 0;
}
/*
 * Returns 1 if the last two labels of `host' form an entry of `tldlist',
 * otherwise 0.
 *
 * When global_options.tlds_add is set and `tldlist' is not the precompiled
 * two_level_tlds list, the precompiled list is searched as well.
 *
 * The lists must be NULL-terminated and sorted in compmi() order.
 */
static int is_twolevel_tld(const char* host, const char** tldlist)
{
  const char** list = tldlist;

  for ( ;; ) {
    const int size = listsize(list);

    if ( num_domains(host) >= 2 ) {
      const char* host_tld = remove_subdomains(host, 2);
      if ( bsearch(&host_tld, list, size, sizeof(const char*), compmi) != NULL )
        return 1;
    }

    // nothing more to search: host is not a two-level TLD
    if ( !global_options.tlds_add || list == two_level_tlds )
      return 0;

    list = two_level_tlds;
  }
}
/*
 * Prints every entry of the NULL-terminated list `wlist' on its own line
 * on stdout.
 */
inline
static void print_list(const char** wlist)
{
  const char** entry;

  for ( entry = wlist; *entry != NULL; ++entry )
    puts(*entry);
}
static int lookup(const char* host)
{
char *lookup;
int adr[4];
struct hostent *p, h;
int res, bl;
if ( !global_options.skip_whitelist ) {
int res = in_whitelist(host, global_options.pwhitelist);
if ( global_options.wlist_add ) // check user-specified wlist as well?
res |= in_whitelist(host, whitelist);
if ( res ) {
fprintf(stdout, "%s is whitelisted\n", host);
return 0;
}
}
check_hostname(host);
lookup = (char*)malloc(strlen(host) + strlen(global_options.surbl) + 2);
if ( sscanf(host, "%d.%d.%d.%d", &adr[0], &adr[1], &adr[2], &adr[3]) == 4 ) {
// reverse numeric addresses
sprintf(lookup, "%d.%d.%d.%d", adr[3], adr[2], adr[1], adr[0]);
} else
strcpy(lookup, host);
if ( *global_options.surbl ) {
strcat(lookup, ".");
strcat(lookup, global_options.surbl);
}
fprintf(stdout, "%s ", global_options.verbose? lookup : host);
if ( (p = gethostbyname(lookup)) )
memcpy(&h, p, sizeof(struct hostent));
res = 0;
if ( !p ) { // TODO: check for explicit NXDOMAIN
fputs("is not blocklisted\n", stdout);
} else {
while ( *h.h_addr_list != NULL ) {
#if HAVE_INET_NTOA
if ( global_options.verbose > 1 ) // print ip address
fprintf(stdout, "%s ", inet_ntoa(*(struct in_addr*) *h.h_addr_list));
#endif
bl = check_blocklist( (int) (*h.h_addr_list)[3] );
#if ENABLE_DNSTXT
if ( bl>0 && global_options.verbose>1 ) {
unsigned int ttl;
char *p = txtquery(lookup, &ttl);
if ( p ) {
fprintf(stdout, "%s txt record: %s\n", lookup, p);
free(p);
}
}
#endif
res += bl;
++h.h_addr_list;
}
}
free(lookup);
return res;
}
// Looks up hostname `s' with all command line parameters in effect.
//
// With recursive stripping, `s' is looked up repeatedly, one leading label
// shorter each time, down to the shortest name that is still longer than
// the (one- or two-level) TLD. Otherwise `s' is looked up once, reduced to
// its registrable part first if subdomain stripping is enabled.
//
// Returns 0 if not in blocklist, or the number of hits in the blocklist.
static int full_lookup(const char* s)
{
  int total = 0;

  if ( global_options.recursive_strip ) {
    int labels = num_domains(s);
    const int lowest = is_twolevel_tld(s, global_options.ptlds) ? 2 : 1;

    while ( labels > lowest ) {
      total += lookup( remove_subdomains(s, labels) );
      --labels;
    }
    return total;
  }

  if ( global_options.strip_subdomains ) {
    const int keep = is_twolevel_tld(s, global_options.ptlds) ? 3 : 2;
    return lookup( remove_subdomains(s, keep) );
  }

  return lookup(s);
}
/*
 * Removes one trailing line ending ("\n" or "\r\n") from `s' in place and
 * returns `s'. A string without a trailing newline, including the empty
 * string, is returned unchanged.
 */
inline
static char* remove_newline(char* s)
{
  size_t l = strlen(s);

  // guard l > 0: an empty string has no last character to inspect
  if ( l > 0 && s[l-1] == '\n' ) {
    s[--l] = '\0';
    // also drop the carriage return of a CRLF line ending
    if ( l > 0 && s[l-1] == '\r' )
      s[l-1] = '\0';
  }
  return s;
}
/*
 * Reads `file' line by line into a newly allocated, NULL-terminated list of
 * strings, one entry per line with the line ending removed. Lines longer
 * than the internal buffer are split into several entries.
 *
 * Returns NULL if the file cannot be opened. An empty file yields an empty
 * list (just the terminating NULL). The list and its strings are never
 * freed by this program. Exits with status 1 if memory runs out.
 *
 * In verbose mode the number of lines read is reported on stderr, as a
 * warning if that number is zero.
 */
static const char** read_list(const char* file)
{
  int lsize = 100; // initial list size
  const char **plist;
  FILE *f;
  int count = 0;
  char buf[512];

  if ( (f = fopen(file, "r")) == NULL )
    return NULL;

  // one extra slot for the terminating NULL
  if ( !(plist = malloc( sizeof(char*) * (lsize+1) )) ) {
    fputs("error: not enough memory to hold list\n", stderr);
    exit(1);
  }

  plist[0] = NULL;

  while ( fgets(buf, sizeof(buf), f) ) {
    char *s;

    remove_newline(buf);

    if ( !(s = malloc(strlen(buf) + 1)) ) {
      fputs("error: not enough memory to hold list\n", stderr);
      exit(1);
    }
    strcpy(s, buf);

    plist[count] = s;
    plist[count+1] = NULL;

    if ( ++count >= lsize ) {
      const char **grown;

      lsize *= 2;
      if ( !(grown = realloc( plist, sizeof(char*) * (lsize+1) )) ) {
        fputs("error: not enough memory to grow list\n", stderr);
        exit(1);
      }
      plist = grown;
    }
  }

  fclose(f);

  if ( global_options.verbose ) {
    fprintf(stderr, "%s %d lines from %s\n",
      (count? "read" : "warning: read"), count, file);
  }

  return plist;
}
/*
 * Resets `*p' to the built-in defaults: suffix "multi.surbl.org", the
 * precompiled whitelist and two-level TLD list, no list files, and every
 * flag and counter zero.
 */
static void set_defaults(struct options_ *p)
{
  p->surbl = "multi.surbl.org";

  // no user-supplied list files
  p->tlds_file = NULL;
  p->wlist_file = NULL;

  p->ptlds = two_level_tlds; // hardcoded
  p->pwhitelist = whitelist; // hardcoded

  // all flags off, all counters zero
  p->check_whitelist = 0;
  p->hosts = 0;
  p->print_tlds = 0;
  p->print_whitelist = 0;
  p->quiet = 0;
  p->read_stdin = 0;
  p->recursive_strip = 0;
  p->run_test = 0;
  p->skip_whitelist = 0;
  p->strip_subdomains = 0;
  p->tlds_add = 0;
  p->verbose = 0;
  p->wlist_add = 0;
}
/*
 * Applies the command line in argv[1..argc-1] to `*p'.
 *
 * Arguments that do not start with '-' are only counted (p->hosts); main()
 * looks them up later. A lone "-" selects reading hostnames from standard
 * input. -h/--help and -V/--version print to stderr and exit with status 0.
 * An unknown option prints an error and the help text and exits with
 * status 1.
 */
static void parse_options(int argc, char** argv, struct options_ *p)
{
  int n;

  for ( n=1; n<argc; ++n ) {
    const char* arg = argv[n];

    if ( arg[0] != '-' ) {
      ++ p->hosts;
    } else if ( arg[1] == '\0' ) {
      p->read_stdin = 1;
    } else if ( strncmp(arg, "--surbl=", 8) == 0 ) {
      p->surbl = arg + 8;
    } else if ( strcmp(arg, "-f") == 0 || strcmp(arg, "--skip-whitelist") == 0 ) {
      p->skip_whitelist = 1;
    } else if ( strcmp(arg, "--print-tlds") == 0 ) {
      p->print_tlds = 1;
    } else if ( strcmp(arg, "--print-whitelist") == 0 ) {
      p->print_whitelist = 1;
    } else if ( strncmp(arg, "--whitelist=", 12) == 0 ) {
      p->wlist_file = arg + 12;
    } else if ( strncmp(arg, "--whitelist-add=", 16) == 0 ) {
      p->wlist_file = arg + 16;
      p->wlist_add = 1;
    } else if ( strncmp(arg, "--tlds=", 7) == 0 ) {
      p->tlds_file = arg + 7;
    } else if ( strncmp(arg, "--tlds-add=", 11) == 0 ) {
      p->tlds_file = arg + 11;
      p->tlds_add = 1;
    } else if ( strcmp(arg, "--no-surbl") == 0 ) {
      p->surbl = "";
    } else if ( strcmp(arg, "--check-whitelist") == 0 ) {
      p->check_whitelist = 1;
    } else if ( strcmp(arg, "-h") == 0 || strcmp(arg, "--help") == 0 ) {
      help();
      exit(0);
    } else if ( strcmp(arg, "-V") == 0 || strcmp(arg, "--version") == 0 ) {
      fprintf(stderr, "%s\n%s\n%s\n", version, copyright, license);
      exit(0);
    } else if ( strcmp(arg, "-r") == 0 || strcmp(arg, "--recursive") == 0 ) {
      p->recursive_strip = 1;
    } else if ( strcmp(arg, "-s") == 0 || strcmp(arg, "--strip-sub") == 0 ) {
      p->strip_subdomains = 1;
    } else if ( strcmp(arg, "-v") == 0 || strcmp(arg, "--verbose") == 0 ) {
      ++ p->verbose;
    } else if ( strcmp(arg, "-vv") == 0 ) {
      p->verbose += 2;
    } else if ( strcmp(arg, "-q") == 0 || strcmp(arg, "--quiet") == 0 ) {
      p->quiet = 1;
    } else if ( strcmp(arg, "--test") == 0 ) {
      p->run_test = 1;
    } else {
      fprintf(stderr, "error: Unknown option %s\n\n", arg);
      help();
      exit(1);
    }
  }
}
/*
 * Acts on the parsed options in `*p' before any hostname is processed:
 *
 *  - quiet mode turns verbosity off and discards everything written to
 *    stdout
 *  - the precompiled lists are sorted, and user-supplied list files are
 *    read and sorted (bsearch() is used on them later)
 *  - --print-tlds / --print-whitelist print the lists and exit with 0
 *  - --test looks up the permanent test point and exits with 2 if it is
 *    reported as blocklisted, 0 otherwise
 *  - if there is nothing to do, an error and the help text are printed and
 *    the program exits with 1
 *
 * Exits with 1 if a list file cannot be read or stdout cannot be silenced.
 */
static void check_options(struct options_* p)
{
  if ( p->quiet ) {
#ifdef _WIN32
    const char* const sink = "NUL";
#else
    const char* const sink = "/dev/null";
#endif
    p->verbose = 0;

    // Redirect rather than close: the rest of the program keeps writing to
    // stdout, which must stay a valid stream, and a closed descriptor 1
    // could be handed out again to a socket or file opened later.
    if ( freopen(sink, "w", stdout) == NULL ) {
      fprintf(stderr, "error: could not redirect standard output to %s\n", sink);
      exit(1);
    }
  }

  // sort hardcoded lists
  qsort(whitelist, listsize(whitelist), sizeof(char*), compmi);
  qsort(two_level_tlds, listsize(two_level_tlds), sizeof(char*), compmi);

  if ( p->wlist_file ) {
    if ( (p->pwhitelist = read_list(p->wlist_file)) == NULL ) {
      fprintf(stderr, "error: could not read whitelist from %s\n", p->wlist_file);
      exit(1);
    }
    // sort for bsearch
    qsort(p->pwhitelist, listsize(p->pwhitelist), sizeof(char*), compmi);
    // we do not free allocated memory explicitly (the lists), because
    // that is a waste of time; your OS will erase the entire process memory
    // faster. If you don't believe that, ask the glibc developer Ulrich Drepper
    // (search his page on the web).
  }

  if ( p->tlds_file ) {
    if ( (p->ptlds = read_list(p->tlds_file)) == NULL ) {
      fprintf(stderr, "error: could not read two-level TLD list from %s\n", p->tlds_file);
      exit(1);
    }
    // sort for bsearch
    qsort(p->ptlds, listsize(p->ptlds), sizeof(char*), compmi);
  }

  if ( p->print_tlds ) {
    fputs("two-level TLDs:\n", stderr);
    print_list(p->ptlds);
    if ( p->tlds_add && p->ptlds!=two_level_tlds )
      print_list(two_level_tlds);
  }

  if ( p->print_whitelist ) {
    fputs("whitelist:\n", stderr);
    print_list(p->pwhitelist);
    if ( p->wlist_add && p->pwhitelist!=whitelist )
      print_list(whitelist);
  }

  if ( p->print_whitelist || p->print_tlds )
    exit(0);

  if ( p->run_test ) {
    // the test name already carries its own suffix, so append nothing
    p->surbl = "";
    exit( lookup("test.sc.surbl.org.sc.surbl.org") > 0 ? 2 : 0 );
  }

  if ( p->hosts==0 && !p->read_stdin && !p->check_whitelist ) {
    fputs("error: no host(s) specified\n\n", stderr);
    help();
    exit(1);
  }
}
// Looks up every entry of the NULL-terminated list `wlist' with whitelist
// checking disabled and recursive stripping enabled; both settings are put
// back to their previous values afterwards.
//
// Returns the number of blocklist hits found for hostnames in the list.
static int check_whitelist(const char** wlist)
{
  const int saved_skip = global_options.skip_whitelist;
  const int saved_recursive = global_options.recursive_strip;
  const char** entry;
  int hits = 0;

  // modify some parameters for our check
  global_options.skip_whitelist = 1;
  global_options.recursive_strip = 1;

  for ( entry = wlist; *entry != NULL; ++entry )
    hits += full_lookup(*entry);

  global_options.skip_whitelist = saved_skip;
  global_options.recursive_strip = saved_recursive;

  return hits;
}
/*
 * Program entry point.
 *
 * Sets up the options, then either verifies the whitelist
 * (--check-whitelist) or looks up hostnames taken from standard input
 * (lone "-" argument) or from the command line arguments that do not start
 * with '-'.
 *
 * Returns 0 if nothing was blocklisted and 2 if at least one lookup hit a
 * blocklist. Option handling may exit earlier with another status.
 */
int main(int argc, char** argv)
{
  int hits, blockhits;
  struct options_ *p = &global_options;

  set_defaults(p);
  parse_options(argc, argv, p);
  check_options(p);

  if ( *p->surbl && p->verbose ) {
    fprintf(stderr, "checking against %s%s\n",
      p->surbl, p->recursive_strip? " recursively" : "");
  }

  if ( p->check_whitelist ) {
    fputs("checking if whitelist is indeed ok (this should be done rarely)\n\n", stderr);

    hits = check_whitelist(global_options.pwhitelist);

    // with --whitelist-add the precompiled whitelist is in use as well
    if ( global_options.wlist_add )
      hits += check_whitelist(whitelist);

    if ( !hits ) {
      fputs("\nsummary: the whitelist is ok\n", stderr);
      return 0;
    } else {
      fprintf(stderr, "\nsummary: the whitelist is not ok -- found %d blocked hosts\n", hits);
      return 2;
    }
  }

  blockhits = 0;

  if ( p->read_stdin ) {
    char buf[2048];

    while ( fgets(buf, sizeof(buf), stdin) ) {
      remove_newline(buf);

      // a blank line is not a hostname; do not send an empty query
      if ( buf[0] == '\0' )
        continue;

      blockhits += full_lookup(buf);
    }
  } else {
    int n;

    for ( n=1; n<argc; ++n ) {
      // arguments starting with '-' are options, already handled
      if ( *argv[n] != '-' )
        blockhits += full_lookup(argv[n]);
    }
  }

  if ( p->verbose && blockhits )
    fputs("\nSee http://www.surbl.org/lists.html for more information on the blocklists\n", stdout);

  return blockhits? 2 : 0;
}
/* syntax highlighted by Code2HTML, v. 0.9.1 */