/* * scamper_do_trace.c * * $Id: scamper_do_trace.c,v 1.135 2007/05/23 05:13:56 mjl Exp $ * * Copyright (C) 2005-2007 The University of Waikato * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, version 2. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * */ #include #if !defined(__sun__) #include #endif #include #include #include #include #if defined(__linux__) #define __FAVOR_BSD #else #include #endif #if defined(__sun__) #define BSD_COMP #endif #include #include #include #include #include #include #include #include #if defined(__APPLE__) #include #endif #include #include #include #include #include #if defined(DMALLOC) #include #endif #include "scamper.h" #include "scamper_addr.h" #include "scamper_list.h" #include "scamper_tlv.h" #include "scamper_trace.h" #include "scamper_task.h" #include "scamper_queue.h" #include "scamper_icmp_resp.h" #include "scamper_fds.h" #include "scamper_dl.h" #include "scamper_probe.h" #include "scamper_rtsock.h" #include "scamper_privsep.h" #include "scamper_getsrc.h" #include "scamper_file.h" #include "scamper_outfiles.h" #include "scamper_addresslist.h" #include "scamper_debug.h" #include "scamper_do_trace.h" #include "scamper_addr2mac.h" #include "scamper_options.h" #include "scamper_icmp4.h" #include "scamper_icmp6.h" #include "scamper_tcp4.h" #include "scamper_udp4.h" #include "scamper_udp6.h" #include "utils.h" #define SCAMPER_DO_TRACE_ATTEMPTS_MIN 1 #define SCAMPER_DO_TRACE_ATTEMPTS_DEF 2 #define 
SCAMPER_DO_TRACE_ATTEMPTS_MAX 9 #define SCAMPER_DO_TRACE_DPORT_MIN 1 #define SCAMPER_DO_TRACE_DPORT_DEF (32768+777) #define SCAMPER_DO_TRACE_DPORT_MAX 65535 #define SCAMPER_DO_TRACE_FIRSTHOP_MIN 1 #define SCAMPER_DO_TRACE_FIRSTHOP_DEF 1 #define SCAMPER_DO_TRACE_FIRSTHOP_MAX 255 #define SCAMPER_DO_TRACE_GAPLIMIT_MIN 1 #define SCAMPER_DO_TRACE_GAPLIMIT_DEF 5 #define SCAMPER_DO_TRACE_GAPLIMIT_MAX 255 #define SCAMPER_DO_TRACE_HOLDTIME_MIN 0 #define SCAMPER_DO_TRACE_HOLDTIME_DEF 0 #define SCAMPER_DO_TRACE_HOLDTIME_MAX 255 #define SCAMPER_DO_TRACE_HOPLIMIT_MIN 0 #define SCAMPER_DO_TRACE_HOPLIMIT_DEF 0 #define SCAMPER_DO_TRACE_HOPLIMIT_MAX 255 #define SCAMPER_DO_TRACE_LOOPS_MIN 0 #define SCAMPER_DO_TRACE_LOOPS_DEF 1 /* stop on the first loop found */ #define SCAMPER_DO_TRACE_LOOPS_MAX 255 #define SCAMPER_DO_TRACE_PPS_MIN 1 #define SCAMPER_DO_TRACE_PPS_MAX 1000 #define SCAMPER_DO_TRACE_PPS_DEF 20 #define SCAMPER_DO_TRACE_SPORT_MIN 1 #define SCAMPER_DO_TRACE_SPORT_MAX 65535 #define SCAMPER_DO_TRACE_TOS_MIN 0 #define SCAMPER_DO_TRACE_TOS_DEF 0 #define SCAMPER_DO_TRACE_TOS_MAX 255 #define SCAMPER_DO_TRACE_WAIT_MIN 1 #define SCAMPER_DO_TRACE_WAIT_DEF 5 #define SCAMPER_DO_TRACE_WAIT_MAX 10 /* * pmtud_L2_state * * this struct records state when inferring the MTU of the underlying media. * * when scamper has to discover the MTU of the link itself, it uses the L2 * table above to choose a suitable initial guess. it records the index * into the L2 table into L2_idx. 
*/
typedef struct pmtud_L2_state
{
  int idx;   /* index into the L2 table */
  int lower; /* lower bounds of the L2 search space; -1 while not yet set */
  int upper; /* upper bounds of the L2 search space */
  int in;    /* probe size not to get a suitable response */
  int out;   /* size of probe to infer the underlying MTU; -1 while unknown */
  scamper_trace_hop_t *hop; /* the last probe to obtain a response */
} pmtud_L2_state_t;

/*
 * pmtud_TTL_state
 *
 * this struct records state when inferring the TTL range of hops that
 * are responsible for not sending a fragmentation required message where
 * one is required.
 */
typedef struct pmtud_TTL_state
{
  int lower; /* lower bounds of the TTL search space */
  int upper; /* upper bounds of the TTL search space */
  scamper_trace_hop_t *hop; /* the last TTL probe to obtain a response */
} pmtud_TTL_state_t;

/*
 * pmtud_L2
 *
 * this struct associates a known MTU with an index into an array.
 * each entry stores its own position so that a pointer to an entry can be
 * turned back into an index.
 */
typedef struct pmtud_L2
{
  int idx;     /* index into the L2 array where this node resides */
  int mtu;     /* the MTU of the link */
  char *descr; /* some description of the L2 media */
} pmtud_L2_t;

/*
 * trace_probe
 *
 * this struct keeps state of each probe sent with the trace
 */
typedef struct trace_probe
{
  struct timeval tx_tv;   /* the time we transmitted the probe */
  struct timeval rx_tv;   /* the time we received the first answer */
  scamper_addr_t *rx_mac; /* the mac addr where we received the first answer */
  uint16_t size;          /* the size of the probe sent */
  uint8_t ttl;            /* the TTL that was set for the probe */
  uint8_t id;             /* the attempt number made with ttl/size params */
  uint8_t rx;             /* how many responses scamper got to the probe */
  uint8_t mode;           /* the mode scamper was in when probe was sent */
  uint8_t flags;          /* the probe's flags (TRACE_PROBE_FLAG_*) */
} trace_probe_t;

/* bits for trace_probe.flags: a datalink tx / rx timestamp was recorded */
#define TRACE_PROBE_FLAG_DL_TX 0x01
#define TRACE_PROBE_FLAG_DL_RX 0x02

#define TRACE_ALLOC_HOPS 16

/*
 * trace_state
 *
 * this is a fairly large struct that keeps state for the traceroute
 * process.  it also deals with state in the PMTUD phase, if used.
*/
typedef struct trace_state
{
  uint8_t mode;          /* current trace mode scamper is in (MODE_*) */
  uint8_t alive;         /* non-zero if responsive */
  uint8_t ttl;           /* ttl to set in the probe packet */
  uint8_t attempt;       /* attempt number at the current probe */
  uint16_t alloc_hops;   /* number of trace->hops allocated */
  uint16_t payload_size; /* how much payload to include */
  uint16_t header_size;  /* size of headers */

  scamper_fd_t *route;   /* fd to query route socket with */
  scamper_fd_t *icmp;    /* fd to listen to icmp packets with */
  scamper_fd_t *probe;   /* fd to probe with */
  scamper_fd_t *dl;      /* struct to use with datalink access */

  uint8_t *dl_hdr;       /* header to use with datalink */
  uint16_t dl_size;      /* how large the header is */

  trace_probe_t **probes; /* probes sent so far */
  uint16_t id_next;       /* next id to use in probes */
  uint16_t id_max;        /* maximum id available */

  pmtud_L2_state_t *L2;   /* state kept when doing L2 MTU search */
  pmtud_TTL_state_t *TTL; /* state kept when doing a TTL search */
  scamper_trace_hop_t *last_fragmsg; /* last fragmentation msg stored */
} trace_state_t;

/* the values stored in trace_state.mode and trace_probe.mode */
const uint8_t MODE_TRACE = 0;
const uint8_t MODE_CHECK_LIFE = 1;
const uint8_t MODE_PMTUD_DEFAULT = 2;
const uint8_t MODE_PMTUD_SILENT_L2 = 3;
const uint8_t MODE_PMTUD_SILENT_TTL = 4;
const uint8_t MODE_PMTUD_BADSUGG = 5;
const uint8_t MODE_RTSOCK = 6;

/* the lowest and highest mode values defined above */
#define MODE_MIN MODE_TRACE
#define MODE_MAX MODE_RTSOCK

/* the callback functions registered with the trace task */
static scamper_task_funcs_t trace_funcs;

/* address cache used to avoid reallocating the same address multiple times */
extern scamper_addrcache_t *addrcache;

/* temporary buffer shared amongst traceroutes */
static uint8_t *pktbuf = NULL;
static size_t pktbuf_len = 0;

/* socket used to obtain the MTU of a particular interface */
static int if_sock = -1;

/*
 * these MTUs were largely taken from the NetBSD version of traceroute, and
 * are used to choose a packet size to probe with in the absence of a
 * Fragmentation Needed message.
* * they have been annoted with their corresponding Layer 2 type, largely * taken from RFC 1191 */ static const pmtud_L2_t L2[] = { { 0, 68, "RFC791 MTU"}, /* Official RFC 791 minimum MTU */ { 1, 296, "P2P low delay"}, /* Point-to-Point links, (low delay) */ { 2, 508, ""}, { 3, 512, "NetBIOS"}, /* NetBIOS */ { 4, 544, "DEC Portal"}, /* DEC IP Portal */ { 5, 552, ""}, { 6, 576, "v4 min MTU"}, /* X25 MTU, IPv4 Minimum MTU */ { 7, 1006, "SLIP"}, /* SLIP */ { 8, 1280, "v6 min MTU"}, /* IPv6 Minimum MTU */ { 9, 1454, "PPPoE ADSL"}, /* an optimally sized PPPoE frame in DSL */ {10, 1480, "v4tun Ether"}, /* Ethernet MTU with tunnel over IPv4 */ {11, 1492, "IEEE 802.3"}, /* IEEE 802.3 */ {12, 1500, "Ethernet"}, /* Ethernet MTU */ {13, 1514, "Ethernet Max"}, /* Ethernet Max MTU */ {14, 1536, "Exp. Ether"}, /* Exp. Ethernet Nets */ {15, 2002, "IEEE 802.5"}, /* IEEE 802.5, Recommended MTU */ {16, 2048, "Wideband"}, /* Wideband Network */ {17, 4352, "FDDI"}, /* FDDI */ {18, 4464, "IEEE 802.5"}, /* IEEE 802.5, Maximum MTU */ {19, 4470, "IP over ATM"}, /* ATM / T3 / SONET SDH */ {20, 8166, "IEEE 802.4"}, /* IEEE 802.4 */ {21, 9000, "Broadcom GigE"}, /* Broadcom GigE MTU */ {22, 9192, "OC-192"}, /* OC-192 and other really fast media */ {23, 16110, "Intel GigE"}, /* Intel Pro 1000 MTU */ {24, 17914, "Token Ring"}, /* 16Mb IBM Token Ring */ {25, 65535, "IPv[46] MTU"} /* The IPv[46] Maximum MTU */ }; static const pmtud_L2_t *L2_1454 = &L2[9]; static const pmtud_L2_t *L2_1500 = &L2[12]; static const int L2_cnt = sizeof(L2) / sizeof(pmtud_L2_t); #define TRACE_OPT_DPORT 1 #define TRACE_OPT_FIRSTHOP 2 #define TRACE_OPT_GAPLIMIT 3 #define TRACE_OPT_LOOPS 4 #define TRACE_OPT_MAXTTL 5 #define TRACE_OPT_PMTUD 6 #define TRACE_OPT_PROTOCOL 7 #define TRACE_OPT_ATTEMPTS 8 #define TRACE_OPT_ALLATTEMPTS 9 #define TRACE_OPT_SPORT 10 #define TRACE_OPT_TOS 11 #define TRACE_OPT_WAIT 12 #define TRACE_OPT_GAPCONT 13 static const scamper_option_in_t trace_opts_in[] = { {'d', NULL, TRACE_OPT_DPORT, 
SCAMPER_OPTION_TYPE_NUM}, {'f', NULL, TRACE_OPT_FIRSTHOP, SCAMPER_OPTION_TYPE_NUM}, {'g', NULL, TRACE_OPT_GAPLIMIT, SCAMPER_OPTION_TYPE_NUM}, {'G', NULL, TRACE_OPT_GAPCONT, SCAMPER_OPTION_TYPE_NULL}, {'l', NULL, TRACE_OPT_LOOPS, SCAMPER_OPTION_TYPE_NUM}, {'m', NULL, TRACE_OPT_MAXTTL, SCAMPER_OPTION_TYPE_NUM}, {'M', NULL, TRACE_OPT_PMTUD, SCAMPER_OPTION_TYPE_NULL}, {'P', NULL, TRACE_OPT_PROTOCOL, SCAMPER_OPTION_TYPE_STR}, {'q', NULL, TRACE_OPT_ATTEMPTS, SCAMPER_OPTION_TYPE_NUM}, {'Q', NULL, TRACE_OPT_ALLATTEMPTS, SCAMPER_OPTION_TYPE_NULL}, {'s', NULL, TRACE_OPT_SPORT, SCAMPER_OPTION_TYPE_NUM}, {'t', NULL, TRACE_OPT_TOS, SCAMPER_OPTION_TYPE_NUM}, {'w', NULL, TRACE_OPT_WAIT, SCAMPER_OPTION_TYPE_NUM}, }; static const int trace_opts_cnt = SCAMPER_OPTION_COUNT(trace_opts_in); /* * if_getmtu * * given an interface index, return the MTU of it. return zero if * we can't get the interface's MTU. */ static int if_getmtu(const int ifindex, uint16_t *ifmtu) { struct ifreq ifr; int mtu; assert(ifindex >= 0); /* given the index, return the interface name to query */ if(if_indextoname((unsigned int)ifindex, ifr.ifr_name) == NULL) { printerror(errno, strerror, __func__, "could not if_indextoname"); return -1; } if(ioctl(if_sock, SIOCGIFMTU, &ifr) == -1) { printerror(errno, strerror, __func__, "could not SIOCGIFMTU"); return -1; } #if defined(__sun__) mtu = ifr.ifr_metric; #else mtu = ifr.ifr_mtu; #endif if(mtu >= 0 && mtu <= 65535) { *ifmtu = mtu; return 0; } return -1; } #if defined(__linux__) || defined(__sun__) static int if_getmac(const int ifindex, uint8_t *mac) { struct ifreq ifr; if(if_indextoname(ifindex, ifr.ifr_name) == NULL) { printerror(errno, strerror, __func__, "could not if_indextoname"); return -1; } #if defined(__linux__) if(ioctl(if_sock, SIOCGIFHWADDR, &ifr) == -1) { printerror(errno, strerror, __func__, "could not SIOCGIFHWADDR"); return -1; } memcpy(mac, ifr.ifr_hwaddr.sa_data, 6); #elif defined(__sun__) if(ioctl(if_sock, SIOCGENADDR, &ifr) == -1) { 
printerror(errno, strerror, __func__, "could not SIOCGENADDR"); return -1; } memcpy(mac, ifr.ifr_enaddr, 6); #endif return 0; } #else static int if_getmac(const int ifindex, uint8_t *mac) { struct if_msghdr *ifm; struct sockaddr_dl *sdl; int mib[6]; size_t len; uint8_t *buf; mib[0] = CTL_NET; mib[1] = AF_ROUTE; mib[2] = 0; mib[3] = AF_LINK; mib[4] = NET_RT_IFLIST; mib[5] = ifindex; if(sysctl(mib, 6, NULL, &len, NULL, 0) == -1) { printerror(errno, strerror, __func__, "could not sysctl buflen"); return -1; } if((buf = malloc(len)) == NULL) { printerror(errno, strerror, __func__, "could not malloc buf"); return -1; } if(sysctl(mib, 6, buf, &len, NULL, 0) < 0) { printerror(errno, strerror, __func__, "could not sysctl data"); free(buf); return -1; } ifm = (struct if_msghdr *)buf; sdl = (struct sockaddr_dl *)(buf+sizeof(struct if_msghdr)); memcpy(mac, LLADDR(sdl), 6); free(buf); return 0; } #endif /* * pmtud_L2_hop * * utility to replace the currently cached hop record recorded when scamper * does the L2 MTU search. */ static void pmtud_L2_hop(trace_state_t *state, scamper_trace_hop_t *hop) { assert(state->L2 != NULL); if(state->L2->hop != NULL) scamper_trace_hop_free(state->L2->hop); state->L2->hop = hop; return; } /* * pmtud_L2_set_probesize * * given the lower and upper values of the PMTU search, suggest a packet * size to probe next. apply a few heuristics to the search to try and * find the PMTU to the next node faster. 
*/
static void pmtud_L2_set_probesize(trace_state_t *state,
                                   const int lower, const int upper)
{
  int idx = state->L2->idx;
  int has_prev, has_next;
  int size;

  /* callers should detect end of L2 search before calling this function */
  assert(lower + 1 != upper);

  /* make sure the L2->idx parameter has been set (to something reasonable) */
  assert(idx >= 0);
  assert(idx < L2_cnt);

  /* make sure the suggested window size is within the current window */
  assert(state->L2->lower == -1 || lower >= state->L2->lower);
  assert(state->L2->upper == -1 || upper <= state->L2->upper);

  /*
   * the heuristics below look at the table entries either side of idx.
   * only consult a neighbour that exists, so that the first and last
   * entries of the table do not cause a read outside of the L2 array.
   */
  has_prev = (idx > 0);
  has_next = (idx + 1 < L2_cnt);

  /*
   * if we've narrowed it down to between two entries in the L2 table,
   * then try one byte higher than the lower, as there's a fair chance
   * the underlying mtu will be L2[idx].mtu.
   *
   * we make an exception if the lower bounds is Ethernet: there exists
   * a strong possibility the underlying MTU is Ethernet, and the cost
   * of guessing wrong [i.e. getting an unexpected response] is small.
   */
  if(lower == 1500 ||
     (has_next && lower == L2[idx].mtu && upper <= L2[idx+1].mtu))
    {
      size = lower + 1;
    }
  /*
   * if there is a media MTU higher than the current lower bounds that
   * is smaller than the upper bounds, then try it
   */
  else if(has_next && lower >= L2[idx].mtu && L2[idx+1].mtu < upper)
    {
      size = L2[++idx].mtu;
    }
  /*
   * if we did not get a response to the last media MTU probe, and there
   * is a smaller known media MTU to try, then try it now
   */
  else if(has_prev && upper == L2[idx].mtu && lower < L2[idx-1].mtu)
    {
      size = L2[--idx].mtu;
    }
  /*
   * scamper is operating between two known MTU types, do a binary chop
   */
  else
    {
      size = (lower + upper) / 2;
    }

  /* restart the attempt counter and record the new probe size and window */
  state->attempt = 0;
  state->payload_size = size - state->header_size;
  state->L2->idx = idx;
  state->L2->lower = lower;
  state->L2->upper = upper;

  return;
}

/*
 * pmtud_L2_init
 *
 * utility to search the L2 table for a suitable initial probe size, based
 * on known [to scamper] L2 media MTUs in relation to the last probe sent that
 * went unacknowledged.
*/ static int pmtud_L2_init(trace_state_t *state) { pmtud_L2_state_t *L2s; int size = state->header_size + state->payload_size; int idx; /* * if the probe that was not answered is > 1500 bytes and scamper has * not got a response to a packet 1500 bytes or larger yet, then * forcibly try the ethernet MTU next, as the chances are good that the * media will be plain old ethernet. */ if(size > 1500) { idx = L2_1500->idx; } /* * if the probe that was not answered is > 1454 bytes, then forcibly try * the lower bounds of X-over-ethernet types. */ else if(size > 1454) { idx = L2_1454->idx; } else { for(idx=0; idx L2[idx].mtu && size <= L2[idx+1].mtu) { break; } } } if((L2s = malloc(sizeof(pmtud_L2_state_t))) == NULL) { return -1; } L2s->idx = idx; L2s->hop = NULL; L2s->lower = -1; L2s->upper = size; L2s->in = size; L2s->out = -1; state->L2 = L2s; state->payload_size = L2[idx].mtu - state->header_size; state->attempt = 0; return 0; } /* * pmtud_TTL_hop * * utility to replace the currently cached hop record recorded when scamper * does the TTL search. */ static void pmtud_TTL_hop(trace_state_t *state, scamper_trace_hop_t *hop) { if(state->TTL->hop != NULL) { scamper_trace_hop_free(state->TTL->hop); } state->TTL->hop = hop; return; } /* * pmtud_TTL_set_probettl * * return: 0 if there are no more TTLs to probe, 1 if probing should continue */ static int pmtud_TTL_set_probettl(scamper_task_t *task, const int lower, int upper) { scamper_trace_t *trace = task->data; trace_state_t *state = task->state; int cur; /* check to see if we have to do any more TTL searching */ while(lower + 1 < upper) { /* halve the TTL space */ cur = (lower + upper) / 2; /* * check to see if experience at soliciting a TTL expired message has * been good. 
skip TTLs that have been non-responsive */ while(cur < upper && trace->hops[cur-1] == NULL) { cur++; } /* scamper got a suitable TTL probe value, so we are done */ if(cur != upper) { state->TTL->lower = lower; state->TTL->upper = upper; state->ttl = cur; state->attempt = 0; return 1; } /* * there are no TTLs above the half-way point to probe for, so try for * ones lower */ upper = (lower + upper) / 2; } return 0; } /* * hop_find * * check to see if there is any other hop in the trace with the * same address */ static scamper_trace_hop_t *hop_find(const scamper_trace_t *trace, const scamper_addr_t *addr) { scamper_trace_hop_t *hop; uint16_t i; for(i=0; ihop_count; i++) { for(hop = trace->hops[i]; hop != NULL; hop = hop->hop_next) { if(scamper_addr_cmp(hop->hop_addr, addr) == 0) { return hop; } } } return NULL; } /* * pmtud_TTL_init * * initialise the bounds of a TTL search */ static int pmtud_TTL_init(scamper_task_t *task) { scamper_trace_hop_t *hop; scamper_trace_t *trace = task->data; trace_state_t *state = task->state; int lower, upper; uint8_t turn_ttl; if((state->TTL = malloc_zero(sizeof(pmtud_TTL_state_t))) == NULL) { return -1; } /* * the packet size that is dropped silently is the size we are * doing a TTL limited search with */ state->payload_size = state->L2->in - state->header_size; /* * use the last ICMP fragmentation required message recorded in the * path MTU discovery phase to infer a suitable lower-bound for inferring * the range of TTLs that could be responsible for not sending an ICMP * fragmentation required message */ if(state->last_fragmsg == NULL) { lower = 0; } else { SCAMPER_TRACE_HOP_GET_TURN_TTL(state->last_fragmsg, turn_ttl); if((lower = state->last_fragmsg->hop_probe_ttl - turn_ttl) < 1) { lower = 0; } } /* * the upper bound of TTLs to search is set by closest response past * the hop that sends nothing */ if((hop = hop_find(trace, state->L2->hop->hop_addr)) != NULL) { upper = hop->hop_probe_ttl; } else { 
SCAMPER_TRACE_HOP_GET_TURN_TTL(state->L2->hop, turn_ttl); upper = state->L2->hop->hop_probe_ttl - turn_ttl + 1; } /* if the TTL limited search is a null operation, then say so */ if(pmtud_TTL_set_probettl(task, lower, upper) == 0) { return 0; } return 1; } /* * pmtud_hopins * * take the hop structure and put it into the list of hops based on the * TTL distance into the path. */ static void pmtud_hopins(scamper_trace_t *trace, scamper_trace_hop_t *hop) { scamper_trace_hop_t *cur, *pre; assert(hop != NULL); if((pre = trace->pmtud->hops) == NULL) { trace->pmtud->hops = hop; return; } for(cur = pre->hop_next; cur != NULL; cur = cur->hop_next) { pre = cur; } pre->hop_next = hop; hop->hop_next = cur; return; } /* * pmtu_L2_search_end * * scamper has had to infer the underlying next-hop MTU due to a pmtud * fault. given the hop used to infer the nhmtu, insert that into the * trace and tidy up. */ static int pmtud_L2_search_end(scamper_task_t *task) { scamper_trace_t *trace = task->data; trace_state_t *state = task->state; scamper_trace_hop_t *hop; uint16_t out; assert(state->L2->out >= 0); assert(state->L2->out <= 65535); out = state->L2->out; /* * copy details of the TTL-expired message furthest into the path * into the trace if there is one to copy */ if(state->TTL != NULL) { /* * if there was no TTL response with the large packet from anywhere * in the path */ if(state->TTL->hop == NULL) { /* * if the lowest TTL tried was zero, then we infer that the host * itself has an MTU mismatch with the particular router it is * using for the destination */ if(state->TTL->lower == 0) { scamper_tlv_set(&trace->pmtud->tlvs, SCAMPER_TRACE_PMTUD_TLV_OUTMTU, 2, &out); } } else { pmtud_hopins(trace, state->TTL->hop); } free(state->TTL); state->TTL = NULL; } /* * copy details of the hop to terminate the largest probe into * the pmtu struct. 
hops between the TTL expired message (if we * have one) and the ICMP unreach message have their PMTU inferred */ if((hop = state->L2->hop) != NULL) { pmtud_hopins(trace, hop); state->last_fragmsg = hop; free(state->L2); state->L2 = NULL; /* * if the hop that we last recorded is a hop message that would * ordinarily have caused scamper to stop PMTU discovery, then * stop it now */ if(!SCAMPER_TRACE_HOP_IS_ICMP_PACKET_TOO_BIG(hop)) { trace->pmtud->pmtu = hop->hop_probe_size; scamper_queue_done(task->queue, scamper_holdtime_get()*1000); return 1; } } if(state->L2 != NULL) { free(state->L2); state->L2 = NULL; } state->payload_size = out - state->header_size; state->mode = MODE_PMTUD_DEFAULT; state->attempt = 0; state->ttl = 255; return 0; } /* * trace_ipid_fudge * * play games with the embedded IP ID, which may come back with a different * IP ID than what was sent; return the ID of the corresponding probe in *id. * this code was inspired by information from David Malone. * * the IPID transmitted is assigned from a counter (state->id_next) which * starts from one -- *not* zero. this is so systems that zero the IPID * will not confuse this algorithm. * * the IPID is transmitted by scamper in network byte order. * */ static int trace_ipid_fudge(const trace_state_t *state, const uint16_t ipid, uint16_t *id) { /* ensure the IP ID is not zero */ if(ipid == 0) { return -1; } /* check if the IP ID is in range */ if(ipid <= state->id_next) { *id = ipid - 1; return 0; } /* check if the IP ID was incremented */ if(ipid == state->id_next + 1) { scamper_debug(__func__, "ip id one greater than sent"); *id = ipid - 2; return 0; } /* check if the IP ID was byte swapped. XXX: is this correct? 
*/ if(byteswap16(ipid) <= state->id_next) { scamper_debug(__func__, "ip id byte swapped"); *id = byteswap16(ipid) - 1; return 0; } return -1; } /* * trace_stop * * set the trace's stop parameters to whatever it is passed */ static void trace_stop(scamper_trace_t *trace, const uint8_t reason, const uint8_t data) { /* if we've already set a stop reason, then don't clobber it */ if(trace->stop_reason != SCAMPER_TRACE_STOP_NONE) { scamper_debug(__func__, "reason %d/%d preceeds %d/%d", trace->stop_reason, trace->stop_data, reason, data); return; } trace->stop_reason = reason; trace->stop_data = data; return; } static void trace_stop_completed(scamper_trace_t *trace) { trace_stop(trace, SCAMPER_TRACE_STOP_COMPLETED, 0); return; } static void trace_stop_dead(scamper_trace_t *trace) { trace_stop(trace, SCAMPER_TRACE_STOP_DEAD, 0); return; } static void trace_stop_error(scamper_trace_t *trace, int error) { trace_stop(trace, SCAMPER_TRACE_STOP_ERROR, error); return; } static void trace_stop_hoplimit(scamper_trace_t *trace) { trace_stop(trace, SCAMPER_TRACE_STOP_HOPLIMIT, 0); return; } /* * trace_isloop * * given a trace and the last hop record, determine if there is a loop. 
*/
/*
 * the address of the last hop is compared with the hops recorded at every
 * TTL lower than the TTL of the last hop.  each TTL with a matching address
 * counts once.  when the count reaches trace->loops, the distance in TTLs
 * between the last hop and the match is returned.  returns 0 when there is
 * no loop.
 */
static int trace_isloop(const scamper_trace_t *trace,
                        const scamper_trace_hop_t *last)
{
  scamper_trace_hop_t *hop;
  int i, cnt;

  /* check all probe ttl values */
  for(i=0, cnt=0; i<last->hop_probe_ttl-1; i++)
    {
      /* for each hop, see if we have a matching address */
      for(hop = trace->hops[i]; hop != NULL; hop = hop->hop_next)
        {
          /*
           * if the addresses match, then we either return if we've detected
           * a loop, or skip to the next hop now that we know we've got
           * a match for this hop
           */
          if(scamper_addr_cmp(last->hop_addr, hop->hop_addr) == 0)
            {
              if(++cnt >= trace->loops)
                {
                  return last->hop_probe_ttl - hop->hop_probe_ttl;
                }
              break;
            }
        }
    }

  return 0;
}

/*
 * trace_hopins
 *
 * insert the hop record into the trace at the appropriate place: the list
 * of hops for the probe's TTL is kept ordered by probe id, with a new
 * record placed after any existing records that have the same id.
 *
 * always returns 0.
 */
static int trace_hopins(scamper_trace_t *trace, scamper_trace_hop_t *hop)
{
  scamper_trace_hop_t *pre, *cur;

  cur = trace->hops[hop->hop_probe_ttl-1];
  pre = NULL;

  /* search for the place to insert this hop record */
  while(cur != NULL && cur->hop_probe_id <= hop->hop_probe_id)
    {
      pre = cur;
      cur = cur->hop_next;
    }

  /* nothing precedes this record for this hop, insert at head */
  if(pre == NULL)
    {
      trace->hops[hop->hop_probe_ttl-1] = hop;
    }
  else
    {
      pre->hop_next = hop;
    }
  hop->hop_next = cur;

  return 0;
}

/*
 * trace_handleerror
 *
 * the code encountered some error when doing the traceroute, so stop the
 * trace now.
*/ static int trace_handleerror(scamper_task_t *task, const int error) { trace_stop_error((scamper_trace_t *)task->data, error); scamper_queue_done(task->queue, scamper_holdtime_get()*1000); return 0; } /* * trace_hop * * this function creates a generic hop record with the basic details from * the probe structure copied in, as well as an address based on the details * passed in */ static scamper_trace_hop_t *trace_hop(const trace_probe_t *probe, const int af, const void *addr) { scamper_trace_hop_t *hop; int type; /* determine the scamper address type to use from the address family */ if(af == AF_INET) type = SCAMPER_ADDR_TYPE_IPV4; else if(af == AF_INET6) type = SCAMPER_ADDR_TYPE_IPV6; else return NULL; if((hop = scamper_trace_hop_alloc()) == NULL) { return NULL; } if((hop->hop_addr = scamper_addrcache_get(addrcache, type, addr)) == NULL) { scamper_trace_hop_free(hop); return NULL; } hop->hop_probe_ttl = probe->ttl; hop->hop_probe_id = probe->id + 1; hop->hop_probe_size = probe->size; /* * if the probe's datalink tx timestamp flag is set, scamper has a tx * timestamp recorded */ if(probe->flags & TRACE_PROBE_FLAG_DL_TX) { hop->hop_flags |= SCAMPER_TRACE_HOP_FLAG_TS_DL_TX; } return hop; } /* * trace_icmp_hop * * given a trace probe and an ICMP response, allocate and initialise a * scamper_trace_hop record. 
*/ static scamper_trace_hop_t *trace_icmp_hop(trace_probe_t *probe, scamper_icmp_resp_t *ir) { scamper_trace_hop_t *hop; scamper_addr_t addr; /* get a pointer to the source address of the ICMP response */ if(scamper_icmp_resp_src(ir, &addr) != 0) { return NULL; } /* create a generic hop record without any special bits filled out */ if((hop = trace_hop(probe, ir->ir_af, addr.addr)) == NULL) { return NULL; } /* fill out the basic bits of the hop structure */ hop->hop_reply_size = ir->ir_ip_size; hop->hop_icmp_type = ir->ir_icmp_type; hop->hop_icmp_code = ir->ir_icmp_code; /* * we cannot depend on the TTL field of the IP packet being made available, * so we signal explicitly when the reply ttl is valid */ if(ir->ir_ip_ttl != -1) { hop->hop_reply_ttl = ir->ir_ip_ttl; hop->hop_flags |= SCAMPER_TRACE_HOP_FLAG_REPLY_TTL; } /* * if the probe's datalink rx timestamp flag is set, scamper has a rx * timestamp recorded */ if(probe->flags & TRACE_PROBE_FLAG_DL_RX) { hop->hop_flags |= SCAMPER_TRACE_HOP_FLAG_TS_DL_RX; timeval_rtt(&hop->hop_rtt, &probe->tx_tv, &probe->rx_tv); } else { timeval_rtt(&hop->hop_rtt, &probe->tx_tv, &ir->ir_rx); if(ir->ir_flags & SCAMPER_ICMP_RESP_FLAG_KERNRX) { hop->hop_flags |= SCAMPER_TRACE_HOP_FLAG_TS_SOCK_RX; } } if(SCAMPER_ICMP_RESP_IS_PACKET_TOO_BIG(ir)) { if(scamper_tlv_set(&hop->hop_tlvs, SCAMPER_TRACE_HOP_TLV_NHMTU, 2, &ir->ir_icmp_nhmtu) == NULL) { goto err; } } if(ir->ir_af == AF_INET) { if(scamper_tlv_set(&hop->hop_tlvs, SCAMPER_TRACE_HOP_TLV_REPLY_IPID, 2, &ir->ir_ip_id) == NULL || scamper_tlv_set(&hop->hop_tlvs, SCAMPER_TRACE_HOP_TLV_REPLY_IPTOS, 1, &ir->ir_ip_tos) == NULL) { goto err; } } if(SCAMPER_ICMP_RESP_INNER_IS_SET(ir)) { if(ir->ir_inner_ip_ttl != 1) { if(scamper_tlv_set(&hop->hop_tlvs, SCAMPER_TRACE_HOP_TLV_INNER_IPTTL, 1, &ir->ir_inner_ip_ttl) == NULL) { goto err; } } if(ir->ir_inner_ip_size != probe->size) { if(scamper_tlv_set(&hop->hop_tlvs, SCAMPER_TRACE_HOP_TLV_INNER_IPLEN, 2, &ir->ir_inner_ip_size) == NULL) { goto err; } } } 
/* record the fact that we have a hop record thanks to this probe */ probe->rx++; return hop; err: scamper_trace_hop_free(hop); return NULL; } static scamper_trace_hop_t *trace_tcp_hop(trace_probe_t *probe, scamper_dl_rec_t *dl) { scamper_trace_hop_t *hop; /* create a generic hop record without any special bits filled out */ if((hop = trace_hop(probe, dl->dl_af, dl->dl_ip_src)) == NULL) { return NULL; } /* fill out the basic bits of the hop structure */ hop->hop_reply_size = dl->dl_ip_size; hop->hop_reply_ttl = dl->dl_ip_ttl; hop->hop_tcp_flags = dl->dl_tcp_flags; timeval_rtt(&hop->hop_rtt, &probe->tx_tv, &dl->dl_tv); /* set the flags that are known to apply to this hop record */ hop->hop_flags |= (SCAMPER_TRACE_HOP_FLAG_REPLY_TTL | SCAMPER_TRACE_HOP_FLAG_TCP | SCAMPER_TRACE_HOP_FLAG_TS_DL_RX); if(dl->dl_af == AF_INET) { if(scamper_tlv_set(&hop->hop_tlvs, SCAMPER_TRACE_HOP_TLV_REPLY_IPID, 2, &dl->dl_ip_id) == NULL || scamper_tlv_set(&hop->hop_tlvs, SCAMPER_TRACE_HOP_TLV_REPLY_IPTOS, 1, &dl->dl_ip_tos) == NULL) { goto err; } } return hop; err: scamper_trace_hop_free(hop); return NULL; } /* * trace_next_mode * * if the trace is going into another mode, this function figures out * which mode to put it into */ static void trace_next_mode(scamper_task_t *task) { scamper_trace_t *trace = task->data; trace_state_t *state = task->state; uint16_t ifmtu; int ifindex; if((trace->flags & SCAMPER_TRACE_FLAG_PMTUD) == 0 || trace->stop_reason == SCAMPER_TRACE_STOP_HOPLIMIT || trace->stop_reason == SCAMPER_TRACE_STOP_LOOP || trace->stop_reason == SCAMPER_TRACE_STOP_NONE) { scamper_queue_done(task->queue, scamper_holdtime_get()*1000); return; } /* if the interface's MTU is useless, then we can't do PMTUD */ scamper_fd_ifindex(state->dl, &ifindex); if(if_getmtu(ifindex, &ifmtu) == -1 || ifmtu <= state->header_size) { scamper_queue_done(task->queue, scamper_holdtime_get()*1000); return; } if((trace->pmtud = malloc_zero(sizeof(struct scamper_trace_pmtud))) == NULL) { 
scamper_queue_done(task->queue, scamper_holdtime_get()*1000); return; } trace->pmtud->ifmtu = ifmtu; state->attempt = 0; state->mode = MODE_PMTUD_DEFAULT; state->payload_size = ifmtu - state->header_size; state->ttl = 255; scamper_queue_probe(task->queue); return; } /* * trace_stop_reason * * check to see if we have a stop condition based on the hop record */ static void trace_stop_reason(scamper_trace_t *trace, scamper_trace_hop_t *hop, uint8_t *stop_reason, uint8_t *stop_data) { int rc; if(SCAMPER_TRACE_HOP_IS_ICMP_UNREACH_PORT(hop)) { *stop_reason = SCAMPER_TRACE_STOP_COMPLETED; *stop_data = 0; } else if(SCAMPER_TRACE_HOP_IS_ICMP_UNREACH(hop)) { *stop_reason = SCAMPER_TRACE_STOP_UNREACH; *stop_data = hop->hop_icmp_code; } else if(SCAMPER_TRACE_HOP_IS_ICMP_ECHO_REPLY(hop)) { *stop_reason = SCAMPER_TRACE_STOP_COMPLETED; *stop_data = 0; } else if(trace->dst->type == SCAMPER_ADDR_TYPE_IPV6 && hop->hop_icmp_type == ICMP6_PACKET_TOO_BIG) { *stop_reason = SCAMPER_TRACE_STOP_ICMP; *stop_data = hop->hop_icmp_type; } else if(trace->loops != 0 && (rc = trace_isloop(trace, hop)) != 0) { *stop_reason = SCAMPER_TRACE_STOP_LOOP; *stop_data = rc; } else if(scamper_addr_cmp(trace->dst, hop->hop_addr) == 0) { /* * in IPv6, we can have anonymous interfaces on the path that * send ICMP errors 'spoofing' their source address as the * destination we sent. if we get a TTL expired / fragmentation * needed message on the path from the 'destination address' then * it is an anonymous interface */ if(trace->dst->type != SCAMPER_ADDR_TYPE_IPV6 || hop->hop_icmp_type == ICMP6_DST_UNREACH) { *stop_reason = SCAMPER_TRACE_STOP_COMPLETED; *stop_data = 0; } else { *stop_reason = SCAMPER_TRACE_STOP_NONE; *stop_data = 0; } } else { *stop_reason = SCAMPER_TRACE_STOP_NONE; *stop_data = 0; } return; } /* * handleicmp_trace * * we received an ICMP response in the traceroute state. check to see * if the probe is in sequence, and adjust the trace accordingly. 
*/
/*
 * returns 0 when the response was handled (or ignored), and -1 if a hop
 * record could not be created for the response.
 */
static int handleicmp_trace(scamper_task_t *task, scamper_icmp_resp_t *ir, trace_probe_t *probe)
{
  scamper_trace_t *trace = task->data;
  trace_state_t *state = task->state;
  scamper_trace_hop_t *hop;
  uint8_t stop_reason;
  uint8_t stop_data;
  int cwh = 0; /* set when the response is for the current working hop */

  /* we should only have to deal with probes sent while in the trace state */
  if(probe->mode != MODE_TRACE)
    {
      return 0;
    }

  /* create a hop record and insert it into the trace */
  if((hop = trace_icmp_hop(probe, ir)) == NULL)
    {
      return -1;
    }
  trace_hopins(trace, hop);

  /* if the response is for the current working hop */
  if(hop->hop_probe_ttl - 1 == trace->hop_count)
    {
      /* current working hop */
      cwh = 1;

      /* if we are sending all allotted probes to the target */
      if(trace->flags & SCAMPER_TRACE_FLAG_ALLATTEMPTS)
        {
          /*
           * if we get an out of order reply, then we go back to waiting for
           * the one we just probed for
           */
          if(probe->id+1 != state->attempt)
            {
              return 0;
            }

          /*
           * this response is for the last probe sent.  if there are still
           * probes to send for this hop, then send the next one
           */
          if(state->attempt < trace->attempts)
            {
              scamper_queue_probe(task->queue);
              return 0;
            }
        }

      /* this hop is finished with: move on to the next ttl */
      trace->hop_count++;
      state->attempt = 0;
      state->ttl++;
    }

  /* check to see if we have a stop reason from the ICMP response */
  trace_stop_reason(trace, hop, &stop_reason, &stop_data);
  if(stop_reason != SCAMPER_TRACE_STOP_NONE)
    {
      /* did we get a stop condition out of all that? */
      trace_stop(trace, stop_reason, stop_data);
      trace_next_mode(task);
    }
  else if(trace->hop_count == 255 || trace->hop_count == trace->hoplimit)
    {
      /* if not, has the hop limit now been reached? */
      trace_stop_hoplimit(trace);
      trace_next_mode(task);
    }
  else if(cwh != 0)
    {
      /* if not, keep probing */
      scamper_queue_probe(task->queue);
    }

  return 0;
}

/*
 * handleicmp_check_life
 *
 * we received an ICMP response while checking if the end-host is
 * responsive.
*/ static int handleicmp_check_life(scamper_task_t *task, scamper_icmp_resp_t *ir, trace_probe_t *probe) { scamper_trace_t *trace = task->data; trace_state_t *state = task->state; scamper_trace_hop_t *hop; if(probe->mode == MODE_TRACE) { /* record the response in the trace */ if((hop = trace_icmp_hop(probe, ir)) == NULL) { return -1; } trace_hopins(trace, hop); } else if(probe->mode == MODE_CHECK_LIFE) { /* * if we get a response to one of the packets checking existence, * then the path is not dead */ state->alive = 1; state->mode = MODE_TRACE; state->ttl = trace->hop_count+1; /* XXX */ state->attempt = 0; scamper_queue_probe(task->queue); } return 0; } static int handleicmp_pmtud_default(scamper_task_t *task, scamper_icmp_resp_t *ir, trace_probe_t *probe) { scamper_trace_t *trace = task->data; trace_state_t *state = task->state; scamper_trace_hop_t *hop; /* * if the response is for a probe that fits with the current * probing details, then record it */ if(probe->mode == MODE_PMTUD_DEFAULT && probe->size == state->header_size + state->payload_size) { if((hop = trace_icmp_hop(probe, ir)) == NULL) { return -1; } pmtud_hopins(trace, hop); state->last_fragmsg = hop; if(SCAMPER_ICMP_RESP_IS_PACKET_TOO_BIG(ir)) { /* * if the fragmentation required message did not include the * suggested packet size for the next hop, or if the suggested * MTU is larger than (or the same size) as the probe packet, then * abandon the pmtud operation at this point */ if(ir->ir_icmp_nhmtu == 0 || ir->ir_icmp_nhmtu >= probe->size) { state->mode = MODE_PMTUD_BADSUGG; pmtud_L2_init(state); scamper_queue_probe(task->queue); return 0; } /* * if the message supplied an MTU to use, but it is too small * to be probed, then stop probing here. 
*/ if(ir->ir_icmp_nhmtu < state->header_size) { scamper_queue_done(task->queue, scamper_holdtime_get()*1000); return 0; } state->attempt = 0; state->payload_size = ir->ir_icmp_nhmtu - state->header_size; scamper_queue_probe(task->queue); } else if(SCAMPER_ICMP_RESP_IS_TTL_EXP(ir) || SCAMPER_ICMP_RESP_IS_UNREACH(ir) || SCAMPER_ICMP_RESP_IS_ECHO_REPLY(ir)) { trace->pmtud->pmtu = probe->size; scamper_queue_done(task->queue, scamper_holdtime_get()*1000); } } return 0; } static int handleicmp_pmtud_silent_L2(scamper_task_t *task, scamper_icmp_resp_t *ir, trace_probe_t *probe) { trace_state_t *state = task->state; scamper_trace_hop_t *hop; assert(state->L2 != NULL); /* * if we get a response that is out of the bounds we are searching, it * could be a delayed message. at the moment, we just ignore the response. */ if(probe->size < state->L2->lower || state->L2->upper <= probe->size) { scamper_debug(__func__, "L2 search %d < %d || %d <= %d", probe->size, state->L2->lower, state->L2->upper, probe->size); return 0; } /* record the hop details */ if((hop = trace_icmp_hop(probe, ir)) == NULL) { return -1; } pmtud_L2_hop(state, hop); /* * if there is still space to search, reduce the search space and send * another probe */ if(probe->size + 1 != state->L2->upper) { /* * raise the lower bounds of our search based on successfully * receiving a response for a given packet size. 
*/ pmtud_L2_set_probesize(state, probe->size, state->L2->upper); } else { state->L2->lower = state->L2->out = probe->size; if(pmtud_TTL_init(task) == 1) { state->mode = MODE_PMTUD_SILENT_TTL; } else { scamper_queue_done(task->queue, scamper_holdtime_get()*1000); return 0; } } scamper_queue_probe(task->queue); return 0; } static int handleicmp_pmtud_silent_TTL(scamper_task_t *task, scamper_icmp_resp_t *ir, trace_probe_t *probe) { trace_state_t *state = task->state; scamper_trace_hop_t *hop; /* we got a TTL expired message */ if(SCAMPER_ICMP_RESP_IS_TTL_EXP(ir)) { /* record the hop details */ if((hop = trace_icmp_hop(probe, ir)) == NULL) { return -1; } pmtud_TTL_hop(state, hop); /* if there is no more TTL space to search, then we are done */ if(pmtud_TTL_set_probettl(task, probe->ttl, state->TTL->upper) == 0) { /* * if we are not finished with PMTU yet, put the trace back in * the queue */ if(pmtud_L2_search_end(task) == 1) { return 0; } } } /* * if we get a fragmentation required message during a TTL limited * search for the MTU inferred, then record the message and stop * the TTL limited search */ else if(SCAMPER_ICMP_RESP_IS_PACKET_TOO_BIG(ir) && ir->ir_icmp_nhmtu == state->L2->out) { /* record the hop details */ if((hop = trace_icmp_hop(probe, ir)) == NULL) { return -1; } pmtud_hopins((scamper_trace_t *)task->data, hop); state->attempt = 0; state->payload_size = ir->ir_icmp_nhmtu - state->header_size; state->ttl = 255; state->mode = MODE_PMTUD_DEFAULT; free(state->L2); state->L2 = NULL; free(state->TTL); state->TTL = NULL; } /* put the trace back into the probe queue */ scamper_queue_probe(task->queue); return 0; } /* * handleicmp_pmtud_badsugg * * we are in the badsugg state, which is used to infer a 'correct' next-hop * mtu size when the suggested packet size is no help. 
*/
static int handleicmp_pmtud_badsugg(scamper_task_t *task,
                                    scamper_icmp_resp_t *ir,
                                    trace_probe_t *probe)
{
  trace_state_t *state = task->state;
  scamper_trace_hop_t *hop;
  scamper_addr_t addr;
  int upper, lower;

  /* extract the source address of the ICMP response */
  if(scamper_icmp_resp_src(ir, &addr) != 0)
    {
      return -1;
    }

  /*
   * adjust the window we are searching based on where the response came
   * from and the size of the probe that caused the response
   */
  if(scamper_addr_cmp(state->last_fragmsg->hop_addr, &addr) == 0)
    {
      /*
       * same address as the hop stored in last_fragmsg: the probe size
       * becomes the new upper bound
       */
      lower = state->L2->lower;
      upper = probe->size;
    }
  else
    {
      /* a different address responded: the probe size is the new lower bound */
      lower = probe->size;
      upper = state->L2->upper;

      /* replace the layer-2 hop we get a response for with this hop */
      if((hop = trace_icmp_hop(probe, ir)) == NULL)
        {
          return -1;
        }

      pmtud_L2_hop(state, hop);
    }

  if(lower + 1 != upper)
    {
      /* still room between the bounds: choose the next size to probe */
      pmtud_L2_set_probesize(state, lower, upper);
    }
  else
    {
      /* terminate the search now */
      state->L2->lower = state->L2->out = lower;
      state->L2->upper = upper;

      /* if the pmtud is completed, then move on */
      if(pmtud_L2_search_end(task) == 1)
        {
          return 0;
        }
    }

  /* put the trace back into the probe queue */
  scamper_queue_probe(task->queue);

  return 0;
}

/*
 * do_trace_handle_icmp
 *
 * entry point for an ICMP response delivered to a trace task.  the
 * response is first matched to one of the probes this trace has sent (the
 * way the probe id is recovered depends on the trace type and address
 * family), and is then passed to the ICMP handler for the mode the trace
 * is currently in.
 *
 * always returns zero; the return value of the per-mode handler is not
 * propagated.
 */
static int do_trace_handle_icmp(scamper_task_t *task, scamper_icmp_resp_t *ir)
{
  /* per-mode ICMP handlers, indexed by state->mode */
  static int (*const func[])(scamper_task_t *, scamper_icmp_resp_t *,
                             trace_probe_t *) = {
    handleicmp_trace,            /* MODE_TRACE == 0x00 */
    handleicmp_check_life,       /* MODE_CHECK_LIFE == 0x01 */
    handleicmp_pmtud_default,    /* MODE_PMTUD_DEFAULT == 0x02 */
    handleicmp_pmtud_silent_L2,  /* MODE_PMTUD_SILENT_L2 == 0x03 */
    handleicmp_pmtud_silent_TTL, /* MODE_PMTUD_SILENT_TTL == 0x04 */
    handleicmp_pmtud_badsugg,    /* MODE_PMTUD_BADSUGG == 0x05 */
    NULL,                        /* MODE_RTSOCK == 0x06 */
  };
  scamper_trace_t *trace = task->data;
  trace_state_t *state = task->state;
  uint16_t id;
  uint8_t proto;

  assert(state->mode <= MODE_MAX);

  /*
   * if the trace is in a mode that does not handle ICMP responses, then
   * stop now
   */
  if(func[state->mode] == NULL)
    {
      return 0;
    }

  if(trace->type == SCAMPER_TRACE_TYPE_UDP ||
     trace->type == SCAMPER_TRACE_TYPE_UDP_PARIS)
    {
      /* if the ICMP type is not something that we care for, then drop it */
      if(SCAMPER_ICMP_RESP_IS_TTL_EXP(ir) == 0 &&
         SCAMPER_ICMP_RESP_IS_UNREACH(ir) == 0 &&
         SCAMPER_ICMP_RESP_IS_PACKET_TOO_BIG(ir) == 0)
        {
          return 0;
        }

      /*
       * if the ICMP response does not reference a UDP probe sent from our
       * source port to a destination probe we're likely to have probed, then
       * ignore the packet
       */
      if(SCAMPER_ICMP_RESP_INNER_IS_SET(ir) == 0 ||
         ir->ir_inner_ip_proto != IPPROTO_UDP ||
         ir->ir_inner_udp_sport != trace->sport)
        {
          return 0;
        }

      if(trace->type == SCAMPER_TRACE_TYPE_UDP)
        {
          /* the probe id is the offset of the quoted dport from the base dport */
          if(ir->ir_inner_udp_dport < trace->dport ||
             ir->ir_inner_udp_dport >= trace->dport+state->id_next)
            {
              return 0;
            }

          /* XXX: handle wrap-around */
          id = ir->ir_inner_udp_dport - trace->dport;
        }
      else if(trace->type == SCAMPER_TRACE_TYPE_UDP_PARIS)
        {
          /* paris probes all use the same destination port */
          if(ir->ir_inner_udp_dport != trace->dport)
            {
              return 0;
            }

          if(ir->ir_af == AF_INET)
            {
              /*
               * when the quoted UDP checksum and IP ID agree (and are
               * non-zero), the id is checksum - 1; otherwise fall back on
               * trace_ipid_fudge to recover the id from the IP ID.
               */
              if(ir->ir_inner_udp_sum == ir->ir_inner_ip_id &&
                 ir->ir_inner_udp_sum != 0)
                {
                  id = ntohs(ir->ir_inner_udp_sum) - 1;
                }
              else if(trace_ipid_fudge(state, ir->ir_inner_ip_id, &id) != 0)
                {
                  return 0;
                }
            }
          else
            {
              /*
               * not IPv4: use the quoted flow value when the quoted
               * checksum is zero, otherwise the checksum.
               * NOTE(review): a zero flow value wraps id to 0xffff here;
               * that appears to be rejected by the id_next check at the
               * end of this function -- confirm.
               */
              if(ir->ir_inner_udp_sum == 0)
                {
                  id = ir->ir_inner_ip_flow - 1;
                }
              else
                {
                  id = ntohs(ir->ir_inner_udp_sum) - 1;
                }
            }
        }
      else return 0;
    }
  else if(trace->type == SCAMPER_TRACE_TYPE_ICMP_ECHO ||
          trace->type == SCAMPER_TRACE_TYPE_ICMP_ECHO_PARIS)
    {
      /* if the ICMP type is not something that we care for, then drop it */
      if(SCAMPER_ICMP_RESP_IS_ECHO_REPLY(ir) == 0)
        {
          if(SCAMPER_ICMP_RESP_IS_TTL_EXP(ir) == 0 &&
             SCAMPER_ICMP_RESP_IS_UNREACH(ir) == 0 &&
             SCAMPER_ICMP_RESP_IS_PACKET_TOO_BIG(ir) == 0)
            {
              return 0;
            }

          /* the quoted packet must be an ICMP packet of the matching family */
          if(ir->ir_af == AF_INET) proto = IPPROTO_ICMP;
          else if(ir->ir_af == AF_INET6) proto = IPPROTO_ICMPV6;
          else return 0;

          /* the quoted echo id must be ours, and the sequence one we sent */
          if(SCAMPER_ICMP_RESP_INNER_IS_SET(ir) == 0 ||
             ir->ir_inner_ip_proto != proto ||
             ir->ir_inner_icmp_id != trace->sport ||
             ir->ir_inner_icmp_seq >= state->id_next)
            {
              return 0;
            }

          id = ir->ir_inner_icmp_seq;
        }
      else
        {
          /* an echo reply carries the id and sequence directly */
          if(ir->ir_icmp_id != trace->sport ||
             ir->ir_icmp_seq >= state->id_next)
            {
              return 0;
            }

          id = ir->ir_icmp_seq;
        }
    }
  else if(trace->type == SCAMPER_TRACE_TYPE_TCP)
    {
      /* if the ICMP type is not something that we care for, then drop it */
      if(SCAMPER_ICMP_RESP_IS_TTL_EXP(ir) == 0 &&
         SCAMPER_ICMP_RESP_IS_UNREACH(ir) == 0 &&
         SCAMPER_ICMP_RESP_IS_PACKET_TOO_BIG(ir) == 0)
        {
          return 0;
        }

      /*
       * if the ICMP response does not reference a TCP probe sent from our
       * source port to the destination port specified then ignore the
       * ICMP packet
       */
      if(SCAMPER_ICMP_RESP_INNER_IS_SET(ir) == 0 ||
         ir->ir_inner_ip_proto != IPPROTO_TCP ||
         ir->ir_inner_tcp_sport != trace->sport ||
         ir->ir_inner_tcp_dport != trace->dport)
        {
          return 0;
        }

      if(ir->ir_af == AF_INET)
        {
          /* determine which probe id the ip id corresponds to */
          if(trace_ipid_fudge(state, ir->ir_inner_ip_id, &id) != 0)
            {
              return 0;
            }
        }
      else
        {
          /* the id is the quoted flow value less one; zero is not valid */
          if(ir->ir_inner_ip_flow == 0)
            {
              return 0;
            }

          id = ir->ir_inner_ip_flow - 1;
        }
    }
  else
    {
      return 0;
    }

  /* only hand the response on if the id refers to a probe we have a record for */
  if(id < state->id_next)
    {
      func[state->mode](task, ir, state->probes[id]);
    }

  return 0;
}

/*
 * timeout_trace
 *
 * this function is called if the trace timed out on the wait queue, and
 * all allotted attempts have been sent.
*/ static int timeout_trace(scamper_task_t *task) { scamper_trace_t *trace = task->data; trace_state_t *state = task->state; scamper_trace_hop_t *hop; int i, deadpath; uint8_t stop_reason, stop_data; /* we tried this hop, so move onto the next */ trace->hop_count++; state->ttl++; /* * if we probed for all attempts on the hop, then check to see if we * got any responses on this hop, and if we did, check to see if we * should stop probing this target yet */ if(trace->flags & SCAMPER_TRACE_FLAG_ALLATTEMPTS) { for(hop = trace->hops[trace->hop_count-1];hop != NULL; hop=hop->hop_next) { /* * first, check to see if there is a reason to stop probing with * this particular hop record */ trace_stop_reason(trace, hop, &stop_reason, &stop_data); if(stop_reason != SCAMPER_TRACE_STOP_NONE) { trace_stop(trace, stop_reason, stop_data); trace_next_mode(task); return 0; } } } if(trace->hop_count == 255 || trace->hop_count == trace->hoplimit) { trace_stop_hoplimit(trace); trace_next_mode(task); return 0; } /* * if we haven't checked to see if the path is dead yet, check to see * if we should do so at this time. a dead path is defined as a path * that has an unresponsive target host, which we stop tracing after * five unresponsive hops. 
*/ if(state->alive == 0 && trace->hop_count >= trace->gaplimit) { deadpath = 1; for(i=0; igaplimit; i++) { if(trace->hops[trace->hop_count-1-i] != NULL) { deadpath = 0; break; } } if(deadpath != 0) { if((trace->flags & SCAMPER_TRACE_FLAG_GAPCONT) != 0) { state->mode = MODE_CHECK_LIFE; state->ttl = 255; } else { trace_stop_dead(trace); trace_next_mode(task); } } } return 0; } static int timeout_check_life(scamper_task_t *task) { /* if we get no response, then the path is considered dead */ trace_stop_dead((scamper_trace_t *)task->data); scamper_queue_done(task->queue, scamper_holdtime_get()*1000); return 0; } static int timeout_pmtud_default(scamper_task_t *task) { trace_state_t *state = task->state; pmtud_L2_init(state); state->mode = MODE_PMTUD_SILENT_L2; return 0; } static int timeout_pmtud_silent_L2(scamper_task_t *task) { trace_state_t *state = task->state; int size = state->header_size + state->payload_size; /* * have we scanned the L2 table to the official minimum MTU? * if we have, then PMTU fails and we abort. */ if(state->L2->idx == 0) { scamper_queue_done(task->queue, scamper_holdtime_get()*1000); return 0; } /* * we did not get a response for this probe size * if we can halve the search space again, then do that */ if(state->L2->lower + 1 != size) { pmtud_L2_set_probesize(state, state->L2->lower, size); } else { state->L2->out = state->L2->lower; /* set the bounds of the TTL search */ if(pmtud_TTL_init(task) == 1) { state->mode = MODE_PMTUD_SILENT_TTL; } else { scamper_queue_done(task->queue, scamper_holdtime_get()*1000); } } return 0; } static int timeout_pmtud_silent_TTL(scamper_task_t *task) { trace_state_t *state = task->state; /* * select another TTL to probe with, if possible. 
if not, then * the search halts and we move on */ if(pmtud_TTL_set_probettl(task, state->TTL->lower, state->ttl) == 0) { /* finish this portion of the TTL limited search */ pmtud_L2_search_end(task); } return 0; } /* * timeout_pmtud_badsugg * * if we timeout while trying to determine the underlying MTU on a path * where a router gives a bad suggestion, chances are that an ICMP blackhole * exists later in the path. try sending a larger packet, if we can. */ static int timeout_pmtud_badsugg(scamper_task_t *task) { trace_state_t *state = task->state; int lower, upper; lower = state->header_size + state->payload_size; upper = state->L2->upper; pmtud_L2_hop(state, NULL); if(lower + 1 != upper) { pmtud_L2_set_probesize(state, lower, upper); } else { /* terminate the search now */ state->L2->lower = state->L2->out = lower; state->L2->upper = upper; /* if the pmtud is completed, then move on */ if(pmtud_L2_search_end(task) == 1) { return 0; } } return 0; } static int timeout_rtsock(scamper_task_t *task) { scamper_trace_t *trace = task->data; trace_state_t *state = task->state; /* * if we can't get sense out of the route socket, then do the trace * anyway, but without pmtud or datalink support */ trace->flags &= ~(SCAMPER_TRACE_FLAG_PMTUD | SCAMPER_TRACE_FLAG_DL); state->mode = MODE_TRACE; return 0; } /* * do_trace_handle_timeout * * the trace has expired while sitting on the wait queue. * handle this event appropriately. 
*/
static int do_trace_handle_timeout(scamper_task_t *task)
{
  /* per-mode timeout handlers, indexed by state->mode */
  static int (* const func[])(scamper_task_t *) = {
    timeout_trace,            /* MODE_TRACE == 0x00 */
    timeout_check_life,       /* MODE_CHECK_LIFE == 0x01 */
    timeout_pmtud_default,    /* MODE_PMTUD_DEFAULT == 0x02 */
    timeout_pmtud_silent_L2,  /* MODE_PMTUD_SILENT_L2 == 0x03 */
    timeout_pmtud_silent_TTL, /* MODE_PMTUD_SILENT_TTL == 0x04 */
    timeout_pmtud_badsugg,    /* MODE_PMTUD_BADSUGG == 0x05 */
    timeout_rtsock,           /* MODE_RTSOCK == 0x06 */
  };
  scamper_trace_t *trace = task->data;
  trace_state_t *state = task->state;

  assert(state->mode <= MODE_MAX);

  /*
   * if we have sent all allotted attempts for this probe type, then
   * handle this particular probe failing.  otherwise nothing is done
   * here and zero is returned.
   */
  if(state->attempt == trace->attempts)
    {
      /* we're probably going to send another probe, so reset the attempt # */
      state->attempt = 0;

      /* call the function that handles a timeout in this particular mode */
      func[state->mode](task);
    }

  return 0;
}

/*
 * dl_trace_in
 *
 * handle a datalink record for an inbound packet which was sent
 * for a probe in the trace state.
 *
 * in this case, we use the timestamp and the ethernet mac address
 * [if available] to update the hop record.
 */
static void dl_trace_in(scamper_task_t *task, scamper_dl_rec_t *dl,
                        trace_probe_t *probe)
{
  scamper_trace_t *trace = task->data;
  scamper_trace_hop_t *hop;
  struct timeval tv;
#ifndef NDEBUG
  /* counts hop records whose rtt was replaced below (debug builds only) */
  int adjflag = 0;
#endif

  /*
   * if the datalink record does not contain anything we care about,
   * then return now
   */
  if((dl->dl_flags & SCAMPER_DL_FLAG_TIMESTAMP) == 0 &&
     (dl->dl_type != SCAMPER_DL_TYPE_ETHERNET) &&
     trace->type != SCAMPER_TRACE_TYPE_TCP)
    {
      return;
    }

  /* only record a datalink record for the first time */
  if((probe->flags & TRACE_PROBE_FLAG_DL_RX) || probe->rx_mac != NULL)
    {
      return;
    }

  /*
   * if this is an inbound TCP packet, then we need to create a hop record
   * with data out of it.
   */
  if(dl->dl_ip_proto == IPPROTO_TCP)
    {
      /* create a hop record based off the TCP data */
      if((hop = trace_tcp_hop(probe, dl)) == NULL)
        {
          return;
        }
      trace_hopins(trace, hop);
      probe->rx++;

      /*
       * we can probe for the next hop only if we have sent the allotted
       * attempts, or if we are only probing for the first response.
       * otherwise another probe is queued.  when done, the trace is
       * marked as completed and the task is finished.
       */
      if(probe->id+1 != trace->attempts &&
         (trace->flags & SCAMPER_TRACE_FLAG_ALLATTEMPTS))
        {
          scamper_queue_probe(task->queue);
        }
      else
        {
          trace->hop_count++;
          trace_stop_completed(trace);
          scamper_queue_done(task->queue, scamper_holdtime_get()*1000);
        }
    }

  /* on ethernet, remember the source mac address the packet arrived from */
  if(dl->dl_type == SCAMPER_DL_TYPE_ETHERNET)
    {
      probe->rx_mac = scamper_addrcache_get_ethernet(addrcache,
                                                     dl->dl_lladdr_src);
      if(probe->rx_mac == NULL)
        {
          return;
        }
    }

  /* adjust the rtt based on the timestamp included in the datalink record */
  if((dl->dl_flags & SCAMPER_DL_FLAG_TIMESTAMP) != 0 &&
     dl->dl_ip_proto != IPPROTO_TCP)
    {
      probe->flags |= TRACE_PROBE_FLAG_DL_RX;
      timeval_cpy(&probe->rx_tv, &dl->dl_tv);

      /* hop records only need updating if a response was already recorded */
      if(probe->rx > 0)
        {
          timeval_rtt(&tv, &probe->tx_tv, &probe->rx_tv);

          /*
           * walk the hop records stored for this probe's ttl, skipping
           * records with a larger probe id and stopping at the first
           * record with a smaller one, so that only the records with
           * hop_probe_id == probe->id + 1 are updated.
           */
          for(hop=trace->hops[probe->ttl-1]; hop != NULL; hop = hop->hop_next)
            {
              if(probe->id + 1 < hop->hop_probe_id) continue;
              if(probe->id + 1 > hop->hop_probe_id) break;

              if(dl->dl_flags & SCAMPER_DL_FLAG_TIMESTAMP)
                {
#ifndef NDEBUG
                  adjflag++;
                  scamper_debug(__func__,
                                "hop %d.%06d dl_rec %d.%06d diff %lld",
                                hop->hop_rtt.tv_sec, hop->hop_rtt.tv_usec,
                                tv.tv_sec, tv.tv_usec,
                                timeval_diff_usec(&hop->hop_rtt, &tv));
#endif

                  /* the receive timestamp now comes from the datalink */
                  hop->hop_flags &= ~(SCAMPER_TRACE_HOP_FLAG_TS_SOCK_RX);
                  hop->hop_flags |= SCAMPER_TRACE_HOP_FLAG_TS_DL_RX;
                  timeval_cpy(&hop->hop_rtt, &tv);
                }
            }

#ifndef NDEBUG
          if(adjflag == 0)
            {
              scamper_debug(__func__, "rtt %d.%06d", tv.tv_sec, tv.tv_usec);
            }
#endif
        }
    }

  return;
}

/*
 * dl_trace_out
 *
 * handle a datalink record for an outbound packet which was sent
 * for a probe in the trace state.
 *
 * in this case, we use the timestamp to record the time at which the
 * packet was queued at the network interface.
*/
static void dl_trace_out(scamper_task_t *task, scamper_dl_rec_t *dl,
                         trace_probe_t *probe)
{
  scamper_trace_t     *trace = task->data;
  scamper_trace_hop_t *rec;
  int64_t              delta;

  /* a record without a timestamp has nothing to offer */
  if((dl->dl_flags & SCAMPER_DL_FLAG_TIMESTAMP) == 0)
    return;

  /* print the difference in timestamps if built for debugging */
  scamper_debug(__func__, "probe %d.%06d dl_rec %d.%06d diff %lld",
                probe->tx_tv.tv_sec, probe->tx_tv.tv_usec,
                dl->dl_tv.tv_sec, dl->dl_tv.tv_usec,
                timeval_diff_usec(&probe->tx_tv, &dl->dl_tv));

  if(probe->rx > 0)
    {
      /*
       * work out how far apart the transmit timestamp taken with
       * gettimeofday and the transmit timestamp reported by the datalink
       * are; the rtt of each matching hop record is adjusted by this.
       *
       * the value will almost certainly be negative: the datalink
       * timestamp represents when the probe was seen 'on the wire', which
       * is after the user-space timestamp taken around sendto().
       *
       * a negative value is wanted, so it can be 'added' to the rtt.
       */
      delta = timeval_diff_usec(&probe->tx_tv, &dl->dl_tv);

      /*
       * only records with hop_probe_id == probe->id + 1 are adjusted;
       * records with a larger id are stepped over, and the walk ends at
       * the first record with a smaller id.
       */
      rec = trace->hops[probe->ttl-1];
      while(rec != NULL && probe->id + 1 <= rec->hop_probe_id)
        {
          if(probe->id + 1 == rec->hop_probe_id)
            {
              /* datalink provided transmit timestamp */
              rec->hop_flags |= SCAMPER_TRACE_HOP_FLAG_TS_DL_TX;
              timeval_add_usec(&rec->hop_rtt, delta);
            }
          rec = rec->hop_next;
        }
    }

  /* update the TX timestamp of the probe */
  probe->flags |= TRACE_PROBE_FLAG_DL_TX;
  timeval_cpy(&probe->tx_tv, &dl->dl_tv);

  return;
}

/*
 * do_trace_handle_dl
 *
 * handle a datalink record that may have something useful for the
 * traceroute, such as a more accurate timestamp or a mac address of
 * the host that delivered the response to us.
*/ static int do_trace_handle_dl(scamper_task_t *task, scamper_dl_rec_t *dl) { static void (* const func[][2])(scamper_task_t *, scamper_dl_rec_t *, trace_probe_t *) = { { dl_trace_in, dl_trace_out }, /* MODE_TRACE == 0x00 */ { NULL, NULL}, /* MODE_CHECK_LIFE == 0x01 */ { NULL, NULL}, /* MODE_PMTUD_DEFAULT == 0x02 */ { NULL, NULL}, /* MODE_PMTUD_SILENT_L2 == 0x03 */ { NULL, NULL}, /* MODE_PMTUD_SILENT_TTL == 0x04 */ { NULL, NULL}, /* MODE_PMTUD_BADSUGG == 0x05 */ { NULL, NULL}, /* MODE_RTSOCK == 0x06 */ }; scamper_trace_t *trace = task->data; trace_state_t *state = task->state; trace_probe_t *probe; uint16_t probe_id; int direction; if(trace->type == SCAMPER_TRACE_TYPE_UDP || trace->type == SCAMPER_TRACE_TYPE_UDP_PARIS) { if(dl->dl_ip_proto == IPPROTO_UDP) { if(dl->dl_udp_sport != trace->sport) return 0; direction = 1; if(trace->type == SCAMPER_TRACE_TYPE_UDP) { probe_id = dl->dl_udp_dport - trace->dport; } else { probe_id = ntohs(dl->dl_udp_sum) - 1; } } else if(SCAMPER_DL_IS_ICMP(dl)) { if(SCAMPER_DL_IS_ICMP_TTL_EXP(dl) == 0 && SCAMPER_DL_IS_ICMP_UNREACH(dl) == 0 && SCAMPER_DL_IS_ICMP_PACKET_TOO_BIG(dl) == 0) { return 0; } if(dl->dl_icmp_ip_proto != IPPROTO_UDP) return 0; if(dl->dl_icmp_udp_sport != trace->sport) return 0; direction = 0; if(trace->type == SCAMPER_TRACE_TYPE_UDP) { probe_id = dl->dl_icmp_udp_dport - trace->dport; } else { if(dl->dl_icmp_udp_dport != trace->dport) { return 0; } if(dl->dl_af == AF_INET) { if(dl->dl_icmp_udp_sum == dl->dl_icmp_ip_id && dl->dl_icmp_udp_sum != 0) { probe_id = ntohs(dl->dl_icmp_udp_sum) - 1; } else if(trace_ipid_fudge(state,dl->dl_icmp_ip_id, &probe_id) != 0) { return 0; } } else { assert(dl->dl_af == AF_INET6); if(dl->dl_icmp_udp_sum == 0) { if(dl->dl_icmp_ip_flow == 0) { return 0; } probe_id = dl->dl_icmp_ip_flow - 1; } else { probe_id = ntohs(dl->dl_icmp_udp_sum) - 1; } } } } else return 0; } else if(trace->type == SCAMPER_TRACE_TYPE_ICMP_ECHO || trace->type == SCAMPER_TRACE_TYPE_ICMP_ECHO_PARIS) { 
if(SCAMPER_DL_IS_ICMP(dl) == 0) return 0; if(SCAMPER_DL_IS_ICMP_ECHO_REQUEST(dl)) { if(dl->dl_icmp_id != trace->sport) return 0; probe_id = dl->dl_icmp_seq; direction = 1; } else if(SCAMPER_DL_IS_ICMP_ECHO_REPLY(dl)) { if(dl->dl_icmp_id != trace->sport) return 0; probe_id = dl->dl_icmp_seq; direction = 0; } else if((SCAMPER_DL_IS_ICMP_TTL_EXP(dl) || SCAMPER_DL_IS_ICMP_UNREACH(dl) || SCAMPER_DL_IS_ICMP_PACKET_TOO_BIG(dl)) && SCAMPER_DL_IS_ICMP_PROTO_ICMP_ECHO_REQ(dl)) { if(dl->dl_icmp_icmp_id != trace->sport) return 0; probe_id = dl->dl_icmp_icmp_seq; direction = 0; } else return 0; } else if(trace->type == SCAMPER_TRACE_TYPE_TCP) { if(dl->dl_ip_proto == IPPROTO_TCP) { /* * if the syn flag (and only the syn flag is set) and the sport * and dport match what we probe with, then the probe is probably * an outgoing one. */ if((dl->dl_tcp_flags & TH_SYN) == TH_SYN && (dl->dl_tcp_flags & ~TH_SYN) == 0 && dl->dl_tcp_sport == trace->sport) { if(dl->dl_af == AF_INET) probe_id = dl->dl_ip_id - 1; else probe_id = dl->dl_ip_flow - 1; direction = 1; } else if(dl->dl_tcp_sport == trace->dport && dl->dl_tcp_dport == trace->sport) { probe_id = state->id_next - 1; direction = 0; } else return 0; } else if(SCAMPER_DL_IS_ICMP(dl)) { if(SCAMPER_DL_IS_ICMP_TTL_EXP(dl) == 0 && SCAMPER_DL_IS_ICMP_UNREACH(dl) == 0 && SCAMPER_DL_IS_ICMP_PACKET_TOO_BIG(dl) == 0) { return 0; } if(dl->dl_icmp_ip_proto != IPPROTO_TCP) return 0; if(dl->dl_icmp_tcp_sport != trace->sport) return 0; if(dl->dl_icmp_tcp_dport != trace->dport) return 0; /* determine which probe the ICMP response corresponds to */ if(dl->dl_af == AF_INET) { if(trace_ipid_fudge(state, dl->dl_icmp_ip_id, &probe_id) != 0) { return 0; } } else { if(dl->dl_icmp_ip_flow == 0) return 0; probe_id = dl->dl_icmp_ip_flow - 1; } direction = 0; } else return 0; } else return 0; /* make sure the probe id is something sane */ if(probe_id >= state->id_next) { return 0; } probe = state->probes[probe_id]; assert(probe->mode <= MODE_MAX); 
if(func[probe->mode][direction] != NULL) { func[probe->mode][direction](task, dl, probe); } return 0; } static int rt_framing(scamper_task_t *task, scamper_rt_rec_t *rt) { scamper_trace_t *trace = task->data; trace_state_t *state = task->state; scamper_dl_t *dl = scamper_fd_write_state(state->dl); int af, tx_type; /* * determine the header to append when sending packets on the datalink. * if the datalink code cannot send packets, then we can't do PMTUD */ tx_type = scamper_dl_tx_type(dl); /* * determine the size of the header to prepend to the packet to put it * on the datalink */ switch(tx_type) { case SCAMPER_DL_TX_UNSUPPORTED: return -1; case SCAMPER_DL_TX_ETHERNET: case SCAMPER_DL_TX_ETHLOOP: state->dl_size = 14; break; case SCAMPER_DL_TX_NULL: state->dl_size = sizeof(int); break; case SCAMPER_DL_TX_RAW: state->dl_size = 0; return 0; default: scamper_debug(__func__, "unhandled tx_type %d", tx_type); return -1; } /* allocate the header */ if((state->dl_hdr = malloc(state->dl_size)) == NULL) { printerror(errno, strerror, __func__, "could not malloc dl_hdr"); return -1; } if(tx_type == SCAMPER_DL_TX_ETHERNET) { /* * allocate a datalink header to use, and determine the source mac * address to use */ if(if_getmac(rt->ifindex, state->dl_hdr+6) == -1) { scamper_debug(__func__, "could not get source mac"); return -1; } /* * determine the destination mac address (the target). 
*/ if(rt->gwaddr == NULL) { /* no gateway address means destination is on local network */ memcpy(state->dl_hdr, state->dl_hdr+6, 6); if(scamper_addr2mac_whohas(rt->ifindex, trace->src, trace->dst, state->dl_hdr) != 1) { scamper_debug(__func__, "could not get destination mac"); return -1; } } else if(rt->gwaddr->type == SCAMPER_ADDR_TYPE_ETHERNET) { /* the gateway mac address was provided by the route socket */ memcpy(state->dl_hdr, rt->gwaddr->addr, 6); } else { /* the gateway address was returned as an IP */ memcpy(state->dl_hdr, state->dl_hdr+6, 6); if(scamper_addr2mac_whohas(rt->ifindex, trace->src, rt->gwaddr, state->dl_hdr) != 1) { scamper_debug(__func__, "could not get gateway mac"); return -1; } } if(trace->dst->type == SCAMPER_ADDR_TYPE_IPV4) { state->dl_hdr[12] = 0x08; state->dl_hdr[13] = 0x00; } else if(trace->dst->type == SCAMPER_ADDR_TYPE_IPV6) { state->dl_hdr[12] = 0x86; state->dl_hdr[13] = 0xDD; } else return -1; } else if(tx_type == SCAMPER_DL_TX_NULL) { if(trace->dst->type == SCAMPER_ADDR_TYPE_IPV4) { af = AF_INET; } else if(trace->dst->type == SCAMPER_ADDR_TYPE_IPV6) { af = AF_INET6; } else return -1; memcpy(state->dl_hdr, &af, sizeof(int)); } else if(tx_type == SCAMPER_DL_TX_ETHLOOP) { memset(state->dl_hdr, 0, 12); if(trace->dst->type == SCAMPER_ADDR_TYPE_IPV4) { state->dl_hdr[12] = 0x08; state->dl_hdr[13] = 0x00; } else if(trace->dst->type == SCAMPER_ADDR_TYPE_IPV6) { state->dl_hdr[12] = 0x86; state->dl_hdr[13] = 0xDD; } else return -1; } else return -1; return 0; } static int do_trace_handle_rt(scamper_task_t *task, scamper_rt_rec_t *rt) { scamper_trace_t *trace = task->data; trace_state_t *state = task->state; if(state->mode != MODE_RTSOCK) { return 0; } /* if there was a problem getting the ifindex, handle that */ if(rt->error != 0 || rt->ifindex < 0) { printerror(errno, strerror, __func__, "could not get ifindex"); goto err; } /* * if scamper is supposed to get tx timestamps from the datalink, or * scamper needs the datalink to transmit 
packets, then try and get a * datalink on the ifindex specified. */ if((state->dl = scamper_fd_dl(rt->ifindex)) == NULL) { scamper_debug(__func__, "could not get dl for %d", rt->ifindex); goto err; } /* * if we're doing path MTU discovery debugging, or doing tcp traceroute, * or doing udp paris traceroute, determine the underlying framing to use * with each probe packet that will be sent on the datalink. */ if((trace->flags & SCAMPER_TRACE_FLAG_PMTUD) != 0 || trace->type == SCAMPER_TRACE_TYPE_TCP || trace->type == SCAMPER_TRACE_TYPE_UDP_PARIS) { if(rt_framing(task, rt) != 0) { goto err; } } done: state->mode = MODE_TRACE; state->attempt = 0; scamper_queue_probe(task->queue); return 0; err: if(trace->type == SCAMPER_TRACE_TYPE_TCP || trace->type == SCAMPER_TRACE_TYPE_UDP_PARIS) { goto harderr; } trace->flags &= ~(SCAMPER_TRACE_FLAG_PMTUD | SCAMPER_TRACE_FLAG_DL); goto done; harderr: trace_handleerror(task, errno); return -1; } static int do_trace_write(scamper_task_t *task) { scamper_outfile_t *outfile = scamper_source_getoutfile(task->source); scamper_file_t *sf = scamper_outfile_getfile(outfile); scamper_file_write_trace(sf, (scamper_trace_t *)task->data); return 0; } static void trace_state_free(trace_state_t *state) { trace_probe_t *probe; int i; /* free the probe records scamper kept */ if(state->probes != NULL) { for(i=0; iid_next; i++) { probe = state->probes[i]; if(probe->rx_mac != NULL) { scamper_addr_free(probe->rx_mac); } free(probe); } free(state->probes); } if(state->dl != NULL) scamper_fd_free(state->dl); if(state->icmp != NULL) scamper_fd_free(state->icmp); if(state->probe != NULL) scamper_fd_free(state->probe); if(state->route != NULL) scamper_fd_free(state->route); if(state->dl_hdr != NULL) free(state->dl_hdr); free(state); return; } static int trace_state_alloc(scamper_task_t *task) { scamper_trace_t *trace = task->data; trace_state_t *state; int id_max; assert(trace != NULL); /* allocate struct to keep state while processing the trace */ if((state 
= malloc_zero(sizeof(trace_state_t))) == NULL) { goto err; } /* allocate memory to record hops */ state->alloc_hops = TRACE_ALLOC_HOPS; if(trace->firsthop >= state->alloc_hops) { if(state->alloc_hops + (uint16_t)trace->firsthop > 256) { state->alloc_hops = 256; } else { state->alloc_hops += trace->firsthop; } } if(scamper_trace_hops_alloc(trace, state->alloc_hops) == -1) { goto err; } /* allocate enough ids to probe each hop with max number of attempts */ id_max = (state->alloc_hops - trace->firsthop + 2) * trace->attempts; /* allocate enough space to store state for each probe */ if((state->probes = malloc(sizeof(trace_probe_t *) * id_max)) == NULL) { goto err; } /* if scamper has to get the ifindex, then start in the rtsock mode */ if((trace->flags & (SCAMPER_TRACE_FLAG_PMTUD|SCAMPER_TRACE_FLAG_DL)) != 0 || trace->type == SCAMPER_TRACE_TYPE_TCP || trace->type == SCAMPER_TRACE_TYPE_UDP_PARIS) { state->mode = MODE_RTSOCK; if((state->route = scamper_fd_rtsock()) == NULL) { goto err; } } else { state->mode = MODE_TRACE; } state->dl = NULL; state->dl_hdr = NULL; state->dl_size = 0; state->alive = 0; state->ttl = trace->firsthop; state->attempt = 0; state->header_size = scamper_trace_probe_headerlen(trace); state->payload_size = trace->probe_size - state->header_size; state->id_next = 0; state->id_max = id_max; if(trace->dst->type == SCAMPER_ADDR_TYPE_IPV4) { state->icmp = scamper_fd_icmp4(); } else if(trace->dst->type == SCAMPER_ADDR_TYPE_IPV6) { state->icmp = scamper_fd_icmp6(); } else goto err; switch(trace->type) { case SCAMPER_TRACE_TYPE_TCP: if(trace->dst->type == SCAMPER_ADDR_TYPE_IPV4) state->probe = scamper_fd_tcp4(scamper_sport_get()); else state->probe = scamper_fd_tcp6(scamper_sport_get()); break; case SCAMPER_TRACE_TYPE_ICMP_ECHO: case SCAMPER_TRACE_TYPE_ICMP_ECHO_PARIS: if(trace->dst->type == SCAMPER_ADDR_TYPE_IPV4) state->probe = scamper_fd_icmp4(); else state->probe = scamper_fd_icmp6(); break; case SCAMPER_TRACE_TYPE_UDP: case 
SCAMPER_TRACE_TYPE_UDP_PARIS: if(trace->dst->type == SCAMPER_ADDR_TYPE_IPV4) state->probe = scamper_fd_udp4(scamper_sport_get()); else state->probe = scamper_fd_udp6(scamper_sport_get()); break; } if(state->icmp == NULL || state->probe == NULL) { goto err; } task->state = state; return 0; err: if(state != NULL) trace_state_free(state); return -1; } static void do_trace_free(scamper_task_t *task) { scamper_trace_t *trace; trace_state_t *state; /* free any state kept */ if((state = task->state) != NULL) { trace_state_free(state); } /* free any trace data collected */ if((trace = task->data) != NULL) { scamper_trace_free(trace); } return; } /* * do_trace_probe * * time to probe, so send the packet. */ static int do_trace_probe(scamper_task_t *task) { scamper_trace_t *trace = task->data; trace_state_t *state = task->state; trace_probe_t *tp = NULL; scamper_probe_t probe; uint32_t sum; uint16_t u16, i; trace_probe_t **probes; uint8_t *buf; assert(trace != NULL); if(state != NULL) { assert(state->attempt < trace->attempts); assert(state->id_next <= state->id_max); assert(state->alloc_hops > 0); assert(state->alloc_hops <= 256); assert(state->ttl != 0); } else { /* timestamp when the trace began */ gettimeofday_wrap(&trace->start); /* determine the source address used for sending probes */ if((trace->src = scamper_getsrc(trace->dst)) == NULL) { trace_handleerror(task, errno); return -1; } /* allocate state and store it with the task */ if(trace_state_alloc(task) != 0) { trace_handleerror(task, errno); return -1; } state = task->state; } /* allocate some more space in the trace to store replies, if necessary */ if(state->alloc_hops == trace->hop_count) { /* * figure out exactly how many hops should be allocated in the * trace structure */ if(256 - state->alloc_hops <= TRACE_ALLOC_HOPS) { u16 = state->alloc_hops + TRACE_ALLOC_HOPS; } else { u16 = 256; } /* allocate the new hops */ if(scamper_trace_hops_alloc(trace, u16) != 0) { printerror(errno, strerror, __func__, "could 
not realloc hops"); trace_handleerror(task, errno); return -1; } /* initialise the new hops to have null pointers */ for(i=state->alloc_hops; ihops[i] = NULL; } state->alloc_hops = u16; } /* allocate some more space to store probes, if necessary */ if(state->id_next == state->id_max) { u16 = state->id_max + TRACE_ALLOC_HOPS; probes = realloc(state->probes, sizeof(trace_probe_t *) * u16); if(probes == NULL) { printerror(errno, strerror, __func__, "could not realloc"); trace_handleerror(task, errno); return -1; } state->id_max = u16; state->probes = probes; } if(state->mode == MODE_RTSOCK) { if(scamper_rtsock_getroute(state->route, trace->dst) != 0) { trace->flags &= ~(SCAMPER_TRACE_FLAG_PMTUD | SCAMPER_TRACE_FLAG_DL); state->mode = MODE_TRACE; state->attempt = 0; } else { state->attempt++; scamper_queue_wait(task->queue, trace->wait * 1000); return 0; } } /* allocate a larger global pktbuf if needed */ if(pktbuf_len < state->payload_size) { if((buf = realloc(pktbuf, state->payload_size)) == NULL) { printerror(errno, strerror, __func__, "could not realloc"); trace_handleerror(task, errno); return -1; } pktbuf = buf; pktbuf_len = state->payload_size; memset(pktbuf, 0, pktbuf_len); } probe.pr_ip_src = trace->src; probe.pr_ip_dst = trace->dst; probe.pr_ip_tos = trace->tos; probe.pr_ip_ttl = state->ttl; probe.pr_ip_id = 0; probe.pr_ip_flow = 0; probe.pr_data = pktbuf; probe.pr_len = state->payload_size; probe.pr_ipoptc = 0; probe.pr_ipopts = NULL; probe.pr_fd = scamper_fd_fd_get(state->probe); if((state->mode == MODE_PMTUD_DEFAULT || state->mode == MODE_PMTUD_SILENT_L2 || state->mode == MODE_PMTUD_SILENT_TTL || state->mode == MODE_PMTUD_BADSUGG || trace->type == SCAMPER_TRACE_TYPE_TCP || trace->type == SCAMPER_TRACE_TYPE_UDP_PARIS) && state->dl != NULL) { probe.pr_dl = scamper_fd_write_state(state->dl); probe.pr_dl_hdr = state->dl_hdr; probe.pr_dl_size = state->dl_size; } else { probe.pr_dl = NULL; probe.pr_dl_hdr = NULL; probe.pr_dl_size = 0; } if(trace->type == 
SCAMPER_TRACE_TYPE_UDP || trace->type == SCAMPER_TRACE_TYPE_UDP_PARIS) { probe.pr_ip_proto = IPPROTO_UDP; probe.pr_udp_sport = trace->sport; probe.pr_udp_dport = trace->dport; /* * traditional traceroute identifies probes by varying the UDP * destination port number. UDP-based paris traceroute identifies * probes by varying the UDP checksum -- accomplished by manipulating * the payload of the packet to get sequential values for the checksum */ if(trace->type == SCAMPER_TRACE_TYPE_UDP) { probe.pr_udp_dport += state->id_next; } else { /* * hack the checksum to be our id field by setting the checksum * id we want into the packet's body, then calculate the checksum * across the packet, and then set the packet's body to be the * value returned for the checksum. this effectively swaps two * 16 bit quantities in the packet */ u16 = htons(state->id_next + 1); memcpy(probe.pr_data, &u16, 2); if(trace->dst->type == SCAMPER_ADDR_TYPE_IPV4) { /* * while the paris traceroute paper says that the payload of the * packet is set so that the checksum field can be used to * identify a returned probe, the paris traceroute code uses the * IP ID field. * this is presumably because FreeBSD systems seem to reset the * UDP checksum quoted in ICMP destination unreachable messages. * scamper's paris traceroute implementation used both IP ID and * UDP checksum. 
*/ probe.pr_ip_id = state->id_next + 1; u16 = scamper_udp4_cksum(&probe); } else { /* * set the flow ID in case checksum is mangled like the IPv4 * UDP checksum seems to be on some systems */ probe.pr_ip_flow = state->id_next + 1; u16 = scamper_udp6_cksum(&probe); } memcpy(probe.pr_data, &u16, 2); } } else if(trace->type == SCAMPER_TRACE_TYPE_ICMP_ECHO || trace->type == SCAMPER_TRACE_TYPE_ICMP_ECHO_PARIS) { switch(trace->dst->type) { case SCAMPER_ADDR_TYPE_IPV4: probe.pr_ip_proto = IPPROTO_ICMP; probe.pr_icmp_type = ICMP_ECHO; break; case SCAMPER_ADDR_TYPE_IPV6: probe.pr_ip_proto = IPPROTO_ICMPV6; probe.pr_icmp_type = ICMP6_ECHO_REQUEST; break; } probe.pr_icmp_code = 0; probe.pr_icmp_id = trace->sport; probe.pr_icmp_seq = state->id_next; /* * ICMP-based paris traceroute tries to ensure the same path is taken * through a load balancer by sending all probes with a constant value * for the checksum. manipulate the payload so this happens. * the value chosen to seed the checksum is the trace->sport value, but * it could really be anything. 
*/ if(trace->type == SCAMPER_TRACE_TYPE_ICMP_ECHO_PARIS) { sum = trace->sport; sum += htons(((probe.pr_icmp_type << 8) | probe.pr_icmp_code)); sum += htons(probe.pr_icmp_id); sum += htons(probe.pr_icmp_seq); sum = (sum >> 16) + (sum & 0xffff); sum += (sum >> 16); u16 = ~sum; memcpy(probe.pr_data, &u16, 2); } } else if(trace->type == SCAMPER_TRACE_TYPE_TCP) { probe.pr_ip_proto = IPPROTO_TCP; probe.pr_tcp_sport = trace->sport; probe.pr_tcp_dport = trace->dport; probe.pr_tcp_seq = 0; probe.pr_tcp_ack = 0; probe.pr_tcp_flags = TH_SYN; probe.pr_tcp_win = 0; if(trace->dst->type == SCAMPER_ADDR_TYPE_IPV4) probe.pr_ip_id = state->id_next + 1; else probe.pr_ip_flow = state->id_next + 1; } else return -1; /* * allocate a trace probe state record before we try and send the probe * as there is no point sending something into the wild that we can't * record */ if((tp = malloc_zero(sizeof(trace_probe_t))) == NULL) { trace_handleerror(task, errno); return -1; } /* send the probe */ if(scamper_probe(&probe) == -1) { free(tp); trace_handleerror(task, probe.pr_errno); return -1; } timeval_cpy(&tp->tx_tv, &probe.pr_tx); tp->ttl = probe.pr_ip_ttl; tp->size = probe.pr_len + state->header_size; tp->mode = state->mode; tp->id = state->attempt; state->probes[state->id_next] = tp; state->id_next++; state->attempt++; scamper_queue_wait(task->queue, trace->wait * 1000); return 0; } /* * scamper_do_trace_alloc * * given a string representing a traceroute task, parse the parameters and * assemble a trace. return the trace structure so that it is all ready to * go. 
*/ scamper_trace_t *scamper_do_trace_alloc(char *str) { /* default values of various trace parameters */ uint8_t type = SCAMPER_TRACE_TYPE_UDP; uint8_t flags = 0; uint8_t attempts = SCAMPER_DO_TRACE_ATTEMPTS_DEF; uint8_t firsthop = SCAMPER_DO_TRACE_FIRSTHOP_DEF; uint8_t gaplimit = SCAMPER_DO_TRACE_GAPLIMIT_DEF; uint8_t hoplimit = SCAMPER_DO_TRACE_HOPLIMIT_DEF; uint8_t tos = SCAMPER_DO_TRACE_TOS_DEF; uint8_t wait = SCAMPER_DO_TRACE_WAIT_DEF; uint8_t loops = SCAMPER_DO_TRACE_LOOPS_DEF; uint16_t sport = scamper_sport_get(); uint16_t dport = SCAMPER_DO_TRACE_DPORT_DEF; scamper_option_out_t *opts_out = NULL, *opt; scamper_trace_t *trace = NULL; char *addr; long tmp; /* try and parse the string passed in */ if(scamper_options_parse(str, trace_opts_in, trace_opts_cnt, &opts_out, &addr) != 0) { goto err; } /* if there is no IP address after the options string, then stop now */ if(addr == NULL) { goto err; } /* parse the options, do preliminary sanity checks */ for(opt = opts_out; opt != NULL; opt = opt->next) { switch(opt->id) { case TRACE_OPT_DPORT: if(string_tolong(opt->str, &tmp) == -1 || tmp < SCAMPER_DO_TRACE_DPORT_MIN || tmp > SCAMPER_DO_TRACE_DPORT_MAX) { goto err; } dport = tmp; break; case TRACE_OPT_FIRSTHOP: if(string_tolong(opt->str, &tmp) == -1 || tmp < SCAMPER_DO_TRACE_FIRSTHOP_MIN || tmp > SCAMPER_DO_TRACE_FIRSTHOP_MAX) { goto err; } firsthop = tmp; break; case TRACE_OPT_GAPLIMIT: if(string_tolong(opt->str, &tmp) == -1 || tmp < SCAMPER_DO_TRACE_GAPLIMIT_MIN || tmp > SCAMPER_DO_TRACE_GAPLIMIT_MAX) { goto err; } gaplimit = tmp; break; case TRACE_OPT_GAPCONT: flags |= SCAMPER_TRACE_FLAG_GAPCONT; break; case TRACE_OPT_LOOPS: if(string_tolong(opt->str, &tmp) == -1 || tmp < SCAMPER_DO_TRACE_LOOPS_MIN || tmp > SCAMPER_DO_TRACE_LOOPS_MAX) { goto err; } loops = tmp; break; case TRACE_OPT_MAXTTL: if(string_tolong(opt->str, &tmp) == -1 || tmp < SCAMPER_DO_TRACE_HOPLIMIT_MIN || tmp > SCAMPER_DO_TRACE_HOPLIMIT_MAX) { goto err; } hoplimit = tmp; break; case 
TRACE_OPT_PMTUD: flags |= SCAMPER_TRACE_FLAG_PMTUD; break; case TRACE_OPT_PROTOCOL: if(strcasecmp(opt->str, "UDP") == 0) type = SCAMPER_TRACE_TYPE_UDP; else if(strcasecmp(opt->str, "TCP") == 0) type = SCAMPER_TRACE_TYPE_TCP; else if(strcasecmp(opt->str, "ICMP") == 0) type = SCAMPER_TRACE_TYPE_ICMP_ECHO; else if(strcasecmp(opt->str, "ICMP-paris") == 0) type = SCAMPER_TRACE_TYPE_ICMP_ECHO_PARIS; else if(strcasecmp(opt->str, "UDP-paris") == 0) type = SCAMPER_TRACE_TYPE_UDP_PARIS; else goto err; break; case TRACE_OPT_ATTEMPTS: if(string_tolong(opt->str, &tmp) == -1 || tmp < SCAMPER_DO_TRACE_ATTEMPTS_MIN || tmp > SCAMPER_DO_TRACE_ATTEMPTS_MAX) { goto err; } attempts = tmp; break; case TRACE_OPT_ALLATTEMPTS: flags |= SCAMPER_TRACE_FLAG_ALLATTEMPTS; break; case TRACE_OPT_SPORT: if(string_tolong(opt->str, &tmp) == -1 || tmp < SCAMPER_DO_TRACE_SPORT_MIN || tmp > SCAMPER_DO_TRACE_SPORT_MAX) { goto err; } sport = tmp; break; case TRACE_OPT_TOS: if(string_tolong(opt->str, &tmp) == -1 || tmp < SCAMPER_DO_TRACE_TOS_MIN || tmp > SCAMPER_DO_TRACE_TOS_MAX) { goto err; } tos = tmp; break; case TRACE_OPT_WAIT: if(string_tolong(opt->str, &tmp) == -1 || tmp < SCAMPER_DO_TRACE_WAIT_MIN || tmp > SCAMPER_DO_TRACE_WAIT_MAX) { goto err; } wait = tmp; break; } } scamper_options_free(opts_out); opts_out = NULL; /* sanity check that we don't begin beyond our probe hoplimit */ if(firsthop > hoplimit && hoplimit != 0) { goto err; } /* can't really do pmtud properly without all of the path */ if(firsthop > 1 && (flags & SCAMPER_TRACE_FLAG_PMTUD)) { goto err; } /* can't really do pmtud properly without a UDP traceroute method */ if((flags & SCAMPER_TRACE_FLAG_PMTUD) == 1 && type != SCAMPER_TRACE_TYPE_UDP && type != SCAMPER_TRACE_TYPE_UDP_PARIS) { goto err; } if((trace = scamper_trace_alloc()) == NULL) { goto err; } if((trace->dst= scamper_addrcache_resolve(addrcache,AF_UNSPEC,addr)) == NULL) { goto err; } trace->type = type; trace->flags = flags; trace->attempts = attempts; trace->hoplimit = 
hoplimit; trace->gaplimit = gaplimit; trace->firsthop = firsthop; trace->tos = tos; trace->wait = wait; trace->loops = loops; trace->sport = sport; trace->dport = dport; switch(trace->dst->type) { case SCAMPER_ADDR_TYPE_IPV4: if(trace->type == SCAMPER_TRACE_TYPE_TCP) { trace->probe_size = 40; } else { trace->probe_size = 44; } break; case SCAMPER_ADDR_TYPE_IPV6: trace->probe_size = 60; break; default: goto err; } if(scamper_option_dl() != 0) { trace->flags |= SCAMPER_TRACE_FLAG_DL; } return trace; err: if(trace != NULL) scamper_trace_free(trace); if(opts_out != NULL) scamper_options_free(opts_out); return NULL; } scamper_task_t *scamper_do_trace_alloctask(scamper_trace_t *trace, scamper_list_t *list, scamper_cycle_t *cycle) { scamper_task_t *task; /* associate the list and cycle with the trace */ trace->list = scamper_list_use(list); trace->cycle = scamper_cycle_use(cycle); /* allocate the task structure and store the trace with it */ if((task = scamper_task_alloc(trace->dst, &trace_funcs)) != NULL) { task->data = trace; return task; } return NULL; } void scamper_do_trace_cleanup() { if(pktbuf != NULL) { free(pktbuf); pktbuf = NULL; } if(if_sock != -1) { close(if_sock); if_sock = -1; } return; } int scamper_do_trace_init() { if((if_sock = socket(AF_INET, SOCK_DGRAM, 0)) == -1) { return -1; } trace_funcs.probe = do_trace_probe; trace_funcs.handle_icmp = do_trace_handle_icmp; trace_funcs.handle_dl = do_trace_handle_dl; trace_funcs.handle_rt = do_trace_handle_rt; trace_funcs.handle_timeout = do_trace_handle_timeout; trace_funcs.write = do_trace_write; trace_funcs.task_free = do_trace_free; return 0; }