/* Distributed Checksum Clearinghouse * * server database functions * * Copyright (c) 2006 by Rhyolite Software, LLC * * This agreement is not applicable to any entity which sells anti-spam * solutions to others or provides an anti-spam solution as part of a * security solution sold to other entities, or to a private network * which employs the DCC or uses data provided by operation of the DCC * but does not provide corresponding data to other users. * * Permission to use, copy, modify, and distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * Parties not eligible to receive a license under this agreement can * obtain a commercial license to use DCC and permission to use * U.S. Patent 6,330,590 by contacting Commtouch at http://www.commtouch.com/ * or by email to nospam@commtouch.com. * * A commercial license would be for Distributed Checksum and Reputation * Clearinghouse software. That software includes additional features. This * free license for Distributed ChecksumClearinghouse Software does not in any * way grant permision to use Distributed Checksum and Reputation Clearinghouse * software * * THE SOFTWARE IS PROVIDED "AS IS" AND RHYOLITE SOFTWARE, LLC DISCLAIMS ALL * WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL RHYOLITE SOFTWARE, LLC * BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES * OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS * SOFTWARE. 
* * Rhyolite Software DCC 1.3.50-1.159 $Revision$ */ #include "srvr_defs.h" #include #if defined(HAVE_HW_PHYSMEM) || defined(HAVE_BOOTTIME) #include #endif #ifdef HAVE_PSTAT_GETSTATIC /* HP-UX */ #include #endif #define PSTATIC static /* #define PSTATIC */ /* for profiling */ DB_STATS db_stats; DB_STATES db_sts; DCC_PATH db_path_buf; int db_fd = -1; DCC_PATH db_nm; int db_hash_fd = -1; DCC_PATH db_hash_nm; struct timeval db_locked; /* 1=database not locked */ struct timeval db_time; int db_debug; u_char grey_on; static u_char db_use_write; static u_char db_dirty; static u_char db_extended; static u_char db_rdonly; int db_failed_line; /* bad happened at this line # */ const char *db_failed_file; /* in this file */ static u_char db_invalidate; /* do not write to the files */ #define DB_FAILED() (db_failed_line = __LINE__, db_failed_file = __FILE__) /* Without mmap(MAP_NOSYNC) as on Solaris or a good msync() as on BSD/OS, * we must rely on the kernel's update/syncer/bufdaemon/etc. * Use MAP_NOSYNC if we can because some systems flush too quickly * while others such as FreeBSD 6.1 stall for seconds * while thinking about flushing the database */ #if defined(MAP_NOSYNC) && !defined(HAVE_OLD_MSYNC) #define USE_MAP_NOSYNC #else #undef USE_MAP_NOSYNC #endif #ifndef DB_NO_SYNC #if defined(HAVE_BOOTTIME) || !defined(USE_MAP_NOSYNC) #define DB_NO_SYNC 1 #else #define DB_NO_SYNC 0 #endif #endif static u_char db_no_sync; /* do not need to synchronize */ static u_char db_not_synced; /* not synchronized */ u_char db_minimum_map; /* dbclean is running */ int db_buf_total; /* total # of db buffers */ DB_PTR db_max_rss; /* maximum db resident set size */ /* use DB_PTR instead of off_t because off_t is often only 32-bits */ static u_int system_pagesize; /* kernel page size */ #define DB_HASH_TOTAL DB_BUF_MAX static DB_BUF *db_buf_hash[DB_HASH_TOTAL]; static DB_BUF db_bufs[DB_HASH_TOTAL]; /* control mmap()'ed blocks */ static DB_BUF *buf_oldest, *buf_newest; time_t db_need_flush; 
static time_t db_need_urgent_flush; #define B2PATH(b) ((b)->type == DB_BUF_TYPE_DB ? db_nm : db_hash_nm) #define DB_BUF_HASH(pg_num) (&db_buf_hash[(pg_num) % DIM(db_buf_hash)]) static const DB_VERSION_BUF version_buf = DB_VERSION_STR; DB_PARMS db_parms; static DB_PARMS db_parms_stored; u_int db_pagesize; /* size of 1 mmap()'ed buffer */ static u_int db_pagesize_part; static off_t hash_fsize; DB_HADDR db_hash_len; /* # of hash table entries */ DB_HADDR db_hash_used; /* # of hash table entries in use */ u_int db_hash_page_len; /* # of HASH_ENTRY's per buffer */ DB_HADDR db_max_hash_entries = 0; /* after db_buf_init()*/ static off_t db_fsize; /* size of database file */ DB_PTR db_csize; /* size of database contents in bytes */ static DB_PTR db_csize_stored_hash; /* DB size stored in hash file */ static DB_HADDR db_hash_used_stored_hash; u_int db_page_max; /* only padding after this in DB buf */ static DB_PTR db_window_size; /* size of mmap() window */ char db_window_size_str[32]; static const u_char dcc_ck_fuzziness[DCC_DIM_CKS] = { 0, /* DCC_CK_INVALID */ DCC_CK_FUZ_LVL_NO, /* DCC_CK_IP */ DCC_CK_FUZ_LVL_NO, /* DCC_CK_ENV_FROM */ DCC_CK_FUZ_LVL_NO, /* DCC_CK_FROM */ DCC_CK_FUZ_LVL_NO, /* DCC_CK_SUB */ DCC_CK_FUZ_LVL_NO, /* DCC_CK_MESSAGE_ID */ DCC_CK_FUZ_LVL_NO, /* DCC_CK_RECEIVED */ DCC_CK_FUZ_LVL_NO, /* DCC_CK_BODY */ DCC_CK_FUZ_LVL1, /* DCC_CK_FUZ1 */ DCC_CK_FUZ_LVL2, /* DCC_CK_FUZ2 */ DCC_CK_FUZ_LVL_REP, /* DCC_CK_REP_TOTAL */ DCC_CK_FUZ_LVL_REP, /* DCC_CK_REP_BULK */ DCC_CK_FUZ_LVL2, /* DCC_CK_SRVR_ID */ DCC_CK_FUZ_LVL2 /* DCC_CK_ENV_TO */ }; static const u_char grey_ck_fuzziness[DCC_DIM_CKS] = { 0, /* DCC_CK_INVALID */ DCC_CK_FUZ_LVL2, /* DCC_CK_IP */ DCC_CK_FUZ_LVL_NO, /* DCC_CK_ENV_FROM */ DCC_CK_FUZ_LVL_NO, /* DCC_CK_FROM */ DCC_CK_FUZ_LVL_NO, /* DCC_CK_SUB */ DCC_CK_FUZ_LVL_NO, /* DCC_CK_MESSAGE_ID */ DCC_CK_FUZ_LVL_NO, /* DCC_CK_RECEIVED */ DCC_CK_FUZ_LVL_NO, /* DCC_CK_BODY */ DCC_CK_FUZ_LVL_NO, /* DCC_CK_FUZ1 */ DCC_CK_FUZ_LVL_NO, /* DCC_CK_FUZ2 */ 
DCC_CK_FUZ_LVL_NO, /* DCC_CK_GREY_MSG */ DCC_CK_FUZ_LVL1, /* DCC_CK_GREY_TRIPLE */ DCC_CK_FUZ_LVL1, /* DCC_CK_SRVR_ID */ DCC_CK_FUZ_LVL1 /* DCC_CK_ENV_TO */ }; const u_char *db_ck_fuzziness = dcc_ck_fuzziness; PSTATIC u_char buf_flush(DCC_EMSG, DB_BUF *, u_int); PSTATIC u_char buf_munmap(DCC_EMSG, DB_BUF *); PSTATIC u_char buf_mmap(DCC_EMSG, DB_BUF *, DB_PG_NUM); PSTATIC DB_BUF *find_buf(DCC_EMSG, DB_BUF_TYPE, DB_PG_NUM); PSTATIC u_char map_hash(DCC_EMSG, DB_HADDR, DB_STATE *); PSTATIC u_char map_hash_ctl(DCC_EMSG); PSTATIC u_char map_db(DCC_EMSG, DB_PTR, u_int, DB_STATE *); PSTATIC void rel_db_states(u_char); PSTATIC u_char db_set_sizes(DCC_EMSG); /* compute the least common multiple of two numbers */ static u_int lcm(u_int n, u_int m) { u_int r, x, gcd; /* first get the gcd of the two numbers */ if (n >= m) { x = n; gcd = m; } else { x = m; gcd = n; } for (;;) { r = x % gcd; if (r == 0) return n * (m / gcd); x = gcd; gcd = r; } } double /* hashes or bytes/second */ db_add_rate(const DB_PARMS *parms, u_char hash_or_db) /* 1=hash */ { struct timeval sn; time_t new_rate_secs; time_t total_secs; double added, cur, prev; total_secs = parms->rate_secs; if (hash_or_db) { added = parms->hash_added; cur = parms->hash_used; prev = parms->old_hash_used; } else { added = parms->db_added; cur = parms->db_csize; prev = parms->old_db_csize; } if (total_secs <= 0 || total_secs > DB_MAX_RATE_SECS || added <= 0.0) { added = 0.0; total_secs = 0; } dcc_ts2timeval(&sn, parms->sn); new_rate_secs = parms->last_rate_sec - sn.tv_sec; if (new_rate_secs > 0 && cur > prev) { total_secs += new_rate_secs; added += cur - prev; } if (total_secs <= 0) return -1.0; return added / total_secs; } DB_NOKEEP_CKS def_nokeep_cks(void) { DCC_CK_TYPES type; DB_NOKEEP_CKS nokeep = 0; for (type = DCC_CK_TYPE_FIRST; type <= DCC_CK_TYPE_LAST; ++type) { if (DB_GLOBAL_NOKEEP(grey_on, type)) DB_SET_NOKEEP(nokeep, type); } DB_SET_NOKEEP(nokeep, DCC_CK_INVALID); DB_SET_NOKEEP(nokeep, DCC_CK_FLOD_PATH); return 
nokeep; } /* At least in BSD/OS, mmap() cannot extend a file */ u_char db_extend(DCC_EMSG emsg, int fd, const char *nm, DB_PTR new_size, DB_PTR old_size, int block_size) { static u_char *zeros; static int zeros_len; int len, i; if (new_size > DB_PTR_MAX) { dcc_pemsg(EX_SOFTWARE, emsg, "invalid new size "L_HPAT" for %s", new_size, nm); return 0; } if (new_size <= old_size) { dcc_pemsg(EX_SOFTWARE, emsg, "new_size "L_HPAT" <= old_size "L_HPAT " in db_extend(%s)", new_size, old_size, nm); return 0; } /* Use write() because FreeBSD documentation cautions against mmap() on * files with holes. */ if (old_size != (DB_PTR)lseek(fd, old_size, SEEK_SET)) { dcc_pemsg(EX_IOERR, emsg, "lseek(%s,"L_HPAT"): %s", nm, old_size, ERROR_STR()); return 0; } if (zeros_len != block_size) { if (zeros) free(zeros); zeros_len = block_size; zeros = malloc(zeros_len); memset(zeros, 0, zeros_len); } for (;;) { len = new_size - old_size; if (len > zeros_len) len = zeros_len; else if (len <= 0) return 1; old_size += len; i = write(fd, zeros, len); if (i != len) { dcc_pemsg(EX_IOERR, emsg, "extend write(%s,%d)=%d: %s", nm, len, i, ERROR_STR()); return 0; } } } /* release all unneeded buffers */ u_char /* 0=problem 1=did nothing 2=did >=1 */ db_unload(DCC_EMSG emsg, u_char some) /* 0=all unlocked, 1=only one */ { DB_BUF *b; u_char result; result = 1; for (b = buf_oldest; b != 0; b = b->newer) { if (b->type == DB_BUF_TYPE_FREE || b->lock_cnt != 0) continue; if (!buf_munmap(emsg, b)) { emsg = 0; result = 0; } if (some) return result*2; } return result*2; } PSTATIC u_char buf_flush(DCC_EMSG emsg, DB_BUF *b, u_int part) /* DB_BUF_NUM_PARTS=buffer */ { int flush_len; char *flush_base; DB_BUF_FM bit; bit = PART2BIT(part) & (b->flush | b->flush_urgent); if (!bit) return 1; b->flush &= ~bit; b->flush_urgent &= ~bit; if (db_invalidate || db_rdonly) return 1; flush_base = b->ranges[part].lo; flush_len = b->ranges[part].hi - flush_base; if (b->flags & DB_BUF_FG_USE_WRITE) { static char *wbuf; static u_int 
wbuf_len; off_t offset; int fd, i; /* In at least FreeBSD you cannot write() to the file * that underlies a mmap() region from that region */ if (wbuf_len != db_pagesize) { /* the page size for the current file * might be different from the old file */ if (wbuf) free(wbuf); wbuf_len = db_pagesize; wbuf = malloc(db_pagesize); } memcpy(wbuf, flush_base, flush_len); offset = (off_t)b->pg_num * (off_t)db_pagesize; offset += flush_base - b->buf.c; fd = (b->type == DB_BUF_TYPE_DB) ? db_fd : db_hash_fd; if (offset != lseek(fd, offset, SEEK_SET)) { dcc_pemsg(EX_IOERR, emsg, "lseek(%s,"OFF_HPAT"): %s", B2PATH(b), offset, ERROR_STR()); DB_FAILED(); return 0; } i = write(fd, wbuf, flush_len); if (i != flush_len) { dcc_pemsg(EX_IOERR, emsg, "buf_flush write(%s,%u)=%d: %s", B2PATH(b), flush_len, i, ERROR_STR()); DB_FAILED(); return 0; } #ifndef HAVE_OLD_MSYNC } else { if (0 > MSYNC(flush_base, flush_len, MS_ASYNC)) { dcc_pemsg(EX_IOERR, emsg, "msync(db buffer %s,%#lx,%#x): %s", B2PATH(b), (long)flush_base, flush_len, ERROR_STR()); DB_FAILED(); return 0; } #endif } return 1; } /* Try to keep the data clean so that the fsync() required by Solaris * when the file is unloaded is not too expensive. * Try to flush frequently so that we don't stall as long in msync(). */ void db_flush_needed(void) { static DB_BUF *next_b = db_bufs; static u_int next_part; DB_BUF *b; u_int part, all_parts; int buf_num; u_char worked; if (db_need_urgent_flush != 0 && DB_IS_TIME(db_need_urgent_flush, DB_URGENT_FLUSH_SECS)) { /* flush the newest first so that it will be in and out of the * disk queue as quickly as possible to minimize the chances of * stalling on the next work */ worked = 0; for (b = buf_newest; b; b = b->older) { if (!b->flush_urgent || b->type == DB_BUF_TYPE_FREE) continue; for (part = 0; part < DB_BUF_NUM_PARTS; ++part) { if ((b->flush_urgent & PART2BIT(part))) { buf_flush(0, b, part); worked = 1; if (!b->flush_urgent) break; } } } /* Keep the clock running if we did any work. 
This tends to * avoid stalls caused by colliding with the FreeBSD syncer */ if (worked) db_need_urgent_flush = (db_time.tv_sec + DB_URGENT_FLUSH_SECS); else db_need_urgent_flush = 0; } b = next_b; part = next_part; all_parts = DB_PARTS_PER_FLUSH; for (buf_num = DIM(db_bufs); buf_num >= 0; --buf_num) { if (b > LAST(db_bufs)) { part = 0; b = db_bufs; } if (!b->flush || part >= DB_BUF_NUM_PARTS || b->type == DB_BUF_TYPE_FREE) { part = 0; ++b; continue; } while (part < DB_BUF_NUM_PARTS) { if (b->flush & PART2BIT(part)) { buf_flush(0, b, part); if (--all_parts == 0) { next_part = part+1; next_b = b; db_need_flush = (db_time.tv_sec + DB_NEED_FLUSH_SECS); return; } if (!b->flush) part = DB_BUF_NUM_PARTS; } ++part; } } db_need_flush = db_need_urgent_flush; } /* mark part of a buffer dirty */ void db_set_flush(DB_STATE *st, u_char urgent, u_int len) { DB_BUF *b; DB_BUF_FM bit, bits; char *part_end, *start, *end; int part, i; /* nothing to do if the kernel is handling it * or if we are letting the hash table be bad after a system shutdown */ b = st->b; if (!urgent && db_no_sync && !(b->flags & DB_BUF_FG_USE_WRITE)) return; start = st->d.c; part_end = b->buf.c; /* Increase to an even number of pages in the hope that the * file system might be able to page-flip. This might at least * avoid reading into the buffer cache to honor a write(). * This also might cover a few missing settings of buffer dirty bits. * Besides, Solaris' msync() handles only even pages. 
*/ i = (start - part_end) % system_pagesize; start -= i; len += i; i = (len % system_pagesize); if (i != 0) len += system_pagesize - i; end = start + len; if (end > part_end+db_pagesize) dcc_logbad(EX_SOFTWARE, "inflated dirty buffer size"); part = (start - part_end) / db_pagesize_part; part_end += part * db_pagesize_part; bit = PART2BIT(part); bits = 0; do { if (!(b->flush & bit) || b->ranges[part].lo > start) b->ranges[part].lo = start; part_end += db_pagesize_part; if (part_end > end) part_end = end; if (!(b->flush & bit) || b->ranges[part].hi < part_end) b->ranges[part].hi = part_end; bits |= bit; start = part_end; bit <<= 1; ++part; } while (part_end < end); if (urgent) { b->flush_urgent |= bits; if (!db_need_urgent_flush) { db_need_urgent_flush = (db_time.tv_sec + DB_URGENT_FLUSH_SECS); if (db_need_flush == 0) db_need_flush = db_need_urgent_flush; } } else { b->flush |= bits; if (db_need_flush == 0 || db_need_flush > db_time.tv_sec + DB_NEED_FLUSH_SECS) db_need_flush = db_time.tv_sec + DB_NEED_FLUSH_SECS; } } static void db_rel_state(DB_STATE *st) { DB_BUF *b; if ((b = st->b) != 0) { st->b = 0; st->d.v = 0; st->s.rptr = DB_PTR_BAD; if (--b->lock_cnt < 0) dcc_logbad(EX_SOFTWARE, "negative database buffer lock"); } } PSTATIC void rel_db_states(u_char not_hash_ctl) { DB_STATE *st; for (st = &db_sts.rcd; st < &db_sts.hash_ctl; ++st) db_rel_state(st); /* release the buffer with the dirty flag only if allowed */ if (!not_hash_ctl) db_rel_state(st); } /* Shut down the database, including flushing and releasing all * mmap()'ed buffers * Do nothing to the files for mode=-1 because the file is new and garbage * or the caller is a fork of the server shedding memory. 
*/ u_char db_close(int mode) /* -1=invalidate, 0=dirty, 1=clean */ { u_char result; if (mode >= 0) { /* flush the data and then release and flush the dirty flags */ result = make_clean(mode != 0); if (!db_unload(0,0)) result = 0; } else { db_invalidate = 1; rel_db_states(0); result = (db_unload(0, 0) > 0); } /* Close the hash table first because the server is often * waiting for the lock on the main file held by dbclean. * Destroy the hash table if it is bad */ if (db_hash_fd >= 0) { if (0 > close(db_hash_fd)) { dcc_pemsg(EX_IOERR, 0, "close(%s): %s", db_hash_nm, ERROR_STR()); result = 0; } db_hash_fd = -1; } if (db_fd >= 0) { if (0 > close(db_fd)) { dcc_pemsg(EX_IOERR, 0, "close(%s): %s", db_nm, ERROR_STR()); result = 0; } db_fd = -1; } db_locked.tv_sec = 0; return result; } /* Delete the hash table if the system is being rebooted and we * don't trust the file system to get all of the hash table. This might * make system shut down faster */ void db_system_stopping(void) { if (db_hash_fd < 0 || !DB_IS_LOCKED() || !db_not_synced || db_hash_nm[0] == '\0') return; if (0 > unlink(db_hash_nm) && errno != ENOENT) dcc_error_msg("unlink(%s): %s", db_hash_nm, ERROR_STR()); } /* This locking does only multiple-readers/single-writer */ int /* -1=failed, 0=was not locked, 1=was */ db_lock(void) { struct stat sb; if (DB_IS_LOCKED()) return 1; if (!dcc_exlock_fd(0, db_fd, DCC_LOCK_ALL_FILE, "", db_nm)) return -1; if (0 > fstat(db_fd, &sb)) { dcc_error_msg("stat(%s): %s", db_nm, ERROR_STR()); return -1; } if (db_fsize != sb.st_size) { if (db_fsize > sb.st_size || !db_rdonly) { dcc_error_msg("%s size changed from " OFF_HPAT" to "OFF_HPAT, db_nm, db_fsize, sb.st_size); return -1; } db_fsize = sb.st_size; } gettimeofday(&db_locked, 0); return 0; } /* flush buffers to make the disk reasonably correct but not perfect * This does not compensage for a lack of coherent mmap() in the * system. 
It leaves the disk only as accurate as implied by * db_not_synced */ PSTATIC u_char make_clean_flush(void) { DB_BUF *b; u_int part; DB_BUF_FM bits; u_char result; result = 1; for (b = buf_oldest; b != 0; b = b->newer) { if (b->type == DB_BUF_TYPE_FREE || b->lock_cnt != 0) continue; if ((b->flush == 0 || !(b->flags & DB_BUF_FG_USE_WRITE)) && b->flush_urgent == 0) continue; bits = b->flush_urgent | b->flush; for (part = 0; part < DB_BUF_NUM_PARTS; ++part) { if (bits & PART2BIT(part)) { bits &= ~PART2BIT(part); if (!buf_flush(0, b, part)) result = 0; } } } return result; } /* push all of our database changes to the disk and try to clear the dirty bit * do not necessarily unmap anything */ u_char make_clean(u_char clean) /* 0=leave hash marked dirty, */ { /* 1=marked clean, 2=fsync */ u_char need_db_fsync, result; struct stat sb; /* simply unmap the buffers if they are clean * and do not need to marked cleaner */ if (!db_dirty && (db_rdonly || !db_not_synced || clean < 2)) { rel_db_states(0); return 1; } /* quit if we are giving up */ if (db_invalidate) { rel_db_states(0); return 1; } result = 1; if (db_failed_line || db_fd < 0 || db_hash_fd < 0) clean = 0; /* send any changes to the disk * but keep the database-dirty flags in RAM */ if (clean && !map_hash_ctl(0)) { clean = 0; result = 0; } rel_db_states(1); if (!make_clean_flush()) { clean = 0; result = 0; } need_db_fsync = (!db_no_sync || (db_not_synced && clean == 2)); if (db_extended) { /* Send the meta-data to disk so that other processes * such as dbclean can find the new length of the file * on Solaris. Otherwise the file looks broken because * its contained data length can be larger than its * apparent size on Solaris. Note that Solaris lacks * mmap(MAP_NOSYNC). 
*/ if (db_fd >= 0) { if (0 > stat(db_nm, &sb)) { dcc_error_msg("make_clean stat(%s): %s", db_nm, ERROR_STR()); need_db_fsync = 1; } else if (db_fsize != sb.st_size) { need_db_fsync = 1; } } db_extended = 0; } if (need_db_fsync && 0 > fsync(db_fd)) { dcc_error_msg("make_clean fsync(%s): %s", db_nm, ERROR_STR()); clean = 0; result = 0; } if (db_not_synced && clean == 2) { if (0 > fsync(db_hash_fd)) { dcc_error_msg("make_clean fsync(%s): %s", db_hash_nm, ERROR_STR()); clean = 0; result = 0; } else { db_not_synced = 0; db_sts.hash_ctl.d.vals->s.flags &= ~HASH_CTL_FG_NOSYNC; db_set_flush(&db_sts.hash_ctl, 0, sizeof(HASH_CTL)); } } /* Clean the dirty flag in the hash table. * With luck, this will reach the disk after everything else. */ if (clean && !(db_sts.hash_ctl.d.vals->s.flags & HASH_CTL_FG_CLEAN)) { db_sts.hash_ctl.d.vals->s.flags |= HASH_CTL_FG_CLEAN; db_set_flush(&db_sts.hash_ctl, 0, sizeof(HASH_CTL)); } /* finally flush the flag in the hash table */ rel_db_states(0); if (!make_clean_flush()) result = 0; if (clean) db_dirty = 0; return result; } /* mark the hash file and so the database dirty */ static u_char db_make_dirty(DCC_EMSG emsg) { if (db_dirty) return 1; if (!DB_IS_LOCKED()) { dcc_pemsg(EX_SOFTWARE, emsg, "dirtying unlocked database"); return 0; } if (db_rdonly) dcc_logbad(EX_SOFTWARE, "dirtying read-only database"); if (!map_hash_ctl(emsg)) return 0; db_sts.hash_ctl.d.vals->s.flags &= ~HASH_CTL_FG_CLEAN; #ifdef USE_MAP_NOSYNC /* if we are not using msync(), * assume the hash table will never be safely synchronized */ if (db_no_sync) { db_sts.hash_ctl.d.vals->s.flags |= HASH_CTL_FG_NOSYNC; db_not_synced = 1; } #endif db_set_flush(&db_sts.hash_ctl, 1, sizeof(HASH_CTL)); if (!buf_flush(emsg, db_sts.hash_ctl.b, 0)) return 0; db_dirty = 1; return 1; } /* (start to) unlock the database */ u_char /* 0=failed, 1=at least started */ db_unlock(void) { int result; if (!DB_IS_LOCKED()) return 1; /* clear the dirty bit in the database because we may not * be able 
to lock the database later to clear the dirty bit */ result = make_clean(1); if (!dcc_unlock_fd(0, db_fd, DCC_LOCK_ALL_FILE, "", db_nm)) result = 0; db_locked.tv_sec = 0; return result; } #if defined(RLIMIT_AS) || defined(RLIMIT_RSS) || defined(RLIMIT_FSIZE) /* space used by dccd for rate limiting blocks and so forth */ #define DCCD_PAD (20*1024*1024) static void max_rss_rlimit(int resource, const char *nm, DB_PTR min_rss) { struct rlimit limit_old, limit_new; DB_PTR limit_rss; if (0 > getrlimit(resource, &limit_old)) { dcc_error_msg("getrlimit(%s): %s", nm, ERROR_STR()); return; } if ((DB_PTR)limit_old.rlim_cur >= db_max_rss+DCCD_PAD) return; limit_new = limit_old; limit_new.rlim_cur = limit_new.rlim_max; if ((DB_PTR)limit_new.rlim_cur > db_max_rss+DCCD_PAD) limit_new.rlim_cur = db_max_rss+DCCD_PAD; if ((DB_PTR)limit_new.rlim_max < min_rss+DCCD_PAD) limit_new.rlim_max = min_rss+DCCD_PAD; if (0 > setrlimit(resource, &limit_new)) { dcc_error_msg("setrlimit(%s, "L_DPAT"): %s", nm, db_max_rss, ERROR_STR()); limit_rss = limit_old.rlim_cur - DCCD_PAD; if (limit_rss < min_rss) limit_rss = min_rss; } else { if (limit_old.rlim_cur < limit_new.rlim_cur && db_debug) dcc_trace_msg("increased %s from "L_DPAT" to "L_DPAT, nm, (DB_PTR)limit_old.rlim_cur, (DB_PTR)limit_new.rlim_cur); limit_rss = limit_new.rlim_cur - DCCD_PAD; } if (db_max_rss > limit_rss) { if (db_debug) dcc_trace_msg("%s reduced max_rss from "L_DPAT " to "L_DPAT, nm, db_max_rss, limit_rss); db_max_rss = limit_rss; } } #undef DCCD_PAD #endif static void get_db_max_rss(void) { u_int max_db_mbyte, min_db_mbyte; DB_PTR physmem = 0; #ifdef HAVE_PHYSMEM_TOTAL /* maybe someday physmem_total() will be widely available */ physmem = physmem_total(); if (db_debug) dcc_trace_msg("physmem=%d MByte from physmem_total()", (u_int)(physmem/(1024*1024))); #endif #ifdef HAVE__SC_PHYS_PAGES if (physmem == 0) { long pages, sizepage; if ((pages = sysconf(_SC_PHYS_PAGES)) == -1) { dcc_error_msg("sysconf(_SC_PHYS_PAGES): %s", 
ERROR_STR()); } else if ((sizepage = sysconf(_SC_PAGESIZE)) == -1) { dcc_error_msg("sysconf(_SC_PAGESIZE): %s", ERROR_STR()); } else { physmem = (DB_PTR)pages * (DB_PTR)sizepage; if (db_debug) dcc_trace_msg("physmem=%d MByte" " from sysconf(_SC_PHYS_PAGES)" " and sysconf(_SC_PAGESIZE)", (u_int)(physmem/(1024*1024))); } } #endif #ifdef HAVE_HW_PHYSMEM if (physmem == 0) { int mib[2] = {CTL_HW, HW_PHYSMEM}; unsigned long int hw_physmem; size_t hw_physmem_len; hw_physmem_len = sizeof(hw_physmem); if (0 > sysctl(mib, 2, &hw_physmem, &hw_physmem_len, 0,0)) { dcc_error_msg("sysctl(HW_PHYSMEM): %s", ERROR_STR()); } else { physmem = hw_physmem; if (db_debug) dcc_trace_msg("physmem=%d MByte" " from sysctl(mib)", (u_int)(physmem/(1024*1024))); } } #endif #ifdef HAVE_PSTAT_GETSTATIC if (physmem == 0) { struct pst_static pss; if (0 > pstat_getstatic(&pss, sizeof pss, 1, 0)) { dcc_error_msg("pstat_getstatic(): %s", ERROR_STR()); } else if (pss.physical_memory <= 0 || pss.page_size < 0) { dcc_error_msg("pstat_getstatic() says" " physical_memory=%d page_size=%d", pss.physical_memory, pss.page_size); } else { physmem = ((DB_PTR)pss.physical_memory * (DB_PTR)pss.page_size); if (db_debug) dcc_trace_msg("physmem=%d MByte" " from pstat_getstatic()", (u_int)(physmem/(1024*1024))); } } #endif /* use default maximum if maximum is bogus or unset by ./configure */ max_db_mbyte = DCC_MAX_DB_MBYTE; if (max_db_mbyte < MIN_MIN_DB_MBYTE || max_db_mbyte > MAX_MAX64_DB_MBYTE) max_db_mbyte = MAX_MAX64_DB_MBYTE; #if !defined(HAVE_BIG_FILES) || !defined(HAVE_64BIT_PTR) /* we cannot safely use big files unless we also have big pointers */ if (max_db_mbyte > MAX_MAX32_DB_MBYTE) max_db_mbyte = MAX_MAX32_DB_MBYTE; #endif /* use default minimum if minimum is bogus or unset by ./configure */ min_db_mbyte = DCC_MIN_DB_MBYTE; if (min_db_mbyte < MIN_MIN_DB_MBYTE || min_db_mbyte > max_db_mbyte) min_db_mbyte = 64; /* Try to use physical memory less 512 MByte * or half of physical memory if there is less than 1 
GByte. */ if (physmem > 1024*1024*1024) db_max_rss = physmem - 512*1024*1024; else db_max_rss = physmem/2; /* If we got a reasonable memory size from the kernel, use it. * Use a default if not */ if (db_max_rss/(1024*1024) < min_db_mbyte) { db_max_rss = min_db_mbyte; if (db_debug) dcc_trace_msg("physmem=minimum %d MByte", min_db_mbyte); db_max_rss *= 1024*1024; } else if (db_max_rss/(1024*1024) > max_db_mbyte) { db_max_rss = max_db_mbyte; if (db_debug) dcc_trace_msg("physmem=maximum %d MByte", max_db_mbyte); db_max_rss *= 1024*1024; } #ifdef RLIMIT_AS /* try not to break process virtual memory limit, * but only if it is not ridiculously tiny */ max_rss_rlimit(RLIMIT_AS, "RLIMIT_AS", min_db_mbyte); #endif /* RLIMIT_AS */ #ifdef RLIMIT_RSS /* try not to break process resident memory limit * but only if it is not ridiculously tiny */ max_rss_rlimit(RLIMIT_RSS, "RLIMIT_RSS", min_db_mbyte); #endif /* RLIMIT_RSS */ #ifdef RLIMIT_FSIZE /* we need large files */ max_rss_rlimit(RLIMIT_FSIZE, "RLIMIT_FSIZE", db_max_rss); #endif /* RLIMIT_FSIZE */ } /* Pick a buffer size that will hold an integral number of DB hash * table entries and is a multiple of system's page size. * The entire hash table should reside in memory * if the system has enough memory. */ int db_get_pagesize(u_int old_pagesize, /* 0 or required page size */ u_int tgt_pagesize) /* 0 or target page size */ { u_int min_pagesize, max_pagesize; /* Ask the operating system only once so we don't get differing * answers and so compute a varying page size. * Some systems can't keep their stories straight. */ if (db_max_rss == 0) get_db_max_rss(); /* Compute the least common multiple of the system page and * the DB hash table entry size. * This will give us the smallest page size that we can use. */ system_pagesize = getpagesize(); min_pagesize = lcm(system_pagesize, sizeof(HASH_ENTRY)); /* The DB buffer or page size must also be a multiple of the * the end-of-page padding used in the main database file. 
*/ if (sizeof(DB_RCD) % DB_RCD_HDR_LEN != 0) dcc_logbad(EX_SOFTWARE, "DB padding size %d" " is not a divisor of DB entry size %d", DB_RCD_HDR_LEN, ISZ(DB_RCD)); if (DB_RCD_LEN_MAX % DB_RCD_HDR_LEN != 0) dcc_logbad(EX_SOFTWARE, "DB record not a multiple of header size"); min_pagesize = lcm(min_pagesize, DB_RCD_HDR_LEN); /* Use the old buffer size if available so we are not confused * by padding at the ends of the old pages. * Fail if it is impossible. This should cause dbclean to * rebuild the database. */ if (old_pagesize != 0) { if ((old_pagesize % min_pagesize) != 0) return 0; /* adjust the number of buffers to fit our window size */ db_buf_total = db_max_rss / old_pagesize; if (db_buf_total < (int)DB_BUF_MIN) return 0; if (db_buf_total > DB_BUF_MAX) db_buf_total = DB_BUF_MAX; return old_pagesize; } db_buf_total = DB_BUF_MAX; max_pagesize = db_max_rss / db_buf_total; max_pagesize -= max_pagesize % min_pagesize; /* If we have a target page size, try to use it. */ if (tgt_pagesize != 0 && tgt_pagesize < max_pagesize) { tgt_pagesize -= tgt_pagesize % min_pagesize; if (tgt_pagesize < min_pagesize) tgt_pagesize = min_pagesize; return tgt_pagesize; } else if (max_pagesize > min_pagesize) { return max_pagesize; } else { return min_pagesize; } } /* (re)create the buffer pool * The buffers are small blocks that point to the real mmap()'ed memory. */ u_char db_buf_init(u_int old_pagesize, /* 0 or required page size */ u_int tgt_pagesize) /* 0 or target page size */ { DB_BUF *b, *bprev, *bnext; int i; db_pagesize = db_get_pagesize(old_pagesize, tgt_pagesize); if (db_pagesize == 0) return 0; /* The fragments of pages must be multiple of system pages * so that msync() on Solaris can be given multiples of system * pages. It's also a generally good idea. 
*/ db_pagesize_part = db_pagesize/DB_BUF_NUM_PARTS; db_pagesize_part = ((db_pagesize_part + system_pagesize-1) / system_pagesize) * system_pagesize; db_page_max = db_pagesize - DB_RCD_HDR_LEN; db_hash_page_len = db_pagesize/sizeof(HASH_ENTRY); db_max_hash_entries = (MAX_HASH_ENTRIES - MAX_HASH_ENTRIES % db_hash_page_len); memset(db_bufs, 0, sizeof(db_bufs)); b = db_bufs; buf_oldest = b; bprev = 0; for (i = db_buf_total; --i != 0; b = bnext) { bnext = b+1; b->older = bprev; b->newer = bnext; bprev = b; } b->older = bprev; buf_newest = b; memset(db_buf_hash, 0, sizeof(db_buf_hash)); return 1; } /* Clear new hash file by linking all of its entries into * the free list */ static u_char clear_hash(DCC_EMSG emsg) { HASH_CTL *hash_buf; HASH_ENTRY *hash; DB_HADDR next_haddr, cur_haddr, prev_haddr; int bufs; int i; hash_buf = malloc(db_pagesize); memset(hash_buf, 0, db_pagesize); strcpy(hash_buf->s.magic, HASH_MAGIC_STR); hash_buf->s.flags = HASH_CTL_FG_CLEAN; hash_buf->s.free_fwd = DB_HADDR_MIN; hash_buf->s.free_bak = db_hash_len-1; hash_buf->s.len = db_hash_len; hash_buf->s.used = DB_HADDR_MIN; hash_buf->s.created = time(0); /* The page size is chosen to be a multiple of the size of a * single hash table entry. 
*/ prev_haddr = FREE_HADDR_END; cur_haddr = DB_HADDR_MIN; next_haddr = cur_haddr+1; hash = &hash_buf->h[DB_HADDR_MIN]; for (bufs = hash_fsize/db_pagesize; bufs != 0; --bufs) { do { DB_HADDR_CP(hash->bak, prev_haddr); if (next_haddr == db_hash_len) DB_HADDR_CP(hash->fwd, FREE_HADDR_END); else DB_HADDR_CP(hash->fwd, next_haddr); ++hash; prev_haddr = cur_haddr; cur_haddr = next_haddr++; } while (cur_haddr % db_hash_page_len != 0); i = write(db_hash_fd, hash_buf, db_pagesize); if (i != (int)db_pagesize) { dcc_pemsg(EX_IOERR, emsg, "write(%s,%d)=%d: %s", db_hash_nm, db_pagesize, i, ERROR_STR()); return 0; } memset(hash_buf, 0, db_pagesize); hash = &hash_buf->h[0]; } free(hash_buf); return 1; } static u_char make_new_hash(DCC_EMSG emsg, DB_HADDR new_hash_len) { struct stat sb; if (getuid() == 0) { /* if we are running as root, * don't change the owner of the database */ if (0 > fstat(db_fd, &sb)) { dcc_pemsg(EX_IOERR, emsg, "fstat(%s): %s", db_nm, ERROR_STR()); return 0; } if (0 > fchown(db_hash_fd, sb.st_uid, sb.st_gid)) { dcc_pemsg(EX_IOERR, emsg, "fchown(%s,%d,%d): %s", db_hash_nm, (int)sb.st_uid, (int)sb.st_gid, ERROR_STR()); return 0; } } if (new_hash_len > db_max_hash_entries) new_hash_len = db_max_hash_entries; /* Increase the requested hash table size to a multiple * of the page size. The page size is chosen to be a multiple * of the size of a single hash table entry. 
*/ hash_fsize = new_hash_len * sizeof(HASH_ENTRY); hash_fsize = ((hash_fsize + db_pagesize-1) / db_pagesize) * db_pagesize; new_hash_len = hash_fsize / sizeof(HASH_ENTRY); if (new_hash_len < MIN_HASH_ENTRIES) { dcc_pemsg(EX_DATAERR, emsg, "database size %d is too small", new_hash_len); return 0; } if (new_hash_len > MAX_HASH_ENTRIES) { dcc_pemsg(EX_DATAERR, emsg, "database size %d is too large", new_hash_len); return 0; } /* create the empty hash table file */ rel_db_states(0); if (!db_unload(emsg, 0)) return 0; if (0 > ftruncate(db_hash_fd, 0)) { dcc_pemsg(EX_IOERR, emsg, "truncate(%s,"L_HPAT"): %s", db_hash_nm, db_csize, ERROR_STR()); return 0; } db_hash_len = new_hash_len; db_hash_used_stored_hash = db_hash_used = DB_HADDR_MIN; return clear_hash(emsg); } static u_char check_old_hash(DCC_EMSG emsg, u_char old_db) { static const u_char magic[sizeof(((HASH_CTL*)0)->s.magic) ] = HASH_MAGIC_STR; const HASH_CTL *vals; struct stat sb; /* check the size of the existing hash file */ if (0 > fstat(db_hash_fd, &sb)) { dcc_pemsg(EX_IOERR, emsg, "stat(%s): %s", db_hash_nm, ERROR_STR()); return 0; } hash_fsize = sb.st_size; if ((hash_fsize % sizeof(HASH_ENTRY)) != 0) { dcc_pemsg(EX_DATAERR, emsg, "%s has size "OFF_DPAT"," " not a multiple of %d", db_hash_nm, hash_fsize, ISZ(HASH_ENTRY)); return 0; } db_hash_len = hash_fsize/sizeof(HASH_ENTRY); if (db_hash_len < MIN_HASH_ENTRIES) { dcc_pemsg(EX_DATAERR, emsg, "%s has too few records, "OFF_DPAT" bytes", db_hash_nm, hash_fsize); return 0; } /* check the magic number */ if (!map_hash_ctl(emsg)) return 0; vals = db_sts.hash_ctl.d.vals; if (memcmp(vals->s.magic, &magic, sizeof(magic))) { dcc_pemsg(EX_DATAERR, emsg, "%s has the wrong magic \"%.*s\"", db_hash_nm, ISZ(HASH_ENTRY), vals->s.magic); return 0; } if (!(vals->s.flags & HASH_CTL_FG_CLEAN)) { dcc_pemsg(EX_DATAERR, emsg, "%s was not closed cleanly", db_hash_nm); return 0; } if (vals->s.flags & HASH_CTL_FG_NOSYNC) { #ifdef HAVE_BOOTTIME int mib[2] = {CTL_KERN, KERN_BOOTTIME}; 
size_t boottime_len; #endif struct timeval boottime; boottime.tv_sec = 0x7fffffff; boottime.tv_usec = DCC_US-1; #ifdef HAVE_BOOTTIME boottime_len = sizeof(boottime); if (0 > sysctl(mib, 2, &boottime, &boottime_len, 0, 0)) { dcc_error_msg("sysctl(KERN_BOOTTIME): %s", ERROR_STR()); } #endif if (vals->s.created <= boottime.tv_sec) { dcc_pemsg(EX_DATAERR, emsg, "%s was not synchronized", db_hash_nm); return 0; } db_not_synced = 1; } if (DB_HADDR_INVALID(vals->s.free_fwd) && (vals->s.free_fwd != FREE_HADDR_END || vals->s.free_fwd != vals->s.free_bak)) { dcc_pemsg(EX_DATAERR, emsg, "%s has a broken free list head of %#x", db_hash_nm, vals->s.free_fwd); return 0; } if (DB_HADDR_INVALID(vals->s.free_bak) && (vals->s.free_bak != FREE_HADDR_END || vals->s.free_fwd != vals->s.free_bak)) { dcc_pemsg(EX_DATAERR, emsg, "%s has a broken free list tail of %#x", db_hash_nm, vals->s.free_bak); return 0; } if (db_hash_len != vals->s.len) { dcc_pemsg(EX_DATAERR, emsg, "%s has %d entries but claims %d", db_hash_nm, db_hash_len, vals->s.len); return 0; } db_hash_used_stored_hash = db_hash_used = vals->s.used; if (db_hash_used < DB_HADDR_MIN) { dcc_pemsg(EX_DATAERR, emsg, "%s contains impossible %u entries", db_hash_nm, HASH_LEN_EXT(db_hash_used)); return 0; } if (db_hash_used >= db_hash_len) { if (db_hash_used > db_hash_len) dcc_pemsg(EX_DATAERR, emsg, "%s contains only %u entries but %u used", db_hash_nm, HASH_LEN_EXT(db_hash_len), HASH_LEN_EXT(db_hash_used)); else dcc_pemsg(EX_DATAERR, emsg, "%s is filled with %u entries", db_hash_nm, HASH_LEN_EXT(db_hash_len)); return 0; } if (db_hash_used != db_parms.hash_used && hash_fsize != 0) { if (old_db) { dcc_trace_msg("repair db_parms.old hash_used" " and old_hash_used"); db_parms.old_hash_used = db_hash_used; db_parms.hash_used = db_hash_used; } else { dcc_pemsg(EX_DATAERR, emsg, "%s contains %d" " entries instead of the %d that %s claims", db_hash_nm, db_hash_used, db_parms.hash_used, db_nm); return 0; } } db_csize_stored_hash = 
vals->s.db_csize; if (db_csize_stored_hash != db_csize && hash_fsize != 0) { dcc_pemsg(EX_DATAERR, emsg, "%s contains "L_DPAT " bytes instead of the "L_DPAT" that %s claims", db_nm, db_csize, db_csize_stored_hash, db_hash_nm); return 0; } return 1; } /* open the files and generally get ready to work */ u_char /* 0=failed, 1=ok */ db_open(DCC_EMSG emsg, int new_db_fd, /* -1 or already open db_fd */ const char *new_db_nm, DB_HADDR new_hash_len, /* 0 or # of entries */ u_char mode) /* DB_OPEN_* */ { u_int cur_pagesize; int hash_flags, db_open_flags; struct stat db_sb; u_char old_db; # define OPEN_BAIL() {if (new_db_fd >= 0) db_fd = -1; \ db_close(-1); return 0;} db_close(1); db_failed_line = 1; db_not_synced = 0; db_minimum_map = 0; db_invalidate = 0; db_dirty = 0; db_extended = 0; db_locked.tv_sec = 0; db_no_sync = DB_NO_SYNC; db_rdonly = (mode & DB_OPEN_RDONLY) != 0; db_use_write = (mode & DB_OPEN_MMAP_WRITE) != 0; memset(&db_stats, 0, sizeof(db_stats)); if (!new_db_nm && db_nm[0] == '\0') new_db_nm = grey_on ? DB_GREY_NAME : DB_DCC_NAME; if (new_db_nm) { if (!fnm2path(db_nm, new_db_nm, 0) || !fnm2path(db_hash_nm, db_nm, DB_HASH_SUFFIX)) { dcc_pemsg(EX_DATAERR, emsg, "invalid DB nm \"%s\"", new_db_nm); return 0; } } if (new_db_fd >= 0) { if (new_hash_len != 0) { dcc_pemsg(EX_SOFTWARE, emsg, "extending db_open(%s) without locking", db_nm); return 0; } if (!db_rdonly) { dcc_pemsg(EX_SOFTWARE, emsg, "db_open(%s) read/write without locking", db_nm); return 0; } db_open_flags = O_RDONLY; hash_flags = O_RDONLY; db_fd = new_db_fd; } else { db_open_flags = O_RDWR; if (new_hash_len) { if (db_rdonly) { dcc_pemsg(EX_SOFTWARE, emsg, "db_open(%s) creating read-only", db_nm); return 0; } hash_flags = O_RDWR | O_CREAT; } else { /* must open the file read/write to lock it */ hash_flags = O_RDWR; } db_fd = dcc_lock_open(emsg, db_nm, db_open_flags, (mode & DB_OPEN_LOCK_NOWAIT) ? 
DCC_LOCK_OPEN_NOWAIT : 0, DCC_LOCK_ALL_FILE, 0); if (db_fd == -1) { db_close(-1); return 0; } } gettimeofday(&db_locked, 0); if (0 > fstat(db_fd, &db_sb)) { dcc_pemsg(EX_IOERR, emsg, "stat(%s): %s", db_nm, ERROR_STR()); OPEN_BAIL(); return 0; } db_csize = db_fsize = db_sb.st_size; if (db_fsize < ISZ(DB_HDR)) { dcc_pemsg(EX_IOERR, emsg, "%s with %d bytes is too small to be a DCC database", db_nm, (int)db_fsize); OPEN_BAIL(); } /* check the header of the database file by temporarily mapping it */ db_buf_init(0, 1); if (!map_db(emsg, 0, sizeof(DB_HDR), &db_sts.db_parms)) OPEN_BAIL(); db_parms_stored = *db_sts.db_parms.d.parms; db_parms = *db_sts.db_parms.d.parms; db_rel_state(&db_sts.db_parms); if (memcmp(db_parms.version, version_buf, sizeof(version_buf))) { dcc_pemsg(EX_DATAERR, emsg, "%s contains the wrong magic string \"%.*s\"", db_nm, ISZ(db_parms.version), db_parms.version); OPEN_BAIL(); } if (!(db_parms.flags & DB_PARM_FG_GREY) != !grey_on) { dcc_pemsg(EX_DATAERR, emsg, "%s is%s a greylist database but must%s be", db_nm, (db_parms.flags & DB_PARM_FG_GREY) ? "" : " not", grey_on ? "" : " not"); OPEN_BAIL(); } cur_pagesize = db_parms.pagesize; DB_SET_NOKEEP(db_parms.nokeep_cks, DCC_CK_INVALID); DB_SET_NOKEEP(db_parms.nokeep_cks, DCC_CK_FLOD_PATH); db_ck_fuzziness = grey_on ? grey_ck_fuzziness : dcc_ck_fuzziness; db_csize = db_parms.db_csize; if (db_csize < sizeof(DB_HDR)) { dcc_pemsg(EX_DATAERR, emsg, "%s says it contains "L_DPAT" bytes" " or fewer than the minimum of %d", db_nm, db_csize, DB_PTR_BASE); /* that is a fatal error if we are not rebuilding */ if (new_hash_len != 0) OPEN_BAIL(); } if (db_csize > (DB_PTR)db_fsize) { dcc_pemsg(EX_DATAERR, emsg, "%s says it contains "L_DPAT" bytes" " or more than the actual size of "OFF_DPAT, db_nm, db_csize, db_fsize); /* that is a fatal error if we are not rebuilding */ if (new_hash_len != 0) OPEN_BAIL(); } /* The buffer or page size we use must be the page size used to * write the files. 
Try to change our size to match the file */ if (cur_pagesize != db_pagesize) { db_invalidate = 1; rel_db_states(0); if (!db_unload(emsg, 0)) OPEN_BAIL(); db_invalidate = 0; if (!db_buf_init(cur_pagesize, 0)) { dcc_error_msg("%s has page size %d" " incompatible with %d in %s", db_nm, cur_pagesize, db_get_pagesize(0, 0), path2fnm(db_hash_nm)); OPEN_BAIL(); } } db_csize_stored_hash = 0; db_hash_len = 0; db_hash_fd = open(db_hash_nm, hash_flags, 0666); if (db_hash_fd < 0) { dcc_pemsg(EX_IOERR, emsg, "open(%s): %s", db_hash_nm, ERROR_STR()); OPEN_BAIL(); } if (0 > fcntl(db_hash_fd, F_SETFD, FD_CLOEXEC)) { dcc_pemsg(EX_IOERR, emsg, "fcntl(%s, FD_CLOEXEC): %s", db_hash_nm, ERROR_STR()); OPEN_BAIL(); } /* old databases lack the growth values */ old_db = 0; if (new_hash_len == 0 /* not new */ && !(mode & DB_OPEN_RDONLY) && db_parms.old_db_csize == 0 && db_parms.db_added == 0 && db_parms.hash_used == 0 && db_parms.old_hash_used == 0 && db_parms.hash_added == 0 && db_parms.rate_secs == 0 && db_parms.last_rate_sec == 0) { dcc_trace_msg("repair old db_parms.db_csize"); db_parms.old_db_csize = db_parms.db_csize; old_db = 1; } if (new_hash_len != 0) { if (!make_new_hash(emsg, new_hash_len)) OPEN_BAIL(); } else { if (!check_old_hash(emsg, old_db)) OPEN_BAIL(); } if (db_fsize % db_pagesize != 0) { dcc_pemsg(EX_DATAERR, emsg, "%s has size "OFF_HPAT"," " not a multiple of its page size of %#x", db_nm, db_fsize, db_pagesize); OPEN_BAIL(); } /* Fill the last page of the database with zeros in case * the length was wrong. * That is possible without too much database corruption * only if the length is wrong by less than a page. 
*/ if ((DB_PTR)db_fsize > db_csize + db_pagesize || db_csize > (DB_PTR)db_fsize) { dcc_pemsg(EX_DATAERR, emsg, "%s has size "OFF_HPAT" but claims "L_HPAT, db_nm, db_fsize, db_csize); OPEN_BAIL(); } if (!db_rdonly) { if ((DB_PTR)db_fsize > db_csize) { if (!map_db(emsg, db_csize, db_fsize - db_csize, &db_sts.rcd)) OPEN_BAIL(); memset(db_sts.rcd.d.r, 0, db_fsize - db_csize); db_set_flush(&db_sts.rcd, 0, db_fsize - db_csize); } if (!db_flush_parms(emsg)) OPEN_BAIL(); } db_window_size = (DB_PTR)db_pagesize * db_buf_total; if (db_window_size >= (1024*1024)) { snprintf(db_window_size_str, sizeof(db_window_size_str), "%d MByte window", (int)(db_window_size / (1024*1024))); } else { snprintf(db_window_size_str, sizeof(db_window_size_str), "%d KByte window", (int)(db_window_size / 1024)); } rel_db_states(0); db_failed_line = 0; return 1; #undef OPEN_BAIL } /* get a free buffer for a chunk of either the hash table or database files */ PSTATIC DB_BUF * get_free_buf(DCC_EMSG emsg, DB_BUF **bh) { DB_BUF *b; /* Look for an unlocked buffer. * We know there is one because we have more buffers than * can be locked simultaneously. */ b = buf_oldest; for (;;) { if (!b) { dcc_pemsg(EX_SOFTWARE, emsg, "broken DB buffer MRU chain"); DB_FAILED(); return 0; } if (!b->lock_cnt) break; b = b->newer; } /* Found an unlocked buffer. * Unlink it from its hash chain. 
*/ if (b->fwd) b->fwd->bak = b->bak; if (b->bak) b->bak->fwd = b->fwd; else if (b->hash) *b->hash = b->fwd; if (b->type != DB_BUF_TYPE_FREE) { if (!buf_munmap(emsg, b)) return 0; } /* put it on the new hash chain */ b->bak = 0; b->hash = bh; b->fwd = *bh; *bh = b; if (b->fwd) b->fwd->bak = b; return b; } PSTATIC DB_BUF * find_buf(DCC_EMSG emsg, DB_BUF_TYPE type, DB_PG_NUM pg_num) { DB_BUF *b, **bh; bh = DB_BUF_HASH(pg_num); b = *bh; for (;;) { if (!b) { /* we ran off the end of the buffer hash chain, * so get a free buffer */ b = get_free_buf(emsg, bh); if (!b) return 0; b->type = type; b->pg_num = pg_num; break; } if (b->type == type && b->pg_num == pg_num) break; /* found the buffer we need */ b = b->fwd; } /* make the buffer newest */ if (buf_newest != b) { /* unlink it */ b->newer->older = b->older; if (b->older) b->older->newer = b->newer; else buf_oldest = b->newer; /* insert it at the head of the MRU list */ b->newer = 0; b->older = buf_newest; buf_newest->newer = b; buf_newest = b; } #if 1 /* Do not rely on our buffer dirty bits if we are doing all of the work. * Unintended failures to set the bits would lead to corruption. 
*/ if (!db_rdonly && (b->flags & DB_BUF_FG_USE_WRITE)) { int part; u_int offset; b->flush = (DB_BUF_FM)-1; offset = 0; for (part = 0; part < DB_BUF_NUM_PARTS; ++part) { b->ranges[part].lo = b->buf.c+offset; offset += db_pagesize_part; if (offset > db_pagesize) offset = db_pagesize; b->ranges[part].hi = b->buf.c+offset; } } #endif return b; } PSTATIC DB_BUF * find_st_buf(DCC_EMSG emsg, DB_BUF_TYPE type, DB_STATE *st, DB_PG_NUM pg_num) { DB_BUF *b; /* release previous buffer unless it is the right one */ b = st->b; if (b) { if (b->pg_num == pg_num && b->type == type) return b; /* already have the target buffer */ st->b = 0; st->d.v = 0; if (--b->lock_cnt < 0) dcc_logbad(EX_SOFTWARE, "bad database buffer lock"); } /* look for the buffer */ b = find_buf(emsg, type, pg_num); if (!b) return 0; ++b->lock_cnt; if (!b->buf.v) { /* fill it if it did not exist */ if (!buf_mmap(emsg, b, pg_num)) { b->type = DB_BUF_TYPE_FREE; b->pg_num = -1; if (--b->lock_cnt != 0) dcc_logbad(EX_SOFTWARE, "stolen database buffer lock %d", b->lock_cnt); return 0; } if (type == DB_BUF_TYPE_DB) ++db_stats.db_mmaps; else ++db_stats.hash_mmaps; } st->b = b; st->d.v = 0; return b; } PSTATIC u_char buf_munmap(DCC_EMSG emsg, DB_BUF *b) { int part; DB_BUF_FM bits; u_char result; if (b->lock_cnt != 0) dcc_logbad(EX_SOFTWARE, "unmapping locked DB buffer"); result = 1; /* If using read() and write() instead of mmap(), we must always * flush when releasing a buffer. 
* If we are close to closing the database, * flush a buffer before unmapping it because on some systems * such as FreeBSD a dirty buffer will lurk in RAM until the * least convenient time */ bits = b->flush_urgent; if (!db_no_sync || (b->flags & DB_BUF_FG_USE_WRITE)) bits |= b->flush; for (part = 0; bits != 0 && part < DB_BUF_NUM_PARTS; ++part) { if (bits & PART2BIT(part)) { if (!buf_flush(emsg, b, part)) { emsg = 0; result = 0; } /* buf_flush() can clear more than 1 bit */ bits &= (b->flush_urgent | b->flush); } } if (0 > munmap(b->buf.v, db_pagesize)) { dcc_pemsg(EX_IOERR, emsg, "munmap(%s,%d): %s", B2PATH(b), db_pagesize, ERROR_STR()); DB_FAILED(); result = 0; } b->buf.v = 0; b->pg_num = -1; b->flush = 0; b->flush_urgent = 0; b->type = DB_BUF_TYPE_FREE; b->flags = 0; return result; } PSTATIC u_char buf_mmap(DCC_EMSG emsg, DB_BUF *b, DB_PG_NUM pg_num) { int flags; off_t offset; void *p; int retry; u_char unloaded; if (db_rdonly) { flags = MAP_PRIVATE; } else if (db_use_write && !db_minimum_map) { /* write() buffers instead of letting the Solaris virtual * memory system do it. Solaris will bog the system down doing * nothing but flushing dirty mmap() pages * We cannot use this hack in two processes, so turn it off * in dccd while dbclean is running */ b->flags |= DB_BUF_FG_USE_WRITE; flags = MAP_PRIVATE; } else { #ifdef USE_MAP_NOSYNC flags = (MAP_SHARED | MAP_NOSYNC); #else flags = MAP_SHARED; #endif } offset = (off_t)pg_num * (off_t)db_pagesize; for (retry = 1, unloaded = 2; unloaded > 1; ++retry) { p = mmap(0, db_pagesize, db_rdonly ? PROT_READ : (PROT_READ | PROT_WRITE), flags, (b->type == DB_BUF_TYPE_DB) ? 
db_fd : db_hash_fd, offset); if (p == MAP_FAILED) { if (errno == EACCES || errno == EBADF || errno == EINVAL || errno == ENODEV || retry > 20) { dcc_pemsg(EX_IOERR, emsg, "try #%d mmap(%s,%#x,"OFF_HPAT"): %s", retry, B2PATH(b), db_pagesize, offset, ERROR_STR()); return 0; } dcc_error_msg("try #%d mmap(%s,%#x,"OFF_HPAT"): %s", retry, B2PATH(b), db_pagesize, offset, ERROR_STR()); /* #define MMAP_FAIL_DEBUG 3 */ #ifdef MMAP_FAIL_DEBUG } else if (((uint)random() % MMAP_FAIL_DEBUG) == 0) { /* pretend mmap() failed randomly */ dcc_error_msg(" test fail #%d mmap(%s,%#x,"OFF_HPAT")", retry, B2PATH(b), db_pagesize, offset); if (0 > munmap(p, db_pagesize)) dcc_error_msg( "test munmap(): %s", ERROR_STR()); #endif } else { /* It worked. * Say so if it was not the first attempt. */ if (retry != 1) dcc_error_msg("try #%d" " mmap(%s,%#x,"OFF_HPAT") ok", retry, B2PATH(b), db_pagesize, offset); break; } /* mmap() fails occassionally on some systems, * so try to release something and try again */ unloaded = db_unload(0, 1); } #ifdef MADV_WILLNEED /* Get all of our buffers if there is plenty of memory * and we are not trying to stay out of the way of dbclean. * madvise() on some systems including FreeBSD uses a lot * of CPU cycles, so it should not be done unless it is likely * to do a lot of good. 
*/ if (!db_minimum_map && db_window_size >= hash_fsize + db_csize && 0 > madvise(p, db_pagesize, MADV_WILLNEED)) dcc_error_msg("madvise(MADV_WILLNEED %s,%#x,): %s", B2PATH(b), db_pagesize, ERROR_STR()); #endif b->buf.v = p; return 1; } PSTATIC u_char map_hash_ctl(DCC_EMSG emsg) { DB_BUF *b; b = find_st_buf(emsg, DB_BUF_TYPE_HASH, &db_sts.hash_ctl, 0); if (!b) return 0; db_sts.hash_ctl.s.haddr = 0; db_sts.hash_ctl.d.v = b->buf.v; return 1; } /* mmap() a hash table entry */ PSTATIC u_char map_hash(DCC_EMSG emsg, DB_HADDR haddr, /* this entry */ DB_STATE *st) /* point this to the entry */ { DB_PG_NUM pg_num; DB_PG_OFF pg_off; DB_BUF *b; if (haddr >= db_hash_len || haddr < DB_HADDR_MIN) { dcc_pemsg(EX_DATAERR, emsg, "invalid hash address %#x", haddr); return 0; } pg_num = haddr / db_hash_page_len; pg_off = haddr % db_hash_page_len; b = find_st_buf(emsg, DB_BUF_TYPE_HASH, st, pg_num); if (!b) return 0; st->s.haddr = haddr; st->d.h = &b->buf.h[pg_off]; return 1; } /* unlink a hash table entry from the free list */ PSTATIC u_char unlink_free_hash(DCC_EMSG emsg, DB_STATE *hash_st, /* remove this from the free list */ DB_STATE *tmp_st) { DB_HADDR fwd, bak; if (!db_make_dirty(emsg)) return 0; fwd = DB_HADDR_EX(hash_st->d.h->fwd); bak = DB_HADDR_EX(hash_st->d.h->bak); if (!HE_IS_FREE(hash_st->d.h) || (DB_HADDR_INVALID(fwd) && fwd != FREE_HADDR_END) || (DB_HADDR_INVALID(bak) && bak != FREE_HADDR_END) || DB_HPTR_EX(hash_st->d.h->rcd) != DB_PTR_NULL) { dcc_pemsg(EX_DATAERR, emsg, "bad hash free list entry at %#x", hash_st->s.haddr); return 0; } if (fwd != FREE_HADDR_END) { if (!map_hash(emsg, fwd, tmp_st)) return 0; if (DB_HADDR_EX(tmp_st->d.h->bak) != hash_st->s.haddr) { dcc_pemsg(EX_DATAERR, emsg, "free %#x --> bad-free %#x", hash_st->s.haddr, fwd); return 0; } DB_HADDR_CP(tmp_st->d.h->bak, bak); db_set_flush(tmp_st, 0, sizeof(HASH_ENTRY)); } else { if (!map_hash_ctl(emsg)) return 0; if (db_sts.hash_ctl.d.vals->s.free_bak != hash_st->s.haddr) { dcc_pemsg(EX_DATAERR, emsg, 
"free %#x --> bad-free %#x", hash_st->s.haddr, fwd); return 0; } db_sts.hash_ctl.d.vals->s.free_bak = bak; db_set_flush(&db_sts.hash_ctl, 0, sizeof(HASH_CTL)); } if (bak != FREE_HADDR_END) { if (!map_hash(emsg, bak, tmp_st)) return 0; if (DB_HADDR_EX(tmp_st->d.h->fwd) != hash_st->s.haddr) { dcc_pemsg(EX_DATAERR, emsg, "bad free %#x <-- free %#x", bak, hash_st->s.haddr); return 0; } DB_HADDR_CP(tmp_st->d.h->fwd, fwd); db_set_flush(tmp_st, 0, sizeof(HASH_ENTRY)); } else { if (!map_hash_ctl(emsg)) return 0; if (db_sts.hash_ctl.d.vals->s.free_fwd != hash_st->s.haddr) { dcc_pemsg(EX_DATAERR, emsg, "free %#x --> bad-free %#x", hash_st->s.haddr, bak); return 0; } db_sts.hash_ctl.d.vals->s.free_fwd = fwd; db_set_flush(&db_sts.hash_ctl, 0, sizeof(HASH_CTL)); } DB_HADDR_CP(hash_st->d.h->fwd, DB_HADDR_NULL); DB_HADDR_CP(hash_st->d.h->bak, DB_HADDR_NULL); db_set_flush(hash_st, 0, sizeof(HASH_ENTRY)); ++db_hash_used; return 1; } /* get a free hash table entry and leave db_sts.free pointing to it */ PSTATIC u_char /* 0=failed, 1=got it */ get_free_hash(DCC_EMSG emsg, DB_HADDR result) /* try near here */ { DB_HADDR pg_lim; int i; if (db_hash_len <= db_hash_used) { dcc_pemsg(EX_SOFTWARE, emsg, "no free hash table entry;" " %d of %d used", db_hash_used, db_hash_len); return 0; } /* look near the target * Try hard because going off the page is so expensive that it * justifies plenty of time here.*/ if (result != DB_HADDR_NULL) { pg_lim = (result - (result % db_hash_page_len) + db_hash_page_len-1); if (pg_lim >= db_hash_len) pg_lim = db_hash_len-1; for (i = 0; i < 50; ++i) { if (!map_hash(emsg, result, &db_sts.free)) return 0; if (HE_IS_FREE(db_sts.free.d.h)) return unlink_free_hash(emsg, &db_sts.free, &db_sts.tmp); if (++result > pg_lim) { result -= db_hash_page_len; if (result < DB_HADDR_MIN) result = DB_HADDR_MIN; } } } /* then try the free list */ if (!map_hash_ctl(emsg)) return 0; result = db_sts.hash_ctl.d.vals->s.free_fwd; if (DB_HADDR_INVALID(result)) { dcc_pemsg(EX_DATAERR, 
emsg, "broken hash free list head of %#x", result); return 0; } if (!map_hash(emsg, result, &db_sts.free)) return 0; return unlink_free_hash(emsg, &db_sts.free, &db_sts.tmp); } /* mmap() a database entry * We assume that no database entry spans buffers, * and that there are enough buffers to accomodate all possible * concurrent requests. */ PSTATIC u_char map_db(DCC_EMSG emsg, DB_PTR rptr, /* address of the record */ u_int tgt_len, /* its length */ DB_STATE *st) /* point this to the record */ { DB_PG_NUM pg_num; DB_PG_OFF pg_off; DB_BUF *b; if (rptr+tgt_len > (DB_PTR)db_fsize) { dcc_pemsg(EX_DATAERR, emsg, "invalid database address "L_HPAT" or length %d" " past db_fsize "OFF_HPAT" in %s", rptr, tgt_len, db_fsize, db_nm); DB_FAILED(); return 0; } pg_num = rptr / db_pagesize; pg_off = rptr % db_pagesize; /* do not go past the end of a buffer */ if (tgt_len+pg_off > db_pagesize) { dcc_pemsg(EX_DATAERR, emsg, "invalid database address "L_HPAT " or length %#x in %s", rptr, tgt_len, db_nm); DB_FAILED(); return 0; } b = find_st_buf(emsg, DB_BUF_TYPE_DB, st, pg_num); if (!b) return 0; st->s.rptr = rptr; st->d.r = (DB_RCD *)&b->buf.c[pg_off]; return 1; } u_char /* 0=failed, 1=got it */ db_map_rcd(DCC_EMSG emsg, DB_STATE *rcd_st, /* point this to the record */ DB_PTR rptr, /* that is here */ u_int *rcd_lenp) /* put its length here */ { u_int rcd_len; if (DB_PTR_IS_BAD(rptr)) { dcc_pemsg(EX_DATAERR, emsg, "getting bogus record at "L_HPAT", in %s", rptr, db_nm); return 0; } if (!map_db(emsg, rptr, DB_RCD_HDR_LEN, rcd_st)) return 0; rcd_len = DB_RCD_LEN(rcd_st->d.r); if (&rcd_st->d.c[rcd_len] > &rcd_st->b->buf.c[db_pagesize]) { dcc_pemsg(EX_DATAERR, emsg, "invalid checksum count %d at "L_HPAT" in %s", DB_NUM_CKS(rcd_st->d.r), rptr, db_nm); return 0; } if (rcd_lenp) *rcd_lenp = rcd_len; return 1; } /* write the new sizes of the files into the files */ PSTATIC u_char db_set_sizes(DCC_EMSG emsg) { u_char result = 1; if (db_hash_fd != -1 && (db_csize_stored_hash != db_csize || 
db_hash_used_stored_hash != db_hash_used)) { if (!map_hash_ctl(emsg)) { result = 0; } else { db_sts.hash_ctl.d.vals->s.db_csize = db_csize; db_csize_stored_hash = db_csize; db_sts.hash_ctl.d.vals->s.used = db_hash_used; db_hash_used_stored_hash = db_hash_used; db_set_flush(&db_sts.hash_ctl, 0, sizeof(HASH_CTL)); } } if (db_fd != -1 && (db_parms_stored.db_csize != db_csize || db_parms_stored.hash_used != db_hash_used)) { if (!map_db(emsg, 0, sizeof(DB_HDR), &db_sts.db_parms)) { result = 0; } else { db_sts.db_parms.d.parms->db_csize = db_csize; db_parms_stored.db_csize = db_csize; db_parms.db_csize = db_csize; db_sts.db_parms.d.parms->hash_used = db_hash_used; db_parms_stored.hash_used = db_hash_used; db_parms.hash_used = db_hash_used; db_sts.db_parms.d.parms->last_rate_sec = db_time.tv_sec; db_parms_stored.last_rate_sec = db_time.tv_sec; db_parms.last_rate_sec = db_time.tv_sec; db_set_flush(&db_sts.db_parms, 1, sizeof(DB_PARMS)); } } return result; } /* write the database parameters into the magic number headers of the files */ u_char db_flush_parms(DCC_EMSG emsg) { if (!db_set_sizes(emsg)) return 0; if (db_fd == -1) return 1; if (memcmp(&db_parms, &db_parms_stored, sizeof(db_parms))) { if (!map_db(emsg, 0, sizeof(DB_HDR), &db_sts.db_parms)) return 0; db_parms.pagesize = db_pagesize; *db_sts.db_parms.d.parms = db_parms; db_parms_stored = db_parms; db_set_flush(&db_sts.db_parms, 1, sizeof(DB_PARMS)); } return 1; } /* find a checksum in an already mapped record */ DB_RCD_CK * /* 0=not found, 1=broken database */ db_find_ck(DCC_EMSG emsg, DB_RCD *rcd, DB_PTR rptr, DCC_CK_TYPES type) /* find this type of checksum */ { DB_RCD_CK *rcd_ck; int i; rcd_ck = rcd->cks; i = DB_NUM_CKS(rcd); if (i >= DCC_NUM_CKS) { dcc_pemsg(EX_DATAERR, emsg, "impossible %d checksums in "L_HPAT" in %s", i, rptr, db_nm); return (DB_RCD_CK *)1; } for (; i != 0; --i, ++rcd_ck) { if (DB_CK_TYPE(rcd_ck) == type) return rcd_ck; } return 0; } /* find a checksum type known to be in a record */ DB_RCD_CK 
* /* 0=it's not there */ db_map_rcd_ck(DCC_EMSG emsg, DB_STATE *rcd_st, /* point this to the record */ DB_PTR rptr, /* that is here */ DCC_CK_TYPES type) /* find this type of checksum */ { DB_RCD_CK *rcd_ck; if (!db_map_rcd(emsg, rcd_st, rptr, 0)) return 0; rcd_ck = db_find_ck(emsg, rcd_st->d.r, rptr, type); if (rcd_ck == (DB_RCD_CK *)1) return 0; if (rcd_ck == 0) { dcc_pemsg(EX_DATAERR, emsg, "missing \"%s\" checksum in "L_HPAT" in %s", DB_TYPE2STR(type), rptr, db_nm); return 0; } return rcd_ck; } DB_HADDR db_hash(DCC_CK_TYPES type, const DCC_SUM sum) { u_long accum; DB_HADDR haddr; accum = type; accum += (sum[0]<<24)+(sum[1]<<16)+(sum[2]<<8)+sum[3]; accum += (sum[4]<<24)+(sum[5]<<16)+(sum[6]<<8)+sum[7]; accum += (sum[8]<<24)+(sum[9]<<16)+(sum[10]<<8)+sum[11]; accum += (sum[12]<<24)+(sum[13]<<16)+(sum[14]<<8)+sum[15]; haddr = mhash(accum, db_hash_len); if (haddr < DB_HADDR_MIN) haddr = DB_HADDR_MIN; return haddr; } /* look for a checksum in the hash table * return with not-found, the home slot, or the last entry on * the collision chain */ DB_FOUND db_lookup(DCC_EMSG emsg, DCC_CK_TYPES type, const DCC_SUM sum, DB_HADDR lo, /* postpone if out of this window */ DB_HADDR hi, DB_STATE *hash_st, /* hash block for record or related */ DB_STATE *rcd_st, /* put the record or garbage here */ DB_RCD_CK **prcd_ck) /* point to cksum if found */ { DB_HADDR haddr, haddr1; DB_PTR db_ptr; DB_RCD_CK *found_ck; int failsafe; haddr = db_hash(type, sum); if (haddr < lo || haddr > hi) { if (lo == 0 && hi == MAX_HASH_ENTRIES) { dcc_pemsg(EX_DATAERR, emsg, "out of range hash address"); return DB_FOUND_SYSERR; } return DB_FOUND_LATER; } if (prcd_ck) *prcd_ck = 0; if (!map_hash(emsg, haddr, hash_st)) return DB_FOUND_SYSERR; if (HE_IS_FREE(hash_st->d.h)) return DB_FOUND_EMPTY; if (!DB_HADDR_C_NULL(hash_st->d.h->bak)) return DB_FOUND_INTRUDER; /* We know that the current hash table entry is in its home slot. 
* It might be for the key or checksum we are looking for * or it might be for some other checksum with the same hash value. */ for (failsafe = db_hash_len; failsafe >=0; --failsafe) { if (HE_CMP(hash_st->d.h, type, sum)) { /* This hash table entry could be for our target * checksum. Read the corresponding record so we * decide whether we have a hash collision or we * have found a record containing our target checksum. * * find right type of checksum in the record */ db_ptr = DB_HPTR_EX(hash_st->d.h->rcd); found_ck = db_map_rcd_ck(emsg, rcd_st, db_ptr, type); if (!found_ck) return DB_FOUND_SYSERR; if (!memcmp(sum, found_ck->sum, sizeof(DCC_SUM))) { if (prcd_ck) *prcd_ck = found_ck; return DB_FOUND_IT; } } /* This DB record was a hash collision, or for a checksum * other than our target. * Fail if this is the end of the hash chain */ haddr1 = DB_HADDR_EX(hash_st->d.h->fwd); if (haddr1 == DB_HADDR_NULL) return DB_FOUND_CHAIN; if (DB_HADDR_INVALID(haddr1)) { dcc_pemsg(EX_DATAERR, emsg, "broken hash chain fwd-link %#x at %#x in %s", haddr1, haddr, db_hash_nm); return DB_FOUND_SYSERR; } if (!map_hash(emsg, haddr1, hash_st)) return DB_FOUND_SYSERR; if (DB_HADDR_EX(hash_st->d.h->bak) != haddr) { dcc_pemsg(EX_DATAERR, emsg, "broken hash chain back-link" " %#x<--%#x instead of %#x<--%#x in %s", DB_HADDR_EX(hash_st->d.h->bak), haddr1, haddr, haddr1, db_hash_nm); return DB_FOUND_SYSERR; } haddr = haddr1; } dcc_pemsg(EX_DATAERR, emsg, "infinite hash chain at %#x in %s", haddr, db_hash_nm); return DB_FOUND_SYSERR; } /* combine checksums */ DCC_TGTS db_sum_ck(DCC_TGTS prev, /* previous sum */ DCC_TGTS rcd_tgts, /* from the record */ DCC_CK_TYPES type UATTRIB) { DCC_TGTS res; /* This arithmetic must be commutative (after handling deleted * values), because inter-server flooding causes records to appear in * the database out of temporal order. * * DCC_TGTS_TOO_MANY can be thought of as a count of plus infinity. * DCC_TGTS_OK is like minus infinity. 
* DCC_TGTS_OK2 like half of minus infinity * DCC_TGTS_TOO_MANY (plus infinity) added to DCC_TGTS_OK (minus * infinity) or DCC_TGTS_OK2 yields DCC_TGTS_OK or DCC_TGTS_OK2. * * Reputations never reach infinity. * * Claims of not-spam from all clients are discarded as they arrive * and before here. They can only come from the local white list */ #define SUM_OK_DEL(p,r) { \ if (rcd_tgts == DCC_TGTS_OK || prev == DCC_TGTS_OK) \ return DCC_TGTS_OK; \ if (rcd_tgts == DCC_TGTS_OK2 || prev == DCC_TGTS_OK2) \ return DCC_TGTS_OK2; \ if (rcd_tgts == DCC_TGTS_DEL) \ return prev; \ } res = prev+rcd_tgts; if (res <= DCC_TGTS_TOO_MANY) return res; SUM_OK_DEL(prev, rcd_tgts); return DCC_TGTS_TOO_MANY; #undef SUM_OK_DEL } /* delete all reports that contain the given checksum */ static u_char /* 1=done, 0=broken database */ del_ck(DCC_EMSG emsg, DCC_TGTS *res, /* residual targets after deletion */ const DB_RCD *new, /* delete reports older than this one */ DCC_CK_TYPES type, /* delete this type of checksum */ DB_RCD_CK *prev_ck, /* starting with this one */ DB_STATE *prev_st) /* use this scratch state block */ { DB_PTR prev; *res = 0; for (;;) { /* delete reports that are older than the delete request */ if (DCC_TS_NEWER_TS(new->ts, prev_st->d.r->ts) && DB_RCD_ID(prev_st->d.r) != DCC_ID_WHITE) { DB_TGTS_RCD_SET(prev_st->d.r, 0); DB_TGTS_CK_SET(prev_ck, 0); db_set_flush(prev_st, 0, DB_RCD_LEN(prev_st->d.r)); } else { /* sum reports that are not deleted */ *res = db_sum_ck(*res, DB_TGTS_RCD(prev_st->d.r), type); } prev = DB_PTR_EX(prev_ck->prev); if (prev == DB_PTR_NULL) return 1; prev_ck = db_map_rcd_ck(emsg, prev_st, prev, type); if (!prev_ck) return 0; } } /* Mark reports made obsolete by a spam report * A new report of spam makes sufficiently old reports obsolete. * Sufficiently recent non-obsolete reports make a new report obsolete, * or at least not worth spending bandwidth to flood. 
*/ PSTATIC u_char /* 1=done, 0=broken database */ db_obs_ck(DCC_EMSG emsg, const DB_RCD *new, DB_RCD_CK *new_ck, DCC_CK_TYPES type, /* check this type of checksum */ DB_RCD_CK *prev_ck, /* starting with this one */ DCC_TGTS prev_ck_tgts, DB_STATE *prev_st) /* use this scratch state block */ { struct timeval tv; time_t secs; DCC_TS ts; int limit; DB_PTR prev; secs = db_parms.ex_secs[type].all; if (secs > DCC_NEW_SPAM_SECS) secs = DCC_NEW_SPAM_SECS; dcc_ts2timeval(&tv, new->ts); dcc_timeval2ts(ts, &tv, -secs); limit = 100; for (;;) { /* preceding white listed entries make new entries obsolete */ if (DB_RCD_ID(prev_st->d.r) == DCC_ID_WHITE) { new_ck->type_fgs |= DB_CK_FG_OBS; return 1; } if (DB_CK_OBS(prev_ck)) { /* don't look forever for recent non-obsolete report */ if (--limit == 0) return 1; } else if (prev_ck_tgts != DCC_TGTS_TOO_MANY) { /* mark this predecessor obsolete if it * was before the checksum became spam */ prev_ck->type_fgs |= DB_CK_FG_OBS; db_set_flush(prev_st, 0, DB_RCD_LEN(prev_st->d.r)); } else if (DCC_TS_OLDER_TS(prev_st->d.r->ts, &ts)) { /* this older predecessor is now obsolete */ prev_ck->type_fgs |= DB_CK_FG_OBS; db_set_flush(prev_st, 0, DB_RCD_LEN(prev_st->d.r)); /* we're finished, because all older preceding reports * were marked obsolete when it was inserted */ return 1; } else { /* this predecessor is recent, so it makes * our new record obsolete. 
*/ new_ck->type_fgs |= DB_CK_FG_OBS; return 1; } prev = DB_PTR_EX(prev_ck->prev); if (prev == DB_PTR_NULL) return 1; /* it is a new report of spam */ prev_ck = db_map_rcd_ck(emsg, &db_sts.rcd2, prev, type); if (!prev_ck) return 0; prev_ck_tgts = DB_TGTS_CK(prev_ck); } } /* mark extra server-ID declarations obsolete */ static u_char /* 1=done, 0=broken database */ srvr_id_ck(DCC_EMSG emsg, const DB_RCD *new, DB_RCD_CK *new_ck, DB_RCD_CK *prev_ck, /* starting with this one */ DB_STATE *prev_st) /* use this scratch state block */ { DB_PTR prev; for (;;) { if (DB_RCD_ID(prev_st->d.r) == DB_RCD_ID(new)) { /* keep newest server-ID declaration */ if (DCC_TS_NEWER_TS(prev_st->d.r, new->ts)) { new_ck->type_fgs |= DB_CK_FG_OBS; /* assume the new buffer is already dirty */ } else { prev_ck->type_fgs |= DB_CK_FG_OBS; db_set_flush(prev_st, 0, DB_RCD_LEN(prev_st->d.r)); } return 1; } prev = DB_PTR_EX(prev_ck->prev); if (prev == DB_PTR_NULL) return 1; prev_ck = db_map_rcd_ck(emsg, prev_st, prev, DCC_CK_SRVR_ID); if (!prev_ck) return 0; } } /* Install pointers in the hash table for a record and fix the accumulated * counts in the record pointed to by db_sts.rcd * Use db_sts.rcd, db_sts.hash, db_sts.rcd2, db_sts.free, db_sts.tmp * The caller must deal with db_make_dirty() and db_set_flush() for * the record itself. 
*/
/*      lo and hi are handed unchanged to db_lookup() for every checksum.
 *      The value returned on success is whatever db_set_sizes() returns.
 */
u_char                                  /* 0=failed, 1=done */
db_link_rcd(DCC_EMSG emsg, DB_HADDR lo, DB_HADDR hi)
{
    DCC_TGTS res;
    DB_RCD *rcd;
    DB_RCD_CK *prev_ck;
    DB_RCD_CK *rcd_ck;
    DCC_CK_TYPES rcd_type;
    DCC_TGTS rcd_tgts, prev_ck_tgts;
    int ck_num;
    DB_HADDR haddr;

    rcd = db_sts.rcd.d.r;
    rcd_tgts = DB_TGTS_RCD_RAW(rcd);
    rcd_ck = rcd->cks;
    ck_num = DB_NUM_CKS(rcd);
    /* refuse a record claiming more checksums than it has room for */
    if (ck_num > DIM(rcd->cks)) {
        dcc_pemsg(EX_SOFTWARE, emsg,
                  "bogus checksum count %#x at "L_HPAT" in %s",
                  rcd->fgs_num_cks, db_sts.rcd.s.rptr, db_nm);
        return 0;
    }

    for (; ck_num > 0; --ck_num, ++rcd_ck) {
        rcd_type = DB_CK_TYPE(rcd_ck);
        if (!DCC_CK_OK_DB(grey_on, rcd_type)) {
            dcc_pemsg(EX_SOFTWARE, emsg,
                      "invalid checksum type %s at "L_HPAT" in %s",
                      DB_TYPE2STR(rcd_type),
                      db_sts.rcd.s.rptr, db_nm);
            return 0;
        }

        /* start with no predecessor; DB_FOUND_IT sets the real link */
        rcd_ck->prev = DB_PTR_CP(DB_PTR_NULL);

        /* Do not link or total some checksums unless they are
         * whitelist entries.  If they are whitelist entries, they
         * will eventually get set to DCC_TGTS_OK or DCC_TGTS_OK2. */
        if (DB_TEST_NOKEEP(db_parms.nokeep_cks, rcd_type)
            && DB_RCD_ID(rcd) != DCC_ID_WHITE) {
            DB_TGTS_CK_SET(rcd_ck, 1);
            continue;
        }

        /* a delete request contributes nothing of its own */
        res = (rcd_tgts == DCC_TGTS_DEL) ? 0 : rcd_tgts;

        switch (db_lookup(emsg, rcd_type, rcd_ck->sum, lo, hi,
                          &db_sts.hash, &db_sts.rcd2, &prev_ck)) {
        case DB_FOUND_SYSERR:
            return 0;

        case DB_FOUND_LATER:
            continue;

        case DB_FOUND_IT:
            /* We found the checksum
             * Update the hash table to point to the new record */
            DB_HPTR_CP(db_sts.hash.d.h->rcd, db_sts.rcd.s.rptr);
            db_set_flush(&db_sts.hash, 0, sizeof(HASH_ENTRY));
            rcd_ck->prev = DB_PTR_CP(db_sts.rcd2.s.rptr);

            /* delete predecessors to a delete request
             * and compute the remaining sum */
            if (rcd_tgts == DCC_TGTS_DEL) {
                if (!del_ck(emsg, &res, rcd, rcd_type,
                            prev_ck, &db_sts.rcd2))
                    return 0;
                /* delete requests are obsolete if the
                 * checksum is white-listed */
                if (res == DCC_TGTS_OK
                    || res == DCC_TGTS_OK2)
                    rcd_ck->type_fgs |= DB_CK_FG_OBS;
                break;
            }

            /* Simple checksum with a predecessor
             * This does not do the substantial extra work
             * to notice delete requests that arrived early.
             * That problem is handled by the incoming
             * flooding duplicate report detection mechanism. */
            prev_ck_tgts = DB_TGTS_CK(prev_ck);
            if (DB_RCD_SUMRY(rcd) || DB_CK_DUP(rcd_ck))
                res = prev_ck_tgts;
            else
                res = db_sum_ck(prev_ck_tgts, res, rcd_type);
            if ((res == DCC_TGTS_OK || res == DCC_TGTS_OK2
                 || (DB_RCD_ID(db_sts.rcd2.d.r) == DCC_ID_WHITE))
                && DB_RCD_ID(rcd) != DCC_ID_WHITE){
                /* obsolete white-listed checksums */
                rcd_ck->type_fgs |= DB_CK_FG_OBS;
                break;
            }

            if (res == DCC_TGTS_TOO_MANY) {
                /* mark obsolete unneeded reports of spam */
                if (!DB_CK_OBS(rcd_ck)
                    && !db_obs_ck(emsg, rcd, rcd_ck, rcd_type,
                                  prev_ck, prev_ck_tgts,
                                  &db_sts.rcd2))
                    return 0;
            } else if (rcd_type == DCC_CK_SRVR_ID) {
                /* mark obsolete server-ID assertions */
                if (!DB_CK_OBS(rcd_ck)
                    && !srvr_id_ck(emsg, rcd, rcd_ck,
                                   prev_ck, &db_sts.rcd2))
                    return 0;
            }
            break;

        case DB_FOUND_EMPTY:
            /* We found an empty hash table slot.
             * Update the slot to point to our new record
             * after removing it from the free list,
             * which marks it dirty. */
            if (!unlink_free_hash(emsg, &db_sts.hash, &db_sts.tmp))
                return 0;
            DB_HPTR_CP(db_sts.hash.d.h->rcd, db_sts.rcd.s.rptr);
            HE_MERGE(db_sts.hash.d.h,rcd_type, rcd_ck->sum);
            db_set_flush(&db_sts.hash, 0, sizeof(HASH_ENTRY));
            break;

        case DB_FOUND_CHAIN:
            /* We found a hash collision, a chain of 1 or more
             * records with the same hash value.
             * Get a free slot, link it to the end of the chain,
             * and point it to the record.
             * The buffer containing the free slot is marked
             * dirty when it is removed from the free list. */
            if (!get_free_hash(emsg, db_sts.hash.s.haddr))
                return 0;
            db_set_flush(&db_sts.hash, 0, sizeof(HASH_ENTRY));
            DB_HADDR_CP(db_sts.free.d.h->bak, db_sts.hash.s.haddr);
            DB_HADDR_CP(db_sts.hash.d.h->fwd, db_sts.free.s.haddr);
            DB_HPTR_CP(db_sts.free.d.h->rcd, db_sts.rcd.s.rptr);
            HE_MERGE(db_sts.free.d.h,rcd_type, rcd_ck->sum);
            break;

        case DB_FOUND_INTRUDER:
            /* The home hash slot for our key contains an
             * intruder.  Find a place to put it. */
            haddr = DB_HADDR_EX(db_sts.hash.d.h->fwd);
            if (haddr == DB_HADDR_NULL)
                haddr = DB_HADDR_EX(db_sts.hash.d.h->bak);
            if (!get_free_hash(emsg, haddr))
                return 0;

            /* Move the intruder */
            *db_sts.free.d.h = *db_sts.hash.d.h;

            /* re-link the neighbors of the intruder */
            haddr = DB_HADDR_EX(db_sts.free.d.h->bak);
            if (haddr == DB_HADDR_NULL) {
                /* haddr is known to be null here, so report
                 * the slot that held the bad back link */
                dcc_pemsg(EX_DATAERR, emsg,
                          "bad hash chain reverse link at %#x"
                          " in %s",
                          db_sts.hash.s.haddr, db_hash_nm);
                return 0;
            }
            if (!map_hash(emsg, haddr, &db_sts.tmp))
                return 0;
            DB_HADDR_CP(db_sts.tmp.d.h->fwd, db_sts.free.s.haddr);
            db_set_flush(&db_sts.tmp, 0, sizeof(HASH_ENTRY));
            haddr = DB_HADDR_EX(db_sts.hash.d.h->fwd);
            if (haddr != DB_HADDR_NULL) {
                if (!map_hash(emsg, haddr, &db_sts.tmp))
                    return 0;
                DB_HADDR_CP(db_sts.tmp.d.h->bak,
                            db_sts.free.s.haddr);
                db_set_flush(&db_sts.tmp, 0,sizeof(HASH_ENTRY));
            }

            /* install the new entry in its home slot */
            DB_HADDR_CP(db_sts.hash.d.h->fwd, DB_HADDR_NULL);
            DB_HADDR_CP(db_sts.hash.d.h->bak, DB_HADDR_NULL);
            DB_HPTR_CP(db_sts.hash.d.h->rcd, db_sts.rcd.s.rptr);
            HE_MERGE(db_sts.hash.d.h,rcd_type, rcd_ck->sum);
            db_set_flush(&db_sts.hash, 0, sizeof(HASH_ENTRY));
            break;
        }

        /* Fix the checksum in the report */
        DB_TGTS_CK_SET(rcd_ck, res);
    }

    return db_set_sizes(emsg);
}


/* Add a record to the database and the hash table
 *      The record must be known to be valid
 *      Use db_sts.rcd, db_sts.hash, db_sts.rcd2, db_sts.free, db_sts.tmp
 *      On exit db_sts.rcd points to the new record in the database
 *
 *      The value returned on success is the position in the database
 *      at which the record was stored.
 */
DB_PTR                                  /* 0=failed */
db_add_rcd(DCC_EMSG emsg, DB_RCD *new_rcd)
{
    u_int new_rcd_len, pad_len;
    DB_PTR new_db_csize, new_db_fsize, rcd_pos, new_page_num;

    if (!db_make_dirty(emsg))
        return 0;

    /* only the checksums actually present are stored */
    new_rcd_len = (sizeof(*new_rcd) - sizeof(new_rcd->cks)
                   + (DB_NUM_CKS(new_rcd) * sizeof(new_rcd->cks[0])));

    rcd_pos = db_csize;
    new_db_csize = rcd_pos+new_rcd_len;

    new_page_num = new_db_csize/db_pagesize;
    /* Advance rcd_pos with zero filler reports to get past
     * a page boundary.  Then extend the file by a full page. */
    if (new_page_num != db_csize/db_pagesize) {
        pad_len = new_page_num*db_pagesize - db_csize;
        /* round the filler up to a multiple of DB_RCD_HDR_LEN */
        pad_len = (((pad_len + DB_RCD_HDR_LEN-1) / DB_RCD_HDR_LEN)
                   * DB_RCD_HDR_LEN);
        rcd_pos = db_csize + pad_len;

        new_db_fsize = (new_page_num+1)*db_pagesize;
        db_extended = 1;
        if (!db_extend(emsg, db_fd, db_nm,
                       new_db_fsize, db_fsize, db_pagesize/8))
            return 0;
        db_fsize = new_db_fsize;
        db_csize = rcd_pos;
        new_db_csize = rcd_pos + new_rcd_len;
    }

    /* install the record */
    if (!map_db(emsg, rcd_pos, new_rcd_len, &db_sts.rcd))
        return 0;
    memcpy(db_sts.rcd.d.r, new_rcd, new_rcd_len);
    /* Mark its buffer to be sent to the disk to keep the database
     * as good as possible even if we crash.  We don't need to worry
     * about later changes to the hash links because dbclean will
     * rebuild them if we crash */
    db_set_flush(&db_sts.rcd, 1, new_rcd_len);
    db_csize = new_db_csize;

    /* install pointers in the hash table
     * and update the total counts in the record */
    if (!db_link_rcd(emsg, 0, MAX_HASH_ENTRIES))
        return 0;

    ++db_stats.adds;
    return rcd_pos;
}