/* * Copyright 1998-2000 Ben Smithurst * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * remove duplicates from a mailbox (uses Message-ID: only) * input: STDIN, output: STDOUT */ static const char rcsid[] = "$BCPS: src/mailutils/de-dupe.c,v 1.52 2003/01/19 19:18:25 ben Exp $"; #include "misc.h" #define REALLOC(var, svar, add) do { \ svar += (add); \ if ((var = realloc(var, svar * sizeof *(var))) == NULL) \ err(1, "realloc"); \ } while (0) typedef struct _idtree { int left, right; int id; } IDTREE; typedef struct { char *buf; int len, size; } MESSAGE; IDTREE *msgid_alloc(int); int de_dupe(FILE *, FILE *); int de_dupe_maildir(int, char *, FILE *); int id_done(char *, int); static void parse_line(char *, int *, int *); void add_line(char *); void print_message(FILE *); void reset_message(void); void usage(void); IDTREE *msgids = NULL; MESSAGE message = { NULL, 0, 0 }; char *idtext; int pflag = 0; size_t i_used = 0, i_alloc = 0, t_used = 0, t_alloc = 0; static char msgid[1024]; int main(int argc, char *argv[]) { int ch; while ((ch = getopt(argc, argv, "p")) != -1) switch (ch) { case 'p': pflag = 1; break; default: usage(); } argv += optind; argc -= optind; signal(SIGTERM, signal_handler); signal(SIGINT, signal_handler); return (process(argv, de_dupe, de_dupe_maildir) ? 0 : 1); } void tree_free(void) { size_t i; i_used = t_used = 0; for (i = 0; i < i_alloc; i++) msgids[i].left = msgids[i].right = msgids[i].id = -1; } int de_dupe_maildir(int reset, char *file, FILE *in_fp) { static int header, done; char *line; size_t len; int retval = RET_NOCHANGE; if (reset & !pflag) tree_free(); header = 1; done = 0; msgid[0] = '\0'; while ((line = gl_getline(in_fp)) != NULL) { len = strlen(line); if (sig_count > 0) { warnx("de_dupe: signal received, returning"); return (RET_LOCALERROR); } chop(line, len); parse_line(line, &header, &done); if (done) { retval = RET_DELETE; break; } } gl_destroy(in_fp); return (retval); } int de_dupe(FILE *in_fp, FILE *out_fp) { char *line; int header, done; int retval; size_t len; /* initialize all variables */ header = done = 0; msgid[0] = '\0'; retval = RET_NOCHANGE; /* reset the message ID tree, unless -p given */ if (!pflag) tree_free(); while ((line = gl_getline(in_fp)) != NULL) { len = strlen(line); if (sig_count > 0) { warnx("de_dupe: signal received, returning"); return (RET_LOCALERROR); } /* Terminate the string */ len--; if (line[len] != '\n') { warnx("de_dupe: no trailing \\n"); return (RET_LOCALERROR); } line[len] = '\0'; /* reset things at start of new message */ if (is_from(line, 1)) { if (done) retval = RET_CHANGE; else print_message(out_fp); reset_message(); msgid[0] = '\0'; done = 0; header = 1; add_line(line); continue; } /* * check early for messages already done; we can skip * the rest. */ if (done) continue; parse_line(line, &header, &done); add_line(line); } gl_destroy(in_fp); if (done) retval = RET_CHANGE; else print_message(out_fp); reset_message(); return (retval); } /* Parses current line, looking for message id */ static void parse_line(char *line, int *header, int *done) { char *p; if (*line == '\0') { *header = 0; return; } if (!*header || msgid[0] != '\0' || strncasecmp(line, "message-id:", 11) != 0) return; p = line + 11; while (isspace(*p)) p++; if (*p == '<') { char *end; int len; p++; end = strchr(p, '>'); if (end != NULL) { *end = '\0'; len = end - p; } else { warnx("no trailing '>' in " "message-id <%s", p); len = strlen(p); } /* * Don't waste time with strncpy() -- it * zero fills the string. */ if (len >= sizeof msgid) { warnx("message-id <%s> " "too long, truncated", p); p[sizeof msgid - 1] = '\0'; } strcpy(msgid, p); if (end != NULL) *end = '>'; } *done = id_done(msgid, 0); } /* * Add a line to the current message. */ void add_line(char *p) { size_t len; len = strlen(p); if (message.len + len >= message.size) REALLOC(message.buf, message.size, len + 10240); if (len > 0) memcpy(&message.buf[message.len], p, len); message.buf[message.len + len] = '\n'; message.len += len + 1; } /* Print out the current message */ void print_message(FILE *fp) { if (message.len > 0) fwrite(message.buf, 1, message.len, fp); } /* * clear all nodes of a message list. * Don't free(), since they will be reused for the next message. */ void reset_message(void) { message.len = 0; } /* * Mark an id as done, and return 0/1 for whether it was already done * before the change. */ int id_done(char *id, int offset) { int diff; int len; IDTREE *new, *start; if (id[0] == '\0') return (0); len = strlen(id); /* See if the top node doesn't exist */ if (msgids == NULL || msgids->id == -1) { new = msgid_alloc(len); strcpy(&idtext[new->id], id); return (0); } start = &msgids[offset]; assert(start->left != offset && start->right != offset); diff = strcmp(id, &idtext[start->id]); if (diff == 0) return (1); /* see if there's a node on the right side */ if (diff < 0) { if (start->left >= 0) return (id_done(id, start->left)); } else { if (start->right >= 0) return (id_done(id, start->right)); } /* create the new node */ new = msgid_alloc(len); strcpy(&idtext[new->id], id); /* attach to the right side */ start = &msgids[offset]; assert(new - msgids != offset); if (diff < 0) start->left = new - msgids; else start->right = new - msgids; return (0); } /* Allocate a new IDTREE node with the specified length. */ IDTREE * msgid_alloc(int min) { IDTREE *new; if (i_used >= i_alloc) REALLOC(msgids, i_alloc, 128); if (t_used + min >= t_alloc) REALLOC(idtext, t_alloc, min + 10240); new = &msgids[i_used]; new->left = new->right = -1; new->id = t_used; i_used++; t_used += min + 1; return (new); } void usage(void) { fprintf(stderr, "usage: de-dupe [-p] [mailbox ...]\n"); exit(EX_USAGE); }