/*
* Copyright 1998-2000 Ben Smithurst <ben@smithurst.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* remove duplicates from a mailbox (uses Message-ID: only)
* input: STDIN, output: STDOUT
*/
static const char rcsid[] =
"$BCPS: src/mailutils/de-dupe.c,v 1.52 2003/01/19 19:18:25 ben Exp $";
#include "misc.h"
#define REALLOC(var, svar, add) do { \
svar += (add); \
if ((var = realloc(var, svar * sizeof *(var))) == NULL) \
err(1, "realloc"); \
} while (0)
typedef struct _idtree {
int left, right;
int id;
} IDTREE;
typedef struct {
char *buf;
int len, size;
} MESSAGE;
IDTREE *msgid_alloc(int);
int de_dupe(FILE *, FILE *);
int de_dupe_maildir(int, char *, FILE *);
int id_done(char *, int);
static void parse_line(char *, int *, int *);
void add_line(char *);
void print_message(FILE *);
void reset_message(void);
void usage(void);
IDTREE *msgids = NULL;
MESSAGE message = { NULL, 0, 0 };
char *idtext;
int pflag = 0;
size_t i_used = 0, i_alloc = 0, t_used = 0, t_alloc = 0;
static char msgid[1024];
int
main(int argc, char *argv[]) {
int ch;
while ((ch = getopt(argc, argv, "p")) != -1)
switch (ch) {
case 'p':
pflag = 1;
break;
default:
usage();
}
argv += optind;
argc -= optind;
signal(SIGTERM, signal_handler);
signal(SIGINT, signal_handler);
return (process(argv, de_dupe, de_dupe_maildir) ? 0 : 1);
}
void
tree_free(void) {
size_t i;
i_used = t_used = 0;
for (i = 0; i < i_alloc; i++)
msgids[i].left = msgids[i].right = msgids[i].id = -1;
}
int
de_dupe_maildir(int reset, char *file, FILE *in_fp) {
static int header, done;
char *line;
size_t len;
int retval = RET_NOCHANGE;
if (reset & !pflag)
tree_free();
header = 1;
done = 0;
msgid[0] = '\0';
while ((line = gl_getline(in_fp)) != NULL) {
len = strlen(line);
if (sig_count > 0) {
warnx("de_dupe: signal received, returning");
return (RET_LOCALERROR);
}
chop(line, len);
parse_line(line, &header, &done);
if (done) {
retval = RET_DELETE;
break;
}
}
gl_destroy(in_fp);
return (retval);
}
int
de_dupe(FILE *in_fp, FILE *out_fp) {
char *line;
int header, done;
int retval;
size_t len;
/* initialize all variables */
header = done = 0;
msgid[0] = '\0';
retval = RET_NOCHANGE;
/* reset the message ID tree, unless -p given */
if (!pflag)
tree_free();
while ((line = gl_getline(in_fp)) != NULL) {
len = strlen(line);
if (sig_count > 0) {
warnx("de_dupe: signal received, returning");
return (RET_LOCALERROR);
}
/* Terminate the string */
len--;
if (line[len] != '\n') {
warnx("de_dupe: no trailing \\n");
return (RET_LOCALERROR);
}
line[len] = '\0';
/* reset things at start of new message */
if (is_from(line, 1)) {
if (done)
retval = RET_CHANGE;
else
print_message(out_fp);
reset_message();
msgid[0] = '\0';
done = 0;
header = 1;
add_line(line);
continue;
}
/*
* check early for messages already done; we can skip
* the rest.
*/
if (done)
continue;
parse_line(line, &header, &done);
add_line(line);
}
gl_destroy(in_fp);
if (done)
retval = RET_CHANGE;
else
print_message(out_fp);
reset_message();
return (retval);
}
/* Parses current line, looking for message id */
static void
parse_line(char *line, int *header, int *done) {
char *p;
if (*line == '\0') {
*header = 0;
return;
}
if (!*header || msgid[0] != '\0' || strncasecmp(line, "message-id:", 11) != 0)
return;
p = line + 11;
while (isspace(*p)) p++;
if (*p == '<') {
char *end;
int len;
p++;
end = strchr(p, '>');
if (end != NULL) {
*end = '\0';
len = end - p;
} else {
warnx("no trailing '>' in "
"message-id <%s", p);
len = strlen(p);
}
/*
* Don't waste time with strncpy() -- it
* zero fills the string.
*/
if (len >= sizeof msgid) {
warnx("message-id <%s> "
"too long, truncated", p);
p[sizeof msgid - 1] = '\0';
}
strcpy(msgid, p);
if (end != NULL)
*end = '>';
}
*done = id_done(msgid, 0);
}
/*
* Add a line to the current message.
*/
void
add_line(char *p) {
size_t len;
len = strlen(p);
if (message.len + len >= message.size)
REALLOC(message.buf, message.size, len + 10240);
if (len > 0)
memcpy(&message.buf[message.len], p, len);
message.buf[message.len + len] = '\n';
message.len += len + 1;
}
/* Print out the current message */
void
print_message(FILE *fp) {
if (message.len > 0)
fwrite(message.buf, 1, message.len, fp);
}
/*
* clear all nodes of a message list.
* Don't free(), since they will be reused for the next message.
*/
void
reset_message(void) {
message.len = 0;
}
/*
* Mark an id as done, and return 0/1 for whether it was already done
* before the change.
*/
int
id_done(char *id, int offset) {
int diff;
int len;
IDTREE *new, *start;
if (id[0] == '\0')
return (0);
len = strlen(id);
/* See if the top node doesn't exist */
if (msgids == NULL || msgids->id == -1) {
new = msgid_alloc(len);
strcpy(&idtext[new->id], id);
return (0);
}
start = &msgids[offset];
assert(start->left != offset && start->right != offset);
diff = strcmp(id, &idtext[start->id]);
if (diff == 0)
return (1);
/* see if there's a node on the right side */
if (diff < 0) {
if (start->left >= 0)
return (id_done(id, start->left));
} else {
if (start->right >= 0)
return (id_done(id, start->right));
}
/* create the new node */
new = msgid_alloc(len);
strcpy(&idtext[new->id], id);
/* attach to the right side */
start = &msgids[offset];
assert(new - msgids != offset);
if (diff < 0)
start->left = new - msgids;
else
start->right = new - msgids;
return (0);
}
/* Allocate a new IDTREE node with the specified length. */
IDTREE *
msgid_alloc(int min) {
IDTREE *new;
if (i_used >= i_alloc)
REALLOC(msgids, i_alloc, 128);
if (t_used + min >= t_alloc)
REALLOC(idtext, t_alloc, min + 10240);
new = &msgids[i_used];
new->left = new->right = -1;
new->id = t_used;
i_used++;
t_used += min + 1;
return (new);
}
void
usage(void) {
fprintf(stderr, "usage: de-dupe [-p] [mailbox ...]\n");
exit(EX_USAGE);
}
syntax highlighted by Code2HTML, v. 0.9.1