/*
* mf.c -- mail filter subroutines
*
* $Id: mf.c,v 1.3 2005/11/13 22:36:07 pm215 Exp $
*
* This code is Copyright (c) 2002, by the authors of nmh. See the
* COPYRIGHT file in the root directory of the nmh distribution for
* complete copyright information.
*/
#include <h/mf.h>
#include <ctype.h>
#include <stdio.h>
/*
* static prototypes
*/
/* string helpers: duplicate a string / append to a malloc'ed string */
static char *getcpy (char *);
static char *add (char *, char *);
/* helpers used by the UUCP-style parser uucpadrx() */
static void compress (char *, char *);
static int isat (char *);
/* pieces of the 822-style parser driven by getadrx() */
static int parse_address (void);
static int phrase (char *);
static int route_addr (char *);
static int local_part (char *);
static int domain (char *);
static int route (char *);
/* tokenizer shared by the 822-style parser routines */
static int my_lex (char *);
/*
 * getcpy -- return a freshly malloc'ed copy of the string s.
 *
 * A NULL argument is treated as a fatal programming error and aborts the
 * process.  If the allocation fails NULL is returned.  The caller owns
 * the returned storage and releases it with free().
 */
static char *
getcpy (char *s)
{
    size_t len;
    char *copy;

    if (s == NULL)
        abort ();       /* does not return, nothing more to do here */

    len = strlen (s);
    /* same allocation size (length + 2) as the historical code */
    copy = malloc (len + 2);
    if (copy != NULL)
        memcpy (copy, s, len + 1);
    return copy;
}
/*
 * add -- build the string "s2 followed by s1" in new storage.
 *
 * s2 must be NULL or malloc'ed: it is freed before returning, so only the
 * returned pointer may be used afterwards.  With a NULL s2 the result is
 * simply a copy of s1.  NULL is returned (and s2 is still freed) when the
 * allocation fails.
 */
static char *
add (char *s1, char *s2)
{
    char *joined;
    size_t headlen, taillen;

    if (s2 == NULL)
        return getcpy (s1);

    taillen = strlen (s1);
    headlen = strlen (s2);
    joined = malloc (headlen + taillen + 2);
    if (joined != NULL) {
        memcpy (joined, s2, headlen);
        memcpy (joined + headlen, s1, taillen + 1);     /* includes the NUL */
    }
    free (s2);
    return joined;
}
/*
 * isfrom -- report whether a line starts with "From " or ">From ".
 *
 * Returns 1 on a match, 0 otherwise.
 */
int
isfrom(char *string)
{
    /* step over a single leading '>' so one comparison covers both forms */
    char *start = (*string == '>') ? string + 1 : string;

    return strncmp (start, "From ", 5) == 0;
}
/*
 * lequal -- case-insensitive string equality.
 *
 * Returns 1 when a and b have the same length and match character for
 * character once both are folded to upper case, 0 otherwise.
 *
 * Characters are converted to unsigned char before being handed to
 * toupper(): passing a negative plain char to a <ctype.h> function is
 * undefined behaviour.
 *
 * NOTE(review): the earlier code returned the project macro FALSE for a
 * mismatch; this version returns 0, which assumes FALSE is 0 -- confirm
 * against the project headers.
 */
int
lequal (char *a, char *b)
{
    for (; *a != 0 && *b != 0; a++, b++) {
        if (toupper ((unsigned char) *a) != toupper ((unsigned char) *b))
            break;
    }

    /* equal only if both strings were consumed completely */
    return (*a == 0 && *b == 0);
}
/*
* seekadrx() is tricky. We want to cover both UUCP-style and ARPA-style
* addresses, so for each list of addresses we see if we can find some
* character to give us a hint.
*/
#define CHKADR 0 /* undetermined address style */
#define UNIXDR 1 /* UNIX-style address */
#define ARPADR 2 /* ARPAnet-style address */
/* seekadrx() picks the ARPA-style parser when the address list contains any of these characters */
static char *punctuators = ";<>.()[]";
/* uucpadrx() state: vp is its malloc'ed, whitespace-compressed copy of the list, tp the part not yet parsed */
static char *vp = NULL;
static char *tp = NULL;
/* result record filled in by uucpadrx(); a pointer to it is what uucpadrx() returns */
static struct adrx adrxs1;
/*
 * seekadrx -- return the next address from addrs, choosing the parser
 * by looking at the list.
 *
 * While the style is undetermined the list is scanned for any character
 * in `punctuators'; finding one selects getadrx(), otherwise uucpadrx()
 * is used.  The decision is remembered across calls and is reset once
 * the selected parser returns NULL.
 */
struct adrx *
seekadrx (char *addrs)
{
    static int state = CHKADR;
    struct adrx *result;

    if (state == CHKADR)
        state = (strpbrk (addrs, punctuators) != NULL) ? ARPADR : UNIXDR;

    if (state == UNIXDR)
        result = uucpadrx (addrs);
    else
        result = getadrx (addrs);

    if (result == NULL)
        state = CHKADR;         /* re-detect the style on the next list */
    return result;
}
/*
* uucpadrx() implements a partial UUCP-style address parser. It's based
* on the UUCP notion that addresses are separated by spaces or commas.
*/
/*
 * Returns the next address of the list, or NULL when the list is used up
 * (or when the private copy of the list cannot be allocated).
 *
 * The first call of a sequence copies and whitespace-compresses addrs;
 * later calls continue from the saved position and do not look at addrs
 * again until NULL has been returned.
 *
 * In the returned record `text' is a malloc'ed copy that is freed by the
 * next call, while `mbox' and `host' point into the private copy of the
 * list, which is freed when the list is exhausted.
 */
struct adrx *
uucpadrx (char *addrs)
{
    char *cp, *wp, *xp, *yp, *zp;
    struct adrx *adrxp = &adrxs1;

    if (vp == NULL) {
        vp = tp = getcpy (addrs);
        if (vp == NULL)
            return NULL;        /* out of memory: nothing to parse */
        compress (addrs, vp);
    }
    else if (tp == NULL) {
        free (vp);
        vp = NULL;
        return NULL;
    }

    /* <ctype.h> functions need an unsigned char value */
    for (cp = tp; isspace ((unsigned char) *cp); cp++)
        continue;
    if (*cp == 0) {
        free (vp);
        vp = tp = NULL;
        return NULL;
    }

    /* find the end of this address: a comma, or else a space */
    if ((wp = strchr (cp, ',')) == NULL) {
        if ((wp = strchr (cp, ' ')) != NULL) {
            xp = wp;
            while (isspace ((unsigned char) *xp))
                xp++;
            /* "user at host": the space belongs to the address */
            if (*xp != 0 && isat (--xp)) {
                yp = xp + 4;
                while (isspace ((unsigned char) *yp))
                    yp++;
                if (*yp != 0) {
                    if ((zp = strchr (yp, ' ')) != NULL)
                        *zp = 0, tp = ++zp;
                    else
                        tp = NULL;
                }
                else
                    *wp = 0, tp = ++wp;
            }
            else
                *wp = 0, tp = ++wp;
        }
        else
            tp = NULL;
    }
    else
        *wp = 0, tp = ++wp;

    if (adrxp->text)
        free (adrxp->text);
    adrxp->text = getcpy (cp);
    adrxp->mbox = cp;
    adrxp->host = adrxp->path = NULL;

    if ((wp = strrchr (cp, '@')) != NULL) {
        *wp++ = 0;
        adrxp->host = *wp ? wp : NULL;
    }
    else {
        /*
         * Scan backwards for " at ", cutting the string at every match.
         * An index is used so that no pointer before the start of the
         * string is ever formed, even for addresses shorter than four
         * characters.
         */
        size_t len = strlen (cp);
        size_t i;

        if (len >= 4)
            for (i = len - 4 + 1; i-- > 0;) {
                wp = cp + i;
                if (isat (wp)) {
                    *wp = 0;
                    adrxp->host = wp + 4;
                }
            }
    }

    adrxp->pers = adrxp->grp = adrxp->note = adrxp->err = NULL;
    adrxp->ingrp = 0;

    return adrxp;
}
/*
 * compress -- copy fp to tp with whitespace normalized.
 *
 * Leading whitespace is dropped, every run of whitespace characters
 * becomes a single space, and a trailing space is removed.  tp must have
 * room for strlen(fp) + 1 bytes.
 *
 * Characters are converted to unsigned char before being handed to
 * isspace(): a negative plain char is not a valid argument.
 */
static void
compress (char *fp, char *tp)
{
    char prev = ' ';            /* pretend a space came first: drops leading blanks */
    char *start = tp;
    char ch;

    while ((ch = *fp++) != 0) {
        if (isspace ((unsigned char) ch)) {
            if (prev != ' ')
                *tp++ = prev = ' ';
        }
        else
            *tp++ = prev = ch;
    }
    *tp = 0;

    /* the last character written was a separator: take it back */
    if (prev == ' ' && start < tp)
        *--tp = 0;
}
/*
 * isat -- does p start with " at " (either letter in either case)?
 *
 * Returns TRUE on a match and FALSE otherwise.  The comparisons are
 * chained so that nothing beyond a terminating NUL is examined.
 */
static int
isat (char *p)
{
    int match = p[0] == ' '
        && (p[1] == 'a' || p[1] == 'A')
        && (p[2] == 't' || p[2] == 'T')
        && p[3] == ' ';

    return match ? TRUE : FALSE;
}
/*
*
* getadrx() implements a partial 822-style address parser. The parser
* is neither complete nor correct. It does however recognize nearly all
* of the 822 address syntax. In addition it handles the majority of the
* 733 syntax as well. Most problems arise from trying to accommodate both.
*
* In terms of 822, the route-specification in
*
* "<" [route] local-part "@" domain ">"
*
* is parsed and returned unchanged. Multiple at-signs are compressed
* via source-routing. Recursive groups are not allowed as per the
* standard.
*
* In terms of 733, " at " is recognized as equivalent to "@".
*
* In terms of both the parser will not complain about missing hosts.
*
* -----
*
* We should not allow addresses like
*
* Marshall T. Rose <MRose@UCI>
*
* but should insist on
*
* "Marshall T. Rose" <MRose@UCI>
*
* Unfortunately, a lot of mailers stupidly let people get away with this.
*
* -----
*
* We should not allow addresses like
*
* <MRose@UCI>
*
* but should insist on
*
* MRose@UCI
*
* Unfortunately, a lot of mailers stupidly let people's UAs get away with
* this.
*
* -----
*
* We should not allow addresses like
*
* @UCI:MRose@UCI-750a
*
* but should insist on
*
* Marshall Rose <@UCI:MRose@UCI-750a>
*
* Unfortunately, a lot of mailers stupidly do this.
*
*/
/* escape character: inside comments, quoted strings and domain literals my_lex() copies the character after it as-is */
#define QUOTE '\\'
/* token codes returned by my_lex() and remembered in last_lex */
#define LX_END 0 /* end of input */
#define LX_ERR 1 /* lexical error (also returned when the token buffer fills up) */
#define LX_ATOM 2 /* atom */
#define LX_QSTR 3 /* quoted string, "..." */
#define LX_DLIT 4 /* domain literal, [...] */
#define LX_SEMI 5 /* ';' */
#define LX_COMA 6 /* ',' */
#define LX_LBRK 7 /* '<' */
#define LX_RBRK 8 /* '>' */
#define LX_COLN 9 /* ':' */
#define LX_DOT 10 /* '.' */
#define LX_AT 11 /* '@', or an atom that my_lex() reports as " at " */
/* one entry of the special-character table: a character and the token code it produces */
struct specials {
char lx_chr;
int lx_val;
};
/*
 * Single-character tokens, terminated by a zero lx_chr.  The characters
 * mapped to LX_ERR are errors when they begin a token; my_lex() deals
 * with '(', '"' and '[' before it consults this table.  Any character
 * listed here also ends an atom.
 */
static struct specials special[] = {
{ ';', LX_SEMI },
{ ',', LX_COMA },
{ '<', LX_LBRK },
{ '>', LX_RBRK },
{ ':', LX_COLN },
{ '.', LX_DOT },
{ '@', LX_AT },
{ '(', LX_ERR },
{ ')', LX_ERR },
{ QUOTE, LX_ERR },
{ '"', LX_ERR },
{ '[', LX_ERR },
{ ']', LX_ERR },
{ 0, 0 }
};
/* group depth: raised when a group (phrase ':') is opened, lowered at its ';' */
static int glevel = 0;
/* value of glevel recorded while the mailbox was parsed; reported as adrx.ingrp */
static int ingrp = 0;
/* code of the token most recently returned by my_lex() */
static int last_lex = LX_END;
/* malloc'ed copy of the address list that getadrx() is working through */
static char *dp = NULL;
/* current scan position inside dp; NULL once the input has been used up */
static char *cp = NULL;
/* start of the address currently being parsed (set by parse_address()) */
static char *ap = NULL;
/* malloc'ed components of the current address; getadrx() frees them at the start of each call */
static char *pers = NULL;
static char *mbox = NULL;
static char *host = NULL;
static char *path = NULL;
static char *grp = NULL;
static char *note = NULL;
/* error message for the current address; empty when there is no error */
static char err[BUFSIZ];
/* text of the current address as handed back in adrx.text; also used as a scratch token buffer */
static char adr[BUFSIZ];
/* result record filled in by getadrx(); a pointer to it is what getadrx() returns */
static struct adrx adrxs2;
/*
 * getadrx -- return the next address of an 822-style address list.
 *
 * The first call of a sequence copies addrs (NULL is treated as an empty
 * list); later calls continue from the saved position and do not look at
 * addrs again until NULL has been returned.  NULL means the list is used
 * up.
 *
 * The returned record and everything it points to are owned by this
 * file: `text' and `err' are static buffers and the other strings are
 * freed by the next call.  `err' is non-NULL when the address could not
 * be parsed; the parser then skips ahead to the next comma or the end of
 * the input.
 */
struct adrx *
getadrx (char *addrs)
{
    char *bp;
    size_t n;
    struct adrx *adrxp = &adrxs2;

    /* drop the pieces of the previous address; free(NULL) is harmless */
    free (pers);
    free (mbox);
    free (host);
    free (path);
    free (grp);
    free (note);
    pers = mbox = host = path = grp = note = NULL;
    err[0] = 0;

    if (dp == NULL) {
        dp = cp = getcpy (addrs ? addrs : "");
        glevel = 0;
    }
    else if (cp == NULL) {
        free (dp);
        dp = NULL;
        return NULL;
    }

    switch (parse_address ()) {
        case DONE:
            free (dp);
            dp = cp = NULL;
            return NULL;

        case OK:
            if (last_lex != LX_COMA && last_lex != LX_END) {
                /* catch trailing comments, then rewind the scan position */
                bp = cp;
                my_lex (adr);
                cp = bp;
            }
            break;

        default:
            break;
    }

    /* after an error, skip the rest of the broken address */
    if (err[0])
        while (last_lex != LX_COMA && last_lex != LX_END)
            my_lex (adr);

    while (isspace ((unsigned char) *ap))
        ap++;

    /*
     * Copy the text of this address.  The copy is bounded by the size of
     * adr, and the precision for "%.*s" is passed as the int that printf
     * requires.
     */
    if (cp)
        snprintf (adr, sizeof adr, "%.*s", (int) (cp - ap), ap);
    else
        snprintf (adr, sizeof adr, "%s", ap);

    /* strip one trailing separator; adr may be empty */
    n = strlen (adr);
    if (n > 0
            && (adr[n - 1] == ',' || adr[n - 1] == ';' || adr[n - 1] == '\n'))
        adr[n - 1] = 0;

    adrxp->text = adr;
    adrxp->pers = pers;
    adrxp->mbox = mbox;
    adrxp->host = host;
    adrxp->path = path;
    adrxp->grp = grp;
    adrxp->ingrp = ingrp;
    adrxp->note = note;
    adrxp->err = err[0] ? err : NULL;

    return adrxp;
}
/*
 * parse_address -- parse one address starting at the scan position cp.
 *
 * Returns DONE when the input is exhausted, OK when an address (or an
 * empty group) was parsed, and NOTOK after storing a message in err.
 * The results are left in the file-level variables pers, mbox, host,
 * path, grp, note and ingrp; ap is set to the start of the address.
 *
 * Error messages are written with snprintf() because the offending
 * token can be almost BUFSIZ bytes long, which would not fit into err
 * together with the explanatory text.
 *
 * The labels get_addr, get_group, more_phrase and check_end are jumped
 * to from several places, so the order of the cases below matters.
 */
static int
parse_address (void)
{
    char buffer[BUFSIZ];

again: ;
    ap = cp;
    switch (my_lex (buffer)) {
        case LX_ATOM:
        case LX_QSTR:
            pers = getcpy (buffer);
            break;

        case LX_SEMI:
            if (glevel-- <= 0) {
                strcpy (err, "extraneous semi-colon");
                return NOTOK;
            }
            /* fall through */
        case LX_COMA:
            /* empty list element: forget any comment and start over */
            if (note) {
                free (note);
                note = NULL;
            }
            goto again;

        case LX_END:
            return DONE;

        case LX_LBRK:           /* sigh (2) */
            goto get_addr;

        case LX_AT:             /* sigh (3) */
            cp = ap;
            if (route_addr (buffer) == NOTOK)
                return NOTOK;
            return OK;          /* why be choosy? */

        default:
            snprintf (err, sizeof err, "illegal address construct (%s)",
                    buffer);
            return NOTOK;
    }

    /* one word has been read into pers; the next token says what it was */
    switch (my_lex (buffer)) {
        case LX_ATOM:
        case LX_QSTR:
            pers = add (buffer, add (" ", pers));
    more_phrase: ;              /* sigh (1) */
            if (phrase (buffer) == NOTOK)
                return NOTOK;

            switch (last_lex) {
                case LX_LBRK:
            get_addr: ;
                    if (route_addr (buffer) == NOTOK)
                        return NOTOK;
                    if (last_lex == LX_RBRK)
                        return OK;
                    snprintf (err, sizeof err, "missing right-bracket (%s)",
                            buffer);
                    return NOTOK;

                case LX_COLN:
            get_group: ;
                    if (glevel++ > 0) {
                        snprintf (err, sizeof err,
                                "nested groups not allowed (%s)", pers);
                        return NOTOK;
                    }
                    grp = add (": ", pers);
                    pers = NULL;
                    {
                        char *pp = cp;

                        for (;;)
                            switch (my_lex (buffer)) {
                                case LX_SEMI:
                                case LX_END:    /* tsk, tsk */
                                    glevel--;
                                    return OK;

                                case LX_COMA:
                                    continue;

                                default:
                                    /* first member: rewind and parse it */
                                    cp = pp;
                                    return parse_address ();
                            }
                    }

                case LX_DOT:    /* sigh (1) */
                    pers = add (".", pers);
                    goto more_phrase;

                default:
                    snprintf (err, sizeof err,
                            "no mailbox in address, only a phrase (%s%s)",
                            pers, buffer);
                    return NOTOK;
            }

        case LX_LBRK:
            goto get_addr;

        case LX_COLN:
            goto get_group;

        case LX_DOT:
            mbox = add (buffer, pers);
            pers = NULL;
            if (route_addr (buffer) == NOTOK)
                return NOTOK;
            goto check_end;

        case LX_AT:
            ingrp = glevel;
            mbox = pers;
            pers = NULL;
            if (domain (buffer) == NOTOK)
                return NOTOK;
    check_end: ;
            switch (last_lex) {
                case LX_SEMI:
                    if (glevel-- <= 0) {
                        strcpy (err, "extraneous semi-colon");
                        return NOTOK;
                    }
                    /* fall through */
                case LX_COMA:
                case LX_END:
                    return OK;

                default:
                    snprintf (err, sizeof err,
                            "junk after local@domain (%s)", buffer);
                    return NOTOK;
            }

        case LX_SEMI:           /* no host */
        case LX_COMA:
        case LX_END:
            ingrp = glevel;
            if (last_lex == LX_SEMI && glevel-- <= 0) {
                strcpy (err, "extraneous semi-colon");
                return NOTOK;
            }
            mbox = pers;
            pers = NULL;
            return OK;

        default:
            snprintf (err, sizeof err, "missing mailbox (%s)", buffer);
            return NOTOK;
    }
}
/*
 * phrase -- collect the remaining words of a phrase.
 *
 * Every atom or quoted string that follows is appended to pers, with a
 * space in front of it.  The first token of any other kind ends the
 * phrase; it is left in buffer and last_lex for the caller.  Always
 * returns OK.
 */
static int
phrase (char *buffer)
{
    int token;

    while ((token = my_lex (buffer)) == LX_ATOM || token == LX_QSTR)
        pers = add (buffer, add (" ", pers));

    return OK;
}
/*
 * route_addr -- parse "[route] local-part [@ domain]".
 *
 * A leading at-sign introduces a route, handled by route(); otherwise
 * the scan position is rewound so that local_part() sees the token
 * again.  A missing host is accepted.  Returns OK or NOTOK; on NOTOK a
 * message has been stored in err.
 *
 * The message is written with snprintf() because the token in buffer
 * can be almost BUFSIZ bytes long.
 */
static int
route_addr (char *buffer)
{
    char *pp = cp;

    if (my_lex (buffer) == LX_AT) {
        if (route (buffer) == NOTOK)
            return NOTOK;
    }
    else
        cp = pp;

    if (local_part (buffer) == NOTOK)
        return NOTOK;

    switch (last_lex) {
        case LX_AT:
            return domain (buffer);

        case LX_SEMI:           /* if in group */
        case LX_RBRK:           /* no host */
        case LX_COMA:
        case LX_END:
            return OK;

        default:
            snprintf (err, sizeof err, "no at-sign after local-part (%s)",
                    buffer);
            return NOTOK;
    }
}
/*
 * local_part -- parse a local-part: words separated by dots.
 *
 * Each atom or quoted string, and each dot between them, is appended to
 * mbox.  ingrp is set from the current group depth.  The token that
 * ends the local-part is left in buffer and last_lex.  Returns OK, or
 * NOTOK with a message in err when a word is missing.
 *
 * The message is written with snprintf() because the token in buffer
 * can be almost BUFSIZ bytes long.
 */
static int
local_part (char *buffer)
{
    ingrp = glevel;

    for (;;) {
        switch (my_lex (buffer)) {
            case LX_ATOM:
            case LX_QSTR:
                mbox = add (buffer, mbox);
                break;

            default:
                snprintf (err, sizeof err, "no mailbox in local-part (%s)",
                        buffer);
                return NOTOK;
        }

        switch (my_lex (buffer)) {
            case LX_DOT:
                mbox = add (buffer, mbox);
                continue;

            default:
                return OK;
        }
    }
}
/*
 * domain -- parse a domain: sub-domains separated by dots.
 *
 * Each atom or domain literal, and each dot between them, is appended
 * to host.  When another at-sign follows, the host collected so far is
 * moved into the mailbox as "mbox%host" and a new host is started.  The
 * token that ends the domain is left in buffer and last_lex.  Returns
 * OK, or NOTOK with a message in err when a sub-domain is missing.
 *
 * The message is written with snprintf() because the token in buffer
 * can be almost BUFSIZ bytes long.
 */
static int
domain (char *buffer)
{
    for (;;) {
        switch (my_lex (buffer)) {
            case LX_ATOM:
            case LX_DLIT:
                host = add (buffer, host);
                break;

            default:
                snprintf (err, sizeof err,
                        "no sub-domain in domain-part of address (%s)",
                        buffer);
                return NOTOK;
        }

        switch (my_lex (buffer)) {
            case LX_DOT:
                host = add (buffer, host);
                continue;

            case LX_AT:         /* sigh (0) */
                mbox = add (host, add ("%", mbox));
                free (host);
                host = NULL;
                continue;

            default:
                return OK;
        }
    }
}
/*
 * route -- parse a source route; the leading at-sign has already been
 * consumed by the caller.
 *
 * The route text, from the first "@" up to and including the closing
 * colon, is accumulated in path.  Returns OK when the colon is reached
 * and NOTOK, with a message in err, on any syntax error.
 *
 * Two defects of the earlier code are corrected here: the "no at-sign
 * found for next domain" case recorded its message but carried on
 * parsing instead of failing like every other error in this function,
 * and the messages were formatted without a bound although the token in
 * buffer can be almost BUFSIZ bytes long.
 */
static int
route (char *buffer)
{
    path = getcpy ("@");

    for (;;) {
        switch (my_lex (buffer)) {
            case LX_ATOM:
            case LX_DLIT:
                path = add (buffer, path);
                break;

            default:
                snprintf (err, sizeof err,
                        "no sub-domain in domain-part of address (%s)",
                        buffer);
                return NOTOK;
        }

        switch (my_lex (buffer)) {
            case LX_COMA:
                path = add (buffer, path);
                /* skip repeated commas, then insist on the next at-sign */
                for (;;) {
                    switch (my_lex (buffer)) {
                        case LX_COMA:
                            continue;

                        case LX_AT:
                            path = add (buffer, path);
                            break;

                        default:
                            snprintf (err, sizeof err,
                                    "no at-sign found for next domain in route (%s)",
                                    buffer);
                            return NOTOK;
                    }
                    break;
                }
                continue;

            case LX_AT:         /* XXX */
            case LX_DOT:
                path = add (buffer, path);
                continue;

            case LX_COLN:
                path = add (buffer, path);
                return OK;

            default:
                snprintf (err, sizeof err,
                        "no colon found to terminate route (%s)", buffer);
                return NOTOK;
        }
    }
}
/*
 * my_lex -- read the next token at the scan position cp.
 *
 * The text of the token is stored in buffer, which must be at least
 * BUFSIZ bytes long, and its code is both returned and saved in
 * last_lex.  cp is advanced past the token and becomes NULL once the
 * end of the input has been reached.
 *
 * Parenthesized comments are not returned as tokens: their text is
 * appended to note and the token that follows is returned instead.
 * An atom is reported as LX_AT rather than LX_ATOM when the scan
 * position started with " at ", more input follows, and that input
 * contains no '<'.
 *
 * Characters are converted to unsigned char before being handed to the
 * <ctype.h> classification functions; a negative plain char is not a
 * valid argument for them.
 */
static int
my_lex (char *buffer)
{
    /* buffer should be at least BUFSIZ bytes long */
    int i, gotat = 0;
    char c, *bp;

/* Add C to the buffer bp. After use of this macro *bp is guaranteed to be within the buffer. */
#define ADDCHR(C) do { *bp++ = (C); if ((bp - buffer) == (BUFSIZ-1)) goto my_lex_buffull; } while (0)

    bp = buffer;
    *bp = 0;
    if (!cp)
        return (last_lex = LX_END);

    gotat = isat (cp);
    c = *cp++;
    while (isspace ((unsigned char) c))
        c = *cp++;
    if (c == 0) {
        cp = NULL;
        return (last_lex = LX_END);
    }

    /* comment: i counts the nesting depth below the outermost level */
    if (c == '(') {
        ADDCHR(c);
        for (i = 0;;)
            switch (c = *cp++) {
                case 0:
                    cp = NULL;
                    return (last_lex = LX_ERR);
                case QUOTE:
                    ADDCHR(c);
                    if ((c = *cp++) == 0) {
                        cp = NULL;
                        return (last_lex = LX_ERR);
                    }
                    ADDCHR(c);
                    continue;
                case '(':
                    i++;
                    /* fall through */
                default:
                    ADDCHR(c);
                    continue;
                case ')':
                    ADDCHR(c);
                    if (--i < 0) {
                        *bp = 0;
                        note = note ? add (buffer, add (" ", note))
                            : getcpy (buffer);
                        return my_lex (buffer);
                    }
            }
    }

    /* quoted string */
    if (c == '"') {
        ADDCHR(c);
        for (;;)
            switch (c = *cp++) {
                case 0:
                    cp = NULL;
                    return (last_lex = LX_ERR);
                case QUOTE:
                    ADDCHR(c);
                    if ((c = *cp++) == 0) {
                        cp = NULL;
                        return (last_lex = LX_ERR);
                    }
                    /* fall through */
                default:
                    ADDCHR(c);
                    continue;
                case '"':
                    ADDCHR(c);
                    *bp = 0;
                    return (last_lex = LX_QSTR);
            }
    }

    /* domain literal */
    if (c == '[') {
        ADDCHR(c);
        for (;;)
            switch (c = *cp++) {
                case 0:
                    cp = NULL;
                    return (last_lex = LX_ERR);
                case QUOTE:
                    ADDCHR(c);
                    if ((c = *cp++) == 0) {
                        cp = NULL;
                        return (last_lex = LX_ERR);
                    }
                    /* fall through */
                default:
                    ADDCHR(c);
                    continue;
                case ']':
                    ADDCHR(c);
                    *bp = 0;
                    return (last_lex = LX_DLIT);
            }
    }

    /* single-character tokens */
    ADDCHR(c);
    *bp = 0;
    for (i = 0; special[i].lx_chr != 0; i++)
        if (c == special[i].lx_chr)
            return (last_lex = special[i].lx_val);

    if (iscntrl ((unsigned char) c))
        return (last_lex = LX_ERR);

    /* atom: runs up to a special, control or whitespace character */
    for (;;) {
        if ((c = *cp++) == 0)
            break;
        for (i = 0; special[i].lx_chr != 0; i++)
            if (c == special[i].lx_chr)
                goto got_atom;
        if (iscntrl ((unsigned char) c) || isspace ((unsigned char) c))
            break;
        ADDCHR(c);
    }
got_atom: ;
    if (c == 0)
        cp = NULL;
    else
        cp--;                   /* the terminator belongs to the next token */
    *bp = 0;
    last_lex = !gotat || cp == NULL || strchr (cp, '<') != NULL
        ? LX_ATOM : LX_AT;
    return last_lex;

my_lex_buffull:
    /* Out of buffer space. *bp is the last byte in the buffer */
    *bp = 0;
    return (last_lex = LX_ERR);
}
/*
 * legal_person -- quote a personal name if it needs quoting.
 *
 * p is returned unchanged when it already starts with a double quote or
 * contains none of the characters in the `special' table.  Otherwise
 * the name is returned wrapped in double quotes, in a static buffer
 * that the next call overwrites.
 *
 * The copy is bounded: a name that does not fit is cut short so that
 * the closing quote and the terminating NUL still fit in the buffer.
 */
char *
legal_person (char *p)
{
    int i;
    char *scan;
    static char buffer[BUFSIZ];

    if (*p == '"')
        return p;

    for (scan = p; *scan; scan++)
        for (i = 0; special[i].lx_chr; i++)
            if (*scan == special[i].lx_chr) {
                /* room is reserved for two quotes and the NUL */
                snprintf (buffer, sizeof buffer, "\"%.*s\"",
                        (int) (sizeof buffer - 3), p);
                return buffer;
            }

    return p;
}
/*
 * mfgets -- read one header field, including continuation lines.
 *
 * Characters are read from `in' up to a newline that is not followed by
 * a space or tab.  For a continuation, the newline and the whitespace
 * character after it are kept in the text.  NUL bytes in the input are
 * dropped.
 *
 * Returns OK with *bp pointing at the text, which lives in a buffer
 * owned by this function and reused by the next call; DONE with *bp set
 * to NULL at end of file or on an empty line (the buffer is released);
 * NOTOK when memory cannot be obtained.
 *
 * When the buffer has to grow, the amount already stored is remembered
 * as an offset and the pointers are rebuilt from the block returned by
 * realloc().  The earlier code assigned the write position to the
 * buffer base at this point, so the base no longer addressed the start
 * of the allocation.
 */
int
mfgets (FILE *in, char **bp)
{
    int i;
    char *wp, *grown, *limit;
    static int len = 0;
    static char *pp = NULL;

    if (pp == NULL)
        if (!(pp = malloc ((size_t) (len = BUFSIZ))))
            return NOTOK;

    /* two bytes are kept in reserve: one pass may store two characters */
    for (limit = (wp = pp) + len - 2;;) {
        switch (i = getc (in)) {
            case EOF:
        eol: ;
                if (wp != pp) {
                    *wp = 0;
                    *bp = pp;
                    return OK;
                }
        eoh: ;
                *bp = NULL;
                free (pp);
                pp = NULL;
                return DONE;

            case 0:
                continue;

            case '\n':
                if (wp == pp)   /* end of headers, gobble it */
                    goto eoh;
                switch (i = getc (in)) {
                    default:    /* end of line */
                    case '\n':  /* end of headers, save for next call */
                        ungetc (i, in);
                        goto eol;

                    case ' ':   /* continue headers */
                    case '\t':
                        *wp++ = '\n';
                        break;
                }
                /* fall through: store the whitespace character as well */
            default:
                *wp++ = i;
                break;
        }

        if (wp >= limit) {
            size_t used = (size_t) (wp - pp);

            grown = realloc (pp, (size_t) (len + BUFSIZ));
            if (grown == NULL) {
                free (pp);
                pp = NULL;
                return NOTOK;
            }
            len += BUFSIZ;
            pp = grown;
            wp = pp + used;
            limit = pp + len - 2;
        }
    }
}
/* syntax highlighted by Code2HTML, v. 0.9.1 */