/* $Id: addrparse.C,v 1.8 2006/02/04 01:10:33 dm Exp $ */
/*
*
* Copyright (C) 2004 David Mazieres (dm@uun.org)
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2, or (at
* your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
* USA
*
*/
/*
* RFC 821 address parser. The grammar in RFC 821 unfortunately
* doesn't match reality--it permits things that are never used, and
* prohibits commonly used email addresses (e.g., anything in which a
* DNS label has fewer than three characters). We try to be as strict
* as possible while remaining reasonable.
*/
#include "asmtpd.h"
/* Consume a decimal number of one to three digits whose value is at
 * most 255, starting at *dpp.  Returns 0 and advances *dpp past the
 * digits that were consumed on success; returns -1 and leaves *dpp
 * untouched otherwise.  At most three digits are consumed, so a fourth
 * digit is left for the caller to reject. */
static int
gobble_0_255 (const char **dpp)
{
  const char *dp = *dpp;
  int ndigits = 0;
  int value = 0;

  /* The <ctype.h> classifiers take an unsigned char value (or EOF);
   * handing them a negative plain char is undefined behavior, so every
   * call goes through an unsigned char cast. */
  if (!isdigit ((unsigned char) *dp))
    return -1;
  while (ndigits < 3 && isdigit ((unsigned char) *dp)) {
    value = 10 * value + (*dp++ - '0');
    ndigits++;
  }
  if (value > 255)
    return -1;
  *dpp = dp;
  return 0;
}
/* Consume a bracketed dotted quad of the form "[a.b.c.d]", where each
 * component is accepted by gobble_0_255.  Returns 0 and advances *dpp
 * past the closing bracket on success; returns -1 and leaves *dpp
 * untouched on failure. */
static inline int
gobble_dquad (const char **dpp)
{
  const char *cur = *dpp;
  int octet;

  if (*cur++ != '[')
    return -1;
  for (octet = 0; octet < 4; octet++) {
    if (gobble_0_255 (&cur))
      return -1;
    /* The first three octets are followed by '.', the last by ']'. */
    if (*cur++ != (octet < 3 ? '.' : ']'))
      return -1;
  }
  *dpp = cur;
  return 0;
}
/* Consume a single domain-name label at *dpp.  When uok is true,
 * underscores are accepted wherever an alphanumeric is.  Returns 0 and
 * advances *dpp on success; returns -1 and leaves *dpp untouched when
 * the first character is not acceptable. */
static int
gobble_name (const char **dpp, bool uok)
{
  /* I'm cheating a bit here.  RFC821 requires at least 3 characters
   * in a name, whereas the real world often only uses one or two.  We
   * still insist that the last character not be a hyphen, though.
   * RFC821 also wants the first character to be a letter, but we
   * violate that too, accepting any alphanumeric so that names such
   * as 3com.com work.  I'm also cheating by bumping "#" and dotnums
   * back up to gobble_domain (see the comment there). */
  const char *dp = *dpp;

  /* The <ctype.h> classifiers take an unsigned char value (or EOF);
   * a negative plain char would be undefined behavior, hence the
   * casts. */
  if (!isalnum ((unsigned char) *dp) && !(uok && *dp == '_'))
    return -1;
  dp++;
  while (isalnum ((unsigned char) *dp) || *dp == '-' || (uok && *dp == '_'))
    dp++;
  /* Give back trailing hyphens.  The first character is never a
   * hyphen, so this cannot run off the front of the label. */
  while (dp[-1] == '-')
    dp--;
  *dpp = dp;
  return 0;
}
/* Consume a dot-separated sequence of one or more labels, each
 * accepted by gobble_name (uok is passed through).  A '.' that is not
 * followed by a valid label is not consumed: *dpp is left pointing at
 * that dot.  Returns -1 only when the very first label is invalid. */
static inline int
gobble_dname (const char **dpp, bool uok)
{
  const char *cur = *dpp;

  if (gobble_name (&cur, uok))
    return -1;
  /* Invariant: cur sits on the first character not yet consumed. */
  while (*cur == '.') {
    const char *label = cur + 1;
    if (gobble_name (&label, uok))
      break;
    cur = label;
  }
  *dpp = cur;
  return 0;
}
/* Consume a domain at *dpp: either one bracketed dotted quad or a
 * dotted domain name.  uok is forwarded to the domain-name parser and
 * has no effect on the dotted-quad form.  Returns 0 on success and -1
 * on failure. */
static int
gobble_domain (const char **dpp, bool uok = false)
{
  /* I'm cheating a bit here, but only because RFC821 doesn't make
   * sense.  I mean, do you really want to consider something like
   * "harvard.edu.[18.26.0.1].#34" a valid domain name?  How the hell
   * would you route mail to an address like that anyway?  So we'll
   * take either one bracketed dotted quad or else a syntactically
   * valid internet domain name. */
  return **dpp == '[' ? gobble_dquad (dpp) : gobble_dname (dpp, uok);
}
/* Consume an '@' immediately followed by a domain (underscores not
 * allowed).  Returns 0 and advances *dpp on success; returns -1 and
 * leaves *dpp untouched on failure. */
static int
gobble_atdomain (const char **dpp)
{
  const char *cur = *dpp;

  if (*cur != '@')
    return -1;
  cur++;
  if (gobble_domain (&cur))
    return -1;
  *dpp = cur;
  return 0;
}
/* Consume a source route: one or more "@domain" elements separated by
 * ',' and terminated by ':'.  Returns 0 and advances *dpp past the
 * ':' on success; returns -1 and leaves *dpp untouched on failure. */
static int
gobble_adl (const char **dpp)
{
  const char *cur = *dpp;

  for (;;) {
    if (gobble_atdomain (&cur))
      return -1;
    char sep = *cur++;
    if (sep == ',')
      continue;			/* another @domain follows */
    if (sep != ':')
      return -1;
    break;
  }
  *dpp = cur;
  return 0;
}
/* Return 1 if c may not appear unquoted in a local part, 0 otherwise.
 * The rejected set is: every byte up to and including space, DEL,
 * every byte with the high bit set, and the thirteen characters
 *   " ( ) , . : ; < > @ [ \ ]
 * Character values are taken to be ASCII. */
static inline int
myisspecial (char c)
{
  unsigned char uc = (unsigned char) c;

  if (uc <= 0x20 || uc >= 0x7f)
    return 1;
  switch (uc) {
  case '"':
  case '(':
  case ')':
  case ',':
  case '.':
  case ':':
  case ';':
  case '<':
  case '>':
  case '@':
  case '[':
  case '\\':
  case ']':
    return 1;
  default:
    return 0;
  }
}
/* Consume one element of an unquoted local part at *dpp: either a
 * backslash escape (two bytes) or a single character for which
 * myisspecial returns 0.  Returns 0 and advances *dpp on success;
 * returns -1 and leaves *dpp untouched on failure. */
static inline int
gobble_char (const char **dpp)
{
  /* More cheating.  You should be allowed to have any quoted
   * character in the local part of an address, but we disallow '\0'
   * as well as '\n' (since we are reading input one line at a time).
   * I don't feel too bad because sendmail seems to do the same thing,
   * and anyone who puts '\0' or a newline in an email address is
   * looking for trouble. */
  const char *dp = *dpp;
  if (*dp == '\\') {
    /* Reject a backslash followed by NUL, newline or a non-ASCII
     * byte, the same set gobble_qqq rejects.  These conditions must
     * be OR-ed: a byte cannot be both '\0' and > 127, and accepting
     * a trailing backslash would move *dpp past the string's
     * terminating NUL. */
    if (dp[1] == '\0' || dp[1] == '\n' || (unsigned char) dp[1] > 127)
      return -1;
    *dpp = dp + 2;
    return 0;
  }
  if (myisspecial (*dp))
    return -1;
  *dpp = dp + 1;
  return 0;
}
/* Consume a run of one or more elements accepted by gobble_char.
 * Returns 0 and advances *dpp past the run on success; returns -1 and
 * leaves *dpp untouched when not even one element is present. */
static int
gobble_string (const char **dpp)
{
  const char *cur = *dpp;
  int nelems = 0;

  while (gobble_char (&cur) == 0)
    nelems++;
  if (nelems == 0)
    return -1;
  *dpp = cur;
  return 0;
}
/* Consume one or more gobble_string runs separated by single dots.
 * Every dot must be followed by another run, so a leading, trailing or
 * doubled dot makes the whole parse fail.  Returns 0 and advances
 * *dpp on success; returns -1 and leaves *dpp untouched on failure. */
static int
gobble_dot_string (const char **dpp)
{
  const char *cur = *dpp;

  for (;;) {
    if (gobble_string (&cur))
      return -1;
    if (*cur != '.')
      break;
    cur++;			/* step over the separator */
  }
  *dpp = cur;
  return 0;
}
/* Consume one element of the body of a quoted string at *dpp.  A
 * backslash escape takes two bytes and is refused when the escaped
 * byte is NUL, newline or has the high bit set.  Any other single
 * byte is accepted except NUL, CR, LF and the double quote.  (Again,
 * we cheat on '\0' and '\n'.)  Returns 0 and advances *dpp on
 * success; returns -1 and leaves *dpp untouched on failure. */
static inline int
gobble_qqq (const char **dpp)
{
  const char *cur = *dpp;

  switch (*cur) {
  case '\\':
    {
      unsigned char escaped = (unsigned char) cur[1];
      if (escaped == '\0' || escaped == '\n' || escaped > 127)
	return -1;
      *dpp = cur + 2;
      return 0;
    }
  case '\0':
  case '\r':
  case '\n':
  case '"':
    return -1;
  default:
    *dpp = cur + 1;
    return 0;
  }
}
/* Consume a double-quoted string whose body holds at least one
 * element accepted by gobble_qqq; an empty pair of quotes is refused.
 * Returns 0 and advances *dpp past the closing quote on success;
 * returns -1 and leaves *dpp untouched on failure. */
static int
gobble_quoted_string (const char **dpp)
{
  const char *cur = *dpp;

  if (*cur != '"')
    return -1;
  cur++;

  const char *body = cur;
  while (gobble_qqq (&cur) == 0)
    continue;
  if (cur == body)		/* nothing between the quotes */
    return -1;

  if (*cur != '"')
    return -1;
  *dpp = cur + 1;
  return 0;
}
/* Consume the local part of an address: a quoted string when it
 * starts with a double quote, a dot-string otherwise.  Returns
 * whatever the selected parser returns (0 on success, -1 on
 * failure). */
static int
gobble_local_part (const char **dpp)
{
  return **dpp == '"' ? gobble_quoted_string (dpp)
		      : gobble_dot_string (dpp);
}
/* Parse "<prefix> <address>" at *dpp, where prefix is matched without
 * regard to case and may be followed by spaces.
 *
 * Returns:
 *   - NULL if the input does not match, or the text between the angle
 *     brackets is longer than MAX_ADDR_LEN;
 *   - "" for the empty address "<>".  Note that *dpp is NOT advanced
 *     on this path;
 *   - otherwise the text between the angle brackets, including any
 *     leading source route.  In this case *dpp is advanced past the
 *     '>' and any spaces that follow it. */
str
extract_addr (const char **dpp, const char *prefix)
{
  const char *cur = *dpp;
  int prefix_len = strlen (prefix);

  if (strncasecmp (cur, prefix, prefix_len) != 0)
    return NULL;
  cur += prefix_len;
  while (*cur == ' ')
    cur++;

  if (*cur != '<')
    return NULL;
  const char *start = ++cur;
  if (*cur == '>')
    return "";

  /* Optional source route, then local-part "@" domain. */
  if (*cur == '@' && gobble_adl (&cur))
    return NULL;
  if (gobble_local_part (&cur))
    return NULL;
  if (*cur != '@')
    return NULL;
  cur++;
  if (gobble_domain (&cur))
    return NULL;

  int addr_len = cur - start;
  if (addr_len > MAX_ADDR_LEN)
    return NULL;

  if (*cur != '>')
    return NULL;
  cur++;
  while (*cur == ' ')
    cur++;

  *dpp = cur;
  return str (start, addr_len);
}
/* Convenience overload: run the pointer-based extract_addr over the
 * contents of `in' and return its result.  The position at which
 * parsing stopped is discarded. */
str
extract_addr (const str &in, const char *prefix)
{
  const char *cursor = in;

  return extract_addr (&cursor, prefix);
}
/* Return, in lower case, the domain to which addr should be relayed.
 *
 * If addr starts with '@' (a source route), the result is the first
 * route domain, which must be followed by ',' or ':'.  Otherwise addr
 * must be exactly local-part "@" domain, and the result is that
 * domain.  Returns NULL when addr fits neither form. */
str
extract_relay (const char *addr)
{
  const char *cur = addr;
  const char *start;

  if (*cur != '@') {
    if (gobble_local_part (&cur) || *cur++ != '@')
      return NULL;
    start = cur;
    if (gobble_domain (&cur) || *cur)
      return NULL;
  }
  else {
    if (gobble_atdomain (&cur) || (*cur != ',' && *cur != ':'))
      return NULL;
    start = addr + 1;		/* skip the leading '@' */
  }

  int len = cur - start;
  mstr lowered (len);
  for (int k = 0; k < len; k++)
    lowered[k] = tolower (start[k]);
  str res (lowered);
  return res;
}
/* Return true iff the whole of addr is a single syntactically valid
 * domain (dotted name or bracketed dotted quad).  uok permits
 * underscores in the labels of a dotted name. */
bool
validate_domain (const char *addr, bool uok)
{
  const char *end = addr;

  if (gobble_domain (&end, uok))
    return false;
  /* Nothing may follow the domain. */
  return *end == '\0';
}
/* Return everything that follows the '@' which terminates the local
 * part of addr, or NULL if no such '@' is found.  addr is first tried
 * as a plain local-part "@" ...; if that fails it is retried with a
 * leading source route.  The returned text itself is not validated as
 * a domain. */
str
extract_domain (const char *addr)
{
  const char *cur = addr;

  if (gobble_local_part (&cur) == 0 && *cur == '@')
    return cur + 1;

  /* Start over, this time expecting a source route up front. */
  cur = addr;
  if (gobble_adl (&cur))
    return NULL;
  if (gobble_local_part (&cur) == 0 && *cur == '@')
    return cur + 1;
  return NULL;
}
/* Return a copy of addr in which the domain portions are folded to
 * lower case while the local part is left as written.
 *
 * Two shapes are recognized:
 *   - local-part "@" rest: everything from the '@' onward is
 *     lower-cased;
 *   - source-route local-part [ "@" rest ]: the source route and
 *     everything after the local part's '@' are lower-cased.
 * Returns NULL when addr matches neither shape. */
str
domain_tolower (const char *addr)
{
  /* NOTE(review): strcpy also writes the terminating NUL, so this
   * relies on mstr (n) providing room for n + 1 bytes -- confirm
   * against the mstr implementation. */
  mstr m (strlen (addr));
  strcpy (m, addr);
  const char *dp = implicit_cast<char *> (m);
  if (!gobble_local_part (&dp) && *dp == '@') {
    /* The text after '@' has not been validated and may hold bytes
     * with the high bit set; tolower requires an unsigned char value,
     * so cast to avoid passing a negative plain char. */
    for (char *cp = const_cast<char *> (dp); *cp; cp++)
      *cp = tolower ((unsigned char) *cp);
    return m;
  }
  /* Not a plain address; retry with a leading source route. */
  dp = implicit_cast<char *> (m);
  if (gobble_adl (&dp))
    return NULL;
  for (char *cp = m; cp < dp; cp++)
    *cp = tolower ((unsigned char) *cp);
  if (gobble_local_part (&dp))
    return NULL;
  if (!*dp)
    return m;			/* route + local part, no domain */
  if (*dp++ != '@')
    return NULL;
  for (char *cp = const_cast<char *> (dp); *cp; cp++)
    *cp = tolower ((unsigned char) *cp);
  return m;
}
/* Return the local part of addr, which must be exactly
 * local-part "@" domain with nothing before or after.  Returns NULL
 * if addr does not have that form. */
str
extract_local (const char *addr)
{
  const char *at = addr;

  if (gobble_local_part (&at))
    return NULL;
  if (*at != '@')
    return NULL;

  /* The remainder must be one complete, valid domain. */
  const char *rest = at + 1;
  if (gobble_domain (&rest))
    return NULL;
  if (*rest != '\0')
    return NULL;

  return str (addr, at - addr);
}
/* Return true iff addr consists of a valid local part and nothing
 * else.  The parse must also stop exactly at addr's reported length,
 * which rules out a str with a NUL byte embedded before its end. */
bool
validate_local (str addr)
{
  const char *end = addr;

  if (gobble_local_part (&end))
    return false;
  if (*end != '\0')
    return false;
  return end == addr.cstr () + addr.len ();
}
/* syntax highlighted by Code2HTML, v. 0.9.1 */