/* $Id: addrparse.C,v 1.8 2006/02/04 01:10:33 dm Exp $ */

/*
 *
 * Copyright (C) 2004 David Mazieres (dm@uun.org)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2, or (at
 * your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 * USA
 *
 */

/*
 * RFC 821 address parser.  The grammar in RFC 821 unfortunately
 * doesn't match reality--it permits things that are never used, and
 * prohibits commonly used email addresses (e.g., anything in which a
 * DNS label has fewer than three characters).  We try to be as strict
 * as possible while remaining reasonable.
 */

#include "asmtpd.h"

/*
 * Convention for every gobble_* helper below: DPP points at a cursor
 * into a NUL-terminated string.  On a match the helper advances *DPP
 * past the matched text and returns 0; on failure it returns -1 and
 * leaves *DPP untouched.
 *
 * Characters are cast to unsigned char before being handed to the
 * <ctype.h> functions, because passing a negative char value to them
 * is undefined behavior.
 */

/* Match a decimal number in the range 0-255 (at most three digits
 * are consumed). */
static int
gobble_0_255 (const char **dpp)
{
  const char *dp = *dpp;
  int i, n;

  if (!isdigit ((unsigned char) *dp))
    return -1;
  i = n = 0;
  while (isdigit ((unsigned char) *dp) && i++ < 3)
    n = 10 * n + *dp++ - '0';
  if (n > 255)
    return -1;

  *dpp = dp;
  return 0;
}

/* Match a bracketed dotted quad such as "[18.26.0.1]". */
static inline int
gobble_dquad (const char **dpp)
{
  const char *dp = *dpp;

  if (*dp++ != '['
      || gobble_0_255 (&dp)
      || *dp++ != '.'
      || gobble_0_255 (&dp)
      || *dp++ != '.'
      || gobble_0_255 (&dp)
      || *dp++ != '.'
      || gobble_0_255 (&dp)
      || *dp++ != ']')
    return -1;

  *dpp = dp;
  return 0;
}

/* Match one label of a domain name.  When UOK is true, underscores
 * are accepted as well. */
static int
gobble_name (const char **dpp, bool uok)
{
  /* I'm cheating a bit here.  RFC821 requires at least 3 characters
   * in a name, whereas the real world often only uses one or two.  We
   * still insist that the first character be a letter, and that the
   * last not be a hyphen, though.  I'm also cheating by bumping "#"
   * and dotnums back up to gobble_domain (see the comment there). */
  const char *dp = *dpp;

  /* if (!isalpha (*dp)) return -1; */
  /* Actually, violate 821 some more to allow 3com.com, etc. */
  if (!isalnum ((unsigned char) *dp) && (!uok || *dp != '_'))
    return -1;
  dp++;

  while (isalnum ((unsigned char) *dp) || *dp == '-' || (uok && *dp == '_'))
    dp++;
  /* Give back trailing hyphens.  This cannot run off the front of the
   * label because its first character is never a hyphen. */
  while (dp[-1] == '-')
    dp--;

  *dpp = dp;
  return 0;
}

/* Match one or more labels separated by single dots.  A trailing dot
 * that is not followed by a valid label is left unconsumed. */
static inline int
gobble_dname (const char **dpp, bool uok)
{
  const char *dp = *dpp;

  if (gobble_name (&dp, uok)
      /* || *dp++ != '.' || gobble_name (&dp) */)
    return -1;
  while (*dp++ == '.') {
    if (gobble_name (&dp, uok))
      break;
  }

  /* Both loop exits leave dp one past the first unconsumed character. */
  *dpp = dp - 1;
  return 0;
}

/* Match a domain: either a bracketed dotted quad or a domain name. */
static int
gobble_domain (const char **dpp, bool uok = false)
{
  /* I'm cheating a bit here, but only because RFC821 doesn't make
   * sense.  I mean, do you really want to consider something like
   * "harvard.edu.[18.26.0.1].#34" a valid domain name?  How the hell
   * would you route mail to an address like that anyway?  So we'll
   * take either one bracketed dotted quad or else a syntactically
   * valid internet domain name. */
  if (**dpp == '[')
    return gobble_dquad (dpp);
  else
    return gobble_dname (dpp, uok);
}

/* Match '@' followed by a domain. */
static int
gobble_atdomain (const char **dpp)
{
  const char *dp = *dpp;

  if (*dp++ != '@' || gobble_domain (&dp))
    return -1;

  *dpp = dp;
  return 0;
}

/* Match a source route: comma-separated "@domain" entries terminated
 * by ':' (the colon is consumed). */
static int
gobble_adl (const char **dpp)
{
  const char *dp = *dpp;

  do {
    if (gobble_atdomain (&dp))
      return -1;
  } while (*dp++ == ',');
  if (dp[-1] != ':')
    return -1;

  *dpp = dp;
  return 0;
}

/* Return nonzero if C may not appear unquoted in a local part:
 * control characters, space, DEL, any byte >= 128, and the
 * characters  " ( ) , . : ; < > @ [ \ ]  */
static inline int
myisspecial (char c)
{
  static const char specials[0x80] = {
    /* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x10 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x20 */ 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0,
    /* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0,
    /* 0x40 */ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0,
    /* 0x60 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
  };

  if ((unsigned char) c >= 128)
    return 1;
  return specials[(int) c];
}

/* Match one character of an unquoted local part: either a
 * non-special character or a backslash-escaped one. */
static inline int
gobble_char (const char **dpp)
{
  /* More cheating.  You should be allowed to have any quoted
   * character in the local part of an address, but we disallow '\0'
   * as well as '\n' (since we are reading input one line at a time).
   * I don't feel too bad because sendmail seems to do the same thing,
   * and anyone who puts '\0' or a newline in an email address is
   * looking for trouble. */
  const char *dp = *dpp;

  if (*dp == '\\') {
    /* These tests must be OR-ed together.  Rejecting "\\\0" is what
     * keeps the cursor from stepping past the string terminator. */
    if (dp[1] == '\0' || dp[1] == '\n' || (unsigned char) dp[1] > 127)
      return -1;
    *dpp = dp + 2;
    return 0;
  }

  if (myisspecial (*dp))
    return -1;
  *dpp = dp + 1;
  return 0;
}

/* Match one or more characters accepted by gobble_char. */
static int
gobble_string (const char **dpp)
{
  const char *dp = *dpp;

  if (gobble_char (&dp))
    return -1;
  while (!gobble_char (&dp))
    ;

  *dpp = dp;
  return 0;
}

/* Match one or more strings separated by single dots.  A dot must be
 * followed by another string, otherwise the match fails. */
static int
gobble_dot_string (const char **dpp)
{
  const char *dp = *dpp;

  do {
    if (gobble_string (&dp))
      return -1;
  } while (*dp++ == '.');

  *dpp = dp - 1;
  return 0;
}

/* Match one character inside a quoted string: a backslash escape, or
 * anything other than NUL, CR, LF and the double quote. */
static inline int
gobble_qqq (const char **dpp)
{
  /* Again, we cheat on '\0' and '\n'. */
  const char *dp = *dpp;

  if (*dp == '\\') {
    if ((unsigned char) dp[1] > 127 || dp[1] == '\0' || dp[1] == '\n')
      return -1;
    *dpp = dp + 2;
    return 0;
  }

  if (*dp == '\0' || *dp == '\r' || *dp == '\n' || *dp == '"')
    return -1;
  *dpp = dp + 1;
  return 0;
}

/* Match a double-quoted string with at least one character between
 * the quotes. */
static int
gobble_quoted_string (const char **dpp)
{
  const char *dp = *dpp;

  if (*dp++ != '"')
    return -1;
  if (gobble_qqq (&dp))
    return -1;
  while (!gobble_qqq (&dp))
    ;
  if (*dp++ != '"')
    return -1;

  *dpp = dp;
  return 0;
}

/* Match the local part of a mailbox: a quoted string if it starts
 * with a double quote, a dot-string otherwise. */
static int
gobble_local_part (const char **dpp)
{
  if (**dpp == '"')
    return gobble_quoted_string (dpp);
  else
    return gobble_dot_string (dpp);
}

/*
 * Parse PREFIX (matched case-insensitively), optional spaces, and a
 * path in angle brackets from *DPP.
 *
 * Returns the text between the angle brackets (including any source
 * route), or "" for the null path "<>".  On success *DPP is advanced
 * past the closing '>' and any spaces that follow it.  Returns NULL,
 * leaving *DPP unchanged, if the input does not parse or the address
 * is longer than MAX_ADDR_LEN.
 */
str
extract_addr (const char **dpp, const char *prefix)
{
  const char *s;
  const char *dp = *dpp;
  size_t pl = strlen (prefix);
  int rl;

  if (strncasecmp (dp, prefix, pl))
    return NULL;
  dp += pl;
  while (*dp == ' ')
    dp++;
  if (*dp != '<')
    return NULL;

  s = ++dp;
  if (*dp == '>') {
    /* Null path.  Advance the caller's cursor exactly as the normal
     * success path below does, so that anything following "<>" can
     * be found at *dpp. */
    dp++;
    while (*dp == ' ')
      dp++;
    *dpp = dp;
    return "";
  }

  if (*dp == '@' && gobble_adl (&dp))
    return NULL;
  if (gobble_local_part (&dp))
    return NULL;
  if (*dp++ != '@' || gobble_domain (&dp))
    return NULL;
  if ((rl = dp - s) > MAX_ADDR_LEN)
    return NULL;
  if (*dp++ != '>')
    return NULL;
  while (*dp == ' ')
    dp++;

  *dpp = dp;
  return str (s, rl);
}

/* Convenience overload for callers that do not need to know where
 * parsing stopped. */
str
extract_addr (const str &in, const char *prefix)
{
  const char *dp = in;
  return extract_addr (&dp, prefix);
}

/*
 * Return, in lower case, the domain mail for ADDR should be relayed
 * to.  If ADDR begins with a source route ("@domain" followed by ','
 * or ':') that is the first routing domain; otherwise ADDR must be
 * exactly "local@domain" and the result is that domain.  Returns NULL
 * if ADDR does not parse.
 */
str
extract_relay (const char *addr)
{
  const char *dp = addr;
  const char *s;
  int i, rl;

  if (*dp == '@') {
    if (gobble_atdomain (&dp) || (*dp != ',' && *dp != ':'))
      return NULL;
    s = addr + 1;
  }
  else {
    if (gobble_local_part (&dp) || *dp++ != '@')
      return NULL;
    s = dp;
    if (gobble_domain (&dp) || *dp)
      return NULL;
  }

  rl = dp - s;
  mstr r (rl);
  for (i = 0; i < rl; i++)
    r[i] = tolower ((unsigned char) s[i]);
  str res (r);
  return res;
}

/* Return true if the whole of ADDR is a syntactically valid domain.
 * UOK additionally permits underscores in domain-name labels. */
bool
validate_domain (const char *addr, bool uok)
{
  const char *dp = addr;
  return !gobble_domain (&dp, uok) && !*dp;
}

/*
 * Return everything after the '@' that follows the local part of
 * ADDR.  ADDR may optionally start with a source route.  The returned
 * text is not itself checked for being a valid domain.  Returns NULL
 * if no "local@" can be parsed.
 */
str
extract_domain (const char *addr)
{
  const char *dp = addr;

  if (!gobble_local_part (&dp) && *dp++ == '@')
    return dp;

  /* Retry from the start, this time expecting a source route. */
  dp = addr;
  if (gobble_adl (&dp))
    return NULL;
  if (!gobble_local_part (&dp) && *dp++ == '@')
    return dp;
  return NULL;
}

/*
 * Return a copy of ADDR with its domain portions folded to lower
 * case while the local part keeps its case.  A source-route prefix,
 * if present, is lowercased as well.  An address consisting of a
 * source route and a local part with no "@domain" is returned with
 * only the route lowercased.  Returns NULL if ADDR does not parse.
 */
str
domain_tolower (const char *addr)
{
  mstr m (strlen (addr));
  strcpy (m, addr);

  const char *dp = implicit_cast<const char *> (m);
  if (!gobble_local_part (&dp) && *dp == '@') {
    for (char *cp = const_cast<char *> (dp); *cp; cp++)
      *cp = tolower ((unsigned char) *cp);
    return m;
  }

  /* Not a plain local@domain; try again expecting a source route. */
  dp = implicit_cast<const char *> (m);
  if (gobble_adl (&dp))
    return NULL;
  for (char *cp = m; cp < dp; cp++)
    *cp = tolower ((unsigned char) *cp);
  if (gobble_local_part (&dp))
    return NULL;
  if (!*dp)
    return m;
  if (*dp++ != '@')
    return NULL;
  for (char *cp = const_cast<char *> (dp); *cp; cp++)
    *cp = tolower ((unsigned char) *cp);
  return m;
}

/* Return the local part of ADDR, which must be exactly
 * "local@domain" with a valid domain and nothing after it.  Returns
 * NULL otherwise. */
str
extract_local (const char *addr)
{
  const char *dp = addr;

  if (gobble_local_part (&dp) || *dp != '@')
    return NULL;

  const char *dp2 = dp + 1;
  if (gobble_domain (&dp2) || *dp2)
    return NULL;

  return str (addr, dp - addr);
}

/* Return true if the whole of ADDR is a syntactically valid local
 * part. */
bool
validate_local (str addr)
{
  const char *dp = addr;

  if (gobble_local_part (&dp) || *dp)
    return false;
  /* The parser stops at the first NUL byte; make sure that really was
   * the end of the str and not a NUL embedded in the middle of it. */
  if (addr.cstr () + addr.len () != dp)
    return false;
  return true;
}