/*- * Copyright 1997 Massachusetts Institute of Technology * * Permission to use, copy, modify, and distribute this software and * its documentation for any purpose and without fee is hereby * granted, provided that both the above copyright notice and this * permission notice appear in all copies, that both the above * copyright notice and this permission notice appear in all * supporting documentation, and that the name of M.I.T. not be used * in advertising or publicity pertaining to distribution of the * software without specific, written prior permission. M.I.T. makes * no representations about the suitability of this software for any * purpose. It is provided "as is" without express or implied * warranty. * * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $Id: util.c,v 1.12 2003/08/23 01:26:57 lutherj Exp $ */ /* FD_SETSIZE has to be defined here before sys/types.h is brought in * to allow select to work on file descriptors higher than 256. This * number should be at least WEBDAV_MAX_FILES + the number of sockets * we may be using at any given time (probably 10). A buffer is a * good idea here. */ #define FD_SETSIZE 2500 #include #include #include #include #include #include #include #include #include #include /* for time() */ #include /* for MAXNAMLEN */ #include #include /* for struct timeval */ #include #include "webdavd.h" #include "pathnames.h" #include "fetch.h" /*****************************************************************************/ /* String-handling and -parsing functions */ /*****************************************************************************/ /* * Undo the standard %-sign encoding in URIs (e.g., `%2f' -> `/'). This * must be done after the URI is parsed, since the principal purpose of * the encoding is to hide characters which would otherwise be significant * to the parser (like `/'). */ char *percent_decode(const char *uri) { char *rv, *s; s = malloc(strlen(uri) + 1); if (!s) { return (s); } rv = s; while (*uri) { if (*uri == '%' && uri[1] && isxdigit(uri[1]) && isxdigit(uri[2])) { int c; char buf[] = "xx"; buf[0] = uri[1]; buf[1] = uri[2]; sscanf(buf, "%x", &c); uri += 3; *s++ = c; } else { *s++ = *uri++; } } *s = '\0'; return rv; } /*****************************************************************************/ /* * Undo the standard %-sign encoding in URIs (e.g., `%2f' -> `/'). This * differs from the above routine in that it does the conversion within * the input string */ void percent_decode_in_place(char *uri) { char *s; s = uri; while (*uri) { if (*uri == '%' && uri[1] && isxdigit(uri[1]) && isxdigit(uri[2])) { int c; char buf[] = "xx"; buf[0] = uri[1]; buf[1] = uri[2]; sscanf(buf, "%x", &c); uri += 3; *s++ = c; } else { *s++ = *uri++; } } *s = '\0'; } /*****************************************************************************/ /* * Translate utf-8 url to http us-ascii via % encoding * Caller must free the allocated return string */ char *utf8_encode(const unsigned char *orig) { int index = 0, orig_index = 0; int charval; char *new_string = NULL, *slash = NULL; new_string = malloc((size_t)(strlen(orig) * UTF8_TO_ASCII_MAX_SCALE)); if (!new_string) { syslog(LOG_ERR, "utf8_encode: new_string could not be allocated"); return (NULL); } /* we don't want to escape characters in the host name part */ slash = strchr(orig, '/'); /* the end of the host name */ while (orig[orig_index] != '\0') { charval = (int)orig[orig_index]; if (((const char *) & orig[orig_index] > slash) && (charval <= 32 || charval == 34 || charval == 35 || charval == 37 || charval == 38 || (charval >= 58 && charval <= 64) || (charval >= 91 && charval <= 94) || charval == 96 || charval >= 123)) { /* * In other words if c is not one of the legitimate http accepted subset * of US-ASCII characters, we will escape it. That long if statement is * designed to screen out all control charaters, the space character, all * hi bit ascii characters the delete character and the "#%<>[]^{|} * characters. That quote by the way is an excluded character not the * beginning of a string as you may have guessed */ new_string[index] = '%'; ++index; sprintf(&(new_string[index]), "%02x", charval); index += 2; } else { new_string[index] = orig[orig_index]; ++index; } ++orig_index; } /* end while */ new_string[index] = '\0'; return (new_string); } /*****************************************************************************/ /* * Reconstruct a URL given a standard host:port string and a remote * request which contains all of the url except the host name. */ int reconstruct_url(const char *hostheader, const char *remotefile, char **url) { const char *colon; unsigned long length; unsigned long webdavprefixlength; webdavprefixlength = strlen(_WEBDAVPREFIX); if (strncmp(remotefile, _WEBDAVPREFIX, webdavprefixlength) == 0) { /* remotefile is full url */ (void *) * url = malloc(strlen(remotefile) + 1); if (!*url) { syslog(LOG_ERR, "reconstruct_url: *url could not be allocated"); return (ENOMEM); } (void)strcpy(*url, remotefile); } else { length = strlen(hostheader) + strlen(remotefile) + webdavprefixlength + 1; (void *) * url = malloc(length); if (!*url) { syslog(LOG_ERR, "reconstruct_url: *url could not be allocated"); return (ENOMEM); } (void)strcpy(*url, _WEBDAVPREFIX); colon = strchr(hostheader, ':'); if (colon != NULL) { errno = 0; (void)strncat(*url, hostheader, (size_t)(colon - hostheader)); } else { (void)strcat(*url, hostheader); } (void)strcat(*url, remotefile); } return 0; } /*****************************************************************************/ /* * Implement the `base64' encoding as described in RFC 1521. */ static const char base64[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; /*****************************************************************************/ char *to_base64(const unsigned char *buf, size_t len) { char *s, *rv; unsigned tmp; s = malloc((((len + 2) / 3) * 4) + 1); if (!s) { return (0); } rv = s; while (len >= 3) { tmp = buf[0] << 16 | buf[1] << 8 | buf[2]; s[0] = base64[tmp >> 18]; s[1] = base64[(tmp >> 12) & 077]; s[2] = base64[(tmp >> 6) & 077]; s[3] = base64[tmp & 077]; len -= 3; buf += 3; s += 4; } /* RFC 1521 enumerates these three possibilities... */ switch (len) { case 2: tmp = buf[0] << 16 | buf[1] << 8; s[0] = base64[(tmp >> 18) & 077]; s[1] = base64[(tmp >> 12) & 077]; s[2] = base64[(tmp >> 6) & 077]; s[3] = '='; s[4] = '\0'; break; case 1: tmp = buf[0] << 16; s[0] = base64[(tmp >> 18) & 077]; s[1] = base64[(tmp >> 12) & 077]; s[2] = s[3] = '='; s[4] = '\0'; break; case 0: s[0] = '\0'; break; } return rv; } /*****************************************************************************/ /* The from_base64 function decodes a base64 encoded c-string into outBuffer. * The outBuffer's size is *lengthptr. The actual number of bytes decoded into * outBuffer is also returned in *lengthptr. If outBuffer is large enough to * decode the base64 string and if the base64 encoding is valid, from_base64() * returns 0; otherwise -1 is returned. Note that outBuffer is just an array of * bytes... it is not a c-string. */ int from_base64(const char *base64str, unsigned char *outBuffer, size_t *lengthptr) { char decodedChar; unsigned long base64Length; unsigned char *eightBitByte; unsigned char sixBitEncoding[4]; unsigned short encodingIndex; int endOfData; const char *equalPtr; const char *base64CharPtr; const char *base64EndPtr; /* Determine the length of the base64 input string. * This also catches illegal '=' characters within a base64 string. */ base64Length = 0; /* is there an '=' character? */ equalPtr = strchr(base64str, '='); if ( equalPtr != NULL ) { /* yes -- then it must be the last character of an octet, or * it must be the next to last character of an octet followed * by another '=' character */ switch ( (equalPtr - base64str) % 4 ) { case 0: case 1: /* invalid encoding */ goto error_exit; break; case 2: if ( equalPtr[1] != '=' ) { /* invalid encoding */ goto error_exit; } base64Length = (equalPtr - base64str) + 2; *lengthptr += 2; /* adjust for padding */ break; case 3: base64Length = (equalPtr - base64str) + 1; *lengthptr += 1; /* adjust for padding */ break; } } else { base64Length = strlen(base64str); } /* Make sure outBuffer is big enough */ if ( *lengthptr < ((base64Length / 4) * 3) ) { /* outBuffer is too small */ goto error_exit; } /* Make sure length is a multiple of 4 */ if ( (base64Length % 4) != 0 ) { /* invalid encoding */ goto error_exit; } /* OK -- */ eightBitByte = outBuffer; encodingIndex = 0; endOfData = FALSE; base64EndPtr = (char *)((unsigned long)base64str + base64Length); base64CharPtr = base64str; while ( base64CharPtr < base64EndPtr ) { decodedChar = *base64CharPtr++; if ( (decodedChar >= 'A') && (decodedChar <= 'Z') ) { decodedChar = decodedChar - 'A'; } else if ( (decodedChar >= 'a') && (decodedChar <= 'z') ) { decodedChar = decodedChar - 'a' + 26; } else if ( (decodedChar >= '0') && (decodedChar <= '9') ) { decodedChar = decodedChar - '0' + 52; } else if ( decodedChar == '+' ) { decodedChar = 62; } else if ( decodedChar == '/' ) { decodedChar = 63; } else if ( decodedChar == '=' ) /* end of base64 encoding */ { endOfData = TRUE; } else { /* invalid character */ goto error_exit; } if ( endOfData ) { /* make sure there's no more looping */ base64CharPtr = base64EndPtr; } else { sixBitEncoding[encodingIndex] = (unsigned char)decodedChar; ++encodingIndex; } if ( (encodingIndex == 4) || endOfData) { /* convert four 6-bit characters into three 8-bit bytes */ /* always get first byte */ *eightBitByte++ = (sixBitEncoding[0] << 2) | ((sixBitEncoding[1] & 0x30) >> 4); if ( encodingIndex >= 3 ) { /* get second byte only if encodingIndex is 3 or 4 */ *eightBitByte++ = ((sixBitEncoding[1] & 0x0F) << 4) | ((sixBitEncoding[2] & 0x3C) >> 2); if ( encodingIndex == 4 ) { /* get third byte only if encodingIndex is 4 */ *eightBitByte++ = ((sixBitEncoding[2] & 0x03) << 6) | (sixBitEncoding[3] & 0x3F); } } /* reset encodingIndex */ encodingIndex = 0; } } /* return the number of bytes in outBuffer and no error */ *lengthptr = eightBitByte - outBuffer; return ( 0 ); error_exit: /* return 0 bytes in outBuffer and an error */ *lengthptr = 0; return ( -1 ); } /*****************************************************************************/ ssize_t socket_read_bytes(int fd, char *buf, size_t n) { fd_set fdset; int ret = 0; struct timeval timeout; if (n == 0) { return (0); } bzero(&timeout, sizeof(timeout)); timeout.tv_sec = WEBDAV_IO_TIMEOUT; FD_ZERO(&fdset); FD_SET(fd, &fdset); ret = select(fd + 1, &fdset, (fd_set *)0, (fd_set *)0, &timeout); /* If select returns a postive number, our socket is ready for reading */ /* Otherwise it timed out or generated an errror, so return EIO */ if (ret > 0) { ssize_t len = recv(fd, buf, n, 0); if (len != -1) { return len; } else { syslog(LOG_ERR, "socket_read_bytes: recv(): %s", strerror(errno)); } } else if (ret < 0) { /* there was an error */ syslog(LOG_ERR, "socket_read_bytes: select(): %s", strerror(errno)); } else { /* select timed out */ syslog(LOG_ERR, "socket_read_bytes: select(): timed out"); } errno = EIO; return 0; } /*****************************************************************************/ #define USUAL_LINE_LEN 48 /* XXX what is this? */ ssize_t socket_read_line(int fd, char *buf, size_t n) { fd_set fdset; int ret = 0, found_lf = 0; struct timeval timeout; size_t start = 0, /* index of start of current read */ end = 0, /* one past the last byte read in the peek */ index = 0, /* one past the last byte read for real */ max = n - 1; /* maximum length to be read, leaving room for the NULL string terminator */ ssize_t len; if (n == 0) { return (0); } bzero(&timeout, sizeof(timeout)); timeout.tv_sec = WEBDAV_IO_TIMEOUT; wait_for_data: FD_ZERO(&fdset); FD_SET(fd, &fdset); ret = select(fd + 1, &fdset, (fd_set *)0, (fd_set *)0, &timeout); /* If select returns a postive number, our socket is ready for reading */ /* Otherwise it timed out or generated an errror, so return EIO */ if (ret > 0) { /* peek at the data queued on the socket */ start = index; len = recv(fd, &buf[start], MIN(USUAL_LINE_LEN, (max - start)), MSG_PEEK); if (len > 0) { for (end = (start + len); index < end; index++) { if (buf[index] == '\n') { found_lf = 1; index++; break; } } /* now do a read that actually consumes data queued on the socket */ len = recv(fd, &buf[start], (index - start), 0); if (len > 0) { buf[index] = '\0'; if (found_lf) { return (index); } else { if (index < max) { /* So long as we read something and there's still room in the buffer, since it didn't time out, we should try again. */ goto wait_for_data; } else { /* buffer is full and no LF was found; fall through */ syslog(LOG_ERR, "socket_read_line: index >= max"); } } } else { /* recv (peek) failed unexpectedly; fall through */ if ( len < 0 ) { /* log error message if this wasn't just a close from the server */ syslog(LOG_ERR, "socket_read_line: recv(): %s", strerror(errno)); } } } else { /* recv (peek) failed unexpectedly; fall through */ if ( len < 0 ) { /* log error message if this wasn't just a close from the server */ syslog(LOG_ERR, "socket_read_line: recv() MSG_PEEK: %s", strerror(errno)); } } } else if (ret < 0) { /* there was an error */ syslog(LOG_ERR, "socket_read_line: select(): %s", strerror(errno)); } else { /* select timed out */ syslog(LOG_ERR, "socket_read_line: select(): timed out"); } errno = EIO; return 0; } /*****************************************************************************/ void zero_trailing_spaces(char *line) { char *cp; if (line) { for (cp = (line + strlen(line) - 1); (cp >= line && isspace(*cp)); cp--) { *cp = '\0'; } } } /*****************************************************************************/ /* * SkipQuotedString finds the end of a quoted-string using the rules * (rfc 2616, section 2.2): * * quoted-string = ( <"> *(qdtext | quoted-pair ) <"> ) * qdtext = > * quoted-pair = "\" CHAR * * On input, the bytes parameter points to the character AFTER the initial * '"' character. The function result is a pointer to the '"' character that * terminates the quoted-string or the end of the C string. */ char * SkipQuotedString(char *bytes) { while ( *bytes != '\0' ) { /* the end of the quoted-string? */ if ( *bytes == '\"' ) { break; } /* quoted-pair within the quoted-string? */ else if ( *bytes == '\\' && bytes[1] ) { /* skip quoted-pair */ bytes += 2; } else { /* skip character */ ++bytes; } } return ( bytes ); } /*****************************************************************************/ /* * SkipCodedURL finds the end of a Coded-URL using the rules * (rfc 2518, section 9.4 and rfc 2396): * * Coded-URL = "<" absoluteURI ">" * * On input, the bytes parameter points to the character AFTER the initial * '<' character. The function result is a pointer to the '>' character that * terminates the Coded-URL or the end of the C string. */ char * SkipCodedURL(char *bytes) { /* the end of the string or Coded-URL? */ while ( (*bytes != '\0') && (*bytes != '>') ) { /* skip character */ ++bytes; } return ( bytes ); } /*****************************************************************************/ /* * SkipToken finds the end of a token using the rules (rfc 2616, section 2.2): * * token = 1* * CTL = * separators = "(" | ")" | "<" | ">" | "@" * | "," | ";" | ":" | "\" | <"> * | "/" | "[" | "]" | "?" | "=" * | "{" | "}" | SP | HT * * The function result is a pointer to the first non token character or the * end of the C string. */ char * SkipToken(char *bytes) { while ( *bytes != '\0' ) { /* CTL - US-ASCII control character (octets 0 - 31) */ if ( (unsigned char)*bytes <= 31 ) { /* not a token character - done */ goto Done; } else { switch ( *bytes ) { /* CTL - DEL (127) */ case '\x7f': /* separators */ case '(': case ')': case '<': case '>': case '@': case ',': case ';': case ':': case '\\': case '\"': case '/': case '[': case ']': case '\?': case '=': case '{': case '}': case ' ': case '\t': /* not a token character - done */ goto Done; break; default: /* skip token characters */ ++bytes; break; } } } Done: return (bytes); } /*****************************************************************************/ /* * SkipLWS finds the end of a run of LWS using the rule * (rfc 2616, section 2.2): * * LWS = [CRLF] 1*( SP | HT ) * * The function result is a pointer to the first non LWS character or the end * of the C string. */ char * SkipLWS(char *bytes) { while ( *bytes != '\0' ) { if ( (*bytes == ' ') || (*bytes == '\t') ) { /* skip SP and HT characters */ ++bytes; continue; } /* * skip CRLF only if followed by SP or HT (in which case the SP * or HT can be skipped, too) */ else if ( *bytes == '\x0d' ) /* CR? */ { /* LF? */ if ( bytes[1] == '\x0a' ) { /* SP or HT? */ if ( (bytes[2] == ' ') || (bytes[2] == '\t') ) { /* skip CRLF followed by SP or HT */ bytes += 3; continue; } } } /* found the end of the LWS run */ break; } return ( bytes ); } /*****************************************************************************/