/*- * Copyright 1997 Massachusetts Institute of Technology * * Permission to use, copy, modify, and distribute this software and * its documentation for any purpose and without fee is hereby * granted, provided that both the above copyright notice and this * permission notice appear in all copies, that both the above * copyright notice and this permission notice appear in all * supporting documentation, and that the name of M.I.T. not be used * in advertising or publicity pertaining to distribution of the * software without specific, written prior permission. M.I.T. makes * no representations about the suitability of this software for any * purpose. It is provided "as is" without express or implied * warranty. * * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $Id: util.c,v 1.7 2002/06/16 23:47:19 lutherj Exp $ */ /* FD_SETSIZE has to be defined here before sys/types.h is brought in * to allow select to work on file descriptors higher than 256. This * number should be at least WEBDAV_MAX_FILES + the number of sockets * we may be using at any given time (probably 10). A buffer is a * good idea here. */ #define FD_SETSIZE 2500 #include #include #include #include #include #include #include #include #include /* for time() */ #include /* for MAXNAMLEN */ #include #include /* for struct timeval */ #include #include "webdavd.h" #include "pathnames.h" #include "fetch.h" /*****************************************************************************/ /* String-handling and -parsing functions */ /*****************************************************************************/ /* * Undo the standard %-sign encoding in URIs (e.g., `%2f' -> `/'). This * must be done after the URI is parsed, since the principal purpose of * the encoding is to hide characters which would otherwise be significant * to the parser (like `/'). */ char *percent_decode(const char *uri) { char *rv, *s; s = malloc(strlen(uri) + 1); if (!s) { return (s); } rv = s; while (*uri) { if (*uri == '%' && uri[1] && isxdigit(uri[1]) && isxdigit(uri[2])) { int c; char buf[] = "xx"; buf[0] = uri[1]; buf[1] = uri[2]; sscanf(buf, "%x", &c); uri += 3; *s++ = c; } else { *s++ = *uri++; } } *s = '\0'; return rv; } /*****************************************************************************/ /* * Undo the standard %-sign encoding in URIs (e.g., `%2f' -> `/'). This * differs from the above routine in that it does the conversion within * the input string */ void percent_decode_in_place(char *uri) { char *s; s = uri; while (*uri) { if (*uri == '%' && uri[1] && isxdigit(uri[1]) && isxdigit(uri[2])) { int c; char buf[] = "xx"; buf[0] = uri[1]; buf[1] = uri[2]; sscanf(buf, "%x", &c); uri += 3; *s++ = c; } else { *s++ = *uri++; } } *s = '\0'; } /*****************************************************************************/ /* * Translate utf-8 url to http us-ascii via % encoding * Caller must free the allocated return string */ char *utf8_encode(const unsigned char *orig) { int index = 0, orig_index = 0; int charval; char *new_string = NULL, *slash = NULL; new_string = malloc((size_t)(strlen(orig) * UTF8_TO_ASCII_MAX_SCALE)); if (!new_string) { return (NULL); } /* we don't want to escape characters in the host name part */ slash = strchr(orig, '/'); /* the end of the host name */ while (orig[orig_index] != '\0') { charval = (int)orig[orig_index]; if (((char *) & orig[orig_index] > slash) && (charval <= 32 || charval == 34 || charval == 35 || charval == 37 || charval == 38 || (charval >= 58 && charval <= 64) || (charval >= 91 && charval <= 94) || charval == 96 || charval >= 123)) { /* * In other words if c is not one of the legitimate http accepted subset * of US-ASCII characters, we will escape it. That long if statement is * designed to screen out all control charaters, the space character, all * hi bit ascii characters the delete character and the "#%<>[]^{|} * characters. That quote by the way is an excluded character not the * beginning of a string as you may have guessed */ new_string[index] = '%'; ++index; sprintf(&(new_string[index]), "%02x", charval); index += 2; } else { new_string[index] = orig[orig_index]; ++index; } ++orig_index; } /* end while */ new_string[index] = '\0'; return (new_string); } /*****************************************************************************/ /* * Reconstruct a URL given a standard host:port string and a remote * request which contains all of the url except the host name. */ int reconstruct_url(const char *hostheader, const char *remotefile, char **url) { const char *colon; unsigned long length; length = strlen(hostheader) + strlen(remotefile) + strlen(_WEBDAVPREFIX) + 1; (void *) * url = malloc(length); if (!*url) { return (ENOMEM); } (void)strcpy(*url, _WEBDAVPREFIX); colon = strchr(hostheader, ':'); if (colon != 0) { errno = 0; (void)strncat(*url, hostheader, colon - hostheader); } else { (void)strcat(*url, hostheader); } (void)strcat(*url, remotefile); return 0; } /*****************************************************************************/ /* * Implement the `base64' encoding as described in RFC 1521. */ static const char base64[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; /*****************************************************************************/ char *to_base64(const unsigned char *buf, size_t len) { char *s, *rv; unsigned tmp; s = malloc((4 * (len + 1)) / 3 + 1); if (!s) { return (0); } rv = s; while (len >= 3) { tmp = buf[0] << 16 | buf[1] << 8 | buf[2]; s[0] = base64[tmp >> 18]; s[1] = base64[(tmp >> 12) & 077]; s[2] = base64[(tmp >> 6) & 077]; s[3] = base64[tmp & 077]; len -= 3; buf += 3; s += 4; } /* RFC 1521 enumerates these three possibilities... */ switch (len) { case 2: tmp = buf[0] << 16 | buf[1] << 8; s[0] = base64[(tmp >> 18) & 077]; s[1] = base64[(tmp >> 12) & 077]; s[2] = base64[(tmp >> 6) & 077]; s[3] = '='; s[4] = '\0'; break; case 1: tmp = buf[0] << 16; s[0] = base64[(tmp >> 18) & 077]; s[1] = base64[(tmp >> 12) & 077]; s[2] = s[3] = '='; s[4] = '\0'; break; case 0: s[0] = '\0'; break; } return rv; } /*****************************************************************************/ int from_base64(const char *orig, unsigned char *buf, size_t *lenp)/* *** not used? *** */ { int len, len2; const char *equals; unsigned tmp; len = strlen(orig); while (isspace(orig[len - 1])) { len--; } if (len % 4) { return -1; } len2 = 3 * (len / 4); equals = strchr(orig, '='); if (equals != 0) { if (equals[1] == '=') { len2 -= 2; } else { len2 -= 1; } } /* Now the length is len2 is the actual length of the original. */ if (len2 > *lenp) { return -1; } *lenp = len2; while (len > 0) { int i; const char *off; int forget; tmp = 0; forget = 0; for (i = 0; i < 4; i++) { if (orig[i] == '=') { off = base64; forget++; } else { off = strchr(base64, orig[i]); } if (off == 0) { return -1; } tmp = (tmp << 6) | (off - base64); } buf[0] = (tmp >> 16) & 0xff; if (forget < 2) { buf[1] = (tmp >> 8) & 0xff; } if (forget < 1) { buf[2] = (tmp >> 8) & 0xff; } len -= 4; orig += 4; buf += 3 - forget; } return 0; } /*****************************************************************************/ ssize_t socket_read_bytes(int fd, char *buf, size_t n) { fd_set fdset; int ret = 0; struct timeval timeout; if (n == 0) { return (0); } bzero(&timeout, sizeof(timeout)); timeout.tv_sec = WEBDAV_IO_TIMEOUT; FD_ZERO(&fdset); FD_SET(fd, &fdset); ret = select(fd + 1, &fdset, (fd_set *)0, (fd_set *)0, &timeout); /* If select returns a postive number, our socket is ready for reading */ /* Otherwise it timed out or generated an errror, so return EIO */ if (ret > 0) { ssize_t len = recv(fd, buf, n, 0); if (len != -1) { return len; } /* fall through, for now */ } /* there was an error */ #ifdef DEBUG if (ret == 0) { fprintf(stderr, "read timed out\n"); } #endif errno = EIO; return 0; } /*****************************************************************************/ #define USUAL_LINE_LEN 48 ssize_t socket_read_line(int fd, char *buf, size_t n) { fd_set fdset; int ret = 0, found_lf = 0; struct timeval timeout; size_t start = 0, /* index of start of current read */ end = 0, /* one past the last byte read in the peek */ index = 0, /* one past the last byte read for real */ max = n - 1; /* maximum length to be read, leaving room for the NULL string terminator */ ssize_t len; if (n == 0) { return (0); } bzero(&timeout, sizeof(timeout)); timeout.tv_sec = WEBDAV_IO_TIMEOUT; wait_for_data: FD_ZERO(&fdset); FD_SET(fd, &fdset); ret = select(fd + 1, &fdset, (fd_set *)0, (fd_set *)0, &timeout); /* If select returns a postive number, our socket is ready for reading */ /* Otherwise it timed out or generated an errror, so return EIO */ if (ret > 0) { /* peek at the data queued on the socket */ start = index; len = recv(fd, &buf[start], MIN(USUAL_LINE_LEN, (max - start)), MSG_PEEK); if (len > 0) { for (end = (start + len); index < end; index++) { if (buf[index] == '\n') { found_lf = 1; index++; break; } } /* now do a read that actually consumes data queued on the socket */ len = recv(fd, &buf[start], (index - start), 0); if (len > 0) { buf[index] = '\0'; if (found_lf) { return (index); } else { if (index < max) { /* So long as we read something and there's still room in the buffer, since it didn't time out, we should try again. */ goto wait_for_data; } /* buffer is full and no LF was found; fall through */ } } /* read failed unexpectedly; fall through */ } /* read (peek) failed unexpectedly; fall through */ } #ifdef DEBUG if (ret == 0) { fprintf(stderr, "read timed out\n"); } #endif errno = EIO; return 0; } /*****************************************************************************/ void zero_trailing_spaces(char *line) { char *cp; if (line) { for (cp = (line + strlen(line) - 1); (cp >= line && isspace(*cp)); cp--) { *cp = '\0'; } } } /*****************************************************************************/ /* * SkipQuotedString finds the end of a quoted-string using the rules * (rfc 2616, section 2.2): * * quoted-string = ( <"> *(qdtext | quoted-pair ) <"> ) * qdtext = > * quoted-pair = "\" CHAR * * On input, the bytes parameter points to the character AFTER the initial * '"' character. The function result is a pointer to the '"' character that * terminates the quoted-string or the end of the C string. */ char * SkipQuotedString(char *bytes) { while ( *bytes != '\0' ) { /* the end of the quoted-string? */ if ( *bytes == '\"' ) { break; } /* quoted-pair within the quoted-string? */ else if ( *bytes == '\\' && bytes[1] ) { /* skip quoted-pair */ bytes += 2; } else { /* skip character */ ++bytes; } } return ( bytes ); } /*****************************************************************************/ /* * SkipCodedURL finds the end of a Coded-URL using the rules * (rfc 2518, section 9.4 and rfc 2396): * * Coded-URL = "<" absoluteURI ">" * * On input, the bytes parameter points to the character AFTER the initial * '<' character. The function result is a pointer to the '>' character that * terminates the Coded-URL or the end of the C string. */ char * SkipCodedURL(char *bytes) { /* the end of the string or Coded-URL? */ while ( (*bytes != '\0') && (*bytes != '>') ) { /* skip character */ ++bytes; } return ( bytes ); } /*****************************************************************************/ /* * SkipToken finds the end of a token using the rules (rfc 2616, section 2.2): * * token = 1* * CTL = * separators = "(" | ")" | "<" | ">" | "@" * | "," | ";" | ":" | "\" | <"> * | "/" | "[" | "]" | "?" | "=" * | "{" | "}" | SP | HT * * The function result is a pointer to the first non token character or the * end of the C string. */ char * SkipToken(char *bytes) { while ( *bytes != '\0' ) { /* CTL - US-ASCII control character (octets 0 - 31) */ if ( (unsigned char)*bytes <= 31 ) { /* not a token character - done */ goto Done; } else { switch ( *bytes ) { /* CTL - DEL (127) */ case '\x7f': /* separators */ case '(': case ')': case '<': case '>': case '@': case ',': case ';': case ':': case '\\': case '\"': case '/': case '[': case ']': case '\?': case '=': case '{': case '}': case ' ': case '\t': /* not a token character - done */ goto Done; break; default: /* skip token characters */ ++bytes; break; } } } Done: return (bytes); } /*****************************************************************************/ /* * SkipLWS finds the end of a run of LWS using the rule * (rfc 2616, section 2.2): * * LWS = [CRLF] 1*( SP | HT ) * * The function result is a pointer to the first non LWS character or the end * of the C string. */ char * SkipLWS(char *bytes) { while ( *bytes != '\0' ) { if ( (*bytes == ' ') || (*bytes == '\t') ) { /* skip SP and HT characters */ ++bytes; continue; } /* * skip CRLF only if followed by SP or HT (in which case the SP * or HT can be skipped, too) */ else if ( *bytes == '\x0d' ) /* CR? */ { /* LF? */ if ( bytes[1] == '\x0a' ) { /* SP or HT? */ if ( (bytes[2] == ' ') || (bytes[2] == '\t') ) { /* skip CRLF followed by SP or HT */ bytes += 3; continue; } } } /* found the end of the LWS run */ break; } return ( bytes ); } /*****************************************************************************/