/* rename.c - renaming of files to end with appropriate extensions, and to prevent metacharacters in filenames */ #include #include #include #include #include "web.h" struct rename_contenttype { char * pszContentType; /* content type name */ char * * ppszExtensions; /* valid extension list - index 0 = preferred */ int nExtensions; /* no of extensions in list */ struct rename_contenttype * next; } * typelist; char * pszMetachars = "?&*%=#"; char cHexquote = '@'; static const char * hexchars = "0123456789ABCDEF"; void addcontenttype(char * typename, char * extlist) { struct rename_contenttype * p = typelist; int i; char * tmp; /* find the type name if it exists already, otherwise create a new one */ while (1) { if (!strcmp(typename, p->pszContentType)) /* override prev. def */ { /* free previously allocated memory */ for (i = 0; i < p->nExtensions; i++) free(p->ppszExtensions[i]); free(p->ppszExtensions); break; } if (!p->next) { p->next = malloc(sizeof(*p)); p = p->next; p->pszContentType = strdup(typename); p->next = NULL; break; } p = p->next; } i = 0; tmp = extlist - 1; while ((tmp = strchr(tmp+1, ' ')) != NULL) i++; /* count spaces */ p->nExtensions = i; p->ppszExtensions = malloc(i * sizeof(char *)); for (i = 0; i < p->nExtensions; i++) { tmp = strchr(extlist, ' '); *tmp = 0; p->ppszExtensions[i] = strdup(extlist); extlist = tmp + 1; } } /* Reads a configuration file section for renaming. Accepts the following * syntax: * * meta string - sets metacharacter list * type content/type preferred [extra extra ...] - sets extensions for type * quote char - sets quote character * * section ended by line beginning with [, which will be returned in buffer, * or by eof. */ void rename_readconfig(FILE * fp, const char * filename, int * lineno, char * buf, int buflen) { while (!feof(fp)) { fgets(buf,buflen,fp); if (buf[0] == '[' || feof(fp)) return; (*lineno)++; while (strlen(buf) && buf[strlen(buf) - 1] < 32) buf[strlen(buf) - 1] = 0; /* remove end crap */ if (!strlen(buf)) continue; if (strncmp(buf, "meta ", 5)) { pszMetachars = strdup(buf + 5); } else if (strcmp(buf, "meta")) { pszMetachars = ""; /* metacharacter processing disabled */ } else if (strncmp(buf, "type ", 5)) { char * typename = buf + 5; char * extlist = strchr(typename, ' '); if (! extlist) { fprintf (stderr, "%s, line %d: type must have extensions\n", filename, *lineno); exit(1); } *extlist = 0; extlist ++; addcontenttype(typename, extlist); } else if (strncmp(buf, "quote ", 6)) { if (buf[6] <= 32) { fprintf(stderr, "%s, line %d: quote character must be " "non-whitespace\n", filename, *lineno); exit(1); } cHexquote = buf[6]; } else { fprintf(stderr, "%s, line %d: I can't understand this\n", filename, *lineno); exit(1); } } } void rename_init() { typelist = malloc(sizeof(*typelist)); typelist->pszContentType = "text/html"; typelist->ppszExtensions = malloc(sizeof (char *) * 2); typelist->ppszExtensions[0] = strdup("html"); typelist->ppszExtensions[1] = strdup("htm"); typelist->nExtensions = 2; typelist->next = NULL; } /* return a saner name for an object. Given the same input name & directory * it must always return the same output for each run of the program. Due to * the fact that the content type may not be known at all points where the * file is referenced, this means we must build up a table for all files * whose extensions are modified, and reference this if contenttype is NULL. * * Filename * returned is in static storage; copy before using again if still required */ /* TODO: determine whether or not we need to discard pszObjectname, and fix memory leak if one exists. */ struct renamed_table_ent { char * pszHost; int port; char * pszObjectname; char * pszResult; struct renamed_table_ent * next; } * renamed_table; char * rename_object(const char * pszHost, int port, char * pszObjectname, const char * contenttype) { static char namebuf[256]; int i, j, p; const char * lastdot = NULL; struct rename_contenttype * t; /* returns a saner name if Options.bRename == 1 */ if (!options.bRename) return pszObjectname; if (strlen(pszObjectname) == 0) return pszObjectname; if (pszObjectname[strlen(pszObjectname) - 1] == '/') return pszObjectname; if (options.bVerbose >= 4) printf ("[renaming %s:%d/%s (%s)", pszHost, port, pszObjectname, contenttype ? contenttype : "NULL"); if (! contenttype) /* check in table for previous renaming */ { struct renamed_table_ent * r = renamed_table; while (r) { if (!strcmp(r->pszHost, pszHost) && r->port == port && !strcmp(r->pszObjectname, pszObjectname)) { if (options.bVerbose >= 4) printf (" - cached %s]\n", r->pszResult); return r->pszResult; } r = r->next; } if (options.bVerbose >= 4) { printf (" - not cached"); fflush(stdout); } } /* no cached version; calculate it ourselves! */ i = j = 0; while (pszObjectname[i] && j < 230) { if (strchr(pszMetachars, pszObjectname[i]) || pszObjectname[i] == cHexquote) { namebuf[j++] = cHexquote; namebuf[j++] = hexchars[(pszObjectname[i] >> 4) & 0x0F]; namebuf[j++] = hexchars[pszObjectname[i] & 0x0F]; if (pszObjectname[i] == '?') /* query - personalise if reqd */ { if (options.bQueryAddPid) { p = getpid(); /* assume pid = 16 bit integer */ namebuf[j++] = hexchars[(p >> 12) & 0x0F]; namebuf[j++] = hexchars[(p >> 8) & 0x0F]; namebuf[j++] = hexchars[(p >> 4) & 0x0F]; namebuf[j++] = hexchars[p & 0x0F]; } } } else { if (pszObjectname[i] == '.') lastdot = pszObjectname + i; namebuf[j++] = pszObjectname[i]; } i++; } namebuf[j] = 0; /* check extension - first find content type in list, if it exists */ /* if we have no content type, then we can't do this, so just return * here! */ if (!contenttype) { if (options.bVerbose >= 4) printf (" - result %s]\n", namebuf); return namebuf; } t = typelist; while (t) { if (!strcmp(t->pszContentType, contenttype)) break; t = t->next; } if (t) { /* okay, we have the type. now, see if our extension [if we have one] * is in the list of valid extensions. Set t to null to indicate OK. */ if (lastdot) { lastdot++; /* skip over the dot */ for (i = 0; i < t->nExtensions; i++) if (!strcmp(t->ppszExtensions[i], lastdot)) { t = NULL; break; } } /* add extension if t is not null */ if (t) { struct renamed_table_ent * r; strcat(namebuf, "."); strcat(namebuf, t->ppszExtensions[0]); /* as we've changed the extension, which requires content * type knowledge that isn't always available, we must now * store the result in the renamed table. Adding at the * head of the list is a) efficient to add, and b) likely * to give good search results due to reference locality. */ r = malloc (sizeof *r); r->next = renamed_table; r->pszHost = strdup(pszHost); r->port = port; r->pszObjectname = strdup(pszObjectname); r->pszResult = strdup(namebuf); renamed_table = r; if (options.bVerbose >= 4) printf (" - logged"); } } if (options.bVerbose >= 4) printf (" - result %s]\n", namebuf); return namebuf; }