/*
    SRG - Squid Report Generator
    Copyright 2003 University of Waikato

    This file is part of SRG.

    SRG is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    SRG is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with SRG; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
#include "srg.h"

/* States of the URL parser. The parser walks the URL one character at a
 * time; the state records which delimiter was seen last and therefore what
 * the characters currently being scanned may turn out to be. */
#define STATE_INIT      0   /* Nothing recognised yet */
#define STATE_COLON     1   /* Just passed the first ':' */
#define STATE_PROTOCOL  2   /* Seen ":/", expecting the second '/' */
#define STATE_PASSPORT  3   /* After "x:", text is a password or a port */
#define STATE_USERSITE  4   /* After "://", text is a user or a site */
#define STATE_SITE      5   /* After "user:pass@", text is the site */
#define STATE_PORT      6   /* After "site:", text is the port */
#define STATE_END       7   /* Location captured, nothing left to parse */

/* Frees every string owned by req and resets the pointers to NULL so that
 * the structure can safely be released again or reused. */
static void releaseURLFields(url_request *req)
{
    free(req->protocol);
    req->protocol = NULL;
    free(req->user);
    req->user = NULL;
    free(req->password);
    req->password = NULL;
    free(req->site);
    req->site = NULL;
    free(req->port);
    req->port = NULL;
    free(req->location);
    req->location = NULL;
}

/* Stores a heap copy of value in *field.
 * Returns 0 on success or -1 if the copy could not be allocated. */
static int dupURLField(char **field, const char *value)
{
    *field = strdup(value);
    return (*field != NULL) ? 0 : -1;
}

/* Parses the specified URL and fills the structure with the different parts.
 *
 * Accepted shapes are [protocol://][user:password@]site[:port][/location].
 * On success every part that was present is stored in req as a separately
 * allocated string (release them with freeURL); parts that were absent are
 * left NULL, except location which is always a valid, possibly empty, string.
 *
 * Returns 0 on success or non-zero otherwise. On failure (invalid URL, NULL
 * argument or out of memory) nothing is left allocated and every field of
 * req is NULL, so the caller has nothing to clean up.
 */
int parseURL(const char *URL, url_request *req)
{
    char *start = NULL;     /* Private, writable copy of the URL */
    char *tmp = NULL;       /* Scan position inside start */
    char *t1 = NULL;        /* Start of the first pending token */
    char *t2 = NULL;        /* Start of the second pending token */
    unsigned int state = STATE_INIT;

    if (req == NULL)
        return -1;

    /* Initialise the return structure */
    req->protocol = NULL;
    req->user = NULL;
    req->password = NULL;
    req->site = NULL;
    req->port = NULL;
    req->location = NULL;

    if (URL == NULL)
        return -1;

    /* Delimiters are temporarily overwritten with '\0' while tokens are
     * copied out, so work on a copy rather than the caller's string. */
    start = strdup(URL);
    if (start == NULL)
        return -1;
    tmp = start;

    /* Once the location has been captured the rest of the string needs no
     * further inspection, so stop scanning as soon as STATE_END is reached. */
    while (*tmp != '\0' && state != STATE_END) {
        switch (state) {
        case STATE_INIT:
            if (*tmp == ':') {
                /* Cannot Start with : */
                if (!t1)
                    goto fail;
                *tmp = '\0';
                t1 = start;
                state = STATE_COLON;
            } else if (*tmp == '/') {
                /* Cannot Start with / */
                if (!t1)
                    goto fail;
                *tmp = '\0';
                if (dupURLField(&req->site, start) != 0)
                    goto fail;
                t1 = NULL;
                /* Get the location also */
                *tmp = '/';
                if (dupURLField(&req->location, tmp) != 0)
                    goto fail;
                state = STATE_END;
            } else {
                /* Temporary pointer, only used as a "seen something" flag */
                t1 = tmp;
            }
            break;

        case STATE_COLON:
            if (*tmp == '/') {
                state = STATE_PROTOCOL;
            } else if (*tmp == '@') {
                /* user:@ (empty password!) */
                *tmp = '\0';
                if (dupURLField(&req->user, start) != 0)
                    goto fail;
                if (dupURLField(&req->password, "") != 0)
                    goto fail;
                t1 = NULL;
                state = STATE_SITE;
            } else {
                /* No Protocol, could be password or port */
                t2 = tmp;
                state = STATE_PASSPORT;
            }
            break;

        case STATE_PROTOCOL:
            /* :/ followed by anything but / is not valid in a URL */
            if (*tmp != '/')
                goto fail;
            /* Valid protocol found; start was terminated at the ':' */
            if (dupURLField(&req->protocol, start) != 0)
                goto fail;
            t1 = NULL;
            /* Could now have a user or a site */
            state = STATE_USERSITE;
            break;

        case STATE_USERSITE:
            if (*tmp == ':') {
                /* Cannot Start with : */
                if (!t1)
                    goto fail;
                *tmp = '\0';
                state = STATE_PASSPORT;
            } else if (*tmp == '/') {
                /* /// is not valid in a URL */
                if (!t1)
                    goto fail;
                /* No User / Pass, No Port */
                *tmp = '\0';
                if (dupURLField(&req->site, t1) != 0)
                    goto fail;
                /* Get the location also */
                *tmp = '/';
                if (dupURLField(&req->location, tmp) != 0)
                    goto fail;
                state = STATE_END;
            }
            /* Set starting pointer */
            if (!t1)
                t1 = tmp;
            break;

        case STATE_PASSPORT:
            if (*tmp == '@') {
                /* We have user/pass */
                *tmp = '\0';
                if (dupURLField(&req->user, t1) != 0)
                    goto fail;
                if (dupURLField(&req->password, t2 ? t2 : "") != 0)
                    goto fail;
                t1 = NULL;
                t2 = NULL;
                state = STATE_SITE;
            } else if (*tmp == '/') {
                /* We have site / port; :/ is not valid in a URL */
                if (!t2)
                    goto fail;
                *tmp = '\0';
                if (dupURLField(&req->site, t1) != 0)
                    goto fail;
                if (dupURLField(&req->port, t2) != 0)
                    goto fail;
                t1 = NULL;
                t2 = NULL;
                /* Get the location also */
                *tmp = '/';
                if (dupURLField(&req->location, tmp) != 0)
                    goto fail;
                state = STATE_END;
            }
            /* Set starting pointer */
            if (!t2)
                t2 = tmp;
            break;

        case STATE_SITE:
            if (*tmp == ':') {
                /* Site / Port found; cannot have 0 length site */
                if (!t1)
                    goto fail;
                *tmp = '\0';
                if (dupURLField(&req->site, t1) != 0)
                    goto fail;
                t1 = NULL;
                state = STATE_PORT;
            } else if (*tmp == '/') {
                /* Site / Location found; cannot have 0 length site */
                if (!t1)
                    goto fail;
                *tmp = '\0';
                if (dupURLField(&req->site, t1) != 0)
                    goto fail;
                t1 = NULL;
                /* Get the location also */
                *tmp = '/';
                if (dupURLField(&req->location, tmp) != 0)
                    goto fail;
                state = STATE_END;
            } else {
                /* Set starting pointer */
                if (!t1)
                    t1 = tmp;
            }
            break;

        case STATE_PORT:
            if (*tmp == '/') {
                /* Port / Location found; cannot have 0 length port */
                if (!t1)
                    goto fail;
                *tmp = '\0';
                if (dupURLField(&req->port, t1) != 0)
                    goto fail;
                t1 = NULL;
                /* And get the location also */
                *tmp = '/';
                if (dupURLField(&req->location, tmp) != 0)
                    goto fail;
                state = STATE_END;
            }
            /* Set starting pointer */
            if (!t1)
                t1 = tmp;
            break;

        default:
            break;
        }

        /* Move to next character */
        tmp++;
    }

    /* Ending cases: decide what the unterminated trailing token was */
    switch (state) {
    case STATE_INIT:
        /* No : found!, assume form of 'www.google.com',
         * assign site only */
        if (dupURLField(&req->site, start) != 0)
            goto fail;
        break;

    case STATE_COLON:
        /* INVALID: Cannot end URL with a : */
        goto fail;

    case STATE_PROTOCOL:
        /* INVALID: Cannot end URL with :/ */
        goto fail;

    case STATE_PASSPORT:
        /* We have a URL of the form 'www.google.com:80'. When this state
         * was entered after a protocol (e.g. 'http://host:') nothing may
         * follow the ':' and t2 is still unset: 0 length port, invalid. */
        if (!t1 || !t2)
            goto fail;
        if (dupURLField(&req->site, t1) != 0)
            goto fail;
        if (dupURLField(&req->port, t2) != 0)
            goto fail;
        break;

    case STATE_USERSITE:
    case STATE_SITE:
        /* Assume that we have found a site, must be > 0 length */
        if (!t1)
            goto fail;
        if (dupURLField(&req->site, t1) != 0)
            goto fail;
        break;

    case STATE_PORT:
        /* Got port but no location, site is already set.
         * Cannot have 0 length port */
        if (!t1)
            goto fail;
        if (dupURLField(&req->port, t1) != 0)
            goto fail;
        break;

    default:
        /* STATE_END: All is Well :) */
        break;
    }

    /* Always return a valid string for the location */
    if (!req->location && dupURLField(&req->location, "") != 0)
        goto fail;

    free(start);
    return 0;

fail:
    /* Single exit for every error: drop the working copy and whatever
     * fields had already been filled in before the problem was found. */
    free(start);
    releaseURLFields(req);
    return -1;
}

/* Builds the textual form of a parsed URL:
 * [protocol://][user:password@]site[:port][location].
 *
 * The user/password pair is only printed when both are set. A NULL site is
 * printed as an empty string.
 *
 * Returns a newly allocated string that the caller must free(), or NULL if
 * req is NULL or the string could not be allocated.
 */
char *asprintURL(url_request *req)
{
    char *buffer = NULL;
    int have_auth;

    if (req == NULL)
        return NULL;

    have_auth = (req->user != NULL && req->password != NULL);

    /* Each optional part contributes itself plus its delimiter, or two
     * empty strings when absent, so one format covers every combination. */
    if (asprintf(&buffer, "%s%s%s%s%s%s%s%s%s%s",
                 req->protocol ? req->protocol : "",
                 req->protocol ? "://" : "",
                 have_auth ? req->user : "",
                 have_auth ? ":" : "",
                 have_auth ? req->password : "",
                 have_auth ? "@" : "",
                 req->site ? req->site : "",
                 req->port ? ":" : "",
                 req->port ? req->port : "",
                 req->location ? req->location : "") < 0) {
        /* The contents of buffer are not reliable after a failure */
        return NULL;
    }

    return buffer;
}

/* Frees any strings allocated in req and resets them to NULL. The structure
 * itself is not freed. Passing NULL is allowed and does nothing. */
void freeURL(url_request *req)
{
    if (req == NULL)
        return;

    releaseURLFields(req);
}

#ifdef TEST
#include <assert.h>

void testURL(const char *URL, bool is_invalid);

int main(int argc, char **argv)
{
    (void)argc;
    (void)argv;

    testURL("www.google.com", false);
    testURL("www.google.com:80", false);
    testURL("http://www.google.com/", false);
    testURL("http://www.google.com/index.html", false);
    testURL("http://www.google.com:80/", false);
    testURL("http://www.google.com:80/index.html", false);
    testURL("www.google.com:80/index.html", false);
    testURL("www.google.com/index.html", false);
    testURL("www.google.com/", false);
    testURL("www.google.com:80/", false);
    testURL("matt:@www.google.com/index.html", false);
    testURL("matt:matt@www.google.com:80/", false);
    testURL(":matt@www.google.com:80/", true);
    testURL("ftp://www.google.com/", false);

    /* Malformed URLs that are rejected after a field was already parsed */
    testURL("http://www.google.com:", true);
    testURL("http://www.google.com:/", true);
    testURL("http:///", true);
    testURL("matt:@www.google.com:", true);

    return 0;
}

/* Tests that the specified URL is correctly parsed: a URL flagged as invalid
 * must be rejected, any other URL must parse and print back unchanged
 * (compared case-insensitively). */
void testURL(const char *URL, bool is_invalid)
{
    url_request result;
    char *buffer = NULL;
    int rv;

    if (parseURL(URL, &result) != 0) {
        /* Invalid URL, cannot parse */
        assert(is_invalid);
        return;
    }

    /* The parse succeeded, so the URL must not have been expected to fail */
    assert(!is_invalid);

    /* Check that the parsed string matches the original */
    buffer = asprintURL(&result);
    assert(buffer != NULL);
    rv = strcasecmp(buffer, URL);
    assert(rv == 0);
    (void)rv;   /* Keep the variable used when assert() is compiled out */

    free(buffer);
    freeURL(&result);
}

#endif