/* * URL.cpp * * Copyright (C) 1999 Stephen F. White * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program (see the file "COPYING" for details); if * not, write to the Free Software Foundation, Inc., 675 Mass Ave, * Cambridge, MA 02139, USA. */ #include #include #include #ifdef WIN32 # include #endif #include "stdafx.h" #ifdef AIX # include "strings.h" #endif #include "URL.h" URL::URL(const char *u) { Parse(u); } URL::URL(void) { _url = ""; _scheme = ""; _hostname = ""; _port = 80; _path = ""; } URL::URL(const char *base, const char *url) { if (strstr(url, "://")) { /* URL is already fully qualified */ Parse(url); } else if (url[0] == '/') { /* URL is absolute */ Parse(base); _path = url; SimplifyPath(); } else if (strstr(url, ":/")) { /* URL is MS-DOS style */ Parse(base); #ifdef WIN32 _path=""; #else _path="/"; #endif _path+=url; SimplifyPath(); } /* else if (strstr(url, "../")) { URL is relative Parse(base); _path=url; } */ else { int len = strlen(base); if(base[len-1] != '/') { char *newBase = new char[len+1]; strcpy(newBase, base); char *b = strrchr(newBase, '/'); if (b) *(b+1) = '\0'; Parse(newBase); delete [] newBase; } else { Parse(base); } _path += url; SimplifyPath(); } Rebuild(); } bool URL::Parse(const char *u) { enum { START, SCHEME, SLASH1, SLASH2, HOST, PORT1, PORT, PATH, TOPIC, ERR } state = START; if (u == NULL) { u = _url; } else { _url = u; } _scheme = ""; _hostname = ""; _port = 80; _path = ""; if (!u[0]) { return false; } bool dospath=false; char driveLetter=(char) 0; while(*u && state != ERR) { if ((*u == '|') && (u != _url)) { dospath=true; driveLetter=*(u-1); } switch(state) { case START: if (*u != ' ') { _scheme += *u; state = SCHEME; } break; case SCHEME: // getting scheme if (isalnum(*u) || *u == '+' || *u == '-' || *u == '.') { _scheme += *u; } else if (*u == ':') { state = SLASH1; } else if (*u == '/') { // hack: allow skipping of scheme if ((_url != NULL) && (_url[0] == '/')) { _hostname = ""; _path = _scheme; _path += "/"; } else { _hostname = _scheme; _path = "/"; } _scheme = ""; state = PATH; } else { state = ERR; } break; case SLASH1: // getting first slash if (*u == '/') { state = SLASH2; } else if (isdigit(*u)) { // another hack _hostname = _scheme; _scheme = ""; _port = (short) (*u - '0'); state = PORT; } else if (isalpha(*u)) { // yet another hack, for IExplorer _path = *u; state = PATH; } break; case SLASH2: // getting second slash if (*u == '/') state = HOST; else state = ERR; break; case HOST: // getting hostname if (isalnum(*u) || *u == '.' || *u == '-') { _hostname += *u; } else if (*u == ':') { state = PORT1; } else if (*u == '/') { if (dospath) { dospath=false; _path=driveLetter; _path+=':'; } _path += *u; state = PATH; } else { state = ERR; } break; case PORT1: if (isdigit(*u)) { _port = (short) (*u - '0'); state = PORT; } else { state = ERR; } break; case PORT: // waiting for port if (isdigit(*u)) { _port = _port * 10 + (*u - '0'); } else if (*u == '/') { _path += *u; state = PATH; } else { state = ERR; } break; case PATH: // getting path if (*u == '#') { state = TOPIC; } else { _path += *u; } break; case TOPIC: _topic += *u; break; default: break; } u++; } if (state == SCHEME) { _hostname = _scheme; _scheme = ""; } if (_scheme.length() == 0) { if (!stringncmp(_hostname, "ftp")) { _scheme = "ftp"; } else if (!stringncmp(_hostname, "http")) { _scheme = "http"; } else { _scheme = "file"; } } if (_path.length() == 0) { _path = "/"; } Rebuild(); return (state != ERR); } void URL::Rebuild(bool withTopic /* = true*/) { if (!strcasecmp(_scheme, "news") || !strcasecmp(_scheme, "mailto") || !strcasecmp(_scheme, "nntp")) { _url = _scheme; _url += ":"; } else { #ifndef WIN32 if (strcasecmp(_scheme, "file") == 0) { _url = ""; _url +=_path; return; } #endif _url = (const char *) _scheme; if (_scheme[0]) _url += "://"; _url += _hostname; if (!strcasecmp(_scheme, "http") && _port != 80) { char buf[10]; sprintf(buf, ":%d", _port); _url += buf; } } _url += _path; if (withTopic && _topic.length() > 0) { _url += '#'; _url += _topic; } } #ifdef WIN32 void URL::TruncateToFit(HDC hDC, int pixels, MyString *out) { MyString prefix; const char *newpath = _path; MyString filename; SIZE size; *out = _scheme; *out += "://"; *out += _hostname; if (_port != 80 || _scheme != "http") { char buf[10]; sprintf(buf, ":%d", _port); *out += buf; } *out += newpath; GetTextExtentPoint(hDC, *out, out->length(), &size); if (size.cx > pixels) { *out = _scheme; *out += "://..."; *out += newpath; } while (GetTextExtentPoint(hDC, *out, out->length(), &size) && size.cx > pixels) { const char *slash = strchr(newpath + 1, '/'); if (slash) { newpath = slash; *out = _scheme; *out += "://..."; *out += newpath; } else { break; } } if (GetTextExtentPoint(hDC, *out, out->length(), &size) && size.cx > pixels) { // last ditch attempt to squeeze it smaller *out = ".../"; *out += GetFileName(); } } #endif MyString URL::GetFileName(void) { MyString filename = ""; const char *slash = strrchr(_path, '/'); if (!slash) { slash = strrchr(_path, '\\'); } if (slash) { filename += slash + 1; } else { filename += _path; } return filename; } MyString URL::RelativeTo(const char *parentURL) { #ifdef WIN32 if (!sameDrive(_url, parentURL)) return _url; // if (getDriveLetter(_path)) // return _url; #endif URL parent(parentURL); MyString ret; const char *c, *p; if (_scheme == parent._scheme && !strcasecmp(_hostname, parent._hostname) && _port == parent._port) { for (p = parent._path, c = _path; *p && *p == *c; p++, c++) { int doNothing = 0; } if (p == parent._path + 1) return _path; if (*p && *c == '/') c--; #ifdef WIN32 // test for paths starting with "Driveletter:" if ((c == _path) && (c[0]!=0) && (c[1]==':')) return(_url); #endif while ((p = strchr(p, '/')) != NULL) { ret += "../"; p++; } while (c > (const char *) _path && *c != '/') { c--; } ret += c + 1; return ret; } else { return _url; } } MyString URL::ToPath() const { MyString path; #ifdef WIN32 if (_hostname.length() != 0) { path = "\\\\"; path += _hostname; path += "\\"; } else { path = ""; if ((_path[0]!='/') && (_path[0]!='|')) path += _path[0]; } for (const char *c = _path + 1; *c; c++) { switch (*c) { case '/': path += '\\'; break; case '|': path += ':'; break; default: path += *c; break; } } return path; #else return _path; #endif } void URL::SimplifyPath() { const char *s1 = _path; char *buf = new char [strlen(s1) + 1]; char *s2 = buf; *s2 = '\0'; while (*s1) { #ifdef WIN32 // finding /D: means D:windowspath/something if (s1[2] == ':') { _path = strdup(s1 + 1); return; } #endif if (!strncmp(s1, "/./", 3)) { s1 += 2; } else { do { *s2++ = *s1++; } while (*s1 && *s1 != '/'); } *s2 = '\0'; } _path = strdup(buf); delete [] buf; } void URL::FromPath(const char *path) { #ifdef WIN32 char drive[_MAX_DRIVE]; char dir[_MAX_DIR]; char fname[_MAX_FNAME]; char ext[_MAX_EXT]; _splitpath(path, drive, dir, fname, ext); _url = ""; _scheme = "file"; _hostname = ""; _port = 80; _path = '/'; _path += drive[0]; _path += '|'; for (const char *p = dir; *p; p++) { if (*p == '\\') { _path += '/'; } else { _path += *p; } } _path += fname; _path += ext; Rebuild(); #else _scheme = "file"; _hostname = ""; _port = 80; if (path[0] == '/') { _path = path; } else { char *dir, buf[1024]; if ((dir = getenv("PWD")) != NULL) { mystrncpy_secure(buf, dir, 1024); } else { getcwd(buf, 1024); } _path = buf; _path += '/'; _path += path; SimplifyPath(); } Rebuild(); #endif } bool URL::notJavascript(void) { return ::notJavascript(_url); } bool URL::notURN(void) { return ::notURN(_url); } const MyString & URL::GetDir() { static MyString ret; int len; for (len=_path.length()-1;len>=0;len--) if ((_path[len]=='/') || (_path[len]=='\\')) break; if (len<0) ret=_path; else { ret=""; for (int i=0;i