// --------------------------------------------------------------------------- // - Uri.cpp - // - afnix:www module - uri class implementation - // --------------------------------------------------------------------------- // - This program is free software; you can redistribute it and/or modify - // - it provided that this copyright notice is kept intact. - // - - // - This program is distributed in the hope that it will be useful, but - // - without any warranty; without even the implied warranty of - // - merchantability or fitness for a particular purpose. In no event shall - // - the copyright holder be liable for any direct, indirect, incidental or - // - special damages arising in any way out of the use of this software. - // --------------------------------------------------------------------------- // - copyright (c) 1999-2007 amaury darsch - // --------------------------------------------------------------------------- #include "Uri.hpp" #include "Ascii.hpp" #include "Regex.hpp" #include "Vector.hpp" #include "Strvec.hpp" #include "Buffer.hpp" #include "Utility.hpp" #include "Integer.hpp" #include "Unicode.hpp" #include "Pathname.hpp" #include "QuarkZone.hpp" #include "Exception.hpp" namespace afnix { // ------------------------------------------------------------------------- // - private section - // ------------------------------------------------------------------------- // the default ftp port static const long URI_XFTP_PORT = 21; // the default smtp port static const long URI_SMTP_PORT = 25; // the default http port static const long URI_HTTP_PORT = 80; // the default https port static const long URI_HTPS_PORT = 443; // this procedure normalize a path static String normalize_path (const String& path) { // check for special case if (path.isnil () == true) return path; if (path == "/") return path; if (path.first () != Unicode::toquad ('/')) return path; // extract the path components Strvec svec = Strvec::split (path, "/"); long slen = svec.length (); if (slen < 2) return path; // rebuild a new vector path Strvec rvec; for (long i = 0; i < slen ; i++) { // get the string element String elem = svec.get (i); // check for special case if (elem.isnil () == true) { if (i == slen-1) rvec.add (elem); continue; } if (elem == ".") continue; if (elem == "..") { if (rvec.empty () == false) rvec.rml (); continue; } // save in result vector rvec.add (elem); } // rebuild result path long rlen = rvec.length (); if (rlen == 0) return "/"; String result = ""; for (long i = 0; i < rlen; i++) { result += "/"; result += rvec.get (i); } // evntually fix the last / if (path.last () == Unicode::toquad ('/')) result += "/"; return result; } // this procedure get the base name from a path if it does not finish // with a slash - so it can be used as a base for adding a path static String get_base_path (const String& name) { if (name.last () == Unicode::toquad ('/')) return name; // get name length and check long len = name.length (); if (len == 0) return name; // find the first slash index for (long i = len - 1; i >= 0; i--) { if (name[i] == Unicode::toquad ('/')) { return name.lsubstr (i); } } return name; } // get a system path from a uri path static String get_sys_path (const String& path) { //check that the path starts with a '/' character if (path.first () != Unicode::toquad ('/')) { throw Exception ("path-error", "invalid uri path to convert", path); } // check special case with root only if (path == "/") return System::rootdir (); // split the path Strvec svec = Strvec::split (path, "/"); if (svec.length () < 2) { throw Exception ("internal-error", "cannot convert uri path", path); } // build first component String result = System::rootdir (); // add the remaining elements long slen = svec.length (); for (long i = 1; i < slen; i++) { result = System::join (result, svec.get (i)); } return Uri::pencode (result); } // this procedure returns a authority host static String get_auth_host (const String& auth) { // check for nil name if (auth.isnil () == true) return auth; // the authority regex Regex re ("(<$a-._~>*)[:$d+]?"); if (re == auth) return re.getstr (0); // invalid authority throw Exception ("uri-error", "invalid authority name", auth); } // this procedure returns a mail host static String get_mail_host (const String& path) { // check for nil name if (path.isnil () == true) return path; // the authority regex Regex re ("<$a+-_.%:>*@(<$a-._~>*)"); if (re == path) return re.getstr (0); // invalid authority throw Exception ("uri-error", "invalid mail path name", path); } // this procedure returns a auth port if any static long get_auth_port (const String& auth) { // check for nil name if (auth.isnil () == true) return -1; // the authority regex Regex re ("<$a-._~>*:($d+)"); if (re == auth) return Utility::tointeger (re.getstr (0)); return -1; } // ------------------------------------------------------------------------- // - public section - // ------------------------------------------------------------------------- // percent-encode a uri name String Uri::pencode (const String& name) { // do nothing with nil string if (name.isnil () == true) return name; // encode the string char* cbuf = name.encode (); try { // get the buffer length long clen = Ascii::strlen (cbuf); // create a buffer and loop Buffer buf; for (long i = 0; i < clen; i++) { char c = cbuf[i]; if (c == blkc) { buf.add ('+'); continue; } if (c == '\'') { buf.add ('%'); buf.add (Ascii::btoc (c, false)); buf.add (Ascii::btoc (c, true)); continue; } if (c == '"') { buf.add ('%'); buf.add (Ascii::btoc (c, false)); buf.add (Ascii::btoc (c, true)); continue; } if (Ascii::isascii (c) == true) { buf.add (c); } else { buf.add ('%'); buf.add (Ascii::btoc (c, false)); buf.add (Ascii::btoc (c, true)); } } delete [] cbuf; return buf.tostring (); } catch (...) { delete [] cbuf; throw; } } // percent-decode a uri name String Uri::pdecode (const String& name) { // check for nil name if (name.isnil () == true) return name; // create a working buffer Buffer buf; // get the name length and loop long len = name.length (); for (long i = 0; i < len; i++) { t_quad c = name[i]; if (c == Unicode::toquad ('+')) { buf.add (' '); continue; } if (c == Unicode::toquad ('%')) { t_byte val = nilc; if (++i < len) val = Ascii::htob (name[i]) << 4; if (++i < len) val += Ascii::htob (name[i]); if (val == nilc) { throw Exception ("uri-error", "unterminated escape sequence"); } buf.add ((char) val); continue; } if (Unicode::isascii (c) == false) { throw Exception ("uri-error", "non-ascii uri sequence to decode", name); } buf.add (c); } return buf.tostring (); } // normalize the uri name by looking at various scheme for incomplete name String Uri::nrmname (const String& name) { // check if we have a scheme Regex sre ("($l$a*):($N+)"); if (sre == name) return name; // check if we have a qualified host Regex hre ("[<$a-_~>+.]+$s+[/$N*]?"); if (hre == name) { String result = "http://"; result += name; return result; } // check if we have a qualified host with // Regex qre ("//[<$a-_~>+.]+$s+[/$N*]?"); if (qre == name) { String result = "http:"; result += name; return result; } // here we can assume that we have a file since there is no scheme and // host name - we just check for an absolute path if (name.first () == Unicode::toquad ('/')) { String result = "file://"; result += name; return result; } // here there is nothing we can do - so return the original name return name; } // normalize the uri name by prioritizing the system name String Uri::sysname (const String& name) { // check if we have a scheme Regex sre ("($l$a*):($N+)"); if (sre == name) return name; // check if we have a system path Pathname path = name; String pnam = path.getsysp (); if (System::isfile (pnam) == true) { String result = "file://"; result += pnam; return result; } // go back to the regular normalization return Uri::nrmname (name); } // ------------------------------------------------------------------------- // - class section - // ------------------------------------------------------------------------- // create a default uri Uri::Uri (void) { reset (); } // create a uri by name Uri::Uri (const String& uri) { reset (); parse (uri); } // copy constructs this uri Uri::Uri (const Uri& that) { that.rdlock (); d_schm = that.d_schm; d_amrk = that.d_amrk; d_auth = that.d_auth; d_path = that.d_path; d_query = that.d_query; d_frag = that.d_frag; that.unlock (); } // asssign an uri to this one Uri& Uri::operator = (const Uri& that) { wrlock (); that.rdlock (); d_schm = that.d_schm; d_amrk = that.d_amrk; d_auth = that.d_auth; d_path = that.d_path; d_query = that.d_query; d_frag = that.d_frag; that.unlock (); unlock (); return *this; } // return the class name String Uri::repr (void) const { return "Uri"; } // get a clone of this object Object* Uri::clone (void) const { return new Uri (*this); } // reset an uri information void Uri::reset (void) { wrlock (); d_schm = ""; d_amrk = ""; d_auth = ""; d_path = ""; d_query = ""; d_frag = ""; unlock (); } // get the uri original name String Uri::getname (void) const { rdlock (); try { String result = d_schm; result += ":"; result += d_amrk; result += d_auth; result += d_path; // add optional query if (d_query.isnil () == false) { result += '?'; result += d_query; } // add optional fragment if (d_frag.isnil () == false) { result += '#'; result += d_frag; } unlock (); return result; } catch (...) { unlock (); throw; } } // parse an uri and update the uri data structure void Uri::parse (const String& uri) { // check for nothing first if (uri.isnil () == true) { reset (); return; } // ok let's deal with it wrlock (); try { // reset everythnig reset (); // percent encode the string String puri = Uri::pencode (uri); // the reminding string String rest; // extract the scheme Regex rsch ("($l$a*):($N+)"); if (rsch == puri) { d_schm = rsch.getstr (0); rest = rsch.getstr (1); } else { throw Exception ("uri-error", "empty scheme uri name", uri); } // now extract the authority Regex raut ("//(<$a-._~>*[:$d+]?)(?$N*)"); if (raut == rest) { // extract the authority d_amrk = "//"; d_auth = raut.getstr (0); rest = raut.getstr (1); // now extract the path Regex rpth ("(/<$a+-_.%:@,/'~()>*)(?$N*)"); if (rpth == rest) { d_path = rpth.getstr (0); rest = rpth.getstr (1); } } else { // now extract the path Regex rpth ("(<$a+-_.%:@,'~()>*)(?$N*)"); if (rpth == rest) { d_path = rpth.getstr (0); rest = rpth.getstr (1); } } // now extract the query Regex rqry ("$?(<$a+-_.%/=;&:,@>*)(<$#>?$N*)"); if (rqry == rest) { d_query = rqry.getstr (0); rest = rqry.getstr (1); } // now extract the fragment Regex rfrg ("#($N*)"); if (rfrg == rest) { d_frag = rfrg.getstr (0); rest = ""; } if (rest.length () != 0) { throw Exception ("uri-error", "incorrect uri format", uri); } } catch (...) { reset (); unlock (); throw; } } // normalize the uri authority void Uri::nrmauth (void) { wrlock (); try { d_auth = d_auth.tolower (); unlock (); } catch (...) { unlock (); throw; } } // return the uri scheme String Uri::getscheme (void) const { rdlock (); String result = d_schm; unlock (); return result; } // return the uri authority String Uri::getauth (void) const { rdlock (); String result = d_auth; unlock (); return result; } // return the uri path String Uri::getpath (void) const { rdlock (); try { String result = Uri::pdecode (d_path); if (d_amrk == "//") result = normalize_path (result); unlock (); return result; } catch (...) { unlock (); throw; } } // return the uri query String Uri::getquery (void) const { rdlock (); String result = d_query; unlock (); return result; } // return the uri fragment String Uri::getfragment (void) const { rdlock (); String result = Uri::pdecode (d_frag); unlock (); return result; } // get the uri base name (scheme+authority) String Uri::getbase (void) const { rdlock (); try { String result = d_schm; result += ":"; result += d_amrk; result += d_auth; unlock (); return result; } catch (...) { unlock (); throw; } } // get the uri rname (scheme+authority+path percent encoded) String Uri::getrnam (void) const { rdlock (); try { String result = getbase (); if ((d_path.isnil () == true) && (d_amrk == "//")) { result += "/"; } else { if (d_amrk == "//") { result += normalize_path (d_path); } else { result += d_path; } } unlock (); return result; } catch (...) { unlock (); throw; } } // get the uri hname (scheme+authority+path) String Uri::gethnam (void) const { rdlock (); try { String result = Uri::pdecode (getrnam ()); unlock (); return result; } catch (...) { unlock (); throw; } } // get the path target name String Uri::getptnm (void) const { rdlock (); try { // get the path name String path = getpath (); if (path.isnil () == true) { unlock (); return path; } // split the path into element Strvec svec = Strvec::split (path, "/"); long slen = svec.length (); if (slen == 0) { unlock (); return path; } // here is the result String result = svec.get (slen-1); unlock (); return result; } catch (...) { unlock (); throw; } } // add a path to this uri Uri Uri::addpath (const String& path) const { rdlock (); try { // fix the path String epth = Uri::pencode (path); // check if the path is valid Regex re ("([<$a+-_.%/,@>+]?)[$?$N+]?[$#$N+]?"); if (re != epth) { throw Exception ("path-error", "invalid path to add to uri", path); } String rpth = re.getstr (0); if (rpth.isnil () == true) { Uri result = *this; unlock (); return result; } // rebuild the uri String uri = getbase (); // check if have an absolute path if (rpth.first () == Unicode::toquad ('/')) { uri += rpth; } else { // add the base path uri += get_base_path (d_path); // check if we add a slash if (uri.last () == Unicode::toquad ('/')) { if (rpth.first () == Unicode::toquad ('/')) { uri += rpth.rsubstr (1); } else { uri += rpth; } } else { if (rpth.first () == Unicode::toquad ('/')) { uri += rpth; } else { uri += '/'; uri += rpth; } } // add optional query if (d_query.isnil () == false) { uri += '?'; uri += d_query; } // add optional fragment if (d_frag.isnil () == false) { uri += '?'; uri += d_frag; } } // build the result uri Uri result = uri; unlock (); return result; } catch (...) { unlock (); throw; } } // get a new uri by path - if the path is a uri the new uri is return // otherwise, a new uri path is built Uri Uri::gethref (const String& path) const { rdlock (); try { // check for nil first if (path.isnil () == true) { Uri result = *this; unlock (); return result; } // select from path scheme Regex rsch ("($l$a*):($N+)"); Uri result = (rsch == path) ? path : addpath (path); unlock (); return result; } catch (...) { unlock (); throw; } } // get a system path if the scheme is a file String Uri::getsysp (void) const { rdlock (); try { // check that we have a file scheme if (d_schm != "file") { throw Exception ("scheme-error", "cannot convert to system path without file scheme"); } String result = get_sys_path (getpath ()); unlock (); return result; } catch (...) { unlock (); throw; } } // get the path encoded name String Uri::getpenc (void) const { rdlock (); try { String result = d_path; unlock (); return result; } catch (...) { unlock (); throw; } } // get the uri host if any String Uri::gethost (void) const { rdlock (); try { // initialize result String result; // select host from the scheme if (d_schm == "ftp") result = get_auth_host (d_auth); if (d_schm == "http") result = get_auth_host (d_auth); if (d_schm == "https") result = get_auth_host (d_auth); if (d_schm == "mailto") result = get_mail_host (d_path); unlock (); return result; } catch (...) { unlock (); throw; } } // get the uri port if any long Uri::getport (void) const { rdlock (); try { long result = -1; // select host from the scheme if (d_schm == "ftp") { result = get_auth_port (d_auth); if (result == -1) result = URI_XFTP_PORT; } if (d_schm == "http") { result = get_auth_port (d_auth); if (result == -1) result = URI_HTTP_PORT; } if (d_schm == "https") { result = get_auth_port (d_auth); if (result == -1) result = URI_HTPS_PORT; } if (d_schm == "mailto") result = URI_SMTP_PORT; if (result == -1) { throw Exception ("uri-error", "invalid uri port access", getname ()); } unlock (); return result; } catch (...) { unlock (); throw; } } // ------------------------------------------------------------------------- // - object section - // ------------------------------------------------------------------------- // the quark zone static const long QUARK_ZONE_LENGTH = 18; static QuarkZone zone (QUARK_ZONE_LENGTH); // the uri supported quarks static const long QUARK_PARSE = zone.intern ("parse"); static const long QUARK_GETNAME = zone.intern ("get-name"); static const long QUARK_GETBASE = zone.intern ("get-base"); static const long QUARK_GETRNAM = zone.intern ("get-rname"); static const long QUARK_GETHNAM = zone.intern ("get-hname"); static const long QUARK_ADDPATH = zone.intern ("add-path"); static const long QUARK_GETHREF = zone.intern ("get-href"); static const long QUARK_GETAUTH = zone.intern ("get-authority"); static const long QUARK_GETPATH = zone.intern ("get-path"); static const long QUARK_GETPTNM = zone.intern ("get-path-target"); static const long QUARK_GETSYST = zone.intern ("get-system-path"); static const long QUARK_GETPENC = zone.intern ("get-path-encoded"); static const long QUARK_GETHOST = zone.intern ("get-host"); static const long QUARK_GETPORT = zone.intern ("get-port"); static const long QUARK_NRMAUTH = zone.intern ("normalize-authority"); static const long QUARK_GETQUERY = zone.intern ("get-query"); static const long QUARK_GETSCHEME = zone.intern ("get-scheme"); static const long QUARK_GETFRAGMENT = zone.intern ("get-fragment"); // create a new object in a generic way Object* Uri::mknew (Vector* argv) { long argc = (argv == nilp) ? 0 : argv->length (); // check for 0 argument if (argc == 0) return new Uri; // check for 1 argument if (argc == 1) { String uri = argv->getstring (0); return new Uri (uri); } // invalid arguments throw Exception ("argument-error", "invalid arguments with uri object"); } // return true if the given quark is defined bool Uri::isquark (const long quark, const bool hflg) const { rdlock (); if (zone.exists (quark) == true){ unlock (); return true; } bool result = hflg ? Nameable::isquark (quark, hflg) : false; unlock (); return result; } // apply this object with a set of arguments and a quark Object* Uri::apply (Runnable* robj, Nameset* nset, const long quark, Vector* argv) { // get the number of arguments long argc = (argv == nilp) ? 0 : argv->length (); // dispatch 0 argument if (argc == 0) { if (quark == QUARK_GETNAME) return new String (getname ()); if (quark == QUARK_GETBASE) return new String (getbase ()); if (quark == QUARK_GETRNAM) return new String (getrnam ()); if (quark == QUARK_GETHNAM) return new String (gethnam ()); if (quark == QUARK_GETAUTH) return new String (getauth ()); if (quark == QUARK_GETPATH) return new String (getpath ()); if (quark == QUARK_GETPTNM) return new String (getptnm ()); if (quark == QUARK_GETSYST) return new String (getsysp ()); if (quark == QUARK_GETPENC) return new String (getpenc ()); if (quark == QUARK_GETHOST) return new String (gethost ()); if (quark == QUARK_GETPORT) return new Integer (getport ()); if (quark == QUARK_GETQUERY) return new String (getquery ()); if (quark == QUARK_GETSCHEME) return new String (getscheme ()); if (quark == QUARK_GETFRAGMENT) return new String (getfragment ()); if (quark == QUARK_NRMAUTH) { nrmauth (); return nilp; } } // dispatch 1 argument if (argc == 1) { if (quark == QUARK_PARSE) { parse (argv->getstring (0)); return nilp; } if (quark == QUARK_ADDPATH) { String path = argv->getstring (0); return new Uri (addpath (path)); } if (quark == QUARK_GETHREF) { String path = argv->getstring (0); return new Uri (gethref (path)); } } // call the nameable object return Nameable::apply (robj, nset, quark, argv); } }