// ---------------------------------------------------------------------------
// - Uri.cpp -
// - afnix:www module - uri class implementation -
// ---------------------------------------------------------------------------
// - This program is free software; you can redistribute it and/or modify -
// - it provided that this copyright notice is kept intact. -
// - -
// - This program is distributed in the hope that it will be useful, but -
// - without any warranty; without even the implied warranty of -
// - merchantability or fitness for a particular purpose. In no event shall -
// - the copyright holder be liable for any direct, indirect, incidental or -
// - special damages arising in any way out of the use of this software. -
// ---------------------------------------------------------------------------
// - copyright (c) 1999-2007 amaury darsch -
// ---------------------------------------------------------------------------
#include "Uri.hpp"
#include "Ascii.hpp"
#include "Regex.hpp"
#include "Vector.hpp"
#include "Strvec.hpp"
#include "Buffer.hpp"
#include "Utility.hpp"
#include "Integer.hpp"
#include "Unicode.hpp"
#include "Pathname.hpp"
#include "QuarkZone.hpp"
#include "Exception.hpp"
namespace afnix {
// -------------------------------------------------------------------------
// - private section -
// -------------------------------------------------------------------------
// default port numbers used by Uri::getport when the authority does not
// carry an explicit port (the smtp port is the one reported for mailto)
static const long URI_XFTP_PORT = 21;   // ftp
static const long URI_SMTP_PORT = 25;   // smtp
static const long URI_HTTP_PORT = 80;   // http
static const long URI_HTPS_PORT = 443;  // https
// this procedure normalizes an absolute uri path by removing the "." and
// ".." components as well as the empty components
// @param path the path to normalize
// @return the normalized path - a nil path, the root path and a path that
//         does not start with a slash are returned untouched
static String normalize_path (const String& path) {
  // nothing to do with a nil, root-only or relative path
  if (path.isnil () == true) return path;
  if (path == "/") return path;
  if (path.first () != Unicode::toquad ('/')) return path;
  // break the path into its components
  Strvec parts = Strvec::split (path, "/");
  const long plen = parts.length ();
  if (plen < 2) return path;
  // collect the components that survive the normalization
  Strvec kept;
  const long last = plen - 1;
  for (long k = 0; k < plen; k++) {
    String item = parts.get (k);
    // an empty component is only kept when it is the last one
    if (item.isnil () == true) {
      if (k == last) kept.add (item);
      continue;
    }
    // a parent reference removes the last kept component if any
    if (item == "..") {
      if (kept.empty () == false) kept.rml ();
      continue;
    }
    // a self reference is dropped - anything else is kept
    if (item != ".") kept.add (item);
  }
  // an empty result is the root path
  const long klen = kept.length ();
  if (klen == 0) return "/";
  // reassemble the path with a leading slash before each component
  String result = "";
  for (long k = 0; k < klen; k++) {
    result += "/";
    result += kept.get (k);
  }
  // eventually restore the trailing slash of the original path
  if (path.last () == Unicode::toquad ('/')) result += "/";
  return result;
}
// this procedure gets the base part of a path so that it can be used as a
// base for adding another path - a name that ends with a slash is already
// a base and is returned as is
// @param name the path name to process
// @return the result of lsubstr at the index of the last slash, or the name
//         itself when it is empty or does not contain a slash
static String get_base_path (const String& name) {
  if (name.last () == Unicode::toquad ('/')) return name;
  // scan backward for the last slash - an empty name does not loop
  long pos = name.length () - 1;
  while (pos >= 0) {
    if (name[pos] == Unicode::toquad ('/')) break;
    pos--;
  }
  // no slash found - the name is returned untouched
  if (pos < 0) return name;
  // NOTE(review): lsubstr presumably keeps the characters left of the
  // index, which drops the slash itself - confirm against String
  return name.lsubstr (pos);
}
// this procedure converts an absolute uri path into a system path
// @param path the uri path which must start with a slash
// @return the root directory for "/", otherwise the percent-encoded path
//         obtained by joining every component to the system root directory
// @throw path-error if the path does not start with a slash
// @throw internal-error if the path splits into less than 2 components
static String get_sys_path (const String& path) {
  // only an absolute uri path can be converted
  if (path.first () != Unicode::toquad ('/')) {
    throw Exception ("path-error", "invalid uri path to convert", path);
  }
  // the root path maps directly to the system root directory
  if (path == "/") return System::rootdir ();
  // break the path into components and check them
  Strvec parts = Strvec::split (path, "/");
  const long plen = parts.length ();
  if (plen < 2) {
    throw Exception ("internal-error", "cannot convert uri path", path);
  }
  // start at the root and join every component except the first one
  String spath = System::rootdir ();
  for (long k = 1; k < plen; k++) {
    spath = System::join (spath, parts.get (k));
  }
  return Uri::pencode (spath);
}
// this procedure extracts the host part of an authority string
// @param auth the authority string, optionally followed by a :port
// @return the first regex group (the host) or the nil authority itself
// @throw uri-error if the authority does not match the expected form
static String get_auth_host (const String& auth) {
  if (auth.isnil () == false) {
    // match the host with an optional port
    Regex hre ("(<$a-._~>*)[:$d+]?");
    if (hre == auth) return hre.getstr (0);
    // the authority cannot be recognized
    throw Exception ("uri-error", "invalid authority name", auth);
  }
  // a nil authority has a nil host
  return auth;
}
// this procedure extracts the host part of a mail path (user@host)
// @param path the mail path to process
// @return the first regex group (the part after the @) or the nil path
// @throw uri-error if the path does not match the expected form
static String get_mail_host (const String& path) {
  if (path.isnil () == false) {
    // match the user part, the @ separator and the host
    Regex mre ("<$a+-_.%:>*@(<$a-._~>*)");
    if (mre == path) return mre.getstr (0);
    // the mail path cannot be recognized
    throw Exception ("uri-error", "invalid mail path name", path);
  }
  // a nil path has a nil host
  return path;
}
// this procedure returns a auth port if any
static long get_auth_port (const String& auth) {
// check for nil name
if (auth.isnil () == true) return -1;
// the authority regex
Regex re ("<$a-._~>*:($d+)");
if (re == auth) return Utility::tointeger (re.getstr (0));
return -1;
}
// -------------------------------------------------------------------------
// - public section -
// -------------------------------------------------------------------------
// percent-encode a uri name
// the name is first converted to a character buffer with String::encode,
// then each character is processed as follow:
// - a blank character is replaced by '+'
// - a single quote, a double quote and any character rejected by
//   Ascii::isascii is replaced by '%' followed by the two characters
//   produced by Ascii::btoc (presumably the two hexadecimal digits)
// - any other character is copied as is
// @param name the name to encode
// @return the encoded string or the name itself when it is nil
String Uri::pencode (const String& name) {
  // do nothing with nil string
  if (name.isnil () == true) return name;
  // encode the string - the buffer is owned by this procedure
  char* cbuf = name.encode ();
  try {
    // get the buffer length
    const long clen = Ascii::strlen (cbuf);
    // create a buffer and loop
    Buffer buf;
    for (long i = 0; i < clen; i++) {
      const char c = cbuf[i];
      // a blank is mapped to a plus sign
      if (c == blkc) {
        buf.add ('+');
        continue;
      }
      // quotes and non-ascii characters are escaped
      const bool escp =
        (c == '\'') || (c == '"') || (Ascii::isascii (c) == false);
      if (escp == true) {
        buf.add ('%');
        buf.add (Ascii::btoc (c, false));
        buf.add (Ascii::btoc (c, true));
      } else {
        buf.add (c);
      }
    }
    // build the result before releasing the buffer so that an exception
    // raised here reaches the handler with the buffer still allocated
    // and the buffer is released exactly once
    String result = buf.tostring ();
    delete [] cbuf;
    return result;
  } catch (...) {
    delete [] cbuf;
    throw;
  }
}
// percent-decode a uri name
// a '+' is decoded as a blank and a '%' must be followed by two characters
// that are converted with Ascii::htob into a single byte
// @param name the name to decode
// @return the decoded string or the name itself when it is nil
// @throw uri-error if an escape sequence is truncated or decodes to the
//        nil character, or if the name contains a non-ascii character
String Uri::pdecode (const String& name) {
  // check for nil name
  if (name.isnil () == true) return name;
  // create a working buffer
  Buffer buf;
  // get the name length and loop
  const long len = name.length ();
  for (long i = 0; i < len; i++) {
    t_quad c = name[i];
    if (c == Unicode::toquad ('+')) {
      buf.add (' ');
      continue;
    }
    if (c == Unicode::toquad ('%')) {
      // both digits must be present - a sequence cut after the percent
      // character or after the first digit is an error and must not be
      // decoded from a single digit
      if ((i + 2) >= len) {
        throw Exception ("uri-error", "unterminated escape sequence");
      }
      // NOTE(review): Ascii::htob presumably rejects a character that is
      // not a hexadecimal digit - confirm against Ascii
      t_byte val = Ascii::htob (name[i+1]) << 4;
      val += Ascii::htob (name[i+2]);
      i += 2;
      // a nil byte is rejected as before
      if (val == nilc) {
        throw Exception ("uri-error", "unterminated escape sequence");
      }
      buf.add (static_cast <char> (val));
      continue;
    }
    if (Unicode::isascii (c) == false) {
      throw Exception ("uri-error",
                       "non-ascii uri sequence to decode", name);
    }
    buf.add (c);
  }
  return buf.tostring ();
}
// normalize a uri name by guessing the missing scheme
// - a name that already has a scheme is returned as is
// - a name that looks like a qualified host gets the "http://" prefix
// - a name that looks like //host gets the "http:" prefix
// - a name that starts with a slash gets the "file://" prefix
// - anything else is returned as is
// @param name the name to normalize
// @return the normalized name
String Uri::nrmname (const String& name) {
  // a name with a scheme is already normalized
  Regex sre ("($l$a*):($N+)");
  if (sre == name) return name;
  // select the prefix to install in front of the name
  String result;
  Regex hre ("[<$a-_~>+.]+$s+[/$N*]?");
  if (hre == name) {
    // qualified host without scheme
    result = "http://";
  } else {
    Regex qre ("//[<$a-_~>+.]+$s+[/$N*]?");
    if (qre == name) {
      // qualified host that already starts with //
      result = "http:";
    } else if (name.first () == Unicode::toquad ('/')) {
      // no scheme and no host - an absolute path is assumed to be a file
      result = "file://";
    } else {
      // nothing can be guessed - return the original name
      return name;
    }
  }
  result += name;
  return result;
}
// normalize a uri name by giving the priority to an existing system file
// @param name the name to normalize
// @return the name itself when it has a scheme, a "file://" uri when the
//         name maps to a path reported as a file by System::isfile, or the
//         result of the regular name normalization
String Uri::sysname (const String& name) {
  // a name with a scheme is left untouched
  Regex sre ("($l$a*):($N+)");
  if (sre == name) return name;
  // map the name to a system path and check for a file
  Pathname path = name;
  String spth = path.getsysp ();
  if (System::isfile (spth) == false) {
    // not a file - go back to the regular normalization
    return Uri::nrmname (name);
  }
  // build the file uri from the system path
  String result = "file://";
  result += spth;
  return result;
}
// -------------------------------------------------------------------------
// - class section -
// -------------------------------------------------------------------------
// create a default uri - all the components are reset to an empty string
Uri::Uri (void) {
  reset ();
}
// create a uri by name - the components are reset and then filled by
// parsing the string, so the parser exceptions propagate to the caller
// @param uri the uri string to parse
Uri::Uri (const String& uri) {
  reset ();
  parse (uri);
}
// copy construct this uri
// the source uri is read-locked while its components are copied and the
// lock is released even if one of the copies raises an exception
// @param that the uri to copy
Uri::Uri (const Uri& that) {
  that.rdlock ();
  try {
    d_schm  = that.d_schm;
    d_amrk  = that.d_amrk;
    d_auth  = that.d_auth;
    d_path  = that.d_path;
    d_query = that.d_query;
    d_frag  = that.d_frag;
    that.unlock ();
  } catch (...) {
    that.unlock ();
    throw;
  }
}
// assign a uri to this one
// this uri is write-locked and the source uri is read-locked while the
// components are copied - both locks are released even if one of the
// copies raises an exception
// @param that the uri to assign
// @return this uri
Uri& Uri::operator = (const Uri& that) {
  // check for self-assignment - there is nothing to copy
  if (this == &that) return *this;
  // lock and assign
  wrlock ();
  that.rdlock ();
  try {
    d_schm  = that.d_schm;
    d_amrk  = that.d_amrk;
    d_auth  = that.d_auth;
    d_path  = that.d_path;
    d_query = that.d_query;
    d_frag  = that.d_frag;
    that.unlock ();
    unlock ();
    return *this;
  } catch (...) {
    that.unlock ();
    unlock ();
    throw;
  }
}
// return the class name
// @return the "Uri" string
String Uri::repr (void) const {
  return "Uri";
}
// get a clone of this object
// @return a new uri allocated with new and copy constructed from this one
Object* Uri::clone (void) const {
  return new Uri (*this);
}
// reset the uri information
// every component is set to an empty string under the write lock
void Uri::reset (void) {
  wrlock ();
  // scheme, authority marker and authority
  d_schm  = "";
  d_amrk  = "";
  d_auth  = "";
  // path, query and fragment
  d_path  = "";
  d_query = "";
  d_frag  = "";
  unlock ();
}
// get the uri name rebuilt from its components
// @return scheme ':' authority-marker authority path, followed by
//         '?' query and '#' fragment when they are not nil
String Uri::getname (void) const {
  rdlock ();
  try {
    // the scheme is always followed by the colon separator
    String name = d_schm;
    name += ":";
    // the authority marker, the authority and the path come next
    name += d_amrk;
    name += d_auth;
    name += d_path;
    // the query is optional
    if (d_query.isnil () == false) {
      name += '?';
      name += d_query;
    }
    // the fragment is optional
    if (d_frag.isnil () == false) {
      name += '#';
      name += d_frag;
    }
    unlock ();
    return name;
  } catch (...) {
    unlock ();
    throw;
  }
}
// parse a uri string and update the uri components
// the string is percent-encoded first, then the scheme, the optional
// authority, the path, the query and the fragment are extracted in turn
// with a regex - what remains after the last step must be empty
// a nil string only resets the uri
// @param uri the uri string to parse
// @throw uri-error if the string has no scheme or if some characters are
//        left after all the components have been extracted - the uri is
//        reset before the exception is propagated
void Uri::parse (const String& uri) {
  // check for nothing first
  if (uri.isnil () == true) {
    reset ();
    return;
  }
  // ok let's deal with it
  wrlock ();
  try {
    // reset everything
    reset ();
    // percent encode the string
    String puri = Uri::pencode (uri);
    // the remaining string
    String rest;
    // extract the scheme
    Regex rsch ("($l$a*):($N+)");
    if (rsch == puri) {
      d_schm = rsch.getstr (0);
      rest   = rsch.getstr (1);
    } else {
      throw Exception ("uri-error", "empty scheme uri name", uri);
    }
    // now extract the authority
    Regex raut ("//(<$a-._~>*[:$d+]?)(</?#>?$N*)");
    if (raut == rest) {
      // extract the authority
      d_amrk = "//";
      d_auth = raut.getstr (0);
      rest   = raut.getstr (1);
      // now extract the path - it must start with a slash
      Regex rpth ("(/<$a+-_.%:@,/'~()>*)(<?#>?$N*)");
      if (rpth == rest) {
        d_path = rpth.getstr (0);
        rest   = rpth.getstr (1);
      }
    } else {
      // now extract the path - there is no authority
      Regex rpth ("(<$a+-_.%:@,'~()>*)(<?#>?$N*)");
      if (rpth == rest) {
        d_path = rpth.getstr (0);
        rest   = rpth.getstr (1);
      }
    }
    // now extract the query
    Regex rqry ("$?(<$a+-_.%/=;&:,@>*)(<$#>?$N*)");
    if (rqry == rest) {
      d_query = rqry.getstr (0);
      rest    = rqry.getstr (1);
    }
    // now extract the fragment
    Regex rfrg ("#($N*)");
    if (rfrg == rest) {
      d_frag = rfrg.getstr (0);
      rest   = "";
    }
    // anything left means that the uri is malformed
    if (rest.length () != 0) {
      throw Exception ("uri-error", "incorrect uri format", uri);
    }
    // release the write lock on success - the error path releases it in
    // the handler below
    unlock ();
  } catch (...) {
    reset ();
    unlock ();
    throw;
  }
}
// normalize the uri authority
void Uri::nrmauth (void) {
wrlock ();
try {
d_auth = d_auth.tolower ();
unlock ();
} catch (...) {
unlock ();
throw;
}
}
// return the uri scheme
// @return a copy of the scheme taken under the read lock
String Uri::getscheme (void) const {
  rdlock ();
  String schm = d_schm;
  unlock ();
  return schm;
}
// return the uri authority
// @return a copy of the authority taken under the read lock
String Uri::getauth (void) const {
  rdlock ();
  String auth = d_auth;
  unlock ();
  return auth;
}
// return the uri path
// @return the percent-decoded path - when the uri has the "//" authority
//         marker the decoded path is also normalized
String Uri::getpath (void) const {
  rdlock ();
  try {
    // decode the stored path first
    String path = Uri::pdecode (d_path);
    // normalize only for a uri with an authority marker
    if (d_amrk == "//") {
      path = normalize_path (path);
    }
    unlock ();
    return path;
  } catch (...) {
    unlock ();
    throw;
  }
}
// return the uri query
// @return a copy of the stored query taken under the read lock - the
//         query is not decoded
String Uri::getquery (void) const {
  rdlock ();
  String qury = d_query;
  unlock ();
  return qury;
}
// return the uri fragment
// @return the percent-decoded fragment
// @throw uri-error if the fragment cannot be decoded - the read lock is
//        released before the exception is propagated
String Uri::getfragment (void) const {
  rdlock ();
  try {
    String result = Uri::pdecode (d_frag);
    unlock ();
    return result;
  } catch (...) {
    unlock ();
    throw;
  }
}
// get the uri base name
// @return scheme ':' authority-marker authority
String Uri::getbase (void) const {
  rdlock ();
  try {
    // scheme and separator
    String base = d_schm;
    base += ":";
    // authority marker and authority
    base += d_amrk;
    base += d_auth;
    unlock ();
    return base;
  } catch (...) {
    unlock ();
    throw;
  }
}
// get the uri rname (base name + percent-encoded path)
// @return the base name followed by the stored path - with the "//"
//         authority marker a nil path is replaced by "/" and a non nil
//         path is normalized
String Uri::getrnam (void) const {
  rdlock ();
  try {
    String rnam = getbase ();
    if (d_amrk == "//") {
      // a uri with an authority always has at least the root path
      if (d_path.isnil () == true) {
        rnam += "/";
      } else {
        rnam += normalize_path (d_path);
      }
    } else {
      // without authority marker the path is used as is
      rnam += d_path;
    }
    unlock ();
    return rnam;
  } catch (...) {
    unlock ();
    throw;
  }
}
// get the uri hname
// @return the percent-decoded form of the uri rname
String Uri::gethnam (void) const {
  rdlock ();
  try {
    String rnam = getrnam ();
    String hnam = Uri::pdecode (rnam);
    unlock ();
    return hnam;
  } catch (...) {
    unlock ();
    throw;
  }
}
// get the path target name
// @return the last component obtained by splitting the decoded path on
//         the slash character - the path itself is returned when it is nil
//         or when the split yields no component
String Uri::getptnm (void) const {
  rdlock ();
  try {
    // start with the path and refine it when possible
    String result = getpath ();
    if (result.isnil () == false) {
      // split the path into elements and select the last one
      Strvec parts = Strvec::split (result, "/");
      const long plen = parts.length ();
      if (plen != 0) result = parts.get (plen - 1);
    }
    unlock ();
    return result;
  } catch (...) {
    unlock ();
    throw;
  }
}
// add a path to this uri and return the resulting uri
// the path is percent-encoded and only its path part (the first regex
// group) is used - a query or a fragment in the added path is ignored
// - a nil path part returns a copy of this uri
// - an absolute path part replaces the uri path and is added directly
//   after the base name
// - a relative path part is added after the base of the uri path and the
//   query and fragment of this uri are preserved
// @param path the path to add
// @return the new uri
// @throw path-error if the path does not match the expected form
Uri Uri::addpath (const String& path) const {
  rdlock ();
  try {
    // fix the path
    String epth = Uri::pencode (path);
    // check if the path is valid
    Regex re ("([<$a+-_.%/,@>+]?)[$?$N+]?[$#$N+]?");
    if (re != epth) {
      throw Exception ("path-error", "invalid path to add to uri", path);
    }
    String rpth = re.getstr (0);
    if (rpth.isnil () == true) {
      Uri result = *this;
      unlock ();
      return result;
    }
    // rebuild the uri
    String uri = getbase ();
    // check if we have an absolute path
    if (rpth.first () == Unicode::toquad ('/')) {
      uri += rpth;
    } else {
      // add the base path
      uri += get_base_path (d_path);
      // the added path is relative here, so it never starts with a slash
      // and a single separator is inserted only when one is missing
      if (uri.last () != Unicode::toquad ('/')) uri += '/';
      uri += rpth;
      // add optional query
      if (d_query.isnil () == false) {
        uri += '?';
        uri += d_query;
      }
      // add optional fragment - the fragment separator is '#' as used
      // by the parser and by the uri name
      if (d_frag.isnil () == false) {
        uri += '#';
        uri += d_frag;
      }
    }
    // build the result uri
    Uri result = uri;
    unlock ();
    return result;
  } catch (...) {
    unlock ();
    throw;
  }
}
// get a new uri from a path reference
// @param path the path reference to resolve
// @return a copy of this uri when the path is nil, a uri parsed from the
//         path when it has a scheme, or the result of adding the path to
//         this uri
Uri Uri::gethref (const String& path) const {
  rdlock ();
  try {
    // a nil path refers to this uri
    if (path.isnil () == true) {
      Uri result = *this;
      unlock ();
      return result;
    }
    // a path with a scheme is a complete uri by itself
    Regex rsch ("($l$a*):($N+)");
    if (rsch == path) {
      Uri result (path);
      unlock ();
      return result;
    }
    // otherwise the path is added to this uri
    Uri result = addpath (path);
    unlock ();
    return result;
  } catch (...) {
    unlock ();
    throw;
  }
}
// get a system path from this uri
// @return the system path built from the uri path
// @throw scheme-error if the uri scheme is not "file"
String Uri::getsysp (void) const {
  rdlock ();
  try {
    // only a file uri can be mapped to a system path
    if (d_schm != "file") {
      throw Exception ("scheme-error",
                       "cannot convert to system path without file scheme");
    }
    // convert the decoded path
    String upth = getpath ();
    String spth = get_sys_path (upth);
    unlock ();
    return spth;
  } catch (...) {
    unlock ();
    throw;
  }
}
// get the path encoded name
// @return a copy of the stored path, which is not decoded
String Uri::getpenc (void) const {
  rdlock ();
  try {
    String penc = d_path;
    unlock ();
    return penc;
  } catch (...) {
    unlock ();
    throw;
  }
}
// get the uri host if any
// @return the authority host for the ftp, http and https schemes, the
//         mail host for the mailto scheme and a default string otherwise
// @throw uri-error if the authority or the mail path is invalid
String Uri::gethost (void) const {
  rdlock ();
  try {
    String host;
    if ((d_schm == "ftp") || (d_schm == "http") || (d_schm == "https")) {
      // the host is part of the authority
      host = get_auth_host (d_auth);
    } else if (d_schm == "mailto") {
      // the host is part of the mail path
      host = get_mail_host (d_path);
    }
    unlock ();
    return host;
  } catch (...) {
    unlock ();
    throw;
  }
}
// get the uri port if any
// @return the authority port for the ftp, http and https schemes or the
//         scheme default port when the authority has none, and the smtp
//         port for the mailto scheme
// @throw uri-error if the scheme is not one of the above
long Uri::getport (void) const {
  rdlock ();
  try {
    long port = -1;
    if (d_schm == "mailto") {
      // a mail uri always reports the smtp port
      port = URI_SMTP_PORT;
    } else {
      // select the default port from the scheme
      long dflt = -1;
      if (d_schm == "ftp")   dflt = URI_XFTP_PORT;
      if (d_schm == "http")  dflt = URI_HTTP_PORT;
      if (d_schm == "https") dflt = URI_HTPS_PORT;
      // for a known scheme the authority port has the priority
      if (dflt != -1) {
        port = get_auth_port (d_auth);
        if (port == -1) port = dflt;
      }
    }
    // no port means that the scheme is not supported
    if (port == -1) {
      throw Exception ("uri-error", "invalid uri port access", getname ());
    }
    unlock ();
    return port;
  } catch (...) {
    unlock ();
    throw;
  }
}
// -------------------------------------------------------------------------
// - object section -
// -------------------------------------------------------------------------
// the quark zone - it is created with QUARK_ZONE_LENGTH as argument, which
// matches the 18 quarks interned below
// NOTE(review): the argument is presumably the zone capacity, so it should
// be updated when a quark is added - confirm against QuarkZone
static const long QUARK_ZONE_LENGTH = 18;
static QuarkZone zone (QUARK_ZONE_LENGTH);
// the uri supported quarks - each one maps a method name, as seen by
// Uri::isquark and Uri::apply, to the quark value returned by the zone
static const long QUARK_PARSE = zone.intern ("parse");
static const long QUARK_GETNAME = zone.intern ("get-name");
static const long QUARK_GETBASE = zone.intern ("get-base");
static const long QUARK_GETRNAM = zone.intern ("get-rname");
static const long QUARK_GETHNAM = zone.intern ("get-hname");
static const long QUARK_ADDPATH = zone.intern ("add-path");
static const long QUARK_GETHREF = zone.intern ("get-href");
static const long QUARK_GETAUTH = zone.intern ("get-authority");
static const long QUARK_GETPATH = zone.intern ("get-path");
static const long QUARK_GETPTNM = zone.intern ("get-path-target");
static const long QUARK_GETSYST = zone.intern ("get-system-path");
static const long QUARK_GETPENC = zone.intern ("get-path-encoded");
static const long QUARK_GETHOST = zone.intern ("get-host");
static const long QUARK_GETPORT = zone.intern ("get-port");
static const long QUARK_NRMAUTH = zone.intern ("normalize-authority");
static const long QUARK_GETQUERY = zone.intern ("get-query");
static const long QUARK_GETSCHEME = zone.intern ("get-scheme");
static const long QUARK_GETFRAGMENT = zone.intern ("get-fragment");
// create a new uri object in a generic way
// @param argv the argument vector which can be nil
// @return a default uri without argument or a uri parsed from the string
//         argument
// @throw argument-error if there is more than one argument
Object* Uri::mknew (Vector* argv) {
  const long argc = (argv == nilp) ? 0 : argv->length ();
  switch (argc) {
  case 0:
    // no argument - default uri
    return new Uri;
  case 1:
    {
      // one argument - the uri string
      String uri = argv->getstring (0);
      return new Uri (uri);
    }
  default:
    break;
  }
  // invalid arguments
  throw Exception ("argument-error", "invalid arguments with uri object");
}
// return true if the given quark is defined
// @param quark the quark to check
// @param hflg  the hierarchy flag - when true a quark that is not in the
//              uri zone is also checked with the nameable class
// @return true if the quark is defined
bool Uri::isquark (const long quark, const bool hflg) const {
  rdlock ();
  // look in the uri zone first
  bool result = (zone.exists (quark) == true);
  // eventually look in the parent class
  if ((result == false) && (hflg == true)) {
    result = Nameable::isquark (quark, hflg);
  }
  unlock ();
  return result;
}
// apply this object with a set of arguments and a quark
// the quark selects the method to call and the number of arguments selects
// the dispatch group - a quark that is not handled here is passed to the
// nameable class
// @param robj  the current runnable
// @param nset  the current nameset
// @param quark the quark to apply
// @param argv  the argument vector which can be nil
// @return a newly allocated result object or nilp when the method has no
//         result
Object* Uri::apply (Runnable* robj, Nameset* nset, const long quark,
                    Vector* argv) {
  // get the number of arguments
  const long argc = (argv == nilp) ? 0 : argv->length ();
  if (argc == 0) {
    // methods without result
    if (quark == QUARK_NRMAUTH) {
      nrmauth ();
      return nilp;
    }
    // integer accessor
    if (quark == QUARK_GETPORT) return new Integer (getport ());
    // string accessors
    if (quark == QUARK_GETSCHEME)   return new String (getscheme   ());
    if (quark == QUARK_GETAUTH)     return new String (getauth     ());
    if (quark == QUARK_GETHOST)     return new String (gethost     ());
    if (quark == QUARK_GETPATH)     return new String (getpath     ());
    if (quark == QUARK_GETPENC)     return new String (getpenc     ());
    if (quark == QUARK_GETPTNM)     return new String (getptnm     ());
    if (quark == QUARK_GETSYST)     return new String (getsysp     ());
    if (quark == QUARK_GETQUERY)    return new String (getquery    ());
    if (quark == QUARK_GETFRAGMENT) return new String (getfragment ());
    if (quark == QUARK_GETNAME)     return new String (getname     ());
    if (quark == QUARK_GETBASE)     return new String (getbase     ());
    if (quark == QUARK_GETRNAM)     return new String (getrnam     ());
    if (quark == QUARK_GETHNAM)     return new String (gethnam     ());
  } else if (argc == 1) {
    // parse a new uri string in place
    if (quark == QUARK_PARSE) {
      String uri = argv->getstring (0);
      parse (uri);
      return nilp;
    }
    // build a new uri by adding a path
    if (quark == QUARK_ADDPATH) {
      String path = argv->getstring (0);
      return new Uri (addpath (path));
    }
    // build a new uri from a reference
    if (quark == QUARK_GETHREF) {
      String path = argv->getstring (0);
      return new Uri (gethref (path));
    }
  }
  // call the nameable object
  return Nameable::apply (robj, nset, quark, argv);
}
}
// syntax highlighted by Code2HTML, v. 0.9.1