// ---------------------------------------------------------------------------
// - Uri.cpp                                                                 -
// - afnix:www module - uri class implementation                             -
// ---------------------------------------------------------------------------
// - This program is free software;  you can redistribute it  and/or  modify -
// - it provided that this copyright notice is kept intact.                  -
// -                                                                         -
// - This program  is  distributed in  the hope  that it will be useful, but -
// - without  any  warranty;  without  even   the   implied    warranty   of -
// - merchantability or fitness for a particular purpose.  In no event shall -
// - the copyright holder be liable for any  direct, indirect, incidental or -
// - special damages arising in any way out of the use of this software.     -
// ---------------------------------------------------------------------------
// - copyright (c) 1999-2007 amaury darsch                                   -
// ---------------------------------------------------------------------------

#include "Uri.hpp"
#include "Ascii.hpp"
#include "Regex.hpp"
#include "Vector.hpp"
#include "Strvec.hpp"
#include "Buffer.hpp"
#include "Utility.hpp"
#include "Integer.hpp"
#include "Unicode.hpp"
#include "Pathname.hpp"
#include "QuarkZone.hpp"
#include "Exception.hpp"

namespace afnix {

  // -------------------------------------------------------------------------
  // - private section                                                       -
  // -------------------------------------------------------------------------

  // the default ftp port - used by getport when an ftp authority does
  // not carry an explicit port
  static const long URI_XFTP_PORT = 21;
  // the default smtp port - always reported by getport for a mailto uri
  static const long URI_SMTP_PORT = 25;
  // the default http port - used by getport when an http authority does
  // not carry an explicit port
  static const long URI_HTTP_PORT = 80;
  // the default https port - used by getport when an https authority does
  // not carry an explicit port
  static const long URI_HTPS_PORT = 443;

  // this procedure normalizes an absolute path - the "." components are
  // removed, a ".." component removes the previously kept component and
  // the empty components are dropped unless they terminate the path
  // a nil path, the root path or a relative path is returned untouched
  static String normalize_path (const String& path) {
    // filter out the paths that are never rewritten
    if ((path.isnil () == true) || (path == "/")) return path;
    if (path.first () != Unicode::toquad ('/')) return path;
    // split the path into its components
    Strvec parts = Strvec::split (path, "/");
    const long plen = parts.length ();
    if (plen < 2) return path;
    // select the components that remain in the normalized path
    Strvec kept;
    for (long k = 0; k < plen; k++) {
      String part = parts.get (k);
      if (part.isnil () == true) {
        // an empty component is only preserved in last position
        if (k == plen - 1) kept.add (part);
      } else if (part == "..") {
        // go one level up when there is something to remove
        if (kept.empty () == false) kept.rml ();
      } else if (part != ".") {
        // regular component
        kept.add (part);
      }
    }
    // everything has been removed - this is the root path
    const long klen = kept.length ();
    if (klen == 0) return "/";
    // reassemble the path with a leading slash for each component
    String result = "";
    long k = 0;
    while (k < klen) {
      result += "/";
      result += kept.get (k++);
    }
    // eventually restore the trailing slash of the original path
    if (path.last () == Unicode::toquad ('/')) result += "/";
    return result;
  }

  // this procedure computes the base of a path so that another path can
  // be appended to it - a name that ends with a slash is its own base
  // otherwise the name is cut at its last slash with the lsubstr method
  // a name without any slash is returned untouched
  static String get_base_path (const String& name) {
    // a name terminated by a slash is already a base
    if (name.last () == Unicode::toquad ('/')) return name;
    // start from the end of the name
    long idx = name.length ();
    if (idx == 0) return name;
    // scan backward until the last slash is found
    while (--idx >= 0) {
      if (name[idx] == Unicode::toquad ('/')) return name.lsubstr (idx);
    }
    // no slash in the name
    return name;
  }

  // get a system path from a uri path - the uri path must be absolute
  // its components are joined one by one to the system root directory
  // and the joined path is returned in percent-encoded form

  static String get_sys_path (const String& path) {
    // only an absolute uri path can be converted
    const bool absp = (path.first () == Unicode::toquad ('/'));
    if (absp == false) {
      throw Exception ("path-error", "invalid uri path to convert", path);
    }
    // the root path maps directly to the system root directory
    if (path == "/") return System::rootdir ();
    // extract the path components - at least two are expected
    Strvec comps = Strvec::split (path, "/");
    const long clen = comps.length ();
    if (clen < 2) {
      throw Exception ("internal-error", "cannot convert uri path", path);
    }
    // join the components to the root directory - the component at index 0
    // is skipped (presumably the empty string before the leading slash)
    String spth = System::rootdir ();
    for (long k = 1; k < clen; k++) {
      spth = System::join (spth, comps.get (k));
    }
    return Uri::pencode (spth);
  }

  // this procedure extracts the host part of an authority string
  // a nil authority is returned as is - an authority that does not
  // match the host[:port] regex raises a uri-error exception
  static String get_auth_host (const String& auth) {
    // nothing to extract from a nil authority
    if (auth.isnil () == true) return auth;
    // match the authority against the host[:port] regex
    Regex re ("(<$a-._~>*)[:$d+]?");
    if (re != auth) {
      throw Exception ("uri-error", "invalid authority name", auth);
    }
    // the host is the first regex group
    return re.getstr (0);
  }

  // this procedure extracts the host part of a mail path, that is the
  // part matched after the '@' character - a nil path is returned as is
  // a path that does not match the mail regex raises a uri-error exception
  static String get_mail_host (const String& path) {
    // nothing to extract from a nil path
    if (path.isnil () == true) return path;
    // match the path against the user@host regex
    Regex re ("<$a+-_.%:>*@(<$a-._~>*)");
    if (re != path) {
      throw Exception ("uri-error", "invalid mail path name", path);
    }
    // the host is the first regex group
    return re.getstr (0);
  }

  // this procedure extracts the port number of an authority string
  // the value -1 is returned when the authority is nil or when it does
  // not match the host:port regex
  static long  get_auth_port (const String& auth) {
    // by default there is no port
    long result = -1;
    if (auth.isnil () == false) {
      // the port is the first group of the host:port regex
      Regex re ("<$a-._~>*:($d+)");
      if (re == auth) result = Utility::tointeger (re.getstr (0));
    }
    return result;
  }

  // -------------------------------------------------------------------------
  // - public section                                                       -
  // -------------------------------------------------------------------------

  // percent-encode a uri name - a blank character is mapped to '+'
  // the single quote, the double quote and the non-ascii bytes are
  // replaced by a '%' followed by the two characters produced by
  // Ascii::btoc - any other byte is copied as is

  String Uri::pencode (const String& name) {
    // a nil string is returned untouched
    if (name.isnil () == true) return name;
    // get the encoded byte array - it must be released by this method
    char* cbuf = name.encode ();
    try {
      Buffer buf;
      const long clen = Ascii::strlen (cbuf);
      for (long k = 0; k < clen; k++) {
        const char c = cbuf[k];
        // the blank character has its own mapping
        if (c == blkc) {
          buf.add ('+');
          continue;
        }
        // check whether the byte must be escaped
        const bool escf =
          (c == '\'') || (c == '"') || (Ascii::isascii (c) == false);
        if (escf == true) {
          buf.add ('%');
          buf.add (Ascii::btoc (c, false));
          buf.add (Ascii::btoc (c, true));
        } else {
          buf.add (c);
        }
      }
      // release the byte array before returning the result
      delete [] cbuf;
      return buf.tostring ();
    } catch (...) {
      // release the byte array on failure as well
      delete [] cbuf;
      throw;
    }
  }

  // percent-decode a uri name - a '+' character is mapped to a blank and
  // a '%' character followed by two hexadecimal digits is replaced by the
  // byte they represent - any other character must be an ascii character
  // @param name the percent-encoded name to decode
  // @return the decoded string, or the name itself when it is nil
  // a uri-error exception is raised when an escape sequence is truncated,
  // when it decodes to the nil character or when a non-ascii character is
  // found in the name

  String Uri::pdecode (const String& name) {
    // check for nil name
    if (name.isnil () == true) return name;
    // create a working buffer
    Buffer buf;
    // get the name length and loop
    const long len = name.length ();
    for (long i = 0; i < len; i++) {
      t_quad c = name[i];
      if (c == Unicode::toquad ('+')) {
        buf.add (' ');
        continue;
      }
      if (c == Unicode::toquad ('%')) {
        // an escape sequence needs two digits after the percent character
        // a sequence with a single digit was previously decoded silently
        if ((len - i) < 3) {
          throw Exception ("uri-error", "unterminated escape sequence");
        }
        // rebuild the byte from the high and low hexadecimal digits
        t_byte val = Ascii::htob (name[i+1]) << 4;
        val += Ascii::htob (name[i+2]);
        i += 2;
        // a decoded nil character is rejected as before
        if (val == nilc) {
          throw Exception ("uri-error", "unterminated escape sequence");
        }
        buf.add (static_cast <char> (val));
        continue;
      }
      // outside of an escape sequence only ascii characters are valid
      if (Unicode::isascii (c) == false) {
        throw Exception ("uri-error",
                         "non-ascii uri sequence to decode", name);
      }
      buf.add (c);
    }
    return buf.tostring ();
  }

  // normalize the uri name by adding a scheme to an incomplete name
  // a name with a scheme is returned as is - a qualified host name gets
  // the "http://" prefix, or "http:" when it already starts with "//"
  // a name starting with a slash gets the "file://" prefix - any other
  // name is returned untouched

  String Uri::nrmname (const String& name) {
    // a name with a scheme is already normalized
    Regex sre ("($l$a*):($N+)");
    if (sre == name) return name;
    // select the prefix to add in front of the name
    String prefix;
    Regex hre ("[<$a-_~>+.]+$s+[/$N*]?");
    if (hre == name) {
      // qualified host without any mark
      prefix = "http://";
    } else {
      Regex qre ("//[<$a-_~>+.]+$s+[/$N*]?");
      if (qre == name) {
        // qualified host already prefixed with //
        prefix = "http:";
      } else if (name.first () == Unicode::toquad ('/')) {
        // no scheme and no host - an absolute path is taken as a file
        prefix = "file://";
      } else {
        // nothing can be guessed - return the original name
        return name;
      }
    }
    // add the name to the selected prefix
    prefix += name;
    return prefix;
  }

  // normalize the uri name by prioritizing the system name - a name with
  // a scheme is returned as is - a name whose system path designates a
  // file gets the "file://" prefix - any other name is normalized by the
  // nrmname method
  
  String Uri::sysname (const String& name) {
    // a name with a scheme is already normalized
    Regex sre ("($l$a*):($N+)");
    if (sre == name) return name;
    // map the name to a system path
    Pathname path = name;
    String pnam = path.getsysp ();
    // without a file use the regular normalization
    if (System::isfile (pnam) == false) return Uri::nrmname (name);
    // here the system path is a file
    String result = "file://";
    result += pnam;
    return result;
  }

  // -------------------------------------------------------------------------
  // - class section                                                         -
  // -------------------------------------------------------------------------

  // create a default uri - all uri components are set to the empty
  // string by the reset method

  Uri::Uri (void) {
    reset ();
  }

  // create a uri by name - the components are first reset and the name
  // is then parsed - the exceptions raised by parse are propagated
  // @param uri the uri name to parse

  Uri::Uri (const String& uri) {
    reset ();
    parse (uri);
  }

  // copy construct this uri - the source uri is read locked while its
  // components are copied and the lock is released even when a copy
  // raises an exception
  // @param that the uri to copy

  Uri::Uri (const Uri& that) {
    that.rdlock ();
    try {
      d_schm  = that.d_schm;
      d_amrk  = that.d_amrk;
      d_auth  = that.d_auth;
      d_path  = that.d_path;
      d_query = that.d_query;
      d_frag  = that.d_frag;
      that.unlock ();
    } catch (...) {
      that.unlock ();
      throw;
    }
  }

  // assign a uri to this one - this uri is write locked and the source
  // uri is read locked while the components are copied - both locks are
  // released even when a copy raises an exception
  // @param that the uri to assign
  // @return this uri

  Uri& Uri::operator = (const Uri& that) {
    // nothing to do when assigning a uri to itself
    if (this == &that) return *this;
    // lock and copy
    wrlock ();
    that.rdlock ();
    try {
      d_schm  = that.d_schm;
      d_amrk  = that.d_amrk;
      d_auth  = that.d_auth;
      d_path  = that.d_path;
      d_query = that.d_query;
      d_frag  = that.d_frag;
      that.unlock ();
      unlock ();
      return *this;
    } catch (...) {
      that.unlock ();
      unlock ();
      throw;
    }
  }

  // return the class name - this is always the string "Uri"

  String Uri::repr (void) const {
    return "Uri";
  }

  // get a clone of this object - the clone is a new uri allocated with
  // the copy constructor

  Object* Uri::clone (void) const {
    return new Uri (*this);
  }

  // reset the uri information - all components (scheme, authority mark,
  // authority, path, query and fragment) are set to the empty string
  // under the write lock

  void Uri::reset (void) {
    wrlock ();
    // clear the trailing components
    d_frag  = "";
    d_query = "";
    d_path  = "";
    // clear the leading components
    d_auth  = "";
    d_amrk  = "";
    d_schm  = "";
    unlock ();
  }

  // get the uri name - the name is rebuilt from the scheme, a colon, the
  // authority mark, the authority and the path - a non nil query is added
  // after a '?' character and a non nil fragment after a '#' character

  String Uri::getname (void) const {
    rdlock ();
    try {
      // start with the mandatory part
      String name = d_schm;
      name += ":";
      name += d_amrk;
      name += d_auth;
      name += d_path;
      // check for a query to add
      const bool hasq = (d_query.isnil () == false);
      if (hasq == true) {
        name += '?';
        name += d_query;
      }
      // check for a fragment to add
      const bool hasf = (d_frag.isnil () == false);
      if (hasf == true) {
        name += '#';
        name += d_frag;
      }
      unlock ();
      return name;
    } catch (...) {
      unlock ();
      throw;
    }
  }

  // parse a uri and update the uri data structure - the uri is percent
  // encoded and then decomposed with regular expressions into a scheme,
  // an optional authority, a path, an optional query and an optional
  // fragment - a nil uri simply resets the object
  // @param uri the uri string to parse
  // a uri-error exception is raised when the scheme is missing or when
  // some characters remain after the decomposition - in case of error
  // the uri is reset before the exception is propagated

  void Uri::parse (const String& uri) {
    // check for nothing first
    if (uri.isnil () == true) {
      reset ();
      return;
    }
    // ok let's deal with it
    wrlock ();
    try {
      // reset everything
      reset  ();
      // percent encode the string
      String puri = Uri::pencode (uri);
      // the remaining string
      String rest;
      // extract the scheme
      Regex rsch ("($l$a*):($N+)");
      if (rsch == puri) {
        d_schm = rsch.getstr (0);
        rest   = rsch.getstr (1);
      } else {
        throw Exception ("uri-error", "empty scheme uri name", uri);
      }
      // now extract the authority
      Regex raut ("//(<$a-._~>*[:$d+]?)(</?#>?$N*)");
      if (raut == rest) {
        // extract the authority
        d_amrk = "//";
        d_auth = raut.getstr (0);
        rest   = raut.getstr (1);
        // now extract the path - with an authority the path is absolute
        Regex rpth ("(/<$a+-_.%:@,/'~()>*)(<?#>?$N*)");
        if (rpth == rest) {
          d_path = rpth.getstr (0);
          rest   = rpth.getstr (1);
        }
      } else {
        // now extract the path - without an authority
        Regex rpth ("(<$a+-_.%:@,'~()>*)(<?#>?$N*)");
        if (rpth == rest) {
          d_path = rpth.getstr (0);
          rest   = rpth.getstr (1);
        }
      }
      // now extract the query
      Regex rqry ("$?(<$a+-_.%/=;&:,@>*)(<$#>?$N*)");
      if (rqry == rest) {
        d_query = rqry.getstr (0);
        rest    = rqry.getstr (1);
      }
      // now extract the fragment
      Regex rfrg ("#($N*)");
      if (rfrg == rest) {
        d_frag = rfrg.getstr (0);
        rest = "";
      }
      // everything must have been consumed
      if (rest.length () != 0) {
        throw Exception ("uri-error", "incorrect uri format", uri);
      }
      // release the write lock - it was previously kept on success
      unlock ();
    } catch (...) {
      reset  ();
      unlock ();
      throw;
    }
  }

  // normalize the uri authority

  void Uri::nrmauth (void) {
    wrlock ();
    try {
      d_auth = d_auth.tolower ();
      unlock ();
    } catch (...) {
      unlock ();
      throw;
    }
  }
  
  // return the uri scheme - the scheme is copied under the read lock

  String Uri::getscheme (void) const {
    rdlock ();
    String schm = d_schm;
    unlock ();
    return schm;
  }

  // return the uri authority - the authority is copied under the
  // read lock

  String Uri::getauth (void) const {
    rdlock ();
    String auth = d_auth;
    unlock ();
    return auth;
  }

  // return the uri path - the stored path is percent-decoded and it is
  // also normalized when the uri has a "//" authority mark

  String Uri::getpath (void) const {
    rdlock ();
    try {
      // decode the stored path
      String dpth = Uri::pdecode (d_path);
      // normalize only with an authority mark
      const bool nrmf = (d_amrk == "//");
      String result = nrmf ? normalize_path (dpth) : dpth;
      unlock ();
      return result;
    } catch (...) {
      unlock ();
      throw;
    }
  }

  // return the uri query - the query is copied as stored, without any
  // decoding, under the read lock

  String Uri::getquery (void) const {
    rdlock ();
    String qstr = d_query;
    unlock ();
    return qstr;
  }

  // return the uri fragment in percent-decoded form
  // the read lock is released even when the decoding raises an exception
  // @return the decoded fragment string

  String Uri::getfragment (void) const {
    rdlock ();
    try {
      String result = Uri::pdecode (d_frag);
      unlock ();
      return result;
    } catch (...) {
      // pdecode raises an exception with an invalid sequence
      unlock ();
      throw;
    }
  }

  // get the uri base name - the base is made of the scheme, a colon,
  // the authority mark and the authority

  String Uri::getbase (void) const {
    rdlock ();
    try {
      // scheme part
      String base = d_schm;
      base += ":";
      // authority part
      base += d_amrk;
      base += d_auth;
      unlock ();
      return base;
    } catch (...) {
      unlock ();
      throw;
    }
  }

  // get the uri rname - this is the base name followed by the stored
  // (percent-encoded) path - with a "//" authority mark, the path is
  // normalized and a nil path is replaced by a single slash

  String Uri::getrnam (void) const {
    rdlock ();
    try {
      String result = getbase ();
      if (d_amrk != "//") {
        // without an authority mark the path is added as is
        result += d_path;
      } else if (d_path.isnil () == true) {
        // with an authority mark a nil path is the root path
        result += "/";
      } else {
        // with an authority mark the path is normalized
        result += normalize_path (d_path);
      }
      unlock ();
      return result;
    } catch (...) {
      unlock ();
      throw;
    }
  }

  // get the uri hname - this is the rname in percent-decoded form

  String Uri::gethnam (void) const {
    rdlock ();
    try {
      // get the encoded name first
      String rnam = getrnam ();
      // and decode it
      String hnam = Uri::pdecode (rnam);
      unlock ();
      return hnam;
    } catch (...) {
      unlock ();
      throw;
    }
  }
  
  // get the path target name - this is the last element obtained by
  // splitting the uri path with the slash separator - the path itself is
  // returned when it is nil or when the split produces no element

  String Uri::getptnm (void) const {
    rdlock ();
    try {
      // by default the result is the path itself
      String path = getpath ();
      String result = path;
      if (path.isnil () == false) {
        // split the path and select the last element if any
        Strvec elems = Strvec::split (path, "/");
        const long elen = elems.length ();
        if (elen != 0) result = elems.get (elen-1);
      }
      unlock ();
      return result;
    } catch (...) {
      unlock ();
      throw;
    }
  }

  // add a path to this uri and return the resulting uri - the path is
  // percent-encoded and only its path part (without query and fragment)
  // is used - an absolute path replaces the uri path while a relative
  // path is added to the base of the uri path - in the relative case the
  // query and the fragment of this uri are added to the new uri
  // @param path the path to add
  // @return a new uri, or a copy of this uri when the path part is nil
  // a path-error exception is raised when the path is not valid

  Uri Uri::addpath (const String& path) const {
    rdlock ();
    try {
      // fix the path
      String epth = Uri::pencode (path);
      // check if the path is valid
      Regex re ("([<$a+-_.%/,@>+]?)[$?$N+]?[$#$N+]?");
      if (re != epth) {
        throw Exception ("path-error", "invalid path to add to uri", path);
      }
      // without a path part there is nothing to add
      String rpth = re.getstr (0);
      if (rpth.isnil () == true) {
        Uri result = *this;
        unlock ();
        return result;
      }
      // rebuild the uri
      String uri = getbase ();
      // check if have an absolute path
      if (rpth.first () == Unicode::toquad ('/')) {
        uri += rpth;
      } else {
        // add the base path
        uri += get_base_path (d_path);
        // the path to add is relative here - make sure that exactly one
        // slash separates it from the base path
        if (uri.last () != Unicode::toquad ('/')) uri += '/';
        uri += rpth;
        // add optional query
        if (d_query.isnil () == false) {
          uri += '?';
          uri += d_query;
        }
        // add optional fragment - the fragment delimiter is '#' as in
        // getname (it was previously added with a '?')
        if (d_frag.isnil () == false) {
          uri += '#';
          uri += d_frag;
        }
      }
      // build the result uri
      Uri result = uri;
      unlock ();
      return result;
    } catch (...) {
      unlock ();
      throw;
    }
  }

  // get a new uri by path - a nil path gives a copy of this uri - a path
  // with a scheme is itself a uri and it is used to build the result
  // otherwise the path is added to this uri with the addpath method
  
  Uri Uri::gethref (const String& path) const {
    rdlock ();
    try {
      // a nil path refers to this uri
      if (path.isnil () == true) {
        Uri result = *this;
        unlock ();
        return result;
      }
      // a path with a scheme is a complete uri
      Regex rsch ("($l$a*):($N+)");
      if (rsch == path) {
        Uri result = path;
        unlock ();
        return result;
      }
      // here the path is added to this uri
      Uri result = addpath (path);
      unlock ();
      return result;
    } catch (...) {
      unlock ();
      throw;
    }
  }

  // get a system path from the uri path - this is only possible with the
  // file scheme - a scheme-error exception is raised with another scheme

  String Uri::getsysp (void) const {
    rdlock ();
    try {
      // only the file scheme can be converted
      const bool isfs = (d_schm == "file");
      if (isfs == false) {
        throw Exception ("scheme-error", 
                         "cannot convert to system path without file scheme");
      }
      // get the uri path and convert it
      String upth = getpath ();
      String spth = get_sys_path (upth);
      unlock ();
      return spth;
    } catch (...) {
      unlock ();
      throw;
    }
  }

  // get the path encoded name - this is the path as stored by the
  // parse method, without any decoding nor normalization

  String Uri::getpenc (void) const {
    rdlock ();
    try {
      String penc = d_path;
      unlock ();
      return penc;
    } catch (...) {
      unlock ();
      throw;
    }
  }

  // get the uri host if any - with the ftp, http and https schemes the
  // host is extracted from the authority - with the mailto scheme the
  // host is extracted from the path - with any other scheme the result
  // is a default string

  String Uri::gethost (void) const {
    rdlock ();
    try {
      String host;
      // check for a scheme with the host in the authority
      const bool authf =
        (d_schm == "ftp") || (d_schm == "http") || (d_schm == "https");
      if (authf == true) {
        host = get_auth_host (d_auth);
      } else if (d_schm == "mailto") {
        // the mail host is in the path
        host = get_mail_host (d_path);
      }
      unlock ();
      return host;
    } catch (...) {
      unlock ();
      throw;
    }
  }

  // get the uri port - with the ftp, http and https schemes the port is
  // extracted from the authority and the scheme default port is used
  // when the authority has no port - with the mailto scheme the port is
  // the smtp port - a uri-error exception is raised with another scheme

  long Uri::getport (void) const {
    rdlock ();
    try {
      long port = -1;
      if (d_schm == "mailto") {
        // a mail uri always uses the smtp port
        port = URI_SMTP_PORT;
      } else {
        // select the default port from the scheme
        long dflt = -1;
        if (d_schm == "ftp") {
          dflt = URI_XFTP_PORT;
        } else if (d_schm == "http") {
          dflt = URI_HTTP_PORT;
        } else if (d_schm == "https") {
          dflt = URI_HTPS_PORT;
        }
        // with a supported scheme the authority port has priority
        if (dflt != -1) {
          port = get_auth_port (d_auth);
          if (port == -1) port = dflt;
        }
      }
      // no port means an unsupported scheme
      if (port == -1) {
        throw Exception ("uri-error", "invalid uri port access", getname ());
      }
      unlock ();
      return port;
    } catch (...) {
      unlock ();
      throw;
    }
  }

  // -------------------------------------------------------------------------
  // - object section                                                        -
  // -------------------------------------------------------------------------

  // the quark zone - the zone is created with a length that matches the
  // number of quarks interned below (18)
  static const long QUARK_ZONE_LENGTH = 18;
  static QuarkZone  zone (QUARK_ZONE_LENGTH);

  // the uri supported quarks - each quark is interned in the zone with
  // the method name that the apply method dispatches on
  static const long QUARK_PARSE       = zone.intern ("parse");
  static const long QUARK_GETNAME     = zone.intern ("get-name");
  static const long QUARK_GETBASE     = zone.intern ("get-base");
  static const long QUARK_GETRNAM     = zone.intern ("get-rname");
  static const long QUARK_GETHNAM     = zone.intern ("get-hname");
  static const long QUARK_ADDPATH     = zone.intern ("add-path");
  static const long QUARK_GETHREF     = zone.intern ("get-href");
  static const long QUARK_GETAUTH     = zone.intern ("get-authority");
  static const long QUARK_GETPATH     = zone.intern ("get-path");
  static const long QUARK_GETPTNM     = zone.intern ("get-path-target");
  static const long QUARK_GETSYST     = zone.intern ("get-system-path");
  static const long QUARK_GETPENC     = zone.intern ("get-path-encoded");
  static const long QUARK_GETHOST     = zone.intern ("get-host");
  static const long QUARK_GETPORT     = zone.intern ("get-port");
  static const long QUARK_NRMAUTH     = zone.intern ("normalize-authority");
  static const long QUARK_GETQUERY    = zone.intern ("get-query");
  static const long QUARK_GETSCHEME   = zone.intern ("get-scheme");
  static const long QUARK_GETFRAGMENT = zone.intern ("get-fragment");

  // create a new object in a generic way - without argument a default
  // uri is created - with one argument the uri is created from the
  // string argument - an argument-error exception is raised otherwise

  Object* Uri::mknew (Vector* argv) {
    // a nil vector is like an empty vector
    const long argc = (argv == nilp) ? 0 : argv->length ();
    // select the constructor from the number of arguments
    switch (argc) {
    case 0:
      return new Uri;
    case 1:
      return new Uri (argv->getstring (0));
    default:
      break;
    }
    // invalid arguments
    throw Exception ("argument-error", "invalid arguments with uri object");
  }

  // return true if the given quark is defined - the quark is searched in
  // the uri quark zone and then, only when the hflg flag is set, with
  // the Nameable::isquark method

  bool Uri::isquark (const long quark, const bool hflg) const {
    rdlock ();
    // look in the local zone first
    bool result = (zone.exists (quark) == true);
    // eventually look in the nameable object
    if ((result == false) && (hflg == true)) {
      result = Nameable::isquark (quark, hflg);
    }
    unlock ();
    return result;
  }

  // apply this object with a set of arguments and a quark - the quark
  // is dispatched according to the number of arguments - a quark that
  // is not handled here is forwarded to the Nameable::apply method

  Object* Uri::apply (Runnable* robj, Nameset* nset, const long quark,
                      Vector* argv) {
    // a nil vector is like an empty vector
    const long argc = (argv == nilp) ? 0 : argv->length ();

    switch (argc) {
    case 0:
      // accessors without argument
      if (quark == QUARK_GETNAME)     return new String  (getname     ());
      if (quark == QUARK_GETBASE)     return new String  (getbase     ());
      if (quark == QUARK_GETRNAM)     return new String  (getrnam     ());
      if (quark == QUARK_GETHNAM)     return new String  (gethnam     ());
      if (quark == QUARK_GETAUTH)     return new String  (getauth     ());
      if (quark == QUARK_GETPATH)     return new String  (getpath     ());
      if (quark == QUARK_GETPTNM)     return new String  (getptnm     ());
      if (quark == QUARK_GETSYST)     return new String  (getsysp     ());
      if (quark == QUARK_GETPENC)     return new String  (getpenc     ());
      if (quark == QUARK_GETHOST)     return new String  (gethost     ());
      if (quark == QUARK_GETPORT)     return new Integer (getport     ());
      if (quark == QUARK_GETQUERY)    return new String  (getquery    ());
      if (quark == QUARK_GETSCHEME)   return new String  (getscheme   ());
      if (quark == QUARK_GETFRAGMENT) return new String  (getfragment ());
      // the authority normalization does not return anything
      if (quark == QUARK_NRMAUTH) {
        nrmauth ();
        return nilp;
      }
      break;
    case 1:
      // the argument is only read as a string for a supported quark
      if (quark == QUARK_PARSE) {
        parse (argv->getstring (0));
        return nilp;
      }
      if (quark == QUARK_ADDPATH) {
        String apth = argv->getstring (0);
        return new Uri (addpath (apth));
      }
      if (quark == QUARK_GETHREF) {
        String href = argv->getstring (0);
        return new Uri (gethref (href));
      }
      break;
    default:
      break;
    }
    // call the nameable object
    return Nameable::apply (robj, nset, quark, argv);
  }
}


// syntax highlighted by Code2HTML, v. 0.9.1