---
-- The browser object is the plugin's interface to the web.
-- The object has the following methods:
--
-- get_uri(uri,exhed) : returns string,err and takes the uri in
-- "http://" form,exhed are extra header lines you want to add
-- , for example {"Range: bytes 0-100","User-agent: fake" }
--
-- get_head(uri,exhed,fallback) : returns string,err and takes
-- the uri in
-- "http://" form,exhed are extra header lines you want to add.
-- returns only the header, not the body. If fallback is true and the
-- HEAD request fails, a GET with "Range: bytes=0-1" is tried
--
-- get_head_and_body(uri,exhed,fallback) : returns string,string,err and
-- takes the same arguments of get_uri, but returns as the first value the
-- header
--
-- pipe_uri(uri,callback,exhed) :
-- Gets the uri and uses callback on the data
-- received,exhed are extra header lines you want to add.
-- The callback takes a string (the data) argument and returns a couple:
-- the amount of bytes served (if it differs from the string.len
-- of the argument it is considered an error) and an error message.
-- pipe_uri returns a string that is nil
-- on error, "" on end of transmission.
-- It also returns an error message, if any.
-- See
-- libero.lua for an example on how to use the callback.
--
-- pipe_uri_with_header(self,url,cb_h,cb_b,exhed) :
-- As pipe_uri, but uses cb_h for the header and cb_b for the body.
-- Since the browser module doesn't know the result of the GET it
-- will not follow redirects. The mimer module uses this.
--
-- post_uri(uri,post,exhed) : returns string,err and takes the uri in
-- "http://" form, the post data in "name=val&..." form
-- (you may need to urlescape it by hand), exhed are extra header lines
-- you want to add
--
-- show() : Debug printing on the browser content.
--
-- whathaveweread() : returns the page's url we have returned
-- (may differ from the requested if we got a redirect).
--
-- wherearewe() : returns the host we have contacted
-- (may differ from the requested if we got a redirect).
--
-- add_cookie(url,c) :adds cookie c as if received browsing
-- url
--
-- get_cookie(name) : returns the table containing all cookie
-- attributes. Since the returned table is the same table the browser uses
-- (i.e., passed by reference) be careful. If you modify its values the
-- browser will be affected too.
--
-- verbose_mode() : activates the verbose logging of CURL
--
-- ssl_init_stuff() : some stuff for SSL
--
require("curl")
local cookie = require("browser.cookie")
-- the methods of a browser objects
local Private = {}
-- local functions
local Hidden = {}
--============================================================================--
-- This is part of FreePOPs (http://www.freepops.org) released under GNU/GPL
--============================================================================--
--<==========================================================================>--
-- Reason phrases for the HTTP error status codes, indexed by the status
-- code written as a string. Hidden.errorcode uses it to build its message.
Hidden.errors = {
  -- 4xx: client errors
  ["400"] = "Bad Request",
  ["401"] = "Unauthorized",
  ["402"] = "Payment Required",
  ["403"] = "Forbidden",
  ["404"] = "Not Found",
  ["405"] = "Method Not Allowed",
  ["406"] = "Not Acceptable",
  ["407"] = "Proxy Authentication Required",
  ["408"] = "Request Timeout",
  ["409"] = "Conflict",
  ["410"] = "Gone",
  ["411"] = "Length Required",
  ["412"] = "Precondition Failed",
  ["413"] = "Request Entity Too Large",
  ["414"] = "Request-URI Too Long",
  ["415"] = "Unsupported Media Type",
  ["416"] = "Requested Range Not Satisfiable",
  ["417"] = "Expectation Failed",
  -- 5xx: server errors
  ["500"] = "Internal Server Error",
  ["501"] = "Not Implemented",
  ["502"] = "Bad Gateway",
  ["503"] = "Service Unavailable",
  ["504"] = "Gateway Timeout",
  ["505"] = "HTTP Version Not Supported",
}

-- Outcome codes handed from Hidden.parse_header to
-- Hidden.continue_or_return:
--   DONE: the answer is final
--   REDO: the request must be repeated on another url
Hidden.DONE = 0
Hidden.REDO = 1
-- Builds a write callback that accumulates every received chunk in t.
-- @param t the table the chunks are appended to, in arrival order
-- @return a function(s,len) that stores s and answers len,nil, that is
--   number_of_byte_served,error_message (a number of bytes different
--   from the length of s would be considered an error)
function Hidden.build_w_cb(t)
  local function store_chunk(s,len)
    table.insert(t,s)
    return len,nil
  end
  return store_chunk
end
-- Looks in a header table like {"Referer: xxx","Cookie: yyy"} for the
-- first field that starts with s.
-- The prefix is compared literally (s is NOT a Lua pattern, so characters
-- such as "-" or "." have no special meaning) and ignoring case, since
-- HTTP header names are case-insensitive.
-- @param t array of header lines
-- @param s the prefix to look for, for example "Referer:"
-- @return the whole matching field, or nil if no field starts with s
function Hidden.find_in_header(t,s)
  local wanted = string.lower(s)
  local n = string.len(s)
  for _,field in ipairs(t) do
    if string.lower(string.sub(field,1,n)) == wanted then
      return field
    end
  end
  return nil
end
-- Prepares the request: builds the header list (extra headers, Referer,
-- Cookie and Host) and sets it, together with the url, on the curl handle.
-- @param self the browser object
-- @param url the uri that is going to be requested
-- @param exhed optional array of extra header lines; it is never modified
function Hidden.build_header(self,url,exhed)
  local u = cookie.parse_url(url)
  -- clean expired cookies
  cookie.clean_expired(self.cookies)
  -- Work on a private copy of the extra headers. The same exhed table is
  -- handed over again when a redirect is followed, so appending to it
  -- would send the Cookie/Host/Referer lines of the previous request
  -- together with the ones computed for the new url.
  local head = {}
  for _,line in ipairs(exhed or {}) do
    table.insert(head,line)
  end
  local cook = cookie.get(self.cookies,u.path,u.host,u.host)
  -- a Referer given by the caller wins over the one we remember
  if self.referrer then
    if Hidden.find_in_header(head,"Referer:") == nil then
      table.insert(head,"Referer: "..self.referrer)
    end
  end
  if cook ~= nil then
    table.insert(head,"Cookie: "..cook)
  end
  if u.host ~= nil then
    -- This is a terrible hack. I had to put it so that hotmail would
    -- work. The grammar that hotmail uses differs from the one
    -- described in cookie.lua.
    if (string.find(u.host, "hotmail") ~= nil) then
      -- drop everything from the first "?" on
      u.host = string.gsub(u.host, "%?.*$", "")
    end
    table.insert(head,"Host: "..u.host)
  end
  self.curl:setopt(curl.OPT_HTTPHEADER,head)
  -- the url
  self.curl:setopt(curl.OPT_URL,url)
end
-- Digests the response header: remembers url as the new referrer and
-- merges every cookie found in a Set-Cookie line into self.cookies.
-- @param self the browser object
-- @param url the uri that has just been requested
-- @param gl_h the table of received header lines
function Hidden.cookie_and_referer(self,url,gl_h)
  local set_cookie =
    "^[Ss][Ee][Tt]%-[Cc][Oo][Oo][Kk][Ii][Ee]%s*:%s*(.*)"
  local parsed = cookie.parse_url(url)
  -- save referrer
  self.referrer = url
  for _,line in pairs(gl_h) do
    local _,_,content = string.find(line,set_cookie)
    if content ~= nil then
      -- the cookies are parsed relative to the host we asked
      local received = cookie.parse_cookies(content,parsed.host)
      cookie.merge(self.cookies,received)
    end
  end
end
-- Turns a relative Location into a path rooted at "/", adding
-- dirname(u.path) .. "/" .. location if needed.
-- @param l the parsed (relative) location, only l.path is read
-- @param u the parsed url of the request, only u.path is read
-- @param location the Location value as received
-- @return the adjusted path, always starting with "/"
-- NOTE(review): clean_2_slash is applied to the whole result, so a "//"
-- inside a query string of location is collapsed too - confirm this is
-- acceptable.
function Hidden.adjust_path(l,u,location)
  local function clean_2_slash(s)
    -- the parentheses drop the substitution count returned by gsub
    return (string.gsub(s,"//","/"))
  end
  -- returns the directory part of path, with leading and trailing "/"
  local function dirname(path)
    -- a missing path is treated like an empty one (the result is "/")
    -- instead of raising an error in string.sub; presumably
    -- parse_url leaves path nil for urls like "http://host"
    path = path or ""
    local base = ""
    if string.sub(path, -1, -1) == "/" then
      -- is a dir, so the dirname is the whole path
      if (string.sub(path, 1, 1) ~= "/") then
        base = "/" .. path
      else
        base = path
      end
    else
      local rc = {}
      string.gsub(path,"([^/]+)",function(s)
        table.insert(rc,s)
      end)
      -- delete last element (the file name)
      table.remove(rc,table.getn(rc))
      base = "/" .. table.concat(rc,"/") .. "/"
    end
    return clean_2_slash(base)
  end
  local u_dir = dirname(u.path)
  local l_dir = dirname(l.path)
  if (l_dir == "/") then
    return clean_2_slash(u_dir .. location)
  end
  -- plain search: l_dir comes from a url and may contain characters
  -- that are magic in Lua patterns ("%" of url-escapes, "-", "."),
  -- which would make the search fail or raise an error
  local x,y = string.find(u_dir,l_dir,1,true)
  if ( x == nil or y ~= string.len(u_dir)) then
    -- the l_dir path is not included in the u_dir, so
    -- we keep it untouched
    return clean_2_slash("/" .. location)
  end
  -- u_dir ends with l_dir: keep only what precedes it, since
  -- location already carries that part
  return clean_2_slash(string.sub(u_dir,1,x) .. "/" .. location)
end
-- Gets the value of the Location: field from a header table, completing
-- it with the scheme, host and port of url when the server sent a
-- relative uri.
-- @param gl_h array of received header lines
-- @param url the absolute uri that has been requested
-- @return the absolute location, or nil if there is no Location: field
-- Raises an error if url is not absolute or if the location can not be
-- made absolute.
function Hidden.get_location(gl_h,url)
  -- anchored, so that other fields ending in "Location"
  -- (Content-Location:) are not taken for a redirect target
  local pattern =
    "^[Ll][Oo][Cc][Aa][Tt][Ii][Oo][Nn]%s*:%s*([^\r\n]*)"
  for _,line in ipairs(gl_h) do
    local _,_,location = string.find(line,pattern)
    if location ~= nil then
      -- ah ah ah, what do you think? the RFC says that
      -- Location wants an absolute uri, but the
      -- wonderful IIS sends a relative uri
      local l = cookie.parse_url(location)
      if ( l.host == nil or l.scheme == nil) then
        local u = cookie.parse_url(url)
        if ( u.host == nil or u.scheme == nil) then
          error("get_location must be called "..
            "with an absolute uri")
        end
        location = Hidden.adjust_path(l,u,location)
        if (l.host == nil) then
          -- keep the port of the original request, or a
          -- relative redirect on host:8080 would go to the
          -- default port
          local authority = u.host
          if u.port ~= nil then
            authority = authority .. ":" .. u.port
          end
          location = authority .. location
        end
        if (l.scheme == nil) then
          location = u.scheme .. "://"..location
        end
        -- check that what we have built is really absolute
        l = cookie.parse_url(location)
        if ( l.host == nil or l.scheme == nil) then
          error("unable to recover bad Location")
        end
      end
      return location
    end
  end
  return nil
end
-- Gets the url of the Refresh: field from a header table.
-- Both "Refresh: 5;url=xxx" and the widespread "Refresh: 5; url=xxx"
-- (blanks around ";" and "=") are recognized. The url is returned as
-- received, it is not made absolute here.
-- @param gl_h array of received header lines
-- @return the url, or nil if there is no usable Refresh: field
function Hidden.get_refresh_location(gl_h)
  local pattern = "^[Rr][Ee][Ff][Rr][Ee][Ss][Hh]%s*:%s*%d+%s*;%s*"..
    "[Uu][Rr][Ll]%s*=%s*([^\r\n]*)"
  for _,line in ipairs(gl_h) do
    local _,_,location = string.find(line,pattern)
    if location ~= nil then
      return location
    end
  end
  return nil
end
-- Logs the message s with log.say and hands it back in the nil,message
-- form used by this module to report errors.
-- @param s the error message
-- @return nil,s
function Hidden.error(s)
  local failed = nil
  log.say(s)
  return failed,s
end
-- Builds the error answer for an HTTP error status code.
-- @param ret the status code as a string, for example "404"
-- @return nil and "code: reason"; the reason is "unknown error" for codes
--   that are not listed in Hidden.errors
function Hidden.errorcode(ret)
  local reason = Hidden.errors[ret]
  if not reason then
    reason = "unknown error"
  end
  return nil,(ret .. ": " .. reason)
end
-- Tells if a status line may be the answer of a proxy to an HTTP CONNECT:
-- a proxy is configured, the url starts with "https" and the code is 200.
-- @param b the browser object (only b.proxy is read)
-- @param url the requested uri
-- @param ret the status code as a string
-- @return boolean
function Hidden.is_https_proxy_tunnel(b, url, ret)
  if b.proxy == nil then
    return false
  end
  if string.sub(url, 1, 5) ~= "https" then
    return false
  end
  return ret == "200"
end
-- Reads the HTTP return code and returns
--   nil,error if an error
--   DONE,nil  if ok
--   REDO,url  if a 3xx redirect (or a followed Refresh: header)
-- A leading 1xx answer, or the "200" answer of a proxy to the CONNECT of
-- an https request, is skipped and the header block that follows it is
-- parsed instead.
-- @param self the browser object
-- @param gl_h array of received header lines, the status line first
-- @param url the requested uri
function Hidden.parse_header(self,gl_h,url)
  if gl_h[1] == nil then
    return Hidden.error("malformed HTTP header line: nil")
  end
  local _,_,ret = string.find(gl_h[1],"[^%s]+%s+(%d%d%d)")
  if ret == nil then
    --print("STRANGE HEADER!")
    table.foreach(gl_h,print)
    return Hidden.error("malformed HTTP header line: "..gl_h[1])
  end
  local class = string.sub(ret,1,1)

  -- HTTP 1xx or HTTPS proxy tunnel.
  -- This must be checked before the 2xx case: the tunnel answer is a
  -- "200", so testing 2xx first would take the answer of the proxy for
  -- the answer of the server and hide its redirects and errors.
  if class == "1" or Hidden.is_https_proxy_tunnel(self, url, ret) then
    local gl_h1 = {} -- to not lose the real header
    local end_of_first = false
    for i=1,table.getn(gl_h) do
      if end_of_first then
        table.insert(gl_h1,gl_h[i])
      end
      -- an empty line closes the first header block
      if gl_h[i] == "\r\n" then
        end_of_first = true
      end
    end
    if gl_h1[1] ~= nil then
      return Hidden.parse_header(self,gl_h1,url)
    elseif class == "1" then
      return Hidden.error("Malformed HTTP/1.x 1xx header")
    end
    -- a 200 with nothing after it is the real answer of the server
    -- (no CONNECT answer in the header): handled below as a 2xx
  end

  -- HTTP 2xx
  if class == "2" then
    if self.followRefreshHeader == true then
      local l = Hidden.get_refresh_location(gl_h)
      if l ~= nil then
        return Hidden.REDO,l
      end
    end
    return Hidden.DONE,nil
  -- HTTP 3xx
  elseif class == "3" then
    -- 308 is followed like 307; the methods of this module repeat
    -- the request as they do for the other redirects
    if ret=="301" or ret=="302" or ret=="303" or
       ret=="307" or ret=="308" then
      local l = Hidden.get_location(gl_h,url)
      if l ~= nil then
        return Hidden.REDO,l
      end
      return Hidden.error("Unable to find Location:")
    end
    -- 300, 304, 305 and any other 3xx: always report an error
    -- instead of returning nothing
    return Hidden.error("Unsupported HTTP "..ret.." code")
  -- HTTP 4xx and 5xx
  elseif class == "4" or class == "5" then
    return Hidden.errorcode(ret)
  else
    return Hidden.error("Unsupported HTTP "..ret.." code")
  end
end
-- Starts curl: installs the body and header callbacks, performs the
-- transfer and interprets the result.
-- @param self the browser object
-- @param url the requested uri
-- @param gl_h where the header goes: a table (lines are appended) or a
--   callback function
-- @param gl_b where the body goes: a table (chunks are appended) or a
--   callback function
-- @return nil,err if curl:perform gives a non zero code; rc,err as given
--   by curl if gl_h is a function (the header is not available here);
--   otherwise what Hidden.parse_header returns, after cookies and
--   referrer have been updated
-- Raises an error if gl_b or gl_h is neither a table nor a function.
function Hidden.perform(self,url,gl_h,gl_b)
  -- a table is filled by a storing callback, a function is used as is
  local function as_callback(sink)
    local kind = type(sink)
    if kind == "table" then
      return Hidden.build_w_cb(sink)
    elseif kind == "function" then
      return sink
    end
    return nil
  end

  -- the callback for the body
  local body_cb = as_callback(gl_b)
  if body_cb == nil then
    error("Hidden.perform must be called with table/function gl_b"..
      ", but is called with a "..type(gl_b))
  end
  self.curl:setopt(curl.OPT_WRITEFUNCTION,body_cb)

  -- the callback for the header
  local header_cb = as_callback(gl_h)
  if header_cb == nil then
    error("Hidden.perform must be called with table/function gl_h"..
      ", but is called with a "..type(gl_h))
  end
  self.curl:setopt(curl.OPT_HEADERFUNCTION,header_cb)

  local rc,err = self.curl:perform()
  -- check result
  if rc ~= 0 then
    return nil,err
  end
  if type(gl_h) == "function" then
    -- we have *not* the header!
    return rc,err
  end
  Hidden.cookie_and_referer(self,url,gl_h)
  return Hidden.parse_header(self,gl_h,url)
end
-- Tells if t is a table of strings. Only the first element is inspected.
-- @param t the table to check
-- @return true if t[1] is a string, false otherwise
function Hidden.is_a_string_table(t)
  return type(t[1]) == "string"
end
-- Finishes a request, so that the call on a redirect has not to be done
-- by hand.
-- @param rc,err what Hidden.perform returned
-- @param t the received data: a table of strings, or a couple
--   {header_table,body_table} in the case of get_head_and_body
-- @param f the function to call if the request has to be redone
-- @param ... the arguments for f
-- @return on DONE the concatenated data and nil (header, body and nil
--   for a couple); on REDO what f returns; nil,err on error
-- Raises an error if t is not a table or rc is not a known value.
function Hidden.continue_or_return(rc,err,t,f,...)
  if rc == Hidden.DONE then
    if type(t) ~= "table" then
      error("Hidden.continue_or_return(_,_,t,...): "..
        "t of invalit type")
    end
    -- An empty table is an empty answer (no body chunk has been
    -- received), not a {header,body} couple. Treating it as a couple
    -- would return "","" and the caller would read the second ""
    -- as an error message.
    if t[1] == nil or Hidden.is_a_string_table(t) then
      return table.concat(t),nil
    end
    -- we are in the case of the get_head_and_body
    return
      table.concat(t[1] or {}),
      table.concat(t[2] or {}),nil
  elseif rc == Hidden.REDO then
    -- arg is the implicit table holding the extra arguments
    return f(unpack(arg))
  elseif rc == nil then
    return nil,err
  else
    error("Hidden.perform returned something strange")
  end
end
-- Handles local redirects: completes a location that is not an absolute
-- uri using the last visited url (self.referrer).
-- @param self the browser object
-- @param loc the location to follow; callers also pass nil or an error
--   message here when there is nothing to follow
-- @return nil if loc is nil, otherwise the (possibly completed) location
function Hidden.mangle_location(self,loc)
  if loc == nil then return nil end
  -- self.referrer is false until a request has been completed. In that
  -- case loc can only be the error message of a failed first request:
  -- there is nothing to complete it with, and parsing a non-string
  -- referrer must be avoided.
  if type(self.referrer) ~= "string" then return loc end
  -- some servers send a not RFC compliant header
  if string.find(loc,"^[Hh/]") == nil then
    -- neither a scheme nor an absolute path:
    -- we have to add the whole "http://host/path/"
    local part_path = nil
    local u = cookie.parse_url(self.referrer)
    if u ~= nil then
      -- locals, so that the global "_" is not overwritten
      local _,_,dir = string.find(u.path or "/","(.*/)")
      part_path = dir
    end
    loc = (part_path or "/") .. loc
  end
  -- now find where we have to go!
  if string.byte(loc,1) == string.byte("/",1) then
    local u = cookie.parse_url(self.referrer)
    if u ~= nil then
      local port = u.port
      if port == nil then
        -- the default port depends on the scheme: 80 would
        -- send an https redirect to the wrong port
        if string.lower(u.scheme) == "https" then
          port = "443"
        else
          port = "80"
        end
      end
      loc = u.scheme .. "://" .. u.host .. ":" .. port .. loc
    end
  end
  return loc
end
--<==========================================================================>--
-- GETs url and returns its body.
-- @param url the uri, in "http://" form
-- @param exhed optional table of extra header lines
-- @return what Hidden.continue_or_return gives: the concatenated body on
--   success, nil and an error message on failure. A redirect is followed
--   calling get_uri again on the new location.
function Private.get_uri(self,url,exhed)
  local body,header = {},{}
  local handle = self.curl
  handle:setopt(curl.OPT_HTTPGET,1)
  handle:setopt(curl.OPT_CUSTOMREQUEST,"GET")
  Hidden.build_header(self,url,exhed)
  local rc,err = Hidden.perform(self,url,header,body)
  local next_url = Hidden.mangle_location(self,err)
  return Hidden.continue_or_return(rc,err,body,
    Private.get_uri,self,next_url,exhed)
end
-- GETs url and returns both its header and its body.
-- @param url the uri, in "http://" form
-- @param exhed optional table of extra header lines
-- @return header,body,err: the header and the body as strings on
--   success, nil and an error message on failure
function Private.get_head_and_body(self,url,exhed)
  local gl_b,gl_h = {},{}
  self.curl:setopt(curl.OPT_HTTPGET,1)
  self.curl:setopt(curl.OPT_CUSTOMREQUEST,"GET")
  Hidden.build_header(self,url,exhed)
  local rc,err = Hidden.perform(self,url,gl_h,gl_b)
  -- A redirect is followed with get_head_and_body itself. Following it
  -- with get_uri would return body,err: the caller would get the body
  -- in place of the header and nil in place of the body.
  return Hidden.continue_or_return(rc,err,{gl_h,gl_b},
    Private.get_head_and_body,self,
    Hidden.mangle_location(self,err),exhed)
end
-- Requests url like get_uri does, but with a custom request method.
-- @param url the uri, in "http://" form
-- @param custom the request method to send in place of "GET"
-- @param exhed optional table of extra header lines
-- @return the concatenated body on success, nil and an error message on
--   failure. A redirect is followed with a plain get_uri.
function Private.custom_get_uri(self,url,custom,exhed)
  local gl_b,gl_h = {},{}
  -- Put the handle back in GET mode, as the other GET-like methods of
  -- this object do. OPT_CUSTOMREQUEST only replaces the method name, so
  -- without this a handle used before by post_uri would still send the
  -- old post data.
  self.curl:setopt(curl.OPT_HTTPGET,1)
  self.curl:setopt(curl.OPT_CUSTOMREQUEST,custom)
  Hidden.build_header(self,url,exhed)
  local rc,err = Hidden.perform(self,url,gl_h,gl_b)
  return Hidden.continue_or_return(rc,err,gl_b,
    Private.get_uri,self,Hidden.mangle_location(self,err),exhed)
end
-- Sends post to url like post_uri does, but with a custom request method.
-- @param url the uri, in "http://" form
-- @param custom the request method to send in place of "POST"
-- @param post the data to send, in "name=val&..." form
-- @param exhed optional table of extra header lines
-- @return the concatenated body on success, nil and an error message on
--   failure. A redirect is followed with get_uri, the data is not sent
--   again.
function Private.custom_post_uri(self,url,custom,post,exhed)
  local body,header = {},{}
  local handle = self.curl
  handle:setopt(curl.OPT_POST,1)
  handle:setopt(curl.OPT_POSTFIELDS,post)
  handle:setopt(curl.OPT_CUSTOMREQUEST,custom)
  Hidden.build_header(self,url,exhed)
  local rc,err = Hidden.perform(self,url,header,body)
  local next_url = Hidden.mangle_location(self,err)
  return Hidden.continue_or_return(rc,err,body,
    Private.get_uri,self,next_url,exhed)
end
-- POSTs post to url and returns the body of the answer.
-- @param url the uri, in "http://" form
-- @param post the data to send, in "name=val&..." form
-- @param exhed optional table of extra header lines
-- @return the concatenated body on success, nil and an error message on
--   failure. A redirect is followed with get_uri, the data is not sent
--   again.
function Private.post_uri(self,url,post,exhed)
  local body,header = {},{}
  local handle = self.curl
  handle:setopt(curl.OPT_POST,1)
  handle:setopt(curl.OPT_POSTFIELDS,post)
  handle:setopt(curl.OPT_CUSTOMREQUEST,"POST")
  Hidden.build_header(self,url,exhed)
  local rc,err = Hidden.perform(self,url,header,body)
  local next_url = Hidden.mangle_location(self,err)
  return Hidden.continue_or_return(rc,err,body,
    Private.get_uri,self,next_url,exhed)
end
-- Adds the cookie c as if it had been received browsing url.
-- @param url the uri the cookie pretends to come from (its host is used)
-- @param c the cookie, in the form accepted by cookie.parse_cookies
function Private.add_cookie(self,url,c)
  local origin = cookie.parse_url(url)
  local parsed = cookie.parse_cookies(c,origin.host)
  cookie.merge(self.cookies,parsed)
end
-- Looks for a cookie by name.
-- @param name the name of the cookie
-- @return the table of the first cookie found whose name field is equal
--   to name (the very table the browser uses, not a copy), or nothing if
--   there is none
function Private.get_cookie(self,name)
  for _,candidate in pairs(self.cookies) do
    if candidate.name == name then
      return candidate
    end
  end
end
-- Requests only the header of url, with a HEAD.
-- @param url the uri, in "http://" form
-- @param exhed optional table of extra header lines
-- @param fallback if true and the HEAD fails, a GET with
--   "Range: bytes=0-1" is tried
-- @return the header as a string on success, nil and an error message on
--   failure. A redirect is followed with get_head, keeping both exhed
--   and fallback.
-- NOTE(review): the fallback GET is sent with the Range header only,
-- exhed is not used for it - confirm this is intended.
function Private.get_head(self,url,exhed,fallback)
  local gl_b,gl_h = {},{}
  self.curl:setopt(curl.OPT_HTTPGET,1)
  self.curl:setopt(curl.OPT_CUSTOMREQUEST,"HEAD")
  self.curl:setopt(curl.OPT_NOBODY,1)
  Hidden.build_header(self,url,exhed)
  local rc,err = Hidden.perform(self,url,gl_h,gl_b)
  -- the next requests made with this handle want the body again
  self.curl:setopt(curl.OPT_NOBODY,0)
  -- since some server do not implement it we try the last thing
  if rc == nil and fallback then
    gl_b,gl_h = {},{}
    self.curl:setopt(curl.OPT_HTTPGET,1)
    self.curl:setopt(curl.OPT_CUSTOMREQUEST,"GET")
    Hidden.build_header(self,url,{"Range: bytes=0-1"})
    rc,err = Hidden.perform(self,url,gl_h,gl_b)
  end
  -- exhed and fallback are captured here instead of being passed as
  -- extra arguments: fallback was lost on a redirect, and a nil exhed
  -- followed by fallback would leave a hole in the argument list
  local function redo(b,location)
    return Private.get_head(b,location,exhed,fallback)
  end
  return Hidden.continue_or_return(rc,err,gl_h,
    redo,self,Hidden.mangle_location(self,err))
end
-- GETs url handing the body to the callback cb while it is received.
-- @param url the uri, in "http://" form
-- @param cb the callback used for the received data
-- @param exhed optional table of extra header lines
-- @return "" and nil at the end of the transmission, nil and an error
--   message on failure. A redirect is followed calling pipe_uri again
--   with the same callback.
function Private.pipe_uri(self,url,cb,exhed)
  local header = {}
  local handle = self.curl
  handle:setopt(curl.OPT_HTTPGET,1)
  handle:setopt(curl.OPT_CUSTOMREQUEST,"GET")
  Hidden.build_header(self,url,exhed)
  local rc,err = Hidden.perform(self,url,header,cb)
  local next_url = Hidden.mangle_location(self,err)
  -- the data went to cb, so there is only an empty string to return
  return Hidden.continue_or_return(rc,err,{""},
    Private.pipe_uri,self,next_url,cb,exhed)
end
-- As pipe_uri, but the header is handed to a callback too.
-- Hidden.perform does not parse the header when it goes to a function,
-- so the return code of the server is not examined here.
-- @param url the uri, in "http://" form
-- @param cb_h the callback used for the header
-- @param cb_b the callback used for the body
-- @param exhed optional table of extra header lines
-- @return "" and nil if the transfer succeeded, nil and an error message
--   otherwise
function Private.pipe_uri_with_header(self,url,cb_h,cb_b,exhed)
  local handle = self.curl
  handle:setopt(curl.OPT_HTTPGET,1)
  handle:setopt(curl.OPT_CUSTOMREQUEST,"GET")
  Hidden.build_header(self,url,exhed)
  local rc,err = Hidden.perform(self,url,cb_h,cb_b)
  local next_url = Hidden.mangle_location(self,err)
  return Hidden.continue_or_return(rc,err,{""},
    Private.pipe_uri_with_header,self,next_url,cb_h,cb_b,exhed)
end
-- Debug printing of the browser content (cookies, referrer, proxy
-- settings and user agent) with log.dbg.
function Private.show(self)
  log.dbg("browser:\n\tcookies:")
  for _,c in pairs(self.cookies) do
    -- one line for each attribute of the cookie
    for attribute,value in pairs(c) do
      log.dbg("\t\t"..attribute.."="..value)
    end
    log.dbg("\n")
  end
  -- the fields that are not set are shown as empty strings
  local referrer = self.referrer or ""
  local proxy = self.proxy or ""
  local proxyauth = self.proxyauth or ""
  local useragent = self.useragent or ""
  log.dbg("\treferrer:\n\t\t" .. referrer)
  log.dbg("\tproxy:\n\t\t" .. proxy)
  log.dbg("\tproxyauth:\n\t\t" .. proxyauth)
  log.dbg("\tuseragent:\n\t\t" .. useragent)
end
-- Creates the curl handle of the browser and configures it with the
-- user agent, the proxy and the proxy authentication stored in self.
-- Following redirects is disabled in curl.
function Private.init_curl(self)
  self.curl = curl.easy_init()
  local handle = self.curl
  -- to debug
  --handle:setopt(curl.OPT_VERBOSE,1)

  -- useragent
  local agent = self.useragent or "cURL/browser.lua (;;;;) FreePOPs"
  handle:setopt(curl.OPT_USERAGENT,agent)

  -- proxy
  if self.proxy ~= nil then
    handle:setopt(curl.OPT_PROXY,self.proxy)
    -- old cURL < 7.10.0 ?? have no OPT_PROXYTYPE
    if curl.OPT_PROXYTYPE ~= nil then
      handle:setopt(curl.OPT_PROXYTYPE,curl.PROXY_HTTP)
    end
  end

  -- the authentication types that can be forced with self.fpat
  local auth_by_name = {
    ["gss"] = curl.AUTH_GSSNEGOTIATE,
    ["ntlm"] = curl.AUTH_NTLM,
    ["digest"] = curl.AUTH_DIGEST,
    ["basic"] = curl.AUTH_BASIC,
  }

  -- proxy authentication
  if self.proxyauth ~= nil then
    handle:setopt(curl.OPT_PROXYUSERPWD,self.proxyauth)
    -- old cURL < 7.10.7 have no OPT_PROXYAUTH
    if curl.OPT_PROXYAUTH ~= nil then
      if self.fpat == nil then
        -- no type forced: all of them are allowed when
        -- browser.ssl_enabled() says yes, only basic otherwise
        if browser.ssl_enabled() then
          handle:setopt(curl.OPT_PROXYAUTH,
            curl.AUTH_BASIC + curl.AUTH_NTLM +
            curl.AUTH_GSSNEGOTIATE +
            curl.AUTH_DIGEST)
        else
          handle:setopt(curl.OPT_PROXYAUTH,
            curl.AUTH_BASIC)
        end
      else
        -- only the forced type
        local forced = auth_by_name[self.fpat]
        if forced == nil then
          log.error_print("Internal error, invalid fpat " ..
            self.fpat)
        else
          handle:setopt(curl.OPT_PROXYAUTH,forced)
        end
      end
    end
  end

  -- curl must not follow any Location: header that the server sends
  -- as part of an HTTP header
  handle:setopt(curl.OPT_FOLLOWLOCATION,0)
end
-- Produces the source text that rebuilds this browser: a call to
-- browser.new() followed by the assignments of cookies and referrer.
-- @param name the name of the variable the browser has to be assigned to
-- @return the text, as a string
function Private.serialize(self,name)
  local cookies_part =
    name..".cookies="..serial.serialize(nil,self.cookies)
  local referrer_part =
    name..".referrer="..serial.serialize(nil,self.referrer)
  return name .. "= browser.new();" .. cookies_part .. referrer_part
end
-- Returns the url of the page we have read, which is the stored
-- referrer. It is false as long as no request has stored one.
function Private.whathaveweread(self)
  local last_url = self.referrer
  return last_url
end
-- Sets the followRefreshHeader flag. Hidden.parse_header follows the url
-- of a Refresh: header only when the flag is exactly true.
-- @param val the new value of the flag
function Private.setFollowRefreshHeader(self, val)
  local flag = val
  self.followRefreshHeader = flag
end
-- Returns the host of the stored referrer, that is the host we have
-- contacted.
-- @return "host:port" if the url has a port, "host" otherwise
function Private.wherearewe(self)
  local where = cookie.parse_url(self.referrer)
  if where.port == nil then
    return where.host
  end
  return where.host..":".. where.port
end
-- Sets the SSL options of the curl handle: OPT_SSL_VERIFYHOST to 2 and
-- OPT_SSL_VERIFYPEER to 0.
-- NOTE(review): with OPT_SSL_VERIFYPEER set to 0 the certificate of the
-- server is not verified - confirm this is still wanted.
function Private.ssl_init_stuff(self)
  local handle = self.curl
  handle:setopt(curl.OPT_SSL_VERIFYHOST, 2)
  handle:setopt(curl.OPT_SSL_VERIFYPEER, 0)
end
-- Activates the verbose logging of CURL on the handle of this browser.
function Private.verbose_mode(self)
  local handle = self.curl
  handle:setopt(curl.OPT_VERBOSE,1)
end
--<==========================================================================>--
module("browser")
---
-- Creates a new object.
-- Proxy, proxy authentication, user agent and forced proxy authentication
-- type are read from the environment (LUA_HTTP_PROXY, LUA_HTTP_PROXYAUTH,
-- LUA_HTTP_USERAGENT, LUA_FORCE_PROXY_AUTH_TYPE).
-- @param override_useragent optional user agent, wins over the
-- environment.
-- @return object.
function new(override_useragent)
  -- methods come from Private; creating a field is forbidden
  local guard = {
    __index = Private,
    __newindex = function(t,k,v)
      log.error_abort("No allowed to create a new field "..
        "in a browser object!")
    end
  }
  -- All the fields are assigned before the metatable is set. Fields that
  -- will be written later (referrer, curl) get false and not nil, or the
  -- later assignment would be seen as the creation of a new field.
  local b = {}
  b.cookies = {}
  b.referrer = false
  b.curl = false
  b.proxy = os.getenv("LUA_HTTP_PROXY")
  b.proxyauth = os.getenv("LUA_HTTP_PROXYAUTH")
  b.useragent = override_useragent or os.getenv("LUA_HTTP_USERAGENT")
  b.fpat = os.getenv("LUA_FORCE_PROXY_AUTH_TYPE")
  b.followRefreshHeader = false
  setmetatable(b,guard)
  b:init_curl()
  -- see what we have done
  --b:show()
  return b
end
---
-- Returns true if the browser is SSL enabled.
-- The answer comes from the version string of curl, which is searched
-- for a three letter sequence like "SSL" or "TLS", in any case.
-- @return boolean.
function ssl_enabled()
  local version = curl.version()
  local found = string.find(version,"([SsTt][SsLl][LlSs])")
  return found ~= nil
end