/*
* Copyright (c) 2002-2007 Samit Basu
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
*/
#include <QtCore>
#include <QtGui>
#include <QtXml>
#include <QtNetwork>
#include "Array.hpp"
#include "XML.hpp"
//!
//@Module XMLREAD Read an XML Document into FreeMat
//@@Section IO
//@@Usage
//Given a filename, reads an XML document, parses it, and
//returns the result as a FreeMat data structure. The syntax for its
//use is:
//@[
// p = xmlread(filename)
//@]
//where @|filename| is a @|string|. The
//resulting object @|p| is a data structure containing the information
//in the document. Note that the returned object @|p| is not the same
//object as the one returned by MATLAB's @|xmlread|, although the
//information content is the same. The output is largely compatible with the
//output of the parseXML example in the @|xmlread| documentation of the
//MATLAB API.
//!
//!
//@Module HTMLREAD Read an HTML Document into FreeMat
//@@Section IO
//@@Usage
//Given a filename, reads an HTML document, (attempts to) parse it, and
//returns the result as a FreeMat data structure. The syntax for its
//use is:
//@[
// p = htmlread(filename)
//@]
//where @|filename| is a @|string|. The
//resulting object @|p| is a data structure containing the information
//in the document. Note that this function works by internally converting
//the HTML document into something closer to XHTML, and then using the
//XML parser to parse it. In some cases, the converted HTML cannot be properly
//parsed. In such cases, a third party tool such as "tidy" will probably do
//a better job.
//!
// private (recursively called) function to convert a QDomElement into
// a FreeMat Array. The structure of a node is a struct with:
// Name: nodeName
// Attributes: cell array of attributes, each is a struct with Name and Value pairs
// Data: Not sure yet
// Children: cell array of nodes
static Array cellScalar(Array t) {
ArrayMatrix m;
ArrayVector n;
n << t;
m << n;
return Array::cellConstructor(m);
}
static Array QDomElementToArray(QDomElement elem);
static Array QDomNodeToArray(QDomNode p) {
if (p.nodeType() == QDomNode::ElementNode)
return QDomElementToArray(p.toElement());
rvstring fnames;
fnames << "Name" << "Value";
ArrayVector vals;
vals << Array::stringConstructor(p.nodeName().toStdString());
vals << Array::stringConstructor(p.nodeValue().toStdString());
return Array::structConstructor(fnames,vals);
}
static Array QDomNamedNodeMapToArray(QDomNamedNodeMap attr) {
ArrayVector nodes;
for (int i=0;i<attr.length();i++)
nodes << QDomNodeToArray(attr.item(i));
return Array::cellConstructor(ArrayMatrix() << nodes);
}
static Array QDomElementToArray(QDomElement elem) {
rvstring fnames;
fnames << "Name" << "Attributes" << "Data" << "Children";
ArrayVector vals;
vals << Array::stringConstructor(elem.tagName().toStdString());
vals << cellScalar(QDomNamedNodeMapToArray(elem.attributes()));
vals << Array::emptyConstructor();
ArrayVector children;
QDomNode n = elem.firstChild();
while (!n.isNull()) {
children << QDomNodeToArray(n);
n = n.nextSibling();
}
vals << cellScalar(Array::cellConstructor(ArrayMatrix() << children));
return Array::structConstructor(fnames,vals);
}
ArrayVector XMLReadFunction(int nargout, const ArrayVector& arg) {
if (arg.size() < 1)
throw Exception("xmlread requires at least one argument (the filename)");
// Parse the thing
QDomDocument doc("fmdoc");
string filename(ArrayToString(arg[0]));
QFile file(QString::fromStdString(filename));
if (!file.open(QIODevice::ReadOnly))
return ArrayVector();
QString errMsg;
int errLine, errColumn;
if (!doc.setContent(&file,true,&errMsg,&errLine,&errColumn)) {
file.close();
throw Exception(string("error parsing ") + filename + ":" + errMsg.toStdString() + " Line: " + errLine + " Column: " + errColumn);
}
file.close();
// Walk the tree
QDomElement docElem = doc.documentElement();
return ArrayVector() << QDomElementToArray(docElem);
}
ArrayVector HTMLReadFunction(int nargout, const ArrayVector& arg) {
if (arg.size() < 1)
throw Exception("htmlread requires at least one argument (the filename)");
// Read the proposed filename
string filename(ArrayToString(arg[0]));
QFile file(QString::fromStdString(filename));
if (!file.open(QIODevice::ReadOnly))
return ArrayVector();
QString fileData;
QTextStream tStream(&file);
fileData = tStream.readAll();
file.close();
// Tricky...
// Parse the thing
QTextDocument *tdoc = new QTextDocument;
tdoc->setHtml(fileData);
QString xhtml(tdoc->toHtml());
QFile file2("xhtml.xml");
file2.open(QIODevice::WriteOnly);
QTextStream tStream2(&file2);
tStream2 << xhtml;
file2.close();
delete tdoc;
QDomDocument doc("fmdoc");
QString errMsg;
int errLine, errColumn;
if (!doc.setContent(xhtml,true,&errMsg,&errLine,&errColumn)) {
throw Exception(string("error parsing ") + filename + ":" + errMsg.toStdString() + " Line: " + errLine + " Column: " + errColumn);
}
// Walk the tree
QDomElement docElem = doc.documentElement();
return ArrayVector() << QDomElementToArray(docElem);
}
//!
//@Module URLWRITE Retrieve a URL into a File
//@@Section IO
//@@Usage
//Given a URL and a timeout, attempts to retrieve the URL and write the
//contents to a file. The syntax is
//@[
// f = urlwrite(url,filename,timeout)
//@]
//The @|timeout| is in milliseconds. Note that the URL must be a complete
//spec (i.e., including the name of the resource you wish to retrieve). So
//for example, you cannot use @|http://www.google.com| as a URL, but must
//instead use @|http://www.google.com/index.html|.
//!
void URLRetriever::requestFinished(int id, bool err) {
if (id != m_httpGetId)
return;
m_err = err;
}
void URLRetriever::run()
{
m_http.setHost(m_url.host(), m_url.port() != -1 ? m_url.port() : 80);
if (!m_url.userName().isEmpty())
m_http.setUser(m_url.userName(), m_url.password());
m_httpGetId = m_http.get(m_url.path(), m_file);
QTimer::singleShot(m_timeout, &m_http, SLOT(abort()));
connect(&m_http, SIGNAL(done(bool)), &m_event, SLOT(quit()));
connect(&m_http, SIGNAL(requestFinished(int, bool)), this, SLOT(requestFinished(int, bool)));
m_event.exec();
}
ArrayVector URLWriteFunction(int nargout, const ArrayVector& arg) {
if (arg.size() < 3) throw Exception("urlwrite requires 3 arguments: url, filename, timeout");
QUrl url(QString::fromStdString(ArrayToString(arg[0])));
if (!url.isValid()) throw Exception(string("url is not valid:") + ArrayToString(arg[0]));
string filename(ArrayToString(arg[1]));
QFile file(QString::fromStdString(filename));
if (!file.open(QIODevice::WriteOnly))
throw Exception(string("unable to open output file ") + filename + " for writing");
double timeout(ArrayToDouble(arg[2]));
URLRetriever p_url(url,&file,timeout);
p_url.run();
if (p_url.error())
throw Exception(string("error retrieving url:") + ArrayToString(arg[0]));
return ArrayVector() << Array::stringConstructor(filename);
}
syntax highlighted by Code2HTML, v. 0.9.1