// -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- // Copyright 1999 Jose M. Vidal // Jose M. Vidal, vidal@multiagent.com, http://jmvidal.ece.sc.edu // // This program is free software. You can redistribute it and/or modify // it under the terms of the GNU General Public License // // $Id: XMLParser.C,v 1.13 2002/04/02 16:51:51 jmvidal Exp $ #ifdef HAVE_CONFIG_H #include #endif #include "XMLParser.H" using namespace std; //defined in reference.C extern string trim(const string & s); extern int equalNoCase(const string& s, const string &s2); void XMLParser::startTag(string &tagName, map & attributes){ cout << "startTag = " << tagName << endl; map::const_iterator i; for (i = attributes.begin(); i != attributes.end(); ++i){ cout << "key:" << i->first << " value:" << i->second << endl; } } void XMLParser::endTag(string &tagName){ cout << "endTag = " << tagName << endl; } void XMLParser::cdata(string &content){ cout << "content = " << content << endl; } string XMLParser::toUpper(const string &s){ string result = ""; for (string::const_iterator p = s.begin(); p!= s.end(); ++p){ result += toupper(*p); } return result; } void XMLParser::parse(iwebstream &in) { char c; string data = ""; string dataUpperCase = ""; while (!in.eof()){ c = in.get(); if (isspace(c)) continue; if (c == '<'){ //start of a tag while (isspace(c = in.get())) continue; //swallow whitespace if (c == '/') { //it is an end tag while ((c = in.get()) != '>'){ if (in.eof()) return; data += c; } dataUpperCase = toUpper(data); endTag(dataUpperCase); data = ""; continue; } else if (c == '!') { if ((c = in.get()) == '-'){ //its a comment, swallow it while ((c = in.get()) != '>') continue; } else if (c == 'D'){ //assume its ') && (!isspace(c))) doctype += c; if (c != '>') while ((c = in.get()) != '>') continue; } } else if (c == '?'){ //, ignore it while ((c = in.get()) != '>') continue; } else { //it is a start tag, get the argument/value pairs data += c; //data will hold the tagname c = in.get(); while (!isspace(c) && (c != '>')){ if (in.eof()) return; data += c; c = in.get(); } map attributeList; data = toUpper(data); if (c == '>'){ if (data[data.length() - 2] == '/') { //its a so call it as an end tag data = data.substr(0, data.length() -1); endTag(data); data = ""; continue; } startTag(data, attributeList); data = ""; continue; } string attributeName = ""; string attributeValue = ""; string oldAttributeName = ""; while ((c = in.get()) != '>') { if (in.eof()) return; if (c == '='){ //start getting the attribute-value if (attributeName == ""){ attributeName = oldAttributeName; oldAttributeName = ""; } attributeValue = ""; while (isspace(c = in.get())) continue; if (c == '\"') { //value surrounded by quotes while ((c = in.get()) != '\"'){ if (in.eof()) return; attributeValue += c; } } else { //value not surround by quotes, stop on a space. while (( !isspace(c = in.get())) && (c != '>')) { if (in.eof()) return; attributeValue += c; } in.putback(c); } continue; } else if (isspace(c) && (attributeName != "")) { dataUpperCase = toUpper(attributeName); attributeList[dataUpperCase] = attributeValue; if (attributeValue == "") oldAttributeName = attributeName; //save it in case this is a "href =" attributeName = ""; attributeValue = ""; continue; } if (!isspace(c)) attributeName += c; } if (attributeName != ""){ //add the last one. It might already be there (e.g. ), if so this does nothing. dataUpperCase = toUpper(attributeName); attributeList[dataUpperCase] = attributeValue; } startTag(data, attributeList); data = ""; } } else { //it is content data += c; while ((c = in.get()) != '<'){ data += c; if (in.eof()) { if (data != "" ) cdata(data); return; } } // data = trim(data); if (trim(data) != "") cdata(data); data = ""; in.putback(c); } } } string XMLParser::toString(string &tagName, map & attributes){ string result; result += "<" + tagName; map::const_iterator i; for (i = attributes.begin(); i != attributes.end(); ++i){ result += " " + i->first + "=\"" + i->second + "\""; } result += ">"; return result; } // // // BookmarksParser // // void BookmarksParser::startTag(string &tagName, map & attributes){ // XMLParser::startTag(tagName, attributes); if (tagName == "DL"){ //start a new reference tree if (inReference){ currentReference = 0; inReference = false; inTitle = false; inComment = false; } if (currentReferenceTree == 0) currentReferenceTree = root; else currentReferenceTree = new referenceTree; if (root == 0) root = currentReferenceTree; rtStack.push_back(currentReferenceTree); } else if ((tagName == "H3") || (tagName == "A")){ map::const_iterator i; string url; string addDate; string modDate; string visitDate; bool isAliasOf = false; string aliasOf; bool isAliasId = false; string aliasId; for (i = attributes.begin(); i != attributes.end(); ++i){ if (i->first == HREF) url = i->second; else if (i->first == ADDDATE) addDate = i->second; else if (i->first == LASTVISIT) visitDate = i->second; else if (i->first == LASTMODIFIED) modDate = i->second; else if (i->first == ALIASOF ) { isAliasOf = true; aliasOf = i->second; } else if (i->first == ALIASID ) { isAliasId = true; aliasId = i->second; } } bool isFolder; if (tagName == "H3") isFolder = true; else isFolder = false; currentReference = new reference(url, addDate, modDate, visitDate, isFolder); if (isAliasId) { currentReference->aliasID = aliasId; } else if (isAliasOf) { currentReference->aliasof = true; currentReference->aliasOf = aliasOf; } if (isFolder){ refStack.push_back(currentReference); } // currentReference->writeAsBookHelper(cout, string("")); inReference = true; inTitle = true; } else if (tagName == "DT"){ if (inReference) { endReference(); } } else if (tagName == "BR"){ if (inComment) //replace "
" with a space. currentReference->comment += " "; } } /** Add the currentReference to the currentReferenceTree */ void BookmarksParser::endReference(){ // cout << "Stack size = " << rtStack.size() << endl; // cout << "Adding reference" << endl; // currentReference->writeAsBookHelper(cout, ""); currentReferenceTree->addReference(*currentReference); currentReference = 0; inReference = false; inTitle = false; inComment = false; } void BookmarksParser::endTag(string &tagName){ // XMLParser::endTag(tagName); if (tagName == "DL") { if (inReference) { endReference(); } if (!refStack.empty()){ currentReference = refStack.back(); currentReference->children = currentReferenceTree; currentReferenceTree->comment = currentReference->comment; // currentReferenceTree->writeAsBookmarkFile(cout, "asdf"); // currentReference->writeAsBookHelper(cout, "****"); rtStack.pop_back(); currentReferenceTree = rtStack.back(); endReference(); //add currentReference refStack.pop_back(); } else { rtStack.pop_back(); if ( rtStack.size() > 0 ) { currentReferenceTree = rtStack.back(); } else { currentReferenceTree = 0; } } // cout << "Poped size=" << rtStack.size() << endl; } else if (((tagName == "A") || (tagName == "H3")) && inTitle){ inTitle = false; inComment = true; } } void BookmarksParser::cdata(string &content){ // XMLParser::cdata(content); if (inTitle) { currentReference->title = content; if (currentReference->isFolder()) currentReference->dirname = content; } else if (inComment){ currentReference->comment += content; } } // // // RSSParser // // void RSSParser::startTag(string &tagName, map & attributes){ // XMLParser::startTag(tagName, attributes); if (tagName == "ITEM"){ currentReference = new reference(); inItem = true; } else if (tagName == "TITLE"){ inTitle = true; } else if (tagName == "LINK"){ inLink = true; } else if (tagName == "DESCRIPTION"){ inDescription = true; } } void RSSParser::endTag(string &tagName){ // XMLParser::endTag(tagName); if (tagName == "ITEM"){ root->addReference(*currentReference); currentReference = 0; inItem = false; } else if (tagName == "TITLE"){ inTitle = false; } else if (tagName == "LINK"){ inLink = false; } else if (tagName == "DESCRIPTION"){ inDescription = false; } } void RSSParser::cdata(string &content){ // XMLParser::cdata(content); if (inItem){ if (inTitle) { currentReference->title = content; } else if (inLink) { currentReference->url = content; } else if (inDescription) { currentReference->comment = content; } } } // // // XBEL Parser // // void XBELParser::startTag(string &tagName, map & attributes){ // XMLParser::startTag(tagName, attributes); if (equalNoCase(tagName,"FOLDER")){ //start a new reference tree currentReferenceTree = new referenceTree; if (root == 0) root = currentReferenceTree; rtStack.push_back(currentReferenceTree); currentFolder = new reference; //also, start a new reference currentFolder->children = currentReferenceTree; currentFolder->folder = true; if (attributes["ADDED"] != "") currentFolder->creationTime = atoi(attributes["ADDED"].c_str()); refStack.push_back(currentFolder); inFolder = true; } else if (equalNoCase(tagName,"TITLE")){ inTitle = true; } else if (equalNoCase(tagName,"DESC")){ inDesc = true; } else if (equalNoCase(tagName,"TIME_ADDED")){ inAdded = true; } else if (equalNoCase(tagName,"TIME_VISITED")){ inVisited = true; } else if (equalNoCase(tagName,"TIME_MODIFIED")){ inModified = true; } else if (equalNoCase(tagName,"BOOKMARK")){ map::const_iterator i; string url = ""; string addDate = ""; string modDate = ""; string visitDate = ""; for (i = attributes.begin(); i != attributes.end(); ++i){ if (equalNoCase(i->first,"HREF")) url = i->second; else if (equalNoCase(i->first,"ADDED")) addDate = i->second; else if (equalNoCase(i->first,"VISITED")) visitDate = i->second; else if (equalNoCase(i->first,"MODIFIED")) modDate = i->second; } currentBookmark = new reference(url, addDate, modDate, visitDate, false); inBookmark = true; } } void XBELParser::endTag(string &tagName){ // XMLParser::endTag(tagName); if (equalNoCase(tagName,"TITLE")) { inTitle = false; } else if (equalNoCase(tagName,"DESC")){ inDesc = false; } else if (equalNoCase(tagName,"TIME_ADDED")){ inAdded = false; } else if (equalNoCase(tagName,"TIME_VISITED")){ inVisited = false; } else if (equalNoCase(tagName,"TIME_MODIFIED")){ inModified = false; } else if (equalNoCase(tagName,"FOLDER")){ rtStack.pop_back(); currentReferenceTree = rtStack.back(); currentReferenceTree->addReference(*currentFolder); if (!refStack.empty()){ refStack.pop_back(); currentFolder = refStack.back(); } inFolder = false; } else if (equalNoCase(tagName,"BOOKMARK")){ currentReferenceTree->addReference(*currentBookmark); //delete currentReference; currentBookmark = 0; inBookmark = false; } } void XBELParser::cdata(string &content){ // XMLParser::cdata(content); if (inFolder && !inBookmark){ if (inTitle) { currentFolder->title = content; currentFolder->dirname = content; } else if (inDesc){ currentFolder->comment = content; } else if (inAdded) { currentFolder->creationTime = atoi(content.c_str()); } else if (inVisited) { currentFolder->visitTime = atoi(content.c_str()); } else if (inModified) { currentFolder->modifiedTime = atoi(content.c_str()); } } else if (inBookmark) { if (inTitle) { currentBookmark->title = content; currentBookmark->dirname = content; } else if (inDesc){ currentBookmark->comment = content; } else if (inAdded) { currentBookmark->creationTime = atoi(content.c_str()); } else if (inVisited) { currentBookmark->visitTime = atoi(content.c_str()); } else if (inModified) { currentBookmark->modifiedTime = atoi(content.c_str()); } } } // // // FolderCommentParser // // void FolderCommentParser::startTag(string &tagName, map & attributes){ // XMLParser::startTag(tagName, attributes); if (tagName == "PRIVATE"){ r.priv = true; } if (tagName == "ALIAS"){ inAlias = true; } else if (tagName == "DIRNAME"){ inDirname = true; } else if (tagName == "INCLUDE"){ inInclude = true; } else if (tagName == "FOLDERHTML"){ inFolderHTML = true; } else if (tagName == "SORT"){ inSort = true; sortOrder = toUpper(attributes["ORDER"]); if (sortOrder == "") sortOrder = "NORMAL"; } else if (inFolderHTML) { r.folderHTML += toString(tagName, attributes); } else { finalComment += toString(tagName, attributes); } } void FolderCommentParser::endTag(string &tagName){ // XMLParser::endTag(tagName); if (tagName == "ALIAS"){ inAlias = false; } else if (tagName == "DIRNAME"){ inDirname = false; } else if (tagName == "INCLUDE"){ inInclude = false; } else if (tagName == "FOLDERHTML"){ inFolderHTML = false; } else if (tagName == "SORT"){ inSort = false; } else if (inFolderHTML){ r.folderHTML += ""; } else { finalComment += ""; } } void FolderCommentParser::cdata(string &content){ // cout << "\"" << content << "\"" << endl; if (inAlias) { r.aliasof = true; r.aliasDir = trim(content); } else if (inDirname){ r.dirname = trim(content); } else if (inInclude){ string fileName = trim(content); iwebstream istream(fileName.c_str()); cout << "Found INCLUDE " << fileName << " in folder " << r.title << endl; if (istream == 0){ cout << "Could not open " << fileName << endl; } else { cout << "Reading " << fileName << endl; referenceTree rtc2(istream); r.children->addTree(rtc2); r.comment = ""; r.children->comment = ""; } } else if (inFolderHTML){ r.folderHTML += content; } else if (inSort){ if (sortOrder == "NORMAL"){ // cout << "sortOrder=Normal content=" << content << endl; // cout << "Sorting folder " << r.title << endl; if (equalNoCase(content, "CREATION")) sort(r.children->getContents().begin(), r.children->getContents().end(), referenceCmpCreation()); else if (equalNoCase(content, "TITLE")) sort(r.children->getContents().begin(), r.children->getContents().end(), referenceCmpTitle()); else if (equalNoCase(content, "MODIFIED")) sort(r.children->getContents().begin(), r.children->getContents().end(), referenceCmpModified()); else if (equalNoCase(content, "VISIT")) sort(r.children->getContents().begin(), r.children->getContents().end(), referenceCmpVisit()); else if (equalNoCase(content, "HITS")) sort(r.children->getContents().begin(), r.children->getContents().end(), referenceCmpHits()); else cerr << "ERROR: Unknown sort field: " << content << endl; } else if (sortOrder == "INVERSE"){ if (equalNoCase(content, "CREATION")) sort(r.children->getContents().begin(), r.children->getContents().end(), referenceCmpCreationI()); else if (equalNoCase(content, "TITLE")) sort(r.children->getContents().begin(), r.children->getContents().end(), referenceCmpTitleI()); else if (equalNoCase(content, "MODIFIED")) sort(r.children->getContents().begin(), r.children->getContents().end(), referenceCmpModifiedI()); else if (equalNoCase(content, "VISIT")) sort(r.children->getContents().begin(), r.children->getContents().end(), referenceCmpVisitI()); else if (equalNoCase(content, "HITS")) sort(r.children->getContents().begin(), r.children->getContents().end(), referenceCmpHitsI()); else cerr << "ERROR: Unknown sort field: " << content << endl; } else cerr << "ERROR: Unknown sort order " << sortOrder << endl; } else { finalComment += content; } } void FolderCommentParser::parse(string & data){ iwebstream in("data:" + data); XMLParser::parse(in); r.comment = trim(finalComment); //An ugly hack to get around the bug in Netscape which swallows the last // characters in a comment that ends in ">", so we end in "." instead. if (r.comment == "."){ r.comment = ""; } };