/* * The Apache Software License, Version 1.1 * * Copyright (c) 1999-2001 The Apache Software Foundation. All rights * reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * 3. The end-user documentation included with the redistribution, * if any, must include the following acknowledgment: * "This product includes software developed by the * Apache Software Foundation (http://www.apache.org/)." * Alternately, this acknowledgment may appear in the software itself, * if and wherever such third-party acknowledgments normally appear. * * 4. The names "Xerces" and "Apache Software Foundation" must * not be used to endorse or promote products derived from this * software without prior written permission. For written * permission, please contact apache\@apache.org. * * 5. Products derived from this software may not be called "Apache", * nor may "Apache" appear in their name, without prior written * permission of the Apache Software Foundation. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * ==================================================================== * * This software consists of voluntary contributions made by many * individuals on behalf of the Apache Software Foundation, and was * originally based on software copyright (c) 1999, International * Business Machines, Inc., http://www.ibm.com . For more information * on the Apache Software Foundation, please see * . */ /* * $Log: XMLScanner.hpp,v $ * Revision 1.1.1.1 2002/02/01 22:22:03 peiyongz * sane_include * * Revision 1.38 2001/11/30 22:19:15 peiyongz * cleanUp function made member function * cleanUp object moved to file scope * * Revision 1.37 2001/11/20 18:51:44 tng * Schema: schemaLocation and noNamespaceSchemaLocation to be specified outside the instance document. New methods setExternalSchemaLocation and setExternalNoNamespaceSchemaLocation are added (for SAX2, two new properties are added). * * Revision 1.36 2001/11/13 13:27:28 tng * Move root element check to XMLScanner. * * Revision 1.35 2001/11/02 14:20:14 knoaman * Add support for identity constraints. * * Revision 1.34 2001/10/12 20:52:18 tng * Schema: Find the attributes see if they should be (un)qualified. * * Revision 1.33 2001/09/10 15:16:04 tng * Store the fGrammarType instead of calling getGrammarType all the time for faster performance. * * Revision 1.32 2001/09/10 14:06:22 tng * Schema: AnyAttribute support in Scanner and Validator. * * Revision 1.31 2001/08/13 15:06:39 knoaman * update validation. * * Revision 1.30 2001/08/02 16:54:39 tng * Reset some Scanner flags in scanReset(). * * Revision 1.29 2001/08/01 19:11:01 tng * Add full schema constraint checking flag to the samples and the parser. * * Revision 1.28 2001/07/24 21:23:39 tng * Schema: Use DatatypeValidator for ID/IDREF/ENTITY/ENTITIES/NOTATION. * * Revision 1.27 2001/07/13 16:56:48 tng * ScanId fix. * * Revision 1.26 2001/07/12 18:50:17 tng * Some performance modification regarding standalone check and xml decl check. * * Revision 1.25 2001/07/10 21:09:31 tng * Give proper error messsage when scanning external id. * * Revision 1.24 2001/07/09 13:42:08 tng * Partial Markup in Parameter Entity is validity constraint and thus should be just error, not fatal error. * * Revision 1.23 2001/07/05 13:12:11 tng * Standalone checking is validity constraint and thus should be just error, not fatal error: * * Revision 1.22 2001/06/22 12:42:33 tng * [Bug 2257] 1.5 thinks a tag is a tag * * Revision 1.21 2001/06/04 20:59:29 jberry * Add method incrementErrorCount for use by validator. Make sure to reset error count in _both_ the scanReset methods. * * Revision 1.20 2001/06/03 19:21:40 jberry * Add support for tracking error count during parse; enables simple parse without requiring error handler. * * Revision 1.19 2001/05/28 20:55:02 tng * Schema: allocate a fDTDValidator, fSchemaValidator explicitly to avoid wrong cast * * Revision 1.18 2001/05/11 15:17:28 tng * Schema: Nillable fixes. * * Revision 1.17 2001/05/11 13:26:17 tng * Copyright update. * * Revision 1.16 2001/05/03 20:34:29 tng * Schema: SchemaValidator update * * Revision 1.15 2001/05/03 19:09:09 knoaman * Support Warning/Error/FatalError messaging. * Validity constraints errors are treated as errors, with the ability by user to set * validity constraints as fatal errors. * * Revision 1.14 2001/04/19 18:16:59 tng * Schema: SchemaValidator update, and use QName in Content Model * * Revision 1.13 2001/03/30 16:46:56 tng * Schema: Use setDoSchema instead of setSchemaValidation which makes more sense. * * Revision 1.12 2001/03/30 16:35:06 tng * Schema: Whitespace normalization. * * Revision 1.11 2001/03/21 21:56:05 tng * Schema: Add Schema Grammar, Schema Validator, and split the DTDValidator into DTDValidator, DTDScanner, and DTDGrammar. * * Revision 1.10 2001/02/15 15:56:27 tng * Schema: Add setSchemaValidation and getSchemaValidation for DOMParser and SAXParser. * Add feature "http://apache.org/xml/features/validation/schema" for SAX2XMLReader. * New data field fSchemaValidation in XMLScanner as the flag. * * Revision 1.9 2000/04/12 22:58:28 roddey * Added support for 'auto validate' mode. * * Revision 1.8 2000/03/03 01:29:32 roddey * Added a scanReset()/parseReset() method to the scanner and * parsers, to allow for reset after early exit from a progressive parse. * Added calls to new Terminate() call to all of the samples. Improved * documentation in SAX and DOM parsers. * * Revision 1.7 2000/03/02 19:54:30 roddey * This checkin includes many changes done while waiting for the * 1.1.0 code to be finished. I can't list them all here, but a list is * available elsewhere. * * Revision 1.6 2000/02/24 20:18:07 abagchi * Swat for removing Log from API docs * * Revision 1.5 2000/02/06 07:47:54 rahulj * Year 2K copyright swat. * * Revision 1.4 2000/01/24 20:40:43 roddey * Exposed the APIs to get to the byte offset in the source XML buffer. This stuff * is not tested yet, but I wanted to get the API changes in now so that the API * can be stablized. * * Revision 1.3 2000/01/12 23:52:46 roddey * These are trivial changes required to get the C++ and Java versions * of error messages more into sync. Mostly it was where the Java version * was passing out one or more parameter than the C++ version was. In * some cases the change just required an extra parameter to get the * needed info to the place where the error was issued. * * Revision 1.2 2000/01/12 00:15:04 roddey * Changes to deal with multiply nested, relative pathed, entities and to deal * with the new URL class changes. * * Revision 1.1.1.1 1999/11/09 01:08:23 twl * Initial checkin * * Revision 1.4 1999/11/08 20:44:52 rahul * Swat for adding in Product name and CVS comment log variable. * */ #if !defined(XMLSCANNER_HPP) #define XMLSCANNER_HPP #include #include #include #include #include #include #include #include #include #include #include #include #include class InputSource; class XMLDocumentHandler; class DocTypeHandler; class XMLElementDecl; class XMLEntityHandler; class EntityResolver; class XMLErrorReporter; class ErrorHandler; class XMLMsgLoader; class XMLValidator; class ValueStoreCache; class XPathMatcherStack; class FieldActivator; class IdentityConstraint; // // This is the mondo scanner class, which does the vast majority of the // work of parsing. It handles reading in input and spitting out events // to installed handlers. // class XMLPARSER_EXPORT XMLScanner { public : // ----------------------------------------------------------------------- // Public class types // // NOTE: These should really be private, but some of the compilers we // have to deal with are too stupid to understand this. // // DeclTypes // Used by scanXMLDecl() to know what type of decl it should scan. // Text decls have slightly different rules from XMLDecls. // // EntityExpRes // These are the values returned from the entity expansion method, // to indicate how it went. // // XMLTokens // These represent the possible types of input we can get while // scanning content. // // ValScheme // This indicates what the scanner should do in terms of validation. // 'Auto' means if there is any int/ext subset, then validate. Else, // don't. // ----------------------------------------------------------------------- enum DeclTypes { Decl_Text , Decl_XML }; enum EntityExpRes { EntityExp_Pushed , EntityExp_Returned , EntityExp_Failed }; enum XMLTokens { Token_CData , Token_CharData , Token_Comment , Token_EndTag , Token_EOF , Token_PI , Token_StartTag , Token_Unknown }; enum ValSchemes { Val_Never , Val_Always , Val_Auto }; // ----------------------------------------------------------------------- // Constructors and Destructor // ----------------------------------------------------------------------- XMLScanner ( XMLValidator* const valToAdopt ); XMLScanner ( XMLDocumentHandler* const docHandler , DocTypeHandler* const docTypeHandler , XMLEntityHandler* const entityHandler , XMLErrorReporter* const errReporter , XMLValidator* const valToAdopt ); ~XMLScanner(); // ----------------------------------------------------------------------- // Error emitter methods // ----------------------------------------------------------------------- void emitError(const XMLErrs::Codes toEmit); void emitError ( const XMLErrs::Codes toEmit , const XMLCh* const text1 , const XMLCh* const text2 = 0 , const XMLCh* const text3 = 0 , const XMLCh* const text4 = 0 ); void emitError ( const XMLErrs::Codes toEmit , const char* const text1 , const char* const text2 = 0 , const char* const text3 = 0 , const char* const text4 = 0 ); // ----------------------------------------------------------------------- // Getter methods // ----------------------------------------------------------------------- const XMLDocumentHandler* getDocHandler() const; XMLDocumentHandler* getDocHandler(); const DocTypeHandler* getDocTypeHandler() const; DocTypeHandler* getDocTypeHandler(); bool getDoNamespaces() const; ValSchemes getValidationScheme() const; bool getDoSchema() const; bool getValidationSchemaFullChecking() const; const XMLEntityHandler* getEntityHandler() const; XMLEntityHandler* getEntityHandler(); const XMLErrorReporter* getErrorReporter() const; XMLErrorReporter* getErrorReporter(); bool getExitOnFirstFatal() const; bool getValidationConstraintFatal() const; RefHashTableOf* getIDRefList(); const RefHashTableOf* getIDRefList() const; bool getInException() const; bool getLastExtLocation ( XMLCh* const sysIdToFill , const unsigned int maxSysIdChars , XMLCh* const pubIdToFill , const unsigned int maxPubIdChars , unsigned int& lineToFill , unsigned int& colToFill ); const Locator* getLocator() const; unsigned int getSrcOffset() const; bool getStandalone() const; const XMLValidator* getValidator() const; XMLValidator* getValidator(); int getErrorCount(); const DTDEntityDecl* getEntityDecl ( const XMLCh* const entName ) const; DTDEntityDecl* getEntityDecl ( const XMLCh* const entName ); NameIdPoolEnumerator getEntityEnumerator() const; NameIdPool* getEntityDeclPool(); const NameIdPool* getEntityDeclPool() const; const XMLStringPool* getURIStringPool() const; XMLStringPool* getURIStringPool(); bool getHasNoDTD() const; XMLCh* getExternalSchemaLocation() const; XMLCh* getExternalNoNamespaceSchemaLocation() const; // ----------------------------------------------------------------------- // Getter methods // ----------------------------------------------------------------------- /** * When an attribute name has no prefix, unlike elements, it is not mapped * to the global namespace. So, in order to have something to map it to * for practical purposes, a id for an empty URL is created and used for * such names. * * @return The URL pool id of the URL for an empty URL "". */ unsigned int getEmptyNamespaceId() const; /** * When a prefix is found that has not been mapped, an error is issued. * However, if the parser has been instructed not to stop on the first * fatal error, it needs to be able to continue. To do so, it will map * that prefix tot his magic unknown namespace id. * * @return The URL pool id of the URL for the unknown prefix * namespace. */ unsigned int getUnknownNamespaceId() const; /** * The prefix 'xml' is a magic prefix, defined by the XML spec and * requiring no prior definition. This method returns the id for the * intrinsically defined URL for this prefix. * * @return The URL pool id of the URL for the 'xml' prefix. */ unsigned int getXMLNamespaceId() const; /** * The prefix 'xmlns' is a magic prefix, defined by the namespace spec * and requiring no prior definition. This method returns the id for the * intrinsically defined URL for this prefix. * * @return The URL pool id of the URL for the 'xmlns' prefix. */ unsigned int getXMLNSNamespaceId() const; /** * This method find the passed URI id in its URI pool and * copy the text of that URI into the passed buffer. */ bool getURIText ( const unsigned int uriId , XMLBuffer& uriBufToFill ) const; const XMLCh* getURIText(const unsigned int uriId) const; /** * This method separate the passed QName into prefix * and local part, and then return the URI id by resolving * the prefix. * * mode: Indicate if this QName comes from an Element or Attribute */ unsigned int resolveQName ( const XMLCh* const qName , XMLBuffer& nameBufToFill , XMLBuffer& prefixBufToFill , const ElemStack::MapModes mode ); /* tell if the validator comes from user */ bool isValidatorFromUser(); // ----------------------------------------------------------------------- // Setter methods // ----------------------------------------------------------------------- void setDocHandler(XMLDocumentHandler* const docHandler); void setDocTypeHandler(DocTypeHandler* const docTypeHandler); void setDoNamespaces(const bool doNamespaces); void setEntityHandler(XMLEntityHandler* const docTypeHandler); void setEntityResolver(EntityResolver* const handler); void setErrorReporter(XMLErrorReporter* const errHandler); void setErrorHandler(ErrorHandler* const handler); void setExitOnFirstFatal(const bool newValue); void setValidationConstraintFatal(const bool newValue); void setValidationScheme(const ValSchemes newScheme); void setValidator(XMLValidator* const valToAdopt); void setDoSchema(const bool doSchema); void setValidationSchemaFullChecking(const bool schemaFullChecking); void setHasNoDTD(const bool hasNoDTD); void setRootElemName(XMLCh* rootElemName); void setExternalSchemaLocation(const XMLCh* const schemaLocation); void setExternalNoNamespaceSchemaLocation(const XMLCh* const noNamespaceSchemaLocation); void setExternalSchemaLocation(const char* const schemaLocation); void setExternalNoNamespaceSchemaLocation(const char* const noNamespaceSchemaLocation); // ----------------------------------------------------------------------- // Mutator methods // ----------------------------------------------------------------------- void incrementErrorCount(void); // For use by XMLValidator // ----------------------------------------------------------------------- // Deprecated methods as of 3.2.0. Use getValidationScheme() and // setValidationScheme() instead. // ----------------------------------------------------------------------- bool getDoValidation() const; void setDoValidation(const bool validate, const bool setValScheme = true); // ----------------------------------------------------------------------- // Document scanning methods // // scanDocument() does the entire source document. scanFirst(), // scanNext(), and scanReset() support a progressive parse. // ----------------------------------------------------------------------- void scanDocument ( const InputSource& src , const bool reuseGrammar = false ); void scanDocument ( const XMLCh* const systemId , const bool reuseGrammar = false ); void scanDocument ( const char* const systemId , const bool reuseGrammar = false ); bool scanFirst ( const InputSource& src , XMLPScanToken& toFill , const bool reuseGrammar = false ); bool scanFirst ( const XMLCh* const systemId , XMLPScanToken& toFill , const bool reuseGrammar = false ); bool scanFirst ( const char* const systemId , XMLPScanToken& toFill , const bool reuseGrammar = false ); bool scanNext(XMLPScanToken& toFill); void scanReset(XMLPScanToken& toFill); bool checkXMLDecl(bool startWithAngle); // ----------------------------------------------------------------------- // Notification that lazy data has been deleted // ----------------------------------------------------------------------- static void reinitScannerMutex(); static void reinitMsgLoader(); private : // ----------------------------------------------------------------------- // Unimplemented constructors and operators // ----------------------------------------------------------------------- XMLScanner(); XMLScanner(const XMLScanner&); void operator=(const XMLScanner&); // ----------------------------------------------------------------------- // Private helper methods // ----------------------------------------------------------------------- void commonInit(); void initValidator(XMLValidator* theValidator); void resetEntityDeclPool(); void resetURIStringPool(); // ----------------------------------------------------------------------- // Private helper methods // // These are implemented in XMLScanner2.cpp, to keep the main file from // becoming so bloated. We can't have any bloated files. // ----------------------------------------------------------------------- unsigned int buildAttList ( const RefVectorOf& providedAttrs , const unsigned int attCount , XMLElementDecl* elemDecl , RefVectorOf& toFill ); void checkIDRefs(); bool isLegalToken(const XMLPScanToken& toCheck); bool normalizeAttValue ( const XMLAttDef* const attDef , const XMLCh* const value , XMLBuffer& toFill ); bool normalizeAttRawValue ( const XMLCh* const attrName , const XMLCh* const value , XMLBuffer& toFill ); unsigned int resolvePrefix ( const XMLCh* const prefix , const ElemStack::MapModes mode ); unsigned int resolvePrefix ( const XMLCh* const prefix , XMLBuffer& uriBufToFill , const ElemStack::MapModes mode ); void scanReset(const InputSource& src); void sendCharData(XMLBuffer& toSend); XMLTokens senseNextToken(unsigned int& orgReader); void updateNSMap ( const XMLCh* const attrName , const XMLCh* const attrValue ); void scanRawAttrListforNameSpaces(const RefVectorOf* theRawAttrList, int attCount); void parseSchemaLocation(const XMLCh* const schemaLocationStr); void resolveSchemaGrammar(const XMLCh* const loc, const XMLCh* const uri); bool switchGrammar(unsigned int newGrammarNameSpaceIndex); bool switchGrammar(const XMLCh* const newGrammarNameSpace); bool laxElementValidation(QName* element, ContentLeafNameTypeVector* cv, const XMLContentModel* const cm, const unsigned int parentElemDepth); bool anyAttributeValidation(SchemaAttDef* attWildCard, unsigned int uriId, bool& skipThisOne, bool& laxThisOne); // ----------------------------------------------------------------------- // Private scanning methods // ----------------------------------------------------------------------- bool basicAttrValueScan ( const XMLCh* const attrName , XMLBuffer& toFill ); bool getQuotedString(XMLBuffer& toFill); unsigned int rawAttrScan ( const XMLCh* const elemName , RefVectorOf& toFill , bool& isEmpty ); bool scanAttValue ( const XMLAttDef* const attDef , XMLBuffer& toFill ); void scanCDSection(); void scanCharData(XMLBuffer& toToUse); bool scanCharRef(XMLCh& toFill, XMLCh& second); void scanComment(); bool scanContent(const bool extEntity); void scanEndTag(bool& gotData); EntityExpRes scanEntityRef ( const bool inAttVal , XMLCh& firstCh , XMLCh& secondCh , bool& escaped ); bool scanEq(); void scanMiscellaneous(); void scanPI(); void scanProlog(); bool scanStartTag(bool& gotData); bool scanStartTagNS(bool& gotData); void scanXMLDecl(const DeclTypes type); unsigned int scanUpToWSOr ( XMLBuffer& toFill , const XMLCh chEndChar ); // ----------------------------------------------------------------------- // Private helper methods // ----------------------------------------------------------------------- void resizeElemState(); // ----------------------------------------------------------------------- // IdentityConstraints Activation methods // ----------------------------------------------------------------------- void activateSelectorFor(IdentityConstraint* const ic); // ----------------------------------------------------------------------- // Data members // // fAttrList // Every time we get a new element start tag, we have to pass to // the document handler the attributes found. To make it more // efficient we keep this ref vector of XMLAttr objects around. We // just reuse it over and over, allowing it to grow to meet the // peek need. // // fBufMgr // This is a manager for temporary buffers used during scanning. // For efficiency we must use a set of static buffers, but we have // to insure that they are not incorrectly reused. So this manager // provides the smarts to hand out buffers as required. // // fDocHandler // The client code's document handler. If zero, then no document // handler callouts are done. We don't adopt it. // // fDocTypeHandler // The client code's document type handler (used by DTD Validator). // // fDoNamespaces // This flag indicates whether the client code wants us to do // namespaces or not. If the installed validator indicates that it // has to do namespaces, then this is ignored. // // fElemStack // This is the element stack that is used to track the elements that // are currently being worked on. // // fEntityHandler // The client code's entity handler. If zero, then no entity handler // callouts are done. We don't adopt it. // // fEntityResolver // The client code's entity resolver. Need to store this info for // Schema parse entity resolving. // // fErrorReporter // The client code's error reporter. If zero, then no error reporter // callouts are done. We don't adopt it. // // fErrorHandler // The client code's error handler. Need to store this info for // Schema parse error handling. // // fExitOnFirstFatal // This indicates whether we bail out on the first fatal XML error // or not. It defaults to true, which is the strict XML way, but it // can be changed. // // fValidationConstraintFatal // This indicates whether we treat validation constraint errors as // fatal errors or not. It defaults to false, but it can be changed. // // fIDRefList // This is a list of XMLRefInfo objects. This member lets us do all // needed ID-IDREF balancing checks. // // fInException // To avoid a circular freakout when we catch an exception and emit // it, which would normally throw again if the 'fail on first error' // flag is one. // // fRawAttrList // During the initial scan of the attributes we can only do a raw // scan for key/value pairs. So this vector is used to store them // until they can be processed (and put into fAttrList.) // // fReaderMgr // This is the reader manager, from which we get characters. It // manages the reader stack for us, and provides a lot of convenience // methods to do specialized checking for chars, sequences of chars, // skipping chars, etc... // // fReuseGrammar // This flag is set on a per-scan basis. So its provided in the // scanDocument() and scanFirst() methods, and applies for that // one pass. It indicates if the Grammar should be reused or not. // If so, then all the Grammar will be ignored. // There cannot be any internal subset. // // fScannerId // fSequenceId // These are used for progressive parsing, to make sure that the // client code does the right thing at the right time. // // fStandalone // Indicates whether the document is standalone or not. Defaults to // no, but can be overridden in the XMLDecl. // // fHasNoDTD // Indicates the document has no DTD or has only an internal DTD subset // which contains no parameter entity references. // // fValidate // Indicates whether any validation should be done. This is defined // by the existence of a Grammar together with fValScheme. // // fValidator // The installed validator. We look at them via the abstract // validator interface, and don't know what it actual is. // Either point to user's installed validator, or fDTDValidator // or fSchemaValidator. // // fDTDValidator // The DTD validator instance. // // fSchemaValidator // The Schema validator instance. // // fValidatorFromUser // This flag indicates whether the validator was installed from // user. If false, then the validator was created by the Scanner. // // fValScheme // This is the currently set validation scheme. It defaults to // 'never', but can be set by the client. // // fErrorCount // The number of errors we've encountered. // // fDoSchema // This flag indicates whether the client code wants Schema to // be processed or not. // // fSchemaFullChecking // This flag indicates whether the client code wants full Schema // constraint checking. // // fSeeXsi // This flag indicates a schema has been seen. // // fAttName // fAttValue // fCDataBuf // fNameBuf // fQNameBuf // fPrefixBuf // For the most part, buffers are obtained from the fBufMgr object // on the fly. However, for the start tag scan, we have a set of // fixed buffers for performance reasons. These are used a lot and // there are a number of them, so asking the buffer manager each // time for new buffers is a bit too much overhead. // // fEmptyNamespaceId // This is the id of the empty namespace URI. This is a special one // because of the xmlns="" type of deal. We have to quickly sense // that its the empty namespace. // // fUnknownNamespaceId // This is the id of the namespace URI which is assigned to the // global namespace. Its for debug purposes only, since there is no // real global namespace URI. Its set by the derived class. // // fXMLNamespaceId // fXMLNSNamespaceId // These are the ids of the namespace URIs which are assigned to the // 'xml' and 'xmlns' special prefixes. The former is officially // defined but the latter is not, so we just provide one for debug // purposes. // // fSchemaNamespaceId // This is the id of the schema namespace URI. // // fElemState // fElemStateSize // Stores an element next state from DFA content model - used for // wildcard validation // // fGrammarResolver // Grammar Pool that stores all the Grammar // // fGrammar // Current Grammar used by the Scanner and Validator // // fGrammarType // Current Grammar Type. Store this value instead of calling getGrammarType // all the time for faster performance. // // fEntityDeclPool // This is a pool of EntityDecl objects, which contains all of the // general entities that are declared in the DTD subsets, plus the // default entities (such as > < ...) defined by the XML Standard. // // fURIStringPool // This is a pool for URIs with unique ids assigned. We use a standard // string pool class. This pool is going to be shared by all Grammar. // Use only if namespace is turned on. // // fMatcherStack // Stack of active XPath matchers for identity constraints. All // active XPath matchers are notified of startElement, characters // and endElement callbacks in order to perform their matches. // // fValueStoreCache // Cache of value stores for identity constraint fields. // // fFieldActivator // Activates fields within a certain scope when a selector matches // its xpath. // // fRootElemName // No matter we are using DTD or Schema Grammar, if a DOCTYPE exists, // we need to verify the root element name. So store the rootElement // that is used in the DOCTYPE in the Scanner instead of in the DTDGrammar // where it used to // // fExternalSchemaLocation // The list of Namespace/SchemaLocation that was specified externally // using setExternalSchemaLocation. // // fExternalNoNamespaceSchemaLocation // The no target namespace XML Schema Location that was specified // externally using setExternalNoNamespaceSchemaLocation. // // ----------------------------------------------------------------------- bool fDoNamespaces; bool fExitOnFirstFatal; bool fValidationConstraintFatal; bool fInException; bool fReuseGrammar; bool fStandalone; bool fHasNoDTD; bool fValidate; bool fValidatorFromUser; bool fDoSchema; bool fSchemaFullChecking; bool fSeeXsi; int fErrorCount; unsigned int fEmptyNamespaceId; unsigned int fUnknownNamespaceId; unsigned int fXMLNamespaceId; unsigned int fXMLNSNamespaceId; unsigned int fSchemaNamespaceId; unsigned int fElemStateSize; XMLUInt32 fScannerId; XMLUInt32 fSequenceId; unsigned int* fElemState; RefVectorOf* fAttrList; XMLBufferMgr fBufMgr; XMLDocumentHandler* fDocHandler; DocTypeHandler* fDocTypeHandler; ElemStack fElemStack; XMLEntityHandler* fEntityHandler; EntityResolver* fEntityResolver; XMLErrorReporter* fErrorReporter; ErrorHandler* fErrorHandler; RefHashTableOf* fIDRefList; RefVectorOf* fRawAttrList; ReaderMgr fReaderMgr; XMLValidator* fValidator; DTDValidator* fDTDValidator; SchemaValidator* fSchemaValidator; ValSchemes fValScheme; XMLBuffer fAttNameBuf; XMLBuffer fAttValueBuf; XMLBuffer fCDataBuf; XMLBuffer fNameBuf; XMLBuffer fQNameBuf; XMLBuffer fPrefixBuf; XMLBuffer fURIBuf; GrammarResolver* fGrammarResolver; Grammar* fGrammar; Grammar::GrammarType fGrammarType; NameIdPool* fEntityDeclPool; XMLStringPool* fURIStringPool; XPathMatcherStack* fMatcherStack; ValueStoreCache* fValueStoreCache; FieldActivator* fFieldActivator; XMLCh* fRootElemName; XMLCh* fExternalSchemaLocation; XMLCh* fExternalNoNamespaceSchemaLocation; }; // --------------------------------------------------------------------------- // XMLScanner: Getter methods // --------------------------------------------------------------------------- inline const XMLDocumentHandler* XMLScanner::getDocHandler() const { return fDocHandler; } inline XMLDocumentHandler* XMLScanner::getDocHandler() { return fDocHandler; } inline const DocTypeHandler* XMLScanner::getDocTypeHandler() const { return fDocTypeHandler; } inline DocTypeHandler* XMLScanner::getDocTypeHandler() { return fDocTypeHandler; } inline bool XMLScanner::getDoNamespaces() const { return fDoNamespaces; } inline const XMLEntityHandler* XMLScanner::getEntityHandler() const { return fEntityHandler; } inline XMLEntityHandler* XMLScanner::getEntityHandler() { return fEntityHandler; } inline const XMLErrorReporter* XMLScanner::getErrorReporter() const { return fErrorReporter; } inline XMLErrorReporter* XMLScanner::getErrorReporter() { return fErrorReporter; } inline bool XMLScanner::getExitOnFirstFatal() const { return fExitOnFirstFatal; } inline bool XMLScanner::getValidationConstraintFatal() const { return fValidationConstraintFatal; } inline RefHashTableOf* XMLScanner::getIDRefList() { return fIDRefList; } inline bool XMLScanner::getInException() const { return fInException; } inline const RefHashTableOf* XMLScanner::getIDRefList() const { return fIDRefList; } inline const Locator* XMLScanner::getLocator() const { return &fReaderMgr; } inline unsigned int XMLScanner::getSrcOffset() const { return fReaderMgr.getSrcOffset(); } inline bool XMLScanner::getStandalone() const { return fStandalone; } inline XMLScanner::ValSchemes XMLScanner::getValidationScheme() const { return fValScheme; } inline const XMLValidator* XMLScanner::getValidator() const { return fValidator; } inline XMLValidator* XMLScanner::getValidator() { return fValidator; } inline bool XMLScanner::getDoSchema() const { return fDoSchema; } inline bool XMLScanner::getValidationSchemaFullChecking() const { return fSchemaFullChecking; } inline int XMLScanner::getErrorCount() { return fErrorCount; } inline bool XMLScanner::isValidatorFromUser() { return fValidatorFromUser; } inline unsigned int XMLScanner::getEmptyNamespaceId() const { return fEmptyNamespaceId; } inline unsigned int XMLScanner::getUnknownNamespaceId() const { return fUnknownNamespaceId; } inline unsigned int XMLScanner::getXMLNamespaceId() const { return fXMLNamespaceId; } inline unsigned int XMLScanner::getXMLNSNamespaceId() const { return fXMLNSNamespaceId; } inline NameIdPoolEnumerator XMLScanner::getEntityEnumerator() const { return NameIdPoolEnumerator(fEntityDeclPool); } inline const DTDEntityDecl* XMLScanner::getEntityDecl(const XMLCh* const entName) const { return fEntityDeclPool->getByKey(entName); } inline DTDEntityDecl* XMLScanner::getEntityDecl(const XMLCh* const entName) { return fEntityDeclPool->getByKey(entName); } inline NameIdPool* XMLScanner::getEntityDeclPool() { return fEntityDeclPool; } inline const NameIdPool* XMLScanner::getEntityDeclPool() const { return fEntityDeclPool; } inline const XMLStringPool* XMLScanner::getURIStringPool() const { return fURIStringPool; } inline XMLStringPool* XMLScanner::getURIStringPool() { return fURIStringPool; } inline bool XMLScanner::getHasNoDTD() const { return fHasNoDTD; } inline XMLCh* XMLScanner::getExternalSchemaLocation() const { return fExternalSchemaLocation; } inline XMLCh* XMLScanner::getExternalNoNamespaceSchemaLocation() const { return fExternalNoNamespaceSchemaLocation; } // --------------------------------------------------------------------------- // XMLScanner: Setter methods // --------------------------------------------------------------------------- inline void XMLScanner::setDocHandler(XMLDocumentHandler* const docHandler) { fDocHandler = docHandler; } inline void XMLScanner::setDocTypeHandler(DocTypeHandler* const docTypeHandler) { fDocTypeHandler = docTypeHandler; } inline void XMLScanner::setDoNamespaces(const bool doNamespaces) { fDoNamespaces = doNamespaces; if (fDoNamespaces) { if (!fURIStringPool) { fURIStringPool = new XMLStringPool(); resetURIStringPool(); } } } inline void XMLScanner::setErrorReporter(XMLErrorReporter* const errHandler) { fErrorReporter = errHandler; fDTDValidator->setErrorReporter(fErrorReporter); fSchemaValidator->setErrorReporter(fErrorReporter); } inline void XMLScanner::setErrorHandler(ErrorHandler* const handler) { fErrorHandler = handler; } inline void XMLScanner::setEntityHandler(XMLEntityHandler* const entityHandler) { fEntityHandler = entityHandler; fReaderMgr.setEntityHandler(entityHandler); } inline void XMLScanner::setEntityResolver(EntityResolver* const handler) { fEntityResolver = handler; } inline void XMLScanner::setExitOnFirstFatal(const bool newValue) { fExitOnFirstFatal = newValue; } inline void XMLScanner::setValidationConstraintFatal(const bool newValue) { fValidationConstraintFatal = newValue; } inline void XMLScanner::setValidationScheme(const ValSchemes newScheme) { fValScheme = newScheme; // validation flag for Val_Auto is set to false by default, // and will be turned to true if a grammar is seen if (fValScheme == Val_Always) fValidate = true; else fValidate = false; } inline void XMLScanner::setValidator(XMLValidator* const valToAdopt) { if (fValidatorFromUser) delete fValidator; fValidator = valToAdopt; fValidatorFromUser = true; initValidator(fValidator); } inline void XMLScanner::setDoSchema(const bool doSchema) { fDoSchema = doSchema; } inline void XMLScanner::setValidationSchemaFullChecking(const bool schemaFullChecking) { fSchemaFullChecking = schemaFullChecking; } inline void XMLScanner::setHasNoDTD(const bool hasNoDTD) { fHasNoDTD = hasNoDTD; } inline void XMLScanner::setRootElemName(XMLCh* rootElemName) { delete [] fRootElemName; fRootElemName = XMLString::replicate(rootElemName); } inline void XMLScanner::setExternalSchemaLocation(const XMLCh* const schemaLocation) { delete [] fExternalSchemaLocation; fExternalSchemaLocation = XMLString::replicate(schemaLocation); } inline void XMLScanner::setExternalNoNamespaceSchemaLocation(const XMLCh* const noNamespaceSchemaLocation) { delete [] fExternalNoNamespaceSchemaLocation; fExternalNoNamespaceSchemaLocation = XMLString::replicate(noNamespaceSchemaLocation); } inline void XMLScanner::setExternalSchemaLocation(const char* const schemaLocation) { delete [] fExternalSchemaLocation; fExternalSchemaLocation = XMLString::transcode(schemaLocation); } inline void XMLScanner::setExternalNoNamespaceSchemaLocation(const char* const noNamespaceSchemaLocation) { delete [] fExternalNoNamespaceSchemaLocation; fExternalNoNamespaceSchemaLocation = XMLString::transcode(noNamespaceSchemaLocation); } // --------------------------------------------------------------------------- // XMLScanner: Mutator methods // --------------------------------------------------------------------------- inline void XMLScanner::incrementErrorCount() { ++fErrorCount; } // --------------------------------------------------------------------------- // XMLScanner: Deprecated methods // --------------------------------------------------------------------------- inline bool XMLScanner::getDoValidation() const { return fValidate; } inline void XMLScanner::setDoValidation(const bool validate, const bool setValScheme) { fValidate = validate; if (setValScheme) { if (fValidate) fValScheme = Val_Always; else fValScheme = Val_Never; } } #endif