/* * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved. * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in * compliance with the License. Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this * file. * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. * * @APPLE_LICENSE_HEADER_END@ */ /* CFXMLParser.c Copyright 1999-2002, Apple, Inc. All rights reserved. Responsibility: Chris Parker */ #include #include #include "CFXMLInputStream.h" #include "CFUniChar.h" #include "CFInternal.h" struct __CFXMLParser { CFRuntimeBase _cfBase; _CFXMLInputStream input; void **stack; void **top; UInt32 capacity; struct __CFXMLNode *node; // Our private node; we use it to report back information CFMutableDictionaryRef argDict; CFMutableArrayRef argArray; UInt32 options; CFXMLParserCallBacks callBacks; CFXMLParserContext context; CFXMLParserStatusCode status; CFStringRef errorString; }; static CFStringRef __CFXMLParserCopyDescription(CFTypeRef cf) { const struct __CFXMLParser *parser = cf; return CFStringCreateWithFormat(CFGetAllocator(cf), NULL, CFSTR(""), parser); } static void __CFXMLParserDeallocate(CFTypeRef cf) { struct __CFXMLParser *parser = (struct __CFXMLParser *)cf; CFAllocatorRef alloc = CFGetAllocator(parser); _freeInputStream(&(parser->input)); if (parser->argDict) CFRelease(parser->argDict); if (parser->argArray) CFRelease(parser->argArray); if (parser->errorString) CFRelease(parser->errorString); if (parser->node) CFRelease(parser->node); CFAllocatorDeallocate(alloc, parser->stack); if (parser->context.info && parser->context.release) { parser->context.release(parser->context.info); } } static CFTypeID __kCFXMLParserTypeID = _kCFRuntimeNotATypeID; static const CFRuntimeClass __CFXMLParserClass = { 0, "CFXMLParser", NULL, // init NULL, // copy __CFXMLParserDeallocate, NULL, NULL, NULL, // __CFXMLParserCopyDescription }; __private_extern__ void __CFXMLParserInitialize(void) { __kCFXMLParserTypeID = _CFRuntimeRegisterClass(&__CFXMLParserClass); } CFTypeID CFXMLParserGetTypeID(void) { return __kCFXMLParserTypeID; } #if defined(__ppc__) #define __mask ~0x3 #else #define __mask ~0x0 #endif void CFXMLParserGetContext(CFXMLParserRef parser, CFXMLParserContext *context) { CFAssert1(parser != NULL, __kCFLogAssertion, "%s(): NULL parser not permitted", __PRETTY_FUNCTION__); __CFGenericValidateType(parser, __kCFXMLParserTypeID); if (context) { context->version = parser->context.version; context->info = parser->context.info; context->retain = (void *)((uintptr_t)parser->context.retain & __mask); context->release = (void *)((uintptr_t)parser->context.release & __mask); context->copyDescription = (void *)((uintptr_t)parser->context.copyDescription & __mask); } } void CFXMLParserGetCallBacks(CFXMLParserRef parser, CFXMLParserCallBacks *callBacks) { __CFGenericValidateType(parser, __kCFXMLParserTypeID); if (callBacks) { callBacks->version = parser->callBacks.version; callBacks->createXMLStructure = (void *)((uintptr_t)parser->callBacks.createXMLStructure & __mask); callBacks->addChild = (void *)((uintptr_t)parser->callBacks.addChild & __mask); callBacks->endXMLStructure = (void *)((uintptr_t)parser->callBacks.endXMLStructure & __mask); callBacks->resolveExternalEntity = (void *)((uintptr_t)parser->callBacks.resolveExternalEntity & __mask); callBacks->handleError = (void *)((uintptr_t)parser->callBacks.handleError & __mask); } } #undef __mask CFURLRef CFXMLParserGetSourceURL(CFXMLParserRef parser) { __CFGenericValidateType(parser, __kCFXMLParserTypeID); return parser->input.url; } /* Returns the character index or line number of the current parse location */ CFIndex CFXMLParserGetLocation(CFXMLParserRef parser) { __CFGenericValidateType(parser, __kCFXMLParserTypeID); return _inputStreamCurrentLocation(&parser->input); } CFIndex CFXMLParserGetLineNumber(CFXMLParserRef parser) { __CFGenericValidateType(parser, __kCFXMLParserTypeID); return _inputStreamCurrentLine(&parser->input); } /* Returns the top-most object returned by the createXMLStructure callback */ void *CFXMLParserGetDocument(CFXMLParserRef parser) { __CFGenericValidateType(parser, __kCFXMLParserTypeID); if (parser->capacity > 0) return parser->stack[0]; else return NULL; } CFXMLParserStatusCode CFXMLParserGetStatusCode(CFXMLParserRef parser) { __CFGenericValidateType(parser, __kCFXMLParserTypeID); return parser->status; } CFStringRef CFXMLParserCopyErrorDescription(CFXMLParserRef parser) { __CFGenericValidateType(parser, __kCFXMLParserTypeID); return CFRetain(parser->errorString); } void CFXMLParserAbort(CFXMLParserRef parser, CFXMLParserStatusCode errorCode, CFStringRef errorDescription) { __CFGenericValidateType(parser, __kCFXMLParserTypeID); CFAssert1(errorCode > 0, __kCFLogAssertion, "%s(): errorCode must be greater than zero", __PRETTY_FUNCTION__); CFAssert1(errorDescription != NULL, __kCFLogAssertion, "%s(): errorDescription may not be NULL", __PRETTY_FUNCTION__); __CFGenericValidateType(errorDescription, CFStringGetTypeID()); parser->status = errorCode; if (parser->errorString) CFRelease(parser->errorString); parser->errorString = CFStringCreateCopy(NULL, errorDescription); } static Boolean parseXML(CFXMLParserRef parser); static Boolean parseComment(CFXMLParserRef parser, Boolean report); static Boolean parseProcessingInstruction(CFXMLParserRef parser, Boolean report); static Boolean parseInlineDTD(CFXMLParserRef parser); static Boolean parseDTD(CFXMLParserRef parser); static Boolean parsePhysicalEntityReference(CFXMLParserRef parser); static Boolean parseCDSect(CFXMLParserRef parser); static Boolean parseEntityReference(CFXMLParserRef parser, Boolean report); static Boolean parsePCData(CFXMLParserRef parser); static Boolean parseWhitespace(CFXMLParserRef parser); static Boolean parseAttributeListDeclaration(CFXMLParserRef parser); static Boolean parseNotationDeclaration(CFXMLParserRef parser); static Boolean parseElementDeclaration(CFXMLParserRef parser); static Boolean parseEntityDeclaration(CFXMLParserRef parser); static Boolean parseExternalID(CFXMLParserRef parser, Boolean alsoAcceptPublicID, CFXMLExternalID *extID); static Boolean parseCloseTag(CFXMLParserRef parser, CFStringRef tag); static Boolean parseTagContent(CFXMLParserRef parser); static Boolean parseTag(CFXMLParserRef parser); static Boolean parseAttributes(CFXMLParserRef parser); static Boolean parseAttributeValue(CFXMLParserRef parser, CFMutableStringRef str); // Utilities; may need to make these accessible to the property list parser to avoid code duplication static void _CFReportError(CFXMLParserRef parser, CFXMLParserStatusCode errNum, const char *str); static Boolean reportNewLeaf(CFXMLParserRef parser); // Assumes parser->node has been set and is ready to go static void pushXMLNode(CFXMLParserRef parser, void *node); static CFXMLParserRef __CFXMLParserInit(CFAllocatorRef alloc, CFURLRef dataSource, CFOptionFlags options, CFDataRef xmlData, CFIndex version, CFXMLParserCallBacks *callBacks, CFXMLParserContext *context) { struct __CFXMLParser *parser = (struct __CFXMLParser *)_CFRuntimeCreateInstance(alloc, __kCFXMLParserTypeID, sizeof(struct __CFXMLParser) - sizeof(CFRuntimeBase), NULL); struct __CFXMLNode *node = (struct __CFXMLNode *)_CFRuntimeCreateInstance(alloc, CFXMLNodeGetTypeID(), sizeof(struct __CFXMLNode) - sizeof(CFRuntimeBase), NULL); UniChar *buf; if (parser && node) { alloc = CFGetAllocator(parser); _initializeInputStream(&(parser->input), alloc, dataSource, xmlData); parser->top = parser->stack; parser->stack = NULL; parser->capacity = 0; buf = CFAllocatorAllocate(alloc, 128*sizeof(UniChar), 0); parser->node = node; parser->node->dataString = CFStringCreateMutableWithExternalCharactersNoCopy(alloc, buf, 0, 128, alloc); parser->node->additionalData = NULL; parser->node->version = version; parser->argDict = NULL; // don't create these until necessary parser->argArray = NULL; parser->options = options; parser->callBacks = *callBacks; FAULT_CALLBACK((void **)&(parser->callBacks.createXMLStructure)); FAULT_CALLBACK((void **)&(parser->callBacks.addChild)); FAULT_CALLBACK((void **)&(parser->callBacks.endXMLStructure)); FAULT_CALLBACK((void **)&(parser->callBacks.resolveExternalEntity)); FAULT_CALLBACK((void **)&(parser->callBacks.handleError)); if (context) { parser->context = *context; if (parser->context.info && parser->context.retain) { parser->context.retain(parser->context.info); } } else { parser->context.version = 0; parser->context.info = NULL; parser->context.retain = NULL; parser->context.release = NULL; parser->context.copyDescription = NULL; } parser->status = kCFXMLStatusParseNotBegun; parser->errorString = NULL; } else { if (parser) CFRelease(parser); if (node) CFRelease(node); parser = NULL; } return parser; } CFXMLParserRef CFXMLParserCreate(CFAllocatorRef allocator, CFDataRef xmlData, CFURLRef dataSource, CFOptionFlags parseOptions, CFIndex versionOfNodes, CFXMLParserCallBacks *callBacks, CFXMLParserContext *context) { CFAssert1(xmlData != NULL, __kCFLogAssertion, "%s(): NULL data not permitted", __PRETTY_FUNCTION__); __CFGenericValidateType(xmlData, CFDataGetTypeID()); CFAssert1(dataSource == NULL || CFGetTypeID(dataSource) == CFURLGetTypeID(), __kCFLogAssertion, "%s(): dataSource is not a valid CFURL", __PRETTY_FUNCTION__); CFAssert1(callBacks != NULL && callBacks->createXMLStructure != NULL && callBacks->addChild != NULL && callBacks->endXMLStructure != NULL, __kCFLogAssertion, "%s(): callbacks createXMLStructure, addChild, and endXMLStructure must all be non-NULL", __PRETTY_FUNCTION__); CFAssert2(versionOfNodes <= 1, __kCFLogAssertion, "%s(): version number %d is higher than supported by CFXMLParser", __PRETTY_FUNCTION__, versionOfNodes); CFAssert1(versionOfNodes != 0, __kCFLogAssertion, "%s(): version number 0 is no longer supported by CFXMLParser", __PRETTY_FUNCTION__); return __CFXMLParserInit(allocator, dataSource, parseOptions, xmlData, versionOfNodes, callBacks, context); } Boolean CFXMLParserParse(CFXMLParserRef parser) { CFXMLDocumentInfo docData; __CFGenericValidateType(parser, __kCFXMLParserTypeID); if (parser->status != kCFXMLStatusParseNotBegun) return false; parser->status = kCFXMLStatusParseInProgress; if (!_openInputStream(&parser->input)) { if (!parser->input.data) { // couldn't load URL parser->status = kCFXMLErrorNoData; parser->errorString = CFStringCreateWithFormat(CFGetAllocator(parser), NULL, CFSTR("No data found at %@"), CFURLGetString(parser->input.url)); } else { // couldn't figure out the encoding CFAssert(parser->input.encoding == kCFStringEncodingInvalidId, __kCFLogAssertion, "CFXMLParser internal error: input stream could not be opened"); parser->status = kCFXMLErrorUnknownEncoding; parser->errorString = CFStringCreateWithCString(CFGetAllocator(parser), "Encountered unknown encoding", kCFStringEncodingASCII); } if (parser->callBacks.handleError) { INVOKE_CALLBACK3(parser->callBacks.handleError, parser, parser->status, parser->context.info); } return false; } // Create the document parser->stack = CFAllocatorAllocate(CFGetAllocator(parser), 16 * sizeof(void *), 0); parser->capacity = 16; parser->node->dataTypeID = kCFXMLNodeTypeDocument; docData.encoding = _inputStreamGetEncoding(&parser->input); docData.sourceURL = parser->input.url; parser->node->additionalData = &docData; parser->stack[0] = (void *)INVOKE_CALLBACK3(parser->callBacks.createXMLStructure, parser, parser->node, parser->context.info); parser->top = parser->stack; parser->node->additionalData = NULL; // Client may have called CFXMLParserAbort() during any callback, so we must always check to see if we have an error status after a callback if (parser->status != kCFXMLStatusParseInProgress) { _CFReportError(parser, parser->status, NULL); return false; } return parseXML(parser); } /* The next several functions are all intended to parse past a particular XML structure. They expect parser->curr to be set to the first content character of their structure (e.g. parseXMLComment expects parser->curr to be set just past "