/* * Copyright (c) 2003 Apple Computer, Inc. All rights reserved. * * @APPLE_LICENSE_HEADER_START@ * * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved. * * This file contains Original Code and/or Modifications of Original Code * as defined in and that are subject to the Apple Public Source License * Version 2.0 (the 'License'). You may not use this file except in * compliance with the License. Please obtain a copy of the License at * http://www.opensource.apple.com/apsl/ and read it before using this * file. * * The Original Code and all software distributed under the License are * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. * Please see the License for the specific language governing rights and * limitations under the License. * * @APPLE_LICENSE_HEADER_END@ */ /* CFUnicodePrecomposition.c Copyright 1999-2002, Apple, Inc. All rights reserved. Responsibility: Aki Inoue */ #if !defined(KERNEL) #define KERNEL 0 #endif #include #if KERNEL #include "CFUnicodePrecomposition.h" #include "CFUniCharPrecompData.h" #else KERNEL #include #include #include "CFUniChar.h" #include "CFUnicodePrecomposition.h" #include "CFInternal.h" #include "CFUniCharPriv.h" #endif KERNEL // Canonical Precomposition #if KERNEL static const uint32_t __CFUniCharPrecompositionTableLength = (sizeof(__CFUniCharPrecompSourceTable) / (sizeof(uint32_t) * 2)); CF_EXPORT uint8_t **CFUniCharCombiningPriorityTable; CF_EXPORT uint8_t **CFUniCharCombiningPriorityExtraTable; CF_EXPORT uint8_t CFUniCharNumberOfPlanesForCombiningPriority; CF_EXPORT uint8_t __CFUniCharGetCombiningPriority(UTF32Char character) { if (character < (CFUniCharNumberOfPlanesForCombiningPriority << 16)) { uint32_t plane = character >> 16; const uint8_t *bitmap = CFUniCharCombiningPriorityTable[plane]; if (bitmap) { uint8_t value = bitmap[(character >> 8) & 0xFF]; if (value) { bitmap = CFUniCharCombiningPriorityExtraTable[plane] + ((value - 1) * 256); return bitmap[character % 256]; } } } return 0; } CF_EXPORT uint8_t **CFUniCharNonBaseBitmap; CF_EXPORT uint8_t CFUniCharNumberOfPlanesForNonBaseBitmap; CF_INLINE bool __CFUniCharIsNonBaseCharacter(UTF32Char character) { if (character < (CFUniCharNumberOfPlanesForNonBaseBitmap << 16)) { const uint8_t *bitmap = CFUniCharNonBaseBitmap[character >> 16]; uint8_t value = bitmap[(character >> 8) & 0xFF]; if (value == 0xFF) { return true; } else if (value) { bitmap = bitmap + ((value - 1) * 32) + 256; return (bitmap[(character & 0xFF) / 8] & (1 << (character % 8)) ? true : false); } } return false; } #else KERNEL static UTF32Char *__CFUniCharPrecompSourceTable = NULL; static uint32_t __CFUniCharPrecompositionTableLength = 0; static uint16_t *__CFUniCharBMPPrecompDestinationTable = NULL; static uint32_t *__CFUniCharNonBMPPrecompDestinationTable = NULL; static const uint8_t *__CFUniCharNonBaseBitmapForBMP_P = NULL; // Adding _P so the symbol name is different from the one in CFUnicodeDecomposition.c static const uint8_t *__CFUniCharCombiningClassForBMP = NULL; static CFSpinLock_t __CFUniCharPrecompositionTableLock = 0; static void __CFUniCharLoadPrecompositionTable(void) { __CFSpinLock(&__CFUniCharPrecompositionTableLock); if (NULL == __CFUniCharPrecompSourceTable) { const void *bytes = CFUniCharGetMappingData(kCFUniCharCanonicalPrecompMapping); uint32_t bmpMappingLength; if (NULL == bytes) { __CFSpinUnlock(&__CFUniCharPrecompositionTableLock); return; } __CFUniCharPrecompositionTableLength = *(((uint32_t *)bytes)++); bmpMappingLength = *(((uint32_t *)bytes)++); __CFUniCharPrecompSourceTable = (UTF32Char *)bytes; __CFUniCharBMPPrecompDestinationTable = (uint16_t *)((intptr_t)bytes + (__CFUniCharPrecompositionTableLength * sizeof(UTF32Char) * 2)); __CFUniCharNonBMPPrecompDestinationTable = (uint32_t *)(((intptr_t)__CFUniCharBMPPrecompDestinationTable) + bmpMappingLength); __CFUniCharNonBaseBitmapForBMP_P = CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, 0); __CFUniCharCombiningClassForBMP = CFUniCharGetUnicodePropertyDataForPlane(kCFUniCharCombiningProperty, 0); } __CFSpinUnlock(&__CFUniCharPrecompositionTableLock); } // Adding _P so the symbol name is different from the one in CFUnicodeDecomposition.c #define __CFUniCharIsNonBaseCharacter __CFUniCharIsNonBaseCharacter_P CF_INLINE bool __CFUniCharIsNonBaseCharacter(UTF32Char character) { return CFUniCharIsMemberOfBitmap(character, (character < 0x10000 ? __CFUniCharNonBaseBitmapForBMP_P : CFUniCharGetBitmapPtrForPlane(kCFUniCharNonBaseCharacterSet, ((character >> 16) & 0xFF)))); } #endif KERNEL typedef struct { UTF16Char _key; UTF16Char _value; } __CFUniCharPrecomposeBMPMappings; static UTF16Char __CFUniCharGetMappedBMPValue(const __CFUniCharPrecomposeBMPMappings *theTable, uint32_t numElem, UTF16Char character) { const __CFUniCharPrecomposeBMPMappings *p, *q, *divider; if ((character < theTable[0]._key) || (character > theTable[numElem-1]._key)) { return 0; } p = theTable; q = p + (numElem-1); while (p <= q) { divider = p + ((q - p) >> 1); /* divide by 2 */ if (character < divider->_key) { q = divider - 1; } else if (character > divider->_key) { p = divider + 1; } else { return divider->_value; } } return 0; } typedef struct { UTF32Char _key; uint32_t _value; } __CFUniCharPrecomposeMappings; static uint32_t __CFUniCharGetMappedValue_P(const __CFUniCharPrecomposeMappings *theTable, uint32_t numElem, UTF32Char character) { const __CFUniCharPrecomposeMappings *p, *q, *divider; if ((character < theTable[0]._key) || (character > theTable[numElem-1]._key)) { return 0; } p = theTable; q = p + (numElem-1); while (p <= q) { divider = p + ((q - p) >> 1); /* divide by 2 */ if (character < divider->_key) { q = divider - 1; } else if (character > divider->_key) { p = divider + 1; } else { return divider->_value; } } return 0; } #if !KERNEL __private_extern__ #endif !KERNEL UTF32Char CFUniCharPrecomposeCharacter(UTF32Char base, UTF32Char combining) { uint32_t value; #if !KERNEL if (NULL == __CFUniCharPrecompSourceTable) __CFUniCharLoadPrecompositionTable(); #endif !KERNEL if (!(value = __CFUniCharGetMappedValue_P((const __CFUniCharPrecomposeMappings *)__CFUniCharPrecompSourceTable, __CFUniCharPrecompositionTableLength, combining))) return 0xFFFD; #if !KERNEL // We don't have precomposition in non-BMP if (value & kCFUniCharNonBmpFlag) { value = __CFUniCharGetMappedValue_P((const __CFUniCharPrecomposeMappings *)((uint32_t *)__CFUniCharNonBMPPrecompDestinationTable + (value & 0xFFFF)), (value >> 16) & 0x7FFF, base); } else { #endif !KERNEL value = __CFUniCharGetMappedBMPValue((const __CFUniCharPrecomposeBMPMappings *)((uint32_t *)__CFUniCharBMPPrecompDestinationTable + (value & 0xFFFF)), (value >> 16), base); #if !KERNEL } #endif !KERNEL return (value ? value : 0xFFFD); } #define HANGUL_SBASE 0xAC00 #define HANGUL_LBASE 0x1100 #define HANGUL_VBASE 0x1161 #define HANGUL_TBASE 0x11A7 #define HANGUL_SCOUNT 11172 #define HANGUL_LCOUNT 19 #define HANGUL_VCOUNT 21 #define HANGUL_TCOUNT 28 #define HANGUL_NCOUNT (HANGUL_VCOUNT * HANGUL_TCOUNT) CF_INLINE void __CFUniCharMoveBufferFromEnd(UTF16Char *convertedChars, uint32_t length, uint32_t delta) { const UTF16Char *limit = convertedChars; UTF16Char *dstP; convertedChars += length; dstP = convertedChars + delta; while (convertedChars > limit) *(--dstP) = *(--convertedChars); } bool CFUniCharPrecompose(const UTF16Char *characters, uint32_t length, uint32_t *consumedLength, UTF16Char *precomposed, uint32_t maxLength, uint32_t *filledLength) { UTF32Char currentChar = 0, lastChar = 0, precomposedChar = 0xFFFD; uint32_t originalLength = length, usedLength = 0; UTF16Char *currentBase = precomposed; uint8_t currentClass, lastClass = 0; bool currentBaseIsBMP = true; bool isPrecomposed; #if !KERNEL if (NULL == __CFUniCharPrecompSourceTable) __CFUniCharLoadPrecompositionTable(); #endif !KERNEL while (length > 0) { currentChar = *(characters++); --length; if (CFUniCharIsSurrogateHighCharacter(currentChar) && (length > 0) && CFUniCharIsSurrogateLowCharacter(*characters)) { currentChar = CFUniCharGetLongCharacterForSurrogatePair(currentChar, *(characters++)); --length; } if (lastChar && __CFUniCharIsNonBaseCharacter(currentChar)) { isPrecomposed = (precomposedChar == 0xFFFD ? false : true); if (isPrecomposed) lastChar = precomposedChar; #if KERNEL currentClass = __CFUniCharGetCombiningPriority(currentChar); #else KERNEL currentClass = (currentChar > 0xFFFF ? CFUniCharGetUnicodeProperty(currentChar, kCFUniCharCombiningProperty) : CFUniCharGetCombiningPropertyForCharacter(currentChar, __CFUniCharCombiningClassForBMP)); #endif KERNEL if ((lastClass == 0) || (currentClass != lastClass)) { if ((precomposedChar = CFUniCharPrecomposeCharacter(lastChar, currentChar)) == 0xFFFD) { if (isPrecomposed) precomposedChar = lastChar; lastClass = currentClass; } else { lastClass = 0; continue; } } if (currentChar > 0xFFFF) { // Non-BMP usedLength += 2; if (usedLength > maxLength) break; currentChar -= 0x10000; *(precomposed++) = (UTF16Char)((currentChar >> 10) + 0xD800UL); *(precomposed++) = (UTF16Char)((currentChar & 0x3FF) + 0xDC00UL); } else { ++usedLength; if (usedLength > maxLength) break; *(precomposed++) = (UTF16Char)currentChar; } } else { if ((currentChar >= HANGUL_LBASE) && (currentChar < (HANGUL_LBASE + 0xFF))) { // Hangul Jamo int8_t lIndex = currentChar - HANGUL_LBASE; if ((length > 0) && (0 <= lIndex) && (lIndex <= HANGUL_LCOUNT)) { int16_t vIndex = *characters - HANGUL_VBASE; if ((vIndex >= 0) && (vIndex <= HANGUL_VCOUNT)) { int16_t tIndex = 0; ++characters; --length; if (length > 0) { tIndex = *characters - HANGUL_TBASE; if ((tIndex < 0) || (tIndex > HANGUL_TCOUNT)) { tIndex = 0; } else { ++characters; --length; } } currentChar = (lIndex * HANGUL_VCOUNT + vIndex) * HANGUL_TCOUNT + tIndex + HANGUL_SBASE; } } } if (precomposedChar != 0xFFFD) { if (currentBaseIsBMP) { // Non-BMP if (lastChar > 0xFFFF) { // Last char was Non-BMP --usedLength; memmove(currentBase + 1, currentBase + 2, (precomposed - (currentBase + 2)) * sizeof(UTF16Char)); } *(currentBase) = (UTF16Char)precomposedChar; } else { if (lastChar < 0x10000) { // Last char was BMP ++usedLength; if (usedLength > maxLength) break; __CFUniCharMoveBufferFromEnd(currentBase + 1, precomposed - (currentBase + 1), 1); } precomposedChar -= 0x10000; *currentBase = (UTF16Char)((precomposedChar >> 10) + 0xD800UL); *(currentBase + 1) = (UTF16Char)((precomposedChar & 0x3FF) + 0xDC00UL); } precomposedChar = 0xFFFD; } currentBase = precomposed; lastChar = currentChar; lastClass = 0; if (currentChar > 0xFFFF) { // Non-BMP usedLength += 2; if (usedLength > maxLength) break; currentChar -= 0x10000; *(precomposed++) = (UTF16Char)((currentChar >> 10) + 0xD800UL); *(precomposed++) = (UTF16Char)((currentChar & 0x3FF) + 0xDC00UL); currentBaseIsBMP = false; } else { ++usedLength; if (usedLength > maxLength) break; *(precomposed++) = (UTF16Char)currentChar; currentBaseIsBMP = true; } } } if (precomposedChar != 0xFFFD) { if (currentChar > 0xFFFF) { // Non-BMP if (lastChar < 0x10000) { // Last char was BMP ++usedLength; if (usedLength > maxLength) { if (consumedLength) *consumedLength = originalLength - length; if (filledLength) *filledLength = usedLength; return false; } __CFUniCharMoveBufferFromEnd(currentBase + 1, precomposed - (currentBase + 1), 1); } precomposedChar -= 0x10000; *currentBase = (UTF16Char)((precomposedChar >> 10) + 0xD800UL); *(currentBase + 1) = (UTF16Char)((precomposedChar & 0x3FF) + 0xDC00UL); } else { if (lastChar > 0xFFFF) { // Last char was Non-BMP --usedLength; memmove(currentBase + 1, currentBase + 2, (precomposed - (currentBase + 2)) * sizeof(UTF16Char)); } *(currentBase) = (UTF16Char)precomposedChar; } } if (consumedLength) *consumedLength = originalLength - length; if (filledLength) *filledLength = usedLength; return true; }