/******************************* LICENCE ************************************** * Any code in this file may be redistributed or modified under the terms of * the GNU General Public Licence as published by the Free Software * Foundation; version 2 of the licence. ****************************** END LICENCE ***********************************/ /****************************************************************************** * Author: * Andrew Smith, http://littlesvr.ca/misc/contactandrew.php * * Contributors: * The Samba Project - http://samba.org/ * - most of the filename mangling code ******************************************************************************/ #include #include #include #include #include #include "bk.h" #include "bkInternal.h" #include "bkMangle.h" #include "bkError.h" /* length of aaa in aaa~xxxx.bbb */ #define NCHARS_9660_BASE 3 /* * note that some unsigned ints in mangling functions are * required to be 32 bits long for the hashing to work * see the samba code for details */ /****************************************************************************** * charIsValid9660() * * */ bool charIsValid9660(char theChar) { if( (theChar >= '0' && theChar <= '9') || (theChar >= 'a' && theChar <= 'z') || (theChar >= 'A' && theChar <= 'Z') || strchr("._-$~", theChar) ) { return true; } else return false; } /****************************************************************************** * charIsValidJoliet() * * */ bool charIsValidJoliet(char theChar) { /* can be any ascii char between decimal 32 and 126 * except '*' (42) '/' (47), ':' (58), ';' (59), '?' (63) and '\' (92) */ if(theChar < 32 || theChar > 126 || theChar == 42 || theChar == 47 || theChar == 58 || theChar == 59 || theChar == 63 || theChar == 92) return false; else return true; } /* hash a string of the specified length. The string does not need to be null terminated this hash needs to be fast with a low collision rate (what hash doesn't?) */ unsigned hashString(const char *str, unsigned int length) { unsigned value; unsigned i; static const unsigned fnv1Prime = 0x01000193; /* Set the initial value from the key size. */ /* fnv1 of the string: idra@samba.org 2002 */ value = 0xa6b93095; for (i = 0; i < length; i++) { value *= (unsigned)fnv1Prime; value ^= (unsigned)(str[i]); } /* note that we force it to a 31 bit hash, to keep within the limits of the 36^6 mangle space */ return value & ~0x80000000; } /****************************************************************************** * mangleDir() * Mangles the filenames from origDir and puts the results into newDir, whcich * it also creates. * filenameTypes is all types required in the end * */ int mangleDir(const BkDir* origDir, DirToWrite* newDir, int filenameTypes) { int rc; bool haveCollisions; int numTimesTried; int num9660Collisions; char newName9660[13]; /* for remangling */ int numJolietCollisions; char newNameJoliet[NCHARS_FILE_ID_MAX_JOLIET]; /* for remangling */ BkFileBase* currentOrigChild; BaseToWrite** currentNewChild; /* for counting collisions */ BaseToWrite* currentChild; BaseToWrite* currentChildToCompare; /* MANGLE all names, create new children list */ currentOrigChild = origDir->children; currentNewChild = &(newDir->children); while(currentOrigChild != NULL) { if( IS_DIR(currentOrigChild->posixFileMode) ) { *currentNewChild = malloc(sizeof(DirToWrite)); if(*currentNewChild == NULL) return BKERROR_OUT_OF_MEMORY; bzero(*currentNewChild, sizeof(DirToWrite)); } else if( IS_REG_FILE(currentOrigChild->posixFileMode) ) { *currentNewChild = malloc(sizeof(FileToWrite)); if(*currentNewChild == NULL) return BKERROR_OUT_OF_MEMORY; bzero(*currentNewChild, sizeof(FileToWrite)); } else if( IS_SYMLINK(currentOrigChild->posixFileMode) ) { *currentNewChild = malloc(sizeof(SymLinkToWrite)); if(*currentNewChild == NULL) return BKERROR_OUT_OF_MEMORY; bzero(*currentNewChild, sizeof(SymLinkToWrite)); } else return BKERROR_NO_SPECIAL_FILES; if(currentOrigChild->original9660name[0] != '\0') strcpy((*currentNewChild)->name9660, currentOrigChild->original9660name); else shortenNameFor9660(currentOrigChild->name, (*currentNewChild)->name9660); if(filenameTypes | FNTYPE_ROCKRIDGE) strcpy((*currentNewChild)->nameRock, currentOrigChild->name); else (*currentNewChild)->nameRock[0] = '\0'; if(filenameTypes | FNTYPE_JOLIET) mangleNameForJoliet(currentOrigChild->name, (*currentNewChild)->nameJoliet, false); else (*currentNewChild)->nameJoliet[0] = '\0'; (*currentNewChild)->posixFileMode = currentOrigChild->posixFileMode; if( IS_DIR(currentOrigChild->posixFileMode) ) { rc = mangleDir(BK_DIR_PTR(currentOrigChild), DIRTW_PTR(*currentNewChild), filenameTypes); if(rc < 0) { free(*currentNewChild); *currentNewChild = NULL; return rc; } } else if( IS_REG_FILE(currentOrigChild->posixFileMode) ) { BkFile* origFile = BK_FILE_PTR(currentOrigChild); FileToWrite* newFile = FILETW_PTR(*currentNewChild); newFile->size = origFile->size; newFile->location = origFile->location; newFile->onImage = origFile->onImage; newFile->offset = origFile->position; if( !origFile->onImage ) { newFile->pathAndName = malloc(strlen(origFile->pathAndName) + 1); if( newFile->pathAndName == NULL ) { free(*currentNewChild); *currentNewChild = NULL; return BKERROR_OUT_OF_MEMORY; } strcpy(newFile->pathAndName, origFile->pathAndName); } newFile->origFile = origFile; } else /* if( IS_SYMLINK(currentOrigChild->posixFileMode) ) */ { strncpy(SYMLINKTW_PTR(*currentNewChild)->target, BK_SYMLINK_PTR(currentOrigChild)->target, NCHARS_SYMLINK_TARGET_MAX); } currentOrigChild = currentOrigChild->next; currentNewChild = &((*currentNewChild)->next); } /* END MANGLE all names, create new children list */ haveCollisions = true; numTimesTried = 0; while(haveCollisions && numTimesTried < 50000) /* random big number */ { haveCollisions = false; currentChild = newDir->children; while(currentChild != NULL) { num9660Collisions = 0; numJolietCollisions = 0; currentChildToCompare = newDir->children; while(currentChildToCompare != NULL) { if(strcmp(currentChild->name9660, currentChildToCompare->name9660) == 0) { num9660Collisions++; } if(strcmp(currentChild->nameJoliet, currentChildToCompare->nameJoliet) == 0) { numJolietCollisions++; } currentChildToCompare = currentChildToCompare->next; } if(num9660Collisions != 1) { haveCollisions = true; if( IS_DIR(currentChild->posixFileMode) ) mangleNameFor9660(currentChild->name9660, newName9660, true); else mangleNameFor9660(currentChild->name9660, newName9660, false); strcpy(currentChild->name9660, newName9660); } if(numJolietCollisions != 1) { haveCollisions = true; mangleNameForJoliet(currentChild->nameJoliet, newNameJoliet, true); strcpy(currentChild->nameJoliet, newNameJoliet); } currentChild = currentChild->next; } numTimesTried++; } if(haveCollisions) return BKERROR_MANGLE_TOO_MANY_COL; return 1; } /****************************************************************************** * mangleNameFor9660() * Convert a long filename into an ISO9660 acceptable form: * see charIsValid9660(), 8 chars max for directories and 8.3 chars * for files. Extension is kept if it's shorter then 4 chars. * 3 chars from the original name are kept, the rest is filled with ~XXXX where * the XXXX is a random string (but still with valid characters). * */ void mangleNameFor9660(const char* origName, char* newName, bool isADir) { char* dot_p; int i; char base[7]; /* max 6 chars */ char extension[4]; /* max 3 chars */ int extensionLen; unsigned hash; unsigned v; /* these are the characters we use in the 8.3 hash. Must be 36 chars long */ static const char* baseChars = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; /* FIND extension */ if(isADir) { dot_p = NULL; } else { dot_p = strrchr(origName, '.'); if(dot_p) { /* if the extension contains any illegal characters or is too long (> 3) or zero length then we treat it as part of the prefix */ for(i = 0; i < 4 && dot_p[i + 1] != '\0'; i++) { if( !charIsValid9660(dot_p[i + 1]) ) { dot_p = NULL; break; } } if(i == 0 || i == 4 || dot_p == origName) dot_p = NULL; } } /* END FIND extension */ /* GET base */ /* the leading characters in the mangled name is taken from * the first characters of the name, if they are ascii otherwise * '_' is used */ for(i = 0; i < NCHARS_9660_BASE && origName[i] != '\0'; i++) { base[i] = origName[i]; if ( !charIsValid9660(origName[i]) ) base[i] = '_'; base[i] = toupper(base[i]); } /* make sure base doesn't contain part of the extension */ if(dot_p != NULL) { /* !! test this */ if(i > dot_p - origName) i = dot_p - origName; } /* fixed length */ while(i < NCHARS_9660_BASE) { base[i] = '_'; i++; } base[NCHARS_9660_BASE] = '\0'; /* END GET base */ /* GET extension */ /* the extension of the mangled name is taken from the first 3 * ascii chars after the dot */ extensionLen = 0; if(dot_p) { for(i = 1; extensionLen < 3 && dot_p[i] != '\0'; i++) { extension[extensionLen] = toupper(dot_p[i]); extensionLen++; } } extension[extensionLen] = '\0'; /* END GET extension */ /* find the hash for this prefix */ hash = hashString(origName, strlen(origName)); /* now form the mangled name. */ for(i = 0; i < NCHARS_9660_BASE; i++) { newName[i] = base[i]; } newName[NCHARS_9660_BASE] = '~'; v = hash; newName[7] = baseChars[v % 36]; for(i = 6; i > NCHARS_9660_BASE; i--) { v = v / 36; newName[i] = baseChars[v % 36]; } /* add the extension and terminate string */ if(extensionLen > 0) { newName[8] = '.'; strcpy(newName + 9, extension); } else { newName[8] = '\0'; } printf("remangled '%s' -> '%s'\n", origName, newName); } void mangleNameForJoliet(const char* origName, char* newName, bool appendHash) { char* dot_p; int i; char base[NCHARS_FILE_ID_MAX_JOLIET]; /* '\0' terminated */ char extension[6]; /* max 3 chars */ int extensionLen; unsigned hash; unsigned v; char hashStr[5]; /* '\0' terminated */ /* these are the characters we use in the 8.3 hash. Must be 36 chars long */ static const char* baseChars = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; /* FIND extension candidate */ dot_p = strrchr(origName, '.'); if(dot_p) { /* if the extension contains any illegal characters or is too long (> 5) or zero length then we treat it as part of the prefix */ for(i = 0; i < 6 && dot_p[i + 1] != '\0'; i++) { if( !charIsValidJoliet(dot_p[i + 1]) ) { dot_p = NULL; break; } } if(i == 0 || i == 6 || dot_p == origName) dot_p = NULL; } /* END FIND extension candidate */ /* GET base */ /* The leading characters in the mangled name are taken from * the first characters of the name if they are allowed, otherwise * '_' is used */ for(i = 0; i < NCHARS_FILE_ID_MAX_JOLIET - 1 && origName[i] != '\0'; i++) { base[i] = origName[i]; if ( !charIsValidJoliet(origName[i]) ) base[i] = '_'; } /* make sure base doesn't contain part of the extension */ if(dot_p != NULL) { if(i > dot_p - origName) i = dot_p - origName; } base[i] = '\0'; /* END GET base */ /* GET extension */ /* the extension of the mangled name is taken from the first 3 ascii chars after the dot */ extensionLen = 0; if(dot_p) { for(i = 1; extensionLen < 5 && dot_p[i] != '\0'; i++) { extension[extensionLen] = dot_p[i]; extensionLen++; } } extension[extensionLen] = '\0'; /* END GET extension */ /* FIND the hash for this prefix */ hash = hashString(origName, strlen(origName)); hashStr[4] = '\0'; v = hash; hashStr[3] = baseChars[v % 36]; for(i = 2; i >= 0; i--) { v = v / 36; hashStr[i] = baseChars[v % 36]; } /* END FIND the hash for this prefix */ /* ASSEMBLE name */ strcpy(newName, base); if(appendHash) { /* max name len - '~' - hash - '.' - extension */ if(strlen(newName) >= NCHARS_FILE_ID_MAX_JOLIET - 1 - 1 - 4 - 1 - 5) newName[NCHARS_FILE_ID_MAX_JOLIET - 1 - 1 - 4 - 1 - 5] = '\0'; strcat(newName, "~"); strcat(newName, hashStr); } if(extensionLen > 0) { strcat(newName, "."); strcat(newName, extension); } /* END ASSEMBLE name */ if(appendHash) printf("joliet mangle '%s' -> '%s'\n", origName, newName); } /****************************************************************************** * shortenNameFor9660() * Same as mangleNameFor9660() but without the ~XXXX. * */ void shortenNameFor9660(const char* origName, char* newName) { char* dot_p; int i; char base[9]; /* max 9 chars */ char extension[4]; /* max 3 chars */ int extensionLen; /* FIND extension */ /* ISO9660 requires that directories have no dots ('.') but some isolinux * cds have the kernel in a directory with a dot so i need to allow dots in * directories :( */ /*if(isADir) { dot_p = NULL; } else {*/ dot_p = strrchr(origName, '.'); if(dot_p) { /* if the extension contains any illegal characters or is too long (> 3) or zero length then we treat it as part of the prefix */ for(i = 0; i < 4 && dot_p[i + 1] != '\0'; i++) { if( !charIsValid9660(dot_p[i + 1]) ) { dot_p = NULL; break; } } if(i == 0 || i == 4 || dot_p == origName) dot_p = NULL; } /*}*/ /* END FIND extension */ /* GET base */ /* the leading characters in the mangled name is taken from * the first characters of the name, if they are allowed otherwise * '_' is used */ for(i = 0; i < 8 && origName[i] != '\0'; i++) { base[i] = origName[i]; if ( !charIsValid9660(origName[i]) ) base[i] = '_'; base[i] = toupper(base[i]); } /* make sure base doesn't contain part of the extension */ if(dot_p != NULL) { /* !! test this to make sure it works */ if(i > dot_p - origName) i = dot_p - origName; } base[i] = '\0'; /* END GET base */ /* GET extension */ /* the extension of the mangled name is taken from the first 3 ascii chars after the dot */ extensionLen = 0; if(dot_p) { for(i = 1; extensionLen < 3 && dot_p[i] != '\0'; i++) { extension[extensionLen] = toupper(dot_p[i]); extensionLen++; } } extension[extensionLen] = '\0'; /* END GET extension */ strcpy(newName, base); if(extensionLen > 0) { strcat(newName, "."); strcat(newName, extension); } }