// NOTE(review): the names inside the four angle-bracket #import lines were
// lost from this file. The headers below are reconstructed from the C library
// functions the original implementation called (regcomp, strlen, isdigit);
// confirm against version control.
#import <Foundation/Foundation.h>
#import <ctype.h>
#import <regex.h>
#import <string.h>
#import "StringAdditions.h"

/* Associates an HTML entity name (without the '&' and ';') with its character. */
typedef struct {
    const char *name;
    unichar value;
} name2val;

/* Named entities understood by -stringByUnescapingHTML; NULL-terminated. */
static const name2val escapes[] = {
    { "amp",  '&'  },
    { "apos", '\'' },
    { "copy", 169  },
    { "lt",   '<'  },
    { "gt",   '>'  },
    { "quot", '"'  },
    { "reg",  174  },
    { NULL,   0    }
};

/* Returns the numeric value of an ASCII hexadecimal digit, or -1 for anything else. */
static int SADigitValue(unichar c)
{
    if (c >= '0' && c <= '9') {
        return c - '0';
    }
    if (c >= 'a' && c <= 'f') {
        return c - 'a' + 10;
    }
    if (c >= 'A' && c <= 'F') {
        return c - 'A' + 10;
    }
    return -1;
}

/* YES for the ASCII letters a-z and A-Z only. */
static BOOL SAIsASCIILetter(unichar c)
{
    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
}

/*
 * Reads at most maxDigits digits of the given base from text, starting at
 * index from. Stores the accumulated number in *value and returns how many
 * digits were consumed (0 when the character at from is not a digit or from
 * is past the end of text).
 */
static NSUInteger SAScanDigits(NSString *text, NSUInteger from, unsigned base,
                               NSUInteger maxDigits, uint32_t *value)
{
    NSUInteger end = [text length];
    NSUInteger count = 0;
    uint32_t total = 0;

    while (count < maxDigits && from + count < end) {
        int digit = SADigitValue([text characterAtIndex:from + count]);
        if (digit < 0 || (unsigned)digit >= base) {
            break;
        }
        total = total * base + (uint32_t)digit;
        count++;
    }
    *value = total;
    return count;
}

/*
 * Looks up the entity name occupying nameRange of text in the escapes table.
 * The whole name must match: comparing only a prefix would let "&a;" pass
 * for "&amp;". Returns NULL when the name is not in the table.
 */
static const name2val *SALookupEntity(NSString *text, NSRange nameRange)
{
    const name2val *esc;

    for (esc = escapes; esc->name != NULL; esc++) {
        if (strlen(esc->name) != nameRange.length) {
            continue;
        }
        NSUInteger k = 0;
        while (k < nameRange.length &&
               (unichar)esc->name[k] == [text characterAtIndex:nameRange.location + k]) {
            k++;
        }
        if (k == nameRange.length) {
            return esc;
        }
    }
    return NULL;
}

/*
 * Tries to decode one entity whose '&' sits at ampIndex. Accepted forms:
 *   &NNN;     1-3 decimal digits
 *   &#NNNNN;  1-5 decimal digits
 *   &#xHHHH;  1-4 hexadecimal digits ('x' or 'X')
 *   &name;    ASCII letters, where name is listed in the escapes table
 * On success stores the decoded code point in *codePoint and the length of
 * the entity text, '&' and ';' included, in *entityLength, and returns YES.
 * Returns NO, leaving both outputs untouched, for anything else.
 */
static BOOL SADecodeEntity(NSString *text, NSUInteger ampIndex,
                           uint32_t *codePoint, NSUInteger *entityLength)
{
    NSUInteger end = [text length];
    NSUInteger cursor = ampIndex + 1;
    NSUInteger run = 0;
    uint32_t value = 0;

    if (cursor >= end) {
        return NO;
    }

    unichar lead = [text characterAtIndex:cursor];
    if (lead == '#') {
        cursor++;
        if (cursor < end) {
            unichar marker = [text characterAtIndex:cursor];
            if (marker == 'x' || marker == 'X') {
                cursor++;
                run = SAScanDigits(text, cursor, 16, 4, &value);
            } else {
                run = SAScanDigits(text, cursor, 10, 5, &value);
            }
        }
    } else if (lead >= '0' && lead <= '9') {
        run = SAScanDigits(text, cursor, 10, 3, &value);
    } else if (SAIsASCIILetter(lead)) {
        NSUInteger nameEnd = cursor;
        while (nameEnd < end && SAIsASCIILetter([text characterAtIndex:nameEnd])) {
            nameEnd++;
        }
        const name2val *known = SALookupEntity(text, NSMakeRange(cursor, nameEnd - cursor));
        if (known == NULL) {
            return NO;
        }
        value = known->value;
        run = nameEnd - cursor;
    }

    // Every form needs at least one payload character followed directly by ';'.
    cursor += run;
    if (run == 0 || cursor >= end || [text characterAtIndex:cursor] != ';') {
        return NO;
    }

    *codePoint = value;
    *entityLength = cursor - ampIndex + 1;
    return YES;
}

/*
 * Appends codePoint to buffer as UTF-16. Values above 0xFFFF (reachable
 * through five-digit decimal references) become a surrogate pair instead of
 * being truncated to 16 bits.
 */
static void SAAppendCodePoint(NSMutableString *buffer, uint32_t codePoint)
{
    unichar units[2];
    NSUInteger count = 1;

    if (codePoint > 0xFFFF) {
        uint32_t offset = codePoint - 0x10000;
        units[0] = (unichar)(0xD800 + (offset >> 10));
        units[1] = (unichar)(0xDC00 + (offset & 0x3FF));
        count = 2;
    } else {
        units[0] = (unichar)codePoint;
    }
    [buffer appendString:[NSString stringWithCharacters:units length:count]];
}

@implementation NSString (StringAdditions)

/*
 * Returns the text that precedes the first "\n" of the receiver, without the
 * line terminator (a "\r" directly before the "\n" is dropped as well).
 * Returns a copy of the receiver when it contains no "\n".
 */
- (NSString *)stringFromFirstLine {
    NSRange newline = [self rangeOfString:@"\n"];
    if (newline.location == NSNotFound) {
        return [NSString stringWithString:self];
    }

    NSUInteger lineLength = newline.location;
    // Treat "\r\n" as one terminator so the result never ends in a stray CR.
    if (lineLength > 0 && [self characterAtIndex:lineLength - 1] == '\r') {
        lineLength--;
    }
    return [self substringToIndex:lineLength];
}

/*
 * Splits the receiver on runs of whitespace and newlines and returns the
 * non-empty pieces in order. The array is empty when the receiver holds
 * nothing but whitespace.
 */
- (NSArray *)stringToComponents {
    NSCharacterSet *white = [NSCharacterSet whitespaceAndNewlineCharacterSet];
    NSCharacterSet *nonWhite = [white invertedSet];
    NSScanner *scan = [NSScanner scannerWithString:self];
    NSMutableArray *result = [NSMutableArray array];

    [scan setCharactersToBeSkipped:white];
    while (![scan isAtEnd]) {
        NSString *comp = nil;
        if (![scan scanCharactersFromSet:nonWhite intoString:&comp]) {
            break;
        }
        [result addObject:comp];
    }
    return result;
}

/* Returns the receiver with leading and trailing whitespace and newlines removed. */
- (NSString *)stringByTrimmingWhitespaceAndNewlines {
    return [self stringByTrimmingCharactersInSet:
               [NSCharacterSet whitespaceAndNewlineCharacterSet]];
}

/*
 * Returns the first line of the trimmed receiver, i.e. everything up to the
 * first "\n" or "\r" that remains after trimming. Returns an empty string
 * when the receiver is empty or consists only of whitespace.
 */
- (NSString *)firstComponent {
    // Start from a defined value: the scanner leaves the out-parameter
    // untouched when there is nothing to scan.
    NSString *result = @"";
    NSScanner *scan = [NSScanner scannerWithString:
                          [self stringByTrimmingWhitespaceAndNewlines]];

    [scan scanUpToCharactersFromSet:
              [NSCharacterSet characterSetWithCharactersInString:@"\n\r"]
                         intoString:&result];
    return result;
}

/*
 * Returns a copy of the receiver in which HTML character references are
 * replaced by the characters they denote. Recognised forms are &NNN;
 * (1-3 decimal digits), &#NNNNN; (1-5 decimal digits), &#xHHHH; (1-4 hex
 * digits) and the named entities amp, apos, copy, lt, gt, quot and reg.
 * Unknown or malformed references are copied through unchanged.
 *
 * The receiver is processed as UTF-16 directly, so characters outside ASCII
 * survive the conversion.
 */
- (NSString *)stringByUnescapingHTML {
    NSUInteger length = [self length];
    NSMutableString *result = [NSMutableString stringWithCapacity:length];
    NSUInteger pos = 0;
    BOOL decodedAny = NO;

    while (pos < length) {
        NSRange amp = [self rangeOfString:@"&"
                                  options:NSLiteralSearch
                                    range:NSMakeRange(pos, length - pos)];
        if (amp.location == NSNotFound) {
            [result appendString:[self substringFromIndex:pos]];
            break;
        }
        if (amp.location > pos) {
            [result appendString:
                [self substringWithRange:NSMakeRange(pos, amp.location - pos)]];
        }

        uint32_t codePoint = 0;
        NSUInteger entityLength = 0;
        if (SADecodeEntity(self, amp.location, &codePoint, &entityLength)) {
            SAAppendCodePoint(result, codePoint);
            pos = amp.location + entityLength;
            decodedAny = YES;
        } else {
            // Not an entity: keep the ampersand and resume right after it.
            [result appendString:@"&"];
            pos = amp.location + 1;
        }
    }

    if (!decodedAny) {
        return [NSString stringWithString:self];
    }
    return result;
}

@end