/*
     This file is part of libextractor.
     (C) 2002, 2003, 2004, 2006 Vidyut Samanta and Christian Grothoff

     libextractor is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published
     by the Free Software Foundation; either version 2, or (at your
     option) any later version.

     libextractor is distributed in the hope that it will be useful, but
     WITHOUT ANY WARRANTY; without even the implied warranty of
     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     General Public License for more details.

     You should have received a copy of the GNU General Public License
     along with libextractor; see the file COPYING.  If not, write to the
     Free Software Foundation, Inc., 59 Temple Place - Suite 330,
     Boston, MA 02111-1307, USA.
 */

#include "platform.h"
#include "extractor.h"
#include <stdlib.h>
#include <string.h>
#ifndef MINGW
/* NOTE(review): the header name of the #include guarded here was missing
   in the copy of this file under review.  Nothing below needs a
   POSIX-only header; restore the original line if upstream has one. */
#endif
#include "convert.h"

#define DEBUG_EXTRACT_ID3v2 0

/* Sizes, in bytes, of the fixed-length structures parsed below. */
enum {
  ID3V2_HEADER_SIZE = 10,       /* "ID3", version (2), flags (1), size (4) */
  ID3V2_FRAME_HEADER_SIZE = 6   /* frame ID (3), frame size (3) */
};

/**
 * Prepend a keyword to a keyword list.
 *
 * This function takes ownership of phrase: on success the pointer is
 * stored in the new list node, on allocation failure it is freed.
 *
 * @param oldhead current head of the list; becomes the new node's successor
 * @param phrase keyword text, allocated with malloc (or compatible)
 * @param type keyword type recorded in the new node
 * @return the new list head, or oldhead if no node could be allocated
 */
static struct EXTRACTOR_Keywords *
addKeyword(EXTRACTOR_KeywordList *oldhead,
           char *phrase,
           EXTRACTOR_KeywordType type)
{
  EXTRACTOR_KeywordList *keyword;

  keyword = malloc(sizeof *keyword);
  if (keyword == NULL) {
    /* Out of memory: drop this keyword rather than crash. */
    free(phrase);
    return oldhead;
  }
  keyword->next = oldhead;
  keyword->keyword = phrase;
  keyword->keywordType = type;
  return keyword;
}

/* Association of a three-character frame ID with a keyword type. */
typedef struct {
  const char *text;
  EXTRACTOR_KeywordType type;
} Matches;

/*
 * Frame IDs that are turned into keywords.  The table is searched in
 * order and the first match wins; it is terminated by a NULL text.
 * NOTE(review): a second, unreachable "WAF" entry used to follow "WCP";
 * it may have been meant as a different frame ID -- confirm.
 */
static const Matches tmap[] = {
  { "TAL", EXTRACTOR_TITLE },
  { "TT1", EXTRACTOR_GROUP },
  { "TT2", EXTRACTOR_TITLE },
  { "TT3", EXTRACTOR_TITLE },
  { "TXT", EXTRACTOR_DESCRIPTION },
  { "TPB", EXTRACTOR_PUBLISHER },
  { "WAF", EXTRACTOR_LOCATION },
  { "WAR", EXTRACTOR_LOCATION },
  { "WAS", EXTRACTOR_LOCATION },
  { "WCP", EXTRACTOR_COPYRIGHT },
  { "WCM", EXTRACTOR_DISCLAIMER },
  { "TSS", EXTRACTOR_FORMAT },
  { "TYE", EXTRACTOR_DATE },
  { "TLA", EXTRACTOR_LANGUAGE },
  { "TP1", EXTRACTOR_ARTIST },
  { "TP2", EXTRACTOR_ARTIST },
  { "TP3", EXTRACTOR_CONDUCTOR },
  { "TP4", EXTRACTOR_INTERPRET },
  { "IPL", EXTRACTOR_CONTRIBUTOR },
  { "TOF", EXTRACTOR_FILENAME },
  { "TEN", EXTRACTOR_PRODUCER },
  { "TCO", EXTRACTOR_SUBJECT },
  { "TCR", EXTRACTOR_COPYRIGHT },
  { "SLT", EXTRACTOR_LYRICS },
  { "TOA", EXTRACTOR_ARTIST },
  { "TRC", EXTRACTOR_RESOURCE_IDENTIFIER },
  { "TCM", EXTRACTOR_CREATOR },
  { "TOT", EXTRACTOR_ALBUM },
  { "TOL", EXTRACTOR_AUTHOR },
  { "COM", EXTRACTOR_COMMENT },
  /* The empty string matches any frame ID whose first byte is NUL. */
  { "", EXTRACTOR_KEYWORDS },
  { NULL, 0 },
};

/* mimetype = audio/mpeg */

/**
 * Extract keywords from an ID3v2.2 tag located at the start of a buffer.
 *
 * The buffer must begin with "ID3" followed by version bytes 0x02 0x00.
 * Every frame whose ID matches an entry of tmap is converted to UTF-8
 * and prepended to the keyword list.  The header flag byte (data[5]) is
 * not examined, so no unsynchronisation decoding is performed.
 *
 * @param filename name of the file (unused)
 * @param data buffer holding the file contents
 * @param size number of bytes available in data
 * @param prev head of the keyword list gathered so far
 * @return the possibly extended keyword list; prev itself if the buffer
 *         does not start with a complete ID3v2.2 tag
 */
struct EXTRACTOR_Keywords *
libextractor_id3v2_extract(const char *filename,
                           const unsigned char *data,
                           size_t size,
                           struct EXTRACTOR_Keywords *prev)
{
  unsigned int tsize;
  size_t end;
  size_t pos;

  (void) filename;
  if ( (size < 16) ||
       (data[0] != 0x49) ||     /* 'I' */
       (data[1] != 0x44) ||     /* 'D' */
       (data[2] != 0x33) ||     /* '3' */
       (data[3] != 0x02) ||
       (data[4] != 0x00) )
    return prev;

  /* The tag size is stored as four bytes of seven significant bits each
     and does not include the header itself. */
  tsize = ( ( (unsigned int) (data[6] & 0x7F) << 21 ) |
            ( (unsigned int) (data[7] & 0x7F) << 14 ) |
            ( (unsigned int) (data[8] & 0x7F) <<  7 ) |
            ( (unsigned int) (data[9] & 0x7F) <<  0 ) );
  if ((size_t) tsize + ID3V2_HEADER_SIZE > size)
    return prev;

  /* Frames occupy [ID3V2_HEADER_SIZE, end). */
  end = (size_t) tsize + ID3V2_HEADER_SIZE;
  pos = ID3V2_HEADER_SIZE;
  while (pos < end) {
    size_t csize;
    int i;

    if (pos + ID3V2_FRAME_HEADER_SIZE > end)
      break;
    csize = ( (size_t) data[pos + 3] << 16 ) |
            ( (size_t) data[pos + 4] <<  8 ) |
              (size_t) data[pos + 5];
    /* A zero size marks padding; an oversized frame marks a corrupt tag. */
    if ( (csize == 0) ||
         (csize > end - (pos + ID3V2_FRAME_HEADER_SIZE)) )
      break;

    for (i = 0; tmap[i].text != NULL; i++) {
      const char *text;
      size_t tlen;
      char *word;

      if (0 != strncmp(tmap[i].text, (const char *) &data[pos], 3))
        continue;

      /* The first content byte names the encoding; the text is the
         remaining csize - 1 bytes. */
      text = (const char *) &data[pos + ID3V2_FRAME_HEADER_SIZE + 1];
      tlen = csize - 1;
      word = NULL;
      if (tlen > 0) {
        switch (data[pos + ID3V2_FRAME_HEADER_SIZE]) {
        case 0x01:
          word = convertToUtf8(text, tlen, "UCS-2");
          break;
        case 0x00:
        default:
          /* unknown encoding bytes are treated as ISO-8859-1 */
          word = convertToUtf8(text, tlen, "ISO-8859-1");
          break;
        }
      }
      if ( (word != NULL) && (word[0] != '\0') )
        prev = addKeyword(prev, word, tmap[i].type);
      else
        free(word);
      break;
    }
    pos += ID3V2_FRAME_HEADER_SIZE + csize;
  }
  return prev;
}

/* end of id3v2extractor.c */