/* This file is part of libextractor. (C) 2002, 2003, 2004, 2006 Vidyut Samanta and Christian Grothoff libextractor is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. libextractor is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with libextractor; see the file COPYING. If not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. Some of this code is based on AVInfo 1.0 alpha 11 (c) George Shuklin, gs]AT[shounen.ru, 2002-2004 http://shounen.ru/soft/avinfo/ */ #define DEBUG_EXTRACT_MP3 0 #include "platform.h" #include "extractor.h" #include "convert.h" #include #include #include #include #include #include typedef struct { char * title; char * artist; char * album; char * year; char * comment; const char * genre; } id3tag; static const char *const genre_names[] = { gettext_noop("Blues"), gettext_noop("Classic Rock"), gettext_noop("Country"), gettext_noop("Dance"), gettext_noop("Disco"), gettext_noop("Funk"), gettext_noop("Grunge"), gettext_noop("Hip-Hop"), gettext_noop("Jazz"), gettext_noop("Metal"), gettext_noop("New Age"), gettext_noop("Oldies"), gettext_noop("Other"), gettext_noop("Pop"), gettext_noop("R&B"), gettext_noop("Rap"), gettext_noop("Reggae"), gettext_noop("Rock"), gettext_noop("Techno"), gettext_noop("Industrial"), gettext_noop("Alternative"), gettext_noop("Ska"), gettext_noop("Death Metal"), gettext_noop("Pranks"), gettext_noop("Soundtrack"), gettext_noop("Euro-Techno"), gettext_noop("Ambient"), gettext_noop("Trip-Hop"), gettext_noop("Vocal"), gettext_noop("Jazz+Funk"), gettext_noop("Fusion"), gettext_noop("Trance"), gettext_noop("Classical"), gettext_noop("Instrumental"), gettext_noop("Acid"), gettext_noop("House"), gettext_noop("Game"), gettext_noop("Sound Clip"), gettext_noop("Gospel"), gettext_noop("Noise"), gettext_noop("Alt. Rock"), gettext_noop("Bass"), gettext_noop("Soul"), gettext_noop("Punk"), gettext_noop("Space"), gettext_noop("Meditative"), gettext_noop("Instrumental Pop"), gettext_noop("Instrumental Rock"), gettext_noop("Ethnic"), gettext_noop("Gothic"), gettext_noop("Darkwave"), gettext_noop("Techno-Industrial"), gettext_noop("Electronic"), gettext_noop("Pop-Folk"), gettext_noop("Eurodance"), gettext_noop("Dream"), gettext_noop("Southern Rock"), gettext_noop("Comedy"), gettext_noop("Cult"), gettext_noop("Gangsta Rap"), gettext_noop("Top 40"), gettext_noop("Christian Rap"), gettext_noop("Pop/Funk"), gettext_noop("Jungle"), gettext_noop("Native American"), gettext_noop("Cabaret"), gettext_noop("New Wave"), gettext_noop("Psychedelic"), gettext_noop("Rave"), gettext_noop("Showtunes"), gettext_noop("Trailer"), gettext_noop("Lo-Fi"), gettext_noop("Tribal"), gettext_noop("Acid Punk"), gettext_noop("Acid Jazz"), gettext_noop("Polka"), gettext_noop("Retro"), gettext_noop("Musical"), gettext_noop("Rock & Roll"), gettext_noop("Hard Rock"), gettext_noop("Folk"), gettext_noop("Folk/Rock"), gettext_noop("National Folk"), gettext_noop("Swing"), gettext_noop("Fast-Fusion"), gettext_noop("Bebob"), gettext_noop("Latin"), gettext_noop("Revival"), gettext_noop("Celtic"), gettext_noop("Bluegrass"), gettext_noop("Avantgarde"), gettext_noop("Gothic Rock"), gettext_noop("Progressive Rock"), gettext_noop("Psychedelic Rock"), gettext_noop("Symphonic Rock"), gettext_noop("Slow Rock"), gettext_noop("Big Band"), gettext_noop("Chorus"), gettext_noop("Easy Listening"), gettext_noop("Acoustic"), gettext_noop("Humour"), gettext_noop("Speech"), gettext_noop("Chanson"), gettext_noop("Opera"), gettext_noop("Chamber Music"), gettext_noop("Sonata"), gettext_noop("Symphony"), gettext_noop("Booty Bass"), gettext_noop("Primus"), gettext_noop("Porn Groove"), gettext_noop("Satire"), gettext_noop("Slow Jam"), gettext_noop("Club"), gettext_noop("Tango"), gettext_noop("Samba"), gettext_noop("Folklore"), gettext_noop("Ballad"), gettext_noop("Power Ballad"), gettext_noop("Rhythmic Soul"), gettext_noop("Freestyle"), gettext_noop("Duet"), gettext_noop("Punk Rock"), gettext_noop("Drum Solo"), gettext_noop("A Cappella"), gettext_noop("Euro-House"), gettext_noop("Dance Hall"), gettext_noop("Goa"), gettext_noop("Drum & Bass"), gettext_noop("Club-House"), gettext_noop("Hardcore"), gettext_noop("Terror"), gettext_noop("Indie"), gettext_noop("BritPop"), gettext_noop("Negerpunk"), gettext_noop("Polsk Punk"), gettext_noop("Beat"), gettext_noop("Christian Gangsta Rap"), gettext_noop("Heavy Metal"), gettext_noop("Black Metal"), gettext_noop("Crossover"), gettext_noop("Contemporary Christian"), gettext_noop("Christian Rock"), gettext_noop("Merengue"), gettext_noop("Salsa"), gettext_noop("Thrash Metal"), gettext_noop("Anime"), gettext_noop("JPop"), gettext_noop("Synthpop"), }; #define GENRE_NAME_COUNT \ ((unsigned int)(sizeof genre_names / sizeof (const char *const))) #define MAX_MP3_SCAN_DEEP 16768 const int max_frames_scan=1024; enum{ MPEG_ERR=0,MPEG_V1=1,MPEG_V2=2,MPEG_V25=3}; enum{ LAYER_ERR=0,LAYER_1=1,LAYER_2=2,LAYER_3=3}; const unsigned int sync_mask=0xE0FF; const unsigned int mpeg_ver_mask=0x1800; const unsigned int mpeg_layer_mask=0x600; const unsigned int bitrate_mask=0xF00000; const unsigned int freq_mask=0xC0000; const unsigned int ch_mask=0xC0000000; const unsigned int pad_mask=0x20000; unsigned int bitrate_table[16][6]={ {0,0,0,0,0,0}, {32, 32, 32, 32, 32, 8}, {64, 48, 40, 64, 48, 16}, {96, 56, 48, 96, 56, 24}, {128, 64 , 56 , 128, 64 , 32}, {160, 80 , 64 , 160, 80 , 64}, {192, 96 , 80 , 192, 96 , 80}, {224, 112, 96 , 224, 112, 56}, {256, 128, 112, 256, 128, 64}, {288, 160, 128, 288, 160, 128}, {320, 192, 160, 320, 192, 160}, {352, 224, 192, 352, 224, 112}, {384, 256, 224, 384, 256, 128}, {416, 320, 256, 416, 320, 256}, {448, 384, 320, 448, 384, 320}, {-1,-1,-1,-1,-1,-1} }; int freq_table[4][3]={ {44100,22050,11025}, {48000,24000,12000}, {32000,16000,8000} }; #define OK 0 #define SYSERR 1 #define INVALID_ID3 2 static void trim(char * k) { while ( (strlen(k) > 0) && (isspace(k[strlen(k)-1])) ) k[strlen(k)-1] = '\0'; } static int get_id3(const char * data, size_t size, id3tag * id3) { const char * pos; if (size < 128) return INVALID_ID3; pos = &data[size - 128]; if (0 != strncmp("TAG", pos, 3)) return INVALID_ID3; pos += 3; id3->title = convertToUtf8(pos, 30, "ISO-8859-1"); trim(id3->title); pos += 30; id3->artist = convertToUtf8(pos, 30, "ISO-8859-1"); trim(id3->artist); pos += 30; id3->album = convertToUtf8(pos, 30, "ISO-8859-1"); trim(id3->album); pos += 30; id3->year = convertToUtf8(pos, 4, "ISO-8859-1"); trim(id3->year); pos += 4; id3->comment = convertToUtf8(pos, 30, "ISO-8859-1"); trim(id3->comment); pos += 30; id3->genre = ""; if (pos[0] < GENRE_NAME_COUNT) id3->genre = dgettext(PACKAGE, genre_names[(unsigned) pos[0]]); return OK; } static struct EXTRACTOR_Keywords * addkword(EXTRACTOR_KeywordList *oldhead, const char * phrase, EXTRACTOR_KeywordType type) { EXTRACTOR_KeywordList * keyword; keyword = malloc(sizeof(EXTRACTOR_KeywordList)); keyword->next = oldhead; keyword->keyword = strdup(phrase); keyword->keywordType = type; return keyword; } static struct EXTRACTOR_Keywords * mp3parse(const char * data, size_t size, struct EXTRACTOR_Keywords * prev) { unsigned int header; int counter=0; char mpeg_ver=0; char layer_ver=0; int idx_num=0; int bitrate=0; /*used for each frame*/ int avg_bps=0; /*average bitrate*/ int vbr_flag=0; int length=0; int sample_rate=0; int ch=0; int frame_size; int frames=0; size_t pos = 0; char * format; do { /* seek for frame start */ if (pos + sizeof(header) > size) { return prev; }/*unable to find header*/ memcpy(&header, &data[pos], sizeof(header)); if ((header&sync_mask)==sync_mask) break;/*found header sync*/ pos++; counter++; /*next try*/ } while(counter=MAX_MP3_SCAN_DEEP) { return prev; };/*give up to find mp3 header*/ prev = addkword(prev, "audio/mpeg", EXTRACTOR_MIMETYPE); do { /*ok, now we found a mp3 frame header*/ frames++; switch (header & mpeg_ver_mask){ case 0x1000: mpeg_ver = MPEG_ERR; /*error*/ break; case 0x800: prev = addkword(prev, "MPEG V2", EXTRACTOR_RESOURCE_TYPE); mpeg_ver = MPEG_V2; break; case 0x1800: prev = addkword(prev, "MPEG V1", EXTRACTOR_RESOURCE_TYPE); mpeg_ver = MPEG_V1; break; case 0: prev = addkword(prev, "MPEG V25", EXTRACTOR_RESOURCE_TYPE); mpeg_ver = MPEG_V25; break; } switch(header&mpeg_layer_mask){ case 0x400: layer_ver=LAYER_2; break; case 0x200: layer_ver=LAYER_3; break; case 0x600: layer_ver=LAYER_1; break; case 0: layer_ver=LAYER_ERR;/*error*/ } if (!layer_ver||!mpeg_ver) return prev; /*unknown mpeg type*/ if (mpeg_ver<3) idx_num=(mpeg_ver-1)*3+layer_ver-1; else idx_num=2+layer_ver; bitrate = 1000*bitrate_table[(header&bitrate_mask)>>20][idx_num]; if (bitrate<0) { frames--; break; } /*error in header*/ sample_rate = freq_table[(header&freq_mask)>>18][mpeg_ver-1]; if (sample_rate<0) { frames--; break; } /*error in header*/ if ((header&ch_mask)==ch_mask) ch=1; else ch=2; /*stereo non stereo select*/ frame_size = 144*bitrate/(sample_rate?sample_rate:1)+((header&pad_mask)>>17); avg_bps += bitrate/1000; pos += frame_size-4; if (frames > max_frames_scan) break; /*optimization*/ if (avg_bps/frames!=bitrate/1000) vbr_flag=1; if (pos + sizeof(header) > size) break; /* EOF */ memcpy(&header, &data[pos], sizeof(header)); } while ((header&sync_mask)==sync_mask); if (!frames) return prev; /*no valid frames*/ avg_bps = avg_bps/frames; if (max_frames_scan){ /*if not all frames scaned*/ length=size/(avg_bps?avg_bps:bitrate?bitrate:0xFFFFFFFF)/125; } else{ length=1152*frames/(sample_rate?sample_rate:0xFFFFFFFF); } format = malloc(512); snprintf(format, 512, "%d kbps, %d hz, %dm%02d %s %s", avg_bps, sample_rate, length/60, length % 60, /* minutes / seconds */ ch == 2 ? _("stereo") : _("mono"), vbr_flag ? _("(variable bps)"):""); prev = addkword(prev, format, EXTRACTOR_FORMAT); free(format); return prev; } /* mimetype = audio/mpeg */ struct EXTRACTOR_Keywords * libextractor_mp3_extract(const char * filename, const char * data, size_t size, struct EXTRACTOR_Keywords * klist) { id3tag info; char * word; if (0 != get_id3(data, size, &info)) return klist; if (strlen(info.title) > 0) klist = addkword(klist, info.title, EXTRACTOR_TITLE); if (strlen(info.artist) > 0) klist = addkword(klist, info.artist, EXTRACTOR_ARTIST); if (strlen(info.album) > 0) klist = addkword(klist, info.album, EXTRACTOR_ALBUM); if (strlen(info.year) > 0) klist = addkword(klist, info.year, EXTRACTOR_YEAR); if (strlen(info.genre) > 0) klist = addkword(klist, info.genre, EXTRACTOR_GENRE); if (strlen(info.comment) > 0) klist = addkword(klist, info.comment, EXTRACTOR_COMMENT); /* A keyword that has all of the information together) */ word = (char*) malloc(strlen(info.artist) + strlen(info.title) + strlen(info.album) + 6); sprintf(word, "%s: %s (%s)", info.artist, info.title, info.album); klist = addkword(klist, word, EXTRACTOR_DESCRIPTION); free(word); free(info.title); free(info.year); free(info.album); free(info.artist); free(info.comment); return mp3parse(data, size, klist); } /* end of mp3extractor.c */