/*
     This file is part of libextractor.
     (C) 2002, 2003, 2006 Vidyut Samanta and Christian Grothoff

     libextractor is free software; you can redistribute it and/or modify
     it under the terms of the GNU General Public License as published
     by the Free Software Foundation; either version 2, or (at your
     option) any later version.

     libextractor is distributed in the hope that it will be useful, but
     WITHOUT ANY WARRANTY; without even the implied warranty of
     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     General Public License for more details.

     You should have received a copy of the GNU General Public License
     along with libextractor; see the file COPYING.  If not, write to the
     Free Software Foundation, Inc., 59 Temple Place - Suite 330,
     Boston, MA 02111-1307, USA.
 */

#include "platform.h"
#include "extractor.h"

/**
 * Prepend a keyword to a keyword list.
 *
 * @param type the type to record for the keyword
 * @param keyword the keyword string; the pointer itself is stored in the
 *        new list entry (no copy is made). If NULL, nothing is added.
 * @param next the current head of the list (may be NULL)
 * @return the new head of the list; this is 'next' itself if keyword
 *         is NULL or if no memory could be allocated for the new entry
 **/
static EXTRACTOR_KeywordList *
addKeyword(EXTRACTOR_KeywordType type,
	   char * keyword,
	   EXTRACTOR_KeywordList * next) {
  EXTRACTOR_KeywordList * result;

  if (keyword == NULL)
    return next;
  result = malloc(sizeof *result);
  if (result == NULL) {
    /* Out of memory: keep the existing list intact instead of
       dereferencing NULL.  The keyword is not released here because
       this function cannot know how it was allocated. */
    return next;
  }
  result->next = next;
  result->keyword = keyword;
  result->keywordType = type;
  return result;
}

/**
 * Detect a file-type.
 * @param data the contents of the file
 * @param len the length of the file
 * @param arg closure...
 * @return 0 if the file does not match, 1 if it does
 **/
typedef int (*Detector)(const char * data,
			size_t len,
			void * arg);

/**
 * Detector that accepts every file.
 * @param data the contents of the file (ignored)
 * @param len the length of the file (ignored)
 * @param arg closure (ignored)
 * @return always 1
 **/
static int defaultDetector(const char * data,
			   size_t len,
			   void * arg) {
  /* the arguments only exist to satisfy the Detector signature */
  (void) data;
  (void) len;
  (void) arg;
  return 1;
}

/**
 * Detector that rejects every file.
 * @param data the contents of the file (ignored)
 * @param len the length of the file (ignored)
 * @param arg closure (ignored)
 * @return always 0
 **/
static int disableDetector(const char * data,
			   size_t len,
			   void * arg) {
  /* the arguments only exist to satisfy the Detector signature */
  (void) data;
  (void) len;
  (void) arg;
  return 0;
}

/**
 * One additional byte pattern that must be present in a file:
 * 'len' bytes equal to 'pattern' starting at offset 'pos'.
 **/
typedef struct ExtraPattern {
  int pos;        /* offset into the file data */
  int len;        /* number of bytes to compare */
  char * pattern; /* expected bytes; NULL marks the end of a group */
} ExtraPattern;

/**
 * Define special matching rules for complicated formats...
**/
/* The table is a flat array of pattern groups.  Each group is a run of
   entries terminated by a { 0, 0, NULL } sentinel, and each *_XPATTERN
   macro holds the index at which the group defined right after it
   starts.  Entries are { offset, length, expected bytes }. */
static ExtraPattern xpatterns[] = {
#define AVI_XPATTERN 0
  { 8, 4, "AVI "},
  { 0, 0, NULL },
#define WAVE_XPATTERN 2
  { 8, 4, "WAVE"},
  { 0, 0, NULL },
#define ACE_XPATTERN 4
  { 4, 10, "\x00\x00\x90**ACE**"},
  { 0, 0, NULL },
#define TAR_XPATTERN 6
  { 257, 6, "ustar\x00"},
  { 0, 0, NULL },
#define GTAR_XPATTERN 8
  { 257, 8, "ustar\040\040\0"},
  { 0, 0, NULL },
#define RMID_XPATTERN 10
  { 8, 4, "RMID"},
  { 0, 0, NULL },
#define ACON_XPATTERN 12
  { 8, 4, "ACON"},
  { 0, 0, NULL},
};

/**
 * Generic matcher for a group of extra patterns (not specific to AVI).
 * A group matches if all XPatterns until the next {0, 0, NULL} slot
 * match.  OR-ing patterns can be achieved using multiple entries in
 * the main table, so this "AND" (all match) semantics are the only
 * reasonable answer.
 *
 * @param data the contents of the file
 * @param len the length of the file
 * @param cls pointer to the first ExtraPattern of the group to check
 * @return 1 if every pattern of the group matches, 0 otherwise
 **/
static int xPatternMatcher(const char * data,
			   size_t len,
			   void * cls) {
  ExtraPattern * arg = cls;

  /* walk the group until its { 0, 0, NULL } terminator */
  while (arg->pattern != NULL) {
    /* the file is too short to contain this pattern; note that the
       sum is computed as int and then compared against a size_t */
    if (arg->pos + arg->len > len)
      return 0;
    /* raw byte comparison, so patterns may contain embedded NULs */
    if (0 != memcmp(&data[arg->pos], arg->pattern, arg->len))
      return 0;
    arg++;
  }
  return 1;
}

/**
 * Detect SVG.
 *
 * Walks the data one byte at a time with a small state machine
 * (XMLSTART, XMLCLOSE, SVGSTART), starting at offset 0 in state
 * XMLSTART.
 *
 * @param data the contents of the file
 * @param len the length of the file
 * @param cls closure
 */
static int svgMatcher(const char *data,
		      size_t len,
		      void * cls) {
  enum {XMLSTART, XMLCLOSE, SVGSTART} state;
  size_t i;

  i = 0;
  state = XMLSTART;
  while (i < len) {
    if (!
isprint(data[i])) return 0; switch (state) { case XMLSTART: if (i + 6 >= len) return 0; else if (memcmp(data + i, "= len) return 0; else if (memcmp(data + i, "?>", 2) == 0) state = SVGSTART; break; case SVGSTART: if (i + 5 >= len) return 0; else if (memcmp(data + i, "\ndebian", 14, "application/x-dpkg", DEFAULT}, /* .deb */ { "PK\x03\x04", 4, "application/x-zip", DEFAULT}, { "\xea\x60", 2, "application/x-arj", DEFAULT}, { "\037\235", 2, "application/x-compress", DEFAULT}, { "Rar!", 4, "application/x-rar", DEFAULT}, { "", 0, "application/x-ace", XPATTERN(ACE_XPATTERN)}, { "", 0, "application/x-tar", XPATTERN(TAR_XPATTERN)}, { "", 0, "application/x-gtar", XPATTERN(GTAR_XPATTERN)}, { "-lh0-", 5, "application/x-lha", DEFAULT}, { "-lh1-", 5, "application/x-lha", DEFAULT}, { "-lh2-", 5, "application/x-lha", DEFAULT}, { "-lh3-", 5, "application/x-lha", DEFAULT}, { "-lh4-", 5, "application/x-lha", DEFAULT}, { "-lh5-", 5, "application/x-lha", DEFAULT}, { "-lh6-", 5, "application/x-lha", DEFAULT}, { "-lh7-", 5, "application/x-lha", DEFAULT}, { "-lhd-", 5, "application/x-lha", DEFAULT}, { "-lh\40-", 5, "application/x-lha", DEFAULT}, { "-lz4-", 5, "application/x-lha", DEFAULT}, { "-lz5-", 5, "application/x-lha", DEFAULT}, { "-lzs-", 5, "application/x-lha", DEFAULT}, { "\xFD\x76", 2, "application/x-lzh", DEFAULT}, { "\x00\x00\x01\xb3", 4, "video/mpeg", DEFAULT}, { "\x00\x00\x01\xba", 4, "video/mpeg", DEFAULT}, { "moov", 4, "video/quicktime", DEFAULT}, { "mdat", 4, "video/quicktime", DEFAULT}, { "\x8aMNG", 4, "video/x-mng", DEFAULT}, { "\x30\x26\xb2\x75\x8e\x66", 6, "video/asf", DEFAULT}, /* same as .wmv ? 
*/ { "FWS", 3, "application/x-shockwave-flash", DEFAULT}, { "MThd", 4, "audio/midi", DEFAULT}, { "ID3", 3, "audio/mpeg", DEFAULT}, { "\xFF\xFA", 2, "audio/mpeg", DEFAULT}, { "\xFF\xFB", 2, "audio/mpeg", DEFAULT}, { "\xFF\xFC", 2, "audio/mpeg", DEFAULT}, { "\xFF\xFD", 2, "audio/mpeg", DEFAULT}, { "\xFF\xFE", 2, "audio/mpeg", DEFAULT}, { "\xFF\xFF", 2, "audio/mpeg", DEFAULT}, { "OggS", 4, "application/ogg", DEFAULT}, { "#!/bin/sh", 9, "application/x-shellscript", DEFAULT}, { "#!/bin/bash", 11, "application/x-shellscript", DEFAULT}, { "#!/bin/csh", 10, "application/x-shellscript", DEFAULT}, { "#!/bin/tcsh", 11, "application/x-shellscript", DEFAULT}, { "#!/bin/perl", 11, "application/x-perl", DEFAULT}, { "