/* File: file_pdf.c Copyright (C) 1998-2007 Christophe GRENIER This software is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ #ifdef HAVE_CONFIG_H #include #endif #ifdef HAVE_STRING_H #include #endif #include #include "types.h" #include "filegen.h" static inline const unsigned char *find_in_mem(const unsigned char *haystack, const unsigned char * haystack_end, const unsigned char *needle, const unsigned int needle_length); static void register_header_check_pdf(file_stat_t *file_stat); static int header_check_pdf(const unsigned char *buffer, const unsigned int buffer_size, const unsigned int safe_header_only, const file_recovery_t *file_recovery, file_recovery_t *file_recovery_new); static void file_check_pdf(file_recovery_t *file_recovery); static void file_check_pdf_and_size(file_recovery_t *file_recovery); const file_hint_t file_hint_pdf= { .extension="pdf", .description="Portable Document Format, Adobe Illustrator", .min_header_distance=0, .max_filesize=PHOTOREC_MAX_FILE_SIZE, .recover=1, .header_check=&header_check_pdf, .register_header_check=®ister_header_check_pdf }; static const unsigned char pdf_header[] = { '%','P','D','F','-','1'}; static void register_header_check_pdf(file_stat_t *file_stat) { register_header_check(0, pdf_header,sizeof(pdf_header), &header_check_pdf, file_stat); } static inline const unsigned char *find_in_mem(const unsigned char *haystack, const unsigned char * haystack_end, const unsigned char *needle, const unsigned int needle_length) { while(haystack!=NULL) { haystack=memchr(haystack,needle[0],haystack_end-haystack); if(haystack!=NULL && haystack<=(haystack_end-needle_length)) { if(memcmp(haystack,needle,needle_length)==0) return haystack; haystack++; } else haystack=NULL; }; return NULL; } static int header_check_pdf(const unsigned char *buffer, const unsigned int buffer_size, const unsigned int safe_header_only, const file_recovery_t *file_recovery, file_recovery_t *file_recovery_new) { if(memcmp(buffer,pdf_header,sizeof(pdf_header))==0) { const unsigned char sig_illustrator[11]={'I','l','l','u','s','t','r','a','t','o','r'}; const unsigned char sig_linearized[10]={'L','i','n','e','a','r','i','z','e','d'}; const unsigned char *linearized; reset_file_recovery(file_recovery_new); if(find_in_mem(buffer, buffer+512, sig_illustrator,sizeof(sig_illustrator)) != NULL) file_recovery_new->extension="ai"; else file_recovery_new->extension=file_hint_pdf.extension; if((linearized=find_in_mem(buffer, buffer+512, sig_linearized,sizeof(sig_linearized))) != NULL) { linearized+=sizeof(sig_linearized); while(*linearized!='>' && linearized<=buffer+512) { if(*linearized=='/' && *(linearized+1)=='L') { linearized+=2; while(*linearized==' ' || *linearized=='\t' || *linearized=='\n' || *linearized=='\r') linearized++; file_recovery_new->calculated_file_size=0; while(*linearized>='0' && *linearized<='9' && linearized<=buffer+512) { file_recovery_new->calculated_file_size=file_recovery_new->calculated_file_size*10+(*linearized)-'0'; linearized++; } file_recovery_new->data_check=&data_check_size; file_recovery_new->file_check=&file_check_pdf_and_size; return 1; } linearized++; } } file_recovery_new->file_check=&file_check_pdf; return 1; } return 0; } static void file_check_pdf_and_size(file_recovery_t *file_recovery) { if(file_recovery->file_size>=file_recovery->calculated_file_size) { const unsigned int read_size=20; unsigned char buffer[read_size+3]; int i; int taille; file_recovery->file_size=file_recovery->calculated_file_size; if(fseek(file_recovery->handle,file_recovery->file_size-read_size,SEEK_SET)<0) { file_recovery->file_size=0; return ; } taille=fread(buffer,1,read_size,file_recovery->handle); for(i=taille-4;i>=0;i--) { if(buffer[i]=='%' && buffer[i+1]=='E' && buffer[i+2]=='O' && buffer[i+3]=='F') return ; } } file_recovery->file_size=0; } static void file_check_pdf(file_recovery_t *file_recovery) { const unsigned int read_size=4096; unsigned char buffer[read_size+3]; int64_t pdf_size=file_recovery->file_size; int i; buffer[read_size]=0; buffer[read_size+1]=0; buffer[read_size+2]=0; do { int taille; pdf_size-=read_size; if(pdf_size<0) { pdf_size=0; buffer[read_size]=0; buffer[read_size+1]=0; buffer[read_size+2]=0; } if(fseek(file_recovery->handle,pdf_size,SEEK_SET)<0) return; taille=fread(buffer,1,read_size,file_recovery->handle); for(i=taille-1;i>=0;i--) { if(buffer[i]=='%' && buffer[i+1]=='E' && buffer[i+2]=='O' && buffer[i+3]=='F') { file_recovery->file_size=pdf_size+i+4; if(i+4<=read_size+2 && buffer[i+4]=='\r') { file_recovery->file_size++; if(i+5<=read_size+2 && buffer[i+5]=='\n') file_recovery->file_size++; } else if(i+4<=read_size+2 && buffer[i+4]=='\n') file_recovery->file_size++; return; } } buffer[read_size]=buffer[0]; buffer[read_size+1]=buffer[1]; buffer[read_size+2]=buffer[2]; } while(pdf_size>0); file_recovery->file_size=0; }