/*

    File: file_pdf.c

    Copyright (C) 1998-2007 Christophe GRENIER <grenier@cgsecurity.org>
  
    This software is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.
  
    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.
  
    You should have received a copy of the GNU General Public License along
    with this program; if not, write the Free Software Foundation, Inc., 51
    Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

 */

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#ifdef HAVE_STRING_H
#include <string.h>
#endif
#include <stdio.h>
#include "types.h"
#include "filegen.h"

/* Search the byte range [haystack, haystack_end) for the first occurrence of
   the needle_length bytes at needle; returns a pointer to the match or NULL. */
static inline const unsigned char *find_in_mem(const unsigned char *haystack, const unsigned char * haystack_end,
    const unsigned char *needle, const unsigned int needle_length);

/* Registers header_check_pdf for the "%PDF-1" signature at offset 0. */
static void register_header_check_pdf(file_stat_t *file_stat);
/* Returns 1 and fills file_recovery_new when buffer starts with the PDF signature, 0 otherwise. */
static int header_check_pdf(const unsigned char *buffer, const unsigned int buffer_size, const unsigned int safe_header_only, const file_recovery_t *file_recovery, file_recovery_t *file_recovery_new);
/* Truncates the file after its last "%EOF" marker, or sets file_size to 0 when none is found. */
static void file_check_pdf(file_recovery_t *file_recovery);
/* Same idea, but for files whose size was read from the header: the marker
   must sit within the last bytes of the calculated size. */
static void file_check_pdf_and_size(file_recovery_t *file_recovery);

/* Format descriptor for PDF / Adobe Illustrator files.  It has external
   linkage (no "static"), and ties the default "pdf" extension to the header
   check and registration routine defined in this file. */
const file_hint_t file_hint_pdf= {
  .extension="pdf",
  .description="Portable Document Format, Adobe Illustrator",
  .min_header_distance=0,
  .max_filesize=PHOTOREC_MAX_FILE_SIZE,
  .recover=1,
  .header_check=&header_check_pdf,
  .register_header_check=&register_header_check_pdf
};

/* Leading signature tested by the header check: the six bytes "%PDF-1"
   (no terminating NUL is stored). */
static const unsigned char pdf_header[]  = { '%','P','D','F','-','1'};

/* Hook the PDF signature, expected at offset 0, into file_stat so that
   header_check_pdf is the routine invoked for it. */
static void register_header_check_pdf(file_stat_t *file_stat)
{
  register_header_check(
      0,
      pdf_header, sizeof pdf_header,
      &header_check_pdf,
      file_stat);
}

/*
 * Locate the first occurrence of a byte sequence inside a memory range.
 *
 * haystack      : first byte of the range to search
 * haystack_end  : one past the last byte of the range
 * needle        : bytes to look for
 * needle_length : number of bytes in needle
 *
 * Returns a pointer to the first match inside [haystack, haystack_end), or
 * NULL when there is none.  An empty needle matches at haystack (same
 * convention as strstr/memmem).  A NULL or inverted range, or a range shorter
 * than the needle, yields NULL without forming any out-of-range pointer.
 */
static inline const unsigned char *find_in_mem(const unsigned char *haystack, const unsigned char * haystack_end,
    const unsigned char *needle, const unsigned int needle_length)
{
  if(haystack==NULL || haystack_end==NULL || haystack>haystack_end)
    return NULL;
  if(needle_length==0)
    return haystack;
  if(needle==NULL)
    return NULL;
  while((size_t)(haystack_end-haystack)>=needle_length)
  {
    /* Only positions where the whole needle still fits can start a match,
       so restrict the search for the first byte to those positions. */
    const size_t candidates=(size_t)(haystack_end-haystack)-needle_length+1;
    haystack=memchr(haystack,needle[0],candidates);
    if(haystack==NULL)
      return NULL;
    if(memcmp(haystack,needle,needle_length)==0)
      return haystack;
    haystack++;
  }
  return NULL;
}

/*
 * Check whether buffer starts with the PDF signature and, if so, prepare
 * file_recovery_new for recovering that file.
 *
 * buffer, buffer_size : data to inspect; at most the first 512 bytes, and
 *                       never more than buffer_size bytes, are examined
 * safe_header_only,
 * file_recovery       : not used by this check
 * file_recovery_new   : reset, then filled in when the signature matches
 *
 * Returns 1 when buffer begins with "%PDF-1", 0 otherwise.
 *
 * On a match the extension becomes "ai" if the word "Illustrator" appears in
 * the scanned area, otherwise the default PDF extension.  If a "Linearized"
 * dictionary with an "/L" entry is found before the closing '>', the decimal
 * number that follows is stored as calculated_file_size and the size-based
 * data/file checks are installed; otherwise the footer-based file check is
 * used.
 */
static int header_check_pdf(const unsigned char *buffer, const unsigned int buffer_size, const unsigned int safe_header_only, const file_recovery_t *file_recovery, file_recovery_t *file_recovery_new)
{
  const unsigned char sig_illustrator[11]={'I','l','l','u','s','t','r','a','t','o','r'};
  const unsigned char sig_linearized[10]={'L','i','n','e','a','r','i','z','e','d'};
  /* Never look past what the caller actually handed over. */
  const unsigned int scan_size=(buffer_size<512 ? buffer_size : 512);
  const unsigned char *scan_end;
  const unsigned char *linearized=NULL;
  if(buffer_size<sizeof(pdf_header) || memcmp(buffer,pdf_header,sizeof(pdf_header))!=0)
    return 0;
  scan_end=buffer+scan_size;
  reset_file_recovery(file_recovery_new);
  if(scan_size>=sizeof(sig_illustrator) &&
      find_in_mem(buffer, scan_end, sig_illustrator,sizeof(sig_illustrator)) != NULL)
    file_recovery_new->extension="ai";
  else
    file_recovery_new->extension=file_hint_pdf.extension;
  if(scan_size>=sizeof(sig_linearized))
    linearized=find_in_mem(buffer, scan_end, sig_linearized,sizeof(sig_linearized));
  if(linearized!=NULL)
  {
    linearized+=sizeof(sig_linearized);
    /* Walk the rest of the dictionary; every dereference is preceded by a
       bounds test so nothing at or beyond scan_end is read. */
    while(linearized<scan_end && *linearized!='>')
    {
      if(*linearized=='/' && linearized+1<scan_end && *(linearized+1)=='L')
      {
        linearized+=2;
        while(linearized<scan_end &&
            (*linearized==' ' || *linearized=='\t' || *linearized=='\n' || *linearized=='\r'))
          linearized++;
        /* NOTE(review): if no digit follows, the size stays 0 and the
           size-based checks are still installed, as before - confirm that
           this is the intended outcome. */
        file_recovery_new->calculated_file_size=0;
        while(linearized<scan_end && *linearized>='0' && *linearized<='9')
        {
          file_recovery_new->calculated_file_size=file_recovery_new->calculated_file_size*10+(*linearized)-'0';
          linearized++;
        }
        file_recovery_new->data_check=&data_check_size;
        file_recovery_new->file_check=&file_check_pdf_and_size;
        return 1;
      }
      linearized++;
    }
  }
  file_recovery_new->file_check=&file_check_pdf;
  return 1;
}

/*
 * Validate a recovered PDF whose expected size is known
 * (file_recovery->calculated_file_size).
 *
 * The file is accepted, with file_size set to calculated_file_size, only if
 * at least that many bytes were recovered and the marker "%EOF" occurs within
 * the last 20 bytes of the calculated size (or within the whole file when it
 * is shorter than 20 bytes).  In every other case, including a failed seek,
 * file_size is set to 0.
 */
static void file_check_pdf_and_size(file_recovery_t *file_recovery)
{
  enum { TAIL_SIZE=20 };
  unsigned char buffer[TAIL_SIZE];
  unsigned int to_read=TAIL_SIZE;
  int taille;
  int i;
  if(file_recovery->file_size<file_recovery->calculated_file_size)
  {
    file_recovery->file_size=0;
    return ;
  }
  file_recovery->file_size=file_recovery->calculated_file_size;
  /* Do not let the offset computation wrap when the file is shorter than
     the tail window: read the whole file instead. */
  if(file_recovery->file_size<TAIL_SIZE)
    to_read=(unsigned int)file_recovery->file_size;
  if(fseek(file_recovery->handle,file_recovery->file_size-to_read,SEEK_SET)<0)
  {
    file_recovery->file_size=0;
    return ;
  }
  taille=fread(buffer,1,to_read,file_recovery->handle);
  /* Only positions where all four marker bytes were really read are tested. */
  for(i=taille-4;i>=0;i--)
  {
    if(buffer[i]=='%' && buffer[i+1]=='E' && buffer[i+2]=='O' && buffer[i+3]=='F')
      return ;
  }
  file_recovery->file_size=0;
}

/*
 * Find the end of a recovered PDF by scanning backwards for "%EOF".
 *
 * The file is read from its end towards its start in 4096-byte chunks.  The
 * first three bytes of each chunk are kept behind the next (earlier) chunk so
 * that a marker straddling a chunk boundary is still seen.  When the marker
 * is found, file_size is set just past it, plus one optional end of line
 * ("\r", "\r\n" or "\n").  If no marker exists, file_size is set to 0.  If a
 * seek fails, file_size is left unchanged.
 */
static void file_check_pdf(file_recovery_t *file_recovery)
{
  enum { READ_SIZE=4096, CARRY_SIZE=3 };
  unsigned char buffer[READ_SIZE+CARRY_SIZE];
  int64_t pdf_size=file_recovery->file_size;
  /* Nothing follows the last chunk of the file. */
  memset(&buffer[READ_SIZE],0,CARRY_SIZE);
  do
  {
    int taille;
    int i;
    pdf_size-=READ_SIZE;
    if(pdf_size<0)
    {
      /* The chunk now starts at offset 0 and no longer ends where the
         previous one began, so the carried bytes are not adjacent to it. */
      pdf_size=0;
      memset(&buffer[READ_SIZE],0,CARRY_SIZE);
    }
    if(fseek(file_recovery->handle,pdf_size,SEEK_SET)<0)
      return;
    taille=fread(buffer,1,READ_SIZE,file_recovery->handle);
    /* On a short read the bytes after the data are stale or uninitialised;
       clear them so they can neither complete a marker nor be mistaken for
       a trailing end of line. */
    if(taille<READ_SIZE)
      memset(&buffer[taille],0,READ_SIZE+CARRY_SIZE-taille);
    for(i=taille-1;i>=0;i--)
    {
      if(memcmp(&buffer[i],"%EOF",4)==0)
      {
        file_recovery->file_size=pdf_size+i+4;
        if(i+4<READ_SIZE+CARRY_SIZE && buffer[i+4]=='\r')
        {
          file_recovery->file_size++;
          if(i+5<READ_SIZE+CARRY_SIZE && buffer[i+5]=='\n')
            file_recovery->file_size++;
        }
        else if(i+4<READ_SIZE+CARRY_SIZE && buffer[i+4]=='\n')
          file_recovery->file_size++;
        return;
      }
    }
    /* Keep the start of this chunk: it directly follows the next one read. */
    buffer[READ_SIZE]=buffer[0];
    buffer[READ_SIZE+1]=buffer[1];
    buffer[READ_SIZE+2]=buffer[2];
  } while(pdf_size>0);
  file_recovery->file_size=0;
}


/* syntax highlighted by Code2HTML, v. 0.9.1 */