/* ** Modular Logfile Analyzer ** Copyright 2000 Jan Kneschke ** ** Homepage: http://www.modlogan.org ** This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version, and provided that the above copyright and permission notice is included with all distributed copies of this or derived software. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA ** ** $Id: mio.c,v 1.13 2004/03/18 02:31:50 ostborn Exp $ */ #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include "config.h" #ifdef HAVE_MMAP #include #endif #include "mio.h" #define WP() \ fprintf(stderr, "%s.%d (%s): looks like something went completly wrong. Please report this bug to jan@kneschke.de\n", __FILE__, __LINE__, __FUNCTION__); #ifdef HAVE_LIBZ /* gzip flag byte */ #define ASCII_FLAG 0x01 /* bit 0 set: file probably ascii text */ #define HEAD_CRC 0x02 /* bit 1 set: header CRC present */ #define EXTRA_FIELD 0x04 /* bit 2 set: extra field present */ #define ORIG_NAME 0x08 /* bit 3 set: original file name present */ #define COMMENT 0x10 /* bit 4 set: file comment present */ #define RESERVED 0xE0 /* bits 5..7: reserved */ void gz_skip_header(z_stream * s) { int method; /* method byte */ int flags; /* flags byte */ uInt len; int c; int ndx = 0; /* Check the gzip magic header */ for (len = 0; len < 2; len++) { c = s->next_in[ndx++]; } method = s->next_in[ndx++]; flags = s->next_in[ndx++]; if (method != Z_DEFLATED || (flags & RESERVED) != 0) { return; } /* Discard time, xflags and OS code: */ for (len = 0; len < 6; len++) (void)s->next_in[ndx++]; if ((flags & EXTRA_FIELD) != 0) { /* skip the extra field */ len = (uInt)s->next_in[ndx++]; len += ((uInt)s->next_in[ndx++])<<8; /* len is garbage if EOF but the loop below will quit anyway */ while (len-- != 0 && (char)(s->next_in[ndx++]) != EOF) ; } if ((flags & ORIG_NAME) != 0) { /* skip the original file name */ while ((c = s->next_in[ndx++]) != 0 && c != EOF) ; } if ((flags & COMMENT) != 0) { /* skip the .gz file comment */ while ((c = s->next_in[ndx++]) != 0 && c != EOF) ; } if ((flags & HEAD_CRC) != 0) { /* skip the header crc */ for (len = 0; len < 2; len++) (void)s->next_in[ndx++]; } s->next_in += ndx; s->avail_in -= ndx; } #endif int mopen (mfile *f, char *fn) { #ifdef HAVE_MMAP f->access_type = MIO_MMAP; #else f->access_type = MIO_FILE; #endif f->file_type = MIO_UNSET; f->obuf = buffer_init(); f->ibuf = buffer_init(); buffer_prepare_copy(f->obuf, 4096); #if 0 f->obuf->used = 4096; #endif f->eof = 0; #ifdef HAVE_MMAP if (f->access_type == MIO_MMAP) { if (fn == NULL) { /* fall back */ f->access_type = MIO_FILE; } else { struct stat st; # ifdef O_LARGEFILE if (-1 == (f->fd = open(fn, O_RDONLY))) { fprintf(stderr, "%s\n", strerror(errno)); return -1; } # else if (-1 == (f->fd = open(fn, O_RDONLY|O_LARGEFILE))) { fprintf(stderr, "%s\n", strerror(errno)); return -1; } # endif stat(fn, &st); if (MAP_FAILED == (f->ibuf->ptr = mmap(0, st.st_size, PROT_READ, MAP_SHARED, f->fd, 0))) { fprintf(stderr, "%s\n", strerror(errno)); return -1; } f->ibuf->used = st.st_size; f->ibuf->size = st.st_size; } } #endif if (f->access_type == MIO_FILE) { /* stdin */ if (fn == NULL) { f->fd = 0; } else { if (-1 == (f->fd = open(fn, O_RDONLY))) { fprintf(stderr, "%s\n", strerror(errno)); return -1; } } buffer_prepare_copy(f->ibuf, 4096); } f->offset = 0; return 0; } int mio_detect_type(mfile *f) { if (f->ibuf->used > 2 && f->ibuf->ptr[0] == '\037' && f->ibuf->ptr[1] == '\213') { #ifdef HAVE_LIBZ int rc; f->file_type = MIO_GZIP; f->gz.zalloc = Z_NULL; f->gz.zfree = Z_NULL; f->gz.opaque = Z_NULL; f->gz.next_in = f->ibuf->ptr; f->gz.avail_in = f->ibuf->used; f->gz.total_in = 0; f->gz.next_out = f->obuf->ptr; f->gz.avail_out = f->obuf->size; f->gz.total_out = 0; switch((rc = inflateInit2(&(f->gz), -MAX_WBITS))) { case Z_OK: break; case Z_MEM_ERROR: WP(); break; default: WP(); break; } gz_skip_header(&(f->gz)); #else fprintf(stderr, "gzip support not available (libz missing)\n"); return -1; #endif } else if (f->ibuf->used > 2 && f->ibuf->ptr[0] == 'B' && f->ibuf->ptr[1] == 'Z') { #ifdef HAVE_LIBBZ int rc; f->file_type = MIO_BZIP; f->bz.bzalloc = NULL; f->bz.bzfree = NULL; f->bz.opaque = NULL; f->bz.next_in = f->ibuf->ptr; f->bz.avail_in = f->ibuf->used; f->bz.total_in_lo32 = 0; f->bz.total_in_hi32 = 0; f->bz.next_out = f->obuf->ptr; f->bz.avail_out = f->obuf->size; f->bz.total_out_lo32 = 0; f->bz.total_out_hi32 = 0; switch((rc = BZ2_bzDecompressInit(&(f->bz), 0, 0))) { case BZ_OK: break; default: WP(); break; } #else fprintf(stderr, "bzip2 support not available (libbz2 missing)\n"); return -1; #endif } else { f->file_type = MIO_PLAIN; } return 0; } char *mgets (mfile *f, buffer *buf) { char *e, *s = NULL; int len = 0; /* no type detected yet */ if (f->file_type == MIO_UNSET) { if (f->access_type == MIO_FILE) { /* preload buffer */ if (-1 == (f->ibuf->used = read(f->fd, f->ibuf->ptr, f->ibuf->size))) { fprintf(stderr, "%s\n", strerror(errno)); return NULL; } if (f->ibuf->used == 0) f->eof = 1; } if (mio_detect_type(f)) return NULL; } if (f->file_type == MIO_PLAIN) { s = f->ibuf->ptr + f->offset; len = f->ibuf->used - f->offset; } else if (f->file_type == MIO_GZIP) { #ifdef HAVE_LIBZ int rc; if (f->gz.total_out == 0) { /* called the first time -> fill the out buffer */ rc = inflate(&(f->gz), Z_SYNC_FLUSH); switch(rc) { case Z_OK: break; case Z_STREAM_END: f->eof = 1; break; default: fprintf(stderr, "%s.%d: inflate failed: %s - %d\n", __FILE__, __LINE__, f->gz.msg, rc); } f->obuf->used = f->obuf->size - f->gz.avail_out; } s = f->obuf->ptr + f->offset; len = f->obuf->used - f->offset; #endif } else if (f->file_type == MIO_BZIP) { #ifdef HAVE_LIBBZ int rc; if (f->bz.total_out_lo32 == 0 && f->bz.total_out_hi32 == 0) { /* called the first time -> fill the out buffer */ rc = BZ2_bzDecompress(&(f->bz)); switch(rc) { case BZ_OK: break; case BZ_STREAM_END: f->eof = 1; break; default: fprintf(stderr, "%s.%d: inflate failed: - %d\n", __FILE__, __LINE__, rc); } f->obuf->used = f->obuf->size - f->bz.avail_out; } s = f->obuf->ptr + f->offset; len = f->obuf->used - f->offset; #endif } else { WP(); return NULL; } /* search for a newline */ e = memchr( s, '\n', len ); if( e && (*e == '\n')) { len = e - s; buffer_copy_string_len(buf, s, len); /* skip the newline */ f->offset += len + 1; return buf->ptr; } else if (!f->eof) { /* move the buffer */ do { if (f->file_type == MIO_PLAIN) { if (f->access_type == MIO_FILE) { /* move the rest to the beginning of the buffer */ memmove(f->ibuf->ptr, s, len); f->ibuf->used = len; /* ensure that the buffer is ALWAYS full */ do { /* fill the buffer again */ if (-1 == (len = read(f->fd, f->ibuf->ptr + f->ibuf->used, f->ibuf->size - f->ibuf->used))) { WP(); fprintf(stderr, "%s\n", strerror(errno)); return NULL; } f->ibuf->used += len; } while (f->ibuf->used < f->ibuf->size && len != 0); /* the read was 'empty' -> EOF */ if (len == 0) { f->eof = 1; } f->offset = 0; s = f->ibuf->ptr; len = f->ibuf->used; } else if (f->access_type == MIO_MMAP) { /* we only get here if the last line doesn't contain a newline * and this is illegal -> ignore it */ len = 0; f->eof = 1; } } else if (f->file_type == MIO_GZIP) { #ifdef HAVE_LIBZ int rc; if (f->access_type == MIO_FILE) { /* move the rest to the beginning of the buffer */ memmove(f->ibuf->ptr, f->ibuf->ptr + f->ibuf->used - f->gz.avail_in, f->gz.avail_in); /* do we need the loop here too ?*/ /* have to handle the refill */ if (-1 == (f->ibuf->used = read(f->fd, f->ibuf->ptr + f->gz.avail_in, f->ibuf->size - f->gz.avail_in))) { WP(); fprintf(stderr, "%s\n", strerror(errno)); return NULL; } f->ibuf->used += f->gz.avail_in; f->gz.next_in = f->ibuf->ptr; f->gz.avail_in = f->ibuf->used; } /* move the rest of the buffer to the start */ memmove(f->obuf->ptr, s, len); f->gz.next_out = f->obuf->ptr + len; f->gz.avail_out = f->obuf->size - len; f->offset = 0; rc = inflate(&(f->gz), Z_SYNC_FLUSH); switch(rc) { case Z_OK: break; case Z_STREAM_END: f->eof = 1; break; default: fprintf(stderr, "%s.%d: inflate failed: %s - %d\n", __FILE__, __LINE__, f->gz.msg, rc); return NULL; } f->obuf->used = f->obuf->size - f->gz.avail_out; s = f->obuf->ptr; len = f->obuf->used; #endif } else if (f->file_type == MIO_BZIP) { #ifdef HAVE_LIBBZ int rc; do { if (f->bz.avail_in == 0 && f->access_type == MIO_FILE) { /* have to handle the refill */ if (-1 == (f->ibuf->used = read(f->fd, f->ibuf->ptr, f->ibuf->size))) { WP(); fprintf(stderr, "%s\n", strerror(errno)); return NULL; } f->bz.next_in = f->ibuf->ptr; f->bz.avail_in = f->ibuf->used; } memmove(f->obuf->ptr, s, len); f->bz.next_out = f->obuf->ptr + len; f->bz.avail_out = f->obuf->size - len; f->offset = 0; rc = BZ2_bzDecompress(&(f->bz)); switch(rc) { case BZ_OK: break; case BZ_STREAM_END: f->eof = 1; break; default: fprintf(stderr, "%s.%d: bzDecompress failed: - %d\n", __FILE__, __LINE__, rc); return NULL; } f->obuf->used = f->obuf->size - f->bz.avail_out; s = f->obuf->ptr; len = f->obuf->used; } while (f->bz.total_out_lo32 == 0 && f->bz.total_out_hi32 == 0); #endif } if (len == 0) { /* read nothing */ if (f->file_type == MIO_GZIP) { #ifdef HAVE_LIBZ inflateEnd(&(f->gz)); #endif } else if (f->file_type == MIO_BZIP) { #ifdef HAVE_LIBBZ BZ2_bzDecompressEnd(&(f->bz)); #endif } return NULL; } /* search for a newline */ e = memchr( s, '\n', len ); if( e && (*e == '\n')) { len = e - s; buffer_copy_string_len(buf, s, len); /* skip the newline */ f->offset += len + 1; return buf->ptr; } else if (e - s == len) { /* last line is without a newline */ buffer_copy_string_len(buf, s, len); f->offset += len; return buf->ptr; } /* refilled the buffer again and still found nothing */ if (f->access_type == MIO_FILE) { buffer_prepare_append(f->ibuf, 1024); } buffer_prepare_append(f->obuf, 1024); switch(f->file_type) { case MIO_PLAIN: s = f->ibuf->ptr + f->offset; break; case MIO_GZIP: case MIO_BZIP: s = f->obuf->ptr + f->offset; break; default: WP(); } } while(1); } else { /* EOF */ if (f->file_type == MIO_GZIP) { #ifdef HAVE_LIBZ inflateEnd(&(f->gz)); #endif } else if (f->file_type == MIO_BZIP) { #ifdef HAVE_LIBBZ BZ2_bzDecompressEnd(&(f->bz)); #endif } return NULL; } if (buf->ptr == NULL) WP(); return buf->ptr; } void mclose(mfile *f) { if (f->access_type == MIO_MMAP) { #ifdef HAVE_MMAP munmap(f->ibuf->ptr, f->ibuf->used); f->ibuf->ptr = NULL; f->ibuf->used = 0; f->ibuf->size = 0; close(f->fd); f->fd = -1; #endif } else if (f->access_type == MIO_FILE) { /* don't close stdin :) */ if (f->fd) close(f->fd); f->fd = -1; } if (f->ibuf) { buffer_free(f->ibuf); } if (f->obuf) { buffer_free(f->obuf); } } #ifdef MIO_TEST int main (int argc, char **argv) { mfile f; FILE *pf; buffer *b; int line = 0; b = buffer_init(); if (0 != mopen(&f, argv[1])) { return -1; } while(mgets(&f, b)) { line ++; if( !( line % 1000 ) ) { fprintf(stdout, "%10d\r", line ); fflush( stdout ); } } mclose(&f); buffer_free(b); return 0; } #endif