diff -Naur --exclude .svn tarfile-current/gzip22.py tarfile-2.x/gzip22.py --- tarfile-current/gzip22.py 1970-01-01 01:00:00.000000000 +0100 +++ tarfile-2.x/gzip22.py 2005-09-04 12:28:16.000000000 +0200 @@ -0,0 +1,390 @@ +"""Functions that read and write gzipped files. + +The user of the file doesn't have to worry about the compression, +but random access is not allowed.""" + +# based on Andrew Kuchling's minigzip.py distributed with the zlib module + +import struct, sys, time +import zlib +import __builtin__ + +__all__ = ["GzipFile","open"] + +FTEXT, FHCRC, FEXTRA, FNAME, FCOMMENT = 1, 2, 4, 8, 16 + +READ, WRITE = 1, 2 + +def write32(output, value): + output.write(struct.pack("' + + def _init_write(self, filename): + if filename[-3:] != '.gz': + filename = filename + '.gz' + self.filename = filename + self.crc = zlib.crc32("") + self.size = 0 + self.writebuf = [] + self.bufsize = 0 + + def _write_gzip_header(self): + self.fileobj.write('\037\213') # magic header + self.fileobj.write('\010') # compression method + fname = self.filename[:-3] + flags = 0 + if fname: + flags = FNAME + self.fileobj.write(chr(flags)) + write32u(self.fileobj, long(time.time())) + self.fileobj.write('\002') + self.fileobj.write('\377') + if fname: + self.fileobj.write(fname + '\000') + + def _init_read(self): + self.crc = zlib.crc32("") + self.size = 0 + + def _read_gzip_header(self): + magic = self.fileobj.read(2) + if magic != '\037\213': + raise IOError, 'Not a gzipped file' + method = ord( self.fileobj.read(1) ) + if method != 8: + raise IOError, 'Unknown compression method' + flag = ord( self.fileobj.read(1) ) + # modtime = self.fileobj.read(4) + # extraflag = self.fileobj.read(1) + # os = self.fileobj.read(1) + self.fileobj.read(6) + + if flag & FEXTRA: + # Read & discard the extra field, if present + xlen=ord(self.fileobj.read(1)) + xlen=xlen+256*ord(self.fileobj.read(1)) + self.fileobj.read(xlen) + if flag & FNAME: + # Read and discard a null-terminated string containing the filename + while (1): + s=self.fileobj.read(1) + if not s or s=='\000': break + if flag & FCOMMENT: + # Read and discard a null-terminated string containing a comment + while (1): + s=self.fileobj.read(1) + if not s or s=='\000': break + if flag & FHCRC: + self.fileobj.read(2) # Read & discard the 16-bit header CRC + + + def write(self,data): + if self.fileobj is None: + raise ValueError, "write() on closed GzipFile object" + if len(data) > 0: + self.size = self.size + len(data) + self.crc = zlib.crc32(data, self.crc) + self.fileobj.write( self.compress.compress(data) ) + self.offset += len(data) + + def read(self, size=-1): + if self.extrasize <= 0 and self.fileobj is None: + return '' + + readsize = 1024 + if size < 0: # get the whole thing + try: + while 1: + self._read(readsize) + readsize = readsize * 2 + except EOFError: + size = self.extrasize + else: # just get some more of it + try: + while size > self.extrasize: + self._read(readsize) + readsize = readsize * 2 + except EOFError: + if size > self.extrasize: + size = self.extrasize + + chunk = self.extrabuf[:size] + self.extrabuf = self.extrabuf[size:] + self.extrasize = self.extrasize - size + + self.offset += size + return chunk + + def _unread(self, buf): + self.extrabuf = buf + self.extrabuf + self.extrasize = len(buf) + self.extrasize + self.offset -= len(buf) + + def _read(self, size=1024): + if self.fileobj is None: raise EOFError, "Reached EOF" + + if self._new_member: + # If the _new_member flag is set, we have to + # jump to the next member, if there is one. + # + # First, check if we're at the end of the file; + # if so, it's time to stop; no more members to read. + pos = self.fileobj.tell() # Save current position + self.fileobj.seek(0, 2) # Seek to end of file + if pos == self.fileobj.tell(): + raise EOFError, "Reached EOF" + else: + self.fileobj.seek( pos ) # Return to original position + + self._init_read() + self._read_gzip_header() + self.decompress = zlib.decompressobj(-zlib.MAX_WBITS) + self._new_member = 0 + + # Read a chunk of data from the file + buf = self.fileobj.read(size) + + # If the EOF has been reached, flush the decompression object + # and mark this object as finished. + + if buf == "": + uncompress = self.decompress.flush() + self._read_eof() + self._add_read_data( uncompress ) + raise EOFError, 'Reached EOF' + + uncompress = self.decompress.decompress(buf) + self._add_read_data( uncompress ) + + if self.decompress.unused_data != "": + # Ending case: we've come to the end of a member in the file, + # so seek back to the start of the unused data, finish up + # this member, and read a new gzip header. + # (The number of bytes to seek back is the length of the unused + # data, minus 8 because _read_eof() will rewind a further 8 bytes) + self.fileobj.seek( -len(self.decompress.unused_data)+8, 1) + + # Check the CRC and file size, and set the flag so we read + # a new member on the next call + self._read_eof() + self._new_member = 1 + + def _add_read_data(self, data): + self.crc = zlib.crc32(data, self.crc) + self.extrabuf = self.extrabuf + data + self.extrasize = self.extrasize + len(data) + self.size = self.size + len(data) + + def _read_eof(self): + # We've read to the end of the file, so we have to rewind in order + # to reread the 8 bytes containing the CRC and the file size. + # We check the that the computed CRC and size of the + # uncompressed data matches the stored values. + self.fileobj.seek(-8, 1) + crc32 = read32(self.fileobj) + isize = read32(self.fileobj) + if crc32%0x100000000L != self.crc%0x100000000L: + raise ValueError, "CRC check failed" + elif isize != self.size: + raise ValueError, "Incorrect length of data produced" + + def close(self): + if self.mode == WRITE: + self.fileobj.write(self.compress.flush()) + write32(self.fileobj, self.crc) + write32(self.fileobj, self.size) + self.fileobj = None + elif self.mode == READ: + self.fileobj = None + if self.myfileobj: + self.myfileobj.close() + self.myfileobj = None + + def __del__(self): + try: + if (self.myfileobj is None and + self.fileobj is None): + return + except AttributeError: + return + self.close() + + def flush(self): + self.fileobj.flush() + + def isatty(self): + return 0 + + def tell(self): + return self.offset + + def rewind(self): + '''Return the uncompressed stream file position indicator to the + beginning of the file''' + if self.mode != READ: + raise IOError("Can't rewind in write mode") + self.fileobj.seek(0) + self._new_member = 1 + self.extrabuf = "" + self.extrasize = 0 + self.offset = 0 + + def seek(self, offset): + if self.mode == WRITE: + if offset < self.offset: + raise IOError('Negative seek in write mode') + count = offset - self.offset + for i in range(count/1024): + self.write(1024*'\0') + self.write((count%1024)*'\0') + elif self.mode == READ: + if offset < self.offset: + # for negative seek, rewind and do positive seek + self.rewind() + count = offset - self.offset + for i in range(count/1024): self.read(1024) + self.read(count % 1024) + + def readline(self, size=-1): + if size < 0: size = sys.maxint + bufs = [] + readsize = min(100, size) # Read from the file in small chunks + while 1: + if size == 0: + return "".join(bufs) # Return resulting line + + c = self.read(readsize) + i = c.find('\n') + if size is not None: + # We set i=size to break out of the loop under two + # conditions: 1) there's no newline, and the chunk is + # larger than size, or 2) there is a newline, but the + # resulting line would be longer than 'size'. + if i==-1 and len(c) > size: i=size-1 + elif size <= i: i = size -1 + + if i >= 0 or c == '': + bufs.append(c[:i+1]) # Add portion of last chunk + self._unread(c[i+1:]) # Push back rest of chunk + return ''.join(bufs) # Return resulting line + + # Append chunk to list, decrease 'size', + bufs.append(c) + size = size - len(c) + readsize = min(size, readsize * 2) + + def readlines(self, sizehint=0): + # Negative numbers result in reading all the lines + if sizehint <= 0: sizehint = sys.maxint + L = [] + while sizehint > 0: + line = self.readline() + if line == "": break + L.append( line ) + sizehint = sizehint - len(line) + + return L + + def writelines(self, L): + for line in L: + self.write(line) + + +def _test(): + # Act like gzip; with -d, act like gunzip. + # The input file is not deleted, however, nor are any other gzip + # options or features supported. + args = sys.argv[1:] + decompress = args and args[0] == "-d" + if decompress: + args = args[1:] + if not args: + args = ["-"] + for arg in args: + if decompress: + if arg == "-": + f = GzipFile(filename="", mode="rb", fileobj=sys.stdin) + g = sys.stdout + else: + if arg[-3:] != ".gz": + print "filename doesn't end in .gz:", `arg` + continue + f = open(arg, "rb") + g = __builtin__.open(arg[:-3], "wb") + else: + if arg == "-": + f = sys.stdin + g = GzipFile(filename="", mode="wb", fileobj=sys.stdout) + else: + f = __builtin__.open(arg, "rb") + g = open(arg + ".gz", "wb") + while 1: + chunk = f.read(1024) + if not chunk: + break + g.write(chunk) + if g is not sys.stdout: + g.close() + if f is not sys.stdin: + f.close() + +if __name__ == '__main__': + _test() diff -Naur --exclude .svn tarfile-current/setup.py tarfile-2.x/setup.py --- tarfile-current/setup.py 2005-10-26 12:52:03.000000000 +0200 +++ tarfile-2.x/setup.py 2005-09-04 12:28:16.000000000 +0200 @@ -5,7 +5,7 @@ # # type "python setup.py install" to install the module # -# $Id: setup.py 538 2005-10-26 10:34:59Z lars $ +# $Id: setup.py 354 2005-08-28 16:38:22Z lars $ import sys import os @@ -16,16 +16,6 @@ print "tarfile needs at least Python 2.x" sys.exit(1) -if sys.version_info[:2] < (2, 2): - print "before proceeding you have to apply a patch." - print "see README.python-2.x for details." - sys.exit(1) - -elif sys.version_info[:2] >= (2, 3): - print "you may need to edit sys.path so that tarfile.py" - print "is imported from site-packages." - print - from distutils.core import setup, Extension ext = [] @@ -33,11 +23,11 @@ ext.append(Extension("_tarfile", ["_tarfile.c"], include_dirs=["include"])) setup(name = "tarfile", - version = "0.7.8", + version = "0.7.7", description = "read and write access to tar archives", author = "Lars Gustäbel", author_email = "lars@gustaebel.de", url = "http://www.gustaebel.de/lars/tarfile/", - py_modules = ["tarfile"], + py_modules = ["tarfile", "gzip22"], ext_modules=ext) diff -Naur --exclude .svn tarfile-current/_tarfile.c tarfile-2.x/_tarfile.c --- tarfile-current/_tarfile.c 2005-10-26 12:52:03.000000000 +0200 +++ tarfile-2.x/_tarfile.c 2005-09-04 12:28:16.000000000 +0200 @@ -146,8 +146,8 @@ char *path = NULL; int uid, gid; int res; - if (!PyArg_ParseTuple(args, "etii:lchown", - Py_FileSystemDefaultEncoding, &path, + if (!PyArg_ParseTuple(args, "sii:lchown", + &path, &uid, &gid)) return NULL; Py_BEGIN_ALLOW_THREADS diff -Naur --exclude .svn tarfile-current/tarfile.py tarfile-2.x/tarfile.py --- tarfile-current/tarfile.py 2005-10-26 12:52:03.000000000 +0200 +++ tarfile-2.x/tarfile.py 2005-10-26 12:42:11.000000000 +0200 @@ -5,7 +5,7 @@ # # Module for reading and writing .tar and tar.gz files. # -# Needs at least Python version 2.2. +# Needs at least Python version 2.1. # # Please consult the html documentation in this distribution # for further details on how to use tarfile. @@ -38,13 +38,13 @@ """Read from and write to tar format archives. """ -__version__ = "$Revision: 538 $" +__version__ = "$Revision: 357 $" # $Source$ version = "0.7.8" __author__ = "Lars Gustäbel (lars@gustaebel.de)" -__date__ = "$Date: 2005-10-26 12:34:59 +0200 (Wed, 26 Oct 2005) $" -__cvsid__ = "$Id: tarfile.py 538 2005-10-26 10:34:59Z lars $" +__date__ = "$Date: 2005-08-29 14:23:58 +0200 (Mon, 29 Aug 2005) $" +__cvsid__ = "$Id: tarfile.py 357 2005-08-29 12:23:58Z lars $" __credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend" #--------- @@ -57,6 +57,10 @@ import errno import time import struct +import UserList + +import __builtin__ +file = __builtin__.open try: import grp, pwd @@ -167,7 +171,12 @@ def nts(s): """Convert a null-terminated string buffer to a python string. """ - return s.rstrip(NUL) + i = len(s) - 1 + while i >= 0: + if s[i] != NUL: + break + i -= 1 + return s[0:i+1] def calc_chksum(buf): """Calculate the checksum for a member's header. It's a simple addition @@ -341,17 +350,6 @@ else: self._init_write_gz() - if type == "bz2": - try: - import bz2 - except ImportError: - raise CompressionError, "bz2 module is not available" - if mode == "r": - self.dbuf = "" - self.cmp = bz2.BZ2Decompressor() - else: - self.cmp = bz2.BZ2Compressor() - def __del__(self): if hasattr(self, "closed") and not self.closed: self.close() @@ -516,7 +514,7 @@ return t[:size] # class _Stream -class _StreamProxy(object): +class _StreamProxy: """Small proxy class that enables transparent compression detection for the Stream interface (mode 'r|*'). """ @@ -532,81 +530,16 @@ def getcomptype(self): if self.buf.startswith("\037\213\010"): return "gz" - if self.buf.startswith("BZh91"): - return "bz2" return "tar" def close(self): self.fileobj.close() # class StreamProxy -class _BZ2Proxy(object): - """Small proxy class that enables external file object - support for "r:bz2" and "w:bz2" modes. This is actually - a workaround for a limitation in bz2 module's BZ2File - class which (unlike gzip.GzipFile) has no support for - a file object argument. - """ - - blocksize = 16 * 1024 - - def __init__(self, fileobj, mode): - self.fileobj = fileobj - self.mode = mode - self.init() - - def init(self): - import bz2 - self.pos = 0 - if self.mode == "r": - self.bz2obj = bz2.BZ2Decompressor() - self.fileobj.seek(0) - self.buf = "" - else: - self.bz2obj = bz2.BZ2Compressor() - - def read(self, size): - b = [self.buf] - x = len(self.buf) - while x < size: - try: - raw = self.fileobj.read(self.blocksize) - data = self.bz2obj.decompress(raw) - b.append(data) - except EOFError: - break - x += len(data) - self.buf = "".join(b) - - buf = self.buf[:size] - self.buf = self.buf[size:] - self.pos += len(buf) - return buf - - def seek(self, pos): - if pos < self.pos: - self.init() - self.read(pos - self.pos) - - def tell(self): - return self.pos - - def write(self, data): - self.pos += len(data) - raw = self.bz2obj.compress(data) - self.fileobj.write(raw) - - def close(self): - if self.mode == "w": - raw = self.bz2obj.flush() - self.fileobj.write(raw) - self.fileobj.close() -# class _BZ2Proxy - #------------------------ # Extraction file object #------------------------ -class ExFileObject(object): +class ExFileObject: """File-like object for reading an archive member. Is returned by TarFile.extractfile(). Support for sparse files included. @@ -746,27 +679,16 @@ """ self.closed = True - def __iter__(self): - """Get an iterator over the file object. - """ - if self.closed: - raise ValueError("I/O operation on closed file") - return self - def next(self): """Get the next item from the file iterator. """ - result = self.readline() - if not result: - raise StopIteration - return result - + return self.readline() #class ExFileObject #------------------ # Exported Classes #------------------ -class TarInfo(object): +class TarInfo: """Informational class which holds the details about an archive member given by a tar header block. TarInfo objects are returned by TarFile.getmember(), @@ -801,56 +723,6 @@ def __repr__(self): return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self)) - def frombuf(cls, buf): - """Construct a TarInfo object from a 512 byte string buffer. - """ - tarinfo = cls() - tarinfo.name = nts(buf[0:100]) - tarinfo.mode = int(buf[100:108], 8) - tarinfo.uid = int(buf[108:116],8) - tarinfo.gid = int(buf[116:124],8) - - # There are two possible codings for the size field we - # have to discriminate, see comment in tobuf() below. - if buf[124] != chr(0200): - tarinfo.size = long(buf[124:136], 8) - else: - tarinfo.size = 0L - for i in range(11): - tarinfo.size <<= 8 - tarinfo.size += ord(buf[125 + i]) - - tarinfo.mtime = long(buf[136:148], 8) - tarinfo.chksum = int(buf[148:156], 8) - tarinfo.type = buf[156:157] - tarinfo.linkname = nts(buf[157:257]) - tarinfo.uname = nts(buf[265:297]) - tarinfo.gname = nts(buf[297:329]) - try: - tarinfo.devmajor = int(buf[329:337], 8) - tarinfo.devminor = int(buf[337:345], 8) - except ValueError: - tarinfo.devmajor = tarinfo.devmajor = 0 - tarinfo.prefix = buf[345:500] - - # Some old tar programs represent a directory as a regular - # file with a trailing slash. - if tarinfo.isreg() and tarinfo.name.endswith("/"): - tarinfo.type = DIRTYPE - - # The prefix field is used for filenames > 100 in - # the POSIX standard. - # name = prefix + '/' + name - if tarinfo.type != GNUTYPE_SPARSE: - tarinfo.name = normpath(os.path.join(nts(tarinfo.prefix), tarinfo.name)) - - # Directory names should have a '/' at the end. - if tarinfo.isdir(): - tarinfo.name += "/" - return tarinfo - - frombuf = classmethod(frombuf) - def tobuf(self): """Return a tar header block as a 512 byte string. """ @@ -920,7 +792,173 @@ return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE) # class TarInfo -class TarFile(object): +def frombuf(buf): + """Construct a TarInfo object from a 512 byte string buffer. + """ + tarinfo = TarInfo() + tarinfo.name = nts(buf[0:100]) + tarinfo.mode = int(buf[100:108], 8) + tarinfo.uid = int(buf[108:116],8) + tarinfo.gid = int(buf[116:124],8) + + # There are two possible codings for the size field we + # have to discriminate, see comment in tobuf() below. + if buf[124] != chr(0200): + tarinfo.size = long(buf[124:136], 8) + else: + tarinfo.size = 0L + for i in range(11): + tarinfo.size <<= 8 + tarinfo.size += ord(buf[125 + i]) + + tarinfo.mtime = long(buf[136:148], 8) + tarinfo.chksum = int(buf[148:156], 8) + tarinfo.type = buf[156:157] + tarinfo.linkname = nts(buf[157:257]) + tarinfo.uname = nts(buf[265:297]) + tarinfo.gname = nts(buf[297:329]) + try: + tarinfo.devmajor = int(buf[329:337], 8) + tarinfo.devminor = int(buf[337:345], 8) + except ValueError: + tarinfo.devmajor = tarinfo.devmajor = 0 + tarinfo.prefix = buf[345:500] + + # Some old tar programs represent a directory as a regular + # file with a trailing slash. + if tarinfo.isreg() and tarinfo.name.endswith("/"): + tarinfo.type = DIRTYPE + + # The prefix field is used for filenames > 100 in + # the POSIX standard. + # name = prefix + '/' + name + if tarinfo.type != GNUTYPE_SPARSE: + tarinfo.name = normpath(os.path.join(nts(tarinfo.prefix), tarinfo.name)) + + # Directory names should have a '/' at the end. + if tarinfo.isdir(): + tarinfo.name += "/" + return tarinfo + +#-------------------------------------------------------------------------- +# Below are the classmethods which act as alternate constructors to the +# TarFile class. The open() method is the only one that is needed for +# public use; it is the "super"-constructor and is able to select an +# adequate "sub"-constructor for a particular compression using the mapping +# from OPEN_METH. +# +# This concept allows one to subclass TarFile without losing the comfort of +# the super-constructor. A sub-constructor is registered and made available +# by adding it to the mapping in OPEN_METH. + +def open(name=None, mode="r", fileobj=None, bufsize=20*512): + """Open a tar archive for reading, writing or appending. Return + an appropriate TarFile class. + + mode: + 'r' or 'r:*' open for reading with transparent compression + 'r:' open for reading exclusively uncompressed + 'r:gz' open for reading with gzip compression + 'a' or 'a:' open for appending + 'w' or 'w:' open for writing without compression + 'w:gz' open for writing with gzip compression + + 'r|*' open a stream of tar blocks with transparent compression + 'r|' open an uncompressed stream of tar blocks for reading + 'r|gz' open a gzip compressed stream of tar blocks + 'w|' open an uncompressed stream for writing + 'w|gz' open a gzip compressed stream for writing + """ + + if not name and not fileobj: + raise ValueError, "nothing to open" + + if mode in ("r", "r:*"): + # Find out which *open() is appropriate for opening the file. + for comptype in OPEN_METH.keys(): + func = OPEN_METH[comptype] + try: + return func(name, "r", fileobj) + except (ReadError, CompressionError): + continue + raise ReadError, "file could not be opened successfully" + + elif ":" in mode: + filemode, comptype = mode.split(":", 1) + filemode = filemode or "r" + comptype = comptype or "tar" + + # Select the *open() function according to + # given compression. + if comptype in OPEN_METH.keys(): + func = OPEN_METH[comptype] + else: + raise CompressionError, "unknown compression type %r" % comptype + return func(name, filemode, fileobj) + + elif "|" in mode: + filemode, comptype = mode.split("|", 1) + filemode = filemode or "r" + comptype = comptype or "tar" + + if filemode not in "rw": + raise ValueError, "mode must be 'r' or 'w'" + + t = TarFile(name, filemode, + _Stream(name, filemode, comptype, fileobj, bufsize)) + t._extfileobj = False + return t + + elif mode in "aw": + return taropen(name, mode, fileobj) + + raise ValueError, "undiscernible mode" + +def taropen(name, mode="r", fileobj=None): + """Open uncompressed tar archive name for reading or writing. + """ + if len(mode) > 1 or mode not in "raw": + raise ValueError, "mode must be 'r', 'a' or 'w'" + return TarFile(name, mode, fileobj) + +def gzopen(name, mode="r", fileobj=None, compresslevel=9): + """Open gzip compressed tar archive name for reading or writing. + Appending is not allowed. + """ + if len(mode) > 1 or mode not in "rw": + raise ValueError, "mode must be 'r' or 'w'" + + try: + import gzip22 + except ImportError: + raise CompressionError, "gzip module is not available" + + pre, ext = os.path.splitext(name) + if ext == ".tgz": + ext = ".tar" + if ext == ".gz": + ext = "" + tarname = os.path.basename(pre + ext) + + if fileobj is None: + fileobj = file(name, mode + "b") + + try: + t = taropen(name, mode, + gzip22.GzipFile(tarname, mode, compresslevel, fileobj) + ) + except IOError: + raise ReadError, "not a gzip file" + t._extfileobj = False + return t + +# All *open() methods are registered here. +OPEN_METH = { + "tar": taropen, # uncompressed tar + "gz": gzopen # gzip compressed tar +} + +class TarFile: """The TarFile Class provides an interface to tar archives. """ @@ -998,161 +1036,6 @@ self._loaded = True #-------------------------------------------------------------------------- - # Below are the classmethods which act as alternate constructors to the - # TarFile class. The open() method is the only one that is needed for - # public use; it is the "super"-constructor and is able to select an - # adequate "sub"-constructor for a particular compression using the mapping - # from OPEN_METH. - # - # This concept allows one to subclass TarFile without losing the comfort of - # the super-constructor. A sub-constructor is registered and made available - # by adding it to the mapping in OPEN_METH. - - def open(cls, name=None, mode="r", fileobj=None, bufsize=20*512): - """Open a tar archive for reading, writing or appending. Return - an appropriate TarFile class. - - mode: - 'r' or 'r:*' open for reading with transparent compression - 'r:' open for reading exclusively uncompressed - 'r:gz' open for reading with gzip compression - 'r:bz2' open for reading with bzip2 compression - 'a' or 'a:' open for appending - 'w' or 'w:' open for writing without compression - 'w:gz' open for writing with gzip compression - 'w:bz2' open for writing with bzip2 compression - - 'r|*' open a stream of tar blocks with transparent compression - 'r|' open an uncompressed stream of tar blocks for reading - 'r|gz' open a gzip compressed stream of tar blocks - 'r|bz2' open a bzip2 compressed stream of tar blocks - 'w|' open an uncompressed stream for writing - 'w|gz' open a gzip compressed stream for writing - 'w|bz2' open a bzip2 compressed stream for writing - """ - - if not name and not fileobj: - raise ValueError, "nothing to open" - - if mode in ("r", "r:*"): - # Find out which *open() is appropriate for opening the file. - for comptype in cls.OPEN_METH: - func = getattr(cls, cls.OPEN_METH[comptype]) - try: - return func(name, "r", fileobj) - except (ReadError, CompressionError): - continue - raise ReadError, "file could not be opened successfully" - - elif ":" in mode: - filemode, comptype = mode.split(":", 1) - filemode = filemode or "r" - comptype = comptype or "tar" - - # Select the *open() function according to - # given compression. - if comptype in cls.OPEN_METH: - func = getattr(cls, cls.OPEN_METH[comptype]) - else: - raise CompressionError, "unknown compression type %r" % comptype - return func(name, filemode, fileobj) - - elif "|" in mode: - filemode, comptype = mode.split("|", 1) - filemode = filemode or "r" - comptype = comptype or "tar" - - if filemode not in "rw": - raise ValueError, "mode must be 'r' or 'w'" - - t = cls(name, filemode, - _Stream(name, filemode, comptype, fileobj, bufsize)) - t._extfileobj = False - return t - - elif mode in "aw": - return cls.taropen(name, mode, fileobj) - - raise ValueError, "undiscernible mode" - - open = classmethod(open) - - def taropen(cls, name, mode="r", fileobj=None): - """Open uncompressed tar archive name for reading or writing. - """ - if len(mode) > 1 or mode not in "raw": - raise ValueError, "mode must be 'r', 'a' or 'w'" - return cls(name, mode, fileobj) - - taropen = classmethod(taropen) - - def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9): - """Open gzip compressed tar archive name for reading or writing. - Appending is not allowed. - """ - if len(mode) > 1 or mode not in "rw": - raise ValueError, "mode must be 'r' or 'w'" - - try: - import gzip - except ImportError: - raise CompressionError, "gzip module is not available" - - pre, ext = os.path.splitext(name) - if ext == ".tgz": - ext = ".tar" - if ext == ".gz": - ext = "" - tarname = os.path.basename(pre + ext) - - if fileobj is None: - fileobj = file(name, mode + "b") - - try: - t = cls.taropen(name, mode, - gzip.GzipFile(tarname, mode, compresslevel, fileobj) - ) - except IOError: - raise ReadError, "not a gzip file" - t._extfileobj = False - return t - - gzopen = classmethod(gzopen) - - def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9): - """Open bzip2 compressed tar archive name for reading or writing. - Appending is not allowed. - """ - if len(mode) > 1 or mode not in "rw": - raise ValueError, "mode must be 'r' or 'w'." - - try: - import bz2 - except ImportError: - raise CompressionError, "bz2 module is not available" - - if fileobj is not None: - fileobj = _BZ2Proxy(fileobj, mode) - else: - fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel) - - try: - t = cls.taropen(name, mode, fileobj) - except IOError: - raise ReadError, "not a bzip2 file" - t._extfileobj = False - return t - - bz2open = classmethod(bz2open) - - # All *open() methods are registered here. - OPEN_METH = { - "tar": "taropen", # uncompressed tar - "gz": "gzopen", # gzip compressed tar - "bz2": "bz2open" # bzip2 compressed tar - } - - #-------------------------------------------------------------------------- # The public methods which TarFile provides: def close(self): @@ -1242,11 +1125,11 @@ statres = os.fstat(fileobj.fileno()) linkname = "" - stmd = statres.st_mode + stmd = statres[stat.ST_MODE] if stat.S_ISREG(stmd): - inode = (statres.st_ino, statres.st_dev) + inode = (statres[stat.ST_INO], statres[stat.ST_DEV]) if not self.dereference and \ - statres.st_nlink > 1 and inode in self.inodes: + statres[stat.ST_NLINK] > 1 and inode in self.inodes.keys(): # Is it a hardlink to an already # archived file? type = LNKTYPE @@ -1277,13 +1160,13 @@ # information we can get. tarinfo.name = arcname tarinfo.mode = stmd - tarinfo.uid = statres.st_uid - tarinfo.gid = statres.st_gid + tarinfo.uid = statres[stat.ST_UID] + tarinfo.gid = statres[stat.ST_GID] if stat.S_ISREG(stmd): - tarinfo.size = statres.st_size + tarinfo.size = statres[stat.ST_SIZE] else: tarinfo.size = 0L - tarinfo.mtime = statres.st_mtime + tarinfo.mtime = statres[stat.ST_MTIME] tarinfo.type = type tarinfo.linkname = linkname if pwd: @@ -1299,8 +1182,8 @@ if type in (CHRTYPE, BLKTYPE): if hasattr(os, "major") and hasattr(os, "minor"): - tarinfo.devmajor = os.major(statres.st_rdev) - tarinfo.devminor = os.minor(statres.st_rdev) + tarinfo.devmajor = os.major(statres[stat.ST_RDEV]) + tarinfo.devminor = os.minor(statres[stat.ST_RDEV]) return tarinfo def list(self, verbose=True): @@ -1310,7 +1193,7 @@ """ self._check() - for tarinfo in self: + for tarinfo in self.getmembers(): if verbose: print filemode(tarinfo.mode), print "%s/%s" % (tarinfo.uname or tarinfo.uid, @@ -1777,9 +1660,10 @@ while True: buf = self.fileobj.read(BLOCKSIZE) if not buf: + self._loaded = True return None try: - tarinfo = TarInfo.frombuf(buf) + tarinfo = frombuf(buf) except ValueError: if self.ignore_zeros: if buf.count(NUL) == BLOCKSIZE: @@ -1795,6 +1679,7 @@ # If the first block is invalid. That does not # look like a tar archive we can handle. raise ReadError,"empty, unreadable or compressed file" + self._loaded = True return None break @@ -1813,7 +1698,7 @@ # Check if the TarInfo object has a typeflag for which a callback # method is registered in the TYPE_METH. If so, then call it. - if tarinfo.type in self.TYPE_METH: + if tarinfo.type in self.TYPE_METH.keys(): return self.TYPE_METH[tarinfo.type](self, tarinfo) tarinfo.offset_data = self.offset @@ -1980,14 +1865,6 @@ if mode is not None and self._mode not in mode: raise IOError, "bad operation for mode %r" % self._mode - def __iter__(self): - """Provide an iterator object. - """ - if self._loaded: - return iter(self.members) - else: - return TarIter(self) - def _create_gnulong(self, name, type): """Write a GNU longname/longlink member to the TarFile. It consists of an extended tar header, with the length @@ -2020,42 +1897,6 @@ print >> sys.stderr, msg # class TarFile -class TarIter: - """Iterator Class. - - for tarinfo in TarFile(...): - suite... - """ - - def __init__(self, tarfile): - """Construct a TarIter object. - """ - self.tarfile = tarfile - self.index = 0 - def __iter__(self): - """Return iterator object. - """ - return self - def next(self): - """Return the next item using TarFile's next() method. - When all members have been read, set TarFile as _loaded. - """ - # Fix for SF #1100429: Under rare circumstances it can - # happen that getmembers() is called during iteration, - # which will cause TarIter to stop prematurely. - if not self.tarfile._loaded: - tarinfo = self.tarfile.next() - if not tarinfo: - self.tarfile._loaded = True - raise StopIteration - else: - try: - tarinfo = self.tarfile.members[self.index] - except IndexError: - raise StopIteration - self.index += 1 - return tarinfo - # Helper classes for sparse file support class _section: """Base class for _data and _hole. @@ -2078,20 +1919,21 @@ """ pass -class _ringbuffer(list): +class _ringbuffer(UserList.UserList): """Ringbuffer class which increases performance over a regular list. """ def __init__(self): + UserList.UserList.__init__(self) self.idx = 0 def find(self, offset): idx = self.idx while True: - item = self[idx] + item = self.data[idx] if offset in item: break idx += 1 - if idx == len(self): + if idx == len(self.data): idx = 0 if idx == self.idx: # End of File @@ -2110,9 +1952,9 @@ """ def __init__(self, file, mode="r", compression=TAR_PLAIN): if compression == TAR_PLAIN: - self.tarfile = TarFile.taropen(file, mode) + self.tarfile = taropen(file, mode) elif compression == TAR_GZIPPED: - self.tarfile = TarFile.gzopen(file, mode) + self.tarfile = gzopen(file, mode) else: raise ValueError, "unknown compression constant" if mode[0:1] == "r": @@ -2162,4 +2004,3 @@ except TarError: return False -open = TarFile.open diff -Naur --exclude .svn tarfile-current/test_tarfile.py tarfile-2.x/test_tarfile.py --- tarfile-current/test_tarfile.py 2005-10-26 12:52:03.000000000 +0200 +++ tarfile-2.x/test_tarfile.py 2005-10-26 12:41:20.000000000 +0200 @@ -1,5 +1,6 @@ import sys import os +import stat import shutil import tempfile @@ -13,21 +14,28 @@ except ImportError: import cStringIO as StringIO +file = open + +# XXX remove for release (2.3) +try: + True + False +except NameError: + True = 1 + False = 0 + # Check for our compression modules. try: - import gzip + import gzip22 as gzip gzip.GzipFile except (ImportError, AttributeError): gzip = None -try: - import bz2 -except ImportError: - bz2 = None def path(path): return os.path.join(os.getcwd(), path) testtar = path("testtar.tar") +os.extsep = '.' # Won't work in RISCOS tempdir = os.path.join(tempfile.gettempdir(), "testtar" + os.extsep + "dir") tempname = test_support.TESTFN membercount = 10 @@ -64,7 +72,10 @@ """Test member extraction. """ members = 0 - for tarinfo in self.tar: + while 1: + tarinfo = self.tar.next() + if tarinfo is None: + break members += 1 if not tarinfo.isreg(): continue @@ -96,16 +107,6 @@ self.assert_(lines1 == lines2, "_FileObject.readline() does not work correctly") - def test_iter(self): - # Test iteration over ExFileObject. - if self.sep != "|": - filename = "0-REGTYPE-TEXT" - self.tar.extract(filename, dirname()) - lines1 = file(os.path.join(dirname(), filename), "rU").readlines() - lines2 = [line for line in self.tar.extractfile(filename)] - self.assert_(lines1 == lines2, - "ExFileObject iteration does not work correctly") - def test_seek(self): """Test seek() method of _FileObject, incl. random reading. """ @@ -259,10 +260,13 @@ self.dst.addfile(tarinfo, fobj) self.dst.close() - self.assertNotEqual(os.stat(self.dstname).st_size, 0) + self.assertNotEqual(os.stat(self.dstname)[stat.ST_SIZE], 0) def _test(self): - for tarinfo in self.src: + while 1: + tarinfo = self.src.next() + if tarinfo is None: + break if not tarinfo.isreg(): continue f = self.src.extractfile(tarinfo) @@ -497,32 +501,10 @@ comp = "gz" -if bz2: - # Bzip2 TestCases - class ReadTestBzip2(ReadTest): - comp = "bz2" - class ReadStreamTestBzip2(ReadStreamTest): - comp = "bz2" - class ReadFileobjTestBzip2(ReadFileobjTest): - comp = "bz2" - class WriteTestBzip2(WriteTest): - comp = "bz2" - class WriteStreamTestBzip2(WriteStreamTest): - comp = "bz2" - class WriteFileobjTestBzip2(WriteFileobjTest): - comp = "bz2" - class ReadAsteriskTestBzip2(ReadAsteriskTest): - comp = "bz2" - class ReadStreamAsteriskTestBzip2(ReadStreamAsteriskTest): - comp = "bz2" - def test_main(): if gzip: # create testtar.tar.gz gzip.open(tarname("gz"), "wb").write(file(tarname(), "rb").read()) - if bz2: - # create testtar.tar.bz2 - bz2.BZ2File(tarname("bz2"), "wb").write(file(tarname(), "rb").read()) tests = [ FileModeTest, @@ -550,20 +532,15 @@ ReadAsteriskTestGzip, ReadStreamAsteriskTestGzip ]) - if bz2: - tests.extend([ - ReadTestBzip2, ReadStreamTestBzip2, - ReadFileobjTestBzip2, WriteTestBzip2, - WriteStreamTestBzip2, WriteFileobjTestBzip2, - ReadAsteriskTestBzip2, ReadStreamAsteriskTestBzip2 - ]) try: - test_support.run_unittest(*tests) + suites = [] + for testclass in tests: + suites.append(unittest.makeSuite(testclass, "test")) + runner = unittest.TextTestRunner() + runner.run(unittest.TestSuite(suites)) finally: if gzip: os.remove(tarname("gz")) - if bz2: - os.remove(tarname("bz2")) if os.path.exists(dirname()): shutil.rmtree(dirname()) if os.path.exists(tmpname()):