# ports//sysutils/duplicity/work/duplicity-0.4.8/src/patchdir.py

# Copyright 2002 Ben Escoto
#
# This file is part of duplicity.
#
# Duplicity is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# Duplicity is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with duplicity; if not, write to the Free Software Foundation,
# Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

"""Functions for patching of directories"""

from __future__ import generators
import re, tarfile, librsync, log, diffdir, misc
from path import *
from lazy import *

class PatchDirException(Exception):
	"""Raised when a diff tarfile entry is unrecognized or unsafe"""
	pass


def Patch(base_path, difftar_fileobj):
	"""Patch given base_path and file object containing delta

	base_path is the Path to be patched and difftar_fileobj is a file
	object, opened for reading, holding a single diff tarfile.  The
	file object is closed once the patch has been applied.

	"""
	diff_tarfile = tarfile.TarFile("arbitrary", "r", difftar_fileobj)
	patch_diff_tarfile(base_path, diff_tarfile)
	# Close outside of the assert statement: asserts are removed when
	# python runs with -O, and the close must still happen then.
	close_result = difftar_fileobj.close()
	assert not close_result, close_result

def Patch_from_iter(base_path, fileobj_iter, restrict_index = ()):
	"""Patch base_path using the deltas read from several file objects

	fileobj_iter is an iterator of file objects; they are presented
	to the patcher as one continuous tarfile.  restrict_index is
	passed through to patch_diff_tarfile unchanged.

	"""
	combined_tarfile = TarFile_FromFileobjs(fileobj_iter)
	patch_diff_tarfile(base_path, combined_tarfile, restrict_index)

def patch_diff_tarfile(base_path, diff_tarfile, restrict_index = ()):
	"""Apply the deltas in diff_tarfile to the tree rooted at base_path

	diff_tarfile behaves like a tarfile.TarFile.  When restrict_index
	is non-empty, deltas whose index does not start with it are
	ignored and the prefix is stripped from the remaining ones.

	"""
	# NOTE(review): "selection" is not imported by name in this file;
	# presumably it arrives through one of the star imports -- confirm.
	if not base_path.exists():
		path_iter = empty_iter() # probably untarring full backup
	else: path_iter = selection.Select(base_path).set_iter()

	diff_path_iter = difftar2path_iter(diff_tarfile)
	if restrict_index:
		diff_path_iter = filter_path_iter(diff_path_iter, restrict_index)

	patcher = IterTreeReducer(PathPatcher, [base_path])
	for basis_path, diff_ropath in diffdir.collate2iters(path_iter,
														 diff_path_iter):
		# Log and index by the basis when there is one, else by the diff
		lead = basis_path or diff_ropath
		log.Log("Patching %s" % (lead.get_relative_path(),), 5)
		patcher(lead.index, basis_path, diff_ropath)
	patcher.Finish()
	base_path.setdata()

def empty_iter():
	"""Return a generator that is exhausted immediately"""
	return
	yield None # never reached; its presence makes this a generator

def filter_path_iter(path_iter, index):
	"""Keep only the paths under index, re-rooting them at index

	Elements of path_iter whose .index does not begin with index are
	dropped.  For the others the leading index components are removed
	from .index (the element is modified in place) before it is
	yielded.  index must be a non-empty tuple.

	"""
	assert isinstance(index, tuple) and index, index
	prefix_len = len(index)
	for candidate in path_iter:
		if candidate.index[:prefix_len] != index: continue
		candidate.index = candidate.index[prefix_len:]
		yield candidate

def difftar2path_iter(diff_tarfile):
	"""Turn file-like difftarobj into iterator of ROPaths

	diff_tarfile must be iterable over tarinfo objects and provide
	extractfile().  One ROPath is yielded per logical entry, with its
	.difftype set from the entry name.  Entries of difftype "deleted"
	get type None; regular files get a file object attached, which
	for multivolume entries is a Multivol_Filelike spanning all of
	that entry's volumes.

	"""
	tar_iter = iter(diff_tarfile)
	multivol_fileobj = None

	# The next tar_info is stored in this one element list so
	# Multivol_Filelike below can update it.  Any StopIterations will
	# be passed upwards.
	tarinfo_list = [tar_iter.next()]

	while 1:
		# This section relevant when a multivol diff is last in tar
		# (Multivol_Filelike stores None here once tar_iter runs out)
		if not tarinfo_list[0]: raise StopIteration
		if multivol_fileobj and not multivol_fileobj.at_end:
			# The consumer did not read the whole multivol file.  close()
			# skips its remaining volumes, which moves tarinfo_list[0]
			# on; restart the loop so the None check above is redone.
			multivol_fileobj.close() # aborting in middle of multivol
			continue
		
		index, difftype, multivol = get_index_from_tarinfo(tarinfo_list[0])
		ropath = ROPath(index)
		ropath.init_from_tarinfo(tarinfo_list[0])
		ropath.difftype = difftype
		if difftype == "deleted": ropath.type = None
		elif ropath.isreg():
			if multivol:
				multivol_fileobj = Multivol_Filelike(diff_tarfile, tar_iter,
													 tarinfo_list, index)
				ropath.setfileobj(multivol_fileobj)
				yield ropath
				continue # Multivol_Filelike will reset tarinfo_list
			else: ropath.setfileobj(diff_tarfile.extractfile(tarinfo_list[0]))
		yield ropath
		# Single volume entry: advance ourselves.  A StopIteration from
		# the exhausted tar iterator ends this generator too.
		tarinfo_list[0] = tar_iter.next()

def get_index_from_tarinfo(tarinfo):
	"""Return (index, difftype, multivol) triple from tarinfo object

	The entry name must start with one of the prefixes "snapshot/",
	"diff/", "deleted/", "multivol_diff/" or "multivol_snapshot/".
	index is the tuple of remaining path components (the empty tuple
	for "." or an empty name), difftype is "snapshot", "diff" or
	"deleted", and multivol is 1 for multivolume entries (whose
	trailing volume number is dropped from the index) and 0 otherwise.

	Raises PatchDirException if the name is not recognized or if one
	of the path components is "..".

	"""
	for prefix in ["snapshot/", "diff/", "deleted/",
				   "multivol_diff/", "multivol_snapshot/"]:
		if tarinfo.name.startswith(prefix):
			if prefix.startswith("multivol"):
				if prefix == "multivol_diff/": difftype = "diff"
				else: difftype = "snapshot"
				multivol = 1
				# (?s) makes "." match newlines as well, so that a path
				# containing a newline character is still recognized.
				name, num_subs = \
					  re.subn("(?s)^multivol_(diff|snapshot)/(.*)/[0-9]+$",
							  "\\2", tarinfo.name)
				if num_subs != 1:
					raise PatchDirException("Unrecognized diff entry %s" %
											(tarinfo.name,))
			else:
				difftype = prefix[:-1] # strip trailing /
				name = tarinfo.name[len(prefix):] # strip prefix
				if name.endswith("/"): name = name[:-1] # strip one trailing /
				multivol = 0
			break
	else: raise PatchDirException("Unrecognized diff entry %s" %
								 (tarinfo.name,))
	if name == "." or name == "": index = ()
	else:
		index = tuple(name.split("/"))
		# Refuse entries that could climb out of the tree being patched
		if '..' in index:
			raise PatchDirException("Tar entry %s contains '..'.  Security "
									"violation" % (tarinfo.name,))
	return (index, difftype, multivol)


class Multivol_Filelike:
	"""Present the volumes of one multivol entry as a single file

	Data is pulled from the tarfile one volume at a time into a
	buffer, from which read() is served.

	"""
	def __init__(self, tf, tar_iter, tarinfo_list, index):
		"""Initializer

		tf is the TarFile-like object, tar_iter the iterator over its
		tarinfos, and tarinfo_list a one element list holding the
		current tarinfo.  index is the index of the file being read.

		"""
		self.tf = tf
		self.tar_iter = tar_iter
		# Shared with the caller as a list, so that advancing to the
		# next tarinfo here is seen there too.
		self.tarinfo_list = tarinfo_list
		self.index = index
		self.buffer = ""
		self.at_end = 0

	def read(self, length = -1):
		"""Return up to length bytes, or everything left if length < 0"""
		if length < 0:
			while self.addtobuffer(): pass
			take = len(self.buffer)
		else:
			# Pull in volumes until there is enough data or none is left
			while len(self.buffer) < length and self.addtobuffer(): pass
			take = min(len(self.buffer), length)

		head, self.buffer = self.buffer[:take], self.buffer[take:]
		return head

	def addtobuffer(self):
		"""Append the next volume to the buffer

		Returns 1 if more volumes may follow.  Returns None, with
		at_end set, once the current tarinfo belongs to another entry
		or the tar iterator is exhausted.

		"""
		if self.at_end: return None
		current = self.tarinfo_list[0]
		index, difftype, multivol = get_index_from_tarinfo(current)
		if not (multivol and index == self.index): # we've moved on
			# tarinfo_list[0] is left alone: it is the next entry that
			# difftar2path_iter has to process
			self.at_end = 1
			return None

		volume_fp = self.tf.extractfile(current)
		self.buffer += volume_fp.read()
		volume_fp.close()

		try: upcoming = self.tar_iter.next()
		except StopIteration:
			# No more entries at all; None tells difftar2path_iter so
			self.tarinfo_list[0] = None
			self.at_end = 1
			return None
		self.tarinfo_list[0] = upcoming
		return 1

	def close(self):
		"""Skip over any volumes not yet read, discarding their data"""
		if not self.at_end:
			self.buffer = ""
			while self.addtobuffer(): self.buffer = ""
		self.at_end = 1
		

class PathPatcher(ITRBranch):
	"""Used by DirPatch, process the given basis and diff

	Directories are handled by start_process/end_process, so that the
	attributes of a directory are copied only when the tree is left.
	Everything else goes through fast_process.

	"""
	def __init__(self, base_path):
		"""Set base_path, Path of root of tree"""
		self.base_path = base_path
		# Both are set together in start_process when a directory is
		# entered, and read again in end_process.
		self.dir_basis_path = None
		self.dir_diff_ropath = None

	def start_process(self, index, basis_path, diff_ropath):
		"""Start processing when diff_ropath is a directory

		Makes sure a directory exists at the basis location, replacing
		a non-directory if necessary.  A diff_ropath that is missing
		or not a directory is handed to fast_process.

		"""
		if not (diff_ropath and diff_ropath.isdir()):
			assert index == (), str(index) # should only happen for first elem
			self.fast_process(index, basis_path, diff_ropath)
			return
			
		if not basis_path:
			basis_path = self.base_path.new_index(index)
			assert not basis_path.exists()
			basis_path.mkdir() # Need place for later files to go into
		elif not basis_path.isdir():
			basis_path.delete()
			basis_path.mkdir()
		self.dir_basis_path = basis_path
		self.dir_diff_ropath = diff_ropath

	def end_process(self):
		"""Copy directory permissions when leaving tree"""
		if self.dir_diff_ropath:
			self.dir_diff_ropath.copy_attribs(self.dir_basis_path)

	def can_fast_process(self, index, basis_path, diff_ropath):
		"""No need to recurse if diff_ropath isn't a directory"""
		return not (diff_ropath and diff_ropath.isdir())

	def fast_process(self, index, basis_path, diff_ropath):
		"""Apply diff_ropath when it is not a directory

		A missing diff_ropath means no change.  A "deleted" entry
		removes the basis, a "diff" entry patches a regular basis
		file, and anything else replaces whatever is at the basis
		location.

		"""
		if not diff_ropath: return # no change
		elif not basis_path:
			if diff_ropath.difftype == "deleted": pass # already deleted
			else:  # just copy snapshot over
				diff_ropath.copy(self.base_path.new_index(index))
		elif diff_ropath.difftype == "deleted":
			if basis_path.isdir(): basis_path.deltree()
			else: basis_path.delete()
		elif not basis_path.isreg() or diff_ropath.difftype == "snapshot":
			# A snapshot carries the complete new contents, so it
			# replaces the basis even when that is a regular file;
			# only a real diff can be applied to the existing data.
			if basis_path.isdir(): basis_path.deltree()
			else: basis_path.delete()
			diff_ropath.copy(basis_path)
		else:
			assert diff_ropath.difftype == "diff", diff_ropath.difftype
			basis_path.patch_with_attribs(diff_ropath)


class TarFile_FromFileobjs:
	"""Like a tarfile.TarFile iterator, but read from multiple fileobjs"""
	def __init__(self, fileobj_iter):
		"""Make new tarinfo iterator

		fileobj_iter should be an iterator of file objects opened for
		reading.  They will be closed at end of reading.

		"""
		self.fileobj_iter = fileobj_iter
		self.tarfile, self.tar_iter = None, None
		self.current_fp = None

	def __iter__(self): return self

	def set_tarfile(self):
		"""Set tarfile from next file object, or raise StopIteration"""
		if self.current_fp: assert not self.current_fp.close()
		self.current_fp = self.fileobj_iter.next()
		self.tarfile = tarfile.TarFile("arbitrary", "r", self.current_fp)
		self.tar_iter = iter(self.tarfile)

	def next(self):
		if not self.tarfile: self.set_tarfile()
		try: return self.tar_iter.next()
		except StopIteration:
			assert not self.tarfile.close()
			self.set_tarfile()
			return self.tar_iter.next()

	def extractfile(self, tarinfo):
		"""Return data associated with given tarinfo"""
		return self.tarfile.extractfile(tarinfo)


def collate_iters(iter_list):
	"""Collate iterators by index

	Input is a list of n iterators each of which must iterate elements
	with an index attribute.  The elements must come out in increasing
	order, and the index should be a tuple itself.

	The output is an iterator which yields tuples where all elements
	in the tuple have the same index, and the tuple has n elements in
	it.  If any iterator lacks an element with that index, the tuple
	will have None in that spot.

	When exactly two iterators are given the work is delegated to
	diffdir.collate2iters.

	"""
	# overflow[i] means that iter_list[i] has been exhausted
	# elems[i] is None means that it is time to replenish it.
	iter_num = len(iter_list)
	if iter_num == 2:
		return diffdir.collate2iters(iter_list[0], iter_list[1])
	overflow = [None] * iter_num
	elems = overflow[:]

	def setrorps(overflow, elems):
		"""Refill every empty slot of elems, recording exhaustion"""
		for i in range(iter_num):
			if not overflow[i] and elems[i] is None:
				try: elems[i] = iter_list[i].next()
				except StopIteration:
					overflow[i] = 1
					elems[i] = None

	def getleastindex(elems):
		"""Return the first index in elems, assuming elems isn't empty"""
		# Empty slots are tested with "is None", as in setrorps, so an
		# element that happens to evaluate as false is not skipped.
		return min([elem.index for elem in elems if elem is not None])

	def yield_tuples(iter_num, overflow, elems):
		while 1:
			setrorps(overflow, elems)
			if not None in overflow: break # every iterator is exhausted

			index = getleastindex(elems)
			yieldval = []
			for i in range(iter_num):
				if elems[i] is not None and elems[i].index == index:
					yieldval.append(elems[i])
					elems[i] = None # consumed; refilled on next pass
				else: yieldval.append(None)
			yield tuple(yieldval)
	return yield_tuples(iter_num, overflow, elems)

class IndexedTuple:
	"""Like a tuple, but has .index (used previously by collate_iters)

	Ordering comparisons look at .index only.  Equality with another
	IndexedTuple requires both index and data to match; equality with
	a plain tuple compares the data alone.

	"""
	def __init__(self, index, sequence):
		self.index = index
		self.data = tuple(sequence)

	def __len__(self): return len(self.data)

	def __getitem__(self, key):
		"""This only works for numerical keys (easier this way)"""
		return self.data[key]

	def __lt__(self, other): return self.__cmp__(other) == -1
	def __le__(self, other): return self.__cmp__(other) != 1
	def __ne__(self, other): return not self.__eq__(other)
	def __gt__(self, other): return self.__cmp__(other) == 1
	def __ge__(self, other): return self.__cmp__(other) != -1
	
	def __cmp__(self, other):
		"""Order by index; other must be an IndexedTuple"""
		assert isinstance(other, IndexedTuple)
		if self.index < other.index: return -1
		elif self.index == other.index: return 0
		else: return 1

	def __eq__(self, other):
		"""Return true if other is an equal IndexedTuple or tuple"""
		if isinstance(other, IndexedTuple):
			return self.index == other.index and self.data == other.data
		# isinstance needs no help from the types module, which this
		# file does not import by name
		elif isinstance(other, tuple):
			return self.data == other
		else: return False

	def __str__(self):
		return  "(%s).%s" % (", ".join(map(str, self.data)), self.index)

def normalize_ps(patch_sequence):
	"""Return the deltas of patch_sequence that are actually needed

	patch_sequence is in patch order, oldest first, and may contain
	None for blank entries.  Working back from the newest entry,
	deltas are kept up to and including the first one whose difftype
	is not "diff": such a delta does not depend on anything before
	it, so earlier patches are unnecessary.  The result is a new
	list, still in patch order.

	"""
	kept = []
	for position in range(len(patch_sequence) - 1, -1, -1):
		delta = patch_sequence[position]
		if delta is None: continue # skip blank entries
		kept.append(delta)
		if delta.difftype != "diff": break
	kept.reverse() # collected newest first
	return kept

def patch_seq2ropath(patch_seq):
	"""Apply the patches in patch_seq, return single ropath

	patch_seq is a non-empty sequence in patch order: the first
	element must not be a "diff", all later ones must be.  If the
	first element is not a regular file it has to be the only one and
	its ropath is returned as is.  Otherwise the result is the ropath
	of the last element, with a file object set that supplies the
	fully patched data.

	"""
	first = patch_seq[0]
	assert first.difftype != "diff", patch_seq
	if not first.isreg(): # No need to bother with data if not regular file
		assert len(patch_seq) == 1, len(patch_seq)
		return first.get_ropath()

	current_file = first.open("rb")

	for delta_ropath in patch_seq[1:]:
		assert delta_ropath.difftype == "diff", delta_ropath.difftype
		if not isinstance(current_file, file): # librsync needs true file
			tempfp = os.tmpfile()
			misc.copyfileobj(current_file, tempfp)
			# Close outside of the assert statement, so the close is
			# not lost when asserts are disabled (python -O).
			close_result = current_file.close()
			assert not close_result, close_result
			tempfp.seek(0)
			current_file = tempfp
		current_file = librsync.PatchedFile(current_file,
											delta_ropath.open("rb"))
	result = patch_seq[-1].get_ropath()
	result.setfileobj(current_file)
	return result

def integrate_patch_iters(iter_list):
	"""Merge several iterators of ropath patches into final ROPaths

	iter_list must be in patch order and each iterator in it ordered
	by index.  For every index the patches are reduced to the needed
	ones and applied; the resulting ROPaths are yielded in index
	order, leaving out those that no longer exist.

	"""
	for patch_seq in collate_iters(iter_list):
		needed_patches = normalize_ps(patch_seq)
		final_ropath = patch_seq2ropath(needed_patches)
		if not final_ropath.exists(): continue # final patch was delete
		yield final_ropath

def tarfiles2rop_iter(tarfile_list, restrict_index = ()):
	"""Integrate tarfiles of diffs into single ROPath iter

	Every tarfile in tarfile_list is turned into an iterator of diff
	ROPaths.  If restrict_index is non-empty each iterator is first
	limited to the diffs starting with restrict_index; then all of
	them are integrated.

	"""
	diff_iters = [difftar2path_iter(tf) for tf in tarfile_list]
	if restrict_index: # Apply filter before integration
		diff_iters = [filter_path_iter(diff_iter, restrict_index)
					  for diff_iter in diff_iters]
	return integrate_patch_iters(diff_iters)

def Write_ROPaths(base_path, rop_iter):
	"""Write every ropath of rop_iter below base_path

	Returns 1 if rop_iter produced at least one ropath, 0 otherwise.

	"""
	writer = IterTreeReducer(ROPath_IterWriter, [base_path])
	wrote_something = 0
	for ropath in rop_iter:
		wrote_something = 1
		writer(ropath.index, ropath)
	writer.Finish()
	base_path.setdata()
	return wrote_something

class ROPath_IterWriter(ITRBranch):
	"""Branch class doing the writing for Write_ROPaths

	An ITR is used because the permissions/times of a directory can
	only be set after the files inside it have been written.

	"""
	def __init__(self, base_path):
		"""Remember base_path, the Path of the root of the tree"""
		self.base_path = base_path
		self.dir_new_path = None
		self.dir_diff_ropath = None

	def start_process(self, index, ropath):
		"""Write ropath.  Only handles the directory case"""
		if not ropath.isdir(): # Base may not be a directory, but rest should
			assert ropath.index == (), ropath.index
			new_path = self.base_path.new_index(index)
			if ropath.exists():
				if new_path.exists(): new_path.deltree()
				ropath.copy(new_path)

		# NOTE(review): the non-directory case above does not return,
		# so it also runs the code below -- confirm this is intended.
		target = self.base_path.new_index(index)
		self.dir_new_path = target
		if not target.exists(): target.mkdir()
		else: assert index == (), index # base may exist, but nothing else
		self.dir_diff_ropath = ropath

	def end_process(self):
		"""Copy the stored attributes onto the directory on leaving it"""
		if self.dir_diff_ropath:
			self.dir_diff_ropath.copy_attribs(self.dir_new_path)

	def can_fast_process(self, index, ropath):
		"""Log ropath; return true (no recursion) unless it is a directory"""
		message = "Writing %s of type %s" % (ropath.get_relative_path(),
											 ropath.type)
		log.Log(message, 5)
		return not ropath.isdir()

	def fast_process(self, index, ropath):
		"""Copy a non-directory ropath to its destination, if it exists"""
		if not ropath.exists(): return
		ropath.copy(self.base_path.new_index(index))
# syntax highlighted by Code2HTML, v. 0.9.1