# Part of the A-A-P recipe executive: Cache downloaded files

# Copyright (C) 2002-2003 Stichting NLnet Labs
# Permission to copy and use this file is specified in the file COPYING.
# If this file is missing you can find it here: http://www.a-a-p.org/COPYING

#
# This module handles the caching of remote files.
#
# There can be several directories with a cache.  They can be shared between
# users to minimize the number of downloads.  $CACHEPATH specifies the list of
# directories.
#
# Each cache directory contains:
# - An index file, each line containing these fields, separated by an
#   ESC character (0x1B):
#       URL <ESC> localname <ESC> remote-time <ESC> local-time <ESC> access-time
#   "URL" is the url of the file being cached
#   "localname" is the file name of the cached file, excluding the path.
#   Timestamps are in seconds.
#   Always written in binary mode, to make them usable on all systems.
# - The cache files, with an arbitrary file name (actually a random number).
#   Suffixes are kept, some tools require this.
# - A lock file, only present while the index file is being updated.
#
# The index files are read into our internal cache lookup, so that we can
# access it quickly and update it for downloaded files.  The updated index
# files are written when we exit or when $CACHEPATH is changed.

import time
import os
import os.path
import errno
import string            # used for string.split()/string.find() below; was
                         # previously only available via "from Util import *"

from Remote import is_url, url_download
from Dictlist import varname2dictlist
from Util import *
from Message import *
import Global

cache = {}               # Dictionary for cache entries.  Key is the URL.
cache_indexes = {}       # Dictionary of index files that have been read
                         # or need updating.
cache_val_read = {}      # Value of $CACHEPATH plus the current directory
                         # for which cache files have been read.
index_fname = "index"   # name of the file with the cache index


class Cache:
    def __init__(self, indexname, localname, rtime, ltime, atime):
        """An object in the dictionary of cache entries."""
        self.indexname = indexname      # full path of the index file
        self.localname = localname      # file name of cache file (no path)
        self.rtime = rtime              # timestamp of remote file or zero
        self.ltime = ltime              # timestamp of local file
        self.atime = atime              # timestamp when last accessed
        self.updated = 0                # updated this cached file
        self.gone = 0                   # file doesn't exist
        self.written = 0                # entry written to index file

    def localname_path(self):
        """Return the name of the cache file with the path."""
        # The cache file lives in the same directory as its index file.
        return os.path.join(os.path.dirname(self.indexname), self.localname)

    def timestamp(self):
        """Return the last modified time."""
        if self.rtime:
            return self.rtime       # use the remote time if we know it
        return self.ltime


def cache_read(recdict, fname, rcache, lock):
    """Read cache index file "fname" and add entries to the cache "rcache".
       "fname" must have an absolute path."""
    # When "lock" is non-zero, wait for a lock in the index file to disappear.
    # We don't lock the cache index file, because it would make it impossible
    # to use a cache from a read-only directory.  There is a tiny risk that
    # the index file is truncated if some other program starts updating it
    # just after we checked the index file isn't locked.  Read the whole file
    # at once to minimize that.  The only risk is that we download a file
    # that's already cached.
    if lock:
        try:
            index_lock(recdict, fname, 0)
        except:
            # What to do when the lock sticks?  Let's just ignore it.
            pass

    # Read all the lines from the index file.
    # When there is an error silently ignore that.
    try:
        f = open(fname, "rb")
        lines = f.readlines()
        f.close()
    except:
        return

    global cache_indexes
    cache_indexes[fname] = 1    # remember we have read this index file

    # Read entries into our cache.
    for line in lines:
        try:
            # Fields are separated by ESC; "rest" catches the trailing
            # newline after the last separator.
            url, lfname, rtime, ltime, atime, rest = string.split(line,
                                                                      "\033")
            rtime = long(rtime)
            ltime = long(ltime)
            atime = long(atime)
        except:
            # Some error in this line, skip it.
            continue
        if rcache.has_key(url):
            # URL already exists.  Only use new entry when it's newer.
            rc = rcache[url]
            if rtime > rc.rtime or ltime > rc.ltime:
                rc.indexname = fname
                rc.localname = lfname
                rc.rtime = rtime
                rc.ltime = ltime
                # Keep the most recent access time of the two entries.
                # NOTE(review): the atime update is assumed to be nested in
                # the "newer entry" branch, together with the updated/gone
                # reset -- confirm against upstream history.
                if atime > rc.atime:
                    rc.atime = atime
                rc.updated = 0
                rc.gone = 0
        else:
            # Add new entry.
            rcache[url] = Cache(fname, lfname, rtime, ltime, atime)


def index_write_newest(recdict, fname, f, use_cache, check_cache):
    """Write lines to index file "fname" opened as "f" from cache
       "use_cache", skipping entries for other index files and entries that
       are newer in "check_cache"."""
    for url in use_cache.keys():
        ce = use_cache[url]
        if ce.indexname == fname:
            if ce.gone:
                pass        # skip entries with deleted files

            # Only write a line when the check_cache doesn't have this entry
            # or it does have an entry which is not written but our entry is
            # newer.
            else:
                if check_cache.has_key(url) and not check_cache[url].gone:
                    ck = check_cache[url]
                else:
                    ck = None
                if (not ck
                        or (not ck.written
                            and (ce.rtime > ck.rtime
                                or ce.ltime > ck.ltime
                                or (ce.rtime == ck.rtime
                                    and ce.ltime == ck.ltime)))):
                    # Use the most recent access time of both entries.
                    if ck and check_cache[url].atime > ce.atime:
                        atime = ck.atime
                    else:
                        atime = ce.atime
                    f.write("%s\033%s\033%d\033%d\033%d\033\n"
                            % (url, ce.localname, ce.rtime, ce.ltime, atime))
                    ce.updated = 0
                    ce.written = 1
                elif not (ck and ck.written) and ce.localname != ck.localname:
                    # An entry that is not written in the index file can be
                    # deleted.  (Only reached when "ck" is set: the "if"
                    # above is true whenever "ck" is None.)
                    fn = os.path.join(os.path.dirname(fname), ce.localname)
                    try:
                        os.remove(fn)
                        ce.gone = 1
                    except EnvironmentError, e:
                        msg_warning(recdict,
                                (_('Can\'t delete cached file "%s"') % fn)
                                                                   + str(e))


def cache_update(recdict, fname):
    """Update cache index file "fname" for the entries in our cache."""
    # First check if there is anything to update for this index file.
    global cache
    foundone = 0
    for url in cache.keys():
        if cache[url].indexname == fname and (cache[url].updated
                                                      or cache[url].gone):
            foundone = 1
            break
    if not foundone:
        return

    # If the cache is "AAPDIR/cache/", create the directory if it doesn't
    # exist.
    adir = os.path.dirname(fname)
    if (os.path.basename(adir) == "cache"
            and os.path.dirname(adir) == Global.aap_dirname
            and not os.path.exists(adir)):
        try:
            assert_aap_dir(recdict)
            os.makedirs(adir)
        except:
            # Silently skip this cache when it doesn't exist and we can't
            # create it.
            return

    # Lock the index file.
    # If this fails we probably can't write to this cache directory.
    try:
        index_lock(recdict, fname, 1)
    except:
        msg_note(recdict, _('Can\'t lock cache index file "%s"') % fname)
        return

    # Read the entries from the index file into "tcache".
    # They may have been updated since the last time we read it.
    tcache = {}
    cache_read(recdict, fname, tcache, 0)

    # open index file for writing
    try:
        f = open(fname, "wb")
    except EnvironmentError, e:
        # Can't write the index file, even though we can lock it!?
        msg_warning(recdict, (_('Can\'t write cache index file "%s"')
                                                        % fname) + str(e))
    else:
        try:
            # Rewrite entries that were already in this index file and for
            # which we don't have a newer entry.
            index_write_newest(recdict, fname, f, tcache, cache)

            # Add new entries from our cache.
            index_write_newest(recdict, fname, f, cache, tcache)

            # close file
            f.close()
        except EnvironmentError, e:
            msg_warning(recdict, _('Error writing index file "%s"')
                                                          % fname + str(e))

    # unlock file
    try:
        index_unlock(fname)
    except:
        msg_warning(recdict,
                        _('Can\'t unlock cache index file "%s"') % fname)


def get_lock_fname(fname):
    """Return the index lock file name for the index file "fname"."""
    return os.path.join(os.path.dirname(fname), "indexlock")


def index_lock(recdict, fname, create):
    """Wait for index file "fname" to be unlocked.
       When "create" is non-zero: Lock index file "fname".
       Timeout after a while and delete the lock."""
    lname = get_lock_fname(fname)
    did_msg = 0

    # Try up to two hundred times.  Avoids hangup when something is wrong.
    trycount = 0
    while trycount < 200:
        trycount = trycount + 1
        try:
            if create:
                # Try creating the lock file, fail if it already exists.
                fd = os.open(lname, os.O_WRONLY + os.O_CREAT + os.O_EXCL)

                # Write our process number in it, so we know who created it.
                try:
                    pid = os.getpid()
                except:
                    pid = 1
                os.write(fd, "%d\n" % pid)
                os.close(fd)
            elif os.path.exists(lname):
                # Lock file exists.
                # Clumsy: throw an exception to go into the code below.
                raise IOError, (errno.EEXIST, "lock is there")
            break
        except EnvironmentError, (error, msg):
            # If creation didn't fail because the file already exists, give
            # up.
            if error != errno.EEXIST:
                raise

            # Couldn't create the lock file.  After waiting trying for 10
            # seconds (100 tries of 0.1 second), assume it's an orphan.
            # Note: this throws an IOError if we can't delete the lock file.
            if trycount == 100:
                os.remove(lname)
                if os.path.exists(lname):
                    raise IOError, 'Can\'t delete lock file "%s"' % lname
                msg_info(recdict, _('Deleted old lock file "%s"') % lname)
                did_msg = 0
                continue

            # Wait a tenth of a second before trying again.
            if not did_msg:
                msg_info(recdict,
                       _('Waiting 10 seconds for lock file "%s" to disappear')
                                                                      % lname)
                did_msg = 1
            time.sleep(0.1)

    if did_msg:
        msg_info(recdict, _("Lock file is gone now, continuing..."))


def index_unlock(fname):
    """Unlock an index file."""
    os.remove(get_lock_fname(fname))


def cache_dirlist(recdict):
    """Get the value of the $CACHEPATH variable as a list of strings."""
    if recdict["_no"]["CACHEPATH"] == '':
        return []
    return map(lambda x: os.path.expanduser(x["name"]),
                             varname2dictlist(recdict, "_no", "CACHEPATH"))


def fill_cache(recdict):
    """Read all cache index files in $CACHEPATH."""
    # Only need to fill the cache when it wasn't done for the current value
    # of $CACHEPATH and the current directory.  Avoids expanding items in
    # $CACHEPATH to absolute paths each time.
    check = get_var_val_int(recdict, "CACHEPATH") + '>' + os.getcwd()
    if not cache_val_read.has_key(check):
        cache_val_read[check] = 1

        # Read the cache index files for all entries in $CACHEPATH
        for n in cache_dirlist(recdict):
            index = os.path.join(os.path.abspath(n), index_fname)
            if not cache_indexes.has_key(index):
                cache_read(recdict, index, cache, 1)


def dump_cache(recdict):
    """Update all cached index files.  Empties our cache.
       Called just before $CACHEPATH is changed."""
    global cache, cache_indexes, cache_val_read
    for n in cache_indexes.keys():
        cache_update(recdict, n)
    cache = {}
    cache_indexes = {}
    cache_val_read = {}


def cache_lookup(recdict, name, cache_update_str = None):
    """Lookup URL "name" in the cache.  Return the Cache object if found."""
    if cache.has_key(name):
        ent = cache[name]

        # if the entry was updated this session, it's always accepted.
        if ent.updated:
            return ent

        # Check if the cached file is too old.
        if not cache_update_str:
            cache_update_str = get_var_val(0, recdict, "_no", "CACHEUPDATE")
        if time.time() < ent.ltime + date2secs(cache_update_str):
            return ent

        # When the old timestamp of the remote file is known, get the
        # timestamp of remote file and accept the cached file when it's
        # still the same.
        if ent.rtime != 0:
            from Remote import remote_time
            rt = remote_time(recdict, name)
            if rt != 0 and rt == ent.rtime:
                msg_depend(recdict,
                              _('timestamp did not change for "%s"') % name)
                # Update ltime, so that we don't obtain the remote
                # timestamp too often.
                ent.ltime = time.time()
                ent.updated = 1
                return ent
            if rt == 0:
                msg_info(recdict, _('cannot get timestamp for "%s"') % name)
            else:
                msg_depend(recdict,
                        _('timestamp for "%s" changed from %d to %d')
                                                    % (name, ent.rtime, rt))
    # Not cached, or the cached copy is stale.
    return None


def local_name(recdict, name, cache_update_str = None):
    """Get the local file name for "name":
       If it's a local file "name" is returned.
       If it's a remote file and a cached copy is available, return the name
       of the cached copy.
       Otherwise try to download the file, cache it and return the name of
       the cached file.
       Returns the local name and a flag indicating the file is in the cache.
       Gives an error message and returns None if this fails."""
    from VersCont import separate_scheme

    # Local files are returned as an absolute path, flag zero (not cached).
    scheme, fname = separate_scheme(name)
    if scheme == "file":
        return os.path.abspath(os.path.expanduser(fname)), 0
    if not is_url(name):
        return os.path.abspath(os.path.expanduser(name)), 0

    # Don't fill the cache when --nocache specified.
    if not Global.cmd_args.has_option("nocache"):
        fill_cache(recdict)

    msg_extra(recdict, "Looking up local name for %s" % name)
    cache_entry = cache_lookup(recdict, name, cache_update_str)
    if cache_entry:
        # Check if the file really exists, it may have been cleared since
        # we read the index file.
        p = cache_entry.localname_path()
        if os.path.exists(p):
            # Update the last-access time.
            cache_entry.atime = time.time()
            cache_entry.updated = 1
            return p, 1
        cache_entry.gone = 1

    # Skip when not actually building.
    if skip_commands():
        msg_info(recdict, _('skip downloading "%s"') % name)
        return None, 0

    # Isolate the suffixes.
    n = os.path.basename(name)
    i = string.find(n, ".")
    if i <= 0:
        suf = ''        # no suffix or starts with a dot
    else:
        suf = n[i:]     # suffix, can also be ".c.diff.gz"

    # Find a cache directory where we can write.
    # TODO: remember directories where we can't write and skip them.
    import random
    for cachedir in cache_dirlist(recdict):
        if not os.path.exists(cachedir):
            # If the name starts with "AAPDIR/" or $HOME may create the
            # directory.
            found = ''
            for dname in [ home_dir(), Global.aap_dirname ]:
                if dname:
                    l = len(dname)
                    if (len(cachedir) > l
                            and cachedir[:l] == dname
                            and cachedir[l] in "\\/"):
                        found = dname
                        break
            if not found:
                continue

            # Try creating the directory.  When this fails silently skip it.
            try:
                os.makedirs(cachedir)
            except:
                continue

        # Loop to try different random cache file names.
        while 1:
            fname = str(random.randint(1,99999999)) + suf
            path = os.path.join(cachedir, fname)
            try:
                # Try creating the cached file, fail if it already exists.
                fd = os.open(path, os.O_WRONLY + os.O_CREAT + os.O_EXCL)
                os.close(fd)
                break
            except EnvironmentError, (error, msg):
                if error != errno.EEXIST:
                    # Can't create this file for some reason, try another
                    # directory.
                    path = ''
                    break
                # Name collision (EEXIST): loop for another random name.

        if path:
            # Try downloading the file to the cache directory.
            try:
                f, rtime = url_download(recdict, name, path)
            except EnvironmentError, e:
                msg_note(recdict, _('Cannot download "%s": %s')
                                                       % (name, str(e)))
                # Delete the empty file we created.
                try_delete(path)
                return None, 0

            # Downloading worked, add an entry to the cache index.
            ifname = os.path.join(cachedir, index_fname)
            cache[name] = Cache(ifname, fname, rtime,
                                        os.path.getmtime(path), time.time())
            # remember we need to update this index file
            cache[name].updated = 1
            cache_indexes[os.path.abspath(ifname)] = 1
            return path, 1

    # Get here when cannot write to any cache directory.
    msg_warning(recdict, _('Cannot write in any cache directory'))
    return None, 0

# vim: set sw=4 et sts=4 tw=79 fo+=l: