""" Archive Library: Convenience functions for dealing with archive formats. Allows easy retrieval and writing of files to archives transparently. Most usage should come from "get_info" and "extract", which will work on any archive that is "supported". """ # zipfile is part of the standard lib import zipfile, re, os, struct import log # note: rarfile has modifications and is included in my romutil # directory. If my modification gets pushed to the cheeseshop version, # i will remove it from here, so this check should stay supported_extensions = ['zip'] try: import rarfile supported_extensions.append('rar') except ImportError: print 'warning: no rar support ("easy_install rarfile" for rar support)' rarfile = False try: import py7zlib supported_extensions.append('7z') except ImportError: print 'warning: no 7zip support ("easy_install pylzma" for lzma support)' py7zlib = False supported_extensions = tuple(supported_extensions) def filter_to_re(filter_): """Takes a simplistic filter in the form of a string like "*.nds", and creates a regex to match that (".*\.nds$" in this case). Since this is meant to match filenames, the regex returned will be case insensitive.""" if not filter_.endswith('$'): filter_ += '$' filter_ = filter_.replace('\\', '\\\\') # replace \ w/ \\ filter_ = filter_.replace('.', '\.') filter_ = filter_.replace("*", ".*") return re.compile(filter_, re.IGNORECASE) def crc(info): """Return the 8 character capital hexidecimal representation of the CRC for the archive file specified by info. Python's "zipfile.ZipInfo" stores it's CRC field in a different byte order than the other archive format libraries, so this function uses the struct module to get it in the right order.""" if isinstance(info, zipfile.ZipInfo): crc = info.CRC ordered = struct.pack("!I", crc) s = '' for i in ordered: s += '%02x' % (ord(i)) crc = int(s, 16) else: crc = info.CRC return "%08X" % crc class FakeInfo: """Fake 'ZipInfo' compatibility for py7zlib's 'ArchiveFile'""" def __init__(self, _ArchiveFile): _af = _ArchiveFile self._af = _af self.CRC = _af.digest self.comment = '' # is this proper? self.compress_size = _af._maxsize # the following are probably not important self.compress_type = -1 self.create_system = 0 self.create_version = 26 # LZMA:26 # FIXME: if you need this, please see the msdn docs on FILETIME self.date_time = 0 self.external_attr = 0 # FIXME: what is this self.extra = '' self.extract_version = self.create_version self.file_offset = _af._src_start # offset in the archive self.file_size = _af.uncompressed self.filename = _af.filename self.flag_bits = 0 self.header_offset = 0 self.internal_offset = 0 self.orig_filename = _af.filename self.reserved = 0 self.volume = 0 def identify(filename): """Identify what type of archive `filename` is. Returns the appropriate extension (in 'arclib.supported_extensions') or 'unknown' on failure.""" if zipfile.is_zipfile(filename): return 'zip' elif rarfile and rarfile.is_rarfile(filename): return 'rar' elif not py7zlib: return 'unknown' magic = open(filename).read(len(py7zlib.MAGIC_7Z)) if magic == py7zlib.MAGIC_7Z: return '7z' else: return 'unknown' def extract(filename, filter=''): """Extract files in archive at 'filename' and return in a list of file objects. Optional argument 'filter' is a regex to match against filenames to filter out only desired files (see help(arclib.filter_to_re) for more info on filter formats)""" ext = identify(filename) if ext not in supported_extensions: return False return _extract(filename, ext, filter) def _extract(filename, extension, filter): if extension == 'zip': _ext = zip_extract elif extension == 'rar': _ext = rar_extract elif extension == '7z': _ext = lzma_extract return _ext(filename, filter) def get_info(filename, filter=''): """Extract 'info' (modeled after zipfile.FileInfo) from the header of an archive file at 'filename'. Optional argument 'filter' is as in arclib.extract.""" ext = identify(filename) if ext in supported_extensions: return _get_info(filename, ext, filter) return False def _get_info(filename, extension, filter): if extension == 'zip': f = zipfile.ZipFile(filename) infolist = f.infolist() elif extension == 'rar': f = rarfile.RarFile(filename) infolist = f.infolist() else: # extension == '7z' f = py7zlib.Archive7z(open(filename, 'rb')) infolist = [FakeInfo(file) for file in f.files] if not filter: return infolist regex = filter_to_re(filter) return [info for info in infolist if regex.match(info.filename)] def _err_false(func): def inner(*args, **kwargs): try: return func(*args, **kwargs) except: if log.options['debug']: import traceback log.p(traceback.format_exc(), 'EX > ') return False inner.__name__ = func.__name__ inner.__doc__ = func.__doc__ return inner # the *_extract functions all return False if an exception occurs @_err_false def zip_extract(filename, filter=''): """Extract the files from a zip archive. See help(arclib.extract) for more information ont he optional 'filter' argument. Using this directly should not be necessary.""" zf = zipfile.ZipFile(filename) files = [] file_list = zf.namelist() if filter: regex = filter_to_re(filter) file_list = [file for file in file_list if regex.match(file)] log.vv("filter: %s => %s (matched %s)" % (filter, regex.pattern, file_list)) for handle in file_list: newfile = os.tmpfile() log.d("zip: extracting %s ..." % handle) newfile.write(zf.read(handle)) newfile.seek(0) files.append(newfile) log.d("zip: %s done extracting." % handle) return files @_err_false def rar_extract(filename, filter=''): """Extract the files from a rar archive. See help(arclib.extract) for more information on the optional 'filter' argument. Using this directly should not be necessary.""" rf = rarfile.RarFile(filename) files = [] file_list = rf.namelist() if filter: regex = filter_to_re(filter) file_list = [file for file in file_list if regex.match(file)] log.vv("filter: %s => %s (matched %s)" % (filter, regex.pattern, file_list)) for handle in file_list: newfile = os.tmpfile() log.d("rar: extracting %s ..." % handle) newfile.write(rf.read(handle)) newfile.seek(0) files.append(newfile) log.d("rar: %s done extracting." % handle) return files # the lzma lib does not have the same interface as zipfile @_err_false def lzma_extract(filename, filter=''): """Extract the files from a 7zip archive. See help(arclib.extract) for more information on the optional 'filter' argument. Using this directly should not be necessary.""" f = open(file, 'rb') zf = py7zlib.Archive7z(f) files = [] file_list = zf.files if filter: regex = filter_to_re(filter) file_list = [file for file in file_list if regex.match(file.filename)] log.vv("filter: %s => %s (matched %s)" % (filter, regex.pattern, [file.filename for file in file_list])) for handle in file_list: newfile = os.tmpfile() log.d("7zip: extracting %s ..." % handle.filename) newfile.write(handle.read()) newfile.seek(0) files.append(newfile) log.d("7zip: %s done extracting." % handle.filename) return files