faqts : Computers : Programming : Languages : Python : Snippets

+ Search
Add Entry AlertManage Folder Edit Entry Add page to http://del.icio.us/
Did You Find This Entry Useful?

9 of 10 people (90%) answered Yes
Recently 6 of 7 people (86%) answered Yes

Entry

Python implementation of tar

Jul 5th, 2000 10:01
Nathan Wallace, Hans Nowak, Snippet 205, Tamito Kajiyama


"""
Packages: tools;operating_systems.unix;interfaces
"""
"""
| Does someone have a tar implementation written in Python?
The attached is one that I wrote last year.  It can extract files from
archives, but cannot create an archive.
Feel free to use, modify, and redistribute the module.  Comments and
improvements are appreciated.
Regards,
KAJIYAMA, Tamito
"""
# ----------snip----------snip----------snip----------
#!/usr/bin/env python
#
#  Tar.py - handles tape archive (tar) files
#           written by Tamito KAJIYAMA <18 November 1998>
#
import string
BLOCKSIZE = 512
def trim_null(v):
    """Return the part of *v* that precedes its first NUL character.

    Text fields in a header are NUL-padded, but a value that fills its
    field completely contains no NUL at all.  In that case *v* is returned
    unchanged; slicing with the -1 that find() reports would silently drop
    the last character.
    """
    end = v.find('\000')
    if end < 0:
        return v
    return v[:end]
def oct2int(v):
    """Convert an octal numeric header field to an integer.

    The field may be padded with whitespace and may carry a NUL
    terminator; everything from the first NUL onward is ignored.  A blank
    field yields 0.

    The digits are parsed with int(..., 8) instead of eval(): the text
    comes straight from the archive and must never be executed as code.

    Raises ValueError if the field holds anything but an octal number.
    """
    digits = v.split('\000', 1)[0].strip()
    if not digits:
        return 0
    return int(digits, 8)
def int2int(v):
    """Convert a decimal numeric header field to an integer.

    Surrounding whitespace is ignored, and so is everything from the first
    NUL onward, so a NUL-terminated field converts cleanly instead of
    making int() fail on the embedded NUL.

    Raises ValueError if no valid decimal number remains.
    """
    return int(v.split('\000', 1)[0].strip())
class SubfileHeader:
    """Parsed header block describing one member of a tar archive.

    Attributes:
        tar_name       path of the archive, exactly as passed in
        base           offset passed in by the caller; Tar passes the file
                       position just after the header block, i.e. the
                       start of the member's data
        name           member name (bytes 0-99 of the header)
        mode           permission field as text, terminator removed
        uid, gid       numeric owner and group ids
        size           length of the member's data in bytes
        mtime          modification time as a number
        checksum       header checksum
        linkflag       the single type-flag character at byte 156
        linkname       link target (bytes 157-256 of the header)
        size_in_block  number of BLOCKSIZE blocks the data occupies
    """

    def __init__(self, hblock, tar_name, base):
        """Decode the raw header block *hblock*.

        tar_name and base are stored unchanged so that a Subfile can later
        reopen the archive and locate the member's data.
        """
        def field(start, stop):
            # Numeric fields end in a space, a NUL, or both, depending on
            # the program that wrote the archive.  Cutting at the first NUL
            # and dropping trailing blanks keeps every digit in all of
            # these layouts; slicing off a fixed two bytes loses the last
            # digit of a "7 digits + NUL" field.
            return hblock[start:stop].split('\000', 1)[0].rstrip()

        self.tar_name = tar_name
        self.base     = base
        self.name     = trim_null(hblock[0:100])
        self.mode     = field(100, 108)
        self.uid      = oct2int(field(108, 116))
        self.gid      = oct2int(field(116, 124))
        self.size     = oct2int(field(124, 136))
        self.mtime    = oct2int(field(136, 148))
        # The checksum is written in octal like the other numeric fields.
        self.checksum = oct2int(field(148, 156))
        self.linkflag = hblock[156]
        # The link name field is 100 bytes wide: 157 up to, not including, 257.
        self.linkname = trim_null(hblock[157:257])
        # Round up to whole blocks; floor division keeps the result an
        # integer even where "/" means true division.
        self.size_in_block = (self.size + BLOCKSIZE - 1) // BLOCKSIZE
class Subfile:
    """Read-only, file-like view of one member stored inside a tar archive.

    Each Subfile opens the archive on its own and positions it at the start
    of the member's data.  Offsets taken and returned by the methods are
    relative to the member, and reads never run past the member's end.
    """

    def __init__(self, header):
        """Open the archive named by *header* and seek to the member data.

        header must provide tar_name (archive path), base (offset of the
        data within the archive) and size (length of the data in bytes).
        """
        self.base = header.base
        self.size = header.size
        # Binary mode: member data is arbitrary bytes, and the offset
        # arithmetic in this class breaks if line endings are translated.
        self.file = open(header.tar_name, 'rb')
        self.file.seek(header.base, 0)
        self.header = header

    def close(self):
        """Close the underlying archive file."""
        self.file.close()

    def fileno(self):
        """Return the descriptor of the underlying archive file.

        The descriptor refers to the whole archive, not just this member.
        """
        return self.file.fileno()

    def seek(self, offset, whence=0):
        """Move to *offset*, interpreted the way file.seek() does.

        whence 0 counts from the start of the member, 1 from the current
        position and 2 from the end of the member (offset is then normally
        zero or negative).  The resulting position is clamped to the range
        0..size.

        Raises IOError(22, 'Invalid argument') for any other whence.
        """
        if whence == 0:
            target = offset
        elif whence == 1:
            target = self.tell() + offset
        elif whence == 2:
            target = self.size + offset
        else:
            raise IOError(22, 'Invalid argument')
        target = max(0, min(target, self.size))
        self.file.seek(self.base + target, 0)

    def tell(self):
        """Return the current position relative to the start of the member."""
        return self.file.tell() - self.base

    def _limit(self, length):
        """Return how many bytes a read request of *length* may consume.

        None or a negative length means "everything that is left"; any
        other value is capped at the number of bytes left in the member.
        """
        remain = max(self.size - self.tell(), 0)
        if length is None or length < 0:
            return remain
        return min(length, remain)

    def read(self, length=None):
        """Read up to *length* bytes, or the rest of the member by default.

        Returns an empty string at the end of the member or when length
        is 0.
        """
        return self.file.read(self._limit(length))

    def readline(self, length=None):
        """Read one line, limited to *length* bytes and to the member's end.

        Returns an empty string at the end of the member or when length
        is 0.
        """
        return self.file.readline(self._limit(length))

    def readlines(self):
        """Return the remaining lines of the member as a list."""
        lines = []
        line = self.readline()
        while line:
            lines.append(line)
            line = self.readline()
        return lines
class Tar:
    """Index of the members of a tar archive.

    The archive is scanned once when the object is created and only the
    member headers are kept.  Member data is read later, on demand,
    through retrieve().
    """

    def __init__(self, filename):
        """Scan *filename* and collect a SubfileHeader for every member.

        Scanning stops at the first header block that starts with a NUL
        byte, or when fewer than BLOCKSIZE bytes are left in the file, so
        an archive without a terminating zero block is still accepted.
        """
        self.headers = []
        # Binary mode: header.base is a byte offset, which is only reliable
        # when no line-ending translation takes place.
        archive = open(filename, 'rb')
        try:
            while 1:
                # read subfile header
                hblock = archive.read(BLOCKSIZE)
                if len(hblock) < BLOCKSIZE or hblock[0] == '\000':
                    break
                header = SubfileHeader(hblock, filename, archive.tell())
                self.headers.append(header)
                # skip subfile body without pulling it into memory
                archive.seek(BLOCKSIZE * header.size_in_block, 1)
        finally:
            # Release the handle even if a header fails to parse.
            archive.close()

    def list(self):
        """Return the list of SubfileHeader objects, in archive order."""
        return self.headers

    def retrieve(self, name):
        """Return a Subfile for the first member called *name*, or None."""
        for header in self.headers:
            if header.name == name:
                return Subfile(header)
        return None
def test():
    """Command-line driver: list an archive or extract members from it.

    With one argument, print the name, size and mtime of every member.
    With more arguments, write each named member to a file of the same
    name.  With no argument, print a usage line.
    """
    import os, sys, time
    args = sys.argv[1:]
    if not args:
        print('Usage: %s filename.tar [filename ...]' %
              os.path.basename(sys.argv[0]))
        return
    tar = Tar(args[0])
    if len(args) == 1:
        for header in tar.list():
            print(' name: %s' % (header.name,))
            print(' size: %s bytes' % (header.size,))
            print('mtime: %s' % (time.ctime(header.mtime),))
            print('')
        return
    for filename in args[1:]:
        member = tar.retrieve(filename)
        if member is None:
            print('%s not found' % (filename,))
            continue
        # NOTE(review): the output path is taken verbatim from the archive.
        # An absolute name or one containing '..' is written outside the
        # current directory; sanitize it before using this on archives
        # from an untrusted source.
        try:
            # Binary mode so the member's bytes are written unmodified.
            outfile = open(member.header.name, 'wb')
            try:
                outfile.write(member.read())
            finally:
                outfile.close()
        finally:
            # The Subfile holds its own open handle on the archive.
            member.file.close()
        print('wrote %s' % (member.header.name,))
# Run the command-line driver only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    test()