Entry
Python implementation of tar
Jul 5th, 2000 10:01
Nathan Wallace, Hans Nowak, Snippet 205, Tamito Kajiyama
"""
Packages: tools;operating_systems.unix;interfaces
"""
"""
| Does someone have a tar implementation written in Python?
The attached is one that I wrote last year. It can extract files from
archives, but cannot create an archive.
Feel free to use, modify, and redistribute the module. Comments and
improvements are appreciated.
Regards,
KAJIYAMA, Tamito
"""
# ----------snip----------snip----------snip----------
#!/usr/bin/env python
#
# Tar.py - handles tape archive (tar) files
# written by Tamito KAJIYAMA <18 November 1998>
#
import string
# Size in bytes of one tar block: headers are read one block at a time and
# member data is counted in whole blocks of this size.
BLOCKSIZE = 512
def trim_null(v):
    """Return the part of string `v` that precedes its first NUL character.

    When `v` contains no NUL at all (for example a name that fills its
    whole header field) the string is returned unchanged.
    """
    end = v.find('\000')
    if end < 0:
        # find() reports "not found" as -1; slicing with -1 would silently
        # drop the last character, so return the whole string instead.
        return v
    return v[:end]
def oct2int(v):
    """Convert an octal digit string taken from a tar header to an integer.

    Anything from the first NUL onwards is ignored and surrounding
    whitespace is stripped, so both space- and NUL-terminated fields are
    accepted.  An empty field yields 0.

    Raises ValueError if the remaining text is not a valid octal number.
    """
    digits = v.split('\000', 1)[0].strip()
    if not digits:
        return 0
    # int(..., 8) parses the digits directly; the header comes from an
    # archive file, so it must never be handed to eval().
    return int(digits, 8)
def int2int(v):
    """Convert a decimal digit string taken from a tar header to an integer.

    Anything from the first NUL onwards is ignored and surrounding
    whitespace is stripped before conversion.

    Raises ValueError if the remaining text is not a valid decimal number
    (this includes an empty field).
    """
    return int(v.split('\000', 1)[0].strip())
class SubfileHeader:
    """Parsed header block describing one member of a tar archive.

    Attributes:
        tar_name      -- path of the archive the header was read from
        base          -- archive offset at which the member's data starts
        name          -- member path name
        mode          -- permission bits, kept as an octal digit string
        uid, gid      -- numeric owner and group ids
        size          -- length of the member's data in bytes
        mtime         -- modification time (seconds since the epoch)
        checksum      -- header checksum as recorded in the archive
        linkflag      -- one-character entry type flag
        linkname      -- target name for link entries
        size_in_block -- number of BLOCKSIZE blocks occupied by the data
    """

    def __init__(self, hblock, tar_name, base):
        """Decode the header block `hblock`.

        hblock   -- the raw header block (str, or bytes on Python 3)
        tar_name -- path of the archive, remembered for later re-opening
        base     -- offset in the archive of the first data byte

        Raises IndexError if `hblock` is too short to hold the type flag,
        and ValueError if a numeric field is not valid octal.
        """
        if not isinstance(hblock, str):
            # Bytes read in binary mode on Python 3: latin-1 maps every
            # byte to exactly one character, so field offsets are kept.
            hblock = hblock.decode('latin-1')

        def field(start, end):
            # Numeric fields end with a space and/or a NUL and the number of
            # digits varies between tar flavours, so take the whole field
            # and cut it at the first NUL instead of assuming a fixed width.
            return hblock[start:end].split('\000', 1)[0]

        self.tar_name = tar_name
        self.base = base
        self.name = trim_null(hblock[0:100])
        self.mode = field(100, 108).rstrip()
        self.uid = oct2int(field(108, 116))
        self.gid = oct2int(field(116, 124))
        self.size = oct2int(field(124, 136))
        self.mtime = oct2int(field(136, 148))
        # The checksum is stored in octal like every other numeric field.
        self.checksum = oct2int(field(148, 156))
        self.linkflag = hblock[156]
        # The link name field is 100 bytes long: offsets 157 up to 257.
        self.linkname = trim_null(hblock[157:257])
        # Round up to whole blocks; // keeps the result an integer.
        self.size_in_block = (self.size + BLOCKSIZE - 1) // BLOCKSIZE
class Subfile:
    """Read-only, file-like view of one member stored inside a tar archive.

    The view covers `header.size` bytes starting at offset `header.base`
    of the archive `header.tar_name`.  Positions used by seek() and tell()
    are relative to the start of the member, and reads never go past the
    member's last byte.
    """

    def __init__(self, header):
        """Open the archive named by `header` and position at the member."""
        self.base = header.base
        self.size = header.size
        # Binary mode: member data is arbitrary bytes and offsets must not
        # be disturbed by newline translation or text decoding.
        self.file = open(header.tar_name, 'rb')
        self.file.seek(header.base, 0)
        self.header = header

    def close(self):
        """Close the underlying archive file."""
        self.file.close()

    def fileno(self):
        """Return the file descriptor of the underlying archive file."""
        return self.file.fileno()

    def seek(self, offset, whence=0):
        """Move to a position inside the member.

        whence 0 measures `offset` from the member's start, 1 from the
        current position and 2 from the member's end (so offsets are
        normally negative or zero for whence 2).  The resulting position is
        clamped to the range [0, size].

        Raises IOError(22, 'Invalid argument') for any other `whence`.
        """
        if whence == 0:
            target = offset
        elif whence == 1:
            target = self.tell() + offset
        elif whence == 2:
            target = self.size + offset
        else:
            raise IOError(22, 'Invalid argument')
        target = max(0, min(target, self.size))
        self.file.seek(self.base + target, 0)

    def tell(self):
        """Return the current position relative to the member's start."""
        return self.file.tell() - self.base

    def _clip(self, length):
        """Return how many bytes a read may take: `length`, capped at what
        is left of the member; None or a negative length means all of it."""
        remain = max(0, self.size - self.tell())
        if length is None or length < 0 or length > remain:
            return remain
        return length

    def read(self, length=None):
        """Read up to `length` bytes (default: the rest of the member).

        Returns an empty string once the end of the member is reached.
        """
        return self.file.read(self._clip(length))

    def readline(self, length=None):
        """Read one line, at most `length` bytes and never past the member.

        Returns an empty string once the end of the member is reached.
        """
        return self.file.readline(self._clip(length))

    def readlines(self):
        """Return the remaining lines of the member as a list."""
        lines = []
        while 1:
            line = self.readline()
            if not line:
                break
            lines.append(line)
        return lines
class Tar:
    """Index of the members of a tar archive.

    The constructor scans the archive once and records a SubfileHeader for
    every member it finds; member data is only read later, through the
    Subfile objects handed out by retrieve().
    """

    def __init__(self, filename):
        """Scan the archive `filename` and collect its member headers."""
        self.headers = []
        # Binary mode: header offsets must not be disturbed by newline
        # translation or text decoding.
        archive = open(filename, 'rb')
        try:
            while 1:
                # read subfile header
                hblock = archive.read(BLOCKSIZE)
                # A short read means the archive ended without its
                # terminating zero block; a leading NUL marks that block.
                if len(hblock) < BLOCKSIZE or hblock[:1] == b'\000':
                    break
                header = SubfileHeader(hblock, filename, archive.tell())
                self.headers.append(header)
                # skip subfile body without pulling it into memory
                archive.seek(BLOCKSIZE * header.size_in_block, 1)
        finally:
            archive.close()

    def list(self):
        """Return the list of SubfileHeader objects, in archive order."""
        return self.headers

    def retrieve(self, name):
        """Return a Subfile for the first member called `name`, or None
        when the archive has no member of that name."""
        for header in self.headers:
            if header.name == name:
                return Subfile(header)
        return None
def test():
    """Command-line driver.

    With one argument (an archive path) print name, size and modification
    time of every member.  With further arguments, treat each as a member
    name and write that member to a file of the same name in binary mode.
    With no arguments print a usage line.
    """
    import os, sys, time
    argc = len(sys.argv)
    if argc < 2:
        print('Usage: %s filename.tar [filename ...]' %
              os.path.basename(sys.argv[0]))
        return
    tar = Tar(sys.argv[1])
    if argc == 2:
        for header in tar.list():
            print(' name: %s' % (header.name,))
            print(' size: %s bytes' % (header.size,))
            print('mtime: %s' % (time.ctime(header.mtime),))
            print('')
        return
    for filename in sys.argv[2:]:
        member = tar.retrieve(filename)
        if member is None:
            print('%s not found' % (filename,))
            continue
        try:
            # Binary mode so member data is written back unchanged.
            outfile = open(member.header.name, 'wb')
            try:
                outfile.write(member.read())
            finally:
                outfile.close()
        finally:
            # Release the archive handle the member view holds open.
            member.file.close()
        print('wrote %s' % (member.header.name,))
# Run the command-line driver only when executed as a script, not on import.
if __name__ == '__main__':
    test()