How do you unzip very large files in Python? (tag: python)

How do you unzip very large files in Python?


Here's an outline of how to decompress large files by reading each archive member's raw data yourself:

import os
import struct
import zipfile
import zlib

# Upper bound on each read from the archive and on each inflated piece,
# so peak memory stays flat no matter how large a member is.
CHUNK_SIZE = 64 * 1024

# Fixed 30-byte local file header (little-endian): signature, version needed,
# flags, compression method, mod time, mod date, CRC-32, compressed size,
# uncompressed size, filename length, extra-field length.
LOCAL_HEADER = struct.Struct("<4s5H3I2H")
LOCAL_HEADER_SIGNATURE = b"PK\x03\x04"

src = open(doc, "rb")
try:
    zf = zipfile.ZipFile(src)
    try:
        for m in zf.infolist():
            # Examine the header
            print("%s %s %s %r %r" % (
                m.filename, m.header_offset, m.compress_size,
                m.extra, m.comment))

            # Parse the member's own local header. The filename and
            # extra-field lengths must come from here: the central-directory
            # copies (m.extra) may differ in length, and the per-file comment
            # (m.comment) is stored only in the central directory.
            src.seek(m.header_offset)
            raw_header = src.read(LOCAL_HEADER.size)
            if len(raw_header) != LOCAL_HEADER.size:
                raise zipfile.BadZipfile(
                    "truncated local header for %r" % (m.filename,))
            fields = LOCAL_HEADER.unpack(raw_header)
            if fields[0] != LOCAL_HEADER_SIGNATURE:
                raise zipfile.BadZipfile(
                    "bad local header signature for %r" % (m.filename,))
            name_len, extra_len = fields[-2], fields[-1]
            # Skip the variable-length name and extra field; the member's
            # data starts immediately after them.
            src.seek(name_len + extra_len, os.SEEK_CUR)

            # Build a decompression object (raw deflate stream, no zlib
            # header, hence the negative window size). Stored members are
            # copied through unchanged.
            if m.compress_type == zipfile.ZIP_DEFLATED:
                decomp = zlib.decompressobj(-15)
            elif m.compress_type == zipfile.ZIP_STORED:
                decomp = None
            else:
                raise NotImplementedError(
                    "unsupported compression method %r for %r"
                    % (m.compress_type, m.filename))

            # NOTE: the output path is m.filename as-is, relative to the
            # current directory; directory entries and missing parent
            # folders are not handled by this outline.
            out = open(m.filename, "wb")
            try:
                remaining = m.compress_size
                while remaining > 0:
                    chunk = src.read(min(CHUNK_SIZE, remaining))
                    if not chunk:
                        raise zipfile.BadZipfile(
                            "truncated data for %r" % (m.filename,))
                    remaining -= len(chunk)
                    if decomp is None:
                        out.write(chunk)
                        continue
                    # Cap each inflate call's output; input that has not been
                    # consumed yet is handed back via unconsumed_tail.
                    pending = chunk
                    while pending:
                        out.write(decomp.decompress(pending, CHUNK_SIZE))
                        pending = decomp.unconsumed_tail
                if decomp is not None:
                    out.write(decomp.flush())
            finally:
                out.close()
    finally:
        zf.close()
finally:
    src.close()


As of Python 2.6, you can use ZipFile.open() to get a file-like object for an archive member and copy its contents efficiently to a target file of your choosing:

import errno
import os
import shutil
import zipfile

TARGETDIR = '/foo/bar/baz'


def _ensure_dir(path):
    """Create directory *path* (and any parents); an existing directory is fine.

    Raises OSError/IOError for any other failure, including *path* already
    existing as something that is not a directory.
    """
    try:
        os.makedirs(path)
    except (OSError, IOError) as err:
        # makedirs raises EEXIST when the path is already there; that is
        # only acceptable if what exists is really a directory.
        if err.errno != errno.EEXIST or not os.path.isdir(path):
            raise


# Resolve the extraction root once; every member path is checked against it.
target_root = os.path.realpath(TARGETDIR)
# Root with a trailing separator, so '/foo/bar/bazaar' is not mistaken for
# something inside '/foo/bar/baz'.
root_prefix = os.path.join(target_root, '')

with open(doc, "rb") as zipsrc:
    with zipfile.ZipFile(zipsrc) as zfile:
        for member in zfile.infolist():
            target_path = os.path.realpath(
                os.path.join(target_root, member.filename))
            # Member names come from the archive and may be absolute or
            # contain '..'; refuse anything that resolves outside the root.
            if not os.path.join(target_path, '').startswith(root_prefix):
                raise ValueError(
                    "archive member escapes target directory: %r"
                    % (member.filename,))

            if member.filename.endswith('/'):  # folder entry, create
                _ensure_dir(target_path)
                continue

            # Archives are not required to contain explicit folder entries,
            # so make sure the parent folder exists before writing.
            _ensure_dir(os.path.dirname(target_path))
            with zfile.open(member) as infile, \
                    open(target_path, 'wb') as outfile:
                # Streams the member in chunks rather than reading it whole.
                shutil.copyfileobj(infile, outfile)

This uses shutil.copyfileobj() to read data from the open zipfile member in fixed-size chunks and write each chunk to the output file, so a whole member is never held in memory at once.