How to extract a zip file recursively? (Python)

How to extract zip file recursively?


When extracting the zip file, you want to read the inner zip files into memory instead of writing them to disk. To do this, I've used BytesIO.

Check out this code:

import os
import io
import zipfile


def extract(filename):
    """Extract a zip archive, unpacking each nested zip into its own directory.

    Every member of ``filename`` that is itself a zip archive is read into an
    in-memory buffer (it is never written to disk as a ``.zip``) and its
    contents are extracted into a directory named after the member with its
    extension removed. Members that are not zip archives are extracted
    unchanged. All output paths are relative to the current working
    directory. Only one level of nesting is unpacked.

    Args:
        filename: Path (or file-like object) of the outer zip archive.

    Raises:
        zipfile.BadZipFile: If ``filename`` is not a valid zip archive.
        OSError: If a target directory or file cannot be created.
    """
    with zipfile.ZipFile(filename) as outer:
        for member in outer.namelist():
            # Keep the inner archive in memory rather than on disk.
            content = io.BytesIO(outer.read(member))

            if not zipfile.is_zipfile(content):
                # Plain files and directory entries are extracted as-is
                # instead of failing with BadZipFile.
                outer.extract(member)
                continue

            # NOTE(review): the directory name is taken directly from the
            # member name stored in the archive; run this on trusted
            # archives only.
            dirname = os.path.splitext(member)[0]
            # makedirs + exist_ok: tolerate re-runs and members that live in
            # sub-folders of the outer archive.
            os.makedirs(dirname, exist_ok=True)

            with zipfile.ZipFile(content) as inner:
                inner.extractall(dirname)

If you run extract("zipfile.zip") with zipfile.zip as:

zipfile.zip/
    dirA.zip/
        a
    dirB.zip/
        b
    dirC.zip/
        c

Output should be:

dirA/
  a
dirB/
  b
dirC/
  c


For a function that extracts a nested zip file (any level of nesting) and cleans up the original zip files:

import os
import re
import zipfile


def extract_nested_zip(zippedFile, toFolder):
    """Extract a zip file, including any nested zip files, then delete them.

    ``zippedFile`` is extracted into ``toFolder`` and removed. The folder is
    then scanned repeatedly: every file whose name ends in ``.zip`` is
    extracted into the directory that contains it and deleted, until a full
    scan finds no zip file left. This handles any depth of nesting.

    Args:
        zippedFile: Path of the zip archive to extract. It is deleted after
            extraction.
        toFolder: Directory that receives the extracted content.

    Raises:
        zipfile.BadZipFile: If ``zippedFile`` or any nested ``.zip`` file is
            not a valid zip archive.
        OSError: If a file cannot be read, written or removed.
    """
    with zipfile.ZipFile(zippedFile, 'r') as zfile:
        zfile.extractall(path=toFolder)
    os.remove(zippedFile)

    # Rescan instead of recursing: a recursive call that re-walks the same
    # folder extracts and deletes sibling archives, leaving the caller with
    # a stale file list that points at files which no longer exist.
    found_archive = True
    while found_archive:
        found_archive = False
        for root, _dirs, files in os.walk(toFolder):
            for filename in files:
                if not re.search(r'\.zip$', filename):
                    continue
                fileSpec = os.path.join(root, filename)
                with zipfile.ZipFile(fileSpec, 'r') as zfile:
                    zfile.extractall(path=root)
                os.remove(fileSpec)
                # Content extracted during this walk may not be visited by
                # it, so schedule another pass.
                found_archive = True


I tried some of the other solutions but couldn't get them to work "in place". I'll post my solution to handle the "in place" version. Note: it deletes the zip files and 'replaces' them with identically named directories, so back up your zip files if you want to keep them.

The strategy is simple: unzip all zip files in the directory (and its subdirectories), then repeat until no zip files remain. The repetition is needed because zip files may themselves contain zip files.

import os
import io
import zipfile
import re


def _is_zip_name(filename):
    """Return True when ``filename`` ends with the ``.zip`` extension."""
    return re.search(r'\.zip$', filename) is not None


def unzip_directory(directory):
    """Unzip (and then delete) all zip files found under ``directory``.

    Each ``name.zip`` is extracted into a new sibling directory ``name`` and
    the archive is removed. An archive whose target path already exists is
    left untouched so existing content is never overwritten.

    Args:
        directory: Root directory to scan (subdirectories included).

    Returns:
        int: Number of archives extracted during this single pass.

    Raises:
        zipfile.BadZipFile: If a ``.zip`` file is not a valid archive.
        OSError: If a directory or file cannot be created or removed.
    """
    extracted = 0
    for root, _dirs, files in os.walk(directory):
        for filename in files:
            if not _is_zip_name(filename):
                continue
            zipped_file = os.path.join(root, filename)
            # Strip only the final extension; splitting on '.zip' would cut
            # a name such as 'v1.zipped.zip' down to 'v1'.
            to_path = os.path.join(root, os.path.splitext(filename)[0])
            if os.path.exists(to_path):
                continue
            os.makedirs(to_path)
            with zipfile.ZipFile(zipped_file, 'r') as zfile:
                zfile.extractall(path=to_path)
            # deletes zip file
            os.remove(zipped_file)
            extracted += 1
    return extracted


def exists_zip(directory):
    """Return whether any ``.zip`` file exists within ``directory``, recursively."""
    return any(
        _is_zip_name(filename)
        for _root, _dirs, files in os.walk(directory)
        for filename in files
    )


def unzip_directory_recursively(directory, max_iter=1000):
    """Call ``unzip_directory`` until all contained zip files are unzipped.

    Zip files that appear as a result of a previous pass are handled by the
    following pass. The loop ends when no zip file remains, when ``max_iter``
    passes have run, or when a pass extracts nothing (every remaining zip is
    blocked by an already existing target path). Progress is reported on
    standard output.

    Args:
        directory: Root directory to process in place.
        max_iter: Upper bound on the number of passes.
    """
    print("Does the directory path exist? ", os.path.exists(directory))
    iterate = 0
    stalled = False
    while exists_zip(directory) and iterate < max_iter:
        extracted = unzip_directory(directory)
        iterate += 1
        if not extracted:
            # Nothing can change on later passes; stop instead of spinning
            # until max_iter is exhausted.
            stalled = True
            break
    if stalled:
        print("Stopped early: the remaining zip files were skipped because "
              "their target directories already exist.")
    # Only a run that used up its budget while zips remain is a time-out.
    timed_out = not stalled and iterate >= max_iter and exists_zip(directory)
    pre = "Did" if timed_out else "Did not "
    print(pre, "time out based on max_iter limit of", max_iter, ". Took iterations:", iterate)

Assuming your zip files are backed up, you make this all work by calling unzip_directory_recursively(your_directory).