
Boto3 to download all files from an S3 bucket


I had the same need and created the following function, which downloads the files recursively.

The directories are created locally only if they contain files.

import os

import boto3


def download_dir(client, resource, dist, local='/tmp', bucket='your_bucket'):
    paginator = client.get_paginator('list_objects')
    for result in paginator.paginate(Bucket=bucket, Delimiter='/', Prefix=dist):
        # Recurse into each "subdirectory" (common prefix) first.
        if result.get('CommonPrefixes') is not None:
            for subdir in result.get('CommonPrefixes'):
                download_dir(client, resource, subdir.get('Prefix'), local, bucket)
        for file in result.get('Contents', []):
            dest_pathname = os.path.join(local, file.get('Key'))
            if not os.path.exists(os.path.dirname(dest_pathname)):
                os.makedirs(os.path.dirname(dest_pathname))
            # Skip zero-byte "directory" keys; download_file fails on them.
            if not file.get('Key').endswith('/'):
                resource.meta.client.download_file(bucket, file.get('Key'), dest_pathname)

The function is called like this:

def _start():
    client = boto3.client('s3')
    resource = boto3.resource('s3')
    download_dir(client, resource, 'clientconf/', '/tmp', bucket='my-bucket')


When working with buckets that have 1000+ objects, it's necessary to use the NextContinuationToken to page through sequential sets of at most 1000 keys. This solution first compiles a list of objects, then creates the required directories and downloads the existing objects.

import os

import boto3

s3_client = boto3.client('s3')


def download_dir(prefix, local, bucket, client=s3_client):
    """
    params:
    - prefix: pattern to match in s3
    - local: local path to folder in which to place files
    - bucket: s3 bucket with target contents
    - client: initialized s3 client object
    """
    keys = []
    dirs = []
    next_token = ''
    base_kwargs = {
        'Bucket': bucket,
        'Prefix': prefix,
    }
    # Page through the listing until no NextContinuationToken is returned.
    while next_token is not None:
        kwargs = base_kwargs.copy()
        if next_token != '':
            kwargs.update({'ContinuationToken': next_token})
        results = client.list_objects_v2(**kwargs)
        contents = results.get('Contents', [])
        for i in contents:
            k = i.get('Key')
            if k[-1] != '/':
                keys.append(k)
            else:
                dirs.append(k)
        next_token = results.get('NextContinuationToken')
    # Create directories first, then download the objects.
    for d in dirs:
        dest_pathname = os.path.join(local, d)
        if not os.path.exists(os.path.dirname(dest_pathname)):
            os.makedirs(os.path.dirname(dest_pathname))
    for k in keys:
        dest_pathname = os.path.join(local, k)
        if not os.path.exists(os.path.dirname(dest_pathname)):
            os.makedirs(os.path.dirname(dest_pathname))
        client.download_file(bucket, k, dest_pathname)
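As a quick usage sketch (the bucket name, prefix, and local path below are placeholders, not values from the answer above), the function could be called like this:

# Hypothetical example: substitute your own bucket, prefix, and local directory.
download_dir(prefix='reports/2023/', local='/tmp/s3-download', bucket='my-bucket')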


import os

import boto3

# initiate s3 resource
s3 = boto3.resource('s3')

# select bucket
my_bucket = s3.Bucket('my_bucket_name')

# download each file into the current directory
for s3_object in my_bucket.objects.all():
    # Split s3_object.key into path and file name; otherwise download_file
    # raises a "file not found" error for keys that contain slashes.
    path, filename = os.path.split(s3_object.key)
    my_bucket.download_file(s3_object.key, filename)
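Note that this flattens everything into the current directory. If you want to preserve the bucket's folder structure locally, a minimal variation (assuming the same placeholder bucket name 'my_bucket_name') is to recreate each key's path before downloading:

import os

import boto3

s3 = boto3.resource('s3')
my_bucket = s3.Bucket('my_bucket_name')  # placeholder bucket name

for s3_object in my_bucket.objects.all():
    # Skip zero-byte "directory" keys that end with a slash.
    if s3_object.key.endswith('/'):
        continue
    # Recreate the key's directory structure locally, then download
    # the object to that path.
    path, filename = os.path.split(s3_object.key)
    if path:
        os.makedirs(path, exist_ok=True)
    my_bucket.download_file(s3_object.key, s3_object.key)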