How to read a csv file from an s3 bucket using Pandas in Python
Using pandas 0.20.3
import os
import sys

import boto3
import pandas as pd

# StringIO is provided by a different module on Python 2 and Python 3.
if sys.version_info[0] >= 3:
    from io import StringIO  # Python 3.x
else:
    from StringIO import StringIO  # Python 2.x

# Credentials are taken from environment variables; a missing variable
# raises KeyError here, before any request is made.
aws_id = os.environ['AWS_ID']
aws_secret = os.environ['AWS_SECRET']

client = boto3.client(
    's3',
    aws_access_key_id=aws_id,
    aws_secret_access_key=aws_secret,
)

bucket_name = 'my_bucket'
object_key = 'my_file.csv'

# Fetch the object, read its whole body, decode it as UTF-8 text and let
# pandas parse that text through an in-memory file object.
csv_obj = client.get_object(Bucket=bucket_name, Key=object_key)
body = csv_obj['Body']
csv_string = body.read().decode('utf-8')
df = pd.read_csv(StringIO(csv_string))
Based on this answer that suggested using smart_open
for reading from S3, this is how I used it with Pandas:
import os

import pandas as pd
from smart_open import smart_open

# Credentials are taken from environment variables; a missing variable
# raises KeyError here, before any request is made.
aws_key = os.environ['AWS_ACCESS_KEY']
aws_secret = os.environ['AWS_SECRET_ACCESS_KEY']

bucket_name = 'my_bucket'
object_key = 'my_file.csv'

# The credentials are embedded directly in the S3 URL handed to smart_open.
# NOTE(review): a secret containing URL-reserved characters (e.g. '/') may
# not survive this embedding -- confirm against the smart_open version in use.
path = 's3://{}:{}@{}/{}'.format(aws_key, aws_secret, bucket_name, object_key)

# Open the remote object in a `with` block so the handle is closed as soon
# as pandas has finished reading, instead of being left open.
with smart_open(path) as csv_file:
    df = pd.read_csv(csv_file)
You don't need pandas; you can just use Python's built-in csv library.
def read_file(bucket_name, region, remote_file_name, aws_access_key_id, aws_secret_access_key):
    """Read a CSV object from S3 and return its data rows without the header.

    This function uses ``boto``, ``Key`` and ``csv``; they are expected to be
    imported by the surrounding module (presumably ``import boto.s3``,
    ``from boto.s3.key import Key`` and ``import csv`` -- confirm).

    Args:
        bucket_name: Name of the bucket that holds the CSV object.
        region: Region name passed to ``boto.s3.connect_to_region``.
        remote_file_name: Key (object name) of the CSV file in the bucket.
        aws_access_key_id: Access key id used for the connection.
        aws_secret_access_key: Secret access key used for the connection.

    Returns:
        A list of rows, each row being a list of column strings. The first
        row of the file (the header) is dropped. An empty object yields an
        empty list.
    """
    # First establish a connection with your credentials and region.
    conn = boto.s3.connect_to_region(
        region,
        aws_access_key_id=aws_access_key_id,
        aws_secret_access_key=aws_secret_access_key)

    # Next obtain the key of the CSV you want to read; this needs the bucket
    # name and the CSV file name.
    bucket = conn.get_bucket(bucket_name, validate=False)
    key = Key(bucket)
    key.key = remote_file_name
    try:
        raw = key.get_contents_as_string()
    finally:
        # Close the key even when the download fails.
        key.close()

    # The contents may arrive as bytes (Python 3); csv needs text, so decode.
    # On Python 2 the payload is already a ``str`` and is left untouched.
    if not isinstance(raw, str):
        raw = raw.decode('utf-8')

    # splitlines() copes with '\n', '\r\n' and '\r' alike and does not leave
    # a trailing empty entry, unlike splitting on a hard-coded '\r\n'.
    reader = csv.reader(raw.splitlines())

    # Skip the header row; the default avoids StopIteration on an empty file.
    next(reader, None)
    return list(reader)
Hope this solves your problem. Good luck! :)