Download a spreadsheet from Google Docs using Python
The https://github.com/burnash/gspread library is a newer, simpler way to interact with Google Spreadsheets, rather than the old answers to this that suggest the gdata
library which is not only too low-level, but is also overly-complicated.
You will also need to create and download (in JSON format) a Service Account key: https://console.developers.google.com/apis/credentials/serviceaccountkey
Here's an example of how to use it:
import csvimport gspreadfrom oauth2client.service_account import ServiceAccountCredentialsscope = ['https://spreadsheets.google.com/feeds']credentials = ServiceAccountCredentials.from_json_keyfile_name('credentials.json', scope)docid = "0zjVQXjJixf-SdGpLKnJtcmQhNjVUTk1hNTRpc0x5b9c"client = gspread.authorize(credentials)spreadsheet = client.open_by_key(docid)for i, worksheet in enumerate(spreadsheet.worksheets()): filename = docid + '-worksheet' + str(i) + '.csv' with open(filename, 'wb') as f: writer = csv.writer(f) writer.writerows(worksheet.get_all_values())
In case anyone comes across this looking for a quick fix, here's another (currently) working solution that doesn't rely on the gdata client library:
#!/usr/bin/pythonimport re, urllib, urllib2class Spreadsheet(object): def __init__(self, key): super(Spreadsheet, self).__init__() self.key = keyclass Client(object): def __init__(self, email, password): super(Client, self).__init__() self.email = email self.password = password def _get_auth_token(self, email, password, source, service): url = "https://www.google.com/accounts/ClientLogin" params = { "Email": email, "Passwd": password, "service": service, "accountType": "HOSTED_OR_GOOGLE", "source": source } req = urllib2.Request(url, urllib.urlencode(params)) return re.findall(r"Auth=(.*)", urllib2.urlopen(req).read())[0] def get_auth_token(self): source = type(self).__name__ return self._get_auth_token(self.email, self.password, source, service="wise") def download(self, spreadsheet, gid=0, format="csv"): url_format = "https://spreadsheets.google.com/feeds/download/spreadsheets/Export?key=%s&exportFormat=%s&gid=%i" headers = { "Authorization": "GoogleLogin auth=" + self.get_auth_token(), "GData-Version": "3.0" } req = urllib2.Request(url_format % (spreadsheet.key, format, gid), headers=headers) return urllib2.urlopen(req)if __name__ == "__main__": import getpass import csv email = "" # (your email here) password = getpass.getpass() spreadsheet_id = "" # (spreadsheet id here) # Create client and spreadsheet objects gs = Client(email, password) ss = Spreadsheet(spreadsheet_id) # Request a file-like object containing the spreadsheet's contents csv_file = gs.download(ss) # Parse as CSV and print the rows for row in csv.reader(csv_file): print ", ".join(row)
You might try using the AuthSub method described in the Exporting Spreadsheets section of the documentation.
Get a separate login token for the spreadsheets service and substitue that for the export. Adding this to the get_spreadsheet
code worked for me:
import gdata.spreadsheet.servicedef get_spreadsheet(key, gid=0): # ... spreadsheets_client = gdata.spreadsheet.service.SpreadsheetsService() spreadsheets_client.email = gd_client.email spreadsheets_client.password = gd_client.password spreadsheets_client.source = "My Fancy Spreadsheet Downloader" spreadsheets_client.ProgrammaticLogin() # ... entry = gd_client.GetDocumentListEntry(uri) docs_auth_token = gd_client.GetClientLoginToken() gd_client.SetClientLoginToken(spreadsheets_client.GetClientLoginToken()) gd_client.Export(entry, file_path) gd_client.SetClientLoginToken(docs_auth_token) # reset the DocList auth token
Notice I also used Export
, as Download
seems to give only PDF files.