How can I download all emails with attachments from Gmail?
Hard one :-)
# Download every attachment found in a Gmail mailbox over IMAP and save it
# into `detach_dir`. Runs as a plain script; the syntax is valid on both
# Python 2 and Python 3.
import email
import getpass
import imaplib
import os

detach_dir = '.'  # directory where to save attachments (default: current)

try:
    read_line = raw_input  # Python 2
except NameError:
    read_line = input  # Python 3

user = read_line("Enter your GMail username:")
pwd = getpass.getpass("Enter your password: ")

# connecting to the gmail imap server
m = imaplib.IMAP4_SSL("imap.gmail.com")
m.login(user, pwd)
try:
    # Here you can choose another mailbox such as INBOX instead; use m.list()
    # to get all the mailboxes. The name is quoted explicitly because it
    # contains a space and imaplib on Python 3 does not add the quotes itself.
    # Passing readonly=True to select() avoids marking fetched mails as read.
    resp, _ = m.select('"[Gmail]/All Mail"')
    if resp != 'OK':
        # Without a selected mailbox the SEARCH below would fail with a much
        # less helpful "illegal in state AUTH" error.
        raise SystemExit(
            "Could not select the mailbox; call m.list() to find its exact name"
        )

    # you could filter using the IMAP rules here
    # (check http://www.example-code.com/csharp/imap-search-critera.asp)
    resp, items = m.search(None, "ALL")

    # Numbers the generated file names. It lives outside both loops so that
    # two unnamed attachments never get the same name.
    counter = 1

    for emailid in items[0].split():  # the ids of the mails
        # "(RFC822)" means "get the whole message"; headers only, etc. can be
        # requested instead.
        resp, data = m.fetch(emailid, "(RFC822)")
        email_body = data[0][1]  # the raw mail content

        # Parse the raw content into a message object. The payload is bytes
        # on Python 3 and str on Python 2.
        if isinstance(email_body, str):
            mail = email.message_from_string(email_body)
        else:
            mail = email.message_from_bytes(email_body)

        # Check if there are any attachments at all
        if mail.get_content_maintype() != 'multipart':
            continue

        # A header that is absent comes back as None; print it as empty
        # instead of failing.
        print("[%s] :%s" % (mail["From"] or "", mail["Subject"] or ""))

        # walk() yields every part of the message tree, so no explicit
        # recursion is needed.
        for part in mail.walk():
            # multipart parts are just containers, so we skip them
            if part.get_content_maintype() == 'multipart':
                continue

            # is this part an attachment?
            if part.get('Content-Disposition') is None:
                continue

            filename = part.get_filename()
            if filename:
                # The name comes from the e-mail itself: keep only its last
                # component so it cannot point outside detach_dir.
                filename = os.path.basename(filename.replace("\\", "/"))
                if filename in ('.', '..'):
                    filename = ''

            # if there is no usable filename, we create one with a counter to
            # avoid duplicates
            if not filename:
                filename = 'part-%03d%s' % (counter, '.bin')
                counter += 1

            att_path = os.path.join(detach_dir, filename)

            # Check if it is already there
            if os.path.isfile(att_path):
                continue

            payload = part.get_payload(decode=True)
            if payload is None:
                # Container-like parts (e.g. an attached message) have no
                # decodable payload of their own; walk() visits their
                # sub-parts separately.
                continue

            # finally write the content; `with` closes the file even if the
            # write fails
            with open(att_path, 'wb') as fp:
                fp.write(payload)
finally:
    m.logout()
Wowww! That was something. ;-) But try the same in Java, just for fun!
By the way, I tested that in a shell, so some errors likely remain.
Enjoy
EDIT:
Because mailbox names can differ from one country to another, I recommend calling m.list()
and picking a name from its output before calling m.select("the mailbox name")
to avoid this error:
imaplib.error: command SEARCH illegal in state AUTH, only allowed in states SELECTED
#!/usr/bin/env python
"""Save all attachments for given gmail account.

Every attachment is written to ``<thread id>/<message id>/<file name>``
below the current working directory. Progress is shown as one dot per
message, plus one summary line for each message that has attachments.
"""
import os
import sys

from libgmail import GmailAccount

# NOTE: placeholder credentials -- replace them before running, and avoid
# committing a real password to source control.
ga = GmailAccount("your.account@gmail.com", "pA$$w0Rd_")
ga.login()

# folders: inbox, starred, all, drafts, sent, spam
for thread in ga.getMessagesByFolder('all', allPages=True):
    for msg in thread:
        sys.stdout.write('.')
        if not msg.attachments:
            continue

        # A single pre-formatted argument keeps this line valid on both
        # Python 2 and Python 3.
        print("\n%s %s %s %s" % (msg.id, msg.number, msg.subject, msg.sender))

        for att in msg.attachments:
            if not (att.filename and att.content):
                continue

            # The file name comes from the e-mail itself: keep only its last
            # component so it cannot point outside the target directory.
            safe_name = os.path.basename(att.filename.replace("\\", "/"))
            if safe_name in ("", ".", ".."):
                continue

            # presumably thread.id and msg.id are strings -- verify against
            # libgmail
            attdir = os.path.join(thread.id, msg.id)
            if not os.path.isdir(attdir):
                os.makedirs(attdir)
            with open(os.path.join(attdir, safe_name), 'wb') as f:
                f.write(att.content)
untested
- Make sure the TOS allows such scripts; otherwise your account may be suspended
- There might be better options: GMail offline mode, Thunderbird + ExtractExtensions, GmailFS, Gmail Drive, etc.