How to read the header with pycurl How to read the header with pycurl python python

How to read the header with pycurl


There are several solutions (by default, the response headers are dropped). Here is an example using the HEADERFUNCTION option, which lets you specify a function to handle them.

Other solutions are the WRITEHEADER option (not compatible with WRITEFUNCTION) or setting HEADER to True so that the headers are transmitted with the body.

#!/usr/bin/python
# Fetch a URL with pycurl and print its response headers followed by its body.
# Each chunk delivered by libcurl is prefixed with a running chunk number.
import sys

DEFAULT_URL = 'http://www.demaziere.fr/eve/'


class Storage:
    """Accumulate the chunks handed to a pycurl callback, numbering each one.

    Attributes:
        contents: all chunks received so far, each prefixed with "<n>: ".
        line: number of chunks received so far.
    """

    def __init__(self):
        self.contents = ''
        self.line = 0

    def store(self, buf):
        """Append one chunk; usable as WRITEFUNCTION or HEADERFUNCTION.

        Args:
            buf: the chunk as delivered by pycurl (bytes on Python 3,
                str on Python 2). A text string is stored unchanged.
        """
        if not isinstance(buf, str):
            # Python 3: pycurl delivers bytes. ISO-8859-1 maps every byte to
            # one character, so decoding cannot fail and a chunk boundary can
            # never split a character.
            buf = buf.decode('iso-8859-1')
        self.line += 1
        self.contents += "%i: %s" % (self.line, buf)

    def __str__(self):
        return self.contents


def main(url=DEFAULT_URL):
    """Download *url* and print the numbered headers, then the numbered body."""
    # Imported here so that Storage can be imported without pycurl installed.
    import pycurl

    retrieved_body = Storage()
    retrieved_headers = Storage()
    c = pycurl.Curl()
    try:
        c.setopt(c.URL, url)
        c.setopt(c.WRITEFUNCTION, retrieved_body.store)
        c.setopt(c.HEADERFUNCTION, retrieved_headers.store)
        c.perform()
    finally:
        # Release the handle even if the transfer fails.
        c.close()
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(retrieved_headers)
    print(retrieved_body)


if __name__ == '__main__':
    # An optional first command-line argument overrides the default URL.
    main(sys.argv[1] if len(sys.argv) > 1 else DEFAULT_URL)


import pycurl
from io import BytesIO

# Header-only request: collect the raw response headers and print them.
# `url` must already be defined by the surrounding code.
headers = BytesIO()  # pycurl hands bytes to the callback on Python 3
c = pycurl.Curl()
try:
    c.setopt(c.URL, url)
    # HEADER is deliberately not set: it would also copy the headers into the
    # body stream (stdout by default), so they would be printed twice.
    c.setopt(c.NOBODY, 1)  # header only, no body
    c.setopt(c.HEADERFUNCTION, headers.write)
    c.perform()
finally:
    # Release the handle even if the transfer fails.
    c.close()
# HTTP header bytes are decoded as ISO-8859-1, which accepts any byte value.
print(headers.getvalue().decode('iso-8859-1'))

Add any other curl setopts as necessary/desired, such as FOLLOWLOCATION.


Another alternative is human_curl (install it with: pip install human_curl). Usage:

In [1]: import human_curl as hurl

In [2]: r = hurl.get("http://stackoverflow.com")

In [3]: r.headers
Out[3]:
{'cache-control': 'public, max-age=45',
 'content-length': '198515',
 'content-type': 'text/html; charset=utf-8',
 'date': 'Thu, 01 Sep 2011 11:53:43 GMT',
 'expires': 'Thu, 01 Sep 2011 11:54:28 GMT',
 'last-modified': 'Thu, 01 Sep 2011 11:53:28 GMT',
 'vary': '*'}