Computing an md5 hash of a data structure (Python)

Computing an md5 hash of a data structure


json.dumps() can sort dictionaries by key (pass sort_keys=True), so you don't need any other dependencies:

import hashlib
import json

# The structure may only contain JSON-serialisable values.
data = ['only', 'lists', [1, 2, 3], 'dictionaries', {'a': 0, 'b': 1}, 'numbers', 47, 'strings']

# sort_keys=True makes the serialised text independent of dictionary key
# order, so equal structures always produce the same digest.  md5 needs
# bytes, hence the explicit UTF-8 encoding of the JSON text.
data_md5 = hashlib.md5(json.dumps(data, sort_keys=True).encode('utf-8')).hexdigest()
print(data_md5)

Prints:

87e83d90fc0d03f2c05631e2cd68ea02


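bencode sorts dictionaries by key, so the encoded output is deterministic and can be hashed directly (this requires the third-party bencode package):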

import hashlib

import bencode  # third-party package, not part of the standard library

# The structure may only contain values that bencode can encode.
data = ['only', 'lists', [1, 2, 3], 'dictionaries', {'a': 0, 'b': 1}, 'numbers', 47, 'strings']

# The encoded output is passed straight to md5 without any extra encoding
# step.  NOTE(review): on Python 3 this assumes bencode.bencode() returns
# bytes -- confirm for the installed package.
data_md5 = hashlib.md5(bencode.bencode(data)).hexdigest()

# print() with a single argument behaves the same on Python 2 and Python 3.
print(data_md5)

Prints:

af1b88ca9fd8a3e828b40ed1b9a2cb20


I ended up writing it myself as I thought I would have to:

import inspect
from hashlib import md5


class Hasher(object):
    """Hashes Python data into md5.

    Values are fed in with `update()` and the result is read with
    `digest()` or `hexdigest()`.  Strings, bytes, numbers, tuples, lists
    and dicts are handled directly; any other object is hashed through its
    public, non-routine attributes.

    Every chunk fed to md5 is length-prefixed, and containers feed their
    size before their items.  Without that, differently nested values such
    as ``[[1], 2]`` and ``[[1, 2]]`` would produce the same byte stream and
    therefore the same digest.

    This version targets Python 3 (md5 only accepts bytes there).
    """

    def __init__(self):
        self.md5 = md5()

    def _feed(self, raw):
        """Feed one chunk of bytes into the digest, prefixed by its length."""
        self.md5.update(str(len(raw)).encode('ascii') + b':' + raw)

    def _feed_count(self, count):
        """Feed the number of items a container is about to contribute."""
        self._feed(str(count).encode('ascii'))

    def update(self, v):
        """Add `v` to the hash, recursively if needed.

        Raises TypeError if `v` is (or contains) a dict whose keys cannot
        be sorted against each other.
        """
        # The type goes in first so that e.g. 1, 1.0 and '1' hash differently.
        self._feed(str(type(v)).encode('utf-8'))
        if isinstance(v, str):
            self._feed(v.encode('utf-8'))
        elif isinstance(v, (bytes, bytearray)):
            self._feed(bytes(v))
        elif isinstance(v, (int, float)):
            self._feed(str(v).encode('ascii'))
        elif isinstance(v, (tuple, list)):
            self._feed_count(len(v))
            for e in v:
                self.update(e)
        elif isinstance(v, dict):
            self._feed_count(len(v))
            # Sorted keys make the digest independent of insertion order.
            for k in sorted(v):
                self.update(k)
                self.update(v[k])
        else:
            # Arbitrary object: hash its attributes, skipping dunder names
            # and anything that is a routine (methods, functions, builtins).
            attrs = []
            for k in dir(v):
                if k.startswith('__'):
                    continue
                a = getattr(v, k)
                if inspect.isroutine(a):
                    continue
                attrs.append((k, a))
            self._feed_count(len(attrs))
            for k, a in attrs:
                self.update(k)
                self.update(a)

    def digest(self):
        """Retrieve the digest of the hash."""
        return self.md5.digest()

    def hexdigest(self):
        """Retrieve the digest of the hash as a hexadecimal string."""
        return self.md5.hexdigest()