Computing an md5 hash of a data structure
json.dumps() can sort dictionaries by key, so you don't need any other dependencies:
import hashlib
import json

# Sample payload: nested lists, a dictionary, numbers and strings.
data = ['only', 'lists', [1, 2, 3], 'dictionaries', {'a': 0, 'b': 1}, 'numbers', 47, 'strings']

# sort_keys=True serializes dictionaries in key order, so the JSON text (and
# therefore the digest) does not depend on dictionary insertion order.
# md5 consumes bytes, hence the explicit UTF-8 encoding of the JSON text.
data_md5 = hashlib.md5(json.dumps(data, sort_keys=True).encode('utf-8')).hexdigest()
print(data_md5)
Prints:
87e83d90fc0d03f2c05631e2cd68ea02
bencode also sorts dictionaries by key, so its output can be hashed directly:
import hashlib

import bencode

# Sample payload: nested lists, a dictionary, numbers and strings.
data = ['only', 'lists', [1, 2, 3], 'dictionaries', {'a': 0, 'b': 1}, 'numbers', 47, 'strings']

# bencode sorts dictionaries, so the encoded form is independent of key order
# and can be fed straight into md5.
data_md5 = hashlib.md5(bencode.bencode(data)).hexdigest()
# Parenthesized form is valid both as a Python 2 print statement and as a
# Python 3 print() call.
print(data_md5)
prints:
af1b88ca9fd8a3e828b40ed1b9a2cb20
I ended up writing it myself as I thought I would have to:
import inspect
from hashlib import md5


class Hasher(object):
    """Hashes Python data into md5.

    Values are fed to the underlying md5 object as a stream of
    length-prefixed chunks: first a marker naming the value's type, then the
    value's content. Containers also record their element count, so that
    differently nested inputs such as ``[[1], 2]`` and ``[[1, 2]]`` do not
    collapse into the same byte stream.

    Targets Python 3 (text is ``str``, binary data is ``bytes``).

    There is no cycle detection: a self-referential structure recurses until
    Python's recursion limit is hit.
    """

    def __init__(self):
        self.md5 = md5()

    def update(self, v):
        """Add `v` to the hash, recursively if needed.

        Supported directly: str, bytes/bytearray, int, float, tuple, list,
        dict, set and frozenset. Any other object is hashed through its
        public, non-routine attributes as reported by ``dir()``.
        """
        # The type marker keeps e.g. 1 and "1", or (1, 2) and [1, 2], apart.
        self._feed(str(type(v)))
        if isinstance(v, str):
            self._feed(v)
        elif isinstance(v, (bytes, bytearray)):
            self._feed(bytes(v))
        elif isinstance(v, (int, float)):
            # Numbers are hashed through their text form.
            self.update(str(v))
        elif isinstance(v, (tuple, list)):
            self._feed(str(len(v)))
            for e in v:
                self.update(e)
        elif isinstance(v, dict):
            self._feed(str(len(v)))
            # Order keys by their own digest rather than by comparing them:
            # this is independent of insertion order and also works when the
            # keys are of types that cannot be compared with each other.
            for k in sorted(v, key=self._digest_of):
                self.update(k)
                self.update(v[k])
        elif isinstance(v, (set, frozenset)):
            self._feed(str(len(v)))
            # Sets are unordered; sorting the element digests makes the
            # result independent of iteration order.
            for element_digest in sorted(self._digest_of(e) for e in v):
                self._feed(element_digest)
        else:
            # Generic object: walk its public data attributes.
            attrs = []
            for k in dir(v):
                if k.startswith('__'):
                    continue
                a = getattr(v, k)
                if inspect.isroutine(a):
                    continue
                attrs.append((k, a))
            self._feed(str(len(attrs)))
            for k, a in attrs:
                self.update(k)
                self.update(a)

    def digest(self):
        """Retrieve the digest of the hash."""
        return self.md5.digest()

    def hexdigest(self):
        """Retrieve the digest of the hash as a hexadecimal string."""
        return self.md5.hexdigest()

    def _feed(self, data):
        """Write one length-prefixed chunk (text is UTF-8 encoded) to md5."""
        if not isinstance(data, bytes):
            data = data.encode('utf-8')
        # The "<length>:" prefix stops adjacent chunks from blurring together.
        self.md5.update(str(len(data)).encode('ascii') + b':')
        self.md5.update(data)

    @staticmethod
    def _digest_of(value):
        """Return the digest of `value` hashed on its own, in a fresh Hasher."""
        sub = Hasher()
        sub.update(value)
        return sub.digest()