Python 3 - Can pickle handle byte objects larger than 4GB? [python]

Python 3 - Can pickle handle byte objects larger than 4GB?


Here is a simple workaround for issue 24658. Use pickle.dumps to serialize and pickle.loads to deserialize, and break the bytes object into chunks of size 2**31 - 1 to get it into or out of the file.

import os.path
import pickle

# Chunk size just under 2 GiB: on macOS a single fread/fwrite of
# 2**31 bytes or more fails (CPython issue 24658), so all file I/O
# for huge pickles must be split into smaller pieces.
MAX_BYTES = 2**31 - 1


def chunked_pickle_dump(obj, file_path, max_bytes=MAX_BYTES):
    """Pickle *obj* to *file_path*, issuing writes of at most *max_bytes*.

    The object is serialized in memory first, then the resulting bytes
    are written out in slices so no single write exceeds the macOS limit.
    """
    serialized = pickle.dumps(obj)
    with open(file_path, 'wb') as f_out:
        for idx in range(0, len(serialized), max_bytes):
            f_out.write(serialized[idx:idx + max_bytes])


def chunked_pickle_load(file_path, max_bytes=MAX_BYTES):
    """Read *file_path* in chunks of at most *max_bytes* and unpickle it.

    Returns the deserialized object.
    """
    bytes_in = bytearray(0)
    input_size = os.path.getsize(file_path)
    with open(file_path, 'rb') as f_in:
        # One iteration per chunk; reads past EOF just return b"".
        for _ in range(0, input_size, max_bytes):
            bytes_in += f_in.read(max_bytes)
    return pickle.loads(bytes_in)


if __name__ == "__main__":
    # Demo: round-trip a 2**31-byte payload — exactly the size that
    # triggers the macOS bug when read/written in one call.
    file_path = "pkl.pkl"
    data = bytearray(2**31)
    chunked_pickle_dump(data, file_path)
    data2 = chunked_pickle_load(file_path)
    assert data == data2


To sum up what was answered in the comments:

Yes, Python can pickle byte objects bigger than 4GB. The observed error is caused by a bug in the implementation (see Issue 24658).


Here is the full workaround. It seems pickle.load no longer fails on huge files (I am on Python 3.5.2), so strictly speaking only pickle.dump needs the chunking to work properly.

import pickle


class MacOSFile(object):
    """File-object wrapper that splits huge reads/writes into safe chunks.

    Works around CPython issue 24658: on macOS, a single fread/fwrite of
    2**31 bytes or more fails, so pickling/unpickling objects larger than
    ~2 GiB breaks unless the underlying file I/O is chunked.
    """

    # Largest single read/write macOS handles reliably.
    # BUG FIX: the original spelled this `1 << 31 - 1`, which parses as
    # `1 << 30` because `-` binds tighter than `<<`; the intended value
    # is (1 << 31) - 1.
    _MAX_CHUNK = (1 << 31) - 1

    def __init__(self, f):
        # Wrapped file object; all chunking is layered on top of it.
        self.f = f

    def __getattr__(self, item):
        # Delegate everything we don't override (seek, tell, close, ...)
        # to the wrapped file so this behaves like a normal file object.
        return getattr(self.f, item)

    def read(self, n):
        """Read *n* bytes, splitting requests of >= 2**31 bytes into chunks."""
        if n >= (1 << 31):
            buffer = bytearray(n)
            idx = 0
            while idx < n:
                batch_size = min(n - idx, self._MAX_CHUNK)
                buffer[idx:idx + batch_size] = self.f.read(batch_size)
                idx += batch_size
            return buffer
        return self.f.read(n)

    def write(self, buffer):
        """Write *buffer*, never passing more than _MAX_CHUNK bytes at once.

        Debug prints from the original were removed: library-level I/O
        should not write progress messages to stdout.
        """
        n = len(buffer)
        idx = 0
        while idx < n:
            batch_size = min(n - idx, self._MAX_CHUNK)
            self.f.write(buffer[idx:idx + batch_size])
            idx += batch_size


def pickle_dump(obj, file_path):
    """Pickle *obj* to *file_path* with chunked writes (macOS-safe)."""
    with open(file_path, "wb") as f:
        return pickle.dump(obj, MacOSFile(f), protocol=pickle.HIGHEST_PROTOCOL)


def pickle_load(file_path):
    """Load and return the pickled object stored at *file_path* (macOS-safe)."""
    with open(file_path, "rb") as f:
        return pickle.load(MacOSFile(f))