How to Copy Files Fast [duplicate] How to Copy Files Fast [duplicate] python python

How to Copy Files Fast [duplicate]


The fastest version I've got, without over-optimizing the code, is the following:

import os


class CTError(Exception):
    """Raised by copytree() when one or more entries could not be copied.

    The ``errors`` attribute is a list of ``(srcname, dstname, reason)``
    tuples, one per failed entry.
    """

    def __init__(self, errors):
        # Pass the list to Exception as well so str()/repr() are informative.
        super(CTError, self).__init__(errors)
        self.errors = errors


# os.O_BINARY is not defined on every platform; fall back to 0 (a no-op
# flag) where it is missing.
O_BINARY = getattr(os, "O_BINARY", 0)
READ_FLAGS = os.O_RDONLY | O_BINARY
WRITE_FLAGS = os.O_WRONLY | os.O_CREAT | os.O_TRUNC | O_BINARY
BUFFER_SIZE = 128 * 1024


def _close_quietly(fd):
    """Close a file descriptor, ignoring OS-level close failures."""
    try:
        os.close(fd)
    except OSError:
        pass


def copyfile(src, dst):
    """Copy the contents of file *src* to *dst* using raw descriptor I/O.

    *dst* is created if needed and truncated otherwise; the mode passed at
    creation is the ``st_mode`` of *src* (obtained with a single fstat).
    Data is moved in chunks of BUFFER_SIZE bytes.

    Raises OSError (IOError on old Pythons) if opening, reading or writing
    fails. Both descriptors are closed on every path.
    """
    fin = os.open(src, READ_FLAGS)
    try:
        mode = os.fstat(fin).st_mode
        fout = os.open(dst, WRITE_FLAGS, mode)
        try:
            while True:
                chunk = os.read(fin, BUFFER_SIZE)
                # An empty read means end of file. Testing truthiness works
                # for both str (Python 2) and bytes (Python 3) results.
                if not chunk:
                    break
                # os.write may write fewer bytes than requested; keep going
                # until the whole chunk is on its way.
                while chunk:
                    written = os.write(fout, chunk)
                    chunk = chunk[written:]
        finally:
            _close_quietly(fout)
    finally:
        _close_quietly(fin)


def copytree(src, dst, symlinks=False, ignore=()):
    """Recursively copy the directory *src* into *dst*.

    Parameters:
        src: directory to copy from.
        dst: directory to copy into; created (with parents) if missing.
        symlinks: when true, symbolic links are recreated as links instead
            of having their targets copied.
        ignore: collection of entry names (not paths) to skip at every
            level of the tree.

    Failures on individual entries do not stop the copy: they are collected
    and reported together by raising CTError once the directory has been
    processed. An error listing *src* itself propagates immediately.
    """
    names = os.listdir(src)
    if not os.path.exists(dst):
        os.makedirs(dst)

    errors = []
    for name in names:
        if name in ignore:
            continue
        srcname = os.path.join(src, name)
        dstname = os.path.join(dst, name)
        try:
            if symlinks and os.path.islink(srcname):
                os.symlink(os.readlink(srcname), dstname)
            elif os.path.isdir(srcname):
                copytree(srcname, dstname, symlinks, ignore)
            else:
                copyfile(srcname, dstname)
            # XXX What about devices, sockets etc.?
        except (IOError, os.error) as why:
            errors.append((srcname, dstname, str(why)))
        except CTError as err:
            # Failures from a nested copytree: merge them into ours.
            errors.extend(err.errors)

    if errors:
        raise CTError(errors)

This code runs a little bit slower than native linux "cp -rf".

Compared to shutil, the gain is around 2x-3x when copying from local storage to tmpfs, and around 6x when copying from NFS to local storage.

After profiling I noticed that shutil.copy makes lots of fstat syscalls, which are pretty heavyweight. If one wants to optimize further, I would suggest doing a single fstat for src and reusing the values. Honestly, I didn't go further, as I got almost the same figures as the native Linux copy tool, and optimizing for several hundreds of milliseconds wasn't my goal.


You could simply use the native copy command of the OS you are doing the copy on. For Windows:

from subprocess import call

# Delegate the copy to the Windows xcopy tool.
call(
    [
        "xcopy",
        "c:\\file.txt",   # source file
        "n:\\folder\\",   # destination folder
        "/K/O/X",         # keep attributes, ownership/ACLs and audit settings
    ]
)

/K - Copies attributes. Typically, Xcopy resets read-only attributes
/O - Copies file ownership and ACL information.
/X - Copies file audit settings (implies /O).


import subprocess
import sys


def copyWithSubprocess(cmd):
    """Run the copy command *cmd* and wait for it to finish.

    *cmd* is an argument list (program followed by its arguments) handed to
    subprocess.Popen. The child's stdout and stderr are captured and
    discarded.

    Returns the child's exit code.
    """
    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # Drain both pipes and wait: without this the child can block once a
    # pipe fills up, and the caller would continue before the copy is done.
    proc.communicate()
    return proc.returncode


# NOTE(review): `source` and `dest` are not defined in this snippet; they
# must be set by the surrounding code before this point.
cmd = None
if sys.platform.startswith("darwin"):
    cmd = ['cp', source, dest]
elif sys.platform.startswith("win"):
    cmd = ['xcopy', source, dest, '/K/O/X']

# On any other platform no command is selected and nothing is copied.
if cmd:
    copyWithSubprocess(cmd)