
Handling large file uploads with Flask


I think the simplest way around this is to send the file in lots of small parts, or chunks. That means there are two parts to making this work: the front-end (website) and the back-end (server). For the front-end, you can use something like Dropzone.js, which has no additional dependencies and ships with decent CSS. All you have to do is add the class dropzone to a form and it automatically turns it into one of their drag-and-drop upload fields (you can also click it and select a file).

However, by default, dropzone does not chunk files. Luckily, it is really easy to enable. Here's a sample file upload form with DropzoneJS and chunking enabled:

<html lang="en">
<head>
    <meta charset="UTF-8">
    <link rel="stylesheet"
          href="https://cdnjs.cloudflare.com/ajax/libs/dropzone/5.4.0/min/dropzone.min.css"/>
    <link rel="stylesheet"
          href="https://cdnjs.cloudflare.com/ajax/libs/dropzone/5.4.0/min/basic.min.css"/>
    <script type="application/javascript"
            src="https://cdnjs.cloudflare.com/ajax/libs/dropzone/5.4.0/min/dropzone.min.js">
    </script>
    <title>File Dropper</title>
</head>
<body>
<form method="POST" action='/upload' class="dropzone dz-clickable"
      id="dropper" enctype="multipart/form-data">
</form>

<script type="application/javascript">
    Dropzone.options.dropper = {
        paramName: 'file',
        chunking: true,
        forceChunking: true,
        url: '/upload',
        maxFilesize: 1025, // megabytes
        chunkSize: 1000000 // bytes
    }
</script>
</body>
</html>

And here's the back-end part using Flask:

import logging
import os

from flask import render_template, Blueprint, request, make_response
from werkzeug.utils import secure_filename

from pydrop.config import config

blueprint = Blueprint('templated', __name__, template_folder='templates')

log = logging.getLogger('pydrop')


@blueprint.route('/')
@blueprint.route('/index')
def index():
    # Route to serve the upload form
    return render_template('index.html',
                           page_name='Main',
                           project_name="pydrop")


@blueprint.route('/upload', methods=['POST'])
def upload():
    file = request.files['file']

    save_path = os.path.join(config.data_dir, secure_filename(file.filename))
    current_chunk = int(request.form['dzchunkindex'])

    # If the file already exists it's ok if we are appending to it,
    # but not if it's a new file that would overwrite the existing one
    if os.path.exists(save_path) and current_chunk == 0:
        # 400 and 500s will tell dropzone that an error occurred and show an error
        return make_response(('File already exists', 400))

    try:
        with open(save_path, 'ab') as f:
            f.seek(int(request.form['dzchunkbyteoffset']))
            f.write(file.stream.read())
    except OSError:
        # log.exception will include the traceback so we can see what's wrong
        log.exception('Could not write to file')
        return make_response(("Not sure why,"
                              " but we couldn't write the file to disk", 500))

    total_chunks = int(request.form['dztotalchunkcount'])

    if current_chunk + 1 == total_chunks:
        # This was the last chunk, the file should be complete and the size we expect
        if os.path.getsize(save_path) != int(request.form['dztotalfilesize']):
            log.error(f"File {file.filename} was completed, "
                      f"but has a size mismatch. "
                      f"Was {os.path.getsize(save_path)} but we "
                      f"expected {request.form['dztotalfilesize']}")
            return make_response(('Size mismatch', 500))
        else:
            log.info(f'File {file.filename} has been uploaded successfully')
    else:
        log.debug(f'Chunk {current_chunk + 1} of {total_chunks} '
                  f'for file {file.filename} complete')

    return make_response(("Chunk upload successful", 200))
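If you want to exercise the /upload route without a browser, something like the rough sketch below (untested, using the third-party requests package) mimics the chunked POSTs Dropzone makes. The field names are the ones the view above reads; the URL and file path are placeholders, and real Dropzone sends a few extra fields this skips.

# Rough sketch only: simulate Dropzone's chunked uploads with "requests".
# dzchunkindex, dzchunkbyteoffset, dztotalchunkcount and dztotalfilesize
# mirror what the Flask view above reads; URL and file path are placeholders.
import os
import requests

CHUNK_SIZE = 1_000_000  # bytes, matching the chunkSize configured in Dropzone above

def upload_in_chunks(path, url='http://localhost:5000/upload'):
    total_size = os.path.getsize(path)
    total_chunks = max(1, -(-total_size // CHUNK_SIZE))  # ceiling division
    with open(path, 'rb') as f:
        for index in range(total_chunks):
            form = {
                'dzchunkindex': str(index),
                'dzchunkbyteoffset': str(index * CHUNK_SIZE),
                'dztotalchunkcount': str(total_chunks),
                'dztotalfilesize': str(total_size),
            }
            files = {'file': (os.path.basename(path), f.read(CHUNK_SIZE))}
            requests.post(url, data=form, files=files).raise_for_status()

upload_in_chunks('some-big-file.bin')  # placeholder file name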


Use copy_current_request_context; it duplicates the request context, so you can use a thread (or anything else) to run your task in the background.

Maybe an example will make it clearer. I have tested it with a 3.37 GB file, debian-9.5.0-amd64-DVD-1.iso.

# coding:utf-8
import datetime
import os
import threading

from flask import Flask, render_template, request, redirect, url_for
from flask import copy_current_request_context
from werkzeug.utils import secure_filename

app = Flask(__name__)


@app.route('/upload', methods=['POST', 'GET'])
def upload():
    @copy_current_request_context
    def save_file(close_after_write):
        print(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') + " i am doing")
        f = request.files['file']
        basepath = os.path.dirname(__file__)
        upload_path = os.path.join(basepath, '', secure_filename(f.filename))
        f.save(upload_path)
        close_after_write()  # restore and call the original close once saving is done
        print(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') + " write done")

    def pass_exit():
        pass

    if request.method == 'POST':
        f = request.files['file']
        # Swap out the stream's close() so Flask doesn't close the upload
        # when the request ends; the thread closes it after saving the file.
        normal_exit = f.stream.close
        f.stream.close = pass_exit
        t = threading.Thread(target=save_file, args=(normal_exit,))
        t.start()
        return redirect(url_for('upload'))

    return render_template('upload.html')


if __name__ == '__main__':
    app.run(debug=True)

This is the template; it should be saved as templates/upload.html:

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Title</title>
</head>
<body>
    <h1>example</h1>
    <form action="" enctype='multipart/form-data' method='POST'>
        <input type="file" name="file">
        <input type="submit" value="upload">
    </form>
</body>
</html>


When uploading a file, you just can't leave the page and have it continue. The page has to stay open for the upload to continue.

Something you could do is open a new tab just for handling the upload and warn the user if they try to close that tab before the upload finishes. That way the upload is separate from whatever the user is doing on the original page, so they can still navigate without cancelling it. The upload tab can also just close itself when it finishes.

index.js

// get value from <input id="upload" type="file"> on page
var upload = document.getElementById('upload');

upload.addEventListener('input', function () {
    // open new tab and stick the selected file in it
    var file = upload.files[0];
    var uploadTab = window.open('/upload-page', '_blank');
    if (uploadTab) {
        uploadTab.file = file;
    } else {
        alert('Failed to open new tab');
    }
});

upload-page.js

function confirmLeave() {
    return 'The upload will cancel if you leave the page, continue?';
}
window.addEventListener('beforeunload', confirmLeave);

window.addEventListener('load', function () {
    var req = new XMLHttpRequest();

    // upload progress events fire on req.upload, not on req itself
    req.upload.addEventListener('progress', function (evt) {
        var percentage = '' + (evt.loaded / evt.total * 100) + '%';
        // use percentage to update a progress bar or something
    });

    req.addEventListener('load', function () {
        alert('Upload Finished');
        window.removeEventListener('beforeunload', confirmLeave);
        window.close();
    });

    // open() must be called before setRequestHeader()
    req.open('POST', '/upload/' + encodeURIComponent(window.file.name));
    req.setRequestHeader('Content-Type', 'application/octet-stream');
    req.send(window.file);
});

On the server, you can use request.stream to read the uploaded file in chunks to avoid having to wait for the entire thing to load up in memory first.

server.py

import os
import urllib.parse

from flask import Flask, request, make_response
from werkzeug.utils import secure_filename

app = Flask(__name__)


@app.route('/upload/<filename>', methods=['POST'])
def upload(filename):
    filename = secure_filename(urllib.parse.unquote(filename))
    bytes_left = int(request.headers.get('content-length'))
    with open(os.path.join('uploads', filename), 'wb') as upload_file:
        chunk_size = 5120
        while bytes_left > 0:
            chunk = request.stream.read(chunk_size)
            if not chunk:  # client went away before sending everything
                break
            upload_file.write(chunk)
            bytes_left -= len(chunk)
    return make_response('Upload Complete', 200)
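And just as a rough sketch of a non-browser client for that route (using the third-party requests package): passing a plain file object as data makes requests send the body from disk with a Content-Length header rather than buffering it all in memory. The host, port and file path below are placeholders.

# Rough sketch only: stream a local file to the /upload/<filename> route above.
# requests reads the file object in chunks; URL and path are placeholders.
import os
import urllib.parse
import requests

def stream_upload(path, base_url='http://localhost:5000'):
    filename = urllib.parse.quote(os.path.basename(path))
    with open(path, 'rb') as f:
        resp = requests.post(
            f'{base_url}/upload/{filename}',
            data=f,  # file object -> streamed body with Content-Length
            headers={'Content-Type': 'application/octet-stream'},
        )
    resp.raise_for_status()
    return resp.text

stream_upload('some-big-file.bin')  # placeholder file name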

You might be able to use the FormData API instead of an octet-stream, but I'm not sure if you can stream those in Flask.
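For what it's worth, here is a hedged sketch of the FormData variant. As I understand it, Werkzeug parses the multipart body as it arrives and spools large file parts to a temporary file, so save() copies from that temp file rather than holding the whole upload in memory, though it isn't a true stream straight to the destination. The route name, field name and uploads directory are placeholders.

# Hedged sketch: accept a FormData (multipart/form-data) POST instead of a raw
# octet-stream. Route name, 'file' field name and 'uploads' dir are placeholders.
import os
from flask import Flask, request, make_response
from werkzeug.utils import secure_filename

app = Flask(__name__)

@app.route('/upload-form', methods=['POST'])
def upload_form():
    f = request.files['file']  # parsed multipart part, spooled to a temp file if large
    f.save(os.path.join('uploads', secure_filename(f.filename)))
    return make_response('Upload Complete', 200)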