Batch fill PDF forms from Python or Bash

Batch fill PDF forms from python or bash


For Python you'll need the fdfgen lib and pdftk

@Hugh Bothwell's comment is 100% correct so I'll extend that answer with a working implementation.

If you're on Windows, you'll also need to make sure both Python and pdftk are on the system PATH (otherwise you'll have to spell out their full folder paths).

Here's the code to auto-batch-fill a collection of PDF forms from a CSV data file:

import csv
import os
import subprocess
import sys

sys.path.insert(0, os.getcwd())

# --- Custom configuration --------------------------------------------------
filename_prefix = "NVC"       # prefix of every generated PDF file name
csv_file = "NVC.csv"          # data source: one header row, then one row per form
pdf_file = "NVC.pdf"          # PDF template containing the editable form fields
tmp_file = "tmp.fdf"          # scratch FDF file, rewritten for every form
output_folder = './output/'   # must already exist; filled PDFs are written here


def process_csv(file):
    """Read the CSV *file* and return one record per data row.

    The first row supplies the field names. Every following row becomes a
    list of ``(field_name, value)`` tuples in column order.

    Blank rows are skipped. A row shorter than the header row is padded with
    empty strings, and cells beyond the last header are ignored.

    :param file: path of the CSV file to read.
    :return: list of records; empty when the file has no header row.
    """
    # newline='' is what the csv module expects so that quoted line breaks
    # inside cells are parsed correctly.
    with open(file, newline='') as handle:
        reader = csv.reader(handle)
        headers = next(reader, None)
        if not headers:
            return []
        data = []
        for row in reader:
            if not row:
                continue
            padded = row + [''] * (len(headers) - len(row))
            data.append(list(zip(headers, padded)))
        return data


def form_fill(fields):
    """Fill the PDF template with one record and write the result.

    The record is turned into an FDF file (``tmp_file``), which pdftk merges
    into ``pdf_file``. The output file name is built from ``output_folder``,
    ``filename_prefix`` and the value of the record's second column.

    :param fields: list of ``(field_name, value)`` tuples, as produced by
        :func:`process_csv`. It needs at least two entries.
    """
    # Imported here so the CSV handling stays usable without fdfgen installed.
    from fdfgen import forge_fdf

    fdf = forge_fdf("", fields, [], [], [])
    output_file = '{0}{1} {2}.pdf'.format(output_folder, filename_prefix, fields[1][1])
    # forge_fdf returns bytes, so the file must be opened in binary mode.
    with open(tmp_file, "wb") as fdf_file:
        fdf_file.write(fdf)
    try:
        # An argument list (no shell) keeps file names taken from the CSV
        # from being interpreted by the shell.
        status = subprocess.call(
            ['pdftk', pdf_file, 'fill_form', tmp_file,
             'output', output_file, 'dont_ask'])
    finally:
        os.remove(tmp_file)
    if status != 0:
        sys.stderr.write(
            'pdftk exited with status {0} for "{1}"\n'.format(status, output_file))


def main():
    """Generate one filled PDF per CSV row whose first column is not 'Yes'."""
    data = process_csv(csv_file)
    print('Generating Forms:')
    print('-----------------------')
    for record in data:
        # The first column acts as a skip flag.
        if record[0][1] == 'Yes':
            continue
        print('{0} {1} created...'.format(filename_prefix, record[1][1]))
        form_fill(record)


if __name__ == '__main__':
    main()

Note: It shouldn't be rocket-surgery to figure out how to customize this. The initial variable declarations contain the custom configuration.

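In the CSV, each column of the first row contains the name of the corresponding field in the PDF file. Any columns that don't have corresponding fields in the template will be ignored. Note that the script also uses the first two columns itself: rows whose first column is 'Yes' are skipped, and the value in the second column becomes part of the output file name.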

In the PDF template, just create editable fields where you want your data to fill and make sure the names match up with the CSV data.

For this specific configuration, just put this file in the same folder as your NVC.csv, NVC.pdf, and a folder named 'output'. Run it and it automagically does the rest.


A much faster version that needs neither pdftk nor fdfgen. It runs on Python 3.6+ and requires the PyPDF2 package:

# -*- coding: utf-8 -*-
from collections import OrderedDict

from PyPDF2 import PdfFileWriter, PdfFileReader

# NOTE(review): PdfFileReader/PdfFileWriter and the camelCase methods used
# below appear to be the pre-3.0 PyPDF2 API. Confirm the installed version.


def _getFields(obj, tree=None, retval=None, fileobj=None):
    """
    Extracts field data if this PDF contains interactive form fields.

    The *tree* and *retval* parameters are for recursive use.

    :param obj: the reader to inspect; it must provide ``trailer``,
        ``_checkKids`` and ``_buildField`` (presumably a ``PdfFileReader``).
    :param fileobj: A file object passed through to ``obj._checkKids`` and
        ``obj._buildField`` (presumably for a text report of the fields).
    :return: An ordered dictionary filled in by ``obj._buildField``, or
        ``None`` if the document catalog has no ``/AcroForm`` entry.
    :rtype: OrderedDict or None
    """
    fieldAttributes = {'/FT': 'Field Type', '/Parent': 'Parent', '/T': 'Field Name', '/TU': 'Alternate Field Name',
                       '/TM': 'Mapping Name', '/Ff': 'Field Flags', '/V': 'Value', '/DV': 'Default Value'}
    if retval is None:
        # Top-level call: start from the AcroForm entry of the catalog.
        retval = OrderedDict()
        catalog = obj.trailer["/Root"]
        # get the AcroForm tree
        if "/AcroForm" in catalog:
            tree = catalog["/AcroForm"]
        else:
            return None
    if tree is None:
        return retval

    obj._checkKids(tree, retval, fileobj)
    for attr in fieldAttributes:
        if attr in tree:
            # Tree is a field
            obj._buildField(tree, retval, fileobj, fieldAttributes)
            break

    if "/Fields" in tree:
        fields = tree["/Fields"]
        for f in fields:
            field = f.getObject()
            obj._buildField(field, retval, fileobj, fieldAttributes)

    return retval


def get_form_fields(infile):
    """Return the form fields of the PDF at path *infile*.

    :param infile: path of the PDF file to read.
    :return: ``OrderedDict`` mapping each field key to its ``/V`` entry
        (``''`` when the field has none). Empty when the PDF has no
        ``/AcroForm`` entry.
    """
    # Everything is read while the file is open; the handle is closed after.
    with open(infile, 'rb') as stream:
        fields = _getFields(PdfFileReader(stream))
        if fields is None:
            # No interactive form: report "no fields" instead of crashing.
            return OrderedDict()
        return OrderedDict((k, v.get('/V', '')) for k, v in fields.items())


def update_form_values(infile, outfile, newvals=None):
    """Copy *infile* to *outfile*, updating form field values on every page.

    :param infile: path of the source PDF.
    :param outfile: path of the PDF to write.
    :param newvals: mapping of field name to new value. When omitted or
        empty, every field is filled with ``'#<index> <name>=<old value>'``,
        which helps discover the field names of an unknown form.
    """
    if not newvals:
        # Built once up front: this needs a full parse of the PDF, and the
        # result is the same for every page.
        newvals = {k: f'#{idx} {k}={v}'
                   for idx, (k, v) in enumerate(get_form_fields(infile).items())}

    # The source stays open until the output has been written.
    with open(infile, 'rb') as stream:
        pdf = PdfFileReader(stream)
        writer = PdfFileWriter()
        for i in range(pdf.getNumPages()):
            page = pdf.getPage(i)
            try:
                writer.updatePageFormFieldValues(page, newvals)
            except Exception as e:
                # Best effort: report the problem and still keep the page.
                print(repr(e))
            writer.addPage(page)

        with open(outfile, 'wb') as out:
            writer.write(out)


if __name__ == '__main__':
    from pprint import pprint

    pdf_file_name = '2PagesFormExample.pdf'

    pprint(get_form_fields(pdf_file_name))

    update_form_values(pdf_file_name, 'out-' + pdf_file_name)  # enumerate & fill the fields with their own names
    update_form_values(pdf_file_name, 'out2-' + pdf_file_name,
                       {'my_fieldname_1': 'My Value',
                        'my_fieldname_2': 'My Another 💎alue'})  # update the form fields


Replace Original File

import os
import subprocess

# Fill the form into a separate output file first. check_call raises
# CalledProcessError when pdftk fails, so the original is only replaced
# after a successful run.
try:
    subprocess.check_call(
        ['pdftk', 'original.pdf', 'fill_form', 'data.fdf', 'output', 'output.pdf'])
finally:
    # The FDF data file is no longer needed, whether or not pdftk succeeded.
    os.remove("data.fdf")

# os.replace overwrites the destination in one step, so there is no moment
# where original.pdf has been deleted but not yet replaced.
os.replace("output.pdf", "original.pdf")