Batch fill PDF forms from python or bash

For Python you'll need the fdfgen lib and pdftk

@Hugh Bothwell's comment is 100% correct so I'll extend that answer with a working implementation.

If you're on Windows, you'll also need to make sure both Python and pdftk are on the system path (unless you want to type out their full folder paths).

Here's the code to auto-batch-fill a collection of PDF forms from a CSV data file:

"""Batch-fill a PDF form template from the rows of a CSV file.

Each CSV row becomes one filled PDF: the row is converted to an FDF file
with ``fdfgen`` and merged into the template by the external ``pdftk``
command-line tool, which must be on the system path.
"""
import csv
import os
import subprocess
import sys

from fdfgen import forge_fdf

sys.path.insert(0, os.getcwd())

# --- Custom configuration -------------------------------------------------
filename_prefix = "NVC"
csv_file = "NVC.csv"
pdf_file = "NVC.pdf"
tmp_file = "tmp.fdf"
output_folder = './output/'


def process_csv(file):
    """Read *file* and return one list of ``(header, value)`` pairs per row.

    The first CSV row supplies the field names. Completely blank rows are
    skipped, rows shorter than the header are padded with empty strings,
    and cells beyond the last header column are ignored.
    """
    data = []
    # newline="" is what the csv module asks for so that embedded newlines
    # inside quoted cells are parsed correctly.
    with open(file, newline="") as handle:
        reader = csv.reader(handle)
        headers = next(reader, [])
        for row in reader:
            if not row:
                continue  # blank line, e.g. trailing newline at end of file
            padded = row + [""] * (len(headers) - len(row))
            data.append(list(zip(headers, padded)))
    return data


def form_fill(fields):
    """Fill the PDF template with *fields* and write it to the output folder.

    *fields* is a list of ``(field_name, value)`` pairs; the value of the
    second pair is used in the output file name.
    """
    fdf = forge_fdf("", fields, [], [], [])
    output_file = '{0}{1} {2}.pdf'.format(output_folder, filename_prefix, fields[1][1])

    # The fdfgen README writes the forged FDF in binary mode.
    with open(tmp_file, "wb") as fdf_file:
        fdf_file.write(fdf)

    try:
        # Argument list instead of a shell string: values coming from the
        # CSV (quotes, spaces, shell metacharacters) cannot break the command.
        status = subprocess.call(
            ["pdftk", pdf_file, "fill_form", tmp_file,
             "output", output_file, "dont_ask"]
        )
    finally:
        os.remove(tmp_file)

    if status != 0:
        print('pdftk exited with status {0} for "{1}"'.format(status, output_file))


def main():
    """Generate one filled form per CSV row that is not flagged 'Yes'."""
    data = process_csv(csv_file)
    os.makedirs(output_folder, exist_ok=True)

    print('Generating Forms:')
    print('-----------------------')
    for record in data:
        # A 'Yes' in the first column marks the row as one to skip.
        if record[0][1] == 'Yes':
            continue
        print('{0} {1} created...'.format(filename_prefix, record[1][1]))
        form_fill(record)


if __name__ == '__main__':
    main()

Note: It shouldn't be rocket-surgery to figure out how to customize this. The initial variable declarations contain the custom configuration.

In the CSV, each column of the first row contains the name of the corresponding field in the PDF file. Any columns that don't have corresponding fields in the template will be ignored.

In the PDF template, just create editable fields where you want your data to fill and make sure the names match up with the CSV data.

For this specific configuration, just put this file in the same folder as your NVC.csv, NVC.pdf, and a folder named 'output'. Run it and it automagically does the rest.

python forms pdf automation

Much faster version, neither pdftk nor fdfgen needed, pure Python 3.6+:

# -*- coding: utf-8 -*-
"""Read and update interactive form fields of a PDF using PyPDF2 only.

NOTE: this uses the camelCase PyPDF2 1.x/2.x API (``PdfFileReader``,
``getPage``, ``updatePageFormFieldValues`` ...) and two private reader
methods (``_checkKids``, ``_buildField``).
"""
from collections import OrderedDict

from PyPDF2 import PdfFileWriter, PdfFileReader


def _getFields(obj, tree=None, retval=None, fileobj=None):
    """
    Extracts field data if this PDF contains interactive form fields.
    The *tree* and *retval* parameters are for recursive use.

    :param obj: The reader whose trailer, ``_checkKids`` and
        ``_buildField`` are used to walk the form.
    :param fileobj: A file object (usually a text file) to write
        a report to on all interactive form fields found.
    :return: A dictionary where each key is a field name, and each
        value is a :class:`Field<PyPDF2.generic.Field>` object. By
        default, the mapping name is used for keys.
    :rtype: dict, or ``None`` if form data could not be located.
    """
    fieldAttributes = {'/FT': 'Field Type', '/Parent': 'Parent', '/T': 'Field Name', '/TU': 'Alternate Field Name',
                       '/TM': 'Mapping Name', '/Ff': 'Field Flags', '/V': 'Value', '/DV': 'Default Value'}
    if retval is None:
        retval = OrderedDict()
        catalog = obj.trailer["/Root"]
        # get the AcroForm tree
        if "/AcroForm" in catalog:
            tree = catalog["/AcroForm"]
        else:
            return None
    if tree is None:
        return retval

    obj._checkKids(tree, retval, fileobj)
    for attr in fieldAttributes:
        if attr in tree:
            # Tree is a field
            obj._buildField(tree, retval, fileobj, fieldAttributes)
            break

    if "/Fields" in tree:
        fields = tree["/Fields"]
        for f in fields:
            field = f.getObject()
            obj._buildField(field, retval, fileobj, fieldAttributes)

    return retval


def get_form_fields(infile):
    """Return an ``OrderedDict`` mapping field name to its current ``/V`` value.

    :param infile: Path of the PDF to inspect.
    :return: Field names mapped to their value (``''`` when a field has no
        ``/V`` entry). Empty when the PDF has no AcroForm.
    """
    # Keep the file open while the reader is in use, then close it.
    with open(infile, 'rb') as stream:
        fields = _getFields(PdfFileReader(stream))
        if fields is None:
            # No /AcroForm in the catalog: report "no fields" instead of
            # failing on ``None.items()``.
            return OrderedDict()
        return OrderedDict((k, v.get('/V', '')) for k, v in fields.items())


def update_form_values(infile, outfile, newvals=None):
    """Copy *infile* to *outfile*, setting form field values on every page.

    :param infile: Path of the source PDF.
    :param outfile: Path the updated PDF is written to.
    :param newvals: Mapping of field name to new value. When omitted or
        empty, every field is filled with ``'#<index> <name>=<old value>'``
        so the fields can be identified visually.
    """
    if not newvals:
        # Computed once up front; it does not depend on the page being
        # processed, so there is no need to re-parse the PDF per page.
        newvals = {k: f'#{i} {k}={v}'
                   for i, (k, v) in enumerate(get_form_fields(infile).items())}

    # The source file stays open until the writer has produced its output,
    # because page objects are read through the reader.
    with open(infile, 'rb') as source:
        pdf = PdfFileReader(source)
        writer = PdfFileWriter()
        for page_number in range(pdf.getNumPages()):
            page = pdf.getPage(page_number)
            try:
                writer.updatePageFormFieldValues(page, newvals)
            except Exception as e:
                # Best effort: report the problem and still copy the page
                # (presumably raised for pages without form annotations).
                print(repr(e))
            writer.addPage(page)

        with open(outfile, 'wb') as out:
            writer.write(out)


if __name__ == '__main__':
    from pprint import pprint

    pdf_file_name = '2PagesFormExample.pdf'

    pprint(get_form_fields(pdf_file_name))

    update_form_values(pdf_file_name, 'out-' + pdf_file_name)  # enumerate & fill the fields with their own names
    update_form_values(pdf_file_name, 'out2-' + pdf_file_name,
                       {'my_fieldname_1': 'My Value',
                        'my_fieldname_2': 'My Another 💎alue'})  # update the form fields

python forms pdf automation

Replace Original File

import os

# Fill the form into a separate output file first; pdftk cannot write onto
# the file it is reading.
status = os.system('pdftk "original.pdf" fill_form "data.fdf" output "output.pdf"')
os.remove("data.fdf")

# Only discard the original once pdftk has succeeded and the new file
# exists; otherwise a failed run would delete the only copy.
if status == 0 and os.path.exists("output.pdf"):
    os.remove("original.pdf")
    os.rename("output.pdf", "original.pdf")
else:
    raise RuntimeError(
        "pdftk failed (exit status {0}); original.pdf was left untouched".format(status)
    )

CodeHunter

Batch fill PDF forms from python or bash

Recent Posts

How can I color dots in a xy scatterplot according to column value?

How to update a claim in ASP.NET Identity?

What does {0} mean when initializing an object?

Accessing members of items in a JSONArray with Java

How to log SQL statements in Spring Boot?

Powershell Get-WebSite name parameter is ignored

How to detect scroll to bottom of html element

Java synchronized method

How to test controllers with CodeIgniter?

Detect Visual Composer

Matplotlib: Specify format of floats for tick labels

Rails join a list of strings with commas and "and" before the last