I have a project where I need to fill out pre-made PDFs. The most logical solution that comes to mind is to turn the pre-made PDFs into PDF forms, so that there are tags where the input values are supposed to go; then I can look through the form tags in the PDF and line them up with a dictionary of values.
I have accomplished this using PyPDF2. Overall, I took an image of a web form and then opened Acrobat and created a PDF form based on the fields seen in the image, then used PyPDF2 for filling out the PDF form fields, but the caveat is that printing those filled in values seems buggy in some browsers, Firefox being one.
How do I go about converting my PDF form to a standard/flat PDF so that I keep the pre-populated values but lose the editable fields (as I think this is the issue)?
from io import BytesIO
import PyPDF2
from django.http import HttpResponse
from PyPDF2.generic import BooleanObject, NameObject, IndirectObject
def pdf_view(request):
    """Fill the first page of the template PDF form and return it inline.

    Reads ``templates/template.pdf``, copies its first page into a new
    writer, fills the page's form fields from a fixed dictionary of sample
    values, and returns the result as an ``application/pdf`` response.

    Args:
        request: The incoming Django request (not inspected).

    Returns:
        An ``HttpResponse`` carrying the filled PDF with an inline
        ``Content-Disposition`` of ``completed.pdf``.
    """
    template = 'templates/template.pdf'
    data_dict = {
        'first_name': 'John',
        'last_name': 'Smith',
        'email': 'mail@mail.com',
        'phone': '889-998-9967',
        'company': 'Amazing Inc.',
        'job_title': 'Dev',
        'street': '123 Main Way',
        'city': 'Johannesburg',
        'state': 'New Mexico',
        'zip': 96705,
        'country': 'USA',
        'topic': 'Who cares...'
    }

    output_stream = BytesIO()
    # The context manager closes the template even when a PyPDF2 call raises.
    # The file stays open until write() has finished, matching the original
    # ordering where close() came only after the output had been written.
    with open(template, "rb") as input_stream:
        pdf_reader = PyPDF2.PdfFileReader(input_stream, strict=False)
        if "/AcroForm" in pdf_reader.trailer["/Root"]:
            pdf_reader.trailer["/Root"]["/AcroForm"].update(
                {NameObject("/NeedAppearances"): BooleanObject(True)})

        pdf_writer = PyPDF2.PdfFileWriter()
        set_need_appearances_writer(pdf_writer)
        if "/AcroForm" in pdf_writer._root_object:
            # Acro form is form field, set needs appearances to fix printing issues
            pdf_writer._root_object["/AcroForm"].update(
                {NameObject("/NeedAppearances"): BooleanObject(True)})

        pdf_writer.addPage(pdf_reader.getPage(0))
        pdf_writer.updatePageFormFieldValues(pdf_writer.getPage(0), data_dict)
        pdf_writer.write(output_stream)

    response = HttpResponse(output_stream.getvalue(), content_type='application/pdf')
    response['Content-Disposition'] = 'inline; filename="completed.pdf"'
    return response
def set_need_appearances_writer(writer):
    """Set ``/NeedAppearances`` to true on the writer's ``/AcroForm`` entry.

    Best effort: any exception raised along the way is printed instead of
    propagated, and ``writer`` is returned in every case.

    Args:
        writer: Object exposing ``_root_object`` (the catalog mapping) and
            ``_objects`` (the list of objects held by the writer).

    Returns:
        The same ``writer`` that was passed in.
    """
    acro_form_key = "/AcroForm"
    try:
        root = writer._root_object
        if acro_form_key not in root:
            # The catalog has no form entry yet: attach an indirect reference
            # whose id is the writer's current object count, generation 0.
            root.update({
                NameObject(acro_form_key): IndirectObject(
                    len(writer._objects), 0, writer),
            })
        flag_name = NameObject("/NeedAppearances")
        writer._root_object[acro_form_key][flag_name] = BooleanObject(True)
    except Exception as e:
        print('set_need_appearances_writer() catch : ', repr(e))
    return writer
The solution was super simple, read the docs when in doubt (page 552/978):
https://www.adobe.com/content/dam/acom/en/devnet/pdf/pdfs/pdf_reference_archives/PDFReference.pdf
All I needed to do was set bit position 1 of each field's flags entry (`/Ff`), which makes the field ReadOnly, like so:
from io import BytesIO
import PyPDF2
from django.http import HttpResponse
from PyPDF2.generic import BooleanObject, NameObject, IndirectObject, NumberObject
def pdf(request):
    """Fill the template PDF form, lock the filled fields, and return it inline.

    Reads ``templates/template.pdf``, copies its first page into a new
    writer, fills the page's form fields from a fixed dictionary of sample
    values, marks every field named in that dictionary as ReadOnly, and
    returns the result as an ``application/pdf`` response.

    Args:
        request: The incoming Django request (not inspected).

    Returns:
        An ``HttpResponse`` carrying the filled PDF with an inline
        ``Content-Disposition`` of ``completed.pdf``.
    """
    template = 'templates/template.pdf'
    data_dict = {
        'first_name': 'John\n',
        'last_name': 'Smith\n',
        'email': 'mail@mail.com\n',
        'phone': '889-998-9967\n',
        'company': 'Amazing Inc.\n',
        'job_title': 'Dev\n',
        'street': '123 Main Way\n',
        'city': 'Johannesburg\n',
        'state': 'New Mexico\n',
        'zip': 96705,
        'country': 'USA\n',
        'topic': 'Who cares...\n'
    }
    read_only_bit = 1  # bit position 1 of the /Ff field flags

    output_stream = BytesIO()
    # The context manager closes the template even when a PyPDF2 call raises.
    # The file stays open until write() has finished, matching the original
    # ordering where close() came only after the output had been written.
    with open(template, "rb") as input_stream:
        pdf_reader = PyPDF2.PdfFileReader(input_stream, strict=False)
        if "/AcroForm" in pdf_reader.trailer["/Root"]:
            pdf_reader.trailer["/Root"]["/AcroForm"].update(
                {NameObject("/NeedAppearances"): BooleanObject(True)})

        pdf_writer = PyPDF2.PdfFileWriter()
        set_need_appearances_writer(pdf_writer)
        if "/AcroForm" in pdf_writer._root_object:
            # Acro form is form field, set needs appearances to fix printing issues
            pdf_writer._root_object["/AcroForm"].update(
                {NameObject("/NeedAppearances"): BooleanObject(True)})

        pdf_writer.addPage(pdf_reader.getPage(0))
        page = pdf_writer.getPage(0)
        pdf_writer.updatePageFormFieldValues(page, data_dict)

        for annot_ref in page['/Annots']:
            writer_annot = annot_ref.getObject()
            if writer_annot.get('/T') not in data_dict:
                continue
            # OR the ReadOnly bit into the flags already on the field rather
            # than overwriting them, so other flag bits are kept intact.
            current_flags = writer_annot['/Ff'] if '/Ff' in writer_annot else 0
            writer_annot.update({
                NameObject("/Ff"): NumberObject(int(current_flags) | read_only_bit)
            })

        pdf_writer.write(output_stream)

    response = HttpResponse(output_stream.getvalue(), content_type='application/pdf')
    response['Content-Disposition'] = 'inline; filename="completed.pdf"'
    return response
def set_need_appearances_writer(writer):
    """Set ``/NeedAppearances`` to true on the writer's ``/AcroForm`` entry.

    Best effort: any exception raised along the way is printed instead of
    propagated, and ``writer`` is returned in every case.

    Args:
        writer: Object exposing ``_root_object`` (the catalog mapping) and
            ``_objects`` (the list of objects held by the writer).

    Returns:
        The same ``writer`` that was passed in.
    """
    acro_form_key = "/AcroForm"
    try:
        root = writer._root_object
        if acro_form_key not in root:
            # The catalog has no form entry yet: attach an indirect reference
            # whose id is the writer's current object count, generation 0.
            root.update({
                NameObject(acro_form_key): IndirectObject(
                    len(writer._objects), 0, writer),
            })
        flag_name = NameObject("/NeedAppearances")
        writer._root_object[acro_form_key][flag_name] = BooleanObject(True)
    except Exception as e:
        print('set_need_appearances_writer() catch : ', repr(e))
    return writer
来源:https://stackoverflow.com/questions/55187651/python-django-pdf-flattening-of-form-fields