问题
How do I preserve the custom properties of an xlsx template that I am modifying with openpyxl? When I save() the workbook using openpyxl, these custom properties vanish!
Custom properties can be found here:
On Mac: go to the File menu in Excel -> Properties... -> Custom tab -> Properties section.
回答1:
I am posting a pure python solution to reading and writing Workbook.CustomDocumentProperties just because I am currently also feeling the pain of not having this in openpyxl, and I needed a quick workaround for a personal automation project.
In fact, I will try to implement this feature (and hopefully later Worksheet.CustomProperties) into openpyxl myself if I can get my head around how to do all the plumbing the library needs: https://foss.heptapod.net/openpyxl/openpyxl/-/issues/1003
So for now, here is a workaround, converting the .xlsx to .zip, then reading and writing the .xml files in the zip directly, and then renaming to .xlsx at the end.
To read Workbook.CustomDocumentProperties you can do this - only very slightly modified from this great answer: https://stackoverflow.com/a/46919795/9792594
from lxml import etree as ET
import zipfile
def get_custom_doc_properties(filename):
    """Return the custom document properties stored in an .xlsx workbook.

    An .xlsx file is a zip archive, so it is opened in place with
    ``zipfile``; the workbook is neither renamed nor modified.

    Args:
        filename: Path to the .xlsx file.

    Returns:
        A dict mapping each custom property name to the text of its value
        element. Values are returned as the raw text found in the XML (no
        conversion to int/float/bool/datetime is performed). The dict is
        empty when the archive has no ``docProps/custom.xml`` part.

    Raises:
        FileNotFoundError: If ``filename`` does not exist.
        zipfile.BadZipFile: If ``filename`` is not a zip archive.
    """
    docPr_ns = "{http://schemas.openxmlformats.org/officeDocument/2006/custom-properties}"
    with zipfile.ZipFile(filename) as archive:
        try:
            text = archive.read('docProps/custom.xml')
        except KeyError:
            # The part is only present when the workbook has at least one
            # custom property.
            return {}
    root = ET.fromstring(text)
    workbook_props = {}
    for child in root:
        if child.tag == f"{docPr_ns}property":
            # Each <property> wraps a single typed value element
            # (vt:lpwstr, vt:i4, vt:r8, vt:bool, vt:filetime, ...).
            for cusPr in child:
                workbook_props[child.attrib['name']] = cusPr.text
    return workbook_props
# Example usage: read the custom document properties of ./example.xlsx.
get_custom_doc_properties('./example.xlsx')
Adding one property to a document that already has custom document properties (and therefore already has a 'docProps/custom.xml' file) is easy: we just append one more custom property to that XML.
(However, if the document has no custom document properties yet, we need to generate the 'docProps/custom.xml' file from scratch, and also add a content-type override and a relationship - see the code comments):
import os
from lxml import etree as ET
import zipfile
import shutil
import datetime
from tempfile import NamedTemporaryFile
def _encode_custom_property_value(value):
    """Map a Python value to its ``vt:`` element name and text form.

    Returns a ``(type_suffix, text)`` tuple, or ``None`` when the type of
    ``value`` is not supported.
    """
    # bool must be tested before int: bool is a subclass of int, so an int
    # check placed first would swallow True/False and emit them as vt:i4.
    if isinstance(value, bool):
        return "bool", "true" if value else "false"
    if isinstance(value, str):
        return "lpwstr", value
    if isinstance(value, int):
        return "i4", str(value)
    if isinstance(value, float):
        return "r8", str(value)
    if isinstance(value, datetime.datetime):
        # Formatted as-is with a trailing "Z"; no timezone conversion is done.
        return "filetime", value.strftime("%Y-%m-%dT%H:%M:%SZ")
    return None


def set_workbook_custom_document_properties(filename, cus_doc_prop_name, cus_doc_prop_val):
    """Add (or update) one custom document property in an .xlsx workbook.

    The workbook is a zip archive. A new archive is built in a temporary
    file next to the workbook and then swapped in with ``os.replace``, so
    the original is left untouched if anything fails part-way through.

    If the workbook has no ``docProps/custom.xml`` part yet, the part is
    created and registered in ``[Content_Types].xml`` and ``_rels/.rels``.
    If a property named ``cus_doc_prop_name`` already exists, its value is
    replaced (keeping its ``pid``) instead of appending a duplicate name.

    Args:
        filename: Path to the .xlsx file to modify in place.
        cus_doc_prop_name: Name of the custom property; must be a ``str``.
        cus_doc_prop_val: Value to store. ``str`` is written as
            ``vt:lpwstr``, ``bool`` as ``vt:bool``, ``int`` as ``vt:i4``,
            ``float`` as ``vt:r8`` and ``datetime.datetime`` as
            ``vt:filetime``.

    Returns:
        None. When the name or the value has an unsupported type, a message
        is printed and the workbook is left unchanged.

    Raises:
        FileNotFoundError: If ``filename`` does not exist.
        zipfile.BadZipFile: If ``filename`` is not a zip archive.
        KeyError: If the custom-properties part has to be created but the
            archive lacks ``[Content_Types].xml`` or ``_rels/.rels``.
    """
    if not isinstance(cus_doc_prop_name, str):
        print("you must supply a string as the 'cus_doc_prop_name'")
        return
    encoded = _encode_custom_property_value(cus_doc_prop_val)
    if encoded is None:
        print("you must supply a string, int, float, bool, or date, as the 'cus_doc_prop_val'")
        return
    docPr_type_suffix, cus_doc_prop_str = encoded

    docPr = "http://schemas.openxmlformats.org/officeDocument/2006/custom-properties"
    docPr_ns = "{%s}" % docPr
    docPr_type = "http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes"
    docPr_type_ns = "{%s}" % docPr_type  # i4, r8, filetime, bool, lpwstr
    docPr_rel_type = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/custom-properties"
    docPr_content_type = "application/vnd.openxmlformats-officedocument.custom-properties+xml"
    content_types_ns = "{http://schemas.openxmlformats.org/package/2006/content-types}"
    rels_ns = "{http://schemas.openxmlformats.org/package/2006/relationships}"
    custom_part = 'docProps/custom.xml'
    content_types_part = '[Content_Types].xml'
    rels_part = '_rels/.rels'
    xml_declaration = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
    base_xml = '{dec}<Properties xmlns="{docPr}" xmlns:vt="{docPr_type}"></Properties>'.format(
        dec=xml_declaration, docPr=docPr, docPr_type=docPr_type).encode('utf-8')

    def serialize(root):
        # Package parts are written as UTF-8 with an XML declaration.
        return ET.tostring(root, encoding="UTF-8", xml_declaration=True)

    path_file = os.path.abspath(filename)
    # The temp file lives in the workbook's own directory so the final
    # os.replace stays on one filesystem.
    with NamedTemporaryFile(dir=os.path.dirname(path_file), suffix=".tmp", delete=False) as tmp_file:
        tmpname = tmp_file.name
    try:
        with zipfile.ZipFile(path_file, 'r') as zip_in:
            with zipfile.ZipFile(tmpname, 'w', zipfile.ZIP_DEFLATED) as zip_out:
                zip_out.comment = zip_in.comment  # preserve the comment
                custom_present = custom_part in zip_in.namelist()

                # Parts that are regenerated below must not be copied
                # verbatim, otherwise the archive would hold them twice.
                rewritten = {custom_part}
                if not custom_present:
                    rewritten.update((content_types_part, rels_part))
                for item in zip_in.infolist():
                    if item.filename not in rewritten:
                        zip_out.writestr(item, zip_in.read(item))

                if custom_present:
                    custom_xml = ET.fromstring(zip_in.read(custom_part))
                else:
                    # custom.xml is not present: create it, add an override
                    # to [Content_Types].xml and a relationship to _rels/.rels.
                    custom_xml = ET.fromstring(base_xml)

                    content_types_xml = ET.fromstring(zip_in.read(content_types_part))
                    child_override = ET.SubElement(content_types_xml, content_types_ns + "Override")
                    child_override.set('PartName', '/docProps/custom.xml')
                    child_override.set('ContentType', docPr_content_type)
                    zip_out.writestr(content_types_part, serialize(content_types_xml))

                    rels_xml = ET.fromstring(zip_in.read(rels_part))
                    # Pick the first unused "rIdN"; existing ids are only
                    # compared, never parsed, so unusual ids cannot break this.
                    used_ids = {node.get('Id') for node in rels_xml}
                    next_rid = 1
                    while "rId%d" % next_rid in used_ids:
                        next_rid += 1
                    child_rel = ET.SubElement(rels_xml, rels_ns + "Relationship")
                    child_rel.set('Id', "rId%d" % next_rid)
                    child_rel.set('Type', docPr_rel_type)
                    child_rel.set('Target', 'docProps/custom.xml')
                    zip_out.writestr(rels_part, serialize(rels_xml))

                properties = [node for node in custom_xml if node.tag == docPr_ns + "property"]
                child = None
                for node in properties:
                    if node.get('name') == cus_doc_prop_name:
                        child = node
                        break
                if child is not None:
                    # Same name already stored: drop the old value element
                    # and reuse the property (and its pid).
                    for stale in list(child):
                        child.remove(stale)
                else:
                    # New pid is one above the highest in use, and at least 2.
                    max_pid = max([1] + [int(node.attrib['pid']) for node in properties])
                    child = ET.SubElement(custom_xml, docPr_ns + "property")
                    child.set('fmtid', "{D5CDD505-2E9C-101B-9397-08002B2CF9AE}")
                    child.set('pid', str(max_pid + 1))
                    child.set('name', cus_doc_prop_name)
                val = ET.SubElement(child, docPr_type_ns + docPr_type_suffix)
                val.text = cus_doc_prop_str
                zip_out.writestr(custom_part, serialize(custom_xml))

        # Keep the workbook's permission bits, then swap the new archive in.
        shutil.copymode(path_file, tmpname)
        os.replace(tmpname, path_file)
    finally:
        # Only still present when something above failed.
        if os.path.exists(tmpname):
            os.remove(tmpname)
# Example usage: store the float 2.5 under the name "testDocProp7".
set_workbook_custom_document_properties('./example.xlsx', "testDocProp7", 2.5)
来源:https://stackoverflow.com/questions/52831565/openpyxl-how-to-preserve-xlsx-custom-properties