openpyxl - How to preserve xlsx custom properties

不想你离开。 提交于 2020-08-08 05:49:28

问题


How do I preserve custom properties from xlsx template which I am modifying with openpyxl? When I save() workbook using openpyxl these custom properties vanish!

Custom properties can be found here:-

On Mac -> Go to File Menu in Excel -> Properties ... -> Custom tab -> Properties section


回答1:


I am posting a pure-Python solution for reading and writing Workbook.CustomDocumentProperties because I am also currently feeling the pain of not having this in openpyxl, and I needed a quick workaround for a personal automation project.

In fact, I will try to implement this feature (and hopefully later Worksheet.CustomProperties) into openpyxl myself if I can get my head around how to do all the plumbing the library needs: https://foss.heptapod.net/openpyxl/openpyxl/-/issues/1003

So for now, here is a workaround, converting the .xlsx to .zip, then reading and writing the .xml files in the zip directly, and then renaming to .xlsx at the end.

To read Workbook.CustomDocumentProperties you can do this - only very slightly modified from this great answer: https://stackoverflow.com/a/46919795/9792594

from lxml import etree as ET
import zipfile    

def get_custom_doc_properties(filename):
    """Return the custom document properties stored in an .xlsx workbook.

    The workbook is opened directly as a zip archive (it is never renamed
    or modified) and the ``docProps/custom.xml`` part is parsed.

    Args:
        filename: Path to the .xlsx file.

    Returns:
        A dict mapping each custom property name to the text of its value
        element. The dict is empty when the workbook has no
        ``docProps/custom.xml`` part.
    """
    docPr_ns = "{http://schemas.openxmlformats.org/officeDocument/2006/custom-properties}"
    property_tag = f"{docPr_ns}property"
    custom_part = 'docProps/custom.xml'

    workbook_props = {}
    # zipfile reads the archive by content, not by extension, so the file
    # does not have to be renamed to ".zip" first.
    with zipfile.ZipFile(filename) as archive:
        if custom_part not in archive.namelist():
            # No custom properties have ever been saved in this workbook.
            return workbook_props
        xml = ET.fromstring(archive.read(custom_part))

    for child in xml:
        if child.tag == property_tag:
            # Each <property> wraps one typed value element (vt:lpwstr, vt:i4, ...).
            for cusPr in child:
                workbook_props[child.attrib['name']] = cusPr.text
    return workbook_props

# Call it like this (the result is a dict of property name -> value text):
get_custom_doc_properties(f'./example.xlsx')

Adding one property to a document which already has custom document properties (and therefore already has a 'docProps/custom.xml' file) is pretty easy: we just append one more custom property to the XML.

(However, if the document had no current custom doc props, then we need to generate the 'docProps/custom.xml' file from scratch, as well as add a content override and a relationship - see code comments):

import os
from lxml import etree as ET
import zipfile
import shutil
import datetime
from tempfile import NamedTemporaryFile

def _encode_custom_property_value(value):
    """Return ``(vt_type_suffix, text)`` for *value*, or ``None`` if its type is unsupported."""
    # bool has to be tested before int because bool is a subclass of int;
    # otherwise True/False would be stored as an "i4" with text "True".
    if isinstance(value, bool):
        return "bool", "true" if value else "false"
    if isinstance(value, str):
        return "lpwstr", value
    if isinstance(value, int):
        return "i4", str(value)
    if isinstance(value, float):
        return "r8", str(value)
    if isinstance(value, datetime.datetime):
        if value.tzinfo is not None:
            # The text below is suffixed with "Z", so aware values are moved to UTC first.
            value = value.astimezone(datetime.timezone.utc)
        return "filetime", value.strftime("%Y-%m-%dT%H:%M:%SZ")
    return None


def _namespace_prefix(element):
    """Return the ``{uri}`` prefix of *element*'s tag, or ``""`` when the tag is unqualified."""
    tag = element.tag
    if isinstance(tag, str) and tag.startswith("{"):
        return tag[:tag.index("}") + 1]
    return ""


def _serialize_part(root):
    """Serialize *root* to UTF-8 bytes that start with an XML declaration."""
    return ET.tostring(root, encoding="UTF-8", xml_declaration=True)


def set_workbook_custom_document_properties(filename, cus_doc_prop_name, cus_doc_prop_val):
    """Add or update one custom document property inside an .xlsx workbook.

    The workbook is read as a zip archive, a new archive is written to a
    temporary file in the same directory, and that file then replaces the
    original. The original is therefore left untouched if anything fails
    before the final replace.

    If the workbook has no ``docProps/custom.xml`` part yet, the part is
    created and registered with an ``Override`` in ``[Content_Types].xml``
    and a ``Relationship`` in ``_rels/.rels``. If a property named
    *cus_doc_prop_name* already exists, its value is replaced and its
    ``pid`` is kept; otherwise a new property is appended.

    Args:
        filename: Path to the .xlsx file; it is rewritten in place.
        cus_doc_prop_name: Name of the property (must be a ``str``).
        cus_doc_prop_val: Value of the property. Supported types and the
            element they are stored as: ``str`` (lpwstr), ``bool`` (bool),
            ``int`` (i4), ``float`` (r8), ``datetime.datetime`` (filetime).

    Returns:
        None. When the name or value has an unsupported type, a message is
        printed and the workbook is not touched.
    """
    if not isinstance(cus_doc_prop_name, str):
        print("you must supply a string as the 'cus_doc_prop_name'")
        return

    encoded = _encode_custom_property_value(cus_doc_prop_val)
    if encoded is None:
        print("you must supply a string, int, float, bool, or date, as the 'cus_doc_prop_val'")
        return
    docPr_type_suffix, cus_doc_prop_str = encoded

    path_file = os.path.abspath(filename)

    docPr = "http://schemas.openxmlformats.org/officeDocument/2006/custom-properties"
    docPr_ns = "{%s}" % docPr
    docPr_type = "http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes"
    docPr_type_ns = "{%s}" % docPr_type  # i4, r8, filetime, bool, lpwstr
    docPr_rel_type = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/custom-properties"
    docPr_content_type = "application/vnd.openxmlformats-officedocument.custom-properties+xml"
    xml_declaration = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>'
    base_xml = '{dec}<Properties xmlns="{docPr}" xmlns:vt="{docPr_type}"></Properties>'.format(
        dec=xml_declaration, docPr=docPr, docPr_type=docPr_type).encode('utf-8')

    custom_part = 'docProps/custom.xml'
    property_tag = docPr_ns + "property"

    # The temporary archive lives next to the workbook so the final
    # os.replace() never has to cross a filesystem boundary.
    with NamedTemporaryFile(dir=os.path.dirname(path_file), suffix=".tmp", delete=False) as tmp_file:
        tmpname = tmp_file.name

    try:
        with zipfile.ZipFile(path_file, 'r') as zip_in:
            # Part name -> new bytes, for every part that is changed or added.
            replaced = {}

            if custom_part in zip_in.namelist():
                custom_xml = ET.fromstring(zip_in.read(custom_part))
            else:
                # custom.xml is not present: create it, add an override to
                # [Content_Types].xml and a relationship to _rels/.rels.
                custom_xml = ET.fromstring(base_xml)

                content_types_xml = ET.fromstring(zip_in.read('[Content_Types].xml'))
                # New children are created in the namespace of their parent
                # so they are real siblings of the existing entries.
                child_override = ET.SubElement(
                    content_types_xml, _namespace_prefix(content_types_xml) + "Override")
                child_override.set('PartName', '/docProps/custom.xml')
                child_override.set('ContentType', docPr_content_type)
                replaced['[Content_Types].xml'] = _serialize_part(content_types_xml)

                rels_xml = ET.fromstring(zip_in.read('_rels/.rels'))
                used_ids = {node.get('Id') for node in rels_xml if isinstance(node.tag, str)}
                # Relationship ids only need to be unique, so probe for a free
                # "rIdN" instead of assuming every existing id is numeric.
                next_rid = len(used_ids) + 1
                while "rId%d" % next_rid in used_ids:
                    next_rid += 1
                child_rel = ET.SubElement(rels_xml, _namespace_prefix(rels_xml) + "Relationship")
                child_rel.set('Id', "rId%d" % next_rid)
                child_rel.set('Type', docPr_rel_type)
                child_rel.set('Target', custom_part)
                replaced['_rels/.rels'] = _serialize_part(rels_xml)

            properties = [node for node in custom_xml if node.tag == property_tag]
            child = next(
                (node for node in properties if node.get('name') == cus_doc_prop_name), None)
            if child is None:
                # Start from 1 so that the first property ever written gets pid 2.
                max_pid = max((int(node.get('pid', '1')) for node in properties), default=1)
                child = ET.SubElement(custom_xml, property_tag)
                child.set('fmtid', "{D5CDD505-2E9C-101B-9397-08002B2CF9AE}")
                child.set('pid', str(max_pid + 1))
                child.set('name', cus_doc_prop_name)
            else:
                # Reuse the existing element so the part never carries two
                # properties with the same name; only its value is replaced.
                for old_value in list(child):
                    child.remove(old_value)
            val = ET.SubElement(child, f"{docPr_type_ns}{docPr_type_suffix}")
            val.text = cus_doc_prop_str
            replaced[custom_part] = _serialize_part(custom_xml)

            with zipfile.ZipFile(tmpname, 'w', zipfile.ZIP_DEFLATED) as zip_out:
                zip_out.comment = zip_in.comment  # preserve the comment
                # Copy every entry in its original order, swapping in the
                # rewritten bytes where a part was modified.
                for item in zip_in.infolist():
                    data = replaced.pop(item.filename, None)
                    if data is None:
                        data = zip_in.read(item.filename)
                    zip_out.writestr(item, data)
                # Whatever is left did not exist before (a new custom.xml).
                for part_name, data in replaced.items():
                    zip_out.writestr(part_name, data)

        # The input archive is closed by now, so it can be replaced safely.
        shutil.copymode(path_file, tmpname)
        os.replace(tmpname, path_file)
    finally:
        # Only still present when something failed before the replace.
        if os.path.exists(tmpname):
            os.remove(tmpname)

# Call it like this (a float value such as 2.5 is stored with the "r8" type):
set_workbook_custom_document_properties(f'./example.xlsx', "testDocProp7", 2.5)


来源:https://stackoverflow.com/questions/52831565/openpyxl-how-to-preserve-xlsx-custom-properties

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!