XML writing tools for Python

后端 未结 8 1009
眼角桃花
眼角桃花 2020-12-23 09:30

I'm currently trying ElementTree and it looks fine, it escapes HTML entities and so on and so forth. Am I missing something truly wonderful I haven't heard of?

Thi

相关标签:
8条回答
  • 2020-12-23 09:56

    Another way is to use the E-factory builder from lxml (also available in ElementTree):

    >>> from lxml import etree
    
    >>> from lxml.builder import E
    
    >>> def CLASS(*args): # class is a reserved word in Python
    ...     return {"class":' '.join(args)}
    
    >>> html = page = (
    ...   E.html(       # create an Element called "html"
    ...     E.head(
    ...       E.title("This is a sample document")
    ...     ),
    ...     E.body(
    ...       E.h1("Hello!", CLASS("title")),
    ...       E.p("This is a paragraph with ", E.b("bold"), " text in it!"),
    ...       E.p("This is another paragraph, with a", "\n      ",
    ...         E.a("link", href="http://www.python.org"), "."),
    ...       E.p("Here are some reserved characters: <spam&egg>."),
    ...       etree.XML("<p>And finally an embedded XHTML fragment.</p>"),
    ...     )
    ...   )
    ... )
    
    >>> print(etree.tostring(page, pretty_print=True))
    <html>
      <head>
        <title>This is a sample document</title>
      </head>
      <body>
        <h1 class="title">Hello!</h1>
        <p>This is a paragraph with <b>bold</b> text in it!</p>
        <p>This is another paragraph, with a
          <a href="http://www.python.org">link</a>.</p>
        <p>Here are some reservered characters: &lt;spam&amp;egg&gt;.</p>
        <p>And finally an embedded XHTML fragment.</p>
      </body>
    </html>
    
    0 讨论(0)
  • 2020-12-23 09:59

    https://github.com/galvez/xmlwitch:

    import xmlwitch
    # One Builder instance is used for the whole document and printed at the end.
    xml = xmlwitch.Builder(version='1.0', encoding='utf-8')
    # Each "with" block groups the calls nested inside it; the indentation
    # mirrors the intended nesting of the Atom feed (feed > author / entry).
    with xml.feed(xmlns='http://www.w3.org/2005/Atom'):
        xml.title('Example Feed')
        xml.updated('2003-12-13T18:30:02Z')
        with xml.author:
            xml.name('John Doe')
        xml.id('urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6')
        with xml.entry:
            xml.title('Atom-Powered Robots Run Amok')
            xml.id('urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a')
            xml.updated('2003-12-13T18:30:02Z')
            xml.summary('Some text.')
    # NOTE(review): presumably printing the builder emits the accumulated
    # markup -- confirm against the xmlwitch README.
    print(xml)
    
    0 讨论(0)
  • 2020-12-23 10:00

    I assume that you're actually creating an XML DOM tree, because you want to validate that what goes into this file is valid XML, since otherwise you'd just write a static string to a file. If validating your output is indeed your goal, then I'd suggest

    from xml.dom.minidom import parseString
    
    # parseString() doubles as the validity check: it raises if the markup
    # below is not well-formed, so the file is never written in that case.
    doc = parseString("""<html>
        <head>
            <script type="text/javascript">
                var a = 'I love &amp;aacute; letters'
            </script>
        </head>
        <body>
            <h1>And I like the fact that 3 &gt; 1</h1>
        </body>
        </html>""")
    
    # Serialise the parsed document back to text and save it.
    with open("foo.xhtml", "w") as f:
        f.write( doc.toxml() )
    

    This lets you just write the XML you want to output, validate that it's correct (since parseString will raise an exception if it's invalid) and have your code look much nicer.

    Presumably you're not just writing the same static XML every time and want some substitution. In this case I'd have lines like

    var a = '%(message)s'
    

    and then use the % operator to do the substitution, like

    </html>""" % {"message": "I love &amp;aacute; letters"})
    
    0 讨论(0)
  • 2020-12-23 10:07

    I ended up using saxutils.escape(str) to generate valid XML strings and then validating it with Eli's approach to be sure I didn't miss any tag

    from xml.sax import saxutils
    from xml.dom.minidom import parseString
    from xml.parsers.expat import ExpatError
    
    # escape() only replaces &, < and >.  The attribute values below sit
    # inside double quotes, so '"' is escaped as well; otherwise a quote in
    # the data would end the attribute early.
    attr_entities = {'"': '&quot;'}

    # Parenthesised so the template, the % operator and its argument tuple
    # can span several lines (a bare trailing % is a syntax error).
    xml = ('''<?xml version="1.0" encoding="%s"?>\n
    <contents title="%s" crawl_date="%s" in_text_date="%s" 
    url="%s">\n<main_post>%s</main_post>\n</contents>''' % (
        self.encoding,
        saxutils.escape(title, attr_entities),
        saxutils.escape(time, attr_entities),
        saxutils.escape(date, attr_entities),
        saxutils.escape(url, attr_entities),
        saxutils.escape(contents),
    ))
    try:
        # Parsing the generated text is purely a well-formedness check.
        minidoc = parseString(xml)
    except ExpatError:
        print("Invalid xml")
    
    0 讨论(0)
  • 2020-12-23 10:16

    Try http://uche.ogbuji.net/tech/4suite/amara. It is quite complete and has a straightforward set of access tools. Normal Unicode support, etc.

    #
    #Output the XML entry
    #
    def genFileOLD(out, label, term, idval):
        """Write one Atom ``entry`` element to *out* through a MarkupWriter.

        The entry is emitted in this order: title, id, updated, author,
        category, rights, link, published, summary and an XHTML ``content``
        block holding *label* in an ``h3``.  ``idval`` is accepted but not
        used by this function.

        Besides its arguments the function reads ``afn`` and ``prompt``,
        which are defined outside it.  When ``prompt`` is truthy, *label* and
        *term* are replaced by values typed in via ``raw_input`` while the
        ``category`` element is being written, so ``title`` carries the
        original label while ``summary`` and the ``h3`` carry the typed one.

        NOTE(review): ``startDocument()`` is called here but no matching
        ``endDocument()`` appears in this function -- confirm the caller
        finishes the document.
        """
        atom_ns = u'http://www.w3.org/2005/Atom'
        xhtml_ns = u'http://www.w3.org/1999/xhtml'

        # Computed first, as before, but not referenced again in this function.
        filename = entryTime() + ".html"

        writer = MarkupWriter(out, indent=u"yes")
        writer.startDocument()

        # Root element, declaring the extra namespace prefixes up front.
        writer.startElement(
            u'entry',
            atom_ns,
            extraNss={
                u'x': xhtml_ns,
                u'dc': u'http://purl.org/dc/elements/1.1',
            },
        )

        writer.simpleElement(u'title', atom_ns, content=unicode(label))
        entry_id = unicode(
            "http://www.dpawson.co.uk/nodesets/" + afn.split(".")[0])
        writer.simpleElement(u'id', atom_ns, content=entry_id)
        writer.simpleElement(u'updated', atom_ns, content=unicode(dtime()))

        # <author>
        writer.startElement(u'author', atom_ns)
        writer.simpleElement(u'name', atom_ns, content=u'Dave ')
        writer.simpleElement(
            u'uri',
            atom_ns,
            content=u'http://www.dpawson.co.uk/nodesets/' + afn + ".xml",
        )
        writer.endElement(u'author')

        # <category>: each value may be overridden interactively right before
        # its attribute is written.
        writer.startElement(u'category', atom_ns)
        if prompt:
            label = unicode(raw_input("Enter label "))
        writer.attribute(u'label', unicode(label))
        if prompt:
            term = unicode(raw_input("Enter term to use "))
        writer.attribute(u'term', unicode(term))
        writer.endElement(u'category')

        writer.simpleElement(
            u'rights', atom_ns, content=u'\u00A9 Dave 2005-2008')

        # <link rel="alternate">
        writer.startElement(u'link', atom_ns)
        writer.attribute(
            u'href',
            unicode("http://www.dpawson.co.uk/nodesets/entries/" + afn + ".html"),
        )
        writer.attribute(u'rel', unicode("alternate"))
        writer.endElement(u'link')

        # <published>
        writer.startElement(u'published', atom_ns)
        writer.text(unicode(dtime()))
        writer.endElement(u'published')

        writer.simpleElement(u'summary', atom_ns, content=unicode(label))

        # <content type="xhtml"> wrapping an XHTML <div><h3>label</h3></div>
        writer.startElement(u'content', atom_ns)
        writer.attribute(u'type', unicode("xhtml"))
        writer.startElement(u'div', xhtml_ns)
        writer.simpleElement(u'h3', xhtml_ns, content=unicode(label))
        writer.endElement(u'div')
        writer.endElement(u'content')

        writer.endElement(u'entry')
    
    0 讨论(0)
  • 2020-12-23 10:21

    For anyone encountering this now, there's actually a way to do this hidden away in Python's standard library in xml.sax.saxutils.XMLGenerator. Here's an example of it in action:

    >>> from xml.sax.saxutils import XMLGenerator
    >>> import StringIO
    >>> w = XMLGenerator(out, 'utf-8')
    >>> w.startDocument()
    >>> w.startElement("test", {'bar': 'baz'})
    >>> w.characters("Foo")
    >>> w.endElement("test")
    >>> w.endDocument()
    >>> print out.getvalue()
    <?xml version="1.0" encoding="utf-8"?>
    <test bar="baz">Foo</test>
    
    0 讨论(0)
提交回复
热议问题