I\'m currently trying ElementTree and it looks fine, it escapes HTML entities and so on and so forth. Am I missing something truly wonderful I haven\'t heard of?
Thi
Another way is using the E Factory builder from lxml (available in Elementtree too)
>>> from lxml import etree
>>> from lxml.builder import E
>>> def CLASS(*args): # class is a reserved word in Python
... return {"class":' '.join(args)}
>>> html = page = (
... E.html( # create an Element called "html"
... E.head(
... E.title("This is a sample document")
... ),
... E.body(
... E.h1("Hello!", CLASS("title")),
... E.p("This is a paragraph with ", E.b("bold"), " text in it!"),
... E.p("This is another paragraph, with a", "\n ",
... E.a("link", href="http://www.python.org"), "."),
... E.p("Here are some reserved characters: <spam&egg>."),
... etree.XML("<p>And finally an embedded XHTML fragment.</p>"),
... )
... )
... )
>>> print(etree.tostring(page, pretty_print=True))
<html>
<head>
<title>This is a sample document</title>
</head>
<body>
<h1 class="title">Hello!</h1>
<p>This is a paragraph with <b>bold</b> text in it!</p>
<p>This is another paragraph, with a
<a href="http://www.python.org">link</a>.</p>
<p>Here are some reservered characters: <spam&egg>.</p>
<p>And finally an embedded XHTML fragment.</p>
</body>
</html>
https://github.com/galvez/xmlwitch:
import xmlwitch
xml = xmlwitch.Builder(version='1.0', encoding='utf-8')
with xml.feed(xmlns='http://www.w3.org/2005/Atom'):
xml.title('Example Feed')
xml.updated('2003-12-13T18:30:02Z')
with xml.author:
xml.name('John Doe')
xml.id('urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6')
with xml.entry:
xml.title('Atom-Powered Robots Run Amok')
xml.id('urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a')
xml.updated('2003-12-13T18:30:02Z')
xml.summary('Some text.')
print(xml)
I assume that you're actually creating an XML DOM tree, because you want to validate that what goes into this file is valid XML, since otherwise you'd just write a static string to a file. If validating your output is indeed your goal, then I'd suggest
from xml.dom.minidom import parseString
doc = parseString("""<html>
<head>
<script type="text/javascript">
var a = 'I love &aacute; letters'
</script>
</head>
<body>
<h1>And I like the fact that 3 > 1</h1>
</body>
</html>""")
with open("foo.xhtml", "w") as f:
f.write( doc.toxml() )
This lets you just write the XML you want to output, validate that it's correct (since parseString will raise an exception if it's invalid) and have your code look much nicer.
Presumably you're not just writing the same static XML every time and want some substitution. In this case I'd have lines like
var a = '%(message)s'
and then use the % operator to do the substitution, like
</html>""" % {"message": "I love &aacute; letters"})
I ended up using saxutils.escape(str) to generate valid XML strings and then validating it with Eli's approach to be sure I didn't miss any tag
from xml.sax import saxutils
from xml.dom.minidom import parseString
from xml.parsers.expat import ExpatError
xml = '''<?xml version="1.0" encoding="%s"?>\n
<contents title="%s" crawl_date="%s" in_text_date="%s"
url="%s">\n<main_post>%s</main_post>\n</contents>''' %
(self.encoding, saxutils.escape(title), saxutils.escape(time),
saxutils.escape(date), saxutils.escape(url), saxutils.escape(contents))
try:
minidoc = parseString(xml)
catch ExpatError:
print "Invalid xml"
Try http://uche.ogbuji.net/tech/4suite/amara. It is quite complete and has a straight forward set of access tools. Normal Unicode support, etc.
#
#Output the XML entry
#
def genFileOLD(out,label,term,idval):
filename=entryTime() + ".html"
writer=MarkupWriter(out, indent=u"yes")
writer.startDocument()
#Test element and attribute writing
ans=namespace=u'http://www.w3.org/2005/Atom'
xns=namespace=u'http://www.w3.org/1999/xhtml'
writer.startElement(u'entry',
ans,
extraNss={u'x':u'http://www.w3.org/1999/xhtml' ,
u'dc':u'http://purl.org/dc/elements/1.1'})
#u'a':u'http://www.w3.org/2005/Atom',
#writer.attribute(u'xml:lang',unicode("en-UK"))
writer.simpleElement(u'title',ans,content=unicode(label))
#writer.simpleElement(u'a:subtitle',ans,content=u' ')
id=unicode("http://www.dpawson.co.uk/nodesets/"+afn.split(".")[0])
writer.simpleElement(u'id',ans,content=id)
writer.simpleElement(u'updated',ans,content=unicode(dtime()))
writer.startElement(u'author',ans)
writer.simpleElement(u'name',ans,content=u'Dave ')
writer.simpleElement(u'uri',ans,
content=u'http://www.dpawson.co.uk/nodesets/'+afn+".xml")
writer.endElement(u'author')
writer.startElement(u'category', ans)
if (prompt):
label=unicode(raw_input("Enter label "))
writer.attribute(u'label',unicode(label))
if (prompt):
term = unicode(raw_input("Enter term to use "))
writer.attribute(u'term', unicode(term))
writer.endElement(u'category')
writer.simpleElement(u'rights',ans,content=u'\u00A9 Dave 2005-2008')
writer.startElement(u'link',ans)
writer.attribute(u'href',
unicode("http://www.dpawson.co.uk/nodesets/entries/"+afn+".html"))
writer.attribute(u'rel',unicode("alternate"))
writer.endElement(u'link')
writer.startElement(u'published', ans)
dt=dtime()
dtu=unicode(dt)
writer.text(dtu)
writer.endElement(u'published')
writer.simpleElement(u'summary',ans,content=unicode(label))
writer.startElement(u'content',ans)
writer.attribute(u'type',unicode("xhtml"))
writer.startElement(u'div',xns)
writer.simpleElement(u'h3',xns,content=unicode(label))
writer.endElement(u'div')
writer.endElement(u'content')
writer.endElement(u'entry')
For anyone encountering this now, there's actually a way to do this hidden away in Python's standard library in xml.sax.utils.XMLGenerator. Here's an example of it in action:
>>> from xml.sax.saxutils import XMLGenerator
>>> import StringIO
>>> w = XMLGenerator(out, 'utf-8')
>>> w.startDocument()
>>> w.startElement("test", {'bar': 'baz'})
>>> w.characters("Foo")
>>> w.endElement("test")
>>> w.endDocument()
>>> print out.getvalue()
<?xml version="1.0" encoding="utf-8"?>
<test bar="baz">Foo</test>