问题
Our program displays a tree control showing the metadata structure of the XML file they are using as a datasource. So it displays all elements & attributes in use in the XML file, like this:
Employees
Employee
FirstName
LastName
Orders
Order
OrderId
For the case where the user does not pass us a XSD file, we need to walk the XML file and build up the metadata structure.
The full code for this is at SaxonQuestions.zip, TestBuildTree.java and is also listed below.
I am concerned that my code is not the most efficient, or maybe even wrong. It works, but it works on the 3 XML files I tested it on. My questions are:
- What is the best way to get the root element from the DOM? Is it walking the children of the DOM root node?
- What is the best way to determine if an element has data (as opposed to just child elements)? The best I could come up with throws an exception if not and I don't think code executing the happy path should throw an exception.
- What is the best way to get the class of the data held in an element or attribute? Is it: ((XdmAtomicValue)((XdmNode)currentNode).getTypedValue()).getValue().getClass();
- Is the best way to walk all the nodes to use XdmNode.axisIterator? And to do so as I have in this code?
TestBuildTree.java
import net.sf.saxon.s9api.*;
import java.io.File;
import java.io.FileInputStream;
import java.util.ArrayList;
import java.util.List;
public class TestBuildTree {
public static void main(String[] args) throws Exception {
XmlDatasource datasource = new XmlDatasource(
new FileInputStream(new File("files", "SouthWind.xml").getCanonicalPath()),
null);
// Question:
// Is this the best way to get the root element?
// get the root element
XdmNode rootNode = null;
for (XdmNode node : datasource.getXmlRootNode().children()) {
if (node.getNodeKind() == XdmNodeKind.ELEMENT) {
rootNode = node;
break;
}
}
TestBuildTree buildTree = new TestBuildTree(rootNode);
Element root = buildTree.addNode();
System.out.println("Schema:");
printElement("", root);
}
private static void printElement(String indent, Element element) {
System.out.println(indent + "<" + element.name + "> (" + (element.type == null ? "null" : element.type.getSimpleName()) + ")");
indent += " ";
for (Attribute attr : element.attributes)
System.out.println(indent + "=" + attr.name + " (" + (attr.type == null ? "null" : attr.type.getSimpleName()) + ")");
for (Element child : element.children)
printElement(indent, child);
}
protected XdmItem currentNode;
public TestBuildTree(XdmItem currentNode) {
this.currentNode = currentNode;
}
private Element addNode() throws SaxonApiException {
String name = ((XdmNode)currentNode).getNodeName().getLocalName();
// Question:
// Is this the best way to determine that this element has data (as opposed to child elements)?
Boolean elementHasData;
try {
((XdmNode) currentNode).getTypedValue();
elementHasData = true;
} catch (Exception ex) {
elementHasData = false;
}
// Questions:
// Is this the best way to get the type of the element value?
// If no schema is it always String?
Class valueClass = ! elementHasData ? null : ((XdmAtomicValue)((XdmNode)currentNode).getTypedValue()).getValue().getClass();
Element element = new Element(name, valueClass, null);
// add in attributes
XdmSequenceIterator currentSequence;
if ((currentSequence = moveTo(Axis.ATTRIBUTE)) != null) {
do {
name = ((XdmNode) currentNode).getNodeName().getLocalName();
// Questions:
// Is this the best way to get the type of the attribute value?
// If no schema is it always String?
valueClass = ((XdmAtomicValue)((XdmNode)currentNode).getTypedValue()).getValue().getClass();
Attribute attr = new Attribute(name, valueClass, null);
element.attributes.add(attr);
} while (moveToNextInCurrentSequence(currentSequence));
moveTo(Axis.PARENT);
}
// add in children elements
if ((currentSequence = moveTo(Axis.CHILD)) != null) {
do {
Element child = addNode();
// if we don't have this, add it
Element existing = element.getChildByName(child.name);
if (existing == null)
element.children.add(child);
else
// add in any children this does not have
existing.addNewItems (child);
} while (moveToNextInCurrentSequence(currentSequence));
moveTo(Axis.PARENT);
}
return element;
}
// moves to element or attribute
private XdmSequenceIterator moveTo(Axis axis) {
XdmSequenceIterator en = ((XdmNode) currentNode).axisIterator(axis);
boolean gotIt = false;
while (en.hasNext()) {
currentNode = en.next();
if (((XdmNode) currentNode).getNodeKind() == XdmNodeKind.ELEMENT || ((XdmNode) currentNode).getNodeKind() == XdmNodeKind.ATTRIBUTE) {
gotIt = true;
break;
}
}
if (gotIt) {
if (axis == Axis.ATTRIBUTE || axis == Axis.CHILD || axis == Axis.NAMESPACE)
return en;
return null;
}
return null;
}
// moves to next element or attribute
private Boolean moveToNextInCurrentSequence(XdmSequenceIterator currentSequence)
{
if (currentSequence == null)
return false;
while (currentSequence.hasNext())
{
currentNode = currentSequence.next();
if (((XdmNode)currentNode).getNodeKind() == XdmNodeKind.ELEMENT || ((XdmNode)currentNode).getNodeKind() == XdmNodeKind.ATTRIBUTE)
return true;
}
return false;
}
static class Node {
String name;
Class type;
String description;
public Node(String name, Class type, String description) {
this.name = name;
this.type = type;
this.description = description;
}
}
static class Element extends Node {
List<Element> children;
List<Attribute> attributes;
public Element(String name, Class type, String description) {
super(name, type, description);
children = new ArrayList<>();
attributes = new ArrayList<>();
}
public Element getChildByName(String name) {
for (Element child : children) {
if (child.name.equals(name))
return child;
}
return null;
}
public void addNewItems(Element child) {
for (Attribute attrAdd : child.attributes) {
boolean haveIt = false;
for (Attribute attrExist : attributes)
if (attrExist.name.equals(attrAdd.name)) {
haveIt = true;
break;
}
if (!haveIt)
attributes.add(attrAdd);
}
for (Element elemAdd : child.children) {
Element exist = null;
for (Element elemExist : children)
if (elemExist.name.equals(elemAdd.name)) {
exist = elemExist;
break;
}
if (exist == null)
children.add(elemAdd);
else
exist.addNewItems(elemAdd);
}
}
}
static class Attribute extends Node {
public Attribute(String name, Class type, String description) {
super(name, type, description);
}
}
}
XmlDatasource.java
import com.saxonica.config.EnterpriseConfiguration;
import com.saxonica.ee.s9api.SchemaValidatorImpl;
import net.sf.saxon.Configuration;
import net.sf.saxon.lib.FeatureKeys;
import net.sf.saxon.s9api.*;
import net.sf.saxon.type.SchemaException;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.XMLReaderFactory;
import javax.xml.transform.Source;
import javax.xml.transform.sax.SAXSource;
import javax.xml.transform.stream.StreamSource;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
public class XmlDatasource {
/** the DOM all searches are against */
private XdmNode xmlRootNode;
private XPathCompiler xPathCompiler;
/** key == the prefix; value == the uri mapped to that prefix */
private HashMap<String, String> prefixToUriMap = new HashMap<>();
/** key == the uri mapped to that prefix; value == the prefix */
private HashMap<String, String> uriToPrefixMap = new HashMap<>();
public XmlDatasource (InputStream xmlData, InputStream schemaFile) throws SAXException, SchemaException, SaxonApiException, IOException {
boolean haveSchema = schemaFile != null;
// call this before any instantiation of Saxon classes.
Configuration config = createEnterpriseConfiguration();
if (haveSchema) {
Source schemaSource = new StreamSource(schemaFile);
config.addSchemaSource(schemaSource);
}
Processor processor = new Processor(config);
DocumentBuilder doc_builder = processor.newDocumentBuilder();
XMLReader reader = createXMLReader();
InputSource xmlSource = new InputSource(xmlData);
SAXSource saxSource = new SAXSource(reader, xmlSource);
if (haveSchema) {
SchemaValidator validator = new SchemaValidatorImpl(processor);
doc_builder.setSchemaValidator(validator);
}
xmlRootNode = doc_builder.build(saxSource);
xPathCompiler = processor.newXPathCompiler();
if (haveSchema)
xPathCompiler.setSchemaAware(true);
declareNameSpaces();
}
public XdmNode getXmlRootNode() {
return xmlRootNode;
}
public XPathCompiler getxPathCompiler() {
return xPathCompiler;
}
/**
* Create a XMLReader set to disallow XXE aattacks.
* @return a safe XMLReader.
*/
public static XMLReader createXMLReader() throws SAXException {
XMLReader reader = XMLReaderFactory.createXMLReader();
// stop XXE https://www.owasp.org/index.php/XML_External_Entity_(XXE)_Prevention_Cheat_Sheet#JAXP_DocumentBuilderFactory.2C_SAXParserFactory_and_DOM4J
reader.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
reader.setFeature("http://xml.org/sax/features/external-general-entities", false);
reader.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
return reader;
}
private void declareNameSpaces() throws SaxonApiException {
// saxon has some of their functions set up with this.
prefixToUriMap.put("saxon", "http://saxon.sf.net");
uriToPrefixMap.put("http://saxon.sf.net", "saxon");
XdmValue list = xPathCompiler.evaluate("//namespace::*", xmlRootNode);
if (list == null || list.size() == 0)
return;
for (int index=0; index<list.size(); index++) {
XdmNode node = (XdmNode) list.itemAt(index);
String prefix = node.getNodeName() == null ? "" : node.getNodeName().getLocalName();
// xml, xsd, & xsi are XML structure ones, not ones used in the XML
if (prefix.equals("xml") || prefix.equals("xsd") || prefix.equals("xsi"))
continue;
// use default prefix if prefix is empty.
if (prefix == null || prefix.isEmpty())
prefix = "def";
// this returns repeats, so if a repeat, go on to next.
if (prefixToUriMap.containsKey(prefix))
continue;
String uri = node.getStringValue();
if (uri != null && !uri.isEmpty()) {
xPathCompiler.declareNamespace(prefix, uri);
prefixToUriMap.put(prefix, uri);
uriToPrefixMap.put(uri, prefix); }
}
}
public static EnterpriseConfiguration createEnterpriseConfiguration()
{
EnterpriseConfiguration configuration = new EnterpriseConfiguration();
configuration.supplyLicenseKey(new BufferedReader(new java.io.StringReader(deobfuscate(key))));
configuration.setConfigurationProperty(FeatureKeys.SUPPRESS_XPATH_WARNINGS, Boolean.TRUE);
return configuration;
}
}
来源:https://stackoverflow.com/questions/63100478/is-this-the-best-way-to-get-all-elements-attributes-in-an-xml-file-using-saxon