Remove empty nodes from an XML document recursively

喜夏-厌秋 提交于 2019-12-05 11:52:14

This works: create a recursive function that goes deep first and then removes empty nodes on the way back up the tree. This has the effect of removing both D21 and D2.

/**
 * Demo driver: parses a hard-coded XML sample, prunes its empty nodes with
 * {@code removeNodes}, and prints the indented result to standard output.
 *
 * @param args unused
 * @throws Exception if parsing or serialising the document fails
 */
public static void main(String[] args) throws Exception {
    // Sample document; the D2/D21 branch contains no content at all.
    String input = "<A><B><C><C1><C11>something</C11><C12>something</C12></C1></C><D><D1><D11><D111 operation=\"create\"><Node>something else</Node></D111></D11></D1><D2><D21></D21></D2></D></B></A>";

    InputSource source = new InputSource(new StringReader(input));
    Document document = DocumentBuilderFactory.newInstance()
            .newDocumentBuilder()
            .parse(source);

    removeNodes(document);

    // Serialise the pruned DOM into an in-memory buffer, indented for readability.
    StringWriter buffer = new StringWriter();
    Transformer transformer = TransformerFactory.newInstance().newTransformer();
    transformer.setOutputProperty(OutputKeys.INDENT, "yes");
    transformer.transform(new DOMSource(document), new StreamResult(buffer));

    System.out.println(buffer.toString());
}

/**
 * Recursively removes empty nodes from the subtree rooted at {@code node}.
 *
 * <p>Children are processed first, so an element whose children were all
 * removed becomes empty itself and is removed on the way back up. A node is
 * considered empty when it is an element with no child nodes, or a text node
 * containing only whitespace.
 *
 * @param node root of the subtree to prune; it is detached from its parent
 *             if it turns out to be empty and has a parent
 */
public static void removeNodes(Node node) {
    NodeList list = node.getChildNodes();

    // NodeList is live: removing a child shifts the following siblings down
    // by one index, so iterating it directly would skip the sibling after
    // every removed child. Work on a fixed snapshot instead.
    Node[] children = new Node[list.getLength()];
    for (int i = 0; i < children.length; i++) {
        children[i] = list.item(i);
    }
    for (Node child : children) {
        removeNodes(child);
    }

    boolean emptyElement = node.getNodeType() == Node.ELEMENT_NODE
            && node.getChildNodes().getLength() == 0;
    boolean emptyText = node.getNodeType() == Node.TEXT_NODE
            && node.getNodeValue().trim().isEmpty();
    if (emptyElement || emptyText) {
        // A detached node has no parent to be removed from.
        Node parent = node.getParentNode();
        if (parent != null) {
            parent.removeChild(node);
        }
    }
}

Output

<A>
<B>
<C>
<C1>
<C11>something</C11>
<C12>something</C12>
</C1>
</C>
<D>
<D1>
<D11>
<D111 operation="create">
<Node>something else</Node>
</D111>
</D11>
</D1>
</D>
</B>
</A>

I don't have enough rep to comment on @Adam's solution, but I was having an issue where after a node removal, the last sibling of that node was moved to index zero, causing it to not fully remove empty elements. The fix was to use a list to hold all of the nodes we want to recursively call for removal.

Also, there was a bug that removed empty elements that had attributes.

Solution to both issues:

/**
 * Recursively removes empty nodes from the subtree rooted at {@code node},
 * keeping any empty element that carries attributes.
 *
 * <p>A node is removed when it has no attributes and is either an element
 * with no child nodes or a text node containing only whitespace. Children
 * are handled before their parent, so an element emptied by the removal of
 * its children is removed as well.
 *
 * @param node root of the subtree to prune; it is detached from its parent
 *             if it turns out to be empty and has a parent
 */
public static void removeEmptyNodes(Node node) {

    NodeList list = node.getChildNodes();
    // Copy the children first: the NodeList is live and would shift under
    // us as the recursive calls remove nodes.
    List<Node> nodesToRecursivelyCall = new LinkedList<>();

    for (int i = 0; i < list.getLength(); i++) {
        nodesToRecursivelyCall.add(list.item(i));
    }

    for (Node tempNode : nodesToRecursivelyCall) {
        removeEmptyNodes(tempNode);
    }

    boolean emptyElement = node.getNodeType() == Node.ELEMENT_NODE
          && node.getChildNodes().getLength() == 0;
    boolean emptyText = node.getNodeType() == Node.TEXT_NODE
          && node.getNodeValue().trim().isEmpty();

    if ((emptyElement || emptyText) && !node.hasAttributes()) {
        // A detached node has no parent to be removed from.
        Node parent = node.getParentNode();
        if (parent != null) {
            parent.removeChild(node);
        }
    }

}

Call getTextContent() on the top-level element of the DOM. If the method returns null or an empty string, you can remove that node, because it and all of its child nodes are empty. If getTextContent() returns a non-empty string, call getTextContent() on each child of the current node, and so on recursively.
See documentation.

Just work with strings:

    // Self-closing tag such as <tag/>, together with any whitespace before it.
    // \w+ matches word characters only, so tags with attributes are not matched.
    final Pattern selfClosingTag = Pattern.compile("\\s*<\\w+/>");
    // Opening tag directly followed by a closing tag, with nothing but
    // newlines/whitespace in between, plus any whitespace before it.
    final Pattern emptyTagPair = Pattern.compile("\\s*<\\w+>\n*\\s*</\\w+>");

    // Self-closing tags are stripped in a single pass.
    xml = selfClosingTag.matcher(xml).replaceAll("");

    // Stripping an empty pair can leave its parent empty in turn, so repeat
    // until a pass no longer shortens the string.
    int lengthBefore;
    do {
        lengthBefore = xml.length();
        xml = emptyTagPair.matcher(xml).replaceAll("");
    } while (lengthBefore != xml.length());

    return xml;
Muenuddeen Shekh
/**
 * Removes empty elements (elements with no child nodes at all) from an XML
 * document and prints the resulting XML.
 *
 * <p>Empty elements are located with XPath and removed directly by node
 * reference. Removing by a name-only path such as {@code /a/b} is ambiguous
 * when siblings share a name and can delete a non-empty element instead of
 * the empty one, so no path lookup is used.
 */
public class RemoveEmprtElement {

    /**
     * Reads {@code sampleXml4.xml}, strips its empty elements and prints the
     * resulting XML to standard output.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        ReadFile readFile = new ReadFile();
        String strXml = readFile.readFileFromPath(new File("sampleXml4.xml"));
        RemoveEmprtElement remover = new RemoveEmprtElement();
        try {
            DocumentBuilder dBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
            // Parse from characters rather than strXml.getBytes(): the content is
            // already a decoded String, and getBytes() would re-encode it with the
            // platform default charset, which may not match what the parser assumes.
            Document doc = dBuilder.parse(
                    new org.xml.sax.InputSource(new java.io.StringReader(strXml)));

            remover.getEmptyNodes(doc);

            TransformerFactory tf = TransformerFactory.newInstance();
            Transformer trans = tf.newTransformer();
            StreamResult result = new StreamResult(new StringWriter());
            trans.transform(new DOMSource(doc), result);
            System.out.println(result.getWriter().toString());
        } catch (Exception e) {
            // Top-level boundary of this command-line demo: report and exit.
            e.printStackTrace();
        }
    }

    /**
     * Removes every empty element from {@code doc}, including elements that
     * become empty because all of their children were removed.
     *
     * @param doc document to prune in place
     * @throws IllegalStateException if the XPath query cannot be compiled or evaluated
     */
    private void getEmptyNodes(Document doc) {
        NodeList leaves;
        try {
            XPath xpath = XPathFactory.newInstance().newXPath();
            // All elements that have no element children; only these can be
            // empty before any removal has happened.
            XPathExpression expr = xpath.compile("//*[not(*)]");
            leaves = (NodeList) expr.evaluate(doc, XPathConstants.NODESET);
        } catch (javax.xml.xpath.XPathExpressionException e) {
            throw new IllegalStateException("Could not evaluate the empty-element XPath query", e);
        }

        for (int i = 0; i < leaves.getLength(); i++) {
            removeIfEmpty(leaves.item(i));
        }
    }

    /**
     * Removes {@code node} if it is an element without child nodes, then
     * repeats the check on each ancestor that the removal has left empty.
     */
    private void removeIfEmpty(Node node) {
        Node current = node;
        while (current != null
                && current.getNodeType() == Node.ELEMENT_NODE
                && !current.hasChildNodes()) {
            Node parent = current.getParentNode();
            if (parent == null) {
                return;
            }
            parent.removeChild(current);
            // The parent may have just lost its last child.
            current = parent;
        }
    }

}
标签
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!