Fastest and optimized way to read the xml

后端 未结 3 1567
南旧
南旧 2020-12-23 23:25

I have the XML file below:



  <book>
    <person>
      <first>Kiran</first>
      <last>Pai</last>
      <age>22</age>
    </person>
  </book>


        
相关标签:
3条回答
  • 2020-12-23 23:39

    If performance is important in your case, you should prefer SAX or StAX (http://en.wikipedia.org/wiki/StAX) over DOM.

    With DOM, the XML file is first parsed into an object model, which you can then query. So your algorithm makes two passes.

    With SAX, callbacks are invoked during the parse (startDocument, endElement...). SAX is event-based: it is a push model.

    With StAX, you control the parsing. You move a cursor from one element to the next. This is a pull model.

    With a file containing 32910000 persons, I compared my SAX version with the other answer (by Blaise Doughan), which uses StAX. I removed all the System.out.println instructions. My program took 106 seconds to read the whole file and the other took 94 seconds. I suppose that SAX is slower because all the callbacks are invoked even if they do nothing (the push model), whereas with StAX the cursor is moved only to the "interesting" elements (the pull model).

    For example with java 7 :

    import java.io.File;
    
    import javax.xml.parsers.SAXParser;
    import javax.xml.parsers.SAXParserFactory;
    
    import org.xml.sax.Attributes;
    import org.xml.sax.SAXException;
    import org.xml.sax.helpers.DefaultHandler;
    
    /**
     * Reads {@code book.xml} with a SAX parser and prints the text of every
     * {@code first}, {@code last} and {@code age} element, followed by the number
     * of {@code person} elements once the document ends.
     */
    public class ReadAndPrintXMLFileWithSax {

        /**
         * Parses {@code book.xml} (resolved against the working directory) with a
         * {@link BookHandler}.
         *
         * @param args unused
         * @throws Exception if the parser cannot be created, or the file cannot be
         *         read or is not well-formed
         */
        public static void main(String[] args) throws Exception {
            SAXParserFactory fabrique = SAXParserFactory.newInstance();
            SAXParser parser = fabrique.newSAXParser();

            File file = new File("book.xml");
            BookHandler handler = new BookHandler();
            parser.parse(file, handler);
        }

        /**
         * SAX handler that prints labelled element text to {@code System.out} and
         * counts {@code person} elements. Elements are matched on their qualified
         * name.
         */
        public static class BookHandler extends DefaultHandler {
            /** Number of {@code person} start tags seen so far. */
            private int count = 0;

            /**
             * Text of the element currently being captured, already prefixed with
             * its label; {@code null} while no element of interest is open.
             */
            private StringBuilder buffer;

            /**
             * Counts {@code person} elements and starts a labelled capture buffer
             * for {@code first}, {@code last} and {@code age}.
             */
            @Override
            public void startElement(String uri, String localName, String qName,
                    Attributes attributes) throws SAXException {
                switch (qName) {
                case "person":
                    count++;
                    break;
                case "first":
                    buffer = new StringBuilder("First Name : ");
                    break;
                case "last":
                    buffer = new StringBuilder("Last Name : ");
                    break;
                case "age":
                    buffer = new StringBuilder("Age : ");
                    break;
                default:
                    break;
                }
            }

            /**
             * Appends character data to the capture buffer, if one is open. The
             * parser may deliver one element's text in several chunks, so the
             * data is accumulated rather than printed here.
             */
            @Override
            public void characters(char[] ch, int start, int length)
                    throws SAXException {
                if (buffer != null) {
                    // Append straight from the parser's array; no temporary String.
                    buffer.append(ch, start, length);
                }
            }

            /**
             * Prints and releases the capture buffer when a {@code first},
             * {@code last} or {@code age} element closes.
             */
            @Override
            public void endElement(String uri, String localName, String qName)
                    throws SAXException {
                switch (qName) {
                case "first":
                case "last":
                case "age":
                    if (buffer != null) {
                        System.out.println(buffer.toString());
                        // Stop capturing so that whitespace between elements is
                        // not appended to a buffer that was already printed.
                        buffer = null;
                    }
                    break;
                default:
                    break;
                }
            }

            /** Prints the total number of {@code person} elements. */
            @Override
            public void endDocument() throws SAXException {
                System.out.println(count + " persons");
            }
        }
    }
    
    0 讨论(0)
  • 2020-12-23 23:54

    A Stax Example

    import java.io.FileInputStream;
    import java.io.FileNotFoundException;
    import java.io.InputStream;
    import javax.xml.stream.XMLEventReader;
    import javax.xml.stream.XMLInputFactory;
    import javax.xml.stream.XMLStreamException;
    import javax.xml.stream.events.StartElement;
    import javax.xml.stream.events.XMLEvent;
    
    /**
     * Reads {@code c:/source/book.xml} with the StAX event API and prints the text
     * of every {@code first}, {@code last} and {@code age} element.
     */
    public class ReadAndPrintXMLFile {

        /**
         * Opens the input file, prints its fields and closes the file again.
         * A missing file is reported on {@code System.out}; parse and I/O errors
         * are reported with a stack trace.
         *
         * @param argv unused
         */
        public static void main(String argv []) {

            String inputFile = "c:/source/book.xml";

            // try-with-resources: the stream is closed on every exit path.
            try (InputStream in = new FileInputStream(inputFile)) {
                printFields(in);
            } catch (FileNotFoundException e) {
                System.out.println("File not Found: " + inputFile);
            } catch (XMLStreamException e) {
                e.printStackTrace();
            } catch (java.io.IOException e) {
                // Raised only if closing the stream fails.
                e.printStackTrace();
            }
        }

        /**
         * Walks the events of the document in {@code in} and prints one labelled
         * line per {@code first}, {@code last} or {@code age} element.
         *
         * @param in the XML document; not closed by this method
         * @throws XMLStreamException if the document is not well-formed or one of
         *         the printed elements contains child elements
         */
        private static void printFields(InputStream in) throws XMLStreamException {
            XMLInputFactory inputFactory = XMLInputFactory.newInstance();
            XMLEventReader eventReader = inputFactory.createXMLEventReader(in);
            try {
                while (eventReader.hasNext()) {
                    XMLEvent event = eventReader.nextEvent();
                    if (!event.isStartElement()) {
                        continue;
                    }
                    StartElement startElement = event.asStartElement();
                    String label = labelFor(startElement.getName().getLocalPart());
                    if (label != null) {
                        // getElementText() consumes events up to and including the
                        // matching end tag and nothing beyond it, so the event that
                        // follows (possibly the next start tag) is not lost.
                        System.out.println(label + eventReader.getElementText());
                    }
                }
            } finally {
                eventReader.close();
            }
        }

        /**
         * Returns the output prefix for an element name, or {@code null} when the
         * element is not printed.
         */
        private static String labelFor(String elementName) {
            if ("first".equals(elementName)) {
                return "First Name : ";
            }
            if ("last".equals(elementName)) {
                return "Last Name : ";
            }
            if ("age".equals(elementName)) {
                return "Age : ";
            }
            return null;
        }

    }
    


    Output:

    First Name : Kiran
    Last Name : Pai
    Age : 22
    First Name : Bill
    Last Name : Gates
    Age : 46
    First Name : Steve
    Last Name : Jobs
    Age : 40
    
    0 讨论(0)
  • 2020-12-23 23:58

    Using ReadAndPrintXMLFileWithStAX below, when I compare it with ReadAndPrintXMLFileWithSAX from the answer given by gontard, the StAX approach is faster. My test involved running both code samples 500000 times on JDK 1.7.0_07 for the Mac.

    ReadAndPrintXMLFileWithStAX:  103 seconds
    ReadAndPrintXMLFileWithSAX:   125 seconds
    

    ReadAndPrintXMLFileWithStAX (using Java SE 7)

    Below is a more optimized StAX (JSR-173) example using XMLStreamReader instead of XMLEventReader.

    import java.io.FileInputStream;
    import java.io.InputStream;
    import javax.xml.stream.*;
    
    /**
     * Reads {@code book.xml} with the StAX cursor API and prints the text of every
     * {@code first}, {@code last} and {@code age} element, followed by the number
     * of {@code person} elements.
     */
    public class ReadAndPrintXMLFileWithStAX {

        /**
         * Parses {@code book.xml} (resolved against the working directory), prints
         * its fields and then the person count.
         *
         * @param argv unused
         * @throws Exception if the file cannot be opened or is not well-formed
         */
        public static void main(String argv[]) throws Exception {
            XMLInputFactory inputFactory = XMLInputFactory.newInstance();
            // The stream and the reader are closed separately: the reader is
            // closed in the finally block and the stream by try-with-resources.
            try (InputStream in = new FileInputStream("book.xml")) {
                XMLStreamReader streamReader = inputFactory.createXMLStreamReader(in);
                try {
                    int persons = printFields(streamReader);
                    System.out.print(persons);
                    System.out.println(" persons");
                } finally {
                    streamReader.close();
                }
            }
        }

        /**
         * Advances the cursor to the end of the document, printing one labelled
         * line per {@code first}, {@code last} or {@code age} element.
         *
         * The scan starts from the reader's current position and looks at every
         * start tag, so it does not depend on how deeply the elements are nested.
         *
         * @param streamReader reader to consume
         * @return the number of {@code person} start tags encountered
         * @throws XMLStreamException if the document is not well-formed or one of
         *         the printed elements contains child elements
         */
        private static int printFields(XMLStreamReader streamReader) throws XMLStreamException {
            int persons = 0;
            while (streamReader.hasNext()) {
                if (streamReader.next() != XMLStreamConstants.START_ELEMENT) {
                    continue;
                }
                switch (streamReader.getLocalName()) {
                case "first":
                    System.out.print("First Name : ");
                    // getElementText() leaves the cursor on the matching end tag.
                    System.out.println(streamReader.getElementText());
                    break;
                case "last":
                    System.out.print("Last Name : ");
                    System.out.println(streamReader.getElementText());
                    break;
                case "age":
                    System.out.print("Age : ");
                    System.out.println(streamReader.getElementText());
                    break;
                case "person":
                    persons++;
                    break;
                default:
                    break;
                }
            }
            return persons;
        }

    }
    

    Output

    First Name : Kiran
    Last Name : Pai
    Age : 22
    First Name : Bill
    Last Name : Gates
    Age : 46
    First Name : Steve
    Last Name : Jobs
    Age : 40
    3 persons
    
    0 讨论(0)
提交回复
热议问题