Excel vba xml parsing performance

前端 未结 2 780
别那么骄傲
别那么骄傲 2021-01-27 04:24

I\'m working on taking some input data in excel, parsing it to xml and using that to run a SQL stored procedure, but I\'m running into performance issue on the xml parsing. The

相关标签:
2条回答
  • 2021-01-27 04:54

    I wanted to compare the Psuedo-String Builder that I used for Turn Excel range into VBA string against Parfait's implemetaion of MSXML to ouput the range to xml. I modified Parfait's code adding a timer and allowing non-date values.

    The Data had a header row and 300 rows by 300 Columns (90,000 cells). Although the String Builder was roughly 400% faster I would still use Parfait's MSXML approach. Being an industry standard, it is already well documented.

    Sub XMLFromRange()
        Dim Start: Start = Timer
        Const AVGCELLLENGTH As Long = 100
        Dim LG As Long, index As Long, x As Long, y As Long
        Dim data As Variant, Headers As Variant
        Dim result As String, s As String
        data = getDataArray
        Headers = getHeaderArray(data)
    
        result = Space(UBound(data, 1) * UBound(data, 2) * AVGCELLLENGTH)
        index = 1
        Mid(result, index, 11) = "<DataSet>" & vbCrLf
        index = index + 11
    
        For x = 2 To UBound(data, 1)
    
            Mid(result, index, 11) = "<DataRow>" & vbCrLf
            index = index + 11
            For y = 1 To UBound(data, 2)
    
                LG = Len(Headers(1, y))
                Mid(result, index, LG) = Headers(1, y)
                index = index + LG
    
                s = RTrim(data(x, y))
                LG = Len(s)
                Mid(result, index, LG) = s
                index = index + LG
    
                LG = Len(Headers(2, y))
                Mid(result, index, LG) = Headers(2, y)
                index = index + LG
    
            Next
            Mid(result, index, 12) = "</DataRow>" & vbCrLf
            index = index + 12
        Next
        Mid(result, index, 12) = "</DataSet>" & vbCrLf
        index = index + 12
    
        result = Left(result, index)
    
        MsgBox (Timer - Start) & " Second(s)" & vbCrLf & _
        (UBound(data, 1) - 1) * UBound(data, 2) & " Data Cells", vbInformation, "Execution Time"
    
        Dim myFile As String
        myFile = ThisWorkbook.Path & "\demo.txt"
    
        Open myFile For Output As #1
        Print #1, result
        Close #1
    
        Shell "Notepad.exe " & myFile, vbNormalFocus
    End Sub
    
    Function getDataArray()
        With Worksheets("Sheet1")
            getDataArray = .Range(.Range("A" & .Rows.Count).End(xlUp), .Cells(1, .Columns.Count).End(xlToLeft))
        End With
    End Function
    
    Function getHeaderArray(DataArray As Variant)
        Dim y As Long
        Dim Headers() As String
        ReDim Headers(1 To 2, 1 To UBound(DataArray, 2))
        For y = 1 To UBound(DataArray, 2)
            Headers(1, y) = "<" & DataArray(1, y) & ">"
            Headers(2, y) = "</" & DataArray(1, y) & ">" & vbCrLf
        Next
        getHeaderArray = Headers
    End Function
    
    0 讨论(0)
  • 2021-01-27 05:06

    Consider using MSXML, a comprehensive W3C compliant library of XML APIs which you can use to build your XML with DOM methods (createElement, appendChild, setAttribute) instead of concatenating text strings. XML is not quite a text file but a markup file with encoding and tree structure. Excel comes equipped with the MSXML COM object by reference or late-binding, and can iteratively build a tree from Excel data as shown below.

    With 300 rows by 12 cols of random dates, below didn't even take a minute (literally seconds after clicking macro) AND it even pretty printed raw output with line breaks and indentation using an embedded XSLT stylesheet (if you do not pretty print, the MSXML outputs document as one long, continuous line).

    Input

    VBA (of course align to actual data)

    Sub xmlExport()
    On Error GoTo ErrHandle
        ' VBA REFERENCE MSXML, v6.0 '
        Dim doc As New MSXML2.DOMDocument60, xslDoc As New MSXML2.DOMDocument60, newDoc As New MSXML2.DOMDocument60
        Dim root As IXMLDOMElement, dataNode As IXMLDOMElement, datesNode As IXMLDOMElement, namesNode As IXMLDOMElement
        Dim i As Long, j As Long
        Dim tmpValue As Variant
    
        ' DECLARE XML DOC OBJECT '
        Set root = doc.createElement("DataSet")
        doc.appendChild root
    
        ' ITERATE THROUGH ROWS '
        For i = 2 To Sheets(1).UsedRange.Rows.Count
    
            ' DATA ROW NODE '
            Set dataNode = doc.createElement("DataRow")
            root.appendChild dataNode
    
            ' DATES NODE '
            Set datesNode = doc.createElement("Dates")
            datesNode.Text = Sheets(1).Range("A" & i)
            dataNode.appendChild datesNode
    
            ' NAMES NODE '
            For j = 1 To 12
                tmpValue = Sheets(1).Cells(i, j + 1)
                If IsDate(tmpValue) And Not IsNumeric(tmpValue) Then
                    Set namesNode = doc.createElement("Name" & j)
                    namesNode.Text = Format(tmpValue, "yyyy-mm-dd")
                    dataNode.appendChild namesNode
                End If
            Next j
    
        Next i
    
        ' PRETTY PRINT RAW OUTPUT '
        xslDoc.LoadXML "<?xml version=" & Chr(34) & "1.0" & Chr(34) & "?>" _
                & "<xsl:stylesheet version=" & Chr(34) & "1.0" & Chr(34) _
                & "                xmlns:xsl=" & Chr(34) & "http://www.w3.org/1999/XSL/Transform" & Chr(34) & ">" _
                & "<xsl:strip-space elements=" & Chr(34) & "*" & Chr(34) & " />" _
                & "<xsl:output method=" & Chr(34) & "xml" & Chr(34) & " indent=" & Chr(34) & "yes" & Chr(34) & "" _
                & "            encoding=" & Chr(34) & "UTF-8" & Chr(34) & "/>" _
                & " <xsl:template match=" & Chr(34) & "node() | @*" & Chr(34) & ">" _
                & "  <xsl:copy>" _
                & "   <xsl:apply-templates select=" & Chr(34) & "node() | @*" & Chr(34) & " />" _
                & "  </xsl:copy>" _
                & " </xsl:template>" _
                & "</xsl:stylesheet>"
    
        xslDoc.async = False
        doc.transformNodeToObject xslDoc, newDoc
        newDoc.Save ActiveWorkbook.Path & "\Output.xml"
    
        MsgBox "Successfully exported Excel data to XML!", vbInformation
        Exit Sub
    
    ErrHandle:
        MsgBox Err.Number & " - " & Err.Description, vbCritical
        Exit Sub
    
    End Sub
    

    Output

    <?xml version="1.0" encoding="UTF-8"?>
    <DataSet>
        <DataRow>
            <Dates>Date1</Dates>
            <Name1>2016-04-23</Name1>
            <Name2>2016-09-22</Name2>
            <Name3>2016-09-23</Name3>
            <Name4>2016-09-24</Name4>
            <Name5>2016-10-31</Name5>
            <Name6>2016-09-26</Name6>
            <Name7>2016-09-27</Name7>
            <Name8>2016-09-28</Name8>
            <Name9>2016-09-29</Name9>
            <Name10>2016-09-30</Name10>
            <Name11>2016-10-01</Name11>
            <Name12>2016-10-02</Name12>
        </DataRow>
        <DataRow>
            <Dates>Date2</Dates>
            <Name1>2016-06-27</Name1>
            <Name2>2016-08-14</Name2>
            <Name3>2016-07-08</Name3>
            <Name4>2016-08-22</Name4>
            <Name5>2016-11-03</Name5>
            <Name6>2016-07-28</Name6>
            <Name7>2016-08-23</Name7>
            <Name8>2016-11-01</Name8>
            <Name9>2016-11-01</Name9>
            <Name10>2016-08-11</Name10>
            <Name11>2016-08-18</Name11>
            <Name12>2016-09-23</Name12>
        </DataRow>
        ...
    
    0 讨论(0)
提交回复
热议问题