Handle large delimited text files in VBA

后端 未结 4 2047
醉酒成梦
醉酒成梦 2021-01-16 15:33

Using VBA, I need to \"unpivot\" data that is currently in delimited text files (as hundreds of columns by tens of thousands of rows) into a normalized form (four columns by

4条回答
  •  暖寄归人
    2021-01-16 16:27

    Compiled but not tested

    Sub UnpivotFile(sPath As String)
    
        Const DELIM As String = ","
        Const QUOTE As String = """"
    
        Dim FSO As New FileSystemObject
        Dim arrHeader
        Dim arrContent
        Dim lb As Integer, ub As Integer
        Dim x As Integer
        Dim inData As Boolean
        Dim l As String, fName As String
        Dim fIn As Scripting.TextStream
        Dim fOut As Scripting.TextStream
        Dim tmp As String
        Dim lineNum As Long
    
        fName = FSO.GetFileName(sPath)
    
        Set fIn = FSO.OpenTextFile(sPath, ForReading)
        Set fOut = FSO.OpenTextFile(sPath & "_out", ForWriting)
        lineNum = 0
    
        Do While Not fIn.AtEndOfStream
    
            l = fIn.ReadLine
            lineNum = lineNum + 1
            arrContent = ParseLineToArray(l, DELIM, QUOTE)
    
            If Not inData Then
                arrHeader = arrContent
                lb = LBound(arrHeader)
                ub = UBound(arrHeader)
                inData = True
            Else
                For x = lb To ub
                    fOut.WriteLine Join(Array(fName, lineNum, _
                                   QID(arrHeader(x), DELIM, QUOTE), _
                                   QID(arrContent(x), DELIM, QUOTE)), DELIM)
    
                Next x
            End If
        Loop
        fIn.Close
        fOut.Close
    End Sub
    
    'quote if delimiter found
    Function QID(s, d As String, q As String)
        QID = IIf(InStr(s, d) > -1, q & s & q, s)
    End Function
    
    
    'Split a string into an array based on a Delimiter and a Text Identifier
    Private Function ParseLineToArray(sInput As String, m_Delim As String, _
                                      m_TextIdentifier As String) As Variant
       'Dim vArr As Variant
       Dim sArr() As String
       Dim bInText As Boolean
       Dim i As Long, n As Long
       Dim sTemp As String, tmp As String
    
       If sInput = "" Or InStr(1, sInput, m_Delim) = 0 Then
          'zero length string, or delimiter not present
          'dump all input into single-element array (minus Text Identifier)
          ReDim sArr(0)
          sArr(0) = Replace(sInput, m_TextIdentifier, "")
          ParseLineToArray = sArr()
       Else
          If InStr(1, sInput, m_TextIdentifier) = 0 Then
             'no text identifier so just split and return
             sArr() = Split(sInput, m_Delim)
             ParseLineToArray = sArr()
          Else
             'found the text identifier, so do it the long way
             bInText = False
             sTemp = ""
             n = 0
    
             For i = 1 To Len(sInput)
                tmp = Mid(sInput, i, 1)
                If tmp = m_TextIdentifier Then
                   'just toggle the flag - don't add to string
                   bInText = Not bInText
                Else
                   If tmp = m_Delim Then
                      If Not bInText Then
                         'delimiter not within quoted text, so add next array member
                         ReDim Preserve sArr(n)
                         sArr(n) = sTemp
                         sTemp = ""
                         n = n + 1
                      Else
                         sTemp = sTemp & tmp
                      End If
                   Else
                      sTemp = sTemp & tmp
                   End If           'character is a delimiter
                End If              'character is a quote marker
             Next i
    
             ReDim Preserve sArr(n)
             sArr(n) = sTemp
    
             ParseLineToArray = sArr()
          End If 'has any quoted text
       End If 'parseable
    
    End Function
    

提交回复
热议问题