Using VBA, I need to \"unpivot\" data that is currently in delimited text files (as hundreds of columns by tens of thousands of rows) into a normalized form (four columns by
Compiled but not tested
Sub UnpivotFile(sPath As String)
Const DELIM As String = ","
Const QUOTE As String = """"
Dim FSO As New FileSystemObject
Dim arrHeader
Dim arrContent
Dim lb As Integer, ub As Integer
Dim x As Integer
Dim inData As Boolean
Dim l As String, fName As String
Dim fIn As Scripting.TextStream
Dim fOut As Scripting.TextStream
Dim tmp As String
Dim lineNum As Long
fName = FSO.GetFileName(sPath)
Set fIn = FSO.OpenTextFile(sPath, ForReading)
Set fOut = FSO.OpenTextFile(sPath & "_out", ForWriting)
lineNum = 0
Do While Not fIn.AtEndOfStream
l = fIn.ReadLine
lineNum = lineNum + 1
arrContent = ParseLineToArray(l, DELIM, QUOTE)
If Not inData Then
arrHeader = arrContent
lb = LBound(arrHeader)
ub = UBound(arrHeader)
inData = True
Else
For x = lb To ub
fOut.WriteLine Join(Array(fName, lineNum, _
QID(arrHeader(x), DELIM, QUOTE), _
QID(arrContent(x), DELIM, QUOTE)), DELIM)
Next x
End If
Loop
fIn.Close
fOut.Close
End Sub
'quote if delimiter found
Function QID(s, d As String, q As String)
QID = IIf(InStr(s, d) > -1, q & s & q, s)
End Function
'Split a string into an array based on a Delimiter and a Text Identifier
Private Function ParseLineToArray(sInput As String, m_Delim As String, _
m_TextIdentifier As String) As Variant
'Dim vArr As Variant
Dim sArr() As String
Dim bInText As Boolean
Dim i As Long, n As Long
Dim sTemp As String, tmp As String
If sInput = "" Or InStr(1, sInput, m_Delim) = 0 Then
'zero length string, or delimiter not present
'dump all input into single-element array (minus Text Identifier)
ReDim sArr(0)
sArr(0) = Replace(sInput, m_TextIdentifier, "")
ParseLineToArray = sArr()
Else
If InStr(1, sInput, m_TextIdentifier) = 0 Then
'no text identifier so just split and return
sArr() = Split(sInput, m_Delim)
ParseLineToArray = sArr()
Else
'found the text identifier, so do it the long way
bInText = False
sTemp = ""
n = 0
For i = 1 To Len(sInput)
tmp = Mid(sInput, i, 1)
If tmp = m_TextIdentifier Then
'just toggle the flag - don't add to string
bInText = Not bInText
Else
If tmp = m_Delim Then
If Not bInText Then
'delimiter not within quoted text, so add next array member
ReDim Preserve sArr(n)
sArr(n) = sTemp
sTemp = ""
n = n + 1
Else
sTemp = sTemp & tmp
End If
Else
sTemp = sTemp & tmp
End If 'character is a delimiter
End If 'character is a quote marker
Next i
ReDim Preserve sArr(n)
sArr(n) = sTemp
ParseLineToArray = sArr()
End If 'has any quoted text
End If 'parseable
End Function