Parsing JSON in Excel VBA

后端 未结 11 900
长发绾君心
长发绾君心 2020-11-22 09:58

I have the same issue as in Excel VBA: Parsed JSON Object Loop but cannot find any solution. My JSON has nested objects so suggested solution like VBJSON and vba-json do not

11条回答
  •  遇见更好的自我
    2020-11-22 10:25

    Lots of good answers here - just chipping in my own.

    I had a requirement to parse a very specific JSON string, representing the results of making a web-API call. The JSON described a list of objects, and looked something like this:

    [
       {
         "property1": "foo",
         "property2": "bar",
         "timeOfDay": "2019-09-30T00:00:00",
         "numberOfHits": 98,
         "isSpecial": false,
         "comment": "just to be awkward, this contains a comma"
       },
       {
         "property1": "fool",
         "property2": "barrel",
         "timeOfDay": "2019-10-31T00:00:00",
         "numberOfHits": 11,
         "isSpecial": false,
         "comment": null
       },
       ...
    ]
    

    There are a few things to note about this:

    1. The JSON should always describe a list (even if empty), which should only contain objects.
    2. The objects in the list should only contain properties with simple types (string / date / number / boolean or null).
    3. The value of a property may contain a comma - which makes parsing the JSON somewhat harder - but may not contain any quotes (because I'm too lazy to deal with that).

    The ParseListOfObjects function in the code below takes the JSON string as input, and returns a Collection representing the items in the list. Each item is represented as a Dictionary, where the keys of the dictionary correspond to the names of the object's properties. The values are automatically converted to the appropriate type (String, Date, Double, Boolean - or Empty if the value is null).

    Your VBA project will need a reference to the Microsoft Scripting Runtime library to use the Dictionary object - though it would not be difficult to remove this dependency if you use a different way of encoding the results.

    Here's my JSON.bas:

    Option Explicit
    
    ' NOTE: a fully-featured JSON parser in VBA would be a beast.
    ' This simple parser only supports VERY simple JSON (which is all we need).
    ' Specifically, it supports JSON comprising a list of objects, each of which has only simple properties.
    
    Private Const strSTART_OF_LIST As String = "["
    Private Const strEND_OF_LIST As String = "]"
    
    Private Const strLIST_DELIMITER As String = ","
    
    Private Const strSTART_OF_OBJECT As String = "{"
    Private Const strEND_OF_OBJECT As String = "}"
    
    Private Const strOBJECT_PROPERTY_NAME_VALUE_SEPARATOR As String = ":"
    
    Private Const strQUOTE As String = """"
    
    Private Const strNULL_VALUE As String = "null"
    Private Const strTRUE_VALUE As String = "true"
    Private Const strFALSE_VALUE As String = "false"
    
    
    Public Function ParseListOfObjects(ByVal strJson As String) As Collection
    
        ' Takes a JSON string that represents a list of objects (where each object has only simple value properties), and
        ' returns a collection of dictionary objects, where the keys and values of each dictionary represent the names and
        ' values of the JSON object properties.
    
        Set ParseListOfObjects = New Collection
    
        Dim strList As String: strList = Trim(strJson)
    
        ' Check we have a list
        If Left(strList, Len(strSTART_OF_LIST)) <> strSTART_OF_LIST _
        Or Right(strList, Len(strEND_OF_LIST)) <> strEND_OF_LIST Then
            Err.Raise vbObjectError, Description:="The provided JSON does not appear to be a list (it does not start with '" & strSTART_OF_LIST & "' and end with '" & strEND_OF_LIST & "')"
        End If
    
        ' Get the list item text (between the [ and ])
        Dim strBody As String: strBody = Trim(Mid(strList, 1 + Len(strSTART_OF_LIST), Len(strList) - Len(strSTART_OF_LIST) - Len(strEND_OF_LIST)))
    
        If strBody = "" Then
            Exit Function
        End If
    
        ' Check we have a list of objects
        If Left(strBody, Len(strSTART_OF_OBJECT)) <> strSTART_OF_OBJECT Then
            Err.Raise vbObjectError, Description:="The provided JSON does not appear to be a list of objects (the content of the list does not start with '" & strSTART_OF_OBJECT & "')"
        End If
    
        ' We now have something like:
        '    {"property":"value", "property":"value"}, {"property":"value", "property":"value"}, ...
        ' so we can't just split on a comma to get the various items (because the items themselves have commas in them).
        ' HOWEVER, since we know we're dealing with very simple JSON that has no nested objects, we can split on "}," because
        ' that should only appear between items. That'll mean that all but the last item will be missing it's closing brace.
        Dim astrItems() As String: astrItems = Split(strBody, strEND_OF_OBJECT & strLIST_DELIMITER)
    
        Dim ixItem As Long
        For ixItem = LBound(astrItems) To UBound(astrItems)
    
            Dim strItem As String: strItem = Trim(astrItems(ixItem))
    
            If Left(strItem, Len(strSTART_OF_OBJECT)) <> strSTART_OF_OBJECT Then
                Err.Raise vbObjectError, Description:="Mal-formed list item (does not start with '" & strSTART_OF_OBJECT & "')"
            End If
    
            ' Only the last item will have a closing brace (see comment above)
            Dim bIsLastItem As Boolean: bIsLastItem = ixItem = UBound(astrItems)
    
            If bIsLastItem Then
                If Right(strItem, Len(strEND_OF_OBJECT)) <> strEND_OF_OBJECT Then
                    Err.Raise vbObjectError, Description:="Mal-formed list item (does not end with '" & strEND_OF_OBJECT & "')"
                End If
            End If
    
            Dim strContent: strContent = Mid(strItem, 1 + Len(strSTART_OF_OBJECT), Len(strItem) - Len(strSTART_OF_OBJECT) - IIf(bIsLastItem, Len(strEND_OF_OBJECT), 0))
    
            ParseListOfObjects.Add ParseObjectContent(strContent)
    
        Next ixItem
    
    End Function
    
    Private Function ParseObjectContent(ByVal strContent As String) As Scripting.Dictionary
    
        Set ParseObjectContent = New Scripting.Dictionary
        ParseObjectContent.CompareMode = TextCompare
    
        ' The object content will look something like:
        '    "property":"value", "property":"value", ...
        ' ... although the value may not be in quotes, since numbers are not quoted.
        ' We can't assume that the property value won't contain a comma, so we can't just split the
        ' string on the commas, but it's reasonably safe to assume that the value won't contain further quotes
        ' (and we're already assuming no sub-structure).
        ' We'll need to scan for commas while taking quoted strings into account.
    
        Dim ixPos As Long: ixPos = 1
        Do While ixPos <= Len(strContent)
    
            Dim strRemainder As String
    
            ' Find the opening quote for the name (names should always be quoted)
            Dim ixOpeningQuote As Long: ixOpeningQuote = InStr(ixPos, strContent, strQUOTE)
    
            If ixOpeningQuote <= 0 Then
                ' The only valid reason for not finding a quote is if we're at the end (though white space is permitted)
                strRemainder = Trim(Mid(strContent, ixPos))
                If Len(strRemainder) = 0 Then
                    Exit Do
                End If
                Err.Raise vbObjectError, Description:="Mal-formed object (the object name does not start with a quote)"
            End If
    
            ' Now find the closing quote for the name, which we assume is the very next quote
            Dim ixClosingQuote As Long: ixClosingQuote = InStr(ixOpeningQuote + 1, strContent, strQUOTE)
            If ixClosingQuote <= 0 Then
                Err.Raise vbObjectError, Description:="Mal-formed object (the object name does not end with a quote)"
            End If
    
            If ixClosingQuote - ixOpeningQuote - Len(strQUOTE) = 0 Then
                Err.Raise vbObjectError, Description:="Mal-formed object (the object name is blank)"
            End If
    
            Dim strName: strName = Mid(strContent, ixOpeningQuote + Len(strQUOTE), ixClosingQuote - ixOpeningQuote - Len(strQUOTE))
    
            ' The next thing after the quote should be the colon
    
            Dim ixNameValueSeparator As Long: ixNameValueSeparator = InStr(ixClosingQuote + Len(strQUOTE), strContent, strOBJECT_PROPERTY_NAME_VALUE_SEPARATOR)
    
            If ixNameValueSeparator <= 0 Then
                Err.Raise vbObjectError, Description:="Mal-formed object (missing '" & strOBJECT_PROPERTY_NAME_VALUE_SEPARATOR & "')"
            End If
    
            ' Check that there was nothing between the closing quote and the colon
    
            strRemainder = Trim(Mid(strContent, ixClosingQuote + Len(strQUOTE), ixNameValueSeparator - ixClosingQuote - Len(strQUOTE)))
            If Len(strRemainder) > 0 Then
                Err.Raise vbObjectError, Description:="Mal-formed object (unexpected content between name and '" & strOBJECT_PROPERTY_NAME_VALUE_SEPARATOR & "')"
            End If
    
            ' What comes after the colon is the value, which may or may not be quoted (e.g. numbers are not quoted).
            ' If the very next thing we see is a quote, then it's a quoted value, and we need to find the matching
            ' closing quote while ignoring any commas inside the quoted value.
            ' If the next thing we see is NOT a quote, then it must be an unquoted value, and we can scan directly
            ' for the next comma.
            ' Either way, we're looking for a quote or a comma, whichever comes first (or neither, in which case we
            ' have the last - unquoted - value).
    
            ixOpeningQuote = InStr(ixNameValueSeparator + Len(strOBJECT_PROPERTY_NAME_VALUE_SEPARATOR), strContent, strQUOTE)
            Dim ixPropertySeparator As Long: ixPropertySeparator = InStr(ixNameValueSeparator + Len(strOBJECT_PROPERTY_NAME_VALUE_SEPARATOR), strContent, strLIST_DELIMITER)
    
            If ixOpeningQuote > 0 And ixPropertySeparator > 0 Then
                ' Only use whichever came first
                If ixOpeningQuote < ixPropertySeparator Then
                    ixPropertySeparator = 0
                Else
                    ixOpeningQuote = 0
                End If
            End If
    
            Dim strValue As String
            Dim vValue As Variant
    
            If ixOpeningQuote <= 0 Then ' it's not a quoted value
    
                If ixPropertySeparator <= 0 Then ' there's no next value; this is the last one
                    strValue = Trim(Mid(strContent, ixNameValueSeparator + Len(strOBJECT_PROPERTY_NAME_VALUE_SEPARATOR)))
                    ixPos = Len(strContent) + 1
                Else ' this is not the last value
                    strValue = Trim(Mid(strContent, ixNameValueSeparator + Len(strOBJECT_PROPERTY_NAME_VALUE_SEPARATOR), ixPropertySeparator - ixNameValueSeparator - Len(strOBJECT_PROPERTY_NAME_VALUE_SEPARATOR)))
                    ixPos = ixPropertySeparator + Len(strLIST_DELIMITER)
                End If
    
                vValue = ParseUnquotedValue(strValue)
    
            Else ' It is a quoted value
    
                ' Find the corresponding closing quote, which should be the very next one
    
                ixClosingQuote = InStr(ixOpeningQuote + Len(strQUOTE), strContent, strQUOTE)
    
                If ixClosingQuote <= 0 Then
                    Err.Raise vbObjectError, Description:="Mal-formed object (the value does not end with a quote)"
                End If
    
                strValue = Mid(strContent, ixOpeningQuote + Len(strQUOTE), ixClosingQuote - ixOpeningQuote - Len(strQUOTE))
                vValue = ParseQuotedValue(strValue)
    
                ' Re-scan for the property separator, in case we hit one that was part of the quoted value
                ixPropertySeparator = InStr(ixClosingQuote + Len(strQUOTE), strContent, strLIST_DELIMITER)
    
                If ixPropertySeparator <= 0 Then ' this was the last value
    
                    ' Check that there's nothing between the closing quote and the end of the text
                    strRemainder = Trim(Mid(strContent, ixClosingQuote + Len(strQUOTE)))
                    If Len(strRemainder) > 0 Then
                        Err.Raise vbObjectError, Description:="Mal-formed object (there is content after the last value)"
                    End If
    
                    ixPos = Len(strContent) + 1
    
                Else ' this is not the last value
    
                    ' Check that there's nothing between the closing quote and the property separator
                    strRemainder = Trim(Mid(strContent, ixClosingQuote + Len(strQUOTE), ixPropertySeparator - ixClosingQuote - Len(strQUOTE)))
                    If Len(strRemainder) > 0 Then
                        Err.Raise vbObjectError, Description:="Mal-formed object (there is content after the last value)"
                    End If
    
                    ixPos = ixPropertySeparator + Len(strLIST_DELIMITER)
    
                End If
    
            End If
    
            ParseObjectContent.Add strName, vValue
    
        Loop
    
    End Function
    
    Private Function ParseUnquotedValue(ByVal strValue As String) As Variant
    
        If StrComp(strValue, strNULL_VALUE, vbTextCompare) = 0 Then
            ParseUnquotedValue = Empty
        ElseIf StrComp(strValue, strTRUE_VALUE, vbTextCompare) = 0 Then
            ParseUnquotedValue = True
        ElseIf StrComp(strValue, strFALSE_VALUE, vbTextCompare) = 0 Then
            ParseUnquotedValue = False
        ElseIf IsNumeric(strValue) Then
            ParseUnquotedValue = CDbl(strValue)
        Else
            Err.Raise vbObjectError, Description:="Mal-formed value (not null, true, false or a number)"
        End If
    
    End Function
    
    Private Function ParseQuotedValue(ByVal strValue As String) As Variant
    
        ' Both dates and strings are quoted; we'll treat it as a date if it has the expected date format.
        ' Dates are in the form:
        '    2019-09-30T00:00:00
        If strValue Like "####-##-##T##:00:00" Then
            ' NOTE: we just want the date part
            ParseQuotedValue = CDate(Left(strValue, Len("####-##-##")))
        Else
            ParseQuotedValue = strValue
        End If
    
    End Function
    

    A simple test:

    Const strJSON As String = "[{""property1"":""foo""}]"
    Dim oObjects As Collection: Set oObjects = Json.ParseListOfObjects(strJSON)
    
    MsgBox oObjects(1)("property1") ' shows "foo"
    

提交回复
热议问题