How to reduce the time cost while looping in a 4 million array in VBA?

后端 未结 2 850
不思量自难忘°
不思量自难忘° 2021-02-11 04:59

I need to do the ‘vlookup’ function using the VBA. I need to lookup data from a access database containg 4.6 million records.

Private Sub connectDB()
 Dim sqlst         


        
相关标签:
2条回答
  • 2021-02-11 05:46

    Although scripting dictionary is pretty fast at performing lookups, loading one up gets progressively slower the more you put into it: when you're in the millions of items then it gets really slow to load.

    So, you could maybe consider something like splitting your values across an array of dictionary objects. All other excellent suggestions aside, this would at least reduce your runtime with your current setup. The only caveat is the "keys" queried from your database would need to be unique.

    Sub LookupTester()
    
        Const NUM_VALUES As Long = 4000000# '<< size of total dataset
        Const MAX_PER_DICT As Long = 400000 '<< max # of entries per dictionary
    
        Dim numDicts As Long, i As Long, n, t, d, v, r, c As Long
        Dim arrD() As Scripting.Dictionary
    
        numDicts = Application.Ceiling(NUM_VALUES / MAX_PER_DICT, 1)
        ReDim arrD(1 To numDicts)
        'initialize the array of dictionaries
        For n = 1 To numDicts
            Set arrD(n) = New Scripting.Dictionary
        Next n
    
        t = Timer
        n = 1
        c = 0
        Set d = arrD(n)
    
        'Load up some dummy data...
        For i = 1 To NUM_VALUES
            d("Value_" & i) = i
            c = c + 1
            If i Mod 400000 = 0 Then Debug.Print "Store", i, Timer - t 'check progresss
            If c = MAX_PER_DICT Then
                n = n + 1
                If i <> NUM_VALUES Then Set d = arrD(n)'<< next dict
                c = 0
            End If
        Next i
        'Done storing: 87 sec in my test
    
    
        t = Timer
        Randomize
        'perform a million lookups
        For i = 1 To 1000000#
            v = "Value_" & CLng(Rnd() * NUM_VALUES)
            For n = 1 To numDicts
                If arrD(n).Exists(v) Then
                    r = arrD(n)(v) '<< lookup result
                    Exit For
                End If
            Next n
            If i Mod 100000 = 0 Then Debug.Print "Query", i, Timer - t
        Next i
        'Done querying: ~320 sec to run the queries
    
    End Sub
    

    You could wrap up this type of thing into a nice class...

    0 讨论(0)
  • 2021-02-11 05:54

    You could reduce significantly the lookup time by implementing your own hashtable/dictionary.

    Here's an example indexing a 4 millions array under 5 seconds:

    Private Declare PtrSafe Function RtlComputeCrc32 Lib "ntdll.dll" ( _
      ByVal start As Long, ByVal data As LongPtr, ByVal size As Long) As Long
    
    Sub Example()
      Dim data(), slots() As Long, i As Long
    
      ' generate some records '
    
      ReDim data(0 To 1, 0 To 4000000)
      For i = 0 To UBound(data, 2)
        data(0, i) = CStr(i)
      Next
    
      ' index all the keys from column 1 '
    
      MapKeys slots, data, column:=0
    
      ' lookup a key in column 1 '
    
      i = IndexOfKey(slots, data, column:=0, key:="4876")
    
      If i >= 0 Then
        Debug.Print "Found at index " & i
      Else
        Debug.Print "Missing"
      End If
    
    End Sub
    
    
    Public Sub MapKeys(slots() As Long, data(), column As Long)
      Dim bucketsCount&, key$, r&, i&, s&, h&      
      bucketsCount = UBound(data, 2) * 0.9   ' n * load factor '
      ReDim slots(0 To UBound(data, 2) + bucketsCount)
    
      For r = 0 To UBound(data, 2) ' each record '
        key = data(column, r)
        h = RtlComputeCrc32(0, StrPtr(key), LenB(key)) And &H7FFFFFF  ' get hash '
        s = UBound(slots) - (h Mod bucketsCount)                      ' get slot '
        Do
          i = slots(s) - 1& ' get index (base 0) '
    
          If i >= 0& Then  ' if index for hash '
            If data(column, i) = data(column, r) Then Exit Do  ' if key present, handle next record '
          Else
            slots(s) = r + 1&  ' add index (base 1) '
            Exit Do
          End If
    
          s = i  ' collision, index points to the next slot '
        Loop
      Next
    End Sub
    
    Public Function IndexOfKey(slots() As Long, data(), column As Long, key As String) As Long
      Dim h&, s&, i&
      h = RtlComputeCrc32(0, StrPtr(key), LenB(key)) And &H7FFFFFF    ' get hash  '
      s = UBound(slots) - (h Mod (UBound(slots) - UBound(data, 2)))   ' get slot  '
      i = slots(s) - 1&                                               ' get index (base 0) '
    
      Do While i >= 0&
        If data(column, i) = key Then Exit Do  ' break if same key '
        i = slots(i) - 1&                      ' collision, index points to the next slot '
      Loop
    
      IndexOfKey = i
    End Function
    
    0 讨论(0)
提交回复
热议问题