Improving the performance of FOR loop

后端 未结 3 798
时光取名叫无心
时光取名叫无心 2020-12-04 00:49

I am comparing sheets in a workbook. The workbook has two sheets named PRE and POST with the same 19 columns in each. The number of rows varies every day but are same for th

相关标签:
3条回答
  • 2020-12-04 01:04

    Any references to a worksheet cells is slow. This adds up dramatically when you do it in a loop. The best speed increase will come from limiting these worksheet references.

    One good way is to copy the data in Variant Arrays, and loop over these, building a new Variant Array with the data to be kept. Then place the new array over the old in one go in one go.

    Using a test data set of 200,000 rows, 20 columns, 50% text, 50% numbers, deleting 170,000 rows: this code runs in about 30s on my hardware

    Sub Mine2()
        Dim T1 As Long, T2 As Long, T3 As Long
    
        Dim ResDelete As Boolean
        Dim iPRE As Long, iPOST As Long
        Dim EventState  As Boolean, CalcState As XlCalculation, PageBreakState As Boolean
        Dim iCntr As Long, y As Long, iRows As Long
        Dim rPre As Range, rPost As Range
    
        Dim PreDat As Variant, PostDat As Variant, PreDelDat As Variant, PostDelDat As Variant
    
        Dim n As Long
        Dim wsPre As Worksheet, wsPost As Worksheet
    
        Set wsPre = ActiveWorkbook.Worksheets("PRE")
        With wsPre
            Set rPre = .Range(.Cells(1, .Columns.Count).End(xlToLeft), .Cells(.Rows.Count, 1).End(xlUp))
            PreDat = rPre.Value
            iPRE = UBound(PreDat, 1)
            'MsgBox iPRE
        End With
    
        Set wsPost = ActiveWorkbook.Worksheets("POST")
        With wsPost
            Set rPost = .Range(.Cells(1, .Columns.Count).End(xlToLeft), .Cells(.Rows.Count, 1).End(xlUp))
            PostDat = rPost.Value
            iPOST = UBound(PostDat, 1)
            'MsgBox iPOST
        End With
    
        If iPRE <> iPOST Then
            MsgBox "The number of rows in PRE and POST sheets are not the same. The macro quits"
            Exit Sub
        End If
        iRows = iPRE
    
    
        ReDim PreDelDat(1 To UBound(PreDat, 1), 1 To UBound(PreDat, 2))
        ReDim PostDelDat(1 To UBound(PostDat, 1), 1 To UBound(PostDat, 2))
        n = 1
        On Error GoTo EH:
     'Optimize Performance
    
        Application.ScreenUpdating = False
        EventState = Application.EnableEvents
        Application.EnableEvents = False
    
        CalcState = Application.Calculation
        Application.Calculation = xlCalculationManual
    
        PageBreakState = ActiveSheet.DisplayPageBreaks
        ActiveSheet.DisplayPageBreaks = False
    
    
        T1 = GetTickCount
        For y = 1 To UBound(PreDat, 2)
            PreDelDat(1, y) = PreDat(1, y)
            PostDelDat(1, y) = PostDat(1, y)
        Next
    
        n = 2
        For iCntr = 2 To UBound(PreDat, 1)
            ResDelete = True
            For y = 1 To UBound(PreDat, 2)
                If PreDat(iCntr, y) <> PostDat(iCntr, y) Then
                    ResDelete = False
                    Exit For
                End If
            Next y
    
            If Not ResDelete Then
                For y = 1 To UBound(PreDat, 2)
                    PreDelDat(n, y) = PreDat(iCntr, y)
                    PostDelDat(n, y) = PostDat(iCntr, y)
                Next
                n = n + 1
            End If
        Next iCntr
        T2 = GetTickCount
        Debug.Print "Compare Done in:", T2 - T1
        Debug.Print "Rows to delete:", n - 1
    
        rPre = PreDelDat
        rPost = PostDelDat
    
        T3 = GetTickCount
        Debug.Print "Delete Done In:", T3 - T1
    CleanUp:
        'Revert optmizing lines
        On Error Resume Next
        ActiveSheet.DisplayPageBreaks = PageBreakState
        Application.Calculation = CalcState
        Application.EnableEvents = EventState
        Application.ScreenUpdating = True
    Exit Sub
    EH:
        ' Handle Errors here
        Debug.Assert False
        Resume
        Err.Clear
        Resume CleanUp
    End Sub
    

    Original:

    One good way is to copy the data in Variant Arrays, and loop over these, building a reference to cells to delete later. Then do the delete in one go.

    Other general tips:

    • Declare all variables
    • Use more appropriate data types (Long, Boolean)
    • Use End(xlUp) to avoid failing at unexpected blanks (unless you want to stop at the first blank)

    Refactored code:

    Sub Demo()
        Dim ResDelete As Boolean
        Dim iPRE As Long, iPOST As Long
        Dim EventState  As Boolean, CalcState As XlCalculation, PageBreakState As Boolean
        Dim iCntr As Long, y As Long, iRows As Long
        Dim rPreDelete As Range, rPostDelete As Range
    
        Dim PreDat As Variant, PostDat As Variant
    
        With ActiveWorkbook.Worksheets("PRE")
            PreDat = .Range(.Cells(1, 20), .Cells(.Rows.Count, 1).End(xlUp)).Value
            iPRE = UBound(PreDat, 1)
            'MsgBox iPRE
        End With
    
        With ActiveWorkbook.Worksheets("POST")
            PostDat = .Range(.Cells(1, 20), .Cells(.Rows.Count, 1).End(xlUp)).Value
            iPOST = UBound(PostDat, 1)
            'MsgBox iPOST
        End With
    
        If iPRE <> iPOST Then
            MsgBox "The number of rows in PRE and POST sheets are not the same. The macro quits"
            Exit Sub
        End If
        iRows = iPRE
    
        On Error GoTo EH:
     'Optimize Performance
    
        Application.ScreenUpdating = False
        EventState = Application.EnableEvents
        Application.EnableEvents = False
    
        CalcState = Application.Calculation
        Application.Calculation = xlCalculationManual
    
        PageBreakState = ActiveSheet.DisplayPageBreaks
        ActiveSheet.DisplayPageBreaks = False
    
        For iCntr = 2 To UBound(PreDat, 1)
            ResDelete = True
            For y = 1 To 20
                If PreDat(iCntr, y) <> PostDat(iCntr, y) Then
                    ResDelete = False
                    Exit For
                End If
            Next y
    
            If ResDelete Then
                If rPreDelete Is Nothing Then
                    Set rPreDelete = Worksheets("PRE").Rows(iCntr)
                    Set rPostDelete = Worksheets("POST").Rows(iCntr)
                Else
                    Set rPreDelete = Application.Union(rPreDelete, Worksheets("PRE").Rows(iCntr))
                    Set rPostDelete = Application.Union(rPostDelete, Worksheets("POST").Rows(iCntr))
                End If
            End If
        Next iCntr
        If Not rPreDelete Is Nothing Then
            rPreDelete.Delete
            rPostDelete.Delete
        End If
    
    CleanUp:
        'Revert optmizing lines
        On Error Resume Next
        ActiveSheet.DisplayPageBreaks = PageBreakState
        Application.Calculation = CalcState
        Application.EnableEvents = EventState
        Application.ScreenUpdating = True
    Exit Sub
    EH:
        ' Handle Errors here
    
        Resume CleanUp
    End Sub
    
    0 讨论(0)
  • 2020-12-04 01:12

    Perhaps you can go with 2 adjustments, although their performance impact will be only very small:

    ' prepare references to worksheets
    Dim WorksheetPRE As Worksheet
    Dim WorksheetPOST As Worksheet
    Set WorksheetPRE = ActiveWorkbook.Worksheets("PRE")
    Set WorksheetPOST = ActiveWorkbook.Worksheets("POST")
    

    and then, in your code, replace ActiveWorkbook.Worksheets("PRE") for WorksheetPRE etc.

    I think no other significant optimizations are possible when you stay in Excel. Remember, Microsoft Excel is primarily a spreadsheet calculator, not a data-table processing tool.

    If I really needed to speed-up the comparisons, then I would go with one of these approaches:

    • link Excel worksheet to Microsoft Access as table and perform the comparison in Access (easiest)

    • as the above, but instead of linking the table, import it

    • as the above two, but use Microsoft SQL Server (Express version is free)

    0 讨论(0)
  • 2020-12-04 01:14

    If I may put my two cents in, here is my suggestion.

    I've tested the original code (with the the only alteration being the For y = 1 to 10 instead of For y = 1 to 20) and my code against 2 sheets with 10 columns and (initially 500,000) 250,000 rows data each. The reason I've used 10 instead of 20 lies in the fact that I don't know what data is in the columns and as a substitute I have used a random value of either 1 or 2.

    • For 10 columns that means that there are 2^10 = 1,024 possibilities.
    • For 20 columns that means that there are 2^20 = 1,048,576 possibilities.

    As I wanted to have at least the possibility of a few equal rows in each table I opted for the 10 column scenario.

    To time the macro I set up a timer macro which calls the macro to compare and delete data.

    In order to be able to compare the results both macros were executed directly after starting Excel and opening the file with the exact same data.

    I have

    • avoided all instances of Active
    • minimized the reading and writing of data between Excel and VBA which is accomplished by collecting all data on a sheet in a 2D array and then analyzing the array.
    • collected the rows to delete in ranges (1 per sheet) and deleted all the rows to be deleted outside the loop

    The Code

    Sub CompareAndDelete()
        Dim WsPre As Worksheet, WsPost As Worksheet
        Dim Row As Long, Column As Long
        Dim ArrPre() As Variant, ArrPost() As Variant
        Dim DeleteRow As Boolean
        Dim DeletePre As Range, DeletePost As Range
    
        With Application
            .ScreenUpdating = False
            .Calculation = xlCalculationManual
            .EnableEvents = False
        End With
    
        With ThisWorkbook
            Set WsPre = .Worksheets("PRE")
            Set WsPost = .Worksheets("Post")
        End With
    
        ArrPre = WsPre.Range(WsPre.Cells(1, 1), WsPre.Cells(WsPre.Cells(WsPre.Rows.Count, 1).End(xlUp).Row, 20))
        ArrPost = WsPost.Range(WsPost.Cells(1, 1), WsPost.Cells(WsPost.Cells(WsPost.Rows.Count, 1).End(xlUp).Row, 20))
    
        If Not UBound(ArrPre, 1) = UBound(ArrPost, 1) Then
            MsgBox "Unequal number of rows in sheets PRE and POST. Exiting macro.", vbCritical, "Unequal sheets"
        Else
    
            For Row = 2 To UBound(ArrPre, 1)
                DeleteRow = True
                For Column = 1 To UBound(ArrPre, 2)
                    If Not ArrPre(Row, Column) = ArrPost(Row, Column) Then
                        DeleteRow = False
                        Exit For
                    End If
                Next Column
    
                If DeleteRow = True Then
                    If DeletePre Is Nothing Then
                        Set DeletePre = WsPre.Rows(Row)
                        Set DeletePost = WsPost.Rows(Row)
                    Else
                        Set DeletePre = Union(DeletePre, WsPre.Rows(Row))
                        Set DeletePost = Union(DeletePost, WsPost.Rows(Row))
                    End If
    
                End If
            Next Row
    
            If Not DeletePre Is Nothing Then DeletePre.Delete
            If Not DeletePost Is Nothing Then DeletePost.Delete
    
        End If
    
        With Application
            .ScreenUpdating = True
            .Calculation = xlCalculationAutomatic
            .EnableEvents = True
        End With
    
    End Sub
    

    Results

    My Code - 500,000 rows of data.

    Datasheet with 500.000 rows and 10 columns has been processed in 14,23 seconds, 561 rows have been found equal and have been deleted.

    Original Code - 500,000 rows of data.

    Unfortunately my system couldn't handle this task and Excel stopped working.


    My Code - 250,000 rows of data.

    Datasheet with 250.000 rows and 10 columns has been processed in 4,72 seconds, 313 rows have been found equal and have been deleted.

    Original Code - 250,000 rows of data.

    Datasheet with 250.000 rows and 10 columns has been processed in 14,07 seconds, 313 rows have been found equal and have been deleted.

    0 讨论(0)
提交回复
热议问题