Remove duplicate Outlook items from a folder

后端 未结 4 750
北荒
北荒 2021-02-06 10:25

issue

  1. Outlook 2016 corrupted while I was moving items from an online archive into a pst file.
  2. The PST file has been recovered .... but many
相关标签:
4条回答
  • 2021-02-06 10:56

    The approach below:

    1. Provides users with a prompt to select the folder to process
    2. Checks duplicates on the base of Subject, Sender, CreationTime and Size
    3. Moved (rather than delete) any duplicates into a sub-folder (removed items) of the folder being processed.
    4. Create a CSV file - stored under the path in StrPath to create a external reference to Outlook of the emails that have been moved.

    Updated: Checking for size surprisingly missed a number of dupes, even for otherwise identical mail items. I have changed the test to subject and body

    Tested on Outlook 2016

    Const strPath = "c:\temp\deleted msg.csv"
    Sub DeleteDuplicateEmails()
    
    Dim lngCnt As Long
    Dim objMail As Object
    Dim objFSO As Object
    Dim objTF As Object
    
    Dim objDic As Object
    Dim objItem As Object
    Dim olApp As Outlook.Application
    Dim olNS As NameSpace
    Dim olFolder As Folder
    Dim olFolder2 As Folder
    Dim strCheck As String
    
    Set objDic = CreateObject("scripting.dictionary")
    Set objFSO = CreateObject("scripting.filesystemobject")
    Set objTF = objFSO.CreateTextFile(strPath)
    objTF.WriteLine "Subject"
    
    Set olApp = Outlook.Application
    Set olNS = olApp.GetNamespace("MAPI")
    Set olFolder = olNS.PickFolder
    
    If olFolder Is Nothing Then Exit Sub
    
    On Error Resume Next
    Set olFolder2 = olFolder.Folders("removed items")
    On Error GoTo 0
    
    If olFolder2 Is Nothing Then Set olFolder2 = olFolder.Folders.Add("removed items")
    
    
    For lngCnt = olFolder.Items.Count To 1 Step -1
    
    Set objItem = olFolder.Items(lngCnt)
    
    strCheck = objItem.Subject & "," & objItem.Body & ","
    strCheck = Replace(strCheck, ", ", Chr(32))
    
        If objDic.Exists(strCheck) Then
           objItem.Move olFolder2
           objTF.WriteLine Replace(objItem.Subject, ", ", Chr(32))
        Else
            objDic.Add strCheck, True
        End If
    Next
    
    If objTF.Line > 2 Then
        MsgBox "duplicate items were removed to ""removed items""", vbCritical, "See " & strPath & " for details"
    Else
        MsgBox "No duplicates found"
    End If
    End Sub
    
    0 讨论(0)
  • 2021-02-06 10:56

    Here's a script that takes advantage of sorting emails to check for duplicates much more efficiently.

    There's no need to maintain a giant dictionary of every email you've seen if you are processing emails in a deterministic order (e.g. received date). Once the date changes, you know you'll never see another email with the prior date, therefore, they won't be duplicates, so you can clear your dictionary on each date change.

    This script also takes into account the fact that some items use an HTMLBody for the full message definition, and others don't have that property.

    Sub DeleteDuplicateEmails()
        Dim allMails As Outlook.Items
        Dim objMail As Object, objDic As Object, objLastMail As Object
        Dim olFolder As Folder, olDuplicatesFolder As Folder
        Dim strCheck As String
        Dim received As Date, lastReceived As Date        
    
        Set objDic = CreateObject("scripting.dictionary")
        With Outlook.Application.GetNamespace("MAPI")
            Set olFolder = .PickFolder
        End With
        If olFolder Is Nothing Then Exit Sub
    
        On Error Resume Next
        Set olDuplicatesFolder = olFolder.Folders("Duplicates")
        On Error GoTo 0
        If olDuplicatesFolder Is Nothing Then Set olDuplicatesFolder = olFolder.Folders.Add("Duplicates")
    
        Debug.Print "Sorting " & olFolder.Name & " by ReceivedTime..."
        Set allMails = olFolder.Items
        allMails.Sort "[ReceivedTime]", True
        Dim totalCount As Long, index As Long
        totalCount = allMails.count
        Debug.Print totalCount & " Items to Process..."
    
        lastReceived = "1/1/1987"
        For index = totalCount - 1 To 1 Step -1
            Set objMail = allMails(index)
            received = objMail.ReceivedTime
            If received < lastReceived Then
                Debug.Print "Error: Expected emails to be in order of date recieved. Previous mail was " & lastReceived _
                    & " current is " & received
                Exit Sub
            ElseIf received = lastReceived Then
                ' Might be a duplicate track mail contents until this recieved time changes.
                ' Add the last mail to the dictionary if it hasn't been tracked yet
                If Not objLastMail Is Nothing Then
                    Debug.Print "Found multiple emais recieved at " & lastReceived & ", checking for duplicates..."
                    objDic.Add GetMailKey(objLastMail), True
                End If
                ' Now check the current mail item to see if it's a duplicate
                strCheck = GetMailKey(objMail)
                If objDic.Exists(strCheck) Then
                    Debug.Print "Found Duplicate: """ & objMail.Subject & " on " & lastReceived
                    objMail.Move olDuplicatesFolder
                    DoEvents
                Else
                    objDic.Add strCheck, True
                End If
                ' No need to track the last mail, since we have it in the dictionary
                Set objLastMail = Nothing
            Else
                ' This can't be a duplicate, it has a different date, reset our dictionary
                objDic.RemoveAll
                lastReceived = received
                ' Keep track of this mail in case we end up needing to build a dictionary
                Set objLastMail = objMail
            End If
    
            ' Progress update
            If index Mod 10 = 0 Then
                Debug.Print index & " Remaining..."
            End If
            DoEvents
        Next
        Debug.Print "Finished moving Duplicate Emails"
    End Sub
    

    And the helper function referenced above for "uniquely identifying" an email. Adapt as needed, but I think if the subject and full body are the same, there's no point in checking anything else. Also works for calendar invites, etc.:

    Function GetMailKey(ByRef objMail As Object) As String
        On Error GoTo NoHTML
        GetMailKey = objMail.Subject & objMail.HTMLBody
        Exit Function
    BodyKey:
        On Error GoTo 0
        GetMailKey = objMail.Subject & objMail.Body
        Exit Function
    NoHTML:
        Err.Clear
        Resume BodyKey
    End Function
    
    0 讨论(0)
  • 2021-02-06 11:15

    Dears, thank you VERY MUCH you saved my day :-) I have simplified the duplicate search as in my case I imported muliple duplicates from PST files but the full mail body didn't matched, I don't know exactly why, as I am sure those mail are true duplicates So my simplification is to match ONLY the receive TIME STAMP and the SUBJECT I also add an error exception I got some times on the function: Set olDuplicatesFolder = olFolder.Folders("Duplicates") And did different format for the debug.print messages So here's my code, that works very well for me. THANK YOU

    Attribute VB_Name = "DelDupEmails_DATE_SUBJECT"
    Sub DeleteDuplicateEmails_DATE_SUBJECT()
    Dim allMails As Outlook.Items
    Dim objMail As Object, objDic As Object, objLastMail As Object
    Dim olFolder As Folder, olDuplicatesFolder As Folder
    Dim strCheck As String
    Dim received As Date, lastReceived As Date
    
    Set objDic = CreateObject("scripting.dictionary")
    With Outlook.Application.GetNamespace("MAPI")
        Set olFolder = .PickFolder
    End With
    If olFolder Is Nothing Then Exit Sub
    
    On Error Resume Next
    Set olDuplicatesFolder = olFolder.Folders("Duplicates")
    On Error GoTo 0
    If olDuplicatesFolder Is Nothing Then Set olDuplicatesFolder = olFolder.Folders.Add("Duplicates")
    
    Debug.Print "Sorting " & olFolder.Name & " by ReceivedTime..."
    Set allMails = olFolder.Items
    allMails.Sort "[ReceivedTime]", True
    Dim totalCount As Long, index As Long
    totalCount = allMails.Count
    Debug.Print totalCount & " Items to Process..."
    'MsgBox totalCount & " Items to Process..."
    
    lastReceived = "1/1/1987"
    For index = totalCount - 1 To 1 Step -1
        Set objMail = allMails(index)
        On Error Resume Next
        received = objMail.ReceivedTime
        On Error GoTo 0
        If received < lastReceived Then
            Debug.Print "Error: Expected emails to be in order of date recieved. Previous mail was " & lastReceived _
                & " current is " & received
            Exit Sub
        ElseIf received = lastReceived Then
            ' Might be a duplicate track mail contents until this recieved time changes.
            ' Add the last mail to the dictionary if it hasn't been tracked yet
            If Not objLastMail Is Nothing Then
                Debug.Print olFolder & " : Found multiple emails recieved at " & lastReceived & ", checking for duplicates..."
                'MsgBox "Found multiple emails recieved at " & lastReceived & ", checking for duplicates..."
                objDic.Add GetMailKey(objLastMail), True
            End If
            ' Now check the current mail item to see if it's a duplicate
            strCheck = GetMailKey(objMail)
            If objDic.Exists(strCheck) Then
                Debug.Print "#" & index & " - Duplicate: " & lastReceived & " " & objMail.Subject
                'Debug.Print "Found Duplicate: """ & objMail.Subject & " on " & lastReceived
                'MsgBox "Found Duplicate: """ & objMail.Subject & " on " & lastReceived
                objMail.Move olDuplicatesFolder
                DoEvents
            Else
                objDic.Add strCheck, True
            End If
            ' No need to track the last mail, since we have it in the dictionary
            Set objLastMail = Nothing
        Else
            ' This can't be a duplicate, it has a different date, reset our dictionary
            objDic.RemoveAll
            lastReceived = received
            ' Keep track of this mail in case we end up needing to build a dictionary
            Set objLastMail = objMail
        End If
    
        ' Progress update
        If index Mod 100 = 0 Then
            Debug.Print index & " Remaining... from " & olFolder
            'MsgBox index & " Remaining..."
        End If
        DoEvents
    Next
    Debug.Print "Finished moving Duplicate Emails"
    MsgBox "Finished moving Duplicate Emails"
    
    End Sub
    
    Function GetMailKey(ByRef objMail As Object) As String
      On Error GoTo NoHTML
      'GetMailKey = objMail.Subject & objMail.HTMLBody
      GetMailKey = objMail.Subject ' & objMail.HTMLBody
      Exit Function
    BodyKey:
      On Error GoTo 0
      'GetMailKey = objMail.Subject & objMail.Body
      GetMailKey = objMail.Subject ' & objMail.Body
      Exit Function
    NoHTML:
      Err.Clear
      Resume BodyKey
    End Function
    
    0 讨论(0)
  • 2021-02-06 11:20

    I've wrote a VBA script called "Outlook Duplicated Items Remover"

    The source code is available on GitHub

    It will find all duplicated items in a folder and its subfolders and move them to a dedicated folder

    0 讨论(0)
提交回复
热议问题