VBA Macro to download multiple files from links in IE

让人想犯罪 __ 提交于 2019-12-01 13:59:57

Below is a fairly common example that I adapted for your case. It shows how to use XHR and RegEx to retrieve a webpage's HTML content, extract all links from it, and download each link's target file:

Option Explicit

Sub Test()
    ' Downloads the page at sUrl, extracts the href of every <a> tag with a
    ' regular expression, resolves each href against the source page's
    ' protocol and host, and saves every link target to C:\Test\<index>.xml.
    '
    ' Note: an href without a host is resolved against the host root, not
    ' against the directory of the source page (see SplitUrl).

    ' declare vars
    Dim sUrl As String
    Dim sReqProt As String
    Dim sReqAddr As String
    Dim sReqPath As String
    Dim sContent As String
    Dim oLinks As Object
    Dim oMatch As Object
    Dim sHref As String
    Dim sHrefProt As String
    Dim sHrefAddr As String
    Dim sHrefPath As String
    Dim sHrefFull As String
    Dim n As Variant ' For Each control variable must be Variant or Object
    Dim aContent() As Byte
    ' set source URL (placeholder - replace with the real address;
    ' forward slashes are required because SplitUrl splits on "//" and "/")
    sUrl = "https://....../links.html"
    ' process source URL
    SplitUrl sUrl, sReqProt, sReqAddr, sReqPath
    If sReqProt = "" Then sReqProt = "http:"
    sUrl = sReqProt & "//" & sReqAddr & "/" & sReqPath
    ' retrieve source page HTML content
    With CreateObject("Microsoft.XMLHTTP")
        .Open "GET", sUrl, False
        .Send ' synchronous: returns once the response has been received
        sContent = .ResponseText
    End With
    ' parse source page HTML content to extract all links
    Set oLinks = CreateObject("Scripting.Dictionary")
    With CreateObject("VBScript.RegExp")
        .Global = True
        .MultiLine = True
        .IgnoreCase = True
        .Pattern = "<a.*?href *= *(?:'|"")(.*?)(?:'|"").*?>"
        For Each oMatch In .Execute(sContent)
            sHref = oMatch.subMatches(0)
            SplitUrl sHref, sHrefProt, sHrefAddr, sHrefPath
            ' fill in the parts a relative link leaves out
            If sHrefProt = "" Then sHrefProt = sReqProt
            If sHrefAddr = "" Then sHrefAddr = sReqAddr
            sHrefFull = sHrefProt & "//" & sHrefAddr & "/" & sHrefPath
            ' key is the running index 0, 1, 2, ...
            oLinks(oLinks.Count) = sHrefFull
        Next
    End With
    ' save each link target into file
    For Each n In oLinks
        sHref = oLinks(n)
        With CreateObject("Microsoft.XMLHTTP")
            .Open "GET", sHref, False
            .Send
            aContent = .ResponseBody
        End With
        With CreateObject("ADODB.Stream")
            .Type = 1 ' adTypeBinary
            .Open ' the stream must be open before Write / SaveToFile
            .Write aContent
            .SaveToFile "C:\Test\" & n & ".xml", 2 ' adSaveCreateOverWrite
            .Close
        End With
    Next
End Sub

Sub SplitUrl(sUrl, sProt, sAddr, sPath)
    ' Extract protocol, address and path from URL.
    '
    ' sUrl  (in)  URL or href to split; may be absolute or relative.
    ' sProt (out) text before the first "//" (e.g. "https:"), or "" if the
    '             URL contains no "//".
    ' sAddr (out) host part between "//" and the next "/", or "" if the
    '             remainder contains no "/" (it is then treated as a path).
    ' sPath (out) everything after the host's trailing "/", or the whole
    '             remainder when no "/" is present.
    Dim aSplit
    ' limit 2: only the first "//" separates the protocol, so a later "//"
    ' (e.g. inside a query string) stays part of the address/path
    aSplit = Split(sUrl, "//", 2)
    ' UBound is -1 for an empty string and 0 when "//" is absent
    If UBound(aSplit) <= 0 Then
        sProt = ""
        sAddr = sUrl
    Else
        sProt = aSplit(0)
        sAddr = aSplit(1)
    End If
    aSplit = Split(sAddr, "/")
    If UBound(aSplit) <= 0 Then
        ' no "/" at all: treat the remainder as a path without a host
        sPath = sAddr
        sAddr = ""
    Else
        ' skip the host and the "/" that follows it
        sPath = Mid(sAddr, Len(aSplit(0)) + 2)
        sAddr = aSplit(0)
    End If
End Sub

This method doesn't employ IE automation. Usually the IE cookies that Microsoft.XMLHTTP processes are sufficient to refer to the current session, so if your website doesn't use additional procedures for authentication and for generating the list of links, the method should work for you.
