How to suppress cookie request

左心房为你撑大大i 提交于 2019-11-28 05:54:46


I am using VBA in Excel 2013 to scrape data from a Yahoo option contract page, and while I do get the data, I also get multiple requests to accept a cookie (see dialog below).

I tried accepting this to see if it would prevent further popups but no such luck. How can I suppress the dialog?

As an aside, I'm pretty sure there is an API for yahoo_option_contract that would serve up some cookie-free XML, but I couldn't get it to work. Can anyone verify that it does work and provide a link that explains how to use it?



Here is a sample link to Yahoo's site. It also happens that I show most of my code and scrape strategy at the bottom of a previous SO post.


' Issue a synchronous GET for aUrl and hand the request object back through
' GetHttp when the server answers 200; any other status raises
' ERR_WEB_CONNECTION with the status code and status text.
Set http = New MSXML2.XMLHTTP60
With http
    .Open "GET", aUrl, False
    ' The request must actually be sent before Status/readyState mean anything.
    .send
    ' readyState 4 = request complete. Open was called with async = False, so
    ' this loop normally exits immediately; it is kept as a safety net.
    Do Until .readyState = 4
        DoEvents
    Loop
End With

Select Case http.Status
    Case Is = 200
        Set GetHttp = http
    Case Else
        Err.Raise Number:=ERR_WEB_CONNECTION, _
            Description:="Bad Response " & http.Status & mStrings.Bracket(http.statusText)
End Select


Try the VBA code below to retrieve the HTML content of the page via XHR, parse it with RegEx, and output it to the worksheet:

Option Explicit

' Downloads the page at sUrl, extracts two-column table rows with regular
' expressions and writes the label/value pairs to the first worksheet of
' this workbook, starting at cell A1.
Sub Scrape_Yahoo_Option_Contract()

    Dim sUrl As String
    Dim aHeaders
    Dim sResp As String
    Dim sContent
    Dim oTables As Object
    Dim oRows As Object
    Dim aData()
    Dim i As Long

    ' Get data
    ' NOTE(review): the URL is blank in this listing - set it to the option
    ' contract page to be scraped before running.
    sUrl = ""
    aHeaders = Array( _
        Array("user-agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36") _
    )
    XmlHttpRequest "GET", sUrl, aHeaders, "", "", sResp
    ' Parse tables: key = opening <table ...> tag, item = table inner HTML
    ParseToDict "(<table class=""[^""]*?W\(100%\)[^>]*>)([\s\S]*?)</table>", sResp, oTables
    ' Parse rows: every table feeds the same oRows dictionary
    ' (key = first cell, item = second cell)
    For Each sContent In oTables.Items
        ParseToDict "<tr><td>(.*?)</td><td>(.*?)</td></tr>", HtmlSimplify(sContent), oRows
    Next
    ' Nothing matched: oRows is only created inside ParseToDict, and an empty
    ' dictionary would make the ReDim below fail (1 To 0).
    If oRows Is Nothing Then
        MsgBox "No matching tables were found in the response.", vbExclamation
        Exit Sub
    End If
    If oRows.Count = 0 Then
        MsgBox "No table rows were found in the response.", vbExclamation
        Exit Sub
    End If
    ' Populate 2d array
    ReDim aData(1 To oRows.Count, 1 To 2)
    i = 1
    For Each sContent In oRows
        aData(i, 1) = GetInnerText(sContent)
        aData(i, 2) = GetInnerText(oRows(sContent))
        i = i + 1
    Next
    ' Output array to worksheet 1
    With ThisWorkbook.Sheets(1)
        Output2DArray .Cells(1, 1), aData
    End With

End Sub

' Writes the 2-D array aCells to the sheet as text, using oDstRng as the
' top-left corner of the target area.
Sub Output2DArray(oDstRng As Range, aCells As Variant)

    Dim lRowCount As Long
    Dim lColCount As Long
    Dim oTarget As Range

    ' Size of the array along each dimension, whatever its lower bounds are
    lRowCount = UBound(aCells, 1) - LBound(aCells, 1) + 1
    lColCount = UBound(aCells, 2) - LBound(aCells, 2) + 1

    Set oTarget = oDstRng.Resize(lRowCount, lColCount)
    ' "@" = Text format, applied before the values are written
    oTarget.NumberFormat = "@"
    oTarget.Value = aCells

End Sub

' Performs a synchronous HTTP request.
'   sMethod       - HTTP verb, e.g. "GET"
'   sUrl          - target URL
'   arrSetHeaders - array of 2-element arrays (header name, header value);
'                   ignored when it is not an array
'   sFormData     - request body passed to Send
'   sRespHeaders  - (out) all response headers
'   sContent      - (out) response body as text
Sub XmlHttpRequest(sMethod As String, sUrl As String, arrSetHeaders, sFormData, sRespHeaders As String, sContent As String)

    Dim arrHeader

    ' Alternative HTTP stack left by the author; swap with the line below to try it.
    'With CreateObject("Msxml2.ServerXMLHTTP.3.0")
    With CreateObject("Msxml2.XMLHTTP")
        .Open sMethod, sUrl, False
        If IsArray(arrSetHeaders) Then
            For Each arrHeader In arrSetHeaders
                .SetRequestHeader arrHeader(0), arrHeader(1)
            Next
        End If
        .Send sFormData
        sRespHeaders = .GetAllResponseHeaders
        sContent = .ResponseText
    End With

End Sub

' Returns sCont reduced to bare markup: tag attributes removed, <span> and
' <small> wrappers dropped, &nbsp; turned into a space, line breaks/tabs
' removed, runs of spaces collapsed and single spaces between tags removed.
Function HtmlSimplify(ByVal sCont)

    Dim aRules
    Dim aRule
    Dim oRegEx As Object

    ' (pattern, replacement) pairs - applied strictly in this order, since
    ' later rules rely on what the earlier ones produce.
    aRules = Array( _
        Array("(<[\w\/^<]*)[\s\S]*?>", "$1>"), _
        Array("(?:<span>|</span>)", ""), _
        Array("(?:<small>|</small>)", ""), _
        Array("&nbsp;", " "), _
        Array("[\f\n\r\t\v]", ""), _
        Array(" +", " "), _
        Array("> <", "><") _
    )

    Set oRegEx = CreateObject("VBScript.RegExp")
    oRegEx.Global = True
    oRegEx.MultiLine = True
    oRegEx.IgnoreCase = True

    For Each aRule In aRules
        oRegEx.Pattern = aRule(0)
        sCont = oRegEx.Replace(sCont, aRule(1))
    Next aRule

    HtmlSimplify = sCont

End Function

' Runs sPattern over sResponse and stores every match in oDict, keyed by the
' first capture group with the second capture group as the item.
'   sPattern  - regular expression with at least two capture groups
'   sResponse - text to search
'   oDict     - (in/out) Scripting.Dictionary; created here when Nothing,
'               otherwise the existing dictionary is added to, so results
'               from several calls accumulate
' Matches whose first group is blank are skipped; a repeated key overwrites
' the earlier item.
Sub ParseToDict(sPattern As String, sResponse As String, oDict As Object)

    Dim oMatch

    If oDict Is Nothing Then Set oDict = CreateObject("Scripting.Dictionary")
    With CreateObject("VBScript.RegExp")
        .Global = True
        .MultiLine = True
        .IgnoreCase = True
        .Pattern = sPattern
        For Each oMatch In .Execute(sResponse)
            If Trim(oMatch.SubMatches(0)) <> "" Then oDict(oMatch.SubMatches(0)) = oMatch.SubMatches(1)
        Next
    End With

End Sub

' Returns the text of the HTML fragment sHtml by loading it into the body of
' an "htmlfile" document and reading the body's innerText.
Function GetInnerText(ByVal sHtml As String) As String

    ' Created on the first call and reused by later calls
    Static oDoc As Object

    If oDoc Is Nothing Then
        Set oDoc = CreateObject("htmlfile")
        oDoc.Write "<body></body>"
    End If

    ' Best effort: errors raised during the conversion are ignored
    On Error Resume Next
    With oDoc.body
        .innerHTML = sHtml
        GetInnerText = .innerText
    End With

End Function

