Scraping specific data inside a table II

后端 未结 2 867
感动是毒
感动是毒 2021-01-27 18:11

I hate that I have to ask this question again but the website I had been scraping data from updated, not just aesthetically, the underlying code has changed too. Before the upd

相关标签:
2条回答
  • 2021-01-27 18:44

    You can get the value of eps using an XMLHTTP request if you send a POST request to the correct URL along with the appropriate parameters. When you run the script, you should get a JSON response containing the required result. I used a regex to parse the specific portion you are interested in.

    The payload is bigger than usual. However, you can change the ticker name within the variable accordingly.

    This is how you can go:

    Sub GetContent()
        ' Posts a GraphQL "getQuoteBySymbol" query for one ticker to the TMX Money
        ' endpoint and shows the "eps" value found in the JSON response text.
        '
        ' The ticker is set through tickerName below. The response is not parsed as
        ' JSON; the eps value is extracted from the raw text with a regular expression.
        ' A message is shown instead of a runtime error when the request fails or
        ' when the response contains no "eps" field.
        Const link = "https://app-money.tmx.com/graphql"
        Dim matches As Object, payload As Variant, S$, tickerName$
        
        tickerName = "AAPL:US"      'use ticker name here
    
        ' Doubled quotes ("") are VBA escapes for a literal quote inside the JSON body.
        payload = "{""operationName"":""getQuoteBySymbol"",""variables"":{""symbol"":""" & tickerName & """,""locale"":""en""},""query"":""query getQuoteBySymbol($symbol: String, $locale: String) {\n  getQuoteBySymbol(symbol: $symbol, locale: $locale) {\n    symbol\n    name\n    price\n    priceChange\n    percentChange\n    exchangeName\n    exShortName\n    exchangeCode\n    marketPlace\n    sector\n    industry\n    volume\n    openPrice\n    dayHigh\n    dayLow\n    MarketCap\n" & _
                "MarketCapAllClasses\n    peRatio\n    prevClose\n    dividendFrequency\n    dividendYield\n    dividendAmount\n    dividendCurrency\n    beta\n    eps\n    exDividendDate\n    shortDescription\n    longDescription\n    website\n    email\n    phoneNumber\n    fullAddress\n    employees\n    shareOutStanding\n    totalDebtToEquity\n    totalSharesOutStanding\n    sharesESCROW\n    vwap\n    dividendPayDate\n    weeks52high\n    weeks52low\n    alpha\n    averageVolume10D\n    averageVolume30D\n    averageVolume50D\n    priceToBook\n    priceToCashFlow\n    returnOnEquity\n" & _
                "returnOnAssets\n    day21MovingAvg\n    day50MovingAvg\n    day200MovingAvg\n    dividend3Years\n    dividend5Years\n    datatype\n    __typename\n  }\n}\n""}"
    
        With CreateObject("MSXML2.XMLHTTP")
            .Open "POST", link, False       'False = synchronous: .send blocks until done
            .setRequestHeader "User-Agent", "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36"
            .setRequestHeader "Content-Type", "application/json"
            .send payload
            
            ' Do not run the regex over an error page.
            If .Status <> 200 Then
                MsgBox "Request failed: HTTP " & .Status & " " & .statusText, vbExclamation
                Exit Sub
            End If
            S = .responseText
        End With
        
        With CreateObject("VBScript.RegExp")
            ' Only the first match is used and the pattern has no ^/$ anchors,
            ' so the Global and MultiLine flags are not needed.
            .Pattern = """eps"":(.*?),"
            Set matches = .Execute(S)
        End With
        
        ' Indexing an empty match collection raises a runtime error, so check first.
        If matches.Count = 0 Then
            MsgBox "No ""eps"" value found in the response for " & tickerName, vbExclamation
            Exit Sub
        End If
        
        MsgBox matches(0).SubMatches(0)
    End Sub
    
    0 讨论(0)
  • 2021-01-27 18:56

    It's a dynamic page. The content is loaded as you scroll down, so you must first wait for the "first part" of the page to load. Then scroll down to the needed table (to a vertical offset of 1500), and wait again for the table to load.

    You don't need a loop to scrape the wanted values. You can use querySelector() to get the specific element by its attribute name and attribute value.

    Look here for information about querySelector()
    And here for examples you can use also with querySelector(): querySelectorAll()

    This code works for me. If it doesn't work for you, play with the wait times and perhaps the amount of scrolling:

    Sub test()
    ' Opens the TMX Money quote page for GE:US in Internet Explorer, scrolls down
    ' so the lazily loaded table is requested, reads the EPS and dividend values
    ' by their data-testid attributes and shows them in a message box.
    '
    ' Internet Explorer is always closed and the status bar always restored, even
    ' when an error occurs. If an element is not present, "(not found)" is shown
    ' for that value instead of raising run-time error 91.
    
    Dim ieObj As Object
    Dim nodeEps As Object
    Dim nodeDividend As Object
    Dim eps As String
    Dim dividend As String
    Dim errText As String
    
      On Error GoTo Fail
      
      Set ieObj = CreateObject("InternetExplorer.Application")
      ieObj.Visible = True
      'ieObj.navigate "https://web.tmxmoney.com/quote.php?qm_symbol=" & Cells(c, 2) & ":US"
      ieObj.navigate "https://web.tmxmoney.com/quote.php?qm_symbol=GE:US"
      
      ' Set the status text once; wait until IE is neither busy nor still loading.
      Application.StatusBar = "Getting to 'Key Data' Table"
      Do While ieObj.Busy Or ieObj.readyState <> 4
        DoEvents
      Loop
      
      ' Fixed pauses: give the first part of the page time to render, scroll to
      ' trigger loading of the table, then wait for the table itself.
      Application.Wait (Now + TimeSerial(0, 0, 3))
      ieObj.document.parentWindow.Scroll 0, 1500
      Application.Wait (Now + TimeSerial(0, 0, 2))
      
      'scrape EPS amount (querySelector returns Nothing when there is no match)
      Set nodeEps = ieObj.document.querySelector("div[data-testid='eps-value']")
      If nodeEps Is Nothing Then
        eps = "(not found)"
      Else
        eps = Trim(nodeEps.innerText)
      End If
      'Range("H2").Value = eps
      
      'scrape dividend
      Set nodeDividend = ieObj.document.querySelector("div[data-testid='dividendAmount-value']")
      If nodeDividend Is Nothing Then
        dividend = "(not found)"
      Else
        dividend = Trim(nodeDividend.innerText)
      End If
      'Range("I2").Value = dividend
      
    CleanUp:
      ' Cleanup must not raise, otherwise the handler below would loop forever.
      On Error Resume Next
      If Not ieObj Is Nothing Then ieObj.Quit
      Set ieObj = Nothing
      Set nodeEps = Nothing
      Set nodeDividend = Nothing
      Application.StatusBar = False
      On Error GoTo 0
      
      If Len(errText) > 0 Then
        MsgBox errText, vbExclamation
      Else
        MsgBox eps & Chr(13) & dividend
      End If
      Exit Sub
      
    Fail:
      errText = "Scraping failed: " & Err.Description
      Resume CleanUp
    End Sub
    
    0 讨论(0)
提交回复
热议问题