VBA HTML Listing Info Pull

后端 未结 3 637
终归单人心
终归单人心 2021-01-26 14:29

I am looking to follow a series of URL\'s that are found in column A (example: https://www.ebay.com/itm/Apple-iPhone-7-GSM-Unlocked-Verizon-AT-T-TMobile-Sprint-32GB-128GB-256GB/

相关标签:
3条回答
  • 2021-01-26 14:53

    Here's an approach using Web Requests, using MSXML. It should be significantly faster than using IE, and I'd encourage you to strongly consider using this approach wherever possible.

    You'll need references to Microsoft HTML Object Library and Microsoft XML v6.0 to get this working.

    Option Explicit
    
    Public Sub SubmitRequest()
        Dim URLs                              As Excel.Range
        Dim URL                               As Excel.Range
        Dim LastRow                           As Long
        Dim wb                                As Excel.Workbook: Set wb = ThisWorkbook
        Dim ws                                As Excel.Worksheet: Set ws = wb.Worksheets(1)
        Dim ListingDetail                     As Variant
        Dim i                                 As Long
        Dim j                                 As Long
        Dim html                              As HTMLDocument
    
        ReDim ListingDetail(0 To 2, 0 To 10000)
    
        'Get URLs
        With ws
            LastRow = .Cells(.Rows.Count, 1).End(xlUp).Row
            Set URLs = .Range(.Cells(1, 1), .Cells(LastRow, 1))
        End With
    
        'Update the ListingDetail
        For Each URL In URLs
            Set html = getHTML(URL.Value2)
            ListingDetail(0, i) = html.getElementByID("itemTitle").innertext 'Title
            ListingDetail(1, i) = html.getElementByID("prcIsum").innertext 'Price
            ListingDetail(2, i) = html.getElementsByClassName("viSNotesCnt")(0).innertext 'Seller Notes
            i = i + 1
        Next
    
        'Resize array
        ReDim Preserve ListingDetail(0 To 2, 0 To i - 1)
    
        'Dump in Column T,U,V of existing sheet
        ws.Range("T1:V" & i).Value = WorksheetFunction.Transpose(ListingDetail)
    End Sub
    
    Private Function getHTML(ByVal URL As String) As HTMLDocument
        'Add a reference to Microsoft HTML Object Library
        Set getHTML = New HTMLDocument
    
        With New MSXML2.XMLHTTP60
            .Open "GET", URL
            .send
            getHTML.body.innerHTML = .responseText
        End With
    End Function
    
    0 讨论(0)
  • 2021-01-26 15:01

    I would use late binding for MSXML2.XMLHTTP and set a reference to the Microsoft HTML Object Library for the HTMLDocument.

    Note: querySelector() references the first item it finds that matches its search string.

    Here is the short version:

    Public Sub ListingInfo()
        Dim cell As Range
        With ThisWorkbook.Worksheets("Sheet1")
            For Each cell In .Range("A1", .Cells(.Rows.Count, 1).End(xlUp))
                Dim Document As MSHTML.HTMLDocument
                With CreateObject("MSXML2.XMLHTTP")
                    .Open "GET", cell.Value, False
                    .send
                    Set Document = New MSHTML.HTMLDocument
                    Document.body.innerHTML = .responseText
                End With
                cell.Offset(0, 1).Value = Document.getElementByID("itemTitle").innerText
                cell.Offset(0, 2).Value = Document.getElementByID("prcIsum").innerText
    
                If Not Document.querySelector(".viSNotesCnt") Is Nothing Then
                    cell.Offset(0, 3).Value = Document.querySelector(".viSNotesCnt").innerText
                Else
                    'Try Something Else
                End If
            Next
        End With
    End Sub
    

    A more elaborate solution would be to break the code up into smaller routines and load the data into an Array. The main advantage of this is that you can test each subroutine separately.

    Option Explicit
    Public Type tListingInfo
        Description As String
        Price As Currency
        Title As String
    End Type
    
    Public Sub ListingInfo()
        Dim source As Range
        Dim data As Variant
        With ThisWorkbook.Worksheets("Sheet1")
            Set source = .Range("A1:D1", .Cells(.Rows.count, 1).End(xlUp))
            data = source.Value
        End With
        Dim r As Long
        Dim record As tListingInfo
        Dim url As String
    
        For r = 1 To UBound(data)
            record = getListingInfo()
            url = data(r, 1)
            record = getListingInfo(url)
            With record
                data(r, 2) = .Description
                data(r, 3) = .Price
                data(r, 4) = .Title
            End With
        Next
        source.Value = data
    End Sub
    
    Public Function getListingInfo(url As String) As tListingInfo
        Dim ListingInfo As tListingInfo
        Dim Document As MSHTML.HTMLDocument
        Set Document = getHTMLDocument(url)
    
        With ListingInfo
            .Description = Document.getElementByID("itemTitle").innerText
            .Price = Split(Document.getElementByID("prcIsum").innerText)(1)
            .Title = Document.querySelectorAll(".viSNotesCnt")(0).innerText
            Debug.Print .Description, .Price, .Title
        End With
    End Function
    
    Public Function getHTMLDocument(url As String) As MSHTML.HTMLDocument
        Const READYSTATE_COMPLETE As Long = 4
    
        Dim Document As MSHTML.HTMLDocument
        With CreateObject("MSXML2.XMLHTTP")
            .Open "GET", url, False
            .send
            If .readyState = READYSTATE_COMPLETE And .Status = 200 Then
                Set Document = New MSHTML.HTMLDocument
                Document.body.innerHTML = .responseText
                Set getHTMLDocument = Document
            Else
                MsgBox "URL:  " & vbCrLf & "Ready state: " & .readyState & vbCrLf & "HTTP request status: " & .Status, vbInformation, "URL Not Responding"
            End If
        End With
    End Function
    
    0 讨论(0)
  • 2021-01-26 15:12

    There are a lot of things to fix in your code. It is late here so I will just give pointers (and update fully later) and working code below:

    1. Declare all variables and use appropriate type
    2. Review For Loops and how transpose can be used to create a 1d array of urls pulled from sheet to loop over
    3. Review the difference between querySelector and querySelectorAll methods
    4. Review CSS selectors (you are specifying everything as type selector when in fact you are not selecting by tag for the elements of interest; nor by your stated text)
    5. Think about placement of your IE object creation and of your .Navigate2 to make use of existing object
    6. Make sure to use distinct loop counters
    7. Be sure not to overwrite values in sheet

    Code:

    Option Explicit
    Public Sub ListingInfo()
        Dim ie As New InternetExplorer, ws As Worksheet
        Dim i As Long, urls(), rowCounter As Long
        Dim title As Object, price As Object, description As Object
        Set ws = ThisWorkbook.Worksheets("Sheet1")
        urls = Application.Transpose(ws.Range("A1:A2").Value) '<= Adjust
        With ie
            .Visible = True
            For i = LBound(urls) To UBound(urls)
                If InStr(urls(i), "http") > 0 Then
                    rowCounter = rowCounter + 1
                    .Navigate2 urls(i)
                    While .Busy Or .readyState < 4: DoEvents: Wend
                    Set title = .document.querySelector(".it-ttl")
                    Set price = .document.querySelector("#prcIsum")
                    Set description = .document.querySelector("#viTabs_0_is")
    
                    ws.Cells(rowCounter, 3) = title.innerText
                    ws.Cells(rowCounter, 4) = price.innerText
                    ws.Cells(rowCounter, 5) = description.innerText
                    Set title = Nothing: Set price = Nothing: Set description = Nothing
                End If
            Next
            .Quit
        End With
    End Sub
    
    0 讨论(0)
提交回复
热议问题