I found this article explaining how to scrape certain tags from a website using Excel VBA.
The code below gets the content from the first tag
You're almost there! `doc.GetElementsByTagName("p")` returns a collection of `HTMLParagraphElement` objects, of which you accessed only the first entry by using `doc.GetElementsByTagName("p")(0)`.
As you allude to, a `For Each` loop would let you access each element in turn:
Sub get_title_header()
    ' For every URL in column A of Sheet1 (row 2 downwards), loads the page in
    ' Internet Explorer and writes the page title to column B and the text of
    ' all <p> elements, joined with ", ", to column C of the same row.
    '
    ' Rows whose URL is blank, or whose page cannot be read, are skipped so one
    ' bad address does not stop the whole run.
    Const READYSTATE_COMPLETE As Long = 4   ' late-bound, so the IE enum value is spelled out
    Const MAX_CELL_CHARS As Long = 32767    ' documented Excel per-cell character limit

    Dim wb As Object            ' InternetExplorer.Application (late bound)
    Dim doc As Object           ' HTML document of the current page
    Dim el As Object            ' current <p> element
    Dim sURL As String
    Dim paragraphs As String
    Dim needSeparator As Boolean
    Dim lastrow As Long
    Dim i As Long               ' Long, not Integer: row numbers can exceed 32,767

    ' Qualify every cell access with Sheet1 so the macro does not depend on
    ' which worksheet happens to be active.
    lastrow = Sheet1.Cells(Sheet1.Rows.Count, "A").End(xlUp).Row
    If lastrow < 2 Then Exit Sub

    ' One browser instance is reused for all rows instead of one per row.
    Set wb = CreateObject("InternetExplorer.Application")
    wb.Visible = True

    On Error GoTo row_failed
    For i = 2 To lastrow
        sURL = Trim$(CStr(Sheet1.Cells(i, 1).Value))
        If Len(sURL) > 0 Then
            wb.navigate sURL
            ' Busy alone can drop to False before the document is ready,
            ' so also wait for the ready state to reach "complete".
            Do While wb.Busy Or wb.readyState <> READYSTATE_COMPLETE
                DoEvents
            Loop

            'HTML document
            Set doc = wb.document
            Sheet1.Cells(i, 2).Value = doc.Title

            ' Build the text in a local string: no leading separator, and a
            ' re-run overwrites the cell instead of appending to old output.
            paragraphs = vbNullString
            needSeparator = False
            For Each el In doc.getElementsByTagName("p")
                If needSeparator Then paragraphs = paragraphs & ", "
                paragraphs = paragraphs & el.innerText
                needSeparator = True
            Next el
            Sheet1.Cells(i, 3).Value = Left$(paragraphs, MAX_CELL_CHARS)

            Sheet1.Range(Sheet1.Cells(i, 1), Sheet1.Cells(i, 3)).Columns.AutoFit
        End If
next_row:
    Next i

    ' Always close the browser, even if it has already gone away.
    On Error Resume Next
    wb.Quit
    Set doc = Nothing
    Set wb = Nothing
    Exit Sub

row_failed:
    ' Abandon the current row and carry on with the next one.
    Resume next_row
End Sub
If you just need to get the content of the webpage as plain text, this code is more concise:
Function WEBSITE_TEXT(Destination As String) As String
    ' Returns the plain text (body.innerText) of the page at Destination.
    ' Returns "" when Destination is empty or when any step raises an error.
    '
    ' Requires a reference to Microsoft XML, v6.0 (XMLHTTP60)
    ' and to the Microsoft HTML Object Library (HTMLDocument)
    ' Draws on the stackoverflow answer at bit.ly/parseXML
    Dim myRequest As XMLHTTP60
    Dim html As HTMLDocument
    Dim text As String

    ' Any failure below falls through to the tidy-up with the result still ""
    On Error GoTo exitRoute

    ' Check and clean inputs
    If Destination = "" Then
        WEBSITE_TEXT = ""
        Exit Function
    End If

    ' Fetch the page synchronously (third argument False = not async)
    Set myRequest = New XMLHTTP60
    myRequest.Open "GET", Destination, False
    myRequest.send

    ' Parse HTML content
    Set html = New HTMLDocument
    html.body.innerHTML = myRequest.responseText

    ' Return the website content
    text = html.body.innerText
    WEBSITE_TEXT = text

exitRoute:
    ' Tidy up
    text = ""
    Set html = Nothing
    Set myRequest = Nothing
End Function