Convert HTML-table to Excel
The code below fetches the HTML-table at https://rasmusrhl.github.io/stuff, and converts it to Excel-format.
The pr
To get the tabular data (keeping the format as it is) from that page, you can try like below:
Sub Fetch_Data()
Dim http As New XMLHTTP60, html As New HTMLDocument
Dim posts As Object, post As Object, elem As Object
Dim row As Long, col As Long
With http
.Open "GET", "https://rasmusrhl.github.io/stuff/", False
.send
html.body.innerHTML = .responseText
End With
Set posts = html.getElementsByClassName("gmisc_table")(0)
For Each post In posts.Rows
For Each elem In post.Cells
col = col + 1: Cells(row + 1, col).NumberFormat = "@": Cells(row + 1, col) = elem.innerText
Next elem
col = 0
row = row + 1
Next post
End Sub
Reference to add to the library:
1. Microsoft HTML Object Library
2. Microsoft XML, v6.0 'or whatever version you have
This is how that portion looks like when get parsed.
Try this, to import the data as a table:
Sub ImportDataAsTable()
ActiveWorkbook.Queries.Add Name:="Table 0", Formula:= _
"let" & Chr(13) & "" & Chr(10) & " Source = Web.Page(Web.Contents(""https://rasmusrhl.github.io/stuff/""))," & Chr(13) & "" & Chr(10) & " Data0 = Source{0}[Data]," & Chr(13) & "" & Chr(10) & " #""Changed Type"" = Table.TransformColumnTypes(Data0,{{""tailnum"", type text}, {"""", type text}, {""Some text goes here. It is long and does not break Machine type (make) year"", type text}, {""Some text goes here. It is long and does not break Mach" & _
"ine type (make) type"", type text}, {""Some text goes here. It is long and does not break Machine type (make) manufacturer"", type text}, {""Some text goes here. It is long and does not break"", type text}, {""Some text goes here. It is long and does not break Specification of machine model"", type text}, {""Some text goes here. It is long and does not break Specifi" & _
"cation of machine engines"", type text}, {""Some text goes here. It is long and does not break Specification of machine seats"", type text}, {""Some text goes here. It is long and does not break Specification of machine speed"", type text}, {""Some text goes here. It is long and does not break Specification of machine engine"", type text}, {""2"", type text}, {""Oth" & _
"er text goes here Other variables s1"", type text}, {""Other text goes here Other variables s2"", type text}})" & Chr(13) & "" & Chr(10) & "in" & Chr(13) & "" & Chr(10) & " #""Changed Type"""
ActiveWorkbook.Worksheets.Add
With ActiveSheet.ListObjects.Add(SourceType:=0, Source:= _
"OLEDB;Provider=Microsoft.Mashup.OleDb.1;Data Source=$Workbook$;Location=""Table 0"";Extended Properties=""""" _
, Destination:=Range("$A$1")).QueryTable
.CommandType = xlCmdSql
.CommandText = Array("SELECT * FROM [Table 0]")
.RowNumbers = False
.FillAdjacentFormulas = False
.PreserveFormatting = True
.RefreshOnFileOpen = False
.BackgroundQuery = True
.RefreshStyle = xlInsertDeleteCells
.SavePassword = False
.SaveData = True
.AdjustColumnWidth = True
.RefreshPeriod = 0
.PreserveColumnInfo = True
.ListObject.DisplayName = "Table_0"
.Refresh BackgroundQuery:=False
End With
End Sub
<style type=text/css>
td {mso-number-format: '\@';}
</style>
<table ...
Putting the above global style definition for the cells (<td>
s) on the output you generate using R or rewriting the document on the client side like below just works.
Sub importhtml()
'*********** HTML document rewrite process ***************
Const TableUrl = "https://rasmusrhl.github.io/stuff"
Const adTypeBinary = 1, adSaveCreateOverWrite = 2, TemporaryFolder = 2
Dim tempFilePath, binData() As Byte
With CreateObject("Scripting.FileSystemObject")
tempFilePath = .BuildPath(.GetSpecialFolder(TemporaryFolder), .GetTempName() & ".html")
End With
'download HTML document
With CreateObject("MSXML2.ServerXMLHTTP")
.Open "GET", TableUrl, False
.Send
If .Status <> 200 Then Err.Raise 3, "importhtml", "200 expected"
binData = .ResponseBody
End With
With CreateObject("Adodb.Stream")
.Charset = "x-ansi"
.Open
.WriteText "<style type=text/css>td {mso-number-format:'\@';}</style>"
.Position = 0 'move to start
.Type = adTypeBinary 'change stream type
.Position = .Size 'move to end
.Write binData 'append binary data end of stream
.SaveToFile tempFilePath, adSaveCreateOverWrite 'save temporary file
.Close
End With
'*********** HTML document rewrite process ***************
With ActiveSheet.QueryTables.Add(Connection:= _
"URL;" & tempFilePath, Destination:=Range("$A$1"))
'load HTML document from rewritten local copy
.Name = "stuff"
.FieldNames = True
.RowNumbers = False
.FillAdjacentFormulas = False
.PreserveFormatting = False
.RefreshOnFileOpen = False
.BackgroundQuery = True
.RefreshStyle = xlInsertDeleteCells
.SavePassword = False
.SaveData = True
.AdjustColumnWidth = True
.RefreshPeriod = 0
.WebSelectionType = xlEntirePage
.WebFormatting = xlWebFormattingAll
.WebPreFormattedTextToColumns = True
.WebConsecutiveDelimitersAsOne = True
.WebSingleBlockTextImport = False
.WebDisableDateRecognition = True
.WebDisableRedirections = False
.Refresh BackgroundQuery:=False
End With
Kill tempFilePath
End Sub