Convert HTML-table to Excel
The code below fetches the HTML-table at https://rasmusrhl.github.io/stuff, and converts it to Excel-format.
The pr
You may give this a try to see if you get the desired output...
Sub GetWebData()
Dim IE As Object
Dim doc As Object
Dim TRs As Object
Dim TR As Object
Dim Cell As Object
Dim r As Long, c As Long
Application.ScreenUpdating = False
Set IE = CreateObject("InternetExplorer.Application")
IE.Visible = False
IE.navigate "https://rasmusrhl.github.io/stuff/"
Do While IE.Busy Or IE.readyState <> 4
DoEvents
Loop
Set doc = IE.document
Set TRs = doc.getElementsByTagName("tr")
Cells.Clear
For Each TR In TRs
r = r + 1
For Each Cell In TR.Children
c = c + 1
Cells(r, c).NumberFormat = "@"
Cells(r, c) = Cell.innerText
Next Cell
c = 0
Next TR
IE.Quit
Columns.AutoFit
Application.ScreenUpdating = True
End Sub
Solution 2:
To make it work, you need to add the following two references by going to Tools (on VBA Editor) --> References and then find the below mentioned two references and check the checkboxes for them and click OK.
1) Microsoft XML, v6.0 (find the max version available)
2) Microsoft HTML Object Library
Sub GetWebData2()
Dim XMLpage As New MSXML2.XMLHTTP60
Dim doc As New MSHTML.HTMLDocument
Dim TRs As IHTMLElementCollection
Dim TR As IHTMLElement
Dim Cell As IHTMLElement
Dim r As Long, c As Long
Application.ScreenUpdating = False
Set XMLpage = CreateObject("MSXML2.XMLHTTP")
XMLpage.Open "GET", "https://rasmusrhl.github.io/stuff/", False
XMLpage.send
doc.body.innerhtml = XMLpage.responsetext
Set TRs = doc.getElementsByTagName("tr")
Set TRs = doc.getElementsByTagName("tr")
Cells.Clear
For Each TR In TRs
r = r + 1
For Each Cell In TR.Children
c = c + 1
Cells(r, c).NumberFormat = "@"
Cells(r, c) = Cell.innerText
Next Cell
c = 0
Next TR
Columns.AutoFit
Application.ScreenUpdating = True
End Sub
For a client side solution
So run this code after the first block of code, it rewrites the final two columns.
Sub Test2()
'* tools references ->
'* Microsoft HTML Object Library
Dim oHtml4 As MSHTML.IHTMLDocument4
Set oHtml4 = New MSHTML.HTMLDocument
Dim oHtml As MSHTML.HTMLDocument
Set oHtml = Nothing
'* IHTMLDocument4.createDocumentFromUrl
'* MSDN - IHTMLDocument4 createDocumentFromUrl method - https://msdn.microsoft.com/en-us/library/aa752523(v=vs.85).aspx
Set oHtml = oHtml4.createDocumentFromUrl("https://rasmusrhl.github.io/stuff/", "")
While oHtml.readyState <> "complete"
DoEvents '* do not comment this out it is required to break into the code if in infinite loop
Wend
Debug.Assert oHtml.readyState = "complete"
Dim oTRs As MSHTML.IHTMLDOMChildrenCollection
Set oTRs = oHtml.querySelectorAll("TR")
Debug.Assert oTRs.Length = 17
Dim lRowNum As Long
For lRowNum = 3 To oTRs.Length - 1
Dim oTRLoop As MSHTML.HTMLTableRow
Set oTRLoop = oTRs.Item(lRowNum)
If oTRLoop.ChildNodes.Length > 1 Then
Debug.Assert oTRLoop.ChildNodes.Length = 14
Dim oSecondToLastColumn As MSHTML.HTMLTableCell
Set oSecondToLastColumn = oTRLoop.ChildNodes.Item(12)
ActiveSheet.Cells(lRowNum + 2, 13).Value2 = "'" & oSecondToLastColumn.innerText
Dim oLastColumn As MSHTML.HTMLTableCell
Set oLastColumn = oTRLoop.ChildNodes.Item(13)
ActiveSheet.Cells(lRowNum + 2, 14).Value2 = "'" & oLastColumn.innerText
End If
'Stop
Next lRowNum
ActiveSheet.Columns("M:M").EntireColumn.AutoFit
ActiveSheet.Columns("N:N").EntireColumn.AutoFit
End Sub
For a Server Side Solution
Now that we know you control the source script and that it is in R then one can change the R script to style the final columns with mso-number-format:'\@' . Here is a sample R script that achieves this, one builds a CSS matrix of the same dimensions as the data and passes the CSS matrix as a parameter into htmlTable
. I have not tampered with your R source instead I give here a simple illustration for you to interpret.
A=matrix(c("(2)","(4)","(3)","(1)","(5)","(7)"),nrow=2,ncol=3,byrow=TRUE)
css_matrix <- matrix(data="",nrow=2,ncol=3)
css_matrix[,3] <- "mso-number-format:\"\\@\""
htmlTable(x=A,css.cell=css_matrix)
Opening in Excel I get this
Robin Mackenzie adds
you might mention in your server-side solution that OP just needs to add css_matrix[,10:11] <- "mso-number-format:\"\@\"" to their existing R code (after the last css_matrix... line) and it will implement your solution for their specific problem
Thanks Robin
This works with a temp file.
What it does: Downloads Data Locally. Then, replaces the "(" with a "\". Then, imports the data. Formats the data as text (to ensure we can change it back without error). Then, changes the text. This cannot be done with Range.Replace because that will reformat the cell contents.
' Local Variables
Public FileName As String ' Temp File Path
Public FileUrl As String ' Url Formatted Temp File Path
Public DownloadUrl As String ' Where We're Going to Download From
' Declares Have to Be At Top
Private Declare Function GetTempPath Lib "kernel32" _
Alias "GetTempPathA" _
(ByVal nBufferLength As Long, _
ByVal lpBuffer As String) As Long
Private Declare Function GetTempFileName Lib "kernel32" _
Alias "GetTempFileNameA" _
(ByVal lpszPath As String, _
ByVal lpPrefixString As String, _
ByVal wUnique As Long, _
ByVal lpTempFileName As String) As Long
' Loads the HTML Content Without Bug
Sub ImportHtml()
' Set Our Download URL
DownloadUrl = "https://rasmusrhl.github.io/stuff"
' Sets the Temporary File Path
SetFilePath
' Downloads the File
DownloadFile
' Replaces the "(" in the File With "\(", We Will Later Put it Back
' This Ensures Formatting of Content Isn't Modified!!!
ReplaceStringInFile
' Our Query Table is Now Coming From the Local File, Instead
Dim s As QueryTable
Set s = ActiveSheet.QueryTables.Add(Connection:=("FINDER;file://" + FileUrl), Destination:=Range("$A$1"))
With s
.Name = "stuff"
.FieldNames = True
.RowNumbers = False
.FillAdjacentFormulas = False
.PreserveFormatting = False
.RefreshOnFileOpen = False
.BackgroundQuery = True
.RefreshStyle = xlInsertDeleteCells
.SavePassword = False
.SaveData = True
.AdjustColumnWidth = True
.RefreshPeriod = 0
.WebSelectionType = xlEntirePage
.WebFormatting = xlWebFormattingAll
.WebPreFormattedTextToColumns = True
.WebConsecutiveDelimitersAsOne = True
.WebSingleBlockTextImport = False
.WebDisableDateRecognition = True
.WebDisableRedirections = False
.Refresh BackgroundQuery:=False
' Sets Formatting So When We Change Text the Data Doesn't Change
.ResultRange.NumberFormat = "@"
' Loop Through Cells in Range
' If You Do Excel Replace, Instead It Will Change Cell Format
Const myStr As String = "\(", myReplace As String = "("
For Each c In .ResultRange.Cells
Do While c.Value Like "*" & myStr & "*"
c.Characters(InStr(1, c.Value, myStr), Len(myStr)).Text = myReplace
Loop
Next
End With
End Sub
' This function replaces the "(" in the file with "\("
Sub ReplaceStringInFile()
Dim sBuf As String
Dim sTemp As String
Dim iFileNum As Integer
Dim sFileName As String
' Edit as needed
sFileName = FileName
iFileNum = FreeFile
Open sFileName For Input As iFileNum
Do Until EOF(iFileNum)
Line Input #iFileNum, sBuf
sTemp = sTemp & sBuf & vbCrLf
Loop
Close iFileNum
sTemp = Replace(sTemp, "(", "\(")
iFileNum = FreeFile
Open sFileName For Output As iFileNum
Print #iFileNum, sTemp
Close iFileNum
End Sub
' This function sets file paths because we need a temp file
Function SetFilePath()
If FileName = "" Then
FileName = GetTempHtmlName
FileUrl = Replace(FileName, "\", "/")
End If
End Function
' This subroutine downloads the file from the specified URL
' The download is necessary because we will be editing the file
Sub DownloadFile()
Dim myURL As String
myURL = "https://rasmusrhl.github.io/stuff"
Dim WinHttpReq As Object
Set WinHttpReq = CreateObject("Microsoft.XMLHTTP")
WinHttpReq.Open "GET", DownloadUrl, False, "username", "password"
WinHttpReq.send
myURL = WinHttpReq.responseBody
If WinHttpReq.Status = 200 Then
Set oStream = CreateObject("ADODB.Stream")
oStream.Open
oStream.Type = 1
oStream.Write WinHttpReq.responseBody
oStream.SaveToFile FileName, 2 ' 1 = no overwrite, 2 = overwrite
oStream.Close
End If
End Sub
'''''''''''''''''''''''''''''
' THIS BLOCK OF CODE GETS A TEMPORARY FILE PATH USING THE GetTempHtmlName Function
'''''''''''''''''''''''''''''
Public Function GetTempHtmlName( _
Optional sPrefix As String = "VBA", _
Optional sExtensao As String = "") As String
Dim sTmpPath As String * 512
Dim sTmpName As String * 576
Dim nRet As Long
Dim F As String
nRet = GetTempPath(512, sTmpPath)
If (nRet > 0 And nRet < 512) Then
nRet = GetTempFileName(sTmpPath, sPrefix, 0, sTmpName)
If nRet <> 0 Then F = Left$(sTmpName, InStr(sTmpName, vbNullChar) - 1)
If sExtensao > "" Then
Kill F
If Right(F, 4) = ".tmp" Then F = Left(F, Len(F) - 4)
F = F & sExtensao
End If
F = Replace(F, ".tmp", ".html")
GetTempHtmlName = F
End If
End Function
'''''''''''''''''''''''''''''
' End - GetTempHtmlName
'''''''''''''''''''''''''''''
With the url https://rasmusrhl.github.io/stuff
, it's by luck that Excel can simply just open it directly and save as .xlsx (how come no one try this before the tedious process). If direct open fails, all other methods here are great option!
Option Explicit
Sub OpenWebFile()
Const URL As String = "https://rasmusrhl.github.io/stuff"
Dim oWB As Workbook
On Error Resume Next
Set oWB = Workbooks.Open(Filename:=URL, ReadOnly:=True)
If oWB Is Nothing Then
MsgBox "Cannot open the url " & URL, vbExclamation + vbOKOnly, "ERR " & Err.Number & ":" & Err.Description
Err.Clear
Else
' Change to your desired path and filename
oWB.SaveAs Filename:="C:\Test\stuff.xlsx", FileFormat:=xlOpenXMLWorkbook
Set oWB = Nothing
End If
End Sub
Processing the HTML and then Copying and Pasting it into Excel will
Here are the steps I used:
CreateObject("MSXML2.XMLHTTP")
: get the URL's responseTextCreateObject("HTMLFile")
: create a HTML Document from responseText@
to preserve formatting@
sign with '
Sub LoadTable()
Const URL = "https://rasmusrhl.github.io/stuff/"
Dim x As Long
Dim doc As Object, tbl As Object, rw As Object
With CreateObject("MSXML2.XMLHTTP")
.Open "GET", URL, False
.send
If .readyState = 4 And .Status = 200 Then
Set doc = CreateObject("HTMLFile")
doc.body.innerHTML = .responseText
doc.body.innerHTML = Replace(doc.body.innerHTML, "grey", "black")
Set tbl = doc.getElementsByTagName("TABLE")(0)
For x = 0 To tbl.Rows.Length - 1
Set rw = tbl.Rows(x)
If rw.Cells.Length = 14 Then
'If InStr(rw.Cells(12).innerText, "-") Or InStr(rw.Cells(12).innerText, "/") Then
rw.Cells(12).innerText = "@" & rw.Cells(12).innerText
rw.Cells(13).innerText = "@" & rw.Cells(13).innerText
End If
Next
With CreateObject("new:{1C3B4210-F441-11CE-B9EA-00AA006B1A69}")
.SetText "<html><body>" & doc.body.innerHTML & "</body></html>"
.PutInClipboard
End With
With Worksheets("Sheet1")
.Cells.Clear
.Range("A1").PasteSpecial
.Cells.Interior.Color = vbWhite
.Cells.WrapText = False
.Columns.AutoFit
.Columns("M:N").Replace What:="@", Replacement:="'"
End With
Else
MsgBox "URL: " & vbCrLf & "Ready state: " & .readyState & vbCrLf & "HTTP request status: " & .Status, vbInformation, "URL Not Responding"
End If
End With
End Sub
Based on the documentation from Microsoft MSDN Library: WebFormatting Property you could try the below change to your code:
.WebFormatting = xlWebFormattingNone
This may allow the data to be copied without any number formatting - then you can set your own number format for those cells (using MSDN: Excel VBA NumberFormat property )
A similar solution should solve the issue with numbers being truncated or rounding - set the decimal points for the affected cells in your target range...