Convert HTML-table to Excel using VBA

后端 未结 9 2090
难免孤独
难免孤独 2021-02-05 02:51

Convert HTML-table to Excel

The code below fetches the HTML-table at https://rasmusrhl.github.io/stuff, and converts it to Excel-format.

The pr

相关标签:
9条回答
  • 2021-02-05 03:15

    To get the tabular data (keeping the format as it is) from that page, you can try like below:

     Sub Fetch_Data()
        Dim http As New XMLHTTP60, html As New HTMLDocument
        Dim posts As Object, post As Object, elem As Object
        Dim row As Long, col As Long
    
        With http
            .Open "GET", "https://rasmusrhl.github.io/stuff/", False
            .send
            html.body.innerHTML = .responseText
        End With
    
        Set posts = html.getElementsByClassName("gmisc_table")(0)
    
        For Each post In posts.Rows
            For Each elem In post.Cells
                col = col + 1: Cells(row + 1, col).NumberFormat = "@": Cells(row + 1, col) = elem.innerText
            Next elem
            col = 0
            row = row + 1
        Next post
    End Sub
    

    Reference to add to the library:

    1. Microsoft HTML Object Library
    2. Microsoft XML, v6.0  'or whatever version you have
    

    This is how that portion looks like when get parsed.

    0 讨论(0)
  • 2021-02-05 03:16

    Try this, to import the data as a table:

    Sub ImportDataAsTable()
        ActiveWorkbook.Queries.Add Name:="Table 0", Formula:= _
            "let" & Chr(13) & "" & Chr(10) & "    Source = Web.Page(Web.Contents(""https://rasmusrhl.github.io/stuff/""))," & Chr(13) & "" & Chr(10) & "    Data0 = Source{0}[Data]," & Chr(13) & "" & Chr(10) & "    #""Changed Type"" = Table.TransformColumnTypes(Data0,{{""tailnum"", type text}, {"""", type text}, {""Some text goes here. It is long and does not break Machine type (make) year"", type text}, {""Some text goes here. It is long and does not break Mach" & _
            "ine type (make) type"", type text}, {""Some text goes here. It is long and does not break Machine type (make) manufacturer"", type text}, {""Some text goes here. It is long and does not break"", type text}, {""Some text goes here. It is long and does not break Specification of machine model"", type text}, {""Some text goes here. It is long and does not break Specifi" & _
            "cation of machine engines"", type text}, {""Some text goes here. It is long and does not break Specification of machine seats"", type text}, {""Some text goes here. It is long and does not break Specification of machine speed"", type text}, {""Some text goes here. It is long and does not break Specification of machine engine"", type text}, {""2"", type text}, {""Oth" & _
            "er text goes here Other variables s1"", type text}, {""Other text goes here Other variables s2"", type text}})" & Chr(13) & "" & Chr(10) & "in" & Chr(13) & "" & Chr(10) & "    #""Changed Type"""
        ActiveWorkbook.Worksheets.Add
        With ActiveSheet.ListObjects.Add(SourceType:=0, Source:= _
            "OLEDB;Provider=Microsoft.Mashup.OleDb.1;Data Source=$Workbook$;Location=""Table 0"";Extended Properties=""""" _
            , Destination:=Range("$A$1")).QueryTable
            .CommandType = xlCmdSql
            .CommandText = Array("SELECT * FROM [Table 0]")
            .RowNumbers = False
            .FillAdjacentFormulas = False
            .PreserveFormatting = True
            .RefreshOnFileOpen = False
            .BackgroundQuery = True
            .RefreshStyle = xlInsertDeleteCells
            .SavePassword = False
            .SaveData = True
            .AdjustColumnWidth = True
            .RefreshPeriod = 0
            .PreserveColumnInfo = True
            .ListObject.DisplayName = "Table_0"
            .Refresh BackgroundQuery:=False
        End With
    End Sub
    
    0 讨论(0)
  • 2021-02-05 03:20
    <style type=text/css>
        td {mso-number-format: '\@';}
    </style>
    <table ...
    

    Putting the above global style definition for the cells (<td>s) on the output you generate using R or rewriting the document on the client side like below just works.

    Sub importhtml()
        '*********** HTML document rewrite process ***************
        Const TableUrl = "https://rasmusrhl.github.io/stuff"
    
        Const adTypeBinary = 1, adSaveCreateOverWrite = 2, TemporaryFolder = 2
        Dim tempFilePath, binData() As Byte
    
        With CreateObject("Scripting.FileSystemObject")
            tempFilePath = .BuildPath(.GetSpecialFolder(TemporaryFolder), .GetTempName() & ".html")
        End With
    
        'download HTML document
        With CreateObject("MSXML2.ServerXMLHTTP")
            .Open "GET", TableUrl, False
            .Send
            If .Status <> 200 Then Err.Raise 3, "importhtml", "200 expected"
            binData = .ResponseBody
        End With
    
        With CreateObject("Adodb.Stream")
            .Charset = "x-ansi"
            .Open
            .WriteText "<style type=text/css>td {mso-number-format:'\@';}</style>"
            .Position = 0 'move to start
            .Type = adTypeBinary 'change stream type
            .Position = .Size 'move to end
            .Write binData 'append binary data end of stream
            .SaveToFile tempFilePath, adSaveCreateOverWrite 'save temporary file
            .Close
        End With
        '*********** HTML document rewrite process ***************
    
        With ActiveSheet.QueryTables.Add(Connection:= _
            "URL;" & tempFilePath, Destination:=Range("$A$1"))
            'load HTML document from rewritten local copy
    
            .Name = "stuff"
            .FieldNames = True
            .RowNumbers = False
            .FillAdjacentFormulas = False
            .PreserveFormatting = False
            .RefreshOnFileOpen = False
            .BackgroundQuery = True
            .RefreshStyle = xlInsertDeleteCells
            .SavePassword = False
            .SaveData = True
            .AdjustColumnWidth = True
            .RefreshPeriod = 0
            .WebSelectionType = xlEntirePage
            .WebFormatting = xlWebFormattingAll
            .WebPreFormattedTextToColumns = True
            .WebConsecutiveDelimitersAsOne = True
            .WebSingleBlockTextImport = False
            .WebDisableDateRecognition = True
            .WebDisableRedirections = False
            .Refresh BackgroundQuery:=False
    
        End With
    
        Kill tempFilePath
    End Sub
    
    0 讨论(0)
提交回复
热议问题