I am still trying to learn more about scraping and I could devise a code that enables me to get the desired results.
Here\'s the code
Take a look at the below example:
Option Explicit
Sub Test()
Dim sState As String
Dim sCourt As String
Dim dtFrom As Date
Dim dtTill As Date
Dim sSubject As String
Dim sOrder As String
Dim oStates As Object
Dim oCourts As Object
Dim oSubjects As Object
Dim oOrders As Object
Dim sStateCode As String
Dim sCourtId As String
Dim sSubjectVal As String
Dim sOrderVal As String
Dim aData
' Set query data
sState = ""
sCourt = ""
dtFrom = DateSerial(2018, 2, 11)
dtTill = DateSerial(2018, 2, 11)
sSubject = ""
sOrder = "Aktenzeichen"
' Retrieve options
GetOptions oStates, oCourts, oSubjects, oOrders
' Validate query parameters
If Not oStates.Exists(sState) Then MsgBox "State valid values:" & vbCrLf & vbCrLf & """" & Join(oStates.Keys(), """" & vbCrLf & """") & """": Exit Sub
If Not oCourts(oStates(sState)).Exists(sCourt) Then MsgBox "Court valid values:" & vbCrLf & vbCrLf & """" & Join(oCourts(oStates(sState)).Keys(), """" & vbCrLf & """") & """": Exit Sub
If Not oSubjects.Exists(sSubject) Then MsgBox "Subject valid values:" & vbCrLf & vbCrLf & """" & Join(oSubjects.Keys(), """" & vbCrLf & """") & """": Exit Sub
If Not oOrders.Exists(sOrder) Then MsgBox "Order valid values:" & vbCrLf & vbCrLf & """" & Join(oOrders.Keys(), """" & vbCrLf & """") & """": Exit Sub
' Request data
sStateCode = oStates(sState)
sCourtId = oCourts(sStateCode)(sCourt)
sSubjectVal = oSubjects(sSubject)
sOrderVal = oOrders(sOrder)
GetData sStateCode, sCourt, sCourtId, dtFrom, dtTill, sSubjectVal, sOrderVal, aData
' Rebuild nested arrays to 2d array for output
aData = Denestify(aData)
' Output
With ThisWorkbook.Sheets(1)
.Cells.Delete
Output2DArray .Cells(1, 1), aData
.Columns.AutoFit
End With
MsgBox "Completed"
End Sub
Sub GetOptions(oStates As Object, oCourts As Object, oSubjects As Object, oOrders As Object)
Dim sContent As String
Dim aTmp0
Dim aTmp1
Dim vItem
Dim oTmp
Dim i As Long
' Retrieve request options from search page
XmlHttpRequest "GET", "https://www.handelsregisterbekanntmachungen.de/?aktion=suche", "", "", "", sContent
' Get each state and code
ExtractOptions sContent, "land", oStates
' Get courts with courts ids for each federal state
Set oCourts = CreateObject("Scripting.Dictionary")
For Each vItem In oStates.Items()
' Put courts and ids into temp dictionary
Set oTmp = CreateObject("Scripting.Dictionary")
If vItem <> "" Then
' Extract the whole JS array statement with courts names
ParseResponse "BundeslandArray\['" & vItem & "'\]=new Array\(('[^']*'(?:,'[^']*')*)\);", sContent, aTmp0, False
' Extract each court name into temp array
ParseResponse "'([^']*)'", (aTmp0(0)), aTmp0, False
' Extract the whole JS array statement with courts ids
ParseResponse "BundeslandArrayId\['" & vItem & "'\]=new Array\(('[^']*'(?:,'[^']*')*)\);", sContent, aTmp1, False
' Extract each court id into temp array
ParseResponse "'([^']*)'", (aTmp1(0)), aTmp1, False
For i = 0 To UBound(aTmp0)
oTmp(DecodeHTMLEntities((aTmp0(i)))) = DecodeHTMLEntities((aTmp1(i)))
Next
End If
' Add dummy item
oTmp("") = ""
' Put courts-ids for the state code into dictionary
Set oCourts(vItem) = oTmp
Next
' Add dummy item
oStates("") = ""
' Get subjects
ExtractOptions sContent, "gegenstand", oSubjects
' Add dummy item
oSubjects("") = "0"
' Get sort order types
ExtractOptions sContent, "order", oOrders
End Sub
Sub GetData(sStateCode As String, sCourt As String, sCourtId As String, dtFrom As Date, dtTill As Date, sSubjectVal As String, sOrderVal As String, aData)
Dim i As Long
Dim oQuery As Object
Dim sQuery As String
Dim sContent As String
' Set query parameters
Set oQuery = CreateObject("Scripting.Dictionary")
With oQuery
.Add "suchart", "uneingeschr"
.Add "button", "Start search"
.Add "land", sStateCode
.Add "gericht", sCourtId
.Add "gericht_name", sCourt
.Add "seite", ""
.Add "l", ""
.Add "r", ""
.Add "all", "false"
.Add "vt", Day(dtFrom)
.Add "vm", Month(dtFrom)
.Add "vj", Year(dtFrom)
.Add "bt", Day(dtTill)
.Add "bm", Month(dtTill)
.Add "bj", Year(dtTill)
.Add "fname", ""
.Add "fsitz", ""
.Add "rubrik", ""
.Add "az", ""
.Add "gegenstand", sSubjectVal
.Add "anzv", "alle"
.Add "order", sOrderVal
End With
sQuery = EncodeQueryParams(oQuery)
' Retrieve search results
XmlHttpRequest "POST", _
"https://www.handelsregisterbekanntmachungen.de/de/index.php?aktion=suche", _
Array( _
Array("Content-Type", "application/x-www-form-urlencoded"), _
Array("Content-Length", Len(sQuery) _
) _
), _
sQuery, _
"", _
sContent
' Parse response
sContent = Replace(sContent, "<br>", vbCrLf)
ParseResponse "<li[^>]*><a[^>]*?href=""javascript:NeuFenster\('([^']*)'\)""[^>]*>([^<]*)<ul[^>]*>([\s\S]*?)</ul>", sContent, aData, False
For i = 0 To UBound(aData, 1)
aData(i)(0) = "http://www.handelsregisterbekanntmachungen.de/en/skripte/hrb.php?" & aData(i)(0)
Next
End Sub
Sub ExtractOptions(sContent As String, sName As String, oOptions As Object)
Dim aTmp0
Dim vItem
' Extract the whole <select> for parameter
ParseResponse "<select[^>]* name=" & sName & "[^>]*>[^<]*((?:<option[^>]*>[^<]*</option>[^<]*)+)[^<]*</[^>]*>", sContent, aTmp0, False
' Extract each parameter <option>
ParseResponse "<option[^>]*value=(""[^""]*""|[^\s>]*)[^>]*>([^<]*)</option>", (aTmp0(0)), aTmp0, False
' Put each parameter and value into dictionary
Set oOptions = CreateObject("Scripting.Dictionary")
For Each vItem In aTmp0
oOptions(DecodeHTMLEntities((vItem(1)))) = DecodeHTMLEntities(Replace(vItem(0), """", ""))
Next
End Sub
Sub XmlHttpRequest(sMethod, sUrl, aSetHeaders, sFormData, sRespHeaders, sRespText)
Dim aHeader
' With CreateObject("MSXML2.ServerXMLHTTP")
' .SetOption 2, 13056 ' SXH_SERVER_CERT_IGNORE_ALL_SERVER_ERRORS
With CreateObject("MSXML2.XMLHTTP")
.Open sMethod, sUrl, False
If IsArray(aSetHeaders) Then
For Each aHeader In aSetHeaders
.SetRequestHeader aHeader(0), aHeader(1)
Next
End If
.Send (sFormData)
sRespHeaders = .GetAllResponseHeaders
sRespText = .ResponseText
End With
End Sub
Sub ParseResponse(sPattern, sResponse, aData, Optional bAppend As Boolean = True, Optional bGlobal = True, Optional bMultiLine = True, Optional bIgnoreCase = True)
Dim oMatch
Dim aTmp0()
Dim sSubMatch
If Not (IsArray(aData) And bAppend) Then aData = Array()
With CreateObject("VBScript.RegExp")
.Global = bGlobal
.MultiLine = bMultiLine
.IgnoreCase = bIgnoreCase
.Pattern = sPattern
For Each oMatch In .Execute(sResponse)
If oMatch.SubMatches.Count = 1 Then
PushItem aData, oMatch.SubMatches(0)
Else
aTmp0 = Array()
For Each sSubMatch In oMatch.SubMatches
PushItem aTmp0, sSubMatch
Next
PushItem aData, aTmp0
End If
Next
End With
End Sub
Sub PushItem(aData, vItem, Optional bAppend As Boolean = True)
If Not (IsArray(aData) And bAppend) Then aData = Array()
ReDim Preserve aData(UBound(aData) + 1)
aData(UBound(aData)) = vItem
End Sub
Function DecodeHTMLEntities(sText As String) As String
Static oHtmlfile As Object
Static oDiv As Object
If oHtmlfile Is Nothing Then
Set oHtmlfile = CreateObject("htmlfile")
oHtmlfile.Open
Set oDiv = oHtmlfile.createElement("div")
End If
oDiv.innerHTML = sText
DecodeHTMLEntities = oDiv.innerText
End Function
Function EncodeQueryParams(oParams As Object) As String
Dim aParams
Dim i As Long
aParams = oParams.Keys()
For i = 0 To UBound(aParams)
aParams(i) = EncodeUriComponent((aParams(i))) & "=" & EncodeUriComponent((oParams(aParams(i))))
Next
EncodeQueryParams = Join(aParams, "&")
End Function
Function EncodeUriComponent(strText As String) As String
Static objHtmlfile As Object
If objHtmlfile Is Nothing Then
Set objHtmlfile = CreateObject("htmlfile")
objHtmlfile.parentWindow.execScript "function encode(s) {return encodeURIComponent(s)}", "jscript"
End If
EncodeUriComponent = objHtmlfile.parentWindow.encode(strText)
End Function
Function Denestify(aRows)
Dim aData()
Dim aItems()
Dim i As Long
Dim j As Long
If UBound(aRows) = -1 Then Exit Function
ReDim aData(1 To UBound(aRows) + 1, 1 To 1)
For j = 0 To UBound(aRows)
If IsArray(aRows(j)) Then
aItems = aRows(j)
For i = 0 To UBound(aItems)
If i + 1 > UBound(aData, 2) Then ReDim Preserve aData(1 To UBound(aRows) + 1, 1 To i + 1)
aData(j + 1, i + 1) = aItems(i)
Next
Else
aData(j + 1, 1) = aRows(j)
End If
Next
Denestify = aData
End Function
Sub Output2DArray(oDstRng As Range, aCells As Variant)
With oDstRng
.Parent.Select
With .Resize( _
UBound(aCells, 1) - LBound(aCells, 1) + 1, _
UBound(aCells, 2) - LBound(aCells, 2) + 1)
.NumberFormat = "@"
.Value = aCells
End With
End With
End Sub