问题
So I'm doing this project where I have to download historical stock data from yahoo finance. Got this code. It's working fine, BUT it only downloads max 100 rows. I tried to scan the web for answers or a different code (this one is just recorded macro from excel) but I saw a few tutorials on YouTube that use his solution and it's just fine.
.. I don't understand it then
Sub Makro6()
' Dowload stock prices from Yahoo Finance based on input
Dim ws As Worksheet
Set ws = Sheets("Data")
'clear previous queries
For Each qr In ThisWorkbook.Queries
qr.Delete
Next qr
'clear Data sheet
ws.Select
Cells.Clear
'clear graphs
'ws.ChartObjects.Delete
'stock = Sheets("Main").Range("A2")
StartDate = toUnix(Sheets("Main").Range("A4"))
EndDate = toUnix(Sheets("Main").Range("A6"))
Application.CutCopyMode = False
ActiveWorkbook.Queries.Add Name:="Table 2 (3)", Formula:= _
"let" & Chr(13) & "" & Chr(10) & " Zdroj = Web.Page(Web.Contents(""https://finance.yahoo.com/quote/" & stock & "/history?period1=" & StartDate & "&period2=" & EndDate & "&interval=1d&filter=history&frequency=1d""))," & Chr(13) & "" & Chr(10) & " Data2 = Zdroj{2}[Data]," & Chr(13) & "" & Chr(10) & " #""Změněný typ"" = Table.TransformColumnTypes(Data2,{{""Date"", type date}, {""Open"", type text}, {""High"", type text}, {""Low"", type text}, {""Close*"", type tex" & _
"t}, {""Adj Close**"", type text}, {""Volume"", type text}})" & Chr(13) & "" & Chr(10) & "in" & Chr(13) & "" & Chr(10) & " #""Změněný typ"""
With ActiveSheet.ListObjects.Add(SourceType:=0, Source:= _
"OLEDB;Provider=Microsoft.Mashup.OleDb.1;Data Source=$Workbook$;Location=""Table 2 (3)"";Extended Properties=""""" _
, Destination:=Range("$A$1")).QueryTable
.CommandType = xlCmdSql
.CommandText = Array("SELECT * FROM [Table 2 (3)]")
.RowNumbers = False
.FillAdjacentFormulas = False
.PreserveFormatting = True
.RefreshOnFileOpen = False
.BackgroundQuery = True
.RefreshStyle = xlInsertDeleteCells
.SavePassword = False
.SaveData = True
.AdjustColumnWidth = True
.RefreshPeriod = 0
.PreserveColumnInfo = True
.ListObject.DisplayName = "Table_2_3"
.Refresh BackgroundQuery:=False
End With
Sheets("Data").Select
'' Sort data by date from oldest to newest
ws.ListObjects("Table_2_3").Sort.SortFields. _
Clear
ws.ListObjects("Table_2_3").Sort.SortFields. _
Add2 Key:=Range("A1:A99"), SortOn:=xlSortOnValues, Order:=xlAscending, _
DataOption:=xlSortNormal
With ws.ListObjects("Table_2_3").Sort
.Header = xlYes
.MatchCase = False
.Orientation = xlTopToBottom
.SortMethod = xlPinYin
.Apply
End With
Call DeleteDividends
Call Stochastics
End Sub
The code works for other websites. I tried to download Wikipedia page list of total 120 and it loaded data no problem.
The problem is the data from Yahoo finance website is a project requirement
回答1:
If you check against the page you will discover only 100 results are initially present within the HTMLTable
rows (tbody
to be precise).
Enter the css selector [data-test="historical-prices"] tbody tr
in the browser elements tab search box (F12 to open dev tools)and you will see this:
The rest of the rows are fed dynamically from a data store as you scroll down the page. Of course, your current method doesn't pick up on these. You can in fact issue an xhr request, regex out the appropriate javascript object housing all the rows, and parse with a json parser.
Here is roughly what you should currently see in response:
I use jsonconverter.bas as my json parser. Download raw code from here and add to standard module called jsonConverter
. You then need to go VBE > Tools > References > Add reference to Microsoft Scripting Runtime.
@TimWilliams wrote a better unix conversion function here but I thought I would have a play at writing something different. I would advise you to stick with his as it is safer and faster.
VBA:
Option Explicit
Public Sub GetYahooData()
'< VBE > Tools > References > Microsoft Scripting Runtime
Dim json As Object, re As Object, s As String, xhr As Object, ws As Worksheet
Dim startDate As String, endDate As String, stock As String
Set ws = ThisWorkbook.Worksheets("Sheet1")
Set re = CreateObject("VBScript.RegExp")
Set xhr = CreateObject("MSXML2.XMLHTTP")
stock = "AAPL"
startDate = "1534809600"
endDate = "1566345600"
With xhr
.Open "GET", "https://finance.yahoo.com/quote/" & stock & "/history?period1=" & startDate & "&period2=" & endDate & "&interval=1d&filter=history&frequency=1d&_guc_consent_skip=" & GetCurrentUnix(Now()), False
.setRequestHeader "User-Agent", "Mozilla/5.0"
.send
s = .responseText
End With
s = GetJsonString(re, s)
If s = "No match" Then Exit Sub
Set json = JsonConverter.ParseJson(s)
WriteOutResults ws, json
End Sub
Public Sub WriteOutResults(ByVal ws As Worksheet, ByVal json As Object)
Dim item As Object, key As Variant, headers(), results(), r As Long, c As Long
headers = json.item(1).keys
ReDim results(1 To json.Count, 1 To UBound(headers) + 1)
For Each item In json
r = r + 1: c = 1
For Each key In item.keys
results(r, c) = item(key)
c = c + 1
Next
Next
With ws
.Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
.Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
End With
End Sub
Public Function GetJsonString(ByVal re As Object, ByVal responseText As String) As String
With re
.Global = True
.MultiLine = True
.IgnoreCase = False
.Pattern = "HistoricalPriceStore"":{""prices"":(.*?\])" 'regex pattern to get json string
If .test(responseText) Then
GetJsonString = .Execute(responseText)(0).SubMatches(0)
Else
GetJsonString = "No match"
End If
End With
End Function
Public Function GetCurrentUnix(ByVal t As Double) As String
With CreateObject("htmlfile")
.parentWindow.execScript "function GetTimeStamp(t){return new Date(t).getTime() / 1000}", "jscript"
GetCurrentUnix = .parentWindow.GetTimeStamp(Now)
End With
End Function
Regex:
Python:
I initially wrote as python if of interest:
import requests, re, json
from bs4 import BeautifulSoup as bs
p = re.compile('HistoricalPriceStore":{"prices":(.*?\])')
r = requests.get('https://finance.yahoo.com/quote/AAPL/history?period1=1534809600&period2=1566345600&interval=1d&filter=history&frequency=1d&_guc_consent_skip=1566859607')
data = json.loads(p.findall(r.text)[0])
来源:https://stackoverflow.com/questions/57664229/how-to-download-data-from-yahoo-finance-limited-to-100-rows