How to download data from Yahoo finance limited to 100 rows

浪尽此生 提交于 2020-07-19 06:49:20

问题


So I'm doing this project where I have to download historical stock data from Yahoo Finance. I got this code. It works fine, BUT it only downloads a maximum of 100 rows. I searched the web for answers or for different code (this one is just a recorded macro from Excel), but the few tutorials I saw on YouTube use this same solution and it works just fine for them.

.. I don't understand it then

Sub Makro6()

    ' Download stock prices from Yahoo Finance based on input.
    '
    ' Reads the start/end dates from Main!A4 and Main!A6, creates a Power Query
    ' ("Table 2 (3)") that scrapes the third table (index 2) of the Yahoo Finance
    ' history page, loads it into the "Data" sheet as table "Table_2_3", sorts it
    ' by the first column (oldest first) and then runs DeleteDividends and
    ' Stochastics.
    '
    ' NOTE(review): this only returns the rows that are present in the page's
    ' HTML table when it is first loaded; rows the page adds later via script
    ' are not picked up by Web.Page.

    Dim ws As Worksheet
    Dim tbl As ListObject

    Set ws = Sheets("Data")

    'clear previous queries
    For Each qr In ThisWorkbook.Queries
        qr.Delete
    Next qr

    'clear Data sheet (the sheet is still activated so the UI ends up on it)
    ws.Select
    ws.Cells.Clear

    'clear graphs
    'ws.ChartObjects.Delete

    ' NOTE(review): `stock` is not assigned anywhere in this procedure (the line
    ' below is commented out). Presumably it is set elsewhere, e.g. as a
    ' module-level variable - confirm, otherwise the URL is built with an empty
    ' ticker symbol.
    'stock = Sheets("Main").Range("A2")

    ' toUnix is defined elsewhere; its results are used as period1/period2 in the URL.
    StartDate = toUnix(Sheets("Main").Range("A4"))
    EndDate = toUnix(Sheets("Main").Range("A6"))

    Application.CutCopyMode = False
    ActiveWorkbook.Queries.Add Name:="Table 2 (3)", Formula:= _
        "let" & Chr(13) & "" & Chr(10) & "    Zdroj = Web.Page(Web.Contents(""https://finance.yahoo.com/quote/" & stock & "/history?period1=" & StartDate & "&period2=" & EndDate & "&interval=1d&filter=history&frequency=1d""))," & Chr(13) & "" & Chr(10) & "    Data2 = Zdroj{2}[Data]," & Chr(13) & "" & Chr(10) & "    #""Změněný typ"" = Table.TransformColumnTypes(Data2,{{""Date"", type date}, {""Open"", type text}, {""High"", type text}, {""Low"", type text}, {""Close*"", type tex" & _
        "t}, {""Adj Close**"", type text}, {""Volume"", type text}})" & Chr(13) & "" & Chr(10) & "in" & Chr(13) & "" & Chr(10) & "    #""Změněný typ"""

    ' Load the query result into a table anchored at Data!A1.
    With ws.ListObjects.Add(SourceType:=0, Source:= _
        "OLEDB;Provider=Microsoft.Mashup.OleDb.1;Data Source=$Workbook$;Location=""Table 2 (3)"";Extended Properties=""""" _
        , Destination:=ws.Range("$A$1")).QueryTable
        .CommandType = xlCmdSql
        .CommandText = Array("SELECT * FROM [Table 2 (3)]")
        .RowNumbers = False
        .FillAdjacentFormulas = False
        .PreserveFormatting = True
        .RefreshOnFileOpen = False
        .BackgroundQuery = True
        .RefreshStyle = xlInsertDeleteCells
        .SavePassword = False
        .SaveData = True
        .AdjustColumnWidth = True
        .RefreshPeriod = 0
        .PreserveColumnInfo = True
        .ListObject.DisplayName = "Table_2_3"
        ' Synchronous refresh so the data is present before sorting below.
        .Refresh BackgroundQuery:=False
    End With
    ws.Select

    '' Sort data by date from oldest to newest

    Set tbl = ws.ListObjects("Table_2_3")

    tbl.Sort.SortFields.Clear
    ' Use the table's own first column (header included) as the sort key instead
    ' of a fixed A1:A99 range, so the key covers however many rows were loaded
    ' and does not depend on which sheet is active.
    tbl.Sort.SortFields.Add2 Key:=tbl.ListColumns(1).Range, _
        SortOn:=xlSortOnValues, Order:=xlAscending, _
        DataOption:=xlSortNormal
    With tbl.Sort
        .Header = xlYes
        .MatchCase = False
        .Orientation = xlTopToBottom
        .SortMethod = xlPinYin
        .Apply
    End With

    ' Post-processing steps defined elsewhere in the project.
    Call DeleteDividends

    Call Stochastics


End Sub

The code works for other websites. I tried downloading a Wikipedia list page with 120 rows in total and it loaded the data with no problem.

The problem is the data from Yahoo finance website is a project requirement


回答1:


If you check against the page you will discover only 100 results are initially present within the HTMLTable rows (tbody to be precise).

Enter the CSS selector [data-test="historical-prices"] tbody tr in the search box of the browser's Elements tab (press F12 to open dev tools) and you will see this:

The rest of the rows are fed dynamically from a data store as you scroll down the page. Of course, your current method doesn't pick up on these. You can in fact issue an xhr request, regex out the appropriate javascript object housing all the rows, and parse with a json parser.

Here is roughly what you should currently see in response:

I use jsonconverter.bas as my JSON parser. Download the raw code from here and add it to a standard module called JsonConverter. You then need to go to VBE > Tools > References and add a reference to Microsoft Scripting Runtime.

@TimWilliams wrote a better unix conversion function here but I thought I would have a play at writing something different. I would advise you to stick with his as it is safer and faster.


VBA:

Option Explicit  
Public Sub GetYahooData()
    ' Fetches the Yahoo Finance history page for a fixed ticker and date range,
    ' extracts the embedded price JSON and writes it to Sheet1.
    '
    ' Requires: VBE > Tools > References > Microsoft Scripting Runtime
    ' Also relies on the JsonConverter module and on the sibling procedures
    ' GetCurrentUnix, GetJsonString and WriteOutResults.
    Dim ticker As String, periodStart As String, periodEnd As String
    Dim requestUrl As String, payload As String
    Dim targetSheet As Worksheet
    Dim matcher As Object, http As Object, parsed As Object

    ' Hard-coded query inputs; the two values are sent as period1/period2.
    ticker = "AAPL"
    periodStart = "1534809600"
    periodEnd = "1566345600"

    Set targetSheet = ThisWorkbook.Worksheets("Sheet1")
    Set matcher = CreateObject("VBScript.RegExp")
    Set http = CreateObject("MSXML2.XMLHTTP")

    requestUrl = "https://finance.yahoo.com/quote/" & ticker & "/history?period1=" & periodStart & "&period2=" & periodEnd & "&interval=1d&filter=history&frequency=1d&_guc_consent_skip=" & GetCurrentUnix(Now())

    ' Synchronous GET (third argument False), so responseText is complete after send.
    http.Open "GET", requestUrl, False
    http.setRequestHeader "User-Agent", "Mozilla/5.0"
    http.send
    payload = http.responseText

    ' GetJsonString returns the literal "No match" when the pattern is not found;
    ' in that case nothing is written.
    payload = GetJsonString(matcher, payload)
    If payload <> "No match" Then
        Set parsed = JsonConverter.ParseJson(payload)
        WriteOutResults targetSheet, parsed
    End If
End Sub

Public Sub WriteOutResults(ByVal ws As Worksheet, ByVal json As Object)
    ' Writes a parsed JSON array of objects to ws: one header row starting at A1,
    ' followed by one row per item.
    '
    ' ws   - destination worksheet.
    ' json - collection of dictionary-like items (as produced by
    '        JsonConverter.ParseJson for an array of objects).
    '
    ' Columns are the union of all keys found across the items, in first-seen
    ' order, and each value is placed under its own key. When every item has the
    ' same keys this gives the same layout as taking the headers from the first
    ' item; when items differ, values can no longer land under the wrong header
    ' or overflow the result array. Cells for keys an item lacks stay empty.
    ' Does nothing when json is Nothing or empty.
    Dim item As Object, key As Variant, headers(), results()
    Dim colIndex As Object, r As Long

    If json Is Nothing Then Exit Sub
    If json.Count = 0 Then Exit Sub

    ' Map each distinct key to a 1-based column number.
    Set colIndex = CreateObject("Scripting.Dictionary")
    For Each item In json
        For Each key In item.keys
            If Not colIndex.Exists(key) Then colIndex.Add key, colIndex.Count + 1
        Next key
    Next item
    If colIndex.Count = 0 Then Exit Sub

    headers = colIndex.keys   ' 0-based array, in insertion (column) order
    ReDim results(1 To json.Count, 1 To colIndex.Count)

    For Each item In json
        r = r + 1
        For Each key In item.keys
            results(r, colIndex(key)) = item(key)
        Next key
    Next item

    ' Write headers and data in two bulk assignments rather than cell by cell.
    With ws
        .Cells(1, 1).Resize(1, UBound(headers) + 1) = headers
        .Cells(2, 1).Resize(UBound(results, 1), UBound(results, 2)) = results
    End With
End Sub

Public Function GetJsonString(ByVal re As Object, ByVal responseText As String) As String
    ' Extracts the JSON array that follows HistoricalPriceStore":{"prices": in
    ' responseText, using the supplied VBScript.RegExp object.
    '
    ' Returns the first capture group of the first match (lazy, up to and
    ' including the first "]"), or the literal string "No match" when the
    ' pattern is not found.
    Dim hits As Object

    re.Global = True
    re.MultiLine = True
    re.IgnoreCase = False
    re.Pattern = "HistoricalPriceStore"":{""prices"":(.*?\])" 'regex pattern to get json string

    ' Default result; overwritten only if at least one match exists.
    GetJsonString = "No match"

    Set hits = re.Execute(responseText)
    If hits.Count > 0 Then
        GetJsonString = hits(0).SubMatches(0)
    End If
End Function

Public Function GetCurrentUnix(ByVal t As Double) As String
    ' Converts the date/time value t (a VBA date serial passed as Double) to a
    ' Unix timestamp in seconds, returned as a string.
    '
    ' The conversion is delegated to JScript's Date object hosted in an
    ' "htmlfile" document. The argument is passed as a Date (CDate) rather than a
    ' raw Double so the script receives a date value, not a number that
    ' new Date() would treat as milliseconds.
    '
    ' Previously the parameter was ignored and Now was always used; callers that
    ' pass Now() get the same result as before.
    With CreateObject("htmlfile")
        .parentWindow.execScript "function GetTimeStamp(t){return new Date(t).getTime() / 1000}", "jscript"
        GetCurrentUnix = .parentWindow.GetTimeStamp(CDate(t))
    End With
End Function

Regex:


Python:

I initially wrote this in Python, in case it is of interest:

import requests, re, json
from bs4 import BeautifulSoup as bs

# Pattern capturing the JSON array that follows HistoricalPriceStore":{"prices":
# (lazy match up to and including the first "]"). Raw string so that "\]" is a
# regex escape rather than an invalid string escape.
p = re.compile(r'HistoricalPriceStore":{"prices":(.*?\])')

# Fetch the history page; a timeout keeps the script from hanging indefinitely.
r = requests.get('https://finance.yahoo.com/quote/AAPL/history?period1=1534809600&period2=1566345600&interval=1d&filter=history&frequency=1d&_guc_consent_skip=1566859607', timeout=30)
r.raise_for_status()  # fail early on HTTP errors instead of parsing an error page

matches = p.findall(r.text)
if not matches:
    # Without this check the failure would surface as a bare IndexError.
    raise ValueError('HistoricalPriceStore prices array not found in response')

# Parsed result of the captured JSON array.
data = json.loads(matches[0])


来源:https://stackoverflow.com/questions/57664229/how-to-download-data-from-yahoo-finance-limited-to-100-rows

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!