Scrape Yahoo Finance Financial Ratios

后端 未结 4 882
清歌不尽
清歌不尽 2021-02-04 19:52

I have been trying to scrape the value of the Current Ratio (as shown below) from Yahoo Finance using Beautiful Soup, but it keeps returning an empty value.

Inte

相关标签:
4条回答
  • 2021-02-04 20:12

    You can actually get the data in JSON format: there is a call to an API that returns a lot of the data, including the current ratio:

    import requests
    
    # Query-string parameters sent to the quoteSummary endpoint. "modules" is a
    # comma-separated list; the response shown below contains one top-level
    # section per module named here.
    params = {"formatted": "true",
            "crumb": "AKV/cl0TOgz", # works without so not sure of significance
            "lang": "en-US",
            "region": "US",
            "modules": "defaultKeyStatistics,financialData,calendarEvents",
            "corsDomain": "finance.yahoo.com"}
    
    # Request the summary for ticker GSB, decode the JSON body, and keep the
    # first entry of the "result" list under "quoteSummary".
    r = requests.get("https://query1.finance.yahoo.com/v10/finance/quoteSummary/GSB", params=params)
    data = r.json()[u'quoteSummary']["result"][0]
    

    That gives you a dict with numerous pieces of data:

    from pprint import pprint as pp
    pp(data)
    {u'calendarEvents': {u'dividendDate': {u'fmt': u'2016-09-08',
                                            u'raw': 1473292800},
                          u'earnings': {u'earningsAverage': {},
                                        u'earningsDate': [{u'fmt': u'2016-10-27',
                                                           u'raw': 1477526400}],
                                        u'earningsHigh': {},
                                        u'earningsLow': {},
                                        u'revenueAverage': {u'fmt': u'8.72M',
                                                            u'longFmt': u'8,720,000',
                                                            u'raw': 8720000},
                                        u'revenueHigh': {u'fmt': u'8.72M',
                                                         u'longFmt': u'8,720,000',
                                                         u'raw': 8720000},
                                        u'revenueLow': {u'fmt': u'8.72M',
                                                        u'longFmt': u'8,720,000',
                                                        u'raw': 8720000}},
                          u'exDividendDate': {u'fmt': u'2016-05-19',
                                              u'raw': 1463616000},
                          u'maxAge': 1},
      u'defaultKeyStatistics': {u'52WeekChange': {u'fmt': u'3.35%',
                                                  u'raw': 0.033536673},
                                u'SandP52WeekChange': {u'fmt': u'5.21%',
                                                       u'raw': 0.052093267},
                                u'annualHoldingsTurnover': {},
                                u'annualReportExpenseRatio': {},
                                u'beta': {u'fmt': u'0.23', u'raw': 0.234153},
                                u'beta3Year': {},
                                u'bookValue': {u'fmt': u'1.29', u'raw': 1.295},
                                u'category': None,
                                u'earningsQuarterlyGrowth': {u'fmt': u'-28.00%',
                                                             u'raw': -0.28},
                                u'enterpriseToEbitda': {u'fmt': u'9.22',
                                                        u'raw': 9.215},
                                u'enterpriseToRevenue': {u'fmt': u'1.60',
                                                         u'raw': 1.596},
                                u'enterpriseValue': {u'fmt': u'50.69M',
                                                     u'longFmt': u'50,690,408',
                                                     u'raw': 50690408},
                                u'fiveYearAverageReturn': {},
                                u'floatShares': {u'fmt': u'11.63M',
                                                 u'longFmt': u'11,628,487',
                                                 u'raw': 11628487},
                                u'forwardEps': {u'fmt': u'0.29', u'raw': 0.29},
                                u'forwardPE': {},
                                u'fundFamily': None,
                                u'fundInceptionDate': {},
                                u'heldPercentInsiders': {u'fmt': u'36.12%',
                                                         u'raw': 0.36116},
                                u'heldPercentInstitutions': {u'fmt': u'21.70%',
                                                             u'raw': 0.21700001},
                                u'lastCapGain': {},
                                u'lastDividendValue': {},
                                u'lastFiscalYearEnd': {u'fmt': u'2015-12-31',
                                                       u'raw': 1451520000},
                                u'lastSplitDate': {},
                                u'lastSplitFactor': None,
                                u'legalType': None,
                                u'maxAge': 1,
                                u'morningStarOverallRating': {},
                                u'morningStarRiskRating': {},
                                u'mostRecentQuarter': {u'fmt': u'2016-06-30',
                                                       u'raw': 1467244800},
                                u'netIncomeToCommon': {u'fmt': u'3.82M',
                                                       u'longFmt': u'3,819,000',
                                                       u'raw': 3819000},
                                u'nextFiscalYearEnd': {u'fmt': u'2017-12-31',
                                                       u'raw': 1514678400},
                                u'pegRatio': {},
                                u'priceToBook': {u'fmt': u'2.64',
                                                 u'raw': 2.6358302},
                                u'priceToSalesTrailing12Months': {},
                                u'profitMargins': {u'fmt': u'12.02%',
                                                   u'raw': 0.12023},
                                u'revenueQuarterlyGrowth': {},
                                u'sharesOutstanding': {u'fmt': u'21.18M',
                                                       u'longFmt': u'21,184,300',
                                                       u'raw': 21184300},
                                u'sharesShort': {u'fmt': u'27.06k',
                                                 u'longFmt': u'27,057',
                                                 u'raw': 27057},
                                u'sharesShortPriorMonth': {u'fmt': u'36.35k',
                                                           u'longFmt': u'36,352',
                                                           u'raw': 36352},
                                u'shortPercentOfFloat': {u'fmt': u'0.20%',
                                                         u'raw': 0.001977},
                                u'shortRatio': {u'fmt': u'0.81', u'raw': 0.81},
                                u'threeYearAverageReturn': {},
                                u'totalAssets': {},
                                u'trailingEps': {u'fmt': u'0.18', u'raw': 0.18},
                                u'yield': {},
                                u'ytdReturn': {}},
      u'financialData': {u'currentPrice': {u'fmt': u'3.41', u'raw': 3.4134},
                         u'currentRatio': {u'fmt': u'1.97', u'raw': 1.974},
                         u'debtToEquity': {},
                         u'earningsGrowth': {u'fmt': u'-33.30%', u'raw': -0.333},
                         u'ebitda': {u'fmt': u'5.5M',
                                     u'longFmt': u'5,501,000',
                                     u'raw': 5501000},
                         u'ebitdaMargins': {u'fmt': u'17.32%',
                                            u'raw': 0.17318001},
                         u'freeCashflow': {u'fmt': u'4.06M',
                                           u'longFmt': u'4,062,250',
                                           u'raw': 4062250},
                         u'grossMargins': {u'fmt': u'79.29%', u'raw': 0.79288},
                         u'grossProfits': {u'fmt': u'25.17M',
                                           u'longFmt': u'25,172,000',
                                           u'raw': 25172000},
                         u'maxAge': 86400,
                         u'numberOfAnalystOpinions': {},
                         u'operatingCashflow': {u'fmt': u'6.85M',
                                                u'longFmt': u'6,853,000',
                                                u'raw': 6853000},
                         u'operatingMargins': {u'fmt': u'16.47%',
                                               u'raw': 0.16465001},
                         u'profitMargins': {u'fmt': u'12.02%', u'raw': 0.12023},
                         u'quickRatio': {u'fmt': u'1.92', u'raw': 1.917},
                         u'recommendationKey': u'strong_buy',
                         u'recommendationMean': {u'fmt': u'1.00', u'raw': 1.0},
                         u'returnOnAssets': {u'fmt': u'7.79%', u'raw': 0.07793},
                         u'returnOnEquity': {u'fmt': u'15.05%', u'raw': 0.15054},
                         u'revenueGrowth': {u'fmt': u'5.00%', u'raw': 0.05},
                         u'revenuePerShare': {u'fmt': u'1.51', u'raw': 1.513},
                         u'targetHighPrice': {},
                         u'targetLowPrice': {},
                         u'targetMeanPrice': {},
                         u'targetMedianPrice': {},
                         u'totalCash': {u'fmt': u'20.28M',
                                        u'longFmt': u'20,277,000',
                                        u'raw': 20277000},
                         u'totalCashPerShare': {u'fmt': u'0.96', u'raw': 0.957},
                         u'totalDebt': {u'fmt': None,
                                        u'longFmt': u'0',
                                        u'raw': 0},
                         u'totalRevenue': {u'fmt': u'31.76M',
                                           u'longFmt': u'31,764,000',
                                           u'raw': 31764000}}}
    

    What you want is in data[u'financialData']:

     pp(data[u'financialData'])
    
     {u'currentPrice': {u'fmt': u'3.41', u'raw': 3.4134},
     u'currentRatio': {u'fmt': u'1.97', u'raw': 1.974},
     u'debtToEquity': {},
     u'earningsGrowth': {u'fmt': u'-33.30%', u'raw': -0.333},
     u'ebitda': {u'fmt': u'5.5M', u'longFmt': u'5,501,000', u'raw': 5501000},
     u'ebitdaMargins': {u'fmt': u'17.32%', u'raw': 0.17318001},
     u'freeCashflow': {u'fmt': u'4.06M',
                       u'longFmt': u'4,062,250',
                       u'raw': 4062250},
     u'grossMargins': {u'fmt': u'79.29%', u'raw': 0.79288},
     u'grossProfits': {u'fmt': u'25.17M',
                       u'longFmt': u'25,172,000',
                       u'raw': 25172000},
     u'maxAge': 86400,
     u'numberOfAnalystOpinions': {},
     u'operatingCashflow': {u'fmt': u'6.85M',
                            u'longFmt': u'6,853,000',
                            u'raw': 6853000},
     u'operatingMargins': {u'fmt': u'16.47%', u'raw': 0.16465001},
     u'profitMargins': {u'fmt': u'12.02%', u'raw': 0.12023},
     u'quickRatio': {u'fmt': u'1.92', u'raw': 1.917},
     u'recommendationKey': u'strong_buy',
     u'recommendationMean': {u'fmt': u'1.00', u'raw': 1.0},
     u'returnOnAssets': {u'fmt': u'7.79%', u'raw': 0.07793},
     u'returnOnEquity': {u'fmt': u'15.05%', u'raw': 0.15054},
     u'revenueGrowth': {u'fmt': u'5.00%', u'raw': 0.05},
     u'revenuePerShare': {u'fmt': u'1.51', u'raw': 1.513},
     u'targetHighPrice': {},
     u'targetLowPrice': {},
     u'targetMeanPrice': {},
     u'targetMedianPrice': {},
     u'totalCash': {u'fmt': u'20.28M',
                    u'longFmt': u'20,277,000',
                    u'raw': 20277000},
     u'totalCashPerShare': {u'fmt': u'0.96', u'raw': 0.957},
     u'totalDebt': {u'fmt': None, u'longFmt': u'0', u'raw': 0},
     u'totalRevenue': {u'fmt': u'31.76M',
                       u'longFmt': u'31,764,000',
                       u'raw': 31764000}}
    

    You can see u'currentRatio' in there, the fmt is the formatted output you see on the site, formatted to two decimal places. So to get the 1.97:

    In [5]: import requests
       ...: data = {"formatted": "true",
       ...:         "crumb": "AKV/cl0TOgz",
       ...:         "lang": "en-US",
       ...:         "region": "US",
       ...:         "modules": "defaultKeyStatistics,financialData,calendarEvents",
       ...:         "corsDomain": "finance.yahoo.com"}
       ...: r = requests.get("https://query1.finance.yahoo.com/v10/finance/quoteSumm
       ...: ary/GSB", params=data)
       ...: data = r.json()[u'quoteSummary']["result"][0][u'financialData']
       ...: ratio = data[u'currentRatio']
       ...: print(ratio)
       ...: print(ratio["fmt"])
       ...: 
    {'raw': 1.974, 'fmt': '1.97'}
    1.97
    

    The equivalent code using urllib:

    In [1]: import urllib
       ...: from urllib import urlencode
       ...: from json import load
       ...: 
       ...: 
       ...: data = {"formatted": "true",
       ...:         "crumb": "AKV/cl0TOgz",
       ...:         "lang": "en-US",
       ...:         "region": "US",
       ...:         "modules": "defaultKeyStatistics,financialData,calendarEvents",
       ...:         "corsDomain": "finance.yahoo.com"}
       ...: url = "https://query1.finance.yahoo.com/v10/finance/quoteSummary/GSB"
       ...: r = urllib.urlopen(url, data=urlencode(data))
       ...: data = load(r)[u'quoteSummary']["result"][0][u'financialData']
       ...: ratio = data[u'currentRatio']
       ...: print(ratio)
       ...: print(ratio["fmt"])
       ...: 
    {u'raw': 1.974, u'fmt': u'1.97'}
    1.97
    

    It works fine for AAPL also:

    In [1]: import urllib
       ...: from urllib import urlencode
       ...: from json import load
       ...: data = {"formatted": "true",
       ...:         "lang": "en-US",
       ...:         "region": "US",
       ...:         "modules": "defaultKeyStatistics,financialData,calendarEvents",
       ...:         "corsDomain": "finance.yahoo.com"}
       ...: url = "https://query1.finance.yahoo.com/v10/finance/quoteSummary/AAPL"
       ...: r = urllib.urlopen(url, data=urlencode(data))
       ...: data = load(r)[u'quoteSummary']["result"][0][u'financialData']
       ...: ratio = data[u'currentRatio']
       ...: print(ratio)
       ...: print(ratio["fmt"])
       ...: 
    {u'raw': 1.312, u'fmt': u'1.31'}
    1.31
    

    Adding the crumb parameters seems to have no effect, if you need to get it at a later date:

    # Download the key-statistics page and parse it so the inline <script>
    # that assigns `root.App.main = {...}` can be located.
    soup = BeautifulSoup(urllib.urlopen("http://finance.yahoo.com/quote/GSB/key-statistics?p=GSB").read())
    # Raw strings keep \s and \{ as regex escapes instead of (invalid) string
    # escapes; the dots are escaped so they match a literal "." only.
    script = soup.find("script", text=re.compile(r"root\.App\.main")).text
    # Capture the JSON object assigned to root.App.main and decode it.
    data = loads(re.search(r"root\.App\.main\s+=\s+(\{.*\})", script).group(1))
    # The crumb is nested under context -> dispatcher -> stores -> CrumbStore.
    print(data["context"]["dispatcher"]["stores"]["CrumbStore"]["crumb"])
    

    For market cap, you need to add the summaryDetail module:

    In [1]: import requests
       ...: 
       ...: params = {"formatted": "true",
       ...:           "crumb": "AKV/cl0TOgz",  # works without so not sure of signif
       ...: icance
       ...:           "lang": "en-US",
       ...:           "region": "US",
       ...:           "modules": "summaryDetail",
       ...:           "corsDomain": "finance.yahoo.com"}
       ...: 
       ...: r = requests.get("https://query1.finance.yahoo.com/v10/finance/quoteSumm
       ...: ary/GOOG", params=params)
       ...: data = r.json()[u'quoteSummary']["result"][0]
       ...: print(data["summaryDetail"]["marketCap"])
       ...: 
    {'raw': 769972436992, 'fmt': '769.97B', 'longFmt': '769,972,436,992'}
    

    The available modules I know of are:

    defaultKeyStatistics
    financialData
    calendarEvents
    assetProfile
    summaryDetail
    upgradeDowngradeHistory
    recommendationTrend
    earnings
    price
    
    0 讨论(0)
  • 2021-02-04 20:12

    One thing I'd add to Padriac's answer is to except KeyErrors, since you'll probably be scraping more than one ticker.

    import requests
    # The ticker (GSB), crumb and module list are all embedded directly in the
    # query string of this URL.
    a = requests.get('https://query2.finance.yahoo.com/v10/finance/quoteSummary/GSB?formatted=true&crumb=A7e5%2FXKKAFa&lang=en-US&region=US&modules=defaultKeyStatistics%2CfinancialData%2CcalendarEvents&corsDomain=finance.yahoo.com')
    b = a.json()
    # Best-effort lookup: if any key along the path is missing, or the "result"
    # list is empty, the ticker is skipped silently instead of raising.
    try:
        ratio = b['quoteSummary']['result'][0]['financialData']['currentRatio']['raw']
        print(ratio) #prints 1.974
    except (IndexError, KeyError):
        pass
    

    A cool thing about doing it like this is that you can easily change the keys for the information you want. A good way to see the way the dictionary is nested on the Yahoo! Finance pages is to use pprint. Furthermore, for the pages that have quarterly information just change [0] to [1] to get the info for the second quarter instead of the first.. and so on and so forth.

    0 讨论(0)
  • 2021-02-04 20:12

    Maybe this is not the answer you are looking for, but R can do this very easily and very quickly. See the link below.

    http://allthingsr.blogspot.com/2012/10/pull-yahoo-finance-key-statistics.html

    #######################################################################
    # Script to download key metrics for a set of stock tickers using the quantmod package
    #######################################################################
    # library() fails immediately if a package is missing; require() would only
    # return FALSE and let the script die later with a less helpful error.
    library(quantmod)
    library(plyr)

    # Yahoo quote fields to request for every ticker.
    what_metrics <- yahooQF(c("Price/Sales",
                              "P/E Ratio",
                              "Price/EPS Estimate Next Year",
                              "PEG Ratio",
                              "Dividend Yield",
                              "Market Capitalization"))

    tickers <- c("AAPL", "FB", "GOOG", "HPQ", "IBM", "MSFT", "ORCL", "SAP")
    # Not all the metrics are returned by Yahoo.
    # The tickers are sent as a single ";"-separated string.
    metrics <- getQuote(paste(tickers, collapse = ";"), what = what_metrics)

    # Add tickers as the first column and remove the first column which had
    # date stamps. drop = FALSE keeps a data frame even if one column remains.
    metrics <- data.frame(Symbol = tickers, metrics[, -1, drop = FALSE])

    # Change colnames
    colnames(metrics) <- c("Symbol", "Revenue Multiple", "Earnings Multiple",
                           "Earnings Multiple (Forward)", "Price-to-Earnings-Growth", "Div Yield", "Market Cap")

    # Persist this to the csv file
    write.csv(metrics, "FinancialMetrics.csv", row.names = FALSE)
    
    #######################################################################
    
    #######################################################################
    ##Alternate method to download all key stats using XML and x_path - PREFERRED WAY
    #######################################################################
    
    # Output is written to the current working directory. Set that before
    # running the script; a hard-coded, machine-specific setwd() path makes the
    # script fail on every other machine.
    # library() errors right away if a package is missing, unlike require().
    library(XML)
    library(plyr)
    #' Scrape the Yahoo Finance key-statistics table for one ticker.
    #'
    #' Fetches the page at the key-statistics URL for `symbol`, finds every
    #' <td> with class 'yfnc_tablehead1' (the measure labels) and pairs each
    #' label with the text of its sibling cell.
    #'
    #' @param symbol Ticker symbol appended to the key-statistics URL.
    #' @return A one-row data.frame: column names are the cleaned measure
    #'   labels, values are the text of the cell next to each label.
    #' @details Signals an error when no label cells are found on the page.
    getKeyStats_xpath <- function(symbol) {
      yahoo.URL <- "http://finance.yahoo.com/q/ks?s="
      html_text <- htmlParse(paste0(yahoo.URL, symbol), encoding = "UTF-8")

      # search for <td> nodes anywhere that have class 'yfnc_tablehead1'
      nodes <- getNodeSet(html_text, "/*//td[@class='yfnc_tablehead1']")

      # The original used `break` here, which is only valid inside a loop and
      # raised "no loop for break/next"; fail with a message that names the
      # ticker instead.
      if (length(nodes) == 0) {
        stop("No key statistics found for symbol '", symbol, "'", call. = FALSE)
      }

      # Text of a node as exactly one string, so vapply() stays type-stable
      # even if a cell yields no text.
      node_text <- function(node) {
        txt <- xmlValue(node)
        if (length(txt) == 0) NA_character_ else txt
      }

      measures <- vapply(nodes, node_text, character(1))

      # Clean up the column name
      measures <- gsub(" *[0-9]*:", "", gsub(" \\(.*?\\)[0-9]*:", "", measures))

      # Make repeated labels unique by appending a running counter.
      # Vectorised, so an empty `dups` is a no-op (1:length(dups) would have
      # iterated over c(1, 0)).
      dups <- which(duplicated(measures))
      measures[dups] <- paste(measures[dups], seq_along(dups), sep = " ")

      # use siblings function to get value
      values <- vapply(nodes, function(x) node_text(getSibling(x)), character(1))

      df <- data.frame(t(values))
      colnames(df) <- measures
      df
    }
    
    # Tickers to fetch. getKeyStats_xpath is applied to each one via ldply and
    # the per-ticker results are collected into `stats`.
    tickers <- c("AAPL")
    stats <- ldply(tickers, getKeyStats_xpath)
    # Label each row with its ticker, then write the transposed table so the
    # measures run down the rows of the CSV.
    rownames(stats) <- tickers
    write.csv(t(stats), "FinancialStats_updated.csv",row.names=TRUE)  
    
    #######################################################################
    

    0 讨论(0)
  • 2021-02-04 20:25

    Here is one more solution which uses Excel.

    http://www.financialwisdomforum.org/gummy-stuff/Yahoo-data.htm

    Download a sample workbook from one of the many links on that site. That will do everything you want, and a whole lot more.

    0 讨论(0)
提交回复
热议问题