I have been trying to scrape the value of the Current Ratio (as shown below) from Yahoo Finance using Beautiful Soup, but it keeps returning an empty value.
You can actually get the data in JSON format: there is a call to an API that returns a lot of the data, including the current ratio:
import requests
# Query-string arguments sent to the quoteSummary endpoint.
params = dict(
    formatted="true",
    crumb="AKV/cl0TOgz",  # works without so not sure of significance
    lang="en-US",
    region="US",
    modules="defaultKeyStatistics,financialData,calendarEvents",
    corsDomain="finance.yahoo.com",
)
url = "https://query1.finance.yahoo.com/v10/finance/quoteSummary/GSB"
response = requests.get(url, params=params)
# Everything is nested under quoteSummary -> result; keep the first entry.
payload = response.json()
data = payload[u'quoteSummary']["result"][0]
That gives you a dict with numerous pieces of data:
from pprint import pprint as pp
{u'calendarEvents': {u'dividendDate': {u'fmt': u'2016-09-08',
u'raw': 1473292800},
u'earnings': {u'earningsAverage': {},
u'earningsDate': [{u'fmt': u'2016-10-27',
u'raw': 1477526400}],
u'earningsHigh': {},
u'earningsLow': {},
u'revenueAverage': {u'fmt': u'8.72M',
u'longFmt': u'8,720,000',
u'raw': 8720000},
u'revenueHigh': {u'fmt': u'8.72M',
u'longFmt': u'8,720,000',
u'raw': 8720000},
u'revenueLow': {u'fmt': u'8.72M',
u'longFmt': u'8,720,000',
u'raw': 8720000}},
u'exDividendDate': {u'fmt': u'2016-05-19',
u'raw': 1463616000},
u'maxAge': 1},
u'defaultKeyStatistics': {u'52WeekChange': {u'fmt': u'3.35%',
u'raw': 0.033536673},
u'SandP52WeekChange': {u'fmt': u'5.21%',
u'raw': 0.052093267},
u'annualHoldingsTurnover': {},
u'annualReportExpenseRatio': {},
u'beta': {u'fmt': u'0.23', u'raw': 0.234153},
u'beta3Year': {},
u'bookValue': {u'fmt': u'1.29', u'raw': 1.295},
u'category': None,
u'earningsQuarterlyGrowth': {u'fmt': u'-28.00%',
u'raw': -0.28},
u'enterpriseToEbitda': {u'fmt': u'9.22',
u'raw': 9.215},
u'enterpriseToRevenue': {u'fmt': u'1.60',
u'raw': 1.596},
u'enterpriseValue': {u'fmt': u'50.69M',
u'longFmt': u'50,690,408',
u'raw': 50690408},
u'fiveYearAverageReturn': {},
u'floatShares': {u'fmt': u'11.63M',
u'longFmt': u'11,628,487',
u'raw': 11628487},
u'forwardEps': {u'fmt': u'0.29', u'raw': 0.29},
u'forwardPE': {},
u'fundFamily': None,
u'fundInceptionDate': {},
u'heldPercentInsiders': {u'fmt': u'36.12%',
u'raw': 0.36116},
u'heldPercentInstitutions': {u'fmt': u'21.70%',
u'raw': 0.21700001},
u'lastCapGain': {},
u'lastDividendValue': {},
u'lastFiscalYearEnd': {u'fmt': u'2015-12-31',
u'raw': 1451520000},
u'lastSplitDate': {},
u'lastSplitFactor': None,
u'legalType': None,
u'maxAge': 1,
u'morningStarOverallRating': {},
u'morningStarRiskRating': {},
u'mostRecentQuarter': {u'fmt': u'2016-06-30',
u'raw': 1467244800},
u'netIncomeToCommon': {u'fmt': u'3.82M',
u'longFmt': u'3,819,000',
u'raw': 3819000},
u'nextFiscalYearEnd': {u'fmt': u'2017-12-31',
u'raw': 1514678400},
u'pegRatio': {},
u'priceToBook': {u'fmt': u'2.64',
u'raw': 2.6358302},
u'priceToSalesTrailing12Months': {},
u'profitMargins': {u'fmt': u'12.02%',
u'raw': 0.12023},
u'revenueQuarterlyGrowth': {},
u'sharesOutstanding': {u'fmt': u'21.18M',
u'longFmt': u'21,184,300',
u'raw': 21184300},
u'sharesShort': {u'fmt': u'27.06k',
u'longFmt': u'27,057',
u'raw': 27057},
u'sharesShortPriorMonth': {u'fmt': u'36.35k',
u'longFmt': u'36,352',
u'raw': 36352},
u'shortPercentOfFloat': {u'fmt': u'0.20%',
u'raw': 0.001977},
u'shortRatio': {u'fmt': u'0.81', u'raw': 0.81},
u'threeYearAverageReturn': {},
u'totalAssets': {},
u'trailingEps': {u'fmt': u'0.18', u'raw': 0.18},
u'yield': {},
u'ytdReturn': {}},
u'financialData': {u'currentPrice': {u'fmt': u'3.41', u'raw': 3.4134},
u'currentRatio': {u'fmt': u'1.97', u'raw': 1.974},
u'debtToEquity': {},
u'earningsGrowth': {u'fmt': u'-33.30%', u'raw': -0.333},
u'ebitda': {u'fmt': u'5.5M',
u'longFmt': u'5,501,000',
u'raw': 5501000},
u'ebitdaMargins': {u'fmt': u'17.32%',
u'raw': 0.17318001},
u'freeCashflow': {u'fmt': u'4.06M',
u'longFmt': u'4,062,250',
u'raw': 4062250},
u'grossMargins': {u'fmt': u'79.29%', u'raw': 0.79288},
u'grossProfits': {u'fmt': u'25.17M',
u'longFmt': u'25,172,000',
u'raw': 25172000},
u'maxAge': 86400,
u'numberOfAnalystOpinions': {},
u'operatingCashflow': {u'fmt': u'6.85M',
u'longFmt': u'6,853,000',
u'raw': 6853000},
u'operatingMargins': {u'fmt': u'16.47%',
u'raw': 0.16465001},
u'profitMargins': {u'fmt': u'12.02%', u'raw': 0.12023},
u'quickRatio': {u'fmt': u'1.92', u'raw': 1.917},
u'recommendationKey': u'strong_buy',
u'recommendationMean': {u'fmt': u'1.00', u'raw': 1.0},
u'returnOnAssets': {u'fmt': u'7.79%', u'raw': 0.07793},
u'returnOnEquity': {u'fmt': u'15.05%', u'raw': 0.15054},
u'revenueGrowth': {u'fmt': u'5.00%', u'raw': 0.05},
u'revenuePerShare': {u'fmt': u'1.51', u'raw': 1.513},
u'targetHighPrice': {},
u'targetLowPrice': {},
u'targetMeanPrice': {},
u'targetMedianPrice': {},
u'totalCash': {u'fmt': u'20.28M',
u'longFmt': u'20,277,000',
u'raw': 20277000},
u'totalCashPerShare': {u'fmt': u'0.96', u'raw': 0.957},
u'totalDebt': {u'fmt': None,
u'longFmt': u'0',
u'raw': 0},
u'totalRevenue': {u'fmt': u'31.76M',
u'longFmt': u'31,764,000',
u'raw': 31764000}}}
What you want is in data[u'financialData']
{u'currentPrice': {u'fmt': u'3.41', u'raw': 3.4134},
u'currentRatio': {u'fmt': u'1.97', u'raw': 1.974},
u'debtToEquity': {},
u'earningsGrowth': {u'fmt': u'-33.30%', u'raw': -0.333},
u'ebitda': {u'fmt': u'5.5M', u'longFmt': u'5,501,000', u'raw': 5501000},
u'ebitdaMargins': {u'fmt': u'17.32%', u'raw': 0.17318001},
u'freeCashflow': {u'fmt': u'4.06M',
u'longFmt': u'4,062,250',
u'raw': 4062250},
u'grossMargins': {u'fmt': u'79.29%', u'raw': 0.79288},
u'grossProfits': {u'fmt': u'25.17M',
u'longFmt': u'25,172,000',
u'raw': 25172000},
u'maxAge': 86400,
u'numberOfAnalystOpinions': {},
u'operatingCashflow': {u'fmt': u'6.85M',
u'longFmt': u'6,853,000',
u'raw': 6853000},
u'operatingMargins': {u'fmt': u'16.47%', u'raw': 0.16465001},
u'profitMargins': {u'fmt': u'12.02%', u'raw': 0.12023},
u'quickRatio': {u'fmt': u'1.92', u'raw': 1.917},
u'recommendationKey': u'strong_buy',
u'recommendationMean': {u'fmt': u'1.00', u'raw': 1.0},
u'returnOnAssets': {u'fmt': u'7.79%', u'raw': 0.07793},
u'returnOnEquity': {u'fmt': u'15.05%', u'raw': 0.15054},
u'revenueGrowth': {u'fmt': u'5.00%', u'raw': 0.05},
u'revenuePerShare': {u'fmt': u'1.51', u'raw': 1.513},
u'targetHighPrice': {},
u'targetLowPrice': {},
u'targetMeanPrice': {},
u'targetMedianPrice': {},
u'totalCash': {u'fmt': u'20.28M',
u'longFmt': u'20,277,000',
u'raw': 20277000},
u'totalCashPerShare': {u'fmt': u'0.96', u'raw': 0.957},
u'totalDebt': {u'fmt': None, u'longFmt': u'0', u'raw': 0},
u'totalRevenue': {u'fmt': u'31.76M',
u'longFmt': u'31,764,000',
u'raw': 31764000}}
You can see u'currentRatio' in there; fmt is the formatted output you see on the site, rounded to two decimal places. So to get the 1.97:
In [5]: import requests
...: data = {"formatted": "true",
...: "crumb": "AKV/cl0TOgz",
...: "lang": "en-US",
...: "region": "US",
...: "modules": "defaultKeyStatistics,financialData,calendarEvents",
...: "corsDomain": "finance.yahoo.com"}
...: r = requests.get("https://query1.finance.yahoo.com/v10/finance/quoteSummary/GSB", params=data)
...: data = r.json()[u'quoteSummary']["result"][0][u'financialData']
...: ratio = data[u'currentRatio']
...: print(ratio)
...: print(ratio["fmt"])
{'raw': 1.974, 'fmt': '1.97'}
The equivalent code using urllib:
In [1]: import urllib
...: from urllib import urlencode
...: from json import load
...: data = {"formatted": "true",
...: "crumb": "AKV/cl0TOgz",
...: "lang": "en-US",
...: "region": "US",
...: "modules": "defaultKeyStatistics,financialData,calendarEvents",
...: "corsDomain": "finance.yahoo.com"}
...: url = "https://query1.finance.yahoo.com/v10/finance/quoteSummary/GSB"
...: r = urllib.urlopen(url, data=urlencode(data))
...: data = load(r)[u'quoteSummary']["result"][0][u'financialData']
...: ratio = data[u'currentRatio']
...: print(ratio)
...: print(ratio["fmt"])
{u'raw': 1.974, u'fmt': u'1.97'}
It works fine for AAPL also:
In [1]: import urllib
...: from urllib import urlencode
...: from json import load
...: data = {"formatted": "true",
...: "lang": "en-US",
...: "region": "US",
...: "modules": "defaultKeyStatistics,financialData,calendarEvents",
...: "corsDomain": "finance.yahoo.com"}
...: url = "https://query1.finance.yahoo.com/v10/finance/quoteSummary/AAPL"
...: r = urllib.urlopen(url, data=urlencode(data))
...: data = load(r)[u'quoteSummary']["result"][0][u'financialData']
...: ratio = data[u'currentRatio']
...: print(ratio)
...: print(ratio["fmt"])
{u'raw': 1.312, u'fmt': u'1.31'}
Adding the crumb parameter seems to have no effect, but if you need to get it at a later date:
# Download and parse the key-statistics page.
soup = BeautifulSoup(urllib.urlopen("http://finance.yahoo.com/quote/GSB/key-statistics?p=GSB").read())
# Locate the inline <script> whose text mentions root.App.main.
# Raw strings keep the regex escapes intact; the dots are escaped so they match literally.
script = soup.find("script", text=re.compile(r"root\.App\.main")).text
# Capture the {...} literal assigned to root.App.main and decode it as JSON.
data = loads(re.search(r"root\.App\.main\s+=\s+(\{.*\})", script).group(1))
For market cap, you need to add the summaryDetail module:
In [1]: import requests
...: params = {"formatted": "true",
...: "crumb": "AKV/cl0TOgz", # works without so not sure of significance
...: "lang": "en-US",
...: "region": "US",
...: "modules": "summaryDetail",
...: "corsDomain": "finance.yahoo.com"}
...: r = requests.get("https://query1.finance.yahoo.com/v10/finance/quoteSummary/GOOG", params=params)
...: data = r.json()[u'quoteSummary']["result"][0]
...: print(data["summaryDetail"]["marketCap"])
{'raw': 769972436992, 'fmt': '769.97B', 'longFmt': '769,972,436,992'}
The available modules I know of are:
One thing I'd add to Padriac's answer is to except KeyErrors, since you'll probably be scraping more than one ticker.
import requests
# All query parameters are already encoded in the URL.
a = requests.get('https://query2.finance.yahoo.com/v10/finance/quoteSummary/GSB?formatted=true&crumb=A7e5%2FXKKAFa&lang=en-US&region=US&modules=defaultKeyStatistics%2CfinancialData%2CcalendarEvents&corsDomain=finance.yahoo.com')
b = a.json()
try:
    # Walk down to the unformatted number; any missing level raises.
    ratio = b['quoteSummary']['result'][0]['financialData']['currentRatio']['raw']
    print(ratio) #prints 1.974
except (IndexError, KeyError, TypeError):
    # KeyError: a module or field is absent for this ticker (e.g. the field is {}).
    # IndexError / TypeError: "result" is empty or is not a list.
    print("Current ratio not available for this ticker")
A cool thing about doing it like this is that you can easily change the keys for the information you want. A good way to see how the dictionary is nested on the Yahoo! Finance pages is to use pprint. Furthermore, for the pages that have quarterly information, just change [0] to [1] to get the info for the second quarter instead of the first, and so on.
Maybe this is not the answer you are looking for, but R can do this very easily and very quickly. See the link below.
# Script to download key metrics for a set of stock tickers using the quantmod package
library(quantmod) # provides yahooQF() and getQuote()

# Metrics to request, in the same order as the column names assigned below.
what_metrics <- yahooQF(c(
  "Price/Sales",
  "P/E Ratio",
  "Price/EPS Estimate Next Year",
  "PEG Ratio",
  "Dividend Yield",
  "Market Capitalization"
))

tickers <- c("AAPL", "FB", "GOOG", "HPQ", "IBM", "MSFT", "ORCL", "SAP")

# Not all the metrics are returned by Yahoo.
metrics <- getQuote(paste(tickers, collapse = ";"), what = what_metrics)

# Add tickers as the first column and remove the first column which had date stamps
metrics <- data.frame(Symbol = tickers, metrics[, -1, drop = FALSE])

# Change colnames
colnames(metrics) <- c(
  "Symbol", "Revenue Multiple", "Earnings Multiple",
  "Earnings Multiple (Forward)", "Price-to-Earnings-Growth", "Div Yield",
  "Market Cap"
)

# Persist this to the csv file
write.csv(metrics, "FinancialMetrics.csv", row.names = FALSE)
## Alternate method to download all key stats using XML and x_path - PREFERRED WAY

# Scrape the key-statistics table of a Yahoo Finance page for one ticker.
#
# Args:
#   symbol: Ticker symbol appended to the key-statistics URL, e.g. "AAPL".
#
# Returns:
#   A one-row data.frame: column names are the cleaned-up measure labels,
#   values are the text of the cell next to each label.
#   Stops with an error when the page contains no matching label cells.
getKeyStats_xpath <- function(symbol) {
  yahoo.URL <- "http://finance.yahoo.com/q/ks?s="
  html_text <- XML::htmlParse(paste0(yahoo.URL, symbol), encoding = "UTF-8")

  # search for <td> nodes anywhere that have class 'yfnc_tablehead1'
  nodes <- XML::getNodeSet(html_text, "/*//td[@class='yfnc_tablehead1']")

  # No label cells found: fail loudly instead of returning a malformed frame.
  if (length(nodes) == 0) {
    stop("No key statistics found for symbol: ", symbol, call. = FALSE)
  }

  measures <- vapply(nodes, XML::xmlValue, character(1))

  # Clean up the column name
  measures <- gsub(" *[0-9]*:", "", gsub(" \\(.*?\\)[0-9]*:", "", measures))

  # Remove dups: suffix each repeated label with a running number so the
  # column names stay unique. Vectorised, so an empty `dups` is a no-op.
  dups <- which(duplicated(measures))
  measures[dups] <- paste(measures[dups], seq_along(dups), sep = " ")

  # use siblings function to get value
  values <- vapply(
    nodes,
    function(x) XML::xmlValue(XML::getSibling(x)),
    character(1)
  )

  df <- data.frame(t(values))
  colnames(df) <- measures
  df
}
# Collect the key statistics for every ticker, one row per ticker.
tickers <- c("AAPL")
# ldply() comes from plyr; the qualified call works without attaching the package.
stats <- plyr::ldply(tickers, getKeyStats_xpath)
rownames(stats) <- tickers

# Transpose so each measure becomes a row and each ticker a column in the CSV.
write.csv(t(stats), "FinancialStats_updated.csv", row.names = TRUE)
Here is one more solution which uses Excel.
Download a sample workbook from one of the many links on that site. That will do everything you want, and a whole lot more.