I've been trying to retrieve a stock price from Yahoo! Finance, for example for Apple Inc. My code is like this (using Python 2):
import requests
from bs4 import Bea
The data is obviously populated client-side using React, so you won't be able to parse it reliably using class names and the like. You can get all the data in JSON format from the page source, from the root.App.main
script:
import requests
from bs4 import BeautifulSoup
import re
from json import loads
# Fetch the profile page; the quote data is embedded as JSON in an inline
# <script> that assigns to root.App.main.
response = requests.get("http://finance.yahoo.com/quote/AAPL/profile?p=AAPL")
# Name the parser explicitly so bs4 does not have to guess one (and warn).
soup = BeautifulSoup(response.content, "html.parser")
# Raw strings keep the regex escapes intact; the dots are escaped so they
# match literal "." characters rather than any character.
script = soup.find("script", text=re.compile(r"root\.App\.main")).text
# Capture the object literal assigned to root.App.main and decode it as JSON.
data = loads(re.search(r"root\.App\.main\s+=\s+(\{.*\})", script).group(1))
print(data)
This gives you a whole load of JSON; you can go through the data and pick out what you need, like below:
from pprint import pprint as pp

# Walk down to the stores mapping, then pretty-print the quote summary entry.
dispatcher = data["context"]["dispatcher"]
stores = dispatcher["stores"]
pp(stores[u'QuoteSummaryStore'])
Which gives you:
{u'price': {u'averageDailyVolume10Day': {u'fmt': u'63.06M',
u'longFmt': u'63,056,525',
u'raw': 63056525},
u'averageDailyVolume3Month': {u'fmt': u'36.53M',
u'longFmt': u'36,527,196',
u'raw': 36527196},
u'currency': u'USD',
u'currencySymbol': u'$',
u'exchange': u'NMS',
u'exchangeName': u'NasdaqGS',
u'longName': u'Apple Inc.',
u'marketState': u'PRE',
u'maxAge': 1,
u'openInterest': {},
u'postMarketChange': {u'fmt': u'0.11', u'raw': 0.11000061},
u'postMarketChangePercent': {u'fmt': u'0.10%',
u'raw': 0.0009687416},
u'postMarketPrice': {u'fmt': u'113.66', u'raw': 113.66},
u'postMarketSource': u'DELAYED',
u'postMarketTime': 1474502277,
u'preMarketChange': {u'fmt': u'0.42', u'raw': 0.41999817},
u'preMarketChangePercent': {u'fmt': u'0.37%',
u'raw': 0.0036987949},
u'preMarketPrice': {u'fmt': u'113.97', u'raw': 113.97},
u'preMarketSource': u'FREE_REALTIME',
u'preMarketTime': 1474536411,
u'quoteType': u'EQUITY',
u'regularMarketChange': {u'fmt': u'-0.02', u'raw': -0.019996643},
u'regularMarketChangePercent': {u'fmt': u'-0.02%',
u'raw': -0.00017607327},
u'regularMarketDayHigh': {u'fmt': u'113.99', u'raw': 113.989},
u'regularMarketDayLow': {u'fmt': u'112.44', u'raw': 112.441},
u'regularMarketOpen': {u'fmt': u'113.82', u'raw': 113.82},
u'regularMarketPreviousClose': {u'fmt': u'113.57',
u'raw': 113.57},
u'regularMarketPrice': {u'fmt': u'113.55', u'raw': 113.55},
u'regularMarketSource': u'FREE_REALTIME',
u'regularMarketTime': 1474488000,
u'regularMarketVolume': {u'fmt': u'31.57M',
u'longFmt': u'31,574,028.00',
u'raw': 31574028},
u'shortName': u'Apple Inc.',
u'strikePrice': {},
u'symbol': u'AAPL',
u'underlyingSymbol': None},
u'price,summaryDetail': {},
u'quoteType': {u'exchange': u'NMS',
u'headSymbol': None,
u'longName': u'Apple Inc.',
u'market': u'us_market',
u'messageBoardId': u'finmb_24937',
u'quoteType': u'EQUITY',
u'shortName': u'Apple Inc.',
u'symbol': u'AAPL',
u'underlyingExchangeSymbol': None,
u'underlyingSymbol': None,
u'uuid': u'8b10e4ae-9eeb-3684-921a-9ab27e4d87aa'},
u'summaryDetail': {u'ask': {u'fmt': u'114.00', u'raw': 114},
u'askSize': {u'fmt': u'100',
u'longFmt': u'100',
u'raw': 100},
u'averageDailyVolume10Day': {u'fmt': u'63.06M',
u'longFmt': u'63,056,525',
u'raw': 63056525},
u'averageVolume': {u'fmt': u'36.53M',
u'longFmt': u'36,527,196',
u'raw': 36527196},
u'averageVolume10days': {u'fmt': u'63.06M',
u'longFmt': u'63,056,525',
u'raw': 63056525},
u'beta': {u'fmt': u'1.52', u'raw': 1.51744},
u'bid': {u'fmt': u'113.68', u'raw': 113.68},
u'bidSize': {u'fmt': u'400',
u'longFmt': u'400',
u'raw': 400},
u'dayHigh': {u'fmt': u'113.99', u'raw': 113.989},
u'dayLow': {u'fmt': u'112.44', u'raw': 112.441},
u'dividendRate': {u'fmt': u'2.28', u'raw': 2.28},
u'dividendYield': {u'fmt': u'2.01%', u'raw': 0.0201},
u'exDividendDate': {u'fmt': u'2016-08-04',
u'raw': 1470268800},
u'expireDate': {},
u'fiftyDayAverage': {u'fmt': u'108.61',
u'raw': 108.608284},
u'fiftyTwoWeekHigh': {u'fmt': u'123.82', u'raw': 123.82},
u'fiftyTwoWeekLow': {u'fmt': u'89.47', u'raw': 89.47},
u'fiveYearAvgDividendYield': {},
u'forwardPE': {u'fmt': u'12.70', u'raw': 12.701344},
u'marketCap': {u'fmt': u'611.86B',
u'longFmt': u'611,857,399,808',
u'raw': 611857399808},
u'maxAge': 1,
u'navPrice': {},
u'open': {u'fmt': u'113.82', u'raw': 113.82},
u'openInterest': {},
u'payoutRatio': {u'fmt': u'24.80%', u'raw': 0.248},
u'previousClose': {u'fmt': u'113.57', u'raw': 113.57},
u'priceToSalesTrailing12Months': {u'fmt': u'2.78',
u'raw': 2.777534},
u'regularMarketDayHigh': {u'fmt': u'113.99',
u'raw': 113.989},
u'regularMarketDayLow': {u'fmt': u'112.44',
u'raw': 112.441},
u'regularMarketOpen': {u'fmt': u'113.82', u'raw': 113.82},
u'regularMarketPreviousClose': {u'fmt': u'113.57',
u'raw': 113.57},
u'regularMarketVolume': {u'fmt': u'31.57M',
u'longFmt': u'31,574,028',
u'raw': 31574028},
u'strikePrice': {},
u'totalAssets': {},
u'trailingAnnualDividendRate': {u'fmt': u'2.13',
u'raw': 2.13},
u'trailingAnnualDividendYield': {u'fmt': u'1.88%',
u'raw': 0.018754954},
u'trailingPE': {u'fmt': u'13.24', u'raw': 13.240438},
u'twoHundredDayAverage': {u'fmt': u'102.39',
u'raw': 102.39367},
u'volume': {u'fmt': u'31.57M',
u'longFmt': u'31,574,028',
u'raw': 31574028},
u'yield': {},
u'ytdReturn': {}},
u'symbol': u'AAPL'}
Not sure what you mean by 'dynamic' in this case, but have you considered using CSS selectors?
With BeautifulSoup you could get it, e.g., like this:
# Take the first element matched by the CSS selector: a <span> nested under
# a <section> inside the <div> with id "quote-header-info".
soup.select('div#quote-header-info section span')[0]
There are also some variations you could use on the pattern, such as the '>' (child) combinator.
You could get the same with just lxml, no need for BeautifulSoup:
import lxml.html as html
# NOTE(review): `url` is not defined in this snippet — presumably the quote
# page URL used earlier; confirm before running.
# Parse the document at `url` and take its root element.
page = html.parse(url).getroot()
# Text of the first match: a <span> that is a direct child of a <section>
# (and the first child of its parent) inside the <div> with id
# "quote-header-info".
content = page.cssselect('div#quote-header-info section > span:first-child')[0].text
This also illustrates a more specific selector.
If you're interested in more efficient DOM traversal, look into XPath.