How to understand this raw HTML of Yahoo! Finance when retrieving data using Python?

后端 未结 2 1107
渐次进展
渐次进展 2021-01-01 02:26

I've been trying to retrieve a stock price from Yahoo! Finance, for example for Apple Inc. My code looks like this (using Python 2):

import requests
from bs4 import Bea         


        
相关标签:
2条回答
  • 2021-01-01 03:07

    The data is obviously populated using React, so you won't be able to parse it reliably using class names and the like. You can get all the data in JSON format from the page source, from the root.App.main script:

    import re
    from json import loads

    import requests
    from bs4 import BeautifulSoup

    # Fetch the quote page. Naming the parser explicitly keeps the result
    # independent of which optional parsers happen to be installed and avoids
    # bs4's "no parser was explicitly specified" warning.
    page = requests.get("http://finance.yahoo.com/quote/AAPL/profile?p=AAPL")
    soup = BeautifulSoup(page.content, "html.parser")

    # The page state is embedded as a JSON object literal assigned to
    # root.App.main inside an inline <script>. Raw strings keep the regex
    # escapes (\s, \{) intact, and the dots are escaped so they match a
    # literal "." instead of any character.
    script = soup.find("script", text=re.compile(r"root\.App\.main")).text
    data = loads(re.search(r"root\.App\.main\s+=\s+(\{.*\})", script).group(1))
    print(data)
    

    This gives you a whole load of JSON; you can go through the data and pick out what you need, as shown below:

    # Drill into the parsed page state: the per-page data stores sit under
    # context -> dispatcher -> stores of the `data` dict loaded earlier.
    stores = data["context"]["dispatcher"]["stores"]
    from  pprint import pprint as pp
    
    # Pretty-print only the quote summary store.
    pp(stores[u'QuoteSummaryStore']) 
    

    Which gives you:

    {u'price': {u'averageDailyVolume10Day': {u'fmt': u'63.06M',
                                             u'longFmt': u'63,056,525',
                                             u'raw': 63056525},
                u'averageDailyVolume3Month': {u'fmt': u'36.53M',
                                              u'longFmt': u'36,527,196',
                                              u'raw': 36527196},
                u'currency': u'USD',
                u'currencySymbol': u'$',
                u'exchange': u'NMS',
                u'exchangeName': u'NasdaqGS',
                u'longName': u'Apple Inc.',
                u'marketState': u'PRE',
                u'maxAge': 1,
                u'openInterest': {},
                u'postMarketChange': {u'fmt': u'0.11', u'raw': 0.11000061},
                u'postMarketChangePercent': {u'fmt': u'0.10%',
                                             u'raw': 0.0009687416},
                u'postMarketPrice': {u'fmt': u'113.66', u'raw': 113.66},
                u'postMarketSource': u'DELAYED',
                u'postMarketTime': 1474502277,
                u'preMarketChange': {u'fmt': u'0.42', u'raw': 0.41999817},
                u'preMarketChangePercent': {u'fmt': u'0.37%',
                                            u'raw': 0.0036987949},
                u'preMarketPrice': {u'fmt': u'113.97', u'raw': 113.97},
                u'preMarketSource': u'FREE_REALTIME',
                u'preMarketTime': 1474536411,
                u'quoteType': u'EQUITY',
                u'regularMarketChange': {u'fmt': u'-0.02', u'raw': -0.019996643},
                u'regularMarketChangePercent': {u'fmt': u'-0.02%',
                                                u'raw': -0.00017607327},
                u'regularMarketDayHigh': {u'fmt': u'113.99', u'raw': 113.989},
                u'regularMarketDayLow': {u'fmt': u'112.44', u'raw': 112.441},
                u'regularMarketOpen': {u'fmt': u'113.82', u'raw': 113.82},
                u'regularMarketPreviousClose': {u'fmt': u'113.57',
                                                u'raw': 113.57},
                u'regularMarketPrice': {u'fmt': u'113.55', u'raw': 113.55},
                u'regularMarketSource': u'FREE_REALTIME',
                u'regularMarketTime': 1474488000,
                u'regularMarketVolume': {u'fmt': u'31.57M',
                                         u'longFmt': u'31,574,028.00',
                                         u'raw': 31574028},
                u'shortName': u'Apple Inc.',
                u'strikePrice': {},
                u'symbol': u'AAPL',
                u'underlyingSymbol': None},
     u'price,summaryDetail': {},
     u'quoteType': {u'exchange': u'NMS',
                    u'headSymbol': None,
                    u'longName': u'Apple Inc.',
                    u'market': u'us_market',
                    u'messageBoardId': u'finmb_24937',
                    u'quoteType': u'EQUITY',
                    u'shortName': u'Apple Inc.',
                    u'symbol': u'AAPL',
                    u'underlyingExchangeSymbol': None,
                    u'underlyingSymbol': None,
                    u'uuid': u'8b10e4ae-9eeb-3684-921a-9ab27e4d87aa'},
     u'summaryDetail': {u'ask': {u'fmt': u'114.00', u'raw': 114},
                        u'askSize': {u'fmt': u'100',
                                     u'longFmt': u'100',
                                     u'raw': 100},
                        u'averageDailyVolume10Day': {u'fmt': u'63.06M',
                                                     u'longFmt': u'63,056,525',
                                                     u'raw': 63056525},
                        u'averageVolume': {u'fmt': u'36.53M',
                                           u'longFmt': u'36,527,196',
                                           u'raw': 36527196},
                        u'averageVolume10days': {u'fmt': u'63.06M',
                                                 u'longFmt': u'63,056,525',
                                                 u'raw': 63056525},
                        u'beta': {u'fmt': u'1.52', u'raw': 1.51744},
                        u'bid': {u'fmt': u'113.68', u'raw': 113.68},
                        u'bidSize': {u'fmt': u'400',
                                     u'longFmt': u'400',
                                     u'raw': 400},
                        u'dayHigh': {u'fmt': u'113.99', u'raw': 113.989},
                        u'dayLow': {u'fmt': u'112.44', u'raw': 112.441},
                        u'dividendRate': {u'fmt': u'2.28', u'raw': 2.28},
                        u'dividendYield': {u'fmt': u'2.01%', u'raw': 0.0201},
                        u'exDividendDate': {u'fmt': u'2016-08-04',
                                            u'raw': 1470268800},
                        u'expireDate': {},
                        u'fiftyDayAverage': {u'fmt': u'108.61',
                                             u'raw': 108.608284},
                        u'fiftyTwoWeekHigh': {u'fmt': u'123.82', u'raw': 123.82},
                        u'fiftyTwoWeekLow': {u'fmt': u'89.47', u'raw': 89.47},
                        u'fiveYearAvgDividendYield': {},
                        u'forwardPE': {u'fmt': u'12.70', u'raw': 12.701344},
                        u'marketCap': {u'fmt': u'611.86B',
                                       u'longFmt': u'611,857,399,808',
                                       u'raw': 611857399808},
                        u'maxAge': 1,
                        u'navPrice': {},
                        u'open': {u'fmt': u'113.82', u'raw': 113.82},
                        u'openInterest': {},
                        u'payoutRatio': {u'fmt': u'24.80%', u'raw': 0.248},
                        u'previousClose': {u'fmt': u'113.57', u'raw': 113.57},
                        u'priceToSalesTrailing12Months': {u'fmt': u'2.78',
                                                          u'raw': 2.777534},
                        u'regularMarketDayHigh': {u'fmt': u'113.99',
                                                  u'raw': 113.989},
                        u'regularMarketDayLow': {u'fmt': u'112.44',
                                                 u'raw': 112.441},
                        u'regularMarketOpen': {u'fmt': u'113.82', u'raw': 113.82},
                        u'regularMarketPreviousClose': {u'fmt': u'113.57',
                                                        u'raw': 113.57},
                        u'regularMarketVolume': {u'fmt': u'31.57M',
                                                 u'longFmt': u'31,574,028',
                                                 u'raw': 31574028},
                        u'strikePrice': {},
                        u'totalAssets': {},
                        u'trailingAnnualDividendRate': {u'fmt': u'2.13',
                                                        u'raw': 2.13},
                        u'trailingAnnualDividendYield': {u'fmt': u'1.88%',
                                                         u'raw': 0.018754954},
                        u'trailingPE': {u'fmt': u'13.24', u'raw': 13.240438},
                        u'twoHundredDayAverage': {u'fmt': u'102.39',
                                                  u'raw': 102.39367},
                        u'volume': {u'fmt': u'31.57M',
                                    u'longFmt': u'31,574,028',
                                    u'raw': 31574028},
                        u'yield': {},
                        u'ytdReturn': {}},
     u'symbol': u'AAPL'}
    
    0 讨论(0)
  • 2021-01-01 03:13

    Not sure what you mean by 'dynamic' in this case, but have you considered using CSS selectors?

    With BeautifulSoup you could get it like this, for example:

    # First <span> found anywhere under a <section> inside the element with
    # id "quote-header-info"; assumes `soup` is an already-parsed BeautifulSoup
    # document (it is not created in this snippet).
    soup.select('div#quote-header-info section span')[0]  
    

    There are some variations you could use on the pattern, such as the '>' child combinator.

    You could get the same with just lxml, no need for BeautifulSoup:

    import lxml.html as html
    # NOTE: `url` is not defined in this snippet; presumably it holds the
    # address of the quote page.
    page = html.parse(url).getroot()
    # Text of the first match for a <span> that is a direct child of a
    # <section> (and the first child of its parent) inside
    # div#quote-header-info.
    content = page.cssselect('div#quote-header-info section > span:first-child')[0].text
    

    Which immediately illustrates a more specific selector.

    If you're interested in more efficient DOM traversal, look into XPath.

    0 讨论(0)
提交回复
热议问题