Question
# The error: the script either prints only the first 11 reviews (when `while n < 500` is used) or never finishes (when `while True:` is used). The requirement is to save the Steam id, review content, and profile URL of every review of the game into an Excel file.
from msedge.selenium_tools import Edge, EdgeOptions
from selenium.webdriver.common.keys import Keys
import re
from time import sleep
from datetime import datetime
from openpyxl import Workbook

game_id = 1097150
# Build the URL from game_id so switching games needs exactly one edit.
url = (f'https://steamcommunity.com/app/{game_id}/positivereviews/'
       '?p=1&browsefilter=trendweek&filterLanguage=english')

options = EdgeOptions()
options.use_chromium = True
driver = Edge(options=options)
driver.get(url)

# Scrape the infinite-scroll review page: parse the newly loaded cards,
# scroll to the bottom, and stop once the page stops growing.
reviews = []        # (steam_id, profile_url, review_content) tuples, in order
review_ids = set()  # steam ids already scraped -- skips duplicate cards
last_position = driver.execute_script("return window.pageYOffset;")
running = True
while running:
    cards = driver.find_elements_by_class_name('apphub_Card')
    # Only the tail of the card list can be new; review_ids filters any
    # card that was already parsed on an earlier pass.
    for card in cards[-20:]:
        profile_url = card.find_element_by_xpath(
            './/div[@class="apphub_friend_block"]/div/a[2]').get_attribute('href')
        steam_id = profile_url.split('/')[-2]
        if steam_id in review_ids:
            continue  # BUG FIX: without this the same review is appended repeatedly
        review_ids.add(steam_id)
        date_posted = card.find_element_by_xpath(
            './/div[@class="apphub_CardTextContent"]/div').text
        # The card text starts with the date line; strip it to keep only the review.
        review_content = card.find_element_by_xpath(
            './/div[@class="apphub_CardTextContent"]').text.replace(date_posted, '').strip()
        reviews.append((steam_id, profile_url, review_content))

    # Scroll to the bottom, retrying a few times because content loads lazily.
    attempt_count = 0
    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        sleep(0.5)
        curr_position = driver.execute_script("return window.pageYOffset;")
        if curr_position == last_position:
            attempt_count += 1
            if attempt_count >= 3:
                # Page no longer grows after 3 attempts: all reviews loaded.
                running = False  # BUG FIX: original 'while True' never terminated
                break
        else:
            # BUG FIX: remember the new offset, otherwise the stuck-detection
            # comparison above never matches and the loop spins forever.
            last_position = curr_position
            break

driver.close()

# Save the scraped rows to an Excel workbook.
wb = Workbook()
ws = wb.worksheets[0]
ws.append(['SteamId', 'ProfileURL', 'ReviewText'])
for row in reviews:
    ws.append(row)
today = datetime.today().strftime('%Y%m%d')
wb.save(f'Steam_Reviews_{game_id}_{today}.xlsx')
wb.close()
Answer 1:
Here is how to scroll indefinitely, or in your case until 500 review cards have loaded, whichever comes first.
# Scroll until either 500 review cards are loaded or the page stops growing.
while True:
    cards = driver.find_elements_by_class_name('apphub_Card')
    if len(cards) >= 500:
        break  # enough cards collected
    last_position = driver.execute_script("return window.pageYOffset;")
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    # BUG FIX: the file imports `from time import sleep`, never `import time`,
    # so the original `time.sleep(1)` raised NameError.
    sleep(1)  # give lazily-loaded content time to appear
    curr_position = driver.execute_script("return window.pageYOffset;")
    if last_position == curr_position:
        break  # page did not grow: no more reviews to load

# Parse at most 500 cards into (steam_id, profile_url, review_content) tuples.
for card in cards[:500]:
    profile_url = card.find_element_by_xpath(
        './/div[@class="apphub_friend_block"]/div/a[2]').get_attribute('href')
    steam_id = profile_url.split('/')[-2]
    date_posted = card.find_element_by_xpath(
        './/div[@class="apphub_CardTextContent"]/div').text
    # Card text begins with the date line; strip it to keep the review body only.
    review_content = card.find_element_by_xpath(
        './/div[@class="apphub_CardTextContent"]').text.replace(date_posted, '').strip()
    reviews.append((steam_id, profile_url, review_content))
Source: https://stackoverflow.com/questions/64261087/how-to-scrape-all-steam-id-review-content-profile-url-from-reviews-of-a-game-i