Question
I'm having an issue with my Selenium web scraping script. Normally, the script runs smoothly.
However, I often get the following error inside the for loop below (I believe the script runs faster than the elements become visible):
NoSuchElementException Traceback (most recent call last)
<ipython-input-6-470748a6674f> in <module>
66 item_brand.append(driver.find_element_by_xpath('.//*[@id="brand"]/a/span/bdi').get_attribute('textContent'))
67 item_prices.append(driver.find_element_by_css_selector('[id="price"]').text)
---> 68 item_names.append(driver1.find_element_by_css_selector('[class="nav-product-link-text"] span').text)
69 total_rate.append(driver1.find_element_by_class_name('css-i36p8g').text)
70 review_contents.append(containers.find_element_by_class_name('review-text').text)
......
"NoSuchElementException: Message: no such element: Unable to locate element: {"method":"css selector","selector":"[class="nav-product-link-text"] span"}"
I had to add driver.implicitly_wait(3) within the for loop so it would wait until the elements are visible, but it didn't work.
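Would an explicit wait on the element that raises the exception be the right approach instead? This is a rough, untested sketch of what I mean, using the selector from the failing line:

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Wait up to 10 seconds for the product-name element instead of
# relying on implicitly_wait
wait1 = WebDriverWait(driver1, 10)
name_el = wait1.until(EC.presence_of_element_located(
    (By.CSS_SELECTOR, '[class="nav-product-link-text"] span')))
item_names.append(name_el.text)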
Please check my script below:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

driver = webdriver.Chrome(chrome_path)
driver1 = webdriver.Chrome(chrome_path)

# Create lists for the dataframe:
item_names = list()
item_description = list()
item_brand = list()
review_titles = list()
review_contents = list()
product_helpful = list()
product_not_helpful = list()
member_rating = list()
total_rate = list()
item_prices = list()
item_images = list()

URL = "https://ca.iherb.com/c/Vitamins?sr=2&noi=48&p="
for n in range(1, 2):
    driver.get(f"{URL}{n}")  # modify the page range to scrape more product-list pages
    wait = WebDriverWait(driver, 10)
    # Store all the product links in a list
    item_links = [item.get_attribute("href") for item in wait.until(
        EC.presence_of_all_elements_located((By.CSS_SELECTOR, ".absolute-link-wrapper > a.product-link")))]
    # Iterate over the links
    for item_link in item_links:
        driver.get(item_link)
        # Locate the `View All Reviews` link
        all_reviews_link = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "span.all-reviews-link > a")))
        time.sleep(2)
        x = all_reviews_link.get_attribute("href")
        MAX_PAGE_NUM = 60  # Scrape at most 60 pages of the review section
        for i in range(1, MAX_PAGE_NUM + 1):
            page_num = str(i)
            url = x + '?&p=' + page_num
            print(url)
            driver1.get(url)
            review_containers = driver1.find_elements_by_class_name('review-row')
            for containers in review_containers:
                driver.implicitly_wait(3)  # waiting for the browser to see the website elements
                elements = ', '.join([item.text for item in driver.find_elements_by_css_selector("[itemprop='description'] > ul:nth-of-type(1) > li")])
                item_description.append(elements)
                item_images.append(driver.find_element_by_xpath('//*[@id="product-image"]/div[1]/a').get_attribute('href'))
                item_brand.append(driver.find_element_by_xpath('.//*[@id="brand"]/a/span/bdi').get_attribute('textContent'))
                item_prices.append(driver.find_element_by_css_selector('[id="price"]').text)
                item_names.append(driver1.find_element_by_css_selector('[class="nav-product-link-text"] span').text)
                total_rate.append(driver1.find_element_by_class_name('css-i36p8g').text)
                review_contents.append(containers.find_element_by_class_name('review-text').text)
                product_helpful.append(containers.find_element_by_css_selector('[title="Helpful"] span').text)
                product_not_helpful.append(containers.find_element_by_css_selector('[title="Unhelpful"] span').text)
                # Count the non-transparent stars to get the member rating
                stars = containers.find_elements_by_class_name("css-172co2l")
                rating = 0
                for star in stars:
                    star_color = star.find_element_by_tag_name("path").get_attribute("fill")
                    if star_color != "transparent":
                        rating += 1
                member_rating.append(rating)
            time.sleep(2)  # Slow the script down between review pages
driver.quit()
driver1.quit()
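One other idea I had, though I haven't verified it, is to guard the lookups that intermittently fail so a single missing element doesn't abort the whole loop, roughly like this:

from selenium.common.exceptions import NoSuchElementException

try:
    item_names.append(driver1.find_element_by_css_selector('[class="nav-product-link-text"] span').text)
except NoSuchElementException:
    item_names.append('')  # placeholder keeps the lists the same length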
Please help me check this issue. I really appreciate it.
Source: https://stackoverflow.com/questions/66073388/optimizing-python-web-scraping-script-with-selenium