技术合作联系:13958075150
【1】首次登录获取cookies并存为文件
from selenium import webdriver
import time
import json
# Step 1: log in through the web form once and persist the session cookies
# to a JSON file so later runs can reuse the session without logging in again.
br = webdriver.Chrome()
try:
    br.get("https://twitter.com/login?prefetchTimestamp=1574961630352")
    # Fill in the account field (phone number / user name).
    user_name = br.find_element_by_css_selector('#page-container > div > div.signin-wrapper > form > fieldset > div:nth-child(2) > input')
    user_name.send_keys('您的账号')
    # Fill in the password field.
    password_input = br.find_element_by_xpath('//*[@id="page-container"]/div/div[1]/form/fieldset/div[2]/input')
    password_input.send_keys('您的密码')
    # Submit the login form.
    br.find_element_by_xpath('//*[@id="page-container"]/div/div[1]/form/div[2]/button').click()
    # Fixed pause before reading the cookies, giving the login request time
    # to complete.
    time.sleep(5)
    cookies = br.get_cookies()
finally:
    # Always close the browser, even when an element lookup or the login
    # itself fails; otherwise the Chrome process is left running.
    br.quit()

# Written only after a successful login: an exception above propagates
# before this point, so a failed attempt never overwrites the file.
with open('/Users/macbookair/Desktop/cookies-twitter.json', 'w') as f:
    json.dump(cookies, f)
【2】从文件读取cookie并爬取目标数据
# Step 2: restore the saved cookies in a fresh browser and scrape every
# target page, starting from link index 66.
#
# NOTE(review): the pasted source had lost all indentation; the nesting below
# is reconstructed from the statement order -- confirm against the original.
# get_links, get_data, click_more_replay, click_more_button,
# click_show_uncontent and save_data_to_file are defined outside this section.
# They take no browser/collection argument, so presumably they read the
# module-level `br` and `commits` names assigned here -- verify before
# renaming either of them.

# The cookie file does not change while scraping, so read and clean it once
# instead of once per URL.
with open('/Users/macbookair/Desktop/cookies-twitter.json') as f:
    cookies = json.load(f)
for cookie in cookies:
    # The stored 'expiry' field is dropped before the cookie is re-added
    # (presumably because the driver rejects the saved value -- TODO confirm).
    cookie.pop('expiry', None)

# Script that jumps the page far down to trigger loading of more content.
js = "var q=document.documentElement.scrollTop=100000"

first_index = 66  # index of the first link to process
for num, url in enumerate(get_links()[first_index:], start=first_index):
    print(num)
    if "https" not in url:
        continue
    print(url)

    br = webdriver.Chrome()
    try:
        # The site is opened before the cookies are added, then the real
        # target page is loaded with the restored session.
        br.get("https://twitter.com")
        for cookie in cookies:
            br.add_cookie(cookie)
        br.get(url)

        commits = set()
        get_data()
        click_more_replay()
        click_more_button()
        click_show_uncontent()

        # Scroll down repeatedly, expanding and collecting after each step.
        for _ in range(25):
            time.sleep(2)
            br.execute_script(js)
            click_more_replay()
            click_show_uncontent()
            click_more_button()
            get_data()

        # Save the collected data for this URL to file.
        save_data_to_file(url, commits=get_data())
    finally:
        # Close this URL's browser even if scraping fails midway, so a bad
        # page does not leave a Chrome process behind.
        br.quit()
来源:CSDN
作者:__Pythoner__
链接:https://blog.csdn.net/xinxianren007/article/details/104448598