问题
My script works when the login form is submitted as a regular HTTP POST request, but it does not work when the form is submitted via AJAX.
from scrapy.http import Request, FormRequest
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule
# from scrapy.selector import HtmlXPathSelector
from scrapy.http import FormRequest
import scrapy
from scrapy.crawler import CrawlerProcess
from scrapy.utils.response import open_in_browser
from bs4 import BeautifulSoup
import sys
class LoginSpider(CrawlSpider):
    """Spider that submits a login form before crawling.

    The spider reads attributes that this class does not define; they must
    be set on the instance by whoever creates the spider (presumably passed
    as spider arguments -- confirm against the launcher):

    * ``login_url``: URL of the page that contains the login form.
    * ``credentials``: dict with the keys ``fields_in_response`` (strings
      that must all occur in the login page), ``login_details`` (dict with
      ``name`` and ``pwd``) and ``fields_in_main_page`` (strings that must
      all occur in the page returned after a successful login).
    * ``login_post_url`` (optional): URL the credentials are posted to.
      Needed when the form has no usable ``action`` because the page
      submits it with JavaScript/AJAX to a different endpoint.
    """

    name = 'loginspider'

    def init_request(self):
        """Return the request that fetches the login page.

        NOTE(review): nothing in this class calls this method; it looks like
        a leftover ``InitSpider`` hook and is kept only for compatibility.
        """
        # The callback must accept a response; start_requests() does not,
        # so the login page is handed to login() instead.
        return Request(url=self.login_url, callback=self.login)

    def start_requests(self):
        """Start the crawl by fetching the login page."""
        print("\n start_request is here \n")
        yield Request(
            url=self.login_url,
            callback=self.login,
            dont_filter=True
        )

    def fetch_form_data(self, response):
        """Build the form fields to submit from the inputs on the login page.

        Every named ``<input>`` inside a ``<form>`` is inspected:

        * ``type="email"`` inputs receive the login name,
        * ``type="password"`` inputs receive the password,
        * ``type="hidden"`` inputs keep the value found in the page (for
          example a CSRF token),
        * if the page has no ``email`` input, the first ``type="text"``
          input receives the login name instead.

        Returns:
            dict mapping input names to the values to submit. The dict is
            empty when the page does not contain every marker listed in
            ``credentials['fields_in_response']``.
        """
        markers = self.credentials['fields_in_response']
        if not all(marker in response.text for marker in markers):
            # Always return a dict so callers never have to handle None.
            self.logger.warning(
                "Login page %s lacks the expected markers; "
                "no form data collected", response.url)
            return {}

        login_cred = self.credentials['login_details']
        form_data = {}
        email_seen = False
        first_text_name = None

        for field in response.xpath('//form//input'):
            field_name = field.xpath('@name').extract_first()
            if field_name is None:
                # Inputs without a name are never submitted.
                continue
            field_type = field.xpath('@type').extract_first()
            if field_type == 'email':
                form_data[field_name] = login_cred['name']
                email_seen = True
            elif field_type == 'password':
                form_data[field_name] = login_cred['pwd']
            elif field_type == 'hidden':
                # A None value tells FormRequest.from_response to drop the
                # field, so a hidden input without a value is sent as ''.
                field_value = field.xpath('@value').extract_first()
                form_data[field_name] = '' if field_value is None else field_value
            elif field_type == 'text' and first_text_name is None:
                first_text_name = field_name

        # Many login forms use a plain text input for the user name.
        if (not email_seen and first_text_name is not None
                and first_text_name not in form_data):
            form_data[first_text_name] = login_cred['name']

        return form_data

    def login(self, response):
        """Submit the login form found in ``response``.

        When the optional ``login_post_url`` attribute is set, the form is
        posted to that URL instead of the form's own ``action`` and the
        request is marked as an XMLHttpRequest, mirroring what a jQuery
        AJAX submission sends.
        """
        print("\n Login is here! \n")
        form_data = self.fetch_form_data(response)

        overrides = {}
        post_url = getattr(self, 'login_post_url', None)
        if post_url:
            # from_response() resolves this URL against the page URL.
            overrides['url'] = post_url
            overrides['headers'] = {'X-Requested-With': 'XMLHttpRequest'}

        return FormRequest.from_response(
            response,
            formdata=form_data,
            callback=self.check_login_response,
            **overrides
        )

    def check_login_response(self, response):
        """Report whether the page returned after login looks authenticated.

        The login counts as successful when every string listed in
        ``credentials['fields_in_main_page']` occurs in the response text.
        """
        # Debug aids: open the received page in a browser and print the
        # type of the configured marker collection.
        open_in_browser(response)
        print((type(self.credentials['fields_in_main_page'])))
        print("\n Check_login_response \n")
        expected = self.credentials['fields_in_main_page']
        if all(field in response.text for field in expected):
            print("Worked, logged in")
            # TODO: yield the requests for the authenticated pages here.
        else:
            print("Not logged in")
The code above works with a form like the one below:
<!-- Login form submitted by the browser as a regular POST to /login. -->
<form action="/login" method="post" accept-charset="utf-8">
    <input type="hidden" name="csrf_token" value="UIskiFCnDvablxTqGjOYRNQLZecJmuEVXdHKSrhgBztyfpwWAMPo">
    <div class="row">
        <div class="form-group col-xs-3">
            <label for="username">Username</label>
            <input type="text" class="form-control" id="username" name="username">
        </div>
    </div>
    <div class="row">
        <div class="form-group col-xs-3">
            <!-- The label must reference the id of the password input. -->
            <label for="password">Password</label>
            <input type="password" class="form-control" id="password" name="password">
        </div>
    </div>
    <input type="submit" value="Login" class="btn btn-primary">
</form>
But it does not work in a case like the one below:
<!-- Login form without an action attribute; it is submitted by a script. -->
<form id="contactForm1" method="post" accept-charset="utf-8">
    <input type="hidden" name="csrf_token" value="UIskiFCnDvablxTqGjOYRNQLZecJmuEVXdHKSrhgBztyfpwWAMPo">
    <div class="row">
        <div class="form-group col-xs-3">
            <label for="username">Username</label>
            <input type="text" class="form-control" id="username" name="username">
        </div>
    </div>
    <div class="row">
        <div class="form-group col-xs-3">
            <!-- The label must reference the id of the password input. -->
            <label for="password">Password</label>
            <input type="password" class="form-control" id="password" name="password">
        </div>
    </div>
    <input type="submit" id="login_sbt" value="Login" class="btn btn-primary">
</form>
<script type="text/javascript">
var frm = $('#contactForm1');
frm.submit(function (e) {
e.preventDefault();
$.ajax({
type: POST,
url: url,
data: frm.serialize(),
success: function (data) {
console.log('Submission was successful.');
console.log(data);
},
error: function (data) {
console.log('An error occurred.');
console.log(data);
},
});
});
</script>
});
I would like to know what changes I need to make to my script to log in through an AJAX-submitted form. The script works when the form's action is specified, but I need to scrape a website that uses AJAX authentication. What is missing from my script to support that?
来源:https://stackoverflow.com/questions/61495709/how-to-perform-scrapy-xhr-form-request-using-scrapy-python