How to perform an XHR (AJAX) form request using Scrapy in Python

空扰寡人 提交于 2020-05-15 09:30:13

问题


First of all, my script works when the login is performed with a regular form request that sends the data via HTTP POST, but it doesn't work with an AJAX form submission.

from scrapy.http import Request, FormRequest
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider, Rule
# from scrapy.selector import HtmlXPathSelector
from scrapy.http import FormRequest
import scrapy
from scrapy.crawler import CrawlerProcess
from scrapy.utils.response import open_in_browser
from bs4 import BeautifulSoup
import sys

class LoginSpider(CrawlSpider):
    """Spider that submits a login form before doing anything else.

    The class does not define ``login_url`` or ``credentials`` itself; both
    are read from ``self`` and therefore have to be provided by a subclass or
    as spider arguments. ``credentials`` is indexed with the keys
    ``'fields_in_response'``, ``'login_details'`` (holding ``'name'`` and
    ``'pwd'``) and ``'fields_in_main_page'``.

    For pages whose form is submitted by JavaScript instead of a form
    ``action``, set the optional ``login_post_url`` attribute to the endpoint
    the script posts to. When it is unset the form is submitted exactly as
    the page declares it.
    """

    name = 'loginspider'

    def init_request(self):
        """Return a request for the login page that is handled by ``login``."""
        # The callback must accept the response; start_requests() does not,
        # so it cannot be used as a callback here.
        return Request(url=self.login_url, callback=self.login)

    def start_requests(self):
        """Start the crawl by fetching the login page."""
        print("\n start_request is here \n")
        yield Request(
            url=self.login_url,
            callback=self.login,
            # Fetch the login page even if the same URL was already seen.
            dont_filter=True,
        )

    def fetch_form_data(self, response):
        """Build the form fields to override when submitting the login form.

        Returns a dict mapping input names to values: ``email`` inputs get
        the configured login name, ``password`` inputs the configured
        password, and ``hidden`` inputs keep the value found in the page
        (``None`` when the input has no ``value`` attribute). The dict is
        empty when the response does not contain every marker listed in
        ``credentials['fields_in_response']``.

        NOTE(review): inputs of any other type, including a user name field
        declared as ``type="text"``, are not filled here -- confirm this
        matches the target site.
        """
        form_data = {}
        markers = self.credentials['fields_in_response']
        if not all(field in response.text for field in markers):
            return form_data

        login_cred = self.credentials['login_details']
        # Read the attributes straight from the parsed document instead of
        # serialising every <input> and parsing it a second time.
        for field in response.xpath('//form//input'):
            field_name = field.xpath('@name').extract_first()
            if field_name is None:
                continue
            field_type = field.xpath('@type').extract_first()
            if field_type == 'email':
                form_data[field_name] = login_cred['name']
            elif field_type == 'password':
                form_data[field_name] = login_cred['pwd']
            elif field_type == 'hidden':
                form_data[field_name] = field.xpath('@value').extract_first()
        return form_data

    def login(self, response):
        """Submit the login form found in ``response``.

        Without ``login_post_url`` the form is posted the way the page
        declares it. With ``login_post_url`` set, the same fields are sent to
        that endpoint as an XHR-style POST.
        """
        print("\n Login is here! \n")
        form_data = self.fetch_form_data(response)

        request_kwargs = {}
        ajax_url = getattr(self, 'login_post_url', None)
        if ajax_url:
            request_kwargs.update(
                # Resolved against the page URL by from_response().
                url=ajax_url,
                method='POST',
                headers={'X-Requested-With': 'XMLHttpRequest'},
                # Do not add a submit button's name/value to the payload.
                dont_click=True,
            )

        return FormRequest.from_response(
            response,
            formdata=form_data,
            callback=self.check_login_response,
            **request_kwargs
        )

    def check_login_response(self, response):
        """Report whether the post-login response contains every marker in
        ``credentials['fields_in_main_page']``."""
        # Debugging aid: opens the received page in a local browser.
        open_in_browser(response)
        print((type(self.credentials['fields_in_main_page'])))
        print("\n Check_login_response \n")
        if all(field in response.text for field in self.credentials['fields_in_main_page']):
            print("Worked, logged in")
            # Requests for the authenticated part of the site would be
            # returned from here.
        else:
            print("Not logged in")

The code above works with a form like the one below:

<!-- Login form that is submitted to its own action URL (/login) via POST. -->
<form action="/login" method="post" accept-charset="utf-8">
        <input type="hidden" name="csrf_token" value="UIskiFCnDvablxTqGjOYRNQLZecJmuEVXdHKSrhgBztyfpwWAMPo">
        <div class="row">
            <div class="form-group col-xs-3">
                <label for="username">Username</label>
                <input type="text" class="form-control" id="username" name="username">
            </div>
        </div>
        <div class="row">
            <div class="form-group col-xs-3">
                <!-- The label must reference the id of the password input. -->
                <label for="password">Password</label>
                <input type="password" class="form-control" id="password" name="password">
            </div>
        </div>
        <input type="submit" value="Login" class="btn btn-primary">

    </form>

But it does not work in a case like the one below:

<!-- Login form without an action attribute; the script below handles its submission. -->
<form id="contactForm1"  method="post" accept-charset="utf-8">
        <input type="hidden" name="csrf_token" value="UIskiFCnDvablxTqGjOYRNQLZecJmuEVXdHKSrhgBztyfpwWAMPo">
        <div class="row">
            <div class="form-group col-xs-3">
                <label for="username">Username</label>
                <input type="text" class="form-control" id="username" name="username">
            </div>
        </div>
        <div class="row">
            <div class="form-group col-xs-3">
                <!-- The label must reference the id of the password input. -->
                <label for="password">Password</label>
                <input type="password" class="form-control" id="password" name="password">
            </div>
        </div>
        <input type="submit"  id="login_sbt" value="Login" class="btn btn-primary">

    </form>

    <script type="text/javascript">
    var frm = $('#contactForm1');

    // Intercept the submit event and send the serialized form fields via AJAX instead.
    frm.submit(function (e) {

        // Stop the browser from performing its own form submission.
        e.preventDefault();

        $.ajax({
            // The HTTP method has to be a string; a bare POST is an undefined identifier.
            type: 'POST',
            // NOTE(review): `url` is not defined in this snippet; presumably it is set elsewhere on the page.
            url: url,
            data: frm.serialize(),
            success: function (data) {
                console.log('Submission was successful.');
                console.log(data);
            },
            error: function (data) {
                console.log('An error occurred.');
                console.log(data);
            },
        });
    });
</script>


I would like to know what changes I need to make to achieve an AJAX login in my script. The script works when a form action is specified, but I need to scrape a website that uses AJAX authentication. What is missing from my script to implement that?

来源:https://stackoverflow.com/questions/61495709/how-to-perform-scrapy-xhr-form-request-using-scrapy-python

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!