Using Python and Mechanize to submit form data and authenticate

前端 未结 1 1648
再見小時候
再見小時候 2020-11-30 02:42

I want to log in to the website Reddit.com, navigate to a particular area of the page, and submit a comment. I don't see what's wrong with this code, but it is not working.

相关标签:
1条回答
  • 2020-11-30 03:31

    I would definitely suggest trying to use the API if possible, but this works for me (not for your example post, which has been deleted, but for any active one):

    #!/usr/bin/env python
    
    import mechanize
    import cookielib
    import urllib
    import logging
    import sys
    
    def main():
        """Log in to reddit with mechanize and post one comment on a thread.

        Steps: submit the login form on the front page, open the comment
        page, read the ``uh`` and ``thing_id`` values out of the page's
        forms, then POST them (together with the form id, subreddit name and
        comment text) to ``/api/comment``.

        The credentials, thread URL, subreddit name and comment text are
        hard-coded placeholders; edit them before running.

        This is Python 2 code (``cookielib``, ``urllib.urlencode``).

        NOTE(review): the form indexes used below (1, 0 and 7) are
        positional and depend on the page markup -- confirm them against the
        live page before relying on this.
        """
        br = mechanize.Browser()
        cj = cookielib.LWPCookieJar()
        br.set_cookiejar(cj)

        br.set_handle_equiv(True)
        br.set_handle_gzip(True)
        br.set_handle_redirect(True)
        br.set_handle_referer(True)
        br.set_handle_robots(False)

        br.set_handle_refresh(mechanize._http.HTTPRefreshProcessor(), max_time=1)

        br.open('http://www.reddit.com')

        # Select the second (index one) form, which is the login form.
        br.select_form(nr=1)

        # User credentials
        br.form['user'] = 'user'
        br.form['passwd'] = 'passwd'

        # Login
        br.submit()

        # Open up comment page
        posting = 'http://www.reddit.com/r/PoopSandwiches/comments/f47f8/testing/'
        # The subreddit name; you can get it in other ways, but hard-coding
        # it works for testing.
        rval = 'PoopSandwiches'

        br.open(posting)

        # You need the 'uh' value from the first form
        br.select_form(nr=0)
        uh = br.form['uh']

        br.select_form(nr=7)
        thing_id = br.form['thing_id']
        # The id that gets posted is the form id with a '#' prepended.
        form_id = '#' + br.form.attrs['id']

        # A list of pairs (rather than a dict) keeps the field order fixed.
        # urlencode() percent-escapes every value and turns spaces into '+',
        # so no manual quote()/replace() pass is needed.
        fields = [
            ('thing_id', thing_id),
            ('text', "Your text here!"),
            ('id', form_id),
            ('r', rval),
            ('uh', uh),
            ('renderstyle', 'html'),
        ]
        new_data = urllib.urlencode(fields)

        # not sure which of these headers are really needed, but it works with all
        # of them, so why not just include them.
        req = mechanize.Request('http://www.reddit.com/api/comment', new_data)
        req.add_header('Referer', posting)
        req.add_header('Accept', ' application/json, text/javascript, */*')
        req.add_header('Content-Type', 'application/x-www-form-urlencoded; charset=UTF-8')
        req.add_header('X-Requested-With', 'XMLHttpRequest')
        # The request is sent with the module-level mechanize.urlopen, not
        # through ``br``, so the session cookies are attached by hand here.
        cj.add_cookie_header(req)
        res = mechanize.urlopen(req)
        # The reply body is not used; close it so the connection is released.
        res.close()
    
    # Run the login-and-comment flow only when this file is executed as a
    # script, so importing the module does not trigger network requests.
    if __name__ == '__main__':
        main()
    

    It would be interesting to turn JavaScript off and see how the reddit comments are handled then. Right now, there is a bunch of magic that happens in an onsubmit function called when making your post. This is where the uh and id values get added.

    0 讨论(0)
提交回复
热议问题