How to use qtwebkit in python threads?

后端 未结 1 1252
梦谈多话
梦谈多话 2021-01-21 16:37

I'm trying to parse webpages generated by JavaScript with QtWebKit. I found an example of how to get the page source:

import sys
from PySide.QtGui import *
from PySide.QtCo         


        
相关标签:
1条回答
  • 2021-01-21 17:16

    Given Qt's asynchronous nature, the QtWebKit methods are non-blocking as well, so there is no point in running them in threads. You can start them in parallel like this:

    from functools import partial
    
    from PySide.QtCore import QUrl
    from PySide.QtGui import QApplication
    from PySide.QtWebKit import QWebView, QWebSettings
    
    
    # Sample pages for the demo: the ``__main__`` block passes this tuple to
    # ``Crawler.start``, which opens one QWebView per entry.
    TARGET_URLS = (
        'http://stackoverflow.com',
        'http://github.com',
        'http://bitbucket.org',
        'http://news.ycombinator.com',
        'http://slashdot.org',
        'http://www.reddit.com',
        'http://www.dzone.com',
        'http://www.ideone.com',
        'http://jsfiddle.net',
    )
    
    
    class Crawler(object):
        """Load several URLs at once in QWebView instances and collect their HTML.

        One QWebView is created per URL; each view reports back through its
        ``loadFinished`` signal. When every view has reported, the crawler asks
        the application to quit so that ``app.exec_()`` returns.

        Attributes:
            app: The application object whose ``quit()`` is called at the end.
            results: Maps each main frame's final ``url()`` to its ``toHtml()``.
            browsers: Maps browser id to a ``(web_view, finished)`` tuple.
        """

        def __init__(self, app):
            """Remember the application and start with empty bookkeeping."""
            self.app = app
            self.results = {}
            self.browsers = {}

        def _load_finished(self, browser_id, ok):
            """Handle ``loadFinished`` for the view registered as *browser_id*.

            Args:
                browser_id: Key of the view in ``self.browsers``.
                ok: Success flag delivered by the ``loadFinished`` signal. It is
                    only printed; the page HTML is stored either way.
            """
            # A single formatted argument prints identically on Python 2 and 3.
            print('%s %s' % (ok, browser_id))
            web_view, _flag = self.browsers[browser_id]
            self.browsers[browser_id] = (web_view, True)

            frame = web_view.page().mainFrame()
            self.results[frame.url()] = frame.toHtml()

            # Stop listening so this view is recorded only once.
            web_view.loadFinished.disconnect()
            web_view.stop()

            if all(finished for _view, finished in self.browsers.values()):
                print('all finished')
                self.app.quit()

        def start(self, urls):
            """Create one QWebView per URL and start loading all of them.

            ``quit()`` is only requested from ``_load_finished``, so with an
            empty *urls* this class never asks the application to quit.

            Args:
                urls: Iterable of URL strings; the position of each URL becomes
                    its browser id.
            """
            for browser_id, url in enumerate(urls):
                web_view = QWebView()
                web_view.settings().setAttribute(QWebSettings.AutoLoadImages,
                                                 False)
                # Register the view before loading: the handler looks it up in
                # self.browsers, and the stored reference keeps the view
                # reachable.
                self.browsers[browser_id] = (web_view, False)
                loaded = partial(self._load_finished, browser_id)
                web_view.loadFinished.connect(loaded)
                web_view.load(QUrl(url))
    
    
    if __name__ == '__main__':
        # Build the Qt application, start every page load, then block in the
        # event loop until the crawler calls app.quit().
        app = QApplication([])
        crawler = Crawler(app)
        crawler.start(TARGET_URLS)
        app.exec_()
        # One pre-formatted argument and an explicit list give the same output
        # on Python 2 and Python 3.
        print('got: %s' % (list(crawler.results),))
    
    0 讨论(0)
提交回复
热议问题