PyQt Class not working for the second usage

后端 未结 1 426
时光取名叫无心
时光取名叫无心 2020-12-05 17:06

I'm using PyQt to fully load a page (including JS) and get its contents using Beautiful Soup. It works fine on the first iteration, but after that it crashes. I don't have a big kn

相关标签:
1条回答
  • 2020-12-05 17:28

    The example crashes because the RenderPage class attempts to create a new QApplication and event-loop for every url it tries to load.

    Instead, only one QApplication should be created, and the QWebPage subclass should load a new url after each page has been processed, rather than using a for-loop.

    Here's a re-write of the example which should do what you want:

    import sys, signal
    from bs4 import BeautifulSoup
    from bs4.dammit import UnicodeDammit
    from PyQt4 import QtCore, QtGui, QtWebKit
    
    class WebPage(QtWebKit.QWebPage):
        """Load a series of urls one after another in a single web page.

        Each url is paired with a callback. When the main frame reports that
        loading has finished, the callback is invoked as ``func(url, html)``
        and the next url is loaded. Once the series is exhausted the
        application is asked to quit.
        """

        def __init__(self):
            QtWebKit.QWebPage.__init__(self)
            self.mainFrame().loadFinished.connect(self.handleLoadFinished)

        def process(self, items):
            """Start working through *items*, an iterable of ``(url, func)`` pairs."""
            self._items = iter(items)
            if not self.fetchNext():
                # With nothing to load, loadFinished never fires and nothing
                # would ever stop the event loop. The quit is deferred through
                # a zero-length timer because quitting before the event loop
                # is running has no effect.
                QtCore.QTimer.singleShot(0, self._finish)

        def fetchNext(self):
            """Begin loading the next url.

            Returns True if a load was started, False if no items remain.
            """
            # Only next() is expected to raise StopIteration, so the load
            # itself is kept outside the try block.
            try:
                self._url, self._func = next(self._items)
            except StopIteration:
                return False
            self.mainFrame().load(QtCore.QUrl(self._url))
            return True

        def handleLoadFinished(self):
            """Pass the loaded page to its callback, then move on."""
            try:
                self._func(self._url, self.mainFrame().toHtml())
            finally:
                # Advance even when the callback raised; otherwise no further
                # load is started and the run can stall with items pending.
                if not self.fetchNext():
                    self._finish()

        def _finish(self):
            """Report completion and ask the application to quit."""
            print('# processing complete')
            QtGui.qApp.quit()
    
    
    def funcA(url, html):
        """Announce that *url* is being processed.

        *html* is the page markup supplied by the caller; this placeholder
        does not use it yet.
        """
        prefix = '# processing:'
        print(prefix, url)
        # soup = BeautifulSoup(UnicodeDammit(html).unicode_markup)
        # do stuff with soup...
    
    def funcB(url, html):
        """Announce that *url* is being processed.

        *html* is the page markup supplied by the caller; this placeholder
        does not use it yet.
        """
        prefix = '# processing:'
        print(prefix, url)
        # soup = BeautifulSoup(UnicodeDammit(html).unicode_markup)
        # do stuff with soup...
    
    if __name__ == '__main__':

        # Each entry pairs a url with the callback that receives its html.
        items = [
            ('http://stackoverflow.com', funcA),
            ('http://google.com', funcB),
        ]

        # Restore the default SIGINT action so Ctrl+C ends the program
        # while the Qt event loop is running.
        signal.signal(signal.SIGINT, signal.SIG_DFL)
        print('Press Ctrl+C to quit\n')

        # A single application object and a single page serve every url.
        app = QtGui.QApplication(sys.argv)
        webpage = WebPage()
        webpage.process(items)

        exit_code = app.exec_()
        sys.exit(exit_code)
    
    0 讨论(0)
提交回复
热议问题