问题
In short, I'm making a Discord bot that downloads the "World of the Day" picture from the website https://growtopiagame.com, saves it as D:\Kelbot/render.png, and then sends the picture to the channel where the command was called. However, the website is not static and the image URL is not in the page source, so I found a solution that uses PyQt5:
import re
import bs4 as bs
import sys
import urllib.request
from PyQt5.QtWebEngineWidgets import QWebEnginePage
from PyQt5.QtWidgets import QApplication
from PyQt5.QtCore import QUrl
# Discord command from the question: loads https://growtopiagame.com in a Qt web
# page, extracts an image URL from the rendered HTML, downloads it and sends a
# picture to the channel the command was invoked in.
# NOTE(review): indentation was lost in this paste; the Page class and main()
# below are presumably nested inside wotd() -- confirm against the original post.
@client.command()
@commands.cooldown(1, 60, commands.BucketType.user)
async def wotd(ctx):
# Page loads a URL in a QWebEnginePage and keeps the resulting HTML in self.html.
class Page(QWebEnginePage):
def __init__(self, url):
# A new QApplication is constructed every time a Page is instantiated.
self.app = QApplication(sys.argv)
QWebEnginePage.__init__(self)
self.html = ''
self.loadFinished.connect(self._on_load_finished)
self.load(QUrl(url))
# exec_() runs the Qt event loop here; Callable() below calls quit() on it.
# NOTE(review): this blocking call sits inside an async command handler.
self.app.exec_()
def _on_load_finished(self):
# toHtml() is given self.Callable as the callback that receives the HTML;
# self.html is assigned toHtml()'s return value here and overwritten in Callable.
self.html = self.toHtml(self.Callable)
print('Load finished')
def Callable(self, html_str):
# Store the HTML string and stop the Qt event loop started in __init__.
self.html = html_str
self.app.quit()
def main():
page = Page('https://growtopiagame.com')
soup = bs.BeautifulSoup(page.html, 'html.parser')
# Look up the first <a> element with class "world-of-day-image".
js_test = soup.find('a', class_='world-of-day-image')
link = []
# Collect the string form of each item yielded by iterating the found element.
for x in js_test:
link.append(str(x))
# Extract every http(s) URL from the first collected string.
urls = re.findall('http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', link[0])
# Download the first URL found to a fixed path under D:\Kelbot.
urllib.request.urlretrieve(urls[0], "D:\Kelbot/render.png")
# main() is only called when this module is executed as a script.
if __name__ == '__main__': main()
# NOTE(review): the image is saved to "D:\Kelbot/render.png" but sent via the
# relative path 'render.png'; these refer to the same file only when the working
# directory is D:\Kelbot -- possibly relevant to the Task Scheduler failure.
await ctx.send(file=discord.File('render.png'))
When I ran the bot from Task Scheduler, it didn't work. So I tried running it from the Python shell and from Visual Studio Code, and both worked. However, when the command is called a second time, both the Python shell and Visual Studio Code restart and the bot gets killed for some reason. Is it because classes are incompatible with discord.py? How can I fix this? Is there a better solution than using PyQt5?
(Also sometimes instead of getting the picture, I get https://growtopiagame.com/resources/assets/images/load.gif which is the image they put before showing the actual World of the Day picture, but it fixes itself when I restart my pc)
回答1:
PyQt5 is not compatible with asyncio. There are libraries that try to make them compatible, such as quamash, asyncqt and qasync, but in your case they are not necessary, since the only task you want Qt to do is scrape the web page to obtain the URL of an image and download it. A workaround is to create an external application whose only function is that, and then run it from the wotd function:
├── downloader.py
├── .env
└── main.py
main.py
import asyncio
import os
import sys
import uuid
import discord
from discord.ext import commands
from dotenv import load_dotenv
bot = commands.Bot(command_prefix="!")


@commands.cooldown(1, 60, commands.BucketType.user)
@bot.command()
async def wotd(ctx):
    """Send the "World of the Day" image to the invoking channel.

    The image is fetched by running ``downloader.py`` (located next to this
    file) in a separate process, so that the Qt event loop it needs never
    runs inside the bot's asyncio loop. The image is written to a uniquely
    named PNG inside an ``images`` directory next to this file and then
    uploaded with ``ctx.send``.

    If the downloader exits with a non-zero code, produces no file, or does
    not finish within the timeout, the failure is logged to stdout and a
    short error message is sent to the channel instead of the image.

    NOTE(review): asyncio subprocesses need a loop with subprocess support;
    on Windows with Python < 3.8 the default selector loop lacks it --
    confirm which loop the bot runs on.
    """
    # Upper bound for the whole download; without it a stuck downloader
    # would keep this command (and the child process) alive forever.
    timeout_s = 60
    failure_msg = "Could not fetch the World of the Day image, please try again later."

    current_dir = os.path.dirname(os.path.realpath(__file__))
    images_dir = os.path.join(current_dir, "images")
    # Creates the directory when missing and is a no-op when it already exists.
    os.makedirs(images_dir, exist_ok=True)

    # A fresh UUID per call keeps concurrent invocations from overwriting
    # each other's output file.
    output_filename = os.path.join(images_dir, "{}.png".format(uuid.uuid4()))
    args = [sys.executable, os.path.join(current_dir, "downloader.py"), output_filename]
    process = await asyncio.create_subprocess_exec(
        *args, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
    )
    print("Started: %s, pid=%s" % (args, process.pid), flush=True)

    try:
        stdout, stderr = await asyncio.wait_for(
            process.communicate(), timeout=timeout_s
        )
    except asyncio.TimeoutError:
        # Kill the child and drain its pipes so it is fully reaped.
        process.kill()
        await process.communicate()
        print(
            "Failed: %s, pid=%s, result: %s"
            % (args, process.pid, "timed out after %ss" % timeout_s),
            flush=True,
        )
        print("error")
        await ctx.send(failure_msg)
        return

    # Also require the file to exist: discord.File would raise on a missing
    # path even though the child reported success.
    if process.returncode == 0 and os.path.isfile(output_filename):
        print(
            "Done: %s, pid=%s, result: %s"
            % (args, process.pid, stdout.decode().strip()),
            flush=True,
        )
        await ctx.send(file=discord.File(output_filename))
        print("end", output_filename)
    else:
        print(
            "Failed: %s, pid=%s, result: %s"
            % (args, process.pid, stderr.decode().strip()),
            flush=True,
        )
        print("error")
        await ctx.send(failure_msg)
@wotd.error
async def wotd_error(ctx, error):
    """Handle errors raised by the ``wotd`` command.

    When the error is a cooldown violation, the remaining wait time is sent
    to the channel. Every error, cooldown or not, is printed together with
    the invocation context.
    """
    on_cooldown = isinstance(error, commands.CommandOnCooldown)
    if on_cooldown:
        await ctx.send(
            "This command is ratelimited, please try again in {:.2f}s".format(
                error.retry_after
            )
        )
    print(ctx, error)
def main():
    """Load environment variables from ``.env`` and start the bot.

    The token is read from the ``DISCORD_TOKEN`` environment variable.
    """
    load_dotenv()
    bot.run(os.getenv("DISCORD_TOKEN"))


# Start the bot only when this file is executed directly.
if __name__ == "__main__":
    main()
downloader.py
import sys
from PyQt5 import QtCore, QtWidgets, QtNetwork, QtWebEngineWidgets
class DownLoader(QtCore.QObject):
    """Load growtopiagame.com in a web page and save the World of the Day image.

    The page is loaded in a ``QWebEnginePage``; once loading finishes, a small
    JavaScript snippet is run to read the ``src`` of the ``<img>`` inside the
    first element with class ``world-of-day-image``. That URL is then fetched
    with a ``QNetworkAccessManager`` and the bytes are written to ``path``.

    The application exits with code 0 on success and -1 on any failure (page
    load error, image URL never found, network error, or file write error),
    with a message on stderr in the failure cases.

    Args:
        path: Destination file for the downloaded image.
        parent: Optional parent ``QObject``.
        poll_interval_ms: Delay between attempts to read the image URL.
        max_polls: Maximum number of retries before giving up, so the
            process cannot wait forever for an element that never appears.
    """

    def __init__(self, path, parent=None, poll_interval_ms=100, max_polls=300):
        super().__init__(parent)
        self.path = path
        self._poll_interval_ms = poll_interval_ms
        self._polls_left = max_polls
        url = "https://growtopiagame.com"
        self.manager = QtNetwork.QNetworkAccessManager(self)
        # The profile is created under a freshly generated UUID name on
        # every run.
        profile = QtWebEngineWidgets.QWebEngineProfile(
            QtCore.QUuid.createUuid().toString(QtCore.QUuid.Id128), self
        )
        self.page = QtWebEngineWidgets.QWebEnginePage(profile, self)
        # Load progress percentages are printed to stdout.
        self.page.loadProgress.connect(print)
        self.manager.finished.connect(self.on_finished)
        self.page.loadFinished.connect(self.on_load_finished)
        self.page.load(QtCore.QUrl(url))

    @QtCore.pyqtSlot(bool)
    def on_load_finished(self, ok):
        """Start looking for the image URL, or exit with -1 if loading failed."""
        if ok:
            self.request_url()
        else:
            print("error", ok, file=sys.stderr)
            QtCore.QCoreApplication.exit(-1)

    def request_url(self):
        """Run JavaScript in the page to read the image URL.

        The script evaluates to the image ``src`` or to an empty string when
        the image is not available yet; the result is passed to ``download``.
        """
        # The asker reports that a "load.gif" placeholder is shown before the
        # real picture; treat it as "not ready yet" so polling continues.
        js = """
        function get_url(){
            var elements = document.getElementsByClassName("world-of-day-image")
            if(elements.length){
                var element = elements[0];
                if(element.children.length){
                    var e = element.children[0]
                    if(e.tagName == "IMG" && e.src.indexOf("load.gif") == -1)
                        return e.src
                }
            }
            return "";
        }
        get_url();
        """
        self.page.runJavaScript(js, self.download)

    def download(self, url):
        """Fetch ``url`` if it is non-empty; otherwise retry or give up."""
        if url:
            print(url)
            request = QtNetwork.QNetworkRequest(QtCore.QUrl(url))
            self.manager.get(request)
        elif self._polls_left > 0:
            # The image is inserted dynamically, so poll until it shows up.
            self._polls_left -= 1
            QtCore.QTimer.singleShot(self._poll_interval_ms, self.request_url)
        else:
            print("error: image url not found", file=sys.stderr)
            QtCore.QCoreApplication.exit(-1)

    @QtCore.pyqtSlot(QtNetwork.QNetworkReply)
    def on_finished(self, reply):
        """Write the downloaded bytes to ``self.path`` and stop the application."""
        # The reply is owned by the caller of get(); schedule its deletion.
        reply.deleteLater()
        if reply.error() != QtNetwork.QNetworkReply.NoError:
            print(reply.error(), reply.errorString(), file=sys.stderr)
            QtCore.QCoreApplication.exit(-1)
            return

        out_file = QtCore.QFile(self.path)
        if not out_file.open(QtCore.QIODevice.WriteOnly):
            # Report the failure instead of exiting as if the image was saved.
            print(
                "error: cannot open", self.path, out_file.errorString(),
                file=sys.stderr,
            )
            QtCore.QCoreApplication.exit(-1)
            return

        data = reply.readAll()
        print(len(data))
        written = out_file.write(data)
        out_file.close()
        if written != len(data):
            print("error: cannot write", self.path, file=sys.stderr)
            QtCore.QCoreApplication.exit(-1)
            return
        QtCore.QCoreApplication.quit()
if __name__ == "__main__":
    # Script entry point: expects the output image path as the single
    # positional command-line argument.
    app = QtWidgets.QApplication(sys.argv)

    cli = QtCore.QCommandLineParser()
    cli.addPositionalArgument("path", "Path of image")
    cli.process(app)

    positional = cli.positionalArguments()
    if len(positional) == 0:
        print("not path", file=sys.stderr)
        sys.exit(-1)

    # Keep a reference so the downloader stays alive while the event loop runs.
    downloader = DownLoader(positional[0])
    exit_code = app.exec_()
    sys.exit(exit_code)
.env
DISCORD_TOKEN=YOUR_TOKEN_HERE
来源:https://stackoverflow.com/questions/59241478/discord-py-rewrite-dynamic-web-scraping-using-pyqt5-not-working-properly