from collections import Counter

import jieba
import matplotlib.pyplot as plt
import numpy as np
import requests
from bs4 import BeautifulSoup
from PIL import Image,ImageSequence
from wordcloud import WordCloud,ImageColorGenerator
def changeTitleToDict(path='yingchao.txt'):
    """Count how often each segmented token occurs in a UTF-8 text file.

    The file is read in full, split into tokens with ``jieba.cut``, and the
    occurrences of every token are tallied in a single pass. The resulting
    mapping is printed and returned.

    Args:
        path: Text file to read. Defaults to ``'yingchao.txt'``, the file
            this function originally read unconditionally.

    Returns:
        A plain ``dict`` mapping each kept token to its occurrence count.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
    """
    # Separator tokens that must never be counted as words.
    symbol = {"/", "(", ")", " ", ";", "!", "、", ":"}

    # The context manager closes the file even if reading fails.
    with open(path, 'r', encoding='utf-8') as f:
        text = f.read()

    # One pass over the tokens instead of list.count() per unique token.
    # Whitespace-only tokens (e.g. "\n") are skipped along with the symbols:
    # they are separators, not words.
    counts = Counter(
        token
        for token in jieba.cut(text)
        if token not in symbol and token.strip()
    )

    title_dict = dict(counts)
    print(title_dict)
    return title_dict
# Fetch list pages 1-9 of the Hupu tag listing and append the text of every
# <span class="n1"> element (presumably the news headlines) to yingchao.txt.
# NOTE: the file is opened in append mode, so running the script again adds
# the same text a second time.
with open('yingchao.txt', 'a', encoding='utf-8') as f:
    for page in range(1, 10):
        hupu = 'https://voice.hupu.com/soccer/tag/496-%s.html' % (page)
        # Without a timeout a stalled connection would hang the script forever.
        reslist = requests.get(hupu, timeout=10)
        reslist.encoding = 'utf-8'
        soup_list = BeautifulSoup(reslist.text, 'html.parser')
        for news in soup_list.find_all('span', class_='n1'):
            print(news.text)
            # End each entry with a newline so that adjacent entries are not
            # fused into one run of text before word segmentation.
            f.write(news.text + '\n')
# Token -> occurrence count for the scraped text.
title_dict = changeTitleToDict()
# Windows-specific location of the SimHei font; the path must be changed on
# other systems. NOTE(review): the font needs CJK glyphs for Chinese tokens.
font = r'C:\Windows\Fonts\simhei.ttf'
# Feed the real counts to the word cloud so frequent words are drawn larger.
# Joining the keys into a string would give every word a weight of one.
# generate_from_frequencies() does no tokenising of its own, so tokens without
# any letter or digit (pure punctuation / whitespace) are filtered out here.
frequencies = {
    word: count
    for word, count in title_dict.items()
    if any(ch.isalnum() for ch in word)
}
# Build the word cloud inside the shape given by the mask image.
# width/height are kept from the original call; per the WordCloud docs they
# are ignored when a mask is supplied.
image = np.array(Image.open('1.jpg'))
wordcloud = WordCloud(
    background_color='white',
    font_path=font,
    mask=image,
    width=1000,
    height=860,
    margin=2,
).generate_from_frequencies(frequencies)
# Recolour the words using the colours of a second image.
image2 = np.array(Image.open('2.jpg'))
image_colors = ImageColorGenerator(image2)
wordcloud.recolor(color_func=image_colors)
# Display the generated word cloud, then save it to disk.
plt.imshow(wordcloud)
plt.axis("off")
plt.show()
wordcloud.to_file('3.jpg')
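# Image captions left over from the original article:
#   背景图 (background image)
#   字体颜色图 (font colour image)
#   词云图 (word cloud image)
# 来源 (source): oschina
# 链接 (link): https://my.oschina.net/u/4403899/blog/3990895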