TypeError: expected string or bytes-like object ( Python 3) ( Wordcloud)

问题

import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
import numpy as np
import wordcloud
from wordcloud import WordCloud,STOPWORDS

# Read the whole text.
remarks = open(r'C:\Users\marmar\Remarks.txt').read().split()
#Create words over an image
mask = np.array(Image.open(r'C:\users\marmar\Documents\cloud.png'))

 #set the stopwords list
 stopwords= set(STOPWORDS)
 #append new words to the stopwords list
 new_words =open(r'C:\Users\marmar\Documents\comments.txt').read().split()
 new_stopwords=stopwords.union(new_words)
 #generate the word cloud with parameters
wc = 
   WordCloud(
         background_color="white", max_words=2000, 
         mask=mask,min_font_size =15, max_font_size=40, relative_scaling = 
         0.5, stopwords=new_stopwords,normalize_plurals= True)
wc.generate(remarks)
plt.figure(figsize=(25,25))
plt.imshow(wc, interpolation="bilinear")
plt.axis("off")

#Show the wordcloud
plt.show()

So, if I remove .read() .split() from my Remarks text file, than it actually works and returns the wordcloud. However, I want to be able to tokenize the words and actually have the wordcloud be accurate. ( It's not showing the words split apart). However, every time I do it, I get this error.

TypeError                                 Traceback (most recent call last)
<ipython-input-7-76f0df420fc2> in <module>()
 19 wc = WordCloud(background_color="white", max_words=2000, 
 mask=mask,min_font_size =15, max_font_size=40, relative_scaling = 0.5, 
 stopwords=new_stopwords,
 20                 normalize_plurals= True)
 ---> 21 wc.generate(remarks)
 22 plt.figure(figsize=(25,25))
 23 plt.imshow(wc, interpolation="bilinear")

 ~\AppData\Local\Continuum\anaconda3\lib\site-
 packages\wordcloud\wordcloud.py in generate(self, text)
  563         self
 564         """
 --> 565         return self.generate_from_text(text)
 566 
 567     def _check_generated(self):

 ~\AppData\Local\Continuum\anaconda3\lib\site-
packages\wordcloud\wordcloud.py in generate_from_text(self, text)
544         self
545         """
--> 546         words = self.process_text(text)
547         self.generate_from_frequencies(words)
548         return self

~\AppData\Local\Continuum\anaconda3\lib\site-packages\wordcloud\wordcloud.py 
in process_text(self, text)
511         regexp = self.regexp if self.regexp is not None else r"\w[\w']+"
512 
--> 513         words = re.findall(regexp, text, flags)
514         # remove stopwords
515         words = [word for word in words if word.lower() not in 
stopwords]

~\AppData\Local\Continuum\anaconda3\lib\re.py in findall(pattern, string, 
flags)
220 
221     Empty matches are included in the result."""
--> 222     return _compile(pattern, flags).findall(string)
223 
224 def finditer(pattern, string, flags=0):

TypeError: expected string or bytes-like object

What I tried to do is actually remove all punctuation from my Remarks text file as well as convert the text file to unicode. The comments text file is working fine, not sure why my Remarks file isn't.

Thanks!

来源：https://stackoverflow.com/questions/47779462/typeerror-expected-string-or-bytes-like-object-python-3-wordcloud

标签

python-3.x

word-cloud