问题
I'm trying to separate documents into topics using LDA topic modeling.
With the code below, I'm able to fetch the top 10 keywords for each topic. Instead of getting only the top 10 keywords, I want to fetch all the keywords for each topic.
Can anyone please suggest how to do this?
My Code:
from gensim.models import ldamodel
import gensim.corpora;
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer;
from sklearn.decomposition import LatentDirichletAllocation
import warnings
warnings.filterwarnings("ignore",category=DeprecationWarning)
def load_data(filename):
    """Read the review texts from a label-prefixed text file.

    The first line of the file is read and discarded (presumably a header
    row). Every following line is stripped and split at its first space;
    the part before the space is treated as a label and ignored, and the
    remainder is collected as the review text.

    Lines that have no text after the label -- including blank lines --
    are skipped rather than raising ``IndexError``.

    Args:
        filename: Path of the UTF-8 encoded file to read.

    Returns:
        A list of review strings, in file order.
    """
    reviews = []
    with open(filename, encoding='utf-8') as handle:
        handle.readline()  # discard the first line
        for raw_line in handle:
            # partition() never raises: `separator` is empty when the
            # stripped line contains no space at all.
            _label, separator, review = raw_line.strip().partition(' ')
            if not separator:
                continue  # blank or label-only line: nothing to keep
            reviews.append(review)
    return reviews
# Load the review corpus that the vectorizer and LDA model are fitted on.
data = load_data(filename='/Users/abc/dataset.txt')
# Debugging aid: uncomment to inspect the loaded reviews.
# print("Data:", data)
def display_topics(model, feature_names, no_top_words=None):
    """Print the highest-weighted words of every topic in a fitted model.

    For each row of ``model.components_`` a ``Topic <index>:`` header is
    printed, followed by one line with the selected words separated by
    single spaces, ordered from highest to lowest weight.

    Args:
        model: Object exposing ``components_``, an iterable of per-topic
            weight arrays that support ``argsort()``.
        feature_names: Sequence mapping a column index of
            ``components_`` to its word.
        no_top_words: Number of words to print per topic. ``None`` (the
            default) prints every word of the vocabulary.
    """
    for topic_idx, topic in enumerate(model.components_):
        # argsort() is ascending, so reverse it to rank by descending weight.
        ranked = topic.argsort()[::-1]
        if no_top_words is not None:
            ranked = ranked[:no_top_words]
        print("Topic %d:" % (topic_idx))
        print(" ".join([feature_names[i] for i in ranked]))
# Build a bag-of-words count matrix over the loaded reviews; the vocabulary
# is capped at `no_features` terms and English stop words are removed.
no_features = 1000
tf_vectorizer = CountVectorizer(max_df=0.95, min_df=2, max_features=no_features, stop_words='english')
tf = tf_vectorizer.fit_transform(data)

# get_feature_names() was removed from recent scikit-learn releases in
# favour of get_feature_names_out(); prefer the new accessor and fall back
# to the old one so the script runs on either side of that change.
if hasattr(tf_vectorizer, 'get_feature_names_out'):
    tf_feature_names = tf_vectorizer.get_feature_names_out()
else:
    tf_feature_names = tf_vectorizer.get_feature_names()

# Fit the topic model. The number of topics is passed as `n_components`:
# the former `n_topics` keyword was deprecated and later removed, so it
# raises TypeError on current scikit-learn.
no_topics = 5
lda = LatentDirichletAllocation(
    n_components=no_topics,
    max_iter=5,
    learning_method='online',
    learning_offset=50.,
    random_state=0,
).fit(tf)

# Print the top-weighted words of each topic.
no_top_words = 10
display_topics(lda, tf_feature_names, no_top_words)
来源:https://stackoverflow.com/questions/52385474/how-to-get-all-the-keywords-based-on-topic-using-topic-modeling