sklearn朴素贝叶斯
一。基本原理 贝叶斯公式 1 from sklearn.datasets import fetch_20newsgroups 2 from sklearn.model_selection import train_test_split 3 from sklearn.feature_extraction.text import TfidfVectorizer 4 from sklearn.naive_bayes import MultinomialNB 5 6 def news_classification(): 7 """ 8 朴素贝叶斯对新闻进行分类 9 :return: 10 """ 11 #1.获取数据 12 news=fetch_20newsgroups("c:/new",subset="all") 13 #print(news) 14 #2.划分数据集 15 x_train,x_test,y_train,y_test=train_test_split(news.data,news.target) 16 # print(x_train) 17 #3.特征工程:文本特征抽取-tfidf 18 transfer=TfidfVectorizer() 19 x_train=transfer.fit_transform(x_train) 20 x_test=transfer