n-grams with Naive Bayes classifier Error

扶醉桌前 提交于 2019-12-11 11:49:50

问题


I was experimenting with Python NLTK text classification. Here is the code example I am practicing with: http://www.laurentluce.com/posts/twitter-sentiment-analysis-using-python-and-nltk/

Here is the code:

from nltk import bigrams
from nltk.probability import ELEProbDist, FreqDist
from nltk import NaiveBayesClassifier
from collections import defaultdict

# Training data: maps each raw line of text (trailing newline included) to
# its sentiment label. Because the lines are dict keys, duplicate lines
# collapse, and a line present in both files ends up labeled 'neg' (the
# later assignment wins).
train_samples = {}

# open() replaces the Python 2-only file() builtin, which no longer exists in
# Python 3; iterating the handle directly avoids building a throwaway list.
with open('data/positive.txt', 'rt') as f:
    for line in f:
        train_samples[line] = 'pos'

with open('data/negative.txt', 'rt') as d:
    for line in d:
        train_samples[line] = 'neg'

# Read the test lines inside a context manager so the handle is closed once
# the lines are in memory (it was previously left open).
with open("data/test.txt", "r") as f:
    test_samples = f.readlines()

# Error in this code
# def bigramReturner(text):
#    tweetString = text.lower()
#    bigramFeatureVector = {}
#    for item in bigrams(tweetString.split()):
#        bigramFeatureVector.append(' '.join(item))
#    return bigramFeatureVector

# Updated the code from the stack overflow comment 
def bigramReturner(tweetString):
    """Return the lowercase word bigrams of *tweetString*.

    The text is lowercased and split on whitespace; every pair of adjacent
    tokens becomes one feature string of the form ``"first second"``.
    Punctuation is not stripped, because no ``removePunctuation`` helper is
    defined in this file.

    Args:
        tweetString: The text to extract features from.

    Returns:
        A list of bigram strings in order of appearance; the list is empty
        when the text has fewer than two tokens.
    """
    tokens = tweetString.lower().split()
    # Pair each token with its successor. The previous version called
    # ``nltk.unigrams``, but the ``nltk`` name is never imported here
    # (NameError), and joining a single token with ' ' would only insert
    # spaces between its characters.
    return [' '.join(pair) for pair in zip(tokens, tokens[1:])]

def get_labeled_features(samples):
    """Count how often each token occurs under each sentiment label.

    Args:
        samples: Mapping of text -> label, where every label is either
            'pos' or 'neg'.

    Returns:
        A dict mapping each whitespace-separated token to
        ``{'pos': n, 'neg': m}``. A token is counted once per occurrence,
        so a word repeated inside one text is counted repeatedly.

    Raises:
        KeyError: If a sample carries a label other than 'pos' or 'neg'.
    """
    word_freqs = {}
    # Iterate the argument: the previous version ignored ``samples`` and
    # always read the module-level ``train_samples``.
    for text, label in samples.items():
        for token in text.split():
            counts = word_freqs.setdefault(token, {'pos': 0, 'neg': 0})
            counts[label] += 1
    return word_freqs


def get_label_probdist(labeled_features):
    """Build the label probability distribution.

    Each label is counted once for every token that occurred at least once
    under it, so the distribution reflects the number of distinct tokens
    seen per label rather than the number of training samples.

    Args:
        labeled_features: Mapping of token -> ``{'pos': n, 'neg': m}``
            counts, as returned by ``get_labeled_features``.

    Returns:
        An ``ELEProbDist`` built from the per-label frequency distribution.
    """
    label_fd = FreqDist()
    for counts in labeled_features.values():
        for label in ['neg', 'pos']:
            if counts[label] > 0:
                # Item assignment instead of FreqDist.inc(), which no longer
                # exists in NLTK 3; ``+=`` works on old and new versions.
                label_fd[label] += 1
    return ELEProbDist(label_fd)


def get_feature_probdist(labeled_features):
    """Build the per-(label, token) feature probability distributions.

    For every token and label, a frequency distribution over the feature
    values ``True`` (token present) and ``None`` (token absent) is created
    and turned into an ``ELEProbDist``. Every frequency distribution is also
    printed to stdout as a debugging aid.

    Reads the module-level ``train_samples`` to estimate the number of
    samples per label.

    Args:
        labeled_features: Mapping of token -> ``{'pos': n, 'neg': m}``
            counts, as returned by ``get_labeled_features``.

    Returns:
        A dict mapping ``(label, token)`` to an ``ELEProbDist``.
    """
    feature_freqdist = defaultdict(FreqDist)
    feature_values = defaultdict(set)
    # Half of all samples is used as the per-label sample count, i.e. the
    # two classes are taken to be the same size. Floor division keeps the
    # value an int on Python 3 (on Python 2, ``/`` already floored ints).
    num_samples = len(train_samples) // 2
    for token, counts in labeled_features.items():
        for label in ['neg', 'pos']:
            present = counts[label]
            # ``present`` counts token occurrences, so it can exceed
            # num_samples; clamp so the "absent" frequency never goes
            # negative (a negative count yields invalid probabilities).
            absent = max(num_samples - present, 0)
            freqdist = feature_freqdist[label, token]
            # Item assignment instead of FreqDist.inc(), which no longer
            # exists in NLTK 3.
            freqdist[True] += present
            freqdist[None] += absent
            feature_values[token].update((None, True))
    # Debug dump of every (label, token) frequency distribution.
    for key, freqdist in feature_freqdist.items():
        print("%s %s" % (key, freqdist))
    return {
        (label, fname): ELEProbDist(freqdist, bins=len(feature_values[fname]))
        for (label, fname), freqdist in feature_freqdist.items()
    }



# Train the classifier from the token counts gathered over train_samples.
labeled_features = get_labeled_features(train_samples)
label_probdist = get_label_probdist(labeled_features)
feature_probdist = get_feature_probdist(labeled_features)

classifier = NaiveBayesClassifier(label_probdist, feature_probdist)

# NOTE(review): the model above is built from single whitespace-separated
# tokens; confirm that the features produced by bigramReturner use the same
# granularity, otherwise they will not match any trained feature name.
for sample in test_samples:
    # classify() takes a featureset dict (feature name -> value), not a
    # list, so mark every extracted feature as present.
    featureset = dict.fromkeys(bigramReturner(sample), True)
    # Parenthesized single-argument print works on Python 2 and Python 3.
    print("%s | %s" % (sample, classifier.classify(featureset)))

But when I run the code, I get the following error:

Traceback (most recent call last):
  File "naive_bigram_1.py", line 87, in <module>
    print "%s | %s" % (sample, classifier.classify(bigramReturner(sample)))
  File "naive_bigram_1.py", line 30, in bigramReturner
    tweetString = removePunctuation (tweetString)
NameError: global name 'removePunctuation' is not defined

I saw a similar question about a different error, "n-grams with Naive Bayes classifier", and I have updated my code here based on it as well.


回答1:


You're calling a function removePunctuation that hasn't been defined previously:

def bigramReturner (tweetString):
    tweetString = tweetString.lower()
    tweetString = removePunctuation (tweetString)
    ....

I also noticed that you put spaces between your functions' names and their parameter lists. Avoid that, as it's not really idiomatic Python and could even cause some problems (like your function being evaluated as an object instead of being called).



来源:https://stackoverflow.com/questions/19209895/n-grams-with-naive-bayes-classifier-error

标签
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!