# Tweet sentiment analyzer (simple classification)
# -*- coding:utf-8 -*-
import re
import random
from textblob.classifiers import NaiveBayesClassifier
from textblob import TextBlob
from nltk.corpus import movie_reviews
# Labelled training sentences as (text, label) pairs; the label is either
# 'pos' (positive sentiment) or 'neg' (negative sentiment).
train = [
    # positive examples
    ('I love this sandwich.', 'pos'),
    ('This is an amazing place!', 'pos'),
    ('I feel very good about these beers.', 'pos'),
    ('This is my best work.', 'pos'),
    ('What an awesome view', 'pos'),
    # negative examples
    ('I do not like this restaurant', 'neg'),
    ('I am tired of this stuff.', 'neg'),
    ("I can't deal with this", 'neg'),
    ('He is my sworn enemy!', 'neg'),
    ('My boss is horrible.', 'neg'),
]
# Labelled evaluation sentences, in the same (text, label) format as the
# training data; used only to measure classifier accuracy.
test = [
    ('The beer was good.', 'pos'),
    ('I do not enjoy my job', 'neg'),
    ("I ain't feeling dandy today.", 'neg'),
    ("I feel amazing!", 'pos'),
    ('Gary is a friend of mine.', 'pos'),
    ("I can't believe I'm doing this.", 'neg'),
]
# Create a new classifier from the labelled training sentences.
cl = NaiveBayesClassifier(train)

# Use a TextBlob object: a classifier can be passed to the TextBlob
# constructor, after which the blob's sentences can be classified with it.
blob = TextBlob(
    "The beer was amazing. "
    "But the hangover was horrible. My boss was not happy.",
    classifier=cl,
)
for sentence in blob.sentences:
    # The loop body must be indented; print each sentence, then its label.
    print(sentence)
    print(sentence.classify())

# Check the accuracy on the test set.
# A parenthesised single-argument print behaves the same as a Python 2
# statement and as a Python 3 function call.
print(cl.accuracy(test))
# Display the 5 most informative features of the trained classifier.
cl.show_informative_features(5)
# Add more data from NLTK: one (word list, category) pair per file in the
# movie_reviews corpus.
reviews = [
    (list(movie_reviews.words(fileid)), category)
    for category in movie_reviews.categories()
    for fileid in movie_reviews.fileids(category)
]
# The list above is built one category after another, so whenever the first
# category holds at least 200 reviews (as in the standard corpus) an
# unshuffled [0:200] slice would contain a single label only. Shuffle first
# so that both the new training and new test samples mix the categories.
random.shuffle(reviews)
# Adjacent, non-overlapping slices: 100 reviews to train on, the next 100 to
# evaluate on (the previous 101:200 slice silently dropped index 100).
new_train, new_test = reviews[:100], reviews[100:200]
cl.update(new_train)
# Re-check accuracy on the original test sentences plus the new reviews.
# Parenthesised print works on both Python 2 and Python 3.
print(cl.accuracy(test + new_test))
# Reference: https://blog.csdn.net/baihuaxiu123/article/details/62057062