-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathLexiconClassifier.py
114 lines (94 loc) · 3.39 KB
/
LexiconClassifier.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
#!/usr/bin/python
# -*- coding: utf-8 -*-
#### Class to perform a lexicon-based sentiment classification
#### Author: Pedro Paulo Balage Filho
#### Version: 1.0
#### Date: 05/12/12
# Performs a lexicon-based sentiment classification. Initialize it with one of
# my dictionary classes (liwc, sentilex or opinionlexicon); each of them must
# provide the polarity(word) method that this class requires.
class Classifier(object):
    """Lexicon-based sentiment classifier over tokenised sentences.

    A sentence is an iterable of ``(word, tag)`` pairs; only the word is
    used. Each word's polarity is looked up in the supplied dictionary and
    adjusted by any negator, modal or intensifier (Portuguese word lists
    below) that appeared at most ``window`` tokens earlier and has not yet
    been consumed by another polar word.
    """

    # Maximum distance (in tokens) between an operator word (negator, modal,
    # intensifier) and the polar word it is allowed to modify.
    window = 3

    def __init__(self, _dictionary):
        """Store the lexicon and set up the operator word lists.

        _dictionary: object exposing ``polarity(word)``, which returns
            1, -1, 0 or None for a lower-cased word.
        """
        self.dictionary = _dictionary
        # NOTE(review): u'não' is a unicode literal while the other entries
        # are plain string literals; under Python 2 a UTF-8 byte-string token
        # would not compare equal to it -- confirm tokens arrive as unicode.
        self.negators = [u'não', 'nao', 'nunca', 'jamais', 'nada',
                         'tampouco', 'nenhum', 'nenhuma']
        self.modals = ['deve', 'pode', 'poderia', 'seria', 'deveria']
        self.intensifiers = ['muito', 'demais', 'completamente',
                             'absolutamente', 'totalmente',
                             'definitivamente']
        # Multiplier applied to the polarity of an intensified word.
        self.intensifier_factor = 4

    def classify(self, sentence):
        """Return 1, -1 or 0 according to the sign of the sentence score."""
        so_total, _log = self.so_cal(sentence)
        if so_total > 0:
            return 1
        if so_total < 0:
            return -1
        return 0

    def so_cal(self, sentence):
        """Compute the semantic orientation (SO) of a sentence.

        sentence: iterable of ``(word, tag)`` pairs.

        Returns a ``(so_total, log)`` tuple: the summed, adjusted polarity as
        a float, and a one-line trace (terminated by a newline) in which every
        token is followed by ``#``-prefixed markers describing what was
        applied to it.
        """
        so_total = 0.0
        trace = []
        # Index of the most recent operator of each kind that has not yet
        # been consumed by a polar word; None when nothing is pending.
        neg_at = mod_at = int_at = None

        for i, (word, _tag) in enumerate(sentence):
            trace.append(word)  # the trace keeps the original casing
            word = word.lower()

            so = self.dictionary.polarity(word)
            if so:  # skips both None (unknown word) and 0 (neutral word)
                # Order matters: intensify, then negate, then cancel.
                if int_at is not None and i - int_at <= self.window:
                    so = so * self.intensifier_factor
                    trace.append('#Intensified')
                if neg_at is not None and i - neg_at <= self.window:
                    so = -so
                    trace.append('#Negated')
                if mod_at is not None and i - mod_at <= self.window:
                    # A preceding modal makes the statement irrealis, so the
                    # word contributes nothing.
                    so = 0
                    trace.append('#Irrealis')
                trace.append('#' + str(so))
                so_total += so
                # A polar word consumes every pending operator.
                neg_at = mod_at = int_at = None

            # Operator detection runs after the polarity step, so a word that
            # is itself an operator only affects the words that follow it.
            if word in self.modals:
                mod_at = i
                trace.append('#MODAL')
            if word in self.negators:
                neg_at = i
                trace.append('#NEGATION')
            if word in self.intensifiers:
                int_at = i
                trace.append('#INTENSIF')
            trace.append(' ')

        trace.append('\n')
        return so_total, ''.join(trace)

    def show_results(self, gold, test):
        """Print the accuracy and the confusion matrix of ``test`` vs ``gold``.

        gold, test: index-aligned sequences of labels.

        Raises ZeroDivisionError when ``gold`` is empty and IndexError when
        ``test`` is shorter than ``gold``.
        """
        from nltk import ConfusionMatrix
        correct = 0
        for index, expected in enumerate(gold):
            if expected == test[index]:
                correct += 1
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('Accuracy: {:.2%}'.format(float(correct) / float(len(gold))))
        cm = ConfusionMatrix(gold, test)
        # Newer NLTK releases name the formatter pretty_format(); older ones
        # call it pp(). Use whichever the installed version provides.
        render = getattr(cm, 'pretty_format', None) or cm.pp
        print(render())