-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathspambayes.py
87 lines (64 loc) · 2.7 KB
/
spambayes.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import re, math
from roundup.cgi.actions import Action
from roundup.cgi.exceptions import *
import xmlrpclib, socket
# Matches revision references such as "r1234", "rev 1234" or "revision 1234".
# There is no leading word boundary, so "r1234" embedded at the end of a
# longer word (e.g. "error404") matches as well.
REVPAT = re.compile(r'(r[0-9]+\b|rev(ision)? [0-9]+\b)')


def extract_classinfo(db, classname, nodeid):
    """Return the content of a node plus the classifier tokens derived from it.

    Args:
        db: database handle providing ``getnode(classname, nodeid)`` and
            ``getclass(classname)``.
        classname: name of the class the node belongs to.
        nodeid: id of the node to inspect.

    Returns:
        A ``(content, tokens)`` tuple.  ``content`` is the node's 'content'
        property, returned unchanged.  ``tokens`` is a list of four strings:
        ``klass:<classname>``, ``author:<author id>``,
        ``authorage:<int(log(age))>`` and ``hasrev:<True|False>``.
    """
    node = db.getnode(classname, nodeid)

    # Prefer an explicit 'author' property and fall back to 'creator'.
    # Resolved once and reused for both the user lookup and the token.
    authorid = node.get('author', node.get('creator'))
    author = db.getnode('user', authorid)

    # Difference between the node's and the author's creation timestamps
    # (presumably seconds -- confirm against the Date.timestamp() in use).
    authorage = node['creation'].timestamp() - author['creation'].timestamp()

    # math.log() raises ValueError for values <= 0, which happens when the
    # node is not newer than the author's account.  Put those in bucket 0
    # instead of aborting; positive ages keep their previous bucket.
    if authorage > 0:
        age_bucket = int(math.log(authorage))
    else:
        age_bucket = 0

    content = db.getclass(classname).get(nodeid, 'content')

    # A missing (None) content cannot be searched; treat it as having no
    # revision reference rather than raising TypeError.
    has_revision = REVPAT.search(content or '') is not None

    tokens = ["klass:%s" % classname,
              "author:%s" % authorid,
              "authorage:%d" % age_bucket,
              "hasrev:%s" % has_revision]

    return (content, tokens)
def train_spambayes(db, content, tokens, is_spam):
    """Pretend to train SpamBayes and always report success.

    Training is disabled; only the spam classification UI is kept.  The
    former implementation (an XML-RPC ``train`` call to the server named by
    the ``SPAMBAYES_URI`` detector setting) sat after an unconditional
    return and could never run, so it has been removed.  The signature is
    kept so existing callers continue to work.

    Args:
        db: tracker database handle (unused).
        content: text of the message (unused).
        tokens: extra classifier tokens (unused).
        is_spam: the requested verdict (unused).

    Returns:
        ``(True, None)`` -- a ``(status, error message)`` pair signalling
        success with no error.
    """
    # spambayes training is now disabled; only leave
    # spam classification UI
    return (True, None)
class SpambayesClassify(Action):
    """Web action that stores a manual spam/ham verdict on a node.

    The verdict is read from the submitted form: a "trainspam" field marks
    the node as spam, a "trainham" field marks it as ham.
    """

    # Presumably the permission the Action base class requires before
    # handle() runs -- confirm against the Roundup version in use.
    permissionType = 'SB: May Classify'

    def handle(self):
        """Apply the requested verdict to the current node and commit it.

        On success ``spambayes_score`` is set to 1.0 (spam) or 0.0 (ham) and
        an OK message is shown; if training reports a failure an error
        message is shown and no score is written.  If the form carries
        neither "trainspam" nor "trainham", an error message is shown and
        nothing is changed.
        """
        # Decide the verdict first.  Without either field there is nothing
        # to do; previously this fell through and failed on an unbound
        # ``is_spam``.
        if "trainspam" in self.form:
            is_spam = True
        elif "trainham" in self.form:
            is_spam = False
        else:
            self.client.add_error_message(
                self._('Unable to classify message: no classification was requested'))
            return

        (content, tokens) = extract_classinfo(self.db,
                                              self.classname, self.nodeid)

        (status, errmsg) = train_spambayes(self.db, content, tokens,
                                           is_spam)

        node = self.db.getnode(self.classname, self.nodeid)
        props = {}

        if status:
            # NOTE(review): this only re-asserts a flag that is already
            # true; kept as-is because the intended behaviour cannot be
            # determined from this file.
            if node.get('spambayes_misclassified', False):
                props['spambayes_misclassified'] = True

            if is_spam:
                props['spambayes_score'] = 1.0
                s = " SPAM"
            else:
                props['spambayes_score'] = 0.0
                s = " HAM"
            self.client.add_ok_message(self._('Message classified as') + s)
        else:
            self.client.add_error_message(self._('Unable to classify message, got error:') + errmsg)

        # Written even when props is empty, matching the previous behaviour.
        klass = self.db.getclass(self.classname)
        klass.set(self.nodeid, **props)
        self.db.commit()
def sb_is_spam(obj):
    """Return True when the stored SpamBayes score of *obj* marks it as spam.

    Args:
        obj: item supporting ``obj['spambayes_score']`` and exposing the
            tracker database as ``obj._db``.

    Returns:
        True if the score is greater than or equal to the
        ``SPAMBAYES_SPAM_CUTOFF`` detector setting; False if it is lower,
        if the item has no ``spambayes_score`` property, or if the score
        is unset (None).
    """
    cutoff_score = float(obj._db.config.detectors['SPAMBAYES_SPAM_CUTOFF'])
    try:
        score = obj['spambayes_score']
    except KeyError:
        return False
    # An unset score must not be compared with a float: that only works by
    # accident of Python 2's None ordering and raises TypeError on Python 3.
    if score is None:
        return False
    return score >= cutoff_score
def init(instance):
    """Register this module's web action and template utility on *instance*.

    Presumably called by the tracker when it loads this extension module --
    confirm against the Roundup version in use.
    """
    # Web action that stores a manual spam/ham verdict.
    action_name, action_class = "spambayes_classify", SpambayesClassify
    instance.registerAction(action_name, action_class)

    # Helper reporting whether an item's score reaches the spam cutoff.
    util_name, util_func = 'sb_is_spam', sb_is_spam
    instance.registerUtil(util_name, util_func)