Skip to content

Commit d3d7527

Browse files
committed
updated intent classifier module, added confidence tuning
1 parent e5a5a45 commit d3d7527

22 files changed

+208
-62
lines changed

README.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ After any of next methods, you will need to [import default intents](#restore),
1717
```sh
1818
docker-compose build
1919
docker-compose up -d
20+
docker-compose exec iky_backend python manage.py init
2021
```
2122

2223
### Using Docker
@@ -29,6 +30,9 @@ docker build -t iky_gateway:3.0.0 frontend/.
2930
# start iky backend
3031
docker run --name=iky_backend -e="APPLICATION_ENV=Production" iky_backend:3.0.0
3132

33+
# setup default intents
34+
docker exec -it iky_backend python manage.py init
35+
3236
# start iky gateway with frontend
3337
docker run --name=iky_gateway --link iky_backend:iky_backend -p 8080:80 iky_gateway:3.0.0
3438

@@ -43,6 +47,8 @@ docker run --name=iky_gateway --link iky_backend:iky_backend -p 8080:80 iky_gate
4347
make setup
4448

4549
make run_dev
50+
51+
source venv/bin/activate && python manage.py init
4652
```
4753
* Production
4854
```sh

app/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,13 +30,15 @@
3030
def not_found(error):
3131
return "Not found", 404
3232

33-
33+
from app.agents.controllers import bots
3434
from app.nlu.controllers import nlu
3535
from app.intents.controllers import intents
3636
from app.train.controllers import train
3737
from app.endpoint.controllers import endpoint
3838

39+
3940
app.register_blueprint(nlu)
4041
app.register_blueprint(intents)
4142
app.register_blueprint(train)
4243
app.register_blueprint(endpoint)
44+
app.register_blueprint(bots)

app/agents/__init__.py

Whitespace-only changes.

app/agents/controllers.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
from flask import Blueprint, request
2+
3+
from app.agents.models import Bot
4+
from app.commons import build_response
5+
6+
bots = Blueprint('bots_blueprint', __name__,
7+
url_prefix='/agents/<bot_name>')
8+
9+
10+
11+
@bots.route('/config', methods=['PUT'])
def set_config(bot_name):
    """
    Update bot config
    :param bot_name: name of the bot whose config is replaced
    :return: OK response on success, 400 for a missing/non-object JSON
        body, 404 when no bot with that name exists
    """
    # get_json(silent=True) yields None for a missing or malformed body;
    # the config field only accepts a mapping, so reject anything else
    # up front instead of failing inside save().
    content = request.get_json(silent=True)
    if not isinstance(content, dict):
        return build_response.build_json(
            {"error": "request body must be a JSON object"}), 400

    try:
        bot = Bot.objects.get(name=bot_name)
    except Bot.DoesNotExist:
        return build_response.build_json(
            {"error": "bot not found"}), 404

    bot.config = content
    bot.save()
    return build_response.sent_ok()
24+
25+
@bots.route('/config', methods=['GET'])
def get_config(bot_name):
    """
    Read bot config
    :param bot_name: name of the bot whose config is returned
    :return: JSON response with the stored config, 404 when no bot with
        that name exists
    """
    try:
        bot = Bot.objects.get(name=bot_name)
    except Bot.DoesNotExist:
        return build_response.build_json(
            {"error": "bot not found"}), 404

    return build_response.build_json(bot.config)

app/agents/models.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
from mongoengine.fields import StringField , Document,DictField
2+
3+
class Bot(Document):
    """Persistent per-bot settings, looked up by unique name."""

    name = StringField(max_length=100, required=True, unique=True)
    # The default is a callable so every document gets its own dict;
    # a dict literal here would be one shared mutable object, and an
    # in-place change on one Bot could leak into the others.
    config = DictField(required=True, default=lambda: {
        "confidence_threshold": .70
    })

app/endpoint/controllers.py

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -73,34 +73,36 @@ def call_api(url, type, parameters, is_json=False):
7373

7474
from app.nlu.intent_classifer import IntentClassifier
7575

76-
PATH = "{}/{}".format(app.config["MODELS_DIR"],
77-
app.config["INTENT_MODEL_NAME"])
76+
with app.app_context():
77+
PATH = "{}/{}".format(app.config["MODELS_DIR"],
78+
app.config["INTENT_MODEL_NAME"])
7879

79-
sentence_classifier = IntentClassifier()
80-
sentence_classifier.load(PATH)
80+
sentence_classifier = IntentClassifier()
81+
sentence_classifier.load(PATH)
82+
print("Intent Model loaded.")
8183

8284

8385
def update_model(app, message, **extra):
84-
print(message)
8586
sentence_classifier.load(PATH)
86-
print("model updated")
87+
print("Intent Model updated")
8788

8889
from app.nlu.tasks import model_updated_signal
8990
model_updated_signal.connect(update_model, app)
9091

92+
from app.agents.models import Bot
9193
def predict(sentence):
    """
    Predict Intent using Intent classifier
    :param sentence: user input text to classify
    :return: tuple of (intent id, confidence); the fallback intent is
        returned with confidence 1.0 when no usable prediction exists or
        the prediction scores below the bot's confidence threshold
    """
    bot = Bot.objects.get(name="default")
    # NOTE(review): this default (.90) differs from the Bot model's
    # default config value (.70) -- confirm which one is intended.
    threshold = bot.config.get("confidence_threshold", .90)

    predicted = sentence_classifier.predict(sentence)

    # The classifier can return None (no model loaded) or a placeholder
    # without an "intent" value; both mean "no prediction" and must not
    # be indexed blindly.
    no_prediction = not predicted or predicted.get("intent") is None
    if no_prediction or predicted["confidence"] < threshold:
        fallback = Intent.objects(
            intentId=app.config["DEFAULT_FALLBACK_INTENT_NAME"]).first()
        return fallback.id, 1.0

    return predicted["intent"], predicted["confidence"]
104106

105107

106108
# Request Handler
@@ -135,7 +137,7 @@ def api():
135137
logger.info(request_json.get("input"), extra=result_json)
136138
return build_response.build_json(result_json)
137139

138-
intent_id = predict(request_json.get("input"))
140+
intent_id,confidence = predict(request_json.get("input"))
139141
intent = Intent.objects.get(id=ObjectId(intent_id))
140142

141143
if intent.parameters:
@@ -147,6 +149,7 @@ def api():
147149
request_json.get("complete") is True)):
148150
result_json["intent"] = {
149151
"name": intent.name,
152+
"confidence":confidence,
150153
"id": str(intent.id)
151154
}
152155

app/intents/models.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ class ApiDetails(EmbeddedDocument):
7171

7272
class Intent(Document):
7373
name = StringField(max_length=100, required=True, unique=True)
74+
userDefined = BooleanField(default=True)
7475
intentId = StringField(required=True)
7576
apiTrigger = BooleanField(required=True)
7677
apiDetails = EmbeddedDocumentField(ApiDetails)

app/nlu/intent_classifer.py

Lines changed: 55 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,8 @@
1-
21
import cloudpickle
32
from sklearn import preprocessing
43
from sklearn.feature_extraction.text import TfidfVectorizer
5-
from sklearn.multiclass import OneVsRestClassifier
4+
from sklearn.svm import SVC
65
from sklearn.pipeline import Pipeline
7-
from sklearn.svm import LinearSVC
86
from app.nlu.nltk_preprocessor import NLTKPreprocessor
97

108

@@ -28,6 +26,7 @@ def train(self, X, y, outpath=None, verbose=True):
2826
:param verbose:
2927
:return:
3028
"""
29+
3130
def build(X, y=None):
3231
"""
3332
Inner build function that builds a single model.
@@ -39,17 +38,27 @@ def build(X, y=None):
3938
('preprocessor', NLTKPreprocessor()),
4039
('vectorizer', TfidfVectorizer(
4140
tokenizer=self.identity, preprocessor=None, lowercase=False)),
42-
('clf', OneVsRestClassifier(LinearSVC()))])
41+
('clf', SVC(C=1,
42+
probability=True,
43+
class_weight='balanced'))])
44+
45+
from sklearn.model_selection import GridSearchCV
46+
47+
Cs = [1, 2, 5, 10, 20, 100]
48+
param_grid = {'clf__C': Cs, 'clf__kernel': ["linear"]}
49+
grid_search = GridSearchCV(model,
50+
param_grid=param_grid,
51+
scoring='f1_weighted',
52+
verbose=1)
53+
grid_search.fit(X, y)
4354

44-
model.fit(X, y)
55+
model = grid_search
4556
return model
4657

47-
# Label encode the targets
48-
labels = preprocessing.MultiLabelBinarizer()
49-
y = labels.fit_transform(y)
58+
print(X)
59+
print(len(y))
5060

5161
model = build(X, y)
52-
model.labels_ = labels
5362

5463
if outpath:
5564
with open(outpath, 'wb') as f:
@@ -74,14 +83,42 @@ def predict(self, text):
7483
:param PATH:
7584
:return:
7685
"""
86+
return self.process(text)
87+
88+
def predict_proba(self, X):
    """Rank every class by predicted probability for each input sample.

    :param X: sequence of input samples accepted by the fitted model
    :return: tuple of two arrays of shape (n_samples, n_classes): the
        class indices sorted from most to least probable, and the
        probabilities in that same order"""

    import numpy as np

    pred_result = self.model.predict_proba(X)
    # argsort is ascending, so flip each row to get most-probable first
    sorted_indices = np.fliplr(np.argsort(pred_result, axis=1))
    # Gather row by row; indexing with pred_result[:, sorted_indices]
    # would pair every row with every row's ordering and produce an
    # (n, n, k) array that is wrong for more than one sample.
    sorted_probabilities = np.take_along_axis(
        pred_result, sorted_indices, axis=1)
    return sorted_indices, sorted_probabilities
100+
101+
def process(self, x, return_type="intent", INTENT_RANKING_LENGTH=5):
    """Returns the most likely intent and its probability for the input text.

    :param x: input text to classify
    :param return_type: "intent" for the single best match; any other
        value returns the ranked list instead
    :param INTENT_RANKING_LENGTH: maximum number of entries in the ranking
    :return: dict with keys "intent" and "confidence", or a list of such
        dicts ordered from most to least probable
    """
    # Same shape as a real prediction so callers can always read
    # result["intent"] / result["confidence"], even with no model loaded.
    no_intent = {"intent": None, "confidence": 0.0}

    if not self.model:
        print("no class")
        return no_intent if return_type == "intent" else []

    indices, probabilities = self.predict_proba([x])
    # Map sorted class indices back to the labels the model was fit on
    intents = [self.model.classes_[index] for index in indices.flatten()]
    probabilities = probabilities.flatten()

    if len(intents) > 0 and len(probabilities) > 0:
        ranking = list(zip(intents, probabilities))[:INTENT_RANKING_LENGTH]
        intent = {"intent": intents[0], "confidence": probabilities[0]}
        intent_ranking = [{"intent": intent_name, "confidence": score}
                          for intent_name, score in ranking]
    else:
        intent = no_intent
        intent_ranking = []

    if return_type == "intent":
        return intent
    return intent_ranking

app/nlu/nltk_preprocessor.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ def __init__(self, stopwords=None, punct=None,
1616
lower=True, strip=True):
1717
self.lower = lower
1818
self.strip = strip
19-
#self.stopwords = stopwords or set(sw.words('english'))
19+
self.stopwords = stopwords or set(sw.words('english'))
2020
self.punct = punct or set(string.punctuation)
2121
self.lemmatizer = WordNetLemmatizer()
2222

@@ -30,7 +30,6 @@ def transform(self, X):
3030
return [
3131
list(self.tokenize(doc)) for doc in X
3232
]
33-
3433
def tokenize(self, document):
3534
# Break the document into sentences
3635
for sent in sent_tokenize(document):
@@ -43,8 +42,8 @@ def tokenize(self, document):
4342
token = token.strip('*') if self.strip else token
4443

4544
# If stopword, ignore token and continue
46-
# if token in self.stopwords:
47-
# continue
45+
if token in self.stopwords:
46+
continue
4847

4948
# If punctuation, ignore token and continue
5049
if all(char in self.punct for char in token):

app/nlu/tasks.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,10 @@ def train_intent_classifier(intents):
3939
for intent in intents:
4040
training_data = intent.trainingData
4141
for example in training_data:
42+
if example.get("text").strip() == "":
43+
continue
4244
X.append(example.get("text"))
43-
y.append([str(intent.id)])
45+
y.append(str(intent.id))
4446

4547
PATH = "{}/{}".format(app.config["MODELS_DIR"],
4648
app.config["INTENT_MODEL_NAME"])

0 commit comments

Comments
 (0)