Skip to content

Commit d3d7527

Browse files
committed
updated intent classifier module, added confidence tuning
1 parent e5a5a45 commit d3d7527

22 files changed

+208
-62
lines changed

Diff for: README.md

+6
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ After any of next methods, you will need to [import default intents](#restore),
1717
```sh
1818
docker-compose build
1919
docker-compose up -d
20+
docker-compose exec iky_backend python manage.py init
2021
```
2122

2223
### Using Docker
@@ -29,6 +30,9 @@ docker build -t iky_gateway:3.0.0 frontend/.
2930
# start iky backend
3031
docker run --name=iky_backend -e="APPLICATION_ENV=Production" iky_backend:3.0.0
3132

33+
# setup default intents
34+
docker exec -it iky_backend python manage.py init
35+
3236
# start iky gateway with frontend
3337
docker run --name=iky_gateway --link iky_backend:iky_backend -p 8080:80 iky_gateway:3.0.0
3438

@@ -43,6 +47,8 @@ docker run --name=iky_gateway --link iky_backend:iky_backend -p 8080:80 iky_gate
4347
make setup
4448

4549
make run_dev
50+
51+
source venv/bin/activate && python manage.py init
4652
```
4753
* Production
4854
```sh

Diff for: app/__init__.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -30,13 +30,15 @@
3030
def not_found(error):
3131
return "Not found", 404
3232

33-
33+
from app.agents.controllers import bots
3434
from app.nlu.controllers import nlu
3535
from app.intents.controllers import intents
3636
from app.train.controllers import train
3737
from app.endpoint.controllers import endpoint
3838

39+
3940
app.register_blueprint(nlu)
4041
app.register_blueprint(intents)
4142
app.register_blueprint(train)
4243
app.register_blueprint(endpoint)
44+
app.register_blueprint(bots)

Diff for: app/agents/__init__.py

Whitespace-only changes.

Diff for: app/agents/controllers.py

+34
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
from flask import Blueprint, request
2+
3+
from app.agents.models import Bot
4+
from app.commons import build_response
5+
6+
bots = Blueprint('bots_blueprint', __name__,
7+
url_prefix='/agents/<bot_name>')
8+
9+
10+
11+
@bots.route('/config', methods=['PUT'])
12+
def set_config(bot_name):
13+
"""
14+
Update bot config
15+
:param bot_name:
16+
:return:
17+
"""
18+
19+
content = request.get_json(silent=True)
20+
bot = Bot.objects.get(name=bot_name)
21+
bot.config = content
22+
bot.save()
23+
return build_response.sent_ok()
24+
25+
@bots.route('/config', methods=['GET'])
26+
def get_config(bot_name):
27+
"""
28+
Read bot config
29+
:param bot_name:
30+
:return:
31+
"""
32+
bot = Bot.objects.get(name=bot_name)
33+
34+
return build_response.build_json(bot.config)

Diff for: app/agents/models.py

+7
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
from mongoengine.fields import StringField , Document,DictField
2+
3+
class Bot(Document):
4+
name = StringField(max_length=100, required=True, unique=True)
5+
config = DictField(required=True,default={
6+
"confidence_threshold": .70
7+
})

Diff for: app/endpoint/controllers.py

+15-12
Original file line numberDiff line numberDiff line change
@@ -73,34 +73,36 @@ def call_api(url, type, parameters, is_json=False):
7373

7474
from app.nlu.intent_classifer import IntentClassifier
7575

76-
PATH = "{}/{}".format(app.config["MODELS_DIR"],
77-
app.config["INTENT_MODEL_NAME"])
76+
with app.app_context():
77+
PATH = "{}/{}".format(app.config["MODELS_DIR"],
78+
app.config["INTENT_MODEL_NAME"])
7879

79-
sentence_classifier = IntentClassifier()
80-
sentence_classifier.load(PATH)
80+
sentence_classifier = IntentClassifier()
81+
sentence_classifier.load(PATH)
82+
print("Intent Model loaded.")
8183

8284

8385
def update_model(app, message, **extra):
84-
print(message)
8586
sentence_classifier.load(PATH)
86-
print("model updated")
87+
print("Intent Model updated")
8788

8889
from app.nlu.tasks import model_updated_signal
8990
model_updated_signal.connect(update_model, app)
9091

92+
from app.agents.models import Bot
9193
def predict(sentence):
9294
"""
9395
Predict Intent using Intent classifier
9496
:param sentence:
9597
:return:
9698
"""
99+
bot = Bot.objects.get(name="default")
97100
predicted = sentence_classifier.predict(sentence)
98-
99-
if not predicted:
100-
return Intent.objects(
101-
intentName=app.config["DEFAULT_FALLBACK_INTENT_NAME"]).first().id
101+
print(predicted)
102+
if predicted["confidence"] < bot.config.get("confidence_threshold",.90):
103+
return Intent.objects(intentId=app.config["DEFAULT_FALLBACK_INTENT_NAME"]).first().id,1.0
102104
else:
103-
return predicted["class"]
105+
return predicted["intent"],predicted["confidence"]
104106

105107

106108
# Request Handler
@@ -135,7 +137,7 @@ def api():
135137
logger.info(request_json.get("input"), extra=result_json)
136138
return build_response.build_json(result_json)
137139

138-
intent_id = predict(request_json.get("input"))
140+
intent_id,confidence = predict(request_json.get("input"))
139141
intent = Intent.objects.get(id=ObjectId(intent_id))
140142

141143
if intent.parameters:
@@ -147,6 +149,7 @@ def api():
147149
request_json.get("complete") is True)):
148150
result_json["intent"] = {
149151
"name": intent.name,
152+
"confidence":confidence,
150153
"id": str(intent.id)
151154
}
152155

Diff for: app/intents/models.py

+1
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ class ApiDetails(EmbeddedDocument):
7171

7272
class Intent(Document):
7373
name = StringField(max_length=100, required=True, unique=True)
74+
userDefined = BooleanField(default=True)
7475
intentId = StringField(required=True)
7576
apiTrigger = BooleanField(required=True)
7677
apiDetails = EmbeddedDocumentField(ApiDetails)

Diff for: app/nlu/intent_classifer.py

+55-18
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,8 @@
1-
21
import cloudpickle
32
from sklearn import preprocessing
43
from sklearn.feature_extraction.text import TfidfVectorizer
5-
from sklearn.multiclass import OneVsRestClassifier
4+
from sklearn.svm import SVC
65
from sklearn.pipeline import Pipeline
7-
from sklearn.svm import LinearSVC
86
from app.nlu.nltk_preprocessor import NLTKPreprocessor
97

108

@@ -28,6 +26,7 @@ def train(self, X, y, outpath=None, verbose=True):
2826
:param verbose:
2927
:return:
3028
"""
29+
3130
def build(X, y=None):
3231
"""
3332
Inner build function that builds a single model.
@@ -39,17 +38,27 @@ def build(X, y=None):
3938
('preprocessor', NLTKPreprocessor()),
4039
('vectorizer', TfidfVectorizer(
4140
tokenizer=self.identity, preprocessor=None, lowercase=False)),
42-
('clf', OneVsRestClassifier(LinearSVC()))])
41+
('clf', SVC(C=1,
42+
probability=True,
43+
class_weight='balanced'))])
44+
45+
from sklearn.model_selection import GridSearchCV
46+
47+
Cs = [1, 2, 5, 10, 20, 100]
48+
param_grid = {'clf__C': Cs, 'clf__kernel': ["linear"]}
49+
grid_search = GridSearchCV(model,
50+
param_grid=param_grid,
51+
scoring='f1_weighted',
52+
verbose=1)
53+
grid_search.fit(X, y)
4354

44-
model.fit(X, y)
55+
model = grid_search
4556
return model
4657

47-
# Label encode the targets
48-
labels = preprocessing.MultiLabelBinarizer()
49-
y = labels.fit_transform(y)
58+
print(X)
59+
print(len(y))
5060

5161
model = build(X, y)
52-
model.labels_ = labels
5362

5463
if outpath:
5564
with open(outpath, 'wb') as f:
@@ -74,14 +83,42 @@ def predict(self, text):
7483
:param PATH:
7584
:return:
7685
"""
86+
return self.process(text)
87+
88+
def predict_proba(self, X):
89+
"""Given a bow vector of an input text, predict most probable label. Returns only the most likely label.
90+
91+
:param X: bow of input text
92+
:return: tuple of first, the class indices sorted by descending probability and second, the probabilities reordered by those indices"""
93+
94+
import numpy as np
7795

78-
yhat = self.model.predict([
79-
text
80-
])
81-
if yhat.any():
82-
return {
83-
"class": self.model.labels_.inverse_transform(yhat)[0][0],
84-
"accuracy": 1
85-
}
96+
pred_result = self.model.predict_proba(X)
97+
# sort the probabilities retrieving the indices of the elements in sorted order
98+
sorted_indices = np.fliplr(np.argsort(pred_result, axis=1))
99+
return sorted_indices, pred_result[:, sorted_indices]
100+
101+
def process(self, x, return_type="intent", INTENT_RANKING_LENGTH=5):
102+
"""Returns the most likely intent and its probability for the input text."""
103+
104+
if not self.model:
105+
print("no class")
106+
intent = None
107+
intent_ranking = []
86108
else:
87-
return False
109+
intents, probabilities = self.predict_proba([x])
110+
intents, probabilities = [self.model.classes_[intent] for intent in
111+
intents.flatten()], probabilities.flatten()
112+
113+
if len(intents) > 0 and len(probabilities) > 0:
114+
ranking = list(zip(list(intents), list(probabilities)))[:INTENT_RANKING_LENGTH]
115+
116+
intent = {"intent": intents[0], "confidence": probabilities[0]}
117+
intent_ranking = [{"intent": intent_name, "confidence": score} for intent_name, score in ranking]
118+
else:
119+
intent = {"name": None, "confidence": 0.0}
120+
intent_ranking = []
121+
if return_type == "intent":
122+
return intent
123+
else:
124+
return intent_ranking

Diff for: app/nlu/nltk_preprocessor.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ def __init__(self, stopwords=None, punct=None,
1616
lower=True, strip=True):
1717
self.lower = lower
1818
self.strip = strip
19-
#self.stopwords = stopwords or set(sw.words('english'))
19+
self.stopwords = stopwords or set(sw.words('english'))
2020
self.punct = punct or set(string.punctuation)
2121
self.lemmatizer = WordNetLemmatizer()
2222

@@ -30,7 +30,6 @@ def transform(self, X):
3030
return [
3131
list(self.tokenize(doc)) for doc in X
3232
]
33-
3433
def tokenize(self, document):
3534
# Break the document into sentences
3635
for sent in sent_tokenize(document):
@@ -43,8 +42,8 @@ def tokenize(self, document):
4342
token = token.strip('*') if self.strip else token
4443

4544
# If stopword, ignore token and continue
46-
# if token in self.stopwords:
47-
# continue
45+
if token in self.stopwords:
46+
continue
4847

4948
# If punctuation, ignore token and continue
5049
if all(char in self.punct for char in token):

Diff for: app/nlu/tasks.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,10 @@ def train_intent_classifier(intents):
3939
for intent in intents:
4040
training_data = intent.trainingData
4141
for example in training_data:
42+
if example.get("text").strip() == "":
43+
continue
4244
X.append(example.get("text"))
43-
y.append([str(intent.id)])
45+
y.append(str(intent.id))
4446

4547
PATH = "{}/{}".format(app.config["MODELS_DIR"],
4648
app.config["INTENT_MODEL_NAME"])

Diff for: docker-compose.yml

+3
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,23 @@
11
version: '2'
22
services:
33
iky_backend:
4+
container_name: iky_backend
45
build: .
56
environment:
67
APPLICATION_ENV: Production
78
depends_on:
89
- mongodb
910

1011
iky_gateway:
12+
container_name: iky_gateway
1113
build: frontend/
1214
ports:
1315
- "8080:80"
1416
depends_on:
1517
- iky_backend
1618

1719
mongodb:
20+
container_name: mongodb
1821
image: mongo
1922
hostname: mongodb
2023
volumes:

Diff for: examples/default_intents.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
[{"_id": {"$oid": "59aae7bd26f6f60007b06fb3"}, "name": "Default Fallback intent", "intentId": "fallback", "apiTrigger": false, "speechResponse": "Sorry. I'm having trouble understanding you."}, {"name": "cancel", "trainingData": [{"text": "cancel that", "entities": []}, {"text": "cancel", "entities": []}], "intentId": "cancel", "apiTrigger": false, "_id": {"$oid": "59aae7bd26f6f60007b06fb5"}, "speechResponse": "Ok. Canceled."}, {"_id": {"$oid": "59aae7bd26f6f60007b06fb7"}, "name": "Welcome message", "intentId": "init_conversation", "apiTrigger": false, "speechResponse": "Hi, What can i do for you ?"}]
1+
[{"name": "Default Fallback intent", "userDefined": false, "apiTrigger": false, "intentId": "fallback", "_id": {"$oid": "59aae7bd26f6f60007b06fb3"}, "speechResponse": "Sorry. I'm having trouble understanding you."}, {"name": "cancel", "trainingData": [{"text": "i want to cancel", "entities": []}, {"text": "cancel that", "entities": []}, {"text": "cancel", "entities": []}], "userDefined": false, "apiTrigger": false, "intentId": "cancel", "_id": {"$oid": "59aae7bd26f6f60007b06fb5"}, "speechResponse": "Ok. Canceled."}, {"name": "Welcome message", "userDefined": false, "apiTrigger": false, "intentId": "init_conversation", "_id": {"$oid": "59aae7bd26f6f60007b06fb7"}, "speechResponse": "Hi, What can i do for you ?"}]

Diff for: examples/restaurant_search.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
[{"name": "restaurant search", "parameters": [{"prompt": "Tell me your location?", "required": true, "type": "free_text", "id": {"$oid": "5adb289f07440e00128fcfd6"}, "name": "location"}, {"prompt": "Ok. What type cuisine are you looking for?", "required": true, "type": "free_text", "id": {"$oid": "5adb289f07440e00128fcfd7"}, "name": "cuisine"}], "trainingData": [{"text": "i'm looking for a place in new york", "entities": [{"begin": 27, "end": 35, "name": "location", "value": "new york"}]}, {"text": "im looking for a place in banglore", "entities": [{"begin": 26, "end": 34, "name": "location", "value": "banglore"}]}, {"text": "looking for indian cuisine in new york", "entities": [{"begin": 12, "end": 18, "name": "cuisine", "value": "indian"}, {"begin": 30, "end": 38, "name": "location", "value": "new york"}]}, {"text": "central indian restaurant", "entities": [{"begin": 0, "end": 7, "name": "location", "value": "central"}, {"begin": 8, "end": 14, "name": "cuisine", "value": "indian"}]}, {"text": "I am looking for mexican indian fusion", "entities": [{"begin": 17, "end": 38, "name": "cuisine", "value": "mexican indian fusion"}]}, {"text": "I am looking a restaurant in 29432", "entities": [{"begin": 29, "end": 34, "name": "location", "value": "29432"}]}, {"text": "I am looking for asian fusion food", "entities": [{"begin": 17, "end": 29, "name": "cuisine", "value": "asian fusion"}]}, {"text": "anywhere near 18328", "entities": [{"begin": 14, "end": 19, "name": "location", "value": "18328"}]}, {"text": "anywhere in the west", "entities": [{"begin": 16, "end": 20, "name": "location", "value": "west"}]}, {"text": "search for restaurants", "entities": []}, {"text": "i am looking for an indian spot called olaolaolaolaolaola", "entities": [{"begin": 39, "end": 57, "name": "location", "value": "olaolaolaolaolaola"}, {"begin": 20, "end": 26, "name": "cuisine", "value": "indian"}]}, {"text": "show me a mexican place in the centre", "entities": [{"begin": 10, "end": 17, "name": 
"cuisine", "value": "mexican"}, {"begin": 31, "end": 37, "name": "location", "value": "centre"}]}, {"text": "show me chines restaurants in the north", "entities": [{"begin": 8, "end": 14, "name": "cuisine", "value": "chines"}, {"begin": 34, "end": 39, "name": "location", "value": "north"}]}, {"text": "show me chinese restaurants", "entities": [{"begin": 8, "end": 15, "name": "cuisine", "value": "chinese"}]}, {"text": "i'm looking for a place in the north of town", "entities": [{"begin": 31, "end": 44, "name": "location", "value": "north of town"}]}, {"text": "I am searching for a dinner spot", "entities": []}, {"text": "I want to grab lunch", "entities": []}, {"text": "i'm looking for a place to eat", "entities": []}], "apiTrigger": false, "intentId": "restaurant_search", "_id": {"$oid": "5adb265507440e00128fcfa1"}, "speechResponse": "Ok, I found following restaurants matching your query. Location => {{parameters[\"location\"] }} and Cuisine => {{parameters[\"cuisine\"] }}"}]
1+
[{"name": "Restaurant search", "parameters": [{"prompt": "Tell me your location?", "required": true, "type": "free_text", "id": {"$oid": "5adca3734ae5354f655fed86"}, "name": "location"}, {"prompt": "Ok. What type cuisine are you looking for?", "required": true, "type": "free_text", "id": {"$oid": "5adca3734ae5354f655fed87"}, "name": "cuisine"}], "userDefined": true, "trainingData": [{"text": "im looking for a place near banglore", "entities": [{"begin": 28, "end": 36, "name": "location", "value": "banglore"}]}, {"text": "i'm looking for a place to eat near down town la", "entities": [{"begin": 36, "end": 48, "name": "location", "value": "down town la"}]}, {"text": "i'm looking for a place in new york", "entities": [{"begin": 27, "end": 35, "name": "location", "value": "new york"}]}, {"text": "im looking for a place in banglore", "entities": [{"begin": 26, "end": 34, "name": "location", "value": "banglore"}]}, {"text": "looking for indian cuisine in new york", "entities": [{"begin": 12, "end": 18, "name": "cuisine", "value": "indian"}, {"begin": 30, "end": 38, "name": "location", "value": "new york"}]}, {"text": "central indian restaurant", "entities": [{"begin": 0, "end": 7, "name": "location", "value": "central"}, {"begin": 8, "end": 14, "name": "cuisine", "value": "indian"}]}, {"text": "I am looking for mexican indian fusion", "entities": [{"begin": 17, "end": 38, "name": "cuisine", "value": "mexican indian fusion"}]}, {"text": "I am looking a restaurant in 29432", "entities": [{"begin": 29, "end": 34, "name": "location", "value": "29432"}]}, {"text": "I am looking for asian fusion food", "entities": [{"begin": 17, "end": 29, "name": "cuisine", "value": "asian fusion"}]}, {"text": "anywhere near 18328", "entities": [{"begin": 14, "end": 19, "name": "location", "value": "18328"}]}, {"text": "anywhere in the west", "entities": [{"begin": 16, "end": 20, "name": "location", "value": "west"}]}, {"text": "search for restaurants", "entities": []}, {"text": "i am looking 
for an indian spot called olaolaolaolaolaola", "entities": [{"begin": 39, "end": 57, "name": "location", "value": "olaolaolaolaolaola"}, {"begin": 20, "end": 26, "name": "cuisine", "value": "indian"}]}, {"text": "show me a mexican place in the centre", "entities": [{"begin": 10, "end": 17, "name": "cuisine", "value": "mexican"}, {"begin": 31, "end": 37, "name": "location", "value": "centre"}]}, {"text": "show me chines restaurants in the north", "entities": [{"begin": 8, "end": 14, "name": "cuisine", "value": "chines"}, {"begin": 34, "end": 39, "name": "location", "value": "north"}]}, {"text": "show me chinese restaurants", "entities": [{"begin": 8, "end": 15, "name": "cuisine", "value": "chinese"}]}, {"text": "i'm looking for a place in the north of town", "entities": [{"begin": 31, "end": 44, "name": "location", "value": "north of town"}]}, {"text": "I am searching for a dinner spot", "entities": []}, {"text": "I want to grab lunch", "entities": []}, {"text": "i'm looking for a place to eat", "entities": []}], "apiTrigger": false, "intentId": "restaurant_search", "_id": {"$oid": "5adb265507440e00128fcfa1"}, "speechResponse": "Ok, I found following restaurants matching your query. 
Location => {{parameters[\"location\"] }} and Cuisine => {{parameters[\"cuisine\"] }}"}, {"name": "Greet", "trainingData": [{"text": "dear sir", "entities": []}, {"text": "good evening\n", "entities": []}, {"text": "good morning\n", "entities": []}, {"text": "hi\n", "entities": []}, {"text": "hello\n", "entities": []}, {"text": "hey there\n", "entities": []}, {"text": "howdy\n", "entities": []}, {"text": "hey", "entities": []}], "userDefined": true, "apiTrigger": false, "intentId": "greet", "_id": {"$oid": "5adc89ad4ae5353f40be85bd"}, "speechResponse": "Hello Sir"}, {"name": "Affirm", "trainingData": [{"text": "sounds really good", "entities": []}, {"text": "great choice\n", "entities": []}, {"text": "correct\n", "entities": []}, {"text": "right, thank you\n", "entities": []}, {"text": "great\n", "entities": []}, {"text": "ok\n", "entities": []}, {"text": "that's right\n", "entities": []}, {"text": "indeed\n", "entities": []}, {"text": "yeah\n", "entities": []}, {"text": "yep\n", "entities": []}, {"text": "yes\n", "entities": []}], "userDefined": true, "apiTrigger": false, "intentId": "affirm", "_id": {"$oid": "5adc8a3d4ae5353f40be85ca"}, "speechResponse": "I'm glad that you agree"}, {"name": "Goodbye", "trainingData": [{"text": "have a good one", "entities": []}, {"text": "Bye bye\n", "entities": []}, {"text": "farewell\n", "entities": []}, {"text": "end\n", "entities": []}, {"text": "stop\n", "entities": []}, {"text": "good bye\n", "entities": []}, {"text": "goodbye\n", "entities": []}, {"text": "bye", "entities": []}], "userDefined": true, "apiTrigger": false, "intentId": "goodbye", "_id": {"$oid": "5adc8a9c4ae5353f40be85cf"}, "speechResponse": "It was nice talking to you sir"}]

0 commit comments

Comments
 (0)