Skip to content

Commit a7d0890

Browse files
committed
Finished TensorFlow intent classifier. Still needs cleanup and fine-tuning.
1 parent 354e51a commit a7d0890

File tree

9 files changed

+158
-25
lines changed

9 files changed

+158
-25
lines changed

app/endpoint/controllers.py

+9-17
Original file line numberDiff line numberDiff line change
@@ -16,19 +16,11 @@
1616
endpoint = Blueprint('api', __name__, url_prefix='/api')
1717

1818
# Loading ML Models at app startup
19-
from app.nlu.intent_classifer import IntentClassifier
20-
21-
with app.app_context():
22-
PATH = "{}/{}".format(app.config["MODELS_DIR"],
23-
app.config["INTENT_MODEL_NAME"])
24-
25-
sentence_classifier = IntentClassifier()
26-
sentence_classifier.load(PATH)
27-
28-
synonyms = get_synonyms()
29-
entity_extraction = EntityExtractor(synonyms)
30-
app.logger.info("Intent Model loaded.")
19+
from app.nlu.classifiers.tf_intent_classifer import TfIntentClassifier
3120

21+
sentence_classifier = None
22+
synonyms = None
23+
entity_extraction = None
3224

3325
# Request Handler
3426
@endpoint.route('/v1', methods=['POST'])
@@ -202,7 +194,6 @@ def api():
202194
else:
203195
return abort(400)
204196

205-
206197
def update_model(app, message, **extra):
207198
"""
208199
Signal hook to be called after training is completed.
@@ -212,20 +203,21 @@ def update_model(app, message, **extra):
212203
:param extra:
213204
:return:
214205
"""
215-
sentence_classifier.load(PATH)
206+
global sentence_classifier
207+
208+
sentence_classifier = TfIntentClassifier()
209+
sentence_classifier.load(app.config["MODELS_DIR"])
216210
synonyms = get_synonyms()
217211
global entity_extraction
218212
entity_extraction = EntityExtractor(synonyms)
219213
app.logger.info("Intent Model updated")
220214

215+
update_model(app,"Modles updated")
221216

222217
from app.nlu.tasks import model_updated_signal
223-
224218
model_updated_signal.connect(update_model, app)
225219

226220
from app.agents.models import Bot
227-
228-
229221
def predict(sentence):
230222
"""
231223
Predict Intent using Intent classifier

app/nlu/classifiers/__init__.py

Whitespace-only changes.
+136
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
import numpy as np
2+
import tensorflow as tf
3+
import spacy
4+
from sklearn.preprocessing import LabelBinarizer
5+
import os
6+
import cloudpickle
7+
import time
8+
9+
class TfIntentClassifier():
    """Intent classifier: a small Keras dense network over spaCy document vectors.

    ``train`` fits the network and (optionally) writes the Keras model and the
    fitted label encoder into a directory; ``load`` restores both from that
    directory; ``predict`` / ``process`` classify a raw text string.
    """

    # File names used inside ``models_dir`` by both ``train`` and ``load``.
    MODEL_FILE_NAME = "tf_intent_model.hd5"
    LABELS_FILE_NAME = "labels.pkl"
    # spaCy context vector size; only used when the width cannot be read from
    # the training matrix itself (e.g. no training examples were supplied).
    DEFAULT_VECTOR_SIZE = 384

    def __init__(self):
        self.model = None
        self.nlp = spacy.load('en')
        self.label_encoder = LabelBinarizer()
        # Graph the model lives in; predictions must run under this graph.
        self.graph = None

    @staticmethod
    def _create_model(input_dim, num_labels):
        """Define, compile and return the Keras model.

        :param input_dim: width of one input vector
        :param num_labels: number of distinct intent labels (output units)
        :return: compiled ``tf.keras.Sequential`` model
        """
        model = tf.keras.Sequential()
        model.add(tf.keras.layers.Dense(512, activation=tf.nn.relu, input_shape=(input_dim,)))
        model.add(tf.keras.layers.Dense(num_labels, activation=tf.nn.relu))
        model.add(tf.keras.layers.Dense(num_labels, activation=tf.nn.softmax))

        model.compile(loss='categorical_crossentropy',
                      optimizer='rmsprop',
                      metrics=['accuracy'])

        model.summary()
        return model

    def train(self, X, y, models_dir=None, verbose=True):
        """
        Train the intent classifier on the given training data.

        :param X: iterable of raw example sentences
        :param y: iterable of labels, one per sentence in ``X``
        :param models_dir: directory to write the model and label encoder to;
            nothing is written when this is falsy
        :param verbose: print the output paths after writing
        :return: None
        """
        # Build the spaCy document-vector matrix, one row per example.
        x_train = np.array([list(self.nlp(x).vector) for x in X])

        # Size the input layer from the vectors actually produced instead of
        # assuming a fixed width, so a different spaCy model still trains.
        if x_train.ndim == 2:
            input_dim = x_train.shape[1]
        else:
            input_dim = self.DEFAULT_VECTOR_SIZE

        num_labels = len(set(y))
        self.label_encoder.fit(y)
        y_train = self.label_encoder.transform(y)

        # LabelBinarizer emits a single 0/1 column for exactly two classes
        # (1 == classes_[1]); expand it to two one-hot columns so the target
        # matches the 2-unit softmax output.
        if num_labels == 2 and y_train.ndim == 2 and y_train.shape[1] == 1:
            y_train = np.hstack((1 - y_train, y_train))

        # Drop the previous model (keeping the attribute defined) and reset
        # the Keras session before building a new one.
        self.model = None
        tf.keras.backend.clear_session()
        # NOTE(review): fixed pause kept from the original; presumably gives
        # the cleared session time to settle -- confirm whether it is needed.
        time.sleep(3)

        self.model = self._create_model(input_dim, num_labels)
        # Remember the graph so predict_proba works on a freshly trained
        # instance, not only after load().
        self.graph = tf.get_default_graph()

        # start training
        self.model.fit(x_train, y_train, shuffle=True, epochs=50, verbose=1)

        if models_dir:
            model_path = os.path.join(models_dir, self.MODEL_FILE_NAME)
            tf.keras.models.save_model(self.model, model_path)
            if verbose:
                print("TF Model written out to {}".format(model_path))

            labels_path = os.path.join(models_dir, self.LABELS_FILE_NAME)
            # Context manager so the file handle is closed deterministically.
            with open(labels_path, 'wb') as f:
                cloudpickle.dump(self.label_encoder, f)
            if verbose:
                print("Labels written out to {}".format(labels_path))

    def load(self, models_dir):
        """
        Load the Keras model and the label encoder from ``models_dir``.

        :param models_dir: directory containing the files written by ``train``
        :return: True when both files were loaded, False when an IOError
            occurred (the classifier is then left without a model)
        """
        # Reset instead of ``del`` so the attributes always exist, even when
        # loading fails part-way through.
        self.model = None
        self.graph = None
        try:
            tf.keras.backend.clear_session()
            self.model = tf.keras.models.load_model(
                os.path.join(models_dir, self.MODEL_FILE_NAME), compile=True)
            self.graph = tf.get_default_graph()
            print("Tf model loaded")
            # NOTE: unpickling executes arbitrary code; models_dir must only
            # ever contain files written by this application.
            with open(os.path.join(models_dir, self.LABELS_FILE_NAME), 'rb') as f:
                self.label_encoder = cloudpickle.load(f)
            print("Labels model loaded")
        except IOError:
            # A model without its label encoder cannot be used for prediction.
            self.model = None
            self.graph = None
            return False
        return True

    def predict(self, text):
        """
        Predict the intent for the given text.

        :param text: raw input text
        :return: same as ``process(text)`` with its default arguments
        """
        return self.process(text)

    def predict_proba(self, x):
        """Score every label for a raw input text.

        :param x: raw input text (converted to a spaCy document vector here)
        :return: tuple of the label indices sorted by descending score, and
            the model scores indexed by those sorted indices
        """
        doc_vector = self.nlp(x).vector
        # Run the prediction under the graph the model was built/loaded in.
        with self.graph.as_default():
            pred_result = self.model.predict(np.array([doc_vector]))
        sorted_indices = np.fliplr(np.argsort(pred_result, axis=1))
        return sorted_indices, pred_result[:, sorted_indices]

    def process(self, x, return_type="intent", INTENT_RANKING_LENGTH=5):
        """Return the most likely intent, or the intent ranking, for the input text.

        :param x: raw input text
        :param return_type: ``"intent"`` returns the top intent; any other
            value returns the ranking list
        :param INTENT_RANKING_LENGTH: maximum number of entries in the ranking
        :return: ``{"intent": ..., "confidence": ...}`` (or None when no model
            is loaded), or a list of such dicts for the ranking
        """
        if self.model is None:
            print("no class")
            intent = None
            intent_ranking = []
        else:
            indices, scores = self.predict_proba(x)
            intents = [self.label_encoder.classes_[i] for i in indices.flatten()]
            probabilities = scores.flatten()

            if len(intents) > 0 and len(probabilities) > 0:
                ranking = list(zip(intents, probabilities))[:INTENT_RANKING_LENGTH]

                intent = {"intent": intents[0],
                          "confidence": float("%.2f" % probabilities[0])}
                intent_ranking = [{"intent": intent_name,
                                   "confidence": float("%.2f" % score)}
                                  for intent_name, score in ranking]
            else:
                # Same keys as the normal result so callers can read it uniformly.
                intent = {"intent": None, "confidence": 0.0}
                intent_ranking = []

        if return_type == "intent":
            return intent
        return intent_ranking

app/nlu/intent_classifer.py

+1
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ def predict_proba(self, X):
9494
import numpy as np
9595

9696
pred_result = self.model.predict_proba(X)
97+
print(pred_result)
9798
# sort the probabilities retrieving the indices of the elements in sorted order
9899
sorted_indices = np.fliplr(np.argsort(pred_result, axis=1))
99100
return sorted_indices, pred_result[:, sorted_indices]

app/nlu/tasks.py

+3-5
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from app.intents.models import Intent
44

55
from app import app
6-
from app.nlu.intent_classifer import IntentClassifier
6+
from app.nlu.classifiers.tf_intent_classifer import TfIntentClassifier
77

88
from app import my_signals
99
model_updated_signal = my_signals.signal('model-updated')
@@ -44,12 +44,10 @@ def train_intent_classifier(intents):
4444
X.append(example.get("text"))
4545
y.append(str(intent.id))
4646

47-
PATH = "{}/{}".format(app.config["MODELS_DIR"],
48-
app.config["INTENT_MODEL_NAME"])
49-
intent_classifier = IntentClassifier()
47+
intent_classifier = TfIntentClassifier()
5048
intent_classifier.train(X,
5149
y,
52-
outpath=PATH, verbose=False)
50+
models_dir=app.config["MODELS_DIR"], verbose=True)
5351

5452

5553
def train_all_ner(story_id, training_data):

dockerfile

+1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ RUN python -m nltk.downloader "averaged_perceptron_tagger"; python
99
RUN python -m nltk.downloader "punkt"; python
1010
RUN python -m nltk.downloader "stopwords"; python
1111
RUN python -m nltk.downloader "wordnet"; python
12+
RUN python -m spacy download en; python
1213

1314
EXPOSE 8080
1415

model_files/.gitignore

+3-1
Original file line numberDiff line numberDiff line change
@@ -1 +1,3 @@
1-
*.model
1+
*.model
2+
*.pkl
3+
*.hd5

requirements.txt

+4-1
Original file line numberDiff line numberDiff line change
@@ -24,4 +24,7 @@ gevent
2424
pytest
2525
cloudpickle
2626
flask_script
27-
blinker
27+
blinker
28+
h5py
29+
spacy
30+
tensorflow

run.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
from app import app
22

33
if __name__ == '__main__':
4-
app.run(host='0.0.0.0', port=8080, debug=True, threaded=True)
4+
app.run(host='0.0.0.0', port=8080, debug=False, threaded=False)

0 commit comments

Comments
 (0)