Skip to content

Commit cdfe971

Browse files
hadifar authored and alfredfrancis committed
fix some typo and docstring (alfredfrancis#88)
* utf8 language issue fixes
* pep8 fixes
* made word2vec support configurable via config.py
1 parent 12dc355 commit cdfe971

13 files changed

+55
-75
lines changed

app/agents/controllers.py

-1
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@ def set_config(bot_name):
2626
def get_config(bot_name):
2727
"""
2828
Update bot config
29-
:param json:
3029
:return:
3130
"""
3231
bot = Bot.objects.get(name=bot_name)

app/endpoint/controllers.py

+13-11
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
from app import app
99
from app.agents.models import Bot
1010
from app.commons import build_response
11-
from app.commons.logger import logger
1211
from app.endpoint.utils import SilentUndefined
1312
from app.endpoint.utils import call_api
1413
from app.endpoint.utils import get_synonyms
@@ -57,8 +56,7 @@ def api():
5756

5857
if request_json:
5958

60-
context = {}
61-
context["context"] = request_json["context"]
59+
context = {"context": request_json["context"]}
6260

6361
if app.config["DEFAULT_WELCOME_INTENT_NAME"] in request_json.get(
6462
"input"):
@@ -73,10 +71,10 @@ def api():
7371
undefined=SilentUndefined)
7472
result_json["speechResponse"] = split_sentence(template.render(**context))
7573

76-
logger.info(request_json.get("input"), extra=result_json)
74+
app.logger.info(request_json.get("input"), extra=result_json)
7775
return build_response.build_json(result_json)
7876

79-
intent_id, confidence, suggetions = predict(request_json.get("input"))
77+
intent_id, confidence, suggestions = predict(request_json.get("input"))
8078
app.logger.info("intent_id => %s" % intent_id)
8179
intent = Intent.objects.get(intentId=intent_id)
8280

@@ -145,9 +143,8 @@ def api():
145143

146144
if len(result_json["missingParameters"]) == 0:
147145
result_json["complete"] = True
148-
context = {}
149-
context["parameters"] = result_json["extractedParameters"]
150-
context["context"] = request_json["context"]
146+
context = {"parameters": result_json["extractedParameters"],
147+
"context": request_json["context"]}
151148
else:
152149
missing_parameter = result_json["missingParameters"][0]
153150
result_json["complete"] = False
@@ -194,7 +191,7 @@ def api():
194191
template = Template(intent.speechResponse,
195192
undefined=SilentUndefined)
196193
result_json["speechResponse"] = split_sentence(template.render(**context))
197-
logger.info(request_json.get("input"), extra=result_json)
194+
app.logger.info(request_json.get("input"), extra=result_json)
198195
return build_response.build_json(result_json)
199196
else:
200197
return abort(400)
@@ -211,15 +208,20 @@ def update_model(app, message, **extra):
211208
"""
212209
global sentence_classifier
213210

214-
sentence_classifier = EmbeddingIntentClassifier.load(app.config["MODELS_DIR"])
211+
sentence_classifier = EmbeddingIntentClassifier.load(
212+
app.config["MODELS_DIR"], app.config["USE_WORD_VECTORS"])
213+
215214
synonyms = get_synonyms()
215+
216216
global entity_extraction
217+
217218
entity_extraction = EntityExtractor(synonyms)
219+
218220
app.logger.info("Intent Model updated")
219221

220222

221223
with app.app_context():
222-
update_model(app, "Modles updated")
224+
update_model(app, "Models updated")
223225

224226
model_updated_signal.connect(update_model, app)
225227

app/endpoint/utils.py

+7-18
Original file line numberDiff line numberDiff line change
@@ -35,36 +35,25 @@ def call_api(url, type, headers={}, parameters={}, is_json=False):
3535
:param is_json:
3636
:return:
3737
"""
38-
app.logger.info("Initiating API Call with following info:"
39-
" url => {} payload => {}".format(url, parameters))
40-
38+
app.logger.info("Initiating API Call with following info: url => {} payload => {}".format(url, parameters))
4139
if "GET" in type:
42-
response = requests.get(url, headers=headers,
43-
params=parameters, timeout=5)
40+
response = requests.get(url, headers=headers, params=parameters, timeout=5)
4441
elif "POST" in type:
4542
if is_json:
46-
response = requests.post(url, headers=headers,
47-
json=parameters, timeout=5)
43+
response = requests.post(url, headers=headers, json=parameters, timeout=5)
4844
else:
49-
response = requests.post(url, headers=headers,
50-
params=parameters, timeout=5)
45+
response = requests.post(url, headers=headers, params=parameters, timeout=5)
5146
elif "PUT" in type:
5247
if is_json:
53-
response = requests.put(url, headers=headers,
54-
json=parameters, timeout=5)
48+
response = requests.put(url, headers=headers, json=parameters, timeout=5)
5549
else:
56-
response = requests.put(url, headers=headers,
57-
params=parameters, timeout=5)
50+
response = requests.put(url, headers=headers, params=parameters, timeout=5)
5851
elif "DELETE" in type:
59-
response = requests.delete(url, headers=headers,
60-
params=parameters, timeout=5)
52+
response = requests.delete(url, headers=headers, params=parameters, timeout=5)
6153
else:
6254
raise Exception("unsupported request method.")
63-
6455
result = json.loads(response.text)
65-
6656
app.logger.info("API response => %s", result)
67-
6857
return result
6958

7059

app/entities/controllers.py

+1-3
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
def create_entity():
1515
"""
1616
Create a story from the provided json
17-
:param json:
1817
:return:
1918
"""
2019
content = request.get_json(silent=True)
@@ -60,8 +59,7 @@ def read_entity(id):
6059
def update_entity(id):
6160
"""
6261
Update a story from the provided json
63-
:param intent_id:
64-
:param json:
62+
:param id:
6563
:return:
6664
"""
6765
json_data = loads(request.get_data())

app/intents/controllers.py

+8-11
Original file line numberDiff line numberDiff line change
@@ -70,8 +70,8 @@ def read_intents():
7070
find list of intents for the agent
7171
:return:
7272
"""
73-
intents = Intent.objects
74-
return build_response.sent_json(intents.to_json())
73+
all_intents = Intent.objects
74+
return build_response.sent_json(all_intents.to_json())
7575

7676

7777
@intents.route('/<id>')
@@ -83,8 +83,7 @@ def read_intent(id):
8383
"""
8484
return Response(response=dumps(
8585
Intent.objects.get(
86-
id=ObjectId(
87-
id)).to_mongo().to_dict()),
86+
id=ObjectId(id)).to_mongo().to_dict()),
8887
status=200,
8988
mimetype="application/json")
9089

@@ -93,8 +92,6 @@ def read_intent(id):
9392
def update_intent(id):
9493
"""
9594
Update a story from the provided json
96-
:param intent_id:
97-
:param json:
9895
:return:
9996
"""
10097
json_data = loads(request.get_data())
@@ -118,7 +115,7 @@ def delete_intent(id):
118115
except BaseException:
119116
pass
120117

121-
# remove NER model for the deleted stoy
118+
# remove NER model for the deleted story
122119
try:
123120
os.remove("{}/{}.model".format(app.config["MODELS_DIR"], id))
124121
except OSError:
@@ -154,18 +151,18 @@ def import_intents():
154151
if 'file' not in request.files:
155152
abort(400, 'No file part')
156153
json_file = request.files['file']
157-
intents = import_json(json_file)
154+
all_intents = import_json(json_file)
158155

159-
return build_response.build_json({"num_intents_created": len(intents)})
156+
return build_response.build_json({"num_intents_created": len(all_intents)})
160157

161158

162159
def import_json(json_file):
163160
json_data = json_file.read()
164161
# intents = Intent.objects.from_json(json_data)
165-
intents = loads(json_data)
162+
all_intents = loads(json_data)
166163

167164
creates_intents = []
168-
for intent in intents:
165+
for intent in all_intents:
169166
new_intent = Intent()
170167
new_intent = update_document(new_intent, intent)
171168
new_intent.save()

app/nlu/classifiers/sklearn_intent_classifer.py

+2-8
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from nltk.corpus import stopwords
77
from sklearn.feature_extraction.stop_words import ENGLISH_STOP_WORDS
88
from sklearn.feature_extraction.text import TfidfVectorizer
9+
from sklearn.model_selection import GridSearchCV
910
from sklearn.pipeline import Pipeline
1011
from sklearn.svm import SVC
1112

@@ -76,8 +77,6 @@ def build(X, y=None):
7677
probability=True, class_weight='balanced')
7778
)])
7879

79-
from sklearn.model_selection import GridSearchCV
80-
8180
items, counts = np.unique(y, return_counts=True)
8281

8382
cv_splits = max(2, min(5, np.min(counts) // 5))
@@ -108,7 +107,7 @@ def build(X, y=None):
108107

109108
def load(self, PATH):
110109
"""
111-
load trained model froom given path
110+
load trained model from given path
112111
:param PATH:
113112
:return:
114113
"""
@@ -121,9 +120,6 @@ def load(self, PATH):
121120
def predict(self, text, return_all=False, INTENT_RANKING_LENGTH=5):
122121
"""
123122
Predict class label for given model
124-
:param text:
125-
:param PATH:
126-
:return:
127123
"""
128124
return self.process(text, return_all, INTENT_RANKING_LENGTH)
129125

@@ -135,8 +131,6 @@ def predict_proba(self, X):
135131
:return: tuple of first, the most probable label
136132
and second, its probability"""
137133

138-
import numpy as np
139-
140134
pred_result = self.model.predict_proba(X)
141135
print(pred_result)
142136
# sort the probabilities retrieving the indices of the elements

app/nlu/classifiers/starspace_intent_classifier.py

+15-10
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,14 @@
77

88
import io
99
import os
10+
import re
1011

1112
import cloudpickle as pickle
1213
import numpy as np
1314
import spacy
1415
import tensorflow as tf
1516
from flask import current_app as app
17+
from sklearn.feature_extraction.text import CountVectorizer
1618

1719

1820
class EmbeddingIntentClassifier:
@@ -197,12 +199,16 @@ def _create_intent_dict(training_data):
197199
def _create_intent_token_dict(intents, intent_split_symbol):
198200
"""Create intent token dictionary"""
199201

200-
distinct_tokens = set([token
201-
for intent in intents
202-
for token in intent.split(intent_split_symbol)
203-
])
202+
distinct_tokens = set()
203+
204+
for intent in intents:
205+
for token in intent.split(intent_split_symbol):
206+
distinct_tokens.add(token)
207+
208+
distinct_tokens = sorted(distinct_tokens)
209+
204210
return {token: idx for idx, token in
205-
enumerate(sorted(distinct_tokens))}
211+
enumerate(distinct_tokens)}
206212

207213
def _create_encoded_intents(self, intent_dict):
208214
"""Create matrix with intents encoded in rows as bag of words,
@@ -408,9 +414,6 @@ def _lemmatize(self, message):
408414

409415
def prepare_training_data(self, X, y):
410416

411-
from sklearn.feature_extraction.text import CountVectorizer
412-
import re
413-
414417
training_data = {
415418
"intent_examples": []
416419
}
@@ -420,8 +423,7 @@ def prepare_training_data(self, X, y):
420423
token_pattern=r'(?u)\b\w\w+\b',
421424
strip_accents=None,
422425
stop_words=None,
423-
ngram_range=(1,
424-
1),
426+
ngram_range=(1, 1),
425427
max_df=1.0,
426428
min_df=1,
427429
max_features=None,
@@ -564,7 +566,9 @@ def process(self, query, INTENT_RANKING_LENGTH=5):
564566
"confidence": message_sim[0]}
565567

566568
ranking = list(zip(list(intent_ids), message_sim))
569+
567570
ranking = ranking[:INTENT_RANKING_LENGTH]
571+
568572
intent_ranking = [{"intent": self.inv_intent_dict[intent_idx],
569573
"confidence": score}
570574
for intent_idx, score in ranking]
@@ -627,6 +631,7 @@ def load(cls, model_dir=None, use_word_vectors=False):
627631
app.logger.warning("Failed to load nlu model. Maybe path {} "
628632
"doesn't exist"
629633
"".format(os.path.abspath(model_dir)))
634+
630635
return EmbeddingIntentClassifier()
631636

632637
def persist(self, model_dir):

app/nlu/classifiers/tf_intent_classifer.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ def train(self, X, y, models_dir=None, verbose=True):
2626
Train intent classifier for given training data
2727
:param X:
2828
:param y:
29+
:param models_dir:
2930
:param verbose:
3031
:return:
3132
"""
@@ -115,7 +116,6 @@ def predict(self, text):
115116
"""
116117
Predict class label for given model
117118
:param text:
118-
:param PATH:
119119
:return:
120120
"""
121121
return self.process(text)

app/nlu/entity_extractor.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ def train(self, train_sentences, model_name):
126126
trainer.train('model_files/%s.model' % model_name)
127127
return True
128128

129-
# Extract Labeles from BIO tagged sentence
129+
# Extract Labels from BIO tagged sentence
130130
def crf2json(self, tagged_sentence):
131131
"""
132132
Extract label-value pair from NER prediction output

app/nlu/tasks.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ def train_intent_classifier(intents):
4949
X.append(example.get("text"))
5050
y.append(str(intent.intentId.encode('utf8')))
5151

52-
intent_classifier = EmbeddingIntentClassifier()
52+
intent_classifier = EmbeddingIntentClassifier(use_word_vectors=app.config['USE_WORD_VECTORS'])
5353
intent_classifier.train(X, y)
5454
intent_classifier.persist(model_dir=app.config["MODELS_DIR"])
5555

app/train/controllers.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ def save_training_data(story_id):
2424
@train.route('/<story_id>/data', methods=['GET'])
2525
def get_training_data(story_id):
2626
"""
27-
retrive training data for a given story
27+
retrieve training data for a given story
2828
:param story_id:
2929
:return:
3030
"""

config.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,3 @@
1-
import os
2-
3-
41
class Config(object):
52
DEBUG = False
63
MONGODB_DB = "iky-ai"
@@ -9,11 +6,12 @@ class Config(object):
96
MONGODB_USERNAME = ""
107
MONGODB_USERNAME = ""
118

12-
# Intent Classifier model detials
9+
# Intent Classifier model details
1310
MODELS_DIR = "model_files/"
1411
INTENT_MODEL_NAME = "intent.model"
1512
DEFAULT_FALLBACK_INTENT_NAME = "fallback"
1613
DEFAULT_WELCOME_INTENT_NAME = "init_conversation"
14+
USE_WORD_VECTORS = True
1715

1816

1917
class Development(Config):

0 commit comments

Comments
 (0)