codeattackcommunity
diff --git a/‎README.md
Lines changed: 6 additions & 0 deletions b/‎README.md
Lines changed: 6 additions & 0 deletions
diff --git a/‎app/__init__.py
Lines changed: 3 additions & 1 deletion b/‎app/__init__.py
Lines changed: 3 additions & 1 deletion
diff --git a/‎app/agents/__init__.py b/‎app/agents/__init__.py
diff --git a/‎app/agents/controllers.py
Lines changed: 34 additions & 0 deletions b/‎app/agents/controllers.py
Lines changed: 34 additions & 0 deletions
diff --git a/‎app/agents/models.py
Lines changed: 7 additions & 0 deletions b/‎app/agents/models.py
Lines changed: 7 additions & 0 deletions
diff --git a/‎app/endpoint/controllers.py
Lines changed: 15 additions & 12 deletions b/‎app/endpoint/controllers.py
Lines changed: 15 additions & 12 deletions
diff --git a/‎app/intents/models.py
Lines changed: 1 addition & 0 deletions b/‎app/intents/models.py
Lines changed: 1 addition & 0 deletions
diff --git a/‎app/nlu/intent_classifer.py
Lines changed: 55 additions & 18 deletions b/‎app/nlu/intent_classifer.py
Lines changed: 55 additions & 18 deletions
diff --git a/‎app/nlu/nltk_preprocessor.py
Lines changed: 3 additions & 4 deletions b/‎app/nlu/nltk_preprocessor.py
Lines changed: 3 additions & 4 deletions
diff --git a/‎app/nlu/tasks.py
Lines changed: 3 additions & 1 deletion b/‎app/nlu/tasks.py
Lines changed: 3 additions & 1 deletion
@@ -17,6 +17,7 @@ After any of next methods, you will need to [import default intents](#restore),
 ```sh
 docker-compose build
 docker-compose up -d
+docker-compose exec iky_backend python manage.py init
 ```
 
 ### Using Docker
@@ -29,6 +30,9 @@ docker build -t iky_gateway:3.0.0 frontend/.
 # start iky backend
 docker run --name=iky_backend -e="APPLICATION_ENV=Production" iky_backend:3.0.0
 
+# setup default intents
+docker exec -it iky_backend python manage.py init
+
 # start iky gateway with frontend
 docker run --name=iky_gateway --link iky_backend:iky_backend -p 8080:80 iky_gateway:3.0.0
 
@@ -43,6 +47,8 @@ docker run --name=iky_gateway --link iky_backend:iky_backend -p 8080:80 iky_gate
 make setup
 
 make run_dev
+
+source venv/bin/activate && python manage.py init
 ```
 * Production
 ```sh
 
@@ -30,13 +30,15 @@
 def not_found(error):
     return "Not found", 404
 
-
+from app.agents.controllers import bots
 from app.nlu.controllers import nlu
 from app.intents.controllers import intents
 from app.train.controllers import train
 from app.endpoint.controllers import endpoint
 
+
 app.register_blueprint(nlu)
 app.register_blueprint(intents)
 app.register_blueprint(train)
 app.register_blueprint(endpoint)
+app.register_blueprint(bots)
@@ -0,0 +1,34 @@
+from flask import Blueprint, request
+
+from app.agents.models import Bot
+from app.commons import build_response
+
+bots = Blueprint('bots_blueprint', __name__,
+                    url_prefix='/agents/<bot_name>')
+
+
+
+@bots.route('/config', methods=['PUT'])
+def set_config(bot_name):
+    """
+    Update bot config: overwrite the named bot's stored config with the request's JSON body
+    :param bot_name: value of the <bot_name> URL segment; used to look up the Bot by name
+    :return: the response produced by build_response.sent_ok()
+    """
+
+    content = request.get_json(silent=True)  # silent=True: an unparsable body yields None instead of an error
+    bot = Bot.objects.get(name=bot_name)
+    bot.config = content  # NOTE(review): content may be None here — confirm saving it is intended
+    bot.save()
+    return build_response.sent_ok()
+
+@bots.route('/config', methods=['GET'])
+def get_config(bot_name):
+    """
+    Read bot config
+    :param bot_name: value of the <bot_name> URL segment; used to look up the Bot by name
+    :return: the bot's stored config, passed through build_response.build_json
+    """
+    bot = Bot.objects.get(name=bot_name)
+
+    return build_response.build_json(bot.config)
@@ -0,0 +1,7 @@
+from mongoengine.fields import StringField , Document,DictField
+
+class Bot(Document):  # per-bot settings document; other code in this change fetches bots by name
+    name = StringField(max_length=100, required=True, unique=True)
+    config = DictField(required=True,default={  # NOTE(review): dict-literal default — confirm it is not shared between instances, or pass a callable
+        "confidence_threshold": .70  # read by the endpoint's predict(): lower-confidence predictions use the fallback intent
+    })
@@ -73,34 +73,36 @@ def call_api(url, type, parameters, is_json=False):
 
 from app.nlu.intent_classifer import IntentClassifier
 
-PATH = "{}/{}".format(app.config["MODELS_DIR"],
-                      app.config["INTENT_MODEL_NAME"])
+with app.app_context():
+    PATH = "{}/{}".format(app.config["MODELS_DIR"],
+                          app.config["INTENT_MODEL_NAME"])
 
-sentence_classifier = IntentClassifier()
-sentence_classifier.load(PATH)
+    sentence_classifier = IntentClassifier()
+    sentence_classifier.load(PATH)
+    print("Intent Model loaded.")
 
 
 def update_model(app, message, **extra):
-    print(message)
     sentence_classifier.load(PATH)
-    print("model updated")
+    print("Intent Model updated")
 
 from app.nlu.tasks import model_updated_signal
 model_updated_signal.connect(update_model, app)
 
+from app.agents.models import Bot
 def predict(sentence):
     """
-    Predict Intent using Intent classifier
-    :param sentence:
-    :return:
+    Predict Intent using Intent classifier; use the fallback intent when unsure
+    :param sentence: text to classify
+    :return: tuple of (intent id, confidence); confidence is 1.0 for the fallback
     """
+    bot = Bot.objects.get(name="default")
     predicted = sentence_classifier.predict(sentence)
-
-    if not predicted:
-        return Intent.objects(
-            intentName=app.config["DEFAULT_FALLBACK_INTENT_NAME"]).first().id
+    print(predicted)
+    if not predicted or predicted["confidence"] < bot.config.get("confidence_threshold",.90):  # classifier returns None when no model is loaded
+        return Intent.objects(intentId=app.config["DEFAULT_FALLBACK_INTENT_NAME"]).first().id,1.0
     else:
-        return predicted["class"]
+        return predicted["intent"],predicted["confidence"]
 
 
 # Request Handler
@@ -135,7 +137,7 @@ def api():
             logger.info(request_json.get("input"), extra=result_json)
             return build_response.build_json(result_json)
 
-        intent_id = predict(request_json.get("input"))
+        intent_id,confidence = predict(request_json.get("input"))
         intent = Intent.objects.get(id=ObjectId(intent_id))
 
         if intent.parameters:
@@ -147,6 +149,7 @@ def api():
                 request_json.get("complete") is True)):
             result_json["intent"] = {
                 "name": intent.name,
+                "confidence":confidence,
                 "id": str(intent.id)
             }
 
 
@@ -71,6 +71,7 @@ class ApiDetails(EmbeddedDocument):
 
 class Intent(Document):
     name = StringField(max_length=100, required=True, unique=True)
+    userDefined = BooleanField(default=True)
     intentId = StringField(required=True)
     apiTrigger = BooleanField(required=True)
     apiDetails = EmbeddedDocumentField(ApiDetails)
 
@@ -1,10 +1,8 @@
-
 import cloudpickle
 from sklearn import preprocessing
 from sklearn.feature_extraction.text import TfidfVectorizer
-from sklearn.multiclass import OneVsRestClassifier
+from sklearn.svm import SVC
 from sklearn.pipeline import Pipeline
-from sklearn.svm import LinearSVC
 from app.nlu.nltk_preprocessor import NLTKPreprocessor
 
 
@@ -28,6 +26,7 @@ def train(self, X, y, outpath=None, verbose=True):
         :param verbose:
         :return:
         """
+
         def build(X, y=None):
             """
             Inner build function that builds a single model.
@@ -39,17 +38,27 @@ def build(X, y=None):
                 ('preprocessor', NLTKPreprocessor()),
                 ('vectorizer', TfidfVectorizer(
                     tokenizer=self.identity, preprocessor=None, lowercase=False)),
-                ('clf', OneVsRestClassifier(LinearSVC()))])
+                ('clf', SVC(C=1,
+                            probability=True,
+                            class_weight='balanced'))])
+
+            from sklearn.model_selection import GridSearchCV
+
+            Cs = [1, 2, 5, 10, 20, 100]
+            param_grid = {'clf__C': Cs, 'clf__kernel': ["linear"]}
+            grid_search = GridSearchCV(model,
+                                       param_grid=param_grid,
+                                       scoring='f1_weighted',
+                                       verbose=1)
+            grid_search.fit(X, y)
 
-            model.fit(X, y)
+            model = grid_search
             return model
 
-        # Label encode the targets
-        labels = preprocessing.MultiLabelBinarizer()
-        y = labels.fit_transform(y)
+        print(X)
+        print(len(y))
 
         model = build(X, y)
-        model.labels_ = labels
 
         if outpath:
             with open(outpath, 'wb') as f:
@@ -74,14 +83,42 @@ def predict(self, text):
         :param PATH:
         :return:
         """
+        return self.process(text)
+
+    def predict_proba(self, X):
+        """Rank every known class for each input text, most probable first.
+
+        :param X: list of raw input texts; the fitted pipeline applies its own preprocessing and vectorizing
+        :return: tuple of (class indices sorted by descending probability, the probabilities in that same order), both shaped (n_texts, n_classes)"""
+
+        import numpy as np
 
-        yhat = self.model.predict([
-            text
-        ])
-        if yhat.any():
-            return {
-                "class": self.model.labels_.inverse_transform(yhat)[0][0],
-                "accuracy": 1
-            }
+        pred_result = self.model.predict_proba(X)
+        # sort the probabilities retrieving the indices of the elements in sorted order
+        sorted_indices = np.fliplr(np.argsort(pred_result, axis=1))
+        return sorted_indices, pred_result[np.arange(len(pred_result))[:, None], sorted_indices]  # index row by row so each text keeps its own ordering
+
+    def process(self, x, return_type="intent", INTENT_RANKING_LENGTH=5):
+        """Classify text x: return the top {"intent", "confidence"} dict (None when no model is loaded), or the first INTENT_RANKING_LENGTH ranked dicts when return_type is not "intent"."""
+
+        if not self.model:
+            print("no class")
+            intent = None
+            intent_ranking = []
         else:
-            return False
+            intents, probabilities = self.predict_proba([x])
+            intents, probabilities = [self.model.classes_[intent] for intent in
+                                      intents.flatten()], probabilities.flatten()  # map class indices back to labels
+
+            if len(intents) > 0 and len(probabilities) > 0:
+                ranking = list(zip(list(intents), list(probabilities)))[:INTENT_RANKING_LENGTH]
+
+                intent = {"intent": intents[0], "confidence": probabilities[0]}
+                intent_ranking = [{"intent": intent_name, "confidence": score} for intent_name, score in ranking]
+            else:
+                intent = {"intent": None, "confidence": 0.0}  # same keys as the success path so callers can read "intent" safely
+                intent_ranking = []
+        if return_type == "intent":
+            return intent
+        else:
+            return intent_ranking
@@ -16,7 +16,7 @@ def __init__(self, stopwords=None, punct=None,
                  lower=True, strip=True):
         self.lower = lower
         self.strip = strip
-        #self.stopwords  = stopwords or set(sw.words('english'))
+        self.stopwords  = stopwords or set(sw.words('english'))
         self.punct = punct or set(string.punctuation)
         self.lemmatizer = WordNetLemmatizer()
 
@@ -30,7 +30,6 @@ def transform(self, X):
         return [
             list(self.tokenize(doc)) for doc in X
         ]
-
     def tokenize(self, document):
         # Break the document into sentences
         for sent in sent_tokenize(document):
@@ -43,8 +42,8 @@ def tokenize(self, document):
                 token = token.strip('*') if self.strip else token
 
                 # If stopword, ignore token and continue
-                # if token in self.stopwords:
-                #     continue
+                if token in self.stopwords:
+                    continue
 
                 # If punctuation, ignore token and continue
                 if all(char in self.punct for char in token):
 
@@ -39,8 +39,10 @@ def train_intent_classifier(intents):
     for intent in intents:
         training_data = intent.trainingData
         for example in training_data:
+            if example.get("text").strip() == "":
+                continue
             X.append(example.get("text"))
-            y.append([str(intent.id)])
+            y.append(str(intent.id))
 
     PATH = "{}/{}".format(app.config["MODELS_DIR"],
                           app.config["INTENT_MODEL_NAME"])