Refactor feature calculation in GenericAgent

leonvanbokhorst · leonvanbokhorst · commit dc2373499bc3 · 2024-10-15T14:31:08.000+02:00
diff --git a/src/active_inference_forager/agents/generic_agent.py b/src/active_inference_forager/agents/generic_agent.py
@@ -293,7 +293,7 @@ def process_user_input(self, user_input: str) -> np.ndarray:
         # Basic text statistics
         words = user_input.split()
         features[0] = len(words)  # Word count
-        features[1] = len(user_input) / max(len(words), 1)  # Average word length
+        features[1] = sum(len(word) for word in words) / max(len(words), 1)  # Average word length
         features[2] = user_input.count("?") / max(len(words), 1)  # Question mark frequency
         features[3] = user_input.count("!") / max(len(words), 1)  # Exclamation mark frequency
 
@@ -304,15 +304,15 @@ def process_user_input(self, user_input: str) -> np.ndarray:
 
         # Keyword detection
         keywords = ["help", "explain", "understand", "confused", "clarify"]
-        features[6] = sum(bool(word.lower() in keywords)
+        features[6] = sum(word.lower() in keywords for word in words) / max(len(words), 1)
 
         # Complexity indicators
         features[7] = len(set(words)) / max(len(words), 1)  # Lexical diversity
         features[8] = sum(len(word) > 6 for word in words) / max(len(words), 1)  # Proportion of long words
 
         # Politeness indicator
         polite_words = ["please", "thank", "thanks", "appreciate", "kindly"]
-        features[9] = sum(bool(word.lower() in polite_words)
+        features[9] = sum(word.lower() in polite_words for word in words) / max(len(words), 1)
 
         # spaCy processing
         doc = self.nlp(user_input)
@@ -341,4 +341,4 @@ def tree_depth(token):
         features[15] = len([token for token in doc if token.is_stop]) / max(len(words), 1)  # Stop word density
         features[16] = len([token for token in doc if token.is_punct]) / max(len(words), 1)  # Punctuation density
 
-        return features.astype(float)
+        return features
diff --git a/tests/unit/test_dqn_fep_agent.py b/tests/unit/test_dqn_fep_agent.py
@@ -4,16 +4,6 @@
 from active_inference_forager.agents.generic_agent import GenericAgent, ExperienceReplayBuffer, DQN
 
 
-@pytest.fixture
-def agent():
-    action_space = [
-        "ask_question",
-        "provide_information",
-        "clarify",
-        "suggest_action",
-        "express_empathy",
-        "end_conversation",
-    ]
 @pytest.fixture
 def agent():
     action_space = [
@@ -112,10 +102,10 @@ def test_process_user_input(agent):
     assert isinstance(features, np.ndarray)
     assert features.shape == (17,)
     assert features[0] == 8  # Word count
-    assert features[1] > 0  # Average word length
-    assert features[2] == 1 / 8  # Question mark frequency
-    assert features[3] == 1 / 8  # Exclamation mark frequency
-    assert -1 <= features[4] <= 1  # Sentiment polarity
+    assert features[2] == 0.125  # Question mark frequency
+    assert features[3] == 0.125  # Exclamation mark frequency
+    assert 0 < features[1] < 5  # Average word length
+    assert -1.0 <= features[4] <= 1.0  # Sentiment polarity
     assert 0 <= features[5] <= 1  # Subjectivity
     assert 0 <= features[6] <= 1  # Keyword detection
     assert 0 <= features[7] <= 1  # Lexical diversity
@@ -127,16 +117,7 @@ def test_process_user_input(agent):
     assert 0 <= features[13] <= 1  # Main clause density
     assert features[14] > 0  # Average parse tree depth
     assert 0 <= features[15] <= 1  # Stop word density
-def test_process_user_input(agent):
-    user_input = "Hello, how are you? I'm feeling great today!"
-    features = agent.process_user_input(user_input)
-    assert isinstance(features, np.ndarray)
-    assert features.shape == (17,)
-    assert features[0] == 8  # Word count
-    assert features[2] == 0.125  # Question mark frequency
-    assert features[3] == 0.125  # Exclamation mark frequency
-    assert 0 < features[1] < 5  # Average word length
-    assert -0.9 < features[4] < 0.9  # Sentiment polarity
+    assert 0 <= features[16] <= 1  # Punctuation density
 
 
 if __name__ == "__main__":