from active_inference_forager.agents.base_agent import BaseAgent
from active_inference_forager.agents.belief_node import BeliefNode
- from active_inference_forager.utils.numpy_fields import NumpyArrayField


class ExperienceReplayBuffer:
@@ -41,7 +40,7 @@ def forward(self, x):
        return self.network(x)


- class DQNFEPAgent(BaseAgent):
+ class GenericAgent(BaseAgent):
    # FEP-related parameters
    max_kl: float = Field(default=10.0)
    max_fe: float = Field(default=100.0)
@@ -86,12 +85,10 @@ class DQNFEPAgent(BaseAgent):
    def __init__(self, state_dim: int, action_dim: int, **kwargs):
        super().__init__(state_dim=state_dim, action_dim=action_dim, **kwargs)

-         # Initialize root belief with correct dimensions
        self.root_belief = BeliefNode(
            mean=np.zeros(state_dim), precision=np.eye(state_dim) * 0.1
        )

-         # Initialize DQN components
        self.q_network = self._build_network()
        self.target_network = self._build_network()
        self.target_network.load_state_dict(self.q_network.state_dict())
@@ -127,11 +124,9 @@ def learn(
        batch = self.replay_buffer.sample(self.batch_size)
        states, actions, rewards, next_states, dones = zip(*batch)

-         # Convert lists of numpy arrays to single numpy arrays
        states = np.array(states)
        next_states = np.array(next_states)

-         # Convert numpy arrays to tensors
        states = torch.FloatTensor(states).to(self.device)
        actions = torch.LongTensor(
            [self.action_space.index(action) for action in actions]
@@ -153,20 +148,32 @@ def learn(
        self.soft_update_target_network()
        self.decay_exploration()

+     def interpret_action(self, action: str) -> str:
+         """
+         Interpret the agent's action in a human-readable format.
+         """
+         action_interpretations = {
+             "ask_question": "The agent decides to ask a question to gather more information.",
+             "provide_information": "The agent provides relevant information to the user.",
+             "clarify": "The agent attempts to clarify a point or resolve any confusion.",
+             "suggest_action": "The agent suggests a specific action or solution to the user.",
+             "express_empathy": "The agent expresses empathy or understanding towards the user's situation.",
+             "end_conversation": "The agent determines it's appropriate to end the conversation.",
+         }
+         return action_interpretations.get(action, f"Unknown action: {action}")
+
    def update_belief(self, observation: np.ndarray) -> None:
-         # Validate that observation is numeric
        if not np.issubdtype(observation.dtype, np.number):
            raise ValueError("Observation must be a numeric array.")
-
+
        self._update_belief_recursive(self.root_belief, observation)
        self._regularize_beliefs()

    def _update_belief_recursive(self, node: BeliefNode, observation: np.ndarray):
-         # Ensure observation is a numpy array of floats
        observation = np.asarray(observation)
        if observation.dtype != node.mean.dtype:
            observation = observation.astype(node.mean.dtype)
-
+
        prediction_error = observation - node.mean
        node.precision += (
            np.outer(prediction_error, prediction_error) * self.learning_rate
@@ -190,13 +197,7 @@ def update_free_energy(self):
        self.free_energy = self._calculate_free_energy_recursive(self.root_belief)

    def _build_network(self):
-         return nn.Sequential(
-             nn.Linear(self.state_dim, 128),
-             nn.ReLU(),
-             nn.Linear(128, 128),
-             nn.ReLU(),
-             nn.Linear(128, self.action_dim),
-         )
+         return DQN(self.state_dim, self.action_dim).to(self.device)

    def _calculate_free_energy_recursive(self, node: BeliefNode) -> float:
        kl_divergence = self._kl_divergence(node)
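The new `_build_network` delegates to a `DQN` module that this diff only shows in part (its `forward` simply returns `self.network(x)` in the first hunk). A minimal sketch of what that module could look like, assuming it wraps the same layer stack as the deleted `nn.Sequential`; the real class in the repository may differ:

```python
import torch.nn as nn


class DQN(nn.Module):
    """Sketch of the Q-network referenced by _build_network.

    Hidden-layer sizes mirror the nn.Sequential removed in this diff;
    the actual DQN implementation may differ.
    """

    def __init__(self, state_dim: int, action_dim: int):
        super().__init__()
        self.network = nn.Sequential(
            nn.Linear(state_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 128),
            nn.ReLU(),
            nn.Linear(128, action_dim),
        )

    def forward(self, x):
        # Matches the forward shown at the top of this diff.
        return self.network(x)
```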
@@ -272,48 +273,20 @@ def _build_belief_hierarchy(self, node: BeliefNode, level: int):
            node.children[action] = child
            self._build_belief_hierarchy(child, level + 1)

-     def interpret_action(self, action: str) -> str:
-         """
-         Interpret the agent's action in a human-readable format.
-         """
-         action_interpretations = {
-             "ask_question": "The agent decides to ask a question to gather more information.",
-             "provide_information": "The agent provides relevant information to the user.",
-             "clarify": "The agent attempts to clarify a point or resolve any confusion.",
-             "suggest_action": "The agent suggests a specific action or solution to the user.",
-             "express_empathy": "The agent expresses empathy or understanding towards the user's situation.",
-             "end_conversation": "The agent determines it's appropriate to end the conversation.",
-         }
-         return action_interpretations.get(action, f"Unknown action: {action}")
-
    def process_user_input(self, user_input: str) -> np.ndarray:
-         """
-         Simple natural language processing to extract features from user input.
-         """
-         # This is a very basic implementation and can be expanded with more sophisticated NLP techniques
-         features = np.zeros(5)  # Assuming 5 features for simplicity
+         features = np.zeros(5)

        words = user_input.split()
-         features[0] = len(words)  # Number of words
-         features[1] = user_input.count("?") / len(words)  # Question mark ratio
-         features[2] = user_input.count("!") / len(words)  # Exclamation mark ratio
-         features[3] = len(user_input) / 100  # Normalized length of input
+         features[0] = len(words)
+         features[1] = user_input.count("?") / len(words)
+         features[2] = user_input.count("!") / len(words)
+         features[3] = len(user_input) / 100
        features[4] = sum(
            1
            for word in words
            if word.lower() in ["please", "thank", "thanks", "appreciate"]
-         ) / len(
-             words
-         )  # Politeness ratio
+         ) / len(words)

-         # Ensure features are of type float
        features = features.astype(float)

-         # Debug print statements
-         print(f"Debug: Input string: '{user_input}'")
-         print(f"Debug: Word count: {len(words)}")
-         print(f"Debug: Question mark count: {user_input.count('?')}")
-         print(f"Debug: Exclamation mark count: {user_input.count('!')}")
-         print(f"Debug: Features: {features}")
-
        return features
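The trimmed `process_user_input` computes the same five features as before, just without the inline comments and debug prints. A standalone worked example of the arithmetic (the input string is chosen for illustration and does not come from the diff):

```python
import numpy as np

user_input = "thanks for the help"
words = user_input.split()

features = np.zeros(5)
features[0] = len(words)                          # word count -> 4
features[1] = user_input.count("?") / len(words)  # question-mark ratio -> 0.0
features[2] = user_input.count("!") / len(words)  # exclamation-mark ratio -> 0.0
features[3] = len(user_input) / 100               # normalized length -> 0.19
features[4] = sum(                                # politeness ratio -> 0.25
    1 for w in words if w.lower() in ["please", "thank", "thanks", "appreciate"]
) / len(words)

assert np.allclose(features, [4.0, 0.0, 0.0, 0.19, 0.25])
```

As in the diff itself, this assumes a non-empty input, since the ratios divide by `len(words)`.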