Merge pull request #42 from leonvanbokhorst/Refactor-belief-update-in-DQNFEPAgent

leonvanbokhorst · web-flow · commit a87b03dc382a · 2024-10-13T11:57:41.000+02:00
Refactor belief update in DQNFEPAgent
diff --git a/src/active_inference_forager/agents/dqn_fep_agent.py b/src/active_inference_forager/agents/dqn_fep_agent.py
@@ -154,10 +154,19 @@ def learn(
         self.decay_exploration()
 
     def update_belief(self, observation: np.ndarray) -> None:
+        # Validate that observation is numeric
+        if not np.issubdtype(observation.dtype, np.number):
+            raise ValueError("Observation must be a numeric array.")
+        
         self._update_belief_recursive(self.root_belief, observation)
         self._regularize_beliefs()
 
     def _update_belief_recursive(self, node: BeliefNode, observation: np.ndarray):
+        # Ensure observation is a numpy array with the same dtype as node.mean
+        observation = np.asarray(observation)
+        if observation.dtype != node.mean.dtype:
+            observation = observation.astype(node.mean.dtype)
+        
         prediction_error = observation - node.mean
         node.precision += (
             np.outer(prediction_error, prediction_error) * self.learning_rate
@@ -297,6 +306,9 @@ def process_user_input(self, user_input: str) -> np.ndarray:
             words
         )  # Politeness ratio
 
+        # Ensure features are of type float
+        features = features.astype(float)
+
         # Debug print statements
         print(f"Debug: Input string: '{user_input}'")
         print(f"Debug: Word count: {len(words)}")
diff --git a/src/active_inference_forager/main.py b/src/active_inference_forager/main.py
@@ -149,7 +149,13 @@ def simulate_conversation(
 
         next_state, reward, done = env.step(action)
 
-        agent.update_belief(user_input)
+        # Process user input into numerical features
+        # Placeholder: a np.array of ten random values in the interval [0.0, 1.0)
+        placeholder_user_input = np.random.rand(10)
+
+        processed_input = agent.process_user_input(placeholder_user_input)  # user_input
+        agent.update_belief(processed_input)
+
         state = next_state
         turn += 1