Scaling issue with tensor_db #1593
Closed · +52 −26
Commits (11)
52327b5 initial (porteratzo)
b1971aa Merge branch 'securefederatedai:develop' into scalling (porteratzo)
e2abab0 optimize cache tensor (porteratzo)
f6699d4 Merge branch 'scalling' of https://github.com/porteratzo/openfl into … (porteratzo)
e6d6fa9 merge (porteratzo)
962b266 final fixes (porteratzo)
66161a2 Merge branch 'develop' into scalling (payalcha)
de15b97 Merge branch 'develop' into scalling (porteratzo)
b42a86c Merge branch 'develop' of https://github.com/securefederatedai/openfl… (porteratzo)
4098788 Merge branch 'develop' into scalling (porteratzo)
d0b6781 Merge branch 'develop' into scalling (porteratzo)
As I understand, this snippet removes collaborator metrics on the aggregator side (which are merely a couple of floats for each collaborator). I am not sure if this is related to scaling issues, or memory usage in general. Do you have any pre/post analysis in support of this proposal?
Well, as evidence, I profiled get_cached_tensor with and without filtering these rows and measured an ~80% speed-up.
As an example I have this calculation

So the number of collaborator tensor rows grows a lot with more collaborators; that row count stays constant through the experiment. And although the metric rows increase relatively little per round, with many reported metrics, many collaborators, and many rounds they can grow to be a significant part of the tensor_db.
Most calls to get_tensor_from_cache only care about the aggregator tensor, so removing all the collaborator model rows and metric rows yields a large improvement in query time.
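The effect described above can be sketched with a toy, pandas-backed tensor store. The column names and `build_db` helper here are illustrative assumptions modeled loosely on how a tensor_db keyed by origin might look, not the actual OpenFL TensorDB implementation:

```python
import numpy as np
import pandas as pd

def build_db(num_collaborators=30, num_tensors=50, num_rounds=10):
    """Build a toy tensor table: one aggregator row plus one row per
    collaborator, for each tensor in each round (illustrative only)."""
    rows = []
    for rnd in range(num_rounds):
        for t in range(num_tensors):
            rows.append((f"tensor_{t}", "aggregator", rnd, np.zeros(4)))
            for c in range(num_collaborators):
                rows.append((f"tensor_{t}", f"col_{c}", rnd, np.zeros(4)))
    return pd.DataFrame(rows, columns=["tensor_name", "origin", "round", "nparray"])

db = build_db()

# Collaborator rows dominate the table: with 30 collaborators, roughly
# 30 of every 31 rows are collaborator-origin rows.
aggregator_only = db[db["origin"] == "aggregator"]

# A typical lookup only needs the aggregator tensor, so it can scan the
# much smaller filtered table instead of the full one.
hit = aggregator_only[
    (aggregator_only["tensor_name"] == "tensor_0") & (aggregator_only["round"] == 9)
]
print(len(db), len(aggregator_only), len(hit))  # → 15500 500 1
```

With these (assumed) proportions, dropping collaborator rows before the lookup shrinks the scanned table by a factor of ~31, which is consistent with the query-time improvement reported above.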
Thanks, two experiment suggestions:
If either of these two results shows a significant improvement, we can prioritize merging this PR.
The changes are most impactful with more collaborators and also depend on model size, so the following experiment was run on a custom-modified MNIST workspace with 30 collaborators, using resnet50, for 10 rounds.
Without changes: real 39m51.381s
With changes: real 34m12.750s
That is around a 14% reduction in time to completion.
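For reference, the reduction can be computed directly from the wall-clock times reported above:

```python
# Wall-clock times from the experiment above, converted to seconds.
without_changes = 39 * 60 + 51.381   # real 39m51.381s
with_changes = 34 * 60 + 12.750      # real 34m12.750s

reduction = (without_changes - with_changes) / without_changes
print(f"{reduction:.1%}")  # → 14.2%
```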