Add GenAcq + fix MineAcq + Docstrings of other algorithms (#16)

Dimosts · web-flow · commit 6c46dcb1f11b · 2025-05-13T21:24:37.000+02:00
* gquacq to mineacq + fixes

* add genacq

* gquacq file to mineacq

* Update __init__.py

* genacq docstrings

* more docstring changes
diff --git a/pycona/__init__.py b/pycona/__init__.py
@@ -26,7 +26,7 @@
 from .find_constraint import FindC, FindC2
 from .query_generation import QGen, TQGen, PQGen
 from .find_scope import FindScope, FindScope2
-from .active_algorithms import QuAcq, PQuAcq, GQuAcq, GrowAcq, MQuAcq, MQuAcq2
+from .active_algorithms import QuAcq, PQuAcq, MineAcq, GrowAcq, MQuAcq, MQuAcq2, GenAcq
 from .problem_instance import ProblemInstance, absvar, langBasic, langDist, langEqNeq
 from .predictor import CountsPredictor, FeaturesRelDim, FeaturesSimpleRel
 
diff --git a/pycona/active_algorithms/__init__.py b/pycona/active_algorithms/__init__.py
@@ -12,4 +12,5 @@
 from .mquacq import MQuAcq
 from .growacq import GrowAcq
 from .pquacq import PQuAcq
-from .gquacq import GQuAcq
+from .mineacq import MineAcq
+from .genacq import GenAcq
diff --git a/pycona/active_algorithms/genacq.py b/pycona/active_algorithms/genacq.py
@@ -0,0 +1,176 @@
+import time
+from itertools import product
+
+from .algorithm_core import AlgorithmCAInteractive
+from ..ca_environment.active_ca import ActiveCAEnv
+from ..utils import get_relation, get_scope, get_kappa, replace_variables
+from ..problem_instance import ProblemInstance
+from ..answering_queries import Oracle, UserOracle
+from .. import Metrics
+
+
+class GenAcq(AlgorithmCAInteractive):
+
+    """
+    GenAcq algorithm, using generalization queries on given types of variables. From:
+
+    "Boosting Constraint Acquisition with Generalization Queries", ECAI 2014.
+    """
+
+    def __init__(self, ca_env: ActiveCAEnv = None, types=None, qg_max=3):
+        """
+        Initialize the GenAcq algorithm with an optional constraint acquisition environment.
+
+        :param ca_env: An instance of ActiveCAEnv, default is None.
+        :param types: list of types (groups of variables) given by the user, default is None (no types)
+        :param qg_max: cutoff on consecutive rejected generalization queries per generalize() call, default is 3
+        """
+        super().__init__(ca_env)
+        self._negativeQ = []  # type sequences whose generalization query was rejected
+        self._qg_max = qg_max
+        self._types = types if types is not None else []
+
+    def learn(self, instance: ProblemInstance, oracle: Oracle = UserOracle(), verbose=0, metrics: Metrics = None, X=None):
+        """
+        Learn constraints using the GenAcq algorithm by generating queries and analyzing the results.
+        Using generalization queries on given types of variables.
+
+        :param instance: the problem instance to acquire the constraints for
+        :param oracle: An instance of Oracle, default is to use the user as the oracle.
+        :param verbose: Verbosity level, default is 0.
+        :param metrics: statistics logger during learning
+        :param X: List of variables to consider for learning. If None, uses all variables from the instance.
+        :return: the learned instance
+        """
+        self.env.init_state(instance, oracle, verbose, metrics)
+
+        if X is None:
+            X = list(self.env.instance.variables.flat)
+
+        if len(self.env.instance.bias) == 0:
+            self.env.instance.construct_bias(X)
+
+        while True:
+            if self.env.verbose > 0:
+                print("Size of CL: ", len(self.env.instance.cl))
+                print("Size of B: ", len(self.env.instance.bias))
+                print("Number of Queries: ", self.env.metrics.total_queries)
+                print("Number of Generalization Queries: ", self.env.metrics.generalization_queries_count)
+                print("Number of Membership Queries: ", self.env.metrics.membership_queries_count)
+
+            gen_start = time.time()
+            Y = self.env.run_query_generation(X)
+            gen_end = time.time()
+
+            if len(Y) == 0:
+                # No query could be generated: the learner has converged (possibly prematurely), so finalize and return
+                self.env.metrics.finalize_statistics()
+                if self.env.verbose >= 1:
+                    print(f"\nLearned {self.env.metrics.cl} constraints in "
+                          f"{self.env.metrics.total_queries} queries.")
+                self.env.instance.bias = []
+                return self.env.instance
+
+            self.env.metrics.increase_generation_time(gen_end - gen_start)
+            self.env.metrics.increase_generated_queries()
+            self.env.metrics.increase_top_queries()
+            kappaB = get_kappa(self.env.instance.bias, Y)
+
+            answer = self.env.ask_membership_query(Y)
+            if answer:
+                # it is a solution, so all candidates violated must go
+                # B <- B \setminus K_B(e)
+                self.env.remove_from_bias(kappaB)
+
+            else:  # user says UNSAT
+                # find a scope, learn a constraint on it, then try to generalize that constraint
+                scope = self.env.run_find_scope(Y)
+                c = self.env.run_findc(scope)
+                self.env.add_to_cl(c)
+                self.generalize(get_relation(c, self.env.instance.language),c)
+
+
+
+    def generalize(self, r, c):
+        """
+        Generalize function presented in
+        "Boosting Constraint Acquisition with Generalization Queries", ECAI 2014.
+
+        Asks generalization queries on the sequences of types that contain the scope variables of c.
+        :param r: The index of a relation in gamma (used as self.env.instance.language[r]).
+        :param c: The constraint to generalize.
+        :return: None. Accepted constraints are added to the learned network through the environment.
+        """
+        # Get the scope variables of constraint c
+        scope_vars = get_scope(c)
+
+        # For each scope variable, collect the user-given types (groups) that contain it
+        type_sequences = []
+        for var in scope_vars:
+            var_types = []
+            for type_group in self._types:
+                if var.name in type_group:  # NOTE(review): tested by name, yet group members are used as variables below - confirm
+                    var_types.append(type_group)
+            type_sequences.append(var_types)
+
+        # One candidate sequence per way of picking a type for each scope variable
+        all_type_sequences = list(product(*type_sequences))
+
+        # Filter out sequences based on NegativeQ and NonTarget
+        filtered_sequences = []
+        for s in all_type_sequences:
+            # Skip sequences already rejected by a generalization query (exact match on the sequence only)
+            # NOTE(review): rejected sequences are stored without their relation r - confirm this is intended
+            if s in self._negativeQ:
+                continue
+
+            # Skip sequences whose types contain every scope variable of an excluded constraint with relation r
+            if any(get_relation(c2, self.env.instance.language) == r and
+                   all(any(var in set(type_group) for type_group in s) for var in get_scope(c2))
+                   for c2 in set(self.env.instance.excluded_cons)):
+                continue
+
+            filtered_sequences.append(s)
+
+        all_type_sequences = filtered_sequences
+
+        gq_counter = 0  # rejected generalization queries since the last accepted one
+
+        # Ask about the smallest sequences first: sort by number of distinct members over all types (ascending)
+        all_type_sequences.sort(key=lambda seq: len(set().union(*seq)))
+
+        while len(all_type_sequences) > 0 and gq_counter < self._qg_max:
+            Y = all_type_sequences.pop(0)
+
+            # Instead of getting constraints from bias, generate them for this type sequence
+            B = []
+
+            # Generate all possible variable combinations, one member per type in the sequence
+            var_combinations = list(product(*Y))
+            # Create constraints for each variable combination
+            for var_comb in var_combinations:
+
+                if len(set(var_comb)) != len(var_comb):  # No duplicates
+                    continue
+                # Sort var_comb by variable name (string order); NOTE(review): this discards the type order of Y - confirm
+                var_comb = sorted(var_comb, key=lambda var: var.name)
+                # Instantiate relation r by mapping its abstract variables, in order, to the chosen variables
+                abs_vars = get_scope(self.env.instance.language[r])
+                replace_dict = dict()
+                for i, v in enumerate(var_comb):
+                    replace_dict[abs_vars[i]] = v
+                constraint = replace_variables(self.env.instance.language[r], replace_dict)
+
+                # Skip already learned or excluded constraints
+                if constraint not in set(self.env.instance.cl) and constraint not in set(self.env.instance.excluded_cons):
+                    B.append(constraint)
+
+            # Ask the generalization query on B (NOTE(review): asked even when B is empty - confirm)
+            if self.env.ask_generalization_query(self.env.instance.language[r], B):
+                self.env.add_to_cl(B)
+                gq_counter = 0  # an accepted query resets the cutoff counter
+            else:
+                gq_counter += 1
+                self._negativeQ.append(Y)  # remember the rejected sequence so it is filtered out on later calls
+
+        
diff --git a/pycona/active_algorithms/mineacq.py b/pycona/active_algorithms/mineacq.py
@@ -12,61 +12,60 @@
 from .. import Metrics
 
 
-class GQuAcq(AlgorithmCAInteractive):
+class MineAcq(AlgorithmCAInteractive):
 
     """
-    QuAcq variation algorithm, using mine&Ask to detect types of variables and ask genralization queries. From:
+    QuAcq variation algorithm, using mine&Ask to detect types of variables and ask generalization queries. From:
     "Detecting Types of Variables for Generalization in Constraint Acquisition", ICTAI 2015.
     """
 
     def __init__(self, ca_env: ActiveCAEnv = None, qg_max=10):
         """
-        Initialize the GQuAcq algorithm with an optional constraint acquisition environment.
+        Initialize the MineAcq algorithm with an optional constraint acquisition environment.
 
         :param ca_env: An instance of ActiveCAEnv, default is None.
-        : param GQmax: maximum number of generalization queries
+        :param qg_max: maximum number of generalization queries
         """
         super().__init__(ca_env)
         self._negativeQ = []
         self._qg_max = qg_max
 
-    def learn(self, instance: ProblemInstance, oracle: Oracle = UserOracle(), verbose=0, X=None, metrics: Metrics = None):
+    def learn(self, instance: ProblemInstance, oracle: Oracle = UserOracle(), verbose=0, metrics: Metrics = None, X=None):
         """
-        Learn constraints using the GQuAcq algorithm by generating queries and analyzing the results.
+        Learn constraints using the MineAcq algorithm by generating queries and analyzing the results. 
+        Using mine&ask to detect types of variables and ask generalization queries.
 
         :param instance: the problem instance to acquire the constraints for
         :param oracle: An instance of Oracle, default is to use the user as the oracle.
         :param verbose: Verbosity level, default is 0.
         :param metrics: statistics logger during learning
-        :param X: The set of variables to consider, default is None.
+        :param X: List of variables to consider for learning. If None, uses all variables from the instance.
         :return: the learned instance
         """
-        if X is None:
-            X = instance.X
-        assert isinstance(X, list), "When using .learn(), set parameter X must be a list of variables. Instead got: {}".format(X)
-        assert set(X).issubset(set(instance.X)), "When using .learn(), set parameter X must be a subset of the problem instance variables. Instead got: {}".format(X)
-
         self.env.init_state(instance, oracle, verbose, metrics)
 
+        if X is None:
+            X = list(self.env.instance.variables.flat)
+
         if len(self.env.instance.bias) == 0:
             self.env.instance.construct_bias(X)
 
         while True:
             if self.env.verbose > 0:
                 print("Size of CL: ", len(self.env.instance.cl))
                 print("Size of B: ", len(self.env.instance.bias))
-                print("Number of Queries: ", self.env.metrics.membership_queries_count)
+                print("Number of Queries: ", self.env.metrics.total_queries)
 
             gen_start = time.time()
             Y = self.env.run_query_generation(X)
-            gen_end = time.time()   
+            gen_end = time.time()
 
             if len(Y) == 0:
                 # if no query can be generated it means we have (prematurely) converged to the target network -----
                 self.env.metrics.finalize_statistics()
                 if self.env.verbose >= 1:
                     print(f"\nLearned {self.env.metrics.cl} constraints in "
-                          f"{self.env.metrics.membership_queries_count} queries.")
+                          f"{self.env.metrics.total_queries} queries.")
                 self.env.instance.bias = []
                 return self.env.instance
 
@@ -130,16 +129,13 @@ def mineAsk(self, r):
             # potentially generalizing leads to UNSAT
             new_CL = self.env.instance.cl.copy()
             new_CL += B
-            if any(Y2.issubset(Y) for Y2 in self._negativeQ) or not can_be_clique(G.subgraph(Y), D) or \
-                    len(B) > 0 or cp.Model(new_CL).solve():
-                continue
-
-            if self.env.ask_generalization_query(self.env.instance.language[r], B):
-                gen_flag = True
-                self.env.add_to_cl(B)
-            else:
-                gq_counter += 1
-                self._negativeQ.append(Y)
+            if not (any(Y2.issubset(Y) for Y2 in self._negativeQ) or not (can_be_clique(G.subgraph(Y), D) and (len(B) > 0) and cp.Model(new_CL).solve())):
+                if self.env.ask_generalization_query(self.env.instance.language[r], B):
+                    gen_flag = True
+                    self.env.add_to_cl(B)
+                else:
+                    gq_counter += 1
+                    self._negativeQ.append(Y)
 
             if not gen_flag:
                 communities = nx.community.greedy_modularity_communities(G.subgraph(Y))
diff --git a/pycona/active_algorithms/mquacq2.py b/pycona/active_algorithms/mquacq2.py
@@ -34,6 +34,7 @@ def __init__(self, ca_env: ActiveCAEnv = None, *, perform_analyzeAndLearn: bool
     def learn(self, instance: ProblemInstance, oracle: Oracle = UserOracle(), verbose=0, X=None, metrics: Metrics = None):
         """
         Learn constraints using the modified QuAcq algorithm by generating queries and analyzing the results.
+        Learns multiple constraints from each generated query. Uses analyzeAndLearn to focus on the most promising constraints.
 
         :param instance: the problem instance to acquire the constraints for
         :param oracle: An instance of Oracle, default is to use the user as the oracle.
diff --git a/tests/test_algorithms.py b/tests/test_algorithms.py
@@ -14,7 +14,7 @@
 problem_generators = [construct_murder_problem(), construct_examtt_simple(), construct_nurse_rostering()]
 
 classifiers = [DecisionTreeClassifier(), RandomForestClassifier()]
-algorithms = [ca.QuAcq(), ca.MQuAcq(), ca.MQuAcq2(), ca.GQuAcq(), ca.PQuAcq()]
+algorithms = [ca.QuAcq(), ca.MQuAcq(), ca.MQuAcq2(), ca.MineAcq(), ca.PQuAcq(), ca.GenAcq()]
 fast_tests_algorithms = [ca.QuAcq(), ca.MQuAcq(), ca.MQuAcq2()]
 
 def _generate_fast_benchmarks():