Set up RF study with NDCG

asreview · Feb 5, 2025 · c468fc9 · c468fc9
1 parent 75b137e
commit c468fc9
Show file tree

Hide file tree

Showing 2 changed files with 4 additions and 28 deletions.
diff --git a/asreview2-optuna/classifiers.py b/asreview2-optuna/classifiers.py
@@ -6,8 +6,6 @@
     SVM,
 )
 
-from sklearn.ensemble import RandomForestClassifier
-
 
 def naive_bayes_params(trial: optuna.trial.FrozenTrial):
     # Use logarithmic normal distribution for alpha (alpha effect is non-linear)
@@ -29,12 +27,8 @@ def svm_params(trial: optuna.trial.FrozenTrial):
 
 def random_forest_params(trial: optuna.trial.FrozenTrial):
     # Sample n_estimators uniformly over the integer range (n_estimators effect is linear)
-    n_estimators = trial.suggest_int("rf__n_estimators", 50, 200)
-
-    # Use normal distribution for max_features (max_features effect is linear)
-    max_features = trial.suggest_categorical("rf__max_features", ["sqrt", "log2"])
-
-    return {"n_estimators": n_estimators, "max_features": max_features}
+    n_estimators = trial.suggest_int("rf__n_estimators", 100, 200)
+    return {"n_estimators": n_estimators, "max_features": "sqrt"}
 
 
 classifier_params = {
@@ -45,24 +39,6 @@ def random_forest_params(trial: optuna.trial.FrozenTrial):
 }
 
 
-class RFClassifier(RandomForestClassifier):
-    """Random forest classifier.
-
-    Based on the sklearn implementation of the random forest
-    sklearn.ensemble.RandomForestClassifier.
-    """
-
-    name = "rf"
-    label = "Random forest"
-
-    def __init__(self, n_estimators=100, max_features=10, **kwargs):
-        super().__init__(
-            n_estimators=int(n_estimators),
-            max_features=max_features,
-            **kwargs,
-        )
-
-
 classifiers = {
     "nb": NaiveBayes,
     "log": Logistic,

diff --git a/asreview2-optuna/main.py b/asreview2-optuna/main.py
@@ -19,13 +19,13 @@
 from feature_extractors import feature_extractor_params, feature_extractors
 
 # Study variables
-VERSION = 1
+VERSION = 2
 METRIC = "ndcg"  # Options: "loss", "ndcg"
 STUDY_SET = "full"
 CLASSIFIER_TYPE = "rf"  # Options: "nb", "log", "svm", "rf"
 FEATURE_EXTRACTOR_TYPE = "tfidf"  # Options: "tfidf", "onehot", "labse", "bge-m3", "stella", "mxbai"
 PICKLE_FOLDER_PATH = Path("synergy-dataset", f"pickles_{FEATURE_EXTRACTOR_TYPE}")
-PRE_PROCESSED_FMS = True  # False = on the fly
+PRE_PROCESSED_FMS = False  # False = on the fly
 PARALLELIZE_OBJECTIVE = True
 AUTO_SHUTDOWN = True