setup full study

asreview · Jan 24, 2025 · f0ce7c5 · f0ce7c5
1 parent b717efe
commit f0ce7c5
Show file tree

Hide file tree

Showing 3 changed files with 7 additions and 12 deletions.
diff --git a/asreview2-optuna/classifiers.py b/asreview2-optuna/classifiers.py
@@ -18,13 +18,9 @@ def logistic_params(trial: optuna.trial.FrozenTrial):
     # Sample C on a log scale, i.e. log-uniform over [0.01, 10] (C effect is non-linear)
     C = trial.suggest_float("log__C", 0.01, 10, log=True)
 
-    solver = trial.suggest_categorical("log__solver", ["lbfgs", "saga"])
+    solver = "lbfgs"
 
-    max_iter = 100
-    if solver == "saga":
-        max_iter = 1000
-
-    return {"C": C, "solver": solver, "max_iter": max_iter}
+    return {"C": C, "solver": solver}
 
 
 def svm_params(trial: optuna.trial.FrozenTrial):

diff --git a/asreview2-optuna/feature_extractors.py b/asreview2-optuna/feature_extractors.py
@@ -11,10 +11,9 @@ def tfidf_params(trial: optuna.trial.FrozenTrial):
 
     min_df = trial.suggest_int("tfidf__min_df", 1, 10)
 
-    max_ngram_range = trial.suggest_int("tfidf__max_ngram_range", 1, 3)
-    ngram_range = (1, max_ngram_range)
+    ngram_range = (1, 2)
 
-    sublinear_tf = trial.suggest_categorical("tfidf__sublinear_tf", [True, False])
+    sublinear_tf = True
 
     return {
         #"max_features": max_features,

diff --git a/asreview2-optuna/main.py b/asreview2-optuna/main.py
@@ -21,8 +21,8 @@
 from feature_extractors import feature_extractor_params, feature_extractors
 
 # Study variables
-VERSION = 3
-STUDY_SET = "demo"
+VERSION = 1
+STUDY_SET = "full"
 PICKLE_FOLDER_PATH = Path("synergy-dataset", "pickles")
 CLASSIFIER_TYPE = "log"  # Options: "nb", "log", "svm", "rf"
 FEATURE_EXTRACTOR_TYPE = "tfidf"  # Options: "tfidf", "onehot"
@@ -164,7 +164,7 @@ def process_row(row, clf_params, fe_params, ratio):
 def objective_report(report_order):
     def objective(trial):
         # Sample ratio uniformly on a linear scale (ratio effect is linear)
-        ratio = trial.suggest_float("ratio", 1.0, 5.0)
+        ratio = trial.suggest_float("ratio", 1.0, 10.0)
         # ratio = 1.5
         clf_params = classifier_params[CLASSIFIER_TYPE](trial)
         fe_params = feature_extractor_params[FEATURE_EXTRACTOR_TYPE](trial)