Skip to content

Commit

Permalink
Setup rf study with ndcg
Browse files Browse the repository at this point in the history
  • Loading branch information
timovdk committed Feb 5, 2025
1 parent 75b137e commit c468fc9
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 28 deletions.
28 changes: 2 additions & 26 deletions asreview2-optuna/classifiers.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@
SVM,
)

from sklearn.ensemble import RandomForestClassifier


def naive_bayes_params(trial: optuna.trial.FrozenTrial):
# Use logarithmic normal distribution for alpha (alpha effect is non-linear)
Expand All @@ -29,12 +27,8 @@ def svm_params(trial: optuna.trial.FrozenTrial):

def random_forest_params(trial: optuna.trial.FrozenTrial):
# Sample n_estimators uniformly from an integer range (n_estimators effect is linear)
n_estimators = trial.suggest_int("rf__n_estimators", 50, 200)

# Pick max_features from a fixed set of categorical options (no distribution shape applies to a categorical choice)
max_features = trial.suggest_categorical("rf__max_features", ["sqrt", "log2"])

return {"n_estimators": n_estimators, "max_features": max_features}
n_estimators = trial.suggest_int("rf__n_estimators", 100, 200)
return {"n_estimators": n_estimators, "max_features": "sqrt"}


classifier_params = {
Expand All @@ -45,24 +39,6 @@ def random_forest_params(trial: optuna.trial.FrozenTrial):
}


class RFClassifier(RandomForestClassifier):
"""Random forest classifier.
Based on the sklearn implementation of the random forest
sklearn.ensemble.RandomForestClassifier.
"""

name = "rf"
label = "Random forest"

def __init__(self, n_estimators=100, max_features=10, **kwargs):
super().__init__(
n_estimators=int(n_estimators),
max_features=max_features,
**kwargs,
)


classifiers = {
"nb": NaiveBayes,
"log": Logistic,
Expand Down
4 changes: 2 additions & 2 deletions asreview2-optuna/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,13 @@
from feature_extractors import feature_extractor_params, feature_extractors

# Study variables
VERSION = 1
VERSION = 2
METRIC = "ndcg" # Options: "loss", "ndcg"
STUDY_SET = "full"
CLASSIFIER_TYPE = "rf" # Options: "nb", "log", "svm", "rf"
FEATURE_EXTRACTOR_TYPE = "tfidf" # Options: "tfidf", "onehot", "labse", "bge-m3", "stella", "mxbai"
PICKLE_FOLDER_PATH = Path("synergy-dataset", f"pickles_{FEATURE_EXTRACTOR_TYPE}")
PRE_PROCESSED_FMS = True # False = on the fly
PRE_PROCESSED_FMS = False # False = on the fly
PARALLELIZE_OBJECTIVE = True
AUTO_SHUTDOWN = True

Expand Down

0 comments on commit c468fc9

Please sign in to comment.