
Fixes #36

Merged 3 commits on Mar 27, 2024
model.joblib: binary file modified (contents not shown)
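For context, model.joblib is presumably the updated model artifact committed alongside the code change. A minimal, hypothetical sketch of inspecting such an artifact is shown below; the actual loading happens inside the repository's prediction code and may differ.

```python
# Hypothetical quick check of the committed artifact (not part of this PR's code);
# assumes model.joblib is a scikit-learn estimator serialized with joblib.
import joblib

model = joblib.load("model.joblib")
print(type(model))  # e.g. a fitted sklearn Pipeline or classifier
```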
run.R: 2 changes (1 addition, 1 deletion)
@@ -4,7 +4,7 @@
# Add your method there.

# To test your submission use the following command:
-# Rscript run.R predict data/PreFer_fake_data.csv
+# Rscript run.R PreFer_fake_data.csv PreFer_fake_background_data.csv

# Install required packages with Rscript packages.R

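The updated test command passes two data files, which matches the Python entry point visible further down in this diff (`predict(args.data_path, args.background_data_path, args.output)`). Below is a minimal sketch of an argparse setup consistent with those attribute names; the parser's real definition is outside the lines shown here, so flag names and help strings are assumptions.

```python
# Sketch of a CLI consistent with the attributes used in run.py's __main__ block
# (args.data_path, args.background_data_path, args.output); the repository's
# actual parser definition is not shown in this diff and may differ.
import argparse

parser = argparse.ArgumentParser(description="Generate predictions for the PreFer challenge")
parser.add_argument("data_path", help="e.g. PreFer_fake_data.csv")
parser.add_argument("background_data_path", help="e.g. PreFer_fake_background_data.csv")
parser.add_argument("--output", default=None, help="optional path for the predictions CSV")
```

With such a parser, the Python side would be invoked analogously to the R command above, e.g. `python run.py PreFer_fake_data.csv PreFer_fake_background_data.csv`.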
run.py: 61 changes (0 additions, 61 deletions)
@@ -65,67 +65,6 @@ def predict(data_path, background_data_path, output):
    predictions.to_csv(output, index=False)


def score(prediction_path, ground_truth_path, output):
    """Score (evaluate) the predictions and write the metrics.

    This function takes the path to a CSV file containing predicted outcomes and the
    path to a CSV file containing the ground truth outcomes. It calculates the overall
    prediction accuracy, and precision, recall, and F1 score for having a child
    and writes these scores to a new output CSV file.

    This function should not be modified.
    """

    if output is None:
        output = sys.stdout
    # Load predictions and ground truth into dataframes
    predictions_df = pd.read_csv(prediction_path)
    ground_truth_df = pd.read_csv(ground_truth_path)

    # Merge predictions and ground truth on the 'id' column
    merged_df = pd.merge(predictions_df, ground_truth_df, on="nomem_encr", how="right")

    # Calculate accuracy
    accuracy = len(merged_df[merged_df["prediction"] == merged_df["new_child"]]) / len(
        merged_df
    )

    # Calculate true positives, false positives, and false negatives
    true_positives = len(
        merged_df[(merged_df["prediction"] == 1) & (merged_df["new_child"] == 1)]
    )
    false_positives = len(
        merged_df[(merged_df["prediction"] == 1) & (merged_df["new_child"] == 0)]
    )
    false_negatives = len(
        merged_df[(merged_df["prediction"] == 0) & (merged_df["new_child"] == 1)]
    )

    # Calculate precision, recall, and F1 score
    try:
        precision = true_positives / (true_positives + false_positives)
    except ZeroDivisionError:
        precision = 0
    try:
        recall = true_positives / (true_positives + false_negatives)
    except ZeroDivisionError:
        recall = 0
    try:
        f1_score = 2 * (precision * recall) / (precision + recall)
    except ZeroDivisionError:
        f1_score = 0
    # Write metric output to a new CSV file
    metrics_df = pd.DataFrame(
        {
            "accuracy": [accuracy],
            "precision": [precision],
            "recall": [recall],
            "f1_score": [f1_score],
        }
    )
    metrics_df.to_csv(output, index=False)


if __name__ == "__main__":
    args = parser.parse_args()
    predict(args.data_path, args.background_data_path, args.output)
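The removed score() function computed accuracy, precision, recall, and F1 by manual counting on the merged dataframe. Purely as an illustrative cross-check (not code from this repository), the same metrics could be reproduced with scikit-learn; only the column names nomem_encr, prediction, and new_child are taken from the removed code.

```python
# Illustrative alternative to the removed scoring logic, using scikit-learn.
# Assumes every ground-truth id has a matching prediction; the original code
# counted unmatched rows against accuracy and skipped them in the TP/FP/FN tallies.
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

def score_with_sklearn(prediction_path, ground_truth_path):
    predictions_df = pd.read_csv(prediction_path)
    ground_truth_df = pd.read_csv(ground_truth_path)
    merged = pd.merge(predictions_df, ground_truth_df, on="nomem_encr", how="right")

    y_true = merged["new_child"]
    y_pred = merged["prediction"]
    return pd.DataFrame(
        {
            "accuracy": [accuracy_score(y_true, y_pred)],
            "precision": [precision_score(y_true, y_pred, zero_division=0)],
            "recall": [recall_score(y_true, y_pred, zero_division=0)],
            "f1_score": [f1_score(y_true, y_pred, zero_division=0)],
        }
    )
```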