diff --git a/model.joblib b/model.joblib
index 21a679b..f3a4483 100644
Binary files a/model.joblib and b/model.joblib differ
diff --git a/run.R b/run.R
index e8dce75..82274fe 100644
--- a/run.R
+++ b/run.R
@@ -4,7 +4,7 @@
 # Add your method there.
 
 # To test your submission use the following command:
-# Rscript run.R predict data/PreFer_fake_data.csv
+# Rscript run.R PreFer_fake_data.csv PreFer_fake_background_data.csv
 
 # Install required packages with Rscript packages.R
 
diff --git a/run.py b/run.py
index 92369b3..78bf2c6 100644
--- a/run.py
+++ b/run.py
@@ -65,67 +65,6 @@ def predict(data_path, background_data_path, output):
     predictions.to_csv(output, index=False)
 
 
-def score(prediction_path, ground_truth_path, output):
-    """Score (evaluate) the predictions and write the metrics.
-
-    This function takes the path to a CSV file containing predicted outcomes and the
-    path to a CSV file containing the ground truth outcomes. It calculates the overall
-    prediction accuracy, and precision, recall, and F1 score for having a child
-    and writes these scores to a new output CSV file.
-
-    This function should not be modified.
-    """
-
-    if output is None:
-        output = sys.stdout
-    # Load predictions and ground truth into dataframes
-    predictions_df = pd.read_csv(prediction_path)
-    ground_truth_df = pd.read_csv(ground_truth_path)
-
-    # Merge predictions and ground truth on the 'id' column
-    merged_df = pd.merge(predictions_df, ground_truth_df, on="nomem_encr", how="right")
-
-    # Calculate accuracy
-    accuracy = len(merged_df[merged_df["prediction"] == merged_df["new_child"]]) / len(
-        merged_df
-    )
-
-    # Calculate true positives, false positives, and false negatives
-    true_positives = len(
-        merged_df[(merged_df["prediction"] == 1) & (merged_df["new_child"] == 1)]
-    )
-    false_positives = len(
-        merged_df[(merged_df["prediction"] == 1) & (merged_df["new_child"] == 0)]
-    )
-    false_negatives = len(
-        merged_df[(merged_df["prediction"] == 0) & (merged_df["new_child"] == 1)]
-    )
-
-    # Calculate precision, recall, and F1 score
-    try:
-        precision = true_positives / (true_positives + false_positives)
-    except ZeroDivisionError:
-        precision = 0
-    try:
-        recall = true_positives / (true_positives + false_negatives)
-    except ZeroDivisionError:
-        recall = 0
-    try:
-        f1_score = 2 * (precision * recall) / (precision + recall)
-    except ZeroDivisionError:
-        f1_score = 0
-    # Write metric output to a new CSV file
-    metrics_df = pd.DataFrame(
-        {
-            "accuracy": [accuracy],
-            "precision": [precision],
-            "recall": [recall],
-            "f1_score": [f1_score],
-        }
-    )
-    metrics_df.to_csv(output, index=False)
-
-
 if __name__ == "__main__":
     args = parser.parse_args()
     predict(args.data_path, args.background_data_path, args.output)
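
Note on the removed scorer: the `score()` function dropped from run.py computed accuracy, precision, recall, and F1 from hand-rolled true/false-positive counts. If you still want to evaluate predictions locally after this change, the sketch below reproduces the same metrics with scikit-learn instead. It is not part of this diff: the script name `score_local.py` is hypothetical, and it assumes every `nomem_encr` in the ground-truth file has a non-missing `prediction` (the original right join tolerated missing predictions; sklearn's metrics do not).

```python
# score_local.py -- hypothetical standalone scorer, not part of this repository.
# Reproduces the behaviour of the removed score() function using sklearn metrics.
import sys

import pandas as pd
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score


def score(prediction_path, ground_truth_path, output=None):
    """Evaluate predictions against ground truth and write metrics as CSV."""
    if output is None:
        output = sys.stdout

    predictions_df = pd.read_csv(prediction_path)
    ground_truth_df = pd.read_csv(ground_truth_path)

    # Right join on the person identifier, as in the removed function.
    # Assumes no prediction is missing after the merge.
    merged_df = pd.merge(predictions_df, ground_truth_df, on="nomem_encr", how="right")

    y_true = merged_df["new_child"]
    y_pred = merged_df["prediction"]

    # zero_division=0 mirrors the original ZeroDivisionError handling.
    metrics_df = pd.DataFrame(
        {
            "accuracy": [accuracy_score(y_true, y_pred)],
            "precision": [precision_score(y_true, y_pred, zero_division=0)],
            "recall": [recall_score(y_true, y_pred, zero_division=0)],
            "f1_score": [f1_score(y_true, y_pred, zero_division=0)],
        }
    )
    metrics_df.to_csv(output, index=False)


if __name__ == "__main__":
    # Usage: python score_local.py predictions.csv ground_truth.csv [metrics.csv]
    score(sys.argv[1], sys.argv[2], sys.argv[3] if len(sys.argv) > 3 else None)
```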