Merge pull request eyra#36 from eyra/fixes
Fixes
vloothuis authored Mar 27, 2024
2 parents 3d276e0 + 97d14ab commit 78af086
Showing 3 changed files with 1 addition and 62 deletions.
Binary file modified: model.joblib (binary diff not shown)
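Since the updated artifact is a .joblib file, it can presumably be inspected by deserializing it with joblib. A minimal sketch, assuming the file holds a fitted scikit-learn estimator (the diff itself does not confirm this):

import joblib

# Load the retrained artifact shipped with this commit.
model = joblib.load("model.joblib")
print(type(model))  # e.g. a fitted Pipeline or classifier (assumption)
# feature_names_in_ is only present if the estimator was fit on a DataFrame
print(getattr(model, "feature_names_in_", "no recorded feature names"))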
run.R (2 changes: 1 addition & 1 deletion)
@@ -4,7 +4,7 @@
# Add your method there.

# To test your submission use the following command:
-# Rscript run.R predict data/PreFer_fake_data.csv
+# Rscript run.R PreFer_fake_data.csv PreFer_fake_background_data.csv

# Install required packages with Rscript packages.R

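The new test command passes the raw data CSV and the background data CSV as two positional arguments. The matching parser on the Python side is defined outside the hunk shown below, so the following is only a sketch of what it plausibly looks like, inferred from the args.data_path, args.background_data_path, and args.output attributes used in run.py's __main__ block (the --output flag name is an assumption):

import argparse

# Hedged reconstruction of the parser used by run.py's __main__ block.
parser = argparse.ArgumentParser(description="Run predictions on PreFer data")
parser.add_argument("data_path", help="CSV with the main survey data")
parser.add_argument("background_data_path", help="CSV with the background data")
parser.add_argument("--output", default=None, help="where to write the predictions CSV")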
run.py (61 changes: 0 additions & 61 deletions)
@@ -65,67 +65,6 @@ def predict(data_path, background_data_path, output):
    predictions.to_csv(output, index=False)


def score(prediction_path, ground_truth_path, output):
    """Score (evaluate) the predictions and write the metrics.

    This function takes the path to a CSV file containing the predicted outcomes and
    the path to a CSV file containing the ground truth outcomes. It calculates the
    overall prediction accuracy, and the precision, recall, and F1 score for having
    a child, and writes these scores to a new output CSV file.

    This function should not be modified.
    """

    if output is None:
        output = sys.stdout
    # Load predictions and ground truth into dataframes
    predictions_df = pd.read_csv(prediction_path)
    ground_truth_df = pd.read_csv(ground_truth_path)

    # Merge predictions and ground truth on the 'nomem_encr' column
    merged_df = pd.merge(predictions_df, ground_truth_df, on="nomem_encr", how="right")

    # Calculate accuracy
    accuracy = len(merged_df[merged_df["prediction"] == merged_df["new_child"]]) / len(
        merged_df
    )

    # Calculate true positives, false positives, and false negatives
    true_positives = len(
        merged_df[(merged_df["prediction"] == 1) & (merged_df["new_child"] == 1)]
    )
    false_positives = len(
        merged_df[(merged_df["prediction"] == 1) & (merged_df["new_child"] == 0)]
    )
    false_negatives = len(
        merged_df[(merged_df["prediction"] == 0) & (merged_df["new_child"] == 1)]
    )

    # Calculate precision, recall, and F1 score
    try:
        precision = true_positives / (true_positives + false_positives)
    except ZeroDivisionError:
        precision = 0
    try:
        recall = true_positives / (true_positives + false_negatives)
    except ZeroDivisionError:
        recall = 0
    try:
        f1_score = 2 * (precision * recall) / (precision + recall)
    except ZeroDivisionError:
        f1_score = 0

    # Write metric output to a new CSV file
    metrics_df = pd.DataFrame(
        {
            "accuracy": [accuracy],
            "precision": [precision],
            "recall": [recall],
            "f1_score": [f1_score],
        }
    )
    metrics_df.to_csv(output, index=False)


if __name__ == "__main__":
    args = parser.parse_args()
    predict(args.data_path, args.background_data_path, args.output)
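With score() removed, the same four metrics can still be recomputed outside the submission. A minimal sketch using scikit-learn, which is an assumption here (nothing in the shown diff imports sklearn), mirroring the deleted right join on nomem_encr:

import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

def score_with_sklearn(prediction_path, ground_truth_path):
    # Same inputs and right join on 'nomem_encr' as the deleted score().
    predictions_df = pd.read_csv(prediction_path)
    ground_truth_df = pd.read_csv(ground_truth_path)
    merged = pd.merge(predictions_df, ground_truth_df, on="nomem_encr", how="right")
    y_true, y_pred = merged["new_child"], merged["prediction"]
    # Note: rows with no prediction become NaN after the right join; sklearn
    # raises on NaN, whereas the deleted code silently counted them as wrong.
    return pd.DataFrame(
        {
            "accuracy": [accuracy_score(y_true, y_pred)],
            "precision": [precision_score(y_true, y_pred, zero_division=0)],
            "recall": [recall_score(y_true, y_pred, zero_division=0)],
            "f1_score": [f1_score(y_true, y_pred, zero_division=0)],
        }
    )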
