
Fixes #36

Merged 3 commits on Mar 27, 2024
model.joblib: binary file modified (contents not shown)
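For context, model.joblib is presumably the updated model artifact committed alongside the code change. A minimal, hypothetical sketch of inspecting such an artifact is shown below; the actual loading happens inside the repository's prediction code and may differ.

```python
# Hypothetical quick check of the committed artifact (not part of this PR's code);
# assumes model.joblib is a scikit-learn estimator serialized with joblib.
import joblib

model = joblib.load("model.joblib")
print(type(model))  # e.g. a fitted sklearn Pipeline or classifier
```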
run.R: 2 changes (1 addition, 1 deletion)
@@ -4,7 +4,7 @@
# Add your method there.

# To test your submission use the following command:
-# Rscript run.R predict data/PreFer_fake_data.csv
+# Rscript run.R PreFer_fake_data.csv PreFer_fake_background_data.csv

# Install required packages with Rscript packages.R

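The updated test command passes two data files, which matches the Python entry point visible further down in this diff (`predict(args.data_path, args.background_data_path, args.output)`). Below is a minimal sketch of an argparse setup consistent with those attribute names; the parser's real definition is outside the lines shown here, so flag names and help strings are assumptions.

```python
# Sketch of a CLI consistent with the attributes used in run.py's __main__ block
# (args.data_path, args.background_data_path, args.output); the repository's
# actual parser definition is not shown in this diff and may differ.
import argparse

parser = argparse.ArgumentParser(description="Generate predictions for the PreFer challenge")
parser.add_argument("data_path", help="e.g. PreFer_fake_data.csv")
parser.add_argument("background_data_path", help="e.g. PreFer_fake_background_data.csv")
parser.add_argument("--output", default=None, help="optional path for the predictions CSV")
```

With such a parser, the Python side would be invoked analogously to the R command above, e.g. `python run.py PreFer_fake_data.csv PreFer_fake_background_data.csv`.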
run.py: 61 changes (0 additions, 61 deletions)
@@ -65,67 +65,6 @@ def predict(data_path, background_data_path, output):
    predictions.to_csv(output, index=False)


def score(prediction_path, ground_truth_path, output):
    """Score (evaluate) the predictions and write the metrics.

    This function takes the path to a CSV file containing predicted outcomes and the
    path to a CSV file containing the ground truth outcomes. It calculates the overall
    prediction accuracy, and precision, recall, and F1 score for having a child
    and writes these scores to a new output CSV file.

    This function should not be modified.
    """

    if output is None:
        output = sys.stdout
    # Load predictions and ground truth into dataframes
    predictions_df = pd.read_csv(prediction_path)
    ground_truth_df = pd.read_csv(ground_truth_path)

    # Merge predictions and ground truth on the 'id' column
    merged_df = pd.merge(predictions_df, ground_truth_df, on="nomem_encr", how="right")

    # Calculate accuracy
    accuracy = len(merged_df[merged_df["prediction"] == merged_df["new_child"]]) / len(
        merged_df
    )

    # Calculate true positives, false positives, and false negatives
    true_positives = len(
        merged_df[(merged_df["prediction"] == 1) & (merged_df["new_child"] == 1)]
    )
    false_positives = len(
        merged_df[(merged_df["prediction"] == 1) & (merged_df["new_child"] == 0)]
    )
    false_negatives = len(
        merged_df[(merged_df["prediction"] == 0) & (merged_df["new_child"] == 1)]
    )

    # Calculate precision, recall, and F1 score
    try:
        precision = true_positives / (true_positives + false_positives)
    except ZeroDivisionError:
        precision = 0
    try:
        recall = true_positives / (true_positives + false_negatives)
    except ZeroDivisionError:
        recall = 0
    try:
        f1_score = 2 * (precision * recall) / (precision + recall)
    except ZeroDivisionError:
        f1_score = 0
    # Write metric output to a new CSV file
    metrics_df = pd.DataFrame(
        {
            "accuracy": [accuracy],
            "precision": [precision],
            "recall": [recall],
            "f1_score": [f1_score],
        }
    )
    metrics_df.to_csv(output, index=False)


if __name__ == "__main__":
    args = parser.parse_args()
    predict(args.data_path, args.background_data_path, args.output)
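The removed score() function computed accuracy, precision, recall, and F1 by manual counting on the merged dataframe. Purely as an illustrative cross-check (not code from this repository), the same metrics could be reproduced with scikit-learn; only the column names nomem_encr, prediction, and new_child are taken from the removed code.

```python
# Illustrative alternative to the removed scoring logic, using scikit-learn.
# Assumes every ground-truth id has a matching prediction; the original code
# counted unmatched rows against accuracy and skipped them in the TP/FP/FN tallies.
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

def score_with_sklearn(prediction_path, ground_truth_path):
    predictions_df = pd.read_csv(prediction_path)
    ground_truth_df = pd.read_csv(ground_truth_path)
    merged = pd.merge(predictions_df, ground_truth_df, on="nomem_encr", how="right")

    y_true = merged["new_child"]
    y_pred = merged["prediction"]
    return pd.DataFrame(
        {
            "accuracy": [accuracy_score(y_true, y_pred)],
            "precision": [precision_score(y_true, y_pred, zero_division=0)],
            "recall": [recall_score(y_true, y_pred, zero_division=0)],
            "f1_score": [f1_score(y_true, y_pred, zero_division=0)],
        }
    )
```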