Skip to content

Commit 2d6d6a5

Browse files
authored
Merge pull request #257 from twitter/jbaxter/2024_08_21
New NSH columns and cleanup
2 parents 956d8bd + 167be83 commit 2d6d6a5

22 files changed

+421
-303
lines changed

sourcecode/scoring/constants.py

+10-3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from contextlib import contextmanager
22
from dataclasses import dataclass
33
from enum import Enum
4+
import logging
45
import os
56
import time
67
from typing import Dict, Optional, Set
@@ -9,6 +10,10 @@
910
import pandas as pd
1011

1112

13+
logger = logging.getLogger("birdwatch.constants")
14+
logger.setLevel(logging.INFO)
15+
16+
1217
# Default number of threads to use in torch if os.cpu_count() is unavailable
1318
# and no value is specified.
1419
defaultNumThreads = os.cpu_count() or 8
@@ -461,6 +466,8 @@ def rater_factor_key(i):
461466
updatedTimestampMillisOfNmrDueToMinStableCrhTimeKey = (
462467
"updatedTimestampMillisOfNmrDueToMinStableCrhTime"
463468
)
469+
timestampMinuteOfFinalScoringOutput = "timestampMinuteOfFinalScoringOutput"
470+
timestampMillisOfFirstNmrDueToMinStableCrhTimeKey = "timestampMillisOfFirstNmrDueToMinStableCrhTime"
464471

465472
noteStatusHistoryTSVColumnsAndTypes = [
466473
(noteIdKey, np.int64),
@@ -484,6 +491,8 @@ def rater_factor_key(i):
484491
(timestampMillisOfNmrDueToMinStableCrhTimeKey, np.double), # double because nullable.
485492
(currentMultiGroupStatusKey, "category"),
486493
(currentModelingMultiGroupKey, np.double), # TODO: int
494+
(timestampMinuteOfFinalScoringOutput, np.double), # double because nullable.
495+
(timestampMillisOfFirstNmrDueToMinStableCrhTimeKey, np.double), # double because nullable.
487496
]
488497
noteStatusHistoryTSVColumns = [col for (col, dtype) in noteStatusHistoryTSVColumnsAndTypes]
489498
noteStatusHistoryTSVTypes = [dtype for (col, dtype) in noteStatusHistoryTSVColumnsAndTypes]
@@ -818,8 +827,6 @@ def rater_factor_key(i):
818827
inputPathsTSVColumns = [col for (col, _) in inputPathsTSVColumnsAndTypes]
819828
inputPathsTSVTypeMapping = {col: dtype for (col, dtype) in inputPathsTSVColumnsAndTypes}
820829

821-
timestampMinuteOfFinalScoringOutput = "timestampMinuteOfFinalScoringOutput"
822-
823830

824831
@contextmanager
825832
def time_block(label):
@@ -828,7 +835,7 @@ def time_block(label):
828835
yield
829836
finally:
830837
end = time.time()
831-
print(f"{label} elapsed time: {end - start:.2f} secs ({((end - start) / 60.0):.2f} mins)")
838+
logger.info(f"{label} elapsed time: {end - start:.2f} secs ({((end - start) / 60.0):.2f} mins)")
832839

833840

834841
### TODO: weave through second round intercept.

sourcecode/scoring/contributor_state.py

+29-28
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,16 @@
1+
import logging
2+
13
from . import constants as c, explanation_tags
24
from .helpfulness_scores import author_helpfulness
35
from .note_ratings import get_ratings_with_scores, get_valid_ratings
46

57
import pandas as pd
68

79

10+
logger = logging.getLogger("birdwatch.contributor_state")
11+
logger.setLevel(logging.INFO)
12+
13+
814
def should_earn_in(contributorScoresWithEnrollment: pd.DataFrame):
915
"""
1016
The participant should earn in when they are in the earnedOutAcknowledged, earnedoutNoAck and newUser state.
@@ -124,21 +130,21 @@ def _get_rated_after_decision(
124130
assert (
125131
len(ratingInfos) == len(ratings)
126132
), f"assigning a status timestamp shouldn't decrease number of ratings: {len(ratingInfos)} vs. {len(ratings)}"
127-
print("Calculating ratedAfterDecision:")
128-
print(f" Total ratings: {len(ratingInfos)}")
133+
logger.info("Calculating ratedAfterDecision:")
134+
logger.info(f" Total ratings: {len(ratingInfos)}")
129135
ratingInfos = ratingInfos[~pd.isna(ratingInfos[c.timestampMillisOfNoteMostRecentNonNMRLabelKey])]
130-
print(f" Total ratings on notes with status: {len(ratingInfos)}")
136+
logger.info(f" Total ratings on notes with status: {len(ratingInfos)}")
131137
ratingInfos = ratingInfos[
132138
ratingInfos[c.createdAtMillisKey] > ratingInfos[c.timestampMillisOfNoteMostRecentNonNMRLabelKey]
133139
]
134-
print(f" Total ratings after status: {len(ratingInfos)}")
140+
logger.info(f" Total ratings after status: {len(ratingInfos)}")
135141
ratingInfos[c.ratedAfterDecision] = 1
136142
ratedAfterDecision = (
137143
ratingInfos[[c.raterParticipantIdKey, c.ratedAfterDecision]]
138144
.groupby(c.raterParticipantIdKey)
139145
.sum()
140146
)
141-
print(f" Total raters rating after decision: {len(ratedAfterDecision)}")
147+
logger.info(f" Total raters rating after decision: {len(ratedAfterDecision)}")
142148
return ratedAfterDecision
143149

144150

@@ -421,7 +427,7 @@ def get_contributor_state(
421427
ratings: pd.DataFrame,
422428
noteStatusHistory: pd.DataFrame,
423429
userEnrollment: pd.DataFrame,
424-
logging: bool = True,
430+
log: bool = True,
425431
) -> pd.DataFrame:
426432
"""
427433
Given scored notes, ratings, note status history, the current user enrollment state, this
@@ -433,7 +439,7 @@ def get_contributor_state(
433439
ratings (pd.DataFrame): all ratings
434440
noteStatusHistory (pd.DataFrame): history of note statuses
435441
userEnrollment (pd.DataFrame): User enrollment for BW participants.
436-
logging (bool): Should we log
442+
log (bool): Should we log
437443
Returns:
438444
pd.DataFrame: contributorScoresWithEnrollment The contributor scores with enrollments
439445
"""
@@ -582,27 +588,22 @@ def get_contributor_state(
582588
# users that do not have an id.
583589
contributorScoresWithEnrollment.dropna(subset=[c.raterParticipantIdKey], inplace=True)
584590

585-
if logging:
586-
print("Enrollment State")
587-
print(
588-
"Number of Earned In",
589-
len(contributorScoresWithEnrollment[contributorScoresWithEnrollment[c.enrollmentState] == 0]),
591+
if log:
592+
logger.info("Enrollment State")
593+
logger.info(
594+
f"Number of Earned In {len(contributorScoresWithEnrollment[contributorScoresWithEnrollment[c.enrollmentState] == 0])}"
590595
)
591-
print(
592-
"Number At Risk",
593-
len(contributorScoresWithEnrollment[contributorScoresWithEnrollment[c.enrollmentState] == 1]),
596+
logger.info(
597+
f"Number At Risk {len(contributorScoresWithEnrollment[contributorScoresWithEnrollment[c.enrollmentState] == 1])}"
594598
)
595-
print(
596-
"Number of Earn Out No Ack",
597-
len(contributorScoresWithEnrollment[contributorScoresWithEnrollment[c.enrollmentState] == 2]),
599+
logger.info(
600+
f"Number of Earn Out No Ack {len(contributorScoresWithEnrollment[contributorScoresWithEnrollment[c.enrollmentState] == 2])}"
598601
)
599-
print(
600-
"Number of Earned Out Ack",
601-
len(contributorScoresWithEnrollment[contributorScoresWithEnrollment[c.enrollmentState] == 3]),
602+
logger.info(
603+
f"Number of Earned Out Ack {len(contributorScoresWithEnrollment[contributorScoresWithEnrollment[c.enrollmentState] == 3])}"
602604
)
603-
print(
604-
"Number of New Users",
605-
len(contributorScoresWithEnrollment[contributorScoresWithEnrollment[c.enrollmentState] == 4]),
605+
logger.info(
606+
f"Number of New Users {len(contributorScoresWithEnrollment[contributorScoresWithEnrollment[c.enrollmentState] == 4])}"
606607
)
607608

608609
return contributorScoresWithEnrollment, mappedUserEnrollment
@@ -615,7 +616,7 @@ def get_contributor_scores(
615616
lastNNotes=-1,
616617
countNMRNotesLast: bool = False,
617618
sinceLastEarnOut: bool = False,
618-
logging: bool = True,
619+
log: bool = True,
619620
) -> pd.DataFrame:
620621
"""
621622
Given the outputs of the MF model, this function aggregates stats over notes and ratings. The
@@ -628,7 +629,7 @@ def get_contributor_scores(
628629
lastNNotes (int): count over the last n notes
629630
countNMRNotesLast (bool): count NMR notes last. Useful when you want to calculate over a limited set of CRH + CRNH notes
630631
sinceLastEarnOut: only count notes since last Earn Out event
631-
logging (bool): Should we log?
632+
log (bool): Should we log?
632633
Returns:
633634
pd.DataFrame: contributorScores - rating + note aggregates per contributor.
634635
"""
@@ -676,7 +677,7 @@ def get_contributor_scores(
676677
]
677678
)
678679

679-
if logging:
680-
print("Number Contributor Counts: ", len(contributorCounts))
680+
if log:
681+
logger.info(f"Number Contributor Counts: {len(contributorCounts)}")
681682

682683
return contributorCounts

sourcecode/scoring/helpfulness_scores.py

+15-11
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import logging
12
from typing import Optional
23

34
from . import constants as c
@@ -6,6 +7,10 @@
67
import pandas as pd
78

89

10+
logger = logging.getLogger("birdwatch.helpfulness_scores")
11+
logger.setLevel(logging.INFO)
12+
13+
914
def author_helpfulness(
1015
scoredNotes: pd.DataFrame,
1116
noteInterceptKey: str,
@@ -199,15 +204,15 @@ def compute_general_helpfulness_scores(
199204
def filter_ratings_by_helpfulness_scores(
200205
ratingsForTraining: pd.DataFrame,
201206
helpfulnessScores: pd.DataFrame,
202-
logging: bool = True,
207+
log: bool = True,
203208
):
204209
"""Filter out ratings from raters whose helpfulness scores are too low.
205210
See https://twitter.github.io/communitynotes/contributor-scores/#filtering-ratings-based-on-helpfulness-scores.
206211
207212
Args:
208213
ratingsForTraining pandas.DataFrame: unfiltered input ratings
209214
helpfulnessScores pandas.DataFrame: helpfulness scores to use to determine which raters to filter out.
210-
logging (bool, optional): debug output. Defaults to True.
215+
log (bool, optional): debug output. Defaults to True.
211216
212217
Returns:
213218
filtered_ratings pandas.DataFrame: same schema as input ratings, but filtered.
@@ -219,15 +224,14 @@ def filter_ratings_by_helpfulness_scores(
219224
ratingsForTraining, on=c.raterParticipantIdKey
220225
)
221226

222-
if logging:
223-
print("Unique Raters: ", len(np.unique(ratingsForTraining[c.raterParticipantIdKey])))
224-
print("People (Authors or Raters) With Helpfulness Scores: ", len(helpfulnessScores))
225-
print("Raters Included Based on Helpfulness Scores: ", len(includedUsers))
226-
print(
227-
"Included Raters who have rated at least 1 note in the final dataset: ",
228-
len(np.unique(ratingsHelpfulnessScoreFiltered[c.raterParticipantIdKey])),
227+
if log:
228+
logger.info(f"Unique Raters: {len(np.unique(ratingsForTraining[c.raterParticipantIdKey]))}")
229+
logger.info(f"People (Authors or Raters) With Helpfulness Scores: {len(helpfulnessScores)}")
230+
logger.info(f"Raters Included Based on Helpfulness Scores: {len(includedUsers)}")
231+
logger.info(
232+
f"Included Raters who have rated at least 1 note in the final dataset: {len(np.unique(ratingsHelpfulnessScoreFiltered[c.raterParticipantIdKey]))}",
229233
)
230-
print("Number of Ratings Used For 1st Training: ", len(ratingsForTraining))
231-
print("Number of Ratings for Final Training: ", len(ratingsHelpfulnessScoreFiltered))
234+
logger.info(f"Number of Ratings Used For 1st Training: {len(ratingsForTraining)}")
235+
logger.info(f"Number of Ratings for Final Training: {len(ratingsHelpfulnessScoreFiltered)}")
232236

233237
return ratingsHelpfulnessScoreFiltered

0 commit comments

Comments
 (0)