1
+ import logging
2
+
1
3
from . import constants as c , explanation_tags
2
4
from .helpfulness_scores import author_helpfulness
3
5
from .note_ratings import get_ratings_with_scores , get_valid_ratings
4
6
5
7
import pandas as pd
6
8
7
9
10
+ logger = logging .getLogger ("birdwatch.contributor_state" )
11
+ logger .setLevel (logging .INFO )
12
+
13
+
8
14
def should_earn_in (contributorScoresWithEnrollment : pd .DataFrame ):
9
15
"""
10
16
The participant should earn in when they are in the earnedOutAcknowledged, earnedoutNoAck and newUser state.
@@ -124,21 +130,21 @@ def _get_rated_after_decision(
124
130
assert (
125
131
len (ratingInfos ) == len (ratings )
126
132
), f"assigning a status timestamp shouldn't decrease number of ratings: { len (ratingInfos )} vs. { len (ratings )} "
127
- print ("Calculating ratedAfterDecision:" )
128
- print (f" Total ratings: { len (ratingInfos )} " )
133
+ logger . info ("Calculating ratedAfterDecision:" )
134
+ logger . info (f" Total ratings: { len (ratingInfos )} " )
129
135
ratingInfos = ratingInfos [~ pd .isna (ratingInfos [c .timestampMillisOfNoteMostRecentNonNMRLabelKey ])]
130
- print (f" Total ratings on notes with status: { len (ratingInfos )} " )
136
+ logger . info (f" Total ratings on notes with status: { len (ratingInfos )} " )
131
137
ratingInfos = ratingInfos [
132
138
ratingInfos [c .createdAtMillisKey ] > ratingInfos [c .timestampMillisOfNoteMostRecentNonNMRLabelKey ]
133
139
]
134
- print (f" Total ratings after status: { len (ratingInfos )} " )
140
+ logger . info (f" Total ratings after status: { len (ratingInfos )} " )
135
141
ratingInfos [c .ratedAfterDecision ] = 1
136
142
ratedAfterDecision = (
137
143
ratingInfos [[c .raterParticipantIdKey , c .ratedAfterDecision ]]
138
144
.groupby (c .raterParticipantIdKey )
139
145
.sum ()
140
146
)
141
- print (f" Total raters rating after decision: { len (ratedAfterDecision )} " )
147
+ logger . info (f" Total raters rating after decision: { len (ratedAfterDecision )} " )
142
148
return ratedAfterDecision
143
149
144
150
@@ -421,7 +427,7 @@ def get_contributor_state(
421
427
ratings : pd .DataFrame ,
422
428
noteStatusHistory : pd .DataFrame ,
423
429
userEnrollment : pd .DataFrame ,
424
- logging : bool = True ,
430
+ log : bool = True ,
425
431
) -> pd .DataFrame :
426
432
"""
427
433
Given scored notes, ratings, note status history, the current user enrollment state, this
@@ -433,7 +439,7 @@ def get_contributor_state(
433
439
ratings (pd.DataFrame): all ratings
434
440
noteStatusHistory (pd.DataFrame): history of note statuses
435
441
userEnrollment (pd.DataFrame): User enrollment for BW participants.
436
- logging (bool): Should we log
442
+ log (bool): Should we log
437
443
Returns:
438
444
pd.DataFrame: contributorScoresWithEnrollment The contributor scores with enrollments
439
445
"""
@@ -582,27 +588,22 @@ def get_contributor_state(
582
588
# users that do not have an id.
583
589
contributorScoresWithEnrollment .dropna (subset = [c .raterParticipantIdKey ], inplace = True )
584
590
585
- if logging :
586
- print ("Enrollment State" )
587
- print (
588
- "Number of Earned In" ,
589
- len (contributorScoresWithEnrollment [contributorScoresWithEnrollment [c .enrollmentState ] == 0 ]),
591
+ if log :
592
+ logger .info ("Enrollment State" )
593
+ logger .info (
594
+ f"Number of Earned In { len (contributorScoresWithEnrollment [contributorScoresWithEnrollment [c .enrollmentState ] == 0 ])} "
590
595
)
591
- print (
592
- "Number At Risk" ,
593
- len (contributorScoresWithEnrollment [contributorScoresWithEnrollment [c .enrollmentState ] == 1 ]),
596
+ logger .info (
597
+ f"Number At Risk { len (contributorScoresWithEnrollment [contributorScoresWithEnrollment [c .enrollmentState ] == 1 ])} "
594
598
)
595
- print (
596
- "Number of Earn Out No Ack" ,
597
- len (contributorScoresWithEnrollment [contributorScoresWithEnrollment [c .enrollmentState ] == 2 ]),
599
+ logger .info (
600
+ f"Number of Earn Out No Ack { len (contributorScoresWithEnrollment [contributorScoresWithEnrollment [c .enrollmentState ] == 2 ])} "
598
601
)
599
- print (
600
- "Number of Earned Out Ack" ,
601
- len (contributorScoresWithEnrollment [contributorScoresWithEnrollment [c .enrollmentState ] == 3 ]),
602
+ logger .info (
603
+ f"Number of Earned Out Ack { len (contributorScoresWithEnrollment [contributorScoresWithEnrollment [c .enrollmentState ] == 3 ])} "
602
604
)
603
- print (
604
- "Number of New Users" ,
605
- len (contributorScoresWithEnrollment [contributorScoresWithEnrollment [c .enrollmentState ] == 4 ]),
605
+ logger .info (
606
+ f"Number of New Users { len (contributorScoresWithEnrollment [contributorScoresWithEnrollment [c .enrollmentState ] == 4 ])} "
606
607
)
607
608
608
609
return contributorScoresWithEnrollment , mappedUserEnrollment
@@ -615,7 +616,7 @@ def get_contributor_scores(
615
616
lastNNotes = - 1 ,
616
617
countNMRNotesLast : bool = False ,
617
618
sinceLastEarnOut : bool = False ,
618
- logging : bool = True ,
619
+ log : bool = True ,
619
620
) -> pd .DataFrame :
620
621
"""
621
622
Given the outputs of the MF model, this function aggregates stats over notes and ratings. The
@@ -628,7 +629,7 @@ def get_contributor_scores(
628
629
lastNNotes (int): count over the last n notes
629
630
countNMRNotesLast (bool): count NMR notes last. Useful when you want to calculate over a limited set of CRH + CRNH notes
630
631
sinceLastEarnOut: only count notes since last Earn Out event
631
- logging (bool): Should we log?
632
+ log (bool): Should we log?
632
633
Returns:
633
634
pd.DataFrame: contributorScores - rating + note aggregates per contributor.
634
635
"""
@@ -676,7 +677,7 @@ def get_contributor_scores(
676
677
]
677
678
)
678
679
679
- if logging :
680
- print ( "Number Contributor Counts: " , len (contributorCounts ))
680
+ if log :
681
+ logger . info ( f "Number Contributor Counts: { len (contributorCounts )} " )
681
682
682
683
return contributorCounts
0 commit comments