@@ -1039,11 +1039,21 @@ def _validate_contributor_scoring_output(helpfulnessScores: pd.DataFrame) -> pd.
1039
1039
return helpfulnessScores
1040
1040
1041
1041
1042
def run_post_selection_similarity(notes: pd.DataFrame, ratings: pd.DataFrame) -> pd.DataFrame:
    """Compute post selection similarity values for the given notes and ratings.

    Builds a PostSelectionSimilarity object from the two inputs and returns the
    result of its get_post_selection_similarity_values() call.  The work is
    wrapped in the "Compute Post Selection Similarity" time block.

    Args:
      notes: notes DataFrame handed unchanged to PostSelectionSimilarity.
      ratings: ratings DataFrame handed unchanged to PostSelectionSimilarity.

    Returns:
      The value produced by get_post_selection_similarity_values(), annotated
      here as a pd.DataFrame.
    """
    with c.time_block("Compute Post Selection Similarity"):
        similarityModel = PostSelectionSimilarity(notes, ratings)
        similarityValues = similarityModel.get_post_selection_similarity_values()
        # Drop the only local reference to the helper object and run a
        # collection pass before handing the values back to the caller.
        del similarityModel
        gc.collect()
    return similarityValues
1049
+
1050
+
1042
1051
def run_prescoring (
1043
1052
notes : pd .DataFrame ,
1044
1053
ratings : pd .DataFrame ,
1045
1054
noteStatusHistory : pd .DataFrame ,
1046
1055
userEnrollment : pd .DataFrame ,
1056
+ postSelectionSimilarityValues : pd .DataFrame ,
1047
1057
seed : Optional [int ] = None ,
1048
1058
enabledScorers : Optional [Set [Scorers ]] = None ,
1049
1059
runParallel : bool = True ,
@@ -1081,16 +1091,12 @@ def run_prescoring(
1081
1091
logger .info (
1082
1092
f"ratings summary before PSS: { get_df_fingerprint (ratings , [c .noteIdKey , c .raterParticipantIdKey ])} "
1083
1093
)
1084
- with c .time_block ("Compute Post Selection Similarity" ):
1085
- pss = PostSelectionSimilarity (notes , ratings )
1086
- postSelectionSimilarityValues = pss .get_post_selection_similarity_values ()
1094
+ with c .time_block ("Filter ratings by Post Selection Similarity" ):
1087
1095
logger .info (f"Post Selection Similarity Prescoring: begin with { len (ratings )} ratings." )
1088
1096
ratings = filter_ratings_by_post_selection_similarity (
1089
1097
notes , ratings , postSelectionSimilarityValues
1090
1098
)
1091
1099
logger .info (f"Post Selection Similarity Prescoring: { len (ratings )} ratings remaining." )
1092
- del pss
1093
- gc .collect ()
1094
1100
logger .info (
1095
1101
f"ratings summary after PSS: { get_df_fingerprint (ratings , [c .noteIdKey , c .raterParticipantIdKey ])} "
1096
1102
)
@@ -1868,6 +1874,8 @@ def run_scoring(
1868
1874
filterPrescoringInputToSimulateDelayInHours ,
1869
1875
)
1870
1876
1877
+ postSelectionSimilarityValues = run_post_selection_similarity (notes = notes , ratings = ratings )
1878
+
1871
1879
(
1872
1880
prescoringNoteModelOutput ,
1873
1881
prescoringRaterModelOutput ,
@@ -1880,6 +1888,7 @@ def run_scoring(
1880
1888
ratings = prescoringRatingsInput ,
1881
1889
noteStatusHistory = noteStatusHistory ,
1882
1890
userEnrollment = userEnrollment ,
1891
+ postSelectionSimilarityValues = postSelectionSimilarityValues ,
1883
1892
seed = seed ,
1884
1893
enabledScorers = enabledScorers ,
1885
1894
runParallel = runParallel ,
0 commit comments