Skip to content

Commit b111aaf

Browse files
Author: Brad Miller
Commit message: global expansion
1 parent cfe3c71 commit b111aaf

File tree

4 files changed: +18 additions, -8 deletions

sourcecode/scoring/constants.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -78,8 +78,8 @@
7878

7979
# Scoring Groups
8080
coreGroups: Set[int] = {1, 2, 3, 6, 8, 9, 10, 11, 13, 14, 19, 21, 25}
81-
expansionGroups: Set[int] = {0, 4, 5, 7, 12, 16, 18, 20, 22, 23, 24, 26, 27, 28}
82-
expansionPlusGroups: Set[int] = {15, 17, 29, 30}
81+
expansionGroups: Set[int] = {0, 4, 5, 7, 12, 15, 16, 18, 20, 22, 23, 26, 27, 28, 29}
82+
expansionPlusGroups: Set[int] = {17, 24, 30, 31, 32}
8383

8484
# TSV Values
8585
notHelpfulValueTsv = "NOT_HELPFUL"

sourcecode/scoring/pandas_utils.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -657,6 +657,7 @@ def _inner(*args, **kwargs) -> Any:
657657
clArgs = kwargs["args"]
658658
else:
659659
# Handle the following, which expect args as the second positional argument:
660+
# birdwatch/scoring/src/main/python/run_post_selection_similarity.py
660661
# birdwatch/scoring/src/main/python/run_prescoring.py
661662
# birdwatch/scoring/src/main/python/run_final_scoring.py
662663
# birdwatch/scoring/src/main/python/run_contributor_scoring.py

sourcecode/scoring/run_scoring.py

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1039,11 +1039,21 @@ def _validate_contributor_scoring_output(helpfulnessScores: pd.DataFrame) -> pd.
10391039
return helpfulnessScores
10401040

10411041

1042+
def run_post_selection_similarity(notes: pd.DataFrame, ratings: pd.DataFrame) -> pd.DataFrame:
1043+
with c.time_block("Compute Post Selection Similarity"):
1044+
pss = PostSelectionSimilarity(notes, ratings)
1045+
postSelectionSimilarityValues = pss.get_post_selection_similarity_values()
1046+
del pss
1047+
gc.collect()
1048+
return postSelectionSimilarityValues
1049+
1050+
10421051
def run_prescoring(
10431052
notes: pd.DataFrame,
10441053
ratings: pd.DataFrame,
10451054
noteStatusHistory: pd.DataFrame,
10461055
userEnrollment: pd.DataFrame,
1056+
postSelectionSimilarityValues: pd.DataFrame,
10471057
seed: Optional[int] = None,
10481058
enabledScorers: Optional[Set[Scorers]] = None,
10491059
runParallel: bool = True,
@@ -1081,16 +1091,12 @@ def run_prescoring(
10811091
logger.info(
10821092
f"ratings summary before PSS: {get_df_fingerprint(ratings, [c.noteIdKey, c.raterParticipantIdKey])}"
10831093
)
1084-
with c.time_block("Compute Post Selection Similarity"):
1085-
pss = PostSelectionSimilarity(notes, ratings)
1086-
postSelectionSimilarityValues = pss.get_post_selection_similarity_values()
1094+
with c.time_block("Filter ratings by Post Selection Similarity"):
10871095
logger.info(f"Post Selection Similarity Prescoring: begin with {len(ratings)} ratings.")
10881096
ratings = filter_ratings_by_post_selection_similarity(
10891097
notes, ratings, postSelectionSimilarityValues
10901098
)
10911099
logger.info(f"Post Selection Similarity Prescoring: {len(ratings)} ratings remaining.")
1092-
del pss
1093-
gc.collect()
10941100
logger.info(
10951101
f"ratings summary after PSS: {get_df_fingerprint(ratings, [c.noteIdKey, c.raterParticipantIdKey])}"
10961102
)
@@ -1868,6 +1874,8 @@ def run_scoring(
18681874
filterPrescoringInputToSimulateDelayInHours,
18691875
)
18701876

1877+
postSelectionSimilarityValues = run_post_selection_similarity(notes=notes, ratings=ratings)
1878+
18711879
(
18721880
prescoringNoteModelOutput,
18731881
prescoringRaterModelOutput,
@@ -1880,6 +1888,7 @@ def run_scoring(
18801888
ratings=prescoringRatingsInput,
18811889
noteStatusHistory=noteStatusHistory,
18821890
userEnrollment=userEnrollment,
1891+
postSelectionSimilarityValues=postSelectionSimilarityValues,
18831892
seed=seed,
18841893
enabledScorers=enabledScorers,
18851894
runParallel=runParallel,

sourcecode/scoring/scoring_rules.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ class RuleID(Enum):
4040

4141
# Rules used in _meta_score.
4242
META_INITIAL_NMR = RuleAndVersion("MetaInitialNMR", "1.0", False)
43-
EXPANSION_MODEL = RuleAndVersion("ExpansionModel", "1.1", False)
43+
EXPANSION_MODEL = RuleAndVersion("ExpansionModel", "1.1", True)
4444
EXPANSION_PLUS_MODEL = RuleAndVersion("ExpansionPlusModel", "1.1", False)
4545
CORE_MODEL = RuleAndVersion("CoreModel", "1.1", True)
4646
COVERAGE_MODEL = RuleAndVersion("CoverageModel", "1.1", False)

0 commit comments

Comments
 (0)