Skip to content

Commit

Permalink
Improved error handling in TextScoring.cpp. Better constructors in TextScoring.h.
Browse files Browse the repository at this point in the history
  • Loading branch information
Flixtastic committed Feb 27, 2025
1 parent 563ce80 commit 4c09e59
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 39 deletions.
43 changes: 23 additions & 20 deletions src/index/TextScoring.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,14 +71,17 @@ void ScoreData::addDocumentOrLiteralToScoreDataInvertedIndex(

// ____________________________________________________________________________
float ScoreData::getScore(WordIndex wordIndex, TextRecordIndex contextId) {
AD_CORRECTNESS_CHECK(!(scoringMetric_ == TextScoringMetric::EXPLICIT),
"This method shouldn't be called for explicit scores.");
// Retrieve inner map
if (!invertedIndex_.contains(wordIndex)) {
auto it = invertedIndex_.find(wordIndex);
if (it == invertedIndex_.end()) {
LOG(DEBUG) << "Didn't find word in Inverted Scoring Index. WordId: "
<< wordIndex << std::endl;
return 0;
}
calculateAVDL();
InnerMap& innerMap = invertedIndex_.find(wordIndex)->second;
InnerMap& innerMap = it->second;
size_t df = innerMap.size();
float idf = std::log2f(nofDocuments_ / df);

Expand All @@ -92,18 +95,17 @@ float ScoreData::getScore(WordIndex wordIndex, TextRecordIndex contextId) {
docId = DocumentIndex::make(contextId.get());
} else {
auto it = docIdSet_.upper_bound(convertedContextId);
if (it == docIdSet_.end()) {
if (docIdSet_.empty()) {
AD_THROW("docIdSet is empty and shouldn't be");
}
LOG(DEBUG) << "Requesting a contextId that is bigger than the largest "
"docId. contextId: "
<< contextId.get() << " Largest docId: " << *docIdSet_.rbegin()
<< std::endl;
return 0;
} else {
docId = *it;
}
AD_CORRECTNESS_CHECK(!docIdSet_.empty(),
"docIdSet is empty and shouldn't be.");
AD_CORRECTNESS_CHECK(
!(it == docIdSet_.end()),
absl::StrCat("Requesting a contextId that is bigger than the largest "
"docId. Requested contextId: ",
contextId.get(),
" Largest docId: ", docIdSet_.rbegin()->get(),
" This hints on faulty input data for wordsfile.tsv and "
"or docsfile.tsv"));
docId = *it;
}
auto ret1 = innerMap.find(docId);
if (ret1 == innerMap.end()) {
Expand All @@ -118,12 +120,13 @@ float ScoreData::getScore(WordIndex wordIndex, TextRecordIndex contextId) {
}

auto ret2 = docLengthMap_.find(docId);
if (ret2 == docLengthMap_.end()) {
LOG(DEBUG)
<< "The calculated docId doesn't exist in the dochLengthMap. docId: "
<< docId << std::endl;
return 0;
}
AD_CORRECTNESS_CHECK(
!(ret2 == docLengthMap_.end()),
absl::StrCat("The calculated docId doesn't exist in the docLengthMap. "
"The requested contextId was: ",
contextId.get(), " The calculated docId was: ", docId.get(),
" This hints on faulty input data for wordsfile.tsv and or "
"docsfile.tsv"));
size_t dl = ret2->second;
float alpha = (1 - b_ + b_ * (dl / averageDocumentLength_));
float tf_star = (tf * (k_ + 1)) / (k_ * alpha + tf);
Expand Down
28 changes: 9 additions & 19 deletions src/index/TextScoring.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,12 @@

class ScoreData {
public:
ScoreData()
: scoringMetric_(TextScoringMetric::EXPLICIT), b_(0.75), k_(1.75){};
ScoreData() = default;

ScoreData(LocaleManager localeManager)
: scoringMetric_(TextScoringMetric::EXPLICIT),
b_(0.75),
k_(1.75),
localeManager_(localeManager){};
ScoreData(LocaleManager localeManager) : localeManager_(localeManager){};

Check warning on line 14 in src/index/TextScoring.h

View check run for this annotation

Codecov / codecov/patch

src/index/TextScoring.h#L14

Added line #L14 was not covered by tests

ScoreData(LocaleManager localeManager, TextScoringMetric scoringMetric)
: scoringMetric_(scoringMetric),
b_(0.75),
k_(1.75),
localeManager_(localeManager){};
: scoringMetric_(scoringMetric), localeManager_(localeManager){};

Check warning on line 17 in src/index/TextScoring.h

View check run for this annotation

Codecov / codecov/patch

src/index/TextScoring.h#L17

Added line #L17 was not covered by tests

ScoreData(LocaleManager localeManager, TextScoringMetric scoringMetric,
std::pair<float, float> bAndKParam)
Expand All @@ -31,21 +23,19 @@ class ScoreData {
k_(bAndKParam.second),
localeManager_(localeManager){};

// Getters
TextScoringMetric getScoringMetric() { return scoringMetric_; }
TextScoringMetric getScoringMetric() const { return scoringMetric_; }

float getScore(WordIndex wordIndex, TextRecordIndex contextId);

// Functions
void calculateScoreData(const string& docsFileName, bool addWordsFromLiterals,
const Index::TextVocab& textVocab,
const Index::Vocab& vocab);

float getScore(WordIndex wordIndex, TextRecordIndex contextId);

private:
//
TextScoringMetric scoringMetric_;
float b_;
float k_;
TextScoringMetric scoringMetric_ = TextScoringMetric::EXPLICIT;
float b_ = 0.75;
float k_ = 1.75;

//
LocaleManager localeManager_;
Expand Down

0 comments on commit 4c09e59

Please sign in to comment.