diff --git a/src/index/TextScoring.cpp b/src/index/TextScoring.cpp index 11de632fbb..73bae09618 100644 --- a/src/index/TextScoring.cpp +++ b/src/index/TextScoring.cpp @@ -71,14 +71,17 @@ void ScoreData::addDocumentOrLiteralToScoreDataInvertedIndex( // ____________________________________________________________________________ float ScoreData::getScore(WordIndex wordIndex, TextRecordIndex contextId) { + AD_CORRECTNESS_CHECK(!(scoringMetric_ == TextScoringMetric::EXPLICIT), + "This method shouldn't be called for explicit scores."); // Retrieve inner map - if (!invertedIndex_.contains(wordIndex)) { + auto it = invertedIndex_.find(wordIndex); + if (it == invertedIndex_.end()) { LOG(DEBUG) << "Didn't find word in Inverted Scoring Index. WordId: " << wordIndex << std::endl; return 0; } calculateAVDL(); - InnerMap& innerMap = invertedIndex_.find(wordIndex)->second; + InnerMap& innerMap = it->second; size_t df = innerMap.size(); float idf = std::log2f(nofDocuments_ / df); @@ -92,18 +95,17 @@ float ScoreData::getScore(WordIndex wordIndex, TextRecordIndex contextId) { docId = DocumentIndex::make(contextId.get()); } else { auto it = docIdSet_.upper_bound(convertedContextId); - if (it == docIdSet_.end()) { - if (docIdSet_.empty()) { - AD_THROW("docIdSet is empty and shouldn't be"); - } - LOG(DEBUG) << "Requesting a contextId that is bigger than the largest " - "docId. contextId: " - << contextId.get() << " Largest docId: " << *docIdSet_.rbegin() - << std::endl; - return 0; - } else { - docId = *it; - } + AD_CORRECTNESS_CHECK(!docIdSet_.empty(), + "docIdSet is empty and shouldn't be."); + AD_CORRECTNESS_CHECK( + !(it == docIdSet_.end()), + absl::StrCat("Requesting a contextId that is bigger than the largest " + "docId. Requested contextId: ", + contextId.get(), + " Largest docId: ", docIdSet_.rbegin()->get(), + " This hints on faulty input data for wordsfile.tsv and " + "or docsfile.tsv")); + docId = *it; } auto ret1 = innerMap.find(docId); if (ret1 == innerMap.end()) { @@ -118,12 +120,13 @@ float ScoreData::getScore(WordIndex wordIndex, TextRecordIndex contextId) { } auto ret2 = docLengthMap_.find(docId); - if (ret2 == docLengthMap_.end()) { - LOG(DEBUG) - << "The calculated docId doesn't exist in the dochLengthMap. docId: " - << docId << std::endl; - return 0; - } + AD_CORRECTNESS_CHECK( + !(ret2 == docLengthMap_.end()), + absl::StrCat("The calculated docId doesn't exist in the docLengthMap. " + "The requested contextId was: ", + contextId.get(), " The calculated docId was: ", docId.get(), + " This hints on faulty input data for wordsfile.tsv and or " + "docsfile.tsv")); size_t dl = ret2->second; float alpha = (1 - b_ + b_ * (dl / averageDocumentLength_)); float tf_star = (tf * (k_ + 1)) / (k_ * alpha + tf); diff --git a/src/index/TextScoring.h b/src/index/TextScoring.h index a5d93a2f05..de45e03d91 100644 --- a/src/index/TextScoring.h +++ b/src/index/TextScoring.h @@ -9,20 +9,12 @@ class ScoreData { public: - ScoreData() - : scoringMetric_(TextScoringMetric::EXPLICIT), b_(0.75), k_(1.75){}; + ScoreData() = default; - ScoreData(LocaleManager localeManager) - : scoringMetric_(TextScoringMetric::EXPLICIT), - b_(0.75), - k_(1.75), - localeManager_(localeManager){}; + ScoreData(LocaleManager localeManager) : localeManager_(localeManager){}; ScoreData(LocaleManager localeManager, TextScoringMetric scoringMetric) - : scoringMetric_(scoringMetric), - b_(0.75), - k_(1.75), - localeManager_(localeManager){}; + : scoringMetric_(scoringMetric), localeManager_(localeManager){}; ScoreData(LocaleManager localeManager, TextScoringMetric scoringMetric, std::pair bAndKParam) @@ -31,21 +23,19 @@ class ScoreData { k_(bAndKParam.second), localeManager_(localeManager){}; - // Getters - TextScoringMetric getScoringMetric() { return scoringMetric_; } + TextScoringMetric getScoringMetric() const { return scoringMetric_; } + + float getScore(WordIndex wordIndex, TextRecordIndex contextId); - // Functions void calculateScoreData(const string& docsFileName, bool addWordsFromLiterals, const Index::TextVocab& textVocab, const Index::Vocab& vocab); - float getScore(WordIndex wordIndex, TextRecordIndex contextId); - private: // - TextScoringMetric scoringMetric_; - float b_; - float k_; + TextScoringMetric scoringMetric_ = TextScoringMetric::EXPLICIT; + float b_ = 0.75; + float k_ = 1.75; // LocaleManager localeManager_;