diff --git a/searchcore/src/vespa/searchcore/proton/matching/queryenvironment.cpp b/searchcore/src/vespa/searchcore/proton/matching/queryenvironment.cpp index 83e8bf38b4f5..0bda095c6d6c 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/queryenvironment.cpp +++ b/searchcore/src/vespa/searchcore/proton/matching/queryenvironment.cpp @@ -49,10 +49,10 @@ QueryEnvironment::getAttributeContext() const return _attrContext; } -double -QueryEnvironment::get_average_field_length(const std::string &field_name) const +search::index::FieldLengthInfo +QueryEnvironment::get_field_length_info(const std::string &field_name) const { - return _field_length_inspector.get_field_length_info(field_name).get_average_field_length(); + return _field_length_inspector.get_field_length_info(field_name); } const search::fef::IIndexEnvironment & diff --git a/searchcore/src/vespa/searchcore/proton/matching/queryenvironment.h b/searchcore/src/vespa/searchcore/proton/matching/queryenvironment.h index 776b14280fa8..4323e0c8e3d4 100644 --- a/searchcore/src/vespa/searchcore/proton/matching/queryenvironment.h +++ b/searchcore/src/vespa/searchcore/proton/matching/queryenvironment.h @@ -76,7 +76,7 @@ class QueryEnvironment : public search::fef::IQueryEnvironment // inherited from search::fef::IQueryEnvironment const search::attribute::IAttributeContext & getAttributeContext() const override; - double get_average_field_length(const std::string &field_name) const override; + search::index::FieldLengthInfo get_field_length_info(const std::string &field_name) const override; // inherited from search::fef::IQueryEnvironment const search::fef::IIndexEnvironment & getIndexEnvironment() const override; diff --git a/searchlib/src/tests/features/bm25/bm25_test.cpp b/searchlib/src/tests/features/bm25/bm25_test.cpp index d6da8beb0ef6..21d42439fbff 100644 --- a/searchlib/src/tests/features/bm25/bm25_test.cpp +++ b/searchlib/src/tests/features/bm25/bm25_test.cpp @@ -96,10 +96,11 @@ TEST_F(Bm25BlueprintTest, blueprint_can_prepare_shared_state_with_average_field_ { auto blueprint = expect_setup_succeed({"is"}); test::QueryEnvironment query_env; - query_env.get_avg_field_lengths()["is"] = 10; + query_env.get_field_length_info_map()["is"] = + search::index::FieldLengthInfo(10.0, 10.0, 1); ObjectStore store; blueprint->prepareSharedState(query_env, store); - EXPECT_DOUBLE_EQ(10, as_value(*store.get("bm25.afl.is"))); + EXPECT_DOUBLE_EQ(10.0, as_value(*store.get("bm25.afl.is"))); } TEST_F(Bm25BlueprintTest, dump_features_for_all_index_fields) diff --git a/searchlib/src/vespa/searchlib/features/bm25_feature.cpp b/searchlib/src/vespa/searchlib/features/bm25_feature.cpp index 37549b05bab7..be158ebb8759 100644 --- a/searchlib/src/vespa/searchlib/features/bm25_feature.cpp +++ b/searchlib/src/vespa/searchlib/features/bm25_feature.cpp @@ -201,6 +201,13 @@ make_avg_field_length_key(const std::string& base_name, const std::string& field return base_name + ".afl." + field_name; } +double +get_average_field_length(const search::fef::IQueryEnvironment& env, const std::string& field_name) +{ + auto info = env.get_field_length_info(field_name); + return info.get_average_field_length(); +} + } void @@ -208,7 +215,7 @@ Bm25Blueprint::prepareSharedState(const fef::IQueryEnvironment& env, fef::IObjec { std::string key = make_avg_field_length_key(getBaseName(), _field->name()); if (store.get(key) == nullptr) { - double avg_field_length = _avg_field_length.value_or(env.get_average_field_length(_field->name())); + double avg_field_length = _avg_field_length.value_or(get_average_field_length(env, _field->name())); store.add(key, std::make_unique>(avg_field_length)); } } @@ -219,7 +226,7 @@ Bm25Blueprint::createExecutor(const fef::IQueryEnvironment& env, vespalib::Stash const auto* lookup_result = env.getObjectStore().get(make_avg_field_length_key(getBaseName(), _field->name())); double avg_field_length = lookup_result != nullptr ? as_value(*lookup_result) : - _avg_field_length.value_or(env.get_average_field_length(_field->name())); + _avg_field_length.value_or(get_average_field_length(env, _field->name())); return stash.create(*_field, env, avg_field_length, _k1_param, _b_param); } diff --git a/searchlib/src/vespa/searchlib/fef/iqueryenvironment.h b/searchlib/src/vespa/searchlib/fef/iqueryenvironment.h index ee6c484b164a..78dff3a5ba0c 100644 --- a/searchlib/src/vespa/searchlib/fef/iqueryenvironment.h +++ b/searchlib/src/vespa/searchlib/fef/iqueryenvironment.h @@ -5,6 +5,7 @@ #include "iindexenvironment.h" #include "objectstore.h" #include +#include namespace search::common { struct GeoLocationSpec; } @@ -80,7 +81,7 @@ class IQueryEnvironment * * @return average field length **/ - virtual double get_average_field_length(const std::string &field_name) const = 0; + virtual index::FieldLengthInfo get_field_length_info(const std::string &field_name) const = 0; /** * Returns a const view of the index environment. diff --git a/searchlib/src/vespa/searchlib/fef/phrase_splitter_query_env.h b/searchlib/src/vespa/searchlib/fef/phrase_splitter_query_env.h index dba428f4cea2..1dee834be291 100644 --- a/searchlib/src/vespa/searchlib/fef/phrase_splitter_query_env.h +++ b/searchlib/src/vespa/searchlib/fef/phrase_splitter_query_env.h @@ -77,7 +77,7 @@ class PhraseSplitterQueryEnv : public IQueryEnvironment return _queryEnv.getAllLocations(); } const attribute::IAttributeContext & getAttributeContext() const override { return _queryEnv.getAttributeContext(); } - double get_average_field_length(const std::string &field_name) const override { return _queryEnv.get_average_field_length(field_name); } + index::FieldLengthInfo get_field_length_info(const std::string &field_name) const override { return _queryEnv.get_field_length_info(field_name); } const IIndexEnvironment & getIndexEnvironment() const override { return _queryEnv.getIndexEnvironment(); } // Accessor methods used by PhraseSplitter diff --git a/searchlib/src/vespa/searchlib/fef/test/queryenvironment.h b/searchlib/src/vespa/searchlib/fef/test/queryenvironment.h index fb96a7bd060e..03ea9754626f 100644 --- a/searchlib/src/vespa/searchlib/fef/test/queryenvironment.h +++ b/searchlib/src/vespa/searchlib/fef/test/queryenvironment.h @@ -26,7 +26,7 @@ class QueryEnvironment : public IQueryEnvironment Properties _properties; std::vector _locations; search::attribute::IAttributeContext::UP _attrCtx; - std::unordered_map _avg_field_lengths; + std::unordered_map _field_length_info; public: /** @@ -48,12 +48,12 @@ class QueryEnvironment : public IQueryEnvironment return locations; } const search::attribute::IAttributeContext &getAttributeContext() const override { return *_attrCtx; } - double get_average_field_length(const std::string& field_name) const override { - auto itr = _avg_field_lengths.find(field_name); - if (itr != _avg_field_lengths.end()) { + index::FieldLengthInfo get_field_length_info(const std::string& field_name) const override { + auto itr = _field_length_info.find(field_name); + if (itr != _field_length_info.end()) { return itr->second; } - return 1.0; + return index::FieldLengthInfo(1.0, 1.0, 1); } const IIndexEnvironment &getIndexEnvironment() const override { assert(_indexEnv != NULL); return *_indexEnv; } @@ -92,7 +92,7 @@ class QueryEnvironment : public IQueryEnvironment /** Returns a reference to the location of this. */ void addLocation(const GeoLocationSpec &spec) { _locations.push_back(spec); } - std::unordered_map& get_avg_field_lengths() { return _avg_field_lengths; } + std::unordered_map& get_field_length_info_map() { return _field_length_info; } }; } diff --git a/searchlib/src/vespa/searchlib/fef/test/queryenvironmentbuilder.cpp b/searchlib/src/vespa/searchlib/fef/test/queryenvironmentbuilder.cpp index d6912867eaa2..a168ab713682 100644 --- a/searchlib/src/vespa/searchlib/fef/test/queryenvironmentbuilder.cpp +++ b/searchlib/src/vespa/searchlib/fef/test/queryenvironmentbuilder.cpp @@ -79,7 +79,7 @@ QueryEnvironmentBuilder::add_node(const FieldInfo &info) QueryEnvironmentBuilder& QueryEnvironmentBuilder::set_avg_field_length(const std::string& field_name, double avg_field_length) { - _queryEnv.get_avg_field_lengths()[field_name] = avg_field_length; + _queryEnv.get_field_length_info_map()[field_name] = index::FieldLengthInfo(avg_field_length, avg_field_length, 1); return *this; } diff --git a/streamingvisitors/src/vespa/searchvisitor/queryenvironment.h b/streamingvisitors/src/vespa/searchvisitor/queryenvironment.h index 6db53bbf4c04..f744c4a916af 100644 --- a/streamingvisitors/src/vespa/searchvisitor/queryenvironment.h +++ b/streamingvisitors/src/vespa/searchvisitor/queryenvironment.h @@ -46,7 +46,9 @@ class QueryEnvironment : public search::fef::IQueryEnvironment GeoLocationSpecPtrs getAllLocations() const override; const search::attribute::IAttributeContext & getAttributeContext() const override { return *_attrCtx; } - double get_average_field_length(const std::string &) const override { return 100.0; } + search::index::FieldLengthInfo get_field_length_info(const std::string &) const override { + return search::index::FieldLengthInfo(100.0, 100.0, 1); + } const search::fef::IIndexEnvironment & getIndexEnvironment() const override { return _indexEnv; } void addTerm(const search::fef::ITermData *term) { _queryTerms.push_back(term); }