Skip to content

Commit 4e80f6e

Browse files
authored
use filters when searching for matching topics in agenda (#871)
* use filters when searching for matching topics in agenda CPCN-745
1 parent c0e062f commit 4e80f6e

7 files changed

Lines changed: 154 additions & 44 deletions

File tree

newsroom/agenda/agenda.py

Lines changed: 23 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
save_user_notifications,
3131
UserNotification,
3232
)
33+
from newsroom.search import BoolQuery, BoolQueryParams
3334
from newsroom.template_filters import is_admin_or_internal, is_admin
3435
from newsroom.utils import (
3536
get_user_dict,
@@ -501,7 +502,7 @@ def nested_query(path, query, inner_hits=True, name=None):
501502
planning_filters = coverage_filters + ["agendas"]
502503

503504

504-
def _filter_terms(filters, item_type):
505+
def _filter_terms(filters, item_type, highlights=False):
505506
must_term_filters = []
506507
must_not_term_filters = []
507508
for key, val in filters.items():
@@ -623,6 +624,7 @@ def _filter_terms(filters, item_type):
623624
path="planning_items",
624625
query={"bool": {"filter": [{"terms": {f"planning_items.{agg_field}": val}}]}},
625626
name=key,
627+
inner_hits=highlights,
626628
),
627629
],
628630
},
@@ -864,12 +866,11 @@ def prefill_search_items(self, search):
864866

865867
pass
866868

867-
def apply_filters(self, search, section_filters=None):
869+
def apply_filters(self, search: SearchQuery, section_filters=None):
868870
"""Generate and apply the different search filters
869871
870872
:param newsroom.search.SearchQuery search: the search query instance
871873
"""
872-
873874
# First construct the product query
874875
self.apply_company_filter(search)
875876

@@ -913,8 +914,8 @@ def apply_filters(self, search, section_filters=None):
913914
{
914915
# Match Events before ``item_type`` field was added
915916
"bool": {
916-
"must_not": {"exists": {"field": "item_type"}},
917-
"filter": {"exists": {"field": "event_id"}},
917+
"must_not": [{"exists": {"field": "item_type"}}],
918+
"filter": [{"exists": {"field": "event_id"}}],
918919
},
919920
},
920921
],
@@ -949,12 +950,12 @@ def apply_filters(self, search, section_filters=None):
949950
{
950951
"bool": {
951952
"should": [
952-
{"bool": {"must_not": {"exists": {"field": "item_type"}}}},
953+
{"bool": {"must_not": [{"exists": {"field": "item_type"}}]}},
953954
{"term": {"item_type": "event"}},
954955
{
955956
"bool": {
956-
"filter": {"term": {"item_type": "planning"}},
957-
"must_not": {"exists": {"field": "event_id"}},
957+
"filter": [{"term": {"item_type": "planning"}}],
958+
"must_not": [{"exists": {"field": "event_id"}}],
958959
},
959960
},
960961
],
@@ -979,14 +980,14 @@ def apply_product_filter(self, search, product):
979980
if product.get("planning_item_query") and search.item_type != "events":
980981
search.planning_items_should.append(planning_items_query_string(product.get("planning_item_query")))
981982

982-
def apply_request_filter(self, search):
983+
def apply_request_filter(self, search: SearchQuery, highlights=False):
983984
"""Generate the request filters
984985
985986
:param newsroom.search.SearchQuery search: The search query instance
986987
"""
987988

988989
if search.args.get("q"):
989-
test_query = {"bool": {"should": []}}
990+
test_query: BoolQuery = {"bool": {"should": []}}
990991
try:
991992
q = json.loads(search.args.get("q"))
992993
if isinstance(q, dict):
@@ -1014,17 +1015,21 @@ def apply_request_filter(self, search):
10141015
)
10151016

10161017
if search.args.get("id"):
1017-
search.query["bool"]["filter"].append({"term": {"_id": search.args["id"]}})
1018+
search.query["bool"]["filter"].append({"ids": {"values": [search.args["id"]]}})
10181019

10191020
if search.args.get("ids"):
1020-
search.query["bool"]["filter"].append({"terms": {"_id": search.args["ids"]}})
1021+
search.query["bool"]["filter"].append({"ids": {"values": search.args["ids"]}})
10211022

10221023
if search.args.get("bookmarks"):
10231024
set_saved_items_query(search.query, search.args["bookmarks"])
10241025

10251026
if search.args.get("date_from") or search.args.get("date_to"):
10261027
_set_event_date_range(search)
10271028

1029+
filters = self.parse_filters(search)
1030+
if filters:
1031+
self.set_bool_query_from_filters(search.query["bool"], filters, search.item_type, highlights=highlights)
1032+
10281033
self.apply_request_advanced_search(search)
10291034

10301035
def set_post_filter(self, source: Dict[str, Any], req: ParsedRequest, item_type: Optional[str] = None):
@@ -1038,8 +1043,6 @@ def set_post_filter(self, source: Dict[str, Any], req: ParsedRequest, item_type:
10381043
if app.config.get("FILTER_BY_POST_FILTER", False):
10391044
source["post_filter"] = {"bool": {}}
10401045
self.set_bool_query_from_filters(source["post_filter"]["bool"], filters, item_type)
1041-
else:
1042-
self.set_bool_query_from_filters(source["query"]["bool"], filters, item_type)
10431046

10441047
def gen_source_from_search(self, search):
10451048
"""Generate the eve source object from the search query instance
@@ -1277,9 +1280,13 @@ def update_coverage_details(coverage):
12771280
return agenda_items
12781281

12791282
def set_bool_query_from_filters(
1280-
self, bool_query: Dict[str, Any], filters: Dict[str, Any], item_type: Optional[str] = None
1283+
self,
1284+
bool_query: BoolQueryParams,
1285+
filters: Dict[str, Any],
1286+
item_type: Optional[str] = None,
1287+
highlights=False,
12811288
):
1282-
filter_terms = _filter_terms(filters, item_type)
1289+
filter_terms = _filter_terms(filters, item_type, highlights=highlights)
12831290
bool_query.setdefault("filter", [])
12841291
bool_query["filter"] += filter_terms["must_term_filters"]
12851292

newsroom/news_api/news/search_service.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -318,7 +318,7 @@ def validate_unknown_params(self, search, whitelist, allow_filtering=True):
318318
desc = "Multiple values received for parameter ({})"
319319
raise UnexpectedParameterError(desc=desc.format(param_name))
320320

321-
def apply_request_filter(self, search):
321+
def apply_request_filter(self, search, highlights=True):
322322
"""Generate the filters from request args
323323
324324
:param newsroom.search.SearchQuery search: The search query instance

newsroom/search/__init__.py

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from flask import render_template
2+
from typing import Dict, List, Literal, TypedDict, Union
23

34
from newsroom.gettext import get_session_locale
45
from newsroom.email import get_language_template_name
@@ -13,3 +14,73 @@ def render_search_tips_html(search_type) -> str:
1314
template_name = get_language_template_name(f"search_tips_{search_type}", locale, "html")
1415

1516
return render_template(template_name)
17+
18+
19+
class QueryStringParams(TypedDict, total=False):
    """Options placed inside a ``query_string`` clause.

    Declared with ``total=False``, so every key is optional.
    """

    # The query text to search for
    query: str
    # Operator applied between terms of the query text
    default_operator: Literal["AND", "OR"]
    analyze_wildcard: bool
    lenient: bool
    # Names of the fields the query text is matched against
    fields: List[str]
    type: Literal["cross_fields", "best_fields"]
26+
27+
28+
# Scalar value type shared by the match, term, terms and range query definitions
Value = Union[str, bool, int]
29+
30+
31+
class QueryStringQuery(TypedDict):
    """A complete ``query_string`` query: ``{"query_string": {...}}``."""

    query_string: QueryStringParams
33+
34+
35+
class ExistsQueryParam(TypedDict):
    """Body of an ``exists`` clause, e.g. ``{"field": "item_type"}``."""

    # Name of the field whose presence is tested
    field: str
37+
38+
39+
class ExistsQuery(TypedDict):
    """A complete ``exists`` query: ``{"exists": {"field": ...}}``."""

    exists: ExistsQueryParam
41+
42+
43+
class MatchQuery(TypedDict):
    """A ``match`` query: ``{"match": {key: value}}``.

    The key is presumably the name of the field to match against.
    """

    match: Dict[str, Value]
45+
46+
47+
class TermQuery(TypedDict):
    """A ``term`` query for a single value, e.g. ``{"term": {"item_type": "event"}}``."""

    term: Dict[str, Value]
49+
50+
51+
class TermsQuery(TypedDict):
    """A ``terms`` query: ``{"terms": {key: [value, ...]}}``."""

    terms: Dict[str, List[Value]]
53+
54+
55+
class IDsQuery(TypedDict):
    """An ``ids`` query selecting documents by ID: ``{"ids": {"values": [...]}}``."""

    # The only permitted inner key is ``values``, holding the list of IDs
    ids: Dict[Literal["values"], List[str]]
57+
58+
59+
class RangeQuery(TypedDict):
    """A ``range`` query, e.g. ``{"range": {"embargoed": {"lt": "now"}}}``."""

    # Outer key: field name; inner keys: the bound operators and their values
    range: Dict[str, Dict[Literal["gte", "gt", "lte", "lt"], Value]]
61+
62+
63+
class BoolQueryParams(TypedDict, total=False):
    """Clauses that may appear inside a ``bool`` query.

    Declared with ``total=False``, so every key is optional. Each clause
    holds a list of queries; ``"Query"`` is a forward reference to the union
    of all supported query types.
    """

    must: List["Query"]
    must_not: List["Query"]
    should: List["Query"]
    filter: List["Query"]
    minimum_should_match: int
69+
70+
71+
class BoolQuery(TypedDict):
    """A complete ``bool`` query: ``{"bool": {...}}``."""

    bool: BoolQueryParams
73+
74+
75+
class NestedQueryParams(TypedDict):
    """Body of a ``nested`` clause."""

    # Path of the nested field, e.g. ``"planning_items"``
    path: str
    # Query applied to the nested field
    query: Union[TermQuery, BoolQuery]
78+
79+
80+
class NestedQuery(TypedDict):
    """A complete ``nested`` query: ``{"nested": {"path": ..., "query": ...}}``."""

    nested: NestedQueryParams
82+
83+
84+
# Queries grouped here as term-level: term, terms, ids, exists and range
TermLevelQuery = Union[TermQuery, TermsQuery, IDsQuery, ExistsQuery, RangeQuery]
85+
# Queries grouped here as full-text: query_string and match
FullTextQuery = Union[QueryStringQuery, MatchQuery]
86+
# Any supported query; this is the target of the ``"Query"`` forward references
Query = Union[TermLevelQuery, FullTextQuery, BoolQuery, NestedQuery]

newsroom/search/service.py

Lines changed: 36 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
from newsroom import Service
1515
from newsroom.auth.utils import user_has_section_allowed
16+
from newsroom.search import BoolQuery, BoolQueryParams, QueryStringQuery
1617
from newsroom.search.config import (
1718
SearchGroupNestedConfig,
1819
get_nested_config,
@@ -48,7 +49,7 @@ def query_string(
4849
fields: List[str] = ["*"],
4950
multimatch_type: Literal["cross_fields", "best_fields"] = "cross_fields",
5051
analyze_wildcard=False,
51-
):
52+
) -> QueryStringQuery:
5253
query_string_settings = app.config["ELASTICSEARCH_SETTINGS"]["settings"]["query_string"]
5354
return {
5455
"query_string": {
@@ -106,6 +107,16 @@ class AdvancedSearchParams(TypedDict):
106107
fields: List[str]
107108

108109

110+
class SearchArgs(TypedDict, total=False):
    """Request arguments stored on a search query instance as ``args``.

    Declared with ``total=False``, so every key is optional.
    """

    # The search text
    q: str
    # Restrict the search to a single item ID / a list of item IDs
    id: str
    ids: List[str]
    size: int
    bookmarks: str
    ignore_latest: bool
    # Either an already-parsed mapping or a JSON-encoded string
    filter: Union[Dict[str, str], str]
118+
119+
109120
class SearchQuery(object):
110121
"""Class for storing the search parameters for validation and query generation"""
111122

@@ -120,14 +131,14 @@ def __init__(self):
120131
self.requested_products = []
121132
self.advanced: Optional[AdvancedSearchParams] = None
122133

123-
self.args = {}
134+
self.args: SearchArgs = {}
124135
self.lookup = {}
125136
self.projections = {}
126137
self.req = None
127138

128139
self.aggs = None
129140
self.source = {}
130-
self.query = {"bool": {"filter": [], "must": [], "must_not": [], "should": []}}
141+
self.query: BoolQuery = {"bool": {"filter": [], "must": [], "must_not": [], "should": []}}
131142
self.highlight = None
132143
self.item_type = None
133144
self.planning_items_should = []
@@ -199,7 +210,7 @@ def _search_all_versions(self, search: SearchQuery, req, lookup):
199210

200211
# Now run a query only using the IDs from the above search
201212
# This final search makes sure pagination still works
202-
search.query["bool"] = {"filter": {"terms": {"_id": next_item_ids}}}
213+
search.query["bool"] = {"filter": [{"ids": {"values": next_item_ids}}]}
203214
self.gen_source_from_search(search)
204215
internal_req = self.get_internal_request(search)
205216
res = self.internal_get(internal_req, search.lookup)
@@ -330,10 +341,11 @@ def get_internal_request(self, search):
330341

331342
return internal_req
332343

333-
def set_bool_query_from_filters(self, bool_query: Dict[str, Any], filters: Dict[str, Any]):
344+
def set_bool_query_from_filters(self, bool_query: BoolQueryParams, filters: Dict[str, Any]) -> None:
334345
for key, val in filters.items():
335346
if not val:
336347
continue
348+
bool_query.setdefault("must", [])
337349
bool_query["must"].append(
338350
get_filter_query(key, val, self.get_aggregation_field(key), get_nested_config("items", key))
339351
)
@@ -576,7 +588,7 @@ def prefill_search_highlights(self, search, req):
576588
highlight_search.advanced = deepcopy(search.advanced)
577589

578590
# Set up the search query for filtering
579-
self.apply_request_filter(highlight_search)
591+
self.apply_request_filter(highlight_search, highlights=True)
580592

581593
# Set up highlighting settings
582594
highlight_search.source.setdefault("highlight", {})
@@ -742,31 +754,34 @@ def get_product_filter(self, search, product):
742754
if product.get("query"):
743755
return self.query_string(product["query"])
744756

745-
def apply_request_filter(self, search):
757+
def parse_filters(self, search: SearchQuery) -> Optional[Dict[str, Any]]:
    """Return the ``filter`` request argument as a dictionary.

    The argument may already be a dict, in which case it is returned as-is,
    or a JSON-encoded string, in which case it is decoded here.

    :param newsroom.search.SearchQuery search: The search query instance
    :return: The filters, or ``None`` when no ``filter`` argument was supplied
    :raises BadParameterValueError: If the argument cannot be decoded as JSON
    """

    raw_filters = search.args.get("filter")
    if not raw_filters:
        return None

    if isinstance(raw_filters, dict):
        return raw_filters

    try:
        return json.loads(raw_filters)
    except (TypeError, ValueError):
        # ``json.loads`` raises TypeError for input that is not str/bytes and
        # JSONDecodeError (a ValueError subclass) for malformed JSON text.
        # Report both as a bad request parameter instead of letting the
        # decode error escape unhandled.
        raise BadParameterValueError("Incorrect type supplied for filter parameter")
767+
768+
def apply_request_filter(self, search: SearchQuery, highlights=False) -> None:
746769
if search.args.get("q"):
747770
search.query["bool"].setdefault("must", []).append(
748771
self.query_string(search.args["q"], search.args.get("default_operator") or "AND")
749772
)
750773

751774
if search.args.get("ids"):
752-
search.query["bool"]["must"].append({"terms": {"_id": search.args["ids"]}})
775+
search.query["bool"]["must"].append({"ids": {"values": search.args["ids"]}})
753776

754-
filters = None
755-
if search.args.get("filter"):
756-
if isinstance(search.args["filter"], dict):
757-
filters = search.args["filter"]
758-
else:
759-
try:
760-
filters = json.loads(search.args["filter"])
761-
except TypeError:
762-
raise BadParameterValueError("Incorrect type supplied for filter parameter")
777+
filters = self.parse_filters(search)
763778

764779
if not app.config.get("FILTER_BY_POST_FILTER", False):
765780
if filters:
766781
if app.config.get("FILTER_AGGREGATIONS", True):
767782
self.set_bool_query_from_filters(search.query["bool"], filters)
768-
else:
769-
search.query["bool"]["must"].append(filters)
783+
elif isinstance(filters, dict):
784+
search.query["bool"]["must"].append(filters) # type: ignore
770785

771786
if search.args.get("created_from") or search.args.get("created_to"):
772787
search.query["bool"]["must"].append(self.versioncreated_range(search.args))
@@ -906,7 +921,7 @@ def get_matching_topics_for_item(self, topics, users, companies, query):
906921

907922
return topic_matches
908923

909-
def apply_topic_args(self, topic, args=None):
924+
def apply_topic_args(self, topic, args=None) -> SearchArgs:
910925
if args is None:
911926
args = {}
912927

@@ -974,7 +989,7 @@ def get_items_by_query(self, search, size=10, aggs=None):
974989
internal_req = self.get_internal_request(search)
975990
return self.internal_get(internal_req, search.lookup)
976991

977-
def query_string(self, query, default_operator="AND"):
992+
def query_string(self, query, default_operator="AND") -> QueryStringQuery:
978993
fields_config_key = "WIRE_SEARCH_FIELDS" if self.section == "wire" else "AGENDA_SEARCH_FIELDS"
979994
fields = app.config.get(fields_config_key, ["*"])
980995
return query_string(query, default_operator=default_operator, fields=fields)

newsroom/wire/search.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,7 @@ def get_product_items(self, product_id: str, size: int, exclude_embargoed: bool
162162
"bool": {
163163
"should": [
164164
{"range": {"embargoed": {"lt": "now"}}},
165-
{"bool": {"must_not": {"exists": {"field": "embargoed"}}}},
165+
{"bool": {"must_not": [{"exists": {"field": "embargoed"}}]}},
166166
]
167167
}
168168
}
@@ -269,13 +269,13 @@ def has_permissions(self, item, ignore_latest=False):
269269
except Forbidden:
270270
return False
271271

272-
def apply_request_filter(self, search):
272+
def apply_request_filter(self, search, highlights=True):
273273
"""Generate the filters from request args
274274
275275
:param newsroom.search.SearchQuery search: The search query instance
276276
"""
277277

278-
super().apply_request_filter(search)
278+
super().apply_request_filter(search, highlights=highlights)
279279

280280
if search.args.get("bookmarks"):
281281
set_bookmarks_query(search.query, search.args["bookmarks"])

0 commit comments

Comments
 (0)