@@ -4,6 +4,7 @@ class SearchItemRes
44
# Key paths into the parsed search response held in @body.
# @@items and @@facets are splatted into @body.dig by combine_highlights
# and reformat_facets respectively; the other paths are not used in the
# portion of the class visible here.
# NOTE(review): integer segments (the 0 in @@item) presumably index into an
# array at that level — confirm against the response structure.
55 @@count = [ "hits" , "total" ]
66 @@facets = [ "aggregations" ]
7+ @@facets_label = [ "top_matches" , "hits" , "hits" , "_source" ]
78 @@item = [ "hits" , "hits" , 0 , "_source" ]
89 @@items = [ "hits" , "hits" ]
910
@@ -18,9 +19,10 @@ def build_response
1819 items = combine_highlights
1920 facets = reformat_facets
2021
21- return {
22+ {
2223 "code" => 200 ,
2324 "count" => count ,
25+ "api_version" => Api ::Application ::VERSION ,
2426 "facets" => facets ,
2527 "items" => items ,
2628 }
@@ -29,45 +31,83 @@ def build_response
# Folds each hit's highlight data into its "_source" document.
#
# Reads the hit list from @body at the @@items path. Every hit's "_source"
# hash is mutated in place to carry a "highlight" key holding the hit's own
# "highlight" value, or an empty hash when the hit has none.
#
# Returns an array of the "_source" hashes, or [] when no hit list is found.
def combine_highlights
  results = @body.dig(*@@items)
  return [] unless results

  results.map do |result|
    source = result["_source"]
    source["highlight"] = result["highlight"] || {}
    source
  end
end
4042
# Picks the display ("source") value for a facet bucket out of the bucket's
# top_hits results.
#
# top_hits - array of hit hashes; only the first hit is inspected
# field    - facet field name; only the segment after the last "." is looked
#            up, because elasticsearch stores nested source results without
#            the "path"
# key      - the bucket key the facet was grouped on
#
# Returns the source value unchanged when it is single-valued. For a
# multivalued source (for example: works or places) returns the entry whose
# normalized form equals the normalized key, or all entries joined with
# spaces when none matches. Falls back to key when there are no hits or the
# first hit carries no value for the field, so callers always get a usable
# label.
def find_source_from_top_hits(top_hits, field, key)
  # an empty hits array is truthy in Ruby, so a caller-side `if top_hits`
  # check can still hand us one; do not call dig on a missing first hit
  first_hit = top_hits&.first
  return key unless first_hit

  nested_child = field.split(".").last
  hit = first_hit.dig("_source", nested_child)
  return key if hit.nil?
  # single-valued fields are already the value we want
  return hit unless hit.is_a?(Array)

  # ALL of the values of a multivalued field come back as the source, but we
  # only want the one the key was derived from; this has to mirror exactly
  # the normalization that produced the key
  normalized_key = remove_nonword_chars(key)
  match_index = hit.index { |value| remove_nonword_chars(value) == normalized_key }
  # if nothing matches the original key, return the entire source as a string
  match_index ? hit[match_index] : hit.join(" ")
end
64+
# Records one aggregation bucket under facets[field].
#
# facets - hash of field => { key => { "num" => ..., "source" => ... } },
#          mutated in place; the entry for field is created when absent
# field  - facet field name the bucket belongs to
# bucket - bucket hash carrying "key" and "doc_count", optionally
#          "key_as_string" and a "top_matches" top_hits aggregation
#
# Returns the { "num" => doc_count, "source" => label } hash that was stored.
def format_bucket_value(facets, field, bucket)
  # dates return in wonktastic ways, so prefer key_as_string over the raw
  # numeric key, and only use the key when key_as_string is unavailable
  key = bucket.fetch("key_as_string") { bucket["key"] }

  # top_matches is a top_hits aggregation which returns the terms that were
  # used for the facet.
  # Example: "Willa Cather" and "WILLA CATHER"
  # Both will have been normalized as "willa cather", but we want to
  # display one of the non-normalized terms instead
  top_hits = bucket.dig("top_matches", "hits", "hits")
  # an empty hits array is truthy in Ruby, so test for emptiness explicitly
  # and keep the key as the label when there is nothing to look at
  source =
    if top_hits && !top_hits.empty?
      find_source_from_top_hits(top_hits, field, key)
    else
      key
    end

  facets[field] ||= {}
  facets[field][key] = {
    "num" => bucket["doc_count"],
    "source" => source
  }
end
85+
# Converts the raw aggregations found in @body at the @@facets path into a
# hash of field => formatted buckets.
#
# Every bucket is handed to format_bucket_value, which fills in the entry
# for its field. A field without buckets maps to an empty hash.
#
# Returns {} when the response carries no aggregations.
def reformat_facets
  aggregations = @body.dig(*@@facets)
  return {} unless aggregations

  aggregations.each_with_object({}) do |(field, info), facets|
    facets[field] = {}
    # nested fields do not have buckets at this level of the response
    # structure; theirs sit one level down, under the field name
    buckets =
      if info.key?("buckets")
        info["buckets"]
      else
        info.dig(field, "buckets")
      end
    next unless buckets

    buckets.each { |bucket| format_bucket_value(facets, field, bucket) }
  end
end
72105
# Normalizes a term so that differently formatted spellings compare equal.
#
# term - value to normalize; coerced with to_s first, because bucket keys
#        can be non-strings when no key_as_string is present
#
# Returns the term transliterated by I18n.transliterate, with em/strong/u
# tags and every remaining non-word character removed, and downcased.
def remove_nonword_chars(term)
  # transliterate to ascii (ø -> o)
  transliterated = I18n.transliterate(term.to_s)
  # remove html tags like em, u, and strong, then strip whatever is left
  # that is not a word character (letters, digits and underscore are kept)
  transliterated.gsub(/<\/?(?:em|strong|u)>|\W/, "").downcase
end
112+
73113end
0 commit comments