diff --git a/.gitignore b/.gitignore index 76fb9c4..a3bd71e 100644 --- a/.gitignore +++ b/.gitignore @@ -48,3 +48,5 @@ bower.json .byebug_history .DS_Store + +/config/master.key diff --git a/.ruby-gemset b/.ruby-gemset index eedd89b..fcf5595 100644 --- a/.ruby-gemset +++ b/.ruby-gemset @@ -1 +1 @@ -api +api-v2 diff --git a/.ruby-version b/.ruby-version index 324db8d..4e34c4d 100644 --- a/.ruby-version +++ b/.ruby-version @@ -1 +1 @@ -ruby-2.6.8 +ruby-2.7.6 diff --git a/Gemfile b/Gemfile index 88ac8b9..4170559 100644 --- a/Gemfile +++ b/Gemfile @@ -11,7 +11,7 @@ gem 'rails', '~> 6.0.2' # Use sqlite3 as the database for Active Record gem 'sqlite3' # Use Puma as the app server -gem 'puma', '~> 3.7' +gem 'puma', '>= 5.6' # Build JSON APIs with ease. Read more: https://github.com/rails/jbuilder # gem 'jbuilder', '~> 2.5' # Use Redis adapter to run Action Cable in production diff --git a/Gemfile.lock b/Gemfile.lock index c38a145..3f1cd9c 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -69,7 +69,7 @@ GEM globalid (1.0.0) activesupport (>= 5.0) http-accept (1.7.0) - http-cookie (1.0.4) + http-cookie (1.0.5) domain_name (~> 0.5) i18n (1.10.0) concurrent-ruby (~> 1.0) @@ -96,7 +96,8 @@ GEM nokogiri (1.13.6) mini_portile2 (~> 2.8.0) racc (~> 1.4) - puma (3.12.6) + puma (5.6.4) + nio4r (~> 2.0) racc (1.6.0) rack (2.2.3) rack-test (1.1.0) @@ -168,7 +169,7 @@ DEPENDENCIES bootsnap byebug listen (>= 3.0.5, < 3.2) - puma (~> 3.7) + puma (>= 5.6) rails (~> 6.0.2) rest-client (>= 2.1.0.rc1, < 2.2) spring diff --git a/app/controllers/application_controller.rb b/app/controllers/application_controller.rb index 15ff0d8..e97e006 100644 --- a/app/controllers/application_controller.rb +++ b/app/controllers/application_controller.rb @@ -3,7 +3,8 @@ class ApplicationController < ActionController::API def post_search(json, error_method=method(:display_error)) - res = RestClient.post("#{ES_URI}/_search", json.to_json, { "content-type" => "json" }) + auth_hash = { "Authorization" => "Basic #{Base64::encode64("#{ES_USER}:#{ES_PASSWORD}")}" } + res = RestClient.post("#{ES_URI}/_search", json.to_json, auth_hash.merge({ "content-type" => "json" })) raise return JSON.parse(res.body) rescue => e diff --git a/app/services/search_item_req.rb b/app/services/search_item_req.rb index e9260c0..56e6453 100644 --- a/app/services/search_item_req.rb +++ b/app/services/search_item_req.rb @@ -51,6 +51,8 @@ def build_request # add bool to request body req["query"]["bool"] = bool + # uncomment below line to log ES query for debugging + # puts req.to_json() return req end @@ -72,7 +74,7 @@ def facets dir = "desc" if @params["facet_sort"].present? sort_type, sort_dir = @params["facet_sort"].split(@@filter_separator) - type = "_term" if sort_type == "term" + type = "term" if sort_type == "term" dir = sort_dir if sort_dir == "asc" end @@ -83,8 +85,7 @@ def facets aggs = {} Array.wrap(@params["facet"]).each do |f| # histograms use a different ordering terminology than normal aggs - f_type = type == "_term" ? "_key" : "_count" - + f_type = (type == "term") ? "_key" : "_count" if f.include?("date") || f[/_d$/] # NOTE: if nested fields will ever have dates we will # need to refactor this to be available to both @@ -98,13 +99,76 @@ def facets aggs[f] = { "date_histogram" => { "field" => field, - "interval" => interval, + "calendar_interval" => interval, "format" => formatted, "min_doc_count" => 1, "order" => { f_type => dir }, } } - # if nested, has extra syntax + #nested facet, matching on another nested facet + + elsif f.include?("[") + # will be an array including the original, and an alternate aggregation name + + + options = JSON.parse(f) + original = options[0] + agg_name = options[1] + facet = original.split("[")[0] + # may or may not be nested + nested = facet.include?(".") + if nested + path = facet.split(".").first + end + condition = original[/(?<=\[).+?(?=\])/] + subject = condition.split("#").first + predicate = condition.split("#").last + aggregation = { + # common to nested and non-nested + "filter" => { + "term" => { + subject => predicate + } + }, + "aggs" => { + agg_name => { + "terms" => { + "field" => facet, + "order" => {f_type => dir}, + "size" => size + }, + "aggs" => { + "field_to_item" => { + "reverse_nested" => {}, + "aggs" => { + "top_matches" => { + "top_hits" => { + "_source" => { + "includes" => [ agg_name ] + }, + "size" => 1 + } + } + } + } + } + } + } + } + #interpolate above hash into nested query + if nested + aggs[agg_name] = { + "nested" => { + "path" => path + }, + "aggs" => { + agg_name => aggregation + } + } + else + #otherwise it is the whole query + aggs[agg_name] = aggregation + end elsif f.include?(".") path = f.split(".").first aggs[f] = { @@ -115,7 +179,7 @@ def facets f => { "terms" => { "field" => f, - "order" => { type => dir }, + "order" => {f_type => dir}, "size" => size }, "aggs" => { @@ -135,7 +199,7 @@ def facets aggs[f] = { "terms" => { "field" => f, - "order" => { type => dir }, + "order" => { f_type => dir }, "size" => size }, "aggs" => { @@ -161,8 +225,43 @@ def filters # (type 2 will only be used for dates) filters = fields.map {|f| f.split(@@filter_separator, 3) } filters.each do |filter| - # NESTED FIELD FILTER - if filter[0].include?(".") + # filter aggregation with nesting + if filter[0].include?("[") + original = filter[0] + facet = original.split("[")[0] + nested = facet.include?(".") + if nested + path = facet.split(".").first + end + condition = original[/(?<=\[).+?(?=\])/] + subject = condition.split("#").first + predicate = condition.split("#").last + term_match = { + # "person.name" => "oliver wendell holmes" + # Remove CR's added by hidden input field values with returns + facet => filter[1].gsub(/\r/, "") + } + term_filter = { + subject => predicate + } + if nested + query = { + "nested" => { + "path" => path, + "query" => { + "bool" => { + "must" => [ + { "match" => term_filter }, + { "match" => term_match } + ] + } + } + } + } + end + filter_list << query + #ordinary nested facet + elsif filter[0].include?(".") path = filter[0].split(".").first # this is a nested field and must be treated differently nested = { diff --git a/app/services/search_item_res.rb b/app/services/search_item_res.rb index a82a199..05210e7 100644 --- a/app/services/search_item_res.rb +++ b/app/services/search_item_res.rb @@ -18,7 +18,6 @@ def build_response # strip out only the fields for the item response items = combine_highlights facets = reformat_facets - { "code" => 200, "count" => count, @@ -66,7 +65,7 @@ def format_bucket_value(facets, field, bucket) # dates return in wonktastic ways, so grab key_as_string instead of gibberish number # but otherwise just grab the key if key_as_string unavailable key = bucket.key?("key_as_string") ? bucket["key_as_string"] : bucket["key"] - val = bucket["doc_count"] + val = bucket.key?("field_to_item") ? bucket["field_to_item"]["doc_count"] : bucket["doc_count"] source = key # top_matches is a top_hits aggregation which returns a list of terms # which were used for the facet. @@ -89,8 +88,7 @@ def reformat_facets facets = {} raw_facets.each do |field, info| facets[field] = {} - # nested fields do not have buckets at this level of response structure - buckets = info.key?("buckets") ? info["buckets"] : info.dig(field, "buckets") + buckets = get_buckets(info, field) if buckets buckets.each { |b| format_bucket_value(facets, field, b) } else @@ -110,4 +108,18 @@ def remove_nonword_chars(term) transliterated.gsub(/<\/?(?:em|strong|u)>|\W/, "").downcase end + def get_buckets(info, field) + buckets = nil + # ordinary facet + if info.key?("buckets") + buckets = info["buckets"] + # nested facet + elsif info.dig(field, "buckets") + buckets = info.dig(field, "buckets") + # filtered facet + else + buckets = info.dig(field, field, "buckets") + end + buckets + end end diff --git a/app/services/search_service.rb b/app/services/search_service.rb index dbd8877..319b97b 100644 --- a/app/services/search_service.rb +++ b/app/services/search_service.rb @@ -11,7 +11,8 @@ def initialize(url, params={}, user_req) end def post(url_ending, json) - res = RestClient.post("#{@url}/#{url_ending}", json.to_json, { "content-type" => "json" } ) + auth_hash = { "Authorization" => "Basic #{Base64::encode64("#{Rails.application.credentials.elasticsearch[:user]}:#{Rails.application.credentials.elasticsearch[:password]}")}" } + res = RestClient.post("#{@url}/#{url_ending}", json.to_json, auth_hash.merge({ "content-type" => "json" } )) JSON.parse(res.body) rescue => e e diff --git a/config/environments/development.rb b/config/environments/development.rb index 1e22e09..14a0a40 100644 --- a/config/environments/development.rb +++ b/config/environments/development.rb @@ -61,4 +61,5 @@ # CDRH CONFIGURATION config.hosts << "cdrhdev1.unl.edu" + config.hosts << "whitman-dev.unl.edu" end diff --git a/test/services/search_item_req_test.rb b/test/services/search_item_req_test.rb index 29bf323..c4d8197 100644 --- a/test/services/search_item_req_test.rb +++ b/test/services/search_item_req_test.rb @@ -44,7 +44,7 @@ def test_facets "facet" => [ "title", "subcategory" ] }).facets assert_equal( - {"title"=>{"terms"=>{"field"=>"title", "order"=>{"_term"=>"asc"}, "size"=>10}, "aggs"=>{"top_matches"=>{"top_hits"=>{"_source"=>{"includes"=>["title"]}, "size"=>1}}}}, "subcategory"=>{"terms"=>{"field"=>"subcategory", "order"=>{"_term"=>"asc"}, "size"=>10}, "aggs"=>{"top_matches"=>{"top_hits"=>{"_source"=>{"includes"=>["subcategory"]}, "size"=>1}}}}}, + {"title"=>{"terms"=>{"field"=>"title", "order"=>"asc", "size"=>10}, "aggs"=>{"top_matches"=>{"top_hits"=>{"_source"=>{"includes"=>["title"]}, "size"=>1}}}}, "subcategory"=>{"terms"=>{"field"=>"subcategory", "order"=>"asc", "size"=>10}, "aggs"=>{"top_matches"=>{"top_hits"=>{"_source"=>{"includes"=>["subcategory"]}, "size"=>1}}}}}, facets ) @@ -69,7 +69,7 @@ def test_facets "facet" => [ "creator.name" ] }).facets assert_equal( - {"creator.name"=>{"nested"=>{"path"=>"creator"}, "aggs"=>{"creator.name"=>{"terms"=>{"field"=>"creator.name", "order"=>{"_term"=>"desc"}, "size"=>20}, "aggs"=>{"top_matches"=>{"top_hits"=>{"_source"=>{"includes"=>["creator.name"]}, "size"=>1}}}}}}}, + {"creator.name"=>{"nested"=>{"path"=>"creator"}, "aggs"=>{"creator.name"=>{"terms"=>{"field"=>"creator.name", "order"=>"desc", "size"=>20}, "aggs"=>{"top_matches"=>{"top_hits"=>{"_source"=>{"includes"=>["creator.name"]}, "size"=>1}}}}}}}, facets ) @@ -83,14 +83,14 @@ def test_facets # sort term order specified facets = SearchItemReq.new({ "facet" => ["title", "format"], "facet_sort" => "term|desc" }).facets assert_equal( - {"title"=>{"terms"=>{"field"=>"title", "order"=>{"_term"=>"desc"}, "size"=>20}, "aggs"=>{"top_matches"=>{"top_hits"=>{"_source"=>{"includes"=>["title"]}, "size"=>1}}}}, "format"=>{"terms"=>{"field"=>"format", "order"=>{"_term"=>"desc"}, "size"=>20}, "aggs"=>{"top_matches"=>{"top_hits"=>{"_source"=>{"includes"=>["format"]}, "size"=>1}}}}}, + {"title"=>{"terms"=>{"field"=>"title", "order"=>"desc", "size"=>20}, "aggs"=>{"top_matches"=>{"top_hits"=>{"_source"=>{"includes"=>["title"]}, "size"=>1}}}}, "format"=>{"terms"=>{"field"=>"format", "order"=>"desc", "size"=>20}, "aggs"=>{"top_matches"=>{"top_hits"=>{"_source"=>{"includes"=>["format"]}, "size"=>1}}}}}, facets ) # sort term no order specified facets = SearchItemReq.new({ "facet" => ["title", "format"], "facet_sort" => "term" }).facets assert_equal( - {"title"=>{"terms"=>{"field"=>"title", "order"=>{"_term"=>"desc"}, "size"=>20}, "aggs"=>{"top_matches"=>{"top_hits"=>{"_source"=>{"includes"=>["title"]}, "size"=>1}}}}, "format"=>{"terms"=>{"field"=>"format", "order"=>{"_term"=>"desc"}, "size"=>20}, "aggs"=>{"top_matches"=>{"top_hits"=>{"_source"=>{"includes"=>["format"]}, "size"=>1}}}}}, + {"title"=>{"terms"=>{"field"=>"title", "order"=>"desc", "size"=>20}, "aggs"=>{"top_matches"=>{"top_hits"=>{"_source"=>{"includes"=>["title"]}, "size"=>1}}}}, "format"=>{"terms"=>{"field"=>"format", "order"=>"desc", "size"=>20}, "aggs"=>{"top_matches"=>{"top_hits"=>{"_source"=>{"includes"=>["format"]}, "size"=>1}}}}}, facets )