From c49164bd1ca81b48155de735730795fd24f690f4 Mon Sep 17 00:00:00 2001 From: Benjamin Armintor Date: Tue, 12 Dec 2023 18:55:56 -0500 Subject: [PATCH] extract a concern for scrubbing resolvers and include in Site (DLC-1052) --- .../concerns/solr_document/clean_resolver.rb | 17 ++++++++++++++ app/models/site.rb | 6 +++++ app/models/solr_document.rb | 17 +------------- spec/models/site_spec.rb | 23 +++++++++++++++++++ 4 files changed, 47 insertions(+), 16 deletions(-) create mode 100644 app/models/concerns/solr_document/clean_resolver.rb diff --git a/app/models/concerns/solr_document/clean_resolver.rb b/app/models/concerns/solr_document/clean_resolver.rb new file mode 100644 index 00000000..b0e7fd61 --- /dev/null +++ b/app/models/concerns/solr_document/clean_resolver.rb @@ -0,0 +1,17 @@ +module SolrDocument::CleanResolver + # Scrub permanent links from catalog data to use modern resolver syntax + # @param perma_link [String] the original link + # @return [String] link with cgi version of resolver replaced with modern version + def clean_resolver(link_src) + if link_src + link_uri = URI(link_src) + if link_uri.path == "/cgi-bin/cul/resolve" && link_uri.host == "www.columbia.edu" + return "https://resolver.library.columbia.edu/#{link_uri.query}" + end + if link_uri.host == "library.columbia.edu" && link_uri.path =~ /^\/resolve\/([^\/]+)/ + return "https://resolver.library.columbia.edu/#{$1}" + end + end + link_src + end +end diff --git a/app/models/site.rb b/app/models/site.rb index 551e03a9..c2a0e530 100644 --- a/app/models/site.rb +++ b/app/models/site.rb @@ -2,6 +2,7 @@ class Site < ApplicationRecord include Dcv::Sites::Constants include Blacklight::Configurable + include SolrDocument::CleanResolver has_many :scope_filters, as: :scopeable has_many :nav_links, dependent: :destroy, inverse_of: :site has_many :site_pages, dependent: :destroy @@ -181,6 +182,11 @@ def watermark_url watermark_uploader.store_path.sub(File.join(Rails.root, 'public'), '') end + # scrub CUL resolvers for format, or pass through + def persistent_url + clean_resolver(super) + end + def to_subsite_config config = { 'slug' => slug, 'restricted' => (slug =~ /restricted/).present?, 'palette' => palette, 'layout' => layout, 'scope_constraints' => constraints diff --git a/app/models/solr_document.rb b/app/models/solr_document.rb index e025433c..9039fe21 100644 --- a/app/models/solr_document.rb +++ b/app/models/solr_document.rb @@ -15,6 +15,7 @@ class SolrDocument RESOURCE_MODEL = 'GenericResource' include Blacklight::Solr::Document + include SolrDocument::CleanResolver include SolrDocument::FieldSemantics include SolrDocument::OpenUrlContext include SolrDocument::PublicationInfo @@ -89,22 +90,6 @@ def schema_image_identifier end end - # Scrub permanent links from catalog data to use modern resolver syntax - # @param perma_link [String] the original link - # @return [String] link with cgi version of resolver replaced with modern version - def clean_resolver(link_src) - if link_src - link_uri = URI(link_src) - if link_uri.path == "/cgi-bin/cul/resolve" && link_uri.host == "www.columbia.edu" - return "https://resolver.library.columbia.edu/#{link_uri.query}" - end - if link_uri.host == "library.columbia.edu" && link_uri.path =~ /^\/resolve\/([^\/]+)/ - return "https://resolver.library.columbia.edu/#{$1}" - end - end - link_src - end - def slug if self[:restriction_ssim].present? Array(self[:slug_ssim]).compact.map { |val| "restricted/#{val}" }.first diff --git a/spec/models/site_spec.rb b/spec/models/site_spec.rb index b3e796d9..1b5950d2 100644 --- a/spec/models/site_spec.rb +++ b/spec/models/site_spec.rb @@ -249,4 +249,27 @@ end end end + describe '#persistent_url' do + let(:site_slug) { 'persistent_url' } + let(:rkey) { 'lweb0138' } + let(:cgi_http) { "http://www.columbia.edu/cgi-bin/cul/resolve?#{rkey}" } + let(:cgi_https) { "https://www.columbia.edu/cgi-bin/cul/resolve?#{rkey}" } + let(:lweb_http) { "https://library.columbia.edu/resolve/#{rkey}" } + let(:lweb_https) { "https://library.columbia.edu/resolve/#{rkey}" } + let(:current_https) { "https://resolver.library.columbia.edu/#{rkey}" } + let(:na_https) { "https://nothing.library.columbia.edu/#{rkey}" } + it "cleans resolvers" do + site.persistent_url = cgi_http + expect(site.persistent_url).to eql(current_https) + site.persistent_url = cgi_https + expect(site.persistent_url).to eql(current_https) + site.persistent_url = lweb_http + expect(site.persistent_url).to eql(current_https) + site.persistent_url = lweb_https + expect(site.persistent_url).to eql(current_https) + site.persistent_url = na_https + expect(site.persistent_url).to eql(na_https) + end + + end end \ No newline at end of file