From 390d761903e3b1ce47b70b66aaa55202fad01d97 Mon Sep 17 00:00:00 2001 From: Bess Sadler Date: Wed, 29 Jan 2025 14:11:59 -0500 Subject: [PATCH] Stop indexing from DataSpace (#746) * Stop indexing from DataSpace * Fix typo * Increase solr_writer thread pool As suggested by an error message on the server * Remove change to solr_writer.thread_pool If we need it, it should be in a separate PR --- README.md | 6 +++--- lib/tasks/index.rake | 17 ----------------- 2 files changed, 3 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index 31381b9e..489d2be1 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # pdc_discovery -A discovery portal for Princeton research data. Initially it will provide a better browsing experience for the research data contained in [DataSpace](https://dataspace.princeton.edu). +A discovery portal for Princeton research data. Please note: While this is open-source software, we would discourage anyone from trying to just check it out and run it. Princeton specifics, from styling to authentication and authorization, are hard coded, and we have not invested any time in the kind of configurability that would be needed for use at another institution. Instead it should be taken as an example of breaking a monolithic project into separate components, and developing iteratively in response to local user feedback. 
@@ -59,9 +59,9 @@ We utilize Rubocop for our Ryby code and Prettier for our JavaScript To create a tagged release use the [steps in the RDSS handbook](https://github.com/pulibrary/rdss-handbook/blob/main/release_process.md) -## Indexing research data from DataSpace and PDC Describe +## Indexing research data from PDC Describe -PDC Discovery indexes data from both DataSpace and from PDC Describe via the following rake task: +PDC Discovery indexes data from PDC Describe via the following rake task: ```ruby rake index:research_data diff --git a/lib/tasks/index.rake b/lib/tasks/index.rake index 904804bf..ff9eea2e 100644 --- a/lib/tasks/index.rake +++ b/lib/tasks/index.rake @@ -9,22 +9,12 @@ namespace :index do Rails.logger.info "Indexing: Fetching PDC Describe records" Rake::Task['index:pdc_describe_research_data'].invoke - Rails.logger.info "Indexing: Fetching DataSpace records" - Rake::Task['index:dspace_research_data'].invoke Rails.logger.info "Indexing: Fetching completed" Indexing::SolrCloudHelper.update_solr_alias! Rails.logger.info "Indexing: Updated Solr to read from the new collection: #{Indexing::SolrCloudHelper.alias_url} -> #{Indexing::SolrCloudHelper.collection_reader_url}" end - desc 'Index all DSpace research data collections' - task dspace_research_data: :environment do - Rails.logger.info "Indexing: Harvesting and indexing DataSpace research data collections started" - DspaceResearchDataHarvester.harvest(false) - Indexing::SolrCloudHelper.collection_writer_commit! - Rails.logger.info "Indexing: Harvesting and indexing DataSpace research data collections completed" - end - desc 'Index all PDC Describe data' task pdc_describe_research_data: :environment do Rails.logger.info "Indexing: Harvesting and indexing PDC Describe data started" @@ -40,13 +30,6 @@ namespace :index do Blacklight.default_index.connection.commit end - desc 'Fetches the most recent community information from DataSpace and saves it to a file.' 
- task cache_dataspace_communities: :environment do - cache_file = ENV['COMMUNITIES_FILE'] || './spec/fixtures/files/dataspace_communities.json' - communities = DataspaceCommunities.new - File.write(cache_file, JSON.pretty_generate(communities.tree)) - end - desc 'Prints to console the current Solr URLs and how they are configured' task print_solr_urls: :environment do puts "Solr alias.: #{Indexing::SolrCloudHelper.alias_url}"