diff --git a/.env b/.env index 07d8cd93..67eab864 100644 --- a/.env +++ b/.env @@ -1,6 +1,7 @@ +# UCLALibrary - .env setup -- Start DEPLOY_HOOK=CHANGEME DOCKER_PORTS=80 -MAKE_WAVES= +MAKE_WAVES=true # Set to false to disable Waveform generation NEGATIVE_CAPTCHA_SECRET=64fe54311a8e54b637a1da1ff993b560ff5c742211f645f35b8b9bd8b3d2e4015e95dea8db4dc235df0396ddd94d21d18d0c787bcaa5b579cb5f6f2aac90e601 SECRET_KEY_BASE=CHANGEME PASSENGER_APP_ENV=development @@ -8,6 +9,7 @@ POSTGRES_DB=oral_history POSTGRES_HOST=postgres POSTGRES_PASSWORD=DatabaseFTW POSTGRES_USER=postgres +# Commented out for development purposes @SoftServ REGISTRY_HOST=index.docker.io/ REGISTRY_URI=uclalibrary SITE_URI=oralhistory-test.library.ucla.edu @@ -20,3 +22,11 @@ SOLR_PORT=8983 SOLR_URL="http://${SOLR_ADMIN_USER}:${SOLR_ADMIN_PASSWORD}@${SOLR_HOST}:${SOLR_PORT}/solr/blacklight-core" TAG=dev TEST_DB=oral_history_test +# UCLALibrary - .env setup -- End + +# SoftServ - .env additions/alterations --- Start +# REGISTRY_HOST=ghcr.io +# REGISTRY_URI=oral-history +# ADMIN_EMAIL=admin@example.com +# ADMIN_PASSWORD=testing123 +# SoftServ - .env additions/alterations --- End \ No newline at end of file diff --git a/.github/workflows/build-dockerhub.yml b/.github/workflows/build-dockerhub.yml index 864d7e81..2b5f0428 100644 --- a/.github/workflows/build-dockerhub.yml +++ b/.github/workflows/build-dockerhub.yml @@ -1,13 +1,24 @@ name: Build Oral History Web for Docker Hub on: push: - # branches: - # - main - workflow_dispatch: + branches: + - main + pull_request: + branches: + - main + jobs: build-for-docker-hub: runs-on: ubuntu-latest steps: + - name: Set env + run: >- + echo "TAG=${HEAD_TAG::8}" >> ${GITHUB_ENV}; + echo ${HEAD_TAG::8} + env: + HEAD_TAG: ${{ github.event.pull_request.head.sha || github.sha }} + shell: bash + - name: Check out code uses: actions/checkout@v3 @@ -20,19 +31,20 @@ jobs: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} - - name: Read chart yaml to 
get version for docker tag - id: image-tag - uses: KJ002/read-yaml@1.6 + - name: Retag latest if merge to main action + id: meta-web + uses: docker/metadata-action@v4.1.1 with: - file: 'charts/prod-oralhistory-values.yaml' - key-path: '["image", "tag"]' - - - name: Report image tag - run: echo "${{ steps.image-tag.outputs.data }}" + images: | + name=uclalibrary/oral-history + tags: | + type=raw,value=latest,enable={{is_default_branch}} - name: Build and push uses: docker/build-push-action@v3 with: context: . push: true - tags: uclalibrary/oral-history:${{ steps.image-tag.outputs.data }} + tags: | + ${{ steps.meta-web.outputs.tags }} + uclalibrary/oral-history:${{ env.TAG }} \ No newline at end of file diff --git a/app/controllers/admin_controller.rb b/app/controllers/admin_controller.rb index 6f6f827c..0dc575cc 100644 --- a/app/controllers/admin_controller.rb +++ b/app/controllers/admin_controller.rb @@ -21,4 +21,9 @@ def delete_jobs def logs send_file(Rails.root.join('log/indexing.log')) end + + def destroy_all_delayed_jobs + Delayed::Job.destroy_all + redirect_to root_path, notice: 'All Delayed::Jobs have been destroyed.' 
+ end end diff --git a/app/models/oral_history_item.rb b/app/models/oral_history_item.rb index 5f62b70c..87b76da8 100644 --- a/app/models/oral_history_item.rb +++ b/app/models/oral_history_item.rb @@ -26,35 +26,31 @@ def self.index_logger def self.client(args) - url = args[:url] || "https://webservices.library.ucla.edu/dldataprovider/oai2_0.do" - OAI::Client.new url, :headers => { "From" => "rob@notch8.com" }, :parser => 'rexml', metadata_prefix: 'mods' + url = args[:url] || "https://oh-staff.library.ucla.edu/oai/" + + OAI::Client.new(url, http: Faraday.new {|c| c.options.timeout = 300}) end def self.fetch(args) - set = args[:set] || "oralhistory" - response = client(args).list_records(set: set, metadata_prefix: 'mods') + response = client(args).list_records end def self.get(args) - response = client(args).get_record(identifier: args[:identifier], metadata_prefix: 'mods', ) + response = client(args).get_record(identifier: args[:identifier] ) end def self.fetch_first_id - response = self.fetch({progress: false, limit:1}) - response.full&.first&.header&.identifier&.split('/')&.last + response = self.fetch({limit:1}) + response.full&.first&.header&.identifier end def self.import(args) return false if !args[:override] && check_for_tmp_file begin create_import_tmp_file - progress = args[:progress] || true limit = args[:limit] || 20000000 # essentially no limit response = self.fetch(args) - if progress - bar = ProgressBar.new(response.doc.elements['//resumptionToken'].attributes['completeListSize'].to_i) - end total = 0 new_record_ids = [] @@ -78,9 +74,6 @@ def self.import(args) yield(total) if block_given? end - if progress - bar.increment! 
- end total += 1 break if total >= limit end @@ -100,7 +93,8 @@ def self.import(args) end def self.import_single(id) - record = self.get(identifier: id)&.record + converted_id = id.gsub('-','/') + record = self.get(identifier: converted_id)&.record history = process_record(record) history.index_record if ENV['MAKE_WAVES'] && history.attributes["audio_b"] && history.should_process_peaks? @@ -116,8 +110,8 @@ def self.process_record(record) if record.header.blank? || record.header.identifier.blank? return false end - - history = OralHistoryItem.find_or_new(record.header.identifier.split('/').last) #Digest::MD5.hexdigest(record.header.identifier).to_i(16)) + record_id = record.header.identifier.gsub('/','-') + history = OralHistoryItem.find_or_new(record_id) #Digest::MD5.hexdigest(record.header.identifier).to_i(16)) history.attributes['id_t'] = history.id if record.header.datestamp history.attributes[:timestamp] = Time.parse(record.header.datestamp) @@ -140,7 +134,8 @@ def self.process_record(record) history.attributes['links_t'] = [] set.children.each do |child| next if child.class == REXML::Text - if child.name == "titleInfo" + + if child.name == "titleInfo" # child.elements.each('mods:title') do |title| title_text = title.text.to_s.strip if(child.attributes["type"] == "alternative") && title_text.size > 0 @@ -157,22 +152,30 @@ def self.process_record(record) end end end - elsif child.name == "typeOfResource" - history.attributes["type_of_resource_display"] = child.text - history.attributes["type_of_resource_t"] ||= [] - history.attributes["type_of_resource_t"] << child.text - history.attributes["type_of_resource_facet"] ||= [] - history.attributes["type_of_resource_facet"] << child.text + + # not in new oai feed remove? 
+ # elsif child.name == "typeOfResource" + # history.attributes["type_of_resource_display"] = child.text + # history.attributes["type_of_resource_t"] ||= [] + # history.attributes["type_of_resource_t"] << child.text + # history.attributes["type_of_resource_facet"] ||= [] + # history.attributes["type_of_resource_facet"] << child.text + + # elsif child.name == "accessCondition" history.attributes["rights_display"] = [child.text] history.attributes["rights_t"] = [] history.attributes["rights_t"] << child.text + + # elsif child.name == 'language' child.elements.each('mods:languageTerm') do |e| history.attributes["language_facet"] = LanguageList::LanguageInfo.find(e.text).try(:name) history.attributes["language_sort"] = LanguageList::LanguageInfo.find(e.text).try(:name) history.attributes["language_t"] = [LanguageList::LanguageInfo.find(e.text).try(:name)] end + + # elsif child.name == "subject" child.elements.each('mods:topic') do |e| history.attributes["subject_topic_facet"] ||= [] @@ -180,13 +183,21 @@ def self.process_record(record) history.attributes["subject_t"] ||= [] history.attributes["subject_t"] << e.text end + + # elsif child.name == "name" + + # + # interviewer if child.elements['mods:role/mods:roleTerm'].text == "interviewer" history.attributes["author_display"] = child.elements['mods:namePart'].text history.attributes["author_t"] ||= [] if !history.attributes["author_t"].include?(child.elements['mods:namePart'].text) history.attributes["author_t"] << child.elements['mods:namePart'].text end + + # + # interviewee elsif child.elements['mods:role/mods:roleTerm'].text == "interviewee" history.attributes["interviewee_display"] = child.elements['mods:namePart'].text history.attributes["interviewee_t"] ||= [] @@ -195,10 +206,11 @@ def self.process_record(record) end history.attributes["interviewee_sort"] = child.elements['mods:namePart'].text end + + # elsif child.name == "relatedItem" && child.attributes['type'] == "constituent" time_log_url = '' order = 
child.elements['mods:part'].present? ? child.elements['mods:part'].attributes['order'] : 1 - if child.elements['mods:location/mods:url[@usage="timed log"]'].present? time_log_url = child.elements['mods:location/mods:url[@usage="timed log"]'].text transcript = self.generate_xml_transcript(time_log_url) @@ -228,51 +240,71 @@ def self.process_record(record) end history.attributes["peaks_t"] ||= [] child_doc_json = child_document.to_json - history.attributes["peaks_t"] << child_doc_json unless history.attributes["peaks_t"].include? child_doc_json + history.attributes["peaks_t"] << child_doc_json unless history.attributes["peaks_t"].include? child_doc_json history.attributes["children_t"] << child_doc_json + + # elsif child.name == "relatedItem" && child.attributes['type'] == "series" history.attributes["series_facet"] = child.elements['mods:titleInfo/mods:title'].text history.attributes["series_t"] = child.elements['mods:titleInfo/mods:title'].text history.attributes["series_sort"] = child.elements['mods:titleInfo/mods:title'].text - history.attributes["abstract_display"] = child.elements['mods:abstract'].text + history.attributes["abstract_display"] = child.elements['mods:abstract']&.text history.attributes["abstract_t"] = [] - history.attributes["abstract_t"] << child.elements['mods:abstract'].text + history.attributes["abstract_t"] << child.elements['mods:abstract']&.text + + # elsif child.name == "note" if child.attributes == {} history.attributes["admin_note_display"] = child.text history.attributes["admin_note_t"] = [] history.attributes["admin_note_t"] << child.text end + + # if child.attributes['type'].to_s.match('biographical') history.attributes["biographical_display"] = child.text history.attributes["biographical_t"] = [] history.attributes["biographical_t"] << child.text end + + # if child.attributes['type'].to_s.match('personpresent') history.attributes['person_present_display'] = child.text history.attributes['person_present_t'] << child.text end + + # if 
child.attributes['type'].to_s.match('place') history.attributes['place_display'] = child.text history.attributes['place_t'] << child.text end + + # if child.attributes['type'].to_s.match('supportingdocuments') history.attributes['supporting_documents_display'] = child.text history.attributes['supporting_documents_t'] << child.text end + + # if child.attributes['type'].to_s.match('interviewerhistory') history.attributes['interviewer_history_display'] = child.text history.attributes['interviewer_history_t'] << child.text end + + # if child.attributes['type'].to_s.match('processinterview') history.attributes['process_interview_display'] = child.text history.attributes['process_interview_t'] << child.text end history.attributes["description_t"] << child.text + + # elsif child.name == 'location' child.elements.each do |f| history.attributes['links_t'] << [f.text, f.attributes['displayLabel']].to_json order = child.elements['mods:part'].present? ? child.elements['mods:part'].attributes['order'] : 1 + + # elsif child.name == 'physicalDescription' history.attributes["extent_display"] = child.elements['mods:extent'].text history.attributes['extent_t'] = [] history.attributes['extent_t'] << child.elements['mods:extent'].text + + # elsif child.name == 'abstract' history.attributes['interview_abstract_display'] = child.text history.attributes["interview_abstract_t"] = [] @@ -381,11 +417,9 @@ def self.generate_xml_transcript(url) end def self.total_records(args = {}) - url = args[:url] || "https://webservices.library.ucla.edu/dldataprovider/oai2_0.do" - set = args[:set] || "oralhistory" - client = OAI::Client.new url, :headers => { "From" => "rob@notch8.com" }, :parser => 'rexml', metadata_prefix: 'mods' - response = client.list_records(set: set, metadata_prefix: 'mods') - response.doc.elements['//resumptionToken'].attributes['completeListSize'].to_i + url = args[:url] || "https://oh-staff.library.ucla.edu/oai/" + + OAI::Client.new(url, http: Faraday.new {|c| c.options.timeout 
= 300}) end def has_peaks? @@ -418,10 +452,11 @@ def self.create_import_tmp_file end def self.remove_import_tmp_file - FileUtils.rm(Rails.root.join('tmp/importer.tmp')) + tmp_file = Rails.root.join('tmp/importer.tmp') + FileUtils.rm(tmp_file) if File.exist?(tmp_file) end def self.check_for_tmp_file File.exist?(File.join('tmp/importer.tmp')) end -end +end \ No newline at end of file diff --git a/app/views/admin/index.html.erb b/app/views/admin/index.html.erb index 66970e5f..530fb288 100644 --- a/app/views/admin/index.html.erb +++ b/app/views/admin/index.html.erb @@ -40,7 +40,7 @@ <% end %> <%= link_to "Background Jobs", delayed_job_web_path, class: "btn btn-lg btn-primary" %> <%= link_to "Download Logs", admin_logs_path, class: "btn btn-lg btn-primary" %> - + <%= button_to "Destroy All Delayed Jobs", destroy_all_delayed_jobs_path, method: :delete, data: { confirm: 'Are you sure?' }, class: 'btn btn-lg btn-danger' %>
<%= link_to t('blacklight.header_links.logout'), destroy_user_session_path %>
diff --git a/app/views/delayed_jobs/destroy_all.html.erb b/app/views/delayed_jobs/destroy_all.html.erb new file mode 100644 index 00000000..6624de5e --- /dev/null +++ b/app/views/delayed_jobs/destroy_all.html.erb @@ -0,0 +1,4 @@ + +

<h1>Destroy All Delayed Jobs</h1>

+

<p>Are you sure you want to destroy all Delayed Jobs?</p>

+<%= button_to 'Destroy All', destroy_all_delayed_jobs_path, method: :delete, data: { confirm: 'Are you sure?' }, class: 'btn btn-danger' %> diff --git a/charts/Chart.yaml b/charts/Chart.yaml index ab0ad2af..e8fa72e9 100644 --- a/charts/Chart.yaml +++ b/charts/Chart.yaml @@ -2,7 +2,7 @@ apiVersion: v2 appVersion: "0.0.2" description: Chart for Oral History Public-Facing App name: oralhistory -version: 1.0.3 +version: 1.0.0 # The `appVersion` is not a required field whereas `version` is required. If # you’re making changes to a helm chart template file and/or the default values diff --git a/charts/test-oralhistory-values.yaml b/charts/test-oralhistory-values.yaml index 56c46f2e..328bc06b 100644 --- a/charts/test-oralhistory-values.yaml +++ b/charts/test-oralhistory-values.yaml @@ -7,7 +7,7 @@ replicaCount: 1 image: repository: uclalibrary/oral-history # changing this tag will cause a deploy via ArgoCD - tag: v0.0.2 + tag: 5a3acdd5 pullPolicy: Always # Chart documentation: https://github.com/bitnami/charts/tree/main/bitnami/solr diff --git a/config/routes.rb b/config/routes.rb index fc20bd59..2789f24b 100644 --- a/config/routes.rb +++ b/config/routes.rb @@ -27,6 +27,8 @@ post 'admin/run_single_import', to: 'admin#run_single_import', as: 'run_single_import' delete 'admin/delete_jobs', to: 'admin#delete_jobs', as: 'delete_jobs' + delete 'destroy_all_delayed_jobs', to: 'admin#destroy_all_delayed_jobs' + mount Blacklight::Engine => '/' mount BlacklightDynamicSitemap::Engine => '/' diff --git a/db/seeds.rb b/db/seeds.rb index f18bd410..f326aebb 100644 --- a/db/seeds.rb +++ b/db/seeds.rb @@ -10,10 +10,4 @@ u = User.find_or_create_by(email: ENV['ADMIN_EMAIL'] || 'admin@example.com') u.password = ENV['ADMIN_PASSWORD'] || 'password' u.save - - # Import the work with the video - # OralHistoryItem.import_single("21198-zz002jxz7z") - - # Import additional works with audio - # OralHistoryItem.import_single("21198-zz002bfs89") end diff --git a/docker-compose.yml 
b/docker-compose.yml index 7a61b5b7..6ec5e12d 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -53,6 +53,8 @@ services: build: context: . target: web + cache_from: + - "${REGISTRY_HOST}${REGISTRY_URI}/oral-history:${TAG}" env_file: - .env - .env.development