Skip to content

Commit

Permalink
use ActiveModel for Sword::Metadata::PersonalName and memoize expensive parses
Browse files Browse the repository at this point in the history

- specs should not presumptively duplicate parser xpath data
- parse advisor names as per author names in ProQuest metadata parser
  • Loading branch information
barmintor committed Oct 23, 2024
1 parent c6b19d8 commit abf8db2
Show file tree
Hide file tree
Showing 6 changed files with 91 additions and 93 deletions.
18 changes: 10 additions & 8 deletions lib/sword/metadata/personal_name.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,16 @@
module Sword
module Metadata
class PersonalName
attr_accessor :first_name,
:full_name_naf_format,
:last_name,
:middle_name,
:role,
# type can be primary, alternate, etc. Kinda freeform
:type,
:uni
# ActiveModel::Model lets an instance be built from a hash / keywords of
# attribute values; ActiveModel::Attributes supplies the typed `attribute`
# declarations below and the #attributes hash of current values.
include ActiveModel::Model
include ActiveModel::Attributes
# Name parts and descriptors of a person; every attribute is declared as :string.
attribute :first_name, :string
attribute :full_name_naf_format, :string
attribute :last_name, :string
attribute :middle_name, :string
attribute :role, :string
# type can be primary, alternate, etc. Kinda freeform
attribute :type, :string
attribute :uni, :string
end
end
end
19 changes: 11 additions & 8 deletions lib/sword/mets/proquest_constants.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,15 @@
module Sword::Mets::ProquestConstants
OTHER = 'OTHER'
OTHER_MDTYPE = 'PROQUEST'
XPATH_INFO = { namespace: { 'etdsword' => 'http://www.etdadmin.com/ns/etdsword' },
abstract: '//etdsword:DISS_abstract',
author_name: '//etdsword:DISS_author/etdsword:DISS_name',
first_name: './etdsword:DISS_fname',
middle_name: './etdsword:DISS_middle',
subjects: '//etdsword:DISS_cat_desc',
surname: './etdsword:DISS_surname',
title: '//etdsword:DISS_title' }.freeze
# XPath expressions used to read ProQuest ETD metadata, keyed by field.
# :namespace is the prefix => URI map passed along with every expression.
# Expressions starting with '//' search the whole document; the ones starting
# with './' (:first_name, :middle_name, :surname) are evaluated relative to a
# single DISS_name node, such as those matched by :author_name or :advisor_name.
XPATH_INFO = {
namespace: { 'etdsword' => 'http://www.etdadmin.com/ns/etdsword' },
abstract: '//etdsword:DISS_abstract',
advisor_name: '//etdsword:DISS_advisor/etdsword:DISS_name',
author_name: '//etdsword:DISS_author/etdsword:DISS_name',
first_name: './etdsword:DISS_fname',
middle_name: './etdsword:DISS_middle',
subjects: '//etdsword:DISS_cat_desc',
surname: './etdsword:DISS_surname',
title: '//etdsword:DISS_title'
}.freeze
end
51 changes: 30 additions & 21 deletions lib/sword/mets/proquest_etd_xml_data_element.rb
Original file line number Diff line number Diff line change
@@ -1,36 +1,45 @@
# frozen_string_literal: true

class Sword::Mets::ProquestEtdXmlDataElement < Sword::Mets::XmlDataElement
attr_accessor :authors,
:subjects
include Sword::Mets::ProquestConstants

def initialize(nokogiri_xml_element, xpath_info)
def initialize(nokogiri_xml_element, xpath_info = Sword::Mets::ProquestConstants::XPATH_INFO)
super
@authors = []
@subjects = []
@authors = nil
@subjects = nil
end

def parse_authors
diss_authors = @xml_data.xpath(@xpath_info[:author_name], @xpath_info[:namespace])
diss_authors.each do |author|
person = create_personal_author(author)
person.role = 'author'
@authors << person
# Override
# Lazily parses the author names out of the XML data. The first call runs the
# :author_name XPath query and caches the resulting array in @authors; later
# calls return the cached array.
# @return [Array] one create_personal_name result per matched node, each
#   built with role 'author'
def authors
  return @authors if @authors

  namespaces = @xpath_info[:namespace]
  @authors = @xml_data.xpath(@xpath_info[:author_name], namespaces).map do |name_node|
    create_personal_name(name_node, role: 'author')
  end
end

def create_personal_author(author)
person = Sword::Metadata::PersonalName.new
person.first_name = author.xpath(@xpath_info[:first_name], @xpath_info[:namespace]).text
person.middle_name = author.xpath(@xpath_info[:middle_name], @xpath_info[:namespace]).text
person.last_name = author.xpath(@xpath_info[:surname], @xpath_info[:namespace]).text
person
# Lazily parses the advisor names out of the XML data. The first call runs the
# :advisor_name XPath query and caches the resulting array in @advisors; later
# calls return the cached array.
# @return [Array] one create_personal_name result per matched node, each
#   built with role 'advisor'
def advisors
  return @advisors if @advisors

  namespaces = @xpath_info[:namespace]
  @advisors = @xml_data.xpath(@xpath_info[:advisor_name], namespaces).map do |name_node|
    create_personal_name(name_node, role: 'advisor')
  end
end

def parse_subjects
@diss_cat_descs = @xml_data.xpath(@xpath_info[:subjects], @xpath_info[:namespace])
@diss_cat_descs.each do |diss_cat_desc|
@subjects.push diss_cat_desc.text
# Override
# Lazily collects the text of every node matched by the :subjects XPath and
# caches the resulting array in @subjects for later calls.
# NOTE(review): @diss_cat_descs is only referenced inside this method in the
# visible code; a local variable would probably suffice — confirm before changing.
# @return [Array] the #text of each matched node
def subjects
  return @subjects if @subjects

  @diss_cat_descs = @xml_data.xpath(@xpath_info[:subjects], @xpath_info[:namespace])
  @subjects = @diss_cat_descs.map { |cat_desc| cat_desc.text }
end

private

# Builds a Sword::Metadata::PersonalName from a single name node.
# The first, middle and last name are read from the node using the
# :first_name, :middle_name and :surname XPath expressions; any extra
# attributes (e.g. role:) are passed through. When a key appears in both, the
# value parsed from the node wins.
# @param author [#xpath] node to read the name parts from
# @param other_attrs [Hash] additional PersonalName attributes
# @return [Sword::Metadata::PersonalName]
def create_personal_name(author, **other_attrs)
  namespaces = @xpath_info[:namespace]
  name_part = ->(key) { author.xpath(@xpath_info[key], namespaces).text }

  parsed_attrs = {
    first_name: name_part.call(:first_name),
    middle_name: name_part.call(:middle_name),
    last_name: name_part.call(:surname)
  }
  Sword::Metadata::PersonalName.new(**other_attrs.merge(parsed_attrs))
end
end
9 changes: 8 additions & 1 deletion lib/sword/mets/xml_data_element.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,14 @@ class Sword::Mets::XmlDataElement
:advisors,
:authors,
:subjects,
:title
:title,
:date_conferred,
:degree,
:embargo_code,
:embargo_release_date,
:institution_department_name,
:institution_name,
:institution_school_code

def initialize(nokogiri_xml_element, xpath_info)
@xml_data = nokogiri_xml_element
Expand Down
77 changes: 30 additions & 47 deletions spec/sword/mets/proquest_etd_xml_data_element_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,56 +3,39 @@
require 'rails_helper'

RSpec.describe Sword::Mets::ProquestEtdXmlDataElement do
let(:xpath_info) do
{ namespace: { 'etdsword' => 'http://www.etdadmin.com/ns/etdsword' },
abstract: '//etdsword:DISS_abstract',
author_name: '//etdsword:DISS_author/etdsword:DISS_name',
first_name: 'etdsword:DISS_fname',
middle_name: 'etdsword:DISS_middle',
subjects: '//etdsword:DISS_cat_desc',
surname: 'etdsword:DISS_surname',
title: '//etdsword:DISS_title' }
let(:pq_xml_data) { described_class.new(@xml_data_proquest, Sword::Mets::ProquestConstants::XPATH_INFO) }

let(:author_attrs) { { first_name: 'Ariana', middle_name: 'Cecilia', last_name: 'Gavin', role: 'author' } }
let(:expected_first_author) { Sword::Metadata::PersonalName.new(author_attrs) }

let(:advisor_attrs) { { first_name: 'Henry', middle_name: 'M', last_name: 'Colecraft', role: 'advisor' } }
let(:expected_first_advisor) { Sword::Metadata::PersonalName.new(advisor_attrs) }

before(:context) do
@pq_mets_file = Sword::Mets::MetsFile.new(file_fixture('xml/mets/PQ_mets.xml').read)
# @xml_data_proquest = @pq_mets_file.find_md_wrap_xml_data_elements(mdtype: 'OTHER', other_mdtype: 'PROQUEST')
@xml_data_proquest = @pq_mets_file.find_md_wrap_xml_data_element(mdtype: 'OTHER', other_mdtype: 'PROQUEST')
end

it 'sets the abstract correctly' do
expect(pq_xml_data.abstract).to include('relief of Rad inhibition of cardiac')
end

it 'sets the title correctly' do
expect(pq_xml_data.title).to include('Unraveling the logic')
end

it 'sets the subjects correctly' do
expect(pq_xml_data.instance_variable_get(:@subjects)).to be_nil
expect(pq_xml_data.subjects).to contain_exactly('Molecular biology', 'Pharmacology', 'Physiology')
end

describe 'at initialization' do
before(:context) do
@pq_mets_file = Sword::Mets::MetsFile.new(file_fixture('xml/mets/PQ_mets.xml').read)
# @xml_data_proquest = @pq_mets_file.find_md_wrap_xml_data_elements(mdtype: 'OTHER', other_mdtype: 'PROQUEST')
@xml_data_proquest = @pq_mets_file.find_md_wrap_xml_data_element(mdtype: 'OTHER', other_mdtype: 'PROQUEST')
end

it 'sets the abstract correctly' do
pq_xml_data = described_class.new(@xml_data_proquest, xpath_info)
expect(pq_xml_data.abstract).to include('relief of Rad inhibition of cardiac')
end

it 'sets the title correctly' do
pq_xml_data = described_class.new(@xml_data_proquest, xpath_info)
expect(pq_xml_data.title).to include('Unraveling the logic')
end
it 'sets the authors correctly' do
expect(pq_xml_data.instance_variable_get(:@authors)).to be_nil
expect(pq_xml_data.authors.first.attributes).to eql(expected_first_author.attributes)
end

describe 'after parsing' do
before(:context) do
@pq_mets_file = Sword::Mets::MetsFile.new(file_fixture('xml/mets/PQ_mets.xml').read)
# @xml_data_proquest = @pq_mets_file.find_md_wrap_xml_data_elements(mdtype: 'OTHER', other_mdtype: 'PROQUEST')
@xml_data_proquest = @pq_mets_file.find_md_wrap_xml_data_element(mdtype: 'OTHER', other_mdtype: 'PROQUEST')
end

it 'sets the subjects correctly' do
pq_xml_data = described_class.new(@xml_data_proquest, xpath_info)
pq_xml_data.parse_subjects
expect(pq_xml_data.subjects).to include('Molecular biology')
expect(pq_xml_data.subjects).to include('Pharmacology')
expect(pq_xml_data.subjects).to include('Physiology')
end

it 'sets the authors correctly' do
pq_xml_data = described_class.new(@xml_data_proquest, xpath_info)
pq_xml_data.parse_authors
expect(pq_xml_data.authors.first.first_name).to include('Ariana')
expect(pq_xml_data.authors.first.middle_name).to include('Cecilia')
expect(pq_xml_data.authors.first.last_name).to include('Gavin')
end
it 'sets the advisors correctly' do
expect(pq_xml_data.advisors.first.attributes).to eql(expected_first_advisor.attributes)
end
end
10 changes: 2 additions & 8 deletions spec/sword/mets/xml_data_element_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,20 +10,14 @@
@xml_data_proquest = @pq_mets_file.find_md_wrap_xml_data_element(mdtype: 'OTHER', other_mdtype: 'PROQUEST')
end

let(:pq_xml_data) { described_class.new(@xml_data_proquest, Sword::Mets::ProquestConstants::XPATH_INFO) }

it 'sets the abstract correctly' do
xpath_info = { namespace: { 'etdsword' => 'http://www.etdadmin.com/ns/etdsword' },
abstract: '//etdsword:DISS_abstract' }
pq_xml_data = described_class.new(@xml_data_proquest, xpath_info)
expect(pq_xml_data.abstract).to include('relief of Rad inhibition of cardiac')
# expect(true).to be true
end

it 'sets the title correctly' do
xpath_info = { namespace: { 'etdsword' => 'http://www.etdadmin.com/ns/etdsword' },
title: '//etdsword:DISS_title' }
pq_xml_data = described_class.new(@xml_data_proquest, xpath_info)
expect(pq_xml_data.title).to include('Unraveling the logic')
# expect(true).to be true
end
end
end

0 comments on commit abf8db2

Please sign in to comment.