Skip to content

Commit aaeb34d

Browse files
committed
use ActiveModel for Sword::Metadata::PersonalName and memoize expensive parses
- specs should not duplicate the parser's XPath data; they use the shared constants instead - parse advisor names the same way as author names in the ProQuest metadata parser
1 parent c6b19d8 commit aaeb34d

6 files changed

+81
-86
lines changed

lib/sword/metadata/personal_name.rb

+10-8
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,16 @@
33
module Sword
44
module Metadata
55
class PersonalName
6-
attr_accessor :first_name,
7-
:full_name_naf_format,
8-
:last_name,
9-
:middle_name,
10-
:role,
11-
# type can be primary, alternate, etc. Kinda freeform
12-
:type,
13-
:uni
6+
include ActiveModel::Model
7+
include ActiveModel::Attributes
8+
attribute :first_name, :string
9+
attribute :full_name_naf_format, :string
10+
attribute :last_name, :string
11+
attribute :middle_name, :string
12+
attribute :role, :string
13+
# type can be primary, alternate, etc.; the value is essentially free-form
14+
attribute :type, :string
15+
attribute :uni, :string
1416
end
1517
end
1618
end

lib/sword/mets/proquest_constants.rb

+3-1
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,12 @@ module Sword::Mets::ProquestConstants
55
OTHER_MDTYPE = 'PROQUEST'
66
XPATH_INFO = { namespace: { 'etdsword' => 'http://www.etdadmin.com/ns/etdsword' },
77
abstract: '//etdsword:DISS_abstract',
8+
advisor_name: '//etdsword:DISS_advisor/etdsword:DISS_name',
89
author_name: '//etdsword:DISS_author/etdsword:DISS_name',
910
first_name: './etdsword:DISS_fname',
1011
middle_name: './etdsword:DISS_middle',
1112
subjects: '//etdsword:DISS_cat_desc',
1213
surname: './etdsword:DISS_surname',
13-
title: '//etdsword:DISS_title' }.freeze
14+
title: '//etdsword:DISS_title'
15+
}.freeze
1416
end
Original file line numberDiff line numberDiff line change
@@ -1,36 +1,44 @@
11
# frozen_string_literal: true
22

33
class Sword::Mets::ProquestEtdXmlDataElement < Sword::Mets::XmlDataElement
4-
attr_accessor :authors,
5-
:subjects
4+
include Sword::Mets::ProquestConstants
65

7-
def initialize(nokogiri_xml_element, xpath_info)
6+
def initialize(nokogiri_xml_element, xpath_info = Sword::Mets::ProquestConstants::XPATH_INFO)
87
super
9-
@authors = []
10-
@subjects = []
8+
@authors = nil
9+
@subjects = nil
1110
end
1211

13-
def parse_authors
14-
diss_authors = @xml_data.xpath(@xpath_info[:author_name], @xpath_info[:namespace])
15-
diss_authors.each do |author|
16-
person = create_personal_author(author)
17-
person.role = 'author'
18-
@authors << person
12+
# Override: parses author names from the XML on first access and memoizes the result
13+
def authors
14+
@authors ||= begin
15+
diss_authors = @xml_data.xpath(@xpath_info[:author_name], @xpath_info[:namespace])
16+
diss_authors.map { |author| create_personal_name(author, role: 'author') }
1917
end
2018
end
2119

22-
def create_personal_author(author)
23-
person = Sword::Metadata::PersonalName.new
24-
person.first_name = author.xpath(@xpath_info[:first_name], @xpath_info[:namespace]).text
25-
person.middle_name = author.xpath(@xpath_info[:middle_name], @xpath_info[:namespace]).text
26-
person.last_name = author.xpath(@xpath_info[:surname], @xpath_info[:namespace]).text
27-
person
20+
def advisors
21+
@advisors ||= begin
22+
diss_advisors = @xml_data.xpath(@xpath_info[:advisor_name], @xpath_info[:namespace])
23+
diss_advisors.map { |author| create_personal_name(author, role: 'advisor') }
24+
end
2825
end
2926

30-
def parse_subjects
31-
@diss_cat_descs = @xml_data.xpath(@xpath_info[:subjects], @xpath_info[:namespace])
32-
@diss_cat_descs.each do |diss_cat_desc|
33-
@subjects.push diss_cat_desc.text
27+
# Override: parses subject descriptions from the XML on first access and memoizes the result
28+
def subjects
29+
@subjects ||= begin
30+
@diss_cat_descs = @xml_data.xpath(@xpath_info[:subjects], @xpath_info[:namespace])
31+
@diss_cat_descs.map(&:text)
3432
end
3533
end
34+
35+
private
36+
def create_personal_name(author, **other_attrs)
37+
author_attrs = other_attrs.merge({
38+
first_name: author.xpath(@xpath_info[:first_name], @xpath_info[:namespace]).text,
39+
middle_name: author.xpath(@xpath_info[:middle_name], @xpath_info[:namespace]).text,
40+
last_name: author.xpath(@xpath_info[:surname], @xpath_info[:namespace]).text,
41+
})
42+
Sword::Metadata::PersonalName.new(**author_attrs)
43+
end
3644
end

lib/sword/mets/xml_data_element.rb

+8-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,14 @@ class Sword::Mets::XmlDataElement
55
:advisors,
66
:authors,
77
:subjects,
8-
:title
8+
:title,
9+
:date_conferred,
10+
:degree,
11+
:embargo_code,
12+
:embargo_release_date,
13+
:institution_department_name,
14+
:institution_name,
15+
:institution_school_code
916

1017
def initialize(nokogiri_xml_element, xpath_info)
1118
@xml_data = nokogiri_xml_element

spec/sword/mets/proquest_etd_xml_data_element_spec.rb

+30-47
Original file line numberDiff line numberDiff line change
@@ -3,56 +3,39 @@
33
require 'rails_helper'
44

55
RSpec.describe Sword::Mets::ProquestEtdXmlDataElement do
6-
let(:xpath_info) do
7-
{ namespace: { 'etdsword' => 'http://www.etdadmin.com/ns/etdsword' },
8-
abstract: '//etdsword:DISS_abstract',
9-
author_name: '//etdsword:DISS_author/etdsword:DISS_name',
10-
first_name: 'etdsword:DISS_fname',
11-
middle_name: 'etdsword:DISS_middle',
12-
subjects: '//etdsword:DISS_cat_desc',
13-
surname: 'etdsword:DISS_surname',
14-
title: '//etdsword:DISS_title' }
6+
let(:pq_xml_data) { described_class.new(@xml_data_proquest, Sword::Mets::ProquestConstants::XPATH_INFO) }
7+
8+
let(:author_attrs) { { first_name: 'Ariana', middle_name: 'Cecilia', last_name: 'Gavin', role: 'author' } }
9+
let(:expected_first_author) { Sword::Metadata::PersonalName.new(author_attrs) }
10+
11+
let(:advisor_attrs) { { first_name: 'Henry', middle_name: 'M', last_name: 'Colecraft', role: 'advisor' } }
12+
let(:expected_first_advisor) { Sword::Metadata::PersonalName.new(advisor_attrs) }
13+
14+
before(:context) do
15+
@pq_mets_file = Sword::Mets::MetsFile.new(file_fixture('xml/mets/PQ_mets.xml').read)
16+
# @xml_data_proquest = @pq_mets_file.find_md_wrap_xml_data_elements(mdtype: 'OTHER', other_mdtype: 'PROQUEST')
17+
@xml_data_proquest = @pq_mets_file.find_md_wrap_xml_data_element(mdtype: 'OTHER', other_mdtype: 'PROQUEST')
18+
end
19+
20+
it 'sets the abstract correctly' do
21+
expect(pq_xml_data.abstract).to include('relief of Rad inhibition of cardiac')
22+
end
23+
24+
it 'sets the title correctly' do
25+
expect(pq_xml_data.title).to include('Unraveling the logic')
26+
end
27+
28+
it 'sets the subjects correctly' do
29+
expect(pq_xml_data.instance_variable_get(:@subjects)).to be_nil
30+
expect(pq_xml_data.subjects).to contain_exactly('Molecular biology', 'Pharmacology', 'Physiology')
1531
end
1632

17-
describe 'at initialization' do
18-
before(:context) do
19-
@pq_mets_file = Sword::Mets::MetsFile.new(file_fixture('xml/mets/PQ_mets.xml').read)
20-
# @xml_data_proquest = @pq_mets_file.find_md_wrap_xml_data_elements(mdtype: 'OTHER', other_mdtype: 'PROQUEST')
21-
@xml_data_proquest = @pq_mets_file.find_md_wrap_xml_data_element(mdtype: 'OTHER', other_mdtype: 'PROQUEST')
22-
end
23-
24-
it 'sets the abstract correctly' do
25-
pq_xml_data = described_class.new(@xml_data_proquest, xpath_info)
26-
expect(pq_xml_data.abstract).to include('relief of Rad inhibition of cardiac')
27-
end
28-
29-
it 'sets the title correctly' do
30-
pq_xml_data = described_class.new(@xml_data_proquest, xpath_info)
31-
expect(pq_xml_data.title).to include('Unraveling the logic')
32-
end
33+
it 'sets the authors correctly' do
34+
expect(pq_xml_data.instance_variable_get(:@authors)).to be_nil
35+
expect(pq_xml_data.authors.first.attributes).to eql(expected_first_author.attributes)
3336
end
3437

35-
describe 'after parsing' do
36-
before(:context) do
37-
@pq_mets_file = Sword::Mets::MetsFile.new(file_fixture('xml/mets/PQ_mets.xml').read)
38-
# @xml_data_proquest = @pq_mets_file.find_md_wrap_xml_data_elements(mdtype: 'OTHER', other_mdtype: 'PROQUEST')
39-
@xml_data_proquest = @pq_mets_file.find_md_wrap_xml_data_element(mdtype: 'OTHER', other_mdtype: 'PROQUEST')
40-
end
41-
42-
it 'sets the subjects correctly' do
43-
pq_xml_data = described_class.new(@xml_data_proquest, xpath_info)
44-
pq_xml_data.parse_subjects
45-
expect(pq_xml_data.subjects).to include('Molecular biology')
46-
expect(pq_xml_data.subjects).to include('Pharmacology')
47-
expect(pq_xml_data.subjects).to include('Physiology')
48-
end
49-
50-
it 'sets the authors correctly' do
51-
pq_xml_data = described_class.new(@xml_data_proquest, xpath_info)
52-
pq_xml_data.parse_authors
53-
expect(pq_xml_data.authors.first.first_name).to include('Ariana')
54-
expect(pq_xml_data.authors.first.middle_name).to include('Cecilia')
55-
expect(pq_xml_data.authors.first.last_name).to include('Gavin')
56-
end
38+
it 'sets the advisors correctly' do
39+
expect(pq_xml_data.advisors.first.attributes).to eql(expected_first_advisor.attributes)
5740
end
5841
end

spec/sword/mets/xml_data_element_spec.rb

+1-8
Original file line numberDiff line numberDiff line change
@@ -9,21 +9,14 @@
99
# @xml_data_proquest = @pq_mets_file.find_md_wrap_xml_data_elements(mdtype: 'OTHER', other_mdtype: 'PROQUEST')
1010
@xml_data_proquest = @pq_mets_file.find_md_wrap_xml_data_element(mdtype: 'OTHER', other_mdtype: 'PROQUEST')
1111
end
12+
let(:pq_xml_data) { described_class.new(@xml_data_proquest, Sword::Mets::ProquestConstants::XPATH_INFO) }
1213

1314
it 'sets the abstract correctly' do
14-
xpath_info = { namespace: { 'etdsword' => 'http://www.etdadmin.com/ns/etdsword' },
15-
abstract: '//etdsword:DISS_abstract' }
16-
pq_xml_data = described_class.new(@xml_data_proquest, xpath_info)
1715
expect(pq_xml_data.abstract).to include('relief of Rad inhibition of cardiac')
18-
# expect(true).to be true
1916
end
2017

2118
it 'sets the title correctly' do
22-
xpath_info = { namespace: { 'etdsword' => 'http://www.etdadmin.com/ns/etdsword' },
23-
title: '//etdsword:DISS_title' }
24-
pq_xml_data = described_class.new(@xml_data_proquest, xpath_info)
2519
expect(pq_xml_data.title).to include('Unraveling the logic')
26-
# expect(true).to be true
2720
end
2821
end
2922
end

0 commit comments

Comments
 (0)