Skip to content

Commit

Permalink
WIP: hyacinth encoder
Browse files Browse the repository at this point in the history
  • Loading branch information
fcd1 committed Feb 7, 2025
1 parent c0eb1f8 commit a5ac9a1
Show file tree
Hide file tree
Showing 2 changed files with 395 additions and 0 deletions.
186 changes: 186 additions & 0 deletions lib/sword/encoders/json_hyacinth2.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
# frozen_string_literal: true

# Encodes deposit metadata into the nested hash kept in @dynamic_field_data
# (readable through #dynamic_field_data).
#
# Usage pattern: assign values through the accessors, call the matching
# encode_* method, then read #dynamic_field_data. Every encode_* method
# replaces the entry it owns, so calling one twice does not duplicate data.
#
# NOTE(review): work in progress. #compose_dynamic_field_data and
# #compose_import_file_data are called below but not defined in this file,
# @hyacinth_project is never assigned here, and @notes / @subjects have no
# readers -- confirm where these are meant to come from.
class Sword::Encoders::JsonHyacinth2
  attr_reader :dynamic_field_data

  attr_accessor :abstract,
                :corporate_names,
                :date_issued,
                :date_issued_start,
                :degree,
                :deposited_by,
                :embargo_release_date,
                :genre_uri,
                :genre_value,
                :language_uri,
                :language_value,
                :license_uri,
                :personal_names

  def initialize
    @dynamic_field_data = {}
    @notes = []
    @corporate_names = []
    @personal_names = []
    @subjects = []
  end

  # Stores @abstract as the single :abstract entry.
  def encode_abstract
    @dynamic_field_data[:abstract] = [{ abstract_value: @abstract }]
  end

  # Stores @date_issued_start as the single :date_issued entry.
  # (@date_issued itself is not read by this method.)
  def encode_date_issued
    @dynamic_field_data[:date_issued] = [{ date_issued_start_value: @date_issued_start }]
  end

  # Stores name, level and discipline of @degree as the single :degree entry.
  # The grantor is the fixed string below; @degree.grantor is not consulted.
  def encode_degree
    @dynamic_field_data[:degree] = [{ degree_name: @degree.name,
                                      degree_level: @degree.level,
                                      degree_discipline: @degree.discipline,
                                      degree_grantor: 'Columbia University' }]
  end

  # Stores @deposited_by as the single :deposited_by entry.
  def encode_deposited_by
    @dynamic_field_data[:deposited_by] = [{ deposited_by_value: @deposited_by }]
  end

  # Rebuilds @digital_object_data for an 'item' object and attaches
  # @dynamic_field_data to it.
  def compose_internal_format_item
    @digital_object_data = {
      digital_object_type: { string_key: 'item' },
      project: { string_key: @hyacinth_project }
    }
    # NOTE(review): not defined in this file -- presumably runs the encode_* methods.
    compose_dynamic_field_data
    @digital_object_data[:dynamic_field_data] = @dynamic_field_data
  end

  # Rebuilds @digital_object_data for an 'asset' object.
  #
  # @param parent_pid identifier recorded under :parent_digital_objects
  # @param asset_import_filepath passed to #compose_import_file_data, whose
  #   result is stored under :import_file
  def compose_internal_format_asset(parent_pid,
                                    asset_import_filepath)
    @digital_object_data = {
      digital_object_type: { string_key: 'asset' },
      project: { string_key: @hyacinth_project },
      parent_digital_objects: [{ identifier: parent_pid }]
    }
    # Kept as a separate step so the three keys above are already in place
    # if the helper raises, exactly as before.
    @digital_object_data[:import_file] = compose_import_file_data asset_import_filepath
  end

  # Stores @embargo_release_date as the single :embargo_release_date entry.
  def encode_embargo_release_date
    @dynamic_field_data[:embargo_release_date] = [{ embargo_release_date_value: @embargo_release_date }]
  end

  # Stores @genre_value / @genre_uri as the single :genre entry.
  def encode_genre
    genre_data = { value: @genre_value, uri: @genre_uri }
    @dynamic_field_data[:genre] = [{ genre_term: genre_data }]
  end

  # Stores @language_value / @language_uri as the single :language entry.
  def encode_language
    language_data = { value: @language_value, uri: @language_uri }
    @dynamic_field_data[:language] = [{ language_term: language_data }]
  end

  # Stores @license_uri as the single :license entry (URI only, no value).
  def encode_license
    @dynamic_field_data[:license] = [{ license_term: { uri: @license_uri } }]
  end

  # Resets the :name list, then appends corporate names followed by personal names.
  def encode_names
    @dynamic_field_data[:name] = []
    encode_corporate_names
    encode_personal_names
  end

  # Appends one :name entry per element of @corporate_names.
  def encode_corporate_names
    @corporate_names.each do |corporate_name|
      set_corporate_name_and_originator_role corporate_name
    end
  end

  # Appends a corporate :name entry carrying the originator role taken from
  # METADATA_VALUES. A nil name is encoded as an empty string.
  def set_corporate_name_and_originator_role corporate_entity
    corporate_name_data = { value: corporate_entity.name.to_s,
                            name_type: 'corporate' }
    append_name_entry(corporate_name_data,
                      METADATA_VALUES[:name_role_originator_value],
                      METADATA_VALUES[:name_role_originator_uri])
  end

  # Builds one role hash; :uri is only included when a URI is given.
  #
  # @return [Hash] { name_role_term: { value: ..., uri: ... } }
  def set_name_role(name_role_value, name_role_uri = nil)
    name_role_term_data = { value: name_role_value }
    name_role_term_data[:uri] = name_role_uri if name_role_uri
    { name_role_term: name_role_term_data }
  end

  # Appends one :name entry per element of @personal_names, after applying
  # the optional HYACINTH_CONFIG[:max_number_names] cap (SWORD-86).
  def encode_personal_names
    truncate_personal_names
    @personal_names.each do |personal_name|
      case personal_name.role
      when 'author'
        set_personal_name_and_author_role personal_name
      when 'thesis_advisor'
        set_personal_name_and_advisor_role personal_name
      else
        # NOTE(review): the original comment here read "default to author?
        # (check this)", yet the code assigns the thesis-advisor role.
        # Behaviour is left unchanged -- confirm which default is intended.
        set_personal_name_and_advisor_role personal_name
      end
    end
  end

  # Appends a personal :name entry with the author role.
  def set_personal_name_and_author_role author
    append_personal_name(author,
                         METADATA_VALUES[:name_role_author_value],
                         METADATA_VALUES[:name_role_author_uri])
  end

  # Formats a person's name for the name_term value.
  #
  # When full_name_naf_format is nil the result is "Last, First Middle",
  # built from whichever given names are present (a one-character given name
  # gets a trailing period). Missing given names no longer leave trailing or
  # doubled spaces; with no given names at all only the last name is returned.
  #
  # Otherwise full_name_naf_format is used, with a period added after every
  # single-character word that follows a space. The input string is never
  # mutated, so frozen strings are safe.
  #
  # @return [String]
  def prep_name person
    naf_name = person.full_name_naf_format
    unless naf_name.nil?
      # First pass: single-character words followed by a space (lookahead keeps
      # the space available for the next match). Second pass: one at line end.
      return naf_name.to_s.gsub(/ (\w)(?= )/, ' \1.').gsub(/( \w$)/, '\1.')
    end

    given_names = [person.first_name, person.middle_name]
                  .reject { |part| part.nil? || part.empty? }
                  .map { |part| part.length == 1 ? "#{part}." : part }
                  .join(' ')
    return person.last_name.to_s if given_names.empty?

    "#{person.last_name}, #{given_names}"
  end

  # Appends a personal :name entry with the thesis-advisor role.
  def set_personal_name_and_advisor_role advisor
    append_personal_name(advisor,
                         METADATA_VALUES[:name_role_thesis_advisor_value],
                         METADATA_VALUES[:name_role_thesis_advisor_uri])
  end

  private

  # Cuts @personal_names down to HYACINTH_CONFIG[:max_number_names] entries
  # when that key is configured and exceeded, recording an internal note.
  def truncate_personal_names
    return unless HYACINTH_CONFIG.key?(:max_number_names)

    max_names = HYACINTH_CONFIG[:max_number_names]
    return unless max_names && @personal_names.length > max_names

    @notes << Sword::Metadata::Note.new(
      "SWORD deposit contains more than #{max_names} names, only first #{max_names} processed. " \
      'See mets.xml for full list. ' \
      'Large number of Name fields will generate error in Hyacinth. See JIRA ticket SWORD-86.',
      'internal'
    )
    # first(n) also behaves for n == 0, where [0..n - 1] would return everything.
    @personal_names = @personal_names.first(max_names)
  end

  # Shared body of the two personal-name setters.
  def append_personal_name(person, role_value, role_uri)
    personal_name_data = { value: prep_name(person),
                           name_type: 'personal' }
    append_name_entry(personal_name_data, role_value, role_uri)
  end

  # Appends a name entry with a single role, creating the :name list when a
  # setter is called without #encode_names having run first.
  #
  # @return [Array] the :name list
  def append_name_entry(name_term, role_value, role_uri)
    (@dynamic_field_data[:name] ||= []) << { name_term: name_term,
                                             name_role: [set_name_role(role_value, role_uri)] }
  end
end
209 changes: 209 additions & 0 deletions spec/sword/encoders/json_hyacinth2_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,209 @@
# frozen_string_literal: true

require 'rails_helper'

# Checks, for each encode_* method, the structure written into
# #dynamic_field_data after the corresponding accessors have been set.
# Expected structures are declared with `let` so they are built per example
# rather than once at group-definition time.
RSpec.describe Sword::Encoders::JsonHyacinth2 do
  ########################################## encoding methods functionality specs
  describe 'encode methods' do
    ########################################## #encode_abstract
    describe '#encode_abstract' do
      context 'given @abstract set to a given value' do
        let(:expected_value) { [{ abstract_value: 'This is a sample abstract' }] }

        it 'constructs correct encoded format' do
          subject.abstract = 'This is a sample abstract'
          subject.encode_abstract
          expect(subject.dynamic_field_data[:abstract]).to eq(expected_value)
        end
      end
    end

    ########################################## #encode_date_issued
    describe '#encode_date_issued' do
      context 'given @date_issued_start set to a given value' do
        let(:expected_value) { [{ date_issued_start_value: 2015 }] }

        it 'constructs correct encoded format' do
          subject.date_issued_start = 2015
          subject.encode_date_issued
          expect(subject.dynamic_field_data[:date_issued]).to eq(expected_value)
        end
      end
    end

    ########################################## #encode_degree
    describe '#encode_degree' do
      context 'given @degree set to a given test value' do
        let(:expected_value) do
          [{ degree_name: 'Ph.D',
             degree_level: 2,
             degree_discipline: 'Microbiology, Immunology and Infection',
             degree_grantor: 'Columbia University' }]
        end

        it 'constructs correct encoded format' do
          degree = Sword::Metadata::Degree.new
          degree.discipline = 'Microbiology, Immunology and Infection'
          degree.grantor = 'Columbia University'
          degree.level = 2
          degree.name = 'Ph.D'
          subject.degree = degree
          subject.encode_degree
          expect(subject.dynamic_field_data[:degree]).to eq(expected_value)
        end
      end
    end

    ########################################## #encode_deposited_by
    describe '#encode_deposited_by' do
      context 'given @deposited_by set to a given value' do
        let(:expected_value) { [{ deposited_by_value: 'First Test Depositor' }] }

        it 'constructs correct encoded format' do
          subject.deposited_by = 'First Test Depositor'
          subject.encode_deposited_by
          expect(subject.dynamic_field_data[:deposited_by]).to eq(expected_value)
        end
      end
    end

    ########################################## #encode_embargo_release_date
    describe '#encode_embargo_release_date' do
      context 'given @embargo_release_date set to a given value' do
        let(:expected_value) { [{ embargo_release_date_value: '2015' }] }

        it 'constructs correct encoded format' do
          subject.embargo_release_date = '2015'
          subject.encode_embargo_release_date
          expect(subject.dynamic_field_data[:embargo_release_date]).to eq(expected_value)
        end
      end
    end

    ########################################## #encode_genre
    describe '#encode_genre' do
      context 'given @genre_uri and @genre_value set to a given value' do
        let(:expected_value) do
          [{ genre_term: { value: 'articles',
                           uri: 'http://vocab.getty.edu/aat/300048715' } }]
        end

        it 'constructs correct encoded format' do
          subject.genre_uri = 'http://vocab.getty.edu/aat/300048715'
          subject.genre_value = 'articles'
          subject.encode_genre
          expect(subject.dynamic_field_data[:genre]).to eq(expected_value)
        end
      end
    end

    ########################################## #encode_language
    describe '#encode_language' do
      context 'given @language_uri and @language_value set to a given value' do
        let(:expected_value) do
          [{ language_term: { value: 'English',
                              uri: 'http://id.loc.gov/vocabulary/iso639-2/eng' } }]
        end

        it 'constructs correct encoded format' do
          subject.language_uri = 'http://id.loc.gov/vocabulary/iso639-2/eng'
          subject.language_value = 'English'
          subject.encode_language
          expect(subject.dynamic_field_data[:language]).to eq(expected_value)
        end
      end
    end

    ########################################## #encode_license
    describe '#encode_license' do
      context 'given @license_uri set to a given value' do
        let(:expected_value) do
          [{ license_term: { uri: 'https://creativecommons.org/licenses/by/4.0/' } }]
        end

        it 'constructs correct encoded format' do
          subject.license_uri = 'https://creativecommons.org/licenses/by/4.0/'
          subject.encode_license
          expect(subject.dynamic_field_data[:license]).to eq(expected_value)
        end
      end
    end

    ########################################## #encode_names
    describe '#encode_names' do
      context 'given @personal_names and @corporate_names set to given values' do
        # Corporate names are expected first, then personal names in the
        # order they were added.
        let(:expected_value) do
          [
            {
              name_term: {
                value: 'Columbia University. Microbiology, Immunology and Infection',
                name_type: 'corporate'
              },
              name_role: [
                {
                  name_role_term: {
                    uri: 'http://id.loc.gov/vocabulary/relators/org',
                    value: 'Originator'
                  }
                }
              ]
            },
            {
              name_term: {
                value: 'Smith, John Howard',
                name_type: 'personal'
              },
              name_role: [
                {
                  name_role_term: {
                    uri: 'http://id.loc.gov/vocabulary/relators/aut',
                    value: 'Author'
                  }
                }
              ]
            },
            {
              name_term: {
                value: 'Smithy, Johny Howardy',
                name_type: 'personal'
              },
              name_role: [
                {
                  name_role_term: {
                    uri: 'http://id.loc.gov/vocabulary/relators/ths',
                    value: 'Thesis advisor'
                  }
                }
              ]
            }
          ]
        end

        it 'constructs correct encoded format' do
          corporate_name = Sword::Metadata::CorporateName.new
          corporate_name.name = 'Columbia University. Microbiology, Immunology and Infection'
          corporate_name.role = 'originator'
          subject.corporate_names << corporate_name

          first_personal_name = Sword::Metadata::PersonalName.new
          first_personal_name.full_name_naf_format = 'Smith, John Howard'
          first_personal_name.role = 'author'
          subject.personal_names << first_personal_name

          second_personal_name = Sword::Metadata::PersonalName.new
          second_personal_name.full_name_naf_format = 'Smithy, Johny Howardy'
          second_personal_name.role = 'thesis_advisor'
          subject.personal_names << second_personal_name

          subject.encode_names
          expect(subject.dynamic_field_data[:name]).to eq(expected_value)
        end
      end
    end
  end
end

0 comments on commit a5ac9a1

Please sign in to comment.