Skip to content

Commit ef8f1d7

Browse files
besssourcefilter
authored andcommitted
Almost working: Parallelized import
Any error associated with an individual row is now attached to an individual background job
1 parent 0df9930 commit ef8f1d7

10 files changed

+92
-67
lines changed

.rubocop.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ AllCops:
1010

1111
Metrics/AbcSize:
1212
Enabled: true
13+
Max: 30
1314
Exclude:
1415
- 'app/importers/californica_csv_cleaner.rb'
1516
- app/jobs/hyrax/characterize_job.rb
@@ -57,6 +58,7 @@ Metrics/LineLength:
5758

5859
Metrics/MethodLength:
5960
Enabled: true
61+
Max: 20
6062
Exclude:
6163
- 'app/importers/californica_csv_parser.rb'
6264
- app/importers/actor_record_importer.rb

app/importers/actor_record_importer.rb

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ def create_for(record:)
7171
created = import_type(object_type).new
7272
created.apply_depositor_metadata(@depositor.user_key)
7373
attrs = record.attributes.merge(additional_attrs)
74-
74+
7575
attrs = attrs.merge(member_of_collections_attributes: { '0' => { id: collection_id } }) if collection_id
7676

7777
# Ensure nothing is passed in the files field,
@@ -90,12 +90,15 @@ def create_for(record:)
9090

9191
if middleware.create(actor_env)
9292
info_stream << "event: record_created, batch_id: #{batch_id}, record_id: #{created.id}, collection_id: #{collection_id}, record_title: #{attrs[:title]&.first}"
93-
@success_count += 1
9493
else
94+
error_messages = []
9595
created.errors.each do |attr, msg|
9696
error_stream << "event: validation_failed, batch_id: #{batch_id}, collection_id: #{collection_id}, attribute: #{attr.capitalize}, message: #{msg}, record_title: record_title: #{attrs[:title] ? attrs[:title] : attrs}"
97+
error_messages << msg
9798
end
98-
@failure_count += 1
99+
# Errors raised here should be rescued in the CsvRowImportJob and the
100+
# message should be recorded on the CsvRow object for reporting in the UI
101+
raise "Validation failed: #{error_messages.join(', ')}"
99102
end
100103
end
101104
end

app/importers/record_importer.rb

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ class RecordImporter < Darlingtonia::HyraxRecordImporter
66
attr_reader :actor_record_importer
77
attr_reader :collection_record_importer
88

9-
109
def initialize(error_stream:, info_stream:, attributes: {})
1110
@actor_record_importer = ::ActorRecordImporter.new(error_stream: error_stream, info_stream: info_stream, attributes: attributes)
1211
@collection_record_importer = ::CollectionRecordImporter.new(error_stream: error_stream, info_stream: info_stream, attributes: attributes)
@@ -16,11 +15,11 @@ def initialize(error_stream:, info_stream:, attributes: {})
1615

1716
def import(record:)
1817
csv_row = CsvRow.create(
19-
metadata: record.mapper.metadata.to_json,
20-
row_number: record.mapper.row_number,
21-
csv_import_id: batch_id
22-
)
23-
CsvRowImportJob.perform_later(row_id: csv_row.id )
18+
metadata: record.mapper.metadata.to_json,
19+
row_number: record.mapper.row_number,
20+
csv_import_id: batch_id
21+
)
22+
CsvRowImportJob.perform_later(row_id: csv_row.id)
2423
end
2524

2625
def success_count

app/jobs/csv_row_import_job.rb

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,18 +5,22 @@ def perform(row_id:)
55
metadata = JSON.parse(row.metadata)
66
csv_import = CsvImport.find(row.csv_import_id)
77
import_file_path = csv_import.import_file_path
8-
record = Darlingtonia::InputRecord.from(metadata: metadata, mapper: CalifornicaMapper.new(import_file_path: import_file_path ))
8+
record = Darlingtonia::InputRecord.from(metadata: metadata, mapper: CalifornicaMapper.new(import_file_path: import_file_path))
99

1010
collection_record_importer = ::CollectionRecordImporter.new(attributes: metadata)
1111
actor_record_importer = ::ActorRecordImporter.new(attributes: metadata)
1212
selected_importer = if record.mapper.collection?
13-
collection_record_importer
14-
else
15-
actor_record_importer
16-
end
13+
collection_record_importer
14+
else
15+
actor_record_importer
16+
end
1717
selected_importer.import(record: record)
1818

1919
row.status = 'complete'
2020
row.save
21+
rescue => e
22+
row.status = 'error'
23+
row.error_messages = e.message
24+
row.save
2125
end
2226
end

spec/factories/csv_imports.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# frozen_string_literal: true
22
FactoryBot.define do
33
factory :csv_import do
4-
user { nil }
4+
user { FactoryBot.create(:user) }
55
manifest { Rack::Test::UploadedFile.new(Rails.root.join('spec', 'fixtures', 'csv_import', 'import_manifest.csv'), 'text/csv') }
66
import_file_path { Rails.root.join('spec', 'fixtures') }
77
end

spec/factories/csv_rows.rb

Lines changed: 35 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,41 @@
77
status { "complete" }
88
error_messages { "here is your error" }
99
metadata do
10-
{ "Project Name" => "Ethiopic Manuscripts", "Item ARK" => "21198/zz001pz6jq", "Parent ARK" => "21198/zz001pz6h6", "Object Type" => "Page", "File Name" => "mss_page2.png", "Item Sequence" => "2", "Type.typeOfResource" => "text", "Type.genre" => nil, "Rights.copyrightStatus" => "pd", "Rights.servicesContact" => nil, "Language" => "gez", "Name.repository" => nil, "AltIdentifier.local" => "Ms. 50_#r or #v?", "Title" => "Image 2", "AltTitle.other" => "መርበብተ ሰሎሞን|~|ጸሎተ ሱስንዮስ", "AltTitle.translated" => "Prayer of Susneyos|~|Prayer of King Solomon for Catching of Demons", "AltTitle.uniform" => "Magical prayers", "Place of origin" => "Ethiopia", "Date.creation" => "18th c.",
11-
"Date.normalized" => "1700/1799", "Format.extent" => "1 image", "Format.dimensions" => nil, "Support" => "Vellum", "Relation.rectoVerso" => nil, "Description.note" => nil, "Description.history" => nil, "Description.condition" => nil, "Coverage.temporal" => nil, "Description.collation" => nil, "Description.binding" => nil, "Description.tableOfContents" => nil, "Description.contents" => nil, "Description.abstract" => nil }.to_json
10+
{
11+
"Project Name" => "Ethiopic Manuscripts",
12+
"Item ARK" => "21198/zz001pz6jq",
13+
"Parent ARK" => "21198/zz001pz6h6",
14+
"Object Type" => "Page",
15+
"File Name" => "mss_page2.png",
16+
"Item Sequence" => "2",
17+
"Type.typeOfResource" => "text",
18+
"Type.genre" => nil,
19+
"Rights.copyrightStatus" => "pd",
20+
"Rights.servicesContact" => nil,
21+
"Language" => "gez",
22+
"Name.repository" => nil,
23+
"AltIdentifier.local" => "Ms. 50_#r or #v?",
24+
"Title" => "Image 2",
25+
"AltTitle.other" => "መርበብተ ሰሎሞን|~|ጸሎተ ሱስንዮስ",
26+
"AltTitle.translated" => "Prayer of Susneyos|~|Prayer of King Solomon for Catching of Demons",
27+
"AltTitle.uniform" => "Magical prayers",
28+
"Place of origin" => "Ethiopia",
29+
"Date.creation" => "18th c.",
30+
"Date.normalized" => "1700/1799",
31+
"Format.extent" => "1 image",
32+
"Format.dimensions" => nil,
33+
"Support" => "Vellum",
34+
"Relation.rectoVerso" => nil,
35+
"Description.note" => nil,
36+
"Description.history" => nil,
37+
"Description.condition" => nil,
38+
"Coverage.temporal" => nil,
39+
"Description.collation" => nil,
40+
"Description.binding" => nil,
41+
"Description.tableOfContents" => nil,
42+
"Description.contents" => nil,
43+
"Description.abstract" => nil
44+
}.to_json
1245
end
1346
end
1447
end

spec/importers/actor_record_importer_spec.rb

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -57,10 +57,8 @@
5757
}
5858
end
5959

60-
it 'logs an error' do
61-
expect { importer.import(record: record) }
62-
.to change { error_stream.first }
63-
.to match(/^event: validation_failed/)
60+
it 'raises an error' do
61+
expect { importer.import(record: record) }.to raise_error(RuntimeError, /Validation failed/)
6462
end
6563
end
6664
end

spec/importers/californica_importer_spec.rb

Lines changed: 1 addition & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
require 'rails_helper'
44

5-
RSpec.describe CalifornicaImporter, :clean do
5+
RSpec.describe CalifornicaImporter, :clean, inline_jobs: true do
66
subject(:importer) { described_class.new(csv_import) }
77
let(:csv_import) { FactoryBot.create(:csv_import, user: user, manifest: manifest) }
88
let(:manifest) { Rack::Test::UploadedFile.new(Rails.root.join(csv_path), 'text/csv') }
@@ -32,10 +32,6 @@
3232
expect(Work.last.date_uploaded).not_to be_nil
3333
end
3434

35-
it 'enqueues local file attachment job' do
36-
expect { importer.import }.to have_enqueued_job(IngestLocalFileJob)
37-
end
38-
3935
it "has an ingest log" do
4036
expect(importer.ingest_log).to be_kind_of(Logger)
4137
end
@@ -49,11 +45,6 @@
4945
expect(File.readlines(importer.ingest_log_filename).each(&:chomp!).last).to match(/elapsed_time/)
5046
end
5147

52-
it "records the number of records ingested" do
53-
importer.import
54-
expect(File.read(importer.ingest_log_filename)).to match(/successful_record_count: 1/)
55-
end
56-
5748
context 'when the collection doesn\'t exist yet' do
5849
it 'creates a new collection with a modified ark as the id' do
5950
importer.import
@@ -144,33 +135,5 @@
144135
expect(File.readlines(ENV['MISSING_FILE_LOG']).each(&:chomp!).last).to match(/missing_file.tif/)
145136
end
146137
end
147-
148-
context 'when the records error' do
149-
let(:ldp_error) { Ldp::PreconditionFailed }
150-
let(:error_record) { Darlingtonia::InputRecord.from(metadata: metadata, mapper: CalifornicaMapper.new) }
151-
let(:metadata) do
152-
{
153-
'Title' => 'Comet in Moominland',
154-
'language' => 'English',
155-
'visibility' => 'open'
156-
}
157-
end
158-
before do
159-
allow(error_record).to receive(:attributes).and_raise(ldp_error)
160-
allow(importer.parser).to receive(:records).and_return([error_record])
161-
allow(importer.parser).to receive(:validate).and_return(true)
162-
end
163-
164-
it 'does not ingest the item' do
165-
expect { importer.import }.not_to change { Work.count }
166-
end
167-
168-
it 'logs the errors' do
169-
importer.import
170-
171-
expect(File.readlines(importer.error_log_filename).each(&:chomp!))
172-
.to include(/ERROR.+#{ldp_error}/)
173-
end
174-
end
175138
end
176139
end

spec/jobs/csv_row_import_job_spec.rb

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,35 @@
11
# frozen_string_literal: true
22
require 'rails_helper'
33

4-
RSpec.describe CsvRowImportJob do
5-
let(:csv_row) { FactoryBot.create(:csv_row, status: nil) }
4+
RSpec.describe CsvRowImportJob, :clean do
5+
let(:csv_import) { FactoryBot.create(:csv_import) }
66

7-
it 'can set a status' do
8-
described_class.perform_now(row_id: csv_row.id)
9-
csv_row.reload
10-
expect(csv_row.status).to eq('complete')
7+
context 'happy path' do
8+
let(:csv_row) { FactoryBot.create(:csv_row, status: nil, csv_import_id: csv_import.id) }
9+
10+
it 'can set a status' do
11+
described_class.perform_now(row_id: csv_row.id)
12+
csv_row.reload
13+
expect(csv_row.status).to eq('complete')
14+
end
15+
end
16+
17+
context 'error cases' do
18+
let(:metadata) do
19+
{
20+
"Project Name" => "Ethiopic Manuscripts",
21+
"Item ARK" => "",
22+
"Parent ARK" => "21198/zz001pz6h6",
23+
"Object Type" => "Page"
24+
}.to_json
25+
end
26+
let(:csv_row) { FactoryBot.create(:csv_row, status: nil, metadata: metadata, error_messages: nil, csv_import_id: csv_import.id) }
27+
28+
it 'records an error state and error messages' do
29+
described_class.perform_now(row_id: csv_row.id)
30+
csv_row.reload
31+
expect(csv_row.status).to eq('error')
32+
expect(csv_row.error_messages).to eq 'Cannot set id without a valid ark'
33+
end
1134
end
1235
end

spec/jobs/mss_csv_import_job_spec.rb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,8 @@ def expected_objects_in_expected_order
3333
expect(work.member_of_collections.first.id).to eq "x3xg9000zz-89112"
3434

3535
# The work is attached to the Collection as expected
36-
#expect(collection.child_works.count).to eq 1
37-
#expect(collection.child_works.first.title.first).to eq "Ms. 50 Marbabeta Salomon, Ṣalota Susnyos"
36+
expect(collection.child_works.count).to eq 1
37+
expect(collection.child_works.first.title.first).to eq "Ms. 50 Marbabeta Salomon, Ṣalota Susnyos"
3838
# The Pages are attached to the Work as expected
3939
work = Work.last
4040
expect(work.members.size).to eq 3

0 commit comments

Comments
 (0)