Skip to content

Commit 971e304

Browse files
authored
Prevent setting of extent subfield for datastreams if width or height is missing from FITS (#1342)
1 parent 30ed571 commit 971e304

File tree

3 files changed

+121
-3
lines changed

3 files changed

+121
-3
lines changed

indexing-solr/src/main/java/edu/unc/lib/boxc/indexing/solr/filter/SetDatastreamFilter.java

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import java.util.Optional;
2929
import java.util.stream.Collectors;
3030

31+
import org.apache.commons.lang3.StringUtils;
3132
import org.apache.jena.rdf.model.Resource;
3233
import org.apache.jena.rdf.model.Statement;
3334
import org.jdom2.Document;
@@ -140,7 +141,16 @@ private String getExtent(List<BinaryObject> binList) {
140141
if (imgMd != null) {
141142
String imgHeight = imgMd.getChildTextTrim("imageHeight", FITS_NS);
142143
String imgWidth = imgMd.getChildTextTrim("imageWidth", FITS_NS);
143-
extent = imgHeight + "x" + imgWidth;
144+
if (!StringUtils.isBlank(imgHeight) && !StringUtils.isBlank(imgWidth)) {
145+
try {
146+
Integer.parseInt(imgHeight);
147+
Integer.parseInt(imgWidth);
148+
extent = imgHeight + "x" + imgWidth;
149+
} catch (NumberFormatException e) {
150+
log.warn("Invalid image width or height from FITS {}: {} x {}",
151+
fits.getPid().getQualifiedId(), imgWidth, imgHeight);
152+
}
153+
}
144154
}
145155
}
146156
return extent;

indexing-solr/src/test/java/edu/unc/lib/boxc/indexing/solr/filter/SetDatastreamFilterTest.java

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,32 @@ public void fileObjectImageBinaryTest() throws Exception {
223223
assertEquals(FILE_SIZE + FILE2_SIZE + (FILE3_SIZE * 2), (long) idb.getFilesizeTotal());
224224
}
225225

226+
@Test
227+
public void fileObjectImageBinaryNoDimensionsTest() throws Exception {
228+
when(binObj.getResource()).thenReturn(
229+
fileResource(ORIGINAL_FILE.getId(), FILE_SIZE, FILE3_MIMETYPE, "test.png", FILE_DIGEST));
230+
231+
BinaryObject binObj2 = mock(BinaryObject.class);
232+
when(binObj2.getPid()).thenReturn(DatastreamPids.getTechnicalMetadataPid(pid));
233+
when(binObj2.getResource()).thenReturn(
234+
fileResource(TECHNICAL_METADATA.getId(), FILE2_SIZE, FILE2_MIMETYPE, FILE2_NAME, FILE2_DIGEST));
235+
when(binObj2.getBinaryStream()).thenReturn(getClass().getResourceAsStream("/datastream/techmdImageNoDimensions.xml"));
236+
237+
when(fileObj.getBinaryObjects()).thenReturn(Arrays.asList(binObj, binObj2));
238+
dip.setContentObject(fileObj);
239+
240+
filter.filter(dip);
241+
242+
assertContainsDatastream(idb.getDatastream(), ORIGINAL_FILE.getId(),
243+
FILE_SIZE, FILE3_MIMETYPE, "test.png", FILE_DIGEST, null, null);
244+
assertContainsDatastream(idb.getDatastream(), TECHNICAL_METADATA.getId(),
245+
FILE2_SIZE, FILE2_MIMETYPE, FILE2_NAME, FILE2_DIGEST, null, null);
246+
247+
assertEquals(FILE_SIZE, (long) idb.getFilesizeSort());
248+
// Total is the original file plus the technical metadata datastream; no derivatives are present
249+
assertEquals(FILE_SIZE + FILE2_SIZE, (long) idb.getFilesizeTotal());
250+
}
251+
226252
@Test(expected = IndexingException.class)
227253
public void fileObjectNoOriginalTest() throws Exception {
228254
when(binObj.getResource()).thenReturn(
@@ -358,8 +384,6 @@ private void assertContainsDatastream(List<String> values, String name, long fil
358384
String joined = components.stream()
359385
.map(c -> c == null ? "" : c.toString())
360386
.collect(Collectors.joining("|"));
361-
System.out.println("Values: " + values);
362-
System.out.println("Seeking: " + joined);
363387
assertTrue("Did not contain datastream " + name, values.contains(joined));
364388
}
365389

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<premis3:premis xmlns:premis3="http://www.loc.gov/premis/v3">
3+
<premis3:object xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="premis3:file">
4+
<premis3:objectCharacteristics>
5+
<premis3:compositionLevel>0</premis3:compositionLevel>
6+
<premis3:format>
7+
<premis3:formatDesignation>
8+
<premis3:formatName>JPEG EXIF</premis3:formatName>
9+
</premis3:formatDesignation>
10+
</premis3:format>
11+
<premis3:size>66500</premis3:size>
12+
<premis3:objectCharacteristicsExtension>
13+
<fits xmlns="http://hul.harvard.edu/ois/xml/ns/fits/fits_output" xsi:schemaLocation="http://hul.harvard.edu/ois/xml/ns/fits/fits_output http://hul.harvard.edu/ois/xml/xsd/fits/fits_output.xsd" version="1.6.4-wisc-SNAPSHOT" timestamp="2/4/22, 11:11 AM">
14+
<identification status="SINGLE_RESULT">
15+
<identity format="JPEG EXIF" mimetype="image/jpeg" toolname="FITS" toolversion="1.6.4-wisc-SNAPSHOT">
16+
<tool toolname="NLNZ Metadata Extractor" toolversion="3.6GA" />
17+
<version toolname="NLNZ Metadata Extractor" toolversion="3.6GA">1.1</version>
18+
</identity>
19+
</identification>
20+
<fileinfo>
21+
<lastmodified toolname="Exiftool" toolversion="12.29" status="CONFLICT">2012:05:08 14:09:05</lastmodified>
22+
<lastmodified toolname="Tika" toolversion="2.0.0" status="CONFLICT">2012-05-08T10:09:05</lastmodified>
23+
<created toolname="NLNZ Metadata Extractor" toolversion="3.6GA" status="CONFLICT">2012:05:08 14:09:05</created>
24+
<created toolname="Tika" toolversion="2.0.0" status="CONFLICT">2012-05-08T10:09:05</created>
25+
<filepath toolname="OIS File Information" toolversion="1.0" status="SINGLE_RESULT">/opt/data/test_staging/pyr_dir/therapy-dog-pyr1.jpg</filepath>
26+
<filename toolname="OIS File Information" toolversion="1.0" status="SINGLE_RESULT">therapy-dog-pyr1.jpg</filename>
27+
<size toolname="OIS File Information" toolversion="1.0">66500</size>
28+
<md5checksum toolname="OIS File Information" toolversion="1.0" status="SINGLE_RESULT">c55102ab3c25319a1ff13730532e52a0</md5checksum>
29+
<fslastmodified toolname="OIS File Information" toolversion="1.0" status="SINGLE_RESULT">1643991034440</fslastmodified>
30+
</fileinfo>
31+
<filestatus />
32+
<metadata>
33+
<image>
34+
<lightSource toolname="NLNZ Metadata Extractor" toolversion="3.6GA" status="SINGLE_RESULT">unknown</lightSource>
35+
<standard>
36+
<mix:mix xmlns:mix="http://www.loc.gov/mix/v20">
37+
<mix:BasicDigitalObjectInformation />
38+
<mix:BasicImageInformation>
39+
<mix:BasicImageCharacteristics>
40+
<mix:PhotometricInterpretation />
41+
</mix:BasicImageCharacteristics>
42+
</mix:BasicImageInformation>
43+
<mix:ImageCaptureMetadata>
44+
<mix:GeneralCaptureInformation>
45+
<mix:dateTimeCreated>2012-05-08T14:09:05</mix:dateTimeCreated>
46+
</mix:GeneralCaptureInformation>
47+
<mix:DigitalCameraCapture>
48+
<mix:DigitalCameraModel />
49+
<mix:CameraCaptureSettings>
50+
<mix:ImageData>
51+
<mix:lightSource>unknown</mix:lightSource>
52+
</mix:ImageData>
53+
</mix:CameraCaptureSettings>
54+
</mix:DigitalCameraCapture>
55+
</mix:ImageCaptureMetadata>
56+
<mix:ImageAssessmentMetadata>
57+
<mix:SpatialMetrics />
58+
<mix:ImageColorEncoding />
59+
</mix:ImageAssessmentMetadata>
60+
</mix:mix>
61+
</standard>
62+
</image>
63+
</metadata>
64+
<statistics fitsExecutionTime="296">
65+
<tool toolname="MediaInfo" toolversion="21.03" status="did not run" />
66+
<tool toolname="OIS Audio Information" toolversion="0.1" status="did not run" />
67+
<tool toolname="ADL Tool" toolversion="0.1" status="did not run" />
68+
<tool toolname="VTT Tool" toolversion="0.1" status="did not run" />
69+
<tool toolname="Droid" toolversion="[could not launch tool]" status="failed" />
70+
<tool toolname="Exiftool" toolversion="12.29" executionTime="114" />
71+
<tool toolname="NLNZ Metadata Extractor" toolversion="3.6GA" executionTime="294" />
72+
<tool toolname="OIS File Information" toolversion="1.0" executionTime="1" />
73+
<tool toolname="OIS XML Metadata" toolversion="0.2" status="did not run" />
74+
<tool toolname="Tika" toolversion="2.0.0" executionTime="5" />
75+
</statistics>
76+
</fits>
77+
</premis3:objectCharacteristicsExtension>
78+
</premis3:objectCharacteristics>
79+
<premis3:objectIdentifier>
80+
<premis3:objectIdentifierType>Fedora Datastream PID</premis3:objectIdentifierType>
81+
<premis3:objectIdentifierValue>http://dcr-test-bes.libint.unc.edu:8181/fcrepo/rest/content/38/a6/42/65/38a64265-9bfd-48f2-89f0-6507a33d7df3</premis3:objectIdentifierValue>
82+
</premis3:objectIdentifier>
83+
</premis3:object>
84+
</premis3:premis>

0 commit comments

Comments
 (0)