Skip to content

Commit d5223a6

Browse files
authored
Merge pull request #44 from riga/fix/extended_attribute_files
Add check for extended attribute files in validator.
2 parents 7ccfa8d + bb36229 commit d5223a6

File tree

8 files changed

+122
-7
lines changed

8 files changed

+122
-7
lines changed

hepdata_validator/full_submission_validator.py

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -205,27 +205,48 @@ def validate(self, directory=None, file=None, archive=None):
205205

206206
# Check all files in directory are in included_files
207207
if not self.single_yaml_file and self.schema_version >= packaging_version.parse("1.1.0"):
208+
# helper to check if a provided file is not meant to describe HEP data, but rather
209+
# represents "extended attributes", e.g. as produced by BSD tar (the default on macOS),
210+
# which creates these extra files when archiving files with extended attributes on
211+
# HFS+ volumes (denoted by "@" in permission bits)
212+
def is_ext_attr_file(f):
213+
# three conditions must be fulfilled
214+
# 1. the file must not be referenced in the submission (checked by the loop below before this helper is called)
215+
# 2. the file name must have the format "._<actual_file>"
216+
prefix = "._"
217+
if not f.startswith(prefix):
218+
return False
219+
# 3. a file named "<actual_file>" must exist in the same directory
220+
if not os.path.isfile(os.path.join(self.directory, f[len(prefix):])):
221+
return False
222+
return True
223+
208224
for f in os.listdir(self.directory):
209225
file_path = os.path.join(self.directory, f)
210226
if file_path not in self.included_files:
211227
self._add_validation_message(
212-
file=file_path, message='%s is not referenced in the submission.' % f
228+
file=file_path, message=f'{f} is not referenced in the submission.'
213229
)
230+
if is_ext_attr_file(f):
231+
self._add_validation_message(
232+
file=file_path, message=f'{f} might be a file created by tar on MacOS. Set COPYFILE_DISABLE=1 before creating the archive.',
233+
level='hint'
234+
)
214235

215236
return len(self.messages) == 0
216237
finally:
217238
if self.temp_directory:
218239
# Delete temporary Directory
219240
shutil.rmtree(self.temp_directory)
220241

221-
def _add_validation_message(self, file, message):
242+
def _add_validation_message(self, file, message, **kwargs):
222243
if self.temp_directory:
223244
# Remove temp directory from filename and message
224245
file = self._remove_temp_directory(file)
225246
message = self._remove_temp_directory(message)
226247

227248
self.add_validation_message(ValidationMessage(
228-
file=file, message=message
249+
file=file, message=message, **kwargs
229250
))
230251

231252
def _remove_temp_directory(self, s):
67 Bytes
Binary file not shown.

testsuite/test_data/TestHEPSubmission_invalid/._data10.yaml

Whitespace-only changes.

testsuite/test_data/TestHEPSubmission_invalid/._data11.yaml

Whitespace-only changes.
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
independent_variables:
2+
- header: {name: M(ZZ), units: GEV}
3+
values:
4+
- {low: 0, high: 240}
5+
- {low: 240, high: 300}
6+
- {low: 300, high: 400}
7+
- {low: 400, high: 800}
8+
dependent_variables:
9+
- header: {name: 10**6 * 1/SIG(fiducial) * D(SIG(fiducial))/DM(ZZ)}
10+
qualifiers:
11+
- {name: RE, value: P P --> Z0 < LEPTON+ LEPTON- > Z0 < LEPTON+ LEPTON- > X}
12+
- {name: SQRT(S), units: GeV, value: 7000}
13+
values:
14+
- value: 2200
15+
errors:
16+
- {label: stat, symerror: 300}
17+
- {label: 'sys,detector', symerror: 40}
18+
- {label: 'sys,background', symerror: 2}
19+
- value: 4500
20+
errors:
21+
- {label: stat, symerror: 1000}
22+
- {label: 'sys,detector', symerror: 100}
23+
- {label: 'sys,background', symerror: 5}
24+
- value: 1000
25+
errors:
26+
- {label: stat, symerror: 400}
27+
- {label: 'sys,detector', symerror: 20}
28+
- {label: 'sys,background', symerror: 2}
29+
- value: 280
30+
errors:
31+
- {label: stat, symerror: 100}
32+
- {label: 'sys,detector', symerror: 10}
33+
- {label: 'sys,background', symerror: 1}
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
independent_variables:
2+
- header: {name: M(ZZ), units: GEV}
3+
values:
4+
- {low: 0, high: 240}
5+
- {low: 240, high: 300}
6+
- {low: 300, high: 400}
7+
- {low: 400, high: 800}
8+
dependent_variables:
9+
- header: {name: 10**6 * 1/SIG(fiducial) * D(SIG(fiducial))/DM(ZZ)}
10+
qualifiers:
11+
- {name: RE, value: P P --> Z0 < LEPTON+ LEPTON- > Z0 < LEPTON+ LEPTON- > X}
12+
- {name: SQRT(S), units: GeV, value: 7000}
13+
values:
14+
- value: 2200
15+
errors:
16+
- {label: stat, symerror: 300}
17+
- {label: 'sys,detector', symerror: 40}
18+
- {label: 'sys,background', symerror: 2}
19+
- value: 4500
20+
errors:
21+
- {label: stat, symerror: 1000}
22+
- {label: 'sys,detector', symerror: 100}
23+
- {label: 'sys,background', symerror: 5}
24+
- value: 1000
25+
errors:
26+
- {label: stat, symerror: 400}
27+
- {label: 'sys,detector', symerror: 20}
28+
- {label: 'sys,background', symerror: 2}
29+
- value: 280
30+
errors:
31+
- {label: stat, symerror: 100}
32+
- {label: 'sys,detector', symerror: 10}
33+
- {label: 'sys,background', symerror: 1}

testsuite/test_data/TestHEPSubmission_invalid/submission.yaml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,4 +152,18 @@ data_file: data8.yaml
152152
additional_resources:
153153
- {description: Image file, location: figFigure10B.png}
154154
- {description: Thumbnail image file, location: thumb_figFigure10B.png}
155+
---
156+
# This is Table 9.
157+
name: "Table 9"
158+
location: Data from Figure 10B
159+
description: Table with a weird but accepted data_file name
160+
keywords: []
161+
data_file: ._data9.yaml
162+
---
163+
# This is Table 10.
164+
name: "Table 10"
165+
location: Data from Figure 10B
166+
description: Table whose data_file is accompanied by a file representing extended attributes which should lead to a hint.
167+
keywords: []
168+
data_file: data10.yaml
155169
# End of YAML file.

testsuite/test_full_submission_validator.py

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,7 @@ def test_invalid_data_directory(validator_v1, data_path, capsys):
247247
is_valid = validator_v1.validate(directory=dir)
248248
assert not is_valid
249249
expected_valid_files = [os.path.join(dir, f) for f in [
250-
'data1.yaml', 'data4.yaml', 'data5.yaml', 'data6.yaml', 'data7.yaml'
250+
'data1.yaml', 'data4.yaml', 'data5.yaml', 'data6.yaml', 'data7.yaml', '._data9.yaml', 'data10.yaml'
251251
]]
252252
assert validator_v1.valid_files == {SchemaType.DATA: expected_valid_files}
253253
assert validator_v1.has_errors
@@ -257,7 +257,9 @@ def test_invalid_data_directory(validator_v1, data_path, capsys):
257257
os.path.join(dir, 'submission.yaml'),
258258
os.path.join(dir, 'data3.yaml'),
259259
os.path.join(dir, 'data8.yaml'),
260-
os.path.join(dir, 'figFigure8B.png')
260+
os.path.join(dir, 'figFigure8B.png'),
261+
os.path.join(dir, '._data10.yaml'),
262+
os.path.join(dir, '._data11.yaml')
261263
]
262264
assert set(errors.keys()) == set(expected_file_names)
263265
assert errors[expected_file_names[0]][0].message == "Name of data_file 'mydirectory/data2.yaml' should not contain '/'."
@@ -270,6 +272,11 @@ def test_invalid_data_directory(validator_v1, data_path, capsys):
270272
did not find expected key
271273
in "{dir}/data8.yaml", line 9, column 3"""
272274
assert errors[expected_file_names[3]][0].message == f"figFigure8B.png is not referenced in the submission."
275+
assert len(errors[expected_file_names[4]]) == 2
276+
assert errors[expected_file_names[4]][0].message == f"._data10.yaml is not referenced in the submission."
277+
assert errors[expected_file_names[4]][1].message == f"._data10.yaml might be a file created by tar on MacOS. Set COPYFILE_DISABLE=1 before creating the archive."
278+
assert errors[expected_file_names[4]][1].level == 'hint'
279+
assert errors[expected_file_names[5]][0].message == f"._data11.yaml is not referenced in the submission."
273280

274281

275282
def test_invalid_archive(validator_v1, data_path):#, capsys):
@@ -278,7 +285,7 @@ def test_invalid_archive(validator_v1, data_path):#, capsys):
278285
is_valid = validator_v1.validate(archive=archive)
279286
assert not is_valid
280287
expected_valid_files = [os.path.join(dir, f) for f in [
281-
'data1.yaml', 'data4.yaml', 'data5.yaml', 'data6.yaml', 'data7.yaml'
288+
'data1.yaml', 'data4.yaml', 'data5.yaml', 'data6.yaml', 'data7.yaml', '._data9.yaml', 'data10.yaml'
282289
]]
283290
assert validator_v1.valid_files == {SchemaType.DATA: expected_valid_files}
284291
assert validator_v1.has_errors
@@ -288,7 +295,9 @@ def test_invalid_archive(validator_v1, data_path):#, capsys):
288295
os.path.join(dir, 'submission.yaml'),
289296
os.path.join(dir, 'data3.yaml'),
290297
os.path.join(dir, 'data8.yaml'),
291-
os.path.join(dir, 'figFigure8B.png')
298+
os.path.join(dir, 'figFigure8B.png'),
299+
os.path.join(dir, '._data10.yaml'),
300+
os.path.join(dir, '._data11.yaml')
292301
]
293302
assert set(errors.keys()) == set(expected_file_names)
294303
assert errors[expected_file_names[0]][0].message == "Name of data_file 'mydirectory/data2.yaml' should not contain '/'."
@@ -301,6 +310,11 @@ def test_invalid_archive(validator_v1, data_path):#, capsys):
301310
did not find expected key
302311
in "{dir}/data8.yaml", line 9, column 3"""
303312
assert errors[expected_file_names[3]][0].message == f"figFigure8B.png is not referenced in the submission."
313+
assert len(errors[expected_file_names[4]]) == 2
314+
assert errors[expected_file_names[4]][0].message == f"._data10.yaml is not referenced in the submission."
315+
assert errors[expected_file_names[4]][1].message == f"._data10.yaml might be a file created by tar on MacOS. Set COPYFILE_DISABLE=1 before creating the archive."
316+
assert errors[expected_file_names[4]][1].level == 'hint'
317+
assert errors[expected_file_names[5]][0].message == f"._data11.yaml is not referenced in the submission."
304318

305319

306320
def test_invalid_syntax_submission(validator_v1, data_path, capsys):

0 commit comments

Comments
 (0)