Skip to content

Commit d892918

Browse files
committed
data file validator: allow data to be passed (needed by hepdata-converter)
Signed-off-by: Graeme Watt <[email protected]>
1 parent 464862e commit d892918

File tree

6 files changed

+72
-50
lines changed

6 files changed

+72
-50
lines changed

README.rst

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,10 @@ To validate files, you need to instantiate a validator (I love OO).
6464
# if there are any error messages, they are retrievable through this call
6565
submission_file_validator.get_messages()
6666
67-
67+
# the error messages can be printed
68+
submission_file_validator.print_errors(submission_file_path)
69+
70+
6871
Data file validation is exactly the same.
6972

7073
.. code:: python
@@ -79,6 +82,9 @@ Data file validation is exactly the same.
7982
# if there are any error messages, they are retrievable through this call
8083
data_file_validator.get_messages()
8184
85+
# the error messages can be printed
86+
data_file_validator.print_errors('data.yaml')
87+
8288
8389
Optionally, if you have already loaded the YAML object, then you can pass it through
8490
as a data object. You must also pass through the file_path since this is used as a key

hepdata_validator/data_file_validator.py

Lines changed: 48 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -66,41 +66,62 @@ def load_custom_schema(self, type, schema_file_path=None):
6666
raise UnsupportedDataSchemaException(
6767
message="There is no schema defined for the '{0}' data type.".format(type))
6868

69-
def validate(self, file_path):
70-
try:
71-
default_data_schema = json.load(
72-
open(self.default_schema_file, 'r'))
69+
def validate(self, **kwargs):
70+
"""
71+
Validates a data file
72+
73+
:param file_path: path to file to be loaded.
74+
:param data: pre loaded YAML object (optional).
75+
:return: Bool to indicate the validity of the file.
76+
"""
77+
78+
default_data_schema = json.load(open(self.default_schema_file, 'r'))
79+
80+
# even though we are using the yaml package to load,
81+
# it supports JSON and YAML
82+
data = kwargs.pop("data", None)
83+
file_path = kwargs.pop("file_path", None)
84+
85+
if file_path is None:
86+
raise LookupError("file_path argument must be supplied")
87+
88+
if data is None:
7389

7490
try:
75-
data = yaml.load_all(open(file_path, 'r'), Loader=yaml.CLoader)
91+
# We try to load using the CLoader for speed improvements.
92+
try:
93+
data = yaml.load(open(file_path, 'r'), Loader=yaml.CLoader)
94+
except ScannerError as se:
95+
self.add_validation_message(ValidationMessage(file=file_path, message=
96+
'There was a problem parsing the file.\n' + str(se)))
97+
return False
7698
except: #pragma: no cover
77-
data = yaml.load_all(open(file_path, 'r')) #pragma: no cover
99+
try: # pragma: no cover
100+
data = yaml.load(open(file_path, 'r')) # pragma: no cover
101+
except ScannerError as se: # pragma: no cover
102+
self.add_validation_message(
103+
ValidationMessage(file=file_path, message=
104+
'There was a problem parsing the file.\n' + str(se))) # pragma: no cover
105+
return False
78106

79-
for data_item in data:
80-
if data_item is None:
81-
continue
82-
try:
83-
if 'type' in data_item:
84-
custom_schema = self.load_custom_schema(data_item['type'])
85-
json_validate(data_item, custom_schema)
86-
else:
87-
json_validate(data_item, default_data_schema)
107+
try:
88108

89-
except ValidationError as ve:
90-
self.add_validation_message(
91-
ValidationMessage(file=file_path,
92-
message=ve.message + ' in ' + str(ve.instance)))
93-
if self.has_errors(file_path):
94-
return False
109+
if 'type' in data:
110+
custom_schema = self.load_custom_schema(data['type'])
111+
json_validate(data, custom_schema)
95112
else:
96-
return True
97-
except ScannerError as se:
113+
json_validate(data, default_data_schema)
114+
115+
except ValidationError as ve:
116+
98117
self.add_validation_message(
99118
ValidationMessage(file=file_path,
100-
message='There was a problem parsing the file. '
101-
'This can be because you forgot spaces '
102-
'after colons in your YAML file for instance.\n{0}'.format(se.__repr__()))
103-
)
119+
message=ve.message + ' in ' + str(ve.instance)))
120+
121+
if self.has_errors(file_path):
122+
return False
123+
else:
124+
return True
104125

105126

106127
class UnsupportedDataSchemaException(Exception):

hepdata_validator/submission_file_validator.py

Lines changed: 14 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -14,21 +14,20 @@ class SubmissionFileValidator(Validator):
1414
"""
1515
base_path = os.path.dirname(__file__)
1616
default_schema_file = base_path + '/schemas/submission_schema.json'
17-
additonal_info_schema = base_path + '/schemas/additional_info_schema.json'
17+
additional_info_schema = base_path + '/schemas/additional_info_schema.json'
1818

1919
def validate(self, **kwargs):
2020
"""
2121
Validates a submission file
22+
2223
:param file_path: path to file to be loaded.
2324
:param data: pre loaded YAML object (optional).
2425
:return: Bool to indicate the validity of the file.
2526
"""
2627
try:
27-
submission_file_schema = json.load(
28-
open(self.default_schema_file, 'r'))
28+
submission_file_schema = json.load(open(self.default_schema_file, 'r'))
2929

30-
additional_file_section_schema = json.load(
31-
open(self.additonal_info_schema, 'r'))
30+
additional_file_section_schema = json.load(open(self.additional_info_schema, 'r'))
3231

3332
# even though we are using the yaml package to load,
3433
# it supports JSON and YAML
@@ -57,18 +56,18 @@ def validate(self, **kwargs):
5756
except ValidationError as ve:
5857
self.add_validation_message(
5958
ValidationMessage(file=file_path,
60-
message=ve.message + ' in ' + str(ve.instance)))
59+
message=ve.message + ' in ' + str(ve.instance)))
6160

6261
if self.has_errors(file_path):
6362
return False
6463
else:
6564
return True
66-
except ScannerError as se:
67-
self.add_validation_message(
68-
ValidationMessage(file=file_path,
69-
message='There was a problem parsing the file. '
70-
'This can be because you forgot spaces '
71-
'after colons in your YAML file for instance. '
72-
'Diagnostic information follows.\n' + str(se))
73-
)
74-
return False
65+
66+
except ScannerError as se: # pragma: no cover
67+
self.add_validation_message( # pragma: no cover
68+
ValidationMessage(file=file_path, message=
69+
'There was a problem parsing the file. '
70+
'This can be because you forgot spaces '
71+
'after colons in your YAML file for instance. '
72+
'Diagnostic information follows.\n' + str(se)))
73+
return False

hepdata_validator/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,4 +27,4 @@
2727

2828
from __future__ import absolute_import, print_function
2929

30-
__version__ = "0.1.13"
30+
__version__ = "0.1.14"

testsuite/test_data/invalid_data_file.yaml

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,4 @@ type: "different"
33
some_variables:
44
- header:{name: SQRT(S), units: GEV}
55
values:
6-
- value: 7000
7-
8-
---
6+
- value: 7000

testsuite/test_data/valid_file_custom.yaml

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,4 @@ type: "different"
33
some_variables:
44
- header: {name: SQRT(S), units: GEV}
55
values:
6-
- value: 7000
7-
8-
---
6+
- value: 7000

0 commit comments

Comments
 (0)