Skip to content

Commit a52e621

Browse files
authored
Merge branch 'master' into data-file-size-limit
2 parents 04aaac8 + 73536c0 commit a52e621

File tree

9 files changed

+85
-66
lines changed

9 files changed

+85
-66
lines changed

.github/workflows/ci.yml

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,15 +28,23 @@ jobs:
2828
run: |
2929
pip install --upgrade pip setuptools wheel coveralls
3030
pip install --ignore-installed -e .[tests]
31-
- name: Run tests
31+
- name: Run tests without libyaml
3232
run: |
33-
pytest testsuite
33+
USE_LIBYAML=False pytest testsuite
34+
env:
35+
COVERAGE_FILE: '.coverage_nolibyaml'
36+
- name: Run tests with libyaml
37+
run: |
38+
USE_LIBYAML=True pytest testsuite
39+
env:
40+
COVERAGE_FILE: '.coverage_libyaml'
3441
- name: Run coveralls
3542
if: startsWith(matrix.python-version, '3.8')
3643
env:
3744
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
3845
COVERALLS_SERVICE_NAME: github
3946
run: |
47+
coverage combine .coverage_libyaml .coverage_nolibyaml
4048
coveralls
4149
4250
deploy:

README.rst

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -49,17 +49,32 @@ Install from `PyPI <https://pypi.org/project/hepdata-validator/>`_ using ``pip``
4949
$ pip install --user hepdata-validator
5050
$ hepdata-validate --help
5151
52-
Install from GitHub (for developers) in a `virtual environment <https://docs.python.org/3/tutorial/venv.html>`_:
52+
If you would like to use LibYAML, you may need an additional step if running on an M1 Mac, to ensure pyyaml is built
53+
with the LibYAML bindings. Run the following after installing LibYAML via Homebrew:
54+
55+
.. code:: bash
56+
57+
$ LDFLAGS="-L$(brew --prefix)/lib" CFLAGS="-I$(brew --prefix)/include" pip install --global-option="--with-libyaml" --force pyyaml
58+
59+
60+
Developers
61+
==========
62+
Developers should install from GitHub in a `virtual environment <https://docs.python.org/3/tutorial/venv.html>`_:
5363

5464
.. code:: bash
5565
5666
$ git clone https://github.com/HEPData/hepdata-validator
5767
$ cd hepdata-validator
5868
$ python3 -m venv ~/venv/hepdata-validator
5969
$ source ~/venv/hepdata-validator/bin/activate
60-
(hepdata-validator) $ pip install --upgrade -e .[tests]
61-
(hepdata-validator) $ pytest testsuite
70+
(hepdata-validator) $ pip install --upgrade -e ".[tests]"
71+
72+
Tests should be run both with and without LibYAML, as error messages from the different YAML parsers vary:
73+
74+
.. code:: bash
6275
76+
(hepdata-validator) $ USE_LIBYAML=True pytest testsuite
77+
(hepdata-validator) $ USE_LIBYAML=False pytest testsuite
6378
6479
Usage
6580
-----

hepdata_validator/__init__.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,21 @@
3232

3333
from .version import __version__
3434

35+
# We try to load using the CSafeLoader for speed improvements
36+
YamlLoader = None
37+
YamlDumper = None
38+
use_libyaml = os.environ.get('USE_LIBYAML', True)
39+
if use_libyaml and use_libyaml not in ('False', 'false', 'f', 'F'):
40+
try:
41+
from yaml import CSafeLoader as YamlLoader
42+
from yaml import CSafeDumper as YamlDumper
43+
except ImportError: # pragma: no cover
44+
pass
45+
46+
if YamlLoader is None or YamlDumper is None:
47+
from yaml import SafeLoader as YamlLoader
48+
from yaml import SafeDumper as YamlDumper
49+
3550
__all__ = ('__version__', )
3651

3752
VALID_SCHEMA_VERSIONS = ['1.1.0', '1.0.1', '1.0.0', '0.1.0']

hepdata_validator/data_file_validator.py

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -29,13 +29,7 @@
2929
from packaging import version as packaging_version
3030
import yaml
3131

32-
# We try to load using the CSafeLoader for speed improvements.
33-
try:
34-
from yaml import CSafeLoader as Loader
35-
except ImportError: #pragma: no cover
36-
from yaml import SafeLoader as Loader #pragma: no cover
37-
38-
from hepdata_validator import Validator, ValidationMessage
32+
from hepdata_validator import Validator, ValidationMessage, YamlLoader
3933
from jsonschema import ValidationError
4034
from jsonschema.exceptions import by_relevance
4135

@@ -104,7 +98,7 @@ def validate(self, **kwargs):
10498
try:
10599
# The yaml package support both JSON and YAML
106100
with open(file_path, 'r') as df:
107-
data = yaml.load(df, Loader=Loader)
101+
data = yaml.load(df, Loader=YamlLoader)
108102
if data is None:
109103
self.add_validation_message(ValidationMessage(
110104
file=file_path,

hepdata_validator/full_submission_validator.py

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -8,20 +8,12 @@
88

99
import yaml
1010

11-
from hepdata_validator import Validator, ValidationMessage
11+
from hepdata_validator import Validator, ValidationMessage, YamlLoader, YamlDumper
1212
from .schema_resolver import JsonSchemaResolver
1313
from .schema_downloader import HTTPSchemaDownloader
1414
from .submission_file_validator import SubmissionFileValidator
1515
from .data_file_validator import DataFileValidator
1616

17-
# We try to load using the CSafeLoader for speed improvements.
18-
try:
19-
from yaml import CSafeLoader as Loader
20-
from yaml import CSafeDumper as Dumper
21-
except ImportError: # pragma: no cover
22-
from yaml import SafeLoader as Loader
23-
from yaml import SafeDumper as Dumper
24-
2517

2618
INDIVIDUAL_FILE_SIZE_LIMIT = 10485760
2719

@@ -171,7 +163,7 @@ def validate(self, directory=None, file=None, archive=None):
171163
# Open the submission.yaml file and load all YAML documents.
172164
with open(self.submission_file_path, 'r') as submission_file:
173165
try:
174-
self.submission_docs = list(yaml.load_all(submission_file, Loader=Loader))
166+
self.submission_docs = list(yaml.load_all(submission_file, Loader=YamlLoader))
175167
except yaml.YAMLError as e:
176168
self._add_validation_message(
177169
file=self.submission_file_path,
@@ -266,7 +258,7 @@ def _create_data_files(self, docs):
266258
file_name = os.path.join(self.directory, file_name)
267259
with open(file_name, 'w') as data_file:
268260
yaml.dump({'independent_variables': doc.pop('independent_variables', None),
269-
'dependent_variables': doc.pop('dependent_variables', None)}, data_file, Dumper=Dumper)
261+
'dependent_variables': doc.pop('dependent_variables', None)}, data_file, Dumper=YamlDumper)
270262

271263
def _check_doc(self, doc):
272264
# Skip empty YAML documents.
@@ -356,7 +348,7 @@ def _check_doc(self, doc):
356348

357349
# Just try to load YAML data file without validating schema.
358350
try:
359-
contents = yaml.load(open(data_file_path, 'r'), Loader=Loader)
351+
contents = yaml.load(open(data_file_path, 'r'), Loader=YamlLoader)
360352
except (OSError, yaml.YAMLError) as e:
361353
problem_type = 'reading' if isinstance(e, OSError) else 'parsing'
362354
self._add_validation_message(

hepdata_validator/submission_file_validator.py

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,7 @@
66
import yaml
77
from yaml.scanner import ScannerError
88

9-
# We try to load using the CSafeLoader for speed improvements.
10-
try:
11-
from yaml import CSafeLoader as Loader
12-
except ImportError: #pragma: no cover
13-
from yaml import SafeLoader as Loader #pragma: no cover
14-
15-
from hepdata_validator import Validator, ValidationMessage
9+
from hepdata_validator import Validator, ValidationMessage, YamlLoader
1610

1711
__author__ = 'eamonnmaguire'
1812

@@ -71,7 +65,7 @@ def validate(self, **kwargs):
7165

7266
if data is None:
7367
data_file_handle = open(file_path, 'r')
74-
data = yaml.load_all(data_file_handle, Loader=Loader)
68+
data = yaml.load_all(data_file_handle, Loader=YamlLoader)
7569

7670
table_names = []
7771
table_data_files = []

testsuite/test_data_validator.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -280,11 +280,12 @@ def test_invalid_parser_yaml_file_v1(validator_v1, data_path, capsys):
280280

281281
assert is_valid is False
282282
out, err = capsys.readouterr()
283-
assert out.strip() == f"""error - There was a problem parsing the file.
283+
assert out.strip().startswith(f"""error - There was a problem parsing the file.
284284
while parsing a flow mapping
285-
in "{file}", line 9, column 9
286-
did not find expected ',' or '}}'
287-
in "{file}", line 10, column 5"""
285+
in "{file}", line 9, column 9""")
286+
# message is different in libyaml and non-libyaml versions but this is in both
287+
assert "expected ',' or '}'" in out
288+
assert out.strip().endswith(f'in "{file}", line 10, column 5')
288289

289290

290291
def test_io_error_yaml_file_v1(validator_v1, data_path, capsys):

testsuite/test_full_submission_validator.py

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -222,11 +222,12 @@ def test_invalid_yaml_single_file_gzip(validator_v1, data_path, capsys):
222222
assert validator_v1.valid_files == {}
223223
validator_v1.print_errors('1512299_invalid_yaml.yaml')
224224
out, err = capsys.readouterr()
225-
assert out.strip() == """error - There was a problem parsing the file:
225+
assert out.strip().startswith("""error - There was a problem parsing the file:
226226
while parsing a flow mapping
227-
in "1512299_invalid_yaml.yaml", line 7, column 11
228-
did not find expected ',' or '}'
229-
in "1512299_invalid_yaml.yaml", line 8, column 3"""
227+
in "1512299_invalid_yaml.yaml", line 7, column 11""")
228+
# message is different in libyaml and non-libyaml versions but this is in both
229+
assert "expected ',' or '}'" in out
230+
assert out.strip().endswith('in "1512299_invalid_yaml.yaml", line 8, column 3')
230231

231232

232233
def test_invalid_single_file_data_file(validator_v1, data_path, capsys):
@@ -267,11 +268,13 @@ def test_invalid_data_directory(validator_v1, data_path, capsys):
267268
assert errors[expected_file_names[0]][1].message == "Location of 'additional_resources' file '../TestHEPSubmission/figFigure8B.png' should not contain '/'."
268269
assert errors[expected_file_names[0]][2].message == "Missing 'additional_resources' file 'figFigure9A.png'."
269270
assert errors[expected_file_names[1]][0].message == "Missing data_file 'data3.yaml'."
270-
assert errors[expected_file_names[2]][0].message == f"""There was a problem parsing the file:
271+
assert errors[expected_file_names[2]][0].message.startswith(f"""There was a problem parsing the file:
271272
while parsing a block mapping
272-
in "{dir}/data8.yaml", line 1, column 1
273-
did not find expected key
274-
in "{dir}/data8.yaml", line 9, column 3"""
273+
in "{dir}/data8.yaml", line 1, column 1""")
274+
# message differs depending on whether libyaml is used
275+
assert "did not find expected key" in errors[expected_file_names[2]][0].message or \
276+
"expected <block end>, but found '<block mapping start>'" in errors[expected_file_names[2]][0].message
277+
assert errors[expected_file_names[2]][0].message.endswith(f'in "{dir}/data8.yaml", line 9, column 3')
275278
assert errors[expected_file_names[3]][0].message == "figFigure8B.png is not referenced in the submission."
276279
assert len(errors[expected_file_names[4]]) == 2
277280
assert errors[expected_file_names[4]][0].message == "._data10.yaml is not referenced in the submission."
@@ -306,11 +309,13 @@ def test_invalid_archive(validator_v1, data_path):#, capsys):
306309
assert errors[expected_file_names[0]][1].message == "Location of 'additional_resources' file '../TestHEPSubmission/figFigure8B.png' should not contain '/'."
307310
assert errors[expected_file_names[0]][2].message == f"Missing 'additional_resources' file 'figFigure9A.png'."
308311
assert errors[expected_file_names[1]][0].message == f"Missing data_file 'data3.yaml'."
309-
assert errors[expected_file_names[2]][0].message == f"""There was a problem parsing the file:
312+
assert errors[expected_file_names[2]][0].message.startswith(f"""There was a problem parsing the file:
310313
while parsing a block mapping
311-
in "{dir}/data8.yaml", line 1, column 1
312-
did not find expected key
313-
in "{dir}/data8.yaml", line 9, column 3"""
314+
in "{dir}/data8.yaml", line 1, column 1""")
315+
# message differs depending on whether libyaml is used
316+
assert "did not find expected key" in errors[expected_file_names[2]][0].message or \
317+
"expected <block end>, but found '<block mapping start>'" in errors[expected_file_names[2]][0].message
318+
assert errors[expected_file_names[2]][0].message.endswith(f'in "{dir}/data8.yaml", line 9, column 3')
314319
assert errors[expected_file_names[3]][0].message == f"figFigure8B.png is not referenced in the submission."
315320
assert len(errors[expected_file_names[4]]) == 2
316321
assert errors[expected_file_names[4]][0].message == f"._data10.yaml is not referenced in the submission."

testsuite/test_submission_validator.py

Lines changed: 12 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,9 @@
11
import os
22
import pytest
33
import yaml
4-
from hepdata_validator import VALID_SCHEMA_VERSIONS
4+
from hepdata_validator import VALID_SCHEMA_VERSIONS, YamlLoader
55
from hepdata_validator.submission_file_validator import SubmissionFileValidator
66

7-
# We try to load using the CSafeLoader for speed improvements.
8-
try:
9-
from yaml import CSafeLoader as Loader
10-
except ImportError:
11-
from yaml import SafeLoader as Loader
12-
137

148
####################################################
159
# Tests fixtures #
@@ -45,7 +39,7 @@ def test_valid_submission_yaml_v0(validator_v0, data_path):
4539
file = os.path.join(data_path, 'valid_submission_v0.yaml')
4640

4741
with open(file, 'r') as submission:
48-
yaml_obj = yaml.load_all(submission, Loader=Loader)
42+
yaml_obj = yaml.load_all(submission, Loader=YamlLoader)
4943
is_valid = validator_v0.validate(file_path=file, data=yaml_obj)
5044
validator_v0.print_errors(file)
5145

@@ -65,7 +59,7 @@ def test_valid_submission_yaml_v1(validator_v1, data_path):
6559
file = os.path.join(data_path, 'valid_submission.yaml')
6660

6761
with open(file, 'r') as submission:
68-
yaml_obj = yaml.load_all(submission, Loader=Loader)
62+
yaml_obj = yaml.load_all(submission, Loader=YamlLoader)
6963
is_valid = validator_v1.validate(file_path=file, data=yaml_obj)
7064
validator_v1.print_errors(file)
7165

@@ -80,7 +74,7 @@ def test_v0_valid_submission_yaml_v1(validator_v1, data_path):
8074
file = os.path.join(data_path, 'valid_submission_v0.yaml')
8175

8276
with open(file, 'r') as submission:
83-
yaml_obj = yaml.load_all(submission, Loader=Loader)
77+
yaml_obj = yaml.load_all(submission, Loader=YamlLoader)
8478
is_valid = validator_v1.validate(file_path=file, data=yaml_obj)
8579
validator_v1.print_errors(file)
8680

@@ -235,10 +229,11 @@ def test_invalid_parser_submission_yaml_v1(validator_v1, data_path, capsys):
235229

236230
assert is_valid is False
237231
out, err = capsys.readouterr()
238-
assert out.strip() == """error - while parsing a flow mapping
239-
in "{0}", line 6, column 5
240-
did not find expected ',' or '}}'
241-
in "{0}", line 7, column 3""".format(file)
232+
assert out.strip().startswith(f"""error - while parsing a flow mapping
233+
in "{file}", line 6, column 5""")
234+
# message is different in libyaml and non-libyaml versions but this is in both
235+
assert "expected ',' or '}'" in out
236+
assert out.strip().endswith(f'in "{file}", line 7, column 3')
242237

243238

244239
def test_io_error_submission_yaml_v1(validator_v1, data_path, capsys):
@@ -284,7 +279,7 @@ def test_data_schema_submission_yaml_v1(validator_v1, data_path):
284279
file = os.path.join(data_path, 'valid_submission_custom_remote.yaml')
285280

286281
with open(file, 'r') as submission:
287-
yaml_obj = yaml.load_all(submission, Loader=Loader)
282+
yaml_obj = yaml.load_all(submission, Loader=YamlLoader)
288283
is_valid = validator_v1.validate(file_path=file, data=yaml_obj)
289284
assert is_valid is True
290285
assert not validator_v1.has_errors(file)
@@ -298,7 +293,7 @@ def test_invalid_cmenergies_submission_yaml_v1(validator_v1, data_path, capsys):
298293
file = os.path.join(data_path, 'invalid_cmenergies.yaml')
299294

300295
with open(file, 'r') as submission:
301-
yaml_obj = yaml.load_all(submission, Loader=Loader)
296+
yaml_obj = yaml.load_all(submission, Loader=YamlLoader)
302297
is_valid = validator_v1.validate(file_path=file, data=yaml_obj)
303298
validator_v1.print_errors(file)
304299

@@ -332,7 +327,7 @@ def test_check_for_duplicates(validator_v1):
332327
def test_submission_with_no_data_tables(validator_v0, validator_v1, data_path, capsys):
333328
file = os.path.join(data_path, 'valid_file.yaml')
334329
with open(file, 'r') as submission:
335-
yaml_obj = yaml.load_all(submission, Loader=Loader)
330+
yaml_obj = yaml.load_all(submission, Loader=YamlLoader)
336331
is_valid = validator_v1.validate(file_path=file, data=yaml_obj)
337332
validator_v1.print_errors(file)
338333

0 commit comments

Comments
 (0)