Skip to content

Commit 73536c0

Browse files
authored
Merge pull request #47 from HEPData/yaml-fixes
Fixes for LibYAML issues
2 parents d5223a6 + 1d747d3 commit 73536c0

9 files changed

+85
-66
lines changed

.github/workflows/ci.yml

+10-2
Original file line numberDiff line numberDiff line change
@@ -28,15 +28,23 @@ jobs:
2828
run: |
2929
pip install --upgrade pip setuptools wheel coveralls
3030
pip install --ignore-installed -e .[tests]
31-
- name: Run tests
31+
- name: Run tests without libyaml
3232
run: |
33-
pytest testsuite
33+
USE_LIBYAML=False pytest testsuite
34+
env:
35+
COVERAGE_FILE: '.coverage_nolibyaml'
36+
- name: Run tests with libyaml
37+
run: |
38+
USE_LIBYAML=True pytest testsuite
39+
env:
40+
COVERAGE_FILE: '.coverage_libyaml'
3441
- name: Run coveralls
3542
if: startsWith(matrix.python-version, '3.8')
3643
env:
3744
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
3845
COVERALLS_SERVICE_NAME: github
3946
run: |
47+
coverage combine .coverage_libyaml .coverage_nolibyaml
4048
coveralls
4149
4250
deploy:

README.rst

+18-3
Original file line numberDiff line numberDiff line change
@@ -49,17 +49,32 @@ Install from `PyPI <https://pypi.org/project/hepdata-validator/>`_ using ``pip``
4949
$ pip install --user hepdata-validator
5050
$ hepdata-validate --help
5151
52-
Install from GitHub (for developers) in a `virtual environment <https://docs.python.org/3/tutorial/venv.html>`_:
52+
If you would like to use LibYAML, you may need an additional step if running on an M1 Mac, to ensure pyyaml is built
53+
with the LibYAML bindings. Run the following after installing LibYAML via Homebrew:
54+
55+
.. code:: bash
56+
57+
$ LDFLAGS="-L$(brew --prefix)/lib" CFLAGS="-I$(brew --prefix)/include" pip install --global-option="--with-libyaml" --force pyyaml
58+
59+
60+
Developers
61+
==========
62+
Developers should install from GitHub in a `virtual environment <https://docs.python.org/3/tutorial/venv.html>`_:
5363

5464
.. code:: bash
5565
5666
$ git clone https://github.com/HEPData/hepdata-validator
5767
$ cd hepdata-validator
5868
$ python3 -m venv ~/venv/hepdata-validator
5969
$ source ~/venv/hepdata-validator/bin/activate
60-
(hepdata-validator) $ pip install --upgrade -e .[tests]
61-
(hepdata-validator) $ pytest testsuite
70+
(hepdata-validator) $ pip install --upgrade -e ".[tests]"
71+
72+
Tests should be run both with and without LibYAML, as error messages from the different YAML parsers vary:
73+
74+
.. code:: bash
6275
76+
(hepdata-validator) $ USE_LIBYAML=True pytest testsuite
77+
(hepdata-validator) $ USE_LIBYAML=False pytest testsuite
6378
6479
Usage
6580
-----

hepdata_validator/__init__.py

+15
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,21 @@
3232

3333
from .version import __version__
3434

35+
# We try to load using the CSafeLoader for speed improvements
36+
YamlLoader = None
37+
YamlDumper = None
38+
use_libyaml = os.environ.get('USE_LIBYAML', True)
39+
if use_libyaml and use_libyaml not in ('False', 'false', 'f', 'F'):
40+
try:
41+
from yaml import CSafeLoader as YamlLoader
42+
from yaml import CSafeDumper as YamlDumper
43+
except ImportError: # pragma: no cover
44+
pass
45+
46+
if YamlLoader is None or YamlDumper is None:
47+
from yaml import SafeLoader as YamlLoader
48+
from yaml import SafeDumper as YamlDumper
49+
3550
__all__ = ('__version__', )
3651

3752
VALID_SCHEMA_VERSIONS = ['1.1.0', '1.0.1', '1.0.0', '0.1.0']

hepdata_validator/data_file_validator.py

+2-8
Original file line numberDiff line numberDiff line change
@@ -29,13 +29,7 @@
2929
from packaging import version as packaging_version
3030
import yaml
3131

32-
# We try to load using the CSafeLoader for speed improvements.
33-
try:
34-
from yaml import CSafeLoader as Loader
35-
except ImportError: #pragma: no cover
36-
from yaml import SafeLoader as Loader #pragma: no cover
37-
38-
from hepdata_validator import Validator, ValidationMessage
32+
from hepdata_validator import Validator, ValidationMessage, YamlLoader
3933
from jsonschema import ValidationError
4034
from jsonschema.exceptions import by_relevance
4135

@@ -104,7 +98,7 @@ def validate(self, **kwargs):
10498
try:
10599
# The yaml package support both JSON and YAML
106100
with open(file_path, 'r') as df:
107-
data = yaml.load(df, Loader=Loader)
101+
data = yaml.load(df, Loader=YamlLoader)
108102
if data is None:
109103
self.add_validation_message(ValidationMessage(
110104
file=file_path,

hepdata_validator/full_submission_validator.py

+4-12
Original file line numberDiff line numberDiff line change
@@ -8,20 +8,12 @@
88

99
import yaml
1010

11-
from hepdata_validator import Validator, ValidationMessage
11+
from hepdata_validator import Validator, ValidationMessage, YamlLoader, YamlDumper
1212
from .schema_resolver import JsonSchemaResolver
1313
from .schema_downloader import HTTPSchemaDownloader
1414
from .submission_file_validator import SubmissionFileValidator
1515
from .data_file_validator import DataFileValidator
1616

17-
# We try to load using the CSafeLoader for speed improvements.
18-
try:
19-
from yaml import CSafeLoader as Loader
20-
from yaml import CSafeDumper as Dumper
21-
except ImportError: # pragma: no cover
22-
from yaml import SafeLoader as Loader
23-
from yaml import SafeDumper as Dumper
24-
2517

2618
class SchemaType(Enum):
2719
SUBMISSION = 'submission'
@@ -169,7 +161,7 @@ def validate(self, directory=None, file=None, archive=None):
169161
# Open the submission.yaml file and load all YAML documents.
170162
with open(self.submission_file_path, 'r') as submission_file:
171163
try:
172-
self.submission_docs = list(yaml.load_all(submission_file, Loader=Loader))
164+
self.submission_docs = list(yaml.load_all(submission_file, Loader=YamlLoader))
173165
except yaml.YAMLError as e:
174166
self._add_validation_message(
175167
file=self.submission_file_path,
@@ -264,7 +256,7 @@ def _create_data_files(self, docs):
264256
file_name = os.path.join(self.directory, file_name)
265257
with open(file_name, 'w') as data_file:
266258
yaml.dump({'independent_variables': doc.pop('independent_variables', None),
267-
'dependent_variables': doc.pop('dependent_variables', None)}, data_file, Dumper=Dumper)
259+
'dependent_variables': doc.pop('dependent_variables', None)}, data_file, Dumper=YamlDumper)
268260

269261
def _check_doc(self, doc):
270262
# Skip empty YAML documents.
@@ -345,7 +337,7 @@ def _check_doc(self, doc):
345337

346338
# Just try to load YAML data file without validating schema.
347339
try:
348-
contents = yaml.load(open(data_file_path, 'r'), Loader=Loader)
340+
contents = yaml.load(open(data_file_path, 'r'), Loader=YamlLoader)
349341
except (OSError, yaml.YAMLError) as e:
350342
problem_type = 'reading' if isinstance(e, OSError) else 'parsing'
351343
self._add_validation_message(

hepdata_validator/submission_file_validator.py

+2-8
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,7 @@
66
import yaml
77
from yaml.scanner import ScannerError
88

9-
# We try to load using the CSafeLoader for speed improvements.
10-
try:
11-
from yaml import CSafeLoader as Loader
12-
except ImportError: #pragma: no cover
13-
from yaml import SafeLoader as Loader #pragma: no cover
14-
15-
from hepdata_validator import Validator, ValidationMessage
9+
from hepdata_validator import Validator, ValidationMessage, YamlLoader
1610

1711
__author__ = 'eamonnmaguire'
1812

@@ -71,7 +65,7 @@ def validate(self, **kwargs):
7165

7266
if data is None:
7367
data_file_handle = open(file_path, 'r')
74-
data = yaml.load_all(data_file_handle, Loader=Loader)
68+
data = yaml.load_all(data_file_handle, Loader=YamlLoader)
7569

7670
table_names = []
7771
table_data_files = []

testsuite/test_data_validator.py

+5-4
Original file line numberDiff line numberDiff line change
@@ -280,11 +280,12 @@ def test_invalid_parser_yaml_file_v1(validator_v1, data_path, capsys):
280280

281281
assert is_valid is False
282282
out, err = capsys.readouterr()
283-
assert out.strip() == f"""error - There was a problem parsing the file.
283+
assert out.strip().startswith(f"""error - There was a problem parsing the file.
284284
while parsing a flow mapping
285-
in "{file}", line 9, column 9
286-
did not find expected ',' or '}}'
287-
in "{file}", line 10, column 5"""
285+
in "{file}", line 9, column 9""")
286+
# message is different in libyaml and non-libyaml versions but this is in both
287+
assert "expected ',' or '}'" in out
288+
assert out.strip().endswith(f'in "{file}", line 10, column 5')
288289

289290

290291
def test_io_error_yaml_file_v1(validator_v1, data_path, capsys):

testsuite/test_full_submission_validator.py

+17-12
Original file line numberDiff line numberDiff line change
@@ -222,11 +222,12 @@ def test_invalid_yaml_single_file_gzip(validator_v1, data_path, capsys):
222222
assert validator_v1.valid_files == {}
223223
validator_v1.print_errors('1512299_invalid_yaml.yaml')
224224
out, err = capsys.readouterr()
225-
assert out.strip() == """error - There was a problem parsing the file:
225+
assert out.strip().startswith("""error - There was a problem parsing the file:
226226
while parsing a flow mapping
227-
in "1512299_invalid_yaml.yaml", line 7, column 11
228-
did not find expected ',' or '}'
229-
in "1512299_invalid_yaml.yaml", line 8, column 3"""
227+
in "1512299_invalid_yaml.yaml", line 7, column 11""")
228+
# message is different in libyaml and non-libyaml versions but this is in both
229+
assert "expected ',' or '}'" in out
230+
assert out.strip().endswith('in "1512299_invalid_yaml.yaml", line 8, column 3')
230231

231232

232233
def test_invalid_single_file_data_file(validator_v1, data_path, capsys):
@@ -266,11 +267,13 @@ def test_invalid_data_directory(validator_v1, data_path, capsys):
266267
assert errors[expected_file_names[0]][1].message == "Location of 'additional_resources' file '../TestHEPSubmission/figFigure8B.png' should not contain '/'."
267268
assert errors[expected_file_names[0]][2].message == f"Missing 'additional_resources' file 'figFigure9A.png'."
268269
assert errors[expected_file_names[1]][0].message == f"Missing data_file 'data3.yaml'."
269-
assert errors[expected_file_names[2]][0].message == f"""There was a problem parsing the file:
270+
assert errors[expected_file_names[2]][0].message.startswith(f"""There was a problem parsing the file:
270271
while parsing a block mapping
271-
in "{dir}/data8.yaml", line 1, column 1
272-
did not find expected key
273-
in "{dir}/data8.yaml", line 9, column 3"""
272+
in "{dir}/data8.yaml", line 1, column 1""")
273+
# message differs depending on whether libyaml is used
274+
assert "did not find expected key" in errors[expected_file_names[2]][0].message or \
275+
"expected <block end>, but found '<block mapping start>'" in errors[expected_file_names[2]][0].message
276+
assert errors[expected_file_names[2]][0].message.endswith(f'in "{dir}/data8.yaml", line 9, column 3')
274277
assert errors[expected_file_names[3]][0].message == f"figFigure8B.png is not referenced in the submission."
275278
assert len(errors[expected_file_names[4]]) == 2
276279
assert errors[expected_file_names[4]][0].message == f"._data10.yaml is not referenced in the submission."
@@ -304,11 +307,13 @@ def test_invalid_archive(validator_v1, data_path):#, capsys):
304307
assert errors[expected_file_names[0]][1].message == "Location of 'additional_resources' file '../TestHEPSubmission/figFigure8B.png' should not contain '/'."
305308
assert errors[expected_file_names[0]][2].message == f"Missing 'additional_resources' file 'figFigure9A.png'."
306309
assert errors[expected_file_names[1]][0].message == f"Missing data_file 'data3.yaml'."
307-
assert errors[expected_file_names[2]][0].message == f"""There was a problem parsing the file:
310+
assert errors[expected_file_names[2]][0].message.startswith(f"""There was a problem parsing the file:
308311
while parsing a block mapping
309-
in "{dir}/data8.yaml", line 1, column 1
310-
did not find expected key
311-
in "{dir}/data8.yaml", line 9, column 3"""
312+
in "{dir}/data8.yaml", line 1, column 1""")
313+
# message differs depending on whether libyaml is used
314+
assert "did not find expected key" in errors[expected_file_names[2]][0].message or \
315+
"expected <block end>, but found '<block mapping start>'" in errors[expected_file_names[2]][0].message
316+
assert errors[expected_file_names[2]][0].message.endswith(f'in "{dir}/data8.yaml", line 9, column 3')
312317
assert errors[expected_file_names[3]][0].message == f"figFigure8B.png is not referenced in the submission."
313318
assert len(errors[expected_file_names[4]]) == 2
314319
assert errors[expected_file_names[4]][0].message == f"._data10.yaml is not referenced in the submission."

testsuite/test_submission_validator.py

+12-17
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,9 @@
11
import os
22
import pytest
33
import yaml
4-
from hepdata_validator import VALID_SCHEMA_VERSIONS
4+
from hepdata_validator import VALID_SCHEMA_VERSIONS, YamlLoader
55
from hepdata_validator.submission_file_validator import SubmissionFileValidator
66

7-
# We try to load using the CSafeLoader for speed improvements.
8-
try:
9-
from yaml import CSafeLoader as Loader
10-
except ImportError:
11-
from yaml import SafeLoader as Loader
12-
137

148
####################################################
159
# Tests fixtures #
@@ -45,7 +39,7 @@ def test_valid_submission_yaml_v0(validator_v0, data_path):
4539
file = os.path.join(data_path, 'valid_submission_v0.yaml')
4640

4741
with open(file, 'r') as submission:
48-
yaml_obj = yaml.load_all(submission, Loader=Loader)
42+
yaml_obj = yaml.load_all(submission, Loader=YamlLoader)
4943
is_valid = validator_v0.validate(file_path=file, data=yaml_obj)
5044
validator_v0.print_errors(file)
5145

@@ -65,7 +59,7 @@ def test_valid_submission_yaml_v1(validator_v1, data_path):
6559
file = os.path.join(data_path, 'valid_submission.yaml')
6660

6761
with open(file, 'r') as submission:
68-
yaml_obj = yaml.load_all(submission, Loader=Loader)
62+
yaml_obj = yaml.load_all(submission, Loader=YamlLoader)
6963
is_valid = validator_v1.validate(file_path=file, data=yaml_obj)
7064
validator_v1.print_errors(file)
7165

@@ -80,7 +74,7 @@ def test_v0_valid_submission_yaml_v1(validator_v1, data_path):
8074
file = os.path.join(data_path, 'valid_submission_v0.yaml')
8175

8276
with open(file, 'r') as submission:
83-
yaml_obj = yaml.load_all(submission, Loader=Loader)
77+
yaml_obj = yaml.load_all(submission, Loader=YamlLoader)
8478
is_valid = validator_v1.validate(file_path=file, data=yaml_obj)
8579
validator_v1.print_errors(file)
8680

@@ -235,10 +229,11 @@ def test_invalid_parser_submission_yaml_v1(validator_v1, data_path, capsys):
235229

236230
assert is_valid is False
237231
out, err = capsys.readouterr()
238-
assert out.strip() == """error - while parsing a flow mapping
239-
in "{0}", line 6, column 5
240-
did not find expected ',' or '}}'
241-
in "{0}", line 7, column 3""".format(file)
232+
assert out.strip().startswith(f"""error - while parsing a flow mapping
233+
in "{file}", line 6, column 5""")
234+
# message is different in libyaml and non-libyaml versions but this is in both
235+
assert "expected ',' or '}'" in out
236+
assert out.strip().endswith(f'in "{file}", line 7, column 3')
242237

243238

244239
def test_io_error_submission_yaml_v1(validator_v1, data_path, capsys):
@@ -284,7 +279,7 @@ def test_data_schema_submission_yaml_v1(validator_v1, data_path):
284279
file = os.path.join(data_path, 'valid_submission_custom_remote.yaml')
285280

286281
with open(file, 'r') as submission:
287-
yaml_obj = yaml.load_all(submission, Loader=Loader)
282+
yaml_obj = yaml.load_all(submission, Loader=YamlLoader)
288283
is_valid = validator_v1.validate(file_path=file, data=yaml_obj)
289284
assert is_valid is True
290285
assert not validator_v1.has_errors(file)
@@ -298,7 +293,7 @@ def test_invalid_cmenergies_submission_yaml_v1(validator_v1, data_path, capsys):
298293
file = os.path.join(data_path, 'invalid_cmenergies.yaml')
299294

300295
with open(file, 'r') as submission:
301-
yaml_obj = yaml.load_all(submission, Loader=Loader)
296+
yaml_obj = yaml.load_all(submission, Loader=YamlLoader)
302297
is_valid = validator_v1.validate(file_path=file, data=yaml_obj)
303298
validator_v1.print_errors(file)
304299

@@ -332,7 +327,7 @@ def test_check_for_duplicates(validator_v1):
332327
def test_submission_with_no_data_tables(validator_v0, validator_v1, data_path, capsys):
333328
file = os.path.join(data_path, 'valid_file.yaml')
334329
with open(file, 'r') as submission:
335-
yaml_obj = yaml.load_all(submission, Loader=Loader)
330+
yaml_obj = yaml.load_all(submission, Loader=YamlLoader)
336331
is_valid = validator_v1.validate(file_path=file, data=yaml_obj)
337332
validator_v1.print_errors(file)
338333

0 commit comments

Comments
 (0)