Skip to content

Commit 7b8b72a

Browse files
authored
Merge pull request #22 from effigies/enh/bidsignore
feat: Add bidsignore implementation
2 parents 9966ee4 + 4a3217b commit 7b8b72a

File tree

7 files changed

+289
-1
lines changed

7 files changed

+289
-1
lines changed

.github/workflows/build-test-deploy.yml

-1
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,6 @@ jobs:
7979

8080
- name: Set up Python ${{ matrix.python-version }}
8181
uses: actions/setup-python@v5
82-
if: matrix.os != 'ubuntu-latest'
8382
with:
8483
python-version: ${{ matrix.python-version }}
8584
allow-prereleases: true

.gitmodules

+3
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
11
[submodule "tests/data/bids-examples"]
22
path = tests/data/bids-examples
33
url = https://github.com/bids-standard/bids-examples
4+
[submodule "tests/data/gitignore-test"]
5+
path = tests/data/gitignore-test
6+
url = https://github.com/svent/gitignore-test

pyproject.toml

+2
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,8 @@ exclude_lines = [
8787
"if __name__ == .__main__.:",
8888
"if TYPE_CHECKING:",
8989
"pytest.skip",
90+
"class .*\\bProtocol\\):",
91+
"@(abc\\.)?abstractmethod",
9092
]
9193

9294
# Disable black

src/bids_validator/bidsignore.py

+129
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
"""Utilities for working with .bidsignore files."""
2+
3+
import os
4+
import re
5+
from functools import lru_cache
6+
from typing import Protocol, Union
7+
8+
import attrs
9+
10+
from .types.files import FileTree
11+
12+
13+
def _segment_to_regex(segment: str) -> str:
    """Translate one slash-free glob segment into regular-expression source.

    ``*`` and ``?`` become wildcards that never cross a ``/``, a backslash
    makes the following character literal, bracket expressions are kept as
    character classes (``[!...]`` is rewritten to ``[^...]``), and every
    other character is escaped so that it only matches itself.
    """
    out = []
    pos, end = 0, len(segment)
    while pos < end:
        char = segment[pos]
        pos += 1
        if char == '*':
            out.append('[^/]*')
        elif char == '?':
            out.append('[^/]')
        elif char == '\\' and pos < end:
            # Backslash escape: the next character is taken literally.
            out.append(re.escape(segment[pos]))
            pos += 1
        elif char == '[':
            negate = pos < end and segment[pos] in '!^'
            start = pos + 1 if negate else pos
            # A "]" directly after the opening (or the negation marker) is a
            # member of the class, so look for the terminator one step later.
            close = segment.find(']', start + 1)
            if close == -1:
                # Unterminated bracket: treat "[" as an ordinary character.
                out.append(r'\[')
            else:
                caret = '^' if negate else ''
                members = segment[start:close]
                # The lookahead keeps a class from ever consuming a separator.
                out.append('(?!/)[' + caret + members + ']')
                pos = close + 1
        else:
            # Everything else (including "+", "(", "|", "$", ".") is literal.
            out.append(re.escape(char))
    return ''.join(out)


@lru_cache
def compile_pat(pattern: str) -> Union[re.Pattern, None]:
    """Compile .gitignore-style ignore lines to regular expressions.

    Parameters
    ----------
    pattern
        A single line of an ignore file, without its trailing newline.

    Returns
    -------
    A compiled pattern intended to be used with ``.match()`` on a relative
    path, or ``None`` if the line is a comment or blank and so matches nothing.

    Raises
    ------
    ValueError
        If the line is an inverted (``!``-prefixed) pattern, which is not
        supported.
    """
    # A line starting with # serves as a comment.
    if pattern.startswith('#'):
        return None

    # An optional prefix "!" which negates the pattern; negation is unsupported.
    # Such a line can never reduce to a blank one, so it is rejected up front.
    if pattern.startswith('!'):
        raise ValueError(f'Inverted patterns not supported: {pattern}')

    # Put a backslash ("\") in front of the first hash for patterns that begin with a hash.
    # Put a backslash ("\") in front of the first "!" for patterns that begin with a literal "!"
    if pattern.startswith((r'\#', r'\!')):
        pattern = pattern[1:]  # Unescape

    # Trailing spaces are ignored unless they are quoted with backslash ("\").
    pattern = re.sub(r'(?<!\\) +$', '', pattern)

    # A blank line matches no files, so it can serve as a separator for readability.
    if pattern == '':
        return None

    # If there is a separator at the beginning or middle (or both) of the pattern,
    # then the pattern is relative to the [root]
    relative_match = pattern == '/' or '/' in pattern[:-1]
    # If there is a separator at the end of the pattern then the pattern will only match
    # directories, otherwise the pattern can match both files and directories.
    directory_match = pattern.endswith('/')

    segments = pattern.strip('/').split('/')
    final = len(segments) - 1
    pieces = []
    for index, segment in enumerate(segments):
        if segment == '**':
            # "**/" matches zero or more directories, so it becomes an optional
            # group; a trailing "**" matches everything that remains.
            pieces.append('.*' if index == final else '(?:.*/)?')
        else:
            pieces.append(_segment_to_regex(segment) + ('' if index == final else '/'))

    prefix = '^' if relative_match else '^(?:.*/|)'
    postfix = r'/' if directory_match else r'(/|\Z)'

    return re.compile(f'{prefix}{"".join(pieces)}{postfix}')
63+
64+
65+
class HasMatch(Protocol):
    """Structural type for objects exposing a ``match(relpath)`` method."""

    def match(self, relpath: str) -> bool:
        """Report whether the relative path ``relpath`` is matched."""
67+
68+
69+
@attrs.define
class Ignore:
    """Collection of .gitignore-style patterns.

    Tracks successfully matched files for reporting.
    """

    # Raw pattern lines, in the order they were supplied.
    patterns: list[str] = attrs.field(factory=list)
    # Every path that ``match`` reported as ignored, in call order.
    history: list[str] = attrs.field(factory=list, init=False)

    @classmethod
    def from_file(cls, pathlike: os.PathLike):
        """Load Ignore contents from file.

        Each line of the file becomes one pattern, with only its trailing
        newline removed. The file is decoded as UTF-8 rather than with the
        locale-dependent default encoding.
        """
        with open(pathlike, encoding='utf-8') as fobj:
            return cls([line.rstrip('\n') for line in fobj])

    def match(self, relpath: str) -> bool:
        """Match a relative path against a collection of ignore patterns.

        Returns ``True`` and records ``relpath`` in ``history`` as soon as one
        pattern matches; returns ``False`` otherwise. Errors raised by
        ``compile_pat`` (such as unsupported inverted patterns) propagate.
        """
        for pattern in self.patterns:
            regex = compile_pat(pattern)
            # compile_pat returns None for comments and blank lines, which
            # match nothing and must be skipped rather than dereferenced.
            if regex is not None and regex.match(relpath):
                self.history.append(relpath)
                return True
        return False
91+
92+
93+
@attrs.define
class IgnoreMany:
    """Combine several ignore filters into a single matcher."""

    # Filters consulted in order by ``match``.
    ignores: list[Ignore] = attrs.field()

    def match(self, relpath: str) -> bool:
        """Return true if any filters match the given file.

        Filters are tried in order and the search stops at the first hit, so
        ordering is significant for side-effects, such as recording files
        ignored by a particular filter.
        """
        for candidate in self.ignores:
            if candidate.match(relpath):
                return True
        return False
106+
107+
108+
def filter_file_tree(filetree: FileTree) -> FileTree:
    """Apply the tree's own ``.bidsignore`` file to the tree.

    The ``.bidsignore`` entry is looked up among the root's children. When
    there is none, the tree is returned as-is. Otherwise the tree is filtered
    using the patterns read from that file plus a rule that ignores the
    ``.bidsignore`` file itself.
    """
    ignore_file = filetree.children.get('.bidsignore')
    if ignore_file:
        user_rules = Ignore.from_file(ignore_file)
        self_rule = Ignore(['/.bidsignore'])
        return _filter(filetree, IgnoreMany([user_rules, self_rule]))
    return filetree
115+
116+
117+
def _filter(filetree: FileTree, ignore: HasMatch) -> FileTree:
    """Recursively drop children whose relative path is matched by ``ignore``.

    Returns the same ``filetree`` object when nothing below it changed, and
    an evolved copy with the surviving children otherwise.
    """
    original = filetree.children
    kept = {}
    for name, child in original.items():
        if ignore.match(child.relative_path):
            continue
        kept[name] = _filter(child, ignore)

    # XXX This check may not be worth the time. Profile this.
    unchanged = all(kept.get(name) is child for name, child in original.items())
    if unchanged:
        return filetree
    return attrs.evolve(filetree, children=kept)

tests/conftest.py

+15
Original file line numberDiff line numberDiff line change
@@ -15,4 +15,19 @@ def examples() -> Path:
1515
ret = importlib.resources.files(__spec__.parent) / 'data' / 'bids-examples'
1616
if not any(ret.iterdir()):
1717
pytest.skip('bids-examples submodule is not checked out')
18+
else: # pragma: no cover
19+
pass
20+
return Path(ret)
21+
22+
23+
@pytest.fixture(scope='session')
def gitignore_test() -> Path:
    """Get gitignore-test from submodule, allow environment variable override.

    If ``GITIGNORE_TEST_DIR`` is set, its value is used as the directory.
    Otherwise the ``data/gitignore-test`` directory next to this package is
    used, and the test is skipped when that directory is missing or empty.
    """
    ret = os.getenv('GITIGNORE_TEST_DIR')
    if not ret:
        ret = importlib.resources.files(__spec__.parent) / 'data' / 'gitignore-test'
        # Check is_dir() first: iterating a directory that does not exist
        # would raise instead of skipping.
        if not ret.is_dir() or not any(ret.iterdir()):
            pytest.skip('gitignore-test submodule is not checked out')
        else:  # pragma: no cover
            pass
    return Path(ret)

tests/data/gitignore-test

Submodule gitignore-test added at 2a2cc1d

tests/test_bidsignore.py

+139
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
"""Test bids_validator.bidsignore."""
2+
3+
from pathlib import Path
4+
5+
import pytest
6+
7+
from bids_validator.bidsignore import Ignore, compile_pat, filter_file_tree
8+
from bids_validator.types.files import FileTree
9+
10+
11+
@pytest.mark.parametrize(
    ('pattern', 'hits', 'misses'),
    [
        ('/', ['/'], ['dir/', 'file']),
        # Match file or directory named foo
        ('foo', ['foo', 'foo/', 'bar/foo', 'bar/foo/'], ['bar', 'foobar', 'barfoo', 'barfoo/']),
        # Directories named foo only
        ('foo/', ['foo/', 'bar/foo/'], ['foo', 'bar/foo', 'bar', 'foobar', 'barfoo', 'barfoo/']),
        # Files or directories at the root
        ('/foo', ['foo', 'foo/'], ['bar/foo', 'bar/foo/', 'bar', 'foobar', 'barfoo', 'barfoo/']),
        # doc/frotz/ examples from GITIGNORE(5)
        ('doc/frotz/', ['doc/frotz/'], ['a/doc/frotz/']),
        ('frotz/', ['frotz/', 'doc/frotz/', 'a/doc/frotz/'], []),
        # * matches everything because everything has a basename
        ('*', ['foo', 'foo/', 'foo/bar', 'foo/bar/'], []),
        # *o matches things with basename ending in o, including parent directories
        ('*o', ['foo', 'foo/', 'bar/foo', 'bar/foo/', 'foo/bar'], ['bar', 'bar/baz', 'bar/bar/']),
        # Leading **/ matches in all directories
        (
            '**/foo',
            ['foo', 'foo/', 'bar/foo', 'bar/foo/', 'foo/bar'],
            ['foobar/baz', 'foobar/baz/', 'baz/foobar', 'baz/barfoo'],
        ),
        ('**/foo/bar', ['foo/bar', 'foo/bar/', 'a/foo/bar'], ['foo/', 'bar/foo', 'bar']),
        # Trailing /** matches everything inside a root-relative directory
        ('foo/**', ['foo/', 'foo/x', 'foo/x/y/z'], ['foo', 'bar/foo/x/y/z']),
        # /**/ matches zero or more directories
        ('a/**/b', ['a/b', 'a/x/b', 'a/x/y/b'], ['x/a/b', 'x/a/y/b']),
        # ** surrounded by something other than slashes acts like a regular *
        ('a/x**/b', ['a/x/b', 'a/xy/b'], ['x/a/b', 'x/a/y/b', 'a/x/y/b']),
        # Escaped special prefixes
        (r'\#*', ['#', '#foo'], ['foo', 'bar#']),
        (r'\!*', ['!', '!foo'], ['foo', 'bar!']),
    ],
)
def test_patterns(pattern, hits, misses):
    """Check each pattern against paths it must and must not match."""
    compiled = compile_pat(pattern)
    # Paths the pattern is required to match
    for path in hits:
        assert compiled.match(path) is not None, f'"{path}" should match "{pattern}"'
    # Paths the pattern is required to leave alone
    for path in misses:
        assert compiled.match(path) is None, f'"{path}" should not match "{pattern}"'
53+
54+
55+
def test_skipped_patterns():
    """Check lines that compile to nothing, and the rejection of inversions."""
    # Blank, comment and whitespace-only lines yield no pattern at all
    for skipped in ('', '# commented line', ' '):
        assert compile_pat(skipped) is None

    with pytest.raises(ValueError, match='Inverted patterns not supported'):
        compile_pat('!inverted pattern')
62+
63+
64+
def test_Ignore_ds000117(examples):
    """Load the ds000117 .bidsignore and match one of its files."""
    flash_relpath = 'sub-01/ses-mri/anat/sub-01_ses-mri_run-1_echo-1_FLASH.nii.gz'

    tree = FileTree.read_from_filesystem(examples / 'ds000117')
    ignore = Ignore.from_file(tree.children['.bidsignore'])

    assert 'run-*_echo-*_FLASH.json' in ignore.patterns
    assert flash_relpath in tree
    assert ignore.match(flash_relpath)
    assert not ignore.match('acq-mprage_T1w.json')

    # Walk down to the same file through the tree and match its own path
    node = tree
    for name in ('sub-01', 'ses-mri', 'anat', 'sub-01_ses-mri_run-1_echo-1_FLASH.nii.gz'):
        node = node.children[name]
    assert ignore.match(node.relative_path)
79+
80+
81+
def test_filter_file_tree(examples):
    """Check filtering with, and without, a .bidsignore in the dataset."""
    flash_relpath = 'sub-01/ses-mri/anat/sub-01_ses-mri_run-1_echo-1_FLASH.nii.gz'

    # Dataset with a .bidsignore: the ignore file and the FLASH file are dropped
    with_ignore = FileTree.read_from_filesystem(examples / 'ds000117')
    assert '.bidsignore' in with_ignore
    assert flash_relpath in with_ignore

    pruned = filter_file_tree(with_ignore)
    assert '.bidsignore' not in pruned
    assert flash_relpath not in pruned

    # Dataset without a .bidsignore: the very same tree object comes back
    without_ignore = FileTree.read_from_filesystem(examples / 'ds000247')
    assert '.bidsignore' not in without_ignore

    untouched = filter_file_tree(without_ignore)
    assert untouched is without_ignore
96+
97+
98+
def _walk(tree: FileTree):
    """Yield every non-directory node below ``tree``, depth first."""
    # Stack of partially consumed child iterators, innermost directory last
    pending = [iter(tree.children.values())]
    while pending:
        for node in pending[-1]:
            if node.is_dir:
                # Descend now; the parent iterator resumes after this directory
                pending.append(iter(node.children.values()))
                break
            yield node
        else:
            pending.pop()
104+
105+
106+
def test_gitignore_battery(gitignore_test):
    """Run our implementation over the gitignore-test battery of files."""
    filetree = FileTree.read_from_filesystem(gitignore_test)
    ignore = Ignore.from_file(filetree.children['.gitignore'])
    # Remove inverted patterns
    ignore.patterns = [line for line in ignore.patterns if not line.startswith('!')]

    known_mismatches = [
        '.git*',  # Ignore .git/, .gitignore, etc
        'README.md',  # README is an exception
        # Inverted patterns are not supported
        'foo*.html',
        '/log/foo.log',
        'findthis*',
        # Nested gitignore swaps expectations for all files
        'git-sample-3/',
        # Inversions in nested gitignores
        'arch/foo/kernel/vmlinux*',
        'htmldoc/*.html',
    ]
    expected_failures = Ignore(known_mismatches)

    for file in _walk(filetree):
        if expected_failures.match(file.relative_path):
            continue
        content = None
        if ignore.match(file.relative_path):
            content = Path(file).read_text().strip()
            assert content == 'foo: FAIL', f'{file.relative_path} should have failed'
        else:
            content = Path(file).read_text().strip()
            assert content == 'foo: OK', f'{file.relative_path} should have passed'

0 commit comments

Comments
 (0)