Skip to content

Commit ea522de

Browse files
authored
Merge pull request #135 from nexB/api-update
Api update
2 parents 2b509c7 + 798dd87 commit ea522de

File tree

22 files changed

+264
-124
lines changed

22 files changed

+264
-124
lines changed

matchcode-toolkit/CHANGELOG.rst

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,16 @@
11
Changelog
22
=========
33

4+
v1.1.1
5+
------
6+
7+
*2023-06-29* -- Do not include empty files when computing directory fingerprints.
8+
9+
v1.1.0
10+
------
11+
12+
*2023-06-22* -- Rename ``compute_directory_fingerprints`` to ``compute_codebase_directory_fingerprints`` and create a new version of ``compute_directory_fingerprints`` that works on Resource objects instead of codebases.
13+
414
v1.0.0
515
------
616

matchcode-toolkit/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "matchcode-toolkit"
3-
version = "1.0.0"
3+
version = "1.1.1"
44

55
[build-system]
66
requires = ["setuptools >= 50", "wheel", "setuptools_scm[toml] >= 6"]

matchcode-toolkit/setup.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[metadata]
22
name = matchcode-toolkit
3-
version = 1.0.0
3+
version = 1.1.1
44
license = Apache-2.0
55

66
# description must be on ONE line https://github.com/pypa/setuptools/issues/1390

matchcode-toolkit/src/matchcode_toolkit/fingerprinting.py

Lines changed: 39 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -69,30 +69,50 @@ def create_structure_fingerprint(directory, children):
6969
return _create_directory_fingerprint(features)
7070

7171

72-
def compute_directory_fingerprints(codebase):
72+
def _compute_directory_fingerprints(directory, codebase):
7373
"""
74-
Compute fingerprints for a directory from `codebase`
74+
Compute fingerprints for `directory` from `codebase`
7575
"""
76-
for resource in codebase.walk(topdown=False):
77-
if resource.is_file or not resource.path:
78-
continue
79-
children = [r for r in resource.walk(codebase) if r.is_file]
80-
if len(children) == 1:
81-
continue
76+
# We do not want to add empty files to our fingerprint
77+
children = [r for r in directory.walk(codebase) if r.is_file and r.size]
78+
if len(children) == 1:
79+
return
8280

83-
directory_content_fingerprint = create_content_fingerprint(children)
84-
if hasattr(resource, 'directory_content_fingerprint'):
85-
resource.directory_content_fingerprint = directory_content_fingerprint
86-
else:
87-
resource.extra_data['directory_content'] = directory_content_fingerprint
81+
directory_content_fingerprint = create_content_fingerprint(children)
82+
if hasattr(directory, 'directory_content_fingerprint'):
83+
directory.directory_content_fingerprint = directory_content_fingerprint
84+
else:
85+
directory.extra_data['directory_content'] = directory_content_fingerprint
8886

89-
directory_structure_fingerprint = create_structure_fingerprint(resource, children)
90-
if hasattr(resource, 'directory_structure_fingerprint'):
91-
resource.directory_structure_fingerprint = directory_structure_fingerprint
92-
else:
93-
resource.extra_data['directory_structure'] = create_structure_fingerprint(resource, children)
87+
directory_structure_fingerprint = create_structure_fingerprint(directory, children)
88+
if hasattr(directory, 'directory_structure_fingerprint'):
89+
directory.directory_structure_fingerprint = directory_structure_fingerprint
90+
else:
91+
directory.extra_data['directory_structure'] = directory_structure_fingerprint
92+
93+
directory.save(codebase)
94+
return directory
95+
96+
97+
def compute_directory_fingerprints(directory, codebase):
98+
"""
99+
Recursively compute fingerprints for `directory` from `codebase`
100+
"""
101+
for resource in directory.walk(codebase, topdown=False):
102+
if resource.is_file:
103+
continue
104+
_ = _compute_directory_fingerprints(resource, codebase)
105+
return directory
94106

95-
resource.save(codebase)
107+
108+
def compute_codebase_directory_fingerprints(codebase):
109+
"""
110+
Compute fingerprints for directories from `codebase`
111+
"""
112+
for resource in codebase.walk(topdown=False):
113+
if resource.is_file or not resource.path:
114+
continue
115+
_ = _compute_directory_fingerprints(resource, codebase)
96116
return codebase
97117

98118

matchcode-toolkit/src/matchcode_toolkit/pipelines/scan_and_fingerprint_package.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
2121
# Visit https://github.com/nexB/scancode.io for support and download.
2222

23-
from matchcode_toolkit.fingerprinting import compute_directory_fingerprints
23+
from matchcode_toolkit.fingerprinting import compute_codebase_directory_fingerprints
2424

2525
from scanpipe.pipelines.scan_package import ScanPackage
2626
from scanpipe.pipes.codebase import ProjectCodebase
@@ -63,4 +63,4 @@ def fingerprint_codebase(self):
6363
Compute directory fingerprints for matching purposes
6464
"""
6565
project_codebase = ProjectCodebase(self.project)
66-
compute_directory_fingerprints(project_codebase)
66+
compute_codebase_directory_fingerprints(project_codebase)

matchcode-toolkit/src/matchcode_toolkit/plugin_fingerprint.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111

1212
from commoncode.cliutils import PluggableCommandLineOption
1313
from commoncode.cliutils import POST_SCAN_GROUP
14-
from matchcode_toolkit.fingerprinting import compute_directory_fingerprints
14+
from matchcode_toolkit.fingerprinting import compute_codebase_directory_fingerprints
1515
from plugincode.post_scan import post_scan_impl
1616
from plugincode.post_scan import PostScanPlugin
1717

@@ -41,4 +41,4 @@ def is_enabled(self, fingerprint, **kwargs):
4141
return fingerprint
4242

4343
def process_codebase(self, codebase, **kwargs):
44-
codebase = compute_directory_fingerprints(codebase)
44+
codebase = compute_codebase_directory_fingerprints(codebase)

matchcode-toolkit/tests/test_fingerprinting.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414

1515
from matchcode_toolkit.fingerprinting import _create_directory_fingerprint
1616
from matchcode_toolkit.fingerprinting import _get_resource_subpath
17-
from matchcode_toolkit.fingerprinting import compute_directory_fingerprints
17+
from matchcode_toolkit.fingerprinting import compute_codebase_directory_fingerprints
1818
from matchcode_toolkit.fingerprinting import create_content_fingerprint
1919
from matchcode_toolkit.fingerprinting import create_halohash_chunks
2020
from matchcode_toolkit.fingerprinting import create_structure_fingerprint
@@ -95,10 +95,10 @@ def test_create_halohash_chunks(self):
9595
self.assertEqual(chunk3, expected_chunk3)
9696
self.assertEqual(chunk4, expected_chunk4)
9797

98-
def test_compute_directory_fingerprints(self):
98+
def test_compute_codebase_directory_fingerprints(self):
9999
scan_loc = self.get_test_loc('abbrev-1.0.3-i.json')
100100
vc = VirtualCodebase(location=scan_loc)
101-
vc = compute_directory_fingerprints(vc)
101+
vc = compute_codebase_directory_fingerprints(vc)
102102
directory_content = vc.root.extra_data['directory_content']
103103
directory_structure = vc.root.extra_data['directory_structure']
104104
expected_directory_content = '0000000346ce04751a3c98f00086f16a91d9790b'

matchcode/api.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,28 @@ class MultipleCharFilter(MultipleChoiceFilter):
129129
field_class = MultipleCharField
130130

131131

132+
# TODO: Think of a better name for this filter
133+
class MultipleCharInFilter(MultipleCharFilter):
134+
def filter(self, qs, value):
135+
if not value:
136+
# Even though not a noop, no point filtering if empty.
137+
return qs
138+
139+
if self.is_noop(qs, value):
140+
return qs
141+
142+
predicate = self.get_filter_predicate(value)
143+
old_field_name = next(iter(predicate))
144+
new_field_name = f'{old_field_name}__in'
145+
predicate[new_field_name] = predicate[old_field_name]
146+
predicate.pop(old_field_name)
147+
148+
q = Q(**predicate)
149+
qs = self.get_method(qs)(q)
150+
151+
return qs.distinct() if self.distinct else qs
152+
153+
132154
class MultipleSHA1Filter(MultipleCharFilter):
133155
"""
134156
Overrides `MultipleCharFilter.filter()` to convert the SHA1

matchcode/indexing.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313

1414
from commoncode.resource import VirtualCodebase
1515

16-
from matchcode_toolkit.fingerprinting import compute_directory_fingerprints
16+
from matchcode_toolkit.fingerprinting import compute_codebase_directory_fingerprints
1717
from matchcode.models import ApproximateDirectoryContentIndex
1818
from matchcode.models import ApproximateDirectoryStructureIndex
1919
from matchcode.models import ExactPackageArchiveIndex
@@ -150,5 +150,5 @@ def index_package_directories(package):
150150
if not vc:
151151
return 0, 0
152152

153-
vc = compute_directory_fingerprints(vc)
153+
vc = compute_codebase_directory_fingerprints(vc)
154154
return index_directory_fingerprints(vc, package)

matchcode/tests/test_index_packages.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111

1212
from commoncode.resource import VirtualCodebase
1313

14-
from matchcode_toolkit.fingerprinting import compute_directory_fingerprints
14+
from matchcode_toolkit.fingerprinting import compute_codebase_directory_fingerprints
1515
from matchcode_toolkit.fingerprinting import hexstring_to_binarray
1616
from matchcode.indexing import _create_virtual_codebase_from_package_resources
1717
from matchcode.indexing import index_directory_fingerprints
@@ -155,7 +155,7 @@ def test__create_virtual_codebase_from_package_resources(self):
155155

156156
def test_index_directory_fingerprints(self):
157157
vc = _create_virtual_codebase_from_package_resources(self.test_package1)
158-
vc = compute_directory_fingerprints(vc)
158+
vc = compute_codebase_directory_fingerprints(vc)
159159

160160
# Ensure tables are empty prior to indexing
161161
self.assertFalse(ApproximateDirectoryContentIndex.objects.all())

0 commit comments

Comments
 (0)