Skip to content

Commit 152abda

Browse files
authored
Merge pull request #2718 from nexB/2717-bug-summary
Return package details in summary #2717
2 parents 1bddb92 + d37bb16 commit 152abda

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

43 files changed

+6659
-560
lines changed

CHANGELOG.rst

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,12 +64,30 @@ Outputs:
6464

6565

6666

67+
30.1.0 - 2021-09-25
68+
--------------------
69+
70+
This is a bug fix release for these bugs:
71+
72+
- https://github.com/nexB/scancode-toolkit/issues/2717
73+
74+
We now return the package in the summaries as before.
75+
76+
There is also a minor API change: we no longer return a count of empty ("null")
77+
values in the summaries for license, copyrights, etc.
78+
79+
80+
Thank you to:
81+
- Thomas Druez @tdruez
82+
83+
84+
6785
30.0.1 - 2021-09-24
6886
--------------------
6987

7088
This is a minor bug fix release for these bugs:
7189

72-
- https://github.com/nexB/scancode-toolkit/issues/2713
90+
- https://github.com/nexB/commoncode/issues/31
7391
- https://github.com/nexB/scancode-toolkit/issues/2713
7492

7593
We now correctly work with all supported Click versions.

setup-mini.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[metadata]
22
name = scancode-toolkit-mini
3-
version = 30.0.1
3+
version = 30.1.0
44
license = Apache-2.0 AND CC-BY-4.0 AND LicenseRef-scancode-other-permissive AND LicenseRef-scancode-other-copyleft
55

66
description = ScanCode is a tool to scan code for license, copyright, package and their documented dependencies and other interesting facts. scancode-toolkit-mini is a special build that does not come with pre-built binary dependencies by default. These are instead installed separately or with the extra_requires scancode-toolkit-mini[full]

setup.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[metadata]
22
name = scancode-toolkit
3-
version = 30.0.1
3+
version = 30.1.0
44
license = Apache-2.0 AND CC-BY-4.0 AND LicenseRef-scancode-other-permissive AND LicenseRef-scancode-other-copyleft
55

66
description = ScanCode is a tool to scan code for license, copyright, package and their documented dependencies and other interesting facts.

src/scancode_config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ def _create_dir(location):
7777

7878
# in case package is not installed or we do not have setuptools/pkg_resources
7979
# on hand fall back to this version
80-
__version__ = '30.0.1'
80+
__version__ = '30.1.0'
8181

8282
# used to warn user when the version is out of date
8383
__release_date__ = datetime.datetime(2021, 9, 24)

src/summarycode/summarizer.py

Lines changed: 44 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -40,49 +40,7 @@ def logger_debug(*args):
4040
return logger.debug(' '.join(isinstance(a, str) and a or repr(a) for a in args))
4141

4242
"""
43-
top_level:
44-
- license_expressions:
45-
- count: 1
46-
value: gpl-2.0
47-
- holders:
48-
- count: 1
49-
value: RedHat Inc.
50-
51-
by_facet:
52-
facet: core
53-
- license_expressions:
54-
- count: 10
55-
value: gpl-2.0 or bsd-new
56-
- count: 2
57-
value: mit
58-
- programming_language:
59-
- count: 10
60-
value: java
61-
- holders:
62-
- count: 10
63-
value: RedHat Inc.
64-
facet: dev
65-
- license_expressions:
66-
- count: 23
67-
value: gpl-2.0
68-
- holders:
69-
- count: 20
70-
value: RedHat Inc.
71-
- count: 10
72-
value: none
73-
- programming_languages:
74-
- count: 34
75-
value: java
76-
all:
77-
- license_expressions:
78-
- count: 10
79-
value: gpl-2.0 or bsd-new
80-
- programming_language:
81-
- count: 10
82-
value: java
83-
- holders:
84-
- count: 10
85-
value: RedHat Inc.
43+
Create summarized scan data.
8644
"""
8745

8846

@@ -205,8 +163,10 @@ def license_summarizer(resource, children, keep_details=False):
205163
child_summaries = get_resource_summary(child, key=LIC_EXP, as_attribute=keep_details) or []
206164
for child_summary in child_summaries:
207165
# TODO: review this: this feels rather weird
208-
values = [child_summary['value']] * child_summary['count']
209-
license_expressions.extend(values)
166+
child_sum_val = child_summary.get('value')
167+
if child_sum_val:
168+
values = [child_sum_val] * child_summary['count']
169+
license_expressions.extend(values)
210170

211171
# summarize proper
212172
licenses_counter = summarize_licenses(license_expressions)
@@ -245,8 +205,10 @@ def language_summarizer(resource, children, keep_details=False):
245205
for child in children:
246206
child_summaries = get_resource_summary(child, key=PROG_LANG, as_attribute=keep_details) or []
247207
for child_summary in child_summaries:
248-
values = [child_summary['value']] * child_summary['count']
249-
languages.extend(values)
208+
child_sum_val = child_summary.get('value')
209+
if child_sum_val:
210+
values = [child_sum_val] * child_summary['count']
211+
languages.extend(values)
250212

251213
# summarize proper
252214
languages_counter = summarize_languages(languages)
@@ -264,11 +226,23 @@ def summarize_languages(languages):
264226
return Counter(languages)
265227

266228

229+
SUMMARIZABLE_ATTRS = set([
230+
'license_expressions',
231+
'copyrights',
232+
'holders',
233+
'authors',
234+
'programming_language',
235+
# 'packages',
236+
])
237+
238+
267239
def summarize_values(values, attribute):
268240
"""
269241
Given a list of `values` for a given `attribute`, return a mapping of
270242
{value: count of occurrences} using a summarization specific to the attribute.
271243
"""
244+
if attribute not in SUMMARIZABLE_ATTRS:
245+
return {}
272246
from summarycode.copyright_summary import summarize_holders
273247
from summarycode.copyright_summary import summarize_copyrights
274248

@@ -278,7 +252,6 @@ def summarize_values(values, attribute):
278252
holders=summarize_holders,
279253
authors=summarize_holders,
280254
programming_language=summarize_languages,
281-
packages=summarize_packages,
282255
)
283256
return value_summarizers_by_attr[attribute](values)
284257

@@ -317,23 +290,14 @@ def summarize_codebase_key_files(codebase, **kwargs):
317290
"""
318291
Summarize codebase key files.
319292
"""
320-
summarizable_attributes = codebase.attributes.summary.keys()
321-
if TRACE: logger_debug('summarizable_attributes:', summarizable_attributes)
322-
323-
# TODO: we cannot summarize packages with "key files for now
324-
really_summarizable_attributes = set([
325-
'license_expressions',
326-
'copyrights',
327-
'holders',
328-
'authors',
329-
'programming_language',
330-
# 'packages',
331-
])
332-
summarizable_attributes = [k for k in summarizable_attributes
333-
if k in really_summarizable_attributes]
293+
summarizables = codebase.attributes.summary.keys()
294+
if TRACE: logger_debug('summarizables:', summarizables)
295+
296+
# TODO: we cannot summarize packages with "key files" for now
297+
summarizables = [k for k in summarizables if k in SUMMARIZABLE_ATTRS]
334298

335299
# create one counter for each summarized attribute
336-
summarizable_values_by_key = dict([(key, []) for key in summarizable_attributes])
300+
summarizable_values_by_key = dict([(key, []) for key in summarizables])
337301

338302
# filter to get only key files
339303
key_files = (res for res in codebase.walk(topdown=True)
@@ -347,10 +311,14 @@ def summarize_codebase_key_files(codebase, **kwargs):
347311
res_summaries = get_resource_summary(resource, key=key, as_attribute=False) or []
348312
for summary in res_summaries:
349313
# each summary is a mapping with value/count: we transform back to values
350-
values.extend([summary['value']] * summary['count'])
314+
sum_value = summary.get('value')
315+
if sum_value:
316+
values.extend([sum_value] * summary['count'])
351317

352318
summary_counters = []
353319
for key, values in summarizable_values_by_key.items():
320+
if key not in SUMMARIZABLE_ATTRS:
321+
continue
354322
summarized = summarize_values(values, key)
355323
summary_counters.append((key, summarized))
356324

@@ -394,13 +362,13 @@ def summarize_codebase_by_facet(codebase, **kwargs):
394362
"""
395363
from summarycode import facet as facet_module
396364

397-
summarizable_attributes = codebase.attributes.summary.keys()
365+
summarizable = codebase.attributes.summary.keys()
398366
if TRACE:
399-
logger_debug('summarize_codebase_by_facet for attributes:', summarizable_attributes)
367+
logger_debug('summarize_codebase_by_facet for attributes:', summarizable)
400368

401369
# create one group of by-facet values lists for each summarized attribute
402370
summarizable_values_by_key_by_facet = dict([
403-
(facet, dict([(key, []) for key in summarizable_attributes]))
371+
(facet, dict([(key, []) for key in summarizable]))
404372
for facet in facet_module.FACETS
405373
])
406374

@@ -417,7 +385,9 @@ def summarize_codebase_by_facet(codebase, **kwargs):
417385
res_summaries = get_resource_summary(resource, key=key, as_attribute=False) or []
418386
for summary in res_summaries:
419387
# each summary is a mapping with value/count: we transform back to discrete values
420-
values.extend([summary['value']] * summary['count'])
388+
sum_value = summary.get('value')
389+
if sum_value:
390+
values.extend([sum_value] * summary['count'])
421391

422392
final_summaries = []
423393
for facet, summarizable_values_by_key in summarizable_values_by_key_by_facet.items():
@@ -480,28 +450,11 @@ def package_summarizer(resource, children, keep_details=False):
480450
logger_debug('package_summarizer: for:', resource,
481451
'packages are:', packs)
482452

483-
package_urls = []
484-
for package in packages:
485-
purl = package.get('purl')
486-
if purl:
487-
package_urls.append(purl)
453+
# Collect direct children packages summary
454+
for child in children:
455+
child_summaries = get_resource_summary(child, key='packages', as_attribute=False) or []
456+
packages.extend(child_summaries)
488457

489458
# summarize proper
490-
packages_counter = summarize_packages(package_urls)
491-
summarized = sorted_counter(packages_counter)
492-
set_resource_summary(
493-
resource=resource,
494-
key='packages',
495-
value=summarized,
496-
as_attribute=keep_details,
497-
)
498-
499-
return summarized
500-
501-
502-
def summarize_packages(package_urls):
503-
"""
504-
Given a list of package urls, return a mapping of {expression: count
505-
of occurences}
506-
"""
507-
return Counter(package_urls)
459+
set_resource_summary(resource, key='packages', value=packages, as_attribute=False)
460+
return packages

tests/summarycode/data/classify/cli.expected.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
"--json-pp": "<file>"
1010
},
1111
"notice": "Generated with ScanCode and provided on an \"AS IS\" BASIS, WITHOUT WARRANTIES\nOR CONDITIONS OF ANY KIND, either express or implied. No content created from\nScanCode should be considered or used as legal advice. Consult an Attorney\nfor any legal advice.\nScanCode is a free software code scanning tool from nexB Inc. and others.\nVisit https://github.com/nexB/scancode-toolkit/ for support and download.",
12+
"output_format_version": "1.0.0",
1213
"message": null,
1314
"errors": [],
1415
"extra_data": {

tests/summarycode/data/copyright_summary/summary.expected.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
"--summary": true
1010
},
1111
"notice": "Generated with ScanCode and provided on an \"AS IS\" BASIS, WITHOUT WARRANTIES\nOR CONDITIONS OF ANY KIND, either express or implied. No content created from\nScanCode should be considered or used as legal advice. Consult an Attorney\nfor any legal advice.\nScanCode is a free software code scanning tool from nexB Inc. and others.\nVisit https://github.com/nexB/scancode-toolkit/ for support and download.",
12+
"output_format_version": "1.0.0",
1213
"message": null,
1314
"errors": [],
1415
"extra_data": {

tests/summarycode/data/copyright_summary/summary2.expected.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
"--summary": true
1010
},
1111
"notice": "Generated with ScanCode and provided on an \"AS IS\" BASIS, WITHOUT WARRANTIES\nOR CONDITIONS OF ANY KIND, either express or implied. No content created from\nScanCode should be considered or used as legal advice. Consult an Attorney\nfor any legal advice.\nScanCode is a free software code scanning tool from nexB Inc. and others.\nVisit https://github.com/nexB/scancode-toolkit/ for support and download.",
12+
"output_format_version": "1.0.0",
1213
"message": null,
1314
"errors": [],
1415
"extra_data": {

tests/summarycode/data/copyright_summary/summary_details.expected.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
"--summary-with-details": true
1010
},
1111
"notice": "Generated with ScanCode and provided on an \"AS IS\" BASIS, WITHOUT WARRANTIES\nOR CONDITIONS OF ANY KIND, either express or implied. No content created from\nScanCode should be considered or used as legal advice. Consult an Attorney\nfor any legal advice.\nScanCode is a free software code scanning tool from nexB Inc. and others.\nVisit https://github.com/nexB/scancode-toolkit/ for support and download.",
12+
"output_format_version": "1.0.0",
1213
"message": null,
1314
"errors": [],
1415
"extra_data": {

tests/summarycode/data/copyright_summary/summary_details.expected2.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
"--summary-with-details": true
1010
},
1111
"notice": "Generated with ScanCode and provided on an \"AS IS\" BASIS, WITHOUT WARRANTIES\nOR CONDITIONS OF ANY KIND, either express or implied. No content created from\nScanCode should be considered or used as legal advice. Consult an Attorney\nfor any legal advice.\nScanCode is a free software code scanning tool from nexB Inc. and others.\nVisit https://github.com/nexB/scancode-toolkit/ for support and download.",
12+
"output_format_version": "1.0.0",
1213
"message": null,
1314
"errors": [],
1415
"extra_data": {

0 commit comments

Comments
 (0)