Skip to content

Commit 51d0c5c

Browse files
authored
Merge pull request #45 from nexB/open-as-text
Open as text
2 parents 1a2a144 + 7aa7d4c commit 51d0c5c

File tree

4 files changed

+245
-21
lines changed

4 files changed

+245
-21
lines changed

configure

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,10 @@ CFG_ROOT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
5151
CFG_BIN_DIR=$CFG_ROOT_DIR/$VIRTUALENV_DIR/bin
5252

5353
# Find packages from the local thirdparty directory or from thirdparty.aboutcode.org
54-
PIP_EXTRA_ARGS="--find-links $CFG_ROOT_DIR/thirdparty --find-links https://thirdparty.aboutcode.org/pypi"
54+
# Only point pip at the local thirdparty directory when it actually exists.
# NOTE: thirdparty is a directory, so this must be tested with -d; a -f test
# (regular file) never matches and the local links would always be skipped.
if [ -d "$CFG_ROOT_DIR/thirdparty" ]; then
    PIP_EXTRA_ARGS="--find-links $CFG_ROOT_DIR/thirdparty "
fi
# Always fall back to the remote thirdparty.aboutcode.org PyPI-like index.
PIP_EXTRA_ARGS="$PIP_EXTRA_ARGS --find-links https://thirdparty.aboutcode.org/pypi"
5558

5659

5760
################################

configure.bat

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,11 @@ set "CFG_BIN_DIR=%CFG_ROOT_DIR%\%VIRTUALENV_DIR%\Scripts"
4949

5050
@rem ################################
5151
@rem # Thirdparty package locations and index handling
52-
set "PIP_EXTRA_ARGS=--find-links %CFG_ROOT_DIR%\thirdparty --find-links https://thirdparty.aboutcode.org/pypi" & %INDEX_ARG%
52+
@rem Only point pip at the local thirdparty directory when it exists.
@rem The path is wrapped in a single pair of quotes so that a root directory
@rem containing spaces is still tested as one path.
if exist "%CFG_ROOT_DIR%\thirdparty" (
    set "PIP_EXTRA_ARGS=--find-links %CFG_ROOT_DIR%\thirdparty "
)

@rem Always fall back to the remote thirdparty.aboutcode.org PyPI-like index.
set "PIP_EXTRA_ARGS=%PIP_EXTRA_ARGS% --find-links https://thirdparty.aboutcode.org/pypi" & %INDEX_ARG%
5357
@rem ################################
5458

5559

etc/scripts/publish_files.py

Lines changed: 204 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,204 @@
1+
#!/usr/bin/env python
2+
#
3+
# Copyright (c) nexB Inc. and others. All rights reserved.
4+
# ScanCode is a trademark of nexB Inc.
5+
# SPDX-License-Identifier: Apache-2.0
6+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
7+
# See https://github.com/nexB/scancode-toolkit for support or download.
8+
# See https://aboutcode.org for more information about nexB OSS projects.
9+
#
10+
import hashlib
11+
import os
12+
import sys
13+
14+
from pathlib import Path
15+
16+
import click
17+
import requests
18+
import utils_thirdparty
19+
20+
from github_release_retry import github_release_retry as grr
21+
22+
"""
23+
Create GitHub releases and upload files there.
24+
"""
25+
26+
27+
def get_files(location):
    """
    Return an iterable of (filename, Path, md5) tuples for files in the `location`
    directory tree recursively.

    `filename` is the file base name, `Path` is a pathlib.Path built from the
    walked directory and the file name, and `md5` is the hexadecimal MD5 digest
    of the file content.
    """
    # Read size used when hashing: files are hashed chunk by chunk so that a
    # large file is never loaded entirely in memory.
    chunk_size = 1024 * 1024

    for top, _dirs, files in os.walk(location):
        for filename in files:
            pth = Path(os.path.join(top, filename))
            hasher = hashlib.md5()
            with open(pth, 'rb') as fi:
                for chunk in iter(lambda: fi.read(chunk_size), b''):
                    hasher.update(chunk)
            # Yield only after the file is closed so that a suspended
            # generator never keeps a file handle open.
            yield filename, pth, hasher.hexdigest()
38+
39+
40+
def get_etag_md5(url):
    """
    Return the cleaned etag of URL `url` or None.

    The headers are obtained from utils_thirdparty.get_remote_headers() and
    the `etag` header is looked up case-insensitively. The returned value is
    lowercased, without surrounding double quotes and without the `W/` weak
    validator prefix, so that it can be compared to a hexadecimal md5 digest.
    A missing or empty etag is returned as-is.
    """
    headers = utils_thirdparty.get_remote_headers(url)
    # Normalize header names since their case is not guaranteed.
    headers = {k.lower(): v for k, v in headers.items()}
    etag = headers.get('etag')
    if not etag:
        return etag

    etag = etag.strip()
    # A weak etag looks like W/"<value>": drop the prefix, otherwise the
    # leading W/" is left in place and the value can never match an md5.
    if etag[:2].lower() == 'w/':
        etag = etag[2:]
    return etag.strip('"').lower()
50+
51+
52+
def create_or_update_release_and_upload_directory(
    user,
    repo,
    tag_name,
    token,
    directory,
    retry_limit=10,
    description=None,
):
    """
    Create or update a GitHub release at https://github.com/<user>/<repo> for
    `tag_name` tag using the optional `description` for this release.
    Use the provided `token` as a GitHub token for API calls authentication.
    Upload all files found in the `directory` tree to that GitHub release.
    Retry API calls up to `retry_limit` times to work around instability of the
    GitHub API.

    Remote files that are not the same as the local files are deleted and re-
    uploaded. A remote file is considered the same as a local file when the
    etag of its URL equals the md5 of the local file.

    Raise an Exception if a modified remote file cannot be deleted.
    """
    release_homepage_url = f'https://github.com/{user}/{repo}/releases/{tag_name}'

    # scrape release page HTML for links
    # NOTE: links are keyed by base name; two links with the same base name
    # collapse to a single entry (the last one wins).
    urls_by_filename = {
        os.path.basename(link): link
        for link in utils_thirdparty.get_paths_or_urls(links_url=release_homepage_url)
    }

    # compute what is new, modified or unchanged
    print(f'Compute which files are new, modified or unchanged in {release_homepage_url}')

    new_to_upload = []
    unchanged_to_skip = []
    modified_to_delete_and_reupload = []
    for filename, pth, md5 in get_files(directory):
        url = urls_by_filename.get(filename)
        if not url:
            print(f'{filename} content is NEW, will upload')
            new_to_upload.append(pth)
            continue

        out_of_date = get_etag_md5(url) != md5
        if out_of_date:
            print(f'{url} content is CHANGED based on md5 etag, will re-upload')
            modified_to_delete_and_reupload.append(pth)
        else:
            # print(f'{url} content is IDENTICAL, skipping upload based on Etag')
            unchanged_to_skip.append(pth)
            print('.')

    ghapi = grr.GithubApi(
        github_api_url='https://api.github.com',
        user=user,
        repo=repo,
        token=token,
        retry_limit=retry_limit,
    )

    # yank modified
    print(
        f'Unpublishing {len(modified_to_delete_and_reupload)} published but '
        f'locally modified files in {release_homepage_url}')

    release = ghapi.get_release_by_tag(tag_name)

    for pth in modified_to_delete_and_reupload:
        filename = os.path.basename(pth)
        asset_id = ghapi.find_asset_id_by_file_name(filename, release)
        if asset_id is None:
            # NOTE(review): presumably the lookup returns None when there is
            # no such asset -- confirm against github_release_retry. Fail
            # with a clear message rather than attempting to delete "None".
            raise Exception(f'failed asset deletion: no asset id found for {filename}')
        print(f' Unpublishing file: {filename}.')
        response = ghapi.delete_asset(asset_id)
        if response.status_code != requests.codes.no_content:  # NOQA
            raise Exception(f'failed asset deletion: {response}')

    # finally upload new and modified
    to_upload = new_to_upload + modified_to_delete_and_reupload
    print(f'Publishing with {len(to_upload)} files to {release_homepage_url}')
    release = grr.Release(tag_name=tag_name, body=description)
    grr.make_release(ghapi, release, to_upload)
129+
130+
131+
# Help text for the --token command line option, also printed when no token
# is provided.
TOKEN_HELP = (
    'The Github personal access token is used to authenticate API calls. '
    'Required unless you set the GITHUB_TOKEN environment variable as an alternative. '
    'See for details: https://github.com/settings/tokens and '
    'https://docs.github.com/en/github/authenticating-to-github/creating-a-personal-access-token'
)
137+
138+
139+
@click.command()
@click.option(
    '--user-repo-tag',
    help='The GitHub qualified repository user/name/tag in which '
        'to create the release such as in nexB/thirdparty/pypi',
    type=str,
    required=True,
)
@click.option(
    '-d', '--directory',
    help='The directory that contains files to upload to the release.',
    type=click.Path(exists=True, readable=True, path_type=str, file_okay=False, resolve_path=True),
    required=True,
)
@click.option(
    '--token',
    help=TOKEN_HELP,
    default=os.environ.get('GITHUB_TOKEN', None),
    type=str,
    required=False,
)
@click.option(
    '--description',
    help='Text description for the release. Ignored if the release exists.',
    default=None,
    type=str,
    required=False,
)
@click.option(
    '--retry_limit',
    help='Number of retries when making failing GitHub API calls. '
        'Retrying helps work around transient failures of the GitHub API.',
    type=int,
    default=10,
)
@click.help_option('-h', '--help')
def publish_files(
    user_repo_tag,
    directory,
    retry_limit=10, token=None, description=None,
):
    """
    Publish all the files in DIRECTORY as assets to a GitHub release.
    Either create or update/replace remote files.
    """
    # The token comes from --token or from the GITHUB_TOKEN environment
    # variable used as the option default: exit with status 1 if there is none.
    if not token:
        click.secho('--token required option is missing.')
        click.secho(TOKEN_HELP)
        sys.exit(1)

    # Split on the first two slashes only so that a tag name that itself
    # contains a slash is kept whole, and reject incomplete values with a
    # usage error rather than an unpacking ValueError.
    parts = user_repo_tag.split('/', 2)
    if len(parts) != 3 or not all(parts):
        raise click.BadParameter(
            f'expected a value of the form user/name/tag but got: {user_repo_tag!r}',
            param_hint='--user-repo-tag',
        )
    user, repo, tag_name = parts

    create_or_update_release_and_upload_directory(
        user=user,
        repo=repo,
        tag_name=tag_name,
        description=description,
        retry_limit=retry_limit,
        token=token,
        directory=directory,
    )


if __name__ == '__main__':
    publish_files()

etc/scripts/utils_thirdparty.py

Lines changed: 32 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -24,13 +24,14 @@
2424
import attr
2525
import license_expression
2626
import packageurl
27-
import utils_pip_compatibility_tags
28-
import utils_pypi_supported_tags
2927
import requests
3028
import saneyaml
29+
import utils_pip_compatibility_tags
30+
import utils_pypi_supported_tags
3131

3232
from commoncode import fileutils
3333
from commoncode.hash import multi_checksums
34+
from commoncode.text import python_safe_name
3435
from packaging import tags as packaging_tags
3536
from packaging import version as packaging_version
3637
from utils_requirements import load_requirements
@@ -172,11 +173,20 @@ def fetch_wheels(
172173
else:
173174
force_pinned = False
174175

175-
rrp = list(get_required_remote_packages(
176-
requirements_file=requirements_file,
177-
force_pinned=force_pinned,
178-
remote_links_url=remote_links_url,
179-
))
176+
try:
177+
rrp = list(get_required_remote_packages(
178+
requirements_file=requirements_file,
179+
force_pinned=force_pinned,
180+
remote_links_url=remote_links_url,
181+
))
182+
except Exception as e:
183+
raise Exception(
184+
dict(
185+
requirements_file=requirements_file,
186+
force_pinned=force_pinned,
187+
remote_links_url=remote_links_url,
188+
)
189+
) from e
180190

181191
fetched_filenames = set()
182192
for name, version, package in rrp:
@@ -211,6 +221,7 @@ def fetch_wheels(
211221
print(f'Missed package {nv} in remote repo, has only:')
212222
for pv in rr.get_versions(n):
213223
print(' ', pv)
224+
raise Exception('Missed some packages in remote repo')
214225

215226

216227
def fetch_sources(
@@ -261,6 +272,8 @@ def fetch_sources(
261272
fetched = package.fetch_sdist(dest_dir=dest_dir)
262273
error = f'Failed to fetch' if not fetched else None
263274
yield package, error
275+
if missed:
276+
raise Exception(f'Missing source packages in {remote_links_url}', missed)
264277

265278
################################################################################
266279
#
@@ -693,8 +706,7 @@ def save_if_modified(location, content):
693706
return False
694707

695708
if TRACE: print(f'Saving ABOUT (and NOTICE) files for: {self}')
696-
wmode = 'wb' if isinstance(content, bytes) else 'w'
697-
with open(location, wmode, encoding="utf-8") as fo:
709+
with open(location, 'w') as fo:
698710
fo.write(content)
699711
return True
700712

@@ -905,16 +917,16 @@ def load_pkginfo_data(self, dest_dir=THIRDPARTY_DIR):
905917
other_classifiers = [c for c in classifiers if not c.startswith('License')]
906918

907919
holder = raw_data['Author']
908-
holder_contact=raw_data['Author-email']
909-
copyright = f'Copyright (c) {holder} <{holder_contact}>'
920+
holder_contact = raw_data['Author-email']
921+
copyright_statement = f'Copyright (c) {holder} <{holder_contact}>'
910922

911923
pkginfo_data = dict(
912924
name=raw_data['Name'],
913925
declared_license=declared_license,
914926
version=raw_data['Version'],
915927
description=raw_data['Summary'],
916928
homepage_url=raw_data['Home-page'],
917-
copyright=copyright,
929+
copyright=copyright_statement,
918930
license_expression=license_expression,
919931
holder=holder,
920932
holder_contact=holder_contact,
@@ -1845,7 +1857,7 @@ def get(self, path_or_url, as_text=True):
18451857
if not os.path.exists(cached):
18461858
content = get_file_content(path_or_url=path_or_url, as_text=as_text)
18471859
wmode = 'w' if as_text else 'wb'
1848-
with open(cached, wmode, encoding="utf-8") as fo:
1860+
with open(cached, wmode) as fo:
18491861
fo.write(content)
18501862
return content
18511863
else:
@@ -1857,7 +1869,7 @@ def put(self, filename, content):
18571869
"""
18581870
cached = os.path.join(self.directory, filename)
18591871
wmode = 'wb' if isinstance(content, bytes) else 'w'
1860-
with open(cached, wmode, encoding="utf-8") as fo:
1872+
with open(cached, wmode) as fo:
18611873
fo.write(content)
18621874

18631875

@@ -2331,7 +2343,7 @@ def get_required_remote_packages(
23312343
repo = get_remote_repo(remote_links_url=remote_links_url)
23322344
else:
23332345
# a local path
2334-
assert os.path.exists(remote_links_url)
2346+
assert os.path.exists(remote_links_url), f'Path does not exist: {remote_links_url}'
23352347
repo = get_local_repo(directory=remote_links_url)
23362348

23372349
for name, version in required_name_versions:
@@ -2365,7 +2377,7 @@ def update_requirements(name, version=None, requirements_file='requirements.txt'
23652377
updated_name_versions = sorted(updated_name_versions)
23662378
nvs = '\n'.join(f'{name}=={version}' for name, version in updated_name_versions)
23672379

2368-
with open(requirements_file, 'w', encoding="utf-8") as fo:
2380+
with open(requirements_file, 'w') as fo:
23692381
fo.write(nvs)
23702382

23712383

@@ -2383,7 +2395,7 @@ def hash_requirements(dest_dir=THIRDPARTY_DIR, requirements_file='requirements.t
23832395
raise Exception(f'Missing required package {name}=={version}')
23842396
hashed.append(package.specifier_with_hashes)
23852397

2386-
with open(requirements_file, 'w', encoding="utf-8") as fo:
2398+
with open(requirements_file, 'w') as fo:
23872399
fo.write('\n'.join(hashed))
23882400

23892401
################################################################################
@@ -2961,5 +2973,6 @@ def compute_normalized_license_expression(declared_licenses):
29612973
from packagedcode import pypi
29622974
return pypi.compute_normalized_license(declared_licenses)
29632975
except ImportError:
2964-
# Scancode is not installed, we join all license strings and return it
2965-
return ' '.join(declared_licenses)
2976+
# Scancode is not installed, clean and join all the licenses
2977+
lics = [python_safe_name(l).lower() for l in declared_licenses]
2978+
return ' AND '.join(lics).lower()

0 commit comments

Comments
 (0)