Skip to content

Commit ab65fa6

Browse files
feat: remove credentials from urls (#841)
1 parent 8533a7a commit ab65fa6

File tree

6 files changed

+50
-8
lines changed

6 files changed

+50
-8
lines changed

renku/cli/dataset.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -410,7 +410,10 @@ def edit(dataset_id):
410410
'--force', is_flag=True, help='Allow adding otherwise ignored files.'
411411
)
412412
@click.option(
413-
'--create', is_flag=True, help='Create dataset if it does not exist.'
413+
'-c',
414+
'--create',
415+
is_flag=True,
416+
help='Create dataset if it does not exist.'
414417
)
415418
@click.option(
416419
'-s',

renku/core/commands/dataset.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
from renku.core.models.refs import LinkReference
4848
from renku.core.models.tabulate import tabulate
4949
from renku.core.utils.doi import extract_doi
50+
from renku.core.utils.urls import remove_credentials
5051

5152
from .client import pass_local_client
5253
from .echo import WARNING
@@ -206,6 +207,7 @@ def add_to_dataset(
206207
file_.url = file_.url.geturl()
207208

208209
file_.path = added_.path
210+
file_.url = remove_credentials(file_.url)
209211
file_.creator = with_metadata.creator
210212
file_._label = added_._label
211213
file_.commit = added_.commit
@@ -505,6 +507,7 @@ def import_dataset(
505507
pool.close()
506508

507509
dataset_name = name or dataset.display_name
510+
dataset.url = remove_credentials(dataset.url)
508511
add_to_dataset(
509512
client,
510513
urls=[str(p) for p in Path(data_folder).glob('*')],

renku/core/management/datasets.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
from renku.core.models.locals import with_reference
4242
from renku.core.models.provenance.agents import Person
4343
from renku.core.models.refs import LinkReference
44+
from renku.core.utils.urls import remove_credentials
4445

4546

4647
@attr.s
@@ -301,7 +302,7 @@ def _add_from_local(self, dataset, path, link, destination):
301302
else:
302303
return [{
303304
'path': path_in_repo,
304-
'url': src.as_uri(),
305+
'url': path_in_repo,
305306
'creator': dataset.creator,
306307
'dataset': dataset.name,
307308
'parent': self
@@ -323,7 +324,7 @@ def _add_from_local(self, dataset, path, link, destination):
323324

324325
return [{
325326
'path': destination.relative_to(self.path),
326-
'url': src.as_uri(),
327+
'url': 'file://' + os.path.relpath(str(src), str(self.path)),
327328
'creator': dataset.creator,
328329
'dataset': dataset.name,
329330
'parent': self
@@ -349,7 +350,7 @@ def _add_from_url(self, dataset, url, destination):
349350

350351
return [{
351352
'path': destination.relative_to(self.path),
352-
'url': url,
353+
'url': remove_credentials(url),
353354
'creator': dataset.creator,
354355
'dataset': dataset.name,
355356
'parent': self
@@ -426,7 +427,7 @@ def _add_from_git(self, dataset, url, sources, destination, ref):
426427

427428
results.append({
428429
'path': path_in_dst_repo,
429-
'url': url,
430+
'url': remove_credentials(url),
430431
'creator': creators,
431432
'dataset': dataset.name,
432433
'parent': self,

renku/core/management/git.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
import gitdb
3434

3535
from renku.core import errors
36+
from renku.core.utils.urls import remove_credentials
3637

3738
COMMIT_DIFF_STRATEGY = 'DIFF'
3839
STARTED_AT = int(time.time() * 1e3)
@@ -301,7 +302,8 @@ def commit(
301302
raise errors.NothingToCommit()
302303
return
303304

304-
argv = [os.path.basename(sys.argv[0])] + sys.argv[1:]
305+
argv = [os.path.basename(sys.argv[0])
306+
] + [remove_credentials(arg) for arg in sys.argv[1:]]
305307

306308
# Ignore pre-commit hooks since we have already done everything.
307309
self.repo.index.commit(

renku/core/utils/urls.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
# limitations under the License.
1818
"""Helpers utils for handling URLs."""
1919

20+
import urllib
2021
from urllib.parse import ParseResult
2122

2223

@@ -39,3 +40,11 @@ def url_to_string(url):
3940
return url
4041

4142
raise ValueError('url value not recognized')
43+
44+
45+
def remove_credentials(url):
    """Remove username and password from a URL.

    Only the ``user[:password]@`` part of the network location is dropped;
    the host (as written), an explicit port and the brackets of an IPv6
    literal are kept, as are all other URL components.

    :param url: URL string to sanitize, or ``None``.
    :returns: The URL without credentials, or ``''`` when ``url`` is ``None``.
        Strings without a network location (plain paths, CLI flags) come
        back unchanged apart from ``urlparse``/``geturl`` normalization.
    """
    if url is None:
        return ''

    parsed = urllib.parse.urlparse(url)

    # Userinfo is everything before the LAST '@' of the netloc (a password
    # may itself contain '@'). Rebuilding the netloc from ``parsed.hostname``
    # would lose the port, lower-case the host and strip IPv6 brackets.
    host_and_port = parsed.netloc.rpartition('@')[2]

    return parsed._replace(netloc=host_and_port).geturl()

tests/cli/test_datasets.py

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1170,8 +1170,8 @@ def test_dataset_clean_up_when_add_fails(runner, client):
11701170
assert not ref.is_symlink() and not ref.exists()
11711171

11721172

1173-
def test_add_removes_local_path_information(runner, client, directory_tree):
1174-
"""Test local paths are removed when adding to a dataset."""
1173+
def test_avoid_empty_commits(runner, client, directory_tree):
1174+
"""Test no empty commit is created when adding existing data."""
11751175
runner.invoke(cli, ['dataset', 'create', 'my-dataset'])
11761176

11771177
commit_sha_before = client.repo.head.object.hexsha
@@ -1192,3 +1192,27 @@ def test_add_removes_local_path_information(runner, client, directory_tree):
11921192
commit_sha_after = client.repo.head.object.hexsha
11931193
assert commit_sha_before == commit_sha_after
11941194
assert 'Error: There is nothing to commit.' in result.output
1195+
1196+
1197+
def test_add_removes_credentials(runner, client):
    """Test credentials are removed when adding to a dataset."""
    # The URL must carry ``user:password@`` userinfo, otherwise the test
    # would not exercise credential stripping at all.
    url = 'https://username:password@example.com/index.html'
    result = runner.invoke(cli, ['dataset', 'add', '-c', 'my-dataset', url])
    assert 0 == result.exit_code

    with client.with_dataset('my-dataset') as dataset:
        file_ = dataset.files[0]
        # Stored URL keeps scheme, host and path but no userinfo.
        assert file_.url == 'https://example.com/index.html'
1206+
1207+
1208+
def test_add_removes_local_path_information(runner, client, directory_tree):
    """Test files added from a local directory get a relative file URL."""
    cli_args = ['dataset', 'add', '-c', 'my-dataset', directory_tree.strpath]
    outcome = runner.invoke(cli, cli_args)
    assert 0 == outcome.exit_code

    with client.with_dataset('my-dataset') as dataset:
        for entry in dataset.files:
            # The recorded URL must start with a relative ``file://../``
            # prefix and end with the file's own name.
            assert entry.url.startswith('file://../')
            assert entry.url.endswith(entry.name)

0 commit comments

Comments
 (0)