Skip to content

Commit b53bb86

Browse files
authored
[ID-323] Allow tnu to use either Martha or DRSHub to resolve DRS URIs (#398)
* make drs resolver configurable * add env var to readme * fix lint error
1 parent 839a8fb commit b53bb86

File tree

6 files changed

+48
-37
lines changed

6 files changed

+48
-37
lines changed

Changes.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
# Changes for v0.11.0 (2022-11-09)
2+
Added support for calling DRSHub for DRS resolution
3+
Made DRS Resolver configurable by setting `DRS_RESOLVER` to `martha` or `drshub`
4+
15
# Changes for v0.10.0 (2022-06-30)
26

37
# Changes for v0.9.0 (2022-06-30)
@@ -128,4 +132,4 @@ Config override for tests (#72)
128132
## 0.2.1 - 06/12/2020
129133
- Added requester pays set-up to drs module
130134
- Added ENV variable for local testing to __init__
131-
- Added some print messages to drs module to let user know what is happening
135+
- Added some print messages to drs module to let user know what is happening

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -240,7 +240,7 @@ This will run tests against Terra and Martha Dev using Jade Dev DRS url (make su
240240
This will run tests against Terra and Martha Prod (make sure you have proper access to DRS urls, workspace and Google bucket)
241241

242242
3. log in with your Google credentials using `gcloud auth application-default login` with your Terra Prod account
243-
4. set `export GOOGLE_PROJECT=firecloud-cgl; export TERRA_DEPLOYMENT_ENV=prod`
243+
4. set `export GOOGLE_PROJECT=firecloud-cgl; export TERRA_DEPLOYMENT_ENV=prod; export TNU_BLOBSTORE_TEST_GS_BUCKET=tnu-test-bucket`
244244
5. run in package root:
245245
- `make test`: skips controlled and dev access tests
246246
- `make controlled_access_test`: runs tests marked as `controlled_access`

terra_notebook_utils/__init__.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
WORKSPACE_GOOGLE_PROJECT = os.environ.get('GOOGLE_PROJECT') # This env var is set in Terra Cloud Environments
66
TERRA_DEPLOYMENT_ENV = os.environ.get('TERRA_DEPLOYMENT_ENV', 'prod')
77
MARTHA_URL_VERSION = os.environ.get('MARTHA_URL_VERSION', 'martha_v3')
8+
DRS_RESOLVER = os.environ.get('DRS_RESOLVER', 'martha') # values for DRS_RESOLVER are either 'drshub' or 'martha'
89

910
if not WORKSPACE_GOOGLE_PROJECT:
1011
WORKSPACE_GOOGLE_PROJECT = os.environ.get('GCP_PROJECT') # Useful for running outside of notebook
@@ -20,3 +21,8 @@
2021
IO_CONCURRENCY = 3
2122

2223
MARTHA_URL = f"https://us-central1-broad-dsde-{TERRA_DEPLOYMENT_ENV}.cloudfunctions.net/{MARTHA_URL_VERSION}"
24+
DRSHUB_URL = f"https://drshub.dsde-{TERRA_DEPLOYMENT_ENV}.broadinstitute.org/api/v4/drs/resolve"
25+
if DRS_RESOLVER == "martha":
26+
DRS_RESOLVER_URL = MARTHA_URL
27+
else:
28+
DRS_RESOLVER_URL = DRSHUB_URL

terra_notebook_utils/drs.py

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from typing import Dict, List, Tuple, Optional, Union, Iterable
88
from requests import Response
99

10-
from terra_notebook_utils import WORKSPACE_BUCKET, WORKSPACE_NAME, MARTHA_URL, WORKSPACE_NAMESPACE, \
10+
from terra_notebook_utils import WORKSPACE_BUCKET, WORKSPACE_NAME, DRS_RESOLVER_URL, WORKSPACE_NAMESPACE, \
1111
WORKSPACE_GOOGLE_PROJECT
1212
from terra_notebook_utils import workspace, gs, tar_gz, TERRA_DEPLOYMENT_ENV, _GS_SCHEMA
1313
from terra_notebook_utils.utils import is_notebook
@@ -60,18 +60,18 @@ def enable_requester_pays(workspace_name: Optional[str]=WORKSPACE_NAME,
6060
"You will not be able to access DRS URIs that interact with requester pays buckets.")
6161

6262
def get_drs(drs_url: str, fields: List[str]) -> Response:
63-
"""Request DRS information from martha."""
63+
"""Request DRS information from DRS Resolver."""
6464
access_token = gs.get_access_token()
6565

6666
headers = {
6767
'authorization': f"Bearer {access_token}",
6868
'content-type': "application/json"
6969
}
7070

71-
logger.debug(f"Resolving DRS uri '{drs_url}' through '{MARTHA_URL}'.")
71+
logger.debug(f"Resolving DRS uri '{drs_url}' through '{DRS_RESOLVER_URL}'.")
7272

7373
json_body = dict(url=drs_url, fields=fields)
74-
resp = http.post(MARTHA_URL, headers=headers, json=json_body)
74+
resp = http.post(DRS_RESOLVER_URL, headers=headers, json=json_body)
7575

7676
if 200 != resp.status_code:
7777
logger.warning(resp.content)
@@ -99,18 +99,18 @@ def access(drs_url: str,
9999
info = get_drs_info(drs_url, access_url=True)
100100

101101
if info.access_url:
102-
martha_url = info.access_url.strip()
103-
response = requests.get(martha_url, headers={'Range': 'bytes=0-1'})
102+
drs_resolver_url = info.access_url.strip()
103+
response = requests.get(drs_resolver_url, headers={'Range': 'bytes=0-1'})
104104
if response.status_code < 300:
105-
return martha_url
105+
return drs_resolver_url
106106
logger.warning('Got an invalid/inaccessible signed URL... attempting to generate an alternate signed URL...')
107107

108108
url = gs.get_signed_url(bucket=info.bucket_name,
109109
key=info.key,
110110
sa_credentials=info.credentials)
111111
# attempt to get the first byte; we'll get an HTTPError error if we need requester pays
112-
# TODO: hopefully Martha returns this information eventually and we can avoid this check,
113-
# but even in the meantime, there's probably a better way of doing this
112+
# TODO: hopefully the DRS Resolver will return this information eventually and
113+
# we can avoid this check, but even in the meantime, there's probably a better way of doing this
114114
response = requests.get(url, headers={'Range': 'bytes=0-1'})
115115
if b'Bucket is a requester pays bucket' in response.content and response.status_code >= 400:
116116
url = gs.get_signed_url(bucket=info.bucket_name,
@@ -164,8 +164,8 @@ def _drs_info_from_martha_v2(drs_url: str, drs_data: dict) -> DRSInfo:
164164
else:
165165
raise DRSResolutionError(f"No metadata was returned for DRS uri '{drs_url}'")
166166

167-
def _drs_info_from_martha_v3(drs_url: str, drs_data: dict) -> DRSInfo:
168-
"""Convert response from martha_v3 to DRSInfo."""
167+
def _drs_info_from_martha_v3_or_drshub(drs_url: str, drs_data: dict) -> DRSInfo:
168+
"""Convert DRS response to DRSInfo."""
169169
access_url: Optional[str] = None
170170
if drs_data.get('accessUrl') is not None:
171171
access_url = drs_data['accessUrl'].get('url')
@@ -193,14 +193,14 @@ def get_drs_info(drs_url: str, access_url: bool = False) -> DRSInfo:
193193
"name",
194194
"timeUpdated",
195195
"googleServiceAccount"]
196-
# only include this field if necessary because it significantly increases the time for Martha to respond
196+
# only include this field if necessary because it significantly increases the time for the DRS Resolver to respond
197197
if access_url:
198198
fields.append("accessUrl")
199199
drs_data = get_drs(drs_url, fields).json()
200200
if 'dos' in drs_data:
201201
return _drs_info_from_martha_v2(drs_url, drs_data)
202202
else:
203-
return _drs_info_from_martha_v3(drs_url, drs_data)
203+
return _drs_info_from_martha_v3_or_drshub(drs_url, drs_data)
204204

205205
def get_drs_blob(drs_url_or_info: Union[str, DRSInfo],
206206
billing_project: Optional[str]=WORKSPACE_GOOGLE_PROJECT) -> Union[GSBlob, URLBlob]:

terra_notebook_utils/gs.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -82,10 +82,10 @@ def get_signed_url(bucket: str,
8282
"""
8383
default_service_account_credentials = os.environ.get('GOOGLE_APPLICATION_CREDENTIALS')
8484

85-
# these are the service account credentials returned from Martha
85+
# these are the service account credentials returned from the DRS Resolver
8686
if sa_credentials:
8787
creds = service_account.Credentials.from_service_account_info(sa_credentials)
88-
# if Martha did not give us a service account, try the credentials set at GOOGLE_APPLICATION_CREDENTIALS
88+
# if the DRS Resolver did not give us a service account, try the credentials set at GOOGLE_APPLICATION_CREDENTIALS
8989
elif default_service_account_credentials:
9090
creds = service_account.Credentials.from_service_account_file(default_service_account_credentials)
9191
# we can't access the file

tests/test_drs.py

Lines changed: 21 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ class TestTerraNotebookUtilsDRS(SuppressWarningsMixin, unittest.TestCase):
7474
jade_dev_url = "drs://jade.datarepo-dev.broadinstitute.org/v1_0c86170e-312d-4b39-a0a4-2a2bfaa24c7a_" \
7575
"c0e40912-8b14-43f6-9a2f-b278144d0060"
7676

77-
# martha_v3 responses
77+
# drs_resolver responses
7878
mock_jdr_response = {
7979
'contentType': 'application/octet-stream',
8080
'size': 15601108255,
@@ -92,7 +92,7 @@ class TestTerraNotebookUtilsDRS(SuppressWarningsMixin, unittest.TestCase):
9292
'crc32c': '8a366443'
9393
}
9494
}
95-
mock_martha_v3_response_missing_fields = {
95+
mock_drs_resolver_response_missing_fields = {
9696
'contentType': 'application/octet-stream',
9797
'bucket': 'broad-jade-dev-data-bucket',
9898
'name': 'fd8d8492-ad02-447d-b54e-35a7ffd0e7a5/8b07563a-542f-4b5c-9e00-e8fe6b1861de',
@@ -107,7 +107,7 @@ class TestTerraNotebookUtilsDRS(SuppressWarningsMixin, unittest.TestCase):
107107
'md5': '336ea55913bc261b72875bd259753046',
108108
}
109109
}
110-
mock_martha_v3_response_without_gs_uri = {
110+
mock_drs_resolver_response_without_gs_uri = {
111111
'contentType': 'application/octet-stream',
112112
'bucket': 'broad-jade-dev-data-bucket',
113113
'googleServiceAccount': {
@@ -119,7 +119,7 @@ class TestTerraNotebookUtilsDRS(SuppressWarningsMixin, unittest.TestCase):
119119
'md5': '336ea55913bc261b72875bd259753046',
120120
}
121121
}
122-
mock_martha_v3_error_response = {
122+
mock_drs_resolver_error_response = {
123123
"status": 500,
124124
"response": {
125125
"req": {
@@ -144,7 +144,7 @@ class TestTerraNotebookUtilsDRS(SuppressWarningsMixin, unittest.TestCase):
144144
"text": "{\"msg\":\"User 'null' does not have required action: read_data\",\"status_code\":500}"
145145
}
146146
}
147-
mock_martha_v3_empty_error_response = {
147+
mock_drs_resolver_empty_error_response = {
148148
"status": 500,
149149
"response": {
150150
"status": 500,
@@ -423,8 +423,8 @@ def test_arg_propagation(self):
423423
drs.extract_tar_gz(self.drs_url)
424424
enable_requester_pays.assert_called_with(WORKSPACE_NAME, WORKSPACE_NAMESPACE)
425425

426-
# test for when we get everything what we wanted in martha_v3 response
427-
def test_martha_v3_response(self):
426+
# test for when we get everything what we wanted in drs_resolver response
427+
def test_drs_resolver_response(self):
428428
resp_json = mock.MagicMock(return_value=self.mock_jdr_response)
429429
requests_post = mock.MagicMock(return_value=mock.MagicMock(status_code=200, json=resp_json))
430430
with ExitStack() as es:
@@ -454,9 +454,9 @@ def test_martha_v2_response(self):
454454
self.assertEqual(15601108255, actual_info.size)
455455
self.assertEqual('2020-04-27T15:56:09.696Z', actual_info.updated)
456456

457-
# test for when some fields are missing in martha_v3 response
458-
def test_martha_v3_response_with_missing_fields(self):
459-
resp_json = mock.MagicMock(return_value=self.mock_martha_v3_response_missing_fields)
457+
# test for when some fields are missing in drs_resolver response
458+
def test_drs_resolver_response_with_missing_fields(self):
459+
resp_json = mock.MagicMock(return_value=self.mock_drs_resolver_response_missing_fields)
460460
requests_post = mock.MagicMock(return_value=mock.MagicMock(status_code=200, json=resp_json))
461461
with ExitStack() as es:
462462
es.enter_context(mock.patch("terra_notebook_utils.drs.gs.get_client"))
@@ -485,10 +485,10 @@ def test_martha_v2_response_with_missing_fields(self):
485485
self.assertEqual(None, actual_info.size)
486486
self.assertEqual(None, actual_info.updated)
487487

488-
# test for when 'gsUrl' is missing in martha_v3 response. It should throw error
488+
# test for when 'gsUrl' is missing in drs_resolver response. It should throw error
489489
@unittest.skip("TODO: Test no gsUri _and_ no accessUrl")
490-
def test_martha_v3_response_without_gs_uri(self):
491-
resp_json = mock.MagicMock(return_value=self.mock_martha_v3_response_without_gs_uri)
490+
def test_drs_resolver_response_without_gs_uri(self):
491+
resp_json = mock.MagicMock(return_value=self.mock_drs_resolver_response_without_gs_uri)
492492
requests_post = mock.MagicMock(return_value=mock.MagicMock(status_code=200, json=resp_json))
493493
with ExitStack() as es:
494494
es.enter_context(mock.patch("terra_notebook_utils.drs.gs.get_client"))
@@ -506,19 +506,19 @@ def test_martha_v2_response_without_gs_uri(self):
506506
with self.assertRaisesRegex(Exception, f"No GS url found for DRS uri '{self.drs_url}'"):
507507
drs.get_drs_blob(self.drs_url)
508508

509-
# test for when martha_v3 returns error. It should throw error
510-
def test_martha_v3_error_response(self):
511-
resp_json = mock.MagicMock(return_value=self.mock_martha_v3_error_response)
509+
# test for when drs_resolver returns error. It should throw error
510+
def test_drs_resolver_error_response(self):
511+
resp_json = mock.MagicMock(return_value=self.mock_drs_resolver_error_response)
512512
requests_post = mock.MagicMock(return_value=mock.MagicMock(status_code=500, json=resp_json))
513513
with ExitStack() as es:
514514
es.enter_context(mock.patch("terra_notebook_utils.drs.gs.get_client"))
515515
es.enter_context(mock.patch("terra_notebook_utils.drs.http", post=requests_post))
516516
with self.assertRaises(drs.DRSResolutionError):
517517
drs.get_drs_blob(self.jade_dev_url)
518518

519-
# test for when martha_v3 returns error response with 'text' field. It should throw error
520-
def test_martha_v3_empty_error_response(self):
521-
resp_json = mock.MagicMock(return_value=self.mock_martha_v3_empty_error_response)
519+
# test for when drs_resolver returns error response with 'text' field. It should throw error
520+
def test_drs_resolver_empty_error_response(self):
521+
resp_json = mock.MagicMock(return_value=self.mock_drs_resolver_empty_error_response)
522522
requests_post = mock.MagicMock(return_value=mock.MagicMock(status_code=500, json=resp_json))
523523
with ExitStack() as es:
524524
es.enter_context(mock.patch("terra_notebook_utils.drs.gs.get_client"))
@@ -570,7 +570,8 @@ def test_access(self):
570570
response = requests.get(signed_url, headers={'Range': 'bytes=0-1'})
571571
response.raise_for_status()
572572
# Test a Jade resource
573-
# requires that GOOGLE_APPLICATION_CREDENTIALS be set, because martha does not return a service account
573+
# requires that GOOGLE_APPLICATION_CREDENTIALS be set
574+
# because neither martha nor DRSHub returns a service account
574575
jade_uri = 'drs://jade-terra.datarepo-prod.broadinstitute.org/' \
575576
'v1_c3c588a8-be3f-467f-a244-da614be6889a_635984f0-3267-4201-b1ee-d82f64b8e6d1'
576577
with self.subTest(f'Testing DRS Access: {jade_uri}'):

0 commit comments

Comments
 (0)