Skip to content

Commit 68b7888

Browse files
Enable TNU to use Azure auth for DRS operations #399 (#401)
Add support for using Microsoft Azure default credentials for `drs` subcommands (only) when running in a Terra Azure Interactive Analysis Cloud Environment. These credentials are used to auth with Terra backend services including Martha/terra-drs-hub. When running in a Terra Google IA Cloud Environment, the behavior is as before, with no changes.
1 parent b53bb86 commit 68b7888

File tree

12 files changed

+372
-6
lines changed

12 files changed

+372
-6
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,9 @@ __pycache__/
2626
/.idea
2727
/*.iml
2828

29+
# Default virtual environment name used by PyCharm
30+
/.venv
31+
2932
# JS/node/npm/web dev files
3033
node_modules
3134
npm-debug.log

README.md

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,11 +40,13 @@ pip install --upgrade --no-cache-dir terra-notebook-utils
4040

4141
## Credentials
4242
Much of the terra-notebook-utilities functionality requires credentialed access through a Google Cloud Platform account.
43-
Credentials are already available when running in a Terra notebook environment. Otherwise, credentials may be obtained
44-
with the command
43+
Credentials are already available when running in a Terra Google notebook environment.
44+
Otherwise, Google credentials may be obtained with the command
4545
```
4646
gcloud auth application-default login
4747
```
48+
The terra-notebook-utilities `drs` subcommands (only) run successfully in a Terra Azure
49+
notebook environment using the Azure default credentials.
4850

4951
## Usage
5052

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
azure-identity >= 1.12.0, < 2
12
google-cloud-storage >= 1.38.0, < 2
23
gs-chunked-io >= 0.5.1, < 0.6
34
firecloud

terra_notebook_utils/__init__.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
import os
2+
from dataclasses import dataclass
3+
from enum import Enum
24

35
WORKSPACE_NAME = os.environ.get('WORKSPACE_NAME', None)
46
WORKSPACE_NAMESPACE = os.environ.get('WORKSPACE_NAMESPACE') # This env var is set in Terra Cloud Environments
@@ -26,3 +28,20 @@
2628
DRS_RESOLVER_URL = MARTHA_URL
2729
else:
2830
DRS_RESOLVER_URL = DRSHUB_URL
31+
32+
33+
class ExecutionEnvironment(Enum):
    """Kind of environment in which terra-notebook-utils is executing."""

    # NOTE: the values deliberately have no trailing comma. A trailing comma
    # would turn the member value into a one-element tuple, not a string.
    TERRA_WORKSPACE = "TERRA_WORKSPACE"  # Executing in a Terra Workspace (on any supported platform)
    OTHER = "OTHER"  # Executing outside a Terra Workspace (e.g., local system)
36+
37+
38+
class ExecutionPlatform(Enum):
    """Cloud platform hosting the compute environment terra-notebook-utils runs in."""

    # NOTE: the values deliberately have no trailing comma. A trailing comma
    # would turn the member value into a one-element tuple, not a string.
    AZURE = "AZURE"  # Executing in an Azure compute environment
    GOOGLE = "GOOGLE"  # Executing in a Google compute environment
    UNKNOWN = "UNKNOWN"  # Execution platform not identified (e.g., local system)
42+
43+
44+
@dataclass
class ExecutionContext:
    """Describes where terra-notebook-utils is executing: environment plus platform."""

    # Whether execution happens inside a Terra Workspace or elsewhere.
    execution_environment: ExecutionEnvironment
    # Compute platform (Azure, Google, or not identified).
    execution_platform: ExecutionPlatform

terra_notebook_utils/azure_auth.py

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
"""
2+
Microsoft Azure identity/auth support.
3+
4+
See:
5+
https://azuresdkdocs.blob.core.windows.net/$web/python/azure-identity/1.12.0/index.html
6+
https://learn.microsoft.com/en-us/python/api/azure-identity/azure.identity.defaultazurecredential?view=azure-python
7+
https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/identity/azure-identity/azure/identity/_credentials/default.py
8+
"""
9+
import logging
10+
import os
11+
from typing import Optional
12+
13+
from azure.identity import DefaultAzureCredential
14+
from terra_notebook_utils.logger import logger
15+
16+
17+
# Single instance of DefaultAzureCredential that is initialized lazily.
18+
# The instance is treated as threadsafe and reusable.
19+
# The Azure documentation is silent on thread safety.
20+
# Based on scanning the code, it appears to be threadsafe.
21+
# See: https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/identity/
22+
# azure-identity/azure/identity/_credentials/default.py
23+
_AZURE_CREDENTIAL: Optional[DefaultAzureCredential] = None
24+
25+
26+
def _set_azure_identity_logging_level(level: int) -> None:
27+
""" Set the logging level for modules participating the Azure default credential flow """
28+
import azure.identity
29+
logging.getLogger(azure.identity._credentials.environment.__name__).setLevel(level)
30+
logging.getLogger(azure.identity._credentials.managed_identity.__name__).setLevel(level)
31+
logging.getLogger(azure.identity._credentials.chained.__name__).setLevel(level)
32+
logging.getLogger("azure.core.pipeline.policies.http_logging_policy").setLevel(level)
33+
34+
35+
# Suppress extraneous azure-identity INFO level logging
36+
_set_azure_identity_logging_level(logging.WARNING)
37+
38+
39+
def _get_default_credential() -> DefaultAzureCredential:
    """
    Return the shared DefaultAzureCredential, instantiating it lazily on first use.

    Note: Lazy instantiation is not strictly required, as no exception is
    raised on construction even if Azure credentials are not configured.

    :return: Reference to the single shared instance of DefaultAzureCredential
    """
    # NOTE(review): Should a more sophisticated Singleton pattern be used instead?
    global _AZURE_CREDENTIAL
    # None is the "not created yet" sentinel, so test identity rather than truthiness.
    if _AZURE_CREDENTIAL is None:
        _AZURE_CREDENTIAL = DefaultAzureCredential()
    return _AZURE_CREDENTIAL
53+
54+
55+
def get_azure_access_token() -> str:
    """
    Return an Azure access token.

    A non-empty 'TERRA_NOTEBOOK_AZURE_ACCESS_TOKEN' environment variable takes
    precedence; otherwise a token is requested using the Azure default credentials.

    raises ClientAuthenticationError
    """
    env_token = os.environ.get('TERRA_NOTEBOOK_AZURE_ACCESS_TOKEN')
    if env_token:
        logger.debug("Using Azure token configured using 'TERRA_NOTEBOOK_AZURE_ACCESS_TOKEN'")
        return env_token

    logger.debug("Requesting Azure default credentials token.")
    credential = _get_default_credential()
    access_token = credential.get_token("https://management.azure.com/.default")
    logger.debug("Using Azure default credentials token.")
    return access_token.token

terra_notebook_utils/drs.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from terra_notebook_utils.blobstore.progress import Indicator
1919
from terra_notebook_utils.blobstore import Blob, copy_client, BlobNotFoundError
2020
from terra_notebook_utils.logger import logger
21+
from terra_notebook_utils.terra_auth import get_terra_access_token
2122

2223

2324
DRSInfo = namedtuple("DRSInfo", "credentials access_url bucket_name key name size updated checksums")
@@ -46,7 +47,7 @@ def enable_requester_pays(workspace_name: Optional[str]=WORKSPACE_NAME,
4647
rawls_url = (f"https://rawls.dsde-{TERRA_DEPLOYMENT_ENV}.broadinstitute.org/api/workspaces/"
4748
f"{workspace_namespace}/{encoded_workspace}/enableRequesterPaysForLinkedServiceAccounts")
4849
logger.info("Enabling requester pays for your workspace. This will only take a few seconds...")
49-
access_token = gs.get_access_token()
50+
access_token = get_terra_access_token()
5051

5152
headers = {
5253
'authorization': f"Bearer {access_token}",
@@ -61,7 +62,7 @@ def enable_requester_pays(workspace_name: Optional[str]=WORKSPACE_NAME,
6162

6263
def get_drs(drs_url: str, fields: List[str]) -> Response:
6364
"""Request DRS information from DRS Resolver."""
64-
access_token = gs.get_access_token()
65+
access_token = get_terra_access_token()
6566

6667
headers = {
6768
'authorization': f"Bearer {access_token}",

terra_notebook_utils/logger.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import json
21
import logging
2+
from logging import Logger
33

4-
logger = logging.getLogger(__name__)
4+
logger: Logger = logging.getLogger(__name__)

terra_notebook_utils/terra_auth.py

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
"""
2+
Support for auth with Terra backend services.
3+
"""
4+
from azure.core.exceptions import ClientAuthenticationError
5+
from google.auth.exceptions import DefaultCredentialsError
6+
7+
from terra_notebook_utils import azure_auth, gs, ExecutionPlatform
8+
from terra_notebook_utils.logger import logger
9+
from terra_notebook_utils.utils import get_execution_context
10+
11+
12+
class AuthenticationError(Exception):
    """Raised when no token to auth with Terra backend services could be obtained."""
14+
15+
16+
class TerraAuthTokenProvider:
    """
    Supplies auth bearer tokens suitable for use with Terra backend services.
    """
    def __init__(self):
        # The execution context is looked up once, when the provider is created.
        self.execution_context = get_execution_context()

    @staticmethod
    def _identify_valid_access_token() -> str:
        """
        Probe the Terra supported auth providers for a usable bearer token.

        Google is tried first, then Azure. The first token obtained is returned;
        if neither provider yields one, AuthenticationError is raised.

        :return: auth bearer token suitable for use with Terra backend services
        :raises: AuthenticationError
        """
        try:
            logger.debug("Attempting to obtain a Google access token to use with Terra backend services.")
            token = gs.get_access_token()
        except DefaultCredentialsError as google_error:
            logger.debug("Failed to obtain a Google access token to use with Terra backend services.",
                         exc_info=google_error)
        else:
            logger.debug("Using Google access token to use with Terra backend services.")
            return token

        try:
            logger.debug("Attempting to obtain a Azure access token to use with Terra backend services.")
            token = azure_auth.get_azure_access_token()
        except ClientAuthenticationError as azure_error:
            logger.debug("Failed to obtain a Azure access token to use with Terra backend services.",
                         exc_info=azure_error)
        else:
            logger.debug("Using Azure access token to use with Terra backend services.")
            return token

        raise AuthenticationError("Failed to obtain a Google or Azure token to auth with Terra backend services.")

    def get_terra_access_token(self) -> str:
        """
        Return a bearer token chosen according to the detected execution platform.

        Google and Azure platforms use their respective default credentials; any
        other platform falls back to probing both providers in turn.

        :return: auth bearer token suitable for use with Terra backend services
        """
        platform = self.execution_context.execution_platform
        if platform == ExecutionPlatform.GOOGLE:
            logger.debug("Using Google default credentials to auth with Terra services.")
            return gs.get_access_token()
        if platform == ExecutionPlatform.AZURE:
            logger.debug("Using Azure default credentials to auth with Terra services.")
            return azure_auth.get_azure_access_token()
        return self._identify_valid_access_token()
60+
61+
62+
# Single instance of TerraAuthTokenProvider.
63+
TERRA_AUTH_TOKEN_PROVIDER = TerraAuthTokenProvider()
64+
65+
66+
def get_terra_access_token() -> str:
    """
    Return an auth bearer token suitable for use with Terra backend services.

    Delegates to the module-level TerraAuthTokenProvider instance.

    :raises: AuthenticationError
    """
    provider = TERRA_AUTH_TOKEN_PROVIDER
    return provider.get_terra_access_token()

terra_notebook_utils/utils.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,14 @@
11
import json
2+
import os
23
import threading
34
from functools import lru_cache
45
from concurrent.futures import ThreadPoolExecutor, Future, as_completed
56
from typing import Any, Callable, Dict, Optional, Iterable, Set
67

78
import jmespath
89

10+
from terra_notebook_utils import ExecutionEnvironment, ExecutionPlatform, ExecutionContext
11+
912

1013
class _AsyncContextManager:
1114
"""Context manager for asynchronous execution. Wait on exit for all jobs to complete."""
@@ -63,3 +66,25 @@ def is_notebook() -> bool:
6366
return "ZMQInteractiveShell" == get_ipython().__class__.__name__ # type: ignore
6467
except NameError:
6568
return False
69+
70+
71+
@lru_cache()
def get_execution_context() -> ExecutionContext:
    """
    Identify information about the context in which terra-notebook-utils is executing.

    The result is memoized by ``lru_cache``, so the environment is inspected only
    on the first call.

    TODO Improve the information available and algorithm to identify these values accurately!
    """
    bucket = os.environ.get('WORKSPACE_BUCKET', None)
    if bucket and bucket.startswith("gs://"):
        platform = ExecutionPlatform.GOOGLE
    else:
        # Workaround for insufficient information: a platform that is not
        # identified as Google is assumed to be Azure.
        platform = ExecutionPlatform.AZURE

    # Workaround for insufficient information: the execution environment is
    # assumed to be Terra, as that is the most common and important case.
    return ExecutionContext(ExecutionEnvironment.TERRA_WORKSPACE, platform)

tests/test_azure_auth.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
#!/usr/bin/env python
2+
"""
3+
Test auth for Terra running on Microsoft Azure.
4+
Ideally tests should be run in a Terra Azure workspace.
5+
Unit test this module by mocking as needed to run in a generic context.
6+
"""
7+
import os
8+
import sys
9+
import unittest
10+
from unittest import mock
11+
from unittest.mock import patch
12+
from collections import namedtuple
13+
14+
15+
pkg_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..')) # noqa
16+
sys.path.insert(0, pkg_root) # noqa
17+
18+
# from tests import config # initialize the test environment
19+
from tests.infra.testmode import testmode
20+
from terra_notebook_utils import azure_auth
21+
22+
23+
@testmode("workspace_access")
class TestAzureAuth(unittest.TestCase):
    """Tests for terra_notebook_utils.azure_auth, mocked so they run in a generic context."""

    def test_get_azure_access_token(self):
        """
        Exercise both token sources of get_azure_access_token and the credential singleton.

        See this article for info about mock patch for environment variables:
        https://adamj.eu/tech/2020/10/13/how-to-mock-environment-variables-with-pythons-unittest/
        """
        dummy_token_value = "@@DUMMY_AZURE_ACCESS_TOKEN@@"
        token_env_var = "TERRA_NOTEBOOK_AZURE_ACCESS_TOKEN"

        # Test token that is explicitly provided by env var "TERRA_NOTEBOOK_AZURE_ACCESS_TOKEN"
        with self.subTest("Azure token provided as env var"):
            env_vars = {token_env_var: dummy_token_value}
            with mock.patch.dict(os.environ, env_vars, clear=True):
                self.assertEqual(dummy_token_value, azure_auth.get_azure_access_token())

        # Test getting Azure default credentials token
        with self.subTest("Get Azure default credentials access token"):
            AzureToken = namedtuple("AzureToken", "token")
            azure_token = AzureToken(dummy_token_value)
            # patch.dict restores os.environ on exit; inside it, drop any ambient
            # token so the default credential code path is the one being tested.
            with mock.patch.dict(os.environ):
                os.environ.pop(token_env_var, None)
                with patch.object(azure_auth.DefaultAzureCredential, "get_token", return_value=azure_token):
                    self.assertEqual(dummy_token_value, azure_auth.get_azure_access_token())

        # Verify that the default credential is managed as a singleton
        with self.subTest("Verify DefaultAzureCredential singleton"):
            # patch.object resets the module-level singleton for this check and
            # restores its previous value afterwards, so no state leaks to other tests.
            with patch.object(azure_auth, "_AZURE_CREDENTIAL", None):
                first_value = azure_auth._get_default_credential()
                second_value = azure_auth._get_default_credential()
                self.assertIs(first_value, second_value)
51+
52+
53+
if __name__ == '__main__':
54+
unittest.main()

0 commit comments

Comments
 (0)