Skip to content

Commit 00dc758

Browse files
committed
Create mirror queues and bucket (#6859)
1 parent 836782d commit 00dc758

File tree

9 files changed

+70
-9
lines changed

9 files changed

+70
-9
lines changed

deployments/dev/environment.py

+2
Original file line number | Diff line number | Diff line change
@@ -256,6 +256,8 @@ def env() -> Mapping[str, Optional[str]]:
256256

257257
'AZUL_GOOGLE_OAUTH2_CLIENT_ID': '713613812354-aelk662bncv14d319dk8juce9p11um00.apps.googleusercontent.com',
258258

259+
'AZUL_MIRRORING_ENABLED': '1',
260+
259261
'azul_slack_integration': json.dumps({
260262
'workspace_id': 'T09P9H91S', # ucsc-gi.slack.com
261263
'channel_id': 'C04K81HUALD' # #team-boardwalk-dev

deployments/sandbox/environment.py

+2
Original file line number | Diff line number | Diff line change
@@ -278,4 +278,6 @@ def env() -> Mapping[str, Optional[str]]:
278278
'GOOGLE_PROJECT': 'platform-hca-dev',
279279

280280
'AZUL_GOOGLE_OAUTH2_CLIENT_ID': '713613812354-3bj4m7vnsbco82bke96idvg8cpdv6r9r.apps.googleusercontent.com',
281+
282+
'AZUL_MIRRORING_ENABLED': '1',
281283
}

environment.py

+5
Original file line number | Diff line number | Diff line change
@@ -633,6 +633,11 @@ def env() -> Mapping[str, Optional[str]]:
633633
#
634634
'AZUL_DSS_SOURCE': None,
635635

636+
# Mirror data files from the indexed repository in a dedicated S3 bucket
637+
# (1 yes, 0 no).
638+
#
639+
'AZUL_MIRRORING_ENABLED': '0',
640+
636641
# A short string (no punctuation allowed) that identifies a Terraform
637642
# component i.e., a distinct set of Terraform resources to be deployed
638643
# together but separately from resources in other components. They are

scripts/mirror_file.py

+6-2
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
"""
2-
Copy a file in an HCA catalog from TDR to the current deployment's storage
2+
Copy a file in an HCA catalog from TDR to the current deployment's mirroring
33
bucket and print a signed URL to the file's destination. Authentication is not
44
supported, so the file must be publicly accessible.
55
"""
@@ -18,6 +18,9 @@
1818
from azul.azulclient import (
1919
AzulClient,
2020
)
21+
from azul.deployment import (
22+
aws,
23+
)
2124
from azul.drs import (
2225
AccessMethod,
2326
)
@@ -76,12 +79,13 @@ def object_key(file: JSON) -> str:
7679

7780

7881
def mirror_file(catalog: CatalogName, file_uuid: str, part_size: int) -> str:
82+
assert config.enable_mirroring, 'Mirroring must be enabled'
7983
assert config.is_tdr_enabled(catalog), 'Only TDR catalogs are supported'
8084
assert config.is_hca_enabled(catalog), 'Only HCA catalogs are supported'
8185
file = get_file(catalog, file_uuid)
8286
download_url = get_download_url(catalog, file)
8387
key = object_key(file)
84-
storage = StorageService()
88+
storage = StorageService(bucket_name=aws.mirror_bucket)
8589
upload = storage.create_multipart_upload(key, content_type=file['content-type'])
8690

8791
total_size = file['size']

src/azul/__init__.py

+16-1
Original file line number | Diff line number | Diff line change
@@ -285,6 +285,8 @@ def qualified_bucket_name(self,
285285

286286
storage_term = 'storage'
287287

288+
mirror_term = 'datamirror'
289+
288290
current = Sentinel()
289291

290292
def alb_access_log_path_prefix(self,
@@ -1323,6 +1325,9 @@ def contribution_lambda_timeout(self, *, retry: bool) -> int:
13231325
def aggregation_lambda_timeout(self, *, retry: bool) -> int:
13241326
return (10 if retry else 1) * 60
13251327

1328+
def mirror_lambda_timeout(self) -> int:
1329+
return 15
1330+
13261331
service_lambda_timeout = 15 * 60
13271332

13281333
api_gateway_timeout = 29
@@ -1494,16 +1499,22 @@ def derive(self, *, retry: bool = False, fail: bool = False) -> Self:
14941499

14951500
notifications_queue = Queue('notifications')
14961501
tallies_queue = Queue('tallies', fifo=True)
1502+
mirror_queue = Queue('mirror', fifo=True)
14971503

14981504
@property
14991505
def all_queue_names(self) -> list[str]:
1500-
return self.indexer_queue_names + self.fail_queue_names
1506+
return [
1507+
*self.indexer_queue_names,
1508+
*self.fail_queue_names,
1509+
*([self.mirror_queue.name] if self.enable_mirroring else []),
1510+
]
15011511

15021512
@property
15031513
def fail_queue_names(self) -> list[str]:
15041514
return [
15051515
self.tallies_queue.to_fail.name,
15061516
self.notifications_queue.to_fail.name,
1517+
*([self.mirror_queue.to_fail.name] if self.enable_mirroring else []),
15071518
]
15081519

15091520
@property
@@ -1747,6 +1758,10 @@ def vpn_subnet(self) -> str:
17471758
def it_flags(self) -> set[str]:
17481759
return set(self.environ.get('azul_it_flags', '').split())
17491760

1761+
@property
1762+
def enable_mirroring(self) -> bool:
1763+
return self._boolean(self.environ['AZUL_MIRRORING_ENABLED'])
1764+
17501765

17511766
config: Config = Config() # yes, the type hint does help PyCharm
17521767

src/azul/deployment.py

+5
Original file line number | Diff line number | Diff line change
@@ -604,6 +604,11 @@ def storage_bucket(self):
604604
return self.qualified_bucket_name(config.storage_term,
605605
deployment_name=config.deployment_stage)
606606

607+
@property
608+
def mirror_bucket(self):
609+
return self.qualified_bucket_name(config.mirror_term,
610+
deployment_name=config.deployment_stage)
611+
607612
# An ELB account ID, which varies depending on region, is needed to specify
608613
# the principal in bucket policies for buckets storing LB access logs.
609614
#

src/azul/queues.py

+4-1
Original file line number | Diff line number | Diff line change
@@ -426,7 +426,10 @@ def submit(f, *args, **kwargs):
426426
try:
427427
function = functions_by_queue[queue_name]
428428
except KeyError:
429-
assert queue_name in config.fail_queue_names
429+
assert queue_name in {
430+
*config.fail_queue_names,
431+
config.mirror_queue.name
432+
}
430433
else:
431434
if queue_name == config.notifications_queue.name:
432435
# Prevent new notifications from being added

terraform/s3.tf.json.template.py

+11-4
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,10 @@
2424
config.storage_term: {
2525
'bucket': aws.storage_bucket,
2626
'force_destroy': True
27-
}
27+
},
28+
**({config.mirror_term: {
29+
'bucket': aws.mirror_bucket,
30+
}} if config.enable_mirroring else {})
2831
},
2932
'aws_s3_bucket_lifecycle_configuration': {
3033
config.storage_term: {
@@ -45,13 +48,17 @@
4548
}
4649
},
4750
'aws_s3_bucket_logging': {
48-
config.storage_term: {
49-
'bucket': '${aws_s3_bucket.%s.id}' % config.storage_term,
51+
bucket: {
52+
'bucket': '${aws_s3_bucket.%s.id}' % bucket,
5053
'target_bucket': '${data.aws_s3_bucket.%s.id}' % config.logs_term,
5154
# Other S3 log deliveries, like ELB, implicitly put a slash
5255
# after the prefix. S3 doesn't, so we add one explicitly.
53-
'target_prefix': config.s3_access_log_path_prefix(config.storage_term) + '/'
56+
'target_prefix': config.s3_access_log_path_prefix(bucket) + '/'
5457
}
58+
for bucket in (
59+
config.storage_term,
60+
*([config.mirror_term] if config.enable_mirroring else [])
61+
)
5562
}
5663
}
5764
}

terraform/sqs.tf.json.template.py

+19-1
Original file line number | Diff line number | Diff line change
@@ -50,7 +50,25 @@
5050
'fifo_queue': True,
5151
'name': config.tallies_queue.to_fail.name,
5252
'message_retention_seconds': 14 * 24 * 60 * 60,
53-
}
53+
},
54+
**({
55+
config.mirror_queue.unqual_name: {
56+
'name': config.mirror_queue.name,
57+
'fifo_queue': True,
58+
'message_retention_seconds': 7 * 24 * 60 * 60,
59+
'visibility_timeout_seconds': config.mirror_lambda_timeout() + 10,
60+
'redrive_policy': json.dumps({
61+
'maxReceiveCount': 1,
62+
'deadLetterTargetArn': '${aws_sqs_queue.%s.arn}'
63+
% config.mirror_queue.fail.unqual_name
64+
})
65+
},
66+
config.mirror_queue.fail.unqual_name: {
67+
'name': config.mirror_queue.fail.name,
68+
'fifo_queue': True,
69+
'message_retention_seconds': 14 * 24 * 60 * 60,
70+
}
71+
} if config.enable_mirroring else {})
5472
}
5573
}
5674
]

0 commit comments

Comments
 (0)