Skip to content

Commit 057c557

Browse files
authored
Rework adb-exfiltration-protection module based on the practical usage (#93)
Changes include: - Added `dns` provider to resolve FQDNs into IP Addresses to simplify configuration - SCC Relay & Metastore are now lists of FQDNs instead of IP addresses - that's required for bigger regions, like West Europe - Added variable for extended infrastructure IP range - Moved ADB EventHubs into a separate variable and made it a list of FQDNs (also required for bigger regions). Moved handling of EventHubs traffic to the network rules because we need to handle port 9093 - Added a variable to control if we should skip routing of SCC traffic via firewall or not - Added a possibility to add/override tags - Bumped Databricks provider version. `adb-with-private-links-exfiltration-protection` will be changed as well after this PR is merged.
1 parent ff8d404 commit 057c557

File tree

9 files changed

+257
-81
lines changed

9 files changed

+257
-81
lines changed
Lines changed: 18 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,27 @@
11
/**
2-
* Azure Databricks workspace in custom VNet
2+
* Azure Databricks workspace in custom VNet with traffic routed via firewall in the Hub VNet
33
*
44
* Module creates:
55
* * Resource group with random prefix
66
* * Tags, including `Owner`, which is taken from `az account show --query user`
7-
* * VNet with public and private subnet
7+
* * VNet with public and private subnet for Databricks
8+
* * VNet with subnet for deployment of Azure Firewall
9+
* * Azure Firewall with access enabled to Databricks-related resources
810
* * Databricks workspace
911
*/
1012

1113
module "adb-exfiltration-protection" {
12-
source = "github.com/databricks/terraform-databricks-examples/modules/adb-exfiltration-protection"
13-
hubcidr = var.hubcidr
14-
spokecidr = var.spokecidr
15-
no_public_ip = var.no_public_ip
16-
rglocation = var.rglocation
17-
metastoreip = var.metastoreip
18-
sccip = var.sccip
19-
webappip = var.webappip
20-
dbfs_prefix = var.dbfs_prefix
21-
workspace_prefix = var.workspace_prefix
22-
firewallfqdn = var.firewallfqdn
23-
}
14+
source = "../../modules/adb-exfiltration-protection"
15+
hubcidr = var.hubcidr
16+
spokecidr = var.spokecidr
17+
no_public_ip = var.no_public_ip
18+
rglocation = var.rglocation
19+
metastore = var.metastore
20+
scc_relay = var.scc_relay
21+
webapp_ips = var.webapp_ips
22+
extended_infra_ip = var.extended_infra_ip
23+
dbfs_prefix = var.dbfs_prefix
24+
workspace_prefix = var.workspace_prefix
25+
firewallfqdn = var.firewallfqdn
26+
eventhubs = var.eventhubs
27+
}
Lines changed: 56 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,57 @@
1-
hubcidr = "10.178.0.0/20"
2-
spokecidr = "10.179.0.0/20"
3-
no_public_ip = true
4-
rglocation = "southeastasia"
5-
metastoreip = "40.78.233.2"
6-
sccip = "52.230.27.216/32" // get scc ip from nslookup
7-
webappip = "52.187.145.107/32"
8-
dbfs_prefix = "dbfs"
9-
workspace_prefix = "adb"
10-
firewallfqdn = [ // dbfs rule will be added - depends on dbfs storage name
11-
"dbartifactsprodseap.blob.core.windows.net", //databricks artifacts
12-
"dbartifactsprodeap.blob.core.windows.net", //databricks artifacts secondary
13-
"dblogprodseasia.blob.core.windows.net", //log blob
14-
"prod-southeastasia-observabilityeventhubs.servicebus.windows.net", //eventhub
15-
"cdnjs.com", //ganglia
1+
hubcidr = "10.178.0.0/20"
2+
spokecidr = "10.179.0.0/20"
3+
no_public_ip = true
4+
rglocation = "westeurope"
5+
metastore = ["consolidated-westeurope-prod-metastore.mysql.database.azure.com",
6+
"consolidated-westeurope-prod-metastore-addl-1.mysql.database.azure.com",
7+
"consolidated-westeurope-prod-metastore-addl-2.mysql.database.azure.com",
8+
"consolidated-westeurope-prod-metastore-addl-3.mysql.database.azure.com",
9+
"consolidated-westeuropec2-prod-metastore-0.mysql.database.azure.com",
10+
"consolidated-westeuropec2-prod-metastore-1.mysql.database.azure.com",
11+
"consolidated-westeuropec2-prod-metastore-2.mysql.database.azure.com",
12+
"consolidated-westeuropec2-prod-metastore-3.mysql.database.azure.com",
1613
]
14+
// get from https://learn.microsoft.com/en-us/azure/databricks/resources/supported-regions#--metastore-artifact-blob-storage-system-tables-blob-storage-log-blob-storage-and-event-hub-endpoint-ip-addresses
15+
scc_relay = ["tunnel.westeurope.azuredatabricks.net", "tunnel.westeuropec2.azuredatabricks.net"]
16+
webapp_ips = ["52.230.27.216/32", "40.74.30.80/32"]
17+
eventhubs = ["prod-westeurope-observabilityeventhubs.servicebus.windows.net",
18+
"prod-westeuc2-observabilityeventhubs.servicebus.windows.net",
19+
]
20+
extended_infra_ip = "20.73.215.48/28"
21+
dbfs_prefix = "dbfs"
22+
workspace_prefix = "adb"
23+
firewallfqdn = [ // dbfs rule will be added - depends on dbfs storage name
24+
"dbartifactsprodwesteu.blob.core.windows.net", //databricks artifacts
25+
"arprodwesteua1.blob.core.windows.net",
26+
"arprodwesteua2.blob.core.windows.net",
27+
"arprodwesteua3.blob.core.windows.net",
28+
"arprodwesteua4.blob.core.windows.net",
29+
"arprodwesteua5.blob.core.windows.net",
30+
"arprodwesteua6.blob.core.windows.net",
31+
"arprodwesteua7.blob.core.windows.net",
32+
"arprodwesteua8.blob.core.windows.net",
33+
"arprodwesteua9.blob.core.windows.net",
34+
"arprodwesteua10.blob.core.windows.net",
35+
"arprodwesteua11.blob.core.windows.net",
36+
"arprodwesteua12.blob.core.windows.net",
37+
"arprodwesteua13.blob.core.windows.net",
38+
"arprodwesteua14.blob.core.windows.net",
39+
"arprodwesteua15.blob.core.windows.net",
40+
"arprodwesteua16.blob.core.windows.net",
41+
"arprodwesteua17.blob.core.windows.net",
42+
"arprodwesteua18.blob.core.windows.net",
43+
"arprodwesteua19.blob.core.windows.net",
44+
"arprodwesteua20.blob.core.windows.net",
45+
"arprodwesteua21.blob.core.windows.net",
46+
"arprodwesteua22.blob.core.windows.net",
47+
"arprodwesteua23.blob.core.windows.net",
48+
"arprodwesteua24.blob.core.windows.net",
49+
"dbartifactsprodnortheu.blob.core.windows.net", //databricks artifacts secondary
50+
"ucstprdwesteu.blob.core.windows.net", // system tables storage
51+
"dblogprodwesteurope.blob.core.windows.net", //log blob
52+
"cdnjs.com", //ganglia
53+
// Azure monitor
54+
"global.handler.control.monitor.azure.com",
55+
"westeurope.handler.control.monitor.azure.com",
56+
]
57+
Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,45 +1,64 @@
11
variable "hubcidr" {
2+
description = "IP range for creation of the Hub VNet"
23
type = string
34
default = "10.178.0.0/20"
45
}
56

67
variable "spokecidr" {
8+
description = "IP range for creation of the Spoke VNet"
79
type = string
810
default = "10.179.0.0/20"
911
}
1012

1113
variable "no_public_ip" {
14+
description = "If workspace should be created with No-Public-IP"
1215
type = bool
1316
default = true
1417
}
1518

1619
variable "rglocation" {
20+
description = "Location of resource group"
1721
type = string
18-
default = "southeastasia"
1922
}
2023

21-
variable "metastoreip" {
22-
type = string
24+
variable "metastore" {
25+
description = "List of FQDNs for Azure Databricks Metastore databases"
26+
type = list(string)
2327
}
2428

25-
variable "sccip" {
26-
type = string
29+
variable "scc_relay" {
30+
description = "List of FQDNs for Azure Databricks Secure Cluster Connectivity relay"
31+
type = list(string)
32+
}
33+
34+
variable "webapp_ips" {
35+
description = "List of IP ranges for Azure Databricks Webapp"
36+
type = list(string)
2737
}
2838

29-
variable "webappip" {
39+
variable "extended_infra_ip" {
40+
description = "IP range for Azure Databricks extended infrastructure"
3041
type = string
3142
}
3243

44+
variable "eventhubs" {
45+
description = "List of FQDNs for Azure Databricks EventHubs traffic"
46+
type = list(string)
47+
}
48+
3349
variable "dbfs_prefix" {
50+
description = "Prefix for DBFS storage account name"
3451
type = string
3552
default = "dbfs"
3653
}
3754

3855
variable "workspace_prefix" {
56+
description = "Prefix for workspace name"
3957
type = string
4058
default = "adb"
4159
}
4260

4361
variable "firewallfqdn" {
4462
type = list(any)
63+
description = "List of domain names to put into application rules for handling of HTTPS traffic (Databricks storage accounts, etc.)"
4564
}

examples/adb-exfiltration-protection/versions.tf

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,17 @@ terraform {
33
required_providers {
44
databricks = {
55
source = "databricks/databricks"
6-
version = ">=0.5.1"
6+
version = ">=1.20.0"
77
}
8-
98
azurerm = {
109
source = "hashicorp/azurerm"
1110
version = ">=2.83.0"
1211
}
12+
random = {
13+
source = "hashicorp/random"
14+
}
15+
dns = {
16+
source = "hashicorp/dns"
17+
}
1318
}
1419
}

modules/adb-exfiltration-protection/README.md

Lines changed: 23 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
This template provides an example deployment of: Hub-Spoke networking with egress firewall to control all outbound traffic from Databricks subnets. Details are described in: https://databricks.com/blog/2020/03/27/data-exfiltration-protection-with-azure-databricks.html
44

5-
With this setup, you can setup firewall rules to block / allow egress traffic from your Databricks clusters. You can also use firewall to block all access to storage accounts, and use private endpoint connection to bypass this firewall, such that you allow access only to specific storage accounts.
5+
With this setup, you can set up firewall rules to block / allow egress traffic from your Databricks clusters. You can also use the firewall to block all access to storage accounts, and use a private endpoint connection to bypass this firewall, such that you allow access only to specific storage accounts.
66

77

88
To find IP and FQDN for your deployment, go to: https://docs.microsoft.com/en-us/azure/databricks/administration-guide/cloud-configurations/azure/udr
@@ -20,7 +20,7 @@ Resources to be created:
2020

2121
## How to use
2222

23-
> **Note**
23+
> **Note**
2424
> You can customize this module by adding, deleting or updating the Azure resources to adapt the module to your requirements.
2525
> A deployment example using this module can be found in [examples/adb-exfiltration-protection](../../examples/adb-exfiltration-protection)
2626
@@ -35,18 +35,19 @@ Resources to be created:
3535

3636
## How to fill in variable values
3737

38-
Most of the values are to be found at: https://docs.microsoft.com/en-us/azure/databricks/administration-guide/cloud-configurations/azure/udr
38+
Most of the values are to be found at: https://learn.microsoft.com/en-us/azure/databricks/resources/supported-regions and https://docs.microsoft.com/en-us/azure/databricks/administration-guide/cloud-configurations/azure/udr
3939

40-
In `variables.tfvars`, set these variables:
41-
42-
metastoreip = "40.78.233.2" # find your metastore service ip
43-
44-
sccip = "52.230.27.216" # use nslookup on the domain name to find the ip
45-
46-
webappip = "52.187.145.107/32" # given at UDR page
47-
48-
firewallfqdn = ["dbartifactsprodseap.blob.core.windows.net","dbartifactsprodeap.blob.core.windows.net","dblogprodseasia.blob.core.windows.net","prod-southeastasia-observabilityeventhubs.servicebus.windows.net","cdnjs.com"] # find these for your region, follow Databricks blog tutorial.
40+
In `variables.tfvars`, set these variables (bigger regions have multiple instances of each service):
4941

42+
```hcl
43+
metastore = ["consolidated-westeurope-prod-metastore.mysql.database.azure.com"]
44+
scc_relay = ["tunnel.westeurope.azuredatabricks.net"]
45+
webapp_ips = ["52.230.27.216/32"] # given at UDR page
46+
extended_infra_ip = "20.73.215.48/28"
47+
eventhubs = ["prod-westeurope-observabilityeventhubs.servicebus.windows.net"]
48+
# find these for your region, follow Databricks blog tutorial.
49+
firewallfqdn = ["dbartifactsprodseap.blob.core.windows.net","dbartifactsprodeap.blob.core.windows.net","dblogprodseasia.blob.core.windows.net","cdnjs.com"]
50+
```
5051

5152
<!-- BEGIN_TF_DOCS -->
5253
## Requirements
@@ -63,6 +64,7 @@ firewallfqdn = ["dbartifactsprodseap.blob.core.windows.net","dbartifactsprodeap.
6364
| <a name="provider_azurerm"></a> [azurerm](#provider\_azurerm) | 2.83.0 |
6465
| <a name="provider_external"></a> [external](#provider\_external) | 2.2.0 |
6566
| <a name="provider_random"></a> [random](#provider\_random) | 3.1.0 |
67+
| <a name="provider_dns"></a> [dns](#provider\_dns) | 3.3.0 |
6668

6769
## Modules
6870

@@ -102,16 +104,20 @@ No modules.
102104

103105
| Name | Description | Type | Default | Required |
104106
| -------------------------------------------------------------------------------------------------------------- | ----------- | ----------- | ----------------- | :------: |
107+
| <a name="input_bypass_scc_relay"></a> [bypass\_scc\_relay](#input\_bypass\_scc\_relay) | n/a | `bool` | `true` | no |
105108
| <a name="input_dbfs_prefix"></a> [dbfs\_prefix](#input\_dbfs\_prefix) | n/a | `string` | `"dbfs"` | no |
106-
| <a name="input_firewallfqdn"></a> [firewallfqdn](#input\_firewallfqdn) | n/a | `list(any)` | n/a | yes |
109+
| <a name="input_extended_infra_ip"></a> [extended_infra_ip](#input\_extended_infra_ip) | n/a | `string` | n/a | yes |
110+
| <a name="input_eventhubs"></a> [eventhubs](#input\_eventhubs) | n/a | `list(string)` | n/a | yes |
111+
| <a name="input_firewallfqdn"></a> [firewallfqdn](#input\_firewallfqdn) | n/a | `list(string)` | n/a | yes |
107112
| <a name="input_hubcidr"></a> [hubcidr](#input\_hubcidr) | n/a | `string` | `"10.178.0.0/20"` | no |
108-
| <a name="input_metastoreip"></a> [metastoreip](#input\_metastoreip) | n/a | `string` | n/a | yes |
113+
| <a name="input_metastore"></a> [metastore](#input\_metastore) | n/a | `list(string)` | n/a | yes |
109114
| <a name="input_no_public_ip"></a> [no\_public\_ip](#input\_no\_public\_ip) | n/a | `bool` | `true` | no |
110115
| <a name="input_private_subnet_endpoints"></a> [private\_subnet\_endpoints](#input\_private\_subnet\_endpoints) | n/a | `list` | `[]` | no |
111116
| <a name="input_rglocation"></a> [rglocation](#input\_rglocation) | n/a | `string` | `"southeastasia"` | no |
112-
| <a name="input_sccip"></a> [sccip](#input\_sccip) | n/a | `string` | n/a | yes |
117+
| <a name="input_scc_relay"></a> [scc_relay](#input\_scc_relay) | n/a | `list(string)` | n/a | yes |
113118
| <a name="input_spokecidr"></a> [spokecidr](#input\_spokecidr) | n/a | `string` | `"10.179.0.0/20"` | no |
114-
| <a name="input_webappip"></a> [webappip](#input\_webappip) | n/a | `string` | n/a | yes |
119+
| <a name="input_tags"></a> [tags](#input\_tags) | n/a | `map` | `{}` | no |
120+
| <a name="input_webapp_ips"></a> [webapp\_ips](#input\_webapp\_ips) | n/a | `list(string)` | n/a | yes |
115121
| <a name="input_workspace_prefix"></a> [workspace\_prefix](#input\_workspace\_prefix) | n/a | `string` | `"adb"` | no |
116122

117123
## Outputs
@@ -125,4 +131,4 @@ No modules.
125131
| <a name="output_databricks_azure_workspace_resource_id"></a> [databricks\_azure\_workspace\_resource\_id](#output\_databricks\_azure\_workspace\_resource\_id) | n/a |
126132
| <a name="output_resource_group"></a> [resource\_group](#output\_resource\_group) | n/a |
127133
| <a name="output_workspace_url"></a> [workspace\_url](#output\_workspace\_url) | n/a |
128-
<!-- END_TF_DOCS -->
134+
<!-- END_TF_DOCS -->

0 commit comments

Comments
 (0)