Skip to content

Commit a742e24

Browse files
authored
Added private link option to the exfiltration protection example (#100)
* Predefine regional settings Corrected README Simplify steps by providing outputs Updated to the latest version of dbx and aws modules Simplify configuration of examples * reverted change * refactor * updated comment * reformat code * Added Private link between clusters on the data plane and core services on the control plane Whitelist maven * updated readme * updated readme * updated vars * updated vars * updated vars * updated comments * refactor for private link * code refactor * implemented code review feedback * implemented code review feedback * removed versions from examples * code refactor * added no public ip param * updated main route table association name * renamed resources * renamed resources * refactor * refactor * moved provider out of the module * moved resource out of variables * use default provider
1 parent 9813743 commit a742e24

19 files changed

+457
-116
lines changed

examples/aws-exfiltration-protection/README.md

+8-4
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@ This template provides an example deployment of AWS Databricks E2 workspace with
1212

1313
> **Note**
1414
> If you are using AWS Firewall to block most traffic but allow the URLs that Databricks needs to connect to, please update the configuration based on your region. You can get the configuration details for your region from [Firewall Appliance](https://docs.databricks.com/administration-guide/cloud-configurations/aws/customer-managed-vpc.html#firewall-appliance-infrastructure) document.
15+
>
16+
> You can optionally enable Private Link in the variables. Enabling Private Link on AWS requires the Databricks "Enterprise" tier, which is configured at the Databricks account level.
17+
1518

1619
1. Reference this module using one of the different [module source types](https://developer.hashicorp.com/terraform/language/modules/sources)
1720
2. Add a `variables.tf` with the same content in [variables.tf](variables.tf)
@@ -20,7 +23,8 @@ This template provides an example deployment of AWS Databricks E2 workspace with
2023
* TF_VAR_databricks_account_username, set to the value of your Databricks account-level admin username.
2124
* TF_VAR_databricks_account_password, set to the value of the password for your Databricks account-level admin user.
2225
* TF_VAR_databricks_account_id, set to the value of the ID of your Databricks account. You can find this value in the corner of your Databricks account console.
23-
5. Add a `output.tf` file.
24-
6. (Optional) Configure your [remote backend](https://developer.hashicorp.com/terraform/language/settings/backends/s3)
25-
7. Run `terraform init` to initialize terraform and get provider ready.
26-
8. Run `terraform apply` to create the resources.
26+
5. (Optional) Configure your [remote backend](https://developer.hashicorp.com/terraform/language/settings/backends/s3)
27+
6. Run `terraform init` to initialize Terraform and get the providers ready.
28+
7. Run `terraform plan` to validate and preview the deployment.
29+
8. Run `terraform apply` to create the resources.
30+
9. Run `terraform output -json` to print the URL (host) of the created Databricks workspace.

examples/aws-exfiltration-protection/main.tf

+14-4
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,18 @@
11
module "aws-exfiltration-protection" {
2-
source = "github.com/databricks/terraform-databricks-examples/modules/aws-exfiltration-protection"
3-
databricks_account_id = var.databricks_account_id
4-
databricks_account_username = var.databricks_account_username
5-
databricks_account_password = var.databricks_account_password
2+
source = "github.com/databricks/terraform-databricks-examples/modules/aws-exfiltration-protection"
3+
databricks_account_id = var.databricks_account_id
4+
prefix = var.prefix
5+
tags = var.tags
6+
spoke_cidr_block = var.spoke_cidr_block
7+
hub_cidr_block = var.hub_cidr_block
8+
region = var.region
9+
whitelisted_urls = var.whitelisted_urls
10+
enable_private_link = var.enable_private_link
11+
12+
providers = {
13+
aws = aws
14+
databricks = databricks.mws
15+
}
616
}
717

818
resource "random_string" "naming" {
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
output "databricks_host" {
2+
value = module.aws-exfiltration-protection.databricks_host
3+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
provider "aws" {
2+
region = var.region
3+
}
4+
5+
// initialize provider in "MWS" mode to provision new workspace
6+
provider "databricks" {
7+
alias = "mws"
8+
host = "https://accounts.cloud.databricks.com"
9+
account_id = var.databricks_account_id
10+
username = var.databricks_account_username
11+
password = var.databricks_account_password
12+
}

examples/aws-exfiltration-protection/variables.tf

+21-30
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
1-
variable "databricks_account_username" {}
2-
variable "databricks_account_password" {}
1+
variable "databricks_account_username" {
2+
type = string
3+
}
4+
5+
variable "databricks_account_password" {
6+
type = string
7+
}
38

49
variable "databricks_account_id" {
510
type = string
@@ -25,39 +30,25 @@ variable "hub_cidr_block" {
2530
}
2631

2732
variable "region" {
28-
default = "eu-central-1"
2933
type = string
34+
default = "eu-central-1"
3035
description = "AWS region to deploy to"
3136
}
3237

3338
variable "whitelisted_urls" {
34-
default = [".pypi.org", ".pythonhosted.org", ".cran.r-project.org"]
35-
description = "List of the domains to allow traffic to"
36-
type = list(string)
37-
}
38-
39-
variable "db_web_app" {
40-
default = "frankfurt.cloud.databricks.com"
41-
description = "Hostname of Databricks web application"
42-
type = string
43-
}
44-
45-
variable "db_tunnel" {
46-
default = "tunnel.eu-central-1.cloud.databricks.com"
47-
description = "Hostname of Databricks SCC Relay"
48-
type = string
49-
}
50-
51-
variable "db_rds" {
52-
default = "mdv2llxgl8lou0.ceptxxgorjrc.eu-central-1.rds.amazonaws.com"
53-
description = "Hostname of AWS RDS instance for built-in Hive Metastore"
54-
type = string
55-
}
56-
57-
variable "db_control_plane" {
58-
default = "18.159.44.32/28"
59-
description = "IP Range for AWS Databricks control plane"
60-
type = string
39+
type = list(string)
40+
default = [
41+
".pypi.org", ".pythonhosted.org", # python packages
42+
".cran.r-project.org", # R packages
43+
".maven.org", # maven artifacts
44+
".storage-download.googleapis.com", # maven mirror
45+
".spark-packages.org", # spark packages
46+
]
47+
}
48+
49+
variable "enable_private_link" {
50+
type = bool
51+
default = false
6152
}
6253

6354
variable "prefix" {
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,11 @@
1-
# versions.tf
21
terraform {
32
required_providers {
43
databricks = {
5-
source = "databricks/databricks"
6-
version = ">=1.13.0"
4+
source = "databricks/databricks"
75
}
86

97
aws = {
10-
source = "hashicorp/aws"
11-
version = "~> 4.58.0"
8+
source = "hashicorp/aws"
129
}
1310
}
1411
}

modules/aws-exfiltration-protection/README.md

+5-4
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Provisioning AWS Databricks E2 with a Hub & Spoke firewall for data exfiltration protection
1+
# Provisioning AWS Databricks E2 workspace with a Hub & Spoke firewall for data exfiltration protection
22

33
This template provides an example deployment of AWS Databricks E2 workspace with a Hub & Spoke firewall for data exfiltration protection. Details are described in [Data Exfiltration Protection With Databricks on AWS](https://www.databricks.com/blog/2021/02/02/data-exfiltration-protection-with-databricks-on-aws.html).
44

@@ -16,21 +16,22 @@ Resources to be created:
1616
* S3 Root bucket
1717
* Cross-account IAM role
1818
* Databricks E2 workspace
19+
* (Optional) Private Link between clusters on the data plane and core services on the control plane
1920

21+
Note that enabling Private Link on AWS requires the Databricks "Enterprise" tier. On AWS, the tier is configured at the Databricks account level.
22+
If your Databricks account is on a lower tier, disable Private Link in the variables (see below).
2023

2124
## How to use
2225

2326
> **Note**
24-
> You can customize this module by adding, deleting or updating the Azure resources to adapt the module to your requirements.
27+
> You can customize this module by adding, deleting or updating the AWS resources to adapt the module to your requirements.
2528
> A deployment example using this module can be found in [examples/aws-exfiltration-protection](../../examples/aws-exfiltration-protection)
2629
> If you are using AWS Firewall to block most traffic but allow the URLs that Databricks needs to connect to, please update the configuration based on your region. You can get the configuration details for your region from [Firewall Appliance](https://docs.databricks.com/administration-guide/cloud-configurations/aws/customer-managed-vpc.html#firewall-appliance-infrastructure) document.
2730
2831
1. Reference this module using one of the different [module source types](https://developer.hashicorp.com/terraform/language/modules/sources)
2932
2. Add a `variables.tf` with the same content in [variables.tf](variables.tf)
3033
3. Add a `terraform.tfvars` file and provide values to each defined variable
3134
4. Configure the following environment variables:
32-
* TF_VAR_databricks_account_username, set to the value of your Databricks account-level admin username.
33-
* TF_VAR_databricks_account_password, set to the value of the password for your Databricks account-level admin user.
3435
* TF_VAR_databricks_account_id, set to the value of the ID of your Databricks account. You can find this value in the corner of your Databricks account console.
3536
5. Add an `output.tf` file.
3637
6. (Optional) Configure your [remote backend](https://developer.hashicorp.com/terraform/language/settings/backends/s3)

modules/aws-exfiltration-protection/firewall.tf

+5-3
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ resource "aws_networkfirewall_rule_group" "databricks_fqdns_rg" {
77
rules_source_list {
88
generated_rules_type = "ALLOWLIST"
99
target_types = ["TLS_SNI", "HTTP_HOST"]
10-
targets = concat([var.db_web_app, var.db_tunnel, var.db_rds, local.db_root_bucket], var.whitelisted_urls)
10+
targets = var.enable_private_link ? local.private_link_whitelisted_urls : local.whitelisted_urls
1111
}
1212
}
1313
rule_variables {
@@ -42,7 +42,7 @@ resource "aws_networkfirewall_rule_group" "allow_db_cpl_protocols_rg" {
4242
content {
4343
action = "PASS"
4444
header {
45-
destination = var.db_control_plane
45+
destination = local.db_control_plane
4646
destination_port = "443"
4747
protocol = stateful_rule.value
4848
direction = "ANY"
@@ -75,7 +75,7 @@ resource "aws_networkfirewall_rule_group" "deny_protocols_rg" {
7575
}
7676
rules_source {
7777
dynamic "stateful_rule" {
78-
for_each = local.protocols
78+
for_each = local.protocols_to_drop
7979
content {
8080
action = "DROP"
8181
header {
@@ -128,12 +128,14 @@ resource "aws_networkfirewall_firewall" "exfiltration_firewall" {
128128
tags = var.tags
129129
}
130130

131+
# Add Route from NAT Gateway to Firewall
131132
resource "aws_route" "db_nat_firewall" {
132133
route_table_id = aws_route_table.hub_nat_public_rt.id
133134
destination_cidr_block = "0.0.0.0/0"
134135
vpc_endpoint_id = data.aws_vpc_endpoint.firewall.id
135136
}
136137

138+
# Add Route from Internet Gateway to Firewall
137139
resource "aws_route" "db_igw_nat_firewall" {
138140
route_table_id = aws_route_table.hub_igw_rt.id
139141
count = length(local.hub_nat_public_subnets_cidr)

modules/aws-exfiltration-protection/iam.tf

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ resource "aws_iam_role" "cross_account_role" {
44
tags = var.tags
55
}
66

7-
resource "aws_iam_role_policy" "this" {
7+
resource "aws_iam_role_policy" "cross_account_policy" {
88
name = "${local.prefix}-policy"
99
role = aws_iam_role.cross_account_role.id
1010
policy = data.databricks_aws_crossaccount_policy.this.json

0 commit comments

Comments
 (0)