Skip to content

Commit cc0c0b1

Browse files
authored
Update adb-external-hive-metastore to use azurerm v4 (#151)
Signed-off-by: Niko <[email protected]>
1 parent 0763c45 commit cc0c0b1

File tree

11 files changed

+67
-56
lines changed

11 files changed

+67
-56
lines changed

examples/adb-external-hive-metastore/README.md

+2-2
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ This template will complete 99% process for external hive metastore deployment w
1111
On your local machine:
1212

1313
1. Clone this repository to local.
14-
2. Provide values to variables, some variables will have default values defined. See inputs section below on optional/required variables.
14+
2. Update `terraform.tfvars` file and provide values to each defined variable. Some variables may have default values defined in `variables.tf` file.
1515
3. For step 2, you can also supply the `db_username` and `db_password` variables through environment variables: Terraform automatically looks for environment variables named in the format TF_VAR_xxxxx.
1616

1717
`export TF_VAR_db_username=yoursqlserveradminuser`
@@ -56,4 +56,4 @@ for db in dbs:
5656
f.write(DDL.first()[0])
5757
f.write("\n")
5858
f.close()
59-
```
59+
```

examples/adb-external-hive-metastore/akv.tf

+7-7
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,12 @@ resource "azurerm_key_vault" "akv1" {
77
soft_delete_retention_days = 7
88
purge_protection_enabled = false
99
enabled_for_disk_encryption = true
10+
}
1011

11-
access_policy {
12-
tenant_id = data.azurerm_client_config.current.tenant_id
13-
object_id = data.azurerm_client_config.current.object_id
14-
15-
key_permissions = ["Backup", "Delete", "Get", "List", "Purge", "Recover", "Restore"]
16-
secret_permissions = ["Backup", "Delete", "Get", "List", "Purge", "Recover", "Restore", "Set"]
17-
}
12+
resource "azurerm_key_vault_access_policy" "this" {
13+
key_vault_id = azurerm_key_vault.akv1.id
14+
tenant_id = data.azurerm_client_config.current.tenant_id
15+
object_id = data.azurerm_client_config.current.object_id
16+
key_permissions = ["Delete", "Get", "List", "Purge", "Recover", "Restore"]
17+
secret_permissions = ["Delete", "Get", "List", "Purge", "Recover", "Restore", "Set"]
1818
}

examples/adb-external-hive-metastore/cold_start_metastore.tf

+6-3
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,11 @@ resource "databricks_notebook" "ddl" {
66

77
resource "databricks_job" "metastoresetup" {
88
name = "Initialize external hive metastore"
9-
existing_cluster_id = databricks_cluster.coldstart.id
10-
notebook_task {
11-
notebook_path = databricks_notebook.ddl.path
9+
task {
10+
task_key = "task-1"
11+
existing_cluster_id = databricks_cluster.coldstart.id
12+
notebook_task {
13+
notebook_path = databricks_notebook.ddl.path
14+
}
1215
}
1316
}

examples/adb-external-hive-metastore/main.tf

+8-18
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,3 @@
1-
/**
2-
* this example creates:
3-
* * Resource group with random prefix
4-
* * Tags, including `Owner`, which is taken from `az account show --query user`
5-
* * VNet with public and private subnet
6-
* * Databricks workspace
7-
* * External Hive Metastore for ADB workspace
8-
*/
9-
101
resource "random_string" "naming" {
112
special = false
123
upper = false
@@ -33,17 +24,16 @@ data "databricks_spark_version" "latest_lts" {
3324

3425

3526
locals {
36-
// dltp - databricks labs terraform provider
37-
prefix = join("-", [var.workspace_prefix, "${random_string.naming.result}"])
38-
location = var.rglocation
39-
cidr = var.spokecidr
40-
sqlcidr = var.sqlvnetcidr
41-
dbfsname = join("", [var.dbfs_prefix, "${random_string.naming.result}"]) // dbfs name must not have special chars
42-
db_url = "jdbc:sqlserver://${azurerm_mssql_server.metastoreserver.name}.database.windows.net:1433;database=${azurerm_mssql_database.sqlmetastore.name};user=${var.db_username}@${azurerm_mssql_server.metastoreserver.name};password={${var.db_password}};encrypt=true;trustServerCertificate=false;hostNameInCertificate=*.database.windows.net;loginTimeout=30;"
43-
27+
prefix = join("-", [var.workspace_prefix, "${random_string.naming.result}"])
28+
location = var.rglocation
29+
cidr = var.spokecidr
30+
sqlcidr = var.sqlvnetcidr
31+
dbfsname = join("", [var.dbfs_prefix, "${random_string.naming.result}"]) // dbfs name must not have special chars
32+
db_url = "jdbc:sqlserver://${azurerm_mssql_server.metastoreserver.name}.database.windows.net:1433;database=${azurerm_mssql_database.sqlmetastore.name};user=${var.db_username}@${azurerm_mssql_server.metastoreserver.name};password={${var.db_password}};encrypt=true;trustServerCertificate=false;hostNameInCertificate=*.database.windows.net;loginTimeout=30;"
33+
my_username = lookup(data.external.me.result, "name")
4434
tags = {
4535
Environment = "Testing"
46-
Owner = lookup(data.external.me.result, "name")
36+
Owner = local.my_username
4737
Epoch = random_string.naming.result
4838
}
4939
}
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,29 @@
11
output "databricks_azure_workspace_resource_id" {
2-
value = azurerm_databricks_workspace.this.id
2+
description = "**Depricated**"
3+
value = azurerm_databricks_workspace.this.id
34
}
45

5-
output "workspace_url" {
6-
// The workspace URL which is of the format 'adb-{workspaceId}.{random}.azuredatabricks.net'
7-
// this is not named as DATABRICKS_HOST, because it affect authentication
8-
value = "https://${azurerm_databricks_workspace.this.workspace_url}/"
6+
output "resource_group" {
7+
description = "**Depricated**"
8+
value = azurerm_resource_group.this.name
99
}
1010

11-
output "resource_group" {
12-
value = azurerm_resource_group.this.name
11+
output "azure_resource_group_id" {
12+
description = "The Azure resource group ID"
13+
value = azurerm_resource_group.this.id
14+
}
15+
16+
output "workspace_id" {
17+
description = "The Databricks workspace ID"
18+
value = azurerm_databricks_workspace.this.workspace_id
1319
}
20+
21+
output "workspace_url" {
22+
description = "The Databricks workspace URL"
23+
value = "https://${azurerm_databricks_workspace.this.workspace_url}/"
24+
}
25+
26+
output "keyvault_id" {
27+
description = "The Azure KeyVault ID"
28+
value = azurerm_key_vault.akv1.id
29+
}

examples/adb-external-hive-metastore/providers.tf

+4-7
Original file line numberDiff line numberDiff line change
@@ -2,28 +2,25 @@ terraform {
22
required_providers {
33
databricks = {
44
source = "databricks/databricks"
5-
version = ">=1.27.0"
5+
version = ">=1.52.0"
66
}
7-
87
azurerm = {
98
source = "hashicorp/azurerm"
10-
version = ">=3.76.0"
9+
version = ">=4.0.0"
1110
}
1211
}
1312
}
1413

15-
provider "random" {
16-
}
17-
1814
provider "azurerm" {
15+
subscription_id = var.subscription_id
1916
features {
2017
key_vault {
2118
purge_soft_delete_on_destroy = true
2219
}
2320
}
2421
}
2522

26-
# Use Azure CLI to authenticate at Azure Databricks account level, and the Azure Databricks workspace level
23+
# This will be used to manage Azure Databricks workspace resources (Azure Databricks workspace itself is managed by `azurerm` provider)
2724
provider "databricks" {
2825
host = azurerm_databricks_workspace.this.workspace_url
2926
}
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
resource "databricks_secret_scope" "kv" {
22
# akv backed secret scope
3-
name = "hive"
3+
name = "hive"
4+
initial_manage_principal = "users"
45
keyvault_metadata {
56
resource_id = azurerm_key_vault.akv1.id
67
dns_name = azurerm_key_vault.akv1.vault_uri
@@ -11,19 +12,19 @@ resource "azurerm_key_vault_secret" "hiveurl" {
1112
name = "HIVE-URL"
1213
value = local.db_url
1314
key_vault_id = azurerm_key_vault.akv1.id
14-
depends_on = [azurerm_key_vault.akv1]
15+
depends_on = [azurerm_key_vault_access_policy.this]
1516
}
1617

1718
resource "azurerm_key_vault_secret" "hiveuser" {
1819
name = "HIVE-USER"
1920
value = var.db_username
2021
key_vault_id = azurerm_key_vault.akv1.id
21-
depends_on = [azurerm_key_vault.akv1]
22+
depends_on = [azurerm_key_vault_access_policy.this]
2223
}
2324

2425
resource "azurerm_key_vault_secret" "hivepwd" {
2526
name = "HIVE-PASSWORD"
2627
value = var.db_password
2728
key_vault_id = azurerm_key_vault.akv1.id
28-
depends_on = [azurerm_key_vault.akv1]
29+
depends_on = [azurerm_key_vault_access_policy.this]
2930
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
subscription_id = "<your Azure Subscription ID here>"
2+
db_username = "<yoursqlserveradminuser>"
3+
db_password = "<yoursqlserveradminpassword>"

examples/adb-external-hive-metastore/variables.tf

+5-5
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
variable "subscription_id" {
2+
type = string
3+
description = "Azure Subscription ID to deploy the workspace into"
4+
}
5+
16
variable "spokecidr" {
27
type = string
38
default = "10.179.0.0/20"
@@ -8,11 +13,6 @@ variable "sqlvnetcidr" {
813
default = "10.178.0.0/20"
914
}
1015

11-
variable "no_public_ip" {
12-
type = bool
13-
default = true
14-
}
15-
1616
variable "rglocation" {
1717
type = string
1818
default = "southeastasia"

examples/adb-external-hive-metastore/vnet.tf

+2-2
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ resource "azurerm_subnet" "private" {
4242
virtual_network_name = azurerm_virtual_network.this.name
4343
address_prefixes = [cidrsubnet(local.cidr, 3, 1)]
4444

45-
private_endpoint_network_policies_enabled = true
45+
private_endpoint_network_policies = "Enabled"
4646
private_link_service_network_policies_enabled = true
4747

4848
delegation {
@@ -70,7 +70,7 @@ resource "azurerm_subnet" "plsubnet" {
7070
resource_group_name = azurerm_resource_group.this.name
7171
virtual_network_name = azurerm_virtual_network.this.name
7272
address_prefixes = [cidrsubnet(local.cidr, 3, 2)]
73-
private_endpoint_network_policies_enabled = true
73+
private_endpoint_network_policies = "Enabled"
7474
}
7575

7676

examples/adb-external-hive-metastore/workspace.tf

+2-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ resource "azurerm_databricks_workspace" "this" {
66
tags = local.tags
77
customer_managed_key_enabled = true
88
custom_parameters {
9-
no_public_ip = var.no_public_ip
109
virtual_network_id = azurerm_virtual_network.this.id
1110
private_subnet_name = azurerm_subnet.private.name
1211
public_subnet_name = azurerm_subnet.public.name
@@ -26,6 +25,8 @@ resource "databricks_cluster" "coldstart" {
2625
cluster_name = "cluster - external metastore"
2726
spark_version = data.databricks_spark_version.latest_lts.id
2827
node_type_id = var.node_type
28+
data_security_mode = "SINGLE_USER"
29+
single_user_name = local.my_username
2930
autotermination_minutes = 30
3031
autoscale {
3132
min_workers = 1

0 commit comments

Comments
 (0)