Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Multi-tenancy as a new capability. #143

Open
wants to merge 16 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added docs/images/multi-tenancy-using-products.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion scenarios/scripts/bicep/deploy-apim-baseline.sh
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,6 @@ echo "$output" | jq "[.properties.outputs | to_entries | .[] | {key:.key, value:
# Read the values needed for the smoke test from the baseline deployment's
# output.json (presumably written by the jq step just above — TODO confirm).
apim_baseline_output_file="$script_dir/../../apim-baseline/bicep/output.json"
appGatewayPublicIpAddress=$(jq -r '.appGatewayPublicIpAddress' "$apim_baseline_output_file")
apimStarterSubscriptionKey=$(jq -r '.apimStarterSubscriptionKey' "$apim_baseline_output_file")

# Build (but do not run) a curl command the operator can paste to call the echo
# API through the Application Gateway's public IP. The Host header carries
# APPGATEWAY_FQDN, which is not assigned in this section and is expected to be
# set earlier in the script or in the environment. -k disables TLS certificate
# verification and -v prints the request/response exchange.
testUri="curl -k -v -H 'Host: ${APPGATEWAY_FQDN}' -H 'Ocp-Apim-Subscription-Key: ${apimStarterSubscriptionKey}' https://${appGatewayPublicIpAddress}/echo/resource?param1=sample"
echo "Test the deployment by running the following command: ${testUri}"
echo -e "\n"
11 changes: 11 additions & 0 deletions scenarios/scripts/bicep/deploy-workload-genai.sh
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,17 @@ echo "== Completed bicep deployment ${deployment_name}"
#######################################
# Build the curl command that calls the chat-completions endpoint of the
# 'aoai' deployment through the Application Gateway with a given key.
# Globals:   APPGATEWAY_FQDN (read), appGatewayPublicIpAddress (read).
#            Neither is assigned in this section; both are expected to be set
#            earlier in the script or in the environment (TODO confirm).
# Arguments: $1 - APIM subscription key sent as Ocp-Apim-Subscription-Key
# Outputs:   the command line on stdout, without a trailing newline
#######################################
build_chat_completions_test_command() {
  local subscription_key=$1
  printf '%s' "curl -k -H 'Host: ${APPGATEWAY_FQDN}' -H 'Ocp-Apim-Subscription-Key: ${subscription_key}' -H 'Content-Type: application/json' https://${appGatewayPublicIpAddress}/openai/deployments/aoai/chat/completions?api-version=2024-02-15-preview -d '{\"messages\": [{\"role\":\"system\",\"content\":\"You are an AI assistant that helps people find information.\"}]}'"
}

# Flatten the deployment outputs into a {name: value} JSON object and persist it.
genai_output_file="$script_dir/../../workload-genai/bicep/output.json"
echo "$output" | jq "[.properties.outputs | to_entries | .[] | {key:.key, value: .value.value}] | from_entries" > "$genai_output_file"

# Subscription keys for the default product and the two multi-tenant products.
apimSubscriptionKey=$(jq -r '.apiManagementAzureOpenAIProductSubscriptionKey' "$genai_output_file")
multiTenantProduct1SubscriptionKey=$(jq -r '.apiManagementMultitenantProduct1SubscriptionKey' "$genai_output_file")
multiTenantProduct2SubscriptionKey=$(jq -r '.apiManagementMultitenantProduct2SubscriptionKey' "$genai_output_file")

# Print (do not run) one ready-to-paste test command per subscription key.
testUri=$(build_chat_completions_test_command "$apimSubscriptionKey")
echo "Test the deployment by running the following command: ${testUri}"
echo -e "\n"

multiTenantProduct1TestUri=$(build_chat_completions_test_command "$multiTenantProduct1SubscriptionKey")
echo "Test the deployment for multi-tenant Product1 by running the following command: ${multiTenantProduct1TestUri}"
echo -e "\n"

multiTenantProduct2TestUri=$(build_chat_completions_test_command "$multiTenantProduct2SubscriptionKey")
echo "Test the deployment for multi-tenant Product2 by running the following command: ${multiTenantProduct2TestUri}"
echo -e "\n"
2 changes: 1 addition & 1 deletion scenarios/scripts/terraform/deploy-apim-baseline.sh
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,6 @@ output=$(curl -s -S -X POST -H "Authorization: Bearer $TOKEN" \
# Extract the primary subscription key from the JSON held in $output
# (presumably the listSecrets REST response captured just above — TODO confirm).
PRIMARY_KEY=$(echo "$output" | jq -r '.primaryKey')

# Look up the IP address of the Application Gateway's public IP resource.
APPGATEWAYPUBLICIPADDRESS=$(az network public-ip show --resource-group "$NETWORK_RESOURCE_GROUP" --name "$APPGATEWAY_PIP" --query ipAddress -o tsv)

# Build (but do not run) a curl command the operator can paste to call the echo
# API through the gateway. -k disables TLS certificate verification and -v
# prints the request/response exchange.
testUri="curl -k -v -H 'Host: ${APPGATEWAY_FQDN}' -H 'Ocp-Apim-Subscription-Key: ${PRIMARY_KEY}' -H 'Content-Type: application/json' https://${APPGATEWAYPUBLICIPADDRESS}/echo/resource?param1=sample"
echo "Test the deployment by running the following command: ${testUri}"
echo -e "\n"
28 changes: 28 additions & 0 deletions scenarios/scripts/terraform/deploy-workload-genai.sh
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,8 @@ NETWORK_RESOURCE_GROUP="rg-networking-${RESOURCE_NAME_PREFIX}-${ENVIRONMENT_TAG}
# Name of the Application Gateway public IP resource, composed from the shared
# naming tokens (prefix, environment tag, location, random identifier).
APPGATEWAY_PIP="pip-appgw-${RESOURCE_NAME_PREFIX}-${ENVIRONMENT_TAG}-${AZURE_LOCATION}-${RANDOM_IDENTIFIER}"
# ID of the Azure subscription currently selected in the Azure CLI.
SUBSCRIPTION_ID=$(az account show --query id -o tsv)
# Names of APIM subscriptions (not Azure subscription IDs). The MT_* values are
# used as the subscription path segment of the listSecrets REST calls later in
# this script; presumably they must match the names created by the Terraform
# deployment — TODO confirm.
API_SUBSCRIPTION_ID="aoai-product-subscription"
MT_PRODUCT1_SUBSCRIPTION_ID="multi-tenant-product1-subscription"
MT_PRODUCT2_SUBSCRIPTION_ID="multi-tenant-product2-subscription"

# Get an access token from the Azure CLI; it is sent as the Bearer token on the
# management.azure.com requests later in this script.
TOKEN=$(az account get-access-token --query accessToken --output tsv)
Expand All @@ -178,3 +180,29 @@ APPGATEWAYPUBLICIPADDRESS=$(az network public-ip show --resource-group "$NETWORK
#######################################
# Build the curl command that calls the chat-completions endpoint of the
# 'gpt-35-turbo-16k' deployment through the Application Gateway.
# Globals:   APPGATEWAY_FQDN (read), APPGATEWAYPUBLICIPADDRESS (read)
# Arguments: $1 - APIM subscription key sent as Ocp-Apim-Subscription-Key
# Outputs:   the command line on stdout, without a trailing newline
#######################################
build_chat_completions_test_command() {
  local subscription_key=$1
  printf '%s' "curl -k -H 'Host: ${APPGATEWAY_FQDN}' -H 'Ocp-Apim-Subscription-Key: ${subscription_key}' -H 'Content-Type: application/json' https://${APPGATEWAYPUBLICIPADDRESS}/openai/deployments/gpt-35-turbo-16k/chat/completions?api-version=2024-02-15-preview -d '{\"messages\": [{\"role\":\"system\",\"content\":\"You are an AI assistant that helps people find information.\"}]}'"
}

#######################################
# Call the Azure REST API listSecrets operation for one APIM subscription.
# Globals:   TOKEN, SUBSCRIPTION_ID, APIM_RESOURCE_GROUP, APIM_SERVICE_NAME (read)
# Arguments: $1 - APIM subscription name (path segment before /listSecrets)
# Outputs:   the raw response body on stdout
#######################################
list_apim_subscription_secrets() {
  local apim_subscription_name=$1
  curl -s -S -X POST -H "Authorization: Bearer $TOKEN" \
    -H "Content-Type: application/json" \
    -H "Content-Length: 0" \
    "https://management.azure.com/subscriptions/$SUBSCRIPTION_ID/resourceGroups/$APIM_RESOURCE_GROUP/providers/Microsoft.ApiManagement/service/$APIM_SERVICE_NAME/subscriptions/${apim_subscription_name}/listSecrets?api-version=2022-08-01"
}

#######################################
# Extract .primaryKey from a listSecrets response.
# jq -r prints the literal string "null" when the field is absent (for example
# when the response is an error document), so that case is reported on stderr
# instead of being silently embedded in the printed test command.
# Arguments: $1 - response JSON, $2 - APIM subscription name (for the warning)
# Outputs:   the key (possibly empty or "null") on stdout; warning on stderr
#######################################
extract_primary_key() {
  local secrets_json=$1
  local apim_subscription_name=$2
  local primary_key
  primary_key=$(echo "$secrets_json" | jq -r '.primaryKey')
  if [ -z "$primary_key" ] || [ "$primary_key" = "null" ]; then
    echo "Warning: no primaryKey returned for APIM subscription '${apim_subscription_name}'; the printed test command will not authenticate." >&2
  fi
  printf '%s' "$primary_key"
}

# Test command for the default product; PRIMARY_KEY is not assigned in this
# section and is expected to be set earlier in the script.
testUri=$(build_chat_completions_test_command "$PRIMARY_KEY")
echo "Test the deployment by running the following command: ${testUri}"
echo -e "\n"

# Call the Azure REST API to get subscription key of multi-tenant product1
mt_product1_sub_output=$(list_apim_subscription_secrets "$MT_PRODUCT1_SUBSCRIPTION_ID")

# Extract the subscription keys
MT_PRODUCT1_SUB_PRIMARY_KEY=$(extract_primary_key "$mt_product1_sub_output" "$MT_PRODUCT1_SUBSCRIPTION_ID")

testUri=$(build_chat_completions_test_command "$MT_PRODUCT1_SUB_PRIMARY_KEY")
echo "Test the deployment for multi-tenant Product1 by running the following command: ${testUri}"
echo -e "\n"

# Call the Azure REST API to get subscription key of multi-tenant product2
mt_product2_sub_output=$(list_apim_subscription_secrets "$MT_PRODUCT2_SUBSCRIPTION_ID")

# Extract the subscription keys
MT_PRODUCT2_SUB_PRIMARY_KEY=$(extract_primary_key "$mt_product2_sub_output" "$MT_PRODUCT2_SUBSCRIPTION_ID")

testUri=$(build_chat_completions_test_command "$MT_PRODUCT2_SUB_PRIMARY_KEY")
echo "Test the deployment for multi-tenant Product2 by running the following command: ${testUri}"
echo -e "\n"
1 change: 1 addition & 0 deletions scenarios/workload-genai/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ This repo currently contains the policies showing how to implement these GenAI G
| [Managing spikes with PAYG](./policies/fragments/manage-spikes-with-payg/README.md) | Manage spikes in traffic by routing traffic to PAYG endpoints when a PTU is out of capacity. |
| [Adaptive rate limiting](./policies/fragments/rate-limiting/README.md) | Dynamically adjust rate-limits applied to different workloads|
| [Tracking token usage](./policies/fragments/usage-tracking//README.md) | Record the token consumption for usage tracking and attribution|
| [Multi-tenancy](./policies/multi-tenancy/README.md)| Implementing multi-tenancy using Products and Product Policies|

### Test/Demo setup

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,28 @@ resource azureOpenAIProduct 'Microsoft.ApiManagement/service/products@2023-05-01
}
}

// First sample tenant product: published, requires a subscription key, and
// grants subscriptions without an approval step.
resource multiTenantProduct1 'Microsoft.ApiManagement/service/products@2023-05-01-preview' = {
  name: 'multi-tenant-product1'
  parent: apiManagementService
  properties: {
    displayName: 'multi-tenant-product1'
    state: 'published'
    subscriptionRequired: true
    approvalRequired: false
  }
}

// Second sample tenant product: published, requires a subscription key, and
// grants subscriptions without an approval step.
resource multiTenantProduct2 'Microsoft.ApiManagement/service/products@2023-05-01-preview' = {
  name: 'multi-tenant-product2'
  parent: apiManagementService
  properties: {
    displayName: 'multi-tenant-product2'
    state: 'published'
    subscriptionRequired: true
    approvalRequired: false
  }
}

var azureOpenAIAPINames = [
azureOpenAIApi.name
]
Expand All @@ -60,6 +82,18 @@ resource azureOpenAIProductAPIAssociation 'Microsoft.ApiManagement/service/produ
}
]

// Attach every API listed in azureOpenAIAPINames to multi-tenant-product1.
// The name is the full '<service>/<product>/<api>' path; referencing
// multiTenantProduct1.name also makes the product an implicit dependency.
resource multiTenantProduct1APIAssociation 'Microsoft.ApiManagement/service/products/apis@2023-05-01-preview' = [
  for apiName in azureOpenAIAPINames: {
    name: '${apiManagementServiceName}/${multiTenantProduct1.name}/${apiName}'
  }
]

// Attach every API listed in azureOpenAIAPINames to multi-tenant-product2.
// The name is the full '<service>/<product>/<api>' path; referencing
// multiTenantProduct2.name also makes the product an implicit dependency.
resource multiTenantProduct2APIAssociation 'Microsoft.ApiManagement/service/products/apis@2023-05-01-preview' = [
  for apiName in azureOpenAIAPINames: {
    name: '${apiManagementServiceName}/${multiTenantProduct2.name}/${apiName}'
  }
]

resource ptuBackendOne 'Microsoft.ApiManagement/service/backends@2023-05-01-preview' = {
parent: apiManagementService
name: 'ptu-backend-1'
Expand Down Expand Up @@ -97,6 +131,26 @@ resource azureOpenAIProductSubscription 'Microsoft.ApiManagement/service/subscri
}
}

// Active subscription scoped to multi-tenant-product1; its primary key is
// exposed through this module's outputs.
resource multiTenantProduct1Subscription 'Microsoft.ApiManagement/service/subscriptions@2023-05-01-preview' = {
  name: 'multi-tenant-product1-subscription'
  parent: apiManagementService
  properties: {
    scope: multiTenantProduct1.id
    displayName: 'multi-tenant-product1-subscription'
    state: 'active'
  }
}

// Active subscription scoped to multi-tenant-product2; its primary key is
// exposed through this module's outputs.
resource multiTenantProduct2Subscription 'Microsoft.ApiManagement/service/subscriptions@2023-05-01-preview' = {
  name: 'multi-tenant-product2-subscription'
  parent: apiManagementService
  properties: {
    scope: multiTenantProduct2.id
    displayName: 'multi-tenant-product2-subscription'
    state: 'active'
  }
}

resource simpleRoundRobinPolicyFragment 'Microsoft.ApiManagement/service/policyFragments@2023-05-01-preview' = {
parent: apiManagementService
name: 'simple-priority-weighted'
Expand Down Expand Up @@ -193,6 +247,26 @@ resource azureOpenAIApiPolicy 'Microsoft.ApiManagement/service/apis/policies@202
usageTrackingWithAppInsightsPolicyFragment]
}

// Product-scope policy for multi-tenant-product1, loaded verbatim from the
// policy XML file at compile time.
// NOTE(review): apiBackend is declared outside this view; confirm the explicit
// dependency is still required for this policy.
resource multiTenantProduct1Policy 'Microsoft.ApiManagement/service/products/policies@2024-06-01-preview' = {
  name: 'policy'
  parent: multiTenantProduct1
  properties: {
    format: 'rawxml'
    value: loadTextContent('../../policies/multi-tenancy/multi-tenant-product1-policy.xml')
  }
  dependsOn: [apiBackend]
}

// Product-scope policy for multi-tenant-product2, loaded verbatim from the
// policy XML file at compile time.
// NOTE(review): apiBackend is declared outside this view; confirm the explicit
// dependency is still required for this policy.
resource multiTenantProduct2Policy 'Microsoft.ApiManagement/service/products/policies@2024-06-01-preview' = {
  name: 'policy'
  parent: multiTenantProduct2
  properties: {
    format: 'rawxml'
    value: loadTextContent('../../policies/multi-tenancy/multi-tenant-product2-policy.xml')
  }
  dependsOn: [apiBackend]
}

resource apimOpenaiApiUamiNamedValue 'Microsoft.ApiManagement/service/namedValues@2022-08-01' = {
name: apimIdentityNameValue
parent: apiManagementService
Expand All @@ -219,3 +293,5 @@ resource eventHubLogger 'Microsoft.ApiManagement/service/loggers@2022-04-01-prev

// Module outputs: the APIM service name plus the primary key of each product
// subscription, read via listSecrets() on the subscription resources.
// NOTE(review): these outputs carry subscription keys in plain text; Bicep's
// linter rule 'outputs-should-not-contain-secrets' flags this pattern —
// confirm it is acceptable for this demo setup.
output apiManagementServiceName string = apiManagementService.name
output apiManagementAzureOpenAIProductSubscriptionKey string = azureOpenAIProductSubscription.listSecrets().primaryKey
output apiManagementMultitenantProduct1SubscriptionKey string = multiTenantProduct1Subscription.listSecrets().primaryKey
output apiManagementMultitenantProduct2SubscriptionKey string = multiTenantProduct2Subscription.listSecrets().primaryKey
2 changes: 2 additions & 0 deletions scenarios/workload-genai/bicep/main.bicep
Original file line number Diff line number Diff line change
Expand Up @@ -139,3 +139,5 @@ resource telemetrydeployment 'Microsoft.Resources/deployments@2021-04-01' = if (

// Re-export the apiManagement module's outputs at the deployment level. The
// Bicep deploy script for this workload reads the three subscription-key
// outputs by these exact names from output.json.
output apiManagementName string = apiManagement.outputs.apiManagementServiceName
output apiManagementAzureOpenAIProductSubscriptionKey string = apiManagement.outputs.apiManagementAzureOpenAIProductSubscriptionKey
output apiManagementMultitenantProduct1SubscriptionKey string = apiManagement.outputs.apiManagementMultitenantProduct1SubscriptionKey
output apiManagementMultitenantProduct2SubscriptionKey string = apiManagement.outputs.apiManagementMultitenantProduct2SubscriptionKey
63 changes: 63 additions & 0 deletions scenarios/workload-genai/policies/multi-tenancy/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# Multi-Tenancy using Azure API Management

Customers sometimes also want a multi-tenancy model on top of their backend APIs.
This is a typical requirement for customers/businesses operating in SaaS-based models. Multi-tenancy in such scenarios is typically defined using the following two business concepts:

1. **Tiers**:
Tiers govern the _quality of service_ exposed to the users based on their pricing model.
For instance, a _Freemium_ tier can serve consumer groups who want to explore the service at no cost, and therefore comes with a very limited quota and strict rate limiting. Likewise, a _Premium_ tier can be defined for consumers who want the most premium-grade service experience, with the highest possible rate limits and quota.

2. **Entitlements**: Apart from _tiers_, businesses would also like to define _entitlements_, which means _giving access to only selected APIs_ for a particular consumer group. For instance, access to only chat-based APIs for consumer A, or only image APIs for consumer B.

## Initial Approach

An initial approach can be to define separate APIs for different customers based on their _tier_ and _entitlement_ combinations, by defining the policies at the API level. The following image describes this approach:

![Rudimentary Solution Approach](../../../../docs/images/multi-tenancy-without-products.png)

### Downsides

As we can clearly observe, this solution results in a lot of redundancy of APIs and API policies, resulting in a very convoluted design overall. The API policy code is also bloated with responsibilities that do not fall under the scope of the API (e.g., with the above design, any new policy we want to include has to be defined as part of the API policies). Further, it is also hard to define entitlements using this model.

## Solution Approach

A better and more effective solution can be built by leveraging APIM [Products](https://learn.microsoft.com/en-us/azure/api-management/api-management-howto-add-products?tabs=azure-portal&pivots=interactive). Products cater to our "_entitlement_" requirement by grouping the APIs related to a specific entitlement in a logical container, and to our "_tier_" requirement through the Product's policies for the respective tier (such as quota and rate limiting, along with the respective backend model, e.g. either PAYG or PTU). Lastly, by defining [subscriptions](https://learn.microsoft.com/en-us/azure/api-management/api-management-subscriptions) at the Product level and giving the end-user group access only to that Product's subscriptions, users can interact with the service only via the specific Product's subscription.
The following design demonstrates this approach further:
![Solution Approach using Products](../../../../docs/images/multi-tenancy-using-products.png)

Here, the Product policy is essentially what lets us define our "tenant"-specific policies.

### Benefits

This solution not only caters to the multi-tenancy requirement effectively, but also makes the overall design modular and extensible: any number of Products and APIs, and any combination of them, can be defined with a clear separation of concerns and adherence to the DRY (Don't Repeat Yourself) principle.

_Note:
As this is a general pattern, this solution is not limited to the GenAI backend and can be used with any other backend as well._

### References

The following blog post describes this scenario in further detail:
https://devblogs.microsoft.com/ise/multitenant-genai-gateway-using-apim/

## Products and Policies

To summarize:

- Products: Act as a logical container of APIs for a specific consumer group (e.g., Chat APIs or Embedding APIs).

- Product Policies: For defining tenant policies (e.g., rate limits, quotas).

As part of this capability's example scenario, we apply a new _quota policy_ at the product level: if the number of requests made to APIM via that Product's subscription exceeds the value of the "calls" attribute, the product policy blocks subsequent requests from that subscription until the quota is renewed, as determined by the "renewal-period" and "counter-key" attributes.

For this setup, we create two sample products (`multi-tenant-product1` and `multi-tenant-product2`) with different counter keys (`<subscription_id>-mt-product1` and `<subscription_id>-mt-product2`, respectively) and with the following policies:

- [`multi-tenant-product1-policy.xml`](multi-tenant-product1-policy.xml)
- [`multi-tenant-product2-policy.xml`](multi-tenant-product2-policy.xml)

The Product Policy can thus be extended with any number of higher-level policies (e.g., defining quotas or rate limits) and any attributes (e.g., setting the name of the backend pool) as per the respective _tenant's_ requirements.

## Note

prasann marked this conversation as resolved.
Show resolved Hide resolved
This capability/pattern is built on top of the existing core capabilities. It can be explored and tested separately, and hence does not impact the existing setup.

However, if the multi-tenancy capability is not needed (i.e. you do not want these resources created as part of the deployment), the respective code blocks can be commented out in the Bicep and Terraform scripts.
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
<!--
    Product-scope policy using the "-mt-product1" quota counter-key suffix.
    - Policies are applied in the order they appear.
    - Position <base/> inside a section to inherit policies from the outer scope.
    - Comments within policies are not preserved.
-->
<!-- Add policies as children to the <inbound>, <outbound>, <backend>, and <on-error> elements -->
<policies>
    <!-- Throttle, authorize, validate, cache, or transform the requests -->
    <inbound>
        <base />
        <!-- Tenant-specific policies (quota, rate limiting, ...) go here.
             Quota: 5 calls per 300-second renewal period, counted per key.
             The key is the subscription id plus a product suffix, but any
             unique identifier can be used. -->
        <quota-by-key calls="5" renewal-period="300" counter-key="@(String.Concat(context.Subscription.Id,"-mt-product1"))" />
    </inbound>
    <!-- Control if and how the requests are forwarded to services -->
    <backend>
        <base />
    </backend>
    <!-- Customize the responses -->
    <outbound>
        <base />
    </outbound>
    <!-- Handle exceptions and customize error responses -->
    <on-error>
        <base />
    </on-error>
</policies>
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
<!--
    Product-scope policy using the "-mt-product2" quota counter-key suffix.
    - Policies are applied in the order they appear.
    - Position <base/> inside a section to inherit policies from the outer scope.
    - Comments within policies are not preserved.
-->
<!-- Add policies as children to the <inbound>, <outbound>, <backend>, and <on-error> elements -->
<policies>
    <!-- Throttle, authorize, validate, cache, or transform the requests -->
    <inbound>
        <base />
        <!-- Tenant-specific policies (quota, rate limiting, ...) go here.
             Quota: 3 calls per 300-second renewal period, counted per key.
             The key is the subscription id plus a product suffix, but any
             unique identifier can be used. -->
        <quota-by-key calls="3" renewal-period="300" counter-key="@(String.Concat(context.Subscription.Id,"-mt-product2"))" />
    </inbound>
    <!-- Control if and how the requests are forwarded to services -->
    <backend>
        <base />
    </backend>
    <!-- Customize the responses -->
    <outbound>
        <base />
    </outbound>
    <!-- Handle exceptions and customize error responses -->
    <on-error>
        <base />
    </on-error>
</policies>
Loading