CDCgov · halprin · Jan 13, 2025 · Dec 16, 2024 · Dec 17, 2024 · Dec 17, 2024
diff --git a/.github/workflows/azure-load-tests.yml b/.github/workflows/azure-load-tests.yml
@@ -0,0 +1,46 @@
+name: Azure Load Tests
+
+on:
+  workflow_dispatch: # manual trigger (the "Run workflow" button on the Actions page)
+  schedule:
+    - cron: "0 0 * * 2" # Midnight UTC on Tuesdays
+
+  workflow_call: # reusable-workflow trigger; the caller must pass the Azure login secrets below
+    secrets:
+      AZURE_CLIENT_ID:
+        required: true
+      AZURE_TENANT_ID:
+        required: true
+      AZURE_SUBSCRIPTION_ID:
+        required: true
+jobs:
+  loadtest:
+    name: Load Test
+    environment:
+      name: internal
+
+    runs-on: ubuntu-latest
+    permissions:
+      id-token: write # NOTE(review): presumably required so azure/login can use OIDC (no client secret is passed) - confirm
+      contents: read
+
+    steps:
+      - name: Checkout Repository
+        uses: actions/checkout@v4
+
+      - name: Login via Azure CLI # authenticates the az CLI that the later load test step calls
+        uses: azure/login@v2
+        with:
+          client-id: ${{ secrets.AZURE_CLIENT_ID }}
+          tenant-id: ${{ secrets.AZURE_TENANT_ID }}
+          subscription-id: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
+
+      - name: Run Load Test # starts the Azure load test; run ID is a timestamp such as run_20250114_000000
+        run: |
+          az load test-run create \
+            --resource-group "csels-rsti-internal-moderate-rg" \
+            --load-test-resource "load-testing-internal" \
+            --test-id "47a5e722-d63b-4ec9-8b05-17372866f00a" \
+            --test-run-id "run_$(date +%Y%m%d_%H%M%S)" \
+            --description "Run by Github Action"
diff --git a/README.md b/README.md
@@ -176,6 +176,9 @@ weekday via Github actions. See [the rs-e2e readme](rs-e2e/README.md) for more d
 #### Load Testing
 
 Load tests are completed with [Locust.io](https://docs.locust.io/en/stable/installation.html).
+
+##### Running Locally
+
 Run the load tests by running...
 
 ```shell
@@ -184,13 +187,15 @@ Run the load tests by running...
 ./docker-load-execute.sh
 ```
 
-Currently, we are migrating to using Azure. Local load testing is using gradle, however a docker load test is available to mimic the Azure environment settings until the azure migration is complete.
+The Gradle version runs our API via Gradle.  The Docker version JARs our application and runs it in Docker.
 
 This will run the API for you, so no need to run it manually.
+
 >**Note:**
 >
 >**If you are already running the API, stop it before running the load tests or the cleanup steps won't work.**
->
+
+
 The load tests will also spin up (and clean up) a local test DB on port 5434 that should not interfere with the local dev DB.
 
 The `locustfile.py` that specifies the load test is located at
@@ -206,6 +211,52 @@ The terminal will start a local web interface, and you can enter
 the swarm parameters for the test and the local url where the app is running
 (usually `http://localhost:8080`).  You can also set time limits for the tests under 'Advanced Settings'.
 
+##### Running and Creating in Azure
+
+To run, navigate to the
+[Azure Load Tests GitHub Action](https://github.com/CDCgov/trusted-intermediary/actions/workflows/azure-load-tests.yml)
+and click on Run workflow.
+
+To create a new load test in Azure, the subscription first needs to be opted into Azure's Locust preview feature.
+Sadly, the Azure Terraform provider doesn't support load tests so we need to create this via ClickOps.  Therefore, you can follow these steps...
+
+1. Navigate to the Azure Portal with the `?Microsoft_Azure_CloudNativeTesting_locust=true` query parameter.  For
+   example, this [link](https://portal.azure.com/?Microsoft_Azure_CloudNativeTesting_locust=true) will work.
+2. Navigate to or search for the Azure Load Testing service and click Create.
+   1. Walk through the wizard, but make sure to pick the same resource group as the environment you plan to test.
+3. After creation, navigate to the Identity slice, which is under the Settings group, of your new load test.
+   1. Turn the Status to On under the System assigned tab and click Save.
+4. Navigate to the TI key vault in the same resource group as the load test.
+   1. Navigate to the Secrets slice, under the Objects group, and click Generate/Import.
+   2. Provide the name `trusted-intermediary-valid-token-jwt`.
+   3. The secret value should be a newly created JWT that won't expire for a long time, signed with the
+      `organization-trusted-intermediary-private-key-<environment>.pem` private key stored in Keybase.
+   4. Click Create.
+   5. Drill into the latest version of this secret, and click the copy to clipboard button in the Secret Identifier
+      textbox.  We will be using this later during the creation of the actual load test.
+   6. Navigate to the Access policies slice, and click Create.  Select Get and List for Secrets for the permissions and
+      the name of the previously created load test as the principal.
+5. Navigate to the Tests slice, which is under the Tests group, of the previously created load test and click Create and then Upload a script to start
+   walking through the wizard.
+   1. Under the Test plan tab...
+      1. Select the Locust radio button.
+      2. Upload the [`./operations/locustfile.py`](./operations/locustfile.py) file.
+      3. Additional data files from our repository used by the load test need to be uploaded.  E.g. order and result
+         FHIR files.  You can inspect the `locustfile.py` file to find out which data files are used.  As of this
+         writing, that is `002_ORM_O01_short.fhir` and `001_ORU_R01_short.fhir`.
+   2. Under the Parameters tab, add a secret with `trusted-intermediary-valid-token-jwt` as the name.  The Value is the
+      secret URL referenced previously when you added the secret JWT to the key vault.  Before pasting the secret,
+      remove the hexadecimal version from the end of the URL.  E.g.
+      `https://<key-vault>.vault.azure.net/secrets/trusted-intermediary-valid-token-jwt/cf7eb05481c449878f2afe6b51464fd5`
+      becomes `https://<key-vault>.vault.azure.net/secrets/trusted-intermediary-valid-token-jwt/`.  We always want to
+      reference the latest version, and we can do so by omitting the specific version.
+   3. Under the Load tab, configure how much load you want.  You also need to provide the URL of the application you
+      want to load test.
+   4. Under the Test criteria tab, fill in any client-side metrics that you want to evaluate at the end of the load
+      test.  This makes it easy to tell whether the application has the performance we want.  Consider whether you want
+      the test to automatically stop if there are too many errors.
+   5. Create the test.  All the other options not covered here should be looked at and considered.
+
 ### Debugging
 
 #### Attached JVM Config for IntelliJ

diff --git a/adr/008-load-testing.md b/adr/008-load-testing.md
@@ -1,6 +1,7 @@
 # 8. Load Testing
 
-Date: 2022-12-21
+Initial date: 2022-12-21
+Updated: 2025-01-07
 
 ## Decision
 
@@ -16,11 +17,16 @@ Accepted.
 Load Testing will assist in determining the performance of a system under
 real-life load conditions, both normal and extreme.
 
+In January 2025, we added the capability to run load tests automatically on a schedule in Azure.
+In the deployed load tests, we're hitting mock ReportStream endpoints. This allows us to 1) not
+bombard ReportStream with unexpected traffic and 2) identify performance issues that are specific
+to the Intermediary.
+
 ## Impact
 
 ### Positive
 
-- **Scalability:** Locust.io is highly scalable and can simulate millions of users, making it ideal for both small-scale and large-scale load tests. 
+- **Scalability:** Locust.io is highly scalable and can simulate millions of users, making it ideal for both small-scale and large-scale load tests.
 
 
 - **Python-Based:** Writing tests in Python allows for flexibility and ease of use, especially for teams already familiar with the language. 
@@ -29,6 +35,9 @@ real-life load conditions, both normal and extreme.
 - **Cost Efficiency:** Locust.io is open-source, and doesn’t require licensing fees, which can reduce the overall cost of performance testing.
 
 
+- **Azure:** Running the load tests on a schedule in a more realistic environment gives us more consistent data.
+
+
 ### Negative
 
 - **Limited Features:** Compared to more feature-rich tools, Locust.io might lack advanced performance monitoring or detailed reporting features. 
@@ -48,6 +57,12 @@ real-life load conditions, both normal and extreme.
 - **Resource Usage:** Running large-scale tests using Locust.io may require significant system resources, which could impact cost and infrastructure planning.
 
 
+- **Locust Future in Azure is Uncertain:** We were able to create a Locust test in Azure in November 2024, but as of January 2025, we were unable to create another one. We've submitted a bug report, but if we remain unable to create Locust tests in Azure, we won't be able to expand this test setup to other environments.
+
+
+- **Azure Load Testing Cannot be Terraformed:** Since Azure Load Testing resources can't be created/managed in Terraform, they must be created manually. This is more work and more error-prone.
+
+
 ### Related Issues
 
-- #76
+- #76, #1122 
diff --git a/app/src/main/java/gov/hhs/cdc/trustedintermediary/external/javalin/DomainsRegistration.java b/app/src/main/java/gov/hhs/cdc/trustedintermediary/external/javalin/DomainsRegistration.java
@@ -122,6 +122,10 @@ static DomainConnector constructNewDomainConnector(Class<? extends DomainConnect
     static Handler createHandler(
             Function<DomainRequest, DomainResponse> handler, boolean isProtected) {
         return (Context ctx) -> {
+            ApplicationContext
+                    .clearThreadRegistrations(); // clear this thread's specific registrations from
+            // its previous use
+
             LOGGER.logInfo(ctx.method().name() + " " + ctx.url());
 
             var request = javalinContextToDomainRequest(ctx);

diff --git a/etor/src/main/java/gov/hhs/cdc/trustedintermediary/etor/EtorDomainRegistration.java b/etor/src/main/java/gov/hhs/cdc/trustedintermediary/etor/EtorDomainRegistration.java
@@ -173,6 +173,15 @@ DomainResponse handleResults(DomainRequest request) {
     }
 
     DomainResponse handleMetadata(DomainRequest request) {
+        // Any new endpoint that will call RS **must** include this check.
+        if (Boolean.parseBoolean(request.getHeaders().get("load-test"))
+                && ApplicationContext.isPropertyPresent("REPORT_STREAM_URL_PREFIX")) {
+            // register the mock RS endpoint for this HTTP request because we don't want to call RS
+            // for real when doing a load test.
+            ApplicationContext.registerForThread(
+                    RSEndpointClient.class, MockRSEndpointClient.getInstance());
+        }
+
         try {
             String metadataId = request.getPathParams().get("id");
             Optional<PartnerMetadata> metadataOptional =
@@ -226,6 +235,15 @@ protected DomainResponse handleMessageRequest(
         boolean markMetadataAsFailed = false;
         String errorMessage = "";
 
+        // Any new endpoint that will call RS **must** include this check.
+        if (Boolean.parseBoolean(request.getHeaders().get("load-test"))
+                && ApplicationContext.isPropertyPresent("REPORT_STREAM_URL_PREFIX")) {
+            // register the mock RS endpoint for this HTTP request because we don't want to call RS
+            // for real when doing a load test.
+            ApplicationContext.registerForThread(
+                    RSEndpointClient.class, MockRSEndpointClient.getInstance());
+        }
+
         try {
             return requestHandler.handle(inboundReportId);
         } catch (FhirParseException e) {

diff --git a/...va/gov/hhs/cdc/trustedintermediary/etor/metadata/partner/PartnerMetadataOrchestrator.java b/...va/gov/hhs/cdc/trustedintermediary/etor/metadata/partner/PartnerMetadataOrchestrator.java
@@ -1,5 +1,6 @@
 package gov.hhs.cdc.trustedintermediary.etor.metadata.partner;
 
+import gov.hhs.cdc.trustedintermediary.context.ApplicationContext;
 import gov.hhs.cdc.trustedintermediary.etor.RSEndpointClient;
 import gov.hhs.cdc.trustedintermediary.etor.messagelink.MessageLink;
 import gov.hhs.cdc.trustedintermediary.etor.messagelink.MessageLinkException;
@@ -31,7 +32,6 @@ public class PartnerMetadataOrchestrator {
 
     @Inject PartnerMetadataStorage partnerMetadataStorage;
     @Inject MessageLinkStorage messageLinkStorage;
-    @Inject RSEndpointClient rsclient;
     @Inject Formatter formatter;
     @Inject Logger logger;
 
@@ -44,6 +44,9 @@ private PartnerMetadataOrchestrator() {}
     public void updateMetadataForInboundMessage(PartnerMetadata partnerMetadata)
             throws PartnerMetadataException {
 
+        // can't @Inject because the implementation can be different for this specific thread
+        RSEndpointClient rsclient = ApplicationContext.getImplementation(RSEndpointClient.class);
+
         logger.logInfo(
                 "Looking up sender name and timeReceived from RS delivery API for inboundReportId: {}",
                 partnerMetadata.inboundReportId());
@@ -130,6 +133,11 @@ public Optional<PartnerMetadata> getMetadata(String inboundReportId)
         PartnerMetadata partnerMetadata = optionalPartnerMetadata.get();
         var outboundReportId = partnerMetadata.outboundReportId();
         if (metadataIsStale(partnerMetadata) && outboundReportId != null) {
+
+            // can't @Inject because the implementation can be different for this specific thread
+            RSEndpointClient rsclient =
+                    ApplicationContext.getImplementation(RSEndpointClient.class);
+
             logger.logInfo(
                     "Receiver name not found in metadata or delivery status still pending, looking up {} from RS history API",
                     outboundReportId);

diff --git a/...ava/gov/hhs/cdc/trustedintermediary/external/reportstream/ReportStreamEndpointClient.java b/...ava/gov/hhs/cdc/trustedintermediary/external/reportstream/ReportStreamEndpointClient.java
@@ -4,11 +4,9 @@
 import gov.hhs.cdc.trustedintermediary.etor.RSEndpointClient;
 import gov.hhs.cdc.trustedintermediary.wrappers.AuthEngine;
 import gov.hhs.cdc.trustedintermediary.wrappers.Cache;
-import gov.hhs.cdc.trustedintermediary.wrappers.HapiFhir;
 import gov.hhs.cdc.trustedintermediary.wrappers.HttpClient;
 import gov.hhs.cdc.trustedintermediary.wrappers.HttpClientException;
 import gov.hhs.cdc.trustedintermediary.wrappers.Logger;
-import gov.hhs.cdc.trustedintermediary.wrappers.MetricMetadata;
 import gov.hhs.cdc.trustedintermediary.wrappers.SecretRetrievalException;
 import gov.hhs.cdc.trustedintermediary.wrappers.Secrets;
 import gov.hhs.cdc.trustedintermediary.wrappers.formatter.Formatter;
@@ -45,13 +43,10 @@ public class ReportStreamEndpointClient implements RSEndpointClient {
     @Inject private HttpClient client;
     @Inject private AuthEngine jwt;
     @Inject private Formatter formatter;
-    @Inject private HapiFhir fhir;
     @Inject private Logger logger;
     @Inject private Secrets secrets;
     @Inject private Cache cache;
 
-    @Inject MetricMetadata metadata;
-
     private static final ReportStreamEndpointClient INSTANCE = new ReportStreamEndpointClient();
 
     public static ReportStreamEndpointClient getInstance() {

diff --git a/.../java/gov/hhs/cdc/trustedintermediary/external/reportstream/ReportStreamSenderHelper.java b/.../java/gov/hhs/cdc/trustedintermediary/external/reportstream/ReportStreamSenderHelper.java
@@ -1,5 +1,6 @@
 package gov.hhs.cdc.trustedintermediary.external.reportstream;
 
+import gov.hhs.cdc.trustedintermediary.context.ApplicationContext;
 import gov.hhs.cdc.trustedintermediary.etor.RSEndpointClient;
 import gov.hhs.cdc.trustedintermediary.etor.messages.UnableToSendMessageException;
 import gov.hhs.cdc.trustedintermediary.etor.metadata.EtorMetadataStep;
@@ -17,7 +18,6 @@
 public class ReportStreamSenderHelper {
     private static final ReportStreamSenderHelper INSTANCE = new ReportStreamSenderHelper();
 
-    @Inject RSEndpointClient rsclient;
     @Inject Formatter formatter;
     @Inject Logger logger;
     @Inject MetricMetadata metadata;
@@ -41,6 +41,10 @@ public Optional<String> sendResultToReportStream(String body, String fhirResourc
     protected Optional<String> sendToReportStream(
             String body, String fhirResourceId, PartnerMetadataMessageType messageType)
             throws UnableToSendMessageException {
+
+        // can't @Inject because the implementation can be different for this specific thread
+        RSEndpointClient rsclient = ApplicationContext.getImplementation(RSEndpointClient.class);
+
         String bearerToken;
         String rsResponseBody;