2 changes: 1 addition & 1 deletion .config/nextest.toml
@@ -113,7 +113,7 @@ store-failure-output = true
 test-threads = 1
 
 [profile.ci]
-inherits = "test"
+# inherits = "test"
 # Print out output for failing tests as soon as they fail, and also at the end
 # of the run (for easy scrollability).
 failure-output = "immediate-final"
16 changes: 8 additions & 8 deletions Cargo.lock


3 changes: 2 additions & 1 deletion Cargo.toml
@@ -22,11 +22,12 @@ http = "1.2.0"
 http-body = "1.0.1"
 hyper = { version = "1.5.2", features = ["client"] }
 hyper-util = "0.1.10"
-lambda_http = { version = "1.0.1", default-features = false, features = [
+lambda_http = { version = "1.1.0-rc1", default-features = false, features = [
     "apigw_http",
     "apigw_rest",
     "alb",
     "pass_through",
+    "experimental-concurrency"
 ] }
 serde_json = "1.0.135"
 tokio = { version = "1.48.0", features = [
25 changes: 22 additions & 3 deletions README.md
@@ -12,6 +12,7 @@ The same docker image can run on AWS Lambda, Amazon EC2, AWS Fargate, and local
- Run web applications on AWS Lambda
- Supports Amazon API Gateway Rest API and Http API endpoints, Lambda Function URLs, and Application Load Balancer
- Supports Lambda managed runtimes, custom runtimes and docker OCI images
- Supports Lambda Managed Instances for handling multiple concurrent requests
- Supports any web frameworks and languages, no new code dependency to include
- Automatically encodes binary responses
- Enables graceful shutdown
@@ -101,7 +102,7 @@ The readiness check port/path and traffic port can be configured using environme
| AWS_LWA_READINESS_CHECK_MIN_UNHEALTHY_STATUS | The minimum HTTP status code that is considered unhealthy | "500" |
| AWS_LWA_ASYNC_INIT / ASYNC_INIT* | enable asynchronous initialization for long initialization functions | "false" |
| AWS_LWA_REMOVE_BASE_PATH / REMOVE_BASE_PATH* | the base path to be removed from request path | None |
-| AWS_LWA_ENABLE_COMPRESSION | enable gzip compression for response body | "false" |
+| AWS_LWA_ENABLE_COMPRESSION | enable gzip/br compression for response body | "false" |
| AWS_LWA_INVOKE_MODE | Lambda function invoke mode: "buffered" or "response_stream", default is "buffered" | "buffered" |
| AWS_LWA_PASS_THROUGH_PATH | the path for receiving event payloads that are passed through from non-http triggers | "/events" |
| AWS_LWA_AUTHORIZATION_SOURCE | a header name to be replaced to `Authorization` | None |
@@ -127,8 +128,8 @@ For example, you could have configured your API Gateway to have a /orders/{proxy
Each resource is handled by a separate Lambda function. For this reason, the application inside Lambda may not be aware that the /orders path exists.
Use REMOVE_BASE_PATH to remove the /orders prefix when routing requests to the application. Defaults to an empty string. Check out the [SpringBoot](examples/springboot) example.

-**AWS_LWA_ENABLE_COMPRESSION** - Lambda Web Adapter supports gzip compression for response body. This feature is disabled by default. Enable it by setting environment variable `AWS_LWA_ENABLE_COMPRESSION` to `true`.
-When enabled, this will compress responses unless it's an image as determined by the content-type starting with `image` or the response is less than 32 bytes. This will also compress HTTP/1.1 chunked streaming response.
+**AWS_LWA_ENABLE_COMPRESSION** - Lambda Web Adapter supports gzip/br compression for response bodies. This feature is disabled by default. Enable it by setting the environment variable `AWS_LWA_ENABLE_COMPRESSION` to `true`.
+When enabled, responses are compressed unless the content-type starts with `image` or the body is smaller than 32 bytes. This feature does not support streaming responses.
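The compression rules just described reduce to a small predicate. The sketch below is illustrative only, not Lambda Web Adapter's actual implementation:

```python
import gzip

def should_compress(content_type: str, body: bytes) -> bool:
    """Illustrative predicate mirroring the documented rules:
    skip images (content-type starts with "image") and bodies under 32 bytes."""
    return not content_type.startswith("image") and len(body) >= 32

body = b"<html>" + b"x" * 100 + b"</html>"
if should_compress("text/html", body):
    body = gzip.compress(body)  # gzip is one of the supported encodings

assert not should_compress("image/png", b"y" * 100)   # images are skipped
assert not should_compress("text/plain", b"tiny")     # under 32 bytes
```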

**AWS_LWA_INVOKE_MODE** - Lambda function invoke mode, this should match Function Url invoke mode. The default is "buffered". When configured as "response_stream", Lambda Web Adapter will stream response to Lambda service [blog](https://aws.amazon.com/blogs/compute/introducing-aws-lambda-response-streaming/).
Please check out [FastAPI with Response Streaming](examples/fastapi-response-streaming) example.
@@ -153,6 +154,23 @@ Lambda Web Adapter forwards this information to the web application in a Http He

Lambda Web Adapter forwards this information to the web application in an HTTP header named "x-amzn-lambda-context". In the web application, you can retrieve the value of this header and deserialize it into a JSON object. Check out [Express.js in Zip](examples/expressjs-zip) on how to use it.

## Lambda Managed Instances

Lambda Web Adapter supports [Lambda Managed Instances](https://docs.aws.amazon.com/lambda/latest/dg/lambda-managed-instances.html), which allows a single Lambda execution environment to handle multiple concurrent requests. This can improve throughput and reduce costs for I/O-bound workloads.

When running on Lambda Managed Instances, Lambda Web Adapter automatically handles concurrent invocations by forwarding multiple requests to your web application simultaneously. Since most web frameworks (Express.js, FastAPI, Spring Boot, etc.) are already designed to handle concurrent requests, your application should work without modification.

### Considerations for Multi-Concurrency

When using Lambda Managed Instances, keep these points in mind:

- **Shared state**: Global variables and in-memory caches are shared across concurrent requests. Ensure your application handles shared state safely.
- **Connection pooling**: Use connection pools for databases and external services rather than single connections.
- **File system**: The `/tmp` directory is shared across concurrent requests. Use unique file names or implement file locking to avoid conflicts.
- **Resource limits**: Memory and CPU are shared across concurrent requests. Monitor resource usage under concurrent load.
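The shared-state point above can be illustrated with a minimal sketch: a hypothetical per-environment counter guarded by an `asyncio.Lock`, assuming an asyncio-based framework such as FastAPI:

```python
import asyncio

# Hypothetical in-memory state shared by all concurrent requests handled by
# one execution environment. The lock serializes the read-modify-write so
# concurrent handlers cannot interleave and lose updates.
cache: dict[str, int] = {}
cache_lock = asyncio.Lock()

async def handle_request(key: str) -> int:
    async with cache_lock:
        cache[key] = cache.get(key, 0) + 1
        return cache[key]

async def main() -> None:
    # Simulate 50 requests handled concurrently in the same environment.
    await asyncio.gather(*(handle_request("hits") for _ in range(50)))

asyncio.run(main())
print(cache["hits"])  # 50 — no updates lost
```

Without the lock this particular example still happens to work (the critical section has no `await`), but any shared read-modify-write that does await in between needs explicit synchronization.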

Lambda Managed Instances works with both buffered and response streaming modes.

## Graceful Shutdown

For a function with Lambda Extensions registered, Lambda enables shutdown phase for the function. When Lambda service is about to shut down a Lambda execution environment,
@@ -184,6 +202,7 @@ The `AWS_LWA_LAMBDA_RUNTIME_API_PROXY` environment variable makes the Lambda Web
- [FastAPI with Background Tasks](examples/fastapi-background-tasks)
- [FastAPI with Response Streaming](examples/fastapi-response-streaming)
- [FastAPI with Response Streaming in Zip](examples/fastapi-response-streaming-zip)
- [FastAPI with Response Streaming on Lambda Managed Instances](examples/fastapi-response-streaming-lmi)
- [FastAPI Response Streaming Backend with IAM Auth](examples/fastapi-backend-only-response-streaming/)
- [Flask](examples/flask)
- [Flask in Zip](examples/flask-zip)
1 change: 1 addition & 0 deletions examples/fastapi-response-streaming-lmi/.gitignore
@@ -0,0 +1 @@
.aws-sam/
121 changes: 121 additions & 0 deletions examples/fastapi-response-streaming-lmi/README.md
@@ -0,0 +1,121 @@
# FastAPI Response Streaming with Lambda Managed Instances

This example shows how to use Lambda Web Adapter to run a FastAPI application with response streaming on [Lambda Managed Instances](https://docs.aws.amazon.com/lambda/latest/dg/lambda-managed-instances.html) (LMI).

Lambda Managed Instances allows a single Lambda execution environment to handle multiple concurrent requests, improving throughput and reducing costs for I/O-bound workloads like streaming responses.

## Prerequisites

Lambda Managed Instances requires a VPC with:
- At least one subnet (two subnets recommended)
- A security group that allows outbound traffic

If you don't have a VPC configured, you can use the default VPC or create one.

## How does it work?

This example combines three Lambda features:

1. **Lambda Web Adapter** - Runs your FastAPI app on Lambda without code changes
2. **Response Streaming** - Streams responses back to clients as they're generated
3. **Lambda Managed Instances** - Handles multiple concurrent requests per execution environment

### Key Configuration

```yaml
LMICapacityProvider:
  Type: AWS::Serverless::CapacityProvider
  Properties:
    CapacityProviderName: !Sub "${AWS::StackName}-capacity-provider"
    VpcConfig:
      SubnetIds: !Ref SubnetIds
      SecurityGroupIds: !Ref SecurityGroupIds
    ScalingConfig:
      MaxVCpuCount: 20
      AverageCPUUtilization: 70.0

FastAPIFunction:
  Type: AWS::Serverless::Function
  Properties:
    CodeUri: app/
    Handler: run.sh
    Runtime: python3.13
    MemorySize: 2048
    AutoPublishAlias: live
    Environment:
      Variables:
        AWS_LAMBDA_EXEC_WRAPPER: /opt/bootstrap
        AWS_LWA_INVOKE_MODE: response_stream
        PORT: 8000
    Layers:
      - !Sub arn:aws:lambda:${AWS::Region}:753240598075:layer:LambdaAdapterLayerX86:26
    CapacityProviderConfig:
      Arn: !GetAtt LMICapacityProvider.Arn
      PerExecutionEnvironmentMaxConcurrency: 64
    FunctionUrlConfig:
      AuthType: NONE
      InvokeMode: RESPONSE_STREAM
```

- `AWS::Serverless::CapacityProvider` - creates the LMI capacity provider with VPC configuration
- `CapacityProviderConfig.Arn` - attaches the function to the capacity provider
- `CapacityProviderConfig.PerExecutionEnvironmentMaxConcurrency: 64` - allows up to 64 concurrent requests per execution environment
- `AWS_LWA_INVOKE_MODE: response_stream` - configures Lambda Web Adapter for response streaming
- `FunctionUrlConfig.InvokeMode: RESPONSE_STREAM` - enables streaming on the Function URL

## Build and Deploy

First, get your VPC subnet and security group IDs:

```bash
# List subnets in your default VPC
aws ec2 describe-subnets --filters "Name=default-for-az,Values=true" \
--query 'Subnets[*].[SubnetId,AvailabilityZone]' --output table

# List security groups
aws ec2 describe-security-groups --filters "Name=group-name,Values=default" \
--query 'SecurityGroups[*].[GroupId,GroupName]' --output table
```

Build and deploy:

```bash
sam build --use-container
sam deploy --guided
```

During guided deployment, you'll be prompted for:
- `SubnetIds` - Comma-separated list of subnet IDs (e.g., `subnet-abc123,subnet-def456`)
- `SecurityGroupIds` - Comma-separated list of security group IDs (e.g., `sg-abc123`)

## Verify it works

Open the Function URL in a browser. You should see a message stream back character by character, with a unique request ID prefix like `[a1b2c3d4] This is streaming from Lambda Managed Instances!`.

### Test concurrent requests

To verify LMI is working, send multiple concurrent requests:

```bash
# Get your function URL
URL=$(aws cloudformation describe-stacks --stack-name fastapi-response-streaming-lmi \
--query 'Stacks[0].Outputs[?OutputKey==`FastAPIFunctionUrl`].OutputValue' --output text)

# Send 10 concurrent requests
for i in {1..10}; do curl -s "$URL" & done; wait
```

Each response will have a different request ID, but they may share the same execution environment (visible in CloudWatch logs).
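The same check can be scripted in Python. This is a sketch; `FUNCTION_URL` is a placeholder you must replace with your deployed Function URL:

```python
from concurrent.futures import ThreadPoolExecutor
from urllib.request import urlopen

# Placeholder: substitute your deployed Function URL.
FUNCTION_URL = "https://example.lambda-url.us-east-1.on.aws/"

def request_id(body: str) -> str:
    """Extract the "[a1b2c3d4]" prefix that the example app prepends."""
    return body.split("]", 1)[0].lstrip("[")

def fetch(url: str) -> str:
    with urlopen(url, timeout=60) as resp:
        return resp.read().decode()

def main() -> None:
    # 10 threads issue requests simultaneously, like the curl loop above.
    with ThreadPoolExecutor(max_workers=10) as pool:
        ids = [request_id(b) for b in pool.map(fetch, [FUNCTION_URL] * 10)]
    print(f"{len(set(ids))} unique request IDs from 10 concurrent requests")

# Only run against a real deployment; the placeholder URL is not reachable.
if "example" not in FUNCTION_URL:
    main()
```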

## Considerations

When using LMI with streaming:

- **VPC**: LMI requires VPC configuration. Ensure your subnets have internet access (via NAT Gateway) if your function needs to call external services
- **Shared state**: FastAPI/Uvicorn handles concurrency natively, but avoid mutable global state
- **Memory**: With 64 concurrent requests, ensure sufficient memory (2048MB in this example)
- **Timeouts**: Streaming responses can run up to 15 minutes with Function URLs
- **Scaling**: `MaxVCpuCount` controls the maximum total vCPU capacity of the capacity provider
25 changes: 25 additions & 0 deletions examples/fastapi-response-streaming-lmi/app/main.py
@@ -0,0 +1,25 @@
from fastapi import FastAPI
from fastapi.responses import StreamingResponse
import asyncio
import uuid

app = FastAPI()

@app.get("/health")
async def health():
    return {"status": "healthy"}


async def streamer(request_id: str):
    """Stream a message character by character with request ID for tracing."""
    message = f"[{request_id}] This is streaming from Lambda Managed Instances!\n"
    for char in message:
        yield char
        await asyncio.sleep(0.05)


@app.get("/")
async def index():
    """Stream response - each concurrent request gets a unique ID."""
    request_id = str(uuid.uuid4())[:8]
    return StreamingResponse(streamer(request_id), media_type="text/plain")
2 changes: 2 additions & 0 deletions examples/fastapi-response-streaming-lmi/app/requirements.txt
@@ -0,0 +1,2 @@
fastapi==0.115.5
uvicorn==0.32.0
5 changes: 5 additions & 0 deletions examples/fastapi-response-streaming-lmi/app/run.sh
@@ -0,0 +1,5 @@
#!/bin/bash

PATH=$PATH:$LAMBDA_TASK_ROOT/bin \
PYTHONPATH=$PYTHONPATH:/opt/python:$LAMBDA_RUNTIME_DIR \
exec python -m uvicorn --port=$PORT main:app
60 changes: 60 additions & 0 deletions examples/fastapi-response-streaming-lmi/template.yaml
@@ -0,0 +1,60 @@
AWSTemplateFormatVersion: '2010-09-09'
Transform: AWS::Serverless-2016-10-31
Description: FastAPI response streaming with Lambda Managed Instances

Parameters:
  SubnetIds:
    Type: List<AWS::EC2::Subnet::Id>
    Description: Subnet IDs for the Lambda Managed Instances capacity provider
  SecurityGroupIds:
    Type: List<AWS::EC2::SecurityGroup::Id>
    Description: Security Group IDs for the Lambda Managed Instances capacity provider

Globals:
  Function:
    Timeout: 120

Resources:
  LMICapacityProvider:
    Type: AWS::Serverless::CapacityProvider
    Properties:
      CapacityProviderName: !Sub "${AWS::StackName}-capacity-provider"
      VpcConfig:
        SubnetIds: !Ref SubnetIds
        SecurityGroupIds: !Ref SecurityGroupIds
      ScalingConfig:
        MaxVCpuCount: 20
        AverageCPUUtilization: 70.0

  FastAPIFunction:
    Type: AWS::Serverless::Function
    Properties:
      CodeUri: app/
      Handler: run.sh
      Runtime: python3.13
      MemorySize: 2048
      AutoPublishAlias: live
      Environment:
        Variables:
          AWS_LAMBDA_EXEC_WRAPPER: /opt/bootstrap
          AWS_LWA_INVOKE_MODE: response_stream
          PORT: 8000
      Layers:
        - !Sub arn:aws:lambda:${AWS::Region}:753240598075:layer:LambdaAdapterLayerX86:26
      CapacityProviderConfig:
        Arn: !GetAtt LMICapacityProvider.Arn
        PerExecutionEnvironmentMaxConcurrency: 64
      FunctionUrlConfig:
        AuthType: NONE
        InvokeMode: RESPONSE_STREAM

Outputs:
  FastAPIFunctionUrl:
    Description: "Function URL for FastAPI function"
    Value: !GetAtt FastAPIFunctionUrl.FunctionUrl
  FastAPIFunction:
    Description: "FastAPI Lambda Function ARN"
    Value: !GetAtt FastAPIFunction.Arn
  CapacityProviderArn:
    Description: "Capacity Provider ARN"
    Value: !GetAtt LMICapacityProvider.Arn
22 changes: 13 additions & 9 deletions src/lib.rs
@@ -317,16 +317,20 @@ impl Adapter<HttpConnector, Body> {
env::set_var("AWS_LAMBDA_RUNTIME_API", runtime_proxy);
}

-        if compression {
-            let svc = ServiceBuilder::new().layer(CompressionLayer::new()).service(self);
-            match invoke_mode {
-                LambdaInvokeMode::Buffered => lambda_http::run(svc).await,
-                LambdaInvokeMode::ResponseStream => lambda_http::run_with_streaming_response(svc).await,
-            }
-        } else {
-            match invoke_mode {
-                LambdaInvokeMode::Buffered => lambda_http::run(self).await,
-                LambdaInvokeMode::ResponseStream => lambda_http::run_with_streaming_response(self).await,
-            }
-        }
+        match invoke_mode {
+            LambdaInvokeMode::Buffered if compression => {
+                let svc = ServiceBuilder::new().layer(CompressionLayer::new()).service(self);
+                lambda_http::run_concurrent(svc).await
+            }
+            LambdaInvokeMode::Buffered => lambda_http::run_concurrent(self).await,
+            LambdaInvokeMode::ResponseStream => {
+                if compression {
+                    tracing::warn!(
+                        "Compression is not supported with response streaming mode. \
+                         Compression will be disabled."
+                    );
+                }
+                lambda_http::run_with_streaming_response_concurrent(self).await
+            }
+        }