Skip to content

Commit f9cb7f3

Browse files
committed
Add example of running an emr serverless job from cdk
1 parent ab70dc0 commit f9cb7f3

12 files changed

+230
-0
lines changed

cdk/emr-serverless-job-run/.gitignore

+10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
*.swp
2+
package-lock.json
3+
__pycache__
4+
.pytest_cache
5+
.venv
6+
*.egg-info
7+
8+
# CDK asset staging directory
9+
.cdk.staging
10+
cdk.out

cdk/emr-serverless-job-run/README.md

+58
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
2+
# Welcome to your CDK Python project!
3+
4+
This project is an example of starting an EMR Serverless job run from a CDK app written in Python.
5+
6+
The `cdk.json` file tells the CDK Toolkit how to execute your app.
7+
8+
This project is set up like a standard Python project. The initialization
9+
process also creates a virtualenv within this project, stored under the `.venv`
10+
directory. To create the virtualenv it assumes that there is a `python3`
11+
(or `python` for Windows) executable in your path with access to the `venv`
12+
package. If for any reason the automatic creation of the virtualenv fails,
13+
you can create the virtualenv manually.
14+
15+
To manually create a virtualenv on macOS and Linux:
16+
17+
```
18+
$ python3 -m venv .venv
19+
```
20+
21+
After the init process completes and the virtualenv is created, you can use the following
22+
step to activate your virtualenv.
23+
24+
```
25+
$ source .venv/bin/activate
26+
```
27+
28+
If you are on a Windows platform, you would activate the virtualenv like this:
29+
30+
```
31+
% .venv\Scripts\activate.bat
32+
```
33+
34+
Once the virtualenv is activated, you can install the required dependencies.
35+
36+
```
37+
$ pip install -r requirements.txt
38+
```
39+
40+
At this point you can now synthesize the CloudFormation template for this code.
41+
42+
```
43+
$ cdk synth
44+
```
45+
46+
To add additional dependencies, for example other CDK libraries, just add
47+
them to your `requirements.txt` file and rerun the `pip install -r requirements.txt`
48+
command.
49+
50+
## Useful commands
51+
52+
* `cdk ls` list all stacks in the app
53+
* `cdk synth` emits the synthesized CloudFormation template
54+
* `cdk deploy` deploy this stack to your default AWS account/region
55+
* `cdk diff` compare deployed stack with current state
56+
* `cdk docs` open CDK documentation
57+
58+
Enjoy!

cdk/emr-serverless-job-run/app.py

+28
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
#!/usr/bin/env python3
2+
import os
3+
4+
import aws_cdk as cdk
5+
6+
from emr_serverless_job_run.emr_serverless_job_run_stack import EmrServerlessJobRunStack
7+
8+
9+
# Root of the CDK construct tree for this project.
app = cdk.App()

# The stack is created without an 'env', which makes it environment-agnostic:
# account/region-dependent features and context lookups will not work, but a
# single synthesized template can be deployed anywhere.
#
# To specialize the stack for the account and region implied by the current
# CLI configuration, pass this keyword argument to the stack:
#
#   env=cdk.Environment(account=os.getenv('CDK_DEFAULT_ACCOUNT'), region=os.getenv('CDK_DEFAULT_REGION'))
#
# To deploy to one exact, known account and region, pass this instead:
#
#   env=cdk.Environment(account='123456789012', region='us-east-1')
#
# For more information, see https://docs.aws.amazon.com/cdk/latest/guide/environments.html
EmrServerlessJobRunStack(app, "EmrServerlessJobRunStack")

# Produce the cloud assembly (CloudFormation template) for the app.
app.synth()

cdk/emr-serverless-job-run/cdk.json

+40
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
{
2+
"app": "python3 app.py",
3+
"watch": {
4+
"include": [
5+
"**"
6+
],
7+
"exclude": [
8+
"README.md",
9+
"cdk*.json",
10+
"requirements*.txt",
11+
"source.bat",
12+
"**/__init__.py",
13+
"python/__pycache__",
14+
"tests"
15+
]
16+
},
17+
"context": {
18+
"@aws-cdk/aws-apigateway:usagePlanKeyOrderInsensitiveId": true,
19+
"@aws-cdk/core:stackRelativeExports": true,
20+
"@aws-cdk/aws-rds:lowercaseDbIdentifier": true,
21+
"@aws-cdk/aws-lambda:recognizeVersionProps": true,
22+
"@aws-cdk/aws-lambda:recognizeLayerVersion": true,
23+
"@aws-cdk/aws-cloudfront:defaultSecurityPolicyTLSv1.2_2021": true,
24+
"@aws-cdk-containers/ecs-service-extensions:enableDefaultLogDriver": true,
25+
"@aws-cdk/aws-ec2:uniqueImdsv2TemplateName": true,
26+
"@aws-cdk/core:checkSecretUsage": true,
27+
"@aws-cdk/aws-iam:minimizePolicies": true,
28+
"@aws-cdk/aws-ecs:arnFormatIncludesClusterName": true,
29+
"@aws-cdk/core:validateSnapshotRemovalPolicy": true,
30+
"@aws-cdk/aws-codepipeline:crossAccountKeyAliasStackSafeResourceName": true,
31+
"@aws-cdk/aws-s3:createDefaultLoggingPolicy": true,
32+
"@aws-cdk/aws-sns-subscriptions:restrictSqsDescryption": true,
33+
"@aws-cdk/aws-apigateway:disableCloudWatchRole": true,
34+
"@aws-cdk/core:enablePartitionLiterals": true,
35+
"@aws-cdk/core:target-partitions": [
36+
"aws",
37+
"aws-cn"
38+
]
39+
}
40+
}

cdk/emr-serverless-job-run/emr_serverless_job_run/__init__.py

Whitespace-only changes.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
from aws_cdk import Stack
2+
from aws_cdk import aws_emrserverless as emrs
3+
from aws_cdk import aws_iam as iam # Duration,
4+
from aws_cdk import custom_resources as custom
5+
from constructs import Construct
6+
7+
8+
class EmrServerlessJobRunStack(Stack):
    """Stack that creates an EMR Serverless Spark application and starts a job run.

    The stack defines three things:

    * an EMR Serverless application of type ``SPARK``,
    * an IAM role assumable by ``emr-serverless.amazonaws.com`` that is used
      as the job's execution role, and
    * an ``AwsCustomResource`` whose ``on_create`` call invokes
      ``startJobRun`` against the application.
    """

    def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
        """Define the stack's resources.

        Args:
            scope: Parent construct of this stack.
            construct_id: Identifier of this stack within ``scope``.
            **kwargs: Forwarded unchanged to ``Stack.__init__``.
        """
        super().__init__(scope, construct_id, **kwargs)

        # The serverless Spark application the job is submitted to.
        spark_application = emrs.CfnApplication(
            self,
            "spark_app",
            name="cdk-spark",
            type="SPARK",
            release_label="emr-6.9.0",
        )

        # Execution role for the job. No permissions are attached to it here;
        # it only has to be assumable by EMR Serverless. The principal that
        # starts the job is allowed to pass this role further down.
        execution_role = iam.Role(
            self,
            "spark_job_execution_role",
            assumed_by=iam.ServicePrincipal("emr-serverless.amazonaws.com"),
        )

        # SDK call issued when the custom resource is created. The returned
        # "jobRunId" becomes the custom resource's physical id.
        start_job_run_call = custom.AwsSdkCall(
            service="EMRServerless",
            action="startJobRun",
            parameters={
                "applicationId": spark_application.attr_application_id,
                "executionRoleArn": execution_role.role_arn,
                "name": "cdkJob",
                "jobDriver": {
                    "sparkSubmit": {
                        "entryPoint": "local:///usr/lib/spark/examples/src/main/python/pi.py"
                    }
                },
            },
            physical_resource_id=custom.PhysicalResourceId.from_response("jobRunId"),
        )

        # Permissions for the SDK call itself are derived from the call,
        # without restricting the target resource.
        sdk_call_policy = custom.AwsCustomResourcePolicy.from_sdk_calls(
            resources=custom.AwsCustomResourcePolicy.ANY_RESOURCE
        )

        job_run = custom.AwsCustomResource(
            self,
            "serverless-job-run",
            on_create=start_job_run_call,
            policy=sdk_call_policy,
        )

        # startJobRun hands the execution role to EMR Serverless, so the
        # principal behind the custom resource additionally needs
        # iam:PassRole on exactly that role, limited to that service.
        pass_execution_role = iam.PolicyStatement(
            effect=iam.Effect.ALLOW,
            actions=["iam:PassRole"],
            resources=[execution_role.role_arn],
            conditions={
                "StringLike": {
                    "iam:PassedToService": "emr-serverless.amazonaws.com"
                }
            },
        )
        job_run.grant_principal.add_to_policy(pass_execution_role)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
pytest==6.2.5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
aws-cdk-lib==2.43.1
2+
constructs>=10.0.0,<11.0.0

cdk/emr-serverless-job-run/source.bat

+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
@echo off
2+
3+
rem The sole purpose of this script is to make the command
4+
rem
5+
rem source .venv/bin/activate
6+
rem
7+
rem (which activates a Python virtualenv on Linux or Mac OS X) work on Windows.
8+
rem On Windows, this command just runs this batch file (the argument is ignored).
9+
rem
10+
rem Now we don't need to document a Windows command for activating a virtualenv.
11+
12+
echo Executing .venv\Scripts\activate.bat for you
13+
.venv\Scripts\activate.bat

cdk/emr-serverless-job-run/tests/__init__.py

Whitespace-only changes.

cdk/emr-serverless-job-run/tests/unit/__init__.py

Whitespace-only changes.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
import aws_cdk as core
2+
import aws_cdk.assertions as assertions
3+
4+
from emr_serverless_job_run.emr_serverless_job_run_stack import EmrServerlessJobRunStack
5+
6+
# Unit test for the stack defined in
# emr_serverless_job_run/emr_serverless_job_run_stack.py. It synthesizes the
# stack in memory and inspects the resulting template; nothing is deployed.
def test_sqs_queue_created():
    """Check that the synthesized template contains the EMR Serverless resources.

    Despite its name, this test has nothing to do with SQS: the stack creates
    no queue. The name is kept unchanged so that existing test selections keep
    working.

    The test asserts that:

    * an ``AWS::EMRServerless::Application`` exists with the name, release
      label and type configured in the stack, and
    * exactly one ``Custom::AWS`` resource (the custom resource that starts
      the job run) is present.
    """
    app = core.App()
    stack = EmrServerlessJobRunStack(app, "emr-serverless-job-run")
    template = assertions.Template.from_stack(stack)

    template.has_resource_properties(
        "AWS::EMRServerless::Application",
        {
            "Name": "cdk-spark",
            "ReleaseLabel": "emr-6.9.0",
            "Type": "SPARK",
        },
    )

    # AwsCustomResource synthesizes to the "Custom::AWS" resource type when
    # no explicit resource type is given, as is the case in this stack.
    template.resource_count_is("Custom::AWS", 1)

0 commit comments

Comments
 (0)