Skip to content

Commit 44d0bf3

Browse files
Merge pull request #26 from aws-samples/feat/document-explorer-release
* feat: v1.0.0 and amd64 as the default * fix: bandit, semgrep, checkov from probe --------- Signed-off-by: Scott Schreckengaust <[email protected]>
2 parents 78cbee6 + 92b0193 commit 44d0bf3

File tree

9 files changed

+57
-34
lines changed

9 files changed

+57
-34
lines changed

CHANGELOG.md

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,16 @@ All notable changes to this project will be documented in this file.
55
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
66
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
77

8-
## [Unreleased]
8+
## [Unreleased]
9+
10+
### Added
11+
12+
- Bedrock Guardrails sample
13+
14+
<!-- markdownlint-disable MD024 -->
15+
## [1.0.0]
16+
17+
### Added
18+
19+
- Document Explorer sample
20+
- Bedrock Agent sample

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ This repository showcases Terraform examples to review and test AWS generative A
66

77
## Getting started
88

9-
Explore each self-contained example in the samples directory to get started!
9+
Explore each self-contained example in the samples directory to get started!
1010

1111
## Structure
1212

samples/bedrock-agent/lambda/action-group.yaml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,14 @@ info:
33
title: Literary API
44
description: Actions that Bedrock Agents can take to retrieve book details.
55
version: 1.0.0
6+
security: #checkov:skip=CKV_OPENAPI_4:Sample has no global security field rules
67
paths:
78
/top_books:
89
get:
910
summary: Get metadata about the most popular books
1011
description: Get metadata about the most popular books in the library.
1112
operationId: getTopBooks
13+
security: #checkov:skip=CKV_OPENAPI_5:Demonstration security operations empty
1214
responses:
1315
'200':
1416
description: Successful operation
@@ -22,6 +24,7 @@ paths:
2224
description: The number of books in the library.
2325
books:
2426
type: array
27+
maxItems: 100
2528
items:
2629
$ref: '#/components/schemas/Book'
2730
'400':
@@ -53,26 +56,31 @@ components:
5356
description: The title of the book.
5457
subjects:
5558
type: array
59+
maxItems: 1000
5660
description: The subjects of the book.
5761
items:
5862
type: string
5963
authors:
6064
type: array
65+
maxItems: 100
6166
description: The author of the book.
6267
items:
6368
$ref: '#/components/schemas/Person'
6469
translators:
6570
type: array
71+
maxItems: 100
6672
description: The translator of the book.
6773
items:
6874
$ref: '#/components/schemas/Person'
6975
bookshelves:
7076
type: array
77+
maxItems: 1000
7178
description: The bookshelves the book is in.
7279
items:
7380
type: string
7481
languages:
7582
type: array
83+
maxItems: 1000
7684
description: The languages the book is in.
7785
items:
7886
type: string
@@ -84,6 +92,7 @@ components:
8492
description: The media type of the book.
8593
formats:
8694
type: array
95+
maxItems: 100
8796
description: The download formats of the book.
8897
items:
8998
$ref: '#/components/schemas/Format'

samples/bedrock-agent/lambda/action-group/gutendex.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
def get_books_from_gutendex(n: int) -> dict:
1717
"""Return the count and first n books from the /books API."""
1818
api_url = "https://gutendex.com"
19-
response = requests.get(api_url + "/books")
19+
response = requests.get(api_url + "/books", timeout=14) # keep a little less than the Lambda timeout
20+
response.raise_for_status()
2021
books = response.json()
2122
return {"count": books["count"], "books": books["results"][:n]}

samples/bedrock-agent/scripts/load-kb.sh

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -13,29 +13,29 @@ if [ $# -ne 3 ]; then
1313
exit 1
1414
fi
1515

16-
S3_URI=$1
17-
KB_ID=$2
18-
DS_ID=$3
16+
S3_URI="$1"
17+
KB_ID="$2"
18+
DS_ID="$3"
1919

2020
BOOKS_LIST=("https://www.gutenberg.org/ebooks/84.txt.utf-8"
2121
"https://www.gutenberg.org/ebooks/1342.txt.utf-8"
2222
"https://www.gutenberg.org/ebooks/2701.txt.utf-8"
2323
"https://www.gutenberg.org/ebooks/1513.txt.utf-8")
2424

2525
# make a temporary directory to download the books
26-
BOOKS_DIR=`mktemp -d -t books.$$`
26+
BOOKS_DIR=$(mktemp -d -t "books.$$")
2727

28-
pushd $BOOKS_DIR
28+
pushd "$BOOKS_DIR"
2929

3030
# download the books
3131
for book in "${BOOKS_LIST[@]}"
3232
do
33-
curl -L -o "$(basename $book).txt" $book
33+
curl -L -o "$(basename $book).txt" "$book"
3434
done
3535

36-
aws s3 sync . $S3_URI
36+
aws s3 sync . "$S3_URI"
3737
popd
38-
rm -rf $BOOKS_DIR
38+
rm -rf "$BOOKS_DIR"
3939

4040
# sync kb
41-
aws bedrock-agent start-ingestion-job --knowledge-base-id $KB_ID --data-source-id $DS_ID
41+
aws bedrock-agent start-ingestion-job --knowledge-base-id "$KB_ID" --data-source-id "$DS_ID"

samples/document-explorer/client_app/Dockerfile

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,11 @@
11
#syntax=docker/dockerfile:1.4
2-
#checkov:skip=CKV_DOCKER_3: No need for a user
3-
FROM public.ecr.aws/docker/library/python:3.13.0-slim
2+
FROM public.ecr.aws/docker/library/python@sha256:4efa69bf17cfbd83a9942e60e2642335c3b397448e00410063a0421f9727c4c4
43

5-
RUN DEBIAN_FRONTEND=noninteractive apt-get update && apt-get -y install procps
4+
RUN DEBIAN_FRONTEND=noninteractive apt-get update && apt-get -y install procps && apt-get clean && rm -rf /var/lib/apt/lists/*
65

76
WORKDIR /app
87
COPY requirements.txt ./requirements.txt
9-
RUN pip3 install -r requirements.txt
8+
RUN pip3 install --no-cache-dir -r requirements.txt
109

1110
# Set most of the environment variables from Terraform backend deployment outputs
1211
ENV COGNITO_DOMAIN='<COGNITO_DOMAIN>'
@@ -30,4 +29,5 @@ RUN chmod a+x /app/healthcheck.sh
3029

3130
HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 CMD [ "/app/healthcheck.sh" ]
3231

32+
USER nobody
3333
ENTRYPOINT ["streamlit", "run", "Home.py", "--server.port=8501"]

samples/document-explorer/client_app/common/cognito_helper.py

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
from cryptography.hazmat.primitives.asymmetric import rsa
2525
from cryptography.hazmat.backends import default_backend
2626

27+
REQUESTS_TIMEOUT=30
2728
class CognitoHelper:
2829
"""Handles user authentication with AWS Cognito."""
2930

@@ -70,7 +71,9 @@ def jwt_to_pem(self, n, e):
7071
# https://docs.aws.amazon.com/cognito/latest/developerguide/amazon-cognito-user-pools-using-tokens-verifying-a-jwt.html
7172
def get_cognito_jwk(self, kid):
7273
url = f"https://cognito-idp.{self.region}.amazonaws.com/{self.user_pool_id}/.well-known/jwks.json"
73-
jwks = requests.get(url).json()
74+
response = requests.get(url, timeout=REQUESTS_TIMEOUT)
75+
response.raise_for_status()
76+
jwks = response.json()
7477
# Extract the specific key from jwks for verification
7578
for jwk in jwks["keys"]:
7679
if jwk["kid"] == kid:
@@ -82,7 +85,7 @@ def decode_id_token(self, id_token = None):
8285
if id_token is None:
8386
id_token = st.session_state.get("id_token", "")
8487

85-
if id_token != "":
88+
if id_token != "": # nosec B105
8689
jwt_headers = jwt.get_unverified_header(id_token)
8790
jwk = self.get_cognito_jwk(jwt_headers["kid"])
8891
public_key = self.jwt_to_pem(jwk["n"], jwk["e"])
@@ -93,15 +96,15 @@ def decode_id_token(self, id_token = None):
9396
def get_user_tokens(self, auth_code = None):
9497
"""Gets user access and ID tokens using auth code."""
9598

96-
access_token = ""
97-
id_token = ""
99+
access_token = "" # nosec B105
100+
id_token = "" # nosec B105
98101

99102
# if auth_code is not provided, try to get credentials from the session state.
100103
if not auth_code:
101104
access_token = st.session_state.get("access_token", "")
102105
id_token = st.session_state.get("id_token", "")
103106

104-
if access_token != "" and id_token != "":
107+
if access_token != "" and id_token != "": # nosec B105
105108
return access_token, id_token
106109

107110
try:
@@ -118,13 +121,14 @@ def get_user_tokens(self, auth_code = None):
118121
"redirect_uri": self.app_uri,
119122
}
120123

121-
token_response = requests.post(self.token_url, headers=headers, data=body)
124+
token_response = requests.post(self.token_url, headers=headers, data=body, timeout=REQUESTS_TIMEOUT)
125+
token_response.raise_for_status()
122126
access_token = token_response.json()["access_token"]
123127
id_token = token_response.json()["id_token"]
124128

125129
except (KeyError, TypeError):
126-
access_token = ""
127-
id_token = ""
130+
access_token = "" # nosec B105
131+
id_token = "" # nosec B105
128132

129133
return access_token, id_token
130134

@@ -175,11 +179,11 @@ def set_session_state(self):
175179
auth_code = auth_query_params["code"]
176180
access_token, id_token = self.get_user_tokens(auth_code)
177181

178-
if access_token != "":
182+
if access_token != "": # nosec B105
179183
st.session_state["auth_code"] = auth_code
180184
st.session_state["access_token"] = access_token
181185

182-
if id_token != "":
186+
if id_token != "": # nosec B105
183187
st.session_state["id_token"] = id_token
184188
credentials = self.get_user_temporary_credentials(id_token)
185189
st.session_state["access_key_id"] = credentials["AccessKeyId"]
@@ -198,7 +202,7 @@ def is_authenticated(self):
198202
session_token = st.session_state.get("session_token", "")
199203
expiration = st.session_state.get("expiration")
200204

201-
is_valid_session = (access_key_id != "" and secret_access_key != "" and session_token != "")
205+
is_valid_session = (access_key_id != "" and secret_access_key != "" and session_token != "") # nosec B105
202206
# +5 seconds to allow for an expiry buffer. If the session is about to expire, we need to renew it.
203207
has_not_expired = (expiration.timestamp() > (time.time() + 5)) if expiration else True
204208

samples/document-explorer/client_app/graphql/graphql_mutation_client.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ def execute(self, query, operation_name, variables=None):
5454
}
5555
print(f' query :: {query}')
5656
try:
57-
response = requests.post(self.graphql_endpoint, json=data, headers=self.headers)
57+
response = requests.post(self.graphql_endpoint, json=data, headers=self.headers, timeout=30)
5858
response.raise_for_status()
5959

6060
except requests.exceptions.RequestException as error:
Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,8 @@
11
module "genai_doc_ingestion" {
22
#checkov:skip=CKV_TF_1:Terraform registry has no ability to use a commit hash
3-
## TODO: Update to a Terraform registry version once PR is complete
4-
#source = "aws-ia/genai-document-ingestion-rag/aws"
5-
#version = "0.0.4"
6-
#source = "github.com/aws-ia/terraform-aws-genai-document-ingestion-rag//?ref=fix%2Fdocker_build"
7-
source = "github.com/aws-ia/terraform-aws-genai-document-ingestion-rag//?ref=11eb2bac799fd495e46d17f11156eefe5e6d9d71"
3+
source = "aws-ia/genai-document-ingestion-rag/aws"
4+
version = "1.0.0"
85
solution_prefix = "doc-explorer"
9-
container_platform = "linux/arm64"
6+
container_platform = "linux/amd64"
107
force_destroy = true
118
}

0 commit comments

Comments
 (0)