Skip to content

Commit 61e1cf5

Browse files
committed
Initial support for AWS metrics
1 parent f7c55b3 commit 61e1cf5

File tree

5 files changed

+99
-18
lines changed

5 files changed

+99
-18
lines changed

README.md

Lines changed: 25 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,37 @@
11
# AccountingDB: A high-performance financial ledger based on DynamoDB
22

3-
This project implements a financial tracking component that heavily leans on AWS
4-
components. At the core, durable stage storage is provided by DynamoDB. Other
5-
aspects of the solution are implemented by maximally relying on a serverless
3+
This project implements a financial tracking component that leans heavily on AWS components. At the core, durable state
4+
storage is provided by DynamoDB. Other aspects of the solution are implemented by maximally relying on a serverless
65
approach in order to achieve extremely low human operational cost.
76

87
## Operations
98

10-
Transfers are transactional debits/credits between pairs of accounts. You
11-
specify a credit and debit account, and an amount, and the system will accept or
12-
reject the transfer depending on which logic rules are active.
9+
Transfers are transactional debits/credits between pairs of accounts. You specify a credit and debit account, and an
10+
amount, and the system will accept or reject the transfer depending on which logic rules are active.
1311

14-
## Deploying a test stack
12+
## Deploying the stack
13+
14+
The stack creates a single DynamoDB table with on-demand billing and the benchmark runner Lambda function. These have no idle
15+
cost if left running. Depending on how many other dashboards you have in your account, the benchmark dashboard may
16+
exceed your free tier allowance and attract a charge. Data stored in DynamoDB will be billed according to the standard DynamoDB storage pricing.
1517

1618
```shell
1719
npm run deploy
1820
eval $(./bin/get-benchmark-function-name.sh)
1921
```
22+
23+
### Read benchmark and sparse account balances
24+
25+
Note that for the read workload to provide representative data, you will want to ensure that account entries exist for
26+
all accounts in the range. We don't yet have a mechanism to fill these in. For accounts that do not exist, the cost of
27+
returning "not found" may be different from the cost of performing a balance read; hence be careful when setting up the
28+
read benchmark.
29+
30+
## Running load tests
31+
32+
The scripts `invoke-benchmark.sh` and `invoke-parallel.sh` can be used to start 1 or N parallel runs with the
33+
configuration defined in `benchmark-request.json`. When using parallel benchmarks, the aggregations returned by any of
34+
the runners only capture the data of that particular runner; refer to the CloudWatch dashboard instead for the aggregated
35+
statistics.
36+
37+
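Note that CloudWatch retains high-resolution (sub-minute) data points for only three hours. After that, the data is available only at one-minute or coarser resolution, so inspect second-level results soon after a run.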

bin/benchmark-local.ts

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,16 +9,16 @@ import { CreateTransfersLoadTest, ReadAccountBalancesLoadTest } from "../lib/loa
99
// Load test parameters
1010

1111
const testDurationSeconds = 10;
12-
const numAccounts = 1_000_000;
12+
const numAccounts = 1_000;
1313
const hotAccounts = 1_000;
1414

15-
const readRate = 0; // Set to 0 to disable
16-
const readConcurrency = 1;
17-
const readBatchSize = 1;
15+
const readRate = 500; // Set to 0 to disable
16+
const readConcurrency = 5;
17+
const readBatchSize = 5;
1818

19-
const writeRate = 10; // Set to 0 to disable
20-
const writeConcurrency = 1;
21-
const writeBatchSize = 1;
19+
const writeRate = 500; // Set to 0 to disable
20+
const writeConcurrency = 5;
21+
const writeBatchSize = 5;
2222
const writeAccountSelectionStrategy = AccountSelectionStrategy.RANDOM_PEER_TO_PEER;
2323

2424
const requestTimeoutMs = 100;

bin/benchmark-request.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
{
2-
"durationSeconds": 5,
2+
"durationSeconds": 60,
33
"numAccounts": 1000000,
44

55
"writeRate": 10,

lib/load-test-runner.ts

Lines changed: 54 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,17 @@
11
import assert from "assert";
22
import { createHistogram, performance, RecordableHistogram } from "perf_hooks";
3+
import { Configuration, createMetricsLogger, MetricsLogger, StorageResolution, Unit } from "aws-embedded-metrics";
4+
5+
export interface TestMetadata {
6+
/**
7+
* The test name will be used as a metric dimension for reporting.
8+
*/
9+
name: string;
10+
}
311

412
export interface Test {
13+
metadata(): TestMetadata;
14+
515
setup(): Promise<void>;
616

717
teardown(): Promise<void>;
@@ -19,6 +29,12 @@ export interface Test {
1929
}
2030

2131
export abstract class AbstractBaseTest implements Test {
32+
metadata() {
33+
return {
34+
name: this.constructor.name,
35+
};
36+
}
37+
2238
async setup(): Promise<void> {
2339
}
2440

@@ -36,16 +52,25 @@ export abstract class AbstractBaseTest implements Test {
3652
}
3753
}
3854

55+
export const METRIC_NAMESPACE = "AccountingDB";
56+
Configuration.namespace = METRIC_NAMESPACE;
57+
58+
export type MetricNames = "Success" // overall success (1) / failure (0) of a single batch
59+
| "Latency" // elapsed time from batch arrival time to commit
60+
| "ServiceTime" // elapsed time from batch processing start time to commit
61+
| "BatchSize"; // number of successfully processed items in a single batch; not emitted for failed transactions
62+
63+
const METRICS_RESOLUTION = StorageResolution.High;
64+
3965
export class LoadTestDriver {
4066
private readonly concurrency: number;
4167
private readonly targetRps: number;
4268
private readonly workerCycleTimeMs: number;
4369
private readonly arrivalIntervalTimeMs: number;
4470
private readonly test: Test;
71+
private readonly name: string;
4572
private readonly overallDurationMs: number;
4673
private readonly warmupDurationMs: number;
47-
private readonly requestLatencyMicros: RecordableHistogram;
48-
private readonly serviceTimeMicros: RecordableHistogram;
4974
private readonly timeoutValueMs: number;
5075
private completedIterationsCount: number = 0;
5176
private scheduledIterationsCount: number = 0;
@@ -59,6 +84,10 @@ export class LoadTestDriver {
5984
private workerRunTime: number = 0;
6085
private workerBackoffTime: number = 0;
6186
private workerBehindScheduleTime: number = 0;
87+
// We track metrics internally, and optionally post them to CloudWatch. The latter is great for distributed use.
88+
private readonly requestLatencyMicros: RecordableHistogram;
89+
private readonly serviceTimeMicros: RecordableHistogram;
90+
private readonly metrics: MetricsLogger;
6291

6392
constructor(
6493
test: Test,
@@ -85,10 +114,12 @@ export class LoadTestDriver {
85114
this.test = test;
86115
this.requestLatencyMicros = createHistogram();
87116
this.serviceTimeMicros = createHistogram();
117+
this.metrics = createMetricsLogger();
118+
this.name = test.metadata().name;
88119
}
89120

90121
async run(): Promise<any> {
91-
if (this.targetRps == 0) {
122+
if (this.targetRps <= 0) {
92123
return;
93124
}
94125

@@ -105,11 +136,16 @@ export class LoadTestDriver {
105136

106137
while (nextRequestTime < endTime) {
107138
const cutoffTime = performance.now() - this.timeoutValueMs;
139+
// Prune expired-in-queue requests from the work queue and record timeouts:
108140
while (this.workQueue.length > 0 && this.workQueue[0] < cutoffTime) {
109141
assert(this.workQueue.shift() !== undefined);
142+
this.metrics.putDimensions({ Name: this.name });
143+
this.metrics.putMetric($m("Latency"), this.timeoutValueMs, Unit.Milliseconds, METRICS_RESOLUTION);
144+
this.metrics.putMetric($m("Success"), 0, Unit.None, METRICS_RESOLUTION);
110145
this.missedIterations += 1;
111146
this.recordDuration(this.requestLatencyMicros, this.timeoutValueMs * 1000);
112147
}
148+
await this.metrics.flush();
113149

114150
while (this.workQueue.length < this.concurrency * 2 && nextRequestTime < endTime) {
115151
this.workQueue.push(nextRequestTime);
@@ -129,14 +165,18 @@ export class LoadTestDriver {
129165
// Skip over any scheduled iterations that have already timed out in-queue
130166
const cutoffTime = workerLoopStart - this.timeoutValueMs;
131167
let arrivalTime = this.workQueue.shift();
168+
this.metrics.putDimensions({ Name: this.name });
132169
for (; arrivalTime !== undefined && arrivalTime < cutoffTime; arrivalTime = this.workQueue.shift()) {
133170
// Only record timeouts post-warmup
134171
if (arrivalTime > measurementStartTime) {
135172
this.missedIterations += 1;
136173
this.recordDuration(this.requestLatencyMicros, Math.round(this.timeoutValueMs * 1000));
174+
this.metrics.putMetric($m("Latency"), this.timeoutValueMs, Unit.Milliseconds, METRICS_RESOLUTION);
175+
this.metrics.putMetric($m("Success"), 0, Unit.None, METRICS_RESOLUTION);
137176
arrivalTime = this.workQueue.shift();
138177
}
139178
}
179+
await this.metrics.flush();
140180

141181
// No more work for this worker to do
142182
if (arrivalTime === undefined) {
@@ -181,6 +221,13 @@ export class LoadTestDriver {
181221
this.completedIterationsCount += 1;
182222
this.requestCount += this.requestsPerIteration;
183223
this.workerRunTime += serviceTimeMillis;
224+
225+
this.metrics.putDimensions({ Name: this.name });
226+
this.metrics.putMetric($m("Latency"), iterationDurationMillis, Unit.Milliseconds, METRICS_RESOLUTION);
227+
this.metrics.putMetric($m("ServiceTime"), serviceTimeMillis, Unit.Milliseconds, METRICS_RESOLUTION);
228+
this.metrics.putMetric($m("BatchSize"), this.requestsPerIteration, Unit.Count, METRICS_RESOLUTION);
229+
this.metrics.putMetric($m("Success"), 1, Unit.None, METRICS_RESOLUTION);
230+
await this.metrics.flush();
184231
}
185232
} while (true);
186233
};
@@ -264,3 +311,7 @@ export class LoadTestDriver {
264311
export async function sleep(ms: number) {
265312
await new Promise((resolve) => setTimeout(resolve, ms));
266313
}
314+
315+
function $m(metric: MetricNames) {
316+
return metric;
317+
}

lib/load-tests.ts

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,12 @@ export class CreateTransfersLoadTest extends AbstractBaseTest {
7676
};
7777
}
7878

79+
metadata(): { name: string } {
80+
return {
81+
name: "CreateTransfers",
82+
}
83+
}
84+
7985
async performIteration() {
8086
const transfers = generateTransfers(this.transferBatchSize, this.accountSelectionStrategy, {
8187
numAccounts: this.numAccounts,
@@ -188,6 +194,12 @@ export class ReadAccountBalancesLoadTest extends AbstractBaseTest {
188194
}
189195
}
190196

197+
metadata(): { name: string } {
198+
return {
199+
name: "ReadAccountBalances",
200+
}
201+
}
202+
191203
requestsPerIteration() {
192204
return this.batchSize;
193205
}

0 commit comments

Comments
 (0)