Skip to content

Commit 5a00433

Browse files
committed
Add Lambda rollout with CodeDeploy traffic shifting
1 parent b062f07 commit 5a00433

63 files changed

Lines changed: 2843 additions & 416 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
/**
 * Creates the two CloudWatch alarms that watch a Lambda function version
 * during a canary rollout: one on the `Errors` metric and one on the
 * `Duration` metric. Both alarms notify `alertsTopic` when they fire.
 *
 * @param name - Prefix for the alarm resource names (`<name>ErrorAlarm` and
 *   `<name>LatencyAlarm`).
 * @param opts - The function to monitor (`fn`) and the SNS topic to notify
 *   (`alertsTopic`).
 * @returns The created `errorAlarm` and `latencyAlarm`.
 */
export function createCanaryAlarms(
  name: string,
  opts: {
    fn: sst.aws.Function;
    alertsTopic: sst.aws.SnsTopic;
  },
) {
  // Each alarm is pinned to one Lambda version through the ExecutedVersion
  // dimension. A new version changes the dimensions, which forces a
  // replacement so the alarm starts with clean state. The replacement is
  // created before the old alarm is deleted, so a deployment in progress never
  // loses its alarm part-way through.
  const replaceOnNewVersion: $util.CustomResourceOptions = {
    replaceOnChanges: ["dimensions"],
    deleteBeforeReplace: false,
  };

  // Builds the dimension set identifying "this function, through this alias,
  // running this version". Called once per alarm.
  const versionDimensions = () => ({
    FunctionName: opts.fn.name,
    Resource: getFunctionResource(opts.fn.targetArn),
    ExecutedVersion: opts.fn.nodes.function.version,
  });

  // Fires when the summed error count over a 5-minute period is 1 or more.
  const errorAlarm = new aws.cloudwatch.MetricAlarm(
    `${name}ErrorAlarm`,
    {
      alarmActions: [opts.alertsTopic.arn],
      namespace: "AWS/Lambda",
      metricName: "Errors",
      dimensions: versionDimensions(),
      statistic: "Sum",
      period: 300,
      evaluationPeriods: 1,
      threshold: 1,
      comparisonOperator: "GreaterThanOrEqualToThreshold",
      // No invocations means no data points; that must not count as a breach.
      treatMissingData: "notBreaching",
    },
    replaceOnNewVersion,
  );

  // Fires when the average duration over a 5-minute period is at or above
  // 2000 (2 seconds).
  const latencyAlarm = new aws.cloudwatch.MetricAlarm(
    `${name}LatencyAlarm`,
    {
      alarmActions: [opts.alertsTopic.arn],
      namespace: "AWS/Lambda",
      metricName: "Duration",
      dimensions: versionDimensions(),
      statistic: "Average",
      period: 300,
      evaluationPeriods: 1,
      threshold: 2000,
      comparisonOperator: "GreaterThanOrEqualToThreshold",
      treatMissingData: "notBreaching",
    },
    replaceOnNewVersion,
  );

  return { errorAlarm, latencyAlarm };
}
66+
67+
/**
 * Derives the `FunctionName:Alias` identifier from a Lambda alias ARN. This is
 * the value used for the `Resource` dimension of the alarms in this file.
 *
 * Example: `arn:aws:lambda:us-east-1:123456789:function:my-fn:live` yields
 * `my-fn:live`.
 *
 * @param targetArn - ARN of the Lambda alias.
 * @returns An output holding everything after the first `:`-separated segment
 *   of the ARN's resource part.
 */
function getFunctionResource(targetArn: $util.Input<string>) {
  const parsedArn = aws.getArnOutput({ arn: targetArn });

  return parsedArn.resource.apply((resource) => {
    // The resource part looks like `function:my-fn:live`; discard the leading
    // segment and keep the rest joined by colons.
    const [, ...nameAndAlias] = resource.split(":");
    return nameAndAlias.join(":");
  });
}
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
/**
 * Creates the "Alerts" SNS topic, attaches a policy allowing CloudWatch to
 * publish to it, and optionally subscribes an email address.
 *
 * @param opts - Optional settings; when `email` is a non-empty string it is
 *   subscribed to the topic.
 * @returns The created `alertsTopic`.
 */
export function createTopics(opts?: { email?: string }) {
  const topicName = "Alerts";
  const alertsTopic = new sst.aws.SnsTopic(topicName);

  allowCloudWatchPublish(topicName, alertsTopic);

  const email = opts?.email;
  if (email) {
    subscribeEmail(topicName, alertsTopic, email);
  }

  return { alertsTopic };
}
12+
13+
/**
 * Subscribes an email address to an SNS topic.
 *
 * @param name - Prefix for the subscription resource name (`<name>Email`).
 * @param topic - Topic to subscribe to.
 * @param email - Address that receives the topic's messages.
 */
function subscribeEmail(name: string, topic: sst.aws.SnsTopic, email: string) {
  const subscriptionName = `${name}Email`;
  const subscriptionArgs = {
    topic: topic.arn,
    protocol: "email",
    endpoint: email,
  };

  new aws.sns.TopicSubscription(subscriptionName, subscriptionArgs);
}
20+
21+
/**
 * Attaches a topic policy that lets the CloudWatch service principal publish
 * to the given SNS topic.
 *
 * @param name - Prefix for the policy resource name (`<name>CloudWatchPolicy`).
 * @param topic - Topic that CloudWatch should be able to publish to.
 */
function allowCloudWatchPublish(name: string, topic: sst.aws.SnsTopic) {
  // Single statement: `sns:Publish` on this topic for cloudwatch.amazonaws.com.
  const publishFromCloudWatch = aws.iam.getPolicyDocumentOutput({
    statements: [
      {
        actions: ["sns:Publish"],
        principals: [
          { type: "Service", identifiers: ["cloudwatch.amazonaws.com"] },
        ],
        resources: [topic.arn],
      },
    ],
  });

  new aws.sns.TopicPolicy(`${name}CloudWatchPolicy`, {
    arn: topic.arn,
    policy: publishFromCloudWatch.json,
  });
}
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
{
2+
"name": "aws-lambda-rollout",
3+
"private": true,
4+
"type": "module"
5+
}
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
/** Outcome of a single request sent by `loadTest`. */
interface Result {
  /** HTTP status code, or 0 when no response was obtained. */
  status: number;
  /** Milliseconds from just before `fetch` until the body was read (or the failure). */
  latency: number;
  /** `version` field of the JSON response body, or "unknown" when unavailable. */
  version: string;
  /** True only when the request itself failed and there is no HTTP response. */
  error: boolean;
}

/**
 * Extracts the `version` field from a raw response body.
 *
 * Returns "unknown" when the body is not JSON, is not an object, or has no
 * string/number `version`, so a malformed body never turns into a thrown error.
 */
function parseVersion(raw: string): string {
  try {
    const body: unknown = JSON.parse(raw);
    if (typeof body === "object" && body !== null) {
      const version = (body as { version?: unknown }).version;
      if (typeof version === "string" || typeof version === "number") {
        return String(version);
      }
    }
  } catch {
    // Not JSON (for example an HTML error page); fall through to "unknown".
  }
  return "unknown";
}

/**
 * Sends one GET request and records its outcome. Never rejects: transport
 * failures are reported as `status: 0, error: true`.
 */
async function sendRequest(url: string): Promise<Result> {
  const start = Date.now();
  try {
    const res = await fetch(url);
    // Read the body as text and parse it separately so that a non-JSON body
    // (typical for gateway or runtime error pages) keeps its real HTTP status
    // instead of being reported as a transport error.
    const raw = await res.text();
    return {
      status: res.status,
      latency: Date.now() - start,
      version: parseVersion(raw),
      error: false,
    };
  } catch {
    return {
      status: 0,
      latency: Date.now() - start,
      version: "unknown",
      error: true,
    };
  }
}

/**
 * Fires `count` concurrent GET requests at `url` and prints a summary per
 * reported function version: request count, successes (2xx), failures
 * (transport errors or status >= 400), average latency, and a breakdown of
 * failure status codes.
 *
 * @param url - Endpoint to request; its JSON body is expected to carry a
 *   `version` field.
 * @param count - Number of concurrent requests to send. Defaults to 50.
 */
export async function loadTest(url: string, count = 50): Promise<void> {
  console.log(`URL: ${url}\n`);
  console.log(`Sending ${count} concurrent requests...\n`);

  const results = await Promise.all(
    Array.from({ length: count }, () => sendRequest(url)),
  );

  // Group by version so traffic split between the old and new version during
  // a rollout is visible.
  const byVersion = new Map<string, Result[]>();
  for (const result of results) {
    const group = byVersion.get(result.version);
    if (group) {
      group.push(result);
    } else {
      byVersion.set(result.version, [result]);
    }
  }

  for (const [version, vResults] of byVersion) {
    const succeeded = vResults.filter(
      (r) => !r.error && r.status >= 200 && r.status < 300,
    );
    const failed = vResults.filter((r) => r.error || r.status >= 400);
    // Every group holds at least one result, so the division is safe.
    const avgLatency =
      vResults.reduce((sum, r) => sum + r.latency, 0) / vResults.length;

    console.log(`Version ${version}:`);
    console.log(`  Requests: ${vResults.length}`);
    console.log(`  Succeeded: ${succeeded.length}`);
    console.log(`  Failed: ${failed.length}`);
    console.log(`  Avg latency: ${Math.round(avgLatency)}ms`);

    if (failed.length > 0) {
      const statusCounts: Record<number, number> = {};
      for (const r of failed) {
        statusCounts[r.status] = (statusCounts[r.status] ?? 0) + 1;
      }
      console.log(`    Status codes:`, statusCounts);
    }
    console.log();
  }
}
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
/**
 * Lambda handler for the example API.
 *
 * Responds with a JSON body containing a greeting and the Lambda version that
 * served the request, read from `AWS_LAMBDA_FUNCTION_VERSION` ("unknown" when
 * the variable is not set).
 *
 * @returns A 200 response whose body is a JSON string and whose
 *   `content-type` header is declared explicitly as JSON.
 */
export async function handler(): Promise<{
  statusCode: number;
  headers: Record<string, string>;
  body: string;
}> {
  const version = process.env.AWS_LAMBDA_FUNCTION_VERSION ?? "unknown";

  return {
    statusCode: 200,
    // Declared explicitly so clients parsing the body as JSON do not depend on
    // any default the invoking service may or may not apply.
    headers: {
      "content-type": "application/json",
    },
    body: JSON.stringify({
      message: "Hello from the API",
      version,
    }),
  };
}
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
/// <reference path="./.sst/platform/config.d.ts" />
2+
3+
/**
 * ## AWS Lambda Rollout
 *
 * Deploys a Lambda function using a canary rollout. Every deploy publishes a new
 * version, and CodeDeploy shifts traffic to it gradually: 10% for 10 minutes, then 100%.
 *
 * CloudWatch alarms watch errors and latency while the rollout is in progress. If
 * either alarm fires, CodeDeploy automatically rolls back to the previous version.
 *
 * An SNS topic sends notifications on failures, rollbacks, and stops.
 */
export default $config({
  app(input) {
    const isProduction = input?.stage === "production";
    return {
      name: "aws-lambda-rollout",
      home: "aws",
      removal: isProduction ? "retain" : "remove",
    };
  },
  async run() {
    const topics = await import("./infra/topics");
    const alarms = await import("./infra/alarms");

    const { alertsTopic } = topics.createTopics({
      // email: EMAIL,
    });

    const fn = new sst.aws.Function("Function", {
      handler: "src/api.handler",
      rollout: { latestUrl: true },
      url: true,
      // A rollout is only triggered by a change in the function code. `sst dev`
      // deploys a stub that never changes, so dev mode is disabled here to
      // always deploy the real code.
      dev: false,
    });

    // The alarms must exist before the rollout that references them by name.
    const canary = alarms.createCanaryAlarms("Function", {
      fn,
      alertsTopic,
    });

    fn.addRollout({
      type: "canary",
      percentage: 10,
      duration: "10 minutes",
      wait: true,
      alarms: [canary.errorAlarm.name, canary.latencyAlarm.name],
      notifications: [
        {
          name: "Alerts",
          events: ["failure", "rollback", "stop"],
          topic: alertsTopic.arn,
        },
      ],
    });

    // Once the URL, version and deployment id are known, run a short load test
    // against the function URL and print how requests were served per version.
    const rolloutDeploymentId = fn.nodes.rolloutDeployment?.apply(
      (deployment) => deployment?.deploymentId,
    );
    const aliasSettleMs = 10_000;

    $util
      .all([fn.url, fn.nodes.function.version, rolloutDeploymentId])
      .apply(async ([functionUrl, deployedVersion, deploymentId]) => {
        // wait for CodeDeploy to update the lambda alias
        await new Promise((resolve) => setTimeout(resolve, aliasSettleMs));

        console.log(
          `\nDeployed version ${deployedVersion} (deployment: ${deploymentId})\n`,
        );
        const { loadTest } = await import("./scripts/test");
        await loadTest(functionUrl);
      });

    return {
      url: fn.url,
      latestUrl: fn.latestUrl,
    };
  },
});
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
{
2+
"compilerOptions": {
3+
"target": "ESNext",
4+
"module": "ESNext",
5+
"moduleResolution": "bundler",
6+
"strict": true,
7+
"esModuleInterop": true,
8+
"skipLibCheck": true,
9+
"forceConsistentCasingInFileNames": true
10+
}
11+
}
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
{
2+
"name": "aws-lambda-smoke-test-function-url",
3+
"private": true,
4+
"dependencies": {
5+
"sst": "^4"
6+
}
7+
}
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
/**
 * Lambda handler for the example API.
 *
 * An event whose `type` is "health-check" gets a small health payload. Any
 * other event (including a missing or non-object event) gets the normal
 * greeting together with the Lambda version that served it, read from
 * `AWS_LAMBDA_FUNCTION_VERSION` ("unknown" when the variable is not set).
 *
 * @param event - Invocation payload; only its optional `type` field is read.
 * @returns A 200 response with a JSON string body and a JSON `content-type`
 *   header.
 */
export async function handler(event: unknown): Promise<{
  statusCode: number;
  body: string;
  headers: Record<string, string>;
}> {
  const headers = { "content-type": "application/json" };

  // Guard the property access so a null/undefined event falls through to the
  // normal response instead of throwing a TypeError.
  const isHealthCheck =
    typeof event === "object" &&
    event !== null &&
    (event as { type?: unknown }).type === "health-check";

  if (isHealthCheck) {
    return {
      statusCode: 200,
      body: JSON.stringify({ status: "healthy" }),
      headers,
    };
  }

  return {
    statusCode: 200,
    body: JSON.stringify({
      message: "Hello from the API",
      // Without a fallback, JSON.stringify would silently drop the key when
      // the variable is unset.
      version: process.env.AWS_LAMBDA_FUNCTION_VERSION ?? "unknown",
    }),
    headers,
  };
}
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
import { Resource } from "sst";
2+
import { rollout } from "sst/aws/rollout";
3+
4+
/**
 * Rollout hook entry point, wrapped with `rollout.handler`. Runs `validate()`
 * and passes its outcome, together with the incoming event, to
 * `rollout.report`.
 */
export const handler = rollout.handler(async (event) => {
  const outcome = await validate();
  await rollout.report(event, outcome);
});
8+
9+
/**
 * Health-checks the function by requesting `Resource.Function.latestUrl`.
 *
 * @returns "Succeeded" when the response status is OK; "Failed" when it is
 *   not, or when the request or reading the body throws. Never rejects.
 */
async function validate(): Promise<"Succeeded" | "Failed"> {
  try {
    const response = await fetch(Resource.Function.latestUrl);
    const body = await response.text();

    if (!response.ok) {
      console.log("Health check failed:", response.status, body);
      return "Failed";
    }

    console.log("Health check passed:", body);
    return "Succeeded";
  } catch (error) {
    // Network-level failures are reported as a failed validation rather than
    // propagated to the caller.
    console.error("Validation failed:", error);
    return "Failed";
  }
}

0 commit comments

Comments
 (0)