|
/**
 * Creates a pair of CloudWatch alarms (errors and latency) for a Lambda
 * function, both scoped to the function's current version.
 *
 * @param name - Prefix for the alarm resource names; the alarms are named
 *   `${name}ErrorAlarm` and `${name}LatencyAlarm`.
 * @param opts.fn - The function whose `Errors` and `Duration` metrics are watched.
 * @param opts.alertsTopic - SNS topic whose ARN is set as the alarm action.
 * @returns The two created alarms as `{ errorAlarm, latencyAlarm }`.
 */
export function createCanaryAlarms(
  name: string,
  opts: {
    fn: sst.aws.Function;
    alertsTopic: sst.aws.SnsTopic;
  },
) {
  // Each alarm is pinned to one Lambda version through its ExecutedVersion
  // dimension. A change in dimensions therefore replaces the alarm instead of
  // updating it, so the new version starts with a clean alarm state. The
  // replacement is created first and the old alarm removed afterwards, so the
  // deployment is never left without an alarm part-way through.
  const replaceOnNewVersion: $util.CustomResourceOptions = {
    replaceOnChanges: ["dimensions"],
    deleteBeforeReplace: false,
  };

  // Everything except the name suffix, metric, statistic and threshold is
  // shared between the two alarms: a single 5-minute (300 s) evaluation period,
  // a >= comparison, and missing data treated as not breaching.
  const buildAlarm = (
    suffix: string,
    metric: { metricName: string; statistic: string; threshold: number },
  ) =>
    new aws.cloudwatch.MetricAlarm(
      `${name}${suffix}`,
      {
        alarmActions: [opts.alertsTopic.arn],
        namespace: "AWS/Lambda",
        metricName: metric.metricName,
        dimensions: {
          FunctionName: opts.fn.name,
          Resource: getFunctionResource(opts.fn.targetArn),
          ExecutedVersion: opts.fn.nodes.function.version,
        },
        statistic: metric.statistic,
        period: 300,
        evaluationPeriods: 1,
        threshold: metric.threshold,
        comparisonOperator: "GreaterThanOrEqualToThreshold",
        treatMissingData: "notBreaching",
      },
      replaceOnNewVersion,
    );

  // Fires when the deployed version records at least one error in a 5-minute window.
  const errorAlarm = buildAlarm("ErrorAlarm", {
    metricName: "Errors",
    statistic: "Sum",
    threshold: 1,
  });

  // Fires when the average duration reaches 2000 ms (2 seconds) in a 5-minute window.
  const latencyAlarm = buildAlarm("LatencyAlarm", {
    metricName: "Duration",
    statistic: "Average",
    threshold: 2000,
  });

  return { errorAlarm, latencyAlarm };
}
| 66 | + |
/**
 * Derives the `FunctionName:Alias` identifier from a Lambda alias ARN, which is
 * the format CloudWatch expects in the `Resource` dimension when metrics are
 * tracked for a specific alias.
 *
 * Example: `arn:aws:lambda:us-east-1:123456789:function:my-fn:live` yields
 * `my-fn:live`.
 *
 * @param targetArn - The ARN to parse.
 * @returns An output holding the parsed ARN's resource part with everything up
 *   to and including its first `:` removed.
 */
function getFunctionResource(targetArn: $util.Input<string>) {
  const parsedArn = aws.getArnOutput({ arn: targetArn });

  return parsedArn.resource.apply((resource) => {
    // Discard the first colon-separated segment and keep the rest intact.
    const [, ...remainder] = resource.split(":");
    return remainder.join(":");
  });
}
0 commit comments