From a32c60fc499f26b460dda75eb021275c3d688d6d Mon Sep 17 00:00:00 2001 From: danellecline Date: Tue, 14 Nov 2023 14:24:21 -0800 Subject: [PATCH] perf: remove scale down on tasks, bump up instance warmup time to 5 minutes to allow for longer model spin-up; bump scale-in queue metric period to 180 minutes for complex videos --- deepsea_ai/cdk/app/lib/ecs_task_autoscaling.ts | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/deepsea_ai/cdk/app/lib/ecs_task_autoscaling.ts b/deepsea_ai/cdk/app/lib/ecs_task_autoscaling.ts index ff08413..884e4dd 100644 --- a/deepsea_ai/cdk/app/lib/ecs_task_autoscaling.ts +++ b/deepsea_ai/cdk/app/lib/ecs_task_autoscaling.ts @@ -178,7 +178,7 @@ export class AutoScalingTaskStack extends cdk.Stack { }) const scaleInQueueMetric = videoSqsQueue.metricApproximateNumberOfMessagesVisible({ - period: cdk.Duration.minutes(120), + period: cdk.Duration.minutes(180), statistic: "Average" }) @@ -188,6 +188,8 @@ export class AutoScalingTaskStack extends cdk.Stack { daemon: false, desiredCount: 0, taskDefinition: taskDefinition, + minHealthyPercent: 0, + maxHealthyPercent: 100, placementStrategies: [ecs.PlacementStrategy.spreadAcross('instanceId')] //to deploy only once task per instance }) @@ -196,11 +198,11 @@ export class AutoScalingTaskStack extends cdk.Stack { trackSqsQueue.grantConsumeMessages(service.taskDefinition.taskRole) // Task scaling steps - const serviceOutScaling = service.autoScaleTaskCount({minCapacity: 0, maxCapacity: config.FleetSize}); - serviceOutScaling.scaleOnMetric(`scaling-${config}`, { + const taskOutScaling = service.autoScaleTaskCount({minCapacity: 0, maxCapacity: config.FleetSize}); + taskOutScaling.scaleOnMetric(`scaling-${config}`, { metric: scaleOutQueueMetric, scalingSteps: [ - { upper: 0, change: -1 }, + { upper: 0, change: 0 }, { lower: 1, change: 1 } ], cooldown: cdk.Duration.minutes(5), @@ -216,7 +218,7 @@ export class AutoScalingTaskStack extends cdk.Stack { const scalingOutAction = new autoscaling.StepScalingAction(this, 
'scale-out-action', { autoScalingGroup: asg, - estimatedInstanceWarmup: cdk.Duration.minutes(2), + estimatedInstanceWarmup: cdk.Duration.minutes(5), adjustmentType: autoscaling.AdjustmentType.CHANGE_IN_CAPACITY}) // The threshold is set to 1 so the lower bound must be equal to 0