Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add utilities for ecs. #73

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions awscommons/v2/auth.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"github.com/aws/aws-sdk-go-v2/aws"
"github.com/aws/aws-sdk-go-v2/config"
"github.com/gruntwork-io/go-commons/errors"
"github.com/sirupsen/logrus"
)

const (
Expand All @@ -17,6 +18,8 @@ type Options struct {
Region string

Context context.Context

Logger *logrus.Entry
}

// NewOptions will create a new aws.Options struct that provides reasonable defaults for unspecified values.
Expand Down
224 changes: 224 additions & 0 deletions awscommons/v2/ecs.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,224 @@
package awscommons

import (
"fmt"
"math"
"strings"
"time"

"github.com/aws/aws-sdk-go-v2/aws"
"github.com/aws/aws-sdk-go-v2/service/ecs"
ecs_types "github.com/aws/aws-sdk-go-v2/service/ecs/types"
"github.com/gruntwork-io/go-commons/collections"
"github.com/gruntwork-io/go-commons/errors"
"github.com/gruntwork-io/go-commons/retry"
)

// maxLoopsForNextToken is set to avoid infinite loops when checking for Container Instances.
const maxLoopsForNextToken = 100

// GetContainerInstanceArns gets the container instance ARNs of all the EC2 instances in an ECS Cluster.
// ECS container instance ARNs are different from EC2 instance IDs!
// An ECS container instance is an EC2 instance that runs the ECS container agent and has been registered into
// an ECS cluster.
// Example identifiers:
// - EC2 instance ID: i-08e8cfc073db135a9
// - container instance ID: 2db66342-5f69-4782-89a3-f9b707f979ab
// - container instance ARN: arn:aws:ecs:us-east-1:012345678910:container-instance/2db66342-5f69-4782-89a3-f9b707f979ab
func GetContainerInstanceArns(opts *Options, clusterName string) ([]string, error) {
client, err := NewECSClient(opts)
if err != nil {
return nil, err
}

if opts.Logger != nil {
opts.Logger.Debugf("Looking up Container Instance ARNs for ECS cluster %s", clusterName)
}

input := &ecs.ListContainerInstancesInput{Cluster: aws.String(clusterName)}
arns := []string{}
// Handle pagination by repeatedly making the API call while there is a next token set.
// TODO: Consider adding a maximum to this for loop in case we always get a NextToken.
// This could be timeout based or it could be based on the number of loops.
for i := 0; i < maxLoopsForNextToken; i++ {
result, err := client.ListContainerInstances(opts.Context, input)
if err != nil {
return nil, errors.WithStackTrace(err)
}
arns = append(arns, result.ContainerInstanceArns...)
if result.NextToken == nil {
break
}
input.NextToken = result.NextToken
}

return arns, nil
}

// StartDrainingContainerInstances puts ECS container instances in DRAINING state so that all ECS Tasks running on
// them are migrated to other container instances. Batches into chunks of 10 because of AWS API limitations.
// (An error occurred InvalidParameterException when calling the UpdateContainerInstancesState
// operation: instanceIds can have at most 10 items.)
func StartDrainingContainerInstances(opts *Options, clusterName string, containerInstanceArns []string) error {
client, err := NewECSClient(opts)
if err != nil {
return err
}

batchSize := 10
numBatches := int(math.Ceil(float64(len(containerInstanceArns) / batchSize)))

errList := NewMultipleDrainContainerInstanceErrors()
for batchIdx, batchedArnList := range collections.BatchListIntoGroupsOf(containerInstanceArns, batchSize) {
batchedArns := aws.StringSlice(batchedArnList)

if opts.Logger != nil {
opts.Logger.Debugf("Putting batch %d/%d of container instances in cluster %s into DRAINING state", batchIdx, numBatches, clusterName)
}
input := &ecs.UpdateContainerInstancesStateInput{
Cluster: aws.String(clusterName),
ContainerInstances: aws.ToStringSlice(batchedArns),
Status: "DRAINING",
}
_, err := client.UpdateContainerInstancesState(opts.Context, input)
if err != nil {
errList.AddError(err)
if opts.Logger != nil {
opts.Logger.Errorf("Encountered error starting to drain container instances in batch %d: %s", batchIdx, err)
opts.Logger.Errorf("Container Instance ARNs: %s", strings.Join(batchedArnList, ","))
}
continue
}

if opts.Logger != nil {
opts.Logger.Debugf("Started draining %d container instances from batch %d", len(batchedArnList), batchIdx)
}
}

if !errList.IsEmpty() {
return errors.WithStackTrace(errList)
}

if opts.Logger != nil {
opts.Logger.Debugf("Successfully started draining all %d container instances", len(containerInstanceArns))
}
return nil
}

// WaitForContainerInstancesToDrain waits until there are no more ECS Tasks running on any of the ECS container
// instances. Batches container instances in groups of 100 because of AWS API limitations.
func WaitForContainerInstancesToDrain(opts *Options, clusterName string, containerInstanceArns []string, start time.Time, timeout time.Duration, maxRetries int, sleepBetweenRetries time.Duration) error {
client, err := NewECSClient(opts)
if err != nil {
return err
}

if opts.Logger != nil {
opts.Logger.Debugf("Checking if all ECS Tasks have been drained from the ECS Container Instances in Cluster %s.", clusterName)
}

batchSize := 100
numBatches := int(math.Ceil(float64(len(containerInstanceArns) / batchSize)))

err = retry.DoWithRetry(
opts.Logger.Logger,
"Wait for Container Instances to be Drained",
maxRetries, sleepBetweenRetries,
func() error {
responses := []*ecs.DescribeContainerInstancesOutput{}
for batchIdx, batchedArnList := range collections.BatchListIntoGroupsOf(containerInstanceArns, batchSize) {
batchedArns := aws.StringSlice(batchedArnList)

if opts.Logger != nil {
opts.Logger.Debugf("Fetching description of batch %d/%d of ECS Instances in Cluster %s.", batchIdx, numBatches, clusterName)
}
input := &ecs.DescribeContainerInstancesInput{
Cluster: aws.String(clusterName),
ContainerInstances: aws.ToStringSlice(batchedArns),
}
result, err := client.DescribeContainerInstances(opts.Context, input)
if err != nil {
return errors.WithStackTrace(err)
}
responses = append(responses, result)
}

// If we exceeded the timeout, halt with error.
if timeoutExceeded(start, timeout) {
return retry.FatalError{Underlying: fmt.Errorf("maximum drain timeout of %s seconds has elapsed and instances are still draining", timeout)}
}

// Yay, all done.
if drained, dErr := allInstancesFullyDrained(opts, responses); drained == true {
if dErr != nil {
// This error pertains to there being no instances found.
// TODO: Not sure if we should fail here or retry. Failing for now.
return retry.FatalError{Underlying: dErr}
}
if opts.Logger != nil {
opts.Logger.Debugf("All container instances have been drained in Cluster %s!", clusterName)
}
return nil
}

// In all other cases, retry.
return errors.WithStackTrace(fmt.Errorf("container instances still draining"))
})
return errors.WithStackTrace(err)
}

// timeoutExceeded returns true if the amount of time since start has exceeded the timeout.
func timeoutExceeded(start time.Time, timeout time.Duration) bool {
timeElapsed := time.Now().Sub(start)
return timeElapsed > timeout
}

// NewECSClient returns a new AWS SDK client for interacting with AWS ECS.
func NewECSClient(opts *Options) (*ecs.Client, error) {
cfg, err := NewDefaultConfig(opts)
if err != nil {
return nil, errors.WithStackTrace(err)
}
return ecs.NewFromConfig(cfg), nil
}

func allInstancesFullyDrained(opts *Options, responses []*ecs.DescribeContainerInstancesOutput) (bool, error) {
for _, response := range responses {
instances := response.ContainerInstances
if len(instances) == 0 {
return false, errors.WithStackTrace(fmt.Errorf("querying DescribeContainerInstances returned no instances"))
}

for _, instance := range instances {
if !instanceFullyDrained(opts, instance) {
return false, nil
}
}
}
return true, nil
}

func instanceFullyDrained(opts *Options, instance ecs_types.ContainerInstance) bool {
instanceArn := instance.ContainerInstanceArn

if *instance.Status == "ACTIVE" {
if opts.Logger != nil {
opts.Logger.Debugf("The ECS Container Instance %s is still in ACTIVE status", *instanceArn)
}
return false
}
if instance.PendingTasksCount > 0 {
if opts.Logger != nil {
opts.Logger.Debugf("The ECS Container Instance %s still has pending tasks", *instanceArn)
}
return false
}
if instance.RunningTasksCount > 0 {
if opts.Logger != nil {
opts.Logger.Debugf("The ECS Container Instance %s still has running tasks", *instanceArn)
}
return false
}

return true
}
34 changes: 34 additions & 0 deletions awscommons/v2/errors.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
package awscommons

import (
"fmt"
"strings"
)

// MultipleDrainContainerInstanceErrors represents multiple errors found while terminating instances
type MultipleDrainContainerInstanceErrors struct {
errors []error
}

func (err MultipleDrainContainerInstanceErrors) Error() string {
messages := []string{
fmt.Sprintf("%d errors found while draining container instances:", len(err.errors)),
}

for _, individualErr := range err.errors {
messages = append(messages, individualErr.Error())
}
return strings.Join(messages, "\n")
}

func (err MultipleDrainContainerInstanceErrors) AddError(newErr error) {
err.errors = append(err.errors, newErr)
}

func (err MultipleDrainContainerInstanceErrors) IsEmpty() bool {
return len(err.errors) == 0
}

func NewMultipleDrainContainerInstanceErrors() MultipleDrainContainerInstanceErrors {
return MultipleDrainContainerInstanceErrors{[]error{}}
}
9 changes: 5 additions & 4 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ go 1.17

require (
github.com/aws/aws-sdk-go v1.44.48
github.com/aws/aws-sdk-go-v2 v1.16.7
github.com/aws/aws-sdk-go-v2 v1.16.16
github.com/aws/aws-sdk-go-v2/config v1.15.13
github.com/aws/aws-sdk-go-v2/service/ec2 v1.47.2
github.com/aws/aws-sdk-go-v2/service/s3 v1.27.1
Expand Down Expand Up @@ -32,17 +32,18 @@ require (
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.4.3 // indirect
github.com/aws/aws-sdk-go-v2/credentials v1.12.8 // indirect
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.12.8 // indirect
github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.14 // indirect
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.8 // indirect
github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.23 // indirect
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.17 // indirect
github.com/aws/aws-sdk-go-v2/internal/ini v1.3.15 // indirect
github.com/aws/aws-sdk-go-v2/internal/v4a v1.0.5 // indirect
github.com/aws/aws-sdk-go-v2/service/ecs v1.18.22 // indirect
github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.9.3 // indirect
github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.1.9 // indirect
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.9.8 // indirect
github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.13.8 // indirect
github.com/aws/aws-sdk-go-v2/service/sso v1.11.11 // indirect
github.com/aws/aws-sdk-go-v2/service/sts v1.16.9 // indirect
github.com/aws/smithy-go v1.12.0 // indirect
github.com/aws/smithy-go v1.13.3 // indirect
github.com/bgentry/go-netrc v0.0.0-20140422174119-9fd32a8b3d3d // indirect
github.com/boombuler/barcode v1.0.1-0.20190219062509-6c824513bacc // indirect
github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect
Expand Down
10 changes: 10 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ github.com/aws/aws-sdk-go v1.44.48 h1:jLDC9RsNoYMLFlKpB8LdqUnoDdC2yvkS4QbuyPQJ8+
github.com/aws/aws-sdk-go v1.44.48/go.mod h1:y4AeaBuwd2Lk+GepC1E9v0qOiTws0MIWAX4oIKwKHZo=
github.com/aws/aws-sdk-go-v2 v1.16.7 h1:zfBwXus3u14OszRxGcqCDS4MfMCv10e8SMJ2r8Xm0Ns=
github.com/aws/aws-sdk-go-v2 v1.16.7/go.mod h1:6CpKuLXg2w7If3ABZCl/qZ6rEgwtjZTn4eAf4RcEyuw=
github.com/aws/aws-sdk-go-v2 v1.16.16 h1:M1fj4FE2lB4NzRb9Y0xdWsn2P0+2UHVxwKyOa4YJNjk=
github.com/aws/aws-sdk-go-v2 v1.16.16/go.mod h1:SwiyXi/1zTUZ6KIAmLK5V5ll8SiURNUYOqTerZPaF9k=
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.4.3 h1:S/ZBwevQkr7gv5YxONYpGQxlMFFYSRfz3RMcjsC9Qhk=
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.4.3/go.mod h1:gNsR5CaXKmQSSzrmGxmwmct/r+ZBfbxorAuXYsj/M5Y=
github.com/aws/aws-sdk-go-v2/config v1.15.13 h1:CJH9zn/Enst7lDiGpoguVt0lZr5HcpNVlRJWbJ6qreo=
Expand All @@ -76,14 +78,20 @@ github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.12.8 h1:VfBdn2AxwMbFyJN/lF/xuT3
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.12.8/go.mod h1:oL1Q3KuCq1D4NykQnIvtRiBGLUXhcpY5pl6QZB2XEPU=
github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.14 h1:2C0pYHcUBmdzPj+EKNC4qj97oK6yjrUhc1KoSodglvk=
github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.14/go.mod h1:kdjrMwHwrC3+FsKhNcCMJ7tUVj/8uSD5CZXeQ4wV6fM=
github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.23 h1:s4g/wnzMf+qepSNgTvaQQHNxyMLKSawNhKCPNy++2xY=
github.com/aws/aws-sdk-go-v2/internal/configsources v1.1.23/go.mod h1:2DFxAQ9pfIRy0imBCJv+vZ2X6RKxves6fbnEuSry6b4=
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.8 h1:2J+jdlBJWEmTyAwC82Ym68xCykIvnSnIN18b8xHGlcc=
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.8/go.mod h1:ZIV8GYoC6WLBW5KGs+o4rsc65/ozd+eQ0L31XF5VDwk=
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.17 h1:/K482T5A3623WJgWT8w1yRAFK4RzGzEl7y39yhtn9eA=
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.4.17/go.mod h1:pRwaTYCJemADaqCbUAxltMoHKata7hmB5PjEXeu0kfg=
github.com/aws/aws-sdk-go-v2/internal/ini v1.3.15 h1:QquxR7NH3ULBsKC+NoTpilzbKKS+5AELfNREInbhvas=
github.com/aws/aws-sdk-go-v2/internal/ini v1.3.15/go.mod h1:Tkrthp/0sNBShQQsamR7j/zY4p19tVTAs+nnqhH6R3c=
github.com/aws/aws-sdk-go-v2/internal/v4a v1.0.5 h1:tEEHn+PGAxRVqMPEhtU8oCSW/1Ge3zP5nUgPrGQNUPs=
github.com/aws/aws-sdk-go-v2/internal/v4a v1.0.5/go.mod h1:aIwFF3dUk95ocCcA3zfk3nhz0oLkpzHFWuMp8l/4nNs=
github.com/aws/aws-sdk-go-v2/service/ec2 v1.47.2 h1:81hrDgbXHL44WdY6M/fHGXLlv17qTpOFzutXRVDEk3Y=
github.com/aws/aws-sdk-go-v2/service/ec2 v1.47.2/go.mod h1:VoBcwURHnJVCWuXHdqVuG03i2lUlHJ5DTTqDSyCdEcc=
github.com/aws/aws-sdk-go-v2/service/ecs v1.18.22 h1:jBx029Z9GQIIq5fC5bW1ZMDsjihvmQQIe/QqdFl+7zY=
github.com/aws/aws-sdk-go-v2/service/ecs v1.18.22/go.mod h1:6bV2xEub6Vch19ZZASMbrNMNIpBPTwy64r9WIQ+wsSE=
github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.9.3 h1:4n4KCtv5SUoT5Er5XV41huuzrCqepxlW3SDI9qHQebc=
github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.9.3/go.mod h1:gkb2qADY+OHaGLKNTYxMaQNacfeyQpZ4csDTQMeFmcw=
github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.1.9 h1:gVv2vXOMqJeR4ZHHV32K7LElIJIIzyw/RU1b0lSfWTQ=
Expand All @@ -102,6 +110,8 @@ github.com/aws/aws-sdk-go-v2/service/sts v1.16.9 h1:yOfILxyjmtr2ubRkRJldlHDFBhf5
github.com/aws/aws-sdk-go-v2/service/sts v1.16.9/go.mod h1:O1IvkYxr+39hRf960Us6j0x1P8pDqhTX+oXM5kQNl/Y=
github.com/aws/smithy-go v1.12.0 h1:gXpeZel/jPoWQ7OEmLIgCUnhkFftqNfwWUwAHSlp1v0=
github.com/aws/smithy-go v1.12.0/go.mod h1:Tg+OJXh4MB2R/uN61Ko2f6hTZwB/ZYGOtib8J3gBHzA=
github.com/aws/smithy-go v1.13.3 h1:l7LYxGuzK6/K+NzJ2mC+VvLUbae0sL3bXU//04MkmnA=
github.com/aws/smithy-go v1.13.3/go.mod h1:Tg+OJXh4MB2R/uN61Ko2f6hTZwB/ZYGOtib8J3gBHzA=
github.com/bgentry/go-netrc v0.0.0-20140422174119-9fd32a8b3d3d h1:xDfNPAt8lFiC1UJrqV3uuy861HCTo708pDMbjHHdCas=
github.com/bgentry/go-netrc v0.0.0-20140422174119-9fd32a8b3d3d/go.mod h1:6QX/PXZ00z/TKoufEY6K/a0k6AhaJrQKdFe6OfVXsa4=
github.com/bgentry/speakeasy v0.1.0 h1:ByYyxL9InA1OWqxJqqp2A5pYHUrCiAL6K3J+LKSsQkY=
Expand Down
2 changes: 1 addition & 1 deletion logging/logging.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import (
var globalLogLevel = logrus.InfoLevel
var globalLogLevelLock = sync.Mutex{}

// Create a new logger with the given name
// GetLogger create a new logger with the given name
func GetLogger(name string) *logrus.Logger {
logger := logrus.New()

Expand Down