Skip to content

Commit

Permalink
Add instance type offering availability to metrics
Browse files Browse the repository at this point in the history
  • Loading branch information
jonathan-innis committed Mar 3, 2024
1 parent 1bf1f80 commit 548f9f5
Show file tree
Hide file tree
Showing 8 changed files with 131 additions and 132 deletions.
25 changes: 17 additions & 8 deletions pkg/providers/instancetype/instancetype.go
Original file line number Diff line number Diff line change
Expand Up @@ -118,16 +118,15 @@ func (p *Provider) List(ctx context.Context, kc *corev1beta1.KubeletConfiguratio
return item.([]*cloudprovider.InstanceType), nil
}
result := lo.Map(instanceTypes, func(i *ec2.InstanceTypeInfo, _ int) *cloudprovider.InstanceType {
instanceTypeVCPU.With(prometheus.Labels{
instanceTypeLabel: *i.InstanceType,
}).Set(float64(aws.Int64Value(i.VCpuInfo.DefaultVCpus)))
instanceTypeMemory.With(prometheus.Labels{
instanceTypeLabel: *i.InstanceType,
}).Set(float64(aws.Int64Value(i.MemoryInfo.SizeInMiB) * 1024 * 1024))

return NewInstanceType(ctx, i, kc, p.region, nodeClass, p.createOfferings(ctx, i, instanceTypeOfferings[aws.StringValue(i.InstanceType)], zones, subnetZones))
})
for _, instanceType := range instanceTypes {
InstanceTypeVCPU.With(prometheus.Labels{
InstanceTypeLabel: *instanceType.InstanceType,
}).Set(float64(aws.Int64Value(instanceType.VCpuInfo.DefaultVCpus)))
InstanceTypeMemory.With(prometheus.Labels{
InstanceTypeLabel: *instanceType.InstanceType,
}).Set(float64(aws.Int64Value(instanceType.MemoryInfo.SizeInMiB) * 1024 * 1024))
}
p.cache.SetDefault(key, result)
return result, nil
}
Expand Down Expand Up @@ -167,6 +166,16 @@ func (p *Provider) createOfferings(ctx context.Context, instanceType *ec2.Instan
Price: price,
Available: available,
})
instanceTypeOfferingAvailable.With(prometheus.Labels{
instanceTypeLabel: *instanceType.InstanceType,
capacityTypeLabel: capacityType,
zoneLabel: zone,
}).Set(float64(lo.Ternary(available, 1, 0)))
instanceTypeOfferingPriceEstimate.With(prometheus.Labels{
instanceTypeLabel: *instanceType.InstanceType,
capacityTypeLabel: capacityType,
zoneLabel: zone,
}).Set(price)
}
}
return offerings
Expand Down
45 changes: 36 additions & 9 deletions pkg/providers/instancetype/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,34 +23,61 @@ import (

const (
cloudProviderSubsystem = "cloudprovider"
instanceTypeLabel = "instance_type"
capacityTypeLabel = "capacity_type"
zoneLabel = "zone"
)

var (
InstanceTypeLabel = "instance_type"

InstanceTypeVCPU = prometheus.NewGaugeVec(
instanceTypeVCPU = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: metrics.Namespace,
Subsystem: cloudProviderSubsystem,
Name: "instance_type_cpu_cores",
Help: "VCPUs cores for a given instance type.",
},
[]string{
InstanceTypeLabel,
})

InstanceTypeMemory = prometheus.NewGaugeVec(
instanceTypeLabel,
},
)
instanceTypeMemory = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: metrics.Namespace,
Subsystem: cloudProviderSubsystem,
Name: "instance_type_memory_bytes",
Help: "Memory, in bytes, for a given instance type.",
},
[]string{
InstanceTypeLabel,
instanceTypeLabel,
},
)
instanceTypeOfferingAvailable = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: metrics.Namespace,
Subsystem: cloudProviderSubsystem,
Name: "instance_type_offering_available",
Help: "Instance type offering availability, based on instance type, capacity type, and zone",
},
[]string{
instanceTypeLabel,
capacityTypeLabel,
zoneLabel,
},
)
instanceTypeOfferingPriceEstimate = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: metrics.Namespace,
Subsystem: cloudProviderSubsystem,
Name: "instance_type_offering_price_estimate",
Help: "Instance type offering estimated estimated hourly price used when making informed decisions on node cost calculation, based on instance type, capacity type, and zone.",
},
[]string{
instanceTypeLabel,
capacityTypeLabel,
zoneLabel,
})
)

func init() {
crmetrics.Registry.MustRegister(InstanceTypeVCPU, InstanceTypeMemory)
crmetrics.Registry.MustRegister(instanceTypeVCPU, instanceTypeMemory, instanceTypeOfferingAvailable, instanceTypeOfferingPriceEstimate)
}
93 changes: 65 additions & 28 deletions pkg/providers/instancetype/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -741,34 +741,71 @@ var _ = Describe("InstanceTypes", func() {
Expect(it.Capacity.Pods().Value()).To(BeNumerically("==", 110))
}
})

It("should expose vcpu metrics for instance types", func() {
instanceInfo, err := awsEnv.InstanceTypesProvider.List(ctx, nodePool.Spec.Template.Spec.Kubelet, nodeClass)
Expect(err).To(BeNil())
Expect(len(instanceInfo)).To(BeNumerically(">", 0))
for _, info := range instanceInfo {
metric, ok := FindMetricWithLabelValues("karpenter_cloudprovider_instance_type_cpu_cores", map[string]string{
instancetype.InstanceTypeLabel: info.Name,
})
Expect(ok).To(BeTrue())
Expect(metric).To(Not(BeNil()))
value := metric.GetGauge().Value
Expect(aws.Float64Value(value)).To(BeNumerically(">", 0))
}
})
It("should expose memory metrics for instance types", func() {
instanceInfo, err := awsEnv.InstanceTypesProvider.List(ctx, nodePool.Spec.Template.Spec.Kubelet, nodeClass)
Expect(err).To(BeNil())
Expect(len(instanceInfo)).To(BeNumerically(">", 0))
for _, info := range instanceInfo {
metric, ok := FindMetricWithLabelValues("karpenter_cloudprovider_instance_type_memory_bytes", map[string]string{
instancetype.InstanceTypeLabel: info.Name,
})
Expect(ok).To(BeTrue())
Expect(metric).To(Not(BeNil()))
value := metric.GetGauge().Value
Expect(aws.Float64Value(value)).To(BeNumerically(">", 0))
}
Context("Metrics", func() {
It("should expose vcpu metrics for instance types", func() {
instanceTypes, err := awsEnv.InstanceTypesProvider.List(ctx, nodePool.Spec.Template.Spec.Kubelet, nodeClass)
Expect(err).To(BeNil())
Expect(len(instanceTypes)).To(BeNumerically(">", 0))
for _, it := range instanceTypes {
metric, ok := FindMetricWithLabelValues("karpenter_cloudprovider_instance_type_cpu_cores", map[string]string{
"instance_type": it.Name,
})
Expect(ok).To(BeTrue())
Expect(metric).To(Not(BeNil()))
value := metric.GetGauge().Value
Expect(aws.Float64Value(value)).To(BeNumerically(">", 0))
}
})
It("should expose memory metrics for instance types", func() {
instanceTypes, err := awsEnv.InstanceTypesProvider.List(ctx, nodePool.Spec.Template.Spec.Kubelet, nodeClass)
Expect(err).To(BeNil())
Expect(len(instanceTypes)).To(BeNumerically(">", 0))
for _, it := range instanceTypes {
metric, ok := FindMetricWithLabelValues("karpenter_cloudprovider_instance_type_memory_bytes", map[string]string{
"instance_type": it.Name,
})
Expect(ok).To(BeTrue())
Expect(metric).To(Not(BeNil()))
value := metric.GetGauge().Value
Expect(aws.Float64Value(value)).To(BeNumerically(">", 0))
}
})
It("should expose availability metrics for instance types", func() {
instanceTypes, err := awsEnv.InstanceTypesProvider.List(ctx, nodePool.Spec.Template.Spec.Kubelet, nodeClass)
Expect(err).To(BeNil())
Expect(len(instanceTypes)).To(BeNumerically(">", 0))
for _, it := range instanceTypes {
for _, of := range it.Offerings {
metric, ok := FindMetricWithLabelValues("karpenter_cloudprovider_instance_type_offering_available", map[string]string{
"instance_type": it.Name,
"capacity_type": of.CapacityType,
"zone": of.Zone,
})
Expect(ok).To(BeTrue())
Expect(metric).To(Not(BeNil()))
value := metric.GetGauge().Value
Expect(aws.Float64Value(value)).To(BeNumerically("==", lo.Ternary(of.Available, 1, 0)))
}
}
})
It("should expose pricing metrics for instance types", func() {
instanceTypes, err := awsEnv.InstanceTypesProvider.List(ctx, nodePool.Spec.Template.Spec.Kubelet, nodeClass)
Expect(err).To(BeNil())
Expect(len(instanceTypes)).To(BeNumerically(">", 0))
for _, it := range instanceTypes {
for _, of := range it.Offerings {
metric, ok := FindMetricWithLabelValues("karpenter_cloudprovider_instance_type_offering_price_estimate", map[string]string{
"instance_type": it.Name,
"capacity_type": of.CapacityType,
"zone": of.Zone,
})
Expect(ok).To(BeTrue())
Expect(metric).To(Not(BeNil()))
value := metric.GetGauge().Value
Expect(aws.Float64Value(value)).To(BeNumerically("==", of.Price))
}
}
})
})
It("should launch instances in local zones", func() {
ExpectApplied(ctx, env.Client, nodePool, nodeClass)
Expand Down
50 changes: 0 additions & 50 deletions pkg/providers/pricing/metrics.go

This file was deleted.

15 changes: 0 additions & 15 deletions pkg/providers/pricing/pricing.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ import (
"github.com/aws/aws-sdk-go/service/ec2/ec2iface"
"github.com/aws/aws-sdk-go/service/pricing"
"github.com/aws/aws-sdk-go/service/pricing/pricingiface"
"github.com/prometheus/client_golang/prometheus"
"github.com/samber/lo"
"go.uber.org/multierr"
"knative.dev/pkg/logging"
Expand Down Expand Up @@ -204,14 +203,6 @@ func (p *Provider) UpdateOnDemandPricing(ctx context.Context) error {
}

p.onDemandPrices = lo.Assign(onDemandPrices, onDemandMetalPrices)
for instanceType, price := range p.onDemandPrices {
InstancePriceEstimate.With(prometheus.Labels{
InstanceTypeLabel: instanceType,
CapacityTypeLabel: ec2.UsageClassTypeOnDemand,
RegionLabel: p.region,
TopologyLabel: "",
}).Set(price)
}
if p.cm.HasChanged("on-demand-prices", p.onDemandPrices) {
logging.FromContext(ctx).With("instance-type-count", len(p.onDemandPrices)).Debugf("updated on-demand pricing")
}
Expand Down Expand Up @@ -341,12 +332,6 @@ func (p *Provider) UpdateSpotPricing(ctx context.Context) error {
prices[instanceType] = map[string]float64{}
}
prices[instanceType][az] = spotPrice
InstancePriceEstimate.With(prometheus.Labels{
InstanceTypeLabel: instanceType,
CapacityTypeLabel: ec2.UsageClassTypeSpot,
RegionLabel: p.region,
TopologyLabel: az,
}).Set(spotPrice)
}
return true
})
Expand Down
20 changes: 0 additions & 20 deletions pkg/providers/pricing/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -123,12 +123,10 @@ var _ = Describe("Pricing", func() {
price, ok := awsEnv.PricingProvider.OnDemandPrice("c98.large")
Expect(ok).To(BeTrue())
Expect(price).To(BeNumerically("==", 1.20))
Expect(getPricingEstimateMetricValue("c98.large", ec2.UsageClassTypeOnDemand, "")).To(BeNumerically("==", 1.20))

price, ok = awsEnv.PricingProvider.OnDemandPrice("c99.large")
Expect(ok).To(BeTrue())
Expect(price).To(BeNumerically("==", 1.23))
Expect(getPricingEstimateMetricValue("c99.large", ec2.UsageClassTypeOnDemand, "")).To(BeNumerically("==", 1.23))
})
It("should update spot pricing with response from the pricing API", func() {
now := time.Now()
Expand Down Expand Up @@ -171,12 +169,10 @@ var _ = Describe("Pricing", func() {
price, ok := awsEnv.PricingProvider.SpotPrice("c98.large", "test-zone-1b")
Expect(ok).To(BeTrue())
Expect(price).To(BeNumerically("==", 1.10))
Expect(getPricingEstimateMetricValue("c98.large", ec2.UsageClassTypeSpot, "test-zone-1b")).To(BeNumerically("==", 1.10))

price, ok = awsEnv.PricingProvider.SpotPrice("c99.large", "test-zone-1a")
Expect(ok).To(BeTrue())
Expect(price).To(BeNumerically("==", 1.23))
Expect(getPricingEstimateMetricValue("c99.large", ec2.UsageClassTypeSpot, "test-zone-1a")).To(BeNumerically("==", 1.23))
})
It("should update zonal pricing with data from the spot pricing API", func() {
now := time.Now()
Expand Down Expand Up @@ -207,7 +203,6 @@ var _ = Describe("Pricing", func() {
price, ok := awsEnv.PricingProvider.SpotPrice("c98.large", "test-zone-1a")
Expect(ok).To(BeTrue())
Expect(price).To(BeNumerically("==", 1.20))
Expect(getPricingEstimateMetricValue("c98.large", ec2.UsageClassTypeSpot, "test-zone-1a")).To(BeNumerically("==", 1.20))

_, ok = awsEnv.PricingProvider.SpotPrice("c98.large", "test-zone-1b")
Expect(ok).ToNot(BeTrue())
Expand Down Expand Up @@ -300,20 +295,5 @@ var _ = Describe("Pricing", func() {
price, ok = awsEnv.PricingProvider.SpotPrice("c98.large", "test-zone-1b")
Expect(ok).To(BeTrue())
Expect(price).To(BeNumerically("==", 1.10))
Expect(getPricingEstimateMetricValue("c98.large", ec2.UsageClassTypeSpot, "test-zone-1b")).To(BeNumerically("==", 1.10))
})
})

func getPricingEstimateMetricValue(instanceType string, capacityType string, zone string) float64 {
var value *float64
metric, ok := FindMetricWithLabelValues("karpenter_cloudprovider_instance_type_price_estimate", map[string]string{
pricing.InstanceTypeLabel: instanceType,
pricing.CapacityTypeLabel: capacityType,
pricing.RegionLabel: fake.DefaultRegion,
pricing.TopologyLabel: zone,
})
Expect(ok).To(BeTrue())
value = metric.GetGauge().Value
Expect(value).To(Not(BeNil()))
return *value
}
7 changes: 5 additions & 2 deletions website/content/en/preview/reference/metrics.md
Original file line number Diff line number Diff line change
Expand Up @@ -151,8 +151,11 @@ Current count of nodes in cluster state

## Cloudprovider Metrics

### `karpenter_cloudprovider_instance_type_price_estimate`
Estimated hourly price used when making informed decisions on node cost calculation. This is updated once on startup and then every 12 hours.
### `karpenter_cloudprovider_instance_type_offering_price_estimate`
Instance type offering estimated hourly price used when making informed decisions on node cost calculation, based on instance type, capacity type, and zone.

### `karpenter_cloudprovider_instance_type_offering_available`
Instance type offering availability, based on instance type, capacity type, and zone.

### `karpenter_cloudprovider_instance_type_memory_bytes`
Memory, in bytes, for a given instance type.
Expand Down
8 changes: 8 additions & 0 deletions website/content/en/preview/upgrading/upgrade-guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,14 @@ kubectl apply -f https://raw.githubusercontent.com/aws/karpenter{{< githubRelRef
WHEN CREATING A NEW SECTION OF THE UPGRADE GUIDANCE FOR NEWER VERSIONS, ENSURE THAT YOU COPY THE BETA API ALERT SECTION FROM THE LAST RELEASE TO PROPERLY WARN USERS OF THE RISK OF UPGRADING WITHOUT GOING TO 0.32.x FIRST
-->
### Upgrading to `0.36.0`+
{{% alert title="Warning" color="warning" %}}
`0.33.0`+ _only_ supports Karpenter v1beta1 APIs and will not work with existing Provisioner, AWSNodeTemplate or Machine alpha APIs. Do not upgrade to `0.36.0`+ without first [upgrading to `0.32.x`]({{<ref "#upgrading-to-0320" >}}). This version supports both the alpha and beta APIs, allowing you to migrate all of your existing APIs to beta APIs without experiencing downtime.
{{% /alert %}}
* Karpenter changed the name of the `karpenter_cloudprovider_instance_type_price_estimate` metric to `karpenter_cloudprovider_instance_type_offering_price_estimate` to align with the new `karpenter_cloudprovider_instance_type_offering_available` metric. The `region` label was also dropped from the metric, since this can be inferred from the environment that Karpenter is running in.
### Upgrading to `0.35.0`+
{{% alert title="Warning" color="warning" %}}
Expand Down

0 comments on commit 548f9f5

Please sign in to comment.