Skip to content

Commit

Permalink
Add instance type offering availability to metrics
Browse files Browse the repository at this point in the history
  • Loading branch information
jonathan-innis committed Mar 3, 2024
1 parent 1bf1f80 commit 2bfb54b
Show file tree
Hide file tree
Showing 7 changed files with 123 additions and 132 deletions.
25 changes: 17 additions & 8 deletions pkg/providers/instancetype/instancetype.go
Original file line number Diff line number Diff line change
Expand Up @@ -118,16 +118,15 @@ func (p *Provider) List(ctx context.Context, kc *corev1beta1.KubeletConfiguratio
return item.([]*cloudprovider.InstanceType), nil
}
result := lo.Map(instanceTypes, func(i *ec2.InstanceTypeInfo, _ int) *cloudprovider.InstanceType {
instanceTypeVCPU.With(prometheus.Labels{
instanceTypeLabel: *i.InstanceType,
}).Set(float64(aws.Int64Value(i.VCpuInfo.DefaultVCpus)))
instanceTypeMemory.With(prometheus.Labels{
instanceTypeLabel: *i.InstanceType,
}).Set(float64(aws.Int64Value(i.MemoryInfo.SizeInMiB) * 1024 * 1024))

return NewInstanceType(ctx, i, kc, p.region, nodeClass, p.createOfferings(ctx, i, instanceTypeOfferings[aws.StringValue(i.InstanceType)], zones, subnetZones))
})
for _, instanceType := range instanceTypes {
InstanceTypeVCPU.With(prometheus.Labels{
InstanceTypeLabel: *instanceType.InstanceType,
}).Set(float64(aws.Int64Value(instanceType.VCpuInfo.DefaultVCpus)))
InstanceTypeMemory.With(prometheus.Labels{
InstanceTypeLabel: *instanceType.InstanceType,
}).Set(float64(aws.Int64Value(instanceType.MemoryInfo.SizeInMiB) * 1024 * 1024))
}
p.cache.SetDefault(key, result)
return result, nil
}
Expand Down Expand Up @@ -167,6 +166,16 @@ func (p *Provider) createOfferings(ctx context.Context, instanceType *ec2.Instan
Price: price,
Available: available,
})
instanceTypeOfferingAvailable.With(prometheus.Labels{
instanceTypeLabel: *instanceType.InstanceType,
capacityTypeLabel: capacityType,
zoneLabel: zone,
}).Set(float64(lo.Ternary(available, 1, 0)))
instanceTypeOfferingPriceEstimate.With(prometheus.Labels{
instanceTypeLabel: *instanceType.InstanceType,
capacityTypeLabel: capacityType,
zoneLabel: zone,
}).Set(price)
}
}
return offerings
Expand Down
45 changes: 36 additions & 9 deletions pkg/providers/instancetype/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,34 +23,61 @@ import (

const (
cloudProviderSubsystem = "cloudprovider"
instanceTypeLabel = "instance_type"
capacityTypeLabel = "capacity_type"
zoneLabel = "zone"
)

var (
InstanceTypeLabel = "instance_type"

InstanceTypeVCPU = prometheus.NewGaugeVec(
instanceTypeVCPU = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: metrics.Namespace,
Subsystem: cloudProviderSubsystem,
Name: "instance_type_cpu_cores",
Help: "VCPUs cores for a given instance type.",
},
[]string{
InstanceTypeLabel,
})

InstanceTypeMemory = prometheus.NewGaugeVec(
instanceTypeLabel,
},
)
instanceTypeMemory = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: metrics.Namespace,
Subsystem: cloudProviderSubsystem,
Name: "instance_type_memory_bytes",
Help: "Memory, in bytes, for a given instance type.",
},
[]string{
InstanceTypeLabel,
instanceTypeLabel,
},
)
// instanceTypeOfferingAvailable is a gauge vector keyed by instance type,
// capacity type, and zone. createOfferings sets each series to 1 when the
// offering is available and 0 otherwise.
instanceTypeOfferingAvailable = prometheus.NewGaugeVec(
	prometheus.GaugeOpts{
		Namespace: metrics.Namespace,
		Subsystem: cloudProviderSubsystem,
		Name:      "instance_type_offering_available",
		// Terminated with a period to match the Help text of the sibling gauges.
		Help: "Instance type offering availability, based on instance type, capacity type, and zone.",
	},
	[]string{
		instanceTypeLabel,
		capacityTypeLabel,
		zoneLabel,
	},
)
// instanceTypeOfferingPriceEstimate is a gauge vector keyed by instance type,
// capacity type, and zone. createOfferings sets each series to the price it
// computed for that offering.
instanceTypeOfferingPriceEstimate = prometheus.NewGaugeVec(
	prometheus.GaugeOpts{
		Namespace: metrics.Namespace,
		Subsystem: cloudProviderSubsystem,
		Name:      "instance_type_offering_price_estimate",
		// The previous Help text repeated the word "estimated".
		Help: "Instance type offering estimated hourly price used when making informed decisions on node cost calculation, based on instance type, capacity type, and zone.",
	},
	[]string{
		instanceTypeLabel,
		capacityTypeLabel,
		zoneLabel,
	},
)
)

func init() {
crmetrics.Registry.MustRegister(InstanceTypeVCPU, InstanceTypeMemory)
crmetrics.Registry.MustRegister(instanceTypeVCPU, instanceTypeMemory, instanceTypeOfferingAvailable, instanceTypeOfferingPriceEstimate)
}
93 changes: 65 additions & 28 deletions pkg/providers/instancetype/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -741,34 +741,71 @@ var _ = Describe("InstanceTypes", func() {
Expect(it.Capacity.Pods().Value()).To(BeNumerically("==", 110))
}
})

It("should expose vcpu metrics for instance types", func() {
instanceInfo, err := awsEnv.InstanceTypesProvider.List(ctx, nodePool.Spec.Template.Spec.Kubelet, nodeClass)
Expect(err).To(BeNil())
Expect(len(instanceInfo)).To(BeNumerically(">", 0))
for _, info := range instanceInfo {
metric, ok := FindMetricWithLabelValues("karpenter_cloudprovider_instance_type_cpu_cores", map[string]string{
instancetype.InstanceTypeLabel: info.Name,
})
Expect(ok).To(BeTrue())
Expect(metric).To(Not(BeNil()))
value := metric.GetGauge().Value
Expect(aws.Float64Value(value)).To(BeNumerically(">", 0))
}
})
It("should expose memory metrics for instance types", func() {
instanceInfo, err := awsEnv.InstanceTypesProvider.List(ctx, nodePool.Spec.Template.Spec.Kubelet, nodeClass)
Expect(err).To(BeNil())
Expect(len(instanceInfo)).To(BeNumerically(">", 0))
for _, info := range instanceInfo {
metric, ok := FindMetricWithLabelValues("karpenter_cloudprovider_instance_type_memory_bytes", map[string]string{
instancetype.InstanceTypeLabel: info.Name,
})
Expect(ok).To(BeTrue())
Expect(metric).To(Not(BeNil()))
value := metric.GetGauge().Value
Expect(aws.Float64Value(value)).To(BeNumerically(">", 0))
}
Context("Metrics", func() {
It("should expose vcpu metrics for instance types", func() {
instanceTypes, err := awsEnv.InstanceTypesProvider.List(ctx, nodePool.Spec.Template.Spec.Kubelet, nodeClass)
Expect(err).To(BeNil())
Expect(len(instanceTypes)).To(BeNumerically(">", 0))
for _, it := range instanceTypes {
metric, ok := FindMetricWithLabelValues("karpenter_cloudprovider_instance_type_cpu_cores", map[string]string{
"instance_type": it.Name,
})
Expect(ok).To(BeTrue())
Expect(metric).To(Not(BeNil()))
value := metric.GetGauge().Value
Expect(aws.Float64Value(value)).To(BeNumerically(">", 0))
}
})
It("should expose memory metrics for instance types", func() {
instanceTypes, err := awsEnv.InstanceTypesProvider.List(ctx, nodePool.Spec.Template.Spec.Kubelet, nodeClass)
Expect(err).To(BeNil())
Expect(len(instanceTypes)).To(BeNumerically(">", 0))
for _, it := range instanceTypes {
metric, ok := FindMetricWithLabelValues("karpenter_cloudprovider_instance_type_memory_bytes", map[string]string{
"instance_type": it.Name,
})
Expect(ok).To(BeTrue())
Expect(metric).To(Not(BeNil()))
value := metric.GetGauge().Value
Expect(aws.Float64Value(value)).To(BeNumerically(">", 0))
}
})
It("should expose availability metrics for instance types", func() {
instanceTypes, err := awsEnv.InstanceTypesProvider.List(ctx, nodePool.Spec.Template.Spec.Kubelet, nodeClass)
Expect(err).To(BeNil())
Expect(len(instanceTypes)).To(BeNumerically(">", 0))
for _, it := range instanceTypes {
for _, of := range it.Offerings {
metric, ok := FindMetricWithLabelValues("karpenter_cloudprovider_instance_type_offering_available", map[string]string{
"instance_type": it.Name,
"capacity_type": of.CapacityType,
"zone": of.Zone,
})
Expect(ok).To(BeTrue())
Expect(metric).To(Not(BeNil()))
value := metric.GetGauge().Value
Expect(aws.Float64Value(value)).To(BeNumerically("==", lo.Ternary(of.Available, 1, 0)))
}
}
})
It("should expose pricing metrics for instance types", func() {
instanceTypes, err := awsEnv.InstanceTypesProvider.List(ctx, nodePool.Spec.Template.Spec.Kubelet, nodeClass)
Expect(err).To(BeNil())
Expect(len(instanceTypes)).To(BeNumerically(">", 0))
for _, it := range instanceTypes {
for _, of := range it.Offerings {
metric, ok := FindMetricWithLabelValues("karpenter_cloudprovider_instance_type_offering_price_estimate", map[string]string{
"instance_type": it.Name,
"capacity_type": of.CapacityType,
"zone": of.Zone,
})
Expect(ok).To(BeTrue())
Expect(metric).To(Not(BeNil()))
value := metric.GetGauge().Value
Expect(aws.Float64Value(value)).To(BeNumerically("==", of.Price))
}
}
})
})
It("should launch instances in local zones", func() {
ExpectApplied(ctx, env.Client, nodePool, nodeClass)
Expand Down
50 changes: 0 additions & 50 deletions pkg/providers/pricing/metrics.go

This file was deleted.

15 changes: 0 additions & 15 deletions pkg/providers/pricing/pricing.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ import (
"github.com/aws/aws-sdk-go/service/ec2/ec2iface"
"github.com/aws/aws-sdk-go/service/pricing"
"github.com/aws/aws-sdk-go/service/pricing/pricingiface"
"github.com/prometheus/client_golang/prometheus"
"github.com/samber/lo"
"go.uber.org/multierr"
"knative.dev/pkg/logging"
Expand Down Expand Up @@ -204,14 +203,6 @@ func (p *Provider) UpdateOnDemandPricing(ctx context.Context) error {
}

p.onDemandPrices = lo.Assign(onDemandPrices, onDemandMetalPrices)
for instanceType, price := range p.onDemandPrices {
InstancePriceEstimate.With(prometheus.Labels{
InstanceTypeLabel: instanceType,
CapacityTypeLabel: ec2.UsageClassTypeOnDemand,
RegionLabel: p.region,
TopologyLabel: "",
}).Set(price)
}
if p.cm.HasChanged("on-demand-prices", p.onDemandPrices) {
logging.FromContext(ctx).With("instance-type-count", len(p.onDemandPrices)).Debugf("updated on-demand pricing")
}
Expand Down Expand Up @@ -341,12 +332,6 @@ func (p *Provider) UpdateSpotPricing(ctx context.Context) error {
prices[instanceType] = map[string]float64{}
}
prices[instanceType][az] = spotPrice
InstancePriceEstimate.With(prometheus.Labels{
InstanceTypeLabel: instanceType,
CapacityTypeLabel: ec2.UsageClassTypeSpot,
RegionLabel: p.region,
TopologyLabel: az,
}).Set(spotPrice)
}
return true
})
Expand Down
20 changes: 0 additions & 20 deletions pkg/providers/pricing/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -123,12 +123,10 @@ var _ = Describe("Pricing", func() {
price, ok := awsEnv.PricingProvider.OnDemandPrice("c98.large")
Expect(ok).To(BeTrue())
Expect(price).To(BeNumerically("==", 1.20))
Expect(getPricingEstimateMetricValue("c98.large", ec2.UsageClassTypeOnDemand, "")).To(BeNumerically("==", 1.20))

price, ok = awsEnv.PricingProvider.OnDemandPrice("c99.large")
Expect(ok).To(BeTrue())
Expect(price).To(BeNumerically("==", 1.23))
Expect(getPricingEstimateMetricValue("c99.large", ec2.UsageClassTypeOnDemand, "")).To(BeNumerically("==", 1.23))
})
It("should update spot pricing with response from the pricing API", func() {
now := time.Now()
Expand Down Expand Up @@ -171,12 +169,10 @@ var _ = Describe("Pricing", func() {
price, ok := awsEnv.PricingProvider.SpotPrice("c98.large", "test-zone-1b")
Expect(ok).To(BeTrue())
Expect(price).To(BeNumerically("==", 1.10))
Expect(getPricingEstimateMetricValue("c98.large", ec2.UsageClassTypeSpot, "test-zone-1b")).To(BeNumerically("==", 1.10))

price, ok = awsEnv.PricingProvider.SpotPrice("c99.large", "test-zone-1a")
Expect(ok).To(BeTrue())
Expect(price).To(BeNumerically("==", 1.23))
Expect(getPricingEstimateMetricValue("c99.large", ec2.UsageClassTypeSpot, "test-zone-1a")).To(BeNumerically("==", 1.23))
})
It("should update zonal pricing with data from the spot pricing API", func() {
now := time.Now()
Expand Down Expand Up @@ -207,7 +203,6 @@ var _ = Describe("Pricing", func() {
price, ok := awsEnv.PricingProvider.SpotPrice("c98.large", "test-zone-1a")
Expect(ok).To(BeTrue())
Expect(price).To(BeNumerically("==", 1.20))
Expect(getPricingEstimateMetricValue("c98.large", ec2.UsageClassTypeSpot, "test-zone-1a")).To(BeNumerically("==", 1.20))

_, ok = awsEnv.PricingProvider.SpotPrice("c98.large", "test-zone-1b")
Expect(ok).ToNot(BeTrue())
Expand Down Expand Up @@ -300,20 +295,5 @@ var _ = Describe("Pricing", func() {
price, ok = awsEnv.PricingProvider.SpotPrice("c98.large", "test-zone-1b")
Expect(ok).To(BeTrue())
Expect(price).To(BeNumerically("==", 1.10))
Expect(getPricingEstimateMetricValue("c98.large", ec2.UsageClassTypeSpot, "test-zone-1b")).To(BeNumerically("==", 1.10))
})
})

func getPricingEstimateMetricValue(instanceType string, capacityType string, zone string) float64 {
var value *float64
metric, ok := FindMetricWithLabelValues("karpenter_cloudprovider_instance_type_price_estimate", map[string]string{
pricing.InstanceTypeLabel: instanceType,
pricing.CapacityTypeLabel: capacityType,
pricing.RegionLabel: fake.DefaultRegion,
pricing.TopologyLabel: zone,
})
Expect(ok).To(BeTrue())
value = metric.GetGauge().Value
Expect(value).To(Not(BeNil()))
return *value
}
7 changes: 5 additions & 2 deletions website/content/en/preview/reference/metrics.md
Original file line number Diff line number Diff line change
Expand Up @@ -151,8 +151,11 @@ Current count of nodes in cluster state

## Cloudprovider Metrics

### `karpenter_cloudprovider_instance_type_price_estimate`
Estimated hourly price used when making informed decisions on node cost calculation. This is updated once on startup and then every 12 hours.
### `karpenter_cloudprovider_instance_type_offering_price_estimate`
Instance type offering estimated hourly price used when making informed decisions on node cost calculation, based on instance type, capacity type, and zone.

### `karpenter_cloudprovider_instance_type_offering_available`
Instance type offering availability, based on instance type, capacity type, and zone.

### `karpenter_cloudprovider_instance_type_memory_bytes`
Memory, in bytes, for a given instance type.
Expand Down

0 comments on commit 2bfb54b

Please sign in to comment.