Skip to content

Commit

Permalink
Add instance type offering availability to metrics
Browse files Browse the repository at this point in the history
  • Loading branch information
jonathan-innis committed Mar 3, 2024
1 parent 1bf1f80 commit c35b71a
Show file tree
Hide file tree
Showing 7 changed files with 88 additions and 96 deletions.
25 changes: 17 additions & 8 deletions pkg/providers/instancetype/instancetype.go
Original file line number Diff line number Diff line change
Expand Up @@ -118,16 +118,15 @@ func (p *Provider) List(ctx context.Context, kc *corev1beta1.KubeletConfiguratio
return item.([]*cloudprovider.InstanceType), nil
}
result := lo.Map(instanceTypes, func(i *ec2.InstanceTypeInfo, _ int) *cloudprovider.InstanceType {
instanceTypeVCPU.With(prometheus.Labels{
instanceTypeLabel: *i.InstanceType,
}).Set(float64(aws.Int64Value(i.VCpuInfo.DefaultVCpus)))
instanceTypeMemory.With(prometheus.Labels{
instanceTypeLabel: *i.InstanceType,
}).Set(float64(aws.Int64Value(i.MemoryInfo.SizeInMiB) * 1024 * 1024))

return NewInstanceType(ctx, i, kc, p.region, nodeClass, p.createOfferings(ctx, i, instanceTypeOfferings[aws.StringValue(i.InstanceType)], zones, subnetZones))
})
for _, instanceType := range instanceTypes {
InstanceTypeVCPU.With(prometheus.Labels{
InstanceTypeLabel: *instanceType.InstanceType,
}).Set(float64(aws.Int64Value(instanceType.VCpuInfo.DefaultVCpus)))
InstanceTypeMemory.With(prometheus.Labels{
InstanceTypeLabel: *instanceType.InstanceType,
}).Set(float64(aws.Int64Value(instanceType.MemoryInfo.SizeInMiB) * 1024 * 1024))
}
p.cache.SetDefault(key, result)
return result, nil
}
Expand Down Expand Up @@ -167,6 +166,16 @@ func (p *Provider) createOfferings(ctx context.Context, instanceType *ec2.Instan
Price: price,
Available: available,
})
instanceTypeOfferingAvailable.With(prometheus.Labels{
instanceTypeLabel: *instanceType.InstanceType,
capacityTypeLabel: capacityType,
zoneLabel: zone,
}).Set(float64(lo.Ternary(available, 1, 0)))
instanceTypeOfferingPriceEstimate.With(prometheus.Labels{
instanceTypeLabel: *instanceType.InstanceType,
capacityTypeLabel: capacityType,
zoneLabel: zone,
}).Set(price)
}
}
return offerings
Expand Down
45 changes: 36 additions & 9 deletions pkg/providers/instancetype/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,34 +23,61 @@ import (

const (
// cloudProviderSubsystem is the Prometheus subsystem set on every gauge declared in this file.
cloudProviderSubsystem = "cloudprovider"
// Label names attached to the instance type gauges declared in this file.
instanceTypeLabel = "instance_type"
capacityTypeLabel = "capacity_type"
zoneLabel = "zone"
)

var (
InstanceTypeLabel = "instance_type"

InstanceTypeVCPU = prometheus.NewGaugeVec(
instanceTypeVCPU = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: metrics.Namespace,
Subsystem: cloudProviderSubsystem,
Name: "instance_type_cpu_cores",
Help: "VCPUs cores for a given instance type.",
},
[]string{
InstanceTypeLabel,
})

InstanceTypeMemory = prometheus.NewGaugeVec(
instanceTypeLabel,
},
)
instanceTypeMemory = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: metrics.Namespace,
Subsystem: cloudProviderSubsystem,
Name: "instance_type_memory_bytes",
Help: "Memory, in bytes, for a given instance type.",
},
[]string{
InstanceTypeLabel,
instanceTypeLabel,
},
)
// instanceTypeOfferingAvailable is a gauge vector reporting whether an instance type
// offering is available, labeled by instance type, capacity type, and zone.
// createOfferings sets it to 1 when the offering is available and 0 otherwise.
instanceTypeOfferingAvailable = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: metrics.Namespace,
Subsystem: cloudProviderSubsystem,
Name: "instance_type_offering_available",
Help: "Instance type offering availability, based on instance type, capacity type, and zone",
},
[]string{
instanceTypeLabel,
capacityTypeLabel,
zoneLabel,
},
)
// instanceTypeOfferingPriceEstimate is a gauge vector holding the estimated hourly
// price of an instance type offering, labeled by instance type, capacity type,
// and zone. createOfferings sets it to the price computed for each offering.
instanceTypeOfferingPriceEstimate = prometheus.NewGaugeVec(
	prometheus.GaugeOpts{
		Namespace: metrics.Namespace,
		Subsystem: cloudProviderSubsystem,
		Name:      "instance_type_offering_price_estimate",
		// Help text is user-facing (exposed on the metrics endpoint); keep it free of typos.
		Help: "Instance type offering estimated hourly price used when making informed decisions on node cost calculation, based on instance type, capacity type, and zone.",
	},
	[]string{
		instanceTypeLabel,
		capacityTypeLabel,
		zoneLabel,
	},
)
)

func init() {
crmetrics.Registry.MustRegister(InstanceTypeVCPU, InstanceTypeMemory)
crmetrics.Registry.MustRegister(instanceTypeVCPU, instanceTypeMemory, instanceTypeOfferingAvailable, instanceTypeOfferingPriceEstimate)
}
34 changes: 26 additions & 8 deletions pkg/providers/instancetype/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -743,12 +743,12 @@ var _ = Describe("InstanceTypes", func() {
})

It("should expose vcpu metrics for instance types", func() {
instanceInfo, err := awsEnv.InstanceTypesProvider.List(ctx, nodePool.Spec.Template.Spec.Kubelet, nodeClass)
instanceTypes, err := awsEnv.InstanceTypesProvider.List(ctx, nodePool.Spec.Template.Spec.Kubelet, nodeClass)
Expect(err).To(BeNil())
Expect(len(instanceInfo)).To(BeNumerically(">", 0))
for _, info := range instanceInfo {
Expect(len(instanceTypes)).To(BeNumerically(">", 0))
for _, it := range instanceTypes {
metric, ok := FindMetricWithLabelValues("karpenter_cloudprovider_instance_type_cpu_cores", map[string]string{
instancetype.InstanceTypeLabel: info.Name,
"instance_type": it.Name,
})
Expect(ok).To(BeTrue())
Expect(metric).To(Not(BeNil()))
Expand All @@ -757,19 +757,37 @@ var _ = Describe("InstanceTypes", func() {
}
})
It("should expose memory metrics for instance types", func() {
instanceInfo, err := awsEnv.InstanceTypesProvider.List(ctx, nodePool.Spec.Template.Spec.Kubelet, nodeClass)
instanceTypes, err := awsEnv.InstanceTypesProvider.List(ctx, nodePool.Spec.Template.Spec.Kubelet, nodeClass)
Expect(err).To(BeNil())
Expect(len(instanceInfo)).To(BeNumerically(">", 0))
for _, info := range instanceInfo {
Expect(len(instanceTypes)).To(BeNumerically(">", 0))
for _, it := range instanceTypes {
metric, ok := FindMetricWithLabelValues("karpenter_cloudprovider_instance_type_memory_bytes", map[string]string{
instancetype.InstanceTypeLabel: info.Name,
"instance_type": it.Name,
})
Expect(ok).To(BeTrue())
Expect(metric).To(Not(BeNil()))
value := metric.GetGauge().Value
Expect(aws.Float64Value(value)).To(BeNumerically(">", 0))
}
})
// Verifies that every offering of every instance type returned by List has a matching
// availability gauge, and that the gauge value mirrors the offering's Available flag.
It("should expose availability metrics for instance types", func() {
instanceTypes, err := awsEnv.InstanceTypesProvider.List(ctx, nodePool.Spec.Template.Spec.Kubelet, nodeClass)
Expect(err).To(BeNil())
Expect(len(instanceTypes)).To(BeNumerically(">", 0))
for _, it := range instanceTypes {
for _, of := range it.Offerings {
// Look the gauge up by the full (instance type, capacity type, zone) label set.
metric, ok := FindMetricWithLabelValues("karpenter_cloudprovider_instance_type_offering_available", map[string]string{
"instance_type": it.Name,
"capacity_type": of.CapacityType,
"zone": of.Zone,
})
Expect(ok).To(BeTrue())
Expect(metric).To(Not(BeNil()))
value := metric.GetGauge().Value
// The gauge is expected to be 1 for an available offering and 0 otherwise.
Expect(aws.Float64Value(value)).To(BeNumerically("==", lo.Ternary(of.Available, 1, 0)))
}
}
})
It("should launch instances in local zones", func() {
ExpectApplied(ctx, env.Client, nodePool, nodeClass)
pod := coretest.UnschedulablePod(coretest.PodOptions{
Expand Down
50 changes: 0 additions & 50 deletions pkg/providers/pricing/metrics.go

This file was deleted.

15 changes: 0 additions & 15 deletions pkg/providers/pricing/pricing.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ import (
"github.com/aws/aws-sdk-go/service/ec2/ec2iface"
"github.com/aws/aws-sdk-go/service/pricing"
"github.com/aws/aws-sdk-go/service/pricing/pricingiface"
"github.com/prometheus/client_golang/prometheus"
"github.com/samber/lo"
"go.uber.org/multierr"
"knative.dev/pkg/logging"
Expand Down Expand Up @@ -204,14 +203,6 @@ func (p *Provider) UpdateOnDemandPricing(ctx context.Context) error {
}

p.onDemandPrices = lo.Assign(onDemandPrices, onDemandMetalPrices)
for instanceType, price := range p.onDemandPrices {
InstancePriceEstimate.With(prometheus.Labels{
InstanceTypeLabel: instanceType,
CapacityTypeLabel: ec2.UsageClassTypeOnDemand,
RegionLabel: p.region,
TopologyLabel: "",
}).Set(price)
}
if p.cm.HasChanged("on-demand-prices", p.onDemandPrices) {
logging.FromContext(ctx).With("instance-type-count", len(p.onDemandPrices)).Debugf("updated on-demand pricing")
}
Expand Down Expand Up @@ -341,12 +332,6 @@ func (p *Provider) UpdateSpotPricing(ctx context.Context) error {
prices[instanceType] = map[string]float64{}
}
prices[instanceType][az] = spotPrice
InstancePriceEstimate.With(prometheus.Labels{
InstanceTypeLabel: instanceType,
CapacityTypeLabel: ec2.UsageClassTypeSpot,
RegionLabel: p.region,
TopologyLabel: az,
}).Set(spotPrice)
}
return true
})
Expand Down
8 changes: 4 additions & 4 deletions pkg/providers/pricing/suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -307,10 +307,10 @@ var _ = Describe("Pricing", func() {
func getPricingEstimateMetricValue(instanceType string, capacityType string, zone string) float64 {
var value *float64
metric, ok := FindMetricWithLabelValues("karpenter_cloudprovider_instance_type_price_estimate", map[string]string{
pricing.InstanceTypeLabel: instanceType,
pricing.CapacityTypeLabel: capacityType,
pricing.RegionLabel: fake.DefaultRegion,
pricing.TopologyLabel: zone,
"instance_type": instanceType,
"capacity_type": capacityType,
"region": fake.DefaultRegion,
"zone": zone,
})
Expect(ok).To(BeTrue())
value = metric.GetGauge().Value
Expand Down
7 changes: 5 additions & 2 deletions website/content/en/preview/reference/metrics.md
Original file line number Diff line number Diff line change
Expand Up @@ -151,8 +151,11 @@ Current count of nodes in cluster state

## Cloudprovider Metrics

### `karpenter_cloudprovider_instance_type_price_estimate`
Estimated hourly price used when making informed decisions on node cost calculation. This is updated once on startup and then every 12 hours.
### `karpenter_cloudprovider_instance_type_offering_price_estimate`
Instance type offering estimated hourly price used when making informed decisions on node cost calculation, based on instance type, capacity type, and zone.

### `karpenter_cloudprovider_instance_type_offering_available`
Instance type offering availability, based on instance type, capacity type, and zone.

### `karpenter_cloudprovider_instance_type_memory_bytes`
Memory, in bytes, for a given instance type.
Expand Down

0 comments on commit c35b71a

Please sign in to comment.