diff --git a/pkg/providers/instancetype/instancetype.go b/pkg/providers/instancetype/instancetype.go index 3f1a157aeac4..6540e2590037 100644 --- a/pkg/providers/instancetype/instancetype.go +++ b/pkg/providers/instancetype/instancetype.go @@ -118,16 +118,15 @@ func (p *Provider) List(ctx context.Context, kc *corev1beta1.KubeletConfiguratio return item.([]*cloudprovider.InstanceType), nil } result := lo.Map(instanceTypes, func(i *ec2.InstanceTypeInfo, _ int) *cloudprovider.InstanceType { + instanceTypeVCPU.With(prometheus.Labels{ + instanceTypeLabel: *i.InstanceType, + }).Set(float64(aws.Int64Value(i.VCpuInfo.DefaultVCpus))) + instanceTypeMemory.With(prometheus.Labels{ + instanceTypeLabel: *i.InstanceType, + }).Set(float64(aws.Int64Value(i.MemoryInfo.SizeInMiB) * 1024 * 1024)) + return NewInstanceType(ctx, i, kc, p.region, nodeClass, p.createOfferings(ctx, i, instanceTypeOfferings[aws.StringValue(i.InstanceType)], zones, subnetZones)) }) - for _, instanceType := range instanceTypes { - InstanceTypeVCPU.With(prometheus.Labels{ - InstanceTypeLabel: *instanceType.InstanceType, - }).Set(float64(aws.Int64Value(instanceType.VCpuInfo.DefaultVCpus))) - InstanceTypeMemory.With(prometheus.Labels{ - InstanceTypeLabel: *instanceType.InstanceType, - }).Set(float64(aws.Int64Value(instanceType.MemoryInfo.SizeInMiB) * 1024 * 1024)) - } p.cache.SetDefault(key, result) return result, nil } @@ -167,6 +166,16 @@ func (p *Provider) createOfferings(ctx context.Context, instanceType *ec2.Instan Price: price, Available: available, }) + instanceTypeOfferingAvailable.With(prometheus.Labels{ + instanceTypeLabel: *instanceType.InstanceType, + capacityTypeLabel: capacityType, + zoneLabel: zone, + }).Set(float64(lo.Ternary(available, 1, 0))) + instanceTypeOfferingPriceEstimate.With(prometheus.Labels{ + instanceTypeLabel: *instanceType.InstanceType, + capacityTypeLabel: capacityType, + zoneLabel: zone, + }).Set(price) } } return offerings diff --git a/pkg/providers/instancetype/metrics.go b/pkg/providers/instancetype/metrics.go index 5e343aa9b12b..5632d3dd14dc 100644 --- a/pkg/providers/instancetype/metrics.go +++ b/pkg/providers/instancetype/metrics.go @@ -23,12 +23,13 @@ import ( const ( cloudProviderSubsystem = "cloudprovider" + instanceTypeLabel = "instance_type" + capacityTypeLabel = "capacity_type" + zoneLabel = "zone" ) var ( - InstanceTypeLabel = "instance_type" - - InstanceTypeVCPU = prometheus.NewGaugeVec( + instanceTypeVCPU = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Namespace: metrics.Namespace, Subsystem: cloudProviderSubsystem, @@ -36,10 +37,10 @@ var ( Help: "VCPUs cores for a given instance type.", }, []string{ - InstanceTypeLabel, - }) - - InstanceTypeMemory = prometheus.NewGaugeVec( + instanceTypeLabel, + }, + ) + instanceTypeMemory = prometheus.NewGaugeVec( prometheus.GaugeOpts{ Namespace: metrics.Namespace, Subsystem: cloudProviderSubsystem, @@ -47,10 +48,36 @@ var ( Help: "Memory, in bytes, for a given instance type.", }, []string{ - InstanceTypeLabel, + instanceTypeLabel, + }, + ) + instanceTypeOfferingAvailable = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: metrics.Namespace, + Subsystem: cloudProviderSubsystem, + Name: "instance_type_offering_available", + Help: "Instance type offering availability, based on instance type, capacity type, and zone", + }, + []string{ + instanceTypeLabel, + capacityTypeLabel, + zoneLabel, + }, + ) + instanceTypeOfferingPriceEstimate = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Namespace: metrics.Namespace, + Subsystem: cloudProviderSubsystem, + Name: "instance_type_offering_price_estimate", + Help: "Instance type offering estimated estimated hourly price used when making informed decisions on node cost calculation, based on instance type, capacity type, and zone.", + }, + []string{ + instanceTypeLabel, + capacityTypeLabel, + zoneLabel, }) ) func init() { - crmetrics.Registry.MustRegister(InstanceTypeVCPU, InstanceTypeMemory) + crmetrics.Registry.MustRegister(instanceTypeVCPU, instanceTypeMemory, instanceTypeOfferingAvailable, instanceTypeOfferingPriceEstimate) } diff --git a/pkg/providers/instancetype/suite_test.go b/pkg/providers/instancetype/suite_test.go index 052b362e0418..9f251e5f395f 100644 --- a/pkg/providers/instancetype/suite_test.go +++ b/pkg/providers/instancetype/suite_test.go @@ -741,34 +741,71 @@ var _ = Describe("InstanceTypes", func() { Expect(it.Capacity.Pods().Value()).To(BeNumerically("==", 110)) } }) - - It("should expose vcpu metrics for instance types", func() { - instanceInfo, err := awsEnv.InstanceTypesProvider.List(ctx, nodePool.Spec.Template.Spec.Kubelet, nodeClass) - Expect(err).To(BeNil()) - Expect(len(instanceInfo)).To(BeNumerically(">", 0)) - for _, info := range instanceInfo { - metric, ok := FindMetricWithLabelValues("karpenter_cloudprovider_instance_type_cpu_cores", map[string]string{ - instancetype.InstanceTypeLabel: info.Name, - }) - Expect(ok).To(BeTrue()) - Expect(metric).To(Not(BeNil())) - value := metric.GetGauge().Value - Expect(aws.Float64Value(value)).To(BeNumerically(">", 0)) - } - }) - It("should expose memory metrics for instance types", func() { - instanceInfo, err := awsEnv.InstanceTypesProvider.List(ctx, nodePool.Spec.Template.Spec.Kubelet, nodeClass) - Expect(err).To(BeNil()) - Expect(len(instanceInfo)).To(BeNumerically(">", 0)) - for _, info := range instanceInfo { - metric, ok := FindMetricWithLabelValues("karpenter_cloudprovider_instance_type_memory_bytes", map[string]string{ - instancetype.InstanceTypeLabel: info.Name, - }) - Expect(ok).To(BeTrue()) - Expect(metric).To(Not(BeNil())) - value := metric.GetGauge().Value - Expect(aws.Float64Value(value)).To(BeNumerically(">", 0)) - } + Context("Metrics", func() { + It("should expose vcpu metrics for instance types", func() { + instanceTypes, err := awsEnv.InstanceTypesProvider.List(ctx, nodePool.Spec.Template.Spec.Kubelet, nodeClass) + Expect(err).To(BeNil()) + Expect(len(instanceTypes)).To(BeNumerically(">", 0)) + for _, it := range instanceTypes { + metric, ok := FindMetricWithLabelValues("karpenter_cloudprovider_instance_type_cpu_cores", map[string]string{ + "instance_type": it.Name, + }) + Expect(ok).To(BeTrue()) + Expect(metric).To(Not(BeNil())) + value := metric.GetGauge().Value + Expect(aws.Float64Value(value)).To(BeNumerically(">", 0)) + } + }) + It("should expose memory metrics for instance types", func() { + instanceTypes, err := awsEnv.InstanceTypesProvider.List(ctx, nodePool.Spec.Template.Spec.Kubelet, nodeClass) + Expect(err).To(BeNil()) + Expect(len(instanceTypes)).To(BeNumerically(">", 0)) + for _, it := range instanceTypes { + metric, ok := FindMetricWithLabelValues("karpenter_cloudprovider_instance_type_memory_bytes", map[string]string{ + "instance_type": it.Name, + }) + Expect(ok).To(BeTrue()) + Expect(metric).To(Not(BeNil())) + value := metric.GetGauge().Value + Expect(aws.Float64Value(value)).To(BeNumerically(">", 0)) + } + }) + It("should expose availability metrics for instance types", func() { + instanceTypes, err := awsEnv.InstanceTypesProvider.List(ctx, nodePool.Spec.Template.Spec.Kubelet, nodeClass) + Expect(err).To(BeNil()) + Expect(len(instanceTypes)).To(BeNumerically(">", 0)) + for _, it := range instanceTypes { + for _, of := range it.Offerings { + metric, ok := FindMetricWithLabelValues("karpenter_cloudprovider_instance_type_offering_available", map[string]string{ + "instance_type": it.Name, + "capacity_type": of.CapacityType, + "zone": of.Zone, + }) + Expect(ok).To(BeTrue()) + Expect(metric).To(Not(BeNil())) + value := metric.GetGauge().Value + Expect(aws.Float64Value(value)).To(BeNumerically("==", lo.Ternary(of.Available, 1, 0))) + } + } + }) + It("should expose pricing metrics for instance types", func() { + instanceTypes, err := awsEnv.InstanceTypesProvider.List(ctx, nodePool.Spec.Template.Spec.Kubelet, nodeClass) + Expect(err).To(BeNil()) + Expect(len(instanceTypes)).To(BeNumerically(">", 0)) + for _, it := range instanceTypes { + for _, of := range it.Offerings { + metric, ok := FindMetricWithLabelValues("karpenter_cloudprovider_instance_type_offering_price_estimate", map[string]string{ + "instance_type": it.Name, + "capacity_type": of.CapacityType, + "zone": of.Zone, + }) + Expect(ok).To(BeTrue()) + Expect(metric).To(Not(BeNil())) + value := metric.GetGauge().Value + Expect(aws.Float64Value(value)).To(BeNumerically("==", of.Price)) + } + } + }) }) It("should launch instances in local zones", func() { ExpectApplied(ctx, env.Client, nodePool, nodeClass) diff --git a/pkg/providers/pricing/metrics.go b/pkg/providers/pricing/metrics.go deleted file mode 100644 index 60c053580439..000000000000 --- a/pkg/providers/pricing/metrics.go +++ /dev/null @@ -1,50 +0,0 @@ -/* -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package pricing - -import ( - "github.com/prometheus/client_golang/prometheus" - crmetrics "sigs.k8s.io/controller-runtime/pkg/metrics" - - "sigs.k8s.io/karpenter/pkg/metrics" -) - -const ( - cloudProviderSubsystem = "cloudprovider" -) - -var ( - InstanceTypeLabel = "instance_type" - CapacityTypeLabel = "capacity_type" - RegionLabel = "region" - TopologyLabel = "zone" - InstancePriceEstimate = prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Namespace: metrics.Namespace, - Subsystem: cloudProviderSubsystem, - Name: "instance_type_price_estimate", - Help: "Estimated hourly price used when making informed decisions on node cost calculation. This is updated once on startup and then every 12 hours.", - }, - []string{ - InstanceTypeLabel, - CapacityTypeLabel, - RegionLabel, - TopologyLabel, - }) -) - -func init() { - crmetrics.Registry.MustRegister(InstancePriceEstimate) -} diff --git a/pkg/providers/pricing/pricing.go b/pkg/providers/pricing/pricing.go index ee97df2d9594..b2c5456a2c96 100644 --- a/pkg/providers/pricing/pricing.go +++ b/pkg/providers/pricing/pricing.go @@ -33,7 +33,6 @@ import ( "github.com/aws/aws-sdk-go/service/ec2/ec2iface" "github.com/aws/aws-sdk-go/service/pricing" "github.com/aws/aws-sdk-go/service/pricing/pricingiface" - "github.com/prometheus/client_golang/prometheus" "github.com/samber/lo" "go.uber.org/multierr" "knative.dev/pkg/logging" @@ -204,14 +203,6 @@ func (p *Provider) UpdateOnDemandPricing(ctx context.Context) error { } p.onDemandPrices = lo.Assign(onDemandPrices, onDemandMetalPrices) - for instanceType, price := range p.onDemandPrices { - InstancePriceEstimate.With(prometheus.Labels{ - InstanceTypeLabel: instanceType, - CapacityTypeLabel: ec2.UsageClassTypeOnDemand, - RegionLabel: p.region, - TopologyLabel: "", - }).Set(price) - } if p.cm.HasChanged("on-demand-prices", p.onDemandPrices) { logging.FromContext(ctx).With("instance-type-count", len(p.onDemandPrices)).Debugf("updated on-demand pricing") } @@ -341,12 +332,6 @@ func (p *Provider) UpdateSpotPricing(ctx context.Context) error { prices[instanceType] = map[string]float64{} } prices[instanceType][az] = spotPrice - InstancePriceEstimate.With(prometheus.Labels{ - InstanceTypeLabel: instanceType, - CapacityTypeLabel: ec2.UsageClassTypeSpot, - RegionLabel: p.region, - TopologyLabel: az, - }).Set(spotPrice) } return true }) diff --git a/pkg/providers/pricing/suite_test.go b/pkg/providers/pricing/suite_test.go index 0c5a21d12a28..3f59751bff18 100644 --- a/pkg/providers/pricing/suite_test.go +++ b/pkg/providers/pricing/suite_test.go @@ -123,12 +123,10 @@ var _ = Describe("Pricing", func() { price, ok := awsEnv.PricingProvider.OnDemandPrice("c98.large") Expect(ok).To(BeTrue()) Expect(price).To(BeNumerically("==", 1.20)) - Expect(getPricingEstimateMetricValue("c98.large", ec2.UsageClassTypeOnDemand, "")).To(BeNumerically("==", 1.20)) price, ok = awsEnv.PricingProvider.OnDemandPrice("c99.large") Expect(ok).To(BeTrue()) Expect(price).To(BeNumerically("==", 1.23)) - Expect(getPricingEstimateMetricValue("c99.large", ec2.UsageClassTypeOnDemand, "")).To(BeNumerically("==", 1.23)) }) It("should update spot pricing with response from the pricing API", func() { now := time.Now() @@ -171,12 +169,10 @@ var _ = Describe("Pricing", func() { price, ok := awsEnv.PricingProvider.SpotPrice("c98.large", "test-zone-1b") Expect(ok).To(BeTrue()) Expect(price).To(BeNumerically("==", 1.10)) - Expect(getPricingEstimateMetricValue("c98.large", ec2.UsageClassTypeSpot, "test-zone-1b")).To(BeNumerically("==", 1.10)) price, ok = awsEnv.PricingProvider.SpotPrice("c99.large", "test-zone-1a") Expect(ok).To(BeTrue()) Expect(price).To(BeNumerically("==", 1.23)) - Expect(getPricingEstimateMetricValue("c99.large", ec2.UsageClassTypeSpot, "test-zone-1a")).To(BeNumerically("==", 1.23)) }) It("should update zonal pricing with data from the spot pricing API", func() { now := time.Now() @@ -207,7 +203,6 @@ var _ = Describe("Pricing", func() { price, ok := awsEnv.PricingProvider.SpotPrice("c98.large", "test-zone-1a") Expect(ok).To(BeTrue()) Expect(price).To(BeNumerically("==", 1.20)) - Expect(getPricingEstimateMetricValue("c98.large", ec2.UsageClassTypeSpot, "test-zone-1a")).To(BeNumerically("==", 1.20)) _, ok = awsEnv.PricingProvider.SpotPrice("c98.large", "test-zone-1b") Expect(ok).ToNot(BeTrue()) @@ -300,20 +295,5 @@ var _ = Describe("Pricing", func() { price, ok = awsEnv.PricingProvider.SpotPrice("c98.large", "test-zone-1b") Expect(ok).To(BeTrue()) Expect(price).To(BeNumerically("==", 1.10)) - Expect(getPricingEstimateMetricValue("c98.large", ec2.UsageClassTypeSpot, "test-zone-1b")).To(BeNumerically("==", 1.10)) }) }) - -func getPricingEstimateMetricValue(instanceType string, capacityType string, zone string) float64 { - var value *float64 - metric, ok := FindMetricWithLabelValues("karpenter_cloudprovider_instance_type_price_estimate", map[string]string{ - pricing.InstanceTypeLabel: instanceType, - pricing.CapacityTypeLabel: capacityType, - pricing.RegionLabel: fake.DefaultRegion, - pricing.TopologyLabel: zone, - }) - Expect(ok).To(BeTrue()) - value = metric.GetGauge().Value - Expect(value).To(Not(BeNil())) - return *value -} diff --git a/website/content/en/preview/reference/metrics.md b/website/content/en/preview/reference/metrics.md index b7cf1366f50f..c2c3ba455dcd 100644 --- a/website/content/en/preview/reference/metrics.md +++ b/website/content/en/preview/reference/metrics.md @@ -151,8 +151,11 @@ Current count of nodes in cluster state ## Cloudprovider Metrics -### `karpenter_cloudprovider_instance_type_price_estimate` -Estimated hourly price used when making informed decisions on node cost calculation. This is updated once on startup and then every 12 hours. +### `karpenter_cloudprovider_instance_type_offering_price_estimate` +Instance type offering estimated estimated hourly price used when making informed decisions on node cost calculation, based on instance type, capacity type, and zone. + +### `karpenter_cloudprovider_instance_type_offering_available` +Instance type offering availability, based on instance type, capacity type, and zone ### `karpenter_cloudprovider_instance_type_memory_bytes` Memory, in bytes, for a given instance type.