From a9d281dbbaf06c5bae67f738b91fe12e8d5b3189 Mon Sep 17 00:00:00 2001 From: LeaveMyYard Date: Mon, 4 Sep 2023 17:00:32 +0300 Subject: Aggregate in case of node change --- .../core/integrations/prometheus/metrics/cpu.py | 52 +++++++++++++--------- .../core/integrations/prometheus/metrics/memory.py | 30 ++++++++----- robusta_krr/strategies/simple.py | 2 +- 3 files changed, 49 insertions(+), 35 deletions(-) diff --git a/robusta_krr/core/integrations/prometheus/metrics/cpu.py b/robusta_krr/core/integrations/prometheus/metrics/cpu.py index b9c281d..e61a986 100644 --- a/robusta_krr/core/integrations/prometheus/metrics/cpu.py +++ b/robusta_krr/core/integrations/prometheus/metrics/cpu.py @@ -8,14 +8,16 @@ class CPULoader(QueryRangeMetric, FilterJobsMixin, BatchedRequestMixin): pods_selector = "|".join(pod.name for pod in object.pods) cluster_label = self.get_prometheus_cluster_label() return f""" - rate( - container_cpu_usage_seconds_total{{ - namespace="{object.namespace}", - pod=~"{pods_selector}", - container="{object.container}" - {cluster_label} - }}[{step}] - ) + sum( + rate( + container_cpu_usage_seconds_total{{ + namespace="{object.namespace}", + pod=~"{pods_selector}", + container="{object.container}" + {cluster_label} + }}[{step}] + ) + ) by (container, pod, job) """ @@ -27,14 +29,17 @@ def PercentileCPULoader(percentile: float) -> type[QueryMetric]: return f""" quantile_over_time( {round(percentile / 100, 2)}, - rate( - container_cpu_usage_seconds_total{{ - namespace="{object.namespace}", - pod=~"{pods_selector}", - container="{object.container}" - {cluster_label} - }}[{step}] - )[{duration}:{step}] + sum( + rate( + container_cpu_usage_seconds_total{{ + namespace="{object.namespace}", + pod=~"{pods_selector}", + container="{object.container}" + {cluster_label} + }}[{step}] + ) + ) by (container, pod, job) + [{duration}:{step}] ) """ @@ -47,11 +52,14 @@ class CPUAmountLoader(QueryMetric, FilterJobsMixin, BatchedRequestMixin): cluster_label = self.get_prometheus_cluster_label() return f""" count_over_time( - container_cpu_usage_seconds_total{{ - namespace="{object.namespace}", - pod=~"{pods_selector}", - container="{object.container}" - {cluster_label} - }}[{duration}] + sum( + container_cpu_usage_seconds_total{{ + namespace="{object.namespace}", + pod=~"{pods_selector}", + container="{object.container}" + {cluster_label} + }} + ) by (container, pod, job) + [{duration}:{step}] ) """ diff --git a/robusta_krr/core/integrations/prometheus/metrics/memory.py b/robusta_krr/core/integrations/prometheus/metrics/memory.py index 21843b9..dc75f91 100644 --- a/robusta_krr/core/integrations/prometheus/metrics/memory.py +++ b/robusta_krr/core/integrations/prometheus/metrics/memory.py @@ -25,12 +25,15 @@ class MaxMemoryLoader(QueryMetric, FilterJobsMixin, BatchedRequestMixin): cluster_label = self.get_prometheus_cluster_label() return f""" max_over_time( - container_memory_working_set_bytes{{ - namespace="{object.namespace}", - pod=~"{pods_selector}", - container="{object.container}" - {cluster_label} - }}[{duration}:{step}] + sum( + container_memory_working_set_bytes{{ + namespace="{object.namespace}", + pod=~"{pods_selector}", + container="{object.container}" + {cluster_label} + }} + ) by (container, pod, job) + [{duration}:{step}] ) """ @@ -41,11 +44,14 @@ class MemoryAmountLoader(QueryMetric, FilterJobsMixin, BatchedRequestMixin): cluster_label = self.get_prometheus_cluster_label() return f""" count_over_time( - container_memory_working_set_bytes{{ - namespace="{object.namespace}", - pod=~"{pods_selector}", - container="{object.container}" - {cluster_label} - }}[{duration}:{step}] + sum( + container_memory_working_set_bytes{{ + namespace="{object.namespace}", + pod=~"{pods_selector}", + container="{object.container}" + {cluster_label} + }} + ) by (container, pod, job) + [{duration}:{step}] ) """ diff --git a/robusta_krr/strategies/simple.py b/robusta_krr/strategies/simple.py index 93efb5f..465a6f8 100644 --- a/robusta_krr/strategies/simple.py +++ b/robusta_krr/strategies/simple.py @@ -88,7 +88,7 @@ class SimpleStrategy(BaseStrategy[SimpleStrategySettings]): if object_data.hpa is not None and object_data.hpa.target_cpu_utilization_percentage is not None: return ResourceRecommendation.undefined(info="HPA detected") - cpu_usage = self.settings.calculate_cpu_proposal(data) + cpu_usage = self.settings.calculate_cpu_proposal(filtered_data) return ResourceRecommendation(request=cpu_usage, limit=None) def __calculate_memory_proposal( -- cgit v1.2.3 From fc7f4ee436ae146ab8749ce9a00d0f5f436614fa Mon Sep 17 00:00:00 2001 From: LeaveMyYard Date: Tue, 5 Sep 2023 12:59:28 +0300 Subject: Replace sum aggregation to max aggregation --- robusta_krr/core/integrations/prometheus/metrics/cpu.py | 8 ++++---- robusta_krr/core/integrations/prometheus/metrics/memory.py | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/robusta_krr/core/integrations/prometheus/metrics/cpu.py b/robusta_krr/core/integrations/prometheus/metrics/cpu.py index e61a986..a965b3f 100644 --- a/robusta_krr/core/integrations/prometheus/metrics/cpu.py +++ b/robusta_krr/core/integrations/prometheus/metrics/cpu.py @@ -8,7 +8,7 @@ class CPULoader(QueryRangeMetric, FilterJobsMixin, BatchedRequestMixin): pods_selector = "|".join(pod.name for pod in object.pods) cluster_label = self.get_prometheus_cluster_label() return f""" - sum( + max( rate( container_cpu_usage_seconds_total{{ namespace="{object.namespace}", @@ -16,7 +16,7 @@ class CPULoader(QueryRangeMetric, FilterJobsMixin, BatchedRequestMixin): container="{object.container}" {cluster_label} }}[{step}] - ) + ) ) by (container, pod, job) """ @@ -29,7 +29,7 @@ def PercentileCPULoader(percentile: float) -> type[QueryMetric]: return f""" quantile_over_time( {round(percentile / 100, 2)}, - sum( + max( rate( container_cpu_usage_seconds_total{{ namespace="{object.namespace}", @@ -52,7 +52,7 @@ class CPUAmountLoader(QueryMetric, FilterJobsMixin, BatchedRequestMixin): cluster_label = self.get_prometheus_cluster_label() return f""" count_over_time( - sum( + max( container_cpu_usage_seconds_total{{ namespace="{object.namespace}", pod=~"{pods_selector}", diff --git a/robusta_krr/core/integrations/prometheus/metrics/memory.py b/robusta_krr/core/integrations/prometheus/metrics/memory.py index dc75f91..bc47467 100644 --- a/robusta_krr/core/integrations/prometheus/metrics/memory.py +++ b/robusta_krr/core/integrations/prometheus/metrics/memory.py @@ -8,7 +8,7 @@ class MemoryLoader(QueryRangeMetric, FilterJobsMixin, BatchedRequestMixin): pods_selector = "|".join(pod.name for pod in object.pods) cluster_label = self.get_prometheus_cluster_label() return f""" - sum( + max( container_memory_working_set_bytes{{ namespace="{object.namespace}", pod=~"{pods_selector}", @@ -25,7 +25,7 @@ class MaxMemoryLoader(QueryMetric, FilterJobsMixin, BatchedRequestMixin): cluster_label = self.get_prometheus_cluster_label() return f""" max_over_time( - sum( + max( container_memory_working_set_bytes{{ namespace="{object.namespace}", pod=~"{pods_selector}", @@ -44,7 +44,7 @@ class MemoryAmountLoader(QueryMetric, FilterJobsMixin, BatchedRequestMixin): cluster_label = self.get_prometheus_cluster_label() return f""" count_over_time( - sum( + max( container_memory_working_set_bytes{{ namespace="{object.namespace}", pod=~"{pods_selector}", -- cgit v1.2.3