diff options
| author | Pavel Zhukov <33721692+LeaveMyYard@users.noreply.github.com> | 2024-04-30 19:06:27 +0300 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-04-30 19:06:27 +0300 |
| commit | 09c372bfa7039c22eab9e411040585ec675caf6d (patch) | |
| tree | c6428b6c1ed3b6b2139ede20ff704b42a8b23fb3 /robusta_krr/core | |
| parent | f7d841278afe46fe3b21da64ca57691b44670ab4 (diff) | |
| parent | dd13deeac4ca62f8187d77d944305675abc9dba8 (diff) | |
Merge branch 'main' into prometheus-workload-loader
Diffstat (limited to 'robusta_krr/core')
5 files changed, 49 insertions, 6 deletions
diff --git a/robusta_krr/core/abstract/strategies.py b/robusta_krr/core/abstract/strategies.py index ab5ab6c..388595f 100644 --- a/robusta_krr/core/abstract/strategies.py +++ b/robusta_krr/core/abstract/strategies.py @@ -28,7 +28,7 @@ class ResourceRecommendation(pd.BaseModel): request: Optional[float] limit: Optional[float] info: Optional[str] = pd.Field( - None, description="Additional information about the recommendation. Currently used to explain undefined." + None, description="Additional information about the recommendation." ) @classmethod diff --git a/robusta_krr/core/integrations/prometheus/metrics/__init__.py b/robusta_krr/core/integrations/prometheus/metrics/__init__.py index e9a0568..aea497f 100644 --- a/robusta_krr/core/integrations/prometheus/metrics/__init__.py +++ b/robusta_krr/core/integrations/prometheus/metrics/__init__.py @@ -1,3 +1,3 @@ from .base import PrometheusMetric from .cpu import CPUAmountLoader, CPULoader, PercentileCPULoader -from .memory import MaxMemoryLoader, MemoryAmountLoader, MemoryLoader +from .memory import MaxMemoryLoader, MemoryAmountLoader, MemoryLoader, MaxOOMKilledMemoryLoader diff --git a/robusta_krr/core/integrations/prometheus/metrics/base.py b/robusta_krr/core/integrations/prometheus/metrics/base.py index 34957ef..807a11f 100644 --- a/robusta_krr/core/integrations/prometheus/metrics/base.py +++ b/robusta_krr/core/integrations/prometheus/metrics/base.py @@ -59,6 +59,7 @@ class PrometheusMetric(BaseMetric): query_type: QueryType = QueryType.Query filtering: bool = True pods_batch_size: Optional[int] = 50 + warning_on_no_data: bool = True def __init__( self, @@ -127,7 +128,10 @@ class PrometheusMetric(BaseMetric): return response["result"] else: # regular query, lighter on preformance - response = self.prometheus.safe_custom_query(query=data.query) + try: + response = self.prometheus.safe_custom_query(query=data.query) + except Exception as e: + raise ValueError(f"Failed to run query: {data.query}") from e results = response["result"] # format the results to return the same format as custom_query_range for result in results: diff --git a/robusta_krr/core/integrations/prometheus/metrics/memory.py b/robusta_krr/core/integrations/prometheus/metrics/memory.py index ca324f6..9f7e30d 100644 --- a/robusta_krr/core/integrations/prometheus/metrics/memory.py +++ b/robusta_krr/core/integrations/prometheus/metrics/memory.py @@ -69,3 +69,41 @@ class MemoryAmountLoader(PrometheusMetric): [{duration}:{step}] ) """ + +# TODO: Need to battle test if this one is correct. +class MaxOOMKilledMemoryLoader(PrometheusMetric): + """ + A metric loader for loading the maximum memory limits that were surpassed by the OOMKilled event. + """ + + warning_on_no_data = False + + def get_query(self, object: K8sObjectData, duration: str, step: str) -> str: + pods_selector = "|".join(pod.name for pod in object.pods) + cluster_label = self.get_prometheus_cluster_label() + return f""" + max_over_time( + max( + max( + kube_pod_container_resource_limits{{ + resource="memory", + namespace="{object.namespace}", + pod=~"{pods_selector}", + container="{object.container}" + {cluster_label} + }} + ) by (pod, container, job) + * on(pod, container, job) group_left(reason) + max( + kube_pod_container_status_last_terminated_reason{{ + reason="OOMKilled", + namespace="{object.namespace}", + pod=~"{pods_selector}", + container="{object.container}" + {cluster_label} + }} + ) by (pod, container, job, reason) + ) by (container, pod, job) + [{duration}:{step}] + ) + """ diff --git a/robusta_krr/core/integrations/prometheus/metrics_service/prometheus_metrics_service.py b/robusta_krr/core/integrations/prometheus/metrics_service/prometheus_metrics_service.py index 08fd83f..ee82ca7 100644 --- a/robusta_krr/core/integrations/prometheus/metrics_service/prometheus_metrics_service.py +++ b/robusta_krr/core/integrations/prometheus/metrics_service/prometheus_metrics_service.py @@ -206,9 +206,10 @@ class PrometheusMetricsService(MetricsService): elif "Memory" in LoaderClass.__name__: object.add_warning("NoPrometheusMemoryMetrics") - logger.warning( - f"{metric_loader.service_name} returned no {metric_loader.__class__.__name__} metrics for {object}" - ) + if LoaderClass.warning_on_no_data: + logger.warning( + f"{metric_loader.service_name} returned no {metric_loader.__class__.__name__} metrics for {object}" + ) return data |
