summary | refs | log | tree | commit | diff
path: root/robusta_krr/core
diff options
context:
space:
mode:
author: Pavel Zhukov <33721692+LeaveMyYard@users.noreply.github.com> 2024-04-30 19:06:27 +0300
committer: GitHub <noreply@github.com> 2024-04-30 19:06:27 +0300
commit: 09c372bfa7039c22eab9e411040585ec675caf6d (patch)
tree: c6428b6c1ed3b6b2139ede20ff704b42a8b23fb3 /robusta_krr/core
parent: f7d841278afe46fe3b21da64ca57691b44670ab4 (diff)
parent: dd13deeac4ca62f8187d77d944305675abc9dba8 (diff)
Merge branch 'main' into prometheus-workload-loader
Diffstat (limited to 'robusta_krr/core')
-rw-r--r-- robusta_krr/core/abstract/strategies.py | 2
-rw-r--r-- robusta_krr/core/integrations/prometheus/metrics/__init__.py | 2
-rw-r--r-- robusta_krr/core/integrations/prometheus/metrics/base.py | 6
-rw-r--r-- robusta_krr/core/integrations/prometheus/metrics/memory.py | 38
-rw-r--r-- robusta_krr/core/integrations/prometheus/metrics_service/prometheus_metrics_service.py | 7
5 files changed, 49 insertions, 6 deletions
diff --git a/robusta_krr/core/abstract/strategies.py b/robusta_krr/core/abstract/strategies.py
index ab5ab6c..388595f 100644
--- a/robusta_krr/core/abstract/strategies.py
+++ b/robusta_krr/core/abstract/strategies.py
@@ -28,7 +28,7 @@ class ResourceRecommendation(pd.BaseModel):
request: Optional[float]
limit: Optional[float]
info: Optional[str] = pd.Field(
- None, description="Additional information about the recommendation. Currently used to explain undefined."
+ None, description="Additional information about the recommendation."
)
@classmethod
diff --git a/robusta_krr/core/integrations/prometheus/metrics/__init__.py b/robusta_krr/core/integrations/prometheus/metrics/__init__.py
index e9a0568..aea497f 100644
--- a/robusta_krr/core/integrations/prometheus/metrics/__init__.py
+++ b/robusta_krr/core/integrations/prometheus/metrics/__init__.py
@@ -1,3 +1,3 @@
from .base import PrometheusMetric
from .cpu import CPUAmountLoader, CPULoader, PercentileCPULoader
-from .memory import MaxMemoryLoader, MemoryAmountLoader, MemoryLoader
+from .memory import MaxMemoryLoader, MemoryAmountLoader, MemoryLoader, MaxOOMKilledMemoryLoader
diff --git a/robusta_krr/core/integrations/prometheus/metrics/base.py b/robusta_krr/core/integrations/prometheus/metrics/base.py
index 34957ef..807a11f 100644
--- a/robusta_krr/core/integrations/prometheus/metrics/base.py
+++ b/robusta_krr/core/integrations/prometheus/metrics/base.py
@@ -59,6 +59,7 @@ class PrometheusMetric(BaseMetric):
query_type: QueryType = QueryType.Query
filtering: bool = True
pods_batch_size: Optional[int] = 50
+ warning_on_no_data: bool = True
def __init__(
self,
@@ -127,7 +128,10 @@ class PrometheusMetric(BaseMetric):
return response["result"]
else:
# regular query, lighter on preformance
- response = self.prometheus.safe_custom_query(query=data.query)
+ try:
+ response = self.prometheus.safe_custom_query(query=data.query)
+ except Exception as e:
+ raise ValueError(f"Failed to run query: {data.query}") from e
results = response["result"]
# format the results to return the same format as custom_query_range
for result in results:
diff --git a/robusta_krr/core/integrations/prometheus/metrics/memory.py b/robusta_krr/core/integrations/prometheus/metrics/memory.py
index ca324f6..9f7e30d 100644
--- a/robusta_krr/core/integrations/prometheus/metrics/memory.py
+++ b/robusta_krr/core/integrations/prometheus/metrics/memory.py
@@ -69,3 +69,41 @@ class MemoryAmountLoader(PrometheusMetric):
[{duration}:{step}]
)
"""
+
+# TODO: Need to battle test if this one is correct.
+class MaxOOMKilledMemoryLoader(PrometheusMetric):
+ """
+ A metric loader for loading the maximum memory limits that were surpassed by the OOMKilled event.
+ """
+
+ warning_on_no_data = False
+
+ def get_query(self, object: K8sObjectData, duration: str, step: str) -> str:
+ pods_selector = "|".join(pod.name for pod in object.pods)
+ cluster_label = self.get_prometheus_cluster_label()
+ return f"""
+ max_over_time(
+ max(
+ max(
+ kube_pod_container_resource_limits{{
+ resource="memory",
+ namespace="{object.namespace}",
+ pod=~"{pods_selector}",
+ container="{object.container}"
+ {cluster_label}
+ }}
+ ) by (pod, container, job)
+ * on(pod, container, job) group_left(reason)
+ max(
+ kube_pod_container_status_last_terminated_reason{{
+ reason="OOMKilled",
+ namespace="{object.namespace}",
+ pod=~"{pods_selector}",
+ container="{object.container}"
+ {cluster_label}
+ }}
+ ) by (pod, container, job, reason)
+ ) by (container, pod, job)
+ [{duration}:{step}]
+ )
+ """
diff --git a/robusta_krr/core/integrations/prometheus/metrics_service/prometheus_metrics_service.py b/robusta_krr/core/integrations/prometheus/metrics_service/prometheus_metrics_service.py
index 08fd83f..ee82ca7 100644
--- a/robusta_krr/core/integrations/prometheus/metrics_service/prometheus_metrics_service.py
+++ b/robusta_krr/core/integrations/prometheus/metrics_service/prometheus_metrics_service.py
@@ -206,9 +206,10 @@ class PrometheusMetricsService(MetricsService):
elif "Memory" in LoaderClass.__name__:
object.add_warning("NoPrometheusMemoryMetrics")
- logger.warning(
- f"{metric_loader.service_name} returned no {metric_loader.__class__.__name__} metrics for {object}"
- )
+ if LoaderClass.warning_on_no_data:
+ logger.warning(
+ f"{metric_loader.service_name} returned no {metric_loader.__class__.__name__} metrics for {object}"
+ )
return data