summaryrefslogtreecommitdiff
path: root/robusta_krr
diff options
context:
space:
mode:
authorПавел Жуков <33721692+LeaveMyYard@users.noreply.github.com>2023-04-27 18:13:02 +0300
committerПавел Жуков <33721692+LeaveMyYard@users.noreply.github.com>2023-04-27 18:13:02 +0300
commit3729eec24a8b3cec8fee2c89b5718ccd63fd4c67 (patch)
tree0bd43f4da150be37cb153ce0b3654b10e66def99 /robusta_krr
parentafbb08efb31031ffee80b919256c0a1015365520 (diff)
Add metric filtering to fix bug with double value
Diffstat (limited to 'robusta_krr')
-rw-r--r--robusta_krr/core/integrations/prometheus/loader.py3
-rw-r--r--robusta_krr/core/integrations/prometheus/metrics/base_metric.py18
-rw-r--r--robusta_krr/core/integrations/prometheus/metrics/cpu_metric.py2
-rw-r--r--robusta_krr/core/integrations/prometheus/metrics/memory_metric.py51
-rw-r--r--robusta_krr/core/runner.py2
5 files changed, 69 insertions, 7 deletions
diff --git a/robusta_krr/core/integrations/prometheus/loader.py b/robusta_krr/core/integrations/prometheus/loader.py
index 8938496..c0c1961 100644
--- a/robusta_krr/core/integrations/prometheus/loader.py
+++ b/robusta_krr/core/integrations/prometheus/loader.py
@@ -111,11 +111,10 @@ class PrometheusLoader(Configurable):
resource: ResourceType,
period: datetime.timedelta,
*,
- timeframe: datetime.timedelta = datetime.timedelta(minutes=30),
+ step: datetime.timedelta = datetime.timedelta(minutes=30),
) -> ResourceHistoryData:
self.debug(f"Gathering data for {object} and {resource}")
- step = f"{int(timeframe.total_seconds()) // 60}m"
MetricLoaderType = BaseMetricLoader.get_by_resource(resource)
metric_loader = MetricLoaderType(self.config, self.prometheus)
return await metric_loader.load_data(object, period, step)
diff --git a/robusta_krr/core/integrations/prometheus/metrics/base_metric.py b/robusta_krr/core/integrations/prometheus/metrics/base_metric.py
index 9d366c8..407d3e3 100644
--- a/robusta_krr/core/integrations/prometheus/metrics/base_metric.py
+++ b/robusta_krr/core/integrations/prometheus/metrics/base_metric.py
@@ -26,11 +26,23 @@ class BaseMetricLoader(Configurable, abc.ABC):
def get_query(self, namespace: str, pod: str, container: str) -> str:
...
- async def load_data(self, object: K8sObjectData, period: datetime.timedelta, step: str) -> ResourceHistoryData:
+ async def query_prometheus(
+ self, query: str, start_time: datetime.datetime, end_time: datetime.datetime, step: datetime.timedelta
+ ) -> list[dict]:
+ return await asyncio.to_thread(
+ self.prometheus.custom_query_range,
+ query=query,
+ start_time=start_time,
+ end_time=end_time,
+ step=f"{int(step.total_seconds()) // 60}m",
+ )
+
+ async def load_data(
+ self, object: K8sObjectData, period: datetime.timedelta, step: datetime.timedelta
+ ) -> ResourceHistoryData:
result = await asyncio.gather(
*[
- asyncio.to_thread(
- self.prometheus.custom_query_range,
+ self.query_prometheus(
query=self.get_query(object.namespace, pod, object.container),
start_time=datetime.datetime.now() - period,
end_time=datetime.datetime.now(),
diff --git a/robusta_krr/core/integrations/prometheus/metrics/cpu_metric.py b/robusta_krr/core/integrations/prometheus/metrics/cpu_metric.py
index a8c2b8e..d2ad841 100644
--- a/robusta_krr/core/integrations/prometheus/metrics/cpu_metric.py
+++ b/robusta_krr/core/integrations/prometheus/metrics/cpu_metric.py
@@ -6,4 +6,4 @@ from .base_metric import BaseMetricLoader, bind_metric
@bind_metric(ResourceType.CPU)
class CPUMetricLoader(BaseMetricLoader):
def get_query(self, namespace: str, pod: str, container: str) -> str:
- return f'sum(irate(container_cpu_usage_seconds_total{{namespace="{namespace}", pod="{pod}", container="{container}"}}[1m]))'
+ return f'sum(irate(container_cpu_usage_seconds_total{{namespace="{namespace}", pod="{pod}", container="{container}"}}[5m])) by (container, pod, job)'
diff --git a/robusta_krr/core/integrations/prometheus/metrics/memory_metric.py b/robusta_krr/core/integrations/prometheus/metrics/memory_metric.py
index 4e018ae..6d5bc0c 100644
--- a/robusta_krr/core/integrations/prometheus/metrics/memory_metric.py
+++ b/robusta_krr/core/integrations/prometheus/metrics/memory_metric.py
@@ -1,9 +1,60 @@
+import datetime
+from typing import Any, Optional
+
from robusta_krr.core.models.allocations import ResourceType
from .base_metric import BaseMetricLoader, bind_metric
+PrometheusSeries = Any
+
@bind_metric(ResourceType.Memory)
class MemoryMetricLoader(BaseMetricLoader):
+ @staticmethod
+ def get_target_name(series: PrometheusSeries) -> Optional[str]:
+ for label in ["container", "pod", "node"]:
+ if label in series.metric:
+ return series.metric[label]
+ return None
+
+ @staticmethod
+ def filter_prom_jobs_results(
+ series_list_result: list[PrometheusSeries],
+ ) -> list[PrometheusSeries]:
+ """
+ Because there might be multiple metrics with the same name, we need to filter them out.
+
+ :param series_list_result: list of PrometheusSeries
+ """
+
+ if len(series_list_result) == 1:
+ return series_list_result
+
+ target_names = {
+ MemoryMetricLoader.get_target_name(series)
+ for series in series_list_result
+ if MemoryMetricLoader.get_target_name(series)
+ }
+ return_list: list[PrometheusSeries] = []
+
+ # takes kubelet job if exists, return first job alphabetically if it doesn't
+ for target_name in target_names:
+ relevant_series = [
+ series for series in series_list_result if MemoryMetricLoader.get_target_name(series) == target_name
+ ]
+ relevant_kubelet_metric = [series for series in relevant_series if series.metric.get("job") == "kubelet"]
+ if len(relevant_kubelet_metric) == 1:
+ return_list.append(relevant_kubelet_metric[0])
+ continue
+ sorted_relevant_series = sorted(relevant_series, key=lambda series: series.metric.get("job"), reverse=False)
+ return_list.append(sorted_relevant_series[0])
+ return return_list
+
+ async def query_prometheus(
+ self, query: str, start_time: datetime.datetime, end_time: datetime.datetime, step: datetime.timedelta
+ ) -> list[PrometheusSeries]:
+ result = await super().query_prometheus(query, start_time, end_time, step)
+ return self.filter_prom_jobs_results(result)
+
def get_query(self, namespace: str, pod: str, container: str) -> str:
return f'sum(container_memory_working_set_bytes{{image!="", namespace="{namespace}", pod="{pod}", container="{container}"}})'
diff --git a/robusta_krr/core/runner.py b/robusta_krr/core/runner.py
index 00dcbb9..f4d4d94 100644
--- a/robusta_krr/core/runner.py
+++ b/robusta_krr/core/runner.py
@@ -94,7 +94,7 @@ class Runner(Configurable):
object,
resource,
self._strategy.settings.history_timedelta,
- timeframe=self._strategy.settings.timeframe_timedelta,
+ step=self._strategy.settings.timeframe_timedelta,
)
for resource in ResourceType
]