summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLeaveMyYard <zhukovpavel2001@gmail.com>2024-05-02 14:34:24 +0300
committerLeaveMyYard <zhukovpavel2001@gmail.com>2024-05-02 14:34:24 +0300
commite350084fdf3404559f23837c649d74e1152fc681 (patch)
tree1b27e2f47206632bb4d14f939dedfa8ca25a154a
parenteb84c954dc831449f09c9e42d233f8ff8eba855c (diff)
Add HPA detection for prometheus mode
-rw-r--r--robusta_krr/core/integrations/kubernetes/cluster_loader/__init__.py18
-rw-r--r--robusta_krr/core/integrations/prometheus/cluster_loader/__init__.py77
-rw-r--r--robusta_krr/core/models/objects.py14
3 files changed, 87 insertions, 22 deletions
diff --git a/robusta_krr/core/integrations/kubernetes/cluster_loader/__init__.py b/robusta_krr/core/integrations/kubernetes/cluster_loader/__init__.py
index 54047a3..0174adc 100644
--- a/robusta_krr/core/integrations/kubernetes/cluster_loader/__init__.py
+++ b/robusta_krr/core/integrations/kubernetes/cluster_loader/__init__.py
@@ -15,7 +15,7 @@ from robusta_krr.core.integrations.prometheus.connector import PrometheusConnect
from robusta_krr.core.integrations.prometheus.metrics_service.prometheus_metrics_service import PrometheusMetricsService
from robusta_krr.core.models.config import settings
from robusta_krr.core.models.exceptions import CriticalRunnerException
-from robusta_krr.core.models.objects import HPAData, K8sWorkload, KindLiteral, PodData
+from robusta_krr.core.models.objects import HPAData, HPAKey, K8sWorkload, KindLiteral, PodData
from robusta_krr.core.models.result import ResourceAllocations
@@ -34,14 +34,12 @@ from .loaders import (
logger = logging.getLogger("krr")
-HPAKey = tuple[str, str, str]
-
class KubeAPIClusterLoader(BaseClusterLoader):
# NOTE: For KubeAPIClusterLoader we have to first connect to read kubeconfig
# We do not need to connect to Prometheus from here, as we query all data from Kubernetes API
    # Also here we might have different Prometheus instances for different clusters
-
+
def __init__(self) -> None:
try:
settings.load_kubeconfig()
@@ -89,7 +87,7 @@ class KubeAPIClusterLoader(BaseClusterLoader):
@cache
def get_workload_loader(self, cluster: Optional[str]) -> KubeAPIWorkloadLoader:
return KubeAPIWorkloadLoader(cluster)
-
+
@cache
def get_prometheus(self, cluster: Optional[str]) -> PrometheusConnector:
connector = PrometheusConnector(cluster=cluster)
@@ -173,7 +171,7 @@ class KubeAPIWorkloadLoader(BaseWorkloadLoader, IListPodsFallback):
kind=kind,
container=container.name,
allocations=ResourceAllocations.from_container(container),
- hpa=self._hpa_list.get((namespace, kind, name)),
+ hpa=self._hpa_list.get(HPAKey(namespace, kind, name)),
)
obj._api_resource = item
return obj
@@ -260,15 +258,13 @@ class KubeAPIWorkloadLoader(BaseWorkloadLoader, IListPodsFallback):
)
return {
- (
+ HPAKey(
hpa.metadata.namespace,
hpa.spec.scale_target_ref.kind,
hpa.spec.scale_target_ref.name,
): HPAData(
min_replicas=hpa.spec.min_replicas,
max_replicas=hpa.spec.max_replicas,
- current_replicas=hpa.status.current_replicas,
- desired_replicas=hpa.status.desired_replicas,
target_cpu_utilization_percentage=hpa.spec.target_cpu_utilization_percentage,
target_memory_utilization_percentage=None,
)
@@ -301,15 +297,13 @@ class KubeAPIWorkloadLoader(BaseWorkloadLoader, IListPodsFallback):
)
return {
- (
+ HPAKey(
hpa.metadata.namespace,
hpa.spec.scale_target_ref.kind,
hpa.spec.scale_target_ref.name,
): HPAData(
min_replicas=hpa.spec.min_replicas,
max_replicas=hpa.spec.max_replicas,
- current_replicas=hpa.status.current_replicas,
- desired_replicas=hpa.status.desired_replicas,
target_cpu_utilization_percentage=__get_metric(hpa, "cpu"),
target_memory_utilization_percentage=__get_metric(hpa, "memory"),
)
diff --git a/robusta_krr/core/integrations/prometheus/cluster_loader/__init__.py b/robusta_krr/core/integrations/prometheus/cluster_loader/__init__.py
index 1fbe470..5ac42eb 100644
--- a/robusta_krr/core/integrations/prometheus/cluster_loader/__init__.py
+++ b/robusta_krr/core/integrations/prometheus/cluster_loader/__init__.py
@@ -4,14 +4,15 @@ import asyncio
import itertools
import logging
-from collections import Counter
+from collections import Counter, defaultdict
from typing import Optional
from functools import cache
from robusta_krr.core.integrations.prometheus.connector import PrometheusConnector
+from robusta_krr.core.integrations.prometheus.metrics.base import PrometheusMetric
from robusta_krr.core.models.config import settings
-from robusta_krr.core.models.objects import K8sWorkload
+from robusta_krr.core.models.objects import HPAData, HPAKey, K8sWorkload
from robusta_krr.core.abstract.workload_loader import BaseWorkloadLoader
from robusta_krr.core.abstract.cluster_loader import BaseClusterLoader
from robusta_krr.core.models.exceptions import CriticalRunnerException
@@ -37,9 +38,7 @@ class PrometheusClusterLoader(BaseClusterLoader):
async def list_clusters(self) -> Optional[list[str]]:
if settings.prometheus_label is None:
- logger.info(
- "Assuming that Prometheus contains only one cluster."
- )
+ logger.info("Assuming that Prometheus contains only one cluster.")
logger.info("If you have multiple clusters in Prometheus, please provide the `-l` flag.")
return None
@@ -67,10 +66,12 @@ class PrometheusClusterLoader(BaseClusterLoader):
class PrometheusWorkloadLoader(BaseWorkloadLoader):
workloads: list[type[BaseKindLoader]] = [DoubleParentLoader, SimpleParentLoader]
- def __init__(self, cluster: str, prometheus_connector: PrometheusConnector) -> None:
+ def __init__(self, cluster: str, prometheus: PrometheusConnector) -> None:
self.cluster = cluster
- self.metric_service = prometheus_connector
- self.loaders = [loader(prometheus_connector) for loader in self.workloads]
+ self.prometheus = prometheus
+ self.loaders = [loader(prometheus) for loader in self.workloads]
+
+ self.cluster_selector = PrometheusMetric.get_prometheus_cluster_label()
async def list_workloads(self) -> list[K8sWorkload]:
workloads = list(
@@ -79,11 +80,71 @@ class PrometheusWorkloadLoader(BaseWorkloadLoader):
)
)
+ hpas = await self.__list_hpa()
+
+ for workload in workloads:
+ workload.hpa = hpas.get(
+ HPAKey(
+ namespace=workload.namespace,
+ kind=workload.kind,
+ name=workload.name,
+ )
+ )
+
kind_counts = Counter([workload.kind for workload in workloads])
for kind, count in kind_counts.items():
logger.info(f"Found {count} {kind} in {self.cluster}")
return workloads
+ async def __list_hpa(self) -> dict[HPAKey, HPAData]:
+ hpa_metrics, max_replicas, min_replicas, target_metrics = await asyncio.gather(
+ self.prometheus.loader.query("kube_horizontalpodautoscaler_info"),
+ self.prometheus.loader.query("kube_horizontalpodautoscaler_spec_max_replicas"),
+ self.prometheus.loader.query("kube_horizontalpodautoscaler_spec_min_replicas"),
+ self.prometheus.loader.query("kube_horizontalpodautoscaler_spec_target_metric"),
+ )
+
+ max_replicas_dict = {
+ (metric["metric"]["namespace"], metric["metric"]["horizontalpodautoscaler"]): metric["value"][1]
+ for metric in max_replicas
+ }
+ min_replicas_dict = {
+ (metric["metric"]["namespace"], metric["metric"]["horizontalpodautoscaler"]): metric["value"][1]
+ for metric in min_replicas
+ }
+ target_metric_dict = defaultdict(dict)
+ for metric in target_metrics:
+ target_metric_dict[(metric["metric"]["namespace"], metric["metric"]["horizontalpodautoscaler"])] |= {
+ metric["metric"]["metric_name"]: metric["value"][1]
+ }
+
+ hpas = {}
+ if not hpa_metrics:
+ return {}
+
+ for hpa in hpa_metrics:
+ metric = hpa["metric"]
+ hpa_name = metric["horizontalpodautoscaler"]
+ key = HPAKey(
+ namespace=metric["namespace"],
+ kind=metric["scaletargetref_kind"],
+ name=metric["scaletargetref_name"],
+ )
+
+ max_replicas_value = max_replicas_dict[metric["namespace"], hpa_name]
+ min_replicas_value = min_replicas_dict[metric["namespace"], hpa_name]
+ cpu_utilization = target_metric_dict[metric["namespace"], hpa_name].get("cpu")
+ memory_utilization = target_metric_dict[metric["namespace"], hpa_name].get("memory")
+
+ hpas[key] = HPAData(
+ min_replicas=max_replicas_value,
+ max_replicas=min_replicas_value,
+ target_cpu_utilization_percentage=cpu_utilization,
+ target_memory_utilization_percentage=memory_utilization,
+ )
+
+ return hpas
+
__all__ = ["PrometheusClusterLoader", "PrometheusWorkloadLoader"]
diff --git a/robusta_krr/core/models/objects.py b/robusta_krr/core/models/objects.py
index 27f1c29..c5d375e 100644
--- a/robusta_krr/core/models/objects.py
+++ b/robusta_krr/core/models/objects.py
@@ -19,11 +19,21 @@ class PodData(pd.BaseModel):
return hash(self.name)
class HPAKey(pd.BaseModel):
    """Immutable, hashable identifier of an HPA's scale target.

    Used as the key of HPA lookup dicts. Callers construct it both with
    keyword arguments and positionally as HPAKey(namespace, kind, name).
    """

    namespace: str
    kind: str
    name: str

    def __init__(self, *args: str, **kwargs: str) -> None:
        # BUGFIX: pydantic's BaseModel.__init__ is keyword-only, but call
        # sites in the kubernetes cluster loader build this key positionally
        # (HPAKey(namespace, kind, name)), which would raise TypeError.
        # Map positional arguments onto the field names in declaration order.
        if args:
            kwargs.update(zip(("namespace", "kind", "name"), args))
        super().__init__(**kwargs)

    class Config:
        # Freeze instances so the hash stays valid while used as a dict key.
        allow_mutation = False

    def __hash__(self) -> int:
        return hash((self.namespace, self.kind, self.name))
+
+
class HPAData(pd.BaseModel):
min_replicas: Optional[int]
max_replicas: int
- current_replicas: Optional[int]
- desired_replicas: int
target_cpu_utilization_percentage: Optional[float]
target_memory_utilization_percentage: Optional[float]