From 4ac68a6e2f13e3df4f07aa19aa1f382ace6af7fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9F=D0=B0=D0=B2=D0=B5=D0=BB=20=D0=96=D1=83=D0=BA=D0=BE?= =?UTF-8?q?=D0=B2?= <33721692+LeaveMyYard@users.noreply.github.com> Date: Fri, 21 Apr 2023 14:29:18 +0300 Subject: Add info on using port-forwarding for prometheus --- README.md | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/README.md b/README.md index 3c2c35e..4d5a003 100644 --- a/README.md +++ b/README.md @@ -241,12 +241,33 @@ python krr.py simple --help

(back to top)

+ + +## Example of using port-forward for Prometheus + +If your prometheus is not auto-connecting, you can use `kubectl port-forward` for manually forwarding Prometheus. + +For example, if you have prometheus Pod called `kube-prometheus-st-prometheus-0`, then run this command to port-forward it: +```sh +kubectl port-forward pod/kube-prometheus-st-prometheus-0 9090 +``` + +Then, run another terminal and run robusta in it, adding explicit prometheus url: + +```sh +python krr.py simple -p http://127.0.0.1:9090 +``` + +

(back to top)

+ ## Creating a Custom Strategy/Formatter Look into the `examples` directory for examples on how to create a custom strategy/formatter. +

(back to top)

+ ## Building @@ -304,6 +325,8 @@ pip install -e . poetry run pytest ``` +

(back to top)

+ ## Contributing -- cgit v1.2.3 From 736b4c8b36b02df760bcbcea2916b1d8627ccdd2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9F=D0=B0=D0=B2=D0=B5=D0=BB=20=D0=96=D1=83=D0=BA=D0=BE?= =?UTF-8?q?=D0=B2?= <33721692+LeaveMyYard@users.noreply.github.com> Date: Fri, 21 Apr 2023 16:06:16 +0300 Subject: Improve port-forward readme text --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 4d5a003..ad5e583 100644 --- a/README.md +++ b/README.md @@ -247,12 +247,12 @@ python krr.py simple --help If your prometheus is not auto-connecting, you can use `kubectl port-forward` for manually forwarding Prometheus. -For example, if you have prometheus Pod called `kube-prometheus-st-prometheus-0`, then run this command to port-forward it: +For example, if you have a Prometheus Pod called `kube-prometheus-st-prometheus-0`, then run this command to port-forward it: ```sh kubectl port-forward pod/kube-prometheus-st-prometheus-0 9090 ``` -Then, run another terminal and run robusta in it, adding explicit prometheus url: +Then, open another terminal and run krr in it, giving an explicit prometheus url: ```sh python krr.py simple -p http://127.0.0.1:9090 -- cgit v1.2.3 From fbdc62712187729ed7a95cc5a17f1831c49eb46d Mon Sep 17 00:00:00 2001 From: Pavel Zhukov <33721692+LeaveMyYard@users.noreply.github.com> Date: Mon, 24 Apr 2023 10:49:24 +0300 Subject: Add the section about prometheus autodiscovery --- README.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/README.md b/README.md index ad5e583..15992aa 100644 --- a/README.md +++ b/README.md @@ -243,6 +243,23 @@ python krr.py simple --help +## Prometheus auto-discovery + +By default, KRR will try to auto-discover the running Prometheus by scanning those labels: +```python +"app=kube-prometheus-stack-prometheus" +"app=prometheus,component=server" +"app=prometheus-server" +"app=prometheus-operator-prometheus" +"app=prometheus-msteams" +"app=rancher-monitoring-prometheus" 
+"app=prometheus-prometheus" +``` + +If none of those labels result in finding Prometheus, you will get an error and will have to pass a working Prometheus URL explicitly (using the `-p` flag). + +

(back to top)

+ ## Example of using port-forward for Prometheus If your prometheus is not auto-connecting, you can use `kubectl port-forward` for manually forwarding Prometheus. -- cgit v1.2.3 From b54c2ef892a4964a329db3326fe1a80281b73d43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9F=D0=B0=D0=B2=D0=B5=D0=BB=20=D0=96=D1=83=D0=BA=D0=BE?= =?UTF-8?q?=D0=B2?= <33721692+LeaveMyYard@users.noreply.github.com> Date: Mon, 24 Apr 2023 11:09:01 +0300 Subject: Add --logtostderr flag --- robusta_krr/core/models/config.py | 1 + robusta_krr/core/runner.py | 2 +- robusta_krr/main.py | 2 ++ robusta_krr/utils/configurable.py | 11 ++++++++--- 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/robusta_krr/core/models/config.py b/robusta_krr/core/models/config.py index edb1518..fb910a6 100644 --- a/robusta_krr/core/models/config.py +++ b/robusta_krr/core/models/config.py @@ -34,6 +34,7 @@ class Config(pd.BaseSettings): # Logging Settings format: str strategy: str + log_to_stderr: bool other_args: dict[str, Any] diff --git a/robusta_krr/core/runner.py b/robusta_krr/core/runner.py index 196894d..00dcbb9 100644 --- a/robusta_krr/core/runner.py +++ b/robusta_krr/core/runner.py @@ -44,7 +44,7 @@ class Runner(Configurable): def _process_result(self, result: Result) -> None: formatted = result.format(self.config.format) self.echo("\n", no_prefix=True) - self.console.print(formatted) + self.print_result(formatted) def __get_resource_minimal(self, resource: ResourceType) -> Decimal: if resource == ResourceType.CPU: diff --git a/robusta_krr/main.py b/robusta_krr/main.py index a58675f..0412974 100644 --- a/robusta_krr/main.py +++ b/robusta_krr/main.py @@ -79,6 +79,7 @@ def run() -> None: format: str = typer.Option("table", "--formatter", "-f", help="Output formatter ({formatters})", rich_help_panel="Logging Settings"), verbose: bool = typer.Option(False, "--verbose", "-v", help="Enable verbose mode", rich_help_panel="Logging Settings"), quiet: bool = typer.Option(False, "--quiet", "-q", 
help="Enable quiet mode", rich_help_panel="Logging Settings"), + log_to_stderr: bool = typer.Option(False, "--logtostderr", help="Pass logs to stderr", rich_help_panel="Logging Settings"), {strategy_settings}, ) -> None: '''Run KRR using the `{func_name}` strategy''' @@ -92,6 +93,7 @@ def run() -> None: format=format, verbose=verbose, quiet=quiet, + log_to_stderr=log_to_stderr, strategy="{func_name}", other_args={strategy_args}, ) diff --git a/robusta_krr/utils/configurable.py b/robusta_krr/utils/configurable.py index 3957e6e..ded2900 100644 --- a/robusta_krr/utils/configurable.py +++ b/robusta_krr/utils/configurable.py @@ -6,8 +6,6 @@ from rich.console import Console from robusta_krr.core.models.config import Config -console = Console() - class Configurable(abc.ABC): """ @@ -17,7 +15,7 @@ class Configurable(abc.ABC): def __init__(self, config: Config) -> None: self.config = config - self.console = console + self.console = Console(stderr=self.config.log_to_stderr) @property def debug_active(self) -> bool: @@ -31,6 +29,13 @@ class Configurable(abc.ABC): def __add_prefix(text: str, prefix: str, /, no_prefix: bool) -> str: return f"{prefix} {text}" if not no_prefix else text + def print_result(self, content: str) -> None: + """ + Prints the result in a console. The result is always put in stdout. 
+ """ + result_console = Console() + result_console.print(content) + def echo( self, message: str = "", *, no_prefix: bool = False, type: Literal["INFO", "WARNING", "ERROR"] = "INFO" ) -> None: -- cgit v1.2.3 From dab80f310e424bc96eef200727b4a45648af73e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9F=D0=B0=D0=B2=D0=B5=D0=BB=20=D0=96=D1=83=D0=BA=D0=BE?= =?UTF-8?q?=D0=B2?= <33721692+LeaveMyYard@users.noreply.github.com> Date: Mon, 24 Apr 2023 11:13:36 +0300 Subject: Add --logtostderr to readme --- README.md | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index ad5e583..430d120 100644 --- a/README.md +++ b/README.md @@ -120,7 +120,7 @@ More features (like seeing graphs, based on which recommendations were made) com | Installation Location 🌍 | ✅ Not required to be installed inside the cluster, can be used on your own device, connected to a cluster | ❌ Must be installed inside the cluster | | Workload Configuration 🔧 | ✅ No need to configure a VPA object for each workload | ❌ Requires VPA object configuration for each workload | | Immediate Results ⚡ | ✅ Gets results immediately (given Prometheus is running) | ❌ Requires time to gather data and provide recommendations | -| Reporting 📊 | ✅ Detailed CLI Report, web UI in [Robusta.dev](https://home.robusta.dev/) | ❌ Not supported | +| Reporting 📊 | ✅ Detailed CLI Report, web UI in [Robusta.dev](https://home.robusta.dev/) | ❌ Not supported | | Extensibility 🔧 | ✅ Add your own strategies with few lines of Python | :warning: Limited extensibility | | Custom Metrics 📏 | 🔄 Support in future versions | ❌ Not supported | | Custom Resources 🎛️ | 🔄 Support in future versions (e.g., GPU) | ❌ Not supported | @@ -165,7 +165,7 @@ sudo apt install robusta-krr `````sh docker pull robusta/krr -```` +```` #### Manual @@ -215,16 +215,16 @@ By default krr will run in the current context. 
If you want to run it in a diffe python krr.py simple -c my-cluster-1 -c my-cluster-2 ``` -If you want to get the output in JSON format (-q is for quiet mode): +If you want to get the output in JSON format (--logtostderr is required so no logs go to the result file): ```sh -python krr.py simple -q -f json > result.json +python krr.py simple --logtostderr -f json > result.json ``` If you want to get the output in YAML format: ```sh -python krr.py simple -q -f yaml > result.yaml +python krr.py simple --logtostderr -f yaml > result.yaml ``` If you want to see additional debug logs: @@ -248,6 +248,7 @@ python krr.py simple --help If your prometheus is not auto-connecting, you can use `kubectl port-forward` for manually forwarding Prometheus. For example, if you have a Prometheus Pod called `kube-prometheus-st-prometheus-0`, then run this command to port-forward it: + ```sh kubectl port-forward pod/kube-prometheus-st-prometheus-0 9090 ``` -- cgit v1.2.3 From 9b715a6cff7224108d57ea590a4127c5fa52de74 Mon Sep 17 00:00:00 2001 From: Pavel Zhukov <33721692+LeaveMyYard@users.noreply.github.com> Date: Mon, 24 Apr 2023 12:12:00 +0300 Subject: Swap places for UI integration and VPA comparison --- README.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 430d120..e3f79fa 100644 --- a/README.md +++ b/README.md @@ -104,14 +104,6 @@ By default, we use a _simple_ strategy to calculate resource recommendations. It - For memory, we take the maximum value over the past week and add a 5% buffer. -### Robusta UI integration - -If you are using [Robusta SaaS](https://platform.robusta.dev/), then KRR is integrated starting from [v0.10.15](https://github.com/robusta-dev/robusta/releases/tag/0.10.15). You can view all your recommendations (previous ones also), filter and sort them by either cluster, namespace or name. - -More features (like seeing graphs, based on which recommendations were made) coming soon. 
[Tell us what you need the most!](https://github.com/robusta-dev/krr/issues/new) - -![Robusta UI Screen Shot][ui-screenshot] - ### Difference with Kubernetes VPA | Feature 🛠️ | Robusta KRR 🚀 | Kubernetes VPA 🌐 | @@ -127,6 +119,14 @@ More features (like seeing graphs, based on which recommendations were made) com | Explainability 📖 | 🔄 Support in future versions (Robusta will send you additional graphs) | ❌ Not supported | | Autoscaling 🔀 | 🔄 Support in future versions | ✅ Automatic application of recommendations | +### Robusta UI integration + +If you are using [Robusta SaaS](https://platform.robusta.dev/), then KRR is integrated starting from [v0.10.15](https://github.com/robusta-dev/robusta/releases/tag/0.10.15). You can view all your recommendations (previous ones also), filter and sort them by either cluster, namespace or name. + +More features (like seeing graphs, based on which recommendations were made) coming soon. [Tell us what you need the most!](https://github.com/robusta-dev/krr/issues/new) + +![Robusta UI Screen Shot][ui-screenshot] +

(back to top)

-- cgit v1.2.3 From 8fc5ae4f8bc054779abeb4241c2e2a3bdf25b0a6 Mon Sep 17 00:00:00 2001 From: Pavel Zhukov <33721692+LeaveMyYard@users.noreply.github.com> Date: Mon, 24 Apr 2023 12:19:42 +0300 Subject: Add prometheus connection section in how it works --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 15992aa..783205a 100644 --- a/README.md +++ b/README.md @@ -104,6 +104,10 @@ By default, we use a _simple_ strategy to calculate resource recommendations. It - For memory, we take the maximum value over the past week and add a 5% buffer. +#### Prometheus connection + +Find about how KRR tries to find the default prometheus to connect here. + ### Robusta UI integration If you are using [Robusta SaaS](https://platform.robusta.dev/), then KRR is integrated starting from [v0.10.15](https://github.com/robusta-dev/robusta/releases/tag/0.10.15). You can view all your recommendations (previous ones also), filter and sort them by either cluster, namespace or name. -- cgit v1.2.3 From cdf6ab4305733ea3072acae438b800873fbc40f0 Mon Sep 17 00:00:00 2001 From: Pavel Zhukov <33721692+LeaveMyYard@users.noreply.github.com> Date: Mon, 24 Apr 2023 12:31:08 +0300 Subject: Fix readme after incorrect merge --- README.md | 8 -------- 1 file changed, 8 deletions(-) diff --git a/README.md b/README.md index 156bff5..7545e94 100644 --- a/README.md +++ b/README.md @@ -108,14 +108,6 @@ By default, we use a _simple_ strategy to calculate resource recommendations. It Find about how KRR tries to find the default prometheus to connect here. -### Robusta UI integration - -If you are using [Robusta SaaS](https://platform.robusta.dev/), then KRR is integrated starting from [v0.10.15](https://github.com/robusta-dev/robusta/releases/tag/0.10.15). You can view all your recommendations (previous ones also), filter and sort them by either cluster, namespace or name. - -More features (like seeing graphs, based on which recommendations were made) coming soon. 
[Tell us what you need the most!](https://github.com/robusta-dev/krr/issues/new) - -![Robusta UI Screen Shot][ui-screenshot] - ### Difference with Kubernetes VPA | Feature 🛠️ | Robusta KRR 🚀 | Kubernetes VPA 🌐 | -- cgit v1.2.3 From 47ece52acd2d9d810cb72101139759427eb379d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9F=D0=B0=D0=B2=D0=B5=D0=BB=20=D0=96=D1=83=D0=BA=D0=BE?= =?UTF-8?q?=D0=B2?= <33721692+LeaveMyYard@users.noreply.github.com> Date: Fri, 21 Apr 2023 12:43:45 +0300 Subject: Rework CPU gathering metric --- robusta_krr/core/integrations/prometheus.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/robusta_krr/core/integrations/prometheus.py b/robusta_krr/core/integrations/prometheus.py index 39c7f00..ced865f 100644 --- a/robusta_krr/core/integrations/prometheus.py +++ b/robusta_krr/core/integrations/prometheus.py @@ -115,15 +115,16 @@ class PrometheusLoader(Configurable): ) -> ResourceHistoryData: self.debug(f"Gathering data for {object} and {resource}") + step = f"{int(timeframe.total_seconds()) // 60}m" if resource == ResourceType.CPU: result = await asyncio.gather( *[ asyncio.to_thread( self.prometheus.custom_query_range, - query=f'sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{{namespace="{object.namespace}", pod="{pod}", container="{object.container}"}})', + query=f'sum(irate(container_cpu_usage_seconds_total{{namespace="{object.namespace}", pod="{pod}", container="{object.container}"}}[{step}]))', start_time=datetime.datetime.now() - period, end_time=datetime.datetime.now(), - step=f"{int(timeframe.total_seconds()) // 60}m", + step=step, ) for pod in object.pods ] @@ -136,7 +137,7 @@ class PrometheusLoader(Configurable): query=f'sum(container_memory_working_set_bytes{{job="kubelet", metrics_path="/metrics/cadvisor", image!="", namespace="{object.namespace}", pod="{pod}", container="{object.container}"}})', start_time=datetime.datetime.now() - period, end_time=datetime.datetime.now(), - 
step=f"{int(timeframe.total_seconds()) // 60}m", + step=step, ) for pod in object.pods ] -- cgit v1.2.3 From c3d465223ef60baa75d902fa891f848058fc64aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9F=D0=B0=D0=B2=D0=B5=D0=BB=20=D0=96=D1=83=D0=BA=D0=BE?= =?UTF-8?q?=D0=B2?= <33721692+LeaveMyYard@users.noreply.github.com> Date: Fri, 21 Apr 2023 14:49:57 +0300 Subject: Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 7545e94..701ccd5 100644 --- a/README.md +++ b/README.md @@ -85,7 +85,7 @@ Robusta KRR uses the following Prometheus queries to gather usage data: - CPU Usage: ``` - sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace="{object.namespace}", pod="{pod}", container="{object.container}"}) + sum(irate(container_cpu_usage_seconds_total{{namespace="{object.namespace}", pod="{pod}", container="{object.container}"}}[{step}])) ``` - Memory Usage: -- cgit v1.2.3 From 32fd0b042e6b07e0a66241588e81d791afa08786 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9F=D0=B0=D0=B2=D0=B5=D0=BB=20=D0=96=D1=83=D0=BA=D0=BE?= =?UTF-8?q?=D0=B2?= <33721692+LeaveMyYard@users.noreply.github.com> Date: Wed, 26 Apr 2023 10:36:46 +0300 Subject: Refactor the way metrics are collected, fix memory metric query --- robusta_krr/core/integrations/prometheus.py | 156 --------------------- .../core/integrations/prometheus/__init__.py | 1 + robusta_krr/core/integrations/prometheus/loader.py | 121 ++++++++++++++++ .../integrations/prometheus/metrics/__init__.py | 3 + .../integrations/prometheus/metrics/base_metric.py | 66 +++++++++ .../integrations/prometheus/metrics/cpu_metric.py | 9 ++ .../prometheus/metrics/memory_metric.py | 9 ++ robusta_krr/core/runner.py | 2 +- 8 files changed, 210 insertions(+), 157 deletions(-) delete mode 100644 robusta_krr/core/integrations/prometheus.py create mode 100644 robusta_krr/core/integrations/prometheus/__init__.py create mode 100644 
robusta_krr/core/integrations/prometheus/loader.py create mode 100644 robusta_krr/core/integrations/prometheus/metrics/__init__.py create mode 100644 robusta_krr/core/integrations/prometheus/metrics/base_metric.py create mode 100644 robusta_krr/core/integrations/prometheus/metrics/cpu_metric.py create mode 100644 robusta_krr/core/integrations/prometheus/metrics/memory_metric.py diff --git a/robusta_krr/core/integrations/prometheus.py b/robusta_krr/core/integrations/prometheus.py deleted file mode 100644 index ced865f..0000000 --- a/robusta_krr/core/integrations/prometheus.py +++ /dev/null @@ -1,156 +0,0 @@ -import asyncio -import datetime -from decimal import Decimal -from typing import Optional, no_type_check - -import requests -from kubernetes import config as k8s_config -from kubernetes.client import ApiClient -from prometheus_api_client import PrometheusConnect, Retry -from requests.adapters import HTTPAdapter -from requests.exceptions import ConnectionError, HTTPError - -from robusta_krr.core.abstract.strategies import ResourceHistoryData -from robusta_krr.core.models.config import Config -from robusta_krr.core.models.objects import K8sObjectData -from robusta_krr.core.models.result import ResourceType -from robusta_krr.utils.configurable import Configurable -from robusta_krr.utils.service_discovery import ServiceDiscovery - - -class PrometheusDiscovery(ServiceDiscovery): - def find_prometheus_url(self, *, api_client: Optional[ApiClient] = None) -> Optional[str]: - return super().find_url( - selectors=[ - "app=kube-prometheus-stack-prometheus", - "app=prometheus,component=server", - "app=prometheus-server", - "app=prometheus-operator-prometheus", - "app=prometheus-msteams", - "app=rancher-monitoring-prometheus", - "app=prometheus-prometheus", - ], - api_client=api_client, - ) - - -class PrometheusNotFound(Exception): - pass - - -class CustomPrometheusConnect(PrometheusConnect): - @no_type_check - def __init__( - self, - url: str = "http://127.0.0.1:9090", - 
headers: dict = None, - disable_ssl: bool = False, - retry: Retry = None, - auth: tuple = None, - ): - super().__init__(url, headers, disable_ssl, retry, auth) - self._session = requests.Session() - self._session.mount(self.url, HTTPAdapter(max_retries=retry, pool_maxsize=10, pool_block=True)) - - -class PrometheusLoader(Configurable): - def __init__( - self, - config: Config, - *, - cluster: Optional[str] = None, - ) -> None: - super().__init__(config=config) - - self.debug(f"Initializing PrometheusLoader for {cluster or 'default'} cluster") - - self.auth_header = self.config.prometheus_auth_header - self.ssl_enabled = self.config.prometheus_ssl_enabled - - self.api_client = k8s_config.new_client_from_config(context=cluster) if cluster is not None else None - self.prometheus_discovery = PrometheusDiscovery(config=self.config) - - self.url = self.config.prometheus_url - self.url = self.url or self.prometheus_discovery.find_prometheus_url(api_client=self.api_client) - - if not self.url: - raise PrometheusNotFound( - f"Prometheus url could not be found while scanning in {cluster or 'default'} cluster" - ) - - headers = {} - - if self.auth_header: - headers = {"Authorization": self.auth_header} - elif not self.config.inside_cluster: - self.api_client.update_params_for_auth(headers, {}, ["BearerToken"]) - - self.prometheus = CustomPrometheusConnect(url=self.url, disable_ssl=not self.ssl_enabled, headers=headers) - self._check_prometheus_connection() - - self.debug(f"PrometheusLoader initialized for {cluster or 'default'} cluster") - - def _check_prometheus_connection(self): - try: - response = self.prometheus._session.get( - f"{self.prometheus.url}/api/v1/query", - verify=self.prometheus.ssl_verification, - headers=self.prometheus.headers, - # This query should return empty results, but is correct - params={"query": "example"}, - ) - response.raise_for_status() - except (ConnectionError, HTTPError) as e: - raise PrometheusNotFound( - f"Couldn't connect to Prometheus 
found under {self.prometheus.url}\nCaused by {e.__class__.__name__}: {e})" - ) from e - - async def gather_data( - self, - object: K8sObjectData, - resource: ResourceType, - period: datetime.timedelta, - *, - timeframe: datetime.timedelta = datetime.timedelta(minutes=30), - ) -> ResourceHistoryData: - self.debug(f"Gathering data for {object} and {resource}") - - step = f"{int(timeframe.total_seconds()) // 60}m" - if resource == ResourceType.CPU: - result = await asyncio.gather( - *[ - asyncio.to_thread( - self.prometheus.custom_query_range, - query=f'sum(irate(container_cpu_usage_seconds_total{{namespace="{object.namespace}", pod="{pod}", container="{object.container}"}}[{step}]))', - start_time=datetime.datetime.now() - period, - end_time=datetime.datetime.now(), - step=step, - ) - for pod in object.pods - ] - ) - elif resource == ResourceType.Memory: - result = await asyncio.gather( - *[ - asyncio.to_thread( - self.prometheus.custom_query_range, - query=f'sum(container_memory_working_set_bytes{{job="kubelet", metrics_path="/metrics/cadvisor", image!="", namespace="{object.namespace}", pod="{pod}", container="{object.container}"}})', - start_time=datetime.datetime.now() - period, - end_time=datetime.datetime.now(), - step=step, - ) - for pod in object.pods - ] - ) - else: - raise ValueError(f"Unknown resource type: {resource}") - - if result == []: - return {pod: [] for pod in object.pods} - - pod_results = {pod: result[i] for i, pod in enumerate(object.pods)} - return { - pod: [Decimal(value) for _, value in pod_result[0]["values"]] - for pod, pod_result in pod_results.items() - if pod_result != [] - } diff --git a/robusta_krr/core/integrations/prometheus/__init__.py b/robusta_krr/core/integrations/prometheus/__init__.py new file mode 100644 index 0000000..e43e8aa --- /dev/null +++ b/robusta_krr/core/integrations/prometheus/__init__.py @@ -0,0 +1 @@ +from .loader import CustomPrometheusConnect, PrometheusDiscovery, PrometheusLoader, PrometheusNotFound diff --git 
a/robusta_krr/core/integrations/prometheus/loader.py b/robusta_krr/core/integrations/prometheus/loader.py new file mode 100644 index 0000000..4a79005 --- /dev/null +++ b/robusta_krr/core/integrations/prometheus/loader.py @@ -0,0 +1,121 @@ +import datetime +from typing import Optional, no_type_check + +import requests +from kubernetes import config as k8s_config +from kubernetes.client import ApiClient +from prometheus_api_client import PrometheusConnect, Retry +from requests.adapters import HTTPAdapter +from requests.exceptions import ConnectionError, HTTPError + +from robusta_krr.core.abstract.strategies import ResourceHistoryData +from robusta_krr.core.models.config import Config +from robusta_krr.core.models.objects import K8sObjectData +from robusta_krr.core.models.result import ResourceType +from robusta_krr.utils.configurable import Configurable +from robusta_krr.utils.service_discovery import ServiceDiscovery + +from .metrics import BaseMetricLoader + + +class PrometheusDiscovery(ServiceDiscovery): + def find_prometheus_url(self, *, api_client: Optional[ApiClient] = None) -> Optional[str]: + return super().find_url( + selectors=[ + "app=kube-prometheus-stack-prometheus", + "app=prometheus,component=server", + "app=prometheus-server", + "app=prometheus-operator-prometheus", + "app=prometheus-msteams", + "app=rancher-monitoring-prometheus", + "app=prometheus-prometheus", + ], + api_client=api_client, + ) + + +class PrometheusNotFound(Exception): + pass + + +class CustomPrometheusConnect(PrometheusConnect): + @no_type_check + def __init__( + self, + url: str = "http://127.0.0.1:9090", + headers: dict = None, + disable_ssl: bool = False, + retry: Retry = None, + auth: tuple = None, + ): + super().__init__(url, headers, disable_ssl, retry, auth) + self._session = requests.Session() + self._session.mount(self.url, HTTPAdapter(max_retries=retry, pool_maxsize=10, pool_block=True)) + + +class PrometheusLoader(Configurable): + def __init__( + self, + config: Config, + 
*, + cluster: Optional[str] = None, + ) -> None: + super().__init__(config=config) + + self.debug(f"Initializing PrometheusLoader for {cluster or 'default'} cluster") + + self.auth_header = self.config.prometheus_auth_header + self.ssl_enabled = self.config.prometheus_ssl_enabled + + self.api_client = k8s_config.new_client_from_config(context=cluster) if cluster is not None else None + self.prometheus_discovery = PrometheusDiscovery(config=self.config) + + self.url = self.config.prometheus_url + self.url = self.url or self.prometheus_discovery.find_prometheus_url(api_client=self.api_client) + + if not self.url: + raise PrometheusNotFound( + f"Prometheus url could not be found while scanning in {cluster or 'default'} cluster" + ) + + headers = {} + + if self.auth_header: + headers = {"Authorization": self.auth_header} + elif not self.config.inside_cluster: + self.api_client.update_params_for_auth(headers, {}, ["BearerToken"]) + + self.prometheus = CustomPrometheusConnect(url=self.url, disable_ssl=not self.ssl_enabled, headers=headers) + self._check_prometheus_connection() + + self.debug(f"PrometheusLoader initialized for {cluster or 'default'} cluster") + + def _check_prometheus_connection(self): + try: + response = self.prometheus._session.get( + f"{self.prometheus.url}/api/v1/query", + verify=self.prometheus.ssl_verification, + headers=self.prometheus.headers, + # This query should return empty results, but is correct + params={"query": "example"}, + ) + response.raise_for_status() + except (ConnectionError, HTTPError) as e: + raise PrometheusNotFound( + f"Couldn't connect to Prometheus found under {self.prometheus.url}\nCaused by {e.__class__.__name__}: {e})" + ) from e + + async def gather_data( + self, + object: K8sObjectData, + resource: ResourceType, + period: datetime.timedelta, + *, + timeframe: datetime.timedelta = datetime.timedelta(minutes=30), + ) -> ResourceHistoryData: + self.debug(f"Gathering data for {object} and {resource}") + + step = 
f"{int(timeframe.total_seconds()) // 60}m" + MetricLoaderType = BaseMetricLoader.get_by_resource(resource) + metric_loader = MetricLoaderType(self.prometheus) + return await metric_loader.load_data(object, period, step) diff --git a/robusta_krr/core/integrations/prometheus/metrics/__init__.py b/robusta_krr/core/integrations/prometheus/metrics/__init__.py new file mode 100644 index 0000000..0852b67 --- /dev/null +++ b/robusta_krr/core/integrations/prometheus/metrics/__init__.py @@ -0,0 +1,3 @@ +from .base_metric import BaseMetricLoader, bind_metric +from .cpu_metric import CPUMetricLoader +from .memory_metric import MemoryMetricLoader diff --git a/robusta_krr/core/integrations/prometheus/metrics/base_metric.py b/robusta_krr/core/integrations/prometheus/metrics/base_metric.py new file mode 100644 index 0000000..5b91db2 --- /dev/null +++ b/robusta_krr/core/integrations/prometheus/metrics/base_metric.py @@ -0,0 +1,66 @@ +from __future__ import annotations + +import abc +import asyncio +import datetime +from decimal import Decimal +from typing import TYPE_CHECKING, Callable, TypeVar + +if TYPE_CHECKING: + from robusta_krr.core.abstract.strategies import ResourceHistoryData + from robusta_krr.core.models.objects import K8sObjectData + + from ..loader import CustomPrometheusConnect + +REGISTERED_METRICS: dict[str, type[BaseMetricLoader]] = {} + + +class BaseMetricLoader(abc.ABC): + def __init__(self, prometheus: CustomPrometheusConnect) -> None: + self.prometheus = prometheus + + @abc.abstractmethod + def get_query(self, namespace: str, pod: str, container: str) -> str: + ... 
+ + async def load_data(self, object: K8sObjectData, period: datetime.timedelta, step: str) -> ResourceHistoryData: + result = await asyncio.gather( + *[ + asyncio.to_thread( + self.prometheus.custom_query_range, + query=self.get_query(object.namespace, pod, object.container), + start_time=datetime.datetime.now() - period, + end_time=datetime.datetime.now(), + step=step, + ) + for pod in object.pods + ] + ) + + if result == []: + return {pod: [] for pod in object.pods} + + pod_results = {pod: result[i] for i, pod in enumerate(object.pods)} + return { + pod: [Decimal(value) for _, value in pod_result[0]["values"]] + for pod, pod_result in pod_results.items() + if pod_result != [] + } + + @staticmethod + def get_by_resource(resource: str) -> type[BaseMetricLoader]: + try: + return REGISTERED_METRICS[resource] + except KeyError as e: + raise KeyError(f"Resource {resource} was not registered by `@bind_metric(...)`") from e + + +Self = TypeVar("Self", bound=BaseMetricLoader) + + +def bind_metric(resource: str) -> Callable[[type[Self]], type[Self]]: + def decorator(cls: type[Self]) -> type[Self]: + REGISTERED_METRICS[resource] = cls + return cls + + return decorator diff --git a/robusta_krr/core/integrations/prometheus/metrics/cpu_metric.py b/robusta_krr/core/integrations/prometheus/metrics/cpu_metric.py new file mode 100644 index 0000000..a8c2b8e --- /dev/null +++ b/robusta_krr/core/integrations/prometheus/metrics/cpu_metric.py @@ -0,0 +1,9 @@ +from robusta_krr.core.models.allocations import ResourceType + +from .base_metric import BaseMetricLoader, bind_metric + + +@bind_metric(ResourceType.CPU) +class CPUMetricLoader(BaseMetricLoader): + def get_query(self, namespace: str, pod: str, container: str) -> str: + return f'sum(irate(container_cpu_usage_seconds_total{{namespace="{namespace}", pod="{pod}", container="{container}"}}[1m]))' diff --git a/robusta_krr/core/integrations/prometheus/metrics/memory_metric.py 
b/robusta_krr/core/integrations/prometheus/metrics/memory_metric.py new file mode 100644 index 0000000..4e018ae --- /dev/null +++ b/robusta_krr/core/integrations/prometheus/metrics/memory_metric.py @@ -0,0 +1,9 @@ +from robusta_krr.core.models.allocations import ResourceType + +from .base_metric import BaseMetricLoader, bind_metric + + +@bind_metric(ResourceType.Memory) +class MemoryMetricLoader(BaseMetricLoader): + def get_query(self, namespace: str, pod: str, container: str) -> str: + return f'sum(container_memory_working_set_bytes{{image!="", namespace="{namespace}", pod="{pod}", container="{container}"}})' diff --git a/robusta_krr/core/runner.py b/robusta_krr/core/runner.py index 00dcbb9..1ef0e14 100644 --- a/robusta_krr/core/runner.py +++ b/robusta_krr/core/runner.py @@ -5,7 +5,7 @@ from typing import Optional, Union from robusta_krr.core.abstract.strategies import ResourceRecommendation, RunResult from robusta_krr.core.integrations.kubernetes import KubernetesLoader -from robusta_krr.core.integrations.prometheus import PrometheusLoader +from robusta_krr.core.integrations.prometheus.loader import PrometheusLoader from robusta_krr.core.models.config import Config from robusta_krr.core.models.objects import K8sObjectData from robusta_krr.core.models.result import ResourceAllocations, ResourceScan, ResourceType, Result -- cgit v1.2.3