From ec49ff39efe46a69c603a73a4ce90c4e78e7f1b3 Mon Sep 17 00:00:00 2001 From: mesutoezdil Date: Tue, 5 May 2026 21:28:44 +0200 Subject: [PATCH 1/2] feat(helm): add metrics endpoint and optional ServiceMonitor support Adds controller.metrics values to enable the Prometheus metrics endpoint and optionally deploy a ServiceMonitor for Prometheus Operator. When controller.metrics.enabled is true the chart sets METRICS_BIND_ADDRESS and METRICS_SECURE env vars, exposes the metrics container port, and adds a named port to the controller Service. Setting controller.metrics.serviceMonitor.enabled to true additionally creates a ServiceMonitor resource. Closes #1369 Signed-off-by: mesutoezdil --- .../templates/controller-configmap.yaml | 4 +++ .../templates/controller-deployment.yaml | 5 ++++ helm/kagent/templates/controller-service.yaml | 6 +++++ .../templates/controller-servicemonitor.yaml | 20 ++++++++++++++ .../tests/controller-deployment_test.yaml | 23 ++++++++++++++++ .../kagent/tests/controller-service_test.yaml | 17 +++++++++++- .../tests/controller-servicemonitor_test.yaml | 26 +++++++++++++++++++ helm/kagent/values.yaml | 9 +++++++ 8 files changed, 109 insertions(+), 1 deletion(-) create mode 100644 helm/kagent/templates/controller-servicemonitor.yaml create mode 100644 helm/kagent/tests/controller-servicemonitor_test.yaml diff --git a/helm/kagent/templates/controller-configmap.yaml b/helm/kagent/templates/controller-configmap.yaml index aedd314a4..e75be9366 100644 --- a/helm/kagent/templates/controller-configmap.yaml +++ b/helm/kagent/templates/controller-configmap.yaml @@ -56,6 +56,10 @@ data: STREAMING_MAX_BUF_SIZE: {{ .Values.controller.streaming.maxBufSize | quote }} STREAMING_TIMEOUT: {{ .Values.controller.streaming.timeout | quote }} WATCH_NAMESPACES: {{ include "kagent.watchNamespaces" . | quote }} + {{- if .Values.controller.metrics.enabled }} + METRICS_BIND_ADDRESS: ":{{ .Values.controller.metrics.port }}" + METRICS_SECURE: {{ .Values.controller.metrics.secure | quote }} + {{- end }} ZAP_LOG_LEVEL: {{ .Values.controller.loglevel | quote }} {{- $agentHost := "" }} {{- if and .Values.controller.agentDeployment .Values.controller.agentDeployment.host (not (eq .Values.controller.agentDeployment.host "")) }} diff --git a/helm/kagent/templates/controller-deployment.yaml b/helm/kagent/templates/controller-deployment.yaml index ee7119b8e..1cec7e294 100644 --- a/helm/kagent/templates/controller-deployment.yaml +++ b/helm/kagent/templates/controller-deployment.yaml @@ -97,6 +97,11 @@ spec: - name: http containerPort: {{ .Values.controller.service.ports.targetPort }} protocol: TCP + {{- if .Values.controller.metrics.enabled }} + - name: metrics + containerPort: {{ .Values.controller.metrics.port }} + protocol: TCP + {{- end }} resources: {{- toYaml .Values.controller.resources | nindent 12 }} {{- with (.Values.controller.securityContext | default .Values.securityContext) }} diff --git a/helm/kagent/templates/controller-service.yaml b/helm/kagent/templates/controller-service.yaml index 54933c355..37c6a0319 100644 --- a/helm/kagent/templates/controller-service.yaml +++ b/helm/kagent/templates/controller-service.yaml @@ -12,5 +12,11 @@ spec: targetPort: {{ .Values.controller.service.ports.targetPort }} protocol: TCP name: controller + {{- if .Values.controller.metrics.enabled }} + - port: {{ .Values.controller.metrics.port }} + targetPort: {{ .Values.controller.metrics.port }} + protocol: TCP + name: metrics + {{- end }} selector: {{- include "kagent.controller.selectorLabels" . | nindent 4 }} diff --git a/helm/kagent/templates/controller-servicemonitor.yaml b/helm/kagent/templates/controller-servicemonitor.yaml new file mode 100644 index 000000000..c1edeb1d5 --- /dev/null +++ b/helm/kagent/templates/controller-servicemonitor.yaml @@ -0,0 +1,20 @@ +{{- if and .Values.controller.metrics.enabled .Values.controller.metrics.serviceMonitor.enabled }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ include "kagent.fullname" . }}-controller + namespace: {{ include "kagent.namespace" . }} + labels: + {{- include "kagent.labels" . | nindent 4 }} + {{- with .Values.controller.metrics.serviceMonitor.labels }} + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + selector: + matchLabels: + {{- include "kagent.controller.selectorLabels" . | nindent 6 }} + endpoints: + - port: metrics + interval: {{ .Values.controller.metrics.serviceMonitor.interval }} + scrapeTimeout: {{ .Values.controller.metrics.serviceMonitor.scrapeTimeout }} +{{- end }} diff --git a/helm/kagent/tests/controller-deployment_test.yaml b/helm/kagent/tests/controller-deployment_test.yaml index a35a9c227..06aeadc0e 100644 --- a/helm/kagent/tests/controller-deployment_test.yaml +++ b/helm/kagent/tests/controller-deployment_test.yaml @@ -76,6 +76,29 @@ tests: - equal: path: spec.template.spec.containers[0].ports[0].containerPort value: 8083 + - lengthEqual: + path: spec.template.spec.containers[0].ports + count: 1 + + - it: should add metrics port and env vars when enabled + set: + controller.metrics.enabled: true + asserts: + - contains: + path: spec.template.spec.containers[0].ports + content: + name: metrics + containerPort: 9093 + protocol: TCP + template: controller-deployment.yaml + - equal: + path: data.METRICS_BIND_ADDRESS + value: ":9093" + template: controller-configmap.yaml + - equal: + path: data.METRICS_SECURE + value: "false" + template: controller-configmap.yaml - it: should set A2A_BASE_URL with computed default value template: controller-configmap.yaml diff --git a/helm/kagent/tests/controller-service_test.yaml b/helm/kagent/tests/controller-service_test.yaml index f3bb1d97b..ab8864137 100644 --- a/helm/kagent/tests/controller-service_test.yaml +++ b/helm/kagent/tests/controller-service_test.yaml @@ -29,6 +29,9 @@ tests: - equal: path: spec.ports[0].protocol value: TCP + - lengthEqual: + path: spec.ports + count: 1 - it: should have correct selector labels asserts: @@ -68,4 +71,16 @@ tests: asserts: - equal: path: metadata.namespace - value: custom-namespace \ No newline at end of file + value: custom-namespace + + - it: should expose metrics port when enabled + set: + controller.metrics.enabled: true + asserts: + - contains: + path: spec.ports + content: + port: 9093 + targetPort: 9093 + protocol: TCP + name: metrics \ No newline at end of file diff --git a/helm/kagent/tests/controller-servicemonitor_test.yaml b/helm/kagent/tests/controller-servicemonitor_test.yaml new file mode 100644 index 000000000..d0d2c8f70 --- /dev/null +++ b/helm/kagent/tests/controller-servicemonitor_test.yaml @@ -0,0 +1,26 @@ +suite: test controller servicemonitor +templates: + - controller-servicemonitor.yaml +tests: + - it: should not render by default + asserts: + - hasDocuments: + count: 0 + + - it: should not render when only metrics.enabled is true + set: + controller.metrics.enabled: true + asserts: + - hasDocuments: + count: 0 + + - it: should render ServiceMonitor when both enabled + set: + controller.metrics.enabled: true + controller.metrics.serviceMonitor.enabled: true + asserts: + - isKind: + of: ServiceMonitor + - equal: + path: spec.endpoints[0].port + value: metrics diff --git a/helm/kagent/values.yaml b/helm/kagent/values.yaml index 446cc54e7..e52a90938 100644 --- a/helm/kagent/values.yaml +++ b/helm/kagent/values.yaml @@ -222,6 +222,15 @@ controller: ports: port: 8083 targetPort: 8083 + metrics: + enabled: false + port: 9093 + secure: false + serviceMonitor: + enabled: false + interval: 30s + scrapeTimeout: 10s + labels: {} env: [] envFrom: [] From 2008de2a5f8b712d9f24d65a244e18226aa82f54 Mon Sep 17 00:00:00 2001 From: mesutoezdil Date: Tue, 5 May 2026 21:43:20 +0200 Subject: [PATCH 2/2] fix(helm): address review comments on metrics ServiceMonitor - Gate ServiceMonitor on Capabilities.APIVersions to avoid install failures on clusters without Prometheus Operator CRDs - Add scheme/tlsConfig to ServiceMonitor endpoint when secure=true - Document secure default and NodePort exposure in values.yaml Signed-off-by: mesutoezdil --- .../templates/controller-servicemonitor.yaml | 7 +++++- .../tests/controller-servicemonitor_test.yaml | 24 +++++++++++++++++-- helm/kagent/values.yaml | 2 ++ 3 files changed, 30 insertions(+), 3 deletions(-) diff --git a/helm/kagent/templates/controller-servicemonitor.yaml b/helm/kagent/templates/controller-servicemonitor.yaml index c1edeb1d5..57fde9863 100644 --- a/helm/kagent/templates/controller-servicemonitor.yaml +++ b/helm/kagent/templates/controller-servicemonitor.yaml @@ -1,4 +1,4 @@ -{{- if and .Values.controller.metrics.enabled .Values.controller.metrics.serviceMonitor.enabled }} +{{- if and .Values.controller.metrics.enabled .Values.controller.metrics.serviceMonitor.enabled (.Capabilities.APIVersions.Has "monitoring.coreos.com/v1/ServiceMonitor") }} apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor metadata: @@ -17,4 +17,9 @@ spec: - port: metrics interval: {{ .Values.controller.metrics.serviceMonitor.interval }} scrapeTimeout: {{ .Values.controller.metrics.serviceMonitor.scrapeTimeout }} + {{- if .Values.controller.metrics.secure }} + scheme: https + tlsConfig: + insecureSkipVerify: true + {{- end }} {{- end }} diff --git a/helm/kagent/tests/controller-servicemonitor_test.yaml b/helm/kagent/tests/controller-servicemonitor_test.yaml index d0d2c8f70..b327db5b0 100644 --- a/helm/kagent/tests/controller-servicemonitor_test.yaml +++ b/helm/kagent/tests/controller-servicemonitor_test.yaml @@ -7,20 +7,40 @@ tests: - hasDocuments: count: 0 - - it: should not render when only metrics.enabled is true + - it: should not render when CRD is not installed set: controller.metrics.enabled: true + controller.metrics.serviceMonitor.enabled: true asserts: - hasDocuments: count: 0 - - it: should render ServiceMonitor when both enabled + - it: should render ServiceMonitor when both enabled and CRD present set: controller.metrics.enabled: true controller.metrics.serviceMonitor.enabled: true + capabilities: + apiVersions: + - monitoring.coreos.com/v1/ServiceMonitor asserts: - isKind: of: ServiceMonitor - equal: path: spec.endpoints[0].port value: metrics + + - it: should add TLS config when secure is true + set: + controller.metrics.enabled: true + controller.metrics.serviceMonitor.enabled: true + controller.metrics.secure: true + capabilities: + apiVersions: + - monitoring.coreos.com/v1/ServiceMonitor + asserts: + - equal: + path: spec.endpoints[0].scheme + value: https + - equal: + path: spec.endpoints[0].tlsConfig.insecureSkipVerify + value: true diff --git a/helm/kagent/values.yaml b/helm/kagent/values.yaml index e52a90938..ca040089b 100644 --- a/helm/kagent/values.yaml +++ b/helm/kagent/values.yaml @@ -225,6 +225,8 @@ controller: metrics: enabled: false port: 9093 + # -- The controller binary defaults to secure=true. Set to false for plain HTTP scraping (most common). + # Note: when the controller Service type is NodePort or LoadBalancer the metrics port will be externally reachable. secure: false serviceMonitor: enabled: false