From 4ccc98e2b041a9249d9c563ff631e0c922106f4e Mon Sep 17 00:00:00 2001 From: Lari Hotari Date: Thu, 25 Jun 2026 00:10:42 +0300 Subject: [PATCH 01/10] Add broker FileSystemPackagesStorage support for Functions on Oxia The Pulsar Packages Management Service runs on the broker. Its default BookKeeperPackagesStorage requires ZooKeeper, so Pulsar Functions cannot store uploaded packages when Oxia is the metadata store. Add broker.packageManagement to host the Packages Management Service with FileSystemPackagesStorage on a shared PersistentVolumeClaim mounted on every broker pod, so Functions work with Oxia (and without ZooKeeper). It is configured in two levels, like auth.authentication / auth.authentication.jwt: - broker.packageManagement.enabled enables the service on the broker (sets enablePackagesManagement). - broker.packageManagement.fileSystemStorage.enabled selects the FileSystemPackagesStorageProvider and sets STORAGE_PATH. Its storageClass / persistentVolume / persistentVolumeClaim render raw YAML (only apiVersion/kind fixed; {} creates nothing), and storagePath/claimName configure the shared volume. The default PVC is a single-node ReadWriteOnce claim (minikube); multi-broker needs a ReadWriteMany shared filesystem (GKE Filestore / EKS EFS / AKS Azure Files CSI). - broker-statefulset mounts the shared volume on every broker pod; the embedded worker sets functionsWorkerEnablePackageManagement. - Fail the Helm install when functions run on Oxia without FileSystemPackagesStorage (broker-package-storage-validation.yaml). - CI: the Oxia install test enables functions + FileSystemPackagesStorage so ci::test_pulsar_function validates package upload end to end; render-all patch1 exercises the StorageClass / PV / PVC branches. - Docs: README, examples/README, examples/values-oxia. 
--- .ci/clusters/values-oxia.yaml | 18 +++++- .ci/templates-all-values-patch1.yaml | 39 ++++++++++++- .github/workflows/pulsar-helm-chart-ci.yaml | 2 +- README.md | 51 +++++++++++++++++ charts/pulsar/templates/broker-configmap.yaml | 20 +++++++ .../broker-package-storage-validation.yaml | 31 +++++++++++ .../templates/broker-package-storage.yaml | 55 +++++++++++++++++++ .../pulsar/templates/broker-statefulset.yaml | 11 ++++ charts/pulsar/values.yaml | 54 ++++++++++++++++++ examples/README.md | 35 +++++++++++- examples/values-oxia.yaml | 19 ++++++- 11 files changed, 325 insertions(+), 10 deletions(-) create mode 100644 charts/pulsar/templates/broker-package-storage-validation.yaml create mode 100644 charts/pulsar/templates/broker-package-storage.yaml diff --git a/.ci/clusters/values-oxia.yaml b/.ci/clusters/values-oxia.yaml index 15d69dc2..8bec06e2 100644 --- a/.ci/clusters/values-oxia.yaml +++ b/.ci/clusters/values-oxia.yaml @@ -20,9 +20,21 @@ components: zookeeper: false oxia: true - # disable functions for oxia tests since there's no support for Oxia in - # BookKeeperPackagesStorage which requires Zookeeper - functions: false + # Functions are enabled on Oxia together with broker.packageManagement (FileSystemPackagesStorage). + # The default BookKeeper package storage requires ZooKeeper, but FileSystemPackagesStorage does not, so + # this validates Oxia + FileSystemPackagesStorage end to end: the function smoke test + # (ci::test_pulsar_function) creates a function from a JAR, which uploads the package via the broker's + # FileSystem-backed Packages Management Service. + functions: true + +# Host the Packages Management Service on the broker with FileSystemPackagesStorage so functions work on +# Oxia. broker.replicaCount is 1 in CI (.ci/values-common.yaml), so the default ReadWriteOnce PVC on the +# kind default StorageClass is sufficient (no shared filesystem needed). 
+broker: + packageManagement: + enabled: true + fileSystemStorage: + enabled: true oxia: initialShardCount: 3 diff --git a/.ci/templates-all-values-patch1.yaml b/.ci/templates-all-values-patch1.yaml index 67239329..4a8b368b 100644 --- a/.ci/templates-all-values-patch1.yaml +++ b/.ci/templates-all-values-patch1.yaml @@ -120,12 +120,47 @@ bookkeeper: storageClass: # ----------------------------------------------------------------------------- -# Broker: flip statefulsetUpgrade off -# Exercises: the path where broker-statefulset-upgrade.yaml renders nothing. +# Broker: +# - flip statefulsetUpgrade off (exercises the path where +# broker-statefulset-upgrade.yaml renders nothing). +# - enable FileSystemPackagesStorage with a created StorageClass + PersistentVolume + PVC +# (exercises all three branches of broker-package-storage.yaml plus the +# broker.packageManagement volume mount in broker-statefulset.yaml and the +# enablePackagesManagement keys in broker-configmap.yaml). # ----------------------------------------------------------------------------- broker: statefulsetUpgrade: enabled: false + packageManagement: + enabled: true + fileSystemStorage: + enabled: true + storageClass: + metadata: + name: pulsar-pkg-sc + provisioner: kubernetes.io/no-provisioner + volumeBindingMode: WaitForFirstConsumer + persistentVolume: + metadata: + name: pulsar-pkg-pv + spec: + capacity: + storage: 10Gi + accessModes: + - ReadWriteMany + storageClassName: pulsar-pkg-sc + hostPath: + path: /tmp/pulsar-packages + persistentVolumeClaim: + metadata: + name: pulsar-broker-package-storage + spec: + accessModes: + - ReadWriteMany + storageClassName: pulsar-pkg-sc + resources: + requests: + storage: 10Gi # ----------------------------------------------------------------------------- # Cert-manager internal issuer: selfsigning -> ca diff --git a/.github/workflows/pulsar-helm-chart-ci.yaml b/.github/workflows/pulsar-helm-chart-ci.yaml index 83c21926..29195666 100644 --- 
a/.github/workflows/pulsar-helm-chart-ci.yaml +++ b/.github/workflows/pulsar-helm-chart-ci.yaml @@ -284,7 +284,7 @@ jobs: - name: Pulsar Manager values_file: .ci/clusters/values-pulsar-manager.yaml shortname: pulsar-manager - - name: Oxia + - name: Oxia + FileSystemPackagesStorage values_file: .ci/clusters/values-oxia.yaml shortname: oxia - name: OpenID diff --git a/README.md b/README.md index 0976bde7..3fd3d58d 100644 --- a/README.md +++ b/README.md @@ -384,6 +384,57 @@ The default user is `pulsar` and you can find out the password with this command kubectl get secret -l component=pulsar-manager -o=jsonpath="{.items[0].data.UI_PASSWORD}" | base64 --decode ``` +## Pulsar Functions package storage (required for Oxia) + +The Pulsar **Packages Management Service** — which stores uploaded function packages +(`pulsar-admin functions create --jar ...`) — runs on the **broker**. Its default storage provider, +`BookKeeperPackagesStorage`, relies on DistributedLog metadata in **ZooKeeper**, so it does **not** work +when [Oxia](https://github.com/streamnative/oxia) is used as the metadata store (`components.oxia: true`). + +To run Pulsar Functions on Oxia you must enable `FileSystemPackagesStorage` on the broker. The Packages +Management Service is configured in two levels (like `auth.authentication` / `auth.authentication.jwt`): +`broker.packageManagement.enabled` turns the service on, and +`broker.packageManagement.fileSystemStorage.enabled` selects the FileSystem provider: + +```yaml +components: + oxia: true + functions: true +broker: + packageManagement: + enabled: true + fileSystemStorage: + enabled: true +``` + +This configures the broker with `enablePackagesManagement=true` and +`packagesManagementStorageProvider=FileSystemPackagesStorageProvider`, and mounts a **shared +`PersistentVolumeClaim`** on every broker pod as the package storage directory. 
If `components.functions` +is enabled with Oxia but FileSystemPackagesStorage is not enabled, the chart **fails the Helm install** with +an explanatory error (the default BookKeeper provider would not work without ZooKeeper). + +### Choosing a volume + +`FileSystemPackagesStorage` is a directory on disk, so the volume backing it determines how many broker +replicas can use it (all keys below are under `broker.packageManagement.fileSystemStorage`): + +- **Single broker / single-node dev clusters (e.g. minikube):** the default `persistentVolumeClaim` is a + `ReadWriteOnce` claim on the cluster's default `StorageClass` — no extra configuration is required. +- **Multiple broker replicas:** the package directory must be on a **`ReadWriteMany` shared filesystem**. + Provision one with a cloud CSI driver, set `persistentVolumeClaim: {}` so the chart does not create a + claim, and point `claimName` at the pre-created PVC: + + | Cloud | CSI driver | Reference | + | ----- | ---------- | --------- | + | GCP / GKE | Filestore CSI (`filestore.csi.storage.gke.io`) | <https://cloud.google.com/kubernetes-engine/docs/how-to/persistent-volumes/filestore-csi-driver> | + | AWS / EKS | Amazon EFS CSI (`efs.csi.aws.com`) | <https://docs.aws.amazon.com/eks/latest/userguide/efs-csi.html> | + | Azure / AKS | Azure Files CSI (`file.csi.azure.com`) | <https://learn.microsoft.com/en-us/azure/aks/azure-files-csi> | + +`broker.packageManagement.fileSystemStorage` can also create the `StorageClass`, `PersistentVolume`, and +`PersistentVolumeClaim` directly from raw YAML — only `apiVersion`/`kind` are fixed by the chart, and a +value of `{}` creates nothing. See the `broker.packageManagement` section in +[`values.yaml`](charts/pulsar/values.yaml) and [`examples/values-oxia.yaml`](examples/values-oxia.yaml). + ## Grafana Dashboards The Apache Pulsar Helm Chart uses the `victoria-metrics-k8s-stack` Helm Chart to deploy Grafana. 
diff --git a/charts/pulsar/templates/broker-configmap.yaml b/charts/pulsar/templates/broker-configmap.yaml index 177b4d8d..0f8e9e91 100644 --- a/charts/pulsar/templates/broker-configmap.yaml +++ b/charts/pulsar/templates/broker-configmap.yaml @@ -146,6 +146,11 @@ data: {{- end }} {{- if .Values.components.functions }} functionsWorkerEnabled: "true" + {{- if .Values.broker.packageManagement.enabled }} + # store function packages via the broker's Packages Management Service (FileSystemPackagesStorage) + # instead of BookKeeper/DLog, so functions work without ZooKeeper (e.g. with Oxia) + PF_functionsWorkerEnablePackageManagement: "true" + {{- end }} {{- if .Values.functions.useBookieAsStateStore }} PF_stateStorageServiceUrl: "bk://{{ template "pulsar.fullname" . }}-{{ .Values.bookkeeper.component }}:{{ .Values.bookkeeper.ports.statestore }}" {{- end }} @@ -186,6 +191,21 @@ data: {{- end }} {{- end }} + # Package Management Service + # The broker hosts the Packages Management Service (packageManagement.enabled). With + # fileSystemStorage.enabled it uses FileSystemPackagesStorage, which lets Functions store packages without + # ZooKeeper (works with Oxia); the storage path is a shared volume mounted on every broker pod (see + # broker-statefulset.yaml / templates/broker-package-storage.yaml). Otherwise the broker keeps its default + # BookKeeperPackagesStorage provider. 
+ {{- if .Values.broker.packageManagement.enabled }} + enablePackagesManagement: "true" + {{- if .Values.broker.packageManagement.fileSystemStorage.enabled }} + packagesManagementStorageProvider: "org.apache.pulsar.packages.management.storage.filesystem.FileSystemPackagesStorageProvider" + # STORAGE_PATH is not a broker.conf key, so add it to the properties map via PULSAR_PREFIX_ + PULSAR_PREFIX_STORAGE_PATH: "{{ .Values.broker.packageManagement.fileSystemStorage.storagePath }}" + {{- end }} + {{- end }} + # prometheus needs to access /metrics endpoint webServicePort: "{{ .Values.broker.ports.http }}" {{- if or (not .Values.tls.enabled) (not .Values.tls.broker.enabled) }} diff --git a/charts/pulsar/templates/broker-package-storage-validation.yaml b/charts/pulsar/templates/broker-package-storage-validation.yaml new file mode 100644 index 00000000..acae47b7 --- /dev/null +++ b/charts/pulsar/templates/broker-package-storage-validation.yaml @@ -0,0 +1,31 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +{{- /* +Functions on Oxia require FileSystemPackagesStorage. +The function worker stores function packages in BookKeeper/DLog by default, which requires ZooKeeper and +therefore does not work with Oxia. 
The fix is to host the Packages Management Service with +FileSystemPackagesStorage on the broker (broker.packageManagement.enabled AND +broker.packageManagement.fileSystemStorage.enabled). Fail fast when functions run on Oxia without it. + +This check lives in a rendered template (not a `_`-prefixed partial) so that the `fail` is executed. +*/ -}} +{{- if (and .Values.components.functions .Values.components.oxia (not (and .Values.broker.packageManagement.enabled .Values.broker.packageManagement.fileSystemStorage.enabled))) }} +{{- fail "ERROR: Pulsar Functions on Oxia require FileSystemPackagesStorage. The default BookKeeper package storage requires ZooKeeper and does not work with Oxia (components.oxia=true). Set broker.packageManagement.enabled=true and broker.packageManagement.fileSystemStorage.enabled=true to host FileSystemPackagesStorage on the broker, or use ZooKeeper as the metadata store. See the README and examples/README.md Functions section." }} +{{- end }} diff --git a/charts/pulsar/templates/broker-package-storage.yaml b/charts/pulsar/templates/broker-package-storage.yaml new file mode 100644 index 00000000..c21ac91a --- /dev/null +++ b/charts/pulsar/templates/broker-package-storage.yaml @@ -0,0 +1,55 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. +# + +# Storage objects for the broker's FileSystemPackagesStorage (Packages Management Service). +# Each of broker.packageManagement.fileSystemStorage.{storageClass,persistentVolume,persistentVolumeClaim} is +# rendered verbatim from values with only apiVersion and kind fixed by the chart; an empty value ({}) renders +# nothing. This lets you either create the objects here (default: a minikube-friendly PVC on the default +# StorageClass) or set them to {} and reference a pre-created PVC (e.g. a ReadWriteMany cloud filesystem) via +# broker.packageManagement.fileSystemStorage.claimName. +{{- if and .Values.components.broker (not .Values.standalone.enabled) .Values.broker.packageManagement.enabled .Values.broker.packageManagement.fileSystemStorage.enabled }} +{{- $pm := .Values.broker.packageManagement.fileSystemStorage }} +{{- if $pm.storageClass }} +apiVersion: storage.k8s.io/v1 +kind: StorageClass +{{ toYaml (omit $pm.storageClass "apiVersion" "kind") | trim }} +--- +{{- end }} +{{- if $pm.persistentVolume }} +apiVersion: v1 +kind: PersistentVolume +{{ toYaml (omit $pm.persistentVolume "apiVersion" "kind") | trim }} +--- +{{- end }} +{{- if $pm.persistentVolumeClaim }} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + {{- if not (dig "metadata" "namespace" "" $pm.persistentVolumeClaim) }} + namespace: {{ template "pulsar.namespace" . }} + {{- end }} + {{- with $pm.persistentVolumeClaim.metadata }} + {{- toYaml . | nindent 2 }} + {{- end }} +{{- with $pm.persistentVolumeClaim.spec }} +spec: + {{- toYaml . 
| nindent 2 }} +{{- end }} +{{- end }} +{{- end }} diff --git a/charts/pulsar/templates/broker-statefulset.yaml b/charts/pulsar/templates/broker-statefulset.yaml index 697ee646..959c2a75 100644 --- a/charts/pulsar/templates/broker-statefulset.yaml +++ b/charts/pulsar/templates/broker-statefulset.yaml @@ -314,6 +314,10 @@ spec: {{- if .Values.broker.extraVolumeMounts }} {{ toYaml .Values.broker.extraVolumeMounts | indent 10 }} {{- end }} + {{- if and .Values.broker.packageManagement.enabled .Values.broker.packageManagement.fileSystemStorage.enabled }} + - name: broker-package-storage + mountPath: {{ .Values.broker.packageManagement.fileSystemStorage.storagePath }} + {{- end }} {{- include "pulsar.broker.certs.volumeMounts" . | nindent 10 }} env: {{- if and (and .Values.broker.storageOffload (eq .Values.broker.storageOffload.driver "aws-s3")) .Values.broker.storageOffload.secret }} @@ -380,6 +384,13 @@ spec: secretName: {{ .Values.broker.storageOffload.gcsServiceAccountSecret }} {{- end }} {{- end }} + {{- if and .Values.broker.packageManagement.enabled .Values.broker.packageManagement.fileSystemStorage.enabled }} + # Shared package-storage volume mounted on every broker pod (FileSystemPackagesStorage). + # For more than one broker replica this PVC must be ReadWriteMany (a shared filesystem). + - name: broker-package-storage + persistentVolumeClaim: + claimName: {{ .Values.broker.packageManagement.fileSystemStorage.claimName }} + {{- end }} {{- include "pulsar.broker.certs.volumes" . | nindent 6 }} {{- include "pulsar.imagePullSecrets" . 
| nindent 6}} {{- end }} diff --git a/charts/pulsar/values.yaml b/charts/pulsar/values.yaml index 7976a780..de9bcd59 100755 --- a/charts/pulsar/values.yaml +++ b/charts/pulsar/values.yaml @@ -1410,6 +1410,60 @@ broker: annotations: {} ## You may use the following annotation in order to use EKS IAM Roles for Service Accounts (IRSA) # eks.amazonaws.com/role-arn: arn:aws:iam::66666:role/my-iam-role-with-s3-access + ## Pulsar Functions / Packages: Packages Management Service + ## templates/broker-package-storage.yaml + ## + ## The Pulsar Packages Management Service runs on the broker and stores uploaded function packages + ## (e.g. `pulsar-admin functions create --jar ...`). It is configured in two levels, following the same + ## pattern as auth.authentication / auth.authentication.jwt: + ## - packageManagement.enabled enables the service on the broker (enablePackagesManagement). + ## - packageManagement.fileSystemStorage.enabled uses FileSystemPackagesStorage as the storage provider + ## instead of the default BookKeeperPackagesStorage. + ## + ## FileSystemPackagesStorage works WITHOUT ZooKeeper, so it is required for Functions on Oxia (the default + ## BookKeeper provider needs ZooKeeper). The storage directory is mounted from a shared PersistentVolumeClaim + ## added under the broker pod spec, so every broker replica sees the same packages. For a single broker (or + ## single-node dev clusters like minikube) the default ReadWriteOnce PVC on the cluster's default StorageClass + ## is enough. For MORE THAN ONE broker replica the volume must be a shared ReadWriteMany filesystem; provision + ## one with a cloud CSI driver and reference it via `claimName` (set `persistentVolumeClaim: {}` so the chart + ## does not create one). See the README and examples/README.md "Functions" section for GKE / EKS / AKS references. + packageManagement: + # Enable the Packages Management Service on the broker (sets enablePackagesManagement). 
+ enabled: false + # FileSystemPackagesStorage: use a filesystem directory (on a shared volume) as the package storage + # provider instead of the default BookKeeperPackagesStorage. Requires packageManagement.enabled. + # Mandatory for Functions on Oxia. + fileSystemStorage: + enabled: false + # FileSystemPackagesStorage directory inside the broker (mounted from the PVC below). + storagePath: /pulsar/packages-storage + # Name of the PVC mounted on the broker pods. When `persistentVolumeClaim` is created below this must + # match its metadata.name; for a pre-created (e.g. ReadWriteMany cloud) PVC, set this to that claim's name. + claimName: pulsar-broker-package-storage + # StorageClass to create. Only apiVersion (storage.k8s.io/v1) and kind (StorageClass) are fixed by the + # chart; provide everything else here. {} = do not create one (use the cluster default StorageClass). + storageClass: {} + # PersistentVolume to create (for static provisioning, e.g. a pre-existing NFS/CSI/hostPath volume). + # Only apiVersion (v1) and kind (PersistentVolume) are fixed by the chart; provide everything else here + # (capacity, accessModes, the volume source, etc.). {} = do not create one (the default; most setups use + # dynamic provisioning via a StorageClass or reference a pre-created PVC). When set, bind the PVC below + # to it (e.g. via spec.volumeName / matching storageClassName). + persistentVolume: {} + # PersistentVolumeClaim to create. Only apiVersion (v1) and kind (PersistentVolumeClaim) are fixed by + # the chart; provide everything else here. {} = do not create one (reference a pre-created PVC via + # `claimName` above — this is how the GKE / EKS / AKS shared-filesystem options are wired). + # The default below works on single-node dev clusters (minikube): a ReadWriteOnce claim on the default + # StorageClass. For multi-broker use a ReadWriteMany shared filesystem. 
+ persistentVolumeClaim: + metadata: + name: pulsar-broker-package-storage + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 10Gi + ## Tiered Storage ## storageOffload: {} diff --git a/examples/README.md b/examples/README.md index 1ac8b159..2690c6b5 100644 --- a/examples/README.md +++ b/examples/README.md @@ -136,9 +136,42 @@ in examples that deploy a broker. | File | Description | | ---- | ----------- | -| [`values-oxia.yaml`](values-oxia.yaml) | Use [Oxia](https://github.com/streamnative/oxia) as the metadata store instead of ZooKeeper (`components.zookeeper: false`, `components.oxia: true`). Pulsar Functions are disabled (`components.functions: false`) because their BookKeeper package storage still requires ZooKeeper. | +| [`values-oxia.yaml`](values-oxia.yaml) | Use [Oxia](https://github.com/streamnative/oxia) as the metadata store instead of ZooKeeper (`components.zookeeper: false`, `components.oxia: true`). Pulsar Functions are enabled (`components.functions: true`) together with broker-hosted [`FileSystemPackagesStorage`](#package-storage-filesystempackagesstorage) (`broker.packageManagement.enabled` + `fileSystemStorage.enabled`), since the default BookKeeper package storage requires ZooKeeper. | | [`values-cs.yaml`](values-cs.yaml) | Deploy **only** ZooKeeper as a shared configuration store (`metadataPrefix: /configuration-store`); all other components are disabled. Intended to be combined with `values-local-cluster.yaml`. | +### Functions + +Pulsar Functions run in a worker that is **embedded in the broker** (`components.functions: true`). + +#### Package storage (FileSystemPackagesStorage) + +The Pulsar Packages Management Service — used to store uploaded function packages +(`pulsar-admin functions create --jar ...`) — runs on the **broker**. Its default `BookKeeperPackagesStorage` +requires ZooKeeper, so it does **not** work with Oxia. 
To support uploaded packages without ZooKeeper, enable +the service and the FileSystem provider (two levels, like `auth.authentication` / `auth.authentication.jwt`): +`broker.packageManagement.enabled: true` and `broker.packageManagement.fileSystemStorage.enabled: true`. This +configures the broker to use **`FileSystemPackagesStorage`** on a shared volume mounted on every broker pod. +(Enabling functions with Oxia but **without** FileSystemPackagesStorage fails chart rendering with a clear +error.) + +All keys below are under `broker.packageManagement.fileSystemStorage`: + +- **Single broker / dev (minikube):** the default `persistentVolumeClaim` creates a `ReadWriteOnce` PVC on the + cluster's default `StorageClass` — no extra setup. +- **Multiple broker replicas:** the volume must be a **`ReadWriteMany` shared filesystem**. Provision one with + a cloud CSI driver, set `persistentVolumeClaim: {}` (so the chart creates no claim), and reference the + pre-created PVC via `claimName`: + - **GCP / GKE** — Filestore CSI (`filestore.csi.storage.gke.io`): + <https://cloud.google.com/kubernetes-engine/docs/how-to/persistent-volumes/filestore-csi-driver> + - **AWS / EKS** — Amazon EFS CSI (`efs.csi.aws.com`): + <https://docs.aws.amazon.com/eks/latest/userguide/efs-csi.html> + - **Azure / AKS** — Azure Files CSI (`file.csi.azure.com`): + <https://learn.microsoft.com/en-us/azure/aks/azure-files-csi> + +`broker.packageManagement.fileSystemStorage` can also create the `StorageClass`, `PersistentVolume`, and +`PersistentVolumeClaim` directly from raw YAML — only `apiVersion`/`kind` are fixed by the chart, and a value +of `{}` creates nothing. 
+ ### Storage | File | Description | diff --git a/examples/values-oxia.yaml b/examples/values-oxia.yaml index 245b1db6..4c9b2efa 100644 --- a/examples/values-oxia.yaml +++ b/examples/values-oxia.yaml @@ -20,6 +20,19 @@ components: zookeeper: false oxia: true - # disable functions for oxia tests since there's no support for Oxia in - # BookKeeperPackagesStorage which requires Zookeeper - functions: false \ No newline at end of file + # Pulsar Functions work with Oxia as long as the broker hosts FileSystemPackagesStorage (see the + # broker.packageManagement block below). The default BookKeeper package storage requires ZooKeeper, so + # enabling functions on Oxia WITHOUT broker.packageManagement.enabled fails chart rendering. + functions: true + +broker: + packageManagement: + # Host the Packages Management Service on the broker (enabled) with FileSystemPackagesStorage + # (fileSystemStorage.enabled) — no ZooKeeper, so uploaded packages (e.g. + # `pulsar-admin functions create --jar ...`) work with Oxia. This uses the default single-node PVC + # (ReadWriteOnce on the default StorageClass) defined in the chart values, which is fine for a single + # broker / minikube. For multiple broker replicas use a ReadWriteMany shared filesystem — see the README + # and examples/README.md Functions section. + enabled: true + fileSystemStorage: + enabled: true \ No newline at end of file From a15b5ad738b2e910abc1be2c68d5f4a590208536 Mon Sep 17 00:00:00 2001 From: Lari Hotari Date: Thu, 25 Jun 2026 13:58:05 +0300 Subject: [PATCH 02/10] Fix Functions on Oxia: set functionsWorkerEnablePackageManagement in broker.conf The broker-embedded function worker stores function packages in BookKeeper/DLog by default, which requires ZooKeeper and fails with Oxia (NPE "dlogNamespace is null" on `pulsar-admin functions create`). Routing package storage through the broker's Packages Management Service requires functionsWorkerEnablePackageManagement=true. 
This is a broker ServiceConfiguration (broker.conf) key, not a function-worker (functions_worker.yml / PF_) key: for the broker-embedded worker, PulsarService overrides the worker value with the broker config value (workerConfig.setFunctionsWorkerEnablePackageManagement(brokerConfig.isFunctionsWorkerEnablePackageManagement())). The previous PF_functionsWorkerEnablePackageManagement therefore had no effect and the worker kept using DLog. Set it as a plain broker.conf key gated on components.functions + broker.packageManagement.enabled. Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01RZUbdHbb856wdmKxBU4V48 --- charts/pulsar/templates/broker-configmap.yaml | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/charts/pulsar/templates/broker-configmap.yaml b/charts/pulsar/templates/broker-configmap.yaml index 0f8e9e91..4a77d8a6 100644 --- a/charts/pulsar/templates/broker-configmap.yaml +++ b/charts/pulsar/templates/broker-configmap.yaml @@ -146,11 +146,6 @@ data: {{- end }} {{- if .Values.components.functions }} functionsWorkerEnabled: "true" - {{- if .Values.broker.packageManagement.enabled }} - # store function packages via the broker's Packages Management Service (FileSystemPackagesStorage) - # instead of BookKeeper/DLog, so functions work without ZooKeeper (e.g. with Oxia) - PF_functionsWorkerEnablePackageManagement: "true" - {{- end }} {{- if .Values.functions.useBookieAsStateStore }} PF_stateStorageServiceUrl: "bk://{{ template "pulsar.fullname" . }}-{{ .Values.bookkeeper.component }}:{{ .Values.bookkeeper.ports.statestore }}" {{- end }} @@ -199,6 +194,14 @@ data: # BookKeeperPackagesStorage provider. 
{{- if .Values.broker.packageManagement.enabled }} enablePackagesManagement: "true" + {{- if .Values.components.functions }} + # Route the broker-embedded function worker's package storage through the broker's Packages Management + # Service instead of BookKeeper/DLog, so Functions work without ZooKeeper (e.g. with Oxia). This is a + # broker.conf key (ServiceConfiguration): for the embedded worker, PulsarService overrides the worker's + # functionsWorkerEnablePackageManagement with the broker config value, so setting it via PF_ + # (functions_worker.yml) has no effect. + functionsWorkerEnablePackageManagement: "true" + {{- end }} {{- if .Values.broker.packageManagement.fileSystemStorage.enabled }} packagesManagementStorageProvider: "org.apache.pulsar.packages.management.storage.filesystem.FileSystemPackagesStorageProvider" # STORAGE_PATH is not a broker.conf key, so add it to the properties map via PULSAR_PREFIX_ From aec89cfba0e54724b646b2f0e76d9f02750c587f Mon Sep 17 00:00:00 2001 From: Lari Hotari Date: Thu, 25 Jun 2026 16:33:05 +0300 Subject: [PATCH 03/10] Functions on Oxia: broker fsGroup for package PVC + custom-path CI assertion The broker now mounts a PersistentVolumeClaim as the FileSystemPackagesStorage directory, but the broker pod set no securityContext/fsGroup. The pulsar image runs as uid 10000 with primary group 0 (root), so a freshly provisioned volume (root:root, 0755) is not writable by the broker and the function package upload would fail. CI did not catch this because the kind/microk8s hostpath provisioners create the backing dir world-writable (0777), which masks the problem; real StorageClasses (CSI/block, root:root 0755) and OpenShift (gid=0 prohibited) would fail. Add broker.securityContext (fsGroup: 0, fsGroupChangePolicy: OnRootMismatch), mirroring the bookkeeper/zookeeper securityContext, so the mounted volume is group-0-writable. 
Also harden the CI test: - Use a non-default storagePath (/pulsar/test-packages-storage) in the Oxia values so the test actually exercises PULSAR_PREFIX_STORAGE_PATH. The FileSystemPackagesStorage default ("packages-storage") resolves to /pulsar/packages-storage, which coincides with the chart's default mount path, so a broken STORAGE_PATH wiring would still pass. - Add ci::verify_package_storage_files: after `functions create`, assert the uploaded package files exist under the configured storagePath on the broker, catching broken STORAGE_PATH wiring or an unwritable volume. Verified end to end on a local cluster: broker runs uid=10000 gid=0; the package (data + meta) is written under /pulsar/test-packages-storage/function/..., while the default /pulsar/packages-storage stays empty (confirming STORAGE_PATH is honored). Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01RZUbdHbb856wdmKxBU4V48 --- .ci/clusters/values-oxia.yaml | 7 +++++ .ci/helm.sh | 29 +++++++++++++++++++ .../pulsar/templates/broker-statefulset.yaml | 4 +++ charts/pulsar/values.yaml | 8 +++++ 4 files changed, 48 insertions(+) diff --git a/.ci/clusters/values-oxia.yaml b/.ci/clusters/values-oxia.yaml index 8bec06e2..b361d11d 100644 --- a/.ci/clusters/values-oxia.yaml +++ b/.ci/clusters/values-oxia.yaml @@ -35,6 +35,13 @@ broker: enabled: true fileSystemStorage: enabled: true + # Use a NON-default storage path so the test actually exercises PULSAR_PREFIX_STORAGE_PATH being + # applied to the broker. The FileSystemPackagesStorage default ("packages-storage") would resolve to + # /pulsar/packages-storage (the cwd), which happens to match the chart's default mount path, so a + # broken STORAGE_PATH wiring would still pass. With a custom path the PVC is mounted here AND + # STORAGE_PATH points here, so the function-package upload only lands on the volume if both are wired + # correctly. ci::verify_package_storage_files asserts the uploaded package files exist under this path. 
+ storagePath: /pulsar/test-packages-storage oxia: initialShardCount: 3 diff --git a/.ci/helm.sh b/.ci/helm.sh index c5303f2d..342f13cd 100755 --- a/.ci/helm.sh +++ b/.ci/helm.sh @@ -416,10 +416,39 @@ function ci::wait_message_processed() { done } +function ci::verify_package_storage_files() { + # When the broker hosts FileSystemPackagesStorage, the function package uploaded by `functions create` + # must land on the broker's shared package-storage volume (at STORAGE_PATH). Verify the files are + # actually there - this catches a broken STORAGE_PATH wiring (package written to the wrong directory, + # off the volume) or a volume the broker cannot write to (permissions / missing fsGroup). + if [[ "$(ci::helm_values_for_deployment | yq '.broker.packageManagement.fileSystemStorage.enabled')" != "true" ]]; then + return 0 + fi + local storage_path + storage_path=$(ci::helm_values_for_deployment | yq '.broker.packageManagement.fileSystemStorage.storagePath') + if [[ -z "${storage_path}" || "${storage_path}" == "null" ]]; then + storage_path="/pulsar/packages-storage" + fi + echo "Verifying function package files exist under broker FileSystemPackagesStorage path: ${storage_path}" + ${KUBECTL} exec -n "${NAMESPACE}" "${CLUSTER}"-broker-0 -- bash -c "ls -laR '${storage_path}' || true" + local file_count + file_count=$(${KUBECTL} exec -n "${NAMESPACE}" "${CLUSTER}"-broker-0 -- bash -c "find '${storage_path}' -type f 2>/dev/null | wc -l" | tr -d '[:space:]') + echo "FileSystemPackagesStorage file count under ${storage_path}: ${file_count}" + if [[ -z "${file_count}" || "${file_count}" -lt 1 ]]; then + echo >&2 "ERROR: no files found under FileSystemPackagesStorage path ${storage_path} on ${CLUSTER}-broker-0." + echo >&2 "The function package was not persisted to the package-storage volume (check STORAGE_PATH wiring, the PVC mount, and volume write permissions / fsGroup)." 
+ return 1 + fi + echo "OK: function package persisted to FileSystemPackagesStorage (${file_count} file(s) under ${storage_path})" +} + function ci::test_pulsar_function() { echo "Testing functions" echo "Creating function" ${KUBECTL} exec -n "${NAMESPACE}" "${CLUSTER}"-toolset-0 -- bin/pulsar-admin functions create --tenant pulsar-ci --namespace test --name test-function --inputs "pulsar-ci/test/test_input" --output "pulsar-ci/test/test_output" --parallelism 1 --classname org.apache.pulsar.functions.api.examples.ExclamationFunction --jar /pulsar/examples/api-examples.jar + # The package upload happens at create time; verify it landed on the broker's FileSystemPackagesStorage + # volume (no-op unless fileSystemStorage is enabled). + ci::verify_package_storage_files echo "Creating subscription for output topic" ${KUBECTL} exec -n "${NAMESPACE}" "${CLUSTER}"-toolset-0 -- bin/pulsar-admin topics create-subscription -s test pulsar-ci/test/test_output echo "Waiting for function to be ready" diff --git a/charts/pulsar/templates/broker-statefulset.yaml b/charts/pulsar/templates/broker-statefulset.yaml index 959c2a75..bed5a747 100644 --- a/charts/pulsar/templates/broker-statefulset.yaml +++ b/charts/pulsar/templates/broker-statefulset.yaml @@ -74,6 +74,10 @@ spec: {{- end }} spec: serviceAccountName: "{{ template "pulsar.fullname" . }}-{{ .Values.broker.component }}-acct" + {{- if .Values.broker.securityContext }} + securityContext: +{{ toYaml .Values.broker.securityContext | indent 8 }} + {{- end }} {{- if .Values.broker.nodeSelector }} nodeSelector: {{ toYaml .Values.broker.nodeSelector | indent 8 }} diff --git a/charts/pulsar/values.yaml b/charts/pulsar/values.yaml index de9bcd59..770da553 100755 --- a/charts/pulsar/values.yaml +++ b/charts/pulsar/values.yaml @@ -1255,6 +1255,14 @@ broker: # so the metrics are correctly rendered in grafana dashboard component: broker replicaCount: 3 + # Ensures the non-root docker image works correctly. 
With fsGroup: 0 a mounted volume is group-owned by + # GID 0 (the group the pulsar user, uid 10000, belongs to) and made group-writable, so the broker can + # read/write it. Required when packageManagement.fileSystemStorage is enabled (the broker mounts a + # PersistentVolumeClaim as the FileSystemPackagesStorage directory); harmless otherwise. Mirrors the + # bookkeeper/zookeeper securityContext. + securityContext: + fsGroup: 0 + fsGroupChangePolicy: "OnRootMismatch" autoscaling: enabled: false minReplicas: 1 From 19fd20d2ece964f5c806de7901d6f6c8501dd1d2 Mon Sep 17 00:00:00 2001 From: Lari Hotari Date: Thu, 25 Jun 2026 18:07:15 +0300 Subject: [PATCH 04/10] Examples: split Functions+FileSystemPackagesStorage into its own example examples/values-oxia.yaml is now a plain Oxia metadata-store example again: Functions are disabled by default, so it no longer explicitly disables them, and it no longer carries the broker.packageManagement block. It instead points to a new example for running Functions on Oxia. Add examples/values-functions-fs-storage.yaml: enables Functions and broker-hosted FileSystemPackagesStorage (no ZooKeeper dependency), documented as suitable for Oxia (merge it with values-oxia.yaml). Reference it from examples/README.md (table + Functions section) and from the top-level README.md. Docs: drop the "(like auth.authentication / auth.authentication.jwt)" analogy from README.md, examples/README.md and values.yaml, and reword the validation note to "enabled without ZooKeeper (using Oxia)". 
Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01RZUbdHbb856wdmKxBU4V48 --- README.md | 10 ++--- charts/pulsar/values.yaml | 3 +- examples/README.md | 14 ++++--- examples/values-functions-fs-storage.yaml | 49 +++++++++++++++++++++++ examples/values-oxia.yaml | 21 +++------- 5 files changed, 68 insertions(+), 29 deletions(-) create mode 100644 examples/values-functions-fs-storage.yaml diff --git a/README.md b/README.md index 3fd3d58d..cb1cc1fe 100644 --- a/README.md +++ b/README.md @@ -392,8 +392,7 @@ The Pulsar **Packages Management Service** — which stores uploaded function pa when [Oxia](https://github.com/streamnative/oxia) is used as the metadata store (`components.oxia: true`). To run Pulsar Functions on Oxia you must enable `FileSystemPackagesStorage` on the broker. The Packages -Management Service is configured in two levels (like `auth.authentication` / `auth.authentication.jwt`): -`broker.packageManagement.enabled` turns the service on, and +Management Service is configured in two levels: `broker.packageManagement.enabled` turns the service on, and `broker.packageManagement.fileSystemStorage.enabled` selects the FileSystem provider: ```yaml @@ -410,8 +409,8 @@ broker: This configures the broker with `enablePackagesManagement=true` and `packagesManagementStorageProvider=FileSystemPackagesStorageProvider`, and mounts a **shared `PersistentVolumeClaim`** on every broker pod as the package storage directory. If `components.functions` -is enabled with Oxia but FileSystemPackagesStorage is not enabled, the chart **fails the Helm install** with -an explanatory error (the default BookKeeper provider would not work without ZooKeeper). +is enabled without ZooKeeper (using Oxia) but FileSystemPackagesStorage is not enabled, the chart **fails the +Helm install** with an explanatory error (the default BookKeeper provider would not work without ZooKeeper). 
### Choosing a volume @@ -433,7 +432,8 @@ replicas can use it (all keys below are under `broker.packageManagement.fileSyst `broker.packageManagement.fileSystemStorage` can also create the `StorageClass`, `PersistentVolume`, and `PersistentVolumeClaim` directly from raw YAML — only `apiVersion`/`kind` are fixed by the chart, and a value of `{}` creates nothing. See the `broker.packageManagement` section in -[`values.yaml`](charts/pulsar/values.yaml) and [`examples/values-oxia.yaml`](examples/values-oxia.yaml). +[`values.yaml`](charts/pulsar/values.yaml) and the +[`examples/values-functions-fs-storage.yaml`](examples/values-functions-fs-storage.yaml) example. ## Grafana Dashboards diff --git a/charts/pulsar/values.yaml b/charts/pulsar/values.yaml index 770da553..a82f9875 100755 --- a/charts/pulsar/values.yaml +++ b/charts/pulsar/values.yaml @@ -1422,8 +1422,7 @@ broker: ## templates/broker-package-storage.yaml ## ## The Pulsar Packages Management Service runs on the broker and stores uploaded function packages - ## (e.g. `pulsar-admin functions create --jar ...`). It is configured in two levels, following the same - ## pattern as auth.authentication / auth.authentication.jwt: + ## (e.g. `pulsar-admin functions create --jar ...`). It is configured in two levels: ## - packageManagement.enabled enables the service on the broker (enablePackagesManagement). ## - packageManagement.fileSystemStorage.enabled uses FileSystemPackagesStorage as the storage provider ## instead of the default BookKeeperPackagesStorage. diff --git a/examples/README.md b/examples/README.md index 2690c6b5..9098e905 100644 --- a/examples/README.md +++ b/examples/README.md @@ -136,23 +136,25 @@ in examples that deploy a broker. | File | Description | | ---- | ----------- | -| [`values-oxia.yaml`](values-oxia.yaml) | Use [Oxia](https://github.com/streamnative/oxia) as the metadata store instead of ZooKeeper (`components.zookeeper: false`, `components.oxia: true`). 
Pulsar Functions are enabled (`components.functions: true`) together with broker-hosted [`FileSystemPackagesStorage`](#package-storage-filesystempackagesstorage) (`broker.packageManagement.enabled` + `fileSystemStorage.enabled`), since the default BookKeeper package storage requires ZooKeeper. | +| [`values-oxia.yaml`](values-oxia.yaml) | Use [Oxia](https://github.com/streamnative/oxia) as the metadata store instead of ZooKeeper (`components.zookeeper: false`, `components.oxia: true`). Pulsar Functions are disabled by default; to run Functions on Oxia, also merge [`values-functions-fs-storage.yaml`](values-functions-fs-storage.yaml) (broker-hosted [`FileSystemPackagesStorage`](#package-storage-filesystempackagesstorage), required because the default BookKeeper package storage needs ZooKeeper). | | [`values-cs.yaml`](values-cs.yaml) | Deploy **only** ZooKeeper as a shared configuration store (`metadataPrefix: /configuration-store`); all other components are disabled. Intended to be combined with `values-local-cluster.yaml`. | +| [`values-functions-fs-storage.yaml`](values-functions-fs-storage.yaml) | Enable Pulsar Functions (`components.functions: true`) with broker-hosted [`FileSystemPackagesStorage`](#package-storage-filesystempackagesstorage) (`broker.packageManagement.enabled` + `fileSystemStorage.enabled`). Needs no ZooKeeper, so it is **suitable for Oxia** — merge it with [`values-oxia.yaml`](values-oxia.yaml). Also works with the default ZooKeeper metadata store. | ### Functions Pulsar Functions run in a worker that is **embedded in the broker** (`components.functions: true`). +See [`values-functions-fs-storage.yaml`](values-functions-fs-storage.yaml) for a ready-to-merge example +(suitable for Oxia). #### Package storage (FileSystemPackagesStorage) The Pulsar Packages Management Service — used to store uploaded function packages (`pulsar-admin functions create --jar ...`) — runs on the **broker**. 
Its default `BookKeeperPackagesStorage` requires ZooKeeper, so it does **not** work with Oxia. To support uploaded packages without ZooKeeper, enable -the service and the FileSystem provider (two levels, like `auth.authentication` / `auth.authentication.jwt`): -`broker.packageManagement.enabled: true` and `broker.packageManagement.fileSystemStorage.enabled: true`. This -configures the broker to use **`FileSystemPackagesStorage`** on a shared volume mounted on every broker pod. -(Enabling functions with Oxia but **without** FileSystemPackagesStorage fails chart rendering with a clear -error.) +the service and the FileSystem provider: `broker.packageManagement.enabled: true` and +`broker.packageManagement.fileSystemStorage.enabled: true`. This configures the broker to use +**`FileSystemPackagesStorage`** on a shared volume mounted on every broker pod. (Enabling functions without +ZooKeeper (using Oxia) but **without** FileSystemPackagesStorage fails chart rendering with a clear error.) All keys below are under `broker.packageManagement.fileSystemStorage`: diff --git a/examples/values-functions-fs-storage.yaml b/examples/values-functions-fs-storage.yaml new file mode 100644 index 00000000..29dfe97b --- /dev/null +++ b/examples/values-functions-fs-storage.yaml @@ -0,0 +1,49 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. +# + +# Enable Pulsar Functions with broker-hosted FileSystemPackagesStorage. +# +# The Pulsar Packages Management Service — which stores uploaded function packages +# (`pulsar-admin functions create --jar ...`) — runs on the broker. Its default BookKeeperPackagesStorage +# provider stores packages in DistributedLog metadata in ZooKeeper, so it does NOT work without ZooKeeper. +# Enabling FileSystemPackagesStorage hosts the packages on a filesystem volume instead, with no ZooKeeper +# dependency. +# +# This example is therefore suitable for Oxia: merge it with values-oxia.yaml to run Functions on an +# Oxia-backed cluster (without ZooKeeper), for example: +# ./merge-values.sh values-functions-fs-storage.yaml values-oxia.yaml values-one-node.yaml > merged-values.yaml +# It also works with the default ZooKeeper metadata store. +# +# The default persistentVolumeClaim (in the chart values) is a ReadWriteOnce claim on the cluster's default +# StorageClass, which is fine for a single broker / single-node dev clusters (minikube). For multiple broker +# replicas the package directory must be a ReadWriteMany shared filesystem — see the "Pulsar Functions +# package storage" section in the top-level README.md and the "Functions" section in examples/README.md. + +components: + # Run the Pulsar Functions worker embedded in the broker. + functions: true + +broker: + packageManagement: + # Host the Packages Management Service on the broker (sets enablePackagesManagement). + enabled: true + fileSystemStorage: + # Use FileSystemPackagesStorage instead of the default BookKeeperPackagesStorage. It needs no + # ZooKeeper, so uploaded function packages work without it (using Oxia). 
+ enabled: true diff --git a/examples/values-oxia.yaml b/examples/values-oxia.yaml index 4c9b2efa..328de663 100644 --- a/examples/values-oxia.yaml +++ b/examples/values-oxia.yaml @@ -20,19 +20,8 @@ components: zookeeper: false oxia: true - # Pulsar Functions work with Oxia as long as the broker hosts FileSystemPackagesStorage (see the - # broker.packageManagement block below). The default BookKeeper package storage requires ZooKeeper, so - # enabling functions on Oxia WITHOUT broker.packageManagement.enabled fails chart rendering. - functions: true - -broker: - packageManagement: - # Host the Packages Management Service on the broker (enabled) with FileSystemPackagesStorage - # (fileSystemStorage.enabled) — no ZooKeeper, so uploaded packages (e.g. - # `pulsar-admin functions create --jar ...`) work with Oxia. This uses the default single-node PVC - # (ReadWriteOnce on the default StorageClass) defined in the chart values, which is fine for a single - # broker / minikube. For multiple broker replicas use a ReadWriteMany shared filesystem — see the README - # and examples/README.md Functions section. - enabled: true - fileSystemStorage: - enabled: true \ No newline at end of file + # Pulsar Functions are disabled by default. To run Functions on Oxia, also enable broker-hosted + # FileSystemPackagesStorage: the default BookKeeper package storage requires ZooKeeper and does not work + # without it (using Oxia). See values-functions-fs-storage.yaml for an example of enabling Functions for + # Oxia (merge it with this file), and the "Functions" section in examples/README.md plus the "Pulsar + # Functions package storage" section in the top-level README.md for details. From 09c6945b7fb3e150ebbd52a60658155182aea8fc Mon Sep 17 00:00:00 2001 From: Lari Hotari Date: Thu, 25 Jun 2026 18:16:31 +0300 Subject: [PATCH 05/10] examples/README: de-duplicate FileSystemPackagesStorage docs Drop the duplicated volume-sizing (single broker vs. 
ReadWriteMany shared filesystem with GKE/EKS/AKS CSI drivers) and StorageClass/PersistentVolume/ PersistentVolumeClaim raw-YAML details from the examples README. Keep a short summary and link to the canonical "Pulsar Functions package storage" section in the top-level README instead. The "Package storage (FileSystemPackagesStorage)" heading is retained so existing in-page anchor links still resolve. Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01RZUbdHbb856wdmKxBU4V48 --- examples/README.md | 36 +++++++++++------------------------- 1 file changed, 11 insertions(+), 25 deletions(-) diff --git a/examples/README.md b/examples/README.md index 9098e905..76679ff1 100644 --- a/examples/README.md +++ b/examples/README.md @@ -148,31 +148,17 @@ See [`values-functions-fs-storage.yaml`](values-functions-fs-storage.yaml) for a #### Package storage (FileSystemPackagesStorage) -The Pulsar Packages Management Service — used to store uploaded function packages -(`pulsar-admin functions create --jar ...`) — runs on the **broker**. Its default `BookKeeperPackagesStorage` -requires ZooKeeper, so it does **not** work with Oxia. To support uploaded packages without ZooKeeper, enable -the service and the FileSystem provider: `broker.packageManagement.enabled: true` and -`broker.packageManagement.fileSystemStorage.enabled: true`. This configures the broker to use -**`FileSystemPackagesStorage`** on a shared volume mounted on every broker pod. (Enabling functions without -ZooKeeper (using Oxia) but **without** FileSystemPackagesStorage fails chart rendering with a clear error.) - -All keys below are under `broker.packageManagement.fileSystemStorage`: - -- **Single broker / dev (minikube):** the default `persistentVolumeClaim` creates a `ReadWriteOnce` PVC on the - cluster's default `StorageClass` — no extra setup. -- **Multiple broker replicas:** the volume must be a **`ReadWriteMany` shared filesystem**. 
Provision one with - a cloud CSI driver, set `persistentVolumeClaim: {}` (so the chart creates no claim), and reference the - pre-created PVC via `claimName`: - - **GCP / GKE** — Filestore CSI (`filestore.csi.storage.gke.io`): - - - **AWS / EKS** — Amazon EFS CSI (`efs.csi.aws.com`): - - - **Azure / AKS** — Azure Files CSI (`file.csi.azure.com`): - - -`broker.packageManagement.fileSystemStorage` can also create the `StorageClass`, `PersistentVolume`, and -`PersistentVolumeClaim` directly from raw YAML — only `apiVersion`/`kind` are fixed by the chart, and a value -of `{}` creates nothing. +The function worker stores uploaded packages (`pulsar-admin functions create --jar ...`) via the broker's +Packages Management Service. Its default `BookKeeperPackagesStorage` requires ZooKeeper, so on Oxia you must +enable broker-hosted **`FileSystemPackagesStorage`** (`broker.packageManagement.enabled: true` + +`broker.packageManagement.fileSystemStorage.enabled: true`) — see +[`values-functions-fs-storage.yaml`](values-functions-fs-storage.yaml). + +For the full configuration — choosing a volume (single broker vs. a `ReadWriteMany` shared filesystem with +GKE / EKS / AKS CSI drivers) and creating the `StorageClass` / `PersistentVolume` / `PersistentVolumeClaim` +from raw YAML — see the +[Pulsar Functions package storage](../README.md#pulsar-functions-package-storage-required-for-oxia) section in +the top-level README. ### Storage From f01622031c2d652280886301c8ff392b89edb357 Mon Sep 17 00:00:00 2001 From: Lari Hotari Date: Thu, 25 Jun 2026 18:19:55 +0300 Subject: [PATCH 06/10] README: clarify RWX shared-filesystem options and fix AKS link For multi-broker FileSystemPackagesStorage, name the managed file service to choose per cloud (Filestore / Amazon EFS / Azure Files) and note that block storage (Persistent Disk / EBS / Azure Disk) is ReadWriteOnce and cannot be shared across replicas. 
Fix the AKS reference to the correct Azure Files how-to: https://learn.microsoft.com/azure/aks/create-volume-azure-files Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01RZUbdHbb856wdmKxBU4V48 --- README.md | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index cb1cc1fe..bd772d71 100644 --- a/README.md +++ b/README.md @@ -419,15 +419,17 @@ replicas can use it (all keys below are under `broker.packageManagement.fileSyst - **Single broker / single-node dev clusters (e.g. minikube):** the default `persistentVolumeClaim` is a `ReadWriteOnce` claim on the cluster's default `StorageClass` — no extra configuration is required. -- **Multiple broker replicas:** the package directory must be on a **`ReadWriteMany` shared filesystem**. - Provision one with a cloud CSI driver, set `persistentVolumeClaim: {}` so the chart does not create a - claim, and point `claimName` at the pre-created PVC: - - | Cloud | CSI driver | Reference | - | ----- | ---------- | --------- | - | GCP / GKE | Filestore CSI (`filestore.csi.storage.gke.io`) | | - | AWS / EKS | Amazon EFS CSI (`efs.csi.aws.com`) | | - | Azure / AKS | Azure Files CSI (`file.csi.azure.com`) | | +- **Multiple broker replicas:** the package directory must be on a **`ReadWriteMany` shared filesystem** — a + managed file service, **not** block storage (Persistent Disk / EBS / Azure Disk are `ReadWriteOnce` and + cannot be shared across replicas). 
Provision one with the matching cloud CSI driver, set + `persistentVolumeClaim: {}` so the chart does not create a claim, and point `claimName` at the pre-created + PVC: + + | Cloud | Shared file service to use | CSI driver | Reference | + | ----- | -------------------------- | ---------- | --------- | + | GCP / GKE | Filestore (managed NFS) | `filestore.csi.storage.gke.io` | | + | AWS / EKS | Amazon EFS (managed NFS) | `efs.csi.aws.com` | | + | Azure / AKS | Azure Files | `file.csi.azure.com` | | `broker.packageManagement.fileSystemStorage` can also create the `StorageClass`, `PersistentVolume`, and `PersistentVolumeClaim` directly from raw YAML — only `apiVersion`/`kind` are fixed by the chart, and a From b3b2bf390b76a394558e02752e5e54998403b964 Mon Sep 17 00:00:00 2001 From: Lari Hotari Date: Thu, 25 Jun 2026 18:30:59 +0300 Subject: [PATCH 07/10] README: document package-storage volume permissions (uid 10000 / gid 0) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pulsar images run as uid 10000, gid 0, so function package files are written by that user/group. Explain that broker.securityContext.fsGroup: 0 (OnRootMismatch) makes the volume group-0 group-writable and works on most volume types, but that NFS/SMB-backed ReadWriteMany volumes (EFS, Filestore, Azure Files) typically ignore fsGroup — in which case grant uid 10000 / gid 0 rwx on the share directly (group-0 owned + group-writable, e.g. chmod 2770, or via CSI mount options). 
Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01RZUbdHbb856wdmKxBU4V48 --- README.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/README.md b/README.md index bd772d71..6a0d818f 100644 --- a/README.md +++ b/README.md @@ -431,6 +431,17 @@ replicas can use it (all keys below are under `broker.packageManagement.fileSyst | AWS / EKS | Amazon EFS (managed NFS) | `efs.csi.aws.com` | | | Azure / AKS | Azure Files | `file.csi.azure.com` | | +**Volume permissions.** Pulsar container images run as **uid `10000`, gid `0`** by default, so package files +are written by that user/group. The chart sets `broker.securityContext.fsGroup: 0` +(`fsGroupChangePolicy: OnRootMismatch`), which tells Kubernetes to set the volume's group to `0` and make it +group-writable — enough for the broker to read/write the package directory, and this works on most volume +types (block-storage CSI drivers, `hostPath`). Some shared filesystems — notably the NFS/SMB-backed +`ReadWriteMany` volumes above (EFS, Filestore, Azure Files) — **ignore `fsGroup`**. If package writes then +fail with permission errors, grant `uid 10000` / `gid 0` read-write-execute on the share itself: make the +directory group-`0`-owned and group-writable (e.g. `chown :0 <dir> && chmod 2770 <dir>` — `rwxrwx---` plus the +setgid bit so new entries inherit gid `0`), or set it via the CSI driver's mount options (for Azure Files SMB, +for example, `mountOptions: [uid=10000, gid=0, file_mode=0770, dir_mode=0770]`). + `broker.packageManagement.fileSystemStorage` can also create the `StorageClass`, `PersistentVolume`, and `PersistentVolumeClaim` directly from raw YAML — only `apiVersion`/`kind` are fixed by the chart, and a value of `{}` creates nothing.
See the `broker.packageManagement` section in From 1b8bc35b313af56201fcfd91f9b89efb3d6daa75 Mon Sep 17 00:00:00 2001 From: Lari Hotari Date: Thu, 25 Jun 2026 18:44:23 +0300 Subject: [PATCH 08/10] README: use short titled links for cloud volume references; update GKE link Replace the bare long URLs in the shared-filesystem table with short titled links, and point GKE at the Filestore-via-CSI stateful-workload tutorial. Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01RZUbdHbb856wdmKxBU4V48 --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 6a0d818f..59c423bd 100644 --- a/README.md +++ b/README.md @@ -427,9 +427,9 @@ replicas can use it (all keys below are under `broker.packageManagement.fileSyst | Cloud | Shared file service to use | CSI driver | Reference | | ----- | -------------------------- | ---------- | --------- | - | GCP / GKE | Filestore (managed NFS) | `filestore.csi.storage.gke.io` | | - | AWS / EKS | Amazon EFS (managed NFS) | `efs.csi.aws.com` | | - | Azure / AKS | Azure Files | `file.csi.azure.com` | | + | GCP / GKE | Filestore (managed NFS) | `filestore.csi.storage.gke.io` | [Filestore on GKE](https://docs.cloud.google.com/kubernetes-engine/docs/tutorials/stateful-workload#configure_the_managed_file_storage_with_using_csi) | + | AWS / EKS | Amazon EFS (managed NFS) | `efs.csi.aws.com` | [EFS CSI on EKS](https://docs.aws.amazon.com/eks/latest/userguide/efs-csi.html) | + | Azure / AKS | Azure Files | `file.csi.azure.com` | [Azure Files on AKS](https://learn.microsoft.com/azure/aks/create-volume-azure-files) | **Volume permissions.** Pulsar container images run as **uid `10000`, gid `0`** by default, so package files are written by that user/group. 
The chart sets `broker.securityContext.fsGroup: 0` From d4d38386e5c3e5d81360d1e5a0573360bed54e5e Mon Sep 17 00:00:00 2001 From: Lari Hotari Date: Thu, 25 Jun 2026 18:46:18 +0300 Subject: [PATCH 09/10] README: use Filestore CSI driver doc as the main GKE link Point the GKE reference at the Filestore CSI driver documentation as the primary link, and keep the stateful-workload tutorial as a short "(example)". Co-Authored-By: Claude Opus 4.8 Claude-Session: https://claude.ai/code/session_01RZUbdHbb856wdmKxBU4V48 --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 59c423bd..96a2af69 100644 --- a/README.md +++ b/README.md @@ -427,7 +427,7 @@ replicas can use it (all keys below are under `broker.packageManagement.fileSyst | Cloud | Shared file service to use | CSI driver | Reference | | ----- | -------------------------- | ---------- | --------- | - | GCP / GKE | Filestore (managed NFS) | `filestore.csi.storage.gke.io` | [Filestore on GKE](https://docs.cloud.google.com/kubernetes-engine/docs/tutorials/stateful-workload#configure_the_managed_file_storage_with_using_csi) | + | GCP / GKE | Filestore (managed NFS) | `filestore.csi.storage.gke.io` | [Filestore CSI](https://docs.cloud.google.com/filestore/docs/csi-driver) ([example](https://docs.cloud.google.com/kubernetes-engine/docs/tutorials/stateful-workload#configure_the_managed_file_storage_with_using_csi)) | | AWS / EKS | Amazon EFS (managed NFS) | `efs.csi.aws.com` | [EFS CSI on EKS](https://docs.aws.amazon.com/eks/latest/userguide/efs-csi.html) | | Azure / AKS | Azure Files | `file.csi.azure.com` | [Azure Files on AKS](https://learn.microsoft.com/azure/aks/create-volume-azure-files) | From 1f761dc6e76169d84c415fafd6d4d5ceefb342a8 Mon Sep 17 00:00:00 2001 From: Lari Hotari Date: Thu, 25 Jun 2026 18:47:06 +0300 Subject: [PATCH 10/10] README: drop separate GKE example link (CSI driver doc already has examples) Co-Authored-By: Claude Opus 4.8 Claude-Session: 
https://claude.ai/code/session_01RZUbdHbb856wdmKxBU4V48 --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 96a2af69..f12e77a1 100644 --- a/README.md +++ b/README.md @@ -427,7 +427,7 @@ replicas can use it (all keys below are under `broker.packageManagement.fileSyst | Cloud | Shared file service to use | CSI driver | Reference | | ----- | -------------------------- | ---------- | --------- | - | GCP / GKE | Filestore (managed NFS) | `filestore.csi.storage.gke.io` | [Filestore CSI](https://docs.cloud.google.com/filestore/docs/csi-driver) ([example](https://docs.cloud.google.com/kubernetes-engine/docs/tutorials/stateful-workload#configure_the_managed_file_storage_with_using_csi)) | + | GCP / GKE | Filestore (managed NFS) | `filestore.csi.storage.gke.io` | [Filestore CSI](https://docs.cloud.google.com/filestore/docs/csi-driver) | | AWS / EKS | Amazon EFS (managed NFS) | `efs.csi.aws.com` | [EFS CSI on EKS](https://docs.aws.amazon.com/eks/latest/userguide/efs-csi.html) | | Azure / AKS | Azure Files | `file.csi.azure.com` | [Azure Files on AKS](https://learn.microsoft.com/azure/aks/create-volume-azure-files) |