diff --git a/.ci/clusters/values-oxia.yaml b/.ci/clusters/values-oxia.yaml index 15d69dc2..b361d11d 100644 --- a/.ci/clusters/values-oxia.yaml +++ b/.ci/clusters/values-oxia.yaml @@ -20,9 +20,28 @@ components: zookeeper: false oxia: true - # disable functions for oxia tests since there's no support for Oxia in - # BookKeeperPackagesStorage which requires Zookeeper - functions: false + # Functions are enabled on Oxia together with broker.packageManagement (FileSystemPackagesStorage). + # The default BookKeeper package storage requires ZooKeeper, but FileSystemPackagesStorage does not, so + # this validates Oxia + FileSystemPackagesStorage end to end: the function smoke test + # (ci::test_pulsar_function) creates a function from a JAR, which uploads the package via the broker's + # FileSystem-backed Packages Management Service. + functions: true + +# Host the Packages Management Service on the broker with FileSystemPackagesStorage so functions work on +# Oxia. broker.replicaCount is 1 in CI (.ci/values-common.yaml), so the default ReadWriteOnce PVC on the +# kind default StorageClass is sufficient (no shared filesystem needed). +broker: + packageManagement: + enabled: true + fileSystemStorage: + enabled: true + # Use a NON-default storage path so the test actually exercises PULSAR_PREFIX_STORAGE_PATH being + # applied to the broker. The FileSystemPackagesStorage default ("packages-storage") would resolve to + # /pulsar/packages-storage (the cwd), which happens to match the chart's default mount path, so a + # broken STORAGE_PATH wiring would still pass. With a custom path the PVC is mounted here AND + # STORAGE_PATH points here, so the function-package upload only lands on the volume if both are wired + # correctly. ci::verify_package_storage_files asserts the uploaded package files exist under this path. 
+ storagePath: /pulsar/test-packages-storage oxia: initialShardCount: 3 diff --git a/.ci/helm.sh b/.ci/helm.sh index c5303f2d..342f13cd 100755 --- a/.ci/helm.sh +++ b/.ci/helm.sh @@ -416,10 +416,39 @@ function ci::wait_message_processed() { done } +function ci::verify_package_storage_files() { + # When the broker hosts FileSystemPackagesStorage, the function package uploaded by `functions create` + # must land on the broker's shared package-storage volume (at STORAGE_PATH). Verify the files are + # actually there - this catches a broken STORAGE_PATH wiring (package written to the wrong directory, + # off the volume) or a volume the broker cannot write to (permissions / missing fsGroup). + if [[ "$(ci::helm_values_for_deployment | yq '.broker.packageManagement.fileSystemStorage.enabled')" != "true" ]]; then + return 0 + fi + local storage_path + storage_path=$(ci::helm_values_for_deployment | yq '.broker.packageManagement.fileSystemStorage.storagePath') + if [[ -z "${storage_path}" || "${storage_path}" == "null" ]]; then + storage_path="/pulsar/packages-storage" + fi + echo "Verifying function package files exist under broker FileSystemPackagesStorage path: ${storage_path}" + ${KUBECTL} exec -n "${NAMESPACE}" "${CLUSTER}"-broker-0 -- bash -c "ls -laR '${storage_path}' || true" + local file_count + file_count=$(${KUBECTL} exec -n "${NAMESPACE}" "${CLUSTER}"-broker-0 -- bash -c "find '${storage_path}' -type f 2>/dev/null | wc -l" | tr -d '[:space:]') + echo "FileSystemPackagesStorage file count under ${storage_path}: ${file_count}" + if [[ -z "${file_count}" || "${file_count}" -lt 1 ]]; then + echo >&2 "ERROR: no files found under FileSystemPackagesStorage path ${storage_path} on ${CLUSTER}-broker-0." + echo >&2 "The function package was not persisted to the package-storage volume (check STORAGE_PATH wiring, the PVC mount, and volume write permissions / fsGroup)." 
+ return 1 + fi + echo "OK: function package persisted to FileSystemPackagesStorage (${file_count} file(s) under ${storage_path})" +} + function ci::test_pulsar_function() { echo "Testing functions" echo "Creating function" ${KUBECTL} exec -n "${NAMESPACE}" "${CLUSTER}"-toolset-0 -- bin/pulsar-admin functions create --tenant pulsar-ci --namespace test --name test-function --inputs "pulsar-ci/test/test_input" --output "pulsar-ci/test/test_output" --parallelism 1 --classname org.apache.pulsar.functions.api.examples.ExclamationFunction --jar /pulsar/examples/api-examples.jar + # The package upload happens at create time; verify it landed on the broker's FileSystemPackagesStorage + # volume (no-op unless fileSystemStorage is enabled). + ci::verify_package_storage_files echo "Creating subscription for output topic" ${KUBECTL} exec -n "${NAMESPACE}" "${CLUSTER}"-toolset-0 -- bin/pulsar-admin topics create-subscription -s test pulsar-ci/test/test_output echo "Waiting for function to be ready" diff --git a/.ci/templates-all-values-patch1.yaml b/.ci/templates-all-values-patch1.yaml index 67239329..4a8b368b 100644 --- a/.ci/templates-all-values-patch1.yaml +++ b/.ci/templates-all-values-patch1.yaml @@ -120,12 +120,47 @@ bookkeeper: storageClass: # ----------------------------------------------------------------------------- -# Broker: flip statefulsetUpgrade off -# Exercises: the path where broker-statefulset-upgrade.yaml renders nothing. +# Broker: +# - flip statefulsetUpgrade off (exercises the path where +# broker-statefulset-upgrade.yaml renders nothing). +# - enable FileSystemPackagesStorage with a created StorageClass + PersistentVolume + PVC +# (exercises all three branches of broker-package-storage.yaml plus the +# broker.packageManagement volume mount in broker-statefulset.yaml and the +# enablePackagesManagement keys in broker-configmap.yaml). 
# ----------------------------------------------------------------------------- broker: statefulsetUpgrade: enabled: false + packageManagement: + enabled: true + fileSystemStorage: + enabled: true + storageClass: + metadata: + name: pulsar-pkg-sc + provisioner: kubernetes.io/no-provisioner + volumeBindingMode: WaitForFirstConsumer + persistentVolume: + metadata: + name: pulsar-pkg-pv + spec: + capacity: + storage: 10Gi + accessModes: + - ReadWriteMany + storageClassName: pulsar-pkg-sc + hostPath: + path: /tmp/pulsar-packages + persistentVolumeClaim: + metadata: + name: pulsar-broker-package-storage + spec: + accessModes: + - ReadWriteMany + storageClassName: pulsar-pkg-sc + resources: + requests: + storage: 10Gi # ----------------------------------------------------------------------------- # Cert-manager internal issuer: selfsigning -> ca diff --git a/.github/workflows/pulsar-helm-chart-ci.yaml b/.github/workflows/pulsar-helm-chart-ci.yaml index 83c21926..29195666 100644 --- a/.github/workflows/pulsar-helm-chart-ci.yaml +++ b/.github/workflows/pulsar-helm-chart-ci.yaml @@ -284,7 +284,7 @@ jobs: - name: Pulsar Manager values_file: .ci/clusters/values-pulsar-manager.yaml shortname: pulsar-manager - - name: Oxia + - name: Oxia + FileSystemPackagesStorage values_file: .ci/clusters/values-oxia.yaml shortname: oxia - name: OpenID diff --git a/README.md b/README.md index 0976bde7..f12e77a1 100644 --- a/README.md +++ b/README.md @@ -384,6 +384,70 @@ The default user is `pulsar` and you can find out the password with this command kubectl get secret -l component=pulsar-manager -o=jsonpath="{.items[0].data.UI_PASSWORD}" | base64 --decode ``` +## Pulsar Functions package storage (required for Oxia) + +The Pulsar **Packages Management Service** — which stores uploaded function packages +(`pulsar-admin functions create --jar ...`) — runs on the **broker**. 
Its default storage provider, +`BookKeeperPackagesStorage`, relies on DistributedLog metadata in **ZooKeeper**, so it does **not** work +when [Oxia](https://github.com/streamnative/oxia) is used as the metadata store (`components.oxia: true`). + +To run Pulsar Functions on Oxia you must enable `FileSystemPackagesStorage` on the broker. The Packages +Management Service is configured in two levels: `broker.packageManagement.enabled` turns the service on, and +`broker.packageManagement.fileSystemStorage.enabled` selects the FileSystem provider: + +```yaml +components: + oxia: true + functions: true +broker: + packageManagement: + enabled: true + fileSystemStorage: + enabled: true +``` + +This configures the broker with `enablePackagesManagement=true` and +`packagesManagementStorageProvider=FileSystemPackagesStorageProvider`, and mounts a **shared +`PersistentVolumeClaim`** on every broker pod as the package storage directory. If `components.functions` +is enabled without ZooKeeper (using Oxia) but FileSystemPackagesStorage is not enabled, the chart **fails the +Helm install** with an explanatory error (the default BookKeeper provider would not work without ZooKeeper). + +### Choosing a volume + +`FileSystemPackagesStorage` is a directory on disk, so the volume backing it determines how many broker +replicas can use it (all keys below are under `broker.packageManagement.fileSystemStorage`): + +- **Single broker / single-node dev clusters (e.g. minikube):** the default `persistentVolumeClaim` is a + `ReadWriteOnce` claim on the cluster's default `StorageClass` — no extra configuration is required. +- **Multiple broker replicas:** the package directory must be on a **`ReadWriteMany` shared filesystem** — a + managed file service, **not** block storage (Persistent Disk / EBS / Azure Disk are `ReadWriteOnce` and + cannot be shared across replicas). 
Provision one with the matching cloud CSI driver, set + `persistentVolumeClaim: {}` so the chart does not create a claim, and point `claimName` at the pre-created + PVC: + + | Cloud | Shared file service to use | CSI driver | Reference | + | ----- | -------------------------- | ---------- | --------- | + | GCP / GKE | Filestore (managed NFS) | `filestore.csi.storage.gke.io` | [Filestore CSI](https://docs.cloud.google.com/filestore/docs/csi-driver) | + | AWS / EKS | Amazon EFS (managed NFS) | `efs.csi.aws.com` | [EFS CSI on EKS](https://docs.aws.amazon.com/eks/latest/userguide/efs-csi.html) | + | Azure / AKS | Azure Files | `file.csi.azure.com` | [Azure Files on AKS](https://learn.microsoft.com/azure/aks/create-volume-azure-files) | + +**Volume permissions.** Pulsar container images run as **uid `10000`, gid `0`** by default, so package files +are written by that user/group. The chart sets `broker.securityContext.fsGroup: 0` +(`fsGroupChangePolicy: OnRootMismatch`), which tells Kubernetes to set the volume's group to `0` and make it +group-writable — enough for the broker to read/write the package directory, and this works on most volume +types (block-storage CSI drivers, `hostPath`). Some shared filesystems — notably the NFS/SMB-backed +`ReadWriteMany` volumes above (EFS, Filestore, Azure Files) — **ignore `fsGroup`**. If package writes then +fail with permission errors, grant `uid 10000` / `gid 0` read-write-execute on the share itself: make the +directory group-`0`-owned and group-writable (e.g. `chown :0 <dir> && chmod 2770 <dir>` — `rwxrwx---` plus the +setgid bit so new entries inherit gid `0`), or set it via the CSI driver's mount options (for Azure Files SMB, +for example, `mountOptions: [uid=10000, gid=0, file_mode=0770, dir_mode=0770]`). 
+ +`broker.packageManagement.fileSystemStorage` can also create the `StorageClass`, `PersistentVolume`, and +`PersistentVolumeClaim` directly from raw YAML — only `apiVersion`/`kind` are fixed by the chart, and a +value of `{}` creates nothing. See the `broker.packageManagement` section in +[`values.yaml`](charts/pulsar/values.yaml) and the +[`examples/values-functions-fs-storage.yaml`](examples/values-functions-fs-storage.yaml) example. + ## Grafana Dashboards The Apache Pulsar Helm Chart uses the `victoria-metrics-k8s-stack` Helm Chart to deploy Grafana. diff --git a/charts/pulsar/templates/broker-configmap.yaml b/charts/pulsar/templates/broker-configmap.yaml index 177b4d8d..4a77d8a6 100644 --- a/charts/pulsar/templates/broker-configmap.yaml +++ b/charts/pulsar/templates/broker-configmap.yaml @@ -186,6 +186,29 @@ data: {{- end }} {{- end }} + # Package Management Service + # The broker hosts the Packages Management Service (packageManagement.enabled). With + # fileSystemStorage.enabled it uses FileSystemPackagesStorage, which lets Functions store packages without + # ZooKeeper (works with Oxia); the storage path is a shared volume mounted on every broker pod (see + # broker-statefulset.yaml / templates/broker-package-storage.yaml). Otherwise the broker keeps its default + # BookKeeperPackagesStorage provider. + {{- if .Values.broker.packageManagement.enabled }} + enablePackagesManagement: "true" + {{- if .Values.components.functions }} + # Route the broker-embedded function worker's package storage through the broker's Packages Management + # Service instead of BookKeeper/DLog, so Functions work without ZooKeeper (e.g. with Oxia). This is a + # broker.conf key (ServiceConfiguration): for the embedded worker, PulsarService overrides the worker's + # functionsWorkerEnablePackageManagement with the broker config value, so setting it via PF_ + # (functions_worker.yml) has no effect. 
+ functionsWorkerEnablePackageManagement: "true" + {{- end }} + {{- if .Values.broker.packageManagement.fileSystemStorage.enabled }} + packagesManagementStorageProvider: "org.apache.pulsar.packages.management.storage.filesystem.FileSystemPackagesStorageProvider" + # STORAGE_PATH is not a broker.conf key, so add it to the properties map via PULSAR_PREFIX_ + PULSAR_PREFIX_STORAGE_PATH: "{{ .Values.broker.packageManagement.fileSystemStorage.storagePath }}" + {{- end }} + {{- end }} + # prometheus needs to access /metrics endpoint webServicePort: "{{ .Values.broker.ports.http }}" {{- if or (not .Values.tls.enabled) (not .Values.tls.broker.enabled) }} diff --git a/charts/pulsar/templates/broker-package-storage-validation.yaml b/charts/pulsar/templates/broker-package-storage-validation.yaml new file mode 100644 index 00000000..acae47b7 --- /dev/null +++ b/charts/pulsar/templates/broker-package-storage-validation.yaml @@ -0,0 +1,31 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +{{- /* +Functions on Oxia require FileSystemPackagesStorage. +The function worker stores function packages in BookKeeper/DLog by default, which requires ZooKeeper and +therefore does not work with Oxia. 
The fix is to host the Packages Management Service with +FileSystemPackagesStorage on the broker (broker.packageManagement.enabled AND +broker.packageManagement.fileSystemStorage.enabled). Fail fast when functions run on Oxia without it. + +This check lives in a rendered template (not a `_`-prefixed partial) so that the `fail` is executed. +*/ -}} +{{- if (and .Values.components.functions .Values.components.oxia (not (and .Values.broker.packageManagement.enabled .Values.broker.packageManagement.fileSystemStorage.enabled))) }} +{{- fail "ERROR: Pulsar Functions on Oxia require FileSystemPackagesStorage. The default BookKeeper package storage requires ZooKeeper and does not work with Oxia (components.oxia=true). Set broker.packageManagement.enabled=true and broker.packageManagement.fileSystemStorage.enabled=true to host FileSystemPackagesStorage on the broker, or use ZooKeeper as the metadata store. See the README and examples/README.md Functions section." }} +{{- end }} diff --git a/charts/pulsar/templates/broker-package-storage.yaml b/charts/pulsar/templates/broker-package-storage.yaml new file mode 100644 index 00000000..c21ac91a --- /dev/null +++ b/charts/pulsar/templates/broker-package-storage.yaml @@ -0,0 +1,55 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. +# + +# Storage objects for the broker's FileSystemPackagesStorage (Packages Management Service). +# Each of broker.packageManagement.fileSystemStorage.{storageClass,persistentVolume,persistentVolumeClaim} is +# rendered verbatim from values with only apiVersion and kind fixed by the chart; an empty value ({}) renders +# nothing. This lets you either create the objects here (default: a minikube-friendly PVC on the default +# StorageClass) or set them to {} and reference a pre-created PVC (e.g. a ReadWriteMany cloud filesystem) via +# broker.packageManagement.fileSystemStorage.claimName. +{{- if and .Values.components.broker (not .Values.standalone.enabled) .Values.broker.packageManagement.enabled .Values.broker.packageManagement.fileSystemStorage.enabled }} +{{- $pm := .Values.broker.packageManagement.fileSystemStorage }} +{{- if $pm.storageClass }} +apiVersion: storage.k8s.io/v1 +kind: StorageClass +{{ toYaml (omit $pm.storageClass "apiVersion" "kind") | trim }} +--- +{{- end }} +{{- if $pm.persistentVolume }} +apiVersion: v1 +kind: PersistentVolume +{{ toYaml (omit $pm.persistentVolume "apiVersion" "kind") | trim }} +--- +{{- end }} +{{- if $pm.persistentVolumeClaim }} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + {{- if not (dig "metadata" "namespace" "" $pm.persistentVolumeClaim) }} + namespace: {{ template "pulsar.namespace" . }} + {{- end }} + {{- with $pm.persistentVolumeClaim.metadata }} + {{- toYaml . | nindent 2 }} + {{- end }} +{{- with $pm.persistentVolumeClaim.spec }} +spec: + {{- toYaml . 
| nindent 2 }} +{{- end }} +{{- end }} +{{- end }} diff --git a/charts/pulsar/templates/broker-statefulset.yaml b/charts/pulsar/templates/broker-statefulset.yaml index 697ee646..bed5a747 100644 --- a/charts/pulsar/templates/broker-statefulset.yaml +++ b/charts/pulsar/templates/broker-statefulset.yaml @@ -74,6 +74,10 @@ spec: {{- end }} spec: serviceAccountName: "{{ template "pulsar.fullname" . }}-{{ .Values.broker.component }}-acct" + {{- if .Values.broker.securityContext }} + securityContext: +{{ toYaml .Values.broker.securityContext | indent 8 }} + {{- end }} {{- if .Values.broker.nodeSelector }} nodeSelector: {{ toYaml .Values.broker.nodeSelector | indent 8 }} @@ -314,6 +318,10 @@ spec: {{- if .Values.broker.extraVolumeMounts }} {{ toYaml .Values.broker.extraVolumeMounts | indent 10 }} {{- end }} + {{- if and .Values.broker.packageManagement.enabled .Values.broker.packageManagement.fileSystemStorage.enabled }} + - name: broker-package-storage + mountPath: {{ .Values.broker.packageManagement.fileSystemStorage.storagePath }} + {{- end }} {{- include "pulsar.broker.certs.volumeMounts" . | nindent 10 }} env: {{- if and (and .Values.broker.storageOffload (eq .Values.broker.storageOffload.driver "aws-s3")) .Values.broker.storageOffload.secret }} @@ -380,6 +388,13 @@ spec: secretName: {{ .Values.broker.storageOffload.gcsServiceAccountSecret }} {{- end }} {{- end }} + {{- if and .Values.broker.packageManagement.enabled .Values.broker.packageManagement.fileSystemStorage.enabled }} + # Shared package-storage volume mounted on every broker pod (FileSystemPackagesStorage). + # For more than one broker replica this PVC must be ReadWriteMany (a shared filesystem). + - name: broker-package-storage + persistentVolumeClaim: + claimName: {{ .Values.broker.packageManagement.fileSystemStorage.claimName }} + {{- end }} {{- include "pulsar.broker.certs.volumes" . | nindent 6 }} {{- include "pulsar.imagePullSecrets" . 
| nindent 6}} {{- end }} diff --git a/charts/pulsar/values.yaml b/charts/pulsar/values.yaml index 7976a780..a82f9875 100755 --- a/charts/pulsar/values.yaml +++ b/charts/pulsar/values.yaml @@ -1255,6 +1255,14 @@ broker: # so the metrics are correctly rendered in grafana dashboard component: broker replicaCount: 3 + # Ensures the non-root docker image works correctly. With fsGroup: 0 a mounted volume is group-owned by + # GID 0 (the group the pulsar user, uid 10000, belongs to) and made group-writable, so the broker can + # read/write it. Required when packageManagement.fileSystemStorage is enabled (the broker mounts a + # PersistentVolumeClaim as the FileSystemPackagesStorage directory); harmless otherwise. Mirrors the + # bookkeeper/zookeeper securityContext. + securityContext: + fsGroup: 0 + fsGroupChangePolicy: "OnRootMismatch" autoscaling: enabled: false minReplicas: 1 @@ -1410,6 +1418,59 @@ broker: annotations: {} ## You may use the following annotation in order to use EKS IAM Roles for Service Accounts (IRSA) # eks.amazonaws.com/role-arn: arn:aws:iam::66666:role/my-iam-role-with-s3-access + ## Pulsar Functions / Packages: Packages Management Service + ## templates/broker-package-storage.yaml + ## + ## The Pulsar Packages Management Service runs on the broker and stores uploaded function packages + ## (e.g. `pulsar-admin functions create --jar ...`). It is configured in two levels: + ## - packageManagement.enabled enables the service on the broker (enablePackagesManagement). + ## - packageManagement.fileSystemStorage.enabled uses FileSystemPackagesStorage as the storage provider + ## instead of the default BookKeeperPackagesStorage. + ## + ## FileSystemPackagesStorage works WITHOUT ZooKeeper, so it is required for Functions on Oxia (the default + ## BookKeeper provider needs ZooKeeper). The storage directory is mounted from a shared PersistentVolumeClaim + ## added under the broker pod spec, so every broker replica sees the same packages. 
For a single broker (or + ## single-node dev clusters like minikube) the default ReadWriteOnce PVC on the cluster's default StorageClass + ## is enough. For MORE THAN ONE broker replica the volume must be a shared ReadWriteMany filesystem; provision + ## one with a cloud CSI driver and reference it via `claimName` (set `persistentVolumeClaim: {}` so the chart + ## does not create one). See the README and examples/README.md "Functions" section for GKE / EKS / AKS references. + packageManagement: + # Enable the Packages Management Service on the broker (sets enablePackagesManagement). + enabled: false + # FileSystemPackagesStorage: use a filesystem directory (on a shared volume) as the package storage + # provider instead of the default BookKeeperPackagesStorage. Requires packageManagement.enabled. + # Mandatory for Functions on Oxia. + fileSystemStorage: + enabled: false + # FileSystemPackagesStorage directory inside the broker (mounted from the PVC below). + storagePath: /pulsar/packages-storage + # Name of the PVC mounted on the broker pods. When `persistentVolumeClaim` is created below this must + # match its metadata.name; for a pre-created (e.g. ReadWriteMany cloud) PVC, set this to that claim's name. + claimName: pulsar-broker-package-storage + # StorageClass to create. Only apiVersion (storage.k8s.io/v1) and kind (StorageClass) are fixed by the + # chart; provide everything else here. {} = do not create one (use the cluster default StorageClass). + storageClass: {} + # PersistentVolume to create (for static provisioning, e.g. a pre-existing NFS/CSI/hostPath volume). + # Only apiVersion (v1) and kind (PersistentVolume) are fixed by the chart; provide everything else here + # (capacity, accessModes, the volume source, etc.). {} = do not create one (the default; most setups use + # dynamic provisioning via a StorageClass or reference a pre-created PVC). When set, bind the PVC below + # to it (e.g. via spec.volumeName / matching storageClassName). 
+ persistentVolume: {} + # PersistentVolumeClaim to create. Only apiVersion (v1) and kind (PersistentVolumeClaim) are fixed by + # the chart; provide everything else here. {} = do not create one (reference a pre-created PVC via + # `claimName` above — this is how the GKE / EKS / AKS shared-filesystem options are wired). + # The default below works on single-node dev clusters (minikube): a ReadWriteOnce claim on the default + # StorageClass. For multi-broker use a ReadWriteMany shared filesystem. + persistentVolumeClaim: + metadata: + name: pulsar-broker-package-storage + spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 10Gi + ## Tiered Storage ## storageOffload: {} diff --git a/examples/README.md b/examples/README.md index 1ac8b159..76679ff1 100644 --- a/examples/README.md +++ b/examples/README.md @@ -136,8 +136,29 @@ in examples that deploy a broker. | File | Description | | ---- | ----------- | -| [`values-oxia.yaml`](values-oxia.yaml) | Use [Oxia](https://github.com/streamnative/oxia) as the metadata store instead of ZooKeeper (`components.zookeeper: false`, `components.oxia: true`). Pulsar Functions are disabled (`components.functions: false`) because their BookKeeper package storage still requires ZooKeeper. | +| [`values-oxia.yaml`](values-oxia.yaml) | Use [Oxia](https://github.com/streamnative/oxia) as the metadata store instead of ZooKeeper (`components.zookeeper: false`, `components.oxia: true`). Pulsar Functions are disabled by default; to run Functions on Oxia, also merge [`values-functions-fs-storage.yaml`](values-functions-fs-storage.yaml) (broker-hosted [`FileSystemPackagesStorage`](#package-storage-filesystempackagesstorage), required because the default BookKeeper package storage needs ZooKeeper). | | [`values-cs.yaml`](values-cs.yaml) | Deploy **only** ZooKeeper as a shared configuration store (`metadataPrefix: /configuration-store`); all other components are disabled. 
Intended to be combined with `values-local-cluster.yaml`. | +| [`values-functions-fs-storage.yaml`](values-functions-fs-storage.yaml) | Enable Pulsar Functions (`components.functions: true`) with broker-hosted [`FileSystemPackagesStorage`](#package-storage-filesystempackagesstorage) (`broker.packageManagement.enabled` + `fileSystemStorage.enabled`). Needs no ZooKeeper, so it is **suitable for Oxia** — merge it with [`values-oxia.yaml`](values-oxia.yaml). Also works with the default ZooKeeper metadata store. | + +### Functions + +Pulsar Functions run in a worker that is **embedded in the broker** (`components.functions: true`). +See [`values-functions-fs-storage.yaml`](values-functions-fs-storage.yaml) for a ready-to-merge example +(suitable for Oxia). + +#### Package storage (FileSystemPackagesStorage) + +The function worker stores uploaded packages (`pulsar-admin functions create --jar ...`) via the broker's +Packages Management Service. Its default `BookKeeperPackagesStorage` requires ZooKeeper, so on Oxia you must +enable broker-hosted **`FileSystemPackagesStorage`** (`broker.packageManagement.enabled: true` + +`broker.packageManagement.fileSystemStorage.enabled: true`) — see +[`values-functions-fs-storage.yaml`](values-functions-fs-storage.yaml). + +For the full configuration — choosing a volume (single broker vs. a `ReadWriteMany` shared filesystem with +GKE / EKS / AKS CSI drivers) and creating the `StorageClass` / `PersistentVolume` / `PersistentVolumeClaim` +from raw YAML — see the +[Pulsar Functions package storage](../README.md#pulsar-functions-package-storage-required-for-oxia) section in +the top-level README. ### Storage diff --git a/examples/values-functions-fs-storage.yaml b/examples/values-functions-fs-storage.yaml new file mode 100644 index 00000000..29dfe97b --- /dev/null +++ b/examples/values-functions-fs-storage.yaml @@ -0,0 +1,49 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# + +# Enable Pulsar Functions with broker-hosted FileSystemPackagesStorage. +# +# The Pulsar Packages Management Service — which stores uploaded function packages +# (`pulsar-admin functions create --jar ...`) — runs on the broker. Its default BookKeeperPackagesStorage +# provider stores packages via DistributedLog (metadata in ZooKeeper), so it does NOT work without ZooKeeper. +# Enabling FileSystemPackagesStorage hosts the packages on a filesystem volume instead, with no ZooKeeper +# dependency. +# +# This example is therefore suitable for Oxia: merge it with values-oxia.yaml to run Functions on an +# Oxia-backed cluster (without ZooKeeper), for example: +# ./merge-values.sh values-functions-fs-storage.yaml values-oxia.yaml values-one-node.yaml > merged-values.yaml +# It also works with the default ZooKeeper metadata store. +# +# The default persistentVolumeClaim (in the chart values) is a ReadWriteOnce claim on the cluster's default +# StorageClass, which is fine for a single broker / single-node dev clusters (minikube). For multiple broker +# replicas the package directory must be a ReadWriteMany shared filesystem — see the "Pulsar Functions +# package storage" section in the top-level README.md and the "Functions" section in examples/README.md. 
+ +components: + # Run the Pulsar Functions worker embedded in the broker. + functions: true + +broker: + packageManagement: + # Host the Packages Management Service on the broker (sets enablePackagesManagement). + enabled: true + fileSystemStorage: + # Use FileSystemPackagesStorage instead of the default BookKeeperPackagesStorage. It needs no + # ZooKeeper, so function package uploads also work when Oxia is the metadata store. + enabled: true diff --git a/examples/values-oxia.yaml b/examples/values-oxia.yaml index 245b1db6..328de663 100644 --- a/examples/values-oxia.yaml +++ b/examples/values-oxia.yaml @@ -20,6 +20,8 @@ components: zookeeper: false oxia: true - # disable functions for oxia tests since there's no support for Oxia in - # BookKeeperPackagesStorage which requires Zookeeper - functions: false \ No newline at end of file + # Pulsar Functions are disabled by default. To run Functions on Oxia, also enable broker-hosted + # FileSystemPackagesStorage: the default BookKeeper package storage requires ZooKeeper, so it does not + # work with Oxia. See values-functions-fs-storage.yaml for an example of enabling Functions for + # Oxia (merge it with this file), and the "Functions" section in examples/README.md plus the "Pulsar + # Functions package storage" section in the top-level README.md for details.