diff --git a/docs/docs/reference/supported-resources.md b/docs/docs/reference/supported-resources.md index e0e0812f..c1b1aa85 100644 --- a/docs/docs/reference/supported-resources.md +++ b/docs/docs/reference/supported-resources.md @@ -901,6 +901,10 @@ When this resource is used in a client stack via the `uses` section, Simple Cont **For complete details on environment variables and template placeholders, see:** [Template Placeholders Advanced - GCP PostgreSQL Cloud SQL](../concepts/template-placeholders-advanced.md#postgresql-cloud-sql) +**Connectivity (Cloud SQL Auth Proxy):** + +Your container connects to Cloud SQL over `localhost` — the injected `*.host`/`*.port` placeholders resolve to `localhost:5432`. Simple Container runs the [Cloud SQL Auth Proxy](https://cloud.google.com/sql/docs/postgres/sql-proxy) for you as a **native sidecar** (a Kubernetes init container with `restartPolicy: Always`) alongside the app container in long-lived Deployments. Because it is a native sidecar with a startup probe on the proxy's `--health-check` endpoint, **Kubernetes does not start your app container until the proxy is accepting connections** — so the app never races ahead and logs `connection refused` to `localhost:5432` on pod startup, restart, node scale-down, or VPA eviction. Requires GKE ≥ 1.29 (the `SidecarContainers` feature, GA in 1.33). No extra configuration is needed. + #### **Redis** (`gcp-redis`) Creates and manages Google Cloud Memorystore Redis instances. diff --git a/pkg/clouds/pulumi/gcp/cloudsql_proxy.go b/pkg/clouds/pulumi/gcp/cloudsql_proxy.go index cac70a53..87481bf9 100644 --- a/pkg/clouds/pulumi/gcp/cloudsql_proxy.go +++ b/pkg/clouds/pulumi/gcp/cloudsql_proxy.go @@ -122,68 +122,136 @@ func NewCloudsqlProxy(ctx *sdk.Context, args CloudSQLProxyArgs, opts ...sdk.Reso }, nil } +// cloudSQLProxyHealthPort is the port the proxy's built-in health-check HTTP server +// listens on when running as a native sidecar (--health-check). 
Its /startup endpoint +// backs the startup probe that gates the app containers. +const cloudSQLProxyHealthPort = 9090 + func cloudsqlProxyContainer(credsSecret *v1.Secret, dbInstance PostgresDBInstanceArgs, timeout int) sdk.Output { return sdk.All(credsSecret.Metadata.Name(), dbInstance.Project, dbInstance.Region, dbInstance.InstanceName).ApplyT(func(all []interface{}) v1.ContainerArgs { secretName := all[0].(*string) project := all[1].(string) region := all[2].(string) instanceName := all[3].(string) + return cloudsqlProxyContainerArgs(lo.FromPtr(secretName), project, region, instanceName, timeout) + }).(v1.ContainerOutput) +} - command := "/cloud-sql-proxy" - args := []string{ - "--address", - "0.0.0.0", - "--structured-logs", - "--credentials-file=/var/run/secrets/cloudsql/credentials.json", - fmt.Sprintf("%s:%s:%s", project, region, instanceName), - } +// cloudsqlProxyCommandArgs returns the proxy entrypoint. timeout == 0 is the long-lived +// runtime proxy (with its health server enabled); timeout > 0 is the init-Job proxy, +// shell-wrapped to self-kill after `timeout`s so a RestartPolicy: Never Job can complete. +func cloudsqlProxyCommandArgs(project, region, instanceName string, timeout int) (string, []string) { + command := "/cloud-sql-proxy" + args := []string{ + "--address", + "0.0.0.0", + "--structured-logs", + "--credentials-file=/var/run/secrets/cloudsql/credentials.json", + fmt.Sprintf("%s:%s:%s", project, region, instanceName), + } - if timeout > 0 { - args = []string{ - "-c", - fmt.Sprintf(` + if timeout > 0 { + return "sh", []string{ + "-c", + fmt.Sprintf(` echo "Starting proxy with timeout %ds..." %s %s & PROXY_PID=$! - + echo "Waiting %ds until killing proxy..." 
sleep %d; - + echo "Killing proxy after %ds" kill -9 $PROXY_PID; exit 0; `, timeout, command, strings.Join(args, " "), timeout, timeout, timeout), - } - command = "sh" } + } - return v1.ContainerArgs{ - Name: sdk.String("cloudsql-proxy"), - Image: sdk.String("gcr.io/cloud-sql-connectors/cloud-sql-proxy:2.8.1-alpine"), - Command: sdk.StringArray{ - sdk.String(command), - }, - Args: sdk.ToStringArray(args), - SecurityContext: &v1.SecurityContextArgs{ - RunAsNonRoot: sdk.Bool(true), + args = append(args, + "--http-address=0.0.0.0", + fmt.Sprintf("--http-port=%d", cloudSQLProxyHealthPort), + "--health-check", + ) + return command, args +} + +// cloudsqlProxyContainerArgs builds the proxy container from already-resolved values. +// timeout == 0 yields a native sidecar (RestartPolicy: Always + startup probe) so the app +// containers don't start before the proxy is listening. timeout > 0 (init-Job) stays an +// ordinary terminating container -- it must NOT be a native sidecar or the Job would hang. 
+func cloudsqlProxyContainerArgs(secretName, project, region, instanceName string, timeout int) v1.ContainerArgs { + command, args := cloudsqlProxyCommandArgs(project, region, instanceName, timeout) + + container := v1.ContainerArgs{ + Name: sdk.String("cloudsql-proxy"), + Image: sdk.String("gcr.io/cloud-sql-connectors/cloud-sql-proxy:2.8.1-alpine"), + Command: sdk.StringArray{sdk.String(command)}, + Args: sdk.ToStringArray(args), + SecurityContext: &v1.SecurityContextArgs{ + RunAsNonRoot: sdk.Bool(true), + }, + Resources: &v1.ResourceRequirementsArgs{ + Limits: sdk.StringMap{ + "memory": sdk.String("300Mi"), + "cpu": sdk.String("300m"), }, - Resources: &v1.ResourceRequirementsArgs{ - Limits: sdk.StringMap{ - "memory": sdk.String("300Mi"), - "cpu": sdk.String("300m"), - }, - Requests: sdk.StringMap{ - "memory": sdk.String("200Mi"), - "cpu": sdk.String("50m"), - }, + Requests: sdk.StringMap{ + "memory": sdk.String("200Mi"), + "cpu": sdk.String("50m"), }, - VolumeMounts: v1.VolumeMountArray{ - &v1.VolumeMountArgs{ - Name: sdk.String(lo.FromPtr(secretName)), - MountPath: sdk.String("/var/run/secrets/cloudsql"), - ReadOnly: sdk.Bool(true), - }, + }, + VolumeMounts: v1.VolumeMountArray{ + &v1.VolumeMountArgs{ + Name: sdk.String(secretName), + MountPath: sdk.String("/var/run/secrets/cloudsql"), + ReadOnly: sdk.Bool(true), }, - } - }).(v1.ContainerOutput) + }, + } + + if timeout > 0 { + return container + } + + container.RestartPolicy = sdk.String("Always") + container.Ports = v1.ContainerPortArray{ + &v1.ContainerPortArgs{ + Name: sdk.String("csql-hc"), + ContainerPort: sdk.Int(cloudSQLProxyHealthPort), + }, + } + container.StartupProbe = &v1.ProbeArgs{ + HttpGet: v1.HTTPGetActionArgs{ + Path: sdk.String("/startup"), + Port: sdk.String("csql-hc"), + }, + PeriodSeconds: sdk.IntPtr(2), + TimeoutSeconds: sdk.IntPtr(3), + FailureThreshold: sdk.IntPtr(30), + } + container.ReadinessProbe = &v1.ProbeArgs{ + HttpGet: v1.HTTPGetActionArgs{ + Path: sdk.String("/readiness"), + 
Port: sdk.String("csql-hc"), + }, + PeriodSeconds: sdk.IntPtr(10), + TimeoutSeconds: sdk.IntPtr(3), + FailureThreshold: sdk.IntPtr(3), + } + // On a native sidecar a failing readiness probe marks the pod NotReady but does not + // restart the container — only liveness recovers a proxy that passed startup and then + // hung (deadlock / pool exhaustion / partial-OOM), where the process stays alive but + // app DB calls to localhost:5432 fail. /liveness is already served by --health-check. + // kubelet defers liveness until the startup probe succeeds, so no InitialDelay is needed. + container.LivenessProbe = &v1.ProbeArgs{ + HttpGet: v1.HTTPGetActionArgs{ + Path: sdk.String("/liveness"), + Port: sdk.String("csql-hc"), + }, + PeriodSeconds: sdk.IntPtr(10), + TimeoutSeconds: sdk.IntPtr(3), + FailureThreshold: sdk.IntPtr(3), + } + return container } diff --git a/pkg/clouds/pulumi/gcp/cloudsql_proxy_test.go b/pkg/clouds/pulumi/gcp/cloudsql_proxy_test.go new file mode 100644 index 00000000..ead9883f --- /dev/null +++ b/pkg/clouds/pulumi/gcp/cloudsql_proxy_test.go @@ -0,0 +1,141 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) Simple Container + +package gcp + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + v1 "github.com/pulumi/pulumi-kubernetes/sdk/v4/go/kubernetes/core/v1" + sdk "github.com/pulumi/pulumi/sdk/v3/go/pulumi" + + "github.com/simple-container-com/api/pkg/clouds/pulumi/kubernetes" +) + +// The runtime (Deployment) proxy must expose its built-in health server so a startup +// probe can gate the app containers -- otherwise the app dials localhost:5432 before the +// proxy is listening and logs connection-refused on every pod (re)start. 
+func TestCloudsqlProxyCommandArgs_RuntimeEnablesHealthCheck(t *testing.T) { + cmd, args := cloudsqlProxyCommandArgs("proj", "europe-north1", "inst", 0) + + assert.Equal(t, "/cloud-sql-proxy", cmd, "runtime proxy runs the binary directly (no shell wrapper)") + assert.Contains(t, args, "--health-check", "runtime proxy must expose its health server for the startup probe") + assert.Contains(t, args, "--http-address=0.0.0.0", "health server must bind 0.0.0.0 so kubelet probes can reach it") + assert.Contains(t, args, "--http-port=9090") + assert.Contains(t, args, "proj:europe-north1:inst", "instance connection name must be preserved") + assert.NotContains(t, args, "-c", "runtime proxy must not be wrapped in a self-killing shell") + // Base flags moved from the old inline builder must survive the refactor. --credentials-file + // is the only flag that authenticates the proxy; it must equal the VolumeMount path + + // /credentials.json, so a future mount-path change forces this flag to move in lockstep. + assert.Contains(t, args, "--address") + assert.Contains(t, args, "0.0.0.0") + assert.Contains(t, args, "--credentials-file=/var/run/secrets/cloudsql/credentials.json", + "credentials flag must match the mounted secret path") +} + +// The init-Job proxy runs in a RestartPolicy: Never pod; it must self-terminate or the +// Job never completes. It must stay shell-wrapped and must NOT enable the health server. 
+func TestCloudsqlProxyCommandArgs_InitJobSelfKills(t *testing.T) { + cmd, args := cloudsqlProxyCommandArgs("proj", "europe-north1", "inst", 30) + + assert.Equal(t, "sh", cmd, "init-Job proxy must be shell-wrapped so it can self-terminate") + require.GreaterOrEqual(t, len(args), 2) + assert.Equal(t, "-c", args[0]) + script := args[1] + assert.Contains(t, script, "kill -9", "init-Job proxy must self-kill so the Job completes") + assert.Contains(t, script, "proj:europe-north1:inst") + assert.Contains(t, script, "--credentials-file=/var/run/secrets/cloudsql/credentials.json", + "init-Job proxy still authenticates via the mounted secret") + assert.NotContains(t, script, "--health-check", "init-Job proxy must not run a health server") +} + +// Runtime proxy => native sidecar: an init container with RestartPolicy: Always plus +// startup/readiness/liveness probes. This is what eliminates the startup race and lets a +// hung-but-alive proxy self-heal. +func TestCloudsqlProxyContainerArgs_RuntimeIsNativeSidecar(t *testing.T) { + c := cloudsqlProxyContainerArgs("creds", "proj", "reg", "inst", 0) + + assert.Equal(t, sdk.String("Always"), c.RestartPolicy, + "runtime proxy must be a native sidecar (init container, RestartPolicy: Always)") + assert.NotNil(t, c.StartupProbe, "startup probe must gate app containers until the proxy is listening") + assert.NotNil(t, c.ReadinessProbe, "readiness probe keeps the pod out of rotation until the DB path is up") + assert.NotNil(t, c.LivenessProbe, "liveness probe restarts a proxy that passed startup then hung") + assert.NotNil(t, c.Ports, "health-check port must be declared for the probes to target") +} + +// Init-Job proxy must NOT be a native sidecar: RestartPolicy: Always on a Job's container +// would keep the Job from ever completing, and it serves no health endpoints. 
+func TestCloudsqlProxyContainerArgs_InitJobIsNotSidecar(t *testing.T) { + c := cloudsqlProxyContainerArgs("creds", "proj", "reg", "inst", 30) + + assert.Nil(t, c.RestartPolicy, + "init-Job proxy must not carry RestartPolicy: Always -- it would hang the Job") + assert.Nil(t, c.StartupProbe, "init-Job proxy has no health server, so no probe") + assert.Nil(t, c.ReadinessProbe) + assert.Nil(t, c.LivenessProbe) + assert.Nil(t, c.Ports) +} + +// Pin the exact probe<->port wiring. A NotNil check would pass even with a port-name typo +// or a wrong probe path -- a probe the kubelet can never satisfy, which loops the pod in +// Init then restarts it. The agreement assertions (probe Port == declared port Name) are +// what actually defend the named-port linkage the whole sidecar depends on. +func TestCloudsqlProxyContainerArgs_RuntimeProbeWiring(t *testing.T) { + c := cloudsqlProxyContainerArgs("creds", "proj", "reg", "inst", 0) + + ports := c.Ports.(v1.ContainerPortArray) + require.Len(t, ports, 1) + p0 := ports[0].(*v1.ContainerPortArgs) + assert.Equal(t, sdk.Int(9090), p0.ContainerPort) + assert.Equal(t, sdk.String("csql-hc"), p0.Name) + + sp := c.StartupProbe.(*v1.ProbeArgs) + spHG := sp.HttpGet.(v1.HTTPGetActionArgs) + assert.Equal(t, sdk.String("/startup"), spHG.Path) + assert.Equal(t, sdk.IntPtr(30), sp.FailureThreshold, "startup budget ~= period(2s) x 30 = 60s") + assert.Equal(t, p0.Name, spHG.Port, "startup probe must target the declared health port by name") + + rp := c.ReadinessProbe.(*v1.ProbeArgs) + rpHG := rp.HttpGet.(v1.HTTPGetActionArgs) + assert.Equal(t, sdk.String("/readiness"), rpHG.Path) + assert.Equal(t, p0.Name, rpHG.Port, "readiness probe must target the declared health port by name") + + lp := c.LivenessProbe.(*v1.ProbeArgs) + lpHG := lp.HttpGet.(v1.HTTPGetActionArgs) + assert.Equal(t, sdk.String("/liveness"), lpHG.Path) + assert.Equal(t, p0.Name, lpHG.Port, "liveness probe must target the declared health port by name") +} + +// The proxy mounts its 
credential Secret at a fixed path; the mount Name must equal the +// secret name (the credential Volume that compute_proc.go appends derives from the same +// Metadata.Name(), so a drift breaks `--credentials-file` auth). +func TestCloudsqlProxyContainerArgs_MountsCredentialSecret(t *testing.T) { + c := cloudsqlProxyContainerArgs("creds", "proj", "reg", "inst", 0) + + mounts := c.VolumeMounts.(v1.VolumeMountArray) + require.Len(t, mounts, 1) + m := mounts[0].(*v1.VolumeMountArgs) + assert.Equal(t, sdk.String("creds"), m.Name, "mount name must equal the secret name") + assert.Equal(t, sdk.String("/var/run/secrets/cloudsql"), m.MountPath) + assert.Equal(t, sdk.Bool(true), m.ReadOnly) +} + +// The load-bearing wiring: the runtime proxy MUST be attached as a native sidecar (init +// container), never a regular container. RestartPolicy: Always on a regular container is +// rejected by the API server, and only the init-container placement gives the startup-probe +// ordering that removes the connection-refused race. Guards against a future refactor +// silently appending to SidecarOutputs. 
+func TestAttachCloudsqlProxyAsNativeSidecar_LandsInInitContainers(t *testing.T) { + kubeArgs := &kubernetes.SimpleContainerArgs{} + var proxy v1.ContainerOutput + var vol v1.VolumeOutput + + attachCloudsqlProxyAsNativeSidecar(kubeArgs, proxy, vol) + + assert.Len(t, kubeArgs.InitContainerOutputs, 1, "proxy must be a native sidecar (init container)") + assert.Empty(t, kubeArgs.SidecarOutputs, "proxy must NOT land in regular containers") + assert.Len(t, kubeArgs.VolumeOutputs, 1, "credential secret volume must ride along") +} diff --git a/pkg/clouds/pulumi/gcp/compute_proc.go b/pkg/clouds/pulumi/gcp/compute_proc.go index f159e32a..170f8a30 100644 --- a/pkg/clouds/pulumi/gcp/compute_proc.go +++ b/pkg/clouds/pulumi/gcp/compute_proc.go @@ -232,21 +232,36 @@ func addCloudsqlProxySidecarPreProcessor(ctx *sdk.Context, params appendParams) if err != nil { return errors.Wrapf(err, "failed to create cloudsql proxy for %q in stack %q", params.postgresName, params.stack.Name) } - kubeArgs.SidecarOutputs = append(kubeArgs.SidecarOutputs, cloudsqlProxy.ProxyContainer.ApplyT(func(arg any) corev1.ContainerArgs { + proxyContainer := cloudsqlProxy.ProxyContainer.ApplyT(func(arg any) corev1.ContainerArgs { return arg.(corev1.ContainerArgs) - }).(corev1.ContainerOutput)) - kubeArgs.VolumeOutputs = append(kubeArgs.VolumeOutputs, cloudsqlProxy.SqlProxySecret.Metadata.Name().ApplyT(func(arg any) corev1.VolumeArgs { + }).(corev1.ContainerOutput) + credsVolume := cloudsqlProxy.SqlProxySecret.Metadata.Name().ApplyT(func(arg any) corev1.VolumeArgs { return corev1.VolumeArgs{ Name: sdk.String(lo.FromPtr(arg.(*string))), Secret: &corev1.SecretVolumeSourceArgs{ SecretName: sdk.StringPtrFromPtr(arg.(*string)), }, } - }).(corev1.VolumeOutput)) + }).(corev1.VolumeOutput) + attachCloudsqlProxyAsNativeSidecar(kubeArgs, proxyContainer, credsVolume) return nil }) } +// attachCloudsqlProxyAsNativeSidecar wires the runtime proxy into kubeArgs as a native +// sidecar: its container goes in 
InitContainerOutputs (NOT SidecarOutputs) and its +// credential Secret volume rides along in VolumeOutputs. The init-container target is +// load-bearing — the container carries RestartPolicy: Always (set in cloudsqlProxyContainerArgs), +// which the API server rejects on a regular container, and only the init-container placement +// gives the startup-probe ordering that stops the app from dialing localhost:5432 before the +// proxy is up. Kept as a tiny pure helper so that contract is unit-testable without the GCP +// provisioning path. The init-Job proxy (timeout>0) is attached elsewhere as a plain +// terminating container. +func attachCloudsqlProxyAsNativeSidecar(kubeArgs *kubernetes.SimpleContainerArgs, proxyContainer corev1.ContainerOutput, credsVolume corev1.VolumeOutput) { + kubeArgs.InitContainerOutputs = append(kubeArgs.InitContainerOutputs, proxyContainer) + kubeArgs.VolumeOutputs = append(kubeArgs.VolumeOutputs, credsVolume) +} + func createCloudsqlProxy(ctx *sdk.Context, params appendParams, namespaceOutput sdk.StringInput) (*CloudSQLProxy, error) { // Fix for custom stacks: ensure input.StackParams.ParentEnv is set correctly for proper resource naming if params.provisionParams.ParentStack != nil && params.provisionParams.ParentStack.ParentEnv != "" &&