From 3a61b4038d4f0581b47bd58c13b0fc4609702a1c Mon Sep 17 00:00:00 2001 From: Craig Elliott Date: Wed, 3 Jun 2026 10:15:28 +0100 Subject: [PATCH 01/16] Parallel tests running --- Makefile | 12 +++++++++++- test/mock/grpc/mock_management_command_service.go | 13 +++++++++++-- test/mock/grpc/mock_management_server.go | 4 ++-- 3 files changed, 24 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index b01c24907..7fa4aa1d1 100644 --- a/Makefile +++ b/Makefile @@ -172,7 +172,17 @@ integration-test: $(SELECTED_PACKAGE) build-mock-management-plane-grpc TEST_ENV="Container" CONTAINER_OS_TYPE=$(CONTAINER_OS_TYPE) BUILD_TARGET="install-agent-local" CONTAINER_NGINX_IMAGE_REGISTRY=${CONTAINER_NGINX_IMAGE_REGISTRY} \ PACKAGES_REPO=$(OSS_PACKAGES_REPO) PACKAGE_NAME=$(PACKAGE_NAME) BASE_IMAGE=$(BASE_IMAGE) DOCKERFILE_PATH=$(DOCKERFILE_PATH) IMAGE_PATH=$(IMAGE_PATH) TAG=${IMAGE_TAG} \ OS_VERSION=$(OS_VERSION) OS_RELEASE=$(OS_RELEASE) \ - go test -v ./test/integration/installuninstall ./test/integration/managementplane ./test/integration/auxiliarycommandserver ./test/integration/nginxless + go test -v -parallel 2 ./test/integration/installuninstall ./test/integration/nginxless + + TEST_ENV="Container" CONTAINER_OS_TYPE=$(CONTAINER_OS_TYPE) BUILD_TARGET="install-agent-local" CONTAINER_NGINX_IMAGE_REGISTRY=${CONTAINER_NGINX_IMAGE_REGISTRY} \ + PACKAGES_REPO=$(OSS_PACKAGES_REPO) PACKAGE_NAME=$(PACKAGE_NAME) BASE_IMAGE=$(BASE_IMAGE) DOCKERFILE_PATH=$(DOCKERFILE_PATH) IMAGE_PATH=$(IMAGE_PATH) TAG=${IMAGE_TAG} \ + OS_VERSION=$(OS_VERSION) OS_RELEASE=$(OS_RELEASE) \ + go test -v ./test/integration/managementplane + + TEST_ENV="Container" CONTAINER_OS_TYPE=$(CONTAINER_OS_TYPE) BUILD_TARGET="install-agent-local" CONTAINER_NGINX_IMAGE_REGISTRY=${CONTAINER_NGINX_IMAGE_REGISTRY} \ + PACKAGES_REPO=$(OSS_PACKAGES_REPO) PACKAGE_NAME=$(PACKAGE_NAME) BASE_IMAGE=$(BASE_IMAGE) DOCKERFILE_PATH=$(DOCKERFILE_PATH) IMAGE_PATH=$(IMAGE_PATH) TAG=${IMAGE_TAG} \ + OS_VERSION=$(OS_VERSION) 
OS_RELEASE=$(OS_RELEASE) \ + go test -v ./test/integration/auxiliarycommandserver upgrade-test: $(SELECTED_PACKAGE) build-mock-management-plane-grpc TEST_ENV="Container" CONTAINER_OS_TYPE=$(CONTAINER_OS_TYPE) BUILD_TARGET="install-agent-repo" CONTAINER_NGINX_IMAGE_REGISTRY=${CONTAINER_NGINX_IMAGE_REGISTRY} \ diff --git a/test/mock/grpc/mock_management_command_service.go b/test/mock/grpc/mock_management_command_service.go index ad752eb10..3686fc29a 100644 --- a/test/mock/grpc/mock_management_command_service.go +++ b/test/mock/grpc/mock_management_command_service.go @@ -34,6 +34,7 @@ import ( type CommandService struct { mpi.UnimplementedCommandServiceServer instanceFiles map[string][]*mpi.File + instanceFilesMutex sync.RWMutex firstConnectionCallCh chan struct{} server *gin.Engine connectionRequest *mpi.CreateConnectionRequest @@ -216,8 +217,11 @@ func (cs *CommandService) handleConfigUploadRequest( instanceID := upload.ConfigUploadRequest.GetOverview().GetConfigVersion().GetInstanceId() overviewFiles := upload.ConfigUploadRequest.GetOverview().GetFiles() + cs.instanceFilesMutex.Lock() + defer cs.instanceFilesMutex.Unlock() + if cs.instanceFiles[instanceID] != nil { - filesToDelete := cs.checkForDeletedFiles(instanceID, overviewFiles) + filesToDelete := cs.checkForDeletedFilesLocked(instanceID, overviewFiles) for _, fileToDelete := range filesToDelete { err := os.Remove(fileToDelete) if err != nil { @@ -228,7 +232,8 @@ func (cs *CommandService) handleConfigUploadRequest( cs.instanceFiles[instanceID] = overviewFiles } -func (cs *CommandService) checkForDeletedFiles(instanceID string, overviewFiles []*mpi.File) []string { +// checkForDeletedFilesLocked must be called while holding instanceFilesMutex +func (cs *CommandService) checkForDeletedFilesLocked(instanceID string, overviewFiles []*mpi.File) []string { filesToDelete := []string{} for _, diskfile := range cs.instanceFiles[instanceID] { @@ -401,6 +406,7 @@ func (cs *CommandService) addConfigApplyEndpoint() { 
return } + cs.instanceFilesMutex.Lock() if filesUpdated { cs.instanceFiles[instanceID] = updatedConfigFiles } else { @@ -425,6 +431,7 @@ func (cs *CommandService) addConfigApplyEndpoint() { }, }, } + cs.instanceFilesMutex.Unlock() cs.requestChan <- &request @@ -437,6 +444,7 @@ func (cs *CommandService) addConfigEndpoint() { instanceID := c.Param("instanceID") var data map[string]interface{} + cs.instanceFilesMutex.RLock() response := &mpi.GetOverviewResponse{ Overview: &mpi.FileOverview{ ConfigVersion: &mpi.ConfigVersion{ @@ -446,6 +454,7 @@ func (cs *CommandService) addConfigEndpoint() { Files: cs.instanceFiles[instanceID], }, } + cs.instanceFilesMutex.RUnlock() if err := json.Unmarshal([]byte(protojson.Format(response)), &data); err != nil { slog.Error("Failed to return connection", "error", err) diff --git a/test/mock/grpc/mock_management_server.go b/test/mock/grpc/mock_management_server.go index 7043e1027..dc38514a4 100644 --- a/test/mock/grpc/mock_management_server.go +++ b/test/mock/grpc/mock_management_server.go @@ -18,7 +18,7 @@ import ( "syscall" "time" - "github.com/nginx/agent/v3/api/grpc/mpi/v1" + v1 "github.com/nginx/agent/v3/api/grpc/mpi/v1" "github.com/nginx/agent/v3/internal/config" "buf.build/go/protovalidate" @@ -71,7 +71,7 @@ func NewMockManagementServer( externalFileServer *string, ) (*MockManagementServer, error) { var err error - requestChan := make(chan *v1.ManagementPlaneRequest) + requestChan := make(chan *v1.ManagementPlaneRequest, 100) commandService := serveCommandService(ctx, apiAddress, agentConfig, requestChan, *configDirectory, *externalFileServer) From a760cb2ce438f3b0ba63688cc9414a61f57cc613 Mon Sep 17 00:00:00 2001 From: Craig Elliott Date: Wed, 3 Jun 2026 11:15:22 +0100 Subject: [PATCH 02/16] Changes after linting --- test/mock/grpc/mock_management_command_service.go | 2 +- test/mock/grpc/mock_management_server.go | 11 ++++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git 
a/test/mock/grpc/mock_management_command_service.go b/test/mock/grpc/mock_management_command_service.go index 3686fc29a..a136eafd5 100644 --- a/test/mock/grpc/mock_management_command_service.go +++ b/test/mock/grpc/mock_management_command_service.go @@ -34,7 +34,6 @@ import ( type CommandService struct { mpi.UnimplementedCommandServiceServer instanceFiles map[string][]*mpi.File - instanceFilesMutex sync.RWMutex firstConnectionCallCh chan struct{} server *gin.Engine connectionRequest *mpi.CreateConnectionRequest @@ -44,6 +43,7 @@ type CommandService struct { externalFileServer string configDirectory string dataPlaneResponses []*mpi.DataPlaneResponse + instanceFilesMutex sync.RWMutex dataPlaneResponsesMutex sync.Mutex updateDataPlaneStatusMutex sync.Mutex connectionMutex sync.Mutex diff --git a/test/mock/grpc/mock_management_server.go b/test/mock/grpc/mock_management_server.go index dc38514a4..ba64e524d 100644 --- a/test/mock/grpc/mock_management_server.go +++ b/test/mock/grpc/mock_management_server.go @@ -18,7 +18,7 @@ import ( "syscall" "time" - v1 "github.com/nginx/agent/v3/api/grpc/mpi/v1" + mpi "github.com/nginx/agent/v3/api/grpc/mpi/v1" "github.com/nginx/agent/v3/internal/config" "buf.build/go/protovalidate" @@ -39,6 +39,7 @@ const ( keepAliveTimeout = 10 * time.Second testTimeout = 100 * time.Millisecond connectionType = "tcp" + requestChanSize = 100 ) var ( @@ -71,7 +72,7 @@ func NewMockManagementServer( externalFileServer *string, ) (*MockManagementServer, error) { var err error - requestChan := make(chan *v1.ManagementPlaneRequest, 100) + requestChan := make(chan *mpi.ManagementPlaneRequest, requestChanSize) commandService := serveCommandService(ctx, apiAddress, agentConfig, requestChan, *configDirectory, *externalFileServer) @@ -98,8 +99,8 @@ func NewMockManagementServer( healthcheck := health.NewServer() healthgrpc.RegisterHealthServer(grpcServer, healthcheck) - v1.RegisterCommandServiceServer(grpcServer, commandService) - 
v1.RegisterFileServiceServer(grpcServer, fileServer) + mpi.RegisterCommandServiceServer(grpcServer, commandService) + mpi.RegisterFileServiceServer(grpcServer, fileServer) go reportHealth(healthcheck, agentConfig) go func() { @@ -187,7 +188,7 @@ func serveCommandService( ctx context.Context, apiAddress string, agentConfig *config.Config, - requestChan chan *v1.ManagementPlaneRequest, + requestChan chan *mpi.ManagementPlaneRequest, configDirectory string, externalFileServer string, ) *CommandService { From 4cce5dceed674a773f11db38a07f48483c630c54 Mon Sep 17 00:00:00 2001 From: Craig Elliott Date: Thu, 4 Jun 2026 12:00:21 +0100 Subject: [PATCH 03/16] Adding retry logic to upgrade tests --- test/helpers/test_containers_utils.go | 33 ++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/test/helpers/test_containers_utils.go b/test/helpers/test_containers_utils.go index 164586ed5..3f2a9f778 100644 --- a/test/helpers/test_containers_utils.go +++ b/test/helpers/test_containers_utils.go @@ -11,6 +11,7 @@ import ( "io" "os" "testing" + "time" "github.com/moby/moby/client" "github.com/stretchr/testify/assert" @@ -21,6 +22,11 @@ import ( const configFilePermissions = 0o600 +const ( + extractFileMaxAttempts = 10 + extractFileRetryDelay = 100 * time.Millisecond +) + type Parameters struct { NginxConfigPath string NginxAgentConfigPath string @@ -496,8 +502,33 @@ func ExtractFileFromContainer( containerPath string, ) string { tb.Helper() - fileContent, err := testContainer.CopyFileFromContainer(ctx, containerPath) + + var ( + fileContent io.ReadCloser + err error + ) + + for attempt := 1; attempt <= extractFileMaxAttempts; attempt++ { + fileContent, err = testContainer.CopyFileFromContainer(ctx, containerPath) + if err == nil { + break + } + + if attempt == extractFileMaxAttempts { + break + } + + select { + case <-ctx.Done(): + require.NoError(tb, ctx.Err()) + case <-time.After(extractFileRetryDelay): + } + } + require.NoError(tb, err) + defer 
func() { + require.NoError(tb, fileContent.Close()) + }() content, err := io.ReadAll(fileContent) require.NoError(tb, err) From 3a0cc0fae9831c5c7f35a2dfa035d222df51fc50 Mon Sep 17 00:00:00 2001 From: Craig Elliott Date: Thu, 4 Jun 2026 15:18:16 +0100 Subject: [PATCH 04/16] Re-enable upgrade test --- test/integration/upgrade/upgrade_test.go | 66 ++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/test/integration/upgrade/upgrade_test.go b/test/integration/upgrade/upgrade_test.go index a0d7b21fb..aa6fe7eec 100644 --- a/test/integration/upgrade/upgrade_test.go +++ b/test/integration/upgrade/upgrade_test.go @@ -99,6 +99,72 @@ func Test_UpgradeFromV3(t *testing.T) { slog.Info("finished agent v3 upgrade tests") } +func Test_UpgradeWithCustomOTELConfig(t *testing.T) { + ctx := context.Background() + + containerNetwork := utils.CreateContainerNetwork(ctx, t) + utils.SetupMockManagementPlaneGrpc(ctx, t, containerNetwork) + defer func(ctx context.Context) { + err := utils.MockManagementPlaneGrpcContainer.Terminate(ctx) + require.NoError(t, err) + }(ctx) + + testContainer, teardownTest := upgradeSetup(t, true, "custom_otel", containerNetwork) + defer teardownTest(t) + + slog.Info("starting agent v3 upgrade tests with custom OTEL config") + + // get currently installed agent version + oldVersion := agentVersion(ctx, t, testContainer) + + // verify agent upgrade + verifyAgentUpgrade(ctx, t, testContainer) + + // verify version of agent + verifyAgentVersion(ctx, t, testContainer, oldVersion) + + // Expected files to validate after upgrade + files := []helpers.ConfigFileDescriptor{ + { + ContainerPath: agentConfigDir + "/nginx-agent.conf", + ExpectedPath: "./configs/otel/nginx-agent.conf", + LogLabel: "agent config", + }, + { + ContainerPath: agentConfigDir + "/my_config.yaml", + ExpectedPath: "./configs/otel/my_config.yaml", + LogLabel: "otel custom config", + }, + { + ContainerPath: agentConfigDir + "/opentelemetry-collector-agent.yaml", + ExpectedPath: 
"./configs/otel/otel-config.yaml", + LogLabel: "otel config", + }, + } + // verify agent v3 configs has not changed + helpers.ValidateContainerFiles(ctx, t, testContainer, files) + + // Validate agent.log contains OTEL startup log + helpers.AssertStringInContainerFile( + ctx, t, testContainer, agentLogDir+"/agent.log", "Starting OTel collector", + ) + helpers.AssertStringInContainerFile( + ctx, + t, + testContainer, + agentLogDir+"/agent.log", + "Merging additional OTel config files", + ) + + // Validate agent otel log contains specific logs + helpers.AssertStringInContainerFile( + ctx, t, testContainer, agentLogDir+"/opentelemetry-collector-agent.log", + "Everything is ready. Begin running and processing data.", + ) + + slog.Info("finished agent v3 upgrade tests with custom OTEL config") +} + func upgradeSetup(tb testing.TB, expectNoErrorsInLogs bool, setupType string, containerNetwork *testcontainers.DockerNetwork, ) (testcontainers.Container, func(tb testing.TB)) { From 877124ca2acc2c5f0ffb9d9ee4467d8f30f86241 Mon Sep 17 00:00:00 2001 From: Craig Elliott Date: Fri, 5 Jun 2026 13:13:27 +0100 Subject: [PATCH 05/16] Fix unit test --- .../health/health_watcher_service_test.go | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/internal/watcher/health/health_watcher_service_test.go b/internal/watcher/health/health_watcher_service_test.go index 65ceee351..d0e9b4277 100644 --- a/internal/watcher/health/health_watcher_service_test.go +++ b/internal/watcher/health/health_watcher_service_test.go @@ -189,13 +189,6 @@ func TestHealthWatcherService_health(t *testing.T) { func TestHealthWatcherService_compareCache(t *testing.T) { ossInstance := protos.NginxOssInstance([]string{}) plusInstance := protos.NginxPlusInstance([]string{}) - healthCache := map[string]*mpi.InstanceHealth{ - ossInstance.GetInstanceMeta().GetInstanceId(): protos.HealthyInstanceHealth(), - plusInstance.GetInstanceMeta().GetInstanceId(): { - InstanceId: 
plusInstance.GetInstanceMeta().GetInstanceId(), - InstanceHealthStatus: mpi.InstanceHealth_INSTANCE_HEALTH_STATUS_HEALTHY, - }, - } healths := []*mpi.InstanceHealth{ protos.HealthyInstanceHealth(), @@ -203,12 +196,20 @@ func TestHealthWatcherService_compareCache(t *testing.T) { tests := []struct { name string + initialCache map[string]*mpi.InstanceHealth expectedCache map[string]*mpi.InstanceHealth instances map[string]*mpi.Instance expectedHealth []*mpi.InstanceHealth }{ { name: "Test 1: Instance was deleted", + initialCache: map[string]*mpi.InstanceHealth{ + ossInstance.GetInstanceMeta().GetInstanceId(): protos.HealthyInstanceHealth(), + plusInstance.GetInstanceMeta().GetInstanceId(): { + InstanceId: plusInstance.GetInstanceMeta().GetInstanceId(), + InstanceHealthStatus: mpi.InstanceHealth_INSTANCE_HEALTH_STATUS_HEALTHY, + }, + }, expectedHealth: []*mpi.InstanceHealth{ protos.HealthyInstanceHealth(), { @@ -227,6 +228,9 @@ func TestHealthWatcherService_compareCache(t *testing.T) { }, { name: "Test 2: No change to instance list", + initialCache: map[string]*mpi.InstanceHealth{ + ossInstance.GetInstanceMeta().GetInstanceId(): protos.HealthyInstanceHealth(), + }, expectedHealth: []*mpi.InstanceHealth{ protos.HealthyInstanceHealth(), }, @@ -243,7 +247,7 @@ func TestHealthWatcherService_compareCache(t *testing.T) { t.Run(test.name, func(tt *testing.T) { agentConfig := types.AgentConfig() healthWatcher := NewHealthWatcherService(agentConfig) - healthWatcher.cache = healthCache + healthWatcher.cache = test.initialCache healthWatcher.instances = test.instances result := healthWatcher.compareCache(healths) From ce255f6dd72b8d644dbd856b2edef6b00efd98a6 Mon Sep 17 00:00:00 2001 From: Craig Elliott Date: Fri, 5 Jun 2026 13:53:30 +0100 Subject: [PATCH 06/16] PR feedback --- test/helpers/test_containers_utils.go | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/test/helpers/test_containers_utils.go 
b/test/helpers/test_containers_utils.go index 3f2a9f778..44c6e8f63 100644 --- a/test/helpers/test_containers_utils.go +++ b/test/helpers/test_containers_utils.go @@ -503,29 +503,20 @@ func ExtractFileFromContainer( ) string { tb.Helper() - var ( - fileContent io.ReadCloser - err error - ) + var fileContent io.ReadCloser + totalTimeout := time.Duration(extractFileMaxAttempts) * extractFileRetryDelay - for attempt := 1; attempt <= extractFileMaxAttempts; attempt++ { + assert.Eventually(tb, func() bool { + var err error fileContent, err = testContainer.CopyFileFromContainer(ctx, containerPath) - if err == nil { - break - } - if attempt == extractFileMaxAttempts { - break - } + return err == nil + }, totalTimeout, extractFileRetryDelay, "Failed to extract file %s", containerPath) - select { - case <-ctx.Done(): - require.NoError(tb, ctx.Err()) - case <-time.After(extractFileRetryDelay): - } + if fileContent == nil { + tb.Fatalf("Unable to extract file %s", containerPath) } - require.NoError(tb, err) defer func() { require.NoError(tb, fileContent.Close()) }() From 41be4e2e206ab6cc3b2c9b2c95752f729c5efbcf Mon Sep 17 00:00:00 2001 From: Craig Elliott Date: Fri, 5 Jun 2026 16:35:35 +0100 Subject: [PATCH 07/16] Add reliability to tests --- .../health/health_watcher_service_test.go | 21 +++++++++++++------ test/integration/upgrade/upgrade_test.go | 10 +++++++-- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/internal/watcher/health/health_watcher_service_test.go b/internal/watcher/health/health_watcher_service_test.go index d0e9b4277..a06c14bfe 100644 --- a/internal/watcher/health/health_watcher_service_test.go +++ b/internal/watcher/health/health_watcher_service_test.go @@ -6,9 +6,9 @@ package health import ( + "context" "errors" "fmt" - "reflect" "testing" mpi "github.com/nginx/agent/v3/api/grpc/mpi/v1" @@ -171,17 +171,26 @@ func TestHealthWatcherService_health(t *testing.T) { healthWatcher := NewHealthWatcherService(agentConfig) fakeHealthWatcher := 
healthfakes.FakeHealthWatcherOperator{} - fakeHealthWatcher.HealthReturnsOnCall(0, protos.HealthyInstanceHealth(), nil) - fakeHealthWatcher.HealthReturnsOnCall(1, protos.UnhealthyInstanceHealth(), nil) - fakeHealthWatcher.HealthReturnsOnCall(2, nil, errors.New("unable to determine health")) + // Dispatch by instance ID so results are independent of map iteration order. + ossID := ossInstance.GetInstanceMeta().GetInstanceId() + plusID := plusInstance.GetInstanceMeta().GetInstanceId() + fakeHealthWatcher.HealthCalls(func(_ context.Context, instance *mpi.Instance) (*mpi.InstanceHealth, error) { + switch instance.GetInstanceMeta().GetInstanceId() { + case ossID: + return protos.HealthyInstanceHealth(), nil + case plusID: + return protos.UnhealthyInstanceHealth(), nil + default: + return nil, errors.New("unable to determine health") + } + }) healthWatcher.instances = test.instances healthWatcher.updateCache(test.cache) healthWatcher.watcher = &fakeHealthWatcher updatedStatus, isHealthDiff := healthWatcher.health(t.Context()) assert.Equal(t, test.isHealthDiff, isHealthDiff) - - reflect.DeepEqual(test.updatedInstances, updatedStatus) + assert.ElementsMatch(t, test.updatedInstances, updatedStatus) }) } } diff --git a/test/integration/upgrade/upgrade_test.go b/test/integration/upgrade/upgrade_test.go index aa6fe7eec..92ae39f54 100644 --- a/test/integration/upgrade/upgrade_test.go +++ b/test/integration/upgrade/upgrade_test.go @@ -262,8 +262,14 @@ func upgradeAgent(ctx context.Context, tb testing.TB, testContainer testcontaine func verifyAgentVersion(ctx context.Context, tb testing.TB, testContainer testcontainers.Container, oldVersion string) { tb.Helper() - newVersion := agentVersion(ctx, tb, testContainer) - assert.NotEqual(tb, oldVersion, newVersion) + var newVersion string + + assert.Eventually(tb, func() bool { + newVersion = agentVersion(ctx, tb, testContainer) + + return newVersion != oldVersion + }, maxUpgradeTime, 100*time.Millisecond, "agent version not upgraded, 
still %s after upgrade", oldVersion) + tb.Logf("agent upgraded to version %s successfully", newVersion) } From bd0551fb38c7c7edc808ff30ec1b9750da5de45d Mon Sep 17 00:00:00 2001 From: Craig Elliott Date: Fri, 5 Jun 2026 16:51:26 +0100 Subject: [PATCH 08/16] Restoring test --- .../health/health_watcher_service_test.go | 41 +++++++------------ 1 file changed, 14 insertions(+), 27 deletions(-) diff --git a/internal/watcher/health/health_watcher_service_test.go b/internal/watcher/health/health_watcher_service_test.go index a06c14bfe..65ceee351 100644 --- a/internal/watcher/health/health_watcher_service_test.go +++ b/internal/watcher/health/health_watcher_service_test.go @@ -6,9 +6,9 @@ package health import ( - "context" "errors" "fmt" + "reflect" "testing" mpi "github.com/nginx/agent/v3/api/grpc/mpi/v1" @@ -171,26 +171,17 @@ func TestHealthWatcherService_health(t *testing.T) { healthWatcher := NewHealthWatcherService(agentConfig) fakeHealthWatcher := healthfakes.FakeHealthWatcherOperator{} - // Dispatch by instance ID so results are independent of map iteration order. 
- ossID := ossInstance.GetInstanceMeta().GetInstanceId() - plusID := plusInstance.GetInstanceMeta().GetInstanceId() - fakeHealthWatcher.HealthCalls(func(_ context.Context, instance *mpi.Instance) (*mpi.InstanceHealth, error) { - switch instance.GetInstanceMeta().GetInstanceId() { - case ossID: - return protos.HealthyInstanceHealth(), nil - case plusID: - return protos.UnhealthyInstanceHealth(), nil - default: - return nil, errors.New("unable to determine health") - } - }) + fakeHealthWatcher.HealthReturnsOnCall(0, protos.HealthyInstanceHealth(), nil) + fakeHealthWatcher.HealthReturnsOnCall(1, protos.UnhealthyInstanceHealth(), nil) + fakeHealthWatcher.HealthReturnsOnCall(2, nil, errors.New("unable to determine health")) healthWatcher.instances = test.instances healthWatcher.updateCache(test.cache) healthWatcher.watcher = &fakeHealthWatcher updatedStatus, isHealthDiff := healthWatcher.health(t.Context()) assert.Equal(t, test.isHealthDiff, isHealthDiff) - assert.ElementsMatch(t, test.updatedInstances, updatedStatus) + + reflect.DeepEqual(test.updatedInstances, updatedStatus) }) } } @@ -198,6 +189,13 @@ func TestHealthWatcherService_health(t *testing.T) { func TestHealthWatcherService_compareCache(t *testing.T) { ossInstance := protos.NginxOssInstance([]string{}) plusInstance := protos.NginxPlusInstance([]string{}) + healthCache := map[string]*mpi.InstanceHealth{ + ossInstance.GetInstanceMeta().GetInstanceId(): protos.HealthyInstanceHealth(), + plusInstance.GetInstanceMeta().GetInstanceId(): { + InstanceId: plusInstance.GetInstanceMeta().GetInstanceId(), + InstanceHealthStatus: mpi.InstanceHealth_INSTANCE_HEALTH_STATUS_HEALTHY, + }, + } healths := []*mpi.InstanceHealth{ protos.HealthyInstanceHealth(), @@ -205,20 +203,12 @@ func TestHealthWatcherService_compareCache(t *testing.T) { tests := []struct { name string - initialCache map[string]*mpi.InstanceHealth expectedCache map[string]*mpi.InstanceHealth instances map[string]*mpi.Instance expectedHealth 
[]*mpi.InstanceHealth }{ { name: "Test 1: Instance was deleted", - initialCache: map[string]*mpi.InstanceHealth{ - ossInstance.GetInstanceMeta().GetInstanceId(): protos.HealthyInstanceHealth(), - plusInstance.GetInstanceMeta().GetInstanceId(): { - InstanceId: plusInstance.GetInstanceMeta().GetInstanceId(), - InstanceHealthStatus: mpi.InstanceHealth_INSTANCE_HEALTH_STATUS_HEALTHY, - }, - }, expectedHealth: []*mpi.InstanceHealth{ protos.HealthyInstanceHealth(), { @@ -237,9 +227,6 @@ func TestHealthWatcherService_compareCache(t *testing.T) { }, { name: "Test 2: No change to instance list", - initialCache: map[string]*mpi.InstanceHealth{ - ossInstance.GetInstanceMeta().GetInstanceId(): protos.HealthyInstanceHealth(), - }, expectedHealth: []*mpi.InstanceHealth{ protos.HealthyInstanceHealth(), }, @@ -256,7 +243,7 @@ func TestHealthWatcherService_compareCache(t *testing.T) { t.Run(test.name, func(tt *testing.T) { agentConfig := types.AgentConfig() healthWatcher := NewHealthWatcherService(agentConfig) - healthWatcher.cache = test.initialCache + healthWatcher.cache = healthCache healthWatcher.instances = test.instances result := healthWatcher.compareCache(healths) From 494c4bca044d2a4aa30f2b3205a99fd5b16e350c Mon Sep 17 00:00:00 2001 From: Craig Elliott Date: Tue, 9 Jun 2026 12:13:05 +0100 Subject: [PATCH 09/16] Add assert eventually to test --- .../auxiliarycommandserver/connection_test.go | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/test/integration/auxiliarycommandserver/connection_test.go b/test/integration/auxiliarycommandserver/connection_test.go index 09b103270..5c6b20e4c 100644 --- a/test/integration/auxiliarycommandserver/connection_test.go +++ b/test/integration/auxiliarycommandserver/connection_test.go @@ -24,6 +24,11 @@ import ( "github.com/stretchr/testify/suite" ) +const ( + eventuallyTimeout = 1 * time.Second + eventuallyInterval = 100 * time.Millisecond +) + type AuxiliaryTestSuite struct { suite.Suite teardownTest func(tb 
testing.TB) @@ -125,8 +130,10 @@ func (s *AuxiliaryTestSuite) TestAuxiliary_Test3_DataplaneHealthRequest() { s.False(s.T().Failed()) // Check auxiliary server still only has 1 ManagementPlaneResponses as it didn't send the request - utils.ManagementPlaneResponses(s.T(), 0, utils.AuxiliaryMockManagementPlaneAPIAddress) - s.False(s.T().Failed()) + s.Eventually(func() bool { + responses := utils.ManagementPlaneResponses(s.T(), 0, utils.AuxiliaryMockManagementPlaneAPIAddress) + return len(responses) == 0 + }, eventuallyTimeout, eventuallyInterval, "Expected no responses from auxiliary server, got some") slog.Info("finished auxiliary command server data plane health request test") } From 5b0d37f31d3bf5d4b4b24c2d86d4a55fee90a5c0 Mon Sep 17 00:00:00 2001 From: Craig Elliott Date: Tue, 9 Jun 2026 12:43:38 +0100 Subject: [PATCH 10/16] Clean up errors --- test/integration/installuninstall/install_uninstall_test.go | 1 + test/integration/nginxless/nginx_less_mpi_connection_test.go | 1 + test/integration/utils/grpc_management_plane_utils.go | 4 +++- 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/test/integration/installuninstall/install_uninstall_test.go b/test/integration/installuninstall/install_uninstall_test.go index 95c1ab841..e42cc5bed 100644 --- a/test/integration/installuninstall/install_uninstall_test.go +++ b/test/integration/installuninstall/install_uninstall_test.go @@ -76,6 +76,7 @@ func installUninstallSetup(tb testing.TB, expectNoErrorsInLogs bool) (testcontai } func TestInstallUninstall(t *testing.T) { + t.Parallel() testContainer, teardownTest := installUninstallSetup(t, true) defer teardownTest(t) ctx := context.Background() diff --git a/test/integration/nginxless/nginx_less_mpi_connection_test.go b/test/integration/nginxless/nginx_less_mpi_connection_test.go index cf3246f7e..2376c070a 100644 --- a/test/integration/nginxless/nginx_less_mpi_connection_test.go +++ b/test/integration/nginxless/nginx_less_mpi_connection_test.go @@ -16,6 +16,7 @@ 
import ( // Verify that the agent sends a connection request to Management Plane even when Nginx is not present func TestNginxLessGrpc_Connection(t *testing.T) { + t.Parallel() slog.Info("starting nginxless connection test") teardownTest := utils.SetupConnectionTest(t, false, true, false, "../../config/agent/nginx-config-with-grpc-client.conf") diff --git a/test/integration/utils/grpc_management_plane_utils.go b/test/integration/utils/grpc_management_plane_utils.go index 4ad20a698..af75bf905 100644 --- a/test/integration/utils/grpc_management_plane_utils.go +++ b/test/integration/utils/grpc_management_plane_utils.go @@ -126,7 +126,9 @@ func CreateContainerNetwork(ctx context.Context, tb testing.TB) *testcontainers. require.NoError(tb, err) tb.Cleanup(func() { networkErr := containerNetwork.Remove(ctx) - tb.Logf("Error removing container network: %v", networkErr) + if networkErr != nil { + tb.Logf("Error removing container network: %v", networkErr) + } }) return containerNetwork From 1496045688a6628315c15ff88a816b443f52c45a Mon Sep 17 00:00:00 2001 From: Craig Elliott Date: Tue, 9 Jun 2026 13:21:38 +0100 Subject: [PATCH 11/16] Increase timeout --- test/helpers/test_containers_utils.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/helpers/test_containers_utils.go b/test/helpers/test_containers_utils.go index 47848eb09..f55e7ca3f 100644 --- a/test/helpers/test_containers_utils.go +++ b/test/helpers/test_containers_utils.go @@ -37,7 +37,7 @@ const ( const ( extractFileMaxAttempts = 10 - extractFileRetryDelay = 100 * time.Millisecond + extractFileRetryDelay = 200 * time.Millisecond ) type Parameters struct { From 74bdbc5630acf6e4f60718f5a90d628866616338 Mon Sep 17 00:00:00 2001 From: Craig Elliott Date: Tue, 9 Jun 2026 14:27:12 +0100 Subject: [PATCH 12/16] Update expected yaml --- test/integration/upgrade/configs/expected-otel-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/test/integration/upgrade/configs/expected-otel-config.yaml b/test/integration/upgrade/configs/expected-otel-config.yaml index 204f5f4c7..f477b6ed5 100644 --- a/test/integration/upgrade/configs/expected-otel-config.yaml +++ b/test/integration/upgrade/configs/expected-otel-config.yaml @@ -44,8 +44,8 @@ service: pipelines: metrics/default: receivers: - - container_metrics - host_metrics + - container_metrics processors: - batch/default_metrics exporters: From 02490be5dc0aba14207c75f709a8e67e551faaef Mon Sep 17 00:00:00 2001 From: Craig Elliott Date: Thu, 11 Jun 2026 16:38:08 +0100 Subject: [PATCH 13/16] Update dockerfiles to improve build times --- test/docker/nginx-oss/apk/Dockerfile | 4 ++-- test/docker/nginx-oss/deb/Dockerfile | 4 ++-- test/docker/nginx-oss/rpm/Dockerfile | 4 ++-- test/docker/nginx-plus/deb/Dockerfile | 11 ++++++++--- 4 files changed, 14 insertions(+), 9 deletions(-) diff --git a/test/docker/nginx-oss/apk/Dockerfile b/test/docker/nginx-oss/apk/Dockerfile index a8f6ede59..2efc0740e 100644 --- a/test/docker/nginx-oss/apk/Dockerfile +++ b/test/docker/nginx-oss/apk/Dockerfile @@ -5,8 +5,6 @@ LABEL maintainer="NGINX Docker Maintainers " ARG ENTRY_POINT WORKDIR /agent -COPY ./build /agent/build -COPY $ENTRY_POINT /agent/entrypoint.sh RUN set -x \ && addgroup -g 101 -S nginx \ @@ -17,6 +15,7 @@ RUN set -x \ bash \ nginx +COPY $ENTRY_POINT /agent/entrypoint.sh RUN chmod +x /agent/entrypoint.sh STOPSIGNAL SIGTERM @@ -28,6 +27,7 @@ FROM install-nginx as install-agent-local ARG PACKAGE_NAME +COPY ./build /agent/build RUN apk add --allow-untrusted /agent/build/${PACKAGE_NAME}.apk FROM install-nginx as install-agent-repo diff --git a/test/docker/nginx-oss/deb/Dockerfile b/test/docker/nginx-oss/deb/Dockerfile index 5cea596c1..70754e29f 100644 --- a/test/docker/nginx-oss/deb/Dockerfile +++ b/test/docker/nginx-oss/deb/Dockerfile @@ -8,8 +8,6 @@ ARG PACKAGE_NAME ARG PACKAGES_REPO WORKDIR /agent -COPY ./build /agent/build -COPY $ENTRY_POINT /agent/entrypoint.sh RUN
set -x \ && ls /usr/sbin/ \ @@ -29,7 +27,9 @@ RUN set -x \ RUN curl https://nginx.org/keys/nginx_signing.key | gpg --dearmor | tee /usr/share/keyrings/nginx-archive-keyring.gpg >/dev/null \ && printf "deb [signed-by=/usr/share/keyrings/nginx-archive-keyring.gpg] http://${PACKAGES_REPO}/nginx-agent/ubuntu/ `lsb_release -cs` agent\n" > /etc/apt/sources.list.d/nginx-agent.list +COPY $ENTRY_POINT /agent/entrypoint.sh RUN chmod +x /agent/entrypoint.sh +COPY ./build /agent/build STOPSIGNAL SIGTERM EXPOSE 80 443 diff --git a/test/docker/nginx-oss/rpm/Dockerfile b/test/docker/nginx-oss/rpm/Dockerfile index 8db9618c0..7773e5a32 100644 --- a/test/docker/nginx-oss/rpm/Dockerfile +++ b/test/docker/nginx-oss/rpm/Dockerfile @@ -8,8 +8,6 @@ ARG OS_VERSION ARG OS_RELEASE WORKDIR /agent -COPY ./ /agent -COPY $ENTRY_POINT /agent/entrypoint.sh RUN if [ "$OS_VERSION" = "7" ] && [ "$OS_RELEASE" = "oraclelinux" ]; \ then yum install -y oracle-epel-release-el7; \ @@ -74,6 +72,7 @@ gpgkey=https://nginx.org/keys/nginx_signing.key \n\ module_hotfixes=true" > /etc/yum.repos.d/nginx-agent.repo; \ fi +COPY $ENTRY_POINT /agent/entrypoint.sh RUN chmod +x /agent/entrypoint.sh STOPSIGNAL SIGTERM @@ -87,6 +86,7 @@ FROM install-nginx as install-agent-local ARG PACKAGE_NAME +COPY ./build /agent/build RUN yum localinstall -y /agent/build/${PACKAGE_NAME}.rpm diff --git a/test/docker/nginx-plus/deb/Dockerfile b/test/docker/nginx-plus/deb/Dockerfile index 55c5c773f..d1ae875cb 100644 --- a/test/docker/nginx-plus/deb/Dockerfile +++ b/test/docker/nginx-plus/deb/Dockerfile @@ -8,8 +8,6 @@ ARG PACKAGE_NAME ARG PACKAGES_REPO WORKDIR /agent -COPY ./build /agent/build -COPY $ENTRY_POINT /agent/entrypoint.sh ENV PLUS_VERSION=R32 @@ -57,11 +55,18 @@ EXPOSE 80 STOPSIGNAL SIGQUIT +COPY $ENTRY_POINT /agent/entrypoint.sh RUN chmod +x /agent/entrypoint.sh -RUN apt install -y /agent/build/${PACKAGE_NAME}.deb STOPSIGNAL SIGTERM EXPOSE 80 443 ENTRYPOINT ["/agent/entrypoint.sh"] + +FROM install-nginx as install-agent-local 
+ +ARG PACKAGE_NAME + +COPY ./build /agent/build +RUN apt install -y /agent/build/$PACKAGE_NAME.deb From a0e5d7d12823305a3571fed1e3da45eadd44bac0 Mon Sep 17 00:00:00 2001 From: Craig Elliott Date: Fri, 12 Jun 2026 11:54:00 +0100 Subject: [PATCH 14/16] Update dockerfiles --- test/docker/nginx-oss/apk/Dockerfile | 11 +++++----- test/docker/nginx-oss/deb/Dockerfile | 14 +++++++------ test/docker/nginx-oss/rpm/Dockerfile | 30 +++++++++------------------- 3 files changed, 23 insertions(+), 32 deletions(-) diff --git a/test/docker/nginx-oss/apk/Dockerfile b/test/docker/nginx-oss/apk/Dockerfile index 2efc0740e..f9386d4c5 100644 --- a/test/docker/nginx-oss/apk/Dockerfile +++ b/test/docker/nginx-oss/apk/Dockerfile @@ -9,11 +9,12 @@ WORKDIR /agent RUN set -x \ && addgroup -g 101 -S nginx \ && adduser -S -D -H -u 101 -h /var/cache/nginx -s /sbin/nologin -G nginx -g nginx nginx \ - && apk add ca-certificates \ - curl \ - openssl \ - bash \ - nginx + && apk add --no-cache ca-certificates \ + curl \ + openssl \ + bash \ + procps \ + nginx COPY $ENTRY_POINT /agent/entrypoint.sh RUN chmod +x /agent/entrypoint.sh diff --git a/test/docker/nginx-oss/deb/Dockerfile b/test/docker/nginx-oss/deb/Dockerfile index 70754e29f..cfe1c0d85 100644 --- a/test/docker/nginx-oss/deb/Dockerfile +++ b/test/docker/nginx-oss/deb/Dockerfile @@ -10,18 +10,17 @@ ARG PACKAGES_REPO WORKDIR /agent RUN set -x \ - && ls /usr/sbin/ \ && groupadd --system --gid 101 nginx \ && useradd --system --gid nginx --no-create-home --home /nonexistent --comment "nginx user" --shell /bin/false --uid 101 nginx \ - && apt-get update \ + && (apt-get update || (sleep 30 && apt-get update) || (sleep 60 && apt-get update)) \ && apt-get install --no-install-recommends --no-install-suggests -y ca-certificates \ gnupg2 \ - git \ - make \ curl \ lsb-release \ procps \ - nginx + nginx \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* # Setup nginx agent repository RUN curl https://nginx.org/keys/nginx_signing.key | gpg --dearmor 
| tee /usr/share/keyrings/nginx-archive-keyring.gpg >/dev/null \ @@ -44,4 +43,7 @@ RUN apt install -y /agent/build/$PACKAGE_NAME.deb FROM install-nginx as install-agent-repo -RUN apt-get update && apt-get install -y nginx-agent +RUN (apt-get update || (sleep 30 && apt-get update) || (sleep 60 && apt-get update)) \ + && apt-get install -y nginx-agent \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* diff --git a/test/docker/nginx-oss/rpm/Dockerfile b/test/docker/nginx-oss/rpm/Dockerfile index 7773e5a32..2b99f8970 100644 --- a/test/docker/nginx-oss/rpm/Dockerfile +++ b/test/docker/nginx-oss/rpm/Dockerfile @@ -9,22 +9,11 @@ ARG OS_RELEASE WORKDIR /agent -RUN if [ "$OS_VERSION" = "7" ] && [ "$OS_RELEASE" = "oraclelinux" ]; \ - then yum install -y oracle-epel-release-el7; \ - fi - -RUN if [ "$OS_VERSION" = "2" ] && [ "$OS_RELEASE" = "amazonlinux" ]; \ - then amazon-linux-extras enable epel && yum clean metadata \ - && yum install -y epel-release; \ - fi - -RUN if [ "$OS_RELEASE" = "amazonlinux" ]; \ - then yum install -y shadow-utils; \ - fi - -RUN if [ "$OS_RELEASE" = "centos" ] && [ "$OS_VERSION" = "7" ]; \ - then yum install -y epel-release; \ - fi +RUN set -x \ + && if [ "$OS_VERSION" = "7" ] && [ "$OS_RELEASE" = "oraclelinux" ]; then yum install -y oracle-epel-release-el7; fi \ + && if [ "$OS_VERSION" = "2" ] && [ "$OS_RELEASE" = "amazonlinux" ]; then amazon-linux-extras enable epel && yum clean metadata && yum install -y epel-release; fi \ + && if [ "$OS_RELEASE" = "amazonlinux" ]; then yum install -y shadow-utils; fi \ + && if [ "$OS_RELEASE" = "centos" ] && [ "$OS_VERSION" = "7" ]; then yum install -y epel-release; fi RUN if [ "$OS_RELEASE" = "redhatenterprise" ] && [ "$OS_VERSION" != "9" ]; \ then printf "[nginx] \n\ @@ -40,11 +29,10 @@ RUN set -x \ && adduser -g nginx --system --no-create-home --home /nonexistent --shell /bin/false --uid 101 nginx 2>/dev/null || true \ && usermod -s /sbin/nologin nginx 2>/dev/null || true \ && usermod -L nginx 
2>/dev/null || true \ - && yum install -y git \ - wget \ - procps \ - make \ - nginx + && yum install -y procps-ng \ + nginx \ + && yum clean all \ + && rm -rf /var/cache/yum # Setup nginx agent repository RUN if [ "$OS_VERSION" = "2023" ] && [ "$OS_RELEASE" = "amazonlinux" ]; \ From d078f89a0fc7c3eebb1a8bced6b048f841aa8dac Mon Sep 17 00:00:00 2001 From: Craig Elliott Date: Fri, 12 Jun 2026 16:33:37 +0100 Subject: [PATCH 15/16] Fix dockerfiles --- test/docker/nginx-oss/apk/Dockerfile | 2 +- test/docker/nginx-oss/rpm/Dockerfile | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/test/docker/nginx-oss/apk/Dockerfile b/test/docker/nginx-oss/apk/Dockerfile index f9386d4c5..feb371b75 100644 --- a/test/docker/nginx-oss/apk/Dockerfile +++ b/test/docker/nginx-oss/apk/Dockerfile @@ -5,6 +5,7 @@ LABEL maintainer="NGINX Docker Maintainers " ARG ENTRY_POINT WORKDIR /agent +COPY ./build /agent/build RUN set -x \ && addgroup -g 101 -S nginx \ @@ -28,7 +29,6 @@ FROM install-nginx as install-agent-local ARG PACKAGE_NAME -COPY ./build /agent/build RUN apk add --allow-untrusted /agent/build/${PACKAGE_NAME}.apk FROM install-nginx as install-agent-repo diff --git a/test/docker/nginx-oss/rpm/Dockerfile b/test/docker/nginx-oss/rpm/Dockerfile index 2b99f8970..9f029e424 100644 --- a/test/docker/nginx-oss/rpm/Dockerfile +++ b/test/docker/nginx-oss/rpm/Dockerfile @@ -8,6 +8,7 @@ ARG OS_VERSION ARG OS_RELEASE WORKDIR /agent +COPY ./build /agent/build RUN set -x \ && if [ "$OS_VERSION" = "7" ] && [ "$OS_RELEASE" = "oraclelinux" ]; then yum install -y oracle-epel-release-el7; fi \ From 7f9bd6b094623cfe761ef4c9bcbf10d02a496d17 Mon Sep 17 00:00:00 2001 From: Craig Elliott Date: Fri, 12 Jun 2026 17:04:59 +0100 Subject: [PATCH 16/16] Fix upgrade tests --- test/integration/upgrade/configs/otel/otel-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/integration/upgrade/configs/otel/otel-config.yaml 
b/test/integration/upgrade/configs/otel/otel-config.yaml index 204f5f4c7..f477b6ed5 100644 --- a/test/integration/upgrade/configs/otel/otel-config.yaml +++ b/test/integration/upgrade/configs/otel/otel-config.yaml @@ -44,8 +44,8 @@ service: pipelines: metrics/default: receivers: - - container_metrics - host_metrics + - container_metrics processors: - batch/default_metrics exporters: