diff --git a/fdbclient/ServerKnobs.cpp b/fdbclient/ServerKnobs.cpp index 1b2c9a42e0d..9fb0b091394 100644 --- a/fdbclient/ServerKnobs.cpp +++ b/fdbclient/ServerKnobs.cpp @@ -150,8 +150,10 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi // Hard cap on total relocations DD tracks (queued + in-flight). 1000 corresponds to a 500-server // cluster with two concurrent shard moves per storage server. We have observed large clusters doing // 25-30GB in flight, or closer to 100 shards at a time, so this has plenty of margin of safety - // built in. - init( DD_MAX_PIPELINE_MOVES, 1000 ); if( randomize && BUGGIFY ) DD_MAX_PIPELINE_MOVES = 5; + // built in. For simulation, we don't really know how many servers there are, but 10 seems like a good + // guess (thus 20 moves). Do not buggify this too small: testing under artificial scarcity results in + // uninteresting degenerate cases. + init( DD_MAX_PIPELINE_MOVES, 1000 ); if( randomize && BUGGIFY ) DD_MAX_PIPELINE_MOVES = 20; init( DD_REBALANCE_RESET_AMOUNT, 30 ); init( INFLIGHT_PENALTY_HEALTHY, 1.0 ); init( INFLIGHT_PENALTY_UNHEALTHY, 500.0 ); diff --git a/fdbserver/DDRelocationQueue.actor.cpp b/fdbserver/DDRelocationQueue.actor.cpp index cb0d73cce66..7af3852766a 100644 --- a/fdbserver/DDRelocationQueue.actor.cpp +++ b/fdbserver/DDRelocationQueue.actor.cpp @@ -591,6 +591,7 @@ void DDQueue::updatePipelineFull() { .detail("PipelineSize", pipelineSize()) .detail("PendingGateRelocations", pendingGateRelocations) .detail("PipelineLimit", SERVER_KNOBS->DD_MAX_PIPELINE_MOVES); + CODE_PROBE(true, "DD Pipeline Full"); } else if (pipelineSize() < SERVER_KNOBS->DD_MAX_PIPELINE_MOVES && pipelineFull->get()) { pipelineFull->set(false); TraceEvent("DDPipelineFullCleared", distributorId) @@ -2415,10 +2416,10 @@ ACTOR Future BgDDLoadRebalance(DDQueue* self, int teamCollectionIndex, Dat } } -// Gates the relocation input stream by the pipeline limit. Cancellations and high-priority -// moves (>= PRIORITY_TEAM_UNHEALTHY) always pass through immediately so that failure recovery -// is never blocked by stuck or zombie moves holding pipeline slots. All other relocations are -// held when the pipeline is full, waiting for pipelineFull to become false before forwarding. +// Gates the relocation input stream by the pipeline limit. Cancellations always pass through +// immediately because they reduce tracked metadata rather than adding to it. All other +// relocations, regardless of priority, are held when the pipeline is full, waiting for +// pipelineFull to become false before forwarding. // The global isDDPipelineControlEnabled() flag (cleared by disableDDPipelineControl()) also // bypasses the gate, allowing the test harness to open up the pipeline so DD can quiesce. // We poll it via delay() rather than AsyncVar to avoid cross-process callbacks in simulation. @@ -2427,7 +2428,7 @@ ACTOR Future pipelineGateActor(Reference self, PromiseStream output) { loop { state RelocateShard rs = waitNext(input); - if (!rs.cancelled && rs.priority < SERVER_KNOBS->PRIORITY_TEAM_UNHEALTHY) { + if (!rs.cancelled) { while (self->pipelineFull->get() && isDDPipelineControlEnabled()) { TraceEvent("DDPipelineFull", self->distributorId) .suppressFor(30.0) diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index bc8e329766c..3de3d0cb8cb 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -157,6 +157,7 @@ if(WITH_PYTHON) add_fdb_test(TEST_FILES fast/ChangeFeedOperations.toml) add_fdb_test(TEST_FILES fast/ChangeFeedOperationsMove.toml) add_fdb_test(TEST_FILES fast/DataLossRecovery.toml) + add_fdb_test(TEST_FILES fast/DDPipelineSaturation.toml) add_fdb_test(TEST_FILES fast/EncryptionOps.toml) add_fdb_test(TEST_FILES fast/EncryptionUnitTests.toml) add_fdb_test(TEST_FILES fast/EncryptKeyProxyTest.toml) diff --git a/tests/fast/DDPipelineSaturation.toml b/tests/fast/DDPipelineSaturation.toml new file mode 100644 index 00000000000..d2794f39a5f --- /dev/null +++ b/tests/fast/DDPipelineSaturation.toml @@ -0,0 +1,35 @@ +# Exercises the Data Distribution relocation pipeline back-pressure gate +# (DD_MAX_PIPELINE_MOVES). Forces many small shards and then a burst of +# failure-recovery relocations (machine kills) so that pipelineSize() +# exceeds the limit, tripping the "DD Pipeline Full" code probe in +# DDRelocationQueue.actor.cpp. The limit is pinned to 20 -- the realistic +# low end of the knob's range -- rather than an unrealistically small value +# that would destabilize unrelated simulations. + +[[knobs]] +dd_max_pipeline_moves = 20 +min_shard_bytes = 10000 +shard_bytes_per_sqrt_bytes = 0 + +[[test]] +testTitle = 'DDPipelineSaturation' + + [[test.workload]] + testName = 'Cycle' + transactionsPerSecond = 5000.0 + nodeCount = 60000 + testDuration = 30.0 + + [[test.workload]] + testName = 'Attrition' + machinesToKill = 2 + machinesToLeave = 5 + reboot = true + testDuration = 30.0 + + [[test.workload]] + testName = 'Attrition' + machinesToKill = 2 + machinesToLeave = 5 + reboot = true + testDuration = 30.0