Skip to content

Commit 2721e87

Browse files
authored
[Chore] Increase server-load-protection thresholds (#17041)
1 parent d850692 commit 2721e87

10 files changed

Lines changed: 64 additions & 56 deletions

File tree

.github/workflows/cluster-test/mysql_with_mysql_registry/dolphinscheduler_env.sh

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -46,12 +46,14 @@ export DATAX_LAUNCHER=${DATAX_LAUNCHER:-/opt/soft/datax/bin/datax.py}
4646

4747
export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_LAUNCHER:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_LAUNCHER:$PATH
4848

49-
export MASTER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_CPU_USAGE_PERCENTAGE_THRESHOLDS=0.7
50-
export MASTER_SERVER_LOAD_PROTECTION_MAX_JVM_CPU_USAGE_PERCENTAGE_THRESHOLDS=0.7
51-
export MASTER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_MEMORY_USAGE_PERCENTAGE_THRESHOLDS=0.7
52-
export WORKER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_CPU_USAGE_PERCENTAGE_THRESHOLDS=0.7
53-
export WORKER_SERVER_LOAD_PROTECTION_MAX_JVM_CPU_USAGE_PERCENTAGE_THRESHOLDS=0.7
54-
export WORKER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_MEMORY_USAGE_PERCENTAGE_THRESHOLDS=0.7
49+
export MASTER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_CPU_USAGE_PERCENTAGE_THRESHOLDS=0.9
50+
export MASTER_SERVER_LOAD_PROTECTION_MAX_JVM_CPU_USAGE_PERCENTAGE_THRESHOLDS=0.9
51+
export MASTER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_MEMORY_USAGE_PERCENTAGE_THRESHOLDS=0.9
52+
export MASTER_SERVER_LOAD_PROTECTION_MAX_DISK_USAGE_PERCENTAGE_THRESHOLDS=0.9
53+
export WORKER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_CPU_USAGE_PERCENTAGE_THRESHOLDS=0.9
54+
export WORKER_SERVER_LOAD_PROTECTION_MAX_JVM_CPU_USAGE_PERCENTAGE_THRESHOLDS=0.9
55+
export WORKER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_MEMORY_USAGE_PERCENTAGE_THRESHOLDS=0.9
56+
export WORKER_SERVER_LOAD_PROTECTION_MAX_DISK_USAGE_PERCENTAGE_THRESHOLDS=0.9
5557

5658
# applicationId auto collection related configuration, the following configurations are unnecessary if setting appId.collect=log
5759
#export HADOOP_CLASSPATH=`hadoop classpath`:${DOLPHINSCHEDULER_HOME}/tools/libs/*

.github/workflows/cluster-test/mysql_with_zookeeper_registry/dolphinscheduler_env.sh

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -45,12 +45,14 @@ export DATAX_LAUNCHER=${DATAX_LAUNCHER:-/opt/soft/datax/bin/datax.py}
4545

4646
export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_LAUNCHER:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_LAUNCHER:$PATH
4747

48-
export MASTER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_CPU_USAGE_PERCENTAGE_THRESHOLDS=0.7
49-
export MASTER_SERVER_LOAD_PROTECTION_MAX_JVM_CPU_USAGE_PERCENTAGE_THRESHOLDS=0.7
50-
export MASTER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_MEMORY_USAGE_PERCENTAGE_THRESHOLDS=0.7
51-
export WORKER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_CPU_USAGE_PERCENTAGE_THRESHOLDS=0.7
52-
export WORKER_SERVER_LOAD_PROTECTION_MAX_JVM_CPU_USAGE_PERCENTAGE_THRESHOLDS=0.7
53-
export WORKER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_MEMORY_USAGE_PERCENTAGE_THRESHOLDS=0.7
48+
export MASTER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_CPU_USAGE_PERCENTAGE_THRESHOLDS=0.9
49+
export MASTER_SERVER_LOAD_PROTECTION_MAX_JVM_CPU_USAGE_PERCENTAGE_THRESHOLDS=0.9
50+
export MASTER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_MEMORY_USAGE_PERCENTAGE_THRESHOLDS=0.9
51+
export MASTER_SERVER_LOAD_PROTECTION_MAX_DISK_USAGE_PERCENTAGE_THRESHOLDS=0.9
52+
export WORKER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_CPU_USAGE_PERCENTAGE_THRESHOLDS=0.9
53+
export WORKER_SERVER_LOAD_PROTECTION_MAX_JVM_CPU_USAGE_PERCENTAGE_THRESHOLDS=0.9
54+
export WORKER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_MEMORY_USAGE_PERCENTAGE_THRESHOLDS=0.9
55+
export WORKER_SERVER_LOAD_PROTECTION_MAX_DISK_USAGE_PERCENTAGE_THRESHOLDS=0.9
5456

5557
# applicationId auto collection related configuration, the following configurations are unnecessary if setting appId.collect=log
5658
#export HADOOP_CLASSPATH=`hadoop classpath`:${DOLPHINSCHEDULER_HOME}/tools/libs/*

.github/workflows/cluster-test/postgresql_with_postgresql_registry/dolphinscheduler_env.sh

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -46,12 +46,14 @@ export DATAX_LAUNCHER=${DATAX_LAUNCHER:-/opt/soft/datax/bin/datax.py}
4646

4747
export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_LAUNCHER:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_LAUNCHER:$PATH
4848

49-
export MASTER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_CPU_USAGE_PERCENTAGE_THRESHOLDS=0.7
50-
export MASTER_SERVER_LOAD_PROTECTION_MAX_JVM_CPU_USAGE_PERCENTAGE_THRESHOLDS=0.7
51-
export MASTER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_MEMORY_USAGE_PERCENTAGE_THRESHOLDS=0.7
52-
export WORKER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_CPU_USAGE_PERCENTAGE_THRESHOLDS=0.7
53-
export WORKER_SERVER_LOAD_PROTECTION_MAX_JVM_CPU_USAGE_PERCENTAGE_THRESHOLDS=0.7
54-
export WORKER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_MEMORY_USAGE_PERCENTAGE_THRESHOLDS=0.7
49+
export MASTER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_CPU_USAGE_PERCENTAGE_THRESHOLDS=0.9
50+
export MASTER_SERVER_LOAD_PROTECTION_MAX_JVM_CPU_USAGE_PERCENTAGE_THRESHOLDS=0.9
51+
export MASTER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_MEMORY_USAGE_PERCENTAGE_THRESHOLDS=0.9
52+
export MASTER_SERVER_LOAD_PROTECTION_MAX_DISK_USAGE_PERCENTAGE_THRESHOLDS=0.9
53+
export WORKER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_CPU_USAGE_PERCENTAGE_THRESHOLDS=0.9
54+
export WORKER_SERVER_LOAD_PROTECTION_MAX_JVM_CPU_USAGE_PERCENTAGE_THRESHOLDS=0.9
55+
export WORKER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_MEMORY_USAGE_PERCENTAGE_THRESHOLDS=0.9
56+
export WORKER_SERVER_LOAD_PROTECTION_MAX_DISK_USAGE_PERCENTAGE_THRESHOLDS=0.9
5557

5658
# applicationId auto collection related configuration, the following configurations are unnecessary if setting appId.collect=log
5759
#export HADOOP_CLASSPATH=`hadoop classpath`:${DOLPHINSCHEDULER_HOME}/tools/libs/*

.github/workflows/cluster-test/postgresql_with_zookeeper_registry/dolphinscheduler_env.sh

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -45,12 +45,14 @@ export DATAX_LAUNCHER=${DATAX_LAUNCHER:-/opt/soft/datax/bin/datax.py}
4545

4646
export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PYTHON_LAUNCHER:$JAVA_HOME/bin:$HIVE_HOME/bin:$FLINK_HOME/bin:$DATAX_LAUNCHER:$PATH
4747

48-
export MASTER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_CPU_USAGE_PERCENTAGE_THRESHOLDS=0.7
49-
export MASTER_SERVER_LOAD_PROTECTION_MAX_JVM_CPU_USAGE_PERCENTAGE_THRESHOLDS=0.7
50-
export MASTER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_MEMORY_USAGE_PERCENTAGE_THRESHOLDS=0.7
51-
export WORKER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_CPU_USAGE_PERCENTAGE_THRESHOLDS=0.7
52-
export WORKER_SERVER_LOAD_PROTECTION_MAX_JVM_CPU_USAGE_PERCENTAGE_THRESHOLDS=0.7
53-
export WORKER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_MEMORY_USAGE_PERCENTAGE_THRESHOLDS=0.7
48+
export MASTER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_CPU_USAGE_PERCENTAGE_THRESHOLDS=0.9
49+
export MASTER_SERVER_LOAD_PROTECTION_MAX_JVM_CPU_USAGE_PERCENTAGE_THRESHOLDS=0.9
50+
export MASTER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_MEMORY_USAGE_PERCENTAGE_THRESHOLDS=0.9
51+
export MASTER_SERVER_LOAD_PROTECTION_MAX_DISK_USAGE_PERCENTAGE_THRESHOLDS=0.9
52+
export WORKER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_CPU_USAGE_PERCENTAGE_THRESHOLDS=0.9
53+
export WORKER_SERVER_LOAD_PROTECTION_MAX_JVM_CPU_USAGE_PERCENTAGE_THRESHOLDS=0.9
54+
export WORKER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_MEMORY_USAGE_PERCENTAGE_THRESHOLDS=0.9
55+
export WORKER_SERVER_LOAD_PROTECTION_MAX_DISK_USAGE_PERCENTAGE_THRESHOLDS=0.9
5456

5557
# applicationId auto collection related configuration, the following configurations are unnecessary if setting appId.collect=log
5658
#export HADOOP_CLASSPATH=`hadoop classpath`:${DOLPHINSCHEDULER_HOME}/tools/libs/*

docs/docs/en/architecture/configuration.md

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -282,10 +282,10 @@ Location: `master-server/conf/application.yaml`
282282
| master.worker-load-balancer-configuration-properties.type | DYNAMIC_WEIGHTED_ROUND_ROBIN | Master will use the worker's cpu/memory/threadPool usage to calculate the worker load; workers with a lower load have a higher chance of being dispatched tasks |
283283
| master.max-heartbeat-interval | 10s | master max heartbeat interval |
284284
| master.server-load-protection.enabled | true | If set true, will open master overload protection |
285-
| master.server-load-protection.max-system-cpu-usage-percentage-thresholds | 0.7 | Master max system cpu usage, when the master's system cpu usage is smaller then this value, master server can execute workflow. |
286-
| master.server-load-protection.max-jvm-cpu-usage-percentage-thresholds | 0.7 | Master max JVM cpu usage, when the master's jvm cpu usage is smaller then this value, master server can execute workflow. |
287-
| master.server-load-protection.max-system-memory-usage-percentage-thresholds | 0.7 | Master max system memory usage , when the master's system memory usage is smaller then this value, master server can execute workflow. |
288-
| master.server-load-protection.max-disk-usage-percentage-thresholds | 0.7 | Master max disk usage , when the master's disk usage is smaller then this value, master server can execute workflow. |
285+
| master.server-load-protection.max-system-cpu-usage-percentage-thresholds | 0.8 | Master max system cpu usage, when the master's system cpu usage is smaller than this value, master server can execute workflow. |
286+
| master.server-load-protection.max-jvm-cpu-usage-percentage-thresholds | 0.8 | Master max JVM cpu usage, when the master's jvm cpu usage is smaller than this value, master server can execute workflow. |
287+
| master.server-load-protection.max-system-memory-usage-percentage-thresholds | 0.8 | Master max system memory usage, when the master's system memory usage is smaller than this value, master server can execute workflow. |
288+
| master.server-load-protection.max-disk-usage-percentage-thresholds | 0.8 | Master max disk usage, when the master's disk usage is smaller than this value, master server can execute workflow. |
289289
| master.worker-group-refresh-interval | 10s | The interval to refresh worker group from db to memory |
290290
| master.command-fetch-strategy.type | ID_SLOT_BASED | The command fetch strategy, only support `ID_SLOT_BASED` |
291291
| master.command-fetch-strategy.config.id-step | 1 | The id auto incremental step of t_ds_command in db |
@@ -301,10 +301,10 @@ Location: `worker-server/conf/application.yaml`
301301
| worker.max-heartbeat-interval | 10s | worker-service max heartbeat interval |
302302
| worker.host-weight | 100 | worker host weight to dispatch tasks |
303303
| worker.server-load-protection.enabled | true | If set true will open worker overload protection |
304-
| worker.server-load-protection.max-system-cpu-usage-percentage-thresholds | 0.7 | Worker max system cpu usage, when the worker's system cpu usage is smaller then this value, master server can execute workflow. |
305-
| worker.server-load-protection.max-jvm-cpu-usage-percentage-thresholds | 0.7 | Worker max JVM cpu usage, when the worker's jvm cpu usage is smaller then this value, master server can execute workflow. |
306-
| worker.server-load-protection.max-system-memory-usage-percentage-thresholds | 0.7 | Worker max system memory usage , when the worker's system memory usage is smaller then this value, master server can execute workflow. |
307-
| worker.server-load-protection.max-disk-usage-percentage-thresholds | 0.7 | Worker max disk usage , when the worker's disk usage is smaller then this value, master server can execute workflow. |
304+
| worker.server-load-protection.max-system-cpu-usage-percentage-thresholds | 0.8 | Worker max system cpu usage, when the worker's system cpu usage is smaller than this value, worker server can accept tasks. |
305+
| worker.server-load-protection.max-jvm-cpu-usage-percentage-thresholds | 0.8 | Worker max JVM cpu usage, when the worker's jvm cpu usage is smaller than this value, worker server can accept tasks. |
306+
| worker.server-load-protection.max-system-memory-usage-percentage-thresholds | 0.8 | Worker max system memory usage, when the worker's system memory usage is smaller than this value, worker server can accept tasks. |
307+
| worker.server-load-protection.max-disk-usage-percentage-thresholds | 0.8 | Worker max disk usage, when the worker's disk usage is smaller than this value, worker server can accept tasks. |
308308
| worker.registry-disconnect-strategy.strategy | stop | Used when the worker disconnects from the registry; default value: stop. Optional values are stop and waiting |
309309
| worker.registry-disconnect-strategy.max-waiting-time | 100s | Used when the worker disconnects from the registry and the disconnect strategy is waiting. The worker will wait up to this long to reconnect to the registry; if it still cannot connect after the waiting time, it will stop itself. If the value is 0s, it will wait indefinitely |
310310
| worker.physical-task-config.task-executor-thread-size | 100 | The thread size used to execute physical task |

docs/docs/zh/architecture/configuration.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -309,10 +309,10 @@ common.properties配置文件目前主要是配置hadoop/s3/yarn/applicationId
309309
| worker.host-weight | 100 | 派发任务时,worker主机的权重 |
310310
| worker.tenant-auto-create | true | 租户对应于系统的用户,由worker提交作业.如果系统没有该用户,则在参数worker.tenant.auto.create为true后自动创建。 |
311311
| worker.server-load-protection.enabled | true | 是否开启系统保护策略 |
312-
| worker.server-load-protection.max-system-cpu-usage-percentage-thresholds | 0.7 | worker最大系统cpu使用值,只有当前系统cpu使用值低于最大系统cpu使用值,worker服务才能接收任务. 默认值为0.7: 会使用70%的操作系统CPU |
313-
| worker.server-load-protection.max-jvm-cpu-usage-percentage-thresholds | 0.7 | worker最大JVM cpu使用值,只有当前JVM cpu使用值低于最大JVM cpu使用值,worker服务才能接收任务. 默认值为0.7: 会使用70%的JVM CPU |
314-
| worker.server-load-protection.max-system-memory-usage-percentage-thresholds | 0.7 | worker最大系统 内存使用值,只有当前系统内存使用值低于最大系统内存使用值,worker服务才能接收任务. 默认值为0.7: 会使用70%的操作系统内存 |
315-
| worker.server-load-protection.max-disk-usage-percentage-thresholds | 0.7 | worker最大系统磁盘使用值,只有当前系统磁盘使用值低于最大系统磁盘使用值,worker服务才能接收任务. 默认值为0.7: 会使用70%的操作系统磁盘空间 |
312+
| worker.server-load-protection.max-system-cpu-usage-percentage-thresholds | 0.8 | worker最大系统cpu使用值,只有当前系统cpu使用值低于最大系统cpu使用值,worker服务才能接收任务. 默认值为0.8: 会使用80%的操作系统CPU |
313+
| worker.server-load-protection.max-jvm-cpu-usage-percentage-thresholds | 0.8 | worker最大JVM cpu使用值,只有当前JVM cpu使用值低于最大JVM cpu使用值,worker服务才能接收任务. 默认值为0.8: 会使用80%的JVM CPU |
314+
| worker.server-load-protection.max-system-memory-usage-percentage-thresholds | 0.8 | worker最大系统内存使用值,只有当前系统内存使用值低于最大系统内存使用值,worker服务才能接收任务. 默认值为0.8: 会使用80%的操作系统内存 |
315+
| worker.server-load-protection.max-disk-usage-percentage-thresholds | 0.8 | worker最大系统磁盘使用值,只有当前系统磁盘使用值低于最大系统磁盘使用值,worker服务才能接收任务. 默认值为0.8: 会使用80%的操作系统磁盘空间 |
316316
| worker.alert-listen-host | localhost | alert监听host |
317317
| worker.alert-listen-port | 50052 | alert监听端口 |
318318
| worker.physical-task-config.task-executor-thread-size | 100 | Worker中任务最大并发度 |

0 commit comments

Comments
 (0)