Skip to content

Cuda failure 217 'peer access is not supported between these two devices' #11

@X1AOX1A

Description

@X1AOX1A

NCCL P2P Communication Error During Multi-GPU Training

Problem

PPO training fails with NCCL error on multi-GPU setup due to unsupported peer access between devices.

Error Details

(WorkerDict pid=565043) /home/aiscuser/CODES/AgentFly/uv_agentfly/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py:4631: UserWarning: No device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
(WorkerDict pid=565043)   warnings.warn(  # warn only once
(WorkerDict pid=564401) [rank0]:[W831 14:46:12.241792627 ProcessGroupNCCL.cpp:4718] [PG ID 0 PG GUID 0 Rank 0]  using GPU 0 as device used by this process is currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. You can pecify device_id in init_process_group() to force use of a particular device.
Error executing job with overrides: ['algorithm.adv_estimator=grpo', 'data.train_files=/mnt/yixiali/CODES/AgentFly/data/rlhf/math//orz_math_57k_train.json', 'data.val_files=/mnt/yixiali/CODES/AgentFly/data/rlhf/math//MATH_500.json', 'data.train_batch_size=64', 'agent.agent_type=code', 'agent.tools=[code_interpreter]', 'agent.template=qwen2.5-no-system-tool', 'agent.model_name_or_path=Qwen/Qwen2.5-3B-Instruct', 'agent.max_turns=8', 'agent.backend=async_verl', 'agent.reward_name=math_reward_tool', 'agent.num_chains=8', 'agent.use_agent=True', 'actor_rollout_ref.actor.optim.lr=5e-7', 'actor_rollout_ref.model.use_remove_padding=False', 'actor_rollout_ref.model.path=Qwen/Qwen2.5-3B-Instruct', 'actor_rollout_ref.actor.ppo_mini_batch_size=64', 'actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=2', 'actor_rollout_ref.actor.use_kl_loss=True', 'actor_rollout_ref.actor.kl_loss_coef=0.001', 'actor_rollout_ref.actor.kl_loss_type=mse', 'actor_rollout_ref.actor.entropy_coeff=0.001', 'actor_rollout_ref.model.enable_gradient_checkpointing=False', 'actor_rollout_ref.actor.fsdp_config.param_offload=True', 'actor_rollout_ref.actor.fsdp_config.optimizer_offload=True', 'actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=4', 'actor_rollout_ref.rollout.tensor_model_parallel_size=2', 'actor_rollout_ref.rollout.name=vllm', 'actor_rollout_ref.rollout.response_length=512', 'actor_rollout_ref.rollout.gpu_memory_utilization=0.5', 'actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=4', 'actor_rollout_ref.ref.fsdp_config.param_offload=True', 'critic.model.path=Qwen/Qwen2.5-3B-Instruct', 'critic.ppo_mini_batch_size=64', 'critic.ppo_micro_batch_size_per_gpu=2', 'algorithm.kl_ctrl.kl_coef=0.001', 'trainer.critic_warmup=0', 'trainer.logger=[console,wandb]', 'trainer.project_name=AgentRL', 'trainer.experiment_name=test', 'trainer.n_gpus_per_node=4', 'trainer.nnodes=1', 'trainer.save_freq=50', 'trainer.test_freq=10', 'trainer.total_training_steps=200', 'trainer.val_before_train=False']
Traceback (most recent call last):
  File "/opt/conda/envs/ptca/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/opt/conda/envs/ptca/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/home/aiscuser/CODES/AgentFly/verl/verl/trainer/main_ppo.py", line 244, in <module>
    main()
  File "/home/aiscuser/CODES/AgentFly/uv_agentfly/lib/python3.10/site-packages/hydra/main.py", line 94, in decorated_main
    _run_hydra(
  File "/home/aiscuser/CODES/AgentFly/uv_agentfly/lib/python3.10/site-packages/hydra/_internal/utils.py", line 394, in _run_hydra
    _run_app(
  File "/home/aiscuser/CODES/AgentFly/uv_agentfly/lib/python3.10/site-packages/hydra/_internal/utils.py", line 457, in _run_app
    run_and_report(
  File "/home/aiscuser/CODES/AgentFly/uv_agentfly/lib/python3.10/site-packages/hydra/_internal/utils.py", line 223, in run_and_report
    raise ex
  File "/home/aiscuser/CODES/AgentFly/uv_agentfly/lib/python3.10/site-packages/hydra/_internal/utils.py", line 220, in run_and_report
    return func()
  File "/home/aiscuser/CODES/AgentFly/uv_agentfly/lib/python3.10/site-packages/hydra/_internal/utils.py", line 458, in <lambda>
    lambda: hydra.run(
  File "/home/aiscuser/CODES/AgentFly/uv_agentfly/lib/python3.10/site-packages/hydra/_internal/hydra.py", line 132, in run
    _ = ret.return_value
  File "/home/aiscuser/CODES/AgentFly/uv_agentfly/lib/python3.10/site-packages/hydra/core/utils.py", line 260, in return_value
    raise self._return_value
  File "/home/aiscuser/CODES/AgentFly/uv_agentfly/lib/python3.10/site-packages/hydra/core/utils.py", line 186, in run_job
    ret.return_value = task_function(task_cfg)
  File "/home/aiscuser/CODES/AgentFly/verl/verl/trainer/main_ppo.py", line 62, in main
    run_ppo(config)
  File "/home/aiscuser/CODES/AgentFly/verl/verl/trainer/main_ppo.py", line 74, in run_ppo
    ray.get(runner.run.remote(config))
  File "/home/aiscuser/CODES/AgentFly/uv_agentfly/lib/python3.10/site-packages/ray/_private/auto_init_hook.py", line 22, in auto_init_wrapper
    return fn(*args, **kwargs)
  File "/home/aiscuser/CODES/AgentFly/uv_agentfly/lib/python3.10/site-packages/ray/_private/client_mode_hook.py", line 104, in wrapper
    return func(*args, **kwargs)
  File "/home/aiscuser/CODES/AgentFly/uv_agentfly/lib/python3.10/site-packages/ray/_private/worker.py", line 2882, in get
    values, debugger_breakpoint = worker.get_objects(object_refs, timeout=timeout)
  File "/home/aiscuser/CODES/AgentFly/uv_agentfly/lib/python3.10/site-packages/ray/_private/worker.py", line 968, in get_objects
    raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(DistBackendError): ray::TaskRunner.run() (pid=560810, ip=100.64.24.39, actor_id=e8bf18909706ae5475513a4701000000, repr=<main_ppo.TaskRunner object at 0x756c19ba2c80>)
  File "/home/aiscuser/CODES/AgentFly/verl/verl/trainer/main_ppo.py", line 180, in run
    trainer.init_workers()
  File "/home/aiscuser/CODES/AgentFly/verl/verl/trainer/ppo/ray_trainer.py", line 746, in init_workers
    self.ref_policy_wg.init_model()
  File "/home/aiscuser/CODES/AgentFly/verl/verl/single_controller/ray/base.py", line 49, in func
    output = ray.get(output)
ray.exceptions.RayTaskError(DistBackendError): ray::WorkerDict.ref_init_model() (pid=565045, ip=100.64.24.39, actor_id=7cc9f5a6383c4734a8d1c4f501000000, repr=<verl.single_controller.ray.base.WorkerDict object at 0x749e8269b040>)
  File "/home/aiscuser/CODES/AgentFly/verl/verl/single_controller/ray/base.py", line 466, in func
    return getattr(self.worker_dict[key], name)(*args, **kwargs)
  File "/home/aiscuser/CODES/AgentFly/verl/verl/single_controller/base/decorator.py", line 501, in inner
    return func(*args, **kwargs)
  File "/home/aiscuser/CODES/AgentFly/verl/verl/workers/fsdp_workers.py", line 521, in init_model
    self.ref_module_fsdp = self._build_model_optimizer(
  File "/home/aiscuser/CODES/AgentFly/verl/verl/workers/fsdp_workers.py", line 233, in _build_model_optimizer
    torch.distributed.barrier()
  File "/home/aiscuser/CODES/AgentFly/uv_agentfly/lib/python3.10/site-packages/torch/distributed/c10d_logger.py", line 81, in wrapper
    return func(*args, **kwargs)
  File "/home/aiscuser/CODES/AgentFly/uv_agentfly/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 4635, in barrier
    work = group.barrier(opts=opts)
torch.distributed.DistBackendError: NCCL error in: /pytorch/torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp:3356, unhandled cuda error (run with NCCL_DEBUG=INFO for details), NCCL version 2.26.2
ncclUnhandledCudaError: Call to CUDA function failed.
Last error:
Cuda failure 217 'peer access is not supported between these two devices'
Cleaning up environments...
0it [00:00, ?it/s]
(TaskRunner pid=560810) Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): ray::WorkerDict.ref_init_model() (pid=565044, ip=100.64.24.39, actor_id=0f341df54875fc057ed0c0bd01000000, repr=<verl.single_controller.ray.base.WorkerDict object at 0x7d703320b040>)
(TaskRunner pid=560810)   File "/home/aiscuser/CODES/AgentFly/verl/verl/single_controller/ray/base.py", line 466, in func
(TaskRunner pid=560810)     return getattr(self.worker_dict[key], name)(*args, **kwargs)
(TaskRunner pid=560810)   File "/home/aiscuser/CODES/AgentFly/verl/verl/single_controller/base/decorator.py", line 501, in inner
(TaskRunner pid=560810)     return func(*args, **kwargs)
(TaskRunner pid=560810)   File "/home/aiscuser/CODES/AgentFly/verl/verl/workers/fsdp_workers.py", line 521, in init_model
(TaskRunner pid=560810)     self.ref_module_fsdp = self._build_model_optimizer(
(TaskRunner pid=560810)   File "/home/aiscuser/CODES/AgentFly/verl/verl/workers/fsdp_workers.py", line 233, in _build_model_optimizer
(TaskRunner pid=560810)     torch.distributed.barrier()
(TaskRunner pid=560810)   File "/home/aiscuser/CODES/AgentFly/uv_agentfly/lib/python3.10/site-packages/torch/distributed/c10d_logger.py", line 81, in wrapper
(TaskRunner pid=560810)     return func(*args, **kwargs)
(TaskRunner pid=560810)   File "/home/aiscuser/CODES/AgentFly/uv_agentfly/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 4635, in barrier
(TaskRunner pid=560810)     work = group.barrier(opts=opts)
(TaskRunner pid=560810) torch.distributed.DistBackendError: NCCL error in: /pytorch/torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp:3356, unhandled cuda error (run with NCCL_DEBUG=INFO for details), NCCL version 2.26.2
(TaskRunner pid=560810) ncclUnhandledCudaError: Call to CUDA function failed.
(TaskRunner pid=560810) Last error:
(TaskRunner pid=560810) Cuda failure 217 'peer access is not supported between these two devices'
(TaskRunner pid=560810) Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): ray::WorkerDict.ref_init_model() (pid=564401, ip=100.64.24.39, actor_id=89558e3d0536a122e39c2a7e01000000, repr=<verl.single_controller.ray.base.WorkerDict object at 0x74fa36f1f0a0>)
(TaskRunner pid=560810)   File "/home/aiscuser/CODES/AgentFly/verl/verl/single_controller/ray/base.py", line 466, in func
(TaskRunner pid=560810)     return getattr(self.worker_dict[key], name)(*args, **kwargs)
(TaskRunner pid=560810)   File "/home/aiscuser/CODES/AgentFly/verl/verl/single_controller/base/decorator.py", line 501, in inner
(TaskRunner pid=560810)     return func(*args, **kwargs)
(TaskRunner pid=560810)   File "/home/aiscuser/CODES/AgentFly/verl/verl/workers/fsdp_workers.py", line 521, in init_model
(TaskRunner pid=560810)     self.ref_module_fsdp = self._build_model_optimizer(
(TaskRunner pid=560810)   File "/home/aiscuser/CODES/AgentFly/verl/verl/workers/fsdp_workers.py", line 233, in _build_model_optimizer
(TaskRunner pid=560810)     torch.distributed.barrier()
(TaskRunner pid=560810)   File "/home/aiscuser/CODES/AgentFly/uv_agentfly/lib/python3.10/site-packages/torch/distributed/c10d_logger.py", line 81, in wrapper
(TaskRunner pid=560810)     return func(*args, **kwargs)
(TaskRunner pid=560810)   File "/home/aiscuser/CODES/AgentFly/uv_agentfly/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 4635, in barrier
(TaskRunner pid=560810)     work = group.barrier(opts=opts)
(TaskRunner pid=560810) torch.distributed.DistBackendError: NCCL error in: /pytorch/torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp:3356, unhandled cuda error (run with NCCL_DEBUG=INFO for details), NCCL version 2.26.2
(TaskRunner pid=560810) ncclUnhandledCudaError: Call to CUDA function failed.
(TaskRunner pid=560810) Last error:
(TaskRunner pid=560810) Cuda failure 217 'peer access is not supported between these two devices'
(TaskRunner pid=560810) Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): ray::WorkerDict.ref_init_model() (pid=565043, ip=100.64.24.39, actor_id=82614e5081ad45519bc4c3f201000000, repr=<verl.single_controller.ray.base.WorkerDict object at 0x70727f5d6f20>)
(TaskRunner pid=560810)   File "/home/aiscuser/CODES/AgentFly/verl/verl/single_controller/ray/base.py", line 466, in func
(TaskRunner pid=560810)     return getattr(self.worker_dict[key], name)(*args, **kwargs)
(TaskRunner pid=560810)   File "/home/aiscuser/CODES/AgentFly/verl/verl/single_controller/base/decorator.py", line 501, in inner
(TaskRunner pid=560810)     return func(*args, **kwargs)
(TaskRunner pid=560810)   File "/home/aiscuser/CODES/AgentFly/verl/verl/workers/fsdp_workers.py", line 521, in init_model
(TaskRunner pid=560810)     self.ref_module_fsdp = self._build_model_optimizer(
(TaskRunner pid=560810)   File "/home/aiscuser/CODES/AgentFly/verl/verl/workers/fsdp_workers.py", line 233, in _build_model_optimizer
(TaskRunner pid=560810)     torch.distributed.barrier()
(TaskRunner pid=560810)   File "/home/aiscuser/CODES/AgentFly/uv_agentfly/lib/python3.10/site-packages/torch/distributed/c10d_logger.py", line 81, in wrapper
(TaskRunner pid=560810)     return func(*args, **kwargs)
(TaskRunner pid=560810)   File "/home/aiscuser/CODES/AgentFly/uv_agentfly/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py", line 4635, in barrier
(TaskRunner pid=560810)     work = group.barrier(opts=opts)
(TaskRunner pid=560810) torch.distributed.DistBackendError: NCCL error in: /pytorch/torch/csrc/distributed/c10d/ProcessGroupNCCL.cpp:3356, unhandled cuda error (run with NCCL_DEBUG=INFO for details), NCCL version 2.26.2
(TaskRunner pid=560810) ncclUnhandledCudaError: Call to CUDA function failed.
(TaskRunner pid=560810) Last error:
(TaskRunner pid=560810) Cuda failure 217 'peer access is not supported between these two devices'
(WorkerDict pid=565044) [W831 14:46:10.832875108 Utils.hpp:137] Warning: Environment variable NCCL_ASYNC_ERROR_HANDLING is deprecated; use TORCH_NCCL_ASYNC_ERROR_HANDLING instead (function operator()) [repeated 3x across cluster] (Ray deduplicates logs by default. Set RAY_DEDUP_LOGS=0 to disable log deduplication, or see https://docs.ray.io/en/master/ray-observability/user-guides/configure-logging.html#log-deduplication for more options.)
(WorkerDict pid=565044) [W831 14:46:10.832074897 socket.cpp:755] [c10d] The client socket cannot be initialized to connect to [100-64-24-39.proxy-node-0.79e5d84c-257e-473d-ad40-0b89b73e0ad7.svc.cluster.local]:50601 (errno: 97 - Address family not supported by protocol).
(WorkerDict pid=564401) `torch_dtype` is deprecated! Use `dtype` instead! [repeated 3x across cluster]
Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s] [repeated 3x across cluster]
Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00, 16.22it/s] [repeated 3x across cluster]
(WorkerDict pid=564401) /home/aiscuser/CODES/AgentFly/uv_agentfly/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py:4631: UserWarning: No device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.  [repeated 3x across cluster]
(WorkerDict pid=564401)   warnings.warn(  # warn only once [repeated 3x across cluster]
(WorkerDict pid=565044) [rank2]:[W831 14:46:12.312774421 ProcessGroupNCCL.cpp:4718] [PG ID 0 PG GUID 0 Rank 2]  using GPU 0 as device used by this process is currently unknown. This can potentially cause a hang if this rank to GPU mapping is incorrect. You can pecify device_id in init_process_group() to force use of a particular device. [repeated 3x across cluster]

Root Cause

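The error is raised from `torch.distributed.barrier()` in `_build_model_optimizer` (`verl/workers/fsdp_workers.py`, line 233), called while the reference policy workers run `init_model()`: ncclUnhandledCudaError: Call to CUDA function failed. Last error: Cuda failure 217 'peer access is not supported between these two devices'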

Attempted Solutions

The following environment variables were exported before launching training, but the error persisted:

export NCCL_IGNORE_DISABLED_P2P=1
export NCCL_P2P_DISABLE=1
export NCCL_IB_DISABLE=1
export NCCL_NET_GDR_LEVEL=0

References

Environment Setup

#!/bin/bash
# Install AgentFly (with the verl extra) into a uv-managed Python 3.10 virtual
# environment, then install enroot and, if it is missing, Redis 7.4.0.
# Ref: https://github.com/Agent-One-Lab/AgentFly/blob/main/docs/start/installation.md
# -e: stop at the first failing command; -x: trace every command as it runs.
set -ex

# Install uv and put ~/.local/bin on PATH so the `uv` calls below resolve
# (presumably where the installer places the binary -- confirm on your system).
curl -LsSf https://astral.sh/uv/install.sh | sh
export PATH="$HOME/.local/bin:$PATH"
uv venv uv_agentfly --python 3.10
source uv_agentfly/bin/activate
uv pip install --upgrade pip
export UV_LINK_MODE=copy

# Fetch the git submodules before installing the packages below.
git submodule init
git submodule update

uv pip install -e .
uv pip install -e '.[verl]' --no-build-isolation
uv pip install --upgrade datasets
# Run in a subshell: a failing `cd verl` inside an `&&` list does not trigger
# `set -e`, so a following `cd ..` would silently move to the parent directory.
# The subshell's non-zero status aborts the script and the cwd never changes.
(cd verl && uv pip install --no-deps -e .)
uv pip list

# enroot install
arch=$(dpkg --print-architecture)
curl -fSsL -O "https://github.com/NVIDIA/enroot/releases/download/v3.5.0/enroot_3.5.0-1_${arch}.deb"
curl -fSsL -O "https://github.com/NVIDIA/enroot/releases/download/v3.5.0/enroot+caps_3.5.0-1_${arch}.deb" # optional
# Match only the packages downloaded above so unrelated .deb files in the
# working directory are neither installed nor deleted.
sudo apt install -y ./enroot*_3.5.0-1_"${arch}".deb
rm -f ./enroot*_3.5.0-1_"${arch}".deb

# optional Search requires redis to cache results
# Build Redis from source only when the installed server is not already 7.4.0.
if redis-server --version 2>/dev/null | grep -q "7.4.0"; then
   echo "Redis 7.4.0 installed, skipped"
else
   echo "Installing Redis 7.4.0..."
   wget https://download.redis.io/releases/redis-7.4.0.tar.gz
   tar xzf redis-7.4.0.tar.gz
   cd redis-7.4.0
   make
   sudo make install
   cd ..
   # Remove both the extracted source tree and the downloaded tarball.
   rm -rf redis-7.4.0*
   redis-server --version
fi

Package Versions

Package                            Version       Editable project location
---------------------------------- ------------- ----------------------------------
accelerate                         1.10.1
agentfly                           0.0.1         /home/aiscuser/CODES/AgentFly
aiohappyeyeballs                   2.6.1
aiohttp                            3.12.15
aiohttp-cors                       0.8.1
aiosignal                          1.4.0
annotated-types                    0.7.0
antlr4-python3-runtime             4.9.3
anyio                              4.10.0
astor                              0.8.1
async-timeout                      5.0.1
attrs                              25.3.0
av                                 15.1.0
beautifulsoup4                     4.13.5
blake3                             1.0.5
bs4                                0.0.2
cachetools                         6.2.0
cbor2                              5.7.0
certifi                            2025.8.3
cffi                               1.17.1
cfgv                               3.4.0
charset-normalizer                 3.4.3
click                              8.2.1
cloudpickle                        3.1.1
codetiming                         1.4.0
coloredlogs                        15.0.1
colorful                           0.5.7
compressed-tensors                 0.10.2
cupy-cuda12x                       13.6.0
datasets                           4.0.0
depyf                              0.19.0
dill                               0.3.8
diskcache                          5.6.3
distlib                            0.4.0
distro                             1.9.0
dnspython                          2.7.0
docker                             7.1.0
einops                             0.8.1
email-validator                    2.3.0
exceptiongroup                     1.3.0
faiss-cpu                          1.12.0
fastapi                            0.116.1
fastapi-cli                        0.0.8
fastapi-cloud-cli                  0.1.5
fastrlock                          0.8.3
filelock                           3.19.1
flash-attn                         2.8.3
flatbuffers                        25.2.10
frozenlist                         1.7.0
fsspec                             2025.3.0
gguf                               0.17.1
gitdb                              4.0.12
gitpython                          3.1.45
google-api-core                    1.16.0
google-auth                        1.6.3
googleapis-common-protos           1.70.0
grpcio                             1.74.0
h11                                0.16.0
hf-xet                             1.1.9
httpcore                           1.0.9
httptools                          0.6.4
httpx                              0.28.1
huggingface-hub                    0.34.4
humanfriendly                      10.0
hydra-core                         1.3.2
identify                           2.6.13
idna                               3.10
importlib-metadata                 8.7.0
interegular                        0.3.3
jinja2                             3.1.6
jiter                              0.10.0
jsonschema                         4.25.1
jsonschema-specifications          2025.4.1
lark                               1.2.2
liger-kernel                       0.6.2
llguidance                         0.7.30
llvmlite                           0.44.0
lm-format-enforcer                 0.10.12
markdown-it-py                     4.0.0
markupsafe                         3.0.2
mdurl                              0.1.2
mistral-common                     1.8.4
mpmath                             1.3.0
msgpack                            1.1.1
msgspec                            0.19.0
multidict                          6.6.4
multiprocess                       0.70.16
networkx                           3.4.2
ninja                              1.13.0
nodeenv                            1.9.1
numba                              0.61.2
numpy                              2.2.6
nvidia-cublas-cu12                 12.6.4.1
nvidia-cuda-cupti-cu12             12.6.80
nvidia-cuda-nvrtc-cu12             12.6.77
nvidia-cuda-runtime-cu12           12.6.77
nvidia-cudnn-cu12                  9.5.1.17
nvidia-cufft-cu12                  11.3.0.4
nvidia-cufile-cu12                 1.11.1.6
nvidia-curand-cu12                 10.3.7.77
nvidia-cusolver-cu12               11.7.1.2
nvidia-cusparse-cu12               12.5.4.2
nvidia-cusparselt-cu12             0.6.3
nvidia-nccl-cu12                   2.26.2
nvidia-nvjitlink-cu12              12.6.85
nvidia-nvtx-cu12                   12.6.77
omegaconf                          2.3.0
onnxruntime                        1.22.1
openai                             1.90.0
opencensus                         0.11.4
opencensus-context                 0.1.3
opencv-python-headless             4.12.0.88
opentelemetry-api                  1.36.0
opentelemetry-exporter-prometheus  0.57b0
opentelemetry-proto                1.36.0
opentelemetry-sdk                  1.36.0
opentelemetry-semantic-conventions 0.57b0
orjson                             3.11.3
outlines-core                      0.2.10
packaging                          25.0
pandas                             2.3.2
partial-json-parser                0.2.1.1.post6
peft                               0.17.1
pillow                             11.3.0
pip                                25.2
platformdirs                       4.4.0
pre-commit                         4.3.0
prometheus-client                  0.22.1
prometheus-fastapi-instrumentator  7.1.0
propcache                          0.3.2
protobuf                           6.32.0
psutil                             7.0.0
py-cpuinfo                         9.0.0
py-spy                             0.4.1
pyarrow                            21.0.0
pyasn1                             0.6.1
pyasn1-modules                     0.4.2
pybase64                           1.4.2
pybind11                           3.0.1
pycountry                          24.6.1
pycparser                          2.22
pydantic                           2.11.7
pydantic-core                      2.33.2
pydantic-extra-types               2.10.5
pygments                           2.19.2
pylatexenc                         2.10
python-dateutil                    2.9.0.post0
python-dotenv                      1.1.1
python-json-logger                 3.3.0
python-multipart                   0.0.20
pytz                               2025.2
pyyaml                             6.0.2
pyzmq                              27.0.2
qwen-vl-utils                      0.0.11
ray                                2.49.0
redis                              6.4.0
referencing                        0.36.2
regex                              2025.8.29
requests                           2.32.5
responses                          0.18.0
rich                               14.1.0
rich-toolkit                       0.15.0
rignore                            0.6.4
rpds-py                            0.27.1
rsa                                4.9.1
safetensors                        0.6.2
scipy                              1.15.3
sentencepiece                      0.2.1
sentry-sdk                         2.35.1
setuptools                         80.9.0
shellingham                        1.5.4
six                                1.17.0
smart-open                         7.3.0.post1
smmap                              5.0.2
sniffio                            1.3.1
soundfile                          0.13.1
soupsieve                          2.8
soxr                               0.5.0.post1
starlette                          0.47.3
sympy                              1.14.0
tenacity                           9.1.2
tensordict                         0.6.2
termcolor                          3.1.0
tiktoken                           0.11.0
timeout-decorator                  0.5.0
tokenizers                         0.22.0
torch                              2.7.1
torchaudio                         2.7.1
torchdata                          0.11.0
torchvision                        0.22.1
tqdm                               4.67.1
transformers                       4.56.0
triton                             3.3.1
typer                              0.17.3
typing-extensions                  4.15.0
typing-inspection                  0.4.1
tzdata                             2025.2
urllib3                            2.5.0
uvicorn                            0.35.0
uvloop                             0.21.0
verl                               0.3.1.dev0    /home/aiscuser/CODES/AgentFly/verl
virtualenv                         20.34.0
vllm                               0.10.0
wandb                              0.21.3
watchfiles                         1.1.0
websockets                         15.0.1
wrapt                              1.17.3
xformers                           0.0.31
xgrammar                           0.1.21
xxhash                             3.5.0
yarl                               1.20.1
zipp                               3.23.0

Training Script

#!/bin/bash
# Start a single-node Ray head and launch verl agent RL training
# (verl.trainer.main_ppo) for a code agent on a math dataset.
# run with 80Gx4[A100/H100]
# verl/run_agents/run_code_agent.sh
# -e: stop at the first failing command; -x: trace every command as it runs.
set -ex
source uv_agentfly/bin/activate

# =================== Local Configuration ===================
# NNODES and NGPUS_PER_NODE are the single source of truth for both the Ray
# cluster and the trainer overrides further down.
NNODES=1  # Number of nodes for local execution
NGPUS_PER_NODE=4  # Number of GPUs per node
CPUS_PER_TASK=96  # Number of CPUs per task
HEAD_NODE="localhost"  # Head node for local execution

# Local node configuration
nodes=("$HEAD_NODE")
# Use [*] so the array expands to one word inside the quoted message.
echo "Nodes to check: ${nodes[*]}"

# We'll track PIDs so we can wait on them and detect errors
declare -A pids
export head_node=${nodes[0]}
# First address reported by `hostname -I` is used as the Ray head IP.
head_node_ip=$(hostname -I | awk '{print $1}')
port=6379
address_head=$head_node_ip:$port

export worker_num=$NNODES
export HYDRA_FULL_ERROR=1
# NCCL transport overrides; exported before `ray start` below.
export NCCL_IGNORE_DISABLED_P2P=1
export NCCL_P2P_DISABLE=1
export NCCL_IB_DISABLE=1
export NCCL_NET_GDR_LEVEL=0
export VLLM_USE_V1=1

# =================== Ray start ===================
# Stop existing Ray cluster
ray stop

sleep 10
# Remove existing Ray cluster
rm -rf /tmp/ray/ray_current_cluster

# Start Ray head node
# `--block` keeps `ray start` in the foreground, so it is backgrounded with `&`
# and the script waits a fixed 10 s before submitting the training job.
ray start --head --node-ip-address="$head_node_ip" --port="$port" \
    --num-cpus "${CPUS_PER_TASK}" --num-gpus "$NGPUS_PER_NODE" --include-dashboard=True --block &

sleep 10


# =================== Start RL training ===================
model=Qwen/Qwen2.5-3B-Instruct
template=qwen2.5-no-system-tool
lr=5e-7
length=512
batch_size=64
num_chains=8
kl_coef=0.001
train_dataset="orz_math_57k_train"
# adv_estimator=rloo
# adv_estimator=reinforce_plus_plus
# adv_estimator=remax
adv_estimator=grpo
# adv_estimator=gae

mini_batch_size=$batch_size

agent_type=code
tools="[code_interpreter]"
reward_name="math_reward_tool"
# reward_name="llm_as_judge_math_reward"
entropy_coeff=0.001
kl_loss_type=mse
max_turns=8
agent_backend="async_verl"
project_name="AgentRL"
total_training_steps=200

experiment_name="test"
# experiment_name="${model}-${agent_type}-${train_dataset}-${lr}-${length}-bs${batch_size}-n${num_chains}-kl${kl_loss_type}${kl_coef}-entropy${entropy_coeff}-${max_turns}steps-${adv_estimator}"

# trainer.n_gpus_per_node / trainer.nnodes reuse NGPUS_PER_NODE / NNODES so the
# trainer always requests what the Ray head above was started with.
# The shell strips the quotes in trainer.logger, so Hydra receives [console,wandb].
python3 -m verl.trainer.main_ppo \
    algorithm.adv_estimator=$adv_estimator \
    data.train_files=/mnt/yixiali/CODES/AgentFly/data/rlhf/math//${train_dataset}.json \
    data.val_files=/mnt/yixiali/CODES/AgentFly/data/rlhf/math//MATH_500.json \
    data.train_batch_size=$batch_size \
    agent.agent_type=$agent_type \
    agent.tools=$tools \
    agent.template=$template \
    agent.model_name_or_path=$model \
    agent.max_turns=${max_turns} \
    agent.backend=${agent_backend} \
    agent.reward_name=$reward_name \
    agent.num_chains=$num_chains \
    agent.use_agent=True \
    actor_rollout_ref.actor.optim.lr=$lr \
    actor_rollout_ref.model.use_remove_padding=False \
    actor_rollout_ref.model.path=${model} \
    actor_rollout_ref.actor.ppo_mini_batch_size=${mini_batch_size} \
    actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=2 \
    actor_rollout_ref.actor.use_kl_loss=True \
    actor_rollout_ref.actor.kl_loss_coef=$kl_coef \
    actor_rollout_ref.actor.kl_loss_type=$kl_loss_type \
    actor_rollout_ref.actor.entropy_coeff=$entropy_coeff \
    actor_rollout_ref.model.enable_gradient_checkpointing=False \
    actor_rollout_ref.actor.fsdp_config.param_offload=True \
    actor_rollout_ref.actor.fsdp_config.optimizer_offload=True \
    actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu=4 \
    actor_rollout_ref.rollout.tensor_model_parallel_size=2 \
    actor_rollout_ref.rollout.name=vllm \
    actor_rollout_ref.rollout.response_length=$length \
    actor_rollout_ref.rollout.gpu_memory_utilization=0.5 \
    actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=4 \
    actor_rollout_ref.ref.fsdp_config.param_offload=True \
    critic.model.path=$model \
    critic.ppo_mini_batch_size=${mini_batch_size} \
    critic.ppo_micro_batch_size_per_gpu=2 \
    algorithm.kl_ctrl.kl_coef=$kl_coef \
    trainer.critic_warmup=0 \
    trainer.logger=['console','wandb'] \
    trainer.project_name=$project_name \
    trainer.experiment_name=${experiment_name} \
    trainer.n_gpus_per_node=$NGPUS_PER_NODE \
    trainer.nnodes=$NNODES \
    trainer.save_freq=50 \
    trainer.test_freq=10 \
    trainer.total_training_steps=$total_training_steps \
    trainer.val_before_train=False

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions