# Patch summary (text above the first "diff --git" header is not part of any hunk).
#
# Applies the same three changes to the CodeGen Compose files for AMD ROCm,
# Intel Xeon and Intel Gaudi:
#   * adds a `healthcheck` block to the service defined just before the LLM
#     microservice (its header is outside the hunk context; presumably the
#     TGI service that the LLM microservice depends on - confirm);
#   * rewrites the LLM microservice's `depends_on` from the short list form to
#     the long form with `condition: service_healthy`;
#   * replaces the TGI_LLM_ENDPOINT container variable with LLM_ENDPOINT and
#     adds LLM_MODEL_ID.
#
# NOTE(review): the ROCm and Xeon probes curl http://$host_ip:8008/health.
#   The port mappings are not visible in these hunks - confirm that 8008 is
#   the port actually published on the host in each file.
# NOTE(review): the Gaudi healthcheck is `sleep 500 && exit 0`, i.e. a fixed
#   delay that exits 0 whether or not the server is serving. Looks like a
#   placeholder for a real /health probe - confirm intent.
# NOTE(review): leading whitespace of the YAML payload was reconstructed
#   assuming 2-space Compose nesting; if a hunk is rejected, retry with
#   whitespace-insensitive matching or check the target file's indentation.
diff --git a/CodeGen/docker_compose/amd/gpu/rocm/compose.yaml b/CodeGen/docker_compose/amd/gpu/rocm/compose.yaml
index cf7d2369c4..ffcbb4867b 100644
--- a/CodeGen/docker_compose/amd/gpu/rocm/compose.yaml
+++ b/CodeGen/docker_compose/amd/gpu/rocm/compose.yaml
@@ -15,6 +15,12 @@ services:
       https_proxy: ${https_proxy}
       HUGGING_FACE_HUB_TOKEN: ${CODEGEN_HUGGINGFACEHUB_API_TOKEN}
       HUGGINGFACEHUB_API_TOKEN: ${CODEGEN_HUGGINGFACEHUB_API_TOKEN}
+      host_ip: ${host_ip}
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://$host_ip:8008/health || exit 1"]
+      interval: 10s
+      timeout: 10s
+      retries: 100
     shm_size: 1g
     devices:
       - /dev/kfd:/dev/kfd
@@ -31,7 +37,8 @@ services:
     image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
     container_name: codegen-llm-server
     depends_on:
-      - codegen-tgi-service
+      codegen-tgi-service:
+        condition: service_healthy
     ports:
       - "${CODEGEN_LLM_SERVICE_PORT:-9000}:9000"
     ipc: host
@@ -39,7 +46,8 @@ services:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
-      TGI_LLM_ENDPOINT: "http://codegen-tgi-service"
+      LLM_ENDPOINT: "http://codegen-tgi-service"
+      LLM_MODEL_ID: ${LLM_MODEL_ID}
       HUGGINGFACEHUB_API_TOKEN: ${CODEGEN_HUGGINGFACEHUB_API_TOKEN}
     restart: unless-stopped
   codegen-backend-server:
diff --git a/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml b/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml
index 64b74db71f..6d62260a83 100644
--- a/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml
+++ b/CodeGen/docker_compose/intel/cpu/xeon/compose.yaml
@@ -15,12 +15,19 @@ services:
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
       HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
+      host_ip: ${host_ip}
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://$host_ip:8008/health || exit 1"]
+      interval: 10s
+      timeout: 10s
+      retries: 100
     command: --model-id ${LLM_MODEL_ID} --cuda-graphs 0
   llm:
     image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
     container_name: llm-tgi-server
     depends_on:
-      - tgi-service
+      tgi-service:
+        condition: service_healthy
     ports:
       - "9000:9000"
     ipc: host
@@ -28,7 +35,8 @@ services:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
-      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
+      LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
+      LLM_MODEL_ID: ${LLM_MODEL_ID}
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
     restart: unless-stopped
   codegen-xeon-backend-server:
diff --git a/CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml b/CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml
index 92b70b099c..7f7e71295a 100644
--- a/CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml
+++ b/CodeGen/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -20,6 +20,11 @@ services:
       LIMIT_HPU_GRAPH: true
       USE_FLASH_ATTENTION: true
       FLASH_ATTENTION_RECOMPUTE: true
+    healthcheck:
+      test: ["CMD-SHELL", "sleep 500 && exit 0"]
+      interval: 1s
+      timeout: 505s
+      retries: 1
     runtime: habana
     cap_add:
       - SYS_NICE
@@ -29,7 +34,8 @@ services:
     image: ${REGISTRY:-opea}/llm-tgi:${TAG:-latest}
     container_name: llm-tgi-gaudi-server
     depends_on:
-      - tgi-service
+      tgi-service:
+        condition: service_healthy
     ports:
       - "9000:9000"
     ipc: host
@@ -37,7 +43,8 @@ services:
       no_proxy: ${no_proxy}
       http_proxy: ${http_proxy}
       https_proxy: ${https_proxy}
-      TGI_LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
+      LLM_ENDPOINT: ${TGI_LLM_ENDPOINT}
+      LLM_MODEL_ID: ${LLM_MODEL_ID}
       HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
     restart: unless-stopped
   codegen-gaudi-backend-server: