Summary of this patch (text before the first "diff --git" header is ignored
by git apply / patch):
  * experiment_run_e2e.py: in precompute mode, call
    query_engine_service.wait_until_ready() after the query engine is started
    and before the system exporters are started.
  * query_engine.py: _is_healthy_containerized now runs `docker inspect`
    through self.provider.execute_command(node_idx=self.node_offset, ...)
    instead of a local subprocess.run, and drops the separate
    CalledProcessError handler (the generic handler still returns False).
NOTE(review): line breaks and indentation were reconstructed from the hunk
counts; if context lines do not match the target tree, apply with
`git apply --ignore-whitespace` and confirm the result.

diff --git a/asap-tools/experiments/experiment_run_e2e.py b/asap-tools/experiments/experiment_run_e2e.py
index 4f315394..bfb7096e 100644
--- a/asap-tools/experiments/experiment_run_e2e.py
+++ b/asap-tools/experiments/experiment_run_e2e.py
@@ -518,6 +518,12 @@ def main(cfg: DictConfig):
             http_port=http_port,
             remote_write_port=args.remote_write_base_port,
         )
+        # For precompute mode the query engine IS the Prometheus remote-write
+        # target (port remote_write_base_port). Prometheus is already running
+        # and retrying writes against that port, so block until the query
+        # engine's HTTP server is accepting connections before proceeding.
+        if args.streaming_engine == "precompute":
+            query_engine_service.wait_until_ready()
 
         # Start system exporters (node_exporter, blackbox_exporter, cadvisor)
         system_exporters_service.start(cfg.experiment_params)
diff --git a/asap-tools/experiments/experiment_utils/services/query_engine.py b/asap-tools/experiments/experiment_utils/services/query_engine.py
index eb3d5e79..fe5e3b6a 100644
--- a/asap-tools/experiments/experiment_utils/services/query_engine.py
+++ b/asap-tools/experiments/experiment_utils/services/query_engine.py
@@ -524,16 +524,17 @@ def _is_healthy_bare_metal(self) -> bool:
     def _is_healthy_containerized(self) -> bool:
         """Check if Rust QueryEngine is healthy using containerized deployment."""
         try:
-            # Check if container is running
-            result = subprocess.run(
-                ["docker", "inspect", "-f", "{{.State.Running}}", self.container_name],
-                capture_output=True,
-                text=True,
-                check=True,
+            cmd = f"docker inspect -f '{{{{.State.Running}}}}' {self.container_name}"
+            result = self.provider.execute_command(
+                node_idx=self.node_offset,
+                cmd=cmd,
+                cmd_dir=None,
+                nohup=False,
+                popen=False,
+                ignore_errors=True,
             )
+            assert isinstance(result, subprocess.CompletedProcess)
             return result.stdout.strip() == "true"
-        except subprocess.CalledProcessError:
-            return False
         except Exception:
             return False
 