diff --git a/asap-tools/experiments/experiment_only_ingest_path.py b/asap-tools/experiments/experiment_only_ingest_path.py index eb0c9b0..d2654dc 100644 --- a/asap-tools/experiments/experiment_only_ingest_path.py +++ b/asap-tools/experiments/experiment_only_ingest_path.py @@ -119,9 +119,7 @@ def main(cfg: DictConfig): raise ValueError("Invalid exporter config: {}".format(rejection_reason)) # Initialize services - system_exporters_service = SystemExportersService( - provider, args.num_nodes, args.node_offset - ) + system_exporters_service = SystemExportersService(provider, args) prometheus_service = create_prometheus_service( cfg, provider, args.num_nodes, args.node_offset ) @@ -130,9 +128,8 @@ def main(cfg: DictConfig): exporter_service = ExporterServiceFactory.create_exporter_service( args.fake_exporter_language, provider, - num_nodes_in_experiment, + args, use_container=args.use_container_fake_exporter, - node_offset=args.node_offset, ) # Initialize V2-specific services (always initialize to allow cleanup from previous runs) diff --git a/asap-tools/experiments/experiment_run_e2e.py b/asap-tools/experiments/experiment_run_e2e.py index bfb7096..fed1420 100644 --- a/asap-tools/experiments/experiment_run_e2e.py +++ b/asap-tools/experiments/experiment_run_e2e.py @@ -140,9 +140,7 @@ def main(cfg: DictConfig): use_container=args.use_container_query_engine, node_offset=args.node_offset, ) - system_exporters_service = SystemExportersService( - provider, args.num_nodes, args.node_offset - ) + system_exporters_service = SystemExportersService(provider, args) prometheus_service = create_prometheus_service( cfg, provider, args.num_nodes, args.node_offset ) @@ -154,9 +152,7 @@ def main(cfg: DictConfig): use_container=args.use_container_arroyo, node_offset=args.node_offset, ) - deathstar_service = DeathstarService( - provider, num_nodes_in_experiment, args.node_offset - ) + deathstar_service = DeathstarService(provider, args) controller_service = ControllerService( provider, use_container=args.use_container_controller, @@ -169,20 +165,14 @@ def main(cfg: DictConfig): node_offset=args.node_offset, ) remote_monitor_service = RemoteMonitorService(provider, args.node_offset) - avalanche_service = AvalancheExporterService( - provider, - num_nodes_in_experiment, - use_container=False, - node_offset=args.node_offset, - ) + avalanche_service = AvalancheExporterService(provider, args, use_container=False) # Initialize exporter service based on language exporter_service = ExporterServiceFactory.create_exporter_service( args.fake_exporter_language, provider, - num_nodes_in_experiment, + args, use_container=args.use_container_fake_exporter, - node_offset=args.node_offset, ) # Initialize cluster data exporter service if configured diff --git a/asap-tools/experiments/experiment_run_exporters_and_prometheus.py b/asap-tools/experiments/experiment_run_exporters_and_prometheus.py index cae8ece..ebc8bdd 100644 --- a/asap-tools/experiments/experiment_run_exporters_and_prometheus.py +++ b/asap-tools/experiments/experiment_run_exporters_and_prometheus.py @@ -76,9 +76,7 @@ def main(cfg: DictConfig): raise ValueError("Invalid exporter config: {}".format(rejection_reason)) # Initialize services - system_exporters_service = SystemExportersService( - provider, args.num_nodes, args.node_offset - ) + system_exporters_service = SystemExportersService(provider, args) prometheus_service = create_prometheus_service( cfg, provider, args.num_nodes, args.node_offset ) @@ -87,9 +85,8 @@ def main(cfg: DictConfig): exporter_service = ExporterServiceFactory.create_exporter_service( args.fake_exporter_language, provider, - num_nodes_in_experiment, + args, use_container=args.use_container_fake_exporter, - node_offset=args.node_offset, ) # Stop any existing services to ensure clean state diff --git a/asap-tools/experiments/experiment_run_grafana_demo.py b/asap-tools/experiments/experiment_run_grafana_demo.py index 90282fe..5f9456c 100644 --- a/asap-tools/experiments/experiment_run_grafana_demo.py +++ b/asap-tools/experiments/experiment_run_grafana_demo.py @@ -125,9 +125,7 @@ def main(cfg: DictConfig): use_container=args.use_container_query_engine, node_offset=args.node_offset, ) - system_exporters_service = SystemExportersService( - provider, args.num_nodes, args.node_offset - ) + system_exporters_service = SystemExportersService(provider, args) prometheus_service = create_prometheus_service( cfg, provider, args.num_nodes, args.node_offset ) @@ -136,9 +134,7 @@ def main(cfg: DictConfig): use_container=args.use_container_arroyo, node_offset=args.node_offset, ) - deathstar_service = DeathstarService( - provider, num_nodes_in_experiment, args.node_offset - ) + deathstar_service = DeathstarService(provider, args) controller_service = ControllerService( provider, use_container=args.use_container_controller, @@ -154,20 +150,14 @@ def main(cfg: DictConfig): grafana_service = GrafanaService( provider, num_nodes_in_experiment, args.node_offset ) - avalanche_service = AvalancheExporterService( - provider, - num_nodes_in_experiment, - use_container=False, - node_offset=args.node_offset, - ) + avalanche_service = AvalancheExporterService(provider, args, use_container=False) # Initialize exporter service based on language exporter_service = ExporterServiceFactory.create_exporter_service( args.fake_exporter_language, provider, - num_nodes_in_experiment, + args, use_container=args.use_container_fake_exporter, - node_offset=args.node_offset, ) sync.copy_experiment_config(cfg.experiment_params, local_experiment_root_dir) diff --git a/asap-tools/experiments/experiment_teardown_everything.py b/asap-tools/experiments/experiment_teardown_everything.py index fd280ab..f14d277 100644 --- a/asap-tools/experiments/experiment_teardown_everything.py +++ b/asap-tools/experiments/experiment_teardown_everything.py @@ -79,9 +79,7 @@ def main(cfg: DictConfig): provider, use_container=False, node_offset=args.node_offset ) - system_exporters_service = SystemExportersService( - provider, num_nodes_in_experiment, args.node_offset - ) + system_exporters_service = SystemExportersService(provider, args) prometheus_service = create_prometheus_service( cfg, provider, num_nodes_in_experiment, args.node_offset ) @@ -96,9 +94,7 @@ def main(cfg: DictConfig): provider, use_container=False, node_offset=args.node_offset ) - deathstar_service = DeathstarService( - provider, num_nodes_in_experiment, args.node_offset - ) + deathstar_service = DeathstarService(provider, args) controller_service_container = ControllerService( provider, use_container=True, node_offset=args.node_offset @@ -122,42 +118,21 @@ def main(cfg: DictConfig): provider, num_nodes_in_experiment, args.node_offset ) - avalanche_service = AvalancheExporterService( - provider, - num_nodes_in_experiment, - use_container=False, - node_offset=args.node_offset, - ) + avalanche_service = AvalancheExporterService(provider, args, use_container=False) # Initialize both exporter languages fake_exporter_service_rust = ExporterServiceFactory.create_exporter_service( - "rust", - provider, - num_nodes_in_experiment, - use_container=True, - node_offset=args.node_offset, + "rust", provider, args, use_container=True ) fake_exporter_service_python = ExporterServiceFactory.create_exporter_service( - "python", - provider, - num_nodes_in_experiment, - use_container=True, - node_offset=args.node_offset, + "python", provider, args, use_container=True ) fake_exporter_service_rust_native = ExporterServiceFactory.create_exporter_service( - "rust", - provider, - num_nodes_in_experiment, - use_container=False, - node_offset=args.node_offset, + "rust", provider, args, use_container=False ) fake_exporter_service_python_native = ( ExporterServiceFactory.create_exporter_service( - "python", - provider, - num_nodes_in_experiment, - use_container=False, - node_offset=args.node_offset, + "python", provider, args, use_container=False ) ) diff --git a/asap-tools/experiments/experiment_utils/services/fake_exporters.py b/asap-tools/experiments/experiment_utils/services/fake_exporters.py index 8d14ac9..d6b78c3 100644 --- a/asap-tools/experiments/experiment_utils/services/fake_exporters.py +++ b/asap-tools/experiments/experiment_utils/services/fake_exporters.py @@ -20,23 +20,20 @@ class BaseExporterService(BaseService): def __init__( self, provider: InfrastructureProvider, - num_nodes: int, + args, use_container: bool, - node_offset: int, ): """ Initialize base exporter service. Args: provider: Infrastructure provider for node communication and management - num_nodes: Number of nodes to run exporters on + args: Experiment args object providing get_node_range() and get_coordinator_node() use_container: Whether to use containerized deployment - node_offset: Starting node index offset """ super().__init__(provider) - self.num_nodes: int = num_nodes + self.args = args self.use_container: bool = use_container - self.node_offset: int = node_offset self.container_names: List[str] = [] self.compose_files: List[str] = [] @@ -157,9 +154,7 @@ def _start_bare_metal( # Run commands in parallel across nodes for cmd in cmds: self.provider.execute_command_parallel( - node_idxs=list( - range(self.node_offset + 1, self.node_offset + self.num_nodes + 1) - ), + node_idxs=self.args.get_node_range(include_coordinator=False), cmd=cmd, cmd_dir=cmd_dir, nohup=False, @@ -225,9 +220,7 @@ def _start_containerized( # Create output directory first mkdir_cmd = f"mkdir -p {output_dir}" self.provider.execute_command_parallel( - node_idxs=list( - range(self.node_offset + 1, self.node_offset + self.num_nodes + 1) - ), + node_idxs=self.args.get_node_range(include_coordinator=False), cmd=mkdir_cmd, cmd_dir="", nohup=False, @@ -244,9 +237,7 @@ def _start_containerized( batch_cmd = "; ".join(batch) self.provider.execute_command_parallel( - node_idxs=list( - range(self.node_offset + 1, self.node_offset + self.num_nodes + 1) - ), + node_idxs=self.args.get_node_range(include_coordinator=False), cmd=batch_cmd, cmd_dir="", nohup=False, @@ -278,9 +269,7 @@ def _stop_bare_metal(self, **kwargs) -> None: """ cmd = "pkill -f fake_exporter.py" self.provider.execute_command_parallel( - node_idxs=list( - range(self.node_offset + 1, self.node_offset + self.num_nodes + 1) - ), + node_idxs=self.args.get_node_range(include_coordinator=False), cmd=cmd, cmd_dir=None, nohup=False, @@ -301,12 +290,7 @@ def _stop_containerized(self, **kwargs) -> None: cmd = f"docker stop {container_list} 2>/dev/null || true; docker rm {container_list} 2>/dev/null || true" self.provider.execute_command_parallel( - node_idxs=list( - range( - self.node_offset + 1, - self.node_offset + self.num_nodes + 1, - ) - ), + node_idxs=self.args.get_node_range(include_coordinator=False), cmd=cmd, cmd_dir=None, nohup=False, @@ -317,12 +301,7 @@ def _stop_containerized(self, **kwargs) -> None: # Fallback: stop all containers matching the base name pattern cmd = f"docker ps -a --filter name={BaseExporterService.FAKE_EXPORTER_BASE_CONTAINER_NAME} --format '{{{{.Names}}}}' | xargs -r docker stop; docker ps -a --filter name={BaseExporterService.FAKE_EXPORTER_BASE_CONTAINER_NAME} --format '{{{{.Names}}}}' | xargs -r docker rm" self.provider.execute_command_parallel( - node_idxs=list( - range( - self.node_offset + 1, - self.node_offset + self.num_nodes + 1, - ) - ), + node_idxs=self.args.get_node_range(include_coordinator=False), cmd=cmd, cmd_dir=None, nohup=False, @@ -410,9 +389,7 @@ def _start_bare_metal( # Run commands in parallel across nodes for cmd in cmds: self.provider.execute_command_parallel( - node_idxs=list( - range(self.node_offset + 1, self.node_offset + self.num_nodes + 1) - ), + node_idxs=self.args.get_node_range(include_coordinator=False), cmd=cmd, cmd_dir=cmd_dir, nohup=False, @@ -481,9 +458,7 @@ def _start_containerized( batch_cmd = "; ".join(batch) self.provider.execute_command_parallel( - node_idxs=list( - range(self.node_offset + 1, self.node_offset + self.num_nodes + 1) - ), + node_idxs=self.args.get_node_range(include_coordinator=False), cmd=batch_cmd, cmd_dir="", nohup=False, @@ -515,9 +490,7 @@ def _stop_bare_metal(self, **kwargs) -> None: """ cmd = "pkill -f fake_exporter" self.provider.execute_command_parallel( - node_idxs=list( - range(self.node_offset + 1, self.node_offset + self.num_nodes + 1) - ), + node_idxs=self.args.get_node_range(include_coordinator=False), cmd=cmd, cmd_dir=None, nohup=False, @@ -538,12 +511,7 @@ def _stop_containerized(self, **kwargs) -> None: cmd = f"docker stop {container_list} 2>/dev/null || true; docker rm {container_list} 2>/dev/null || true" self.provider.execute_command_parallel( - node_idxs=list( - range( - self.node_offset + 1, - self.node_offset + self.num_nodes + 1, - ) - ), + node_idxs=self.args.get_node_range(include_coordinator=False), cmd=cmd, cmd_dir=None, nohup=False, @@ -554,12 +522,7 @@ def _stop_containerized(self, **kwargs) -> None: # Fallback: stop all containers matching the base name pattern cmd = f"docker ps -a --filter name={BaseExporterService.FAKE_EXPORTER_BASE_CONTAINER_NAME} --format '{{{{.Names}}}}' | xargs -r docker stop; docker ps -a --filter name={BaseExporterService.FAKE_EXPORTER_BASE_CONTAINER_NAME} --format '{{{{.Names}}}}' | xargs -r docker rm" self.provider.execute_command_parallel( - node_idxs=list( - range( - self.node_offset + 1, - self.node_offset + self.num_nodes + 1, - ) - ), + node_idxs=self.args.get_node_range(include_coordinator=False), cmd=cmd, cmd_dir=None, nohup=False, @@ -630,7 +593,7 @@ def start( # Run on the first node (avalanche generates enough load from single instance) self.provider.execute_command( - node_idx=self.node_offset, + node_idx=self.args.get_coordinator_node(), cmd=docker_cmd, cmd_dir=None, nohup=False, @@ -647,7 +610,7 @@ def stop(self, **kwargs) -> None: # Stop avalanche containers (common naming pattern) cmd = "docker ps --filter name=avalanche-exporter --format '{{.Names}}' | xargs -r docker stop" self.provider.execute_command( - node_idx=self.node_offset, + node_idx=self.args.get_coordinator_node(), cmd=cmd, cmd_dir=None, nohup=False, @@ -657,7 +620,7 @@ def stop(self, **kwargs) -> None: # Remove containers cmd = "docker ps -a --filter name=avalanche-exporter --format '{{.Names}}' | xargs -r docker rm" self.provider.execute_command( - node_idx=self.node_offset, + node_idx=self.args.get_coordinator_node(), cmd=cmd, cmd_dir=None, nohup=False, @@ -686,9 +649,8 @@ class ExporterServiceFactory: def create_exporter_service( language: str, provider: "InfrastructureProvider", - num_nodes: int, + args, use_container: bool, - node_offset: int, ) -> BaseExporterService: """ Create an exporter service based on language. @@ -696,9 +658,8 @@ def create_exporter_service( Args: language: Programming language ("python" or "rust") provider: Infrastructure provider for node communication and management - num_nodes: Number of nodes + args: Experiment args object providing get_node_range() and get_coordinator_node() use_container: Whether to use containerized deployment - node_offset: Starting node index offset Returns: Appropriate exporter service instance @@ -707,11 +668,9 @@ def create_exporter_service( ValueError: If language is not supported """ if language == "python": - return PythonExporterService( - provider, num_nodes, use_container, node_offset - ) + return PythonExporterService(provider, args, use_container) elif language == "rust": - return RustExporterService(provider, num_nodes, use_container, node_offset) + return RustExporterService(provider, args, use_container) else: raise ValueError( f"Invalid fake exporter language: {language}. Supported languages are 'python' and 'rust'" diff --git a/asap-tools/experiments/experiment_utils/services/misc.py b/asap-tools/experiments/experiment_utils/services/misc.py index 3c37da9..5f75df7 100644 --- a/asap-tools/experiments/experiment_utils/services/misc.py +++ b/asap-tools/experiments/experiment_utils/services/misc.py @@ -14,20 +14,16 @@ class DeathstarService(BaseService): """Service for managing DeathStar benchmark.""" - def __init__( - self, provider: InfrastructureProvider, num_nodes: int, node_offset: int - ): + def __init__(self, provider: InfrastructureProvider, args): """ Initialize DeathStar service. Args: provider: Infrastructure provider for node communication and management - num_nodes: Number of nodes to run DeathStar on - node_offset: Starting node index offset + args: Experiment args object providing get_node_range() and get_coordinator_node() """ super().__init__(provider) - self.num_nodes = num_nodes - self.node_offset = node_offset + self.args = args def start(self, **kwargs) -> None: """ @@ -41,9 +37,7 @@ def start(self, **kwargs) -> None: f"{self.provider.get_home_dir()}/benchmarks/DeathStarBench/socialNetwork" ) self.provider.execute_command_parallel( - node_idxs=list( - range(self.node_offset + 1, self.node_offset + self.num_nodes + 1) - ), + node_idxs=self.args.get_node_range(include_coordinator=False), cmd=cmd, cmd_dir=cmd_dir, nohup=False, @@ -64,9 +58,7 @@ def stop(self, **kwargs) -> None: f"{self.provider.get_home_dir()}/benchmarks/DeathStarBench/socialNetwork" ) self.provider.execute_command_parallel( - node_idxs=list( - range(self.node_offset + 1, self.node_offset + self.num_nodes + 1) - ), + node_idxs=self.args.get_node_range(include_coordinator=False), cmd=cmd, cmd_dir=cmd_dir, nohup=False, @@ -97,22 +89,22 @@ def run_workload( TOTAL_CONNECTIONS = 480 TOTAL_REQUESTS = 1200 - connections = TOTAL_CONNECTIONS // self.num_nodes - requests = TOTAL_REQUESTS // self.num_nodes + connections = TOTAL_CONNECTIONS // self.args.num_nodes + requests = TOTAL_REQUESTS // self.args.num_nodes output_file_template = ( "{}/deathstar_logs/connections_{}_requests_{}_nodes_{}_ip_{}.txt" ) ips = [] output_files = [] - for i in range(self.node_offset + 1, self.node_offset + self.num_nodes + 1): + for i in self.args.get_node_range(include_coordinator=False): ips.append(self.provider.get_node_ip(i)) output_files.append( output_file_template.format( experiment_output_dir, TOTAL_CONNECTIONS, TOTAL_REQUESTS, - self.num_nodes, + self.args.num_nodes, i, ) ) diff --git a/asap-tools/experiments/experiment_utils/services/monitoring.py b/asap-tools/experiments/experiment_utils/services/monitoring.py index c361478..e82b54f 100644 --- a/asap-tools/experiments/experiment_utils/services/monitoring.py +++ b/asap-tools/experiments/experiment_utils/services/monitoring.py @@ -11,24 +11,19 @@ class MonitoringService(BaseService): """Service for managing monitoring across nodes.""" - def __init__( - self, provider: InfrastructureProvider, num_nodes: int, node_offset: int - ): + def __init__(self, provider: InfrastructureProvider, args): """ Initialize Monitoring service. Args: provider: Infrastructure provider for node communication and management - num_nodes: Number of nodes to monitor - node_offset: Starting node index offset + args: Experiment args object providing get_node_range() and get_coordinator_node() """ super().__init__(provider) - self.num_nodes = num_nodes - self.node_offset = node_offset - self.system_exporters_service = SystemExportersService( - provider, num_nodes, node_offset + self.system_exporters_service = SystemExportersService(provider, args) + self.prometheus_service = PrometheusService( + provider, args.num_nodes, args.node_offset ) - self.prometheus_service = PrometheusService(provider, num_nodes, node_offset) def start(self, experiment_params, experiment_output_dir: str, **kwargs) -> None: """ diff --git a/asap-tools/experiments/experiment_utils/services/system_exporters.py b/asap-tools/experiments/experiment_utils/services/system_exporters.py index be82a5a..1cf7bb1 100644 --- a/asap-tools/experiments/experiment_utils/services/system_exporters.py +++ b/asap-tools/experiments/experiment_utils/services/system_exporters.py @@ -12,20 +12,16 @@ class SystemExportersService(BaseService): """Service for managing system exporters (node_exporter, blackbox_exporter, cadvisor).""" - def __init__( - self, provider: InfrastructureProvider, num_nodes: int, node_offset: int - ): + def __init__(self, provider: InfrastructureProvider, args): """ Initialize System Exporters service. Args: provider: Infrastructure provider for node communication and management - num_nodes: Number of nodes to manage - node_offset: Starting node index offset + args: Experiment args object providing get_node_range() and get_coordinator_node() """ super().__init__(provider) - self.num_nodes = num_nodes - self.node_offset = node_offset + self.args = args def start(self, experiment_params: DictConfig, **kwargs) -> None: """ @@ -36,9 +32,7 @@ def start(self, experiment_params: DictConfig, **kwargs) -> None: **kwargs: Additional configuration """ # Start exporters on worker nodes - for node_idx in range( - self.node_offset + 1, self.node_offset + self.num_nodes + 1 - ): + for node_idx in self.args.get_node_range(include_coordinator=False): local_ip = self.provider.get_node_ip(node_idx) # Start node_exporter @@ -67,7 +61,7 @@ def start(self, experiment_params: DictConfig, **kwargs) -> None: ) # Start blackbox_exporter on controller node - coordinator_node = self.node_offset + coordinator_node = self.args.get_coordinator_node() cmd, cmd_dir = self._get_blackbox_exporter_cmd( local_ip=self.provider.get_node_ip(coordinator_node) ) @@ -88,9 +82,7 @@ def stop(self, **kwargs) -> None: """ cmd = "killall node_exporter; killall blackbox_exporter; docker stop cadvisor; docker rm cadvisor" self.provider.execute_command_parallel( - node_idxs=list( - range(self.node_offset + 1, self.node_offset + self.num_nodes + 1) - ), + node_idxs=self.args.get_node_range(include_coordinator=False), cmd=cmd, cmd_dir=None, nohup=False, diff --git a/asap-tools/experiments/generate_prometheus_config.py b/asap-tools/experiments/generate_prometheus_config.py index c70ef20..35d3e86 100644 --- a/asap-tools/experiments/generate_prometheus_config.py +++ b/asap-tools/experiments/generate_prometheus_config.py @@ -5,6 +5,7 @@ from omegaconf import DictConfig import experiment_utils +from utils import get_node_range def get_metrics_for_exporter(exporter_name, experiment_config): @@ -184,9 +185,10 @@ def main(args, experiment_config=None): { "targets": [ f"{args.node_ip_prefix}.{i + 1}:{port}" - for i in range( - args.node_offset + 1, - args.node_offset + args.num_nodes + 1, + for i in get_node_range( + args.node_offset, + args.num_nodes, + include_coordinator=False, ) ] } @@ -217,8 +219,8 @@ def main(args, experiment_config=None): ] for exporter, ports in fake_exporters: targets = [] - for target_ip in range( - args.node_offset + 1, args.node_offset + args.num_nodes + 1 + for target_ip in get_node_range( + args.node_offset, args.num_nodes, include_coordinator=False ): for port in ports: targets.append(f"{args.node_ip_prefix}.{target_ip + 1}:{port}") diff --git a/asap-tools/experiments/generate_victoriametrics_config.py b/asap-tools/experiments/generate_victoriametrics_config.py index 11901c3..68582b7 100644 --- a/asap-tools/experiments/generate_victoriametrics_config.py +++ b/asap-tools/experiments/generate_victoriametrics_config.py @@ -4,6 +4,7 @@ from omegaconf import DictConfig import experiment_utils +from utils import get_node_range from constants import ( VMAGENT_SCRAPE_CONFIG_FILE, VMAGENT_REMOTE_WRITE_CONFIG_FILE, @@ -227,7 +228,9 @@ def main(args, experiment_config=None): start_port = fake_exporter_config["start_port"] targets = [] - for i in range(args.node_offset + 1, args.node_offset + args.num_nodes + 1): + for i in get_node_range( + args.node_offset, args.num_nodes, include_coordinator=False + ): for j in range(num_ports_per_server): targets.append(f"{args.node_ip_prefix}.{i + 1}:{start_port + j}") @@ -248,7 +251,9 @@ def main(args, experiment_config=None): avalanche_port = avalanche_config.get("port", 9001) targets = [] - for i in range(args.node_offset + 1, args.node_offset + args.num_nodes + 1): + for i in get_node_range( + args.node_offset, args.num_nodes, include_coordinator=False + ): targets.append(f"{args.node_ip_prefix}.{i + 1}:{avalanche_port}") scrape_job = { @@ -270,7 +275,9 @@ def main(args, experiment_config=None): cluster_data_port = cluster_data_config.get("port", 9010) targets = [] - for i in range(args.node_offset + 1, args.node_offset + args.num_nodes + 1): + for i in get_node_range( + args.node_offset, args.num_nodes, include_coordinator=False + ): targets.append(f"{args.node_ip_prefix}.{i + 1}:{cluster_data_port}") scrape_job = { diff --git a/asap-tools/experiments/utils.py b/asap-tools/experiments/utils.py index a5e8694..b0777c6 100644 --- a/asap-tools/experiments/utils.py +++ b/asap-tools/experiments/utils.py @@ -5,6 +5,20 @@ import constants +def get_node_range( + node_offset: int, num_nodes: int, include_coordinator: bool = True +) -> list: + """Return the list of node indices for an experiment. + + Mirrors Args.get_node_range() for use in argparse-based scripts that don't + have an Args object. + """ + if include_coordinator: + return list(range(node_offset, node_offset + num_nodes + 1)) + else: + return list(range(node_offset + 1, node_offset + num_nodes + 1)) + + def run_cmd_with_retry( cmd, popen, ignore_errors=False, max_retries=3, retry_delay=5 ) -> Union[subprocess.Popen, subprocess.CompletedProcess]: