From 0263c0feeb088c2d7ccd7cb2221c0e14dc6e252d Mon Sep 17 00:00:00 2001 From: Justin Hiemstra Date: Fri, 23 Jan 2026 14:47:33 -0600 Subject: [PATCH 01/10] Add additional flags to snakemake_long for env management, verbose logging I was tired of hacking around wanting verbose logging in the HTCondor Snakemake executor, so I added some plumbing to pass Snakemake's '--verbose' flag through 'snakemake_long.py' to snakemake itself. Additionally, I added '--env-manager' so I could run things with my preferred mamba env instead of conda (which is too slow to rebuild). --- docker-wrappers/SPRAS/snakemake_long.py | 34 ++++++++++++++++++++----- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/docker-wrappers/SPRAS/snakemake_long.py b/docker-wrappers/SPRAS/snakemake_long.py index 230593b94..8ad1ff537 100755 --- a/docker-wrappers/SPRAS/snakemake_long.py +++ b/docker-wrappers/SPRAS/snakemake_long.py @@ -30,6 +30,9 @@ def parse_args(isLocal=False): parser.add_argument("command", help="Helper command to run", choices=["long"]) parser.add_argument("--snakefile", help="The Snakefile to run. If omitted, the Snakefile is assumed to be in the current directory.", required=False) parser.add_argument("--profile", help="A path to a directory containing the desired Snakemake profile.", required=True) + parser.add_argument("--verbose", help="Enable verbose output for debugging.", action="store_true", required=False) + parser.add_argument("--env-manager", help="The environment manager to use (conda or mamba). Default is conda.", + choices=["conda", "mamba"], default="conda", required=False) # I'd love to change this to "logdir", but using the same name as Snakemake for consistency of feeling between this script # and Snakemake proper. parser.add_argument("--htcondor-jobdir", help="The directory Snakemake will write logs to. 
If omitted, a 'logs` directory will be created in the current directory", required=False) @@ -39,19 +42,24 @@ def parse_args(isLocal=False): Given a Snakefile, profile, and HTCondor job directory, submit a local universe job that runs Snakemake from the context of the submission directory. """ -def submit_local(snakefile, profile, htcondor_jobdir): +def submit_local(snakefile, profile, htcondor_jobdir, verbose=False, env_manager="conda"): # Get the location of this script, which also serves as the executable for the condor job. script_location = pathlib.Path(__file__).resolve() + # Build arguments string, including optional flags + args_str = f"long --snakefile {snakefile} --profile {profile} --htcondor-jobdir {htcondor_jobdir} --env-manager {env_manager}" + if verbose: + args_str += " --verbose" + submit_description = htcondor.Submit({ "executable": script_location, # We use the "long" command to indicate to the script that it should run the Snakemake command instead of submitting another job. # See comment in parse_args for more information. - "arguments": f"long --snakefile {snakefile} --profile {profile} --htcondor-jobdir {htcondor_jobdir}", + "arguments": args_str, "universe": "local", "request_disk": "512MB", "request_cpus": 1, - "request_memory": 512, + "request_memory": "512MB", # Set up logging "log": f"{htcondor_jobdir}/snakemake.log", @@ -100,7 +108,7 @@ def top_main(): os.makedirs(args.htcondor_jobdir) try: - submit_local(args.snakefile, args.profile, args.htcondor_jobdir) + submit_local(args.snakefile, args.profile, args.htcondor_jobdir, args.verbose, args.env_manager) except Exception as e: print(f"Error: Could not submit local universe job. {e}") raise @@ -108,17 +116,29 @@ def top_main(): """ Command to activate conda environment and run Snakemake. This is run by the local universe job, not the user. 
""" +def get_env_activation_command(env_manager, env_name="spras"): + """Generate the appropriate shell commands to activate the environment based on the env manager.""" + if env_manager == "mamba": + # mamba uses shell hook for activation + return f'eval "$(mamba shell hook --shell bash)" && mamba activate {env_name}' + else: # conda (default) + return f'source $(conda info --base)/etc/profile.d/conda.sh && conda activate {env_name}' + def long_main(): args = parse_args(True) # Note that we need to unset APPTAINER_CACHEDIR in this case but not in the local terminal case because the wrapper # HTCondor job has a different environment and populating this value causes Snakemake to fail when it tries to write # to spool (a read-only filesystem from the perspective of the EP job). + verbose_flag = "--verbose" if args.verbose else "" + + # Get the appropriate activation command for the detected/specified env manager + activation_cmd = get_env_activation_command(args.env_manager) + command = f""" - source $(conda info --base)/etc/profile.d/conda.sh && \ - conda activate spras && \ + {activation_cmd} && \ unset APPTAINER_CACHEDIR && \ - snakemake -s {args.snakefile} --profile {args.profile} --htcondor-jobdir {args.htcondor_jobdir} + snakemake -s {args.snakefile} --profile {args.profile} --htcondor-jobdir {args.htcondor_jobdir} {verbose_flag} """ try: From 6281e535baf010ca3a280db7bfd9509745f59264 Mon Sep 17 00:00:00 2001 From: Justin Hiemstra Date: Fri, 23 Jan 2026 15:42:43 -0600 Subject: [PATCH 02/10] Overhaul HTCondor instructions The executor has matured quite a bit since these instructions were first drafted, and it's my hope that these changes remove a lot of the headache for running jobs. Now, you can edit config files in `config/` and use the `input/` directory directly. Workflows should be submitted directly from the repository root. 
--- .gitignore | 9 + docker-wrappers/SPRAS/example_config.yaml | 154 ---------------- docs/htcondor.rst | 172 +++++++++--------- .../SPRAS => htcondor}/snakemake_long.py | 2 +- {docker-wrappers/SPRAS => htcondor}/spras.sh | 0 {docker-wrappers/SPRAS => htcondor}/spras.sub | 26 +-- .../spras_profile/config.yaml | 13 +- run_htcondor.sh | 8 + 8 files changed, 132 insertions(+), 252 deletions(-) delete mode 100644 docker-wrappers/SPRAS/example_config.yaml rename {docker-wrappers/SPRAS => htcondor}/snakemake_long.py (98%) rename {docker-wrappers/SPRAS => htcondor}/spras.sh (100%) rename {docker-wrappers/SPRAS => htcondor}/spras.sub (87%) rename {docker-wrappers/SPRAS => htcondor}/spras_profile/config.yaml (83%) create mode 100755 run_htcondor.sh diff --git a/.gitignore b/.gitignore index 3629c49c6..91933def7 100644 --- a/.gitignore +++ b/.gitignore @@ -144,3 +144,12 @@ TempMat.mat # Singularity cache unpacked + +# HTCondor logs +htcondor/logs/ +*.err +*.out +*.log + +# Any sif files +*.sif diff --git a/docker-wrappers/SPRAS/example_config.yaml b/docker-wrappers/SPRAS/example_config.yaml deleted file mode 100644 index 1e7fd69c2..000000000 --- a/docker-wrappers/SPRAS/example_config.yaml +++ /dev/null @@ -1,154 +0,0 @@ -# Global workflow control - -# The length of the hash used to identify a parameter combination -hash_length: 7 - -containers: - # Specify the container framework used by each PRM wrapper. Valid options include: - # - docker (default if not specified) - # - singularity OR apptainer -- Apptainer (formerly Singularity) is useful in HPC/HTC environments where docker isn't allowed - # - dsub -- experimental with limited support, used for running on Google Cloud - framework: singularity - - # Only used if framework is set to singularity/apptainer, this will unpack the containers - # to the local filesystem. This is useful when PRM containers need to run inside another container, - # such as would be the case in an HTCondor/OSPool environment. 
- # NOTE: This unpacks containers to the local filesystem, which will take up space in a way - # that persists after the workflow is complete. To clean up the unpacked containers, the user must - # manually delete them. For convenience, these unpacked files will exist in the current working directory - # under `unpacked`. - # Here, we unpack it since we're running on HTCondor. - unpack_singularity: true - - # Allow the user to configure which container registry containers should be pulled from - # Note that this assumes container names are consistent across registries, and that the - # registry being passed doesn't require authentication for pull actions - registry: - base_url: docker.io - # The owner or project of the registry - # For example, "reedcompbio" if the image is available as docker.io/reedcompbio/allpairs - owner: reedcompbio - -# This list of algorithms should be generated by a script which checks the filesystem for installs. -# It shouldn't be changed by mere mortals. (alternatively, we could add a path to executable for each algorithm -# in the list to reduce the number of assumptions of the program at the cost of making the config a little more involved) -# Each algorithm has an 'include' parameter. By toggling 'include' to true/false the user can change -# which algorithms are run in a given experiment. -# -# algorithm-specific parameters are embedded in lists so that users can specify multiple. If multiple -# parameters are specified then the algorithm will be run as many times as needed to cover all parameter -# combinations. For instance if we have the following: -# - name: "myAlg" -# params: -# include: true -# a: [1,2] -# b: [0.5,0.75] -# -# then myAlg will be run on (a=1,b=0.5),(a=1,b=0.75),(a=2,b=0.5), and (a=2,b=0,75). Pretty neat, but be -# careful: too many parameters might make your runs take a long time. 
- -algorithms: - - name: "pathlinker" - include: false - runs: - run1: - k: range(100,201,100) - - - name: "omicsintegrator1" - include: true - runs: - run1: - r: [5] - b: [5, 6] - w: np.linspace(0,5,2) - g: [3] - d: [10] - - - name: "omicsintegrator2" - include: true - runs: - run1: - b: [4] - g: [0] - run2: - b: [2] - g: [3] - - - name: "meo" - include: true - runs: - run1: - max_path_length: [3] - local_search: [true] - rand_restarts: [10] - - - name: "mincostflow" - include: true - runs: - run1: - flow: [1] # The flow must be an int - capacity: [1] - - - name: "allpairs" - include: true - - - name: "domino" - include: true - runs: - run1: - slice_threshold: [0.3] - module_threshold: [0.05] - -# Here we specify which pathways to run and other file location information. -# DataLoader.py can currently only load a single dataset -# Assume that if a dataset label does not change, the lists of associated input files do not change -datasets: - - label: data0 - node_files: ["node-prizes.txt", "sources.txt", "targets.txt"] - # DataLoader.py can currently only load a single edge file, which is the primary network - edge_files: ["network.txt"] - # Placeholder - other_files: [] - # Relative path from the spras directory - data_dir: "input" -# - label: data1 -# # Reuse some of the same sources file as 'data0' but different network and targets -# node_files: ["node-prizes.txt", "sources.txt", "alternative-targets.txt"] -# edge_files: ["alternative-network.txt"] -# other_files: [] -# # Relative path from the spras directory -# data_dir: "input" - -# If we want to reconstruct then we should set run to true. -# TODO: if include is true above but run is false here, algs are not run. -# is this the behavior we want? 
-reconstruction_settings: - - #set where everything is saved - locations: - - #place the save path here - # TODO move to global - reconstruction_dir: "output" - -analysis: - # Create one summary per pathway file and a single summary table for all pathways for each dataset - summary: - include: true - # Create Cytoscape session file with all pathway graphs for each dataset - cytoscape: - include: false - # Machine learning analysis (e.g. clustering) of the pathway output files for each dataset - ml: - include: true - # specify how many principal components to calculate - components: 2 - # boolean to show the labels on the pca graph - labels: true - # 'ward', 'complete', 'average', 'single' - # if linkage: ward, must use metric: euclidean - linkage: 'ward' - # 'euclidean', 'manhattan', 'cosine' - metric: 'euclidean' - evaluation: - include: false diff --git a/docs/htcondor.rst b/docs/htcondor.rst index f5a0f92e0..f955d6e77 100644 --- a/docs/htcondor.rst +++ b/docs/htcondor.rst @@ -2,10 +2,9 @@ Running with HTCondor ####################### -The folder `docker-wrappers/SPRAS -`_ -inside the SPRAS git repository contains several files that can be used -to run workflows with this container on HTCondor. To use the ``spras`` +The folder `htcondor/ `_ +inside the SPRAS git repository contains several files that can be used to +run workflows with this container on HTCondor. To use the ``spras`` image in this environment, first login to an HTCondor Access Point (AP). Then, from the AP clone this repo: @@ -67,65 +66,55 @@ image does not use a "v" in the tag. Submitting All Jobs to a Single EP ************************************ -Navigate to the ``spras/docker-wrappers/SPRAS`` directory and create the -``logs/`` directory (``mkdir logs``). Next, modify ``spras.sub`` so that -it uses the SPRAS apptainer image you created: - -.. 
code:: - - container_image = < your spras image >.sif - -Make sure to modify the configuration file to have -``unpack_singularity`` set to ``true``, and ``containers.framework`` set -to ``singularity``: else, the workflow will (likely) fail. - -Then run ``condor_submit spras.sub``, which will submit SPRAS to -HTCondor as a single job with as many cores as indicated by the -``NUM_PROCS`` line in ``spras.sub``, using the value of -``EXAMPLE_CONFIG`` as the SPRAS configuration file. By default, the -``example_config.yaml`` runs everything except for ``cytoscape``, which -appears to fail periodically in HTCondor. - -**Note**: The ``spras.sub`` submit file is an example of how this -workflow could be submitted from a CHTC Access Point (AP) to the OSPool. -To run in the local CHTC pool, omit the ``+WantGlideIn`` and -``requirements`` lines. +Running all SPRAS steps on a single remote Execution Point (EP) is a good way +to get started with HTCondor, but it is significantly less efficient than using +HTCondor's distributed capabilities. This approach is best suited for +workflows that are not computationally intensive, or for testing and +debugging purposes. + +Before submitting all SPRAS jobs to a single remote Execution Point (EP), +you'll need to set up three things: +1. You'll need to modify ``htcondor/spras.sub`` to point at your container + image, along with any other configuration changes you want to make like + choosing a logging directory or toggling OSPool submission. Note that all + paths in the submit file are relative to the directory from which you run + ``condor_submit``, which will typically be the root of the SPRAS repository. +2. You'll need to ensure your SPRAS configuration file has a few key values + set, including ``unpack_singularity: true`` and + ``containers.framework: singularity``. +3. Finally, it's best practice to create the logging directory configured in + the submit file before submitting the job, e.g. 
to create the default log + directory, run ``mkdir htcondor/logs`` from the root of the repository. + +Once these steps are complete, you can submit the job from the root of the +SPRAS repository by running ``condor_submit htcondor/spras.sub``. + +When the job completes, the ``output`` directory from the workflow should be +returned as ``output``. ************************** Submitting Parallel Jobs ************************** -Parallelizing SPRAS workflows with HTCondor requires the same setup as -the previous section, but with two additions. First, it requires an -activated SPRAS conda environment with a ``pip install``-ed version of -the SPRAS module (via ``pip install .`` inside the SPRAS directory). - -Second, it requires an experimental executor for HTCondor that has been -forked from the upstream `HTCondor Snakemake executor -`__. - -After activating your ``spras`` conda environment and ``pip``-installing -SPRAS, you can install the HTCondor Snakemake executor with the -following: +Parallelizing SPRAS workflows with HTCondor requires much of the same setup +as the previous section, but with two additions. +1. Build/activate the SPRAS conda/mamba environment and ``pip install`` the SPRAS module + (via ``pip install .`` inside the SPRAS directory). +2. Install the `HTCondor Snakemake +executor `__; once your + SPRAS conda/mamba environment is activated and SPRAS is ``pip install``-ed, + you can install the HTCondor Snakemake executor with the following: .. code:: bash pip install git+https://github.com/htcondor/snakemake-executor-plugin-htcondor.git -Currently, this executor requires that all input to the workflow is -scoped to the current working directory. Therefore, you'll need to copy -the Snakefile and your input directory (as specified by -``example_config.yaml``) to this directory: - -.. code:: bash - - cp ../../Snakefile . && \ - cp -r ../../input . 
- -Instead of editing ``spras.sub`` to define the workflow, this scenario -requires editing the SPRAS profile in ``spras_profile/config.yaml``. -Make sure you specify the correct container, and change any other config -values needed by your workflow (defaults are fine in most cases). +3. Instead of editing ``spras.sub`` to define the workflow, this scenario + requires editing the SPRAS profile in ``htcondor/spras_profile/config.yaml``. + Make sure you specify the correct container, and change any other config + values needed by your workflow (defaults are fine in most cases). +4. Modify your SPRAS configuration file to set ``unpack_singularity: true`` and + ``containers.framework: singularity``. Then, to start the workflow with HTCondor in the CHTC pool, there are two options: @@ -137,11 +126,16 @@ The first option is to run Snakemake in a way that ties its execution to your terminal. This is good for testing short workflows and running short jobs. The downside is that closing your terminal causes the process to exit, removing any unfinished jobs. To use this option, -invoke Snakemake directly by running: +invoke Snakemake directly from the repository root by running: .. code:: bash - snakemake --profile spras_profile + snakemake --profile htcondor/spras_profile/ + +**Note**: Running the workflow in this way requires that your terminal +session stays active. Closing the terminal stops the workflow and removes any +unfinished jobs, but Snakemake will resume after the already-completed steps +when you restart the workflow. Long Running Snakemake Jobs (Managed by HTCondor) ================================================= @@ -149,31 +143,53 @@ Long Running Snakemake Jobs (Managed by HTCondor) The second option is to let HTCondor manage the Snakemake process, which allows the jobs to run as long as needed. Instead of seeing Snakemake output directly in your terminal, you'll be able to see it in a -specified log file. 
To use this option, make sure ``snakemake_long.py`` -is executable (you can run ``chmod +x snakemake_long.py`` from the AP to -make sure it is), and then run: +specified log file. To use this option, run from the repository root: -.. code:: +.. code:: bash + + ./htcondor/snakemake_long.py --profile htcondor/spras_profile/ - ./snakemake_long.py --profile spras_profile --htcondor-jobdir +A convenience script called ``run_htcondor.sh`` is also provided in the +repository root. You can execute this script by running: + +.. code:: bash -When run in this mode, all log files for the workflow will be placed -into the path you provided for the logging directory. In particular, -Snakemake's outputs with job progress can be found split between -``/snakemake-long.err`` and ``/snakemake-long.out``. + ./run_htcondor.sh + +When executed in this mode, all log files for the workflow will be placed +into the logging directory (``htcondor/logs`` by default). In particular, +Snakemake's stdout/stderr outputs containing your workflow's progress can +be found split between ``htcondor/logs/snakemake.err`` and ``htcondor/logs/snakemake.out``. These will also log each rule and what HTCondor job ID was submitted for that rule (see the `troubleshooting section <#troubleshooting>`__ for information on how to use these extra log files). +**Note**: While you're in the initial stages of developing/debugging your +workflow, it's very useful to invoke Snakemake with the ``--verbose`` flag. +This can be passed to Snakemake via the ``snakemake_long.py`` script by +adding it to the script's argument list, e.g.: + +.. code:: bash + + ./htcondor/snakemake_long.py --profile htcondor/spras_profile/ --verbose + +If you use mamba instead of conda for environment management, you can specify +this with the ``--env-manager`` flag: + +.. 
code:: bash + + ./htcondor/snakemake_long.py --profile htcondor/spras_profile/ --env-manager mamba + ********************* Adjusting Resources ********************* Resource requirements can be adjusted as needed in -``spras_profile/config.yaml``, and HTCondor logs for this workflow can -be found in ``.snakemake/htcondor``. You can set a different log -directory by adding ``htcondor-jobdir: /path/to/dir`` to the profile's -configuration. +``htcondor/spras_profile/config.yaml``, and HTCondor logs for this workflow +can be found in your log directory. You can set a different log +directory by changing the configured ``htcondor-jobdir`` in the profile's +configuration. Alternatively, you can pass a different log directory +when invoking Snakemake with the ``--htcondor-jobdir`` argument. To run this same workflow in the OSPool, add the following to the profile's default-resources block: @@ -184,14 +200,9 @@ profile's default-resources block: requirements: | '(HAS_SINGULARITY == True) && (Poolname =!= "CHTC")' -**Note**: This workflow requires that the terminal session responsible -for running snakemake stays active. Closing the terminal will suspend -jobs, but the workflow can use Snakemake's checkpointing to pick up any -jobs where they left off. - -**Note**: If you encounter an error that says ``No module named -'spras'``, make sure you've ``pip install``-ed the SPRAS module into -your conda environment. +**Note**: If you encounter an error that says +``No module named 'spras'``, make sure you've ``pip install``-ed the +SPRAS module into your conda environment. **************** Job Monitoring @@ -202,11 +213,10 @@ To monitor the state of the job, you can use a second terminal to run ``condor_watch_q`` for realtime updates. 
Upon completion, the ``output`` directory from the workflow should be -returned as ``spras/docker-wrappers/SPRAS/output``, along with several -files containing the workflow's logging information (anything that -matches ``logs/spras_*`` and ending in ``.out``, ``.err``, or ``.log``). -If the job was unsuccessful, these files should contain useful debugging -clues about what may have gone wrong. +returned as ``output``, along with several files containing the workflow's +logging information (anything that matches ``htcondor/logs/spras_*`` and +ends in ``.out``, ``.err``, or ``.log``). If the job was unsuccessful, +these files should contain useful debugging clues about what may have gone wrong. **Note**: If you want to run the workflow with a different version of SPRAS, or one that contains development updates you've made, rebuild diff --git a/docker-wrappers/SPRAS/snakemake_long.py b/htcondor/snakemake_long.py similarity index 98% rename from docker-wrappers/SPRAS/snakemake_long.py rename to htcondor/snakemake_long.py index 8ad1ff537..0f30a4439 100755 --- a/docker-wrappers/SPRAS/snakemake_long.py +++ b/htcondor/snakemake_long.py @@ -100,7 +100,7 @@ def top_main(): # Make sure we have a value for the log directory and that the directory exists. 
if args.htcondor_jobdir is None: - args.htcondor_jobdir = pathlib.Path(os.getcwd()) / "snakemake-long-logs" + args.htcondor_jobdir = pathlib.Path(os.getcwd()) / "htcondor" / "logs" if not os.path.exists(args.htcondor_jobdir): os.makedirs(args.htcondor_jobdir) else: diff --git a/docker-wrappers/SPRAS/spras.sh b/htcondor/spras.sh similarity index 100% rename from docker-wrappers/SPRAS/spras.sh rename to htcondor/spras.sh diff --git a/docker-wrappers/SPRAS/spras.sub b/htcondor/spras.sub similarity index 87% rename from docker-wrappers/SPRAS/spras.sub rename to htcondor/spras.sub index 9dd1c4abc..79e1bbd94 100644 --- a/docker-wrappers/SPRAS/spras.sub +++ b/htcondor/spras.sub @@ -5,11 +5,11 @@ ############################################################ # Define a few macros we use throughout the submit file # ############################################################ -CONFIG_FILE = example_config.yaml +CONFIG_FILE = config/config.yaml NUM_PROCS = 4 # Paths to input data and Snakefile. -INPUT_DIR = ../../input -SNAKEFILE = ../../Snakefile +INPUT_DIR = input +SNAKEFILE = Snakefile ############################################################ # Specify that the workflow should run in the SPRAS # @@ -21,8 +21,9 @@ SNAKEFILE = ../../Snakefile # first # ############################################################ universe = container -container_image = .sif -# container_image = docker://reedcompbio/spras:v0.6.0 +#container_image = .sif +container_image = instructions-overhaul.sif +# container_image = docker://reedcompbio/spras:v0.2.0 ############################################################ # Specify names for log/stdout/stderr files generated by # @@ -30,15 +31,15 @@ container_image = .sif # NOTE: You should `mkdir logs/` before running, or the # # spras_$(Cluster).log file won't be available. 
# ############################################################ -log = logs/spras_$(Cluster)_$(Process).log -output = logs/spras_$(Cluster)_$(Process).out -error = logs/spras_$(Cluster)_$(Process).err +log = htcondor/logs/spras_$(Cluster)_$(Process).log +output = htcondor/logs/spras_$(Cluster)_$(Process).out +error = htcondor/logs/spras_$(Cluster)_$(Process).err ############################################################ # Specify the script to run inside the container. This is # # simply a wrapper on the Snakefile. # ############################################################ -executable = spras.sh +executable = htcondor/spras.sh arguments = "--cores $(NUM_PROCS) --configfile $(CONFIG_FILE) --retries 3" ############################################################ @@ -49,6 +50,7 @@ when_to_transfer_output = ON_EXIT transfer_input_files = $(CONFIG_FILE), $(INPUT_DIR), $(SNAKEFILE) # The output directory should match whatever you configure in your configfile. transfer_output_files = output +preserve_relative_paths = true ############################################################ # System specifications. Be sure to request enough disk to # @@ -70,7 +72,7 @@ JobBatchName = "SPRAS-workflow-OSPool" # needed if running from CHTC. If running from an OSPool # # AP, omit this line. # ############################################################ -+WantGlideIn = true +# +WantGlideIn = true ############################################################ # Not all Execution Points in the OSPool will have # @@ -81,7 +83,7 @@ JobBatchName = "SPRAS-workflow-OSPool" # this submit file from CHTC, we also need a requirement # # to prevent landing on a CHTC Execution Point. 
# ############################################################ -requirements = (HAS_SINGULARITY == True) && (Poolname =!= "CHTC") +# requirements = (HAS_SINGULARITY == True) && (Poolname =!= "CHTC") # Queue the job -queue 1 \ No newline at end of file +queue 1 diff --git a/docker-wrappers/SPRAS/spras_profile/config.yaml b/htcondor/spras_profile/config.yaml similarity index 83% rename from docker-wrappers/SPRAS/spras_profile/config.yaml rename to htcondor/spras_profile/config.yaml index 5cc0697d0..1720b59c4 100644 --- a/docker-wrappers/SPRAS/spras_profile/config.yaml +++ b/htcondor/spras_profile/config.yaml @@ -1,26 +1,31 @@ # Default configuration for the SPRAS/HTCondor executor profile. Each of these values # can also be passed via command line flags, e.g. `--jobs 30 --executor htcondor`. +# NOTE: File paths in here should be relative to where you submit from, typically the +# root of the SPRAS repository + # 'jobs' specifies the maximum number of HTCondor jobs that can be in the queue at once. jobs: 30 executor: htcondor -configfile: example_config.yaml +configfile: config/config.yaml +htcondor-jobdir: htcondor/logs + # Indicate to the plugin that jobs running on various EPs do not share a filesystem with # each other, or with the AP. shared-fs-usage: none # Distributed, heterogeneous computational environments are a wild place where strange things # can happen. If something goes wrong, try again up to 5 times. After that, we assume there's # a real error that requires user/admin intervention -retries: 5 +retries: 2 # Default resources will apply to all workflow steps. If a single workflow step fails due # to insufficient resources, it can be re-run with modified values. Snakemake will handle # picking up where it left off, and won't re-run steps that have already completed. 
default-resources: - job_wrapper: "spras.sh" + job_wrapper: "htcondor/spras.sh" # If running in CHTC, this only works with apptainer images # Note requirement for quotes around the image name - container_image: "'spras-v0.6.0.sif'" + container_image: "spras-v0.6.0.sif" universe: "container" # The value for request_disk should be large enough to accommodate the runtime container # image, any additional PRM container images, and your input data. diff --git a/run_htcondor.sh b/run_htcondor.sh new file mode 100755 index 000000000..8ca2d86e0 --- /dev/null +++ b/run_htcondor.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +# Example helper script to submit a SPRAS workflow to HTCondor with full parallelization +# +# Note that for full runs after any initial debugging, you may wish to remove the `--verbose` +# flag, as this significantly increases the size of log files + +./htcondor/snakemake_long.py --profile htcondor/spras_profile/ --verbose From 3fad221c1735bde69d8dc45c220aebcb763001aa Mon Sep 17 00:00:00 2001 From: Justin Hiemstra <75916364+jhiemstrawisc@users.noreply.github.com> Date: Tue, 27 Jan 2026 11:31:21 -0600 Subject: [PATCH 03/10] Update run_htcondor.sh Co-authored-by: Tristan F.-R. 
--- run_htcondor.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/run_htcondor.sh b/run_htcondor.sh index 8ca2d86e0..adf25e54d 100755 --- a/run_htcondor.sh +++ b/run_htcondor.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # Example helper script to submit a SPRAS workflow to HTCondor with full parallelization # From 811625a087203997d2f8f8fd1e5584162529e3e6 Mon Sep 17 00:00:00 2001 From: Justin Hiemstra Date: Tue, 27 Jan 2026 12:34:16 -0600 Subject: [PATCH 04/10] Address review feedback --- htcondor/snakemake_long.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/htcondor/snakemake_long.py b/htcondor/snakemake_long.py index 0f30a4439..50848ce27 100755 --- a/htcondor/snakemake_long.py +++ b/htcondor/snakemake_long.py @@ -9,6 +9,7 @@ import argparse import os import pathlib +import shlex import subprocess import sys import time @@ -47,9 +48,10 @@ def submit_local(snakefile, profile, htcondor_jobdir, verbose=False, env_manager script_location = pathlib.Path(__file__).resolve() # Build arguments string, including optional flags - args_str = f"long --snakefile {snakefile} --profile {profile} --htcondor-jobdir {htcondor_jobdir} --env-manager {env_manager}" + args_list = ["long", "--snakefile", snakefile, "--profile", profile, "--htcondor-jobdir", htcondor_jobdir, "--env-manager", env_manager] if verbose: - args_str += " --verbose" + args_list.append("--verbose") + args_str = " ".join(shlex.quote(str(arg)) for arg in args_list) submit_description = htcondor.Submit({ "executable": script_location, @@ -101,11 +103,7 @@ def top_main(): # Make sure we have a value for the log directory and that the directory exists. 
if args.htcondor_jobdir is None: args.htcondor_jobdir = pathlib.Path(os.getcwd()) / "htcondor" / "logs" - if not os.path.exists(args.htcondor_jobdir): - os.makedirs(args.htcondor_jobdir) - else: - if not os.path.exists(args.htcondor_jobdir): - os.makedirs(args.htcondor_jobdir) + args.htcondor_jobdir.mkdir(exist_ok=True) try: submit_local(args.snakefile, args.profile, args.htcondor_jobdir, args.verbose, args.env_manager) From 445c8c9f0a8e020268df112a4662bc70f200bb2d Mon Sep 17 00:00:00 2001 From: Justin Hiemstra Date: Mon, 8 Jun 2026 10:45:37 -0500 Subject: [PATCH 05/10] Fix htcondor.rst formatting, post rebase --- docs/htcondor.rst | 221 ++++++++++++++++++++++++++-------------------- 1 file changed, 124 insertions(+), 97 deletions(-) diff --git a/docs/htcondor.rst b/docs/htcondor.rst index f955d6e77..98641bf86 100644 --- a/docs/htcondor.rst +++ b/docs/htcondor.rst @@ -2,34 +2,39 @@ Running with HTCondor ####################### -The folder `htcondor/ `_ -inside the SPRAS git repository contains several files that can be used to -run workflows with this container on HTCondor. To use the ``spras`` -image in this environment, first login to an HTCondor Access Point (AP). -Then, from the AP clone this repo: +The folder `htcondor/ +`_ inside the +SPRAS git repository contains several files that can be used to run +workflows with this container on HTCondor. To use the ``spras`` image in +this environment, first login to an HTCondor Access Point (AP). Then, +from the AP clone this repo: .. code:: bash git clone https://github.com/Reed-CompBio/spras.git -**Note:** To work with SPRAS in HTCondor, it is recommended that you -build an Apptainer image instead of using Docker. See `Converting Docker -Images to Apptainer/Singularity Images`_ for instructions. Importantly, -the Apptainer image must be built for the linux/amd64 architecture. Most -HTCondor APs will have ``apptainer`` installed, but they may not have -``docker``. 
If this is the case, you can build the image with Docker on -your local machine, push the image to Docker Hub, and then convert it to -Apptainer's ``sif`` format on the AP. - -**Note:** It is best practice to make sure that the Snakefile you copy -for your workflow is the same version as the Snakefile baked into your -workflow's container image. When this workflow runs, the Snakefile you -just copied will be used during remote execution instead of the -Snakefile from the container. As a result, difficult-to-diagnose -versioning issues may occur if the version of SPRAS in the remote -container doesn't support the Snakefile on your current branch. The -safest bet is always to create your own image so you always know what's -inside of it. +.. tip:: + + To work with SPRAS in HTCondor, it is recommended that you build an + Apptainer image instead of using Docker. See `Converting Docker + Images to Apptainer/Singularity Images`_ for instructions. + Importantly, the Apptainer image must be built for the linux/amd64 + architecture. Most HTCondor APs will have ``apptainer`` installed, + but they may not have ``docker``. If this is the case, you can build + the image with Docker on your local machine, push the image to Docker + Hub, and then convert it to Apptainer's ``sif`` format on the AP. + +.. tip:: + + It is best practice to make sure that the Snakefile you copy for your + workflow is the same version as the Snakefile baked into your + workflow's container image. When this workflow runs, the Snakefile + you just copied will be used during remote execution instead of the + Snakefile from the container. As a result, difficult-to-diagnose + versioning issues may occur if the version of SPRAS in the remote + container doesn't support the Snakefile on your current branch. The + safest bet is always to create your own image so you always know + what's inside of it. There are currently two options for running SPRAS with HTCondor. 
The first is to submit all SPRAS jobs to a single remote Execution Point @@ -66,55 +71,66 @@ image does not use a "v" in the tag. Submitting All Jobs to a Single EP ************************************ -Running all SPRAS steps on a single remote Execution Point (EP) is a good way -to get started with HTCondor, but it is significantly less efficient than using -HTCondor's distributed capabilities. This approach is best suited for -workflows that are not computationally intensive, or for testing and -debugging purposes. - -Before submitting all SPRAS jobs to a single remote Execution Point (EP), -you'll need to set up three things: -1. You'll need to modify ``htcondor/spras.sub`` to point at your container - image, along with any other configuration changes you want to make like - choosing a logging directory or toggling OSPool submission. Note that all - paths in the submit file are relative to the directory from which you run - ``condor_submit``, which will typically be the root of the SPRAS repository. -2. You'll need to ensure your SPRAS configuration file has a few key values - set, including ``unpack_singularity: true`` and +Running all SPRAS steps on a single remote Execution Point (EP) is a +good way to get started with HTCondor, but it is significantly less +efficient than using HTCondor's distributed capabilities. This approach +is best suited for workflows that are not computationally intensive, or +for testing and debugging purposes. + +Before submitting all SPRAS jobs to a single remote Execution Point +(EP), you'll need to set up three things: + +#. You'll need to modify ``htcondor/spras.sub`` to point at your + container image, along with any other configuration changes you want + to make like choosing a logging directory or toggling OSPool + submission. Note that all paths in the submit file are relative to + the directory from which you run ``condor_submit``, which will + typically be the root of the SPRAS repository. + +#. 
You'll need to ensure your SPRAS configuration file has a few key + values set, including ``unpack_singularity: true`` and ``containers.framework: singularity``. -3. Finally, it's best practice to create the logging directory configured in - the submit file before submitting the job, e.g. to create the default log - directory, run ``mkdir htcondor/logs`` from the root of the repository. -Once these steps are complete, you can submit the job from the root of the -the SPRAS repository by running ``condor_submit htcondor/spras.sub``. +#. Finally, it's best practice to create the logging directory + configured in the submit file before submitting the job, e.g. to + create the default log directory, run ``mkdir htcondor/logs`` from + the root of the repository. -When the job completes, the ``output`` directory from the workflow should be -returned as ``output``. +Once these steps are complete, you can submit the job from the root of +the the SPRAS repository by running ``condor_submit +htcondor/spras.sub``. + +When the job completes, the ``output`` directory from the workflow +should be returned as ``output``. ************************** Submitting Parallel Jobs ************************** -Parallelizing SPRAS workflows with HTCondor requires much of the same setup -as the previous section, but with two additions. -1. Build/activate the SPRAS conda/mamba environment and ``pip install`` the SPRAS module - (via ``pip install .`` inside the SPRAS directory). -2. Install the `HTCondor Snakemake -executor `__; once your - SPRAS conda/mamba environment is activated and SPRAS is ``pip install``-ed, - you can install the HTCondor Snakemake executor with the following: +Parallelizing SPRAS workflows with HTCondor requires much of the same +setup as the previous section, but with two additions. -.. code:: bash +#. Build/activate the SPRAS conda/mamba environment and ``pip install`` + the SPRAS module (via ``pip install .`` inside the SPRAS directory). 
- pip install git+https://github.com/htcondor/snakemake-executor-plugin-htcondor.git +#. Install the `HTCondor Snakemake executor + `__; + once your SPRAS conda/mamba environment is activated and SPRAS is + ``pip install``-ed, you can install the HTCondor Snakemake executor + with the following: -3. Instead of editing ``spras.sub`` to define the workflow, this scenario - requires editing the SPRAS profile in ``htcondor/spras_profile/config.yaml``. - Make sure you specify the correct container, and change any other config - values needed by your workflow (defaults are fine in most cases). -4. Modify your SPRAS configuration file to set ``unpack_singularity: true`` and - ``containers.framework: singularity``. + .. code:: bash + + pip install git+https://github.com/htcondor/snakemake-executor-plugin-htcondor.git + +#. Instead of editing ``spras.sub`` to define the workflow, this + scenario requires editing the SPRAS profile in + ``htcondor/spras_profile/config.yaml``. Make sure you specify the + correct container, and change any other config values needed by your + workflow (defaults are fine in most cases). + +#. Modify your SPRAS configuration file to set ``unpack_singularity: + true`` and ``containers.framework: singularity``. Then, to start the workflow with HTCondor in the CHTC pool, there are two options: @@ -132,10 +148,12 @@ invoke Snakemake directly from the repository root by running: snakemake --profile htcondor/spras_profile/ -**Note**: Running the workflow in this way requires that your terminal -session stays active. Closing the terminal will suspend ongoing jobs, but -Snakemake will handle picking up where any previously-completed jobs left off -when you restart the workflow. +.. tip:: + + Running the workflow in this way requires that your terminal session + stays active. Closing the terminal will suspend ongoing jobs, but + Snakemake will handle picking up where any previously-completed jobs + left off when you restart the workflow. 
Long Running Snakemake Jobs (Managed by HTCondor) ================================================= @@ -156,25 +174,28 @@ repository root. You can execute this script by running: ./run_htcondor.sh -When executed in this mode, all log files for the workflow will be placed -into the logging directory (``htcondor/logs`` by default). In particular, -Snakemake's stdout/stderr outputs containing your workflow's progress can -be found split between ``htcondor/logs/snakemake.err`` and ``htcondor/logs/snakemake.out``. -These will also log each rule and what HTCondor job ID was submitted for -that rule (see the `troubleshooting section <#troubleshooting>`__ for -information on how to use these extra log files). +When executed in this mode, all log files for the workflow will be +placed into the logging directory (``htcondor/logs`` by default). In +particular, Snakemake's stdout/stderr outputs containing your workflow's +progress can be found split between ``htcondor/logs/snakemake.err`` and +``htcondor/logs/snakemake.out``. These will also log each rule and what +HTCondor job ID was submitted for that rule (see the `troubleshooting +section <#troubleshooting>`__ for information on how to use these extra +log files). + +.. tip:: -**Note**: While you're in the initial stages of developing/debugging your -workflow, it's very useful to invoke Snakemake with the ``--verbose`` flag. -This can be passed to Snakemake via the ``snakemake_long.py`` script by -adding it to the script's argument list, e.g.: + While you're in the initial stages of developing/debugging your + workflow, it's very useful to invoke Snakemake with the ``--verbose`` + flag. This can be passed to Snakemake via the ``snakemake_long.py`` + script by adding it to the script's argument list, e.g.: .. 
code:: bash ./htcondor/snakemake_long.py --profile htcondor/spras_profile/ --verbose -If you use mamba instead of conda for environment management, you can specify -this with the ``--env-manager`` flag: +If you use mamba instead of conda for environment management, you can +specify this with the ``--env-manager`` flag: .. code:: bash @@ -185,11 +206,12 @@ this with the ``--env-manager`` flag: ********************* Resource requirements can be adjusted as needed in -``htcondor/spras_profile/config.yaml``, and HTCondor logs for this workflow -can be found in your log directory. You can set a different log -directory by changing the configured ``htcondor-jobdir`` in the profile's -configuration. Alternatively, you can pass a different log directory -when invoking Snakemake with the ``--htcondor-jobdir`` argument. +``htcondor/spras_profile/config.yaml``, and HTCondor logs for this +workflow can be found in your log directory. You can set a different log +directory by changing the configured ``htcondor-jobdir`` in the +profile's configuration. Alternatively, you can pass a different log +directory when invoking Snakemake with the ``--htcondor-jobdir`` +argument. To run this same workflow in the OSPool, add the following to the profile's default-resources block: @@ -200,9 +222,11 @@ profile's default-resources block: requirements: | '(HAS_SINGULARITY == True) && (Poolname =!= "CHTC")' -**Note**: If you encounter an error that says -``No module named 'spras'``, make sure you've ``pip install``-ed the -SPRAS module into your conda environment. +.. tip:: + + If you encounter an error that says ``No module named 'spras'``, make + sure you've ``pip install``-ed the SPRAS module into your conda + environment. **************** Job Monitoring @@ -213,17 +237,20 @@ To monitor the state of the job, you can use a second terminal to run ``condor_watch_q`` for realtime updates. 
Upon completion, the ``output`` directory from the workflow should be -returned as ``output``, along with several files containing the workflow's -logging information (anything that matches ``htcondor/logs/spras_*`` and -ending in ``.out``, ``.err``, or ``.log``). If the job was unsuccessful, -these files should contain useful debugging clues about what may have gone wrong. - -**Note**: If you want to run the workflow with a different version of -SPRAS, or one that contains development updates you've made, rebuild -this image against the version of SPRAS you want to test, and push the -image to your image repository. To use that container in the workflow, -change the ``container_image`` line of ``spras.sub`` to point to the new -image. +returned as ``output``, along with several files containing the +workflow's logging information (anything that matches +``htcondor/logs/spras_*`` and ending in ``.out``, ``.err``, or +``.log``). If the job was unsuccessful, these files should contain +useful debugging clues about what may have gone wrong. + +.. tip:: + + If you want to run the workflow with a different version of SPRAS, or + one that contains development updates you've made, rebuild this image + against the version of SPRAS you want to test, and push the image to + your image repository. To use that container in the workflow, change + the ``container_image`` line of ``spras.sub`` to point to the new + image. 
***************** Troubleshooting From ceea753c3bff82ea2b79e0b7a12498d60d385914 Mon Sep 17 00:00:00 2001 From: Justin Hiemstra Date: Mon, 8 Jun 2026 10:45:53 -0500 Subject: [PATCH 06/10] Fix other misc docs build issues --- spras/dataset.py | 2 +- spras/evaluation.py | 35 +++++++++++++++++++++-------------- 2 files changed, 22 insertions(+), 15 deletions(-) diff --git a/spras/dataset.py b/spras/dataset.py index ddf74736f..330bdcffc 100644 --- a/spras/dataset.py +++ b/spras/dataset.py @@ -164,7 +164,7 @@ def __init__(self, dataset_params: DatasetSchema): def get_node_columns(self, col_names: list[str]) -> pd.DataFrame: """ @param scope: The name of the algorithm (or a more general 'scope' like SPRAS) - to fail on if get_node_columns fails. + to fail on if get_node_columns fails. @returns: A table containing the requested column names and node IDs for all nodes with at least 1 of the requested values being non-empty """ diff --git a/spras/evaluation.py b/spras/evaluation.py index 507ffb10a..2faa8ffc7 100644 --- a/spras/evaluation.py +++ b/spras/evaluation.py @@ -161,9 +161,10 @@ def node_precision_and_recall(file_paths: Iterable[Union[str, PathLike]], node_t @param file_paths: list of file paths of pathway reconstruction algorithm outputs @param node_table: the gold standard nodes @return: A DataFrame with the following columns: - - 'Pathway': Path object corresponding to each pathway file - - 'Precision': Precision of predicted nodes vs. gold standard nodes - - 'Recall': Recall of predicted nodes vs. gold standard nodes + + - 'Pathway': Path object corresponding to each pathway file + - 'Precision': Precision of predicted nodes vs. gold standard nodes + - 'Recall': Recall of predicted nodes vs. gold standard nodes """ y_true = set(node_table['NODEID']) results = [] @@ -194,7 +195,7 @@ def visualize_precision_and_recall_plot(pr_df: pd.DataFrame, output_file: str | for each algorithm. @param pr_df: Dataframe of calculated precision and recall for each pathway file. 
- Must include a preprocessed 'Algorithm' column. + Must include a preprocessed 'Algorithm' column. @param output_file: the filename to save the precision and recall of each pathway @param output_png: the filename to plot the precision and recall of each pathway (not a PRC) @param title: The title to use for the plot @@ -311,9 +312,11 @@ def pca_chosen_pathway(coordinates_files: Iterable[Union[str, PathLike]], pathwa coordinates Calculates the Euclidean distance from each data point to the KDE peak, then selects the closest pathway as the representative pathway. - If there is more than one representative pathway, a tiebreaker will be used - 1) choose smallest pathway (smallest number of edges and nodes) - 2) end all be all, choose the first one based on name + If there is more than one representative pathway, a tiebreaker will be used: + + 1) choose smallest pathway (smallest number of edges and nodes) + 2) end all be all, choose the first one based on name + Returns a list of file paths for the representative pathway associated with the closest data point to the centroid. @@ -362,14 +365,18 @@ def edge_frequency_node_ensemble(node_table: pd.DataFrame, ensemble_files: Itera A list of ensemble files can contain an aggregated ensemble or algorithm-specific ensembles per dataset 1. Prepare a set of default nodes (from the interactome and gold standard) with frequency 0, - ensuring all nodes are represented in the ensemble. - - Answers "Did the algorithm(s) select the correct nodes from the entire network?" - - It measures whether the algorithm(s) can distinguish relevant gold standard nodes - from the full "universe" of possible nodes present in the input network. + ensuring all nodes are represented in the ensemble. + + - Answers "Did the algorithm(s) select the correct nodes from the entire network?" + - It measures whether the algorithm(s) can distinguish relevant gold standard nodes + from the full "universe" of possible nodes present in the input network. + 2. 
For each edge ensemble file: - a. Read edges and their frequencies. - b. Convert edges frequencies into node-level frequencies for Node1 and Node2. - c. Merge with the default node set and group by node, taking the maximum frequency per node. + + a. Read edges and their frequencies. + b. Convert edge frequencies into node-level frequencies for Node1 and Node2. + c. Merge with the default node set and group by node, taking the maximum frequency per node. + 3. Store the resulting node-frequency ensemble under the corresponding ensemble source (label). If the interactome or gold standard table is empty, a ValueError is raised. From 9aa43b80484d40f0d22ccbfb39ec9db9bd30e5d7 Mon Sep 17 00:00:00 2001 From: Justin Hiemstra Date: Mon, 8 Jun 2026 11:12:02 -0500 Subject: [PATCH 07/10] Cleanup htcondor docs based on more review feedback --- docs/htcondor.rst | 57 +++++++++++++++++++++++++++--- htcondor/spras.sub | 10 +++--- htcondor/spras_profile/config.yaml | 2 +- 3 files changed, 59 insertions(+), 10 deletions(-) diff --git a/docs/htcondor.rst b/docs/htcondor.rst index 98641bf86..4ba70f914 100644 --- a/docs/htcondor.rst +++ b/docs/htcondor.rst @@ -41,6 +41,56 @@ first is to submit all SPRAS jobs to a single remote Execution Point (EP). The second is to use the Snakemake HTCondor executor to parallelize the workflow by submitting each job to its own EP. +*********************************** + Which Files Are Used in Each Mode +*********************************** + +The ``htcondor`` directory contains several files, but not all of them +are used in both run modes. A common point of confusion is which files +apply where -- for example, ``spras.sub`` is only used when submitting +to a single EP and is ignored when running in parallel. The table below +summarizes what each file is for and which mode uses it, so you know +what to edit before submitting. + +.. 
list-table:: + :header-rows: 1 + :widths: 34 13 13 40 + + - - File + - Single EP + - Parallel + - Purpose + + - - ``htcondor/spras.sub`` + - ✓ + - + - HTCondor submit file that runs the entire workflow as a single + job on one EP. + + - - ``htcondor/spras.sh`` + - ✓ + - ✓ + - Wrapper script that invokes Snakemake inside the container. + Used as the executable in both modes. + + - - ``htcondor/spras_profile/config.yaml`` + - + - ✓ + - Snakemake HTCondor-executor profile defining resources and + submission settings for parallel runs. + + - - ``htcondor/snakemake_long.py`` + - + - ✓ + - Launches Snakemake as a long-running managed job so the + workflow survives terminal disconnects. + + - - ``run_htcondor.sh`` + - + - ✓ + - Convenience wrapper (in the repository root) around + ``snakemake_long.py``. + ********************************************************** Converting Docker Images to Apptainer/Singularity Images ********************************************************** @@ -97,8 +147,7 @@ Before submitting all SPRAS jobs to a single remote Execution Point the root of the repository. Once these steps are complete, you can submit the job from the root of -the the SPRAS repository by running ``condor_submit -htcondor/spras.sub``. +the SPRAS repository by running ``condor_submit htcondor/spras.sub``. When the job completes, the ``output`` directory from the workflow should be returned as ``output``. @@ -190,9 +239,9 @@ log files). flag. This can be passed to Snakemake via the ``snakemake_long.py`` script by adding it to the script's argument list, e.g.: -.. code:: bash + .. 
code:: bash - ./htcondor/snakemake_long.py --profile htcondor/spras_profile/ --verbose + ./htcondor/snakemake_long.py --profile htcondor/spras_profile/ --verbose If you use mamba instead of conda for environment management, you can specify this with the ``--env-manager`` flag: diff --git a/htcondor/spras.sub b/htcondor/spras.sub index 79e1bbd94..b48d0fb3a 100644 --- a/htcondor/spras.sub +++ b/htcondor/spras.sub @@ -21,15 +21,15 @@ SNAKEFILE = Snakefile # first # ############################################################ universe = container -#container_image = .sif -container_image = instructions-overhaul.sif -# container_image = docker://reedcompbio/spras:v0.2.0 +container_image = .sif +# container_image = docker://reedcompbio/spras:v0.6.0 ############################################################ # Specify names for log/stdout/stderr files generated by # # HTCondor. # -# NOTE: You should `mkdir logs/` before running, or the # -# spras_$(Cluster).log file won't be available. # +# NOTE: You should create the htcondor/logs/ directory # +# before running (mkdir htcondor/logs), or these files # +# won't be available. # ############################################################ log = htcondor/logs/spras_$(Cluster)_$(Process).log output = htcondor/logs/spras_$(Cluster)_$(Process).out diff --git a/htcondor/spras_profile/config.yaml b/htcondor/spras_profile/config.yaml index 1720b59c4..1b2c9e8a4 100644 --- a/htcondor/spras_profile/config.yaml +++ b/htcondor/spras_profile/config.yaml @@ -14,7 +14,7 @@ htcondor-jobdir: htcondor/logs # each other, or with the AP. shared-fs-usage: none # Distributed, heterogeneous computational environments are a wild place where strange things -# can happen. If something goes wrong, try again up to 5 times. After that, we assume there's +# can happen. If something goes wrong, try again up to 2 times. 
After that, we assume there's # a real error that requires user/admin intervention retries: 2 From 3ca8ec03bfabad6be3a13d14ed5ee7239eb64e43 Mon Sep 17 00:00:00 2001 From: Justin Hiemstra Date: Mon, 8 Jun 2026 13:55:28 -0500 Subject: [PATCH 08/10] Update executor installation to reflect availability on PyPI --- docs/htcondor.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/htcondor.rst b/docs/htcondor.rst index 4ba70f914..2e5810590 100644 --- a/docs/htcondor.rst +++ b/docs/htcondor.rst @@ -170,7 +170,7 @@ setup as the previous section, but with two additions. .. code:: bash - pip install git+https://github.com/htcondor/snakemake-executor-plugin-htcondor.git + pip install snakemake-executor-plugin-htcondor #. Instead of editing ``spras.sub`` to define the workflow, this scenario requires editing the SPRAS profile in From 1c37c02ec003912e5077672bfd2b785d585d18e1 Mon Sep 17 00:00:00 2001 From: Justin Hiemstra Date: Mon, 8 Jun 2026 16:12:35 -0500 Subject: [PATCH 09/10] Mention CHTC docs for building Apptainer images --- docs/htcondor.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/htcondor.rst b/docs/htcondor.rst index 2e5810590..fac841269 100644 --- a/docs/htcondor.rst +++ b/docs/htcondor.rst @@ -117,6 +117,12 @@ After running this command, a new file called ``spras-v0.6.0.sif`` will exist in the directory where the command was run. Note that the Docker image does not use a "v" in the tag. +.. 
tip:: + + If you're building an Apptainer image at CHTC, please follow this + guide for building images in an interactive job: + https://chtc.cs.wisc.edu/uw-research-computing/apptainer-htc.html + ************************************ Submitting All Jobs to a Single EP ************************************ From e30f7b6b93f72bca2473c00d7ab310be4a32f26b Mon Sep 17 00:00:00 2001 From: Justin Hiemstra Date: Mon, 8 Jun 2026 16:21:50 -0500 Subject: [PATCH 10/10] Add magic apptainer environment variables to htcondor spras executable These came from testing Neha's real workflow in June 2026. Not totally sure how they all work (and whether additional environment variables will need to be added in the future), but they were key to getting custom sif images to unpack alongside the jobs. --- htcondor/spras.sh | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/htcondor/spras.sh b/htcondor/spras.sh index cdfb924c6..ce423f8a9 100755 --- a/htcondor/spras.sh +++ b/htcondor/spras.sh @@ -6,5 +6,13 @@ set -e # When .cache files are created, they need to know where HOME is to write there. # In this case, that should be the HTCondor scratch dir the job is executing in. export HOME=$(pwd) +# Various other apptainer-related environment variables that can cause problems +# if not explicitly set. These came from testing/debugging workflows on the +# OSPool. +export APPTAINER_CACHEDIR=$(pwd) +export APPTAINER_TMPDIR=$(pwd) +mkdir -p "$APPTAINER_TMPDIR" +unset SINGULARITY_BIND APPTAINER_BIND +unset SINGULARITY_TMPDIR snakemake "$@"