From 0263c0feeb088c2d7ccd7cb2221c0e14dc6e252d Mon Sep 17 00:00:00 2001
From: Justin Hiemstra <jhiemstra@wisc.edu>
Date: Fri, 23 Jan 2026 14:47:33 -0600
Subject: [PATCH 01/10] Add additional flags to snakemake_long for env
 management, verbose logging

I was tired of hacking around wanting verbose logging in the HTCondor
Snakemake executor, so I added some plumbing to pass Snakemake's
'--verbose' flag through 'snakemake_long.py' to snakemake itself.

Additionally, I added '--env-manager' so I could run things with my
preferred mamba env instead of conda (which is too slow to rebuild).
---
 docker-wrappers/SPRAS/snakemake_long.py | 34 ++++++++++++++++++++-----
 1 file changed, 27 insertions(+), 7 deletions(-)

diff --git a/docker-wrappers/SPRAS/snakemake_long.py b/docker-wrappers/SPRAS/snakemake_long.py
index 230593b94..8ad1ff537 100755
--- a/docker-wrappers/SPRAS/snakemake_long.py
+++ b/docker-wrappers/SPRAS/snakemake_long.py
@@ -30,6 +30,9 @@ def parse_args(isLocal=False):
         parser.add_argument("command", help="Helper command to run", choices=["long"])
     parser.add_argument("--snakefile", help="The Snakefile to run. If omitted, the Snakefile is assumed to be in the current directory.", required=False)
     parser.add_argument("--profile", help="A path to a directory containing the desired Snakemake profile.", required=True)
+    parser.add_argument("--verbose", help="Enable verbose output for debugging.", action="store_true", required=False)
+    parser.add_argument("--env-manager", help="The environment manager to use (conda or mamba). Default is conda.",
+                        choices=["conda", "mamba"], default="conda", required=False)
     # I'd love to change this to "logdir", but using the same name as Snakemake for consistency of feeling between this script
     # and Snakemake proper.
     parser.add_argument("--htcondor-jobdir", help="The directory Snakemake will write logs to. If omitted, a 'logs` directory will be created in the current directory", required=False)
@@ -39,19 +42,24 @@ def parse_args(isLocal=False):
 Given a Snakefile, profile, and HTCondor job directory, submit a local universe job that runs
 Snakemake from the context of the submission directory.
 """
-def submit_local(snakefile, profile, htcondor_jobdir):
+def submit_local(snakefile, profile, htcondor_jobdir, verbose=False, env_manager="conda"):
     # Get the location of this script, which also serves as the executable for the condor job.
     script_location = pathlib.Path(__file__).resolve()
 
+    # Build arguments string, including optional flags
+    args_str = f"long --snakefile {snakefile} --profile {profile} --htcondor-jobdir {htcondor_jobdir} --env-manager {env_manager}"
+    if verbose:
+        args_str += " --verbose"
+
     submit_description = htcondor.Submit({
         "executable":              script_location,
         # We use the "long" command to indicate to the script that it should run the Snakemake command instead of submitting another job.
         # See comment in parse_args for more information.
-        "arguments":               f"long --snakefile {snakefile} --profile {profile} --htcondor-jobdir {htcondor_jobdir}",
+        "arguments":               args_str,
         "universe":                "local",
         "request_disk":            "512MB",
         "request_cpus":            1,
-        "request_memory":          512,
+        "request_memory":          "512MB",
 
         # Set up logging
         "log":                     f"{htcondor_jobdir}/snakemake.log",
@@ -100,7 +108,7 @@ def top_main():
             os.makedirs(args.htcondor_jobdir)
 
     try:
-        submit_local(args.snakefile, args.profile, args.htcondor_jobdir)
+        submit_local(args.snakefile, args.profile, args.htcondor_jobdir, args.verbose, args.env_manager)
     except Exception as e:
         print(f"Error: Could not submit local universe job. {e}")
         raise
@@ -108,17 +116,29 @@ def top_main():
 """
 Command to activate conda environment and run Snakemake. This is run by the local universe job, not the user.
 """
+def get_env_activation_command(env_manager, env_name="spras"):
+    """Generate the appropriate shell commands to activate the environment based on the env manager."""
+    if env_manager == "mamba":
+        # mamba uses shell hook for activation
+        return f'eval "$(mamba shell hook --shell bash)" && mamba activate {env_name}'
+    else:  # conda (default)
+        return f'source $(conda info --base)/etc/profile.d/conda.sh && conda activate {env_name}'
+
 def long_main():
     args = parse_args(True)
 
     # Note that we need to unset APPTAINER_CACHEDIR in this case but not in the local terminal case because the wrapper
     # HTCondor job has a different environment and populating this value causes Snakemake to fail when it tries to write
     # to spool (a read-only filesystem from the perspective of the EP job).
+    verbose_flag = "--verbose" if args.verbose else ""
+
+    # Get the appropriate activation command for the detected/specified env manager
+    activation_cmd = get_env_activation_command(args.env_manager)
+
     command = f"""
-    source $(conda info --base)/etc/profile.d/conda.sh && \
-    conda activate spras && \
+    {activation_cmd} && \
     unset APPTAINER_CACHEDIR && \
-    snakemake -s {args.snakefile} --profile {args.profile} --htcondor-jobdir {args.htcondor_jobdir}
+    snakemake -s {args.snakefile} --profile {args.profile} --htcondor-jobdir {args.htcondor_jobdir} {verbose_flag}
     """
 
     try:

From 6281e535baf010ca3a280db7bfd9509745f59264 Mon Sep 17 00:00:00 2001
From: Justin Hiemstra <jhiemstra@wisc.edu>
Date: Fri, 23 Jan 2026 15:42:43 -0600
Subject: [PATCH 02/10] Overhaul HTCondor instructions

The executor has matured quite a bit since these instructions were
first drafted, and it's my hope that these changes remove a lot of
the headache for running jobs.

Now, you can edit config files in `config/` and use the `input/`
directory directly. Workflows should be submitted directly from the
repository root.
---
 .gitignore                                    |   9 +
 docker-wrappers/SPRAS/example_config.yaml     | 154 ----------------
 docs/htcondor.rst                             | 172 +++++++++---------
 .../SPRAS => htcondor}/snakemake_long.py      |   2 +-
 {docker-wrappers/SPRAS => htcondor}/spras.sh  |   0
 {docker-wrappers/SPRAS => htcondor}/spras.sub |  26 +--
 .../spras_profile/config.yaml                 |  13 +-
 run_htcondor.sh                               |   8 +
 8 files changed, 132 insertions(+), 252 deletions(-)
 delete mode 100644 docker-wrappers/SPRAS/example_config.yaml
 rename {docker-wrappers/SPRAS => htcondor}/snakemake_long.py (98%)
 rename {docker-wrappers/SPRAS => htcondor}/spras.sh (100%)
 rename {docker-wrappers/SPRAS => htcondor}/spras.sub (87%)
 rename {docker-wrappers/SPRAS => htcondor}/spras_profile/config.yaml (83%)
 create mode 100755 run_htcondor.sh

diff --git a/.gitignore b/.gitignore
index 3629c49c6..91933def7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -144,3 +144,12 @@ TempMat.mat
 
 # Singularity cache
 unpacked
+
+# HTCondor logs
+htcondor/logs/
+*.err
+*.out
+*.log
+
+# Any sif files
+*.sif
diff --git a/docker-wrappers/SPRAS/example_config.yaml b/docker-wrappers/SPRAS/example_config.yaml
deleted file mode 100644
index 1e7fd69c2..000000000
--- a/docker-wrappers/SPRAS/example_config.yaml
+++ /dev/null
@@ -1,154 +0,0 @@
-# Global workflow control
-
-# The length of the hash used to identify a parameter combination
-hash_length: 7
-
-containers:
-  # Specify the container framework used by each PRM wrapper. Valid options include:
-  # - docker (default if not specified)
-  # - singularity OR apptainer -- Apptainer (formerly Singularity) is useful in HPC/HTC environments where docker isn't allowed
-  # - dsub -- experimental with limited support, used for running on Google Cloud
-  framework: singularity
-
-  # Only used if framework is set to singularity/apptainer, this will unpack the containers
-  # to the local filesystem. This is useful when PRM containers need to run inside another container,
-  # such as would be the case in an HTCondor/OSPool environment.
-  # NOTE: This unpacks containers to the local filesystem, which will take up space in a way
-  # that persists after the workflow is complete. To clean up the unpacked containers, the user must
-  # manually delete them. For convenience, these unpacked files will exist in the current working directory
-  # under `unpacked`.
-  # Here, we unpack it since we're running on HTCondor.
-  unpack_singularity: true
-
-  # Allow the user to configure which container registry containers should be pulled from
-  # Note that this assumes container names are consistent across registries, and that the
-  # registry being passed doesn't require authentication for pull actions
-  registry:
-    base_url: docker.io
-    # The owner or project of the registry
-    # For example, "reedcompbio" if the image is available as docker.io/reedcompbio/allpairs
-    owner: reedcompbio
-
-# This list of algorithms should be generated by a script which checks the filesystem for installs.
-# It shouldn't be changed by mere mortals. (alternatively, we could add a path to executable for each algorithm
-# in the list to reduce the number of assumptions of the program at the cost of making the config a little more involved)
-# Each algorithm has an 'include' parameter. By toggling 'include' to true/false the user can change
-# which algorithms are run in a given experiment.
-#
-# algorithm-specific parameters are embedded in lists so that users can specify multiple. If multiple
-# parameters are specified then the algorithm will be run as many times as needed to cover all parameter
-# combinations. For instance if we have the following:
-# - name: "myAlg"
-#   params:
-#         include: true
-#         a: [1,2]
-#         b: [0.5,0.75]
-#
-# then myAlg will be run on (a=1,b=0.5),(a=1,b=0.75),(a=2,b=0.5), and (a=2,b=0,75). Pretty neat, but be
-# careful: too many parameters might make your runs take a long time.
-
-algorithms:
-  - name: "pathlinker"
-    include: false
-    runs:
-      run1:
-        k: range(100,201,100)
-
-  - name: "omicsintegrator1"
-    include: true
-    runs:
-      run1:
-        r: [5]
-        b: [5, 6]
-        w: np.linspace(0,5,2)
-        g: [3]
-        d: [10]
-
-  - name: "omicsintegrator2"
-    include: true
-    runs:
-      run1:
-        b: [4]
-        g: [0]
-      run2:
-        b: [2]
-        g: [3]
-
-  - name: "meo"
-    include: true
-    runs:
-      run1:
-        max_path_length: [3]
-        local_search: [true]
-        rand_restarts: [10]
-
-  - name: "mincostflow"
-    include: true
-    runs:
-      run1:
-        flow: [1] # The flow must be an int
-        capacity: [1]
-
-  - name: "allpairs"
-    include: true
-
-  - name: "domino"
-    include: true
-    runs:
-      run1:
-        slice_threshold: [0.3]
-        module_threshold: [0.05]
-
-# Here we specify which pathways to run and other file location information.
-# DataLoader.py can currently only load a single dataset
-# Assume that if a dataset label does not change, the lists of associated input files do not change
-datasets:
-  - label: data0
-    node_files: ["node-prizes.txt", "sources.txt", "targets.txt"]
-    # DataLoader.py can currently only load a single edge file, which is the primary network
-    edge_files: ["network.txt"]
-    # Placeholder
-    other_files: []
-    # Relative path from the spras directory
-    data_dir: "input"
-#   - label: data1
-#     # Reuse some of the same sources file as 'data0' but different network and targets
-#     node_files: ["node-prizes.txt", "sources.txt", "alternative-targets.txt"]
-#     edge_files: ["alternative-network.txt"]
-#     other_files: []
-#     # Relative path from the spras directory
-#     data_dir: "input"
-
-# If we want to reconstruct then we should set run to true.
-# TODO: if include is true above but run is false here, algs are not run.
-# is this the behavior we want?
-reconstruction_settings:
-
-  #set where everything is saved
-  locations:
-
-    #place the save path here
-    # TODO move to global
-    reconstruction_dir: "output"
-
-analysis:
-  # Create one summary per pathway file and a single summary table for all pathways for each dataset
-  summary:
-    include: true
-  # Create Cytoscape session file with all pathway graphs for each dataset
-  cytoscape:
-    include: false
-  # Machine learning analysis (e.g. clustering) of the pathway output files for each dataset
-  ml:
-    include: true
-    # specify how many principal components to calculate
-    components: 2
-    # boolean to show the labels on the pca graph
-    labels: true
-    # 'ward', 'complete', 'average', 'single'
-    # if linkage: ward, must use metric: euclidean
-    linkage: 'ward'
-    # 'euclidean', 'manhattan', 'cosine'
-    metric: 'euclidean'
-  evaluation:
-    include: false
diff --git a/docs/htcondor.rst b/docs/htcondor.rst
index f5a0f92e0..f955d6e77 100644
--- a/docs/htcondor.rst
+++ b/docs/htcondor.rst
@@ -2,10 +2,9 @@
  Running with HTCondor
 #######################
 
-The folder `docker-wrappers/SPRAS
-<https://github.com/Reed-CompBio/spras/tree/main/docker-wrappers/SPRAS>`_
-inside the SPRAS git repository contains several files that can be used
-to run workflows with this container on HTCondor. To use the ``spras``
+The folder `htcondor/ <https://github.com/Reed-CompBio/spras/tree/main/htcondor>`_
+inside the SPRAS git repository contains several files that can be used to
+run workflows with this container on HTCondor. To use the ``spras``
 image in this environment, first login to an HTCondor Access Point (AP).
 Then, from the AP clone this repo:
 
@@ -67,65 +66,55 @@ image does not use a "v" in the tag.
  Submitting All Jobs to a Single EP
 ************************************
 
-Navigate to the ``spras/docker-wrappers/SPRAS`` directory and create the
-``logs/`` directory (``mkdir logs``). Next, modify ``spras.sub`` so that
-it uses the SPRAS apptainer image you created:
-
-.. code::
-
-   container_image = < your spras image >.sif
-
-Make sure to modify the configuration file to have
-``unpack_singularity`` set to ``true``, and ``containers.framework`` set
-to ``singularity``: else, the workflow will (likely) fail.
-
-Then run ``condor_submit spras.sub``, which will submit SPRAS to
-HTCondor as a single job with as many cores as indicated by the
-``NUM_PROCS`` line in ``spras.sub``, using the value of
-``EXAMPLE_CONFIG`` as the SPRAS configuration file. By default, the
-``example_config.yaml`` runs everything except for ``cytoscape``, which
-appears to fail periodically in HTCondor.
-
-**Note**: The ``spras.sub`` submit file is an example of how this
-workflow could be submitted from a CHTC Access Point (AP) to the OSPool.
-To run in the local CHTC pool, omit the ``+WantGlideIn`` and
-``requirements`` lines.
+Running all SPRAS steps on a single remote Execution Point (EP) is a good way
+to get started with HTCondor, but it is significantly less efficient than using
+HTCondor's distributed capabilities. This approach is best suited for
+workflows that are not computationally intensive, or for testing and
+debugging purposes.
+
+Before submitting all SPRAS jobs to a single remote Execution Point (EP), you'll need to set up three things:
+
+1. You'll need to modify ``htcondor/spras.sub`` to point at your container
+   image, along with any other configuration changes you want to make like
+   choosing a logging directory or toggling OSPool submission. Note that all
+   paths in the submit file are relative to the directory from which you run
+   ``condor_submit``, which will typically be the root of the SPRAS repository.
+2. You'll need to ensure your SPRAS configuration file has a few key values
+   set, including ``unpack_singularity: true`` and
+   ``containers.framework: singularity``.
+3. Finally, it's best practice to create the logging directory configured in
+   the submit file before submitting the job, e.g. to create the default log
+   directory, run ``mkdir htcondor/logs`` from the root of the repository.
+
+Once these steps are complete, you can submit the job from the root of the
+SPRAS repository by running ``condor_submit htcondor/spras.sub``.
+
+When the job completes, the ``output`` directory from the workflow should be
+returned as ``output``.
 
 **************************
  Submitting Parallel Jobs
 **************************
 
-Parallelizing SPRAS workflows with HTCondor requires the same setup as
-the previous section, but with two additions. First, it requires an
-activated SPRAS conda environment with a ``pip install``-ed version of
-the SPRAS module (via ``pip install .`` inside the SPRAS directory).
-
-Second, it requires an experimental executor for HTCondor that has been
-forked from the upstream `HTCondor Snakemake executor
-<https://github.com/htcondor/snakemake-executor-plugin-htcondor>`__.
-
-After activating your ``spras`` conda environment and ``pip``-installing
-SPRAS, you can install the HTCondor Snakemake executor with the
-following:
+Parallelizing SPRAS workflows with HTCondor requires much of the same setup as the previous section, plus the following additional steps:
+
+1. Build/activate the SPRAS conda/mamba environment and ``pip install`` the SPRAS module
+   (via ``pip install .`` inside the SPRAS directory).
+2. Install the `HTCondor Snakemake
+   executor <https://github.com/htcondor/snakemake-executor-plugin-htcondor>`__; once your
+   SPRAS conda/mamba environment is activated and SPRAS is ``pip install``-ed,
+   you can install the HTCondor Snakemake executor with the following:
 
 .. code:: bash
 
    pip install git+https://github.com/htcondor/snakemake-executor-plugin-htcondor.git
 
-Currently, this executor requires that all input to the workflow is
-scoped to the current working directory. Therefore, you'll need to copy
-the Snakefile and your input directory (as specified by
-``example_config.yaml``) to this directory:
-
-.. code:: bash
-
-   cp ../../Snakefile . && \
-   cp -r ../../input .
-
-Instead of editing ``spras.sub`` to define the workflow, this scenario
-requires editing the SPRAS profile in ``spras_profile/config.yaml``.
-Make sure you specify the correct container, and change any other config
-values needed by your workflow (defaults are fine in most cases).
+3. Instead of editing ``spras.sub`` to define the workflow, this scenario
+   requires editing the SPRAS profile in ``htcondor/spras_profile/config.yaml``.
+   Make sure you specify the correct container, and change any other config
+   values needed by your workflow (defaults are fine in most cases).
+4. Modify your SPRAS configuration file to set ``unpack_singularity: true`` and
+   ``containers.framework: singularity``.
 
 Then, to start the workflow with HTCondor in the CHTC pool, there are
 two options:
@@ -137,11 +126,16 @@ The first option is to run Snakemake in a way that ties its execution to
 your terminal. This is good for testing short workflows and running
 short jobs. The downside is that closing your terminal causes the
 process to exit, removing any unfinished jobs. To use this option,
-invoke Snakemake directly by running:
+invoke Snakemake directly from the repository root by running:
 
 .. code:: bash
 
-   snakemake --profile spras_profile
+   snakemake --profile htcondor/spras_profile/
+
+**Note**: Running the workflow in this way requires that your terminal
+session stays active. Closing the terminal will suspend ongoing jobs, but
+Snakemake will handle picking up where any previously-completed jobs left off
+when you restart the workflow.
 
 Long Running Snakemake Jobs (Managed by HTCondor)
 =================================================
@@ -149,31 +143,53 @@ Long Running Snakemake Jobs (Managed by HTCondor)
 The second option is to let HTCondor manage the Snakemake process, which
 allows the jobs to run as long as needed. Instead of seeing Snakemake
 output directly in your terminal, you'll be able to see it in a
-specified log file. To use this option, make sure ``snakemake_long.py``
-is executable (you can run ``chmod +x snakemake_long.py`` from the AP to
-make sure it is), and then run:
+specified log file. To use this option, run from the repository root:
 
-.. code::
+.. code:: bash
+
+   ./htcondor/snakemake_long.py --profile htcondor/spras_profile/
 
-   ./snakemake_long.py --profile spras_profile --htcondor-jobdir <path/to/logging/directory>
+A convenience script called ``run_htcondor.sh`` is also provided in the
+repository root. You can execute this script by running:
+
+.. code:: bash
 
-When run in this mode, all log files for the workflow will be placed
-into the path you provided for the logging directory. In particular,
-Snakemake's outputs with job progress can be found split between
-``<logdir>/snakemake-long.err`` and ``<logdir>/snakemake-long.out``.
+   ./run_htcondor.sh
+
+When executed in this mode, all log files for the workflow will be placed
+into the logging directory (``htcondor/logs`` by default). In particular,
+Snakemake's stdout/stderr outputs containing your workflow's progress can
+be found split between ``htcondor/logs/snakemake.err`` and ``htcondor/logs/snakemake.out``.
 These will also log each rule and what HTCondor job ID was submitted for
 that rule (see the `troubleshooting section <#troubleshooting>`__ for
 information on how to use these extra log files).
 
+**Note**: While you're in the initial stages of developing/debugging your
+workflow, it's very useful to invoke Snakemake with the ``--verbose`` flag.
+This can be passed to Snakemake via the ``snakemake_long.py`` script by
+adding it to the script's argument list, e.g.:
+
+.. code:: bash
+
+   ./htcondor/snakemake_long.py --profile htcondor/spras_profile/ --verbose
+
+If you use mamba instead of conda for environment management, you can specify
+this with the ``--env-manager`` flag:
+
+.. code:: bash
+
+   ./htcondor/snakemake_long.py --profile htcondor/spras_profile/ --env-manager mamba
+
 *********************
  Adjusting Resources
 *********************
 
 Resource requirements can be adjusted as needed in
-``spras_profile/config.yaml``, and HTCondor logs for this workflow can
-be found in ``.snakemake/htcondor``. You can set a different log
-directory by adding ``htcondor-jobdir: /path/to/dir`` to the profile's
-configuration.
+``htcondor/spras_profile/config.yaml``, and HTCondor logs for this workflow
+can be found in your log directory. You can set a different log
+directory by changing the configured ``htcondor-jobdir`` in the profile's
+configuration. Alternatively, you can pass a different log directory
+when invoking Snakemake with the ``--htcondor-jobdir`` argument.
 
 To run this same workflow in the OSPool, add the following to the
 profile's default-resources block:
@@ -184,14 +200,9 @@ profile's default-resources block:
    requirements: |
      '(HAS_SINGULARITY == True) && (Poolname =!= "CHTC")'
 
-**Note**: This workflow requires that the terminal session responsible
-for running snakemake stays active. Closing the terminal will suspend
-jobs, but the workflow can use Snakemake's checkpointing to pick up any
-jobs where they left off.
-
-**Note**: If you encounter an error that says ``No module named
-'spras'``, make sure you've ``pip install``-ed the SPRAS module into
-your conda environment.
+**Note**: If you encounter an error that says
+``No module named 'spras'``, make sure you've ``pip install``-ed the
+SPRAS module into your conda environment.
 
 ****************
  Job Monitoring
@@ -202,11 +213,10 @@ To monitor the state of the job, you can use a second terminal to run
 ``condor_watch_q`` for realtime updates.
 
 Upon completion, the ``output`` directory from the workflow should be
-returned as ``spras/docker-wrappers/SPRAS/output``, along with several
-files containing the workflow's logging information (anything that
-matches ``logs/spras_*`` and ending in ``.out``, ``.err``, or ``.log``).
-If the job was unsuccessful, these files should contain useful debugging
-clues about what may have gone wrong.
+returned as ``output``, along with several files containing the workflow's
+logging information (anything that matches ``htcondor/logs/spras_*`` and
+ending in ``.out``, ``.err``, or ``.log``). If the job was unsuccessful,
+these files should contain useful debugging clues about what may have gone wrong.
 
 **Note**: If you want to run the workflow with a different version of
 SPRAS, or one that contains development updates you've made, rebuild
diff --git a/docker-wrappers/SPRAS/snakemake_long.py b/htcondor/snakemake_long.py
similarity index 98%
rename from docker-wrappers/SPRAS/snakemake_long.py
rename to htcondor/snakemake_long.py
index 8ad1ff537..0f30a4439 100755
--- a/docker-wrappers/SPRAS/snakemake_long.py
+++ b/htcondor/snakemake_long.py
@@ -100,7 +100,7 @@ def top_main():
 
     # Make sure we have a value for the log directory and that the directory exists.
     if args.htcondor_jobdir is None:
-        args.htcondor_jobdir = pathlib.Path(os.getcwd()) / "snakemake-long-logs"
+        args.htcondor_jobdir = pathlib.Path(os.getcwd()) / "htcondor" / "logs"
         if not os.path.exists(args.htcondor_jobdir):
             os.makedirs(args.htcondor_jobdir)
     else:
diff --git a/docker-wrappers/SPRAS/spras.sh b/htcondor/spras.sh
similarity index 100%
rename from docker-wrappers/SPRAS/spras.sh
rename to htcondor/spras.sh
diff --git a/docker-wrappers/SPRAS/spras.sub b/htcondor/spras.sub
similarity index 87%
rename from docker-wrappers/SPRAS/spras.sub
rename to htcondor/spras.sub
index 9dd1c4abc..79e1bbd94 100644
--- a/docker-wrappers/SPRAS/spras.sub
+++ b/htcondor/spras.sub
@@ -5,11 +5,11 @@
 ############################################################
 #  Define a few macros we use throughout the submit file   #
 ############################################################
-CONFIG_FILE = example_config.yaml
+CONFIG_FILE = config/config.yaml
 NUM_PROCS = 4
 # Paths to input data and Snakefile.
-INPUT_DIR = ../../input
-SNAKEFILE = ../../Snakefile
+INPUT_DIR = input
+SNAKEFILE = Snakefile
 
 ############################################################
 # Specify that the workflow should run in the SPRAS        #
@@ -21,8 +21,9 @@ SNAKEFILE = ../../Snakefile
 # first                                                    #
 ############################################################
 universe = container
-container_image = <your spras image>.sif
-# container_image = docker://reedcompbio/spras:v0.6.0
+#container_image = <your spras image>.sif
+container_image = instructions-overhaul.sif
+# container_image = docker://reedcompbio/spras:v0.2.0
 
 ############################################################
 # Specify names for log/stdout/stderr files generated by   #
@@ -30,15 +31,15 @@ container_image = <your spras image>.sif
 # NOTE: You should `mkdir logs/` before running, or the    #
 # spras_$(Cluster).log file won't be available.            #
 ############################################################
-log = logs/spras_$(Cluster)_$(Process).log
-output = logs/spras_$(Cluster)_$(Process).out
-error = logs/spras_$(Cluster)_$(Process).err
+log = htcondor/logs/spras_$(Cluster)_$(Process).log
+output = htcondor/logs/spras_$(Cluster)_$(Process).out
+error = htcondor/logs/spras_$(Cluster)_$(Process).err
 
 ############################################################
 # Specify the script to run inside the container. This is  #
 # simply a wrapper on the Snakefile.                       #
 ############################################################
-executable = spras.sh
+executable = htcondor/spras.sh
 arguments = "--cores $(NUM_PROCS) --configfile $(CONFIG_FILE) --retries 3"
 
 ############################################################
@@ -49,6 +50,7 @@ when_to_transfer_output = ON_EXIT
 transfer_input_files = $(CONFIG_FILE), $(INPUT_DIR), $(SNAKEFILE)
 # The output directory should match whatever you configure in your configfile.
 transfer_output_files = output
+preserve_relative_paths = true
 
 ############################################################
 # System specifications. Be sure to request enough disk to #
@@ -70,7 +72,7 @@ JobBatchName = "SPRAS-workflow-OSPool"
 # needed if running from CHTC. If running from an OSPool   #
 # AP, omit this line.                                      #
 ############################################################
-+WantGlideIn = true 
+# +WantGlideIn = true 
 
 ############################################################
 # Not all Execution Points in the OSPool will have         #
@@ -81,7 +83,7 @@ JobBatchName = "SPRAS-workflow-OSPool"
 # this submit file from CHTC, we also need a requirement   #
 # to prevent landing on a CHTC Execution Point.            #
 ############################################################
-requirements = (HAS_SINGULARITY == True) && (Poolname =!= "CHTC")
+# requirements = (HAS_SINGULARITY == True) && (Poolname =!= "CHTC")
 
 # Queue the job
-queue 1
\ No newline at end of file
+queue 1
diff --git a/docker-wrappers/SPRAS/spras_profile/config.yaml b/htcondor/spras_profile/config.yaml
similarity index 83%
rename from docker-wrappers/SPRAS/spras_profile/config.yaml
rename to htcondor/spras_profile/config.yaml
index 5cc0697d0..1720b59c4 100644
--- a/docker-wrappers/SPRAS/spras_profile/config.yaml
+++ b/htcondor/spras_profile/config.yaml
@@ -1,26 +1,31 @@
 # Default configuration for the SPRAS/HTCondor executor profile. Each of these values
 # can also be passed via command line flags, e.g. `--jobs 30 --executor htcondor`.
 
+# NOTE: File paths in here should be relative to where you submit from, typically the
+# root of the SPRAS repository
+
 # 'jobs' specifies the maximum number of HTCondor jobs that can be in the queue at once.
 jobs: 30
 executor: htcondor
-configfile: example_config.yaml
+configfile: config/config.yaml
+htcondor-jobdir: htcondor/logs
+
 # Indicate to the plugin that jobs running on various EPs do not share a filesystem with
 # each other, or with the AP.
 shared-fs-usage: none
 # Distributed, heterogeneous computational environments are a wild place where strange things
 # can happen. If something goes wrong, try again up to 5 times. After that, we assume there's
 # a real error that requires user/admin intervention
-retries: 5
+retries: 2
 
 # Default resources will apply to all workflow steps. If a single workflow step fails due
 # to insufficient resources, it can be re-run with modified values. Snakemake will handle
 # picking up where it left off, and won't re-run steps that have already completed.
 default-resources:
-  job_wrapper: "spras.sh"
+  job_wrapper: "htcondor/spras.sh"
   # If running in CHTC, this only works with apptainer images
   # Note requirement for quotes around the image name
-  container_image: "'spras-v0.6.0.sif'"
+  container_image: "spras-v0.6.0.sif"
   universe: "container"
   # The value for request_disk should be large enough to accommodate the runtime container
   # image, any additional PRM container images, and your input data.
diff --git a/run_htcondor.sh b/run_htcondor.sh
new file mode 100755
index 000000000..8ca2d86e0
--- /dev/null
+++ b/run_htcondor.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+
+# Example helper script to submit a SPRAS workflow to HTCondor with full parallelization
+#
+# Note that for full runs after any initial debugging, you may wish to remove the `--verbose`
+# flag, as this significantly increases the size of log files
+
+./htcondor/snakemake_long.py --profile htcondor/spras_profile/ --verbose

From 3fad221c1735bde69d8dc45c220aebcb763001aa Mon Sep 17 00:00:00 2001
From: Justin Hiemstra <75916364+jhiemstrawisc@users.noreply.github.com>
Date: Tue, 27 Jan 2026 11:31:21 -0600
Subject: [PATCH 03/10] Update run_htcondor.sh

Co-authored-by: Tristan F.-R. <pub.tristanf@gmail.com>
---
 run_htcondor.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/run_htcondor.sh b/run_htcondor.sh
index 8ca2d86e0..adf25e54d 100755
--- a/run_htcondor.sh
+++ b/run_htcondor.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
 
 # Example helper script to submit a SPRAS workflow to HTCondor with full parallelization
 #

From 811625a087203997d2f8f8fd1e5584162529e3e6 Mon Sep 17 00:00:00 2001
From: Justin Hiemstra <jhiemstra@wisc.edu>
Date: Tue, 27 Jan 2026 12:34:16 -0600
Subject: [PATCH 04/10] Address review feedback

---
 htcondor/snakemake_long.py | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/htcondor/snakemake_long.py b/htcondor/snakemake_long.py
index 0f30a4439..50848ce27 100755
--- a/htcondor/snakemake_long.py
+++ b/htcondor/snakemake_long.py
@@ -9,6 +9,7 @@
 import argparse
 import os
 import pathlib
+import shlex
 import subprocess
 import sys
 import time
@@ -47,9 +48,10 @@ def submit_local(snakefile, profile, htcondor_jobdir, verbose=False, env_manager
     script_location = pathlib.Path(__file__).resolve()
 
     # Build arguments string, including optional flags
-    args_str = f"long --snakefile {snakefile} --profile {profile} --htcondor-jobdir {htcondor_jobdir} --env-manager {env_manager}"
+    args_list = ["long", "--snakefile", snakefile, "--profile", profile, "--htcondor-jobdir", htcondor_jobdir, "--env-manager", env_manager]
     if verbose:
-        args_str += " --verbose"
+        args_list.append("--verbose")
+    args_str = " ".join(shlex.quote(str(arg)) for arg in args_list)
 
     submit_description = htcondor.Submit({
         "executable":              script_location,
@@ -101,11 +103,7 @@ def top_main():
     # Make sure we have a value for the log directory and that the directory exists.
     if args.htcondor_jobdir is None:
         args.htcondor_jobdir = pathlib.Path(os.getcwd()) / "htcondor" / "logs"
-        if not os.path.exists(args.htcondor_jobdir):
-            os.makedirs(args.htcondor_jobdir)
-    else:
-        if not os.path.exists(args.htcondor_jobdir):
-            os.makedirs(args.htcondor_jobdir)
+    args.htcondor_jobdir.mkdir(exist_ok=True)
 
     try:
         submit_local(args.snakefile, args.profile, args.htcondor_jobdir, args.verbose, args.env_manager)

From 445c8c9f0a8e020268df112a4662bc70f200bb2d Mon Sep 17 00:00:00 2001
From: Justin Hiemstra <jhiemstra@wisc.edu>
Date: Mon, 8 Jun 2026 10:45:37 -0500
Subject: [PATCH 05/10] Fix htcondor.rst formatting, post rebase

---
 docs/htcondor.rst | 221 ++++++++++++++++++++++++++--------------------
 1 file changed, 124 insertions(+), 97 deletions(-)

diff --git a/docs/htcondor.rst b/docs/htcondor.rst
index f955d6e77..98641bf86 100644
--- a/docs/htcondor.rst
+++ b/docs/htcondor.rst
@@ -2,34 +2,39 @@
  Running with HTCondor
 #######################
 
-The folder `htcondor/ <https://github.com/Reed-CompBio/spras/tree/main/htcondor>`_
-inside the SPRAS git repository contains several files that can be used to
-run workflows with this container on HTCondor. To use the ``spras``
-image in this environment, first login to an HTCondor Access Point (AP).
-Then, from the AP clone this repo:
+The folder `htcondor/
+<https://github.com/Reed-CompBio/spras/tree/main/htcondor>`_ inside the
+SPRAS git repository contains several files that can be used to run
+workflows with this container on HTCondor. To use the ``spras`` image in
+this environment, first log in to an HTCondor Access Point (AP). Then,
+from the AP clone this repo:
 
 .. code:: bash
 
    git clone https://github.com/Reed-CompBio/spras.git
 
-**Note:** To work with SPRAS in HTCondor, it is recommended that you
-build an Apptainer image instead of using Docker. See `Converting Docker
-Images to Apptainer/Singularity Images`_ for instructions. Importantly,
-the Apptainer image must be built for the linux/amd64 architecture. Most
-HTCondor APs will have ``apptainer`` installed, but they may not have
-``docker``. If this is the case, you can build the image with Docker on
-your local machine, push the image to Docker Hub, and then convert it to
-Apptainer's ``sif`` format on the AP.
-
-**Note:** It is best practice to make sure that the Snakefile you copy
-for your workflow is the same version as the Snakefile baked into your
-workflow's container image. When this workflow runs, the Snakefile you
-just copied will be used during remote execution instead of the
-Snakefile from the container. As a result, difficult-to-diagnose
-versioning issues may occur if the version of SPRAS in the remote
-container doesn't support the Snakefile on your current branch. The
-safest bet is always to create your own image so you always know what's
-inside of it.
+.. tip::
+
+   To work with SPRAS in HTCondor, it is recommended that you build an
+   Apptainer image instead of using Docker. See `Converting Docker
+   Images to Apptainer/Singularity Images`_ for instructions.
+   Importantly, the Apptainer image must be built for the linux/amd64
+   architecture. Most HTCondor APs will have ``apptainer`` installed,
+   but they may not have ``docker``. If this is the case, you can build
+   the image with Docker on your local machine, push the image to Docker
+   Hub, and then convert it to Apptainer's ``sif`` format on the AP.
+
+.. tip::
+
+   It is best practice to make sure that the Snakefile you copy for your
+   workflow is the same version as the Snakefile baked into your
+   workflow's container image. When this workflow runs, the Snakefile
+   you just copied will be used during remote execution instead of the
+   Snakefile from the container. As a result, difficult-to-diagnose
+   versioning issues may occur if the version of SPRAS in the remote
+   container doesn't support the Snakefile on your current branch. The
+   safest bet is always to create your own image so you always know
+   what's inside of it.
 
 There are currently two options for running SPRAS with HTCondor. The
 first is to submit all SPRAS jobs to a single remote Execution Point
@@ -66,55 +71,66 @@ image does not use a "v" in the tag.
  Submitting All Jobs to a Single EP
 ************************************
 
-Running all SPRAS steps on a single remote Execution Point (EP) is a good way
-to get started with HTCondor, but it is significantly less efficient than using
-HTCondor's distributed capabilities. This approach is best suited for
-workflows that are not computationally intensive, or for testing and
-debugging purposes.
-
-Before submitting all SPRAS jobs to a single remote Execution Point (EP),
-you'll need to set up three things:
-1. You'll need to modify ``htcondor/spras.sub`` to point at your container
-   image, along with any other configuration changes you want to make like
-   choosing a logging directory or toggling OSPool submission. Note that all
-   paths in the submit file are relative to the directory from which you run
-   ``condor_submit``, which will typically be the root of the SPRAS repository.
-2. You'll need to ensure your SPRAS configuration file has a few key values
-   set, including ``unpack_singularity: true`` and
+Running all SPRAS steps on a single remote Execution Point (EP) is a
+good way to get started with HTCondor, but it is significantly less
+efficient than using HTCondor's distributed capabilities. This approach
+is best suited for workflows that are not computationally intensive, or
+for testing and debugging purposes.
+
+Before submitting all SPRAS jobs to a single remote Execution Point
+(EP), you'll need to set up three things:
+
+#. You'll need to modify ``htcondor/spras.sub`` to point at your
+   container image, along with any other configuration changes you want
+   to make like choosing a logging directory or toggling OSPool
+   submission. Note that all paths in the submit file are relative to
+   the directory from which you run ``condor_submit``, which will
+   typically be the root of the SPRAS repository.
+
+#. You'll need to ensure your SPRAS configuration file has a few key
+   values set, including ``unpack_singularity: true`` and
    ``containers.framework: singularity``.
-3. Finally, it's best practice to create the logging directory configured in
-   the submit file before submitting the job, e.g. to create the default log
-   directory, run ``mkdir htcondor/logs`` from the root of the repository.
 
-Once these steps are complete, you can submit the job from the root of the
-the SPRAS repository by running ``condor_submit htcondor/spras.sub``.
+#. Finally, it's best practice to create the logging directory
+   configured in the submit file before submitting the job, e.g. to
+   create the default log directory, run ``mkdir htcondor/logs`` from
+   the root of the repository.
 
-When the job completes, the ``output`` directory from the workflow should be
-returned as ``output``.
+Once these steps are complete, you can submit the job from the root of
+the the SPRAS repository by running ``condor_submit
+htcondor/spras.sub``.
+
+When the job completes, the ``output`` directory from the workflow
+should be returned as ``output``.
 
 **************************
  Submitting Parallel Jobs
 **************************
 
-Parallelizing SPRAS workflows with HTCondor requires much of the same setup
-as the previous section, but with two additions. 
-1. Build/activate the SPRAS conda/mamba environment and ``pip install`` the SPRAS module
-   (via ``pip install .`` inside the SPRAS directory).
-2. Install the `HTCondor Snakemake
-executor <https://github.com/htcondor/snakemake-executor-plugin-htcondor>`__; once your
-   SPRAS conda/mamba environment is activated and SPRAS is ``pip install``-ed,
-   you can install the HTCondor Snakemake executor with the following:
+Parallelizing SPRAS workflows with HTCondor requires much of the same
+setup as the previous section, but with two additions.
 
-.. code:: bash
+#. Build/activate the SPRAS conda/mamba environment and ``pip install``
+   the SPRAS module (via ``pip install .`` inside the SPRAS directory).
 
-   pip install git+https://github.com/htcondor/snakemake-executor-plugin-htcondor.git
+#. Install the `HTCondor Snakemake executor
+   <https://github.com/htcondor/snakemake-executor-plugin-htcondor>`__;
+   once your SPRAS conda/mamba environment is activated and SPRAS is
+   ``pip install``-ed, you can install the HTCondor Snakemake executor
+   with the following:
 
-3. Instead of editing ``spras.sub`` to define the workflow, this scenario
-   requires editing the SPRAS profile in ``htcondor/spras_profile/config.yaml``.
-   Make sure you specify the correct container, and change any other config
-   values needed by your workflow (defaults are fine in most cases).
-4. Modify your SPRAS configuration file to set ``unpack_singularity: true`` and
-   ``containers.framework: singularity``.
+   .. code:: bash
+
+      pip install git+https://github.com/htcondor/snakemake-executor-plugin-htcondor.git
+
+#. Instead of editing ``spras.sub`` to define the workflow, this
+   scenario requires editing the SPRAS profile in
+   ``htcondor/spras_profile/config.yaml``. Make sure you specify the
+   correct container, and change any other config values needed by your
+   workflow (defaults are fine in most cases).
+
+#. Modify your SPRAS configuration file to set ``unpack_singularity:
+   true`` and ``containers.framework: singularity``.
 
 Then, to start the workflow with HTCondor in the CHTC pool, there are
 two options:
@@ -132,10 +148,12 @@ invoke Snakemake directly from the repository root by running:
 
    snakemake --profile htcondor/spras_profile/
 
-**Note**: Running the workflow in this way requires that your terminal
-session stays active. Closing the terminal will suspend ongoing jobs, but
-Snakemake will handle picking up where any previously-completed jobs left off
-when you restart the workflow.
+.. tip::
+
+   Running the workflow in this way requires that your terminal session
+   stays active. Closing the terminal will suspend ongoing jobs, but
+   Snakemake will handle picking up where any previously-completed jobs
+   left off when you restart the workflow.
 
 Long Running Snakemake Jobs (Managed by HTCondor)
 =================================================
@@ -156,25 +174,28 @@ repository root. You can execute this script by running:
 
    ./run_htcondor.sh
 
-When executed in this mode, all log files for the workflow will be placed
-into the logging directory (``htcondor/logs`` by default). In particular,
-Snakemake's stdout/stderr outputs containing your workflow's progress can
-be found split between ``htcondor/logs/snakemake.err`` and ``htcondor/logs/snakemake.out``.
-These will also log each rule and what HTCondor job ID was submitted for
-that rule (see the `troubleshooting section <#troubleshooting>`__ for
-information on how to use these extra log files).
+When executed in this mode, all log files for the workflow will be
+placed into the logging directory (``htcondor/logs`` by default). In
+particular, Snakemake's stdout/stderr outputs containing your workflow's
+progress can be found split between ``htcondor/logs/snakemake.err`` and
+``htcondor/logs/snakemake.out``. These will also log each rule and what
+HTCondor job ID was submitted for that rule (see the `troubleshooting
+section <#troubleshooting>`__ for information on how to use these extra
+log files).
+
+.. tip::
 
-**Note**: While you're in the initial stages of developing/debugging your
-workflow, it's very useful to invoke Snakemake with the ``--verbose`` flag.
-This can be passed to Snakemake via the ``snakemake_long.py`` script by
-adding it to the script's argument list, e.g.:
+   While you're in the initial stages of developing/debugging your
+   workflow, it's very useful to invoke Snakemake with the ``--verbose``
+   flag. This can be passed to Snakemake via the ``snakemake_long.py``
+   script by adding it to the script's argument list, e.g.:
 
 .. code:: bash
 
    ./htcondor/snakemake_long.py --profile htcondor/spras_profile/ --verbose
 
-If you use mamba instead of conda for environment management, you can specify
-this with the ``--env-manager`` flag:
+If you use mamba instead of conda for environment management, you can
+specify this with the ``--env-manager`` flag:
 
 .. code:: bash
 
@@ -185,11 +206,12 @@ this with the ``--env-manager`` flag:
 *********************
 
 Resource requirements can be adjusted as needed in
-``htcondor/spras_profile/config.yaml``, and HTCondor logs for this workflow
-can be found in your log directory. You can set a different log
-directory by changing the configured ``htcondor-jobdir`` in the profile's
-configuration. Alternatively, you can pass a different log directory
-when invoking Snakemake with the ``--htcondor-jobdir`` argument.
+``htcondor/spras_profile/config.yaml``, and HTCondor logs for this
+workflow can be found in your log directory. You can set a different log
+directory by changing the configured ``htcondor-jobdir`` in the
+profile's configuration. Alternatively, you can pass a different log
+directory when invoking Snakemake with the ``--htcondor-jobdir``
+argument.
 
 To run this same workflow in the OSPool, add the following to the
 profile's default-resources block:
@@ -200,9 +222,11 @@ profile's default-resources block:
    requirements: |
      '(HAS_SINGULARITY == True) && (Poolname =!= "CHTC")'
 
-**Note**: If you encounter an error that says
-``No module named 'spras'``, make sure you've ``pip install``-ed the
-SPRAS module into your conda environment.
+.. tip::
+
+   If you encounter an error that says ``No module named 'spras'``, make
+   sure you've ``pip install``-ed the SPRAS module into your conda
+   environment.
 
 ****************
  Job Monitoring
@@ -213,17 +237,20 @@ To monitor the state of the job, you can use a second terminal to run
 ``condor_watch_q`` for realtime updates.
 
 Upon completion, the ``output`` directory from the workflow should be
-returned as ``output``, along with several files containing the workflow's
-logging information (anything that matches ``htcondor/logs/spras_*`` and
-ending in ``.out``, ``.err``, or ``.log``). If the job was unsuccessful,
-these files should contain useful debugging clues about what may have gone wrong.
-
-**Note**: If you want to run the workflow with a different version of
-SPRAS, or one that contains development updates you've made, rebuild
-this image against the version of SPRAS you want to test, and push the
-image to your image repository. To use that container in the workflow,
-change the ``container_image`` line of ``spras.sub`` to point to the new
-image.
+returned as ``output``, along with several files containing the
+workflow's logging information (anything that matches
+``htcondor/logs/spras_*`` and ending in ``.out``, ``.err``, or
+``.log``). If the job was unsuccessful, these files should contain
+useful debugging clues about what may have gone wrong.
+
+.. tip::
+
+   If you want to run the workflow with a different version of SPRAS, or
+   one that contains development updates you've made, rebuild this image
+   against the version of SPRAS you want to test, and push the image to
+   your image repository. To use that container in the workflow, change
+   the ``container_image`` line of ``spras.sub`` to point to the new
+   image.
 
 *****************
  Troubleshooting

From ceea753c3bff82ea2b79e0b7a12498d60d385914 Mon Sep 17 00:00:00 2001
From: Justin Hiemstra <jhiemstra@wisc.edu>
Date: Mon, 8 Jun 2026 10:45:53 -0500
Subject: [PATCH 06/10] Fix other misc docs build issues

---
 spras/dataset.py    |  2 +-
 spras/evaluation.py | 35 +++++++++++++++++++++--------------
 2 files changed, 22 insertions(+), 15 deletions(-)

diff --git a/spras/dataset.py b/spras/dataset.py
index ddf74736f..330bdcffc 100644
--- a/spras/dataset.py
+++ b/spras/dataset.py
@@ -164,7 +164,7 @@ def __init__(self, dataset_params: DatasetSchema):
     def get_node_columns(self, col_names: list[str]) -> pd.DataFrame:
         """
         @param scope: The name of the algorithm (or a more general 'scope' like SPRAS)
-            to fail on if get_node_columns fails.
+        to fail on if get_node_columns fails.
         @returns: A table containing the requested column names and node IDs
         for all nodes with at least 1 of the requested values being non-empty
         """
diff --git a/spras/evaluation.py b/spras/evaluation.py
index 507ffb10a..2faa8ffc7 100644
--- a/spras/evaluation.py
+++ b/spras/evaluation.py
@@ -161,9 +161,10 @@ def node_precision_and_recall(file_paths: Iterable[Union[str, PathLike]], node_t
         @param file_paths: list of file paths of pathway reconstruction algorithm outputs
         @param node_table: the gold standard nodes
         @return: A DataFrame with the following columns:
-                - 'Pathway': Path object corresponding to each pathway file
-                - 'Precision': Precision of predicted nodes vs. gold standard nodes
-                - 'Recall': Recall of predicted nodes vs. gold standard nodes
+
+        - 'Pathway': Path object corresponding to each pathway file
+        - 'Precision': Precision of predicted nodes vs. gold standard nodes
+        - 'Recall': Recall of predicted nodes vs. gold standard nodes
         """
         y_true = set(node_table['NODEID'])
         results = []
@@ -194,7 +195,7 @@ def visualize_precision_and_recall_plot(pr_df: pd.DataFrame, output_file: str |
         for each algorithm.
 
         @param pr_df: Dataframe of calculated precision and recall for each pathway file.
-                      Must include a preprocessed 'Algorithm' column.
+        Must include a preprocessed 'Algorithm' column.
         @param output_file: the filename to save the precision and recall of each pathway
         @param output_png: the filename to plot the precision and recall of each pathway (not a PRC)
         @param title: The title to use for the plot
@@ -311,9 +312,11 @@ def pca_chosen_pathway(coordinates_files: Iterable[Union[str, PathLike]], pathwa
         coordinates
         Calculates the Euclidean distance from each data point to the KDE peak, then selects the closest pathway as the
         representative pathway.
-        If there is more than one representative pathway, a tiebreaker will be used
-            1) choose smallest pathway (smallest number of edges and nodes)
-            2) end all be all, choose the first one based on name
+        If there is more than one representative pathway, a tiebreaker will be used:
+
+        1) choose smallest pathway (smallest number of edges and nodes)
+        2) end all be all, choose the first one based on name
+
         Returns a list of file paths for the representative pathway associated with the closest data point to the
         centroid.
 
@@ -362,14 +365,18 @@ def edge_frequency_node_ensemble(node_table: pd.DataFrame, ensemble_files: Itera
         A list of ensemble files can contain an aggregated ensemble or algorithm-specific ensembles per dataset
 
         1. Prepare a set of default nodes (from the interactome and gold standard) with frequency 0,
-        ensuring all nodes are represented in the ensemble.
-            - Answers "Did the algorithm(s) select the correct nodes from the entire network?"
-            - It measures whether the algorithm(s) can distinguish relevant gold standard nodes
-            from the full "universe" of possible nodes present in the input network.
+           ensuring all nodes are represented in the ensemble.
+
+           - Answers "Did the algorithm(s) select the correct nodes from the entire network?"
+           - It measures whether the algorithm(s) can distinguish relevant gold standard nodes
+             from the full "universe" of possible nodes present in the input network.
+
         2. For each edge ensemble file:
-            a. Read edges and their frequencies.
-            b. Convert edges frequencies into node-level frequencies for Node1 and Node2.
-            c. Merge with the default node set and group by node, taking the maximum frequency per node.
+
+           a. Read edges and their frequencies.
+           b. Convert edges frequencies into node-level frequencies for Node1 and Node2.
+           c. Merge with the default node set and group by node, taking the maximum frequency per node.
+
         3. Store the resulting node-frequency ensemble under the corresponding ensemble source (label).
 
         If the interactome or gold standard table is empty, a ValueError is raised.

From 9aa43b80484d40f0d22ccbfb39ec9db9bd30e5d7 Mon Sep 17 00:00:00 2001
From: Justin Hiemstra <jhiemstra@wisc.edu>
Date: Mon, 8 Jun 2026 11:12:02 -0500
Subject: [PATCH 07/10] Cleanup htcondor docs based on more review feedback

---
 docs/htcondor.rst                  | 57 +++++++++++++++++++++++++++---
 htcondor/spras.sub                 | 10 +++---
 htcondor/spras_profile/config.yaml |  2 +-
 3 files changed, 59 insertions(+), 10 deletions(-)

diff --git a/docs/htcondor.rst b/docs/htcondor.rst
index 98641bf86..4ba70f914 100644
--- a/docs/htcondor.rst
+++ b/docs/htcondor.rst
@@ -41,6 +41,56 @@ first is to submit all SPRAS jobs to a single remote Execution Point
 (EP). The second is to use the Snakemake HTCondor executor to
 parallelize the workflow by submitting each job to its own EP.
 
+***********************************
+ Which Files Are Used in Each Mode
+***********************************
+
+The ``htcondor`` directory contains several files, but not all of them
+are used in both run modes. A common point of confusion is which files
+apply where -- for example, ``spras.sub`` is only used when submitting
+to a single EP and is ignored when running in parallel. The table below
+summarizes what each file is for and which mode uses it, so you know
+what to edit before submitting.
+
+.. list-table::
+   :header-rows: 1
+   :widths: 34 13 13 40
+
+   -  -  File
+      -  Single EP
+      -  Parallel
+      -  Purpose
+
+   -  -  ``htcondor/spras.sub``
+      -  ✓
+      -
+      -  HTCondor submit file that runs the entire workflow as a single
+         job on one EP.
+
+   -  -  ``htcondor/spras.sh``
+      -  ✓
+      -  ✓
+      -  Wrapper script that invokes Snakemake inside the container.
+         Used as the executable in both modes.
+
+   -  -  ``htcondor/spras_profile/config.yaml``
+      -
+      -  ✓
+      -  Snakemake HTCondor-executor profile defining resources and
+         submission settings for parallel runs.
+
+   -  -  ``htcondor/snakemake_long.py``
+      -
+      -  ✓
+      -  Launches Snakemake as a long-running managed job so the
+         workflow survives terminal disconnects.
+
+   -  -  ``run_htcondor.sh``
+      -
+      -  ✓
+      -  Convenience wrapper (in the repository root) around
+         ``snakemake_long.py``.
+
 **********************************************************
  Converting Docker Images to Apptainer/Singularity Images
 **********************************************************
@@ -97,8 +147,7 @@ Before submitting all SPRAS jobs to a single remote Execution Point
    the root of the repository.
 
 Once these steps are complete, you can submit the job from the root of
-the the SPRAS repository by running ``condor_submit
-htcondor/spras.sub``.
+the SPRAS repository by running ``condor_submit htcondor/spras.sub``.
 
 When the job completes, the ``output`` directory from the workflow
 should be returned as ``output``.
@@ -190,9 +239,9 @@ log files).
    flag. This can be passed to Snakemake via the ``snakemake_long.py``
    script by adding it to the script's argument list, e.g.:
 
-.. code:: bash
+   .. code:: bash
 
-   ./htcondor/snakemake_long.py --profile htcondor/spras_profile/ --verbose
+      ./htcondor/snakemake_long.py --profile htcondor/spras_profile/ --verbose
 
 If you use mamba instead of conda for environment management, you can
 specify this with the ``--env-manager`` flag:
diff --git a/htcondor/spras.sub b/htcondor/spras.sub
index 79e1bbd94..b48d0fb3a 100644
--- a/htcondor/spras.sub
+++ b/htcondor/spras.sub
@@ -21,15 +21,15 @@ SNAKEFILE = Snakefile
 # first                                                    #
 ############################################################
 universe = container
-#container_image = <your spras image>.sif
-container_image = instructions-overhaul.sif
-# container_image = docker://reedcompbio/spras:v0.2.0
+container_image = <your spras image>.sif
+# container_image = docker://reedcompbio/spras:v0.6.0
 
 ############################################################
 # Specify names for log/stdout/stderr files generated by   #
 # HTCondor.                                                #
-# NOTE: You should `mkdir logs/` before running, or the    #
-# spras_$(Cluster).log file won't be available.            #
+# NOTE: You should create the htcondor/logs/ directory     #
+# before running (mkdir htcondor/logs), or these files     #
+# won't be available.                                      #
 ############################################################
 log = htcondor/logs/spras_$(Cluster)_$(Process).log
 output = htcondor/logs/spras_$(Cluster)_$(Process).out
diff --git a/htcondor/spras_profile/config.yaml b/htcondor/spras_profile/config.yaml
index 1720b59c4..1b2c9e8a4 100644
--- a/htcondor/spras_profile/config.yaml
+++ b/htcondor/spras_profile/config.yaml
@@ -14,7 +14,7 @@ htcondor-jobdir: htcondor/logs
 # each other, or with the AP.
 shared-fs-usage: none
 # Distributed, heterogeneous computational environments are a wild place where strange things
-# can happen. If something goes wrong, try again up to 5 times. After that, we assume there's
+# can happen. If something goes wrong, try again up to 2 times. After that, we assume there's
 # a real error that requires user/admin intervention
 retries: 2
 

From 3ca8ec03bfabad6be3a13d14ed5ee7239eb64e43 Mon Sep 17 00:00:00 2001
From: Justin Hiemstra <jhiemstra@wisc.edu>
Date: Mon, 8 Jun 2026 13:55:28 -0500
Subject: [PATCH 08/10] Update executor installation to reflect availability on
 PyPI

---
 docs/htcondor.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/htcondor.rst b/docs/htcondor.rst
index 4ba70f914..2e5810590 100644
--- a/docs/htcondor.rst
+++ b/docs/htcondor.rst
@@ -170,7 +170,7 @@ setup as the previous section, but with two additions.
 
    .. code:: bash
 
-      pip install git+https://github.com/htcondor/snakemake-executor-plugin-htcondor.git
+      pip install snakemake-executor-plugin-htcondor
 
 #. Instead of editing ``spras.sub`` to define the workflow, this
    scenario requires editing the SPRAS profile in

From 1c37c02ec003912e5077672bfd2b785d585d18e1 Mon Sep 17 00:00:00 2001
From: Justin Hiemstra <jhiemstra@wisc.edu>
Date: Mon, 8 Jun 2026 16:12:35 -0500
Subject: [PATCH 09/10] Mention CHTC docs for building Apptainer images

---
 docs/htcondor.rst | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/docs/htcondor.rst b/docs/htcondor.rst
index 2e5810590..fac841269 100644
--- a/docs/htcondor.rst
+++ b/docs/htcondor.rst
@@ -117,6 +117,12 @@ After running this command, a new file called ``spras-v0.6.0.sif`` will
 exist in the directory where the command was run. Note that the Docker
 image does not use a "v" in the tag.
 
+.. tip::
+
+   If you're building an Apptainer image at CHTC, please follow this
+   guide for building images in an interactive job:
+   https://chtc.cs.wisc.edu/uw-research-computing/apptainer-htc.html
+
 ************************************
  Submitting All Jobs to a Single EP
 ************************************

From e30f7b6b93f72bca2473c00d7ab310be4a32f26b Mon Sep 17 00:00:00 2001
From: Justin Hiemstra <jhiemstra@wisc.edu>
Date: Mon, 8 Jun 2026 16:21:50 -0500
Subject: [PATCH 10/10] Add magic apptainer environment variables to htcondor
 spras executable

These came from testing Neha's real workflow in June 2026. Not totally
sure how they all work (and whether additional environment variables
will need to be added in the future), but they were key to getting
custom sif images to unpack alongside the jobs.
---
 htcondor/spras.sh | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/htcondor/spras.sh b/htcondor/spras.sh
index cdfb924c6..ce423f8a9 100755
--- a/htcondor/spras.sh
+++ b/htcondor/spras.sh
@@ -6,5 +6,13 @@ set -e
 # When .cache files are created, they need to know where HOME is to write there.
 # In this case, that should be the HTCondor scratch dir the job is executing in.
 export HOME=$(pwd)
+# Various other apptainer-related environment variables that can cause problems
+# if not explicitly set. These came from testing/debugging workflows on the
+# OSPool.
+export APPTAINER_CACHEDIR=$(pwd)
+export APPTAINER_TMPDIR=$(pwd)
+mkdir -p "$APPTAINER_TMPDIR"
+unset SINGULARITY_BIND APPTAINER_BIND
+unset SINGULARITY_TMPDIR
 
 snakemake "$@"