diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index eecca486..9ae86e78 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -78,7 +78,7 @@ If you wish to contribute a new step, please use the following coding standards: 5. Add any new parameters to `nextflow_schema.json` with help text (via the `nf-core pipelines schema build` tool). 6. Add sanity checks and validation for all relevant parameters. 7. Perform local tests to validate that the new code works as expected. -8. If applicable, add a new test command in `.github/workflow/ci.yml`. +8. If applicable, add a new test in the `tests` directory. 9. Update MultiQC config `assets/multiqc_config.yml` so relevant suffixes, file name clean up and module plots are in the appropriate order. If applicable, add a [MultiQC](https://multiqc.info/) module. 10. Add a description of the output files and if relevant any appropriate images from the MultiQC report to `docs/output.md`. diff --git a/.github/actions/get-shards/action.yml b/.github/actions/get-shards/action.yml new file mode 100644 index 00000000..34085279 --- /dev/null +++ b/.github/actions/get-shards/action.yml @@ -0,0 +1,69 @@ +name: "Get number of shards" +description: "Get the number of nf-test shards for the current CI job" +inputs: + max_shards: + description: "Maximum number of shards allowed" + required: true + paths: + description: "Component paths to test" + required: false + tags: + description: "Tags to pass as argument for nf-test --tag parameter" + required: false +outputs: + shard: + description: "Array of shard numbers" + value: ${{ steps.shards.outputs.shard }} + total_shards: + description: "Total number of shards" + value: ${{ steps.shards.outputs.total_shards }} +runs: + using: "composite" + steps: + - name: Install nf-test + uses: nf-core/setup-nf-test@v1 + with: + version: ${{ env.NFT_VER }} + - name: Get number of shards + id: shards + shell: bash + run: | + # Run nf-test with dynamic parameter + 
nftest_output=$(nf-test test \ + --profile +docker \ + $(if [ -n "${{ inputs.tags }}" ]; then echo "--tag ${{ inputs.tags }}"; fi) \ + --dry-run \ + --ci \ + --changed-since HEAD^) || { + echo "nf-test command failed with exit code $?" + echo "Full output: $nftest_output" + exit 1 + } + echo "nf-test dry-run output: $nftest_output" + + # Default values for shard and total_shards + shard="[]" + total_shards=0 + + # Check if there are related tests + if echo "$nftest_output" | grep -q 'No tests to execute'; then + echo "No related tests found." + else + # Extract the number of related tests + number_of_shards=$(echo "$nftest_output" | sed -n 's|.*Executed \([0-9]*\) tests.*|\1|p') + if [[ -n "$number_of_shards" && "$number_of_shards" -gt 0 ]]; then + shards_to_run=$(( $number_of_shards < ${{ inputs.max_shards }} ? $number_of_shards : ${{ inputs.max_shards }} )) + shard=$(seq 1 "$shards_to_run" | jq -R . | jq -c -s .) + total_shards="$shards_to_run" + else + echo "Unexpected output format. Falling back to default values." 
+ fi + fi + + # Write to GitHub Actions outputs + echo "shard=$shard" >> $GITHUB_OUTPUT + echo "total_shards=$total_shards" >> $GITHUB_OUTPUT + + # Debugging output + echo "Final shard array: $shard" + echo "Total number of shards: $total_shards" diff --git a/.github/actions/nf-test/action.yml b/.github/actions/nf-test/action.yml new file mode 100644 index 00000000..bf44d961 --- /dev/null +++ b/.github/actions/nf-test/action.yml @@ -0,0 +1,109 @@ +name: "nf-test Action" +description: "Runs nf-test with common setup steps" +inputs: + profile: + description: "Profile to use" + required: true + shard: + description: "Shard number for this CI job" + required: true + total_shards: + description: "Total number of test shards(NOT the total number of matrix jobs)" + required: true + paths: + description: "Test paths" + required: true + tags: + description: "Tags to pass as argument for nf-test --tag parameter" + required: false +runs: + using: "composite" + steps: + - name: Setup Nextflow + uses: nf-core/setup-nextflow@v2 + with: + version: "${{ env.NXF_VERSION }}" + + - name: Set up Python + uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5 + with: + python-version: "3.13" + + - name: Install nf-test + uses: nf-core/setup-nf-test@v1 + with: + version: "${{ env.NFT_VER }}" + install-pdiff: true + + - name: Setup apptainer + if: contains(inputs.profile, 'singularity') + uses: eWaterCycle/setup-apptainer@main + + - name: Set up Singularity + if: contains(inputs.profile, 'singularity') + shell: bash + run: | + mkdir -p $NXF_SINGULARITY_CACHEDIR + mkdir -p $NXF_SINGULARITY_LIBRARYDIR + + - name: Conda setup + if: contains(inputs.profile, 'conda') + uses: conda-incubator/setup-miniconda@505e6394dae86d6a5c7fbb6e3fb8938e3e863830 # v3 + with: + auto-update-conda: true + conda-solver: libmamba + conda-remove-defaults: true + + - name: Run nf-test + shell: bash + env: + NFT_WORKDIR: ${{ env.NFT_WORKDIR }} + run: | + nf-test test \ + --profile=+${{ 
inputs.profile }} \ + $(if [ -n "${{ inputs.tags }}" ]; then echo "--tag ${{ inputs.tags }}"; fi) \ + --ci \ + --changed-since HEAD^ \ + --verbose \ + --tap=test.tap \ + --shard ${{ inputs.shard }}/${{ inputs.total_shards }} + + # Save the absolute path of the test.tap file to the output + echo "tap_file_path=$(realpath test.tap)" >> $GITHUB_OUTPUT + + - name: Generate test summary + if: always() + shell: bash + run: | + # Add header if it doesn't exist (using a token file to track this) + if [ ! -f ".summary_header" ]; then + echo "# 🚀 nf-test results" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "| Status | Test Name | Profile | Shard |" >> $GITHUB_STEP_SUMMARY + echo "|:------:|-----------|---------|-------|" >> $GITHUB_STEP_SUMMARY + touch .summary_header + fi + + if [ -f test.tap ]; then + while IFS= read -r line; do + if [[ $line =~ ^ok ]]; then + test_name="${line#ok }" + # Remove the test number from the beginning + test_name="${test_name#* }" + echo "| ✅ | ${test_name} | ${{ inputs.profile }} | ${{ inputs.shard }}/${{ inputs.total_shards }} |" >> $GITHUB_STEP_SUMMARY + elif [[ $line =~ ^not\ ok ]]; then + test_name="${line#not ok }" + # Remove the test number from the beginning + test_name="${test_name#* }" + echo "| ❌ | ${test_name} | ${{ inputs.profile }} | ${{ inputs.shard }}/${{ inputs.total_shards }} |" >> $GITHUB_STEP_SUMMARY + fi + done < test.tap + else + echo "| ⚠️ | No test results found | ${{ inputs.profile }} | ${{ inputs.shard }}/${{ inputs.total_shards }} |" >> $GITHUB_STEP_SUMMARY + fi + + - name: Clean up + if: always() + shell: bash + run: | + sudo rm -rf /home/ubuntu/tests/ diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index 8b87c69f..51327137 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -4,64 +4,40 @@ name: nf-core AWS full size tests # It runs the -profile 'test_full' on AWS batch on: - pull_request: - branches: - - main - - master 
workflow_dispatch: pull_request_review: types: [submitted] + release: + types: [published] jobs: run-platform: name: Run AWS full tests - # run only if the PR is approved by at least 2 reviewers and against the master branch or manually triggered - if: github.repository == 'nf-core/bactmap' && github.event.review.state == 'approved' && github.event.pull_request.base.ref == 'master' || github.event_name == 'workflow_dispatch' + # run when a PR against the master/main branch receives an approving review, when manually triggered, or when a release is published + if: github.repository == 'nf-core/bactmap' && github.event.review.state == 'approved' && (github.event.pull_request.base.ref == 'master' || github.event.pull_request.base.ref == 'main') || github.event_name == 'workflow_dispatch' || github.event_name == 'release' runs-on: ubuntu-latest steps: - - name: Get PR reviews - uses: octokit/request-action@v2.x - if: github.event_name != 'workflow_dispatch' - id: check_approvals - continue-on-error: true - with: - route: GET /repos/${{ github.repository }}/pulls/${{ github.event.pull_request.number }}/reviews?per_page=100 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - - name: Check for approvals - if: ${{ failure() && github.event_name != 'workflow_dispatch' }} - run: | - echo "No review approvals found. At least 2 approvals are required to run this action automatically." 
- exit 1 - - - name: Check for enough approvals (>=2) - id: test_variables - if: github.event_name != 'workflow_dispatch' + - name: Set revision variable + id: revision run: | - JSON_RESPONSE='${{ steps.check_approvals.outputs.data }}' - CURRENT_APPROVALS_COUNT=$(echo $JSON_RESPONSE | jq -c '[.[] | select(.state | contains("APPROVED")) ] | length') - test $CURRENT_APPROVALS_COUNT -ge 2 || exit 1 # At least 2 approvals are required + echo "revision=${{ (github.event_name == 'workflow_dispatch' || github.event_name == 'release') && github.sha || 'dev' }}" >> "$GITHUB_OUTPUT" - name: Launch workflow via Seqera Platform uses: seqeralabs/action-tower-launch@v2 - # TODO nf-core: You can customise AWS full pipeline tests as required - # Add full size test data (but still relatively small datasets for few samples) - # on the `test_full.config` test runs with only one set of parameters with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} - revision: ${{ github.sha }} - workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/bactmap/work-${{ github.sha }} + revision: ${{ steps.revision.outputs.revision }} + workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/bactmap/work-${{ steps.revision.outputs.revision }} parameters: | { "hook_url": "${{ secrets.MEGATESTS_ALERTS_SLACK_HOOK_URL }}", - "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/bactmap/results-${{ github.sha }}" + "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/bactmap/results-${{ steps.revision.outputs.revision }}" } profiles: test_full - - uses: actions/upload-artifact@v4 + - uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4 with: name: Seqera Platform debug log file path: | diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml deleted file mode 100644 index df615bda..00000000 --- a/.github/workflows/ci.yml +++ /dev/null @@ -1,87 +0,0 @@ -name: nf-core CI -# This workflow runs the pipeline with the 
minimal test dataset to check that it completes without any syntax errors -on: - push: - branches: - - dev - pull_request: - release: - types: [published] - workflow_dispatch: - -env: - NXF_ANSI_LOG: false - NXF_SINGULARITY_CACHEDIR: ${{ github.workspace }}/.singularity - NXF_SINGULARITY_LIBRARYDIR: ${{ github.workspace }}/.singularity - -concurrency: - group: "${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}" - cancel-in-progress: true - -jobs: - test: - name: "Run pipeline with test data (${{ matrix.NXF_VER }} | ${{ matrix.test_name }} | ${{ matrix.profile }})" - # Only run on push if this is the nf-core dev branch (merged PRs) - if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/bactmap') }}" - runs-on: ubuntu-latest - strategy: - matrix: - NXF_VER: - - "24.04.2" - - "latest-everything" - profile: - - "conda" - - "docker" - - "singularity" - test_name: - - "test" - isMaster: - - ${{ github.base_ref == 'master' }} - # Exclude conda and singularity on dev - exclude: - - isMaster: false - profile: "conda" - - isMaster: false - profile: "singularity" - steps: - - name: Check out pipeline code - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 - with: - fetch-depth: 0 - - - name: Set up Nextflow - uses: nf-core/setup-nextflow@v2 - with: - version: "${{ matrix.NXF_VER }}" - - - name: Set up Apptainer - if: matrix.profile == 'singularity' - uses: eWaterCycle/setup-apptainer@main - - - name: Set up Singularity - if: matrix.profile == 'singularity' - run: | - mkdir -p $NXF_SINGULARITY_CACHEDIR - mkdir -p $NXF_SINGULARITY_LIBRARYDIR - - - name: Set up Miniconda - if: matrix.profile == 'conda' - uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3 - with: - miniconda-version: "latest" - auto-update-conda: true - conda-solver: libmamba - channels: conda-forge,bioconda - - - name: Set up Conda - if: matrix.profile == 'conda' - run: | - echo 
$(realpath $CONDA)/condabin >> $GITHUB_PATH - echo $(realpath python) >> $GITHUB_PATH - - - name: Clean up Disk space - uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 - - - name: "Run pipeline with test data ${{ matrix.NXF_VER }} | ${{ matrix.test_name }} | ${{ matrix.profile }}" - run: | - nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.test_name }},${{ matrix.profile }} --outdir ./results diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index dbd52d5a..7a527a34 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -3,9 +3,6 @@ name: nf-core linting # It runs the `nf-core pipelines lint` and markdown lint tests to ensure # that the code meets the nf-core guidelines. on: - push: - branches: - - dev pull_request: release: types: [published] @@ -14,12 +11,12 @@ jobs: pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + - uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 - - name: Set up Python 3.12 - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5 + - name: Set up Python 3.14 + uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6 with: - python-version: "3.12" + python-version: "3.14" - name: Install pre-commit run: pip install pre-commit @@ -31,18 +28,18 @@ jobs: runs-on: ubuntu-latest steps: - name: Check out pipeline code - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5 - name: Install Nextflow uses: nf-core/setup-nextflow@v2 - - uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5 + - uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6 with: - python-version: "3.12" + python-version: "3.14" architecture: "x64" - name: read .nf-core.yml - uses: pietrobolcato/action-read-yaml@1.1.0 + uses: 
pietrobolcato/action-read-yaml@9f13718d61111b69f30ab4ac683e67a56d254e1d # 1.1.0 id: read_yml with: config: ${{ github.workspace }}/.nf-core.yml @@ -74,7 +71,7 @@ jobs: - name: Upload linting log file artifact if: ${{ always() }} - uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4 + uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5 with: name: linting-logs path: | diff --git a/.github/workflows/linting_comment.yml b/.github/workflows/linting_comment.yml index 95b6b6af..e6e9bc26 100644 --- a/.github/workflows/linting_comment.yml +++ b/.github/workflows/linting_comment.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Download lint results - uses: dawidd6/action-download-artifact@20319c5641d495c8a52e688b7dc5fada6c3a9fbc # v8 + uses: dawidd6/action-download-artifact@ac66b43f0e6a346234dd65d4d0c8fbb31cb316e5 # v11 with: workflow: linting.yml workflow_conclusion: completed @@ -21,7 +21,7 @@ jobs: run: echo "pr_number=$(cat linting-logs/PR_number.txt)" >> $GITHUB_OUTPUT - name: Post PR comment - uses: marocchino/sticky-pull-request-comment@331f8f5b4215f0445d3c07b4967662a32a2d3e31 # v2 + uses: marocchino/sticky-pull-request-comment@773744901bac0e8cbb5a0dc842800d45e9b2b405 # v2 with: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} number: ${{ steps.pr_number.outputs.pr_number }} diff --git a/.github/workflows/nf-test.yml b/.github/workflows/nf-test.yml new file mode 100644 index 00000000..f1cbbb0e --- /dev/null +++ b/.github/workflows/nf-test.yml @@ -0,0 +1,147 @@ +name: Run nf-test +on: + pull_request: + paths-ignore: + - "docs/**" + - "**/meta.yml" + - "**/*.md" + - "**/*.png" + - "**/*.svg" + release: + types: [published] + workflow_dispatch: + +# Cancel if a newer run is started +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + NFT_TAGS: "cicd" + NFT_VER: "0.9.2" + NFT_WORKDIR: "~" 
+ NXF_ANSI_LOG: false + NXF_SINGULARITY_CACHEDIR: ${{ github.workspace }}/.singularity + NXF_SINGULARITY_LIBRARYDIR: ${{ github.workspace }}/.singularity + +jobs: + nf-test-changes: + name: nf-test-changes + runs-on: # use self-hosted runners + - runs-on=${{ github.run_id }}-nf-test-changes + - runner=4cpu-linux-x64 + outputs: + shard: ${{ steps.set-shards.outputs.shard }} + total_shards: ${{ steps.set-shards.outputs.total_shards }} + steps: + - name: Clean Workspace # Purge the workspace in case it's running on a self-hosted runner + run: | + ls -la ./ + rm -rf ./* || true + rm -rf ./.??* || true + ls -la ./ + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + with: + fetch-depth: 0 + + - name: get number of shards + id: set-shards + uses: ./.github/actions/get-shards + env: + NFT_VER: ${{ env.NFT_VER }} + with: + max_shards: 7 + tags: ${{ env.NFT_TAGS }} + + - name: debug + run: | + echo ${{ steps.set-shards.outputs.shard }} + echo ${{ steps.set-shards.outputs.total_shards }} + + nf-test: + name: "${{ matrix.profile }} | ${{ matrix.NXF_VER }} | ${{ matrix.shard }}/${{ needs.nf-test-changes.outputs.total_shards }}" + needs: [nf-test-changes] + if: ${{ needs.nf-test-changes.outputs.total_shards != '0' }} + runs-on: # use self-hosted runners + - runs-on=${{ github.run_id }}-nf-test + - runner=4cpu-linux-x64 + - disk=large + strategy: + fail-fast: false + matrix: + shard: ${{ fromJson(needs.nf-test-changes.outputs.shard) }} + profile: [conda, docker, singularity] + isMain: + - ${{ github.base_ref == 'master' || github.base_ref == 'main' }} + # Exclude conda and singularity on dev + exclude: + - isMain: false + profile: "conda" + - isMain: false + profile: "singularity" + NXF_VER: + - "25.10.0" + - "latest-everything" + env: + NXF_ANSI_LOG: false + TOTAL_SHARDS: ${{ needs.nf-test-changes.outputs.total_shards }} + + steps: + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4 + with: + fetch-depth: 0 + + - name: Run nf-test + 
id: run_nf_test + uses: ./.github/actions/nf-test + continue-on-error: ${{ matrix.NXF_VER == 'latest-everything' }} + env: + NFT_WORKDIR: ${{ env.NFT_WORKDIR }} + # The composite action installs Nextflow from env.NXF_VERSION; without this the NXF_VER matrix axis is ignored + NXF_VERSION: ${{ matrix.NXF_VER }} + with: + profile: ${{ matrix.profile }} + shard: ${{ matrix.shard }} + total_shards: ${{ env.TOTAL_SHARDS }} + tags: ${{ env.NFT_TAGS }} + + - name: Report test status + if: ${{ always() }} + run: | + if [[ "${{ steps.run_nf_test.outcome }}" == "failure" ]]; then + echo "::error::Test with ${{ matrix.NXF_VER }} failed" + # Add to workflow summary + echo "## ❌ Test failed: ${{ matrix.profile }} | ${{ matrix.NXF_VER }} | Shard ${{ matrix.shard }}/${{ env.TOTAL_SHARDS }}" >> $GITHUB_STEP_SUMMARY + if [[ "${{ matrix.NXF_VER }}" == "latest-everything" ]]; then + echo "::warning::Test with latest-everything failed but will not cause workflow failure. Please check if the error is expected or if it needs fixing." + fi + if [[ "${{ matrix.NXF_VER }}" != "latest-everything" ]]; then + exit 1 + fi + fi + + confirm-pass: + needs: [nf-test] + if: always() + runs-on: # use self-hosted runners + - runs-on=${{ github.run_id }}-confirm-pass + - runner=2cpu-linux-x64 + steps: + - name: One or more tests failed (excluding latest-everything) + if: ${{ contains(needs.*.result, 'failure') }} + run: exit 1 + + - name: One or more tests cancelled + if: ${{ contains(needs.*.result, 'cancelled') }} + run: exit 1 + + - name: All tests ok + if: ${{ contains(needs.*.result, 'success') }} + run: exit 0 + + - name: debug-print + if: always() + run: | + # Backticks are escaped so bash prints them literally instead of executing a command named `needs` + echo "::group::DEBUG: \`needs\` Contents" + echo "DEBUG: toJSON(needs) = ${{ toJSON(needs) }}" + echo "DEBUG: toJSON(needs.*.result) = ${{ toJSON(needs.*.result) }}" + echo "::endgroup::" diff --git a/.gitignore b/.gitignore index a42ce016..ae39dfb5 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,7 @@ testing/ testing* *.pyc null/ +.nf-test.log +nf-test +.nf-test* +.nf-test/ diff --git a/.nf-core.yml b/.nf-core.yml index 29b98257..9c7cdea8 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ 
-1,6 +1,6 @@ repository_type: pipeline -nf_core_version: 3.2.0 +nf_core_version: 3.5.2 lint: {} @@ -9,7 +9,7 @@ template: name: bactmap description: A mapping-based pipeline for bacterial whole genome sequences author: Andries J. van Tonder - version: 2.0.0dev + version: 2.0.0 force: true outdir: . skip_features: [] diff --git a/.prettierignore b/.prettierignore index edd29f01..dd749d43 100644 --- a/.prettierignore +++ b/.prettierignore @@ -10,4 +10,7 @@ testing/ testing* *.pyc bin/ +.nf-test/ ro-crate-metadata.json +modules/nf-core/ +subworkflows/nf-core/ diff --git a/.prettierrc.yml b/.prettierrc.yml index c81f9a76..07dbd8bb 100644 --- a/.prettierrc.yml +++ b/.prettierrc.yml @@ -1 +1,6 @@ printWidth: 120 +tabWidth: 4 +overrides: + - files: "*.{md,yml,yaml,html,css,scss,js,cff}" + options: + tabWidth: 2 diff --git a/CHANGELOG.md b/CHANGELOG.md index c64143e9..d7675169 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,14 +3,71 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## v2.0.0dev - [date] +## 2.0.0 - Titanium Bird - 2025-09-15 -Initial release of nf-core/bactmap, created with the [nf-core](https://nf-co.re/) template. +nf-core/bactmap release v2.0.0. The pipeline has been completely rewritten using a fresh template created with [nf-core](https://nf-co.re/). ### `Added` +The pipeline is composed of the following steps: + +1. Index reference fasta file (short-read: [`BWA index`](https://github.com/lh3/bwa) or [`Bowtie2 build`](http://bowtie-bio.sourceforge.net/bowtie2/index.shtml); long-read: [`minimap2 index`](https://github.com/lh3/minimap2)) +2. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) or [`falco`](https://github.com/smithlabcode/falco) as an alternative option) +3. Calculate fastq summary statistics ([`fastq-scan`](https://github.com/rpetit3/fastq-scan)) +4. 
Perform read pre-processing (optional) + - Adapter clipping and merging (short-read: [`fastp`](https://github.com/OpenGene/fastp) or [`AdapterRemoval2`](https://github.com/MikkelSchubert/adapterremoval); long-read: [`porechop`](https://github.com/rrwick/Porechop) or [`Porechop_ABI`](https://github.com/bonsai-team/Porechop_ABI)) + - Quality filtering (long-read: [`Filtlong`](https://github.com/rrwick/Filtlong) or [`Nanoq`](https://github.com/esteinig/nanoq)) + - Run merging ([`cat`](https://pubs.opengroup.org/onlinepubs/9699919799/utilities/cat.html)) +5. Downsample fastq files (optional) ([`Rasusa`](https://github.com/mbhall88/rasusa)) +6. Summarise read statistics pre- and post-processing and subsampling ([`read_stats`](https://github.com/nf-core/bactmap/blob/master/modules/local/read_stats/main.nf)) +7. Variant calling + +- Map reads to reference (short-read: [`BWA-MEM2`](https://github.com/bwa-mem2/bwa-mem2) or [`Bowtie2`](http://bowtie-bio.sourceforge.net/bowtie2/index.shtml); long-read: [`minimap2`](https://github.com/lh3/minimap2)) +- Sort and index alignments ([`SAMtools view/sort`](https://sourceforge.net/projects/samtools/files/samtools/)) +- Summarise alignment statistics ([`SAMtools stats`](https://sourceforge.net/projects/samtools/files/samtools/)) +- Call variants (short-read: [`FreeBayes`](https://github.com/freebayes/freebayes); long-read: [`Clair3`](https://github.com/HKU-BAL/Clair3)) +- Filter variants ([`BCFtools filter`](http://samtools.github.io/bcftools/bcftools.html)) +- Summarise variant statistics ([`BCFtools stats`](http://samtools.github.io/bcftools/bcftools.html)) +- Convert filtered bcf to pseudogenome fasta ([`BCFtools consensus`](http://samtools.github.io/bcftools/bcftools.html) and [`BEDtools`](https://bedtools.readthedocs.io/en/latest/content/tools/genomecov.html)) +- Summarise mapping statistics ([`seqtk`](https://github.com/lh3/seqtk)) + +8. 
Create alignment from pseudogenomes by concatenating fasta files having first checked that the sample sequences are high quality ([`alignpseudogenomes`](https://github.com/nf-core/bactmap/blob/master/modules/local/alignpseudogenomes/main.nf)) +9. Extract variant sites from alignment ([`SNP-sites`](https://github.com/sanger-pathogens/snp-sites)) +10. Present QC for raw and processed reads, alignment statistics and variant statistics ([`MultiQC`](http://multiqc.info/)) + +- Added support for Oxford Nanopore long-read sequencing data. + ### `Fixed` ### `Dependencies` ### `Deprecated` + +- Recombination removal with [`Gubbins`](https://sanger-pathogens.github.io/gubbins/) has been removed from the pipeline. The user can still run recombination removal using the alignment output from the pipeline. +- Phylogenetic tree construction has been removed from the pipeline. The user can still run phylogenetic tree construction using the alignment output from the pipeline. + +## 1.0.0 - Aluminium Spider - 2021-06-18 + +Initial release of nf-core/bactmap, created with the [nf-core](https://nf-co.re/) template. + +The pipeline is composed of the following steps: + +1. Index reference fasta file ([`BWA index`](https://github.com/lh3/bwa)) +2. Trim reads for quality and adapter sequence (Optional) ([`fastp`](https://github.com/OpenGene/fastp)) +3. Estimate genome size ([`mash sketch`](https://mash.readthedocs.io/en/latest/index.html)) +4. Downsample fastq files (Optional) ([`Rasusa`](https://github.com/mbhall88/rasusa)) +5. Variant calling + 1. Read mapping ([`BWA mem`](https://github.com/lh3/bwa)) + 2. Sort and index alignments ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/)) + 3. Call and filter variants ([`BCFtools`](http://samtools.github.io/bcftools/bcftools.html)) + 4. Convert filtered bcf to pseudogenome fasta ([`vcf2pseudogenome.py`](https://github.com/nf-core/bactmap/blob/dev/bin/vcf2pseudogenome.py)) +6. 
Create alignment from pseudogenome by concatenating fasta files having first checked that the sample sequences are high quality ([`calculate_fraction_of_non_GATC_bases.py`](https://github.com/nf-core/bactmap/blob/dev/bin/)) +7. Remove recombination (Optional) ([`Gubbins`](https://sanger-pathogens.github.io/gubbins/)) +8. Extract variant sites from alignment ([`SNP-sites`](https://github.com/sanger-pathogens/snp-sites)) +9. Construct phylogenetic tree (Optional) + 1. Fast/less accurate + - neighbour joining [`RapidNJ`](https://birc.au.dk/software/rapidnj/) + - approximate maximum likelihood [`FastTree2`](http://www.microbesonline.org/fasttree/) + 2. Slow/more accurate, maximum likelihood + - [`IQ-TREE`](http://www.iqtree.org/), + - [`RAxML-NG`](https://github.com/amkozlov/raxml-ng) diff --git a/CITATIONS.md b/CITATIONS.md index a03c7d0a..9f5b5cd8 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -10,13 +10,93 @@ ## Pipeline tools +- [AdapterRemoval2](https://pubmed.ncbi.nlm.nih.gov/26868221/) + + > Schubert M, Lindgreen S, Orlando L. AdapterRemoval v2: rapid adapter trimming, identification, and read merging. BMC Res Notes. 2016 Feb 12;9:88. doi: 10.1186/s13104-016-1900-2. PMID: 26868221; PMCID: PMC4751634. + +- [BCFtools](https://www.ncbi.nlm.nih.gov/pubmed/21903627/) + + > Li H. A statistical framework for SNP calling, mutation discovery, association mapping and population genetical parameter estimation from sequencing data. Bioinformatics. 2011 Nov 1;27(21):2987-93. doi: 10.1093/bioinformatics/btr509. Epub 2011 Sep 8. PubMed PMID: 21903627; PubMed Central PMCID: PMC3198575. + +- [BEDTools](https://pubmed.ncbi.nlm.nih.gov/20110278/) + + > Quinlan AR, Hall IM. BEDTools: a flexible suite of utilities for comparing genomic features. Bioinformatics. 2010 Mar 15;26(6):841-2. doi: 10.1093/bioinformatics/btq033. Epub 2010 Jan 28. PubMed PMID: 20110278; PubMed Central PMCID: PMC2832824. 
+ +- [Bowtie2](https://pubmed.ncbi.nlm.nih.gov/22388286/) + + > Langmead B, Salzberg SL. Fast gapped-read alignment with Bowtie 2. Nat Methods. 2012 Mar 4;9(4):357-9. doi: 10.1038/nmeth.1923. PMID: 22388286; PMCID: PMC3322381. + +- [BWA MEM 2](https://github.com/bwa-mem2/bwa-mem2) + + > Vasimuddin M, Misra S, Li H, Aluru S. Efficient Architecture-Aware Acceleration of BWA-MEM for Multicore Systems. IEEE International Parallel and Distributed Processing Symposium (IPDPS). 2019, pp. 314-324. doi: 10.1109/IPDPS.2019.00041. + +- [cat](https://pubs.opengroup.org/onlinepubs/9699919799/utilities/cat.html) + + > The Open Group Base Specifications Issue 7, IEEE Std 1003.1-2008, 2016 Edition. The Open Group. + +- [Clair3](https://pubmed.ncbi.nlm.nih.gov/38177392/) + + > Zheng Z, Li S, Su J, Leung AW, Lam TW, Luo R. Symphonizing pileup and full-alignment for deep learning-based long-read variant calling. Nat Comput Sci. 2022 Dec;2(12):797-803. doi: 10.1038/s43588-022-00387-x. Epub 2022 Dec 19. PMID: 38177392. + +- [Falco](https://pubmed.ncbi.nlm.nih.gov/33552473/) + + > de Sena Brandine G, Smith AD. Falco: high-speed FastQC emulation for quality control of sequencing data. F1000Res. 2019 Nov 7;8:1874. doi: 10.12688/f1000research.21142.2. PMID: 33552473; PMCID: PMC7845152. + +- [FastP](https://www.ncbi.nlm.nih.gov/pubmed/30423086/) + + > Chen S, Zhou Y, Chen Y, Gu J. fastp: an ultra-fast all-in-one FASTQ preprocessor. Bioinformatics. 2018 Sep 1;34(17):i884-i890. doi: 10.1093/bioinformatics/bty560. PubMed PMID: 30423086; PubMed Central PMCID: PMC6129281. + +- [fastq-scan](https://github.com/rpetit3/fastq-scan) + + > Petit, R. (2022). fastq-scan: A tool for reading a FASTQ from STDIN and outputting summary statistics [Online]. + - [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) -> Andrews, S. (2010). FastQC: A Quality Control Tool for High Throughput Sequence Data [Online]. + > Andrews S. (2010). 
FastQC: A Quality Control Tool for High Throughput Sequence Data [Online]. + +- [Filtlong](https://github.com/rrwick/Filtlong) + + > Wick R (2021) Filtlong: A tool for filtering long reads by quality [Online]. + +- [FreeBayes](https://arxiv.org/abs/1207.3907) + + > Garrison E, Marth G. Haplotype-based variant detection from short-read sequencing. arXiv preprint arXiv:1207.3907 [q-bio.GN] 2012. doi: 10.48550/arXiv.1207.3907 + +- [minimap2](https://pubmed.ncbi.nlm.nih.gov/29750242/) + + > Li H. Minimap2: pairwise alignment for nucleotide sequences. Bioinformatics. 2018 Sep 15;34(18):3094-3100. doi: 10.1093/bioinformatics/bty191. PMID: 29750242; PMCID: PMC6137996. - [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/) -> Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. + > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. + +- [nanoq](https://github.com/esteinig/nanoq) + + > Steinig, E., & Coin, L. (2022). Nanoq: ultra-fast quality control for nanopore reads. Journal of Open Source Software, 7(69). https://doi.org/10.21105/joss.02991 + +- [Porechop](https://pubmed.ncbi.nlm.nih.gov/29177090/) + + > Wick RR, Judd LM, Gorrie CL, Holt KE. Completing bacterial genome assemblies with multiplex MinION sequencing. Microb Genom. 2017 Sep 14;3(10):e000132. doi: 10.1099/mgen.0.000132. PMID: 29177090; PMCID: PMC5695209. + +- [Porechop_ABI](https://pubmed.ncbi.nlm.nih.gov/36698762/) + + > Bonenfant Q, Noé L, Touzet H. 
Porechop_ABI: discovering unknown adapters in Oxford Nanopore Technology sequencing reads for downstream trimming. Bioinform Adv. 2022 Nov 21;3(1):vbac085. doi: 10.1093/bioadv/vbac085. PMID: 36698762; PMCID: PMC9869717. + +- [Rasusa](https://github.com/mbhall88/rasusa) + + > Hall MB. Rasusa: Randomly subsample sequencing reads to a specified coverage. 2019. doi:10.5281/zenodo.3731394 + +- [SAMtools](https://www.ncbi.nlm.nih.gov/pubmed/19505943/) + + > Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R; 1000 Genome Project Data Processing Subgroup. The Sequence Alignment/Map format and SAMtools. Bioinformatics. 2009 Aug 15;25(16):2078-9. doi: 10.1093/bioinformatics/btp352. Epub 2009 Jun 8. PubMed PMID: 19505943; PubMed Central PMCID: PMC2723002. + +- [seqtk](https://github.com/lh3/seqtk) + + > Li H. seqtk: Toolkit for processing sequences in FASTA/Q formats. 2012. [Online]. + +- [SNP-sites](https://pubmed.ncbi.nlm.nih.gov/28348851/) + + > Page AJ, Taylor B, Delaney AJ, Soares J, Seemann T, Keane JA, Harris SR. SNP-sites: rapid efficient extraction of SNPs from multi-FASTA alignments. Microb Genom. 2016 Apr 29;2(4):e000056. doi: 10.1099/mgen.0.000056. PMID: 28348851; PMCID: PMC5320690. ## Software packaging/containerisation tools diff --git a/README.md b/README.md index decae49f..de5c7c6f 100644 --- a/README.md +++ b/README.md @@ -19,47 +19,71 @@ ## Introduction -**nf-core/bactmap** is a bioinformatics pipeline that ... - - - - -1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))2. Present QC for raw reads ([`MultiQC`](http://multiqc.info/)) +**nf-core/bactmap** is a bioinformatics best-practice analysis pipeline for mapping short (Illumina) and long reads (Oxford Nanopore) from bacterial WGS to a reference sequence, creating filtered VCF files and making pseudogenomes based on high quality positions in the VCF files. + +## Pipeline summary + +![](docs/images/bactmap_pipeline.png) + +1. 
Index reference fasta file (short-read: [`BWA index`](https://github.com/lh3/bwa) or [`Bowtie2 build`](http://bowtie-bio.sourceforge.net/bowtie2/index.shtml); long-read: [`minimap2 index`](https://github.com/lh3/minimap2)) +2. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) or [`falco`](https://github.com/smithlabcode/falco) as an alternative option) +3. Calculate fastq summary statistics ([`fastq-scan`](https://github.com/rpetit3/fastq-scan)) +4. Perform read pre-processing (optional) + - Adapter clipping and merging (short-read: [`fastp`](https://github.com/OpenGene/fastp) or [`AdapterRemoval2`](https://github.com/MikkelSchubert/adapterremoval); long-read: [`porechop`](https://github.com/rrwick/Porechop) or [`Porechop_ABI`](https://github.com/bonsai-team/Porechop_ABI)) + - Quality filtering (long-read: [`Filtlong`](https://github.com/rrwick/Filtlong) or [`Nanoq`](https://github.com/esteinig/nanoq)) + - Run merging ([`cat`](https://pubs.opengroup.org/onlinepubs/9699919799/utilities/cat.html)) +5. Downsample fastq files (optional) ([`Rasusa`](https://github.com/mbhall88/rasusa)) +6. Summarise read statistics pre- and post-processing and subsampling ([`read_stats`](https://github.com/nf-core/bactmap/blob/master/modules/local/read_stats/main.nf)) +7. 
Variant calling + +- Map reads to reference (short-read: [`BWA-MEM2`](https://github.com/bwa-mem2/bwa-mem2) or [`Bowtie2`](http://bowtie-bio.sourceforge.net/bowtie2/index.shtml); long-read: [`minimap2`](https://github.com/lh3/minimap2)) +- Sort and index alignments ([`SAMtools view/sort`](https://sourceforge.net/projects/samtools/files/samtools/)) +- Summarise alignment statistics ([`SAMtools stats`](https://sourceforge.net/projects/samtools/files/samtools/)) +- Call variants (short-read: [`FreeBayes`](https://github.com/freebayes/freebayes); long-read: [`Clair3`](https://github.com/HKU-BAL/Clair3)) +- Filter variants ([`BCFtools filter`](http://samtools.github.io/bcftools/bcftools.html)) +- Summarise variant statistics ([`BCFtools stats`](http://samtools.github.io/bcftools/bcftools.html)) +- Convert filtered bcf to pseudogenome fasta ([`BCFtools consensus`](http://samtools.github.io/bcftools/bcftools.html) and [`BEDtools`](https://bedtools.readthedocs.io/en/latest/content/tools/genomecov.html)) +- Summarise mapping statistics ([`seqtk`](https://github.com/lh3/seqtk)) + +8. Create alignment from pseudogenomes by concatenating fasta files having first checked that the sample sequences are high quality ([`alignpseudogenomes`](https://github.com/nf-core/bactmap/blob/master/modules/local/alignpseudogenomes/main.nf)) +9. Extract variant sites from alignment ([`SNP-sites`](https://github.com/sanger-pathogens/snp-sites)) +10. Present QC for raw and processed reads, alignment statistics and variant statistics ([`MultiQC`](http://multiqc.info/)) ## Usage > [!NOTE] > If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data. 
- +Additionally, if you are analysing Oxford Nanopore data, you will need to provide the path to a model to use with `Clair3` (specified with `--clair3_model`). Models for older chemistries and basecallers (e.g. r9.4.1) can be downloaded from [here](https://www.bio8.cs.hku.hk/clair3/clair3_models/). For newer chemistries and basecallers, ONT provides models through [Rerio](https://github.com/nanoporetech/rerio). To download the models for Clair3 from the ONT GitHub repository, you can use the following commands (each model will be downloaded to the folder `clair3_models/`): -Now, you can run the pipeline using: +```bash +# Clone the rerio repository +git clone https://github.com/nanoporetech/rerio - +# Download all models +python3 download_model.py --clair3 +``` + +Now, you can run the pipeline using: ```bash nextflow run nf-core/bactmap \ -profile \ --input samplesheet.csv \ + --fasta \ + --clair3_model \ --outdir ``` @@ -76,11 +100,23 @@ For more details about the output files and reports, please refer to the ## Credits -nf-core/bactmap was originally written by Andries J. van Tonder. +nf-core/bactmap was originally written by [Anthony Underwood](https://github.com/antunderwood), [Andries van Tonder](https://github.com/avantonder) and [Thanh Le Viet](https://github.com/thanhleviet). 
-We thank the following people for their extensive assistance in the development of this pipeline: +We thank the following people for their extensive assistance in the development +of this pipeline: - +- [Alexandre Gilardet](https://github.com/alexandregilardet) +- [Hanh Hoang](https://github.com/sainsachiko) +- [Ismael Henarejos-Castilo](https://github.com/IsmaelHC1994) +- [Mareike Janiak](https://github.com/MareikeJaniak) +- [Harshil Patel](https://github.com/drpatelh) +- [Olha Petryk](https://github.com/opetryk) +- [Richard Agyekum](https://github.com/QuadjoLegend) +- [Steven Sutcliffe](https://github.com/sgsutcliffe) +- [Szymon Szyszkowski](https://github.com/project-defiant) + +Anthony Underwood's time working on the project was funded by the National Institute for Health Research (NIHR) Global Health Research Unit for the Surveillance of Antimicrobial Resistance (Grant Reference Number 16/136/111). +![NIHR funded](assets/nihr_logos_funded_by.jpg) ## Contributions and Support @@ -93,8 +129,6 @@ For further information or help, don't hesitate to get in touch on the [Slack `# - - An extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file. You can cite the `nf-core` publication as follows: diff --git a/assets/email_template.html b/assets/email_template.html index 8387659b..3619691a 100644 --- a/assets/email_template.html +++ b/assets/email_template.html @@ -1,110 +1,53 @@ - - - - + + + + - - nf-core/bactmap Pipeline Report - - -
- + + nf-core/bactmap Pipeline Report + + +
-

nf-core/bactmap ${version}

-

Run Name: $runName

+ - <% if (!success){ out << """ -
-

nf-core/bactmap execution completed unsuccessfully!

+

nf-core/bactmap ${version}

+

Run Name: $runName

+ +<% if (!success){ + out << """ +
+

nf-core/bactmap execution completed unsuccessfully!

The exit status of the task that caused the workflow execution to fail was: $exitStatus.

The full error message was:

-
${errorReport}
-
- """ } else { out << """ -
+
${errorReport}
+
+ """ +} else { + out << """ +
nf-core/bactmap execution completed successfully! -
- """ } %> +
+ """ +} +%> -

The workflow was completed at $dateComplete (duration: $duration)

-

The command used to launch the workflow was as follows:

-
-$commandLine
+

The workflow was completed at $dateComplete (duration: $duration)

+

The command used to launch the workflow was as follows:

+
$commandLine
-

Pipeline Configuration:

- - - <% out << summary.collect{ k,v -> " - - - - - " }.join("\n") %> - -
- $k - -
$v
-
+

Pipeline Configuration:

+ + + <% out << summary.collect{ k,v -> "" }.join("\n") %> + +
$k
$v
-

nf-core/bactmap

-

https://github.com/nf-core/bactmap

-
- +

nf-core/bactmap

+

https://github.com/nf-core/bactmap

+ +
+ + diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml index 88c37263..64bd1170 100644 --- a/assets/methods_description_template.yml +++ b/assets/methods_description_template.yml @@ -3,8 +3,6 @@ description: "Suggested text and references to use when describing pipeline usag section_name: "nf-core/bactmap Methods Description" section_href: "https://github.com/nf-core/bactmap" plot_type: "html" -## TODO nf-core: Update the HTML below to your preferred methods description, e.g. add publication citation for this pipeline -## You inject any metadata in the Nextflow '${workflow}' object data: |

Methods

Data was processed using nf-core/bactmap v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (Ewels et al., 2020), utilising reproducible software environments from the Bioconda (Grüning et al., 2018) and Biocontainers (da Veiga Leprevost et al., 2017) projects.

diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 89acb7af..fade6e77 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -1,7 +1,12 @@ +# multiqc_config.yml + report_comment: > - This report has been generated by the nf-core/bactmap - analysis pipeline. For information about how to interpret these results, please see the - documentation. + This report has been generated by the nf-core/bactmap analysis pipeline. For information about how + to interpret these results, please see the documentation. + +# Report section order report_section_order: "nf-core-bactmap-methods-description": order: -1000 @@ -9,7 +14,282 @@ report_section_order: order: -1001 "nf-core-bactmap-summary": order: -1002 + general_stats: + order: 1000 + fastqc: + order: 900 + fastp: + order: 800 + porechop: + order: 700 + porechop_abi: + order: 600 + filtlong: + order: 500 + nanoq: + order: 400 + fastqc-1: + order: 300 + samtools: + order: 200 + bcftools: + order: 100 -export_plots: true +custom_logo: "nf-core-bactmap_logo_light.png" +custom_logo_url: https://nf-co.re/bactmap +custom_logo_title: "nf-core/bactmap" + +# List of run modules +run_modules: + - fastqc + - fastp + - adapterremoval + - porechop + - porechop_abi + - filtlong + - nanoq + - samtools + - bcftools + - custom_content + +# File path filters +sp: + fastqc/data: + fn_re: ".*(fastqc|falco)_data.txt$" + fastqc/zip: + fn: "*_fastqc.zip" + fastp: + fn: "*.fastp.json" + filtlong: + contents: Scoring long reads + contents_re: " " + +# Top-level module definitions +top_modules: + - "fastqc": + name: "FastQC / Falco (pre-Trimming)" + path_filters: + - "*raw*" + path_filters_exclude: + - "*processed*" + extra: | + If used in this run, Falco is a drop-in replacement for FastQC producing + the same output, written by Guilherme de Sena Brandine and Andrew D. Smith. 
+ - "fastqc": + name: "FastQC / Falco (post-Trimming)" + path_filters: + - "*processed*" + path_filters_exclude: + - "*raw*" + extra: | + If used in this run, Falco is a drop-in replacement for FastQC producing + the same output, written by Guilherme de Sena Brandine and Andrew D. Smith. + - "porechop": + name: "Porechop" + anchor: "porechop" + target: "Porechop" + path_filters: + - "*porechop.log" + extra: | + ℹ️: If you get the error message 'Error - was not able to plot data.' + this means that porechop did not detect any adapters and therefore no statistics + generated. + - "porechop": + name: "Porechop_ABI" + anchor: "porechop_abi" + target: "Porechop_ABI" + doi: "10.1093/bioadv/vbac085" + info: "Find and remove adapters from Oxford Nanopore reads." + path_filters: + - "*porechop_abi.log" + extra: | + ℹ️: If you get the error message 'Error - was not able to plot data.' + this means that porechop_abi did not detect any adapters and therefore no statistics + generated. + - "filtlong": + name: "Filtlong" + anchor: "filtlong" + target: "Filtlong" + doi: "10.1371/journal.pcbi.1010905" + info: "This section of the report shows Filtlong results for reads after length + filtering." + path_filters: + - "*filtlong/*" + extra: | + ℹ️: If you get the error message 'Error - was not able to plot data.' + this means that filtlong did not detect any worse reads and therefore no statistics + generated. + - "nanoq": + name: "Nanoq" + anchor: "nanoq" + target: "Nanoq" + doi: "10.21105/joss.02991" + info: "This section of the report shows Nanoq results for Nanopore reads" + path_filters: + - "*nanoq.log" + extra: | + ℹ️: If you get the error message 'Error - was not able to plot data.' + this means that nanoq did not generate any results for the reads, and no statistics are available. 
+ - samtools: + name: "SAMTools Stats" + anchor: "samtools" + target: "SAMTools" + doi: "10.1093/bioinformatics/btp352" + info: "This section of the report shows SAMTools counts/statistics after mapping + with Bowtie 2/BWA 2 mem." + path_filters: + - "*.stats" + - "bcftools": + name: "BCFTools" + anchor: "bcftools" + target: "BCFTools" + doi: "10.1093/bioinformatics/btp352" + info: "This section of the report shows BCFTools stats results for the called + variants." + path_filters: + - "*.txt" +# Table columns for each module +table_columns_placement: + FastQC / Falco (pre-Trimming): + total_sequences: 100 + avg_sequence_length: 110 + median_sequence_length: 120 + percent_duplicates: 130 + percent_gc: 140 + percent_fails: 150 + FastQC / Falco (post-Trimming): + total_sequences: 200 + avg_sequence_length: 210 + median_sequence_length: 220 + percent_duplicates: 230 + percent_gc: 240 + percent_fails: 250 + fastp: + pct_adapter: 300 + pct_surviving: 310 + pct_duplication: 320 + after_filtering_gc_content: 330 + after_filtering_q30_rate: 340 + after_filtering_q30_bases: 350 + filtering_result_passed_filter_reads: 360 + Adapter Removal: + aligned_total: 360 + percent_aligned: 370 + percent_collapsed: 380 + percent_discarded: 390 + Porechop: + Input Reads: 500 + Start Trimmed: 510 + Start Trimmed Percent: 520 + End Trimmed: 530 + End Trimmed Percent: 540 + Middle Split: 550 + Middle Split Percent: 560 + Porechop_ABI: + Input Reads: 500 + Start Trimmed: 510 + Start Trimmed Percent: 520 + End Trimmed: 530 + End Trimmed Percent: 540 + Middle Split: 550 + Middle Split Percent: 560 + Filtlong: + Target bases: 600 + nanoq: + Reads: 700 + Read N50: 710 + SAMTools Stats: + raw_total_sequences: 800 + reads_mapped: 810 + reads_mapped_percent: 820 + reads_properly_paired_percent: 830 + non-primary_alignments: 840 + reads_MQ0_percent: 850 + error_rate: 860 + +# Table columns visibility +table_columns_visible: + FastQC / Falco (pre-Trimming): + total_sequences: true + 
avg_sequence_length: true + percent_duplicates: true + percent_gc: true + percent_fails: false + FastQC / Falco (post-Trimming): + total_sequences: true + avg_sequence_length: true + percent_duplicates: false + percent_gc: false + percent_fails: false + Adapter Removal: + aligned_total: true + percent_aligned: true + percent_collapsed: true + percent_discarded: false + fastp: + pct_adapter: true + pct_surviving: true + pct_duplication: false + after_filtering_gc_content: false + after_filtering_q30_rate: false + after_filtering_q30_bases: false + porechop: + Input reads: false + Start Trimmed: true + Start Trimmed Percent: true + End Trimmed: false + End Trimmed Percent: true + Middle Split: false + Middle Split Percent: true + porechop_abi: + Input reads: false + Start Trimmed: true + Start Trimmed Percent: true + End Trimmed: false + End Trimmed Percent: true + Middle Split: false + Middle Split Percent: true + Filtlong: + Target bases: true + nanoq: + ReadN50: true + Reads: true + +# Column names for each module +table_columns_name: + FastQC / Falco (pre-Trimming): + total_sequences: "Nr. Input Reads" + avg_sequence_length: "Length Input Reads" + percent_gc: "% GC Input Reads" + percent_duplicates: "% Dups Input Reads" + percent_fails: "% Failed Input Reads" + FastQC / Falco (post-Trimming): + total_sequences: "Nr. Processed Reads" + avg_sequence_length: "Length Processed Reads" + percent_gc: "% GC Processed Reads" + percent_duplicates: "% Dups Processed Reads" + percent_fails: "% Failed Processed Reads" + SAMTools Stats: + raw_total_sequences: "Nr. Reads Into Mapping" + reads_mapped: "Nr. Mapped Reads" + reads_mapped_percent: "% Mapped Reads" + +# Clean-up extensions +extra_fn_clean_exts: + - ".txt" + - "_filtered" + - "porechop" + - "porechop_abi" + - "_processed" + - type: remove + pattern: "_falco" + +# Section comments for further info +section_comments: + general_stats: | + By default, all read count columns are displayed as millions (M) of reads. 
+ +# Export plots and disable version detection +export_plots: true disable_version_detection: true diff --git a/assets/nf-core-bactmap_logo_light.png b/assets/nf-core-bactmap_logo_light.png index 47f376d8..9fd8de13 100644 Binary files a/assets/nf-core-bactmap_logo_light.png and b/assets/nf-core-bactmap_logo_light.png differ diff --git a/assets/nihr_logos_funded_by.jpg b/assets/nihr_logos_funded_by.jpg new file mode 100644 index 00000000..836ecdb8 Binary files /dev/null and b/assets/nihr_logos_funded_by.jpg differ diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv index 5f653ab7..a3776d9d 100644 --- a/assets/samplesheet.csv +++ b/assets/samplesheet.csv @@ -1,3 +1,4 @@ -sample,fastq_1,fastq_2 -SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz -SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz, +sample,run_accession,instrument_platform,fastq_1,fastq_2 +01,test_1,ILLUMINA,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/prokaryotes/bacteroides_fragilis/illumina/fastq/test1_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/prokaryotes/bacteroides_fragilis/illumina/fastq/test1_2.fastq.gz +02,test_2,ILLUMINA,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/prokaryotes/bacteroides_fragilis/illumina/fastq/test2_1.fastq.gz,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/prokaryotes/bacteroides_fragilis/illumina/fastq/test2_2.fastq.gz +03,test_3,OXFORD_NANOPORE,https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/prokaryotes/bacteroides_fragilis/nanopore/fastq/test.fastq.gz, diff --git a/assets/schema_input.json b/assets/schema_input.json index 3023bb46..8f7b9ea5 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -13,6 +13,18 @@ "errorMessage": "Sample name must be provided and cannot contain 
spaces", "meta": ["id"] }, + "run_accession": { + "type": ["string", "integer"], + "pattern": "^[^\\s]+$", + "errorMessage": "Run accession must be provided and cannot contain spaces.", + "meta": ["run_accession"] + }, + "instrument_platform": { + "type": "string", + "enum": ["ILLUMINA", "OXFORD_NANOPORE"], + "errorMessage": "One of ILLUMINA or OXFORD_NANOPORE must be provided.", + "meta": ["instrument_platform"] + }, "fastq_1": { "type": "string", "format": "file-path", @@ -28,6 +40,11 @@ "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have extension '.fq.gz' or '.fastq.gz'" } }, - "required": ["sample", "fastq_1"] - } + "required": ["sample", "run_accession", "instrument_platform"] + }, + "allOf": [ + { "uniqueEntries": ["fastq_1"] }, + { "uniqueEntries": ["fastq_2"] }, + { "uniqueEntries": ["sample", "run_accession"] } + ] } diff --git a/bin/calculate_fraction_of_non_GATC_bases.py b/bin/calculate_fraction_of_non_GATC_bases.py new file mode 100755 index 00000000..a6cf8688 --- /dev/null +++ b/bin/calculate_fraction_of_non_GATC_bases.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python3 + +## Originally written by Andries van Tonder and released under the MIT license. +## See git repository (https://github.com/nf-core/bactmap) for full license text. + +from Bio import SeqIO +import argparse, sys, os + +class ParserWithErrors(argparse.ArgumentParser): + def error(self, message): + print('{0}\n\n'.format(message)) + self.print_help() + sys.exit(2) + + def is_valid_file(self, parser, arg): + if not os.path.isfile(arg): + parser.error("The file %s does not exist!" 
% arg) + else: + return arg + + +def argparser(): + description = """ + A script to find the fraction of non-GATC bases in a fasta file + """ + parser = ParserWithErrors(description = description) + parser.add_argument("-f", "--fasta_file", required=True, + help="fasta file path", + type=lambda x: parser.is_valid_file(parser, x)) + + + return parser + + +def calculate_fraction_of_non_GATC_bases(fasta_file): + record = SeqIO.read(fasta_file, 'fasta') + total_len = len(record.seq) + num_Gs = record.seq.upper().count('G') + num_As = record.seq.upper().count('A') + num_Ts = record.seq.upper().count('T') + num_Cs = record.seq.upper().count('C') + return (total_len - (num_Gs + num_As + num_Ts + num_Cs))/total_len + +if __name__ == '__main__': + parser = argparser() + args = parser.parse_args() + print(calculate_fraction_of_non_GATC_bases(args.fasta_file)) diff --git a/bin/fastqscan_parser.py b/bin/fastqscan_parser.py new file mode 100755 index 00000000..5db2a1b5 --- /dev/null +++ b/bin/fastqscan_parser.py @@ -0,0 +1,68 @@ +#!/usr/bin/env python + +import pandas as pd +import json +import glob +import sys +import os +import argparse + +def parser_args(args=None): + """ + Function for input arguments for fastqscan_parser.py + """ + Description = 'Collect fastq-scan outputs and create a summary table' + Epilog = """Example usage: python fastqscan_parser.py """ + parser = argparse.ArgumentParser(description=Description, epilog=Epilog) + parser.add_argument("-of", "--output_file", type=str, default="fastq-scan_summary.tsv", help="fastq-scan summary file (default: 'fastq-scan_summary.tsv').") + return parser.parse_args(args) + +def make_dir(path): + """ + Function for making a directory from a provided path + """ + if not len(path) == 0: + try: + os.makedirs(path) + except OSError as exception: + if exception.errno != errno.EEXIST: + raise + +def json_to_dataframe(json_files): + """ + Function to take list of json files and create a summary table + """ + json_names = 
[i.replace('.json', '') for i in json_files] + json_names_df = pd.DataFrame(json_names) + json_names_df.columns = ['sample'] + jsons_data = {} + + for index, file in enumerate(json_files): + with open(file, 'r') as f: + json_text = json.loads(f.read()) + qc = json_text['qc_stats'] + jsons_data[index] = qc + + jsons_data_df = pd.DataFrame.from_dict(jsons_data, orient = 'index') + json_merged_df = json_names_df.join(jsons_data_df) + + return json_merged_df + +def main(args=None): + args = parser_args(args) + + ## Create output directory if it doesn't exist + out_dir = os.path.dirname(args.output_file) + make_dir(out_dir) + + ## Create list of fastq-scan json files + json_files = sorted(glob.glob('*.json')) + + ## Create dataframe + json_df = json_to_dataframe(json_files) + + ## Write output file + json_df.to_csv(args.output_file, sep = '\t', header = True, index = False) + +if __name__ == '__main__': + sys.exit(main()) diff --git a/bin/multi2single_sequence.py b/bin/multi2single_sequence.py new file mode 100755 index 00000000..9ee9c0b7 --- /dev/null +++ b/bin/multi2single_sequence.py @@ -0,0 +1,58 @@ +#!/usr/bin/env python +## Originally written by Andries van Tonder and released under the MIT license. +## See git repository (https://github.com/nf-core/bactmap) for full license text. + +from Bio import SeqIO +from Bio.Seq import Seq +from Bio.SeqRecord import SeqRecord +import textwrap +import argparse, sys, os + +class ParserWithErrors(argparse.ArgumentParser): + def error(self, message): + print('{0}\n\n'.format(message)) + self.print_help() + sys.exit(2) + + def is_valid_file(self, parser, arg): + if not os.path.isfile(arg): + parser.error("The file %s does not exist!" % arg) + else: + return arg + + +def argparser(): + description = """ + A script to parse a multiple contig fasta and + concatenate the sequences into a single sequence. + The new sequence will be output to a new fasta file. 
+ """ + parser = ParserWithErrors(description = description) + parser.add_argument("-r", "--reference_file", required=True, + help="reference fasta file path", + type=lambda x: parser.is_valid_file(parser, x)) + parser.add_argument("-o", "--output_fasta_file", required=True, + help="file path to output fasta file") + + return parser + +def combine_sequences(reference_sequence): + records = list(SeqIO.parse(reference_sequence, "fasta")) + new_sequence = ''.join([str(record.seq) for record in records]) + new_id = '|'.join([record.id for record in records]) + if len(new_id) > 100: + new_id = new_id[:97] + '...' + new_record = SeqRecord(Seq(new_sequence), id = new_id, description = '') + return(new_record) + + +def write_sequence(filepath, record): + with open(filepath, 'w') as output: + SeqIO.write(record, output, "fasta") + +if __name__ == '__main__': + parser = argparser() + args = parser.parse_args() + + new_record = combine_sequences(args.reference_file) + write_sequence(args.output_fasta_file, new_record) diff --git a/bin/read_stats.py b/bin/read_stats.py new file mode 100755 index 00000000..ec47a3c3 --- /dev/null +++ b/bin/read_stats.py @@ -0,0 +1,85 @@ +#!/usr/bin/env python +## Originally written by Andries van Tonder and released under the MIT license. +## See git repository (https://github.com/nf-core/bactmap) for full license text. 
+ +import pandas as pd +import os +import sys +import glob +import json +import argparse + +def parser_args(args=None): + """ + Function for input arguments for read_stats.py + """ + Description = 'Collect fastq-scan and create a table for each sample' + Epilog = """Example usage: python read_stats.py """ + parser = argparse.ArgumentParser(description=Description, epilog=Epilog) + parser.add_argument("-of", "--output_file" , type=str, default="read_stats.csv", help="read stats file (default: 'read_stats.csv').") + return parser.parse_args(args) + +def make_dir(path): + """ + Function for making a directory from a provided path + """ + if not len(path) == 0: + try: + os.makedirs(path) + except OSError as exception: + if exception.errno != errno.EEXIST: + raise + +def json_to_dataframe(json_files): + """ + Function to take list of json files and create a summary table + """ + json_names = [i.replace('.json', '') for i in json_files] + json_names_df = pd.DataFrame(json_names) + json_names_df.columns = ['Sample'] + jsons_data = {} + + for index, file in enumerate(json_files): + with open(file, 'r') as f: + json_text = json.loads(f.read()) + qc = json_text['qc_stats'] + jsons_data[index] = qc + + jsons_data_df = pd.DataFrame.from_dict(jsons_data, orient = 'index') + json_merged_df = json_names_df.join(jsons_data_df) + json_merged_df = json_merged_df.iloc[:, 0:4] + + return json_merged_df + +def main(args=None): + args = parser_args(args) + + ## Create output directory if it doesn't exist + out_dir = os.path.dirname(args.output_file) + make_dir(out_dir) + + ## Create list of raw reads fastq-scan json files + raw_json_files = sorted(glob.glob('*.raw.json')) + + ## Create list of processed reads fastq-scan json files + processed_json_files = sorted(glob.glob('*.processed.json')) + + ## Create dataframe of raw reads fastq-scan results + raw_json_df = json_to_dataframe(raw_json_files) + raw_json_df = raw_json_df.rename(columns = {'total_bp' : 'raw_total_bp', 'read_total' : 
'num_raw_reads', 'coverage' : 'raw_coverage'}) + raw_json_df['Sample'] = raw_json_df['Sample'].str.replace('.raw','') + + ## Create dataframe of processed reads fastq-scan results + processed_json_df = json_to_dataframe(processed_json_files) + processed_json_df = processed_json_df.rename(columns = {'total_bp' : 'processed_total_bp', 'read_total' : 'num_processed_reads', 'coverage' : 'processed_coverage'}) + processed_json_df['Sample'] = processed_json_df['Sample'].str.replace('.processed','') + + ## Merge fastq-scan dataframes + fastqscan_merged = pd.merge(raw_json_df, processed_json_df, on = ['Sample']) + fastqscan_merged['%reads_after_processed'] = fastqscan_merged['num_processed_reads'] / fastqscan_merged['num_raw_reads'] * 100 + + ## Write output file + fastqscan_merged.to_csv(args.output_file, sep = ',', header = True, index = False) + +if __name__ == '__main__': + sys.exit(main()) diff --git a/bin/read_stats_parser.py b/bin/read_stats_parser.py new file mode 100755 index 00000000..6afbd88e --- /dev/null +++ b/bin/read_stats_parser.py @@ -0,0 +1,58 @@ +#!/usr/bin/env python +## Originally written by Andries van Tonder and released under the MIT license. +## See git repository (https://github.com/nf-core/bactmap) for full license text. 
+ +import pandas as pd +import os +import sys +import glob +import argparse + +def parser_args(args=None): + """ + Function for input arguments for read_stats_parser.py + """ + Description = 'Collect read stats outputs and create a summary table' + Epilog = """Example usage: python read_stats_parser.py """ + parser = argparse.ArgumentParser(description=Description, epilog=Epilog) + parser.add_argument("-of", "--output_file", type=str, default="read_stats_summary.tsv", help="Read stats summary file (default: 'read_stats_summary.tsv').") + return parser.parse_args(args) + +def make_dir(path): + """ + Function for making a directory from a provided path + """ + if not len(path) == 0: + try: + os.makedirs(path) + except OSError as exception: + if exception.errno != errno.EEXIST: + raise + +def read_stats_parse(files): + """ + Function for creating a dataframe from individual csv files + """ + file_list = [pd.read_csv(f, sep=',') for f in files] + dataframe = pd.concat(file_list, ignore_index=True) + + return dataframe + +def main(args=None): + args = parser_args(args) + + ## Create output directory if it doesn't exist + out_dir = os.path.dirname(args.output_file) + make_dir(out_dir) + + ## Create list of cluster files + read_stats_files = sorted(glob.glob('*.csv')) + + ## Create dataframe + read_stats_df = read_stats_parse(read_stats_files) + + ## Write output file + read_stats_df.to_csv(args.output_file, sep = '\t', header = True, index = False) + +if __name__ == '__main__': + sys.exit(main()) diff --git a/bin/seqtk_parser.py b/bin/seqtk_parser.py new file mode 100755 index 00000000..5cee628a --- /dev/null +++ b/bin/seqtk_parser.py @@ -0,0 +1,72 @@ +#!/usr/bin/env python3 + +import os +import sys +import glob +import argparse +import pandas as pd + + +def parser_args(args=None): + """ + Function for input arguments for seqtk_parser.py + """ + Description = "Collect seqtk comp outputs and create a summary table" + Epilog = """Example usage: python seqtk_parser.py """ + 
parser = argparse.ArgumentParser(description=Description, epilog=Epilog) + parser.add_argument( + "-of", + "--output_file", + type=str, + default="mapping_summary.tsv", + help="seqtk comp summary file (default: 'mapping_summary.tsv').", + ) + return parser.parse_args(args) + + +def make_dir(path): + """ + Function for making a directory from a provided path + """ + if not len(path) == 0: + try: + os.makedirs(path) + except OSError as exception: + if exception.errno != errno.EEXIST: + raise + + +def seqtk_to_dataframe(file_list): + """ + Function for creating a dataframe from a list of seqtk comp files + """ + seqtk_file_list = [pd.read_csv(f, sep="\t", header=None) for f in file_list] + seqtk_df = pd.concat(seqtk_file_list, ignore_index=True) + seqtk_df = seqtk_df.iloc[:, 0:6] + seqtk_df.columns = ["sample", "ref_length", "#A", "#C", "#G", "#T"] + column_list = ["#A", "#C", "#G", "#T"] + seqtk_df["mapped"] = seqtk_df[column_list].sum(axis=1) + seqtk_df["%ref mapped"] = seqtk_df["mapped"] / seqtk_df["ref_length"] * 100 + + return seqtk_df + + +def main(args=None): + args = parser_args(args) + + ## Create output directory if it doesn't exist + out_dir = os.path.dirname(args.output_file) + make_dir(out_dir) + + ## Create list of seqtk comp tsv outputs + seqtk_files = sorted(glob.glob("*.tsv")) + + ## Create dataframe + seqtk_df = seqtk_to_dataframe(seqtk_files) + + ## Write output file + seqtk_df.to_csv(args.output_file, sep="\t", header=True, index=False) + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/conf/base.config b/conf/base.config index a037a4f4..7788d73a 100644 --- a/conf/base.config +++ b/conf/base.config @@ -10,7 +10,6 @@ process { - // TODO nf-core: Check the defaults for all processes cpus = { 1 * task.attempt } memory = { 6.GB * task.attempt } time = { 4.h * task.attempt } @@ -24,8 +23,6 @@ process { // These labels are used and recognised by default in DSL2 files hosted on nf-core/modules. 
// If possible, it would be nice to keep the same label naming convention when // adding in your local modules too. - // TODO nf-core: Customise requirements for specific processes. - // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors withLabel:process_single { cpus = { 1 } memory = { 6.GB * task.attempt } diff --git a/conf/modules.config b/conf/modules.config index d203d2b6..cc7186eb 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -15,20 +15,657 @@ process { publishDir = [ path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, ] + withName: BWAMEM2_INDEX { + publishDir = [ + path: { "${params.outdir}/bwamem2/index" }, + mode: params.publish_dir_mode, + ] + } + + withName: BOWTIE2_BUILD { + publishDir = [ + path: { "${params.outdir}/bowtie2/build" }, + mode: params.publish_dir_mode, + ] + } + + withName: SAMTOOLS_FAIDX { + publishDir = [ + path: { "${params.outdir}/samtools/faidx" }, + mode: params.publish_dir_mode, + pattern: '*.{fai,gzi}', + ] + } + + withName: FASTQSCAN_RAW { + ext.prefix = { "${meta.id}_${meta.run_accession}.raw" } + publishDir = [ + path: { "${params.outdir}/fastqscan/raw" }, + mode: params.publish_dir_mode, + pattern: '*.json', + ] + } + + withName: FASTQSCANPARSE_RAW { + ext.prefix = { "raw" } + publishDir = [ + path: { "${params.outdir}/summaries" }, + mode: params.publish_dir_mode, + pattern: '*.tsv', + ] + } + withName: FASTQC { - ext.args = '--quiet' + ext.args = '--quiet' + ext.prefix = { "${meta.id}_${meta.run_accession}_raw" } + publishDir = [ + path: { "${params.outdir}/fastqc/raw" }, + mode: params.publish_dir_mode, + pattern: '*.{html,zip}', + ] } - withName: 'MULTIQC' { - ext.args = { params.multiqc_title ? 
"--title \"$params.multiqc_title\"" : '' } + withName: FASTQC_PROCESSED { + ext.args = '--quiet' + ext.prefix = { "${meta.id}_${meta.run_accession}_processed" } publishDir = [ - path: { "${params.outdir}/multiqc" }, + path: { "${params.outdir}/fastqc/processed" }, + mode: params.publish_dir_mode, + pattern: '*.{html,zip}', + ] + } + + withName: FALCO { + ext.prefix = { "${meta.id}_${meta.run_accession}_raw_falco" } + publishDir = [ + path: { "${params.outdir}/falco/raw" }, + mode: params.publish_dir_mode, + pattern: '*.{html,txt,zip}', + ] + } + + withName: FALCO_PROCESSED { + ext.prefix = { "${meta.id}_${meta.run_accession}_processed_falco" } + publishDir = [ + path: { "${params.outdir}/falco/processed" }, + mode: params.publish_dir_mode, + pattern: '*.{html,txt,zip}', + ] + } + + withName: FASTP_SINGLE { + ext.args = [ + params.shortread_qc_skipadaptertrim ? "--disable_adapter_trimming" : "", + params.shortread_qc_adapterlist ? "" : params.shortread_qc_adapter1 ? "--adapter_sequence ${params.shortread_qc_adapter1}" : "", + "--length_required ${params.shortread_qc_minlength}", + "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10", + ].join(' ').trim() + ext.prefix = { "${meta.id}_${meta.run_accession}" } + publishDir = [ + [ + path: { "${params.outdir}/fastp" }, + mode: params.publish_dir_mode, + pattern: '*.fastq.gz', + enabled: params.save_preprocessed_reads, + ], + [ + path: { "${params.outdir}/fastp" }, + mode: params.publish_dir_mode, + pattern: '*.{log,html,json}', + ], + [ + path: { "${params.outdir}/analysis_ready_fastqs" }, + mode: params.publish_dir_mode, + pattern: '*.fastq.gz', + enabled: params.save_analysis_ready_fastqs, + saveAs: { ( params.perform_runmerging == false || ( params.perform_runmerging && !meta.is_multirun ) ) && params.perform_shortread_qc && params.save_analysis_ready_fastqs ? 
it : null }, + ], + ] + } + + withName: FASTP_PAIRED { + ext.args = [ + params.shortread_qc_includeunmerged ? '--include_unmerged' : '', + params.shortread_qc_skipadaptertrim ? "--disable_adapter_trimming" : "", + params.shortread_qc_adapterlist ? "" : params.shortread_qc_adapter1 ? "--adapter_sequence ${params.shortread_qc_adapter1}" : "", + params.shortread_qc_adapterlist ? "" : params.shortread_qc_adapter2 ? "--adapter_sequence_r2 ${params.shortread_qc_adapter2}" : "--detect_adapter_for_pe", + "--length_required ${params.shortread_qc_minlength}", + "--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10", + ].join(' ').trim() + ext.prefix = { "${meta.id}_${meta.run_accession}" } + publishDir = [ + [ + path: { "${params.outdir}/fastp" }, + mode: params.publish_dir_mode, + pattern: '*.fastq.gz', + enabled: params.save_preprocessed_reads, + ], + [ + path: { "${params.outdir}/fastp" }, + mode: params.publish_dir_mode, + pattern: '*.{log,html,json}', + ], + [ + path: { "${params.outdir}/analysis_ready_fastqs" }, + mode: params.publish_dir_mode, + pattern: params.shortread_qc_mergepairs ? '*merged.fastq.gz' : '*.fastp.fastq.gz', + enabled: params.save_analysis_ready_fastqs, + saveAs: { ( params.perform_runmerging == false || ( params.perform_runmerging && !meta.is_multirun ) ) && params.perform_shortread_qc && params.save_analysis_ready_fastqs ? it : null }, + ], + ] + } + + withName: ADAPTERREMOVAL_SINGLE { + ext.args = [ + params.shortread_qc_skipadaptertrim ? "--adapter1 ''" : params.shortread_qc_adapterlist ? "" : params.shortread_qc_adapter1 ? 
"--adapter1 ${params.shortread_qc_adapter1}" : "", + "--minlength ${params.shortread_qc_minlength}", + ].join(' ').trim() + ext.prefix = { "${meta.id}_${meta.run_accession}" } + publishDir = [ + [ + path: { "${params.outdir}/adapterremoval" }, + mode: params.publish_dir_mode, + pattern: '*.fastq.gz', + enabled: params.save_preprocessed_reads, + ], + [ + path: { "${params.outdir}/adapterremoval" }, + mode: params.publish_dir_mode, + pattern: '*.settings', + ], + [ + path: { "${params.outdir}/analysis_ready_fastqs" }, + mode: params.publish_dir_mode, + pattern: '*truncated.fastq.gz', + enabled: params.save_analysis_ready_fastqs, + saveAs: { ( params.perform_runmerging == false || ( params.perform_runmerging && !meta.is_multirun ) ) && params.perform_shortread_qc && params.save_analysis_ready_fastqs ? it : null }, + ], + ] + } + + withName: ADAPTERREMOVAL_PAIRED { + ext.args = [ + params.shortread_qc_mergepairs ? "--collapse" : "", + params.shortread_qc_skipadaptertrim ? "--adapter1 ''" : params.shortread_qc_adapterlist ? "" : params.shortread_qc_adapter1 ? "--adapter1 ${params.shortread_qc_adapter1}" : "", + params.shortread_qc_skipadaptertrim ? "--adapter2 ''" : params.shortread_qc_adapterlist ? "" : params.shortread_qc_adapter2 ? 
"--adapter2 ${params.shortread_qc_adapter2}" : "", + "--minlength ${params.shortread_qc_minlength}", + ].join(' ').trim() + ext.prefix = { "${meta.id}_${meta.run_accession}" } + publishDir = [ + [ + path: { "${params.outdir}/adapterremoval" }, + mode: params.publish_dir_mode, + pattern: '*.fastq.gz', + enabled: params.save_preprocessed_reads, + ], + [ + path: { "${params.outdir}/adapterremoval" }, + mode: params.publish_dir_mode, + pattern: '*.settings', + ], + [ + path: { "${params.outdir}/analysis_ready_fastqs" }, + mode: params.publish_dir_mode, + pattern: '*{truncated.fastq,singleton.truncated}.gz', + enabled: params.save_analysis_ready_fastqs, + saveAs: { ( params.perform_runmerging == false || ( params.perform_runmerging && !meta.is_multirun ) ) && params.perform_shortread_qc && !params.shortread_qc_mergepairs && params.save_analysis_ready_fastqs ? it : null }, + ], + ] + } + + // AdapterRemoval separate output merging + withName: CAT_FASTQ { + ext.prefix = { "${meta.id}_${meta.run_accession}" } + publishDir = [ + [ + path: { "${params.outdir}/analysis_ready_fastqs" }, + mode: params.publish_dir_mode, + pattern: '*.fastq.gz', + enabled: params.save_analysis_ready_fastqs, + saveAs: { ( params.perform_runmerging == false || ( params.perform_runmerging && !meta.is_multirun ) ) && params.perform_shortread_qc && params.save_analysis_ready_fastqs ? 
it : null }, + ] + ] + } + + withName: PORECHOP_PORECHOP { + ext.prefix = { "${meta.id}_${meta.run_accession}_porechop" } + publishDir = [ + [ + path: { "${params.outdir}/porechop" }, + mode: params.publish_dir_mode, + pattern: '*_porechop.fastq.gz', + enabled: params.save_preprocessed_reads, + ], + [ + path: { "${params.outdir}/porechop" }, + mode: params.publish_dir_mode, + pattern: '*.log', + ], + [ + path: { "${params.outdir}/analysis_ready_fastqs" }, + mode: params.publish_dir_mode, + pattern: '*_porechop.fastq.gz', + enabled: params.save_analysis_ready_fastqs, + saveAs: { ( params.perform_runmerging == false || ( params.perform_runmerging && !meta.is_multirun ) ) && !params.perform_longread_hostremoval && params.longread_qc_skipqualityfilter && !params.longread_qc_skipadaptertrim && params.perform_longread_qc && params.save_analysis_ready_fastqs ? it : null }, + ], + ] + } + + withName: PORECHOP_ABI { + ext.prefix = { "${meta.id}_${meta.run_accession}_porechop_abi" } + publishDir = [ + [ + path: { "${params.outdir}/porechop_abi" }, + mode: params.publish_dir_mode, + pattern: '*_porechop_abi.fastq.gz', + enabled: params.save_preprocessed_reads, + ], + [ + path: { "${params.outdir}/porechop_abi" }, + mode: params.publish_dir_mode, + pattern: '*.log', + ], + [ + path: { "${params.outdir}/analysis_ready_fastqs" }, + mode: params.publish_dir_mode, + pattern: '*porechop_abi.fastq.gz', + enabled: params.save_analysis_ready_fastqs, + saveAs: { (params.perform_runmerging == false || (params.perform_runmerging && !meta.is_multirun)) && !params.perform_longread_hostremoval && params.longread_qc_skipqualityfilter && !params.longread_qc_skipadaptertrim && params.perform_longread_qc && params.save_analysis_ready_fastqs ? 
it : null }, + ], + ] + } + + withName: FILTLONG { + ext.args = [ + "--min_length ${params.longread_qc_qualityfilter_minlength}", + "--keep_percent ${params.longread_qc_qualityfilter_keeppercent}", + "--target_bases ${params.longread_qc_qualityfilter_targetbases}", + ].join(' ').trim() + ext.prefix = { "${meta.id}_${meta.run_accession}_filtered" } + publishDir = [ + [ + path: { "${params.outdir}/filtlong" }, + mode: params.publish_dir_mode, + pattern: '*.fastq.gz', + enabled: params.save_preprocessed_reads, + ], + [ + path: { "${params.outdir}/filtlong" }, + mode: params.publish_dir_mode, + pattern: '*.log', + ], + [ + path: { "${params.outdir}/analysis_ready_fastqs" }, + mode: params.publish_dir_mode, + pattern: '*.fastq.gz', + enabled: params.save_analysis_ready_fastqs, + saveAs: { (params.perform_runmerging == false || (params.perform_runmerging && !meta.is_multirun)) && !params.perform_longread_hostremoval && !params.longread_qc_skipqualityfilter && params.perform_longread_qc && params.save_analysis_ready_fastqs ? 
it : null }, + ], + ] + } + + withName: NANOQ { + ext.args = [ + "-vv", + "--min-len ${params.longread_qc_qualityfilter_minlength}", + "--min-qual ${params.longread_qc_qualityfilter_minquality}", + ].join(' ').trim() + ext.prefix = { "${meta.id}_${meta.run_accession}_filtered" } + publishDir = [ + [ + path: { "${params.outdir}/nanoq" }, + mode: params.publish_dir_mode, + pattern: '*_filtered.fastq.gz', + enabled: params.save_preprocessed_reads, + ], + [ + path: { "${params.outdir}/nanoq" }, + mode: params.publish_dir_mode, + pattern: '*_filtered.stats', + ], + [ + path: { "${params.outdir}/analysis_ready_fastqs" }, + mode: params.publish_dir_mode, + pattern: '*_filtered.fastq.gz', + enabled: params.save_analysis_ready_fastqs, + saveAs: { (params.perform_runmerging == false || (params.perform_runmerging && !meta.is_multirun)) && !params.perform_longread_hostremoval && !params.longread_qc_skipqualityfilter && params.perform_longread_qc && params.save_analysis_ready_fastqs ? it : null }, + ], + ] + } + + withName: FASTQSCAN_PROCESSED { + ext.prefix = { "${meta.id}_${meta.run_accession}.processed" } + publishDir = [ + path: { "${params.outdir}/fastqscan/processed" }, + mode: params.publish_dir_mode, + pattern: '*.json', + ] + } + + withName: FASTQSCANPARSE_PROCESSED { + ext.prefix = { "processed" } + publishDir = [ + path: { "${params.outdir}/summaries" }, + mode: params.publish_dir_mode, + pattern: '*.tsv', + ] + } + + withName: READ_STATS { + ext.prefix = { "${meta.id}_${meta.run_accession}" } + publishDir = [ + path: { "${params.outdir}/read_stats" }, + mode: params.publish_dir_mode, + pattern: '*.csv', + ] + } + + withName: READSTATS_PARSE { + publishDir = [ + path: { "${params.outdir}/summaries" }, + mode: params.publish_dir_mode, + pattern: '*.tsv', + ] + } + + withName: MERGE_RUNS { + ext.prefix = { "${meta.id}" } + publishDir = [ + [ + path: { "${params.outdir}/run_merging/" }, + mode: params.publish_dir_mode, + pattern: '*.fastq.gz', + enabled: 
params.save_runmerged_reads, + ], + [ + path: { "${params.outdir}/analysis_ready_fastqs" }, + mode: params.publish_dir_mode, + pattern: '*.fastq.gz', + enabled: params.perform_runmerging && params.save_analysis_ready_fastqs, + ], + ] + } + + withName: RASUSA { + ext.prefix = { "${meta.id}.subsampled" } + ext.args = '--seed 23032021' + publishDir = [ + path: { "${params.outdir}/rasusa" }, + mode: params.publish_dir_mode, + pattern: '*.fastq.gz', + ] + + } + + withName: BOWTIE2_ALIGN { + ext.args = '' + ext.prefix = { "${meta.id}" } + publishDir = [ + path: { "${params.outdir}/bowtie2/align" }, + mode: params.publish_dir_mode, + pattern: '*.log', + ] + } + + withName: BWAMEM2_MEM { + ext.args = '' + ext.prefix = { "${meta.id}" } + publishDir = [ + path: { "${params.outdir}/bwamem2/mem" }, + mode: params.publish_dir_mode, + pattern: '*.log', + ] + } + + withName: SAMTOOLS_SORT { + ext.prefix = { "${meta.id}.sorted" } + publishDir = [ + path: { "${params.outdir}/samtools/sort" }, + mode: params.publish_dir_mode, + pattern: '*.{bam,bai}', + ] + } + + withName: SAMTOOLS_INDEX { + ext.prefix = { "${meta.id}.sorted" } + publishDir = [ + path: { "${params.outdir}/samtools/sort" }, + mode: params.publish_dir_mode, + pattern: '*.bai', + ] + } + + withName: SAMTOOLS_STATS { + ext.prefix = { "${meta.id}.sorted" } + publishDir = [ + path: { "${params.outdir}/samtools/stats" }, + mode: params.publish_dir_mode, + pattern: '*.stats', + ] + } + + withName: SAMTOOLS_FLAGSTAT { + ext.prefix = { "${meta.id}.sorted" } + publishDir = [ + path: { "${params.outdir}/samtools/stats" }, + mode: params.publish_dir_mode, + pattern: '*.flagstat', + ] + } + + withName: SAMTOOLS_IDXSTATS { + ext.prefix = { "${meta.id}.sorted" } + publishDir = [ + path: { "${params.outdir}/samtools/stats" }, + mode: params.publish_dir_mode, + pattern: '*.idxstats', + ] + } + + withName: FREEBAYES { + ext.args = '-p 1 -P 0 -C 2 -F 0.05 --min-coverage 10 --min-repeat-entropy 1.0 -q 13 -m 30 --strict-vcf' + ext.prefix 
= { "${meta.id}" } + publishDir = [ + path: { "${params.outdir}/freebayes" }, + mode: params.publish_dir_mode, + pattern: '*.vcf.gz', + ] + } + + withName: MINIMAP2_INDEX { + ext.args = '' + publishDir = [ + path: { "${params.outdir}/minimap2/index" }, + mode: params.publish_dir_mode, + pattern: '*.mmi', + ] + } + + withName: MINIMAP2_ALIGN { + ext.args = '--cs --MD -x map-ont' + ext.prefix = { "${meta.id}" } + publishDir = [ + path: { "${params.outdir}/minimap2/align" }, + mode: params.publish_dir_mode, + pattern: '*.{bam,bai}', + ] + } + + withName: CLAIR3 { + ext.args = '--include_all_ctgs --haploid_precise --no_phasing_for_fa --enable_long_indel' + publishDir = [ + path: { "${params.outdir}/clair3/" }, + mode: params.publish_dir_mode, + pattern: '*.vcf.gz', + ] + } + + withName: BCFTOOLS_SORT { + ext.args = '' + ext.prefix = { "${meta.id}.sorted" } + publishDir = [ + path: { "${params.outdir}/bcftools/sort" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + pattern: '*.vcf.gz', ] } + withName: BCFTOOLS_INDEX { + ext.args = '--tbi' + ext.prefix = { "${meta.id}.sorted" } + publishDir = [ + path: { "${params.outdir}/bcftools/sort" }, + mode: params.publish_dir_mode, + pattern: '*.{tbi,csi}', + ] + } + + //final Illumina vcf file + withName: BCFTOOLS_FILTER { + //ext.args = '--write-index=tbi --output-type z --soft-filter LowQual' + ext.args = '--write-index=tbi --output-type z -i "QUAL > 20 && INFO/SAF > 0 && INFO/SAR > 0 && (INFO/SRF > 0 || INFO/SRR > 0) && (INFO/AO / INFO/DP) > 0.9"' + ext.prefix = { "${meta.id}.filtered" } + publishDir = [ + path: { "${params.outdir}/bcftools/filter" }, + mode: params.publish_dir_mode, + pattern: '*.{vcf.gz,tbi,csi}', + ] + } + + withName: BCFTOOLS_VIEW { + ext.args = '-f PASS --write-index=tbi --output-type z' + ext.prefix = { "${meta.id}.filtered.pass" } + publishDir = [ + path: { "${params.outdir}/bcftools/view" }, + mode: params.publish_dir_mode, + pattern: '*.vcf.gz', + ] + } + + //final ONT vcf file + withName: BCFTOOLS_NORM { + ext.args = '--output-type z -a -m -both --write-index=tbi' + ext.prefix = { "${meta.id}.filtered.norm" } + publishDir = [ + path: { "${params.outdir}/filtered_variants" }, + mode: params.publish_dir_mode, + pattern: '*.{vcf.gz,tbi,csi}', + ] + } + + withName: BCFTOOLS_STATS { + ext.args = '' + ext.prefix = { "${meta.id}" } + publishDir = [ + path: { "${params.outdir}/bcftools/stats" }, + mode: params.publish_dir_mode, + pattern: '*.txt', + ] + } + + withName: BCFTOOLS_QUERY { + ext.args = "-f'%CHROM\t%POS0\t%END\n'" + ext.prefix = { "${meta.id}" } + publishDir = [ + path: { "${params.outdir}/bcftools/query" }, + mode: params.publish_dir_mode, + pattern: '*.txt', + ] + } + + withName: BEDTOOLS_GENOMECOV { + ext.args = '-bga' + ext.prefix = { "${meta.id}" } + publishDir = [ + path: { "${params.outdir}/bedtools/genomecov" }, + mode: params.publish_dir_mode, + pattern: '*.bed', + ] + } + + withName: BEDTOOLS_SUBTRACT { + ext.args = '' + 
ext.prefix = { "${meta.id}.subtracted" } + publishDir = [ + path: { "${params.outdir}/bedtools/subtract" }, + mode: params.publish_dir_mode, + pattern: '*.bed', + ] + } + + withName: BCFTOOLS_CONSENSUS { + //ext.args = '-i "FORMAT/VAF > 0.90 & INFO/MQ >= 20 & FORMAT/DP >= 10"' + ext.prefix = { "${meta.id}" } + publishDir = [ + path: { "${params.outdir}/bcftools/consensus" }, + mode: params.publish_dir_mode, + pattern: '*.fa', + ] + } + + withName: RENAME_FASTA_HEADER { + ext.args = '' + ext.prefix = { "${meta.id}.renamed" } + publishDir = [ + path: { "${params.outdir}/pseudogenomes/" }, + enabled: false, + ] + } + + withName: CONCATENATE_FASTA { + ext.args = '' + publishDir = [ + path: { "${params.outdir}/pseudogenomes/" }, + mode: params.publish_dir_mode, + pattern: '*.fa', + ] + } + + withName: SEQTK_COMP { + ext.args = '' + publishDir = [ + path: { "${params.outdir}/seqtk/" }, + mode: params.publish_dir_mode, + pattern: '*.tsv', + ] + } + + withName: SEQTK_PARSE { + ext.args = '' + publishDir = [ + path: { "${params.outdir}/summaries/" }, + mode: params.publish_dir_mode, + pattern: 'mapping_summary.tsv', + ] + } + + withName: ALIGNPSEUDOGENOMES { + ext.args = '' + publishDir = [ + path: { "${params.outdir}/pseudogenomes/" }, + mode: params.publish_dir_mode, + pattern: '*.fas', + ] + } + + withName: SNPSITES { + ext.args = '' + publishDir = [ + path: { "${params.outdir}/snp-sites" }, + mode: params.publish_dir_mode, + pattern: '*.{fas,txt}', + ] + } + + withName: MULTIQC { + ext.args = { params.multiqc_title ? "--title \"${params.multiqc_title}\"" : '' } + publishDir = [ + path: { "${params.outdir}/multiqc" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, + ] + } } diff --git a/conf/test.config b/conf/test.config index 56cea1e0..8cd27cd2 100644 --- a/conf/test.config +++ b/conf/test.config @@ -13,18 +13,18 @@ process { resourceLimits = [ cpus: 4, - memory: '15.GB', - time: '1.h' + memory: '8.GB', + time: '30.m', ] } params { - config_profile_name = 'Test profile' + config_profile_name = 'Test profile' config_profile_description = 'Minimal test dataset to check pipeline function' - // Input data - // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets - // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = params.pipelines_testdata_base_path + 'viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv'// Genome references - genome = 'R64-1-1' + // Input data for full size test + input = params.pipelines_testdata_base_path + 'bactmap/samplesheet.csv' + // Genome references + + fasta = params.pipelines_testdata_base_path + 'modules/data/genomics/prokaryotes/bacteroides_fragilis/genome/genome.fna.gz' } diff --git a/conf/test_full.config b/conf/test_full.config index bded76ef..ab91d6f7 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -11,14 +11,12 @@ */ params { - config_profile_name = 'Full test profile' + config_profile_name = 'Full test profile' config_profile_description = 'Full test dataset to check pipeline function' // Input data for full size test - // TODO nf-core: Specify the paths to your full test data ( on nf-core/test-datasets or directly in repositories, e.g. 
SRA) - // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = params.pipelines_testdata_base_path + 'viralrecon/samplesheet/samplesheet_full_illumina_amplicon.csv' + input = params.pipelines_testdata_base_path + 'bactmap/samplesheet.csv' // Genome references - genome = 'R64-1-1' + fasta = params.pipelines_testdata_base_path + 'modules/data/genomics/prokaryotes/bacteroides_fragilis/genome/genome.fna.gz' } diff --git a/docs/images/bactmap_pipeline.png b/docs/images/bactmap_pipeline.png new file mode 100644 index 00000000..d6d312ef Binary files /dev/null and b/docs/images/bactmap_pipeline.png differ diff --git a/docs/images/nf-core-bactmap_logo_dark.png b/docs/images/nf-core-bactmap_logo_dark.png index 9e7daef5..4d3f130c 100644 Binary files a/docs/images/nf-core-bactmap_logo_dark.png and b/docs/images/nf-core-bactmap_logo_dark.png differ diff --git a/docs/images/nf-core-bactmap_logo_light.png b/docs/images/nf-core-bactmap_logo_light.png index f86b9a7e..819530b0 100644 Binary files a/docs/images/nf-core-bactmap_logo_light.png and b/docs/images/nf-core-bactmap_logo_light.png differ diff --git a/docs/output.md b/docs/output.md index 2ede6f38..fd952701 100644 --- a/docs/output.md +++ b/docs/output.md @@ -6,29 +6,479 @@ This document describes the output produced by the pipeline. Most of the plots a The directories listed below will be created in the results directory after the pipeline has finished. All paths are relative to the top-level results directory. 
- - ## Pipeline overview The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps: +- [SAMtools faidx](#samtools-faidx) - Indexing of reference genome - [FastQC](#fastqc-or-falco) - Raw read QC +- [fastq-scan](#fastq-scan) - Summary statistics for fastq files +- [falco](#fastqc-or-falco) - Alternative to FastQC for raw read QC +- [fastp](#fastp) - Adapter trimming for Illumina data +- [AdapterRemoval](#adapterremoval) - Adapter trimming for Illumina data +- [Porechop](#porechop) - Adapter removal for Oxford Nanopore data +- [Porechop_ABI](#porechop_abi) - Adapter removal for Oxford Nanopore data +- [Filtlong](#filtlong) - Quality trimming and filtering for Nanopore data +- [Nanoq](#nanoq) - Quality trimming and filtering for Nanopore data +- [Run Merging](#run-merging) - Merging of reads from multiple sequencing runs +- [Analysis Ready Reads](#analysis-ready-reads) - Optional results directory containing the final processed reads used as input for subsampling. 
+- [Rasusa](#rasusa) - Subsampling of reads +- [read_stats](#read_stats) - Summarise read statistics pre- and post-processing +- [Bowtie2](#bowtie2) - Mapping for Illumina reads +- [BWA MEM2](#bwamem2) - Mapping for Illumina reads +- [minimap2](#minimap2) - Mapping for Nanopore reads +- [SAMtools sort](#samtools-sort) - Sorting of bam files +- [SAMtools stats](#samtools-stats) - Statistics from mapping +- [FreeBayes](#freebayes) - Variant calling for Illumina reads +- [Clair3](#clair3) - Variant calling for Nanopore reads +- [BCFtools filter](#bcftools-filter) - Filtering of Illumina variants +- [BCFtools norm](#bcftools-norm) - Normalisation of ONT variants +- [BCFtools stats](#bcftools-stats) - Statistics from variant calling +- [BCFtools consensus](#bcftools-consensus) - Convert filtered bcf to pseudogenome fasta +- [seqtk](#seqtk) - Summarise mapping statistics +- [Align pseudogenomes](#align-pseudogenomes) - Create alignment from pseudogenomes +- [SNP-sites](#snp-sites) - Extract variant sites from alignment - [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline - [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution -### FastQC +![](images/bactmap_pipeline.png) + +### SAMtools faidx + +[SAMtools faidx](http://www.htslib.org/doc/samtools-faidx.html) is used to index the reference genome. The index file is required for downstream mapping steps.
Output files -- `fastqc/` - - `*_fastqc.html`: FastQC report containing quality metrics. - - `*_fastqc.zip`: Zip archive containing the FastQC report, tab-delimited data file and plot images. +- `samtools/faidx` + - `.fai`: Index file for the reference genome + - `.gzi`: Gzip index file for the reference genome + +
+ +### FastQC or Falco + +
+Output files + +- `{fastqc,falco}/` + - `{raw,processed}/` + - `*.html`: FastQC or Falco report containing quality metrics in HTML format. + - `*.txt`: Falco report containing quality metrics in TXT format (Falco only). + - `*.zip`: Zip archive containing the FastQC report, tab-delimited data file and plot images (FastQC only).
[FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) gives general quality metrics about your sequenced reads. It provides information about the quality score distribution across your reads, per base sequence content (%A/T/G/C), adapter contamination and overrepresented sequences. For further reading and documentation see the [FastQC help pages](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/). +If preprocessing is turned on, nf-core/bactmap runs FastQC/Falco twice - once before and once after adapter removal/read merging - to allow evaluation of the performance of these preprocessing steps. Note that in the General Stats table, the columns of these two instances of FastQC/Falco are placed next to each other to make it easier to evaluate. However, the columns of the actual preprocessing steps (i.e., fastp, AdapterRemoval, and Porechop) will be displayed _after_ the two FastQC/Falco columns, even if they were run 'between' the two FastQC/Falco jobs in the pipeline itself. + +:::info +Falco produces identical output to FastQC but in the `falco/` directory. +::: + +### fastq-scan + +
+Output files + +- `fastqscan/` + - `raw/*.json`: JSON formatted file of summary statistics for input fastq files. + - `processed/*.json`: JSON formatted file of summary statistics for processed fastq files. +- `summaries/` + - `raw_fastq-scan_summary.tsv`: Final summary tsv file of sequencing statistics for analysis ready fastq files for all samples. + - `processed_fastq-scan_summary.tsv`: Final summary tsv file of sequencing statistics for merged, subsampled fastq files for all samples. + +
+ +[fastq-scan](https://github.com/rpetit3/fastq-scan) is a tool for generating FASTQ summary statistics in JSON format. + +### fastp + +[fastp](https://github.com/OpenGene/fastp) is a FASTQ pre-processing tool for quality control, trimming of adapters, quality filtering and other features. + +It is used in nf-core/bactmap for adapter trimming of short reads. + +
+Output files + +- `fastp/` + - `.fastp.fastq.gz`: File with the trimmed unmerged fastq reads. + - `.merged.fastq.gz`: File with the reads that were successfully merged. + - `.*{log,html,json}`: Log files in different formats. + +
+ +By default, nf-core/bactmap will only provide the `.fastp.fastq.gz` file if fastp is selected. The file `.merged.fastq.gz` will be available in the output folder if you provide the argument `--shortread_qc_mergepairs` (optionally retaining un-merged pairs when in combination with `--shortread_qc_includeunmerged`). + +You can change the default value for low complexity filtering by using the argument `--shortread_complexityfilter_fastp_threshold`. + +### AdapterRemoval + +[AdapterRemoval](https://adapterremoval.readthedocs.io/en/stable/) searches for and removes remnant adapter sequences from High-Throughput Sequencing (HTS) data and (optionally) trims low quality bases from the 3' end of reads following adapter removal. It is popular in the field of palaeogenomics. The output logs are stored in the results folder, and as a part of the MultiQC report. + +
+Output files + +- `adapterremoval/` + - `.settings`: AdapterRemoval log file containing general adapter removal, read trimming and merging statistics + - `.collapsed.fastq.gz` - read-pairs that merged and did not undergo trimming (only when `--shortread_qc_mergepairs` supplied) + - `.collapsed.truncated.fastq.gz` - read-pairs that merged and underwent quality trimming (only when `--shortread_qc_mergepairs` supplied) + - `.pair1.truncated.fastq.gz` - read 1 of pairs that underwent quality trimming + - `.pair2.truncated.fastq.gz` - read 2 of pairs that underwent quality trimming (and could not merge if `--shortread_qc_mergepairs` supplied) + - `.singleton.truncated.fastq.gz` - orphaned read pairs where one of the pair was discarded + - `.discard.fastq.gz` - reads that were discarded due to length or quality filtering + +
+ +By default, nf-core/bactmap will only provide the `.settings` file if AdapterRemoval is selected. + +You will only find the `.fastq` files in the results directory if you provide `--save_preprocessed_reads`. If this is selected, you may receive different combinations of `.fastq` files for each sample depending on the input types - e.g. whether you have merged or not, or if you're supplying both single- and paired-end reads. Alternatively, if you wish only to have the 'final' reads that go into subsampling (i.e., that may have additional processing), do not specify this flag but rather specify `--save_analysis_ready_fastqs`, in which case the reads will be in the folder `analysis_ready_fastqs`. + +:::warning +The resulting `.fastq` files may _not_ always be the 'final' reads that go into subsampling, if you also run other steps such as run merging etc. +::: + +### Porechop + +[Porechop](https://github.com/rrwick/Porechop) is a tool for finding and removing adapters from Oxford Nanopore reads. Adapters on the ends of reads are trimmed and if a read has an adapter in its middle, it is considered chimeric and is chopped into separate reads. + +
+Output files + +- `porechop/` + - `.log`: Log file containing trimming statistics + - `.fastq.gz`: Adapter-trimmed file + +
+ +The output logs are saved in the output folder and are part of the MultiQC report. You do not normally need to check these manually. + +You will only find the `.fastq` files in the results directory if you provide `--save_preprocessed_reads`. Alternatively, if you wish only to have the 'final' reads that go into subsampling (i.e., that may have additional processing), do not specify this flag but rather specify `--save_analysis_ready_fastqs`, in which case the reads will be in the folder `analysis_ready_fastqs`. + +:::warning +We do **not** recommend using Porechop if you are already trimming the adapters with ONT's basecaller Guppy. +::: + +### Porechop_ABI + +[Porechop_ABI](https://github.com/bonsai-team/Porechop_ABI) is an extension of [Porechop](https://github.com/rrwick/Porechop). Unlike Porechop, Porechop_ABI does not use any external knowledge or database for the adapters. Adapters are discovered directly from the reads using approximate k-mer counting and assembly. Then these sequences can be used for trimming, using all standard Porechop options. The software is able to report a combination of distinct sequences if a mix of adapters is used. It can also be used to check whether a dataset has already been trimmed or not, or to find leftover adapters in datasets that have been previously processed with Guppy. + +
+Output files + +- `porechop_abi/` + - `.log`: Log file containing trimming statistics + - `.fastq.gz`: Adapter-trimmed file + +
+ +The output logs are saved in the output folder and are part of the MultiQC report. You do not normally need to check these manually. + +You will only find the `.fastq` files in the results directory if you provide `--save_preprocessed_reads`. Alternatively, if you wish only to have the 'final' reads that go into subsampling (i.e., that may have additional processing), do not specify this flag but rather specify `--save_analysis_ready_fastqs`, in which case the reads will be in the folder `analysis_ready_reads`. + +### Filtlong + +[Filtlong](https://github.com/rrwick/Filtlong) is a quality filtering tool for long reads. It can take a set of long reads and produce a smaller, better subset. + +
+Output files + +- `filtlong/` + - `_filtered.fastq.gz`: Quality or long read data filtered file + - `_filtered.log`: log file containing summary statistics + +
+ +You will only find the `.fastq` files in the results directory if you provide `--save_preprocessed_reads`. Alternatively, if you wish only to have the 'final' reads that go into subsampling (i.e., that may have additional processing), do not specify this flag but rather specify `--save_analysis_ready_fastqs`, in which case the reads will be in the folder `analysis_ready_reads`. + +:::warning +We do _not_ recommend using Filtlong if you are performing filtering of low quality reads with ONT's basecaller Guppy. +::: + +### Nanoq + +[nanoq](https://github.com/esteinig/nanoq) is an ultra-fast quality filtering tool that also provides summary reports for nanopore reads. + +
+Output files + +- `nanoq/` + - `_filtered.fastq.gz`: Quality or long read data filtered file + - `_filtered.stats`: Summary statistics report + +
+ +You will only find the `.fastq` files in the results directory if you provide `--save_preprocessed_reads`. Alternatively, if you wish only to have the 'final' reads that go into subsampling (i.e., that may have additional processing), do not specify this flag but rather specify `--save_analysis_ready_fastqs`, in which case the reads will be in the folder `analysis_ready_reads`. + +### Run Merging + +nf-core/bactmap offers the option to merge FASTQ files of multiple sequencing runs or libraries that derive from the same sample, as specified in the input samplesheet. + +This is the last possible preprocessing step, so if you have multiple runs or libraries (and run merging turned on), this will represent the final reads that will go into subsampling steps. + +
+Output files + +- `run_merging/` + - `.fastq.gz`: Concatenated FASTQ files on a per-sample basis + +
+ +Note that you will only find samples that went through the run merging step in this directory. Samples that had a single run or library will not go through this step of the pipeline and thus will not be present in this directory. + +This directory and its FASTQ files will only be present if you supply `--save_runmerged_reads`. Alternatively, if you wish only to have the 'final' reads that go into subsampling (i.e., that may have additional processing), do not specify this flag but rather specify `--save_analysis_ready_fastqs`, in which case the reads will be in the folder `analysis_ready_reads`. + +### Analysis Ready Reads + +:::info +This optional results directory will only be present in the pipeline results when supplying `--save_analysis_ready_fastqs`. +::: + +
+Output files + +- `analysis_ready_fastqs/` + - `_{fq,fastq}.gz`: Final reads that underwent preprocessing and were sent for subsampling. + +
+ +The results directory will contain the 'final' processed reads used as input for subsampling. It will _only_ include the output of the _last_ step of any combinations of preprocessing steps that may have been specified in the run configuration. For example, if you perform the read QC, the final reads that are sent to subsampling are the post-QC processed ones - those will be the ones present in this directory. + +:::warning +If you turn off all preprocessing steps, then no results will be present in this directory. This happens independently for short- and long-reads i.e. you will only have FASTQ files for short reads in this directory if you skip all long-read preprocessing. +::: + +### Rasusa + +The `rasusa` software is used to subsample reads to a depth cutoff of a default of 100 (unless the `--subsampling_off` flag is set) + +
+Output files + +- `rasusa/` + - `.fastq.gz` subsampled fastq files + +
+ +### read_stats + +
+Output files + +- `read_stats/` + - `.tsv`: Pre- and post-processing sequence statistics. +- `summaries` + - `read_stats_summary.tsv`: Final summary tsv file of pre- and post-processing sequence statistics for all samples. + +
+ +### Bowtie2 + +[Bowtie 2](https://bowtie-bio.sourceforge.net/bowtie2/index.shtml) is an ultrafast and memory-efficient tool for aligning sequencing reads to long reference sequences. It is particularly good at aligning reads of about 50 up to 100s or 1,000s of characters, and particularly good at aligning to relatively long (e.g. mammalian) genomes. + +It is used with nf-core/bactmap to map short reads to the reference genome. + +
+Output files + +- `bowtie2/` + - `build/` + - `*.bt2`: Bowtie2 indices of reference genome + - `align/` + - `.log`: log file about the mapped reads + +
+ +:::info +While there is a dedicated section in the MultiQC HTML for Bowtie2, these values are not displayed by default in the General Stats table. Rather, alignment statistics against the reference genome are reported via the samtools stats module in the MultiQC report. +::: + +### BWA MEM2 + +[BWA MEM2](https://github.com/bwa-mem2/bwa-mem2) is a fast and accurate aligner for mapping short reads to reference sequences. + +It is used with nf-core/bactmap to map short reads to the reference genome. + +
+Output files + +- `bwamem2/` + - `index/` + - `*.amb` : BWA-MEM2 indices of reference genome + - `*.ann` : BWA-MEM2 indices of reference genome + - `*.bwt` : BWA-MEM2 indices of reference genome + - `*.pac` : BWA-MEM2 indices of reference genome + - `*.sa` : BWA-MEM2 indices of reference genome + +
+ +:::info +While there is a dedicated section in the MultiQC HTML for BWA-MEM2, these values are not displayed by default in the General Stats table. Rather, alignment statistics against the reference genome are reported via the samtools stats module in the MultiQC report. By default the BAM files created are not saved since sorted BAM files are produced in the next step. +::: + +### minimap2 + +[minimap2](https://github.com/lh3/minimap2) is an alignment tool suited to mapping long reads to reference sequences. + +It is used with nf-core/bactmap to map long reads to the reference genome. + +
+Output files + +- `minimap2/` + - `build/` + - `*.mmi2`: minimap2 indices of reference genome. + - `align/` + - `.bam`: BAM file containing reads that aligned against the user-supplied reference genome as well as unmapped reads + - `.bam.bai`: Index file for the BAM file +
+ +:::info +minimap2 is not yet supported as a module in MultiQC and therefore there is no dedicated section in the MultiQC HTML. Rather, alignment statistics against the reference genome are reported via the samtools stats module in the MultiQC report. +::: + +### SAMtools sort + +[SAMtools sort](http://www.htslib.org/doc/samtools-sort.html) is used to sort the BAM files generated by the mapping steps. The sorted BAM files are used for downstream variant calling. + +
+Output files + +- `samtools/sort/` + + - `.sorted.bam`: Sorted BAM file containing reads that aligned against the user-supplied reference genome as well as unmapped reads + - `.sorted.bam.bai`: Index file for the sorted BAM file + +
+ +### SAMtools stats + +[SAMtools stats](http://www.htslib.org/doc/samtools-stats.html) collects statistics from a `.sam`, `.bam`, or `.cram` alignment file and outputs in a text format. + +
+Output files + +- `samtools/stats/` + - `.sorted.bam.stats`: File containing samtools stats output. + - `.sorted.bam.flagstat`: Flagstat file for the sorted BAM file + - `.sorted.bam.idxstats`: Index statistics file for the sorted BAM file + +
+ +In most cases you do not need to check this file, as it is rendered in the MultiQC run report. + +### FreeBayes + +FreeBayes is a haplotype-based variant detector designed to find SNPs, indels, and complex variants in short-read data. It is used with nf-core/bactmap to call variants from short-read data. + +
+Output files + +- `freebayes/` + - `.vcf.gz`: VCF file containing variants + +
+ +### Clair3 + +Clair3 is a variant caller for long-read data. It is used with nf-core/bactmap to call variants from long-read data. + +
+Output files + +- `clair3/` + - `.vcf.gz`: VCF file containing variants + +
+ +### BCFtools filter + +The `BCFtools` software is used to call and filter variants found within the bam files. + +
+Output files + +- `filtered_variants` + - `.filtered.vcf.gz` filtered vcf files containing variants + +
+ +### BCFtools norm + +`BCFtools` norm is used to normalize the variant calls from ONT data. + +
+Output files + +- `filtered_variants` + - `.filtered.vcf.gz` filtered vcf files containing variants + +
+ +### BCFtools stats + +BCFtools stats is used to generate statistics from the variant calling step. The output is a summary of the variants found in the vcf files. + +
+Output files + +- `bcftools/stats/` + - `.stats` BCFtools stats output files + - `.tsv` BCFtools stats summary files + +### BCFtools consensus + +The filtered vcf files are converted to a pseudogenome. + +
+Output files + +- `pseudogenomes/` + - `.fas` pseudogenome with a base at each position of the reference sequence + +
+ +### seqtk + +The `seqtk` tool is used to identify the number of mapped bases within the pseudogenome fasta files. + +
+Output files + +- `seqtk/` + - `.tsv` tsv with base count and distribution for each pseudogenome +- `summaries` + - `mapping_summary.tsv` Summary of seqtk output for all samples + +
+ +### Align pseudogenomes + +Only those pseudogenome fasta files that have a non-ACGT fraction less than the threshold specified will be included in the `aligned_pseudogenomes.fas` file. Those failing this will be reported in the `low_quality_pseudogenomes.tsv` file. + +
+Output files + +- `pseudogenomes/` + - `aligned_pseudogenomes.fas` alignment of all sample pseudogenomes and the reference sequence + - `low_quality_pseudogenomes.tsv` a tab separated file of the samples that failed the non-ACGT base threshold + +
+ +### SNP-sites + +Non-informative constant sites are removed from the alignment using `snp-sites` + +
+Output files + +- `snpsites/` + - `constant.sites.txt` A file with the number of constant sites for each base + - `filtered_alignment.fas` Alignment with only informative positions (those positions that have at least one alternative variant base) + +
+ ### MultiQC
@@ -45,6 +495,24 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d Results generated by MultiQC collate pipeline QC from supported tools e.g. FastQC. The pipeline has special steps which also allow the software versions to be reported in the MultiQC output for future traceability. For more information about how to use MultiQC reports, see . +All tools in bactmap supported by MultiQC will have a dedicated section showing summary statistics of each tool based on information stored in log files. + +You can expect in the MultiQC reports either sections and/or general stats columns for the following tools: + +- fastqc +- fastp +- adapterremoval +- porechop +- porechop_abi +- filtlong +- nanoq +- samtools stats +- bcftools stats + +:::info +The 'General Stats' table by default will only show statistics referring to pre-processing steps, and will not display statistics from mapping and variant calling, unless turned on by the user within the 'Configure Columns' menu or via a custom MultiQC config file (`--multiqc_config`). +::: + ### Pipeline information
diff --git a/docs/usage.md b/docs/usage.md index 49c4df23..16e6e99a 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -6,58 +6,111 @@ ## Introduction - +**nf-core/bactmap** is a bioinformatics best-practice analysis pipeline for mapping short (Illumina) and long reads (Oxford Nanopore) from bacterial WGS to a reference sequence, creating filtered VCF files and making pseudogenomes based on high quality positions in the VCF files. -## Samplesheet input +In addition to this page, you can find additional usage information on the following pages: -You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row as shown in the examples below. +- [Tutorials](usage/tutorials.md) +- [FAQ and Troubleshooting](usage/faq-troubleshooting.md) -```bash +## General Usage + +To run nf-core/bactmap, at a minimum you require two inputs: + +- a sequencing read samplesheet +- a reference genome + +The samplesheet contains metadata and paths to the data of your input samples. + +nf-core/bactmap includes optional pre-processing (adapter clipping, run merging etc.) or post-processing (visualisation) steps. These are opt-in via a `--perform_` flag. In some cases, the pre- and post-processing steps may also require additional files. Please check the parameters tab of this documentation for more information. + +Please see the rest of this page for information about how to prepare input samplesheets and databases and how to run Nextflow pipelines. See the [parameters](https://nf-co.re/bactmap/parameters) documentation for more information about specific options the pipeline also offers. + +## Samplesheet inputs + +nf-core/bactmap can accept as input raw or preprocessed single- or paired-end short-read (e.g. Illumina) FASTQ files and long-read FASTQ files (e.g. Oxford Nanopore). 
+ +You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 5 columns, and a header row as shown in the examples below. + +This samplesheet is then specified on the command line as follows: + +```console --input '[path to samplesheet file]' ``` ### Multiple runs of the same sample -The `sample` identifiers have to be the same when you have re-sequenced the same sample more than once e.g. to increase sequencing depth. The pipeline will concatenate the raw reads before performing any downstream analysis. Below is an example for the same sample sequenced across 3 lanes: +The `sample` identifiers have to be the same when you have re-sequenced the same sample more than once e.g. to increase sequencing depth. The pipeline will concatenate the FASTQ files of different runs of the same sample before performing mapping and variant calling, when `--perform_runmerging` is supplied. Below is an example for the same sample sequenced across 3 lanes: ```csv title="samplesheet.csv" -sample,fastq_1,fastq_2 -CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz -CONTROL_REP1,AEG588A1_S1_L003_R1_001.fastq.gz,AEG588A1_S1_L003_R2_001.fastq.gz -CONTROL_REP1,AEG588A1_S1_L004_R1_001.fastq.gz,AEG588A1_S1_L004_R2_001.fastq.gz +sample,run_accession,instrument_platform,fastq_1,fastq_2 +2612,lane1,ILLUMINA,2612_lane1_R1.fq.gz,2612_lane1_R2.fq.gz +2612,lane2,ILLUMINA,2612_lane2_R1.fq.gz,2612_lane2_R2.fq.gz +2612,lane3,ILLUMINA,2612_lane3_R1.fq.gz, ``` +:::info +Please note that the column name `run_accession` follows the definition of an ENA 'run'. +A 'run' corresponds to a single or paired-end set of demultiplexed FASTQs. +Given that demultiplexing of a given library happens per lane, each sequencing pair from each lane is a 'run'. 
+Therefore, for each sample, you may get multiple 'runs' consisting of _both_ lanes (of the same library) _and_ sequencing libraries. +Therefore ensure that each `run_accession` ID is unique, even if from the same sample! +::: + +:::warning +Runs of the same sample sequenced on Illumina platforms with a combination of single and paired-end data will **not** be run-wise concatenated, unless pair-merging is specified. In the example above, `lane3` will be processed independently of `lane1` and `lane2` if pairs are not merged. +::: + ### Full samplesheet -The pipeline will auto-detect whether a sample is single- or paired-end using the information provided in the samplesheet. The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 3 columns to match those defined in the table below. +The pipeline will auto-detect whether a sample is single- or paired-end using the information provided in the samplesheet. The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 5 columns to match those defined in the table below. -A final samplesheet file consisting of both single- and paired-end data may look something like the one below. This is for 6 samples, where `TREATMENT_REP3` has been sequenced twice. +A final samplesheet file consisting of both single- and paired-end data may look something like the one below. This is for 3 samples, where `2612` has been sequenced twice. 
```csv title="samplesheet.csv" -sample,fastq_1,fastq_2 -CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz -CONTROL_REP2,AEG588A2_S2_L002_R1_001.fastq.gz,AEG588A2_S2_L002_R2_001.fastq.gz -CONTROL_REP3,AEG588A3_S3_L002_R1_001.fastq.gz,AEG588A3_S3_L002_R2_001.fastq.gz -TREATMENT_REP1,AEG588A4_S4_L003_R1_001.fastq.gz, -TREATMENT_REP2,AEG588A5_S5_L003_R1_001.fastq.gz, -TREATMENT_REP3,AEG588A6_S6_L003_R1_001.fastq.gz, -TREATMENT_REP3,AEG588A6_S6_L004_R1_001.fastq.gz, +sample,run_accession,instrument_platform,fastq_1,fastq_2 +2612,ERR5766176,ILLUMINA,///fastq/ERX5474932_ERR5766176_1.fastq.gz,///fastq/ERX5474932_ERR5766176_2.fastq.gz +2612,ERR5766180,ILLUMINA,///fastq/ERX5474936_ERR5766180_1.fastq.gz, +2613,ERR5766181,ILLUMINA,///fastq/ERX5474937_ERR5766181_1.fastq.gz,///fastq/ERX5474937_ERR5766181_2.fastq.gz +ERR3201952,ERR3201952,OXFORD_NANOPORE,///fastq/ERR3201952.fastq.gz, ``` -| Column | Description | -| --------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). | -| `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | -| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | +:::warning +Input FASTQ _must_ be gzipped. +::: + +:::warning +While one can include both short-read and long-read data in one run, we recommend that you split these across _two_ pipeline runs. This will make MultiQC run-reports more readable (due to run statistics having vary large number differences). 
+::: + +| Column | Description | +| --------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `sample` | Unique sample name [required]. | +| `run_accession` | Run ID or name unique for each (pairs of) file(s) .Can also supply sample name again here, if only a single run was generated [required]. | +| `instrument_platform` | Sequencing platform reads generated on, selected from the EBI ENA [controlled vocabulary](https://www.ebi.ac.uk/ena/portal/api/controlledVocab?field=instrument_platform) [required]. | +| `fastq_1` | Path or URL to sequencing reads or for Illumina R1 sequencing reads in FASTQ format. GZipped compressed files accepted. Can be left empty if data in FASTA is specified. | +| `fastq_2` | Path or URL to Illumina R2 sequencing reads in FASTQ format. GZipped compressed files accepted. Can be left empty if single end data. | An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. +## Reference genome + +The reference genome must be provided in FASTA format. The pipeline will automatically index the reference genome using `bwa index` or `bowtie2 build` and `samtools faidx` if not already indexed. +The reference genome can be provided as a local file or a URL. The pipeline will automatically download the reference genome if a URL is provided. +The reference genome can be specified on the command line as follows: + +```console +--fasta '[path to reference genome]' +``` + +The reference genome can be a single FASTA file or a multi-FASTA file. The reference genome can also be a gzipped FASTA file. The pipeline will automatically unzip the file if it is gzipped. 
+ ## Running the pipeline The typical command for running the pipeline is as follows: ```bash -nextflow run nf-core/bactmap --input ./samplesheet.csv --outdir ./results --genome GRCh37 -profile docker +nextflow run nf-core/bactmap --input ./samplesheet.csv --outdir ./results --fasta reference.fasta -profile docker ``` This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles. @@ -89,12 +142,67 @@ with: ```yaml title="params.yaml" input: './samplesheet.csv' outdir: './results/' -genome: 'GRCh37' +fasta: 'reference.fasta' <...> ``` You can also generate such `YAML`/`JSON` files via [nf-core/launch](https://nf-co.re/launch). +### Sequencing quality control + +[`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) gives general quality metrics about your reads. It provides information about the quality score distribution across your reads, per base sequence content (%A/T/G/C), adapter contamination and overrepresented sequences. nf-core/bactmap offers [`falco`](https://github.com/smithlabcode/falco) as a drop-in replacement, with supposedly better performance particularly for long reads. + +### Preprocessing Steps + +nf-core/bactmap offers two main preprocessing steps for preprocessing raw sequencing reads: + +- [**Read processing**](#read-processing): adapter clipping and pair-merging. +- [**Run merging**](#run-merging): concatenation of multiple FASTQ chunks/sequencing runs/libraries of a sample. + +:::info +You can save the 'final' reads used for mapping/variant calling from any combination of these steps with `--save_analysis_ready_fastqs`. +::: + +#### Read Processing + +Raw sequencing read processing in the form of adapter clipping and paired-end read merging can be activated via the `--perform_shortread_qc` or `--perform_longread_qc` flags. 
+ +It is highly recommended to run this on raw reads to remove artifacts from sequencing that can cause false positive identification of taxa (e.g. contaminated reference genomes) and/or skews in taxonomic abundance profiles. If you have public data, normally these should have been corrected for, however you should still check that these steps have indeed been already performed. + +There are currently two options for short-read preprocessing: [`fastp`](https://github.com/OpenGene/fastp) or [`adapterremoval`](https://github.com/MikkelSchubert/adapterremoval). + +For adapter clipping, you can either rely on the tool's default adapter sequences, or supply your own adapters (`--shortread_qc_adapter1` and `--shortread_qc_adapter2`). +By default, paired-end merging is not activated. If paired-end merging is activated you can also specify whether to include unmerged reads in the reads sent for mapping/variant calling (`--shortread_qc_mergepairs` and `--shortread_qc_includeunmerged`). +You can also turn off clipping and only perform paired-end merging, if requested. This can be useful when processing data downloaded from the ENA, SRA, or DDBJ (`--shortread_qc_skipadaptertrim`). +Both tools support length filtering of reads and can be tuned with `--shortread_qc_minlength`. + +There are currently two options for long-read Oxford Nanopore processing: [`porechop`](https://github.com/rrwick/Porechop) or [`porechop_abi`](https://github.com/bonsai-team/Porechop_ABI). + +For both short-read and long-read preprocessing, you can optionally save the resulting processed reads with `--save_preprocessed_reads`. + +#### Run Merging + +For samples that may have been sequenced over multiple runs, or for FASTQ files split into multiple chunks, you can activate the ability to merge across all runs or chunks with `--perform_runmerging`. + +For more information on how to set up your input samplesheet, see [Multiple runs of the same sample](#multiple-runs-of-the-same-sample). 
+ +Activating this functionality will concatenate the FASTQ files with the same sample name _after_ the optional preprocessing steps and _before_ mapping/variant calling. Note that libraries with runs of different pairing types will **not** be merged and this will be indicated on output files with a `_se` or `_pe` suffix to the sample name accordingly. + +You can optionally save the FASTQ output of the run merging with the `--save_runmerged_reads`. + +### Subsampling reads + +Some sequencing runs may be too large to process in a reasonable time. In these cases, you can use the `--perform_subsampling` flag to randomly subsample your reads to a specified depth of coverage per sample. This is done before the mapping steps. By default, this step is activated for all samples. + +### Read mapping + +The nf-core/bactmap pipeline provides two strategies to map reads to a reference genome: + +- **Short-read mapping**: `bwa-mem2` or `bowtie2` +- **Long-read mapping**: `minimap2` + +By default, the pipeline will use `bowtie2` for short-read mapping. You can change this with the `shortread_mapping_tool` parameter. + ### Updating the pipeline When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline: diff --git a/docs/usage/faq-troubleshooting.md b/docs/usage/faq-troubleshooting.md new file mode 100644 index 00000000..7e492537 --- /dev/null +++ b/docs/usage/faq-troubleshooting.md @@ -0,0 +1,5 @@ +# Troubleshooting and FAQs + +This document provides troubleshooting tips and answers to frequently asked questions related to the nf-core/bactmap pipeline. 
+ + diff --git a/docs/usage/tutorials.md b/docs/usage/tutorials.md new file mode 100644 index 00000000..aae076fd --- /dev/null +++ b/docs/usage/tutorials.md @@ -0,0 +1,150 @@ +# nf-core/bactmap: Tutorials + +This page provides a range of tutorials to help give you a bit more guidance on how to set up nf-core/bactmap runs yourself. + +## Simple Tutorial + +In this tutorial we will run you through a simple set up of a small nf-core/bactmap run. +It assumes that you have basic knowledge of read mapping input and output files. + +### Preparation + +#### Hardware + +The datasets used should be small enough to run on your own laptop or a single server node. + +If you wish to use an HPC cluster or cloud, and don't wish to use an 'interactive' session submitted to your scheduler, please see the [nf-core documentation](https://nf-co.re/docs/usage/configuration#introduction) on how to make a relevant config file. + +You will need internet access and at least XXX GB of hard drive space. + +#### Software + +The tutorial assumes you are on a Unix based operating system, and have already installed Nextflow as well as a software environment system such as [Conda](https://docs.conda.io/en/latest/miniconda.html), [Docker](https://www.docker.com/), or [Singularity/Apptainer](https://apptainer.org/). +The tutorial will use Docker, however you can simply replace references to `docker` with `conda`, `singularity`, or `apptainer` accordingly. + +#### Data + +First we will make a directory to run the whole tutorial in. + +```bash +mkdir bactmap-tutorial +cd bactmap-tutorial/ +``` + +We will use very small short- and long-read FASTQ files used for testing. 
You can download these files, along with the reference genome, with the following commands: + +```bash +curl -O https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/prokaryotes/bacteroides_fragilis/illumina/fastq/test1_1.fastq.gz +curl -O https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/prokaryotes/bacteroides_fragilis/illumina/fastq/test1_2.fastq.gz +curl -O https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/prokaryotes/bacteroides_fragilis/illumina/fastq/test2_1.fastq.gz +curl -O https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/prokaryotes/bacteroides_fragilis/illumina/fastq/test2_2.fastq.gz +curl -O https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/prokaryotes/bacteroides_fragilis/nanopore/fastq/test.fastq.gz +curl -O https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/prokaryotes/bacteroides_fragilis/reference/genome.fna.gz +``` + +### Preparing Input + +#### Sample sheet + +You provide the sequencing data FASTQ files to nf-core/bactmap via an input 'sample sheet' `.csv` file. +This is a 5 column table, that includes sample and library names, instrument platform, and paths to the sequencing data. + +Open a text editor, and create a file called `samplesheet.csv`. +Copy and paste the following lines into the file and save it. + +```csv title="samplesheet.csv" +sample,run_accession,instrument_platform,fastq_1,fastq_2 +01,test_1,ILLUMINA,test1_1.fastq.gz,test1_2.fastq.gz +02,test_2,ILLUMINA,test2_1.fastq.gz,test2_2.fastq.gz +03,test_3,OXFORD_NANOPORE,test.fastq.gz, +``` + +Here we have specified three samples, two were sequenced on the Illumina platform, one on the Oxford Nanopore, and the paths to the FASTQ files. +If you had placed your FASTQ files elsewhere, you would give the full path (i.e., with relevant directories) to the `fastq_1` and `fastq_2` columns. 
+ +### Running the pipeline + +Now that we have the sequencing reads (in FASTQ format) and a reference genome (FASTA, optionally gzipped), we can now run them with the pipeline. The following command will perform short read quality control, short- and long-read mapping, variant calling, create pseudogenomes and pseudogenome alignment, and finally read and mapping statistics. + +```bash +nextflow run nf-core/bactmap -r 2.0.0 -profile docker \ +--input samplesheet.csv --fasta genome.fna.gz --outdir ./results \ +--perform_subsampling false +``` + +:::info +With all Docker containers pre-downloaded, this run took X minutes and X seconds on a laptop running XXX with XX GB RAM and XX CPUs. +If you are running nf-core/bactmap for the first time, expect this command to take longer as Nextflow will have to download each software container for each step of the pipeline. +::: + +To break down each line of the command: + +- Tell Nextflow to run nf-core/bactmap with the particular version and using the Docker container system +- Specify the input and outputs, i.e., paths to the `samplesheet.csv`, `genome.fna.gz`, and directory where to save the results +- Turn off subsampling, which is a step that randomly selects a subset of reads from the FASTQ files. This is useful for testing the pipeline on small datasets, but not needed for real data. +- (Optional) provide a _cap_ to the maximum amount of resources each step/job of the pipeline can use + +:::warning +The pipeline runs occasionally fail due to a particular step of the pipeline requesting more resources than you have on your system. To avoid these failures, you can tell Nextflow to cap pipeline-step resource requests against a list called `resourceLimits` specified in a Nextflow config file. These should represent the maximum possible resources of a machine or node. To learn how to increase the computational resources available to the pipeline, see the central [nf-core documentation](https://nf-co.re/docs/usage/configuration). 
+::: + +### Output + +In the resulting directory `results/` you will find a range of directories. + + + +```tree +results/ +├── alignpseudogenomes +├── bcftools +├── bowtie2 +├── fastp +├── fastqc +├── minimap2 +├── multiqc +├── nanoq +├── pipeline_info +├── porechop +├── pseudogenomes +├── rasusa +├── read_stats +├── samtools +└── snp-sites +``` + + + +To follow the same order as the command construction above: + +- Pipeline run report is found in `multiqc/` and resource statistics in `pipeline_info/` +- Short-read QC results are found in `fastqc/` and `fastp/` +- Short-read mapping results are found in `bowtie2/` +- Long-read QC results are found in `fastqc/`, `porechop/` and `nanoq/` +- Read merging results are found in `run_merging/` +- Long-read mapping results are found in `minimap2/` +- Variant calling results are found in `samtools/` +- Variant calling statistics are found in `samtools/` and `bcftools/` +- Consensus pseudogenome results are found in `pseudogenomes/` +- Pseudogenome alignment results are found in `alignpseudogenomes/` +- Variant sites are found in `snp-sites/` + +:::info +For read-preprocessing steps, only log files are stored in the `results/` directories by default. Refer to the parameters tab of the [nf-core/bactmap documentation](https://nf-co.re/bactmap/) for more options. +::: + +A typical way of going through the results is to start by reviewing the `multiqc/multiqc_report.html` file to get general statistics of the entire run, particularly of the preprocessing. + +Detailed descriptions of all results files can be found in the output tab of the [nf-core/bactmap documentation](https://nf-co.re/bactmap/). + +### Clean up + +Once you have completed the tutorial, you can run the following command to delete all downloaded and output files. + +```bash +rm -r bactmap-tutorial/ +``` + +:::warning +Don't forget to change out of the directory above before trying to delete it! 
+::: diff --git a/main.nf b/main.nf index 3a3b39a4..ebbd9ceb 100644 --- a/main.nf +++ b/main.nf @@ -15,7 +15,7 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { BACTMAP } from './workflows/bactmap' +include { BACTMAP } from './workflows/bactmap' include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_bactmap_pipeline' include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_bactmap_pipeline' include { getGenomeAttribute } from './subworkflows/local/utils_nfcore_bactmap_pipeline' @@ -26,7 +26,6 @@ include { getGenomeAttribute } from './subworkflows/local/utils_nfcore_bact ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// TODO nf-core: Remove this line if you don't need a FASTA file // This is an example of how to use getGenomeAttribute() to fetch parameters // from igenomes.config using `--genome` params.fasta = getGenomeAttribute('fasta') @@ -47,11 +46,19 @@ workflow NFCORE_BACTMAP { main: + if(params.fasta){ + ch_fasta = Channel.fromPath(params.fasta, checkIfExists: true).collect() + .map{ it -> [[id:it[0].getSimpleName()], it[0]]} + } else { + exit 1, 'Either a valid configured `genome` or a `fasta` file must be specified.' 
+ } + // // WORKFLOW: Run pipeline // BACTMAP ( - samplesheet + samplesheet, + ch_fasta ) emit: multiqc_report = BACTMAP.out.multiqc_report // channel: /path/to/multiqc_report.html @@ -74,7 +81,10 @@ workflow { params.monochrome_logs, args, params.outdir, - params.input + params.input, + params.help, + params.help_full, + params.show_hidden ) // diff --git a/modules.json b/modules.json index d910b83f..61b95dcb 100644 --- a/modules.json +++ b/modules.json @@ -5,33 +5,208 @@ "https://github.com/nf-core/modules.git": { "modules": { "nf-core": { + "adapterremoval": { + "branch": "master", + "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46", + "installed_by": ["modules"] + }, + "bcftools/filter": { + "branch": "master", + "git_sha": "6383d8fe58f9498eecd5aa303e71a4a932d1e9f6", + "installed_by": ["modules"] + }, + "bcftools/index": { + "branch": "master", + "git_sha": "6383d8fe58f9498eecd5aa303e71a4a932d1e9f6", + "installed_by": ["bam_variant_calling_sort_freebayes_bcftools"] + }, + "bcftools/norm": { + "branch": "master", + "git_sha": "6383d8fe58f9498eecd5aa303e71a4a932d1e9f6", + "installed_by": ["modules"] + }, + "bcftools/query": { + "branch": "master", + "git_sha": "6383d8fe58f9498eecd5aa303e71a4a932d1e9f6", + "installed_by": ["modules"] + }, + "bcftools/sort": { + "branch": "master", + "git_sha": "6383d8fe58f9498eecd5aa303e71a4a932d1e9f6", + "installed_by": ["bam_variant_calling_sort_freebayes_bcftools"] + }, + "bcftools/stats": { + "branch": "master", + "git_sha": "6383d8fe58f9498eecd5aa303e71a4a932d1e9f6", + "installed_by": ["modules"] + }, + "bcftools/view": { + "branch": "master", + "git_sha": "6383d8fe58f9498eecd5aa303e71a4a932d1e9f6", + "installed_by": ["modules"] + }, + "bedtools/subtract": { + "branch": "master", + "git_sha": "88d43dad73a675e66bff49ebb57fe657a5909018", + "installed_by": ["modules"] + }, + "bowtie2/align": { + "branch": "master", + "git_sha": "e753770db613ce014b3c4bc94f6cba443427b726", + "installed_by": ["modules"] + }, + "bowtie2/build": 
{ + "branch": "master", + "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46", + "installed_by": ["modules"] + }, + "bwamem2/index": { + "branch": "master", + "git_sha": "d86336f3e7ae0d5f76c67b0859409769cfeb2af2", + "installed_by": ["modules"] + }, + "bwamem2/mem": { + "branch": "master", + "git_sha": "d86336f3e7ae0d5f76c67b0859409769cfeb2af2", + "installed_by": ["modules"] + }, + "cat/fastq": { + "branch": "master", + "git_sha": "cf735af4433f2dc8e410f67012dff824ef9990eb", + "installed_by": ["modules"] + }, + "falco": { + "branch": "master", + "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46", + "installed_by": ["modules"] + }, + "fastp": { + "branch": "master", + "git_sha": "b8f1de0ac853ae5b56c63450d47438f899c553d0", + "installed_by": ["modules"] + }, "fastqc": { "branch": "master", - "git_sha": "08108058ea36a63f141c25c4e75f9f872a5b2296", + "git_sha": "5bdb098216aaf5df9c3b6343e6204cd932503c16", + "installed_by": ["modules"] + }, + "fastqscan": { + "branch": "master", + "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46", + "installed_by": ["modules"] + }, + "filtlong": { + "branch": "master", + "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46", + "installed_by": ["modules"] + }, + "freebayes": { + "branch": "master", + "git_sha": "d04951ee68e3e8b875ebf5ddb7ba6e05233624c1", + "installed_by": ["bam_variant_calling_sort_freebayes_bcftools"] + }, + "gunzip": { + "branch": "master", + "git_sha": "96c57dfd98a0641886a67bd449fe33ee2ec0e374", + "installed_by": ["modules"] + }, + "minimap2/align": { + "branch": "master", + "git_sha": "e753770db613ce014b3c4bc94f6cba443427b726", + "installed_by": ["modules"] + }, + "minimap2/index": { + "branch": "master", + "git_sha": "e753770db613ce014b3c4bc94f6cba443427b726", "installed_by": ["modules"] }, "multiqc": { "branch": "master", - "git_sha": "f0719ae309075ae4a291533883847c3f7c441dad", + "git_sha": "80cba9452fb1e9bb79884976fa1ca0e671949aa2", + "installed_by": ["modules"] + }, + "nanoq": { + "branch": "master", + 
"git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46", + "installed_by": ["modules"] + }, + "porechop/abi": { + "branch": "master", + "git_sha": "10d3ce839d4c87bbff4da434e155a3c8a112ea05", + "installed_by": ["modules"] + }, + "porechop/porechop": { + "branch": "master", + "git_sha": "e753770db613ce014b3c4bc94f6cba443427b726", + "installed_by": ["modules"] + }, + "samtools/faidx": { + "branch": "master", + "git_sha": "9a48bce39a67e2cb34b8f125fc1d50f0ad98b616", + "installed_by": ["modules"] + }, + "samtools/flagstat": { + "branch": "master", + "git_sha": "e334e12a1e985adc5ffc3fc78a68be1de711de45", + "installed_by": ["bam_stats_samtools"] + }, + "samtools/idxstats": { + "branch": "master", + "git_sha": "c8be52dba1166c678e74cda9c3a3c221635c8bb1", + "installed_by": ["bam_stats_samtools"] + }, + "samtools/index": { + "branch": "master", + "git_sha": "c8be52dba1166c678e74cda9c3a3c221635c8bb1", + "installed_by": ["bam_sort_stats_samtools", "modules"] + }, + "samtools/sort": { + "branch": "master", + "git_sha": "5cb9a8694da0a0e550921636bb60bc8c56445fd7", + "installed_by": ["bam_sort_stats_samtools", "modules"] + }, + "samtools/stats": { + "branch": "master", + "git_sha": "fe93fde0845f907fc91ad7cc7d797930408824df", + "installed_by": ["bam_stats_samtools", "modules"] + }, + "seqtk/comp": { + "branch": "master", + "git_sha": "a46713779030a5f508117080cbf4b693dd4c6e33", + "installed_by": ["modules"] + }, + "snpsites": { + "branch": "master", + "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46", "installed_by": ["modules"] } } }, "subworkflows": { "nf-core": { + "bam_sort_stats_samtools": { + "branch": "master", + "git_sha": "eed5d14302a6c9070dcc25feeab707a27a4b3254", + "installed_by": ["subworkflows"] + }, + "bam_stats_samtools": { + "branch": "master", + "git_sha": "eed5d14302a6c9070dcc25feeab707a27a4b3254", + "installed_by": ["bam_sort_stats_samtools", "subworkflows"] + }, "utils_nextflow_pipeline": { "branch": "master", - "git_sha": 
"c2b22d85f30a706a3073387f30380704fcae013b", + "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", "installed_by": ["subworkflows"] }, "utils_nfcore_pipeline": { "branch": "master", - "git_sha": "51ae5406a030d4da1e49e4dab49756844fdd6c7a", + "git_sha": "65f5e638d901a51534c68fd5c1c19e8112fb4df1", "installed_by": ["subworkflows"] }, "utils_nfschema_plugin": { "branch": "master", - "git_sha": "2fd2cd6d0e7b273747f32e465fdc6bcc3ae0814e", + "git_sha": "fdc08b8b1ae74f56686ce21f7ea11ad11990ce57", "installed_by": ["subworkflows"] } } diff --git a/modules/local/alignpseudogenomes/environment.yml b/modules/local/alignpseudogenomes/environment.yml new file mode 100644 index 00000000..3bcd4f91 --- /dev/null +++ b/modules/local/alignpseudogenomes/environment.yml @@ -0,0 +1,4 @@ +channels: + - conda-forge +dependencies: + - conda-forge::biopython=1.78 diff --git a/modules/local/alignpseudogenomes/main.nf b/modules/local/alignpseudogenomes/main.nf new file mode 100644 index 00000000..31747417 --- /dev/null +++ b/modules/local/alignpseudogenomes/main.nf @@ -0,0 +1,42 @@ +process ALIGNPSEUDOGENOMES { + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/biopython:1.78' : + 'quay.io/biocontainers/biopython:1.78' }" + + input: + path pseudogenomes + tuple val(ref_meta), path(fasta) + + output: + tuple env(NUM_ALIGNMENT_GENOMES), path("aligned_pseudogenomes.fas"), emit: aligned_pseudogenomes + path "low_quality_pseudogenomes.tsv", emit: low_quality_metrics + path "versions.yml", emit: versions + + script: // This script is bundled with the pipeline, in nf-core/bactmap/bin/ + def aligner_version = '1.0' + """ + touch low_quality_pseudogenomes.tsv + touch aligned_pseudogenomes.fas + for pseudogenome in ${pseudogenomes} + do + fraction_non_GATC_bases=\$(calculate_fraction_of_non_GATC_bases.py -f \$pseudogenome | tr -d '\\n') + if awk -v frac="\$fraction_non_GATC_bases" 'BEGIN { exit !(frac < ${params.non_GATC_threshold}) }'; then + cat \$pseudogenome >> aligned_pseudogenomes.fas + else + echo "\$pseudogenome\t\$fraction_non_GATC_bases" >> low_quality_pseudogenomes.tsv + fi + done + multi2single_sequence.py -r ${fasta} -o final_reference.fas + cat final_reference.fas >> aligned_pseudogenomes.fas + + NUM_ALIGNMENT_GENOMES=\$(grep -c ">" aligned_pseudogenomes.fas) + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + multi2single_sequence.py: ${aligner_version} + END_VERSIONS + """ +} diff --git a/modules/local/bcftools/consensus/environment.yml b/modules/local/bcftools/consensus/environment.yml new file mode 100644 index 00000000..b276efd9 --- /dev/null +++ b/modules/local/bcftools/consensus/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::bcftools=1.20 diff --git a/modules/local/bcftools/consensus/main.nf b/modules/local/bcftools/consensus/main.nf new file mode 100644 index 00000000..194eccf2 --- /dev/null +++ b/modules/local/bcftools/consensus/main.nf @@ -0,0 +1,57 @@ +process BCFTOOLS_CONSENSUS { + tag 
"$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bcftools:1.20--h8b25389_0': + 'biocontainers/bcftools:1.20--h8b25389_0' }" + + input: + tuple val(meta), path(vcf), path(tbi), path(mask) + tuple val(ref_meta), path(fasta) + + output: + tuple val(meta), path('*.fa'), emit: fasta + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def masking = mask ? "-m $mask" : "" + """ + bcftools view -v snps $vcf -Oz -o ${prefix}.snps.vcf.gz + + bcftools index ${prefix}.snps.vcf.gz + + cat $fasta \\ + | bcftools \\ + consensus \\ + ${prefix}.snps.vcf.gz \\ + $args \\ + $masking \\ + --mask-with "N" \\ + > ${prefix}.fa + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def masking = mask ? "-m $mask" : "" + """ + touch ${prefix}.fa + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/local/bcftools/consensus/meta.yml b/modules/local/bcftools/consensus/meta.yml new file mode 100644 index 00000000..e9fa7815 --- /dev/null +++ b/modules/local/bcftools/consensus/meta.yml @@ -0,0 +1,59 @@ +name: bcftools_consensus +description: Create a consensus sequence by applying VCF variants to a reference FASTA file +keywords: + - variant calling + - consensus + - VCF +tools: + - consensus: + description: | + Create consensus sequence by applying VCF variants to a reference fasta file. 
+ homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: http://www.htslib.org/doc/bcftools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:bcftools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: VCF file + pattern: "*.{vcf}" + - tbi: + type: file + description: tabix index file + pattern: "*.{tbi}" + - fasta: + type: file + description: FASTA reference file + pattern: "*.{fasta,fa}" + - mask: + type: file + description: BED file used for masking + pattern: "*.{bed}" +output: + - fasta: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.fa": + type: file + description: FASTA reference consensus file + pattern: "*.{fasta,fa}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" +maintainers: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/local/bcftools/consensus/tests/main.nf.test b/modules/local/bcftools/consensus/tests/main.nf.test new file mode 100644 index 00000000..aa208459 --- /dev/null +++ b/modules/local/bcftools/consensus/tests/main.nf.test @@ -0,0 +1,90 @@ +nextflow_process { + + name "Test Process BCFTOOLS_CONSENSUS" + script "../main.nf" + process "BCFTOOLS_CONSENSUS" + + tag "modules" + tag "modules_nfcore" + tag "bcftools" + tag "bcftools/consensus" + + test("bcftools - test") { + + tag "bcftools_consensus" + + when { + process{ + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true), + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true), + file(params.test_data['sarscov2']['genome']['test_bed'], 
checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("bcftools - test - no mask") { + + tag "bcftools_consensus" + + when { + process{ + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true), + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true), + [] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("bcftools - stub - test") { + + tag "stub" + options '-stub' + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true), + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true), + file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match()} + ) + } + } +} diff --git a/modules/local/bcftools/consensus/tests/main.nf.test.snap b/modules/local/bcftools/consensus/tests/main.nf.test.snap new file mode 100644 index 00000000..1a23bd6d --- /dev/null +++ b/modules/local/bcftools/consensus/tests/main.nf.test.snap @@ -0,0 +1,101 @@ +{ + "bcftools - stub - test": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.fa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,b5e1ca5013c29fa37855fe929fb21a65" + ], + "fasta": [ + [ + { + "id": "test" + }, + "test.fa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,b5e1ca5013c29fa37855fe929fb21a65" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": 
"24.10.3" + }, + "timestamp": "2024-12-20T15:38:55.13767915" + }, + "bcftools - test": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.fa:md5,313f4a657187fdaf293c4ed69d98c112" + ] + ], + "1": [ + "versions.yml:md5,b5e1ca5013c29fa37855fe929fb21a65" + ], + "fasta": [ + [ + { + "id": "test" + }, + "test.fa:md5,313f4a657187fdaf293c4ed69d98c112" + ] + ], + "versions": [ + "versions.yml:md5,b5e1ca5013c29fa37855fe929fb21a65" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.1" + }, + "timestamp": "2024-08-20T13:56:00.789333465" + }, + "bcftools - test - no mask": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.fa:md5,e57d3e4d113f989bc069c5fd61627091" + ] + ], + "1": [ + "versions.yml:md5,b5e1ca5013c29fa37855fe929fb21a65" + ], + "fasta": [ + [ + { + "id": "test" + }, + "test.fa:md5,e57d3e4d113f989bc069c5fd61627091" + ] + ], + "versions": [ + "versions.yml:md5,b5e1ca5013c29fa37855fe929fb21a65" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.1" + }, + "timestamp": "2024-08-20T13:56:07.531646148" + } +} \ No newline at end of file diff --git a/modules/local/bedtools/genomecov/environment.yml b/modules/local/bedtools/genomecov/environment.yml new file mode 100644 index 00000000..45c307b0 --- /dev/null +++ b/modules/local/bedtools/genomecov/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::bedtools=2.31.1 diff --git a/modules/local/bedtools/genomecov/main.nf b/modules/local/bedtools/genomecov/main.nf new file mode 100644 index 00000000..4bafd4da --- /dev/null +++ b/modules/local/bedtools/genomecov/main.nf @@ -0,0 +1,60 @@ +process BEDTOOLS_GENOMECOV { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && 
!task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/63/6397750e9730a3fbcc5b4c43f14bd141c64c723fd7dad80e47921a68a7c3cd21/data': + 'community.wave.seqera.io/library/bedtools_coreutils:a623c13f66d5262b' }" + + // simpler input, having removed the sorting option + input: + tuple val(meta), path(intervals), val(scale) + + output: + tuple val(meta), path("*.bed"), emit: genomecov + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args_list = args.tokenize() + args += (scale > 0 && scale != 1) ? " -scale $scale" : "" + if (!args_list.contains('-bg') && (scale > 0 && scale != 1)) { + args += " -bg" + } + // Sorts output file by chromosome and position using additional options for performance and consistency + // See https://www.biostars.org/p/66927/ for further details + def buffer = task.memory ? "--buffer-size=${task.memory.toGiga().intdiv(2)}G" : '' + + def prefix = task.ext.prefix ?: "${meta.id}" + def cmd = '$4 <' + // hard-coded for bcftools_consensus subworkflow. mainly the problem was the awk pipe at the end, we decided to go with a local version of genomecov. + // this is a simpler version than the nf-core version, it expects a bam as interval file, and it does not expect sorting either. 
we did add a threshold parameter in the nextflow.config in case the user wants to specify the threshold for low coverage + """ + bedtools \\ + genomecov \\ + -ibam $intervals \\ + $args \\ + | awk '${cmd}'$params.genomecov_threshold \\ + > ${prefix}.bed + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.bed + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") + END_VERSIONS + """ +} diff --git a/modules/local/clair3/environment.yml b/modules/local/clair3/environment.yml new file mode 100644 index 00000000..cc814bb3 --- /dev/null +++ b/modules/local/clair3/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::clair3=1.2.0" diff --git a/modules/local/clair3/main.nf b/modules/local/clair3/main.nf new file mode 100644 index 00000000..2ea01f49 --- /dev/null +++ b/modules/local/clair3/main.nf @@ -0,0 +1,72 @@ +process CLAIR3 { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/e7/e70b0f4389028f4dc88efde1aac7139927c898cf7add680e14724d97fecd3d32/data' + : 'community.wave.seqera.io/library/clair3:1.2.0--b1b03d4e9d1b6a2e'}" + + input: + tuple val(meta), path(bam), path(bai), path(model), val(platform) + tuple val(meta2), path(reference) + tuple val(meta3), path(index) + + output: + tuple val(meta), path("*merge_output.vcf.gz"), emit: vcf + tuple val(meta), path("*merge_output.vcf.gz.tbi"), emit: tbi + tuple val(meta), path("*phased_merge_output.vcf.gz"), emit: phased_vcf, optional: true + tuple val(meta), path("*phased_merge_output.vcf.gz.tbi"), emit: phased_tbi, optional: true + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + run_clair3.sh \\ + --bam_fn=$bam \\ + --ref_fn=$reference \\ + --threads=$task.cpus \\ + --output=. \\ + --platform=$platform \\ + --model_path=$model \\ + $args + + # Rename to add prefix + for file in merge_output.vcf.gz \ + merge_output.vcf.gz.tbi \ + phased_merge_output.vcf.gz \ + phased_merge_output.vcf.gz.tbi \ + merge_output.gvcf.gz \ + merge_output.gvcf.gz.tbi; do + if [ -e "\$file" ]; then + mv "\$file" "${prefix}\$file" + fi + done + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + clair3: \$(run_clair3.sh --version |& sed '1!d ; s/Clair3 v//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + """ + echo "" | gzip > ${prefix}phased_merge_output.vcf.gz + touch ${prefix}phased_merge_output.vcf.gz.tbi + echo "" | gzip > ${prefix}merge_output.vcf.gz + touch ${prefix}merge_output.vcf.gz.tbi + echo "" | gzip > ${prefix}merge_output.gvcf.gz + touch ${prefix}merge_output.gvcf.gz.tbi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + clair3: \$(run_clair3.sh --version |& sed '1!d ; s/Clair3 v//') + END_VERSIONS + """ +} diff --git 
a/modules/local/clair3/meta.yml b/modules/local/clair3/meta.yml new file mode 100644 index 00000000..249916c5 --- /dev/null +++ b/modules/local/clair3/meta.yml @@ -0,0 +1,119 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "clair3" +description: Clair3 is a germline small variant caller for long-reads +keywords: + - germline + - variant + - Indel + - SNV +tools: + - "clair3": + description: "Clair3 is a small variant caller for long-reads. Compared to PEPPER + (r0.4), Clair3 (v0.1) shows a better SNP F1-score with ≤30-fold of ONT data + (precisionFDA Truth Challenge V2), and a better Indel F1-score, while generally running + four times faster. Clair3 makes the best of both worlds of using pileup or full-alignment + as input for deep-learning based long-read small variant calling. Clair3 is + simple and modular for easy deployment and integration." + homepage: "https://github.com/HKU-BAL/Clair3" + documentation: "https://github.com/HKU-BAL/Clair3" + tool_dev_url: "https://github.com/HKU-BAL/Clair3" + doi: "10.1038/s43588-022-00387-x" + licence: ["BSD-3-clause"] + identifier: biotools:clair3 + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - bam: + type: file + description: Sorted BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + ontologies: + - edam: "http://edamontology.org/format_2572" + - edam: "http://edamontology.org/format_2573" + - edam: "http://edamontology.org/format_3462" + - bai: + type: file + description: BAM index file + pattern: "*.bai" + - model: + type: directory + description: collection of files used in a trained Clair3 model + - platform: + type: string + description: val in ['hifi','ont', 'ilmn'] to indicate pacbio, ONT, or illumina + respectively + - - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1' ]` + - reference: + type: file + description: reference fasta file + pattern: "*.fasta" + - - meta3: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - index: + type: file + description: reference index file + pattern: "*.fai" +output: + - vcf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + pattern: "*.{vcf,vcf.gz}" + - "*merge_output.vcf.gz": + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + pattern: "*.{vcf,vcf.gz}" + - tbi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - "*merge_output.vcf.gz.tbi": + type: file + description: index for vcf files + pattern: "*.{vcf.tbi,vcf.tbi.gz}" + - phased_vcf: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - "*phased_merge_output.vcf.gz": + type: file + description: phased vcf + pattern: "*.{vcf,vcf.gz}" + - phased_tbi: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1' ]` + - "*phased_merge_output.vcf.gz.tbi": + type: file + description: index for vcf files + pattern: "*.{vcf.tbi,vcf.tbi.gz}" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@robert-a-forsyth" +maintainers: + - "@robert-a-forsyth" diff --git a/modules/local/clair3/tests/main.nf.test b/modules/local/clair3/tests/main.nf.test new file mode 100644 index 00000000..db5ba26d --- /dev/null +++ b/modules/local/clair3/tests/main.nf.test @@ -0,0 +1,104 @@ +nextflow_process { + + name "Test Process CLAIR3" + script "../main.nf" + process "CLAIR3" + + tag "modules" + tag "modules_nfcore" + tag "clair3" + tag "untar" + + setup { + run("UNTAR") { + script "../../../../modules/nf-core/untar/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'generic/models/clair3.hifi.tar.gz', checkIfExists: true) + ] + """ + } + } + } + test("sarscov2 - bam") { + + when { + process { + """ + def model_path = UNTAR.out.untar + input[0] = Channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + ]) + .join(UNTAR.out.untar).combine(Channel.of(['hifi'])) + input[1] = [ + [ id:'test'], + file(params.modules_testdata_base_path + '/genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + ] + input[2] = [ + [ id: 'test'], + file(params.modules_testdata_base_path + '/genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true), + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf.collect { file(it[1]).getName() }, + process.out.tbi.collect { file(it[1]).getName() }, + process.out.versions, + process.out.phased_vcf.collect { file(it[1]).getName() }, 
+ process.out.phased_tbi.collect { file(it[1]).getName() }).match()} + ) + } + + } + + test("sarscov2 - bam - stub") { + + options "-stub" + + when { + process { + """ + def model_path = UNTAR.out.untar + input[0] = Channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + ]) + .join(UNTAR.out.untar).combine(Channel.of(['hifi'])) + input[1] = [ + [ id:'test'], + file(params.modules_testdata_base_path + '/genomics/sarscov2/genome/genome.fasta', checkIfExists: true), + ] + input[2] = [ + [ id: 'test'], + file(params.modules_testdata_base_path + '/genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true), + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.tbi, + process.out.phased_vcf, + process.out.phased_tbi, + process.out.versions, + ).match()} + ) + } + + } + +} diff --git a/modules/local/clair3/tests/main.nf.test.snap b/modules/local/clair3/tests/main.nf.test.snap new file mode 100644 index 00000000..fe9e76f8 --- /dev/null +++ b/modules/local/clair3/tests/main.nf.test.snap @@ -0,0 +1,76 @@ +{ + "sarscov2 - bam - stub": { + "content": [ + [ + [ + { + "id": "test" + }, + [ + "test.merge_output.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.phased_merge_output.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + [ + [ + { + "id": "test" + }, + [ + "test.merge_output.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e", + "test.phased_merge_output.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + [ + [ + { + "id": "test" + }, + "test.phased_merge_output.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + [ + [ + { + "id": "test" + }, + "test.phased_merge_output.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + [ 
+ "versions.yml:md5,10928c13418eced076964d86249aeaf8" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-03-17T09:56:09.775735" + }, + "sarscov2 - bam": { + "content": [ + [ + "merge_output.vcf.gz" + ], + [ + "merge_output.vcf.gz.tbi" + ], + [ + "versions.yml:md5,10928c13418eced076964d86249aeaf8" + ], + [ + + ], + [ + + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-03-14T17:16:02.057639" + } +} \ No newline at end of file diff --git a/modules/local/concatenate_fasta/environment.yml b/modules/local/concatenate_fasta/environment.yml new file mode 100644 index 00000000..e1e52d8a --- /dev/null +++ b/modules/local/concatenate_fasta/environment.yml @@ -0,0 +1,6 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge +dependencies: + - conda-forge::biopython=1.78 diff --git a/modules/local/concatenate_fasta/main.nf b/modules/local/concatenate_fasta/main.nf new file mode 100644 index 00000000..d5226e68 --- /dev/null +++ b/modules/local/concatenate_fasta/main.nf @@ -0,0 +1,30 @@ +process CONCATENATE_FASTA { + tag "$meta.id" + label 'process_single' + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/biopython:1.78' : + 'quay.io/biocontainers/biopython:1.78' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path("*.fa"), emit: fasta + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + def aligner_version = '1.0' + """ + multi2single_sequence.py -r ${fasta} -o ${prefix}.fa + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + multi2single_sequence.py: ${aligner_version} + END_VERSIONS + """ +} diff --git a/modules/local/fasta_rename/environment.yml b/modules/local/fasta_rename/environment.yml new file mode 100644 index 00000000..ea9ca827 --- /dev/null +++ b/modules/local/fasta_rename/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::sed=4.9 diff --git a/modules/local/fasta_rename/main.nf b/modules/local/fasta_rename/main.nf new file mode 100644 index 00000000..f58527c4 --- /dev/null +++ b/modules/local/fasta_rename/main.nf @@ -0,0 +1,29 @@ +process RENAME_FASTA_HEADER { + tag "$meta.id" + label 'process_single' + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+        'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b3/b3e6bc623ea7bb7b6ebe5c225f8138e18548c78b7a1ca5abea06a48e635a2ef3/data' :
+        'community.wave.seqera.io/library/sed:4.9--b22139a895c82f4b' }"
+
+    input:
+    tuple val(meta), path(fasta)
+
+    output:
+    tuple val(meta), path("*.fa"), emit: fasta
+    path "versions.yml"          , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script: // Prefix every FASTA header with the sample ID; '^>' limits the edit to the leading '>' of a header line
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    sed "s/^>/>${meta.id} /" $fasta > ${prefix}.fa
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        sed: \$(echo \$(sed --version 2>&1) | sed 's/^.*GNU sed) //; s/ .*\$//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/local/fastq_scan_parse/environment.yml b/modules/local/fastq_scan_parse/environment.yml
new file mode 100644
index 00000000..884792d3
--- /dev/null
+++ b/modules/local/fastq_scan_parse/environment.yml
@@ -0,0 +1,5 @@
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - conda-forge::pandas=2.3.1
diff --git a/modules/local/fastq_scan_parse/main.nf b/modules/local/fastq_scan_parse/main.nf
new file mode 100644
index 00000000..8c10d078
--- /dev/null
+++ b/modules/local/fastq_scan_parse/main.nf
@@ -0,0 +1,27 @@
+process FASTQSCANPARSE {
+    label 'process_low'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/c0/c03d8fbb44376692e10bb9e56be2577fc35446c193637735de9fed182e6b58df/data' : + 'community.wave.seqera.io/library/pandas:2.3.1--139e2fa6c1f18206' }" + + input: + path json + + output: + path("*.tsv"), emit: tsv + path "versions.yml" , emit: versions + + script: // This script is bundled with the pipeline in avantonder/bovisanalyzer/bin/ + def prefix = task.ext.prefix ?: '' + def parser_version = '1.0' + """ + fastqscan_parser.py + mv fastq-scan_summary.tsv ${prefix}_fastq-scan_summary.tsv + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastqscan_parser.py: ${parser_version} + END_VERSIONS + """ +} diff --git a/modules/local/get_genome_size/environment.yml b/modules/local/get_genome_size/environment.yml new file mode 100644 index 00000000..f52109e8 --- /dev/null +++ b/modules/local/get_genome_size/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::gawk=5.3.0 diff --git a/modules/local/get_genome_size/main.nf b/modules/local/get_genome_size/main.nf new file mode 100644 index 00000000..607f1891 --- /dev/null +++ b/modules/local/get_genome_size/main.nf @@ -0,0 +1,29 @@ +process GET_GENOME_SIZE { + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+        'https://depot.galaxyproject.org/singularity/gawk:5.3.0' :
+        'biocontainers/gawk:5.3.0' }"
+
+    input:
+    tuple val(meta), path(size_file)
+
+    output:
+    env 'genome_size', emit: ch_genome_size
+    path "versions.yml", emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def cmd = '{sum+=$2} END {print sum}'
+    """
+    genome_size=`cat ${size_file} | awk '${cmd}'`
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        gawk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//')
+    END_VERSIONS
+    """
+}
diff --git a/modules/local/rasusa/environment.yml b/modules/local/rasusa/environment.yml
new file mode 100644
index 00000000..68cd6e40
--- /dev/null
+++ b/modules/local/rasusa/environment.yml
@@ -0,0 +1,7 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - bioconda::rasusa=0.3.0
diff --git a/modules/local/rasusa/main.nf b/modules/local/rasusa/main.nf
new file mode 100644
index 00000000..1a4ac498
--- /dev/null
+++ b/modules/local/rasusa/main.nf
@@ -0,0 +1,38 @@
+process RASUSA {
+    tag "$meta.id"
+    label 'process_low'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/rasusa:0.3.0--h779adbc_1' :
+        'biocontainers/rasusa:0.3.0--h779adbc_1' }"
+
+    input:
+    tuple val(meta), path(reads)
+    val genome_size  // forwarded to `rasusa --genome-size`
+    val depth_cutoff // forwarded to `rasusa --coverage`
+
+    output:
+    tuple val(meta), path('*.fastq.gz'), emit: reads
+    path "versions.yml"                , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def output = meta.single_end ? "--output ${prefix}.fastq.gz" : "--output ${prefix}_1.fastq.gz ${prefix}_2.fastq.gz"
+    """
+    rasusa \\
+        $args \\
+        --coverage $depth_cutoff \\
+        --genome-size $genome_size \\
+        --input $reads \\
+        $output
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        rasusa: \$(rasusa --version 2>&1 | sed -e "s/rasusa //g")
+    END_VERSIONS
+    """
+}
diff --git a/modules/local/rasusa/meta.yml b/modules/local/rasusa/meta.yml
new file mode 100644
index 00000000..abda9a39
--- /dev/null
+++ b/modules/local/rasusa/meta.yml
@@ -0,0 +1,50 @@
+name: rasusa
+description: Randomly subsample sequencing reads to a specified coverage
+keywords:
+  - coverage
+  - depth
+  - subsampling
+tools:
+  - rasusa:
+      description: Randomly subsample sequencing reads to a specified coverage
+      homepage: https://github.com/mbhall88/rasusa
+      documentation: https://github.com/mbhall88/rasusa/blob/master/README.md
+      tool_dev_url: https://github.com/mbhall88/rasusa
+      doi: "10.5281/zenodo.3731394"
+      licence: ["MIT"]
+      identifier: biotools:rasusa
+input:
+  - - meta:
+        type: map
+        description: |
+          Groovy Map containing sample information
+          e.g. [ id:'test', single_end:false ]
+    - reads:
+        type: file
+        description: List of input paired-end FastQ files
+  - - genome_size: # separate input channel: main.nf declares `val genome_size` outside the reads tuple
+        type: string
+        description: Genome size of the species
+  - - depth_cutoff:
+        type: integer
+        description: Depth of coverage cutoff
+output:
+  - reads:
+      - meta:
+          type: map
+          description: |
+            Groovy Map containing sample information
+            e.g. 
[ id:'test', single_end:false ]
+      - "*.fastq.gz":
+          type: file
+          description: Reads with subsampled coverage
+          pattern: "*.fastq.gz"
+  - versions:
+      - versions.yml:
+          type: file
+          description: File containing software versions
+          pattern: "versions.yml"
+authors:
+  - "@thanhleviet"
+maintainers:
+  - "@thanhleviet"
diff --git a/modules/local/rasusa/tests/main.nf.test b/modules/local/rasusa/tests/main.nf.test
new file mode 100644
index 00000000..db59182b
--- /dev/null
+++ b/modules/local/rasusa/tests/main.nf.test
@@ -0,0 +1,38 @@
+nextflow_process {
+
+    name "Test Process RASUSA"
+    script "../main.nf"
+    process "RASUSA"
+    tag "rasusa"
+    tag "modules"
+    tag "modules_nfcore"
+
+    test("Should run without failures") {
+
+        when {
+            params {
+                outdir = "$outputDir"
+            }
+            process {
+                """
+                input[0] = [ [ id:'testfile', single_end:false], // meta map
+                            [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+                              file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)
+                            ]
+                        ]
+                input[1] = "1000000b" // genome_size: RASUSA takes it as its own value input, not as a tuple member
+                input[2] = 100 // depth_cutoff
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+}
diff --git a/modules/local/rasusa/tests/main.nf.test.snap b/modules/local/rasusa/tests/main.nf.test.snap
new file mode 100644
index 00000000..60401ac6
--- /dev/null
+++ b/modules/local/rasusa/tests/main.nf.test.snap
@@ -0,0 +1,43 @@
+{
+    "Should run without failures": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "testfile",
+                            "single_end": false
+                        },
+                        [
+                            "testfile_1.fastq.gz:md5,4161df271f9bfcd25d5845a1e220dbec",
+                            "testfile_2.fastq.gz:md5,2ebae722295ea66d84075a3b042e2b42"
+                        ]
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,0c382a24d70b3003b1c80e44e9c02652"
+                ],
+                "reads": [
+                    [
+                        {
+                            "id": "testfile",
+                            "single_end": false
+                        },
+                        [
+                            "testfile_1.fastq.gz:md5,4161df271f9bfcd25d5845a1e220dbec",
+                            "testfile_2.fastq.gz:md5,2ebae722295ea66d84075a3b042e2b42"
+                        ]
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,0c382a24d70b3003b1c80e44e9c02652"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.3"
+        },
+        "timestamp": "2024-07-31T14:08:55.918357445"
+    }
+}
\ No newline at end of file
diff --git a/modules/local/rasusa/tests/tags.yml b/modules/local/rasusa/tests/tags.yml
new file mode 100644
index 00000000..8853e2d7
--- /dev/null
+++ b/modules/local/rasusa/tests/tags.yml
@@ -0,0 +1,2 @@
+rasusa:
+  - modules/local/rasusa/** # this module is added under modules/local, not modules/nf-core
diff --git a/modules/local/read_stats/environment.yml b/modules/local/read_stats/environment.yml
new file mode 100644
index 00000000..884792d3
--- /dev/null
+++ b/modules/local/read_stats/environment.yml
@@ -0,0 +1,5 @@
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - conda-forge::pandas=2.3.1
diff --git a/modules/local/read_stats/main.nf b/modules/local/read_stats/main.nf
new file mode 100644
index 00000000..c1aa4334
--- /dev/null
+++ b/modules/local/read_stats/main.nf
@@ -0,0 +1,28 @@
+process READ_STATS {
+    label 'process_low'
+    tag "$meta.id"
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/c0/c03d8fbb44376692e10bb9e56be2577fc35446c193637735de9fed182e6b58df/data' :
+        'community.wave.seqera.io/library/pandas:2.3.1--139e2fa6c1f18206' }"
+
+    input:
+    tuple val(meta), path(json_1), path(json_2) // two staged JSON inputs; distinct names so neither binding shadows the other (read_stats.py is called without arguments)
+
+    output:
+    tuple val(meta), path("*.read_stats.csv"), emit: csv
+    path "versions.yml", emit: versions
+
+    script: // This script is bundled with the pipeline in avantonder/bovisanalyzer/bin/
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def parser_version = '1.0'
+    """
+    read_stats.py
+    mv read_stats.csv ${prefix}.read_stats.csv
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        read_stats.py: ${parser_version}
+    END_VERSIONS
+    """
+}
diff --git a/modules/local/read_stats_parse/environment.yml b/modules/local/read_stats_parse/environment.yml
new file mode 100644
index 00000000..884792d3
--- /dev/null
+++ b/modules/local/read_stats_parse/environment.yml
@@ -0,0 +1,5 @@
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - conda-forge::pandas=2.3.1
diff --git a/modules/local/read_stats_parse/main.nf b/modules/local/read_stats_parse/main.nf
new file mode 100644
index 00000000..b5b54827
--- /dev/null
+++ b/modules/local/read_stats_parse/main.nf
@@ -0,0 +1,28 @@
+process READSTATS_PARSE {
+    label 'process_low'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/c0/c03d8fbb44376692e10bb9e56be2577fc35446c193637735de9fed182e6b58df/data' : + 'community.wave.seqera.io/library/pandas:2.3.1--139e2fa6c1f18206' }" + + input: + path csv + + output: + path "read_stats_summary.tsv", emit: tsv + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: // This script is bundled with the pipeline in avantonder/bovisanalyzer/bin/ + def parser_version = '1.0' + """ + read_stats_parser.py + cat <<-END_VERSIONS > versions.yml + "${task.process}": + read_stats_parser.py: ${parser_version} + END_VERSIONS + """ +} diff --git a/modules/local/seqtk_parse/environment.yml b/modules/local/seqtk_parse/environment.yml new file mode 100644 index 00000000..884792d3 --- /dev/null +++ b/modules/local/seqtk_parse/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::pandas=2.3.1 diff --git a/modules/local/seqtk_parse/main.nf b/modules/local/seqtk_parse/main.nf new file mode 100644 index 00000000..1b6f49e5 --- /dev/null +++ b/modules/local/seqtk_parse/main.nf @@ -0,0 +1,28 @@ +process SEQTK_PARSE { + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/c0/c03d8fbb44376692e10bb9e56be2577fc35446c193637735de9fed182e6b58df/data' : + 'community.wave.seqera.io/library/pandas:2.3.1--139e2fa6c1f18206' }" + + input: + path tsv + + output: + path "mapping_summary.tsv", emit: tsv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: // This script is bundled with the pipeline in avantonder/bactmap/bin/ + def parser_version = '1.0' + """ + seqtk_parser.py + cat <<-END_VERSIONS > versions.yml + "${task.process}": + seqtk_parser.py: ${parser_version} + END_VERSIONS + """ +} diff --git a/modules/nf-core/adapterremoval/environment.yml b/modules/nf-core/adapterremoval/environment.yml new file mode 100644 index 00000000..24e78cdb --- /dev/null +++ b/modules/nf-core/adapterremoval/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::adapterremoval=2.3.2 diff --git a/modules/nf-core/adapterremoval/main.nf b/modules/nf-core/adapterremoval/main.nf new file mode 100644 index 00000000..319497fb --- /dev/null +++ b/modules/nf-core/adapterremoval/main.nf @@ -0,0 +1,119 @@ +process ADAPTERREMOVAL { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/adapterremoval:2.3.2--hb7ba0dd_0' : + 'biocontainers/adapterremoval:2.3.2--hb7ba0dd_0' }" + + input: + tuple val(meta), path(reads) + path(adapterlist) + + output: + tuple val(meta), path("${prefix}.truncated.fastq.gz") , emit: singles_truncated , optional: true + tuple val(meta), path("${prefix}.discarded.fastq.gz") , emit: discarded , optional: true + tuple val(meta), path("${prefix}.pair{1,2}.truncated.fastq.gz"), emit: paired_truncated , optional: true + tuple val(meta), path("${prefix}.collapsed.fastq.gz") , emit: collapsed , optional: true + tuple val(meta), path("${prefix}.collapsed.truncated.fastq.gz"), emit: collapsed_truncated, optional: true + tuple val(meta), path("${prefix}.paired.fastq.gz") , emit: paired_interleaved , optional: true + tuple val(meta), path('*.settings') , emit: settings + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def list = adapterlist ? "--adapter-list ${adapterlist}" : "" + + if (meta.single_end) { + """ + AdapterRemoval \\ + --file1 ${reads} \\ + ${args} \\ + ${list} \\ + --basename ${prefix} \\ + --threads ${task.cpus} \\ + --seed 42 \\ + --gzip + + ensure_fastq() { + if [ -f "\${1}" ]; then + mv "\${1}" "\${1::-3}.fastq.gz" + fi + + } + + ensure_fastq '${prefix}.truncated.gz' + ensure_fastq '${prefix}.discarded.gz' + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + adapterremoval: \$(AdapterRemoval --version 2>&1 | sed -e "s/AdapterRemoval ver. 
//g") + END_VERSIONS + """ + } else { + """ + AdapterRemoval \\ + --file1 ${reads[0]} \\ + --file2 ${reads[1]} \\ + ${args} \\ + ${list} \\ + --basename ${prefix} \\ + --threads ${task.cpus} \\ + --seed 42 \\ + --gzip + + ensure_fastq() { + if [ -f "\${1}" ]; then + mv "\${1}" "\${1::-3}.fastq.gz" + fi + + } + + ensure_fastq '${prefix}.truncated.gz' + ensure_fastq '${prefix}.discarded.gz' + ensure_fastq '${prefix}.pair1.truncated.gz' + ensure_fastq '${prefix}.pair2.truncated.gz' + ensure_fastq '${prefix}.collapsed.gz' + ensure_fastq '${prefix}.collapsed.truncated.gz' + ensure_fastq '${prefix}.paired.gz' + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + adapterremoval: \$(AdapterRemoval --version 2>&1 | sed -e "s/AdapterRemoval ver. //g") + END_VERSIONS + """ + } + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + + collapse_cmd = args.contains('--collapse') + + """ + touch '${prefix}.settings' + echo | gzip > '${prefix}.truncated.fastq.gz' + echo | gzip > '${prefix}.discarded.fastq.gz' + + if [ "${meta.single_end}" = false ]; then + echo | gzip > '${prefix}.pair1.truncated.fastq.gz' + echo | gzip > '${prefix}.pair2.truncated.fastq.gz' + echo | gzip > '${prefix}.paired.fastq.gz' + + if [ "${collapse_cmd}" = true ]; then + echo | gzip > '${prefix}.collapsed.truncated.fastq.gz' + echo | gzip > '${prefix}.collapsed.fastq.gz' + fi + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + adapterremoval: \$(AdapterRemoval --version 2>&1 | sed -e "s/AdapterRemoval ver. 
//g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/adapterremoval/meta.yml b/modules/nf-core/adapterremoval/meta.yml new file mode 100644 index 00000000..875f61ec --- /dev/null +++ b/modules/nf-core/adapterremoval/meta.yml @@ -0,0 +1,137 @@ +name: adapterremoval +description: Trim sequencing adapters and collapse overlapping reads +keywords: + - trimming + - adapters + - merging + - fastq +tools: + - adapterremoval: + description: The AdapterRemoval v2 tool for merging and clipping reads. + homepage: https://github.com/MikkelSchubert/adapterremoval + documentation: https://adapterremoval.readthedocs.io + licence: ["GPL v3"] + identifier: biotools:adapterremoval +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + pattern: "*.{fq,fastq,fq.gz,fastq.gz}" + ontologies: + - edam: "http://edamontology.org/format_1930" # FASTQ + - adapterlist: + type: file + description: Optional text file containing list of adapters to look for for removal + with one adapter per line. Otherwise will look for default adapters (see AdapterRemoval + man page), or can be modified to remove user-specified adapters via ext.args. + ontologies: + - edam: "http://edamontology.org/format_2330" # Textual format +output: + singles_truncated: + - - meta: + type: map + description: Groovy Map containing sample information + - ${prefix}.truncated.fastq.gz: + type: file + description: | + Adapter trimmed FastQ files of either single-end reads, or singleton + 'orphaned' reads from merging of paired-end data (i.e., one of the pair + was lost due to filtering thresholds). 
+ pattern: "*.truncated.fastq.gz" + ontologies: + - edam: "http://edamontology.org/format_1930" # FASTQ + - edam: http://edamontology.org/format_3989 # GZIP format + discarded: + - - meta: + type: map + description: Groovy Map containing sample information + - ${prefix}.discarded.fastq.gz: + type: file + description: | + Adapter trimmed FastQ files of reads that did not pass filtering + thresholds. + pattern: "*.discarded.fastq.gz" + ontologies: + - edam: "http://edamontology.org/format_1930" # FASTQ + - edam: http://edamontology.org/format_3989 # GZIP format + paired_truncated: + - - meta: + type: map + description: Groovy Map containing sample information + - ${prefix}.pair{1,2}.truncated.fastq.gz: + type: file + description: | + Adapter trimmed R{1,2} FastQ files of paired-end reads that did not merge + with their respective R{1,2} pair due to long templates. The respective pair + is stored in 'pair{1,2}_truncated'. + pattern: "*.pair{1,2}.truncated.fastq.gz" + ontologies: + - edam: "http://edamontology.org/format_1930" # FASTQ + collapsed: + - - meta: + type: map + description: Groovy Map containing sample information + - ${prefix}.collapsed.fastq.gz: + type: file + description: | + Collapsed FastQ of paired-end reads that successfully merged with their + respective R1 pair but were not trimmed. + pattern: "*.collapsed.fastq.gz" + ontologies: + - edam: "http://edamontology.org/format_1930" # FASTQ + - edam: http://edamontology.org/format_3989 # GZIP format + collapsed_truncated: + - - meta: + type: map + description: Groovy Map containing sample information + - ${prefix}.collapsed.truncated.fastq.gz: + type: file + description: | + Collapsed FastQ of paired-end reads that successfully merged with their + respective R1 pair and were trimmed of adapter due to sufficient overlap. 
+ pattern: "*.collapsed.truncated.fastq.gz" + ontologies: + - edam: "http://edamontology.org/format_1930" # FASTQ + - edam: http://edamontology.org/format_3989 # GZIP format + paired_interleaved: + - - meta: + type: map + description: Groovy Map containing sample information + - ${prefix}.paired.fastq.gz: + type: file + description: | + Write paired-end reads to a single file, interleaving mate 1 and mate 2 reads + pattern: "*.paired.fastq.gz" + ontologies: + - edam: "http://edamontology.org/format_1930" # FASTQ + - edam: http://edamontology.org/format_3989 # GZIP format + settings: + - - meta: + type: map + description: Groovy Map containing sample information + - "*.settings": + type: file + description: AdapterRemoval log file + pattern: "*.settings" + ontologies: + - edam: "http://edamontology.org/format_2330" # Textual format + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: "http://edamontology.org/format_3750" # YAML +authors: + - "@maxibor" + - "@jfy133" +maintainers: + - "@maxibor" + - "@jfy133" diff --git a/modules/nf-core/adapterremoval/tests/main.nf.test b/modules/nf-core/adapterremoval/tests/main.nf.test new file mode 100644 index 00000000..e2ef1090 --- /dev/null +++ b/modules/nf-core/adapterremoval/tests/main.nf.test @@ -0,0 +1,228 @@ +nextflow_process { + + name "Test Process ADAPTERREMOVAL" + script "../main.nf" + config "./nextflow.config" + process "ADAPTERREMOVAL" + + tag "modules" + tag "modules_nfcore" + tag "adapterremoval" + + test("single-end - sarscov2 - [fastq]") { + when { + params { + adapterremoval_args = "" + } + process { + """ + input[0] = [ + [ id:'test', single_end:true ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.singles_truncated, + 
process.out.settings, + process.out.versions + ).match() }, + ) + } + } + + test("paired-end - sarscov2 - [fastq]") { + + when { + params { + adapterremoval_args = "" + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.paired_truncated, + process.out.settings, + process.out.versions + ).match() } + ) + } + } + + test("paired-end collapse - sarscov2 - [fastq]") { + when { + params { + adapterremoval_args = "--collapse" + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.paired_truncated, + process.out.collapsed, + file(process.out.collapsed_truncated[0][1]).name, // Check for present but is empty + process.out.settings, + process.out.versions + ).match() } + ) + } + + } + + test("paired-end adapterlist - sarscov2 - [fastq]") { + when { + params { + adapterremoval_args = "" + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = file(params.modules_testdata_base_path + '/delete_me/adapterremoval/adapterremoval_adapterlist.txt', checkIfExists: 
true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.paired_truncated, + process.out.settings, + process.out.versions + ).match() } + ) + } + } + + test("paired-end collapse adapterlist - sarscov2 - [fastq] -- stub") { + options "-stub" + when { + params { + adapterremoval_args = "--collapse" + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = file(params.modules_testdata_base_path + '/delete_me/adapterremoval/adapterremoval_adapterlist.txt', checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out, + path(process.out.versions[0]).yaml + ).match() } + ) + } + } + + test("single-end - sarscov2 - [fastq] -- stub") { + options "-stub" + when { + params { + adapterremoval_args = "" + } + process { + """ + input[0] = [ + [ id:'test', single_end:true ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out, + path(process.out.versions[0]).yaml + ).match() } + ) + } + } + + test("paired-end - sarscov2 - [fastq] -- stub") { + options "-stub" + when { + params { + adapterremoval_args = "" + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert 
snapshot( + process.out, + path(process.out.versions[0]).yaml + ).match() } + ) + } + + } + +} diff --git a/modules/nf-core/adapterremoval/tests/main.nf.test.snap b/modules/nf-core/adapterremoval/tests/main.nf.test.snap new file mode 100644 index 00000000..b3176a9a --- /dev/null +++ b/modules/nf-core/adapterremoval/tests/main.nf.test.snap @@ -0,0 +1,525 @@ +{ + "paired-end collapse adapterlist - sarscov2 - [fastq] -- stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.truncated.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.discarded.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.pair1.truncated.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.pair2.truncated.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.collapsed.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + "test.collapsed.truncated.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": false + }, + "test.settings:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + "versions.yml:md5,00bcc9f0b864b96eeee21bc11773ee67" + ], + "collapsed": [ + [ + { + "id": "test", + "single_end": false + }, + "test.collapsed.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "collapsed_truncated": [ + [ + { + "id": "test", + "single_end": false + }, + "test.collapsed.truncated.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "discarded": [ + [ + { + "id": "test", + "single_end": false + }, + "test.discarded.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + 
"paired_interleaved": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "paired_truncated": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.pair1.truncated.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.pair2.truncated.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "settings": [ + [ + { + "id": "test", + "single_end": false + }, + "test.settings:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "singles_truncated": [ + [ + { + "id": "test", + "single_end": false + }, + "test.truncated.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,00bcc9f0b864b96eeee21bc11773ee67" + ] + }, + { + "ADAPTERREMOVAL": { + "adapterremoval": "2.3.2" + } + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.2" + }, + "timestamp": "2025-06-01T17:50:28.27707234" + }, + "single-end - sarscov2 - [fastq] -- stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.truncated.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.discarded.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + [ + { + "id": "test", + "single_end": true + }, + "test.settings:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + "versions.yml:md5,00bcc9f0b864b96eeee21bc11773ee67" + ], + "collapsed": [ + + ], + "collapsed_truncated": [ + + ], + "discarded": [ + [ + { + "id": "test", + "single_end": true + }, + "test.discarded.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "paired_interleaved": [ + + ], + "paired_truncated": [ + + ], + "settings": [ + [ + { + "id": "test", + "single_end": true + }, + "test.settings:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "singles_truncated": [ + [ + { + "id": "test", + "single_end": true + }, + 
"test.truncated.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,00bcc9f0b864b96eeee21bc11773ee67" + ] + }, + { + "ADAPTERREMOVAL": { + "adapterremoval": "2.3.2" + } + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-06-02T09:16:31.193367689" + }, + "single-end - sarscov2 - [fastq]": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.truncated.fastq.gz:md5,119d1b1a0a71ca6e080ff7c53ee0b690" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.settings:md5,2fd3d5d703b63ba33a83021fccf25f77" + ] + ], + [ + "versions.yml:md5,00bcc9f0b864b96eeee21bc11773ee67" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.2" + }, + "timestamp": "2023-12-09T19:19:36.429445996" + }, + "paired-end - sarscov2 - [fastq] -- stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.truncated.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.discarded.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.pair1.truncated.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.pair2.truncated.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "3": [ + + ], + "4": [ + + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": false + }, + "test.settings:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "7": [ + "versions.yml:md5,00bcc9f0b864b96eeee21bc11773ee67" + ], + "collapsed": [ + + ], + "collapsed_truncated": [ + + ], + "discarded": [ + [ + { + "id": "test", + "single_end": false + }, + "test.discarded.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "paired_interleaved": [ + [ + { + "id": "test", + "single_end": false + 
}, + "test.paired.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "paired_truncated": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.pair1.truncated.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test.pair2.truncated.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "settings": [ + [ + { + "id": "test", + "single_end": false + }, + "test.settings:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "singles_truncated": [ + [ + { + "id": "test", + "single_end": false + }, + "test.truncated.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,00bcc9f0b864b96eeee21bc11773ee67" + ] + }, + { + "ADAPTERREMOVAL": { + "adapterremoval": "2.3.2" + } + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-06-02T09:16:37.301565253" + }, + "paired-end - sarscov2 - [fastq]": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.pair1.truncated.fastq.gz:md5,e3da014fbb9b428e952c62e8f0fb6402", + "test.pair2.truncated.fastq.gz:md5,2ebae722295ea66d84075a3b042e2b42" + ] + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.settings:md5,b8a451d3981b327f3fdb44f40ba2d6d1" + ] + ], + [ + "versions.yml:md5,00bcc9f0b864b96eeee21bc11773ee67" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.2" + }, + "timestamp": "2023-12-09T19:19:42.88672676" + }, + "paired-end collapse - sarscov2 - [fastq]": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.pair1.truncated.fastq.gz:md5,d6113ac35457dc942e4e47d6530e1d5e", + "test.pair2.truncated.fastq.gz:md5,304c48e7ad50d46acf73ae6de4014f64" + ] + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.collapsed.fastq.gz:md5,369452751050a7f1e31b839702d61417" + ] + ], + "test.collapsed.truncated.fastq.gz", + [ + [ + { + "id": "test", + "single_end": false + }, + "test.settings:md5,7f0b2328152226e46101a535cce718b3" + ] + ], + [ + 
"versions.yml:md5,00bcc9f0b864b96eeee21bc11773ee67" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.2" + }, + "timestamp": "2025-06-01T18:13:46.86825259" + }, + "paired-end adapterlist - sarscov2 - [fastq]": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.pair1.truncated.fastq.gz:md5,e3da014fbb9b428e952c62e8f0fb6402", + "test.pair2.truncated.fastq.gz:md5,2ebae722295ea66d84075a3b042e2b42" + ] + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.settings:md5,36d47d9b40dbc178167d1ae0274d18f3" + ] + ], + [ + "versions.yml:md5,00bcc9f0b864b96eeee21bc11773ee67" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.2" + }, + "timestamp": "2023-12-09T19:19:57.26567964" + } +} diff --git a/modules/nf-core/adapterremoval/tests/nextflow.config b/modules/nf-core/adapterremoval/tests/nextflow.config new file mode 100644 index 00000000..f0c9b67f --- /dev/null +++ b/modules/nf-core/adapterremoval/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: 'ADAPTERREMOVAL' { + ext.args = params.adapterremoval_args + } +} diff --git a/modules/nf-core/adapterremoval/tests/tags.yml b/modules/nf-core/adapterremoval/tests/tags.yml new file mode 100644 index 00000000..d3375ec5 --- /dev/null +++ b/modules/nf-core/adapterremoval/tests/tags.yml @@ -0,0 +1,2 @@ +adapterremoval: + - "modules/nf-core/adapterremoval/**" diff --git a/modules/nf-core/bcftools/filter/environment.yml b/modules/nf-core/bcftools/filter/environment.yml new file mode 100644 index 00000000..ba863b38 --- /dev/null +++ b/modules/nf-core/bcftools/filter/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.22.1 + # renovate: datasource=conda depName=bioconda/bcftools + - bioconda::bcftools=1.22 diff --git 
a/modules/nf-core/bcftools/filter/main.nf b/modules/nf-core/bcftools/filter/main.nf new file mode 100644 index 00000000..4b558818 --- /dev/null +++ b/modules/nf-core/bcftools/filter/main.nf @@ -0,0 +1,78 @@ +process BCFTOOLS_FILTER { + tag "${meta.id}" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/47/474a5ea8dc03366b04df884d89aeacc4f8e6d1ad92266888e7a8e7958d07cde8/data' + : 'community.wave.seqera.io/library/bcftools_htslib:0a3fa2654b52006f'}" + + input: + tuple val(meta), path(vcf), path(tbi) + + output: + tuple val(meta), path("*.${extension}"), emit: vcf + tuple val(meta), path("*.tbi"), emit: tbi, optional: true + tuple val(meta), path("*.csi"), emit: csi, optional: true + tuple val("${task.process}"), val('bcftools'), eval("bcftools --version | sed '1!d; s/^.*bcftools //'"), topic: versions, emit: versions_bcftools + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + extension = args.contains("--output-type b") || args.contains("-Ob") + ? "bcf.gz" + : args.contains("--output-type u") || args.contains("-Ou") + ? "bcf" + : args.contains("--output-type z") || args.contains("-Oz") + ? "vcf.gz" + : args.contains("--output-type v") || args.contains("-Ov") + ? "vcf" + : "vcf" + + if ("${vcf}" == "${prefix}.${extension}") { + error("Input and output names are the same, set prefix in module configuration to disambiguate!") + } + + """ + bcftools filter \\ + --output ${prefix}.${extension} \\ + --threads ${task.cpus} \\ + ${args} \\ + ${vcf} + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + extension = args.contains("--output-type b") || args.contains("-Ob") + ? "bcf.gz" + : args.contains("--output-type u") || args.contains("-Ou") + ? 
"bcf" + : args.contains("--output-type z") || args.contains("-Oz") + ? "vcf.gz" + : args.contains("--output-type v") || args.contains("-Ov") + ? "vcf" + : "vcf" + def index = args.contains("--write-index=tbi") || args.contains("-W=tbi") + ? "tbi" + : args.contains("--write-index=csi") || args.contains("-W=csi") + ? "csi" + : args.contains("--write-index") || args.contains("-W") + ? "csi" + : "" + def create_cmd = extension.endsWith(".gz") ? "echo '' | gzip >" : "touch" + def create_index = extension.endsWith(".gz") && index.matches("csi|tbi") ? "touch ${prefix}.${extension}.${index}" : "" + + if ("${vcf}" == "${prefix}.${extension}") { + error("Input and output names are the same, set prefix in module configuration to disambiguate!") + } + + """ + ${create_cmd} ${prefix}.${extension} + ${create_index} + """ +} diff --git a/modules/nf-core/bcftools/filter/meta.yml b/modules/nf-core/bcftools/filter/meta.yml new file mode 100644 index 00000000..4ab447ff --- /dev/null +++ b/modules/nf-core/bcftools/filter/meta.yml @@ -0,0 +1,92 @@ +name: bcftools_filter +description: Filters VCF files +keywords: + - variant calling + - filtering + - VCF +tools: + - filter: + description: | + Apply fixed-threshold filters to VCF files. + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: http://www.htslib.org/doc/bcftools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:bcftools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: VCF input file + pattern: "*.{vcf,bcf,vcf.gz,bcf.gz}" + ontologies: [] + - tbi: + type: file + description: VCF index file + pattern: "*.tbi" + ontologies: [] +output: + vcf: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.${extension}": + type: file + description: VCF filtered output file + pattern: "*.{vcf,bcf,vcf.gz,bcf.gz}" + ontologies: [] + tbi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.tbi": + type: file + description: Alternative VCF file index + pattern: "*.tbi" + ontologies: [] + csi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.csi": + type: file + description: Default VCF file index + pattern: "*.csi" + ontologies: [] + versions_bcftools: + - - ${task.process}: + type: string + description: The process the versions were collected from + - bcftools: + type: string + description: The tool name + - "bcftools --version | sed '1!d; s/^.*bcftools //'": + type: string + description: The command used to generate the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - bcftools: + type: string + description: The tool name + - "bcftools --version | sed '1!d; s/^.*bcftools //'": + type: string + description: The command used to generate the version of the tool +authors: + - "@joseespinosa" + - "@drpatelh" +maintainers: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/bcftools/filter/tests/main.nf.test b/modules/nf-core/bcftools/filter/tests/main.nf.test new file mode 100644 index 00000000..42cb8ab9 --- /dev/null +++ b/modules/nf-core/bcftools/filter/tests/main.nf.test @@ -0,0 +1,276 @@ +nextflow_process { + + name "Test Process BCFTOOLS_FILTER" + script "../main.nf" + process "BCFTOOLS_FILTER" + + tag "modules" + tag "modules_nfcore" + tag "bcftools" + tag "bcftools/filter" + + test("sarscov2 - vcf") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [id:"vcf_test"], + file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true), + [] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match("vcf") } + ) + } + } + + test("sarscov2 - vcf_gz_index") { + + config "./vcf_gz_index.config" + + when { + process { + """ + input[0] = [ + [id:"vcf_test"], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true), + [] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + } + + test("sarscov2 - vcf_gz_index_csi") { + + config "./vcf_gz_index_csi.config" + + when { + process { + """ + input[0] = [ + [id:"vcf_test"], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true), + [] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + } + + test("sarscov2 - vcf_gz_index_tbi") { + + config "./vcf_gz_index_tbi.config" + + when { + process { + """ + input[0] = [ + [id:"vcf_test"], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true), + [] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.csi.collect { it.collect { it instanceof Map ? 
it : file(it).name } }, + process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() }, + { assert process.out.tbi[0][1].endsWith(".tbi") } + ) + } + } + + test("sarscov2 - vcf - bcf output") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [id:"bcf_test"], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true), + [] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match("bcf output") } + ) + } + } + + test("sarscov2 - vcf.gz, tbi - region filter") { + + config "./region_filter.config" + + when { + process { + """ + input[0] = [ + [id:"bcf_test"], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match("region filter") } + ) + } + } + + test("sarscov2 - vcf - stub") { + + config "./nextflow.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [id:"vcf_test"], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true), + [] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match("vcf - stub") } + ) + } + } + + test("sarscov2 - vcf_gz_index - stub") { + + config "./vcf_gz_index.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [id:"vcf_test"], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true), + [] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.csi[0][1].endsWith(".csi") 
} + ) + } + } + + test("sarscov2 - vcf_gz_index_csi - stub") { + + config "./vcf_gz_index_csi.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [id:"vcf_test"], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true), + [] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + } + + test("sarscov2 - vcf_gz_index_tbi - stub") { + + config "./vcf_gz_index_tbi.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [id:"vcf_test"], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true), + [] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.tbi[0][1].endsWith(".tbi") } + ) + } + } +} diff --git a/modules/nf-core/bcftools/filter/tests/main.nf.test.snap b/modules/nf-core/bcftools/filter/tests/main.nf.test.snap new file mode 100644 index 00000000..798dade7 --- /dev/null +++ b/modules/nf-core/bcftools/filter/tests/main.nf.test.snap @@ -0,0 +1,514 @@ +{ + "region filter": { + "content": [ + { + "0": [ + [ + { + "id": "bcf_test" + }, + "bcf_test_vcf.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + "BCFTOOLS_FILTER", + "bcftools", + "1.22" + ] + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "bcf_test" + }, + "bcf_test_vcf.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_FILTER", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:02:38.075538496" + }, + "sarscov2 - vcf_gz_index_tbi - stub": { + "content": [ + { + "0": [ + [ + { + "id": "vcf_test" + }, + "vcf_test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ 
+ { + "id": "vcf_test" + }, + "vcf_test_vcf.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + [ + "BCFTOOLS_FILTER", + "bcftools", + "1.22" + ] + ], + "csi": [ + + ], + "tbi": [ + [ + { + "id": "vcf_test" + }, + "vcf_test_vcf.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "vcf": [ + [ + { + "id": "vcf_test" + }, + "vcf_test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_FILTER", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:03:09.787136694" + }, + "vcf": { + "content": [ + { + "0": [ + [ + { + "id": "vcf_test" + }, + "vcf_test.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + "BCFTOOLS_FILTER", + "bcftools", + "1.22" + ] + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "vcf_test" + }, + "vcf_test.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_FILTER", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:02:00.400038419" + }, + "bcf output": { + "content": [ + { + "0": [ + [ + { + "id": "bcf_test" + }, + "bcf_test.bcf.gz:md5,c8a304c8d2892039201154153c8cd536" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + "BCFTOOLS_FILTER", + "bcftools", + "1.22" + ] + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "bcf_test" + }, + "bcf_test.bcf.gz:md5,c8a304c8d2892039201154153c8cd536" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_FILTER", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:02:30.237925195" + }, + "sarscov2 - vcf_gz_index": { + "content": [ + [ + [ + { + "id": "vcf_test" + }, + "vcf_test_vcf.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + [ + [ + { + "id": "vcf_test" + 
}, + "vcf_test_vcf.vcf.gz.csi" + ] + ], + [ + + ], + { + "versions_bcftools": [ + [ + "BCFTOOLS_FILTER", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:02:07.949745874" + }, + "sarscov2 - vcf_gz_index_csi": { + "content": [ + [ + [ + { + "id": "vcf_test" + }, + "vcf_test_vcf.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + [ + [ + { + "id": "vcf_test" + }, + "vcf_test_vcf.vcf.gz.csi" + ] + ], + [ + + ], + { + "versions_bcftools": [ + [ + "BCFTOOLS_FILTER", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:02:15.417109301" + }, + "vcf - stub": { + "content": [ + { + "0": [ + [ + { + "id": "vcf_test" + }, + "vcf_test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + "BCFTOOLS_FILTER", + "bcftools", + "1.22" + ] + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "vcf_test" + }, + "vcf_test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_FILTER", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:02:46.265722559" + }, + "sarscov2 - vcf_gz_index - stub": { + "content": [ + { + "0": [ + [ + { + "id": "vcf_test" + }, + "vcf_test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + [ + { + "id": "vcf_test" + }, + "vcf_test_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + "BCFTOOLS_FILTER", + "bcftools", + "1.22" + ] + ], + "csi": [ + [ + { + "id": "vcf_test" + }, + "vcf_test_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "vcf_test" + }, + "vcf_test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_FILTER", + "bcftools", + "1.22" + ] + ] + } 
+ ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:02:54.301083152" + }, + "sarscov2 - vcf_gz_index_csi - stub": { + "content": [ + { + "0": [ + [ + { + "id": "vcf_test" + }, + "vcf_test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + [ + { + "id": "vcf_test" + }, + "vcf_test_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + "BCFTOOLS_FILTER", + "bcftools", + "1.22" + ] + ], + "csi": [ + [ + { + "id": "vcf_test" + }, + "vcf_test_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "vcf_test" + }, + "vcf_test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_FILTER", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:03:02.260111336" + }, + "sarscov2 - vcf_gz_index_tbi": { + "content": [ + [ + [ + { + "id": "vcf_test" + }, + "vcf_test_vcf.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + [ + + ], + [ + [ + { + "id": "vcf_test" + }, + "vcf_test_vcf.vcf.gz.tbi" + ] + ], + { + "versions_bcftools": [ + [ + "BCFTOOLS_FILTER", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:02:23.055607519" + } +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/filter/tests/nextflow.config b/modules/nf-core/bcftools/filter/tests/nextflow.config new file mode 100644 index 00000000..4e960c8d --- /dev/null +++ b/modules/nf-core/bcftools/filter/tests/nextflow.config @@ -0,0 +1,3 @@ +process { + ext.args = {"--no-version${meta.id == 'bcf_test' ? 
' --output-type b' : ' --output-type z'}"} +} diff --git a/modules/nf-core/bcftools/filter/tests/region_filter.config b/modules/nf-core/bcftools/filter/tests/region_filter.config new file mode 100644 index 00000000..b18fb4bf --- /dev/null +++ b/modules/nf-core/bcftools/filter/tests/region_filter.config @@ -0,0 +1,4 @@ +process { + ext.prefix = { "${meta.id}_vcf" } + ext.args = "--output-type z -r MT192765.1 --no-version" +} diff --git a/modules/nf-core/bcftools/filter/tests/vcf_gz_index.config b/modules/nf-core/bcftools/filter/tests/vcf_gz_index.config new file mode 100644 index 00000000..7dd696ee --- /dev/null +++ b/modules/nf-core/bcftools/filter/tests/vcf_gz_index.config @@ -0,0 +1,4 @@ +process { + ext.prefix = { "${meta.id}_vcf" } + ext.args = "--output-type z --write-index --no-version" +} diff --git a/modules/nf-core/bcftools/filter/tests/vcf_gz_index_csi.config b/modules/nf-core/bcftools/filter/tests/vcf_gz_index_csi.config new file mode 100644 index 00000000..aebffb6f --- /dev/null +++ b/modules/nf-core/bcftools/filter/tests/vcf_gz_index_csi.config @@ -0,0 +1,4 @@ +process { + ext.prefix = { "${meta.id}_vcf" } + ext.args = "--output-type z --write-index=csi --no-version" +} diff --git a/modules/nf-core/bcftools/filter/tests/vcf_gz_index_tbi.config b/modules/nf-core/bcftools/filter/tests/vcf_gz_index_tbi.config new file mode 100644 index 00000000..b192ae7d --- /dev/null +++ b/modules/nf-core/bcftools/filter/tests/vcf_gz_index_tbi.config @@ -0,0 +1,4 @@ +process { + ext.prefix = { "${meta.id}_vcf" } + ext.args = "--output-type z --write-index=tbi --no-version" +} diff --git a/modules/nf-core/bcftools/index/environment.yml b/modules/nf-core/bcftools/index/environment.yml new file mode 100644 index 00000000..ba863b38 --- /dev/null +++ b/modules/nf-core/bcftools/index/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - 
bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.22.1 + # renovate: datasource=conda depName=bioconda/bcftools + - bioconda::bcftools=1.22 diff --git a/modules/nf-core/bcftools/index/main.nf b/modules/nf-core/bcftools/index/main.nf new file mode 100644 index 00000000..8635a1a2 --- /dev/null +++ b/modules/nf-core/bcftools/index/main.nf @@ -0,0 +1,40 @@ +process BCFTOOLS_INDEX { + tag "${meta.id}" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/47/474a5ea8dc03366b04df884d89aeacc4f8e6d1ad92266888e7a8e7958d07cde8/data' + : 'community.wave.seqera.io/library/bcftools_htslib:0a3fa2654b52006f'}" + + input: + tuple val(meta), path(vcf) + + output: + tuple val(meta), path("*.csi"), emit: csi, optional: true + tuple val(meta), path("*.tbi"), emit: tbi, optional: true + tuple val("${task.process}"), val('bcftools'), eval("bcftools --version | sed '1!d; s/^.*bcftools //'"), topic: versions, emit: versions_bcftools + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + + """ + bcftools \\ + index \\ + ${args} \\ + --threads ${task.cpus} \\ + ${vcf} + """ + + stub: + def args = task.ext.args ?: '' + def extension = args.contains("--tbi") || args.contains("-t") + ? 
"tbi" + : "csi" + """ + touch ${vcf}.${extension} + """ +} diff --git a/modules/nf-core/bcftools/index/meta.yml b/modules/nf-core/bcftools/index/meta.yml new file mode 100644 index 00000000..4e4bdc8d --- /dev/null +++ b/modules/nf-core/bcftools/index/meta.yml @@ -0,0 +1,82 @@ +name: bcftools_index +description: Index VCF tools +keywords: + - vcf + - index + - bcftools + - csi + - tbi +tools: + - bcftools: + description: BCFtools is a set of utilities that manipulate variant calls in the + Variant Call Format (VCF) and its binary counterpart BCF. All commands work + transparently with both VCFs and BCFs, both uncompressed and BGZF-compressed. Most + commands accept VCF, bgzipped VCF and BCF with filetype detected automatically + even when streaming from a pipe. Indexed VCF and BCF will work in all situations. + Un-indexed VCF and BCF and streams will work in most, but not all situations. + homepage: https://samtools.github.io/bcftools/ + documentation: https://samtools.github.io/bcftools/howtos/index.html + tool_dev_url: https://github.com/samtools/bcftools + doi: "10.1093/gigascience/giab008" + licence: ["MIT", "GPL-3.0-or-later"] + identifier: biotools:bcftools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: VCF file (optionally GZIPPED) + pattern: "*.{vcf,vcf.gz}" + ontologies: [] +output: + csi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.csi": + type: file + description: Default VCF file index file + pattern: "*.csi" + ontologies: [] + tbi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.tbi": + type: file + description: Alternative VCF file index file for larger files (activated with + -t parameter) + pattern: "*.tbi" + ontologies: [] + versions_bcftools: + - - ${task.process}: + type: string + description: The process the versions were collected from + - bcftools: + type: string + description: The tool name + - "bcftools --version | sed '1!d; s/^.*bcftools //'": + type: string + description: The command used to generate the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - bcftools: + type: string + description: The tool name + - "bcftools --version | sed '1!d; s/^.*bcftools //'": + type: string + description: The command used to generate the version of the tool +authors: + - "@jfy133" +maintainers: + - "@jfy133" diff --git a/modules/nf-core/bcftools/index/tests/main.nf.test b/modules/nf-core/bcftools/index/tests/main.nf.test new file mode 100644 index 00000000..b38c6adb --- /dev/null +++ b/modules/nf-core/bcftools/index/tests/main.nf.test @@ -0,0 +1,108 @@ +nextflow_process { + + name "Test Process BCFTOOLS_INDEX" + script "../main.nf" + process "BCFTOOLS_INDEX" + + tag "modules" + tag "modules_nfcore" + tag "bcftools" + tag "bcftools/index" + + test("sarscov2 - vcf - csi") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.csi.collect { it.collect { it instanceof Map ? 
it : file(it).name } }, + process.out.findAll { key, val -> key.startsWith("versions") }).match() + } + ) + } + } + + test("sarscov2 - vcf - tbi") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.findAll { key, val -> key.startsWith("versions") }).match() + } + ) + } + } + + test("sarscov2 - vcf - csi - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - vcf - tbi - stub") { + + config "./nextflow.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/bcftools/index/tests/main.nf.test.snap b/modules/nf-core/bcftools/index/tests/main.nf.test.snap new file mode 100644 index 00000000..2074e974 --- /dev/null +++ b/modules/nf-core/bcftools/index/tests/main.nf.test.snap @@ -0,0 +1,148 @@ +{ + "sarscov2 - vcf - csi - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + [ + "BCFTOOLS_INDEX", + "bcftools", + "1.22" + ] + ], + "csi": [ + [ + { + "id": "test" + }, + 
"test.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tbi": [ + + ], + "versions_bcftools": [ + [ + "BCFTOOLS_INDEX", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:03:32.869223843" + }, + "sarscov2 - vcf - tbi": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.vcf.gz.tbi" + ] + ], + { + "versions_bcftools": [ + [ + "BCFTOOLS_INDEX", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:03:24.708477718" + }, + "sarscov2 - vcf - tbi - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test" + }, + "test.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + "BCFTOOLS_INDEX", + "bcftools", + "1.22" + ] + ], + "csi": [ + + ], + "tbi": [ + [ + { + "id": "test" + }, + "test.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_INDEX", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:03:40.444304193" + }, + "sarscov2 - vcf - csi": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.vcf.gz.csi" + ] + ], + { + "versions_bcftools": [ + [ + "BCFTOOLS_INDEX", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:03:17.363152216" + } +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/index/tests/nextflow.config b/modules/nf-core/bcftools/index/tests/nextflow.config new file mode 100644 index 00000000..db83f7e5 --- /dev/null +++ b/modules/nf-core/bcftools/index/tests/nextflow.config @@ -0,0 +1,3 @@ +process { + ext.args = '--tbi' +} diff --git a/modules/nf-core/bcftools/norm/environment.yml b/modules/nf-core/bcftools/norm/environment.yml new file mode 100644 index 00000000..ba863b38 --- /dev/null +++ 
b/modules/nf-core/bcftools/norm/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.22.1 + # renovate: datasource=conda depName=bioconda/bcftools + - bioconda::bcftools=1.22 diff --git a/modules/nf-core/bcftools/norm/main.nf b/modules/nf-core/bcftools/norm/main.nf new file mode 100644 index 00000000..443c8bbb --- /dev/null +++ b/modules/nf-core/bcftools/norm/main.nf @@ -0,0 +1,71 @@ +process BCFTOOLS_NORM { + tag "${meta.id}" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/47/474a5ea8dc03366b04df884d89aeacc4f8e6d1ad92266888e7a8e7958d07cde8/data' + : 'community.wave.seqera.io/library/bcftools_htslib:0a3fa2654b52006f'}" + + input: + tuple val(meta), path(vcf), path(tbi) + tuple val(meta2), path(fasta) + + output: + tuple val(meta), path("*.{vcf,vcf.gz,bcf,bcf.gz}"), emit: vcf + tuple val(meta), path("*.tbi"), emit: tbi, optional: true + tuple val(meta), path("*.csi"), emit: csi, optional: true + tuple val("${task.process}"), val('bcftools'), eval("bcftools --version | sed '1!d; s/^.*bcftools //'"), topic: versions, emit: versions_bcftools + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '--output-type z' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--output-type b") || args.contains("-Ob") + ? "bcf.gz" + : args.contains("--output-type u") || args.contains("-Ou") + ? "bcf" + : args.contains("--output-type z") || args.contains("-Oz") + ? "vcf.gz" + : args.contains("--output-type v") || args.contains("-Ov") + ? 
"vcf" + : "vcf.gz" + """ + bcftools norm \\ + --fasta-ref ${fasta} \\ + --output ${prefix}.${extension} \\ + ${args} \\ + --threads ${task.cpus} \\ + ${vcf} + """ + + stub: + def args = task.ext.args ?: '--output-type z' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--output-type b") || args.contains("-Ob") + ? "bcf.gz" + : args.contains("--output-type u") || args.contains("-Ou") + ? "bcf" + : args.contains("--output-type z") || args.contains("-Oz") + ? "vcf.gz" + : args.contains("--output-type v") || args.contains("-Ov") + ? "vcf" + : "vcf.gz" + def index = '' + if (extension in ['vcf.gz', 'bcf', 'bcf.gz']) { + if (['--write-index=tbi', '-W=tbi'].any { arg -> args.contains(arg) } && extension == 'vcf.gz') { + index = 'tbi' + } + else if (['--write-index=tbi', '-W=tbi', '--write-index=csi', '-W=csi', '--write-index', '-W'].any { arg -> args.contains(arg) }) { + index = 'csi' + } + } + def create_cmd = extension.endsWith(".gz") ? "echo '' | gzip >" : "touch" + def create_index = index ? "touch ${prefix}.${extension}.${index}" : "" + """ + ${create_cmd} ${prefix}.${extension} + ${create_index} + """ +} diff --git a/modules/nf-core/bcftools/norm/meta.yml b/modules/nf-core/bcftools/norm/meta.yml new file mode 100644 index 00000000..9feecac0 --- /dev/null +++ b/modules/nf-core/bcftools/norm/meta.yml @@ -0,0 +1,107 @@ +name: bcftools_norm +description: Normalize VCF file +keywords: + - normalize + - norm + - variant calling + - VCF +tools: + - norm: + description: | + Normalize VCF files. + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: http://www.htslib.org/doc/bcftools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:bcftools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: | + The vcf file to be normalized + e.g. 
'file1.vcf' + pattern: "*.{vcf,vcf.gz}" + ontologies: [] + - tbi: + type: file + description: | + An optional index of the VCF file (for when the VCF is compressed) + pattern: "*.vcf.gz.tbi" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: FASTA reference file + pattern: "*.{fasta,fa}" + ontologies: [] +output: + vcf: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.{vcf,vcf.gz,bcf,bcf.gz}": + type: file + description: One of uncompressed VCF (.vcf), compressed VCF (.vcf.gz), compressed + BCF (.bcf.gz) or uncompressed BCF (.bcf) normalized output file + pattern: "*.{vcf,vcf.gz,bcf,bcf.gz}" + ontologies: [] + tbi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.tbi": + type: file + description: Alternative VCF file index + pattern: "*.tbi" + ontologies: [] + csi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.csi": + type: file + description: Default VCF file index + pattern: "*.csi" + ontologies: [] + versions_bcftools: + - - ${task.process}: + type: string + description: The process the versions were collected from + - bcftools: + type: string + description: The tool name + - "bcftools --version | sed '1!d; s/^.*bcftools //'": + type: string + description: The command used to generate the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - bcftools: + type: string + description: The tool name + - "bcftools --version | sed '1!d; s/^.*bcftools //'": + type: string + description: The command used to generate the version of the tool +authors: + - "@abhi18av" + - "@ramprasadn" +maintainers: + - "@abhi18av" + - "@ramprasadn" diff --git a/modules/nf-core/bcftools/norm/tests/main.nf.test b/modules/nf-core/bcftools/norm/tests/main.nf.test new file mode 100644 index 00000000..05851753 --- /dev/null +++ b/modules/nf-core/bcftools/norm/tests/main.nf.test @@ -0,0 +1,545 @@ +nextflow_process { + + name "Test Process BCFTOOLS_NORM" + script "../main.nf" + process "BCFTOOLS_NORM" + + tag "modules" + tag "modules_nfcore" + tag "bcftools" + tag "bcftools/norm" + + test("sarscov2 - [ vcf, [] ], fasta") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - [ vcf, [] ], fasta - vcf_gz_index") { + + config "./vcf_gz_index.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta 
map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } } + ).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + } + + test("sarscov2 - [ vcf, [] ], fasta - vcf_gz_index_csi") { + + config "./vcf_gz_index_csi.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } } + ).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + } + + test("sarscov2 - [ vcf, [] ], fasta - vcf_gz_index_tbi") { + + config "./vcf_gz_index_tbi.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.csi.collect { it.collect { it instanceof Map ? 
it : file(it).name } } + ).match() }, + { assert process.out.tbi[0][1].endsWith(".tbi") } + ) + } + } + + test("sarscov2 - [ vcf, tbi ], fasta") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - [ vcf, tbi ], fasta - vcf output") { + + config "./nextflow.vcf.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - [ vcf, tbi ], fasta - vcf_gz output") { + + config "./nextflow.vcf.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ 
+ } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() } + ) + } + } + + test("sarscov2 - [ vcf, tbi ], fasta - bcf output") { + + config "./nextflow.bcf.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - [ vcf, tbi ], fasta - bcf_gz output") { + + config "./nextflow.bcf_gz.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - [ vcf, [] ], fasta - stub") { + + config "./nextflow.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: 
true), + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - [ vcf, tbi ], fasta -stub") { + + config "./nextflow.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - [ vcf, tbi ], fasta - vcf output -stub") { + + config "./nextflow.vcf.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - [ vcf, tbi ], fasta - vcf_gz output - stub") { + + config "./nextflow.vcf.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + 
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - [ vcf, tbi ], fasta - bcf output - stub") { + + config "./nextflow.bcf.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - [ vcf, tbi ], fasta - bcf_gz output - stub") { + + config "./nextflow.bcf_gz.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - [ vcf, [] ], fasta - vcf_gz_index - stub") { + + config "./vcf_gz_index.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + 
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + } + + test("sarscov2 - [ vcf, [] ], fasta - vcf_gz_index_csi - stub") { + + config "./vcf_gz_index_csi.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + } + + test("sarscov2 - [ vcf, [] ], fasta - vcf_gz_index_tbi - stub") { + + config "./vcf_gz_index_tbi.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.tbi[0][1].endsWith(".tbi") } + ) + } + } + + +} diff --git a/modules/nf-core/bcftools/norm/tests/main.nf.test.snap b/modules/nf-core/bcftools/norm/tests/main.nf.test.snap new file mode 100644 index 00000000..ee2dadf7 --- /dev/null +++ 
b/modules/nf-core/bcftools/norm/tests/main.nf.test.snap @@ -0,0 +1,876 @@ +{ + "sarscov2 - [ vcf, tbi ], fasta - vcf_gz output - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.vcf:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.vcf:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:09:29.987030961" + }, + "sarscov2 - [ vcf, [] ], fasta - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:09:06.488086505" + }, + "sarscov2 - [ vcf, tbi ], fasta - vcf output": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.vcf:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.vcf:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:08:34.863776359" + }, + "sarscov2 - [ vcf, [] ], fasta - vcf_gz_index - stub": { + "content": [ + { + "0": [ 
+ [ + { + "id": "test" + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ], + "csi": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:09:54.718705045" + }, + "sarscov2 - [ vcf, tbi ], fasta - vcf_gz output": { + "content": [ + [ + [ + { + "id": "test" + }, + "test_norm.vcf:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + [ + + ], + [ + + ], + { + "versions_bcftools": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:08:43.007377633" + }, + "sarscov2 - [ vcf, [] ], fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.vcf.gz:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.vcf.gz:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:07:54.877084219" + }, + "sarscov2 - [ vcf, tbi ], fasta - vcf output -stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ], + "csi": [ + + 
], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:09:22.220435939" + }, + "sarscov2 - [ vcf, tbi ], fasta - bcf_gz output": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.bcf:md5,bf88706ef69c44ca9e287bc953ba3593" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.bcf:md5,bf88706ef69c44ca9e287bc953ba3593" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:08:58.483532889" + }, + "sarscov2 - [ vcf, [] ], fasta - vcf_gz_index_csi - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ], + "csi": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:10:03.22576704" + }, + "sarscov2 - [ vcf, [] ], fasta - vcf_gz_index_tbi": { + "content": [ + [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + 
"timestamp": "2024-06-05T13:53:28.356741947" + }, + "sarscov2 - [ vcf, tbi ], fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.vcf.gz:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.vcf.gz:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:08:27.281315407" + }, + "sarscov2 - [ vcf, tbi ], fasta -stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:09:14.249715835" + }, + "sarscov2 - [ vcf, tbi ], fasta - bcf_gz output - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:09:46.665932019" + }, + "sarscov2 - [ vcf, [] ], fasta - vcf_gz_index": { + "content": [ + [ + [ + { + "id": "test" + }, + 
"test_vcf.vcf.gz:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz.csi" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-05T07:52:58.381931979" + }, + "sarscov2 - [ vcf, tbi ], fasta - bcf output - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.bcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.bcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:09:38.144449162" + }, + "sarscov2 - [ vcf, [] ], fasta - vcf_gz_index_tbi - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ], + "csi": [ + + ], + "tbi": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:10:10.602984345" + }, + "sarscov2 - [ vcf, [] ], fasta - vcf_gz_index_csi": { + "content": [ + [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz.csi" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": 
"2024-06-05T13:53:09.808834237" + }, + "sarscov2 - [ vcf, tbi ], fasta - bcf output": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.bcf.gz:md5,638c3c25bdd495c90ecbccb69ee77f07" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.bcf.gz:md5,638c3c25bdd495c90ecbccb69ee77f07" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:08:51.053195842" + } +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/norm/tests/nextflow.bcf.config b/modules/nf-core/bcftools/norm/tests/nextflow.bcf.config new file mode 100644 index 00000000..b79af868 --- /dev/null +++ b/modules/nf-core/bcftools/norm/tests/nextflow.bcf.config @@ -0,0 +1,4 @@ +process { + ext.args = '-m -any --output-type b --no-version' + ext.prefix = "test_norm" +} diff --git a/modules/nf-core/bcftools/norm/tests/nextflow.bcf_gz.config b/modules/nf-core/bcftools/norm/tests/nextflow.bcf_gz.config new file mode 100644 index 00000000..f36f397c --- /dev/null +++ b/modules/nf-core/bcftools/norm/tests/nextflow.bcf_gz.config @@ -0,0 +1,4 @@ +process { + ext.args = '-m -any --output-type u --no-version' + ext.prefix = "test_norm" +} diff --git a/modules/nf-core/bcftools/norm/tests/nextflow.config b/modules/nf-core/bcftools/norm/tests/nextflow.config new file mode 100644 index 00000000..510803b4 --- /dev/null +++ b/modules/nf-core/bcftools/norm/tests/nextflow.config @@ -0,0 +1,4 @@ +process { + ext.args = '-m -any --no-version' + ext.prefix = "test_norm" +} diff --git a/modules/nf-core/bcftools/norm/tests/nextflow.vcf.config b/modules/nf-core/bcftools/norm/tests/nextflow.vcf.config new file mode 100644 index 00000000..10bf93e3 --- /dev/null +++ b/modules/nf-core/bcftools/norm/tests/nextflow.vcf.config @@ -0,0 +1,4 
@@ +process { + ext.args = '-m -any --output-type v --no-version' + ext.prefix = "test_norm" +} diff --git a/modules/nf-core/bcftools/norm/tests/nextflow.vcf_gz.config b/modules/nf-core/bcftools/norm/tests/nextflow.vcf_gz.config new file mode 100644 index 00000000..b31dd2de --- /dev/null +++ b/modules/nf-core/bcftools/norm/tests/nextflow.vcf_gz.config @@ -0,0 +1,4 @@ +process { + ext.args = '-m -any --output-type z --no-version' + ext.prefix = "test_norm" +} diff --git a/modules/nf-core/bcftools/norm/tests/vcf_gz_index.config b/modules/nf-core/bcftools/norm/tests/vcf_gz_index.config new file mode 100644 index 00000000..7dd696ee --- /dev/null +++ b/modules/nf-core/bcftools/norm/tests/vcf_gz_index.config @@ -0,0 +1,4 @@ +process { + ext.prefix = { "${meta.id}_vcf" } + ext.args = "--output-type z --write-index --no-version" +} diff --git a/modules/nf-core/bcftools/norm/tests/vcf_gz_index_csi.config b/modules/nf-core/bcftools/norm/tests/vcf_gz_index_csi.config new file mode 100644 index 00000000..aebffb6f --- /dev/null +++ b/modules/nf-core/bcftools/norm/tests/vcf_gz_index_csi.config @@ -0,0 +1,4 @@ +process { + ext.prefix = { "${meta.id}_vcf" } + ext.args = "--output-type z --write-index=csi --no-version" +} diff --git a/modules/nf-core/bcftools/norm/tests/vcf_gz_index_tbi.config b/modules/nf-core/bcftools/norm/tests/vcf_gz_index_tbi.config new file mode 100644 index 00000000..b192ae7d --- /dev/null +++ b/modules/nf-core/bcftools/norm/tests/vcf_gz_index_tbi.config @@ -0,0 +1,4 @@ +process { + ext.prefix = { "${meta.id}_vcf" } + ext.args = "--output-type z --write-index=tbi --no-version" +} diff --git a/modules/nf-core/bcftools/query/environment.yml b/modules/nf-core/bcftools/query/environment.yml new file mode 100644 index 00000000..ba863b38 --- /dev/null +++ b/modules/nf-core/bcftools/query/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - 
conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.22.1 + # renovate: datasource=conda depName=bioconda/bcftools + - bioconda::bcftools=1.22 diff --git a/modules/nf-core/bcftools/query/main.nf b/modules/nf-core/bcftools/query/main.nf new file mode 100644 index 00000000..726360f0 --- /dev/null +++ b/modules/nf-core/bcftools/query/main.nf @@ -0,0 +1,46 @@ +process BCFTOOLS_QUERY { + tag "${meta.id}" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/47/474a5ea8dc03366b04df884d89aeacc4f8e6d1ad92266888e7a8e7958d07cde8/data' + : 'community.wave.seqera.io/library/bcftools_htslib:0a3fa2654b52006f'}" + + input: + tuple val(meta), path(vcf), path(tbi) + path regions + path targets + path samples + + output: + tuple val(meta), path("*.${suffix}"), emit: output + tuple val("${task.process}"), val('bcftools'), eval("bcftools --version | sed '1!d; s/^.*bcftools //'"), topic: versions, emit: versions_bcftools + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + suffix = task.ext.suffix ?: "txt" + def regions_file = regions ? "--regions-file ${regions}" : "" + def targets_file = targets ? "--targets-file ${targets}" : "" + def samples_file = samples ? 
"--samples-file ${samples}" : "" + """ + bcftools query \\ + ${regions_file} \\ + ${targets_file} \\ + ${samples_file} \\ + ${args} \\ + ${vcf} \\ + > ${prefix}.${suffix} + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + suffix = task.ext.suffix ?: "txt" + """ + touch ${prefix}.${suffix} \\ + """ +} diff --git a/modules/nf-core/bcftools/query/meta.yml b/modules/nf-core/bcftools/query/meta.yml new file mode 100644 index 00000000..6bcb5e57 --- /dev/null +++ b/modules/nf-core/bcftools/query/meta.yml @@ -0,0 +1,89 @@ +name: bcftools_query +description: Extracts fields from VCF or BCF files and outputs them in user-defined + format. +keywords: + - query + - variant calling + - bcftools + - VCF +tools: + - query: + description: | + Extracts fields from VCF or BCF files and outputs them in user-defined format. + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: http://www.htslib.org/doc/bcftools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:bcftools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: | + The vcf file to be queried. + pattern: "*.{vcf.gz, vcf}" + ontologies: [] + - tbi: + type: file + description: | + The tab index for the VCF file to be inspected. + pattern: "*.tbi" + ontologies: [] + - regions: + type: file + description: | + Optionally, restrict the operation to regions listed in this file. + ontologies: [] + - targets: + type: file + description: | + Optionally, restrict the operation to regions listed in this file (doesn't rely upon index files) + ontologies: [] + - samples: + type: file + description: | + Optional, file of sample names to be included or excluded. + e.g. 'file.tsv' + ontologies: [] +output: + output: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.${suffix}": + type: file + description: BCFTools query output file + ontologies: [] + versions_bcftools: + - - ${task.process}: + type: string + description: The process the versions were collected from + - bcftools: + type: string + description: The tool name + - "bcftools --version | sed '1!d; s/^.*bcftools //'": + type: string + description: The command used to generate the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - bcftools: + type: string + description: The tool name + - "bcftools --version | sed '1!d; s/^.*bcftools //'": + type: string + description: The command used to generate the version of the tool +authors: + - "@abhi18av" + - "@drpatelh" +maintainers: + - "@abhi18av" + - "@drpatelh" diff --git a/modules/nf-core/bcftools/query/tests/main.nf.test b/modules/nf-core/bcftools/query/tests/main.nf.test new file mode 100644 index 00000000..63ac5af8 --- /dev/null +++ b/modules/nf-core/bcftools/query/tests/main.nf.test @@ -0,0 +1,97 @@ +nextflow_process { + + name "Test Process BCFTOOLS_QUERY" + script "../main.nf" + process "BCFTOOLS_QUERY" + + tag "modules" + tag "modules_nfcore" + tag "bcftools" + tag "bcftools/query" + + config "./nextflow.config" + + test("sarscov2 - [vcf, tbi], [], [], []") { + + when { + process { + """ + input[0] = [ + [ id:'out' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.output, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() } + ) + } + } + + test("sarscov2 - [vcf, tbi], vcf, tsv, []") { + + when { + process { + """ + input[0] 
= [ + [ id:'out' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz', checkIfExists: true) + input[2] = file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.targets.tsv.gz', checkIfExists: true) + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.output, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() } + ) + } + } + + test("sarscov2 - [vcf, tbi], [], [], [] - stub") { + + when { + process { + """ + input[0] = [ + [ id:'out' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.output[0][1]).name, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() } + ) + } + } +} diff --git a/modules/nf-core/bcftools/query/tests/main.nf.test.snap b/modules/nf-core/bcftools/query/tests/main.nf.test.snap new file mode 100644 index 00000000..5168ef3f --- /dev/null +++ b/modules/nf-core/bcftools/query/tests/main.nf.test.snap @@ -0,0 +1,73 @@ +{ + "sarscov2 - [vcf, tbi], vcf, tsv, []": { + "content": [ + [ + [ + { + "id": "out" + }, + "out.txt:md5,75a6bd0084e2e1838cf7baba11b99d19" + ] + ], + { + "versions_bcftools": [ + [ + "BCFTOOLS_QUERY", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:16:54.523612853" + }, + "sarscov2 - [vcf, 
tbi], [], [], [] - stub": { + "content": [ + "out.txt", + { + "versions_bcftools": [ + [ + "BCFTOOLS_QUERY", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:17:00.64798632" + }, + "sarscov2 - [vcf, tbi], [], [], []": { + "content": [ + [ + [ + { + "id": "out" + }, + "out.txt:md5,87a2ab194e1ee3219b44e58429ec3307" + ] + ], + { + "versions_bcftools": [ + [ + "BCFTOOLS_QUERY", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:16:47.953130141" + } +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/query/tests/nextflow.config b/modules/nf-core/bcftools/query/tests/nextflow.config new file mode 100644 index 00000000..8547ec10 --- /dev/null +++ b/modules/nf-core/bcftools/query/tests/nextflow.config @@ -0,0 +1,3 @@ +process { + ext.args = "-f '%CHROM %POS %REF %ALT[%SAMPLE=%GT]'" +} diff --git a/modules/nf-core/bcftools/sort/environment.yml b/modules/nf-core/bcftools/sort/environment.yml new file mode 100644 index 00000000..ba863b38 --- /dev/null +++ b/modules/nf-core/bcftools/sort/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.22.1 + # renovate: datasource=conda depName=bioconda/bcftools + - bioconda::bcftools=1.22 diff --git a/modules/nf-core/bcftools/sort/main.nf b/modules/nf-core/bcftools/sort/main.nf new file mode 100644 index 00000000..e0dfad2d --- /dev/null +++ b/modules/nf-core/bcftools/sort/main.nf @@ -0,0 +1,72 @@ +process BCFTOOLS_SORT { + tag "${meta.id}" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/47/474a5ea8dc03366b04df884d89aeacc4f8e6d1ad92266888e7a8e7958d07cde8/data' + : 'community.wave.seqera.io/library/bcftools_htslib:0a3fa2654b52006f'}" + + input: + tuple val(meta), path(vcf) + + output: + tuple val(meta), path("*.{vcf,vcf.gz,bcf,bcf.gz}"), emit: vcf + tuple val(meta), path("*.tbi"), emit: tbi, optional: true + tuple val(meta), path("*.csi"), emit: csi, optional: true + tuple val("${task.process}"), val('bcftools'), eval("bcftools --version | sed '1!d; s/^.*bcftools //'"), topic: versions, emit: versions_bcftools + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '--output-type z' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--output-type b") || args.contains("-Ob") + ? "bcf.gz" + : args.contains("--output-type u") || args.contains("-Ou") + ? "bcf" + : args.contains("--output-type z") || args.contains("-Oz") + ? "vcf.gz" + : args.contains("--output-type v") || args.contains("-Ov") + ? "vcf" + : "vcf" + def max_memory = task.memory ? "--max-mem ${task.memory.toUnit('MB') * 0.9}M" : "" + """ + bcftools \\ + sort \\ + --output ${prefix}.${extension} \\ + --temp-dir . \\ + ${max_memory} \\ + ${args} \\ + ${vcf} + """ + + stub: + def args = task.ext.args ?: '--output-type z' + def prefix = task.ext.prefix ?: "${meta.id}" + + def extension = args.contains("--output-type b") || args.contains("-Ob") + ? "bcf.gz" + : args.contains("--output-type u") || args.contains("-Ou") + ? "bcf" + : args.contains("--output-type z") || args.contains("-Oz") + ? "vcf.gz" + : args.contains("--output-type v") || args.contains("-Ov") + ? "vcf" + : "vcf" + def index = args.contains("--write-index=tbi") || args.contains("-W=tbi") + ? "tbi" + : args.contains("--write-index=csi") || args.contains("-W=csi") + ? "csi" + : args.contains("--write-index") || args.contains("-W") + ? "csi" + : "" + def create_cmd = extension.endsWith(".gz") ? 
"echo '' | gzip >" : "touch" + def create_index = extension.endsWith(".gz") && index.matches("csi|tbi") ? "touch ${prefix}.${extension}.${index}" : "" + + """ + ${create_cmd} ${prefix}.${extension} + ${create_index} + """ +} diff --git a/modules/nf-core/bcftools/sort/meta.yml b/modules/nf-core/bcftools/sort/meta.yml new file mode 100644 index 00000000..1c3f2a54 --- /dev/null +++ b/modules/nf-core/bcftools/sort/meta.yml @@ -0,0 +1,85 @@ +name: bcftools_sort +description: Sorts VCF files +keywords: + - sorting + - VCF + - variant calling +tools: + - sort: + description: Sort VCF files by coordinates. + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: http://www.htslib.org/doc/bcftools.html + tool_dev_url: https://github.com/samtools/bcftools + doi: "10.1093/bioinformatics/btp352" + licence: ["MIT"] + identifier: biotools:bcftools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: The VCF/BCF file to be sorted + pattern: "*.{vcf.gz,vcf,bcf}" + ontologies: [] +output: + vcf: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.{vcf,vcf.gz,bcf,bcf.gz}": + type: file + description: Sorted VCF file + pattern: "*.{vcf.gz}" + ontologies: [] + tbi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.tbi": + type: file + description: Alternative VCF file index + pattern: "*.tbi" + ontologies: [] + csi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.csi": + type: file + description: Default VCF file index + pattern: "*.csi" + ontologies: [] + versions_bcftools: + - - ${task.process}: + type: string + description: The process the versions were collected from + - bcftools: + type: string + description: The tool name + - "bcftools --version | sed '1!d; s/^.*bcftools //'": + type: string + description: The command used to generate the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - bcftools: + type: string + description: The tool name + - "bcftools --version | sed '1!d; s/^.*bcftools //'": + type: string + description: The command used to generate the version of the tool +authors: + - "@Gwennid" +maintainers: + - "@Gwennid" diff --git a/modules/nf-core/bcftools/sort/tests/main.nf.test b/modules/nf-core/bcftools/sort/tests/main.nf.test new file mode 100644 index 00000000..bda7bace --- /dev/null +++ b/modules/nf-core/bcftools/sort/tests/main.nf.test @@ -0,0 +1,214 @@ +nextflow_process { + + name "Test Process BCFTOOLS_SORT" + script "../main.nf" + process "BCFTOOLS_SORT" + + tag "modules" + tag "modules_nfcore" + tag "bcftools" + tag "bcftools/sort" + + test("sarscov2 - vcf") { + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match("vcf") } + ) + } + } + + test("sarscov2 - vcf_gz_index") { + + config "./vcf_gz_index.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.csi.collect { it.collect { it 
instanceof Map ? it : file(it).name } }, + process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + } + + test("sarscov2 - vcf_gz_index_csi") { + + config "./vcf_gz_index_csi.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + } + + test("sarscov2 - vcf_gz_index_tbi") { + + config "./vcf_gz_index_tbi.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.tbi.collect { it.collect { it instanceof Map ? 
it : file(it).name } }, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() }, + { assert process.out.tbi[0][1].endsWith(".tbi") } + ) + } + } + + test("sarscov2 - vcf - stub") { + options "-stub" + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - vcf_gz_index - stub") { + + config "./vcf_gz_index.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + } + + test("sarscov2 - vcf_gz_index_csi - stub") { + + config "./vcf_gz_index_csi.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + } + + test("sarscov2 - vcf_gz_index_tbi - stub") { + + config "./vcf_gz_index_tbi.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.tbi[0][1].endsWith(".tbi") } + ) + } + } +} diff --git a/modules/nf-core/bcftools/sort/tests/main.nf.test.snap 
b/modules/nf-core/bcftools/sort/tests/main.nf.test.snap new file mode 100644 index 00000000..9b9e4dc7 --- /dev/null +++ b/modules/nf-core/bcftools/sort/tests/main.nf.test.snap @@ -0,0 +1,408 @@ +{ + "sarscov2 - vcf_gz_index_tbi - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + [ + "BCFTOOLS_SORT", + "bcftools", + "1.22" + ] + ], + "csi": [ + + ], + "tbi": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_SORT", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:19:24.286732272" + }, + "vcf": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + "BCFTOOLS_SORT", + "bcftools", + "1.22" + ] + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_SORT", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:18:38.783455397" + }, + "sarscov2 - vcf_gz_index": { + "content": [ + [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz.csi" + ] + ], + [ + + ], + { + "versions_bcftools": [ + [ + "BCFTOOLS_SORT", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:18:45.081447395" + }, + "sarscov2 - 
vcf_gz_index_csi": { + "content": [ + [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz.csi" + ] + ], + [ + + ], + { + "versions_bcftools": [ + [ + "BCFTOOLS_SORT", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:18:51.194304667" + }, + "sarscov2 - vcf_gz_index - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + "BCFTOOLS_SORT", + "bcftools", + "1.22" + ] + ], + "csi": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_SORT", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:19:11.413154111" + }, + "sarscov2 - vcf_gz_index_csi - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + "BCFTOOLS_SORT", + "bcftools", + "1.22" + ] + ], + "csi": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_SORT", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:19:17.770087535" + }, + "sarscov2 - 
vcf - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + "BCFTOOLS_SORT", + "bcftools", + "1.22" + ] + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_SORT", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:19:04.409593559" + }, + "sarscov2 - vcf_gz_index_tbi": { + "content": [ + [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + [ + + ], + [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz.tbi" + ] + ], + { + "versions_bcftools": [ + [ + "BCFTOOLS_SORT", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:18:57.536633502" + } +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/sort/tests/vcf_gz_index.config b/modules/nf-core/bcftools/sort/tests/vcf_gz_index.config new file mode 100644 index 00000000..aacd1346 --- /dev/null +++ b/modules/nf-core/bcftools/sort/tests/vcf_gz_index.config @@ -0,0 +1,4 @@ +process { + ext.prefix = { "${meta.id}_vcf" } + ext.args = "--output-type z --write-index" +} diff --git a/modules/nf-core/bcftools/sort/tests/vcf_gz_index_csi.config b/modules/nf-core/bcftools/sort/tests/vcf_gz_index_csi.config new file mode 100644 index 00000000..640eb0ba --- /dev/null +++ b/modules/nf-core/bcftools/sort/tests/vcf_gz_index_csi.config @@ -0,0 +1,4 @@ +process { + ext.prefix = { "${meta.id}_vcf" } + ext.args = "--output-type z --write-index=csi" +} diff --git a/modules/nf-core/bcftools/sort/tests/vcf_gz_index_tbi.config b/modules/nf-core/bcftools/sort/tests/vcf_gz_index_tbi.config new file mode 100644 index 00000000..589a50c6 --- /dev/null +++ 
b/modules/nf-core/bcftools/sort/tests/vcf_gz_index_tbi.config @@ -0,0 +1,4 @@ +process { + ext.prefix = { "${meta.id}_vcf" } + ext.args = "--output-type z --write-index=tbi" +} diff --git a/modules/nf-core/bcftools/stats/environment.yml b/modules/nf-core/bcftools/stats/environment.yml new file mode 100644 index 00000000..ba863b38 --- /dev/null +++ b/modules/nf-core/bcftools/stats/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.22.1 + # renovate: datasource=conda depName=bioconda/bcftools + - bioconda::bcftools=1.22 diff --git a/modules/nf-core/bcftools/stats/main.nf b/modules/nf-core/bcftools/stats/main.nf new file mode 100644 index 00000000..ac77dd5a --- /dev/null +++ b/modules/nf-core/bcftools/stats/main.nf @@ -0,0 +1,50 @@ +process BCFTOOLS_STATS { + tag "${meta.id}" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/47/474a5ea8dc03366b04df884d89aeacc4f8e6d1ad92266888e7a8e7958d07cde8/data' + : 'community.wave.seqera.io/library/bcftools_htslib:0a3fa2654b52006f'}" + + input: + tuple val(meta), path(vcf), path(tbi) + tuple val(meta2), path(regions) + tuple val(meta3), path(targets) + tuple val(meta4), path(samples) + tuple val(meta5), path(exons) + tuple val(meta6), path(fasta) + + output: + tuple val(meta), path("*stats.txt"), emit: stats + tuple val("${task.process}"), val('bcftools'), eval("bcftools --version | sed '1!d; s/^.*bcftools //'"), topic: versions, emit: versions_bcftools + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def regions_file = regions ? "--regions-file ${regions}" : "" + def targets_file = targets ? "--targets-file ${targets}" : "" + def samples_file = samples ? "--samples-file ${samples}" : "" + def reference_fasta = fasta ? "--fasta-ref ${fasta}" : "" + def exons_file = exons ? "--exons ${exons}" : "" + """ + bcftools stats \\ + ${args} \\ + ${regions_file} \\ + ${targets_file} \\ + ${samples_file} \\ + ${reference_fasta} \\ + ${exons_file} \\ + ${vcf} > ${prefix}.bcftools_stats.txt + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + touch ${prefix}.bcftools_stats.txt + """ +} diff --git a/modules/nf-core/bcftools/stats/meta.yml b/modules/nf-core/bcftools/stats/meta.yml new file mode 100644 index 00000000..6416fb76 --- /dev/null +++ b/modules/nf-core/bcftools/stats/meta.yml @@ -0,0 +1,129 @@ +name: bcftools_stats +description: Generates stats from VCF files +keywords: + - variant calling + - stats + - VCF +tools: + - stats: + description: | + Parses VCF or BCF and produces text file stats which is suitable for + machine processing and can be plotted using plot-vcfstats. 
+ homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: http://www.htslib.org/doc/bcftools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:bcftools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: VCF input file + pattern: "*.{vcf}" + ontologies: [] + - tbi: + type: file + description: | + The tab index for the VCF file to be inspected. Optional: only required when parameter regions is chosen. + pattern: "*.tbi" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - regions: + type: file + description: | + Optionally, restrict the operation to regions listed in this file. (VCF, BED or tab-delimited) + ontologies: [] + - - meta3: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - targets: + type: file + description: | + Optionally, restrict the operation to regions listed in this file (doesn't rely upon tbi index files) + ontologies: [] + - - meta4: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - samples: + type: file + description: | + Optional, file of sample names to be included or excluded. + e.g. 'file.tsv' + ontologies: [] + - - meta5: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - exons: + type: file + description: | + Tab-delimited file with exons for indel frameshifts (chr,beg,end; 1-based, inclusive, optionally bgzip compressed). + e.g. 'exons.tsv.gz' + ontologies: [] + - - meta6: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: | + Faidx indexed reference sequence file to determine INDEL context. 
+ e.g. 'reference.fa' + ontologies: [] +output: + stats: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*stats.txt": + type: file + description: Text output file containing stats + pattern: "*_{stats.txt}" + ontologies: [] + versions_bcftools: + - - ${task.process}: + type: string + description: The process the versions were collected from + - bcftools: + type: string + description: The tool name + - "bcftools --version | sed '1!d; s/^.*bcftools //'": + type: string + description: The command used to generate the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - bcftools: + type: string + description: The tool name + - "bcftools --version | sed '1!d; s/^.*bcftools //'": + type: string + description: The command used to generate the version of the tool +authors: + - "@joseespinosa" + - "@drpatelh" + - "@SusiJo" + - "@TCLamnidis" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@SusiJo" + - "@TCLamnidis" diff --git a/modules/nf-core/bcftools/stats/tests/main.nf.test b/modules/nf-core/bcftools/stats/tests/main.nf.test new file mode 100644 index 00000000..e8a55989 --- /dev/null +++ b/modules/nf-core/bcftools/stats/tests/main.nf.test @@ -0,0 +1,175 @@ +nextflow_process { + + name "Test Process BCFTOOLS_STATS" + script "../main.nf" + process "BCFTOOLS_STATS" + + tag "modules" + tag "modules_nfcore" + tag "bcftools" + tag "bcftools/stats" + + test("sarscov2 - vcf_gz") { + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + []] + input[1] = [ [], [] ] + input[2] = [ [], [] ] + input[3] = [ [], [] ] + input[4] = [ [], [] ] + input[5] = [ [], [] ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.findAll { key, val -> 
key.startsWith("versions") }).match("versions") }, + { assert snapshot(file(process.out.stats.get(0).get(1)).readLines()[0..5]).match() }, + ) + } + } + + test("sarscov2 - vcf_gz - regions") { + + when { + process { + """ + input[0] = [ [ id:'regions_test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true)] + input[1] = [ [id:'regions_test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz', checkIfExists: true) ] + input[2] = [ [], [] ] + input[3] = [ [], [] ] + input[4] = [ [], [] ] + input[5] = [ [], [] ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.findAll { key, val -> key.startsWith("versions") }).match("regions_versions") }, + { assert snapshot(file(process.out.stats.get(0).get(1)).readLines()[0..5]).match() }, + ) + } + } + + test("sarscov2 - vcf_gz - targets") { + + when { + process { + """ + input[0] = [ [ id:'targets_test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] ] + input[1] = [ [], [] ] + input[2] = [ [id:'targets_test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.targets.tsv.gz', checkIfExists: true) + ] + input[3] = [ [], [] ] + input[4] = [ [], [] ] + input[5] = [ [], [] ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.findAll { key, val -> key.startsWith("versions") }).match("targets_versions") }, + { assert snapshot(file(process.out.stats.get(0).get(1)).readLines()[0..5]).match() }, + ) + } + } + + test("sarscov2 - vcf_gz - exons") { + + when { + process { + """ + input[0] = [ [ id:'exon_test' ], // meta map + file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] ] + input[1] = [ [], [] ] + input[2] = [ [], [] ] + input[3] = [ [], [] ] + input[4] = [ [id: "exon_test"], + file(params.modules_testdata_base_path + 'delete_me/bcftools/stats/exons.tsv.gz', checkIfExists: true) ] + input[5] = [ [], [] ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.findAll { key, val -> key.startsWith("versions") }).match("exon_versions") }, + { assert snapshot(file(process.out.stats.get(0).get(1)).readLines()[0..5]).match() }, + ) + } + } + + test("sarscov2 - vcf_gz - reference") { + + when { + process { + """ + input[0] = [ [ id:'ref_test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] ] + input[1] = [ [], [] ] + input[2] = [ [], [] ] + input[3] = [ [], [] ] + input[4] = [ [], [] ] + input[5] = [ [id: 'ref_test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.findAll { key, val -> key.startsWith("versions") }).match("ref_versions") }, + { assert snapshot(file(process.out.stats.get(0).get(1)).readLines()[0..5]).match() }, + ) + } + } + + + test("sarscov2 - vcf_gz - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + []] + input[1] = [ [], [] ] + input[2] = [ [], [] ] + input[3] = [ [], [] ] + input[4] = [ [], [] ] + input[5] = [ [], [] ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/bcftools/stats/tests/main.nf.test.snap b/modules/nf-core/bcftools/stats/tests/main.nf.test.snap new file mode 100644 index 
00000000..870d364b --- /dev/null +++ b/modules/nf-core/bcftools/stats/tests/main.nf.test.snap @@ -0,0 +1,218 @@ +{ + "sarscov2 - vcf_gz - reference": { + "content": [ + [ + "# This file was produced by bcftools stats (1.22+htslib-1.22.1) and can be plotted using plot-vcfstats.", + "# The command line was:\tbcftools stats --fasta-ref genome.fasta test.vcf.gz", + "#", + "# Definition of sets:", + "# ID\t[2]id\t[3]tab-separated file names", + "ID\t0\ttest.vcf.gz" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-09-11T14:58:10.684378" + }, + "sarscov2 - vcf_gz - exons": { + "content": [ + [ + "# This file was produced by bcftools stats (1.22+htslib-1.22.1) and can be plotted using plot-vcfstats.", + "# The command line was:\tbcftools stats --exons exons.tsv.gz test.vcf.gz", + "#", + "# Definition of sets:", + "# ID\t[2]id\t[3]tab-separated file names", + "ID\t0\ttest.vcf.gz" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-09-11T14:58:05.803224" + }, + "versions": { + "content": [ + { + "versions_bcftools": [ + [ + "BCFTOOLS_STATS", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:19:36.943096649" + }, + "sarscov2 - vcf_gz - targets": { + "content": [ + [ + "# This file was produced by bcftools stats (1.22+htslib-1.22.1) and can be plotted using plot-vcfstats.", + "# The command line was:\tbcftools stats --targets-file test2.targets.tsv.gz test.vcf.gz", + "#", + "# Definition of sets:", + "# ID\t[2]id\t[3]tab-separated file names", + "ID\t0\ttest.vcf.gz" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-09-11T14:58:01.121472" + }, + "regions_versions": { + "content": [ + { + "versions_bcftools": [ + [ + "BCFTOOLS_STATS", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": 
"2026-01-20T12:19:43.504543106" + }, + "targets_versions": { + "content": [ + { + "versions_bcftools": [ + [ + "BCFTOOLS_STATS", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:19:49.943149098" + }, + "sarscov2 - vcf_gz - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bcftools_stats.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + "BCFTOOLS_STATS", + "bcftools", + "1.22" + ] + ], + "stats": [ + [ + { + "id": "test" + }, + "test.bcftools_stats.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_STATS", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:20:09.654761076" + }, + "exon_versions": { + "content": [ + { + "versions_bcftools": [ + [ + "BCFTOOLS_STATS", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:19:56.837252836" + }, + "ref_versions": { + "content": [ + { + "versions_bcftools": [ + [ + "BCFTOOLS_STATS", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:20:03.323950267" + }, + "sarscov2 - vcf_gz": { + "content": [ + [ + "# This file was produced by bcftools stats (1.22+htslib-1.22.1) and can be plotted using plot-vcfstats.", + "# The command line was:\tbcftools stats test.vcf.gz", + "#", + "# Definition of sets:", + "# ID\t[2]id\t[3]tab-separated file names", + "ID\t0\ttest.vcf.gz" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-09-11T14:57:51.945138" + }, + "sarscov2 - vcf_gz - regions": { + "content": [ + [ + "# This file was produced by bcftools stats (1.22+htslib-1.22.1) and can be plotted using plot-vcfstats.", + "# The command line was:\tbcftools stats --regions-file test3.vcf.gz test.vcf.gz", 
+ "#", + "# Definition of sets:", + "# ID\t[2]id\t[3]tab-separated file names", + "ID\t0\ttest.vcf.gz" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-09-11T14:57:56.50867" + } +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/view/environment.yml b/modules/nf-core/bcftools/view/environment.yml new file mode 100644 index 00000000..ba863b38 --- /dev/null +++ b/modules/nf-core/bcftools/view/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.22.1 + # renovate: datasource=conda depName=bioconda/bcftools + - bioconda::bcftools=1.22 diff --git a/modules/nf-core/bcftools/view/main.nf b/modules/nf-core/bcftools/view/main.nf new file mode 100644 index 00000000..b3fd7b73 --- /dev/null +++ b/modules/nf-core/bcftools/view/main.nf @@ -0,0 +1,76 @@ +process BCFTOOLS_VIEW { + tag "${meta.id}" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/47/474a5ea8dc03366b04df884d89aeacc4f8e6d1ad92266888e7a8e7958d07cde8/data' + : 'community.wave.seqera.io/library/bcftools_htslib:0a3fa2654b52006f'}" + + input: + tuple val(meta), path(vcf), path(index) + path regions + path targets + path samples + + output: + tuple val(meta), path("*.{vcf,vcf.gz,bcf,bcf.gz}"), emit: vcf + tuple val(meta), path("*.tbi"), emit: tbi, optional: true + tuple val(meta), path("*.csi"), emit: csi, optional: true + tuple val("${task.process}"), val('bcftools'), eval("bcftools --version | sed '1!d; s/^.*bcftools //'"), topic: versions, emit: versions_bcftools + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def regions_file = regions ? "--regions-file ${regions}" : "" + def targets_file = targets ? "--targets-file ${targets}" : "" + def samples_file = samples ? "--samples-file ${samples}" : "" + def extension = args.contains("--output-type b") || args.contains("-Ob") + ? "bcf.gz" + : args.contains("--output-type u") || args.contains("-Ou") + ? "bcf" + : args.contains("--output-type z") || args.contains("-Oz") + ? "vcf.gz" + : args.contains("--output-type v") || args.contains("-Ov") + ? "vcf" + : "vcf" + """ + bcftools view \\ + --output ${prefix}.${extension} \\ + ${regions_file} \\ + ${targets_file} \\ + ${samples_file} \\ + ${args} \\ + --threads ${task.cpus} \\ + ${vcf} + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--output-type b") || args.contains("-Ob") + ? "bcf.gz" + : args.contains("--output-type u") || args.contains("-Ou") + ? "bcf" + : args.contains("--output-type z") || args.contains("-Oz") + ? "vcf.gz" + : args.contains("--output-type v") || args.contains("-Ov") + ? "vcf" + : "vcf" + def stub_index = args.contains("--write-index=tbi") || args.contains("-W=tbi") + ? 
"tbi" + : args.contains("--write-index=csi") || args.contains("-W=csi") + ? "csi" + : args.contains("--write-index") || args.contains("-W") + ? "csi" + : "" + def create_cmd = extension.endsWith(".gz") ? "echo '' | gzip >" : "touch" + def create_index = extension.endsWith(".gz") && stub_index.matches("csi|tbi") ? "touch ${prefix}.${extension}.${stub_index}" : "" + """ + ${create_cmd} ${prefix}.${extension} + ${create_index} + """ +} diff --git a/modules/nf-core/bcftools/view/meta.yml b/modules/nf-core/bcftools/view/meta.yml new file mode 100644 index 00000000..46761894 --- /dev/null +++ b/modules/nf-core/bcftools/view/meta.yml @@ -0,0 +1,112 @@ +name: bcftools_view +description: View, subset and filter VCF or BCF files by position and filtering expression. + Convert between VCF and BCF +keywords: + - variant calling + - view + - bcftools + - VCF +tools: + - view: + description: | + View, subset and filter VCF or BCF files by position and filtering expression. Convert between VCF and BCF + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: http://www.htslib.org/doc/bcftools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:bcftools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: | + The vcf file to be inspected. + e.g. 'file.vcf' + ontologies: [] + - index: + type: file + description: | + The tab index for the VCF file to be inspected. + e.g. 'file.tbi' + ontologies: [] + - regions: + type: file + description: | + Optionally, restrict the operation to regions listed in this file. + e.g. 'file.vcf' + ontologies: [] + - targets: + type: file + description: | + Optionally, restrict the operation to regions listed in this file (doesn't rely upon index files) + e.g. 'file.vcf' + ontologies: [] + - samples: + type: file + description: | + Optional, file of sample names to be included or excluded. 
+ e.g. 'file.tsv' + ontologies: [] +output: + vcf: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.{vcf,vcf.gz,bcf,bcf.gz}": + type: file + description: VCF normalized output file + pattern: "*.{vcf,vcf.gz,bcf,bcf.gz}" + ontologies: [] + tbi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.tbi": + type: file + description: Alternative VCF file index + pattern: "*.tbi" + ontologies: [] + csi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.csi": + type: file + description: Default VCF file index + pattern: "*.csi" + ontologies: [] + versions_bcftools: + - - ${task.process}: + type: string + description: The process the versions were collected from + - bcftools: + type: string + description: The tool name + - "bcftools --version | sed '1!d; s/^.*bcftools //'": + type: string + description: The command used to generate the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - bcftools: + type: string + description: The tool name + - "bcftools --version | sed '1!d; s/^.*bcftools //'": + type: string + description: The command used to generate the version of the tool +authors: + - "@abhi18av" +maintainers: + - "@abhi18av" diff --git a/modules/nf-core/bcftools/view/tests/main.nf.test b/modules/nf-core/bcftools/view/tests/main.nf.test new file mode 100644 index 00000000..3ed2b90c --- /dev/null +++ b/modules/nf-core/bcftools/view/tests/main.nf.test @@ -0,0 +1,288 @@ +nextflow_process { + + name "Test Process BCFTOOLS_VIEW" + script "../main.nf" + process "BCFTOOLS_VIEW" + + tag "modules" + tag "modules_nfcore" + tag "bcftools" + tag "bcftools/view" + + test("sarscov2 - [vcf, tbi], [], [], []") { + + config "./nextflow.config" + + when { + 
process { + """ + input[0] = [ + [ id:'out', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() } + ) + } + } + + test("sarscov2 - [vcf, tbi], [], [], [] - vcf_gz_index") { + + config "./vcf_gz_index.config" + + when { + process { + """ + input[0] = [ + [ id:'out', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.tbi.collect { it.collect { it instanceof Map ? 
it : file(it).name } }, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + } + + test("sarscov2 - [vcf, tbi], [], [], [] - vcf_gz_index_csi") { + + config "./vcf_gz_index_csi.config" + + when { + process { + """ + input[0] = [ + [ id:'out', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + } + + test("sarscov2 - [vcf, tbi], [], [], [] - vcf_gz_index_tbi") { + + config "./vcf_gz_index_tbi.config" + + when { + process { + """ + input[0] = [ + [ id:'out', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.tbi.collect { it.collect { it instanceof Map ? 
it : file(it).name } }, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() }, + { assert process.out.tbi[0][1].endsWith(".tbi") } + ) + } + } + + test("sarscov2 - [vcf, tbi], vcf, tsv, []") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'out', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz', checkIfExists: true) + input[2] = file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.targets.tsv.gz', checkIfExists: true) + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() } + ) + } + } + + test("sarscov2 - [vcf, tbi], [], [], [] - stub") { + + config "./nextflow.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'out', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.vcf[0][1]).name, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() } + ) + } + } + + test("sarscov2 - [vcf, tbi], [], [], [] - vcf_gz_index - stub") { + + config "./vcf_gz_index.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'out', single_end:false ], // meta map + file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + } + + test("sarscov2 - [vcf, tbi], [], [], [] - vcf_gz_index_csi - stub") { + + config "./vcf_gz_index_csi.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'out', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + } + + test("sarscov2 - [vcf, tbi], [], [], [] - vcf_gz_index_tbi - stub") { + + config "./vcf_gz_index_tbi.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'out', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.tbi[0][1].endsWith(".tbi") } + ) + } + } +} diff --git a/modules/nf-core/bcftools/view/tests/main.nf.test.snap b/modules/nf-core/bcftools/view/tests/main.nf.test.snap new file mode 100644 index 00000000..92972d19 --- /dev/null +++ 
b/modules/nf-core/bcftools/view/tests/main.nf.test.snap @@ -0,0 +1,393 @@ +{ + "sarscov2 - [vcf, tbi], [], [], [] - vcf_gz_index_csi - stub": { + "content": [ + { + "0": [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + "BCFTOOLS_VIEW", + "bcftools", + "1.22" + ] + ], + "csi": [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_VIEW", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:21:02.710815567" + }, + "sarscov2 - [vcf, tbi], [], [], [] - vcf_gz_index_tbi": { + "content": [ + [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + [ + + ], + [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz.tbi" + ] + ], + { + "versions_bcftools": [ + [ + "BCFTOOLS_VIEW", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:20:36.071823323" + }, + "sarscov2 - [vcf, tbi], [], [], [] - vcf_gz_index - stub": { + "content": [ + { + "0": [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + "BCFTOOLS_VIEW", + "bcftools", + "1.22" + ] + ], + "csi": [ + [ + { + "id": "out", + "single_end": false + }, + 
"out_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_VIEW", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:20:56.054811979" + }, + "sarscov2 - [vcf, tbi], [], [], [] - vcf_gz_index_tbi - stub": { + "content": [ + { + "0": [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + [ + "BCFTOOLS_VIEW", + "bcftools", + "1.22" + ] + ], + "csi": [ + + ], + "tbi": [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "vcf": [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_VIEW", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:21:09.229049434" + }, + "sarscov2 - [vcf, tbi], vcf, tsv, []": { + "content": [ + [ + [ + { + "id": "out", + "single_end": false + }, + "out.vcf:md5,1bcbd0eff25d316ba915d06463aab17b" + ] + ], + { + "versions_bcftools": [ + [ + "BCFTOOLS_VIEW", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:20:42.68820706" + }, + "sarscov2 - [vcf, tbi], [], [], [] - stub": { + "content": [ + "out.vcf", + { + "versions_bcftools": [ + [ + "BCFTOOLS_VIEW", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:20:49.538986405" + }, + "sarscov2 - [vcf, tbi], [], 
[], [] - vcf_gz_index": { + "content": [ + [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz.csi" + ] + ], + [ + + ], + { + "versions_bcftools": [ + [ + "BCFTOOLS_VIEW", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:20:22.451426124" + }, + "sarscov2 - [vcf, tbi], [], [], []": { + "content": [ + [ + [ + { + "id": "out", + "single_end": false + }, + "out.vcf:md5,8e722884ffb75155212a3fc053918766" + ] + ], + { + "versions_bcftools": [ + [ + "BCFTOOLS_VIEW", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:20:16.040611233" + }, + "sarscov2 - [vcf, tbi], [], [], [] - vcf_gz_index_csi": { + "content": [ + [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + [ + [ + { + "id": "out", + "single_end": false + }, + "out_vcf.vcf.gz.csi" + ] + ], + [ + + ], + { + "versions_bcftools": [ + [ + "BCFTOOLS_VIEW", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:20:29.512063514" + } +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/view/tests/nextflow.config b/modules/nf-core/bcftools/view/tests/nextflow.config new file mode 100644 index 00000000..932e3ba6 --- /dev/null +++ b/modules/nf-core/bcftools/view/tests/nextflow.config @@ -0,0 +1,3 @@ +process { + ext.args = '--no-version --output-type v' +} diff --git a/modules/nf-core/bcftools/view/tests/vcf_gz_index.config b/modules/nf-core/bcftools/view/tests/vcf_gz_index.config new file mode 100644 index 00000000..7dd696ee --- /dev/null +++ b/modules/nf-core/bcftools/view/tests/vcf_gz_index.config @@ -0,0 +1,4 @@ +process { + ext.prefix = { "${meta.id}_vcf" } + 
ext.args = "--output-type z --write-index --no-version" +} diff --git a/modules/nf-core/bcftools/view/tests/vcf_gz_index_csi.config b/modules/nf-core/bcftools/view/tests/vcf_gz_index_csi.config new file mode 100644 index 00000000..aebffb6f --- /dev/null +++ b/modules/nf-core/bcftools/view/tests/vcf_gz_index_csi.config @@ -0,0 +1,4 @@ +process { + ext.prefix = { "${meta.id}_vcf" } + ext.args = "--output-type z --write-index=csi --no-version" +} diff --git a/modules/nf-core/bcftools/view/tests/vcf_gz_index_tbi.config b/modules/nf-core/bcftools/view/tests/vcf_gz_index_tbi.config new file mode 100644 index 00000000..b192ae7d --- /dev/null +++ b/modules/nf-core/bcftools/view/tests/vcf_gz_index_tbi.config @@ -0,0 +1,4 @@ +process { + ext.prefix = { "${meta.id}_vcf" } + ext.args = "--output-type z --write-index=tbi --no-version" +} diff --git a/modules/nf-core/bedtools/subtract/environment.yml b/modules/nf-core/bedtools/subtract/environment.yml new file mode 100644 index 00000000..45c307b0 --- /dev/null +++ b/modules/nf-core/bedtools/subtract/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::bedtools=2.31.1 diff --git a/modules/nf-core/bedtools/subtract/main.nf b/modules/nf-core/bedtools/subtract/main.nf new file mode 100644 index 00000000..1afc3363 --- /dev/null +++ b/modules/nf-core/bedtools/subtract/main.nf @@ -0,0 +1,40 @@ +process BEDTOOLS_SUBTRACT { + tag "${meta.id}" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://depot.galaxyproject.org/singularity/bedtools:2.31.1--hf5e1c6e_0' + : 'biocontainers/bedtools:2.31.1--hf5e1c6e_0'}" + + input: + tuple val(meta), path(intervals1), path(intervals2) + + output: + tuple val(meta), path("*.bed"), emit: bed + tuple val("${task.process}"), val('bedtools'), eval("bedtools --version | sed -e 's/bedtools v//g'"), topic: versions, emit: versions_bedtools + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + if ("${intervals1}" == "${prefix}.bed" || "${intervals2}" == "${prefix}.bed") { + error("Input and output names are the same, use \"task.ext.prefix\" to disambiguate!") + } + """ + bedtools \\ + subtract \\ + -a ${intervals1} \\ + -b ${intervals2} \\ + ${args} \\ + > ${prefix}.bed + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.bed + """ +} diff --git a/modules/nf-core/bedtools/subtract/meta.yml b/modules/nf-core/bedtools/subtract/meta.yml new file mode 100644 index 00000000..4efd8df8 --- /dev/null +++ b/modules/nf-core/bedtools/subtract/meta.yml @@ -0,0 +1,68 @@ +name: bedtools_subtract +description: Finds overlaps between two sets of regions (A and B), removes the overlaps + from A and reports the remaining portion of A. +keywords: + - bed + - gff + - vcf + - subtract +tools: + - bedtools: + description: | + A set of tools for genomic analysis tasks, specifically enabling genome arithmetic (merge, count, complement) on various file types. + documentation: https://bedtools.readthedocs.io/en/latest/content/tools/subtract.html + licence: ["MIT"] + identifier: biotools:bedtools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - intervals1: + type: file + description: BED/GFF/VCF + pattern: "*.{bed,gff,vcf}" + ontologies: [] + - intervals2: + type: file + description: BED/GFF/VCF + pattern: "*.{bed,gff,vcf}" + ontologies: [] +output: + bed: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bed": + type: file + description: File containing the difference between the two sets of features + pattern: "*.bed" + ontologies: [] + versions_bedtools: + - - ${task.process}: + type: string + description: The name of the process + - bedtools: + type: string + description: The name of the tool + - "bedtools --version | sed -e 's/bedtools v//g'": + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - bedtools: + type: string + description: The name of the tool + - "bedtools --version | sed -e 's/bedtools v//g'": + type: eval + description: The expression to obtain the version of the tool +authors: + - "@sidorov-si" +maintainers: + - "@sidorov-si" diff --git a/modules/nf-core/bedtools/subtract/tests/main.nf.test b/modules/nf-core/bedtools/subtract/tests/main.nf.test new file mode 100644 index 00000000..cc4729c8 --- /dev/null +++ b/modules/nf-core/bedtools/subtract/tests/main.nf.test @@ -0,0 +1,36 @@ + +nextflow_process { + + name "Test Process BEDTOOLS_SUBTRACT" + script "../main.nf" + process "BEDTOOLS_SUBTRACT" + + tag "modules" + tag "modules_nfcore" + tag "bedtools" + tag "bedtools/subtract" + + test("test-bedtools-subtract") { + + when { + process { + """ + input[0] = [ + [ id:'test_subtract' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/baits.bed', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true) + ] + + """ + } + } + + then { + assertAll( + { assert
process.success }, + { assert snapshot(process.out).match() } + ) + } + } + +} diff --git a/modules/nf-core/bedtools/subtract/tests/main.nf.test.snap b/modules/nf-core/bedtools/subtract/tests/main.nf.test.snap new file mode 100644 index 00000000..4a8df748 --- /dev/null +++ b/modules/nf-core/bedtools/subtract/tests/main.nf.test.snap @@ -0,0 +1,43 @@ +{ + "test-bedtools-subtract": { + "content": [ + { + "0": [ + [ + { + "id": "test_subtract" + }, + "test_subtract.bed:md5,63513c4dc69e8b481ce3b4b2a9f24259" + ] + ], + "1": [ + [ + "BEDTOOLS_SUBTRACT", + "bedtools", + "2.31.1" + ] + ], + "bed": [ + [ + { + "id": "test_subtract" + }, + "test_subtract.bed:md5,63513c4dc69e8b481ce3b4b2a9f24259" + ] + ], + "versions_bedtools": [ + [ + "BEDTOOLS_SUBTRACT", + "bedtools", + "2.31.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-21T11:35:32.771087338" + } +} \ No newline at end of file diff --git a/modules/nf-core/bowtie2/align/environment.yml b/modules/nf-core/bowtie2/align/environment.yml new file mode 100644 index 00000000..066ff52e --- /dev/null +++ b/modules/nf-core/bowtie2/align/environment.yml @@ -0,0 +1,13 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/bowtie2 + - bioconda::bowtie2=2.5.4 + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.21 + # renovate: datasource=conda depName=bioconda/samtools + - bioconda::samtools=1.21 + - conda-forge::pigz=2.8 diff --git a/modules/nf-core/bowtie2/align/main.nf b/modules/nf-core/bowtie2/align/main.nf new file mode 100644 index 00000000..631d0bf7 --- /dev/null +++ b/modules/nf-core/bowtie2/align/main.nf @@ -0,0 +1,116 @@ +process BOWTIE2_ALIGN { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 
'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b4/b41b403e81883126c3227fc45840015538e8e2212f13abc9ae84e4b98891d51c/data' : + 'community.wave.seqera.io/library/bowtie2_htslib_samtools_pigz:edeb13799090a2a6' }" + + input: + tuple val(meta) , path(reads) + tuple val(meta2), path(index) + tuple val(meta3), path(fasta) + val save_unaligned + val sort_bam + + output: + tuple val(meta), path("*.sam") , emit: sam , optional:true + tuple val(meta), path("*.bam") , emit: bam , optional:true + tuple val(meta), path("*.cram") , emit: cram , optional:true + tuple val(meta), path("*.csi") , emit: csi , optional:true + tuple val(meta), path("*.crai") , emit: crai , optional:true + tuple val(meta), path("*.log") , emit: log + tuple val(meta), path("*fastq.gz") , emit: fastq , optional:true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: "" + def args2 = task.ext.args2 ?: "" + def prefix = task.ext.prefix ?: "${meta.id}" + + def unaligned = "" + def reads_args = "" + if (meta.single_end) { + unaligned = save_unaligned ? "--un-gz ${prefix}.unmapped.fastq.gz" : "" + reads_args = "-U ${reads}" + } else { + unaligned = save_unaligned ? "--un-conc-gz ${prefix}.unmapped.fastq.gz" : "" + reads_args = "-1 ${reads[0]} -2 ${reads[1]}" + } + + def samtools_command = sort_bam ? 'sort' : 'view' + def extension_pattern = /(--output-fmt|-O)+\s+(\S+)/ + def extension_matcher = (args2 =~ extension_pattern) + def extension = extension_matcher.getCount() > 0 ? extension_matcher[0][2].toLowerCase() : "bam" + def reference = fasta && extension=="cram" ? 
"--reference ${fasta}" : "" + if (!fasta && extension=="cram") error "Fasta reference is required for CRAM output" + + """ + INDEX=`find -L ./ -name "*.rev.1.bt2" | sed "s/\\.rev.1.bt2\$//"` + [ -z "\$INDEX" ] && INDEX=`find -L ./ -name "*.rev.1.bt2l" | sed "s/\\.rev.1.bt2l\$//"` + [ -z "\$INDEX" ] && echo "Bowtie2 index files not found" 1>&2 && exit 1 + + bowtie2 \\ + -x \$INDEX \\ + $reads_args \\ + --threads $task.cpus \\ + $unaligned \\ + $args \\ + 2>| >(tee ${prefix}.bowtie2.log >&2) \\ + | samtools $samtools_command $args2 --threads $task.cpus ${reference} -o ${prefix}.${extension} - + + if [ -f ${prefix}.unmapped.fastq.1.gz ]; then + mv ${prefix}.unmapped.fastq.1.gz ${prefix}.unmapped_1.fastq.gz + fi + + if [ -f ${prefix}.unmapped.fastq.2.gz ]; then + mv ${prefix}.unmapped.fastq.2.gz ${prefix}.unmapped_2.fastq.gz + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ + + stub: + def args2 = task.ext.args2 ?: "" + def prefix = task.ext.prefix ?: "${meta.id}" + def extension_pattern = /(--output-fmt|-O)+\s+(\S+)/ + def extension = (args2 ==~ extension_pattern) ? (args2 =~ extension_pattern)[0][2].toLowerCase() : "bam" + def create_unmapped = "" + if (meta.single_end) { + create_unmapped = save_unaligned ? "touch ${prefix}.unmapped.fastq.gz" : "" + } else { + create_unmapped = save_unaligned ? 
"touch ${prefix}.unmapped_1.fastq.gz && touch ${prefix}.unmapped_2.fastq.gz" : "" + } + if (!fasta && extension=="cram") error "Fasta reference is required for CRAM output" + + def create_index = "" + if (extension == "cram") { + create_index = "touch ${prefix}.crai" + } else if (extension == "bam") { + create_index = "touch ${prefix}.csi" + } + + """ + touch ${prefix}.${extension} + ${create_index} + touch ${prefix}.bowtie2.log + ${create_unmapped} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ + +} diff --git a/modules/nf-core/bowtie2/align/meta.yml b/modules/nf-core/bowtie2/align/meta.yml new file mode 100644 index 00000000..0c12b28a --- /dev/null +++ b/modules/nf-core/bowtie2/align/meta.yml @@ -0,0 +1,138 @@ +name: bowtie2_align +description: Align reads to a reference genome using bowtie2 +keywords: + - align + - map + - fasta + - fastq + - genome + - reference +tools: + - bowtie2: + description: | + Bowtie 2 is an ultrafast and memory-efficient tool for aligning + sequencing reads to long reference sequences. + homepage: http://bowtie-bio.sourceforge.net/bowtie2/index.shtml + documentation: http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml + doi: 10.1186/gb-2009-10-3-r25 + licence: ["GPL-3.0-or-later"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'test', single_end:false ] + - index: + type: file + description: Bowtie2 genome index files + pattern: "*.ebwt" + ontologies: [] + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Bowtie2 genome fasta file + pattern: "*.fasta" + ontologies: [] + - save_unaligned: + type: boolean + description: | + Save reads that do not map to the reference (true) or discard them (false) + (default: false) + - sort_bam: + type: boolean + description: use samtools sort (true) or samtools view (false) + pattern: "true or false" +output: + sam: + - - meta: + type: map + description: Groovy Map containing sample information + - "*.sam": + type: file + description: Output SAM file containing read alignments + pattern: "*.sam" + ontologies: [] + bam: + - - meta: + type: map + description: Groovy Map containing sample information + - "*.bam": + type: file + description: Output BAM file containing read alignments + pattern: "*.bam" + ontologies: [] + cram: + - - meta: + type: map + description: Groovy Map containing sample information + - "*.cram": + type: file + description: Output CRAM file containing read alignments + pattern: "*.cram" + ontologies: [] + csi: + - - meta: + type: map + description: Groovy Map containing sample information + - "*.csi": + type: file + description: Output SAM/BAM index for large inputs + pattern: "*.csi" + ontologies: [] + crai: + - - meta: + type: map + description: Groovy Map containing sample information + - "*.crai": + type: file + description: Output CRAM index + pattern: "*.crai" + ontologies: [] + log: + - - meta: + type: map + description: Groovy Map containing sample information + - "*.log": + type: file + description: Alignment log + pattern: "*.log" + ontologies: [] + fastq: + - - meta: + type: map + description: Groovy Map containing sample information + - "*fastq.gz": + type: file + description: Unaligned FastQ files + 
pattern: "*.fastq.gz" + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@joseespinosa" + - "@drpatelh" +maintainers: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/bowtie2/align/tests/cram_crai.config b/modules/nf-core/bowtie2/align/tests/cram_crai.config new file mode 100644 index 00000000..03f1d5e5 --- /dev/null +++ b/modules/nf-core/bowtie2/align/tests/cram_crai.config @@ -0,0 +1,5 @@ +process { + withName: BOWTIE2_ALIGN { + ext.args2 = '--output-fmt cram --write-index' + } +} diff --git a/modules/nf-core/bowtie2/align/tests/large_index.config b/modules/nf-core/bowtie2/align/tests/large_index.config new file mode 100644 index 00000000..b2f0c405 --- /dev/null +++ b/modules/nf-core/bowtie2/align/tests/large_index.config @@ -0,0 +1,5 @@ +process { + withName: BOWTIE2_BUILD { + ext.args = '--large-index' + } +} diff --git a/modules/nf-core/bowtie2/align/tests/main.nf.test b/modules/nf-core/bowtie2/align/tests/main.nf.test new file mode 100644 index 00000000..0de5950f --- /dev/null +++ b/modules/nf-core/bowtie2/align/tests/main.nf.test @@ -0,0 +1,623 @@ +nextflow_process { + + name "Test Process BOWTIE2_ALIGN" + script "../main.nf" + process "BOWTIE2_ALIGN" + tag "modules" + tag "modules_nfcore" + tag "bowtie2" + tag "bowtie2/build" + tag "bowtie2/align" + + test("sarscov2 - fastq, index, fasta, false, false - bam") { + + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, index, fasta, false, false - sam") { + + config "./sam.config" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.sam[0][1]).readLines()[0..4], + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, index, fasta, false, false - sam2") { + + config "./sam2.config" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.sam[0][1]).readLines()[0..4], + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, index, fasta, false, true - bam") { + + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, fasta, false, false - bam") { + + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, fasta, false, true - bam") { + + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, large_index, fasta, false, false - bam") { + + config "./large_index.config" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + 
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], large_index, fasta, false, false - bam") { + + config "./large_index.config" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, fasta, true, false - bam") { 
+ + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, index, fasta, true, false - bam") { + + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + + ) + } + + } + + 
test("sarscov2 - [fastq1, fastq2], index, fasta, true, true - cram") { + + config "./cram_crai.config" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = true //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.cram[0][1]).name, + file(process.out.crai[0][1]).name + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, fasta, false, false - stub") { + + options "-stub" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = 
false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + file(process.out.csi[0][1]).name, + file(process.out.log[0][1]).name, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, index, fasta, true, false - stub") { + + options "-stub" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + file(process.out.csi[0][1]).name, + file(process.out.log[0][1]).name, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + +} diff --git a/modules/nf-core/bowtie2/align/tests/main.nf.test.snap b/modules/nf-core/bowtie2/align/tests/main.nf.test.snap new file mode 100644 index 00000000..4ffd62e9 --- /dev/null +++ b/modules/nf-core/bowtie2/align/tests/main.nf.test.snap @@ -0,0 +1,311 @@ +{ + "sarscov2 - [fastq1, fastq2], large_index, fasta, false, false - bam": { + "content": [ + "test.bam", + [ + [ + { + "id": "test", + "single_end": false + }, + "test.bowtie2.log:md5,bd89ce1b28c93bf822bae391ffcedd19" + ] + ], + [ + + ], + [ + "versions.yml:md5,2768d626cdb54c9a9b5ed7aecbc3ca11" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.4" + }, + "timestamp": 
"2025-03-24T12:45:59.038637" + }, + "sarscov2 - fastq, index, fasta, false, false - sam2": { + "content": [ + [ + "ERR5069949.2151832\t16\tMT192765.1\t17453\t42\t150M\t*\t0\t0\tACGCACATTGCTAACTAAGGGCACACTAGAACCAGAATATTTCAATTCAGTGTGTAGACTTATGAAAACTATAGGTCCAGACATGTTCCTCGGAACTTGTCGGCGTTGTCCTGCTGAAATTGTTGACACTGTGAGTGCTTTGGTTTATGA\tAAAA versions.yml + "${task.process}": + bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//') + END_VERSIONS + """ + + stub: + """ + mkdir bowtie2 + touch bowtie2/${fasta.baseName}.{1..4}.bt2 + touch bowtie2/${fasta.baseName}.rev.{1,2}.bt2 + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bowtie2/build/meta.yml b/modules/nf-core/bowtie2/build/meta.yml new file mode 100644 index 00000000..3e83ecb4 --- /dev/null +++ b/modules/nf-core/bowtie2/build/meta.yml @@ -0,0 +1,53 @@ +name: bowtie2_build +description: Builds bowtie index for reference genome +keywords: + - build + - index + - fasta + - genome + - reference +tools: + - bowtie2: + description: | + Bowtie 2 is an ultrafast and memory-efficient tool for aligning + sequencing reads to long reference sequences. + homepage: http://bowtie-bio.sourceforge.net/bowtie2/index.shtml + documentation: http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml + doi: 10.1038/nmeth.1923 + licence: ["GPL-3.0-or-later"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Input genome fasta file + ontologies: [] +output: + index: + - - meta: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'test', single_end:false ] + - bowtie2: + type: file + description: Bowtie2 genome index files + pattern: "*.bt2" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@joseespinosa" + - "@drpatelh" +maintainers: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/bowtie2/build/tests/main.nf.test b/modules/nf-core/bowtie2/build/tests/main.nf.test new file mode 100644 index 00000000..ee94c19c --- /dev/null +++ b/modules/nf-core/bowtie2/build/tests/main.nf.test @@ -0,0 +1,31 @@ +nextflow_process { + + name "Test Process BOWTIE2_BUILD" + script "../main.nf" + process "BOWTIE2_BUILD" + tag "modules" + tag "modules_nfcore" + tag "bowtie2" + tag "bowtie2/build" + + test("Should run without failures") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assert process.success + assert snapshot(process.out).match() + } + + } + +} diff --git a/modules/nf-core/bowtie2/build/tests/main.nf.test.snap b/modules/nf-core/bowtie2/build/tests/main.nf.test.snap new file mode 100644 index 00000000..ea5711e4 --- /dev/null +++ b/modules/nf-core/bowtie2/build/tests/main.nf.test.snap @@ -0,0 +1,49 @@ +{ + "Should run without failures": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + [ + "genome.1.bt2:md5,cbe3d0bbea55bc57c99b4bfa25b5fbdf", + "genome.2.bt2:md5,47b153cd1319abc88dda532462651fcf", + "genome.3.bt2:md5,4ed93abba181d8dfab2e303e33114777", + "genome.4.bt2:md5,c25be5f8b0378abf7a58c8a880b87626", + "genome.rev.1.bt2:md5,52be6950579598a990570fbcf5372184", + "genome.rev.2.bt2:md5,e3b4ef343dea4dd571642010a7d09597" + ] + ] + ], + "1": [ + "versions.yml:md5,d136fb9c16f0a9fb2ae804b2a5fbc09c" + ], + "index": [ + [ + { + "id": "test" + }, + [ + 
"genome.1.bt2:md5,cbe3d0bbea55bc57c99b4bfa25b5fbdf", + "genome.2.bt2:md5,47b153cd1319abc88dda532462651fcf", + "genome.3.bt2:md5,4ed93abba181d8dfab2e303e33114777", + "genome.4.bt2:md5,c25be5f8b0378abf7a58c8a880b87626", + "genome.rev.1.bt2:md5,52be6950579598a990570fbcf5372184", + "genome.rev.2.bt2:md5,e3b4ef343dea4dd571642010a7d09597" + ] + ] + ], + "versions": [ + "versions.yml:md5,d136fb9c16f0a9fb2ae804b2a5fbc09c" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.02.1" + }, + "timestamp": "2023-11-23T11:51:01.107681997" + } +} \ No newline at end of file diff --git a/modules/nf-core/bowtie2/build/tests/tags.yml b/modules/nf-core/bowtie2/build/tests/tags.yml new file mode 100644 index 00000000..81aa61da --- /dev/null +++ b/modules/nf-core/bowtie2/build/tests/tags.yml @@ -0,0 +1,2 @@ +bowtie2/build: + - modules/nf-core/bowtie2/build/** diff --git a/modules/nf-core/bwamem2/index/environment.yml b/modules/nf-core/bwamem2/index/environment.yml new file mode 100644 index 00000000..f3637444 --- /dev/null +++ b/modules/nf-core/bwamem2/index/environment.yml @@ -0,0 +1,13 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda + +dependencies: + # renovate: datasource=conda depName=bioconda/bwa-mem2 + - bwa-mem2=2.3 + # renovate: datasource=conda depName=bioconda/htslib + - htslib=1.22.1 + # renovate: datasource=conda depName=bioconda/samtools + - samtools=1.22.1 diff --git a/modules/nf-core/bwamem2/index/main.nf b/modules/nf-core/bwamem2/index/main.nf new file mode 100644 index 00000000..62af1e39 --- /dev/null +++ b/modules/nf-core/bwamem2/index/main.nf @@ -0,0 +1,55 @@ +process BWAMEM2_INDEX { + tag "$fasta" + // NOTE Requires 28N GB memory where N is the size of the reference sequence, floor of 280M + // source: https://github.com/bwa-mem2/bwa-mem2/issues/9 + memory { (280.MB * Math.ceil(fasta.size() / 10000000)) * task.attempt } + + conda 
"${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/e0/e05ce34b46ad42810eb29f74e4e304c0cb592b2ca15572929ed8bbaee58faf01/data' : + 'community.wave.seqera.io/library/bwa-mem2_htslib_samtools:db98f81f55b64113' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path("bwamem2"), emit: index + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${fasta}" + def args = task.ext.args ?: '' + """ + mkdir bwamem2 + bwa-mem2 \\ + index \\ + $args \\ + -p bwamem2/${prefix} \\ + $fasta + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bwamem2: \$(echo \$(bwa-mem2 version 2>&1) | sed 's/.* //') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${fasta}" + + """ + mkdir bwamem2 + touch bwamem2/${prefix}.0123 + touch bwamem2/${prefix}.ann + touch bwamem2/${prefix}.pac + touch bwamem2/${prefix}.amb + touch bwamem2/${prefix}.bwt.2bit.64 + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bwamem2: \$(echo \$(bwa-mem2 version 2>&1) | sed 's/.* //') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bwamem2/index/meta.yml b/modules/nf-core/bwamem2/index/meta.yml new file mode 100644 index 00000000..b2aa45fb --- /dev/null +++ b/modules/nf-core/bwamem2/index/meta.yml @@ -0,0 +1,52 @@ +name: bwamem2_index +description: Create BWA-mem2 index for reference genome +keywords: + - index + - fasta + - genome + - reference +tools: + - bwamem2: + description: | + BWA-mem2 is a software package for mapping DNA sequences against + a large reference genome, such as the human genome. 
+ homepage: https://github.com/bwa-mem2/bwa-mem2 + documentation: https://github.com/bwa-mem2/bwa-mem2#usage + licence: ["MIT"] + identifier: "biotools:bwa-mem2" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Input genome fasta file + ontologies: + - edam: "http://edamontology.org/data_2044" # Sequence + - edam: "http://edamontology.org/format_1929" # FASTA +output: + index: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bwamem2: + type: file + description: BWA genome index files + pattern: "*.{0123,amb,ann,bwt.2bit.64,pac}" + ontologies: + - edam: "http://edamontology.org/data_3210" # Genome index + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@maxulysse" +maintainers: + - "@maxulysse" diff --git a/modules/nf-core/bwamem2/index/tests/main.nf.test b/modules/nf-core/bwamem2/index/tests/main.nf.test new file mode 100644 index 00000000..adf44785 --- /dev/null +++ b/modules/nf-core/bwamem2/index/tests/main.nf.test @@ -0,0 +1,62 @@ +nextflow_process { + + name "Test Process BWAMEM2_INDEX" + tag "modules_nfcore" + tag "modules" + tag "bwamem2" + tag "bwamem2/index" + script "../main.nf" + process "BWAMEM2_INDEX" + + test("fasta") { + + when { + process { + """ + input[0] = [ + [id: 'test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.index, + process.out.versions, + path(process.out.versions[0]).yaml + ).match() } + ) + } + } + + test("fasta - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [id: 'test'], + 
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.index, + process.out.versions, + path(process.out.versions[0]).yaml + ).match() } + ) + } + } +} diff --git a/modules/nf-core/bwamem2/index/tests/main.nf.test.snap b/modules/nf-core/bwamem2/index/tests/main.nf.test.snap new file mode 100644 index 00000000..9ad8b20c --- /dev/null +++ b/modules/nf-core/bwamem2/index/tests/main.nf.test.snap @@ -0,0 +1,64 @@ +{ + "fasta - stub": { + "content": [ + [ + [ + { + "id": "test" + }, + [ + "genome.fasta.0123:md5,d41d8cd98f00b204e9800998ecf8427e", + "genome.fasta.amb:md5,d41d8cd98f00b204e9800998ecf8427e", + "genome.fasta.ann:md5,d41d8cd98f00b204e9800998ecf8427e", + "genome.fasta.bwt.2bit.64:md5,d41d8cd98f00b204e9800998ecf8427e", + "genome.fasta.pac:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + [ + "versions.yml:md5,9ffd13d12e7108ed15c58566bc4717d6" + ], + { + "BWAMEM2_INDEX": { + "bwamem2": "2.2.1" + } + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-09-04T08:55:53.219699135" + }, + "fasta": { + "content": [ + [ + [ + { + "id": "test" + }, + [ + "genome.fasta.0123:md5,b02870de80106104abcb03cd9463e7d8", + "genome.fasta.amb:md5,3a68b8b2287e07dd3f5f95f4344ba76e", + "genome.fasta.ann:md5,c32e11f6c859f166c7525a9c1d583567", + "genome.fasta.bwt.2bit.64:md5,d097a1b82dee375d41a1ea69895a9216", + "genome.fasta.pac:md5,983e3d2cd6f36e2546e6d25a0da78d66" + ] + ] + ], + [ + "versions.yml:md5,9ffd13d12e7108ed15c58566bc4717d6" + ], + { + "BWAMEM2_INDEX": { + "bwamem2": "2.2.1" + } + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-09-04T08:55:45.007921901" + } +} \ No newline at end of file diff --git a/modules/nf-core/bwamem2/mem/environment.yml b/modules/nf-core/bwamem2/mem/environment.yml new file mode 100644 index 00000000..f3637444 --- 
/dev/null +++ b/modules/nf-core/bwamem2/mem/environment.yml @@ -0,0 +1,13 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda + +dependencies: + # renovate: datasource=conda depName=bioconda/bwa-mem2 + - bwa-mem2=2.3 + # renovate: datasource=conda depName=bioconda/htslib + - htslib=1.22.1 + # renovate: datasource=conda depName=bioconda/samtools + - samtools=1.22.1 diff --git a/modules/nf-core/bwamem2/mem/main.nf b/modules/nf-core/bwamem2/mem/main.nf new file mode 100644 index 00000000..27910cf6 --- /dev/null +++ b/modules/nf-core/bwamem2/mem/main.nf @@ -0,0 +1,83 @@ +process BWAMEM2_MEM { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/e0/e05ce34b46ad42810eb29f74e4e304c0cb592b2ca15572929ed8bbaee58faf01/data' : + 'community.wave.seqera.io/library/bwa-mem2_htslib_samtools:db98f81f55b64113' }" + + input: + tuple val(meta), path(reads) + tuple val(meta2), path(index) + tuple val(meta3), path(fasta) + val sort_bam + + output: + tuple val(meta), path("*.sam") , emit: sam , optional:true + tuple val(meta), path("*.bam") , emit: bam , optional:true + tuple val(meta), path("*.cram") , emit: cram, optional:true + tuple val(meta), path("*.crai") , emit: crai, optional:true + tuple val(meta), path("*.csi") , emit: csi , optional:true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def samtools_command = sort_bam ? 
'sort' : 'view' + + def extension_pattern = /(--output-fmt|-O)+\s+(\S+)/ + def extension_matcher = (args2 =~ extension_pattern) + def extension = extension_matcher.getCount() > 0 ? extension_matcher[0][2].toLowerCase() : "bam" + def reference = fasta && extension=="cram" ? "--reference ${fasta}" : "" + if (!fasta && extension=="cram") error "Fasta reference is required for CRAM output" + + """ + INDEX=`find -L ./ -name "*.amb" | sed 's/\\.amb\$//'` + + bwa-mem2 \\ + mem \\ + $args \\ + -t $task.cpus \\ + \$INDEX \\ + $reads \\ + | samtools $samtools_command $args2 -@ $task.cpus ${reference} -o ${prefix}.${extension} - + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bwamem2: \$(echo \$(bwa-mem2 version 2>&1) | sed 's/.* //') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension_pattern = /(--output-fmt|-O)+\s+(\S+)/ + def extension_matcher = (args2 =~ extension_pattern) + def extension = extension_matcher.getCount() > 0 ? 
extension_matcher[0][2].toLowerCase() : "bam" + if (!fasta && extension=="cram") error "Fasta reference is required for CRAM output" + + def create_index = "" + if (extension == "cram") { + create_index = "touch ${prefix}.crai" + } else if (extension == "bam") { + create_index = "touch ${prefix}.csi" + } + + """ + touch ${prefix}.${extension} + ${create_index} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bwamem2: \$(echo \$(bwa-mem2 version 2>&1) | sed 's/.* //') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bwamem2/mem/meta.yml b/modules/nf-core/bwamem2/mem/meta.yml new file mode 100644 index 00000000..6c7d1728 --- /dev/null +++ b/modules/nf-core/bwamem2/mem/meta.yml @@ -0,0 +1,133 @@ +name: bwamem2_mem +description: Performs fastq alignment to a fasta reference using BWA +keywords: + - mem + - bwa + - alignment + - map + - fastq + - bam + - sam +tools: + - bwa: + description: | + BWA-mem2 is a software package for mapping DNA sequences against + a large reference genome, such as the human genome. + homepage: https://github.com/bwa-mem2/bwa-mem2 + documentation: http://www.htslib.org/doc/samtools.html + arxiv: arXiv:1303.3997 + licence: ["MIT"] + identifier: "biotools:bwa-mem2" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + ontologies: + - edam: "http://edamontology.org/data_2044" # Sequence + - edam: "http://edamontology.org/format_1930" # FASTQ + - - meta2: + type: map + description: | + Groovy Map containing reference/index information + e.g. 
[ id:'test' ] + - index: + type: file + description: BWA genome index files + pattern: "Directory containing BWA index *.{0123,amb,ann,bwt.2bit.64,pac}" + ontologies: + - edam: "http://edamontology.org/data_3210" # Genome index + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: Reference genome in FASTA format + pattern: "*.{fa,fasta,fna}" + ontologies: + - edam: "http://edamontology.org/data_2044" # Sequence + - edam: "http://edamontology.org/format_1929" # FASTA + - sort_bam: + type: boolean + description: use samtools sort (true) or samtools view (false) + pattern: "true or false" +output: + sam: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.sam": + type: file + description: Output SAM file containing read alignments + pattern: "*.{sam}" + ontologies: + - edam: "http://edamontology.org/format_2573" # SAM + bam: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bam": + type: file + description: Output BAM file containing read alignments + pattern: "*.{bam}" + ontologies: + - edam: "http://edamontology.org/format_2572" # BAM + cram: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.cram": + type: file + description: Output CRAM file containing read alignments + pattern: "*.{cram}" + ontologies: + - edam: "http://edamontology.org/format_3462" # CRAM + crai: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.crai": + type: file + description: Index file for CRAM file + pattern: "*.{crai}" + ontologies: [] + csi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.csi": + type: file + description: Index file for BAM file + pattern: "*.{csi}" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@maxulysse" + - "@matthdsm" +maintainers: + - "@maxulysse" + - "@matthdsm" diff --git a/modules/nf-core/bwamem2/mem/tests/main.nf.test b/modules/nf-core/bwamem2/mem/tests/main.nf.test new file mode 100644 index 00000000..9e0ab14a --- /dev/null +++ b/modules/nf-core/bwamem2/mem/tests/main.nf.test @@ -0,0 +1,179 @@ +nextflow_process { + + name "Test Process BWAMEM2_MEM" + script "../main.nf" + process "BWAMEM2_MEM" + + tag "modules" + tag "modules_nfcore" + tag "bwamem2" + tag "bwamem2/mem" + tag "bwamem2/index" + + setup { + run("BWAMEM2_INDEX") { + script "../../index/main.nf" + process { + """ + input[0] = Channel.of([ + [:], // meta map + [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)] + ]) + """ + } + } + } + + test("sarscov2 - fastq, index, fasta, false") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)] + ]) + input[1] = BWAMEM2_INDEX.out.index + input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]]) + input[3] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getHeaderMD5(), + bam(process.out.bam[0][1]).getReadsMD5(), + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, index, fasta, true") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + 
[file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)] + ]) + input[1] = BWAMEM2_INDEX.out.index + input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]]) + input[3] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getHeaderMD5(), + bam(process.out.bam[0][1]).getReadsMD5(), + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, fasta, false") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = BWAMEM2_INDEX.out.index + input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]]) + input[3] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getHeaderMD5(), + bam(process.out.bam[0][1]).getReadsMD5(), + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, fasta, true") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = BWAMEM2_INDEX.out.index + input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]]) + input[3] = true + """ + } + } + + then 
{ + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getHeaderMD5(), + bam(process.out.bam[0][1]).getReadsMD5(), + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, fasta, true - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ]) + input[1] = BWAMEM2_INDEX.out.index + input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]]) + input[3] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/bwamem2/mem/tests/main.nf.test.snap b/modules/nf-core/bwamem2/mem/tests/main.nf.test.snap new file mode 100644 index 00000000..b7d40a68 --- /dev/null +++ b/modules/nf-core/bwamem2/mem/tests/main.nf.test.snap @@ -0,0 +1,129 @@ +{ + "sarscov2 - [fastq1, fastq2], index, fasta, false": { + "content": [ + "e414c2d48e2e44c2c52c20ecd88e8bd8", + "57aeef88ed701a8ebc8e2f0a381b2a6", + [ + "versions.yml:md5,3574188ab1f33fd99cff9f5562dfb885" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-23T11:44:52.73673293" + }, + "sarscov2 - [fastq1, fastq2], index, fasta, true - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + "test.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "5": [ + "versions.yml:md5,3574188ab1f33fd99cff9f5562dfb885" + ], + "bam": [ + [ 
+ { + "id": "test", + "single_end": false + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "crai": [ + + ], + "cram": [ + + ], + "csi": [ + [ + { + "id": "test", + "single_end": false + }, + "test.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "sam": [ + + ], + "versions": [ + "versions.yml:md5,3574188ab1f33fd99cff9f5562dfb885" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-23T11:45:14.834888709" + }, + "sarscov2 - [fastq1, fastq2], index, fasta, true": { + "content": [ + "716ed1ef39deaad346ca7cf86e08f959", + "af8628d9df18b2d3d4f6fd47ef2bb872", + [ + "versions.yml:md5,3574188ab1f33fd99cff9f5562dfb885" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-23T11:45:04.750057645" + }, + "sarscov2 - fastq, index, fasta, false": { + "content": [ + "283a83f604f3f5338acedfee349dccf4", + "798439cbd7fd81cbcc5078022dc5479d", + [ + "versions.yml:md5,3574188ab1f33fd99cff9f5562dfb885" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-23T11:44:28.57550711" + }, + "sarscov2 - fastq, index, fasta, true": { + "content": [ + "ed99048bb552cac58e39923b550b6d5b", + "94fcf617f5b994584c4e8d4044e16b4f", + [ + "versions.yml:md5,3574188ab1f33fd99cff9f5562dfb885" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.7" + }, + "timestamp": "2025-09-23T11:44:40.437183765" + } +} \ No newline at end of file diff --git a/modules/nf-core/cat/fastq/environment.yml b/modules/nf-core/cat/fastq/environment.yml new file mode 100644 index 00000000..9b926b1f --- /dev/null +++ b/modules/nf-core/cat/fastq/environment.yml @@ -0,0 +1,12 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::coreutils=9.5 + - conda-forge::grep=3.11 + - conda-forge::gzip=1.13 + - conda-forge::lbzip2=2.5 + - 
conda-forge::sed=4.8 + - conda-forge::tar=1.34 diff --git a/modules/nf-core/cat/fastq/main.nf b/modules/nf-core/cat/fastq/main.nf new file mode 100644 index 00000000..0a195a3b --- /dev/null +++ b/modules/nf-core/cat/fastq/main.nf @@ -0,0 +1,68 @@ +process CAT_FASTQ { + tag "${meta.id}" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/52/52ccce28d2ab928ab862e25aae26314d69c8e38bd41ca9431c67ef05221348aa/data' + : 'community.wave.seqera.io/library/coreutils_grep_gzip_lbzip2_pruned:838ba80435a629f8'}" + + input: + tuple val(meta), path(reads, stageAs: "input*/*") + + output: + tuple val(meta), path("*.merged.fastq.gz"), emit: reads + tuple val("${task.process}"), val("cat"), eval("cat --version 2>&1 | head -n 1 | sed 's/^.*coreutils) //; s/ .*\$//'"), emit: versions_cat, topic: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + def readList = reads instanceof List ? reads.collect { item -> item.toString() } : [reads.toString()] + if (meta.single_end) { + if (readList.size >= 1) { + """ + cat ${readList.join(' ')} > ${prefix}.merged.fastq.gz + """ + } else { + error("Could not find any FASTQ files to concatenate in the process input") + } + } + else { + if (readList.size >= 2) { + def read1 = [] + def read2 = [] + readList.eachWithIndex { v, ix -> (ix & 1 ? read2 : read1) << v } + """ + cat ${read1.join(' ')} > ${prefix}_1.merged.fastq.gz + cat ${read2.join(' ')} > ${prefix}_2.merged.fastq.gz + """ + } else { + error("Could not find any FASTQ file pairs to concatenate in the process input") + } + } + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def readList = reads instanceof List ? 
reads.collect { item -> item.toString() } : [reads.toString()] + if (meta.single_end) { + if (readList.size >= 1) { + """ + echo '' | gzip > ${prefix}.merged.fastq.gz + """ + } else { + error("Could not find any FASTQ files to concatenate in the process input") + } + } + else { + if (readList.size >= 2) { + """ + echo '' | gzip > ${prefix}_1.merged.fastq.gz + echo '' | gzip > ${prefix}_2.merged.fastq.gz + """ + } else { + error("Could not find any FASTQ file pairs to concatenate in the process input") + } + } +} diff --git a/modules/nf-core/cat/fastq/meta.yml b/modules/nf-core/cat/fastq/meta.yml new file mode 100644 index 00000000..8fc03bdc --- /dev/null +++ b/modules/nf-core/cat/fastq/meta.yml @@ -0,0 +1,65 @@ +name: cat_fastq +description: Concatenates fastq files +keywords: + - cat + - fastq + - concatenate +tools: + - cat: + description: | + The cat utility reads files sequentially, writing them to the standard output. + documentation: https://www.gnu.org/software/coreutils/manual/html_node/cat-invocation.html + licence: ["GPL-3.0-or-later"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files to be concatenated. + ontologies: [] +output: + reads: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.merged.fastq.gz": + type: file + description: Merged fastq file + pattern: "*.{merged.fastq.gz}" + ontologies: [] + versions_cat: + - - ${task.process}: + type: string + description: The process the versions were collected from + - cat: + type: string + description: The tool name + - cat --version 2>&1 | head -n 1 | sed 's/^.*coreutils) //; s/ .*\$//': + type: eval + description: The expression to obtain the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - cat: + type: string + description: The tool name + - cat --version 2>&1 | head -n 1 | sed 's/^.*coreutils) //; s/ .*\$//': + type: eval + description: The expression to obtain the version of the tool + +authors: + - "@joseespinosa" + - "@drpatelh" +maintainers: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/cat/fastq/tests/main.nf.test b/modules/nf-core/cat/fastq/tests/main.nf.test new file mode 100644 index 00000000..013c1d0f --- /dev/null +++ b/modules/nf-core/cat/fastq/tests/main.nf.test @@ -0,0 +1,334 @@ +nextflow_process { + + name "Test Process CAT_FASTQ" + script "../main.nf" + process "CAT_FASTQ" + tag "modules" + tag "modules_nfcore" + tag "cat" + tag "cat/fastq" + + test("test_cat_fastq_single_end") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_cat_fastq_paired_end") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_cat_fastq_single_end_same_name") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_cat_fastq_paired_end_same_name") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_cat_fastq_single_end_single_file") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ 
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_cat_fastq_single_end - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_cat_fastq_paired_end - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_cat_fastq_single_end_same_name - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)] + ]) + """ + } + } + + then { + 
assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_cat_fastq_paired_end_same_name - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_cat_fastq_single_end_single_file - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_cat_fastq_single_end_no_files") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [] + ]) + """ + } + } + + then { + assertAll( + { assert process.failed }, + { assert snapshot(process.stdout.find { it.contains("-- Check script") }.split(" -- Check script")[0]).match() } + ) + } + } + + test("test_cat_fastq_paired_end_no_files") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [] + ]) + """ + } + } + + then { + assertAll( + { assert process.failed }, + { assert snapshot(process.stdout.find { it.contains("-- Check script") }.split(" -- Check script")[0]).match() } + 
) + } + } + + test("test_cat_fastq_single_end_no_files - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [] + ]) + """ + } + } + + then { + assertAll( + { assert process.failed }, + { assert snapshot(process.stdout.find { it.contains("-- Check script") }.split(" -- Check script")[0]).match() } + ) + } + } + + test("test_cat_fastq_paired_end_no_files - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [] + ]) + """ + } + } + + then { + assertAll( + { assert process.failed }, + { assert snapshot(process.stdout.find { it.contains("-- Check script") }.split(" -- Check script")[0]).match() } + ) + } + } +} diff --git a/modules/nf-core/cat/fastq/tests/main.nf.test.snap b/modules/nf-core/cat/fastq/tests/main.nf.test.snap new file mode 100644 index 00000000..e2a94249 --- /dev/null +++ b/modules/nf-core/cat/fastq/tests/main.nf.test.snap @@ -0,0 +1,496 @@ +{ + "test_cat_fastq_paired_end_no_files - stub": { + "content": [ + " Could not find any FASTQ file pairs to concatenate in the process input" + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.4" + }, + "timestamp": "2025-02-25T17:14:51.248685461" + }, + "test_cat_fastq_single_end_single_file": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "1": [ + [ + "CAT_FASTQ", + "cat", + "9.5" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "versions_cat": [ + [ + "CAT_FASTQ", + "cat", + "9.5" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-10T14:31:42.84401526" + }, + "test_cat_fastq_paired_end_same_name": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + 
"test_1.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22", + "test_2.merged.fastq.gz:md5,a52cab0b840c7178b0ea83df1fdbe8d5" + ] + ] + ], + "1": [ + [ + "CAT_FASTQ", + "cat", + "9.5" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22", + "test_2.merged.fastq.gz:md5,a52cab0b840c7178b0ea83df1fdbe8d5" + ] + ] + ], + "versions_cat": [ + [ + "CAT_FASTQ", + "cat", + "9.5" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-10T14:31:36.820489323" + }, + "test_cat_fastq_paired_end_same_name - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "1": [ + [ + "CAT_FASTQ", + "cat", + "9.5" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "versions_cat": [ + [ + "CAT_FASTQ", + "cat", + "9.5" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-10T14:32:06.262192935" + }, + "test_cat_fastq_single_end": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,ee314a9bd568d06617171b0c85f508da" + ] + ], + "1": [ + [ + "CAT_FASTQ", + "cat", + "9.5" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,ee314a9bd568d06617171b0c85f508da" + ] + ], + "versions_cat": [ + [ + "CAT_FASTQ", + "cat", + "9.5" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-10T14:31:18.859169785" + }, + "test_cat_fastq_single_end_same_name": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + 
"test.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22" + ] + ], + "1": [ + [ + "CAT_FASTQ", + "cat", + "9.5" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22" + ] + ], + "versions_cat": [ + [ + "CAT_FASTQ", + "cat", + "9.5" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-10T14:31:30.942615287" + }, + "test_cat_fastq_single_end - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + "CAT_FASTQ", + "cat", + "9.5" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions_cat": [ + [ + "CAT_FASTQ", + "cat", + "9.5" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-10T14:31:48.827990633" + }, + "test_cat_fastq_paired_end_no_files": { + "content": [ + " Could not find any FASTQ file pairs to concatenate in the process input" + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.4" + }, + "timestamp": "2025-02-25T17:14:40.806088747" + }, + "test_cat_fastq_single_end_no_files - stub": { + "content": [ + " Could not find any FASTQ files to concatenate in the process input" + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.4" + }, + "timestamp": "2025-02-25T17:14:45.852365218" + }, + "test_cat_fastq_single_end_same_name - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + "CAT_FASTQ", + "cat", + "9.5" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions_cat": [ + [ + "CAT_FASTQ", + "cat", + "9.5" + ] + ] + } + ], + "meta": { + 
"nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-10T14:32:00.586584379" + }, + "test_cat_fastq_paired_end": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22", + "test_2.merged.fastq.gz:md5,a52cab0b840c7178b0ea83df1fdbe8d5" + ] + ] + ], + "1": [ + [ + "CAT_FASTQ", + "cat", + "9.5" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22", + "test_2.merged.fastq.gz:md5,a52cab0b840c7178b0ea83df1fdbe8d5" + ] + ] + ], + "versions_cat": [ + [ + "CAT_FASTQ", + "cat", + "9.5" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-10T14:31:25.159365603" + }, + "test_cat_fastq_single_end_no_files": { + "content": [ + " Could not find any FASTQ files to concatenate in the process input" + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.4" + }, + "timestamp": "2025-02-25T17:14:35.695192409" + }, + "test_cat_fastq_paired_end - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "1": [ + [ + "CAT_FASTQ", + "cat", + "9.5" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "versions_cat": [ + [ + "CAT_FASTQ", + "cat", + "9.5" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-10T14:31:54.850702874" + }, + "test_cat_fastq_single_end_single_file - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + 
"CAT_FASTQ", + "cat", + "9.5" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions_cat": [ + [ + "CAT_FASTQ", + "cat", + "9.5" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-10T14:32:11.746498148" + } +} \ No newline at end of file diff --git a/modules/nf-core/falco/environment.yml b/modules/nf-core/falco/environment.yml new file mode 100644 index 00000000..59c973a9 --- /dev/null +++ b/modules/nf-core/falco/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::falco=1.2.1 diff --git a/modules/nf-core/falco/main.nf b/modules/nf-core/falco/main.nf new file mode 100644 index 00000000..a4b343b2 --- /dev/null +++ b/modules/nf-core/falco/main.nf @@ -0,0 +1,57 @@ +process FALCO { + tag "$meta.id" + label 'process_single' + + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/falco:1.2.1--h867801b_3': + 'biocontainers/falco:1.2.1--h867801b_3' }" + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path("*.html"), emit: html + tuple val(meta), path("*.txt") , emit: txt + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + if ( reads.toList().size() == 1 ) { + """ + falco $args --threads $task.cpus ${reads} -D ${prefix}_fastqc_data.txt -S ${prefix}_summary.txt -R ${prefix}_report.html + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + falco:\$( falco --version | sed -e "s/falco//g" ) + END_VERSIONS + """ + } else { + """ + falco $args --threads $task.cpus ${reads} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + falco:\$( falco --version | sed -e "s/falco//g" ) + END_VERSIONS + """ + } + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" // stub mirrors the file names and version line of the single-file script branch + """ + touch ${prefix}_fastqc_data.txt + touch ${prefix}_report.html + touch ${prefix}_summary.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + falco:\$( falco --version | sed -e "s/falco//g" ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/falco/meta.yml b/modules/nf-core/falco/meta.yml new file mode 100644 index 00000000..1450f2da --- /dev/null +++ b/modules/nf-core/falco/meta.yml @@ -0,0 +1,61 @@ +name: falco +description: Run falco on sequenced reads +keywords: + - quality control + - qc + - adapters + - fastq +tools: + - fastqc: + description: "falco is a drop-in C++ implementation of FastQC to assess the quality + of sequence reads." + homepage: "https://falco.readthedocs.io/" + documentation: "https://falco.readthedocs.io/" + licence: ["GPL v3"] + identifier: biotools:falco-rna +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + ontologies: [] +output: + html: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.html": + type: file + description: FastQC like report + pattern: "*_{fastqc_report.html}" + ontologies: [] + txt: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.txt": + type: file + description: falco report data + pattern: "*_{data.txt}" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@lucacozzuto" +maintainers: + - "@lucacozzuto" diff --git a/modules/nf-core/falco/tests/main.nf.test b/modules/nf-core/falco/tests/main.nf.test new file mode 100644 index 00000000..816c72ba --- /dev/null +++ b/modules/nf-core/falco/tests/main.nf.test @@ -0,0 +1,108 @@ +nextflow_process { + + name "Test Process FALCO" + script "../main.nf" + process "FALCO" + + tag "modules" + tag "modules_nfcore" + tag "falco" + + test("sarscov2 - fastq - single end") { + + when { + process { + """ + input[0] = [ + [ id: 'test', single_end:true ], + [ + file( + params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], + checkIfExists: true + ), + ], + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.txt, + file(process.out.html.get(0).get(1)).list(), + ).match() + }, + ) + } + + } + + test("sarscov2 - fastq - paired end") { + + when { + process { + """ + input[0] = [ + [ id: 'test', single_end:false ], + [ + file( + params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], + checkIfExists: true + ), + file( + 
params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], + checkIfExists: true + ), + ], + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.txt, + process.out.html.get(0).get(1).collect{ it.split("/")[-1] }.sort(), + ).match() + }, + ) + } + + } + + test("sarscov2 - fastq - interleaved") { + + when { + process { + """ + input[0] = [ + [ id: 'test', single_end:false ], + [ + file( + params.test_data['sarscov2']['illumina']['test_interleaved_fastq_gz'], + checkIfExists: true + ), + ], + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.txt, + file(process.out.html.get(0).get(1)).list(), + ).match() + }, + ) + } + + } + +} diff --git a/modules/nf-core/falco/tests/main.nf.test.snap b/modules/nf-core/falco/tests/main.nf.test.snap new file mode 100644 index 00000000..34ac64e3 --- /dev/null +++ b/modules/nf-core/falco/tests/main.nf.test.snap @@ -0,0 +1,61 @@ +{ + "sarscov2 - fastq - single end": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + [ + "test_fastqc_data.txt:md5,36d989bb9e2d5a632e19452f4e6c2a4e", + "test_summary.txt:md5,a925aec214a83d2f6252847166f2ef3a" + ] + ] + ], + null + ], + "timestamp": "2024-02-02T16:28:17.756764" + }, + "sarscov2 - fastq - paired end": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastq.gz_fastqc_data.txt:md5,36d989bb9e2d5a632e19452f4e6c2a4e", + "test_1.fastq.gz_summary.txt:md5,a925aec214a83d2f6252847166f2ef3a", + "test_2.fastq.gz_fastqc_data.txt:md5,ad5c45dfc8f79754dd5d8029456b715b", + "test_2.fastq.gz_summary.txt:md5,d0cb642adefb5635a25e808f1f38780a" + ] + ] + ], + [ + "test_1.fastq.gz_fastqc_report.html", + "test_2.fastq.gz_fastqc_report.html" + ] + ], + "timestamp": "2024-02-02T16:22:11.757473" + }, + "sarscov2 - fastq - interleaved": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + [ + 
"test_fastqc_data.txt:md5,b5e593f140fe578bdd25ceb84e98fd37", + "test_summary.txt:md5,ca52f458b1223d89db69e2d5e73cf867" + ] + ] + ], + null + ], + "timestamp": "2024-02-02T16:28:36.035899" + } +} \ No newline at end of file diff --git a/modules/nf-core/falco/tests/tags.yml b/modules/nf-core/falco/tests/tags.yml new file mode 100644 index 00000000..bf5610e2 --- /dev/null +++ b/modules/nf-core/falco/tests/tags.yml @@ -0,0 +1,2 @@ +falco: + - "modules/nf-core/falco/**" diff --git a/modules/nf-core/fastp/environment.yml b/modules/nf-core/fastp/environment.yml new file mode 100644 index 00000000..0c36eed2 --- /dev/null +++ b/modules/nf-core/fastp/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/fastp + - bioconda::fastp=1.0.1 diff --git a/modules/nf-core/fastp/main.nf b/modules/nf-core/fastp/main.nf new file mode 100644 index 00000000..7538fc3a --- /dev/null +++ b/modules/nf-core/fastp/main.nf @@ -0,0 +1,124 @@ +process FASTP { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/52/527b18847a97451091dba07a886b24f17f742a861f9f6c9a6bfb79d4f1f3bf9d/data' : + 'community.wave.seqera.io/library/fastp:1.0.1--c8b87fe62dcc103c' }" + + input: + tuple val(meta), path(reads), path(adapter_fasta) + val discard_trimmed_pass + val save_trimmed_fail + val save_merged + + output: + tuple val(meta), path('*.fastp.fastq.gz') , optional:true, emit: reads + tuple val(meta), path('*.json') , emit: json + tuple val(meta), path('*.html') , emit: html + tuple val(meta), path('*.log') , emit: log + tuple val(meta), path('*.fail.fastq.gz') , optional:true, emit: reads_fail + tuple val(meta), path('*.merged.fastq.gz'), optional:true, emit: reads_merged + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def adapter_list = adapter_fasta ? "--adapter_fasta ${adapter_fasta}" : "" + def fail_fastq = save_trimmed_fail && meta.single_end ? "--failed_out ${prefix}.fail.fastq.gz" : save_trimmed_fail && !meta.single_end ? "--failed_out ${prefix}.paired.fail.fastq.gz --unpaired1 ${prefix}_R1.fail.fastq.gz --unpaired2 ${prefix}_R2.fail.fastq.gz" : '' + def out_fq1 = discard_trimmed_pass ? '' : ( meta.single_end ? "--out1 ${prefix}.fastp.fastq.gz" : "--out1 ${prefix}_R1.fastp.fastq.gz" ) // empty when discarding, so the boolean is never interpolated into the command + def out_fq2 = discard_trimmed_pass ? '' : "--out2 ${prefix}_R2.fastp.fastq.gz" // same rule for the second mate + // Added soft-links to original fastqs for consistent naming in MultiQC + // Use single ended for interleaved. Add --interleaved_in in config. + if ( task.ext.args?.contains('--interleaved_in') ) { + """ + [ ! 
-f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz + + fastp \\ + --stdout \\ + --in1 ${prefix}.fastq.gz \\ + --thread $task.cpus \\ + --json ${prefix}.fastp.json \\ + --html ${prefix}.fastp.html \\ + $adapter_list \\ + $fail_fastq \\ + $args \\ + 2>| >(tee ${prefix}.fastp.log >&2) \\ + | gzip -c > ${prefix}.fastp.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") + END_VERSIONS + """ + } else if (meta.single_end) { + """ + [ ! -f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz + + fastp \\ + --in1 ${prefix}.fastq.gz \\ + $out_fq1 \\ + --thread $task.cpus \\ + --json ${prefix}.fastp.json \\ + --html ${prefix}.fastp.html \\ + $adapter_list \\ + $fail_fastq \\ + $args \\ + 2>| >(tee ${prefix}.fastp.log >&2) + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") + END_VERSIONS + """ + } else { + def merge_fastq = save_merged ? "-m --merged_out ${prefix}.merged.fastq.gz" : '' + """ + [ ! -f ${prefix}_R1.fastq.gz ] && ln -sf ${reads[0]} ${prefix}_R1.fastq.gz + [ ! -f ${prefix}_R2.fastq.gz ] && ln -sf ${reads[1]} ${prefix}_R2.fastq.gz + fastp \\ + --in1 ${prefix}_R1.fastq.gz \\ + --in2 ${prefix}_R2.fastq.gz \\ + $out_fq1 \\ + $out_fq2 \\ + --json ${prefix}.fastp.json \\ + --html ${prefix}.fastp.html \\ + $adapter_list \\ + $fail_fastq \\ + $merge_fastq \\ + --thread $task.cpus \\ + --detect_adapter_for_pe \\ + $args \\ + 2>| >(tee ${prefix}.fastp.log >&2) + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") + END_VERSIONS + """ + } + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def is_single_output = task.ext.args?.contains('--interleaved_in') || meta.single_end + def touch_reads = (discard_trimmed_pass) ? "" : (is_single_output) ? 
"echo '' | gzip > ${prefix}.fastp.fastq.gz" : "echo '' | gzip > ${prefix}_R1.fastp.fastq.gz ; echo '' | gzip > ${prefix}_R2.fastp.fastq.gz" + def touch_merged = (!is_single_output && save_merged) ? "echo '' | gzip > ${prefix}.merged.fastq.gz" : "" + def touch_fail_fastq = (!save_trimmed_fail) ? "" : meta.single_end ? "echo '' | gzip > ${prefix}.fail.fastq.gz" : "echo '' | gzip > ${prefix}.paired.fail.fastq.gz ; echo '' | gzip > ${prefix}_R1.fail.fastq.gz ; echo '' | gzip > ${prefix}_R2.fail.fastq.gz" + """ + $touch_reads + $touch_fail_fastq + $touch_merged + touch "${prefix}.fastp.json" + touch "${prefix}.fastp.html" + touch "${prefix}.fastp.log" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/fastp/meta.yml b/modules/nf-core/fastp/meta.yml new file mode 100644 index 00000000..324025fe --- /dev/null +++ b/modules/nf-core/fastp/meta.yml @@ -0,0 +1,127 @@ +name: fastp +description: Perform adapter/quality trimming on sequencing reads +keywords: + - trimming + - quality control + - fastq +tools: + - fastp: + description: | + A tool designed to provide fast all-in-one preprocessing for FastQ files. This tool is developed in C++ with multithreading supported to afford high performance. + documentation: https://github.com/OpenGene/fastp + doi: 10.1093/bioinformatics/bty560 + licence: ["MIT"] + identifier: biotools:fastp +input: + - - meta: + type: map + description: | + Groovy Map containing sample information. Use 'single_end: true' to specify single ended or interleaved FASTQs. Use 'single_end: false' for paired-end reads. + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. If you wish to run interleaved paired-end data, supply as single-end data + but with `--interleaved_in` in your `modules.conf`'s `ext.args` for the module. 
+ ontologies: [] + - adapter_fasta: + type: file + description: File in FASTA format containing possible adapters to remove. + pattern: "*.{fasta,fna,fas,fa}" + ontologies: [] + - discard_trimmed_pass: + type: boolean + description: | + Specify true to not write any reads that pass trimming thresholds. + This can be used to use fastp for the output report only. + - save_trimmed_fail: + type: boolean + description: Specify true to save files that failed to pass trimming thresholds + ending in `*.fail.fastq.gz` + - save_merged: + type: boolean + description: Specify true to save all merged reads to a file ending in `*.merged.fastq.gz` +output: + reads: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.fastp.fastq.gz": + type: file + description: The trimmed/modified/unmerged fastq reads + pattern: "*fastp.fastq.gz" + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format + json: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.json": + type: file + description: Results in JSON format + pattern: "*.json" + ontologies: + - edam: http://edamontology.org/format_3464 # JSON + html: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.html": + type: file + description: Results in HTML format + pattern: "*.html" + ontologies: [] + log: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.log": + type: file + description: fastp log file + pattern: "*.log" + ontologies: [] + reads_fail: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.fail.fastq.gz": + type: file + description: Reads that failed the preprocessing + pattern: "*fail.fastq.gz" + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format + reads_merged: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.merged.fastq.gz": + type: file + description: Reads that were successfully merged + pattern: "*.{merged.fastq.gz}" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@drpatelh" + - "@kevinmenden" +maintainers: + - "@drpatelh" + - "@kevinmenden" diff --git a/modules/nf-core/fastp/tests/main.nf.test b/modules/nf-core/fastp/tests/main.nf.test new file mode 100644 index 00000000..5125705c --- /dev/null +++ b/modules/nf-core/fastp/tests/main.nf.test @@ -0,0 +1,661 @@ +nextflow_process { + + name "Test Process FASTP" + script "../main.nf" + process "FASTP" + tag "modules" + tag "modules_nfcore" + tag "fastp" + + test("test_fastp_single_end") { + + when { + + process { + """ + adapter_fasta = [] // empty list for no adapter file! 
+ discard_trimmed_pass = false + save_trimmed_fail = false + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:true ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ], + adapter_fasta + ]) + input[1] = discard_trimmed_pass + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.html.get(0).get(1)).getText().contains("single end (151 cycles)") }, + { assert path(process.out.log.get(0).get(1)).getText().contains("reads passed filter: 99") }, + { assert snapshot( + process.out.reads, + process.out.reads_fail, + process.out.reads_merged, + process.out.versions).match() + } + ) + } + } + + test("test_fastp_paired_end") { + + when { + + process { + """ + adapter_fasta = [] + discard_trimmed_pass = false + save_trimmed_fail = false + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ], + adapter_fasta + ]) + input[1] = discard_trimmed_pass + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.html.get(0).get(1)).getText().contains("The input has little adapter percentage (~0.000000%), probably it's trimmed before.") }, + { assert path(process.out.log.get(0).get(1)).getText().contains("Q30 bases: 12281(88.3716%)") }, + { assert snapshot( + process.out.reads, + process.out.reads_fail, + process.out.reads_merged, + process.out.versions).match() } + ) + } + } + + test("fastp test_fastp_interleaved") { + + config './nextflow.interleaved.config' + when { + process { + """ + adapter_fasta = [] + discard_trimmed_pass 
= false + save_trimmed_fail = false + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) ], + adapter_fasta + ]) + input[1] = discard_trimmed_pass + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.html.get(0).get(1)).getText().contains("paired end (151 cycles + 151 cycles)") }, + { assert path(process.out.log.get(0).get(1)).getText().contains("reads passed filter: 162") }, + { assert process.out.reads_fail == [] }, + { assert process.out.reads_merged == [] }, + { assert snapshot( + process.out.reads, + process.out.versions).match() } + ) + } + } + + test("test_fastp_single_end_trim_fail") { + + when { + + process { + """ + adapter_fasta = [] + discard_trimmed_pass = false + save_trimmed_fail = true + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ], + adapter_fasta + ]) + input[1] = discard_trimmed_pass + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.html.get(0).get(1)).getText().contains("single end (151 cycles)") }, + { assert path(process.out.log.get(0).get(1)).getText().contains("reads passed filter: 99") }, + { assert snapshot( + process.out.reads, + process.out.reads_fail, + process.out.reads_merged, + process.out.versions).match() } + ) + } + } + + test("test_fastp_paired_end_trim_fail") { + + config './nextflow.save_failed.config' + when { + process { + """ + adapter_fasta = [] + discard_trimmed_pass = false + save_trimmed_fail = true + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta 
map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ], + adapter_fasta + ]) + input[1] = discard_trimmed_pass + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.html.get(0).get(1)).getText().contains("The input has little adapter percentage (~0.000000%), probably it's trimmed before.") }, + { assert path(process.out.log.get(0).get(1)).getText().contains("reads passed filter: 162") }, + { assert snapshot( + process.out.reads, + process.out.reads_fail, + process.out.reads_merged, + process.out.versions).match() } + ) + } + } + + test("test_fastp_paired_end_merged") { + + when { + process { + """ + adapter_fasta = [] + discard_trimmed_pass = false + save_trimmed_fail = false + save_merged = true + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ], + adapter_fasta + ]) + input[1] = discard_trimmed_pass + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.html.get(0).get(1)).getText().contains("The input has little adapter percentage (~0.000000%), probably it's trimmed before.") }, + { assert path(process.out.log.get(0).get(1)).getText().contains("total reads: 75") }, + { assert snapshot( + process.out.reads, + process.out.reads_fail, + process.out.reads_merged, + process.out.versions).match() }, + ) + } + } + + test("test_fastp_paired_end_merged_adapterlist") { + + when { + process { + """ + adapter_fasta = 
file(params.modules_testdata_base_path + 'delete_me/fastp/adapters.fasta', checkIfExists: true) + discard_trimmed_pass = false + save_trimmed_fail = false + save_merged = true + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ], + adapter_fasta + ]) + input[1] = false + input[2] = false + input[3] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.html.get(0).get(1)).getText().contains("
") }, + { assert path(process.out.log.get(0).get(1)).getText().contains("total bases: 13683") }, + { assert snapshot( + process.out.reads, + process.out.reads_fail, + process.out.reads_merged, + process.out.versions).match() } + ) + } + } + + test("test_fastp_single_end_qc_only") { + + when { + process { + """ + adapter_fasta = [] + discard_trimmed_pass = true + save_trimmed_fail = false + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:true ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ], + adapter_fasta + ]) + + input[1] = discard_trimmed_pass + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.html.get(0).get(1)).getText().contains("single end (151 cycles)") }, + { assert path(process.out.log.get(0).get(1)).getText().contains("reads passed filter: 99") }, + { assert snapshot( + process.out.reads, + process.out.reads, + process.out.reads_fail, + process.out.reads_fail, + process.out.reads_merged, + process.out.reads_merged, + process.out.versions).match() } + ) + } + } + + test("test_fastp_paired_end_qc_only") { + + when { + process { + """ + adapter_fasta = [] + discard_trimmed_pass = true + save_trimmed_fail = false + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ], + adapter_fasta + ]) + input[1] = discard_trimmed_pass + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.html.get(0).get(1)).getText().contains("The input has little adapter percentage (~0.000000%), probably it's 
trimmed before.") }, + { assert path(process.out.log.get(0).get(1)).getText().contains("Q30 bases: 12281(88.3716%)") }, + { assert snapshot( + process.out.reads, + process.out.reads, + process.out.reads_fail, + process.out.reads_fail, + process.out.reads_merged, + process.out.reads_merged, + process.out.versions).match() } + ) + } + } + + test("test_fastp_single_end - stub") { + + options "-stub" + + when { + + process { + """ + adapter_fasta = [] + discard_trimmed_pass = false + save_trimmed_fail = false + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:true ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ], + adapter_fasta + ]) + input[1] = discard_trimmed_pass + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_fastp_paired_end - stub") { + + options "-stub" + + when { + + process { + """ + adapter_fasta = [] + discard_trimmed_pass = false + save_trimmed_fail = false + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ], + adapter_fasta + ]) + input[1] = discard_trimmed_pass + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("fastp - stub test_fastp_interleaved") { + + options "-stub" + + config './nextflow.interleaved.config' + when { + process { + """ + adapter_fasta = [] + discard_trimmed_pass = false + save_trimmed_fail = false + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:true ], // 
meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) ], + adapter_fasta + ]) + input[1] = discard_trimmed_pass + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_fastp_single_end_trim_fail - stub") { + + options "-stub" + + when { + + process { + """ + adapter_fasta = [] + discard_trimmed_pass = false + save_trimmed_fail = true + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ], + adapter_fasta + ]) + input[1] = discard_trimmed_pass + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_fastp_paired_end_trim_fail - stub") { + + options "-stub" + + config './nextflow.save_failed.config' + when { + process { + """ + adapter_fasta = [] + discard_trimmed_pass = false + save_trimmed_fail = true + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)], + adapter_fasta + ]) + input[1] = discard_trimmed_pass + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_fastp_paired_end_merged - stub") { + + options "-stub" + + when { + process { + """ + adapter_fasta = [] + discard_trimmed_pass = false + save_trimmed_fail = false + save_merged = true + 
+ input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ], + adapter_fasta + ]) + input[1] = discard_trimmed_pass + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_fastp_paired_end_merged_adapterlist - stub") { + + options "-stub" + + when { + process { + """ + adapter_fasta = file(params.modules_testdata_base_path + 'delete_me/fastp/adapters.fasta', checkIfExists: true) + discard_trimmed_pass = false + save_trimmed_fail = false + save_merged = true + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ], + adapter_fasta + ]) + input[1] = discard_trimmed_pass + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_fastp_single_end_qc_only - stub") { + + options "-stub" + + when { + process { + """ + adapter_fasta = [] + discard_trimmed_pass = true + save_trimmed_fail = false + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:true ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ], + adapter_fasta + ]) + input[1] = discard_trimmed_pass + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert 
snapshot(process.out).match() } + ) + } + } + + test("test_fastp_paired_end_qc_only - stub") { + + options "-stub" + + when { + process { + """ + adapter_fasta = [] + discard_trimmed_pass = true + save_trimmed_fail = false + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ], + adapter_fasta + ]) + input[1] = discard_trimmed_pass + input[2] = save_trimmed_fail + input[3] = save_merged + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/fastp/tests/main.nf.test.snap b/modules/nf-core/fastp/tests/main.nf.test.snap new file mode 100644 index 00000000..2276fc08 --- /dev/null +++ b/modules/nf-core/fastp/tests/main.nf.test.snap @@ -0,0 +1,1250 @@ +{ + "test_fastp_single_end_qc_only - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + "versions.yml:md5,c4974822658d02533e660fae343f281b" + ], + "html": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + + ], + 
"reads_fail": [ + + ], + "reads_merged": [ + + ], + "versions": [ + "versions.yml:md5,c4974822658d02533e660fae343f281b" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-09-11T09:55:42.073182" + }, + "test_fastp_paired_end": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_R1.fastp.fastq.gz:md5,67b2bbae47f073e05a97a9c2edce23c7", + "test_R2.fastp.fastq.gz:md5,25cbdca08e2083dbd4f0502de6b62f39" + ] + ] + ], + [ + + ], + [ + + ], + [ + "versions.yml:md5,c4974822658d02533e660fae343f281b" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.8" + }, + "timestamp": "2025-12-22T14:32:22.887952042" + }, + "test_fastp_paired_end_merged_adapterlist": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_R1.fastp.fastq.gz:md5,54b726a55e992a869fd3fa778afe1672", + "test_R2.fastp.fastq.gz:md5,29d3b33b869f7b63417b8ff07bb128ba" + ] + ] + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.merged.fastq.gz:md5,c873bb1ab3fa859dcc47306465e749d5" + ] + ], + [ + "versions.yml:md5,c4974822658d02533e660fae343f281b" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.8" + }, + "timestamp": "2025-12-22T14:32:53.752975682" + }, + "test_fastp_single_end_qc_only": { + "content": [ + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + "versions.yml:md5,c4974822658d02533e660fae343f281b" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-09-19T16:23:36.149003" + }, + "test_fastp_paired_end_trim_fail": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_R1.fastp.fastq.gz:md5,6ff32a64c5188b9a9192be1398c262c7", + "test_R2.fastp.fastq.gz:md5,db0cb7c9977e94ac2b4b446ebd017a8a" + ] + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.paired.fail.fastq.gz:md5,409b687c734cedd7a1fec14d316e1366", + 
"test_R1.fail.fastq.gz:md5,4f273cf3159c13f79e8ffae12f5661f6", + "test_R2.fail.fastq.gz:md5,f97b9edefb5649aab661fbc9e71fc995" + ] + ] + ], + [ + + ], + [ + "versions.yml:md5,c4974822658d02533e660fae343f281b" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.8" + }, + "timestamp": "2025-12-22T14:32:41.270456637" + }, + "fastp - stub test_fastp_interleaved": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + "versions.yml:md5,c4974822658d02533e660fae343f281b" + ], + "html": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "reads_fail": [ + + ], + "reads_merged": [ + + ], + "versions": [ + "versions.yml:md5,c4974822658d02533e660fae343f281b" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-09-11T09:55:19.47199" + }, + "test_fastp_single_end - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, 
+ "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + "versions.yml:md5,c4974822658d02533e660fae343f281b" + ], + "html": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "reads_fail": [ + + ], + "reads_merged": [ + + ], + "versions": [ + "versions.yml:md5,c4974822658d02533e660fae343f281b" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-09-11T09:55:09.617001" + }, + "test_fastp_paired_end_merged_adapterlist - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_R1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_R2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "6": [ + 
"versions.yml:md5,c4974822658d02533e660fae343f281b" + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_R1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_R2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "reads_fail": [ + + ], + "reads_merged": [ + [ + { + "id": "test", + "single_end": false + }, + "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,c4974822658d02533e660fae343f281b" + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.8" + }, + "timestamp": "2025-12-22T14:33:44.204950729" + }, + "test_fastp_paired_end_merged - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_R1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_R2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "6": [ + "versions.yml:md5,c4974822658d02533e660fae343f281b" + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + 
] + ], + "json": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_R1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_R2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "reads_fail": [ + + ], + "reads_merged": [ + [ + { + "id": "test", + "single_end": false + }, + "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,c4974822658d02533e660fae343f281b" + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.8" + }, + "timestamp": "2025-12-22T14:33:38.518882433" + }, + "test_fastp_paired_end_merged": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_R1.fastp.fastq.gz:md5,54b726a55e992a869fd3fa778afe1672", + "test_R2.fastp.fastq.gz:md5,29d3b33b869f7b63417b8ff07bb128ba" + ] + ] + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.merged.fastq.gz:md5,c873bb1ab3fa859dcc47306465e749d5" + ] + ], + [ + "versions.yml:md5,c4974822658d02533e660fae343f281b" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.8" + }, + "timestamp": "2025-12-22T14:32:47.366974895" + }, + "test_fastp_paired_end - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_R1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_R2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + 
"test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + "versions.yml:md5,c4974822658d02533e660fae343f281b" + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_R1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_R2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "reads_fail": [ + + ], + "reads_merged": [ + + ], + "versions": [ + "versions.yml:md5,c4974822658d02533e660fae343f281b" + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.8" + }, + "timestamp": "2025-12-22T14:33:16.494574544" + }, + "test_fastp_single_end": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.fastq.gz:md5,67b2bbae47f073e05a97a9c2edce23c7" + ] + ], + [ + + ], + [ + + ], + [ + "versions.yml:md5,c4974822658d02533e660fae343f281b" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-09-19T16:23:08.469846" + }, + "test_fastp_single_end_trim_fail - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "test", + "single_end": true 
+ }, + "test.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "5": [ + + ], + "6": [ + "versions.yml:md5,c4974822658d02533e660fae343f281b" + ], + "html": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "reads_fail": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "reads_merged": [ + + ], + "versions": [ + "versions.yml:md5,c4974822658d02533e660fae343f281b" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-09-11T09:55:23.871395" + }, + "test_fastp_paired_end_trim_fail - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_R1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_R2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.paired.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_R1.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_R2.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "5": [ + + ], + "6": [ + 
"versions.yml:md5,c4974822658d02533e660fae343f281b" + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_R1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_R2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "reads_fail": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.paired.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_R1.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_R2.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "reads_merged": [ + + ], + "versions": [ + "versions.yml:md5,c4974822658d02533e660fae343f281b" + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.8" + }, + "timestamp": "2025-12-22T14:33:32.863505882" + }, + "fastp test_fastp_interleaved": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.fastq.gz:md5,217d62dc13a23e92513a1bd8e1bcea39" + ] + ], + [ + "versions.yml:md5,c4974822658d02533e660fae343f281b" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-09-19T16:23:16.479494" + }, + "test_fastp_single_end_trim_fail": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.fastq.gz:md5,67b2bbae47f073e05a97a9c2edce23c7" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.fail.fastq.gz:md5,3e4aaadb66a5b8fc9b881bf39c227abd" + ] + ], + [ + + ], + [ + "versions.yml:md5,c4974822658d02533e660fae343f281b" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-09-19T16:23:20.299076" + 
}, + "test_fastp_paired_end_qc_only": { + "content": [ + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + "versions.yml:md5,c4974822658d02533e660fae343f281b" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-09-19T16:23:40.113724" + }, + "test_fastp_paired_end_qc_only - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + "versions.yml:md5,c4974822658d02533e660fae343f281b" + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + + ], + "reads_fail": [ + + ], + "reads_merged": [ + + ], + "versions": [ + "versions.yml:md5,c4974822658d02533e660fae343f281b" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-09-11T09:55:46.696419" + } +} \ No newline at end of file diff --git a/modules/nf-core/fastp/tests/nextflow.interleaved.config b/modules/nf-core/fastp/tests/nextflow.interleaved.config new file mode 100644 index 00000000..4be8dbd2 --- /dev/null +++ b/modules/nf-core/fastp/tests/nextflow.interleaved.config @@ -0,0 +1,5 @@ +process { + withName: FASTP { + ext.args = "--interleaved_in -e 30" + } +} diff --git a/modules/nf-core/fastp/tests/nextflow.save_failed.config 
b/modules/nf-core/fastp/tests/nextflow.save_failed.config new file mode 100644 index 00000000..53b61b0c --- /dev/null +++ b/modules/nf-core/fastp/tests/nextflow.save_failed.config @@ -0,0 +1,5 @@ +process { + withName: FASTP { + ext.args = "-e 30" + } +} diff --git a/modules/nf-core/fastqc/environment.yml b/modules/nf-core/fastqc/environment.yml index 691d4c76..f9f54ee9 100644 --- a/modules/nf-core/fastqc/environment.yml +++ b/modules/nf-core/fastqc/environment.yml @@ -1,3 +1,5 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf index 033f4154..4b3041dc 100644 --- a/modules/nf-core/fastqc/main.nf +++ b/modules/nf-core/fastqc/main.nf @@ -11,9 +11,9 @@ process FASTQC { tuple val(meta), path(reads) output: - tuple val(meta), path("*.html"), emit: html - tuple val(meta), path("*.zip") , emit: zip - path "versions.yml" , emit: versions + tuple val(meta) , path("*.html") , emit: html + tuple val(meta) , path("*.zip") , emit: zip + tuple val("${task.process}"), val('fastqc'), eval('fastqc --version | sed "/FastQC v/!d; s/.*v//"'), emit: versions_fastqc, topic: versions when: task.ext.when == null || task.ext.when @@ -29,7 +29,7 @@ process FASTQC { // The total amount of allocated RAM by FastQC is equal to the number of threads defined (--threads) time the amount of RAM defined (--memory) // https://github.com/s-andrews/FastQC/blob/1faeea0412093224d7f6a07f777fad60a5650795/fastqc#L211-L222 // Dividing the task.memory by task.cpu allows to stick to requested amount of RAM in the label - def memory_in_mb = task.memory ? task.memory.toUnit('MB').toFloat() / task.cpus : null + def memory_in_mb = task.memory ? task.memory.toUnit('MB') / task.cpus : null // FastQC memory value allowed range (100 - 10000) def fastqc_memory = memory_in_mb > 10000 ? 10000 : (memory_in_mb < 100 ? 
100 : memory_in_mb) @@ -43,11 +43,6 @@ process FASTQC { --threads ${task.cpus} \\ --memory ${fastqc_memory} \\ ${renamed_files} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) - END_VERSIONS """ stub: @@ -55,10 +50,5 @@ process FASTQC { """ touch ${prefix}.html touch ${prefix}.zip - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - fastqc: \$( fastqc --version | sed '/FastQC v/!d; s/.*v//' ) - END_VERSIONS """ } diff --git a/modules/nf-core/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml index 2b2e62b8..49164c88 100644 --- a/modules/nf-core/fastqc/meta.yml +++ b/modules/nf-core/fastqc/meta.yml @@ -29,9 +29,10 @@ input: description: | List of input FastQ files of size 1 and 2 for single-end and paired-end data, respectively. + ontologies: [] output: - - html: - - meta: + html: + - - meta: type: map description: | Groovy Map containing sample information @@ -40,8 +41,9 @@ output: type: file description: FastQC report pattern: "*_{fastqc.html}" - - zip: - - meta: + ontologies: [] + zip: + - - meta: type: map description: | Groovy Map containing sample information @@ -50,11 +52,29 @@ output: type: file description: FastQC report archive pattern: "*_{fastqc.zip}" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + ontologies: [] + versions_fastqc: + - - ${task.process}: + type: string + description: The process the versions were collected from + - fastqc: + type: string + description: The tool name + - fastqc --version | sed "/FastQC v/!d; s/.*v//": + type: eval + description: The expression to obtain the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - fastqc: + type: string + description: The tool name + - fastqc --version | sed "/FastQC v/!d; s/.*v//": + type: eval + description: The expression to obtain the version of 
the tool authors: - "@drpatelh" - "@grst" @@ -65,3 +85,27 @@ maintainers: - "@grst" - "@ewels" - "@FelixKrueger" +containers: + conda: + linux_amd64: + lock_file: https://wave.seqera.io/v1alpha1/builds/bd-af7a5314d5015c29_1/condalock + linux_arm64: + lock_file: https://wave.seqera.io/v1alpha1/builds/bd-df99cb252670875a_2/condalock + docker: + linux_amd64: + build_id: bd-af7a5314d5015c29_1 + name: community.wave.seqera.io/library/fastqc:0.12.1--af7a5314d5015c29 + scanId: sc-a618548acbee5a8a_30 + linux_arm64: + build_id: bd-df99cb252670875a_2 + name: community.wave.seqera.io/library/fastqc:0.12.1--df99cb252670875a + scanId: sc-b5913ed5d42b22d2_18 + singularity: + linux_amd64: + build_id: bd-104d26ddd9519960_1 + name: oras://community.wave.seqera.io/library/fastqc:0.12.1--104d26ddd9519960 + https: https://community.wave.seqera.io/v2/library/fastqc/blobs/sha256:e0c976cb2eca5fee72618a581537a4f8ea42fcae24c9b201e2e0f764fd28648a + linux_arm64: + build_id: bd-d56b505a93aef38a_1 + name: oras://community.wave.seqera.io/library/fastqc:0.12.1--d56b505a93aef38a + https: https://community.wave.seqera.io/v2/library/fastqc/blobs/sha256:fd39534bf298698cbe3ee4d4a6f1e73330ec4bca44c38dd9a4d06cb5ea838017 diff --git a/modules/nf-core/fastqc/tests/main.nf.test b/modules/nf-core/fastqc/tests/main.nf.test index e9d79a07..66c44da9 100644 --- a/modules/nf-core/fastqc/tests/main.nf.test +++ b/modules/nf-core/fastqc/tests/main.nf.test @@ -30,7 +30,7 @@ nextflow_process { { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, - { assert snapshot(process.out.versions).match() } + { assert snapshot(sanitizeOutput(process.out).findAll { key, val -> key != 'html' && key != 'zip' }).match() } ) } } @@ -58,7 +58,7 @@ nextflow_process { { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, { assert 
path(process.out.html[0][1][0]).text.contains("File typeConventional base calls") }, { assert path(process.out.html[0][1][1]).text.contains("File typeConventional base calls") }, - { assert snapshot(process.out.versions).match() } + { assert snapshot(sanitizeOutput(process.out).findAll { key, val -> key != 'html' && key != 'zip' }).match() } ) } } @@ -82,7 +82,7 @@ nextflow_process { { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, - { assert snapshot(process.out.versions).match() } + { assert snapshot(sanitizeOutput(process.out).findAll { key, val -> key != 'html' && key != 'zip' }).match() } ) } } @@ -106,7 +106,7 @@ nextflow_process { { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, - { assert snapshot(process.out.versions).match() } + { assert snapshot(sanitizeOutput(process.out).findAll { key, val -> key != 'html' && key != 'zip' }).match() } ) } } @@ -142,7 +142,7 @@ nextflow_process { { assert path(process.out.html[0][1][1]).text.contains("File typeConventional base calls") }, { assert path(process.out.html[0][1][2]).text.contains("File typeConventional base calls") }, { assert path(process.out.html[0][1][3]).text.contains("File typeConventional base calls") }, - { assert snapshot(process.out.versions).match() } + { assert snapshot(sanitizeOutput(process.out).findAll { key, val -> key != 'html' && key != 'zip' }).match() } ) } } @@ -166,7 +166,7 @@ nextflow_process { { assert process.out.html[0][1] ==~ ".*/mysample_fastqc.html" }, { assert process.out.zip[0][1] ==~ ".*/mysample_fastqc.zip" }, { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, - { assert snapshot(process.out.versions).match() } + { 
assert snapshot(sanitizeOutput(process.out).findAll { key, val -> key != 'html' && key != 'zip' }).match() } ) } } diff --git a/modules/nf-core/fastqc/tests/main.nf.test.snap b/modules/nf-core/fastqc/tests/main.nf.test.snap index d5db3092..c8ee120f 100644 --- a/modules/nf-core/fastqc/tests/main.nf.test.snap +++ b/modules/nf-core/fastqc/tests/main.nf.test.snap @@ -1,15 +1,21 @@ { "sarscov2 custom_prefix": { "content": [ - [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ] + { + "versions_fastqc": [ + [ + "FASTQC", + "fastqc", + "0.12.1" + ] + ] + } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.3" + "nf-test": "0.9.2", + "nextflow": "25.10.0" }, - "timestamp": "2024-07-22T11:02:16.374038" + "timestamp": "2025-10-28T16:39:14.518503" }, "sarscov2 single-end [fastq] - stub": { "content": [ @@ -33,7 +39,11 @@ ] ], "2": [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + [ + "FASTQC", + "fastqc", + "0.12.1" + ] ], "html": [ [ @@ -44,8 +54,12 @@ "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "versions": [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + "versions_fastqc": [ + [ + "FASTQC", + "fastqc", + "0.12.1" + ] ], "zip": [ [ @@ -59,10 +73,10 @@ } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.3" + "nf-test": "0.9.2", + "nextflow": "25.10.0" }, - "timestamp": "2024-07-22T11:02:24.993809" + "timestamp": "2025-10-28T16:39:19.309008" }, "sarscov2 custom_prefix - stub": { "content": [ @@ -86,7 +100,11 @@ ] ], "2": [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + [ + "FASTQC", + "fastqc", + "0.12.1" + ] ], "html": [ [ @@ -97,8 +115,12 @@ "mysample.html:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "versions": [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + "versions_fastqc": [ + [ + "FASTQC", + "fastqc", + "0.12.1" + ] ], "zip": [ [ @@ -112,58 +134,82 @@ } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.3" + "nf-test": "0.9.2", + "nextflow": "25.10.0" }, - "timestamp": "2024-07-22T11:03:10.93942" + 
"timestamp": "2025-10-28T16:39:44.94888" }, "sarscov2 interleaved [fastq]": { "content": [ - [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ] + { + "versions_fastqc": [ + [ + "FASTQC", + "fastqc", + "0.12.1" + ] + ] + } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.3" + "nf-test": "0.9.2", + "nextflow": "25.10.0" }, - "timestamp": "2024-07-22T11:01:42.355718" + "timestamp": "2025-10-28T16:38:45.168496" }, "sarscov2 paired-end [bam]": { "content": [ - [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ] + { + "versions_fastqc": [ + [ + "FASTQC", + "fastqc", + "0.12.1" + ] + ] + } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.3" + "nf-test": "0.9.2", + "nextflow": "25.10.0" }, - "timestamp": "2024-07-22T11:01:53.276274" + "timestamp": "2025-10-28T16:38:53.268919" }, "sarscov2 multiple [fastq]": { "content": [ - [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ] + { + "versions_fastqc": [ + [ + "FASTQC", + "fastqc", + "0.12.1" + ] + ] + } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.3" + "nf-test": "0.9.2", + "nextflow": "25.10.0" }, - "timestamp": "2024-07-22T11:02:05.527626" + "timestamp": "2025-10-28T16:39:05.050305" }, "sarscov2 paired-end [fastq]": { "content": [ - [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ] + { + "versions_fastqc": [ + [ + "FASTQC", + "fastqc", + "0.12.1" + ] + ] + } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.3" + "nf-test": "0.9.2", + "nextflow": "25.10.0" }, - "timestamp": "2024-07-22T11:01:31.188871" + "timestamp": "2025-10-28T16:38:37.2373" }, "sarscov2 paired-end [fastq] - stub": { "content": [ @@ -187,7 +233,11 @@ ] ], "2": [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + [ + "FASTQC", + "fastqc", + "0.12.1" + ] ], "html": [ [ @@ -198,8 +248,12 @@ "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "versions": [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + "versions_fastqc": [ + [ + "FASTQC", + "fastqc", + "0.12.1" + ] ], "zip": [ 
[ @@ -213,10 +267,10 @@ } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.3" + "nf-test": "0.9.2", + "nextflow": "25.10.0" }, - "timestamp": "2024-07-22T11:02:34.273566" + "timestamp": "2025-10-28T16:39:24.450398" }, "sarscov2 multiple [fastq] - stub": { "content": [ @@ -240,7 +294,11 @@ ] ], "2": [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + [ + "FASTQC", + "fastqc", + "0.12.1" + ] ], "html": [ [ @@ -251,8 +309,12 @@ "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "versions": [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + "versions_fastqc": [ + [ + "FASTQC", + "fastqc", + "0.12.1" + ] ], "zip": [ [ @@ -266,22 +328,28 @@ } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.3" + "nf-test": "0.9.2", + "nextflow": "25.10.0" }, - "timestamp": "2024-07-22T11:03:02.304411" + "timestamp": "2025-10-28T16:39:39.758762" }, "sarscov2 single-end [fastq]": { "content": [ - [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" - ] + { + "versions_fastqc": [ + [ + "FASTQC", + "fastqc", + "0.12.1" + ] + ] + } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.3" + "nf-test": "0.9.2", + "nextflow": "25.10.0" }, - "timestamp": "2024-07-22T11:01:19.095607" + "timestamp": "2025-10-28T16:38:29.555068" }, "sarscov2 interleaved [fastq] - stub": { "content": [ @@ -305,7 +373,11 @@ ] ], "2": [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + [ + "FASTQC", + "fastqc", + "0.12.1" + ] ], "html": [ [ @@ -316,8 +388,12 @@ "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "versions": [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + "versions_fastqc": [ + [ + "FASTQC", + "fastqc", + "0.12.1" + ] ], "zip": [ [ @@ -331,10 +407,10 @@ } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.3" + "nf-test": "0.9.2", + "nextflow": "25.10.0" }, - "timestamp": "2024-07-22T11:02:44.640184" + "timestamp": "2025-10-28T16:39:29.193136" }, "sarscov2 paired-end [bam] - stub": { "content": [ @@ -358,7 +434,11 @@ ] ], "2": [ - 
"versions.yml:md5,e1cc25ca8af856014824abd842e93978" + [ + "FASTQC", + "fastqc", + "0.12.1" + ] ], "html": [ [ @@ -369,8 +449,12 @@ "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "versions": [ - "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + "versions_fastqc": [ + [ + "FASTQC", + "fastqc", + "0.12.1" + ] ], "zip": [ [ @@ -384,9 +468,9 @@ } ], "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.3" + "nf-test": "0.9.2", + "nextflow": "25.10.0" }, - "timestamp": "2024-07-22T11:02:53.550742" + "timestamp": "2025-10-28T16:39:34.144919" } } \ No newline at end of file diff --git a/modules/nf-core/fastqscan/environment.yml b/modules/nf-core/fastqscan/environment.yml new file mode 100644 index 00000000..b372c857 --- /dev/null +++ b/modules/nf-core/fastqscan/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::fastq-scan=0.4.4 diff --git a/modules/nf-core/fastqscan/main.nf b/modules/nf-core/fastqscan/main.nf new file mode 100644 index 00000000..913ff5ae --- /dev/null +++ b/modules/nf-core/fastqscan/main.nf @@ -0,0 +1,33 @@ +process FASTQSCAN { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/fastq-scan:0.4.4--h7d875b9_0' : + 'biocontainers/fastq-scan:0.4.4--h7d875b9_0' }" + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path("*.json"), emit: json + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + zcat $reads | \\ + fastq-scan \\ + $args > ${prefix}.json + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastqscan: \$( echo \$(fastq-scan -v 2>&1) | sed 's/^.*fastq-scan //' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/fastqscan/meta.yml b/modules/nf-core/fastqscan/meta.yml new file mode 100644 index 00000000..c6bf3474 --- /dev/null +++ b/modules/nf-core/fastqscan/meta.yml @@ -0,0 +1,49 @@ +name: fastqscan +description: FASTQ summary statistics in JSON format +keywords: + - fastq + - summary + - statistics +tools: + - fastqscan: + description: FASTQ summary statistics in JSON format + homepage: https://github.com/rpetit3/fastq-scan + documentation: https://github.com/rpetit3/fastq-scan + tool_dev_url: https://github.com/rpetit3/fastq-scan + licence: ["MIT"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: FASTQ file + pattern: "*.{fastq.gz,fq.gz}" + ontologies: [] +output: + json: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.json": + type: file + description: JSON formatted file of summary statistics + pattern: "*.json" + ontologies: + - edam: http://edamontology.org/format_3464 # JSON + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@rpetit3" +maintainers: + - "@rpetit3" diff --git a/modules/nf-core/fastqscan/tests/main.nf.test b/modules/nf-core/fastqscan/tests/main.nf.test new file mode 100644 index 00000000..aa08cdb6 --- /dev/null +++ b/modules/nf-core/fastqscan/tests/main.nf.test @@ -0,0 +1,35 @@ +nextflow_process { + + name "Test Process FASTQSCAN" + script "../main.nf" + process "FASTQSCAN" + tag "fastqscan" + tag "modules" + tag "modules_nfcore" + + test("Should run without failures") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/fastqscan/tests/main.nf.test.snap b/modules/nf-core/fastqscan/tests/main.nf.test.snap new file mode 100644 index 00000000..78883158 --- /dev/null +++ b/modules/nf-core/fastqscan/tests/main.nf.test.snap @@ -0,0 +1,33 @@ +{ + "Should run without failures": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.json:md5,2748bcc8d6dbe09ad2233ae5a22edcbe" + ] + ], + "1": [ + "versions.yml:md5,900a4e8a5bd719abdee37e7b61c9339f" + ], + "json": [ + [ + { + "id": "test", + "single_end": true + }, + "test.json:md5,2748bcc8d6dbe09ad2233ae5a22edcbe" + ] + ], + "versions": [ + "versions.yml:md5,900a4e8a5bd719abdee37e7b61c9339f" + ] + } + ], + "timestamp": "2023-10-17T14:28:58.206046021" + } 
+} \ No newline at end of file diff --git a/modules/nf-core/fastqscan/tests/tags.yml b/modules/nf-core/fastqscan/tests/tags.yml new file mode 100644 index 00000000..4e111203 --- /dev/null +++ b/modules/nf-core/fastqscan/tests/tags.yml @@ -0,0 +1,2 @@ +fastqscan: + - modules/nf-core/fastqscan/** diff --git a/modules/nf-core/filtlong/environment.yml b/modules/nf-core/filtlong/environment.yml new file mode 100644 index 00000000..1efde8eb --- /dev/null +++ b/modules/nf-core/filtlong/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::filtlong=0.2.1 diff --git a/modules/nf-core/filtlong/main.nf b/modules/nf-core/filtlong/main.nf new file mode 100644 index 00000000..e2efd878 --- /dev/null +++ b/modules/nf-core/filtlong/main.nf @@ -0,0 +1,39 @@ +process FILTLONG { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/filtlong:0.2.1--h9a82719_0' : + 'biocontainers/filtlong:0.2.1--h9a82719_0' }" + + input: + tuple val(meta), path(shortreads), path(longreads) + + output: + tuple val(meta), path("*.fastq.gz"), emit: reads + tuple val(meta), path("*.log") , emit: log + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def short_reads = !shortreads ? "" : meta.single_end ? "-1 $shortreads" : "-1 ${shortreads[0]} -2 ${shortreads[1]}" + if ("$longreads" == "${prefix}.fastq.gz") error "Longread FASTQ input and output names are the same, set prefix in module configuration to disambiguate!" 
+ """ + filtlong \\ + $short_reads \\ + $args \\ + $longreads \\ + 2>| >(tee ${prefix}.log >&2) \\ + | gzip -n > ${prefix}.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + filtlong: \$( filtlong --version | sed -e "s/Filtlong v//g" ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/filtlong/meta.yml b/modules/nf-core/filtlong/meta.yml new file mode 100644 index 00000000..08ad8acd --- /dev/null +++ b/modules/nf-core/filtlong/meta.yml @@ -0,0 +1,74 @@ +name: filtlong +description: Filtlong filters long reads based on quality measures or short read data. +keywords: + - nanopore + - quality control + - QC + - filtering + - long reads + - short reads +tools: + - filtlong: + description: Filtlong is a tool for filtering long reads. It can take a set of + long reads and produce a smaller, better subset. It uses both read length (longer + is better) and read identity (higher is better) when choosing which reads pass + the filter. + homepage: https://anaconda.org/bioconda/filtlong + tool_dev_url: https://github.com/rrwick/Filtlong + licence: ["GPL v3"] + identifier: biotools:filtlong +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - shortreads: + type: file + description: fastq file + pattern: "*.{fq,fastq,fq.gz,fastq.gz}" + ontologies: + - edam: http://edamontology.org/format_1930 # FASTQ + - longreads: + type: file + description: fastq file + pattern: "*.{fq,fastq,fq.gz,fastq.gz}" + ontologies: + - edam: http://edamontology.org/format_1930 # FASTQ +output: + reads: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.fastq.gz": + type: file + description: Filtered (compressed) fastq file + pattern: "*.fastq.gz" + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format + log: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.log": + type: file + description: Standard error logging file containing summary statistics + pattern: "*.log" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@d4straub" + - "@sofstam" +maintainers: + - "@d4straub" + - "@sofstam" diff --git a/modules/nf-core/filtlong/tests/main.nf.test b/modules/nf-core/filtlong/tests/main.nf.test new file mode 100644 index 00000000..d54ce39c --- /dev/null +++ b/modules/nf-core/filtlong/tests/main.nf.test @@ -0,0 +1,108 @@ +nextflow_process { + + name "Test Process FILTLONG" + script "../main.nf" + process "FILTLONG" + config "./nextflow.config" + tag "filtlong" + tag "modules" + tag "modules_nfcore" + + test("sarscov2 nanopore [fastq]") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/nanopore/fastq/test.fastq.gz', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.log.get(0).get(1)).readLines().contains("Scoring long reads")}, + { assert snapshot( + process.out.reads, + process.out.versions + ).match() + } + ) + } + + } + + + test("sarscov2 nanopore [fastq] + Illumina single-end [fastq]") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/nanopore/fastq/test.fastq.gz', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.log.get(0).get(1)).readLines().contains("Scoring long 
reads")}, + { assert snapshot( + process.out.reads, + process.out.versions + ).match() + } + ) + } + + } + + + test("sarscov2 nanopore [fastq] + Illumina paired-end [fastq]") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/nanopore/fastq/test.fastq.gz', checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.log.get(0).get(1)).readLines().contains("Scoring long reads")}, + { assert snapshot( + process.out.reads, + process.out.versions + ).match() + } + ) + } + + } +} diff --git a/modules/nf-core/filtlong/tests/main.nf.test.snap b/modules/nf-core/filtlong/tests/main.nf.test.snap new file mode 100644 index 00000000..1a25c3fc --- /dev/null +++ b/modules/nf-core/filtlong/tests/main.nf.test.snap @@ -0,0 +1,65 @@ +{ + "sarscov2 nanopore [fastq]": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test_lr.fastq.gz:md5,7567d853ada6ac142332619d0b541d76" + ] + ], + [ + "versions.yml:md5,af5988f30157282acdb0ac50ebb4c8cc" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-06T10:51:29.197603" + }, + "sarscov2 nanopore [fastq] + Illumina paired-end [fastq]": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test_lr.fastq.gz:md5,7567d853ada6ac142332619d0b541d76" + ] + ], + [ + "versions.yml:md5,af5988f30157282acdb0ac50ebb4c8cc" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-06T10:51:39.68464" + }, + "sarscov2 nanopore [fastq] + Illumina single-end [fastq]": { + "content": [ + [ + [ + { + 
"id": "test", + "single_end": true + }, + "test_lr.fastq.gz:md5,7567d853ada6ac142332619d0b541d76" + ] + ], + [ + "versions.yml:md5,af5988f30157282acdb0ac50ebb4c8cc" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-06T10:51:34.404022" + } +} \ No newline at end of file diff --git a/modules/nf-core/filtlong/tests/nextflow.config b/modules/nf-core/filtlong/tests/nextflow.config new file mode 100644 index 00000000..d366b4c3 --- /dev/null +++ b/modules/nf-core/filtlong/tests/nextflow.config @@ -0,0 +1,4 @@ +process { + ext.args = "--min_length 10" + ext.prefix = "test_lr" +} diff --git a/modules/nf-core/freebayes/environment.yml b/modules/nf-core/freebayes/environment.yml new file mode 100644 index 00000000..79cb3ead --- /dev/null +++ b/modules/nf-core/freebayes/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::freebayes=1.3.10 diff --git a/modules/nf-core/freebayes/main.nf b/modules/nf-core/freebayes/main.nf new file mode 100644 index 00000000..9d949add --- /dev/null +++ b/modules/nf-core/freebayes/main.nf @@ -0,0 +1,61 @@ +process FREEBAYES { + tag "${meta.id}" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://depot.galaxyproject.org/singularity/freebayes:1.3.10--hbefcdb2_0' + : 'biocontainers/freebayes:1.3.10--hbefcdb2_0'}" + + input: + tuple val(meta), path(input_1), path(input_1_index), path(input_2), path(input_2_index), path(target_bed) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fasta_fai) + tuple val(meta4), path(samples) + tuple val(meta5), path(populations) + tuple val(meta6), path(cnv) + + output: + tuple val(meta), path("*.vcf.gz"), emit: vcf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def input = input_2 ? "${input_1} ${input_2}" : "${input_1}" + def targets_file = target_bed ? "--target ${target_bed}" : "" + def samples_file = samples ? "--samples ${samples}" : "" + def populations_file = populations ? "--populations ${populations}" : "" + def cnv_file = cnv ? "--cnv-map ${cnv}" : "" + """ + freebayes \\ + -f ${fasta} \\ + ${targets_file} \\ + ${samples_file} \\ + ${populations_file} \\ + ${cnv_file} \\ + ${args} \\ + ${input} > ${prefix}.vcf + + bgzip ${prefix}.vcf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + freebayes: \$(echo \$(freebayes --version 2>&1) | sed 's/version:\s*v//g' ) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + echo | gzip > ${prefix}.vcf.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + freebayes: \$(echo \$(freebayes --version 2>&1) | sed 's/version:\s*v//g' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/freebayes/meta.yml b/modules/nf-core/freebayes/meta.yml new file mode 100644 index 00000000..5593bcaa --- /dev/null +++ b/modules/nf-core/freebayes/meta.yml @@ -0,0 +1,135 @@ +name: freebayes +description: A haplotype-based variant detector +keywords: + - variant caller + - SNP + - genotyping + - somatic variant calling + - germline variant calling + - bacterial variant calling + - bayesian +tools: + 
- freebayes: + description: Bayesian haplotype-based polymorphism discovery and genotyping + homepage: https://github.com/freebayes/freebayes + documentation: https://github.com/freebayes/freebayes + tool_dev_url: https://github.com/freebayes/freebayes + doi: "10.48550/arXiv.1207.3907" + licence: ["MIT"] + identifier: biotools:freebayes +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input_1: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + ontologies: [] + - input_1_index: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai}" + ontologies: [] + - input_2: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + ontologies: [] + - input_2_index: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai}" + ontologies: [] + - target_bed: + type: file + description: Optional - Limit analysis to targets listed in this BED-format + FILE. + pattern: "*.bed" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test_reference' ] + - fasta: + type: file + description: reference fasta file + pattern: ".{fa,fa.gz,fasta,fasta.gz}" + ontologies: [] + - - meta3: + type: map + description: | + Groovy Map containing reference information. + e.g. [ id:'test_reference' ] + - fasta_fai: + type: file + description: reference fasta file index + pattern: "*.{fa,fasta}.fai" + ontologies: [] + - - meta4: + type: map + description: | + Groovy Map containing meta information for the samples file. + e.g. [ id:'test_samples' ] + - samples: + type: file + description: Optional - Limit analysis to samples listed (one per line) in the + FILE. + pattern: "*.txt" + ontologies: [] + - - meta5: + type: map + description: | + Groovy Map containing meta information for the populations file. + e.g. 
[ id:'test_populations' ] + - populations: + type: file + description: Optional - Each line of FILE should list a sample and a population + which it is part of. + pattern: "*.txt" + ontologies: [] + - - meta6: + type: map + description: | + Groovy Map containing meta information for the cnv file. + e.g. [ id:'test_cnv' ] + - cnv: + type: file + description: | + A copy number map BED file, which has either a sample-level ploidy: + sample_name copy_number + or a region-specific format: + seq_name start end sample_name copy_number + pattern: "*.bed" + ontologies: [] +output: + vcf: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.vcf.gz": + type: file + description: Compressed VCF file + pattern: "*.vcf.gz" + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format + versions: + - versions.yml: + type: file + description: File containing software version + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@maxibor" + - "@FriederikeHanssen" + - "@maxulysse" +maintainers: + - "@maxibor" + - "@FriederikeHanssen" + - "@maxulysse" diff --git a/modules/nf-core/freebayes/tests/main.nf.test b/modules/nf-core/freebayes/tests/main.nf.test new file mode 100644 index 00000000..18334a24 --- /dev/null +++ b/modules/nf-core/freebayes/tests/main.nf.test @@ -0,0 +1,227 @@ +nextflow_process { + + name "Test Process FREEBAYES" + script "../main.nf" + process "FREEBAYES" + + tag "modules" + tag "modules_nfcore" + tag "freebayes" + + test("sarscov2 - [ bam, bai ] - fasta - fai") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + [], + [], + [] + 
] + input[1] = [ [ id: 'test_fasta' ], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] + input[2] = [ [ id: 'test_fai' ], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) ] + input[3] = [ [], [] ] + input[4] = [ [], [] ] + input[5] = [ [], [] ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.vcf[0][1]).name, + path(process.out.vcf[0][1]).vcf.variantsMD5, + process.out.versions + ).match() + } + ) + } + + } + + test("sarscov2 - [ bam, bai, bed ] - fasta - fai") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + [], + [], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test.bed', checkIfExists: true), + ] + input[1] = [ [ id: 'fasta' ], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] + input[2] = [ [ id: 'fai' ], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) ] + input[3] = [ [], [] ] + input[4] = [ [], [] ] + input[5] = [ [], [] ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.vcf[0][1]).name, + path(process.out.vcf[0][1]).vcf.summary, // the created vcf file has no variants + process.out.versions + ).match() + } + ) + } + + } + + test("sarscov2 - [ cram, crai ] - fasta - fai") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + 
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true), + [], + [], + [], + ] + input[1] = [ [ id: 'fasta' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + input[2] = [ [ id: 'fai' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) ] + input[3] = [ [], [] ] + input[4] = [ [], [] ] + input[5] = [ [], [] ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.vcf[0][1]).name, + path(process.out.vcf[0][1]).vcf.variantsMD5, + process.out.versions + ).match() + } + ) + } + + } + + test("sarscov2 - [ bam, bai, bam, bai ] - fasta - fai") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam.bai', checkIfExists: true), + [], + ] + input[1] = [ [ id: 'fasta' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + input[2] = [ [ id: 'fai' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) ] + input[3] = [ [], [] ] + input[4] = [ [], [] ] + input[5] = [ [], [] ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.vcf[0][1]).name, + path(process.out.vcf[0][1]).vcf.variantsMD5, + process.out.versions + ).match() + } + 
) + } + + } + + test("sarscov2 - [ cram, crai, cram, crai, bed ] - fasta - fai") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test2.paired_end.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test2.paired_end.sorted.cram.crai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.bed', checkIfExists: true), + ] + input[1] = [ [ id: 'fasta' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ] + input[2] = [ [ id: 'fai' ], file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) ] + input[3] = [ [], [] ] + input[4] = [ [], [] ] + input[5] = [ [], [] ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.vcf[0][1]).name, + path(process.out.vcf[0][1]).vcf.variantsMD5, + process.out.versions + ).match() + } + ) + } + + } + + test("sarscov2 - [ bam, bai ] - fasta - fai - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + [], + [], + [] + ] + input[1] = [ [ id: 'test_fasta' ], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] + 
input[2] = [ [ id: 'test_fai' ], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) ] + input[3] = [ [], [] ] + input[4] = [ [], [] ] + input[5] = [ [], [] ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out, + path(process.out.versions[0]).yaml + ).match() + } + ) + } + + } +} diff --git a/modules/nf-core/freebayes/tests/main.nf.test.snap b/modules/nf-core/freebayes/tests/main.nf.test.snap new file mode 100644 index 00000000..a4b0b86f --- /dev/null +++ b/modules/nf-core/freebayes/tests/main.nf.test.snap @@ -0,0 +1,112 @@ +{ + "sarscov2 - [ cram, crai, cram, crai, bed ] - fasta - fai": { + "content": [ + "test.vcf.gz", + "5d2758e9ebd427d59c8d1eebdd1c0f44", + [ + "versions.yml:md5,0753be9305ce1577b49b9914cea3b496" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.3" + }, + "timestamp": "2025-08-11T13:50:34.700159" + }, + "sarscov2 - [ bam, bai ] - fasta - fai": { + "content": [ + "test.vcf.gz", + "f68e3a94e542c7cc84cdedcfc3f6190c", + [ + "versions.yml:md5,0753be9305ce1577b49b9914cea3b496" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.3" + }, + "timestamp": "2025-08-11T13:50:15.426043" + }, + "sarscov2 - [ bam, bai ] - fasta - fai - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + "versions.yml:md5,0753be9305ce1577b49b9914cea3b496" + ], + "vcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,0753be9305ce1577b49b9914cea3b496" + ] + }, + { + "FREEBAYES": { + "freebayes": "1.3.10" + } + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.3" + }, + "timestamp": "2025-08-11T13:50:39.015789" + }, + "sarscov2 - [ cram, crai ] - fasta - fai": { + "content": [ + "test.vcf.gz", + "f578a2043bd4303e16487b64ea6ff524", + 
[ + "versions.yml:md5,0753be9305ce1577b49b9914cea3b496" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.3" + }, + "timestamp": "2025-08-11T14:06:43.891953" + }, + "sarscov2 - [ bam, bai, bed ] - fasta - fai": { + "content": [ + "test.vcf.gz", + "VcfFile [chromosomes=[], sampleCount=1, variantCount=0, phased=true, phasedAutodetect=true]", + [ + "versions.yml:md5,0753be9305ce1577b49b9914cea3b496" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.3" + }, + "timestamp": "2025-08-11T14:06:39.11959" + }, + "sarscov2 - [ bam, bai, bam, bai ] - fasta - fai": { + "content": [ + "test.vcf.gz", + "5d2758e9ebd427d59c8d1eebdd1c0f44", + [ + "versions.yml:md5,0753be9305ce1577b49b9914cea3b496" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.3" + }, + "timestamp": "2025-08-11T13:50:29.470044" + } +} \ No newline at end of file diff --git a/modules/nf-core/gunzip/environment.yml b/modules/nf-core/gunzip/environment.yml new file mode 100644 index 00000000..9b926b1f --- /dev/null +++ b/modules/nf-core/gunzip/environment.yml @@ -0,0 +1,12 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - conda-forge::coreutils=9.5 + - conda-forge::grep=3.11 + - conda-forge::gzip=1.13 + - conda-forge::lbzip2=2.5 + - conda-forge::sed=4.8 + - conda-forge::tar=1.34 diff --git a/modules/nf-core/gunzip/main.nf b/modules/nf-core/gunzip/main.nf new file mode 100644 index 00000000..a8533e74 --- /dev/null +++ b/modules/nf-core/gunzip/main.nf @@ -0,0 +1,45 @@ +process GUNZIP { + tag "${archive}" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/52/52ccce28d2ab928ab862e25aae26314d69c8e38bd41ca9431c67ef05221348aa/data' + : 'community.wave.seqera.io/library/coreutils_grep_gzip_lbzip2_pruned:838ba80435a629f8'}" + + input: + tuple val(meta), path(archive) + + output: + tuple val(meta), path("${gunzip}"), emit: gunzip + tuple val("${task.process}"), val('gunzip'), eval('gunzip --version 2>&1 | head -1 | sed "s/^.*(gzip) //; s/ Copyright.*//"'), topic: versions, emit: versions_gunzip + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def extension = (archive.toString() - '.gz').tokenize('.')[-1] + def name = archive.toString() - '.gz' - ".${extension}" + def prefix = task.ext.prefix ?: name + gunzip = prefix + ".${extension}" + """ + # Not calling gunzip itself because it creates files + # with the original group ownership rather than the + # default one for that user / the work directory + gzip \\ + -cd \\ + ${args} \\ + ${archive} \\ + > ${gunzip} + """ + + stub: + def extension = (archive.toString() - '.gz').tokenize('.')[-1] + def name = archive.toString() - '.gz' - ".${extension}" + def prefix = task.ext.prefix ?: name + gunzip = prefix + ".${extension}" + """ + touch ${gunzip} + """ +} diff --git a/modules/nf-core/gunzip/meta.yml b/modules/nf-core/gunzip/meta.yml new file mode 100644 index 00000000..bba6b3ba --- /dev/null +++ b/modules/nf-core/gunzip/meta.yml @@ -0,0 +1,68 @@ +name: gunzip +description: Compresses and decompresses files. +keywords: + - gunzip + - compression + - decompression +tools: + - gunzip: + description: | + gzip is a file format and a software application used for file compression and decompression. + documentation: https://www.gnu.org/software/gzip/manual/gzip.html + licence: ["GPL-3.0-or-later"] + identifier: "" +input: + - - meta: + type: map + description: | + Optional groovy Map containing meta information + e.g. 
[ id:'test', single_end:false ] + - archive: + type: file + description: File to be compressed/uncompressed + pattern: "*.*" + ontologies: [] +output: + gunzip: + - - meta: + type: file + description: Compressed/uncompressed file + pattern: "*.*" + ontologies: [] + - ${gunzip}: + type: file + description: Compressed/uncompressed file + pattern: "*.*" + ontologies: [] + versions_gunzip: + - - ${task.process}: + type: string + description: The process the versions were collected from + - gunzip: + type: string + description: The tool name + - gunzip --version 2>&1 | head -1 | sed "s/^.*(gzip) //; s/ Copyright.*//": + type: eval + description: The expression to obtain the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - gunzip: + type: string + description: The tool name + - gunzip --version 2>&1 | head -1 | sed "s/^.*(gzip) //; s/ Copyright.*//": + type: eval + description: The expression to obtain the version of the tool + +authors: + - "@joseespinosa" + - "@drpatelh" + - "@jfy133" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@jfy133" + - "@gallvp" diff --git a/modules/nf-core/gunzip/tests/main.nf.test b/modules/nf-core/gunzip/tests/main.nf.test new file mode 100644 index 00000000..776211ad --- /dev/null +++ b/modules/nf-core/gunzip/tests/main.nf.test @@ -0,0 +1,121 @@ +nextflow_process { + + name "Test Process GUNZIP" + script "../main.nf" + process "GUNZIP" + tag "gunzip" + tag "modules_nfcore" + tag "modules" + + test("Should run without failures") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("Should run without failures - prefix") { + + config 
'./nextflow.config' + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id: 'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("Should run without failures - stub") { + + options '-stub' + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("Should run without failures - prefix - stub") { + + options '-stub' + config './nextflow.config' + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id: 'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/gunzip/tests/main.nf.test.snap b/modules/nf-core/gunzip/tests/main.nf.test.snap new file mode 100644 index 00000000..111ba1bc --- /dev/null +++ b/modules/nf-core/gunzip/tests/main.nf.test.snap @@ -0,0 +1,166 @@ +{ + "Should run without failures - prefix - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.xyz.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + "GUNZIP", + "gunzip", + "1.13" + ] + ], + "gunzip": [ + [ + { + "id": "test" + }, + "test.xyz.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_gunzip": [ + [ + "GUNZIP", + "gunzip", + "1.13" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + 
"timestamp": "2026-01-19T17:21:56.633550769" + }, + "Should run without failures - stub": { + "content": [ + { + "0": [ + [ + [ + + ], + "test_1.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + "GUNZIP", + "gunzip", + "1.13" + ] + ], + "gunzip": [ + [ + [ + + ], + "test_1.fastq:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_gunzip": [ + [ + "GUNZIP", + "gunzip", + "1.13" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-19T17:21:51.435621199" + }, + "Should run without failures": { + "content": [ + { + "0": [ + [ + [ + + ], + "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "1": [ + [ + "GUNZIP", + "gunzip", + "1.13" + ] + ], + "gunzip": [ + [ + [ + + ], + "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "versions_gunzip": [ + [ + "GUNZIP", + "gunzip", + "1.13" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-19T17:21:40.613975821" + }, + "Should run without failures - prefix": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.xyz.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "1": [ + [ + "GUNZIP", + "gunzip", + "1.13" + ] + ], + "gunzip": [ + [ + { + "id": "test" + }, + "test.xyz.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "versions_gunzip": [ + [ + "GUNZIP", + "gunzip", + "1.13" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-19T17:21:46.086880414" + } +} \ No newline at end of file diff --git a/modules/nf-core/gunzip/tests/nextflow.config b/modules/nf-core/gunzip/tests/nextflow.config new file mode 100644 index 00000000..dec77642 --- /dev/null +++ b/modules/nf-core/gunzip/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: GUNZIP { + ext.prefix = { "${meta.id}.xyz" } + } +} diff --git a/modules/nf-core/minimap2/align/environment.yml b/modules/nf-core/minimap2/align/environment.yml new file mode 
100644 index 00000000..17886061 --- /dev/null +++ b/modules/nf-core/minimap2/align/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda + +dependencies: + - bioconda::minimap2=2.29 + - bioconda::samtools=1.21 diff --git a/modules/nf-core/minimap2/align/main.nf b/modules/nf-core/minimap2/align/main.nf new file mode 100644 index 00000000..2503a06d --- /dev/null +++ b/modules/nf-core/minimap2/align/main.nf @@ -0,0 +1,78 @@ +process MINIMAP2_ALIGN { + tag "$meta.id" + label 'process_high' + + // Note: the versions here need to match the versions used in the mulled container below and minimap2/index + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/66/66dc96eff11ab80dfd5c044e9b3425f52d818847b9c074794cf0c02bfa781661/data' : + 'community.wave.seqera.io/library/minimap2_samtools:33bb43c18d22e29c' }" + + input: + tuple val(meta), path(reads) + tuple val(meta2), path(reference) + val bam_format + val bam_index_extension + val cigar_paf_format + val cigar_bam + + output: + tuple val(meta), path("*.paf") , optional: true, emit: paf + tuple val(meta), path("*.bam") , optional: true, emit: bam + tuple val(meta), path("*.bam.${bam_index_extension}"), optional: true, emit: index + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def args3 = task.ext.args3 ?: '' + def args4 = task.ext.args4 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def bam_index = bam_index_extension ? "${prefix}.bam##idx##${prefix}.bam.${bam_index_extension} --write-index" : "${prefix}.bam" + def bam_output = bam_format ? 
"-a | samtools sort -@ ${task.cpus-1} -o ${bam_index} ${args2}" : "-o ${prefix}.paf" + def cigar_paf = cigar_paf_format && !bam_format ? "-c" : '' + def set_cigar_bam = cigar_bam && bam_format ? "-L" : '' + def bam_input = "${reads.extension}".matches('sam|bam|cram') + def samtools_reset_fastq = bam_input ? "samtools reset --threads ${task.cpus-1} $args3 $reads | samtools fastq --threads ${task.cpus-1} $args4 |" : '' + def query = bam_input ? "-" : reads + def target = reference ?: (bam_input ? error("BAM input requires reference") : reads) + + """ + $samtools_reset_fastq \\ + minimap2 \\ + $args \\ + -t $task.cpus \\ + $target \\ + $query \\ + $cigar_paf \\ + $set_cigar_bam \\ + $bam_output + + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + minimap2: \$(minimap2 --version 2>&1) + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def output_file = bam_format ? "${prefix}.bam" : "${prefix}.paf" + def bam_index = bam_index_extension ? "touch ${prefix}.bam.${bam_index_extension}" : "" + def bam_input = "${reads.extension}".matches('sam|bam|cram') + def target = reference ?: (bam_input ? error("BAM input requires reference") : reads) + + """ + touch $output_file + ${bam_index} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + minimap2: \$(minimap2 --version 2>&1) + END_VERSIONS + """ +} diff --git a/modules/nf-core/minimap2/align/meta.yml b/modules/nf-core/minimap2/align/meta.yml new file mode 100644 index 00000000..b501526e --- /dev/null +++ b/modules/nf-core/minimap2/align/meta.yml @@ -0,0 +1,106 @@ +name: minimap2_align +description: A versatile pairwise aligner for genomic and spliced nucleotide sequences +keywords: + - align + - fasta + - fastq + - genome + - paf + - reference +tools: + - minimap2: + description: | + A versatile pairwise aligner for genomic and spliced nucleotide sequences. 
+ homepage: https://github.com/lh3/minimap2 + documentation: https://github.com/lh3/minimap2#uguide + licence: ["MIT"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FASTA or FASTQ files of size 1 and 2 for single-end + and paired-end data, respectively. + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test_ref'] + - reference: + type: file + description: | + Reference database in FASTA format. + ontologies: [] + - bam_format: + type: boolean + description: Specify that output should be in BAM format + - bam_index_extension: + type: string + description: BAM alignment index extension (e.g. "bai") + - cigar_paf_format: + type: boolean + description: Specify that output CIGAR should be in PAF format + - cigar_bam: + type: boolean + description: | + Write CIGAR with >65535 ops at the CG tag. This is recommended when + doing XYZ (https://github.com/lh3/minimap2#working-with-65535-cigar-operations) +output: + paf: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.paf": + type: file + description: Alignment in PAF format + pattern: "*.paf" + ontologies: [] + bam: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bam": + type: file + description: Alignment in BAM format + pattern: "*.bam" + ontologies: [] + index: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.bam.${bam_index_extension}": + type: file + description: BAM alignment index + pattern: "*.bam.*" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@heuermh" + - "@sofstam" + - "@sateeshperi" + - "@jfy133" + - "@fellen31" +maintainers: + - "@heuermh" + - "@sofstam" + - "@sateeshperi" + - "@jfy133" + - "@fellen31" diff --git a/modules/nf-core/minimap2/align/tests/main.nf.test b/modules/nf-core/minimap2/align/tests/main.nf.test new file mode 100644 index 00000000..65061404 --- /dev/null +++ b/modules/nf-core/minimap2/align/tests/main.nf.test @@ -0,0 +1,441 @@ +nextflow_process { + + name "Test Process MINIMAP2_ALIGN" + script "../main.nf" + process "MINIMAP2_ALIGN" + + tag "modules" + tag "modules_nfcore" + tag "minimap2" + tag "minimap2/align" + + test("sarscov2 - fastq, fasta, true, [], false, false") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getHeader(), + bam(process.out.bam[0][1]).getReadsMD5(), + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, fasta, true, 'bai', false, false") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // 
meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = 'bai' + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getHeader(), + bam(process.out.bam[0][1]).getReadsMD5(), + file(process.out.index[0][1]).name, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], fasta, true, false, false") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getHeader(), + bam(process.out.bam[0][1]).getReadsMD5(), + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, [], true, false, false") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + ] + input[1] = [ + [ id:'test_ref' ], // meta map + [] + ] + input[2] = true + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getHeader(), + bam(process.out.bam[0][1]).getReadsMD5(), + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - bam, fasta, true, [], false, false") { + 
+ when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getHeader(), + bam(process.out.bam[0][1]).getReadsMD5(), + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - bam, fasta, true, 'bai', false, false") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = 'bai' + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getHeader(), + bam(process.out.bam[0][1]).getReadsMD5(), + file(process.out.index[0][1]).name, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - bam, [], true, false, false") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + [] + ] + input[2] = true + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.failed } + ) + } + + } + + 
test("sarscov2 - fastq, fasta, true, [], false, false - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - fastq, fasta, true, 'bai', false, false - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = 'bai' + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - fastq, fasta, false, [], false, false - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = false + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - bam, fasta, 
true, [], false, false - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - bam, fasta, true, 'bai', false, false - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = 'bai' + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - bam, [], true, false, false - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + [] + ] + input[2] = true + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.failed } + ) + } + + } + +} diff --git a/modules/nf-core/minimap2/align/tests/main.nf.test.snap b/modules/nf-core/minimap2/align/tests/main.nf.test.snap new file 
mode 100644 index 00000000..89f20336 --- /dev/null +++ b/modules/nf-core/minimap2/align/tests/main.nf.test.snap @@ -0,0 +1,476 @@ +{ + "sarscov2 - bam, fasta, true, 'bai', false, false": { + "content": [ + [ + "@HD\tVN:1.6\tSO:coordinate", + "@SQ\tSN:MT192765.1\tLN:29829", + "@PG\tID:minimap2\tPN:minimap2\tVN:2.29-r1283\tCL:minimap2 -t 2 -a genome.fasta -", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.21\tCL:samtools sort -@ 1 -o test.bam##idx##test.bam.bai --write-index" + ], + "5d426b9a5f5b2c54f1d7f1e4c238ae94", + "test.bam.bai", + [ + "versions.yml:md5,660fcf8ff66d4dce2045ffa0e325eed8" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-22T14:48:23.829797899" + }, + "sarscov2 - bam, fasta, true, 'bai', false, false - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,231f31609e2b72661af6a11b7aee3cfe" + ], + "bam": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "index": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "paf": [ + + ], + "versions": [ + "versions.yml:md5,231f31609e2b72661af6a11b7aee3cfe" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-22T14:48:54.665655242" + }, + "sarscov2 - fastq, fasta, true, 'bai', false, false - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,231f31609e2b72661af6a11b7aee3cfe" + 
], + "bam": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "index": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "paf": [ + + ], + "versions": [ + "versions.yml:md5,231f31609e2b72661af6a11b7aee3cfe" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-22T14:48:38.492212433" + }, + "sarscov2 - fastq, fasta, false, [], false, false - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.paf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,231f31609e2b72661af6a11b7aee3cfe" + ], + "bam": [ + + ], + "index": [ + + ], + "paf": [ + [ + { + "id": "test", + "single_end": true + }, + "test.paf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,231f31609e2b72661af6a11b7aee3cfe" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-22T14:48:43.879647142" + }, + "sarscov2 - fastq, fasta, true, [], false, false - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,231f31609e2b72661af6a11b7aee3cfe" + ], + "bam": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "index": [ + + ], + "paf": [ + + ], + "versions": [ + "versions.yml:md5,231f31609e2b72661af6a11b7aee3cfe" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-22T14:48:33.262333471" + }, + "sarscov2 - [fastq1, fastq2], fasta, true, false, false": { + "content": [ + [ + "@HD\tVN:1.6\tSO:coordinate", + "@SQ\tSN:MT192765.1\tLN:29829", + "@PG\tID:minimap2\tPN:minimap2\tVN:2.29-r1283\tCL:minimap2 -t 2 -a 
genome.fasta test_1.fastq.gz test_2.fastq.gz", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.21\tCL:samtools sort -@ 1 -o test.bam" + ], + "1bc392244f228bf52cf0b5a8f6a654c9", + [ + "versions.yml:md5,660fcf8ff66d4dce2045ffa0e325eed8" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-22T14:48:07.571731983" + }, + "sarscov2 - fastq, fasta, true, [], false, false": { + "content": [ + [ + "@HD\tVN:1.6\tSO:coordinate", + "@SQ\tSN:MT192765.1\tLN:29829", + "@PG\tID:minimap2\tPN:minimap2\tVN:2.29-r1283\tCL:minimap2 -t 2 -a genome.fasta test_1.fastq.gz", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.21\tCL:samtools sort -@ 1 -o test.bam" + ], + "f194745c0ccfcb2a9c0aee094a08750", + [ + "versions.yml:md5,660fcf8ff66d4dce2045ffa0e325eed8" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-22T14:47:56.497792473" + }, + "sarscov2 - fastq, fasta, true, 'bai', false, false": { + "content": [ + [ + "@HD\tVN:1.6\tSO:coordinate", + "@SQ\tSN:MT192765.1\tLN:29829", + "@PG\tID:minimap2\tPN:minimap2\tVN:2.29-r1283\tCL:minimap2 -t 2 -a genome.fasta test_1.fastq.gz", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.21\tCL:samtools sort -@ 1 -o test.bam##idx##test.bam.bai --write-index" + ], + "f194745c0ccfcb2a9c0aee094a08750", + "test.bam.bai", + [ + "versions.yml:md5,660fcf8ff66d4dce2045ffa0e325eed8" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-22T14:48:01.888544427" + }, + "sarscov2 - bam, fasta, true, [], false, false": { + "content": [ + [ + "@HD\tVN:1.6\tSO:coordinate", + "@SQ\tSN:MT192765.1\tLN:29829", + "@PG\tID:minimap2\tPN:minimap2\tVN:2.29-r1283\tCL:minimap2 -t 2 -a genome.fasta -", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.21\tCL:samtools sort -@ 1 -o test.bam" + ], + "5d426b9a5f5b2c54f1d7f1e4c238ae94", + [ + "versions.yml:md5,660fcf8ff66d4dce2045ffa0e325eed8" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": 
"24.10.5" + }, + "timestamp": "2025-04-22T14:48:18.376062313" + }, + "sarscov2 - bam, fasta, true, [], false, false - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,231f31609e2b72661af6a11b7aee3cfe" + ], + "bam": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "index": [ + + ], + "paf": [ + + ], + "versions": [ + "versions.yml:md5,231f31609e2b72661af6a11b7aee3cfe" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-22T14:48:49.268693724" + }, + "sarscov2 - fastq, [], true, false, false": { + "content": [ + [ + "@HD\tVN:1.6\tSO:coordinate", + "@SQ\tSN:ERR5069949.2151832\tLN:150", + "@SQ\tSN:ERR5069949.576388\tLN:77", + "@SQ\tSN:ERR5069949.501486\tLN:146", + "@SQ\tSN:ERR5069949.1331889\tLN:132", + "@SQ\tSN:ERR5069949.2161340\tLN:80", + "@SQ\tSN:ERR5069949.973930\tLN:79", + "@SQ\tSN:ERR5069949.2417063\tLN:150", + "@SQ\tSN:ERR5069949.376959\tLN:151", + "@SQ\tSN:ERR5069949.1088785\tLN:149", + "@SQ\tSN:ERR5069949.1066259\tLN:147", + "@SQ\tSN:ERR5069949.2832676\tLN:139", + "@SQ\tSN:ERR5069949.2953930\tLN:151", + "@SQ\tSN:ERR5069949.324865\tLN:151", + "@SQ\tSN:ERR5069949.2185111\tLN:150", + "@SQ\tSN:ERR5069949.937422\tLN:151", + "@SQ\tSN:ERR5069949.2431709\tLN:150", + "@SQ\tSN:ERR5069949.1246538\tLN:148", + "@SQ\tSN:ERR5069949.1189252\tLN:98", + "@SQ\tSN:ERR5069949.2216307\tLN:147", + "@SQ\tSN:ERR5069949.3273002\tLN:148", + "@SQ\tSN:ERR5069949.3277445\tLN:151", + "@SQ\tSN:ERR5069949.3022231\tLN:147", + "@SQ\tSN:ERR5069949.184542\tLN:151", + "@SQ\tSN:ERR5069949.540529\tLN:149", + "@SQ\tSN:ERR5069949.686090\tLN:150", + "@SQ\tSN:ERR5069949.2787556\tLN:106", + "@SQ\tSN:ERR5069949.2650879\tLN:150", + "@SQ\tSN:ERR5069949.2064910\tLN:149", + "@SQ\tSN:ERR5069949.2328704\tLN:150", + 
"@SQ\tSN:ERR5069949.1067032\tLN:150", + "@SQ\tSN:ERR5069949.3338256\tLN:151", + "@SQ\tSN:ERR5069949.1412839\tLN:147", + "@SQ\tSN:ERR5069949.1538968\tLN:150", + "@SQ\tSN:ERR5069949.147998\tLN:94", + "@SQ\tSN:ERR5069949.366975\tLN:106", + "@SQ\tSN:ERR5069949.1372331\tLN:151", + "@SQ\tSN:ERR5069949.1709367\tLN:129", + "@SQ\tSN:ERR5069949.2388984\tLN:150", + "@SQ\tSN:ERR5069949.1132353\tLN:150", + "@SQ\tSN:ERR5069949.1151736\tLN:151", + "@SQ\tSN:ERR5069949.479807\tLN:150", + "@SQ\tSN:ERR5069949.2176303\tLN:151", + "@SQ\tSN:ERR5069949.2772897\tLN:151", + "@SQ\tSN:ERR5069949.1020777\tLN:122", + "@SQ\tSN:ERR5069949.465452\tLN:151", + "@SQ\tSN:ERR5069949.1704586\tLN:149", + "@SQ\tSN:ERR5069949.1258508\tLN:151", + "@SQ\tSN:ERR5069949.986441\tLN:119", + "@SQ\tSN:ERR5069949.2674295\tLN:148", + "@SQ\tSN:ERR5069949.885966\tLN:79", + "@SQ\tSN:ERR5069949.2342766\tLN:151", + "@SQ\tSN:ERR5069949.3122970\tLN:127", + "@SQ\tSN:ERR5069949.3279513\tLN:72", + "@SQ\tSN:ERR5069949.309410\tLN:151", + "@SQ\tSN:ERR5069949.532979\tLN:149", + "@SQ\tSN:ERR5069949.2888794\tLN:151", + "@SQ\tSN:ERR5069949.2205229\tLN:150", + "@SQ\tSN:ERR5069949.786562\tLN:151", + "@SQ\tSN:ERR5069949.919671\tLN:151", + "@SQ\tSN:ERR5069949.1328186\tLN:151", + "@SQ\tSN:ERR5069949.870926\tLN:149", + "@SQ\tSN:ERR5069949.2257580\tLN:151", + "@SQ\tSN:ERR5069949.3249622\tLN:77", + "@SQ\tSN:ERR5069949.611123\tLN:125", + "@SQ\tSN:ERR5069949.651338\tLN:142", + "@SQ\tSN:ERR5069949.169513\tLN:92", + "@SQ\tSN:ERR5069949.155944\tLN:150", + "@SQ\tSN:ERR5069949.2033605\tLN:150", + "@SQ\tSN:ERR5069949.2730382\tLN:142", + "@SQ\tSN:ERR5069949.2125592\tLN:150", + "@SQ\tSN:ERR5069949.1062611\tLN:151", + "@SQ\tSN:ERR5069949.1778133\tLN:151", + "@SQ\tSN:ERR5069949.3057020\tLN:95", + "@SQ\tSN:ERR5069949.2972968\tLN:141", + "@SQ\tSN:ERR5069949.2734474\tLN:149", + "@SQ\tSN:ERR5069949.856527\tLN:151", + "@SQ\tSN:ERR5069949.2098070\tLN:151", + "@SQ\tSN:ERR5069949.1552198\tLN:150", + "@SQ\tSN:ERR5069949.2385514\tLN:150", + 
"@SQ\tSN:ERR5069949.2270078\tLN:151", + "@SQ\tSN:ERR5069949.114870\tLN:150", + "@SQ\tSN:ERR5069949.2668880\tLN:147", + "@SQ\tSN:ERR5069949.257821\tLN:139", + "@SQ\tSN:ERR5069949.2243023\tLN:150", + "@SQ\tSN:ERR5069949.2605155\tLN:146", + "@SQ\tSN:ERR5069949.1340552\tLN:151", + "@SQ\tSN:ERR5069949.1561137\tLN:150", + "@SQ\tSN:ERR5069949.2361683\tLN:149", + "@SQ\tSN:ERR5069949.2521353\tLN:150", + "@SQ\tSN:ERR5069949.1261808\tLN:149", + "@SQ\tSN:ERR5069949.2734873\tLN:98", + "@SQ\tSN:ERR5069949.3017828\tLN:107", + "@SQ\tSN:ERR5069949.573706\tLN:150", + "@SQ\tSN:ERR5069949.1980512\tLN:151", + "@SQ\tSN:ERR5069949.1014693\tLN:150", + "@SQ\tSN:ERR5069949.3184655\tLN:150", + "@SQ\tSN:ERR5069949.29668\tLN:89", + "@SQ\tSN:ERR5069949.3258358\tLN:151", + "@SQ\tSN:ERR5069949.1476386\tLN:151", + "@SQ\tSN:ERR5069949.2415814\tLN:150", + "@PG\tID:minimap2\tPN:minimap2\tVN:2.29-r1283\tCL:minimap2 -t 2 -a test_1.fastq.gz test_1.fastq.gz", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.21\tCL:samtools sort -@ 1 -o test.bam" + ], + "16c1c651f8ec67383bcdee3c55aed94f", + [ + "versions.yml:md5,660fcf8ff66d4dce2045ffa0e325eed8" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-22T14:48:12.942360555" + } +} \ No newline at end of file diff --git a/modules/nf-core/minimap2/index/environment.yml b/modules/nf-core/minimap2/index/environment.yml new file mode 100644 index 00000000..2f3ba0eb --- /dev/null +++ b/modules/nf-core/minimap2/index/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::minimap2=2.29 diff --git a/modules/nf-core/minimap2/index/main.nf b/modules/nf-core/minimap2/index/main.nf new file mode 100644 index 00000000..56cb0efd --- /dev/null +++ b/modules/nf-core/minimap2/index/main.nf @@ -0,0 +1,44 @@ +process MINIMAP2_INDEX { + label 'process_low' + + // 
Note: the versions here need to match the versions used in minimap2/align + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/minimap2:2.29--h577a1d6_0' : + 'biocontainers/minimap2:2.29--h577a1d6_0' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path("*.mmi"), emit: index + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + minimap2 \\ + -t $task.cpus \\ + -d ${fasta.baseName}.mmi \\ + $args \\ + $fasta + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + minimap2: \$(minimap2 --version 2>&1) + END_VERSIONS + """ + + stub: + """ + touch ${fasta.baseName}.mmi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + minimap2: \$(minimap2 --version 2>&1) + END_VERSIONS + """ +} diff --git a/modules/nf-core/minimap2/index/meta.yml b/modules/nf-core/minimap2/index/meta.yml new file mode 100644 index 00000000..0d6a2d86 --- /dev/null +++ b/modules/nf-core/minimap2/index/meta.yml @@ -0,0 +1,50 @@ +name: minimap2_index +description: Provides fasta index required by minimap2 alignment. +keywords: + - index + - fasta + - reference +tools: + - minimap2: + description: | + A versatile pairwise aligner for genomic and spliced nucleotide sequences. + homepage: https://github.com/lh3/minimap2 + documentation: https://github.com/lh3/minimap2#uguide + licence: ["MIT"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: | + Reference database in FASTA format. + ontologies: [] +output: + index: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.mmi": + type: file + description: Minimap2 fasta index. 
+ pattern: "*.mmi" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@yuukiiwa" + - "@drpatelh" +maintainers: + - "@yuukiiwa" + - "@drpatelh" diff --git a/modules/nf-core/minimap2/index/tests/main.nf.test b/modules/nf-core/minimap2/index/tests/main.nf.test new file mode 100644 index 00000000..eec743cf --- /dev/null +++ b/modules/nf-core/minimap2/index/tests/main.nf.test @@ -0,0 +1,32 @@ +nextflow_process { + + name "Test Process MINIMAP2_INDEX" + script "../main.nf" + process "MINIMAP2_INDEX" + + tag "modules" + tag "modules_nfcore" + tag "minimap2" + tag "minimap2/index" + + test("minimap2 index") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assert process.success + assert snapshot(process.out).match() + } + + } + +} diff --git a/modules/nf-core/minimap2/index/tests/main.nf.test.snap b/modules/nf-core/minimap2/index/tests/main.nf.test.snap new file mode 100644 index 00000000..dbb32049 --- /dev/null +++ b/modules/nf-core/minimap2/index/tests/main.nf.test.snap @@ -0,0 +1,35 @@ +{ + "minimap2 index": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "genome.mmi:md5,72e450f12dc691e763c697463bdb1571" + ] + ], + "1": [ + "versions.yml:md5,2c3e19022653b28d77646b2e9cc9bdb3" + ], + "index": [ + [ + { + "id": "test" + }, + "genome.mmi:md5,72e450f12dc691e763c697463bdb1571" + ] + ], + "versions": [ + "versions.yml:md5,2c3e19022653b28d77646b2e9cc9bdb3" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-22T14:50:37.213379742" + } +} \ No newline at end of file diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml index a27122ce..009874d4 100644 --- 
a/modules/nf-core/multiqc/environment.yml +++ b/modules/nf-core/multiqc/environment.yml @@ -1,5 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - conda-forge - bioconda dependencies: - - bioconda::multiqc=1.27 + - bioconda::multiqc=1.33 diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index 58d9313c..3b0e975b 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -3,11 +3,11 @@ process MULTIQC { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.27--pyhdfd78af_0' : - 'biocontainers/multiqc:1.27--pyhdfd78af_0' }" + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/34/34e733a9ae16a27e80fe00f863ea1479c96416017f24a907996126283e7ecd4d/data' : + 'community.wave.seqera.io/library/multiqc:1.33--ee7739d47738383b' }" input: - path multiqc_files, stageAs: "?/*" + path multiqc_files, stageAs: "?/*" path(multiqc_config) path(extra_multiqc_config) path(multiqc_logo) @@ -15,10 +15,11 @@ process MULTIQC { path(sample_names) output: - path "*multiqc_report.html", emit: report - path "*_data" , emit: data - path "*_plots" , optional:true, emit: plots - path "versions.yml" , emit: versions + path "*.html" , emit: report + path "*_data" , emit: data + path "*_plots" , optional:true, emit: plots + tuple val("${task.process}"), val('multiqc'), eval('multiqc --version | sed "s/.* //g"'), emit: versions + // MultiQC should not push its versions to the `versions` topic. Its input depends on the versions topic to be resolved thus outputting to the topic will let the pipeline hang forever when: task.ext.when == null || task.ext.when @@ -26,38 +27,29 @@ process MULTIQC { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ? 
"--filename ${task.ext.prefix}.html" : '' - def config = multiqc_config ? "--config $multiqc_config" : '' - def extra_config = extra_multiqc_config ? "--config $extra_multiqc_config" : '' + def config = multiqc_config ? "--config ${multiqc_config}" : '' + def extra_config = extra_multiqc_config ? "--config ${extra_multiqc_config}" : '' def logo = multiqc_logo ? "--cl-config 'custom_logo: \"${multiqc_logo}\"'" : '' def replace = replace_names ? "--replace-names ${replace_names}" : '' def samples = sample_names ? "--sample-names ${sample_names}" : '' """ multiqc \\ --force \\ - $args \\ - $config \\ - $prefix \\ - $extra_config \\ - $logo \\ - $replace \\ - $samples \\ + ${args} \\ + ${config} \\ + ${prefix} \\ + ${extra_config} \\ + ${logo} \\ + ${replace} \\ + ${samples} \\ . - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) - END_VERSIONS """ stub: """ mkdir multiqc_data + touch multiqc_data/.stub mkdir multiqc_plots touch multiqc_report.html - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) - END_VERSIONS """ } diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml index b16c1879..861cd7f7 100644 --- a/modules/nf-core/multiqc/meta.yml +++ b/modules/nf-core/multiqc/meta.yml @@ -15,57 +15,74 @@ tools: licence: ["GPL-3.0-or-later"] identifier: biotools:multiqc input: - - - multiqc_files: - type: file - description: | - List of reports / files recognised by MultiQC, for example the html and zip output of FastQC - - - multiqc_config: - type: file - description: Optional config yml for MultiQC - pattern: "*.{yml,yaml}" - - - extra_multiqc_config: - type: file - description: Second optional config yml for MultiQC. Will override common sections - in multiqc_config. 
- pattern: "*.{yml,yaml}" - - - multiqc_logo: - type: file - description: Optional logo file for MultiQC - pattern: "*.{png}" - - - replace_names: + - multiqc_files: + type: file + description: | + List of reports / files recognised by MultiQC, for example the html and zip output of FastQC + ontologies: [] + - multiqc_config: + type: file + description: Optional config yml for MultiQC + pattern: "*.{yml,yaml}" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML + - extra_multiqc_config: + type: file + description: Second optional config yml for MultiQC. Will override common sections + in multiqc_config. + pattern: "*.{yml,yaml}" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML + - multiqc_logo: + type: file + description: Optional logo file for MultiQC + pattern: "*.{png}" + ontologies: [] + - replace_names: + type: file + description: | + Optional two-column sample renaming file. First column a set of + patterns, second column a set of corresponding replacements. Passed via + MultiQC's `--replace-names` option. + pattern: "*.{tsv}" + ontologies: + - edam: http://edamontology.org/format_3475 # TSV + - sample_names: + type: file + description: | + Optional TSV file with headers, passed to the MultiQC --sample-names + argument. + pattern: "*.{tsv}" + ontologies: + - edam: http://edamontology.org/format_3475 # TSV +output: + report: + - "*.html": type: file - description: | - Optional two-column sample renaming file. First column a set of - patterns, second column a set of corresponding replacements. Passed via - MultiQC's `--replace-names` option. - pattern: "*.{tsv}" - - - sample_names: + description: MultiQC report file + pattern: "*.html" + ontologies: [] + data: + - "*_data": + type: directory + description: MultiQC data dir + pattern: "*_data" + plots: + - "*_plots": type: file - description: | - Optional TSV file with headers, passed to the MultiQC --sample_names - argument.
- pattern: "*.{tsv}" -output: - - report: - - "*multiqc_report.html": - type: file - description: MultiQC report file - pattern: "multiqc_report.html" - - data: - - "*_data": - type: directory - description: MultiQC data dir - pattern: "multiqc_data" - - plots: - - "*_plots": - type: file - description: Plots created by MultiQC - pattern: "*_data" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" + description: Plots created by MultiQC + pattern: "*_plots" + ontologies: [] + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - multiqc: + type: string + description: The tool name + - multiqc --version | sed "s/.* //g": + type: eval + description: The expression to obtain the version of the tool authors: - "@abhi18av" - "@bunop" @@ -76,3 +93,27 @@ maintainers: - "@bunop" - "@drpatelh" - "@jfy133" +containers: + conda: + linux/amd64: + lock_file: https://wave.seqera.io/v1alpha1/builds/bd-ee7739d47738383b_1/condalock + linux/arm64: + lock_file: https://wave.seqera.io/v1alpha1/builds/bd-58d7dee710ab3aa8_1/condalock + docker: + linux/amd64: + build_id: bd-ee7739d47738383b_1 + name: community.wave.seqera.io/library/multiqc:1.33--ee7739d47738383b + scanId: sc-6ddec592dcadd583_4 + linux/arm64: + build_id: bd-58d7dee710ab3aa8_1 + name: community.wave.seqera.io/library/multiqc:1.33--58d7dee710ab3aa8 + scanId: sc-a04c42273e34c55c_2 + singularity: + linux/amd64: + build_id: bd-e3576ddf588fa00d_1 + https: https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/34/34e733a9ae16a27e80fe00f863ea1479c96416017f24a907996126283e7ecd4d/data + name: oras://community.wave.seqera.io/library/multiqc:1.33--e3576ddf588fa00d + linux/arm64: + build_id: bd-2537ca5f8445e3c2_1 + https: https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/78/78b89e91d89e9cc99ad5ade5be311f347838cb2acbfb4f13bc343b170be09ce4/data + name:
oras://community.wave.seqera.io/library/multiqc:1.33--2537ca5f8445e3c2 diff --git a/modules/nf-core/multiqc/tests/custom_prefix.config b/modules/nf-core/multiqc/tests/custom_prefix.config new file mode 100644 index 00000000..b30b1358 --- /dev/null +++ b/modules/nf-core/multiqc/tests/custom_prefix.config @@ -0,0 +1,5 @@ +process { + withName: 'MULTIQC' { + ext.prefix = "custom_prefix" + } +} diff --git a/modules/nf-core/multiqc/tests/main.nf.test b/modules/nf-core/multiqc/tests/main.nf.test index 33316a7d..d1ae8b06 100644 --- a/modules/nf-core/multiqc/tests/main.nf.test +++ b/modules/nf-core/multiqc/tests/main.nf.test @@ -30,7 +30,33 @@ nextflow_process { { assert process.success }, { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, { assert process.out.data[0] ==~ ".*/multiqc_data" }, - { assert snapshot(process.out.versions).match("multiqc_versions_single") } + { assert snapshot(process.out.findAll { key, val -> key.startsWith("versions")}).match() } + ) + } + + } + + test("sarscov2 single-end [fastqc] - custom prefix") { + config "./custom_prefix.config" + + when { + process { + """ + input[0] = Channel.of(file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastqc/test_fastqc.zip', checkIfExists: true)) + input[1] = [] + input[2] = [] + input[3] = [] + input[4] = [] + input[5] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.report[0] ==~ ".*/custom_prefix.html" }, + { assert process.out.data[0] ==~ ".*/custom_prefix_data" } ) } @@ -56,7 +82,7 @@ nextflow_process { { assert process.success }, { assert process.out.report[0] ==~ ".*/multiqc_report.html" }, { assert process.out.data[0] ==~ ".*/multiqc_data" }, - { assert snapshot(process.out.versions).match("multiqc_versions_config") } + { assert snapshot(process.out.findAll { key, val -> key.startsWith("versions")}).match() } ) } } @@ -84,7 +110,7 @@ nextflow_process { { assert snapshot(process.out.report.collect { file(it).getName() } + 
process.out.data.collect { file(it).getName() } + process.out.plots.collect { file(it).getName() } + - process.out.versions ).match("multiqc_stub") } + process.out.findAll { key, val -> key.startsWith("versions")} ).match() } ) } diff --git a/modules/nf-core/multiqc/tests/main.nf.test.snap b/modules/nf-core/multiqc/tests/main.nf.test.snap index 7b7c1322..d72d35b7 100644 --- a/modules/nf-core/multiqc/tests/main.nf.test.snap +++ b/modules/nf-core/multiqc/tests/main.nf.test.snap @@ -1,41 +1,61 @@ { - "multiqc_versions_single": { + "sarscov2 single-end [fastqc]": { "content": [ - [ - "versions.yml:md5,8f3b8c1cec5388cf2708be948c9fa42f" - ] + { + "versions": [ + [ + "MULTIQC", + "multiqc", + "1.33" + ] + ] + } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.4" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-01-27T09:29:57.631982377" + "timestamp": "2025-12-09T10:10:43.020315838" }, - "multiqc_stub": { + "sarscov2 single-end [fastqc] - stub": { "content": [ [ "multiqc_report.html", "multiqc_data", "multiqc_plots", - "versions.yml:md5,8f3b8c1cec5388cf2708be948c9fa42f" + { + "versions": [ + [ + "MULTIQC", + "multiqc", + "1.33" + ] + ] + } ] ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.4" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-01-27T09:30:34.743726958" + "timestamp": "2025-12-09T10:11:14.131950776" }, - "multiqc_versions_config": { + "sarscov2 single-end [fastqc] [config]": { "content": [ - [ - "versions.yml:md5,8f3b8c1cec5388cf2708be948c9fa42f" - ] + { + "versions": [ + [ + "MULTIQC", + "multiqc", + "1.33" + ] + ] + } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "24.10.4" + "nf-test": "0.9.3", + "nextflow": "25.10.2" }, - "timestamp": "2025-01-27T09:30:21.44383553" + "timestamp": "2025-12-09T10:11:07.15692209" } } \ No newline at end of file diff --git a/modules/nf-core/multiqc/tests/tags.yml b/modules/nf-core/multiqc/tests/tags.yml deleted file mode 100644 index bea6c0d3..00000000 --- 
a/modules/nf-core/multiqc/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -multiqc: - - modules/nf-core/multiqc/** diff --git a/modules/nf-core/nanoq/environment.yml b/modules/nf-core/nanoq/environment.yml new file mode 100644 index 00000000..8df81732 --- /dev/null +++ b/modules/nf-core/nanoq/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::nanoq=0.10.0 diff --git a/modules/nf-core/nanoq/main.nf b/modules/nf-core/nanoq/main.nf new file mode 100644 index 00000000..e0614f6a --- /dev/null +++ b/modules/nf-core/nanoq/main.nf @@ -0,0 +1,49 @@ +process NANOQ { + tag "${meta.id}" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://depot.galaxyproject.org/singularity/nanoq:0.10.0--h031d066_2' + : 'biocontainers/nanoq:0.10.0--h031d066_2'}" + + input: + tuple val(meta), path(ontreads) + val(output_format) //One of the following: fastq, fastq.gz, fastq.bz2, fastq.lzma, fasta, fasta.gz, fasta.bz2, fasta.lzma. 
+ + output: + tuple val(meta), path("*.{stats,json}") , emit: stats + tuple val(meta), path("${prefix}.${output_format}"), emit: reads + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}_filtered" + """ + nanoq -i ${ontreads} \\ + ${args} \\ + -r ${prefix}.stats \\ + -o ${prefix}.${output_format} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + nanoq: \$(nanoq --version | sed -e 's/nanoq //g') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}_filtered" + """ + echo "" | gzip > ${prefix}.${output_format} + touch ${prefix}.stats + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + nanoq: \$(nanoq --version | sed -e 's/nanoq //g') + END_VERSIONS + """ +} diff --git a/modules/nf-core/nanoq/meta.yml b/modules/nf-core/nanoq/meta.yml new file mode 100644 index 00000000..bc85265f --- /dev/null +++ b/modules/nf-core/nanoq/meta.yml @@ -0,0 +1,71 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "nanoq" +description: Nanoq implements ultra-fast read filters and summary reports for high-throughput + nanopore reads. +keywords: + - nanoq + - Read filters + - Read trimming + - Read report +tools: + - "nanoq": + description: "Ultra-fast quality control and summary reports for nanopore reads" + homepage: "https://github.com/esteinig/nanoq" + documentation: "https://github.com/esteinig/nanoq" + tool_dev_url: "https://github.com/esteinig/nanoq" + doi: "10.21105/joss.02991" + licence: ["MIT"] + identifier: "" + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - ontreads: + type: file + description: Compressed or uncompressed nanopore reads in fasta or fastq formats. 
+ pattern: "*.{fa,fna,faa,fasta,fq,fastq}{,.gz,.bz2,.xz}" + ontologies: + - edam: http://edamontology.org/format_1930 # FASTQ + - output_format: + type: string + description: "Specifies the output format. One of these formats: fasta, fastq; + fasta.gz, fastq.gz; fasta.bz2, fastq.bz2; fasta.lzma, fastq.lzma." +output: + stats: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - "*.{stats,json}": + type: file + description: Summary report of reads statistics. + pattern: "*.{stats,json}" + ontologies: + - edam: http://edamontology.org/format_3464 # JSON + reads: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - ${prefix}.${output_format}: + type: file + description: Filtered reads. + pattern: "*.{fasta,fastq}{,.gz,.bz2,.lzma}" + ontologies: + - edam: http://edamontology.org/format_1930 # FASTQ + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@LilyAnderssonLee" +maintainers: + - "@LilyAnderssonLee" diff --git a/modules/nf-core/nanoq/tests/main.nf.test b/modules/nf-core/nanoq/tests/main.nf.test new file mode 100644 index 00000000..ef63d12f --- /dev/null +++ b/modules/nf-core/nanoq/tests/main.nf.test @@ -0,0 +1,122 @@ +nextflow_process { + + name "Test Process NANOQ" + script "../main.nf" + process "NANOQ" + + tag "modules" + tag "modules_nfcore" + tag "nanoq" + + test("sarscov2 - nanopore_uncompressed") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/nanopore/fastq/test.fastq.gz', checkIfExists: true) + ] + + input[1] = 'fastq' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + 
test("sarscov2 - nanopore_compressed_gz") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/nanopore/fastq/test.fastq.gz', checkIfExists: true) + ] + input[1] = 'fastq.gz' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + test("sarscov2 - nanopore_compressed_bz2") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/nanopore/fastq/test.fastq.gz', checkIfExists: true) + ] + input[1] = 'fastq.bz2' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + test("sarscov2 - nanopore_compressed_lzma") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/nanopore/fastq/test.fastq.gz', checkIfExists: true) + ] + input[1] = 'fastq.lzma' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - nanopore_compressed_gz - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/nanopore/fastq/test.fastq.gz', checkIfExists: true) + ] + input[1] = 'fastq.gz' + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } +} diff --git a/modules/nf-core/nanoq/tests/main.nf.test.snap b/modules/nf-core/nanoq/tests/main.nf.test.snap new file mode 100644 index 00000000..b5dda2a7 --- /dev/null +++ b/modules/nf-core/nanoq/tests/main.nf.test.snap @@ -0,0 +1,267 @@ +{ + "sarscov2 - nanopore_compressed_gz": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + 
}, + "test_filtered.stats:md5,5ab32af3352dfeca8268e10edf6e4dbe" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test_filtered.fastq.gz:md5,7567d853ada6ac142332619d0b541d76" + ] + ], + "2": [ + "versions.yml:md5,7a40efe417ff7dbb9e91e9c1629a04e6" + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test_filtered.fastq.gz:md5,7567d853ada6ac142332619d0b541d76" + ] + ], + "stats": [ + [ + { + "id": "test", + "single_end": true + }, + "test_filtered.stats:md5,5ab32af3352dfeca8268e10edf6e4dbe" + ] + ], + "versions": [ + "versions.yml:md5,7a40efe417ff7dbb9e91e9c1629a04e6" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.1" + }, + "timestamp": "2024-07-11T11:39:32.117229" + }, + "sarscov2 - nanopore_compressed_gz - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test_filtered.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test_filtered.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "2": [ + "versions.yml:md5,7a40efe417ff7dbb9e91e9c1629a04e6" + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test_filtered.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "stats": [ + [ + { + "id": "test", + "single_end": true + }, + "test_filtered.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,7a40efe417ff7dbb9e91e9c1629a04e6" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.1" + }, + "timestamp": "2024-07-11T11:42:06.039307" + }, + "sarscov2 - nanopore_compressed_bz2": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test_filtered.stats:md5,5ab32af3352dfeca8268e10edf6e4dbe" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test_filtered.fastq.bz2:md5,b53cf14fd4eb5b16c459c41f03cc8a4b" + ] + ], + "2": [ + "versions.yml:md5,7a40efe417ff7dbb9e91e9c1629a04e6" + ], + "reads": 
[ + [ + { + "id": "test", + "single_end": true + }, + "test_filtered.fastq.bz2:md5,b53cf14fd4eb5b16c459c41f03cc8a4b" + ] + ], + "stats": [ + [ + { + "id": "test", + "single_end": true + }, + "test_filtered.stats:md5,5ab32af3352dfeca8268e10edf6e4dbe" + ] + ], + "versions": [ + "versions.yml:md5,7a40efe417ff7dbb9e91e9c1629a04e6" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.1" + }, + "timestamp": "2024-07-11T11:39:36.674647" + }, + "sarscov2 - nanopore_compressed_lzma": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test_filtered.stats:md5,5ab32af3352dfeca8268e10edf6e4dbe" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test_filtered.fastq.lzma:md5,65dda701689f913734dc245b68c89e07" + ] + ], + "2": [ + "versions.yml:md5,7a40efe417ff7dbb9e91e9c1629a04e6" + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test_filtered.fastq.lzma:md5,65dda701689f913734dc245b68c89e07" + ] + ], + "stats": [ + [ + { + "id": "test", + "single_end": true + }, + "test_filtered.stats:md5,5ab32af3352dfeca8268e10edf6e4dbe" + ] + ], + "versions": [ + "versions.yml:md5,7a40efe417ff7dbb9e91e9c1629a04e6" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.1" + }, + "timestamp": "2024-07-11T11:39:41.51344" + }, + "sarscov2 - nanopore_uncompressed": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test_filtered.stats:md5,5ab32af3352dfeca8268e10edf6e4dbe" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test_filtered.fastq:md5,7567d853ada6ac142332619d0b541d76" + ] + ], + "2": [ + "versions.yml:md5,7a40efe417ff7dbb9e91e9c1629a04e6" + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test_filtered.fastq:md5,7567d853ada6ac142332619d0b541d76" + ] + ], + "stats": [ + [ + { + "id": "test", + "single_end": true + }, + "test_filtered.stats:md5,5ab32af3352dfeca8268e10edf6e4dbe" + ] + ], + "versions": [ + 
"versions.yml:md5,7a40efe417ff7dbb9e91e9c1629a04e6" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.1" + }, + "timestamp": "2024-07-11T11:39:26.868897" + } +} \ No newline at end of file diff --git a/modules/nf-core/nanoq/tests/tags.yml b/modules/nf-core/nanoq/tests/tags.yml new file mode 100644 index 00000000..37457df1 --- /dev/null +++ b/modules/nf-core/nanoq/tests/tags.yml @@ -0,0 +1,2 @@ +nanoq: + - "modules/nf-core/nanoq/**" diff --git a/modules/nf-core/porechop/abi/environment.yml b/modules/nf-core/porechop/abi/environment.yml new file mode 100644 index 00000000..b2b2cd23 --- /dev/null +++ b/modules/nf-core/porechop/abi/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::porechop_abi=0.5.0post1 diff --git a/modules/nf-core/porechop/abi/main.nf b/modules/nf-core/porechop/abi/main.nf new file mode 100644 index 00000000..6918da45 --- /dev/null +++ b/modules/nf-core/porechop/abi/main.nf @@ -0,0 +1,56 @@ +process PORECHOP_ABI { + tag "${meta.id}" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://depot.galaxyproject.org/singularity/porechop_abi:0.5.0post1--py310h275bdba_0' + : 'biocontainers/porechop_abi:0.5.0post1--py310h275bdba_0'}" + + input: + tuple val(meta), path(reads) + path custom_adapters + + output: + tuple val(meta), path("*.fastq.gz"), emit: reads + tuple val(meta), path("*.log"), emit: log + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}.porechop_abi" + def adapters_list = custom_adapters ? 
"--custom_adapters ${custom_adapters}" : "" + if ("${reads}" == "${prefix}.fastq.gz") { + error("Input and output names are the same, use \"task.ext.prefix\" to disambiguate!") + } + """ + porechop_abi \\ + --input ${reads} \\ + ${adapters_list} \\ + --threads ${task.cpus} \\ + ${args} \\ + --output ${prefix}.fastq.gz \\ + | tee ${prefix}.log + cat <<-END_VERSIONS > versions.yml + "${task.process}": + porechop_abi: \$( porechop_abi --version ) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}.porechop_abi" + def adapters_list = custom_adapters ? "--custom_adapters ${custom_adapters}" : "" + """ + echo "" | gzip > ${prefix}.fastq.gz + touch ${prefix}.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + porechop_abi: \$( porechop_abi --version ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/porechop/abi/meta.yml b/modules/nf-core/porechop/abi/meta.yml new file mode 100644 index 00000000..276000ee --- /dev/null +++ b/modules/nf-core/porechop/abi/meta.yml @@ -0,0 +1,71 @@ +name: "porechop_abi" +description: Extension of Porechop whose purpose is to process adapter sequences in + ONT reads. +keywords: + - porechop_abi + - adapter + - nanopore +tools: + - "porechop_abi": + description: Extension of Porechop whose purpose is to process adapter sequences + in ONT reads. + homepage: "https://github.com/bonsai-team/Porechop_ABI" + documentation: "https://github.com/bonsai-team/Porechop_ABI" + tool_dev_url: "https://github.com/bonsai-team/Porechop_ABI" + doi: "10.1101/2022.07.07.499093" + licence: ["MIT"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - reads: + type: file + description: fastq/fastq.gz file + pattern: "*.{fastq,fastq.gz,fq,fq.gz}" + ontologies: + - edam: http://edamontology.org/format_1930 # FASTQ + - custom_adapters: + type: file + description: Text file containing custom adapters + + ontologies: [] +output: + reads: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.fastq.gz": + type: file + description: Adapter-trimmed fastq.gz file + pattern: "*.fastq.gz" + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format + log: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.log": + type: file + description: Log file containing stdout information + pattern: "*.log" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@sofstam" + - "LilyAnderssonLee" +maintainers: + - "@sofstam" + - "LilyAnderssonLee" diff --git a/modules/nf-core/porechop/abi/tests/main.nf.test b/modules/nf-core/porechop/abi/tests/main.nf.test new file mode 100644 index 00000000..23e34d7d --- /dev/null +++ b/modules/nf-core/porechop/abi/tests/main.nf.test @@ -0,0 +1,61 @@ +nextflow_process { + + name "Test Process PORECHOP_ABI" + script "../main.nf" + process "PORECHOP_ABI" + tag "modules" + tag "modules_nfcore" + tag "porechop" + tag "porechop/abi" + + test("sarscov2-nanopore") { + + when { + process { + """ + input[0] = [ + [ id:'test'], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/nanopore/fastq/test.fastq.gz', checkIfExists: true) + ] + input[1] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.reads, + file(process.out.log.get(0).get(1)).readLines()[20..40], + 
process.out.versions).match() + } + ) + } + } + + test("sarscov2-nanopore - stub") { + + options "-stub" + + when { + + process { + """ + input[0] = [ + [ id:'test'], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/nanopore/fastq/test.fastq.gz', checkIfExists: true) + ] + input[1] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/porechop/abi/tests/main.nf.test.snap b/modules/nf-core/porechop/abi/tests/main.nf.test.snap new file mode 100644 index 00000000..5fc4d060 --- /dev/null +++ b/modules/nf-core/porechop/abi/tests/main.nf.test.snap @@ -0,0 +1,94 @@ +{ + "sarscov2-nanopore": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.porechop_abi.fastq.gz:md5,886fdb859fb50e0dddd35007bcff043e" + ] + ], + [ + " Best \u001b[0m", + " read Best \u001b[0m", + " start read end\u001b[0m", + " \u001b[4mSet %ID %ID \u001b[0m", + " \u001b[32mSQK-NSK007 100.0 73.1\u001b[0m", + " Rapid 40.4 0.0", + " RBK004_upstream 77.5 0.0", + " SQK-MAP006 75.8 72.7", + " SQK-MAP006 short 65.5 66.7", + " PCR adapters 1 73.9 69.6", + " PCR adapters 2 80.0 72.7", + " PCR adapters 3 70.8 69.6", + " 1D^2 part 1 71.4 70.0", + " 1D^2 part 2 84.8 75.8", + " cDNA SSP 63.0 61.7", + " \u001b[32mBarcode 1 (reverse) 100.0 100.0\u001b[0m", + " Barcode 2 (reverse) 70.8 69.2", + " Barcode 3 (reverse) 76.0 70.4", + " Barcode 4 (reverse) 74.1 71.4", + " Barcode 5 (reverse) 77.8 80.8", + " Barcode 6 (reverse) 73.1 70.8" + ], + [ + "versions.yml:md5,0e9e5e0d35a68ff8e6490c949b257f98" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-10T10:27:24.536642628" + }, + "sarscov2-nanopore - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.porechop_abi.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.porechop_abi.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + 
"2": [ + "versions.yml:md5,0e9e5e0d35a68ff8e6490c949b257f98" + ], + "log": [ + [ + { + "id": "test" + }, + "test.porechop_abi.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": "test" + }, + "test.porechop_abi.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,0e9e5e0d35a68ff8e6490c949b257f98" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.5" + }, + "timestamp": "2025-04-10T10:27:30.502715737" + } +} \ No newline at end of file diff --git a/modules/nf-core/porechop/porechop/environment.yml b/modules/nf-core/porechop/porechop/environment.yml new file mode 100644 index 00000000..109cf8bd --- /dev/null +++ b/modules/nf-core/porechop/porechop/environment.yml @@ -0,0 +1,8 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::porechop=0.2.4 + - conda-forge::pigz=2.8 diff --git a/modules/nf-core/porechop/porechop/main.nf b/modules/nf-core/porechop/porechop/main.nf new file mode 100644 index 00000000..dd148cf5 --- /dev/null +++ b/modules/nf-core/porechop/porechop/main.nf @@ -0,0 +1,49 @@ +process PORECHOP_PORECHOP { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/2b/2bce1f10c51906a66c4c4d3a7485394f67e304177192ad1cce6cf586a3a18bae/data' : + 'community.wave.seqera.io/library/porechop_pigz:d1655e5b5bad786c' }" + + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path("*.fastq.gz"), emit: reads + tuple val(meta), path("*.log") , emit: log + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + porechop \\ + -i $reads \\ + -t $task.cpus \\ + $args \\ + -o ${prefix}.fastq.gz \\ + > ${prefix}.log + cat <<-END_VERSIONS > versions.yml + "${task.process}": + porechop: \$( porechop --version ) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.fastq + gzip ${prefix}.fastq + touch ${prefix}.log + cat <<-END_VERSIONS > versions.yml + "${task.process}": + porechop: \$( porechop --version ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/porechop/porechop/meta.yml b/modules/nf-core/porechop/porechop/meta.yml new file mode 100644 index 00000000..01f50a7e --- /dev/null +++ b/modules/nf-core/porechop/porechop/meta.yml @@ -0,0 +1,77 @@ +name: "porechop_porechop" +description: Adapter removal and demultiplexing of Oxford Nanopore reads +keywords: + - adapter + - nanopore + - demultiplexing +tools: + - porechop: + description: Adapter removal and demultiplexing of Oxford Nanopore reads + homepage: "https://github.com/rrwick/Porechop" + documentation: "https://github.com/rrwick/Porechop" + tool_dev_url: "https://github.com/rrwick/Porechop" + doi: "10.1099/mgen.0.000132" + licence: ["GPL v3"] + identifier: "" +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - reads: + type: file + description: fastq/fastq.gz file + pattern: "*.{fastq,fastq.gz,fq,fq.gz}" + ontologies: + - edam: http://edamontology.org/format_1930 # FASTQ +output: + reads: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.fastq.gz": + type: file + description: Demultiplexed and/or adapter-trimmed fastq.gz file + pattern: "*.{fastq.gz}" + ontologies: [] + log: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.log": + type: file + description: Log file containing stdout information + pattern: "*.log" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@ggabernet" + - "@jasmezz" + - "@d4straub" + - "@LaurenceKuhl" + - "@SusiJo" + - "@jonasscheid" + - "@jonoave" + - "@GokceOGUZ" + - "@jfy133" +maintainers: + - "@ggabernet" + - "@jasmezz" + - "@d4straub" + - "@LaurenceKuhl" + - "@SusiJo" + - "@jonasscheid" + - "@jonoave" + - "@GokceOGUZ" + - "@jfy133" diff --git a/modules/nf-core/porechop/porechop/tests/main.nf.test b/modules/nf-core/porechop/porechop/tests/main.nf.test new file mode 100644 index 00000000..ed3f6986 --- /dev/null +++ b/modules/nf-core/porechop/porechop/tests/main.nf.test @@ -0,0 +1,62 @@ +nextflow_process { + + name "Test Process PORECHOP_PORECHOP" + script "../main.nf" + process "PORECHOP_PORECHOP" + config "./nextflow.config" + + tag "modules" + tag "modules_nfcore" + tag "porechop" + tag "porechop/porechop" + + test("sarscov2 - nanopore - fastq") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/nanopore/fastq/test.fastq.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { 
assert process.success }, + { assert snapshot(process.out.reads).match("reads") }, + { assert snapshot(process.out.versions).match("versions") }, + // complete log is not stable. These first lines should be stable + { assert snapshot(path(process.out.log.get(0).get(1)).readLines()[0..7]).match("log")} + ) + } + + } + + + test("stub") { + options "-stub" + + when { + process { + """ + input[0] = [ [ id:'test', single_end:true ], + [] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + +} diff --git a/modules/nf-core/porechop/porechop/tests/main.nf.test.snap b/modules/nf-core/porechop/porechop/tests/main.nf.test.snap new file mode 100644 index 00000000..cf544d2d --- /dev/null +++ b/modules/nf-core/porechop/porechop/tests/main.nf.test.snap @@ -0,0 +1,88 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,712c0753b56d0fb530092dfb5bdf2e5c" + ] + ], + "timestamp": "2023-12-18T07:47:16.83444" + }, + "log": { + "content": [ + [ + "", + "\u001b[1m\u001b[4mLoading reads\u001b[0m", + "test.fastq.gz", + "100 reads loaded", + "", + "", + "\u001b[1m\u001b[4mLooking for known adapter sets\u001b[0m", + "" + ] + ], + "timestamp": "2023-12-18T07:47:16.853899" + }, + "reads": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test_porechop.fastq.gz:md5,886fdb859fb50e0dddd35007bcff043e" + ] + ] + ], + "timestamp": "2023-12-18T07:47:16.811393" + }, + "stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test_porechop.fastq.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test_porechop.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,712c0753b56d0fb530092dfb5bdf2e5c" + ], + "log": [ + [ + { + "id": "test", + "single_end": true + }, + "test_porechop.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": "test", + 
"single_end": true + }, + "test_porechop.fastq.gz:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,712c0753b56d0fb530092dfb5bdf2e5c" + ] + } + ], + "timestamp": "2023-12-18T07:47:37.814949" + } +} \ No newline at end of file diff --git a/modules/nf-core/porechop/porechop/tests/nextflow.config b/modules/nf-core/porechop/porechop/tests/nextflow.config new file mode 100644 index 00000000..a9ecf7b6 --- /dev/null +++ b/modules/nf-core/porechop/porechop/tests/nextflow.config @@ -0,0 +1,9 @@ +process { + + + withName: PORECHOP_PORECHOP { + ext.args = '' + ext.prefix = { "${meta.id}_porechop" } + } + +} diff --git a/modules/nf-core/samtools/faidx/environment.yml b/modules/nf-core/samtools/faidx/environment.yml new file mode 100644 index 00000000..89e12a64 --- /dev/null +++ b/modules/nf-core/samtools/faidx/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.22.1 + # renovate: datasource=conda depName=bioconda/samtools + - bioconda::samtools=1.22.1 diff --git a/modules/nf-core/samtools/faidx/main.nf b/modules/nf-core/samtools/faidx/main.nf new file mode 100644 index 00000000..57a03497 --- /dev/null +++ b/modules/nf-core/samtools/faidx/main.nf @@ -0,0 +1,50 @@ +process SAMTOOLS_FAIDX { + tag "$fasta" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/samtools:1.22.1--h96c455f_0' : + 'biocontainers/samtools:1.22.1--h96c455f_0' }" + + input: + tuple val(meta), path(fasta) + tuple val(meta2), path(fai) + val get_sizes + + output: + tuple val(meta), path ("*.{fa,fasta}") , emit: fa, optional: true + tuple val(meta), path ("*.sizes") , emit: sizes, optional: true + tuple val(meta), path ("*.fai") , emit: fai, optional: true + tuple val(meta), path ("*.gzi") , emit: gzi, optional: true + tuple val("${task.process}"), val('samtools'), eval("samtools version | sed '1!d;s/.* //'"), topic: versions, emit: versions_samtools + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def get_sizes_command = get_sizes ? "cut -f 1,2 ${fasta}.fai > ${fasta}.sizes" : '' + """ + samtools \\ + faidx \\ + $fasta \\ + $args + + ${get_sizes_command} + """ + + stub: + def match = (task.ext.args =~ /-o(?:utput)?\s(.*)\s?/).findAll() + def fastacmd = match[0] ? "touch ${match[0][1]}" : '' + def get_sizes_command = get_sizes ? "touch ${fasta}.sizes" : '' + """ + ${fastacmd} + touch ${fasta}.fai + if [[ "${fasta.extension}" == "gz" ]]; then + touch ${fasta}.gzi + fi + + ${get_sizes_command} + """ +} diff --git a/modules/nf-core/samtools/faidx/meta.yml b/modules/nf-core/samtools/faidx/meta.yml new file mode 100644 index 00000000..163c3015 --- /dev/null +++ b/modules/nf-core/samtools/faidx/meta.yml @@ -0,0 +1,117 @@ +name: samtools_faidx +description: Index FASTA file, and optionally generate a file of chromosome sizes +keywords: + - index + - fasta + - faidx + - chromosome +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. 
+ homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:samtools +input: + - - meta: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fasta: + type: file + description: FASTA file + pattern: "*.{fa,fasta}" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fai: + type: file + description: FASTA index file + pattern: "*.{fai}" + ontologies: [] + - get_sizes: + type: boolean + description: use cut to get the sizes of the index (true) or not (false) + +output: + fa: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.{fa,fasta}": + type: file + description: FASTA file + pattern: "*.{fa}" + ontologies: [] + sizes: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.sizes": + type: file + description: File containing chromosome lengths + pattern: "*.{sizes}" + ontologies: [] + fai: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.fai": + type: file + description: FASTA index file + pattern: "*.{fai}" + ontologies: [] + gzi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.gzi": + type: file + description: Optional gzip index file for compressed inputs + pattern: "*.gzi" + ontologies: [] + versions_samtools: + - - ${task.process}: + type: string + description: The process the versions were collected from + - samtools: + type: string + description: The tool name + - "samtools version | sed '1!d;s/.* //'": + type: string + description: The command used to generate the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - samtools: + type: string + description: The tool name + - "samtools version | sed '1!d;s/.* //'": + type: string + description: The command used to generate the version of the tool +authors: + - "@drpatelh" + - "@ewels" + - "@phue" +maintainers: + - "@maxulysse" + - "@phue" diff --git a/modules/nf-core/samtools/faidx/tests/main.nf.test b/modules/nf-core/samtools/faidx/tests/main.nf.test new file mode 100644 index 00000000..02ba5040 --- /dev/null +++ b/modules/nf-core/samtools/faidx/tests/main.nf.test @@ -0,0 +1,245 @@ +nextflow_process { + + name "Test Process SAMTOOLS_FAIDX" + script "../main.nf" + process "SAMTOOLS_FAIDX" + + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/faidx" + config "./nextflow.config" + + test("test_samtools_faidx") { + + when { + params { + module_args = '' + } + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] + input[1] = [[],[]] + input[2] = false + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(process.out).match()} + ) + } + } + + test("test_samtools_faidx_bgzip") { + + when { + params { + module_args = '' + } + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', 
checkIfExists: true)] + input[1] = [[],[]] + input[2] = false + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(process.out).match()} + ) + } + } + + test("test_samtools_faidx_fasta") { + + when { + params { + module_args = 'MT192765.1 -o extract.fa' + } + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] + input[1] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) ] + input[2] = false + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(process.out).match()} + ) + } + } + + test("test_samtools_faidx_stub_fasta") { + + options "-stub" + when { + params { + module_args = '-o extract.fa' + } + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] + input[1] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) ] + input[2] = false + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(process.out).match()} + ) + } + } + + test("test_samtools_faidx_stub_fai") { + + options "-stub" + when { + params { + module_args = '' + } + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] + input[1] = [[],[]] + input[2] = false + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(process.out).match()} + ) + } + } + + test("test_samtools_faidx_get_sizes") { + + when { + params { + module_args = '' + } + process { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta 
map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + input[1] = [[],[]] + input[2] = true + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(process.out).match()} + ) + } + } + + test("test_samtools_faidx_get_sizes_bgzip") { + + when { + params { + module_args = '' + } + process { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ]) + input[1] = [[],[]] + input[2] = true + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(process.out).match()} + ) + } + } + + test("test_samtools_faidx_get_sizes - stub") { + + options "-stub" + + when { + params { + module_args = '' + } + process { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + input[1] = [[],[]] + input[2] = true + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(process.out).match()} + ) + } + } + + test("test_samtools_faidx_get_sizes_bgzip - stub") { + + options "-stub" + + when { + params { + module_args = '' + } + process { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ]) + input[1] = [[],[]] + input[2] = true + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot(process.out).match()} + ) + } + } + +} diff --git a/modules/nf-core/samtools/faidx/tests/main.nf.test.snap b/modules/nf-core/samtools/faidx/tests/main.nf.test.snap new file mode 100644 index 00000000..565d20e7 --- /dev/null +++ b/modules/nf-core/samtools/faidx/tests/main.nf.test.snap @@ -0,0 +1,615 @@ +{ + "test_samtools_faidx": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "2": [ + [ + { + 
"id": "test", + "single_end": false + }, + "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "3": [ + + ], + "4": [ + [ + "SAMTOOLS_FAIDX", + "samtools", + "1.22.1" + ] + ], + "fa": [ + + ], + "fai": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "gzi": [ + + ], + "sizes": [ + + ], + "versions_samtools": [ + [ + "SAMTOOLS_FAIDX", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-23T14:02:40.159309157" + }, + "test_samtools_faidx_get_sizes_bgzip - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test" + }, + "genome.fasta.gz.sizes:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test" + }, + "genome.fasta.gz.fai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test" + }, + "genome.fasta.gz.gzi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + "SAMTOOLS_FAIDX", + "samtools", + "1.22.1" + ] + ], + "fa": [ + + ], + "fai": [ + [ + { + "id": "test" + }, + "genome.fasta.gz.fai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "gzi": [ + [ + { + "id": "test" + }, + "genome.fasta.gz.gzi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "sizes": [ + [ + { + "id": "test" + }, + "genome.fasta.gz.sizes:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_samtools": [ + [ + "SAMTOOLS_FAIDX", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-23T14:03:39.550619177" + }, + "test_samtools_faidx_get_sizes": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test" + }, + "genome.fasta.sizes:md5,a57c401f27ae5133823fb09fb21c8a3c" + ] + ], + "2": [ + [ + { + "id": "test" + }, + "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "3": [ + + ], + "4": [ + [ + "SAMTOOLS_FAIDX", + "samtools", + "1.22.1" + ] + ], + "fa": [ + + ], + 
"fai": [ + [ + { + "id": "test" + }, + "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "gzi": [ + + ], + "sizes": [ + [ + { + "id": "test" + }, + "genome.fasta.sizes:md5,a57c401f27ae5133823fb09fb21c8a3c" + ] + ], + "versions_samtools": [ + [ + "SAMTOOLS_FAIDX", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-23T14:03:16.844965756" + }, + "test_samtools_faidx_bgzip": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.gz.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474" + ] + ], + "4": [ + [ + "SAMTOOLS_FAIDX", + "samtools", + "1.22.1" + ] + ], + "fa": [ + + ], + "fai": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.gz.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "gzi": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474" + ] + ], + "sizes": [ + + ], + "versions_samtools": [ + [ + "SAMTOOLS_FAIDX", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-23T14:02:47.301476131" + }, + "test_samtools_faidx_fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "extract.fa:md5,6a0774a0ad937ba0bfd2ac7457d90f36" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + [ + "SAMTOOLS_FAIDX", + "samtools", + "1.22.1" + ] + ], + "fa": [ + [ + { + "id": "test", + "single_end": false + }, + "extract.fa:md5,6a0774a0ad937ba0bfd2ac7457d90f36" + ] + ], + "fai": [ + + ], + "gzi": [ + + ], + "sizes": [ + + ], + "versions_samtools": [ + [ + "SAMTOOLS_FAIDX", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": 
"2025-12-23T09:44:40.559583279" + }, + "test_samtools_faidx_get_sizes - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test" + }, + "genome.fasta.sizes:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test" + }, + "genome.fasta.fai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + + ], + "4": [ + [ + "SAMTOOLS_FAIDX", + "samtools", + "1.22.1" + ] + ], + "fa": [ + + ], + "fai": [ + [ + { + "id": "test" + }, + "genome.fasta.fai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "gzi": [ + + ], + "sizes": [ + [ + { + "id": "test" + }, + "genome.fasta.sizes:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_samtools": [ + [ + "SAMTOOLS_FAIDX", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-23T14:03:31.989929281" + }, + "test_samtools_faidx_stub_fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "extract.fa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + [ + "SAMTOOLS_FAIDX", + "samtools", + "1.22.1" + ] + ], + "fa": [ + [ + { + "id": "test", + "single_end": false + }, + "extract.fa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "fai": [ + + ], + "gzi": [ + + ], + "sizes": [ + + ], + "versions_samtools": [ + [ + "SAMTOOLS_FAIDX", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-23T09:44:48.295693103" + }, + "test_samtools_faidx_stub_fai": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.fai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + + ], + "4": [ + [ + "SAMTOOLS_FAIDX", + "samtools", + "1.22.1" + ] + ], + "fa": [ + + ], + "fai": [ + [ + { + "id": "test", + "single_end": false + }, + "genome.fasta.fai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "gzi": [ + 
+ ], + "sizes": [ + + ], + "versions_samtools": [ + [ + "SAMTOOLS_FAIDX", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-23T14:03:09.784289542" + }, + "test_samtools_faidx_get_sizes_bgzip": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test" + }, + "genome.fasta.gz.sizes:md5,a57c401f27ae5133823fb09fb21c8a3c" + ] + ], + "2": [ + [ + { + "id": "test" + }, + "genome.fasta.gz.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "3": [ + [ + { + "id": "test" + }, + "genome.fasta.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474" + ] + ], + "4": [ + [ + "SAMTOOLS_FAIDX", + "samtools", + "1.22.1" + ] + ], + "fa": [ + + ], + "fai": [ + [ + { + "id": "test" + }, + "genome.fasta.gz.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "gzi": [ + [ + { + "id": "test" + }, + "genome.fasta.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474" + ] + ], + "sizes": [ + [ + { + "id": "test" + }, + "genome.fasta.gz.sizes:md5,a57c401f27ae5133823fb09fb21c8a3c" + ] + ], + "versions_samtools": [ + [ + "SAMTOOLS_FAIDX", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2025-12-23T14:03:24.814967939" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/faidx/tests/nextflow.config b/modules/nf-core/samtools/faidx/tests/nextflow.config new file mode 100644 index 00000000..202c036e --- /dev/null +++ b/modules/nf-core/samtools/faidx/tests/nextflow.config @@ -0,0 +1,7 @@ +process { + + withName: SAMTOOLS_FAIDX { + ext.args = params.module_args + } + +} diff --git a/modules/nf-core/samtools/flagstat/environment.yml b/modules/nf-core/samtools/flagstat/environment.yml new file mode 100644 index 00000000..89e12a64 --- /dev/null +++ b/modules/nf-core/samtools/flagstat/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json 
+channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.22.1 + # renovate: datasource=conda depName=bioconda/samtools + - bioconda::samtools=1.22.1 diff --git a/modules/nf-core/samtools/flagstat/main.nf b/modules/nf-core/samtools/flagstat/main.nf new file mode 100644 index 00000000..f148f56b --- /dev/null +++ b/modules/nf-core/samtools/flagstat/main.nf @@ -0,0 +1,57 @@ +process SAMTOOLS_FLAGSTAT { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.22.1--h96c455f_0' : + 'biocontainers/samtools:1.22.1--h96c455f_0' }" + + input: + tuple val(meta), path(bam), path(bai) + + output: + tuple val(meta), path("*.flagstat"), emit: flagstat + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + samtools \\ + flagstat \\ + --threads ${task.cpus} \\ + $bam \\ + > ${prefix}.flagstat + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + cat <<-END_FLAGSTAT > ${prefix}.flagstat + 1000000 + 0 in total (QC-passed reads + QC-failed reads) + 0 + 0 secondary + 0 + 0 supplementary + 0 + 0 duplicates + 900000 + 0 mapped (90.00% : N/A) + 1000000 + 0 paired in sequencing + 500000 + 0 read1 + 500000 + 0 read2 + 800000 + 0 properly paired (80.00% : N/A) + 850000 + 0 with mate mapped to a different chr + 50000 + 0 with mate mapped to a different chr (mapQ>=5) + END_FLAGSTAT + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} 
diff --git a/modules/nf-core/samtools/flagstat/meta.yml b/modules/nf-core/samtools/flagstat/meta.yml new file mode 100644 index 00000000..ebbc15f2 --- /dev/null +++ b/modules/nf-core/samtools/flagstat/meta.yml @@ -0,0 +1,60 @@ +name: samtools_flagstat +description: Counts the number of alignments in a BAM/CRAM/SAM file for each FLAG + type +keywords: + - stats + - mapping + - counts + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:samtools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + ontologies: [] + - bai: + type: file + description: Index for BAM/CRAM/SAM file + pattern: "*.{bai,crai,sai}" + ontologies: [] +output: + flagstat: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.flagstat": + type: file + description: File containing samtools flagstat output + pattern: "*.{flagstat}" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@drpatelh" +maintainers: + - "@drpatelh" diff --git a/modules/nf-core/samtools/flagstat/tests/main.nf.test b/modules/nf-core/samtools/flagstat/tests/main.nf.test new file mode 100644 index 00000000..3b648a37 --- /dev/null +++ b/modules/nf-core/samtools/flagstat/tests/main.nf.test @@ -0,0 +1,56 @@ +nextflow_process { + + name "Test Process SAMTOOLS_FLAGSTAT" + script "../main.nf" + process "SAMTOOLS_FLAGSTAT" + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/flagstat" + + test("BAM") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("BAM - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/samtools/flagstat/tests/main.nf.test.snap 
b/modules/nf-core/samtools/flagstat/tests/main.nf.test.snap new file mode 100644 index 00000000..0a0a9b15 --- /dev/null +++ b/modules/nf-core/samtools/flagstat/tests/main.nf.test.snap @@ -0,0 +1,72 @@ +{ + "BAM - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,67394650dbae96d1a4fcc70484822159" + ] + ], + "1": [ + "versions.yml:md5,bdc0bfb2b0542580e7cd65e80d8570bc" + ], + "flagstat": [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,67394650dbae96d1a4fcc70484822159" + ] + ], + "versions": [ + "versions.yml:md5,bdc0bfb2b0542580e7cd65e80d8570bc" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-09-15T15:02:00.813612" + }, + "BAM": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783" + ] + ], + "1": [ + "versions.yml:md5,bdc0bfb2b0542580e7cd65e80d8570bc" + ], + "flagstat": [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783" + ] + ], + "versions": [ + "versions.yml:md5,bdc0bfb2b0542580e7cd65e80d8570bc" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-09-15T15:01:55.232954" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/idxstats/environment.yml b/modules/nf-core/samtools/idxstats/environment.yml new file mode 100644 index 00000000..89e12a64 --- /dev/null +++ b/modules/nf-core/samtools/idxstats/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.22.1 + # renovate: datasource=conda depName=bioconda/samtools + - bioconda::samtools=1.22.1 diff --git a/modules/nf-core/samtools/idxstats/main.nf 
b/modules/nf-core/samtools/idxstats/main.nf new file mode 100644 index 00000000..9181a1a5 --- /dev/null +++ b/modules/nf-core/samtools/idxstats/main.nf @@ -0,0 +1,48 @@ +process SAMTOOLS_IDXSTATS { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.22.1--h96c455f_0' : + 'biocontainers/samtools:1.22.1--h96c455f_0' }" + + input: + tuple val(meta), path(bam), path(bai) + + output: + tuple val(meta), path("*.idxstats"), emit: idxstats + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + # Note: --threads value represents *additional* CPUs to allocate (total CPUs = 1 + --threads). + samtools \\ + idxstats \\ + --threads ${task.cpus-1} \\ + $bam \\ + > ${prefix}.idxstats + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + touch ${prefix}.idxstats + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/idxstats/meta.yml b/modules/nf-core/samtools/idxstats/meta.yml new file mode 100644 index 00000000..96d42746 --- /dev/null +++ b/modules/nf-core/samtools/idxstats/meta.yml @@ -0,0 +1,60 @@ +name: samtools_idxstats +description: Reports alignment summary statistics for a BAM/CRAM/SAM file +keywords: + - stats + - mapping + - counts + - chromosome + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM 
formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:samtools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + ontologies: [] + - bai: + type: file + description: Index for BAM/CRAM/SAM file + pattern: "*.{bai,crai,sai}" + ontologies: [] +output: + idxstats: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.idxstats": + type: file + description: File containing samtools idxstats output + pattern: "*.{idxstats}" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@drpatelh" +maintainers: + - "@drpatelh" diff --git a/modules/nf-core/samtools/idxstats/tests/main.nf.test b/modules/nf-core/samtools/idxstats/tests/main.nf.test new file mode 100644 index 00000000..5fd1fc78 --- /dev/null +++ b/modules/nf-core/samtools/idxstats/tests/main.nf.test @@ -0,0 +1,53 @@ +nextflow_process { + + name "Test Process SAMTOOLS_IDXSTATS" + script "../main.nf" + process "SAMTOOLS_IDXSTATS" + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/idxstats" + + test("bam") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + """ + } + } + + then { + 
assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("bam - stub") { + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + }} diff --git a/modules/nf-core/samtools/idxstats/tests/main.nf.test.snap b/modules/nf-core/samtools/idxstats/tests/main.nf.test.snap new file mode 100644 index 00000000..d3e785e0 --- /dev/null +++ b/modules/nf-core/samtools/idxstats/tests/main.nf.test.snap @@ -0,0 +1,72 @@ +{ + "bam - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,6da44e5235401559cea62052bdc0197b" + ], + "idxstats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,6da44e5235401559cea62052bdc0197b" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-09-10T13:47:35.796569" + }, + "bam": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,df60a8c8d6621100d05178c93fb053a2" + ] + ], + "1": [ + "versions.yml:md5,6da44e5235401559cea62052bdc0197b" + ], + "idxstats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,df60a8c8d6621100d05178c93fb053a2" + ] + ], + "versions": [ + "versions.yml:md5,6da44e5235401559cea62052bdc0197b" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-09-10T13:47:31.86415" + 
} +} \ No newline at end of file diff --git a/modules/nf-core/samtools/index/environment.yml b/modules/nf-core/samtools/index/environment.yml new file mode 100644 index 00000000..89e12a64 --- /dev/null +++ b/modules/nf-core/samtools/index/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.22.1 + # renovate: datasource=conda depName=bioconda/samtools + - bioconda::samtools=1.22.1 diff --git a/modules/nf-core/samtools/index/main.nf b/modules/nf-core/samtools/index/main.nf new file mode 100644 index 00000000..a77ad821 --- /dev/null +++ b/modules/nf-core/samtools/index/main.nf @@ -0,0 +1,49 @@ +process SAMTOOLS_INDEX { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.22.1--h96c455f_0' : + 'biocontainers/samtools:1.22.1--h96c455f_0' }" + + input: + tuple val(meta), path(input) + + output: + tuple val(meta), path("*.bai") , optional:true, emit: bai + tuple val(meta), path("*.csi") , optional:true, emit: csi + tuple val(meta), path("*.crai"), optional:true, emit: crai + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + samtools \\ + index \\ + -@ ${task.cpus} \\ + $args \\ + $input + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def extension = file(input).getExtension() == 'cram' ? + "crai" : args.contains("-c") ? 
"csi" : "bai" + """ + touch ${input}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/index/meta.yml b/modules/nf-core/samtools/index/meta.yml new file mode 100644 index 00000000..1bed6bca --- /dev/null +++ b/modules/nf-core/samtools/index/meta.yml @@ -0,0 +1,77 @@ +name: samtools_index +description: Index SAM/BAM/CRAM file +keywords: + - index + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:samtools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: input file + ontologies: [] +output: + bai: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.bai": + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + ontologies: [] + csi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.csi": + type: file + description: CSI index file + pattern: "*.{csi}" + ontologies: [] + crai: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.crai": + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@drpatelh" + - "@ewels" + - "@maxulysse" +maintainers: + - "@drpatelh" + - "@ewels" + - "@maxulysse" diff --git a/modules/nf-core/samtools/index/tests/csi.nextflow.config b/modules/nf-core/samtools/index/tests/csi.nextflow.config new file mode 100644 index 00000000..0ed260ef --- /dev/null +++ b/modules/nf-core/samtools/index/tests/csi.nextflow.config @@ -0,0 +1,7 @@ +process { + + withName: SAMTOOLS_INDEX { + ext.args = '-c' + } + +} diff --git a/modules/nf-core/samtools/index/tests/main.nf.test b/modules/nf-core/samtools/index/tests/main.nf.test new file mode 100644 index 00000000..ca34fb5c --- /dev/null +++ b/modules/nf-core/samtools/index/tests/main.nf.test @@ -0,0 +1,140 @@ +nextflow_process { + + name "Test Process SAMTOOLS_INDEX" + script "../main.nf" + process "SAMTOOLS_INDEX" + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/index" + + test("bai") { + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("crai") { + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("csi") 
{ + config "./csi.nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.csi[0][1]).name, + process.out.versions + ).match() } + ) + } + } + + test("bai - stub") { + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("crai - stub") { + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("csi - stub") { + options "-stub" + config "./csi.nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/samtools/index/tests/main.nf.test.snap b/modules/nf-core/samtools/index/tests/main.nf.test.snap new file mode 100644 index 00000000..3836c6bf --- /dev/null +++ b/modules/nf-core/samtools/index/tests/main.nf.test.snap @@ -0,0 +1,250 @@ +{ + "csi - stub": { + "content": [ + { + 
"0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,b8717818c91b07de87c2a5590bad02e6" + ], + "bai": [ + + ], + "crai": [ + + ], + "csi": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,b8717818c91b07de87c2a5590bad02e6" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-09-10T14:13:38.25787" + }, + "crai - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,b8717818c91b07de87c2a5590bad02e6" + ], + "bai": [ + + ], + "crai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,b8717818c91b07de87c2a5590bad02e6" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-09-10T14:13:34.496412" + }, + "bai - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,b8717818c91b07de87c2a5590bad02e6" + ], + "bai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "crai": [ + + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,b8717818c91b07de87c2a5590bad02e6" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-09-10T14:13:25.934431" + }, + "csi": { + "content": [ + 
"test.paired_end.sorted.bam.csi", + [ + "versions.yml:md5,b8717818c91b07de87c2a5590bad02e6" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-09-10T14:13:22.262088" + }, + "crai": { + "content": [ + { + "0": [ + + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,14bc3bd5c89cacc8f4541f9062429029" + ] + ], + "3": [ + "versions.yml:md5,b8717818c91b07de87c2a5590bad02e6" + ], + "bai": [ + + ], + "crai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,14bc3bd5c89cacc8f4541f9062429029" + ] + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,b8717818c91b07de87c2a5590bad02e6" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-09-10T14:13:18.191664" + }, + "bai": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,704c10dd1326482448ca3073fdebc2f4" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,b8717818c91b07de87c2a5590bad02e6" + ], + "bai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,704c10dd1326482448ca3073fdebc2f4" + ] + ], + "crai": [ + + ], + "csi": [ + + ], + "versions": [ + "versions.yml:md5,b8717818c91b07de87c2a5590bad02e6" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-09-10T14:13:08.51539" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/sort/environment.yml b/modules/nf-core/samtools/sort/environment.yml new file mode 100644 index 00000000..89e12a64 --- /dev/null +++ b/modules/nf-core/samtools/sort/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: 
datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.22.1 + # renovate: datasource=conda depName=bioconda/samtools + - bioconda::samtools=1.22.1 diff --git a/modules/nf-core/samtools/sort/main.nf b/modules/nf-core/samtools/sort/main.nf new file mode 100644 index 00000000..6b5aa31d --- /dev/null +++ b/modules/nf-core/samtools/sort/main.nf @@ -0,0 +1,78 @@ +process SAMTOOLS_SORT { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.22.1--h96c455f_0' : + 'biocontainers/samtools:1.22.1--h96c455f_0' }" + + input: + tuple val(meta) , path(bam) + tuple val(meta2), path(fasta) + val index_format + + output: + tuple val(meta), path("${prefix}.bam"), emit: bam, optional: true + tuple val(meta), path("${prefix}.cram"), emit: cram, optional: true + tuple val(meta), path("${prefix}.sam"), emit: sam, optional: true + tuple val(meta), path("${prefix}.${extension}.crai"), emit: crai, optional: true + tuple val(meta), path("${prefix}.${extension}.csi"), emit: csi, optional: true + tuple val(meta), path("${prefix}.${extension}.bai"), emit: bai, optional: true + tuple val("${task.process}"), val('samtools'), eval("samtools version | sed '1!d;s/.* //'"), topic: versions, emit: versions_samtools + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + extension = args.contains("--output-fmt sam") ? "sam" : + args.contains("--output-fmt cram") ? "cram" : + "bam" + def reference = fasta ? "--reference ${fasta}" : "" + output_file = index_format ? "${prefix}.${extension}##idx##${prefix}.${extension}.${index_format} --write-index" : "${prefix}.${extension}" + if (index_format) { + if (!index_format.matches('bai|csi|crai')) { + error "Index format not one of bai, csi, crai." 
+ } else if (extension == "sam") { + error "Indexing not compatible with SAM output" + } + } + if ("$bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + + """ + samtools cat \\ + ${bam} \\ + | \\ + samtools sort \\ + $args \\ + -T ${prefix} \\ + --threads $task.cpus \\ + ${reference} \\ + -o ${output_file} \\ + - + + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + extension = args.contains("--output-fmt sam") ? "sam" : + args.contains("--output-fmt cram") ? "cram" : + "bam" + if (index_format) { + if (!index_format.matches('bai|csi|crai')) { + error "Index format not one of bai, csi, crai." + } else if (extension == "sam") { + error "Indexing not compatible with SAM output" + } + } + index = index_format ? "touch ${prefix}.${extension}.${index_format}" : "" + + """ + touch ${prefix}.${extension} + ${index} + + """ +} diff --git a/modules/nf-core/samtools/sort/meta.yml b/modules/nf-core/samtools/sort/meta.yml new file mode 100644 index 00000000..69968304 --- /dev/null +++ b/modules/nf-core/samtools/sort/meta.yml @@ -0,0 +1,142 @@ +name: samtools_sort +description: Sort SAM/BAM/CRAM file +keywords: + - sort + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:samtools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file(s) + pattern: "*.{bam,cram,sam}" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: Reference genome FASTA file + pattern: "*.{fa,fasta,fna}" + optional: true + ontologies: [] + - index_format: + type: string + description: Index format to use (optional) + pattern: "bai|csi|crai" +output: + bam: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "${prefix}.bam": + type: file + description: Sorted BAM file + pattern: "*.{bam}" + ontologies: [] + cram: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "${prefix}.cram": + type: file + description: Sorted CRAM file + pattern: "*.{cram}" + ontologies: [] + sam: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "${prefix}.sam": + type: file + description: Sorted SAM file + pattern: "*.{sam}" + ontologies: [] + crai: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "${prefix}.${extension}.crai": + type: file + description: CRAM index file (optional) + pattern: "*.crai" + ontologies: [] + csi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "${prefix}.${extension}.csi": + type: file + description: BAM index file (optional) + pattern: "*.csi" + ontologies: [] + bai: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "${prefix}.${extension}.bai": + type: file + description: BAM index file (optional) + pattern: "*.bai" + ontologies: [] + versions_samtools: + - - ${task.process}: + type: string + description: The process the versions were collected from + - samtools: + type: string + description: The tool name + - "samtools version | sed '1!d;s/.* //'": + type: string + description: The command used to generate the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - samtools: + type: string + description: The tool name + - "samtools version | sed '1!d;s/.* //'": + type: string + description: The command used to generate the version of the tool + +authors: + - "@drpatelh" + - "@ewels" + - "@matthdsm" +maintainers: + - "@drpatelh" + - "@ewels" + - "@matthdsm" diff --git a/modules/nf-core/samtools/sort/tests/main.nf.test b/modules/nf-core/samtools/sort/tests/main.nf.test new file mode 100644 index 00000000..df47bb25 --- /dev/null +++ b/modules/nf-core/samtools/sort/tests/main.nf.test @@ -0,0 +1,332 @@ +nextflow_process { + + name "Test Process SAMTOOLS_SORT" + script "../main.nf" + process "SAMTOOLS_SORT" + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/sort" + + test("bam_no_index") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = '' + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out.bam, + process.out.bai, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match()} + ) + } + } + + 
test("bam_bai_index") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = 'bai' + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out.bam, + process.out.bai, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match()} + ) + } + } + + test("bam_csi_index") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = 'csi' + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out.bam, + process.out.csi, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match()} + ) + } + } + + test("multiple bam") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam', checkIfExists: true) + ] + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = '' + """ + } 
+ } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out.bam, + process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match()} + ) + } + } + + test("multiple bam bai index") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam', checkIfExists: true) + ] + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = 'bai' + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out.bam, + process.out.bai.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match()} + ) + } + } + + test("multiple bam csi index") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam', checkIfExists: true) + ] + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = 'csi' + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out.bam, + process.out.csi.collect { it.collect { it instanceof Map ? 
it : file(it).name } }, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match()} + ) + } + } + + test("cram") { + + config "./nextflow_cram.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = '' + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + process.out.cram.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.crai.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match()} + ) + } + } + + test("bam - stub") { + + options "-stub" + config "./nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = '' + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.findAll { key, val -> key.startsWith("versions") }).match() } + ) + } + } + + test("multiple bam - stub") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 
'genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam', checkIfExists: true) + ] + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = '' + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.findAll { key, val -> key.startsWith("versions") }).match() } + ) + } + } + + test("cram - stub") { + + options "-stub" + config "./nextflow_cram.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = '' + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.findAll { key, val -> key.startsWith("versions") }).match() } + ) + } + } +} diff --git a/modules/nf-core/samtools/sort/tests/main.nf.test.snap b/modules/nf-core/samtools/sort/tests/main.nf.test.snap new file mode 100644 index 00000000..4e618fa3 --- /dev/null +++ b/modules/nf-core/samtools/sort/tests/main.nf.test.snap @@ -0,0 +1,296 @@ +{ + "cram": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.cram" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.cram.crai" + ] + ], + { + "versions_samtools": [ + [ + "SAMTOOLS_SORT", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2025-10-29T12:47:01.171084" + }, + "bam_csi_index": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,72ca1dff5344a5e5e6b892fe5f6b134d" + ] + ], + [ + [ + { + "id": "test", + 
"single_end": false + }, + "test.sorted.bam.csi:md5,01394e702c729cb478df914ffaf9f7f8" + ] + ], + { + "versions_samtools": [ + [ + "SAMTOOLS_SORT", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2025-10-29T12:46:00.961675" + }, + "bam - stub": { + "content": [ + { + "versions_samtools": [ + [ + "SAMTOOLS_SORT", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2025-10-29T12:47:12.154354" + }, + "multiple bam bai index": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,3ffa2affc29f0aa6e7b36dded84625fe" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.bai" + ] + ], + { + "versions_samtools": [ + [ + "SAMTOOLS_SORT", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2025-10-29T12:46:25.488622" + }, + "cram - stub": { + "content": [ + { + "versions_samtools": [ + [ + "SAMTOOLS_SORT", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2025-10-29T12:47:28.485045" + }, + "multiple bam": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,cd4eb0077f25e9cff395366b8883dd1f" + ] + ], + [ + + ], + { + "versions_samtools": [ + [ + "SAMTOOLS_SORT", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2025-10-29T12:46:13.168476" + }, + "multiple bam - stub": { + "content": [ + { + "versions_samtools": [ + [ + "SAMTOOLS_SORT", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2025-10-29T12:47:21.628088" + }, + "bam_no_index": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + 
"test.sorted.bam:md5,26b27d1f9bcb61c25da21b562349784e" + ] + ], + [ + + ], + { + "versions_samtools": [ + [ + "SAMTOOLS_SORT", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2025-10-29T12:45:47.139418" + }, + "multiple bam csi index": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,295503ba5342531a3310c33ad0efbc22" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.csi" + ] + ], + { + "versions_samtools": [ + [ + "SAMTOOLS_SORT", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2025-10-29T12:46:51.5531" + }, + "bam_bai_index": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,cae7564cb83bb4a5911205bf94124b54" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.bai:md5,50dd467c169545a4d5d1f709f7e986e0" + ] + ], + { + "versions_samtools": [ + [ + "SAMTOOLS_SORT", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2025-10-29T12:45:52.796936" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/sort/tests/nextflow.config b/modules/nf-core/samtools/sort/tests/nextflow.config new file mode 100644 index 00000000..723f62b2 --- /dev/null +++ b/modules/nf-core/samtools/sort/tests/nextflow.config @@ -0,0 +1,7 @@ +process { + + withName: SAMTOOLS_SORT { + ext.prefix = { "${meta.id}.sorted" } + } + +} diff --git a/modules/nf-core/samtools/sort/tests/nextflow_cram.config b/modules/nf-core/samtools/sort/tests/nextflow_cram.config new file mode 100644 index 00000000..3a8c0188 --- /dev/null +++ b/modules/nf-core/samtools/sort/tests/nextflow_cram.config @@ -0,0 +1,8 @@ +process { + + withName: SAMTOOLS_SORT { + ext.prefix = { "${meta.id}.sorted" } + ext.args = "--write-index --output-fmt cram" + } + 
+} diff --git a/modules/nf-core/samtools/stats/environment.yml b/modules/nf-core/samtools/stats/environment.yml new file mode 100644 index 00000000..89e12a64 --- /dev/null +++ b/modules/nf-core/samtools/stats/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.22.1 + # renovate: datasource=conda depName=bioconda/samtools + - bioconda::samtools=1.22.1 diff --git a/modules/nf-core/samtools/stats/main.nf b/modules/nf-core/samtools/stats/main.nf new file mode 100644 index 00000000..57d24680 --- /dev/null +++ b/modules/nf-core/samtools/stats/main.nf @@ -0,0 +1,40 @@ +process SAMTOOLS_STATS { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.22.1--h96c455f_0' : + 'biocontainers/samtools:1.22.1--h96c455f_0' }" + + input: + tuple val(meta), path(input), path(input_index) + tuple val(meta2), path(fasta) + + output: + tuple val(meta), path("*.stats"), emit: stats + tuple val("${task.process}"), val('samtools'), eval('samtools version | sed "1!d;s/.* //"'), emit: versions_samtools, topic: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def reference = fasta ? 
"--reference ${fasta}" : "" + """ + samtools \\ + stats \\ + ${args} \\ + --threads ${task.cpus} \\ + ${reference} \\ + ${input} \\ + > ${prefix}.stats + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.stats + """ +} diff --git a/modules/nf-core/samtools/stats/meta.yml b/modules/nf-core/samtools/stats/meta.yml new file mode 100644 index 00000000..5c59cce4 --- /dev/null +++ b/modules/nf-core/samtools/stats/meta.yml @@ -0,0 +1,88 @@ +name: samtools_stats +description: Produces comprehensive statistics from SAM/BAM/CRAM file +keywords: + - statistics + - counts + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:samtools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM file from alignment + pattern: "*.{bam,cram}" + ontologies: [] + - input_index: + type: file + description: BAI/CRAI file from alignment + pattern: "*.{bai,crai}" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: Reference file the CRAM was created with (optional) + pattern: "*.{fasta,fa}" + ontologies: [] +output: + stats: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.stats": + type: file + description: File containing samtools stats output + pattern: "*.{stats}" + ontologies: [] + versions_samtools: + - - ${task.process}: + type: string + description: Name of the process + - samtools: + type: string + description: Name of the tool + - samtools version | sed "1!d;s/.* //": + type: eval + description: The expression to obtain the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: Name of the process + - samtools: + type: string + description: Name of the tool + - samtools version | sed "1!d;s/.* //": + type: eval + description: The expression to obtain the version of the tool + +authors: + - "@drpatelh" + - "@FriederikeHanssen" + - "@ramprasadn" +maintainers: + - "@drpatelh" + - "@FriederikeHanssen" + - "@ramprasadn" diff --git a/modules/nf-core/samtools/stats/tests/main.nf.test b/modules/nf-core/samtools/stats/tests/main.nf.test new file mode 100644 index 00000000..5bc89309 --- /dev/null +++ b/modules/nf-core/samtools/stats/tests/main.nf.test @@ -0,0 +1,113 @@ +nextflow_process { + + name "Test Process SAMTOOLS_STATS" + script "../main.nf" + process "SAMTOOLS_STATS" + + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/stats" + + test("bam") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + input[1] = [[],[]] + """ + } + } + + then { + assertAll( + {assert process.success}, + {assert snapshot(process.out).match()} + ) + } + } + + test("cram") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 
'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram.crai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + {assert process.success}, + {assert snapshot(process.out).match()} + ) + } + } + + test("bam - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + input[1] = [[],[]] + """ + } + } + + then { + assertAll( + {assert process.success}, + {assert snapshot(process.out).match()} + ) + } + } + + test("cram - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram.crai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + {assert process.success}, + {assert snapshot(process.out).match()} + ) + } + } +} diff --git a/modules/nf-core/samtools/stats/tests/main.nf.test.snap b/modules/nf-core/samtools/stats/tests/main.nf.test.snap new file 
mode 100644 index 00000000..94d981b2 --- /dev/null +++ b/modules/nf-core/samtools/stats/tests/main.nf.test.snap @@ -0,0 +1,174 @@ +{ + "cram": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,f4aec6c41b73d34ac2fc6b3253aa39ba" + ] + ], + "1": [ + [ + "SAMTOOLS_STATS", + "samtools", + "1.22.1" + ] + ], + "stats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,f4aec6c41b73d34ac2fc6b3253aa39ba" + ] + ], + "versions_samtools": [ + [ + "SAMTOOLS_STATS", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2025-11-01T02:27:18.460724" + }, + "bam - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + "SAMTOOLS_STATS", + "samtools", + "1.22.1" + ] + ], + "stats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_samtools": [ + [ + "SAMTOOLS_STATS", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2025-11-01T02:27:30.245839" + }, + "cram - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + "SAMTOOLS_STATS", + "samtools", + "1.22.1" + ] + ], + "stats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_samtools": [ + [ + "SAMTOOLS_STATS", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2025-11-01T02:27:39.041649" + }, + "bam": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,41ba8ad30ddb598dadb177a54c222ab9" + ] + ], + "1": [ + [ + "SAMTOOLS_STATS", + "samtools", + "1.22.1" + ] 
+ ], + "stats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,41ba8ad30ddb598dadb177a54c222ab9" + ] + ], + "versions_samtools": [ + [ + "SAMTOOLS_STATS", + "samtools", + "1.22.1" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2025-11-01T02:26:55.988241" + } +} \ No newline at end of file diff --git a/modules/nf-core/seqtk/comp/environment.yml b/modules/nf-core/seqtk/comp/environment.yml new file mode 100644 index 00000000..bfa1b017 --- /dev/null +++ b/modules/nf-core/seqtk/comp/environment.yml @@ -0,0 +1,6 @@ +--- +channels: + - conda-forge + - bioconda +dependencies: + - "bioconda::seqtk=1.4" diff --git a/modules/nf-core/seqtk/comp/main.nf b/modules/nf-core/seqtk/comp/main.nf new file mode 100644 index 00000000..627279fb --- /dev/null +++ b/modules/nf-core/seqtk/comp/main.nf @@ -0,0 +1,38 @@ + +process SEQTK_COMP { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/seqtk:1.4--h577a1d6_3': + 'biocontainers/seqtk:1.4--h577a1d6_3' }" + + input: + tuple val(meta), path(fastx) + + output: + tuple val(meta), path("*.seqtk_stats.tsv"), emit: seqtk_stats + tuple val("${task.process}"), val('seqtk'), eval("seqtk 2>&1 | sed -n 's/^Version: //p'"), emit: versions_seqtk, topic: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + seqtk comp \\ + ${args} \\ + ${fastx} > ${prefix}.seqtk_stats.tsv + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + echo "" > ${prefix}.seqtk_stats.tsv + """ +} diff --git a/modules/nf-core/seqtk/comp/meta.yml b/modules/nf-core/seqtk/comp/meta.yml new file mode 100644 index 00000000..bcdd15d1 --- /dev/null +++ b/modules/nf-core/seqtk/comp/meta.yml @@ -0,0 +1,73 @@ +name: "seqtk_comp" +description: Computes sequence statistics from FASTQ or FASTA files +keywords: + - seqtk + - comp + - fastx + +tools: + - "seqtk_comp": + description: | + Seqtk is a fast and lightweight tool for processing sequences in the FASTA or FASTQ format. + The seqtk comp command computes base composition, sequence length, and GC content for quality control. + + homepage: https://github.com/lh3/seqtk + documentation: https://docs.csc.fi/apps/seqtk/ + tool_dev_url: https://github.com/lh3/seqtk + licence: ["MIT"] + identifier: biotools:seqtk + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'sample1' ] + - fastx: + type: file + description: A FASTQ or FASTA file + pattern: "*.{fastq,fq,fasta,fa,fas,fna}{,.gz}" + + ontologies: + - edam: http://edamontology.org/format_1930 # FASTQ +output: + seqtk_stats: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'sample1', single_end:false ] + - "*.seqtk_stats.tsv": + type: file + description: The output TSV file summarizing sequence statistics with + columns for sequence name, length, counts of A, C, G, T, and N bases, + and GC content percentage. + pattern: "*.seqtk_stats.tsv" + + ontologies: + - edam: http://edamontology.org/format_3475 # TSV + versions_seqtk: + - - ${task.process}: + type: string + description: The name of the process + - seqtk: + type: string + description: The name of the tool + - "seqtk 2>&1 | sed -n 's/^Version: //p'": + type: eval + description: The expression to obtain the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The name of the process + - seqtk: + type: string + description: The name of the tool + - "seqtk 2>&1 | sed -n 's/^Version: //p'": + type: eval + description: The expression to obtain the version of the tool +authors: + - "@sainsachiko" +maintainers: + - "@sainsachiko" diff --git a/modules/nf-core/seqtk/comp/tests/main.nf.test b/modules/nf-core/seqtk/comp/tests/main.nf.test new file mode 100644 index 00000000..81498134 --- /dev/null +++ b/modules/nf-core/seqtk/comp/tests/main.nf.test @@ -0,0 +1,59 @@ +nextflow_process { + + name "Test Process SEQTK_COMP" + script "../main.nf" + process "SEQTK_COMP" + + tag "modules" + tag "modules_nfcore" + tag "seqtk" + tag "seqtk/comp" + + test("bacteroides_fragilis - fasta") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], + file( params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/genome/genome.fna.gz', checkIfExists: true), + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + with(process.out.seqtk_stats[0]) { + assert path(get(1)).text.contains("NZ_CP069563.1") + } + } + + } + + test("bacteroides_fragilis - fasta - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true
], + file( params.modules_testdata_base_path + 'genomics/prokaryotes/bacteroides_fragilis/genome/genome.fna.gz', checkIfExists: true), + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/seqtk/comp/tests/main.nf.test.snap b/modules/nf-core/seqtk/comp/tests/main.nf.test.snap new file mode 100644 index 00000000..a78f2829 --- /dev/null +++ b/modules/nf-core/seqtk/comp/tests/main.nf.test.snap @@ -0,0 +1,88 @@ +{ + "bacteroides_fragilis - fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.seqtk_stats.tsv:md5,46c8788954c64c718a799c0f6a41ecdc" + ] + ], + "1": [ + [ + "SEQTK_COMP", + "seqtk", + "1.4-r122" + ] + ], + "seqtk_stats": [ + [ + { + "id": "test", + "single_end": true + }, + "test.seqtk_stats.tsv:md5,46c8788954c64c718a799c0f6a41ecdc" + ] + ], + "versions_seqtk": [ + [ + "SEQTK_COMP", + "seqtk", + "1.4-r122" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-14T12:07:29.217316713" + }, + "bacteroides_fragilis - fasta - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.seqtk_stats.tsv:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + "SEQTK_COMP", + "seqtk", + "1.4-r122" + ] + ], + "seqtk_stats": [ + [ + { + "id": "test", + "single_end": true + }, + "test.seqtk_stats.tsv:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions_seqtk": [ + [ + "SEQTK_COMP", + "seqtk", + "1.4-r122" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-14T12:07:37.125237854" + } +} \ No newline at end of file diff --git a/modules/nf-core/seqtk/comp/tests/tags.yml b/modules/nf-core/seqtk/comp/tests/tags.yml new file mode 100644 index 00000000..eb168ac1 --- /dev/null +++ b/modules/nf-core/seqtk/comp/tests/tags.yml @@ -0,0 +1,2 @@ +seqtk/comp: + -
"modules/nf-core/seqtk/comp/**" diff --git a/modules/nf-core/snpsites/environment.yml b/modules/nf-core/snpsites/environment.yml new file mode 100644 index 00000000..d6bb02c0 --- /dev/null +++ b/modules/nf-core/snpsites/environment.yml @@ -0,0 +1,7 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::snp-sites=2.5.1 diff --git a/modules/nf-core/snpsites/main.nf b/modules/nf-core/snpsites/main.nf new file mode 100644 index 00000000..c0cc6081 --- /dev/null +++ b/modules/nf-core/snpsites/main.nf @@ -0,0 +1,50 @@ +process SNPSITES { + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/snp-sites:2.5.1--hed695b0_0' : + 'biocontainers/snp-sites:2.5.1--hed695b0_0' }" + + input: + path alignment + + output: + path "*.fas" , emit: fasta + path "*.sites.txt" , emit: constant_sites + path "versions.yml" , emit: versions + env CONSTANT_SITES, emit: constant_sites_string + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + snp-sites \\ + $alignment \\ + $args \\ + > filtered_alignment.fas + + echo \$(snp-sites -C $alignment) > constant.sites.txt + + CONSTANT_SITES=\$(cat constant.sites.txt) + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + snpsites: \$(snp-sites -V 2>&1 | sed 's/snp-sites //') + END_VERSIONS + """ + stub: + """ + touch filtered_alignment.fas + touch constant.sites.txt + CONSTANT_SITES=\$(cat constant.sites.txt) + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + snpsites: \$(snp-sites -V 2>&1 | sed 's/snp-sites //') + END_VERSIONS + """ + +} diff --git a/modules/nf-core/snpsites/meta.yml b/modules/nf-core/snpsites/meta.yml new file mode 100644 index 00000000..f6d412e4 --- /dev/null 
+++ b/modules/nf-core/snpsites/meta.yml @@ -0,0 +1,48 @@ +name: snpsites +description: Rapidly extracts SNPs from a multi-FASTA alignment. +keywords: + - SNPs + - invariant + - constant +tools: + - snpsites: + description: Rapidly extracts SNPs from a multi-FASTA alignment. + homepage: https://www.sanger.ac.uk/tool/snp-sites/ + documentation: https://github.com/sanger-pathogens/snp-sites + licence: ["GPL-3.0-or-later"] + identifier: "" +input: + - alignment: + type: file + description: fasta alignment file + pattern: "*.{fasta,fas,fa,aln}" + ontologies: [] +output: + fasta: + - "*.fas": + type: file + description: Variant fasta file + pattern: "*.{fas}" + ontologies: [] + constant_sites: + - "*.sites.txt": + type: file + description: Text file containing counts of constant sites + pattern: "*.{sites.txt}" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML + constant_sites_string: + - CONSTANT_SITES: + type: string + description: Comma-separated counts of constant sites reported by snp-sites -C + pattern: "*.{sites.txt}" +authors: + - "@avantonder" +maintainers: + - "@avantonder" diff --git a/modules/nf-core/snpsites/tests/main.nf.test b/modules/nf-core/snpsites/tests/main.nf.test new file mode 100644 index 00000000..60ae4b9a --- /dev/null +++ b/modules/nf-core/snpsites/tests/main.nf.test @@ -0,0 +1,49 @@ +nextflow_process { + + name "Test Process SNPSITES" + script "../main.nf" + process "SNPSITES" + + tag "modules" + tag "modules_nfcore" + tag "snpsites" + + test("sarscov2 - all_sites_fas") { + + when { + process { + """ + input[0] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/all_sites.fas', checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - all_sites_fas - stub") { + options "-stub" + when {
+ process { + """ + input[0] = file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/all_sites.fas', checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/snpsites/tests/main.nf.test.snap b/modules/nf-core/snpsites/tests/main.nf.test.snap new file mode 100644 index 00000000..71522e3b --- /dev/null +++ b/modules/nf-core/snpsites/tests/main.nf.test.snap @@ -0,0 +1,72 @@ +{ + "sarscov2 - all_sites_fas - stub": { + "content": [ + { + "0": [ + "filtered_alignment.fas:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "1": [ + "constant.sites.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "2": [ + "versions.yml:md5,af6942a02036a7ff2eadb6ecf344c619" + ], + "3": [ + "" + ], + "constant_sites": [ + "constant.sites.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "constant_sites_string": [ + "" + ], + "fasta": [ + "filtered_alignment.fas:md5,d41d8cd98f00b204e9800998ecf8427e" + ], + "versions": [ + "versions.yml:md5,af6942a02036a7ff2eadb6ecf344c619" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-08T15:20:57.589176616" + }, + "sarscov2 - all_sites_fas": { + "content": [ + { + "0": [ + "filtered_alignment.fas:md5,f96c7513003e878e16fa9eac9fcda0f4" + ], + "1": [ + "constant.sites.txt:md5,8b9b226e3787f7baaefce07405af22c9" + ], + "2": [ + "versions.yml:md5,af6942a02036a7ff2eadb6ecf344c619" + ], + "3": [ + "8789,5363,5747,9456" + ], + "constant_sites": [ + "constant.sites.txt:md5,8b9b226e3787f7baaefce07405af22c9" + ], + "constant_sites_string": [ + "8789,5363,5747,9456" + ], + "fasta": [ + "filtered_alignment.fas:md5,f96c7513003e878e16fa9eac9fcda0f4" + ], + "versions": [ + "versions.yml:md5,af6942a02036a7ff2eadb6ecf344c619" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-08T15:15:41.760289537" + } +} \ No newline at end of file diff 
--git a/modules/nf-core/snpsites/tests/tags.yml b/modules/nf-core/snpsites/tests/tags.yml new file mode 100644 index 00000000..d33ff71f --- /dev/null +++ b/modules/nf-core/snpsites/tests/tags.yml @@ -0,0 +1,2 @@ +snpsites: + - "modules/nf-core/snpsites/**" diff --git a/nextflow.config b/nextflow.config index d02a30c2..516ba5a4 100644 --- a/nextflow.config +++ b/nextflow.config @@ -9,20 +9,78 @@ // Global default params, used in configs params { - // TODO nf-core: Specify your pipeline's command line flags // Input options - input = null + input = null // References - genome = null - igenomes_base = 's3://ngi-igenomes/igenomes/' - igenomes_ignore = false + fasta = null + genome = null + igenomes_base = 's3://ngi-igenomes/igenomes/' + igenomes_ignore = false + + // FASTQ preprocessing + skip_preprocessing_qc = false + preprocessing_qc_tool = 'fastqc' + + perform_shortread_qc = true + shortread_qc_tool = 'fastp' + shortread_qc_skipadaptertrim = false + shortread_qc_mergepairs = false + shortread_qc_includeunmerged = false + shortread_qc_adapter1 = null + shortread_qc_adapter2 = null + shortread_qc_adapterlist = null + shortread_qc_minlength = 50 + shortread_qc_dedup = false + + perform_longread_qc = true + longread_adapterremoval_tool = 'porechop' + longread_qc_skipadaptertrim = false + longread_qc_skipqualityfilter = false + longread_filter_tool = 'nanoq' + longread_qc_qualityfilter_minlength = 1000 + longread_qc_qualityfilter_keeppercent = 90 + longread_qc_qualityfilter_minquality = 7 + longread_qc_qualityfilter_targetbases = 500000000 + + save_preprocessed_reads = false + + // run merging + perform_runmerging = true + save_runmerged_reads = true + + // Publishing final reads going into subsampling + save_analysis_ready_fastqs = false + + // Subsampling + perform_subsampling = true + subsampling_depth_cutoff = 100 + + // Shortread mapping + shortread_mapping_tool = 'bowtie2' + + // Minimap2 options + bam_format = true + bam_index_extension = "bai" + cigar_paf_format 
= false + cigar_bam = true + + // clair3 + clair3_model = '' + clair3_platform = 'ont' + + // BCFtools consensus + genomecov_scale = '1' + genomecov_threshold = 9 + + // Maximum non GATC bases (i.e - and N) to allow in pseudogenome sequences + non_GATC_threshold = 0.5 // MultiQC options - multiqc_config = null - multiqc_title = null - multiqc_logo = null - max_multiqc_email_size = '25.MB' + multiqc_config = null + multiqc_title = null + multiqc_logo = null + max_multiqc_email_size = '25.MB' multiqc_methods_description = null // Boilerplate options @@ -39,8 +97,8 @@ params { version = false pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/' trace_report_suffix = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss')// Config options - config_profile_name = null - config_profile_description = null + config_profile_name = null + config_profile_description = null custom_config_version = 'master' custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" @@ -165,7 +223,7 @@ profiles { } // Load nf-core custom profiles from different Institutions -includeConfig !System.getenv('NXF_OFFLINE') && params.custom_config_base ? "${params.custom_config_base}/nfcore_custom.config" : "/dev/null" +includeConfig params.custom_config_base && (!System.getenv('NXF_OFFLINE') || !params.custom_config_base.startsWith('http')) ? "${params.custom_config_base}/nfcore_custom.config" : "/dev/null" // Load nf-core/bactmap custom profiles from different institutions. // TODO nf-core: Optionally, you can add a pipeline-specific nf-core config at https://github.com/nf-core/configs @@ -226,15 +284,38 @@ dag { manifest { name = 'nf-core/bactmap' - author = """Andries J. van Tonder""" // The author field is deprecated from Nextflow version 24.10.0, use contributors instead + author = """Andries J. 
van Tonder, Anthony Underwood, Than Le Viet and the nf-core/bactmap team""" // The author field is deprecated from Nextflow version 24.10.0, use contributors instead contributors = [ - // TODO nf-core: Update the field with the details of the contributors to your pipeline. New with Nextflow version 24.10.0 [ name: 'Andries J. van Tonder', - affiliation: '', + affiliation: 'Department of Veterinary Medicine, University of Cambridge, Cambridge, UK', + email: 'ajv37@cam.ac.uk', + github: 'https://github.com/avantonder', + contribution: ['author', 'maintainer'], // List of contribution types ('author', 'maintainer' or 'contributor') + orcid: 'https://orcid.org/0000-0002-4380-5250' + ], + [ + name: 'Anthony Underwood', + affiliation: 'Broken Strings Biosciences, Cambridge, UK', + email: '', + github: 'https://github.com/antunderwood', + contribution: ['author'], // List of contribution types ('author', 'maintainer' or 'contributor') + orcid: 'https://orcid.org/0000-0002-8547-4277' + ], + [ + name: 'Than Le Viet', + affiliation: 'Quadram Institute, Norwich, UK', email: '', - github: '', - contribution: [], // List of contribution types ('author', 'maintainer' or 'contributor') + github: 'https://github.com/thanhleviet', + contribution: ['author'], // List of contribution types ('author', 'maintainer' or 'contributor') + orcid: 'https://orcid.org/0000-0002-2106-8130' + ], + [ + name: 'and the nf-core/bactmap team', + affiliation: 'nf-core community', + email: '', + github: 'https://github.com/nf-core', + contribution: ['contributor'], // List of contribution types ('author', 'maintainer' or 'contributor') orcid: '' ], ] @@ -242,46 +323,19 @@ manifest { description = """A mapping-based pipeline for bacterial whole genome sequences""" mainScript = 'main.nf' defaultBranch = 'master' - nextflowVersion = '!>=24.04.2' - version = '2.0.0dev' + nextflowVersion = '!>=25.10.0' + version = '2.0.0' doi = '' } // Nextflow plugins plugins { - id 'nf-schema@2.3.0' // Validation of 
pipeline parameters and creation of an input channel from a sample sheet + id 'nf-schema@2.5.1' // Validation of pipeline parameters and creation of an input channel from a sample sheet } validation { defaultIgnoreParams = ["genomes"] monochromeLogs = params.monochrome_logs - help { - enabled = true - command = "nextflow run nf-core/bactmap -profile --input samplesheet.csv --outdir " - fullParameter = "help_full" - showHiddenParameter = "show_hidden" - beforeText = """ --\033[2m----------------------------------------------------\033[0m- - \033[0;32m,--.\033[0;30m/\033[0;32m,-.\033[0m -\033[0;34m ___ __ __ __ ___ \033[0;32m/,-._.--~\'\033[0m -\033[0;34m |\\ | |__ __ / ` / \\ |__) |__ \033[0;33m} {\033[0m -\033[0;34m | \\| | \\__, \\__/ | \\ |___ \033[0;32m\\`-._,-`-,\033[0m - \033[0;32m`._,._,\'\033[0m -\033[0;35m nf-core/bactmap ${manifest.version}\033[0m --\033[2m----------------------------------------------------\033[0m- -""" - afterText = """${manifest.doi ? "\n* The pipeline\n" : ""}${manifest.doi.tokenize(",").collect { " https://doi.org/${it.trim().replace('https://doi.org/','')}"}.join("\n")}${manifest.doi ? "\n" : ""} -* The nf-core framework - https://doi.org/10.1038/s41587-020-0439-x - -* Software dependencies - https://github.com/nf-core/bactmap/blob/master/CITATIONS.md -""" - } - summary { - beforeText = validation.help.beforeText - afterText = validation.help.afterText - } } // Load modules.config for DSL2 module specific options diff --git a/nextflow_schema.json b/nextflow_schema.json index dfae7be0..2dbc68c8 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -224,6 +224,326 @@ "fa_icon": "far calendar", "description": "Suffix to add to the trace report filename. Default is the date and time in the format yyyy-MM-dd_HH-mm-ss.", "hidden": true + }, + "help": { + "type": ["boolean", "string"], + "description": "Display the help message." + }, + "help_full": { + "type": "boolean", + "description": "Display the full detailed help message." 
+ }, + "show_hidden": { + "type": "boolean", + "description": "Display hidden parameters in the help message (only works when --help or --help_full are provided)." + } + } + }, + "preprocessing_general_qc_options": { + "title": "Preprocessing general QC options", + "type": "object", + "description": "Common options across both long and short read preprocessing QC steps", + "default": "", + "properties": { + "skip_preprocessing_qc": { + "type": "boolean", + "description": "Specify to skip sequencing quality control of raw sequencing reads", + "fa_icon": "fas fa-forward", + "help_text": "Skipping running of FastQC or Falco maybe useful in cases where you are already running with preprocessed data (e.g. you are also skipping short/long read qc steps) that you already know the quality of." + }, + "preprocessing_qc_tool": { + "type": "string", + "default": "fastqc", + "description": "Specify the tool used for quality control of raw sequencing reads", + "enum": ["fastqc", "falco"], + "fa_icon": "fas fa-tools", + "help_text": "Falco is designed as a drop-in replacement for FastQC but written in C++ for faster computation. We particularly recommend using falco when using long reads (due to reduced memory constraints), however is also applicable for short reads." + }, + "save_preprocessed_reads": { + "type": "boolean", + "description": "Save reads from samples that went through the adapter clipping, pair-merging, and length filtering steps for both short and long reads", + "fa_icon": "fas fa-save", + "help_text": "This saves the FASTQ output from the following tools:\\n\\n- fastp\\n- AdapterRemoval\\n- Porechop\\n- Filtlong\\n- Nanoq\\n\\nThese reads will be a mixture of: adapter clipped, quality trimmed, pair-merged, and length filtered, depending on the parameters you set." 
+ }, + "save_analysis_ready_fastqs": { + "type": "boolean", + "description": "Save only the final reads from all read processing steps in results directory.", + "fa_icon": "fas fa-save", + "help_text": "This flag will generate the directory `results/analysis_ready_reads` that contains the reads from the last preprocessing (QC, run merging etc.) step of the pipeline run. \\n\\nThis can be useful if you wish to re-use the final cleaned-up and prepared reads - the data actually used for the actual mapping and variant calling steps of the pipeline - for other analyses or purposes to reduce redundant preprocessing between different pipelines." + } + } + }, + "preprocessing_short_read_qc_options": { + "title": "Preprocessing short-read QC options", + "type": "object", + "description": "Options for adapter clipping, quality trimming and pair-merging", + "default": "", + "properties": { + "perform_shortread_qc": { + "type": "boolean", + "default": true, + "fa_icon": "fas fa-toggle-on", + "description": "Turns on short read quality control steps (adapter clipping, read filtering etc.)", + "help_text": "Turns on short read quality control steps (adapter clipping etc.)\\n\\nThis subworkflow can perform:\\n\\n- Adapter removal\\n- Read quality trimming\\n- Read pair merging\\n- Length filtering\\n\\nEither with fastp or AdapterRemoval.\\n\\nRemoving adapters (if present) is recommended to reduce false-positive hits that may occur from 'dirty' or 'contaminated' reference genomes in a profiling database that contain accidentally incorporated adapter sequences. Note that some, but not all, tools support paired-end alignment (utilising information about the insert covered by the pairs). However read pair merging in some cases can be recommended to increase read length (such as in aDNA). Length filtering, and/or complexity can speed up alignment by reducing the number of short unspecific reads that need to be aligned."
+ }, + "shortread_qc_tool": { + "type": "string", + "default": "fastp", + "description": "Specify which tool to use for short-read QC", + "fa_icon": "fas fa-tools", + "enum": ["fastp", "adapterremoval"] + }, + "shortread_qc_skipadaptertrim": { + "type": "boolean", + "description": "Skip adapter trimming", + "fa_icon": "fas fa-forward", + "help_text": "Skip the removal of sequencing adapters. \\n\\nThis often can be useful to speed up run-time of the pipeline when analysing data downloaded from public databases such as the ENA or SRA, as adapters should already be removed (however we recommend to check FastQC results to ensure this is the case)." + }, + "shortread_qc_adapter1": { + "type": "string", + "description": "Specify adapter 1 nucleotide sequence", + "fa_icon": "fas fa-grip-lines", + "help_text": "Specify a custom forward or R1 adapter sequence to be removed from reads. \\n\\nIf not set, the selected short-read QC tool's defaults will be used.\\n\\n> Modifies tool parameter(s):\\n> - fastp: `--adapter_sequence`. fastp default: `AGATCGGAAGAGCACACGTCTGAACTCCAGTCA`\\n> - AdapterRemoval: `--adapter1`. AdapterRemoval2 default: `AGATCGGAAGAGCACACGTCTGAACTCCAGTCACNNNNNNATCTCGTATGCCGTCTTCTGCTTG`" + }, + "shortread_qc_adapter2": { + "type": "string", + "description": "Specify adapter 2 nucleotide sequence", + "fa_icon": "fas fa-grip-lines", + "help_text": "Specify a custom reverse or R2 adapter sequence to be removed from reads. \\n\\nIf not set, the selected short-read QC tool's defaults will be used.\\n\\n> Modifies tool parameter(s):\\n> - fastp: `--adapter_sequence_r2`. fastp default: `AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT`\\n> - AdapterRemoval: `--adapter2`. AdapterRemoval2 default: `AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT`" + }, + "shortread_qc_adapterlist": { + "type": "string", + "description": "Specify a list of all possible adapters to trim. Overrides --shortread_qc_adapter1/2. Formats: .txt (AdapterRemoval) or .fasta.
(fastp).", + "fa_icon": "fas fa-th-list", + "help_text": "Allows to supply a file with a list of adapter (combinations) to remove from all files. \\n\\nOverrides the --shortread_qc_adapter1/--shortread_qc_adapter2 parameters . \\n\\nFor AdapterRemoval this consists of a two column table with a `.txt` extension: first column represents forward strand, second column for reverse strand. You must supply all possible combinations, one per line, and this list is applied to all files. See AdapterRemoval documentation for more information.\\n\\nFor fastp this consists of a standard FASTA format with a `.fasta`/`.fa`/`.fna`/`.fas` extension. The adapter sequence in this file should be at least 6bp long, otherwise it will be skipped. fastp trims the adapters present in the FASTA file one by one.\\n\\n> Modifies AdapterRemoval parameter: --adapter-list\\n> Modifies fastp parameter: --adapter_fasta" + }, + "shortread_qc_mergepairs": { + "type": "boolean", + "description": "Turn on merging of read pairs for paired-end data", + "fa_icon": "fas fa-toggle-on", + "help_text": "Turn on the merging of read-pairs of paired-end short read sequencing data. \\n\\n> Modifies tool parameter(s):\\n> - AdapterRemoval: `--collapse`\\n> - fastp: `-m --merged_out`\\n" + }, + "shortread_qc_includeunmerged": { + "type": "boolean", + "description": "Include unmerged reads from paired-end merging in the downstream analysis", + "fa_icon": "fas fa-times-circle", + "help_text": "Turns on the inclusion of unmerged reads in resulting FASTQ file from merging paired-end sequencing data when using `fastp` and/or `AdapterRemoval`. For `fastp` this means the unmerged read pairs are directly included in the output FASTQ file. For `AdapterRemoval`, additional output files containing unmerged reads are all concatenated into one file by the workflow.\\n\\nExcluding unmerged reads can be useful in cases where you prefer to have very short reads (e.g. 
aDNA), thus excluding longer-reads or possibly faulty reads where one of the pair was discarded.\\n\\n> Adds `fastp` option: `--include_unmerged`\\n" + }, + "shortread_qc_minlength": { + "type": "integer", + "default": 50, + "description": "Specify the minimum length of reads to be retained", + "fa_icon": "fas fa-ruler-horizontal", + "help_text": "Specifying a minimum read length filtering can speed up profiling by reducing the number of short unspecific reads that need to be matched/aligned to the database.\\n\\n> Modifies tool parameter(s):\\n> - fastp: `--length_required`\\n> - AdapterRemoval: `--minlength`" + }, + "shortread_qc_dedup": { + "type": "boolean", + "description": "Perform deduplication of the input reads (fastp only)", + "fa_icon": "fas fa-toggle-on", + "help_text": "This enables the deduplication of processed reads during fastp adapter removal and/or merging. It removes identical reads that are likely artefacts from laboratory protocols (e.g. amplification), and provide no additional sequence information to the library.\\n\\nRemoving duplicates can increase runtime and increase accuracy of abundance calculations.\\n\\n> Modifies tool parameter(s):\\n> fastp: ` --dedup`\\n" + } + } + }, + "preprocessing_long_read_qc_options": { + "title": "Preprocessing long-read QC options", + "type": "object", + "description": "Options for adapter clipping, quality trimming, and length filtering", + "default": "", + "properties": { + "perform_longread_qc": { + "type": "boolean", + "default": true, + "description": "Turns on long read quality control steps (adapter clipping, length filtering etc.)", + "help_text": "Turns on long read quality control steps (adapter clipping, length and/or quality filtering.)\\n\\nRemoving adapters (if present) is recommended to reduce false-positive hits that may occur from 'dirty' or 'contaminated' reference genomes in a profiling database that contain accidentally incorporated adapter sequences.\\n\\nLength filtering,
and quality filtering can speed up alignment by reducing the number of unspecific reads that need to be aligned.", + "fa_icon": "fas fa-toggle-on" + }, + "longread_adapterremoval_tool": { + "type": "string", + "default": "porechop", + "description": "Specify which tool to use for adapter trimming.", + "enum": ["porechop", "porechop_abi"], + "help_text": "The performance of Porechop and Porechop_ABI is the same in terms of removing adapter reads. However, Porechop is no longer updated, whereas Porechop_ABI receives regular updates.", + "fa_icon": "fas fa-hammer" + }, + "longread_qc_skipadaptertrim": { + "type": "boolean", + "description": "Skip long-read trimming", + "fa_icon": "fas fa-forward", + "help_text": "Skip removal of adapters by Porechop. This can be useful in some cases to speed up run time - particularly when you are running data downloaded from public databases such as the ENA/SRA that should already have adapters removed. We recommend that you check your FastQC results to confirm this is indeed the case." + }, + "longread_filter_tool": { + "type": "string", + "default": "nanoq", + "description": "Specify which tool to use for long reads filtering", + "fa_icon": "fas fa-hammer", + "help_text": "Nanoq is a filtering tool only for Nanopore reads. Nanoq is faster and more memory-efficient than Filtlong. Nanoq also provides a summary of input read statistics; see [benchmarking](https://github.com/esteinig/nanoq?tab=readme-ov-file#benchmarks). \\n\\nFiltlong is a good option if you want to keep a certain percentage of reads after filtering, and you can also use it for non-Nanopore long reads." + }, + "longread_qc_skipqualityfilter": { + "type": "boolean", + "description": "Skip long-read length and quality filtering", + "fa_icon": "fas fa-forward", + "help_text": "Skip quality filtering with Filtlong or Nanoq. This will skip length, percent reads, and target bases filtering (see other `--longread_qc_qualityfilter_*` parameters)."
+ }, + "longread_qc_qualityfilter_minlength": { + "type": "integer", + "default": 1000, + "description": "Specify the minimum length of reads to be retained", + "fa_icon": "fas fa-ruler-horizontal", + "help_text": "Specify the minimum length of reads to be kept for downstream analysis.\\n\\n> Modifies tool parameter(s):\\n> - Filtlong: `--min_length`\\n> - Nanoq: `--min-len`" + }, + "longread_qc_qualityfilter_keeppercent": { + "type": "integer", + "default": 90, + "description": "Specify the percent of high-quality bases to be retained", + "fa_icon": "fas fa-percent", + "help_text": "Throw out the remaining percentage of reads outside the value. This is measured by bp, not by read count. So this option throws out the worst e.g. 10% of read bases if the parameter is set to `90`. _Modified from [Filtlong documentation](https://github.com/rrwick/Filtlong)_\\n\\n> Modifies tool parameter(s):\\n> - Filtlong: `--keep_percent`" + }, + "longread_qc_qualityfilter_targetbases": { + "type": "integer", + "default": 500000000, + "description": "Filtlong only: specify the number of high-quality bases in the library to be retained", + "fa_icon": "fas fa-bullseye", + "help_text": "Removes the worst reads until only the specified value of bases remain, useful for very large read sets. If the input read set is less than the specified value, this setting will have no effect. _Modified from [Filtlong documentation](https://github.com/rrwick/Filtlong)_\\n\\n> Modifies tool parameter(s):\\n> - Filtlong: `--target_bases`" + }, + "longread_qc_qualityfilter_minquality": { + "type": "integer", + "default": 7, + "description": "Nanoq only: specify the minimum average read quality filter (Q)", + "fa_icon": "fas fa-bullseye", + "help_text": "Remove the reads with quality score lower than 7.
\\n\\n> Modifies tool parameter(s):\\n> - Nanoq: `--min-qual`" + } + } + }, + "preprocessing_run_merging_options": { + "title": "Preprocessing run-merging options", + "type": "object", + "description": "Options for per-sample run-merging", + "default": "", + "properties": { + "perform_runmerging": { + "type": "boolean", + "default": true, + "description": "Turn on run merging", + "fa_icon": "fas fa-toggle-on", + "help_text": "Turns on the concatenation of sequencing runs or libraries with the same sample name.\\n\\nThis can be useful to ensure you get a single profile per sample, rather than one profile per run or library. Note that in some cases comparing profiles of independent _libraries_ may be useful, so this parameter may not always be suitable." + }, + "save_runmerged_reads": { + "type": "boolean", + "default": true, + "description": "Save reads from samples that went through the run-merging step", + "fa_icon": "fas fa-save", + "help_text": "Save the run- and library-concatenated reads of a given sample in FASTQ format.\\n\\n> \\u26a0\\ufe0f Only samples that went through the run-merging step of the pipeline will be stored in the resulting directory. \\n\\nIf you wish to save the files that go to the classification/profiling steps for samples that _did not_ go through run merging, you must supply the appropriate upstream `--save_` flag.\\n\\n" + } + } + }, + "sub_sampling_options": { + "title": "Sub-sampling options", + "type": "object", + "description": "Options for sub-sampling reads", + "default": "", + "properties": { + "perform_subsampling": { + "type": "boolean", + "default": true, + "description": "Turn on sub-sampling of reads with Rasusa", + "fa_icon": "fas fa-toggle-on", + "help_text": "Subsampling sequence reads is a good idea because it reduces computational resources and processing time while still maintaining sufficient data for accurate analysis." 
+ }, + "subsampling_depth_cutoff": { + "type": "integer", + "default": 100, + "description": "Desired coverage depth when sub-sampling", + "fa_icon": "fas fa-ruler-vertical", + "help_text": "100X should be more than enough coverage to perform accurate variant calling" + } + } + }, + "short_read_mapping_options": { + "title": "Short read mapping options", + "type": "object", + "description": "Options for short-read mapping", + "default": "", + "properties": { + "shortread_mapping_tool": { + "type": "string", + "default": "bowtie2", + "description": "Specify which tool to use for short-read mapping", + "enum": ["bowtie2", "bwa"], + "help_text": "By default the pipeline uses Bowtie2 but BWA mem 2 can also be used for short-read mapping ", + "fa_icon": "fas fa-grip-lines" + } + } + }, + "long_read_mapping_options": { + "title": "Long-read mapping options", + "type": "object", + "description": "Options for long-read mapping", + "default": "", + "properties": { + "bam_format": { + "type": "boolean", + "default": true, + "description": "Specify the output format from minimap2 align", + "fa_icon": "fas fa-sign-out-alt", + "help_text": "Saves the output from minimap2 align to a bam file. \\n\\n> Modifies tool parameter(s):\\n> - minimap2: `-a`\\n\\nIf set to false, the output of minimap2 align is saved to a paf file" + }, + "bam_index_extension": { + "type": "string", + "default": "bai", + "fa_icon": "fas fa-save", + "description": "Specify the bam index file extension" + }, + "cigar_paf_format": { + "type": "boolean", + "description": "Generate CIGAR", + "help_text": "Generate CIGAR. In PAF, the CIGAR is written to the \u2018cg\u2019 custom tag.", + "fa_icon": "fas fa-smoking" + }, + "cigar_bam": { + "type": "boolean", + "fa_icon": "fas fa-smoking", + "description": "Write CIGAR with >65535 operators at the CG tag.", + "help_text": "Older tools are unable to convert alignments with >65535 CIGAR ops to BAM. This option makes minimap2 SAM compatible with older tools.
Newer tools recognize this tag and reconstruct the real CIGAR in memory.", + "default": true + }, + "clair3_model": { + "type": "string", + "description": "Path to Clair3 model", + "fa_icon": "fas fa-check-circle" + }, + "clair3_platform": { + "type": "string", + "default": "ont", + "description": "Sequencing platform", + "help_text": "Clair3 can call variants from ONT, PacBio and Illumina sequence data. Here we only support ONT data", + "fa_icon": "fas fa-grip-lines" + } + } + }, + "consensus_options": { + "title": "Consensus options", + "type": "object", + "description": "Options for creating FASTA consensus files", + "default": "", + "properties": { + "genomecov_threshold": { + "type": "integer", + "default": 9, + "description": "Specify the coverage at which low coverage regions are masked with N", + "fa_icon": "fas fa-bullseye" + }, + "genomecov_scale": { + "type": "string", + "default": "1", + "description": "Scale the coverage by a constant factor", + "help_text": "Each coverage value is multiplied by this factor before being reported. Useful for normalizing coverage by, e.g., reads per million (RPM). Default is 1.0 i.e.
unscaled.", + "fa_icon": "fas fa-balance-scale-left" + }, + "non_GATC_threshold": { + "type": "number", + "default": 0.5, + "description": "Maximum non GATC bases (i.e - and N) to allow in consensus FASTA", + "fa_icon": "fas fa-percent" } } } @@ -240,6 +560,30 @@ }, { "$ref": "#/$defs/generic_options" + }, + { + "$ref": "#/$defs/preprocessing_general_qc_options" + }, + { + "$ref": "#/$defs/preprocessing_short_read_qc_options" + }, + { + "$ref": "#/$defs/preprocessing_long_read_qc_options" + }, + { + "$ref": "#/$defs/preprocessing_run_merging_options" + }, + { + "$ref": "#/$defs/sub_sampling_options" + }, + { + "$ref": "#/$defs/short_read_mapping_options" + }, + { + "$ref": "#/$defs/long_read_mapping_options" + }, + { + "$ref": "#/$defs/consensus_options" } ] } diff --git a/nf-test.config b/nf-test.config new file mode 100644 index 00000000..3a1fff59 --- /dev/null +++ b/nf-test.config @@ -0,0 +1,24 @@ +config { + // location for all nf-test tests + testsDir "." + + // nf-test directory including temporary files for each test + workDir System.getenv("NFT_WORKDIR") ?: ".nf-test" + + // location of an optional nextflow.config file specific for executing tests + configFile "tests/nextflow.config" + + // ignore tests coming from the nf-core/modules repo + ignore 'modules/nf-core/**/tests/*', 'subworkflows/nf-core/**/tests/*' + + // run all test with defined profile(s) from the main nextflow.config + profile "test" + + // list of filenames or patterns that should be trigger a full test run + triggers 'nextflow.config', 'nf-test.config', 'conf/test.config', 'tests/nextflow.config', 'tests/.nftignore' + + // load the necessary plugins + plugins { + load "nft-utils@0.0.3" + } +} diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json index 6dc91e48..85cb3b04 100644 --- a/ro-crate-metadata.json +++ b/ro-crate-metadata.json @@ -21,9 +21,9 @@ { "@id": "./", "@type": "Dataset", - "creativeWorkStatus": "InProgress", - "datePublished": "2025-03-13T14:15:46+00:00", - 
"description": "

\n \n \n \"nf-core/bactmap\"\n \n

\n\n[![GitHub Actions CI Status](https://github.com/nf-core/bactmap/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/bactmap/actions/workflows/ci.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/bactmap/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/bactmap/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/bactmap/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.2-23aa62.svg)](https://www.nextflow.io/)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/bactmap)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23bactmap-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/bactmap)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on 
YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/bactmap** is a bioinformatics pipeline that ...\n\n\n\n\n1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/))2. Present QC for raw reads ([`MultiQC`](http://multiqc.info/))\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\n\n\nNow, you can run the pipeline using:\n\n\n\n```bash\nnextflow run nf-core/bactmap \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/bactmap/usage) and the [parameter documentation](https://nf-co.re/bactmap/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/bactmap/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/bactmap/output).\n\n## Credits\n\nnf-core/bactmap was originally written by Andries J. 
van Tonder.\n\nWe thank the following people for their extensive assistance in the development of this pipeline:\n\n\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#bactmap` channel](https://nfcore.slack.com/channels/bactmap) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\n\n\n\n\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", + "creativeWorkStatus": "Stable", + "datePublished": "2025-09-15T13:47:58+00:00", + "description": "

\n \n \n \"nf-core/bactmap\"\n \n

\n\n[![GitHub Actions CI Status](https://github.com/nf-core/bactmap/actions/workflows/ci.yml/badge.svg)](https://github.com/nf-core/bactmap/actions/workflows/ci.yml)\n[![GitHub Actions Linting Status](https://github.com/nf-core/bactmap/actions/workflows/linting.yml/badge.svg)](https://github.com/nf-core/bactmap/actions/workflows/linting.yml)[![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?labelColor=000000&logo=Amazon%20AWS)](https://nf-co.re/bactmap/results)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.XXXXXXX-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.XXXXXXX)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A524.04.2-23aa62.svg)](https://www.nextflow.io/)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/nf-core/bactmap)\n\n[![Get help on Slack](http://img.shields.io/badge/slack-nf--core%20%23bactmap-4A154B?labelColor=000000&logo=slack)](https://nfcore.slack.com/channels/bactmap)[![Follow on Twitter](http://img.shields.io/badge/twitter-%40nf__core-1DA1F2?labelColor=000000&logo=twitter)](https://twitter.com/nf_core)[![Follow on Mastodon](https://img.shields.io/badge/mastodon-nf__core-6364ff?labelColor=FFFFFF&logo=mastodon)](https://mstdn.science/@nf_core)[![Watch on 
YouTube](http://img.shields.io/badge/youtube-nf--core-FF0000?labelColor=000000&logo=youtube)](https://www.youtube.com/c/nf-core)\n\n## Introduction\n\n**nf-core/bactmap** is a bioinformatics best-practice analysis pipeline for mapping short (Illumina) and long reads (Oxford Nanopore) from bacterial WGS to a reference sequence, creating filtered VCF files and making pseudogenomes based on high quality positions in the VCF files.\n\n## Pipeline summary\n\n![](docs/images/bactmap_pipeline.png)\n\n1. Index reference fasta file (short-read: [`BWA index`](https://github.com/lh3/bwa) or [`Bowtie2 build`](http://bowtie-bio.sourceforge.net/bowtie2/index.shtml); long-read: [`minimap2 index`](https://github.com/lh3/minimap2))\n2. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) or [`falco`](https://github.com/smithlabcode/falco) as an alternative option)\n3. Calculate fastq summary statistics ([`fastq-scan`](https://github.com/rpetit3/fastq-scan))\n4. Perform read pre-processing (optional)\n - Adapter clipping and merging (short-read: [`fastp`](https://github.com/OpenGene/fastp) or [`AdapterRemoval2`](https://github.com/MikkelSchubert/adapterremoval); long-read: [`porechop`](https://github.com/rrwick/Porechop) or [`Porechop_ABI`](https://github.com/bonsai-team/Porechop_ABI))\n - Quality filtering (long-read: [`Filtlong`](https://github.com/rrwick/Filtlong)), [`Nanoq`](https://github.com/esteinig/nanoq)\n - Run merging ([`cat`](https://pubs.opengroup.org/onlinepubs/9699919799/utilities/cat.html))\n5. Downsample fastq files (optional) ([`Rasusa`](https://github.com/mbhall88/rasusa))\n6. Summarise read statistics pre- and post-processing and subsampling ([`read_stats`](https://github.com/nf-core/bactmap/blob/master/modules/local/read_stats/main.nf))\n7. 
Variant calling\n\n- Map reads to reference (short-read: [`BWA-MEM2`](https://github.com/bwa-mem2/bwa-mem2) or [`Bowtie2`](http://bowtie-bio.sourceforge.net/bowtie2/index.shtml); long-read: [`minimap2`](https://github.com/lh3/minimap2))\n- Sort and index alignments ([`SAMtools view/sort`](https://sourceforge.net/projects/samtools/files/samtools/))\n- Summarise alignment statistics ([`SAMtools stats`](https://sourceforge.net/projects/samtools/files/samtools/))\n- Call variants (short-read: [`FreeBayes`](https://github.com/freebayes/freebayes); long-read: [`Clair3`](https://github.com/HKU-BAL/Clair3))\n- Filter variants ([`BCFtools filter`](http://samtools.github.io/bcftools/bcftools.html))\n- Summarise variant statistics ([`BCFtools stats`](http://samtools.github.io/bcftools/bcftools.html))\n- Convert filtered bcf to pseudogenome fasta ([`BCFtools consensus`](http://samtools.github.io/bcftools/bcftools.html) and [`BEDtools`](https://bedtools.readthedocs.io/en/latest/content/tools/genomecov.html))\n- Summarise mapping statistics ([`seqtk`](https://github.com/lh3/seqtk))\n\n8. Create alignment from pseudogenomes by concatenating fasta files having first checked that the sample sequences are high quality ([`alignpseudogenomes`](https://github.com/nf-core/bactmap/blob/master/modules/local/alignpseudogenomes/main.nf))\n9. Extract variant sites from alignment ([`SNP-sites`](https://github.com/sanger-pathogens/snp-sites))\n10. Present QC for raw and processed reads, alignment statistics and variant statistics ([`MultiQC`](http://multiqc.info/))\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. 
Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst, prepare a samplesheet with your input data that looks as follows:\n\n```csv title=\"samplesheet.csv\"\nsample,run_accession,instrument_platform,fastq_1,fastq_2\n2612,run1,ILLUMINA,2612_run1_R1.fq.gz,\n2613,run1,ILLUMINA,2612_run3_R1.fq.gz,2612_run3_R2.fq.gz\n2614,run3,OXFORD_NANOPORE,2614_file1.fastq.gz,\n2614,run3,OXFORD_NANOPORE,2614_file2.fastq.gz,\n```\n\nEach row represents a fastq file (single-end) or a pair of fastq files (paired end), either Illumina (short reads) or Oxford Nanopore (long reads).\n\nAdditionally, if you are analysing Oxford Nanopore data, you will need to provide the path to a model to use with `Clair3` (specified with `--clair3_model`). Models for older chemistries and basecallers (e.g. r9.4.1) can be downloaded from [here](https://www.bio8.cs.hku.hk/clair3/clair3_models/). For newer chemistries and basecallers, ONT provides models through [Rerio](https://github.com/nanoporetech/rerio). To download the models for Clair3 from the ONT github, you can use the following commands (each model will be downloaded to the folder `clair3_models/`):\n\n```bash\n# Clone the rerio repository\ngit clone https://github.com/nanoporetech/rerio\n\n# Download all models\npython3 download_model.py --clair3\n```\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run nf-core/bactmap \\\n -profile \\\n --input samplesheet.csv \\\n --fasta \\\n --clair3_model \\\n --outdir \n```\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\nFor more details and further functionality, please refer to the [usage documentation](https://nf-co.re/bactmap/usage) and the [parameter documentation](https://nf-co.re/bactmap/parameters).\n\n## Pipeline output\n\nTo see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/bactmap/results) tab on the nf-core website pipeline page.\nFor more details about the output files and reports, please refer to the\n[output documentation](https://nf-co.re/bactmap/output).\n\n## Credits\n\nnf-core/bactmap was originally written by [Anthony Underwood](https://github.com/antunderwood), [Andries van Tonder](https://github.com/avantonder) and [Thanh Le Viet](https://github.com/thanhleviet).\n\nWe thank the following people for their extensive assistance in the development\nof this pipeline:\n\n- [Alexandre Gilardet](https://github.com/alexandregilardet)\n- [Hanh Hoang](https://github.com/sainsachiko)\n- [Ismael Henarejos-Castilo](https://github.com/IsmaelHC1994)\n- [Mareike Janiak](https://github.com/MareikeJaniak)\n- [Harshil Patel](https://github.com/drpatelh)\n- [Olha Petryk](https://github.com/opetryk)\n- [Richard Agyekum](https://github.com/QuadjoLegend)\n- [Steven Sutcliffe](https://github.com/sgsutcliffe)\n- [Szymon Szyszkowski](https://github.com/project-defiant)\n\nAnthony Underwood's time working on the project was funded by the National Institute for Health Research(NIHR) Global Health Research Unit for the Surveillance of Antimicrobial Resistance (Grant Reference Number 16/136/111)\n![NIHR funded](assets/nihr_logos_funded_by.jpg)\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing 
guidelines](.github/CONTRIBUTING.md).\n\nFor further information or help, don't hesitate to get in touch on the [Slack `#bactmap` channel](https://nfcore.slack.com/channels/bactmap) (you can join with [this invite](https://nf-co.re/join/slack)).\n\n## Citations\n\n\n\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nYou can cite the `nf-core` publication as follows:\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", "hasPart": [ { "@id": "main.nf" @@ -31,6 +31,9 @@ { "@id": "assets/" }, + { + "@id": "bin/" + }, { "@id": "conf/" }, @@ -43,6 +46,9 @@ { "@id": "modules/" }, + { + "@id": "modules/local/" + }, { "@id": "modules/nf-core/" }, @@ -99,7 +105,7 @@ }, "mentions": [ { - "@id": "#2ba10903-33af-4faa-9ee1-d7e45a8f2fc1" + "@id": "#cb036603-cb4f-4dc5-9e16-cbf9b6cd5707" } ], "name": "nf-core/bactmap" @@ -121,9 +127,18 @@ }, { "@id": "main.nf", - "@type": ["File", "SoftwareSourceCode", "ComputationalWorkflow"], + "@type": [ + "File", + "SoftwareSourceCode", + "ComputationalWorkflow" + ], + "creator": [ + { + "@id": "#ajv37@shgb2monitor.vet.private.cam.ac.uk" + } + ], "dateCreated": "", - "dateModified": "2025-03-13T14:15:46Z", + "dateModified": "2025-09-15T14:47:58Z", "dct:conformsTo": "https://bioschemas.org/profiles/ComputationalWorkflow/1.0-RELEASE/", "keywords": [ "nf-core", @@ -136,16 +151,25 @@ "phylogeny", "tree" ], - "license": ["MIT"], - "name": ["nf-core/bactmap"], + "license": [ + "MIT" + ], + "name": [ + "nf-core/bactmap" + ], "programmingLanguage": { "@id": "https://w3id.org/workflowhub/workflow-ro-crate#nextflow" }, "sdPublisher": { "@id": "https://nf-co.re/" }, - "url": 
["https://github.com/nf-core/bactmap", "https://nf-co.re/bactmap/dev/"], - "version": ["2.0.0dev"] + "url": [ + "https://github.com/nf-core/bactmap", + "https://nf-co.re/bactmap/2.0.0/" + ], + "version": [ + "2.0.0" + ] }, { "@id": "https://w3id.org/workflowhub/workflow-ro-crate#nextflow", @@ -157,14 +181,14 @@ "url": { "@id": "https://www.nextflow.io/" }, - "version": "!>=24.04.2" + "version": "!>=24.10.5" }, { - "@id": "#2ba10903-33af-4faa-9ee1-d7e45a8f2fc1", + "@id": "#cb036603-cb4f-4dc5-9e16-cbf9b6cd5707", "@type": "TestSuite", "instance": [ { - "@id": "#3f5008e2-1065-44ff-89de-ac416b610232" + "@id": "#f13981a3-764d-43b0-b4fb-474153139fca" } ], "mainEntity": { @@ -173,10 +197,10 @@ "name": "Test suite for nf-core/bactmap" }, { - "@id": "#3f5008e2-1065-44ff-89de-ac416b610232", + "@id": "#f13981a3-764d-43b0-b4fb-474153139fca", "@type": "TestInstance", "name": "GitHub Actions workflow for testing nf-core/bactmap", - "resource": "repos/nf-core/bactmap/actions/workflows/ci.yml", + "resource": "repos/nf-core/bactmap/actions/workflows/nf-test.yml", "runsOn": { "@id": "https://w3id.org/ro/terms/test#GithubService" }, @@ -195,6 +219,11 @@ "@type": "Dataset", "description": "Additional files" }, + { + "@id": "bin/", + "@type": "Dataset", + "description": "Scripts that must be callable from a pipeline process" + }, { "@id": "conf/", "@type": "Dataset", @@ -215,6 +244,11 @@ "@type": "Dataset", "description": "Modules used by the pipeline" }, + { + "@id": "modules/local/", + "@type": "Dataset", + "description": "Pipeline-specific modules" + }, { "@id": "modules/nf-core/", "@type": "Dataset", @@ -300,6 +334,12 @@ "@type": "Organization", "name": "nf-core", "url": "https://nf-co.re/" + }, + { + "@id": "#ajv37@shgb2monitor.vet.private.cam.ac.uk", + "@type": "Person", + "email": "ajv37@shgb2monitor.vet.private.cam.ac.uk", + "name": "Andries Van Tonder" } ] -} +} \ No newline at end of file diff --git a/subworkflows/local/bam_variant_calling_sort_freebayes_bcftools/main.nf 
b/subworkflows/local/bam_variant_calling_sort_freebayes_bcftools/main.nf new file mode 100644 index 00000000..e192cc6d --- /dev/null +++ b/subworkflows/local/bam_variant_calling_sort_freebayes_bcftools/main.nf @@ -0,0 +1,34 @@ +include { FREEBAYES } from '../../../modules/nf-core/freebayes/main' +include { BCFTOOLS_INDEX } from '../../../modules/nf-core/bcftools/index/main' +include { BCFTOOLS_SORT } from '../../../modules/nf-core/bcftools/sort/main' + +workflow BAM_VARIANT_CALLING_SORT_FREEBAYES_BCFTOOLS { + + take: + ch_input // channel: [mandatory] [ val(meta), path(input1), path(index1), path(input2), path(index2), path(bed) ] + ch_fasta_fai // channel: [mandatory] [ val(meta2), path(fasta), path(fai) ] + ch_samples // channel: [optional] [ path(samples) ] + ch_populations // channel: [optional] [ path(populations) ] + ch_cnv // channel: [optional] [ path(cnv) ] + + main: + + ch_versions = channel.empty() + + // Variant calling + FREEBAYES ( ch_input, ch_fasta_fai.map{ meta, fasta, fai -> [ meta, fasta ] }, ch_fasta_fai.map{ meta, fasta, fai -> [ meta, fai ] }, ch_samples, ch_populations, ch_cnv ) + ch_versions = ch_versions.mix(FREEBAYES.out.versions.first()) + + // Sort VCF files + BCFTOOLS_SORT ( FREEBAYES.out.vcf ) + + // Index VCF files + BCFTOOLS_INDEX ( BCFTOOLS_SORT.out.vcf ) + + emit: + vcf = BCFTOOLS_SORT.out.vcf // channel: [ val(meta), path(vcf) ] + csi = BCFTOOLS_INDEX.out.csi // channel: [ val(meta), path(csi) ] + tbi = BCFTOOLS_INDEX.out.tbi // channel: [ val(meta), path(tbi) ] + + versions = ch_versions // channel: [ path(versions.yml) ] +} diff --git a/subworkflows/local/bam_variant_calling_sort_freebayes_bcftools/meta.yml b/subworkflows/local/bam_variant_calling_sort_freebayes_bcftools/meta.yml new file mode 100644 index 00000000..b5b15456 --- /dev/null +++ b/subworkflows/local/bam_variant_calling_sort_freebayes_bcftools/meta.yml @@ -0,0 +1,99 @@ +# yaml-language-server: 
$schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "bam_variant_calling_sort_freebayes_bcftools" +description: Call variants using freebayes, then sort and index +keywords: + - variant + - sort + - index + - bam + - cram + - vcf +components: + - freebayes + - bcftools/sort + - bcftools/index +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - input1: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - index1: + type: file + description: Index BAI/CRAI/CSI file + pattern: "*.{bai,crai,csi}" + - input2: + type: file + description: BAM/CRAM/SAM file; used to run variant calling with pair (normal vs tumor) + pattern: "*.{bam,cram,sam}" + - index2: + type: file + description: Index BAI/CRAI/CSI file + pattern: "*.{bai,crai,csi}" + - bed: + type: file + description: Optional - Limit analysis to targets listed in this BED-format FILE. + pattern: "*.bed" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fasta: + type: file + description: reference fasta file + pattern: "*.{fa,fa.gz,fasta,fasta.gz}" + - fai: + type: file + description: reference fasta file index + pattern: "*.{fa,fasta}.fai" + - samples: + type: file + description: Optional - Limit analysis to samples listed (one per line) in the FILE. + pattern: "*.txt" + - populations: + type: file + description: Optional - Each line of FILE should list a sample and a population which it is part of. + pattern: "*.txt" + - cnv: + type: file + description: | + A copy number map BED file, which has either a sample-level ploidy: + sample_name copy_number + or a region-specific format: + seq_name start end sample_name copy_number + pattern: "*.bed" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test' ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - vcf: + type: file + description: Sorted VCF file + pattern: "*.{vcf.gz}" + - csi: + type: file + description: Default VCF file index file + pattern: "*.csi" + - tbi: + type: file + description: Alternative VCF file index file (activated with -t parameter) + pattern: "*.tbi" +authors: + - "@priyanka-surana" + - "@FriederikeHanssen" + - "@ramprasadn" +maintainers: + - "@priyanka-surana" + - "@FriederikeHanssen" + - "@ramprasadn" diff --git a/subworkflows/local/bam_variant_calling_sort_freebayes_bcftools/readme.md b/subworkflows/local/bam_variant_calling_sort_freebayes_bcftools/readme.md new file mode 100644 index 00000000..159eb88c --- /dev/null +++ b/subworkflows/local/bam_variant_calling_sort_freebayes_bcftools/readme.md @@ -0,0 +1,8 @@ +```mermaid +flowchart TB + A[[META, BAM, BAI, BAM, BAI, BED]] & B[[META, FASTA, FAI]] & C[SAMPLES] & D[POPULATIONS] & E[CNV] --> F(FREEBAYES) + F --> G(BCFTOOLS_SORT) + G --> H(BCFTOOLS_INDEX) + G --> I[[META, BCFTOOLS_SORT_VCF]] + H --> J[[META, BCFTOOLS_INDEX_CSI]] & K[[META, BCFTOOLS_INDEX_TBI]] +``` diff --git a/subworkflows/local/consensus_bcftools/main.nf b/subworkflows/local/consensus_bcftools/main.nf new file mode 100644 index 00000000..e89e7e7a --- /dev/null +++ b/subworkflows/local/consensus_bcftools/main.nf @@ -0,0 +1,87 @@ +// +// Consensus calling with BCFTools +// + +include { BCFTOOLS_QUERY } from '../../../modules/nf-core/bcftools/query/main' +include { BEDTOOLS_GENOMECOV } from '../../../modules/local/bedtools/genomecov/main' +include { BEDTOOLS_SUBTRACT } from '../../../modules/nf-core/bedtools/subtract/main' +include { BCFTOOLS_CONSENSUS } from '../../../modules/local/bcftools/consensus/main' +include { RENAME_FASTA_HEADER } from '../../../modules/local/fasta_rename/main' +include { CONCATENATE_FASTA } from '../../../modules/local/concatenate_fasta/main' + +workflow CONSENSUS_BCFTOOLS { + + take: 
+ ch_bam // channel: [ val(meta), [ bam ] ] + ch_vcf // channel: [ val(meta), [ vcf ] ] + ch_tbi // channel: [ val(meta), [ tbi ] ] + ch_fasta // channel: /path/to/genome.fasta + + main: + ch_versions = channel.empty() + + // + // Filter variants by allele frequency, zip and index + // + ch_query = ch_vcf.join(ch_tbi) + .map{ + meta, vcf, tbi -> [ meta, vcf, tbi ] + } + + BCFTOOLS_QUERY ( + ch_query, + [], + [], + [] + ) + + ch_genomecov = ch_bam + .multiMap { + meta, bam -> + genomecov: [meta, bam, params.genomecov_scale ] + } + + BEDTOOLS_GENOMECOV ( + ch_genomecov.genomecov + ) + ch_versions = ch_versions.mix(BEDTOOLS_GENOMECOV.out.versions.first()) + + // + // Make the bed mask + // + ch_bedtools_subtract = BEDTOOLS_GENOMECOV.out.genomecov.join(BCFTOOLS_QUERY.out.output) + + BEDTOOLS_SUBTRACT ( + ch_bedtools_subtract + ) + + ch_consensus = ch_vcf + .join( ch_tbi) + .join( BEDTOOLS_SUBTRACT.out.bed ).map{ + meta, vcf, tbi, bed -> [ meta, vcf, tbi, bed ] + } + + BCFTOOLS_CONSENSUS ( + ch_consensus, + ch_fasta + ) + ch_versions = ch_versions.mix(BCFTOOLS_CONSENSUS.out.versions.first()) + + // + // Rename consensus header adding sample name + // + RENAME_FASTA_HEADER ( + BCFTOOLS_CONSENSUS.out.fasta + ) + ch_versions = ch_versions.mix(RENAME_FASTA_HEADER.out.versions.first()) + + CONCATENATE_FASTA ( + RENAME_FASTA_HEADER.out.fasta + ) + ch_versions = ch_versions.mix(CONCATENATE_FASTA.out.versions.first()) + + + emit: + consensus = CONCATENATE_FASTA.out.fasta // channel: [ val(meta), [ fasta ] ] + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/consensus_bcftools/meta.yml b/subworkflows/local/consensus_bcftools/meta.yml new file mode 100644 index 00000000..baaabf79 --- /dev/null +++ b/subworkflows/local/consensus_bcftools/meta.yml @@ -0,0 +1,68 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "consensus_bcftools" +description: Create consensus 
FASTA from a VCF file +keywords: + - vcf + - fasta + - pseudogenome + - consensus +components: + - bcftools/consensus + - bcftools/query + - bedtools/genomecov + - bedtools/subtract + - concatenate_fasta + - rename_fasta_header +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ch_bam: + type: file + description: | + The input channel containing the BAM files + Structure: [ val(meta), path(bam) ] + pattern: "*.{bam}" + - ch_vcf: + type: file + description: | + The input channel containing the VCF files + Structure: [ val(meta), path(vcf) ] + pattern: "*.{vcf,vcf.gz}" + - ch_tbi: + type: file + description: | + The input channel containing the VCF index files + Structure: [ val(meta), path(tbi) ] + pattern: "*.{tbi}" + - ch_fasta: + type: file + description: | + Reference genome fasta file + Structure: [ path(fasta) ] +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - consensus: + type: file + description: | + Channel containing consensus FASTA files + Structure: [ val(meta), path(fasta) ] + pattern: "*.fa" + - versions: + type: file + description: | + File containing software versions + Structure: [ path(versions.yml) ] + pattern: "versions.yml" +authors: + - "@MareikeJaniak" + - "@sgsutcliffe" + - "@avantonder" +maintainers: + - "@avantonder" diff --git a/subworkflows/local/fastq_align_bwamem2/main.nf b/subworkflows/local/fastq_align_bwamem2/main.nf new file mode 100644 index 00000000..70c4576e --- /dev/null +++ b/subworkflows/local/fastq_align_bwamem2/main.nf @@ -0,0 +1,37 @@ +include { BWAMEM2_MEM } from '../../../modules/nf-core/bwamem2/mem/main' +include { BAM_SORT_STATS_SAMTOOLS } from '../../nf-core/bam_sort_stats_samtools/main' + +workflow FASTQ_ALIGN_BWAMEM2 { + + take: + ch_reads // channel: [ val(meta), [ reads ] ] + ch_index // channel: [meta, ref index] + ch_fasta // channel: [meta, ref] + sort_bam // value: true + + main: + + ch_versions = channel.empty() + + // + // Map reads with BWA 2 mem + // + BWAMEM2_MEM ( ch_reads, ch_index, ch_fasta, sort_bam ) + ch_versions = ch_versions.mix(BWAMEM2_MEM.out.versions) + + // + // Sort, index BAM file and run samtools stats, flagstat and idxstats + // + BAM_SORT_STATS_SAMTOOLS ( BWAMEM2_MEM.out.bam, ch_fasta ) + ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions) + + emit: + bam_orig = BWAMEM2_MEM.out.bam // channel: [ val(meta), aligned ] + bam = BAM_SORT_STATS_SAMTOOLS.out.bam // channel: [ val(meta), [ bam ] ] + bai = BAM_SORT_STATS_SAMTOOLS.out.bai // channel: [ val(meta), [ bai ] ] + csi = BAM_SORT_STATS_SAMTOOLS.out.csi // channel: [ val(meta), [ csi ] ] + stats = BAM_SORT_STATS_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] + flagstat = BAM_SORT_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] + idxstats = BAM_SORT_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] + 
versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/fastq_align_bwamem2/meta.yml b/subworkflows/local/fastq_align_bwamem2/meta.yml new file mode 100644 index 00000000..2c98a4cf --- /dev/null +++ b/subworkflows/local/fastq_align_bwamem2/meta.yml @@ -0,0 +1,98 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "fastq_align_bwamem2" +description: Map reads to a reference genome using BWA-MEM2 and sort with samtools. +keywords: + - align + - fasta + - genome + - reference +components: + - bwamem2/mem + - samtools/sort + - samtools/index + - samtools/stats + - samtools/idxstats + - samtools/flagstat + - bam_sort_stats_samtools +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ch_reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + - ch_index: + type: file + description: BWA-MEM2 genome index files + pattern: "*.{0123,amb,ann,bwt.2bit.64,pac}" + - ch_fasta: + type: file + description: Reference fasta file + pattern: "*.{fasta,fa}" + - sort_bam: + type: boolean + description: | + Use samtools sort (true) or samtools view (false) + default: false +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - bam_orig: + type: file + description: | + Channel containing unsorted BAM files + Structure: [ val(meta), path(bam_orig) ] + pattern: "*.bam" + - bam: + type: file + description: | + Channel containing sorted BAM files + Structure: [ val(meta), path(bam) ] + pattern: "*.bam" + - bai: + type: file + description: | + Channel containing indexed BAM (BAI) files + Structure: [ val(meta), path(bai) ] + pattern: "*.bai" + - csi: + type: file + description: | + Channel containing CSI files + Structure: [ val(meta), path(csi) ] + pattern: "*.csi" + - stats: + type: file + description: | + Channel containing samtools stats output + Structure: [ val(meta), path(stats) ] + pattern: "*.{stats}" + - flagstat: + type: file + description: | + Channel containing samtools flagstat output + Structure: [ val(meta), path(flagstat) ] + pattern: "*.{flagstat}" + - idxstats: + type: file + description: | + Channel containing samtools idxstats output + Structure: [ val(meta), path(idxstats) ] + pattern: "*.{idxstats}" + - versions: + type: file + description: | + Channel containing software versions + Structure: [ path(versions.yml) ] + pattern: "versions.yml" +authors: + - "@avantonder" +maintainers: + - "@avantonder" diff --git a/subworkflows/local/longread_adapterremoval/main.nf b/subworkflows/local/longread_adapterremoval/main.nf new file mode 100644 index 00000000..b9b0dea3 --- /dev/null +++ b/subworkflows/local/longread_adapterremoval/main.nf @@ -0,0 +1,36 @@ +// +// Process long raw reads with porechop or porechop_abi +// + +include { PORECHOP_PORECHOP } from '../../../modules/nf-core/porechop/porechop/main' +include { PORECHOP_ABI } from '../../../modules/nf-core/porechop/abi/main' + +workflow LONGREAD_ADAPTERREMOVAL { + take: + reads + + main: + ch_versions = channel.empty() + ch_multiqc_files = channel.empty() + + if (params.longread_adapterremoval_tool == 'porechop_abi') { + PORECHOP_ABI(reads) + ch_processed_reads = PORECHOP_ABI.out.reads.map 
{ meta, chopped_reads -> [meta + [single_end: true], chopped_reads] } + ch_versions = ch_versions.mix(PORECHOP_ABI.out.versions.first()) + ch_multiqc_files = ch_multiqc_files.mix(PORECHOP_ABI.out.log) + } + else if (params.longread_adapterremoval_tool == 'porechop') { + PORECHOP_PORECHOP(reads) + ch_processed_reads = PORECHOP_PORECHOP.out.reads.map { meta, chopped_reads -> [meta + [single_end: true], chopped_reads] } + ch_versions = ch_versions.mix(PORECHOP_PORECHOP.out.versions.first()) + ch_multiqc_files = ch_multiqc_files.mix(PORECHOP_PORECHOP.out.log) + } + else { + ch_processed_reads = reads + } + + emit: + reads = ch_processed_reads // channel: [ val(meta), [ reads ] ] + versions = ch_versions // channel: [ versions.yml ] + mqc = ch_multiqc_files +} diff --git a/subworkflows/local/longread_adapterremoval/meta.yml b/subworkflows/local/longread_adapterremoval/meta.yml new file mode 100644 index 00000000..299f1cb1 --- /dev/null +++ b/subworkflows/local/longread_adapterremoval/meta.yml @@ -0,0 +1,46 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "longread_adapterremoval" +description: Long read adapter removal using Porechop +keywords: + - adapters + - removal + - fastq +components: + - porechop/porechop + - porechop/abi +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - reads: + type: file + description: | + Channel containing adapter removed FASTQ files + Structure: [ val(meta), path(reads) ] + pattern: "*.fastq.gz" + - versions: + type: file + description: | + File containing software versions + Structure: [ path(versions.yml) ] + pattern: "versions.yml" +authors: + - "@jfy133" + - "@sofstam" + - "@Midnighter" + - "@LilyAnderssonLee" +maintainers: + - "@avantonder" diff --git a/subworkflows/local/longread_filtering/main.nf b/subworkflows/local/longread_filtering/main.nf new file mode 100644 index 00000000..510bf824 --- /dev/null +++ b/subworkflows/local/longread_filtering/main.nf @@ -0,0 +1,35 @@ +// +// Perform filtering +// + +include { FILTLONG } from '../../../modules/nf-core/filtlong/main' +include { NANOQ } from '../../../modules/nf-core/nanoq/main' + +workflow LONGREAD_FILTERING { + take: + reads // [ [ meta ], [ reads ] ] + + main: + ch_versions = channel.empty() + ch_multiqc_files = channel.empty() + + // fastp complexity filtering is activated via modules.conf in shortread_preprocessing + if (params.longread_filter_tool == 'filtlong') { + ch_filtered_reads = FILTLONG(reads.map { meta, long_reads -> [meta, [], long_reads] }).reads + ch_versions = ch_versions.mix(FILTLONG.out.versions.first()) + ch_multiqc_files = ch_multiqc_files.mix(FILTLONG.out.log) + } + else if (params.longread_filter_tool == 'nanoq') { + ch_filtered_reads = NANOQ(reads, 'fastq.gz').reads + ch_versions = ch_versions.mix(NANOQ.out.versions.first()) + ch_multiqc_files = ch_multiqc_files.mix(NANOQ.out.stats) + } + else { + ch_filtered_reads = reads + } + + emit: + reads = ch_filtered_reads // channel: [ val(meta), [ reads ] ] + versions = ch_versions // channel: [ versions.yml ] + mqc = ch_multiqc_files +} diff --git a/subworkflows/local/longread_filtering/meta.yml b/subworkflows/local/longread_filtering/meta.yml new file mode 100644 index 00000000..842067e8 --- /dev/null +++ 
b/subworkflows/local/longread_filtering/meta.yml @@ -0,0 +1,46 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "longread_filtering" +description: Perform filtering of long read FASTQ files +keywords: + - filtering + - cleaning + - fastq +components: + - filtlong + - nanoq +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + Channel containing filtered FASTQ files + Structure: [ val(meta), path(reads) ] + pattern: "*.fastq.gz" + - versions: + type: file + description: | + File containing software versions + Structure: [ path(versions.yml) ] + pattern: "versions.yml" +authors: + - "@jfy133" + - "@sofstam" + - "@Midnighter" + - "@LilyAnderssonLee" +maintainers: + - "@avantonder" diff --git a/subworkflows/local/longread_mapping/main.nf b/subworkflows/local/longread_mapping/main.nf new file mode 100644 index 00000000..96f8ba2d --- /dev/null +++ b/subworkflows/local/longread_mapping/main.nf @@ -0,0 +1,76 @@ +// +// Perform long read mapping and variant calling +// + +include { MINIMAP2_ALIGNMENT } from '../minimap2_alignment/main' +include { BAM_SORT_STATS_SAMTOOLS } from '../../nf-core/bam_sort_stats_samtools/main' +include { CLAIR3 } from '../../../modules/local/clair3/main' +include { BCFTOOLS_SORT } from '../../../modules/nf-core/bcftools/sort/main' +include { BCFTOOLS_INDEX } from '../../../modules/nf-core/bcftools/index/main' +include { BCFTOOLS_VIEW } from '../../../modules/nf-core/bcftools/view/main' +include { BCFTOOLS_NORM } from '../../../modules/nf-core/bcftools/norm/main' +include { BCFTOOLS_STATS } 
from '../../../modules/nf-core/bcftools/stats/main' +include { CONSENSUS_BCFTOOLS } from '../consensus_bcftools/main' +include { SEQTK_COMP } from '../../../modules/nf-core/seqtk/comp/main.nf' + +workflow LONGREAD_MAPPING { + + take: + ch_fasta // channel: [meta, ref] + ch_faidx // channel: [meta, ref index] + reads // channel: [meta2, fasta/fastq] + + main: + ch_versions = channel.empty() + ch_multiqc_files = channel.empty() + + MINIMAP2_ALIGNMENT( ch_fasta, reads ) + ch_versions = ch_versions.mix(MINIMAP2_ALIGNMENT.out.versions) + + BAM_SORT_STATS_SAMTOOLS ( MINIMAP2_ALIGNMENT.out.minimap_align, ch_fasta ) + ch_multiqc_files = ch_multiqc_files.mix( BAM_SORT_STATS_SAMTOOLS.out.stats ) + ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions) + + ch_clair3_input = BAM_SORT_STATS_SAMTOOLS.out.bam + .join(BAM_SORT_STATS_SAMTOOLS.out.bai) + .multiMap{ + meta, bam, bai -> + bam: [ meta, bam, bai, params.clair3_model, params.clair3_platform ] + } + + CLAIR3 (ch_clair3_input.bam, ch_fasta, ch_faidx) + ch_versions = ch_versions.mix(CLAIR3.out.versions.first()) + + BCFTOOLS_SORT ( CLAIR3.out.vcf ) + + BCFTOOLS_INDEX ( BCFTOOLS_SORT.out.vcf ) + + ch_bcftool_view_input = BCFTOOLS_SORT.out.vcf.join(BCFTOOLS_INDEX.out.tbi) + BCFTOOLS_VIEW ( ch_bcftool_view_input, [], [], [] ) + + ch_bcftool_norm_input = BCFTOOLS_VIEW.out.vcf.join(BCFTOOLS_VIEW.out.tbi) + BCFTOOLS_NORM ( ch_bcftool_norm_input, ch_fasta ) + + ch_bcftool_stats_input = BCFTOOLS_NORM.out.vcf.join(BCFTOOLS_NORM.out.tbi) + + BCFTOOLS_STATS ( ch_bcftool_stats_input, [ [:], [] ], [ [:], [] ], [ [:], [] ], [ [:], [] ], [ [:], [] ] ) + ch_multiqc_files = ch_multiqc_files.mix( BCFTOOLS_STATS.out.stats ) + + CONSENSUS_BCFTOOLS ( BAM_SORT_STATS_SAMTOOLS.out.bam, BCFTOOLS_NORM.out.vcf, BCFTOOLS_NORM.out.tbi, ch_fasta ) + ch_versions = ch_versions.mix( CONSENSUS_BCFTOOLS.out.versions ) + + SEQTK_COMP( CONSENSUS_BCFTOOLS.out.consensus ) + + emit: + bam = BAM_SORT_STATS_SAMTOOLS.out.bam // channel: [ val(meta), [ 
bam ] ] + bai = BAM_SORT_STATS_SAMTOOLS.out.bai // channel: [ val(meta), [ bai ] ] + vcf = BCFTOOLS_NORM.out.vcf // channel: [meta, vcf] + csi = BCFTOOLS_NORM.out.csi // channel: [ val(meta), path(csi) ] + tbi = BCFTOOLS_NORM.out.tbi // channel: [meta, tbi] + stats = BCFTOOLS_STATS.out.stats // channel: [meta, stats] + consensus = CONSENSUS_BCFTOOLS.out.consensus // channel: [ val(meta), path(consensus) ] + seqtk_stats = SEQTK_COMP.out.seqtk_stats // channel: [meta, stats] + versions = ch_versions // channel: [ versions.yml ] + mqc = ch_multiqc_files // channel: [ val(meta), [ multiqc files ] ] +} + diff --git a/subworkflows/local/longread_mapping/meta.yml b/subworkflows/local/longread_mapping/meta.yml new file mode 100644 index 00000000..9f72ee39 --- /dev/null +++ b/subworkflows/local/longread_mapping/meta.yml @@ -0,0 +1,98 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "longread_mapping" +description: Map long reads to a reference genome with minimap2, sort the output BAM files, call variants with Clair3 and build consensus sequences +keywords: + - align + - fasta + - genome + - reference + - bam +components: + - minimap2_alignment + - bam_sort_stats_samtools + - clair3 + - bcftools/sort + - bcftools/index + - bcftools/view + - bcftools/norm + - bcftools/stats + - consensus_bcftools + - seqtk/comp +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - ch_fasta: + type: file + description: Reference fasta file + pattern: "*.{fasta,fa}" + - ch_faidx: + type: file + description: samtools reference genome index files + pattern: "*.fai" + - ch_reads: + type: file + description: | + List of input FastQ files +output: + - bam: + type: file + description: | + Channel containing BAM files + Structure: [ val(meta), path(bam) ] + pattern: "*.bam" + - bai: + type: file + description: | + Channel containing indexed BAM (BAI) files + Structure: [ val(meta), path(bai) ] + pattern: "*.bai" + - vcf: + type: file + description: | + Channel containing Sorted VCF files + Structure: [ val(meta), path(vcf) ] + pattern: "*.{vcf.gz}" + - csi: + type: file + description: | + Channel containing CSI files + Structure: [ val(meta), path(csi) ] + pattern: "*.csi" + - tbi: + type: file + description: | + Channel containing TBI files + Structure: [ val(meta), path(tbi) ] + pattern: "*.tbi" + - stats: + type: file + description: | + Channel containing bcftools stats files + Structure: [ val(meta), path(stats) ] + pattern: "*.txt" + - seqtk_stats: + type: file + description: | + Channel containing seqtk stats files + Structure: [ val(meta), path(seqtk_stats) ] + pattern: "*.tsv" + - consensus: + type: file + description: | + Channel containing consensus FASTA files + Structure: [ val(meta), path(consensus) ] + pattern: "*.fa" + - versions: + type: file + description: | + File containing software versions + Structure: [ path(versions.yml) ] + pattern: "versions.yml" +authors: + - "@opetryk" + - "@avantonder" +maintainers: + - "@avantonder" diff --git a/subworkflows/local/longread_preprocessing/main.nf b/subworkflows/local/longread_preprocessing/main.nf new file mode 100644 index 00000000..a6e6b447 --- /dev/null +++ b/subworkflows/local/longread_preprocessing/main.nf @@ -0,0 +1,56 @@ +// +// Perform read trimming and filtering +// + +include { FASTQC as FASTQC_PROCESSED } from '../../../modules/nf-core/fastqc/main' 
+include { FALCO as FALCO_PROCESSED } from '../../../modules/nf-core/falco/main' + +include { LONGREAD_ADAPTERREMOVAL } from '../longread_adapterremoval/main' +include { LONGREAD_FILTERING } from '../longread_filtering/main' + +workflow LONGREAD_PREPROCESSING { + take: + reads + + main: + ch_versions = channel.empty() + ch_multiqc_files = channel.empty() + + if (!params.longread_qc_skipadaptertrim && params.longread_qc_skipqualityfilter) { + LONGREAD_ADAPTERREMOVAL(reads) + ch_processed_reads = LONGREAD_ADAPTERREMOVAL.out.reads + ch_versions = ch_versions.mix(LONGREAD_ADAPTERREMOVAL.out.versions.first()) + ch_multiqc_files = ch_multiqc_files.mix(LONGREAD_ADAPTERREMOVAL.out.mqc) + } + else if (params.longread_qc_skipadaptertrim && !params.longread_qc_skipqualityfilter) { + LONGREAD_FILTERING(reads) + ch_processed_reads = LONGREAD_FILTERING.out.reads + ch_versions = ch_versions.mix(LONGREAD_FILTERING.out.versions.first()) + ch_multiqc_files = ch_multiqc_files.mix(LONGREAD_FILTERING.out.mqc) + } + else { + LONGREAD_ADAPTERREMOVAL(reads) + ch_clipped_reads = LONGREAD_ADAPTERREMOVAL.out.reads.map { meta, clipped_long_reads -> [meta + [single_end: true], clipped_long_reads] } + LONGREAD_FILTERING(ch_clipped_reads) + ch_processed_reads = LONGREAD_FILTERING.out.reads + ch_versions = ch_versions.mix(LONGREAD_ADAPTERREMOVAL.out.versions.first()) + ch_versions = ch_versions.mix(LONGREAD_FILTERING.out.versions.first()) + ch_multiqc_files = ch_multiqc_files.mix(LONGREAD_ADAPTERREMOVAL.out.mqc) + ch_multiqc_files = ch_multiqc_files.mix(LONGREAD_FILTERING.out.mqc) + } + + if (params.preprocessing_qc_tool == 'fastqc') { + FASTQC_PROCESSED(ch_processed_reads) + ch_multiqc_files = ch_multiqc_files.mix(FASTQC_PROCESSED.out.zip) + } + else if (params.preprocessing_qc_tool == 'falco') { + FALCO_PROCESSED(ch_processed_reads) + ch_versions = ch_versions.mix(FALCO_PROCESSED.out.versions) + ch_multiqc_files = ch_multiqc_files.mix(FALCO_PROCESSED.out.txt) + } + + emit: + reads = 
ch_processed_reads // channel: [ val(meta), [ reads ] ] + versions = ch_versions // channel: [ versions.yml ] + mqc = ch_multiqc_files +} diff --git a/subworkflows/local/longread_preprocessing/meta.yml b/subworkflows/local/longread_preprocessing/meta.yml new file mode 100644 index 00000000..613cb1c2 --- /dev/null +++ b/subworkflows/local/longread_preprocessing/meta.yml @@ -0,0 +1,48 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "longread_preprocessing" +description: Perform filtering and adapter removal of long read FASTQ files +keywords: + - preprocessing + - cleaning + - fastq +components: + - fastqc + - falco + - longread_adapterremoval + - longread_filtering +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - reads: + type: file + description: | + Channel containing cleaned FASTQ files + Structure: [ val(meta), path(reads) ] + pattern: "*.fastq.gz" + - versions: + type: file + description: | + File containing software versions + Structure: [ path(versions.yml) ] + pattern: "versions.yml" +authors: + - "@jfy133" + - "@sofstam" + - "@Midnighter" + - "@LilyAnderssonLee" +maintainers: + - "@avantonder" diff --git a/subworkflows/local/minimap2_alignment/main.nf b/subworkflows/local/minimap2_alignment/main.nf new file mode 100644 index 00000000..7b916163 --- /dev/null +++ b/subworkflows/local/minimap2_alignment/main.nf @@ -0,0 +1,37 @@ +include { MINIMAP2_INDEX } from '../../../modules/nf-core/minimap2/index/main' +include { MINIMAP2_ALIGN } from '../../../modules/nf-core/minimap2/align/main' + +workflow MINIMAP2_ALIGNMENT { + + take: + + ch_ref // channel: [meta, ref] + ch_fasta // channel: [meta2, fasta/fastq] + + main: + + ch_versions = channel.empty() + + MINIMAP2_INDEX ( ch_ref ) + ch_versions = ch_versions.mix(MINIMAP2_INDEX.out.versions) + + MINIMAP2_ALIGN ( ch_fasta, MINIMAP2_INDEX.out.index, params.bam_format, params.bam_index_extension, params.cigar_paf_format, params.cigar_bam ) + ch_versions = ch_versions.mix(MINIMAP2_ALIGN.out.versions) + + if (params.bam_format) { + minimap_out = MINIMAP2_ALIGN.out.bam + } else { + minimap_out = MINIMAP2_ALIGN.out.paf + } + + if (params.bam_index_extension) { + minimap_index = MINIMAP2_ALIGN.out.index + } else { + minimap_index = [] + } + emit: + minimap_align = minimap_out // channel: [ val(meta), [ bam ] ] + minimap_index = minimap_index // channel: [ val(meta), [ index ] ] + versions = ch_versions // channel: [ versions.yml ] +} + diff --git a/subworkflows/local/minimap2_alignment/meta.yml b/subworkflows/local/minimap2_alignment/meta.yml new file mode 100644 index 00000000..0b9df9b5 --- /dev/null +++ b/subworkflows/local/minimap2_alignment/meta.yml @@ -0,0 +1,53 @@ +# yaml-language-server: 
$schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "minimap2_alignment" +description: Index a reference genome and map reads to it using minimap2. +keywords: + - align + - fasta + - genome + - reference +components: + - minimap2/index + - minimap2/align +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ch_ref: + type: file + description: Reference fasta file + pattern: "*.{fasta,fa}" + - ch_fasta: + type: file + description: | + List of input FastQ files +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - minimap_align: + type: file + description: | + Channel containing unsorted BAM files + Structure: [ val(meta), path(bam_orig) ] + pattern: "*.bam" + - minimap_index: + type: file + description: | + Channel containing indexed BAM (BAI) files + Structure: [ val(meta), path(bai) ] + pattern: "*.bai" + - versions: + type: file + description: | + Channel containing software versions + Structure: [ path(versions.yml) ] + pattern: "versions.yml" +authors: + - "@opetryk" +maintainers: + - "@avantonder" diff --git a/subworkflows/local/shortread_adapterremoval/main.nf b/subworkflows/local/shortread_adapterremoval/main.nf new file mode 100644 index 00000000..cf199705 --- /dev/null +++ b/subworkflows/local/shortread_adapterremoval/main.nf @@ -0,0 +1,87 @@ +// +// Process short raw reads with AdapterRemoval +// + +include { ADAPTERREMOVAL as ADAPTERREMOVAL_SINGLE } from '../../../modules/nf-core/adapterremoval/main' +include { ADAPTERREMOVAL as ADAPTERREMOVAL_PAIRED } from '../../../modules/nf-core/adapterremoval/main' +include { CAT_FASTQ } from '../../../modules/nf-core/cat/fastq/main' + +workflow SHORTREAD_ADAPTERREMOVAL { + take: + reads // [[meta], [reads]] + adapterlist // file + + main: + ch_versions = channel.empty() + ch_multiqc_files = 
channel.empty() + + ch_input_for_adapterremoval = reads.branch { + single: it[0].single_end + paired: !it[0].single_end + } + + ADAPTERREMOVAL_SINGLE(ch_input_for_adapterremoval.single, adapterlist) + ADAPTERREMOVAL_PAIRED(ch_input_for_adapterremoval.paired, adapterlist) + + /* + * Due to the ~slightly~ very ugly output implementation of the current AdapterRemoval2 version, each file + * has to be exported in a separate channel and we must manually recombine when necessary. + */ + + if (params.shortread_qc_mergepairs && params.shortread_qc_includeunmerged) { + + ch_concat_fastq = channel.empty() + .mix( + ADAPTERREMOVAL_PAIRED.out.collapsed, + ADAPTERREMOVAL_PAIRED.out.collapsed_truncated, + ADAPTERREMOVAL_PAIRED.out.singles_truncated, + ADAPTERREMOVAL_PAIRED.out.paired_truncated, + ) + .map { meta, adapterremoved_reads -> + [meta + [single_end: true], adapterremoved_reads] + } + .groupTuple() + .map { meta, fastq -> [meta, fastq.flatten()] } + // Paired-end reads cause a nested tuple during grouping. 
+ // We want to present a flat list of files to `CAT_FASTQ`, thus the flatten + + CAT_FASTQ(ch_concat_fastq) + + ch_adapterremoval_reads_prepped = CAT_FASTQ.out.reads.mix(ADAPTERREMOVAL_SINGLE.out.singles_truncated) + } + else if (params.shortread_qc_mergepairs && !params.shortread_qc_includeunmerged) { + + ch_concat_fastq = channel.empty() + .mix( + ADAPTERREMOVAL_PAIRED.out.collapsed, + ADAPTERREMOVAL_PAIRED.out.collapsed_truncated, + ) + .map { meta, input_reads -> + [meta + [single_end: true], input_reads] + } + .groupTuple() + .map { meta, fastq -> [meta, fastq.flatten()] } + + + CAT_FASTQ(ch_concat_fastq) + + ch_adapterremoval_reads_prepped = CAT_FASTQ.out.reads.mix(ADAPTERREMOVAL_SINGLE.out.singles_truncated) + } + else { + + ch_adapterremoval_reads_prepped = ADAPTERREMOVAL_PAIRED.out.paired_truncated.mix(ADAPTERREMOVAL_SINGLE.out.singles_truncated) + } + + ch_versions = ch_versions.mix(ADAPTERREMOVAL_SINGLE.out.versions.first()) + ch_versions = ch_versions.mix(ADAPTERREMOVAL_PAIRED.out.versions.first()) + + ch_multiqc_files = ch_multiqc_files.mix( + ADAPTERREMOVAL_PAIRED.out.settings, + ADAPTERREMOVAL_SINGLE.out.settings, + ) + + emit: + reads = ch_adapterremoval_reads_prepped // channel: [ val(meta), [ reads ] ] + versions = ch_versions // channel: [ versions.yml ] + mqc = ch_multiqc_files +} diff --git a/subworkflows/local/shortread_adapterremoval/meta.yml b/subworkflows/local/shortread_adapterremoval/meta.yml new file mode 100644 index 00000000..fdc8b4a2 --- /dev/null +++ b/subworkflows/local/shortread_adapterremoval/meta.yml @@ -0,0 +1,52 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "shortread_adapterremoval" +description: Remove adapters from short read FASTQ files with AdapterRemoval and optionally merge read pairs +keywords: + - adapters + - removal + - fastq +components: + - adapterremoval + - cat/fastq +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + - adapterlist: + type: file + description: | + List of adapters to remove from the reads. + Structure: [ path(adapterlist) ] + pattern: "*.txt" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + Channel containing adapter removed FASTQ files + Structure: [ val(meta), path(reads) ] + pattern: "*.fastq.gz" + - versions: + type: file + description: | + File containing software versions + Structure: [ path(versions.yml) ] + pattern: "versions.yml" +authors: + - "@jfy133" + - "@sofstam" + - "@Midnighter" + - "@LilyAnderssonLee" +maintainers: + - "@avantonder" diff --git a/subworkflows/local/shortread_fastp/main.nf b/subworkflows/local/shortread_fastp/main.nf new file mode 100644 index 00000000..108b9a76 --- /dev/null +++ b/subworkflows/local/shortread_fastp/main.nf @@ -0,0 +1,54 @@ +// +// Process short raw reads with FastP +// + +include { FASTP as FASTP_SINGLE } from '../../../modules/nf-core/fastp/main' +include { FASTP as FASTP_PAIRED } from '../../../modules/nf-core/fastp/main' + +workflow SHORTREAD_FASTP { + take: + reads // [[meta], [reads]] + adapterlist + + main: + ch_versions = channel.empty() + ch_multiqc_files = channel.empty() + + ch_input_for_fastp = reads.branch { + single: it[0]['single_end'] == true + paired: it[0]['single_end'] == false + } + + ch_fastp_input_single = ch_input_for_fastp.single + .map { meta, reads -> [meta, reads, [] ] } + ch_fastp_input_paired = ch_input_for_fastp.paired + .map { meta, reads -> [meta, reads, [] ] } + + FASTP_SINGLE(ch_fastp_input_single, false, false, false) + // Last parameter here turns on merging of PE data + FASTP_PAIRED(ch_fastp_input_paired, false, false, params.shortread_qc_mergepairs) + + if 
(params.shortread_qc_mergepairs) { + ch_fastp_reads_prepped_pe = FASTP_PAIRED.out.reads_merged.map { meta, merged_reads -> + [meta + [single_end: true], [merged_reads].flatten()] + } + + ch_fastp_reads_prepped = ch_fastp_reads_prepped_pe.mix(FASTP_SINGLE.out.reads) + } + else { + ch_fastp_reads_prepped = FASTP_PAIRED.out.reads.mix(FASTP_SINGLE.out.reads) + } + + ch_versions = ch_versions.mix(FASTP_SINGLE.out.versions.first()) + ch_versions = ch_versions.mix(FASTP_PAIRED.out.versions.first()) + + ch_processed_reads = ch_fastp_reads_prepped + + ch_multiqc_files = ch_multiqc_files.mix(FASTP_SINGLE.out.json) + ch_multiqc_files = ch_multiqc_files.mix(FASTP_PAIRED.out.json) + + emit: + reads = ch_processed_reads // channel: [ val(meta), [ reads ] ] + versions = ch_versions // channel: [ versions.yml ] + mqc = ch_multiqc_files +} diff --git a/subworkflows/local/shortread_fastp/meta.yml b/subworkflows/local/shortread_fastp/meta.yml new file mode 100644 index 00000000..bc530bf8 --- /dev/null +++ b/subworkflows/local/shortread_fastp/meta.yml @@ -0,0 +1,51 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "shortread_fastp" +description: Perform filtering of short read FASTQ files +keywords: + - filtering + - fastq + - trimming +components: + - fastp +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + - adapterlist: + type: file + description: | + List of adapters to remove from the reads. + Structure: [ path(adapterlist) ] + pattern: "*.txt" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - reads: + type: file + description: | + Channel containing filtered FASTQ files + Structure: [ val(meta), path(reads) ] + pattern: "*.fastq.gz" + - versions: + type: file + description: | + File containing software versions + Structure: [ path(versions.yml) ] + pattern: "versions.yml" +authors: + - "@jfy133" + - "@sofstam" + - "@Midnighter" + - "@LilyAnderssonLee" +maintainers: + - "@avantonder" diff --git a/subworkflows/local/shortread_mapping/main.nf b/subworkflows/local/shortread_mapping/main.nf new file mode 100644 index 00000000..7d8dc0ce --- /dev/null +++ b/subworkflows/local/shortread_mapping/main.nf @@ -0,0 +1,98 @@ +// +// Perform short read mapping and variant calling +// + +include { FASTQ_ALIGN_BWAMEM2 } from '../fastq_align_bwamem2/main' +include { FASTQ_ALIGN_BOWTIE2 } from '../../nf-core/fastq_align_bowtie2/main' +include { BAM_VARIANT_CALLING_SORT_FREEBAYES_BCFTOOLS } from '../../local/bam_variant_calling_sort_freebayes_bcftools/main' +include { BCFTOOLS_FILTER } from '../../../modules/nf-core/bcftools/filter/main' +include { BCFTOOLS_NORM } from '../../../modules/nf-core/bcftools/norm/main' +include { BCFTOOLS_STATS } from '../../../modules/nf-core/bcftools/stats/main' +include { CONSENSUS_BCFTOOLS } from '../consensus_bcftools/main' +include { SEQTK_COMP } from '../../../modules/nf-core/seqtk/comp/main.nf' + +workflow SHORTREAD_MAPPING { + + take: + reads // channel: [ val(meta), [ reads ] ] + ch_fasta // channel: [meta, ref] + ch_index // channel: [meta, ref index] + ch_faidx // channel: [meta, ref fai] + + main: + ch_versions = channel.empty() + ch_multiqc_files = channel.empty() + + if (params.shortread_mapping_tool == 'bowtie2') { + FASTQ_ALIGN_BOWTIE2 ( + reads, + ch_index, + false, + false, + ch_fasta + ) + ch_bam = FASTQ_ALIGN_BOWTIE2.out.bam + ch_bam_index = FASTQ_ALIGN_BOWTIE2.out.bai + ch_multiqc_files = ch_multiqc_files.mix( FASTQ_ALIGN_BOWTIE2.out.stats ) + ch_versions = ch_versions.mix( 
FASTQ_ALIGN_BOWTIE2.out.versions ) + } else { + FASTQ_ALIGN_BWAMEM2 ( + reads, + ch_index, + ch_fasta, + false + ) + ch_bam = FASTQ_ALIGN_BWAMEM2.out.bam + ch_bam_index = FASTQ_ALIGN_BWAMEM2.out.bai + ch_multiqc_files = ch_multiqc_files.mix( FASTQ_ALIGN_BWAMEM2.out.stats ) + ch_versions = ch_versions.mix( FASTQ_ALIGN_BWAMEM2.out.versions ) + } + + // Prepare inputs for FreeBayes + ch_freebayes_fasta = ch_fasta // channel: [ val(meta), path(reference), path(fai)] + .join( ch_faidx ) + + freebayes_input = ch_bam // channel: [ val(meta), path(bam) ] + .join( ch_bam_index ) // channel: [ val(meta), path(bam), path(bam_index)] + .map{ + meta, bam, bai -> [ meta, bam, bai, [], [], [] ] + } + + BAM_VARIANT_CALLING_SORT_FREEBAYES_BCFTOOLS (freebayes_input, + ch_freebayes_fasta.first(), + [ [:], [] ], + [ [:], [] ], + [ [:], [] ] + ) + ch_versions = ch_versions.mix(BAM_VARIANT_CALLING_SORT_FREEBAYES_BCFTOOLS.out.versions) + + ch_bcftool_filter_input = BAM_VARIANT_CALLING_SORT_FREEBAYES_BCFTOOLS.out.vcf + .join(BAM_VARIANT_CALLING_SORT_FREEBAYES_BCFTOOLS.out.tbi) + + BCFTOOLS_FILTER ( ch_bcftool_filter_input ) + + ch_bcftool_norm_input = BCFTOOLS_FILTER.out.vcf.join(BCFTOOLS_FILTER.out.tbi) + BCFTOOLS_NORM ( ch_bcftool_norm_input, ch_fasta ) + + ch_bcftool_stats_input = BCFTOOLS_NORM.out.vcf.join(BCFTOOLS_NORM.out.tbi) + + BCFTOOLS_STATS ( ch_bcftool_stats_input, [ [:], [] ], [ [:], [] ], [ [:], [] ], [ [:], [] ], [ [:], [] ] ) + ch_multiqc_files = ch_multiqc_files.mix( BCFTOOLS_STATS.out.stats ) + + CONSENSUS_BCFTOOLS ( ch_bam, BCFTOOLS_NORM.out.vcf, BCFTOOLS_NORM.out.tbi, ch_fasta ) + ch_versions = ch_versions.mix( CONSENSUS_BCFTOOLS.out.versions ) + + SEQTK_COMP( CONSENSUS_BCFTOOLS.out.consensus ) + + emit: + bam = ch_bam // channel: [ val(meta), [ bam ] ] + bai = ch_bam_index // channel: [ val(meta), [ bai ] ] (BAM index from the aligner, not the reference index) + vcf = BCFTOOLS_NORM.out.vcf // channel: [meta, vcf] + csi = BCFTOOLS_NORM.out.csi // channel: [ val(meta), path(csi) ] + tbi = BCFTOOLS_NORM.out.tbi // channel: 
[meta, tbi] + stats = BCFTOOLS_STATS.out.stats // channel: [meta, stats] + consensus = CONSENSUS_BCFTOOLS.out.consensus // channel: [ val(meta), path(consensus) ] + seqtk_stats = SEQTK_COMP.out.seqtk_stats // channel: [meta, stats] + versions = ch_versions // channel: [ versions.yml ] + mqc = ch_multiqc_files // channel: [ val(meta), [ multiqc files ] ] +} diff --git a/subworkflows/local/shortread_mapping/meta.yml b/subworkflows/local/shortread_mapping/meta.yml new file mode 100644 index 00000000..7d66f1bb --- /dev/null +++ b/subworkflows/local/shortread_mapping/meta.yml @@ -0,0 +1,98 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "shortread_mapping" +description: Map short reads to a reference genome with BWA-MEM 2 or bowtie2 and sort the output BAM files +keywords: + - align + - fasta + - genome + - reference + - bam +components: + - fastq_align_bowtie2 + - fastq_align_bwamem2 + - bam_variant_calling_sort_freebayes_bcftools + - bcftools/filter + - bcftools/stats + - consensus_bcftools + - seqtk/comp +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - ch_fasta: + type: file + description: Reference fasta file + pattern: "*.{fasta,fa}" + - ch_index: + type: file + description: BWA-MEM2 or bowtie2 reference genome index files + pattern: "*.{bt2,amb,ann,bwt,pac,sa,alt}" + - ch_faidx: + type: file + description: samtools reference genome index files + pattern: "*.fai" + - ch_reads: + type: file + description: | + List of input FastQ files +output: + - bam: + type: file + description: | + Channel containing BAM files + Structure: [ val(meta), path(bam) ] + pattern: "*.bam" + - bai: + type: file + description: | + Channel containing indexed BAM (BAI) files + Structure: [ val(meta), path(bai) ] + pattern: "*.bai" + - vcf: + type: file + description: | + Channel containing Sorted VCF files + Structure: [ val(meta), path(vcf) ] + pattern: "*.{vcf.gz}" + - csi: + type: file + description: | + Channel containing CSI files + Structure: [ val(meta), path(csi) ] + pattern: "*.csi" + - tbi: + type: file + description: | + Channel containing TBI files + Structure: [ val(meta), path(tbi) ] + pattern: "*.tbi" + - stats: + type: file + description: | + Channel containing bcftools stats files + Structure: [ val(meta), path(stats) ] + pattern: "*.txt" + - consensus: + type: file + description: | + Channel containing consensus FASTA files + Structure: [ val(meta), path(consensus) ] + pattern: "*.fa" + - seqtk_stats: + type: file + description: | + Channel containing seqtk stats files + Structure: [ val(meta), path(seqtk_stats) ] + pattern: "*.tsv" + - versions: + type: file + description: | + File containing software versions + Structure: [ path(versions.yml) ] + pattern: "versions.yml" +authors: + - "@avantonder" +maintainers: + - "@avantonder" diff --git a/subworkflows/local/shortread_preprocessing/main.nf b/subworkflows/local/shortread_preprocessing/main.nf new file mode 100644 index 00000000..919bf28c --- /dev/null +++ b/subworkflows/local/shortread_preprocessing/main.nf @@ -0,0 +1,50 @@ +// 
Perform read trimming and merging +// + + +include { SHORTREAD_FASTP } from '../shortread_fastp/main' +include { SHORTREAD_ADAPTERREMOVAL } from '../shortread_adapterremoval/main' +include { FASTQC as FASTQC_PROCESSED } from '../../../modules/nf-core/fastqc/main' +include { FALCO as FALCO_PROCESSED } from '../../../modules/nf-core/falco/main' + +workflow SHORTREAD_PREPROCESSING { + take: + reads // [ [ meta ], [ reads ] ] + adapterlist // file + + main: + ch_versions = channel.empty() + ch_multiqc_files = channel.empty() + + if (params.shortread_qc_tool == "fastp") { + SHORTREAD_FASTP(reads, adapterlist) + ch_processed_reads = SHORTREAD_FASTP.out.reads + ch_versions = ch_versions.mix(SHORTREAD_FASTP.out.versions) + ch_multiqc_files = ch_multiqc_files.mix(SHORTREAD_FASTP.out.mqc) + } + else if (params.shortread_qc_tool == "adapterremoval") { + SHORTREAD_ADAPTERREMOVAL(reads, adapterlist) + ch_processed_reads = SHORTREAD_ADAPTERREMOVAL.out.reads + ch_versions = ch_versions.mix(SHORTREAD_ADAPTERREMOVAL.out.versions) + ch_multiqc_files = ch_multiqc_files.mix(SHORTREAD_ADAPTERREMOVAL.out.mqc) + } + else { + ch_processed_reads = reads + } + + if (params.preprocessing_qc_tool == 'fastqc') { + FASTQC_PROCESSED(ch_processed_reads) + ch_multiqc_files = ch_multiqc_files.mix(FASTQC_PROCESSED.out.zip) + } + else if (params.preprocessing_qc_tool == 'falco') { + FALCO_PROCESSED(ch_processed_reads) + ch_versions = ch_versions.mix(FALCO_PROCESSED.out.versions) + ch_multiqc_files = ch_multiqc_files.mix(FALCO_PROCESSED.out.txt) + } + + emit: + reads = ch_processed_reads // channel: [ val(meta), [ reads ] ] + versions = ch_versions // channel: [ versions.yml ] + mqc = ch_multiqc_files +} diff --git a/subworkflows/local/shortread_preprocessing/meta.yml b/subworkflows/local/shortread_preprocessing/meta.yml new file mode 100644 index 00000000..5f1d3523 --- /dev/null +++ b/subworkflows/local/shortread_preprocessing/meta.yml @@ -0,0 +1,54 @@ +# yaml-language-server: 
$schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "shortread_preprocessing" +description: Perform filtering and adapter removal of short read FASTQ files +keywords: + - preprocessing + - cleaning + - fastq +components: + - shortread_fastp + - shortread_adapterremoval + - fastqc + - falco +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + - adapterlist: + type: file + description: | + List of adapters to remove from the reads. + Structure: [ path(adapterlist) ] + pattern: "*.txt" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + Channel containing cleaned FASTQ files + Structure: [ val(meta), path(reads) ] + pattern: "*.fastq.gz" + - versions: + type: file + description: | + File containing software versions + Structure: [ path(versions.yml) ] + pattern: "versions.yml" +authors: + - "@jfy133" + - "@sofstam" + - "@Midnighter" + - "@LilyAnderssonLee" +maintainers: + - "@avantonder" diff --git a/subworkflows/local/utils_nfcore_bactmap_pipeline/main.nf b/subworkflows/local/utils_nfcore_bactmap_pipeline/main.nf index 105a4e63..160295f4 100644 --- a/subworkflows/local/utils_nfcore_bactmap_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_bactmap_pipeline/main.nf @@ -11,6 +11,7 @@ include { UTILS_NFSCHEMA_PLUGIN } from '../../nf-core/utils_nfschema_plugin' include { paramsSummaryMap } from 'plugin/nf-schema' include { samplesheetToList } from 'plugin/nf-schema' +include { paramsHelp } from 'plugin/nf-schema' include { completionEmail } from '../../nf-core/utils_nfcore_pipeline' include { completionSummary } from '../../nf-core/utils_nfcore_pipeline' include { imNotification } from 
'../../nf-core/utils_nfcore_pipeline' @@ -32,10 +33,13 @@ workflow PIPELINE_INITIALISATION { nextflow_cli_args // array: List of positional nextflow CLI args outdir // string: The output directory where the results will be saved input // string: Path to input samplesheet + help // boolean: Display help message and exit + help_full // boolean: Show the full help message + show_hidden // boolean: Show hidden parameters in the help message main: - ch_versions = Channel.empty() + ch_versions = channel.empty() // // Print version and exit if required and dump pipeline parameters to JSON file @@ -50,10 +54,35 @@ workflow PIPELINE_INITIALISATION { // // Validate parameters and generate parameter summary to stdout // + before_text = """ +-\033[2m----------------------------------------------------\033[0m- + \033[0;32m,--.\033[0;30m/\033[0;32m,-.\033[0m +\033[0;34m ___ __ __ __ ___ \033[0;32m/,-._.--~\'\033[0m +\033[0;34m |\\ | |__ __ / ` / \\ |__) |__ \033[0;33m} {\033[0m +\033[0;34m | \\| | \\__, \\__/ | \\ |___ \033[0;32m\\`-._,-`-,\033[0m + \033[0;32m`._,._,\'\033[0m +\033[0;35m nf-core/bactmap ${workflow.manifest.version}\033[0m +-\033[2m----------------------------------------------------\033[0m- +""" + after_text = """${workflow.manifest.doi ? "\n* The pipeline\n" : ""}${workflow.manifest.doi.tokenize(",").collect { doi -> " https://doi.org/${doi.trim().replace('https://doi.org/','')}"}.join("\n")}${workflow.manifest.doi ? 
"\n" : ""} +* The nf-core framework + https://doi.org/10.1038/s41587-020-0439-x + +* Software dependencies + https://github.com/nf-core/bactmap/blob/master/CITATIONS.md +""" + command = "nextflow run ${workflow.manifest.name} -profile --input samplesheet.csv --outdir " + UTILS_NFSCHEMA_PLUGIN ( workflow, validate_params, - null + null, + help, + help_full, + show_hidden, + before_text, + after_text, + command ) // @@ -72,26 +101,11 @@ workflow PIPELINE_INITIALISATION { // Create channel from input file provided through params.input // - Channel - .fromList(samplesheetToList(params.input, "${projectDir}/assets/schema_input.json")) - .map { - meta, fastq_1, fastq_2 -> - if (!fastq_2) { - return [ meta.id, meta + [ single_end:true ], [ fastq_1 ] ] - } else { - return [ meta.id, meta + [ single_end:false ], [ fastq_1, fastq_2 ] ] - } - } - .groupTuple() - .map { samplesheet -> - validateInputSamplesheet(samplesheet) - } - .map { - meta, fastqs -> - return [ meta, fastqs.flatten() ] - } + channel + .fromList(samplesheetToList(params.input, "assets/schema_input.json")) .set { ch_samplesheet } + emit: samplesheet = ch_samplesheet versions = ch_versions @@ -171,6 +185,7 @@ def validateInputSamplesheet(input) { return [ metas[0], fastqs ] } + // // Get attribute from genome config file e.g. fasta // @@ -200,27 +215,136 @@ def genomeExistsError() { // Generate methods description for MultiQC // def toolCitationText() { - // TODO nf-core: Optionally add in-text citation tools to this list. - // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", - // Uncomment function in methodsDescriptionText to render in MultiQC report + def text_seq_qc = [ + "Sequencing quality control with", + params.preprocessing_qc_tool == "falco" ? "Falco (de Sena Brandine and Smith 2021)." : "FastQC (Andrews 2010)." 
+ ].join(' ').trim() + + def text_shortread_qc = [ + "Short read preprocessing was performed with:", + params.shortread_qc_tool == "adapterremoval" ? "AdapterRemoval (Schubert et al. 2016)." : "", + params.shortread_qc_tool == "fastp" ? "fastp (Chen et al. 2018)." : "", + ].join(' ').trim() + + def text_longread_qc = [ + "Long read preprocessing was performed with:", + params.longread_adapterremoval_tool == "porechop_abi" ? "Porechop_ABI (Bonenfant et al. 2023)," : "", + params.longread_adapterremoval_tool == "porechop" ? "Porechop (Wick et al. 2017)," : "", + params.longread_filter_tool == "filtlong" ? "Filtlong (Wick 2021)." : "", + params.longread_filter_tool == "nanoq" ? "Nanoq (Steinig and Coin 2022)." : "", + ].join(' ').trim() + + def text_subsampling = [ + "Read subsampling was done with Rasusa (Hall et al. 2019)." + ].join(' ').trim() + + def shortread_mapping = [ + "Short read mapping and variant calling was performed with", + params.shortread_mapping_tool == "bowtie2" ? "Bowtie2 (Langmead and Salzberg 2012)," : "BWA-MEM2 (Li 2021),", + "FreeBayes (Garrison and Marth 2012)", + "and BCFtools (Li 2011).", + ].join(' ').trim() + + def longread_mapping = [ + "Long read mapping and variant calling was performed with", + "Minimap2 (Li 2018),", + "Clair3 (Zhang et al. 2020)", + "and BCFtools (Li 2011).", + ].join(' ').trim() + + def consensus = [ + "Consensus FASTA sequences were created with", + "BCFtools (Li 2011)", + "and bedtools (Quinlan and Hall 2010).", + ].join(' ').trim() + + def snps = [ + "Variant and constant sites were extracted from the alignment using SNP-sites (Page et al. 2016).", + ].join(' ').trim() + + def statistics = [ + "Read, mapping and variant statistics were generated with", + "fastq-scan (Petit 2022),", + "Samtools (Li et al. 2009),", + "BCFtools (Li 2011)", + "and seqtk (Li 2012).", + ].join(' ').trim() + def citation_text = [ - "Tools used in the workflow included:", - "FastQC (Andrews 2010),", - "MultiQC (Ewels et al. 2016)", - "." 
- ].join(' ').trim() + "Tools used in the workflow included:", + text_seq_qc, + params.perform_shortread_qc ? text_shortread_qc : "", + params.perform_longread_qc ? text_longread_qc : "", + params.perform_subsampling ? text_subsampling : "", + shortread_mapping, + longread_mapping, + consensus, + snps, + statistics, + "Pipeline results statistics were summarised with MultiQC (Ewels et al. 2016)." + ].join(' ').trim().replaceAll("[,|.] +\\.", ".") return citation_text } def toolBibliographyText() { - // TODO nf-core: Optionally add bibliographic entries to this list. - // Can use ternary operators to dynamically construct based conditions, e.g. params["run_xyz"] ? "
  • Author (2023) Pub name, Journal, DOI
  • " : "", - // Uncomment function in methodsDescriptionText to render in MultiQC report + def text_seq_qc = [ + params.preprocessing_qc_tool == "falco" ? "
  • de Sena Brandine, G., & Smith, A. D. (2021). Falco: high-speed FastQC emulation for quality control of sequencing data. F1000Research, 8(1874), 1874. 10.12688/f1000research.21142.2
  • " : "", + params.preprocessing_qc_tool == "fastqc" ? "
  • Andrews S. (2010) FastQC: A Quality Control Tool for High Throughput Sequence Data, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/
  • " : "", + ].join(' ').trim() + + def text_shortread_qc = [ + params.shortread_qc_tool == "adapterremoval" ? "
  • Schubert, M., Lindgreen, S., & Orlando, L. (2016). AdapterRemoval v2: rapid adapter trimming, identification, and read merging. BMC Research Notes, 9, 88. 10.1186/s13104-016-1900-2
  • " : "", + ].join(' ').trim() + + def text_longread_qc = [ + params.longread_adapterremoval_tool == "porechop_abi" ? "
  • Bonenfant, Q., Noé, L., & Touzet, H. (2023). Porechop_ABI: discovering unknown adapters in Oxford Nanopore Technology sequencing reads for downstream trimming. Bioinformatics Advances, 3(1):vbac085. 10.1093/bioadv/vbac085
  • " : "", + params.longread_adapterremoval_tool == "porechop" ? "
  • Wick, R. R., Judd, L. M., Gorrie, C. L., & Holt, K. E. (2017). Completing bacterial genome assemblies with multiplex MinION sequencing. Microbial Genomics, 3(10), e000132. 10.1099/mgen.0.000132
  • " : "", + params.longread_filter_tool == "filtlong" ? "
  • Wick R. (2021) Filtlong, URL: https://github.com/rrwick/Filtlong
  • " : "", + params.longread_filter_tool == "nanoq" ? "
  • Steinig, E., & Coin, L. (2022). Nanoq: ultra-fast quality control for nanopore reads. Journal of Open Source Software, 7(69). 10.21105/joss.02991
  • " : "" + ].join(' ').trim() + + def text_subsampling = [ + "
  • Hall, M. B. (2019). Rasusa: Randomly subsample sequencing reads to a specified coverage. 10.5281/zenodo.3731394
  • " + ].join(' ').trim() + + def text_shortreadmapping = [ + params.shortread_mapping_tool == "bowtie2" ? "
  • Langmead, B., & Salzberg, S. L. (2012). Fast gapped-read alignment with Bowtie 2. Nature Methods, 9(4), 357–359. 10.1038/nmeth.1923
  • " : "
  • Vasimuddin, M., Misra, S., Li H., & Aluru S. (2019). Efficient Architecture-Aware Acceleration of BWA-MEM for Multicore Systems. IEEE International Parallel and Distributed Processing Symposium (IPDPS). 2019, pp. 314-324. 10.1109/IPDPS.2019.00041
  • ", + "
  • Garrison, E., & Marth, G. T. (2012). Haplotype-based variant detection from short-read sequencing. arXiv, 1207.3907. 10.48550/arXiv.1207.3907
  • ", + "
  • Li, H. (2011). A statistical framework for SNP calling, mutation discovery, and population genetical parameter estimation from sequencing data. Bioinformatics, 27(21), 2987–2993. 10.1093/bioinformatics/btr509
  • ", + ].join(' ').trim() + + def text_longreadmapping = [ + "
  • Li, H. (2018). Minimap2: pairwise alignment for nucleotide sequences. Bioinformatics, 34(18), 3094–3100. 10.1093/bioinformatics/bty191
  • ", + "
  • Zheng Z., Li S., Su J., Leung A. W., Lam T. W., & Luo R. (2022). Clair3: fast and accurate long-read variant calling with deep neural networks. Nature Computer Science, 2(12):797-803. 10.1038/s43588-022-00387-x
  • ", + ].join(' ').trim() + + def text_consensus = [ + "
  • Quinlan A. R., & Hall I. M. (2010). BEDTools: a flexible suite of utilities for comparing genomic features. Bioinformatics, 26(6):841-2. 10.1093/bioinformatics/btq033
  • " + ].join(' ').trim() + + def text_snps = [ + "
  • Page A. J., Taylor B., Delaney A. J., Soares J., Seemann T., Keane J. A., & Harris S. R. (2016). SNP-sites: rapid efficient extraction of SNPs from multi-FASTA alignments. Microbial Genomics, 2(4):e000056. 10.1099/mgen.0.000056
  • " + ].join(' ').trim() + + def text_statistics = [ + "
  • Petit, R. (2022). fastq-scan: A tool for reading a FASTQ from STDIN and outputting summary statistics. URL: https://github.com/rpetit3/fastq-scan
  • ", + "
  • Li, H., Handsaker, B., Wysoker, A., Fennell, T., Ruan, J., Homer, N., ... & Durbin, R. (2009). The Sequence Alignment/Map format and SAMtools. Bioinformatics, 25(16), 2078–2079. 10.1093/bioinformatics/btp352
  • ", + "
  • Li H. (2012). seqtk: Toolkit for processing sequences in FASTA/Q formats. URL: https://github.com/lh3/seqtk
  • " + ].join(' ').trim() + def reference_text = [ - "
  • Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).
  • ", - "
  • Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics , 32(19), 3047–3048. doi: /10.1093/bioinformatics/btw354
  • " - ].join(' ').trim() + text_seq_qc, + text_shortread_qc, + params.perform_longread_qc ? text_longread_qc : "", + params.perform_subsampling ? text_subsampling : "", + text_shortreadmapping, + text_longreadmapping, + text_consensus, + text_snps, + text_statistics, + "
  • Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics, 32(19), 3047–3048. doi: 10.1093/bioinformatics/btw354
  • " + ].join(' ').trim().replaceAll("[,|.] +\\.", ".") return reference_text } @@ -246,12 +370,11 @@ def methodsDescriptionText(mqc_methods_yaml) { meta["nodoi_text"] = meta.manifest_map.doi ? "" : "
  • If available, make sure to update the text to include the Zenodo DOI of version of the pipeline used.
  • " // Tool references - meta["tool_citations"] = "" - meta["tool_bibliography"] = "" + // meta["tool_citations"] = "" + // meta["tool_bibliography"] = "" - // TODO nf-core: Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled! - // meta["tool_citations"] = toolCitationText().replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") - // meta["tool_bibliography"] = toolBibliographyText() + meta["tool_citations"] = toolCitationText().replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") + meta["tool_bibliography"] = toolBibliographyText() def methods_text = mqc_methods_yaml.text diff --git a/subworkflows/nf-core/bam_sort_stats_samtools/main.nf b/subworkflows/nf-core/bam_sort_stats_samtools/main.nf new file mode 100644 index 00000000..e4579494 --- /dev/null +++ b/subworkflows/nf-core/bam_sort_stats_samtools/main.nf @@ -0,0 +1,49 @@ +// +// Sort, index BAM file and run samtools stats, flagstat and idxstats +// + +include { SAMTOOLS_SORT } from '../../../modules/nf-core/samtools/sort/main' +include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index/main' +include { BAM_STATS_SAMTOOLS } from '../bam_stats_samtools/main' + +workflow BAM_SORT_STATS_SAMTOOLS { + take: + ch_bam // channel: [ val(meta), [ bam ] ] + ch_fasta // channel: [ val(meta), path(fasta) ] + + main: + + ch_versions = channel.empty() + + SAMTOOLS_SORT ( ch_bam, ch_fasta, '' ) + + SAMTOOLS_INDEX ( SAMTOOLS_SORT.out.bam ) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) + + SAMTOOLS_SORT.out.bam + .join(SAMTOOLS_INDEX.out.bai, by: [0], remainder: true) + .join(SAMTOOLS_INDEX.out.csi, by: [0], remainder: true) + .map { + meta, bam, bai, csi -> + if (bai) { + [ meta, bam, bai ] + } else { + [ meta, bam, csi ] + } + } + .set { ch_bam_bai } + + BAM_STATS_SAMTOOLS ( ch_bam_bai, ch_fasta ) + ch_versions = ch_versions.mix(BAM_STATS_SAMTOOLS.out.versions) + + emit: + bam = SAMTOOLS_SORT.out.bam // 
channel: [ val(meta), [ bam ] ] + bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), [ bai ] ] + csi = SAMTOOLS_INDEX.out.csi // channel: [ val(meta), [ csi ] ] + + stats = BAM_STATS_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] + flagstat = BAM_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] + idxstats = BAM_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/nf-core/bam_sort_stats_samtools/meta.yml b/subworkflows/nf-core/bam_sort_stats_samtools/meta.yml new file mode 100644 index 00000000..e01f9ccf --- /dev/null +++ b/subworkflows/nf-core/bam_sort_stats_samtools/meta.yml @@ -0,0 +1,70 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: bam_sort_stats_samtools +description: Sort SAM/BAM/CRAM file +keywords: + - sort + - bam + - sam + - cram +components: + - samtools/sort + - samtools/index + - samtools/stats + - samtools/idxstats + - samtools/flagstat + - bam_stats_samtools +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - fasta: + type: file + description: Reference genome fasta file + pattern: "*.{fasta,fa}" +# TODO Update when we decide on a standard for subworkflow docs +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - bam: + type: file + description: Sorted BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - bai: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - crai: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - stats: + type: file + description: File containing samtools stats output + pattern: "*.{stats}" + - flagstat: + type: file + description: File containing samtools flagstat output + pattern: "*.{flagstat}" + - idxstats: + type: file + description: File containing samtools idxstats output + pattern: "*.{idxstats}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@ewels" +maintainers: + - "@drpatelh" + - "@ewels" diff --git a/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test b/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test new file mode 100644 index 00000000..821a3cf5 --- /dev/null +++ b/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test @@ -0,0 +1,134 @@ +nextflow_workflow { + + name "Test Workflow BAM_SORT_STATS_SAMTOOLS" + script "../main.nf" + workflow "BAM_SORT_STATS_SAMTOOLS" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "subworkflows/bam_sort_stats_samtools" + tag "bam_sort_stats_samtools" + tag "subworkflows/bam_stats_samtools" + tag "bam_stats_samtools" + tag "samtools" + tag "samtools/index" + tag "samtools/sort" + tag "samtools/stats" + tag "samtools/idxstats" + tag "samtools/flagstat" + + test("test_bam_sort_stats_samtools_single_end") { + + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.bam', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + """ 
+ } + } + + then { + assertAll( + { assert workflow.success}, + { assert workflow.out.bam.get(0).get(1) ==~ ".*.bam"}, + { assert workflow.out.bai.get(0).get(1) ==~ ".*.bai"}, + { assert snapshot( + workflow.out.flagstat, + workflow.out.idxstats, + workflow.out.stats, + workflow.out.versions).match() } + ) + } + } + + test("test_bam_sort_stats_samtools_paired_end") { + + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert workflow.out.bam.get(0).get(1) ==~ ".*.bam"}, + { assert workflow.out.bai.get(0).get(1) ==~ ".*.bai"}, + { assert snapshot( + workflow.out.flagstat, + workflow.out.idxstats, + workflow.out.stats, + workflow.out.versions).match() } + ) + } + } + + test("test_bam_sort_stats_samtools_single_end - stub") { + + options "-stub" + + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.bam', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out).match() } + ) + } + } + + test("test_bam_sort_stats_samtools_paired_end - stub") { + + options "-stub" + + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ 
id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test.snap b/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test.snap new file mode 100644 index 00000000..239f163c --- /dev/null +++ b/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test.snap @@ -0,0 +1,318 @@ +{ + "test_bam_sort_stats_samtools_single_end": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,2191911d72575a2358b08b1df64ccb53" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,613e048487662c694aa4a2f73ca96a20" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,1101fe711c4a389fdb5c4a1532107d1f" + ] + ], + [ + "versions.yml:md5,54f02345c3a7699f9272e6ef9ce916c5", + "versions.yml:md5,6a93080732801bacb21c3acbe13858a5", + "versions.yml:md5,de3b0ae7c3ac4188662d57fd3219e312" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2025-11-01T03:04:39.140333" + }, + "test_bam_sort_stats_samtools_paired_end": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,df60a8c8d6621100d05178c93fb053a2" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,f26c554c244ee86c89d62ebed509fd95" + ] + ], + [ + "versions.yml:md5,54f02345c3a7699f9272e6ef9ce916c5", + "versions.yml:md5,6a93080732801bacb21c3acbe13858a5", + "versions.yml:md5,de3b0ae7c3ac4188662d57fd3219e312" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2025-11-01T03:04:48.787289" + }, + 
"test_bam_sort_stats_samtools_single_end - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,67394650dbae96d1a4fcc70484822159" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + "versions.yml:md5,54f02345c3a7699f9272e6ef9ce916c5", + "versions.yml:md5,6a93080732801bacb21c3acbe13858a5", + "versions.yml:md5,de3b0ae7c3ac4188662d57fd3219e312" + ], + "bai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "csi": [ + + ], + "flagstat": [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,67394650dbae96d1a4fcc70484822159" + ] + ], + "idxstats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "stats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,54f02345c3a7699f9272e6ef9ce916c5", + "versions.yml:md5,6a93080732801bacb21c3acbe13858a5", + "versions.yml:md5,de3b0ae7c3ac4188662d57fd3219e312" + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2025-11-01T03:05:00.693649" + }, + "test_bam_sort_stats_samtools_paired_end - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + 
"test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,67394650dbae96d1a4fcc70484822159" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + "versions.yml:md5,54f02345c3a7699f9272e6ef9ce916c5", + "versions.yml:md5,6a93080732801bacb21c3acbe13858a5", + "versions.yml:md5,de3b0ae7c3ac4188662d57fd3219e312" + ], + "bai": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "csi": [ + + ], + "flagstat": [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,67394650dbae96d1a4fcc70484822159" + ] + ], + "idxstats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "stats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,54f02345c3a7699f9272e6ef9ce916c5", + "versions.yml:md5,6a93080732801bacb21c3acbe13858a5", + "versions.yml:md5,de3b0ae7c3ac4188662d57fd3219e312" + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2025-11-01T03:05:10.863912" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/bam_stats_samtools/main.nf b/subworkflows/nf-core/bam_stats_samtools/main.nf new file mode 100644 index 00000000..a79d9572 --- /dev/null +++ b/subworkflows/nf-core/bam_stats_samtools/main.nf @@ -0,0 +1,31 @@ +// +// Run SAMtools stats, 
flagstat and idxstats +// + +include { SAMTOOLS_STATS } from '../../../modules/nf-core/samtools/stats/main' +include { SAMTOOLS_IDXSTATS } from '../../../modules/nf-core/samtools/idxstats/main' +include { SAMTOOLS_FLAGSTAT } from '../../../modules/nf-core/samtools/flagstat/main' + +workflow BAM_STATS_SAMTOOLS { + take: + ch_bam_bai // channel: [ val(meta), path(bam), path(bai) ] + ch_fasta // channel: [ val(meta), path(fasta) ] + + main: + ch_versions = channel.empty() + + SAMTOOLS_STATS ( ch_bam_bai, ch_fasta ) + + SAMTOOLS_FLAGSTAT ( ch_bam_bai ) + ch_versions = ch_versions.mix(SAMTOOLS_FLAGSTAT.out.versions) + + SAMTOOLS_IDXSTATS ( ch_bam_bai ) + ch_versions = ch_versions.mix(SAMTOOLS_IDXSTATS.out.versions) + + emit: + stats = SAMTOOLS_STATS.out.stats // channel: [ val(meta), path(stats) ] + flagstat = SAMTOOLS_FLAGSTAT.out.flagstat // channel: [ val(meta), path(flagstat) ] + idxstats = SAMTOOLS_IDXSTATS.out.idxstats // channel: [ val(meta), path(idxstats) ] + + versions = ch_versions // channel: [ path(versions.yml) ] +} diff --git a/subworkflows/nf-core/bam_stats_samtools/meta.yml b/subworkflows/nf-core/bam_stats_samtools/meta.yml new file mode 100644 index 00000000..809bf736 --- /dev/null +++ b/subworkflows/nf-core/bam_stats_samtools/meta.yml @@ -0,0 +1,43 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: bam_stats_samtools +description: Produces comprehensive statistics from SAM/BAM/CRAM file +keywords: + - statistics + - counts + - bam + - sam + - cram +components: + - samtools/stats + - samtools/idxstats + - samtools/flagstat +input: + - ch_bam_bai: + description: | + The input channel containing the BAM/CRAM and it's index + Structure: [ val(meta), path(bam), path(bai) ] + - ch_fasta: + description: | + Reference genome fasta file + Structure: [ path(fasta) ] +output: + - stats: + description: | + File containing samtools stats output + Structure: [ val(meta), path(stats) ] + 
- flagstat: + description: | + File containing samtools flagstat output + Structure: [ val(meta), path(flagstat) ] + - idxstats: + description: | + File containing samtools idxstats output + Structure: [ val(meta), path(idxstats)] + - versions: + description: | + Files containing software versions + Structure: [ path(versions.yml) ] +authors: + - "@drpatelh" +maintainers: + - "@drpatelh" diff --git a/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test b/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test new file mode 100644 index 00000000..76e7a40a --- /dev/null +++ b/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test @@ -0,0 +1,188 @@ +nextflow_workflow { + + name "Test Workflow BAM_STATS_SAMTOOLS" + script "../main.nf" + workflow "BAM_STATS_SAMTOOLS" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "bam_stats_samtools" + tag "subworkflows/bam_stats_samtools" + tag "samtools" + tag "samtools/flagstat" + tag "samtools/idxstats" + tag "samtools/stats" + + test("test_bam_stats_samtools_single_end") { + + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.sorted.bam.bai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot( + workflow.out.flagstat, + workflow.out.idxstats, + workflow.out.stats, + workflow.out.versions).match() } + ) + } + } + + test("test_bam_stats_samtools_paired_end") { + + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot( + workflow.out.flagstat, + workflow.out.idxstats, + workflow.out.stats, + workflow.out.versions).match() } + ) + } + } + + test("test_bam_stats_samtools_paired_end_cram") { + + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot( + workflow.out.flagstat, + workflow.out.idxstats, + workflow.out.stats, + workflow.out.versions).match() } + ) + } + } + + test ("test_bam_stats_samtools_single_end - stub") { + + options "-stub" + + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.sorted.bam.bai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + 
assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out).match() } + ) + } + } + + test("test_bam_stats_samtools_paired_end - stub") { + + options "-stub" + + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + } + + test("test_bam_stats_samtools_paired_end_cram - stub") { + + options "-stub" + + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test.snap b/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test.snap new file mode 100644 index 00000000..3c543191 --- /dev/null +++ b/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test.snap @@ -0,0 +1,341 @@ +{ + "test_bam_stats_samtools_paired_end - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + 
"test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.flagstat:md5,67394650dbae96d1a4fcc70484822159" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,088c14fc7d21fa2e662860d7cbf9a181", + "versions.yml:md5,ade6457ea5ae73a41c505bb22681d0fa" + ], + "flagstat": [ + [ + { + "id": "test", + "single_end": true + }, + "test.flagstat:md5,67394650dbae96d1a4fcc70484822159" + ] + ], + "idxstats": [ + [ + { + "id": "test", + "single_end": true + }, + "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "stats": [ + [ + { + "id": "test", + "single_end": true + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,088c14fc7d21fa2e662860d7cbf9a181", + "versions.yml:md5,ade6457ea5ae73a41c505bb22681d0fa" + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2025-11-01T02:39:28.707989" + }, + "test_bam_stats_samtools_single_end - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.flagstat:md5,67394650dbae96d1a4fcc70484822159" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,088c14fc7d21fa2e662860d7cbf9a181", + "versions.yml:md5,ade6457ea5ae73a41c505bb22681d0fa" + ], + "flagstat": [ + [ + { + "id": "test", + "single_end": true + }, + "test.flagstat:md5,67394650dbae96d1a4fcc70484822159" + ] + ], + "idxstats": [ + [ + { + "id": "test", + "single_end": true + }, + "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "stats": [ + [ + { + "id": "test", + "single_end": true + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + 
], + "versions": [ + "versions.yml:md5,088c14fc7d21fa2e662860d7cbf9a181", + "versions.yml:md5,ade6457ea5ae73a41c505bb22681d0fa" + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2025-11-01T02:39:20.473816" + }, + "test_bam_stats_samtools_paired_end_cram - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,67394650dbae96d1a4fcc70484822159" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,088c14fc7d21fa2e662860d7cbf9a181", + "versions.yml:md5,ade6457ea5ae73a41c505bb22681d0fa" + ], + "flagstat": [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,67394650dbae96d1a4fcc70484822159" + ] + ], + "idxstats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "stats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,088c14fc7d21fa2e662860d7cbf9a181", + "versions.yml:md5,ade6457ea5ae73a41c505bb22681d0fa" + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2025-11-01T02:39:37.697552" + }, + "test_bam_stats_samtools_single_end": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.flagstat:md5,2191911d72575a2358b08b1df64ccb53" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.idxstats:md5,613e048487662c694aa4a2f73ca96a20" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.stats:md5,7a05a22bdb17e8df6e8c2d100ff09a31" + ] + ], + [ + "versions.yml:md5,088c14fc7d21fa2e662860d7cbf9a181", + "versions.yml:md5,ade6457ea5ae73a41c505bb22681d0fa" + ] + ], + "meta": { + 
"nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2025-11-01T02:38:50.143087" + }, + "test_bam_stats_samtools_paired_end": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.idxstats:md5,df60a8c8d6621100d05178c93fb053a2" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.stats:md5,a391612b5ef5b181e854ccaad8c8a068" + ] + ], + [ + "versions.yml:md5,088c14fc7d21fa2e662860d7cbf9a181", + "versions.yml:md5,ade6457ea5ae73a41c505bb22681d0fa" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2025-11-01T02:39:01.02311" + }, + "test_bam_stats_samtools_paired_end_cram": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,a53f3d26e2e9851f7d528442bbfe9781" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,e179601fa7b8ebce81ac3765206f6c15" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,2b0e31ab01b867a6ff312023ae03838d" + ] + ], + [ + "versions.yml:md5,088c14fc7d21fa2e662860d7cbf9a181", + "versions.yml:md5,ade6457ea5ae73a41c505bb22681d0fa" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.0" + }, + "timestamp": "2025-11-01T02:39:12.503355" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/fastq_align_bowtie2/main.nf b/subworkflows/nf-core/fastq_align_bowtie2/main.nf new file mode 100644 index 00000000..cafaa9bf --- /dev/null +++ b/subworkflows/nf-core/fastq_align_bowtie2/main.nf @@ -0,0 +1,45 @@ +// +// Alignment with Bowtie2 +// + +include { BOWTIE2_ALIGN } from '../../../modules/nf-core/bowtie2/align/main' +include { BAM_SORT_STATS_SAMTOOLS } from '../bam_sort_stats_samtools/main' + +workflow FASTQ_ALIGN_BOWTIE2 { + take: + ch_reads // channel: [ val(meta), [ reads ] ] + ch_index // channel: /path/to/bowtie2/index/ + 
save_unaligned // val + sort_bam // val + ch_fasta // channel: /path/to/reference.fasta + + main: + + ch_versions = Channel.empty() + + // + // Map reads with Bowtie2 + // + BOWTIE2_ALIGN ( ch_reads, ch_index, ch_fasta, save_unaligned, sort_bam ) + ch_versions = ch_versions.mix(BOWTIE2_ALIGN.out.versions) + + // + // Sort, index BAM file and run samtools stats, flagstat and idxstats + // + BAM_SORT_STATS_SAMTOOLS ( BOWTIE2_ALIGN.out.bam, ch_fasta ) + ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions) + + emit: + bam_orig = BOWTIE2_ALIGN.out.bam // channel: [ val(meta), aligned ] + log_out = BOWTIE2_ALIGN.out.log // channel: [ val(meta), log ] + fastq = BOWTIE2_ALIGN.out.fastq // channel: [ val(meta), fastq ] + + bam = BAM_SORT_STATS_SAMTOOLS.out.bam // channel: [ val(meta), [ bam ] ] + bai = BAM_SORT_STATS_SAMTOOLS.out.bai // channel: [ val(meta), [ bai ] ] + csi = BAM_SORT_STATS_SAMTOOLS.out.csi // channel: [ val(meta), [ csi ] ] + stats = BAM_SORT_STATS_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] + flagstat = BAM_SORT_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] + idxstats = BAM_SORT_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/nf-core/fastq_align_bowtie2/meta.yml b/subworkflows/nf-core/fastq_align_bowtie2/meta.yml new file mode 100644 index 00000000..b18e4054 --- /dev/null +++ b/subworkflows/nf-core/fastq_align_bowtie2/meta.yml @@ -0,0 +1,67 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: fastq_align_bowtie2 +description: Align reads to a reference genome using bowtie2 then sort with samtools +keywords: + - align + - fasta + - genome + - reference +components: + - bowtie2/align + - samtools/sort + - samtools/index + - samtools/stats + - samtools/idxstats + - samtools/flagstat + - bam_sort_stats_samtools +input: + - meta: + 
type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ch_reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + - ch_index: + type: file + description: Bowtie2 genome index files + pattern: "*.ebwt" + - save_unaligned: + type: boolean + description: | + Save reads that do not map to the reference (true) or discard them (false) + (default: false) + - sort_bam: + type: boolean + description: | + Use samtools sort (true) or samtools view (false) + default: false + - ch_fasta: + type: file + description: Reference fasta file + pattern: "*.{fasta,fa}" +# TODO Update when we decide on a standard for subworkflow docs +output: + - bam: + type: file + description: Output BAM file containing read alignments + pattern: "*.{bam}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - fastq: + type: file + description: Unaligned FastQ files + pattern: "*.fastq.gz" + - log: + type: file + description: Alignment log + pattern: "*.log" +authors: + - "@drpatelh" +maintainers: + - "@drpatelh" diff --git a/subworkflows/nf-core/fastq_align_bowtie2/tests/main.nf.test b/subworkflows/nf-core/fastq_align_bowtie2/tests/main.nf.test new file mode 100644 index 00000000..6eca398d --- /dev/null +++ b/subworkflows/nf-core/fastq_align_bowtie2/tests/main.nf.test @@ -0,0 +1,189 @@ +nextflow_workflow { + + name "Test Subworkflow FASTQ_ALIGN_BOWTIE2" + script "../main.nf" + config "./nextflow.config" + workflow "FASTQ_ALIGN_BOWTIE2" + + tag "subworkflows" + tag "subworkflows_nfcore" + tag "subworkflows/fastq_align_bowtie2" + tag "subworkflows/bam_sort_stats_samtools" + tag "bowtie2" + tag "bowtie2/build" + tag "bowtie2/align" + + test("test_align_bowtie2_single_end") { + setup { + run("BOWTIE2_BUILD") { + script "../../../../modules/nf-core/bowtie2/build/main.nf" + process { + """ + input[0] = Channel.value([ [ 
id:'genome' ],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]) + """ + } + } + } + when { + workflow { + """ + input[0] = Channel.of([[ id:'test', single_end:true ], [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ]]) + input[1] = BOWTIE2_BUILD.out.index + input[2] = false + input[3] = false + input[4] = Channel.value([ [ id:'genome' ],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot( + file(workflow.out.bam_orig[0][1]).name, + workflow.out.fastq, + workflow.out.log_out, + file(workflow.out.bam[0][1]).name, + file(workflow.out.bai[0][1]).name, + workflow.out.csi, + workflow.out.stats, + workflow.out.flagstat, + workflow.out.idxstats, + workflow.out.versions + ).match()} + ) + } + } + + test("test_align_bowtie2_paired_end") { + setup { + run("BOWTIE2_BUILD") { + script "../../../../modules/nf-core/bowtie2/build/main.nf" + process { + """ + input[0] = Channel.value([ [ id:'genome' ],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]) + """ + } + } + } + when { + workflow { + """ + input[0] = Channel.of([[ id:'test', single_end:false ], [file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)]]) + input[1] = BOWTIE2_BUILD.out.index + input[2] = false + input[3] = false + input[4] = Channel.value([ [ id:'genome' ],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot( + file(workflow.out.bam_orig[0][1]).name, + workflow.out.fastq, + 
workflow.out.log_out, + file(workflow.out.bam[0][1]).name, + file(workflow.out.bai[0][1]).name, + workflow.out.csi, + workflow.out.stats, + workflow.out.flagstat, + workflow.out.idxstats, + workflow.out.versions + ).match()} + ) + } + } + + test("test_align_bowtie2_single_end - stub") { + + options "-stub" + + setup { + run("BOWTIE2_BUILD") { + script "../../../../modules/nf-core/bowtie2/build/main.nf" + process { + """ + input[0] = Channel.value([ [ id:'genome' ],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]) + """ + } + } + } + when { + workflow { + """ + input[0] = Channel.of([[ id:'test', single_end:true ], [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ]]) + input[1] = BOWTIE2_BUILD.out.index + input[2] = false + input[3] = false + input[4] = Channel.value([ [ id:'genome' ],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot( + file(workflow.out.bam_orig[0][1]).name, + workflow.out.fastq, + workflow.out.log_out, + file(workflow.out.bam[0][1]).name, + file(workflow.out.bai[0][1]).name, + workflow.out.csi, + workflow.out.stats, + workflow.out.flagstat, + workflow.out.idxstats, + workflow.out.versions + ).match()} + ) + } + } + + test("test_align_bowtie2_paired_end - stub") { + + options "-stub" + + setup { + run("BOWTIE2_BUILD") { + script "../../../../modules/nf-core/bowtie2/build/main.nf" + process { + """ + input[0] = Channel.value([ [ id:'genome' ],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]) + """ + } + } + } + when { + workflow { + """ + input[0] = Channel.of([[ id:'test', single_end:false ], [file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), 
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)]]) + input[1] = BOWTIE2_BUILD.out.index + input[2] = false + input[3] = false + input[4] = Channel.value([ [ id:'genome' ],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot( + file(workflow.out.bam_orig[0][1]).name, + workflow.out.fastq, + workflow.out.log_out, + file(workflow.out.bam[0][1]).name, + file(workflow.out.bai[0][1]).name, + workflow.out.csi, + workflow.out.stats, + workflow.out.flagstat, + workflow.out.idxstats, + workflow.out.versions + ).match()} + ) + } + } +} diff --git a/subworkflows/nf-core/fastq_align_bowtie2/tests/main.nf.test.snap b/subworkflows/nf-core/fastq_align_bowtie2/tests/main.nf.test.snap new file mode 100644 index 00000000..c8490961 --- /dev/null +++ b/subworkflows/nf-core/fastq_align_bowtie2/tests/main.nf.test.snap @@ -0,0 +1,126 @@ +{ + "test_align_bowtie2_single_end": { + "content": [ + "test.bam", + [ + + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.bowtie2.log:md5,7b8a9e61b7646da1089b041333c41a87" + ] + ], + "test.sorted.bam", + "test.sorted.bam.bai", + [ + + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.sorted.bam.stats:md5,d499a70739ffa0a96880eaad229dcf7c" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.sorted.bam.flagstat:md5,e9ce9093133116bc54fd335cfe698372" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.sorted.bam.idxstats:md5,e16eb632f7f462514b0873c7ac8ac905" + ] + ], + [ + "versions.yml:md5,0b1901279f738fa8e22cabea6e2a48bd", + "versions.yml:md5,60be918ac1032f74137ad27024c002ef", + "versions.yml:md5,8644be336f2fd05a04b95050a4de9094", + "versions.yml:md5,aab337e63eac9055aadb9a35cec16053", + "versions.yml:md5,dd60999d74ea42ae4f5483c9d94507f3", + 
"versions.yml:md5,ddb252583f75777033ee6467e4b6d545" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-03T13:07:32.050239" + }, + "test_align_bowtie2_paired_end": { + "content": [ + "test.bam", + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.bowtie2.log:md5,bd89ce1b28c93bf822bae391ffcedd19" + ] + ], + "test.sorted.bam", + "test.sorted.bam.bai", + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.stats:md5,c02dbd116c1f49339dda208cb950261d" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.flagstat:md5,49f3d51a8804ce58fe9cecd2549d279b" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.idxstats:md5,29ff2fa56d35b2a47625b8f517f1a947" + ] + ], + [ + "versions.yml:md5,0b1901279f738fa8e22cabea6e2a48bd", + "versions.yml:md5,60be918ac1032f74137ad27024c002ef", + "versions.yml:md5,8644be336f2fd05a04b95050a4de9094", + "versions.yml:md5,aab337e63eac9055aadb9a35cec16053", + "versions.yml:md5,dd60999d74ea42ae4f5483c9d94507f3", + "versions.yml:md5,ddb252583f75777033ee6467e4b6d545" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-03T13:07:48.653475" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/fastq_align_bowtie2/tests/nextflow.config b/subworkflows/nf-core/fastq_align_bowtie2/tests/nextflow.config new file mode 100644 index 00000000..2f85e807 --- /dev/null +++ b/subworkflows/nf-core/fastq_align_bowtie2/tests/nextflow.config @@ -0,0 +1,8 @@ +process { + withName: '.*:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_.*' { + ext.prefix = { "${meta.id}.sorted" } + } + withName: '.*:BAM_SORT_STATS_SAMTOOLS:BAM_STATS_SAMTOOLS:.*' { + ext.prefix = { "${meta.id}.sorted.bam" } + } +} diff --git a/subworkflows/nf-core/fastq_align_bowtie2/tests/tags.yml b/subworkflows/nf-core/fastq_align_bowtie2/tests/tags.yml new file mode 100644 index 00000000..267bcc77 --- /dev/null 
+++ b/subworkflows/nf-core/fastq_align_bowtie2/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/fastq_align_bowtie2: + - subworkflows/nf-core/fastq_align_bowtie2/** diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf index bfd25876..2f30e9a4 100644 --- a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf +++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf @@ -98,7 +98,7 @@ def workflowVersionToYAML() { // Get channel of software versions used in pipeline in YAML format // def softwareVersionsToYAML(ch_versions) { - return ch_versions.unique().map { version -> processVersionsFromYAML(version) }.unique().mix(Channel.of(workflowVersionToYAML())) + return ch_versions.unique().map { version -> processVersionsFromYAML(version) }.unique().mix(channel.of(workflowVersionToYAML())) } // diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml deleted file mode 100644 index ac8523c9..00000000 --- a/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml +++ /dev/null @@ -1,2 +0,0 @@ -subworkflows/utils_nfcore_pipeline: - - subworkflows/nf-core/utils_nfcore_pipeline/** diff --git a/subworkflows/nf-core/utils_nfschema_plugin/main.nf b/subworkflows/nf-core/utils_nfschema_plugin/main.nf index 4994303e..1df8b76f 100644 --- a/subworkflows/nf-core/utils_nfschema_plugin/main.nf +++ b/subworkflows/nf-core/utils_nfschema_plugin/main.nf @@ -4,6 +4,7 @@ include { paramsSummaryLog } from 'plugin/nf-schema' include { validateParameters } from 'plugin/nf-schema' +include { paramsHelp } from 'plugin/nf-schema' workflow UTILS_NFSCHEMA_PLUGIN { @@ -15,32 +16,58 @@ workflow UTILS_NFSCHEMA_PLUGIN { // when this input is empty it will automatically use the configured schema or // "${projectDir}/nextflow_schema.json" as default. 
This input should not be empty // for meta pipelines + help // boolean: show help message + help_full // boolean: show full help message + show_hidden // boolean: show hidden parameters in help message + before_text // string: text to show before the help message and parameters summary + after_text // string: text to show after the help message and parameters summary + command // string: an example command of the pipeline main: + if(help || help_full) { + help_options = [ + beforeText: before_text, + afterText: after_text, + command: command, + showHidden: show_hidden, + fullHelp: help_full, + ] + if(parameters_schema) { + help_options << [parametersSchema: parameters_schema] + } + log.info paramsHelp( + help_options, + (params.help instanceof String && params.help != "true") ? params.help : "", + ) + exit 0 + } + // // Print parameter summary to stdout. This will display the parameters // that differ from the default given in the JSON schema // + + summary_options = [:] if(parameters_schema) { - log.info paramsSummaryLog(input_workflow, parameters_schema:parameters_schema) - } else { - log.info paramsSummaryLog(input_workflow) + summary_options << [parametersSchema: parameters_schema] } + log.info before_text + log.info paramsSummaryLog(summary_options, input_workflow) + log.info after_text // // Validate the parameters using nextflow_schema.json or the schema // given via the validation.parametersSchema configuration option // if(validate_params) { + validateOptions = [:] if(parameters_schema) { - validateParameters(parameters_schema:parameters_schema) - } else { - validateParameters() + validateOptions << [parametersSchema: parameters_schema] } + validateParameters(validateOptions) } emit: dummy_emit = true } - diff --git a/subworkflows/nf-core/utils_nfschema_plugin/tests/main.nf.test b/subworkflows/nf-core/utils_nfschema_plugin/tests/main.nf.test index 8fb30164..c977917a 100644 --- a/subworkflows/nf-core/utils_nfschema_plugin/tests/main.nf.test +++ 
b/subworkflows/nf-core/utils_nfschema_plugin/tests/main.nf.test @@ -25,6 +25,12 @@ nextflow_workflow { input[0] = workflow input[1] = validate_params input[2] = "" + input[3] = false + input[4] = false + input[5] = false + input[6] = "" + input[7] = "" + input[8] = "" """ } } @@ -51,6 +57,12 @@ nextflow_workflow { input[0] = workflow input[1] = validate_params input[2] = "" + input[3] = false + input[4] = false + input[5] = false + input[6] = "" + input[7] = "" + input[8] = "" """ } } @@ -77,6 +89,12 @@ nextflow_workflow { input[0] = workflow input[1] = validate_params input[2] = "${projectDir}/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json" + input[3] = false + input[4] = false + input[5] = false + input[6] = "" + input[7] = "" + input[8] = "" """ } } @@ -103,6 +121,12 @@ nextflow_workflow { input[0] = workflow input[1] = validate_params input[2] = "${projectDir}/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json" + input[3] = false + input[4] = false + input[5] = false + input[6] = "" + input[7] = "" + input[8] = "" """ } } @@ -114,4 +138,36 @@ nextflow_workflow { ) } } + + test("Should create a help message") { + + when { + + params { + test_data = '' + outdir = null + } + + workflow { + """ + validate_params = true + input[0] = workflow + input[1] = validate_params + input[2] = "${projectDir}/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json" + input[3] = true + input[4] = false + input[5] = false + input[6] = "Before" + input[7] = "After" + input[8] = "nextflow run test/test" + """ + } + } + + then { + assertAll( + { assert workflow.success } + ) + } + } } diff --git a/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config b/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config index 0907ac58..f6537cc3 100644 --- a/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config +++ b/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow.config @@ -1,8 +1,8 @@ plugins { - id 
"nf-schema@2.1.0" + id "nf-schema@2.6.1" } validation { parametersSchema = "${projectDir}/subworkflows/nf-core/utils_nfschema_plugin/tests/nextflow_schema.json" monochromeLogs = true -} \ No newline at end of file +} diff --git a/tests/.nftignore b/tests/.nftignore new file mode 100644 index 00000000..158c83c5 --- /dev/null +++ b/tests/.nftignore @@ -0,0 +1,11 @@ +.DS_Store +multiqc/multiqc_data/fastqc_top_overrepresented_sequences_table.txt +multiqc/multiqc_data/BETA-multiqc.parquet +multiqc/multiqc_data/multiqc.log +multiqc/multiqc_data/multiqc_data.json +multiqc/multiqc_data/multiqc_sources.txt +multiqc/multiqc_data/multiqc_software_versions.txt +multiqc/multiqc_plots/{svg,pdf,png}/*.{svg,pdf,png} +multiqc/multiqc_report.html +fastqc/*_fastqc.{html,zip} +pipeline_info/*.{html,json,txt,yml} diff --git a/tests/default.nf.test b/tests/default.nf.test new file mode 100644 index 00000000..8bb40337 --- /dev/null +++ b/tests/default.nf.test @@ -0,0 +1,35 @@ +nextflow_pipeline { + + name "Test pipeline" + script "../main.nf" + tag "pipeline" + + test("-profile test") { + + when { + params { + outdir = "$outputDir" + } + } + + then { + // stable_name: All files + folders in ${params.outdir}/ with a stable name + def stable_name = getAllFilesFromDir(params.outdir, relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']) + // stable_path: All files in ${params.outdir}/ with stable content + def stable_path = getAllFilesFromDir(params.outdir, ignoreFile: 'tests/.nftignore') + assertAll( + { assert workflow.success}, + { assert snapshot( + // Number of successful tasks + workflow.trace.succeeded().size(), + // pipeline versions.yml file for multiqc from which Nextflow version is removed because we test pipelines on multiple Nextflow versions + removeNextflowVersion("$outputDir/pipeline_info/nf_core_bactmap_software_mqc_versions.yml"), + // All stable path name, with a relative path + stable_name, + // All files with stable contents + stable_path + 
).match() } + ) + } + } +} diff --git a/tests/default.nf.test.snap b/tests/default.nf.test.snap new file mode 100644 index 00000000..8cfb934d --- /dev/null +++ b/tests/default.nf.test.snap @@ -0,0 +1,545 @@ +{ + "-profile test": { + "content": [ + 63, + { + "ALIGNPSEUDOGENOMES": { + "multi2single_sequence.py": 1.0 + }, + "BCFTOOLS_CONSENSUS": { + "bcftools": 1.2 + }, + "BCFTOOLS_FILTER": { + "bcftools": 1.22 + }, + "BCFTOOLS_INDEX": { + "bcftools": 1.22 + }, + "BCFTOOLS_NORM": { + "bcftools": 1.22 + }, + "BCFTOOLS_QUERY": { + "bcftools": 1.22 + }, + "BCFTOOLS_SORT": { + "bcftools": 1.22 + }, + "BCFTOOLS_STATS": { + "bcftools": 1.22 + }, + "BEDTOOLS_GENOMECOV": { + "bedtools": "2.31.1" + }, + "BEDTOOLS_SUBTRACT": { + "bedtools": "2.31.1" + }, + "BOWTIE2_ALIGN": { + "bowtie2": "2.5.4", + "samtools": 1.21, + "pigz": 2.8 + }, + "BOWTIE2_BUILD": { + "bowtie2": "2.5.4" + }, + "FASTP_PAIRED": { + "fastp": "1.0.1" + }, + "FASTQC": { + "fastqc": "0.12.1" + }, + "FASTQC_PROCESSED": { + "fastqc": "0.12.1" + }, + "FASTQSCANPARSE_PROCESSED": { + "fastqscan_parser.py": 1.0 + }, + "FASTQSCANPARSE_RAW": { + "fastqscan_parser.py": 1.0 + }, + "FASTQSCAN_PROCESSED": { + "fastqscan": "0.4.4" + }, + "FASTQSCAN_RAW": { + "fastqscan": "0.4.4" + }, + "FREEBAYES": { + "freebayes": "1.3.10" + }, + "GET_GENOME_SIZE": { + "gawk": "5.3.0" + }, + "GUNZIP": { + "gunzip": 1.13 + }, + "MINIMAP2_INDEX": { + "minimap2": "2.29-r1283" + }, + "RASUSA": { + "rasusa": "0.3.0" + }, + "READSTATS_PARSE": { + "read_stats_parser.py": 1.0 + }, + "READ_STATS": { + "read_stats.py": 1.0 + }, + "RENAME_FASTA_HEADER": { + "sed": 4.9 + }, + "SAMTOOLS_FAIDX": { + "samtools": "1.22.1" + }, + "SAMTOOLS_FLAGSTAT": { + "samtools": "1.22.1" + }, + "SAMTOOLS_IDXSTATS": { + "samtools": "1.22.1" + }, + "SAMTOOLS_INDEX": { + "samtools": "1.22.1" + }, + "SAMTOOLS_SORT": { + "samtools": "1.22.1" + }, + "SAMTOOLS_STATS": { + "samtools": "1.22.1" + }, + "SEQTK_COMP": { + "seqtk": "1.4-r122" + }, + "SEQTK_PARSE": { + 
"seqtk_parser.py": 1.0 + }, + "Workflow": { + "nf-core/bactmap": "v2.0.0" + } + }, + [ + "bcftools", + "bcftools/consensus", + "bcftools/consensus/01.fa", + "bcftools/consensus/02.fa", + "bcftools/filter", + "bcftools/filter/01.filtered.vcf.gz", + "bcftools/filter/01.filtered.vcf.gz.tbi", + "bcftools/filter/02.filtered.vcf.gz", + "bcftools/filter/02.filtered.vcf.gz.tbi", + "bcftools/query", + "bcftools/query/01.txt", + "bcftools/query/02.txt", + "bcftools/sort", + "bcftools/sort/01.sorted.vcf.gz", + "bcftools/sort/01.sorted.vcf.gz.tbi", + "bcftools/sort/02.sorted.vcf.gz", + "bcftools/sort/02.sorted.vcf.gz.tbi", + "bcftools/stats", + "bcftools/stats/01.bcftools_stats.txt", + "bcftools/stats/02.bcftools_stats.txt", + "bedtools", + "bedtools/genomecov", + "bedtools/genomecov/01.bed", + "bedtools/genomecov/02.bed", + "bedtools/subtract", + "bedtools/subtract/01.subtracted.bed", + "bedtools/subtract/02.subtracted.bed", + "bowtie2", + "bowtie2/align", + "bowtie2/align/01.bowtie2.log", + "bowtie2/align/02.bowtie2.log", + "bowtie2/build", + "bowtie2/build/bowtie2", + "bowtie2/build/bowtie2/genome.fna.1.bt2", + "bowtie2/build/bowtie2/genome.fna.2.bt2", + "bowtie2/build/bowtie2/genome.fna.3.bt2", + "bowtie2/build/bowtie2/genome.fna.4.bt2", + "bowtie2/build/bowtie2/genome.fna.rev.1.bt2", + "bowtie2/build/bowtie2/genome.fna.rev.2.bt2", + "bowtie2/build/versions.yml", + "fastp", + "fastp/01_test_1.fastp.html", + "fastp/01_test_1.fastp.json", + "fastp/01_test_1.fastp.log", + "fastp/02_test_2.fastp.html", + "fastp/02_test_2.fastp.json", + "fastp/02_test_2.fastp.log", + "fastqc", + "fastqc/processed", + "fastqc/processed/01_test_1_processed_1_fastqc.html", + "fastqc/processed/01_test_1_processed_1_fastqc.zip", + "fastqc/processed/01_test_1_processed_2_fastqc.html", + "fastqc/processed/01_test_1_processed_2_fastqc.zip", + "fastqc/processed/02_test_2_processed_1_fastqc.html", + "fastqc/processed/02_test_2_processed_1_fastqc.zip", + 
"fastqc/processed/02_test_2_processed_2_fastqc.html", + "fastqc/processed/02_test_2_processed_2_fastqc.zip", + "fastqc/raw", + "fastqc/raw/01_test_1_raw_1_fastqc.html", + "fastqc/raw/01_test_1_raw_1_fastqc.zip", + "fastqc/raw/01_test_1_raw_2_fastqc.html", + "fastqc/raw/01_test_1_raw_2_fastqc.zip", + "fastqc/raw/02_test_2_raw_1_fastqc.html", + "fastqc/raw/02_test_2_raw_1_fastqc.zip", + "fastqc/raw/02_test_2_raw_2_fastqc.html", + "fastqc/raw/02_test_2_raw_2_fastqc.zip", + "fastqscan", + "fastqscan/processed", + "fastqscan/processed/01_test_1.processed.json", + "fastqscan/processed/02_test_2.processed.json", + "fastqscan/raw", + "fastqscan/raw/01_test_1.raw.json", + "fastqscan/raw/02_test_2.raw.json", + "filtered_variants", + "filtered_variants/01.filtered.norm.vcf.gz", + "filtered_variants/01.filtered.norm.vcf.gz.tbi", + "filtered_variants/02.filtered.norm.vcf.gz", + "filtered_variants/02.filtered.norm.vcf.gz.tbi", + "freebayes", + "freebayes/01.vcf.gz", + "freebayes/02.vcf.gz", + "get", + "gunzip", + "gunzip/genome.fna", + "minimap2", + "minimap2/index", + "minimap2/index/genome.mmi", + "multiqc", + "multiqc/multiqc_data", + "multiqc/multiqc_data/fastp-insert-size-plot.txt", + "multiqc/multiqc_data/fastp-seq-content-gc-plot_Read_1_After_filtering.txt", + "multiqc/multiqc_data/fastp-seq-content-gc-plot_Read_1_Before_filtering.txt", + "multiqc/multiqc_data/fastp-seq-content-gc-plot_Read_2_After_filtering.txt", + "multiqc/multiqc_data/fastp-seq-content-gc-plot_Read_2_Before_filtering.txt", + "multiqc/multiqc_data/fastp-seq-content-n-plot_Read_1_After_filtering.txt", + "multiqc/multiqc_data/fastp-seq-content-n-plot_Read_1_Before_filtering.txt", + "multiqc/multiqc_data/fastp-seq-content-n-plot_Read_2_After_filtering.txt", + "multiqc/multiqc_data/fastp-seq-content-n-plot_Read_2_Before_filtering.txt", + "multiqc/multiqc_data/fastp-seq-quality-plot_Read_1_After_filtering.txt", + "multiqc/multiqc_data/fastp-seq-quality-plot_Read_1_Before_filtering.txt", + 
"multiqc/multiqc_data/fastp-seq-quality-plot_Read_2_After_filtering.txt", + "multiqc/multiqc_data/fastp-seq-quality-plot_Read_2_Before_filtering.txt", + "multiqc/multiqc_data/fastp_filtered_reads_plot.txt", + "multiqc/multiqc_data/fastqc-1-status-check-heatmap.txt", + "multiqc/multiqc_data/fastqc-1_per_base_n_content_plot.txt", + "multiqc/multiqc_data/fastqc-1_per_base_sequence_quality_plot.txt", + "multiqc/multiqc_data/fastqc-1_per_sequence_gc_content_plot_Counts.txt", + "multiqc/multiqc_data/fastqc-1_per_sequence_gc_content_plot_Percentages.txt", + "multiqc/multiqc_data/fastqc-1_per_sequence_quality_scores_plot.txt", + "multiqc/multiqc_data/fastqc-1_sequence_counts_plot.txt", + "multiqc/multiqc_data/fastqc-1_sequence_duplication_levels_plot.txt", + "multiqc/multiqc_data/fastqc-status-check-heatmap.txt", + "multiqc/multiqc_data/fastqc_per_base_n_content_plot.txt", + "multiqc/multiqc_data/fastqc_per_base_sequence_quality_plot.txt", + "multiqc/multiqc_data/fastqc_per_sequence_gc_content_plot_Counts.txt", + "multiqc/multiqc_data/fastqc_per_sequence_gc_content_plot_Percentages.txt", + "multiqc/multiqc_data/fastqc_per_sequence_quality_scores_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_counts_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_duplication_levels_plot.txt", + "multiqc/multiqc_data/fastqc_sequence_length_distribution_plot.txt", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_bcftools_stats_bcftools.txt", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_fastp.txt", + "multiqc/multiqc_data/multiqc_fastqc.txt", + "multiqc/multiqc_data/multiqc_fastqc_fastqc-1.txt", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats_samtools.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", 
+ "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/fastp-insert-size-plot.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-content-gc-plot_Read_1_After_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-content-gc-plot_Read_1_Before_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-content-gc-plot_Read_2_After_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-content-gc-plot_Read_2_Before_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-content-n-plot_Read_1_After_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-content-n-plot_Read_1_Before_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-content-n-plot_Read_2_After_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-content-n-plot_Read_2_Before_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-quality-plot_Read_1_After_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-quality-plot_Read_1_Before_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-quality-plot_Read_2_After_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp-seq-quality-plot_Read_2_Before_filtering.pdf", + "multiqc/multiqc_plots/pdf/fastp_filtered_reads_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/fastp_filtered_reads_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/fastqc-1-status-check-heatmap.pdf", + "multiqc/multiqc_plots/pdf/fastqc-1_per_base_n_content_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc-1_per_base_sequence_quality_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc-1_per_sequence_gc_content_plot_Counts.pdf", + "multiqc/multiqc_plots/pdf/fastqc-1_per_sequence_gc_content_plot_Percentages.pdf", + "multiqc/multiqc_plots/pdf/fastqc-1_per_sequence_quality_scores_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc-1_sequence_counts_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/fastqc-1_sequence_counts_plot-pct.pdf", + 
"multiqc/multiqc_plots/pdf/fastqc-1_sequence_duplication_levels_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc-status-check-heatmap.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_base_n_content_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_base_sequence_quality_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_gc_content_plot_Counts.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_gc_content_plot_Percentages.pdf", + "multiqc/multiqc_plots/pdf/fastqc_per_sequence_quality_scores_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_counts_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_counts_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_duplication_levels_plot.pdf", + "multiqc/multiqc_plots/pdf/fastqc_sequence_length_distribution_plot.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/fastp-insert-size-plot.png", + "multiqc/multiqc_plots/png/fastp-seq-content-gc-plot_Read_1_After_filtering.png", + "multiqc/multiqc_plots/png/fastp-seq-content-gc-plot_Read_1_Before_filtering.png", + "multiqc/multiqc_plots/png/fastp-seq-content-gc-plot_Read_2_After_filtering.png", + "multiqc/multiqc_plots/png/fastp-seq-content-gc-plot_Read_2_Before_filtering.png", + "multiqc/multiqc_plots/png/fastp-seq-content-n-plot_Read_1_After_filtering.png", + "multiqc/multiqc_plots/png/fastp-seq-content-n-plot_Read_1_Before_filtering.png", + "multiqc/multiqc_plots/png/fastp-seq-content-n-plot_Read_2_After_filtering.png", + "multiqc/multiqc_plots/png/fastp-seq-content-n-plot_Read_2_Before_filtering.png", + "multiqc/multiqc_plots/png/fastp-seq-quality-plot_Read_1_After_filtering.png", + "multiqc/multiqc_plots/png/fastp-seq-quality-plot_Read_1_Before_filtering.png", + "multiqc/multiqc_plots/png/fastp-seq-quality-plot_Read_2_After_filtering.png", + 
"multiqc/multiqc_plots/png/fastp-seq-quality-plot_Read_2_Before_filtering.png", + "multiqc/multiqc_plots/png/fastp_filtered_reads_plot-cnt.png", + "multiqc/multiqc_plots/png/fastp_filtered_reads_plot-pct.png", + "multiqc/multiqc_plots/png/fastqc-1-status-check-heatmap.png", + "multiqc/multiqc_plots/png/fastqc-1_per_base_n_content_plot.png", + "multiqc/multiqc_plots/png/fastqc-1_per_base_sequence_quality_plot.png", + "multiqc/multiqc_plots/png/fastqc-1_per_sequence_gc_content_plot_Counts.png", + "multiqc/multiqc_plots/png/fastqc-1_per_sequence_gc_content_plot_Percentages.png", + "multiqc/multiqc_plots/png/fastqc-1_per_sequence_quality_scores_plot.png", + "multiqc/multiqc_plots/png/fastqc-1_sequence_counts_plot-cnt.png", + "multiqc/multiqc_plots/png/fastqc-1_sequence_counts_plot-pct.png", + "multiqc/multiqc_plots/png/fastqc-1_sequence_duplication_levels_plot.png", + "multiqc/multiqc_plots/png/fastqc-status-check-heatmap.png", + "multiqc/multiqc_plots/png/fastqc_per_base_n_content_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_base_sequence_quality_plot.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_gc_content_plot_Counts.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_gc_content_plot_Percentages.png", + "multiqc/multiqc_plots/png/fastqc_per_sequence_quality_scores_plot.png", + "multiqc/multiqc_plots/png/fastqc_sequence_counts_plot-cnt.png", + "multiqc/multiqc_plots/png/fastqc_sequence_counts_plot-pct.png", + "multiqc/multiqc_plots/png/fastqc_sequence_duplication_levels_plot.png", + "multiqc/multiqc_plots/png/fastqc_sequence_length_distribution_plot.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/fastp-insert-size-plot.svg", + "multiqc/multiqc_plots/svg/fastp-seq-content-gc-plot_Read_1_After_filtering.svg", + 
"multiqc/multiqc_plots/svg/fastp-seq-content-gc-plot_Read_1_Before_filtering.svg", + "multiqc/multiqc_plots/svg/fastp-seq-content-gc-plot_Read_2_After_filtering.svg", + "multiqc/multiqc_plots/svg/fastp-seq-content-gc-plot_Read_2_Before_filtering.svg", + "multiqc/multiqc_plots/svg/fastp-seq-content-n-plot_Read_1_After_filtering.svg", + "multiqc/multiqc_plots/svg/fastp-seq-content-n-plot_Read_1_Before_filtering.svg", + "multiqc/multiqc_plots/svg/fastp-seq-content-n-plot_Read_2_After_filtering.svg", + "multiqc/multiqc_plots/svg/fastp-seq-content-n-plot_Read_2_Before_filtering.svg", + "multiqc/multiqc_plots/svg/fastp-seq-quality-plot_Read_1_After_filtering.svg", + "multiqc/multiqc_plots/svg/fastp-seq-quality-plot_Read_1_Before_filtering.svg", + "multiqc/multiqc_plots/svg/fastp-seq-quality-plot_Read_2_After_filtering.svg", + "multiqc/multiqc_plots/svg/fastp-seq-quality-plot_Read_2_Before_filtering.svg", + "multiqc/multiqc_plots/svg/fastp_filtered_reads_plot-cnt.svg", + "multiqc/multiqc_plots/svg/fastp_filtered_reads_plot-pct.svg", + "multiqc/multiqc_plots/svg/fastqc-1-status-check-heatmap.svg", + "multiqc/multiqc_plots/svg/fastqc-1_per_base_n_content_plot.svg", + "multiqc/multiqc_plots/svg/fastqc-1_per_base_sequence_quality_plot.svg", + "multiqc/multiqc_plots/svg/fastqc-1_per_sequence_gc_content_plot_Counts.svg", + "multiqc/multiqc_plots/svg/fastqc-1_per_sequence_gc_content_plot_Percentages.svg", + "multiqc/multiqc_plots/svg/fastqc-1_per_sequence_quality_scores_plot.svg", + "multiqc/multiqc_plots/svg/fastqc-1_sequence_counts_plot-cnt.svg", + "multiqc/multiqc_plots/svg/fastqc-1_sequence_counts_plot-pct.svg", + "multiqc/multiqc_plots/svg/fastqc-1_sequence_duplication_levels_plot.svg", + "multiqc/multiqc_plots/svg/fastqc-status-check-heatmap.svg", + "multiqc/multiqc_plots/svg/fastqc_per_base_n_content_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_base_sequence_quality_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_gc_content_plot_Counts.svg", + 
"multiqc/multiqc_plots/svg/fastqc_per_sequence_gc_content_plot_Percentages.svg", + "multiqc/multiqc_plots/svg/fastqc_per_sequence_quality_scores_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_counts_plot-cnt.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_counts_plot-pct.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_duplication_levels_plot.svg", + "multiqc/multiqc_plots/svg/fastqc_sequence_length_distribution_plot.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/nf_core_bactmap_software_mqc_versions.yml", + "pseudogenomes", + "pseudogenomes/01.fa", + "pseudogenomes/02.fa", + "pseudogenomes/aligned_pseudogenomes.fas", + "rasusa", + "rasusa/01.subsampled_1.fastq.gz", + "rasusa/01.subsampled_2.fastq.gz", + "rasusa/02.subsampled_1.fastq.gz", + "rasusa/02.subsampled_2.fastq.gz", + "read_stats", + "read_stats/01_test_1.read_stats.csv", + "read_stats/02_test_2.read_stats.csv", + "samtools", + "samtools/faidx", + "samtools/faidx/genome.fna.fai", + "samtools/sort", + "samtools/sort/01.sorted.bam", + "samtools/sort/01.sorted.bam.bai", + "samtools/sort/02.sorted.bam", + "samtools/sort/02.sorted.bam.bai", + "samtools/stats", + "samtools/stats/01.sorted.flagstat", + "samtools/stats/01.sorted.idxstats", + "samtools/stats/01.sorted.stats", + "samtools/stats/02.sorted.flagstat", + "samtools/stats/02.sorted.idxstats", + "samtools/stats/02.sorted.stats", + "seqtk", + "seqtk/01.seqtk_stats.tsv", + "seqtk/02.seqtk_stats.tsv", + "summaries", + "summaries/mapping_summary.tsv", + "summaries/processed_fastq-scan_summary.tsv", + "summaries/raw_fastq-scan_summary.tsv", + "summaries/read_stats_summary.tsv" + ], + [ + "01.fa:md5,17e6433115d1ad1b4ffdf384d783dc29", + "02.fa:md5,9048642262df13a6fc427b2d971b7546", + "01.filtered.vcf.gz:md5,ced07b77c22cba9a53f2815c0256aa29", + 
"01.filtered.vcf.gz.tbi:md5,4cb176febbc8c26d717a6c6e67b9c905", + "02.filtered.vcf.gz:md5,7e7c04e92033cfb373c511691dae53c5", + "02.filtered.vcf.gz.tbi:md5,4cb176febbc8c26d717a6c6e67b9c905", + "01.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "02.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "01.sorted.vcf.gz:md5,5998257ae03a4beb8931f67d8eed764f", + "01.sorted.vcf.gz.tbi:md5,93413e333781d63d89023745e5ed4ed4", + "02.sorted.vcf.gz:md5,b6eafab8154ecb0a7a9ee893dffcc54f", + "02.sorted.vcf.gz.tbi:md5,4cb176febbc8c26d717a6c6e67b9c905", + "01.bcftools_stats.txt:md5,71a4d86f777c160fd969b6382c2f5655", + "02.bcftools_stats.txt:md5,e4d4ebc423228aa03e48babac21ccf44", + "01.bed:md5,3610ca49fad02e567afc984e5ae0cce8", + "02.bed:md5,37284f438898485550db0a584251e69f", + "01.subtracted.bed:md5,3610ca49fad02e567afc984e5ae0cce8", + "02.subtracted.bed:md5,37284f438898485550db0a584251e69f", + "01.bowtie2.log:md5,81a5da4710835dc42e60430a92ad36e5", + "02.bowtie2.log:md5,c352791c3bf8ae5ecc37ee8618716f68", + "genome.fna.1.bt2:md5,829b755b64d0ca0edfeacffdebef1c0b", + "genome.fna.2.bt2:md5,4845f032a953c5a4da5cdf49723eebd2", + "genome.fna.3.bt2:md5,6d1095f738d00d1bde2dc4f954b33179", + "genome.fna.4.bt2:md5,ab585b4ebed19b28fb670547aecdccf0", + "genome.fna.rev.1.bt2:md5,a1cc6383eb8d90710502a540f87bfa13", + "genome.fna.rev.2.bt2:md5,871b69582268b9404cb1884a401cdc39", + "versions.yml:md5,f986bc29ed66b12ac01d6a64a13c3227", + "01_test_1.fastp.html:md5,77e3843e6fa6a069f76576a5c8c99b91", + "01_test_1.fastp.json:md5,200f46137ea23811d26f756e9cbc633c", + "01_test_1.fastp.log:md5,da47c5a68d591c795a302a930503db0c", + "02_test_2.fastp.html:md5,78636145d5ed1c0f68945fc69d4dc5bf", + "02_test_2.fastp.json:md5,d79d41c98f5835ead398e63a9185facc", + "02_test_2.fastp.log:md5,5fbbb767c45c35133888febdd030275c", + "01_test_1_processed_1_fastqc.html:md5,b2327ae1715a8169942c99126b2f8dbd", + "01_test_1_processed_1_fastqc.zip:md5,198e2c2a57347258ab6eddfc3e4e1e80", + 
"01_test_1_processed_2_fastqc.html:md5,a4be338edad02af650571fead2a225e1", + "01_test_1_processed_2_fastqc.zip:md5,9341885ca56f8c00e8e47920565e3f9f", + "02_test_2_processed_1_fastqc.html:md5,ebe6a21831477e4d244208cdf455f2e3", + "02_test_2_processed_1_fastqc.zip:md5,53c004c3f30c9b524ced1a432441e500", + "02_test_2_processed_2_fastqc.html:md5,397100c73a84a419fbd2631d781cb4d3", + "02_test_2_processed_2_fastqc.zip:md5,9505777b26366459e64cecb7faa336ef", + "01_test_1_raw_1_fastqc.html:md5,2c11d2e70ed3ca42106d1ffaa753812c", + "01_test_1_raw_1_fastqc.zip:md5,41bed910f3ce5e74b97f974ceed9b8f1", + "01_test_1_raw_2_fastqc.html:md5,bbed35e4e0267f7b0d889a47be342120", + "01_test_1_raw_2_fastqc.zip:md5,1ea438b21c3b284f199afe7d533630ca", + "02_test_2_raw_1_fastqc.html:md5,e2a92f934b54d68da36a9454b6d53318", + "02_test_2_raw_1_fastqc.zip:md5,504092823ad31ab4a129ce6df77beed1", + "02_test_2_raw_2_fastqc.html:md5,263f1bafbd45d321bb8c34caaebb4ab8", + "02_test_2_raw_2_fastqc.zip:md5,b2a4a24d0a171ff26dfc22964123ab06", + "01_test_1.processed.json:md5,125efed1942221fbf536885fbf475e66", + "02_test_2.processed.json:md5,78fbdb421671f255b1c0e12cfe0653a4", + "01_test_1.raw.json:md5,39066d3164d2dbdc7a198468cacaf838", + "02_test_2.raw.json:md5,3cb79d84ea9de94940e4d1799c0e1c2b", + "01.filtered.norm.vcf.gz:md5,e905f8aace7118fd7c775e83d24b4cd9", + "01.filtered.norm.vcf.gz.tbi:md5,4cb176febbc8c26d717a6c6e67b9c905", + "02.filtered.norm.vcf.gz:md5,d97deb363c2dcba13e8a181a1fd5136c", + "02.filtered.norm.vcf.gz.tbi:md5,4cb176febbc8c26d717a6c6e67b9c905", + "01.vcf.gz:md5,5ea465b34918c42c67e4d8f7999614c7", + "02.vcf.gz:md5,1dc400334499b315df22498b8c588bda", + "genome.fna:md5,dafd38f5454b54fbea38245d773062a5", + "genome.mmi:md5,76c23f17ce1cade4a054bf2763b081db", + "fastp-insert-size-plot.txt:md5,162036bf69d2dc7386b43f2bdadac877", + "fastp-seq-content-gc-plot_Read_1_After_filtering.txt:md5,4edee862ece4eecb5045589a36786591", + 
"fastp-seq-content-gc-plot_Read_1_Before_filtering.txt:md5,e1ed0f20f9ee5f6e08a7e4deb49ceb35", + "fastp-seq-content-gc-plot_Read_2_After_filtering.txt:md5,e96f6d5adfea94d5a3c1c1f69b9447fc", + "fastp-seq-content-gc-plot_Read_2_Before_filtering.txt:md5,90b5d0855fa34315556fe0cbbb5b4ff3", + "fastp-seq-content-n-plot_Read_1_After_filtering.txt:md5,9b2e257b68d6fcc7db96f0d8630dd1b7", + "fastp-seq-content-n-plot_Read_1_Before_filtering.txt:md5,9b2e257b68d6fcc7db96f0d8630dd1b7", + "fastp-seq-content-n-plot_Read_2_After_filtering.txt:md5,9b2e257b68d6fcc7db96f0d8630dd1b7", + "fastp-seq-content-n-plot_Read_2_Before_filtering.txt:md5,353cfe0f9e9a15e5eb33e842755d9d4e", + "fastp-seq-quality-plot_Read_1_After_filtering.txt:md5,4634d3dc04a6c471c3431947d300fef4", + "fastp-seq-quality-plot_Read_1_Before_filtering.txt:md5,9dad9391b6b77bd6f4bf117e13370e8a", + "fastp-seq-quality-plot_Read_2_After_filtering.txt:md5,1716923bf26234eea1e5eff9be484423", + "fastp-seq-quality-plot_Read_2_Before_filtering.txt:md5,01f6e617cc8ed0d81678dff0fe09fe78", + "fastp_filtered_reads_plot.txt:md5,b0bb82fc4a23564c834ef8c4f4706faf", + "fastqc-1-status-check-heatmap.txt:md5,2958bbda783b97cef0b64530cbdbb01d", + "fastqc-1_per_base_n_content_plot.txt:md5,ab8e8d876edde1e457b3153888c88107", + "fastqc-1_per_base_sequence_quality_plot.txt:md5,9e8b520d210127656adab7a734f0191e", + "fastqc-1_per_sequence_gc_content_plot_Counts.txt:md5,f1542e9e7398dda884460eaf46795ff5", + "fastqc-1_per_sequence_gc_content_plot_Percentages.txt:md5,86db46ff07ff526eee61d9de82ed76c0", + "fastqc-1_per_sequence_quality_scores_plot.txt:md5,67c58bd6be8e3404ebc1b438d9fcbf2c", + "fastqc-1_sequence_counts_plot.txt:md5,93e8c72254cab3431a2230298d970619", + "fastqc-1_sequence_duplication_levels_plot.txt:md5,c11a44d8967bcc3fd978b70c497dbd9a", + "fastqc-status-check-heatmap.txt:md5,7d06360e828f3e153a0b21051d23911b", + "fastqc_per_base_n_content_plot.txt:md5,ea371a265a80840751645c2476c98754", + 
"fastqc_per_base_sequence_quality_plot.txt:md5,6362bc88473fc3c3e6d69a650fce6b3b", + "fastqc_per_sequence_gc_content_plot_Counts.txt:md5,c8285b24c862d046d1dd1e5f8c4f1419", + "fastqc_per_sequence_gc_content_plot_Percentages.txt:md5,d78f38d530511e784a915a1a1e587e72", + "fastqc_per_sequence_quality_scores_plot.txt:md5,3e61d4e1bc67d2cf85c00d020f6a32a8", + "fastqc_sequence_counts_plot.txt:md5,d1c81af14b6675c3f9f04125c1ecb484", + "fastqc_sequence_duplication_levels_plot.txt:md5,e32366bc23f5fb89aa60ef7b0f29de03", + "fastqc_sequence_length_distribution_plot.txt:md5,cb379474706e24255b725ac25b7d141e", + "llms-full.txt:md5,17bbaebc278b8479d023d7606e18bc06", + "multiqc.parquet:md5,e3c7ec53526a88f66c4aca145dcb6e3c", + "multiqc_bcftools_stats_bcftools.txt:md5,356ce8604c4042691dad523ef4768e39", + "multiqc_citations.txt:md5,57db2426be011862828d18f767d25b57", + "multiqc_fastp.txt:md5,56d6d92282eff1929dc1d594df1cb9d8", + "multiqc_fastqc.txt:md5,1dbca9f02b1b75f5df2ac02c81d78411", + "multiqc_fastqc_fastqc-1.txt:md5,6fb9e3135beb3f43f193bdcb67d247b2", + "multiqc_general_stats.txt:md5,7e1ff0593f1debb00f03f556197541b1", + "multiqc_samtools_stats_samtools.txt:md5,bc33622a9e4c162e1cb0ff5cabede2a6", + "samtools-stats-dp.txt:md5,4b8fe2e0db527dc98fc21d27f2df0e11", + "samtools_alignment_plot.txt:md5,164ec8a3eceb8d56809f7970f1d0290b", + "01.fa:md5,ec73cef39989caf8e39d94271a64612c", + "02.fa:md5,5a1b47e59b09692f7ea9d35da64c0558", + "aligned_pseudogenomes.fas:md5,bf5d42e4d26f61cbf702d30f140f74a8", + "01.subsampled_1.fastq.gz:md5,67f961708debdc99dd25b6e45199bd5e", + "01.subsampled_2.fastq.gz:md5,7d173d53ad835ed689ab7a5c5ac3d89b", + "02.subsampled_1.fastq.gz:md5,c3a57b07c737ea41807a08e4d72d5100", + "02.subsampled_2.fastq.gz:md5,900f282a3822240130b80b0ffd915652", + "01_test_1.read_stats.csv:md5,ff40aaa6720a766448a3b8bde72f530f", + "02_test_2.read_stats.csv:md5,c0186b0b0464a58d780334ebb63cc70a", + "genome.fna.fai:md5,aa44b24652a42e538478dea3492480dd", + 
"01.sorted.bam:md5,2182496d237e58f5c9e54057ce6ac785", + "01.sorted.bam.bai:md5,194c81a937bc1defaffe2e0aa1ddd8e6", + "02.sorted.bam:md5,5a07f35cf8efedded9bec741eab2c84f", + "02.sorted.bam.bai:md5,40d3309085ab80106f2f0c67b5dab19d", + "01.sorted.flagstat:md5,4f152687b9140a16a48856b0bb216289", + "01.sorted.idxstats:md5,aa16d5da39ddbdc94dbeb52ed994c515", + "01.sorted.stats:md5,fcd22c10723786252888ec9c5f47a14a", + "02.sorted.flagstat:md5,23ce0492dce1ce77e1350aa1632b1da9", + "02.sorted.idxstats:md5,0670671323b7d1f14f15625c3c179b9f", + "02.sorted.stats:md5,76b87ccd13bd5d913deb3070063f92a7", + "01.seqtk_stats.tsv:md5,589ff7dc5f0eec7f60c5a940b2500298", + "02.seqtk_stats.tsv:md5,bd6fe576e5972d925ff4ec45fed17eee", + "mapping_summary.tsv:md5,6107518dec4d9802ba35775df42f1e46", + "processed_fastq-scan_summary.tsv:md5,2aa64d4dff6aa9f062872634a2d5c08b", + "raw_fastq-scan_summary.tsv:md5,d175a5a6fde88f7275b7833eff01e454", + "read_stats_summary.tsv:md5,e24c3bce8a36a7ca8924ed05e45b0ae8" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-03T17:23:29.845485" + } +} \ No newline at end of file diff --git a/tests/nextflow.config b/tests/nextflow.config new file mode 100644 index 00000000..c4467193 --- /dev/null +++ b/tests/nextflow.config @@ -0,0 +1,12 @@ +/* +======================================================================================== + Nextflow config file for running nf-test tests +======================================================================================== +*/ + +params { + modules_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/' + pipelines_testdata_base_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/bactmap' +} + +aws.client.anonymous = true // fixes S3 access issues on self-hosted runners diff --git a/workflows/bactmap.nf b/workflows/bactmap.nf index 7d759604..d3759555 100644 --- a/workflows/bactmap.nf +++ b/workflows/bactmap.nf @@ -3,13 +3,70 
@@ IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { FASTQC } from '../modules/nf-core/fastqc/main' -include { MULTIQC } from '../modules/nf-core/multiqc/main' include { paramsSummaryMap } from 'plugin/nf-schema' include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_bactmap_pipeline' +// Check input path parameters to see if they exist +def checkPathParamList = [ params.input, params.fasta, params.multiqc_config, + params.shortread_qc_adapterlist, params.multiqc_logo, + params.multiqc_methods_description ] + +for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } } + +// Check mandatory parameters +if ( params.input ) { + ch_input = file(params.input, checkIfExists: true) +} else { + error("Input samplesheet not specified") +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT LOCAL MODULES/SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules +// + +include { GET_GENOME_SIZE } from '../modules/local/get_genome_size/main' +include { FASTQSCANPARSE as FASTQSCANPARSE_RAW } from '../modules/local/fastq_scan_parse/main' +include { FASTQSCANPARSE as FASTQSCANPARSE_PROCESSED } from '../modules/local/fastq_scan_parse/main' +include { READ_STATS } from '../modules/local/read_stats/main' +include { READSTATS_PARSE } from '../modules/local/read_stats_parse/main' +include { SEQTK_PARSE } from '../modules/local/seqtk_parse' +include { ALIGNPSEUDOGENOMES } from '../modules/local/alignpseudogenomes/main' + +include { SHORTREAD_PREPROCESSING } from 
'../subworkflows/local/shortread_preprocessing/main' +include { LONGREAD_PREPROCESSING } from '../subworkflows/local/longread_preprocessing/main' +include { SHORTREAD_MAPPING } from '../subworkflows/local/shortread_mapping/main' +include { LONGREAD_MAPPING } from '../subworkflows/local/longread_mapping/main' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT NF-CORE MODULES/SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// NF-CORE MODULES/PLUGINS +// +include { BOWTIE2_BUILD } from '../modules/nf-core/bowtie2/build/main' +include { BWAMEM2_INDEX } from '../modules/nf-core/bwamem2/index/main' +include { SAMTOOLS_FAIDX } from '../modules/nf-core/samtools/faidx/main' +include { GUNZIP } from '../modules/nf-core/gunzip/main' +include { FASTQSCAN as FASTQSCAN_RAW } from '../modules/nf-core/fastqscan/main' +include { FASTQC } from '../modules/nf-core/fastqc/main' +include { FALCO } from '../modules/nf-core/falco/main' +include { FASTQSCAN as FASTQSCAN_PROCESSED } from '../modules/nf-core/fastqscan/main' +include { CAT_FASTQ as MERGE_RUNS } from '../modules/nf-core/cat/fastq/main' +include { RASUSA } from '../modules/local/rasusa/main' +include { SNPSITES } from '../modules/nf-core/snpsites/main' +include { MULTIQC } from '../modules/nf-core/multiqc/main' + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUN MAIN WORKFLOW @@ -18,25 +75,324 @@ include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_bact workflow BACTMAP { + adapterlist = params.shortread_qc_adapterlist ? file(params.shortread_qc_adapterlist) : [] + + if ( params.shortread_qc_adapterlist ) { + if ( params.shortread_qc_tool == 'adapterremoval' && !(adapterlist.extension == 'txt') ) error "[nf-core/bactmap] ERROR: AdapterRemoval2 adapter list requires a `.txt` format and extension. 
Check input: --shortread_qc_adapterlist ${params.shortread_qc_adapterlist}" + if ( params.shortread_qc_tool == 'fastp' && !adapterlist.extension.matches(".*(fa|fasta|fna|fas)") ) error "[nf-core/bactmap] ERROR: fastp adapter list requires a `.fasta` format and extension (or fa, fas, fna). Check input: --shortread_qc_adapterlist ${params.shortread_qc_adapterlist}" + } + take: - ch_samplesheet // channel: samplesheet read in from --input + samplesheet // channel: samplesheet read in from --input + ch_fasta // channel: path(reference.fasta) + main: - ch_versions = Channel.empty() - ch_multiqc_files = Channel.empty() - // - // MODULE: Run FastQC - // - FASTQC ( - ch_samplesheet + ch_versions = channel.empty() + ch_multiqc_files = channel.empty() + + // Validate input files and create separate channels for FASTQ, FASTA, and Nanopore data + ch_input = samplesheet + .map { meta, fastq_1, fastq_2 -> + + // Define single_end based on the conditions + if ( !fastq_1 ) { + error("ERROR: Please check input samplesheet: entry `fastq_1` doesn't exist!") + } + meta.single_end = !fastq_2 + + if (meta.instrument_platform == 'OXFORD_NANOPORE' && !meta.single_end) { + error("Error: Please check input samplesheet: for Oxford Nanopore reads entry `fastq_2` should be empty!") + } + return [ meta, fastq_1, fastq_2 ] + } + .branch { meta, fastq_1, fastq_2 -> + nanopore : meta.instrument_platform == 'OXFORD_NANOPORE' + return [ meta + [type: "long"], [fastq_1]] + fastq : meta.instrument_platform != 'OXFORD_NANOPORE' + return [ meta + [ type: "short" ], fastq_2 ? 
[ fastq_1, fastq_2 ] : [ fastq_1 ] ] + } + + ch_input_for_fastqc = ch_input.nanopore.mix( ch_input.fastq ) + + /* + Reference indexing + */ + if (params.shortread_mapping_tool == 'bowtie2') { + ch_index = BOWTIE2_BUILD ( ch_fasta ).index + ch_versions = ch_versions.mix( BOWTIE2_BUILD.out.versions ) + } else { + ch_index = BWAMEM2_INDEX ( ch_fasta ).index + ch_versions = ch_versions.mix( BWAMEM2_INDEX.out.versions ) + } + + /* + MODULE: Index reference file with Samtools faidx + */ + + ch_unzipped_fasta = GUNZIP ( ch_fasta ).gunzip + + SAMTOOLS_FAIDX ( + ch_unzipped_fasta, + [ [ id:'no_fai' ],[] ], + true + ) + + /* + MODULE: Get genome size + */ + + sizes = SAMTOOLS_FAIDX.out.sizes + genome_size = GET_GENOME_SIZE(sizes).ch_genome_size + ch_versions = ch_versions.mix( GET_GENOME_SIZE.out.versions ) + + /* + MODULE: Run fastq-scan + */ + FASTQSCAN_RAW ( + ch_input_for_fastqc ) - ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}) - ch_versions = ch_versions.mix(FASTQC.out.versions.first()) + ch_versions = ch_versions.mix(FASTQSCAN_RAW.out.versions ) + + ch_fastqscanraw_fastqscanparse = FASTQSCAN_RAW.out.json + .map { it[1] } + .collect() + + ch_fastqscanraw_readstats = FASTQSCAN_RAW.out.json + + /* + MODULE: Run fastqscanparse + */ + FASTQSCANPARSE_RAW ( + ch_fastqscanraw_fastqscanparse + ) + ch_versions = ch_versions.mix( FASTQSCANPARSE_RAW.out.versions ) + + /* + MODULE: Run FastQC + */ + + if ( !params.skip_preprocessing_qc ) { + if ( params.preprocessing_qc_tool == 'falco' ) { + FALCO ( ch_input_for_fastqc ) + ch_versions = ch_versions.mix( FALCO.out.versions ) + } else { + FASTQC ( ch_input_for_fastqc ) + } + } + + /* + SUBWORKFLOW: PERFORM PREPROCESSING + */ + + if (params.perform_shortread_qc) { + SHORTREAD_PREPROCESSING(ch_input.fastq, adapterlist) + ch_shortreads_preprocessed = SHORTREAD_PREPROCESSING.out.reads + ch_versions = ch_versions.mix(SHORTREAD_PREPROCESSING.out.versions) + } + else { + ch_shortreads_preprocessed = ch_input.fastq + 
} + + if ( params.perform_longread_qc ) { + ch_longreads_preprocessed = LONGREAD_PREPROCESSING ( ch_input.nanopore ).reads + .map { it -> [ it[0], [it[1]] ] } + ch_versions = ch_versions.mix( LONGREAD_PREPROCESSING.out.versions ) + } else { + ch_longreads_preprocessed = ch_input.nanopore + } + + ch_reads_for_fastqscan = ch_shortreads_preprocessed + .mix( ch_longreads_preprocessed ) - // - // Collate and save software versions - // - softwareVersionsToYAML(ch_versions) + /* + MODULE: Run fastq-scan + */ + FASTQSCAN_PROCESSED ( + ch_reads_for_fastqscan + ) + ch_versions = ch_versions.mix( FASTQSCAN_PROCESSED.out.versions ) + + ch_fastqscanprocessed_fastqscanparse = FASTQSCAN_PROCESSED.out.json + .map { it[1] } + .collect() + + ch_fastqscanprocessed_readstats = FASTQSCAN_PROCESSED.out.json + + /* + MODULE: Run fastqscanparse + */ + FASTQSCANPARSE_PROCESSED ( + ch_fastqscanprocessed_fastqscanparse + ) + ch_versions = ch_versions.mix( FASTQSCANPARSE_PROCESSED.out.versions ) + + /* + MODULE: Calculate read stats + */ + ch_fastqscanraw_readstats // tuple val(meta), path(json) + .join( FASTQSCAN_PROCESSED.out.json ) // tuple val(meta), path(json) + .set { ch_readstats } // tuple val(meta), path(json), path(json) + + READ_STATS ( + ch_readstats + ) + ch_versions = ch_versions.mix(READ_STATS.out.versions) + + ch_readstats_readstatsparse = READ_STATS.out.csv + .map { it[1] } + .collect() + + /* + MODULE: Summarise read stats outputs + */ + READSTATS_PARSE ( + ch_readstats_readstatsparse + ) + ch_versions = ch_versions.mix(READSTATS_PARSE.out.versions) + /* + Run merging + */ + if ( params.perform_runmerging ) { + + ch_reads_for_cat_branch = ch_shortreads_preprocessed + .mix( ch_longreads_preprocessed ) + .map { + meta, reads -> + def meta_new = meta - meta.subMap('run_accession') + [ meta_new, reads ] + } + .groupTuple() + .map { + meta, reads -> + [ meta, reads.flatten() ] + } + .branch { + meta, reads -> + // we can't concatenate files if there is not a second run, we 
branch + // here to separate them out, and mix back in after for efficiency + cat: ( meta.single_end && reads.size() > 1 ) || ( !meta.single_end && reads.size() > 2 ) + skip: true + } + + ch_reads_runmerged = MERGE_RUNS ( ch_reads_for_cat_branch.cat ).reads + .mix( ch_reads_for_cat_branch.skip ) + .map { + meta, reads -> + [ meta, [ reads ].flatten() ] + } + + //ch_versions = ch_versions.mix(MERGE_RUNS.out.versions_cat) + + } else { + ch_reads_runmerged = ch_shortreads_preprocessed + .mix( ch_longreads_preprocessed ) + } + + /* + MODULE: Perform subsampling + */ + if ( params.perform_subsampling ) { + ch_reads_subsampled = RASUSA( ch_reads_runmerged, genome_size, params.subsampling_depth_cutoff ).reads + ch_versions = ch_versions.mix( RASUSA.out.versions ) + } else { + ch_reads_subsampled = ch_reads_runmerged + } + + // Create separate channels for FASTQ and Nanopore data + ch_mapping_input = ch_reads_subsampled + .branch { meta, reads -> + nanopore : meta.instrument_platform == 'OXFORD_NANOPORE' + fastq : meta.instrument_platform != 'OXFORD_NANOPORE' + } + + /* + MODULE: Map short-reads + */ + SHORTREAD_MAPPING ( + ch_mapping_input.fastq, + ch_unzipped_fasta, + ch_index, + SAMTOOLS_FAIDX.out.fai + ) + ch_versions = ch_versions.mix( SHORTREAD_MAPPING.out.versions ) + + /* + MODULE: Map long-reads + */ + LONGREAD_MAPPING ( + ch_unzipped_fasta, + SAMTOOLS_FAIDX.out.fai, + ch_mapping_input.nanopore + ) + ch_versions = ch_versions.mix( LONGREAD_MAPPING.out.versions ) + + /* + MODULE: Summarise seqtk outputs + */ + ch_seqtk_seqtkparse = SHORTREAD_MAPPING.out.seqtk_stats + .mix( LONGREAD_MAPPING.out.seqtk_stats ) + + SEQTK_PARSE ( + ch_seqtk_seqtkparse.map { tsv -> tsv[1] }.collect() + ) + ch_seqtk_metadata = SEQTK_PARSE.out.tsv + ch_versions = ch_versions.mix( SEQTK_PARSE.out.versions ) + + /* + MODULE: Align pseudogenomes + */ + ch_align_pseudogenomes = SHORTREAD_MAPPING.out.consensus + .mix( LONGREAD_MAPPING.out.consensus ) + + ALIGNPSEUDOGENOMES ( + 
ch_align_pseudogenomes.map { consensus -> consensus[1] }.collect(), + ch_unzipped_fasta + ) + ch_versions = ch_versions.mix(ALIGNPSEUDOGENOMES.out.versions) + + ALIGNPSEUDOGENOMES.out.aligned_pseudogenomes + .branch { + aligned_pseudogenomes -> + ALIGNMENT_NUM_PASS: aligned_pseudogenomes[0].toInteger() >= 4 + ALIGNMENT_NUM_FAIL: aligned_pseudogenomes[0].toInteger() < 4 + } + .set { aligned_pseudogenomes_branch } + + // Don't proceed further if too few genomes + aligned_pseudogenomes_branch.ALIGNMENT_NUM_FAIL.view { "Insufficient (${it[0]}) genomes after filtering to continue. Check results/pseudogenomes/low_quality_pseudogenomes.tsv for details"} + + aligned_pseudogenomes_branch.ALIGNMENT_NUM_PASS + .map{ it[1] } + .set { aligned_pseudogenomes } + + SNPSITES( + aligned_pseudogenomes + ) + ch_versions = ch_versions.mix( SNPSITES.out.versions ) + + /* + Collate and save software versions + */ + def topic_versions = channel.topic("versions") + .distinct() + .branch { entry -> + versions_file: entry instanceof Path + versions_tuple: true + } + + def topic_versions_string = topic_versions.versions_tuple + .map { process, tool, version -> + [ process[process.lastIndexOf(':')+1..-1], " ${tool}: ${version}" ] + } + .groupTuple(by:0) + .map { process, tool_versions -> + tool_versions.unique().sort() + "${process}:\n${tool_versions.join('\n')}" + } + + softwareVersionsToYAML(ch_versions.mix(topic_versions.versions_file)) + .mix(topic_versions_string) .collectFile( storeDir: "${params.outdir}/pipeline_info", name: 'nf_core_' + 'bactmap_software_' + 'mqc_' + 'versions.yml', @@ -44,38 +400,61 @@ workflow BACTMAP { newLine: true ).set { ch_collated_versions } - - // - // MODULE: MultiQC - // - ch_multiqc_config = Channel.fromPath( + /* + MODULE: MultiQC + */ + ch_multiqc_config = channel.fromPath( "$projectDir/assets/multiqc_config.yml", checkIfExists: true) ch_multiqc_custom_config = params.multiqc_config ? 
- Channel.fromPath(params.multiqc_config, checkIfExists: true) : - Channel.empty() + channel.fromPath(params.multiqc_config, checkIfExists: true) : + channel.empty() ch_multiqc_logo = params.multiqc_logo ? - Channel.fromPath(params.multiqc_logo, checkIfExists: true) : - Channel.empty() + channel.fromPath(params.multiqc_logo, checkIfExists: true) : + channel.empty() summary_params = paramsSummaryMap( workflow, parameters_schema: "nextflow_schema.json") - ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) + ch_workflow_summary = channel.value(paramsSummaryMultiqc(summary_params)) ch_multiqc_files = ch_multiqc_files.mix( ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) - ch_methods_description = Channel.value( + ch_methods_description = channel.value( methodsDescriptionText(ch_multiqc_custom_methods_description)) ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) ch_multiqc_files = ch_multiqc_files.mix( ch_methods_description.collectFile( name: 'methods_description_mqc.yaml', - sort: true + sort: true, ) ) + if (!params.skip_preprocessing_qc) { + if (params.preprocessing_qc_tool == 'falco') { + // only mix in files actually used by MultiQC + ch_multiqc_files = ch_multiqc_files.mix( + FALCO.out.txt.map { _meta, reports -> reports }.flatten().filter { path -> path.name.endsWith('_data.txt') }.ifEmpty([]) + ) + } + else { + ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect { it[1] }.ifEmpty([])) + } + } + + if (params.perform_shortread_qc) { + ch_multiqc_files = ch_multiqc_files.mix( SHORTREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]) ) + } + + if (params.perform_longread_qc) { + ch_multiqc_files = ch_multiqc_files.mix( LONGREAD_PREPROCESSING.out.mqc.collect{it[1]}.ifEmpty([]) ) + 
} + + ch_multiqc_files = ch_multiqc_files.mix(SHORTREAD_MAPPING.out.mqc.collect{it[1]}.ifEmpty([])) + + ch_multiqc_files = ch_multiqc_files.mix(LONGREAD_MAPPING.out.mqc.collect{it[1]}.ifEmpty([])) + MULTIQC ( ch_multiqc_files.collect(), ch_multiqc_config.toList(), @@ -85,8 +464,9 @@ workflow BACTMAP { [] ) - emit:multiqc_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html - versions = ch_versions // channel: [ path(versions.yml) ] + emit: + multiqc_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html + versions = ch_versions // channel: [ path(versions.yml) ] }