add support for purities for vafator

priesgo · priesgo · commit ad390d1051f0 · 2022-08-12T09:58:15.000+02:00
diff --git a/Makefile b/Makefile
@@ -24,3 +24,4 @@ test:
 	bash tests/run_test_7.sh
 	bash tests/run_test_8.sh
 	bash tests/run_test_10.sh
+	bash tests/run_test_11.sh
diff --git a/README.md b/README.md
@@ -93,6 +93,19 @@ The optional table with BAM files expects two tab-separated columns without a he
 | patient_2          | metastasis_tumor:/path/to/sample_1.metastasis.bam |
 | patient_2          | normal:/path/to/sample_1.normal.bam               |
 
+The optional table with tumor purities expects two tab-separated columns without a header.
+Normal samples are not expected to have a purity value; the default purity is 1.0.
+Purity values are in the range 0.0 to 1.0.
+The purity values are used to adjust the expected VAF, which is then used to calculate the power to detect a
+somatic mutation and the probability of an undetected somatic mutation.
+
+| Patient name       | Sample name:tumor purity            |
+|--------------------|-------------------------------------|
+| patient_1          | primary_tumor:0.4                   |
+| patient_1          | metastasis_tumor:0.5                |
+| patient_2          | primary_tumor:0.6                   |
+| patient_2          | metastasis_tumor:0.7                |
+
 Each patient can have any number of samples. Any sample can have any number of BAM files, annotations from the 
 different BAM files of the same sample will be provided with suffixes _1, _2, etc.
 The aggregated vafator annotations on each sample will also be provided without a suffix.
diff --git a/main.nf b/main.nf
@@ -11,6 +11,7 @@ include { VARIANT_ANNOTATION } from './modules/05_variant_annotation'
 params.help= false
 params.input_vcfs = false
 params.input_bams = false
+params.input_purities = false
 params.input_vcf = false
 params.reference = false
 params.output = "output"
@@ -63,6 +64,14 @@ if (params.input_bams) {
     .set { input_bams }
 }
 
+if (params.input_purities) {  // params.input_purities defaults to false, so this channel only exists when a purities table is passed
+    Channel
+    .fromPath(params.input_purities)
+    .splitCsv(header: ['name', 'purity'], sep: "\t")  // headerless tab-separated table; the two columns are exposed as 'name' and 'purity'
+    .map{ row-> tuple(row.name, row.purity) }  // emit (name, purity) tuples; per the README table the purity column holds "sample_name:purity" strings
+    .set { input_purities }  // bound to the name input_purities, which the workflow block joins onto the VCF/BAM tuples
+}
+
 workflow {
 
     if (params.filter) {
@@ -88,7 +97,12 @@ workflow {
     }
 
     if ( params.input_bams ) {
-        VAFATOR(final_vcfs.join(input_bams.groupTuple()))
+        if (params.input_purities) {  // purities provided: group them per patient and attach them as the 4th tuple element
+            VAFATOR(final_vcfs.join(input_bams.groupTuple()).join(input_purities.groupTuple()))  // NOTE(review): join discards non-matching keys by default, so patients absent from the purities table are dropped here -- confirm intended
+        }
+        else {
+            VAFATOR(final_vcfs.join(input_bams.groupTuple()).map { it + [[]] })  // no purities: append an empty list so the tuple still matches VAFATOR's 4-element input (patient_name, vcf, bams, purities); an empty list yields no --purity arguments
+        }
         final_vcfs = VAFATOR.out.annotated_vcf
         if ( ! params.skip_multiallelic_filter ) {
             final_vcfs = MULTIALLELIC_FILTER(final_vcfs)
diff --git a/modules/04_vafator.nf b/modules/04_vafator.nf
@@ -15,20 +15,21 @@ process VAFATOR {
     conda (params.enable_conda ? "bioconda::vafator=2.0.1" : null)
 
     input:
-    tuple val(patient_name), file(vcf), val(bams)
+    tuple val(patient_name), file(vcf), val(bams), val(purities)
 
     output:
     tuple val(patient_name), file("${patient_name}.vaf.vcf"), emit: annotated_vcf
 
     script:
     bams_param = bams.collect { b -> "--bam " + b.split(":").join(" ") }.join(" ")
+    purity_param = purities.collect { b -> "--purity " + b.split(":").join(" ") }.join(" ")
     """
     vafator \
     --input-vcf ${vcf} \
     --output-vcf ${patient_name}.vaf.vcf \
     --mapping-quality ${params.mapping_quality} \
     --base-call-quality ${params.base_call_quality} \
-    ${bams_param}
+    ${bams_param} ${purity_param}
     """
 }
 
diff --git a/test_data/test_purities.txt b/test_data/test_purities.txt
@@ -0,0 +1,4 @@
+tumor_normal	primary:0.5
+tumor_normal	normal:0.6
+single_sample	tumor:0.7
+single_sample	normal:0.8
diff --git a/tests/run_test_11.sh b/tests/run_test_11.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+
+source tests/assert.sh
+output_folder=output/test11
+
+# build input BAMs file
+echo -e "tumor_normal\tprimary:"`pwd`"/test_data/TESTX_S1_L001.bam" > test_data/test_bams.txt
+echo -e "tumor_normal\tnormal:"`pwd`"/test_data/TESTX_S1_L002.bam" >> test_data/test_bams.txt
+echo -e "single_sample\ttumor:"`pwd`"/test_data/TESTX_S1_L001.bam" >> test_data/test_bams.txt
+echo -e "single_sample\ttumor:"`pwd`"/test_data/TESTX_S1_L002.bam" >> test_data/test_bams.txt
+echo -e "single_sample\tnormal:"`pwd`"/test_data/TESTX_S1_L001.bam" >> test_data/test_bams.txt
+echo -e "single_sample\tnormal:"`pwd`"/test_data/TESTX_S1_L002.bam" >> test_data/test_bams.txt
+
+# build input purities file
+echo -e "tumor_normal\tprimary:0.5" > test_data/test_purities.txt
+echo -e "tumor_normal\tnormal:0.6" >> test_data/test_purities.txt
+echo -e "single_sample\ttumor:0.7" >> test_data/test_purities.txt
+echo -e "single_sample\tnormal:0.8" >> test_data/test_purities.txt
+
+nextflow main.nf -profile test,conda --output $output_folder \
+--input_bams test_data/test_bams.txt \
+--input_purities test_data/test_purities.txt \
+--skip_normalization
+
+# test output files
+test -s $output_folder/single_sample/single_sample.filtered_multiallelics.vcf || { echo "Missing test 10 output file!"; exit 1; }
+test -s $output_folder/tumor_normal/tumor_normal.filtered_multiallelics.vcf || { echo "Missing test 10 output file!"; exit 1; }
+test -s $output_folder/single_sample/single_sample.vaf.vcf || { echo "Missing test 10 output file!"; exit 1; }
+test -s $output_folder/tumor_normal/tumor_normal.vaf.vcf || { echo "Missing test 10 output file!"; exit 1; }