Skip to content

Commit ad390d1

Browse files
committed
add support for purities for vafator
1 parent 0f0c615 commit ad390d1

6 files changed

Lines changed: 66 additions & 3 deletions

File tree

Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,4 @@ test:
2424
bash tests/run_test_7.sh
2525
bash tests/run_test_8.sh
2626
bash tests/run_test_10.sh
27+
bash tests/run_test_11.sh

README.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,19 @@ The optional table with BAM files expects two tab-separated columns without a he
9393
| patient_2 | metastasis_tumor:/path/to/sample_1.metastasis.bam |
9494
| patient_2 | normal:/path/to/sample_1.normal.bam |
9595

96+
The optional table with tumor purities expects two tab-separated columns without a header.
97+
Normal samples are not expected to have a purity value; the default purity is 1.0.
98+
Purity values are in the range 0.0 to 1.0.
99+
The purity values are used to adjust the expected VAF, which is then used to calculate the power to detect a
100+
somatic mutation and the probability of an undetected somatic mutation.
101+
102+
| Patient name | Sample name:tumor purity |
103+
|--------------------|-------------------------------------|
104+
| patient_1 | primary_tumor:0.4 |
105+
| patient_1 | metastasis_tumor:0.5 |
106+
| patient_2 | primary_tumor:0.6 |
107+
| patient_2 | metastasis_tumor:0.7 |
108+
96109
Each patient can have any number of samples. Any sample can have any number of BAM files; annotations from the
97110
different BAM files of the same sample will be provided with suffixes _1, _2, etc.
98111
The aggregated vafator annotations on each sample will also be provided without a suffix.

main.nf

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ include { VARIANT_ANNOTATION } from './modules/05_variant_annotation'
1111
params.help= false
1212
params.input_vcfs = false
1313
params.input_bams = false
14+
params.input_purities = false
1415
params.input_vcf = false
1516
params.reference = false
1617
params.output = "output"
@@ -63,6 +64,14 @@ if (params.input_bams) {
6364
.set { input_bams }
6465
}
6566

67+
// Optional tumor purities table: only when --input_purities is set, build the
// `input_purities` channel with one (name, purity) tuple per row of the file.
if (params.input_purities) {
68+
Channel
69+
.fromPath(params.input_purities)
70+
// the table has no header row, so the two column names are supplied explicitly
.splitCsv(header: ['name', 'purity'], sep: "\t")
71+
// NOTE(review): per the README the second column holds "sample name:tumor purity" — confirm
.map{ row-> tuple(row.name, row.purity) }
72+
.set { input_purities }
73+
}
74+
6675
workflow {
6776

6877
if (params.filter) {
@@ -88,7 +97,12 @@ workflow {
8897
}
8998

9099
if ( params.input_bams ) {
91-
VAFATOR(final_vcfs.join(input_bams.groupTuple()))
100+
if (params.input_purities) {
101+
VAFATOR(final_vcfs.join(input_bams.groupTuple()).join(input_purities.groupTuple()))
102+
}
103+
else {
104+
VAFATOR(final_vcfs.join(input_bams.groupTuple()))
105+
}
92106
final_vcfs = VAFATOR.out.annotated_vcf
93107
if ( ! params.skip_multiallelic_filter ) {
94108
final_vcfs = MULTIALLELIC_FILTER(final_vcfs)

modules/04_vafator.nf

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,20 +15,21 @@ process VAFATOR {
1515
conda (params.enable_conda ? "bioconda::vafator=2.0.1" : null)
1616

1717
input:
18-
tuple val(patient_name), file(vcf), val(bams)
18+
tuple val(patient_name), file(vcf), val(bams), val(purities)
1919

2020
output:
2121
tuple val(patient_name), file("${patient_name}.vaf.vcf"), emit: annotated_vcf
2222

2323
script:
2424
bams_param = bams.collect { b -> "--bam " + b.split(":").join(" ") }.join(" ")
25+
purity_param = purities.collect { b -> "--purity " + b.split(":").join(" ") }.join(" ")
2526
"""
2627
vafator \
2728
--input-vcf ${vcf} \
2829
--output-vcf ${patient_name}.vaf.vcf \
2930
--mapping-quality ${params.mapping_quality} \
3031
--base-call-quality ${params.base_call_quality} \
31-
${bams_param}
32+
${bams_param} ${purity_param}
3233
"""
3334
}
3435

test_data/test_purities.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
tumor_normal primary:0.5
2+
tumor_normal normal:0.6
3+
single_sample tumor:0.7
4+
single_sample normal:0.8

tests/run_test_11.sh

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
#!/bin/bash
2+
3+
4+
# Test 11: run the pipeline with both a BAM table and a tumor purities table.
source tests/assert.sh
5+
output_folder=output/test11
6+
7+
# build input BAMs file: rows are "<patient>\t<sample>:<absolute BAM path>", no header.
# The first echo truncates the file (>), the following ones append (>>).
# single_sample lists two BAMs for each of its samples.
8+
echo -e "tumor_normal\tprimary:"`pwd`"/test_data/TESTX_S1_L001.bam" > test_data/test_bams.txt
9+
echo -e "tumor_normal\tnormal:"`pwd`"/test_data/TESTX_S1_L002.bam" >> test_data/test_bams.txt
10+
echo -e "single_sample\ttumor:"`pwd`"/test_data/TESTX_S1_L001.bam" >> test_data/test_bams.txt
11+
echo -e "single_sample\ttumor:"`pwd`"/test_data/TESTX_S1_L002.bam" >> test_data/test_bams.txt
12+
echo -e "single_sample\tnormal:"`pwd`"/test_data/TESTX_S1_L001.bam" >> test_data/test_bams.txt
13+
echo -e "single_sample\tnormal:"`pwd`"/test_data/TESTX_S1_L002.bam" >> test_data/test_bams.txt
14+
15+
# build input purities file: rows are "<patient>\t<sample>:<purity>", no header.
# A purity is written for every sample of both patients, including the normal samples.
16+
echo -e "tumor_normal\tprimary:0.5" > test_data/test_purities.txt
17+
echo -e "tumor_normal\tnormal:0.6" >> test_data/test_purities.txt
18+
echo -e "single_sample\ttumor:0.7" >> test_data/test_purities.txt
19+
echo -e "single_sample\tnormal:0.8" >> test_data/test_purities.txt
20+
21+
# run the pipeline on the two tables built above, with normalization skipped
nextflow main.nf -profile test,conda --output $output_folder \
22+
--input_bams test_data/test_bams.txt \
23+
--input_purities test_data/test_purities.txt \
24+
--skip_normalization
25+
26+
# test output files: each expected VCF must exist and be non-empty (test -s); otherwise report test 11 and exit 1
27+
test -s $output_folder/single_sample/single_sample.filtered_multiallelics.vcf || { echo "Missing test 11 output file!"; exit 1; }
28+
test -s $output_folder/tumor_normal/tumor_normal.filtered_multiallelics.vcf || { echo "Missing test 11 output file!"; exit 1; }
29+
test -s $output_folder/single_sample/single_sample.vaf.vcf || { echo "Missing test 11 output file!"; exit 1; }
30+
test -s $output_folder/tumor_normal/tumor_normal.vaf.vcf || { echo "Missing test 11 output file!"; exit 1; }

0 commit comments

Comments
 (0)