Skip to content

Commit 6f4f6a3

Browse files
committed
upgrade to the latest VAFator version + normalization is now optional
1 parent b9e22a2 commit 6f4f6a3

9 files changed

Lines changed: 75 additions & 24 deletions

File tree

Makefile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,3 +22,5 @@ test:
2222
bash tests/run_test_5.sh
2323
bash tests/run_test_6.sh
2424
bash tests/run_test_7.sh
25+
bash tests/run_test_8.sh
26+
bash tests/run_test_10.sh

README.md

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ This pipeline has several objectives:
1818
* Technical annotations from different BAM files
1919
* Functional annotations
2020

21+
All of the previous steps are optional.
22+
2123
## How to run it
2224

2325
Run it from GitHub as follows:
@@ -52,6 +54,7 @@ Input:
5254
5355
Optional input:
5456
* --output: the folder where to publish output
57+
* --skip_normalization: flag indicating to skip all normalization steps
5558
* --skip_decompose_complex: flag indicating not to split complex variants (ie: MNVs and combinations of SNVs and indels)
5659
* --filter: specify the filter to apply if any (e.g.: PASS), only variants with this value will be kept
5760
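* --input_bams: a tab-separated values file containing in each row the sample name and one BAM file prefixed by its label (e.g.: `tumor:/path/to/sample.bam`), for annotation with Vafator; use several rows for a sample with several BAM files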
@@ -96,11 +99,13 @@ The aggregated vafator annotations on each sample will also be provided without
9699

97100
## Variant filtering
98101

99-
Optionally, only variants with the value in the column `FILTER` matching the value of parameter `--filter` are kept.
102+
Only variants with the value in the column `FILTER` matching the value of parameter `--filter` are kept.
100103
If this parameter is not used no variants are filtered out. Multiple values can be passed separated by commas without spaces.
101104

102105
For instance, `--filter PASS,.` will keep variants having `FILTER=PASS` or `FILTER=.`, but remove all others.
103106

107+
No filter is applied if `--filter` is not passed.
108+
104109

105110
## Variant normalization
106111

@@ -120,6 +125,8 @@ The output consists of:
120125
* The normalized VCF
121126
* Summary statistics before and after normalization
122127

128+
Normalization is not applied if the parameter `--skip_normalization` is passed.
129+
123130

124131
![Pipeline](images/variant_normalization_pipeline.png)
125132

@@ -215,6 +222,8 @@ within the pileup of a BAM file. When doing somatic variant calling it may be re
215222
for the same variant in a patient from multiple BAM files.
216223
These annotations are provided by VAFator (https://github.com/TRON-Bioinformatics/vafator).
217224

225+
No technical annotations are performed if the parameter `--input_bams` is not passed.
226+
218227
## Functional annotations
219228

220229
The functional annotations provide a biological context for every variant. Such as the overlapping genes or the effect
@@ -238,6 +247,8 @@ To provide any additional SnpEff arguments use `--snpeff_args` such as
238247
`--snpeff_args "-noStats -no-downstream -no-upstream -no-intergenic -no-intron -onlyProtein -hgvs1LetterAa -noShiftHgvs"`,
239248
otherwise defaults will be used.
240249

250+
No functional annotations are performed if the parameters `--snpeff_organism` and `--snpeff_datadir` are not passed.
251+
241252

242253
## References
243254

main.nf

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ params.input_bams = false
1414
params.input_vcf = false
1515
params.reference = false
1616
params.output = "output"
17+
params.skip_normalization = false
1718
params.skip_decompose_complex = false
1819
params.filter = false
1920
params.cpus = 1
@@ -32,6 +33,10 @@ if ( params.snpeff_organism && ! params.snpeff_datadir) {
3233
exit 1, "To run snpEff, please, provide your snpEff data folder with --snpeff_datadir"
3334
}
3435

36+
if (params.skip_normalization && ! params.input_bams && ! params.snpeff_organism) {
37+
exit -1, "Neither normalization, VAFator annotation or SnpEff annotation enabled! Nothing to do..."
38+
}
39+
3540
if (! params.input_vcfs && ! params.input_vcf) {
3641
exit 1, "Neither --input_vcfs or --input_vcf are provided!"
3742
}
@@ -67,15 +72,20 @@ workflow {
6772

6873
SUMMARY_VCF(input_vcfs)
6974

70-
final_vcfs = BCFTOOLS_NORM(input_vcfs)
71-
if (! params.skip_decompose_complex) {
72-
VT_DECOMPOSE_COMPLEX(final_vcfs)
73-
final_vcfs = VT_DECOMPOSE_COMPLEX.out.decomposed_vcfs
74-
}
75-
REMOVE_DUPLICATES(final_vcfs)
76-
final_vcfs = REMOVE_DUPLICATES.out.deduplicated_vcfs
75+
if (! params.skip_normalization) {
76+
final_vcfs = BCFTOOLS_NORM(input_vcfs)
77+
if (! params.skip_decompose_complex) {
78+
VT_DECOMPOSE_COMPLEX(final_vcfs)
79+
final_vcfs = VT_DECOMPOSE_COMPLEX.out.decomposed_vcfs
80+
}
81+
REMOVE_DUPLICATES(final_vcfs)
82+
final_vcfs = REMOVE_DUPLICATES.out.deduplicated_vcfs
7783

78-
SUMMARY_VCF_2(final_vcfs)
84+
SUMMARY_VCF_2(final_vcfs)
85+
}
86+
else {
87+
final_vcfs = input_vcfs
88+
}
7989

8090
if ( params.input_bams ) {
8191
VAFATOR(final_vcfs.join(input_bams.groupTuple()))

modules/04_vafator.nf

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,20 +12,20 @@ process VAFATOR {
1212
tag "${patient_name}"
1313
publishDir "${params.output}/${patient_name}", mode: "copy"
1414

15-
conda (params.enable_conda ? "bioconda::vafator=1.1.4" : null)
15+
conda (params.enable_conda ? "bioconda::vafator=1.2.5" : null)
1616

1717
input:
1818
tuple val(patient_name), file(vcf), val(bams)
1919

2020
output:
21-
tuple val(patient_name), file("${vcf.baseName}.vaf.vcf"), emit: annotated_vcf
21+
tuple val(patient_name), file("${patient_name}.vaf.vcf"), emit: annotated_vcf
2222

2323
script:
2424
bams_param = bams.collect { b -> "--bam " + b.split(":").join(" ") }.join(" ")
2525
"""
2626
vafator \
2727
--input-vcf ${vcf} \
28-
--output-vcf ${vcf.baseName}.vaf.vcf \
28+
--output-vcf ${patient_name}.vaf.vcf \
2929
--mapping-quality ${params.mapping_quality} \
3030
--base-call-quality ${params.base_call_quality} \
3131
${bams_param}
@@ -39,16 +39,16 @@ process MULTIALLELIC_FILTER {
3939
tag "${name}"
4040
publishDir "${params.output}/${name}", mode: "copy"
4141

42-
conda (params.enable_conda ? "bioconda::vafator=1.1.4" : null)
42+
conda (params.enable_conda ? "bioconda::vafator=1.2.5" : null)
4343

4444
input:
4545
tuple val(name), file(vcf)
4646

4747
output:
48-
tuple val(name), file("${vcf.baseName}.filtered_multiallelics.vcf"), emit: filtered_vcf
48+
tuple val(name), file("${name}.filtered_multiallelics.vcf"), emit: filtered_vcf
4949

5050
script:
5151
"""
52-
multiallelics-filter --input-vcf ${vcf} --output-vcf ${vcf.baseName}.filtered_multiallelics.vcf
52+
multiallelics-filter --input-vcf ${vcf} --output-vcf ${name}.filtered_multiallelics.vcf
5353
"""
5454
}

nextflow.config

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ Input:
5757
5858
Optional input:
5959
* --output: the folder where to publish output
60+
* --skip_normalization: flag indicating to skip all normalization steps
6061
* --skip_decompose_complex: flag indicating not to split complex variants (ie: MNVs and combinations of SNVs and indels)
6162
* --filter: specify a comma-separated list of filters to apply (e.g.: PASS,.), only variants with these values will be kept. If not provided all variants are kept
6263
* --vcf-without-ad: indicate when the VCFs to normalize do not have the FORMAT/AD annotation

test_data/test_bams.txt

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,6 @@
1-
tumor_normal /home/priesgo/src/github/tronflow-variant-normalization/test_data/TESTX_S1_L001.bam /home/priesgo/src/github/tronflow-variant-normalization/test_data/TESTX_S1_L002.bam
2-
single_sample /home/priesgo/src/github/tronflow-variant-normalization/test_data/TESTX_S1_L001.bam,/home/priesgo/src/github/tronflow-variant-normalization/test_data/TESTX_S1_L002.bam /home/priesgo/src/github/tronflow-variant-normalization/test_data/TESTX_S1_L001.bam,/home/priesgo/src/github/tronflow-variant-normalization/test_data/TESTX_S1_L002.bam
1+
tumor_normal primary:/home/priesgo/src/github/tronflow-variant-normalization/test_data/TESTX_S1_L001.bam
2+
tumor_normal normal:/home/priesgo/src/github/tronflow-variant-normalization/test_data/TESTX_S1_L002.bam
3+
single_sample tumor:/home/priesgo/src/github/tronflow-variant-normalization/test_data/TESTX_S1_L001.bam
4+
single_sample tumor:/home/priesgo/src/github/tronflow-variant-normalization/test_data/TESTX_S1_L002.bam
5+
single_sample normal:/home/priesgo/src/github/tronflow-variant-normalization/test_data/TESTX_S1_L001.bam
6+
single_sample normal:/home/priesgo/src/github/tronflow-variant-normalization/test_data/TESTX_S1_L002.bam

tests/run_test_0.sh

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,11 @@
11
#!/bin/bash
22

3+
output_folder=output/test0
34

4-
nextflow main.nf --help
5+
nextflow main.nf --help
6+
7+
nextflow main.nf -profile test,conda --output $output_folder --skip_normalization
8+
9+
# missing SnpEff data folder
10+
nextflow main.nf -profile test,conda --output $output_folder --snpeff_organism hg19
11+
test ! -d $output_folder

tests/run_test_10.sh

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
#!/bin/bash
2+
3+
4+
source tests/assert.sh
5+
output_folder=output/test10
6+
echo -e "tumor_normal\tprimary:"`pwd`"/test_data/TESTX_S1_L001.bam" > test_data/test_bams.txt
7+
echo -e "tumor_normal\tnormal:"`pwd`"/test_data/TESTX_S1_L002.bam" >> test_data/test_bams.txt
8+
echo -e "single_sample\ttumor:"`pwd`"/test_data/TESTX_S1_L001.bam" >> test_data/test_bams.txt
9+
echo -e "single_sample\ttumor:"`pwd`"/test_data/TESTX_S1_L002.bam" >> test_data/test_bams.txt
10+
echo -e "single_sample\tnormal:"`pwd`"/test_data/TESTX_S1_L001.bam" >> test_data/test_bams.txt
11+
echo -e "single_sample\tnormal:"`pwd`"/test_data/TESTX_S1_L002.bam" >> test_data/test_bams.txt
12+
nextflow main.nf -profile test,conda --output $output_folder --input_bams test_data/test_bams.txt --skip_normalization
13+
test -s $output_folder/single_sample/single_sample.filtered_multiallelics.vcf || { echo "Missing test 10 output file!"; exit 1; }
14+
test -s $output_folder/tumor_normal/tumor_normal.filtered_multiallelics.vcf || { echo "Missing test 10 output file!"; exit 1; }
15+
test -s $output_folder/single_sample/single_sample.vaf.vcf || { echo "Missing test 10 output file!"; exit 1; }
16+
test -s $output_folder/tumor_normal/tumor_normal.vaf.vcf || { echo "Missing test 10 output file!"; exit 1; }

tests/run_test_8.sh

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,11 @@ echo -e "single_sample\tnormal:"`pwd`"/test_data/TESTX_S1_L002.bam" >> test_data
1212
nextflow main.nf -profile test,conda --output $output_folder --input_bams test_data/test_bams.txt
1313
test -s $output_folder/single_sample/single_sample.normalized.vcf || { echo "Missing test 8 output file!"; exit 1; }
1414
test -s $output_folder/tumor_normal/tumor_normal.normalized.vcf || { echo "Missing test 8 output file!"; exit 1; }
15-
test -s $output_folder/single_sample/single_sample.normalized.vaf.vcf || { echo "Missing test 8 output file!"; exit 1; }
16-
test -s $output_folder/tumor_normal/tumor_normal.normalized.vaf.vcf || { echo "Missing test 8 output file!"; exit 1; }
15+
test -s $output_folder/single_sample/single_sample.vaf.vcf || { echo "Missing test 8 output file!"; exit 1; }
16+
test -s $output_folder/tumor_normal/tumor_normal.vaf.vcf || { echo "Missing test 8 output file!"; exit 1; }
1717
assert_eq `wc -l $output_folder/single_sample/single_sample.normalized.vcf | cut -d' ' -f 1` 53 "Wrong number of variants"
1818
assert_eq `wc -l $output_folder/tumor_normal/tumor_normal.normalized.vcf | cut -d' ' -f 1` 53 "Wrong number of variants"
19-
assert_eq `wc -l $output_folder/single_sample/single_sample.normalized.vaf.vcf | cut -d' ' -f 1` 72 "Wrong number of variants"
20-
assert_eq `grep tumor_af $output_folder/single_sample/single_sample.normalized.vaf.vcf | wc -l | cut -d' ' -f 1` 35 "Wrong number of variants"
21-
assert_eq `grep normal_af $output_folder/single_sample/single_sample.normalized.vaf.vcf | wc -l | cut -d' ' -f 1` 35 "Wrong number of variants"
22-
assert_eq `wc -l $output_folder/tumor_normal/tumor_normal.normalized.vaf.vcf | cut -d' ' -f 1` 60 "Wrong number of variants"
19+
assert_eq `wc -l $output_folder/single_sample/single_sample.vaf.vcf | cut -d' ' -f 1` 72 "Wrong number of variants"
20+
assert_eq `grep tumor_af $output_folder/single_sample/single_sample.vaf.vcf | wc -l | cut -d' ' -f 1` 35 "Wrong number of variants"
21+
assert_eq `grep normal_af $output_folder/single_sample/single_sample.vaf.vcf | wc -l | cut -d' ' -f 1` 35 "Wrong number of variants"
22+
assert_eq `wc -l $output_folder/tumor_normal/tumor_normal.vaf.vcf | cut -d' ' -f 1` 60 "Wrong number of variants"

0 commit comments

Comments
 (0)