Skip to content

Commit 96c7aae

Browse files
authored
Merge pull request #4 from TRON-Bioinformatics/fix-issue-multiallelic-filter
Fix issue with multiallelic filter
2 parents 8408f02 + 0708777 commit 96c7aae

5 files changed

Lines changed: 34 additions & 8 deletions

File tree

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@ Optional input:
5555
* --skip_decompose_complex: flag indicating not to split complex variants (ie: MNVs and combinations of SNVs and indels)
5656
* --filter: specify the filter to apply if any (e.g.: PASS), only variants with this value will be kept
5757
* --input_bams: a tab-separated values file containing in each row the sample name, tumor and normal BAM files for annotation with Vafator
58+
* --skip_multiallelic_filter: after VAFator annotations, if any multiallelic variant is present (i.e.: two different
59+
mutations at the same position), only the variant with the highest VAF is kept unless this flag is passed
5860
* --snpeff_organism: the SnpEff organism name (eg: hg19, hg38, GRCh37.75, GRCh38.99)
5961
* --snpeff_datadir: the SnpEff data folder where the reference genomes were previously downloaded. Required if --snpeff_organism is provided
6062
* --snpeff_args: additional SnpEff arguments

modules/04_vafator.nf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ process MULTIALLELIC_FILTER {
4545
tuple val(name), file(vcf)
4646

4747
output:
48-
tuple file("${vcf.baseName}.filtered_multiallelics.vcf"), emit: filtered_vcf
48+
tuple val(name), file("${vcf.baseName}.filtered_multiallelics.vcf"), emit: filtered_vcf
4949

5050
script:
5151
"""

nextflow.config

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ env {
2727
// Capture exit codes from upstream processes when piping
2828
process.shell = ['/bin/bash', '-euo', 'pipefail']
2929

30-
VERSION = '2.1.0'
30+
VERSION = '2.1.1'
3131

3232
cleanup=true
3333

@@ -63,6 +63,8 @@ Optional input:
6363
* --filter: specify a comma-separated list of filters to apply (e.g.: PASS,.), only variants with these values will be kept. If not provided, all variants are kept
6464
* --vcf-without-ad: indicate when the VCFs to normalize do not have the FORMAT/AD annotation
6565
* --input_bams: a tab-separated values file containing in each row the sample name, tumor and normal BAM files for annotation with Vafator
66+
* --skip_multiallelic_filter: after VAFator annotations, if any multiallelic variant is present (i.e.: two different
67+
mutations at the same position), only the variant with the highest VAF is kept unless this flag is passed
6668
* --snpeff_organism: the SnpEff organism name (eg: hg19, hg38, GRCh37.75, GRCh38.99)
6769
* --snpeff_datadir: the SnpEff data folder where the reference genomes were previously downloaded. Required if --snpeff_organism is provided
6870
* --snpeff_args: additional SnpEff arguments

tests/run_test_8.sh

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,22 @@
11
#!/bin/bash
22

3-
### this test cannot be automated as it relies on SnpEff references which need to be downloaded beforehand
3+
44
source tests/assert.sh
55
output_folder=output/test8
6-
snpeff_datadir=/home/you/snpeff
7-
nextflow main.nf -profile test,conda --output $output_folder --snpeff_organism hg19 --snpeff_datadir $snpeff_datadir
8-
test -s $output_folder/single_sample/single_sample.normalized.vcf || { echo "Missing test 1 output file!"; exit 1; }
9-
test -s $output_folder/tumor_normal/tumor_normal.normalized.vcf || { echo "Missing test 1 output file!"; exit 1; }
6+
echo -e "tumor_normal\tprimary:"`pwd`"/test_data/TESTX_S1_L001.bam" > test_data/test_bams.txt
7+
echo -e "tumor_normal\tnormal:"`pwd`"/test_data/TESTX_S1_L002.bam" >> test_data/test_bams.txt
8+
echo -e "single_sample\ttumor:"`pwd`"/test_data/TESTX_S1_L001.bam" >> test_data/test_bams.txt
9+
echo -e "single_sample\ttumor:"`pwd`"/test_data/TESTX_S1_L002.bam" >> test_data/test_bams.txt
10+
echo -e "single_sample\tnormal:"`pwd`"/test_data/TESTX_S1_L001.bam" >> test_data/test_bams.txt
11+
echo -e "single_sample\tnormal:"`pwd`"/test_data/TESTX_S1_L002.bam" >> test_data/test_bams.txt
12+
nextflow main.nf -profile test,conda --output $output_folder --input_bams test_data/test_bams.txt
13+
test -s $output_folder/single_sample/single_sample.normalized.vcf || { echo "Missing test 8 output file!"; exit 1; }
14+
test -s $output_folder/tumor_normal/tumor_normal.normalized.vcf || { echo "Missing test 8 output file!"; exit 1; }
15+
test -s $output_folder/single_sample/single_sample.normalized.vaf.vcf || { echo "Missing test 8 output file!"; exit 1; }
16+
test -s $output_folder/tumor_normal/tumor_normal.normalized.vaf.vcf || { echo "Missing test 8 output file!"; exit 1; }
1017
assert_eq `wc -l $output_folder/single_sample/single_sample.normalized.vcf | cut -d' ' -f 1` 53 "Wrong number of variants"
11-
assert_eq `wc -l $output_folder/tumor_normal/tumor_normal.normalized.vcf | cut -d' ' -f 1` 53 "Wrong number of variants"
18+
assert_eq `wc -l $output_folder/tumor_normal/tumor_normal.normalized.vcf | cut -d' ' -f 1` 53 "Wrong number of variants"
19+
assert_eq `wc -l $output_folder/single_sample/single_sample.normalized.vaf.vcf | cut -d' ' -f 1` 72 "Wrong number of variants"
20+
assert_eq `grep tumor_af $output_folder/single_sample/single_sample.normalized.vaf.vcf | wc -l | cut -d' ' -f 1` 35 "Wrong number of variants"
21+
assert_eq `grep normal_af $output_folder/single_sample/single_sample.normalized.vaf.vcf | wc -l | cut -d' ' -f 1` 35 "Wrong number of variants"
22+
assert_eq `wc -l $output_folder/tumor_normal/tumor_normal.normalized.vaf.vcf | cut -d' ' -f 1` 60 "Wrong number of variants"

tests/run_test_9.sh

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
#!/bin/bash
2+
3+
### this test cannot be automated as it relies on SnpEff references which need to be downloaded beforehand
4+
source tests/assert.sh
5+
output_folder=output/test8
6+
snpeff_datadir=/home/you/snpeff
7+
nextflow main.nf -profile test,conda --output $output_folder --snpeff_organism hg19 --snpeff_datadir $snpeff_datadir
8+
test -s $output_folder/single_sample/single_sample.normalized.vcf || { echo "Missing test 1 output file!"; exit 1; }
9+
test -s $output_folder/tumor_normal/tumor_normal.normalized.vcf || { echo "Missing test 1 output file!"; exit 1; }
10+
assert_eq `wc -l $output_folder/single_sample/single_sample.normalized.vcf | cut -d' ' -f 1` 53 "Wrong number of variants"
11+
assert_eq `wc -l $output_folder/tumor_normal/tumor_normal.normalized.vcf | cut -d' ' -f 1` 53 "Wrong number of variants"

0 commit comments

Comments
 (0)