Skip to content

Commit fc5b4a1

Browse files
committed
Make dynamic variables read from a tab-delimited annotation file work for regions.
For example, while the first command below was functional, the second was not bcftools annotate -a ann.tsv.gz -c CHROM,POS,-,SCORE,~STR -i'TAG={STR}' -k in.vcf bcftools annotate -a ann.tsv.gz -c CHROM,BEG,END,SCORE,~STR -i'TAG={STR}' -k in.vcf Resolves #2441, see also #2151
1 parent 4bfc0c1 commit fc5b4a1

File tree

6 files changed

+72
-37
lines changed

6 files changed

+72
-37
lines changed

NEWS

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,16 @@ Changes affecting the whole of bcftools, or multiple commands:
99

1010
Changes affecting specific commands:
1111

12+
* bcftools annotate
13+
14+
- Make dynamic variables read from a tab-delimited annotation file (#2151) work
15+
also for regions. For example, while the first command below was functional, the
16+
second was not (#2441)
17+
18+
bcftools annotate -a ann.tsv.gz -c CHROM,POS,-,SCORE,~STR -i'TAG={STR}' -k in.vcf
19+
bcftools annotate -a ann.tsv.gz -c CHROM,BEG,END,SCORE,~STR -i'TAG={STR}' -k in.vcf
20+
21+
1222
* bcftools consensus
1323

1424
- Fix a bug which prevented reading fasta files containing empty lines in their entirety (#2424)

test/annotate36.1.out

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
##fileformat=VCFv4.2
2+
##FILTER=<ID=PASS,Description="All filters passed">
3+
##contig=<ID=1,length=248956422>
4+
##INFO=<ID=AF,Number=1,Type=Float,Description="">
5+
##INFO=<ID=SVTYPE,Number=1,Type=String,Description="">
6+
#CHROM POS ID REF ALT QUAL FILTER INFO
7+
1 11029 . AA A . . SVTYPE=DEL;AF=0.0175193
8+
1 11029 . A AC . . SVTYPE=INS
9+
1 11029 . AA CC . . SVTYPE=MNP

test/annotate36.vcf

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
##fileformat=VCFv4.2
2+
##contig=<ID=1,length=248956422>
3+
##INFO=<ID=AF,Number=1,Type=Float,Description="">
4+
##INFO=<ID=SVTYPE,Number=1,Type=String,Description="">
5+
#CHROM POS ID REF ALT QUAL FILTER INFO
6+
1 11029 . AA A . . SVTYPE=DEL
7+
1 11029 . A AC . . SVTYPE=INS
8+
1 11029 . AA CC . . SVTYPE=MNP

test/annots36.bed

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
1 11028 11063 DEL 35 0.017519300803542137 50 0 1

test/test.pl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -559,8 +559,9 @@
559559
run_test(\&test_vcf_sort,$opts,in=>'sort',out=>'sort.out',args=>q[-m 0],fmt=>'%CHROM\\t%POS\\t%REF,%ALT\\n');
560560
run_test(\&test_vcf_sort,$opts,in=>'sort',out=>'sort.out',args=>q[-m 1000],fmt=>'%CHROM\\t%POS\\t%REF,%ALT\\n');
561561
run_test(\&test_vcf_regions,$opts,in=>'regions');
562+
run_test(\&test_vcf_annotate,$opts,in=>'annotate36',bed=>'annots36',out=>'annotate36.1.out',args=>q[-c CHROM,POS,-,~X,-,AF,-,-,- -i'SVTYPE={X}' -k]);
563+
run_test(\&test_vcf_annotate,$opts,in=>'annotate36',bed=>'annots36',out=>'annotate36.1.out',args=>q[-c CHROM,BEG,END,~X,-,AF,-,-,- -i'SVTYPE={X}' -k]);
562564
run_test(\&test_vcf_annotate,$opts,in=>'annotate35',vcf=>'annots35',out=>'annotate35.1.out',args=>q[-c CHROM,POS,~ID,REF,ALT,INFO/src]);
563-
run_test(\&test_vcf_annotate,$opts,in=>'annotate35',tab=>'annots35',out=>'annotate35.2.out',args=>q[-c CHROM,POS,~ID,REF,ALT,dst:=src]);
564565
run_test(\&test_vcf_annotate,$opts,in=>'annotate.escape.1',tab=>'annotate.escape.1',out=>'annotate.escape.1.1.out',args=>q[-c CHROM,POS,ISTR,FMT/FSTR]);
565566
run_test(\&test_vcf_annotate,$opts,in=>'annotate.match.1',tab=>'annotate.match.1',out=>'annotate.match.1.1.out',args=>q[-c CHROM,POS,-,-,SCORE,~X,-,- -i'STR={X}']);
566567
run_test(\&test_vcf_annotate,$opts,in=>'annotate.match.1',tab=>'annotate.match.1',out=>'annotate.match.1.2.out',args=>q[-c CHROM,POS,REF,ALT,SCORE,-,~X,- -i'INT={X}']);

vcfannotate.c

Lines changed: 42 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -3003,7 +3003,7 @@ static void init_filters(args_t *args)
30033003

30043004
if ( !args->n_ext ) return;
30053005

3006-
if ( !args->tgts )
3006+
if ( !args->tgts && !args->tgt_idx )
30073007
error("Error: dynamic variables in -i/-e expressions can be currently used only with tab-delimited file, not with VCF (todo)\n");
30083008

30093009
// contains external values
@@ -3330,6 +3330,41 @@ static int strstr_match(char *a, char *b)
33303330
}
33313331
return 0;
33323332
}
3333+
static int pass_filter_test_ext(args_t *args, bcf1_t *line, annot_line_t *ann)
3334+
{
3335+
char *tmp;
3336+
int i;
3337+
for (i=0; i<args->n_ext; i++)
3338+
{
3339+
int j = args->ext[i].icol;
3340+
if ( args->ext[i].ht_type==BCF_HT_STR ) args->ext_ptr[i] = args->ext[i].s = ann->cols[j];
3341+
else if ( args->ext[i].ht_type==BCF_HT_INT )
3342+
{
3343+
args->ext[i].i = strtol(ann->cols[j],&tmp,10);
3344+
if ( *tmp )
3345+
{
3346+
if ( strcmp(".",ann->cols[j]) ) error("Error: could not parse the annotation file, expected an integer, found \"%s\"\n",ann->cols[j]);
3347+
args->ext_ptr[i] = NULL;
3348+
}
3349+
else
3350+
args->ext_ptr[i] = &args->ext[i].i;
3351+
}
3352+
else if ( args->ext[i].ht_type==BCF_HT_REAL )
3353+
{
3354+
args->ext[i].f = strtod(ann->cols[j],&tmp);
3355+
if ( *tmp )
3356+
{
3357+
if ( strcmp(".",ann->cols[j]) ) error("Error: could not parse the annotation file, expected a float, found \"%s\"\n",ann->cols[j]);
3358+
args->ext_ptr[i] = NULL;
3359+
}
3360+
else
3361+
args->ext_ptr[i] = &args->ext[i].f;
3362+
}
3363+
}
3364+
int pass = filter_test_ext(args->filter_ext,line,NULL,(const void**)args->ext_ptr);
3365+
if ( args->filter_logic==FLT_EXCLUDE ) pass = pass ? 0 : 1;
3366+
return pass;
3367+
}
33333368
static int annotate_from_regidx(args_t *args, bcf1_t *line)
33343369
{
33353370
int j;
@@ -3355,6 +3390,11 @@ static int annotate_from_regidx(args_t *args, bcf1_t *line)
33553390
if ( args->min_overlap_vcf && args->min_overlap_vcf > (float)isec/len_vcf ) continue;
33563391

33573392
parse_annot_line(args, regitr_payload(args->tgt_itr,char*), tmp);
3393+
if ( args->filter_ext )
3394+
{
3395+
if ( !pass_filter_test_ext(args,line,tmp) ) continue;
3396+
has_overlap = 1;
3397+
}
33583398

33593399
// If a plain BED file is provided and we are asked to just mark overlapping sites, there are
33603400
// no additional columns. Not sure if there can be any side effects for ill-formatted BED files
@@ -3364,6 +3404,7 @@ static int annotate_from_regidx(args_t *args, bcf1_t *line)
33643404
for (j=0; j<args->ncols; j++)
33653405
{
33663406
if ( args->cols[j].done==1 ) continue;
3407+
if ( !args->cols[j].setter ) continue;
33673408
int ret = args->cols[j].setter(args,line,&args->cols[j],tmp);
33683409
if ( ret < 0 )
33693410
error("fixme: Could not set %s at %s:%"PRId64"\n", args->cols[j].hdr_key_src,bcf_seqname(args->hdr,line),(int64_t) line->pos+1);
@@ -3382,41 +3423,6 @@ static int annotate_from_regidx(args_t *args, bcf1_t *line)
33823423
}
33833424
return has_overlap;
33843425
}
3385-
static int pass_filter_test_ext(args_t *args, bcf1_t *line, annot_line_t *ann)
3386-
{
3387-
char *tmp;
3388-
int i;
3389-
for (i=0; i<args->n_ext; i++)
3390-
{
3391-
int j = args->ext[i].icol;
3392-
if ( args->ext[i].ht_type==BCF_HT_STR ) args->ext_ptr[i] = args->ext[i].s = ann->cols[j];
3393-
else if ( args->ext[i].ht_type==BCF_HT_INT )
3394-
{
3395-
args->ext[i].i = strtol(ann->cols[j],&tmp,10);
3396-
if ( *tmp )
3397-
{
3398-
if ( strcmp(".",ann->cols[j]) ) error("Error: could not parse the annotation file, expected an integer, found \"%s\"\n",ann->cols[j]);
3399-
args->ext_ptr[i] = NULL;
3400-
}
3401-
else
3402-
args->ext_ptr[i] = &args->ext[i].i;
3403-
}
3404-
else if ( args->ext[i].ht_type==BCF_HT_REAL )
3405-
{
3406-
args->ext[i].f = strtod(ann->cols[j],&tmp);
3407-
if ( *tmp )
3408-
{
3409-
if ( strcmp(".",ann->cols[j]) ) error("Error: could not parse the annotation file, expected a float, found \"%s\"\n",ann->cols[j]);
3410-
args->ext_ptr[i] = NULL;
3411-
}
3412-
else
3413-
args->ext_ptr[i] = &args->ext[i].f;
3414-
}
3415-
}
3416-
int pass = filter_test_ext(args->filter_ext,line,NULL,(const void**)args->ext_ptr);
3417-
if ( args->filter_logic==FLT_EXCLUDE ) pass = pass ? 0 : 1;
3418-
return pass;
3419-
}
34203426
static int annotate_from_tab(args_t *args, bcf1_t *line)
34213427
{
34223428
int i,j;

0 commit comments

Comments
 (0)