Skip to content

Commit 0ebd3b2

Browse files
committed
Resolve NEWS merge conflict
2 parents 9d3a0ce + 685b3f0 commit 0ebd3b2

File tree

7 files changed

+80
-25
lines changed

7 files changed

+80
-25
lines changed

NEWS

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,11 @@ Changes affecting specific commands:
99
when the site is monoallelic in both query and genotype file. The new option --keep-refs
1010
allows monoallelic sites to always be included.
1111

12+
* bcftools norm
13+
14+
- Make the -i/-e filtering options work with all operations, such as line merging and
15+
duplicate removal (#2415)
16+
1217
* bcftools reheader
1318

1419
- Add options `--samples-list` and `--samples-file` to allow renaming samples from a list of

test/norm.filter.1.out

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
##fileformat=VCFv4.2
2+
##FILTER=<ID=PASS,Description="All filters passed">
3+
##reference=file:///usr/bio-ref/GRCh38.81/GRCh38.fa
4+
##contig=<ID=1,length=248956422>
5+
#CHROM POS ID REF ALT QUAL FILTER INFO
6+
1 789241 1_789241_A_C A C . . .
7+
1 789242 1_789241_A_C A C . . .
8+
1 789241 1_789241_A_G;1_789241_A_T A G,T . . .
9+
1 789242 1_789241_A_G;1_789241_A_T A G,T . . .

test/norm.filter.2.out

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
##fileformat=VCFv4.2
2+
##FILTER=<ID=PASS,Description="All filters passed">
3+
##reference=file:///usr/bio-ref/GRCh38.81/GRCh38.fa
4+
##contig=<ID=1,length=248956422>
5+
#CHROM POS ID REF ALT QUAL FILTER INFO
6+
1 789241 1_789241_A_C A C . . .
7+
1 789241 1_789241_A_G A G . . .
8+
1 789242 1_789241_A_C A C . . .
9+
1 789242 1_789241_A_G A G . . .

test/norm.filter.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
1_789241_A_G
2+
1_789241_A_T
3+
1_789242_A_G
4+
1_789242_A_T

test/norm.filter.vcf

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
##fileformat=VCFv4.2
2+
##reference=file:///usr/bio-ref/GRCh38.81/GRCh38.fa
3+
##contig=<ID=1,length=248956422>
4+
#CHROM POS ID REF ALT QUAL FILTER INFO
5+
1 789241 1_789241_A_C A C . . .
6+
1 789241 1_789241_A_G A G . . .
7+
1 789241 1_789241_A_T A T . . .
8+
1 789242 1_789241_A_C A C . . .
9+
1 789242 1_789241_A_G A G . . .
10+
1 789242 1_789241_A_T A T . . .

test/test.pl

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,10 @@
285285
run_test(\&test_vcf_query,$opts,in=>'query.header',out=>'query.98.2.out',args=>q[-HH -f'%CHROM %POS[ %SAMPLE][ %DP][ %GT]']);
286286
run_test(\&test_vcf_query,$opts,in=>'query.filter-or',out=>'query.filter-or.1.out',args=>q[-f'[%SAMPLE %DP\\n]' -i'DP=1 || DP=2']);
287287
run_test(\&test_vcf_query,$opts,in=>'query.filter-or',out=>'query.filter-or.2.out',args=>q[-f'[%SAMPLE %DP\\n]' -i'DP=1 | DP=2']);
288+
run_test(\&test_vcf_norm,$opts,in=>'norm.filter',out=>'norm.filter.1.out',args=>qq[-m +both -i 'ID=\@{PATH}/norm.filter.txt']);
289+
run_test(\&test_vcf_norm,$opts,in=>'norm.filter',out=>'norm.filter.1.out',args=>qq[-m +both -i 'ALT!="C"']);
290+
run_test(\&test_vcf_norm,$opts,in=>'norm.filter',out=>'norm.filter.2.out',args=>qq[-d both -i 'ID=\@{PATH}/norm.filter.txt']);
291+
run_test(\&test_vcf_norm,$opts,in=>'norm.filter',out=>'norm.filter.2.out',args=>qq[-d both -i 'ALT!="C"']);
288292
run_test(\&test_vcf_norm,$opts,in=>'norm.6',fai=>'norm.2',out=>'norm.6.1.out',args=>'-c e');
289293
run_test(\&test_vcf_norm,$opts,in=>'norm.breakend.1',fai=>'norm.breakend.1',out=>'norm.breakend.1.1.out',args=>'-m -');
290294
run_test(\&test_vcf_norm,$opts,in=>'norm.sort',out=>'norm.sort.1.out',args=>'-m -');

vcfnorm.c

Lines changed: 39 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ THE SOFTWARE. */
5050
#define CHECK_REF_FIX 8
5151

5252
#define MROWS_SPLIT 1
53-
#define MROWS_MERGE 2
53+
#define MROWS_MERGE 2
5454

5555
// Logic of the filters: include or exclude sites which match the filters?
5656
#define FLT_INCLUDE 1
@@ -80,11 +80,19 @@ typedef struct
8080
}
8181
cmpals_t;
8282

83+
// One slot of the sorted output buffer: a record together with the filter
// result that was in effect when the record was buffered.
typedef struct
84+
{
85+
bcf1_t *rec;    // buffered copy of the record (created with bcf_dup when the line is inserted)
86+
int pass;       // copied from args->filter_pass on insertion; when 0, flush_buffer writes the record as is, skipping merging and duplicate removal
87+
}
88+
line_t;
89+
8390
typedef struct
8491
{
8592
char *tseq, *seq;
8693
int mseq;
87-
bcf1_t **lines, **tmp_lines, **mrows, *mrow_out;
94+
bcf1_t **tmp_lines, **mrows, *mrow_out;
95+
line_t *lines;
8896
int ntmp_lines, mtmp_lines, nmrows, mmrows, mrows_first;
8997
map_t *maps; // mrow map for each buffered record
9098
char **als;
@@ -2165,37 +2173,42 @@ static void flush_buffer(args_t *args, htsFile *file, int n)
21652173
for (i=0; i<n; i++)
21662174
{
21672175
k = rbuf_shift(&args->rbuf);
2176+
if ( !args->lines[k].pass )
2177+
{
2178+
if ( bcf_write1(file, args->out_hdr, args->lines[k].rec)!=0 ) error("[%s] Error: cannot write to %s\n", __func__,args->output_fname);
2179+
continue;
2180+
}
21682181
if ( args->mrows_op==MROWS_MERGE )
21692182
{
2170-
if ( mrows_can_flush(args, args->lines[k]) )
2183+
if ( mrows_can_flush(args, args->lines[k].rec) )
21712184
{
21722185
while ( (line=mrows_flush(args)) )
21732186
if ( bcf_write1(file, args->out_hdr, line)!=0 ) error("[%s] Error: cannot write to %s\n", __func__,args->output_fname);
21742187
}
2175-
mrows_push(args, &args->lines[k]);
2188+
mrows_push(args, &args->lines[k].rec);
21762189
continue;
21772190
}
21782191
else if ( args->rmdup )
21792192
{
2180-
int line_type = bcf_get_variant_types(args->lines[k]);
2181-
if ( prev_rid>=0 && prev_rid==args->lines[k]->rid && prev_pos==args->lines[k]->pos )
2193+
int line_type = bcf_get_variant_types(args->lines[k].rec);
2194+
if ( prev_rid>=0 && prev_rid==args->lines[k].rec->rid && prev_pos==args->lines[k].rec->pos )
21822195
{
21832196
if ( args->rmdup & BCF_SR_PAIR_ANY ) { args->nrmdup++; continue; } // rmdup by position only
21842197
if ( args->rmdup & BCF_SR_PAIR_SNPS && line_type&(VCF_SNP|VCF_MNP) && prev_type&(VCF_SNP|VCF_MNP) ) { args->nrmdup++; continue; }
21852198
if ( args->rmdup & BCF_SR_PAIR_INDELS && line_type&(VCF_INDEL) && prev_type&(VCF_INDEL) ) { args->nrmdup++; continue; }
2186-
if ( args->rmdup & BCF_SR_PAIR_EXACT && cmpals_match(args, &args->cmpals_out, args->lines[k]) ) { args->nrmdup++; continue; }
2199+
if ( args->rmdup & BCF_SR_PAIR_EXACT && cmpals_match(args, &args->cmpals_out, args->lines[k].rec) ) { args->nrmdup++; continue; }
21872200
}
21882201
else
21892202
{
2190-
prev_rid = args->lines[k]->rid;
2191-
prev_pos = args->lines[k]->pos;
2203+
prev_rid = args->lines[k].rec->rid;
2204+
prev_pos = args->lines[k].rec->pos;
21922205
prev_type = 0;
21932206
if ( args->rmdup & BCF_SR_PAIR_EXACT ) cmpals_reset(&args->cmpals_out);
21942207
}
21952208
prev_type |= line_type;
2196-
if ( args->rmdup & BCF_SR_PAIR_EXACT ) cmpals_add(args,&args->cmpals_out, args->lines[k]);
2209+
if ( args->rmdup & BCF_SR_PAIR_EXACT ) cmpals_add(args,&args->cmpals_out, args->lines[k].rec);
21972210
}
2198-
if ( bcf_write1(file, args->out_hdr, args->lines[k])!=0 ) error("[%s] Error: cannot write to %s\n", __func__,args->output_fname);
2211+
if ( bcf_write1(file, args->out_hdr, args->lines[k].rec)!=0 ) error("[%s] Error: cannot write to %s\n", __func__,args->output_fname);
21992212
}
22002213
if ( args->mrows_op==MROWS_MERGE && !args->rbuf.n )
22012214
{
@@ -2220,7 +2233,7 @@ static void init_data(args_t *args)
22202233
bcf_hdr_printf(args->out_hdr,"##INFO=<ID=%s,Number=1,Type=String,Description=\"Original variant. Format: CHR|POS|REF|ALT|USED_ALT_IDX\">",args->old_rec_tag);
22212234

22222235
rbuf_init(&args->rbuf, 100);
2223-
args->lines = (bcf1_t**) calloc(args->rbuf.m, sizeof(bcf1_t*));
2236+
args->lines = (line_t*) calloc(args->rbuf.m, sizeof(*args->lines));
22242237
if ( args->ref_fname )
22252238
{
22262239
args->fai = fai_load(args->ref_fname);
@@ -2279,7 +2292,7 @@ static void destroy_data(args_t *args)
22792292
cmpals_destroy(&args->cmpals_out);
22802293
int i;
22812294
for (i=0; i<args->rbuf.m; i++)
2282-
if ( args->lines[i] ) bcf_destroy1(args->lines[i]);
2295+
if ( args->lines[i].rec ) bcf_destroy1(args->lines[i].rec);
22832296
free(args->lines);
22842297
for (i=0; i<args->mtmp_lines; i++)
22852298
if ( args->tmp_lines[i] ) bcf_destroy1(args->tmp_lines[i]);
@@ -2357,18 +2370,19 @@ static void normalize_line(args_t *args, bcf1_t *line)
23572370
}
23582371

23592372
// insert into sorted buffer
2360-
rbuf_expand0(&args->rbuf,bcf1_t*,args->rbuf.n+1,args->lines);
2373+
rbuf_expand0(&args->rbuf,line_t,args->rbuf.n+1,args->lines);
23612374
int i,j;
23622375
i = j = rbuf_append(&args->rbuf);
2363-
if ( args->lines[i] ) bcf_destroy(args->lines[i]);
2364-
args->lines[i] = bcf_dup(line);
2376+
if ( args->lines[i].rec ) bcf_destroy(args->lines[i].rec);
2377+
args->lines[i].rec = bcf_dup(line);
2378+
args->lines[i].pass = args->filter_pass;
23652379
while ( rbuf_prev(&args->rbuf,&i) )
23662380
{
2367-
if ( args->lines[i]->rid==args->lines[j]->rid )
2381+
if ( args->lines[i].rec->rid==args->lines[j].rec->rid )
23682382
{
2369-
bcf_unpack(args->lines[i], BCF_UN_STR);
2370-
bcf_unpack(args->lines[j], BCF_UN_STR);
2371-
if ( args->cmp_func(&args->lines[i], &args->lines[j]) > 0) SWAP(bcf1_t*, args->lines[i], args->lines[j]);
2383+
bcf_unpack(args->lines[i].rec, BCF_UN_STR);
2384+
bcf_unpack(args->lines[j].rec, BCF_UN_STR);
2385+
if ( args->cmp_func(&args->lines[i].rec, &args->lines[j].rec) > 0) SWAP(line_t, args->lines[i], args->lines[j]);
23722386
}
23732387
j = i;
23742388
}
@@ -2443,8 +2457,8 @@ static void normalize_vcf(args_t *args)
24432457
if ( done ) break; // no more lines available
24442458
int i = args->rbuf.f;
24452459
int j = rbuf_last(&args->rbuf);
2446-
if ( args->lines[i]->rid != args->lines[j]->rid ) break;
2447-
if ( args->lines[i]->pos != args->lines[j]->pos ) break;
2460+
if ( args->lines[i].rec->rid != args->lines[j].rec->rid ) break;
2461+
if ( args->lines[i].rec->pos != args->lines[j].rec->pos ) break;
24482462
}
24492463
if ( done ) break;
24502464

@@ -2454,16 +2468,16 @@ static void normalize_vcf(args_t *args)
24542468
int i, j = 0;
24552469
for (i=-1; rbuf_next(&args->rbuf,&i); )
24562470
{
2457-
if ( args->lines[ifst]->rid != args->lines[ilast]->rid )
2471+
if ( args->lines[ifst].rec->rid != args->lines[ilast].rec->rid )
24582472
{
24592473
// there are two chromosomes in the buffer, count how many are on the first chromosome
2460-
if ( args->lines[ifst]->rid != args->lines[i]->rid ) break;
2474+
if ( args->lines[ifst].rec->rid != args->lines[i].rec->rid ) break;
24612475
j++;
24622476
continue;
24632477
}
24642478
// there is just one chromosome, flush only lines that are unlikely to change order on
24652479
// realigning (the buf_win constant)
2466-
if ( args->lines[ilast]->pos - args->lines[i]->pos < args->buf_win ) break;
2480+
if ( args->lines[ilast].rec->pos - args->lines[i].rec->pos < args->buf_win ) break;
24672481
j++;
24682482
}
24692483
if ( j>0 ) flush_buffer(args, args->out, j);

0 commit comments

Comments
 (0)