@@ -50,7 +50,7 @@ THE SOFTWARE. */
50
50
#define CHECK_REF_FIX 8
51
51
52
52
#define MROWS_SPLIT 1
53
- #define MROWS_MERGE 2
53
+ #define MROWS_MERGE 2
54
54
55
55
// Logic of the filters: include or exclude sites which match the filters?
56
56
#define FLT_INCLUDE 1
@@ -80,11 +80,19 @@ typedef struct
80
80
}
81
81
cmpals_t ;
82
82
83
+ typedef struct // one slot of the sorted record buffer (args->lines)
84
+ {
85
+ bcf1_t * rec ; // the buffered record; set from bcf_dup() when the line is inserted
86
+ int pass ; // copy of args->filter_pass taken at insertion; when zero, flush_buffer writes the record as is, skipping the merge and rmdup steps
87
+ }
88
+ line_t ;
89
+
83
90
typedef struct
84
91
{
85
92
char * tseq , * seq ;
86
93
int mseq ;
87
- bcf1_t * * lines , * * tmp_lines , * * mrows , * mrow_out ;
94
+ bcf1_t * * tmp_lines , * * mrows , * mrow_out ;
95
+ line_t * lines ;
88
96
int ntmp_lines , mtmp_lines , nmrows , mmrows , mrows_first ;
89
97
map_t * maps ; // mrow map for each buffered record
90
98
char * * als ;
@@ -2165,37 +2173,42 @@ static void flush_buffer(args_t *args, htsFile *file, int n)
2165
2173
for (i = 0 ; i < n ; i ++ )
2166
2174
{
2167
2175
k = rbuf_shift (& args -> rbuf );
2176
+ if ( !args -> lines [k ].pass )
2177
+ {
2178
+ if ( bcf_write1 (file , args -> out_hdr , args -> lines [k ].rec )!= 0 ) error ("[%s] Error: cannot write to %s\n" , __func__ ,args -> output_fname );
2179
+ continue ;
2180
+ }
2168
2181
if ( args -> mrows_op == MROWS_MERGE )
2169
2182
{
2170
- if ( mrows_can_flush (args , args -> lines [k ]) )
2183
+ if ( mrows_can_flush (args , args -> lines [k ]. rec ) )
2171
2184
{
2172
2185
while ( (line = mrows_flush (args )) )
2173
2186
if ( bcf_write1 (file , args -> out_hdr , line )!= 0 ) error ("[%s] Error: cannot write to %s\n" , __func__ ,args -> output_fname );
2174
2187
}
2175
- mrows_push (args , & args -> lines [k ]);
2188
+ mrows_push (args , & args -> lines [k ]. rec );
2176
2189
continue ;
2177
2190
}
2178
2191
else if ( args -> rmdup )
2179
2192
{
2180
- int line_type = bcf_get_variant_types (args -> lines [k ]);
2181
- if ( prev_rid >=0 && prev_rid == args -> lines [k ]-> rid && prev_pos == args -> lines [k ]-> pos )
2193
+ int line_type = bcf_get_variant_types (args -> lines [k ]. rec );
2194
+ if ( prev_rid >=0 && prev_rid == args -> lines [k ]. rec -> rid && prev_pos == args -> lines [k ]. rec -> pos )
2182
2195
{
2183
2196
if ( args -> rmdup & BCF_SR_PAIR_ANY ) { args -> nrmdup ++ ; continue ; } // rmdup by position only
2184
2197
if ( args -> rmdup & BCF_SR_PAIR_SNPS && line_type & (VCF_SNP |VCF_MNP ) && prev_type & (VCF_SNP |VCF_MNP ) ) { args -> nrmdup ++ ; continue ; }
2185
2198
if ( args -> rmdup & BCF_SR_PAIR_INDELS && line_type & (VCF_INDEL ) && prev_type & (VCF_INDEL ) ) { args -> nrmdup ++ ; continue ; }
2186
- if ( args -> rmdup & BCF_SR_PAIR_EXACT && cmpals_match (args , & args -> cmpals_out , args -> lines [k ]) ) { args -> nrmdup ++ ; continue ; }
2199
+ if ( args -> rmdup & BCF_SR_PAIR_EXACT && cmpals_match (args , & args -> cmpals_out , args -> lines [k ]. rec ) ) { args -> nrmdup ++ ; continue ; }
2187
2200
}
2188
2201
else
2189
2202
{
2190
- prev_rid = args -> lines [k ]-> rid ;
2191
- prev_pos = args -> lines [k ]-> pos ;
2203
+ prev_rid = args -> lines [k ]. rec -> rid ;
2204
+ prev_pos = args -> lines [k ]. rec -> pos ;
2192
2205
prev_type = 0 ;
2193
2206
if ( args -> rmdup & BCF_SR_PAIR_EXACT ) cmpals_reset (& args -> cmpals_out );
2194
2207
}
2195
2208
prev_type |= line_type ;
2196
- if ( args -> rmdup & BCF_SR_PAIR_EXACT ) cmpals_add (args ,& args -> cmpals_out , args -> lines [k ]);
2209
+ if ( args -> rmdup & BCF_SR_PAIR_EXACT ) cmpals_add (args ,& args -> cmpals_out , args -> lines [k ]. rec );
2197
2210
}
2198
- if ( bcf_write1 (file , args -> out_hdr , args -> lines [k ])!= 0 ) error ("[%s] Error: cannot write to %s\n" , __func__ ,args -> output_fname );
2211
+ if ( bcf_write1 (file , args -> out_hdr , args -> lines [k ]. rec )!= 0 ) error ("[%s] Error: cannot write to %s\n" , __func__ ,args -> output_fname );
2199
2212
}
2200
2213
if ( args -> mrows_op == MROWS_MERGE && !args -> rbuf .n )
2201
2214
{
@@ -2220,7 +2233,7 @@ static void init_data(args_t *args)
2220
2233
bcf_hdr_printf (args -> out_hdr ,"##INFO=<ID=%s,Number=1,Type=String,Description=\"Original variant. Format: CHR|POS|REF|ALT|USED_ALT_IDX\">" ,args -> old_rec_tag );
2221
2234
2222
2235
rbuf_init (& args -> rbuf , 100 );
2223
- args -> lines = (bcf1_t * * ) calloc (args -> rbuf .m , sizeof (bcf1_t * ));
2236
+ args -> lines = (line_t * ) calloc (args -> rbuf .m , sizeof (* args -> lines ));
2224
2237
if ( args -> ref_fname )
2225
2238
{
2226
2239
args -> fai = fai_load (args -> ref_fname );
@@ -2279,7 +2292,7 @@ static void destroy_data(args_t *args)
2279
2292
cmpals_destroy (& args -> cmpals_out );
2280
2293
int i ;
2281
2294
for (i = 0 ; i < args -> rbuf .m ; i ++ )
2282
- if ( args -> lines [i ] ) bcf_destroy1 (args -> lines [i ]);
2295
+ if ( args -> lines [i ]. rec ) bcf_destroy1 (args -> lines [i ]. rec );
2283
2296
free (args -> lines );
2284
2297
for (i = 0 ; i < args -> mtmp_lines ; i ++ )
2285
2298
if ( args -> tmp_lines [i ] ) bcf_destroy1 (args -> tmp_lines [i ]);
@@ -2357,18 +2370,19 @@ static void normalize_line(args_t *args, bcf1_t *line)
2357
2370
}
2358
2371
2359
2372
// insert into sorted buffer
2360
- rbuf_expand0 (& args -> rbuf ,bcf1_t * ,args -> rbuf .n + 1 ,args -> lines );
2373
+ rbuf_expand0 (& args -> rbuf ,line_t ,args -> rbuf .n + 1 ,args -> lines );
2361
2374
int i ,j ;
2362
2375
i = j = rbuf_append (& args -> rbuf );
2363
- if ( args -> lines [i ] ) bcf_destroy (args -> lines [i ]);
2364
- args -> lines [i ] = bcf_dup (line );
2376
+ if ( args -> lines [i ].rec ) bcf_destroy (args -> lines [i ].rec );
2377
+ args -> lines [i ].rec = bcf_dup (line );
2378
+ args -> lines [i ].pass = args -> filter_pass ;
2365
2379
while ( rbuf_prev (& args -> rbuf ,& i ) )
2366
2380
{
2367
- if ( args -> lines [i ]-> rid == args -> lines [j ]-> rid )
2381
+ if ( args -> lines [i ]. rec -> rid == args -> lines [j ]. rec -> rid )
2368
2382
{
2369
- bcf_unpack (args -> lines [i ], BCF_UN_STR );
2370
- bcf_unpack (args -> lines [j ], BCF_UN_STR );
2371
- if ( args -> cmp_func (& args -> lines [i ], & args -> lines [j ]) > 0 ) SWAP (bcf1_t * , args -> lines [i ], args -> lines [j ]);
2383
+ bcf_unpack (args -> lines [i ]. rec , BCF_UN_STR );
2384
+ bcf_unpack (args -> lines [j ]. rec , BCF_UN_STR );
2385
+ if ( args -> cmp_func (& args -> lines [i ]. rec , & args -> lines [j ]. rec ) > 0 ) SWAP (line_t , args -> lines [i ], args -> lines [j ]);
2372
2386
}
2373
2387
j = i ;
2374
2388
}
@@ -2443,8 +2457,8 @@ static void normalize_vcf(args_t *args)
2443
2457
if ( done ) break ; // no more lines available
2444
2458
int i = args -> rbuf .f ;
2445
2459
int j = rbuf_last (& args -> rbuf );
2446
- if ( args -> lines [i ]-> rid != args -> lines [j ]-> rid ) break ;
2447
- if ( args -> lines [i ]-> pos != args -> lines [j ]-> pos ) break ;
2460
+ if ( args -> lines [i ]. rec -> rid != args -> lines [j ]. rec -> rid ) break ;
2461
+ if ( args -> lines [i ]. rec -> pos != args -> lines [j ]. rec -> pos ) break ;
2448
2462
}
2449
2463
if ( done ) break ;
2450
2464
@@ -2454,16 +2468,16 @@ static void normalize_vcf(args_t *args)
2454
2468
int i , j = 0 ;
2455
2469
for (i = -1 ; rbuf_next (& args -> rbuf ,& i ); )
2456
2470
{
2457
- if ( args -> lines [ifst ]-> rid != args -> lines [ilast ]-> rid )
2471
+ if ( args -> lines [ifst ]. rec -> rid != args -> lines [ilast ]. rec -> rid )
2458
2472
{
2459
2473
// there are two chromosomes in the buffer, count how many are on the first chromosome
2460
- if ( args -> lines [ifst ]-> rid != args -> lines [i ]-> rid ) break ;
2474
+ if ( args -> lines [ifst ]. rec -> rid != args -> lines [i ]. rec -> rid ) break ;
2461
2475
j ++ ;
2462
2476
continue ;
2463
2477
}
2464
2478
// there is just one chromosome, flush only lines that are unlikely to change order on
2465
2479
// realigning (the buf_win constant)
2466
- if ( args -> lines [ilast ]-> pos - args -> lines [i ]-> pos < args -> buf_win ) break ;
2480
+ if ( args -> lines [ilast ]. rec -> pos - args -> lines [i ]. rec -> pos < args -> buf_win ) break ;
2467
2481
j ++ ;
2468
2482
}
2469
2483
if ( j > 0 ) flush_buffer (args , args -> out , j );
0 commit comments