@@ -67,8 +67,6 @@ def get_header(self):
67
67
"SOMATIC" ,
68
68
"VALIDATED" ,
69
69
"1000G" ,
70
- # conflicts with 'variant_id' variable; see RESERVED_VARIABLE_NAMES in sgkit
71
- "id" ,
72
70
]
73
71
74
72
# [Table 2: Reserved genotype keys]
@@ -101,9 +99,10 @@ def vcf_field_keys(category):
101
99
field_key_regex = r"[A-Za-z_][0-9A-Za-z_.]"
102
100
103
101
def is_reserved_key (key ):
104
- return (category == "INFO" and key in RESERVED_INFO_KEYS ) or (
105
- category == "FORMAT" and key in RESERVED_FORMAT_KEYS
106
- )
102
+ # 'id' is reserved since it conflicts with 'variant_id' variable in VCF Zarr
103
+ return (
104
+ category == "INFO" and key in RESERVED_INFO_KEYS or key .lower () == "id"
105
+ ) or (category == "FORMAT" and key in RESERVED_FORMAT_KEYS )
107
106
108
107
return from_regex (field_key_regex , fullmatch = True ).filter (
109
108
lambda key : not is_reserved_key (key )
@@ -275,18 +274,20 @@ def vcf(
275
274
-------
276
275
A Hypothesis strategy to generate a VCF file, including header, as a string.
277
276
"""
277
+ # ensure INFO and FORMAT keys are unique ignoring case to avoid macOS filesystem
278
+ # case-insensitivity issue for VCF Zarr
278
279
info_fields = draw (
279
280
lists (
280
281
vcf_fields ("INFO" , max_number = max_number ),
281
282
max_size = max_info_fields ,
282
- unique_by = lambda f : f .vcf_key ,
283
+ unique_by = lambda f : f .vcf_key . lower () ,
283
284
)
284
285
)
285
286
format_fields = draw (
286
287
lists (
287
288
vcf_fields ("FORMAT" , max_number = max_number ),
288
289
max_size = max_format_fields ,
289
- unique_by = lambda f : f .vcf_key ,
290
+ unique_by = lambda f : f .vcf_key . lower () ,
290
291
)
291
292
)
292
293
sample_ids = draw (
0 commit comments