Skip to content

Commit 5003c65

Browse files
committed
Ensure INFO and FORMAT keys are unique ignoring case
1 parent 46093a1 commit 5003c65

File tree

1 file changed

+8
-7
lines changed

1 file changed

+8
-7
lines changed

hypothesis_vcf/strategies.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -67,8 +67,6 @@ def get_header(self):
6767
"SOMATIC",
6868
"VALIDATED",
6969
"1000G",
70-
# conflicts with 'variant_id' variable; see RESERVED_VARIABLE_NAMES in sgkit
71-
"id",
7270
]
7371

7472
# [Table 2: Reserved genotype keys]
@@ -101,9 +99,10 @@ def vcf_field_keys(category):
10199
field_key_regex = r"[A-Za-z_][0-9A-Za-z_.]"
102100

103101
def is_reserved_key(key):
104-
return (category == "INFO" and key in RESERVED_INFO_KEYS) or (
105-
category == "FORMAT" and key in RESERVED_FORMAT_KEYS
106-
)
102+
# 'id' is reserved since it conflicts with 'variant_id' variable in VCF Zarr
103+
return (
104+
category == "INFO" and key in RESERVED_INFO_KEYS or key.lower() == "id"
105+
) or (category == "FORMAT" and key in RESERVED_FORMAT_KEYS)
107106

108107
return from_regex(field_key_regex, fullmatch=True).filter(
109108
lambda key: not is_reserved_key(key)
@@ -275,18 +274,20 @@ def vcf(
275274
-------
276275
A Hypothesis strategy to generate a VCF file, including header, as a string.
277276
"""
277+
# ensure INFO and FORMAT keys are unique ignoring case to avoid macOS filesystem
278+
# case-insensitivity issue for VCF Zarr
278279
info_fields = draw(
279280
lists(
280281
vcf_fields("INFO", max_number=max_number),
281282
max_size=max_info_fields,
282-
unique_by=lambda f: f.vcf_key,
283+
unique_by=lambda f: f.vcf_key.lower(),
283284
)
284285
)
285286
format_fields = draw(
286287
lists(
287288
vcf_fields("FORMAT", max_number=max_number),
288289
max_size=max_format_fields,
289-
unique_by=lambda f: f.vcf_key,
290+
unique_by=lambda f: f.vcf_key.lower(),
290291
)
291292
)
292293
sample_ids = draw(

0 commit comments

Comments
 (0)