Suffix array construction library for huge strings that require more space than the available memory.
suffine also has a command-line interface.
use suffine::IndexBuilder;
let text = "I scream, you scream, we all scream for ice cream!";
let index = IndexBuilder::new(text)
.block_size(1024 * 1024)
.build()
.unwrap();
assert_eq!(index.positions("cream"), &[30, 44, 15, 3]);
Alternatively, you can build the index directly on disk:
use std::fs::File;
use std::io::BufWriter;
let writer = BufWriter::new(File::create("index").unwrap());
IndexBuilder::new(text)
.block_size(1024 * 1024)
.build_to_writer_native_endian(writer)
.unwrap();
Later you can load the index:
use std::fs;
use suffine::Index;
let bytes = fs::read("index").unwrap();
let index = Index::from_bytes(text, &bytes);
suffine also has `MultiDocIndex`:
use suffine::MultiDocIndexBuilder;
let text = "Roses are red,
Violets are blue,
sugar is sweet,
And so are you.";
let multi_doc_index = MultiDocIndexBuilder::new(text)
.delimiter('\n')
.build()
.unwrap();
let result = multi_doc_index
.doc_positions("are")
.collect::<Vec<(u32, u32)>>();
assert_eq!(result, [(1, 8), (0, 6), (3, 7)]);