A Tokenizer for Tantivy, based on Lindera.
The following products are required to build:

```shell
% make build
```
```rust
use lindera_tantivy::tokenizer::*;
use tantivy::tokenizer::Tokenizer;

fn main() -> std::io::Result<()> {
    let tokenizer = LinderaTokenizer::new("normal", "");
    let mut stream = tokenizer.token_stream("すもももももももものうち");
    {
        let token = stream.next().unwrap();
        assert_eq!(token.text, "すもも");
        assert_eq!(token.offset_from, 0);
        assert_eq!(token.offset_to, 9);
    }
    {
        let token = stream.next().unwrap();
        assert_eq!(token.text, "も");
        assert_eq!(token.offset_from, 9);
        assert_eq!(token.offset_to, 12);
    }
    {
        let token = stream.next().unwrap();
        assert_eq!(token.text, "もも");
        assert_eq!(token.offset_from, 12);
        assert_eq!(token.offset_to, 18);
    }
    {
        let token = stream.next().unwrap();
        assert_eq!(token.text, "も");
        assert_eq!(token.offset_from, 18);
        assert_eq!(token.offset_to, 21);
    }
    {
        let token = stream.next().unwrap();
        assert_eq!(token.text, "もも");
        assert_eq!(token.offset_from, 21);
        assert_eq!(token.offset_to, 27);
    }
    {
        let token = stream.next().unwrap();
        assert_eq!(token.text, "の");
        assert_eq!(token.offset_from, 27);
        assert_eq!(token.offset_to, 30);
    }
    {
        let token = stream.next().unwrap();
        assert_eq!(token.text, "うち");
        assert_eq!(token.offset_from, 30);
        assert_eq!(token.offset_to, 36);
    }
    assert!(stream.next().is_none());
    Ok(())
}
```
The API reference is available. Please see the following URL:

- [lindera-tantivy](https://docs.rs/lindera-tantivy)