Vibrato is a fast implementation of tokenization (or morphological analysis) based on the Viterbi algorithm.
```rust
use std::fs::File;
use std::io::{BufRead, BufReader};
use vibrato::{Dictionary, Tokenizer};
let file = File::open("src/tests/resources/system.dic").unwrap();
let dict = Dictionary::read(BufReader::new(file)).unwrap();
let tokenizer = vibrato::Tokenizer::new(dict);
let mut worker = tokenizer.new_worker();
worker.reset_sentence("京都東京都").unwrap();
worker.tokenize();
assert_eq!(worker.num_tokens(), 2);
let t0 = worker.token(0);
assert_eq!(t0.surface(), "京都");
assert_eq!(t0.range_char(), 0..2);
assert_eq!(t0.range_byte(), 0..6);
assert_eq!(t0.feature(), "京都,名詞,固有名詞,地名,一般,*,*,キョウト,京都,*,A,*,*,*,1/5");
let t1 = worker.token(1);
assert_eq!(t1.surface(), "東京都");
assert_eq!(t1.range_char(), 2..5);
assert_eq!(t1.range_byte(), 6..15);
assert_eq!(t1.feature(), "東京都,名詞,固有名詞,地名,一般,*,*,トウキョウト,東京都,*,B,5/9,*,5/9,*");
```
Licensed under either of

- Apache License, Version 2.0
- MIT license

at your option.
Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in the work by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions.