Open Source Love Crates.io Crates.io Build Status

GitHub forks GitHub stars

ik-rs

ik-analyzer for Rust

Supports Tantivy.

Usage

Chinese Segment

```rust
let mut ik = IKSegmenter::new();
let text = "中华人民共和国";
let tokens = ik.tokenize(text, TokenMode::SEARCH); // TokenMode::INDEX
for token in tokens {
    println!("{:?}", token);
}
```

Usage for Tantivy

todo

```rust

mod tests {
use ik_rs::core::ik_segmenter::TokenMode;
use ik_rs::IkTokenizer;
use tantivy::Index;
use tantivy::schema::{IndexRecordOption, Schema, TextFieldIndexing, TextOptions};

/// Builds an in-RAM index with a stored `title` text field, registers the IK
/// tokenizer under the names "ik-index" and "ik-search", and checks that both
/// names resolve on the index's tokenizer manager.
#[test]
fn it_works() {
    // `title` is indexed with the tokenizer named "ik-index", which is
    // registered on the index further down.
    let text_field_indexing = TextFieldIndexing::default()
        .set_tokenizer("ik-index")
        .set_index_option(IndexRecordOption::WithFreqsAndPositions);
    let text_options = TextOptions::default()
        .set_indexing_options(text_field_indexing)
        .set_stored();

    let mut schema_builder = Schema::builder();
    schema_builder.add_text_field("title", text_options);
    // The schema is not used again, so it is moved into the index instead of cloned.
    let index = Index::create_in_ram(schema_builder.build());

    let tokenizers = index.tokenizers();
    tokenizers.register("ik-index", IkTokenizer::new(TokenMode::INDEX));
    tokenizers.register("ik-search", IkTokenizer::new(TokenMode::SEARCH));

    // Without these checks the test could never fail on a registration problem.
    assert!(tokenizers.get("ik-index").is_some());
    assert!(tokenizers.get("ik-search").is_some());
}

}
```