Crates.io License Open Source Love Build Status

GitHub forks GitHub stars

ik-rs

ik-analyzer for Rust

Supports Tantivy.

Usage

Chinese Segmentation

```rust
let mut ik = IKSegmenter::new();
let text = "中华人民共和国";
let tokens = ik.tokenize(text, TokenMode::INDEX); // TokenMode::SEARCH
for token in tokens {
    println!("{:?}", token);
}
```

Usage for Tantivy

```rust

#[cfg(test)]
mod tests {
    use ik_rs::core::ik_segmenter::TokenMode;
    use ik_rs::IkTokenizer;
    use tantivy::schema::{IndexRecordOption, Schema, TextFieldIndexing, TextOptions};
    use tantivy::Index;

    /// Builds an in-RAM Tantivy index with a stored `title` text field that
    /// refers to the `"ik-index"` tokenizer, then registers `IkTokenizer`
    /// under the names `"ik-index"` and `"ik-search"`.
    #[test]
    fn it_works() {
        let mut schema_builder = Schema::builder();

        // The field refers to its tokenizer by name; the same name is
        // registered on the index below.
        let text_field_indexing = TextFieldIndexing::default()
            .set_tokenizer("ik-index")
            .set_index_option(IndexRecordOption::WithFreqsAndPositions);
        let text_options = TextOptions::default()
            .set_indexing_options(text_field_indexing)
            .set_stored();
        schema_builder.add_text_field("title", text_options);

        let schema = schema_builder.build();
        let index = Index::create_in_ram(schema.clone());

        // One registration per token mode.
        index
            .tokenizers()
            .register("ik-index", IkTokenizer::new(TokenMode::INDEX));
        index
            .tokenizers()
            .register("ik-search", IkTokenizer::new(TokenMode::SEARCH));
    }
}

```

Rust developers and search engine developers are welcome to join us and help maintain this project together!

You can open a pull request or submit an issue.

You can also star ⭐️ or fork this project to support me!