A Rust wrapper for Google Tesseract
Add the following line to your Cargo.toml file:
```toml
rusty-tesseract = "1.1.1"
```
Tesseract: https://github.com/tesseract-ocr/tesseract
Create an `Image` object by specifying a path or, alternatively, from a `DynamicImage` of the image crate (https://docs.rs/image/latest/image/):
```rust
// you can use the from_path function
let _ = Image::from_path("img/string.png");

// or instantiate Image from a DynamicImage
let dynamic_image = ImageReader::open("img/string.png")
    .unwrap()
    .decode()
    .unwrap();
let img = Image::from_dynamic_image(&dynamic_image).unwrap();
```
Set tesseract parameters using the Args struct.
```rust
let default_args = Args::default();

// the default parameters are
/*
Args {
    lang: "eng",
    dpi: 150,
    psm: 3,
    oem: 3,
}
*/

// fill your own argument struct if needed
let mut my_args = Args {
    lang: "eng", // model language (tesseract default = 'eng')
    // use config_variables: "=" if no config variables are required
    // -c VAR=VALUE sets the value of a config variable. This example shows a
    // whitelist for the normal alphabet. Multiple -c arguments are allowed.
    // Allowed commands can be found by running 'tesseract --print-parameters'
    config_variables: "'tessedit_char_whitelist=abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'",
    dpi: 150, // specify DPI for input image
    psm: 3,   // define page segmentation mode 3 (i.e. "Fully automatic page segmentation, but no OSD")
    oem: 3,   // define OCR engine mode 3 (i.e. "Default, based on what is available")
};
```
Choose either string, bounding box or data output:
```rust
// define parameters
let mut my_args = Args {
    lang: "eng",
    config_variables: "'tessedit_char_whitelist=abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'",
    dpi: 150,
    psm: 6,
    oem: 3
};

// string output
let output = rusty_tesseract::image_to_string(&img, &my_args).unwrap();
println!("The String output is: {:?}", output);

// image_to_boxes creates a BoxOutput containing the parsed output from Tesseract when using the "makebox" Parameter
let box_output = rusty_tesseract::image_to_boxes(&img, &my_args).unwrap();
println!(
    "The first boxfile symbol is: {}",
    box_output.boxes[0].symbol
);
println!("The full boxfile output is:\n{}", box_output.output);

// image_to_data creates a DataOutput containing the parsed output from Tesseract when using the "TSV" Parameter
let data_output = rusty_tesseract::image_to_data(&img, &my_args).unwrap();
let first_text_line = &data_output.data[4];
println!(
    "The first text is '{}' with confidence {}",
    first_text_line.text, first_text_line.conf
);
println!("The full data output is:\n{}", data_output.output);
```
```rust
let tesseract_version = rusty_tesseract::get_tesseract_version().unwrap();
println!("The tesseract version is: {:?}", tesseract_version);
```
Typical Git workflow for contributing a change (create a feature branch, commit, push):

```sh
git checkout -b my-feature-branch-name
git commit -m 'commit message' <changed-file>
git push origin my-feature-branch-name
```