A Rust wrapper for Google Tesseract
Add the following line to your Cargo.toml file:
```toml
rusty-tesseract = "1.1.7"
```
Tesseract: https://github.com/tesseract-ocr/tesseract
Create an `Image` object by specifying a path or, alternatively, from a `DynamicImage` provided by the image crate (https://docs.rs/image/latest/image/):
```rust
// you can use the from_path function
let _ = Image::from_path("img/string.png");

// or instantiate Image from a DynamicImage
let dynamic_image = ImageReader::open("img/string.png")
    .unwrap()
    .decode()
    .unwrap();
let img = Image::from_dynamic_image(&dynamic_image).unwrap();
```
Set tesseract parameters using the Args struct.
```rust
let default_args = Args::default();

// the default parameters are
/*
Args {
    lang: "eng",
    dpi: Some(150),
    psm: Some(3),
    oem: Some(3),
}
*/

// fill your own argument struct if needed
// Optional arguments are ignored if set to None
let mut my_args = Args {
    // model language (tesseract default = 'eng')
    // available languages can be found by running 'rusty_tesseract::get_tesseract_langs()'
    lang: "eng",
    // map of config variables
    // this example shows a whitelist for the normal alphabet. Multiple arguments are allowed.
    // available arguments can be found by running 'rusty_tesseract::get_tesseract_config_parameters()'
    config_variables: HashMap::from([(
        "tessedit_char_whitelist".into(),
        "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".into(),
    )]),
    dpi: Some(150), // specify DPI for input image
    psm: Some(6),   // define page segmentation mode 6 (i.e. "Assume a single uniform block of text")
    oem: Some(3),   // define optical character recognition mode 3 (i.e. "Default, based on what is available")
};
```
Choose either string, bounding box or data output:
```rust
// define parameters
let mut my_args = Args {
    lang: "eng",
    config_variables: HashMap::from([(
        "tessedit_char_whitelist".into(),
        "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".into(),
    )]),
    dpi: Some(150),
    psm: Some(6),
    oem: Some(3),
};

// string output
let output = rusty_tesseract::image_to_string(&img, &my_args).unwrap();
println!("The String output is: {:?}", output);

// image_to_boxes creates a BoxOutput containing the parsed output from Tesseract when using the "makebox" Parameter
let box_output = rusty_tesseract::image_to_boxes(&img, &my_args).unwrap();
println!(
    "The first boxfile symbol is: {}",
    box_output.boxes[0].symbol
);
println!("The full boxfile output is:\n{}", box_output.output);

// image_to_data creates a DataOutput containing the parsed output from Tesseract when using the "TSV" Parameter
let data_output = rusty_tesseract::image_to_data(&img, &my_args).unwrap();
let first_text_line = &data_output.data[4];
println!(
    "The first text is '{}' with confidence {}",
    first_text_line.text, first_text_line.conf
);
println!("The full data output is:\n{}", data_output.output);
```
```rust
// tesseract version
let tesseract_version = rusty_tesseract::get_tesseract_version().unwrap();
println!("The tesseract version is: {:?}", tesseract_version);

// available languages
let tesseract_langs = rusty_tesseract::get_tesseract_langs().unwrap();
println!("The available languages are: {:?}", tesseract_langs);

// available config parameters
let parameters = rusty_tesseract::get_tesseract_config_parameters().unwrap();
println!("Example config parameter: {}", parameters.config_parameters.first().unwrap());
```
```sh
git checkout -b my-feature-branch-name
git commit -m 'commit message' <changed-file>
git push origin my-feature-branch-name
```