Comparing strings according to language-dependent conventions.
This module is published as its own crate (`icu_collator`) and as part of the `icu` crate. See the latter for more details on the ICU4X project.
`Collator` is the main structure of the component. It accepts a set of arguments
which allow it to collect necessary data from the data provider, and once
instantiated, can be used to compare strings.
Refer to the ICU User Guide sections for Collation that give an introduction and explain basic concepts.
As its most basic purpose, `Collator` offers locale-aware ordering:
```rust
use core::cmp::Ordering;
use icu::collator::*;
use icu::locid::{locale, Locale};

let locale_es: Locale = locale!("es-u-co-trad");
let mut options = CollatorOptions::new();
options.strength = Some(Strength::Primary);
let collator_es: Collator = Collator::try_new_unstable(
    &icu_testdata::unstable(),
    &locale_es.into(),
    options,
)
.unwrap();

// "pollo" > "polvo" in traditional Spanish
assert_eq!(collator_es.compare("pollo", "polvo"), Ordering::Greater);

let locale_en: Locale = locale!("en");
let mut options = CollatorOptions::new();
options.strength = Some(Strength::Primary);
let collator_en: Collator = Collator::try_new_unstable(
    &icu_testdata::unstable(),
    &locale_en.into(),
    options,
)
.unwrap();

// "pollo" < "polvo" according to English rules
assert_eq!(collator_en.compare("pollo", "polvo"), Ordering::Less);
```
## Examples of `CollatorOptions`

The [`CollatorOptions`] struct configures specific custom behavior for the `Collator`. See the docs
for [`CollatorOptions`] for more details. Some basic descriptions and examples are below.
### Strength

The degree of sensitivity in how to determine that strings are distinct.
```rust
use core::cmp::Ordering;
use icu::collator::*;

// Primary Level

let mut options_l1 = CollatorOptions::new();
options_l1.strength = Some(Strength::Primary);
let collator_l1: Collator = Collator::try_new_unstable(
    &icu_testdata::unstable(),
    &Default::default(),
    options_l1,
)
.unwrap();

assert_eq!(collator_l1.compare("a", "b"), Ordering::Less); // primary
assert_eq!(collator_l1.compare("as", "às"), Ordering::Equal); // secondary
assert_eq!(collator_l1.compare("às", "at"), Ordering::Less);
assert_eq!(collator_l1.compare("ao", "Ao"), Ordering::Equal); // tertiary
assert_eq!(collator_l1.compare("Ao", "aò"), Ordering::Equal);
assert_eq!(collator_l1.compare("A", "Ⓐ"), Ordering::Equal);

// Secondary Level

let mut options_l2 = CollatorOptions::new();
options_l2.strength = Some(Strength::Secondary);
let collator_l2: Collator = Collator::try_new_unstable(
    &icu_testdata::unstable(),
    &Default::default(),
    options_l2,
)
.unwrap();

assert_eq!(collator_l2.compare("a", "b"), Ordering::Less); // primary
assert_eq!(collator_l2.compare("as", "às"), Ordering::Less); // secondary
assert_eq!(collator_l2.compare("às", "at"), Ordering::Less);
assert_eq!(collator_l2.compare("ao", "Ao"), Ordering::Equal); // tertiary
assert_eq!(collator_l2.compare("Ao", "aò"), Ordering::Less);
assert_eq!(collator_l2.compare("A", "Ⓐ"), Ordering::Equal);

// Tertiary Level

let mut options_l3 = CollatorOptions::new();
options_l3.strength = Some(Strength::Tertiary);
let collator_l3: Collator = Collator::try_new_unstable(
    &icu_testdata::unstable(),
    &Default::default(),
    options_l3,
)
.unwrap();

assert_eq!(collator_l3.compare("a", "b"), Ordering::Less); // primary
assert_eq!(collator_l3.compare("as", "às"), Ordering::Less); // secondary
assert_eq!(collator_l3.compare("às", "at"), Ordering::Less);
assert_eq!(collator_l3.compare("ao", "Ao"), Ordering::Less); // tertiary
assert_eq!(collator_l3.compare("Ao", "aò"), Ordering::Less);
assert_eq!(collator_l3.compare("A", "Ⓐ"), Ordering::Less);
```
### Alternate Handling

Allows alternate handling for certain customized collation orderings, including the option to ignore the special handling for the strings of such customizations. Specifically, alternate handling is used to control the handling of the so-called variable characters in the Unicode Collation Algorithm: whitespace, punctuation and symbols.
Note that `AlternateHandling::ShiftTrimmed` and `AlternateHandling::Blanked` are
unimplemented. The default is `AlternateHandling::NonIgnorable`, except
for Thai, whose default is `AlternateHandling::Shifted`.
```rust
use core::cmp::Ordering;
use icu::collator::*;

// If alternate handling is set to `NonIgnorable`, then differences among
// these characters are of the same importance as differences among letters.

let mut options_3n = CollatorOptions::new();
options_3n.strength = Some(Strength::Tertiary);
options_3n.alternate_handling = Some(AlternateHandling::NonIgnorable);
let collator_3n: Collator =
    Collator::try_new_unstable(&icu_testdata::unstable(), &Default::default(), options_3n).unwrap();

assert_eq!(collator_3n.compare("di Silva", "Di Silva"), Ordering::Less);
assert_eq!(collator_3n.compare("Di Silva", "diSilva"), Ordering::Less);
assert_eq!(collator_3n.compare("diSilva", "U.S.A."), Ordering::Less);
assert_eq!(collator_3n.compare("U.S.A.", "USA"), Ordering::Less);

// If alternate handling is set to `Shifted`, then these characters are of only minor
// importance. The Shifted value is often used in combination with Strength
// set to Quaternary.

let mut options_3s = CollatorOptions::new();
options_3s.strength = Some(Strength::Tertiary);
options_3s.alternate_handling = Some(AlternateHandling::Shifted);
let collator_3s: Collator =
    Collator::try_new_unstable(&icu_testdata::unstable(), &Default::default(), options_3s).unwrap();

assert_eq!(collator_3s.compare("di Silva", "diSilva"), Ordering::Equal);
assert_eq!(collator_3s.compare("diSilva", "Di Silva"), Ordering::Less);
assert_eq!(collator_3s.compare("Di Silva", "U.S.A."), Ordering::Less);
assert_eq!(collator_3s.compare("U.S.A.", "USA"), Ordering::Equal);

let mut options_4s = CollatorOptions::new();
options_4s.strength = Some(Strength::Quaternary);
options_4s.alternate_handling = Some(AlternateHandling::Shifted);
let collator_4s: Collator =
    Collator::try_new_unstable(&icu_testdata::unstable(), &Default::default(), options_4s).unwrap();

assert_eq!(collator_4s.compare("di Silva", "diSilva"), Ordering::Less);
assert_eq!(collator_4s.compare("diSilva", "Di Silva"), Ordering::Less);
assert_eq!(collator_4s.compare("Di Silva", "U.S.A."), Ordering::Less);
assert_eq!(collator_4s.compare("U.S.A.", "USA"), Ordering::Less);
```
### Case Level

Whether to distinguish case in sorting, even for sorting levels higher than tertiary, without having to use tertiary level just to enable case level differences.
```rust
use core::cmp::Ordering;
use icu::collator::*;

// Primary

let mut options = CollatorOptions::new();
options.strength = Some(Strength::Primary);
options.case_level = Some(CaseLevel::Off);
let primary =
    Collator::try_new_unstable(&icu_testdata::unstable(), &Default::default(), options).unwrap();

assert_eq!(primary.compare("ⓓⓔⓐⓛ", "DEAL"), Ordering::Equal);
assert_eq!(primary.compare("dejavu", "dejAvu"), Ordering::Equal);
assert_eq!(primary.compare("dejavu", "déjavu"), Ordering::Equal);

// Primary with case level on

options.strength = Some(Strength::Primary);
options.case_level = Some(CaseLevel::On);
let primary_and_case =
    Collator::try_new_unstable(&icu_testdata::unstable(), &Default::default(), options).unwrap();

assert_eq!(primary_and_case.compare("ⓓⓔⓐⓛ", "DEAL"), Ordering::Equal);
assert_eq!(primary_and_case.compare("dejavu", "dejAvu"), Ordering::Equal);
assert_eq!(primary_and_case.compare("dejavu", "déjavu"), Ordering::Equal);

// Secondary with case level on

options.strength = Some(Strength::Secondary);
options.case_level = Some(CaseLevel::On);
let secondary_and_case =
    Collator::try_new_unstable(&icu_testdata::unstable(), &Default::default(), options).unwrap();

assert_eq!(secondary_and_case.compare("ⓓⓔⓐⓛ", "DEAL"), Ordering::Equal);
assert_eq!(secondary_and_case.compare("dejavu", "dejAvu"), Ordering::Equal);
assert_eq!(secondary_and_case.compare("dejavu", "déjavu"), Ordering::Less); // secondary difference

// Tertiary

options.strength = Some(Strength::Tertiary);
options.case_level = Some(CaseLevel::Off);
let tertiary =
    Collator::try_new_unstable(&icu_testdata::unstable(), &Default::default(), options).unwrap();

assert_eq!(tertiary.compare("ⓓⓔⓐⓛ", "DEAL"), Ordering::Less);
assert_eq!(tertiary.compare("dejavu", "dejAvu"), Ordering::Less);
assert_eq!(tertiary.compare("dejavu", "déjavu"), Ordering::Less);
```
### Case First

Whether to swap the ordering of uppercase and lowercase.
### Backward second level

Compare the second level in backward order. The default is `false` (off), except for Canadian
French.
### Numeric

When set to `true` (on), any sequence of decimal
digits is sorted at a primary level according to the
numeric value.
```rust
use core::cmp::Ordering;
use icu::collator::*;

// Numerical sorting off

let mut options_num_off = CollatorOptions::new();
options_num_off.numeric = Some(Numeric::Off);
let collator_num_off: Collator = Collator::try_new_unstable(
    &icu_testdata::unstable(),
    &Default::default(),
    options_num_off,
)
.unwrap();
assert_eq!(collator_num_off.compare("a10b", "a2b"), Ordering::Less);

// Numerical sorting on

let mut options_num_on = CollatorOptions::new();
options_num_on.numeric = Some(Numeric::On);
let collator_num_on: Collator = Collator::try_new_unstable(
    &icu_testdata::unstable(),
    &Default::default(),
    options_num_on,
)
.unwrap();
assert_eq!(collator_num_on.compare("a10b", "a2b"), Ordering::Greater);
```
For more information on development, authorship, contributing, etc., please visit the [ICU4X home page](https://github.com/unicode-org/icu4x).