Khmer, Lao, Myanmar, and Thai word segmentation/breaking library and command line
echo "กากกา" | wordcut
echo "กากกา" | wordcut -d <path to dictionary>
```Bash
echo "ພາສາລາວມີ" | wordcut -l lao
```
```Bash
echo នៅក្នុងការប្រកបរបរអាជីវកម្ | wordcut -l khmer
```
```Bash
echo မြန်မာမှာ | wordcut -l myanmar
```
```Bash
set -x
INPUT=thwik-head1m.txt
for i in {1..10}
do
  { time nlpo3 segment < $INPUT > o3 ; } 2>> bench_o3.txt
  { time wordcut < $INPUT > wc.txt ; } 2>> bench_wc.txt
done
```
[root@exper1 ~]# grep real bench_o3.txt
real 3m26.884s
real 3m15.001s
real 3m12.829s
real 3m11.998s
real 3m12.399s
real 3m13.829s
real 3m14.506s
real 3m9.198s
real 3m6.749s
real 3m8.729s
[root@exper1 ~]# grep real bench_wc.txt
real 1m41.611s
real 1m40.262s
real 1m40.488s
real 1m40.765s
real 1m39.385s
real 1m41.002s
real 1m38.292s
real 1m35.906s
real 1m40.263s
real 1m36.523s