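Manticore provides built-in support for indexing CJK texts, allowing you to process CJK texts in two different ways: ICU-based segmentation (`morphology = 'icu_chinese'`) or N-gram indexing (`ngram_len` together with `ngram_chars`). Examples of both follow.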
CREATE TABLE products(title text, price float) charset_table = 'cjk' morphology = 'icu_chinese'
POST /cli -d "
CREATE TABLE products(title text, price float) charset_table = 'cjk' morphology = 'icu_chinese'"
$index = new \Manticoresearch\Index($client);
$index->setName('products');
$index->create([
'title'=>['type'=>'text'],
'price'=>['type'=>'float']
],[
'charset_table' => 'cjk',
'morphology' => 'icu_chinese'
]);
utilsApi.sql('CREATE TABLE products(title text, price float) charset_table = \'cjk\' morphology = \'icu_chinese\'')
res = await utilsApi.sql('CREATE TABLE products(title text, price float) charset_table = \'cjk\' morphology = \'icu_chinese\'');
utilsApi.sql("CREATE TABLE products(title text, price float) charset_table = 'cjk' morphology = 'icu_chinese'");
table products {
charset_table = cjk
morphology = icu_chinese
type = rt
path = tbl
rt_field = title
rt_attr_float = price
}
There are separate character set tables (`chinese`, `korean`, `japanese`) that can be used, or you can use the common `cjk` character set table.
CREATE TABLE products(title text, price float) charset_table = 'non_cjk' ngram_len = '1' ngram_chars = 'cjk'
POST /cli -d "
CREATE TABLE products(title text, price float) charset_table = 'non_cjk' ngram_len = '1' ngram_chars = 'cjk'"
$index = new \Manticoresearch\Index($client);
$index->setName('products');
$index->create([
'title'=>['type'=>'text'],
'price'=>['type'=>'float']
],[
'charset_table' => 'non_cjk',
'ngram_len' => '1',
'ngram_chars' => 'cjk'
]);
utilsApi.sql('CREATE TABLE products(title text, price float) charset_table = \'non_cjk\' ngram_len = \'1\' ngram_chars = \'cjk\'')
res = await utilsApi.sql('CREATE TABLE products(title text, price float) charset_table = \'non_cjk\' ngram_len = \'1\' ngram_chars = \'cjk\'');
utilsApi.sql("CREATE TABLE products(title text, price float) charset_table = 'non_cjk' ngram_len = '1' ngram_chars = 'cjk'");
table products {
charset_table = non_cjk
ngram_len = 1
ngram_chars = cjk
type = rt
path = tbl
rt_field = title
rt_attr_float = price
}
Additionally, there is built-in support for Chinese stopwords with the alias `zh`.
CREATE TABLE products(title text, price float) charset_table = 'chinese' morphology = 'icu_chinese' stopwords = 'zh'
POST /cli -d "
CREATE TABLE products(title text, price float) charset_table = 'chinese' morphology = 'icu_chinese' stopwords = 'zh'"
$index = new \Manticoresearch\Index($client);
$index->setName('products');
$index->create([
'title'=>['type'=>'text'],
'price'=>['type'=>'float']
],[
'charset_table' => 'chinese',
'morphology' => 'icu_chinese',
'stopwords' => 'zh'
]);
utilsApi.sql('CREATE TABLE products(title text, price float) charset_table = \'chinese\' morphology = \'icu_chinese\' stopwords = \'zh\'')
res = await utilsApi.sql('CREATE TABLE products(title text, price float) charset_table = \'chinese\' morphology = \'icu_chinese\' stopwords = \'zh\'');
utilsApi.sql("CREATE TABLE products(title text, price float) charset_table = 'chinese' morphology = 'icu_chinese' stopwords = 'zh'");
table products {
charset_table = chinese
morphology = icu_chinese
stopwords = zh
type = rt
path = tbl
rt_field = title
rt_attr_float = price
}