ES实现自动补全,自定义拼音分词插件
如果希望用户输入拼音(或汉字)时也能通过DSL查询匹配到文档,即对汉字和拼音都能进行分词检索,可以这样操作:
下载拼音分词器,并安装到es的plugins中
https://github.com/medcl/elasticsearch-analysis-pinyin
查询时使用“pinyin”作为分词器
但这样操作会有一些问题:如果只使用pinyin作为分词器,汉字会被逐字转换成拼音,分词结果中不保留汉字,并且每个字的拼音各自成为一个词条,而不是按词语切分。
这显然不符合我们的要求,所以在创建索引库的时候,要进行自定义分词配置,下面是一个简单的配置例子:
// Create an index whose custom analyzers tokenize with IK and then run every
// token through a pinyin token filter.
PUT index
{
  "settings": {
    "analysis": {
      "analyzer": {
        // ik_smart tokenizer followed by the pinyin filter defined below.
        "ik_smart_pinyin": {
          "tokenizer": "ik_smart",
          "filter": ["pinyin_first_letter_and_full_pinyin_filter"]
        },
        // ik_max_word tokenizer followed by the same pinyin filter.
        "ik_max_pinyin": {
          "tokenizer": "ik_max_word",
          "filter": ["pinyin_first_letter_and_full_pinyin_filter"]
        }
      },
      "filter": {
        // Token filter provided by the elasticsearch-analysis-pinyin plugin.
        "pinyin_first_letter_and_full_pinyin_filter": {
          "type": "pinyin",
          "keep_separate_first_letter": false, // do not emit each first letter as its own term
          "keep_full_pinyin": true,            // emit the full pinyin of every character
          "keep_original": true,               // keep the original (Chinese) token as well
          "limit_first_letter_length": 16,     // maximum length of the first-letter term
          "lowercase": true,                   // lowercase non-Chinese letters
          "remove_duplicated_term": true       // drop duplicate terms
        }
      }
    }
  }
}
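需要注意search_analyzer的使用:带拼音的分词器只应在创建倒排索引时使用,搜索时应通过search_analyzer指定不带拼音的分词器(如ik_smart)。否则用户输入的汉字也会被转成拼音,拼音相同的同音词(如“狮子”和“虱子”)会在倒排索引中对应到相同的文档编号,导致搜到同音但无关的文档。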
自动补全查询(completion suggester查询)
参与补全查询的字段必须是completion类型;字段的内容一般是用于补全的多个词条组成的数组(即把要补全的词拆分后放入数组)。
// Auto-completion query (completion suggester)
POST /test2/_search
{
  "suggest": {
    "title_suggest": {
      "text": "s",                 // the text typed so far (prefix to complete)
      "completion": {
        "field": "title",          // field used for completion
        "skip_duplicates": true,   // skip duplicate suggestions
        "size": 10                 // return the first 10 results
      }
    }
  }
}
这里给出一个支持自动补全的索引库创建示例,以及对应的查询:
// Create the students index: two custom pinyin-aware analyzers plus the
// field mappings that use them.
PUT /students
{
  "settings": {
    "analysis": {
      "analyzer": {
        // NOTE(review): "text_anlyzer" looks like a misspelling of
        // "text_analyzer". It is kept unchanged because the mappings below
        // (and possibly other requests) refer to it by this exact name.
        "text_anlyzer": {
          "tokenizer": "ik_max_word",
          "filter": ["py"]
        },
        // keyword tokenizer: the whole value is one token, then passed to "py".
        "completion_analyzer": {
          "tokenizer": "keyword",
          "filter": ["py"]
        }
      },
      "filter": {
        // Token filter provided by the elasticsearch-analysis-pinyin plugin.
        "py": {
          "type": "pinyin",
          "keep_full_pinyin": false,             // no separate pinyin term per character
          "keep_joined_full_pinyin": true,       // emit the joined full pinyin of the token
          "keep_original": true,                 // keep the original (Chinese) token as well
          "limit_first_letter_length": 16,       // maximum length of the first-letter term
          "remove_duplicated_term": true,        // drop duplicate terms
          "none_chinese_pinyin_tokenize": false  // do not split non-Chinese letters into pinyin terms
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "id": {
        "type": "keyword"
      },
      // Indexed with the pinyin-aware analyzer, searched with plain ik_smart;
      // the value is also copied into the "all" field.
      "name": {
        "type": "text",
        "analyzer": "text_anlyzer",
        "search_analyzer": "ik_smart",
        "copy_to": "all"
      },
      // "index": false -> the field is not indexed.
      "address": {
        "type": "keyword",
        "index": false
      },
      "city": {
        "type": "keyword"
      },
      "starName": {
        "type": "keyword"
      },
      // Target of copy_to from "name"; same analyzer pair as "name".
      "all": {
        "type": "text",
        "analyzer": "text_anlyzer",
        "search_analyzer": "ik_smart"
      },
      // Completion-typed field queried by the completion suggester.
      "suggestion": {
        "type": "completion",
        "analyzer": "completion_analyzer"
      }
    }
  }
}
查询
// Search the students index with a match_all query (no filtering).
GET /students/_search
{
  "query": {
    "match_all": {}
  }
}
// Completion-suggester query against the "suggestion" field of the students index.
GET /students/_search
{
  "suggest": {
    "YOUR_SUGGESTION": {
      "text": "s",                // the text typed so far (prefix to complete)
      "completion": {
        "field": "suggestion",
        "skip_duplicates": true   // skip duplicate suggestions
      }
    }
  }
}