Elasticsearch 大小写敏感问题(raw/keyword 对 wildcard 的支持)
# 问题背景_elasticsearch 的 keyword/raw 是对字段值的逐字存储的,就是没有做分词,所以对英文字母大小写敏感。当我们使用wildcard 做模糊查询时就会遇到查询结果集不全或者查询不到结果集_# 解决方案_我们需要在索引文档的时候对raw/keyword做下特殊处理,下面是具体实现步骤_+ 在setting中设置分析器```PUT /test_raw{"settings": {"ana
·
问题背景
_Elasticsearch 的 keyword/raw 字段是对字段值的逐字存储,既不做分词,也不做大小写转换,所以对英文字母大小写敏感。当我们使用 wildcard 做模糊查询时,就会遇到查询结果集不全或者查询不到结果的情况_
解决方案
_我们需要在索引文档的时候对 raw/keyword 做一下特殊处理,下面是具体实现步骤_
- 在 settings 中设置分析器和 normalizer,并在 mappings 中让 raw 子字段引用该 normalizer
PUT /test_raw
{
"settings": {
"analysis": {
"filter": {
"my_synonym": {
"type": "synonym",
"synonyms_path": "synonym.dic"
}
},
"analyzer": {
"pinyin_analyzer": {
"tokenizer": "my_pinyin"
},
"first_letter_pinyin_analyzer": {
"tokenizer": "my_first_letter_pinyin"
},
"my_ik_max_word": {
"tokenizer": "ik_max_word",
"filter": [
"my_synonym"
]
},
"my_ik_smart": {
"tokenizer": "ik_smart",
"filter": [
"my_synonym"
]
}
},
"normalizer": {
"lowercase": {
"type": "custom",
"filter": [
"lowercase"
]
}
},
"tokenizer": {
"my_pinyin": {
"type": "pinyin",
"keep_first_letter": true,
"keep_separate_first_letter": false,
"keep_full_pinyin": false,
"keep_original": false,
"lowercase": true
},
"my_first_letter_pinyin": {
"type": "pinyin",
"keep_first_letter": false,
"keep_separate_first_letter": false,
"keep_joined_full_pinyin": true,
"keep_none_chinese": true,
"keep_none_chinese_in_joined_full_pinyin": true,
"none_chinese_pinyin_tokenize": false,
"keep_full_pinyin": false,
"keep_original": false,
"limit_first_letter_length": 50,
"trim_whitespace": false,
"lowercase": true
}
}
},
"number_of_shards": 1,
"number_of_replicas": 0,
"max_result_window": "1000000"
},
"mappings": {
"dynamic_templates": [
{
"strings": {
"match_mapping_type": "string",
"mapping": {
"type": "text",
"store": true,
"fields": {
"spy": {
"type": "text",
"store": false,
"fielddata": "true",
"analyzer": "pinyin_analyzer"
},
"ik": {
"type": "text",
"store": false,
"analyzer": "ik_max_word",
"search_analyzer": "ik_smart"
},
"syno_ik": {
"type": "text",
"store": false,
"analyzer": "my_ik_max_word",
"search_analyzer": "ik_smart"
},
"raw": {
"type": "keyword",
"normalizer": "lowercase"
},
"py": {
"type": "text",
"store": false,
"fielddata": "true",
"analyzer": "first_letter_pinyin_analyzer"
}
}
}
}
}
]
}
}
上面给 raw 子字段设置了 "normalizer": "lowercase",这样 keyword 的值在写入索引时会先被统一转换成小写
- 添加数据
POST test_raw/_doc/1
{
"name":"Low case Hello",
"title":"我爱北京天安门ttTT Hello",
"content":"世界那么大nibu Hello",
"completion":123
}
POST test_raw/_doc/2
{
"name":"Low case hello",
"title":"我爱北京天安门ttTT hello",
"content":"世界那么大nibu hello",
"completion":123
}
上面添加了两个文档,内容分别包含 Hello 和 hello
- ik分词查询效果
GET test_raw/_search
{
"query": {
"multi_match": {
"query": "Hello",
"fields": ["title.ik","name.ik"]
}
}
}
结果集:
{
"took" : 1,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : 0.18232156,
"hits" : [
{
"_index" : "test_raw",
"_type" : "_doc",
"_id" : "1",
"_score" : 0.18232156,
"_source" : {
"name" : "Low case Hello",
"title" : "我爱北京天安门ttTT Hello",
"content" : "世界那么大nibu Hello",
"completion" : 123
}
},
{
"_index" : "test_raw",
"_type" : "_doc",
"_id" : "2",
"_score" : 0.18232156,
"_source" : {
"name" : "Low case hello",
"title" : "我爱北京天安门ttTT hello",
"content" : "世界那么大nibu hello",
"completion" : 123
}
}
]
}
}
- wildcard 效果
GET test_raw/_search
{
"query": {
"wildcard": {
"name.raw": {
"value": "*Hello*"
}
}
}
}
结果集:
{
"took" : 0,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : 0.18232156,
"hits" : [
{
"_index" : "test_raw",
"_type" : "_doc",
"_id" : "1",
"_score" : 0.18232156,
"_source" : {
"name" : "Low case Hello",
"title" : "我爱北京天安门ttTT Hello",
"content" : "世界那么大nibu Hello",
"completion" : 123
}
},
{
"_index" : "test_raw",
"_type" : "_doc",
"_id" : "2",
"_score" : 0.18232156,
"_source" : {
"name" : "Low case hello",
"title" : "我爱北京天安门ttTT hello",
"content" : "世界那么大nibu hello",
"completion" : 123
}
}
]
}
}
可以看到,使用 *Hello* 做 wildcard 查询时,包含 Hello 和 hello 的两个文档都被命中,满足了我们的预期。
更多推荐
已为社区贡献1条内容
所有评论(0)