Table of Contents
Elasticsearch 자동완성(autocomplete) 구현하기
Elasticsearch에서 자동완성을 구현하는 세 가지 방법(completion suggester, ngram tokenizer, Korean Jaso Analyzer)을 차례로 살펴봅니다.
데이터 준비하기
curl -XDELETE http://localhost:9200/auto_complete?pretty -H 'Content-Type: application/json'
curl -XPUT 'http://localhost:9200/auto_complete?include_type_name=true&pretty' -H 'Content-Type: application/json' -d '{
"mappings": {
"_doc": {
"properties": {
"search_string": {
"type": "completion"
}
}
}
}
}'
vi data.json
{ "index":{ "_index" : "auto_complete", "_type" : "_doc" } }
{ "search_string":"셀프빨래방"}
{ "index":{ "_index" : "auto_complete", "_type" : "_doc" } }
{ "search_string":"빨래건조대"}
{ "index":{ "_index" : "auto_complete", "_type" : "_doc" } }
{ "search_string":"볼빨간사춘기"}
{ "index":{ "_index" : "auto_complete", "_type" : "_doc" } }
{ "search_string":"빨래건조기"}
curl -XPOST http://localhost:9200/_bulk?pretty -H 'Content-Type: application/json' --data-binary @data.json
curl -XPOST 'localhost:9200/auto_complete/_search?pretty' -H 'Content-Type: application/json' -d'{
"query": {
"match_all": {}
}
}'
completion 을 이용한 자동완성
completion suggester는 입력한 접두어(prefix)로 시작하는 데이터만 반환합니다. 따라서 '셀프빨래방'이나 '볼빨간사춘기'처럼 검색어가 중간에 들어 있는 항목은 결과에 포함되지 않습니다.
curl -XGET 'localhost:9200/auto_complete/_search?pretty' -H 'Content-Type: application/json' -d'{
"suggest": {
"search-string-suggest": {
"prefix": "빨",
"completion": {
"field": "search_string"
}
}
}
}'
ngram tokenizer 를 이용한 자동완성
기존 인덱스를 삭제한 뒤, 아래 명령으로 ngram 분석기를 적용한 인덱스를 새로 생성하고 데이터를 다시 입력합니다.
curl -XDELETE http://localhost:9200/auto_complete?pretty -H 'Content-Type: application/json'
curl -XPUT 'http://localhost:9200/auto_complete?include_type_name=true&pretty' -H 'Content-Type: application/json' -d '{
"settings" : {
"index":{
"max_ngram_diff": 50,
"analysis":{
"analyzer":{
"my_ngram_analyzer": {
"tokenizer": "my_ngram_tokenizer"
}
},
"tokenizer": {
"my_ngram_tokenizer": {
"type": "ngram",
"min_gram": "1",
"max_gram": "10"
}
}
}
}
},
"mappings": {
"_doc": {
"properties": {
"search_string": {
"type": "text",
"fields": {
"ngram": {
"type": "text",
"analyzer": "my_ngram_analyzer"
}
}
}
}
}
}
}'
curl -XPOST http://localhost:9200/_bulk?pretty -H 'Content-Type: application/json' --data-binary @data.json
아래 명령으로 자동완성이 정상적으로 작동하는지 확인합니다.
curl -XGET http://localhost:9200/auto_complete/_search?pretty -H 'Content-Type: application/json' -d '{
"query": {
"match": {
"search_string.ngram": "빨"
}
}
}'
completion 방식과 달리, 검색어 중간에 '빨'이 들어 있는 '셀프빨래방'도 검색 결과에 포함되는 것을 확인할 수 있습니다.
Korean Jaso Analyzer 를 이용한 자동완성
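Korean Jaso Analyzer를 이용하여 한글 자소(초성·중성·종성) 단위로 자동완성이 되도록 인덱스 설정을 수정해 봅니다. 아래 설정에서 사용하는 jaso_tokenizer는 Elasticsearch 기본 제공 토크나이저가 아니므로, Korean Jaso Analyzer 플러그인이 미리 설치되어 있어야 합니다.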
curl -XDELETE http://localhost:9200/auto_complete?pretty -H 'Content-Type: application/json'
curl -XPUT 'http://localhost:9200/auto_complete?include_type_name=true&pretty' -H 'Content-Type: application/json' -d '{
"settings" : {
"index":{
"number_of_replicas": "0",
"max_ngram_diff": 50,
"analysis":{
"filter": {
"suggest_filter": {
"type": "ngram",
"min_gram": 1,
"max_gram": 50
}
},
"analyzer":{
"my_ngram_analyzer": {
"tokenizer": "my_ngram_tokenizer"
},
"suggest_search_analyzer": {
"type": "custom",
"tokenizer": "jaso_search_tokenizer"
},
"suggest_index_analyzer": {
"type": "custom",
"tokenizer": "jaso_index_tokenizer",
"filter": [
"suggest_filter"
]
}
},
"tokenizer": {
"jaso_search_tokenizer": {
"type": "jaso_tokenizer",
"mistype": true,
"chosung": false
},
"jaso_index_tokenizer": {
"type": "jaso_tokenizer",
"mistype": true,
"chosung": true
},
"my_ngram_tokenizer": {
"type": "ngram",
"min_gram": "1",
"max_gram": "10"
}
}
}
}
},
"mappings": {
"_doc": {
"properties": {
"search_string": {
"type": "text",
"fields": {
"ngram": {
"type": "text",
"analyzer": "my_ngram_analyzer"
},
"jaso": {
"type": "text",
"analyzer": "suggest_index_analyzer"
}
}
}
}
}
}
}'
curl -XPOST http://localhost:9200/_bulk?pretty -H 'Content-Type: application/json' --data-binary @data.json
먼저 ngram 필드(search_string.ngram)로 검색하면 '빨'이 포함된 검색어가 모두 정상적으로 표시됩니다.
curl -XGET http://localhost:9200/auto_complete/_search?pretty -H 'Content-Type: application/json' -d '{
"query": {
"match": {
"search_string.ngram": {
"query": "빨",
"analyzer": "my_ngram_analyzer"
}
}
},
"highlight":{
"fields":{
"search_string.ngram":{}
}
}
}'
{
"took" : 26,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 4,
"relation" : "eq"
},
"max_score" : 0.10943023,
"hits" : [
{
"_index" : "auto_complete",
"_type" : "_doc",
"_id" : "2sz1HXMB01CMBcId4OTi",
"_score" : 0.10943023,
"_source" : {
"search_string" : "셀프빨래방"
},
"highlight" : {
"search_string.ngram" : [
"셀프<em>빨</em>래방"
]
}
},
{
"_index" : "auto_complete",
"_type" : "_doc",
"_id" : "28z1HXMB01CMBcId4OTi",
"_score" : 0.10943023,
"_source" : {
"search_string" : "빨래건조대"
},
"highlight" : {
"search_string.ngram" : [
"<em>빨</em>래건조대"
]
}
},
{
"_index" : "auto_complete",
"_type" : "_doc",
"_id" : "3cz1HXMB01CMBcId4OTi",
"_score" : 0.10943023,
"_source" : {
"search_string" : "빨래건조기"
},
"highlight" : {
"search_string.ngram" : [
"<em>빨</em>래건조기"
]
}
},
{
"_index" : "auto_complete",
"_type" : "_doc",
"_id" : "3Mz1HXMB01CMBcId4OTi",
"_score" : 0.0947853,
"_source" : {
"search_string" : "볼빨간사춘기"
},
"highlight" : {
"search_string.ngram" : [
"볼<em>빨</em>간사춘기"
]
}
}
]
}
}
자소 필드(search_string.jaso)에 대한 match를 must 조건으로 추가하고 '빨ㄹ'까지 입력하면, '볼빨간사춘기'가 결과에서 제외되는 것을 확인할 수 있습니다.
curl -XGET http://localhost:9200/auto_complete/_search?pretty -H 'Content-Type: application/json' -d '{
"query": {
"bool": {
"must": [
{
"match": {
"search_string.jaso": {
"query": "빨ㄹ",
"analyzer": "suggest_search_analyzer"
}
}
}
],
"should": [
{
"match": {
"search_string.ngram": {
"query": "빨ㄹ",
"analyzer": "my_ngram_analyzer"
}
}
}
]
}
},
"highlight":{
"fields":{
"search_string.ngram":{}
}
}
}'
{
"took" : 4,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 3,
"relation" : "eq"
},
"max_score" : 0.7075971,
"hits" : [
{
"_index" : "auto_complete",
"_type" : "_doc",
"_id" : "2sz1HXMB01CMBcId4OTi",
"_score" : 0.7075971,
"_source" : {
"search_string" : "셀프빨래방"
},
"highlight" : {
"search_string.ngram" : [
"셀프<em>빨</em>래방"
]
}
},
{
"_index" : "auto_complete",
"_type" : "_doc",
"_id" : "28z1HXMB01CMBcId4OTi",
"_score" : 0.7075971,
"_source" : {
"search_string" : "빨래건조대"
},
"highlight" : {
"search_string.ngram" : [
"<em>빨</em>래건조대"
]
}
},
{
"_index" : "auto_complete",
"_type" : "_doc",
"_id" : "3cz1HXMB01CMBcId4OTi",
"_score" : 0.7075971,
"_source" : {
"search_string" : "빨래건조기"
},
"highlight" : {
"search_string.ngram" : [
"<em>빨</em>래건조기"
]
}
}
]
}
}
자음 'ㅃ' 하나만 입력해도 검색 결과가 반환됩니다.
curl -XGET http://localhost:9200/auto_complete/_search?pretty -H 'Content-Type: application/json' -d '{
"query": {
"bool": {
"must": [
{
"match": {
"search_string.jaso": {
"query": "ㅃ",
"analyzer": "suggest_search_analyzer"
}
}
}
],
"should": [
{
"match": {
"search_string.ngram": {
"query": "ㅃ",
"analyzer": "my_ngram_analyzer"
}
}
}
]
}
},
"highlight":{
"fields":{
"search_string.ngram":{}
}
}
}'
{
"took" : 2,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 4,
"relation" : "eq"
},
"max_score" : 0.20996921,
"hits" : [
{
"_index" : "auto_complete",
"_type" : "_doc",
"_id" : "3Mz1HXMB01CMBcId4OTi",
"_score" : 0.20996921,
"_source" : {
"search_string" : "볼빨간사춘기"
}
},
{
"_index" : "auto_complete",
"_type" : "_doc",
"_id" : "2sz1HXMB01CMBcId4OTi",
"_score" : 0.20052904,
"_source" : {
"search_string" : "셀프빨래방"
}
},
{
"_index" : "auto_complete",
"_type" : "_doc",
"_id" : "28z1HXMB01CMBcId4OTi",
"_score" : 0.20052904,
"_source" : {
"search_string" : "빨래건조대"
}
},
{
"_index" : "auto_complete",
"_type" : "_doc",
"_id" : "3cz1HXMB01CMBcId4OTi",
"_score" : 0.20052904,
"_source" : {
"search_string" : "빨래건조기"
}
}
]
}
}
한영 전환을 하지 않아 '빨'을 영문 자판 그대로 'Qkf'로 잘못 입력해도 검색이 됩니다.
curl -XGET http://localhost:9200/auto_complete/_search?pretty -H 'Content-Type: application/json' -d '{
"query": {
"bool": {
"must": [
{
"match": {
"search_string.jaso": {
"query": "Qkf",
"analyzer": "suggest_search_analyzer"
}
}
}
],
"should": [
{
"match": {
"search_string.ngram": {
"query": "Qkf",
"analyzer": "my_ngram_analyzer"
}
}
}
]
}
},
"highlight":{
"fields":{
"search_string.ngram":{}
}
}
}'
{
"took" : 8,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 4,
"relation" : "eq"
},
"max_score" : 0.3533927,
"hits" : [
{
"_index" : "auto_complete",
"_type" : "_doc",
"_id" : "2sz1HXMB01CMBcId4OTi",
"_score" : 0.3533927,
"_source" : {
"search_string" : "셀프빨래방"
}
},
{
"_index" : "auto_complete",
"_type" : "_doc",
"_id" : "28z1HXMB01CMBcId4OTi",
"_score" : 0.3533927,
"_source" : {
"search_string" : "빨래건조대"
}
},
{
"_index" : "auto_complete",
"_type" : "_doc",
"_id" : "3Mz1HXMB01CMBcId4OTi",
"_score" : 0.3533927,
"_source" : {
"search_string" : "볼빨간사춘기"
}
},
{
"_index" : "auto_complete",
"_type" : "_doc",
"_id" : "3cz1HXMB01CMBcId4OTi",
"_score" : 0.3533927,
"_source" : {
"search_string" : "빨래건조기"
}
}
]
}
}