{"id":962,"date":"2020-06-21T19:43:01","date_gmt":"2020-06-21T10:43:01","guid":{"rendered":"https:\/\/www.skyer9.pe.kr\/wordpress\/?p=962"},"modified":"2020-06-24T22:01:33","modified_gmt":"2020-06-24T13:01:33","slug":"elasticsearch-score-%ec%bb%a4%ec%8a%a4%ed%84%b0%eb%a7%88%ec%9d%b4%ec%a7%95","status":"publish","type":"post","link":"https:\/\/www.skyer9.pe.kr\/wordpress\/?p=962","title":{"rendered":"ElasticSearch score \ucee4\uc2a4\ud130\ub9c8\uc774\uc9d5"},"content":{"rendered":"<h1>ElasticSearch score \ucee4\uc2a4\ud130\ub9c8\uc774\uc9d5<\/h1>\n<p>ES \ub294 \uae30\ubcf8\uac12\uc73c\ub85c <code>BM25<\/code> \uc54c\uace0\ub9ac\uc998\uc744 \uc774\uc6a9\ud574 <code>score<\/code> \ub97c \uacc4\uc0b0\ud569\ub2c8\ub2e4.<\/p>\n<h2>BM25 \uc54c\uace0\ub9ac\uc998<\/h2>\n<p>\uac80\uc0c9\ud0a4\uc6cc\ub4dc\uc758 \ube48\ub3c4\uc218, \ubb38\uc11c\uc0c1\uc758 \ud0a4\uc6cc\ub4dc\uc758 \ube48\ub3c4\uc218, \ubb38\uc11c\uc758 \ud06c\uae30 \ub4f1\uc744 \uae30\uc900\uc73c\ub85c score \ub97c \uc0b0\uc815\ud569\ub2c8\ub2e4.<\/p>\n<h3>TF(Term Frequency)<\/h3>\n<p>\ubb38\uc11c \ub0b4\uc5d0\uc11c \uc790\uc8fc \ub4f1\uc7a5\ud558\ub294 \ub2e8\uc5b4(Term) \uc5d0 \uac00\uc911\uce58\ub97c \ub192\uac8c \ubd80\uc5ec\ud569\ub2c8\ub2e4.<\/p>\n<h3>IDF(Inverse Document Frequency)<\/h3>\n<p>\ub9ce\uc740 \ubb38\uc11c\uc5d0\uc11c \ub4f1\uc7a5\ud558\ub294 \ub2e8\uc5b4(Term) \uc758 \uac00\uc911\uce58\ub294 \ub0ae\ucd94\uace0, \uc77c\ubd80 \ubb38\uc11c\uc5d0\uc11c\ub9cc \ub4f1\uc7a5\ud558\ub294 \ub2e8\uc5b4(Term) \uc5d0 \ub300\ud574 \ub192\uc740 \uac00\uc911\uce58\ub97c \ubd80\uc5ec\ud569\ub2c8\ub2e4.<\/p>\n<p>\ubb38\uc11c\uc758 \uc885\ub958\ub97c \uac00\ub9ac\uc9c0 \uc54a\uace0 \uc790\uc8fc \ub4f1\uc7a5\ud558\ub294 \ub2e8\uc5b4\ub294 \ubcc4 \uc758\ubbf8\uc5c6\ub294 \ud754\ud55c \ub2e8\uc5b4\uc77c \uac00\ub2a5\uc131\uc774 \ub192\uae30 \ub54c\ubb38\uc785\ub2c8\ub2e4.<\/p>\n<h3>Field-length norm<\/h3>\n<p>\ub450\uac1c \uc774\uc0c1\uc758 \ubb38\uc11c\uc5d0\uc11c \ub3d9\uc77c\ud55c \ud0a4\uc6cc\ub4dc\uac00 
\ub3d9\uc77c\ud55c \ube48\ub3c4\uc218\ub85c \ub4f1\uc7a5\ud55c\ub2e4\uace0 \uac00\uc815\ud560 \ub54c, \ubb38\uc11c\uc758 \uae38\uc774\uac00 \uc9e7\uc740 \ubb38\uc11c\uc5d0 \ub354 \ub192\uc740 \uac00\uc911\uce58\ub97c \ubd80\uc5ec\ud569\ub2c8\ub2e4.<\/p>\n<h2>score \ucee4\uc2a4\ud130\ub9c8\uc774\uc9d5<\/h2>\n<p>\ud558\uc9c0\ub9cc \uc1fc\ud551\ubab0\uc5d0\uc11c\ub294 \uac80\uc0c9\uc5b4\uc758 \ube48\ub3c4\uc218\ub9cc \uac00\uc9c0\uace0 score \ub97c \uc0b0\uc815\ud560 \uc218 \uc5c6\uc2b5\ub2c8\ub2e4.<\/p>\n<p>\uac04\ub2e8\ud558\uac8c <code>\uc544\uc774\ud3f0<\/code> \uc774\ub77c\ub294 \ud0a4\uc6cc\ub4dc\ub85c \uae30\uc874 \uace0\uac1d\ub4e4\uc774 \uac80\uc0c9 \ud6c4 \ud074\ub9ad\ud55c \uc218, \uad6c\ub9e4\ud55c \uc218\uac00 \ub192\uc740 \uc0c1\ud488\uc5d0 \uc6b0\uc120\uc21c\uc704\ub97c \ub354 \ubd80\uc5ec\ud560 \ud544\uc694\uac00 \uc788\uc2b5\ub2c8\ub2e4.<\/p>\n<p>\uc544\ub798\uc5d0\uc11c <code>function_score<\/code> \ub97c \uc774\uc6a9\ud574 \uc704 \uae30\ub2a5\uc744 \uad6c\ud604\ud574 \ubd05\ub2c8\ub2e4.<\/p>\n<h2>\ucc38\uc870 \uc0ac\uc774\ud2b8<\/h2>\n<ul>\n<li><a href=\"https:\/\/www.elastic.co\/guide\/en\/elasticsearch\/reference\/current\/query-dsl-function-score-query.html\">https:\/\/www.elastic.co\/guide\/en\/elasticsearch\/reference\/current\/query-dsl-function-score-query.html<\/a><\/li>\n<\/ul>\n<h2>\uae30\ubcf8\uc124\uc815<\/h2>\n<p><a href=\"\/wordpress\/?p=956\">\uc5ec\uae30<\/a> \uc5d0 \uc124\uba85\ub41c \uc11c\ubc84\uc124\uc815\uc744 \uae30\ucd08\ub85c \uc218\uc815\ud569\ub2c8\ub2e4.<\/p>\n<h2>\ud14c\uc774\ube14 \uc900\ube44<\/h2>\n<pre><code class=\"language-sql\">CREATE DATABASE db_test DEFAULT CHARACTER SET UTF8 COLLATE UTF8_GENERAL_CI;\n\nUSE db_test;\n\nDROP TABLE IF EXISTS items;\n\nCREATE TABLE items (\n    itemid BIGINT(20) UNSIGNED NOT NULL,\n    PRIMARY KEY (itemid),\n    UNIQUE KEY unique_id (itemid),\n    itemname VARCHAR(128) NOT NULL,\n    viewKeywords VARCHAR(2048) NOT NULL,\n    buyKeywords VARCHAR(2048) NOT NULL,\n    lastupdate 
TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,\n    regdate TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP\n);\n\nINSERT INTO items (itemid, itemname, viewKeywords, buyKeywords)\nVALUES (1, &#039;\ube14\ub799 \uc544\uc774\ud3f0 \ucf00\uc774\uc2a4&#039;, &#039;\uc544\uc774\ud3f0 \ucf00\uc774\uc2a4&#039;, &#039;&#039;);\n\nINSERT INTO items (itemid, itemname, viewKeywords, buyKeywords)\nVALUES (2, &#039;\ube14\ub799 \uac24\ub7ed\uc2dc \ucf00\uc774\uc2a4&#039;, &#039;\uac24\ub7ed\uc2dc&#039;, &#039;\uac24\ub7ed\uc2dc&#039;);\n\nINSERT INTO items (itemid, itemname, viewKeywords, buyKeywords)\nVALUES (3, &#039;\ud551\ud06c \uc544\uc774\ud3f0 \ucf00\uc774\uc2a4&#039;, &#039;\uc544\uc774\ud3f0&#039;, &#039;&#039;);\n\nINSERT INTO items (itemid, itemname, viewKeywords, buyKeywords)\nVALUES (4, &#039;\ud654\uc774\ud2b8 \uac24\ub7ed\uc2dc \ucf00\uc774\uc2a4&#039;, &#039;&#039;, &#039;&#039;);\n\nINSERT INTO items (itemid, itemname, viewKeywords, buyKeywords)\nVALUES (5, &#039;\ub798\ub4dc \uc544\uc774\ud3f0 \ucf00\uc774\uc2a4&#039;, &#039;\ucf00\uc774\uc2a4&#039;, &#039;\uc544\uc774\ud3f0&#039;);<\/code><\/pre>\n<p>\uc870\ud68c\ud0a4\uc6cc\ub4dc\uc640 \uad6c\ub9e4\ud0a4\uc6cc\ub4dc\ub97c \ub370\uc774\ud130\ub85c \uc785\ub825\ud574 \uc90d\ub2c8\ub2e4.<\/p>\n<h2>\uc778\ub371\uc2a4 \uc900\ube44<\/h2>\n<pre><code class=\"language-bash\">curl -XDELETE http:\/\/localhost:9200\/items?pretty\n\ncurl -XPUT http:\/\/localhost:9200\/items?pretty -H &#039;Content-Type: application\/json&#039; -d &#039;{\n  &quot;settings&quot; : {\n    &quot;index&quot;:{\n      &quot;analysis&quot;:{\n        &quot;analyzer&quot;:{\n          &quot;korean&quot;:{\n            &quot;type&quot;:&quot;custom&quot;,\n            &quot;tokenizer&quot;:&quot;seunjeon_default_tokenizer&quot;,\n            &quot;filter&quot; : [&quot;synonym&quot;, &quot;stopword&quot;, &quot;lowercase&quot;]\n          }\n        },\n        &quot;filter&quot; : {\n          &quot;synonym&quot; : {\n  
          &quot;type&quot; : &quot;synonym&quot;,\n            &quot;synonyms_path&quot; : &quot;synonyms.txt&quot;\n          },\n          &quot;stopword&quot; : {\n            &quot;type&quot; : &quot;stop&quot;,\n            &quot;stopwords_path&quot; : &quot;stopwords.txt&quot;\n          }\n        },\n        &quot;tokenizer&quot;: {\n          &quot;seunjeon_default_tokenizer&quot;: {\n            &quot;index_eojeol&quot;: &quot;true&quot;,\n            &quot;user_dict_path&quot;: &quot;user_dict.csv&quot;,\n            &quot;index_poses&quot;: [\n                &quot;UNK&quot;, &quot;EP&quot;, &quot;I&quot;, &quot;J&quot;, &quot;M&quot;,\n                &quot;N&quot;, &quot;SL&quot;, &quot;SH&quot;, &quot;SN&quot;, &quot;VCP&quot;,\n                &quot;XP&quot;, &quot;XS&quot;, &quot;XR&quot;\n            ],\n            &quot;decompound&quot;: &quot;true&quot;,\n            &quot;type&quot;: &quot;seunjeon_tokenizer&quot;\n          }\n        }\n      }\n    }\n  },\n  &quot;mappings&quot; : {\n    &quot;_doc&quot; : {\n      &quot;properties&quot; : {\n        &quot;itemid&quot; : {\n          &quot;type&quot; : &quot;integer&quot;\n        },\n        &quot;itemname&quot; : {\n          &quot;type&quot; : &quot;text&quot;,\n          &quot;analyzer&quot;: &quot;korean&quot;\n        },\n        &quot;viewKeywords&quot; : {\n          &quot;type&quot; : &quot;text&quot;,\n          &quot;analyzer&quot;: &quot;korean&quot;\n        },\n        &quot;buyKeywords&quot; : {\n          &quot;type&quot; : &quot;text&quot;,\n          &quot;analyzer&quot;: &quot;korean&quot;\n        },\n        &quot;lastupdate&quot; : {\n          &quot;type&quot; : &quot;date&quot;\n        },\n        &quot;regdate&quot; : {\n          &quot;type&quot; : &quot;date&quot;\n        }\n      }\n    }\n  }\n}&#039;<\/code><\/pre>\n<h2>\uac00\uc911\uce58\ub97c \ubd80\uc5ec\ud55c \uac80\uc0c9<\/h2>\n<pre><code class=\"language-bash\">curl -X GET 
&quot;localhost:9200\/items\/_search?pretty&quot; -H &#039;Content-Type: application\/json&#039; -d&#039;{\n    &quot;query&quot;: {\n        &quot;function_score&quot;: {\n            &quot;query&quot;: { &quot;match&quot;: { &quot;itemname&quot;: &quot;\uc544\uc774\ud3f0 \ucf00\uc774\uc2a4&quot; } },\n            &quot;boost&quot;: &quot;5&quot;,\n            &quot;functions&quot;: [\n                {\n                    &quot;filter&quot;: { &quot;match&quot;: { &quot;viewKeywords&quot;: &quot;\uc544\uc774\ud3f0 \ucf00\uc774\uc2a4&quot; } },\n                    &quot;random_score&quot;: {},\n                    &quot;weight&quot;: 23\n                },\n                {\n                    &quot;filter&quot;: { &quot;match&quot;: { &quot;buyKeywords&quot;: &quot;\uc544\uc774\ud3f0 \ucf00\uc774\uc2a4&quot; } },\n                    &quot;weight&quot;: 42\n                }\n            ],\n            &quot;max_boost&quot;: 42,\n            &quot;score_mode&quot;: &quot;max&quot;,\n            &quot;boost_mode&quot;: &quot;multiply&quot;,\n            &quot;min_score&quot; : 0\n        }\n    }\n}&#039;<\/code><\/pre>\n","protected":false},"excerpt":{"rendered":"<p>ElasticSearch score \ucee4\uc2a4\ud130\ub9c8\uc774\uc9d5 ES \ub294 \uae30\ubcf8\uac12\uc73c\ub85c BM25 \uc54c\uace0\ub9ac\uc998\uc744 \uc774\uc6a9\ud574 score \ub97c \uacc4\uc0b0\ud569\ub2c8\ub2e4. BM25 \uc54c\uace0\ub9ac\uc998 \uac80\uc0c9\ud0a4\uc6cc\ub4dc\uc758 \ube48\ub3c4\uc218, \ubb38\uc11c\uc0c1\uc758 \ud0a4\uc6cc\ub4dc\uc758 \ube48\ub3c4\uc218, \ubb38\uc11c\uc758 \ud06c\uae30 \ub4f1\uc744 \uae30\uc900\uc73c\ub85c score \ub97c \uc0b0\uc815\ud569\ub2c8\ub2e4. TF(Term Frequency) \ubb38\uc11c \ub0b4\uc5d0\uc11c \uc790\uc8fc \ub4f1\uc7a5\ud558\ub294 \ub2e8\uc5b4(Term) \uc5d0 \uac00\uc911\uce58\ub97c \ub192\uac8c \ubd80\uc5ec\ud569\ub2c8\ub2e4. 
IDF(Inverse Document Frequency) \ub9ce\uc740 \ubb38\uc11c\uc5d0\uc11c \ub4f1\uc7a5\ud558\ub294 \ub2e8\uc5b4(Term) \uc758 \uac00\uc911\uce58\ub294 \ub0ae\ucd94\uace0, \uc77c\ubd80 \ubb38\uc11c\uc5d0\uc11c\ub9cc \ub4f1\uc7a5\ud558\ub294 \ub2e8\uc5b4(Term) \uc5d0 \ub300\ud574 \ub192\uc740 \uac00\uc911\uce58\ub97c\u2026 <span class=\"read-more\"><a href=\"https:\/\/www.skyer9.pe.kr\/wordpress\/?p=962\">Read More &raquo;<\/a><\/span><\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[6],"tags":[],"class_list":["post-962","post","type-post","status-publish","format-standard","hentry","category-elasticsearch"],"_links":{"self":[{"href":"https:\/\/www.skyer9.pe.kr\/wordpress\/index.php?rest_route=\/wp\/v2\/posts\/962","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/www.skyer9.pe.kr\/wordpress\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.skyer9.pe.kr\/wordpress\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.skyer9.pe.kr\/wordpress\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/www.skyer9.pe.kr\/wordpress\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=962"}],"version-history":[{"count":16,"href":"https:\/\/www.skyer9.pe.kr\/wordpress\/index.php?rest_route=\/wp\/v2\/posts\/962\/revisions"}],"predecessor-version":[{"id":1005,"href":"https:\/\/www.skyer9.pe.kr\/wordpress\/index.php?rest_route=\/wp\/v2\/posts\/962\/revisions\/1005"}],"wp:attachment":[{"href":"https:\/\/www.skyer9.pe.kr\/wordpress\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=962"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.skyer9.pe.kr\/wordpress\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=962"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.skyer9.pe.kr\/wordpress\/index.php?res
t_route=%2Fwp%2Fv2%2Ftags&post=962"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}