Elasticsearch import CSV

By | 2021년 11월 13일
Table of Contents

Elasticsearch import CSV

참조

CSV를 임포트하는 방법은 여러 가지가 있지만,
여기서는 Python을 이용하는 방법을 정리한다.

import using python

pip3 install elasticsearch
vi import_csv.py
---------------------------
from elasticsearch import helpers, Elasticsearch
import csv

def _text_with_keyword():
  """Return a fresh ``text`` field mapping that carries a ``keyword`` sub-field.

  A new dict is built on every call so that the fields using it do not
  share (and cannot accidentally mutate) one another's definition.
  """
  keyword_subfield = {"type": "keyword", "ignore_above": 256}
  return {"type": "text", "fields": {"keyword": keyword_subfield}}


# Request body used when creating the index: an explicit mapping with
# dynamic mapping switched off, listing one entry per CSV column.
document = {
  "mappings": {
    "dynamic": False,
    "properties": {
      "Time": {"type": "date"},
      "Year": _text_with_keyword(),
      "Month": _text_with_keyword(),
      "Hour": _text_with_keyword(),
      "Type": _text_with_keyword(),
      "Data": {"type": "integer"},
      "Latitude": {"type": "double"},
      "Longitude": {"type": "double"},
    },
  },
}

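# NOTE: the lines below appear to be leftover notes from preparing a
# different CSV (item_info.csv); its header row is:
#   itemid,item_name,item_option_name,brand,category,price,image_url
#
# Shell commands used to prepare that file:
#   sed -i '1s/^\xEF\xBB\xBF//' item_info.csv          # strip the UTF-8 BOM from line 1
#   cp item_info_header.csv item_info_full.csv          # start the new file with the header row
#   head -1000000 item_info.csv >> item_info_full.csv   # append the first 1,000,000 lines
#   vi item_info_full.csv                               # inspect the result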

# Connect using a full URL. The host string already carries the port, so the
# separate ``port`` keyword was redundant; elasticsearch-py 8.x additionally
# requires the scheme and no longer accepts ``port``.
es = Elasticsearch("http://192.168.1.16:9200")

# Create the index with the explicit mapping only if it does not exist yet.
if not es.indices.exists(index="mydata"):
  es.indices.create(index="mydata", body=document)

# newline='' is what the csv module asks for so that it can handle line
# breaks inside quoted fields itself; the explicit encoding keeps the result
# independent of the machine's locale.
with open('/home/renesys/work/defined_data.csv', newline='', encoding='utf-8') as f:
  # The first row of the file supplies the field names of every document.
  reader = csv.DictReader(f)
  # raise_on_error=False makes bulk() continue past rejected rows and return
  # them, so keep the result instead of silently dropping it.
  indexed, errors = helpers.bulk(es, reader, index="mydata", raise_on_error=False)

print(f"indexed {indexed} documents, {len(errors)} failed")
# Show only the first few failures; a bad column usually fails every row.
for error in errors[:10]:
  print(error)
---------------------------
python3 import_csv.py

변경사항

elasticsearch Python 클라이언트의 API가 약간 변경된 듯하다(2022-12-20 기준). 접속 주소는 아래 예제처럼 스킴(http://)을 포함한 URL 문자열로 지정한다.

from elasticsearch import helpers, Elasticsearch
import csv

# Input is a tab-separated file with two columns: search_string, sort.
# The field names are passed explicitly, so the first line of the file is
# read as data rather than as a header.

es = Elasticsearch("http://testconfig.10x10.co.kr:9200")

# newline='' is what the csv module asks for so that it can handle line
# breaks inside quoted fields itself.
with open('autocomplete.csv', 'rt', encoding='UTF8', newline='') as f:
  fieldnames = ['search_string', 'sort']
  reader = csv.DictReader(f, fieldnames=fieldnames, delimiter='\t')
  # raise_on_error=False makes bulk() continue past rejected rows and return
  # them, so keep the result instead of silently dropping it.
  indexed, errors = helpers.bulk(es, reader, index="auto_complete", raise_on_error=False)

print(f"indexed {indexed} documents, {len(errors)} failed")
# Show only the first few failures; a bad column usually fails every row.
for error in errors[:10]:
  print(error)

답글 남기기