Elasticsearch import CSV
There are several ways to import a CSV file into Elasticsearch;
here we go through the Python approach.
import using python
pip3 install elasticsearch
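The client API differs between major versions (the note at the end of this page is about exactly that), so it is worth checking which client version pip installed. A minimal check using only the standard library:
---------------------------
from importlib.metadata import version

# Print the installed elasticsearch client package version;
# the constructor and index-creation arguments used below differ between 7.x and 8.x.
print(version("elasticsearch"))
---------------------------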
vi import_csv.py
---------------------------
from elasticsearch import helpers, Elasticsearch
import csv
# Index mapping: dynamic mapping is disabled and every CSV column is declared explicitly
document = {
    "mappings": {
        "dynamic": False,
        "properties": {
            "Time":      {"type": "date"},
            "Year":      {"type": "text", "fields": {"keyword": {"type": "keyword", "ignore_above": 256}}},
            "Month":     {"type": "text", "fields": {"keyword": {"type": "keyword", "ignore_above": 256}}},
            "Hour":      {"type": "text", "fields": {"keyword": {"type": "keyword", "ignore_above": 256}}},
            "Type":      {"type": "text", "fields": {"keyword": {"type": "keyword", "ignore_above": 256}}},
            "Data":      {"type": "integer"},
            "Latitude":  {"type": "double"},
            "Longitude": {"type": "double"}
        }
    }
}
# Example preprocessing of a raw CSV (columns: itemid,item_name,item_option_name,brand,category,price,image_url)
# before importing it:
#   sed -i '1s/^\xEF\xBB\xBF//' item_info.csv           # strip the UTF-8 BOM from the first line
#   cp item_info_header.csv item_info_full.csv          # start from a file containing only the header row
#   head -1000000 item_info.csv >> item_info_full.csv   # append the first 1,000,000 lines
#   vi item_info_full.csv
# Older client style: host list + port (newer clients take a full URL, see the note below)
es = Elasticsearch(['192.168.1.16:9200'], port=9200)

# Create the index with the mapping above only if it does not exist yet
if not es.indices.exists(index="mydata"):
    es.indices.create(index="mydata", body=document)

# Stream the CSV rows into the index; DictReader takes the field names from the header row
with open('/home/renesys/work/defined_data.csv') as f:
    reader = csv.DictReader(f)
    helpers.bulk(es, reader, index="mydata", raise_on_error=False)
---------------------------
python3 import_csv.py
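To verify the import, the document count of the new index can be checked right after the script finishes. A minimal sketch, assuming the same host and the mydata index from the script above:
---------------------------
from elasticsearch import Elasticsearch

es = Elasticsearch(['192.168.1.16:9200'], port=9200)

# Make the freshly bulked documents searchable, then count them
es.indices.refresh(index="mydata")
print(es.count(index="mydata"))
---------------------------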
Changes
The client API seems to have changed slightly (as of 2022-12-20).
from elasticsearch import helpers, Elasticsearch
import csv

# Newer client style: the node is given as a full URL instead of a host list + port
es = Elasticsearch("http://testconfig.10x10.co.kr:9200")

# Field names (search_string, sort) are given explicitly,
# so DictReader does not consume the first row as a header
with open('autocomplete.csv', 'rt', encoding='UTF8') as f:
    fieldnames = ['search_string', 'sort']
    reader = csv.DictReader(f, fieldnames=fieldnames, delimiter='\t')
    helpers.bulk(es, reader, index="auto_complete", raise_on_error=False)