Table of Contents
Elasticsearch import CSV
CSV 를 임포트 하는 방법은 여러가지가 있지만,
여기서는 python 을 이용하는 방법을 정리한다.
import using python
pip3 install elasticsearch
vi import_csv.py
---------------------------
from elasticsearch import helpers, Elasticsearch
import csv
def _text_with_keyword():
    """Return a fresh text-field mapping with a 256-char keyword sub-field.

    Year/Month/Hour/Type all share this exact shape; a factory keeps the
    literal in one place while giving each field its own dict object.
    """
    return {
        "type": "text",
        "fields": {
            "keyword": {
                "type": "keyword",
                "ignore_above": 256,
            },
        },
    }


# Index mapping for the "mydata" index. dynamic=False means fields not
# declared below are silently ignored instead of being dynamically mapped.
document = {
    "mappings": {
        "dynamic": False,
        "properties": {
            "Time": {"type": "date"},
            "Year": _text_with_keyword(),
            "Month": _text_with_keyword(),
            "Hour": _text_with_keyword(),
            "Type": _text_with_keyword(),
            "Data": {"type": "integer"},
            "Latitude": {"type": "double"},
            "Longitude": {"type": "double"},
        },
    },
}
# Shell prep steps kept from the original notes (for a different CSV export
# with header: itemid,item_name,item_option_name,brand,category,price,image_url):
#   sed -i '1s/^\xEF\xBB\xBF//' item_info.csv        # strip the UTF-8 BOM
#   cp item_info_header.csv item_info_full.csv
#   head -1000000 item_info.csv >> item_info_full.csv
#   vi item_info_full.csv

# NOTE(review): newer elasticsearch-py clients require a scheme in the host,
# e.g. "http://192.168.1.16:9200" — confirm against the installed client.
es = Elasticsearch(['192.168.1.16:9200'], port=9200)

# Create the index with the explicit mapping only when it is missing;
# creating an existing index would raise resource_already_exists_exception.
if not es.indices.exists(index="mydata"):
    es.indices.create(index="mydata", body=document)

# newline='' is required by the csv module for correct handling of quoted
# newlines; utf-8-sig also strips a leading UTF-8 BOM (the notes above show a
# manual sed workaround for exactly that problem) so the first DictReader
# field name is not corrupted.
with open('/home/renesys/work/defined_data.csv', newline='', encoding='utf-8-sig') as f:
    reader = csv.DictReader(f)
    # Stream rows straight into the bulk helper; raise_on_error=False skips
    # past individual document failures instead of aborting the whole run.
    helpers.bulk(es, reader, index="mydata", raise_on_error=False)
---------------------------
python3 import_csv.py