Table of Contents
머글랭 가이드 the beginning
데이타 구하기
import to MariaDB
file -bi data.csv
application/csv; charset=unknown-8bit
iconv -c -f euc-kr -t utf8 data.csv > data-utf8.csv
head -20 data-utf8.csv
mysql -u root -p
create user '사용자'@'localhost' identified by '비밀번호';
create database DB이름;
grant all privileges on DB이름.* to '사용자'@'localhost';
flush privileges;
use DB이름;
create table tbl_import(
`번호` varchar(100),
`개방서비스명` varchar(100),
`개방서비스id` varchar(100),
`개방자치단체코드` varchar(100),
`관리번호` varchar(100),
`인허가일자` varchar(100),
`인허가취소일자` varchar(100),
`영업상태구분코드` varchar(100),
`영업상태명` varchar(100),
`상세영업상태코드` varchar(100),
`상세영업상태명` varchar(100),
`폐업일자` varchar(100),
`휴업시작일자` varchar(100),
`휴업종료일자` varchar(100),
`재개업일자` varchar(100),
`소재지전화` varchar(100),
`소재지면적` varchar(100),
`소재지우편번호` varchar(100),
`소재지전체주소` varchar(256),
`도로명전체주소` varchar(256),
`도로명우편번호` varchar(100),
`사업장명` varchar(100),
`최종수정시점` varchar(100),
`데이터갱신구분` varchar(100),
`데이터갱신일자` varchar(100),
`업태구분명` varchar(100),
`좌표정보(x)` varchar(100),
`좌표정보(y)` varchar(100),
`위생업태명` varchar(100),
`남성종사자수` varchar(100),
`여성종사자수` varchar(100),
`영업장주변구분명` varchar(100),
`등급구분명` varchar(100),
`급수시설구분명` varchar(100),
`총종업원수` varchar(100),
`본사종업원수` varchar(100),
`공장사무직종업원수` varchar(100),
`공장판매직종업원수` varchar(100),
`공장생산직종업원수` varchar(100),
`건물소유구분명` varchar(100),
`보증액` varchar(100),
`월세액` varchar(100),
`다중이용업소여부` varchar(100),
`시설총규모` varchar(100),
`전통업소지정번호` varchar(100),
`전통업소주된음식` varchar(100),
`홈페이지` varchar(256)
);
LOAD DATA INFILE '/tmp/data-utf8.csv' IGNORE
INTO TABLE tbl_import
FIELDS TERMINATED BY ','
ENCLOSED BY '"'
LINES TERMINATED BY '\n'
IGNORE 1 ROWS;
select 사업장명 from tbl_import where 사업장명 like '%돼지국밥%' limit 10;
select 인허가일자, 소재지전화, 소재지전체주소, 도로명전체주소, 사업장명, `좌표정보(x)`, `좌표정보(y)`
from tbl_import
where 영업상태구분코드 = '01'
limit 10;
create table tbl_restaurant_info(
id bigint(20) unsigned not null auto_increment,
primary key (id),
unique key unique_id (id),
restaurant_name varchar(255),
start_date varchar(10),
tel varchar(32),
address varchar(255),
road_address varchar(255),
x varchar(255),
y varchar(255),
coordinate point,
longitude varchar(255),
latitude varchar(255),
modify_time timestamp not null default current_timestamp on update current_timestamp,
insert_time timestamp not null default current_timestamp
);
insert into tbl_restaurant_info(
restaurant_name,
start_date,
tel,
address,
road_address,
x,
y
)
select 사업장명, 인허가일자, 소재지전화, 소재지전체주소, 도로명전체주소, `좌표정보(x)`, `좌표정보(y)`
from tbl_import
where 영업상태구분코드 = '01';
select id, restaurant_name, start_date, x, y, modify_time, insert_time
from tbl_restaurant_info
limit 10;
전화번호 교정
update tbl_restaurant_info
set tel = replace(tel, ' ', '-')
where tel <> replace(tel, ' ', '-');
-- 실서비스에서 정규식 사용하면 총살당합니다.
update tbl_restaurant_info
set tel = concat(substring(tel, 1, length(tel) -4), '-', right(tel, 4))
where tel regexp '[0-9]{7,8}$';
-- 5-10 회 반복할 것.
update tbl_restaurant_info
set tel = substring(tel, 2, 100)
where tel regexp '^00';
update tbl_restaurant_info r
set tel = concat(substring(tel, 1, 2), '-', substring(tel, 3, 100))
where
1 = 1
and r.tel <> ''
and r.tel regexp '^02'
and r.tel regexp '^[0-9]{5,6}';
update tbl_restaurant_info r
set tel = concat(substring(tel, 1, 3), '-', substring(tel, 4, 100))
where
1 = 1
and r.tel <> ''
and not r.tel regexp '^02'
and r.tel regexp '^[0-9]{6,7}';
-- 2-3 회 반복할 것.
update tbl_restaurant_info r
set tel = replace(tel, '-00', '-')
where
1 = 1
and r.tel <> '';
update tbl_restaurant_info r
set tel = replace(tel, '-0', '-')
where
1 = 1
and r.tel <> '';
update tbl_restaurant_info r
set tel = replace(tel, '--', '-')
where
1 = 1
and r.tel <> '';
중부원점TM(EPSG:2097) to WGS84(EPSG:4326)
pip3 install PyMySQL
pip3 install pyproj
vi convert.py
------------------------------
from pyproj import Proj, transform
import pymysql
import warnings
warnings.filterwarnings('ignore')
def isfloat(value):
try:
float(value)
return True
except ValueError:
return False
# Projection 정의
# 중부원점(Bessel): 서울 등 중부지역 EPSG:2097
proj_1 = Proj(init='epsg:2097')
# WGS84 경위도: GPS가 사용하는 좌표계 EPSG:4326
proj_2 = Proj(init='epsg:4326')
conn = pymysql.connect(
user='mg',
passwd='********',
host='127.0.0.1',
db='mg',
charset='utf8'
)
cursor = conn.cursor()
sql_select = """
select id, x, y from tbl_restaurant_info;
"""
sql_update = """
update tbl_restaurant_info
set longitude = %f, latitude = %f
where id = %d;
"""
cursor.execute(sql_select)
result = cursor.fetchall()
for row in result:
idx, x, y = row[0], row[1], row[2]
if x != '' and y != '':
if isfloat(x) and isfloat(y):
x_, y_ = transform(proj_1, proj_2, x, y)
cursor.execute(sql_update % (x_, y_, int(idx)))
if idx % 500 == 0:
conn.commit()
print('committed : %d' % idx)
conn.commit()
conn.close()
------------------------------
Send to ElasticSearch
PUT /restaurant_info?pretty
{
"settings" : {
"number_of_shards" : 1,
"number_of_replicas" : 0
},
"mappings" : {
"properties" : {
"restaurant_name" : { "type" : "text" },
"start_date" : {
"type" : "date",
"format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||yyyyMMdd||epoch_millis"
},
"tel" : { "type" : "keyword" },
"address" : { "type" : "text" },
"road_address" : { "type" : "text" },
"coordinate" : { "type" : "geo_point" },
"longitude" : { "type" : "float" },
"latitude" : { "type" : "float" },
"modify_time" : {
"type" : "date",
"format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||yyyyMMdd||epoch_millis"
},
"insert_time" : {
"type" : "date",
"format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||yyyyMMdd||epoch_millis"
}
}
}
}