创建配置文件
# Sample Logstash configuration for creating a simple
# CSV file -> Logstash -> Elasticsearch pipeline.
# Input stage: read events from CSV files on disk.
input {
  file {
    # Glob of the files to scan.
    path => "D:/hg/test/csv/*.csv"

    # Decode each line as plain text in UTF-8.
    codec => plain { charset => "UTF-8" }

    # Read every file from its beginning (each line becomes one event).
    start_position => "beginning"
  }
}
# Filter stage: parse each CSV line into fields, derive the routing fields
# (city_code, index_date), normalise timestamps, and build the geo point.
filter {
  csv {
    # Column delimiter (the plugin default is a comma).
    # NOTE(review): the delimiter below is a replacement character, which suggests
    # the original separator was lost to an encoding problem. Confirm it against
    # the real CSV files before relying on this config.
    separator => "�"

    # Field names assigned to the parsed columns, in order.
    columns => [
      "id","cid","name","phone","check_result","checker_cid","checker_name",
      "province","city","area","street","address","longitude","latitude",
      "organizationcode","create_time","report_type","certified","group_cid",
      "edu_organizationcode","edu_actiontype","channel","edu_remark",
      "my_create_time","my_id_hash","my_value_md5","my_update_md5","org_name",
      "point_id","person_type","person_type_dmbcms",
      "extra_0","extra_1","extra_2","extra_3","extra_4",
      "extra_5","extra_6","extra_7","extra_8","extra_9","hjk_extra_1"
    ]

    # Skip the first row, which holds the column names.
    skip_header => true
  }

  # Map the city name to a numeric code (a hand-written lookup dictionary).
  # Not recommended for larger dictionaries; the `translate` filter plugin is
  # the usual tool for this kind of lookup.
  if [city] == "广州市" {
    mutate {
      add_field => { "city_code" => "4401" }
    }
  } else if [city] == "韶关市" {
    mutate {
      add_field => { "city_code" => "4402" }
    }
  } else {
    # Fallback code for every other (or missing) city.
    mutate {
      add_field => { "city_code" => "4400" }
    }
  }

  # Copy create_time into index_date; the output stage uses index_date as part
  # of the index name.
  mutate {
    copy => {
      "create_time" => "index_date"
    }
  }

  # Keep only the first 7 bytes of index_date
  # (presumably the "yyyy-MM" prefix of the timestamp; confirm against the data).
  truncate {
    fields => "index_date"
    length_bytes => 7
  }

  # Trim both timestamps to 19 bytes, the length of a "yyyy-MM-dd HH:mm:ss" value.
  truncate {
    fields => "my_create_time"
    length_bytes => 19
  }
  truncate {
    fields => "create_time"
    length_bytes => 19
  }

  # Build the geo coordinate array [longitude, latitude] only when both values
  # are present. Without this guard, a row with a missing or empty coordinate
  # would leave an unresolved "%{...}" literal (or an empty string) that the
  # float conversion turns into 0.0, indexing a bogus point at [0.0, 0.0].
  if [longitude] and [longitude] != "" and [latitude] and [latitude] != "" {
    mutate {
      # Two add_field entries with the same key accumulate into one array.
      add_field => ["[dest_location]","%{longitude}"]
      add_field => ["[dest_location]","%{latitude}"]
    }
    # Separate mutate block: the conversion has to run after add_field has
    # created the array, so it cannot share the block above.
    mutate {
      convert => ["[dest_location]","float"]
    }
  }

  mutate {
    # Convert field types.
    convert => {
      "city_code" => "integer"
      "longitude" => "float"
      "latitude" => "float"
    }
    # Drop fields that are not needed in the indexed document.
    remove_field => ["message","@timestamp","host","path"]
  }
}
# Output stage: index every event into Elasticsearch and echo it to stdout.
output {
  elasticsearch {
    hosts => ["http://10.47.189.143:9200"]
    action => "index"

    # Target index, built from the city code and the truncated create_time.
    index => "csm-%{[city_code]}-%{[index_date]}"
    # Use the CSV "id" column as the document id.
    document_id => "%{id}"

    # Index template: name, file location, and overwrite flag.
    # NOTE(review): the template path is relative; confirm it resolves from the
    # directory Logstash is started in.
    template_name => "logstash"
    template => "../template/logstash.json"
    template_overwrite => true

    # NOTE(review): check that the Logstash version in use still honours
    # `workers` on this output.
    workers => 1

    #user => "elastic"
    #password => "*****"
  }

  # Also print each event to the console as JSON lines.
  stdout {
    codec => json_lines
  }
}
logstash.json模板配置
{
"index_patterns": ["csm*"],
"order" : 0,
"version": 1,
"settings": {
"number_of_shards": 5,
"number_of_replicas":0,
"index.refresh_interval": "30s"
},
"mappings": {
"dynamic_templates": [
{
"dates": {
"match": "*time",
"mapping": {
"type": "date",
"format":"yyyy-MM-dd HH:mm:ss"
}
}
}
],
"properties" : {
"dest_location" : {
"type" : "geo_point"
},
"city_code" : {
"type" : "long"
},
"longitude" : {
"type" : "float"
},
"latitude" : {
"type" : "float"
}
}
}
}
参考文章:https://blog.csdn.net/qq330983778/article/details/106179934