背景
之前我们基于ES构建了内容中心的全站搜索,现在作品的制作情况也需要在前端BI层面进行交互分析,BI层的实时聚合部分我们采用ES来实现;为了不影响内容搜索业务的正常运行,我们选择新建一个集群来支撑上述功能,并通过_reindex操作实现跨集群数据同步。_reindex操作并不是ES 7.0才有的功能,通过这个操作可以快速实现索引的复制、重建和跨集群迁移。
搭建流程
整个交互式数据分析平台由两部分构成:CBoard + ES。
CBoard
因本篇侧重ES的使用讲解,CBoard的细节可参看相关资料解决;CBoard是一款开源的自助分析工具,主要由4部分组成:创建数据源(这里我们选用ES)> 创建数据集 > 创建图表 > 创建看板。因创建图表功能支持用户拖拉拽操作,所以这部分操作对用户来说非常简单;部署完该项目后基本不需要二次开发,启动后配置完数据源便可以使用;有难度的是自助分析需要有一个强大的实时计算引擎支撑;经测试,ES可满足在现有数据上任意维度的聚合分析,图表的加载性能经优化后可达到秒级响应。
ES
a、数据同步;这里采用_reindex的方式每天增量从内容库迁移,配置如下:
# Reindex-from-remote does not support manual or automatic slicing, so no `slices` parameter is passed.
POST _reindex?refresh
{
  "source": {
    "remote": { # Source cluster to pull from; the host must be allowed by reindex.remote.whitelist on the destination cluster
      "host": "http://source_host:9200"
    },
    "index": ["scene_model", "ls_model", "print_model"], # Source indices (or aliases) to read documents from
    "_source": ["code", "cover", "id", "product", "title", "create_time", "publish_time", "update_time", "total_pv", "total_uv", "total_form", "login_id", "user_reg_time"], # Fields to copy; intended to line up with the destination mapping
    "size": 1000, # Batch size of each scroll fetch
    "query": { # Incremental pull: only documents whose publish_time is at or after the start of yesterday
      "range": {
        "publish_time": {
          "gte": "now-1d/d"
        }
      }
    }
  },
  "dest": {
    "index": "work_model", # Destination index name
    "version_type": "external" # Keep the source version: create missing documents, overwrite only older ones (upsert-like)
  },
  "script": {
    "lang": "painless",
    "source": "def src = ctx._index.startsWith('as2_') ? ctx._index.substring('as2_'.length()) : ctx._index; ctx._id = src + '_' + ctx._id; ctx._source.index = src" # Documents come from several source indices, so prefix _id with the source index name (minus an optional 'as2_' prefix) to avoid id collisions, and record that name in the `index` field
  }
}
b、mapping & setting
{
  "mappings": {
    "dynamic": "false",
    "dynamic_templates": [
      {
        "strings": {
          "match_mapping_type": "string",
          "mapping": {
            "doc_values": false,
            "norms": false,
            "type": "keyword"
          }
        }
      }
    ],
    "date_detection": false,
    "properties": {
      "biz_type": {
        "type": "keyword"
      },
      "check_status": {
        "type": "keyword",
        "doc_values": false
      },
      "code": {
        "type": "keyword",
        "doc_values": false
      },
      "cover": {
        "type": "keyword",
        "index": false,
        "doc_values": false
      },
      "create_time": {
        "type": "date",
        "format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis"
      },
      "create_user": {
        "type": "keyword"
      },
      "enterprise": {
        "type": "keyword"
      },
      "id": {
        "type": "keyword",
        "doc_values": false
      },
      "is_del": {
        "type": "keyword",
        "doc_values": false
      },
      "login_id": {
        "type": "keyword",
        "doc_values": false
      },
      "member_type": {
        "type": "keyword"
      },
      "product": {
        "type": "keyword",
        "doc_values": false
      },
      "publish_time": {
        "type": "date",
        "format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis"
      },
      "share_type": {
        "type": "keyword"
      },
      "template_code": {
        "type": "keyword",
        "doc_values": false
      },
      "template_title": {
        "type": "text",
        "index_options": "freqs",
        "analyzer": "eqs_analyzer",
        "search_analyzer": "ik_smart"
      },
      "title": {
        "type": "text",
        "index_options": "freqs",
        "analyzer": "eqs_analyzer",
        "search_analyzer": "ik_smart"
      },
      "total_form": {
        "type": "integer",
        "ignore_malformed": true
      },
      "total_pv": {
        "type": "integer",
        "ignore_malformed": true
      },
      "total_spv": {
        "type": "integer",
        "ignore_malformed": true
      },
      "total_uv": {
        "type": "integer",
        "ignore_malformed": true
      },
      "update_time": {
        "type": "date",
        "format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis"
      },
      "user_name": {
        "type": "keyword",
        "doc_values": false
      },
      "user_phone": {
        "type": "keyword"
      },
      "user_type": {
        "type": "keyword"
      },
      "yesterday_pv": {
        "type": "integer",
        "ignore_malformed": true
      },
      "yesterday_uv": {
        "type": "integer",
        "ignore_malformed": true
      }
    }
  },
  "settings": {
    "index": {
      "refresh_interval": "120s",
      "translog": {
        "flush_threshold_size": "1024mb",
        "sync_interval": "120s",
        "durability": "async"
      },
      "max_result_window": "20000",
      "store": {
        "type": "niofs"
      },
      "unassigned": {
        "node_left": {
          "delayed_timeout": "1d"
        }
      },
      "analysis": {
        "analyzer": {
          "eqs_highlight_analyzer": {
            "filter": [
              "unique"
            ],
            "type": "custom",
            "tokenizer": "letter"
          },
          "eqs_analyzer": {
            "filter": [
              "unique"
            ],
            "char_filter": [
              "html_strip"
            ],
            "type": "custom",
            "tokenizer": "ik_max_word"
          }
        }
      },
      "number_of_replicas": "0",
      "codec": "best_compression",
      "routing": {
        "allocation": {
          "total_shards_per_node": "10"
        }
      },
      "search": {
        "slowlog": {
          "level": "info",
          "threshold": {
            "fetch": {
              "info": "500ms"
            },
            "query": {
              "info": "1s"
            }
          }
        }
      },
      "number_of_shards": "8",
      "merge": {
        "scheduler": {
          "max_thread_count": "2"
        }
      }
    }
  }
}
未完待续~