一、Elasticsearch分词
分词分为读时分词和写时分词。
1.1 读时分词
读时分词发生在用户查询时,ES 会即时地对用户输入的关键词进行分词,分词结果只存在内存中,当查询结束时,分词结果也会随即消失。
1.2 写时分词
写时分词发生在文档写入时,ES 会对文档进行分词后,将结果存入倒排索引,该部分最终会以文件的形式存储于磁盘上,不会因查询结束或者ES重启而丢失。
写时分词器需要在Mapping中指定,而且一经指定就不能再修改,若要修改必须新建索引,但Elasticsearch可以用Put Mapping API新增字段。
1.3 分词处理器
分词在ES中一般由分词器处理,英文为Analyzer,它决定了分词的规则。ES默认自带了很多分词器,如:Standard、english、Keyword、Whitespace等等,默认的分词器为Standard;通过组合它们各自的功能,可以得到你想要的分词规则。分词器具体详情可查看官网:分词器
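另外,还有常用的中文分词器、拼音分词器、繁简体转换插件。国内用得较多的分别是:IK 中文分词器(elasticsearch-analysis-ik)、拼音分词器(elasticsearch-analysis-pinyin)、繁简体转换插件(elasticsearch-analysis-stconvert)。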
1.4 安装分词器
下载与ES版本对应的中文分词器。将解压后的文件夹放入ES根目录下的plugins/ik目录下(ik目录要手动创建),重启ES即可使用。
二、集成SpringBoot实现拼音、简体、繁体搜索
2.1 示例代码
2.1.1 学校实体类
/**
 * School document as stored in and read back from the Elasticsearch school index.
 * Search hits are deserialized into this type by the search service shown later in this article.
 */
@Data
public class EsSchool implements Serializable {
// School ID
private String schoolId;
// School short name
private String school;
// School name (full name)
private String schoolFullName;
// School name (traditional Chinese)
private String nameHk;
// School code
private String code;
}
2.1.2 创建文档
@Slf4j
@Service
public class EsSchoolServiceImpl implements EsSchoolService{
/** Elasticsearch high-level REST client used by every operation of this service. */
@Resource
protected RestHighLevelClient client;

/** Request options shared by the index and document calls issued from this class. */
protected static final RequestOptions COMMON_OPTIONS;

static {
    // The default response buffer limit is 100MB; it is lowered to 30MB here.
    HttpAsyncResponseConsumerFactory consumerFactory =
            new HttpAsyncResponseConsumerFactory.HeapBufferedResponseConsumerFactory(30 * 1024 * 1024);
    RequestOptions.Builder optionsBuilder = RequestOptions.DEFAULT.toBuilder();
    optionsBuilder.setHttpAsyncResponseConsumerFactory(consumerFactory);
    COMMON_OPTIONS = optionsBuilder.build();
}
/**
 * Creates the given index using the analysis settings from {@code packageSetting()}
 * and the field mapping from {@code packageMapping()}.
 *
 * @param index name of the index to create
 * @return the result of {@code createIndexSetting}: {@code true} when the create request
 *         was acknowledged, {@code false} otherwise
 */
@Override
public Boolean createIndex(String index){
    XContentBuilder setting = packageSetting();
    XContentBuilder mapping = packageMapping();
    // createIndexSetting is declared on this class (which has no superclass declaring it),
    // so it must be invoked directly rather than through `super`.
    return createIndexSetting(index, setting, mapping);
}
/**
 * Builds the index mapping: a non-indexed {@code id}, a keyword {@code code}, and the three
 * school-name text fields ({@code school}, {@code schoolFullName}, {@code nameHk}), each with
 * a {@code pinyin} sub-field.
 *
 * @return the mapping builder, or {@code null} when building it failed (the failure is logged)
 */
private XContentBuilder packageMapping(){
    try {
        XContentBuilder builder = XContentFactory.jsonBuilder()
                .startObject()
                .field("dynamic", true)
                .startObject("properties")
                // id
                .startObject("id")
                .field("type", "long")
                .field("index", false)
                .endObject()
                // school code
                .startObject("code")
                .field("type", "keyword")
                .endObject();
        // school short name
        appendNameField(builder, "school");
        // school full name
        appendNameField(builder, "schoolFullName");
        // school name in traditional Chinese
        appendNameField(builder, "nameHk");
        // close "properties" and the root object
        builder.endObject().endObject();
        return builder;
    } catch (Exception e) {
        // Pass the exception itself so the stack trace is not lost.
        log.error("Failed to build index mapping", e);
        // A partially built mapping is never returned.
        return null;
    }
}

/**
 * Appends one school-name text field: analyzed with {@code ikSearchAnalyzer} for both indexing
 * and searching, plus a {@code pinyin} sub-field analyzed with {@code pinyinFullIndexAnalyzer}.
 *
 * @param builder   mapping builder positioned inside the {@code properties} object
 * @param fieldName name of the field to add
 * @throws IOException if the builder fails to write the field
 */
private void appendNameField(XContentBuilder builder, String fieldName) throws IOException {
    builder.startObject(fieldName)
            .field("type", "text")
            .field("analyzer", "ikSearchAnalyzer")
            .field("search_analyzer", "ikSearchAnalyzer")
            .startObject("fields")
            .startObject("pinyin")
            .field("type", "text")
            .field("index", true)
            .field("analyzer", "pinyinFullIndexAnalyzer")
            .endObject()
            .endObject()
            .endObject();
}
/**
 * Builds the index settings (the {@code analysis} section plus {@code index.max_ngram_diff}).
 *
 * <p>ngram analyzer configuration:
 * <ul>
 *   <li>ngram: splits English words into letter grams</li>
 *   <li>{@code field("filter","lowercase")}: makes the search case-insensitive</li>
 * </ul>
 *
 * <p>index.max_ngram_diff: the allowed difference between min_gram and max_gram, see
 * https://www.elastic.co/guide/en/elasticsearch/reference/6.8/analysis-ngram-tokenizer.html
 *
 * <p>normalizer: addresses case sensitivity of keyword fields, see
 * https://www.elastic.co/guide/en/elasticsearch/reference/6.0/normalizer.html
 *
 * <p>Pinyin search:
 * https://github.com/medcl/elasticsearch-analysis-pinyin
 *
 * <p>Simplified/traditional Chinese conversion:
 * https://github.com/medcl/elasticsearch-analysis-stconvert
 *
 * <p>Sample:
 * https://blog.csdn.net/qq_39211866/article/details/85178707
 *
 * @return the settings builder, or {@code null} when building it failed (the failure is logged)
 */
protected XContentBuilder packageSetting() {
    XContentBuilder setting = null;
    try {
        setting = XContentFactory.jsonBuilder()
                .startObject()
                .field("index.max_ngram_diff","5")
                .startObject("analysis")
                    .startObject("filter")
                        .startObject("edge_ngram_filter")
                        .field("type","edge_ngram")
                        .field("min_gram","1")
                        .field("max_gram","50")
                        .endObject()
                        .startObject("pinyin_edge_ngram_filter")
                        .field("type","edge_ngram")
                        .field("min_gram",1)
                        .field("max_gram",50)
                        .endObject()
                        // abbreviated pinyin (first letters only)
                        .startObject("pinyin_simple_filter")
                        .field("type","pinyin")
                        .field("keep_first_letter",true)
                        .field("keep_separate_first_letter",false)
                        .field("keep_full_pinyin",false)
                        .field("keep_original",false)
                        .field("limit_first_letter_length",50)
                        .field("lowercase",true)
                        .endObject()
                        // full pinyin
                        .startObject("pinyin_full_filter")
                        .field("type","pinyin")
                        .field("keep_first_letter",false)
                        .field("keep_separate_first_letter",false)
                        .field("keep_full_pinyin",true)
                        .field("keep_original",false)
                        .field("limit_first_letter_length",50)
                        .field("lowercase",true)
                        .endObject()
                    .endObject()
                    // traditional -> simplified conversion (convert_type "t2s")
                    .startObject("char_filter")
                        .startObject("tsconvert")
                        .field("type","stconvert")
                        .field("convert_type","t2s")
                        .endObject()
                    .endObject()
                    .startObject("analyzer")
                        .startObject("ngram")
                        .field("tokenizer","my_tokenizer")
                        .field("filter","lowercase")
                        .endObject()
                        // ik tokenizer + traditional/simplified conversion
                        .startObject("ikSearchAnalyzer")
                        .field("type","custom")
                        .field("tokenizer","ik_max_word")
                        .field("char_filter","tsconvert")
                        .endObject()
                        // abbreviated-pinyin search
                        .startObject("pinyinSimpleIndexAnalyzer")
                        .field("type","custom")
                        .field("tokenizer","keyword")
                        .array("filter","pinyin_simple_filter","pinyin_edge_ngram_filter","lowercase")
                        .endObject()
                        // full-pinyin search
                        .startObject("pinyinFullIndexAnalyzer")
                        .field("type","custom")
                        .field("tokenizer","keyword")
                        .array("filter","pinyin_full_filter","lowercase")
                        .endObject()
                    .endObject()
                    .startObject("tokenizer")
                        .startObject("my_tokenizer")
                        .field("type","ngram")
                        .field("min_gram","1")
                        .field("max_gram","3")
                        .endObject()
                    .endObject()
                    .startObject("normalizer")
                        .startObject("lowercase")
                        .field("type","custom")
                        .field("filter","lowercase")
                        .endObject()
                    .endObject()
                .endObject()
                .endObject();
    } catch (Exception e) {
        // Pass the exception itself so the stack trace is not lost.
        log.error("Failed to build index settings", e);
    }
    return setting;
}
/**
 * Creates an index, optionally applying custom settings and a mapping.
 *
 * @param indexName name of the index to create
 * @param settings  index settings to apply; skipped when {@code null}
 * @param mapping   index mapping to apply; skipped when {@code null}
 * @return {@code true} when the create request was acknowledged; {@code false} when it was not
 *         or when the call failed (the failure is logged)
 */
protected Boolean createIndexSetting(String indexName, XContentBuilder settings, XContentBuilder mapping) {
    try {
        CreateIndexRequest request = buildCreateIndexRequest(indexName);
        if (settings != null) {
            // NOTE(review): CreateIndexRequest.settings(...) appears to replace, not merge, the
            // shard/replica settings set by buildCreateIndexRequest - confirm against the client version in use.
            request.settings(settings);
        }
        if (mapping != null) {
            request.mapping(mapping);
        }
        // obtain the indices client and create the index
        IndicesClient indices = client.indices();
        CreateIndexResponse response = indices.create(request, COMMON_OPTIONS);
        log.info("是否所有节点都已确认请求: " + response.isAcknowledged());
        log.info("指示是否在超时之前为索引中的每个分片启动了必要数量的分片副本: " + response.isShardsAcknowledged());
        return response.isAcknowledged();
    } catch (Exception e) {
        // Route the failure through the logger (with stack trace) instead of printStackTrace().
        log.error("Failed to create index " + indexName, e);
        return false;
    }
}
/**
 * Builds a create-index request preconfigured with one primary shard and no replicas.
 *
 * @param indexName name of the index the request will create
 * @return the prepared create-index request
 */
protected CreateIndexRequest buildCreateIndexRequest(String indexName) {
    CreateIndexRequest createRequest = new CreateIndexRequest(indexName);
    // single shard, zero replicas
    Settings.Builder shardSettings = Settings.builder()
            .put("index.number_of_shards", 1)
            .put("index.number_of_replicas", 0);
    createRequest.settings(shardSettings);
    return createRequest;
}
/**
 * Deletes an index.
 *
 * @param indexName name of the index to delete
 * @return {@code true} when the delete request was acknowledged; {@code false} when it was not
 *         or when the call failed (the failure is logged)
 */
protected Boolean deleteIndex(String indexName) {
    try {
        // build the delete-index request
        DeleteIndexRequest request = buildDeleteIndexRequest(indexName);
        // obtain the indices client and delete the index
        IndicesClient indices = client.indices();
        AcknowledgedResponse response = indices.delete(request, COMMON_OPTIONS);
        log.info("是否所有节点都已确认请求: " + response.isAcknowledged());
        // Report the real outcome; the previous code always returned false.
        return response.isAcknowledged();
    } catch (IOException e) {
        log.error("Failed to delete index " + indexName, e);
        return false;
    }
}
/**
 * Indexes a document.
 *
 * @param indexName target index
 * @param docId     document id
 * @param docObj    document content, passed to {@code buildInsertDocRequest}
 * @return the index response, or {@code null} when the call failed with an I/O error
 *         (the failure is logged)
 */
protected IndexResponse insertDoc(String indexName, String docId, Object docObj) {
    IndexRequest indexRequest = buildInsertDocRequest(indexName, docId, docObj);
    try {
        return client.index(indexRequest, COMMON_OPTIONS);
    } catch (IOException e) {
        log.error("Failed to index document " + docId + " into " + indexName, e);
        // The method previously fell off the end without returning, which does not compile.
        return null;
    }
}
/**
 * Updates a document, inserting it when it does not exist yet (doc-as-upsert).
 *
 * @param indexName target index
 * @param docId     document id
 * @param docObj    document content, passed to {@code buildUpdDocRequest}
 * @return the update response, or {@code null} when the call failed with an I/O error
 *         (the failure is logged)
 */
protected UpdateResponse updateDoc(String indexName, String docId, Object docObj) {
    UpdateRequest request = buildUpdDocRequest(indexName, docId, docObj);
    try {
        request.docAsUpsert(true);
        return client.update(request, COMMON_OPTIONS);
    } catch (IOException e) {
        log.error("Failed to update document " + docId + " in " + indexName, e);
        // The method previously fell off the end without returning, which does not compile.
        return null;
    }
}
/**
 * Deletes a document by id.
 *
 * @param indexName index holding the document
 * @param docId     id of the document to delete
 * @return the delete response, or {@code null} when the call failed with an I/O error
 *         (the failure is logged)
 */
protected DeleteResponse deleteDoc(String indexName, String docId) {
    DeleteRequest deleteRequest = new DeleteRequest(indexName, docId);
    try {
        return client.delete(deleteRequest, COMMON_OPTIONS);
    } catch (IOException e) {
        log.error("Failed to delete document " + docId + " from " + indexName, e);
        // The method previously fell off the end without returning, which does not compile.
        return null;
    }
}
}
2.1.3 搜索文档
@Slf4j
@Service
public class EsSchoolServiceImpl implements EsSchoolService {
/**
 * Searches the {@code es_school} index for schools matching a keyword, one page at a time.
 *
 * <p>A keyword whose first character is Chinese is matched against the name fields;
 * any other keyword is matched against their {@code pinyin} sub-fields.
 *
 * @param keyword  search keyword; when blank no keyword condition is applied
 * @param currPage page number to fetch
 * @param pageSize number of hits per page
 * @return the result of {@code schoolMatchQueryPage} for the requested page
 */
public List<EsSchool> searchSchool(String keyword, Integer currPage, Integer pageSize){
    PageSearchVO pageSearchVO = new PageSearchVO();
    pageSearchVO.setIndex("es_school");
    pageSearchVO.setPageNo(currPage);
    pageSearchVO.setPageSize(pageSize);
    // Never match Chinese and pinyin fields at the same time (avoids homophone noise).
    if(StringUtil.isNotBlank(keyword)){
        pageSearchVO.setKeyword(keyword);
        boolean isChinese = PinyinUtil.isChinese(keyword.charAt(0));
        if(isChinese){
            pageSearchVO.setFieldList(Arrays.asList("school","schoolFullName"));
        }else{
            pageSearchVO.setFieldList(Arrays.asList("school.pinyin","schoolFullName.pinyin"));
        }
    }
    // Restrict the search to the permitted schools.
    List<String> schoolIds = listSchoolIds();
    // The search result was previously discarded and the method had no return statement.
    return schoolMatchQueryPage(pageSearchVO,schoolIds);
}
/**
 * Runs a paged school search and writes the totals back into {@code pageSearchVO}.
 *
 * <p>The query is a bool query built only from filter clauses: an optional terms filter on
 * {@code schoolId} and an optional multi-match on the requested fields. An optional sort is
 * applied when both the order field and the direction are set.
 *
 * @param pageSearchVO search parameters (index, page number, page size, keyword, fields, sort);
 *                     on success its total hit count and total page count are updated
 * @param schoolIds    when non-empty, only documents whose {@code schoolId} is in this list match
 * @return the hits of the requested page, or {@code null} when the search call failed
 *         (the failure is logged)
 */
public List<EsSchool> schoolMatchQueryPage(PageSearchVO pageSearchVO,List<String> schoolIds){
    List<EsSchool> resultList = Lists.newArrayList();
    String index = pageSearchVO.getIndex();
    Integer pageNo = pageSearchVO.getPageNo();
    Integer pageSize = pageSearchVO.getPageSize();
    String keyword = pageSearchVO.getKeyword();
    String orderField = pageSearchVO.getOrderField();
    String orderBy = pageSearchVO.getOrderBy();
    List<String> fieldList = pageSearchVO.getFieldList();

    SearchRequest request = new SearchRequest(index);
    SearchSourceBuilder builder = new SearchSourceBuilder();
    Integer from = (pageNo - 1) * pageSize;
    builder.from(from);
    builder.size(pageSize);

    BoolQueryBuilder queryBuilder = QueryBuilders.boolQuery();
    if(CollectionUtil.isNotEmpty(schoolIds)){
        // comparable to a SQL "IN" condition
        queryBuilder.filter(QueryBuilders.termsQuery("schoolId", schoolIds));
    }
    if (StringUtils.isNotBlank(keyword)) {
        MultiMatchQueryBuilder multiMatchQueryBuilder = QueryBuilders.multiMatchQuery(keyword, ArrayUtil.toArray(fieldList, String.class));
        // Operator.AND: every term produced by analyzing the keyword must match (stricter than the default OR).
        multiMatchQueryBuilder.operator(Operator.AND);
        queryBuilder.filter(multiMatchQueryBuilder);
    }
    if (StringUtils.isNotBlank(orderField) && StringUtils.isNotBlank(orderBy)) {
        FieldSortBuilder order = new FieldSortBuilder(orderField);
        // anything other than "ASC" sorts descending
        order.order("ASC".equals(orderBy) ? SortOrder.ASC : SortOrder.DESC);
        builder.sort(order);
    }
    // The match_all query set here previously was always overwritten by this call, so it was dropped.
    builder.query(queryBuilder);
    request.source(builder);

    SearchResponse response;
    try{
        response = client.search(request, RequestOptions.DEFAULT);
    }catch (Exception e){
        // Log through the logger with the stack trace instead of printStackTrace().
        log.error("School search failed on index " + index, e);
        return null;
    }
    log.info("DSL:" + builder.toString());
    for (SearchHit hit : response.getHits()) {
        String resultString = hit.getSourceAsString();
        resultList.add(JSONObject.parseObject(resultString, EsSchool.class));
    }
    // Paging totals. Ceiling division: the former "total / size + 1" reported one page too many
    // whenever the total was an exact multiple of the page size (including zero hits).
    long totalNum = response.getHits().getTotalHits().value;
    long totalPage = (totalNum + pageSize - 1) / pageSize;
    pageSearchVO.setTotalNum(totalNum);
    pageSearchVO.setTotalPage(totalPage);
    return resultList;
}
}
2.2 查看学校文档结构
查询结构
GET qmschool/_mapping
结构结果
{
"qmschool" : {
"mappings" : {
"dynamic" : "true",
"properties" : {
"code" : {
"type" : "keyword"
},
"id" : {
"type" : "long",
"index" : false
},
"nameHk" : {
"type" : "text",
"fields" : {
"pinyin" : {
"type" : "text",
"analyzer" : "pinyinFullIndexAnalyzer"
}
},
"analyzer" : "ikSearchAnalyzer"
},
"school" : {
"type" : "text",
"fields" : {
"pinyin" : {
"type" : "text",
"analyzer" : "pinyinFullIndexAnalyzer"
}
},
"analyzer" : "ikSearchAnalyzer"
},
"schoolFullName" : {
"type" : "text",
"fields" : {
"pinyin" : {
"type" : "text",
"analyzer" : "pinyinFullIndexAnalyzer"
}
},
"analyzer" : "ikSearchAnalyzer"
},
"schoolId" : {
"type" : "text",
"fields" : {
"keyword" : {
"type" : "keyword",
"ignore_above" : 256
}
}
}
}
}
}
}
2.3 简体、繁体查询
2.3.1 简体查询
GET qmschool/_search
{
"from": 0,
"size": 50,
"query": {
"bool": {
"filter": [
{
"multi_match": {
"query": "北京大学",
"fields": [
"school^1.0",
"schoolFullName^1.0"
],
"type": "best_fields",
"operator": "AND",
"slop": 0,
"prefix_length": 0,
"max_expansions": 50,
"zero_terms_query": "NONE",
"auto_generate_synonyms_phrase_query": true,
"fuzzy_transpositions": true,
"boost": 1.0
}
}
],
"adjust_pure_negative": true,
"boost": 1.0
}
}
}
简体查询结果
{
"took" : 0,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 6,
"relation" : "eq"
},
"max_score" : 0.0,
"hits" : [
{
"_index" : "qmschool",
"_type" : "_doc",
"_id" : "200836953538563",
"_score" : 0.0,
"_source" : {
"nameHk" : "北京大學附屬中學",
"schoolFullName" : "北京大学附属中学",
"schoolId" : "200836953538563"
}
},
{
"_index" : "qmschool",
"_type" : "_doc",
"_id" : "169981555834898",
"_score" : 0.0,
"_source" : {
"code" : "4111010001",
"nameHk" : "",
"school" : "北大",
"schoolFullName" : "北京大学",
"schoolId" : "169981555834898"
}
},
{
"_index" : "qmschool",
"_type" : "_doc",
"_id" : "200836953541924",
"_score" : 0.0,
"_source" : {
"nameHk" : "北京大學附屬中學河南分校",
"schoolFullName" : "北京大学附属中学河南分校",
"schoolId" : "200836953541924"
}
},
{
"_index" : "qmschool",
"_type" : "_doc",
"_id" : "200836953543280",
"_score" : 0.0,
"_source" : {
"nameHk" : "北京大學南寧附屬實驗學校",
"schoolFullName" : "北京大学南宁附属实验学校",
"schoolId" : "200836953543280"
}
},
{
"_index" : "qmschool",
"_type" : "_doc",
"_id" : "200836953543981",
"_score" : 0.0,
"_source" : {
"nameHk" : "北京大學附屬中學雲南實驗學校",
"schoolFullName" : "北京大学附属中学云南实验学校",
"schoolId" : "200836953543981"
}
},
{
"_index" : "qmschool",
"_type" : "_doc",
"_id" : "200836953544603",
"_score" : 0.0,
"_source" : {
"nameHk" : "北京大學附屬中學新疆分校",
"schoolFullName" : "北京大学附属中学新疆分校",
"schoolId" : "200836953544603"
}
}
]
}
}
2.3.2 繁体查询
GET qmschool/_search
{
"from": 0,
"size": 50,
"query": {
"bool": {
"filter": [
{
"multi_match": {
"query": "北京大學",
"fields": [
"school^1.0",
"schoolFullName^1.0"
],
"type": "best_fields",
"operator": "AND",
"slop": 0,
"prefix_length": 0,
"max_expansions": 50,
"zero_terms_query": "NONE",
"auto_generate_synonyms_phrase_query": true,
"fuzzy_transpositions": true,
"boost": 1.0
}
}
],
"adjust_pure_negative": true,
"boost": 1.0
}
}
}
繁体查询结果(与简体查询结果一样)
{
"took" : 0,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 6,
"relation" : "eq"
},
"max_score" : 0.0,
"hits" : [
{
"_index" : "qmschool",
"_type" : "_doc",
"_id" : "200836953538563",
"_score" : 0.0,
"_source" : {
"nameHk" : "北京大學附屬中學",
"schoolFullName" : "北京大学附属中学",
"schoolId" : "200836953538563"
}
},
{
"_index" : "qmschool",
"_type" : "_doc",
"_id" : "169981555834898",
"_score" : 0.0,
"_source" : {
"code" : "4111010001",
"nameHk" : "",
"school" : "北大",
"schoolFullName" : "北京大学",
"schoolId" : "169981555834898"
}
},
{
"_index" : "qmschool",
"_type" : "_doc",
"_id" : "200836953541924",
"_score" : 0.0,
"_source" : {
"nameHk" : "北京大學附屬中學河南分校",
"schoolFullName" : "北京大学附属中学河南分校",
"schoolId" : "200836953541924"
}
},
{
"_index" : "qmschool",
"_type" : "_doc",
"_id" : "200836953543280",
"_score" : 0.0,
"_source" : {
"nameHk" : "北京大學南寧附屬實驗學校",
"schoolFullName" : "北京大学南宁附属实验学校",
"schoolId" : "200836953543280"
}
},
{
"_index" : "qmschool",
"_type" : "_doc",
"_id" : "200836953543981",
"_score" : 0.0,
"_source" : {
"nameHk" : "北京大學附屬中學雲南實驗學校",
"schoolFullName" : "北京大学附属中学云南实验学校",
"schoolId" : "200836953543981"
}
},
{
"_index" : "qmschool",
"_type" : "_doc",
"_id" : "200836953544603",
"_score" : 0.0,
"_source" : {
"nameHk" : "北京大學附屬中學新疆分校",
"schoolFullName" : "北京大学附属中学新疆分校",
"schoolId" : "200836953544603"
}
}
]
}
}
2.4 拼音查询
GET qmschool/_search
{
"from": 0,
"size": 50,
"query": {
"bool": {
"filter": [
{
"multi_match": {
"query": "beijingdaxue",
"fields": [
"school.pinyin^1.0",
"schoolFullName.pinyin^1.0"
],
"type": "best_fields",
"operator": "AND",
"slop": 0,
"prefix_length": 0,
"max_expansions": 50,
"zero_terms_query": "NONE",
"auto_generate_synonyms_phrase_query": true,
"fuzzy_transpositions": true,
"boost": 1.0
}
}
],
"adjust_pure_negative": true,
"boost": 1.0
}
}
}
拼音查询结果
{
"took" : 2,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 54,
"relation" : "eq"
},
"max_score" : 0.0,
"hits" : [
{
"_index" : "qmschool",
"_type" : "_doc",
"_id" : "200836953538563",
"_score" : 0.0,
"_source" : {
"nameHk" : "北京大學附屬中學",
"schoolFullName" : "北京大学附属中学",
"schoolId" : "200836953538563"
}
},
{
"_index" : "qmschool",
"_type" : "_doc",
"_id" : "200836953538564",
"_score" : 0.0,
"_source" : {
"nameHk" : "北京工業大學附屬中學",
"schoolFullName" : "北京工业大学附属中学",
"schoolId" : "200836953538564"
}
},
{
"_index" : "qmschool",
"_type" : "_doc",
"_id" : "200836953538565",
"_score" : 0.0,
"_source" : {
"nameHk" : "北京航空航太大學附屬中學",
"schoolFullName" : "北京航空航天大学附属中学",
"schoolId" : "200836953538565"
}
},
{
"_index" : "qmschool",
"_type" : "_doc",
"_id" : "200836953538568",
"_score" : 0.0,
"_source" : {
"nameHk" : "北京理工大學附屬中學",
"schoolFullName" : "北京理工大学附属中学",
"schoolId" : "200836953538568"
}
},
{
"_index" : "qmschool",
"_type" : "_doc",
"_id" : "200836953538569",
"_score" : 0.0,
"_source" : {
"nameHk" : "北京師範大學第二附屬中學",
"schoolFullName" : "北京师范大学第二附属中学",
"schoolId" : "200836953538569"
}
},
{
"_index" : "qmschool",
"_type" : "_doc",
"_id" : "200836953538573",
"_score" : 0.0,
"_source" : {
"nameHk" : "北京師範大學密雲實驗中學",
"schoolFullName" : "北京师范大学密云实验中学",
"schoolId" : "200836953538573"
}
},
{
"_index" : "qmschool",
"_type" : "_doc",
"_id" : "200836953538574",
"_score" : 0.0,
"_source" : {
"nameHk" : "北京師範大學燕化附屬中學",
"schoolFullName" : "北京师范大学燕化附属中学",
"schoolId" : "200836953538574"
}
},
{
"_index" : "qmschool",
"_type" : "_doc",
"_id" : "179981555836941",
"_score" : 0.0,
"_source" : {
"code" : "4145013890",
"nameHk" : "",
"schoolFullName" : "北京航空航天大学北海学院",
"schoolId" : "179981555836941"
}
},
{
"_index" : "qmschool",
"_type" : "_doc",
"_id" : "169981555835017",
"_score" : 0.0,
"_source" : {
"code" : "4112013898",
"nameHk" : "",
"school" : "北科大天津学院",
"schoolFullName" : "北京科技大学天津学院",
"schoolId" : "169981555835017"
}
},
{
"_index" : "qmschool",
"_type" : "_doc",
"_id" : "169981555834910",
"_score" : 0.0,
"_source" : {
"code" : "4111010013",
"nameHk" : "",
"school" : "北邮",
"schoolFullName" : "北京邮电大学",
"schoolId" : "169981555834910"
}
},
{
"_index" : "qmschool",
"_type" : "_doc",
"_id" : "169981555834912",
"_score" : 0.0,
"_source" : {
"code" : "4111010016",
"nameHk" : "",
"school" : "北建大",
"schoolFullName" : "北京建筑大学",
"schoolId" : "169981555834912"
}
},
{
"_index" : "qmschool",
"_type" : "_doc",
"_id" : "200836953541924",
"_score" : 0.0,
"_source" : {
"nameHk" : "北京大學附屬中學河南分校",
"schoolFullName" : "北京大学附属中学河南分校",
"schoolId" : "200836953541924"
}
}
]
}
}
由于拼音有同音字,因此用拼音查询的结果会更多
三、拓展-使用@Mapping、@Setting注解配置Mapping、Settings
SpringBoot 有为我们提供多种方式设置mapping,我们可以使用@Mapping注解配置,使用ES原生的方式进行设置更加直观。
3.1 实体类
/**
 * Film document bound to the "film-entity" index (type "film").
 * Index settings and mapping are not derived from field annotations; they are loaded from the
 * JSON files referenced by @Setting and @Mapping (presumably classpath resources - TODO confirm).
 */
@Data
@Document(indexName = "film-entity", type = "film")
@Setting(settingPath = "/json/film-setting.json")
@Mapping(mappingPath = "/json/film-mapping.json")
public class FilmEntity {
// Document id
@Id
private Long id;
// Annotation-based alternative, left disabled because the mapping comes from the JSON file:
// @Field(type = FieldType.Text, searchAnalyzer = "ik_max_word", analyzer = "ik_smart")
private String name;
private String nameOri;
private String publishDate;
private String type;
private String language;
private String fileDuration;
private String director;
}
3.2 film-mapping.json
{
"film": {
"_all": {
"enabled": true
},
"properties": {
"id": {
"type": "integer"
},
"name": {
"type": "text",
"analyzer": "ikSearchAnalyzer",
"search_analyzer": "ikSearchAnalyzer",
"fields": {
"pinyin": {
"type": "text",
"analyzer": "pinyinSimpleIndexAnalyzer",
"search_analyzer": "pinyinSimpleIndexAnalyzer"
}
}
},
"nameOri": {
"type": "text"
},
"publishDate": {
"type": "text"
},
"type": {
"type": "text"
},
"language": {
"type": "text"
},
"fileDuration": {
"type": "text"
},
"director": {
"type": "text",
"index": "true",
"analyzer": "ikSearchAnalyzer"
},
"created": {
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis"
}
}
}
}
3.3 film-setting.json
{
"index": {
"analysis": {
"filter": {
"edge_ngram_filter": {
"type": "edge_ngram",
"min_gram": 1,
"max_gram": 50
},
"pinyin_simple_filter": {
"type": "pinyin",
"first_letter": "prefix",
"padding_char": " ",
"limit_first_letter_length": 50,
"lowercase": true
}
},
"char_filter": {
"tsconvert": {
"type": "stconvert",
"convert_type": "t2s"
}
},
"analyzer": {
"ikSearchAnalyzer": {
"type": "custom",
"tokenizer": "ik_max_word",
"char_filter": [
"tsconvert"
]
},
"pinyinSimpleIndexAnalyzer": {
"tokenizer": "keyword",
"filter": [
"pinyin_simple_filter",
"edge_ngram_filter",
"lowercase"
]
}
}
}
}
}
四、资料来源
SpringBoot集成Elasticsearch 进阶,实现中文、拼音分词,繁简体转换高级搜索
ElasticSerach6.0.1测试拼音分词器,IK分词器,并且次测试语法