我最喜欢直接贴代码
Elasticsearch 版本:8.2.2,Maven 依赖如下:
<dependency>
<groupId>co.elastic.clients</groupId>
<artifactId>elasticsearch-java</artifactId>
<version>8.2.2</version>
</dependency>
<dependency>
<groupId>org.elasticsearch.client</groupId>
<artifactId>elasticsearch-rest-client</artifactId>
<version>8.2.2</version>
</dependency>
<dependency>
<groupId>org.elasticsearch</groupId>
<artifactId>elasticsearch-x-content</artifactId>
<version>8.2.2</version>
</dependency>
<dependency>
<groupId>com.github.zengde</groupId>
<artifactId>lire</artifactId>
<version>1.0b2</version>
</dependency>
/**
 * Builds the Elasticsearch Java API client.
 *
 * <p>Connects over plain HTTP to the hard-coded node {@code 10.6.30.139:9200}, wraps the
 * low-level REST client in a Jackson-backed transport, and prints the resulting client to
 * standard output before returning it.
 *
 * @return a new {@code ElasticsearchClient} bound to that node
 */
private static ElasticsearchClient createElasticsearchClient() {
    HttpHost node = new HttpHost("10.6.30.139", 9200, "http");
    RestClient lowLevelClient = RestClient.builder(node).build();
    ElasticsearchClient esClient =
            new ElasticsearchClient(new RestClientTransport(lowLevelClient, new JacksonJsonpMapper()));
    System.out.println(esClient);
    return esClient;
}
/**
 * Creates the index {@code INDEX_NAME} with a 144-dimension {@code dense_vector} field.
 *
 * <p>Mapping sent to Elasticsearch:
 * <ul>
 *   <li>{@code imageVector}: {@code dense_vector}, {@code dims = 144}, {@code index = true},
 *       {@code similarity = cosine}</li>
 *   <li>{@code imageUrl}: {@code text}</li>
 * </ul>
 *
 * <p>{@code index: true} is set because {@code _knn_search} needs Elasticsearch to build an
 * extra data structure for the field (currently an HNSW graph).
 *
 * <p>{@code similarity} may be {@code l2_norm}, {@code dot_product} or {@code cosine}.
 * {@code cosine} is used because {@code dot_product} is only accepted for unit-length vectors,
 * and this file shows no normalisation step before the CEDD vectors are indexed. It also
 * matches the {@code cosineSimilarity} script used at query time.
 *
 * <p>The create-index response is printed to standard output.
 *
 * @throws IOException propagated from the Elasticsearch client call
 */
private static void createIndex() throws IOException {
    String mapping = "{"
            + "  \"mappings\": {"
            + "    \"properties\": {"
            + "      \"imageVector\": {"
            + "        \"type\": \"dense_vector\","
            + "        \"dims\": 144,"
            + "        \"index\": true,"
            + "        \"similarity\": \"cosine\""
            + "      },"
            + "      \"imageUrl\": {"
            + "        \"type\": \"text\""
            + "      }"
            + "    }"
            + "  }"
            + "}";
    CreateIndexRequest request = new CreateIndexRequest.Builder()
            .index(INDEX_NAME)
            .withJson(IOUtils.toInputStream(mapping, StandardCharsets.UTF_8))
            .build();
    CreateIndexResponse response = client.indices().create(request);
    System.out.println(response);
}
/**
 * Loads the image behind {@code url} and returns its CEDD descriptor as a {@code double[]}.
 *
 * <p>Steps: the URL is rewritten through {@code OssUtils.processPic} with the OSS processing
 * string {@code ?x-oss-process=image/resize,m_lfit,h_250,w_250} (presumably bounding the image
 * to 250x250 before download; confirm against {@code OssUtils}), the image is read with
 * {@code ImageIO}, CEDD features are extracted, and the byte histogram is converted with
 * {@code SerializationUtils.castToDoubleArray}. The rewritten URL and both arrays are printed
 * for debugging.
 *
 * @param url address of the source image
 * @return the CEDD histogram as doubles (the index mapping expects 144 values)
 * @throws IOException if the image cannot be fetched or no image reader can decode it
 * @throws Exception any other failure raised by the helpers called here
 */
private static double[] imageVector(String url) throws Exception {
    url = OssUtils.processPic(url, "?x-oss-process=image/resize,m_lfit,h_250,w_250");
    System.out.println(url);
    BufferedImage img = ImageIO.read(new URL(url));
    if (img == null) {
        // ImageIO.read returns null instead of throwing when no registered reader
        // recognises the stream; fail here with a clear message rather than inside LIRE.
        throw new IOException("Unable to decode image from " + url);
    }
    CEDD lireFeature = new CEDD();
    lireFeature.extract(img);
    byte[] byteHistogram = lireFeature.getByteHistogram();
    printArray(byteHistogram);
    double[] ds = SerializationUtils.castToDoubleArray(byteHistogram);
    printArray(ds);
    return ds;
}
/**
 * Same as {@code imageVector(url)}, but returns the values boxed in a {@code List<Double>}
 * in the same order.
 *
 * @param url address of the source image
 * @return the image vector as a mutable list of doubles
 * @throws Exception whatever {@code imageVector} throws
 */
private static List<Double> imageVectorList(String url) throws Exception {
    double[] raw = imageVector(url);
    List<Double> boxed = new ArrayList<>(raw.length);
    for (int i = 0; i < raw.length; i++) {
        boxed.add(raw[i]);
    }
    return boxed;
}
/**
 * Indexes one image into {@code INDEX_NAME}, using {@code productId} as the document id.
 *
 * <p>The stored document has two fields: {@code imageVector} (the result of
 * {@code imageVector(url)}) and {@code imageUrl} (the URL passed through
 * {@code OssUtils.processPic(url, "")}).
 *
 * @param url       address of the image to index
 * @param productId id of the product; becomes the Elasticsearch document id, must not be null
 * @throws IllegalArgumentException if {@code productId} is null
 * @throws Exception if feature extraction or the Elasticsearch call fails
 */
public static void index(String url, Long productId) throws Exception {
    if (productId == null) {
        // Concatenating a null id into a string would silently store the document
        // under the literal id "null" and overwrite any previous such document.
        throw new IllegalArgumentException("productId must not be null");
    }
    Map<String, Object> doc = new HashMap<>();
    doc.put("imageVector", imageVector(url));
    doc.put("imageUrl", OssUtils.processPic(url, ""));
    IndexRequest<Object> request = new IndexRequest.Builder<>()
            .index(INDEX_NAME)
            .id(String.valueOf(productId))
            .document(doc)
            .build();
    client.index(request);
}
/**
 * Searches {@code INDEX_NAME} for images similar to the one at {@code url}.
 *
 * <p>The query image is converted to a vector with {@code imageVectorList(url)} and every
 * document (match_all) is scored by the script
 * {@code cosineSimilarity(params.query_vector, 'imageVector') + 1.0}; the {@code + 1.0} keeps
 * the score non-negative. The paging offset and size are taken from a {@code Pager} built
 * from {@code pageNum} and {@code pageSize}.
 *
 * <p>Each returned map is the hit's {@code _source} with two extra entries: {@code id}
 * (document id) and {@code score} (hit score). A hit without a source yields a map holding
 * only those two entries.
 *
 * @param url      address of the query image
 * @param pageNum  page number passed to the {@code Pager} constructor
 * @param pageSize page size passed to the {@code Pager} constructor
 * @return the pager with its data list set to the hits of the requested page
 * @throws Exception if feature extraction or the Elasticsearch call fails
 */
// Pager<HashMap> is the published return type, so raw HashMap cannot be avoided here.
@SuppressWarnings({"rawtypes", "unchecked"})
public static Pager<HashMap> search(String url, Integer pageNum, Integer pageSize) throws Exception {
    Pager<HashMap> pager = new Pager<>(pageNum, pageSize);
    List<Double> vectors = imageVectorList(url);

    InlineScript inlineScript = new InlineScript.Builder()
            .source("cosineSimilarity(params.query_vector, 'imageVector') + 1.0")
            .params("query_vector", JsonData.of(vectors))
            .build();
    Script script = new Script.Builder().inline(inlineScript).build();
    ScriptScoreQuery scriptScoreQuery = new ScriptScoreQuery.Builder()
            .script(script)
            .query(new MatchAllQuery.Builder().build()._toQuery())
            .build();

    SearchRequest request = new SearchRequest.Builder()
            .index(INDEX_NAME)
            .from(pager.getFirstResult())
            .size(pager.getPageSize())
            .query(scriptScoreQuery._toQuery())
            .build();
    SearchResponse<HashMap> response = client.search(request, HashMap.class);

    List<Hit<HashMap>> hits = response.hits().hits();
    List<HashMap> datas = new ArrayList<>(hits.size());
    for (Hit<HashMap> hit : hits) {
        // source() is null when the hit carries no _source; keep the hit with id/score only.
        HashMap source = hit.source();
        HashMap data = source != null ? source : new HashMap();
        data.put("id", hit.id());
        data.put("score", hit.score());
        datas.add(data);
    }
    pager.setDatas(datas);
    return pager;
}