使用elasticsearch提高hbase基于列的查询效率 - blacklau的专栏 - 博客频道 - CSDN.NET http://blog.csdn.net/blacklau/article/details/39781803
网上能查到的提高HBase基于列的查询效率的方法,基本上都是建立二级索引。这里介绍另外一种方法:使用分布式索引技术elasticsearch来提高效率,
基本思路和二级索引差不多,都是通过对要查询的列建立索引,先根据建立的列索引查询到rowkey,再根据rowkey查询到需要的数据,步骤如下:
1.安装elasticsearch,安装简单,网上查找安装方法
2.使用elasticsearch建立hbase的相关查询列的索引,值包括查询列与rowkey
3.使用elasticsearch查询到列对应的rowkey值,再根据rowkey查询需要的信息
代码只包括第3步:
package hbase;import java.io.IOException;import java.util.ArrayList;import java.util.List;import org.apache.hadoop.hbase.HBaseConfiguration;import org.apache.hadoop.hbase.client.Get;import org.apache.hadoop.hbase.client.HTable;import org.apache.hadoop.hbase.client.Result;import org.apache.hadoop.hbase.util.Bytes;import org.elasticsearch.action.search.SearchResponse;import org.elasticsearch.action.search.SearchType;import org.elasticsearch.client.Client;import org.elasticsearch.client.transport.TransportClient;import org.elasticsearch.common.settings.ImmutableSettings;import org.elasticsearch.common.settings.Settings;import org.elasticsearch.common.transport.InetSocketTransportAddress;import org.elasticsearch.index.query.FilterBuilders;import org.elasticsearch.search.SearchHit;import org.elasticsearch.search.SearchHits;public class SearchWithElasticSearch {public static void main(String[] args) throws IOException{//elasticsearch相关设置Settings settings = ImmutableSettings.settingsBuilder() //指定集群名称 .put("cluster.name", "elasticsearch") .put("client.transport.sniff", true).build(); // 创建客户端, Client client = new TransportClient(settings) //ip改为自己的ip .addTransportAddress(new InetSocketTransportAddress("ip", 9300)); //elasticsearch查询,查询NAME为blacklau的记录SearchResponse response = client.prepareSearch("hbase") .setTypes("netflow") .setSearchType(SearchType.DFS_QUERY_THEN_FETCH) // NAME对应hbase表的列名 .setPostFilter(FilterBuilders.termFilter("NAME", "blacklau")) .execute() .actionGet();SearchHits shs = response.getHits();//根据查询到的rowkeys构建GetList<Get> gets = new ArrayList<Get>();for(SearchHit hit : shs){//ROWKEY对应hbase表的rowkeyString rowkey = (String)hit.getSource().get("ROWKEY");Get get = new Get(Bytes.toBytes(rowkey));gets.add(get);}if(gets.size() == 0) return;HTable table=new HTable(HBaseConfiguration.create(),"netflow"); Result[] rs=table.get(gets); //打印hbase表查询结果 for(Result r:rs){ System.out.println(r); } client.close(); table.close();}}