使用PageFilter分页效率比较低,因为每次都需要扫描前面的数据,直到扫描到所需要查的数据;但是查询下一页的时候,可以直接利用上一页的rowkey来直接查出。
这里的Filter(PageFilter)用于限定每次scan返回多少条记录。
下面看用PageFilter实现分页的例子(最好使用rowkey,不建议使用过滤器,过滤器效率过低;设计表的时候设计一个好的rowkey能够带来很多便利)。
代码记录:
package hbasepage;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.PageFilter;
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;

/**
 * Row-key based paging over an HBase table using {@link PageFilter}.
 *
 * <p>Each scan asks for {@code pageSize + 1} rows: the first {@code pageSize} rows are the page,
 * and the extra row (if present) becomes the start row of the following page. The cursor
 * ({@code startRow}) and the table handle are static, so all callers share one paging position.
 */
public class Pager {

    public static Configuration configuration;

    static {
        configuration = HBaseConfiguration.create();
        configuration.set("hbase.zookeeper.property.clientPort", "2181");
        configuration.set("hbase.zookeeper.quorum", "192.168.10.24,192.168.10.29,192.168.10.64");
        configuration.set("hbase.master", "192.168.10.120:60000");
    }

    private String tableName;
    private static HTable hTable;
    /** Row key at which the next scan starts; {@code null} until a scan has been run. */
    private static String startRow = null;
    private static List<Result> list = null;

    /**
     * Opens the given table and stores the handle in the shared static field.
     *
     * <p>An {@link IOException} while opening the table is printed and not rethrown, in which case
     * the shared table handle is left unchanged.
     *
     * @param tableName name of the HBase table to page through
     */
    public Pager(String tableName) {
        this.tableName = tableName;
        try {
            // hTable is static: assign it through the class, not through `this`.
            Pager.hTable = new HTable(configuration, tableName.getBytes(StandardCharsets.UTF_8));
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Scans forward through {@code pageNum - 1} pages by delegating to
     * {@link #getPage(int, int)}.
     *
     * <p>NOTE(review): this always returns {@code null}; the rows are only printed by
     * {@code getPage}. Presumably meant to return the previous page — confirm intent with callers.
     *
     * @param pageNum  current page number
     * @param pageSize number of rows per page
     * @return always {@code null}
     */
    public static List<Result> getLast(int pageNum, int pageSize) {
        getPage(pageNum - 1, pageSize);
        return null;
    }

    /**
     * Returns the next page, continuing from the position left by {@link #getPage(int, int)} or by
     * a previous call to this method.
     *
     * <p>If no position has been recorded yet, the scan starts at the beginning of the table. When
     * fewer than {@code pageSize + 1} rows are returned, the cursor is left at the last row read,
     * so a subsequent call starts at that row again.
     *
     * @param pageSize number of rows per page
     * @return the rows of the page, at most {@code pageSize} entries
     * @throws Exception if the scanner cannot be opened
     */
    public static List<Result> getNext(int pageSize) throws Exception {
        // Ask for one extra row; it is not returned but marks where the following page starts.
        Filter filter = new PageFilter(pageSize + 1);
        Scan scan = new Scan();
        scan.setFilter(filter);
        if (startRow != null) {
            scan.setStartRow(startRow.getBytes(StandardCharsets.UTF_8));
        }

        list = new ArrayList<>();
        // The scanner holds server-side resources, so always close it.
        try (ResultScanner result = hTable.getScanner(scan)) {
            int count = 0;
            for (Result r : result) {
                count++;
                if (count == pageSize + 1) {
                    startRow = new String(r.getRow(), StandardCharsets.UTF_8);
                    System.out.println("startRow" + startRow);
                    break;
                }
                list.add(r);
                startRow = new String(r.getRow(), StandardCharsets.UTF_8);
                System.out.println(startRow);
                System.out.println(count);
            }
        }
        return list;
    }

    /**
     * Scans and prints pages 1 through {@code pageNum}, leaving the shared cursor at the first row
     * after the last page read (or at the last row read if the data ran out).
     *
     * <p>An {@link IOException} during a page scan is printed and the loop moves on to the next
     * iteration.
     *
     * @param pageNum  number of pages to scan, e.g. 3
     * @param pageSize number of rows per page, e.g. 10
     */
    public static void getPage(int pageNum, int pageSize) {
        System.out.println("hahha");
        // PageFilter caps the rows returned by a scan; ask for one extra row so that the start
        // key of the following page is known.
        Filter page = new PageFilter(pageSize + 1);
        Scan scan = new Scan();
        scan.setFilter(page);

        // pageNum = 3 means three pages have to be scanned.
        for (int i = 0; i < pageNum; i++) {
            try (ResultScanner rs = hTable.getScanner(scan)) {
                boolean hasNextPage = false;
                int count = 0;
                for (Result r : rs) {
                    count++;
                    if (count == pageSize + 1) {
                        startRow = new String(r.getRow(), StandardCharsets.UTF_8);
                        scan.setStartRow(startRow.getBytes(StandardCharsets.UTF_8));
                        System.out.println("startRow" + startRow);
                        hasNextPage = true;
                        break;
                    }
                    startRow = new String(r.getRow(), StandardCharsets.UTF_8);
                    System.out.println(startRow);
                    // Print every column of the row as qualifier/value, e.g. age-20.
                    for (KeyValue keyValue : r.list()) {
                        System.out.println("列:"
                                + new String(keyValue.getQualifier(), StandardCharsets.UTF_8)
                                + "====值:"
                                + new String(keyValue.getValue(), StandardCharsets.UTF_8));
                    }
                    System.out.println(count);
                }
                // Without a look-ahead row there is no further page. Stopping here also covers a
                // final page holding exactly pageSize rows, which would otherwise be rescanned
                // from the same start row.
                if (!hasNextPage) {
                    break;
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
下面写一点常用的filter过滤器:
HBase为筛选数据提供了一组过滤器,通过这些过滤器可以在HBase中数据的多个维度(行、列、数据版本)上对数据进行筛选操作,也就是说过滤器最终能够筛选的数据可以细化到具体的一个存储单元格上(由行键、列名、时间戳定位)。一般来说,通过行键、值来筛选数据的应用场景较多。
// Examples of HBase's built-in filters. String literals use plain ASCII double quotes;
// typographic quotes are not valid Java.
Filter rf = new RowFilter(CompareFilter.CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes("row1"))); // OK: selects all rows that match
Filter pf = new PrefixFilter(Bytes.toBytes("row")); // OK: selects rows whose row key starts with the prefix
Filter kof = new KeyOnlyFilter(); // OK: returns all rows, but every value is empty
Filter rrf = new RandomRowFilter(0.8f); // OK: randomly selects a portion of the rows
Filter isf = new InclusiveStopFilter(Bytes.toBytes("row1")); // OK: the scan's upper bound is included in the result
Filter fkof = new FirstKeyOnlyFilter(); // OK: selects the first cell of each row
Filter cpf = new ColumnPrefixFilter(Bytes.toBytes("qual1")); // OK: selects columns whose qualifier matches the prefix
Filter vf = new ValueFilter(CompareFilter.CompareOp.EQUAL, new SubstringComparator("ROW2_QUAL1")); // OK: selects the specific cells whose value satisfies the condition
Filter ccf = new ColumnCountGetFilter(2); // OK: if a row turns out to have more columns than the configured maximum, the whole scan stops
SingleColumnValueFilter scvf = new SingleColumnValueFilter(
        Bytes.toBytes("colfam1"),
        Bytes.toBytes("qual2"),
        CompareFilter.CompareOp.NOT_EQUAL,
        new SubstringComparator("BOGUS"));
scvf.setFilterIfMissing(false);
scvf.setLatestVersionOnly(true); // OK
Filter skf = new SkipFilter(vf); // OK: when one column of a row has to be filtered out, the whole row is filtered out
Filter wmf = new WhileMatchFilter(rf); // OK: similar to takewhile in Python's itertools
// Combine several filters; FilterList supports both AND and OR relationships.
List<Filter> filters = new ArrayList<Filter>();
filters.add(rf);
filters.add(vf);
FilterList fl = new FilterList(FilterList.Operator.MUST_PASS_ALL, filters); // OK
以上,是对于HBase内置的过滤器的部分总结。