因为数据量非常大,客户需要对过期的数据进行清理,例如:hbase表中有2017年与2016年的数据,现在需要将2016年的数据进行清理,即批量删除操作。又因为hbase在删除方面功能较弱,只提供单行删除功能。
本次操做的数据如图1所示,能够发现,表rowkey中含有日期时间,因此在批量删除时采用rowkey的时间来过滤。java
代码初版,以A表为例,删除2016年的数据,则在下面代码中的args[0]设为:2016即可
package Test;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.RowFilter;
import org.apache.hadoop.hbase.filter.SubstringComparator;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * Bulk-deletes rows from HBase table "A" whose rowkey contains a given
 * substring (e.g. a year such as "2016").
 *
 * <p>HBase only offers per-row deletes, so matching rows are first collected
 * via a {@link RowFilter} + {@link SubstringComparator} scan and then removed
 * with a single batched {@code delete(List)} call.
 */
public class ClearDatafromHBase {

    private static Configuration config = HBaseConfiguration.create();
    private static HTable tableEvent = null;

    /**
     * Builds one {@link Delete} per row produced by the scanner.
     *
     * @param rs scanner over the rows to remove; always closed before returning
     * @return one Delete per scanned row (empty list when nothing matched)
     */
    public static List<Delete> getDeleteList(ResultScanner rs) {
        List<Delete> list = new ArrayList<Delete>();
        try {
            for (Result result : rs) {
                list.add(new Delete(result.getRow()));
            }
        } finally {
            // Release the server-side scanner lease even if iteration fails.
            rs.close();
        }
        return list;
    }

    /**
     * Deletes every row of table "A" whose rowkey contains {@code string}.
     * IOExceptions are printed and swallowed; the table is always closed.
     *
     * @param string substring to match inside the rowkey (e.g. "2016")
     */
    public static void deleteRowkeyOfEvent(String string) {
        try {
            tableEvent = new HTable(config, "A");
            // EQUAL + SubstringComparator: match any rowkey containing the
            // given fragment (here, the year to purge).
            RowFilter rf = new RowFilter(CompareFilter.CompareOp.EQUAL,
                    new SubstringComparator(string));
            Scan scan = new Scan();
            scan.setFilter(rf);
            ResultScanner scanner = tableEvent.getScanner(scan);
            List<Delete> list = getDeleteList(scanner); // closes the scanner
            if (list.size() > 0) {
                tableEvent.delete(list); // one batched delete call
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (null != tableEvent) {
                try {
                    tableEvent.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Entry point. {@code args[0]} is the rowkey substring to purge, e.g. "2016".
     */
    public static void main(String[] args) throws IOException {
        // BUG FIX: validate the argument before use; the original read
        // args[0] unconditionally and threw ArrayIndexOutOfBoundsException
        // when started without arguments.
        if (args == null || args.length < 1) {
            System.err.println("Usage: ClearDatafromHBase <rowkey-substring>");
            return;
        }
        // deleteRowkeyOfEvent is static — no instance needed.
        deleteRowkeyOfEvent(args[0]);
    }
}
代码第二版,可以指定表名和时间,运行方法与初版相同,只是参数[0]表示表名,参数[1]表示时间
package Test;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.RowFilter;
import org.apache.hadoop.hbase.filter.SubstringComparator;
import org.apache.hadoop.hbase.util.Bytes;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * Created by lihao on 2017/8/9.
 * Clears expired rows from an HBase table in the "razor" namespace.
 *
 * <p>Usage: args[0] = table name (without the namespace prefix),
 * args[1] = time fragment to match inside the rowkey (e.g. "2016").
 */
public class ClearDatafromHBase {

    private static String nsPrefix = "razor:";
    private static Logger logger = LoggerFactory.getLogger(ClearDatafromHBase.class);
    private static HConnection hBaseConn;
    private static Configuration config = null;
    private static HTableInterface table = null;

    static {
        try {
            config = HBaseConfiguration.create();
            hBaseConn = HConnectionManager.createConnection(config);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Resolves the target table through the shared connection.
     *
     * @param tableName fully-qualified table name (namespace prefix included)
     * @throws IOException when the table cannot be opened
     */
    public static void init(String tableName) throws IOException {
        table = hBaseConn.getTable(tableName);
        TableName name = table.getName();
        System.out.println(name);
    }

    /**
     * Builds one {@link Delete} per row produced by the scanner.
     *
     * @param rs scanner over the rows to remove; always closed before returning
     * @return one Delete per scanned row (empty list when nothing matched)
     */
    public static List<Delete> getDeleteList(ResultScanner rs) {
        List<Delete> list = new ArrayList<Delete>();
        try {
            for (Result result : rs) {
                list.add(new Delete(result.getRow()));
            }
        } finally {
            // Release the server-side scanner lease even if iteration fails.
            rs.close();
        }
        return list;
    }

    /**
     * Builds a scan matching every rowkey that contains {@code string}.
     *
     * @param string substring to match inside the rowkey (e.g. "2016")
     * @return a scan with the substring RowFilter applied
     */
    public static Scan getScannerByRowkey(String string) {
        Scan scan = new Scan();
        RowFilter rf = new RowFilter(CompareFilter.CompareOp.EQUAL,
                new SubstringComparator(string));
        scan.setFilter(rf);
        return scan;
    }

    /**
     * Runs the scan and deletes every matched row in one batched call.
     * IOExceptions are printed and swallowed; the table is always closed here
     * (the later close(table) in main is then a harmless no-op).
     */
    public void clearData(Scan scan, HTableInterface table, String date) {
        ResultScanner resultScan = null;
        try {
            resultScan = table.getScanner(scan);
            List<Delete> list = getDeleteList(resultScan); // closes the scanner
            if (list.size() > 0) {
                table.delete(list);
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (null != table) {
                try {
                    table.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Deletes every row of the initialized table whose rowkey contains
     * {@code string}.
     */
    public void deleteOfHbase(String string) {
        clearData(getScannerByRowkey(string), table, string);
    }

    /**
     * Quietly closes the table, logging nothing on success.
     */
    public static void close(HTableInterface table) {
        if (table != null) {
            try {
                table.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    public static void main(String[] args) {
        // BUG FIX: validate arguments BEFORE dereferencing args[0]/args[1].
        // The original read both first (NPE / ArrayIndexOutOfBoundsException
        // on bad input) and its check only required one argument while two
        // are actually used.
        if (args == null || args.length < 2) {
            logger.error("输入参数错误");
            throw new RuntimeException("输入参数错误");
        }
        String tablename = nsPrefix + args[0];
        String time = args[1];
        logger.info("开始清理数据");
        try {
            init(tablename);
            ClearDatafromHBase dh = new ClearDatafromHBase();
            dh.deleteOfHbase(time);
            logger.info("数据清理结束");
        } catch (IOException e) {
            logger.error("清理数据失败", e);
        } finally {
            close(table);
            // BUG FIX: also release the shared connection created in the
            // static initializer; it was previously leaked on exit.
            if (hBaseConn != null) {
                try {
                    hBaseConn.close();
                } catch (IOException e) {
                    logger.error("清理数据失败", e);
                }
            }
        }
    }
}
如果rowkey中不含有时间标志的话,可以根据时间戳的范围进行删除。
/**
 * Deletes every row of {@code tableName} whose cells fall inside the
 * timestamp range [minTime, maxTime). Works even when the rowkey carries no
 * date marker, because filtering happens on cell timestamps server-side.
 * Failures are printed and swallowed (best-effort cleanup).
 *
 * @param tableName name of the table to purge
 * @param minTime   minimum timestamp, inclusive (milliseconds)
 * @param maxTime   maximum timestamp, exclusive (milliseconds)
 */
public static void deleteTimeRange(String tableName, Long minTime, Long maxTime) {
    // try-with-resources replaces the original hand-rolled finally/close
    // nests and guarantees table and connection are closed in reverse order.
    try (Connection connection = HBaseOperator.getHbaseConnection();
         Table table = connection.getTable(TableName.valueOf(tableName))) {
        Scan scan = new Scan();
        scan.setTimeRange(minTime, maxTime); // server-side timestamp filter
        ResultScanner rs = table.getScanner(scan);
        List<Delete> list = getDeleteList(rs); // closes the scanner
        if (list.size() > 0) {
            table.delete(list); // one batched delete call
        }
    } catch (Exception e) {
        // NOTE(review): broad catch kept to preserve the original
        // best-effort behavior; errors are only printed, never rethrown.
        e.printStackTrace();
    }
}

/**
 * Builds one {@link Delete} per row produced by the scanner; the scanner is
 * always closed before returning.
 */
private static List<Delete> getDeleteList(ResultScanner rs) {
    List<Delete> list = new ArrayList<>();
    try {
        for (Result r : rs) {
            list.add(new Delete(r.getRow()));
        }
    } finally {
        rs.close();
    }
    return list;
}
参考文献:
hbase连接对表的操作
hbase shell命令的使用
hbase filter的使用
hbase容灾备份
hbase的mapreduce任务
本文仅为工作之余所做第一版,后期会进行修改及更新操作,如有转载,请标明出处.