package api;

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

/**
 * Helper for obtaining a {@link FileSystem} handle to the HDFS cluster at
 * {@code hdfs://potter2:9000}, acting as user {@code potter}.
 */
public class Utils {

    /**
     * Opens (or returns the cached) HDFS client for the cluster.
     *
     * @return a {@link FileSystem} bound to {@code hdfs://potter2:9000} as user {@code potter}
     * @throws Exception if the connection cannot be established
     */
    public static FileSystem HDFS() throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://potter2:9000");
        // Use the user-aware factory overload instead of mutating the global
        // HADOOP_USER_NAME system property: setting a JVM-wide property is
        // race-prone when several tests/clients run in the same JVM, and the
        // explicit overload is the documented way to connect as a given user.
        return FileSystem.get(new URI("hdfs://potter2:9000"), conf, "potter");
    }
}
package api; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.LocatedFileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.RemoteIterator; import org.junit.Test; /** * 删除HDFS集群中的全部空文件和空目录 * @author Administrator * */ public class Empty { @Test public void tt() throws Exception { Path path = new Path("/"); Empty1(path); } public static void Empty1(Path path) throws Exception { FileSystem fs = Utils.HDFS(); //当是空文件时,判断当前路径下有几个空文件夹 FileStatus[] listStatus = fs.listStatus(path); System.out.println(listStatus.length+"********"); //当根目录没有文件的时候会进入if里面 if (listStatus.length == 0) { fs.delete(path,true); } System.out.println("删除成功xxxxx"); //迭代器用于遍历 RemoteIterator<LocatedFileStatus> listLocatedStatus = fs.listLocatedStatus(path); while (listLocatedStatus.hasNext()) { LocatedFileStatus next = listLocatedStatus.next(); //输出文件夹的目录 Path currentPath = next.getPath(); System.out.println(currentPath+"1111111"); //输出上面文件夹的父亲目录 Path parent = next.getPath().getParent(); System.out.println(parent+"2222222"); if (next.isDirectory()) { //若是是空文件夹 if (fs.listStatus(currentPath).length == 0) { //删除掉 fs.delete(currentPath,true); }else { //不是空文件夹,那么继续遍历 if (fs.exists(currentPath)) { Empty1(next.getPath()); } } //若是是文件 }else { //获取文件的长度 long fileLength = next.getLen(); //当文件是空文件时,删除 if (fileLength ==0) { fs.delete(currentPath,true); } } int length = fs.listStatus(parent).length; if (length ==0) { fs.delete(parent,true); } } } }