(1)理解HDFS在Hadoop体系结构中的角色。 (2)熟练使用HDFS操作常用的Shell命令。 (3)熟悉HDFS操作常用的Java API。
操作系统:Linux。 Hadoop 版本:2.7.3 或以上版本。 JDK 版本:1.7 或以上版本。 Java IDE:IDEA
shell:
hadoop fs -put /User/Binguner/Desktop/test.txt /test
hadoop fs -appendToFile /User/Binguner/Desktop/test.txt /test/test.txt
hadoop fs -copyFromLocal -f /User/Binguner/Desktop/test.txt /input/test.txt
复制代码
/**
 * Uploads a local file to HDFS, asking the user what to do when the target already exists.
 *
 * <p>If {@code /test/test.txt} does not exist, {@code srcPath} is copied to {@code desPath}.
 * If it exists, the user is prompted on standard input: answering {@code y} overwrites
 * {@code desPath} with {@code srcPath}; any other answer appends the bytes of {@code srcPath}
 * to {@code /test/test.txt}. Any {@link IOException} is printed and swallowed.
 *
 * @param fileSystem file system on which the exists/copy/append calls are made
 * @param srcPath    path of the local source file
 * @param desPath    path of the destination file
 */
private static void test1(FileSystem fileSystem, Path srcPath, Path desPath) {
    // NOTE(review): the existence check and the append target are hard-coded instead of
    // being derived from desPath — confirm this is intended.
    Path existingFile = new Path("/test/test.txt");
    try {
        if (!fileSystem.exists(existingFile)) {
            fileSystem.copyFromLocalFile(srcPath, desPath);
            return;
        }
        System.out.println("Do you want to overwrite the existed file? ( y / n )");
        // The Scanner is intentionally not closed: closing it would also close System.in.
        if (new Scanner(System.in).next().equals("y")) {
            fileSystem.copyFromLocalFile(false, true, srcPath, desPath);
            return;
        }
        // try-with-resources guarantees both streams are closed even when a read,
        // a write or the append call itself fails.
        try (FileInputStream inputStream = new FileInputStream(srcPath.toString());
             FSDataOutputStream outputStream = fileSystem.append(existingFile)) {
            byte[] bytes = new byte[1024];
            int read;
            while ((read = inputStream.read(bytes)) > 0) {
                outputStream.write(bytes, 0, read);
            }
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
复制代码
运行结果:
HDFS 中原来的文件列表:

第一次运行:
HDFS 中文件列表:
第二次运行:
此时 HDFS 中的目录:
shell:
hadoop fs -copyToLocal /input/test.txt /User/binguner/Desktop/test.txt
复制代码
/**
 * Copies a file from HDFS to the local file system.
 *
 * <p>Prints a message and does nothing else when {@code remotePath} does not exist.
 * When the copy fails with {@code FileAlreadyExistsException}, {@code localPath} is printed
 * and the copy is retried into {@code src/test<random int>.txt}. Other I/O failures are
 * printed and swallowed.
 *
 * @param fileSystem file system on which the exists/copy calls are made
 * @param remotePath path of the file in HDFS
 * @param localPath  local path the file should be saved to
 */
private static void test2(FileSystem fileSystem, Path remotePath, Path localPath) {
    try {
        if (!fileSystem.exists(remotePath)) {
            System.out.println("Can't find this file in HDFS!");
            return;
        }
        fileSystem.copyToLocalFile(remotePath, localPath);
    } catch (FileAlreadyExistsException alreadyThere) {
        System.out.println(localPath.toString());
        // Retry under a randomly numbered name.
        int suffix = new Random().nextInt();
        Path fallbackTarget = new Path("src/test" + suffix + ".txt");
        try {
            fileSystem.copyToLocalFile(remotePath, fallbackTarget);
        } catch (IOException retryFailure) {
            retryFailure.printStackTrace();
        }
    } catch (IOException copyFailure) {
        copyFailure.printStackTrace();
    }
}
复制代码
执行前本地目录:
第一次执行:
第二次执行:
shell:
hadoop fs -cat /test/test.txt
复制代码
/**
 * Prints the content of an HDFS file to standard output, line by line.
 *
 * <p>Any {@link IOException} is printed and swallowed.
 *
 * @param fileSystem file system used to open the file
 * @param remotePath path of the file to print
 */
private static void test3(FileSystem fileSystem, Path remotePath) {
    // try-with-resources closes the reader (and with it the HDFS input stream)
    // on both the normal and the exceptional path.
    try (FSDataInputStream inputStream = fileSystem.open(remotePath);
         BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(inputStream))) {
        String line;
        while ((line = bufferedReader.readLine()) != null) {
            System.out.println(line);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
复制代码
运行结果:
shell:
hadoop fs -ls -h /test/test.txt
复制代码
/** * @param fileSystem * @param remotePath 目标文件地址 */
private static void test4(FileSystem fileSystem, Path remotePath){
try {
FileStatus[] fileStatus = fileSystem.listStatus(remotePath);
for (FileStatus status : fileStatus){
System.out.println(status.getPermission());
System.out.println(status.getBlockSize());
System.out.println(status.getAccessTime());
System.out.println(status.getPath());
}
} catch (IOException e) {
e.printStackTrace();
}
复制代码
运行结果:
shell:
hadoop fs -ls -R -h /
复制代码
/**
 * Recursively lists the files under a path and prints, for each file, its path,
 * permission, length and modification time, one value per line.
 *
 * <p>Any {@link IOException} is printed and swallowed.
 *
 * @param fileSystem file system used to list the files
 * @param remotePath path at which the recursive listing starts
 */
private static void test5(FileSystem fileSystem, Path remotePath) {
    try {
        for (RemoteIterator<LocatedFileStatus> entries = fileSystem.listFiles(remotePath, true);
             entries.hasNext(); ) {
            FileStatus entry = entries.next();
            System.out.println(entry.getPath());
            System.out.println(entry.getPermission());
            System.out.println(entry.getLen());
            System.out.println(entry.getModificationTime());
        }
    } catch (IOException listingFailure) {
        listingFailure.printStackTrace();
    }
}
复制代码
运行结果:
shell:
hadoop fs -touchz /test/test.txt
hadoop fs -mkdir /test
hadoop fs -rm -R /test/test.txt
复制代码
/**
 * Creates a file in, or deletes, an HDFS directory depending on user input.
 *
 * <p>If {@code remoteDirPath} does not exist it is created and nothing else happens.
 * Otherwise the user is prompted on standard input: {@code 1} creates
 * {@code remoteFilePath}, {@code 2} recursively deletes {@code remoteDirPath}; any other
 * number does nothing. Any {@link IOException} is printed and swallowed.
 *
 * @param fileSystem     file system on which the operations are performed
 * @param remoteDirPath  path of the target directory
 * @param remoteFilePath path of the target file
 */
private static void test6(FileSystem fileSystem, Path remoteDirPath, Path remoteFilePath) {
    try {
        if (!fileSystem.exists(remoteDirPath)) {
            fileSystem.mkdirs(remoteDirPath);
            return;
        }
        System.out.println("Please choose your option: 1.create. 2.delete");
        // The Scanner is intentionally not closed: closing it would also close System.in.
        int i = new Scanner(System.in).nextInt();
        switch (i) {
            case 1:
                // create() hands back an open output stream; close it immediately so the
                // stream is not leaked.
                fileSystem.create(remoteFilePath).close();
                break;
            case 2:
                fileSystem.delete(remoteDirPath, true);
                break;
            default:
                break;
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
复制代码
第一次执行前:
第一次执行:
第一次执行后自动创建文件目录
第二次执行,选择创建文件:
第三次执行,选择删除文件:
shell:
hadoop fs -touchz /test/test.txt
hadoop fs -mkdir /test
hadoop fs -rm -R /test/test.txt
复制代码
/**
 * Deletes an HDFS directory after confirmation, or creates it when it is missing.
 *
 * <p>If {@code remotePath} does not exist it is created and the method returns. Otherwise
 * the user must answer {@code y} to delete it; when the directory is not empty a second
 * {@code y} is required. Declining either prompt leaves the directory untouched.
 * Any {@link IOException} is printed and swallowed.
 *
 * @param fileSystem file system on which the operations are performed
 * @param remotePath path of the target directory
 */
private static void test7(FileSystem fileSystem, Path remotePath) {
    try {
        if (!fileSystem.exists(remotePath)) {
            System.out.println("Can't find this path, the path will be created automatically");
            fileSystem.mkdirs(remotePath);
            return;
        }
        // One Scanner serves both prompts so that input buffered by the first read is not
        // lost. It is intentionally not closed: closing it would also close System.in.
        Scanner answers = new Scanner(System.in);
        System.out.println("Do you want to delete this dir? ( y / n )");
        if (!answers.next().equals("y")) {
            return;
        }
        FileStatus[] children = fileSystem.listStatus(remotePath);
        if (children.length != 0) {
            System.out.println("There are some files in this dictionary, do you sure to delete all? (y / n)");
            if (!answers.next().equals("y")) {
                // The user declined to delete a non-empty directory: stop here instead of
                // falling through to the recursive delete below.
                return;
            }
        }
        if (fileSystem.delete(remotePath, true)) {
            System.out.println("Delete successful");
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
复制代码
执行前的 HDFS 文件列表:
第一次执行(删除全部文件):
此时 HDFS 中的文件列表:
再次运行程序,自动创建文件夹:
shell:
hadoop fs -get text.txt
cat text.txt >> local.txt
hadoop fs -copyFromLocal -f local.txt text.txt
复制代码
/**
 * Adds content to the start or the end of an existing HDFS file, depending on user input.
 *
 * <p>Prints a message and returns when {@code remotePath} does not exist. Otherwise the
 * user enters a number on standard input:
 * <ul>
 *   <li>{@code 1}: moves {@code remotePath} to {@code localPath}, recreates
 *       {@code remotePath} and writes the local files {@code test2.txt} and then
 *       {@code test.txt} (hard-coded project paths) into it;</li>
 *   <li>{@code 2}: appends the local file resolved from {@code localPath} under the
 *       hard-coded project directory to {@code remotePath};</li>
 *   <li>anything else: does nothing.</li>
 * </ul>
 * Any {@link IOException} is printed and swallowed.
 *
 * @param fileSystem file system on which the operations are performed
 * @param remotePath path of the file in HDFS
 * @param localPath  path of the local file
 */
private static void test8(FileSystem fileSystem, Path remotePath, Path localPath) {
    try {
        if (!fileSystem.exists(remotePath)) {
            System.out.println("Can't find this file");
            return;
        }
        System.out.println("input 1 or 2 , add the content to the remote file's start or end");
        // The Scanner is intentionally not closed: closing it would also close System.in.
        switch (new Scanner(System.in).nextInt()) {
            case 1:
                fileSystem.moveToLocalFile(remotePath, localPath);
                // NOTE(review): the two inputs are hard-coded; presumably test.txt is where
                // localPath points in the author's setup — confirm before reuse.
                // try-with-resources closes all three streams even if a copy fails midway.
                try (FSDataOutputStream rewritten = fileSystem.create(remotePath);
                     FileInputStream first = new FileInputStream("/Users/binguner/IdeaProjects/HadoopDemo/src/test2.txt");
                     FileInputStream second = new FileInputStream("/Users/binguner/IdeaProjects/HadoopDemo/src/test.txt")) {
                    test8CopyAll(first, rewritten);
                    test8CopyAll(second, rewritten);
                }
                break;
            case 2:
                try (FileInputStream source = new FileInputStream("/Users/binguner/IdeaProjects/HadoopDemo/" + localPath.toString());
                     FSDataOutputStream appended = fileSystem.append(remotePath)) {
                    test8CopyAll(source, appended);
                }
                break;
            default:
                break;
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}

/** Copies every remaining byte of {@code in} to {@code out} without closing either stream. */
private static void test8CopyAll(FileInputStream in, FSDataOutputStream out) throws IOException {
    byte[] buffer = new byte[1024];
    int read;
    while ((read = in.read(buffer)) > 0) {
        out.write(buffer, 0, read);
    }
}
复制代码
执行前 HDFS 中文件的内容:
第一次执行,加载文件内容到原有文件开头:
第二次执行,加载文件内容到原有文件末尾:
shell:
hadoop fs -rm -R /test/test.txt
复制代码
/**
 * Recursively deletes a path in HDFS and prints whether the delete call reported success.
 *
 * <p>Any {@link IOException} is printed and swallowed.
 *
 * @param fileSystem file system on which the delete is performed
 * @param remotePath path to delete
 */
private static void test9(FileSystem fileSystem, Path remotePath) {
    try {
        boolean deleted = fileSystem.delete(remotePath, true);
        System.out.println(deleted ? "Delete success" : "Delete failed");
    } catch (IOException deleteFailure) {
        deleteFailure.printStackTrace();
    }
}
复制代码
原来 HDFS 中的目录结构:
执行删除操作:
shell:
hadoop fs -mv /test/test.txt /test2
复制代码
/**
 * Renames (moves) a path in HDFS and prints whether the rename call reported success.
 *
 * <p>Any {@link IOException} is printed and swallowed.
 *
 * @param fileSystem    file system on which the rename is performed
 * @param oldRemotePath old name
 * @param newRemotePath new name
 */
private static void test10(FileSystem fileSystem, Path oldRemotePath, Path newRemotePath) {
    try {
        String outcome = fileSystem.rename(oldRemotePath, newRemotePath)
                ? "Rename success"
                : "Rename failed";
        System.out.println(outcome);
    } catch (IOException renameFailure) {
        renameFailure.printStackTrace();
    }
}
复制代码
文件原来的名称:
执行修改操作:
编程实现一个类 MyFSDataInputStream,该类继承 org.apache.hadoop.fs.FSDataInputStream,要求如下:
实现按行读取的方法 readLine(),如果读到文件末尾,则返回空,否则返回文件一行的文本。
使用 MyFSDataInputStream 读取若干字节数据时,首先查找缓存,如果缓存中有所需数据,则直接由缓存提供,否则向 HDFS 读取数据。
import org.apache.hadoop.fs.*;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
/**
 * Singleton {@link FSDataInputStream} wrapper that offers line-by-line reading through the
 * static {@link #readline(FileSystem)} method.
 *
 * <p>Only the first stream passed to {@link #getInstance(InputStream)} is used; later calls
 * return the already created instance and ignore their argument. All shared state is static
 * and guarded by the class lock.
 */
public class MyFSDataInputStream extends FSDataInputStream {
    // volatile is required for the double-checked initialization in getInstance():
    // without it another thread may observe a partially constructed instance.
    private static volatile MyFSDataInputStream myFSDataInputStream;
    private static InputStream inputStream;
    // Created once and kept between calls so that characters the reader has already
    // buffered are not lost from one readline() call to the next.
    private static BufferedReader lineReader;

    private MyFSDataInputStream(InputStream in) {
        super(in);
        inputStream = in;
    }

    /**
     * Returns the single instance, creating it from {@code inputStream} on the first call.
     *
     * @param inputStream stream to wrap; ignored once an instance exists
     * @return the shared instance
     */
    public static MyFSDataInputStream getInstance(InputStream inputStream) {
        if (null == myFSDataInputStream) {
            synchronized (MyFSDataInputStream.class) {
                if (null == myFSDataInputStream) {
                    myFSDataInputStream = new MyFSDataInputStream(inputStream);
                }
            }
        }
        return myFSDataInputStream;
    }

    /**
     * Reads the next line of text from the wrapped stream.
     *
     * <p>Successive calls return successive lines. When the end of the stream is reached the
     * stream is closed and {@code null} is returned; further calls keep returning
     * {@code null}. An {@link IOException} is printed and reported as {@code null}.
     *
     * @param fileStatus unused; kept so existing callers continue to compile
     * @return the next line without its line terminator, or {@code null} at end of stream,
     *         on I/O failure, or when no instance has been created yet
     */
    public static synchronized String readline(FileSystem fileStatus) {
        if (inputStream == null) {
            // No instance has been created yet, or the stream was already read to its end.
            return null;
        }
        try {
            if (lineReader == null) {
                lineReader = new BufferedReader(new InputStreamReader(inputStream));
            }
            String line = lineReader.readLine();
            if (line == null) {
                // End of stream: closing the reader also closes the wrapped stream.
                lineReader.close();
                lineReader = null;
                inputStream = null;
            }
            return line;
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        }
    }
}
复制代码
运行结果:
使用 java.net.URL 和 org.apache.hadoop.fs.FsUrlStreamHandlerFactory 编程完成输出 HDFS 中指定文件的文本到终端中。
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FsUrlStreamHandlerFactory;
import org.apache.hadoop.fs.Path;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
/**
 * Prints the text of an HDFS file to standard output by opening it through a
 * {@code hdfs://} {@link URL}.
 */
public class ShowTheContent {
    // URL.setURLStreamHandlerFactory may be called at most once per JVM; this flag makes
    // repeated show() calls safe.
    private static boolean urlHandlerRegistered;

    private Path remotePath;
    private FileSystem fileSystem;

    /**
     * @param fileSystem file system handle; stored but not used by {@link #show()}
     * @param remotePath path of the HDFS file to print
     */
    public ShowTheContent(FileSystem fileSystem, Path remotePath) {
        this.fileSystem = fileSystem;
        this.remotePath = remotePath;
    }

    /** Registers the Hadoop URL stream handler factory the first time it is needed. */
    private static synchronized void registerHdfsUrlHandler() {
        if (!urlHandlerRegistered) {
            URL.setURLStreamHandlerFactory(new FsUrlStreamHandlerFactory());
            urlHandlerRegistered = true;
        }
    }

    /**
     * Reads the file line by line from {@code hdfs://localhost:9000} and prints each line.
     * Any {@link IOException} is printed and swallowed.
     */
    public void show() {
        registerHdfsUrlHandler();
        // NOTE(review): host and port are hard-coded; they are not taken from fileSystem.
        // try-with-resources closes the reader and the underlying URL stream.
        try (InputStream inputStream = new URL("hdfs", "localhost", 9000, remotePath.toString()).openStream();
             BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(inputStream))) {
            String line;
            while ((line = bufferedReader.readLine()) != null) {
                System.out.println(line);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
复制代码
输出结果:
欢迎关注本文作者:
扫码关注并回复「干货」,获取我整理的千G Android、iOS、JavaWeb、大数据、人工智能等学习资源。