当数据集的大小超过一台独立计算机的存储能力时,就需要通过网络中的多台机器来存储数据集;管理网络中多台计算机上存储的文件系统,称为分布式文件系统
分布式
高可用
通透性
namenode
datanode
流式数据的访问
商用硬件
低时间延时的数据访问
大量的小文件
多用户写入,任意修改文件
block: 数据块
为何会有以上的设计
合并的时机
http://namenode:50070
<!-- Hadoop libraries used by the HDFS client examples; all pinned to version 2.6.4. -->
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-common</artifactId>
    <version>2.6.4</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>2.6.4</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-hdfs</artifactId>
    <version>2.6.4</version>
</dependency>
import org.apache.commons.compress.utils.IOUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.*; import org.junit.Test; public class HdfsTest { /** * 写文件操做 */ @Test public void testWriteFile() throws Exception { //建立配置对象 Configuration conf = new Configuration(); conf.set("fs.defaultFS", "hdfs://uplooking01:8020"); //建立文件系统对象 FileSystem fs = FileSystem.get(conf); Path path = new Path("/test002.txt"); FSDataOutputStream fsDataOutputStream = fs.create(path, true); fsDataOutputStream.write("hello".getBytes()); fsDataOutputStream.flush(); fsDataOutputStream.close(); } /** * 读文件操做 */ @Test public void testReadFile() throws Exception { //建立配置对象 Configuration conf = new Configuration(); conf.set("fs.defaultFS", "hdfs://uplooking01:8020"); //建立文件系统对象 FileSystem fs = FileSystem.get(conf); Path path = new Path("/test002.txt"); FSDataInputStream fsDataInputStream = fs.open(path); IOUtils.copy(fsDataInputStream, System.out); } /** * 上传文件操做 */ @Test public void testuploadFile() throws Exception { //建立配置对象 Configuration conf = new Configuration(); conf.set("fs.defaultFS", "hdfs://uplooking01:8020"); //建立文件系统对象 FileSystem fs = FileSystem.get(conf); Path fromPath = new Path("file:///f:/test01.txt"); Path toPath = new Path("/test01.txt"); fs.copyFromLocalFile(false, fromPath, toPath); } /** * 下载文件操做 */ @Test public void testdownloadFile() throws Exception { //建立配置对象 Configuration conf = new Configuration(); conf.set("fs.defaultFS", "hdfs://uplooking01:8020"); //建立文件系统对象 FileSystem fs = FileSystem.get(conf); Path fromPath = new Path("/test01.txt"); Path toPath = new Path("file:///f:/test01.txt"); fs.copyToLocalFile(false, fromPath, toPath); } /** * 下载文件操做 */ @Test public void testOtherFile() throws Exception { //建立配置对象 Configuration conf = new Configuration(); conf.set("fs.defaultFS", "hdfs://uplooking01:8020"); //建立文件系统对象 FileSystem fs = FileSystem.get(conf); // BlockLocation[] blockLocations = fs.getFileBlockLocations(new Path("/test01.txt"), 0, 
134217730); // System.out.println(blockLocations); FileStatus[] listStatus = fs.listStatus(new Path("/test01.txt")); System.out.println(listStatus); } }
回滚edits: hdfs dfsadmin -rollEdits
安全模式(进入 / 离开 / 查看 / 等待): hdfs dfsadmin -safemode enter | leave | get | wait
融合edits和fsimage(需先进入安全模式): hdfs dfsadmin -saveNamespace
查看fsimage: hdfs oiv -i -o -p
查看edits: hdfs oev -i -o -p
目录配额
设置目录配额
清除目录配额
空间配额
设置空间配额
hdfs dfsadmin -setSpaceQuota n dir
清除空间配额
hdfs getconf -confKey keyname
设计目的:
定义协议
/** * 定义协议 */ public interface IHelloService extends VersionedProtocol { public long versionID = 123456798L;//定义协议的版本 public String sayHello(String name);//协议的具体条目 }
定义RPC的服务器实例类
/**
 * Implementation class of the {@link IHelloService} protocol.
 */
public class HelloServiceImpl implements IHelloService {

    /**
     * Prints the received name, framed by separator marks, to standard output and
     * returns {@code "hello"} followed by that name.
     *
     * @param name the name supplied by the caller
     * @return the greeting string
     */
    @Override
    public String sayHello(String name) {
        final String separator = "==================";
        System.out.println(separator + name + separator);
        final String greeting = "hello" + name;
        return greeting;
    }

    /**
     * Reports the protocol version declared on {@link IHelloService}.
     *
     * @param protocol      unused by this implementation
     * @param clientVersion unused by this implementation
     * @return {@link IHelloService#versionID}
     */
    @Override
    public long getProtocolVersion(String protocol, long clientVersion) throws IOException {
        return IHelloService.versionID;
    }

    /**
     * Returns a freshly constructed, default {@link ProtocolSignature}; none of the
     * arguments are consulted.
     */
    @Override
    public ProtocolSignature getProtocolSignature(String protocol,
                                                  long clientVersion,
                                                  int clientMethodsHash) throws IOException {
        ProtocolSignature signature = new ProtocolSignature();
        return signature;
    }
}
定义RPC程序的启动程序
public class MyRpcServer { public static void main(String[] args) throws IOException { Configuration conf = new Configuration(); RPC.Server server = new RPC.Builder(conf) .setBindAddress("172.16.4.3")//配置主机 .setPort(8899)//配置端口 .setProtocol(IHelloService.class)//配置协议 .setInstance(new HelloServiceImpl())//配置实例,能够配置多个 .build(); server.start(); System.out.println("RPC服务器启动成功...."); } }
定义协议
/** * 定义协议 */ public interface IHelloService extends VersionedProtocol { public long versionID = 123456798L;//定义协议的版本 public String sayHello(String name);//协议的具体条目 }
定义客户端启动程序
// Client side: obtain a proxy for IHelloService at 172.16.4.3:8899, invoke sayHello once,
// print the reply, and release the proxy afterwards.
Configuration conf = new Configuration();
ProtocolProxy<IHelloService> proxy = RPC.getProtocolProxy(
        IHelloService.class,
        IHelloService.versionID,
        new InetSocketAddress("172.16.4.3", 8899),
        conf);
IHelloService helloService = proxy.getProxy();
try {
    String ret = helloService.sayHello("xiaoming");
    System.out.println(ret);
} finally {
    // Release the proxy even when the call fails, so the client does not leak it.
    RPC.stopProxy(helloService);
}
hadoop-daemon.sh start namenode
hadoop-daemon.sh start datanode
hadoop-daemon.sh start secondarynamenode
yarn-daemon.sh start resourcemanager
yarn-daemon.sh start nodemanager
==在namenode中操作==
hdfs-site.xml
<!-- Whitelist: dfs.hosts gives the path of the include file -->
<property>
    <name>dfs.hosts</name>
    <value>/opt/hadoop/etc/hadoop/dfs.include</value>
</property>
创建白名单文件
/opt/hadoop/etc/hadoop/dfs.include
uplooking03
uplooking04
uplooking05
uplooking06
刷新节点:
hdfs dfsadmin -refreshNodes