Reposted from: http://blog.csdn.net/lovelovelovelovelo/article/details/52234971
Data types
Primitive types: tinyint, smallint, int, bigint, float, double, boolean, string, etc.
Collection types: array, map, struct
File formats: textfile, sequencefile, rcfile
Creating a table (a managed, i.e. internal, table)
create table employee(
  name string comment 'name',
  salary float,
  subordinates array<string>,
  deductions map<string,float>,
  address struct<street:string,city:string,state:string,zip:int>
)
row format delimited
fields terminated by '\t'
collection items terminated by ','
map keys terminated by ':'
lines terminated by '\n'
stored as textfile;
Loading data from a file, overwriting the table's current contents:
load data local inpath '/path/to/file' overwrite into table table_name;
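For example, to replace the contents of the employee table defined above with a local file (the path here is hypothetical):
load data local inpath '/root/employee.txt' overwrite into table employee;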
Creating an external table
create external table employee(
  name string comment 'name',
  salary float,
  subordinates array<string>,
  deductions map<string,float>,
  address struct<street:string,city:string,state:string,zip:int>
)
row format delimited
fields terminated by '\t'
collection items terminated by ','
map keys terminated by ':'
lines terminated by '\n'
stored as textfile
location '/data/';
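To confirm whether a table ended up managed or external, describe formatted prints a Table Type field:
describe formatted employee;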
Data in the table:
lucy	11000	tom,jack,dave,kate	tom:1200,jack:1560	beijing,changanjie,xichengqu,10000
lily	13000	dave,kate	dave:1300,kate:1260	beijing,changanjie,xichengqu,10000
Unlike the relational databases we are familiar with, Hive (at the time of writing) does not support supplying a set of literal records directly in an insert statement; that is, Hive has no INSERT INTO ... VALUES form.
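One workaround, besides loading from a file as shown next, is to insert the results of a query over an existing table; a sketch, where employee_staging is a hypothetical table with the same schema:
insert into table employee select * from employee_staging;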
Create employee.txt and put the data in it; note that fields are separated by tabs and records by newlines.
Load the data through the hive command:
hive> load data local inpath '/root/employee.txt' into table employee;
hive> select * from employee;
OK
lucy	11000.0	["tom","jack","dave","kate"]	{"tom":1200.0,"jack":1560.0}	{"street":"beijing","city":"changanjie","state":"xichengqu","zip":10000}
lily	13000.0	["dave","kate"]	{"dave":1300.0,"kate":1260.0}	{"street":"beijing","city":"changanjie","state":"xichengqu","zip":10000}
Time taken: 0.054 seconds, Fetched: 2 row(s)
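The collection columns can be addressed element-wise in queries; a quick sketch against the employee table above:
select name,
       subordinates[0],    -- array element by index
       deductions['tom'],  -- map value by key
       address.city        -- struct field by name
from employee;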
A plain select * from a table does not launch a MapReduce job; Hive reads the files directly.
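By contrast, any query that must aggregate or filter, even a simple count, does launch a job:
select count(*) from employee;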
Creating a table with the same schema as another
create table table2 like table1;
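Note that like copies only the table definition; the new table starts out empty:
select count(*) from table2;  -- returns 0 until data is loaded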
Creating a table from a query over another table (CTAS)
create table table2 as select name, age, addr from table1;
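Unlike like, create table ... as select also populates the new table with the query results; in the Hive versions contemporary with this post, the target of a CTAS statement could not be an external or partitioned table.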
Reading Hive files stored in different formats:
stored as textfile: hadoop fs -text
stored as sequencefile: hadoop fs -text
stored as rcfile: hive --service rcfilecat <path>
custom formats: declared with stored as inputformat 'class' outputformat 'class'
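The last form names the underlying classes explicitly; for a plain text table, the standard Hadoop classes would be used (a sketch, equivalent to stored as textfile):
create table t_text(line string)
stored as
inputformat 'org.apache.hadoop.mapred.TextInputFormat'
outputformat 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat';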
Partitioned table operations
alter table employee add if not exists partition(country='');
alter table employee drop if exists partition(country='');
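show partitions lists the partitions a table currently has:
show partitions employee;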
Hive bucketing
create table bucket_table(
  id int,
  name string
)
clustered by(id) sorted by(name) into 4 buckets
row format delimited fields terminated by '\t'
stored as textfile;

set hive.enforce.bucketing=true;
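With hive.enforce.bucketing set as above, a bucketed table is populated with insert ... select so that Hive can hash each row into its bucket; a sketch, assuming a plain staging table source_table(id int, name string):
insert overwrite table bucket_table
select id, name from source_table;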
Creating a partitioned table
create table partitionTable(
  name string,
  age int
)
partitioned by(dt string)
row format delimited fields terminated by '\t' lines terminated by '\n'
stored as textfile;
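A file is then loaded into one specific partition; the path and date below are hypothetical:
load data local inpath '/root/employee_0817.txt' into table partitionTable partition(dt='2016-08-17');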