一、文件载入Hive表node
语法:express
LOAD DATA [LOCAL] INPATH 'filepath' [OVERWRITE] INTO TABLE tablename [PARTITION (partcol1=val1, partcol2=val2 ...)]
示例:apache
# 建立分区表 create table tb_load_data( id int, name string, hobby array<string>, add map<String,string> ) partitioned by(part_tag1 string,part_tag2 int) row format delimited fields terminated by ',' collection items terminated by '-' map keys terminated by ':' lines terminated by '\n' ; # 加载数据,数据加载后扔是目录下的源文件 load data local inpath '/usr/local/hive-2.1.1/data_dir/complex_data_type.txt' overwrite into table tb_load_data partition (part_tag1 = 'first',part_tag2=100); # 数据文件信息 [hadoop@node224 ~]$ hdfs dfs -cat /user/hive/warehouse/db01.db/tb_load_data/part_tag1=first/part_tag2=100/complex_data_type.txt 1,xiaoming,book-TV-code,beijing:chaoyang-shagnhai:pudong 2,lilei,book-code,nanjing:jiangning-taiwan:taibei 3,lihua,music-book,heilongjiang:haerbin
二、经过查询向Hive表中插入数据数组
语法:bash
# 基础语法 NSERT OVERWRITE TABLE tablename1 [PARTITION (partcol1=val1, partcol2=val2 ...) [IF NOT EXISTS]] select_statement1 FROM from_statement; INSERT INTO TABLE tablename1 [PARTITION (partcol1=val1, partcol2=val2 ...)] select_statement1 FROM from_statement; # Hive extension (multiple inserts): FROM from_statement INSERT OVERWRITE TABLE tablename1 [PARTITION (partcol1=val1, partcol2=val2 ...) [IF NOT EXISTS]] select_statement1 [INSERT OVERWRITE TABLE tablename2 [PARTITION ... [IF NOT EXISTS]] select_statement2] [INSERT INTO TABLE tablename2 [PARTITION ...] select_statement2] ...; FROM from_statement INSERT INTO TABLE tablename1 [PARTITION (partcol1=val1, partcol2=val2 ...)] select_statement1 [INSERT INTO TABLE tablename2 [PARTITION ...] select_statement2] [INSERT OVERWRITE TABLE tablename2 [PARTITION ... [IF NOT EXISTS]] select_statement2] ...; # Hive extension (dynamic partition inserts): INSERT OVERWRITE TABLE tablename PARTITION (partcol1[=val1], partcol2[=val2] ...) select_statement FROM from_statement; INSERT INTO TABLE tablename PARTITION (partcol1[=val1], partcol2[=val2] ...) select_statement FROM from_statement;
动态分区插入相关的配置参数oop
示例:测试
# 建立基础插入测试表 create table tb_insert_data( id int, name string, hobby array<string>, add map<String,string> ) partitioned by(part_tag3 int,part_tag4 string) row format delimited fields terminated by ',' collection items terminated by '-' map keys terminated by ':' lines terminated by '\n' ; # insert into 模式加载追加 # 静态 insert into table tb_insert_data partition(part_tag3=200,part_tag4='second') select id,name,hobby,add from tb_load_data; # 动态 set hive.exec.dynamic.partition.mode=nonstrict; # 是会话级别的设置 insert into table tb_insert_data partition(part_tag3,part_tag4) select id,name,hobby,add,part_tag2,part_tag1 from tb_load_data; select id,name,hobby,add,part_tag2,part_tag1 from tb_load_data # insert overwrite 模式覆盖相同的数据,而不是下一批的数据覆盖整个表的数据 # 静态 insert overwrite table tb_insert_data partition(part_tag3=200,part_tag4='second') select id,name,hobby,add from tb_load_data where name='lilei'; insert overwrite table tb_insert_data partition(part_tag3=300,part_tag4='second') select id,name,hobby,add from tb_load_data; # 动态 insert overwrite table tb_insert_data partition(part_tag3,part_tag4) select id,name,hobby,add,part_tag2,part_tag1 from tb_load_data; select * from tb_insert_data; #hive扩展一次多表写入 # 建立一次多表插入测试表 create table tb_insert_multi_01( id int, name string, hobby array<string>, add map<String,string> ) partitioned by(part_tag7 int,part_tag8 string) row format delimited fields terminated by ',' collection items terminated by '-' map keys terminated by ':' lines terminated by '\n' ; create table tb_insert_multi_02( id int, name string, hobby array<string>, add map<String,string> ) partitioned by(part_tag5 string,part_tag6 int) row format delimited fields terminated by ',' collection items terminated by '-' map keys terminated by ':' lines terminated by '\n' ; create table tb_insert_multi_03( id int, name string, hobby array<string>, add map<String,string> ) row format delimited fields terminated by ',' collection items terminated by '-' map keys terminated by ':' lines terminated by '\n' ; # 一次多表 overwrite\into\分区\非分区 from tb_load_data insert overwrite table tb_insert_multi_01 partition(part_tag7=400,part_tag8='multi_01') select id,name,hobby,add insert into table tb_insert_multi_02 partition(part_tag5,part_tag6) select id,name,hobby,add,part_tag1,part_tag2 insert into tb_insert_multi_03 select id,name,hobby,add ; # 加载完成后测试载入数据状况 select * from tb_insert_multi_01; select * from tb_insert_multi_02; select * from tb_insert_multi_03;
三、查询结果写入文件系统(本地\hdfs)code
语法:orm
INSERT OVERWRITE [LOCAL] DIRECTORY directory1 [ROW FORMAT row_format] [STORED AS file_format] (Note: Only available starting with Hive 0.11.0) SELECT ... FROM ... # Hive extension (multiple inserts): FROM from_statement INSERT OVERWRITE [LOCAL] DIRECTORY directory1 select_statement1 [INSERT OVERWRITE [LOCAL] DIRECTORY directory2 select_statement2] ... # row_format : DELIMITED [FIELDS TERMINATED BY char [ESCAPED BY char]] [COLLECTION ITEMS TERMINATED BY char] [MAP KEYS TERMINATED BY char] [LINES TERMINATED BY char] [NULL DEFINED AS char] (Note: Only available starting with Hive 0.13)
示例:blog
# 写入本地文件系统 insert overwrite local directory '/usr/local/hive-2.1.1/data_dir/tb_insert_multi_02' row format delimited fields terminated by ',' collection items terminated by '-' map keys terminated by ':' lines terminated by '\n' stored as textfile select * from tb_insert_multi_02 ; # 写入HDFS目录 insert overwrite directory '/tmp/hive/tb_insert_multi_02' row format delimited fields terminated by ',' collection items terminated by '-' map keys terminated by ':' lines terminated by '\n' stored as textfile select * from tb_insert_multi_02 ; # 扩展语句一次分别写入本地和HDFS FROM tb_insert_multi_02 INSERT OVERWRITE LOCAL DIRECTORY '/usr/local/hive-2.1.1/data_dir/tb_insert_multi_02_local' select id,name,hobby,add INSERT OVERWRITE DIRECTORY '/tmp/hive/tb_insert_multi_02_hdfs' select * ;
四、将指定的值插入Hive表
不能插入复合数据类型,如集合,数组等
语法
INSERT INTO TABLE tablename [PARTITION (partcol1[=val1], partcol2[=val2] ...)] VALUES values_row [, values_row ...] Where values_row is: ( value [, value ...] ) where a value is either null or any valid SQL literal
示例
# 建表 CREATE TABLE students (name VARCHAR(64), age INT, gpa DECIMAL(3, 2)); # 插入数据 INSERT INTO TABLE students VALUES ('fred flintstone', 35, 1.28), ('barney rubble', 32, 2.32); # 复杂数据类型插入是提示错误 0: jdbc:hive2://node225:10000/db01> insert into table tb_insert_multi_02 partition(part_tag5,part_tag6) values (4,huafeng,'["music","book"]','{"heilongjiang":"haerbin"}','third',500); FAILED: SemanticException [Error 10293]: Unable to create temp file for insert values Expression of type TOK_TABLE_OR_COL not supported in insert/values Error: Error while compiling statement: FAILED: SemanticException [Error 10293]: Unable to create temp file for insert values Expression of type TOK_TABLE_OR_COL not supported in insert/values (state=42000,code=10293)
五、更新(update)合并(merge)删除(delete)
Update\Merge\Delete can only be performed on tables that support ACID,此处遗留一个点,Hive的事务开启。。
语法
# update UPDATE tablename SET column = value [, column = value ...] [WHERE expression] # delete DELETE FROM tablename [WHERE expression] # merge MERGE INTO <target table> AS T USING <source expression/table> AS S ON <boolean expression1> WHEN MATCHED [AND <boolean expression2>] THEN UPDATE SET <set clause list> WHEN MATCHED [AND <boolean expression3>] THEN DELETE WHEN NOT MATCHED [AND <boolean expression4>] THEN INSERT VALUES<value list>