select * from XXX;
tar -zxvf apache-hive-1.2.2-bin.tar.gz -C 目标目录
mv apache-hive-1.2.2-bin/ hive-1.2.2
mv hive-env.sh.template hive-env.sh
vi hive-env.sh
# Set HADOOP_HOME to point to a specific hadoop install directory
# 指定Hadoop安装路径
HADOOP_HOME=Hadoop安装路径
# Hive Configuration Directory can be controlled by:
# 指定Hive配置文件夹
export HIVE_CONF_DIR=/XXXXXX/hive-1.2.2/conf
vi /etc/profile
export HIVE_HOME=hive安装路径
export PATH=$PATH:$HIVE_HOME/bin
# Hadoop环境加入Hive依赖
export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:$HIVE_HOME/lib/*
source /etc/profile
hive
quit;
vi hive-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://主机名:3306/metastore?createDatabaseIfNotExist=true</value>
<description>JDBC connect string for a JDBC metastore</description>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
<description>Driver class name for a JDBC metastore</description>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>root</value>
<description>username to use against metastore database</description>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>密码</value>
<description>password to use against metastore database</description>
</property>
<!-- 查询表时显示表头信息 -->
<property>
<name>hive.cli.print.header</name>
<value>true</value>
</property>
<!-- 显示当前所在的数据库 -->
<property>
<name>hive.cli.print.current.db</name>
<value>true</value>
</property>
</configuration>
hive
Java数据类型 | Hive数据类型 | 长度 |
---|---|---|
byte | TINYINT | 1byte有符号整数 |
short | SMALLINT | 2byte有符号整数 |
int | INT | 4byte有符号整数 |
long | BIGINT | 8byte有符号整数 |
boolean | BOOLEAN | false/true |
float | FLOAT | 单精度浮点 |
double | DOUBLE | 双精度浮点 |
string | STRING | 字符 |
（无对应） | BINARY | 字节数组 |
show databases;
create database 数据库名;
create database if not exists 数据库名;
create database 数据库名 location '路径';
create [external] table [if not exists] 表名(参数) [partitioned by(字段信息)] [clustered by(字段信息)] [sorted by(字段信息)]
row format delimited fields terminated by '切割符';
-- row format delimited fields：根据行格式化并分割字段，切割符为分割依据
desc formatted 表名;
-- 查看表的详细结构信息（含分割依据）
select * from 表名;
select 表名.列1, 表名.列2 from 表名;
select 表名.列 (as) 列别名 from 表名;
select * from 表名;
select * from 表名 where 分区条件;
select * from 表名1 where 分区条件 union select * from 表名1 where 分区条件;
select count(1) from 表名;
select max(列名) from 表名;
select min(列名) from 表名;
select sum(列名) from 表名;
select avg(列名) from 表名;
select * from 表名 limit n;
select * from 表名 where A>n and A<m;
select * from 表名 where A between n and m;
select * from 表名 where A in(n,m);
select * from 表名 where A<n or A>m;
select * from 表名 where A not in(n,m);
select * from 表名 where A is null;
select * from 表名 where A is not null;
select * from 表名 where A like 'n%';
select * from 表名 where A like '_n%';
select * from 表名 where A like '%n%';
select A,B from 表名 group by B;
select * from 表名 order by 列名 asc;
select * from 表名 order by 列名 desc;
set mapreduce.job.reduces = n;
select * from 表名 sort by 列名;
select * from 表名 sort by 列名 desc;
select * from 表名 distribute by A sort by B desc;
select * from 表名 cluster by A;
select * from 表名 distribute by A sort by A;
alter table 表名 add partition(新分区信息);
show partitions 表名;
alter table 表名 drop partition(分区信息);
msck repair table dept_partitions;
clustered by(字段信息) into n buckets
set hive.enforce.bucketing = true;
set mapreduce.job.reduces = -1;
select * from 表名 tablesample(bucket n out of a on A);
desc database 数据库名;
alter database 数据库名 set dbproperties('key'='value');
desc database extended 数据库名;
show databases like 'i*';
drop database 数据库名;
drop database if exists 数据库名;
drop database 数据库名 cascade;
drop database if exists 数据库名 cascade;
load data [local] inpath '/XXXX/文件名' into table 表名 [partition(分区位置)];
insert into table 表名 partition(分区信息) values(数据内容);
insert overwrite table 表名 partition(分区信息) select * from 表名 where 查询条件;
create table if not exists 表名 as select * from 表名 where 查询条件;
create table 表名(参数) row format delimited fields terminated by '切割符' location 'hdfs路径';
insert overwrite local directory '本地路径' select * from 表名;
export table 表名 to 'hdfs路径';
import table 表名 from 'hdfs路径';
truncate table 表名;
hive -e "Hive-DDL语句(注意分号)"
hive -f sql路径
dfs -ls 路径;
dfs -cat 文件路径;
dfs -mkdir -p 目录路径;
dfs -put 文件路径 目录路径;
cat ~/.hivehistory
show functions;
desc function extended 函数名;
add jar jar包路径;
create temporary function 别名 as "java函数类";
<property>
<name>hive.aux.jars.path</name>
<value>file://文件夹路径</value>
</property>
set hive.exec.compress.intermediate = true;
set mapreduce.map.output.compress = true;
set mapreduce.map.output.compress.codec = org.apache.hadoop.io.compress.SnappyCodec;
set hive.exec.compress.output= true;
set mapreduce.output.fileoutputformat.compress = true;
set mapreduce.output.fileoutputformat.compress.codec = org.apache.hadoop.io.compress.SnappyCodec;
set mapreduce.output.fileoutputformat.compress.type = BLOCK;
<configuration>
<property>
<name>hive.metastore.warehouse.dir</name>
<value>/opt/module/hive-1.2.2/warehouse</value>
</property>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<!-- MySQL数据库位置 -->
<value>jdbc:mysql://bigdata01:3306/metastore?createDatabaseIfNotExist=true</value>
<description>JDBC connect string for a JDBC metastore</description>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
<description>Driver class name for a JDBC metastore</description>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>MySQL用户名</value>
<description>username to use against metastore database</description>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>MySQL密码</value>
<description>password to use against metastore database</description>
</property>
</configuration>
client端配置文件:
<configuration>
<property>
<name>hive.metastore.warehouse.dir</name>
<value>/opt/module/hive-1.2.2/warehouse</value>
</property>
<property>
<name>hive.metastore.local</name>
<value>false</value>
</property>
<property>
<name>hive.metastore.uris</name>
<!-- server端地址信息 -->
<value>thrift://bigdata01:9083</value>
</property>
<!-- 查询表时显示表头信息 -->
<property>
<name>hive.cli.print.header</name>
<value>true</value>
</property>
<!-- 显示当前所在的数据库 -->
<property>
<name>hive.cli.print.current.db</name>
<value>true</value>
</property>
</configuration>
启动:
hive --service metastore
hive
注意:
ls: cannot access /opt/module/spark-2.1.0/lib/spark-assembly-*.jar: No such file or directory
vi /XXXX/hive/bin/hive
sparkAssemblyPath=`ls ${SPARK_HOME}/jars/*.jar`