PoC Deployment Supplement

 

[System initialization]

sudo systemctl stop firewalld
sudo systemctl disable firewalld
sudo setenforce 0
sudo vi /etc/sysconfig/selinux
SELINUX=disabled
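
Alternatively, the same change can be made non-interactively with sed (a sketch; --follow-symlinks keeps the /etc/sysconfig/selinux symlink intact):

sudo sed -i --follow-symlinks 's/^SELINUX=.*/SELINUX=disabled/' /etc/sysconfig/selinux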

sudo tar -zxf docker-18.06.3-ce.tgz -C /usr/local/
sudo cp /usr/local/docker/* /usr/bin/
mkdir -p /home/dockernode
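
Quick sanity check that the client binary is on the PATH (the daemon is not running yet, so only the client version prints):

docker -v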

[Fix docker auto-start]

Add the unit file needed for docker auto-start

sudo vi /usr/lib/systemd/system/docker.service
[Unit]
Description=Docker Application Container Engine
Documentation=https://docs.docker.com
After=network-online.target firewalld.service
Wants=network-online.target
[Service]
Type=notify
ExecStart=/usr/bin/dockerd --graph /home/docker
ExecReload=/bin/kill -s HUP $MAINPID
LimitNOFILE=infinity
LimitNPROC=infinity
TimeoutStartSec=0
Delegate=yes
KillMode=process
Restart=on-failure
StartLimitBurst=3
StartLimitInterval=60s
[Install]
WantedBy=multi-user.target
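
After writing or changing the unit file, reload systemd so it picks up the new definition:

sudo systemctl daemon-reload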

sudo groupadd docker

Add your user to the docker group

sudo usermod -aG docker $USER

Add the logged-in user to the docker group (alternative to usermod)

sudo gpasswd -a $USER docker

Refresh group membership

newgrp docker

Start docker via systemctl

sudo systemctl start docker

Enable docker at boot

sudo systemctl enable docker


Has the database been switched to MariaDB?
[database needs confirmation]
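
One way to confirm which engine the container actually runs (assuming the container is named mysql, as in the commands below); MariaDB builds report "MariaDB" in the version string:

docker exec mysql mysql --version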

Copy the SQL scripts into the database container
[fleshed out: docker cp the SQL scripts into the container]

Use docker cp to copy dataexa-insight-microservice-poc.sql into the container's root directory

docker cp /home/dataexa/insight-deploy/resources/mirrors/mysql/dataexa-insight-microservice-poc.sql mysql:/
docker exec -it mysql bash
mysql -uroot -pDataExa5528280
create database `dataexa-insight-microservice-poc` default character set utf8 default collate utf8_general_ci;
show databases;
use `dataexa-insight-microservice-poc`;

Run the SQL script at its full path with source

source /dataexa-insight-microservice-poc.sql
show tables;

Use docker cp to copy xxl-job-poc.sql into the container's root directory

docker cp /home/dataexa/insight-deploy/resources/xxl/xxl-job-poc.sql mysql:/
source /xxl-job-poc.sql

Detach from the docker container without stopping it

Keyboard: press Ctrl + p + q together; when "read escape sequence" appears, the container has been detached cleanly.


[Unzip platform.zip into the nginx html directory]

unzip /home/dataexa/insight-deploy/resources/html/platform.zip -d /home/dataexa/insight-microservice/workspace/nginx_workspace/html/

unzip needs to be installed first

unzip [the offline unzip package was missing]
sudo rpm -ivh /home/dataexa/insight-deploy/resources/tools_package/offline_installer/centos/unzip-6.0-20.el7.x86_64.rpm

Check that unzip installed successfully
which unzip
/usr/bin/unzip

[When copy-pasting, the -v flag lost its trailing space]
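
For reference, there must be a space between -v and the mount spec; the mount below is only an illustrative guess based on the html path used above:

docker run -d --name nginx -v /home/dataexa/insight-microservice/workspace/nginx_workspace/html:/usr/share/nginx/html nginx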
[After editing nginx.conf]
docker restart nginx

Before restarting, detach from the container cleanly with Ctrl + p + q

Restart the elasticsearch container

docker restart elasticsearch

Install bzip2 offline

sudo rpm -ivh insight-deploy/resources/tools_package/offline_installer/centos/bzip2-1.0.6-13.el7.x86_64.rpm

Check that bzip2 is installed

which bzip2
/usr/bin/bzip2

The python3.5.tar.gz package used by conda has become a zip package
[corrected command]
unzip /home/dataexa/insight-deploy/resources/conda/python/python3.5.zip -d /home/dataexa/anaconda3/envs
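
To check that the environment is visible to conda (assuming anaconda3 is already on the PATH):

conda env list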


Additional paths to create [python monitoring paths]

mkdir -p /home/dataexa/insight-microservice/workspace/python_service_workspace/container_monitor
mkdir -p /home/dataexa/insight-microservice/workspace/python_service_workspace/dlv5


vim supplement [to be fixed]: vim is not usable

sudo cp /home/dataexa/insight-deploy/resources/tools_package/offline_installer/ubuntu/vim.zip /usr/local/
cd /usr/local
sudo unzip vim.zip
sudo ln -s /usr/local/vim/bin/vim /usr/local/bin/vim

Verify that vim is installed

sudo which vim

python [fix]
dlv5 change [added the nohup python start command]
nohup python /home/dataexa/insight-microservice/workspace/python_service_workspace/dlv5/insight-v5service/tornado_service.py >> /home/dataexa/insight-microservice/workspace/python_service_workspace/dlv5/insight-v5service/nohup.out 2>&1 &
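
To confirm the dlv5 service came up, follow the log it appends to:

tail -f /home/dataexa/insight-microservice/workspace/python_service_workspace/dlv5/insight-v5service/nohup.out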

insight-tagging [create the directory first; it did not exist before the cp]
mkdir -p /home/dataexa/insight-microservice/workspace/python_service_workspace/data_labeling
cd /home/dataexa/insight-microservice/workspace/python_service_workspace/data_labeling/insight-tagging/

Before starting, clear the cache

find . -name "__pycache__" | xargs rm -r

Start command

nohup python tornado_service.py >/dev/null 2>&1 &
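
Since stdout is discarded, confirm the process is alive with ps:

ps -ef | grep tornado_service.py | grep -v grep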

Create the spark working directory before extracting

mkdir -p /home/dataexa/insight-microservice/workspace/spark_workspace

Create the hadoop working directory before extracting

mkdir -p /home/dataexa/insight-microservice/workspace/hadoop_workspace/

Create the yarn-conf working directory before extracting

mkdir -p /home/dataexa/insight-microservice/workspace/hadoop_workspace/yarn-conf/

Hadoop deployment [environment variables fixed]: set both the jdk and hadoop environment variables under the dataexa user
vi ~/.bashrc
export JAVA_HOME=/home/dataexa/insight-deploy/resources/jdk/jdk1.8.0_231
export PATH=$JAVA_HOME/bin:$PATH
export HADOOP_HOME=/home/dataexa/insight-deploy/resources/hadoop/hadoop-2.9.0
export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH

vi etc/hadoop/hadoop-env.sh
export JAVA_HOME=/home/dataexa/insight-deploy/resources/jdk/jdk1.8.0_231
export PATH=$JAVA_HOME/bin:$PATH

If formatting the filesystem with hdfs namenode -format fails:

cd /tmp
Move the hadoop-related files (names containing "hadoop") to a backup directory
Format the filesystem again
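
A minimal sketch of the steps above, assuming the default /tmp/hadoop-* data directories and a hypothetical backup location:

mkdir -p /tmp/hdfs_backup
mv /tmp/hadoop-* /tmp/hdfs_backup/
hdfs namenode -format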

If the following appears:

20/05/21 07:23:26 INFO ipc.Client: Retrying connect to server: 0.0.0.0/0.0.0.0:8032. Already tried 0 time(s); retry policy is RetryUpT

sbin/start-yarn.sh  # starting yarn resolves this

If the following appears:

org.apache.hadoop.mapred.FileAlreadyExistsException: Output directory hdfs://localhost:9000/user/dataexa/output already exists

hdfs dfs -rm -r output
then re-run the job

Full Hadoop deployment
[jdk and hadoop environment variables moved under the dataexa user]

Extract

tar xf /home/dataexa/insight-deploy/resources/jdk/jdk-8u231-linux-x64.tar.gz -C /home/dataexa/insight-deploy/resources/jdk

Add the environment variables

vi ~/.bashrc
export JAVA_HOME=/home/dataexa/insight-deploy/resources/jdk/jdk1.8.0_231
export PATH=$JAVA_HOME/bin:$PATH
export HADOOP_HOME=/home/dataexa/insight-deploy/resources/hadoop/hadoop-2.9.0
export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH

Apply the environment for the dataexa user

source ~/.bashrc
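
Quick check that both variables took effect:

java -version
hadoop version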

Go to the hadoop directory

cd /home/dataexa/insight-deploy/resources/hadoop/hadoop-2.9.0
vi etc/hadoop/hadoop-env.sh
export JAVA_HOME=/home/dataexa/insight-deploy/resources/jdk/jdk1.8.0_231
export PATH=$JAVA_HOME/bin:$PATH

In the Hadoop installation directory, create the input directory

mkdir input

Copy the input files into the input directory

cp etc/hadoop/*.xml input

Run a Hadoop job

hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.9.0.jar grep input output 'dfs[a-z.]+'
The job above runs the example jar bundled with hadoop, counting the strings in input that match the regex dfs[a-z.]+.

Check the result

cat output/*
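
The output is count/pattern pairs; on a stock 2.9.0 configuration it is typically something like (exact matches vary with configuration):

1 dfsadmin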

Add the host address mapping

sudo vi /etc/hosts
192.168.1.237 localhost

Edit the configuration files

vi etc/hadoop/core-site.xml

<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://localhost:9000</value>
  </property>
</configuration>

vi etc/hadoop/hdfs-site.xml

<configuration>
  <property>
    <name>dfs.replication</name>
    <value>1</value>
  </property>
</configuration>

Set up passwordless ssh login to this machine

ssh-keygen -t rsa
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
chmod 0600 ~/.ssh/authorized_keys

Test

ssh dataexa@192.168.1.237

Format the filesystem

hdfs namenode -format

Start the NameNode and DataNode daemons

sbin/start-dfs.sh

Verify

jps
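
Typical output when HDFS is up (PIDs will differ):

12001 NameNode
12202 DataNode
12403 SecondaryNameNode
12604 Jps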

Open the NameNode web UI

http://192.168.1.237:50070/

Create the HDFS directory

hdfs dfs -mkdir -p /user/test

Copy the input files into the HDFS directory

hdfs dfs -put etc/hadoop /user/test/input

Verify by listing

hadoop fs -ls /user/test/input

Run the Hadoop job

hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.9.0.jar grep /user/test/input output 'dfs[a-z.]+'

Check the result

hdfs dfs -cat output/*

Or copy the output from HDFS to local and view it

bin/hdfs dfs -get output output
cat output/*

Stop the daemons

sbin/stop-dfs.sh

Run a YARN job

Edit the configuration files
cp etc/hadoop/mapred-site.xml.template etc/hadoop/mapred-site.xml
vi etc/hadoop/mapred-site.xml

<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
</configuration>

vi etc/hadoop/yarn-site.xml

<configuration>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
</configuration>

Start the ResourceManager and NodeManager daemons

sbin/start-yarn.sh
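
jps should now additionally list ResourceManager and NodeManager:

jps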

Open the ResourceManager web UI

http://192.168.1.237:8088/

# Run the hadoop job
hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.9.0.jar grep /user/test/input output 'dfs[a-z.]+'

If this step fails, run sbin/stop-yarn.sh and then sbin/start-yarn.sh

Check the result

hdfs dfs -cat output/*

Stop the daemons

sbin/stop-yarn.sh

Start the jobhistory daemon

sbin/mr-jobhistory-daemon.sh start historyserver

Open the Job History Server web UI; if data appears, it is working

http://localhost:19888/