InnoDB Cluster

Operating-system-level configuration

Temporarily stop the firewall

systemctl stop firewalld

Permanently disable the firewall from starting at boot

systemctl disable firewalld

Temporarily start the firewall

systemctl start firewalld

Enable the firewall to start at boot

systemctl enable firewalld

Check the firewall status

systemctl status firewalld

 

Temporarily disable SELinux

setenforce 0

Permanently disable it

Edit /etc/selinux/config and set SELINUX to disabled.
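If you prefer to script that change, a minimal sketch (assuming the stock /etc/selinux/config layout) is:

sed -i 's/^SELINUX=.*/SELINUX=disabled/' /etc/selinux/config
getenforce   # the file change only takes effect after a reboot; use setenforce 0 until then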

 

 

Operating-system-level resource limits

$ vim /etc/security/limits.conf and add:

mysql soft nproc 2047

mysql hard nproc 16384

mysql soft nofile 1024

mysql hard nofile 65535
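To confirm the new limits are picked up, open a fresh login shell as the mysql user and check (whether limits.conf is applied here depends on your PAM configuration):

su - mysql -s /bin/bash -c 'ulimit -u -n'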

 

Configure host entries

[root@cluster3 bin]# cat /etc/hosts

127.0.0.1   localhost localhost.localdomain localhost4 localhost4.localdomain4

::1         localhost localhost.localdomain localhost6 localhost6.localdomain6

192.168.0.210 cluster1

192.168.0.220 cluster2

192.168.0.230 cluster3

 

Install MySQL

mysql-8.0/bin/mysqld --initialize-insecure --basedir=/u01/mysql-8.0.13 --datadir=/data/ --user=mysql
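Once initialized, the server can be started against the parameter file shown below; the my.cnf path here is only an assumption, point it at wherever you keep the file:

/u01/mysql-8.0.13/bin/mysqld_safe --defaults-file=/etc/my.cnf --user=mysql &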

 

Configure the MySQL parameter file

[client]

default-character-set = utf8

port    = 3306

socket  = /data/mysql.sock

[mysqld]

user = mysql

character_set_server = utf8

basedir = /u01/mysql-8.0.13

datadir = /data

tmpdir  = /data

log-error = /data/mysql_error.log

pid-file = /data/mysql.pid

port = 3306

socket = /data/mysql.sock

max_connections = 3000

open_files_limit = 65535

max_connect_errors = 6000

skip-host-cache

skip-external-locking

skip-name-resolve

max_allowed_packet = 32M

read_rnd_buffer_size = 16M

join_buffer_size = 2M

sort_buffer_size = 2M

thread_cache_size = 300

tmp_table_size = 64M

max_heap_table_size = 64M

binlog_format = ROW

transaction_isolation = READ-COMMITTED

#INNODB

innodb_buffer_pool_size = 200M

innodb_log_file_size = 50M

innodb_log_buffer_size = 8M

innodb_log_files_in_group = 3

innodb_file_per_table = 1

default-storage-engine = InnoDB

#log

expire_logs_days = 5

slow_query_log = true

long_query_time = 5

slow-query-log-file = /data/mysql_slow.log

lower_case_table_names=0

 

# Replication configuration parameters

server_id=1

gtid_mode=ON

enforce_gtid_consistency=ON

binlog_checksum=NONE

 

log-bin=mysql-bin

log_slave_updates=ON

binlog_format=ROW

master_info_repository=TABLE

relay_log_info_repository=TABLE
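The file above is for cluster1. server_id must be unique within the group, so change it on the other two nodes before starting MySQL (a sketch, assuming the file lives at /etc/my.cnf):

sed -i 's/^server_id=1/server_id=2/' /etc/my.cnf   # on cluster2
sed -i 's/^server_id=1/server_id=3/' /etc/my.cnf   # on cluster3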

 

 

After starting MySQL, configure the users

Privileges required by MGR

CREATE USER rpl_user@'%' IDENTIFIED BY 'password';

GRANT REPLICATION SLAVE ON *.* TO rpl_user@'%';

GRANT BACKUP_ADMIN ON *.* TO rpl_user@'%';

FLUSH PRIVILEGES;

 

GRANT SELECT ON mysql_innodb_cluster_metadata.* TO your_user@'%';

GRANT SELECT ON performance_schema.global_status TO your_user@'%';

GRANT SELECT ON performance_schema.replication_applier_configuration TO your_user@'%';

GRANT SELECT ON performance_schema.replication_applier_status TO your_user@'%';

GRANT SELECT ON performance_schema.replication_applier_status_by_coordinator TO your_user@'%';

GRANT SELECT ON performance_schema.replication_applier_status_by_worker TO your_user@'%';

GRANT SELECT ON performance_schema.replication_connection_configuration TO your_user@'%';

GRANT SELECT ON performance_schema.replication_connection_status TO your_user@'%';

GRANT SELECT ON performance_schema.replication_group_member_stats TO your_user@'%';

GRANT SELECT ON performance_schema.replication_group_members TO your_user@'%';

GRANT SELECT ON performance_schema.threads TO your_user@'%' WITH GRANT OPTION;
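To verify that the grants landed as intended (using the server socket from the parameter file above; your_user is whatever account you actually granted the metadata privileges to):

mysql -uroot -S /data/mysql.sock -e "SHOW GRANTS FOR 'rpl_user'@'%';"
mysql -uroot -S /data/mysql.sock -e "SHOW GRANTS FOR 'your_user'@'%';"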

 

Create a user (the simple way)

SET SQL_LOG_BIN=0;

CREATE USER root@'%' IDENTIFIED BY '123456';

GRANT all on *.* TO root@'%';

FLUSH PRIVILEGES;

SET SQL_LOG_BIN=1;

 

Install the MGR plugin

INSTALL PLUGIN group_replication SONAME 'group_replication.so';

SHOW PLUGINS;
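SHOW PLUGINS lists everything; to look at just the group replication plugin, a small check using the same socket as above:

mysql -uroot -S /data/mysql.sock -e "SELECT PLUGIN_NAME, PLUGIN_STATUS FROM information_schema.PLUGINS WHERE PLUGIN_NAME = 'group_replication';"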

 

Configure the cluster with MySQL Shell

Check whether the three instances meet the InnoDB Cluster requirements

dba.configureInstance('repl@cluster1:3306')

dba.configureInstance('repl@cluster2:3306')

dba.configureInstance('repl@cluster3:3306')

 

Connect to one of the instances and create the cluster

\connect root@cluster1:3306

 

var cluster = dba.createCluster('myCluster')

var cluster = dba.getCluster()

 

cluster.addInstance('root@cluster2:3306')

cluster.addInstance('root@cluster3:3306')

 

Check the cluster status

cluster.status()
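The same check can also be scripted from the operating system instead of an interactive session; a sketch using mysqlsh's --execute option and the root account created earlier:

mysqlsh --uri root@cluster1:3306 --js -e "print(dba.getCluster('myCluster').status())"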

 

Configure MySQL Router

Install it and bootstrap it against the cluster:

./mysqlrouter --bootstrap root@cluster1:3306 --directory /u01/mysql-router-8.0.13/ --user=mysql

A start script is generated automatically:

./data/start.sh
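The bootstrap above exposes the classic-protocol read-write port on 6446 and the read-only port on 6447 (the same ports the keepalived and LVS configuration below balances). A quick test through the router:

mysql -uroot -p -h cluster1 -P 6446 -e "SELECT @@hostname;"   # should land on the primary
mysql -uroot -p -h cluster1 -P 6447 -e "SELECT @@hostname;"   # should land on a secondary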

 

MGR monitoring views

SELECT * FROM performance_schema.replication_group_members\G

performance_schema.replication_group_member_stats

performance_schema.replication_group_members

These Performance Schema replication tables also show information about Group Replication:

performance_schema.replication_connection_status shows information regarding Group Replication, for example the transactions that have been received from the group and queued in the applier queue (the relay log).

performance_schema.replication_applier_status shows the state of the Group Replication related channels and threads. If there are many different worker threads applying transactions, the worker tables can also be used to monitor what each worker thread is doing.

The replication channels created by the Group Replication plugin are named:

group_replication_recovery - used for the replication changes that are related to the distributed recovery phase.

group_replication_applier - used for the incoming changes from the group; this is the channel used to apply transactions coming directly from the group.
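For day-to-day checks, a short query against replication_group_members is usually enough to see each member's state and, on MySQL 8.0, its role:

mysql -uroot -S /data/mysql.sock -e "SELECT MEMBER_HOST, MEMBER_PORT, MEMBER_STATE, MEMBER_ROLE FROM performance_schema.replication_group_members;"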

 

## Switch the primary to another member

SELECT group_replication_set_as_primary('102c2bc6-18c1-11e9-92a6-000c296459b4');
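group_replication_set_as_primary() expects the server UUID (MEMBER_ID) of the target node; it can be looked up first:

mysql -uroot -S /data/mysql.sock -e "SELECT MEMBER_ID, MEMBER_HOST FROM performance_schema.replication_group_members;"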

 

## Switch MGR to single-primary mode

SELECT group_replication_switch_to_single_primary_mode()

 

## Switch MGR to multi-primary mode

SELECT group_replication_switch_to_multi_primary_mode()

 

dba.dropMetadataSchema()

cluster.removeInstance("root@cluster2:3306", {force: true})

 

## Restart the cluster after a complete outage

var cluster = dba.rebootClusterFromCompleteOutage();

 

 

  • dba.configureInstance()
  • dba.createCluster()
  • Cluster.addInstance()
  • Cluster.removeInstance()
  • Cluster.rejoinInstance()
  • Cluster.rescan()

 

 

Common cluster management commands

dba.checkInstanceConfiguration("root@hostname:3306") // check an instance's configuration before adding it to the cluster

dba.rebootClusterFromCompleteOutage('myCluster'); // restart the cluster

dba.dropMetadataSchema(); // drop the metadata schema

var cluster = dba.getCluster('myCluster') // get the current cluster

cluster.checkInstanceState("root@hostname:3306") // check the state of a node in the cluster

cluster.rejoinInstance("root@hostname:3306") // rejoin a node; in my local tests rejoin never took effect, and each time I had to remove the instance and add it back

cluster.dissolve({force:true}) // delete the cluster

cluster.addInstance("root@hostname:3306") // add a node

cluster.removeInstance("root@hostname:3306") // remove a node

cluster.removeInstance('root@host:3306',{force:true}) // force-remove a node

cluster.dissolve({force:true}) // dissolve the cluster

cluster.describe(); // describe the cluster

cluster.rescan(); // update the metadata

 

 

Common problems

Problem 1: [ERROR] Slave SQL for channel 'group_replication_recovery': Could not execute Write_rows event on table mysql_innodb_cluster_metadata.instances; Cannot add or update a child row: a foreign key constraint fails (mysql_innodb_cluster_metadata.instances, CONSTRAINT instances_ibfk_1 FOREIGN KEY (host_id) REFERENCES hosts (host_id)), Error_code: 1452; handler error HA_ERR_NO_REFERENCED_ROW; the event's master log binlog.000001, end_log_pos 3059, Error_code: 1452

Fix: empty the table mysql_innodb_cluster_metadata.hosts and then recreate the cluster.

 

 

Problem 2: This member has more executed transactions than those present in the group

Fix:

mysql-> stop group_replication;

mysql-> reset master;

 

 

Problem 3: OS resource limits for the mysql user

[Warning] Buffered warning: Changed limits: max_open_files: 1024 (requested 5000)

[Warning] Buffered warning: Changed limits: table_open_cache: 431 (requested 2000)

Fix:

$ vim /etc/security/limits.conf and add:

mysql soft nproc 2047

mysql hard nproc 16384

mysql soft nofile 1024

mysql hard nofile 65535

 

 

Problem 4: dba.rebootClusterFromCompleteOutage: The active session instance isn't the most updated in comparison with the ONLINE instances of the Cluster's metadata.

Tables on some of the machines were changed while the cluster was down, so the data diverged.

Fix:

Clear the binlogs on every MySQL machine with reset master:

mysql> reset master;

mysql> show master logs;

Then run dba.rebootClusterFromCompleteOutage() again to restart the cluster.

 

 

Problem 5: service mysql restart cannot restart MySQL; mysqld hangs and keeps logging '[Note] Plugin group_replication reported: '[GCS] cli_err 2''

Fix: the only way to stop MySQL in this state is: sudo pkill -9 mysqld

 

 

Problem 6: How do I change from multi-primary to single-primary?

(1) Dissolve the existing cluster: mysql-js> cluster.dissolve({force: true})

(2) On every host, change the following MySQL settings:

mysql> set global group_replication_enforce_update_everywhere_checks=OFF;

mysql> set global group_replication_single_primary_mode=ON;

(3) Recreate the cluster:

mysql-js> var cluster = dba.createCluster('mysqlCluster');

mysql-js> cluster.addInstance('chianyu@svr2:3306');

mysql-js> cluster.addInstance('chianyu@svr3:3306');

 

High availability with MySQL Router + keepalived

yum -y install keepalived*

 

 

[root@cluster1 mysql-router-8.0.13]# cat /etc/keepalived/keepalived.conf

 

vrrp_script chk_router {

    script "/u01/mysql-router-8.0.13/check_router.sh"

    interval 2

    weight -20

}

 

 

vrrp_instance VI_1 {

    state MASTER

    interface ens33

    virtual_router_id 51

    priority 100

    advert_int 1

    authentication {

        auth_type PASS

        auth_pass 1111

    }

 

    track_script {

        chk_router

    }      

 

    virtual_ipaddress {

        192.168.0.200

    }

}

 

Health-check script:

[root@cluster1 mysql-router-8.0.13]# cat check_router.sh

#!/bin/bash

counter=$(netstat -na|grep "LISTEN"|grep "6446"|wc -l)

if [ "${counter}" -eq 0 ]; then

    systemctl stop keepalived

fi

 

After the configuration is complete, check the /var/log/messages log.

 

[root@cluster2 mysql-router-8.0.13]# cat /etc/keepalived/keepalived.conf

 

vrrp_script chk_router {

    script "/u01/mysql-router-8.0.13/check_router.sh"

    interval 2

    weight -20

}

 

 

vrrp_instance VI_1 {

    state BACKUP

    interface ens33

    virtual_router_id 51

    priority 95

    advert_int 1

    authentication {

        auth_type PASS

        auth_pass 1111

    }

 

    track_script {

        chk_router

    }      

 

    virtual_ipaddress {

        192.168.0.200

    }

}

 

 

Grant permissions on the script

chown mysql:mysql check_router.sh

chmod 755 check_router.sh

 

Start/stop keepalived

systemctl start keepalived

systemctl stop keepalived
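After keepalived is up on both routers, the VIP should be attached to the MASTER node and move to the backup when check_router.sh stops keepalived. A quick way to see who currently owns it:

ip addr show ens33 | grep 192.168.0.200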

 

LVS DR mode configuration

(1) Install ipvsadm

 

yum -y install ipvsadm

(2) Enable IPv4 forwarding

 

sysctl -w net.ipv4.ip_forward=1
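sysctl -w only lasts until the next reboot; to make the setting persistent (assuming sysctl settings are kept in /etc/sysctl.conf):

echo 'net.ipv4.ip_forward = 1' >> /etc/sysctl.conf
sysctl -p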

Script run on the director (control) node

[root@cluster1 opt]# cat 1.sh

#!/bin/bash

#

vip=192.168.0.200

iface='ens33:0'

mask='255.255.255.255'

port='6446'

rs1='192.168.0.220'

rs2='192.168.0.230'

scheduler='wrr'

case $1 in

start)

     ifconfig $iface $vip netmask $mask broadcast $vip up

     iptables -F

     ipvsadm -A -t ${vip}:${port} -s $scheduler

     ipvsadm -a -t ${vip}:${port} -r $rs1 -g -w 1

     ipvsadm -a -t ${vip}:${port} -r $rs2 -g -w 2

     ;;

stop)

     ipvsadm -C

     ifconfig $iface down

     ;;

*)

     echo "Usage: $(basename $0) {start|stop|status}"

     exit 1

     ;;

esac

 

Script run on the real (backend) servers

[root@cluster2 opt]# cat 1.sh

#!/bin/bash

#

vip=192.168.0.200

mask='255.255.255.255'

case $1 in

start)

     echo 1 > /proc/sys/net/ipv4/conf/all/arp_ignore

     echo 1 > /proc/sys/net/ipv4/conf/lo/arp_ignore

     echo 2 > /proc/sys/net/ipv4/conf/all/arp_announce

     echo 2 > /proc/sys/net/ipv4/conf/lo/arp_announce

     /sbin/ifconfig lo:0 $vip netmask $mask  broadcast $vip up

     route add -host $vip dev lo:0

     ;;

stop)

     /sbin/ifconfig lo:0  down

     echo 0 > /proc/sys/net/ipv4/conf/all/arp_ignore

     echo 0 > /proc/sys/net/ipv4/conf/lo/arp_ignore

     echo 0 > /proc/sys/net/ipv4/conf/all/arp_announce

     echo 0 > /proc/sys/net/ipv4/conf/lo/arp_announce

     route del -host $vip dev lo:0

     ;;

status)

     # Status of LVS-DR real server.

     islothere=`/sbin/ifconfig lo:0 | grep $vip`

     isrothere=`netstat -rn | grep "lo:0" | grep $vip`

     if [ ! "$islothere" -o ! "$isrothere" ]; then

         # Either the route or the lo:0 device

         # not found.

         echo "LVS-DR real server Stopped."

     else

         echo "LVS-DR real server Running."

     fi

;;

*)

     echo "Usage $(basename $0) start|stop"

     exit 1

     ;;

esac

 

Check how connections are being distributed

[root@cluster1 opt]# ipvsadm -L -n --stats
IP Virtual Server version 1.2.1 (size=4096)
Prot LocalAddress:Port               Conns   InPkts  OutPkts  InBytes OutBytes
  -> RemoteAddress:Port
TCP  192.168.0.200:6446                  0        0        0        0        0
  -> 192.168.0.220:6446                  0        0        0        0        0
  -> 192.168.0.230:6446                  0        0        0        0        0
TCP  192.168.0.200:6447                  0        0        0        0        0
  -> 192.168.0.220:6447                  0        0        0        0        0
  -> 192.168.0.230:6447                  0        0        0        0        0
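As an end-to-end test, connect to the VIP on the router ports and watch the counters above increase (the root account and password prompt are assumptions for this sketch):

mysql -uroot -p -h 192.168.0.200 -P 6446 -e "SELECT @@hostname;"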
