监控思路:一个是集群成员的健康状态,一个是连接数。
1.通过命令rs.status()检查集群内成员的健康状态 MongoDB Enterprise config-rs:PRIMARY> rs.status() { "set" : "config-rs", # 副本集已经配置成功 "date" : ISODate("2019-11-23T04:56:35.588Z"), "myState" : 1, "term" : NumberLong(1), "syncingTo" : "", "syncSourceHost" : "", "syncSourceId" : -1, "configsvr" : true, "heartbeatIntervalMillis" : NumberLong(2000), "majorityVoteCount" : 2, "writeMajorityCount" : 2, "optimes" : { "lastCommittedOpTime" : { "ts" : Timestamp(1574484982, 1), "t" : NumberLong(1) }, "lastCommittedWallTime" : ISODate("2019-11-23T04:56:22.464Z"), "readConcernMajorityOpTime" : { "ts" : Timestamp(1574484982, 1), "t" : NumberLong(1) }, "readConcernMajorityWallTime" : ISODate("2019-11-23T04:56:22.464Z"), "appliedOpTime" : { "ts" : Timestamp(1574484982, 1), "t" : NumberLong(1) }, "durableOpTime" : { "ts" : Timestamp(1574484982, 1), "t" : NumberLong(1) }, "lastAppliedWallTime" : ISODate("2019-11-23T04:56:22.464Z"), "lastDurableWallTime" : ISODate("2019-11-23T04:56:22.464Z") }, "lastStableRecoveryTimestamp" : Timestamp(1574484952, 30), "lastStableCheckpointTimestamp" : Timestamp(1574484952, 30), "electionCandidateMetrics" : { "lastElectionReason" : "electionTimeout", "lastElectionDate" : ISODate("2019-11-23T04:55:51.134Z"), "termAtElection" : NumberLong(1), "lastCommittedOpTimeAtElection" : { "ts" : Timestamp(0, 0), "t" : NumberLong(-1) }, "lastSeenOpTimeAtElection" : { "ts" : Timestamp(1574484951, 1), "t" : NumberLong(-1) }, "numVotesNeeded" : 1, "priorityAtElection" : 1, "electionTimeoutMillis" : NumberLong(10000), "newTermStartDate" : ISODate("2019-11-23T04:55:52.141Z"), "wMajorityWriteAvailabilityDate" : ISODate("2019-11-23T04:55:52.266Z") }, "members" : [ { "_id" : 0, "name" : "worker2:27018", "ip" : "192.168.255.134", "health" : 1, # 监控该值,不是1就触发告警 "state" : 1, "stateStr" : "PRIMARY", "uptime" : 722, "optime" : { "ts" : Timestamp(1574484982, 1), "t" : NumberLong(1) }, "optimeDate" : ISODate("2019-11-23T04:56:22Z"), "syncingTo" : "", "syncSourceHost" : "", "syncSourceId" : -1, 
"infoMessage" : "could not find member to sync from", "electionTime" : Timestamp(1574484951, 2), "electionDate" : ISODate("2019-11-23T04:55:51Z"), "configVersion" : 2, "self" : true, "lastHeartbeatMessage" : "" }, { "_id" : 1, "name" : "worker2:27019", "ip" : "192.168.255.134", "health" : 1, "state" : 2, "stateStr" : "SECONDARY", "uptime" : 13, "optime" : { "ts" : Timestamp(1574484982, 1), "t" : NumberLong(1) }, "optimeDurable" : { "ts" : Timestamp(1574484982, 1), "t" : NumberLong(1) }, "optimeDate" : ISODate("2019-11-23T04:56:22Z"), "optimeDurableDate" : ISODate("2019-11-23T04:56:22Z"), "lastHeartbeat" : ISODate("2019-11-23T04:56:34.705Z"), "lastHeartbeatRecv" : ISODate("2019-11-23T04:56:35.176Z"), "pingMs" : NumberLong(0), "lastHeartbeatMessage" : "", "syncingTo" : "", "syncSourceHost" : "", "syncSourceId" : -1, "infoMessage" : "", "configVersion" : 2 } ], "ok" : 1, "$gleStats" : { "lastOpTime" : { "ts" : Timestamp(1574484982, 1), "t" : NumberLong(1) }, "electionId" : ObjectId("7fffffff0000000000000001") }, "lastCommittedOpTime" : Timestamp(1574484982, 1), "$clusterTime" : { "clusterTime" : Timestamp(1574484982, 1), "signature" : { "hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="), "keyId" : NumberLong(0) } }, "operationTime" : Timestamp(1574484982, 1) } 分段 说明 set 当前副本集名称 date 执行命令时间 myState 当前节点的状态(角色) syncingTo 同步源 heartbeatIntervalMillis 心跳间隔 members 节点成员 members._id 成员编号 members.name 成员名称 members.health 健康状态,1-true,0-false 监控该值,为0时结合zabbix触发告警 members.state 成员状态(角色)1-主节点 2-备节点 7-仲裁节点 members.stateStr 成员状态名 members.uptime 成员启动运行时长 members.optime 成员oplog时间戳(字段ts) members.optimeDate 成员oplog时间(格式化) members.lastHeartbeat 当前节点对成员的最后一个心跳 members.lastHeartbeatRecv 当前节点收到该成员的最后一个心跳 members.pingMs 当前节点到该成员的回路时长 members.syncingTo 成员同步源 members.electionTime 主节点选举时间戳(ms) members.electionDate 主节点选举时间(格式化) 2.通过mongostat监控conn连接数字段 $ mongostat --host 192.168.255.134 --port 27017 -u adminUser -p adminPass --authenticationDatabase="admin" --json 
{"192.168.255.134:27017":{"arw":"1|0","command":"2|0","conn":"2","delete":"*0","dirty":"0.0%","flushes":"0","getmore":"0","insert":"*0","net_in":"167b","net_out":"35.4k","qrw":"0|0","query":"*0","res":"83.0M","time":"14:27:26","update":"*0","used":"0.0%","vsize":"1.52G"}}
3.可以引入pymongo模块进行监控,具体脚本就不贴出了,比较简单。连接数默认大于2000就有问题,也可以结合zabbix进行自定义监控,连接数大于2000就触发告警。