最近老项目频繁挂掉,由于项目经过多批人之手,短期内难以定位问题,因此只好写一个监控程序。时间比较紧(只有半天),并且水平有限,大神勿喷,有好的方法还请赐教。 一、问题描述:分两种情况:1.1 tomcat 完全挂掉,端口不再被进程占用;1.2 没有完全挂掉,端口仍被进程占用,但是接口访问异常。 二、解决思路:启动一个 Java 服务,轮询(10 秒钟一次)被监控程序的一个接口(比如获取当前时间);如果请求不到,则查看该端口是否被进程占用:如果被占用则杀死进程,然后重启 tomcat;如果没有被占用则直接重启 tomcat。原本考虑的是监控多个程序,但是由于时间问题就先监控一个吧。
3.1 轮询接口
@Service public class SchedulerService { private static final Logger logger = LoggerFactory.getLogger(SchedulerService.class); @Autowired private KillService killService; @Value("#{system['jiankong.ports']}") private String portsStr; @Value("#{system['url']}") private String url; /** * 监控接口是否通 若是接口不通 或者返回结果不对则重启服务 并发送邮件 每10秒执行一次扫描 * @author gaozemin * @date 2017年10月18日 * @throws Exception * @return */ public void watch() throws Exception { String[] ports = portsStr.split(","); for (String port : ports) { // 调用测试接口 String ret = HttpUtil.sendPost(url, null); if (ret == null) {// 若是返回结果为空重启服务 logger.info("返回结果为null "); killService.start(Integer.valueOf(port)); } else { try { Map retMap = JSONObject.parseObject(ret, Map.class); String retFlag = String.valueOf(retMap.get("result")); if (!"200".equals(retFlag)) {// 若是返回结果异常 重启服务 killService.start(Integer.valueOf(port)); } else { logger.info("系统运行正常...."); } } catch (Exception e) { logger.info("返回值解析异常...."); killService.start(Integer.valueOf(port)); } } logger.info("监控执行中.."); } }
3.2 如果监控到异常则重启服务
@Service public class KillService { private static final Logger logger = LoggerFactory.getLogger(KillService.class); @Value("#{system['waitTime']}") private Long waitTime; @Value("#{system['startTomcatExec']}") private String startTomcatExec; @Value("#{system['startLinuxTomcatExec']}") private String startLinuxTomcatExec; @Value("#{system['findPid']}") private String findPid; @Value("#{system['isLinux']}") private boolean isLinux; @Value("#{system['send.emails']}") private String emails; @Autowired private SendMail sendMail; private Map<Integer, Date> map = new HashMap(); public void start(int port) { // 先每10秒 杀死一次进程 而后重启一次 ,执行重启后5分钟后再从新执行扫描,确保程序从新启动 // 1 获取 指定端口的进程号 // 若是调用接口失败则杀死进程并从新启动 ,并记录当前时间 ,不然不进行操做 Date lastExecTime = map.get(port); if (lastExecTime != null) {// 若是存在重启记录则判断重启时间是否间隔5分钟 Date nowTome = new Date(); Long subTime = nowTome.getTime() - lastExecTime.getTime(); logger.info("间隔时间:{}", subTime); if (subTime < waitTime) { logger.info("间隔时间太短 等待程序启动!"); return; } else { map.put(port, new Date()); restartTomcat(port, isLinux); } } else { map.put(port, new Date()); restartTomcat(port, isLinux); } } private void restartTomcat(int port, boolean isLinux) { Runtime runtime = Runtime.getRuntime(); try { if (isLinux) { // 查找进程号 linuxRestart(port); StartTomcatThread a = new StartTomcatThread(startLinuxTomcatExec); a.start(); } else { Process p = runtime.exec(findPid + port); InputStream inputStream = p.getInputStream(); List<String> read = read(port, inputStream, "UTF-8"); if (read.size() == 0) { logger.info("找不到端口:{}的进程", port); StartTomcatThread a = new StartTomcatThread(startTomcatExec); a.start(); logger.info("tomcat已重启"); } else { logger.info("找到" + read.size() + "个进程,正在准备清理"); kill(read); StartTomcatThread a = new StartTomcatThread(startTomcatExec); a.start(); } } String dataStr = "admin 服务宕机 现已自动重启 请及时查看日志 修改错误!"; String[] emailStrs = emails.split(","); for (String email : emailStrs) { sendMail.sendMsg(email, dataStr); } } catch (Exception e) { 
e.printStackTrace(); } } /** * 验证此行是否为指定的端口,由于 findstr命令会是把包含的找出来,例如查找80端口,可是会把8099查找出来 * * @param str * @return */ private boolean validPort(int port, String str) { String patternString = "^ *[a-zA-Z]+ +\\S+"; Pattern pattern = Pattern.compile(patternString); Matcher matcher = pattern.matcher(str); matcher.find(); String find = matcher.group(); int spstart = find.lastIndexOf(":"); find = find.substring(spstart + 1); int findport = 0; try { findport = Integer.parseInt(find); } catch (NumberFormatException e) { System.out.println("查找到错误的端口:" + find); return false; } if (port == findport) { return true; } else { return false; } } public void linuxRestart(int port) throws IOException, InterruptedException { String cmd = "kill -9 $(netstat -tlnp|grep " + port + "|awk '{print $7}'|awk -F '/' '{print $1}')"; String[] command = { "sh", "-c", cmd }; Process pro = Runtime.getRuntime().exec(command); pro.waitFor(); // cmd = path; // pro = Runtime.getRuntime().exec(cmd); // pro.waitFor(); } /** * 更换为一个Set,去掉重复的pid值 * * @param data */ public void kill(List<String> data) { Set<Integer> pids = new HashSet<>(); logger.info("列表:{}" + pids); for (String line : data) { int offset = line.lastIndexOf(" "); String spid = line.substring(offset); spid = spid.replaceAll(" ", ""); int pid = 0; try { pid = Integer.parseInt(spid); } catch (NumberFormatException e) { System.out.println("获取的进程号错误:" + spid); } pids.add(pid); } killWithPid(pids); } /** * 一次性杀除全部的端口 * * @param pids */ public void killWithPid(Set<Integer> pids) { for (Integer pid : pids) { try { Process process = Runtime.getRuntime().exec("taskkill /F /pid " + pid + ""); InputStream inputStream = process.getInputStream(); String txt = readTxt(inputStream, "UTF-8"); logger.info(txt); } catch (IOException e) { e.printStackTrace(); } } } private List<String> read(int port, InputStream in, String charset) throws IOException { List<String> data = new ArrayList<>(); BufferedReader reader = new BufferedReader(new InputStreamReader(in, 
charset)); String line; while ((line = reader.readLine()) != null) { boolean validPort = validPort(port, line); if (validPort) { data.add(line); } } reader.close(); return data; } public String readTxt(InputStream in, String charset) throws IOException { BufferedReader reader = new BufferedReader(new InputStreamReader(in, charset)); StringBuffer sb = new StringBuffer(); String line; while ((line = reader.readLine()) != null) { sb.append(line); } reader.close(); return sb.toString(); } }
源代码