如何让Java应用成为杀不死的小强?(下篇)

【这是一猿小讲的第 49 篇原创分享】


各位坐稳扶好,咱们要开车了。不过在开车之前,咱们还是例行回顾一下上期分享的要点。


通过前两期的铺垫及烧脑的分享,咱们大概对「如何实现 Java 应用进程的状态监控,如果被监控的进程 down 掉,是否有机制能把它重新启动起来?」这个问题本身有了一个新的认识,那这期咱们不妨拿出攻城狮的绝招 Ctrl + C、Ctrl + V,从 Resin 源码中摘取一二,稍微简单实践一下。



按照图示,我们先演示一下实践效果吧,首先找到并运行程序入口 MonitorApp,日志输出如下。


此时咱们不妨在控制台输入 jps 命令,看一看效果。


18830 MonitorApp

18831 Resin


发现成功启动了 MonitorApp、Resin 两个进程,和 Resin 应用服务器是如出一辙的,如果咱们把进程号为 18831 的进程 kill 掉,会是什么效果?发现控制台日志输出又多了一些,貌似丫环 Resin 又被重新启动了。


在控制台输入 jps 命令再确认一下是否真的变了。


18830 MonitorApp

18935 Resin


那咱们到底该如何实现?不妨照葫芦画瓢,模仿一下 Resin 的实现(这就是绝招:仿一仿)。


首先定义咱们的监控应用入口 MonitorApp,很简单,就是把创建子进程的任务给启动起来。

package com.caucho.server.resin;
public class MonitorApp {
    public static void main(String[] args) {
        new WatchdogChildTask().start();
    }
}复制代码

接下来再编写 WatchdogChildTask 子进程任务的代码,大部分来源于 Resin 的源码,只是剔除了不少不少不少,简化了不少不少不少。仔细看发现也很简单,就有一个循环一直调用 WatchdogChildProcess 的 run 方法,目的也就是一直让丫环进程跑起来。

package com.caucho.server.resin;

import java.util.concurrent.Executors;
import java.util.logging.Level;
import java.util.logging.Logger;

class WatchdogChildTask implements Runnable {

    private static final Logger log = Logger.getLogger(WatchdogChildTask.class.getName());

    private WatchdogChildProcess _process;

    /**
     * Starts management of the watchdog process
     */
    public void start() {
        //TODO 手动建立线程池会更好 【阿里开发规约】
        Executors.newFixedThreadPool(1).execute(this);
    }

    /**
     * Main thread watching over the health of the Resin instances.
     */
    public void run() {
        try {
            int i = 0;
            long retry = Long.MAX_VALUE;
            while (i++ < retry) {
                WatchdogChildProcess process = new WatchdogChildProcess();
                _process = process;
                try {
                    log.log(Level.INFO, "我是大总管,准备让乳名为Resin的丫环跑起来");
                    _process.run();
                } catch (Exception e) {
                    log.log(Level.WARNING, e.toString(), e);
                } finally {
                    _process = null;
                    if (process != null) {
                        log.log(Level.INFO, "我是大总管,发现乳名为Resin的丫环出情况了,须要让她释放资源,从新跑起来");
                        process.kill();
                    }
                }
            }
        } catch (Exception e) {
            log.log(Level.WARNING, e.toString(), e);
        } finally {
            if (_process != null) {
                _process.kill();
                _process = null;
            }
        }
    }
}复制代码

具体是怎么把丫环进程跑起来的,这个事情专门交给 WatchdogChildProcess 去做了:先启动了一个 socket 通信端口;而后采用 ProcessBuilder 启动 Resin 进程;而后等待丫环进程建立 socket 连接通信。大部分也是来源于 Resin 的源码,只不过做了大量删减。另外重点提一嘴:拿下去只需修改 com.caucho.server.resin.Resin 为你要监控应用的主函数即可。

package com.caucho.server.resin;

import java.io.*;
import java.net.*;
import java.util.*;
import java.util.concurrent.atomic.AtomicReference;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * Launches a single child JVM and supervises it until it exits.
 *
 * <p>{@link #run()} opens a loopback server socket on an ephemeral port, starts
 * the child with that port as an argument, relays the child's output to this
 * process's standard output, waits for the child to connect back and then blocks
 * until the child terminates. {@link #kill()} releases everything again.
 */
class WatchdogChildProcess {

    private static final Logger log = Logger.getLogger(WatchdogChildProcess.class.getName());

    /** Timeout of a single {@code accept()} attempt, in milliseconds. */
    private static final int ACCEPT_TIMEOUT_MILLIS = 1000;

    /** Number of accept attempts; 7200 x 1s keeps the former 120 x 60s overall budget. */
    private static final int ACCEPT_ATTEMPTS = 7200;

    // kill() is called from the log-relay thread as well as from the thread
    // executing run(), so the fields it reads are volatile.
    private volatile Socket _childSocket;
    private volatile OutputStream _stdOs;
    private int _status = -1;
    private final AtomicReference<Process> _processRef = new AtomicReference<Process>();

    /**
     * Starts the child and blocks until it has exited; all failures are logged,
     * none are propagated. Resources are released in every case.
     */
    public void run() {
        ServerSocket ss = null;
        Socket s = null;
        try {
            ss = new ServerSocket(0, 5, InetAddress.getByName("127.0.0.1"));
            int port = ss.getLocalPort();
            // Passed as a String: a numeric argument would be rendered by MessageFormat
            // with locale-specific grouping (e.g. "54,321").
            log.log(Level.INFO, "我是大总管,我启动一个端口为{0}的socket,让丫环们实时与我通信", String.valueOf(port));

            Process process = createProcess(port);
            if (process != null) {
                _processRef.compareAndSet(null, process);

                InputStream stdIs = process.getInputStream();
                _stdOs = process.getOutputStream();

                // TODO: do not create threads explicitly, use a thread pool (Alibaba Java guidelines)
                new Thread(new WatchdogProcessLogThread(stdIs)).start();

                s = connectToChild(ss);
                _status = process.waitFor();
                logStatus(_status);
            }
        } catch (Exception e) {
            log.log(Level.WARNING, e.toString(), e);
            try {
                // Back off before the caller gets a chance to launch the next child.
                Thread.sleep(5000);
            } catch (InterruptedException ignored) {
                // best-effort pause only; a shortened back-off is acceptable
                log.log(Level.FINER, ignored.toString(), ignored);
            }
        } catch (Throwable e) {
            log.log(Level.WARNING, e.toString(), e);
        } finally {
            if (ss != null) {
                try {
                    ss.close();
                } catch (Throwable ignored) {
                    // usually already closed by connectToChild(); nothing left to release
                    log.log(Level.FINER, ignored.toString(), ignored);
                }
            }
            try {
                if (s != null) {
                    s.close();
                }
            } catch (Throwable e) {
                log.log(Level.FINER, e.toString(), e);
            }
            kill();
            synchronized (this) {
                notifyAll();
            }
        }
    }

    /**
     * Logs that the child has terminated, including its exit status.
     *
     * @param status the value returned by {@link Process#waitFor()}
     */
    private void logStatus(int status) {
        String code = " (exit code=" + status + ")";
        log.warning("大总管忽然发现丫环进程罢工了!!" + code);
    }

    /**
     * Destroys the child process (if one is still registered), closes its stdin
     * stream and the child socket, then waits for the process to end. Calling it
     * again is harmless because every resource is detached before being released.
     */
    void kill() {
        Process process = _processRef.getAndSet(null);
        if (process != null) {
            try {
                process.destroy();
            } catch (Exception e) {
                log.log(Level.FINE, e.toString(), e);
            }
        }

        OutputStream stdOs = _stdOs;
        _stdOs = null;
        if (stdOs != null) {
            try {
                stdOs.close();
            } catch (Throwable e) {
                log.log(Level.FINE, e.toString(), e);
            }
        }

        Socket childSocket = _childSocket;
        _childSocket = null;

        if (childSocket != null) {
            try {
                childSocket.close();
            } catch (Throwable e) {
                log.log(Level.FINE, e.toString(), e);
            }
        }

        if (process != null) {
            try {
                process.waitFor();
            } catch (Exception e) {
                log.log(Level.INFO, e.toString(), e);
            }
        }
    }

    /**
     * Waits for a socket connection from the child, returning the socket.
     *
     * <p>The wait is sliced into short accept attempts. Between attempts it checks
     * whether the process is still registered: once the child's output ends, the
     * log-relay thread calls {@link #kill()}, which clears the reference, and the
     * wait stops instead of blocking on a child that can no longer connect.
     *
     * @param ss TCP ServerSocket from the watchdog for the child to connect to;
     *           it is closed before this method returns
     * @return the accepted socket, or {@code null} if no connection was made
     */
    private Socket connectToChild(ServerSocket ss)
            throws IOException {
        Socket s = null;
        try {
            ss.setSoTimeout(ACCEPT_TIMEOUT_MILLIS);
            for (int i = 0; i < ACCEPT_ATTEMPTS && s == null && _processRef.get() != null; i++) {
                try {
                    s = ss.accept();
                } catch (SocketTimeoutException ignored) {
                    // nothing connected during this slice; the loop condition decides whether to retry
                }
            }

            if (s != null) {
                _childSocket = s;
            }
        } catch (Exception e) {
            log.log(Level.WARNING, e.toString(), e);
        } finally {
            ss.close();
        }
        return s;
    }

    /**
     * Creates a new Process for the Resin JVM, initializing the environment
     * and passing the watchdog port to the new process.
     *
     * @param socketPort the watchdog socket port
     * @return the started process, with stderr merged into stdout
     */
    private Process createProcess(int socketPort)
            throws IOException {
        Map<String, String> env = buildEnv();
        List<String> jvmArgs = buildJvmArgs();

        jvmArgs.add("com.caucho.server.resin.Resin");
        jvmArgs.add("-socketwait");
        jvmArgs.add(String.valueOf(socketPort));

        ProcessBuilder builder = new ProcessBuilder();
        builder.environment().putAll(env);
        builder = builder.command(jvmArgs);
        builder.redirectErrorStream(true);
        return builder.start();
    }

    /**
     * Builds the child's environment: a copy of this process's environment with
     * CLASSPATH replaced by the current directory plus
     * {@code <user.dir>/resin/target/classes}.
     */
    private Map<String, String> buildEnv()
            throws IOException {
        Map<String, String> env = new HashMap<String, String>();
        env.putAll(System.getenv());

        StringBuilder classPath = new StringBuilder();
        // The entry separator differs per platform (':' vs ';' on Windows).
        classPath.append('.').append(File.pathSeparator);

        String appPath = System.getProperty("user.dir");
        classPath.append(appPath).append("/resin/target/classes");
        env.put("CLASSPATH", classPath.toString());

        // ... a large amount of the original Resin code was removed here ...
        return env;
    }

    /**
     * Builds the start of the child's command line; currently only the
     * {@code java} launcher, resolved through PATH.
     */
    private List<String> buildJvmArgs() {
        List<String> jvmArgs = new ArrayList<String>();
        jvmArgs.add("java");
        // ... more of the original Resin code was removed here ...
        return jvmArgs;
    }

    /**
     * Watchdog thread that copies the child's output to this process's standard
     * output, and calls {@link #kill()} once that output ends or reading fails.
     */
    class WatchdogProcessLogThread implements Runnable {

        private InputStream _is;

        /**
         * @param is the stdout stream from the Resin
         */
        WatchdogProcessLogThread(InputStream is) {
            _is = is;
        }

        @Override
        public void run() {
            try {
                int len;
                byte[] data = new byte[4096];
                while ((len = _is.read(data, 0, data.length)) > 0) {
                    // Forward raw bytes: decoding fixed-size chunks into Strings can
                    // split a multi-byte character across two reads.
                    System.out.write(data, 0, len);
                    System.out.flush();
                }
            } catch (Throwable e) {
                log.log(Level.WARNING, e.toString(), e);
            } finally {
                kill();
            }
        }
    }
}

下面这个要重点说下,因为这套模型你拿过去,只需修改下面 Resin 这个类的代码,这个其实也就是咱们要监控的应用。其实很简单,就有一个 connect 方法主要用于与大总管进行通信,一旦通信失败自己就退出。

package com.caucho.server.resin;

import java.io.IOException;
import java.io.InputStream;
import java.net.Socket;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * Demo child application supervised by the watchdog.
 *
 * <p>It connects to the watchdog's loopback port, reads from that connection on a
 * worker thread and terminates the JVM as soon as the connection fails or ends.
 */
public class Resin {

    private static ExecutorService executorService = Executors.newFixedThreadPool(1);

    private static final Logger log = Logger.getLogger(Resin.class.getName());

    /**
     * @param args expected to be {@code -socketwait <port>}; the port is read from {@code args[1]}
     * @throws IllegalArgumentException if the port argument is missing or not a number
     */
    public static void main(String[] args) {
        log.log(Level.INFO, "我是乳名为Resin的丫环,大总管给的通信端口为{0} {1}", args);
        if (args == null || args.length < 2) {
            throw new IllegalArgumentException("usage: Resin -socketwait <port>");
        }
        // Read the port handed over by the watchdog.
        int port = Integer.parseInt(args[1]);

        connect(port);
    }

    /**
     * Asynchronously connects to the watchdog on 127.0.0.1 and logs whatever it
     * sends. When the connection cannot be opened, fails or reaches end of
     * stream, the JVM exits.
     *
     * @param port the watchdog's listening port
     */
    public static void connect(final int port) {
        // Logged as text: a numeric argument would be rendered by MessageFormat
        // with locale-specific grouping (e.g. "54,321").
        final String portText = String.valueOf(port);
        log.log(Level.INFO, "我是乳名为Resin的丫环,我要开始与端口为{0}的大总管进行通信", portText);
        executorService.execute(new Runnable() {
            @Override
            public void run() {
                Socket socket = null;
                try {
                    socket = new Socket("127.0.0.1", port);
                    InputStream s = socket.getInputStream();
                    byte[] buf = new byte[1024];
                    int len;
                    while ((len = s.read(buf)) != -1) {
                        log.log(Level.INFO, "通信信息 {0}", new String(buf, 0, len));
                    }
                } catch (IOException e) {
                    log.log(Level.WARNING, "我是乳名为Resin的丫环,与端口为{0}的大总管进行通信发生异常", portText);
                    log.log(Level.WARNING, e.toString(), e);
                } finally {
                    // socket is still null when the connection attempt itself failed; an
                    // unguarded close() would throw here and skip the System.exit below,
                    // leaving this JVM running without a watchdog connection.
                    if (socket != null) {
                        try {
                            socket.close();
                        } catch (IOException e) {
                            log.log(Level.WARNING, e.getMessage(), e);
                        }
                    }
                    log.log(Level.INFO, "我是乳名为Resin的丫环,与端口为{0}的大总管进行通信结束,我要退下啦", portText);
                    System.exit(0);
                }
            }
        });
        log.log(Level.INFO, "丫环与大总管通信完成");
    }
}

到这,代码也就码完了,不妨把代码拔下去,运行一下,稍微体验体验,看看是不是那回事儿!其中为了演示需要删除了 N 多代码,有些地方很不优雅,还需按照阿里开发规约适当调整调整,不过这些不是我们这期分享的重点,我们重点是思想 + 轻实践。


好了,思想也落地了,接下来就看你怎么让它老树开新花啦。分享就到这儿吧,希望可以解你所惑;希望能在你前进的道路上,帮你披荆斩棘。如果感觉有点帮助,欢迎在看、秒赞,疯狂分享转发,因为你的每一次分享,我都认真当成了鼓励与喜欢。

推荐阅读:

老技术新谈,Java应用监控利器JMX(1)

老技术新谈,Java应用监控利器JMX(2)

老技术新谈,Java应用监控利器JMX(3)

一篇文章讲透线上应用监控

相关文章
相关标签/搜索