最近项目准备把 Hadoop 的 MR 转换为 Spark。之前的 MR 能够直接提交 Java 文件到集群服务器中,但 Spark 我没有找到相应的方式(如果有大神知道如何处理,希望能够告知一下)。我这边使用了 SparkAppHandle 的方式来进行处理,代码(Java)如下:
CountDownLatch cdl= new CountDownLatch(1); SparkAppHandle handle = new SparkLauncher().setSparkHome("/usr/local/spark-2.2.0") .setAppResource("/usr/local/spark-2.2.0/lib/spark.jar") .setMainClass("run.aaa.spark.SimpleApp") .setMaster("yarn").setDeployMode("client") .setAppName("test yarn client") .setConf("spark.yarn.jars", "hdfs://master:9000/tmp/spark-jars/*") .setConf("spark.driver.allowMultipleContexts", "true") .setConf("spark.executor.cores", "2") .setConf("spark.executor.instances", "2") .addAppArgs("/README.md") .setVerbose(true) .startApplication(new SparkAppHandle.Listener() { // 这里监放任务状态,当任务结束时(不论是什么缘由结束),isFinal方法会返回true,不然返回false @Override public void stateChanged(SparkAppHandle sparkAppHandle) { if (sparkAppHandle.getState().isFinal()) { cdl.countDown(); } System.out.println("state:" + sparkAppHandle.getState().toString()); } @Override public void infoChanged(SparkAppHandle sparkAppHandle) { System.out.println("Info:" + sparkAppHandle.getState().toString()); } }); System.out.println("The task is executing, please wait ...."); // 线程等待任务结束 cdl.await(); System.out.println("The task is finished!");