The cartesian operator: it pairs every element of one RDD with every element of another, producing the Cartesian product of the two RDDs.
The GitHub project already includes demos for all Spark operators.
Java version:
package com.huangyueran.spark.operator;

import java.util.Arrays;
import java.util.List;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.VoidFunction;

import scala.Tuple2;

/**
 * @category Computes the Cartesian product of two RDDs
 * @author huangyueran
 * @time 2019-7-21 16:38:20
 */
public class Cartesian {

    public static void main(String[] args) {
        /**
         * SparkConf: the first step is to create a SparkConf. On this object you
         * can set the run mode (Local, Standalone, YARN), the AppName (visible in
         * the Web UI), and the resources Spark should request at runtime.
         */
        SparkConf conf = new SparkConf().setAppName("Cartesian").setMaster("local");

        /**
         * A SparkContext can be created from the SparkConf object. The
         * SparkContext is the sole gateway to the cluster; creating it also
         * creates the task scheduler.
         */
        JavaSparkContext sc = new JavaSparkContext(conf);
        cartesian(sc);
    }

    private static void cartesian(JavaSparkContext sc) {
        List<String> names = Arrays.asList("张三", "李四", "王五");
        List<Integer> scores = Arrays.asList(60, 70, 80);

        JavaRDD<String> namesRDD = sc.parallelize(names);
        JavaRDD<Integer> scoreRDD = sc.parallelize(scores);

        // Compute the Cartesian product of the two RDDs.
        JavaPairRDD<String, Integer> cartesianRDD = namesRDD.cartesian(scoreRDD);

        cartesianRDD.foreach(new VoidFunction<Tuple2<String, Integer>>() {
            public void call(Tuple2<String, Integer> t) throws Exception {
                System.out.println(t._1 + "\t" + t._2);
            }
        });
    }
}
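With three names and three scores, cartesian produces 3 × 3 = 9 (name, score) pairs; since foreach runs on the executors, the print order is not guaranteed. On Java 8+, the anonymous VoidFunction can also be written as a lambda. A minimal sketch, assuming the cartesianRDD from the listing above:

// Equivalent foreach call using a Java 8 lambda (VoidFunction is a
// single-method interface, so a lambda can stand in for the anonymous class).
cartesianRDD.foreach(t -> System.out.println(t._1 + "\t" + t._2));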
Scala version:
package com.hyr.spark.operator

import org.apache.spark.{SparkConf, SparkContext}

/** ***************************************************************************
  *
  * @date 2019-08-07 17:04
  * @author: <a href=mailto:huangyr>黄跃然</a>
  * @Description: Computes the Cartesian product of two RDDs
  * ***************************************************************************/
object Cartesian {

  def cartesian(sparkContext: SparkContext): Unit = {
    val names = List("张三", "李四", "王五")
    val scores = List(60, 70, 90)
    val namesRDD = sparkContext.parallelize(names)
    val scoresRDD = sparkContext.parallelize(scores)

    // Compute the Cartesian product of the two RDDs.
    val cartesianRDD = namesRDD.cartesian(scoresRDD)
    cartesianRDD.foreach(tuple => {
      println("key:" + tuple._1 + "\tvalue:" + tuple._2)
    })
  }

  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setAppName("Cartesian").setMaster("local")
    val sparkContext = new SparkContext(sparkConf)
    cartesian(sparkContext)
  }
}
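The Scala run prints the following nine pairs (the ordering may vary across partitions):

key:张三	value:60
key:张三	value:70
key:张三	value:90
key:李四	value:60
key:李四	value:70
key:李四	value:90
key:王五	value:60
key:王五	value:70
key:王五	value:90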
GitHub repository: https://github.com/huangyueranbbc/SparkDemo