Spark算子汇总-Cartesian

Cartesian算子操作。

Github项目上已包含Spark全部操作DEMO。

Java版本:

package com.huangyueran.spark.operator;

import java.util.Arrays;
import java.util.List;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.VoidFunction;

import scala.Tuple2;

/** * @category 两个RDD进行笛卡尔积合并--The two RDD are Cartesian product merging * @author huangyueran * @time 2019-7-21 16:38:20 */
public class Cartesian {

	public static void main(String[] args) {
		/** * SparkConf:第一步建立一个SparkConf,在这个对象里面能够设置容许模式Local Standalone yarn * AppName(能够在Web UI中看到) 还能够设置Spark运行时的资源要求 */
		SparkConf conf = new SparkConf().setAppName("Cartesian").setMaster("local");

		/** * 基于SparkConf的对象能够建立出来一个SparkContext Spark上下文 * SparkContext是通往集群的惟一通道,SparkContext在建立的时候还会建立任务调度器 */
		JavaSparkContext sc = new JavaSparkContext(conf);

		cartesian(sc);
	}

	private static void cartesian(JavaSparkContext sc) {
	    List<String> names = Arrays.asList("张三", "李四", "王五");
	    List<Integer> scores = Arrays.asList(60, 70, 80);

	    JavaRDD<String> namesRDD = sc.parallelize(names);
	    JavaRDD<Integer> scoreRDD = sc.parallelize(scores);

	    /** * ===================================== * | 两个RDD进行笛卡尔积合并 | * | The two RDD are Cartesian product merging | | * ===================================== */
	    JavaPairRDD<String, Integer> cartesianRDD = namesRDD.cartesian(scoreRDD);
	    
	    cartesianRDD.foreach(new VoidFunction<Tuple2<String, Integer>>() {
	        public void call(Tuple2<String, Integer> t) throws Exception {
	            System.out.println(t._1 + "\t" + t._2());
	        }
	    });
	}
	
}

 

Scala版本:

package com.hyr.spark.operator

import org.apache.spark.{SparkConf, SparkContext}

/**
 * Demonstrates the Spark `cartesian` operator: two RDDs are merged into their
 * Cartesian product, i.e. every element of the first RDD is paired with every
 * element of the second.
 *
 * Date: 2019-08-07 17:04
 *
 * @author <a href=mailto:huangyr>黄跃然</a>
 */
object Cartesian {

  /**
   * Builds two small RDDs (names and scores), computes their Cartesian product
   * and prints each resulting pair as `key:<name><TAB>value:<score>`.
   *
   * @param sparkContext the Spark context used to create the input RDDs
   */
  def cartesian(sparkContext: SparkContext): Unit = {
    val names = List("张三", "李四", "王五")
    val scores = List(60, 70, 90)

    val namesRDD = sparkContext.parallelize(names)
    val scoresRDD = sparkContext.parallelize(scores)

    // One (name, score) pair for every combination of a name and a score.
    val cartesianRDD = namesRDD.cartesian(scoresRDD)

    cartesianRDD.foreach { case (name, score) =>
      println(s"key:$name\tvalue:$score")
    }
  }

  /**
   * Entry point: creates a local Spark context, runs the demo and always stops
   * the context afterwards.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setAppName("Cartesian").setMaster("local")
    val sparkContext = new SparkContext(sparkConf)

    // Release the context even when the job fails; otherwise its resources
    // stay alive until the JVM exits.
    try cartesian(sparkContext)
    finally sparkContext.stop()
  }

}

Github地址:https://github.com/huangyueranbbc/SparkDemo

相关文章
相关标签/搜索