// Method 1: build a DataFrame from an RDD using a case class
// (Spark infers the schema from Person's fields via reflection).
case class Person(name: String, age: Int)

/** Reads people.txt (one "name,age" record per line) and converts it to a
  * DataFrame, letting Spark derive the schema from the Person case class.
  *
  * @param sparkSession active session whose SparkContext reads the file
  * @return DataFrame with columns "name" (String) and "age" (Int)
  */
def rddToDFCase(sparkSession: SparkSession): DataFrame = {
  // Import the session's implicits so RDD gains the toDF() method.
  import sparkSession.implicits._
  sparkSession.sparkContext
    .textFile("file:/E:/scala_workspace/z_spark_study/people.txt", 2)
    .map(x => x.split(","))
    // NOTE(review): assumes every line has at least 2 comma-separated fields
    // and the second parses as an Int — malformed lines will throw at runtime.
    .map(x => Person(x(0), x(1).trim().toInt))
    .toDF()
}
创建 DataFrame 方法 2：使用显式 StructType schema（Method 2: explicit schema）
def rddToDF(sparkSession : SparkSession):DataFrame = { //设置schema结构 val schema = StructType( Seq( StructField("name",StringType,true) ,StructField("age",IntegerType,true) ) ) val rowRDD = sparkSession.sparkContext .textFile("file:/E:/scala_workspace/z_spark_study/people.txt",2) .map( x => x.split(",")).map( x => Row(x(0),x(1).trim().toInt)) sparkSession.createDataFrame(rowRDD,schema) }