基本操作介绍:
持久化:cache、persist
创建临时视图:createTempView、createOrReplaceTempView
获取执行计划:explain
查看schema:printSchema
写数据到外部存储:write
dataset与dataframe互相转换:as、toDF
基本操作实践
package session
import org.apache.spark.sql.SparkSession
/**
 * Demonstrates basic Spark SQL operations on an employee data set: caching,
 * registering a temporary view, querying it with SQL, printing the schema,
 * writing a result as JSON, and converting between DataFrame and Dataset.
 */
object BasicOperation {

  /** Typed row used when viewing the employee DataFrame as a Dataset. */
  case class Employee(name: String, age: Long, depId: Long, gender: String, salary: Long)

  /**
   * Runs the demo end to end against a local Spark session.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder()
      .appName(BasicOperation.getClass.getName)
      .master("local")
      .config("spark.sql.warehouse.dir", "d:/spark-warehouse")
      .getOrCreate()

    // Always release the session, even when reading, querying or writing fails.
    try {
      import spark.implicits._

      val employee = spark.read.json("data\\depart\\employee.json")
      // Persist the source data because it is reused by several actions below.
      employee.cache()
      employee.createOrReplaceTempView("employee")

      // Build the filtered query once and reuse it for both display and export.
      val employeeWithAgeGreaterThen30DF = spark.sql("select * from employee where age > 30")
      employeeWithAgeGreaterThen30DF.show()
      employee.printSchema()

      // NOTE(review): with the writer's default save mode this is expected to fail
      // when the target path already exists, so remove it before re-running.
      employeeWithAgeGreaterThen30DF.write.json("d:/writetest.json")

      // DataFrame -> typed Dataset, using the encoder derived for Employee.
      val employeeDS = employee.as[Employee]
      employeeDS.show()
      employeeDS.printSchema()

      // Typed Dataset -> DataFrame; the value is kept only to illustrate the conversion.
      val employeeDF = employeeDS.toDF()
    } finally {
      spark.stop()
    }
  }
}
运行结果
输出1:
+---+-----+------+----+------+
|age|depId|gender|name|salary|
+---+-----+------+----+------+
| 35|    1|  male|Jack| 15000|
| 42|    3|  male| Tom| 18000|
+---+-----+------+----+------+
输出2:
root
|-- age: long (nullable = true)
|-- depId: long (nullable = true)
|-- gender: string (nullable = true)
|-- name: string (nullable = true)
|-- salary: long (nullable = true)
输出3:
+---+-----+------+------+------+
|age|depId|gender|  name|salary|
+---+-----+------+------+------+
| 25|    1|  male|   Leo| 20000|
| 30|    2|female| Marry| 25000|
| 35|    1|  male|  Jack| 15000|
| 42|    3|  male|   Tom| 18000|
| 21|    3|female|Kattie| 21000|
| 30|    2|female|   Jen| 28000|
| 19|    2|female|   Jen|  8000|
+---+-----+------+------+------+
输出4:
root
|-- age: long (nullable = true)
|-- depId: long (nullable = true)
|-- gender: string (nullable = true)
|-- name: string (nullable = true)
|-- salary: long (nullable = true)
检查本地写入目录: