比如原始表的schema如下:
现在想将该DataFrame 的schema转换成:
id: String,
goods_name: String,
price: Array[String]
sql 转换
spark.sql("create table speedup_tmp_test_spark_schema_parquet12 using parquet as select cast(id as string),cast(goods_name as string),cast(price as array<string>) from tmp_test_spark_schema_parquet")

case class 变换
// Target schema expressed as a case class; Spark derives the new DataFrame
// schema (id: string, goods_name: string, price: array<string>) from its fields.
case class newSchemaClass(id: String, goods_name: String, price: Array[String])

// Original DataFrame
// NOTE(review): `.toDF()` below requires `import spark.implicits._` to be in
// scope — confirm it is imported earlier in the file.
val df = spark.sql("select * from tmp_test_spark_schema_parquet")

// New DataFrame: map each Row into the target case class, then back to a DataFrame.
val newDF = df.rdd.map { r =>
  // price is column index 2; assumes the source column is array<int>, so read it
  // as Seq[Int] and stringify each element — TODO confirm against source schema.
  newSchemaClass(r(0).toString, r(1).toString, r.getSeq[Int](2).map(_.toString).toArray)
}.toDF()

// Fetch concrete data: from the third row, read the price column (index 2)
// as a java.util.List[String].
newDF.collect()(2).getList[String](2)