Use Spark to read a JSON file and register it as a temporary view
// Read a JSON file with Spark and register it as the temporary view "tidb",
// so the %sql and %pyspark paragraphs below can query it.
// NOTE(review): removed `import org.mortbay.util.ajax.JSON` — it is Jetty's
// internal JSON helper, unused by this paragraph.
import org.apache.spark.sql.SparkSession

val spark = SparkSession
  .builder()
  .appName("tidb-bench")
  // Keep full stack traces in Zeppelin's Spark SQL interpreter to ease debugging.
  .config("zeppelin.spark.sql.stacktrace", "true")
  .getOrCreate()

// For implicit conversions like converting RDDs to DataFrames.
import spark.implicits._

// NOTE(review): local-filesystem path — confirm it is reachable from the
// Spark driver and executors in this deployment.
val tidb = spark.read.json("file:///share/data/tidb.json")
tidb.printSchema()
tidb.show(10)
tidb.createOrReplaceTempView("tidb")
Use Spark SQL for data visualization
%sql
-- Show the full contents of the temp view registered by the Scala paragraph.
SELECT *
FROM tidb
Use PySpark + Plotly for custom visualization
%pyspark
from pyspark.sql import SQLContext,Row
import plotly
from plotly.graph_objs import Scatter, Layout
def plot(plot_dic, height=500, width=500, **kwargs):
    """Render a Plotly figure inline in Zeppelin via the %angular display system.

    plot_dic: figure spec (dict with "data"/"layout") passed to plotly.offline.plot.
    height, width: pixel size of the embedding <div>.
    Remaining kwargs are forwarded to plotly.offline.plot.
    """
    # Force a <div> HTML fragment (instead of writing a standalone file)
    # so the markup can be embedded in the %angular output line below.
    kwargs['output_type'] = 'div'
    plot_str = plotly.offline.plot(plot_dic, **kwargs)
    # Fix: both dimensions are integers — use %d for both (the original mixed
    # %i for height with %s for width).
    print('%%angular <div style="height: %dpx; width: %dpx"> %s </div>' % (height, width, plot_str))
# Query the temp view registered by the Scala paragraph and pull the
# (small) result set to the driver as a pandas DataFrame.
result_df = sqlContext.sql("select * from tidb")
pdf = result_df.toPandas()

# Scatter plot of the "avg" column against "Number of threads".
figure = {
    "data": [Scatter(x=pdf["avg"], y=pdf["Number of threads"])],
    "layout": Layout(title="hello world"),
}
plot(figure)