GeoSpark SQL 加载 SHP 数据
GeoSpark SQL 默认无法直接读取 Shapefile(.shp)和 GeoJSON 格式的矢量数据,必须先通过 RDD 读取,然后利用 GeoSpark 提供的 Adapter
在 RDD 和 DataFrame 之间互转。
接下来我们还是利用我们上一节的公园的数据来学习。
初始化一个SparkSession
// Build (or reuse) a local SparkSession named "Learn06" that serializes with Kryo
// and uses GeoSpark's Kryo registrator.
SparkSession spark = SparkSession.builder()
        .master("local[*]")
        .appName("Learn06")
        .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
        .config("spark.kryo.registrator", "org.datasyslab.geospark.serde.GeoSparkKryoRegistrator")
        .getOrCreate();

// Register the GeoSpark SQL and GeoSparkViz extensions on this session; the ST_* functions
// used in the spark.sql(...) queries of this article rely on these registrations.
GeoSparkSQLRegistrator.registerAll(spark);
GeoSparkVizRegistrator.registerAll(spark);
利用RDD读取shp
// Wrap the session's SparkContext once; the same wrapper is handed to the shapefile reader
// instead of constructing a second JavaSparkContext around the same SparkContext.
JavaSparkContext context = new JavaSparkContext(spark.sparkContext());
// Resolve the "/parks" classpath resource to a URL string used as the reader's input location.
// NOTE: Class.getResource returns null when the resource is missing, so this line throws a
// NullPointerException if "/parks" is not on the classpath.
String inputPath = Learn06.class.getResource("/parks").toString();
SpatialRDD<Geometry> rdd = ShapefileReader.readToGeometryRDD(context, inputPath);
将RDD转为DataFrame
// Convert the spatial RDD into a DataFrame through GeoSpark's Adapter.
Dataset<Row> rawDF = Adapter.toDf(rdd, spark);

// Print the first rows, then the schema, to inspect what the conversion produced.
rawDF.show();
rawDF.printSchema();

// Expose the DataFrame to SQL under the temp view name "park".
rawDF.createOrReplaceTempView("park");
+--------------------+------+---------+--------------------+--------------+---------------+---------------+-------+---------+----------+
| geometry|ParkId|RefParkId| ParkName| Neighborho| EWStreet| NSStreet|DogPark|Washrooms|SpecialFea|
+--------------------+------+---------+--------------------+--------------+---------------+---------------+-------+---------+----------+
|POLYGON ((-123.15...| 1| -9999| | Kitsilano| | | N| | |
|POLYGON ((-123.15...| 2| 208| Rosemary Brown Park| Kitsilano| W 11th Avenue| Vine Street| N| N| N|
|MULTIPOLYGON (((-...| 3| 141| Tea Swamp Park|Mount Pleasant| E 15th Avenue| Sophia Street| N| N| N|
|MULTIPOLYGON (((-...| 4| -9999| | Strathcona| | | N| | |
|MULTIPOLYGON (((-...| 5| 202| Morton Park| West End| Morton Avenue| Denman Street| N| N| N|
|MULTIPOLYGON (((-...| 6| -9999| Mcbride Park| Kitsilano| | | N| | |
|MULTIPOLYGON (((-...| 7| -9999| Granville Park| Fairview| | | N| | |
|MULTIPOLYGON (((-...| 8| -9999| |Mount Pleasant| | | N| | |
|MULTIPOLYGON (((-...| 9| 15| Creekside Park|Mount Pleasant|Terminal Avenue| Quebec Street| N| N| Y|
|MULTIPOLYGON (((-...| 10| 134|China Creek South...|Mount Pleasant| E 10th Avenue| Clark Drive| N| N| N|
|MULTIPOLYGON (((-...| 11| 200|Barclay Heritage ...| West End| Barclay Street| Nicola Street| N| Y| N|
|POLYGON ((-123.15...| 12| 233|Arbutus Greenway ...| Kitsilano| W 11th Avenue| Arbutus Street| N| N| N|
|POLYGON ((-123.18...| 13| 106| Almond Park| Kitsilano| W 12th Avenue| Dunbar Street| N| N| N|
|POLYGON ((-123.15...| 14| 109| Delamont Park| Kitsilano| W 7th Avenue| Arbutus Street| N| N| N|
|POLYGON ((-123.15...| 15| -9999| | Kitsilano| | | N| | |
|POLYGON ((-123.14...| 16| 118| Seaforth Peace Park| Kitsilano|Cornwall Avenue|Chestnut Street| N| N| N|
|POLYGON ((-123.10...| 17| 139| Mount Pleasant Park|Mount Pleasant| W 16th Avenue| Ontario Street| N| N| N|
|POLYGON ((-123.10...| 18| 138| Major Matthews Park|Mount Pleasant| W 11th Avenue|Manitoba Street| N| N| N|
|POLYGON ((-123.10...| 19| 137|Jonathan Rogers Park|Mount Pleasant| W 7th Avenue|Manitoba Street| N| Y| N|
|POLYGON ((-123.09...| 20| 183| Thornton Park| Strathcona|Terminal Avenue| Main Street| N| N| N|
+--------------------+------+---------+--------------------+--------------+---------------+---------------+-------+---------+----------+
only showing top 20 rows
root
|-- geometry: string (nullable = true)
|-- ParkId: string (nullable = true)
|-- RefParkId: string (nullable = true)
|-- ParkName: string (nullable = true)
|-- Neighborho: string (nullable = true)
|-- EWStreet: string (nullable = true)
|-- NSStreet: string (nullable = true)
|-- DogPark: string (nullable = true)
|-- Washrooms: string (nullable = true)
|-- SpecialFea: string (nullable = true)
从输出结果中可以看到,转为 DataFrame 后,geometry 列的类型是 string(WKT 文本),而不是真正的 Geometry 类型,需要我们自己用 ST_GeomFromWKT 构建。
// Build the geometry column: pass the string column `geometry` through ST_GeomFromWKT,
// name the result `shape`, and keep every original column alongside it.
String sqlText = "select ST_GeomFromWKT(geometry) as shape, * from park";
rawDF = spark.sql(sqlText);

// Print the first rows, then the schema, to confirm the new `shape` column and its type.
rawDF.show();
rawDF.printSchema();

// Replace the "park" temp view so later queries see the `shape` column.
rawDF.createOrReplaceTempView("park");
+--------------------+--------------------+------+---------+--------------------+--------------+---------------+---------------+-------+---------+----------+
| shape| geometry|ParkId|RefParkId| ParkName| Neighborho| EWStreet| NSStreet|DogPark|Washrooms|SpecialFea|
+--------------------+--------------------+------+---------+--------------------+--------------+---------------+---------------+-------+---------+----------+
|POLYGON ((-123.15...|POLYGON ((-123.15...| 1| -9999| | Kitsilano| | | N| | |
|POLYGON ((-123.15...|POLYGON ((-123.15...| 2| 208| Rosemary Brown Park| Kitsilano| W 11th Avenue| Vine Street| N| N| N|
|MULTIPOLYGON (((-...|MULTIPOLYGON (((-...| 3| 141| Tea Swamp Park|Mount Pleasant| E 15th Avenue| Sophia Street| N| N| N|
|MULTIPOLYGON (((-...|MULTIPOLYGON (((-...| 4| -9999| | Strathcona| | | N| | |
|MULTIPOLYGON (((-...|MULTIPOLYGON (((-...| 5| 202| Morton Park| West End| Morton Avenue| Denman Street| N| N| N|
|MULTIPOLYGON (((-...|MULTIPOLYGON (((-...| 6| -9999| Mcbride Park| Kitsilano| | | N| | |
|MULTIPOLYGON (((-...|MULTIPOLYGON (((-...| 7| -9999| Granville Park| Fairview| | | N| | |
|MULTIPOLYGON (((-...|MULTIPOLYGON (((-...| 8| -9999| |Mount Pleasant| | | N| | |
|MULTIPOLYGON (((-...|MULTIPOLYGON (((-...| 9| 15| Creekside Park|Mount Pleasant|Terminal Avenue| Quebec Street| N| N| Y|
|MULTIPOLYGON (((-...|MULTIPOLYGON (((-...| 10| 134|China Creek South...|Mount Pleasant| E 10th Avenue| Clark Drive| N| N| N|
|MULTIPOLYGON (((-...|MULTIPOLYGON (((-...| 11| 200|Barclay Heritage ...| West End| Barclay Street| Nicola Street| N| Y| N|
|POLYGON ((-123.15...|POLYGON ((-123.15...| 12| 233|Arbutus Greenway ...| Kitsilano| W 11th Avenue| Arbutus Street| N| N| N|
|POLYGON ((-123.18...|POLYGON ((-123.18...| 13| 106| Almond Park| Kitsilano| W 12th Avenue| Dunbar Street| N| N| N|
|POLYGON ((-123.15...|POLYGON ((-123.15...| 14| 109| Delamont Park| Kitsilano| W 7th Avenue| Arbutus Street| N| N| N|
|POLYGON ((-123.15...|POLYGON ((-123.15...| 15| -9999| | Kitsilano| | | N| | |
|POLYGON ((-123.14...|POLYGON ((-123.14...| 16| 118| Seaforth Peace Park| Kitsilano|Cornwall Avenue|Chestnut Street| N| N| N|
|POLYGON ((-123.10...|POLYGON ((-123.10...| 17| 139| Mount Pleasant Park|Mount Pleasant| W 16th Avenue| Ontario Street| N| N| N|
|POLYGON ((-123.10...|POLYGON ((-123.10...| 18| 138| Major Matthews Park|Mount Pleasant| W 11th Avenue|Manitoba Street| N| N| N|
|POLYGON ((-123.10...|POLYGON ((-123.10...| 19| 137|Jonathan Rogers Park|Mount Pleasant| W 7th Avenue|Manitoba Street| N| Y| N|
|POLYGON ((-123.09...|POLYGON ((-123.09...| 20| 183| Thornton Park| Strathcona|Terminal Avenue| Main Street| N| N| N|
+--------------------+--------------------+------+---------+--------------------+--------------+---------------+---------------+-------+---------+----------+
only showing top 20 rows
root
|-- shape: geometry (nullable = false)
|-- geometry: string (nullable = true)
|-- ParkId: string (nullable = true)
|-- RefParkId: string (nullable = true)
|-- ParkName: string (nullable = true)
|-- Neighborho: string (nullable = true)
|-- EWStreet: string (nullable = true)
|-- NSStreet: string (nullable = true)
|-- DogPark: string (nullable = true)
|-- Washrooms: string (nullable = true)
|-- SpecialFea: string (nullable = true)
渲染
方法参考上一小节。
// Convert to pixels, step 1: run ST_Envelope_Aggr over all shapes and publish the
// single `boundary` result as temp view "bound".
sqlText = "select ST_Envelope_Aggr(shape) as boundary from park";
spark.sql(sqlText).createOrReplaceTempView("bound");

// Step 2: pixelize every shape with ST_Pixelize(shape, 256, 256, boundary), reading the
// boundary back from the "bound" view through a scalar subquery.
sqlText = "select ST_Pixelize(shape, 256, 256, (select boundary from bound)) as pixel, shape from park ";
rawDF = spark.sql(sqlText);

// show(false) prints the rows without truncating long cell values.
rawDF.show(false);
rawDF.createOrReplaceTempView("pixels");
// Choose the color: add a `color` column computed by ST_Colorize(1, 1, 'red') to each pixel row.
sqlText = "select pixel, shape, ST_Colorize(1, 1, 'red') as color from pixels";
rawDF = spark.sql(sqlText);

rawDF.show();
// Replace the "pixels" temp view with the colorized result.
rawDF.createOrReplaceTempView("pixels");
// Render: feed the pixel/color pairs to ST_Render and expose the result column as `image`.
sqlText = "select ST_Render(pixel, color) as image from pixels";
rawDF = spark.sql(sqlText);

rawDF.show();
// Publish the rendered result as temp view "images" so it can be fetched for saving.
rawDF.createOrReplaceTempView("images");
// Save: fetch the rendered image from the "images" view and write it out as ImageType.PNG
// to the path "<user.home>/park".
Dataset<Row> images = spark.table("images");
// first() returns the Row directly, so the Object -> Row[] cast that take(1) requires from Java
// is not needed. It throws NoSuchElementException if the view yields no row.
Row imageRow = images.first();
// Column 0 is the `image` column produced by the render query.
ImageSerializableWrapper image = (ImageSerializableWrapper) imageRow.get(0);
new ImageGenerator().SaveRasterImageAsLocalFile(image.getImage(), System.getProperty("user.home") + "/park", ImageType.PNG);