-
对POI数据进行空间聚类,以高德学校数据为例,下图为数据空间分布:
- 以下是使用 KMeans 进行聚类的 Python 代码,可得到 20 个空间聚类中心,并将其写入 centrois 表。
from sklearn.cluster import KMeans
from sklearn.externals import joblib
import numpy
import matplotlib.pyplot as plt
import psycopg2
# -*- coding: utf-8 -*-
from sklearn.cluster import KMeans
from sklearn.externals import joblib
import numpy
import time
import matplotlib.pyplot as plt
# Pipeline: load school POI coordinates from PostGIS, cluster them with
# KMeans, and store the resulting cluster centers in the `centrois` table.
print ("step 1: load data...")

# Connection settings shared by the read phase and the write phase.
db_params = dict(database="superpower", user="postgres", password="123456",
                 host="localhost", port="5432")

# Fetch every POI as an (x, y) pair reprojected to SRID 3857.
sql = "select st_x(st_transform(t.geom,3857)) x,st_y(st_transform(t.geom,3857)) y from gaode_poi_school t"
conn = psycopg2.connect(**db_params)
try:
    cur = conn.cursor()
    try:
        cur.execute(sql)
        keyData = cur.fetchall()
    finally:
        cur.close()
finally:
    # Release the connection even when the query fails.
    conn.close()
print(keyData)

# KMeans expects a 2-D numeric sample matrix: one [x, y] row per POI.
dataSet = [[float(point[0]), float(point[1])] for point in keyData]

clf = KMeans(n_clusters=20)  # k = 20 cluster centers
s = clf.fit(dataSet)  # fit the model on the POI coordinates
numSamples = len(dataSet)
centroids = clf.cluster_centers_

# Store the centers over ONE connection instead of reconnecting for every
# row, and bind the values as query parameters instead of concatenating them
# into the SQL text.
sql = "INSERT INTO centrois ( x, y) VALUES (%s, %s);"
conn = psycopg2.connect(**db_params)
try:
    cur = conn.cursor()
    try:
        for center in centroids:
            print(center[0])
            # The values are bound as strings, so the server receives the
            # same quoted literals as the original concatenated statement.
            x = str(center[0])
            y = str(center[1])
            print(sql, (x, y))
            cur.execute(sql, (x, y))
    finally:
        cur.close()
    # Single commit: either every center is stored or none is.
    conn.commit()
finally:
    conn.close()
下图为计算得到的聚类中心。
- 对POI进行分类:
-- Convert the set of cluster centers to geometries, then classify each POI
-- with st_closestpoint.
-- The query reads centrois.geom and centrois.type, so both columns must
-- already be populated before it runs.
INSERT INTO poi_class
SELECT
    nearest.geom,
    center.type AS center_type
FROM (
    -- For every POI, pick the point of the collected center geometries
    -- that lies closest to it.
    SELECT
        st_closestpoint(
            ST_Collect(ARRAY(SELECT src.geom FROM centrois src)),
            poi.geom
        ) AS nearest_center,
        poi.*
    FROM gaode_poi_school poi
) nearest
-- Map the chosen closest point back to its center row to obtain the type.
INNER JOIN centrois center
    ON nearest.nearest_center = center.geom
- 分类结果如下图: