K-Means算法
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
# Load the input table from CSV.
data = pd.read_csv('multi_vol.csv')

# Transpose before clustering so that each CSV column is handed to KMeans
# as one sample (row).
data1 = data.transpose()

# Build a 4-cluster K-Means estimator and fit it on the transposed table
# (fit() returns the estimator itself).
estimator = KMeans(n_clusters=4).fit(data1)

# Collect the fitted results.
label_pred = estimator.labels_          # final cluster label of every sample
centroids = estimator.cluster_centers_  # final cluster centers
inertia = estimator.inertia_            # inertia reported by the fitted model
学习向量量化LVQ
# Learning Vector Quantization (LVQ): training loop followed by a scatter plot.
# Relies on names defined outside this snippet: `loops` (number of
# iterations), `x` (2-D sample array), `y` (per-sample labels), `q` (2-D
# prototype array, updated in place), `q_label` (per-prototype labels),
# `eta` (learning rate) and `plt` (matplotlib.pyplot).
for i in range(loops):
    # Pick one training sample uniformly at random. The upper bound follows
    # the data size instead of a hard-coded 30, so every sample can be drawn
    # and smaller data sets do not raise IndexError.
    index = np.random.randint(0, len(x))
    sample = x[index]

    # Find the prototype closest to the sample (squared Euclidean distance).
    # argmin returns the first minimum, like a linear scan with a strict "<".
    q_index = int(np.argmin(np.sum((np.asarray(q) - sample) ** 2, axis=1)))

    # eta is the learning rate.
    step = eta * (sample - q[q_index])
    if q_label[q_index] == y[index]:
        # Sample and prototype carry the same label: move the prototype
        # towards the sample.
        q[q_index] += step
    else:
        # Sample and prototype carry different labels: push the prototype
        # away from the sample.
        q[q_index] -= step

# Plot the samples: label 0 in red, every other label in black.
for point, label in zip(x, y):
    if label == 0:
        plt.plot(point[0], point[1], 'or')
    else:
        plt.plot(point[0], point[1], 'o', color='black')

# Plot the learned prototype vectors as blue stars.
for prototype in q:
    plt.plot(prototype[0], prototype[1], marker='*', color='blue')
高斯混合聚类 GMM
from sklearn import mixture
def test_GMM(dataMat, components=3, iter=100, cov_type="full"):
    """Fit a Gaussian mixture model to ``dataMat`` and label every sample.

    Args:
        dataMat: Samples to cluster; passed unchanged to ``fit`` and
            ``predict``.
        components: Number of mixture components (forwarded as
            ``n_components``).
        iter: Upper bound on the number of iterations (forwarded as
            ``max_iter``). The name shadows the builtin but is kept so
            existing keyword callers keep working.
        cov_type: Covariance type (forwarded as ``covariance_type``).

    Returns:
        A tuple ``(means, predicted_labels)``: the fitted component means
        and the component label predicted for each sample of ``dataMat``.
    """
    # The original passed the undefined name `n_components` here, which
    # raised NameError on every call; the parameter is `components`.
    clst = mixture.GaussianMixture(
        n_components=components,
        max_iter=iter,
        covariance_type=cov_type,
    )
    clst.fit(dataMat)
    predicted_labels = clst.predict(dataMat)
    return clst.means_, predicted_labels
层次聚类
import numpy
import pandas
from sklearn import datasets
import scipy.cluster.hierarchy as hcluster
# Load the iris data set: feature matrix and reference class labels.
iris = datasets.load_iris()
data = iris.data
target = iris.target

# Build the hierarchical-clustering linkage matrix with the 'centroid' method.
linkage = hcluster.linkage(data, method='centroid')

# Draw the full dendrogram, then a truncated one using truncate_mode='lastp'
# with p=12.
hcluster.dendrogram(linkage, leaf_font_size=10.)
hcluster.dendrogram(
    linkage,
    truncate_mode='lastp',
    p=12,
    leaf_font_size=12.,
)

# Cut the tree into flat clusters using the 'maxclust' criterion with t=3.
p = hcluster.fcluster(linkage, t=3, criterion='maxclust')

# Pivot the reference labels (rows, 't') against the assigned clusters
# (columns, 'p'), aggregating with numpy.size.
ct = pandas.DataFrame({'p': p, 't': target}).pivot_table(
    index=['t'],
    columns=['p'],
    aggfunc=[numpy.size],
)
密度聚类 DBSCAN
import pandas
import matplotlib.pyplot as plt
from sklearn.cluster import DBSCAN
# Load the data to cluster.
data = pandas.read_csv("%%%%.csv")

# DBSCAN parameters: neighbourhood radius and the value used for min_samples.
eps = 0.2
MinPts = 5

# Pass the parameters by keyword: in current scikit-learn every DBSCAN
# argument after `eps` is keyword-only, so DBSCAN(eps, MinPts) raises
# TypeError.
model = DBSCAN(eps=eps, min_samples=MinPts)

# fit_predict() fits the model and returns the labels in one pass, so a
# separate fit() call beforehand only repeats the same work.
data['type'] = model.fit_predict(data)

# Scatter plot of the 'x' and 'y' columns, coloured by assigned cluster label.
plt.scatter(data['x'], data['y'], c=data['type'])
参考引用:
LVQ:https://blog.csdn.net/weixin_35732969/article/details/81141005
GMM:https://blog.csdn.net/FAICULTY/article/details/79343640
层次聚类:https://www.jianshu.com/p/b5e97f8d420b
密度聚类:https://www.jianshu.com/p/c2415196cc34