# Build three initial cluster centres from the rows of `cc`:
# row 0 as-is, the mean of rows 1-2, and the mean of rows 3 onward.
# (The original line had an extra closing parenthesis, which is a SyntaxError.)
init = np.array([cc[0], cc[1:3].mean(axis=0), cc[3:].mean(axis=0)])
init  # shape: (3, 23) -- 3 cluster centres (3 target clusters), one value per each of the 23 variables
>>>
array([[ 8.58138315e-01, -6.32089539e-03, -7.09504719e-02,
        -1.07528958e-01,  5.53871437e-03, -1.35326569e-01,
        -3.50160094e-02, -6.24170829e-03, -7.65911653e-02,
        -6.11620977e-02, -6.28841001e-02, -3.20038354e-02,
        -5.30895347e-02, -5.06773339e-02,  1.79398913e-01,
        -4.66092054e-02,  5.56972009e-03, -4.71417616e-02,
        -2.10955588e-01, -4.87051726e-02, -1.73233863e-03,
        -3.73275420e-02, -7.06864442e-04],
       [-3.51357569e-01,  1.43431653e-01, -7.09504719e-02,
         1.27921832e-01, -5.74469004e-02,  2.09236529e-01,
        -3.50160094e-02,  1.96311992e-01,  3.23475874e-02,
        -1.31726560e-01,  3.57251560e-01, -5.15899191e-02,
        -5.30895347e-02, -1.25491296e-01, -2.69016977e-01,
        -3.96680230e-02, -5.08697263e-02,  9.40163865e+00,
         3.49077944e-01,  2.98768592e-02, -3.17764827e-02,
         4.51912144e-01,  2.96023541e-01],
       [-1.24071949e-01, -1.18783683e-02,  2.68147009e+00,
         3.09763947e-01, -1.23061993e-02,  1.41113588e-01,
         2.97285920e+00, -4.09739156e-02,  1.99286561e-02,
         1.10281151e+00,  1.05728041e+00,  2.17326255e-01,
         3.13918418e+00,  4.49698745e-01,  2.38760196e-02,
         2.68086133e+00,  3.05643393e-02, -4.71417616e-02,
         8.72121529e-01,  1.15639109e-01,  1.09733551e-01,
         2.96646391e-01,  8.28448624e-02]])
model_data  # display the data that is fitted by KMeans below; 23 variables

在这里插入图片描述

# Seed KMeans with the hand-built centres in `init`; when `init` is not given
# it defaults to 'k-means++'.
# (The original call was missing its closing parenthesis, which is a SyntaxError.)
model = KMeans(n_clusters=3, init=init)
model.fit(model_data)
查看聚类效果
from sklearn.manifold import TSNE

# Embed the data with t-SNE (reduced to 2 dimensions, per the original note)
# and wrap the resulting coordinates in a DataFrame.
tsne = pd.DataFrame(TSNE(random_state=105).fit_transform(clu_data))

# Plot one scatter series per cluster; labels stored in 'k_means' are 1-based.
cluster_ids = clu_data.reset_index()['k_means']
for label in range(1, k + 1):
    points = tsne[cluster_ids == label]
    plt.scatter(points[0], points[1], s=5, label='类别%s' % label)
plt.legend()

请忽略图例中的8个类别,图中实际只有3个类别
在这里插入图片描述
这效果很差,可以先聚成8个类别,然后手动分成三类

# Over-segment into 8 clusters with a fixed random_state; 'init' is left at
# its default, 'k-means++'.
model = KMeans(random_state=100, n_clusters=8).fit(model_data)
# Attach each row's cluster id to the original data, shifted to start at 1.
clu_data['k_means'] = 1 + model.labels_

在这里插入图片描述
新类别1:类别1
新类别2:类别2+类别3
新类别3:类别4+类别5+类别6+类别7+类别8

Logo

华为开发者空间,是为全球开发者打造的专属开发空间,汇聚了华为优质开发资源及工具,致力于让每一位开发者拥有一台云主机,基于华为根生态开发、创新。

更多推荐