# coding: utf-8
"""K-means clustering, demonstrated on the pixels of an RGB image.

Principle of k-means:

    Input: samples x[1], x[2], ..., x[n], each an m-dimensional vector, and
    the requested number of clusters k.

    1. Randomly pick k representative elements z[1], ..., z[k]; z[i] is the
       centre of class i.
    2. Repeat:
         - update the class c[i] of every x[i] so that |x[i] - z[c[i]]| is
           minimal;
         - update every z[j] to the mean of all samples in class G[j];
       until z no longer changes.
"""
__author__ = 'jmh081701'

import math
import random

import numpy as np

# The demo entry point is deliberately not exported by ``import *``.
__all__ = ["calculate_zi", "find_ci", "k_mean"]

# Running count of loop iterations performed inside calculate_zi and find_ci;
# the script prints it after the demo has run.
cnt = 0


def calculate_zi(Gi, X):
    """Return the centre (mean vector) of the samples of ``X`` indexed by ``Gi``.

    Args:
        Gi: Iterable of indices into ``X``; the members of one cluster.
        X: Sequence of equal-length sample vectors.

    Returns:
        A float ``numpy`` array with the mean of the selected samples, or a
        zero vector when ``Gi`` is empty.
    """
    global cnt
    total = np.zeros(len(X[0]))
    members = 0
    for idx in Gi:
        cnt += 1
        total += X[idx]
        members += 1
    if members == 0:
        return total
    # Divide by the exact member count.  Padding the divisor with a small
    # epsilon biases the mean downwards, which turns e.g. 255.0 into
    # 254.999... and then into 254 once it is truncated to an integer.
    return total / members


def find_ci(xi, Z):
    """Return the index of the centre in ``Z`` closest to ``xi``.

    Closeness is the squared Euclidean distance.  On a tie the lowest index
    wins.

    Args:
        xi: One sample vector.
        Z: Sequence of centre vectors of the same length as ``xi``.

    Returns:
        The index of the nearest centre, or ``None`` when ``Z`` is empty.
    """
    global cnt
    # Work in float so unsigned-integer inputs cannot wrap around when
    # subtracted or multiplied.
    point = np.asarray(xi, dtype=float)
    best_index = None
    best_dist = np.inf
    for i, centre in enumerate(Z):
        cnt += 1
        diff = point - np.asarray(centre, dtype=float)
        dist = float(np.dot(diff, diff))
        if dist < best_dist:
            best_index, best_dist = i, dist
    return best_index


def k_mean(X, k):
    """Cluster the samples ``X`` into ``k`` groups with k-means.

    Args:
        X: Sequence of equal-length sample vectors.
        k: Number of clusters; must satisfy ``1 <= k <= len(X)``.

    Returns:
        A tuple ``(Z, c, k)`` where ``Z[j]`` is the centre of cluster ``j``
        (a float ``numpy`` array), ``c[i]`` is the cluster index assigned to
        ``X[i]`` and ``k`` is the cluster count that was passed in.

    Raises:
        ValueError: If ``k`` is smaller than 1 or larger than ``len(X)``,
            because ``k`` distinct samples are drawn as initial centres.
    """
    n_samples = len(X)
    if k < 1 or k > n_samples:
        raise ValueError(
            "k must satisfy 1 <= k <= len(X); got k=%r for %d samples"
            % (k, n_samples)
        )

    samples = [np.asarray(x, dtype=float) for x in X]

    # Initial centres: k distinct samples chosen at random (copied, so a
    # centre never aliases an input vector).
    Z = [samples[r].copy() for r in random.sample(range(n_samples), k)]
    # G[j] holds the indices into X of the samples currently in cluster j.
    G = [set() for _ in range(k)]
    # c[i] is the cluster of sample i.  None means "not assigned yet", which
    # forces the first pass to register every sample in its group; starting
    # everything at 0 would leave cluster 0's original members out of G[0].
    c = [None] * n_samples

    while True:
        changed = [False] * k
        for i, sample in enumerate(samples):
            new_ci = find_ci(sample, Z)
            old_ci = c[i]
            if new_ci == old_ci:
                continue
            # Move sample i from its old group (if any) to the closer one;
            # both groups need their centre recomputed.
            if old_ci is not None:
                G[old_ci].discard(i)
                changed[old_ci] = True
            G[new_ci].add(i)
            changed[new_ci] = True
            c[i] = new_ci

        if not any(changed):
            # No sample switched cluster: converged.
            break

        for j in range(k):
            # Only recompute centres of modified groups.  An emptied group
            # keeps its previous centre instead of collapsing to the origin.
            if changed[j] and G[j]:
                Z[j] = calculate_zi(G[j], samples)
    return Z, c, k


def test_rgb_img():
    """Cluster the pixels of ``1.jpg`` into 8 colours and show the result.

    Prints the image size, replaces every pixel by the centre of its cluster
    and displays the recoloured image.
    """
    # Imported here so the clustering functions above stay importable on
    # systems without Pillow.
    from PIL import Image

    filename = r"1.jpg"
    with Image.open(filename) as im:
        # convert() loads the data and returns an independent image, so the
        # pixels remain valid after the file is closed; it also guarantees
        # three channels per pixel.
        rgb = im.convert("RGB")
    width, height = rgb.size
    print(rgb.size)
    pixels = rgb.load()

    # Samples are stored column by column: index = x * height + y.
    X = []
    for x in range(width):
        for y in range(height):
            X.append(np.array(pixels[x, y]))

    Z, c, k = k_mean(X, 8)

    # Round (rather than truncate) each centre to the nearest 8-bit colour.
    palette = [tuple(int(round(v)) for v in centre) for centre in Z]
    new_im = Image.new("RGB", (width, height))
    for x in range(width):
        for y in range(height):
            new_im.putpixel((x, y), palette[c[x * height + y]])
    new_im.show()


if __name__ == '__main__':
    test_rgb_img()
    print(cnt)
Original picture:

Clustering result for k=8:

Clustering result for k=4:

Clustering result for k=2:

GitHub repository: https://github.com/jmhIcoding/ml.git