# Written in a Haskell mood -- although a while loop did sneak in at the end.
from itertools import groupby
from math import sqrt
def zipWith(f, xs, ys):
    """Combine two iterables element-wise with a binary function.

    Args:
        f: Callable taking one element of ``xs`` and one of ``ys``.
        xs: First iterable.
        ys: Second iterable.

    Returns:
        A list of ``f(x, y)`` for each pair produced by ``zip(xs, ys)``;
        the result is therefore as long as the shorter input.
    """
    return [f(x, y) for x, y in zip(xs, ys)]
def snd(x):
    """Return the second item (index 1) of an indexable ``x``."""
    return x[1]
def fst(x):
    """Return the first item (index 0) of an indexable ``x``."""
    return x[0]
def euclid(x, y):
    """Return the squared difference ``(x - y) ** 2`` of two coordinates.

    Note that, despite the name, this is a single squared term and not a
    distance: no square root is taken here.
    """
    return (x - y) ** 2
def distance(a, b):
    """Return the Euclidean distance between points ``a`` and ``b``.

    Args:
        a: Sequence of numeric coordinates.
        b: Sequence of numeric coordinates.

    Returns:
        The square root of the summed squared coordinate differences.
        Coordinates are paired with ``zipWith``, so any surplus coordinates
        of the longer point are ignored.
    """
    squared_diffs = zipWith(euclid, a, b)
    total = sum(squared_diffs)
    return sqrt(total)
def centroid(clusters):
    """Return the component-wise mean of the points in one cluster.

    Args:
        clusters: Despite the plural name, a single cluster, i.e. a sized
            collection of points, each point being a sequence of numbers.

    Returns:
        A list of floats, one mean per coordinate.  If the points differ in
        length, only the coordinates common to all of them are averaged.

    Raises:
        ValueError: If the cluster contains no points.
    """
    points = list(clusters)
    if not points:
        raise ValueError("cannot compute the centroid of an empty cluster")
    count = float(len(points))
    # zip(*points) regroups the data by coordinate, so each column can be
    # summed directly.  Building a real list (rather than relying on map())
    # keeps the result indexable and comparable on Python 2 and 3 alike.
    return [sum(column) / count for column in zip(*points)]
def closest(pts, pt):
    """Return the element of ``pts`` that lies nearest to ``pt``.

    Args:
        pts: Candidate points (any iterable of coordinate sequences).
        pt: The reference point.

    Returns:
        The candidate with the smallest ``distance`` to ``pt``.  When several
        candidates are equally near, the one occurring first in ``pts`` wins.

    Raises:
        IndexError: If ``pts`` is empty.
    """
    candidates = list(pts)
    if not candidates:
        raise IndexError("closest() needs at least one candidate point")
    # min() keeps the first of several equally-near candidates and evaluates
    # each candidate's distance exactly once.
    return min(candidates, key=lambda ct: distance(pt, ct))
def recluster_(centroids, points):
    """Group ``points`` by the centroid each one is nearest to.

    Args:
        centroids: Iterable of centroid points.
        points: Iterable of points to assign.

    Returns:
        A list of clusters, each a list of points.  Clusters are ordered by
        their centroid and the points inside a cluster are sorted, because
        the (centroid, point) pairs are sorted before grouping.  Centroids
        that compare equal end up sharing a single cluster.
    """
    # Materialise once: the centroids are scanned again for every point.
    centroids = list(centroids)
    # groupby() only merges adjacent items, hence the sort on (centroid, point).
    assigned = sorted((closest(centroids, pt), pt) for pt in points)
    # Build concrete lists so that the clusters can be compared with == / !=.
    return [[pt for _, pt in members]
            for _, members in groupby(assigned, key=fst)]
def recluster(clusters):
    """Perform one k-means step: recompute centroids, then reassign points.

    Args:
        clusters: Iterable of clusters, each a collection of points.

    Returns:
        The new clustering produced by ``recluster_`` from the centroid of
        every current cluster and the pooled points of all clusters.
    """
    # A concrete list, because the centroids are consulted once per point.
    centroids = [centroid(cluster) for cluster in clusters]
    # Flatten in a single pass instead of repeatedly concatenating lists.
    all_points = [pt for cluster in clusters for pt in cluster]
    return recluster_(centroids, all_points)
def part(l, points):
    """Split ``l`` into ``points`` contiguous chunks of near-equal size.

    Args:
        l: Sequence to split (must support ``len`` and slicing).
        points: Requested number of chunks.

    Returns:
        A list of consecutive slices of ``l``.  Chunk sizes differ by at most
        one, with the larger chunks first.  No chunk is ever empty, so fewer
        than ``points`` chunks are returned when ``l`` has fewer items than
        that (and an empty list when ``l`` is empty).

    Raises:
        ValueError: If ``points`` is smaller than 1.
    """
    if points < 1:
        raise ValueError("points must be a positive integer, got %r" % (points,))
    # Never create more chunks than there are items.
    n_chunks = min(points, len(l))
    if n_chunks == 0:
        return []
    base, extra = divmod(len(l), n_chunks)
    chunks = []
    start = 0
    for i in range(n_chunks):
        # The first `extra` chunks absorb the remainder, one item each.
        stop = start + base + (1 if i < extra else 0)
        chunks.append(l[start:stop])
        start = stop
    return chunks
def kmeans(k, points):
    """Cluster data by repeating ``recluster`` until nothing changes.

    Note the parameter naming: both arguments are forwarded unchanged to
    ``part(k, points)``, so ``k`` is the sequence of data points and
    ``points`` is the partition count used for the initial split.

    Args:
        k: Sequence of data points.
        points: Partition count handed to ``part`` for the initial clusters.

    Returns:
        The first clustering that ``recluster`` maps onto itself.
    """
    previous = part(k, points)
    while True:
        current = recluster(previous)
        # A fixed point of recluster means the assignment has converged.
        if current == previous:
            return current
        previous = current
if __name__ == "__main__":
    # Demo: run k-means on eight 3-D sample points with a partition count of 3.
    pts = [[1,2,4],[1,3,3],[4,3,0],[2,5,1],[7,3,8],[0,0,0],[4,3,2],[6,1,8]]
    # print() with a single argument behaves identically on Python 2 and 3.
    print(kmeans(pts, 3))