'''K-Means.py
Read in an image catalog, and then cluster the images.
The criteria to use in the clustering can be adjusted by
editing this file.

Catalog format (as parsed by read_catalog): one image per line, each line
a list of "ATTRIBUTE: value" pairs separated by "; ".  Every line must
carry an IMG_NO attribute (1-based); the distance and mean functions in
this file additionally read BLUE_FRAC and GREEN_FRAC.

The script targets Jython (the plots use javax.swing).  All print calls
use the single-argument parenthesised form, which produces the same output
under Python 2 / Jython and is also valid Python 3.
'''

import random
from math import sqrt

try:
    from javax.swing import JFrame, JPanel
    from java.awt import Dimension, Color
    _HAVE_SWING = True
except ImportError:
    # Not running under Jython: clustering still works, plotting is skipped.
    JFrame = Dimension = Color = None
    JPanel = object
    _HAVE_SWING = False

# Number of catalog records, and the records themselves (a list of dicts
# indexed by IMG_NO-1).  Both are filled in by read_catalog.
N = 0
DATA = None

# Current cluster centers; filled in by clusterViaKMeans.
CENTERS = None


def read_catalog(cat_filename):
    '''Load the catalog file into the globals DATA and N.

    Each non-blank line becomes a dict mapping attribute names to their
    (string) values, stored at DATA[IMG_NO-1].  If the file cannot be
    opened, a message is printed and DATA and N are left untouched.
    '''
    global DATA, N
    try:
        datafile = open(cat_filename, "r")
    except IOError:
        print("Could not read a file named: "+cat_filename)
        print("Sorry, but we can't cluster data that's not available.")
        return
    # try/finally rather than `with` so the file is closed on every path
    # without requiring a newer Jython.
    try:
        raw_lines = datafile.readlines()
    finally:
        datafile.close()

    # Drop line terminators (they would otherwise stay glued to the last
    # value on each line) and ignore blank lines such as a trailing one.
    lines = [line.strip() for line in raw_lines]
    lines = [line for line in lines if line]

    N = len(lines)
    print("Number of lines read: "+str(N))
    DATA = N*[None]
    for line in lines:
        one_images_info = {}
        for pair in line.split("; "):
            pair_as_tuple = pair.split(": ")
            one_images_info[pair_as_tuple[0]] = pair_as_tuple[1]
        IMG_NO = int(one_images_info['IMG_NO'])
        DATA[IMG_NO-1] = one_images_info
    print("Done")


def put(attribute, value, hash):
    '''Store value under attribute in the mapping hash.'''
    hash[attribute] = value


def get(attribute, hash):
    '''Return hash[attribute], or None when the lookup fails.'''
    try:
        return hash[attribute]
    except (KeyError, IndexError, TypeError):
        # Best-effort lookup: a missing key or a non-subscriptable
        # container yields None, but interrupts are no longer swallowed.
        return None


def d1(image_info_1, image_info_2):
    '''Euclidean distance between two records in (BLUE_FRAC, GREEN_FRAC).'''
    blue1 = float(image_info_1['BLUE_FRAC'])
    blue2 = float(image_info_2['BLUE_FRAC'])
    green1 = float(image_info_1['GREEN_FRAC'])
    green2 = float(image_info_2['GREEN_FRAC'])
    return sqrt((blue2-blue1)**2+(green2-green1)**2)


def clusterViaKMeans(nclusters, max_iter, distFn, newPointFn):
    '''Cluster the records in DATA with K-means.

    nclusters  -- number of clusters (must be at least 1).
    max_iter   -- number of assign/recompute iterations to run.
    distFn     -- distFn(a, b) returns the distance between two records.
    newPointFn -- newPointFn(example, perturb=True) builds an initial
                  center from a randomly chosen record.

    Side effects: sets the global CENTERS, writes the index of the nearest
    center into each record under the key 'CLUSTER', prints progress and
    calls plot() once per iteration.  Returns None.
    '''
    global DATA, CENTERS
    print("Starting K-means clustering with: nclusters="+str(nclusters))
    if not DATA:
        # read_catalog failed or found nothing; random.choice would raise.
        print("No catalog data is loaded, so there is nothing to cluster.")
        return
    if nclusters < 1:
        raise ValueError("nclusters must be at least 1")

    # choose random elements to serve as centers:
    CENTERS = []
    CLUSTERS = []
    for i in range(nclusters):
        example = random.choice(DATA)
        CENTERS.append(newPointFn(example, perturb=True))
        CLUSTERS.append([])

    # Now begin the iteration:
    for iteration in range(max_iter):
        print("Starting iteration "+str(iteration))

        # Assign each item to the cluster with the nearest center.  min()
        # keeps the first index on ties, like a strict "<" scan, and needs
        # no artificial upper bound on the distance.
        for i, item in enumerate(DATA):
            closest = min(range(nclusters),
                          key=lambda j: distFn(item, CENTERS[j]))
            CLUSTERS[closest].append(i)
            item['CLUSTER'] = closest

        # For each cluster center, recompute the point as the mean of its
        # members.
        for i in range(nclusters):
            cluster = CLUSTERS[i]
            if not cluster:
                # An empty cluster has no mean; leave its center in place
                # rather than dividing by zero.
                print("Center["+str(i)+"] has no members; keeping it in place")
                continue
            newCenter = createClusterMean(cluster)
            # Report on how far the center is moving:
            d = distFn(newCenter, CENTERS[i])
            print("Center["+str(i)+"] is moving a distance of "+str(d))
            CENTERS[i] = newCenter

        plot(CLUSTERS, CENTERS, iteration)

        # Get ready to redefine the clusters:
        CLUSTERS = [[] for i in range(nclusters)]


def newPoint(example, perturb=False):
    '''Build a center record from example.

    The result holds BLUE_FRAC and GREEN_FRAC as strings.  With
    perturb=True each coordinate is increased by a random amount in
    [0, 0.01) so that centers seeded from the same record differ.
    '''
    blue = float(example['BLUE_FRAC'])
    green = float(example['GREEN_FRAC'])
    if perturb:
        dBlue = random.random() / 100
        dGreen = random.random() / 100
    else:
        dBlue = dGreen = 0.0
    newP = {'BLUE_FRAC': str(blue+dBlue),
            'GREEN_FRAC': str(green+dGreen)}
    return newP


def createClusterMean(cluster):
    '''Return the mean record of a cluster.

    cluster is a list of indices into DATA.  The result holds the average
    BLUE_FRAC and GREEN_FRAC as floats.  An empty cluster has no mean, so
    None is returned for it.
    '''
    global DATA
    n = len(cluster)
    if n == 0:
        return None
    blue_frac = 0.0
    green_frac = 0.0
    for index in cluster:
        item = DATA[index]
        blue_frac += float(item['BLUE_FRAC'])
        green_frac += float(item['GREEN_FRAC'])
    blue_frac /= n
    green_frac /= n
    newMean = {'BLUE_FRAC': blue_frac, 'GREEN_FRAC': green_frac}
    return newMean


def plot(clusters, centers, iter):
    '''Open a window showing the clusters and centers of one iteration.

    Does nothing when Swing could not be imported (not running on Jython).
    '''
    if not _HAVE_SWING:
        return
    w = JFrame("Plotting Window "+str(iter))
    w.setSize(Dimension(500, 500))
    p = PlotPanel(clusters, centers)
    # Add the panel before showing the frame so it is laid out and painted
    # on first display.
    w.getContentPane().add(p)
    w.setVisible(True)


class PlotPanel(JPanel):
    '''Panel that draws cluster members (green) and centers (blue).'''

    def __init__(self, clusters, centers):
        # Copy the outer lists so later rebinding by the caller does not
        # change what this panel draws.
        self.clusters = clusters[:]
        self.centers = centers[:]

    def paintComponent(self, g):
        '''Draw every member with its IMG_NO and every center with its index.

        BLUE_FRAC maps to x and GREEN_FRAC to y, scaled by 600 and offset
        by 20 pixels.
        '''
        ncenters = len(self.centers)
        print("ncenters="+str(ncenters))
        scale = 600
        for c in range(ncenters):
            g.setColor(Color.green)
            for index in self.clusters[c]:
                member = DATA[index]
                blue = int(scale*float(member['BLUE_FRAC'])) + 20
                green = int(scale*float(member['GREEN_FRAC'])) + 20
                img_no = member['IMG_NO']
                g.fillOval(blue, green, 10, 10)
                g.drawString(img_no, blue+12, green)
            center = self.centers[c]
            blue = int(scale*float(center['BLUE_FRAC'])) + 20
            green = int(scale*float(center['GREEN_FRAC'])) + 20
            g.setColor(Color.blue)
            g.fillOval(blue, green, 10, 10)
            g.drawString(str(c), blue+12, green)


# Script driver: load the catalog and run 5 iterations with 5 clusters.
read_catalog('Images-for-Pattern-Rec_reduced_by_0.1-catalog.txt')
clusterViaKMeans(5, 5, d1, newPoint)