python計算機視覺第六章

05-21

python計算機視覺第六章

來自專欄 cv python

Kmean聚類

K均值聚類的目標是類內總方差最小

圖6-1

from scipy.cluster.vq import kmeans, vq
import numpy as np
import pylab as pl

# Build two synthetic 2-D classes of 100 points each: one centred on the
# origin (std 1.5), the other centred on (5, 5) (std 1).
class1 = 1.5 * np.random.randn(100, 2)
class2 = np.random.randn(100, 2) + np.array([5, 5])
features = np.vstack((class1, class2))

# Run K-means on the 200x2 data with 2 centres. `kmeans` returns the
# centroids and the mean distortion (average distance to the nearest centroid).
centroid, variance = kmeans(features, 2)

# Assign every sample to its nearest centroid; `code` holds the cluster
# index per sample and `distance` the distance to that centroid.
code, distance = vq(features, centroid)

# Plot each cluster with its own marker and the centroids as green circles.
pl.figure()
ndx = np.where(code == 0)[0]
pl.plot(features[ndx, 0], features[ndx, 1], '*')
ndx = np.where(code == 1)[0]
pl.plot(features[ndx, 0], features[ndx, 1], '+')
pl.plot(centroid[:, 0], centroid[:, 1], 'go')
pl.show()

圖6-4 對圖像進行像素聚類

from scipy.cluster.vq import kmeans, vq
import numpy as np


def im_kmean(im, steps, K=3):
    """Cluster an image into K colour groups on a steps x steps grid.

    The image is divided into ``steps * steps`` rectangular regions, the mean
    of the first three channels is computed per region, the region means are
    clustered with K-means, and the resulting label grid is upsampled back to
    the image size with nearest-neighbour sampling.

    Args:
        im: Array of shape (height, width, channels) with at least 3 channels.
        steps: Number of regions along each image axis.
        K: Number of K-means centres.

    Returns:
        Integer array of shape ``im.shape[:2]`` holding the cluster index of
        the region each pixel falls into.

    Raises:
        ValueError: If the image is smaller than ``steps`` along either axis.
    """
    height, width = im.shape[:2]
    # Integer division: the region sizes are used as slice bounds.
    dx = height // steps
    dy = width // steps
    if dx == 0 or dy == 0:
        raise ValueError("image is smaller than steps along at least one axis")

    # Mean colour of every region, in the same row-major (x, y) order a
    # nested loop over the grid would produce.
    regions = im[:steps * dx, :steps * dy, :3].reshape(steps, dx, steps, dy, 3)
    features = regions.mean(axis=(1, 3)).reshape(-1, 3).astype('f')

    # K-means on the region colours, then label each region.
    centroids, variance = kmeans(features, K)
    code, distance = vq(features, centroids)
    codeim = code.reshape(steps, steps)

    # Nearest-neighbour upsampling with plain NumPy indexing
    # (scipy.misc.imresize is not available in current SciPy releases).
    rows = np.arange(height) * steps // height
    cols = np.arange(width) * steps // width
    return codeim[np.ix_(rows, cols)]


def main():
    """Cluster two sample images at two grid resolutions and display them."""
    # Imported here so that im_kmean can be used without Pillow / pylab.
    import pylab as pl
    from PIL import Image

    im = np.array(Image.open('../data/empire.jpg'))
    codeim_50 = im_kmean(im, 50)
    codeim_100 = im_kmean(im, 100)

    im_2 = np.array(Image.open('../data/boy_on_hill.jpg'))
    codeim_50_2 = im_kmean(im_2, 50)
    codeim_100_2 = im_kmean(im_2, 100)

    pl.figure()
    panels = [im, codeim_50, codeim_100, im_2, codeim_50_2, codeim_100_2]
    for position, panel in enumerate(panels, start=1):
        pl.subplot(2, 3, position)
        pl.imshow(panel)
    pl.show()


if __name__ == '__main__':
    main()

圖6-5 層次聚類：基於樣本間的成對距離建立一個相似性樹。該演算法首先將特徵向量距離最近的兩個樣本歸併為一組（定義為一個 ClusterNode 對象），並在樹中創建一個平均節點，將這兩個距離最近的樣本作為該平均節點下的子節點；然後在剩下的樣本（包含已生成的平均節點）中尋找下一對距離最近的節點，重複進行前面的操作。每一個節點處都保留了兩個子節點之間的距離。遍歷整個樹時，可以根據設定的閾值，在距離小於閾值的節點位置終止遍歷，從而提取聚類簇。

# coding=UTF-8
"""Agglomerative hierarchical clustering with a simple dendrogram renderer."""
from itertools import combinations

import numpy as np


class ClusterNode(object):
    """Internal tree node produced by merging two sub-clusters.

    Attributes are set directly from the constructor arguments: ``vec`` is the
    node's feature vector, ``left`` / ``right`` are the child nodes,
    ``distance`` is the distance between the two children when they were
    merged, and ``count`` is stored as given.
    """

    def __init__(self, vec, left, right, distance=0.0, count=1):
        self.left = left
        self.right = right
        self.vec = vec
        self.distance = distance
        self.count = count

    def extract_clusters(self, dist):
        """Return the sub-trees whose merge distance is below ``dist``.

        The traversal stops at the first node on each branch whose distance
        is smaller than ``dist``; otherwise it recurses into both children.
        """
        if self.distance < dist:
            return [self]
        return (self.left.extract_clusters(dist)
                + self.right.extract_clusters(dist))

    def get_cluster_elements(self):
        """Return the ids of all leaves below this node."""
        return (self.left.get_cluster_elements()
                + self.right.get_cluster_elements())

    def get_height(self):
        """Return the height of this node: the sum of both children's heights."""
        return self.left.get_height() + self.right.get_height()

    def get_depth(self):
        """Return the deeper child's depth plus this node's merge distance."""
        return max(self.left.get_depth(), self.right.get_depth()) + self.distance

    def draw(self, draw, x, y, s, imlist, im):
        """Draw this node's connector lines, then recurse into both children.

        Args:
            draw: Drawing object providing ``line``.
            x, y: Position of this node in the output image.
            s: Horizontal scale applied to merge distances.
            imlist: Image file names, passed through to the children.
            im: Output image, passed through to the children.
        """
        # Each leaf occupies 20 pixels vertically; half of each child's
        # extent positions the two branches around y.
        h1 = int(self.left.get_height() * 20 / 2)
        h2 = int(self.right.get_height() * 20 / 2)
        top = y - (h1 + h2)
        bottom = y + (h1 + h2)

        # Vertical line joining the two children.
        draw.line((x, top + h1, x, bottom - h2), fill=(0, 0, 0))

        # Horizontal lines whose length is the scaled merge distance.
        l1 = self.distance * s
        draw.line((x, top + h1, x + l1, top + h1), fill=(0, 0, 0))
        draw.line((x, bottom - h2, x + l1, bottom - h2), fill=(0, 0, 0))

        self.left.draw(draw, x + l1, top + h1, s, imlist, im)
        self.right.draw(draw, x + l1, bottom - h2, s, imlist, im)


class ClusterfNode(object):
    """Leaf node holding one sample's feature vector and its index."""

    def __init__(self, vec, id):
        self.vec = vec
        self.id = id

    def extract_clusters(self, dist):
        """A leaf is always its own cluster."""
        return [self]

    def get_cluster_elements(self):
        """Return a one-element list with this leaf's id."""
        return [self.id]

    def get_height(self):
        """A leaf has height 1."""
        return 1

    def get_depth(self):
        """A leaf has depth 0."""
        return 0

    def draw(self, draw, x, y, s, imlist, im):
        """Paste a thumbnail (at most 20x20) of this leaf's image at (x, y)."""
        # Local import: Pillow is needed only for drawing, not for clustering.
        from PIL import Image

        nodeim = Image.open(imlist[self.id])
        nodeim.thumbnail((20, 20))
        ns = nodeim.size
        box = (int(x), int(y - ns[1] // 2),
               int(x + ns[0]), int(y + ns[1] - ns[1] // 2))
        im.paste(nodeim, box)


def l2dist(v1, v2):
    """Return the Euclidean (L2) distance between two vectors."""
    return np.sqrt(np.sum((v1 - v2) ** 2))


def l1dist(v1, v2):
    """Return the L1 (sum of absolute differences) distance between two vectors."""
    return np.sum(np.abs(v1 - v2))


def hcluster(features, disfcn=l2dist):
    """Build a hierarchical cluster tree from rows of features.

    Repeatedly merges the closest pair of nodes into a new ``ClusterNode``
    whose vector is the average of the pair, until one node remains.

    Args:
        features: Iterable of feature vectors, one per sample.
        disfcn: Distance function taking two vectors.

    Returns:
        The root node of the tree.

    Raises:
        ValueError: If no pair of nodes has a distance that compares below
            infinity (for example when the distances are NaN).
    """
    # Cache of pairwise distances, keyed by the pair of node objects.
    distance = {}
    node = [ClusterfNode(np.array(f), id=i) for i, f in enumerate(features)]

    while len(node) > 1:
        # Start from infinity so a closest pair is found whatever the scale
        # of the distances is.
        closest = float('inf')
        lowestpair = None
        for ni, nj in combinations(node, 2):
            if (ni, nj) not in distance:
                distance[ni, nj] = disfcn(ni.vec, nj.vec)
            d = distance[ni, nj]
            if d < closest:
                closest = d
                lowestpair = (ni, nj)
        if lowestpair is None:
            raise ValueError("no comparable pairwise distance between nodes")

        ni, nj = lowestpair
        new_vec = (ni.vec + nj.vec) / 2.0
        new_node = ClusterNode(new_vec, left=ni, right=nj, distance=closest)
        node.remove(ni)
        node.remove(nj)
        node.append(new_node)

    return node[0]


def draw_dendrogram(node, imlist, filename='clusters.jpg'):
    """Render the tree as a dendrogram image, save it and show it.

    Args:
        node: Root node of the tree to draw.
        imlist: Image file names indexed by leaf id.
        filename: Path the rendered image is saved to.
    """
    # Local import: Pillow is needed only for drawing, not for clustering.
    from PIL import Image, ImageDraw

    # 20 pixels of height per leaf, fixed width.
    rows = node.get_height() * 20
    cols = 1200

    # Scale distances to the available width; a depth of 0 (single leaf or
    # identical samples) would otherwise divide by zero.
    s = float(cols - 150) / (node.get_depth() or 1.0)

    im = Image.new('RGB', (cols, rows), (255, 255, 255))
    draw = ImageDraw.Draw(im)

    # Short root line, then the recursive drawing of the tree.
    draw.line((0, rows / 2, 20, rows / 2), fill=(0, 0, 0))
    node.draw(draw, 20, (rows / 2), s, imlist, im)
    im.save(filename)
    im.show()

對每幅圖像計算多維直方圖，在每個顏色通道使用8個小區間進行量化，將三個通道量化後的小區間拉成一行後便可用512維向量描述當前圖像

import os

import numpy as np
from PIL import Image

import hcluster

# Collect the JPEG files of the sunset image set; sorted so that the image
# indices are stable between runs (os.listdir order is arbitrary).
path = '../data/sunsets/flickr-sunsets-small/'
imlist = sorted(os.path.join(path, f)
                for f in os.listdir(path) if f.endswith('.jpg'))

# One 512-dimensional feature per image: a 3-D colour histogram with 8 bins
# per channel (8 * 8 * 8 = 512), flattened to a row.
feature = np.zeros((len(imlist), 512))
for i, f in enumerate(imlist):
    # Force three channels so reshape(-1, 3) yields one row per pixel.
    im = np.array(Image.open(f).convert('RGB'))
    # `density=True` normalises the histogram (the older `normed` keyword is
    # no longer accepted by current NumPy releases).
    h, edges = np.histogramdd(im.reshape(-1, 3), 8, density=True,
                              range=[(0, 255), (0, 255), (0, 255)])
    feature[i] = h.flatten()

# Cluster the histograms hierarchically and render the dendrogram.
tree = hcluster.hcluster(feature)
hcluster.draw_dendrogram(tree, imlist)

# Continuation of the histogram-clustering script: uses `tree`, `imlist`,
# `np` and `Image` defined there. pylab is imported here because that script
# does not import it.
import pylab as pl

# Cut the tree at 23% of the root merge distance.
clusters = tree.extract_clusters(0.23 * tree.distance)

# Show every cluster with more than 3 members in its own figure, at most 20
# images per figure on a 4x5 grid.
for c in clusters:
    elements = c.get_cluster_elements()
    nbr_elements = len(elements)
    if nbr_elements > 3:
        pl.figure()
        for p in range(min(nbr_elements, 20)):
            pl.subplot(4, 5, p + 1)
            im = np.array(Image.open(imlist[elements[p]]))
            pl.imshow(im)
pl.show()

6.3 譜聚類

對於n個元素，相似矩陣是一個n×n的矩陣，矩陣每個元素表示兩兩之間的相似性分數。譜聚類是由相似性矩陣構建譜矩陣完成的，對該譜矩陣進行特徵分解得到的特徵向量可以用於降維，然後聚類

依據S創建一個拉普拉斯矩陣L

$L=I-D^{-1/2}SD^{-1/2}$ 其中I是單位矩陣，D是對角矩陣，對角線上的元素是S對應行元素之和

計算L的特徵向量（進行SVD分解），並使用k個最大特徵值對應的k個特徵向量，構建出一個特徵向量集

創建一個矩陣，該矩陣的各列是由之前求出的k個特徵向量構成，每一行可以看做一個新的特徵向量，長度為k。

python計算機視覺 第六章

python計算機視覺第六章