從零開始實現遺傳演算法(用遺傳演算法求解TSP)

05-13

知乎專欄： https://www.zhihu.com/people/feng-xue-ye-gui-zi

由於知乎不支持markdown格式，所以有公式的地方都使用了截圖，若影響閱讀效果，可移步我的Blog：風雪夜歸子 - CSDN博客，文章會同步更新！源碼可以去我的 Github fork！

前面我們用遺傳演算法動手做了兩個實驗，本篇文章我們再用遺傳演算法做一個實驗：用遺傳演算法求解TSP。

在這個問題中，我們的個體就是一條一條的路線了，其目的就是找到一條總距離最短的路線。基本步驟與前兩篇文章基本類似，不過在本問題中，我們用城市路線中每個城市的經緯度來表示個體(城市路線)的DNA。

在產生後代的過程中，需要注意的是，因為我們的個體是路線，所以我們不能夠像前兩篇文章一樣將兩個父本的樣本進行隨機交換，因為如果隨機交換，就會出現路線重複的問題，比如說，有兩個父本[2,1,0,3]和[3,0,1,2]，若將第一個元素進行交換得到一個後代[3,1,0,3]或者[2,0,1,2]，這就表示去過3號城市去了兩次或2號城市去了兩次，明顯不合理。這裡我們用了一個簡單技巧，比如說我們先取[2,1]，然後再到另一個父本中去掉[2,1]之後的剩下的城市，同時保持其順序，即從父本中取出的是[3,0]，然後concat就得到了一個後代[2,1,3,0]。詳細代碼如下：

def create_child(self, parent, population): if np.random.rand() < self.cross_rate: index = np.random.randint(0, self.n_population, size=1) cross_points = np.random.randint(0, 2, self.DNA_size).astype(np.bool) dad_DNA = parent[cross_points] mom_DNA = population[index, np.isin(population[index].ravel(), dad_DNA, invert=True)] parent = np.hstack((dad_DNA, mom_DNA)) #child = parent return parent

還有，基因突變的時候也會與以前不一樣，假設我們現在已經得到了一個後代[2,1,3,0]，我們現在要對其進行基因突變，如果我們將1變成了0，那麼我們也要將0變1，否則就會出現上面剛剛說到的問題，即城市名重複問題。

def mutate_child(self, child): for i in range(self.DNA_size): if np.random.rand() < self.mutate_rate: child = self.swap(i, child) return childdef swap(self, i, child): new_value = np.random.randint(0, self.DNA_size) j = np.argwhere(child == new_value)[0][0] child[j] = child[i] child[i] = new_value return child

其它的就不多說了，全文代碼如下：

import numpy as npfrom math import radians, cos, sin, asin, sqrtimport pandas as pdimport matplotlibimport matplotlib.pyplot as plt%matplotlib inlineimport sysreload(sys)sys.setdefaultencoding(utf8)class GeneticAlgorithm(object): """遺傳演算法. Parameters: ----------- cross_rate: float 交配的可能性大小. mutate_rate: float 基因突變的可能性大小. n_population: int 種群的大小. n_iterations: int 迭代次數. DNA_size: int DNA的長度. n_cities: int 城市個數. """ def __init__(self, cross_rate, mutation_rate, n_population, n_iterations, n_cities): self.cross_rate = cross_rate self.mutate_rate = mutation_rate self.n_population = n_population self.n_iterations = n_iterations self.DNA_size = n_cities self.n_cities = n_cities # 初始化一個種群 def init_population(self): population = np.array([np.random.permutation(self.DNA_size) for _ in np.arange(self.n_population)]).astype(np.int8) return population # 將個體的DNA轉換成ASCII def translateDNA(self, population, longitudes_latitudes): longitudes = np.empty_like(population, dtype=np.float64) latitudes = np.empty_like(population, dtype=np.float64) for i, person in enumerate(population): longitude_latitude = longitudes_latitudes[person] longitudes[i, :] = longitude_latitude[:, 0] latitudes[i, :] = longitude_latitude[:, 1] return longitudes, latitudes # 計算種群中每個個體的適應度，適應度越高，說明該個體的基因越好 def fitness(self, population, longitudes, latitudes): total_distances = np.empty((longitudes.shape[0],), dtype=np.float64) for i in range(population.shape[0]): # 方法一: 用歐氏距離計算 # total_distance = np.sum( np.power(np.diff(longitudes[i]), 2) + np.power(np.diff(latitudes[i]), 2) ) # 方法二: 用球面距離計算 total_distance = 0 for j in range(population.shape[1] - 1): total_distance = total_distance + self.haversine(longitudes[i][j], latitudes[i][j], longitudes[i][j+1], latitudes[i][j+1] ) total_distances[i] = total_distance fitness_score = np.exp(1/(total_distances + 1e-4)) return fitness_score, total_distances def haversine(self, lon1, lat1, lon2, lat2): """ Calculate the great circle distance between two points on the earth (specified in decimal degrees) 參數: 經度1, 緯度1, 經度2, 緯度2 (十進位度數) """ # 將十進位度數轉化為弧度 lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2]) # haversine公式 dlon = lon2 - lon1 dlat = lat2 - lat1 a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2 c = 2 * asin(sqrt(a)) r = 6371 # 地球平均半徑，單位為公里 return c * r # 對種群按照其適應度進行採樣，這樣適應度高的個體就會以更高的概率被選擇 def select(self, population, fitness_score): idx = np.random.choice(np.arange(self.n_population), size=self.n_population, replace=True, p=fitness_score/fitness_score.sum()) return population[idx] # 進行交配 def create_child(self, parent, population): if np.random.rand() < self.cross_rate: index = np.random.randint(0, self.n_population, size=1) cross_points = np.random.randint(0, 2, self.DNA_size).astype(np.bool) dad_DNA = parent[cross_points] mom_DNA = population[index, np.isin(population[index].ravel(), dad_DNA, invert=True)] parent = np.hstack((dad_DNA, mom_DNA)) #child = parent return parent # 基因突變 def mutate_child(self, child): for i in range(self.DNA_size): if np.random.rand() < self.mutate_rate: child = self.swap(i, child) return child def swap(self, i, child): new_value = np.random.randint(0, self.DNA_size) j = np.argwhere(child == new_value)[0][0] child[j] = child[i] child[i] = new_value return child # 進化 def evolution(self, longitudes_latitudes): population = self.init_population() longitudes, latitudes = self.translateDNA(population, longitudes_latitudes) #print(population.shape) for i in range(self.n_iterations): fitness_score, total_distances = self.fitness(population, longitudes, latitudes) #print(fitness_score) best_person = population[np.argmax(fitness_score)] best_person = best_person.reshape(-1, best_person.shape[0]) best_person_longitude, best_person_latitude = self.translateDNA(best_person, longitudes_latitudes) best_person_fitness_score, best_person_distance = self.fitness(best_person, best_person_longitude, best_person_latitude) if i % 100 == 0: print(u第%-4d次進化後, 基因最好的個體(最好的路線)是: %s, 其總距離為: %-4.2f 公里% (i, str(best_person[0]), best_person_distance)) if i == self.n_iterations - 1: print() print(u遺傳演算法找到的基因最好的個體(最好的路線)是: %s, 其總距離為: %-4.2f 公里% (str(cities[best_person][0]), best_person_distance) ) population = self.select(population, fitness_score) population_copy = population.copy() #print(population.shape) for parent in population: child = self.create_child(parent, population_copy) #print(child) child = self.mutate_child(child) parent[:] = child population = population self.best_person = best_person self.best_person_distance = best_person_distance self.best_person_longitude = best_person_longitude self.best_person_latitude = best_person_latitude def main(): # 載入數據集 #longitudes_latitudes = np.random.rand(n_cities, 2) data = pd.read_csv(F:/work/Machine-learning-implement/data/china.csv, sep=;, header=None) global cities cities = data.ix[:, 0].values n_cities = cities.shape[0] longitudes_latitudes = data.ix[:, 1:].values ga = GeneticAlgorithm(cross_rate=0.8, mutation_rate=0.01, n_population=100, n_iterations=500, n_cities=n_cities) ga.evolution(longitudes_latitudes) plt.figure(figsize=(12, 8)) zhfont1 = matplotlib.font_manager.FontProperties(fname=C:WindowsFontssimkai.ttf) plt.scatter(longitudes_latitudes[:, 0], longitudes_latitudes[:, 1], s=100, c=y) for i in range(ga.best_person_longitude.shape[1]): plt.text(ga.best_person_longitude[0][i] + 0.5, ga.best_person_latitude[0][i] + 0.5, "%s" % cities[ga.best_person][0][i], fontdict={size: 12, color: k}, fontproperties=zhfont1) plt.plot(ga.best_person_longitude[0], ga.best_person_latitude[0], r-) plt.text(ga.best_person_longitude[0].min()+1, ga.best_person_latitude[0].min()+1, "Total distance=%.2f" % ga.best_person_distance, fontdict={size: 10, color: k}) plt.axis(off) plt.show() if __name__ == __main__: main()