Nearest Neighbor Algorithm

  1 """
  2     最近邻算法:从任一城市开始,总是选择还未参观的城市中最近的那座,再选择路径中最短的那条
  3     具体如下:
  4     1. 从未参观的城市中选择距离当前路径最近的城市C
  5     2. 将C添加到路径末尾,并从未参观的城市中移除C
  6 """
  7 
  8 def nn_tsp(cities):
  9     start = first(cities)
 10     tour = [start]
 11     unvisited = set(cities - {start})
 12     while unvisited:
 13         C = nearest_neighbor(tour[-1], unvisited)
 14         tour.append(C)
 15         unvisited.remove(C)
 16     return tour
 17 
 18 def nearest_neighbor(A, cities):
 19     """
 20     寻找cities中距离城市A最近的城市
 21     :param A:
 22     :param cities:
 23     :return:
 24     """
 25     return min(cities, key=lambda c:distance(c, A))
 26 
 27 """
 28     虽然近似算法的速度很快,但是得到的却不是最短的距离
 29     搞清楚nn_tsp的起始城市,可能会有助于我们明白它为什么会“出错”
 30 """
 31 
 32 def plot_tour(tour):
 33     """
 34     将起始的城市标记为红色
 35     :param tour:
 36     :return:
 37     """
 38     start = tour[0]
 39     plot_lines(list(tour) + [start])
 40     plot_lines([start], 'rs')
 41 
 42 def length_ratio(cities):
 43     """
 44     计算nn_tsp与alltours_tsp路径长度的比值
 45     :param cities:
 46     :return:
 47     """
 48     return tour_length(nn_tsp(cities)) / tour_length(alltours_tsp(cities))
 49 
 50 """
 51     优化:nn_tsp固定了起始城市,很可能使得更优值丢失
 52     我们可以重复该算法,计算以任一城市为起点的最近邻算法的解,从中选择最优
 53 """
 54 def repeated_nn_tsp(cities):
 55     return shortest_tour(nn_tsp(cities, start)
 56                          for start in cities)
 57 
 58 
 59 def nn_tsp(cities, start=None):
 60     if start is None:
 61         start = first(cities)
 62 
 63     tour = [start]
 64     unvisited = set(cities - {start})
 65     while unvisited:
 66         C = nearest_neighbor(tour[-1], unvisited)
 67         tour.append(C)
 68         unvisited.remove(C)
 69     return tour
 70 
 71 """
 72     比较nn_tsp与repeat_nn_tsp
 73     plot_tsp(nn_tsp, cities(100))
 74     plot_tsp(repeated_nn_tsp, cities(100))
 75     100 city tour with length 6734.1 in 0.004 secs for nn_tsp
 76     100 city tour with length 5912.6 in 0.485 secs for repeated_nn_tsp
 77     显然repeat_nn_tsp比nn_tsp表现的更好,但是,如果当城市的个数n很大时,我们就要重复n次,
 78     有木有一种方法能够改进这种repeat的方式————只是穷尽部分而不是全部的城市
 79 """
 80 def repeated_nn_tsp(cities, repetitions=100):
 81     return shortest_tour(nn_tsp(cities, start)
 82                          for start in sample(cities, repetitions))
 83 
 84 def sample(population, k, seed=42):
 85     """
 86     从population中选择k个元素作为一个样本
 87     :param population:
 88     :param k:
 89     :param seed:
 90     :return:
 91     """
 92     if k is None or k > len(population):
 93         return population
 94     random.seed(len(population) * k * seed)
 95     return random.sample(population, k)
 96 
 97 """
 98 """
 99 def Maps(num_maps, num_cities):
100     """
101 
102     :param num_maps:
103     :param num_cities:
104     :return:
105     """
106     return tuple(cities(num_cities, seed=(m, num_cities))
107                  for m in range(num_maps))
108 
109 """
110 
111 """
112 def memoize(func):
113     """
114     缓存结果
115     :param func:
116     :return:
117     """
118     cache = {}
119     def new_func(*args):
120         try:
121             # 判断,如果之前已经运算过该函数,则返回之前的值
122             return cache[args]
123         except TypeError:
124             # 如果args无法hash,则计算该函数并返回值
125             return func(*args)
126         except KeyError:
127             # 如果args可哈希,计算该函数并存储
128             cache[args] = func(*args)
129             return cache[args]
130     new_func._cache = cache
131     new_func.__doc__ = func.__doc__
132     new_func.__name__ = func.__name__
133     return new_func
134 
135 
136 
137 @memoize
138 def benchmark(function, inputs):
139     """
140     在所有的input上运行, 返回(平均时耗,结果)
141     :param function:
142     :param inputs:
143     :return:
144     """
145     t0 = time.clock()
146     results = list(map(function, inputs))
147     t1 = time.clock()
148     average_time = (t1 - t0) / len(inputs)
149     return (average_time, results)
150 
151 def benchmarks(tsp_algorithms, maps=Maps(30,60)):
152     """
153     同时计算多种算法
154     :param tsp_algorithms:
155     :param maps:
156     :return:
157     """
158     for tsp in tsp_algorithms:
159         time, results = benchmark(tsp, maps)
160         lengths = map(tour_length, results)
161         print("{:>25} |{:7.1f} ± {:4.0f} ({:5.0f} to {:5.0f}) |{:7.3f} secs/map | {}   {}-city maps"
162               .format(tsp.__name__, mean(lengths), stddev(lengths), min(lengths), max(lengths),
163                       time, len(maps), len(maps[0])))
164 
165 def mean(numbers):
166     return sum(numbers) / len(numbers)
167 
168 def stddev(numbers):
169     """
170     标准差
171     :param numbers:
172     :return:
173     """
174     return (mean([x ** 2 for x in numbers]) - mean(numbers) ** 2) ** 0.5
175 
176 """
177     当城市的数量为多少时,nn_tsp能取得最好的效果
178 """
179 def repeat_10_nn_tsp(cities):return repeated_nn_tsp(cities, 10)
180 def repeat_25_nn_tsp(cities):return repeated_nn_tsp(cities, 25)
181 def repeat_50_nn_tsp(cities):return repeated_nn_tsp(cities, 50)
182 def repeat_100_nn_tsp(cities):return repeated_nn_tsp(cities, 100)
183 
184 algorithms = [nn_tsp, repeat_10_nn_tsp, repeat_25_nn_tsp, repeat_25_nn_tsp,
185               repeat_50_nn_tsp, repeat_100_nn_tsp]
186 print(benchmarks(algorithms))
187 
188 """
189     最近邻面临的问题:离群点
190 """
191 outliers_list = [City(2, 2),  City(2, 3),  City(2, 4),  City(2, 5),  City(2, 6),
192                  City(3, 6),  City(4, 6),  City(5, 6),  City(6, 6),
193                  City(6, 5),  City(6, 4),  City(6, 3),  City(6, 2),
194                  City(5, 2),  City(4, 2),  City(3, 2),
195                  City(1, 6.8),  City(7.8, 6.4),  City(7, 1.2),  City(0.2, 2.8)]
196 
197 outliers = set(outliers_list)
198 
199 plot_lines(outliers, 'bo')

 

  

 1 plot_tsp(nn_tsp, outliers) 

 1 """
 2     存在四个围绕的离群点,但是我们却让它们各自连接————使用了最长的四条线
 3     —————这不科学
 4 """
 5 def plot_labeled_lines(points, *args):
 6     """
 7     分别画出每个点并标记
 8     :param points:
 9     :param args:
10     :return:
11     """
12     plot_lines(points, 'bo')
13     for (label, p) in enumerate(points):
14         plt.text(X(p), Y(p), ' '+str(label))
15 
16         style = 'bo-'
17         for arg in args:
18             if isinstance(arg, str):
19                 style = arg
20             else:# arg 是points序列链
21                 Xs = [X(points[i]) for i in arg]
22                 Ys = [Y(points[i]) for i in arg]
23                 plt.plot(Xs, Ys, style)
24     plt.axis('scaled')
25     plt.axis('off')
26     plt.show()
27 """
28   注意到在第四个点时
29 """
30 plot_labeled_lines(outliers_list, 'bo-', [0, 1, 2, 3, 4], 'ro--', [4, 16], 'bo--', [4, 5])

 

  

"""
  此时若是选择离群点所得到的结果更优,
  在没有模糊的环境下,很难决定是否选择离群点还是义无反顾的选择最近邻
  建议在构建路径的过程中不要总是想着去做决定,不妨等到路径建设完全后再来考虑离群点
"""

 

  




posted @ 2015-03-24 23:34  Kalpan  阅读(644)  评论(0编辑  收藏  举报