matlab进行社群划分
结论:
目前对louvain社群划分的方法,用的比较多:
code如下
import collections
import random
import networkx as nx
import matplotlib.pyplot as plt
def load_graph(path):
G = collections.defaultdict(dict)
with open(path) as text:
for line in text:
vertices = line.strip().split()
v_i = int(vertices[0])
v_j = int(vertices[1])
# w = float(vertices[2])
G[v_i][v_j] = 1
return G
class Vertex:
def __init__(self, vid, cid, nodes, k_in=0):
self._vid = vid
self._cid = cid
self._nodes = nodes
self._kin = k_in # 结点内部的边的权重
class Louvain:
def __init__(self, G):
self._G = G
self._m = 0 # 边数量
# 需维护的关于社区的信息(社区编号,其中包含的结点编号的集合)
self._cid_vertices = {}
# 需维护的关于结点的信息(结点编号,相应的Vertex实例)
self._vid_vertex = {}
for vid in self._G.keys():
self._cid_vertices[vid] = set([vid])
self._vid_vertex[vid] = Vertex(vid, vid, set([vid]))
self._m += sum([1 for neighbor in self._G[vid].keys() \
if neighbor > vid])
def first_stage(self):
mod_inc = False # 用于判断算法是否可终止
visit_sequence = self._G.keys()
random.shuffle(list(visit_sequence))
while True:
can_stop = True # 第一阶段是否可终止
for v_vid in visit_sequence:
v_cid = self._vid_vertex[v_vid]._cid
k_v = sum(self._G[v_vid].values()) + \
self._vid_vertex[v_vid]._kin
cid_Q = {}
for w_vid in self._G[v_vid].keys():
w_cid = self._vid_vertex[w_vid]._cid
if w_cid in cid_Q:
continue
else:
tot = sum(
[sum(self._G[k].values()) + \
self._vid_vertex[k]._kin for \
k in self._cid_vertices[w_cid]])
if w_cid == v_cid:
tot -= k_v
k_v_in = sum([v for k, v in self._G[v_vid]\
.items() if k in self._cid_vertices[w_cid]])
delta_Q = k_v_in - k_v * tot / self._m # 由于只需要知道delta_Q的正负,所以少乘了1/(2*self._m)
cid_Q[w_cid] = delta_Q
cid, max_delta_Q = sorted(cid_Q.items(), key=\
lambda item: item[1], reverse=True)[0]
if max_delta_Q > 0.0 and cid != v_cid:
self._vid_vertex[v_vid]._cid = cid
self._cid_vertices[cid].add(v_vid)
self._cid_vertices[v_cid].remove(v_vid)
can_stop = False
mod_inc = True
if can_stop:
break
return mod_inc
def second_stage(self):
cid_vertices = {}
vid_vertex = {}
for cid, vertices in self._cid_vertices.items():
if len(vertices) == 0:
continue
new_vertex = Vertex(cid, cid, set())
for vid in vertices:
new_vertex._nodes.update(self._vid_vertex[vid]._nodes)
new_vertex._kin += self._vid_vertex[vid]._kin
for k, v in self._G[vid].items():
if k in vertices:
new_vertex._kin += v / 2.0
cid_vertices[cid] = set([cid])
vid_vertex[cid] = new_vertex
G = collections.defaultdict(dict)
for cid1, vertices1 in self._cid_vertices.items():
if len(vertices1) == 0:
continue
for cid2, vertices2 in self._cid_vertices.items():
if cid2 <= cid1 or len(vertices2) == 0:
continue
edge_weight = 0.0
for vid in vertices1:
for k, v in self._G[vid].items():
if k in vertices2:
edge_weight += v
if edge_weight != 0:
G[cid1][cid2] = edge_weight
G[cid2][cid1] = edge_weight
self._cid_vertices = cid_vertices
self._vid_vertex = vid_vertex
self._G = G
def get_communities(self):
communities = []
for vertices in self._cid_vertices.values():
if len(vertices) != 0:
c = set()
for vid in vertices:
c.update(self._vid_vertex[vid]._nodes)
communities.append(c)
return communities
def execute(self):
iter_time = 1
while True:
iter_time += 1
mod_inc = self.first_stage()
if mod_inc:
self.second_stage()
else:
break
return self.get_communities()
def load(path):
g = nx.Graph()
with open(path, 'r', encoding='utf8') as f:
lines = f.readlines()
for line in lines:
line = line.replace('\n', '')
nodex, nodey = line.split('\t')
# 因为我们是无权图,所以这里将所有的边权重设为1.0
g.add_edge(nodex, nodey, weight=1.0)
return g
if __name__ == '__main__':
G = load_graph('./data/network.dat')
algorithm = Louvain(G)
communities = algorithm.execute()
# 按照社区大小从大到小排序输出
communities = sorted(communities, key=lambda b: -len(b)) # 按社区大小排序
print(G)
total = []
for temp in communities:
total.append(list(temp))
# 4.写入文件 这种写入文件的数据与标签文件的样子相同 但是我们检测预测的准确率
result = []
for temp in total:
for i in temp:
result.append([i, total.index(temp)])
f = open('./data/Louvain_predict.txt', 'w')
for r in result:
f.write(str(r[0]) + '\t')
f.write(str(r[1]) + '\n')
f.close()
graph = load("./data/Louvain_predict.txt")
# 5. 画出我们的网络结构图
nx.draw_networkx(graph)
plt.show()
求证思路:
1、matlab进行社群划分
2、matlab相关矩阵的社群划分
参考资料:
1、社区发现:论文中模块度Q的计算 - CodeAntenna
基于模块度的一种划分方法:Modularity
2、代码片段_数据分析基于FastNewman实现数据聚类matlab源码
3、
啥东西也没有,但是标题是我很关心的问题,错付了
4、
这个博客中,有非常详细的社群划分内容,介绍;
5、这个博客介绍了十种,社团聚类的分析方法;
https://www.csdn.net/tags/MtTaMgysMTUwNjU4LWJsb2cO0O0O.html
6、这个博客的内容非常仔细:
这里介绍了一个网络社群划分的一个指标:NMI指数(运用这个指标的前提是,需要有金标准)
7、使用louvain算法进行模块度一个划分: