Python处理alist文件——稀疏矩阵的读取和存储
众所周知,alist文件是一种保存稀疏矩阵的格式,常用于保存LDPC码的校验矩阵与生成矩阵。
Matrix File Formats - TU Kaiserslautern提供了一个信道编码的数据库,并给出了数据库中信道编码的校验矩阵的存储方法和格式。
这种格式也见于alist format
Database of Channel Codes and ML Simulation Results从这里可以下载校验矩阵的alist文件。
要读取alist文件中保存的矩阵,或将稀疏矩阵保存为alist格式,有以下三种Python方案(其中前两种对矩阵行、列的约定是一致的):
- LDPC encode ,alist2(H&G) · Issue #3 · supermihi/lpdec
- bp_osd/src_python at master · quantumgizmos/bp_osd
- Deep-learning-for-channel-encoding-and-decoding/Encoder.py at master · s-serenity/Deep-learning-for-channel-encoding-and-decoding
需要注意的是,这些脚本保存或读取到的可能是所需稀疏矩阵的转置;在使用以上脚本时,请根据自己的需要判断是否要加 .T。
最适合Matrix File Formats - TU Kaiserslautern的python实现
import numpy as np
# 校验矩阵的读取
def alistToNumpy(lines):
    """Convert a parity-check matrix in AList format to a 0/1 numpy array.

    The AList format is introduced on
    http://www.inference.phy.cam.ac.uk/mackay/codes/alist.html.

    A "reduced" AList format is also supported, in which lines 3 and 4
    (containing column and row weights, respectively) and the row-based
    information (last part of the AList file) are omitted.

    Args:
        lines: List of lists of ints, one inner list per line of the AList
            file (already parsed to integers if read from a text file).
            ``lines[0]`` must be ``[nCols, nRows]``. Column entries are
            1-based row indices; ``0`` entries are skipped.

    Returns:
        Integer array of shape ``(nRows, nCols)`` holding ones at the listed
        positions and zeros elsewhere.

    Example:
        >>> alistToNumpy([[3, 2], [2, 2], [1, 1, 2], [2, 2],
        ...               [1], [2], [1, 2], [1, 2, 3, 4]])
        array([[1, 0, 1],
               [0, 1, 1]])
    """
    nCols, nRows = lines[0]
    # The full format carries two weight lines before the column section, so
    # it needs at least 4 + nCols lines. Checking the length first keeps a
    # short (reduced) input from being indexed past its end.
    hasWeightLines = (
        len(lines) >= 4 + nCols
        and len(lines[2]) == nCols
        and len(lines[3]) == nRows
    )
    startIndex = 4 if hasWeightLines else 2
    # Builtin ``int`` instead of the ``np.int`` alias, which newer NumPy
    # releases no longer provide.
    matrix = np.zeros((nRows, nCols), dtype=int)
    for col, nonzeros in enumerate(lines[startIndex:startIndex + nCols]):
        for rowIndex in nonzeros:
            if rowIndex != 0:
                matrix[rowIndex - 1, col] = 1
    return matrix
def save_alist(name, mat, j=None, k=None):
    """Write a parity-check matrix to a file in AList format.

    The file is laid out as follows, one item per line:

    1. number of columns and number of rows,
    2. ``j`` and ``k``,
    3. the weight (sum) of every column,
    4. the weight (sum) of every row,
    5. for each column, the 1-based indices of its nonzero rows,
    6. for each row, the 1-based indices of its nonzero columns.

    Index lists are written without zero padding. In sections 3-6 every
    number is followed by a single space.

    Args:
        name: Path of the file to write; an existing file is overwritten.
        mat: Two-dimensional matrix (array-like) to store.
        j: Value written first on line 2. Defaults to the largest column sum
            (0 when the matrix has no columns).
        k: Value written second on line 2. Defaults to the largest row sum
            (0 when the matrix has no rows).
    """
    H = np.copy(mat)
    m, n = H.shape  # rows, cols

    col_weights = [int(w) for w in H.sum(axis=0).tolist()]
    row_weights = [int(w) for w in H.sum(axis=1).tolist()]
    if j is None:
        # ``default`` keeps a matrix with an empty axis from raising.
        j = max(col_weights, default=0)
    if k is None:
        k = max(row_weights, default=0)

    def spaced(values):
        # Each value is followed by one space, then the line is terminated.
        return "".join(str(v) + " " for v in values) + "\n"

    out = ["{} {}\n".format(n, m), "{} {}\n".format(j, k)]
    out.append(spaced(col_weights))
    out.append(spaced(row_weights))
    for col in range(n):
        out.append(spaced(row + 1 for row in range(m) if H[row, col]))
    for row in range(m):
        out.append(spaced(col + 1 for col in range(n) if H[row, col]))

    # All content is prepared before the file is opened, and the context
    # manager closes the handle even if writing fails.
    with open(name, 'w') as f:
        f.writelines(out)
def alist2sparse(file_H):
    """Load the AList file at ``file_H`` and return it as a dense array.

    Each line of the file is split on whitespace and converted to a list of
    ints; the resulting list of lists is passed to ``alistToNumpy``.
    """
    with open(file_H) as handle:
        parsed = [[int(token) for token in row.split()] for row in handle]
    return alistToNumpy(parsed)
def alist2sparse2(fname):
    """Read an AList file into a dense 0/1 matrix using the column section.

    The file is treated as one whitespace-separated stream of integers:
    ``n m``, two maximum-weight values, ``n`` column weights, ``m`` row
    weights, then the per-column lists of 1-based row indices. Zeros inside
    the index lists are treated as padding and ignored. The row-based section
    that may follow is not used.

    Args:
        fname: Path of the AList file (full format, including weight lines).

    Returns:
        ``np.int32`` array of shape ``(m, n)`` with ones at the listed
        positions.

    Raises:
        ValueError: If the file has fewer numbers than its header and column
            weights announce.
    """
    # Plain file reading instead of ``np.loadtxt`` with a newline delimiter,
    # which current NumPy rejects.
    with open(fname) as f:
        a = np.array([int(tok) for tok in f.read().split()], dtype=np.int32)
    if a.size < 4:
        raise ValueError("alist file is too short to contain a header")

    mat_n = int(a[0])  # number of columns
    mat_m = int(a[1])  # number of rows
    start = 4 + mat_n + mat_m  # first position after both weight lines
    if a.size < start:
        raise ValueError("alist file is missing column/row weight entries")

    index_col_num = a[4:4 + mat_n]  # number of nonzeros in each column
    num = int(index_col_num.sum())  # total number of nonzeros

    # Drop padding zeros from the index section only. Filtering the whole
    # stream would also remove zero weights and shift every later offset.
    b = a[start:]
    b = b[b > 0]
    if b.size < num:
        raise ValueError("alist file lists fewer indices than its column weights")

    H = np.zeros((mat_m, mat_n), dtype=np.int32)
    k = 0
    for i in range(mat_n):
        weight = int(index_col_num[i])
        H[b[k:k + weight] - 1, i] = 1
        k += weight
    return H
def test_save_alist():
    """Write a 3x7 example matrix to ``hamming_d_3.alist`` via ``save_alist``."""
    rows = (
        [1, 0, 0, 1, 1, 0, 1],
        [0, 1, 0, 1, 0, 1, 1],
        [0, 0, 1, 0, 1, 1, 1],
    )
    save_alist("hamming_d_3.alist", np.array(rows))
def test_2_alist2sparse():
    """Read ``hamming_d_3.alist`` with both readers and print the comparison.

    Prints the matrix returned by ``alist2sparse`` and then whether every
    entry equals the matrix returned by ``alist2sparse2``.
    """
    path = 'hamming_d_3.alist'
    first = alist2sparse(path)
    print(first)
    second = alist2sparse2(path)
    same_everywhere = (first == second).all()
    print(same_everywhere)
if __name__ == "__main__":
    # The writer runs first because the reader check loads the file it creates.
    for step in (test_save_alist, test_2_alist2sparse):
        step()