# 稀疏矩阵的有效值提取,生成mtx文件 (Extract the stored values of a sparse matrix and write them to an .mtx file)
"""Export the stored entries of an AnnData matrix to a Matrix Market file.

Reads ``test.h5ad``, takes the CSR arrays of ``X`` and writes ``main.mtx`` in
``coordinate integer general`` format with 1-based row and column numbers.
"""
import time

import numpy as np
import scipy.sparse


def iter_mtx_row_blobs(indptr, indices, data):
    """Yield the Matrix Market text of every CSR row, in row order.

    Args:
        indptr: CSR row pointer. Row ``r`` (0-based) owns the entries
            ``indptr[r]:indptr[r + 1]``, so there are ``len(indptr) - 1`` rows.
        indices: 0-based column index of every stored entry.
        data: Value of every stored entry, parallel to ``indices``.

    Yields:
        One string per row made of ``"<row> <col> <value>"`` lines (each
        newline-terminated, row/column 1-based); an empty string for a row
        that has no stored entries.
    """
    indptr = np.asarray(indptr)
    indices = np.asarray(indices)
    data = np.asarray(data)
    # indptr holds n_rows + 1 offsets, so the 1-based row numbers run up to
    # and including indptr.size - 1; stopping earlier drops the last row.
    for row_no in range(1, indptr.size):
        begin, end = indptr[row_no - 1], indptr[row_no]
        cols = indices[begin:end].tolist()
        vals = data[begin:end].tolist()
        yield ''.join(
            f'{row_no} {col + 1} {val}\n' for col, val in zip(cols, vals)
        )


def write_mtx(out, shape, indptr, indices, data, report_progress=False):
    """Write a CSR matrix to ``out`` as a Matrix Market coordinate file.

    Args:
        out: Writable text stream.
        shape: ``(rows, columns)`` of the matrix, written in the size line.
        indptr: CSR row pointer (see ``iter_mtx_row_blobs``).
        indices: 0-based column index of every stored entry.
        data: Value of every stored entry; its length is written as the
            entry count in the size line.
        report_progress: When true, print ``progress: <row>/<rows>`` after
            each row has been written.
    """
    height, width = shape
    row_total = len(indptr) - 1
    # Header line, an empty comment line, then the size line.
    out.write('%%MatrixMarket matrix coordinate integer general\n%\n')
    out.write(str(height) + ' ' + str(width) + ' ' + str(len(data)) + '\n')
    blobs = iter_mtx_row_blobs(indptr, indices, data)
    for row_no, blob in enumerate(blobs, start=1):
        out.write(blob)
        if report_progress:
            print('progress: ' + str(row_no) + '/' + str(row_total))


def main():
    """Convert ``test.h5ad`` to ``main.mtx`` and print the elapsed time."""
    # Imported here so the conversion helpers above can be imported (and
    # tested) on machines that do not have scanpy installed.
    import scanpy as sc

    # Time the whole run.
    start = time.perf_counter()

    # Read the source file.
    adata = sc.read('test.h5ad')
    print('start read data ##############')
    # Normalise X to CSR: the row-pointer walk below is only valid for CSR,
    # and X may be stored as CSC or as a dense array.
    matrix = scipy.sparse.csr_matrix(adata.X)
    indptr = np.array(matrix.indptr).astype(int)
    indices = np.array(matrix.indices).astype(int)
    # Values are cast to int, matching the "integer" field in the header.
    data = np.array(matrix.data).astype(int)

    print('start patch main data #############')
    with open('main.mtx', 'w') as f:
        write_mtx(f, matrix.shape, indptr, indices, data, report_progress=True)

    end = time.perf_counter()
    print("运行时间为", round(end-start), 'seconds')


if __name__ == '__main__':
    main()