234234234

# 稀疏矩阵的有效值提取,生成mtx文件 (Extract the stored values of a sparse matrix and write them to an .mtx file)

 

import time

import numpy as np
import scanpy as sc
import scipy
import scipy.sparse
# from scipy.sparse import csr_matrix

# Export the sparse matrix stored in 'test.h5ad' to 'main.mtx' in Matrix Market
# coordinate format (1-based "row col value" triplets), timing the whole run.
start = time.perf_counter()

# Read the source AnnData file.
anndata = sc.read('test.h5ad')

print('start read data ##############')
# Normalise X to CSR so that indptr/indices/data are row-oriented even when
# the file stores X as CSC or as a dense array.
matrix = scipy.sparse.csr_matrix(anndata.X)
indptr = np.asarray(matrix.indptr).astype(int)
indices = np.asarray(matrix.indices).astype(int)
# Values are cast to int because the header below declares an "integer"
# matrix; non-integral values are truncated by this cast.
data = np.asarray(matrix.data).astype(int)
height, width = matrix.shape
total = data.size

print('start patch main data #############')
# CSR stores one indptr entry per row plus a trailing sentinel.
row_count = indptr.size - 1
with open('main.mtx', 'w') as f:
    # Banner line followed by an empty comment line.
    f.write('%%MatrixMarket matrix coordinate integer general\n%\n')
    # Size line: rows, columns, number of stored entries.
    f.write(str(height) + ' ' + str(width) + ' ' + str(total) + '\n')
    # Rows are numbered 1..row_count inclusive; the upper bound must be
    # row_count + 1 so the last row is written too (it was skipped before,
    # leaving fewer entries in the file than the size line declares).
    for row_no in range(1, row_count + 1):
        # Entries of this row occupy [row_start, row_end) in indices/data.
        row_start = indptr[row_no - 1]
        row_end = indptr[row_no]
        cols = indices[row_start:row_end].tolist()
        vals = data[row_start:row_end].tolist()
        # Column indices are 0-based in CSR and 1-based in Matrix Market.
        f.write(''.join(
            str(row_no) + ' ' + str(col + 1) + ' ' + str(val) + '\n'
            for col, val in zip(cols, vals)
        ))
        print('progress: ' + str(row_no) + '/' + str(row_count))


end = time.perf_counter()
print("运行时间为", round(end-start), 'seconds')

 

# posted @ 2022-10-01 13:51  你若愿意,我一定去  阅读(163)  评论(0)  编辑  收藏  举报
23423423423