write10xCounts写入10x格式的h5文件

转自:https://rdrr.io/github/MarioniLab/DropletUtils/man/write10xCounts.html

1.安装

# Install the BiocManager helper first when it cannot be loaded, then install
# DropletUtils through it and attach the package.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
BiocManager::install("DropletUtils")
library(DropletUtils)

2.write10xCounts

# Usage signature of write10xCounts(), reproduced from the DropletUtils manual
# page. It is reference material rather than a runnable call: `path` and `x`
# are placeholders with no default, every other argument shows its default.
# NOTE(review): for `type` and `version` the vectors presumably list the
# accepted choices (first element being the default) - confirm in the manual.
write10xCounts(
  path,
  x,
  barcodes = colnames(x),  # defaults to the column names of x
  gene.id = rownames(x),   # defaults to the row names of x
  gene.symbol = gene.id,   # defaults to whatever gene.id is
  gene.type = "Gene Expression",
  overwrite = FALSE,
  type = c("auto", "sparse", "HDF5"),
  genome = "unknown",
  version = c("2", "3"),
  chemistry = "Single Cell 3' v3",
  original.gem.groups = 1L,
  library.ids = "custom"
)

3.多组学数据

参考代码:

library(DropletUtils)

# Read the RNA and ATAC tables: tab-separated, with a header row, first column
# used as row names. `path1` / `path2` must be defined by the caller.
# check.names = FALSE keeps the column (feature) names exactly as written in
# the file; with the default, R rewrites names that are not syntactically
# valid, so ATAC peak names of the form "chr:start-end" would have ":" and "-"
# replaced by ".".
rna <- read.table(path1, header = TRUE, sep = "\t", row.names = 1,
                  check.names = FALSE)
rna_matrix <- as.matrix(rna)
dim(rna_matrix)

atac <- read.table(path2, header = TRUE, sep = "\t", row.names = 1,
                   check.names = FALSE)
atac_matrix <- as.matrix(atac)
dim(atac_matrix)

# Put the RNA and ATAC columns side by side. data.frame() applies the same
# name-mangling as read.table() unless check.names = FALSE is given again.
# NOTE(review): assumes both tables list the same cells in the same row
# order - confirm against the input files.
# In the original example the combined table was 1047 x 1000.
ar <- data.frame(rna, atac, check.names = FALSE)

# Transpose so that features become rows and the original rows become columns,
# which is what the rownames()/colnames() arguments below rely on.
ar <- t(ar)

# One feature-type label per row of `ar`, derived from the actual number of
# RNA and ATAC columns instead of a hard-coded 500 / 500 split.
feature_type <- rep(
  c("Gene Expression", "Peaks"),
  times = c(ncol(rna), ncol(atac))
)

write10xCounts(
  "ra.h5",
  ar,
  barcodes = colnames(ar),
  gene.id = rownames(ar),
  gene.symbol = rownames(ar),
  gene.type = feature_type,
  overwrite = FALSE,
  type = "HDF5",
  genome = "GRCh38",
  version = "3",
  chemistry = "Single Cell 3' v3",
  original.gem.groups = 1L,
  library.ids = "custom"
)

在Python中可以用scanpy读取(需要先 import scanpy as sc):

# NOTE(review): `sc` is not defined in this snippet - presumably scanpy
# imported as `sc`; confirm before running.
# gex_only=False is passed explicitly, presumably so that features other than
# "Gene Expression" (the "Peaks" rows) are kept - verify against scanpy docs.
# NOTE(review): the result name `re` shadows Python's standard `re` module.
re=sc.read_10x_h5('./ra.h5', gex_only=False)

4.注意 

https://www.jianshu.com/p/eae2359f6eb1

需要设置

check.names=F

因为R在读取数据(read.table)和构建data.frame时默认会检查列名,把ATAC的peak特征名(格式为 染色体:start-end)中的符号(如 : 和 -)都替换成点 .,所以需要在 read.table() 和 data.frame() 中都设置上述参数,才能保留原始的特征名。

posted @ 2021-08-26 20:05  lypbendlf  阅读(522)  评论(0)  编辑  收藏  举报