RNA速率scVelo

import scvelo as scv
adata = scv.read("E:/data_exercise/day1_27/scV/data/Pancreas/adata_dynamics.h5ad", cache=True)
adata
AnnData object with n_obs × n_vars = 3696 × 2000
    obs: 'clusters_coarse', 'clusters', 'S_score', 'G2M_score', 'initial_size_spliced', 'initial_size_unspliced', 'initial_size', 'n_counts'
    var: 'highly_variable_genes', 'gene_count_corr', 'means', 'dispersions', 'dispersions_norm', 'highly_variable', 'fit_r2', 'fit_alpha', 'fit_beta', 'fit_gamma', 'fit_t_', 'fit_scaling', 'fit_std_u', 'fit_std_s', 'fit_likelihood', 'fit_u0', 'fit_s0', 'fit_pval_steady', 'fit_steady_u', 'fit_steady_s', 'fit_variance', 'fit_alignment_scaling'
    uns: 'clusters_coarse_colors', 'clusters_colors', 'day_colors', 'neighbors', 'pca', 'recover_dynamics'
    obsm: 'X_pca', 'X_umap'
    varm: 'loss'
    layers: 'Ms', 'Mu', 'fit_t', 'fit_tau', 'fit_tau_', 'spliced', 'unspliced'
    obsp: 'connectivities', 'distances'
# 计算速度矢量
scv.tl.velocity(adata, mode='dynamical')
scv.tl.velocity_graph(adata)
computing velocities
    finished (0:00:06) --> added 
    'velocity', velocity vectors for each individual cell (adata.layers)
computing velocity graph
    finished (0:00:11) --> added 
    'velocity_graph', sparse matrix with cosine correlations (adata.uns)
#汇出嵌入模型
scv.pl.velocity_embedding_grid(adata, basis='umap')
computing velocity embedding
    finished (0:00:01) --> added
    'velocity_umap', embedded velocity vectors (adata.obsm)

#汇出嵌入模型
scv.pl.velocity_embedding_stream(adata, basis='umap')

#计算cell cycle score并可视化
scv.tl.score_genes_cell_cycle(adata)

scv.pl.scatter(adata, color_gradients=['S_score', 'G2M_score'], basis="umap", smooth=True, perc=[5, 95])
calculating cell cycle phase
-->     'S_score' and 'G2M_score', scores of cell cycle phases (adata.obs)

#3.5 计算细胞状态并可视化

scv.tl.terminal_states(adata)

scv.pl.scatter(adata, color=[ 'root_cells', 'end_points'], basis='umap') 
#'root_cells', root cells of Markov diffusion process (adata.obs)
#'end_points', end points of Markov diffusion process (adata.obs)

computing terminal states
WARNING: Uncertain or fuzzy root cell identification. Please verify.
    identified 2 regions of root cells and 1 region of end points .
    finished (0:00:00) --> added
    'root_cells', root cells of Markov diffusion process (adata.obs)
    'end_points', end points of Markov diffusion process (adata.obs)

adata
AnnData object with n_obs × n_vars = 3696 × 2000
    obs: 'clusters_coarse', 'clusters', 'S_score', 'G2M_score', 'initial_size_spliced', 'initial_size_unspliced', 'initial_size', 'n_counts', 'velocity_self_transition', 'phase', 'clusters_gradients', 'root_cells', 'end_points'
    var: 'highly_variable_genes', 'gene_count_corr', 'means', 'dispersions', 'dispersions_norm', 'highly_variable', 'fit_r2', 'fit_alpha', 'fit_beta', 'fit_gamma', 'fit_t_', 'fit_scaling', 'fit_std_u', 'fit_std_s', 'fit_likelihood', 'fit_u0', 'fit_s0', 'fit_pval_steady', 'fit_steady_u', 'fit_steady_s', 'fit_variance', 'fit_alignment_scaling', 'velocity_genes'
    uns: 'clusters_coarse_colors', 'clusters_colors', 'day_colors', 'neighbors', 'pca', 'recover_dynamics', 'velocity_params', 'velocity_graph', 'velocity_graph_neg', 'clusters_gradients_colors'
    obsm: 'X_pca', 'X_umap', 'velocity_umap'
    varm: 'loss'
    layers: 'Ms', 'Mu', 'fit_t', 'fit_tau', 'fit_tau_', 'spliced', 'unspliced', 'velocity', 'velocity_u'
    obsp: 'connectivities', 'distances'
#计算pseudotime,可视化,与导出

scv.tl.velocity_pseudotime(adata)

scv.pl.scatter(adata=adata, color='velocity_pseudotime', cmap='gnuplot', basis="umap")

adata.obs["velocity_pseudotime"].to_csv('velocyto_pseudotime.csv')

#查看特定基因的velocity

var_names = ['Snhg6', 'Sbspon', 'Eif2s3y', 'Sntg1'] #adata.var 下的index就是基因名

scv.pl.velocity(adata, var_names=var_names, colorbar=True, ncols=2)

adata
AnnData object with n_obs × n_vars = 3696 × 2000
    obs: 'clusters_coarse', 'clusters', 'S_score', 'G2M_score', 'initial_size_spliced', 'initial_size_unspliced', 'initial_size', 'n_counts', 'velocity_self_transition', 'phase', 'clusters_gradients', 'root_cells', 'end_points', 'velocity_pseudotime'
    var: 'highly_variable_genes', 'gene_count_corr', 'means', 'dispersions', 'dispersions_norm', 'highly_variable', 'fit_r2', 'fit_alpha', 'fit_beta', 'fit_gamma', 'fit_t_', 'fit_scaling', 'fit_std_u', 'fit_std_s', 'fit_likelihood', 'fit_u0', 'fit_s0', 'fit_pval_steady', 'fit_steady_u', 'fit_steady_s', 'fit_variance', 'fit_alignment_scaling', 'velocity_genes'
    uns: 'clusters_coarse_colors', 'clusters_colors', 'day_colors', 'neighbors', 'pca', 'recover_dynamics', 'velocity_params', 'velocity_graph', 'velocity_graph_neg', 'clusters_gradients_colors'
    obsm: 'X_pca', 'X_umap', 'velocity_umap'
    varm: 'loss'
    layers: 'Ms', 'Mu', 'fit_t', 'fit_tau', 'fit_tau_', 'spliced', 'unspliced', 'velocity', 'velocity_u'
    obsp: 'connectivities', 'distances'
df = adata.var
df
highly_variable_genes gene_count_corr means dispersions dispersions_norm highly_variable fit_r2 fit_alpha fit_beta fit_gamma ... fit_std_s fit_likelihood fit_u0 fit_s0 fit_pval_steady fit_steady_u fit_steady_s fit_variance fit_alignment_scaling velocity_genes
index
Sntg1 True NaN 0.005065 0.131135 0.214469 True 0.401981 0.015726 0.005592 0.088792 ... 0.030838 0.406523 0.0 0.0 0.159472 2.470675 0.094304 0.149138 5.355590 False
Snhg6 False NaN 0.375835 0.158585 0.040765 True 0.125072 0.389126 2.981982 0.260322 ... 0.245248 0.243441 0.0 0.0 0.403409 0.106128 0.596630 0.762252 2.037296 True
Ncoa2 False NaN 0.106945 0.145879 0.298358 True -2.313923 NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN False
Sbspon True NaN 0.143032 0.277016 1.044508 True 0.624803 0.464865 2.437113 0.379645 ... 0.178859 0.252135 0.0 0.0 0.182088 0.164805 0.430623 0.674312 1.193015 True
Ube2w False NaN 0.018522 0.109459 0.091136 True -5.324582 NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN False
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
Tmem27 True NaN 1.297619 2.475960 3.254982 True 0.527855 3.954723 21.314334 0.253763 ... 5.014219 0.292956 0.0 0.0 0.496411 0.168456 12.425889 0.389388 3.163113 False
Uty False NaN 0.018673 0.182256 0.505338 True -0.555835 NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN False
Ddx3y True NaN 0.165256 0.350978 1.465338 True -2.538731 NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN False
Eif2s3y True NaN 0.182643 0.318686 1.281603 True 0.352190 0.193458 0.746410 0.295085 ... 0.158051 0.262140 0.0 0.0 0.390710 0.219716 0.424033 0.748614 2.312621 True
Erdr1 True NaN 0.344720 0.215833 0.288351 True -0.193263 NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN False

2000 rows × 23 columns

df.to_csv("adata.var.csv",index=True, header=True )
# 数据框筛选那些基因
df = df[(df['fit_likelihood'] > .1) & df['velocity_genes'] == True]

kwargs = dict(xscale='log', fontsize=16)
df
highly_variable_genes gene_count_corr means dispersions dispersions_norm highly_variable fit_r2 fit_alpha fit_beta fit_gamma ... fit_std_s fit_likelihood fit_u0 fit_s0 fit_pval_steady fit_steady_u fit_steady_s fit_variance fit_alignment_scaling velocity_genes
index
Snhg6 False NaN 0.375835 0.158585 0.040765 True 0.125072 0.389126 2.981982 0.260322 ... 0.245248 0.243441 0.0 0.0 0.403409 0.106128 0.596630 0.762252 2.037296 True
Sbspon True NaN 0.143032 0.277016 1.044508 True 0.624803 0.464865 2.437113 0.379645 ... 0.178859 0.252135 0.0 0.0 0.182088 0.164805 0.430623 0.674312 1.193015 True
Fam135a True NaN 0.257955 0.171546 0.096820 True 0.384662 0.172335 0.118088 0.204538 ... 0.149868 0.283343 0.0 0.0 0.387921 1.345830 0.393197 0.671096 3.390194 True
Tbc1d8 True NaN 0.070612 0.123167 0.169130 True 0.613194 0.083656 0.118139 0.227401 ... 0.081595 0.246594 0.0 0.0 0.392402 0.659434 0.240100 0.829478 2.914215 True
Fhl2 True NaN 0.232482 0.818012 2.892669 True 0.325223 0.224027 0.330934 0.121456 ... 0.317963 0.262849 0.0 0.0 0.476793 0.539878 1.073687 0.695997 4.785407 True
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
Map3k15 True NaN 0.180941 0.427601 1.901315 True 0.437069 0.252467 0.348783 0.275921 ... 0.225956 0.289070 0.0 0.0 0.434454 0.611380 0.560048 0.510572 2.208756 True
Rai2 True NaN 0.231965 0.651755 2.173640 True 0.769033 0.792172 2.885604 0.647345 ... 0.276047 0.271263 0.0 0.0 0.311969 0.243741 0.716996 0.707595 1.063908 True
Rbbp7 False NaN 1.017536 0.422314 0.153713 True 0.344213 1.446806 5.332362 0.326539 ... 0.796754 0.281498 0.0 0.0 0.477365 0.208398 2.985787 0.704522 2.100311 True
Ap1s2 True NaN 0.127856 0.319473 1.286081 True 0.716659 0.202643 0.596662 0.392769 ... 0.140455 0.286893 0.0 0.0 0.140955 0.315635 0.366964 0.671211 1.775751 True
Eif2s3y True NaN 0.182643 0.318686 1.281603 True 0.352190 0.193458 0.746410 0.295085 ... 0.158051 0.262140 0.0 0.0 0.390710 0.219716 0.424033 0.748614 2.312621 True

1006 rows × 23 columns

#RNA转录,剪接和降解的速率。
with scv.GridSpec(ncols=3) as pl:
    pl.hist(df['fit_alpha'], xlabel='transcription rate', **kwargs)
    pl.hist(df['fit_beta'] * df['fit_scaling'], xlabel='splicing rate', xticks=[.1, .4, 1], **kwargs)
    pl.hist(df['fit_gamma'], xlabel='degradation rate', xticks=[.1, .4, 1], **kwargs)

adata
AnnData object with n_obs × n_vars = 3696 × 2000
    obs: 'clusters_coarse', 'clusters', 'S_score', 'G2M_score', 'initial_size_spliced', 'initial_size_unspliced', 'initial_size', 'n_counts', 'velocity_self_transition', 'phase', 'clusters_gradients', 'root_cells', 'end_points', 'velocity_pseudotime'
    var: 'highly_variable_genes', 'gene_count_corr', 'means', 'dispersions', 'dispersions_norm', 'highly_variable', 'fit_r2', 'fit_alpha', 'fit_beta', 'fit_gamma', 'fit_t_', 'fit_scaling', 'fit_std_u', 'fit_std_s', 'fit_likelihood', 'fit_u0', 'fit_s0', 'fit_pval_steady', 'fit_steady_u', 'fit_steady_s', 'fit_variance', 'fit_alignment_scaling', 'velocity_genes'
    uns: 'clusters_coarse_colors', 'clusters_colors', 'day_colors', 'neighbors', 'pca', 'recover_dynamics', 'velocity_params', 'velocity_graph', 'velocity_graph_neg', 'clusters_gradients_colors'
    obsm: 'X_pca', 'X_umap', 'velocity_umap'
    varm: 'loss'
    layers: 'Ms', 'Mu', 'fit_t', 'fit_tau', 'fit_tau_', 'spliced', 'unspliced', 'velocity', 'velocity_u'
    obsp: 'connectivities', 'distances'
#latent_time
scv.tl.latent_time(adata)

computing latent time using root_cells as prior
    finished (0:00:02) --> added 
    'latent_time', shared time (adata.obs)
scv.pl.scatter(adata, color='latent_time', color_map='gnuplot', size=80)

top_genes = adata.var['fit_likelihood'].sort_values(ascending=False).index[:300]
top_genes
Index(['Pcsk2', 'Dcdc2a', 'Ank', 'Gng12', 'Top2a', 'Pak3', 'Tmem163', 'Nfib',
       'Pim2', 'Smoc1',
       ...
       'Cyr61', 'Ptprn2', 'Rpa2', 'Nit2', 'Cdc20', 'Hmmr', 'Ankrd12', 'Itpr1',
       'Auts2', 'Mdfi'],
      dtype='object', name='index', length=300)
scv.pl.heatmap(adata, var_names=top_genes, sortby='latent_time', col_color='clusters', n_convolve=100)

top = df['fit_likelihood'].sort_values(ascending=False).index[:300]
top
Index(['Pcsk2', 'Dcdc2a', 'Ank', 'Gng12', 'Top2a', 'Pak3', 'Tmem163', 'Nfib',
       'Pim2', 'Smoc1',
       ...
       'Trp53inp2', 'Ccnb1', 'Atp2a2', 'Dtl', 'Kmt2a', 'Emb', 'Rcan2',
       'Gm28172', 'Rab3b', 'Gk'],
      dtype='object', name='index', length=300)
scv.pl.heatmap(adata, var_names=top, sortby='latent_time', col_color='clusters', n_convolve=100)

# 驱动基因
top_genes = adata.var['fit_likelihood'].sort_values(ascending=False).index
scv.pl.scatter(adata, basis=top_genes[:15], ncols=5, frameon=False)

var_names = ['Actn4', 'Ppp3ca', 'Cpe', 'Nnat']
scv.pl.scatter(adata, var_names, frameon=False)
scv.pl.scatter(adata, x='latent_time', y=var_names, frameon=False)

#特定于簇的最高似然基因
scv.tl.rank_dynamical_genes(adata, groupby='clusters')
df = scv.get_df(adata, 'rank_dynamical_genes/names')
df.head(5)
ranking genes by cluster-specific likelihoods
    finished (0:00:05) --> added 
    'rank_dynamical_genes', sorted scores by group ids (adata.uns)
Ductal Ngn3 low EP Ngn3 high EP Pre-endocrine Beta Alpha Delta Epsilon
0 Dcdc2a Dcdc2a Rbfox3 Abcc8 Pcsk2 Cpe Pcsk2 Tox3
1 Top2a Adk Mapre3 Tmem163 Ank Gnao1 Rap1b Rnf130
2 Nfib Mki67 Btbd17 Gnao1 Tmem163 Pak3 Pak3 Meis2
3 Wfdc15b Rap1gap2 Sulf2 Ank Tspan7 Pim2 Abcc8 Adk
4 Cdk1 Top2a Tcp11 Tspan7 Map1b Map1b Slc7a14 Rap1gap2
for cluster in ['Ductal', 'Ngn3 high EP', 'Pre-endocrine', 'Beta']:
    scv.pl.scatter(adata, df[cluster][:5], ylabel=cluster, frameon=False)

df = scv.get_df(adata, 'rank_dynamical_genes/scores')
df.head(5)
Ductal Ngn3 low EP Ngn3 high EP Pre-endocrine Beta Alpha Delta Epsilon
0 0.59 0.59 0.53 0.73 0.90 0.61 0.96 0.61
1 0.56 0.53 0.50 0.54 0.70 0.57 0.70 0.61
2 0.56 0.51 0.47 0.53 0.59 0.54 0.67 0.59
3 0.50 0.50 0.46 0.53 0.59 0.54 0.62 0.57
4 0.49 0.49 0.45 0.51 0.57 0.51 0.60 0.55
[参考链接1](https://scvelo.readthedocs.io/DynamicalModeling.html)
[参考链接2](https://www.bilibili.com/read/cv7764833/)
posted @ 2021-03-10 17:01  小猫妮  阅读(711)  评论(0编辑  收藏  举报