import scvelo as scv
adata = scv.read("E:/data_exercise/day1_27/scV/data/Pancreas/adata_dynamics.h5ad", cache=True)
adata
AnnData object with n_obs × n_vars = 3696 × 2000
obs: 'clusters_coarse', 'clusters', 'S_score', 'G2M_score', 'initial_size_spliced', 'initial_size_unspliced', 'initial_size', 'n_counts'
var: 'highly_variable_genes', 'gene_count_corr', 'means', 'dispersions', 'dispersions_norm', 'highly_variable', 'fit_r2', 'fit_alpha', 'fit_beta', 'fit_gamma', 'fit_t_', 'fit_scaling', 'fit_std_u', 'fit_std_s', 'fit_likelihood', 'fit_u0', 'fit_s0', 'fit_pval_steady', 'fit_steady_u', 'fit_steady_s', 'fit_variance', 'fit_alignment_scaling'
uns: 'clusters_coarse_colors', 'clusters_colors', 'day_colors', 'neighbors', 'pca', 'recover_dynamics'
obsm: 'X_pca', 'X_umap'
varm: 'loss'
layers: 'Ms', 'Mu', 'fit_t', 'fit_tau', 'fit_tau_', 'spliced', 'unspliced'
obsp: 'connectivities', 'distances'
# 计算速度矢量
scv.tl.velocity(adata, mode='dynamical')
scv.tl.velocity_graph(adata)
computing velocities
finished (0:00:06) --> added
'velocity', velocity vectors for each individual cell (adata.layers)
computing velocity graph
finished (0:00:11) --> added
'velocity_graph', sparse matrix with cosine correlations (adata.uns)
#汇出嵌入模型
scv.pl.velocity_embedding_grid(adata, basis='umap')
computing velocity embedding
finished (0:00:01) --> added
'velocity_umap', embedded velocity vectors (adata.obsm)
#汇出嵌入模型
scv.pl.velocity_embedding_stream(adata, basis='umap')
#计算cell cycle score并可视化
scv.tl.score_genes_cell_cycle(adata)
scv.pl.scatter(adata, color_gradients=['S_score', 'G2M_score'], basis="umap", smooth=True, perc=[5, 95])
calculating cell cycle phase
--> 'S_score' and 'G2M_score', scores of cell cycle phases (adata.obs)
#3.5 计算细胞状态并可视化
scv.tl.terminal_states(adata)
scv.pl.scatter(adata, color=[ 'root_cells', 'end_points'], basis='umap')
#'root_cells', root cells of Markov diffusion process (adata.obs)
#'end_points', end points of Markov diffusion process (adata.obs)
computing terminal states
WARNING: Uncertain or fuzzy root cell identification. Please verify.
identified 2 regions of root cells and 1 region of end points .
finished (0:00:00) --> added
'root_cells', root cells of Markov diffusion process (adata.obs)
'end_points', end points of Markov diffusion process (adata.obs)
adata
AnnData object with n_obs × n_vars = 3696 × 2000
obs: 'clusters_coarse', 'clusters', 'S_score', 'G2M_score', 'initial_size_spliced', 'initial_size_unspliced', 'initial_size', 'n_counts', 'velocity_self_transition', 'phase', 'clusters_gradients', 'root_cells', 'end_points'
var: 'highly_variable_genes', 'gene_count_corr', 'means', 'dispersions', 'dispersions_norm', 'highly_variable', 'fit_r2', 'fit_alpha', 'fit_beta', 'fit_gamma', 'fit_t_', 'fit_scaling', 'fit_std_u', 'fit_std_s', 'fit_likelihood', 'fit_u0', 'fit_s0', 'fit_pval_steady', 'fit_steady_u', 'fit_steady_s', 'fit_variance', 'fit_alignment_scaling', 'velocity_genes'
uns: 'clusters_coarse_colors', 'clusters_colors', 'day_colors', 'neighbors', 'pca', 'recover_dynamics', 'velocity_params', 'velocity_graph', 'velocity_graph_neg', 'clusters_gradients_colors'
obsm: 'X_pca', 'X_umap', 'velocity_umap'
varm: 'loss'
layers: 'Ms', 'Mu', 'fit_t', 'fit_tau', 'fit_tau_', 'spliced', 'unspliced', 'velocity', 'velocity_u'
obsp: 'connectivities', 'distances'
#计算pseudotime,可视化,与导出
scv.tl.velocity_pseudotime(adata)
scv.pl.scatter(adata=adata, color='velocity_pseudotime', cmap='gnuplot', basis="umap")
adata.obs["velocity_pseudotime"].to_csv('velocyto_pseudotime.csv')
#查看特定基因的velocity
var_names = ['Snhg6', 'Sbspon', 'Eif2s3y', 'Sntg1'] #adata.var 下的index就是基因名
scv.pl.velocity(adata, var_names=var_names, colorbar=True, ncols=2)
adata
AnnData object with n_obs × n_vars = 3696 × 2000
obs: 'clusters_coarse', 'clusters', 'S_score', 'G2M_score', 'initial_size_spliced', 'initial_size_unspliced', 'initial_size', 'n_counts', 'velocity_self_transition', 'phase', 'clusters_gradients', 'root_cells', 'end_points', 'velocity_pseudotime'
var: 'highly_variable_genes', 'gene_count_corr', 'means', 'dispersions', 'dispersions_norm', 'highly_variable', 'fit_r2', 'fit_alpha', 'fit_beta', 'fit_gamma', 'fit_t_', 'fit_scaling', 'fit_std_u', 'fit_std_s', 'fit_likelihood', 'fit_u0', 'fit_s0', 'fit_pval_steady', 'fit_steady_u', 'fit_steady_s', 'fit_variance', 'fit_alignment_scaling', 'velocity_genes'
uns: 'clusters_coarse_colors', 'clusters_colors', 'day_colors', 'neighbors', 'pca', 'recover_dynamics', 'velocity_params', 'velocity_graph', 'velocity_graph_neg', 'clusters_gradients_colors'
obsm: 'X_pca', 'X_umap', 'velocity_umap'
varm: 'loss'
layers: 'Ms', 'Mu', 'fit_t', 'fit_tau', 'fit_tau_', 'spliced', 'unspliced', 'velocity', 'velocity_u'
obsp: 'connectivities', 'distances'
df = adata.var
df
|
highly_variable_genes |
gene_count_corr |
means |
dispersions |
dispersions_norm |
highly_variable |
fit_r2 |
fit_alpha |
fit_beta |
fit_gamma |
... |
fit_std_s |
fit_likelihood |
fit_u0 |
fit_s0 |
fit_pval_steady |
fit_steady_u |
fit_steady_s |
fit_variance |
fit_alignment_scaling |
velocity_genes |
index |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Sntg1 |
True |
NaN |
0.005065 |
0.131135 |
0.214469 |
True |
0.401981 |
0.015726 |
0.005592 |
0.088792 |
... |
0.030838 |
0.406523 |
0.0 |
0.0 |
0.159472 |
2.470675 |
0.094304 |
0.149138 |
5.355590 |
False |
Snhg6 |
False |
NaN |
0.375835 |
0.158585 |
0.040765 |
True |
0.125072 |
0.389126 |
2.981982 |
0.260322 |
... |
0.245248 |
0.243441 |
0.0 |
0.0 |
0.403409 |
0.106128 |
0.596630 |
0.762252 |
2.037296 |
True |
Ncoa2 |
False |
NaN |
0.106945 |
0.145879 |
0.298358 |
True |
-2.313923 |
NaN |
NaN |
NaN |
... |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
False |
Sbspon |
True |
NaN |
0.143032 |
0.277016 |
1.044508 |
True |
0.624803 |
0.464865 |
2.437113 |
0.379645 |
... |
0.178859 |
0.252135 |
0.0 |
0.0 |
0.182088 |
0.164805 |
0.430623 |
0.674312 |
1.193015 |
True |
Ube2w |
False |
NaN |
0.018522 |
0.109459 |
0.091136 |
True |
-5.324582 |
NaN |
NaN |
NaN |
... |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
False |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
Tmem27 |
True |
NaN |
1.297619 |
2.475960 |
3.254982 |
True |
0.527855 |
3.954723 |
21.314334 |
0.253763 |
... |
5.014219 |
0.292956 |
0.0 |
0.0 |
0.496411 |
0.168456 |
12.425889 |
0.389388 |
3.163113 |
False |
Uty |
False |
NaN |
0.018673 |
0.182256 |
0.505338 |
True |
-0.555835 |
NaN |
NaN |
NaN |
... |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
False |
Ddx3y |
True |
NaN |
0.165256 |
0.350978 |
1.465338 |
True |
-2.538731 |
NaN |
NaN |
NaN |
... |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
False |
Eif2s3y |
True |
NaN |
0.182643 |
0.318686 |
1.281603 |
True |
0.352190 |
0.193458 |
0.746410 |
0.295085 |
... |
0.158051 |
0.262140 |
0.0 |
0.0 |
0.390710 |
0.219716 |
0.424033 |
0.748614 |
2.312621 |
True |
Erdr1 |
True |
NaN |
0.344720 |
0.215833 |
0.288351 |
True |
-0.193263 |
NaN |
NaN |
NaN |
... |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
NaN |
False |
2000 rows × 23 columns
df.to_csv("adata.var.csv",index=True, header=True )
# 数据框筛选那些基因
df = df[(df['fit_likelihood'] > .1) & df['velocity_genes'] == True]
kwargs = dict(xscale='log', fontsize=16)
df
|
highly_variable_genes |
gene_count_corr |
means |
dispersions |
dispersions_norm |
highly_variable |
fit_r2 |
fit_alpha |
fit_beta |
fit_gamma |
... |
fit_std_s |
fit_likelihood |
fit_u0 |
fit_s0 |
fit_pval_steady |
fit_steady_u |
fit_steady_s |
fit_variance |
fit_alignment_scaling |
velocity_genes |
index |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Snhg6 |
False |
NaN |
0.375835 |
0.158585 |
0.040765 |
True |
0.125072 |
0.389126 |
2.981982 |
0.260322 |
... |
0.245248 |
0.243441 |
0.0 |
0.0 |
0.403409 |
0.106128 |
0.596630 |
0.762252 |
2.037296 |
True |
Sbspon |
True |
NaN |
0.143032 |
0.277016 |
1.044508 |
True |
0.624803 |
0.464865 |
2.437113 |
0.379645 |
... |
0.178859 |
0.252135 |
0.0 |
0.0 |
0.182088 |
0.164805 |
0.430623 |
0.674312 |
1.193015 |
True |
Fam135a |
True |
NaN |
0.257955 |
0.171546 |
0.096820 |
True |
0.384662 |
0.172335 |
0.118088 |
0.204538 |
... |
0.149868 |
0.283343 |
0.0 |
0.0 |
0.387921 |
1.345830 |
0.393197 |
0.671096 |
3.390194 |
True |
Tbc1d8 |
True |
NaN |
0.070612 |
0.123167 |
0.169130 |
True |
0.613194 |
0.083656 |
0.118139 |
0.227401 |
... |
0.081595 |
0.246594 |
0.0 |
0.0 |
0.392402 |
0.659434 |
0.240100 |
0.829478 |
2.914215 |
True |
Fhl2 |
True |
NaN |
0.232482 |
0.818012 |
2.892669 |
True |
0.325223 |
0.224027 |
0.330934 |
0.121456 |
... |
0.317963 |
0.262849 |
0.0 |
0.0 |
0.476793 |
0.539878 |
1.073687 |
0.695997 |
4.785407 |
True |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
... |
Map3k15 |
True |
NaN |
0.180941 |
0.427601 |
1.901315 |
True |
0.437069 |
0.252467 |
0.348783 |
0.275921 |
... |
0.225956 |
0.289070 |
0.0 |
0.0 |
0.434454 |
0.611380 |
0.560048 |
0.510572 |
2.208756 |
True |
Rai2 |
True |
NaN |
0.231965 |
0.651755 |
2.173640 |
True |
0.769033 |
0.792172 |
2.885604 |
0.647345 |
... |
0.276047 |
0.271263 |
0.0 |
0.0 |
0.311969 |
0.243741 |
0.716996 |
0.707595 |
1.063908 |
True |
Rbbp7 |
False |
NaN |
1.017536 |
0.422314 |
0.153713 |
True |
0.344213 |
1.446806 |
5.332362 |
0.326539 |
... |
0.796754 |
0.281498 |
0.0 |
0.0 |
0.477365 |
0.208398 |
2.985787 |
0.704522 |
2.100311 |
True |
Ap1s2 |
True |
NaN |
0.127856 |
0.319473 |
1.286081 |
True |
0.716659 |
0.202643 |
0.596662 |
0.392769 |
... |
0.140455 |
0.286893 |
0.0 |
0.0 |
0.140955 |
0.315635 |
0.366964 |
0.671211 |
1.775751 |
True |
Eif2s3y |
True |
NaN |
0.182643 |
0.318686 |
1.281603 |
True |
0.352190 |
0.193458 |
0.746410 |
0.295085 |
... |
0.158051 |
0.262140 |
0.0 |
0.0 |
0.390710 |
0.219716 |
0.424033 |
0.748614 |
2.312621 |
True |
1006 rows × 23 columns
#RNA转录,剪接和降解的速率。
with scv.GridSpec(ncols=3) as pl:
pl.hist(df['fit_alpha'], xlabel='transcription rate', **kwargs)
pl.hist(df['fit_beta'] * df['fit_scaling'], xlabel='splicing rate', xticks=[.1, .4, 1], **kwargs)
pl.hist(df['fit_gamma'], xlabel='degradation rate', xticks=[.1, .4, 1], **kwargs)
adata
AnnData object with n_obs × n_vars = 3696 × 2000
obs: 'clusters_coarse', 'clusters', 'S_score', 'G2M_score', 'initial_size_spliced', 'initial_size_unspliced', 'initial_size', 'n_counts', 'velocity_self_transition', 'phase', 'clusters_gradients', 'root_cells', 'end_points', 'velocity_pseudotime'
var: 'highly_variable_genes', 'gene_count_corr', 'means', 'dispersions', 'dispersions_norm', 'highly_variable', 'fit_r2', 'fit_alpha', 'fit_beta', 'fit_gamma', 'fit_t_', 'fit_scaling', 'fit_std_u', 'fit_std_s', 'fit_likelihood', 'fit_u0', 'fit_s0', 'fit_pval_steady', 'fit_steady_u', 'fit_steady_s', 'fit_variance', 'fit_alignment_scaling', 'velocity_genes'
uns: 'clusters_coarse_colors', 'clusters_colors', 'day_colors', 'neighbors', 'pca', 'recover_dynamics', 'velocity_params', 'velocity_graph', 'velocity_graph_neg', 'clusters_gradients_colors'
obsm: 'X_pca', 'X_umap', 'velocity_umap'
varm: 'loss'
layers: 'Ms', 'Mu', 'fit_t', 'fit_tau', 'fit_tau_', 'spliced', 'unspliced', 'velocity', 'velocity_u'
obsp: 'connectivities', 'distances'
#latent_time
scv.tl.latent_time(adata)
computing latent time using root_cells as prior
finished (0:00:02) --> added
'latent_time', shared time (adata.obs)
scv.pl.scatter(adata, color='latent_time', color_map='gnuplot', size=80)
top_genes = adata.var['fit_likelihood'].sort_values(ascending=False).index[:300]
top_genes
Index(['Pcsk2', 'Dcdc2a', 'Ank', 'Gng12', 'Top2a', 'Pak3', 'Tmem163', 'Nfib',
'Pim2', 'Smoc1',
...
'Cyr61', 'Ptprn2', 'Rpa2', 'Nit2', 'Cdc20', 'Hmmr', 'Ankrd12', 'Itpr1',
'Auts2', 'Mdfi'],
dtype='object', name='index', length=300)
scv.pl.heatmap(adata, var_names=top_genes, sortby='latent_time', col_color='clusters', n_convolve=100)
top = df['fit_likelihood'].sort_values(ascending=False).index[:300]
top
Index(['Pcsk2', 'Dcdc2a', 'Ank', 'Gng12', 'Top2a', 'Pak3', 'Tmem163', 'Nfib',
'Pim2', 'Smoc1',
...
'Trp53inp2', 'Ccnb1', 'Atp2a2', 'Dtl', 'Kmt2a', 'Emb', 'Rcan2',
'Gm28172', 'Rab3b', 'Gk'],
dtype='object', name='index', length=300)
scv.pl.heatmap(adata, var_names=top, sortby='latent_time', col_color='clusters', n_convolve=100)
# 驱动基因
top_genes = adata.var['fit_likelihood'].sort_values(ascending=False).index
scv.pl.scatter(adata, basis=top_genes[:15], ncols=5, frameon=False)
var_names = ['Actn4', 'Ppp3ca', 'Cpe', 'Nnat']
scv.pl.scatter(adata, var_names, frameon=False)
scv.pl.scatter(adata, x='latent_time', y=var_names, frameon=False)
#特定于簇的最高似然基因
scv.tl.rank_dynamical_genes(adata, groupby='clusters')
df = scv.get_df(adata, 'rank_dynamical_genes/names')
df.head(5)
ranking genes by cluster-specific likelihoods
finished (0:00:05) --> added
'rank_dynamical_genes', sorted scores by group ids (adata.uns)
|
Ductal |
Ngn3 low EP |
Ngn3 high EP |
Pre-endocrine |
Beta |
Alpha |
Delta |
Epsilon |
0 |
Dcdc2a |
Dcdc2a |
Rbfox3 |
Abcc8 |
Pcsk2 |
Cpe |
Pcsk2 |
Tox3 |
1 |
Top2a |
Adk |
Mapre3 |
Tmem163 |
Ank |
Gnao1 |
Rap1b |
Rnf130 |
2 |
Nfib |
Mki67 |
Btbd17 |
Gnao1 |
Tmem163 |
Pak3 |
Pak3 |
Meis2 |
3 |
Wfdc15b |
Rap1gap2 |
Sulf2 |
Ank |
Tspan7 |
Pim2 |
Abcc8 |
Adk |
4 |
Cdk1 |
Top2a |
Tcp11 |
Tspan7 |
Map1b |
Map1b |
Slc7a14 |
Rap1gap2 |
for cluster in ['Ductal', 'Ngn3 high EP', 'Pre-endocrine', 'Beta']:
scv.pl.scatter(adata, df[cluster][:5], ylabel=cluster, frameon=False)
df = scv.get_df(adata, 'rank_dynamical_genes/scores')
df.head(5)
|
Ductal |
Ngn3 low EP |
Ngn3 high EP |
Pre-endocrine |
Beta |
Alpha |
Delta |
Epsilon |
0 |
0.59 |
0.59 |
0.53 |
0.73 |
0.90 |
0.61 |
0.96 |
0.61 |
1 |
0.56 |
0.53 |
0.50 |
0.54 |
0.70 |
0.57 |
0.70 |
0.61 |
2 |
0.56 |
0.51 |
0.47 |
0.53 |
0.59 |
0.54 |
0.67 |
0.59 |
3 |
0.50 |
0.50 |
0.46 |
0.53 |
0.59 |
0.54 |
0.62 |
0.57 |
4 |
0.49 |
0.49 |
0.45 |
0.51 |
0.57 |
0.51 |
0.60 |
0.55 |
[参考链接1](https://scvelo.readthedocs.io/DynamicalModeling.html)
[参考链接2](https://www.bilibili.com/read/cv7764833/)