import os

# Must be set BEFORE anything that loads protobuf is imported (stlearn pulls
# in tensorflow, which imports protobuf at import time). In the original the
# assignment came after the imports, where it has no effect.
os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"

from pathlib import Path

import numpy as np
import pandas as pd
import stlearn as st
# Load the 10x Visium mouse-brain dataset; `Read10X` returns an AnnData object.
data = st.Read10X("mouse-brain-section-coronal-1-standard-1-1-0/")
# Display the object (notebook-style inspection of the loaded AnnData).
data
The loaded result is an AnnData object; stlearn is then used to normalize the data and perform dimensionality reduction and clustering:
# Pre-processing of the gene count table: drop genes seen in fewer than one
# spot, scale each spot to the same total count, then log1p-transform.
st.pp.filter_genes(data, min_cells=1)
st.pp.normalize_total(data)
st.pp.log1p(data)
# Pre-processing of the spot image: cut the tissue image into per-spot tiles
# and store them under ./tiles for the feature-extraction step below.
st.pp.tiling(data, "./tiles")
# Extract high-level morphological features from the tile images with a deep
# learning model; this step may take a few minutes to complete.
st.pp.extract_feature(data)
# Reduce the gene expression matrix to its first 50 principal components.
st.em.run_pca(data, n_comps=50)
# Apply stSME normalisation to the log-transformed data on a copy, so the
# original `data` object is left untouched.
data_SME = data.copy()
st.spatial.SME.SME_normalize(data_SME, use_data="raw")
# Replace the expression matrix with the SME-adjusted values, then scale and
# re-run PCA on the normalised copy.
data_SME.X = data_SME.obsm["raw_SME_normalized"]
st.pp.scale(data_SME)
st.em.run_pca(data_SME, n_comps=50)
# Preserve the raw count matrix in a layer so it can be recovered after
# normalisation.
# Fix: the original used `adata` and scanpy's `sc.pp.*`, but neither `adata`
# nor `sc` is defined anywhere in this file — the script imports
# `stlearn as st` and works on `data`, so use those names here.
data.layers["counts"] = data.X.copy()
# Normalise each spot's total count, then log1p-transform.
# NOTE(review): the section above (st.pp.normalize_total / st.pp.log1p)
# already transformed `data`; running both sections double-transforms it —
# confirm which of the two is intended to run.
st.pp.normalize_total(data)
st.pp.log1p(data)
# 高变基因鉴定 sc.pp.highly_variable_genes(adata, n_top_genes=2000) sc.pl.highly_variable_genes(adata) # pca sc.tl.pca(adata) sc.pp.neighbors(adata) sc.tl.umap(adata) # Using the igraph implementation and a fixed number of iterations can be significantly faster, especially for larger datasets sc.tl.leiden(adata, flavor="igraph", n_iterations=2)