scRNAseq-seurat
By Kaiyi
示例数据与代码链接:https://pan.baidu.com/s/1yp_xd7kyPoI4vqPHiPBO6w 提取码:ybu9
R包安装
## 设置镜像,加快安装速度
options(repos=structure(c(CRAN="https://mirrors.tuna.tsinghua.edu.cn/CRAN/")))
## 查看镜像源
options()$repos
if (!requireNamespace("BiocManager", quietly = TRUE))
install.packages("BiocManager")
install.packages("devtools")
install.packages("R.utils")
install.packages("Seurat")
install.packages("pheatmap")
install.packages("tidyverse")
install.packages("magrittr") # 管道使用 R 包
install.packages("Matrix")
devtools::install_github('satijalab/seurat-data')
remotes::install_github('satijalab/seurat-wrappers') ## 可能比较慢, 请耐心等待
remotes::install_github('chris-mcginnis-ucsf/DoubletFinder')
BiocManager::install("batchelor")
install.packages("harmony")
BiocManager::install("Biostrings",version = '3.14', force = TRUE)
BiocManager::install("celldex")
BiocManager::install("SingleR")
BiocManager::install("clusterProfiler") ## 自动安装 "GO.db"
BiocManager::install("org.Hs.eg.db") ## 可能比较耗时, 请耐心等待
R包加载
library(Seurat)
library(SeuratData)
library(patchwork)
library(ggplot2)
library(batchelor)
library(SeuratWrappers)
library(magrittr)
library(tidyverse)
library(clusterProfiler)
library(GO.db)
library(org.Hs.eg.db)
library(DOSE)
library(DoubletFinder)
library(SingleR)
library(celldex)
library(harmony)
library(pheatmap)
1、单细胞数据读入
#方法一
# 读入数据,使用目录向量合并
rm(list=ls())
obj_dir <- c("E:/Protocol and tutorial/scRNAseq/2.单细胞转录组基础分析/HNC01TIL","E:/Protocol and tutorial/scRNAseq/2.单细胞转录组基础分析/Tonsil2")
names(obj_dir) = c("HNC01TIL","Tonsil2")
counts <- Read10X(data.dir = obj_dir)
#前提是提供了3个10X文件,名字分别为barcodes.tsv.gz,features.tsv.gz,matrix.mtx.gz
#如果只提供了txt格式的表达矩阵,可以用counts = read.table("matrix.txt", header = T, row.names = 1)
scRNA1 = CreateSeuratObject(counts, min.cells=1) #创建Seurat对象
scRNA1 = subset(scRNA1, downsample=1000,seed=42) #从两个样本中各提取1000个细胞,减少后续运算所需的内存与时间
dim(scRNA1)
table(scRNA1@meta.data$orig.ident)
#保存读入的rds
saveRDS(scRNA1, "Step1_merged_sample.rds")
#生成seurat对象list
scRNAlist <- SplitObject(scRNA1, split.by = "orig.ident")
#方法二
#使用merge函数合并seurat对象
scRNA2list <- list()
#以下代码会把每个样本的数据创建一个seurat对象,并存放到列表scRNAlist里
for(i in 1:length(obj_dir)){
counts <- Read10X(data.dir = obj_dir[i])
scRNA2list[[i]] <- CreateSeuratObject(counts, min.cells=1)
}
#使用merge函数将2个seurat对象合并成一个seurat对象
scRNA2 <- merge(scRNA2list[[1]], y=scRNA2list[[2]])
scRNA2 = subset(scRNA2, downsample=1000,seed=42)
table(scRNA2@meta.data$orig.ident)
sum(row.names(scRNA1@meta.data) == row.names(scRNA2@meta.data)) #检验两个object之间的细胞是否一致
2、单细胞数据质控
2.1 去除双细胞
# remove doublets
obj = SplitObject(scRNA1, split.by = "orig.ident") #将整合的SeuratObject重新分为两个SeuratObject
obj_rm=list() #创造一个空的list,用于后续盛放去除双细胞后Seurat对象
doublets_plot = list()
pc.num = 1:20 #定义pc维数
dir.create("SingleCell_QC") #在当前目录下新建一个文件夹,用于存放QC结果
RemoveDoublets <-function(
object,
doublet.rate,
sample_name,
pN=0.25,
pc.num=1:30,
use.SCT=FALSE
){
## 寻找最优pK值
sweep.res.list <- paramSweep_v3(object, PCs = pc.num, sct = F) #使用log标准化,sct=F,如使用SCT标准化,则为T
sweep.stats <- summarizeSweep(sweep.res.list, GT = FALSE)
bcmvn <- find.pK(sweep.stats) #可以看到最佳参数的点
pK_bcmvn <- bcmvn$pK[which.max(bcmvn$BCmetric)] %>% as.character() %>% as.numeric()
## 排除不能检出的同源doublets,优化期望的doublets数量
homotypic.prop <- modelHomotypic(object$seurat_clusters) # 最好提供celltype
nExp_poi <- round(doublet.rate*ncol(object))
nExp_poi.adj <- round(nExp_poi*(1-homotypic.prop))
seu.scored <- doubletFinder_v3(object, PCs = pc.num, pN = 0.25, pK = pK_bcmvn,
nExp = nExp_poi.adj, reuse.pANN = F, sct = F)
# pick out doublets
cname <-colnames(seu.scored[[]])
DF<-cname[grep('^DF',cname)]
seu.scored[["doublet"]] <- as.numeric(seu.scored[[DF]]=="Doublet")
# remove doublets
seu.removed <- subset(seu.scored, subset = doublet != 1)
p1 <- DimPlot(seu.scored, group.by = DF)
res.list <- list("plot"=p1, "obj"=seu.removed)
return(res.list)
}
for( i in names(obj)){
obj[[i]] <- NormalizeData(obj[[i]])
obj[[i]] <- FindVariableFeatures(obj[[i]], selection.method = "vst", nfeatures = 2000)
obj[[i]] <- ScaleData(obj[[i]])
obj[[i]] <- RunPCA(obj[[i]])
obj[[i]] <- RunUMAP(obj[[i]], dims = 1:20)
obj[[i]] <- FindNeighbors(obj[[i]], dims = pc.num) %>% FindClusters(resolution = 0.3)
tmp <- RemoveDoublets(obj[[i]], doublet.rate=0.008,sample_name=i,pc.num=pc.num)
#直接查表,1000细胞对应的doublets rate是0.8%,10000细胞对应的doublets rate是~7.6%
obj_rm[[i]] <- tmp$obj
doublets_plot[[i]] <- tmp$plot
}
doublets_plot[["Tonsil2"]]
doublets_plot[["HNC01TIL"]]
2.2 去除线粒体、红细胞的影响
#质控
scRNA <- merge(obj_rm[[1]], y=obj_rm[[2]]) #重新整合去除双细胞后的两个SeuratObject
Idents(scRNA) <- 'orig.ident' #确立细胞身份的主要标签,此处是样本来源(即Tonsil2和HNC01TIL)
# 计算线粒体基因比例
scRNA[["percent.mt"]] = PercentageFeatureSet(scRNA,
pattern = "^MT-") #因为人的线粒体基因是MT-开头的,所以可以通过这种方式匹配,如果是小鼠的话,是以“mt-”开头的。其他特殊物种可以直接提供线粒体基因的列表
#计算红细胞基因比例
HB.genes <- c("HBA1","HBA2","HBB","HBD","HBE1","HBG1","HBG2","HBM","HBQ1","HBZ")
HB_m <- match(HB.genes, rownames(scRNA@assays$RNA))
HB.genes <- rownames(scRNA@assays$RNA)[HB_m]
HB.genes <- HB.genes[!is.na(HB.genes)]
scRNA[["percent.HB"]]<-PercentageFeatureSet(scRNA, features=HB.genes) #此处就是根据提供的红细胞基因列表计算红细胞基因表达的比例
# 数据过滤前,对 nFeature数、nCount 数,线粒体基因比例和红细胞基因比例进行可视化
beforeQC_vlnplot = VlnPlot(scRNA,
features = c("nFeature_RNA",
"nCount_RNA",
"percent.mt",
"percent.HB"),
ncol = 4,
pt.size = 0.1)
dir.create("SingleCell_QC")
ggsave("SingleCell_QC/BeforeQC_nFeature_nCount_percent.mt_percent.HB_vlnplot.pdf", plot = beforeQC_vlnplot)
ggsave("SingleCell_QC/BeforeQC_nFeature_nCount_percent.mt_percent.HB_vlnplot.png", plot = beforeQC_vlnplot)
beforeQC_vlnplot
#=== 对 nFeature、线粒体基因比例与 nCount 的关系进行可视化 ===#
plot1 <- FeatureScatter(scRNA, feature1 = "nCount_RNA", feature2 = "percent.mt")
plot2 <- FeatureScatter(scRNA, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
plot3 <- FeatureScatter(scRNA, feature1 = "nCount_RNA", feature2 = "percent.HB")
pearplot <- CombinePlots(plots = list(plot1, plot2, plot3), nrow=1, legend="none")
ggsave("SingleCell_QC/pearplot_before_qc.pdf", plot = pearplot, width = 12, height = 5)
ggsave("SingleCell_QC/pearplot_before_qc.png", plot = pearplot, width = 12, height = 5)
plot1
plot2
plot3
# 根据基因数和线粒体比例进行数据过滤
minGene=500 #这里设置了比较严格的条件,一般可以设置200,保留较多的细胞用于后续分析,保留细胞类型多样性,由于今天演示,为保证细胞的基因表达数量足够,设置得较高。
maxGene=3000 #也可以设置为6000
pctMT=10 #也可以设置成20
scRNA = subset(scRNA,
subset = nFeature_RNA > minGene & nFeature_RNA < maxGene & percent.mt < pctMT)
afterQC_vlnplot = VlnPlot(scRNA,
features = c("nFeature_RNA",
"nCount_RNA",
"percent.mt",
"percent.HB"),
ncol = 4,
pt.size = 0.1)
ggsave("SingleCell_QC/afterQC_nFeature_nCount_percent.mt_percent.HB_vlnplot.pdf", plot = afterQC_vlnplot)
ggsave("SingleCell_QC/afterQC_nFeature_nCount_percent.mt_percent.HB_vlnplot.png", plot = afterQC_vlnplot)
afterQC_vlnplot
saveRDS(scRNA,"Step2_After_QC.rds")
3、数据归一化与标准化
#方法一:NormalizeData+FindVariableFeatures+ScaleData
scRNA <- NormalizeData(scRNA)
scRNA <- FindVariableFeatures(scRNA, selection.method = "vst")
scRNA <- ScaleData(scRNA, features = VariableFeatures(scRNA))
#方法二:也可以使用SCTransform进行归一化
scRNA <- SCTransform(scRNA, vars.to.regress = c("nCount_RNA","nCount_RNA","percent.mt", "percent.HB"), verbose = F) #排除线粒体基因等干扰
#检查seurat对象的结构
str(scRNA)
#稀疏矩阵的格式,没有经过归一化
scRNA@assays$RNA@counts[1:6,1:60]
#稀疏矩阵的格式,经过归一化
scRNA@assays$RNA@data[1:6,1:60]
#meta.data内的信息
head(scRNA@meta.data)
4、数据批次矫正与聚类
#查看pca结果
scRNA <- RunPCA(scRNA, features = VariableFeatures(scRNA))
plot1 <- DimPlot(scRNA, reduction = "pca", group.by="orig.ident")
plot2 <- ElbowPlot(scRNA, ndims=30, reduction="pca")
plot3 <- plot1+plot2
dir.create("Clustering")
ggsave("Clustering/pca.png", plot = plot3, width = 8, height = 4)
plot3
##细胞聚类
scRNA <- FindNeighbors(scRNA, dims = pc.num) #pc.num可以更改
scRNA <- FindClusters(scRNA, resolution = 0.5) #resolution可以更改,数字越小,分群越少
table(scRNA@meta.data$seurat_clusters)
metadata <- scRNA@meta.data
cell_cluster <- data.frame(cell_ID=rownames(metadata), cluster_ID=metadata$seurat_clusters)
write.csv(cell_cluster,'Clustering/cell_cluster.csv',row.names = F, quote = F)
#UMAP
scRNA <- RunUMAP(scRNA, dims = pc.num) #这一步有时候有点慢,pc.num可以更改
#group_by_cluster
plot3 = DimPlot(scRNA, reduction = "umap", label=T)
ggsave("Clustering/UMAP.png",plot3)
plot3
#group_by_sample
plot4 = DimPlot(scRNA, reduction = "umap", group.by='orig.ident')
#split_by_sample
plot5 = DimPlot(scRNA, reduction = "umap", split.by='orig.ident')
plotc <- plot4+plot5
ggsave("Clustering/UMAP_cluster_sample.png", plot = plotc, width = 10, height = 5)
plotc
saveRDS(scRNA,"Step3_Clustering.rds")
4.1 批次效应矫正方法一:MNN
#去批次后进行聚类
scRNAlist <- SplitObject(scRNA, split.by = "orig.ident")
scRNAlist <- lapply(scRNAlist, FUN = function(x) NormalizeData(x)) #如果前面用的是SCTransform就不能这样做了
scRNAlist <- lapply(scRNAlist, FUN = function(x) FindVariableFeatures(x))
scRNA_mnn <- RunFastMNN(object.list = scRNAlist)
scRNA_mnn <- FindVariableFeatures(scRNA_mnn)
scRNA_mnn <- RunUMAP(scRNA_mnn, reduction = "mnn", dims = 1:20) #稍微有点慢,dims也是可以改变的
scRNA_mnn <- FindNeighbors(scRNA_mnn, reduction = "mnn", dims = 1:20)
scRNA_mnn <- FindClusters(scRNA_mnn)
p1 <- DimPlot(scRNA_mnn, group.by = "orig.ident", pt.size=0.1) +
ggtitle("Integrated by fastMNN")
p2 <- DimPlot(scRNA, group.by="orig.ident", pt.size=0.1) +
ggtitle("No integrated")
p = p1 + p2 + plot_layout(guides='collect')
#比较去批次前后聚类结果的变化
p
saveRDS(scRNA_mnn,"Step4_MNN.rds")
p3 <- DimPlot(scRNA_mnn, pt.size=0.1)
p4 <- DimPlot(scRNA_mnn, split.by="orig.ident", pt.size=0.1)
p = p3 + p4 + plot_layout(guides='collect')
#比较各个样本内cluster的组成情况
p
ggsave('Clustering/fastMNN.png', p, width=8, height=4)
4.2 批次效应矫正方法二:harmony
#harmony
library(harmony)
scRNA_harmony <- RunHarmony(scRNA, group.by.vars = "orig.ident")
scRNA_harmony <- RunUMAP(scRNA_harmony, reduction = "harmony", dims = 1:20) #稍微有点慢
scRNA_harmony <- FindNeighbors(scRNA_harmony, reduction = "harmony", dims = 1:20) %>% FindClusters()
p1 <- DimPlot(scRNA_harmony, group.by = "orig.ident", pt.size=0.1) +
ggtitle("Integrated by harmony")
p2 <- DimPlot(scRNA, group.by="orig.ident", pt.size=0.1) +
ggtitle("No integrated")
#比较去批次前后聚类结果的变化
p = p1 + p2 + plot_layout(guides='collect')
p
p3 = DimPlot(scRNA_harmony, reduction = "umap")
p4 = DimPlot(scRNA_harmony, reduction = "umap", split.by='orig.ident')
#比较各个样本内cluster的组成情况
plotc <- p3+p4 + plot_layout(guides='collect')
ggsave("Clustering/scRNA_harmony.png", plot = plotc, width = 10, height = 5)
plotc
saveRDS(scRNA_harmony,"Step4_harmony.rds")
4.3 批次效应矫正方法三:锚点整合IntegrateData
#数据整合
# 选取两个样本之间共有的feature
features <- SelectIntegrationFeatures(object.list = scRNAlist)
#以共有features为基础寻找锚点
scRNA.anchors <- FindIntegrationAnchors(object.list = scRNAlist,anchor.features = features)
#利用锚点整合数据
scRNA3 <- IntegrateData(anchorset = scRNA.anchors)
#重跑前面聚类的一般流程
scRNA3 <- ScaleData(scRNA3, verbose = FALSE)
scRNA3 <- RunPCA(scRNA3, npcs = 30, verbose = FALSE)
scRNA3 <- RunUMAP(scRNA3, reduction = "pca", dims = 1:20)
scRNA3 <- FindNeighbors(scRNA3, reduction = "pca", dims = 1:20)
scRNA3 <- FindClusters(scRNA3, resolution = 0.5)
#比较整合前后聚类结果的变化
p1 <- DimPlot(scRNA3, group.by = "orig.ident", pt.size=0.1) +
ggtitle("Integrated")
p2 <- DimPlot(scRNA, group.by="orig.ident", pt.size=0.1) +
ggtitle("No integrated")
p = p1 + p2 + plot_layout(guides='collect')
p
p3 <- DimPlot(scRNA3, reduction = "umap")
p4 <- DimPlot(scRNA3, reduction = "umap", split.by = "orig.ident")
Integrated_dimplot <- p3 + p4 + plot_layout(guides='collect')
#比较各个样本内cluster的组成情况
Integrated_dimplot
ggsave('Clustering/Integrated.png', Integrated_dimplot, width=8, height=4)
5、marker基因鉴定
#鉴定 marker 基因
dir.create("Marker")
# 鉴定各 cluster 的 Marker 基因
all.markers = FindAllMarkers(scRNA_mnn,
min.pct = 0.25,
logfc.threshold = 0.25,
only.pos = TRUE)
head(all.markers)
# cluster5 和cluster0、3进行比较
cluster5.markers <- FindMarkers(scRNA_mnn,
ident.1 = 5,
ident.2 = c(0, 3),
min.pct = 0.25,
only.pos = TRUE)
head(cluster5.markers)
# 根据 FoldChange 进行排序选取每群细胞的 top10 Marker 基因
top10 = all.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC)
# 保存 Marker 基因
write.table(all.markers,
"Marker/all_Markers_of_each_clusters.xls",
col.names = T,
row.names = F,
sep = "\t")
write.table(top10,
"Marker/top10_Markers_of_each_clusters.xls",
col.names = T,
row.names = F,
sep = "\t")
# 绘制热图
scRNA_mnn <- ScaleData(scRNA_mnn, features = row.names(scRNA_mnn))
heatmap_plot = DoHeatmap(object = scRNA_mnn,
features = as.vector(top10$gene),
group.by = "seurat_clusters",
group.bar = T,
size = 2) +
theme(axis.text.y = element_text(size = 4))
# 保存绘图结果
ggsave("Marker/top10_marker_of_each_cluster_heatmap.pdf", width = 12, height = 12,
plot = heatmap_plot)
ggsave("Marker/top10_marker_of_each_cluster_heatmap.png", width = 12, height = 12,
plot = heatmap_plot)
heatmap_plot
top1 = all.markers %>% group_by(cluster) %>% top_n(n = 1, wt = avg_log2FC)
# 绘制单个基因的小提琴图
VlnPlot(scRNA_mnn,
cols = NULL,
features = "RGS13",
pt.size = 0,
group.by = "seurat_clusters")
#批量绘制小提琴图
dir.create("Marker/vlnplot/")
for (gene in as.vector(top1$gene)) {
vlnplot = VlnPlot(scRNA_mnn,
features = gene,
pt.size = 0,
group.by = "seurat_clusters")
ggsave(paste0("Marker/vlnplot/",gene,"_vlnplot.pdf"), plot = vlnplot)
ggsave(paste0("Marker/vlnplot/",gene,"_vlnplot.png"), plot = vlnplot)
}
# 绘制单个基因的featureplot
FeaturePlot(scRNA_mnn,
features = "RGS13",
pt.size = 0.5,
reduction = "umap",
cols = c("lightgrey", "#DE1F1F"))
#批量绘制 featureplot 图
dir.create("Marker/featureplot/")
for (gene in as.vector(top1$gene)) {
featureplot = FeaturePlot(scRNA_mnn,
features = gene,
pt.size = 0.5,
cols = c("lightgrey", "#DE1F1F"))
ggsave(paste0("Marker/featureplot/",gene,"_featureplot.pdf"), plot = featureplot)
ggsave(paste0("Marker/featureplot/",gene,"_featureplot.png"), plot = featureplot)
}
6、差异基因鉴定
#将数据集的idents从clusters切换回orig.ident(各样本)
Idents(scRNA_mnn) <- 'orig.ident'
# 鉴定各样本的差异基因
Diff_exp = FindAllMarkers(scRNA_mnn,
min.pct = 0.25,
logfc.threshold = 0.25,
only.pos = TRUE)
head(Diff_exp)
top10 = Diff_exp %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC)
dir.create("Diffexp")
write.table(Diff_exp,
"Diffexp/group_HNC01TIL-vs-Tonsil2-diff.xls",
col.names = T,
row.names = F,
sep = "\t")
write.table(top10,
"Diffexp/group_HNC01TIL-vs-Tonsil2-top10-diff.xls",
col.names = T,
row.names = F,
sep = "\t")
heatmap_plot = DoHeatmap(object = scRNA_mnn,
features = as.vector(top10$gene),
group.by = "orig.ident",
group.bar = T,
size = 2) +
theme(axis.text.y = element_text(size = 4))
# 保存绘图结果
ggsave("Diffexp/top10_marker_of_each_cluster_heatmap.pdf",
plot = heatmap_plot)
ggsave("Diffexp/top10_marker_of_each_cluster_heatmap.png",
plot = heatmap_plot)
heatmap_plot
7、富集分析
#富集分析
genes_symbol <- as.character(Diff_exp$gene)
#转换ID
eg = bitr(genes_symbol, fromType="SYMBOL", toType="ENTREZID", OrgDb="org.Hs.eg.db")
id = as.character(eg[,2])
dir.create("enrichment")
#GO
ego <- enrichGO(gene = id,
OrgDb = org.Hs.eg.db,
ont = "MF",
pAdjustMethod = "BH",
pvalueCutoff = 0.05,
qvalueCutoff = 0.05,
readable = TRUE)
GO_dot <- dotplot(ego)
GO_bar <- barplot(ego)
res_plot <- CombinePlots(list(GO_dot,GO_bar), nrow=1)
ggsave("enrichment/GO_results.pdf", plot=res_plot, width = 12,height = 7)
ggsave("enrichment/GO_results.png", plot=res_plot, width = 12,height = 7)
GO_dot
GO_bar
8、细胞类型鉴定
8.1 方法一:SingleR(有时候不太准确)
dir.create("SingleR")
Idents(scRNA_mnn) <- 'seurat_clusters'
# 读入参考数据集
load("HumanPrimaryCellAtlas_hpca.se_human.RData")
norm_count = GetAssayData(scRNA_mnn,
slot="data") #标准化后的矩阵
pred<- SingleR(test = norm_count,
ref = hpca.se,
labels = hpca.se$label.main)
#将注释结果添加到seurat对象里
scRNA_mnn$singleR_celltype = pred$labels
# 对细胞类型注释结果进行可视化
celltype_plot = DimPlot(scRNA_mnn,
reduction = "umap",
group.by = "singleR_celltype")
ggsave("SingleR/celltype_plot.pdf", celltype_plot)
ggsave("SingleR/celltype_plot.png", celltype_plot)
celltype_plot
# 通过热图了解每个细胞对应细胞类型的打分情况
score_heatmap <- plotScoreHeatmap(pred, clusters = scRNA_mnn@meta.data$seurat_clusters, order.by = "clusters")
ggsave("SingleR/score_heatmap.pdf", score_heatmap, width = 8, height = 10)
ggsave("SingleR/score_heatmap.png", score_heatmap, width = 8, height = 10)
score_heatmap
# 通过热图了解每个cluster对应细胞类型的打分情况
tab <- table(cluster=scRNA_mnn@meta.data$seurat_clusters, label=pred$labels)
cluster_type <- pheatmap::pheatmap(log10(tab+10)) # using a larger pseudo-count for smoothing.
ggsave("SingleR/cluster_type.pdf", cluster_type)
ggsave("SingleR/cluster_type.png", cluster_type)
cluster_type
8.2 方法二:根据细胞marker手动注释
#比如可以找到一些T细胞marker,用散点图或者小提琴图展示出来,观察其分布情况
#也可以根据各细胞亚群的top差异基因(FindAllMarkers的结果)直接注释
genes_to_check = c('PTPRC', 'CD3D', 'CD3E','FOXP3',
'CD4','IL7R','NKG7','CD8A',"GZMB",'GNLY','MKI67','TCF7','IFNG','IL17A','PDCD1','TOX','CD160','KLRC2','FGFBP2', 'CD27','CCR7')
DotPlot(scRNA_mnn, group.by = 'seurat_clusters',
features = unique(genes_to_check)) + RotatedAxis()
在meta.data中添加细胞注释
# 将singleR得到的细胞类型加入到对象的meta.data中,后续用于高级分析
scRNA_mnn <- RenameIdents(scRNA_mnn, '0'='T_cells','1'='B_cell','2'='T_cells','3'='B_cell','4'='T_cells','5'='T_cells','6'='T_cells','7'='Monocyte','8'='B_cell','9'='Monocyte','10'='B_cell','11'='Pre-B_cell_CD34-','12'='CMP')
scRNA_mnn[["celltype"]] <- Idents(scRNA_mnn)
head(scRNA_mnn@meta.data)
saveRDS(scRNA_mnn, "Step5_CellTyping.rds")
p1 <- DimPlot(scRNA_mnn, group.by = "seurat_clusters")
p2 <- DimPlot(scRNA_mnn, group.by = "celltype")
#查看cluster与细胞类型的对应关系
p3 = p1+p2
ggsave("SingleR/celltype_contrast.pdf",p3,width = 12, height = 6)
ggsave("SingleR/celltype_contrast.png",p3, width = 12, height = 6)
p3
#查看各样本中细胞类型的组成情况
DimPlot(scRNA_mnn, group.by = "celltype", split.by = "orig.ident")
Last updated