Copy // 在R中进行
rm(list=ls())
setwd("E:/Research/9.PXX/citeseq")
####R包加载####
library(Seurat)
library(SeuratData)
library(patchwork)
library(ggplot2)
library(batchelor)
library(SeuratWrappers)
library(magrittr)
library(tidyverse)
library(clusterProfiler)
library(GO.db)
library(org.Hs.eg.db)
library(DOSE)
library(DoubletFinder)
library(SingleR)
library(celldex)
library(harmony)
library(pheatmap)
library(ggsci)
library(data.table)
#### 1 数据读入 ######
###### 1.1 依次读入RNA和蛋白 #####
#每个文件夹内有barcodes.tsv,genes.tsv,matrix.mtx三个文件
#genes.tsv最下面为蛋白ADT的信息(如果是抗体标签号可以对应替换为基因名)
counts1 <- Read10X(data.dir = "./rawdata/RNA_Blood_Day7")
cite_counts1 <- Read10X(data.dir = "./rawdata/CITE_Blood_Day7")
adt.names1 <- setdiff (rownames (cite_counts1),rownames (counts1))
idx1 <- which(rownames(cite_counts1 ) %in% adt.names1)
adt1 <- cite_counts1[idx1,]
counts2 <- Read10X(data.dir = "./rawdata/RNA_Blood_Day11")
cite_counts2 <- Read10X(data.dir = "./rawdata/CITE_Blood_Day11")
adt.names2 <- setdiff (rownames (cite_counts2),rownames (counts2))
idx2 <- which(rownames(cite_counts2 ) %in% adt.names2)
adt2 <- cite_counts2[idx2,]
counts3 <- Read10X(data.dir = "./rawdata/RNA_Blood_Day14")
cite_counts3 <- Read10X(data.dir = "./rawdata/CITE_Blood_Day14")
adt.names3 <- setdiff (rownames (cite_counts3),rownames (counts3))
idx3 <- which(rownames(cite_counts3 ) %in% adt.names3)
adt3 <- cite_counts3[idx3,]
counts4 <- Read10X(data.dir = "./rawdata/RNA_Colon_Day4")
cite_counts4 <- Read10X(data.dir = "./rawdata/CITE_Colon_Day4")
adt.names4 <- setdiff (rownames (cite_counts4),rownames (counts4))
idx4 <- which(rownames(cite_counts4 ) %in% adt.names4)
adt4 <- cite_counts4[idx4,]
counts5 <- Read10X(data.dir = "./rawdata/RNA_Colon_Day7")
cite_counts5 <- Read10X(data.dir = "./rawdata/CITE_Colon_Day7")
adt.names5 <- setdiff (rownames (cite_counts5),rownames (counts5))
idx5 <- which(rownames(cite_counts5 ) %in% adt.names5)
adt5 <- cite_counts5[idx5,]
counts6 <- Read10X(data.dir = "./rawdata/RNA_Colon_Day11")
cite_counts6 <- Read10X(data.dir = "./rawdata/CITE_Colon_Day11")
adt.names6 <- setdiff (rownames (cite_counts6),rownames (counts6))
idx6 <- which(rownames(cite_counts6 ) %in% adt.names6)
adt6 <- cite_counts6[idx6,]
counts7 <- Read10X(data.dir = "./rawdata/RNA_Colon_Day14")
cite_counts7 <- Read10X(data.dir = "./rawdata/CITE_Colon_Day14")
adt.names7 <- setdiff (rownames (cite_counts7),rownames (counts7))
idx7 <- which(rownames(cite_counts7 ) %in% adt.names7)
adt7 <- cite_counts7[idx7,]
counts8 <- Read10X(data.dir = "./rawdata/RNA_BM_Day14")
cite_counts8 <- Read10X(data.dir = "./rawdata/CITE_BM_Day14")
adt.names8 <- setdiff (rownames (cite_counts8),rownames (counts8))
idx8 <- which(rownames(cite_counts8 ) %in% adt.names8)
adt8 <- cite_counts8[idx8,]
scRNA01 = CreateSeuratObject(counts1,project="BBloodDay07")
scCITE01 = CreateAssayObject(adt1)
scRNA01[["ADT"]] <- scCITE01
scRNA02 = CreateSeuratObject(counts2,project="BBloodDay11")
scCITE02 = CreateAssayObject(adt2)
scRNA02[["ADT"]] <- scCITE02
scRNA03 = CreateSeuratObject(counts3,project="BBloodDay14")
scCITE03 = CreateAssayObject(adt3)
scRNA03[["ADT"]] <- scCITE03
scRNA04 = CreateSeuratObject(counts4,project="AColonDay04")
scCITE04 = CreateAssayObject(adt4)
scRNA04[["ADT"]] <- scCITE04
scRNA05 = CreateSeuratObject(counts5,project="AColonDay07")
scCITE05 = CreateAssayObject(adt5)
scRNA05[["ADT"]] <- scCITE05
scRNA06 = CreateSeuratObject(counts6,project="AColonDay11")
scCITE06 = CreateAssayObject(adt6)
scRNA06[["ADT"]] <- scCITE06
scRNA07 = CreateSeuratObject(counts7,project="AColonDay14")
scCITE07 = CreateAssayObject(adt7)
scRNA07[["ADT"]] <- scCITE07
scRNA08 = CreateSeuratObject(counts8,project="CBoneMarrowDay14")
scCITE08 = CreateAssayObject(adt8)
scRNA08[["ADT"]] <- scCITE08
# 合并RNA的Seurat对象,将所有Seurat对象合并到一个对象中
scRNA_list <- list(scRNA01,scRNA02,scRNA03,scRNA04,scRNA05,scRNA06,scRNA07,scRNA08)
scRNA <- merge(scRNA_list[[1]],
y=c(scRNA_list[[2]],scRNA_list[[3]],scRNA_list[[4]],scRNA_list[[5]],
scRNA_list[[6]],scRNA_list[[7]],scRNA_list[[8]]),
add.cell.ids= c("BloodDay07","BloodDay11","BloodDay14",
"ColonDay04","ColonDay07","ColonDay11","ColonDay14",
"BMDay14"))
table(scRNA@meta.data$orig.ident)
#AColonDay04 AColonDay07 AColonDay11 AColonDay14 BBloodDay07 BBloodDay11
#17529 15245 10256 16746 18830 21433
#BBloodDay14 CBoneMarrowDay14
#15625 20838
#将orig.ident从character转为factor
scRNA@meta.data$orig.ident <- as.factor(scRNA@meta.data$orig.ident)
levels(scRNA@meta.data$orig.ident)
#删除不需要的变量
rm(scRNA01,scRNA02,scRNA03,scRNA04,scRNA05,scRNA06,scRNA07,scRNA08)
rm(scCITE01,scCITE02,scCITE03,scCITE04,scCITE05,scCITE06,scCITE07,scCITE08)
rm(counts1,counts2,counts3,counts4,counts5,counts6,counts7,counts8)
rm(adt1,adt2,adt3,adt4,adt5,adt6,adt7,adt8)
rm(cite_counts1,cite_counts2,cite_counts3,cite_counts4,cite_counts5,cite_counts6,cite_counts7,cite_counts8)
rm(scRNA_list)
dim(scRNA)
#32589 136502
###### 1.2 批量读入RNA和蛋白(在genes.tsv顺序不一样时失效) #####
# 读入数据,使用目录向量合并
obj_dir <- c("./data/RNA_Blood_Day7","./data/RNA_Blood_Day11","./data/RNA_Blood_Day14",
"./data/RNA_Colon_Day4","./data/RNA_Colon_Day7","./data/RNA_Colon_Day11","./data/RNA_Colon_Day14",
"./data/RNA_BM_Day14")
names(obj_dir) = c("BBloodDay07","BBloodDay11","BBloodDay14",
"AColonDay04","AColonDay07","AColonDay11","AColonDay14",
"CBoneMarrowDay14")
counts <- Read10X(data.dir = obj_dir)
#读入citeseq的矩阵
cite_dir <- c("./data/CITE_Blood_Day7","./data/CITE_Blood_Day11","./data/CITE_Blood_Day14",
"./data/CITE_Colon_Day4","./data/CITE_Colon_Day7","./data/CITE_Colon_Day11","./data/CITE_Colon_Day14",
"./data/CITE_BM_Day14")
names(cite_dir) = c("BBloodDay07","BBloodDay11","BBloodDay14",
"AColonDay04","AColonDay07","AColonDay11","AColonDay14",
"CBoneMarrowDay14") #多样本整合的时候命名barcode-ID
cite_counts <- Read10X(data.dir = cite_dir)
#如报错Barcode file missing. Expecting barcodes.tsv.gz:需要把features.tsv改成genes.tsv
#寻找citeseq比转录组矩阵多出来的部分,这部分即为蛋白抗体信息
adt.names <- setdiff (rownames (cite_counts),rownames (counts))
idx <- which(rownames(cite_counts ) %in% adt.names)
adt <- cite_counts[idx,]
scRNA1 = CreateSeuratObject(counts,min.cells = 5) #创建Seurat对象,2GB,保留5个以上细胞表达的基因
adt_assay <- CreateAssayObject(counts = adt)
#Warning: Feature names cannot have underscores ('_'), replacing with dashes ('-')
scRNA1[["ADT"]] <- adt_assay
rownames(scRNA1[["ADT"]])
#注意:我们可以通过函数快速地在两个数据之间来回切换,以便后续的分析以及可视化
#DefaultAssay(scRNA) <- "RNA"
#DefaultAssay(scRNA) <- "ADT"
#scRNA@assays[["RNA"]]@counts#原始数据,就是我们读取那三个文件后获得的原始数据
#scRNA@assays[["RNA"]]@data#经过标准化的数据
#scRNA@assays[["RNA"]]@scale.data#经过标准化后执行高可变基因筛选并且归一化后的数据
rm(cite_counts)
rm(counts)
rm(adt)
rm(adt_assay)
dim(scRNA1)
#22398 136502
###### 1.3 添加metadata信息#####
table(scRNA@meta.data$orig.ident)
#AColonDay04 AColonDay07 AColonDay11 AColonDay14 BBloodDay07 BBloodDay11
#17529 15245 10256 16746 18830 21433
#BBloodDay14 CBoneMarrowDay14
#15625 20838
###### 1.3.1 添加site #####
#添加一列样本信息至meta.data$site,用于存储样本类型(区分是来自血/肠/骨髓的)
names(scRNA@meta.data)
#"orig.ident" "nCount_RNA" "nFeature_RNA" "nCount_ADT" "nFeature_ADT"
scRNA@meta.data$site <- scRNA@meta.data$orig.ident
levels(scRNA@meta.data$site)
#替换
level_BloodDay7 <- which(levels(scRNA@meta.data$site)=="BBloodDay07")
levels(scRNA@meta.data$site)[level_BloodDay7]<- "Blood"
level_BloodDay11 <- which(levels(scRNA@meta.data$site)=="BBloodDay11")
levels(scRNA@meta.data$site)[level_BloodDay11]<- "Blood"
level_BloodDay14 <- which(levels(scRNA@meta.data$site)=="BBloodDay14")
levels(scRNA@meta.data$site)[level_BloodDay14]<- "Blood"
level_ColonDay4 <- which(levels(scRNA@meta.data$site)=="AColonDay04")
levels(scRNA@meta.data$site)[level_ColonDay4]<- "Colon"
level_ColonDay7 <- which(levels(scRNA@meta.data$site)=="AColonDay07")
levels(scRNA@meta.data$site)[level_ColonDay7]<- "Colon"
level_ColonDay11 <- which(levels(scRNA@meta.data$site)=="AColonDay11")
levels(scRNA@meta.data$site)[level_ColonDay11]<- "Colon"
level_ColonDay14 <- which(levels(scRNA@meta.data$site)=="AColonDay14")
levels(scRNA@meta.data$site)[level_ColonDay14]<- "Colon"
level_BMDay14 <- which(levels(scRNA@meta.data$site)=="CBoneMarrowDay14")
levels(scRNA@meta.data$site)[level_BMDay14]<- "BM"
table(scRNA@meta.data$site)
#Colon Blood BM
#59776 55888 20838
###### 1.3.2 添加time #####
#添加一列样本信息至meta.data$time,用于存储样本类型(区分是来自DSS处理4/7/11还是处理14天的)
names(scRNA@meta.data)
#"orig.ident" "nCount_RNA" "nFeature_RNA" "nCount_ADT" "nFeature_ADT" "site"
scRNA@meta.data$time <- scRNA@meta.data$orig.ident
levels(scRNA@meta.data$time)
#替换
level_BloodDay7 <- which(levels(scRNA@meta.data$time)=="BBloodDay07")
levels(scRNA@meta.data$time)[level_BloodDay7]<- "Day7"
level_BloodDay11 <- which(levels(scRNA@meta.data$time)=="BBloodDay11")
levels(scRNA@meta.data$time)[level_BloodDay11]<- "Day11"
level_BloodDay14 <- which(levels(scRNA@meta.data$time)=="BBloodDay14")
levels(scRNA@meta.data$time)[level_BloodDay14]<- "Day14"
level_ColonDay4 <- which(levels(scRNA@meta.data$time)=="AColonDay04")
levels(scRNA@meta.data$time)[level_ColonDay4]<- "Day4"
level_ColonDay7 <- which(levels(scRNA@meta.data$time)=="AColonDay07")
levels(scRNA@meta.data$time)[level_ColonDay7]<- "Day7"
level_ColonDay11 <- which(levels(scRNA@meta.data$time)=="AColonDay11")
levels(scRNA@meta.data$time)[level_ColonDay11]<- "Day11"
level_ColonDay14 <- which(levels(scRNA@meta.data$time)=="AColonDay14")
levels(scRNA@meta.data$time)[level_ColonDay14]<- "Day14"
level_BMDay14 <- which(levels(scRNA@meta.data$time)=="CBoneMarrowDay14")
levels(scRNA@meta.data$time)[level_BMDay14]<- "Day14"
#查看修改后的metadata
table(scRNA@meta.data$time)
#Day4 Day7 Day11 Day14
#17529 34075 31689 53209
#修改metadata$orig.ident的名字
levels(scRNA@meta.data$orig.ident) <- c('ColonDay4', 'ColonDay7', 'ColonDay11','ColonDay14',
"BloodDay7", "BloodDay11", "BloodDay14",
"BMDay14")
table(scRNA@meta.data$orig.ident)
# ColonDay4 ColonDay7 ColonDay11 ColonDay14 BloodDay7 BloodDay11 BloodDay14 BMDay14
#17529 15245 10256 16746 18830 21433 15625 20838
###### 1.3.3 添加batch #####
names(scRNA@meta.data)
#"orig.ident" "nCount_RNA" "nFeature_RNA" "nCount_ADT" "nFeature_ADT" "site" "time"
scRNA@meta.data$batch <- scRNA@meta.data$orig.ident
levels(scRNA@meta.data$batch)
#替换
level_BloodDay7 <- which(levels(scRNA@meta.data$batch)=="BloodDay7")
levels(scRNA@meta.data$batch)[level_BloodDay7]<- "Batch1"
level_BloodDay11 <- which(levels(scRNA@meta.data$batch)=="BloodDay11")
levels(scRNA@meta.data$batch)[level_BloodDay11]<- "Batch1"
level_BloodDay14 <- which(levels(scRNA@meta.data$batch)=="BloodDay14")
levels(scRNA@meta.data$batch)[level_BloodDay14]<- "Batch2"
level_ColonDay4 <- which(levels(scRNA@meta.data$batch)=="ColonDay4")
levels(scRNA@meta.data$batch)[level_ColonDay4]<- "Batch2"
level_ColonDay7 <- which(levels(scRNA@meta.data$batch)=="ColonDay7")
levels(scRNA@meta.data$batch)[level_ColonDay7]<- "Batch1"
level_ColonDay11 <- which(levels(scRNA@meta.data$batch)=="ColonDay11")
levels(scRNA@meta.data$batch)[level_ColonDay11]<- "Batch1"
level_ColonDay14 <- which(levels(scRNA@meta.data$batch)=="ColonDay14")
levels(scRNA@meta.data$batch)[level_ColonDay14]<- "Batch2"
level_BMDay14 <- which(levels(scRNA@meta.data$batch)=="BMDay14")
levels(scRNA@meta.data$batch)[level_BMDay14]<- "Batch2"
table(scRNA@meta.data$batch)
#Batch2 Batch1
#70738 65764
#保存
saveRDS(scRNA, "./result/Alldata_new/Step1_merged_sample.rds")