Page cover

🧬bulk RNAseq

By Kaiyi Fu

主要包含RNA测序数据下载与预处理、差异分析、富集分析(GO、KEGG、GSEA)等步骤。

R包加载

library(survival) #生存曲线
library(survminer) #生存曲线
library(forestmodel) #森林图
library(DESeq2) #差异分析
library(msigdbr) #GSEA
library(clusterProfiler) #GSEA
library(GseaVis) #GSEA
library(limma) #差异分析
library(ggplot2)
library(stringr)
library(tidyverse)
library(mice) #填补缺失值
library(maftools) #CNV分析
library(immunedeconv) #免疫浸润
library(CancerSubtypes) #癌症分型
library(GenVisR) #Maf可视化
library(GSVA) #ssGSEA
library(EnhancedVolcano)#火山图
library(ComplexHeatmap)
library(circlize)
library(cols4all)
library(org.Mm.eg.db)
library(enrichplot)
library(RColorBrewer)
library(ggrepel)
library(aplot)

一、测序数据的获取与预处理

GEO数据库的下载与处理

#方式一:从GEO直接获取RNAseq数据
if(! require("GEOquery")) BiocManager::install("GEOquery",update=F,ask=F)
library(GEOquery)
##下载并且了解你的数据
gset <- getGEO('GSE14323',destdir = '.',getGPL = F)

exprSet=exprs(gset[[1]])
pdata=pData(gset[[1]]) #获取临床信息
#最重要!!!
#问题解决:如何生成group_list,如何选择需要的样本??
#group_list=
#exprSet=
pdata1=pdata[pdata$source_name_ch1 == 'Normal liver tissue',]
pdata2=pdata[pdata$source_name_ch1=='liver tissue with HCC',]
exprSet=as.data.frame(exprSet)
exprSet1=exprSet[,rownames(pdata1)]
exprSet2=exprSet[,rownames(pdata2)]
#按列合并exprSet1,2
exprSet=cbind(exprSet1,exprSet2)
pdata=rbind(pdata1,pdata2)
group_list=c(rep('normal',19),rep('tumor',38))
group_list <- factor(group_list,levels = c('normal','tumor'))
group_list
save(exprSet,pdata,group_list,file ='lab0113.Rdata')
load('lab0113.Rdata')
#microarray等格式的数据需要进行ID转换
##这个方法直接下载GEO GPL处的注释文件来做转换,肯定是可行的

#GSE14323是用GPL96和GPL571一起的,两个平台注释文件非常相似
#我们选择样本数多的GPL571
#https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GPL571 Download full table
#开始转换
#载入注释文件,在你自己的文件夹里
#没data.table,geoquery啥的先装一下
#install.package('data.table')
#BiocManager::install('GEOquery',update = F,ask = F)

library(data.table)
anno = fread("D:/R学习/platform/GPL571-17391.txt",data.table = F,sep = '\t')
#看一下列名
colnames(anno)
#一定选ID探针列和gene symbol列,!!!此处是可变的!!!
anno = anno[,c(1,11)]
anno = anno[!anno$`Gene Symbol`== "",]
#无脑跑代码即可
tmp = rownames(exprSet)%in% anno[,1]
exprSet = exprSet[tmp,]
dim(exprSet)
match(rownames(exprSet),anno$ID)
anno = anno[match(rownames(exprSet),anno$ID),]
match(rownames(exprSet),anno$ID)
dim(exprSet)
dim(anno)
tail(sort(table(anno[,2])), n = 12L)
#注释到相同基因的探针,保留表达量最大的那个探针
{
MAX = by(exprSet, anno[,2],
function(x) rownames(x)[ which.max(rowMeans(x))])
MAX = as.character(MAX)
exprSet = exprSet[rownames(exprSet) %in% MAX,]
rownames(exprSet) = anno[ match(rownames(exprSet), anno[,1] ),2]
}
dim(exprSet)
#cel.gz文件合并

library(affy)
library(dplyr)
library(tidyverse)

data.raw <- ReadAffy()
eset <- rma(data.raw)
write.exprs(eset,"probe_exp.txt")

ann <- read.delim2("./GPL11180.txt")##修改注释文件的工作路径,指定到GPL所在文件位置,记得从GEO下载下来删除前几行
ann <- ann[,c("ID","Gene.Symbol")]
ann$Gene.symbol <-str_split_fixed(ann$Gene.Symbol,pattern = "///",3)[,1]
rt <- read.table("./probe_exp.txt",sep = "\t",header = T)
symbol <- merge(ann,rt,by.x = 1,by.y = 1)
symbol <- symbol[-1]
colnames(symbol)[1] <- c("ID")
symbol <- distinct(symbol,ID,.keep_all = T)
rownames(symbol) <- symbol$ID
symbol <- symbol[-1]
colnames(symbol) <- str_split_fixed(colnames(symbol),pattern = "_",2)[,1]
symbol <- cbind(ID=rownames(symbol),symbol)
write.table(symbol,"symbol.txt",sep = "\t",quote = F,row.names = F)

自测序数据

##方式二:读入自己的测序矩阵

#读入表达矩阵
mrna_expr <- read.table("E:/Research/12.GYY/RNAseq/data/data_CvsS.txt",header = T)

#处理重复的基因:挑选行平均值大的那一行

#计算行平均值,按降序排列
index=order(rowMeans(mrna_expr[,-1]),decreasing = T)
#调整表达谱的基因顺序
expr_ordered=mrna_expr[index,]
#对于有重复的基因,保留第一次出现的那个,即行平均值大的那个
keep=!duplicated(expr_ordered$gene_name)
#得到最后处理之后的表达谱矩阵
mrna_expr=expr_ordered[keep,]
rownames(mrna_expr) <- mrna_expr$gene_name
mrna_expr <- mrna_expr[,-1]
#读入样本分组信息
#注意保证表达矩阵中的样本顺序和group文件分组顺序是一一对应的
mrna_sample <- read.csv("E:/Research/12.GYY/RNAseq/data/group_CvsS.csv",header = T,row.names = 1,check.names=F)

人鼠同源基因转换

#HomoloGene包

# install.packages('homologene')
library(homologene)

# 只保留编码基因
coding_gene <- data.table::fread("./input_data/protein-coding-gene.csv") %>% as.data.frame() %>% filter(gene_type == "protein_coding")
genelist<-coding_gene$gene_name

# 随便取点基因看看
genes = genelist[1:10]
genes

#使用homologene函进行转换

#inTax是输入的基因列表所属的物种号,10090是小鼠
#outTax是要转换成的物种号,9606是人
trans <- homologene(genes = genes, #需要转换的基因列表
                    inTax = 9606, #人
                    outTax = 10090 #小鼠
                    )

二、数据清理

删除低表达基因

#对表达量矩阵取整数(DESEQ2处理非整数rawcount时需要)
mrna_expr <- round(mrna_expr)

# 保留每个样本中都有表达的基因
keep_data <- rowSums(mrna_expr> 0) >= floor(ncol(mrna_expr))
#table(keep_data)
mrna_expr <- mrna_expr[keep_data,]

#删除表达量为0的基因
#mrna_expr <- mrna_expr[which(rowSums(mrna_expr)!=0),] 


# 或保留至少在75%的样本中都有表达的基因
#keep_data <- rowSums(mrna_expr> 0) >= floor(0.75*ncol(mrna_expr))
#table(keep_data)

标准化

#先看一下样本间齐不齐
boxplot(log2(mrna_expr))
#不齐的话样本间normalize一下(做热图的话可以不normalize)
mrna_expr=normalizeBetweenArrays(mrna_expr)
boxplot(mrna_expr,outline=FALSE, notch=T,col=mrna_sample, las=2)
#log化处理
mrna_expr = log2(mrna_expr+1) 
boxplot(log2(mrna_expr))
#将log化后的负无穷值替换为0
#mrna_expr[mrna_expr == -Inf] = 0 

三、差异分析

DESeq2差异分析(适用于rawcount)

dds_2 <- DESeqDataSetFromMatrix(countData = expr_data,
                                colData = sample_data,
                                design = ~ group)
dds_2$condition <- relevel(dds_2$group, ref = "DSS") # 指定哪一组作为对照

dds_2 <- DESeq(dds_2, fitType = 'mean', minReplicatesForReplace = 7, parallel = FALSE)

## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing

nrDEG_2 <- as.data.frame(results(dds_2))
nrDEG_2 = nrDEG_2[order(nrDEG_2$log2FoldChange),] # 按照logFC排序
nrDEG_2[1:3,1:4]

write.csv(nrDEG_2,file = "./DESeq2_DEG.csv")

limma差异分析

#构建分组矩阵
design <- model.matrix(~factor(mrna_sample$GROUP) + 0)
colnames(design) <- c("CART","GSCART")
#构建比较矩阵
contrast.matrix <- makeContrasts(GSCART - CART, levels = factor(mrna_sample$GROUP))#实验组vs对照组
#线性混合模拟
fit <- lmFit(mrna_expr,design)#非线性最小二乘法,线性模型拟合
fit2 <- contrasts.fit(fit,contrast.matrix)
fit2 <- eBayes(fit2)#用经验贝叶斯调整t-test中方差的部分
#输出差异分析结果
RNA_DEG <- topTable(fit2,
                    #adjust = 'BH',
                    coef = 1,
                    n = Inf,
                    sort.by="logFC"
                    #p.value = 0.05
)
#筛选差异基因
allDiffsig=RNA_DEG[RNA_DEG$P.Val<0.05,]
allDiffsig=allDiffsig[allDiffsig$logFC >1 | allDiffsig$logFC < -1,]
#显著上调的基因
up=allDiffsig[allDiffsig$logFC>0,]
#显著下调的基因
down=allDiffsig[allDiffsig$logFC<0,]

#RNA_DEG = RNA_DEG[order(RNA_DEG$logFC),] # 按照logFC排序
write.csv(RNA_DEG,"E:/Research/12.GYY/RNAseq/result/GvsC_DEG_limma.csv")
write.csv(mrna_expr,"E:/Research/12.GYY/RNAseq/result/GvsC_RNA_norm.csv")

特殊情况:配对样本差异分析

rm(list=ls())
setwd("E:/Research/5.CRC_liver/Analysis-202304/data/Diff_analysis")#设置工作路径

#读取总数据
data = read.csv('Input data.csv',header=TRUE,row.names=1)
group = read.csv('group.csv', header=TRUE, row.names=1) 
individuals = read.csv('individual.csv', header=TRUE, row.names=1) 
#提取MO+MM
data_MMMO <- data[,c(1:47,125:171)]
group_MMMO <- group[c(1:47,125:171),]
individuals_MMMO <- individuals[c(1:47,125:171),]
groupID_MMMO <- factor(group_MMMO,levels = unique(group_MMMO))
individualsID_MMMO <- factor(individuals_MMMO,levels = unique(individuals_MMMO))

#整理分组矩阵
design_MMMO_paried <- model.matrix(~ individualsID_MMMO + groupID_MMMO)

fit_MMMO <- lmFit(data_MMMO,design_MMMO_paried)
fit_MMMO <- eBayes(fit_MMMO)
#差异分析
allDiff_MMMO_paired <- topTable(fit_MMMO,
                           adjust = 'BH',
                           coef = ncol(design_MMMO_paried),
                           n = Inf)
p_MMMO <- EnhancedVolcano(allDiff_MMMO_paired,
                          lab = rownames(allDiff_MMMO_paired),
                          x = 'logFC',
                          y =  'P.Value',
                          title = 'paired MM VS MO',
                          pointSize = 3.0,
                          labSize = 6.0,
                          legendPosition = 'right',
                          pCutoff = 0.05,
                          FCcutoff = 1)
p_MMMO
write.csv(allDiff_MMMO_paired,"allDiff_MMMO_paired.csv")

四、差异基因展示

火山图的绘制

p_MONO <- EnhancedVolcano(RNA_DEG,
                          lab = rownames(RNA_DEG),
                          x = 'logFC',
                          y =  'P.Value',
                          xlim = c(-5, 5), #x轴范围
                          ylim = c(0, 6.5), #y轴范围
                          title = 'GS_CART vs CART',
                          #selectLab = c("CCR5"), #填写你想标注的基因
                          col = c("grey30", "#458c45", "#3c61a3", "#d4372a"),
                          pointSize = 3.0,
                          labSize = 6.0,
                          legendPosition = 'right',
                          pCutoff = 0.05,
                          FCcutoff = 1,
                          gridlines.major = FALSE,     # 背景网格
                          gridlines.minor = FALSE,
                          legendLabSize = 8,
                          legendIconSize = 8,
                          titleLabSize =12,subtitle = NULL)

p_MONO

热图1:pheatmap

rt=mrna_expr[c('MAP2K3','FOS','LIF','TBX21',"TNFRSF9","CD40LG",
               "IL2","IFNG","TNF","IL1A","IL3","IL4","IL9","IL22",
               "GZMB","CRTAM","IL18RAP","CD160","CAMK2D",
               "XCL1","XCL2","CCL1","CCL4"),] #"IL21","IL17A","IL31"不在里面
rt=na.omit(rt)

write.csv(rt,"E:/Research/12.GYY/RNAseq/data/heatmap_gene.csv")

anno=data.frame(row.names = colnames(rt),group=mrna_sample)

library(pheatmap)
pheatmap(rt,annotation_col = anno,scale = 'row',#针对行进行标化
         cluster_cols = F,
         show_rownames = T, show_colnames = F,
color = colorRampPalette(c(rep("blue",1), "white", rep("red",1)))(100),
breaks = seq(-3,3,length.out = 100),border_color = NA)

热图2:ComplexHeatmap

#ComplexHeatmap包并不会对数据进行标准化,为了让图形更好看,先手动进行标准化
dt <- apply(rt, 1, scale)
rownames(dt) <- colnames(rt)
dt <- t(dt)
#scale函数是按列归一化,对于我们一般习惯基因名为行,样本名为列的数据框,就需要进行转置
#配色自定义:
mycol <- colorRamp2(c(-2, 0, 2), c("#0da9ce", "white", "#e74a32"))

#格子大小设置(按个人需求):
cellwidth = 1
cellheight = 0.35
cn = dim(dt)[2]
rn = dim(dt)[1]
w = cellwidth*cn
h = cellheight*rn

#注释配色自定义:
c4a_gui()
cols <- c4a('pastel1',6)
df <- data.frame(colnames(dt))
colnames(df) <- 'cell'
cols;df
#注释颜色添加:
row_cols <- setNames(c("#FBB4AE","#FBB4AE","#FBB4AE","#FBB4AE","#FBB4AE","#FBB4AE","#B3CDE3","#B3CDE3","#B3CDE3","#B3CDE3","#B3CDE3","#B3CDE3","#B3CDE3","#B3CDE3","#CCEBC5","#CCEBC5", "#CCEBC5", "#CCEBC5", "#CCEBC5", "#DECBE4", "#DECBE4" ,"#DECBE4" ,"#DECBE4"), rownames(dt))
row_cols
#生成注释信息:
col_anno <- HeatmapAnnotation(df = df,
                             show_annotation_name = F,
                             gp = gpar(col = 'white', lwd = 2),
                             col = list(cell = c(
                               'P1_C' = "#FBB4AE",
                               'P2_C' = "#B3CDE3",
                               'P3_C' = "#CCEBC5",
                               'P1_S' = "#DECBE4",
                               'P2_S' = "#FED9A6",
                               'P3_S' = "#FFFFCC" 
                              )))
#生成注释信息:
row_anno <- rowAnnotation(foo = anno_text(rownames(dt),
                                location = 0,
                                just = "left",
                                gp = gpar(fill = row_cols,
                                            col = "black",
                                            fontface = 'italic'),
                                width = max_text_width(rownames(dt))*1.25))

#绘图:
Heatmap(as.matrix(dt),
        width = unit(w, "cm"),
        height = unit(h, "cm"),
        name = 'expression',
        col = mycol,
        cluster_columns = F,
        cluster_rows = F,
        column_names_side = c('top'),
        column_names_rot = 60,
        row_title = 'T cell key markers',
        rect_gp = gpar(col = "white", lwd = 1.5),
        heatmap_legend_param = list(legend_height = unit(2.8, "cm"),
                            grid_width = unit(0.4, "cm"),
                            labels_gp = gpar(fontsize = 10)),
        top_annotation = col_anno, #添加列注释色块
        row_split = c("A","A","A","A","A","A","B","B","B","B","B","B","B","B","C","C", "C", "C", "C", "D", "D" ,"D" ,"D"), #对行切片
        row_gap = unit(2, "mm"),
        border = T,
        border_gp = gpar(col = "black", lwd = 1.2)) +
  row_anno #热图中添加背景注释色块

五、富集分析

KEGG富集分析

down_gene_entrezid <- bitr(down_gene, fromType = "SYMBOL",
            toType = c( "ENTREZID"),
            OrgDb = org.Hs.eg.db)
KEGGenrich_down <- enrichKEGG(gene = down_gene_entrezid$ENTREZID,
            organism = 'hsa', #物种是人
            #universe = gene_all,
            pvalueCutoff = 1,
            qvalueCutoff = 1)
#画图:KEGG-上调
library(ggsci)
kegg_up=kegg_up[kegg_up$pvalue<0.05,]
#柱状图
ggplot(data=kegg_up, aes(x=Description,y=GeneRatio)) +
  geom_bar(stat="identity",fill='#BC3C29') +
  xlab("KEGG_up") + ylab("GeneRatio") + theme_bw()+
  theme(axis.text.x=element_text(angle=0)) +
  scale_fill_nejm()+scale_color_nejm()+coord_flip()
  
#气泡图
dotplot(KEGGenrich_up)+scale_color_continuous(low='#bc3c29', high='#dadada')
#画图:KEGG-下调
library(ggsci)
kegg_down=kegg_down[kegg_down$pvalue<0.05,]

ggplot(data=kegg_down, aes(x=Description,y=GeneRatio)) +
  geom_bar(stat="identity",fill='#BC3C29') +
  xlab("KEGG_down") + ylab("GeneRatio") + theme_bw()+
  theme(axis.text.x=element_text(angle=0)) +
  scale_fill_nejm()+scale_color_nejm()+coord_flip()
  
dotplot(KEGGenrich_down)+scale_color_continuous(low='#bc3c29', high='#dadada')

GO富集分析

#上调基因
GOenrich_up <- enrichGO(gene = up_gene_entrezid$ENTREZID,
            OrgDb = 'org.Hs.eg.db',
            ont = 'All',
            #universe = gene_all,
            pvalueCutoff = 1,
            qvalueCutoff = 1)
GOenrich_up=DOSE::setReadable(GOenrich_up,OrgDb='org.Hs.eg.db',keyType='ENTREZID')

#下调基因
GOenrich_down <- enrichGO(gene = down_gene_entrezid$ENTREZID,
            OrgDb = 'org.Hs.eg.db',
            ont = 'All',
            #universe = gene_all,
            pvalueCutoff = 1,
            qvalueCutoff = 1)
GOenrich_down=DOSE::setReadable(GOenrich_down,OrgDb='org.Hs.eg.db',keyType='ENTREZID')


GO_up=GOenrich_up@result
GO_down=GOenrich_down@result


write.csv(GO_up,'E:/Research/12.GYY/RNAseq/result/GO_up.csv',quote = F)
write.csv(GO_down,'E:/Research/12.GYY/RNAseq/result/GO_down.csv',quote = F)
#画图:GO上调
GO_up=GO_up[GO_up$pvalue<0.05,]
#柱状图
ggplot(data=GO_up, aes(x=Description,y=GeneRatio)) +
  geom_bar(stat="identity",fill='#BC3C29') +
  xlab("GO_up") + ylab("GeneRatio") + theme_bw()+
  theme(axis.text.x=element_text(angle=0)) +
  scale_fill_nejm()+scale_color_nejm()+coord_flip()

dotplot(GOenrich_up)+scale_color_continuous(low='#bc3c29', high='#dadada')
#画图:GO下调

GO_down=GO_down[GO_down$pvalue<0.05,]
#柱状图
ggplot(data=GO_down, aes(x=Description,y=GeneRatio)) +
  geom_bar(stat="identity",fill='#BC3C29') +
  xlab("GO_up") + ylab("GeneRatio") + theme_bw()+
  theme(axis.text.x=element_text(angle=0)) +
  scale_fill_nejm()+scale_color_nejm()+coord_flip()

dotplot(GOenrich_down)+scale_color_continuous(low='#bc3c29', high='#dadada')

GSEA富集分析

library(tidyverse)
library(msigdbr)
library(clusterProfiler)
library(org.Mm.eg.db)
library(enrichplot)
library(RColorBrewer)
library(ggrepel)
library(ggplot2)
library(aplot)

GO_BP数据库

######### 1、选择GO_BP数据库 ##############
# choose C5 BP gene sets
genesets <-  msigdbr(species = "Homo sapiens",#Mus musculus
                     category = "C5",
                     subcategory = "BP")
# check
head(genesets,3)

# TERM2GENE
gmt <- genesets %>% dplyr::select(gs_name,gene_symbol)

#从差异基因列表导入基因
RNA_DEG$gene_name <- rownames(RNA_DEG)
RNA_DEG$log2FoldChange <- RNA_DEG$logFC
gene <- RNA_DEG[,-c(1:6)]
gene <- arrange(gene,desc(log2FoldChange)) #按照LogFC降序排列
#从txt文件读入
#gene <- read.table('./limma_DEG.txt',header = T) %>%  arrange(desc(log2FoldChange))

# check
head(gene,3)

geneList <- gene$log2FoldChange
names(geneList) <- gene$gene_name

# check
head(geneList,3)
# GO enrich
gseaRes <- GSEA(geneList = geneList,
                TERM2GENE = gmt,
                minGSSize    = 10,
                maxGSSize    = 500,
                pvalueCutoff = 1,
                pAdjustMethod = "BH", #一般选择 "BH" 或 "fdr",BH较严格,fdr较温和(计算的q小些)
                by = "fgsea", #"fgsea"或“DOSE”,传统的GSEA方法很慢,可以通过对随机的基因集抽样解决这个问题,也就是 a fast gene set enrichment analysis (FGSEA) 
                #nPerm = 2000,
                verbose      = FALSE)

# to dataframe
data_ga <- data.frame(gseaRes) %>%
  filter(pvalue < 0.05)

View(data_ga)

write.csv(data_ga, file = "E:/Research/12.GYY/RNAseq/result/gsea_result_gobp.csv")

KEGG_BP数据库

######### 2、选择KEGG_BP数据库 ##############
#h <- msigdbr(species = "Mus musculus")
#table(h$gs_subcat) #查看GO;KEGG等基因集
#h2 <- h[grep(pattern = "TP53",x = h$gs_name),]  #pattern参数内输入想要寻找的关键词

#获取KEGG基因集
genesets <-  msigdbr(species = "Homo sapiens",#Mus musculus
                     category = "C2",
                     subcategory = "KEGG")

# check
head(genesets,3)

# TERM2GENE
gmt <- genesets %>% dplyr::select(gs_name,gene_symbol)


#从差异基因列表导入基因
RNA_DEG$gene_name <- rownames(RNA_DEG)
RNA_DEG$log2FoldChange <- RNA_DEG$logFC
gene <- RNA_DEG[,-c(1:6)]
gene <- arrange(gene,desc(log2FoldChange)) #按照LogFC降序排列

#从txt文件读入
#gene <- read.table('./limma_DEG.txt',header = T) %>%
#  arrange(desc(log2FoldChange))

# check
head(gene,3)

geneList <- gene$log2FoldChange
names(geneList) <- gene$gene_name

# check
head(geneList,3)

# KEGG enrich
gseaRes <- GSEA(geneList = geneList,
                TERM2GENE = gmt,
                minGSSize    = 10,
                maxGSSize    = 500,
                pvalueCutoff = 1,
                pAdjustMethod = "BH",
                verbose      = FALSE)

# to dataframe
data_ga <- data.frame(gseaRes) %>%
  filter(pvalue < 0.05)

write.csv(data_ga, file = "E:/Research/12.GYY/RNAseq/result/gsea_result_kegg.csv")

HALLMARK数据库或自建基因集

######### 3、选择HALLMARK数据库或自建基因集 ##############

#HALLMARK数据库
genesets <-  msigdbr(species = "Homo sapiens",
                     category = "H")

# 1)从HALLMARK获取gmt
gmt <- genesets %>% dplyr::select(gs_name,gene_symbol)


# 2)导入自己构建的gmt
gmt <- read.table("./AEos_gene.txt")


# load gene
gene <- read.table('./limma_DEG.txt',header = T) %>%
  arrange(desc(log2FoldChange))

geneList <- gene$log2FoldChange
names(geneList) <- gene$gene_name

# KEGG enrich
gseaRes <- GSEA(geneList = geneList,
                TERM2GENE = gmt,
                minGSSize    = 10,
                maxGSSize    = 500,
                pvalueCutoff = 1,
                pAdjustMethod = "BH",
                verbose      = FALSE)



# to dataframe
data_ga <- data.frame(gseaRes) %>%
  filter(pvalue < 1)

data_ga <- write.csv(data_ga, file = "./AEos_gene_result.csv")#
#可视化
library(GseaVis)


#MDSC
gseaNb(object = gseaRes,
       geneSetID = 'AEos',
       subPlot = 2,
       addPval = T,
       pvalX = 0.75,pvalY = 0.8,
       pCol = 'black',
       pHjust = 0)



gseaNb(object = gseaRes,
       geneSetID = 'MDSC',
       subPlot = 2,
       addPval = T,
       pvalX = 0.75,pvalY = 0.8,
       pCol = 'black',
       pHjust = 0,
       addGene = T,
       markTopgene= T,
       topGeneN = 10,
       geneCol = '#4d4d4d')

六、免疫浸润分析

#使用IOBR包进行分析

Last updated