// R
library(tidyverse)
library(DESeq2)
library(cluster)
library(ggh4x)
vsd <- vst(dds, blind = FALSE)
# 1. vsd to dataframe
mat <- assay(vsd) %>%
as.data.frame() %>%
rownames_to_column(var="gene")
# 2. significant DEGs
DEG_DESeq2$gene <- row.names(DEG_DESeq2)
sig_genes <- DEG_DESeq2 %>%
filter(pvalue < 0.05, abs(log2FoldChange) > 1) %>%
pull(gene)
# 3. subset matrix
mat <- mat %>%
filter(gene %in% sig_genes) %>%
column_to_rownames(var="gene")
heatmap <- as.data.frame(t(scale(t(mat))))
pam_res <- pam(heatmap, k = 5)
clusters <- pam_res$clustering
# 构建分面条第颜色
strips <- strip_themed(
background_y = elem_list_rect(
fill=c("1"="#171236","2"="#5ca0bc","3"="#fcc103",#aca47c,#685704
"4"="#36998d","5"="#a661ff")),
background_x = elem_list_rect(fill=c("ctrl"="#125ed8","pos"="#02935f")))
heatmap %>% rownames_to_column("gene_id") %>%
mutate(cluster = factor(clusters[gene_id], levels = sort(unique(clusters)))) %>%
pivot_longer(cols = -c(gene_id, cluster),
names_to = "sample", values_to = "value") %>%
mutate(group = sub(".*_(.*)[0-9]+$", "\\1", sample)) %>%
arrange(group, sample) %>%
mutate(sample = factor(sample, levels = unique(sample)))%>%
ggplot(aes(sample,gene_id,fill=value)) +
geom_tile() +
facet_grid2(cluster~group,scale="free",strip=strips,switch="y")+
force_panelsizes(rows=table(clusters),respect=F) +
scale_fill_gradientn(colours = rev(RColorBrewer::brewer.pal(11,"RdBu")),
breaks=c(seq(-2,2,by=0.5)))+
scale_x_discrete(expand = c(0,0)) +
labs(fill="Z score") +
guides(fill = guide_colorbar(
barwidth = unit(0.6,"cm"), # 调整条宽度
barheight = unit(5,"cm"), # 调整条高度
direction = "vertical" # 纵向图例
)) +
theme(axis.text.y=element_blank(),
axis.text.x=element_text(color="black",angle = 90,vjust=0.5),
axis.ticks = element_blank(),
axis.title=element_blank(),
strip.text = element_text(color="white",face="bold"),
panel.spacing.y = unit(0.01,"cm"),
panel.spacing.x = unit(0.02,"cm"),
legend.position = "right", # 放在右侧
legend.box = "vertical",
legend.background = element_blank(),
legend.text = element_text(color="black"),
legend.key.height = unit(1,"null"))+ scale_fill_gradientn(
colours = rev(RColorBrewer::brewer.pal(11,"RdBu")),
breaks = seq(-4,4,1),
name = "Z score"
)
#8*8
#导出每个cluster的基因
library(dplyr)
cluster_df <- data.frame(
gene = names(clusters),
cluster = clusters
)
# 导出各个cluster的基因
cluster_df %>%
group_by(cluster) %>%
summarise(genes = paste(gene, collapse = ",")) %>%
write.csv("./clusters_all_genes.csv", row.names = FALSE, quote = FALSE)
# 分别KEGG富集分析
library(clusterProfiler)
library(org.Mm.eg.db) # 小鼠基因注释数据库
library(dplyr)
library(ggplot2)
library(tidyr)
# 1. cluster 数据框
cluster_df <- data.frame(
gene = names(clusters),
cluster = clusters,
stringsAsFactors = FALSE
)
# 2. gene symbol 转 EntrezID
gene2entrez <- bitr(cluster_df$gene, fromType="SYMBOL",
toType="ENTREZID", OrgDb=org.Mm.eg.db)
cluster_df <- cluster_df %>%
left_join(gene2entrez, by = c("gene" = "SYMBOL"))
# 3. KEGG 富集分析,每个 cluster 前10条
kegg_list <- list()
for(cl in sort(unique(cluster_df$cluster))) {
genes_entrez <- cluster_df %>%
filter(cluster == cl) %>%
pull(ENTREZID) %>% na.omit()
if(length(genes_entrez) > 0){
kegg_res <- enrichKEGG(
gene = genes_entrez,
organism = 'mmu',
pAdjustMethod = "BH",
qvalueCutoff = 0.05
)
if(!is.null(kegg_res) && nrow(kegg_res) > 0){
df <- as.data.frame(kegg_res)
df$cluster <- cl
# 按 p.adjust 排序并取前10
df <- df %>% arrange(p.adjust) %>% head(10)
# 去掉通路名后缀
df$Description <- str_replace(df$Description, " - Mus.*$", "")
kegg_list[[as.character(cl)]] <- df
}
}
}
# 4. 合并所有 cluster
kegg_all <- bind_rows(kegg_list)
# 5. 计算 gene ratio 数值
kegg_all <- kegg_all %>%
mutate(GeneRatioNum = as.numeric(sapply(strsplit(GeneRatio, "/"),
function(x) as.numeric(x[1])/as.numeric(x[2]))))
# 6. pathway 纵轴顺序,按照 cluster1→cluster5 出现顺序
pathway_order <- kegg_all %>%
arrange(cluster) %>%
distinct(Description) %>%
pull(Description)
kegg_all$Description <- factor(kegg_all$Description, levels = rev(pathway_order)) # 纵轴从上到下
# 7. 绘制气泡图
blue_white <- brewer.pal(11, "RdBu")[6:11] # 前6个颜色:深蓝→白
ggplot(kegg_all, aes(x = factor(cluster), y = Description)) +
geom_point(aes(size = GeneRatioNum, color = -log10(p.adjust))) +
scale_size_continuous(range = c(3, 10)) +
scale_color_gradientn(
colors = rev(blue_white), # 蓝到白
limits = c(0, max(-log10(kegg_all$p.adjust))),
name = "-log10(adj.P)"
) +
theme_bw() +
labs(
x = "Cluster",
y = "KEGG Pathway",
size = "Gene Ratio"
) +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
axis.text.y = element_text(size = 10),
panel.grid = element_blank(),
legend.key.height = unit(1.2, "cm"),
legend.key.width = unit(0.6, "cm")
)
#10*10