专题：R包画paper中常见的图

生物信息与R语言QQ群: 187923577

目录 Contents：

1. 使用 survminer 包可视化生存分析
2. 使用 ggpubr 包球棍图 (棒棒糖图 Lollipop Chart)
3. 使用 cowplot 包合并2个 ggplot2 对象并对齐y坐标
4. 使用 grid 包修改 ggplot2 分面顶标签:去掉三面的方框线
5. 使用 grid 包修改 ggplot2 图例: 矩形改为圆角矩形
6. 使用 pheatmap 包绘制热图: 显示样本分类，设置颜色和间隔
n. xx图 todo

Figure and Code, mainly using other packages besides ggplot2

Tips: 看不清请刷新，换个颜色再看。

1. 比较干净的背景: +theme_bw(); 最干净的背景: +theme_classic()
2. 参数的解释: 生物慕课网
3. 本页面最顶/底部有生信QQ群号，欢迎加入讨论，严禁广告。

1. 使用 survminer 包可视化生存分析

详情查看githug上 survminer 包官方示例。

大佬写过好几个高层包: ggpubr, factoextra, survminer, ggcorrplot, rstatix, datarium。

 # 展示的是最后一个代码的图。
# load data ####
library("survival")
library("survminer")
data("lung")
#write.table(lung, "lung.df.txt")
dim(lung) #228 10
head(lung) #sex:	Male=1 Female=2
#   inst time status age sex ph.ecog ph.karno pat.karno meal.cal wt.loss
#1    3  306      2  74   1       1       90       100     1175      NA
#2    3  455      2  68   1       0       90        90     1225      15
#3    3 1010      1  56   1       0       90        90       NA      15
# inst:	Institution code
# time:	Survival time in days
# status:	censoring status 1=censored, 2=dead (用1/2编码是历史习惯)
# age:	Age in years
# sex:	Male=1 Female=2
# ph.ecog:	ECOG performance score as rated by the physician. 
#    0=asymptomatic, 
#    1= symptomatic but completely ambulatory, 
#    2= in bed <50% of the day, 
#    3= in bed > 50% of the day but not bedbound, 
#    4 = bedbound
# ph.karno:	Karnofsky performance score (bad=0-good=100) rated by physician
# pat.karno:	Karnofsky performance score as rated by patient
# meal.cal:	Calories consumed at meals
# wt.loss:	Weight loss in last six months

boxplot(time~status, data=lung)
#只需要三列: time, status, 分类变量
# 分类变量可以是表达值、某个组合打分等和阈值的比较，结果是二分类的即可。


# 看性别是否对生存期有影响
# 构建模型
fit = survfit(Surv(time, status) ~ sex, data=lung)
# 绘制原生KM曲线
plot(fit)
#可优化点：
#1）区分两条线的颜色和legend
#2）坐标轴，标题，主题优化
#3）Risk table
#4）P值，OR值，CI值等注释信息



#1) 线的颜色是啥？----
p1 = ggsurvplot(fit)
p1

#2) 坐标轴，标题，主题优化 ----
p2 = ggsurvplot(fit, data = lung,
                 surv.median.line = "hv",  #添加中位生存曲线
                 palette=c("red", "blue"), #更改线的颜色
                 legend.labs=c("Male","Female"),  #标签
                 legend.title="Treatment", #图例标题
                 title="Overall survival",  #标题
                 ylab="Cumulative survival (percentage)", xlab = " Time (Days)", #更改横纵坐标
                 censor.shape = 124, censor.size = 2, conf.int = FALSE,  #删失点的形状和大小
                 break.x.by = 100  #横坐标间隔
)
p2
# 以上基本就完成了KM曲线颜色，线型大小，标签，横纵坐标，标题，删失点等的修改，Q2搞定！
# 注意：中位生存时间表示 50％ 的个体尚存活的时间，而不是生存时间的中位数！



#3) Risk Table ----
p3 = ggsurvplot(fit, data = lung,
                 surv.median.line = "hv", #添加中位生存曲线
                 palette=c("red", "blue"),
                 legend.labs=c("Male","Female"), #标签
                 legend.title="Treatment",
                 title="Overall survival",
                 ylab="Cumulative survival (percentage)",xlab = " Time (Days)", #更改横纵坐标
                 censor.shape = 124,censor.size = 2,conf.int = FALSE,
                 break.x.by = 100,
                 
                 risk.table = TRUE,tables.height = 0.2,
                 tables.theme = theme_cleantable(),
                 ggtheme = theme_bw())
p3
# 注 tables.height可调整为看起来“舒服”的高度
# 根据risk table 可以看出关键点的当前状态，Q3摆平！



#4) 添加注释信息 ----
# 添加KM的P值
P4 = ggsurvplot(fit, data = lung,
                 
                 pval = TRUE,#添加P值
                 pval.coord = c(0, 0.03), #调节Pval的位置
                 
                 surv.median.line = "hv", #添加中位生存曲线
                 palette=c("red", "blue"),
                 legend.labs=c("Male","Female"), #标签
                 legend.title="Treatment",
                 title="Overall survival",
                 ylab="Cumulative survival (percentage)",xlab = " Time (Days)", #更改横纵坐标
                 censor.shape = 124,censor.size = 2,conf.int = FALSE,
                 break.x.by = 100,
                 
                 risk.table = TRUE,tables.height = 0.2,
                 tables.theme = theme_cleantable(),
                 ggtheme = theme_bw())
P4
# pval.coord可以调节P值得位置


# 添加COX回归hazard ratio值等相关信息 ----
###添加HR ,CI ,P
res_cox = coxph(Surv(time, status) ~sex, data=lung)
p5=p3
p5$plot = p3$plot + 
  ggplot2::annotate("text",x = 90, y = 0.15, size=3,
                    label = paste("HR :",round(summary(res_cox)$conf.int[1],2))) +
  ggplot2::annotate("text",x = 90, y = 0.10, size=3,
                    label = paste("(","95%CI:",round(summary(res_cox)$conf.int[3],2),
                                  "-",round(summary(res_cox)$conf.int[4],2),")",sep = ""))+
  ggplot2::annotate("text",x = 90, y = 0.05, size=3,
                    label = paste("P:",round(summary(res_cox)$coef[5],4)))#+
  #ggplot2::theme( axis.text.x = element_text(face="bold", color="blue", size=8))
p5

# 添加其他信息
# 可类似上述annotation得方式，使用ggplot2添加文字，箭头，公式等其他信息

2. 使用 ggpubr 包球棍图 (棒棒糖图 Lollipop Chart)

就是一根柱子加上一个圆，类似传统的柱状图，但是提供了更多的信息。

暂时无图。等学深入了再补充图。

library(ggpubr)
#微调数据，添加 name列，设置cyl位因子
dfm=mtcars
dfm$name=rownames(dfm)
dfm$cyl=as.factor(dfm$cyl)
head(dfm)

# 1.基本
ggdotchart(dfm, x = "name", y = "mpg",
           color = "cyl",       # 按照cyl填充颜色
           palette = c("#00AFBB", "#E7B800", "#FC4E07"), # 修改颜色
           sorting ="descending", #"ascending",                        
           add = "segments",        # 添加棒子
           ggtheme = theme_pubr(),  # 改变主题
           #rotate=T,
           xlab=""
)


# 2.横着，添加小球和数字
ggdotchart(dfm, x = "name", y = "mpg",
           color = "cyl",       # 按照cyl填充颜色
           palette = c("#00AFBB", "#E7B800", "#FC4E07"), # 修改颜色
           sorting = "descending",                      
           add = "segments",          # 添加棒子
           add.params = list(color = "lightgray", size = 1.5),#改变棒子参数
           rotate = TRUE,              # 方向转为垂直
           group = "cyl",                                
           dot.size = 6,               # 改变点的大小
           label = round(dfm$mpg),     # 添加label
           font.label = list(color = "white", size = 9, 
                             vjust = 0.5),   # 设置label参数
           ggtheme = theme_pubr(),           # 改变主题
           xlab=""
)

3. 使用 cowplot 包合并2个 ggplot2 对象并对齐y坐标

如果一个是分面的，则很难能否对齐坐标轴，但也不是不可能。参数 axis = "bt" 很关键。

# 预先准备好 Seurat(v4.0.4) 对象sce和各个类之间的差异基因top11。

> sce
An object of class Seurat 
13714 features across 745 samples within 1 assay 
Active assay: RNA (13714 features, 2000 variable features)
 3 dimensional reductions calculated: pca, umap, tsne

> head(top11)
# A tibble: 6 × 7
# Groups:   cluster [1]
      p_val avg_log2FC pct.1 pct.2 p_val_adj cluster     gene 
1 3.75e-112      1.09  0.912 0.592 5.14e-108 Naive CD4 T LDHB 
2 9.57e- 88      1.36  0.447 0.108 1.31e- 83 Naive CD4 T CCR7 
3 1.15e- 76      0.935 0.845 0.406 1.58e- 72 Naive CD4 T CD3D



# 开始画图
topN=top11 %>% group_by(cluster) %>% top_n(5, wt=avg_log2FC)
dim(topN)
head(topN)

# 使用 Seurat::DotPlot 画主图，并修饰主题。
g0_plot=DotPlot(sce, features=split(topN$gene, topN$cluster),
                cols= c("lightyellow", "red3") )+
  RotatedAxis()+theme(
    # 各个画板
    panel.border = element_rect(color="black"),#要边框
    panel.spacing = unit(1, "mm"), #画板间距

    axis.title = element_blank(), #去掉 坐标轴 label
    axis.text.y=element_blank(), #去掉y轴文字
  ); g0_plot


# 使用 ggplot2 画左边的文字和彩色圆圈。
colors=c("#96C3D8", "#5F9BBE", "#F5B375", "#C0937E", "#67A59B", "#A5D38F", "#4A9D47", "#F19294", "#E45D61", "#3377A9",
         "#BDA7CB", "#684797", "#8D75AF", "#CD9C9B", "#D62E2D", "#DA8F6F", "#F47D2F")
df1=data.frame(x=0, y= levels(sce), stringsAsFactors = F )
df1$y=factor(df1$y, levels = df1$y )
# df1$y
g1_left=ggplot(df1, aes(x,y, color=factor(y) ))+
  geom_point(size=6, show.legend = F)+
  scale_color_manual(values=colors)+
  theme_classic()+
  scale_x_continuous(expand=c(0,0))+
  theme(
    plot.margin =margin(r=0), #no margin on the right
    axis.title = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks = element_blank(),
    axis.line = element_blank(),
    axis.text.y=element_text(size=12)
  ); g1_left


# 拼合图形
library(cowplot)
# https://wilkelab.org/cowplot/articles/aligning_plots.html
# we can align both the bottom and the top axis (axis = "bt").
plot_grid(g1_left, g0_plot, align ="h", axis="bt", rel_widths = c(1, 9))

4. 使用 grid 包修改 ggplot2 分面顶标签:去掉三面的方框线

This solution is based on grobs: find positions of "strip-t" (top strips) and then substitute the rect grobs with line grobs.

library(ggplot2)

p1=ggplot(iris, aes(Sepal.Length, Sepal.Width, color=Species))+
  geom_point(size=1.5)+
  theme_classic()+
  facet_grid(~Species,
             space = "free_x", #x轴宽度自由
             scales = "free_x")+ #x坐标轴范围自由
  theme(
    panel.border = element_rect(color="black", size=0.8, fill="#00112200"),
    strip.placement = "outside", #分面顶部标签和主图分离
    strip.background = element_rect(linetype = 1, size=2 ), #边框线条
    strip.text.x = element_text(margin = margin(0,0,0.2,0, "cm")), # 控制分面顶部框内边距
    # strip.background = element_rect(),
    axis.line = element_blank(), #不要坐标轴的线
    axis.ticks = element_blank(), #不要坐标轴的刻度
    axis.title = element_blank(), #不要坐标轴label
    axis.text.x=element_text(angle=60, hjust = 1), #旋转60度
); p1


# 去掉分面标签的3面的边
# https://stackoverflow.com/questions/54471816/remove-three-sides-of-border-around-ggplot-facet-strip-label

library(grid)
lg = linesGrob(x=unit(c(0,1),"npc"), y=unit(c(0,0)+0.2,"npc"),
                gp=gpar(col="red", lwd=3))

# grid.newpage(); grid.draw(lg) #预览效果


q = ggplotGrob(p1) #ggplot2 变 grid 对象
q$layout$name #先预览

# 替换
for (k in grep("strip-t",q$layout$name)) {
  q$grobs[[k]]$grobs[[1]]$children[[1]] = lg
}
# 画图
grid.draw(q)

5. 使用 grid 包修改 ggplot2 图例: 矩形改为圆角矩形

library(ggplot2)
head(mtcars)

cols=c("darkred", "orange","darkblue")

p1=ggplot(mtcars, aes(mpg, cyl, color=factor(gear) ))+
  geom_line()+
  facet_grid(~gear)+
  theme_bw()+
  theme(
    strip.background = element_blank(), #rm分面标题的背景
    
    legend.position = "top", #图例在上面
    legend.justification = "left", #图例左对齐
    legend.margin=margin(t = 0, r = 0, b = -8, l = 0, unit = "pt"), #move legend litter lower
    legend.title = element_text( margin=margin(r=10) ),
    legend.text = element_text( margin = margin(r = 15)), #每个文字小盒子的外间距
    legend.spacing.x = unit(0, 'mm'), #一个颜色块和对应文字的距离
    legend.key.width = unit(2, "mm"), #control legend width and height
    legend.key.height = unit(3, "mm"), #最大空间的高度(可能用不到，但最多这么多)
  )+
  scale_color_manual(name="Gear", values=cols)+
  guides(color = guide_legend(override.aes = list(size = 3, title="Gear")));p1

{
	#1. ggplot 2 grob
	q = ggplotGrob(p1)
	q$layout$name
	length(q$layout$name)

	#2. define round rect Grob
	library(grid)
	getRR=function(color="red"){
	  roundrectGrob(width=0.5, height=0.9, 
					r=unit(0.3, "snpc"),
					gp=gpar(col=color, fill=color))
	}
	#3. replace ggplot legend box with Grob
	q$grobs[[20]]$grobs[[1]]
	q$grobs[[20]]$grobs[[1]]$grobs[[3]]=getRR(cols[1])
	q$grobs[[20]]$grobs[[1]]$grobs[[4]]=zeroGrob()
	q$grobs[[20]]$grobs[[1]]$grobs[[5]]=getRR(cols[2])
	q$grobs[[20]]$grobs[[1]]$grobs[[6]]=zeroGrob()
	q$grobs[[20]]$grobs[[1]]$grobs[[7]]=getRR(cols[3])
	q$grobs[[20]]$grobs[[1]]$grobs[[8]]=zeroGrob()
	
	#4. draw
	grid.newpage()
	grid.draw(q)
}
# https://stackoverflow.com/questions/46683956/create-two-legends-for-one-ggplot2-graph-and-modify-them

6.使用 pheatmap 包绘制热图: 显示样本分类，设置颜色和间隔

pheatmap 的更多教程：参数示例 |

fontsize参数设定标签字体大小，filename参数设定图片保存名称: pheatmap(test, cellwidth = 15, cellheight = 12, fontsize = 8, filename = "test.pdf")

# 1. 基因子集
apa_factors=list(
  CPSF=c("CPSF1","CPSF2","CPSF3", "CPSF3L", "CPSF4","CPSF4L","CPSF6","CPSF7"),
  CSTF=c("CSTF1","CSTF2","CSTF2T","CSTF3"),
  PABP=c("PABPC1","PABPC1L","PABPC3","PABPC4","PABPC4L","PABPC5","PABPN1"),
  PAPOL=c("PAPOLG", "PAPOLA"),
  other=c("SYMPK", "RBBP6", "WDR33", "FIP1L1", "NUDT21","CLP1", "PCF11")  #无法归类
)

tmp.genes=unique(unlist(c( apa_factors, 
      "CD34", "KIT", "SOCS2", "GATA1", "MYC",  #stem cell
      "CSF1RA", "ITGAM", "LYZ", "LST1", "SIRPA", "TYROBP", "CD14", #monocyte marker
      "CCNE2", #"MKI67", "CCNE2", "CCNB1", "CCNB2", #cell cycle?
      "BCAT1", "CBX5", "NPR3")) ) #Stem cell marker


# 2. get tpm
dat.heatmap=gene.tpm[ intersect(rownames(gene.tpm), tmp.genes),]
# rm all 0 rows
dat.heatmap=dat.heatmap[rowSums(dat.heatmap)>0,]
head(dat.heatmap[,1:5])


# 3. plot
library(pheatmap) #https://www.jianshu.com/p/1c55ea64ff3f
# anno columns
annotation_col = data.frame(
  type = gene.long$type,
  row.names = rownames(gene.long)
)
# set colors
ann_colors = list(
  type = c('wt'="#ed553b", 'kd1'="#99b433", 'kd2'="#00a300")
)

# for heatmap, use log scale
pheatmap(log(dat.heatmap+1), 
         border_color = "white", 
         color = colorRampPalette(c("navy", "white", "firebrick3"))(50), #自定义颜色
         scale="row",
         cluster_cols = T,
         #cluster_rows = F,
         annotation_col = annotation_col, #set anno for column
         annotation_colors = ann_colors, #set colors
         show_colnames = F,
         
         filename =paste0("FIP1L1_KD/Kasumi-1_FIP1L1_KD_pheatmap-3.pdf"),width=3.9, height=6.5,
         main="FIP1L1 knock down in Kasumi-1",
         clustering_method = "ward.D2") #聚类方法

示例2: pheatmap 聚类并设置间隔

关键参数: gaps_row = c(1,3,7, 10) 适用于非聚类时；cutree_row=5, cutree_cols=5, 适用于聚类时

library(pheatmap)
set.seed(2025)
mat = matrix(rnorm(100), nrow=20)
rownames(mat)=paste0("cell", 1:nrow(mat))
# 相关系数
mat.cor=cor( t(mat), method="spearman")
colnames(mat.cor)=rownames(mat)

# 列注释: 区分前10个ctrl组blue，后10个细胞treat组red
annotation_col = data.frame(
  id=colnames(mat.cor),
  group=c( rep("ctrl", 10), rep("treat", 10) ), 
  row.names=1
)
# 设置颜色
ann_colors = list(
  group = c(ctrl = "navy", treat = "deeppink")
)

# 热图
library(grid)
#ComplexHeatmap::pheatmap(mat.cor,
p1=pheatmap::pheatmap(mat.cor,
         #border_color = "white",
         border_color = NA,
         # 加方框：失败
         #add_geom = "rectangles",
         #rect_gp = gpar(fill = "transparent", col = "red", lwd = 3),
         #rect_row = c(1, 5), rect_col = c(2, 7),
         show_rownames = TRUE,
         #gaps_row = c(1,3,7, 10), #适用于非聚类时
         cutree_row=5, cutree_cols=5, #适用于聚类时
         clustering_method = "ward.D2",
         annotation_col = annotation_col, #annotation_row = annotation_row,
         annotation_colors = ann_colors,
         #color = c(colorRampPalette(colors = c("white","yellow"))(20),colorRampPalette(colors = c("yellow","firebrick3"))(20)),
         show_colnames = TRUE, main="cutree demo")
#保存pdf
pdf(paste0("D://other//demo.heatmap.pdf"), width=6, height=5)
grid::grid.newpage()
grid::grid.draw(p1$gtable)
dev.off()

n. xx图 todo

欢迎互相切磋，共同进步: 秋秋号 314649593, 请备注大名、来意。
秋秋群: 生物信息与R语言 187923577 禁止营销活动，否则飞机票。
bioToolKit is part of 生物慕课网 www.biomooc.com

GoTop