专题:R包画paper中常见的图

生物信息与R语言QQ群: 187923577

Figure and Code, mainly using other packages besides ggplot2

Tips: 看不清请刷新,换个颜色再看。

1. 比较干净的背景: +theme_bw(); 最干净的背景: +theme_classic()
2. 参数的解释: 生物慕课网
3. 本页面最顶/底部有生信QQ群号,欢迎加入讨论,严禁广告。

1. 使用 survminer 包可视化生存分析

详情查看githug上 survminer 包官方示例。

大佬写过 好几个高层包: ggpubr, factoextra, survminer, ggcorrplot, rstatix, datarium。

 # 展示的是最后一个代码的图。
# load data ####
library("survival")
library("survminer")
data("lung")
#write.table(lung, "lung.df.txt")
dim(lung) #228 10
head(lung) #sex:	Male=1 Female=2
#   inst time status age sex ph.ecog ph.karno pat.karno meal.cal wt.loss
#1    3  306      2  74   1       1       90       100     1175      NA
#2    3  455      2  68   1       0       90        90     1225      15
#3    3 1010      1  56   1       0       90        90       NA      15
# inst:	Institution code
# time:	Survival time in days
# status:	censoring status 1=censored, 2=dead (用1/2编码是历史习惯)
# age:	Age in years
# sex:	Male=1 Female=2
# ph.ecog:	ECOG performance score as rated by the physician. 
#    0=asymptomatic, 
#    1= symptomatic but completely ambulatory, 
#    2= in bed <50% of the day, 
#    3= in bed > 50% of the day but not bedbound, 
#    4 = bedbound
# ph.karno:	Karnofsky performance score (bad=0-good=100) rated by physician
# pat.karno:	Karnofsky performance score as rated by patient
# meal.cal:	Calories consumed at meals
# wt.loss:	Weight loss in last six months

boxplot(time~status, data=lung)
#只需要三列: time, status, 分类变量
# 分类变量可以是表达值、某个组合打分等和阈值的比较,结果是二分类的即可。


# 看性别是否对生存期有影响
# 构建模型
fit = survfit(Surv(time, status) ~ sex, data=lung)
# 绘制原生KM曲线
plot(fit)
#可优化点:
#1)区分两条线的颜色和legend
#2)坐标轴,标题,主题优化
#3)Risk table
#4)P值,OR值,CI值等注释信息



#1) 线的颜色是啥?----
p1 = ggsurvplot(fit)
p1

#2) 坐标轴,标题,主题优化 ----
p2 = ggsurvplot(fit, data = lung,
                 surv.median.line = "hv",  #添加中位生存曲线
                 palette=c("red", "blue"), #更改线的颜色
                 legend.labs=c("Male","Female"),  #标签
                 legend.title="Treatment", #图例标题
                 title="Overall survival",  #标题
                 ylab="Cumulative survival (percentage)", xlab = " Time (Days)", #更改横纵坐标
                 censor.shape = 124, censor.size = 2, conf.int = FALSE,  #删失点的形状和大小
                 break.x.by = 100  #横坐标间隔
)
p2
# 以上基本就完成了KM曲线颜色,线型大小,标签,横纵坐标,标题,删失点等的修改,Q2搞定!
# 注意:中位生存时间表示 50% 的个体尚存活的时间,而不是生存时间的中位数!



#3) Risk Table ----
p3 = ggsurvplot(fit, data = lung,
                 surv.median.line = "hv", #添加中位生存曲线
                 palette=c("red", "blue"),
                 legend.labs=c("Male","Female"), #标签
                 legend.title="Treatment",
                 title="Overall survival",
                 ylab="Cumulative survival (percentage)",xlab = " Time (Days)", #更改横纵坐标
                 censor.shape = 124,censor.size = 2,conf.int = FALSE,
                 break.x.by = 100,
                 
                 risk.table = TRUE,tables.height = 0.2,
                 tables.theme = theme_cleantable(),
                 ggtheme = theme_bw())
p3
# 注 tables.height可调整为看起来“舒服”的高度
# 根据risk table 可以看出关键点的当前状态,Q3摆平!



#4) 添加注释信息 ----
# 添加KM的P值
P4 = ggsurvplot(fit, data = lung,
                 
                 pval = TRUE,#添加P值
                 pval.coord = c(0, 0.03), #调节Pval的位置
                 
                 surv.median.line = "hv", #添加中位生存曲线
                 palette=c("red", "blue"),
                 legend.labs=c("Male","Female"), #标签
                 legend.title="Treatment",
                 title="Overall survival",
                 ylab="Cumulative survival (percentage)",xlab = " Time (Days)", #更改横纵坐标
                 censor.shape = 124,censor.size = 2,conf.int = FALSE,
                 break.x.by = 100,
                 
                 risk.table = TRUE,tables.height = 0.2,
                 tables.theme = theme_cleantable(),
                 ggtheme = theme_bw())
P4
# pval.coord可以调节P值得位置


# 添加COX回归hazard ratio值等相关信息 ----
###添加HR ,CI ,P
res_cox = coxph(Surv(time, status) ~sex, data=lung)
p5=p3
p5$plot = p3$plot + 
  ggplot2::annotate("text",x = 90, y = 0.15, size=3,
                    label = paste("HR :",round(summary(res_cox)$conf.int[1],2))) +
  ggplot2::annotate("text",x = 90, y = 0.10, size=3,
                    label = paste("(","95%CI:",round(summary(res_cox)$conf.int[3],2),
                                  "-",round(summary(res_cox)$conf.int[4],2),")",sep = ""))+
  ggplot2::annotate("text",x = 90, y = 0.05, size=3,
                    label = paste("P:",round(summary(res_cox)$coef[5],4)))#+
  #ggplot2::theme( axis.text.x = element_text(face="bold", color="blue", size=8))
p5

# 添加其他信息
# 可类似上述annotation得方式,使用ggplot2添加文字,箭头,公式等其他信息

2. 使用 ggpubr 包球棍图 (棒棒糖图 Lollipop Chart)

就是 一根柱子加上一个圆,类似传统的柱状图,但是提供了更多的信息。

暂时无图。等学深入了再补充图。

library(ggpubr)
#微调数据,添加 name列,设置cyl位因子
dfm=mtcars
dfm$name=rownames(dfm)
dfm$cyl=as.factor(dfm$cyl)
head(dfm)

# 1.基本
ggdotchart(dfm, x = "name", y = "mpg",
           color = "cyl",       # 按照cyl填充颜色
           palette = c("#00AFBB", "#E7B800", "#FC4E07"), # 修改颜色
           sorting ="descending", #"ascending",                        
           add = "segments",        # 添加棒子
           ggtheme = theme_pubr(),  # 改变主题
           #rotate=T,
           xlab=""
)


# 2.横着,添加小球和数字
ggdotchart(dfm, x = "name", y = "mpg",
           color = "cyl",       # 按照cyl填充颜色
           palette = c("#00AFBB", "#E7B800", "#FC4E07"), # 修改颜色
           sorting = "descending",                      
           add = "segments",          # 添加棒子
           add.params = list(color = "lightgray", size = 1.5),#改变棒子参数
           rotate = TRUE,              # 方向转为垂直
           group = "cyl",                                
           dot.size = 6,               # 改变点的大小
           label = round(dfm$mpg),     # 添加label
           font.label = list(color = "white", size = 9, 
                             vjust = 0.5),   # 设置label参数
           ggtheme = theme_pubr(),           # 改变主题
           xlab=""
)

3. 使用 cowplot 包合并2个 ggplot2 对象并对齐y坐标

如果一个是分面的,则很难能否对齐坐标轴,但也不是不可能。参数 axis = "bt" 很关键。

# 预先准备好 Seurat(v4.0.4) 对象sce和各个类之间的差异基因top11。

> sce
An object of class Seurat 
13714 features across 745 samples within 1 assay 
Active assay: RNA (13714 features, 2000 variable features)
 3 dimensional reductions calculated: pca, umap, tsne

> head(top11)
# A tibble: 6 × 7
# Groups:   cluster [1]
      p_val avg_log2FC pct.1 pct.2 p_val_adj cluster     gene 
1 3.75e-112      1.09  0.912 0.592 5.14e-108 Naive CD4 T LDHB 
2 9.57e- 88      1.36  0.447 0.108 1.31e- 83 Naive CD4 T CCR7 
3 1.15e- 76      0.935 0.845 0.406 1.58e- 72 Naive CD4 T CD3D



# 开始画图
topN=top11 %>% group_by(cluster) %>% top_n(5, wt=avg_log2FC)
dim(topN)
head(topN)

# 使用 Seurat::DotPlot 画主图,并修饰主题。
g0_plot=DotPlot(sce, features=split(topN$gene, topN$cluster),
                cols= c("lightyellow", "red3") )+
  RotatedAxis()+theme(
    # 各个画板
    panel.border = element_rect(color="black"),#要边框
    panel.spacing = unit(1, "mm"), #画板间距

    axis.title = element_blank(), #去掉 坐标轴 label
    axis.text.y=element_blank(), #去掉y轴文字
  ); g0_plot


# 使用 ggplot2 画左边的文字和彩色圆圈。
colors=c("#96C3D8", "#5F9BBE", "#F5B375", "#C0937E", "#67A59B", "#A5D38F", "#4A9D47", "#F19294", "#E45D61", "#3377A9",
         "#BDA7CB", "#684797", "#8D75AF", "#CD9C9B", "#D62E2D", "#DA8F6F", "#F47D2F")
df1=data.frame(x=0, y= levels(sce), stringsAsFactors = F )
df1$y=factor(df1$y, levels = df1$y )
# df1$y
g1_left=ggplot(df1, aes(x,y, color=factor(y) ))+
  geom_point(size=6, show.legend = F)+
  scale_color_manual(values=colors)+
  theme_classic()+
  scale_x_continuous(expand=c(0,0))+
  theme(
    plot.margin =margin(r=0), #no margin on the right
    axis.title = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks = element_blank(),
    axis.line = element_blank(),
    axis.text.y=element_text(size=12)
  ); g1_left


# 拼合图形
library(cowplot)
# https://wilkelab.org/cowplot/articles/aligning_plots.html
# we can align both the bottom and the top axis (axis = "bt").
plot_grid(g1_left, g0_plot, align ="h", axis="bt", rel_widths = c(1, 9))

4. 使用 grid 包修改 ggplot2 分面顶标签:去掉三面的方框线

This solution is based on grobs: find positions of "strip-t" (top strips) and then substitute the rect grobs with line grobs.

library(ggplot2)

p1=ggplot(iris, aes(Sepal.Length, Sepal.Width, color=Species))+
  geom_point(size=1.5)+
  theme_classic()+
  facet_grid(~Species,
             space = "free_x", #x轴宽度自由
             scales = "free_x")+ #x坐标轴范围自由
  theme(
    panel.border = element_rect(color="black", size=0.8, fill="#00112200"),
    strip.placement = "outside", #分面顶部标签和主图分离
    strip.background = element_rect(linetype = 1, size=2 ), #边框线条
    strip.text.x = element_text(margin = margin(0,0,0.2,0, "cm")), # 控制分面顶部框内边距
    # strip.background = element_rect(),
    axis.line = element_blank(), #不要坐标轴的线
    axis.ticks = element_blank(), #不要坐标轴的刻度
    axis.title = element_blank(), #不要坐标轴label
    axis.text.x=element_text(angle=60, hjust = 1), #旋转60度
); p1


# 去掉分面标签的3面的边
# https://stackoverflow.com/questions/54471816/remove-three-sides-of-border-around-ggplot-facet-strip-label

library(grid)
lg = linesGrob(x=unit(c(0,1),"npc"), y=unit(c(0,0)+0.2,"npc"),
                gp=gpar(col="red", lwd=3))

# grid.newpage(); grid.draw(lg) #预览效果


q = ggplotGrob(p1) #ggplot2 变 grid 对象
q$layout$name #先预览

# 替换
for (k in grep("strip-t",q$layout$name)) {
  q$grobs[[k]]$grobs[[1]]$children[[1]] = lg
}
# 画图
grid.draw(q)

5. 使用 grid 包修改 ggplot2 图例: 矩形改为圆角矩形

library(ggplot2)
head(mtcars)

cols=c("darkred", "orange","darkblue")

p1=ggplot(mtcars, aes(mpg, cyl, color=factor(gear) ))+
  geom_line()+
  facet_grid(~gear)+
  theme_bw()+
  theme(
    strip.background = element_blank(), #rm分面标题的背景
    
    legend.position = "top", #图例在上面
    legend.justification = "left", #图例左对齐
    legend.margin=margin(t = 0, r = 0, b = -8, l = 0, unit = "pt"), #move legend litter lower
    legend.title = element_text( margin=margin(r=10) ),
    legend.text = element_text( margin = margin(r = 15)), #每个文字小盒子的外间距
    legend.spacing.x = unit(0, 'mm'), #一个颜色块和对应文字的距离
    legend.key.width = unit(2, "mm"), #control legend width and height
    legend.key.height = unit(3, "mm"), #最大空间的高度(可能用不到,但最多这么多)
  )+
  scale_color_manual(name="Gear", values=cols)+
  guides(color = guide_legend(override.aes = list(size = 3, title="Gear")));p1

{
	#1. ggplot 2 grob
	q = ggplotGrob(p1)
	q$layout$name
	length(q$layout$name)

	#2. define round rect Grob
	library(grid)
	getRR=function(color="red"){
	  roundrectGrob(width=0.5, height=0.9, 
					r=unit(0.3, "snpc"),
					gp=gpar(col=color, fill=color))
	}
	#3. replace ggplot legend box with Grob
	q$grobs[[20]]$grobs[[1]]
	q$grobs[[20]]$grobs[[1]]$grobs[[3]]=getRR(cols[1])
	q$grobs[[20]]$grobs[[1]]$grobs[[4]]=zeroGrob()
	q$grobs[[20]]$grobs[[1]]$grobs[[5]]=getRR(cols[2])
	q$grobs[[20]]$grobs[[1]]$grobs[[6]]=zeroGrob()
	q$grobs[[20]]$grobs[[1]]$grobs[[7]]=getRR(cols[3])
	q$grobs[[20]]$grobs[[1]]$grobs[[8]]=zeroGrob()
	
	#4. draw
	grid.newpage()
	grid.draw(q)
}
# https://stackoverflow.com/questions/46683956/create-two-legends-for-one-ggplot2-graph-and-modify-them

n. xx图 todo


欢迎互相切磋,共同进步: 秋秋号 314649593, 请备注大名、来意。
秋秋群: 生物信息与R语言 187923577 禁止营销活动,否则飞机票。
bioToolKit is part of 生物慕课网 www.biomooc.com

GoTop