一键完成单因素方差分析及可视化的R脚本

library(optparse)
library(tidyverse)
library(agricolae)
library(reshape2)
library(ggplot2)
library(ggpubr)
# Shared ggplot2 theme applied to every figure produced by this script:
# no grid lines, bold black axis titles/text, and no legend.
mytheme <- theme(
  # Grid lines
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  # Title placement
  plot.title = element_text(vjust = -8.5, hjust = 0.1),
  # Axis titles
  axis.title.x = element_text(size = 24, face = "bold", colour = "black"),
  axis.title.y = element_text(size = 20, face = "bold", colour = "black"),
  # Axis tick labels: the generic setting first, then per-axis overrides
  axis.text = element_text(size = 20, face = "bold"),
  axis.text.x = element_text(colour = "black", size = 14),
  axis.text.y = element_text(colour = "black", size = 14),
  # Legend
  legend.text = element_text(size = 15, face = "bold"),
  legend.position = "none" # hide the legend
)
# Command-line interface. All five options are required. The defaults are NULL
# (not FALSE) because optparse coerces a FALSE default of a character option
# to the string "FALSE", which would otherwise be used as a real file or
# column name further down.
option_list <- list(
  make_option(c("-f", "--file"), type = "character", default = NULL,
              help = "The input file"),
  make_option(c("-d", "--depvar"), type = "character", default = NULL,
              help = "The column name of the dependent variable"),
  make_option(c("-i", "--indepvar"), type = "character", default = NULL,
              help = "The column name of the independent variable"),
  make_option(c("-t", "--type"), type = "character", default = NULL,
              help = "Type of drawing: boxplot or barplot"),
  make_option(c("-o", "--out"), type = "character", default = NULL,
              help = "Prefix of the output file name ('.pdf' is appended)")
)
opt <- parse_args(OptionParser(option_list = option_list, usage = "This Script is use for Analysis of variance and plotting"))

### Argument validation ----
# Fail early, naming every option that was not supplied.
required_opts <- c("file", "depvar", "indepvar", "type", "out")
is_missing <- vapply(required_opts, function(key) is.null(opt[[key]]), logical(1))
if (any(is_missing)) {
  stop("Missing required option(s): ",
       paste0("--", required_opts[is_missing], collapse = ", "),
       call. = FALSE)
}

# Only the two supported plot types are accepted.
if (!(opt$type %in% c("boxplot", "barplot"))) {
  stop("Please input the right type of drawing:boxplot or barplot!", call. = FALSE)
}

# Output file: the user-supplied prefix with a ".pdf" extension.
out_name <- paste(opt$out, "pdf", sep = ".")

# Input layout described by the author: column 1 is the sample ID, column 2
# the grouping information, and the remaining columns are data columns.
# The script itself only relies on column 1 being the ID and on the two
# columns named by --depvar / --indepvar.
df <- read.table(opt$file, sep = "\t", header = TRUE)
colname_list <- colnames(df)
available_cols <- paste(colname_list, collapse = ", ")

# Both column names must exist; a wrong --indepvar would otherwise yield an
# empty index and fail later with an unrelated-looking error.
if (!(opt$depvar %in% colname_list)) {
  stop("Please input the correct column name of the dependent variable. ",
       "Available columns: ", available_cols, call. = FALSE)
}
if (!(opt$indepvar %in% colname_list)) {
  stop("Please input the correct column name of the independent variable. ",
       "Available columns: ", available_cols, call. = FALSE)
}

depvar_index <- which(colname_list == opt$depvar)
indepvar_index <- which(colname_list == opt$indepvar)

# The response must be numeric for the tests below.
if (!is.numeric(df[[depvar_index]])) {
  stop("The dependent variable column '", opt$depvar, "' is not numeric",
       call. = FALSE)
}

# Working table: `count` is the response, `group` the grouping factor.
# The group column is coerced to a factor so that numeric group codes are
# analysed as categories (one-way ANOVA) rather than as a continuous
# predictor.
ss <- data.frame(
  count = df[[depvar_index]],
  group = as.factor(df[[indepvar_index]])
)

# Shapiro-Wilk normality test on the response; keep the p-value.
normality <- shapiro.test(ss$count)
p1 <- normality$p.value
# Bartlett test of homogeneity of variances across groups; keep the p-value.
homo <- bartlett.test(count ~ group, data = ss)
p2 <- homo$p.value
# One-way ANOVA model, also used later for the LSD multiple comparison.
model <- aov(count ~ group, data = ss)
### Plotting ----
# Overall flow:
#   1. If both assumption tests pass (p > 0.05) use the ANOVA p-value,
#      otherwise use a Kruskal-Wallis test as the omnibus test.
#   2. Run the LSD multiple comparison to get the grouping letters.
#   3. Draw either a bar plot (mean +/- SD) or a box plot and save it as PDF.

# Build the plot title: the two assumption-test p-values plus any extra
# fields (omnibus test name and its p-value), joined with ":".
build_title <- function(p_norm, p_homo, ...) {
  paste("Normality test", p_norm, "Homogeneity of variance", p_homo, ...,
        sep = ":")
}

# Tilt the x tick labels when more than three groups are shown, then write
# the figure to `file`.
finish_plot <- function(p, groups, file) {
  if (length(unique(groups)) > 3) {
    p <- p + theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))
  }
  ggsave(file, p, width = 8.3, height = 5.8)
}

# Choose the test family on the unrounded p-values. Anything that is not
# clearly above 0.05 for both tests (including exactly 0.05) takes the
# non-parametric path, so a plot is always produced.
use_anova <- isTRUE(p1 > 0.05 && p2 > 0.05)
p1_shown <- round(p1, 3)
p2_shown <- round(p2, 3)
name_i <- opt$depvar

if (use_anova) {
  omnibus_label <- "aov"
  omnibus_p <- summary(model)[[1]][["Pr(>F)"]][1]
} else {
  omnibus_label <- "kruskal.test"
  krusk <- compare_means(count ~ group, data = ss, method = "kruskal.test")
  # Third column of the compare_means() result, as in the original script
  # (presumably `p.adj` -- TODO confirm against the installed ggpubr version).
  omnibus_p <- as.data.frame(krusk)[[3]][1]
}
significant <- isTRUE(omnibus_p < 0.05)

# LSD multiple comparison without p-value adjustment; `groups` holds one row
# per group (row names = group names) with the letter in column `groups`.
lsd <- LSD.test(model, "group", p.adj = "none")
lsd_groups <- lsd$groups

# NOTE(review): the axis label y = "group" is kept from the original script
# although the y axis shows the dependent variable -- confirm the intent.

if (opt$type == "barplot") {
  # Per-group mean and SD of the response.
  grp_mean <- tapply(ss$count, ss$group, mean, na.rm = TRUE)
  grp_sd <- tapply(ss$count, ss$group, sd, na.rm = TRUE)
  bar_data <- data.frame(
    group = names(grp_mean),
    mean = as.vector(grp_mean),
    SD = as.vector(grp_sd),
    stringsAsFactors = FALSE
  )
  # Keep only groups present in the LSD result (inner join, as before).
  bar_data <- bar_data[bar_data$group %in% rownames(lsd_groups), , drop = FALSE]
  bar_data$groups <- as.character(lsd_groups[bar_data$group, "groups"])
  bar_data$ymin <- bar_data$mean - bar_data$SD
  bar_data$ymax <- bar_data$mean + bar_data$SD
  # Head-room above the tallest error bar so bars and letters are not clipped
  # by the axis limits.
  y_top <- max(c(bar_data$mean, bar_data$ymax), na.rm = TRUE) * 1.2

  # Title: Kruskal-Wallis result is always shown; the ANOVA result only when
  # it is not significant (rounded to three decimals).
  plot_title <- if (!use_anova) {
    build_title(p1_shown, p2_shown, omnibus_label, omnibus_p)
  } else if (significant) {
    build_title(p1_shown, p2_shown)
  } else {
    build_title(p1_shown, p2_shown, omnibus_label, round(omnibus_p, 3))
  }

  p <- ggplot(bar_data, aes(x = group, y = mean, colour = group)) +
    geom_bar(aes(colour = group, fill = group), stat = "identity",
             width = 0.4, position = "dodge")
  if (use_anova && significant) {
    # Grouping letters above the error bars; vjust/size are constants and so
    # belong outside aes().
    p <- p + geom_text(aes(label = groups, y = ymax), vjust = -0.3, size = 6)
  }
  p <- p +
    geom_errorbar(aes(ymin = ymin, ymax = ymax), colour = "black",
                  width = 0.1, size = 1) +
    scale_y_continuous(expand = c(0, 0), limits = c(0, y_top)) +
    labs(x = paste(name_i, "of all group", sep = "_"), y = "group",
         title = plot_title) +
    mytheme

  finish_plot(p, bar_data$group, out_name)
} else if (opt$type == "boxplot") {
  # One row per observation: sample ID (first input column), group, value.
  data_box <- data.frame(
    ID = df[[1]],
    group = ss$group,
    dd = ss$count,
    stringsAsFactors = FALSE
  )
  # LSD letter of each observation's group.
  data_box$stat <- as.character(
    lsd_groups[as.character(data_box$group), "groups"]
  )
  # Letter position: the group's maximum plus 5% of the overall data range.
  dd_range <- range(data_box$dd, na.rm = TRUE)
  group_max <- ave(data_box$dd, data_box$group,
                   FUN = function(v) max(v, na.rm = TRUE))
  data_box$y <- group_max + diff(dd_range) * 0.05

  # Title: the omnibus result is appended only when it is not significant.
  plot_title <- if (significant) {
    build_title(p1_shown, p2_shown)
  } else {
    build_title(p1_shown, p2_shown, omnibus_label, round(omnibus_p, 3))
  }
  # x-axis label wording differs per case, as in the original script.
  x_label <- if (!significant) {
    paste(name_i, "box", sep = "_")
  } else if (use_anova) {
    paste(name_i, " group", sep = "_")
  } else {
    paste(name_i, "of all group", sep = "_")
  }

  p <- ggplot(data_box, aes(x = group, y = dd, color = group)) +
    geom_boxplot(alpha = 1, outlier.size = 0, size = 0.7, width = 0.5,
                 fill = "transparent") +
    labs(x = x_label, y = "group", title = plot_title)
  if (use_anova && significant) {
    # One letter per group (not one per observation).
    letter_data <- unique(data_box[, c("group", "y", "stat")])
    p <- p + geom_text(data = letter_data,
                       aes(x = group, y = y, color = group, label = stat))
  }
  p <- p +
    geom_jitter(position = position_jitter(0.17), size = 1, alpha = 0.7) +
    theme(legend.position = "none")
  if (!use_anova && significant) {
    # Global test label plus significance marks for every pair of groups.
    group_levels <- levels(as.factor(data_box$group))
    my_comparisons <- combn(group_levels, 2, simplify = FALSE)
    p <- p +
      stat_compare_means() +
      stat_compare_means(comparisons = my_comparisons, label = "p.signif",
                         hide.ns = FALSE)
  }
  p <- p + mytheme

  finish_plot(p, data_box$group, out_name)
}
    

脚本有五个参数

-f:输入的数据文件(制表符分隔,带表头),第一列是样本名,第二列是自变量也就是分组信息,第三列及以后是因变量,即各分组的观测值

-d:因变量的列名

-i:自变量的列名

-t:选择可视化的类型,箱线图或者条形图

-o:输出文件名称的前缀

使用示例:

Rscript aov.R -f input2.txt -d response -i trt -t boxplot -o 123
image.png
©著作权归作者所有,转载或内容合作请联系作者
  • 序言:七十年代末,一起剥皮案震惊了整个滨河市,随后出现的几起案子,更是在滨河造成了极大的恐慌,老刑警刘岩,带你破解...
    沈念sama阅读 199,519评论 5 468
  • 序言:滨河连续发生了三起死亡事件,死亡现场离奇诡异,居然都是意外死亡,警方通过查阅死者的电脑和手机,发现死者居然都...
    沈念sama阅读 83,842评论 2 376
  • 文/潘晓璐 我一进店门,熙熙楼的掌柜王于贵愁眉苦脸地迎上来,“玉大人,你说我怎么就摊上这事。” “怎么了?”我有些...
    开封第一讲书人阅读 146,544评论 0 330
  • 文/不坏的土叔 我叫张陵,是天一观的道长。 经常有香客问我,道长,这世上最难降的妖魔是什么? 我笑而不...
    开封第一讲书人阅读 53,742评论 1 271
  • 正文 为了忘掉前任,我火速办了婚礼,结果婚礼上,老公的妹妹穿的比我还像新娘。我一直安慰自己,他们只是感情好,可当我...
    茶点故事阅读 62,646评论 5 359
  • 文/花漫 我一把揭开白布。 她就那样静静地躺着,像睡着了一般。 火红的嫁衣衬着肌肤如雪。 梳的纹丝不乱的头发上,一...
    开封第一讲书人阅读 48,027评论 1 275
  • 那天,我揣着相机与录音,去河边找鬼。 笑死,一个胖子当着我的面吹牛,可吹牛的内容都是我干的。 我是一名探鬼主播,决...
    沈念sama阅读 37,513评论 3 390
  • 文/苍兰香墨 我猛地睁开眼,长吁一口气:“原来是场噩梦啊……” “哼!你这毒妇竟也来了?” 一声冷哼从身侧响起,我...
    开封第一讲书人阅读 36,169评论 0 254
  • 序言:老挝万荣一对情侣失踪,失踪者是张志新(化名)和其女友刘颖,没想到半个月后,有当地人在树林里发现了一具尸体,经...
    沈念sama阅读 40,324评论 1 294
  • 正文 独居荒郊野岭守林人离奇死亡,尸身上长有42处带血的脓包…… 初始之章·张勋 以下内容为张勋视角 年9月15日...
    茶点故事阅读 35,268评论 2 317
  • 正文 我和宋清朗相恋三年,在试婚纱的时候发现自己被绿了。 大学时的朋友给我发了我未婚夫和他白月光在一起吃饭的照片。...
    茶点故事阅读 37,299评论 1 329
  • 序言:一个原本活蹦乱跳的男人离奇死亡,死状恐怖,灵堂内的尸体忽然破棺而出,到底是诈尸还是另有隐情,我是刑警宁泽,带...
    沈念sama阅读 32,996评论 3 315
  • 正文 年R本政府宣布,位于F岛的核电站,受9级特大地震影响,放射性物质发生泄漏。R本人自食恶果不足惜,却给世界环境...
    茶点故事阅读 38,591评论 3 303
  • 文/蒙蒙 一、第九天 我趴在偏房一处隐蔽的房顶上张望。 院中可真热闹,春花似锦、人声如沸。这庄子的主人今日做“春日...
    开封第一讲书人阅读 29,667评论 0 19
  • 文/苍兰香墨 我抬头看了看天上的太阳。三九已至,却和暖如春,着一层夹袄步出监牢的瞬间,已是汗流浃背。 一阵脚步声响...
    开封第一讲书人阅读 30,911评论 1 255
  • 我被黑心中介骗来泰国打工, 没想到刚下飞机就差点儿被人妖公主榨干…… 1. 我叫王不留,地道东北人。 一个月前我还...
    沈念sama阅读 42,288评论 2 345
  • 正文 我出身青楼,却偏偏与公主长得像,于是被迫代替她去往敌国和亲。 传闻我的和亲对象是个残疾皇子,可洞房花烛夜当晚...
    茶点故事阅读 41,871评论 2 341

推荐阅读更多精彩内容