千家信息网

R语言农艺性状表型数据与环境关联互作代码分析

发表于:2025-01-21 作者:千家信息网编辑
千家信息网最后更新 2025年01月21日,这篇文章主要介绍了R语言农艺性状表型数据与环境关联互作代码分析的相关知识,内容详细易懂,操作简单快捷,具有一定借鉴价值,相信大家阅读完这篇R语言农艺性状表型数据与环境关联互作代码分析文章都会有所收获,
千家信息网最后更新 2025年01月21日R语言农艺性状表型数据与环境关联互作代码分析

这篇文章主要介绍了R语言农艺性状表型数据与环境关联互作代码分析的相关知识,内容详细易懂,操作简单快捷,具有一定借鉴价值,相信大家阅读完这篇R语言农艺性状表型数据与环境关联互作代码分析文章都会有所收获,下面我们一起来看看吧。

农艺性状表型数据与环境关联互作分析

local({r <- getOption("repos")  r["CRAN"] <- "http://mirrors.tuna.tsinghua.edu.cn/CRAN/"   options(repos=r)}) options(BioC_mirror="https://mirrors.tuna.tsinghua.edu.cn/bioconductor")#install.packages("GGEBiplots")#install.packages("GGEBiplotGUI")library(GGEBiplotGUI)library("GGEBiplots")library("reshape2")library(vegan)library(corrplot)library("psych")library(Cairo)setwd("D:/potato/")data1<-read.table("张北data.txt",header = F,row.names = 1,comment.char = "",sep = "\t",encoding = "UTF-8")data.t<-as.data.frame(t(data1))data.l<-melt(data.t,id.vars = c('年度', '性状'),variable.name = "基因型")mydata1<-dcast(data.l, `年度` + `基因型` ~ `性状`)#`单株产量` +   `淀粉含量(%)` + `干物质含量(%)` +  `平均出苗率(%)`+ `生育日数(天)`+ `株高`data2<-read.table("康保data.txt",header = F,row.names = 1,comment.char = "",sep = "\t",encoding = "UTF-8")data.t<-as.data.frame(t(data2))data.l<-melt(data.t,id.vars = c('年度', '性状'),variable.name = "基因型")mydata2<-dcast(data.l, `年度` + `基因型` ~ `性状`)#`单株产量` +   `淀粉含量(%)` + `干物质含量(%)` +  `平均出苗率(%)`+ `生育日数(天)` +   `株高`#inter<-intersect(colnames(mydata1),colnames(mydata2))#去掉一些数据inter<-c("年度"   ,           "基因型"    ,        "单株产量"    ,      "淀粉含量(%)"    ,   "干物质含量(%)"  ,   "平均出苗率(%)"  ,  "生育日数(天)"   ,  "株高" )mydata1.c<-mydata1[,inter]mydata2.c<-mydata2[,inter]mydata1.c$`环境`="张北"mydata2.c$`环境`="康保"mydata=rbind(mydata1.c,mydata2.c)########################计算平均值#traits<-c("单株产量" , "淀粉含量(%)" , "干物质含量(%)" , "平均出苗率(%)", "生育日数(天)" , "小区产量" ,"折合单位产量kg/hm", "株高")traits=c(   "单株产量"  ,        "淀粉含量(%)"  ,                "干物质含量(%)"  ,   "平均出苗率(%)" ,               "生育日数(天)"    ,                   "株高"      )for (i in traits){  mydata[,i]=as.double(mydata[,i])}SD=round(apply(mydata[,traits],2,sd),2)MEAN=round(apply(mydata[,traits],2,mean),2)RANGE=round(apply(mydata[,traits],2,range),2)CV=round(SD/MEAN*100,2)H=round(diversity(mydata[,traits], index = "shannon", MARGIN = 2),2)trait.mean=cbind(`平均值`=MEAN,`标准差`=SD,`变异系数(%)`=CV,`变幅`=paste(range.min=RANGE[1,],range.max=RANGE[2,],sep="-"),`多样性指数 H'`=H)write.csv(trait.mean,file="01.马铃薯表型性状统计.csv")#################################相关性分析occor = corr.test(mydata[,traits],use="pairwise",method="spearman",adjust="BH",alpha=.05)occor.r = occor$r  # 取相关性矩阵R值occor.p = occor$p  # 取相关性矩阵p值#设置色板palette_2 <- colorRampPalette(c("yellow","red"))(100)#绘图#png(file="traits_cor.png",w=10*300,h=10*300,res = 300)pdf(file="traits_cor.pdf",w=10,h=10,family="GB1")corrplot(occor.r, method = "square", type = "lower", order="AOE",         col = palette_2, tl.pos = "tp", tl.col = "blue", cl.pos = "r",         title = "Traits corr", mar = c(2,4,4,1))corrplot(occor.r, method = "number", type = "upper",order="AOE",         col = palette_2,add = TRUE, diag = FALSE, tl.pos = "n", cl.pos = "n",          number.cex=1,mar = c(2,4,4,1))dev.off()write.csv(occor.p,file="02.相关性P值.csv")write.csv(occor.r,file="03.相关性R值.csv")########################分布范围柱状图#########################求平均genotype.mean=aggregate(mydata[,traits],by=list(`基因型`=mydata$`基因型`),mean)for(t in traits){    #t="平均出苗率(%)"  d<-  genotype.mean[,t]  ff=gsub("\\(\\S+\\)", "", t)  ff=gsub("kg/hm", "", ff)      pdf(paste0(ff,"-表型性状值分布.pdf"),w=5,h=5,family="GB1")  par(mar=c(6,4.1,2,4))  xhist <- hist(d,breaks=seq(from=floor(min(d)),to=ceiling(max(d)),length.out=10),                plot=F,xlab =t ,ylim=c(0,),col="white",xaxt="n", yaxs="i",fill=NULL,ylab = "样本数",freq=T,main="")    xhist <- hist(d,breaks=seq(from=floor(min(d)),to=ceiling(max(d)),length.out=10),                plot=T,xlab ="" ,ylim=c(0,max(xhist$counts)*1.1),col="white",xaxt="n", yaxs="i",fill=NULL,ylab = "样本数",freq=T,main="")    mtext(t,side=1,line=4.5)  axis(side=1,xhist$mids,labels=F)    b=(xhist$mids[2]-xhist$mids[1])/2  s=round(xhist$mids-b,2)  s[1]=0  e=round(xhist$mids+b,2)  lab=paste(s,e,sep="-")    text(x=xhist$mids, y=-3, labels=lab,cex=1,  xpd=TRUE, srt=45,adj=1)    dd=density(d)        par(new=T)  plot(dd, ylim=c(0,max(dd$y)*1.1), col ='red',lwd=2,yaxs="i",xlab = "",ylab="",xaxt="n",yaxt="n",main = "")   axis(4, las=1,at=seq(from = 0, to = max(xhist$density), length.out = 6), labels=round(seq(0, max(xhist$density),length.out =6)/sum(xhist$density),2), col = 'red', col.axis = 'red')  mtext("频率",side=4,line=3,col="red")  box(bty="l")  dev.off()}##############GGEbioplotsggdata=mydata[,c("环境","基因型","单株产量")]ggdata1=aggregate(`单株产量` ~ 基因型 + 环境, data = mydata, mean)gg.f=dcast(ggdata1,基因型  ~环境)row.names(gg.f)=gg.f[,1]gg.f=gg.f[,-1]*10#GGEBiplot(Data = gg.f)library(ggplot2)GGE1<-GGEModel(gg.f,centering = "tester",               SVP = "symmetrical",               scaling = "none")g1=GGEPlot(GGE1,type=6,sizeGen=4,sizeEnv = 2,largeSize=3)g1=g1+ labs(title = "")pdf(paste0("适应性分析bioplot.pdf"),w=5,h=5,family="GB1")print(g1)dev.off()g2=GGEPlot(GGE1,type=9,sizeGen=2,sizeEnv = 3,largeSize=3)g2=g2+ labs(title = "")pdf(paste0("丰产性-稳定性分析bioplot.pdf"),w=5,h=5,family="GB1")print(g2)dev.off()#################################表型主成分分析p=genotype.meanrow.names(p)=p[,1]p=p[,-1]pca = prcomp(t(p), scale=TRUE)#pca = prcomp(t(decostand(p, "hellinger")), scale=TRUE)ss=summary(pca)pc=pca$xxx=ss$importancerow.names(xx)<-c("特征值E","贡献率CR","累计贡献率CCR")pca.res=rbind(pc,xx)write.csv(pca.res,file="04.PCA分析结果.csv")#表型数据聚类分析dist_mat <- dist(genotype.mean[,2:5], method = "euclidean")clustering <- hclust(dist_mat, method = "complete")plot(clustering,labels=genotype.mean$基因型,xlab="sample",ylab="Height" ,main="")#根据进化树分组group.data=genotype.meangroup.data$group=as.factor(cutree(clustering,h=15))names=c("基因型"   ,     "单株产量" ,     "淀粉含量"  , "干物质含量",        "平均出苗率" ,"生育日数"  ,"株高"    ,      "group" )colnames(group.data)=nameslibrary("ggpubr")phenotype=names[2:(length(names)-1)]test.res=data.frame()for(i in phenotype){  f=as.formula(paste0("`",i,"`~group"))  print(f)  res=compare_means(f, data = group.data,method="t.test")  print(res)  test.res=rbind(test.res,res)}write.csv(test.res,file="聚类树分组差异统计检验.csv")write.csv(group.data,file="聚类树分组信息表.csv")g1.res=aggregate(group.data[,2:7],by=list(group.data$group),FUN=mean)g2.res=aggregate(group.data[,2:7],by=list(group.data$group),FUN=sd)write.csv(g1.res,file="聚类树分组统计平均值.csv")write.csv(g2.res,file="聚类树分组统计标准差.csv")#表型数据在不同环境下变异for (i in traits){  mydata1[,i]=as.double(mydata1[,i])}SD=round(apply(mydata1[,traits],2,sd),2)MEAN=round(apply(mydata1[,traits],2,mean),2)RANGE=round(apply(mydata1[,traits],2,range),2)CV=round(SD/MEAN*100,2)H=round(diversity(mydata1[,traits], index = "shannon", MARGIN = 2),2)trait.mean=cbind(`平均值`=MEAN,`标准差`=SD,`变异系数(%)`=CV,`变幅`=paste(range.min=RANGE[1,],range.max=RANGE[2,],sep="-"),`多样性指数 H'`=H)write.csv(trait.mean,file="01.马铃薯表型性状统计(张北).csv")#表型数据在不同环境下变异for (i in traits){  mydata2[,i]=as.double(mydata2[,i])}SD=round(apply(mydata2[,traits],2,sd),2)MEAN=round(apply(mydata2[,traits],2,mean),2)RANGE=round(apply(mydata2[,traits],2,range),2)CV=round(SD/MEAN*100,2)H=round(diversity(mydata2[,traits], index = "shannon", MARGIN = 2),2)trait.mean=cbind(`平均值`=MEAN,`标准差`=SD,`变异系数(%)`=CV,`变幅`=paste(range.min=RANGE[1,],range.max=RANGE[2,],sep="-"),`多样性指数 H'`=H)write.csv(trait.mean,file="01.马铃薯表型性状统计(康保).csv")######关联分析mydata$年度=as.factor(mydata$年度)mydata$环境=as.factor(mydata$环境)F.res=data.frame()for (i in traits){  print(i)  fl=c(as.formula(paste0("`",i,"`~基因型")),       as.formula(paste0("`",i,"`~环境")),       as.formula(paste0("`",i,"`~年度")),       as.formula(paste0("`",i,"`~基因型*环境")),       as.formula(paste0("`",i,"`~基因型*年度"))       #as.formula(paste0("`",i,"`~基因型*年度*环境"))  )    for(f in fl){     blp=lm(f,data=mydata)    aa=summary(blp)    #    #print(aa)    print(aa$df)    print(f)    fstatistic = aa$fstatistic        p_value = pf(as.numeric(fstatistic[1]), as.numeric(fstatistic[2]), as.numeric(fstatistic[3]), lower.tail = FALSE)    print(p_value)    aa=data.frame(`公式`=as.character(f)[3],df=aa$df[1],F=fstatistic[1],P=p_value,`性状`=i)    F.res=rbind(F.res,aa)            # question1 <- readline("Would you like to proceed untill the loop ends? (Y/N)")     # if(regexpr(question1, 'y', ignore.case = TRUE) == 1){     #   continue = TRUE     #   next     # } else{    #   break    # }  }  }write.csv(F.res,file="05.表型与基因型环境年度互作效应.csv")

第二版版本,注意PCA分析升级,输入数据行为样本,列为表型;

local({r <- getOption("repos")  r["CRAN"] <- "http://mirrors.tuna.tsinghua.edu.cn/CRAN/"   options(repos=r)}) options(BioC_mirror="https://mirrors.tuna.tsinghua.edu.cn/bioconductor")#install.packages("GGEBiplots")#install.packages("GGEBiplotGUI")library(GGEBiplotGUI)library("GGEBiplots")library("reshape2")library(vegan)library(corrplot)library("psych")library(Cairo)setwd("D:/potato/第二次分析")mydata<-read.table("data.txt",header = T,row.names = 1,comment.char = "",sep = "\t",encoding = "UTF-8")########################计算平均值traits=colnames(mydata)for (i in traits){  mydata[,i]=as.double(mydata[,i])}SD=round(apply(mydata[,traits],2,sd),2)MEAN=round(apply(mydata[,traits],2,mean),2)RANGE=round(apply(mydata[,traits],2,range),2)CV=round(SD/MEAN*100,2)H=round(diversity(mydata[,traits], index = "shannon", MARGIN = 2),2)J=round(H/log(specnumber(t(mydata[,traits]))),2)trait.mean=cbind(`平均值`=MEAN,`标准差`=SD,`变异系数(%)`=CV,`变幅`=paste(range.min=RANGE[1,],range.max=RANGE[2,],sep="~"),`H'`=H,J=J)write.csv(trait.mean,file="01.马铃薯表型性状统计.csv")########################分布范围柱状图#########################求平均for(t in traits){    ##t="顶小叶宽"  #t="出苗率"  d<-  mydata[,t]  pdf(paste0(t,"-表型性状值分布.pdf"),w=5,h=5,family="GB1")  par(mar=c(6,4.1,2,4))    MIN=min(d)  MAX=max(d)  step=(MAX-MIN)/(length(d)^(1/2)+1)    xhist <- hist(d,breaks=seq(from=floor(min(d)),to=ceiling(max(d))+step,by=step),                plot=F,xlab =t ,ylim=c(0,),col="white",xaxt="n", yaxs="i",fill=NULL,ylab = "样本数",freq=T,main="")    xhist <- hist(d,breaks=seq(from=floor(min(d)),to=ceiling(max(d))+step,by=step),                plot=T,xlab ="" ,ylim=c(0,max(xhist$counts)*1.1),col="green",xaxt="n", yaxs="i",fill=NULL,ylab = "样本数",freq=T,main="")    mtext(t,side=1,line=4.5)  axis(side=1,xhist$mids,labels=F)      s=round(xhist$mids-step,2)  if (s[1]<0) {s[1]=0}  e=round(xhist$mids+step,2)  lab=paste(s,e,sep="-")    text(x=xhist$mids, y=-3, labels=lab,cex=1,  xpd=TRUE, srt=45,adj=1)  axis(4, las=1,at=seq(from = 0, to = max(xhist$counts), length.out = 4), labels=paste0(round(seq(0, max(xhist$counts),length.out =4)/sum(xhist$counts),4)*100,"%"), col = 'red', col.axis = 'red')    dd=density(d)  par(new=T)  plot(dd, ylim=c(0,max(dd$y)*1.1), col ='red',lwd=2,yaxs="i",xlab = "",ylab="",xaxt="n",yaxt="n",main = "")   #mtext("频率",side=4,line=3,col="red")  box(bty="l")  l.at="topright"  if(t=="出苗率" || t=="叶缘形状"){    l.at="topleft"  }  legend(l.at,legend = c("直方图"),          fill = c('green'),           bty='n')  legend(l.at,legend = c("正态图"),          col = c("red"), lty=1,          bty='n',inset = c(0,0.08), lwd=3)  dev.off()}################相关性分析####################################################相关性分析occor = corr.test(mydata,use="pairwise",method="spearman",adjust="BH",alpha=.05)occor.r = occor$r  # 取相关性矩阵R值occor.p = occor$p  # 取相关性矩阵p值#设置色板palette_2 <- colorRampPalette(c("yellow","red"))(100)#绘图#png(file="traits_cor.png",w=10*300,h=10*300,res = 300)pdf(file="traits_cor.pdf",w=10,h=10,family="GB1")corrplot(occor.r, method = "square", type = "lower", order="AOE",         col = palette_2, tl.pos = "tp", tl.col = "blue", cl.pos = "r",         title = "Traits corr", mar = c(2,8,4,1))corrplot(occor.r, method = "number", type = "upper",order="AOE",         col = palette_2,add = TRUE, diag = FALSE, tl.pos = "n", cl.pos = "n",          number.cex=1,mar = c(2,4,4,1))dev.off()write.csv(occor.p,file="02.相关性P值.csv")write.csv(occor.r,file="03.相关性R值.csv")###############因子分析###############mydata.l=melt(mydata)bt=bartlett.test(value~variable,data=mydata.l)kmo=KMO(mydata)sink("04.因子分析.txt")btkmosink()#################################表型主成分分析#pca = prcomp(mydata, scale=TRUE)pca = princomp(mydata, scores=TRUE, cor=TRUE)#pca = prcomp(t(mydata), scale=TRUE)fa1 = factanal(mydata, factor=2, rotation="varimax", scores="regression")fa1#write.csv(abs(fa1$loadings), "loadings.csv")pdf(file="04.PCA分析特征根碎石图.pdf",w=8,h=4,family="GB1")screeplot(pca, type="line", main="Scree Plot") dev.off()sink(file="04.PCA分析summary.txt")summary(pca)sink()pc=loadings(pca)[1:21,]pc=as.data.frame(pc)write.csv(pc,file="04.PCA分析结果.csv")library(RColorBrewer)mycolor=c(brewer.pal(9, "Set1"),brewer.pal(8, "Set2"),brewer.pal(9, "Paired"))pdf(file="04.PCA分析表型分布.pdf",w=7,h=6,family="GB1")par(mar=c(5,4,4,10))plot(x=pc[,1],y=pc[,2],cex=1.5,col=mycolor,pch=16,xlab="factor 1",ylab = "factor 2")abline(h=0,lty=1)abline(v=0,lty=1)legend("topright",legend =rownames(pc) ,pch=16,col=mycolor,xpd =T,inset = c(-0.3,0),ncol=1,bty='n',cex=1)dev.off()#表型数据聚类分析dist_mat <- dist(mydata, method = "euclidean")clustering <- hclust(dist_mat, method = "complete")pdf(file="05.聚类图.pdf",w=15,h=4,family="GB1")plot(clustering,labels=rownames(mydata),cex=0.55,xlab="sample",ylab="Height" ,main="")abline(h=40,col="red")dev.off()#根据聚类树分组group.data=mydatagroup.data$group=as.factor(cutree(clustering,h=40))###################分组PCA图#################install.packages("ggfortify")library(ggfortify)pdf(file="04.PCA分析表型样本双序图.pdf",w=5,h=4,family="GB1")#par(mar=c(5,4,4,10)) #pcs=pca$scores #plot(x=pcs[,1],y=pcs[,2],cex=1.5,col=mycolor[group.data$group],pch=1,xlab="factor 1",ylab = "factor 2") #par(new=T) #plot(x=pc[,1],y=pc[,2],cex=1.5,col="blue",pch=16,xlab="factor 1",ylab = "factor 2")#legend("topright",legend =rownames(pc) ,pch=16,col=mycolor,xpd =T,inset = c(-0.3,0),ncol=1,bty='n',cex=1)#legend("topright",legend =1:6 ,pch=16,col=mycolor[1:6],xpd =T,inset = c(-0.3,0),ncol=1,bty='n',cex=1)#biplot(pca)g=autoplot(pca, data = group.data, colour = 'group',         loadings = TRUE, loadings.colour = 'blue',         loadings.label = TRUE, loadings.label.size = 3,size=2)#,frame = TRUE, frame.type = 'norm')g=g+scale_color_manual(name="group",values =mycolor)+  scale_fill_manual(name="group",values =mycolor)+  theme_bw()+ theme(      panel.grid=element_blank(),     axis.text.x=element_text(colour="black"),    axis.text.y=element_text(colour="black"),    panel.border=element_rect(colour = "black"),    legend.key = element_blank(),    plot.title = element_text(hjust = 0.5))gdev.off()library("ggpubr")phenotype=traitstest.res=data.frame()for(i in phenotype){  f=as.formula(paste0("`",i,"`~group"))  print(f)  fit=aov(f,data=group.data)  ss=summary(fit)  res=as.data.frame(ss[[1]])  res$`表型`=i  rownames(res)=c("组间","组内")  print(res)  test.res=rbind(test.res,res)}colnames(test.res)=c("Df","平方和SS","均方MS","F","显著性","性状trait")write.csv(test.res,file="05.聚类树分组差异统计检验.csv")write.csv(group.data,file="05.聚类树分组信息表.csv")g1.res=aggregate(group.data[,traits],by=list(group.data$group),FUN=mean)Average=apply(group.data[,traits],2,mean)Average=c("Group.1"="总体平均值 Average",Average)g.mean=rbind(g1.res,t(as.data.frame(Average)))write.csv(t(g.mean),file="06.聚类树分组统计平均值.csv")

关于"R语言农艺性状表型数据与环境关联互作代码分析"这篇文章的内容就介绍到这里,感谢各位的阅读!相信大家对"R语言农艺性状表型数据与环境关联互作代码分析"知识都有一定的了解,大家如果还想学习更多知识,欢迎关注行业资讯频道。

0