Benchmark Analysis

We provide the benchmark Analysis. The baseline model is the simple linear regression, we also use the tree model that assume Brownian motion model for trait evolution. The rmse is reported and the benchmark ratio is provided at the bottom of this page.

rm(list=ls())
setwd("~/Dropbox/JournalSubmission/0Stats20230228/rcode/empanalysis/")
# create a function to compute the RMSE
rmse <- function(actual, predicted) {
  sqrt(mean((actual - predicted)^2))
  #1/n * sum(abs(predicted - actual))
}
#install.packages("ape")
#install.packages("phylolm")
library(phylolm)

Loading required package: ape

#install.packages("MuMIn")
library(MuMIn)

Registered S3 methods overwritten by 'MuMIn':
  method         from   
  nobs.phylolm   phylolm
  logLik.phylolm phylolm

library(ape)
#x.tre<-ape::read.tree(text='((A, B), ((C, D), (E, F)));')
#sunflower 11 taxa (3 hybrids) Gross and Riesberg paper

# load the dataset
data <- read.csv("AnnPrecAllResponseTrait.csv")
cmdata<-NULL
#predictor is always the annual precipitation
cmdata$x<-log(data$AnnPrec)
rmse.array<-array(NA,c(12,3))
colnames(rmse.array)<-c("lm","tree","network")
aicc.array<-array(NA,c(12,3))
colnames(aicc.array)<-c("lm","tree","network")
benchmark.ratio.array<-array(NA,c(12,2))
colnames(benchmark.ratio.array)<-c("tree/lm","network/lm")


lm.sig.array<-array(NA,12)
bm.sig.array<-array(NA,12)

## empirical data for tree model 
tree<-ape::read.tree(text=c("(((1,2)15,(3,(4,5)13)16)20,((((6,7)12,8)14,9)17,(10,11)18)19)21;"))
tree<-compute.brlen(tree)
#plot(tree)
## call the essential function
source("phynetregmodel.r")
## load the ertimate from empirical analysis
load("regsunflower.RData")
for(respIndex in 3:dim(data)[2]){
  #  respIndex<-3
  cmdata$y<-log(data[,respIndex])
  ## baseline model
  ## Simple Linear regression 
  lm_model<-lm(cmdata$y ~ cmdata$x)
  sum.lm<-summary(lm_model)
  lm.sig.array[respIndex-2]<- ifelse(sum.lm$coefficients[2,4]<0.05,"Yes","No")
  aicc.lm<--logLik(lm_model)+2*3+ 2 * 3 * (3 + 1) / (10 - 3 - 1)
  pred.lm<-lm_model$coefficients[1]+lm_model$coefficients[2]*cmdata$x
  rmse.lm<-rmse(cmdata$y, pred.lm)
  
  ## Comparable to Tree model
  # Fit the Brownian motion Tree model
  bm_model <- phylolm(phy=tree, cmdata$y ~ cmdata$x, method = "BM")
  
  sum.bm<-summary(bm_model)
  bm.sig.array[respIndex-2]<- ifelse(sum.bm$coefficients[2,4]<0.05,"Yes","No")
  aicc.bm<-bm_model$aic+ 2 * 3 * (3 + 1) / (10 - 3 - 1)
  pred.bm<-bm_model$coefficients[1]+bm_model$coefficients[2]*cmdata$x
  rmse.bm<-rmse(cmdata$y, pred.bm)
  
  ## Our network model
  ## data sunflower 11 taxa (3 hybrids) 
  pred.net<-b.est.array[respIndex-2,1]+b.est.array[respIndex-2,2]*cmdata$x
  rmse.net<-rmse(cmdata$y, pred.net)
  aicc.net<-NegLogLike(x=MLE.array[respIndex-2,],Y=cmdata$y,predictor=cmdata$x,b.ini=b.est.array[respIndex-2,],n=n,branchlength=branchlength,ancdes.array=ancdes.array,nleaves=nleaves,tipnames=tipnames,model.Index=model.Index)+ 2*3+ 2 * 3 * (3 + 1) / (10 - 3 - 1)
  
  
  rmse.array[respIndex-2,]<-round(c(rmse.lm,rmse.bm,rmse.net),3)
  aicc.array[respIndex-2,]<-round(c(aicc.lm,aicc.bm,aicc.net),3)  
  benchmark.ratio.array[respIndex-2,]<-round(c(rmse.bm/rmse.lm,rmse.net/rmse.lm),3)
}

rmse.benchmark.array<-cbind(rmse.array,benchmark.ratio.array)

rownames(rmse.benchmark.array)<-colnames(data)[3:14]

RMSE

rownames(rmse.array)<-colnames(data)[3:14]  
kableExtra::kable(rmse.array)

	lm	tree	network
Annuals	0.608	0.612	0.655
petioles	0.558	0.559	0.565
Peduncles	0.718	0.727	0.730
Involucres	0.227	0.234	0.246
Phyllaries	0.276	0.276	0.277
Paleae	0.164	0.165	0.171
laminae	0.250	0.267	0.265
Ray.florets	0.307	0.308	0.357
Disc.florets	0.665	0.676	0.768
corollas	0.125	0.128	0.147
Cypselae	0.213	0.219	0.332
pappi	0.175	0.183	0.239

Benchmark Ratio

rownames(benchmark.ratio.array)<-colnames(data)[3:14]  
kableExtra::kable(benchmark.ratio.array)

	tree/lm	network/lm
Annuals	1.006	1.077
petioles	1.002	1.013
Peduncles	1.013	1.017
Involucres	1.033	1.087
Phyllaries	1.001	1.005
Paleae	1.009	1.046
laminae	1.065	1.059
Ray.florets	1.004	1.162
Disc.florets	1.017	1.154
corollas	1.020	1.175
Cypselae	1.026	1.555
pappi	1.046	1.367

In the first row of the table, the comparison of the tree model and linear regression model using the “Annuals” trait shows a benchmark ratio of 1.006. This means that the tree model has a higher Root Mean Square Error (RMSE) than the linear regression model by 0.6%. This suggests that the tree model is slightly underperforming compared to the linear regression model with an independent assumption.

Similarly, in the same row of the table, the comparison of our network model and linear regression model using the “Annuals” trait shows a benchmark ratio of 1.077. This means that the network model has a higher RMSE than the linear regression model by 7.7%. This indicates that the network model is performing worse than the linear regression model with an independent assumption.