Comparison of pairwise models under the LRT test

#############################################
rm(list = ls())
#load("~/Dropbox/MingHanHsu/fulldataset/DataSets/results/empR1.rda")

load("~/Dropbox/FCU/Teaching/Mentoring/2016Spring-2021Fall_AssociateProfessor/2020Fall/MingHanHsu/fulldataset/DataSets/results/empR1.rda")

# 5 models 
# two AICs in each model
# 14 trees
# few dataset within each tree


BM1.7$AICc.obs

## [1] -168.4176

BM1.7$AICc.constrained

## [1] -2e+10

BM11.1$Lobs

## [1] -15.63377

BM11.1$Lconstrained

## [1] -37.64364

OU11.1$Lobs.ou

## [1] -17.65558

OU11.1$Lconstrained.ou

## [1] -18.63979

BM11.1$AICc.obs

## [1] 43.26753

BM11.1$AICc.constrained

## [1] 83.28728

BM11.1$Prob

## [1] 2.762069e-10

# Step1: screen reasonable AICs  (exclude -2e+10)
# Record those reasonable AICs () within each models

# output 1: check consistency between AIC(comprare two AICs) and LRT(p-value) 

# output 2: report AIC across all dataset in the aspect of model adequacy.  (BM support obs, does PMM also support obs)





# ifelse(BM2.1$Prob<0.05,1,0)
# log10(BM2.1$Prob)
# log10(EB2.1$Prob)
# log10(PMM2.1$Prob)
# log10(OU2.1$Prob)
# log10(ID2.1$Prob)



### Assessing the consistency on estimating the rates in multiple trait space.

### H_0: rate are the same $sigma_i$ are equals 
### H_a: not all $sigma_i$ are equals 

## by LRT 
## across all datasets --- 

modelset<-c("BM","PMM","OU","EB","ID")

A=array(NA,c(2,3))
A[2,1]<-1
A[2,2]<-0
A

##      [,1] [,2] [,3]
## [1,]   NA   NA   NA
## [2,]    1    0   NA

p.value.all<-pout<-aicout<-array(NA,c(5,14,100))
aic.value.all<-array(NA,c(2,5,14,100))

for(mIndex in 1:length(modelset)){
  #mIndex<-1
  for (dIndex in 1:length(matrixsetfile)){
    #dIndex<-11
    for (tIndex in 1:100){
      # tIndex<-1
      #print(paste(modelset[mIndex],dIndex,".",tIndex,sep=""))
      objtoload<-paste(modelset[mIndex],dIndex,".",tIndex,sep="")
      if(exists(objtoload)){
        getobjtoload<-get(objtoload)
        pout[mIndex,dIndex,tIndex]<-ifelse(getobjtoload$Prob<0.001,1,0)
        p.value.all[mIndex,dIndex,tIndex]<-getobjtoload$Prob
        
        if( getobjtoload$AICc.obs>-2e+7 && getobjtoload$AICc.constrained>-2e+7){
        aic.value.all[1,mIndex,dIndex,tIndex]<-getobjtoload$AICc.obs
        aic.value.all[2,mIndex,dIndex,tIndex]<-getobjtoload$AICc.constrained
        aicout[mIndex,dIndex,tIndex]<-ifelse(getobjtoload$AICc.constrained< getobjtoload$AICc.obs,1,0)
        }
      }
    }
  }
}

table(pout[1,,])

## 
##   0   1 
##  65 146

table(aicout[1,,])

## 
##   0   1 
## 153   3

table(pout[2,,])

## 
##   0   1 
## 120   6

table(aicout[2,,])

## 
##   0   1 
##  12 114

summary(aic.value.all)

##       Min.    1st Qu.     Median       Mean    3rd Qu.       Max.       NA's 
## -285670.11      50.35      72.35   -1035.00     113.15    3575.20      12536

#modelset<-c("BM","PMM","OU","EB","ID")
BMpout<-pout[1,,]# 
PMMpout<-pout[2,,]
OUpout<-pout[3,,]
EBpout<-pout[4,,]
IDpout<-pout[5,,]


BMp.value.all<-p.value.all[1,,]# 
PMMp.value.all<-p.value.all[2,,]
OUp.value.all<-p.value.all[3,,]
EBp.value.all<-p.value.all[4,,]
IDp.value.all<-p.value.all[5,,]


BMaic.value.all.obs<-aic.value.all[1,1,,]# observation 
BMaic.value.all.con<-aic.value.all[2,1,,]# constraint
PMMaic.value.all.obs<-aic.value.all[1,2,,]# observation 
PMMaic.value.all.con<-aic.value.all[2,2,,]# constraint
OUaic.value.all.obs<-aic.value.all[1,3,,]# observation 
OUaic.value.all.con<-aic.value.all[2,3,,]# constraint
EBaic.value.all.obs<-aic.value.all[1,4,,]# observation 
EBaic.value.all.con<-aic.value.all[2,4,,]# constraint
IDaic.value.all.obs<-aic.value.all[1,5,,]# observation 
IDaic.value.all.con<-aic.value.all[2,5,,]# constraint


# Assessing the performance of models in the aspect of the violation to the null hypothesis.

BMpout[1:5,1:10]

##      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
## [1,]   NA   NA   NA   NA   NA   NA    0   NA   NA    NA
## [2,]    1   NA    1    1   NA   NA   NA   NA   NA    NA
## [3,]   NA    0   NA   NA    0    1    1    1    1    NA
## [4,]    0   NA   NA   NA   NA    1   NA    1    1     1
## [5,]   NA   NA    0   NA   NA   NA    0    0    0     0

PMMpout[1:5,1:10]

##      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
## [1,]    0    0    0    0    0    0    0    0   NA    NA
## [2,]   NA   NA    0   NA   NA   NA   NA   NA   NA    NA
## [3,]    0    1   NA   NA   NA   NA    0    0    0    NA
## [4,]   NA   NA   NA   NA   NA   NA   NA   NA   NA    NA
## [5,]    0   NA   NA   NA   NA   NA    0   NA   NA    NA

OUpout[1:5,1:10]

##      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
## [1,]    0    0    0    0    0    0    0    0   NA     0
## [2,]    1   NA    1    0   NA   NA   NA   NA   NA    NA
## [3,]    1    1    0   NA    0    0    0    0    0     1
## [4,]   NA   NA   NA   NA   NA   NA   NA   NA   NA    NA
## [5,]    0   NA   NA   NA   NA   NA    0   NA   NA    NA

EBpout[1:5,1:10]

##      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
## [1,]    0   NA    0   NA   NA   NA    0   NA   NA    NA
## [2,]    1    0    1    0   NA   NA   NA   NA   NA    NA
## [3,]   NA   NA   NA   NA    0    0    0    1    0    NA
## [4,]   NA   NA   NA   NA    1    1   NA   NA   NA    NA
## [5,]    0   NA    0   NA    0   NA   NA   NA    0    NA

IDpout[1:5,1:10]

##      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
## [1,]   NA    0   NA   NA   NA   NA   NA   NA   NA    NA
## [2,]    0   NA    1    1   NA   NA   NA   NA   NA    NA
## [3,]   NA   NA   NA   NA    0   NA    0   NA   NA     1
## [4,]   NA    0   NA    0    0    0    0    0    0    NA
## [5,]    0    0   NA    0    0   NA    0   NA   NA     0

#BMPMM
agree=0
notagree=0

sensbmpmm<-0
senspmmbm<-0

aicobsbmpmm<-0
aicobspmmbm<-0

aicconbmpmm<-0
aicconpmmbm<-0

for (i in 1:14){
  for(j in 1:100){
    if(!is.na(BMpout[i,j]) & !is.na(PMMpout[i,j]) ){
      if(BMpout[i,j]==PMMpout[i,j]){agree=agree+1}
      if(BMpout[i,j]!=PMMpout[i,j]){notagree=notagree+1}
      
      if(BMp.value.all[i,j]<PMMp.value.all[i,j]){sensbmpmm=sensbmpmm+1}else{senspmmbm=senspmmbm+1}
    }
    
    if(!is.na(BMaic.value.all.obs[i,j]) & !is.na(PMMaic.value.all.obs[i,j]) ){
      if(BMaic.value.all.obs[i,j] < PMMaic.value.all.obs[i,j]){
        aicobsbmpmm=aicobsbmpmm+1}else{aicobspmmbm=aicobspmmbm+1}
      
      if(BMaic.value.all.con[i,j] < PMMaic.value.all.con[i,j]){
        aicconbmpmm=aicconbmpmm+1}else{aicconpmmbm=aicconpmmbm+1}
    }
  }
}
print(agree)

## [1] 23

print(notagree)

## [1] 53

agreebmpmm<-agree
notagreebmpmm<-notagree
sensbmpmm

## [1] 55

senspmmbm

## [1] 21

sensbmpmm/(sensbmpmm+senspmmbm)

## [1] 0.7236842

#  0.7236842,  bm is more violate to the null assumption


aicobsbmpmm

## [1] 41

aicobspmmbm

## [1] 15

aicobsbmpmm/(aicobsbmpmm+aicobspmmbm)

## [1] 0.7321429

aicconbmpmm/(aicconbmpmm+aicconpmmbm)

## [1] 0.05357143

#BMEB
agree=0
notagree=0

sensbmeb<-0
sensebbm<-0

for (i in 1:14){
  for(j in 1:100){
    if(!is.na(BMpout[i,j]) & !is.na(EBpout[i,j]) ){
      if(BMpout[i,j]==EBpout[i,j]){agree=agree+1}
      if(BMpout[i,j]!=EBpout[i,j]){notagree=notagree+1}
      
      if(BMp.value.all[i,j]<EBp.value.all[i,j]){sensbmeb=sensbmeb+1}else{sensebbm=sensebbm+1}
      
    }
  }
}
print(agree)

## [1] 64

print(notagree)

## [1] 34

agreebmeb<-agree
notagreebmeb<-notagree
sensbmeb

## [1] 59

sensebbm

## [1] 39

sensbmeb/(sensbmeb+sensebbm)

## [1] 0.6020408

#   0.6020408,  bm is more violate to the null assumption


#BMOU
agree=0
notagree=0

sensbmou<-0
sensoubm<-0

for (i in 1:14){
  for(j in 1:100){
    if(!is.na(BMpout[i,j]) & !is.na(OUpout[i,j]) ){
      if(BMpout[i,j]==OUpout[i,j]){agree=agree+1}
      if(BMpout[i,j]!=OUpout[i,j]){notagree=notagree+1}
      
      if(BMp.value.all[i,j]<OUp.value.all[i,j]){sensbmou=sensbmou+1}else{sensoubm=sensoubm+1}
      
    }
  }
}
print(agree)

## [1] 52

print(notagree)

## [1] 82

agreebmou<-agree
notagreebmou<-notagree
sensbmou

## [1] 82

sensoubm

## [1] 52

sensbmou/(sensoubm+sensbmou)

## [1] 0.6119403

#   0.6119403,  bm is more violate to the null assumption


#BMID
agree=0
notagree=0

sensbmid<-0
sensidbm<-0

for (i in 1:14){
  for(j in 1:100){
    if(!is.na(BMpout[i,j]) & !is.na(IDpout[i,j]) ){
      if(BMpout[i,j]==IDpout[i,j]){agree=agree+1}
      if(BMpout[i,j]!=IDpout[i,j]){notagree=notagree+1}
      
      if(BMp.value.all[i,j]<IDp.value.all[i,j]){sensbmid=sensbmid+1}else{sensidbm=sensidbm+1}
      
    }
  }
}
print(agree)

## [1] 116

print(notagree)

## [1] 22

agreebmid<-agree
notagreebmid<-notagree
sensbmid

## [1] 32

sensidbm

## [1] 106

sensbmid/(sensbmid+sensidbm)

## [1] 0.2318841

#   0.2318841,  bm is more violate to the null assumption


#PMMEB
agree=0
notagree=0

senspmmeb<-0
sensebpmm<-0

for (i in 1:14){
  for(j in 1:100){
    if(!is.na(PMMpout[i,j]) & !is.na(EBpout[i,j]) ){
      if(PMMpout[i,j]==EBpout[i,j]){agree=agree+1}
      if(PMMpout[i,j]!=EBpout[i,j]){notagree=notagree+1}
      
      if(PMMp.value.all[i,j]<EBp.value.all[i,j]){senspmmeb=senspmmeb+1}else{sensebpmm=sensebpmm+1}
      
    }
  }
}
print(agree)

## [1] 32

print(notagree)

## [1] 25

agreepmmeb<-agree
notagreepmmeb<-notagree
senspmmeb

## [1] 4

sensebpmm

## [1] 53

senspmmeb/(senspmmeb+sensebpmm)

## [1] 0.07017544

#PMMOU
agree=0
notagree=0

senspmmou<-0
sensoupmm<-0

for (i in 1:14){
  for(j in 1:100){
    if(!is.na(PMMpout[i,j]) & !is.na(OUpout[i,j]) ){
      if(PMMpout[i,j]==OUpout[i,j]){agree=agree+1}
      if(PMMpout[i,j]!=OUpout[i,j]){notagree=notagree+1}
      
      if(PMMp.value.all[i,j]<OUp.value.all[i,j]){senspmmou=senspmmou+1}else{sensoupmm=sensoupmm+1}
      
    }
  }
}
print(agree)

## [1] 87

print(notagree)

## [1] 17

agreepmmou<-agree
notagreepmmou<-notagree
senspmmou

## [1] 9

sensoupmm

## [1] 95

senspmmou/(sensoupmm+senspmmou)

## [1] 0.08653846

#PMMID
agree=0
notagree=0

senspmmid<-0
sensidpmm<-0

for (i in 1:14){
  for(j in 1:100){
    if(!is.na(PMMpout[i,j]) & !is.na(IDpout[i,j]) ){
      if(PMMpout[i,j]==IDpout[i,j]){agree=agree+1}
      if(PMMpout[i,j]!=IDpout[i,j]){notagree=notagree+1}
      
      if(PMMp.value.all[i,j]<IDp.value.all[i,j]){senspmmid=senspmmid+1}else{sensidpmm=sensidpmm+1}
      
    }
  }
}
print(agree)

## [1] 27

print(notagree)

## [1] 43

agreepmmid<-agree
notagreepmmid<-notagree
senspmmid

## [1] 11

sensidpmm

## [1] 59

senspmmid/(senspmmid+sensidpmm)

## [1] 0.1571429

#EBOU
agree=0
notagree=0

sensebou<-0
sensoueb<-0

for (i in 1:14){
  for(j in 1:100){
    if(!is.na(EBpout[i,j]) & !is.na(OUpout[i,j]) ){
      if(EBpout[i,j]==OUpout[i,j]){agree=agree+1}
      if(EBpout[i,j]!=OUpout[i,j]){notagree=notagree+1}
      
      if(EBp.value.all[i,j]<OUp.value.all[i,j]){sensebou=sensebou+1}else{sensoueb=sensoueb+1}
      
    }
  }
}
print(agree)

## [1] 48

print(notagree)

## [1] 43

agreeebou<-agree
notagreeebou<-notagree
sensebou

## [1] 51

sensoueb

## [1] 40

sensebou/(sensoueb+sensebou)

## [1] 0.5604396

#EBID
agree=0
notagree=0

sensebid<-0
sensideb<-0

for (i in 1:14){
  for(j in 1:100){
    if(!is.na(EBpout[i,j]) & !is.na(IDpout[i,j]) ){
      if(EBpout[i,j]==IDpout[i,j]){agree=agree+1}
      if(EBpout[i,j]!=IDpout[i,j]){notagree=notagree+1}
      
      if(EBp.value.all[i,j]<IDp.value.all[i,j]){sensebid=sensebid+1}else{sensideb=sensideb+1}
      
    }
  }
}
print(agree)

## [1] 63

print(notagree)

## [1] 27

agreeebid<-agree
notagreeebid<-notagree
sensebid

## [1] 30

sensideb

## [1] 60

sensebid/(sensebid+sensideb)

## [1] 0.3333333

#OUID
agree=0
notagree=0

sensouid<-0
sensidou<-0

for (i in 1:14){
  for(j in 1:100){
    if(!is.na(OUpout[i,j]) & !is.na(IDpout[i,j]) ){
      if(OUpout[i,j]==IDpout[i,j]){agree=agree+1}
      if(OUpout[i,j]!=IDpout[i,j]){notagree=notagree+1}
      
      if(OUp.value.all[i,j]<IDp.value.all[i,j]){sensouid=sensouid+1}else{sensidou=sensidou+1}
      
    }
  }
}
print(agree)

## [1] 51

print(notagree)

## [1] 67

agreeouid<-agree
notagreeouid<-notagree
sensouid

## [1] 31

sensidou

## [1] 87

sensouid/(sensouid+sensidou)

## [1] 0.2627119

############

models<-c("bm", "pmm", "ou",  "eb",  "id" )
outcount<-array(NA,c(length(models),length(models)))
rownames(outcount)<-colnames(outcount)<-models
for(i in 1:length(models)){
  for(j in 1:length(models)){
    if(i!=j){
      outcount[i,j]<-paste("(", get(paste("sens",models[i],models[j],sep="")),",",
                           get(paste("sens",models[j],models[i],sep="")),")",sep="")
    }
  }
}
outcount

##     bm         pmm       ou        eb        id        
## bm  NA         "(55,21)" "(82,52)" "(59,39)" "(32,106)"
## pmm "(21,55)"  NA        "(9,95)"  "(4,53)"  "(11,59)" 
## ou  "(52,82)"  "(95,9)"  NA        "(40,51)" "(31,87)" 
## eb  "(39,59)"  "(53,4)"  "(51,40)" NA        "(30,60)" 
## id  "(106,32)" "(59,11)" "(87,31)" "(60,30)" NA

library(xtable)
xtable(outcount,digits=3)

## % latex table generated in R 4.2.3 by xtable 1.8-4 package
## % Sun Jun 25 08:57:57 2023
## \begin{table}[ht]
## \centering
## \begin{tabular}{rlllll}
##   \hline
##  & bm & pmm & ou & eb & id \\ 
##   \hline
## bm &  & (55,21) & (82,52) & (59,39) & (32,106) \\ 
##   pmm & (21,55) &  & (9,95) & (4,53) & (11,59) \\ 
##   ou & (52,82) & (95,9) &  & (40,51) & (31,87) \\ 
##   eb & (39,59) & (53,4) & (51,40) &  & (30,60) \\ 
##   id & (106,32) & (59,11) & (87,31) & (60,30) &  \\ 
##    \hline
## \end{tabular}
## \end{table}

namesVar<-array(NA,c(10))
ratiotoPlot<-array(NA,c(10))
k=0
for(i in 1:4){
  for(j in (i+1):5){
    k=k+1
    ratiotoPlot[k]<-get(paste("sens",models[i],models[j],sep=""))/(get(paste("sens",models[i],models[j],sep=""))+get(paste("sens",models[j],models[i],sep="")))
    namesVar[k]<-paste(modelset[i],"-",modelset[j],sep="")
#    print(outcount[i,j])
  }
}

names(ratiotoPlot)<-namesVar
barplot(ratiotoPlot)

library(ggplot2)

df=data.frame(namesVar=namesVar,ratiotoPlot=ratiotoPlot)
ggplot(df,aes(x=namesVar,y=ratiotoPlot,fill=ratiotoPlot))+geom_bar(stat="identity")+ geom_hline(yintercept=0.5, linetype="dashed", color = "red") + xlab("Pair of Models")+ ylab("Ratio")+ggtitle("Compared by LRT")+
  geom_text(aes(label=round(ratiotoPlot,2)), position=position_dodge(width=0.9), vjust=-0.25)

# Ensure dplyr is loaded
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Ensure dplyr and ggplot2 are loaded
library(dplyr)
library(ggplot2)

# Arrange the data.frame by 'ratiotoPlot' in descending order
df <- df %>% arrange(desc(ratiotoPlot))

# Reorder namesVar according to the order of ratiotoPlot
df$namesVar <- factor(df$namesVar, levels = df$namesVar)

# Plot
ggplot(df,aes(x=namesVar,y=ratiotoPlot,fill=ratiotoPlot)) +
  geom_bar(stat="identity") +
  geom_hline(yintercept=0.5, linetype="dashed", color = "red") +
  xlab("Pair of Models") +
  ylab("Ratio") +
  ggtitle("Compared by LRT") +
  theme(plot.title = element_text(hjust = 0.5), # this centers the title
        legend.position = "none", # this removes the legend
        panel.background = element_rect(fill = "white")) + # this makes the background white
  geom_text(aes(label=round(ratiotoPlot,2)), position=position_dodge(width=0.9), vjust=-0.25)

# Filter out rows with 'ID' in the namesVar
df <- df %>% 
  filter(!grepl('ID', namesVar)) %>%
  arrange(desc(ratiotoPlot))

# Reorder namesVar according to the order of ratiotoPlot
df$namesVar <- factor(df$namesVar, levels = df$namesVar)

# Plot
ggplot(df, aes(x=namesVar,y=ratiotoPlot,fill=ratiotoPlot)) +
  geom_bar(stat="identity") +
  geom_hline(yintercept=0.5, linetype="dashed", color = "red") +
  xlab("Pair of Models") +
  ylab("Ratio") +
  ggtitle("Compared by LRT") +
  theme(plot.title = element_text(hjust = 0.5), # this centers the title
        legend.position = "none", # this removes the legend
        panel.background = element_rect(fill = "white")) + # this makes the background white
  geom_text(aes(label=round(ratiotoPlot,2)), position=position_dodge(width=0.9), vjust=-0.25)

# A smaller p-value indicate stronger violation to the null hypothesis for homogeneous rate of evolution across traits 
# here we compare two models on testing the homogeneity of rates via their sensitivity to the violation to the null. 
# Since all models use the same datasets, 
# for each dataset, we say the model A has stronger evidence to violate the null hypothesis than the model B if the p-value of the likelihood ratio test under model A is smaller than the the p-value of the likelihood ratio test under model B.