rm(list=ls())
#setwd("~/Dropbox/FCU/Teaching/Mentoring/2020Fall/ChiYuWu/rcodeCYW/")
setwd("~/Dropbox/FCU/Teaching/Mentoring/2016Spring-2021Fall_AssociateProfessor/2020Fall/ChiYuWu/rcodeCYW/")


#mtx <- read.table("C:/Users/User/Dropbox/ChiYoWu/dataset/niewiarowski2004phylogenetic/mtxEOV-2.txt")
#mtx <- read.table("~/Dropbox/FCU/Teaching/Mentoring/2020Fall/ChiYuWu/dataset/niewiarowski2004phylogenetic/mtxEOV-2.txt")
library(phytools)
library(ape)
library(MASS)
library(ggplot2)
library(geepack)
library(geeM)
library(gee)
library(ggtree)

compar.gee.nb2<-function (formula, data = NULL, phy, corStruct,scale.fix = FALSE, scale.value = 1,theta=theta){
  #family =  negative.binomial(2)
  if (requireNamespace("gee", quietly = TRUE)) 
    gee <- gee::gee
  else stop("package 'gee' not available")
  if (!missing(corStruct)) {
    if (!missing(phy)) 
      warning("the phylogeny was ignored because you gave a 'corStruct' object")
    R <- vcv(corStruct, corr = TRUE)
  }
  else {
    R <- vcv(phy, corr = TRUE)
  }
  if (is.null(data)) 
    data <- parent.frame()
  else {
    nmsR <- rownames(R)
    if (!identical(rownames(data), nmsR)) {
      if (!any(is.na(match(rownames(data), nmsR)))) 
        data <- data[nmsR, ]
      else {
        msg <- if (missing(corStruct)) 
          "the tip labels of the tree"
        else "those of the correlation structure"
        msg <- paste("the rownames of the data.frame and", 
                     msg, "do not match: the former were ignored in the analysis")
        warning(msg)
      }
    }
  }
  effect.assign <- attr(model.matrix(formula, data = data), "assign")
  for (i in all.vars(formula)) {
    if (any(is.na(eval(parse(text = i), envir = data)))) 
      stop("the present method cannot be used with missing data: you may consider removing the species with missing data from your tree with the function 'drop.tip'.")
  }
  id <- rep(1, dim(R)[1])
  ###############################
  #  negative.binomial(2)
  # HERE WE HAVE NEGATIVE BINOMIAL
  geemod<-do.call("geem", list(formula, id, data = data, family = MASS::negative.binomial(theta= theta),  corstr = "fixed", corr.mat=R, tol=1e-4,maxit=25))
  geemod$coefficients<-geemod$beta
  return(geemod)
}



mtx <- read.table("~/Dropbox/FCU/Teaching/Mentoring/2016Spring-2021Fall_AssociateProfessor/2020Fall/ChiYuWu/dataset/niewiarowski2004phylogenetic/mtxEOV-2.txt")
dim(mtx)


mtx<-as.matrix(mtx)

mtx<-mtx +t(mtx)
diag(mtx)<-1

library(phangorn)
D<-  2*(max(mtx)- mtx)
tree<-upgma(D)
plot(tree)

#tree$tip.label<-LETTERS[1:17]
tree$tip.label <-  c("S. undulatus(GA)","S. undulatus(OH)","S. undulatus(AL)","S. undulatus(NJ)","S. undulatus(PA)","S. undulatus(SC)","S. woodi","S. undulatus(AZ)","S. undulatus(UT)","S. undulatus(Huerfano.CO)","S. undulatus(Mesa.CO)","S. undulatus(NE)","S. undulatus(TX)","S. undulatus(Grant.NM)","S. undulatus(Hidalgo.NM)","S. virgatus","S. occidentalis")

tree$tip.label <-  c("GA","OH","AL","NJ","PA","SC","S. woodi","AZ","UT","Huerfano.CO","Mesa.CO","NE","TX","Grant.NM","Hidalgo.NM","S. virgatus","S. occidentalis")

plot(tree)
vcv(tree)
library(ggtree)
ggtree(tree, branch.length="none")+ geom_tiplab()+xlim(0, 15)

#traitset<-read.table("C:/Users/User/Dropbox/ChiYoWu/dataset/niewiarowski2004phylogenetic/traitEOV-2.txt")
#traitset<-read.table("~/Dropbox/FCU/Teaching/Mentoring/2020Fall/ChiYuWu/dataset/niewiarowski2004phylogenetic/traitEOV-2.txt")

traitset<-read.table("~/Dropbox/FCU/Teaching/Mentoring/2016Spring-2021Fall_AssociateProfessor/2020Fall/ChiYuWu/dataset/niewiarowski2004phylogenetic/traitEOV-2.txt")

dim(traitset)
head(traitset)
traitset<-as.data.frame(traitset)
colnames(traitset)<-c("size at maturity(mm)", "average size(mm)", "age at maturity(mo)", "egg mass(g)", "clutch size", "clutch mass(g)", "eggs per year")

traitset$`eggs per year`<-round(traitset$`eggs per year`)
traitset<-as.data.frame(traitset)
traitset$`egg mass(g)`
traitset$`eggs per year`
#install.packages("ggplot2")
library(ggplot2)
library(tidyverse)
f <- function(x){
  3.397*exp(-1.179 *x)
}
ggplot(traitset,aes(x=`egg mass(g)`,y=`eggs per year`)) + geom_point()+ggtitle('Lizard Dataset')+theme_bw()

library(xtable)
xtable(traitset)

poi<-glm(traitset$`eggs per year`~traitset$`egg mass(g)`,family = "poisson")
poi
summary(poi)
library(MASS)

nb2<-glm(traitset$`eggs per year`~traitset$`egg mass(g)`,family = negative.binomial(theta = 10))
nb2
summary(nb2)


#1 size at maturity,
#2 average size 
#3 age at maturity, 
#4 egg mass,    
#5 clutch size  
#6 clutch mass  
#7 eggs per year

eggs_per_year<-traitset$`eggs per year`
mean(eggs_per_year)
var(eggs_per_year)

#eggs_mass<-traitset$`egg mass(g)`
eggs_mass<-log(traitset$`egg mass(g)`)
glm(eggs_per_year~eggs_mass,family="poisson")

data <- data.frame(eggs_mass=eggs_mass, eggs_per_year=eggs_per_year)
data




library(MASS)
#fitLM <- glm(eggs_per_year ~ eggs_mass, data, family = gaussian(link = "identity"))
fitPOIS <- glm(eggs_per_year ~ eggs_mass, data, family = poisson(link = "log"))


# do bootstrap sample and refit the data to compute the CI and then check with theoretical 
summary(fitPOIS)

fitPOIS$coefficients

lambda.vec= exp(fitPOIS$coefficients[1]+ fitPOIS$coefficients[2]*eggs_mass)
lambda.vec
sim.poi.coef<-array(NA,c(2,1000))
for(i in 1:1000){
  sim.eggs_per_year<-rpois(length(lambda.vec),lambda = lambda.vec)
  sim.eggs_per_year
  sim.fitPOIS <- glm(sim.eggs_per_year ~ eggs_mass, data, family = poisson(link = "log"))
  sim.poi.coef[,i]<-sim.fitPOIS$coefficients
}
rownames(sim.poi.coef)<-c("b0","b1")

# look nice, the bootstrap results look goods for poisson regression. 
# the standard deviation is closed to the theoretical one
boot.poi.mean<-apply(sim.poi.coef,1,mean)
boot.poi.sd<-apply(sim.poi.coef,1,sd)
boot.poi.mean
boot.poi.sd
sumfitpois<-summary(fitPOIS)
sumfitpois$coefficients

###########################################################
fitNB2 <-glm.nb(eggs_per_year ~ eggs_mass, data = data)
sim.nb2.coef<-array(NA,c(2,1000))
#?rnegbin
for(i in 1:1000){
  sim.eggs_per_year<-  rnegbin(length(eggs_mass), mu = fitted(fitNB2), theta = fitNB2$theta)   
  sim.eggs_per_year
  sim.nb2 <- glm.nb(sim.eggs_per_year ~ eggs_mass, data = data)
  sim.nb2.coef[,i]<-sim.nb2$coefficients
}
rownames(sim.nb2.coef)<-c("b0","b1")

# look nice, the bootstrap results look goods for poisson regression. 
# the standard deviation is closed to the theoretical one
boot.nb2.mean<-apply(sim.nb2.coef,1,mean)
boot.nb2.sd<-apply(sim.nb2.coef,1,sd)
boot.nb2.mean
boot.nb2.sd
sumfitnb2<-summary(fitNB2)
sumfitnb2$coefficients

#library(xtable)
#install.packages("qpcR")
library(qpcR)
# results<-AIC(fitLM, fitPOIS,fitNB2)
# awe<-akaike.weights(results$AIC)
# results$deltaAIC<-awe$deltaAIC
# results$weights<-awe$weights
# xtable(results)

results<-AIC(fitPOIS,fitNB2)
awe<-akaike.weights(results$AIC)
results$deltaAIC<-awe$deltaAIC
results$weights<-awe$weights
xtable(results)


plot(eggs_per_year ~ eggs_mass)

newdat <- data.frame(eggs_mass = seq(min(eggs_mass),max(eggs_mass),0.01))
#newdat$predLM <- predict(fitLM, newdata = newdat, type = "response")
newdat$predPOIS <- predict(fitPOIS, newdata = newdat, type = "response")
newdat$predNB2 <- predict(fitNB2, newdata = newdat, type = "response")

plot(eggs_per_year ~ eggs_mass, data,main="Lizard Trait",xlab="eggs mass", ylab="eggs per year")
#lines(predLM ~ eggs_mass, newdat, col = 2)
lines(predPOIS ~ eggs_mass, newdat, col = "red")
lines(predNB2 ~ eggs_mass, newdat, col = "blue")





############################################
############# phypoi #######################
############################################

### Poisson
ordsamplep.poi<-function (n, lambda, Sigma){
  #  n=sims
  #  lambda=lambda.array
  #  Sigma=C
  k <- length(lambda)
  #  n<-2
  valori <- mvrnorm(n, rep(0, k), Sigma)
  for (i in 1:k)
  {
    #    i<-1
    valori[, i] <- qpois(pnorm(valori[,i]), lambda[i])
  }
  return(valori)
}

phyfitPOIS<-compar.gee(eggs_per_year ~ eggs_mass,phy=tree,family="poisson",scale.fix = TRUE)
sim.phypoi.coef<-array(NA,c(2,1000))
sim.eggs_per_year.full<-  ordsamplep.poi(n=1000, lambda=fitted(phyfitPOIS), Sigma=vcv(tree))
for(i in 1:1000){
  #  i<-1
  sim.eggs_per_year<-sim.eggs_per_year.full[i,]
  sim.phyfitPOIS <- compar.gee(sim.eggs_per_year ~ eggs_mass,phy=tree,family="poisson",scale.fix = TRUE)
  sim.phypoi.coef[,i]<-sim.phyfitPOIS$coefficients
}
rownames(sim.phypoi.coef)<-c("b0","b1")

boot.phypoi.mean<-apply(sim.phypoi.coef,1,mean)
boot.phypoi.sd<-apply(sim.phypoi.coef,1,sd)

phyfitPOIS

#phypoi also work too nice


curve(exp(phyfitPOIS$coefficients[1]+phyfitPOIS$coefficients[2]*x),range(eggs_mass),add=TRUE,col="orange",lwd=2)


#####################################################################################
# QIC for GEE models
# Daniel J. Hocking
# 07 February 2012
# Refs:
# Pan (2001)
# Liang and Zeger (1986)
# Zeger and Liang (1986)
# Hardin and Hilbe (2003)
# Dornmann et al 2007
# # http://www.unc.edu/courses/2010spring/ecol/562/001/docs/lectures/lecture14.htm
#####################################################################################
# Poisson QIC for geese{geepack} output
# Ref: Pan (2001)
#QIC.pois.geese <- function(model.R, model.indep) {  
QIC.pois.geese <- function(model.R){
  library(MASS)
  # Fitted and observed values for quasi likelihood
  mu.R <- model.R$fitted.values
  # alt: X <- model.matrix(model.R)
  #  names(model.R$coefficients) <- NULL
  #  beta.R <- model.R$coefficients
  #  mu.R <- exp(X %*% beta.R)
  y <- model.R$y
  # Quasi Likelihood for Poisson
  quasi.R <- sum((y*log(mu.R)) - mu.R) # poisson()$dev.resids - scale and weights = 1
  
  # Trace Term (penalty for model complexity)
  #  AIinverse <- ginv(model.Indep$vbeta.naiv) # Omega-hat(I) via Moore-Penrose generalized inverse of a matrix in MASS package
  # Alt: AIinverse <- solve(model.Indep$vbeta.naiv) # solve via identity
  #  Vr <- model.R$vbeta
  #  trace.R <- sum(diag(AIinverse %*% Vr))
  px <- length(mu.R) # number non-redunant columns in design matrix
  #  QIC
  #  QIC <- (-2)*quasi.R + 2*trace.R
  QICu <- (-2)*quasi.R + 2*px    # Approximation assuming model structured correctly 
  #  output <- c(QIC, QICu, quasi.R, trace.R, px)
  #  names(output) <- c('QIC', 'QICu', 'Quasi Lik', 'Trace', 'px')
  #  output
  return(QICu)
}

model.R.phypoi<-NULL
model.R.phypoi$fitted.values<- exp(phyfitPOIS$coefficients[1]+phyfitPOIS$coefficients[2]*eggs_mass)
model.R.phypoi$y<-eggs_per_year
QIC.pois.geese.phy<-QIC.pois.geese(model.R.phypoi)

iidpoi<-geeglm(eggs_per_year ~ eggs_mass, id = 1:length(eggs_mass), data = NULL,
               family = poisson, corstr = "independence", scale.fix = TRUE)
QIC.pois.geese.iid<-geepack::QIC(iidpoi)

QIC.pois.geese.iid
QIC.pois.geese.phy

############################################
############# phynbs #######################
############################################

Mu<-function(params,r=r,X=X){
  b0<-params["b0"]
  b1<-params["b1"]
  mu.vec<-r*exp(b0+b1*X)/(1-exp(b0+b1*X)) 
  return(mu.vec)
}


gee_nb2_equation<-function(params,C=C,X=X,Y=Y,r=r){
  b0<-params["b0"]
  b1<-params["b1"]
  C<-C/max(C)
  mu<-Mu(params,r=r,X=X)
  A<-diag((mu + mu^2/r))
  sqrtA<-sqrt(A)
  eqn<- rbind(mu,mu*X) %*%solve(sqrtA%*%C%*%sqrtA)%*%(Y-mu)
  return(eqn)
}

C<-vcv(tree)
params<-phyfitPOIS$coefficients
names(params) <- c("b0","b1")
X=eggs_mass
Y=eggs_per_year
r=mean(Y)
library(MASS)
library(nleqslv)
nb2est<-nleqslv(params,gee_nb2_equation,C=C,X=X,Y=Y,r=r)
nb2est$x
curve(r*exp(nb2est$x["b0"]+nb2est$x["b1"]*x)/(1-exp(nb2est$x["b0"]+nb2est$x["b1"]*x)),range(X),add=TRUE,col="black",lwd=2)

legend("topleft", legend = c("poi","nb2","phypoi","phynb2"), col = c("red","blue","orange","black"), lty = 1)
#legend("topleft", legend = c("lm", "poisson","nb2"), col = c(2,4,3), lty = 1)


#####################################################################################
# QIC for GEE models
# modify from Daniel J. Hocking
# 07 February 2012
# Refs:0
# Pan (2001)
# Liang and Zeger (1986)
# Zeger and Liang (1986)
# Hardin and Hilbe (2003)
# Dornmann et al 2007
# # http://www.unc.edu/courses/2010spring/ecol/562/001/docs/lectures/lecture14.htm
# # https://www.r-bloggers.com/2012/03/r-script-to-calculate-qic-for-generalized-estimating-equation-gee-model-selection/
#####################################################################################
# Poisson QIC for geese{geepack} output
# Ref: Pan (2001)


QIC.nb2.geese <- function(model.R) {
  library(MASS)
  # Fitted and observed values for quasi likelihood
  mu.R <- model.R$fitted.values
  # alt: X <- model.matrix(model.R)
  #  names(model.R$coefficients) <- NULL
  #  beta.R <- model.R$coefficients
  #  mu.R <- exp(X %*% beta.R)
  y <- model.R$y
  # Quasi Likelihood for Poisson
  quasi.R <- sum(y*(log(mu.R)-2*log(mu.R+1))) # nb2()$dev.resids - scale and weights = 1
  
  #  quasi.R <- sum((y*log(mu.R)) - mu.R) # poisson()$dev.resids - scale and weights = 1
  
  # Trace Term (penalty for model complexity)
  #  AIinverse <- ginv(model.Indep$vbeta.naiv) # Omega-hat(I) via Moore-Penrose generalized inverse of a matrix in MASS package
  # Alt: AIinverse <- solve(model.Indep$vbeta.naiv) # solve via identity
  #  Vr <- model.R$vbeta
  #  trace.R <- sum(diag(AIinverse %*% Vr))
  px <- length(mu.R) # number non-redunant columns in design matrix
  # QIC
  #  QIC <- (-2)*quasi.R + 2*trace.R
  QICu <- (-2)*quasi.R + 2*px    # Approximation assuming model structured correctly 
  #output <- c(QIC, QICu, quasi.R, trace.R, px)
  #names(output) <- c('QIC', 'QICu', 'Quasi Lik', 'Trace', 'px')
  #return(output)
  return(-0.5*QICu)
}

model.R.phynb2<-NULL
#model.R$fitted.values<- r*exp(nb2est$x["b0"]+nb2est$x["b1"]*X)/(1-exp(nb2est$x["b0"]+nb2est$x["b1"]*X))
model.R.phynb2$fitted.values<- r*exp(nb2est$x["b0"]+nb2est$x["b1"]*X)/(1-exp(nb2est$x["b0"]+nb2est$x["b1"]*X))
model.R.phynb2$y<-Y

QIC.nb2.geese(model.R.phynb2)

#########################################

#compar.gee.nb2(params,C=C,X=X,Y=Y,r=r)
ordsamplep.nb2<-function (n,r, mu, Sigma){
  k <- length(mu)
  valori <- mvrnorm(n, rep(0, k), Sigma)
  dim(valori)
  for (i in 1:k)
  {
    #    i<-1
    valori[,i] <- qnbinom(p=pnorm(valori[,i]), size=r, prob=mu[i]/(mu[i]+r)) # mu = n(1-p)/p
    #  qnbinom
  }
  return(valori)
}

fitnb <-summary(glm.nb(eggs_per_year ~ eggs_mass)) # glm  find theta
fitnb$coefficients
fitnb$coefficients[,"Estimate"]
theta <-   fitnb$theta
phyfitNB2<-compar.gee.nb2(eggs_per_year ~ eggs_mass,phy=tree,theta=theta)
fitted(phyfitNB2)
sim.phyNB2.coef<-array(NA,c(2,1000))
#sim.eggs_per_year.full<- ordsamplep.nb2(n=1000,r=mean(eggs_per_year), mu=fitted(phyfitNB2), Sigma=vcv(tree))
fit.neg<-fitdistr(eggs_per_year,"Negative binomial")
#sim.eggs_per_year.full<- ordsamplep.nb2(n=1000,r=fit.neg$estimate["size"], mu=fitted(phyfitNB2), Sigma=vcv(tree))
# install.packages("devtools")
# library(devtools)
# install_github("zdk123/SpiecEasi")
# library(SpiecEasi)
# 
# install.packages("remotes")
# remotes::install_github("zdk123/SpiecEasi")

#install.packages("SimMultiCorrData")
library(SimMultiCorrData)
rcorrvar2(n = 1000, means=fitted(phyfitNB2),Sigma=vcv(tree))
#?rcorrvar2


.zinegbin_getLam <- function(mu, S) {
  S   <- max(sqrt(mu)+1e-3, S)
  (mu + (mu^2 - mu + S^2) / mu) / 2
}

#' @noRd
#' @keywords internal
.zinegbin_getP <- function(mu, lam) {
  1 - (mu / lam)
}

#' @noRd
#' @keywords internal
.zinegbin_getK <- function(mu, S, lam) {
  S   <- max(sqrt(mu)+1e-3, S)
  (mu * lam) / (mu^2 - (mu * (lam + 1)) + S^2)
}

#' @keywords internal
.fixInf <- function(data) {
  # hacky way of replacing infinite values with the col max + 1
  if (any(is.infinite(data))) {
    data <-  apply(data, 2, function(x) {
      if (any(is.infinite(x))) {
        x[ind<-which(is.infinite(x))] <- NA
        x[ind] <- max(x, na.rm=TRUE)+1
      }
      x
    })
  }
  data
}

rmvzinegbin <- function(n, mu, Sigma, munbs, ks, ps, ...) {
  # Generate an NxD matrix of Zero-inflated poisson data,
  # with counts approximately correlated according to Sigma
  Cor <- cov2cor(Sigma)
  SDs <- sqrt(diag(Sigma))
  if (missing(munbs) || missing(ps) || missing(ks)) {
    if (length(mu) != length(SDs)) stop("Sigma and mu dimensions don't match")
    munbs <- unlist(lapply(1:length(SDs), function(i) .zinegbin_getLam(mu[i], SDs[i])))
    ps   <- unlist(lapply(1:length(SDs), function(i) .zinegbin_getP(mu[i], munbs[i])))
    ks   <- unlist(lapply(1:length(SDs), function(i) .zinegbin_getK(mu[i], SDs[i], munbs[i])))
  }
  if (length(munbs) != length(SDs)) stop("Sigma and mu dimensions don't match")
  d   <- length(munbs)
  normd  <- rmvnorm(n, rep(0, d), sigma=Cor)
  unif   <- pnorm(normd)
  data <- t(matrix(VGAM::qzinegbin(t(unif), munb=munbs, size=ks, pstr0=ps, ...), d, n))
  data <- .fixInf(data)
  return(data)
}

library(MASS)
library(mvtnorm)
sim.eggs_per_year.full<-rmvzinegbin(n=1000, mu=fitted(phyfitNB2), Sigma=vcv(tree))

for(i in 1:1000){
  print(i)
#  i<-4
  
  sim.eggs_per_year<-sim.eggs_per_year.full[i,]
  sim.eggs_per_year
  # par(mfrow=c(1,2))
  # plot(eggs_per_year~eggs_mass)
  # plot(sim.eggs_per_year~eggs_mass)
  # 
  sim.phyfitNB2 <- compar.gee.nb2(sim.eggs_per_year ~ eggs_mass,phy=tree,theta=theta)
  sim.phyfitNB2
  sim.phyNB2.coef[,i]<-sim.phyfitNB2$coefficients
}
rownames(sim.phyNB2.coef)<-c("b0","b1")


boot.phynb2.mean<-apply(sim.phyNB2.coef,1,mean)
boot.phynb2.sd<-apply(sim.phyNB2.coef,1,sd)

phyfitNB2
phyfitPOIS$coefficients
fitPOIS$coefficients
fitNB2$coefficients

apply(sim.phypoi.coef,1,sd)


system("pwd")
save.image("lizardyoboot2.rda")

output<-t(rbind(fitPOIS$coefficients,
                fitNB2$coefficients,
                phyfitPOIS$coefficients,
                phyfitNB2$coefficients))
xtable(output)
rm(list=ls())
library(phytools)
## Loading required package: ape
## Loading required package: maps
library(ape)
library(MASS)
library(ggplot2)
library(geepack)
library(geeM)
## Loading required package: Matrix
library(gee)
library(ggtree)
## ggtree v3.8.0 For help: https://yulab-smu.top/treedata-book/
## 
## If you use the ggtree package suite in published research, please cite
## the appropriate paper(s):
## 
## Guangchuang Yu, David Smith, Huachen Zhu, Yi Guan, Tommy Tsan-Yuk Lam.
## ggtree: an R package for visualization and annotation of phylogenetic
## trees with their covariates and other associated data. Methods in
## Ecology and Evolution. 2017, 8(1):28-36. doi:10.1111/2041-210X.12628
## 
## LG Wang, TTY Lam, S Xu, Z Dai, L Zhou, T Feng, P Guo, CW Dunn, BR
## Jones, T Bradley, H Zhu, Y Guan, Y Jiang, G Yu. treeio: an R package
## for phylogenetic tree input and output with richly annotated and
## associated data. Molecular Biology and Evolution. 2020, 37(2):599-603.
## doi: 10.1093/molbev/msz240
## 
## Guangchuang Yu, Tommy Tsan-Yuk Lam, Huachen Zhu, Yi Guan. Two methods
## for mapping and visualizing associated data on phylogeny using ggtree.
## Molecular Biology and Evolution. 2018, 35(12):3041-3043.
## doi:10.1093/molbev/msy194
## 
## Attaching package: 'ggtree'
## The following object is masked from 'package:Matrix':
## 
##     expand
## The following object is masked from 'package:ape':
## 
##     rotate
load("~/Dropbox/FCU/Teaching/Mentoring/2016Spring-2021Fall_AssociateProfessor/2020Fall/ChiYuWu/rcodeCYW/lizardyoboot.rda")


boot.poi.mean<-round(boot.poi.mean,3)
boot.poi.sd<-round(boot.poi.sd,3)

boot.nb2.mean<-round(boot.nb2.mean,3)
boot.nb2.sd<-round(boot.nb2.sd,3)

boot.phypoi.mean<-round(boot.phypoi.mean,3)
boot.phypoi.sd<-round(boot.phypoi.sd,3)

boot.phynb2.mean<-round(boot.phynb2.mean,3)
boot.phynb2.sd<-round(boot.phynb2.sd,3)


out.meansd.array<-array(NA,c(2,4))

#poi
out.meansd.array[1,1]<-paste(boot.poi.mean[1],"(",boot.poi.sd[1],")",sep="")
out.meansd.array[2,1]<-paste(boot.poi.mean[2],"(",boot.poi.sd[2],")",sep="")

#nb2
out.meansd.array[1,2]<-paste(boot.nb2.mean[1],"(",boot.nb2.sd[1],")",sep="")
out.meansd.array[2,2]<-paste(boot.nb2.mean[2],"(",boot.nb2.sd[2],")",sep="")

#phypoi
out.meansd.array[1,3]<-paste(boot.phypoi.mean[1],"(",boot.phypoi.sd[1],")",sep="")
out.meansd.array[2,3]<-paste(boot.phypoi.mean[2],"(",boot.phypoi.sd[2],")",sep="")

#phynb2
out.meansd.array[1,4]<-paste(boot.phynb2.mean[1],"(",boot.phynb2.sd[1],")",sep="")
out.meansd.array[2,4]<-paste(boot.phynb2.mean[2],"(",boot.phynb2.sd[2],")",sep="")

#install.packages("xtable")
library(xtable)
xtable(out.meansd.array)
## % latex table generated in R 4.3.1 by xtable 1.8-4 package
## % Sun Jul  9 14:40:28 2023
## \begin{table}[ht]
## \centering
## \begin{tabular}{rllll}
##   \hline
##  & 1 & 2 & 3 & 4 \\ 
##   \hline
## 1 & 3.397(0.237) & 3.411(0.344) & 3.88(0.172) & 3.753(0.176) \\ 
##   2 & -1.188(0.735) & -1.258(1.065) & -3.302(0.566) & -2.831(0.569) \\ 
##    \hline
## \end{tabular}
## \end{table}
boot.nb2.mean
##     b0     b1 
##  3.411 -1.258
boot.nb2.sd
##    b0    b1 
## 0.344 1.065
boot.phypoi.mean
##     b0     b1 
##  3.880 -3.302
boot.phypoi.sd
##    b0    b1 
## 0.172 0.566
boot.phynb2.mean
##     b0     b1 
##  3.753 -2.831
boot.phynb2.sd
##    b0    b1 
## 0.176 0.569
summary(fitPOIS)
## 
## Call:
## glm(formula = eggs_per_year ~ eggs_mass, family = poisson(link = "log"), 
##     data = data)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   3.3968     0.2334  14.553   <2e-16 ***
## eggs_mass    -1.1792     0.7276  -1.621    0.105    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 42.962  on 16  degrees of freedom
## Residual deviance: 40.280  on 15  degrees of freedom
## AIC: 126.01
## 
## Number of Fisher Scoring iterations: 4
summary(fitNB2)
## 
## Call:
## glm.nb(formula = eggs_per_year ~ eggs_mass, data = data, init.theta = 14.97916062, 
##     link = log)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   3.4256     0.3583   9.560   <2e-16 ***
## eggs_mass    -1.2707     1.1045  -1.151     0.25    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(14.9792) family taken to be 1)
## 
##     Null deviance: 18.319  on 16  degrees of freedom
## Residual deviance: 17.088  on 15  degrees of freedom
## AIC: 119.28
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  14.98 
##           Std. Err.:  8.89 
## 
##  2 x log-likelihood:  -113.278
phyfitPOIS
## Call: compar.gee(formula = eggs_per_year ~ eggs_mass, family = "poisson", 
##     phy = tree, scale.fix = TRUE)
## Number of observations:  17 
## Model:
##                       Link: log 
##  Variance to Mean Relation: poisson 
## 
## QIC: -1393.149 
## 
## Summary of Residuals:
##        Min         1Q     Median         3Q        Max 
## -12.829250   3.055108   4.414483   6.225469  17.902768 
## 
## 
## Coefficients:
##              Estimate      S.E.         t  Pr(T > |t|)
## (Intercept)  3.877618 0.1661241 23.341700 4.529787e-06
## eggs_mass   -3.259023 0.5492056 -5.934068 2.335199e-03
## 
## Estimated Scale Parameter:  1
## "Phylogenetic" df (dfP):  6.733333
phyfitNB2
## geem(formula = eggs_per_year ~ eggs_mass, id = c(1, 1, 1, 1, 
## 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), data = <environment>, 
##     family = list(family = "Negative Binomial(14.9792)", link = "log", 
##         linkfun = function (mu) 
##         log(mu), linkinv = function (eta) 
##         pmax(exp(eta), .Machine$double.eps), variance = function (mu) 
##         mu + mu^2/.Theta, dev.resids = function (y, mu, wt) 
##         2 * wt * (y * log(pmax(1, y)/mu) - (y + .Theta) * log((y + 
##             .Theta)/(mu + .Theta))), aic = function (y, n, mu, 
##             wt, dev) 
##         {
##             term <- (y + .Theta) * log(mu + .Theta) - y * log(mu) + 
##                 lgamma(y + 1) - .Theta * log(.Theta) + lgamma(.Theta) - 
##                 lgamma(.Theta + y)
##             2 * sum(term * wt)
##         }, mu.eta = function (eta) 
##         pmax(exp(eta), .Machine$double.eps), initialize = expression(
##             {
##                 if (any(y < 0)) 
##                   stop("negative values not allowed for the negative binomial family")
##                 n <- rep(1, nobs)
##                 mustart <- y + (y == 0)/6
##             }), validmu = function (mu) 
##         all(mu > 0), valideta = function (eta) 
##         TRUE, simulate = function (object, nsim) 
##         {
##             ftd <- fitted(object)
##             rnegbin(nsim * length(ftd), ftd, .Theta)
##         }), corstr = "fixed", corr.mat = c(1, 0.933333333, 0.866666667, 
##     0.666666667, 0.666666667, 0.666666667, 0.6, 0.4, 0.4, 0.4, 
##     0.4, 0.266666667, 0.266666667, 0.133333333, 0.133333333, 
##     0.066666667, 0, 0.933333333, 1, 0.866666667, 0.666666667, 
##     0.666666667, 0.666666667, 0.6, 0.4, 0.4, 0.4, 0.4, 0.266666667, 
##     0.266666667, 0.133333333, 0.133333333, 0.066666667, 0, 0.866666667, 
##     0.866666667, 1, 0.666666667, 0.666666667, 0.666666667, 0.6, 
##     0.4, 0.4, 0.4, 0.4, 0.266666667, 0.266666667, 0.133333333, 
##     0.133333333, 0.066666667, 0, 0.666666667, 0.666666667, 0.666666667, 
##     1, 0.933333333, 0.866666667, 0.6, 0.4, 0.4, 0.4, 0.4, 0.266666667, 
##     0.266666667, 0.133333333, 0.133333333, 0.066666667, 0, 0.666666667, 
##     0.666666667, 0.666666667, 0.933333333, 1, 0.866666667, 0.6, 
##     0.4, 0.4, 0.4, 0.4, 0.266666667, 0.266666667, 0.133333333, 
##     0.133333333, 0.066666667, 0, 0.666666667, 0.666666667, 0.666666667, 
##     0.866666667, 0.866666667, 1, 0.6, 0.4, 0.4, 0.4, 0.4, 0.266666667, 
##     0.266666667, 0.133333333, 0.133333333, 0.066666667, 0, 0.6, 
##     0.6, 0.6, 0.6, 0.6, 0.6, 1, 0.4, 0.4, 0.4, 0.4, 0.266666667, 
##     0.266666667, 0.133333333, 0.133333333, 0.066666667, 0, 0.4, 
##     0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 1, 0.933333333, 0.8, 0.8, 0.266666667, 
##     0.266666667, 0.133333333, 0.133333333, 0.066666667, 0, 0.4, 
##     0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.933333333, 1, 0.8, 0.8, 0.266666667, 
##     0.266666667, 0.133333333, 0.133333333, 0.066666667, 0, 0.4, 
##     0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.8, 0.8, 1, 0.933333333, 0.266666667, 
##     0.266666667, 0.133333333, 0.133333333, 0.066666667, 0, 0.4, 
##     0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.8, 0.8, 0.933333333, 1, 0.266666667, 
##     0.266666667, 0.133333333, 0.133333333, 0.066666667, 0, 0.266666667, 
##     0.266666667, 0.266666667, 0.266666667, 0.266666667, 0.266666667, 
##     0.266666667, 0.266666667, 0.266666667, 0.266666667, 0.266666667, 
##     1, 0.933333333, 0.133333333, 0.133333333, 0.066666667, 0, 
##     0.266666667, 0.266666667, 0.266666667, 0.266666667, 0.266666667, 
##     0.266666667, 0.266666667, 0.266666667, 0.266666667, 0.266666667, 
##     0.266666667, 0.933333333, 1, 0.133333333, 0.133333333, 0.066666667, 
##     0, 0.133333333, 0.133333333, 0.133333333, 0.133333333, 0.133333333, 
##     0.133333333, 0.133333333, 0.133333333, 0.133333333, 0.133333333, 
##     0.133333333, 0.133333333, 0.133333333, 1, 0.933333333, 0.066666667, 
##     0, 0.133333333, 0.133333333, 0.133333333, 0.133333333, 0.133333333, 
##     0.133333333, 0.133333333, 0.133333333, 0.133333333, 0.133333333, 
##     0.133333333, 0.133333333, 0.133333333, 0.933333333, 1, 0.066666667, 
##     0, 0.066666667, 0.066666667, 0.066666667, 0.066666667, 0.066666667, 
##     0.066666667, 0.066666667, 0.066666667, 0.066666667, 0.066666667, 
##     0.066666667, 0.066666667, 0.066666667, 0.066666667, 0.066666667, 
##     1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1), 
##     maxit = 25, tol = 1e-04)
## 
##  Coefficients: 
##  (Intercept) eggs_mass
##        3.747    -2.784
## 
##  Scale Parameter:  1.566 
## 
##  Correlation Model:  fixed
##  Working Correlation[1:4,1:4]: 
##        GA     OH     AL     NJ
## GA 1.0000 0.9333 0.8667 0.6667
## OH 0.9333 1.0000 0.8667 0.6667
## AL 0.8667 0.8667 1.0000 0.6667
## NJ 0.6667 0.6667 0.6667 1.0000
## 
##  Number of clusters:  1   Maximum cluster size:  17 
##  Number of observations with nonzero weight:  17
# Now resampling


# ordsamplep.nb2(n=sims,r=r,mu=mu,Sigma=C)


# bootstrp normal

X<-rnorm(100,mean=3,sd=2)

m.X<-mean(X)
sd.X<-sd(X)
bt.X<-apply(replicate(1000,rnorm(100,mean=m.X,sd=sd.X) ),2,mean)

c(mean(X) - 1.96*sd(X)/10,mean(X) + 1.96*sd(X)/10)
## [1] 2.766937 3.515809
quantile(bt.X,probs = c(0.025,0.975))
##     2.5%    97.5% 
## 2.759188 3.517992
## plot
newdat <- data.frame(eggs_mass = sort(eggs_mass))
newdat$predPOIS <- predict(fitPOIS, newdata = newdat, type = "response")
newdat$predNB2 <- predict(fitNB2, newdata = newdat, type = "response")

newdat$phypredPOIS<- predict(phyfitPOIS, newdata = newdat, type = "response")
newdat$phypredNB2<-c(sort(fitted(phyfitNB2),decreasing = T))# predict(phyfitNB2, newdata = newdat, type = "response")

plot(eggs_per_year ~ eggs_mass, data,main="Lizard Trait",xlab="eggs mass", ylab="eggs per year")
#lines(predLM ~ eggs_mass, newdat, col = 2)
lines(predPOIS ~ eggs_mass, newdat, col = "red")
lines(predNB2 ~ eggs_mass, newdat, col = "blue")
lines(phypredPOIS ~ eggs_mass, newdat, col = "blue")
lines(phypredNB2 ~ eggs_mass, newdat, col = "blue")

## plot
newdat <- data.frame(eggs_mass = sort(eggs_mass))
newdat$predPOIS <- predict(fitPOIS, newdata = newdat, type = "response")
newdat$predNB2 <- predict(fitNB2, newdata = newdat, type = "response")

newdat$phypredPOIS <- predict(phyfitPOIS, newdata = newdat, type = "response")
newdat$phypredNB2 <- c(sort(fitted(phyfitNB2), decreasing = T))

# Create plot
plot(eggs_per_year ~ eggs_mass, data, main="Lizard Trait", xlab="Eggs Mass", ylab="Eggs Per Year")

# Add lines for each model with different colors, line types and widths
lines(predPOIS ~ eggs_mass, newdat, col = "red", lty = 1, lwd = 2)
lines(predNB2 ~ eggs_mass, newdat, col = "blue", lty = 2, lwd = 2)
lines(phypredPOIS ~ eggs_mass, newdat, col = "black", lty = 3, lwd = 2)
lines(phypredNB2 ~ eggs_mass, newdat, col = "green", lty = 4, lwd = 2)

# Add legend
legend("topright", legend=c("Poi", "Negative Binomial", "Phylogenetic Poisson", "Phylogenetic Negative Binomial"),
       col=c("red", "blue", "black", "green"), lty = 1:4, lwd = 2, cex = 0.8)

# Load ggplot2
library(ggplot2)

# Create base plot with white theme
p <- ggplot(data, aes(x=eggs_mass, y=eggs_per_year)) +
  geom_point(size = 3) + # Make points larger
  labs(x="Eggs Mass", y="Eggs Per Year") +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5, size = 20),  # Larger font for title
        axis.title.x = element_text(size = 16),             # Larger font for x-axis label
        axis.title.y = element_text(size = 16),             # Larger font for y-axis label
        legend.title = element_text(size = 14),             # Larger font for legend title
        legend.text = element_text(size = 12))              # Larger font for legend text

# Add title
p <- p + ggtitle("Lizard Trait")

# Add lines for each model with larger line width
p <- p + 
  geom_line(data=newdat, aes(y=predPOIS, color="Poisson"), size = 1.5) +
  geom_line(data=newdat, aes(y=predNB2, color="Negative Binomial"), size = 1.5) +
  geom_line(data=newdat, aes(y=phypredPOIS, color="Phylogenetic Poisson"), size = 1.5) +
  geom_line(data=newdat, aes(y=phypredNB2, color="Phylogenetic Negative Binomial"), size = 1.5)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Add legend at the bottom
p <- p + scale_color_manual(name="Model", 
                            values=c("Poisson"="red", "Negative Binomial"="blue", 
                                     "Phylogenetic Poisson"="black", "Phylogenetic Negative Binomial"="green")) +
  theme(legend.position="bottom")

# Show the plot
print(p)

library(ggtree)
library(ape)
#tree<-read.tree("~/Dropbox/ChiYoWu/rcodeCYW/mammal2.nwk")
tree<- read.tree("~/Dropbox/FCU/Teaching/Mentoring/2016Spring-2021Fall_AssociateProfessor/2020Fall/ChiYuWu/rcodeCYW/mammal2.nwk")


#ggtree(tree)+theme_tree()+geom_text(aes(label=label),size=.5)
plot(tree,cex = .5)

mtx <- read.table("~/Dropbox/FCU/Teaching/Mentoring/2016Spring-2021Fall_AssociateProfessor/2020Fall/ChiYuWu/dataset/niewiarowski2004phylogenetic/mtxEOV-2.txt")
dim(mtx)
## [1] 17 17
mtx<-as.matrix(mtx)

mtx<-mtx +t(mtx)
diag(mtx)<-1

library(phangorn)
D<-  2*(max(mtx)- mtx)
tree<-upgma(D)
plot(tree)

#tree$tip.label<-LETTERS[1:17]
tree$tip.label <-  c("S. undulatus(GA)","S. undulatus(OH)","S. undulatus(AL)","S. undulatus(NJ)","S. undulatus(PA)","S. undulatus(SC)","S. woodi","S. undulatus(AZ)","S. undulatus(UT)","S. undulatus(Huerfano.CO)","S. undulatus(Mesa.CO)","S. undulatus(NE)","S. undulatus(TX)","S. undulatus(Grant.NM)","S. undulatus(Hidalgo.NM)","S. virgatus","S. occidentalis")

tree$tip.label <-  c("GA","OH","AL","NJ","PA","SC","S. woodi","AZ","UT","Huerfano.CO","Mesa.CO","NE","TX","Grant.NM","Hidalgo.NM","S. virgatus","S. occidentalis")

plot(tree)

vcv(tree)
##                         GA         OH         AL         NJ         PA
## GA              1.00000000 0.93333333 0.86666667 0.66666667 0.66666667
## OH              0.93333333 1.00000000 0.86666667 0.66666667 0.66666667
## AL              0.86666667 0.86666667 1.00000000 0.66666667 0.66666667
## NJ              0.66666667 0.66666667 0.66666667 1.00000000 0.93333333
## PA              0.66666667 0.66666667 0.66666667 0.93333333 1.00000000
## SC              0.66666667 0.66666667 0.66666667 0.86666667 0.86666667
## S. woodi        0.60000000 0.60000000 0.60000000 0.60000000 0.60000000
## AZ              0.40000000 0.40000000 0.40000000 0.40000000 0.40000000
## UT              0.40000000 0.40000000 0.40000000 0.40000000 0.40000000
## Huerfano.CO     0.40000000 0.40000000 0.40000000 0.40000000 0.40000000
## Mesa.CO         0.40000000 0.40000000 0.40000000 0.40000000 0.40000000
## NE              0.26666667 0.26666667 0.26666667 0.26666667 0.26666667
## TX              0.26666667 0.26666667 0.26666667 0.26666667 0.26666667
## Grant.NM        0.13333333 0.13333333 0.13333333 0.13333333 0.13333333
## Hidalgo.NM      0.13333333 0.13333333 0.13333333 0.13333333 0.13333333
## S. virgatus     0.06666667 0.06666667 0.06666667 0.06666667 0.06666667
## S. occidentalis 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000
##                         SC   S. woodi         AZ         UT Huerfano.CO
## GA              0.66666667 0.60000000 0.40000000 0.40000000  0.40000000
## OH              0.66666667 0.60000000 0.40000000 0.40000000  0.40000000
## AL              0.66666667 0.60000000 0.40000000 0.40000000  0.40000000
## NJ              0.86666667 0.60000000 0.40000000 0.40000000  0.40000000
## PA              0.86666667 0.60000000 0.40000000 0.40000000  0.40000000
## SC              1.00000000 0.60000000 0.40000000 0.40000000  0.40000000
## S. woodi        0.60000000 1.00000000 0.40000000 0.40000000  0.40000000
## AZ              0.40000000 0.40000000 1.00000000 0.93333333  0.80000000
## UT              0.40000000 0.40000000 0.93333333 1.00000000  0.80000000
## Huerfano.CO     0.40000000 0.40000000 0.80000000 0.80000000  1.00000000
## Mesa.CO         0.40000000 0.40000000 0.80000000 0.80000000  0.93333333
## NE              0.26666667 0.26666667 0.26666667 0.26666667  0.26666667
## TX              0.26666667 0.26666667 0.26666667 0.26666667  0.26666667
## Grant.NM        0.13333333 0.13333333 0.13333333 0.13333333  0.13333333
## Hidalgo.NM      0.13333333 0.13333333 0.13333333 0.13333333  0.13333333
## S. virgatus     0.06666667 0.06666667 0.06666667 0.06666667  0.06666667
## S. occidentalis 0.00000000 0.00000000 0.00000000 0.00000000  0.00000000
##                    Mesa.CO         NE         TX   Grant.NM Hidalgo.NM
## GA              0.40000000 0.26666667 0.26666667 0.13333333 0.13333333
## OH              0.40000000 0.26666667 0.26666667 0.13333333 0.13333333
## AL              0.40000000 0.26666667 0.26666667 0.13333333 0.13333333
## NJ              0.40000000 0.26666667 0.26666667 0.13333333 0.13333333
## PA              0.40000000 0.26666667 0.26666667 0.13333333 0.13333333
## SC              0.40000000 0.26666667 0.26666667 0.13333333 0.13333333
## S. woodi        0.40000000 0.26666667 0.26666667 0.13333333 0.13333333
## AZ              0.80000000 0.26666667 0.26666667 0.13333333 0.13333333
## UT              0.80000000 0.26666667 0.26666667 0.13333333 0.13333333
## Huerfano.CO     0.93333333 0.26666667 0.26666667 0.13333333 0.13333333
## Mesa.CO         1.00000000 0.26666667 0.26666667 0.13333333 0.13333333
## NE              0.26666667 1.00000000 0.93333333 0.13333333 0.13333333
## TX              0.26666667 0.93333333 1.00000000 0.13333333 0.13333333
## Grant.NM        0.13333333 0.13333333 0.13333333 1.00000000 0.93333333
## Hidalgo.NM      0.13333333 0.13333333 0.13333333 0.93333333 1.00000000
## S. virgatus     0.06666667 0.06666667 0.06666667 0.06666667 0.06666667
## S. occidentalis 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000
##                 S. virgatus S. occidentalis
## GA               0.06666667               0
## OH               0.06666667               0
## AL               0.06666667               0
## NJ               0.06666667               0
## PA               0.06666667               0
## SC               0.06666667               0
## S. woodi         0.06666667               0
## AZ               0.06666667               0
## UT               0.06666667               0
## Huerfano.CO      0.06666667               0
## Mesa.CO          0.06666667               0
## NE               0.06666667               0
## TX               0.06666667               0
## Grant.NM         0.06666667               0
## Hidalgo.NM       0.06666667               0
## S. virgatus      1.00000000               0
## S. occidentalis  0.00000000               1
library(ggtree)
ggtree(tree, branch.length="none")+ geom_tiplab()+xlim(0, 15)