rm(list=ls())

setwd("~/Dropbox/FCU/Teaching/Mentoring/2016Spring-2021Fall_AssociateProfessor/2020Fall/ChiYuWu/rcodeCYW/")

#how to read and plot tree
#http://www.phytools.org/Cordoba2017/ex/2/Intro-to-phylogenies.html

#source("nb2regsim.R")
#install.packages(c("phytools","ape","ggplot2","geepack","geeM","gee"))

#install.packages("ggtree")

library(phytools)
library(ape)
library(MASS)
library(ggplot2)
library(geepack)
library(geeM)
library(gee)

compar.gee.nb2<-function (formula, data = NULL, phy, corStruct,scale.fix = FALSE, scale.value = 1,theta=theta){
  #family =  negative.binomial(2)
  if (requireNamespace("gee", quietly = TRUE)) 
    gee <- gee::gee
  else stop("package 'gee' not available")
  if (!missing(corStruct)) {
    if (!missing(phy)) 
      warning("the phylogeny was ignored because you gave a 'corStruct' object")
    R <- vcv(corStruct, corr = TRUE)
  }
  else {
    R <- vcv(phy, corr = TRUE)
  }
  if (is.null(data)) 
    data <- parent.frame()
  else {
    nmsR <- rownames(R)
    if (!identical(rownames(data), nmsR)) {
      if (!any(is.na(match(rownames(data), nmsR)))) 
        data <- data[nmsR, ]
      else {
        msg <- if (missing(corStruct)) 
          "the tip labels of the tree"
        else "those of the correlation structure"
        msg <- paste("the rownames of the data.frame and", 
                     msg, "do not match: the former were ignored in the analysis")
        warning(msg)
      }
    }
  }
  effect.assign <- attr(model.matrix(formula, data = data), "assign")
  for (i in all.vars(formula)) {
    if (any(is.na(eval(parse(text = i), envir = data)))) 
      stop("the present method cannot be used with missing data: you may consider removing the species with missing data from your tree with the function 'drop.tip'.")
  }
  id <- rep(1, dim(R)[1])
  ###############################
  #  negative.binomial(2)
  # HERE WE HAVE NEGATIVE BINOMIAL
  geemod<-do.call("geem", list(formula, id, data = data, family = MASS::negative.binomial(theta= theta),  corstr = "fixed", corr.mat=R, tol=1e-4,maxit=25))
  geemod$coefficients<-geemod$beta
  return(geemod)
}

ordsamplep.nb2<-function (n,size, mu, Sigma){
  # n=sims 
  # size=theta 
  # mu=mu 
  # Sigma=C
  k <- length(mu)
  valori <- mvrnorm(n, rep(0, k), Sigma)
  for (i in 1:k)
  {
    valori[, i] <- qnbinom(p=pnorm(valori[,i]), size=size,mu=mu[i])
  }
  return(valori)
}

compar.gee2<-function (formula, data = NULL, family = gaussian, phy, corStruct, scale.fix = FALSE, scale.value = 1){
  # formula=  rawA~X
  # phy=tree
  # scale.fix = FALSE 
  # scale.value = 1
  # data = NULL
  # theta=2
  #family =  negative.binomial(2)
  if (requireNamespace("gee", quietly = TRUE)) 
    gee <- gee::gee
  else stop("package 'gee' not available")
  if (!missing(corStruct)) {
    if (!missing(phy)) 
      warning("the phylogeny was ignored because you gave a 'corStruct' object")
    R <- vcv(corStruct, corr = TRUE)
  }
  else {
    R <- vcv(phy, corr = TRUE)
  }
  if (is.null(data)) 
    data <- parent.frame()
  else {
    nmsR <- rownames(R)
    if (!identical(rownames(data), nmsR)) {
      if (!any(is.na(match(rownames(data), nmsR)))) 
        data <- data[nmsR, ]
      else {
        msg <- if (missing(corStruct)) 
          "the tip labels of the tree"
        else "those of the correlation structure"
        msg <- paste("the rownames of the data.frame and", 
                     msg, "do not match: the former were ignored in the analysis")
        warning(msg)
      }
    }
  }
  effect.assign <- attr(model.matrix(formula, data = data), "assign")
  for (i in all.vars(formula)) {
    if (any(is.na(eval(parse(text = i), envir = data)))) 
      stop("the present method cannot be used with missing data: you may consider removing the species with missing data from your tree with the function 'drop.tip'.")
  }
  id <- rep(1, dim(R)[1])
  ###############################
  #  negative.binomial(2)
  library(gee)
  family = "poisson"
  geemod <- do.call("gee", list(formula, id, data = data, family = family, 
                                R = R, corstr = "fixed", 
                                scale.fix = scale.fix, 
                                scale.value = scale.value,
                                tol=1e-4,maxit=25)
  )
  W <- geemod$naive.variance
  fname <- if (is.function(family)) 
    deparse(substitute(family))
  else if (is.list(family)) 
    family$family
  else family
  if (fname == "binomial") 
    W <- summary(glm(formula, family = quasibinomial, data = data))$cov.scaled
  N <- geemod$nobs
  if (!missing(corStruct)) 
    phy <- attr(corStruct, "tree")
  dfP <- sum(phy$edge.length) * N/sum(diag(vcv(phy)))
  Y <- geemod$y
  MU <- geemod$fitted.values
  Qlik <- switch(fname, 
                 gaussian = -sum((Y - MU)^2)/2, 
                 binomial = sum(Y *log(MU/(1 - MU)) + log(1 - MU)), 
                 poisson = sum(Y * log(MU) - MU), 
                 Gamma = sum(Y/MU + log(MU)), 
                 inverse.gaussian = sum(-Y/(2 * MU^2) + 1/MU),
  )
  Ai <- do.call("gee", list(formula, id, data = data, family = family, 
                            corstr = "independence", scale.fix = scale.fix, scale.value = scale.value))$naive.variance
  QIC <- -2 * Qlik + 2 * sum(diag(solve(Ai) %*% W))
  print(geemod$family)
  obj <- list(call = match.call(), effect.assign = effect.assign, 
              nobs = N, QIC = QIC, coefficients = geemod$coefficients,
              residuals = geemod$residuals, fitted.values = MU, family = geemod$family$family, 
              link = geemod$family$link, scale = geemod$scale, W = W, 
              dfP = dfP)
  class(obj) <- "compar.gee"
  obj
}

#########################################################################
##########################################################################
#########################################################################

#df<-read.csv("~/Dropbox/ChiYoWu/dataset/capellini2015role.csv")

df<-read.csv("~/Dropbox/FCU/Teaching/Mentoring/2016Spring-2021Fall_AssociateProfessor/2020Fall/ChiYuWu/dataset/capellini2015role.csv")
head(df)

#df = read.csv("C:/Users/User/Dropbox/ChiYoWu/dataset/capellini2015role.csv")
df$OV<- as.numeric(as.vector(unlist(df$OV,recursive=FALSE)))
testdata <- na.omit(df)
#head(testdata)
df2<-testdata[,-1]
#sum(is.na(df2))
#str(df2)
#ls(df2)
#df2$Intro
#df2$Est
#ols<-lm(LS~.,data=df2)
#round(ols$coefficients,2)
#lm(df2$LS~df2$RL)
#lm(df2$LS~df2$Est)
#str(df2)
#lm(LS~ Intro +  NoLocs  +  LG + BM + GT+  WA+ NBM+LY+ AFB+RL+OV, data=df2) 
#lm(LS~RL, data=df2) 

testdata <- testdata[-6,] # cause not used in tree

#spetoget<-testdata[,"Binomial"]
#df[spetoget,]

#mammal.nwk ???20
#mammal2.nwk 21-50
#tree<-read.tree("~/Dropbox/FCU/Teaching/Mentoring/2020Fall/ChiYuWu/rcodeCYW/mammal.nwk")


#tree<-read.tree("~/Dropbox/ChiYoWu/rcodeCYW/mammal2.nwk")
tree<- read.tree("~/Dropbox/FCU/Teaching/Mentoring/2016Spring-2021Fall_AssociateProfessor/2020Fall/ChiYuWu/rcodeCYW/mammal2.nwk")

plot(tree)
#vcv(tree )

# AFB: age at first birth in days
# BM: body mass in grams
# GT: gestation time in days
# LG: Longevity in years
# LS: litter size
# LY: litters per year
# NBM: neonatal body mass in grams
# OV: offspring value as per equation (see protocol)
# RL: reproductive lifespan in days (computed using LG converted into days and AFB)
# WA: waning age in days
tree$tip.label

tree2<-tree

tree2$tip.label<-c("H. javanicus", "G. genetta", "N. procyonoides", "V. vulpes", "P. lotor", "N. vison", "M. nivalis", "M. putorius", "M. erminea", "E. caballus", "C. bactrianus", "H. inermis", "M. reevesi", "P. tajacu", "S. scrofa", "M. coypus", "R. rattus", "R. exulans", "R. norvegicus", "M. minutus", "M. musculus", "M. glareolus", "O. zibethicus", "D. ordii", "C. canadensis", "S. vulgaris", "S. carolinensis", "E. quercinus", "M. avellanarius", "G. glis")

# if (!require("BiocManager", quietly = TRUE))
#   install.packages("BiocManager")
# 
# BiocManager::install("ggtree")
library(ggtree)
ggtree(tree2, branch.length="none")+ geom_tiplab()+xlim(0, 15)
plot(tree2)

reps<-round(testdata$LS[21:50]) # litter size 
pred1<-testdata$LY[21:50]  #body mass
pred2<-testdata$OV[21:50]
pred3<-testdata$Spread[21:50]
pred4<-testdata$LG[21:50]


###################################################
#################### Poisson#######################
###################################################
library(MASS)
#fitLM <- glm(eggs_per_year ~ eggs_mass, data, family = gaussian(link = "identity"))
fitPOIS <- glm(reps ~ pred1+pred2+pred3+pred4, family = poisson(link = "log"))

summary(fitPOIS)

fitPOIS$coefficients

lambda.vec= exp(fitPOIS$coefficients[1]+ fitPOIS$coefficients[2]*pred1+ fitPOIS$coefficients[3]*pred2+ fitPOIS$coefficients[4]*pred3+ fitPOIS$coefficients[5]*pred4)
lambda.vec
sim.poi.coef<-array(NA,c(5,1000))
for(i in 1:1000){
  #i<-1
  sim.reps<-rpois(length(lambda.vec),lambda = lambda.vec)
  sim.fitPOIS <- glm(sim.reps ~ pred1+pred2+pred3+pred4, family = poisson(link = "log"))
  sim.poi.coef[,i]<-sim.fitPOIS$coefficients
}
rownames(sim.poi.coef)<-c("b0","b1","b2","b3","b4")

# look nice, the bootstrap results look goods for poisson regression. 
# the standard deviation is closed to the theoretical one
boot.poi.mean<-apply(sim.poi.coef,1,mean)
boot.poi.sd<-apply(sim.poi.coef,1,sd)
boot.poi.mean
boot.poi.sd
sumfitpois<-summary(fitPOIS)
sumfitpois$coefficients
boot.poi.mean
boot.poi.sd


# You can make a data frame for the variable you use 
# see  

###################################################
#################### NB2#######################
###################################################

###########################################################
fitNB2 <-glm.nb(reps ~ pred1+pred2+pred3+pred4)
sim.nb2.coef<-array(NA,c(5,1000))
#?rnegbin
for(i in 1:1000){
  sim.reps<-  rnegbin(length(reps), mu = fitted(fitNB2), theta = fitNB2$theta)   
  sim.nb2 <- glm.nb(sim.reps ~ pred1+pred2+pred3+pred4)
  sim.nb2.coef[,i]<-sim.nb2$coefficients
}
rownames(sim.nb2.coef)<-c("b0","b1","b2","b3","b4")

# look nice, the bootstrap results look goods for poisson regression. 
# the standard deviation is closed to the theoretical one
boot.nb2.mean<-apply(sim.nb2.coef,1,mean)
boot.nb2.sd<-apply(sim.nb2.coef,1,sd)
boot.nb2.mean
boot.nb2.sd
sumfitnb2<-summary(fitNB2)
sumfitnb2$coefficients
boot.nb2.mean
boot.nb2.sd

############################################
############# phypoi #######################
############################################

### Poisson
ordsamplep.poi<-function (n, lambda, Sigma){
  #  n=sims
  #  lambda=lambda.array
  #  Sigma=C
  k <- length(lambda)
  #  n<-2
  valori <- mvrnorm(n, rep(0, k), Sigma)
  for (i in 1:k)
  {
    #    i<-1
    valori[, i] <- qpois(pnorm(valori[,i]), lambda[i])
  }
  return(valori)
}

phyfitPOIS<-compar.gee(reps ~ pred1+pred2+pred3+pred4,phy=tree,family="poisson",scale.fix = TRUE)
sim.phypoi.coef<-array(NA,c(5,1000))
sim.reps.full<-  ordsamplep.poi(n=1000, lambda=fitted(phyfitPOIS), Sigma=vcv(tree)/max(vcv(tree)))
dim(sim.reps.full)
for(i in 1:1000){
#  i<-1
  sim.reps<-sim.reps.full[i,]
  try(sim.phyfitPOIS <- compar.gee(sim.reps ~ pred1+pred2+pred3+pred4,phy=tree,family="poisson",scale.fix = TRUE))
  try(sim.phypoi.coef[,i]<-sim.phyfitPOIS$coefficients)
}
rownames(sim.phypoi.coef)<-c("b0","b1","b2","b3","b4")

boot.phypoi.mean<-apply(sim.phypoi.coef,1,mean)
boot.phypoi.sd<-apply(sim.phypoi.coef,1,sd)

phyfitPOIS
boot.phypoi.mean
boot.phypoi.sd

########################################################
#####################phynb2
#######################################################
#compar.gee.nb2(params,C=C,X=X,Y=Y,r=r)
ordsamplep.nb2<-function (n,r, mu, Sigma){
  k <- length(mu)
  valori <- mvrnorm(n, rep(0, k), Sigma)
  dim(valori)
  for (i in 1:k)
  {
    #    i<-1
    valori[,i] <- qnbinom(p=pnorm(valori[,i]), size=r, prob=mu[i]/(mu[i]+r)) # mu = n(1-p)/p
    #  qnbinom
  }
  return(valori)
}

fitnb <-summary(glm.nb(reps ~ pred1+pred2+pred3+pred4)) # glm  find theta
fitnb$coefficients
fitnb$coefficients[,"Estimate"]
theta <-   fitnb$theta
phyfitNB2<-compar.gee.nb2(reps ~ pred1+pred2+pred3+pred4,phy=tree,theta=theta)
fitted(phyfitNB2)
sim.phyNB2.coef<-array(NA,c(5,1000))
#install.packages("SimMultiCorrData")
library(SimMultiCorrData)
#rcorrvar2(n = 1000, means=fitted(phyfitNB2),Sigma=vcv(tree)/max(vcv(tree)))
?rcorrvar2


.zinegbin_getLam <- function(mu, S) {
  S   <- max(sqrt(mu)+1e-3, S)
  (mu + (mu^2 - mu + S^2) / mu) / 2
}

#' @noRd
#' @keywords internal
.zinegbin_getP <- function(mu, lam) {
  1 - (mu / lam)
}

#' @noRd
#' @keywords internal
.zinegbin_getK <- function(mu, S, lam) {
  S   <- max(sqrt(mu)+1e-3, S)
  (mu * lam) / (mu^2 - (mu * (lam + 1)) + S^2)
}

#' @keywords internal
.fixInf <- function(data) {
  # hacky way of replacing infinite values with the col max + 1
  if (any(is.infinite(data))) {
    data <-  apply(data, 2, function(x) {
      if (any(is.infinite(x))) {
        x[ind<-which(is.infinite(x))] <- NA
        x[ind] <- max(x, na.rm=TRUE)+1
      }
      x
    })
  }
  data
}

rmvzinegbin <- function(n, mu, Sigma, munbs, ks, ps, ...) {
  # Generate an NxD matrix of Zero-inflated poisson data,
  # with counts approximately correlated according to Sigma
  Cor <- cov2cor(Sigma)
  SDs <- sqrt(diag(Sigma))
  if (missing(munbs) || missing(ps) || missing(ks)) {
    if (length(mu) != length(SDs)) stop("Sigma and mu dimensions don't match")
    munbs <- unlist(lapply(1:length(SDs), function(i) .zinegbin_getLam(mu[i], SDs[i])))
    ps   <- unlist(lapply(1:length(SDs), function(i) .zinegbin_getP(mu[i], munbs[i])))
    ks   <- unlist(lapply(1:length(SDs), function(i) .zinegbin_getK(mu[i], SDs[i], munbs[i])))
  }
  if (length(munbs) != length(SDs)) stop("Sigma and mu dimensions don't match")
  d   <- length(munbs)
  normd  <- rmvnorm(n, rep(0, d), sigma=Cor)
  unif   <- pnorm(normd)
  data <- t(matrix(VGAM::qzinegbin(t(unif), munb=munbs, size=ks, pstr0=ps, ...), d, n))
  data <- .fixInf(data)
  return(data)
}

library(MASS)
library(mvtnorm)
sim.reps.full<-rmvzinegbin(n=1000, mu=fitted(phyfitNB2), Sigma=vcv(tree)/max(vcv(tree)))

for(i in 1:1000){
  print(i)
  #  i<-4
  
  sim.reps<-sim.reps.full[i,]
  sim.reps

  try(sim.phyfitNB2 <- compar.gee.nb2(sim.reps ~ pred1+pred2+pred3+pred4,phy=tree,theta=theta))
  sim.phyfitNB2
  sim.phyNB2.coef[,i]<-sim.phyfitNB2$coefficients
}
rownames(sim.phyNB2.coef)<-c("b0","b1","b2","b3","b4")
boot.phynb2.mean<-apply(sim.phyNB2.coef,1,mean)
boot.phynb2.sd<-apply(sim.phyNB2.coef,1,sd)



save.image("mammalyoboot2.rda")
library(phytools)
## Loading required package: ape
## Loading required package: maps
library(ape)
library(MASS)
library(ggplot2)
library(geepack)
library(geeM)
## Loading required package: Matrix
library(gee)

load("~/Dropbox/FCU/Teaching/Mentoring/2016Spring-2021Fall_AssociateProfessor/2020Fall/ChiYuWu/rcodeCYW/mammalyoboot.rda")

boot.poi.mean<-round(boot.poi.mean,3)
boot.poi.sd<-round(boot.poi.sd,3)

boot.nb2.mean<-round(boot.nb2.mean,3)
boot.nb2.sd<-round(boot.nb2.sd,3)

boot.phypoi.mean<-round(boot.phypoi.mean,3)
boot.phypoi.sd<-round(boot.phypoi.sd,3)

boot.phynb2.mean<-round(boot.phynb2.mean,3)
boot.phynb2.sd<-round(boot.phynb2.sd,3)

out.meansd.array<-array(NA,c(5,4))

#poi
out.meansd.array[1,1]<-paste(boot.poi.mean[1],"(",boot.poi.sd[1],")",sep="")
out.meansd.array[2,1]<-paste(boot.poi.mean[2],"(",boot.poi.sd[2],")",sep="")

out.meansd.array[3,1]<-paste(boot.poi.mean[3],"(",boot.poi.sd[3],")",sep="")

out.meansd.array[4,1]<-paste(boot.poi.mean[4],"(",boot.poi.sd[4],")",sep="")

out.meansd.array[5,1]<-paste(boot.poi.mean[5],"(",boot.poi.sd[5],")",sep="")


#nb2
out.meansd.array[1,2]<-paste(boot.nb2.mean[1],"(",boot.nb2.sd[1],")",sep="")

out.meansd.array[2,2]<-paste(boot.nb2.mean[2],"(",boot.nb2.sd[2],")",sep="")

out.meansd.array[3,2]<-paste(boot.nb2.mean[3],"(",boot.nb2.sd[3],")",sep="")

out.meansd.array[4,2]<-paste(boot.nb2.mean[4],"(",boot.nb2.sd[4],")",sep="")

out.meansd.array[5,2]<-paste(boot.nb2.mean[5],"(",boot.nb2.sd[5],")",sep="")


#phypoi
out.meansd.array[1,3]<-paste(boot.phypoi.mean[1],"(",boot.phypoi.sd[1],")",sep="")

out.meansd.array[2,3]<-paste(boot.phypoi.mean[2],"(",boot.phypoi.sd[2],")",sep="")

out.meansd.array[3,3]<-paste(boot.phypoi.mean[3],"(",boot.phypoi.sd[3],")",sep="")

out.meansd.array[4,3]<-paste(boot.phypoi.mean[4],"(",boot.phypoi.sd[4],")",sep="")

out.meansd.array[5,3]<-paste(boot.phypoi.mean[5],"(",boot.phypoi.sd[5],")",sep="")

#phynb2

out.meansd.array[1,4]<-paste(boot.phynb2.mean[1],"(",boot.phynb2.sd[1],")",sep="")

out.meansd.array[2,4]<-paste(boot.phynb2.mean[2],"(",boot.phynb2.sd[2],")",sep="")

out.meansd.array[3,4]<-paste(boot.phynb2.mean[3],"(",boot.phynb2.sd[3],")",sep="")

out.meansd.array[4,4]<-paste(boot.phynb2.mean[4],"(",boot.phynb2.sd[4],")",sep="")

out.meansd.array[5,4]<-paste(boot.phynb2.mean[5],"(",boot.phynb2.sd[5],")",sep="")


#install.packages("xtable")
library(xtable)
xtable(out.meansd.array)
## % latex table generated in R 4.3.1 by xtable 1.8-4 package
## % Sun Jul  9 14:29:36 2023
## \begin{table}[ht]
## \centering
## \begin{tabular}{rllll}
##   \hline
##  & 1 & 2 & 3 & 4 \\ 
##   \hline
## 1 & 2.13(0.414) & 2.153(0.414) & 2.497(0.289) & 2.492(0.309) \\ 
##   2 & -0.135(0.105) & -0.143(0.102) & -0.235(0.105) & -0.231(0.106) \\ 
##   3 & -1.409(1.458) & -1.479(1.616) & -2.572(0.815) & -2.621(1.041) \\ 
##   4 & 0.47(0.24) & 0.478(0.223) & 0.515(0.181) & 0.521(0.19) \\ 
##   5 & -0.047(0.014) & -0.048(0.015) & -0.058(0.011) & -0.059(0.012) \\ 
##    \hline
## \end{tabular}
## \end{table}
fitnb <-summary(glm.nb(reps ~ pred1+pred2+pred3+pred4)) # glm  find theta
## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached

## Warning in theta.ml(Y, mu, sum(w), w, limit = control$maxit, trace =
## control$trace > : iteration limit reached
fitnb$coefficients
##                Estimate Std. Error    z value     Pr(>|z|)
## (Intercept)  2.10300295 0.38389484  5.4780704 4.299890e-08
## pred1       -0.12827811 0.09939273 -1.2906187 1.968359e-01
## pred2       -1.22739458 1.36905302 -0.8965282 3.699708e-01
## pred3        0.46580641 0.22792177  2.0437118 4.098203e-02
## pred4       -0.04541566 0.01387460 -3.2732943 1.063017e-03
fitnb$coefficients[,"Estimate"]
## (Intercept)       pred1       pred2       pred3       pred4 
##  2.10300295 -0.12827811 -1.22739458  0.46580641 -0.04541566
theta <-  1 #fitnb$theta
fitnbphy <-compar.gee.nb2(reps~pred1+pred2+pred3+pred4,phy=tree,theta=theta)

phynb2est<-fitnbphy$coefficients



QIC.nb2.geese <- function(model.R) {
  library(MASS)
  # Fitted and observed values for quasi likelihood
  mu.R <- model.R$fitted.values
  # alt: X <- model.matrix(model.R)
  #  names(model.R$coefficients) <- NULL
  #  beta.R <- model.R$coefficients
  #  mu.R <- exp(X %*% beta.R)
  y <- model.R$y
  # Quasi Likelihood for Poisson
  quasi.R <- sum(y*(log(mu.R)-2*log(mu.R+1))) # nb2()$dev.resids - scale and weights = 1
  
  #  quasi.R <- sum((y*log(mu.R)) - mu.R) # poisson()$dev.resids - scale and weights = 1
  
  # Trace Term (penalty for model complexity)
  #  AIinverse <- ginv(model.Indep$vbeta.naiv) # Omega-hat(I) via Moore-Penrose generalized inverse of a matrix in MASS package
  # Alt: AIinverse <- solve(model.Indep$vbeta.naiv) # solve via identity
  #  Vr <- model.R$vbeta
  #  trace.R <- sum(diag(AIinverse %*% Vr))
  px <- length(mu.R) # number non-redunant columns in design matrix
  # QIC
  #  QIC <- (-2)*quasi.R + 2*trace.R
  QICu <- (-2)*quasi.R + 2*px    # Approximation assuming model structured correctly 
  #output <- c(QIC, QICu, quasi.R, trace.R, px)
  #names(output) <- c('QIC', 'QICu', 'Quasi Lik', 'Trace', 'px')
  #return(output)
  return(-0.5*QICu)
}

names(phynb2est)<-paste("b",0:4,sep="")
phynb2est
##         b0         b1         b2         b3         b4 
##  2.4074678 -0.2214458 -2.1447881  0.4838492 -0.0517304
model.R.phynb2<-NULL
#model.R$fitted.values<- r*exp(nb2est$x["b0"]+nb2est$x["b1"]*X)/(1-exp(nb2est$x["b0"]+nb2est$x["b1"]*X))

X<-cbind(rep(1,length(pred1)),pred1,pred2,pred3,pred4)
colnames(X)<-paste("b",0:4,sep="")
Y<-reps


r<-mean(colMeans(X)[2:5])
model.R.phynb2$fitted.values<- abs(r*exp(X%*%matrix(phynb2est,ncol=1))/(1-exp(X%*%matrix(phynb2est,ncol=1))))
model.R.phynb2$y<-Y

QIC.nb2.geese(model.R.phynb2)
## [1] -305.2931
#mu<-exp(coef[1]+coef[2]*X)#/theta/(1- exp(coef[1]+coef[2]*X)) 

treeglm<-compute.brlen(stree(length(reps)))

fitpoi <-compar.gee2(reps ~ pred1+pred2+pred3+pred4,phy=treeglm,family="poisson")
## Beginning Cgee S-function, @(#) geeformula.q 4.13 98/01/27
## running glm to get initial regression estimate
## (Intercept)       pred1       pred2       pred3       pred4 
##   2.1030035  -0.1282781  -1.2273978   0.4658067  -0.0454157
## Beginning Cgee S-function, @(#) geeformula.q 4.13 98/01/27
## running glm to get initial regression estimate
## (Intercept)       pred1       pred2       pred3       pred4 
##   2.1030035  -0.1282781  -1.2273978   0.4658067  -0.0454157 
## 
## Family: poisson 
## Link function: log
fitpoi$coefficients
## (Intercept)       pred1       pred2       pred3       pred4 
##   2.1030035  -0.1282781  -1.2273978   0.4658067  -0.0454157
fitpoiphy <-compar.gee2(reps ~ pred1+pred2+pred3+pred4,phy=tree,family="poisson")
## Beginning Cgee S-function, @(#) geeformula.q 4.13 98/01/27
## running glm to get initial regression estimate
## (Intercept)       pred1       pred2       pred3       pred4 
##   2.1030035  -0.1282781  -1.2273978   0.4658067  -0.0454157
## Beginning Cgee S-function, @(#) geeformula.q 4.13 98/01/27
## running glm to get initial regression estimate
## (Intercept)       pred1       pred2       pred3       pred4 
##   2.1030035  -0.1282781  -1.2273978   0.4658067  -0.0454157 
## 
## Family: poisson 
## Link function: log
fitpoiphy$coefficients
## (Intercept)       pred1       pred2       pred3       pred4 
##  2.45530634 -0.21619373 -2.39881340  0.50772870 -0.05697998
ls(fitnb)
##  [1] "aic"            "aliased"        "call"           "coefficients"  
##  [5] "cov.scaled"     "cov.unscaled"   "deviance"       "deviance.resid"
##  [9] "df"             "df.null"        "df.residual"    "dispersion"    
## [13] "family"         "iter"           "null.deviance"  "SE.theta"      
## [17] "terms"          "th.warn"        "theta"          "twologlik"
ls(fitnbphy)
##  [1] "alpha"           "beta"            "biggest.R.alpha" "call"           
##  [5] "clusz"           "coefficients"    "coefnames"       "converged"      
##  [9] "corr"            "eta"             "formula"         "FunList"        
## [13] "naiv.var"        "niter"           "offset"          "phi"            
## [17] "terms"           "var"             "weights"         "X"              
## [21] "y"
ls(fitpoi)
##  [1] "call"          "coefficients"  "dfP"           "effect.assign"
##  [5] "family"        "fitted.values" "link"          "nobs"         
##  [9] "QIC"           "residuals"     "scale"         "W"
ls(fitpoiphy)
##  [1] "call"          "coefficients"  "dfP"           "effect.assign"
##  [5] "family"        "fitted.values" "link"          "nobs"         
##  [9] "QIC"           "residuals"     "scale"         "W"
fitnbphy$coefficients
## [1]  2.4074678 -0.2214458 -2.1447881  0.4838492 -0.0517304
round(fitnb$coefficients[,"Estimate"],3)
## (Intercept)       pred1       pred2       pred3       pred4 
##       2.103      -0.128      -1.227       0.466      -0.045
round(fitnbphy$coefficients,3)
## [1]  2.407 -0.221 -2.145  0.484 -0.052
round(fitpoi$coefficients,3)
## (Intercept)       pred1       pred2       pred3       pred4 
##       2.103      -0.128      -1.227       0.466      -0.045
round(fitpoiphy$coefficients,3)
## (Intercept)       pred1       pred2       pred3       pred4 
##       2.455      -0.216      -2.399       0.508      -0.057
fitnbphy$naiv.var
## 5 x 5 Matrix of class "dgeMatrix"
##               (Intercept)         pred1         pred2         pred3
## (Intercept)  0.0401408585 -6.788811e-03 -0.0127695407 -5.937376e-03
## pred1       -0.0067888114  4.483880e-03  0.0007796333  4.253958e-05
## pred2       -0.0127695407  7.796333e-04  0.0970220861  2.458455e-03
## pred3       -0.0059373764  4.253958e-05  0.0024584550  1.058676e-02
## pred4       -0.0001699916 -1.151838e-05  0.0005336897 -1.548662e-04
##                     pred4
## (Intercept) -1.699916e-04
## pred1       -1.151838e-05
## pred2        5.336897e-04
## pred3       -1.548662e-04
## pred4        2.469036e-05
fitpoi
## Call: compar.gee2(formula = reps ~ pred1 + pred2 + pred3 + pred4, family = "poisson", 
##     phy = treeglm)
## Number of observations:  30 
## Model:
##                       Link: log 
##  Variance to Mean Relation: poisson 
## 
## QIC: -125.7292 
## 
## Summary of Residuals:
##        Min         1Q     Median         3Q        Max 
## -2.6208232 -0.8012708 -0.1144702  0.8701914  2.6498865 
## 
## 
## Coefficients:
##               Estimate        S.E.         t  Pr(T > |t|)
## (Intercept)  2.1030035 0.257692517  8.160902 1.631248e-08
## pred1       -0.1282781 0.066718008 -1.922691 6.598276e-02
## pred2       -1.2273978 0.918986524 -1.335599 1.937135e-01
## pred3        0.4658067 0.152994228  3.044603 5.423181e-03
## pred4       -0.0454157 0.009313465 -4.876348 5.133433e-05
## 
## Estimated Scale Parameter:  0.4505968
## "Phylogenetic" df (dfP):  30
#### + s.e.
a = vector()
for (i in 1:5){
  a[(i-1)*2+1]<- round(fitnb$coefficients,3)[i,1]
  a[(i-1)*2+2]<- round(fitnb$coefficients,3)[i,2]
}
b=vector()
for (i in 1:5){
  b[(i-1)*2+1] <- round(fitnbphy$coefficients,3)[i]
  b[(i-1)*2+2]<- NA
}

a
##  [1]  2.103  0.384 -0.128  0.099 -1.227  1.369  0.466  0.228 -0.045  0.014
b
##  [1]  2.407     NA -0.221     NA -2.145     NA  0.484     NA -0.052     NA
result <- data.frame(nb.glm = round(fitnb$coefficients,3)[1:5],nb.gee = round(fitnbphy$coefficients,3),poi.glm=round(fitpoi$coefficients,3),poi.gee=round(fitpoiphy$coefficients,3),row.names = c("Intercept","litter per year","offspring value as per equation","Spread","Longevity in years"))

#Can consider to make tree plot using ggtree
# c
########################################## spetoget
library(ggtree)
## ggtree v3.8.0 For help: https://yulab-smu.top/treedata-book/
## 
## If you use the ggtree package suite in published research, please cite
## the appropriate paper(s):
## 
## Guangchuang Yu, David Smith, Huachen Zhu, Yi Guan, Tommy Tsan-Yuk Lam.
## ggtree: an R package for visualization and annotation of phylogenetic
## trees with their covariates and other associated data. Methods in
## Ecology and Evolution. 2017, 8(1):28-36. doi:10.1111/2041-210X.12628
## 
## G Yu. Data Integration, Manipulation and Visualization of Phylogenetic
## Trees (1st ed.). Chapman and Hall/CRC. 2022. ISBN: 9781032233574
## 
## LG Wang, TTY Lam, S Xu, Z Dai, L Zhou, T Feng, P Guo, CW Dunn, BR
## Jones, T Bradley, H Zhu, Y Guan, Y Jiang, G Yu. treeio: an R package
## for phylogenetic tree input and output with richly annotated and
## associated data. Molecular Biology and Evolution. 2020, 37(2):599-603.
## doi: 10.1093/molbev/msz240
## 
## Attaching package: 'ggtree'
## The following object is masked from 'package:Matrix':
## 
##     expand
## The following object is masked from 'package:ape':
## 
##     rotate
library(ape)
#tree<-read.tree("~/Dropbox/ChiYoWu/rcodeCYW/mammal2.nwk")
tree<- read.tree("~/Dropbox/FCU/Teaching/Mentoring/2016Spring-2021Fall_AssociateProfessor/2020Fall/ChiYuWu/rcodeCYW/mammal2.nwk")


ggtree(tree)+theme_tree()+geom_text(aes(label=label),size=.5)

plot(tree,cex = .5)