rm(list=ls())
#setwd("~/Dropbox/FCU/Teaching/Mentoring/2020Fall/ChiYuWu/rcodeCYW/")
setwd("~/Dropbox/FCU/Teaching/Mentoring/2016Spring-2021Fall_AssociateProfessor/2020Fall/ChiYuWu/rcodeCYW/")
#mtx <- read.table("C:/Users/User/Dropbox/ChiYoWu/dataset/niewiarowski2004phylogenetic/mtxEOV-2.txt")
#mtx <- read.table("~/Dropbox/FCU/Teaching/Mentoring/2020Fall/ChiYuWu/dataset/niewiarowski2004phylogenetic/mtxEOV-2.txt")
library(phytools)
library(ape)
library(MASS)
library(ggplot2)
library(geepack)
library(geeM)
library(gee)
library(ggtree)
compar.gee.nb2<-function (formula, data = NULL, phy, corStruct,scale.fix = FALSE, scale.value = 1,theta=theta){
#family = negative.binomial(2)
if (requireNamespace("gee", quietly = TRUE))
gee <- gee::gee
else stop("package 'gee' not available")
if (!missing(corStruct)) {
if (!missing(phy))
warning("the phylogeny was ignored because you gave a 'corStruct' object")
R <- vcv(corStruct, corr = TRUE)
}
else {
R <- vcv(phy, corr = TRUE)
}
if (is.null(data))
data <- parent.frame()
else {
nmsR <- rownames(R)
if (!identical(rownames(data), nmsR)) {
if (!any(is.na(match(rownames(data), nmsR))))
data <- data[nmsR, ]
else {
msg <- if (missing(corStruct))
"the tip labels of the tree"
else "those of the correlation structure"
msg <- paste("the rownames of the data.frame and",
msg, "do not match: the former were ignored in the analysis")
warning(msg)
}
}
}
effect.assign <- attr(model.matrix(formula, data = data), "assign")
for (i in all.vars(formula)) {
if (any(is.na(eval(parse(text = i), envir = data))))
stop("the present method cannot be used with missing data: you may consider removing the species with missing data from your tree with the function 'drop.tip'.")
}
id <- rep(1, dim(R)[1])
###############################
# negative.binomial(2)
# HERE WE HAVE NEGATIVE BINOMIAL
geemod<-do.call("geem", list(formula, id, data = data, family = MASS::negative.binomial(theta= theta), corstr = "fixed", corr.mat=R, tol=1e-4,maxit=25))
geemod$coefficients<-geemod$beta
return(geemod)
}
mtx <- read.table("~/Dropbox/FCU/Teaching/Mentoring/2016Spring-2021Fall_AssociateProfessor/2020Fall/ChiYuWu/dataset/niewiarowski2004phylogenetic/mtxEOV-2.txt")
dim(mtx)
mtx<-as.matrix(mtx)
mtx<-mtx +t(mtx)
diag(mtx)<-1
library(phangorn)
D<- 2*(max(mtx)- mtx)
tree<-upgma(D)
plot(tree)
#tree$tip.label<-LETTERS[1:17]
tree$tip.label <- c("S. undulatus(GA)","S. undulatus(OH)","S. undulatus(AL)","S. undulatus(NJ)","S. undulatus(PA)","S. undulatus(SC)","S. woodi","S. undulatus(AZ)","S. undulatus(UT)","S. undulatus(Huerfano.CO)","S. undulatus(Mesa.CO)","S. undulatus(NE)","S. undulatus(TX)","S. undulatus(Grant.NM)","S. undulatus(Hidalgo.NM)","S. virgatus","S. occidentalis")
tree$tip.label <- c("GA","OH","AL","NJ","PA","SC","S. woodi","AZ","UT","Huerfano.CO","Mesa.CO","NE","TX","Grant.NM","Hidalgo.NM","S. virgatus","S. occidentalis")
plot(tree)
vcv(tree)
library(ggtree)
ggtree(tree, branch.length="none")+ geom_tiplab()+xlim(0, 15)
#traitset<-read.table("C:/Users/User/Dropbox/ChiYoWu/dataset/niewiarowski2004phylogenetic/traitEOV-2.txt")
#traitset<-read.table("~/Dropbox/FCU/Teaching/Mentoring/2020Fall/ChiYuWu/dataset/niewiarowski2004phylogenetic/traitEOV-2.txt")
traitset<-read.table("~/Dropbox/FCU/Teaching/Mentoring/2016Spring-2021Fall_AssociateProfessor/2020Fall/ChiYuWu/dataset/niewiarowski2004phylogenetic/traitEOV-2.txt")
dim(traitset)
head(traitset)
traitset<-as.data.frame(traitset)
colnames(traitset)<-c("size at maturity(mm)", "average size(mm)", "age at maturity(mo)", "egg mass(g)", "clutch size", "clutch mass(g)", "eggs per year")
traitset$`eggs per year`<-round(traitset$`eggs per year`)
traitset<-as.data.frame(traitset)
traitset$`egg mass(g)`
traitset$`eggs per year`
#install.packages("ggplot2")
library(ggplot2)
library(tidyverse)
f <- function(x){
3.397*exp(-1.179 *x)
}
ggplot(traitset,aes(x=`egg mass(g)`,y=`eggs per year`)) + geom_point()+ggtitle('Lizard Dataset')+theme_bw()
library(xtable)
xtable(traitset)
poi<-glm(traitset$`eggs per year`~traitset$`egg mass(g)`,family = "poisson")
poi
summary(poi)
library(MASS)
nb2<-glm(traitset$`eggs per year`~traitset$`egg mass(g)`,family = negative.binomial(theta = 10))
nb2
summary(nb2)
#1 size at maturity,
#2 average size
#3 age at maturity,
#4 egg mass,
#5 clutch size
#6 clutch mass
#7 eggs per year
eggs_per_year<-traitset$`eggs per year`
mean(eggs_per_year)
var(eggs_per_year)
#eggs_mass<-traitset$`egg mass(g)`
eggs_mass<-log(traitset$`egg mass(g)`)
glm(eggs_per_year~eggs_mass,family="poisson")
data <- data.frame(eggs_mass=eggs_mass, eggs_per_year=eggs_per_year)
data
library(MASS)
#fitLM <- glm(eggs_per_year ~ eggs_mass, data, family = gaussian(link = "identity"))
fitPOIS <- glm(eggs_per_year ~ eggs_mass, data, family = poisson(link = "log"))
# do bootstrap sample and refit the data to compute the CI and then check with theoretical
summary(fitPOIS)
fitPOIS$coefficients
lambda.vec= exp(fitPOIS$coefficients[1]+ fitPOIS$coefficients[2]*eggs_mass)
lambda.vec
sim.poi.coef<-array(NA,c(2,1000))
for(i in 1:1000){
sim.eggs_per_year<-rpois(length(lambda.vec),lambda = lambda.vec)
sim.eggs_per_year
sim.fitPOIS <- glm(sim.eggs_per_year ~ eggs_mass, data, family = poisson(link = "log"))
sim.poi.coef[,i]<-sim.fitPOIS$coefficients
}
rownames(sim.poi.coef)<-c("b0","b1")
# look nice, the bootstrap results look goods for poisson regression.
# the standard deviation is closed to the theoretical one
boot.poi.mean<-apply(sim.poi.coef,1,mean)
boot.poi.sd<-apply(sim.poi.coef,1,sd)
boot.poi.mean
boot.poi.sd
sumfitpois<-summary(fitPOIS)
sumfitpois$coefficients
###########################################################
fitNB2 <-glm.nb(eggs_per_year ~ eggs_mass, data = data)
sim.nb2.coef<-array(NA,c(2,1000))
#?rnegbin
for(i in 1:1000){
sim.eggs_per_year<- rnegbin(length(eggs_mass), mu = fitted(fitNB2), theta = fitNB2$theta)
sim.eggs_per_year
sim.nb2 <- glm.nb(sim.eggs_per_year ~ eggs_mass, data = data)
sim.nb2.coef[,i]<-sim.nb2$coefficients
}
rownames(sim.nb2.coef)<-c("b0","b1")
# look nice, the bootstrap results look goods for poisson regression.
# the standard deviation is closed to the theoretical one
boot.nb2.mean<-apply(sim.nb2.coef,1,mean)
boot.nb2.sd<-apply(sim.nb2.coef,1,sd)
boot.nb2.mean
boot.nb2.sd
sumfitnb2<-summary(fitNB2)
sumfitnb2$coefficients
#library(xtable)
#install.packages("qpcR")
library(qpcR)
# results<-AIC(fitLM, fitPOIS,fitNB2)
# awe<-akaike.weights(results$AIC)
# results$deltaAIC<-awe$deltaAIC
# results$weights<-awe$weights
# xtable(results)
results<-AIC(fitPOIS,fitNB2)
awe<-akaike.weights(results$AIC)
results$deltaAIC<-awe$deltaAIC
results$weights<-awe$weights
xtable(results)
plot(eggs_per_year ~ eggs_mass)
newdat <- data.frame(eggs_mass = seq(min(eggs_mass),max(eggs_mass),0.01))
#newdat$predLM <- predict(fitLM, newdata = newdat, type = "response")
newdat$predPOIS <- predict(fitPOIS, newdata = newdat, type = "response")
newdat$predNB2 <- predict(fitNB2, newdata = newdat, type = "response")
plot(eggs_per_year ~ eggs_mass, data,main="Lizard Trait",xlab="eggs mass", ylab="eggs per year")
#lines(predLM ~ eggs_mass, newdat, col = 2)
lines(predPOIS ~ eggs_mass, newdat, col = "red")
lines(predNB2 ~ eggs_mass, newdat, col = "blue")
############################################
############# phypoi #######################
############################################
### Poisson
ordsamplep.poi<-function (n, lambda, Sigma){
# n=sims
# lambda=lambda.array
# Sigma=C
k <- length(lambda)
# n<-2
valori <- mvrnorm(n, rep(0, k), Sigma)
for (i in 1:k)
{
# i<-1
valori[, i] <- qpois(pnorm(valori[,i]), lambda[i])
}
return(valori)
}
phyfitPOIS<-compar.gee(eggs_per_year ~ eggs_mass,phy=tree,family="poisson",scale.fix = TRUE)
sim.phypoi.coef<-array(NA,c(2,1000))
sim.eggs_per_year.full<- ordsamplep.poi(n=1000, lambda=fitted(phyfitPOIS), Sigma=vcv(tree))
for(i in 1:1000){
# i<-1
sim.eggs_per_year<-sim.eggs_per_year.full[i,]
sim.phyfitPOIS <- compar.gee(sim.eggs_per_year ~ eggs_mass,phy=tree,family="poisson",scale.fix = TRUE)
sim.phypoi.coef[,i]<-sim.phyfitPOIS$coefficients
}
rownames(sim.phypoi.coef)<-c("b0","b1")
boot.phypoi.mean<-apply(sim.phypoi.coef,1,mean)
boot.phypoi.sd<-apply(sim.phypoi.coef,1,sd)
phyfitPOIS
#phypoi also work too nice
curve(exp(phyfitPOIS$coefficients[1]+phyfitPOIS$coefficients[2]*x),range(eggs_mass),add=TRUE,col="orange",lwd=2)
#####################################################################################
# QIC for GEE models
# Daniel J. Hocking
# 07 February 2012
# Refs:
# Pan (2001)
# Liang and Zeger (1986)
# Zeger and Liang (1986)
# Hardin and Hilbe (2003)
# Dornmann et al 2007
# # http://www.unc.edu/courses/2010spring/ecol/562/001/docs/lectures/lecture14.htm
#####################################################################################
# Poisson QIC for geese{geepack} output
# Ref: Pan (2001)
#QIC.pois.geese <- function(model.R, model.indep) {
QIC.pois.geese <- function(model.R){
library(MASS)
# Fitted and observed values for quasi likelihood
mu.R <- model.R$fitted.values
# alt: X <- model.matrix(model.R)
# names(model.R$coefficients) <- NULL
# beta.R <- model.R$coefficients
# mu.R <- exp(X %*% beta.R)
y <- model.R$y
# Quasi Likelihood for Poisson
quasi.R <- sum((y*log(mu.R)) - mu.R) # poisson()$dev.resids - scale and weights = 1
# Trace Term (penalty for model complexity)
# AIinverse <- ginv(model.Indep$vbeta.naiv) # Omega-hat(I) via Moore-Penrose generalized inverse of a matrix in MASS package
# Alt: AIinverse <- solve(model.Indep$vbeta.naiv) # solve via identity
# Vr <- model.R$vbeta
# trace.R <- sum(diag(AIinverse %*% Vr))
px <- length(mu.R) # number non-redunant columns in design matrix
# QIC
# QIC <- (-2)*quasi.R + 2*trace.R
QICu <- (-2)*quasi.R + 2*px # Approximation assuming model structured correctly
# output <- c(QIC, QICu, quasi.R, trace.R, px)
# names(output) <- c('QIC', 'QICu', 'Quasi Lik', 'Trace', 'px')
# output
return(QICu)
}
model.R.phypoi<-NULL
model.R.phypoi$fitted.values<- exp(phyfitPOIS$coefficients[1]+phyfitPOIS$coefficients[2]*eggs_mass)
model.R.phypoi$y<-eggs_per_year
QIC.pois.geese.phy<-QIC.pois.geese(model.R.phypoi)
iidpoi<-geeglm(eggs_per_year ~ eggs_mass, id = 1:length(eggs_mass), data = NULL,
family = poisson, corstr = "independence", scale.fix = TRUE)
QIC.pois.geese.iid<-geepack::QIC(iidpoi)
QIC.pois.geese.iid
QIC.pois.geese.phy
############################################
############# phynbs #######################
############################################
Mu<-function(params,r=r,X=X){
b0<-params["b0"]
b1<-params["b1"]
mu.vec<-r*exp(b0+b1*X)/(1-exp(b0+b1*X))
return(mu.vec)
}
gee_nb2_equation<-function(params,C=C,X=X,Y=Y,r=r){
b0<-params["b0"]
b1<-params["b1"]
C<-C/max(C)
mu<-Mu(params,r=r,X=X)
A<-diag((mu + mu^2/r))
sqrtA<-sqrt(A)
eqn<- rbind(mu,mu*X) %*%solve(sqrtA%*%C%*%sqrtA)%*%(Y-mu)
return(eqn)
}
C<-vcv(tree)
params<-phyfitPOIS$coefficients
names(params) <- c("b0","b1")
X=eggs_mass
Y=eggs_per_year
r=mean(Y)
library(MASS)
library(nleqslv)
nb2est<-nleqslv(params,gee_nb2_equation,C=C,X=X,Y=Y,r=r)
nb2est$x
curve(r*exp(nb2est$x["b0"]+nb2est$x["b1"]*x)/(1-exp(nb2est$x["b0"]+nb2est$x["b1"]*x)),range(X),add=TRUE,col="black",lwd=2)
legend("topleft", legend = c("poi","nb2","phypoi","phynb2"), col = c("red","blue","orange","black"), lty = 1)
#legend("topleft", legend = c("lm", "poisson","nb2"), col = c(2,4,3), lty = 1)
#####################################################################################
# QIC for GEE models
# modify from Daniel J. Hocking
# 07 February 2012
# Refs:0
# Pan (2001)
# Liang and Zeger (1986)
# Zeger and Liang (1986)
# Hardin and Hilbe (2003)
# Dornmann et al 2007
# # http://www.unc.edu/courses/2010spring/ecol/562/001/docs/lectures/lecture14.htm
# # https://www.r-bloggers.com/2012/03/r-script-to-calculate-qic-for-generalized-estimating-equation-gee-model-selection/
#####################################################################################
# Poisson QIC for geese{geepack} output
# Ref: Pan (2001)
QIC.nb2.geese <- function(model.R) {
library(MASS)
# Fitted and observed values for quasi likelihood
mu.R <- model.R$fitted.values
# alt: X <- model.matrix(model.R)
# names(model.R$coefficients) <- NULL
# beta.R <- model.R$coefficients
# mu.R <- exp(X %*% beta.R)
y <- model.R$y
# Quasi Likelihood for Poisson
quasi.R <- sum(y*(log(mu.R)-2*log(mu.R+1))) # nb2()$dev.resids - scale and weights = 1
# quasi.R <- sum((y*log(mu.R)) - mu.R) # poisson()$dev.resids - scale and weights = 1
# Trace Term (penalty for model complexity)
# AIinverse <- ginv(model.Indep$vbeta.naiv) # Omega-hat(I) via Moore-Penrose generalized inverse of a matrix in MASS package
# Alt: AIinverse <- solve(model.Indep$vbeta.naiv) # solve via identity
# Vr <- model.R$vbeta
# trace.R <- sum(diag(AIinverse %*% Vr))
px <- length(mu.R) # number non-redunant columns in design matrix
# QIC
# QIC <- (-2)*quasi.R + 2*trace.R
QICu <- (-2)*quasi.R + 2*px # Approximation assuming model structured correctly
#output <- c(QIC, QICu, quasi.R, trace.R, px)
#names(output) <- c('QIC', 'QICu', 'Quasi Lik', 'Trace', 'px')
#return(output)
return(-0.5*QICu)
}
model.R.phynb2<-NULL
#model.R$fitted.values<- r*exp(nb2est$x["b0"]+nb2est$x["b1"]*X)/(1-exp(nb2est$x["b0"]+nb2est$x["b1"]*X))
model.R.phynb2$fitted.values<- r*exp(nb2est$x["b0"]+nb2est$x["b1"]*X)/(1-exp(nb2est$x["b0"]+nb2est$x["b1"]*X))
model.R.phynb2$y<-Y
QIC.nb2.geese(model.R.phynb2)
#########################################
#compar.gee.nb2(params,C=C,X=X,Y=Y,r=r)
ordsamplep.nb2<-function (n,r, mu, Sigma){
k <- length(mu)
valori <- mvrnorm(n, rep(0, k), Sigma)
dim(valori)
for (i in 1:k)
{
# i<-1
valori[,i] <- qnbinom(p=pnorm(valori[,i]), size=r, prob=mu[i]/(mu[i]+r)) # mu = n(1-p)/p
# qnbinom
}
return(valori)
}
fitnb <-summary(glm.nb(eggs_per_year ~ eggs_mass)) # glm find theta
fitnb$coefficients
fitnb$coefficients[,"Estimate"]
theta <- fitnb$theta
phyfitNB2<-compar.gee.nb2(eggs_per_year ~ eggs_mass,phy=tree,theta=theta)
fitted(phyfitNB2)
sim.phyNB2.coef<-array(NA,c(2,1000))
#sim.eggs_per_year.full<- ordsamplep.nb2(n=1000,r=mean(eggs_per_year), mu=fitted(phyfitNB2), Sigma=vcv(tree))
fit.neg<-fitdistr(eggs_per_year,"Negative binomial")
#sim.eggs_per_year.full<- ordsamplep.nb2(n=1000,r=fit.neg$estimate["size"], mu=fitted(phyfitNB2), Sigma=vcv(tree))
# install.packages("devtools")
# library(devtools)
# install_github("zdk123/SpiecEasi")
# library(SpiecEasi)
#
# install.packages("remotes")
# remotes::install_github("zdk123/SpiecEasi")
#install.packages("SimMultiCorrData")
library(SimMultiCorrData)
rcorrvar2(n = 1000, means=fitted(phyfitNB2),Sigma=vcv(tree))
#?rcorrvar2
.zinegbin_getLam <- function(mu, S) {
S <- max(sqrt(mu)+1e-3, S)
(mu + (mu^2 - mu + S^2) / mu) / 2
}
#' @noRd
#' @keywords internal
.zinegbin_getP <- function(mu, lam) {
1 - (mu / lam)
}
#' @noRd
#' @keywords internal
.zinegbin_getK <- function(mu, S, lam) {
S <- max(sqrt(mu)+1e-3, S)
(mu * lam) / (mu^2 - (mu * (lam + 1)) + S^2)
}
#' @keywords internal
.fixInf <- function(data) {
# hacky way of replacing infinite values with the col max + 1
if (any(is.infinite(data))) {
data <- apply(data, 2, function(x) {
if (any(is.infinite(x))) {
x[ind<-which(is.infinite(x))] <- NA
x[ind] <- max(x, na.rm=TRUE)+1
}
x
})
}
data
}
rmvzinegbin <- function(n, mu, Sigma, munbs, ks, ps, ...) {
# Generate an NxD matrix of Zero-inflated poisson data,
# with counts approximately correlated according to Sigma
Cor <- cov2cor(Sigma)
SDs <- sqrt(diag(Sigma))
if (missing(munbs) || missing(ps) || missing(ks)) {
if (length(mu) != length(SDs)) stop("Sigma and mu dimensions don't match")
munbs <- unlist(lapply(1:length(SDs), function(i) .zinegbin_getLam(mu[i], SDs[i])))
ps <- unlist(lapply(1:length(SDs), function(i) .zinegbin_getP(mu[i], munbs[i])))
ks <- unlist(lapply(1:length(SDs), function(i) .zinegbin_getK(mu[i], SDs[i], munbs[i])))
}
if (length(munbs) != length(SDs)) stop("Sigma and mu dimensions don't match")
d <- length(munbs)
normd <- rmvnorm(n, rep(0, d), sigma=Cor)
unif <- pnorm(normd)
data <- t(matrix(VGAM::qzinegbin(t(unif), munb=munbs, size=ks, pstr0=ps, ...), d, n))
data <- .fixInf(data)
return(data)
}
library(MASS)
library(mvtnorm)
sim.eggs_per_year.full<-rmvzinegbin(n=1000, mu=fitted(phyfitNB2), Sigma=vcv(tree))
for(i in 1:1000){
print(i)
# i<-4
sim.eggs_per_year<-sim.eggs_per_year.full[i,]
sim.eggs_per_year
# par(mfrow=c(1,2))
# plot(eggs_per_year~eggs_mass)
# plot(sim.eggs_per_year~eggs_mass)
#
sim.phyfitNB2 <- compar.gee.nb2(sim.eggs_per_year ~ eggs_mass,phy=tree,theta=theta)
sim.phyfitNB2
sim.phyNB2.coef[,i]<-sim.phyfitNB2$coefficients
}
rownames(sim.phyNB2.coef)<-c("b0","b1")
boot.phynb2.mean<-apply(sim.phyNB2.coef,1,mean)
boot.phynb2.sd<-apply(sim.phyNB2.coef,1,sd)
phyfitNB2
phyfitPOIS$coefficients
fitPOIS$coefficients
fitNB2$coefficients
apply(sim.phypoi.coef,1,sd)
system("pwd")
save.image("lizardyoboot2.rda")
output<-t(rbind(fitPOIS$coefficients,
fitNB2$coefficients,
phyfitPOIS$coefficients,
phyfitNB2$coefficients))
xtable(output)
rm(list=ls())
library(phytools)
## Loading required package: ape
## Loading required package: maps
library(ape)
library(MASS)
library(ggplot2)
library(geepack)
library(geeM)
## Loading required package: Matrix
library(gee)
library(ggtree)
## ggtree v3.8.0 For help: https://yulab-smu.top/treedata-book/
##
## If you use the ggtree package suite in published research, please cite
## the appropriate paper(s):
##
## Guangchuang Yu, David Smith, Huachen Zhu, Yi Guan, Tommy Tsan-Yuk Lam.
## ggtree: an R package for visualization and annotation of phylogenetic
## trees with their covariates and other associated data. Methods in
## Ecology and Evolution. 2017, 8(1):28-36. doi:10.1111/2041-210X.12628
##
## LG Wang, TTY Lam, S Xu, Z Dai, L Zhou, T Feng, P Guo, CW Dunn, BR
## Jones, T Bradley, H Zhu, Y Guan, Y Jiang, G Yu. treeio: an R package
## for phylogenetic tree input and output with richly annotated and
## associated data. Molecular Biology and Evolution. 2020, 37(2):599-603.
## doi: 10.1093/molbev/msz240
##
## Guangchuang Yu, Tommy Tsan-Yuk Lam, Huachen Zhu, Yi Guan. Two methods
## for mapping and visualizing associated data on phylogeny using ggtree.
## Molecular Biology and Evolution. 2018, 35(12):3041-3043.
## doi:10.1093/molbev/msy194
##
## Attaching package: 'ggtree'
## The following object is masked from 'package:Matrix':
##
## expand
## The following object is masked from 'package:ape':
##
## rotate
load("~/Dropbox/FCU/Teaching/Mentoring/2016Spring-2021Fall_AssociateProfessor/2020Fall/ChiYuWu/rcodeCYW/lizardyoboot.rda")
boot.poi.mean<-round(boot.poi.mean,3)
boot.poi.sd<-round(boot.poi.sd,3)
boot.nb2.mean<-round(boot.nb2.mean,3)
boot.nb2.sd<-round(boot.nb2.sd,3)
boot.phypoi.mean<-round(boot.phypoi.mean,3)
boot.phypoi.sd<-round(boot.phypoi.sd,3)
boot.phynb2.mean<-round(boot.phynb2.mean,3)
boot.phynb2.sd<-round(boot.phynb2.sd,3)
out.meansd.array<-array(NA,c(2,4))
#poi
out.meansd.array[1,1]<-paste(boot.poi.mean[1],"(",boot.poi.sd[1],")",sep="")
out.meansd.array[2,1]<-paste(boot.poi.mean[2],"(",boot.poi.sd[2],")",sep="")
#nb2
out.meansd.array[1,2]<-paste(boot.nb2.mean[1],"(",boot.nb2.sd[1],")",sep="")
out.meansd.array[2,2]<-paste(boot.nb2.mean[2],"(",boot.nb2.sd[2],")",sep="")
#phypoi
out.meansd.array[1,3]<-paste(boot.phypoi.mean[1],"(",boot.phypoi.sd[1],")",sep="")
out.meansd.array[2,3]<-paste(boot.phypoi.mean[2],"(",boot.phypoi.sd[2],")",sep="")
#phynb2
out.meansd.array[1,4]<-paste(boot.phynb2.mean[1],"(",boot.phynb2.sd[1],")",sep="")
out.meansd.array[2,4]<-paste(boot.phynb2.mean[2],"(",boot.phynb2.sd[2],")",sep="")
#install.packages("xtable")
library(xtable)
xtable(out.meansd.array)
## % latex table generated in R 4.3.1 by xtable 1.8-4 package
## % Sun Jul 9 14:40:28 2023
## \begin{table}[ht]
## \centering
## \begin{tabular}{rllll}
## \hline
## & 1 & 2 & 3 & 4 \\
## \hline
## 1 & 3.397(0.237) & 3.411(0.344) & 3.88(0.172) & 3.753(0.176) \\
## 2 & -1.188(0.735) & -1.258(1.065) & -3.302(0.566) & -2.831(0.569) \\
## \hline
## \end{tabular}
## \end{table}
boot.nb2.mean
## b0 b1
## 3.411 -1.258
boot.nb2.sd
## b0 b1
## 0.344 1.065
boot.phypoi.mean
## b0 b1
## 3.880 -3.302
boot.phypoi.sd
## b0 b1
## 0.172 0.566
boot.phynb2.mean
## b0 b1
## 3.753 -2.831
boot.phynb2.sd
## b0 b1
## 0.176 0.569
summary(fitPOIS)
##
## Call:
## glm(formula = eggs_per_year ~ eggs_mass, family = poisson(link = "log"),
## data = data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.3968 0.2334 14.553 <2e-16 ***
## eggs_mass -1.1792 0.7276 -1.621 0.105
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 42.962 on 16 degrees of freedom
## Residual deviance: 40.280 on 15 degrees of freedom
## AIC: 126.01
##
## Number of Fisher Scoring iterations: 4
summary(fitNB2)
##
## Call:
## glm.nb(formula = eggs_per_year ~ eggs_mass, data = data, init.theta = 14.97916062,
## link = log)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.4256 0.3583 9.560 <2e-16 ***
## eggs_mass -1.2707 1.1045 -1.151 0.25
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(14.9792) family taken to be 1)
##
## Null deviance: 18.319 on 16 degrees of freedom
## Residual deviance: 17.088 on 15 degrees of freedom
## AIC: 119.28
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 14.98
## Std. Err.: 8.89
##
## 2 x log-likelihood: -113.278
phyfitPOIS
## Call: compar.gee(formula = eggs_per_year ~ eggs_mass, family = "poisson",
## phy = tree, scale.fix = TRUE)
## Number of observations: 17
## Model:
## Link: log
## Variance to Mean Relation: poisson
##
## QIC: -1393.149
##
## Summary of Residuals:
## Min 1Q Median 3Q Max
## -12.829250 3.055108 4.414483 6.225469 17.902768
##
##
## Coefficients:
## Estimate S.E. t Pr(T > |t|)
## (Intercept) 3.877618 0.1661241 23.341700 4.529787e-06
## eggs_mass -3.259023 0.5492056 -5.934068 2.335199e-03
##
## Estimated Scale Parameter: 1
## "Phylogenetic" df (dfP): 6.733333
phyfitNB2
## geem(formula = eggs_per_year ~ eggs_mass, id = c(1, 1, 1, 1,
## 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), data = <environment>,
## family = list(family = "Negative Binomial(14.9792)", link = "log",
## linkfun = function (mu)
## log(mu), linkinv = function (eta)
## pmax(exp(eta), .Machine$double.eps), variance = function (mu)
## mu + mu^2/.Theta, dev.resids = function (y, mu, wt)
## 2 * wt * (y * log(pmax(1, y)/mu) - (y + .Theta) * log((y +
## .Theta)/(mu + .Theta))), aic = function (y, n, mu,
## wt, dev)
## {
## term <- (y + .Theta) * log(mu + .Theta) - y * log(mu) +
## lgamma(y + 1) - .Theta * log(.Theta) + lgamma(.Theta) -
## lgamma(.Theta + y)
## 2 * sum(term * wt)
## }, mu.eta = function (eta)
## pmax(exp(eta), .Machine$double.eps), initialize = expression(
## {
## if (any(y < 0))
## stop("negative values not allowed for the negative binomial family")
## n <- rep(1, nobs)
## mustart <- y + (y == 0)/6
## }), validmu = function (mu)
## all(mu > 0), valideta = function (eta)
## TRUE, simulate = function (object, nsim)
## {
## ftd <- fitted(object)
## rnegbin(nsim * length(ftd), ftd, .Theta)
## }), corstr = "fixed", corr.mat = c(1, 0.933333333, 0.866666667,
## 0.666666667, 0.666666667, 0.666666667, 0.6, 0.4, 0.4, 0.4,
## 0.4, 0.266666667, 0.266666667, 0.133333333, 0.133333333,
## 0.066666667, 0, 0.933333333, 1, 0.866666667, 0.666666667,
## 0.666666667, 0.666666667, 0.6, 0.4, 0.4, 0.4, 0.4, 0.266666667,
## 0.266666667, 0.133333333, 0.133333333, 0.066666667, 0, 0.866666667,
## 0.866666667, 1, 0.666666667, 0.666666667, 0.666666667, 0.6,
## 0.4, 0.4, 0.4, 0.4, 0.266666667, 0.266666667, 0.133333333,
## 0.133333333, 0.066666667, 0, 0.666666667, 0.666666667, 0.666666667,
## 1, 0.933333333, 0.866666667, 0.6, 0.4, 0.4, 0.4, 0.4, 0.266666667,
## 0.266666667, 0.133333333, 0.133333333, 0.066666667, 0, 0.666666667,
## 0.666666667, 0.666666667, 0.933333333, 1, 0.866666667, 0.6,
## 0.4, 0.4, 0.4, 0.4, 0.266666667, 0.266666667, 0.133333333,
## 0.133333333, 0.066666667, 0, 0.666666667, 0.666666667, 0.666666667,
## 0.866666667, 0.866666667, 1, 0.6, 0.4, 0.4, 0.4, 0.4, 0.266666667,
## 0.266666667, 0.133333333, 0.133333333, 0.066666667, 0, 0.6,
## 0.6, 0.6, 0.6, 0.6, 0.6, 1, 0.4, 0.4, 0.4, 0.4, 0.266666667,
## 0.266666667, 0.133333333, 0.133333333, 0.066666667, 0, 0.4,
## 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 1, 0.933333333, 0.8, 0.8, 0.266666667,
## 0.266666667, 0.133333333, 0.133333333, 0.066666667, 0, 0.4,
## 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.933333333, 1, 0.8, 0.8, 0.266666667,
## 0.266666667, 0.133333333, 0.133333333, 0.066666667, 0, 0.4,
## 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.8, 0.8, 1, 0.933333333, 0.266666667,
## 0.266666667, 0.133333333, 0.133333333, 0.066666667, 0, 0.4,
## 0.4, 0.4, 0.4, 0.4, 0.4, 0.4, 0.8, 0.8, 0.933333333, 1, 0.266666667,
## 0.266666667, 0.133333333, 0.133333333, 0.066666667, 0, 0.266666667,
## 0.266666667, 0.266666667, 0.266666667, 0.266666667, 0.266666667,
## 0.266666667, 0.266666667, 0.266666667, 0.266666667, 0.266666667,
## 1, 0.933333333, 0.133333333, 0.133333333, 0.066666667, 0,
## 0.266666667, 0.266666667, 0.266666667, 0.266666667, 0.266666667,
## 0.266666667, 0.266666667, 0.266666667, 0.266666667, 0.266666667,
## 0.266666667, 0.933333333, 1, 0.133333333, 0.133333333, 0.066666667,
## 0, 0.133333333, 0.133333333, 0.133333333, 0.133333333, 0.133333333,
## 0.133333333, 0.133333333, 0.133333333, 0.133333333, 0.133333333,
## 0.133333333, 0.133333333, 0.133333333, 1, 0.933333333, 0.066666667,
## 0, 0.133333333, 0.133333333, 0.133333333, 0.133333333, 0.133333333,
## 0.133333333, 0.133333333, 0.133333333, 0.133333333, 0.133333333,
## 0.133333333, 0.133333333, 0.133333333, 0.933333333, 1, 0.066666667,
## 0, 0.066666667, 0.066666667, 0.066666667, 0.066666667, 0.066666667,
## 0.066666667, 0.066666667, 0.066666667, 0.066666667, 0.066666667,
## 0.066666667, 0.066666667, 0.066666667, 0.066666667, 0.066666667,
## 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1),
## maxit = 25, tol = 1e-04)
##
## Coefficients:
## (Intercept) eggs_mass
## 3.747 -2.784
##
## Scale Parameter: 1.566
##
## Correlation Model: fixed
## Working Correlation[1:4,1:4]:
## GA OH AL NJ
## GA 1.0000 0.9333 0.8667 0.6667
## OH 0.9333 1.0000 0.8667 0.6667
## AL 0.8667 0.8667 1.0000 0.6667
## NJ 0.6667 0.6667 0.6667 1.0000
##
## Number of clusters: 1 Maximum cluster size: 17
## Number of observations with nonzero weight: 17
# Now resampling
# ordsamplep.nb2(n=sims,r=r,mu=mu,Sigma=C)
# bootstrp normal
X<-rnorm(100,mean=3,sd=2)
m.X<-mean(X)
sd.X<-sd(X)
bt.X<-apply(replicate(1000,rnorm(100,mean=m.X,sd=sd.X) ),2,mean)
c(mean(X) - 1.96*sd(X)/10,mean(X) + 1.96*sd(X)/10)
## [1] 2.766937 3.515809
quantile(bt.X,probs = c(0.025,0.975))
## 2.5% 97.5%
## 2.759188 3.517992
## plot
newdat <- data.frame(eggs_mass = sort(eggs_mass))
newdat$predPOIS <- predict(fitPOIS, newdata = newdat, type = "response")
newdat$predNB2 <- predict(fitNB2, newdata = newdat, type = "response")
newdat$phypredPOIS<- predict(phyfitPOIS, newdata = newdat, type = "response")
newdat$phypredNB2<-c(sort(fitted(phyfitNB2),decreasing = T))# predict(phyfitNB2, newdata = newdat, type = "response")
plot(eggs_per_year ~ eggs_mass, data,main="Lizard Trait",xlab="eggs mass", ylab="eggs per year")
#lines(predLM ~ eggs_mass, newdat, col = 2)
lines(predPOIS ~ eggs_mass, newdat, col = "red")
lines(predNB2 ~ eggs_mass, newdat, col = "blue")
lines(phypredPOIS ~ eggs_mass, newdat, col = "blue")
lines(phypredNB2 ~ eggs_mass, newdat, col = "blue")
## plot
newdat <- data.frame(eggs_mass = sort(eggs_mass))
newdat$predPOIS <- predict(fitPOIS, newdata = newdat, type = "response")
newdat$predNB2 <- predict(fitNB2, newdata = newdat, type = "response")
newdat$phypredPOIS <- predict(phyfitPOIS, newdata = newdat, type = "response")
newdat$phypredNB2 <- c(sort(fitted(phyfitNB2), decreasing = T))
# Create plot
plot(eggs_per_year ~ eggs_mass, data, main="Lizard Trait", xlab="Eggs Mass", ylab="Eggs Per Year")
# Add lines for each model with different colors, line types and widths
lines(predPOIS ~ eggs_mass, newdat, col = "red", lty = 1, lwd = 2)
lines(predNB2 ~ eggs_mass, newdat, col = "blue", lty = 2, lwd = 2)
lines(phypredPOIS ~ eggs_mass, newdat, col = "black", lty = 3, lwd = 2)
lines(phypredNB2 ~ eggs_mass, newdat, col = "green", lty = 4, lwd = 2)
# Add legend
legend("topright", legend=c("Poi", "Negative Binomial", "Phylogenetic Poisson", "Phylogenetic Negative Binomial"),
col=c("red", "blue", "black", "green"), lty = 1:4, lwd = 2, cex = 0.8)
# Load ggplot2
library(ggplot2)
# Create base plot with white theme
p <- ggplot(data, aes(x=eggs_mass, y=eggs_per_year)) +
geom_point(size = 3) + # Make points larger
labs(x="Eggs Mass", y="Eggs Per Year") +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5, size = 20), # Larger font for title
axis.title.x = element_text(size = 16), # Larger font for x-axis label
axis.title.y = element_text(size = 16), # Larger font for y-axis label
legend.title = element_text(size = 14), # Larger font for legend title
legend.text = element_text(size = 12)) # Larger font for legend text
# Add title
p <- p + ggtitle("Lizard Trait")
# Add lines for each model with larger line width
p <- p +
geom_line(data=newdat, aes(y=predPOIS, color="Poisson"), size = 1.5) +
geom_line(data=newdat, aes(y=predNB2, color="Negative Binomial"), size = 1.5) +
geom_line(data=newdat, aes(y=phypredPOIS, color="Phylogenetic Poisson"), size = 1.5) +
geom_line(data=newdat, aes(y=phypredNB2, color="Phylogenetic Negative Binomial"), size = 1.5)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Add legend at the bottom
p <- p + scale_color_manual(name="Model",
values=c("Poisson"="red", "Negative Binomial"="blue",
"Phylogenetic Poisson"="black", "Phylogenetic Negative Binomial"="green")) +
theme(legend.position="bottom")
# Show the plot
print(p)
library(ggtree)
library(ape)
#tree<-read.tree("~/Dropbox/ChiYoWu/rcodeCYW/mammal2.nwk")
tree<- read.tree("~/Dropbox/FCU/Teaching/Mentoring/2016Spring-2021Fall_AssociateProfessor/2020Fall/ChiYuWu/rcodeCYW/mammal2.nwk")
#ggtree(tree)+theme_tree()+geom_text(aes(label=label),size=.5)
plot(tree,cex = .5)
mtx <- read.table("~/Dropbox/FCU/Teaching/Mentoring/2016Spring-2021Fall_AssociateProfessor/2020Fall/ChiYuWu/dataset/niewiarowski2004phylogenetic/mtxEOV-2.txt")
dim(mtx)
## [1] 17 17
mtx<-as.matrix(mtx)
mtx<-mtx +t(mtx)
diag(mtx)<-1
library(phangorn)
D<- 2*(max(mtx)- mtx)
tree<-upgma(D)
plot(tree)
#tree$tip.label<-LETTERS[1:17]
tree$tip.label <- c("S. undulatus(GA)","S. undulatus(OH)","S. undulatus(AL)","S. undulatus(NJ)","S. undulatus(PA)","S. undulatus(SC)","S. woodi","S. undulatus(AZ)","S. undulatus(UT)","S. undulatus(Huerfano.CO)","S. undulatus(Mesa.CO)","S. undulatus(NE)","S. undulatus(TX)","S. undulatus(Grant.NM)","S. undulatus(Hidalgo.NM)","S. virgatus","S. occidentalis")
tree$tip.label <- c("GA","OH","AL","NJ","PA","SC","S. woodi","AZ","UT","Huerfano.CO","Mesa.CO","NE","TX","Grant.NM","Hidalgo.NM","S. virgatus","S. occidentalis")
plot(tree)
vcv(tree)
## GA OH AL NJ PA
## GA 1.00000000 0.93333333 0.86666667 0.66666667 0.66666667
## OH 0.93333333 1.00000000 0.86666667 0.66666667 0.66666667
## AL 0.86666667 0.86666667 1.00000000 0.66666667 0.66666667
## NJ 0.66666667 0.66666667 0.66666667 1.00000000 0.93333333
## PA 0.66666667 0.66666667 0.66666667 0.93333333 1.00000000
## SC 0.66666667 0.66666667 0.66666667 0.86666667 0.86666667
## S. woodi 0.60000000 0.60000000 0.60000000 0.60000000 0.60000000
## AZ 0.40000000 0.40000000 0.40000000 0.40000000 0.40000000
## UT 0.40000000 0.40000000 0.40000000 0.40000000 0.40000000
## Huerfano.CO 0.40000000 0.40000000 0.40000000 0.40000000 0.40000000
## Mesa.CO 0.40000000 0.40000000 0.40000000 0.40000000 0.40000000
## NE 0.26666667 0.26666667 0.26666667 0.26666667 0.26666667
## TX 0.26666667 0.26666667 0.26666667 0.26666667 0.26666667
## Grant.NM 0.13333333 0.13333333 0.13333333 0.13333333 0.13333333
## Hidalgo.NM 0.13333333 0.13333333 0.13333333 0.13333333 0.13333333
## S. virgatus 0.06666667 0.06666667 0.06666667 0.06666667 0.06666667
## S. occidentalis 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000
## SC S. woodi AZ UT Huerfano.CO
## GA 0.66666667 0.60000000 0.40000000 0.40000000 0.40000000
## OH 0.66666667 0.60000000 0.40000000 0.40000000 0.40000000
## AL 0.66666667 0.60000000 0.40000000 0.40000000 0.40000000
## NJ 0.86666667 0.60000000 0.40000000 0.40000000 0.40000000
## PA 0.86666667 0.60000000 0.40000000 0.40000000 0.40000000
## SC 1.00000000 0.60000000 0.40000000 0.40000000 0.40000000
## S. woodi 0.60000000 1.00000000 0.40000000 0.40000000 0.40000000
## AZ 0.40000000 0.40000000 1.00000000 0.93333333 0.80000000
## UT 0.40000000 0.40000000 0.93333333 1.00000000 0.80000000
## Huerfano.CO 0.40000000 0.40000000 0.80000000 0.80000000 1.00000000
## Mesa.CO 0.40000000 0.40000000 0.80000000 0.80000000 0.93333333
## NE 0.26666667 0.26666667 0.26666667 0.26666667 0.26666667
## TX 0.26666667 0.26666667 0.26666667 0.26666667 0.26666667
## Grant.NM 0.13333333 0.13333333 0.13333333 0.13333333 0.13333333
## Hidalgo.NM 0.13333333 0.13333333 0.13333333 0.13333333 0.13333333
## S. virgatus 0.06666667 0.06666667 0.06666667 0.06666667 0.06666667
## S. occidentalis 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000
## Mesa.CO NE TX Grant.NM Hidalgo.NM
## GA 0.40000000 0.26666667 0.26666667 0.13333333 0.13333333
## OH 0.40000000 0.26666667 0.26666667 0.13333333 0.13333333
## AL 0.40000000 0.26666667 0.26666667 0.13333333 0.13333333
## NJ 0.40000000 0.26666667 0.26666667 0.13333333 0.13333333
## PA 0.40000000 0.26666667 0.26666667 0.13333333 0.13333333
## SC 0.40000000 0.26666667 0.26666667 0.13333333 0.13333333
## S. woodi 0.40000000 0.26666667 0.26666667 0.13333333 0.13333333
## AZ 0.80000000 0.26666667 0.26666667 0.13333333 0.13333333
## UT 0.80000000 0.26666667 0.26666667 0.13333333 0.13333333
## Huerfano.CO 0.93333333 0.26666667 0.26666667 0.13333333 0.13333333
## Mesa.CO 1.00000000 0.26666667 0.26666667 0.13333333 0.13333333
## NE 0.26666667 1.00000000 0.93333333 0.13333333 0.13333333
## TX 0.26666667 0.93333333 1.00000000 0.13333333 0.13333333
## Grant.NM 0.13333333 0.13333333 0.13333333 1.00000000 0.93333333
## Hidalgo.NM 0.13333333 0.13333333 0.13333333 0.93333333 1.00000000
## S. virgatus 0.06666667 0.06666667 0.06666667 0.06666667 0.06666667
## S. occidentalis 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000
## S. virgatus S. occidentalis
## GA 0.06666667 0
## OH 0.06666667 0
## AL 0.06666667 0
## NJ 0.06666667 0
## PA 0.06666667 0
## SC 0.06666667 0
## S. woodi 0.06666667 0
## AZ 0.06666667 0
## UT 0.06666667 0
## Huerfano.CO 0.06666667 0
## Mesa.CO 0.06666667 0
## NE 0.06666667 0
## TX 0.06666667 0
## Grant.NM 0.06666667 0
## Hidalgo.NM 0.06666667 0
## S. virgatus 1.00000000 0
## S. occidentalis 0.00000000 1
library(ggtree)
ggtree(tree, branch.length="none")+ geom_tiplab()+xlim(0, 15)