#############################################################
#############GEE NEGATIVE BINOMIAL PHYLOGENTIC###############
#############################################################
rm(list=ls())
#setwd("C:/Users/User/Dropbox/ChiYoWu/rcodeCYW/")
setwd("~/Dropbox/FCU/Teaching/Mentoring/2016Spring-2021Fall_AssociateProfessor/2020Fall/ChiYuWu/rcodeCYW/")
library(ape)
library(MASS)
library(ggplot2)
library(geepack)
library(geeM)
library(nleqslv)
#install.packages("nleqslv")
Mu<-function(params,r=r,X=X){
b0<-params["b0"]
b1<-params["b1"]
mu.vec<-r*exp(b0+b1*X)/(1-exp(b0+b1*X))
return(abs(mu.vec))
}
gee_equation.nb2<-function(params,C=C,X=X,Y=Y,r=r){
mu<-Mu(params,r=r,X=X)
A<-abs(diag((mu + mu^2/r)))
sqrtA<-sqrt(A)
eqn<- rbind(mu,mu*X)%*%ginv(sqrtA%*%C%*%sqrtA,tol=1e-4)%*%(Y-mu)
return(eqn)
}
#compar.gee.nb2(params,C=C,X=X,Y=Y,r=r)
ordsamplep.nb2<-function (n,r, mu, Sigma){
k <- length(mu)
valori <- mvrnorm(n, rep(0, k), Sigma)
dim(valori)
for (i in 1:k)
{
# i<-1
valori[,i] <- qnbinom(p=pnorm(valori[,i]), size=r, prob=mu[i]/(mu[i]+r)) # mu = n(1-p)/p
# qnbinom
}
return(valori)
}
# ?qnbinom
# ?qpois
### Poisson
ordsamplep.poi<-function (n, lambda, Sigma){
# n=sims
# lambda=lambda.array
# Sigma=C
k <- length(lambda)
# n<-2
valori <- mvrnorm(n, rep(0, k), Sigma)
for (i in 1:k)
{
# i<-1
valori[, i] <- qpois(pnorm(valori[,i]), lambda[i])
}
return(valori)
}
########################################################
simCoefNB2<-function(sims=sims,taxa=taxa,mu=mu,predictor=predictor,r=r,treetype=treetype,tree=tree){
coef.array <- array(NA,c(sims,2))
colnames(coef.array)<-c("beta_0","beta_1")
C<-vcv(tree)
C<-C/max(C)
if(treetype=="star"){
simY<-matrix(rnbinom(n=sims*dim(C)[1], size=r,mu=mu),nrow=sims) }else{
try(simY<-ordsamplep.nb2(n=sims,r=r,mu=abs(mu),Sigma=C))
}
#ordsamplep.nb2(n=sims,size=size,mu=abs(mu),Sigma=C)
# simY
# dim(simY)
# # lambda<-exp(3+0.5*predictor)
# simY1<-ordsamplep.poi(n=sims, lambda=lambda, Sigma=C)
#simY
#dim(simY)
for (simIndex in 1:sims){
# simIndex<-10
# print(simY[simIndex,])
Y<-simY[simIndex,]
X<-predictor
#fitnpoi <-glm(Y~X,family = "poisson")
#fitnpoi$coefficients
# fitnpoi <-glm(simY1[simIndex,]~X,family = "poisson")
# fitnpoi
# fitnNB2
# ?glm.nb
#true.param
#params<-fitnpoi$coefficients
#names(params)<-c("b0","b1")
# params
# fitsimpoigee <- compar.gee(simY[simIndex,]~predictor,phy=tree,family="poisson",scale.fix = TRUE)
params<- true.param #c(3.08, 0.4704 )
# names(params)<-c("b0","b1")
# set.seed(24528)
# X<-c(23.4,26.7,24.5,30.6,32.5)
# Y<-c(3,5,6,9,12)
# tree<-rcoal(5)
# r <-mean(Y)
# C<-vcv(tree)
# ?nleqslv
try(coefsim <- nleqslv(params,gee_equation.nb2,C=C,X=X,Y=Y,r=r,method="Broyden", jacobian=TRUE,control=list(allowSingular=T,xtol=1e-5,ftol=1e-2,btol=1e-5,cndtol=1e-18,delta="cauchy")))
#coefsim$x
try(coef.array[simIndex,]<-coefsim$x)
# plot(Y~X, main="test sim for nb2 reg")
# curve(exp(params["b0"]+params["b1"]*x),range(X),add=TRUE,col="blue",lwd=3)
# b0<-coefsim$x["b0"]
# b1<-coefsim$x["b1"]
# curve(r*exp(b0+b1*x)/(1-exp(b0+b1*x)),range(X),add=TRUE,col="red",lwd=3)
# curve(r*exp(b0+b1*x)/(1-exp(b0+b1*x)),0,10,add=TRUE,col="red",lwd=3)
#nleqslv(params,gee_equation,C=C,X=X,Y=Y)
}
# print(apply(coef.array,2,summary))
return(coef.array)
}
#########################################################
#set.seed(8122)
#r.array<- seq(0.1,10,by=0.6)
sims<- 1000
true.param<-c(3,5)
#true.param<-c(-0.1,0)
names(true.param)<-c("b0","b1")
#theta=X%*%matrix(true.param,nrow=2)
taxa.array<-c(16,32,64,128)
treetype.array<-c("coal","balanced","left","star")
tree.coef.array<-array(NA,c(length(treetype.array), length(taxa.array),sims,2))
#for(rIndex in 1:length(r.array)){
r<-10.698#r.array[rIndex]
for(treetypeIndex in 1:length(treetype.array)){
#treetypeIndex<-1
treetype<-treetype.array[treetypeIndex]
for (taxaIndex in 1:length(taxa.array)){
#taxaIndex<-1
taxa <- taxa.array[taxaIndex]
if(treetype=="coal"){tree<-rcoal(taxa)}
if(treetype=="balanced"){tree<-compute.brlen(stree(taxa,type = "balanced"))}
if(treetype=="left"){tree<-compute.brlen(stree(n=taxa,type = "left"))}
if(treetype=="star"){tree<-compute.brlen(stree(n=taxa,type = "star"))}
#plot(tree)
predictor<-rexp(n=taxa,rate=4)
X=cbind(rep(1,taxa),predictor)
theta=X%*%matrix(true.param,nrow=2)
try(mu<- abs(r*exp(theta)/(1-exp(theta))))
try(coef.array <- simCoefNB2(sims=sims,taxa=taxa,mu=mu,predictor=predictor,r=r,treetype=treetype,tree=tree))
try(tree.coef.array[treetypeIndex,taxaIndex,,]<-coef.array)
#true.param
}
}
#print(c(true.param,size))
#print(apply(coef.array,2,median,na.rm=T))
dim(tree.coef.array)
save.image("phynb2sim3.rda")
rm(list=ls())
load("~/Dropbox/FCU/Teaching/Mentoring/2016Spring-2021Fall_AssociateProfessor/2020Fall/ChiYuWu/rcodeCYW/phynb2sim3.rda")
# Define filter ranges for both dimensions
filter.range <- list(c(1, 5), c(3, 7))
# Apply filter to each of the last dimension of tree.coef.array
for (i in 1:2) {
# Apply filter on the 4th dimension
tree.coef.array[,,,i] <- apply(tree.coef.array[,,,i], c(1,2,3), function(x) {
# If the value is in the desired range, keep it, otherwise set it to NA
ifelse(x >= filter.range[[i]][1] & x <= filter.range[[i]][2], x, NA)
})
}
#tree.coef.array[2,1,,]
#
# outtable<-array(NA,c(length(taxa.array),length(treetype.array)*2))
# rownames(outtable)<-taxa.array
# outtable
# dim(tree.coef.array)
# for(treetypeIndex in 1:length(treetype.array)){
# for(taxaIndex in 1:length(taxa.array)){
# outtable[taxaIndex,(2*treetypeIndex-1):(2*treetypeIndex)] <- c(paste(round(mean(tree.coef.array[treetypeIndex,taxaIndex,,1],na.rm=T),3),"(",round(sd(tree.coef.array[treetypeIndex,taxaIndex,,1],na.rm=T),3),")",sep=""),paste(round(mean(tree.coef.array[treetypeIndex,taxaIndex,,2],na.rm=T),3),"(",round(sd(tree.coef.array[treetypeIndex,taxaIndex,,2],na.rm=T),3),")",sep="")
# )
# }
# }
#
# print(paste("r=",r,sep=""))
# print(outtable)
outtable<-array(NA,c(length(taxa.array),length(treetype.array)*2))
rownames(outtable)<-taxa.array
outtable
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
## 16 NA NA NA NA NA NA NA NA
## 32 NA NA NA NA NA NA NA NA
## 64 NA NA NA NA NA NA NA NA
## 128 NA NA NA NA NA NA NA NA
dim(tree.coef.array)
## [1] 4 4 1000 2
for(treetypeIndex in 1:length(treetype.array)){
for(taxaIndex in 1:length(taxa.array)){
b0_values <- tree.coef.array[treetypeIndex,taxaIndex,,1][is.finite(tree.coef.array[treetypeIndex,taxaIndex,,1])]
b1_values <- tree.coef.array[treetypeIndex,taxaIndex,,2][is.finite(tree.coef.array[treetypeIndex,taxaIndex,,2])]
outtable[taxaIndex,(2*treetypeIndex-1):(2*treetypeIndex)] <- c(paste(round(mean(b0_values, na.rm=T),3),"(",round(sd(b0_values, na.rm=T),3),")",sep=""),
paste(round(mean(b1_values, na.rm=T),3),"(",round(sd(b1_values, na.rm=T),3),")",sep="")
)
}
}
print(paste("r=",r,sep=""))
## [1] "r=10.698"
print(outtable)
## [,1] [,2] [,3] [,4] [,5]
## 16 "2.964(1.109)" "4.819(1.378)" "2.079(0.754)" "4.474(1.158)" "2.104(0.682)"
## 32 "2.839(1.114)" "5.106(0.598)" "2.638(0.874)" "5.543(0.785)" "2.197(0.772)"
## 64 "3.161(0.872)" "5.656(0.959)" "3.359(0.871)" "6.056(1.187)" "2.71(0.846)"
## 128 "3.368(0.987)" "5.119(0.83)" "4.125(1.036)" "5.413(0.313)" "2.741(0.781)"
## [,6] [,7] [,8]
## 16 "5.106(1.173)" "2.546(0.911)" "4.921(1.057)"
## 32 "4.493(1.145)" "2.777(0.924)" "4.608(1.161)"
## 64 "6.1(1.057)" "2.897(0.875)" "4.938(1.157)"
## 128 "4.839(0.764)" "3.12(0.856)" "4.416(1.092)"
library(xtable)
xtable(outtable)
## % latex table generated in R 4.3.1 by xtable 1.8-4 package
## % Sun Jul 9 14:49:57 2023
## \begin{table}[ht]
## \centering
## \begin{tabular}{rllllllll}
## \hline
## & 1 & 2 & 3 & 4 & 5 & 6 & 7 & 8 \\
## \hline
## 16 & 2.964(1.109) & 4.819(1.378) & 2.079(0.754) & 4.474(1.158) & 2.104(0.682) & 5.106(1.173) & 2.546(0.911) & 4.921(1.057) \\
## 32 & 2.839(1.114) & 5.106(0.598) & 2.638(0.874) & 5.543(0.785) & 2.197(0.772) & 4.493(1.145) & 2.777(0.924) & 4.608(1.161) \\
## 64 & 3.161(0.872) & 5.656(0.959) & 3.359(0.871) & 6.056(1.187) & 2.71(0.846) & 6.1(1.057) & 2.897(0.875) & 4.938(1.157) \\
## 128 & 3.368(0.987) & 5.119(0.83) & 4.125(1.036) & 5.413(0.313) & 2.741(0.781) & 4.839(0.764) & 3.12(0.856) & 4.416(1.092) \\
## \hline
## \end{tabular}
## \end{table}
# Prepare data for b0
df_b0 <- data.frame()
for(treetypeIndex in 1:length(treetype.array)){
for(taxaIndex in 1:length(taxa.array)){
b0_values <- tree.coef.array[treetypeIndex,taxaIndex,,1][is.finite(tree.coef.array[treetypeIndex,taxaIndex,,1])]
temp_df_b0 <- data.frame(
taxa = taxa.array[taxaIndex],
treetype = treetype.array[treetypeIndex],
value = b0_values
)
df_b0 <- rbind(df_b0, temp_df_b0)
}
}
head(df_b0)
## taxa treetype value
## 1 16 coal 4.369561
## 2 16 coal 1.754779
## 3 16 coal 1.754779
## 4 16 coal 2.553154
## 5 16 coal 2.495506
## 6 16 coal 3.132605
# Load library
library(ggplot2)
df_b0$taxa<-as.factor(df_b0$taxa)
# Create boxplot for b0 across all tree types
p_b0 <- ggplot(df_b0, aes(x=treetype, y=value, fill=taxa)) +
geom_boxplot() +
labs(x = "Tree Type",
y = expression(beta[0] ~ "Estimates"),
fill = "Taxa Size",
title = "Phylogenetic Poisson Regression:" ~ beta[0] ~ "Estimates") +
theme_minimal() + ylim(3,5)+
theme(plot.title = element_text(hjust = 0.5)) # This line centers the title
# Print the plot
print(p_b0)
## Warning: Removed 3547 rows containing non-finite values (`stat_boxplot()`).
# png(file="phynb2simb0v2.png",
# width=600, height=350)
# print(p_b0)
# dev.off()
# Prepare data for b1
df_b1 <- data.frame()
for(treetypeIndex in 1:length(treetype.array)){
for(taxaIndex in 1:length(taxa.array)){
b1_values <- tree.coef.array[treetypeIndex,taxaIndex,,2][is.finite(tree.coef.array[treetypeIndex,taxaIndex,,2])]
temp_df_b1 <- data.frame(
taxa = taxa.array[taxaIndex],
treetype = treetype.array[treetypeIndex],
value = b1_values
)
df_b1 <- rbind(df_b1, temp_df_b1)
}
}
# Load library
library(ggplot2)
df_b1$taxa<-as.factor(df_b1$taxa)
# Create boxplot for b1 across all tree types
p_b1 <- ggplot(df_b1, aes(x=treetype, y=value, fill=taxa)) +
geom_boxplot() +
labs(x = "Tree Type",
y = expression(beta[1] ~ "Estimates"),
fill = "Taxa Size",
title = "Phylogenetic NB2 Regression:" ~ beta[1] ~ "Estimates") +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5)) # This line centers the title
# Print the plot
print(p_b1)
## violine plot
library(ggplot2)
library(ggbeeswarm)
library(gridExtra)
# For b0 estimates
df_b0$taxa <- as.factor(df_b0$taxa)
pb0 <- ggplot(df_b0, aes(x=treetype, y=value, color=taxa)) +
geom_quasirandom(groupOnX = FALSE, alpha=0.6, size=1.5) +
geom_hline(yintercept = 3, linetype="dashed", color = "red", size = 1) + # true value for b0
coord_cartesian(ylim = c(1, 5)) +
labs(x = "Tree Type",
y = expression(beta[0] ~ "Estimates"),
color = "Taxa Size",
title = "Phylogenetic NB2 Regression: " ~ beta[0] ~ " Estimates") +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# For b1 estimates
df_b1$taxa <- as.factor(df_b1$taxa)
pb1 <- ggplot(df_b1, aes(x=treetype, y=value, color=taxa)) +
geom_quasirandom(groupOnX = FALSE, alpha=0.6, size=1.5) +
geom_hline(yintercept = 5, linetype="dashed", color = "red", size = 1) + # true value for b1
coord_cartesian(ylim = c(2, 8)) +
labs(x = "Tree Type",
y = expression(beta[1] ~ "Estimates"),
color = "Taxa Size",
title = "Phylogenetic NB2 Regression: " ~ beta[1] ~ " Estimates") +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5))
# Arrange the plots side by side
grid.arrange(pb0, pb1, ncol=2)