Regression under GLM for mammal and lizard datasets

Published

December 23, 2022

rm(list=ls())
library(MASS)
library(xtable)
library(dplyr)
#install.packages("kableExtra")
library(kableExtra)
library(MASS)
# The mamma dataset: Poisson Better
df.mam<-read.csv("~/Dropbox/NSCT/112年計畫113年八月/phyNB2reg/rcode/mammalcapellini2015role.csv")


# LG: Longevity in years
# BM: body mass in grams
# GT: gestation time in days
# WA: waning age in days
# NBM: neonatal body mass in grams
# LS: litter size
# LY: litters per year
# AFB: age at first birth in days
# RL: reproductive lifespan in days (computed using LG converted into days and AFB)
# OV: offspring value as per equation (see protocol)

#head(df.mam)
df.mam<-df.mam[complete.cases(df.mam), ]
#dim(df.mam)
df.mam$LY <-ceiling(df.mam$LY)
df.mam$OV <- as.numeric(df.mam$OV)
#ls(df.mam)
#str(df.mam)
df.mam.out<-df.mam[,c("LY","LG","BM","GT","WA","LS","AFB","RL","OV")]
rownames(df.mam.out)<-NULL

#kable(round(df.mam.out,2),) %>%kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
#xtable(round(df.mam.out,2))
#dim(df.mam.out)
fitPOIS.mam <- glm(LY ~ LG + BM + GT + WA + LS + AFB + RL +  OV, df.mam, family = poisson(link = "log"))

smpoimam<-summary(fitPOIS.mam)
#smpoimam$aic

nb2.aic.mam<-array(NA,c(100))
for(i in 1:100){
  fitNB2.mam <- glm(LY ~ LG + BM + GT + WA + LS + AFB + RL +  OV, df.mam, family = negative.binomial(theta = i))
  #fitNB2 <-glm.nb(LY ~ LG + BM + GT + WA + LS + AFB + RL +  OV, data = df)
  smbn2mam<-summary(fitNB2.mam)
  nb2.aic.mam[i]<-smbn2mam$aic
  #print(smbn2mam$aic)
}

theta.best.mam=which(nb2.aic.mam==min(nb2.aic.mam))
#theta.best.mam
#nb2.aic.mam[theta.best.mam]

# It never smaller than the poi AIC

mam.mean=round(mean(df.mam$LY),3)
mam.var=round(var(df.mam$LY),3)
taxasize.mam<-dim(df.mam)[1]

This liz dataset: negative binomial better

df.liz<-read.table("~/Dropbox/NSCT/112年計畫113年八月/phyNB2reg/rcode/lizardtraitEOV-2.txt")
#df.liz
# SM: "size at maturity(mm)", 
# AS: "average size(mm)", 
# AM: "age at maturity(mo)", 
# EM: "egg mass(g)", 
# CS: "clutch size", 
# CM: "clutch mass(g)", 
# EPY "eggs per year")

df.liz<-as.data.frame(df.liz)
colnames(df.liz)<-c("SM","AS","AM","EM","CS","CM","EPY")
df.liz$EPY <- ceiling(df.liz$EPY)
#head(df.liz)

fitPOIS.liz <- glm(EPY ~ SM + AS + AM + EM + CS + CM , data = df.liz, family = poisson(link = "log"))

#summary(fitPOIS.liz)

smpoiliz<-summary(fitPOIS.liz)
#smpoiliz$aic

#fitNB2.liz <- glm.nb(EPY ~ SM + AS + AM + EM + CS + CM , data = df.liz)

nb2.aic.liz<-array(NA,c(100))
for(i in 1:100){
  fitNB2.liz  <- glm(EPY ~ SM + AS + AM + EM + CS + CM , df.liz, family = negative.binomial(theta = i))
  ss =  summary(fitNB2.liz)
  nb2.aic.liz[i]<-ss["aic"]$aic
  #print(c(i,ss["aic"]$aic))
}

theta.best.liz=which(nb2.aic.liz==min(nb2.aic.liz))
#theta.best.liz
#nb2.aic.liz[theta.best.liz]

liz.mean=round(mean(df.liz$EPY),3)
#liz.mean
liz.var=round(var(df.liz$EPY),3)
#liz.var
taxasize.liz<-dim(df.liz)[1]
#taxasize.liz

mean_value<-c(liz.mean,mam.mean)
var_value<-c(liz.var,mam.var)
taxasize<-c(taxasize.liz,taxasize.mam)
PoisAICc<-c(smpoiliz$aic,smpoimam$aic)
NB2AICc<-c(nb2.aic.liz[theta.best.liz],nb2.aic.mam[theta.best.mam])
dNB2AICc<-c(nb2.aic.liz[theta.best.liz],nb2.aic.mam[theta.best.mam])-min(c(nb2.aic.liz[theta.best.liz],nb2.aic.mam[theta.best.mam]))
dAICc.liz<-c(smpoiliz$aic,nb2.aic.liz[theta.best.liz])-min(c(smpoiliz$aic,nb2.aic.liz[theta.best.liz]))

#dAICc.liz
weight.liz<-round(exp(-0.5*dAICc.liz)/sum(exp(-0.5*dAICc.liz)),2)


dAICc.mam<-c(smpoimam$aic,nb2.aic.mam[theta.best.mam])-min(c(smpoimam$aic,nb2.aic.mam[theta.best.mam]))
dAICc.mam
[1] 0.0000000 0.6830124
weight.mam<-round(exp(-0.5*dAICc.mam)/sum(exp(-0.5*dAICc.mam)),2)
PoisAICc<-round(PoisAICc,2)
PoisAICcWeight<-c(paste(PoisAICc[1],"(",weight.liz[1],")",sep=""), paste(PoisAICc[2],"(",weight.mam[1],")",sep=""))

NB2AICc<-round(NB2AICc,2)
NB2AICcWeight<-c(paste(NB2AICc[1],"(",weight.liz[2],")",sep=""), paste(NB2AICc[2],"(",weight.mam[2],")",sep=""))

df.out<-rbind(
  mean_value,
  var_value,
  taxasize,
  PoisAICcWeight,
  NB2AICcWeight
)

colnames(df.out)<-c("Lizard","Mammal")
rownames(df.out)<-c("mean","var","taxa","PoiAICcWeight","NB2AICcWeight")
#df.out
kable(t(df.out))%>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
mean var taxa PoiAICcWeight NB2AICcWeight
Lizard 20.824 55.029 17 116.82(0.45) 116.42(0.55)
Mammal 1.986 2.37 74 228.84(0.58) 229.52(0.42)
#xtable(t(df.out))