0% found this document useful (0 votes)

20 views

Assignment Q4

Uploaded by

lewis.hastie

Available Formats

Download as PDF, TXT or read online on Scribd

0% found this document useful (0 votes)

20 views

Assignment Q4

Uploaded by

lewis.hastie

Available Formats

Download as PDF, TXT or read online on Scribd

You are on page 1/ 7

Question 4

2023-08-28

y <- c(0,1,2,3,1,5,10,17,23,31,20,25,37,45)
x <- c(1:14)

n <- length(x)
one <- c(rep(1,n))
X <- cbind(one,x)
beta <- matrix(c(0.3, 0.3))#starting values
r <- 10
iter <- 25
result <- matrix(0, iter, 8)

for (i in 1:iter){
eta = X%*%beta
mu = exp(eta)
v = mu*(mu+r)/r
W = diag(c((mu*r)/(mu+r)))
z = eta+(y-mu)/mu
XWX = t(X)%*%W%*%X
XWXI = solve(XWX)
XWZ = t(X)%*%W%*%z
beta = XWXI%*%XWZ
b0 = beta[1,]
b1 = beta[2,]
b_se = sqrt(diag(XWXI))
names(beta) = NULL
names(b_se) = NULL

# NR procedure for calculating r

eta = X%*%beta
mu = exp(eta)

l = sum(log(gamma(y + r))) - sum(log(gamma(y + 1))) - nlog(gamma(r)) + rsum(log(r/(mu + r))) + sum(y

dl = sum(digamma(y + r)) - n*digamma(r) + sum(log(r/(mu + r))) + sum(mu/(r + mu)) - sum(y/(r + mu))
dl2 = sum(trigamma(y + r)) - n*trigamma(r) + sum(mu/(r*(r + mu))) - sum(mu/(r + mu)ˆ2) + sum(y/(r + mu

r = r - dl/dl2
r_se = sqrt(-1/dl2)

result[i, ] = c(i, r, r_se, l, b0, b1, b_se[1], b_se[2])

}

colnames(result) = c("iter", "r", "SE(r)", "logL", "beta_0", "beta_1", "SE(beta_0)", "SE(beta_1)")

round(result,3)

1
## iter r SE(r) logL beta_0 beta_1 SE(beta_0) SE(beta_1)
## [1,] 1 10.557 6.626 -39.047 -0.015 0.301 0.323 0.032
## [2,] 2 11.599 7.308 -38.905 -0.013 0.295 0.352 0.034
## [3,] 3 12.233 8.526 -38.886 0.021 0.291 0.349 0.034
## [4,] 4 12.479 9.330 -38.882 0.035 0.290 0.344 0.033
## [5,] 5 12.558 9.657 -38.881 0.039 0.289 0.342 0.033
## [6,] 6 12.581 9.763 -38.881 0.041 0.289 0.341 0.033
## [7,] 7 12.589 9.795 -38.881 0.042 0.289 0.341 0.033
## [8,] 8 12.591 9.805 -38.881 0.042 0.289 0.341 0.033
## [9,] 9 12.592 9.808 -38.881 0.042 0.289 0.341 0.033
## [10,] 10 12.592 9.809 -38.881 0.042 0.289 0.341 0.033
## [11,] 11 12.592 9.809 -38.881 0.042 0.289 0.341 0.033
## [12,] 12 12.592 9.809 -38.881 0.042 0.289 0.341 0.033
## [13,] 13 12.592 9.809 -38.881 0.042 0.289 0.341 0.033
## [14,] 14 12.592 9.809 -38.881 0.042 0.289 0.341 0.033
## [15,] 15 12.592 9.809 -38.881 0.042 0.289 0.341 0.033
## [16,] 16 12.592 9.809 -38.881 0.042 0.289 0.341 0.033
## [17,] 17 12.592 9.809 -38.881 0.042 0.289 0.341 0.033
## [18,] 18 12.592 9.809 -38.881 0.042 0.289 0.341 0.033
## [19,] 19 12.592 9.809 -38.881 0.042 0.289 0.341 0.033
## [20,] 20 12.592 9.809 -38.881 0.042 0.289 0.341 0.033
## [21,] 21 12.592 9.809 -38.881 0.042 0.289 0.341 0.033
## [22,] 22 12.592 9.809 -38.881 0.042 0.289 0.341 0.033
## [23,] 23 12.592 9.809 -38.881 0.042 0.289 0.341 0.033
## [24,] 24 12.592 9.809 -38.881 0.042 0.289 0.341 0.033
## [25,] 25 12.592 9.809 -38.881 0.042 0.289 0.341 0.033

# Tidying up with some dataframe manipulation

result <- as.data.frame(result)
r_fin <- result[iter,]

data <- as.data.frame(cbind(x,y))

glmNB <- glm.nb(y ~ x, control = glm.control(maxit = 25, trace = T))

## Theta(1) = 17.086900, 2(Ls - Lm) = 16.831100

## Theta(2) = 13.402300, 2(Ls - Lm) = 15.387900

## Theta(3) = 13.402300, 2(Ls - Lm) = 15.368300

## Theta(4) = 12.760300, 2(Ls - Lm) = 15.072000

## Theta(5) = 12.760200, 2(Ls - Lm) = 15.071100

## Theta(6) = 12.627800, 2(Ls - Lm) = 15.008000

## Theta(7) = 12.627800, 2(Ls - Lm) = 15.008000

## Theta(8) = 12.599700, 2(Ls - Lm) = 14.994500

## Theta(9) = 12.599600, 2(Ls - Lm) = 14.994500

2
## Theta(10) = 12.593600, 2(Ls - Lm) = 14.991600

## Theta(11) = 12.593600, 2(Ls - Lm) = 14.991600

## Theta(12) = 12.592400, 2(Ls - Lm) = 14.991000

## Theta(13) = 12.592300, 2(Ls - Lm) = 14.990900

## Theta(14) = 12.592000, 2(Ls - Lm) = 14.990800

## Theta(15) = 12.592000, 2(Ls - Lm) = 14.990800

## Theta(16) = 12.591900, 2(Ls - Lm) = 14.990800

## Theta(17) = 12.591900, 2(Ls - Lm) = 14.990800

## Theta(18) = 12.591900, 2(Ls - Lm) = 14.990800

## Theta(19) = 12.591900, 2(Ls - Lm) = 14.990800

## Theta(20) = 12.591900, 2(Ls - Lm) = 14.990800

## Theta(21) = 12.591900, 2(Ls - Lm) = 14.990800

## Theta(22) = 12.591900, 2(Ls - Lm) = 14.990800

## Theta(23) = 12.591900, 2(Ls - Lm) = 14.990800

## Theta(24) = 12.591900, 2(Ls - Lm) = 14.990800

## Theta(25) = 12.591900, 2(Ls - Lm) = 14.990800

## Warning in glm.nb(y ~ x, control = glm.control(maxit = 25, trace = T)):

## alternation limit reached

summary(glmNB)

##
## Call:
## glm.nb(formula = y ~ x, control = glm.control(maxit = 25, trace = T),
## init.theta = 12.59189515, link = log)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.04179 0.34097 0.123 0.902
## x 0.28892 0.03308 8.734 <2e-16 ***
## ---
## Signif. codes: 0 ’***’ 0.001 ’**’ 0.01 ’*’ 0.05 ’.’ 0.1 ’ ’ 1
##

3
## (Dispersion parameter for Negative Binomial(12.5919) family taken to be 1)
##
## Null deviance: 104.160 on 13 degrees of freedom
## Residual deviance: 14.991 on 12 degrees of freedom
## AIC: 83.762
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 12.59
## Std. Err.: 9.81
## Warning while fitting theta: alternation limit reached
##
## 2 x log-likelihood: -77.762

pred_NB <- predict(glmNB, newdata = as.data.frame(x), type = 'response')

pred_NB

## 1 2 3 4 5 6 7 8
## 1.391958 1.858250 2.480745 3.311770 4.421179 5.902230 7.879419 10.518945
## 9 10 11 12 13 14
## 14.042687 18.746847 25.026853 33.410599 44.602815 59.544313

var_func <- pred_NB*(pred_NB + r)/r

# Convergence Plots
p1 <- ggplot(data = result) +
aes(x = iter, y = beta_0) +
geom_line() +
labs(x = 'Iteration Number', y = 'beta_0')

p2 <- ggplot(data = result) +

aes(x = iter, y = beta_1) +
geom_line() +
labs(x = 'Iteration Number', y = 'beta_1')

p3 <- ggplot(data = result) +

aes(x = iter, y = r) +
geom_line() +
labs(x = 'Iteration Number', y = 'r')

p4 <- ggplot(data = result) +

aes(x = iter, y = logL) +
geom_line() +
labs(x = 'Iteration Number', y = 'log-likelihood value')

grid.arrange(p1, p2, p3, p4, nrow = 2, ncol = 2, top = textGrob("Convergence Plots", gp = gpar(fontsize

4
Convergence Plots
0.04
0.300
beta_0

beta_1
0.02 0.296

0.00 0.292

0 5 10 15 20 25 0 5 10 15 20 25
Iteration Number Iteration Number

12.5

log−likelihood value
−38.90

12.0
−38.95
r

11.5

−39.00
11.0

10.5 −39.05
0 5 10 15 20 25 0 5 10 15 20 25
Iteration Number Iteration Number

# Fitted Line Plots

ggplot(data, aes(x = x, y = y)) +
geom_point() +
geom_smooth(method = "glm", method.args = list(family = poisson), aes(colour = 'Poisson Mean'), linety
geom_line(aes(y = pred_NB, colour = 'Neg-Bin Mean'), linetype = 1) +
geom_line(aes(y = var_func, colour = 'Neg-Bin Variance'), linetype = 2) +
scale_colour_manual(name="Function",values= c('red', 'blue', 'black')) +
labs(title = 'Fitted Line Plots') +
theme(plot.title = element_text(hjust = 0.5)) +
ylim(0, 80)

## ‘geom_smooth()‘ using formula = ’y ~ x’

## Warning: Removed 3 rows containing missing values (‘geom_line()‘).

5
Fitted Line Plots
80

Function
Neg−Bin Mean
40
y

Neg−Bin Variance
Poisson Mean

5 10
x