Hobson GLM - Chapter 7-2

.Purpose To work out the example in Chap 7 section 7.8

Senility and WAIS

> folder <- "C:/Cauldron/garage/R/soulcraft/Volatility/Learn/Dobson-GLM/"
> file.input <- paste(folder, "Table 7.8 Senility and WAIS.csv",
+     sep = "")
> data <- read.csv(file.input, header = T, stringsAsFactors = F)

> total <- as.data.frame(tapply(data$s, data$x, length))
> present <- as.data.frame(tapply(data$s, data$x, sum))
> temp <- cbind(as.numeric(rownames(total)), present[, 1], total[,
+     1])
> temp <- data.frame(temp)
> colnames(temp) <- c("s", "y", "total")
> temp$ny <- temp$total - temp$y

Using R

> fit <- glm(formula = cbind(temp$y, temp$ny) ~ temp$s, family = binomial(link = "logit"))
> summary(fit)
Call:
glm(formula = cbind(temp$y, temp$ny) ~ temp$s, family = binomial(link = "logit"))

Deviance Residuals:
    Min       1Q   Median       3Q      Max
-0.9064  -0.6965  -0.2538   0.1719   1.7771

Coefficients:
            Estimate Std. Error z value Pr(>|z|)
(Intercept)   2.4040     1.1918   2.017  0.04369 *
temp$s       -0.3235     0.1140  -2.838  0.00453 **
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 20.208  on 16  degrees of freedom
Residual deviance:  9.419  on 15  degrees of freedom
AIC: 27.792

Number of Fisher Scoring iterations: 5

> betas <- coef(fit)
> t1 <- exp(betas[1] + betas[2] * temp$s)
> t2 <- 1 + exp(betas[1] + betas[2] * temp$s)
> probs <- t1/t2
> temp$probs <- probs
> temp
    s y total ny      probs
4   4 1     2  1 0.75211453
5   5 1     1  0 0.68705597
6   6 1     2  1 0.61369267
7   7 2     3  1 0.53477641
8   8 2     2  0 0.45407982
9   9 2     6  4 0.37572578
10 10 1     6  5 0.30337860
11 11 1     6  5 0.23961509
12 12 0     2  2 0.18568111
13 13 1     6  5 0.14162581
14 14 2     7  5 0.10665418
15 15 0     3  3 0.07951811
16 16 0     4  4 0.05883161
17 17 0     1  1 0.04327366
18 18 0     1  1 0.03169146
19 19 0     1  1 0.02313427
20 20 0     1  1 0.01684746

Pearson Chi Square Statistic

> t1 <- temp$y - temp$total * temp$probs
> t2 <- sqrt(temp$total * temp$probs * (1 - temp$probs))
> chisq.stat <- sum((t1/t2)^2)
> print(chisq.stat)
[1] 8.083029

Goodness of fit

> qchisq(0.95, 15)
[1] 24.99579

Obviously model fits well

> loglikf <- function(idata, probs) {
+     t1 <- sum(idata$y * log(probs))
+     t2 <- sum(idata$ny * log(1 - probs))
+     t3 <- sum(log(choose(idata$total, idata$y)))
+     loglik <- t1 + t2 + t3
+     return(loglik)
+ }
> loglikf(temp, probs)
[1] -11.89593
> loglikf(temp, probs = rep(sum(temp$y)/sum(temp$total), 17))
[1] -17.2904

I am not getting the same values for loglike for null model and alternate model.