Stationarity Tests
Purpose
To check the pairs for residuals stationarity using adf tests and unit root tests
Let me prepare the data first
> library(fArma)
> library(fSeries)
> library(TSA)
> library(pspline)
> library(fUnitRoots)
> library(RSQLite)
> library(lmtest)
> library(tseries)
> program.date <- "feb18"
> ticker.data <- "tickers.csv"
> date.start <- "2009-02-02"
> date.end <- "2010-02-18"
> INITIAL.CAPITAL <- 1e+06
> UNITS <- 1e+05
> all.pairs.file <- paste("all_pairs_", program.date, "_v1.csv",
+ sep = "")
> shortlisted.pairs.file <- paste("shortlisted_pairs_", program.date,
+ "_v1.csv", sep = "")
> arma.file <- paste("arma_", program.date, "_revised.csv", sep = "")
> final.pairs.file <- paste("finalpairs_", program.date, ".csv",
+ sep = "")
> discuss.pairs.file <- paste("disusspairs_", program.date, ".csv",
+ sep = "")
> eacf.result.file <- paste("eacf_resultfile_", program.date, ".csv",
+ sep = "")
> db <- "C:/sqlite/mydbases/pairs/pairsv1.s3db"
> drv <- dbDriver("SQLite")
> con <- dbConnect(drv, dbname = db)
> query <-
> security.db <- dbGetQuery(con, query)
> security.db <- security.db[, 3:4]
> security.db1 <- unstack(security.db, form = security.db$price ~
+ security.db$ticker)
> dbDisconnect(con)
[1] TRUE
> con <- dbConnect(drv, dbname = db)
> query <-
> security.db <- dbGetQuery(con, query)
> security.db <- security.db[, 3:4]
> security.db1.high <- unstack(security.db, form = security.db$high_price ~
+ security.db$ticker)
> query <-
> security.db <- dbGetQuery(con, query)
> head(security.db)
ticker_id trade_date ticker low_price*1.0
1 1 2009-02-02 ABAN 462.20
2 1 2009-02-03 ABAN 461.15
3 1 2009-02-04 ABAN 424.50
4 1 2009-02-05 ABAN 412.60
5 1 2009-02-06 ABAN 417.20
6 1 2009-02-09 ABAN 424.10
> security.db <- security.db[, 3:4]
> security.db1.low <- unstack(security.db, form = security.db$low_price ~
+ security.db$ticker)
> stkreturns <- returns(ts(security.db1), "simple")
> stkreturns <- stkreturns[-1, ]
> n <- dim(stkreturns)[2]
> pair.combinations <- matrix(data = NA, nrow = n * (n + 1)/2,
+ ncol = 2)
> rowcount <- 1
> for (i in 1:n) {
+ for (j in 1:i) {
+ pair.combinations[rowcount, 1] <- i
+ pair.combinations[rowcount, 2] <- j
+ rowcount <- rowcount + 1
+ }
+ }
> pair.combinations <- data.frame(pair.combinations)
> colnames(pair.combinations) <- c("i", "j")
> tickers$mktcap.cr <- round(tickers$mktcap, 0)
> lookupi <- tickers[, c("tickers", "ticker_id", "mktcap.cr", "sector",
+ "sector_id")]
> lookupj <- tickers[, c("tickers", "ticker_id", "mktcap.cr", "sector",
+ "sector_id")]
> colnames(lookupi) <- c("tickeri", "i", "mktcap.cr.i", "sector.i",
+ "sector.i.id")
> colnames(lookupj) <- c("tickerj", "j", "mktcap.cr.j", "sector.j",
+ "sector.j.id")
> d1 <- merge(pair.combinations, lookupj, by.x = "j", by.y = "j",
+ all.x = T)
> d2 <- merge(d1, lookupi, by.x = "i", by.y = "i", all.x = T)
> d3 <- d2[d2$i != d2$j, c("i", "j", "tickeri", "tickerj", "sector.i",
+ "mktcap.cr.i", "sector.j", "mktcap.cr.j", "sector.i.id",
+ "sector.j.id")]
> samesector <- d3[d3$sector.i.id == d3$sector.j.id, ]
> x <- pmax(samesector$mktcap.cr.i, samesector$mktcap.cr.j)/pmin(samesector$mktcap.cr.i,
+ samesector$mktcap.cr.j)
> x <- round(x, 0)
> samesector <- samesector[which(x <= 8), ]
> sector.tests <- samesector
> sector.tests$p.a.b.1 <- 0
> sector.tests$p.a.b.2 <- 0
> sector.tests$p.b.a.1 <- 0
> sector.tests$p.b.a.2 <- 0
> npairs <- dim(samesector)[1]
> pair = 1
> for (pair in 1:npairs) {
+ a <- samesector[pair, "tickeri"]
+ b <- samesector[pair, "tickerj"]
+ y1 <- log(security.db1[, a])
+ x1 <- log(security.db1[, b])
+ fit <- lm(y1 ~ x1)
+ if (summary(fit)$coefficients[1, 4] < 0.05) {
+ error <- residuals(fit)
+ }
+ else {
+ fit <- lm(y1 ~ x1 + 0)
+ error <- residuals(fit)
+ }
+ n <- length(error)
+ res <- unitrootTest(error, lag = 1, type = "c")
+ p.a.b.1 <- attr(res, "test")$p.value[1]
+ res <- adfTest(error, lag = 1, type = "c")
+ p.a.b.2 <- attr(res, "test")$p.value[1]
+ y1 <- log(security.db1[, b])
+ x1 <- log(security.db1[, a])
+ fit <- lm(y1 ~ x1)
+ if (summary(fit)$coefficients[1, 4] < 0.05) {
+ error <- residuals(fit)
+ }
+ else {
+ fit <- lm(y1 ~ x1 + 0)
+ error <- residuals(fit)
+ }
+ n <- length(error)
+ res <- unitrootTest(error, lag = 1, type = "c")
+ p.b.a.1 <- attr(res, "test")$p.value[1]
+ res <- adfTest(error, lag = 1, type = "c")
+ p.b.a.2 <- attr(res, "test")$p.value[1]
+ sector.tests[pair, "p.a.b.1"] <- p.a.b.1
+ sector.tests[pair, "p.a.b.2"] <- p.a.b.2
+ sector.tests[pair, "p.b.a.1"] <- p.b.a.1
+ sector.tests[pair, "p.b.a.2"] <- p.b.a.2
+ } |
> library(ggplot2) > temp <- data.frame(x = 1:311, p.a.b.1 = sector.tests$p.a.b.1, + p.a.b.2 = sector.tests$p.a.b.2, p.b.a.1 = sector.tests$p.b.a.1, + p.b.a.2 = sector.tests$p.b.a.2) > temp <- temp[temp[, 2] < 0.05 & temp[, 3] < 0.05, ] > dim(temp) [1] 127 5 > p <- ggplot(temp, aes(x = p.a.b.1, y = p.a.b.2)) > q <- p + geom_point() > q <- q + geom_abline(intercept = 0, slope = 1, colour = "sienna", + lwd = 1.2) > print(q) > p <- ggplot(temp, aes(x = p.b.a.1, y = p.b.a.2)) > q <- p + geom_point() > q <- q + geom_abline(intercept = 0, slope = 1, colour = "sienna", + lwd = 1.2) > print(q) > temp <- data.frame(x = 1:311, p.a.b.1 = sector.tests$p.a.b.1, + p.a.b.2 = sector.tests$p.a.b.2, p.b.a.1 = sector.tests$p.b.a.1, + p.b.a.2 = sector.tests$p.b.a.2) > temp <- temp[temp[, 4] < 0.05 & temp[, 5] < 0.05, ] > dim(temp) [1] 121 5 > p <- ggplot(temp, aes(x = p.a.b.1, y = p.a.b.2)) > q <- p + geom_point() > q <- q + geom_abline(intercept = 0, slope = 1, colour = "sienna", + lwd = 1.2) > print(q) > p <- ggplot(temp, aes(x = p.b.a.1, y = p.b.a.2)) > q <- p + geom_point() > q <- q + geom_abline(intercept = 0, slope = 1, colour = "sienna", + lwd = 1.2) > print(q) |

Till now I have used only first lag and found that there are about 120 pairs which pass the stationarity tests
Now I want to use more lags and check the same.
> sector.tests <- samesector
> sector.tests$p.a.b.1 <- 0
> sector.tests$p.a.b.2 <- 0
> sector.tests$p.a.b.3 <- 0
> sector.tests$p.b.a.1 <- 0
> sector.tests$p.b.a.2 <- 0
> sector.tests$p.b.a.3 <- 0
> npairs <- dim(samesector)[1]
> pair = 1
> for (pair in 1:npairs) {
+ a <- samesector[pair, "tickeri"]
+ b <- samesector[pair, "tickerj"]
+ y1 <- log(security.db1[, a])
+ x1 <- log(security.db1[, b])
+ fit <- lm(y1 ~ x1)
+ if (summary(fit)$coefficients[1, 4] < 0.05) {
+ error <- residuals(fit)
+ }
+ else {
+ fit <- lm(y1 ~ x1 + 0)
+ error <- residuals(fit)
+ }
+ n <- length(error)
+ res <- unitrootTest(error, lag = 5, type = "c")
+ p.a.b.1 <- attr(res, "test")$p.value[1]
+ res <- adfTest(error, lag = 5, type = "c")
+ p.a.b.2 <- attr(res, "test")$p.value[1]
+ res <- adf.test(error)
+ p.a.b.3 <- res$p.value
+ y1 <- log(security.db1[, b])
+ x1 <- log(security.db1[, a])
+ fit <- lm(y1 ~ x1)
+ if (summary(fit)$coefficients[1, 4] < 0.05) {
+ error <- residuals(fit)
+ }
+ else {
+ fit <- lm(y1 ~ x1 + 0)
+ error <- residuals(fit)
+ }
+ n <- length(error)
+ res <- unitrootTest(error, lag = 5, type = "c")
+ p.b.a.1 <- attr(res, "test")$p.value[1]
+ res <- adfTest(error, lag = 5, type = "c")
+ p.b.a.2 <- attr(res, "test")$p.value[1]
+ res <- adf.test(error)
+ p.b.a.3 <- res$p.value
+ sector.tests[pair, "p.a.b.1"] <- p.a.b.1
+ sector.tests[pair, "p.a.b.2"] <- p.a.b.2
+ sector.tests[pair, "p.a.b.3"] <- p.a.b.3
+ sector.tests[pair, "p.b.a.1"] <- p.b.a.1
+ sector.tests[pair, "p.b.a.3"] <- p.b.a.3
+ }
> temp <- data.frame(x = 1:311, p.a.b.1 = sector.tests$p.a.b.1,
+ p.a.b.2 = sector.tests$p.a.b.2, p.b.a.1 = sector.tests$p.b.a.1,
+ p.b.a.2 = sector.tests$p.b.a.2)
> temp <- temp[temp[, 2] < 0.05 & temp[, 3] < 0.05, ]
> dim(temp)
[1] 88 5
> temp <- data.frame(x = 1:311, p.a.b.1 = sector.tests$p.a.b.1,
+ p.a.b.2 = sector.tests$p.a.b.2, p.b.a.1 = sector.tests$p.b.a.1,
+ p.b.a.2 = sector.tests$p.b.a.2)
> temp <- temp[temp[, 4] < 0.05 & temp[, 5] < 0.05, ]
> length(which(sector.tests$p.b.a.3 < 0.05 & sector.tests$p.a.b.3 <
+ 0.05))
[1] 42
> length(which(sector.tests$p.b.a.3 < 0.05 | sector.tests$p.a.b.3 <
+ 0.05))
[1] 59 |
If you take more lags in to consideration, there are only about 90 pairs which pass the stationarity tests. Does it surprise me ?
I was under the misconception that if I take more lags, then there would be more pairs for my disposal..
But I got a completely different result.
If I do adf test I get only 59 pairs
So, should I be following adf test..Let me see what pairs I get if I follow adf test only
> sector.tests[which(sector.tests$p.b.a.3 < 0.05 | sector.tests$p.a.b.3 <
+ 0.05), c(3, 4)]
tickeri tickerj
11 AMBUJACEM ACC
439 GESHIP CONCOR
511 GRASIM AMBUJACEM
531 GSPL BPCL
918 HINDPETRO GSPL
1044 IBREALEST DLF
1129 IDBI ANDHRABANK
1173 IDBI BANKINDIA
1272 IDFC HDFC
1428 INDIACEM ACC
1474 INDIAINFO IDFC
1480 INDIAINFO IFCI
1539 INFOSYSTCH HCLTECH
2063 KOTAKBANK ICICIBANK
2072 KOTAKBANK AXISBANK
2152 LITL HCC
2446 MLL GESHIP
2607 MPHASIS HCLTECH
2748 MTNL IDEA
2827 NAGARCONST LITL
2950 NATIONALUM HINDALCO
3149 ONGC CAIRN
3844 RANBAXY CIPLA
3887 RANBAXY LUPIN
3908 RANBAXY BIOCON
4030 RECLTD PFC
4105 RELCAPITAL INDIAINFO
4370 RELINFRA POWERGRID
4584 ROLTA MPHASIS
4617 ROLTA POLARIS
4623 ROLTA HCLTECH
4727 RPOWER POWERGRID
4959 SCI GESHIP
5040 SCI CONCOR
5175 SIEMENS CESC
5221 SIEMENS ABB
5609 SUZLON RPOWER
5621 SUZLON POWERGRID
5682 SYNDIBANK BANKINDIA
5701 SYNDIBANK IOB
5719 SYNDIBANK PNB
5723 SYNDIBANK ANDHRABANK
5727 SYNDIBANK IDBI
6295 TATASTEEL JSWSTEEL
6562 TECHM ROLTA
6954 TULIP TTML
7122 ULTRACEMCO INDIACEM
7237 UNIONBANK IDBI
7260 UNIONBANK SYNDIBANK
7484 UNITECH DLF
7554 VIJAYABANK IDBI
7559 VIJAYABANK SYNDIBANK
8195 DISHTV ZEEL
8307 DRREDDY BIOCON
8337 DRREDDY RANBAXY
8940 PATNI HCLTECH
8991 PATNI MPHASIS
9015 PATNI ROLTA
9199 HDFCBANK1 HDFC1 |
Let me check the other stats for these
INDIACEM ACC TULIP TTML ULTRACEMCO INDIACEM DISHTV ZEEL DRREDDY RANBAXY
The above are the pairs which passed ADF test but failed stricter tests like that from urca.