library("testthat") library("arules") context("associations") set.seed(20070611) m <- matrix(as.integer(runif(100000) > 0.8), ncol = 20) dimnames(m) <- list(NULL, paste("item", c(1:20), sep = "")) t <- as(m, "transactions") #t #inspect(t[10]) expect_identical(dim(t), dim(m)) r <- apriori(t, parameter = list(supp = 0.01, conf = 0.1), control = list(verb = FALSE)) #r #summary(r) #inspect(r) ss <- subset(r, subset = lift > 1.4 & lhs %in% "item3") #inspect(ss) expect_identical(labels(lhs(ss)), "{item2,item3}") expect_true(quality(ss)$lift > 1.4) f <- eclat(t, parameter = list(supp = 0.01), control = list(verb = FALSE)) #f #summary(f) #inspect(f) ss <- subset(f, subset = items %in% "item7") #inspect(ss) expect_identical(labels(ss), grep("item7", labels(ss), value = TRUE)) ### create associations manually lmat <- matrix(rbind(c(1, 1, 0), c(0, 0, 1)), ncol = 3) rmat <- matrix(rbind(c(1, 0, 0), c(0, 1, 0)), ncol = 3) colnames(lmat) <- c("a", "b", "c") colnames(rmat) <- c("c", "a", "b") # Note: the column names do not agree! lhs <- as(lmat, "itemMatrix") rhs <- as(rmat, "itemMatrix") is <- new("itemsets", items = lhs, quality = data.frame(support = c(.1, .1))) #inspect(is) expect_equal(labels(is), c("{a,b}", "{c}")) qual <- data.frame( support = c(.5, .5), confidence = c(.5, .5), lift = c(2, 1) ) ## warning because of the disagreeing labels # expect_warning(r <- new( # "rules", # lhs = lhs, # rhs = rhs, # quality = qual # )) expect_warning(r <- rules(lhs, rhs, quality = qual)) #inspect(r) context("subsetting") ## subsetting (also tests itemMatrix) # numeric take_r <- sample(nrow(t), 10) take_c <- sample(ncol(t), 10) expect_equal(dim(sub_n <- t[take_r, take_c]), c(10L, 10L)) expect_equal(dim(t[take_r]), c(10L, ncol(t))) expect_equal(dim(t[take_r, ]), c(10L, ncol(t))) expect_equal(dim(t[, take_c]), c(nrow(t), 10L)) # logical take_rb <- rep(FALSE, nrow(t)) take_rb[take_r] <- TRUE take_cb <- rep(FALSE, ncol(t)) take_cb[take_c] <- TRUE expect_equal(dim(sub_b <- t[take_rb, take_cb]), c(10L, 10L)) expect_equal(dim(t[take_rb, ]), c(10L, ncol(t))) expect_equal(dim(t[take_rb]), c(10L, ncol(t))) expect_equal(dim(t[, take_cb]), c(nrow(t), 10L)) # Note: transactions and itemLabels are mixed up in numeric subset! expect_true(setequal(sub_b, recode(sub_n, sub_b))) # character take_cc <- itemLabels(t)[take_c] expect_equal(dim(t[, take_cc]), c(nrow(t), 10L)) # NA expect_warning(expect_equal(dim(t[NA, NA]), c(0L, 0L))) expect_warning(expect_equal(dim(t[NA]), c(0L, ncol(t)))) expect_warning(expect_equal(dim(t[, NA]), c(nrow(t), 0L))) take_rn <- take_r take_rn[3:4] <- NA take_cn <- take_c take_cn[3:4] <- NA expect_warning(expect_equal(dim(t[take_rn, take_cn]), c(8L, 8L))) take_rbn <- take_rb take_rbn[which(take_rbn)[3:4]] <- NA take_cbn <- take_cb take_cbn[which(take_cbn)[3:4]] <- NA expect_warning(expect_equal(dim(t[take_rbn, take_cbn]), c(8L, 8L))) take_ccn <- take_cc take_ccn[3:4] <- NA expect_warning(expect_equal(dim(t[, take_cbn]), c(nrow(t), 8L))) # rules r <- apriori(t, parameter = list(supp = 0.01, conf = 0.1), control = list(verb = FALSE)) expect_warning(expect_equal(length(r[NA]), 0L)) expect_warning(expect_equal(length(r[c(1L, NA_integer_)]), 1L)) expect_warning(expect_equal(length(r[c(TRUE, NA, FALSE)]), sum(rep( c(TRUE, NA, FALSE), length.out = length(r) ), na.rm = TRUE))) # recycle # itemsets f <- eclat(t, parameter = list(supp = 0.01), control = list(verb = FALSE)) expect_warning(expect_equal(length(f[NA]), 0L)) expect_warning(expect_equal(length(f[c(1L, NA_integer_)]), 1L)) expect_warning(expect_equal(length(f[c(TRUE, NA, FALSE)]), sum(rep( c(TRUE, NA, FALSE), length.out = length(f) ), na.rm = TRUE))) # recycle # head and tail expect_identical(r[1:5], head(r, n = 5)) expect_identical(r[1:6], head(r)) expect_identical(r[1:2], head(r[1:2])) expect_identical(r[0], head(r, n = 0)) expect_identical(r[1:(length(r) - 10)], head(r, n = -10)) expect_identical(r[1:length(r)], head(r, n = length(r))) expect_identical(r[1:length(r)], head(r, n = length(r) + 100L)) expect_identical(sort(r, by = "lift")[1:5], head(r, n = 5, by = "lift")) expect_identical(sort(r, by = "lift", decreasing = FALSE)[1:5], head( r, n = 5, by = "lift", decreasing = FALSE )) expect_identical(head(r[0]), r[0]) # empty rule set expect_identical(r[tail(1:length(r), n = 5)], tail(r, n = 5)) expect_identical(r[1:2], tail(r[1:2])) expect_identical(r[0], tail(r, n = 0)) expect_identical(sort(r, by = "lift")[tail(1:length(r), n = 5)], tail(r, n = 5, by = "lift")) expect_identical(tail(r[0]), r[0]) # empty rule set # match, %in%, %pin% expect_identical(match(r[2:10], r), 2:10) expect_identical(r[2:10] %in% r, 2:10) expect_identical(match(f[2:10], f), 2:10) expect_identical(f[2:10] %in% f, 2:10) expect_identical(rhs(r[1:10]) %pin% "item1", c(FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, FALSE, TRUE)) expect_error(rhs(r[1:10]) %pin% "") expect_warning(rhs(r[1:10]) %pin% c("1", "2"))