# Expectations for discretize(): the "interval", "fixed" and "frequency"
# methods, plus NA handling for "interval", "frequency" and "cluster".

# NOTE(review): this sets a global option and nothing here restores it;
# confirm whether anything outside this section relies on it.
options(digits = 2)

# nums1: the integers 0..10, five copies of each (55 values), shuffled.
# The shuffle is unseeded; every expectation below is stated as break points,
# bin counts or NA counts rather than per-element results.
nums1 <- sample(rep(0:10, times = 5))

# nums2: 60 values in 1..10 with uneven multiplicities
# (28 values are <= 5, 32 values are >= 6).
nums2 <- c(
  rep(1, 7), rep(2, 3), rep(3, 4), rep(4, 5), rep(5, 9),
  rep(6, 10), rep(7, 8), rep(8, 1), rep(9, 9), rep(10, 4)
)

# interval ----
# Two bins over the range 0..10: expected cut points are 0, 5, 10.
d <- discretize(nums1, method = "interval", breaks = 2)
expect_equal(attr(d, "discretized:breaks"), c(0, 5, 10))

# Two bins over the range 1..10: the midpoint 5.5 separates the 28 values
# in 1..5 from the 32 values in 6..10.
d <- discretize(nums2, method = "interval", breaks = 2)
expect_equal(attr(d, "discretized:breaks"), c(1, 5.5, 10))
expect_equal(as.numeric(table(d)), c(28, 32))

# Nine bins over 1..10 have width 1, so the cut points are the integers 1..10.
d <- discretize(nums2, method = "interval", breaks = 9)
expect_equal(attr(d, "discretized:breaks"), as.numeric(1:10))

# fixed ----
expect_error(discretize(nums2, method = "fixed", breaks = 1)) ### needs at least 2 values for breaks

# The expected counts place the value 5 in the upper bin:
# 19 values lie in 1..4 and 41 values lie in 5..10.
d <- discretize(nums2, method = "fixed", breaks = c(0, 5, 10))
expect_length(levels(d), 2L)
expect_equal(as.numeric(table(d)), c(19, 41))

# frequency ----
# nums1 split in two: 25 values lie in 0..4 and 30 values lie in 5..10.
d <- discretize(nums1, method = "frequency", breaks = 2)
expect_length(levels(d), 2L)
expect_equal(as.numeric(table(d)), c(25, 30))

# Eleven bins for eleven distinct values: each bin holds the five copies
# of one value.
d <- discretize(nums1, method = "frequency", breaks = 11)
expect_equal(as.numeric(table(d)), rep(5, 11))

d <- discretize(nums2, method = "frequency", breaks = 2)
expect_length(levels(d), 2L)
expect_equal(as.numeric(table(d)), c(28, 32))

d <- discretize(nums2, method = "frequency", breaks = 6)
expect_equal(as.numeric(table(d)), c(10, 9, 9, 10, 9, 13))

# missing values ----
# Three positions are set to NA; each method (called with its default
# `breaks`) is expected to return exactly three NA entries.
nums1[3:5] <- NA

d <- discretize(nums1, method = "interval")
expect_equal(sum(is.na(d)), 3L)

d <- discretize(nums1, method = "frequency")
expect_equal(sum(is.na(d)), 3L)

d <- discretize(nums1, method = "cluster")
expect_equal(sum(is.na(d)), 3L)