test_that("mismm() works for data-frame-like inputs", { skip_if_not_installed("gurobi") df <- readRDS(test_path("fixtures", "mimmm-train_mild_df.rds")) for (method in c("mip", "heuristic", "qp-heuristic")) { set.seed(8) fit1 <- .run_mismm(df, method = method) expect_s3_class(fit1, "mismm") expect_equal( predict(fit1, new_data = df, type = "raw", layer = "bag"), predict(fit1, new_data = df, type = "raw", layer = "bag", new_bags = df$bag_name) ) expect_equal( predict(fit1, new_data = df, type = "raw", layer = "instance"), predict(fit1, new_data = df, type = "raw", layer = "instance", new_bags = df$bag_name, new_instances = df$instance_name) ) bag_preds <- df %>% .get_pred_matrix(fit1) %>% .summarize_preds(by = bag_name) expect_equal(nrow(bag_preds), length(unique(df$bag_name))) expect_setequal(bag_preds$bag_name, unique(df$bag_name)) expect_snapshot(bag_preds) predict(fit1, new_data = df, type = "class", layer = "bag") predict(fit1, new_data = df, type = "class", layer = "instance") predict(fit1, new_data = df, type = "raw", layer = "bag") predict(fit1, new_data = df, type = "raw", layer = "instance") } }) test_that("mismm() works with formula method", { skip_if_not_installed("gurobi") df <- readRDS(test_path("fixtures", "mimmm-train_mild_df.rds")) fit1 <- mismm(mild(bag_label, bag_name, instance_name) ~ X1 + X2 + X3, data = df) fit2 <- .run_mismm(df) expect_equal(fit1$ksvm_fit, fit2$ksvm_fit) expect_equal(fit1$total_step, fit2$total_step) expect_equal(fit1$call_type, "mismm.formula") expect_equal(fit1$features, c("X1", "X2", "X3")) expect_equal(fit1$bag_name, "bag_name") expect_equal(fit1$instance_name, "instance_name") # predictions should match expect_equal(predict(fit1, df, type = "raw"), predict(fit2, df, type = "raw")) expect_equal(predict(fit1, df, type = "class"), predict(fit2, df, type = "class")) # check only 1 predictor works fit1 <- mismm(mild(bag_label, bag_name, instance_name) ~ X1, data = df) predict(fit1, df, type = "raw") # check some obscure formulas fit1 <- mismm(mild(bag_label, bag_name, instance_name) ~ 0 + X1:X2 + X2*X3, data = df) expect_equal(fit1$features, colnames(model.matrix(~ 0 + X1:X2 + X2*X3, data = df))) predict(fit1, df, type = "raw") # check other methods for (method in c("heuristic", "mip", "qp-heuristic")) { set.seed(8) formula <- mild(bag_label, bag_name, instance_name) ~ X1 + X2 + X3 fit1 <- mismm(formula, data = df, method = method) expect_s3_class(fit1, "mismm") } }) test_that("mismm() works with mild_df method", { skip_if_not_installed("gurobi") df <- readRDS(test_path("fixtures", "mimmm-train_mild_df.rds")) fit1 <- mismm(df) fit2 <- .run_mismm(df) # default method expect_s3_class(fit1, "mismm") expect_equal(fit1$ksvm_fit, fit2$ksvm_fit) expect_equal(fit1$total_step, fit2$total_step) expect_equal(fit1$call_type, "mismm.mild_df") expect_equal(fit1$features, paste0("X", 1:3)) expect_equal(fit1$bag_name, "bag_name") expect_equal(fit1$instance_name, "instance_name") predict(fit1, new_data = df) fit1 <- mismm(df, method = "qp-heuristic") expect_s3_class(fit1, "mismm") }) test_that("predict.mismm returns labels that match the input labels", { skip_if_not_installed("gurobi") df1 <- readRDS(test_path("fixtures", "mimmm-train_mild_df.rds")) %>% tibble::as_tibble() %>% dplyr::filter(bag_name %in% c("bag1", "bag2", "bag4", "bag8")) test_prediction_levels_equal <- function(df, method, class = "default") { mdl <- switch( class, "default" = .run_mismm(df, method = method), "formula" = mismm(mild(bag_label, bag_name, instance_name) ~ X1 + X2, data = df, method = method) ) preds <- predict(mdl, df, type = "class") expect_setequal(levels(preds$.pred_class), levels(df$bag_label)) } # 0/1 df2 <- df1 %>% dplyr::mutate(bag_label = factor(bag_label)) test_prediction_levels_equal(df2, method = "heuristic") test_prediction_levels_equal(df2, method = "mip") test_prediction_levels_equal(df2, method = "qp-heuristic") test_prediction_levels_equal(df2, method = "heuristic", class = "formula") # 1/0 df2 <- df1 %>% dplyr::mutate(bag_label = factor(bag_label, levels = c(1, 0))) test_prediction_levels_equal(df2, method = "heuristic") test_prediction_levels_equal(df2, method = "mip") test_prediction_levels_equal(df2, method = "qp-heuristic") test_prediction_levels_equal(df2, method = "heuristic", class = "formula") # TRUE/FALSE df2 <- df1 %>% dplyr::mutate(bag_label = factor(bag_label, labels = c(TRUE, FALSE))) test_prediction_levels_equal(df2, method = "heuristic") test_prediction_levels_equal(df2, method = "mip") test_prediction_levels_equal(df2, method = "qp-heuristic") test_prediction_levels_equal(df2, method = "heuristic", class = "formula") # Yes/No df2 <- df1 %>% dplyr::mutate(bag_label = factor(bag_label, labels = c("No", "Yes"))) expect_message(test_prediction_levels_equal(df2, method = "heuristic")) expect_message(test_prediction_levels_equal(df2, method = "mip")) expect_message(test_prediction_levels_equal(df2, method = "qp-heuristic")) # check that 0/1 and 1/0 return the same predictions df2 <- df1 %>% dplyr::mutate(bag_label = factor(bag_label, levels = c(0, 1))) df3 <- df1 %>% dplyr::mutate(bag_label = factor(bag_label, levels = c(1, 0))) fit2 <- mismm(mild(bag_label, bag_name, instance_name) ~ X1 + X2, data = df2) mdl3 <- mismm(mild(bag_label, bag_name, instance_name) ~ X1 + X2, data = df3) expect_equal(predict(fit2, df2, type = "class"), predict(mdl3, df3, type = "class")) }) test_that("Dots work in mismm() formula", { skip_if_not_installed("gurobi") df <- readRDS(test_path("fixtures", "mimmm-train_mild_df.rds")) mismm_dot <- mismm(mild(bag_label, bag_name, instance_name) ~ ., data = df) mismm_nodot <- mismm(mild(bag_label, bag_name, instance_name) ~ X1 + X2 + X3, data = df) expect_equal(mismm_dot$ksvm_fit, mismm_nodot$ksvm_fit) expect_equal(mismm_dot$features, mismm_nodot$features) expect_equal(mismm_dot$bag_name, mismm_nodot$bag_name) expect_equal(predict(mismm_dot, new_data = df), predict(mismm_nodot, new_data = df)) }) test_that("mismm() has correct argument handling", { skip_if_not_installed("gurobi") df <- readRDS(test_path("fixtures", "mimmm-train_mild_df.rds")) %>% dplyr::filter(bag_name %in% c("bag1", "bag2", "bag4", "bag8")) ## weights mismm(df, weights = TRUE) fit1 <- mismm(df, weights = c("0" = 1, "1" = 1)) fit1$weights <- NULL expect_equal( fit1, mismm(df, weights = FALSE) ) df2 <- df %>% dplyr::mutate(bag_label = factor(bag_label, levels = c(1, 0))) expect_equal(dimnames(df2), dimnames(df)) expect_equal( mismm(df, weights = c("0" = 2, "1" = 1)), mismm(df2, weights = c("0" = 2, "1" = 1)) ) set.seed(8) # nystrom sampling may change, need to set seed for each fit1 <- mismm(df, weights = c("0" = 2, "1" = 1), method = "mip") set.seed(8) fit2 <- mismm(df2, weights = c("0" = 2, "1" = 1), method = "mip") expect_equal(fit1, fit2) df2 <- df %>% dplyr::mutate(bag_label = factor(bag_label, labels = c("No", "Yes"))) expect_equal(dimnames(df2), dimnames(df)) expect_equal( mismm(df, weights = c("0" = 2, "1" = 1))$ksvm_fit, suppressMessages(mismm(df2, weights = c("No" = 2, "Yes" = 1))$ksvm_fit) ) set.seed(8) # nystrom sampling may change, need to set seed for each fit1 <- mismm(df, weights = c("0" = 2, "1" = 1), method = "mip") set.seed(8) fit2 <- mismm(df2, weights = c("No" = 2, "Yes" = 1), method = "mip") %>% suppressMessages() expect_equal(fit1$gurobi_fit, fit2$gurobi_fit) expect_false(isTRUE(all.equal( mismm(df, weights = c("0" = 2, "1" = 1), method = "mip")$gurobi_fit, mismm(df, weights = c("0" = 1e-6, "1" = 1), method = "mip")$gurobi_fit ))) expect_false(isTRUE(all.equal( mismm(df, weights = c("0" = 200, "1" = 1), method = "heuristic")$ksvm_fit, mismm(df, weights = c("0" = 1e-6, "1" = 1), method = "heuristic")$ksvm_fit ))) expect_false(isTRUE(all.equal( mismm(df, weights = c("0" = 200, "1" = 1), method = "qp-heuristic")$gurobi_fit, mismm(df, weights = c("0" = 1e-6, "1" = 1), method = "qp-heuristic")$gurobi_fit ))) ## kernel # there isn't a "linear" kernel option for mismm expect_warning(expect_equal( mismm(df, method = "heuristic", control = list(kernel = "radial")), mismm(df, method = "heuristic", control = list(kernel = "linear")) )) # TODO: try passing in the kernel as a matrix into this expect_warning(expect_false(isTRUE(all.equal( mismm(df, method = "mip", control = list(kernel = "radial")), mismm(df, method = "mip", control = list(kernel = "linear")) )))) expect_warning(expect_false(isTRUE(all.equal( mismm(df, method = "qp-heuristic", control = list(kernel = "radial")), mismm(df, method = "qp-heuristic", control = list(kernel = "linear")) )))) ## scale expect_false(isTRUE(all.equal( mismm(df, method = "heuristic", control = list(scale = TRUE)), mismm(df, method = "heuristic", control = list(scale = FALSE)) ))) expect_false(isTRUE(all.equal( mismm(df, method = "mip", control = list(scale = TRUE)), mismm(df, method = "mip", control = list(scale = FALSE)) ))) expect_false(isTRUE(all.equal( mismm(df, method = "qp-heuristic", control = list(scale = TRUE)), mismm(df, method = "qp-heuristic", control = list(scale = FALSE)) ))) expect_false(isTRUE(all.equal( mismm(df, method = "qp-heuristic", control = list(scale = TRUE)), mismm(df, method = "qp-heuristic", control = list(scale = FALSE)) ))) ## nystrom_args mdl <- mismm(df, method = "mip", control = list(nystrom_args = list(m = 16, r = 8))) expect_equal(length(mdl$gurobi_fit$w), 8) expect_equal(dim(mdl$kfm_fit$dv), c(8, 16)) expect_equal(dim(mdl$kfm_fit$df_sub), c(16, ncol(df) - 3)) ## minimal arguments mismm.mild_df(df) mismm.formula(mild(bag_label, bag_name, instance_name) ~ ., data = df) .run_mismm(df) }) test_that("mismm mip can warm start", { skip_if_not_installed("gurobi") verbose <- interactive() df <- readRDS(test_path("fixtures", "mimmm-train_mild_df.rds")) # manually check that the output says "User MIP start produced solution with objective ..." set.seed(8) fit1 <- .run_mismm(df, method = "mip", control = list(start = TRUE, verbose = verbose)) fit2 <- .run_mismm(df, method = "mip", control = list(start = FALSE, verbose = verbose)) expect_equal(fit1$gurobi_fit[c("xi", "z")], fit2$gurobi_fit[c("xi", "z")]) expect_lte(abs(fit1$gurobi_fit$b - fit2$gurobi_fit$b), 1e-7) expect_lte(max(abs(fit1$gurobi_fit$w) - abs(fit2$gurobi_fit$w)), 1e-4) pred1 <- predict(fit1, new_data = df, type = "raw", layer = "instance") pred2 <- predict(fit2, new_data = df, type = "raw", layer = "instance") expect_equal(pred1, pred2, tolerance = 1e-7) # Hard to test whether the warm start improves the time to reach a solution # without testing large problems }) test_that("mismm mip works with radial kernel", { skip_if_not_installed("gurobi") df <- readRDS(test_path("fixtures", "mimmm-train_mild_df.rds")) fit1 <- .run_mismm(df, method = "mip", control = list(kernel = "radial", sigma = 1)) expect(!is.null(fit1$kfm_fit), failure_message = "Kfm_fit was not found in the model") predict(fit1, new_data = df, type = "class", layer = "bag") predict(fit1, new_data = df, type = "class", layer = "instance") predict(fit1, new_data = df, type = "raw", layer = "bag") predict(fit1, new_data = df, type = "raw", layer = "instance") fit2 <- mismm(mild(bag_label, bag_name, instance_name) ~ X1 + X2 + X3, data = df, method = "mip", control = list(kernel = "radial", sigma = 1)) expect(!is.null(fit1$kfm_fit), failure_message = "Kfm_fit was not found in the model") m <- 20 r <- 10 fit2 <- .run_mismm(df, method = "mip", control = list(kernel = "radial", sigma = 1, nystrom_args = list(m = m, r = r))) expect_equal(dim(fit2$kfm_fit$dv), c(r, m)) expect_equal(dim(fit2$kfm_fit$df_sub), c(m, length(4:6))) # Running with linear kernel shouldn't have the kfm_fit element expect_warning({ fit1 <- .run_mismm(df, method = "mip", control = list(kernel = "linear")) }) expect(!is.null(fit1$kfm_fit), failure_message = "Kfm_fit was not found in the model") }) test_that("Passing kernel matrix into mismm works", { skip_if_not_installed("gurobi") df <- readRDS(test_path("fixtures", "mimmm-train_mild_df.rds")) df_test <- readRDS(test_path("fixtures", "mimmm-test_mild_df.rds")) set.seed(8) df_shuf <- df[sample(seq_len(nrow(df))), ] check_kernel_matrix_works <- function(method) { set.seed(8) mdl1 <- mismm(df_shuf, method = method, control = list(kernel = kme(df_shuf, sigma = 0.05), sigma = 0.05)) pred1 <- predict(mdl1, new_data = df_test, type = "raw", kernel = kme(df_test, df_shuf, sigma = 0.05)) set.seed(8) mdl2 <- mismm(df_shuf, method = method, control = list(sigma = 0.05, scale = FALSE)) pred2 <- predict(mdl2, new_data = df_test, type = "raw") shared <- c( "ksvm_fit", "call_type", "x", "features", "levels", "cost", "sigma", "weights", "repr_inst", "n_step", "useful_inst_idx", "inst_order", "bag_name", "instance_name" ) expect_equal(mdl1[shared], mdl2[shared]) expect_equal(pred1, pred2) } check_kernel_matrix_works(method = "heuristic") %>% expect_message() check_kernel_matrix_works(method = "qp-heuristic") %>% expect_message() }) test_that("Re-ordering data doesn't reduce performance", { skip_if_not_installed("gurobi") df <- readRDS(test_path("fixtures", "mimmm-train_mild_df.rds")) df_test <- readRDS(test_path("fixtures", "mimmm-test_mild_df.rds")) check_auc_after_reordering <- function(method) { set.seed(8) mdl1 <- mismm(df, method = method, control = list(sigma = 0.1)) mdl2 <- mismm(df[sample(seq_len(nrow(df))), ], method = method, control = list(sigma = 0.1)) pred1 <- predict(mdl1, df_test, type = "raw") pred2 <- predict(mdl2, df_test, type = "raw") auc1 <- with(df_test, pROC::auc(response = classify_bags(bag_label, bag_name), predictor = classify_bags(pred1$.pred, bag_name), levels = c(0,1), direction = "<")) auc2 <- with(df_test, pROC::auc(response = classify_bags(bag_label, bag_name), predictor = classify_bags(pred2$.pred, bag_name), levels = c(0,1), direction = "<")) # the auc2 should be in the neighborhood of auc1 auc1; auc2 eps <- 0.01 expect_gte(auc2, auc1 - eps) expect_lte(auc2, auc1 + eps) } check_auc_after_reordering(method = "heuristic") check_auc_after_reordering(method = "qp-heuristic") }) test_that("`mismm()` value returns make sense", { skip_if_not_installed("gurobi") df <- readRDS(test_path("fixtures", "mimmm-train_mild_df.rds")) expect_snapshot({ models <- list( "mildata-heur" = mismm(df, method = "heuristic"), "mildata-mip" = mismm(df, method = "mip", control = list(nystrom_args = list(m = 10))), "mildata-qp" = mismm(df, method = "qp-heuristic"), "xy" = mismm(x = as.data.frame(df[, 4:6]), y = df$bag_label, bags = df$bag_name, instances = df$instance_name), "formula" = mismm(mild(bag_label, bag_name, instance_name) ~ ., data = df), "no-scale-heur" = mismm(df, method = "heuristic", control = list(scale = FALSE)), "no-scale-mip" = mismm(df, method = "mip", control = list(scale = FALSE, nystrom_args = list(m = 10))), "no-scale-qp" = mismm(df, method = "qp-heuristic", control = list(scale = FALSE)), "no-weights" = mismm(df, method = "heuristic", weights = FALSE) ) %>% suppressWarnings() %>% suppressMessages() print(lapply(models, names)) print(models) }) expect_true(TRUE) }) test_that("`predict.mismm()` works without new_data", { skip_if_not_installed("gurobi") df_train <- readRDS(test_path("fixtures", "mimmm-train_mild_df.rds")) df_test <- readRDS(test_path("fixtures", "mimmm-test_mild_df.rds")) check_prediction_no_data <- function(method) { fit1 <- mismm(df_train, method = method, control = list(scale = FALSE, sigma = 1/10)) pred1 <- predict(fit1, df_test, type = "raw", layer = "instance") pred2 <- predict(fit1, NULL, "raw", "instance", new_bags = df_test$bag_label, new_instances = df_test$instance_name, kernel = kme(df_test, df_train, sigma = 1/10)) expect_equal(pred1, pred2) } check_prediction_no_data(method = "heuristic") check_prediction_no_data(method = "qp-heuristic") })