context("dataset-plankton")


test_that("whoi_small_plankton_dataset val downloads correctly", {
  skip_on_cran()
  skip_if_not_installed("torch")

  expect_error(
    whoi_small_plankton_dataset(split = "test", download = FALSE),
    "Dataset not found. You can use `download = TRUE`",
    label = "Dataset should fail if not previously downloaded"
  )

  expect_no_error(
    val_ds <- whoi_small_plankton_dataset(split = "val", download = TRUE, transform = transform_to_tensor)
  )
  # Validation dataset should have exactly 5799 samples
  expect_equal(val_ds$.length(), 5799)

  first_item <- val_ds[1]
  expect_tensor_shape(first_item$x, c(1,145, 230))
  # classification of the first item is "47: Leegaardiella_ovalis"
  expect_equal(first_item$y, 47L)
  expect_equal(val_ds$classes[first_item$y], "Leegaardiella_ovalis")

})


test_that("whoi_small_plankton_dataset downloads correctly whatever the split", {
  skip_on_cran()
  skip_if_not_installed("torch")
  skip_if(Sys.getenv("TEST_LARGE_DATASETS", unset = 0) != 1,
          "Skipping test: set TEST_LARGE_DATASETS=1 to enable tests requiring large downloads.")

  expect_error(
    whoi_small_plankton_dataset(split = "test", download = FALSE),
    "Dataset not found. You can use `download = TRUE`",
    label = "Dataset should fail if not previously downloaded"
  )

  expect_no_error(
    train_ds <- whoi_small_plankton_dataset(split = "train", download = TRUE)
  )

  expect_is(train_ds, "dataset", "train should be a dataset")
  # Train dataset should have exactly 40599 samples
  expect_equal(train_ds$.length(), 40599)

  expect_no_error(
    test_ds <- whoi_small_plankton_dataset(split = "test", download = TRUE)
  )
  # Test dataset should have exactly 11601 samples
  expect_equal(test_ds$.length(), 11601)

})


test_that("whoi_small_coralnet_dataset downloads correctly whatever the split", {
  skip_on_cran()
  skip_if_not_installed("torch")
  skip_if(Sys.getenv("TEST_LARGE_DATASETS", unset = 0) != 1,
          "Skipping test: set TEST_LARGE_DATASETS=1 to enable tests requiring large downloads.")

  expect_error(
    whoi_small_coralnet_dataset(split = "test", download = FALSE),
    "Dataset not found. You can use `download = TRUE`",
    label = "Dataset should fail if not previously downloaded"
  )

  expect_no_error(
    train_ds <- whoi_small_coralnet_dataset(split = "train", download = TRUE)
  )

  expect_is(train_ds, "dataset", "train should be a dataset")
  # Train dataset should have exactly 314 samples
  expect_equal(train_ds$.length(), 314)

  expect_no_error(
    val_ds <- whoi_small_coralnet_dataset(split = "val", download = TRUE, transform = transform_to_tensor)
  )
  # Validation dataset should have exactly 45 samples
  expect_equal(val_ds$.length(), 45)

  first_item <- val_ds[1]
  expect_tensor_shape(first_item$x, c(3, 3000, 4000))
  # classification of the first item is "1: diploria_labrinthyformis"
  expect_equal(first_item$y, 1L)
  expect_equal(val_ds$classes[first_item$y], "diploria_labrinthyformis")

  expect_no_error(
    test_ds <- whoi_small_coralnet_dataset(split = "test", download = TRUE)
  )
  # Test dataset should have exactly 91 samples
  expect_equal(test_ds$.length(), 91)

})

test_that("whoi_small_plankton_dataset derivatives download and prepare correctly", {
  skip_on_cran()
  skip_if_not_installed("torch")

  val_ds <- whoi_small_plankton_dataset(
    split = "val", download = TRUE,
    transform = . %>% transform_to_tensor() %>% transform_resize(size = c(150, 300))
    )


  dl <- torch::dataloader(val_ds, batch_size = 10)
  # 5799 turns into 580 batches of 10
  expect_length(dl, 580)
  iter <- dataloader_make_iter(dl)
  expect_no_error(
    i <- dataloader_next(iter)
  )

  # Check shape, dtype, and values on X
  expect_tensor_shape(i[[1]], c(10, 1, 150, 300))
  expect_tensor_dtype(i[[1]], torch_float())
  expect_true((torch_max(i[[1]]) <= 1)$item())
  # Check shape, dtype and names on y
  expect_length(i[[2]], 10)
  expect_named(i, c("x", "y"))})


test_that("whoi_plankton_dataset downloads correctly whatever the split", {
  skip_on_cran()
  skip_if_not_installed("torch")
  skip_if(Sys.getenv("TEST_LARGE_DATASETS", unset = 0) != 1,
          "Skipping test: set TEST_LARGE_DATASETS=1 to enable tests requiring large downloads.")


  expect_error(
    whoi_plankton_dataset(split = "test", download = FALSE),
    "Dataset not found. You can use `download = TRUE`",
    label = "Dataset should fail if not previously downloaded"
  )

  expect_no_error(
    train_ds <- whoi_plankton_dataset(split = "train", download = TRUE)
  )

  expect_is(train_ds, "dataset", "train should be a dataset")
  # Train dataset should have exactly 669806 samples
  expect_equal(train_ds$.length(), 669806)

  val_ds <- whoi_plankton_dataset(split = "val", download = TRUE, transform = transform_to_tensor)
  # Validation dataset should have exactly 95686 samples
  expect_equal(val_ds$.length(), 95686)

  first_item <- val_ds[1]
  expect_tensor_shape(first_item$x, c(1,45, 388))
  # classification of the first item is "48: Leptocylindrus"
  expect_equal(first_item$y, 48L)
  expect_equal(val_ds$classes[first_item$y], "Leptocylindrus")

  test_ds <- whoi_plankton_dataset(split = "test", download = TRUE)
  # Test dataset should have exactly 191375 samples
  expect_equal(test_ds$.length(), 191375)

})

test_that("dataloader from whoi_plankton_dataset gets torch tensors", {
  skip_on_cran()
  skip_if_not_installed("torch")
  skip_if(Sys.getenv("TEST_LARGE_DATASETS", unset = 0) != 1,
          "Skipping test: set TEST_LARGE_DATASETS=1 to enable tests requiring large downloads.")

  ds <- whoi_plankton_dataset(
    split = "val", download = TRUE,
    transform = . %>% transform_to_tensor() %>% transform_resize(size = c(150, 300))
  )
  dl <- torch::dataloader(ds, batch_size = 10)
  # 956,9k turns into 9569 batches of 10
  expect_length(dl, 9569)
  iter <- dataloader_make_iter(dl)
  expect_no_error(
    i <- dataloader_next(iter)
  )
  # Check shape, dtype, and values on X
  expect_tensor_shape(i[[1]], c(10, 1, 150, 300))
  expect_tensor_dtype(i[[1]], torch_float())
  expect_true((torch_max(i[[1]]) <= 1)$item())
  # Check shape, dtype and names on y
  expect_length(i[[2]],10)
  expect_named(i, c("x", "y"))})