require(quanteda)

dict_newsmap <- dictionary(file = "../data/dictionary.yml")

toks_test <- tokens(data_corpus_inaugural, remove_punct = TRUE)
dfmt_test <- dfm(toks_test) %>%
    dfm_remove(stopwords("en"))
toks_dict_test <- tokens_lookup(toks_test, dict_newsmap, level = 3)
dfmt_dict_test <- dfm(toks_dict_test)

test_that("textmodel_wordmap() works with different inputs", {
    toks <- tokens(data_corpus_inaugural, remove_punct = TRUE) %>%
        tokens_remove(stopwords())
    dfmt <- dfm(toks)
    dfmt$Party <- factor(dfmt$Party)

    smat <- xtabs( ~ docid(dfmt) + dfmt$Party, sparse = TRUE)
    map1 <- textmodel_wordmap(dfmt, smat)
    expect_identical(map1$data, dfmt)
    expect_equal(names(coef(map1)), levels(dfmt$Party))
    expect_null(map1$weight)

    mat <- as.matrix(smat)
    map2 <- textmodel_wordmap(dfmt, mat)
    expect_identical(map2$data, dfmt)
    expect_equal(names(coef(map2)), levels(dfmt$Party))
    expect_null(map2$weight)

    map3 <- textmodel_wordmap(dfmt, mat, boolean = TRUE)
    expect_identical(map3$data, dfmt)
    expect_equal(names(coef(map3)), levels(dfmt$Party))
    expect_false(identical(coef(map1), coef(map3)))

    expect_error(textmodel_wordmap(list(), smat))
    expect_error(textmodel_wordmap(dfmt, list()))
    expect_error(textmodel_wordmap(dfmt, NULL))
    expect_warning(textmodel_wordmap(dfmt, mat, aaa = 10),
                   "aaa argument is not used")

    # use entropy weighting
    map_loc <- textmodel_wordmap(dfmt, mat, entropy = "local")
    expect_identical(dim(map_loc$weight), dim(map_loc$model))
    expect_false(all(map_loc$weight[1,] == map_loc$weight[2,]))
    expect_equal(coef(map_loc, 10)[[1]],
                 head(sort(map_loc$weight[1,] * map_loc$model[1,], decreasing = TRUE), 10))

    map_avg <- textmodel_wordmap(dfmt, mat, entropy = "average")
    expect_identical(dim(map_avg$weight), dim(map_avg$model))
    expect_true(all(map_avg$weight[1,] == map_avg$weight[2,]))
    expect_equal(coef(map_avg, 10)[[1]],
                 head(sort(map_avg$weight[1,] * map_avg$model[1,], decreasing = TRUE), 10))

    map_glo <- textmodel_wordmap(dfmt, mat, entropy = "global")
    expect_identical(dim(map_glo$weight), dim(map_glo$model))
    expect_true(all(map_glo$weight[1,] == map_glo$weight[2,]))
    expect_equal(coef(map_glo, 10)[[1]],
                 head(sort(map_glo$weight[1,] * map_glo$model[1,], decreasing = TRUE), 10))

    expect_false(all(map_glo$weight[1,] == map_loc$weight[1,]))
    expect_false(all(map_loc$weight[1,] == map_avg$weight[1,]))
    expect_false(all(map_avg$weight[1,] == map_glo$weight[1,]))

    expect_error(
        textmodel_wordmap(dfmt, smat, smooth = -1),
        "The value of smooth must be between 0 and Inf"
    )
    expect_error(
        textmodel_wordmap(dfmt, smat, smooth = c(1, 2)),
        "The length of smooth must be 1"
    )
    expect_error(
        textmodel_wordmap(dfmt, smat, boolean = "yes"),
        "The value of boolean cannot be NA"
    )
    expect_error(
        textmodel_wordmap(dfmt, smat, drop_label = "no"),
        "The value of drop_label cannot be NA"
    )
})


test_that("methods for textmodel_wordmap works correctly", {
    txt <- c("Ireland is famous for Guinness.",
              "Guinness began retailing in India in 2007.",
              "Cork is an Irish coastal city.",
              "Titanic departed Cork Harbour in 1912.")

    toks <- tokens(txt)
    label_toks <- tokens_lookup(toks, dict_newsmap, levels = 3)
    label_dfm <- dfm(label_toks)

    feat_dfm <- dfm(toks, tolower = FALSE) %>%
        dfm_select('^[A-Z][A-Za-z1-2]+', selection = "keep",
                             valuetype = 'regex', case_insensitive = FALSE)
    map <- textmodel_wordmap(feat_dfm, label_dfm)

    expect_equal(
        names(map),
        c("model", "data", "weight", "feature", "class",
          "concatenator", "entropy", "boolean", "call", "version")
    )

    # class association is calculated correctly
    # note: both Guinness and Cork occur in IE only once
    expect_equivalent(map$model['ie', c('Ireland', 'Guinness')],
                      map$model['ie', c('Irish', 'Cork')] )
    expect_identical(map$feature, featnames(feat_dfm))

    # rank argument is working
    expect_equal(unname(predict(map)),
                 factor(c("ie", "in", "ie", "ie"), levels = c("in", "ie")))
    expect_equal(unname(predict(map, rank = 2)),
                 factor(c("in", "ie", "in", "in"), levels = c("in", "ie")))
    expect_error(predict(map, rank = 0))

    # different prediction outputs agree
    pred_top <- predict(map, confidence = TRUE)
    pred_all <- predict(map, type = 'all')
    expect_equivalent(pred_top$confidence.fit, apply(pred_all, 1, max))
    expect_equivalent(pred_top$confidence.fit[1], pred_top$confidence.fit[3])

    expect_warning(
        predict(map, confidence.fit = TRUE),
        "'confidence.fit' is deprecated; use 'confidence'"
    )

    expect_output(
        textmodel_wordmap(feat_dfm, label_dfm, verbose = TRUE),
        'Fitting textmodel_wordmap.*label = "ie".*  label = "in"'
    )

    # print
    expect_output(
        print(map),
        paste0('(\n)',
               'Call:(\n)',
               'textmodel_wordmap\\(.*\\)(\n)')
    )

    expect_output(
        print(summary(map)),
        paste0('(\n)',
               'Call:(\n)',
               'textmodel_wordmap\\(.*\\)(\n)',
               '\n',
               'Labels:(\n)',
               '\\[1\\] "in" "ie"(\n)',
               '(\n)',
               'Data Dimension:(\n)',
               '\\[1\\] 4 7(\n)')
    )

})

test_that("textmodel_wordmap() raises errors", {

    dfmt <- dfm(tokens(c(doc1 = "a b c", doc2 = "d e f", doc3 = "a d e")))
    mat0 <- matrix(c(0, 0, 0, 0, 0, 0),
                   nrow = 3, dimnames = list(NULL, c("X", "Y")))
    mat1 <- matrix(c(1, 0, 1, 0, 1, 0),
                   nrow = 3, dimnames = list(NULL, c("X", "Y")))
    mat2 <- matrix(c(1, 0, 1, 0, 1, 0),
                   nrow = 3, dimnames = list(c("doc1", "doc2", "doc3"),
                                             c("X", "Y")))
    mat3 <- matrix(c(1, 0, 1, 0, 1, 0),
                   nrow = 3, dimnames = list(c("d1", "d2", "d3"),
                                             c("X", "Y")))

    expect_silent(textmodel_wordmap(dfmt, mat1))
    expect_silent(textmodel_wordmap(dfmt, mat2))
    expect_warning(textmodel_wordmap(dfmt, mat3),
                   "x and y have different rownames")

    expect_error(textmodel_wordmap(dfmt[1:2,], mat1),
                 "x and y must have the same number of rows")

    expect_error(textmodel_wordmap(dfm_trim(dfmt, min_termfreq = 10), mat1),
                 "x must have at least one non-zero feature")

    expect_error(textmodel_wordmap(dfmt, mat0),
                 "y must have at least one non-zero feature")
})

test_that("label and drop_label are working", {
    txt <- c("American and Japanese leaders met in Tokyo.",
             "Paris Hilton visited British museum in London.",
             "India and Pakistan are neighbours.",
             "A man went to the Moon.")
    toks <- tokens(txt)
    toks_label <- tokens_lookup(toks, dict_newsmap, levels = 3)
    dfmt <- dfm(toks)
    dfmt_label <- dfm(toks_label)

    map1 <- textmodel_wordmap(dfmt, dfmt_label)
    expect_equal(names(coef(map1)), c("us", "jp", "in", "pk", "gb", "fr"))

    map2 <- textmodel_wordmap(dfmt, dfmt_label, label = "max")
    expect_equal(names(coef(map2)), c("jp", "in", "pk", "gb"))

    map3 <- textmodel_wordmap(dfmt, dfmt_label, drop_label = FALSE)
    expect_equal(names(coef(map3)), colnames(dfmt_label))
})

test_that("accuracy() is correct", {

    v1 <- c("c", NA,  "b", "a", "b", "c", "b", "b", "a", "c")
    v2 <- c("c", "b", "a", "a", "b", "c", "b", "b", "a", "c")

    accu <- accuracy(v1, v2)

    expect_equal(accu$tp, c(2, 3, 3))
    expect_equal(accu$fp, c(0, 1, 0))
    expect_equal(accu$tn, c(6, 5, 6))
    expect_equal(accu$fn, c(1, 0, 0))

    expect_identical(
        accu,
        accuracy(rev(v1), rev(v2))
    )
})

test_that("afe() is working", {

    txt <- c("American and Japanese leaders met in Tokyo.",
             "Paris Hilton visited British museum in London.",
             "India and Pakistan are neighbours.",
             "A man went to the Moon.")
    toks <- tokens(txt)
    toks_label <- tokens_lookup(toks, dict_newsmap, levels = 3)
    dfmt <- dfm(toks)
    dfmt_label <- dfm(toks_label)

    expect_equal(afe(dfmt, dfmt_label),
                 7.90, tolerance = 0.1)
    expect_error(afe(dfmt, matrix()))
    expect_error(afe(list(), dfmt_label))
})

test_that("coef() and dictionary() are working", {

    dfmt <- dfm_trim(dfmt_test, min_termfreq = 100)
    dfmt_dict <- dfm_trim(dfmt_dict_test, min_termfreq = 2)

    map <- textmodel_wordmap(dfmt, dfmt_dict)

    expect_true(all(lengths(coef(map, n = 5)) == 5))
    expect_identical(coef(map)[c("us")],
                     coef(map, select = c("us")))
    expect_identical(coef(map)[c("us", "ru", "gb")],
                     coef(map, select = c("us", "gb", "ru")))
    expect_identical(coef(map, select = c("ru", "gb", "us")),
                     coef(map, select = c("us", "gb", "ru")))

    expect_error(coef(map, n = -1),
                 "The value of n must be between 0 and Inf")
    expect_error(coef(map, select = "xx"),
                 "Selected class must be in the model")
    expect_error(coef(map, select = character()),
                 "The length of select must be between 1 and 16")

    # TODO: remove as.list()
    lis1 <- as.list(map)
    expect_equal(length(lis1), 16)
    expect_true(all(sapply(lis1, is.character)))
    expect_true(all(lengths(as.list(lis1)) == 10))

    lis2 <- as.list(map, n = 20, c("ru", "fr"))
    expect_equal(names(lis2), c("ru", "fr"))
    expect_true(all(sapply(lis2, is.character)))
    expect_true(all(lengths(lis2) == 20))

    # TODO: change to as.dictionary()
    dict1 <- as.dictionary(map)
    expect_s4_class(dict1, "dictionary2")
    expect_true(all(lengths(as.list(dict1)) == 10))
    expect_equal(dict1@meta$object$separator, "_")

    dict2 <- as.dictionary(map, n = 20, select = c("ru", "fr"))
    expect_s4_class(dict2, "dictionary2")
    expect_equal(names(dict2), c("ru", "fr"))
    expect_true(all(lengths(dict2) == 20))

    dict3 <- as.dictionary(map, separator = "+")
    expect_s4_class(dict3, "dictionary2")
    expect_equal(dict3@meta$object$separator, "+")

})

test_that("residual is working", {

    smat <- xtabs( ~ docid(dfmt_test) + dfmt_test$Party, sparse = TRUE)
    smat <- smat[,c("Republican", "Democratic")]

    map1 <- textmodel_wordmap(dfmt_test, smat)
    expect_identical(names(coef(map1)),
                     c("Republican", "Democratic"))

    map2 <- textmodel_wordmap(dfmt_test, smat, residual = TRUE)
    expect_identical(names(coef(map2)),
                     c("Republican", "Democratic", "other"))

    options("wordmap_residual_name" = "junk")
    map3 <- textmodel_wordmap(dfmt_test, smat, residual = TRUE)
    expect_identical(names(coef(map3)),
                     c("Republican", "Democratic", "junk"))
    options("wordmap_residual_name" = NULL)

    expect_error(
        textmodel_wordmap(dfmt_test, smat, residual = c(TRUE, FALSE)),
        "The length of residual must be 1"
    )

})