test_that("add_duration", { data("testdt_tracking") wt <- as.wt_dt(testdt_tracking) wt_duration <- add_duration(wt, cutoff = 300, replace_by = NA) # test that variables in result expect_true("duration" %in% names(wt_duration)) # test that duration variable is not NA expect_true(min(wt_duration$duration, na.rm = TRUE) >= 0) # test that no duration > cutoff expect_true(max(wt_duration$duration, na.rm = TRUE) < 300) # test that last row for panelist is NA for default expect_true(is.na(tail(wt_duration$duration[wt_duration$panelist_id == "AiDS4k1rQZ"], 1))) # test that last row for panelist is not NA wt_duration <- add_duration(wt, last_replace_by = 0) expect_true(tail(wt_duration$duration[wt_duration$panelist_id == "AiDS4k1rQZ"], 1) == 0) # test device_switch_na wt_duration <- add_duration(wt, device_switch_na = TRUE, device_var = "device") wt_duration$device_next <- ave(wt_duration$device, wt_duration$panelist_id, FUN = function(x) c(tail(x, -1), NA)) expect_true(is.na(wt_duration$duration[wt_duration$device_next != wt_duration$device][1])) }) test_that("add_duration testdt_specific", { skip_on_cran() options(digits = 22) data("testdt_tracking") wt <- as.wt_dt(testdt_tracking) wt_duration <- add_duration(wt) # test duration for first row expect_true(as.numeric(wt_duration[1, "duration"]) == 2.8580000400543212890625) # test total duration expect_true(sum(wt_duration[, "duration"], na.rm = T) == 1177364.354005098342896) }) test_that("add_duration errors", { data("testdt_tracking") wt <- as.wt_dt(testdt_tracking) expect_error(add_duration(wt, replace_by = -1)) expect_error(add_duration(wt, device_switch_na = T, device_var = NULL)) expect_error(add_duration(wt, device_switch_na = T, device_var = "not_a_variable")) }) test_that("add_session", { data("testdt_tracking") wt <- as.wt_dt(testdt_tracking) wt_session <- add_session(wt, cutoff = 1800) # test that variables in result expect_true("session" %in% names(wt_session)) # test that session variable always positive expect_true(min(wt_session$session, na.rm = T) >= 1) # test that next session is only smaller than session when switch to new panelist wt_session$next_session <- ave(wt_session$session, wt_session$panelist_id, FUN = function(x) c(tail(x, -1), NA)) expect_true(nrow(wt_session[wt_session$session > wt_session$next_session, ]) <= length(unique(wt_session$panelist_id))) }) test_that("add_session errors", { data("testdt_tracking") wt <- as.wt_dt(testdt_tracking) # no cutoff specified expect_error(add_session(wt)) }) test_that("add_session testdt_specific", { options(digits = 22) data("testdt_tracking") wt <- as.wt_dt(testdt_tracking) wt_session <- add_session(wt, cutoff = 1800) expect_true(max(wt_session$session[wt$panelist_id == "AiDS4k1rQZ"]) == 123) }) test_that("deduplicate", { data("testdt_tracking") wt <- as.wt_dt(testdt_tracking) wt <- add_duration(wt, cutoff = 300, replace_by = 300) # test that variables in result wt_dedup <- deduplicate(wt, method = "flag") expect_true("duplicate" %in% names(wt_dedup)) wt_dedup <- deduplicate(wt, method = "drop") expect_true(!"duplicate" %in% names(wt_dedup)) wt_dedup <- deduplicate(wt, method = "aggregate", keep_nvisits = TRUE) expect_true("visits" %in% names(wt_dedup)) wt <- extract_domain(wt) wt_dedup <- deduplicate(wt, method = "aggregate", add_grpvars = "domain") expect_true("domain" %in% names(wt_dedup)) wt <- extract_host(wt) wt_dedup <- deduplicate(wt, method = "aggregate", add_grpvars = c("domain", "host")) expect_true("domain" %in% names(wt_dedup)) }) test_that("deduplicate errors", { data("testdt_tracking") wt <- as.wt_dt(testdt_tracking) wt <- add_duration(wt, cutoff = 300, replace_by = 300) expect_error(deduplicate(wt, method = "aggregate", duration_var = "not_a_variable")) expect_error(deduplicate(wt, method = "flag", within = NULL)) expect_error(deduplicate(wt, method = "drop", within = NULL)) }) test_that("deduplicate testdt_specific", { data("testdt_tracking") wt <- as.wt_dt(testdt_tracking) wt <- add_duration(wt, cutoff = 300, replace_by = 300) wt_dedup <- deduplicate(wt, method = "drop") expect_true(nrow(wt_dedup) == 46574) wt_dedup <- deduplicate(wt, method = "flag") expect_true(sum(wt_dedup[, "duplicate"]) == 3038) wt_dedup <- deduplicate(wt, method = "aggregate") expect_true(nrow(wt_dedup) == 39540) wt_dedup <- deduplicate(wt, method = "aggregate", keep_nvisits = TRUE) expect_true(max(wt_dedup[, "visits"]) == 608) }) test_that("extract_host", { data("testdt_tracking") wt <- as.wt_dt(testdt_tracking) wt_host <- suppressWarnings(extract_host(wt)) expect_true("host" %in% names(wt_host)) wt$other_url <- wt$url wt_host <- suppressWarnings(extract_host(wt, varname = "other_url")) expect_true("other_url_host" %in% names(wt_host)) }) test_that("extract_host errors", { data("testdt_tracking") wt <- as.wt_dt(testdt_tracking) expect_error(extract_host(wt, varname = "not_a_variable")) }) test_that("extract_host testdt_specific", { data("testdt_tracking") wt <- as.wt_dt(testdt_tracking) wt_host <- extract_host(wt) expect_true(wt_host$host[1] == "dkr1.ssisurveys.com") expect_true(!any(is.na(wt_host$host))) }) test_that("extract_domain", { data("testdt_tracking") wt <- as.wt_dt(testdt_tracking) # test existence of new columns wt_domain <- extract_domain(wt) expect_true("domain" %in% names(wt_domain)) wt$other_url <- wt$url wt_domain <- extract_domain(wt, varname = "other_url") expect_true("other_url_domain" %in% names(wt_domain)) }) test_that("extract_domain errors", { data("testdt_tracking") wt <- as.wt_dt(testdt_tracking) expect_error(extract_domain(wt, varname = "not_a_variable")) }) test_that("extract_domain testdt_specific", { data("testdt_tracking") wt <- as.wt_dt(testdt_tracking) wt_domain <- extract_domain(wt) expect_true(wt_domain[1, "domain"] == "ssisurveys.com") }) test_that("extract_path", { data("testdt_tracking") wt <- as.wt_dt(testdt_tracking) wt_path <- extract_path(wt) expect_true("path" %in% names(wt_path)) wt$other_url <- wt$url wt_path <- extract_path(wt, varname = "other_url") expect_true("other_url_path" %in% names(wt_path)) }) test_that("extract_path errors", { data("testdt_tracking") wt <- as.wt_dt(testdt_tracking) expect_error(extract_path(wt, varname = "not_a_variable")) }) test_that("extract_path testdt_specific", { data("testdt_tracking") wt <- as.wt_dt(testdt_tracking) wt_path <- extract_path(wt) expect_true(wt_path[1, "path"] == "/tzktsxomta") expect_true(wt_path[wt_path$url == "https://www.youtube.com/", "path"][1] == "/") }) test_that("parse_path", { skip_on_cran() data("testdt_tracking") wt <- as.wt_dt(testdt_tracking) wt_path <- parse_path(wt) expect_true("path_split" %in% names(wt_path)) # test that all path_split values have letters expect_true(sum(grepl("[A-Za-z]", wt_path$path_split)) == sum(wt_path$path_split != "")) # test different name for URL variable wt$url2 <- wt$url wt_path2 <- parse_path(wt, varname = "url2") expect_true("url2_path_split" %in% names(wt_path2)) }) test_that("parse_path errors", { skip_on_cran() data("testdt_tracking") wt <- as.wt_dt(testdt_tracking) expect_error(extract_path(wt, varname = "not_a_variable")) }) test_that("parse_path testdt_specific", { skip_on_cran() data("testdt_tracking") wt <- as.wt_dt(testdt_tracking) wt_path <- parse_path(wt) expect_true(wt_path[4879, "path_split"] == "quartzy,instagram,influencers,are,out,slackers,are,in") expect_true(wt_path$path_split[wt_path$url == "https://www.youtube.com/"][1] == "") }) test_that("drop_query", { data("testdt_tracking") wt <- as.wt_dt(testdt_tracking) # test existence of new colums wt_noquery <- drop_query(wt) expect_true("url_noquery" %in% names(wt_noquery)) wt$other_url <- wt$url wt_noquery <- drop_query(wt, varname = "other_url") expect_true("other_url_noquery" %in% names(wt_noquery)) # test absence of queries / fragments wt_noquery <- drop_query(wt) expect_true(length(grep("\\?|#", wt_noquery$url_noquery)) == 0) }) test_that("drop_query errors", { data("testdt_tracking") wt <- as.wt_dt(testdt_tracking) expect_error(drop_query(wt, varname = "not_a_variable")) }) test_that("drop_query testdt_specific", { data("testdt_tracking") wt <- as.wt_dt(testdt_tracking) wt_noquery <- drop_query(wt) expect_true(wt_noquery[1, "url_noquery"] == "https://dkr1.ssisurveys.com/tzktsxomta") wt_queries <- wt[grep("\\?", wt$url), ] wt_noquery <- drop_query(wt_queries) expect_true(wt_noquery[1, "url_noquery"] == "https://www.marketwatch.com/story/kelloggs-owned-veggie-burger-brand-morningstar-farms-to-go-all-vegan-by-2021-2019-03-04") }) test_that("add_next_visit add_previous_visit", { data("testdt_tracking") wt <- as.wt_dt(testdt_tracking) # test existence of new colums wt_next <- add_next_visit(wt) expect_true("url_next" %in% names(wt_next)) wt_next <- add_next_visit(wt, level = "host") expect_true("host_next" %in% names(wt_next)) wt_next <- add_next_visit(wt, level = "domain") expect_true("domain_next" %in% names(wt_next)) wt_prev <- add_previous_visit(wt) expect_true("url_previous" %in% names(wt_prev)) wt_prev <- add_previous_visit(wt, level = "host") expect_true("host_previous" %in% names(wt_prev)) wt_prev <- add_previous_visit(wt, level = "domain") expect_true("domain_previous" %in% names(wt_prev)) # test identity of second visit and first next visit wt_next <- add_next_visit(wt) expect_true(wt_next[2, "url"] == wt_next[1, "url_next"]) # test identity of first visit and second previous visit wt_prev <- add_previous_visit(wt) expect_true(wt_prev[1, "url"] == wt_prev[2, "url_previous"]) # test first and last row expect_true(is.na(wt_next[nrow(wt_next), "url_next"])) expect_true(is.na(wt_prev[1, "url_previous"])) }) test_that("add_next_visit add_previous_visit testdt_specific", { data("testdt_tracking") wt <- as.wt_dt(testdt_tracking) wt_next <- add_next_visit(wt) expect_true(wt_next[1, "url_next"] == "https://roirocket.decipherinc.com/hivvocmeox") wt_prev <- add_previous_visit(wt) expect_true(wt_prev[2, "url_previous"] == "https://dkr1.ssisurveys.com/tzktsxomta") }) test_that("add_title", { skip_on_cran() data("testdt_tracking") wt <- as.wt_dt(testdt_tracking[1, ]) wt_title <- add_title(wt) expect_true("title" %in% names(wt_title)) }) test_that("add_title testdt_specific", { skip_on_cran() data("testdt_tracking") wt <- as.wt_dt(testdt_tracking[1, ]) wt_title <- add_title(wt) expect_true(is.na(wt_title[, "title"])) }) test_that("add_referral", { data("testdt_tracking") wt <- as.wt_dt(testdt_tracking) wt_ref <- add_referral(wt, platform_domains = "facebook.com", patterns = "fbclid=") # test existence of columns expect_true("referral" %in% names(wt_ref)) expect_true(!"domain_previous" %in% names(wt_ref)) # test value of new column expect_true(names(table(wt_ref$referral)) == "facebook.com") }) test_that("add_referral errors", { data("testdt_tracking") wt <- as.wt_dt(testdt_tracking) expect_error(add_referral(wt)) expect_error(add_referral(wt, platform_domains = "facebook.com")) expect_error(add_referral(wt, platform_domains = c("facebook.com", "twitter.com"), pattern = "some")) }) test_that("add_referral testdt_specific", { data("testdt_tracking") wt <- as.wt_dt(testdt_tracking) wt_ref <- add_referral(wt, platform_domains = "facebook.com", patterns = "fbclid=") expect_true(table(wt_ref$referral) == 57) expect_true(table(wt_ref$referral, exclude = NULL)[2] == 49555) }) test_that("urldummy", { data("testdt_tracking") wt <- as.wt_dt(testdt_tracking) wt <- extract_domain(wt) code_urls <- c("https://dkr1.ssisurveys.com/tzktsxomta") wt <- create_urldummy(wt, dummy = code_urls, name = "test_dummy") expect_true(wt$test_dummy[1]) }) test_that("panelist_data", { data("testdt_tracking") wt <- as.wt_dt(testdt_tracking) data("testdt_survey_w") # test existence of columns wt_joined <- add_panelist_data(wt, testdt_survey_w) expect_true("leftright" %in% names(wt_joined)) wt_joined <- add_panelist_data(wt, testdt_survey_w, cols = c("gender", "education")) expect_true(!("leftright" %in% names(wt_joined))) # text presence of data expect_true(sum(is.na(wt_joined$gender)) == 0) }) test_that("panelist_data errors", { data("testdt_tracking") wt <- as.wt_dt(testdt_tracking) data("testdt_survey_w") wt_joined <- add_panelist_data(wt, testdt_survey_w) expect_error(add_panelist_data(wt_joined, "not_a_variable")) expect_error(add_panelist_data(wt_joined, join_on = "not_a_variable")) }) test_that("panelist_data testdt_specific", { data("testdt_tracking") wt <- as.wt_dt(testdt_tracking) data("testdt_survey_w") wt_joined <- add_panelist_data(wt, testdt_survey_w) expect_true(round(mean(wt_joined$leftright), 2) == 4.99) }) test_that("issue 98", { test <- data.frame( panelist_id = "abc", timestamp = as.POSIXct("2019-05-31 12:41:59"), url = "https://www.omahasteaks.com/product/Private-Reserve%AE-Boneless-Strips-00000004718" ) test <- as.wt_dt(test) expect_error(suppressWarnings(parse_path(test, decode = TRUE))) expect_no_error(parse_path(test, decode = FALSE)) })