R Under development (unstable) (2024-01-07 r85787 ucrt) -- "Unsuffered Consequences" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: x86_64-w64-mingw32/x64 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > if (require(RUnit)) { + library(FeatureHashing) + + mapping_value <- structure(c(3789462177, 4122940517, 1079927366, 1505155248, 4103768016, + 1576910802, 248868694, 2189134401, 1321560276, 2636986885, 1980993114, + 3588767725, 3873367263, 3437882550, 1125161513, 875000041, 1178743966, + 1791688646), .Names = c("PlantQn1", "PlantQn2", "PlantQn3", "uptake", + "TypeMississippi", "Treatmentchilled", "PlantMn1", "PlantMn2", + "PlantMn3", "PlantQc1", "PlantQc2", "PlantQc3", "Treatmentnonchilled", + "PlantMc1", "PlantMc2", "PlantMc3", "conc", "TypeQuebec")) + + checkTrue(all(hashed.value(names(mapping_value)) %% 2^32 == mapping_value), + "Unexpected hashing result by hashed.value") + + m <- hashed.model.matrix(~ ., CO2, hash.size = 2^10, create.mapping = TRUE, transpose = TRUE, is.dgCMatrix = FALSE, signed.hash = TRUE) + mapping <- as.list(attr(m, "mapping")) + checkTrue(all(!duplicated(unlist(mapping) %% 2^10 + 1)), + "Unexpected collision of hashing example") + + name.candidate <- lapply(seq_along(CO2), function(i) { + if (class(CO2[[i]])[1] == "numeric") rep(names(CO2)[i], nrow(CO2)) else { + paste0(names(CO2)[i], CO2[[i]]) + } + }) + + for(index in seq_along(mapping)) { + i <- hashed.value(names(mapping))[index] %% 2^10 + 1 + name <- names(mapping)[index] + col.i <- which(sapply(name.candidate, function(x) name %in% x)) + + X <- name.candidate[[col.i]] + j <- which(name == X) + value <- hash.sign(names(mapping))[index] + if (class(CO2[[col.i]])[1] == "numeric") { + value <- value * CO2[[col.i]] + } + + x <- m[i,] + checkTrue(all(x[j] == value), "Inconsistent hash value between hashed.value, hash.sign and hashed.model.matrix") + checkTrue(all(x[-j] == 0), "Inconsistent hash value between hashed.value, hash.sign and hashed.model.matrix") + } + + m <- hashed.model.matrix(~ .^2, CO2, hash.size = 2^10, create.mapping = TRUE, + transpose = TRUE, is.dgCMatrix = FALSE, signed.hash = TRUE) + mapping_value <- hash.mapping(m) + + mapping_value.expected <- structure(list(PlantQc1 = 2636986885, PlantQn1 = 3789462177, + PlantQc2 = 1980993114, PlantQn2 = 4122940517, "PlantMc3:conc" = 3739801583, + "PlantMc2:conc" = 1395604525, "PlantMn3:conc" = 3235311896, + "PlantMn2:conc" = 540436913, "PlantQc1:TypeQuebec" = 925725157, + "TypeQuebec:conc" = 2746149015, "PlantQn1:TypeQuebec" = 4237186241, + PlantQc3 = 3588767725, "PlantQn1:conc" = 2288578128, "Treatmentchilled:conc" = 255906650, + PlantQn3 = 1079927366, "PlantQc1:conc" = 2251098928, TypeQuebec = 1791688646, + "PlantMc3:Treatmentchilled" = 3891992855, "PlantMn2:Treatmentnonchilled" = 1166626593, + "TypeQuebec:Treatmentchilled" = 2941354564, "PlantMn1:Treatmentnonchilled" = 3205838849, + Treatmentnonchilled = 3873367263, Treatmentchilled = 1576910802, + "PlantMc2:uptake" = 175028363, "PlantMc1:uptake" = 4172577474, + "PlantMn3:uptake" = 4053818018, "TypeQuebec:uptake" = 3178360588, + "PlantQc3:uptake" = 3937745018, uptake = 1505155248, "PlantMc3:TypeMississippi" = 1611851048, + "PlantMn3:TypeMississippi" = 2164201561, PlantMc1 = 3437882550, + PlantMn1 = 248868694, PlantMc2 = 1125161513, PlantMn2 = 2189134401, + PlantMc3 = 875000041, PlantMn3 = 1321560276, "PlantQn3:conc" = 3023324759, + "TypeMississippi:conc" = 1821057777, "PlantQc2:conc" = 714503009, + "PlantQc3:conc" = 1657455447, "PlantQn2:TypeQuebec" = 2781903365, + "PlantQc2:TypeQuebec" = 51079300, "PlantQn3:TypeQuebec" = 862836424, + "PlantQn2:conc" = 900931883, "PlantQc3:TypeQuebec" = 510730956, + "TypeMississippi:Treatmentchilled" = 2281473014, "PlantMn3:Treatmentnonchilled" = 1193828828, + "PlantQc1:Treatmentchilled" = 4048766167, "PlantQc2:Treatmentchilled" = 2534699902, + "PlantQn2:Treatmentnonchilled" = 1386368423, "PlantQn1:Treatmentnonchilled" = 160472416, + "PlantMc3:uptake" = 712540968, "PlantQn2:uptake" = 974267571, + "Treatmentnonchilled:uptake" = 48404478, "PlantQn1:uptake" = 3757368420, + "conc:uptake" = 375043273, "PlantMc1:conc" = 3318696701, + conc = 1178743966, "Treatmentnonchilled:conc" = 4015153694, + "PlantMn1:conc" = 1646616462, "PlantMc2:Treatmentchilled" = 2148207650, + "PlantMc1:Treatmentchilled" = 3227113198, "PlantQc3:Treatmentchilled" = 3565755962, + "TypeQuebec:Treatmentnonchilled" = 1973117262, "TypeMississippi:Treatmentnonchilled" = 4231588217, + "PlantQn3:Treatmentnonchilled" = 1411718622, "PlantMn2:uptake" = 342355816, + "TypeMississippi:uptake" = 31699673, "PlantQn3:uptake" = 4247950021, + "PlantQc2:uptake" = 3035200074, "Treatmentchilled:uptake" = 3235544148, + "PlantQc1:uptake" = 946686067, "PlantMn1:uptake" = 3186749149, + "PlantMc2:TypeMississippi" = 3984179893, "PlantMc1:TypeMississippi" = 517052919, + "PlantMn2:TypeMississippi" = 2154821378, "PlantMn1:TypeMississippi" = 3688281232, + TypeMississippi = 4103768016), .Names = c("PlantQc1", "PlantQn1", + "PlantQc2", "PlantQn2", "PlantMc3:conc", "PlantMc2:conc", "PlantMn3:conc", + "PlantMn2:conc", "PlantQc1:TypeQuebec", "TypeQuebec:conc", "PlantQn1:TypeQuebec", + "PlantQc3", "PlantQn1:conc", "Treatmentchilled:conc", "PlantQn3", + "PlantQc1:conc", "TypeQuebec", "PlantMc3:Treatmentchilled", "PlantMn2:Treatmentnonchilled", + "TypeQuebec:Treatmentchilled", "PlantMn1:Treatmentnonchilled", + "Treatmentnonchilled", "Treatmentchilled", "PlantMc2:uptake", + "PlantMc1:uptake", "PlantMn3:uptake", "TypeQuebec:uptake", "PlantQc3:uptake", + "uptake", "PlantMc3:TypeMississippi", "PlantMn3:TypeMississippi", + "PlantMc1", "PlantMn1", "PlantMc2", "PlantMn2", "PlantMc3", "PlantMn3", + "PlantQn3:conc", "TypeMississippi:conc", "PlantQc2:conc", "PlantQc3:conc", + "PlantQn2:TypeQuebec", "PlantQc2:TypeQuebec", "PlantQn3:TypeQuebec", + "PlantQn2:conc", "PlantQc3:TypeQuebec", "TypeMississippi:Treatmentchilled", + "PlantMn3:Treatmentnonchilled", "PlantQc1:Treatmentchilled", + "PlantQc2:Treatmentchilled", "PlantQn2:Treatmentnonchilled", + "PlantQn1:Treatmentnonchilled", "PlantMc3:uptake", "PlantQn2:uptake", + "Treatmentnonchilled:uptake", "PlantQn1:uptake", "conc:uptake", + "PlantMc1:conc", "conc", "Treatmentnonchilled:conc", "PlantMn1:conc", + "PlantMc2:Treatmentchilled", "PlantMc1:Treatmentchilled", "PlantQc3:Treatmentchilled", + "TypeQuebec:Treatmentnonchilled", "TypeMississippi:Treatmentnonchilled", + "PlantQn3:Treatmentnonchilled", "PlantMn2:uptake", "TypeMississippi:uptake", + "PlantQn3:uptake", "PlantQc2:uptake", "Treatmentchilled:uptake", + "PlantQc1:uptake", "PlantMn1:uptake", "PlantMc2:TypeMississippi", + "PlantMc1:TypeMississippi", "PlantMn2:TypeMississippi", "PlantMn1:TypeMississippi", + "TypeMississippi")) + mapping_value.expected <- unlist(mapping_value.expected) %% 2^10 + 1 + mapping_value[names(mapping_value.expected)] + checkTrue(isTRUE(all.equal(mapping_value[names(mapping_value.expected)], + mapping_value.expected)), + "Unexpected hashing result of interaction term") + + m2 <- hashed.model.matrix(~ . ^ 2, data = CO2, hash.size = 32, create.mapping = TRUE, + transpose = TRUE, is.dgCMatrix = FALSE, signed.hash = TRUE) + checkTrue(!all(m2@i == 0), + "All hashed indices created by hashed.model.matrix are zero") + checkTrue(sum(m2 %*% rep(1, ncol(m2)) != 0) > 1, + "Incorrect hashed matrix created by hashed.model.matrix") + + mapping <- hash.mapping(m2) + mapping.raw <- hashed.value(names(mapping)) + names(mapping.raw) <- names(mapping) + is.interaction <- grepl(":", names(mapping), fixed = TRUE) + checkTrue(all(mapping[!is.interaction] == mapping.raw[!is.interaction] %% 32 + 1)) + checkTrue(sum(is.na(unlist(lapply(names(mapping), strsplit, "")))) == 0, + "Non-ascii name occurs!") + + for(i in grep(":", names(mapping), fixed = TRUE)) { + name <- names(mapping)[i] + key <- strsplit(name, ":")[[1]] + input <- unlist(lapply(mapping.raw[key], intToRaw), use.names = FALSE) + r1 <- hashed.value(rawToChar(input)) + checkTrue(r1 %% 32 + 1== mapping[name], + "The hashing result of interaction is not expected!") + } + + # check handling of NA + tryCatch(m <- hashed.model.matrix(~ PlAnT, CO2, 8, signed.hash = TRUE, + transpose = TRUE, is.dgCMatrix = FALSE), error = function(e) { + if (class(e)[1] != "std::invalid_argument") stop(e) + if (conditionMessage(e) != "Failed to find the column:PlAnT") stop(e) + }) + + m <- hashed.model.matrix(~ Plant:Type:Treatment, CO2, create.mapping = TRUE, signed.hash = TRUE) + map <- hash.mapping(m) + map <- map[grepl("\\w+:\\w+:\\w+", names(map))] + checkTrue(all(hashed.interaction.value(names(map)) %% (2^18) + 1== map), + "The hashed.interaction.value gives inconsistent result of hashed.model.matrix") + } Loading required package: RUnit [1] TRUE > > proc.time() user system elapsed 1.87 0.25 2.12