# Tests for the column-statistics helpers (colmean, colsum, colrange,
# colnmissing, colfreq) applied to a CSV file opened with laf_open_csv().
#
# Setup: a small CSV with missing fields is written to a temporary file and
# opened as `laf`; `data` is the in-memory reference data.frame the results
# are compared against.

# Raw CSV content. Empty fields (rows 3, 4, 6 and 7) are the missing values
# the statistics have to cope with.
lines <- c(
  "1,M,1.45,Rotterdam",
  "2,F,12.00,Amsterdam",
  "3,,.22,Berlin",
  ",M,22,Paris",
  "4,F,12345,London",
  "5,M,,Copenhagen",
  "6,M,-12.1,",
  "7,F,-1,Oslo"
)

# Reference data mirroring `lines` column by column. The empty city in row 7
# is kept as "" (not NA), so the expected missing count for `city` is 0.
data <- data.frame(
  id = c(1, 2, 3, NA, 4, 5, 6, 7),
  gender = factor(
    c("M", "F", NA, "M", "F", "M", "M", "F"),
    levels = c("M", "F")
  ),
  x = c(1.45, 12, 0.22, 22, 12345, NA, -12.1, -1),
  # "Amsterdam" without trailing blank, matching the CSV line above.
  city = c(
    "Rotterdam", "Amsterdam", "Berlin", "Paris",
    "London", "Copenhagen", "", "Oslo"
  ),
  stringsAsFactors = FALSE
)

# Write the CSV to a temporary file and open it with one type per column.
tmpcsv <- tempfile()
writeLines(lines, con = tmpcsv, sep = "\n")
laf <- laf_open_csv(
  filename = tmpcsv,
  column_types = c("integer", "categorical", "double", "string")
)

context("Test calculation of column statistics")

# Each numeric statistic is checked on column 1 (integer) and column 3
# (double), with the default na.rm and with na.rm = FALSE, plus once through
# the column accessor `laf$V3`.
test_that("colmean works", {
  expect_equal(as.numeric(colmean(laf, 1)), mean(data[, 1], na.rm = TRUE))
  expect_equal(as.numeric(colmean(laf, 3)), mean(data[, 3], na.rm = TRUE))
  expect_equal(
    as.numeric(colmean(laf, 1, na.rm = FALSE)),
    mean(data[, 1], na.rm = FALSE)
  )
  expect_equal(
    as.numeric(colmean(laf, 3, na.rm = FALSE)),
    mean(data[, 3], na.rm = FALSE)
  )
  expect_equal(
    as.numeric(colmean(laf$V3, na.rm = FALSE)),
    mean(data[, 3], na.rm = FALSE)
  )
})

test_that("colsum works", {
  expect_equal(as.numeric(colsum(laf, 1)), sum(data[, 1], na.rm = TRUE))
  expect_equal(as.numeric(colsum(laf, 3)), sum(data[, 3], na.rm = TRUE))
  expect_equal(
    as.numeric(colsum(laf, 1, na.rm = FALSE)),
    sum(data[, 1], na.rm = FALSE)
  )
  expect_equal(
    as.numeric(colsum(laf, 3, na.rm = FALSE)),
    sum(data[, 3], na.rm = FALSE)
  )
  expect_equal(
    as.numeric(colsum(laf$V3, na.rm = FALSE)),
    sum(data[, 3], na.rm = FALSE)
  )
})

test_that("colrange works", {
  expect_equal(as.numeric(colrange(laf, 1)), range(data[, 1], na.rm = TRUE))
  expect_equal(as.numeric(colrange(laf, 3)), range(data[, 3], na.rm = TRUE))
  expect_equal(
    as.numeric(colrange(laf, 1, na.rm = FALSE)),
    range(data[, 1], na.rm = FALSE)
  )
  expect_equal(
    as.numeric(colrange(laf, 3, na.rm = FALSE)),
    range(data[, 3], na.rm = FALSE)
  )
  expect_equal(
    as.numeric(colrange(laf$V3, na.rm = FALSE)),
    range(data[, 3], na.rm = FALSE)
  )
})

test_that("colnmissing works", {
  # Count NAs column by column on the original column types; apply() would
  # first coerce the mixed-type data.frame to a character matrix.
  expected_missing <- vapply(data, function(a) sum(is.na(a)), integer(1))
  expect_equal(
    as.numeric(colnmissing(laf, seq_len(ncol(laf)))),
    as.numeric(expected_missing)
  )
})

test_that("colfreq works", {
  expect_equal(
    as.numeric(colfreq(laf, 1)),
    as.numeric(table(data[, 1], useNA = "ifany"))
  )
  expect_equal(
    as.numeric(colfreq(laf, 1, useNA = "always")),
    as.numeric(table(data[, 1], useNA = "always"))
  )
  expect_equal(
    as.numeric(colfreq(laf, 1, useNA = "no")),
    as.numeric(table(data[, 1], useNA = "no"))
  )
  expect_equal(
    as.numeric(colfreq(laf, 2)),
    as.numeric(table(data[, 2], useNA = "ifany"))
  )
  # The reference for the double column is tabulated after as.integer(),
  # i.e. on values truncated toward zero.
  # NOTE(review): presumably colfreq bins doubles as integers - confirm.
  expect_equal(
    as.numeric(colfreq(laf, 3)),
    as.numeric(table(as.integer(data[, 3]), useNA = "ifany"))
  )
})

# Remove the temporary CSV once all tests have run.
file.remove(tmpcsv)