* using log directory ‘/home/hornik/tmp/CRAN_recheck/fixest.Rcheck’ * using R Under development (unstable) (2025-09-02 r88773) * using platform: x86_64-pc-linux-gnu * R was compiled by gcc-15 (Debian 15.2.0-1) 15.2.0 GNU Fortran (Debian 15.2.0-1) 15.2.0 * running under: Debian GNU/Linux forky/sid * using session charset: UTF-8 * checking for file ‘fixest/DESCRIPTION’ ... OK * checking extension type ... Package * this is package ‘fixest’ version ‘0.13.0’ * package encoding: UTF-8 * checking CRAN incoming feasibility ... [11s/14s] OK * checking package namespace information ... OK * checking package dependencies ... OK * checking if this is a source package ... OK * checking if there is a namespace ... OK * checking for executable files ... OK * checking for hidden files and directories ... OK * checking for portable file names ... OK * checking for sufficient/correct file permissions ... OK * checking whether package ‘fixest’ can be installed ... [93s/94s] OK * used C++ compiler: ‘g++-15 (Debian 15.2.0-1) 15.2.0’ * checking package directory ... OK * checking for future file timestamps ... OK * checking ‘build’ directory ... OK * checking DESCRIPTION meta-information ... OK * checking top-level files ... OK * checking for left-over files ... OK * checking index information ... OK * checking package subdirectories ... OK * checking code files for non-ASCII characters ... OK * checking R files for syntax errors ... OK * checking whether the package can be loaded ... [2s/2s] OK * checking whether the package can be loaded with stated dependencies ... [1s/1s] OK * checking whether the package can be unloaded cleanly ... [1s/1s] OK * checking whether the namespace can be loaded with stated dependencies ... [1s/1s] OK * checking whether the namespace can be unloaded cleanly ... [1s/1s] OK * checking loading without being on the library search path ... [0s/0s] OK * checking whether startup messages can be suppressed ... [1s/1s] OK * checking use of S3 registration ... OK * checking dependencies in R code ... OK * checking S3 generic/method consistency ... OK * checking replacement functions ... OK * checking foreign function calls ... OK * checking R code for possible problems ... [58s/58s] OK * checking Rd files ... [1s/1s] OK * checking Rd metadata ... OK * checking Rd line widths ... OK * checking Rd cross-references ... OK * checking for missing documentation entries ... OK * checking for code/documentation mismatches ... OK * checking Rd \usage sections ... OK * checking Rd contents ... OK * checking for unstated dependencies in examples ... OK * checking contents of ‘data’ directory ... OK * checking data for non-ASCII characters ... [0s/0s] OK * checking LazyData ... OK * checking data for ASCII and uncompressed saves ... OK * checking line endings in C/C++/Fortran sources/headers ... OK * checking line endings in Makefiles ... OK * checking compilation flags in Makevars ... OK * checking for GNU extensions in Makefiles ... OK * checking for portable use of $(BLAS_LIBS) and $(LAPACK_LIBS) ... OK * checking use of PKG_*FLAGS in Makefiles ... OK * checking use of SHLIB_OPENMP_*FLAGS in Makefiles ... OK * checking pragmas in C/C++ headers and code ... OK * checking compilation flags used ... OK * checking compiled code ... OK * checking installed files from ‘inst/doc’ ... OK * checking files in ‘vignettes’ ... OK * checking examples ... [24s/24s] OK * checking for unstated dependencies in ‘demo’ ... OK * checking for unstated dependencies in ‘tests’ ... OK * checking tests ... [16s/16s] ERROR Running ‘fixest_tests.R’ [16s/16s] Running the tests in ‘tests/fixest_tests.R’ failed. Complete output: > #----------------------------------------------# > # Author: Laurent Berge > # Date creation: Fri Jul 10 09:03:06 2020 > # ~: package sniff tests > #----------------------------------------------# > > # Not everything is currently covered, but I'll improve it over time > > # Some functions are not trivial to test properly though > > library(fixest) (Permanently remove the following message with fixest_startup_msg(FALSE).) FALSE > library(sandwich) > > test = fixest:::test ; chunk = fixest:::chunk > vcovClust = fixest:::vcovClust > stvec = stringmagic::string_vec_alias() > > setFixest_notes(FALSE) > > if(fixest:::is_r_check()){ + if(requireNamespace("data.table", quietly = TRUE)){ + library(data.table) + data.table::setDTthreads(1) + } + setFixest_nthreads(1) + } > > #### > #### ESTIMATIONS #### > #### > > #### > #### ... Main #### > #### > > > chunk("ESTIMATION") ESTIMATION > > set.seed(0) > > base = iris > names(base) = c("y", "x1", "x2", "x3", "species") > base$fe_2 = rep(1:5, 30) > base$fe_3 = sample(15, 150, TRUE) > base$constant = 5 > base$y_int = as.integer(base$y) > base$w = as.vector(unclass(base$species) - 0.95) > base$offset_value = unclass(base$species) - 0.95 > base$y_01 = 1 * ((scale(base$x1) + rnorm(150)) > 0) > # what follows to avoid removal of fixed-effects (logit is pain in the neck) > base$y_01[1:5 + rep(c(0, 50, 100), each = 5)] = 1 > base$y_01[6:10 + rep(c(0, 50, 100), each = 5)] = 0 > # We enforce the removal of observations > base$y_int_null = base$y_int > base$y_int_null[base$fe_3 %in% 1:5] = 0 > > for(model in c("ols", "pois", "logit", "negbin", "Gamma")){ + cat("Model: ", format(model, width = 6), sep = "") + for(use_weights in c(FALSE, TRUE)){ + my_weight = NULL + if(use_weights) my_weight = base$w + + for(use_offset in c(FALSE, TRUE)){ + my_offset = NULL + if(use_offset) my_offset = base$offset_value + + for(id_fe in 0:9){ + + cat(".") + + tol = switch(model, "negbin" = 1e-2, "logit" = 3e-5, 1e-5) + + # Setting up the formula to accommodate FEs + if(id_fe == 0){ + fml_fixest = fml_stats = y ~ x1 + } else if(id_fe == 1){ + fml_fixest = y ~ x1 | species + fml_stats = y ~ x1 + factor(species) + } else if(id_fe == 2){ + fml_fixest = y ~ x1 | species + fe_2 + fml_stats = y ~ x1 + factor(species) + factor(fe_2) + } else if(id_fe == 3){ + # varying slope + fml_fixest = y ~ x1 | species[[x2]] + fml_stats = y ~ x1 + x2:species + } else if(id_fe == 4){ + # varying slope -- 1 VS, 1 FE + fml_fixest = y ~ x1 | species[[x2]] + fe_2 + fml_stats = y ~ x1 + x2:species + factor(fe_2) + } else if(id_fe == 5){ + # varying slope -- 2 VS + fml_fixest = y ~ x1 | species[x2] + fml_stats = y ~ x1 + x2:species + species + } else if(id_fe == 6){ + # varying slope -- 2 VS bis + fml_fixest = y ~ x1 | species[[x2]] + fe_2[[x3]] + fml_stats = y ~ x1 + x2:species + x3:factor(fe_2) + } else if(id_fe == 7){ + # Combined clusters + fml_fixest = y ~ x1 + x2 | species^fe_2 + fml_stats = y ~ x1 + x2 + paste(species, fe_2) + } else if(id_fe == 8){ + fml_fixest = y ~ x1 | species[x2] + fe_2[x3] + fe_3 + fml_stats = y ~ x1 + species + i(species, x2) + factor(fe_2) + i(fe_2, x3) + factor(fe_3) + } else if(id_fe == 9){ + fml_fixest = y ~ x1 | species + fe_2[x2,x3] + fe_3 + fml_stats = y ~ x1 + species + factor(fe_2) + i(fe_2, x2) + i(fe_2, x3) + factor(fe_3) + } + + # ad hoc modifications of the formula + if(model == "logit"){ + fml_fixest = xpd(y_01 ~ ..rhs, ..rhs = fml_fixest[[3]]) + fml_stats = xpd(y_01 ~ ..rhs, ..rhs = fml_stats[[3]]) + + # The estimations are OK, conv differences out of my control + if(id_fe %in% 8:9) tol = 0.5 + + } else if(model == "pois"){ + fml_fixest = xpd(y_int_null ~ ..rhs, ..rhs = fml_fixest[[3]]) + fml_stats = xpd(y_int_null ~ ..rhs, ..rhs = fml_stats[[3]]) + + } else if(model %in% c("negbin", "Gamma")){ + fml_fixest = xpd(y_int ~ ..rhs, ..rhs = fml_fixest[[3]]) + fml_stats = xpd(y_int ~ ..rhs, ..rhs = fml_stats[[3]]) + } + + adj = 1 + if(model == "ols"){ + res = feols(fml_fixest, base, weights = my_weight, offset = my_offset) + res_bis = lm(fml_stats, base, weights = my_weight, offset = my_offset) + + } else if(model %in% c("pois", "logit", "Gamma")){ + adj = 0 + if(model == "Gamma" && use_offset) next + + my_family = switch(model, pois = poisson(), logit = binomial(), Gamma = Gamma()) + + res = feglm(fml_fixest, base, family = my_family, weights = my_weight, offset = my_offset) + + if(!is.null(res$obs_selection$obsRemoved)){ + qui = res$obs_selection$obsRemoved + + # I MUST do that.... => subset does not work... + base_tmp = base[qui, ] + base_tmp$my_offset = my_offset[qui] + base_tmp$my_weight = my_weight[qui] + res_bis = glm(fml_stats, base_tmp, family = my_family, weights = my_weight, offset = my_offset) + } else { + res_bis = glm(fml_stats, data = base, family = my_family, weights = my_weight, offset = my_offset) + } + + } else if(model == "negbin"){ + # no offset in glm.nb + no VS in fenegbin + no weights in fenegbin + if(use_weights || use_offset || id_fe > 2) next + + res = fenegbin(fml_fixest, base, notes = FALSE) + res_bis = MASS::glm.nb(fml_stats, base) + + } + + test(coef(res)["x1"], coef(res_bis)["x1"], "~", tol) + test(se(res, se = "st", ssc = ssc(K.adj = adj))["x1"], se(res_bis)["x1"], "~", tol) + test(pvalue(res, se = "st", ssc = ssc(K.adj = adj))["x1"], pvalue(res_bis)["x1"], "~", tol*10**(model == "negbin")) + # cat("Model: ", model, ", FE: ", id_fe, ", weight: ", use_weights, ", offset: ", use_offset, "\n", sep="") + + } + cat("|") + } + } + cat("\n") + } Model: ols ..........|..........|..........|..........| Model: pois ..........|..........|..........|..........| Model: logit ..........|..........|..........|..........| Model: negbin..........|..........|..........|..........| Model: Gamma ..........|..........|..........|..........| There were 36 warnings (use warnings() to see them) > > #### > #### ... Corner cases #### > #### > > chunk("Corner cases") CORNER CASES > > > # We test the absence of bugs > > base = iris > names(base) = c("y", "x1", "x2", "x3", "fe1") > base$fe2 = rep(1:5, 30) > base$y[1:5] = NA > base$x1[4:8] = NA > base$x2[4:21] = NA > base$x3[110:111] = NA > base$fe1[110:118] = NA > base$fe2[base$fe2 == 1] = 0 > base$fe3 = sample(letters[1:5], 150, TRUE) > base$period = rep(1:50, 3) > base$x_cst = 1 > > res = feols(y ~ 1 | csw(fe1, fe1^fe2), base) > > res = feols(y ~ 1 + csw(x1, i(fe1)) | fe2, base) > > res = feols(y ~ csw(f(x1, 1:2), x2) | sw0(fe2, fe2^fe3), base, panel.id = ~ fe1 + period) > > res = feols(d(y) ~ -1 + d(x2), base, panel.id = ~ fe1 + period) > test(length(coef(res)), 1) > > res = feols(c(y, x1) ~ 1 | fe1 | x2 ~ x3, base) > > res = feols(y ~ x1 | fe1[x2] + fe2[x2], base) > > # > # singletons > # > > # all variables removed bc of singleton > base$singletons = 1:nrow(base) > test(feols(y ~ x1 | singletons, base, fixef.rm = "singleton"), "err") > > # in multiple estimations => no error > res = feols(y ~ x1 | sw0(singletons), base, fixef.rm = "singleton") > res_bis = feols(y ~ x1, base) > test(coef(res[[1]]), coef(res_bis)) > test(is.null(coef(res[[2]])), TRUE) > > # > # NA models (ie all variables are collinear with the FEs) > # > > # Should work when warn = FALSE or multiple est > for(i in 1:2){ + fun = switch(i, "1" = feols, "2" = feglm) + + res = feols(y ~ x_cst | fe1, base, warn = FALSE) + res # => no error + etable(res) # => no error + + # error when warn = TRUE + test(feols(y ~ x_cst | fe1, base), "err") + + # multiple est => no error + res = feols(c(y, x1) ~ x_cst | fe1, base) + res # => no error + etable(res) # => no error + } > > > # Removing the intercept!!! > > res = feols(y ~ -1 + x1 + i(fe1), base) > test("(Intercept)" %in% names(res$coefficients), FALSE) > > res = feols(y ~ -1 + x1 + factor(fe1), base) > test("(Intercept)" %in% names(res$coefficients), FALSE) > > res = feols(y ~ -1 + x1 + i(fe1) + i(fe2), base) > test("(Intercept)" %in% names(res$coefficients), FALSE) > test(is.null(res$collin.var), TRUE) > > > # IV + interacted FEs > res = feols(y ~ x1 | fe1^fe2 | x2 ~ x3, base) > > # IVs no exo var > res = feols(y ~ 0 | x2 ~ x3, base) > # Same in stepwise > res = feols(y ~ 0 | sw0(fe1) | x2 ~ x3, base) > > # IVs + lags > res = feols(y ~ x1 | fe1^fe2 | l(x2, -1:1) ~ l(x3, -1:1), base, panel.id = ~ fe1 + period) > > # functions in interactions > res = feols(y ~ x1 | factor(fe1)^factor(fe2), base) > res = feols(y ~ x1 | round(x2^2), base) > test(feols(y ~ x1 | factor(fe1^fe2), base), "err") > > res = feols(y ~ x1 | bin(x2, "bin::1")^fe1 + fe1^fe2, base) > > # 1 obs (after FE removal) estimation > base_1obs = data.frame(y = c(1, 0), fe = c(1, 2), x = c(1, 0)) > test(fepois(y ~ x | fe, base_1obs, fixef.rm = "infinite"), "err") > # no error > res = fepois(y ~ 1 | fe, base_1obs, fixef.rm = "infinite") > > # warning when demeaning algo reaches max iterations > data(trade) > test(feols(Euros ~ log(dist_km) | Destination + Origin + Product, + trade, fixef.iter = 1), "warn") > > > #### > #### ... depvar removal #### > #### > > chunk("depvar removal") DEPVAR REMOVAL > > # we remove the depvar when it is in the RHS > base = setNames(iris, c("y", "x1", "x2", "x3", "species")) > > fml_equiv = c( + "y" = "1", + "y + x1" = "x1", + "x1 + y + x2 + I(x1)" = "x1 + x2 + I(x1)" + ) > > all_funs = list(feols, feglm, femlm) > > id_fml = 3 > id_fun = 2 > > fun = all_funs[[id_fun]] > > for(id_fun in seq_along(all_funs)){ + fun = all_funs[[id_fun]] + cat("|") + for(id_fml in seq_along(fml_equiv)){ + + if(id_fun == 3 && id_fml == 3) next + + cat(".") + rhs_0 = names(fml_equiv)[id_fml] + rhs_1 = fml_equiv[id_fml] + + # simple estimation + est_0 = fun(y ~ .[rhs_0], base) + est_1 = fun(y ~ .[rhs_1], base) + test(coef(est_0), coef(est_1)) + test(se(est_0), se(est_1)) + + # multiple samples + est_0 = fun(y ~ .[rhs_0], base, split = "species") + est_1 = fun(y ~ .[rhs_1], base, split = "species") + test(coef(est_0[[1]]), coef(est_1[[1]])) + test(se(est_0[[1]]), se(est_1[[1]])) + + # multiple lhs + est_0 = fun(c(y, x3) ~ .[rhs_0] + x3, base) + est_1a = fun(y ~ .[rhs_1] + x3, base) + est_1b = fun(x3 ~ .[rhs_0], base) + test(coef(est_0[[1]]), coef(est_1a)) + test(coef(est_0[[2]]), coef(est_1b)) + test(se(est_0[[1]]), se(est_1a)) + test(se(est_0[[2]]), se(est_1b)) + + # multiple rhs + fixef + est_0 = fun(y ~ csw(.[rhs_0], x3) | sw0(species), base) + est_1 = fun(y ~ csw(.[rhs_1], x3) | sw0(species), base) + for(id_mult in n_models(est_0)){ + test(coef(est_0[[id_mult]]), coef(est_1[[id_mult]])) + test(se(est_0[[id_mult]]), se(est_1[[id_mult]])) + } + + # in IVs + if(id_fun == 1){ + base$inst = rnorm(nrow(base)) + base$endo = rnorm(nrow(base)) + est_0 = feols(y ~ csw(.[rhs_0], x3) | sw0(species) | endo ~ inst, base) + est_1 = feols(y ~ csw(.[rhs_1], x3) | sw0(species) | endo ~ inst, base) + for(id_mult in n_models(est_0)){ + test(coef(est_0[[id_mult]]), coef(est_1[[id_mult]])) + } + } + } + } |...|...|..> cat("\n") > > > # model.matrix > est = feols(y ~ x1 + y, base) > test(ncol(model.matrix(est)), 2) > > #### > #### ... obs removal #### > #### > > base_rm = base_did > base_rm$y = abs(base_rm$y) > # we add 0-values > is_only_0 = base_rm$id <= 10 > base_rm$y[is_only_0] = 0 > # we add singletons > is_single = base_rm$id %in% 50:55 > base_rm$period[is_single] = runif(sum(is_single)) > > est_none = fepois(y ~ x1 | id + period, base_rm, fixef.rm = "none") > est_single = fepois(y ~ x1 | id + period, base_rm, fixef.rm = "singletons") > est_inf = fepois(y ~ x1 | id + period, base_rm, fixef.rm = "infinite") > est_perfect = fepois(y ~ x1 | id + period, base_rm, fixef.rm = "perfect") > > n = nrow(base_rm) > test(nobs(est_none), n) > test(nobs(est_single), n - sum(is_single)) > test(nobs(est_inf), n - sum(is_only_0)) > test(nobs(est_perfect), n - sum(is_single) - sum(is_only_0)) > > #### > #### ... Fit methods #### > #### > > chunk("Fit methods") FIT METHODS > > base = setNames(iris, c("y", "x1", "x2", "x3", "species")) > base$y_int = as.integer(base$y) > base$y_log = sample(c(TRUE, FALSE), 150, TRUE) > > res = feglm.fit(base$y, base[, 2:4]) > res_bis = feglm(y ~ -1 + x1 + x2 + x3, base) > test(coef(res), coef(res_bis)) > > res = feglm.fit(base$y_int, base[, 2:4]) > res_bis = feglm(y_int ~ -1 + x1 + x2 + x3, base) > test(coef(res), coef(res_bis)) > > res = feglm.fit(base$y_log, base[, 2:4]) > res_bis = feglm(y_log ~ -1 + x1 + x2 + x3, base) > test(coef(res), coef(res_bis)) > > > > res = feglm.fit(base$y, base[, 2:4], family = "poisson") > res_bis = feglm(y ~ -1 + x1 + x2 + x3, base, family = "poisson") > test(coef(res), coef(res_bis)) > > res = feglm.fit(base$y_int, base[, 2:4], family = "poisson") > res_bis = feglm(y_int ~ -1 + x1 + x2 + x3, base, family = "poisson") > test(coef(res), coef(res_bis)) > > res = feglm.fit(base$y_log, base[, 2:4], family = "poisson") > res_bis = feglm(y_log ~ -1 + x1 + x2 + x3, base, family = "poisson") > test(coef(res), coef(res_bis)) > > #### > #### global variables #### > #### > > chunk("globals") GLOBALS > > est_reg = function(df, yvar, xvar, refgrp) { + feols(.[yvar] ~ i(.[xvar], ref = refgrp), data = df) + } > > est = est_reg(iris, "Sepal.Length", "Species", refgrp = "setosa") > > # checking when it should not work > base = setNames(iris, c("y", "x1", "x2", "x3", "species")) > > z = base$x1 > test(feols(y ~ z, base), "err") > > > > #### > #### ... Collinearity #### > #### > > chunk("COLLINEARITY") COLLINEARITY > > base = iris > names(base) = c("y", "x1", "x2", "x3", "species") > base$constant = 5 > base$y_int = as.integer(base$y) > base$w = as.vector(unclass(base$species) - 0.95) > > for(useWeights in c(FALSE, TRUE)){ + for(model in c("ols", "pois")){ + for(use_fe in c(FALSE, TRUE)){ + cat(".") + + my_weight = NULL + if(useWeights) my_weight = base$w + + adj = 1 + if(model == "ols"){ + if(!use_fe){ + res = feols(y ~ x1 + constant, base, weights = my_weight) + res_bis = lm(y ~ x1 + constant, base, weights = my_weight) + } else { + res = feols(y ~ x1 + constant | species, base, weights = my_weight) + res_bis = lm(y ~ x1 + constant + species, base, weights = my_weight) + } + } else { + if(!use_fe){ + res = fepois(y_int ~ x1 + constant, base, weights = my_weight) + res_bis = glm(y_int ~ x1 + constant, base, weights = my_weight, family = poisson) + } else { + res = fepois(y_int ~ x1 + constant | species, base, weights = my_weight) + res_bis = glm(y_int ~ x1 + constant + species, base, weights = my_weight, family = poisson) + } + adj = 0 + } + + test(coef(res)["x1"], coef(res_bis)["x1"], "~") + test(se(res, se = "st", ssc = ssc(K.adj=adj))["x1"], se(res_bis)["x1"], "~") + # cat("Weight: ", useWeights, ", model: ", model, ", FE: ", use_fe, "\n", sep="") + + } + } + } ........> cat("\n") > > > #### > #### ... Non linear tests #### > #### > > chunk("NON LINEAR") NON LINEAR > > base = iris > names(base) = c("y", "x1", "x2", "x3", "species") > > tab = c("versicolor" = 5, "setosa" = 0, "virginica" = -5) > > fun_nl = function(a, b, spec){ + res = as.numeric(tab[spec]) + a*res + b*res^2 + } > > est_nl = feNmlm(y ~ x1, base, NL.fml = ~fun_nl(a, b, species), NL.start = 1, family = "gaussian") > > base$var_spec = as.numeric(tab[base$species]) > > est_lin = feols(y ~ x1 + var_spec + I(var_spec^2), base) > > test(coef(est_nl), coef(est_lin)[c(3, 4, 1, 2)], "~") > > #### > #### ... Lagging #### > #### > > # Different types of lag > # 1) check no error in wide variety of situations > # 2) check consistency > > chunk("LAGGING") LAGGING > > data(base_did) > base = base_did > > n = nrow(base) > > set.seed(0) > base$y_na = base$y ; base$y_na[sample(n, 50)] = NA > base$period_txt = letters[base$period] > ten_dates = c("1960-01-15", "1960-01-16", "1960-03-31", "1960-04-05", "1960-05-12", "1960-05-25", "1960-06-20", "1960-07-30", "1965-01-02", "2002-12-05") > base$period_date = as.Date(ten_dates, "%Y-%m-%d")[base$period] > base$y_0 = base$y**2 ; base$y_0[base$id == 1] = 0 > > # We compute the lags "by hand" > base = base[order(base$id, base$period), ] > base$x1_lag = c(NA, base$x1[-n]) ; base$x1_lag[base$period == 1] = NA > base$x1_lead = c(base$x1[-1], NA) ; base$x1_lead[base$period == 10] = NA > base$x1_diff = base$x1 - base$x1_lag > > # we create holes > base$period_bis = base$period ; base$period_bis[base$period_bis == 5] = 50 > base$x1_lag_hole = base$x1_lag ; base$x1_lag_hole[base$period %in% c(5, 6)] = NA > base$x1_lead_hole = base$x1_lead ; base$x1_lead_hole[base$period %in% c(4, 5)] = NA > > # we reshuffle the base > base = base[sample(n), ] > > # > # Checks consistency > # > > cat("consistentcy...") consistentcy...> > test(lag(x1 ~ id + period, data = base), base$x1_lag) > test(lag(x1 ~ id + period, -1, data = base), base$x1_lead) > > test(lag(x1 ~ id + period_bis, data = base), base$x1_lag_hole) > test(lag(x1 ~ id + period_bis, -1, data = base), base$x1_lead_hole) > > test(lag(x1 ~ id + period_txt, data = base), base$x1_lag) > test(lag(x1 ~ id + period_txt, -1, data = base), base$x1_lead) > > test(lag(x1 ~ id + period_date, data = base), base$x1_lag) > test(lag(x1 ~ id + period_date, -1, data = base), base$x1_lead) > > cat("done.\nEstimations...") done. Estimations...> > # > # Estimations > # > > # Poisson > > for(depvar in c("y", "y_na", "y_0")){ + for(p in c("period", "period_txt", "period_date")){ + + base$per = base[[p]] + + cat(".") + + base$y_dep = base[[depvar]] + pdat = panel(base, ~ id + period) + + if(depvar == "y_0"){ + estfun = fepois + } else { + estfun = feols + } + + est_raw = estfun(y_dep ~ x1 + x1_lag + x1_lead, base) + est = estfun(y_dep ~ x1 + l(x1) + f(x1), base, panel.id = "id,per") + est_pdat = estfun(y_dep ~ x1 + l(x1, 1) + f(x1, 1), pdat) + test(coef(est_raw), coef(est)) + test(coef(est_raw), coef(est_pdat)) + + # Now diff + est_raw = estfun(y_dep ~ x1 + x1_diff, base) + est = estfun(y_dep ~ x1 + d(x1), base, panel.id = "id,per") + est_pdat = estfun(y_dep ~ x1 + d(x1, 1), pdat) + test(coef(est_raw), coef(est)) + test(coef(est_raw), coef(est_pdat)) + + # Now we just check that calls to l/f works without checking coefs + + est = estfun(y_dep ~ x1 + l(x1) + f(x1), base, panel.id = "id,per") + est = estfun(y_dep ~ l(x1, -1:1) + f(x1, 2), base, panel.id = c("id", "per")) + est = estfun(y_dep ~ l(x1, -1:1, fill = 1), base, panel.id = ~ id + per) + if(depvar == "y") test(est$nobs, n) + est = estfun(f(y_dep) ~ f(x1, -1:1), base, panel.id = ~ id + per) + } + } .........> > cat("done.\n\n") done. > > # > # Data table > # > > cat("data.table...") data.table...> # We just check there is no bug (consistency should be OK) > > suppressPackageStartupMessages(library(data.table)) > > base_dt = data.table(id = c("A", "A", "B", "B"), + time = c(1, 2, 1, 3), + x = c(5, 6, 7, 8)) > > base_dt = panel(base_dt, ~id + time) > > base_dt[, x_l := l(x)] > test(base_dt$x_l, c(NA, 5, NA, NA)) > > lag_creator = function(dt) { + dt2 = panel(dt, ~id + time) + dt2[, x_l := l(x)] + return(dt2) + } > > base_bis = lag_creator(base_dt) > > base_bis[, x_d := d(x)] > > cat("done.\n\n") done. > > # > # Panel > # > > # We ensure we get the right SEs whether we use the panel() or the panel.id method > data(base_did) > > # Setting a data set as a panel... > pdat = panel(base_did, ~id+period) > pdat$fe = sample(15, nrow(pdat), replace = TRUE) > > base_panel = unpanel(pdat) > > est_pdat = feols(y ~ x1 | fe, pdat) > est_panel = feols(y ~ x1 | fe, base_panel, panel.id = ~id+period) > > test(attr(vcov(est_pdat, attr = TRUE), "type"), + attr(vcov(est_panel, attr = TRUE), "type")) > > # > # testing irregularities > # > > base_panel = data.frame( + id = rep(letters[1:3], each = 4), + time = c(1, 2, 3, 3, + 1, 2, 4, 6, + 2, 4, 6, 8) + ) > base_panel$x = rnorm(nrow(base_panel)) > base_panel$y = base_panel$x * 0.5 + rnorm(nrow(base_panel)) > > est_panel_irr = feols(y ~ l(x), base_panel, panel.id = "id,time", + panel.duplicate.method = "first") > > x_lag = model.matrix(est_panel_irr)[, 2] > test(x_lag, base_panel$x[c(1, 2, 2, 5)]) > > est_panel_irr_cons = feols(y ~ l(x), base_panel, panel.id = "id,time", + panel.time.step = "cons", + panel.duplicate.method = "first") > > x_lag = model.matrix(est_panel_irr_cons)[, 2] > test(x_lag, base_panel$x[c(NULL, 1, 2, 2, + NULL, 5, NULL, 7, + NULL, NULL, 10, 11)]) > > est_panel_irr_with = feols(y ~ l(x), base_panel, panel.id = "id,time", + panel.time.step = "within", + panel.duplicate.method = "first") > > x_lag = model.matrix(est_panel_irr_with)[, 2] > test(x_lag, base_panel$x[c(NULL, 1, 2, 3, + NULL, 5, 6, 7, + NULL, 9, 10, 11)]) > > #### > #### ... subset #### > #### > > chunk("SUBSET") SUBSET > > set.seed(5) > base = iris > names(base) = c("y", "x1", "x2", "x3", "species") > base$fe_bis = sample(letters, 150, TRUE) > base$x4 = rnorm(150) > base$x1[sample(150, 5)] = NA > > fml = y ~ x1 + x2 > > # Errors > test(feols(fml, base, subset = ~species), "err") > test(feols(fml, base, subset = -1:15), "err") > test(feols(fml, base, subset = integer(0)), "err") > test(feols(fml, base, subset = c(TRUE, TRUE, FALSE)), "err") > > # Valid use > for(id_fun in 1:6){ + estfun = switch(as.character(id_fun), + "1" = feols, + "2" = feglm, + "3" = fepois, + "4" = femlm, + "5" = fenegbin, + "6" = feNmlm) + + for(id_fe in 1:5){ + + cat(".") + + fml = switch(as.character(id_fe), + "1" = y ~ x1 + x2, + "2" = y ~ x1 + x2 | species, + "3" = y ~ x1 + x2 | fe_bis, + "4" = y ~ x1 + x2 + i(fe_bis), + "5" = y ~ x1 + x2 | fe_bis[x3]) + + if(id_fe == 5 && id_fun %in% 4:6) next + + if(id_fun == 6){ + res_sub_a = estfun(fml, base, subset = ~species == "setosa", NL.fml = ~ a*x4, NL.start = 0) + res_sub_b = estfun(fml, base, subset = base$species == "setosa", NL.fml = ~ a*x4, NL.start = 0) + res_sub_c = estfun(fml, base, subset = which(base$species == "setosa"), NL.fml = ~ a*x4, NL.start = 0) + res = estfun(fml, base[base$species == "setosa", ], NL.fml = ~ a*x4, NL.start = 0) + } else { + res_sub_a = estfun(fml, base, subset = ~species == "setosa") + res_sub_b = estfun(fml, base, subset = base$species == "setosa") + res_sub_c = estfun(fml, base, subset = which(base$species == "setosa")) + res = estfun(fml, base[base$species == "setosa", ]) + } + + test(coef(res_sub_a), coef(res)) + test(coef(res_sub_b), coef(res)) + test(coef(res_sub_c), coef(res)) + test(se(res_sub_c, cluster = "fe_bis"), se(res, cluster = "fe_bis")) + } + cat("|") + } .....|.....|.....|.....|.....|.....|There were 12 warnings (use warnings() to see them) > cat("\n") > > > #### > #### ... split #### > #### > > chunk("split") SPLIT > > base = setNames(iris, c("y", "x1", "x2", "x3", "species")) > > # simple: formula > est = feols(y ~ x.[1:3], base, split = ~species %keep% "@^v") > test(length(est), 2) > > est = feols(y ~ x.[1:3], base, fsplit = ~species %keep% c("set", "vers")) > test(length(est), 3) > > est = feols(y ~ x.[1:3], base, split = ~species %drop% "set") > test(length(est), 2) > > # simple: vector > est = feols(y ~ x.[1:3], base, split = base$species %keep% "@^v") > test(length(est), 2) > > est = feols(y ~ x.[1:3], base, split = base$species %keep% c("set", "vers")) > test(length(est), 2) > > est = feols(y ~ x.[1:3], base, split = base$species %drop% "set") > test(length(est), 2) > > # with bin > est = feols(y ~ x.[1:2], base, + split = ~bin(x3, c("cut::5", "saint emilion", "pessac leognan", + "margaux", "saint julien", "entre deux mers")) %keep% c("saint e", "pe")) > test(length(est), 2) > > est = feols(y ~ x.[1:2], base, + split = ~bin(x3, c("cut::5", "saint emilion", "pessac leognan", NA)) %drop% "@\\d") > test(length(est), 2) > > # with argument > est = feols(y ~ x.[1:3], base, split = ~species, split.keep = "@^v") > test(length(est), 2) > > est = feols(y ~ x.[1:3], base, fsplit = ~species, split.keep = c("set", "vers")) > test(length(est), 3) > > est = feols(y ~ x.[1:3], base, split = ~species, split.drop = "set") > test(length(est), 2) > > > #### > #### ... Multiple estimations #### > #### > > chunk("Multiple") MULTIPLE > > set.seed(2) > base = iris > names(base) = c("y1", "x1", "x2", "x3", "species") > base$y2 = 10 + rnorm(150) + 0.5 * base$x1 > base$x4 = rnorm(150) + 0.5 * base$y1 > base$fe2 = rep(letters[1:15], 10) > base$fe2[50:51] = NA > base$y2[base$fe2 == "a" & !is.na(base$fe2)] = 0 > base$x2[1:5] = NA > base$x3[6] = NA > base$x5 = rnorm(150) > base$x6 = rnorm(150) + base$y1 * 0.25 > base$fe3 = rep(letters[1:10], 15) > > > for(id_fun in 1:5){ + estfun = switch(as.character(id_fun), + "1" = feols, + "2" = feglm, + "3" = fepois, + "4" = femlm, + "5" = feNmlm) + + # Following weird bug ASAN on CRAN I cannot replicate, check 4/5 not performed on non Windows + if(Sys.info()["sysname"] != "Windows"){ + if(id_fun %in% 4:5) next + } + + + est_multi = estfun(c(y1, y2) ~ x1 + sw(x2, x3), base, split = ~species) + + k = 1 + for(s in c("setosa", "versicolor", "virginica")){ + for(lhs in c("y1", "y2")){ + for(rhs in c("x2", "x3")){ + res = estfun(.[lhs] ~ x1 + .[rhs], base[base$species == s, ], notes = FALSE) + + test(coef(est_multi[[k]]), coef(res)) + test(se(est_multi[[k]], cluster = "fe3"), se(res, cluster = "fe3")) + k = k + 1 + } + } + } + + cat("__") + + est_multi = estfun(c(y1, y2) ~ x1 + csw0(x2, x3) + x4 | species + fe2, base, fsplit = ~species) + k = 1 + all_rhs = c("", "x2", "x3") + for(s in c("all", "setosa", "versicolor", "virginica")){ + for(lhs in c("y1", "y2")){ + for(n_rhs in 1:3){ + if(s == "all"){ + res = estfun(xpd(..lhs ~ x1 + ..rhs + x4 | species + fe2, ..lhs = lhs, ..rhs = all_rhs[1:n_rhs]), base, notes = FALSE) + } else { + res = estfun(xpd(..lhs ~ x1 + ..rhs + x4 | species + fe2, ..lhs = lhs, ..rhs = all_rhs[1:n_rhs]), base[base$species == s, ], notes = FALSE) + } + + vname = names(coef(res)) + test(coef(est_multi[[k]])[vname], coef(res), "~" , 1e-6) + test(se(est_multi[[k]], cluster = "fe3")[vname], se(res, cluster = "fe3"), "~" , 1e-6) + k = k + 1 + } + } + } + + cat("|") + } __|__|__|> cat("\n") > > > > # No error tests > # We test with IV + possible corner cases > > base$left = rnorm(150) > base$right = rnorm(150) > > est_multi = feols(c(y1, y2) ~ sw0(x1) | sw0(species) | x2 ~ x3, base) > > # We check a few > est_a = feols(y1 ~ 1 | x2 ~ x3, base) > est_b = feols(y1 ~ x1 | species | x2 ~ x3, base) > est_c = feols(y2 ~ 1 | x2 ~ x3, base) > > test(coef(est_multi[lhs = "y1", rhs = "^1", fixef = "1", drop = TRUE]), coef(est_a)) > test(coef(est_multi[lhs = "y1", rhs = "x1", fixef = "spe", drop = TRUE]), coef(est_b)) > test(coef(est_multi[lhs = "y2", rhs = "^1", fixef = "1", drop = TRUE]), coef(est_c)) > > # with fixed covariates > est_multi_LR = feols(c(y1, y2) ~ left + sw0(x1*x4) + right | sw0(species) | x2 ~ x3, base) > > est_a = feols(y1 ~ left + right | x2 ~ x3, base) > est_b = feols(y1 ~ left + x1*x4 + right | species | x2 ~ x3, base) > est_c = feols(y2 ~ left + right | x2 ~ x3, base) > > test(coef(est_multi_LR[lhs = "y1", rhs = "!x1", fixef = "1", drop = TRUE]), coef(est_a)) > user_name = c("fit_x2", "left", "x1", "x4", "x1:x4", "right") > test(names(coef(est_multi_LR[lhs = "y1", rhs = "x1", fixef = "spe", drop = TRUE])), user_name) > test(coef(est_multi_LR[lhs = "y1", rhs = "x1", fixef = "spe", drop = TRUE]), coef(est_b)[user_name]) > test(coef(est_multi_LR[lhs = "y2", rhs = "!x1", fixef = "1", drop = TRUE]), coef(est_c)) > > > # mvsw > > est_mvsw = feols(y1 ~ mvsw(x1, x2), base) > est_mvsw_fe = feols(y1 ~ mvsw(x1, x2) | mvsw(species, fe2), base) > est_mvsw_fe_iv = feols(y1 ~ mvsw(x1, x2) | mvsw(species, fe2) | x3 ~ x4, base) > > test(length(est_mvsw), 4) > test(length(as.list(est_mvsw_fe)), 16) > test(length(as.list(est_mvsw_fe_iv)), 16) > > # Summary of multiple endo vars > est_multi_iv = feols(c(y1, y2) ~ sw0(x1) | sw0(species) | x3 + x4 ~ x5 + x6, base) > test(length(est_multi_iv), 8) > test(length(summary(est_multi_iv, stage = 1)), 16) > > # IV without exo var: > est_mult_no_exo = feols(c(y1, y2) ~ 0 | x3 + x4 ~ x5 + x6, base) > est_no_exo_y2 = feols(y2 ~ 0 | x3 + x4 ~ x5 + x6, base) > test(coef(est_mult_no_exo[[2]]), coef(est_no_exo_y2)) > > # proper ordering > est_multi = feols(c(y1, y2) ~ sw0(x1) | sw0(fe2), base, split = ~species) > test(names(models(est_multi[fixef = TRUE, sample = FALSE])), + stvec("id, fixef, lhs, rhs, sample.var, sample")) > > test(names(models(est_multi[fixef = "fe2", sample = "seto"])), + stvec("id, fixef, sample.var, sample, lhs, rhs")) > > test(names(models(est_multi[fixef = "fe2", sample = "seto", reorder = FALSE])), + stvec("id, sample.var, sample, fixef, lhs, rhs")) > > # NA models > base$y_0 = base$x1 ** 2 + rnorm(150) > base$y_0[base$species == "setosa"] = 0 > > est_pois = fepois(y_0 ~ csw(x.[,1:4]), base, split = ~species) > > base$x1_bis = base$x1 > est_pois = fepois(y_0 ~ x.[1:3] + x1_bis | sw0(species), base) > > # Different ways .[] > base = setNames(iris, c("y", "x1", "x2", "x3", "species")) > > dep_all = list(stvec("y, x1, x2"), ~y + x1 + x2) > for(dep in dep_all){ + m = feols(.[dep] ~ x3, base) + test(length(m), 3) + + m = feols(x3 ~ .[dep], base) + test(length(m$coefficients), 4) + + m = feols(x3 ~ csw(.[,dep]), base) + test(length(m), 3) + } > > # offset in multiple outcomes // no error test > > offset_single_ols = feols(am ~ hp, offset = ~ log(qsec), data = mtcars) > offset_mult_ols = feols(c(mpg, am) ~ hp, offset = ~ log(qsec), data = mtcars) > > test(coef(offset_mult_ols[[2]]), coef(offset_single_ols)) > > offset_single_glm = feglm(am ~ hp, offset = ~ log(qsec), data = mtcars) > offset_mult_glm = feglm(c(mpg, am) ~ hp, offset = ~ log(qsec), data = mtcars) > > test(coef(offset_mult_glm[[2]]), coef(offset_single_glm)) > > # LHS expansion with IVs > > lhs = c("mpg", "wt") > est_lhs = feols(.[lhs] ~ disp | hp ~ qsec, data = mtcars) > test(length(est_lhs), 2) > > est_lhs = feols(..("mpg|wt") ~ disp | hp ~ qsec, data = mtcars) > test(length(est_lhs), 2) > > > > #### > #### ... IV #### > #### > > chunk("IV") IV > > base = iris > names(base) = c("y", "x1", "x_endo_1", "x_inst_1", "fe") > set.seed(2) > base$x_inst_2 = 0.2 * base$y + 0.2 * base$x_endo_1 + rnorm(150, sd = 0.5) > base$x_endo_2 = 0.2 * base$y - 0.2 * base$x_inst_1 + rnorm(150, sd = 0.5) > > # Checking a basic estimation > > setFixest_vcov(all = "iid") > > est_iv = feols(y ~ x1 | x_endo_1 + x_endo_2 ~ x_inst_1 + x_inst_2, base) > > res_f1 = feols(x_endo_1 ~ x1 + x_inst_1 + x_inst_2, base) > res_f2 = feols(x_endo_2 ~ x1 + x_inst_1 + x_inst_2, base) > > base$fit_x_endo_1 = predict(res_f1) > base$fit_x_endo_2 = predict(res_f2) > > res_2nd = feols(y ~ fit_x_endo_1 + fit_x_endo_2 + x1, base) > > # the coef > test(coef(est_iv), coef(res_2nd)) > > # the SE > resid_iv = base$y - predict(res_2nd, + data.frame(x1 = base$x1, fit_x_endo_1 = base$x_endo_1, + fit_x_endo_2 = base$x_endo_2)) > sigma2_iv = sum(resid_iv**2) / (res_2nd$nobs - res_2nd$nparams) > > sum_2nd = summary(res_2nd, vcov = res_2nd$cov.iid / res_2nd$sigma2 * sigma2_iv) > > test(se(sum_2nd), se(est_iv)) > > # check no bug when all exogenous vars are removed bc of collinearity > df = data.frame(x = rnorm(8), y = rnorm(8), + z = rnorm(8), fe = rep(0:1, each = 4)) > > est_iv = feols(y ~ fe | fe | x ~ z, df) > est_iv = feols(y ~ sw(fe, fe) | fe | x ~ z, df) > > # check no bug > etable(summary(est_iv, stage = 1:2)) summary(est_..1 summary(es..2 summary(est_..3 summary(es..4 Dependent Var.: x y x y z 0.1127 (0.5532) 0.1127 (0.5532) x 1.110 (6.499) 1.110 (6.499) Fixed-Effects: --------------- ------------- --------------- ------------- fe Yes Yes Yes Yes _______________ _______________ _____________ _______________ _____________ S.E. type IID IID IID IID Observations 8 8 8 8 R2 0.35875 0.19449 0.35875 0.19449 Within R2 0.00823 0.00779 0.00823 0.00779 --- Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 > > setFixest_vcov(reset = TRUE) > > # Check the number of instruments > test( + feols(y ~ x1 | x_endo_1 + x_endo_2 ~ x_inst_1, base), + "err" + ) > > > #### > #### ... VCOV at estimation #### > #### > > chunk("vcov at estimation") VCOV AT ESTIMATION > > base = iris > names(base) = c("y", "x1", "x2", "x3", "species") > base$clu = sample(6, 150, TRUE) > base$clu[1:5] = NA > > est = feols(y ~ x1 | species, base, cluster = ~clu, ssc = ssc(K.adj = FALSE)) > > # The three should be identical > v1 = est$cov.scaled > v1b = vcov(est) > v1c = summary(est)$cov.scaled > > test(v1, v1b) > test(v1, v1c) > > # Only ssc change > v2 = summary(est, ssc = ssc())$cov.scaled > v2b = vcov(est, ssc = ssc()) > > test(v2, v2b) > test(max(abs(v1 - v2)) == 0, FALSE) > > # vcov change only > v3 = summary(est, se = "hetero")$cov.scaled > v3b = vcov(est, se = "hetero") > > test(v3, v3b) > test(max(abs(v1 - v3)) == 0, FALSE) > test(max(abs(v2 - v3)) == 0, FALSE) > > # feols.fit > > ymat = base$y > xmat = base[, 2:3] > fe = base$species > > for(use_fe in c(TRUE, FALSE)){ + all_vcov = stvec("iid, hetero") + if(use_fe){ + setFixest_fml(..fe = ~ 1 | species) + all_vcov = c(all_vcov, "cluster") + } else { + setFixest_fml(..fe = ~ 1) + } + + for(v in all_vcov){ + + if(use_fe){ + est_fit = feols.fit(ymat, xmat, fe, vcov = v) + } else { + est_fit = feols.fit(ymat, cbind(1, xmat), vcov = v) + } + + est = feols(y ~ x1 + x2 + ..fe, base, vcov = v) + + test(vcov(est), vcov(est_fit)) + } + } > > #### > #### ... Custom VCOV #### > #### > > chunk("custom vcov") CUSTOM VCOV > > # Named list stores string > est <- feols(mpg ~ i(cyl), mtcars) > vcov_HC3 <- sandwich::vcovHC(est, type = "HC3") > est_HC3 <- summary(est, vcov = list("HC3" = vcov_HC3)) > test(attr(est_HC3$se, "type"), "HC3") > est_tab <- etable(est_HC3) > test(any(grepl("HC3", est_tab[[2]])), TRUE) > > # Passing functions > test( + summary(est, vcov = function(x) sandwich::vcovHC(x, type = "HC3"))$se, + summary(est, vcov = sandwich::vcovHC, .vcov_args = list(type = "HC3"))$se + ) > # Confirming these work > temp <- etable(est, vcov = function(x) sandwich::vcovHC(x, type = "HC3")) > temp <- etable(est, vcov = sandwich::vcovHC, .vcov_args = list(type = "HC3")) > > > # deprecated `.vcov` still works > est_.vcov <- summary(est, .vcov = vcov_HC3) Warning message: In summary.fixest(est, .vcov = vcov_HC3) : The argument '.vcov' is deprecated. Please use 'vcov' instead. > test(est_.vcov$se, est_HC3$se) > etable(est, .vcov = vcov_HC3) est Dependent Var.: mpg Constant 26.66*** (0.9718) cyl = 6 -6.921*** (1.558) cyl = 8 -11.56*** (1.299) _______________ _________________ S.E. type IID Observations 32 R2 0.73246 Adj. R2 0.71401 --- Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 > > # Custom vcov to fixest multi > est_multi <- feols(c(mpg, hp) ~ i(cyl), mtcars) > vcovs_HC3 <- lapply(est_multi, function(est) sandwich::vcovHC(est, type = "HC3")) > names(vcovs_HC3) <- c("HC3", "HC3") > est_multi_HC3 <- summary(est_multi, vcov = vcovs_HC3) > > test(vcov(est_multi_HC3[[1]]), vcovs_HC3[[1]]) > test(vcov(est_multi_HC3[[2]]), vcovs_HC3[[2]]) > > test(attr(est_multi_HC3[[1]]$se, "type"), "HC3") > test(attr(est_multi_HC3[[2]]$se, "type"), "HC3") > > est_tab <- etable(est_multi, est_multi_HC3) > test(any(grepl("HC3", est_tab$est_multi_HC3.1)), TRUE) > test(any(grepl("HC3", est_tab$est_multi_HC3.2)), TRUE) > > # deprecated `.vcov` still works > est_multi_.vcov <- summary(est_multi, .vcov = vcovs_HC3) > test(est_multi_.vcov[[1]]$se, est_multi_HC3[[1]]$se) > > > > #### > #### ... Argument sliding #### > #### > > chunk("argument sliding") ARGUMENT SLIDING > > base = setNames(iris, c("y", "x1", "x2", "x3", "species")) > > setFixest_estimation(data = base) > > raw = feols(y ~ x1 + x2, base, ~species) Warning message: The VCOV matrix is not positive semi-definite and was 'fixed' (see ?vcov). > slided = feols(y ~ x1 + x2, ~species) Warning message: The VCOV matrix is not positive semi-definite and was 'fixed' (see ?vcov). > > test(coef(raw), coef(slided)) > > # Error, with error msg relative to 'data' > test(feols(y ~ x1 + x2, 1:5), "err") > > # should be another estimation > other_est = feols(y ~ x1 + x2, head(base, 50)) > test(nobs(other_est), 50) > > setFixest_estimation(reset = TRUE) > > #### > #### ... Offset #### > #### > > chunk("offset") OFFSET > > # we test the different ways to set an offset > > base = setNames(iris, c("y", "x1", "x2", "x3", "species")) > > o1 = feols(y ~ x1 + offset(x2) + offset(x3^2 + 3), base) > o2 = feols(y ~ x1, base, offset = ~x2 + x3^2 + 3) > test(coef(o1), coef(o2)) > > test(predict(o1, newdata = head(base)), + predict(o2, newdata = head(base))) > > # error > test(feols(y ~ x1 + offset(x2), base, offset = ~x3), "err") > > > #### > #### ... Only Coef #### > #### > > chunk("only.coef") ONLY.COEF > > > base = setNames(iris, c("y", "x1", "x2", "x3", "species")) > base$x4 = base$x1 + 5 > > m = feols(y ~ x1 + x2 + x4, base, only.coef = TRUE) > test(length(m), 4) > test(sum(is.na(m)), 1) > > m = fepois(y ~ x1 + x2 + x4, base, only.coef = TRUE) > test(length(m), 4) > test(sum(is.na(m)), 1) > > m = femlm(y ~ x1 + x2, base, only.coef = TRUE) > test(length(m), 3) > test(sum(is.na(m)), 0) > > test(feols(y ~ sw(x1, x2), base, only.coef = TRUE), "err") > > > #### > #### Standard-errors #### > #### > > chunk("STANDARD ERRORS") STANDARD ERRORS > > # > # Fixed-effects corrections > # > > # We create "irregular" FEs > set.seed(0) > base = data.frame(x = rnorm(20)) > base$y = base$x + rnorm(20) > base$fe1 = rep(rep(1:3, c(4, 3, 3)), 2) > base$fe2 = rep(rep(1:5, each = 2), 2) > est = feols(y ~ x | fe1 + fe2, base, vcov = "cluster") > > # fe1: 3 FEs > # fe2: 5 FEs > > # > # Clustered standard-errors: by fe1 > # > > # Default: K.fixef = "nonnested" > # => adjustment K = 1 + 5 (i.e. x + fe2) > test(attr(vcov(est, ssc = ssc(K.fixef = "nonnested"), attr = TRUE), "df.K"), 6) > > # K.fixef = FALSE > # => adjustment K = 1 (i.e. only x) > test(attr(vcov(est, ssc = ssc(K.fixef = "none"), attr = TRUE), "df.K"), 1) > > # K.fixef = TRUE > # => adjustment K = 1 + 3 + 5 - 1 (i.e. x + fe1 + fe2 - 1 restriction) > test(attr(vcov(est, ssc = ssc(K.fixef = "full"), attr = TRUE), "df.K"), 8) > > # K.fixef = TRUE & fixef.exact = TRUE > # => adjustment K = 1 + 3 + 5 - 2 (i.e. x + fe1 + fe2 - 2 restrictions) > test(attr(vcov(est, ssc = ssc(K.fixef = "full", K.exact = TRUE), attr = TRUE), "df.K"), 7) > > # > # Manual checks of the SEs > # > > n = est$nobs > VCOV_raw = est$cov.iid / ((n - 1) / (n - est$nparams)) > > # standard > for(k_val in c("none", "nonnested", "full")){ + for(adj in c(FALSE, TRUE)){ + + K = switch(k_val, none = 1, nonnested = 8, full = 8) + my_adj = ifelse(adj, (n - 1) / (n - K), 1) + + test(vcov(est, se = "standard", ssc = ssc(K.adj = adj, K.fixef = k_val)), VCOV_raw * my_adj) + + # cat("adj = ", adj, " ; K.fixef = ", k_val, "\n", sep = "") + } + } > > # Clustered, fe1 > VCOV_raw = est$cov.iid / est$sigma2 > H = vcovClust(est$fixef_id$fe1, VCOV_raw, scores = est$scores, adj = FALSE) > n = nobs(est) > > for(tdf in c("conventional", "min")){ + for(k_val in c("none", "nonnested", "full")){ + for(c_adj in c(FALSE, TRUE)){ + for(adj in c(FALSE, TRUE)){ + + K = switch(k_val, none = 1, nonnested = 6, full = 8) + cluster_factor = ifelse(c_adj, 3/2, 1) + df = ifelse(tdf == "min", 2, 20 - K) + my_adj = ifelse(adj, (n - 1) / (n - K), 1) + + V = H * cluster_factor + + # test SE + test(vcov(est, se = "cluster", ssc = ssc(K.adj = adj, K.fixef = k_val, G.adj = c_adj)), V * my_adj) + + # test pvalue + my_tstat = tstat(est, se = "cluster", ssc = ssc(K.adj = adj, K.fixef = k_val, G.adj = c_adj)) + test(pvalue(est, se = "cluster", ssc = ssc(K.adj = adj, K.fixef = k_val, G.adj = c_adj, t.df = tdf)), 2*pt(-abs(my_tstat), df)) + + # cat("adj = ", adj, " ; K.fixef = ", k_val, " ; G.adj = ", c_adj, " t.df = ", tdf, "\n", sep = "") + } + } + } + } > > > # 2-way Clustered, fe1 fe2 > VCOV_raw = est$cov.iid / est$sigma2 > M_i = vcovClust(est$fixef_id$fe1, VCOV_raw, scores = est$scores, adj = FALSE) > M_t = vcovClust(est$fixef_id$fe2, VCOV_raw, scores = est$scores, adj = FALSE) > M_it = vcovClust(paste(base$fe1, base$fe2), VCOV_raw, scores = est$scores, adj = FALSE, do.unclass = TRUE) > > M_i + M_t - M_it [,1] [1,] 0.005391594 > vcov(est, se = "two", ssc = ssc(K.adj = FALSE, G.adj = FALSE)) x x 0.005391594 > > for(cdf in c("conventional", "min")){ + for(tdf in c("conventional", "min")){ + for(k_val in c("none", "nonnested", "full")){ + for(c_adj in c(FALSE, TRUE)){ + for(adj in c(FALSE, TRUE)){ + + K = switch(k_val, none = 1, nonnested = 2, full = 8) + + if(c_adj){ + if(cdf == "min"){ + V = (M_i + M_t - M_it) * 3/2 + } else { + V = M_i * 3/2 + M_t * 5/4 - M_it * 6/5 + } + } else { + V = M_i + M_t - M_it + } + + df = ifelse(tdf == "min", 2, 20 - K) + my_adj = ifelse(adj, (n - 1) / (n - K), 1) + + # test SE + test(vcov(est, se = "two", ssc = ssc(K.adj = adj, K.fixef = k_val, G.adj = c_adj, G.df = cdf)), + V * my_adj) + + # test pvalue + my_tstat = tstat(est, se = "two", ssc = ssc(K.adj = adj, K.fixef = k_val, G.adj = c_adj, G.df = cdf)) + test(pvalue(est, se = "two", ssc = ssc(K.adj = adj, K.fixef = k_val, G.adj = c_adj, G.df = cdf, t.df = tdf)), + 2*pt(-abs(my_tstat), df)) + + # cat("adj = ", adj, " ; K.fixef = ", k_val, " ; G.adj = ", c_adj, " t.df = ", tdf, "\n", sep = "") + } + } + } + } + } > > > # > # Comparison with sandwich and plm > # > > # Data generation > set.seed(0) > N = 20 ; G = N / 5 ; T = N / G > d = data.frame(y = rnorm(N), x = rnorm(N), grp = rep(1:G, T), tm = rep(1:T, each = G)) > > # Estimations > est_lm = lm(y ~ x + as.factor(grp) + as.factor(tm), data=d) > est_feols = feols(y ~ x | grp + tm, data = d, vcov = "cluster") > > # > # Standard > # > > test(se(est_feols, se = "st")["x"], se(est_lm)["x"]) > > # > # Heteroskedasticity-robust > # > > se_white_lm_HC1 = sqrt(vcovHC(est_lm, type = "HC1")["x", "x"]) > se_white_lm_HC0 = sqrt(vcovHC(est_lm, type = "HC0")["x", "x"]) > > test(se(est_feols, se = "hetero"), se_white_lm_HC1) > test(se(est_feols, se = "hetero", ssc = ssc(K.adj = FALSE, G.adj = FALSE)), se_white_lm_HC0) > > > # > # Clustered > # > > # Clustered by grp > se_CL_grp_lm_HC1 = sqrt(vcovCL(est_lm, cluster = d$grp, type = "HC1")["x", "x"]) > se_CL_grp_lm_HC0 = sqrt(vcovCL(est_lm, cluster = d$grp, type = "HC0")["x", "x"]) > > # How to get the lm > test(se(est_feols, ssc = ssc(K.fixef = "full")), se_CL_grp_lm_HC1) > test(se(est_feols, ssc = ssc(K.adj = FALSE, K.fixef = "full")), se_CL_grp_lm_HC0) > > # > # Two way > # > > # Clustered by grp & tm > se_CL_2w_lm = sqrt(vcovCL(est_lm, cluster = ~ grp + tm, type = "HC1")["x", "x"]) > se_CL_2w_feols = se(est_feols, se = "twoway") > > test(se(est_feols, se = "twoway", ssc = ssc(K.fixef = "full", G.df = "conv")), se_CL_2w_lm) > > # > # HC2/HC3 > # > > # HC2/HC3 > base = iris > base$w = runif(nrow(base)) > > est_feols = feols(Sepal.Length ~ Sepal.Width | Species, base) > est_lm = lm(Sepal.Length ~ Sepal.Width + factor(Species), base) > > test( + vcov(est_feols, "hc2", ssc = ssc(K.adj = FALSE, G.adj = FALSE)), + sandwich::vcovHC(est_lm, "HC2")["Sepal.Width", "Sepal.Width"] + ) > > test( + vcov(est_feols, "hc3", ssc = ssc(K.adj = FALSE, G.adj = FALSE)), + sandwich::vcovHC(est_lm, "HC3")["Sepal.Width", "Sepal.Width"] + ) > > # HC2/HC3 with weights > base = iris > base$w = runif(nrow(base)) > > est_feols = feols(Sepal.Length ~ Sepal.Width | Species, base, weights = ~ w) > est_lm = lm(Sepal.Length ~ Sepal.Width + factor(Species), base, weights = base$w) > > test( + vcov(est_feols, "hc2", ssc = ssc(K.adj = FALSE, G.adj = FALSE)), + sandwich::vcovHC(est_lm, "HC2")["Sepal.Width", "Sepal.Width"] + ) > > test( + vcov(est_feols, "hc3", ssc = ssc(K.adj = FALSE, G.adj = FALSE)), + sandwich::vcovHC(est_lm, "HC3")["Sepal.Width", "Sepal.Width"] + ) > > # HC2/HC3 with GLM > base$Sepal.Length = floor(base$Sepal.Length) > est_feglm = feglm( + Sepal.Length ~ Sepal.Width | Species, base, + "poisson", weights = ~ w + ) > est_glm = glm( + Sepal.Length ~ Sepal.Width + factor(Species), base, + family = poisson(), weights = base$w + ) > > test( + vcov(est_feglm, "hc2", ssc = ssc(K.adj = FALSE, G.adj = FALSE)), + sandwich::vcovHC(est_glm, "HC2")["Sepal.Width", "Sepal.Width"] + ) > test( + vcov(est_feglm, "hc3", ssc = ssc(K.adj = FALSE, G.adj = FALSE)), + sandwich::vcovHC(est_glm, "HC3")["Sepal.Width", "Sepal.Width"] + ) > > # Fail when P_ii = 1 > base$Species = as.character(base$Species) > base[1, "Species"] = "foo" > > est_pii_singular = feols(Sepal.Length ~ Sepal.Width | Species, base, fixef.rm = "none") > > test(vcov(est_pii_singular, "hc2"), "err") > > # > # Newey-west + Driscoll-Kraay > # > > if(requireNamespace("plm", quietly = TRUE)){ + + # library(plm) + data(Grunfeld, package = "plm") + + est_panel_plm = plm::plm(inv ~ capital + as.factor(year), Grunfeld, + index = c("firm", "year"), model = "within") + est_panel_feols = feols(inv ~ capital | firm + year, Grunfeld, + panel.id = ~firm + year) + + # NW + se_plm_NW = se(plm::vcovNW(est_panel_plm))["capital"] + test(se(est_panel_feols, vcov = NW ~ ssc(adj = FALSE, cluster.adj = FALSE)), se_plm_NW) + + # DK + se_plm_DK = se(plm::vcovSCC(est_panel_plm))["capital"] + test(se(est_panel_feols, vcov = DK ~ ssc(adj = FALSE, cluster.adj = FALSE)), se_plm_DK) + } > > # > # Checking the calls work properly > # > > data(trade) > > est_pois = femlm(Euros ~ log(dist_km)|Origin+Destination, trade) > > se_clust = se(est_pois, se = "cluster", cluster = "Product") > test(se(est_pois, cluster = trade$Product), se_clust) > test(se(est_pois, cluster = ~Product), se_clust) > > se_two = se(est_pois, se = "twoway", cluster = trade[, c("Product", "Destination")]) > test(se_two, se(est_pois, cluster = c("Product", "Destination"))) > test(se_two, se(est_pois, cluster = ~Product+Destination)) > > se_clu_comb = se(est_pois, cluster = "Product^Destination") > test(se_clu_comb, se(est_pois, cluster = paste(trade$Product, trade$Destination))) > test(se_clu_comb, se(est_pois, cluster = ~Product^Destination)) > > se_two_comb = se(est_pois, cluster = c("Origin^Destination", "Product")) > test(se_two_comb, se(est_pois, cluster = list(paste(trade$Origin, trade$Destination), + trade$Product))) > test(se_two_comb, se(est_pois, cluster = ~Origin^Destination + Product)) > > # With cluster removed > base = trade > base$Euros[base$Origin == "FR"] = 0 > est_pois = femlm(Euros ~ log(dist_km)|Origin+Destination, base) > > se_clust = se(est_pois, se = "cluster", cluster = "Product") > test(se(est_pois, cluster = base$Product), se_clust) > test(se(est_pois, cluster = ~Product), se_clust) > > se_two = se(est_pois, se = "twoway", cluster = base[, c("Product", "Destination")]) > test(se_two, se(est_pois, cluster = c("Product", "Destination"))) > test(se_two, se(est_pois, cluster = ~Product+Destination)) > > se_clu_comb = se(est_pois, cluster = "Product^Destination") > test(se_clu_comb, se(est_pois, cluster = paste(base$Product, base$Destination))) > test(se_clu_comb, se(est_pois, cluster = ~Product^Destination)) > > se_two_comb = se(est_pois, cluster = c("Origin^Destination", "Product")) > test(se_two_comb, se(est_pois, cluster = list(paste(base$Origin, base$Destination), + base$Product))) > test(se_two_comb, se(est_pois, cluster = ~Origin^Destination + Product)) > > # With cluster removed and NAs > base = trade > base$Euros[base$Origin == "FR"] = 0 > base$Euros_na = base$Euros ; base$Euros_na[sample(nrow(base), 50)] = NA > base$Destination_na = base$Destination ; base$Destination_na[sample(nrow(base), 50)] = NA > base$Origin_na = base$Origin ; base$Origin_na[sample(nrow(base), 50)] = NA > base$Product_na = base$Product ; base$Product_na[sample(nrow(base), 50)] = NA > > est_pois = femlm(Euros ~ log(dist_km)|Origin+Destination_na, base) > > se_clust = se(est_pois, se = "cluster", cluster = "Product") > test(se(est_pois, cluster = base$Product), se_clust) > test(se(est_pois, cluster = ~Product), se_clust) > > se_two = se(est_pois, se = "twoway", cluster = base[, c("Product", "Destination")]) > test(se_two, se(est_pois, cluster = c("Product", "Destination"))) > test(se_two, se(est_pois, cluster = ~Product+Destination)) > > se_clu_comb = se(est_pois, cluster = "Product^Destination") > test(se_clu_comb, se(est_pois, cluster = paste(base$Product, base$Destination))) > test(se_clu_comb, se(est_pois, cluster = ~Product^Destination)) > > se_two_comb = se(est_pois, cluster = c("Origin^Destination", "Product")) > test(se_two_comb, se(est_pois, cluster = list(paste(base$Origin, base$Destination), + base$Product))) > test(se_two_comb, se(est_pois, cluster = ~Origin^Destination + Product)) malloc(): largebin double linked list corrupted (bk) Aborted * checking for unstated dependencies in vignettes ... OK * checking package vignettes ... OK * checking re-building of vignette outputs ... [21s/27s] OK * checking PDF version of manual ... [14s/14s] OK * checking HTML version of manual ... [7s/7s] OK * checking for non-standard things in the check directory ... OK * checking for detritus in the temp directory ... OK * DONE Status: 1 ERROR