#
# library(haven)
# library(data.table)
# library(sdcLog)
#
# path_dta <- list.files(
#     pattern = "nobsdes5xmpl.dta", full.names = TRUE, recursive = TRUE
# )
# dt <- setDT(read_dta(path_dta))
#
# # 2. Applying nobsdes2
# ## NOBSDES2 without options
# options(sdc.info_level = 2L)
# sdc_descriptives(dt, "id")
# sdc_descriptives(dt, "id", "x")
#
#
# ## How to use the BY - option
# res <- sdc_descriptives(dt, "id", "x", by = "year")
# sdc_descriptives(dt, "id", "x", by = "year")
#
# dt[year != 2002, .(`sum(x)` = sum(x, na.rm = TRUE)), by = "year"][order(year)]
#
# dt[, year_bucket := fifelse(year %% 2L == 1L, year + 1L, year)
# ][, .(`sum(x)` = sum(x, na.rm = TRUE)), keyby = year_bucket]
#
# sdc_descriptives(dt, "id", "x", by = "year_bucket")
#
#
# ## GEN - option
# res <- sdc_descriptives(dt, "id", "x", by = "year")
# merge(
#     dt[,.(`sum(x)` = sum(x, na.rm = TRUE)), keyby = year],
#     merge(
#         as.data.table(res$distinct_ids[distinct_ids >= 5]),
#         as.data.table(res$dominance),
#         by = "year"
#     )
# )
#
#
# ## BYEST - option
# dt[, quantiles := cut(xpc, breaks = quantile(xpc, seq(0, 1, 0.1), na.rm = TRUE, type = 6L))]
# res <- sdc_descriptives(dt, "id", "xpc", by = "quantiles")
# na.omit(as.data.table(merge(res$distinct_ids, res$dominance))[order(quantiles)])
#
#
# ## ZEROS
# res <- sdc_descriptives(dt, "id", "xwithzeros", by = "year")
# res <- sdc_descriptives(dt, "id", "xwithzeros", by = "year", zero_as_NA = FALSE)
# as.data.table(res$distinct_ids)
#
#
# ## DOMINANCE
# DT <- cube(dt[!is.na(xfordom)], j = sum(xfordom), by = c("land", "id"), id = TRUE)
# dcast(DT[grouping <= 1], id ~ land, value.var = "V1")[order(id)]
#
# sdc_descriptives(dt, "id", "xfordom", by = "land")
#
#
# ## MAXIMUM and MINIMUM
# sdc_min_max(dt, "id", "x")
#
# ## PERCENTILES
# # sdc_percentiles() would be helpful to suppress percentile labels
# dt[year == 2010L, percentiles := cut(xpc, unique(quantile(xpc, probs = c(0, .05, .50, .90, 1), type = 6, na.rm = TRUE)))]
# res <- sdc_descriptives(dt[year == 2010], "id", val_var = "xpc", by = "percentiles")
# as.data.table(merge(res$distinct_ids, res$dominance))[order(percentiles)]
#
# dt[, percentiles := cut(xpc, unique(quantile(xpc, probs = c(0, .05, .50, .90, 1), type = 6, na.rm = TRUE)))]
# res <- sdc_descriptives(dt, "id", val_var = "xpc", by = "percentiles")
# as.data.table(merge(res$distinct_ids, res$dominance))[order(percentiles)]
#
# dt[, percentiles := cut(xpc, quantile(xpc, probs = seq(0, 1, .1), type = 6, na.rm = TRUE), right = FALSE)]
# res <- sdc_descriptives(dt, "id", "xpc", by = "percentiles")
#
#
# ## Creating GRAPHS
# options(sdc.id_var = "id")
# res <- sdc_descriptives(dt, val_var = "x", by = c("year", "land"))
# plot_dt <- as.data.table(merge(res$distinct_ids, res$dominance))[distinct_ids >= 5 & value_share <= 0.85]
# plot_dt <- merge(plot_dt, dt)
# plot(plot_dt[land == "BE", .(`mean(x)` = mean(x, na.rm = TRUE)), by = "year"])
#
#
# ## Creating HISTOGRAMS
# dt[, bins := cut(xpc, breaks = 10L)]
# sdc_descriptives(dt, val_var = "xpc", by = "bins")
# ggplot(na.omit(dt[, .N, by = bins])) +
#     aes(x = bins, y = N) +
#     geom_col()
#
#
# ## Aggregating Data
# ## not relevant for R
# }