R Under development (unstable) (2024-03-07 r86063 ucrt) -- "Unsuffered Consequences" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: x86_64-w64-mingw32/x64 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > # Simulate a mixed data set > # @param models a vector of models to simulate (note that proportion free or > # proportion equal models will be treated in the same mannear) > # @param params parameters to used, for each model, the list provides a vector > # with the number of variables and the proportion of missing values > # @param nbSample size of the sample to simulate (not used if z is given) > # @param nbCluster number of class to simulate (not used if prop is given) > # @param z vector giving the classes of each sample > # @param prop vector with the probabilities of each classes (not used if z is given) > # > # @return two lists > simulMixedData <- function(models, params, nbSample, nbCluster=2, z = NULL, prop = NULL) + { + # check + if (!is.vector(models) < 2) { stop("models has to be a vector")} + if (!is.list(params) < 2) { stop("params has to be a list")} + if (length(models) != length(params)) { stop("params and models must have the same size")} + if (nbCluster < 2) { stop("The number of clusters must be greater or equal to 2")} + + # check if we have to compute z or it is already given + if (is.null((z))) + { + if (nbSample < nbCluster) { stop("The number of sample must be greater to the number of class")} + if(is.null(prop)) { prop=rep(1/nbCluster, length.out=nbCluster); } + z <- sample.int(1:nbCluster, size=nbSample, replace = TRUE, prob= prop); + } + else + { nbSample <- length(z);} + + res <- vector("list", length = length(models)+1); + names(res) <- c("z", models); + res$z <- z; + for (i in 1:length(models)) + { + # get current model and parameters + if (is.list(models)) { model <- models[[i]];} + else { model <- models[i];} + param <- params[[i]]; + if (clusterValidCategoricalNames(model)) + { + # simulate Categorical + all = c( "categorical_pk_pjk", "categorical_pk_pk", "categorical_p_pjk", "categorical_p_pk") + + } + else if (clusterValidDiagGaussianNames(models[i])) + { + # simulate Gaussian + all = c( "gaussian_pk_sjk", "gaussian_pk_sj", "gaussian_pk_sk", "gaussian_pk_s" + , "gaussian_p_sjk", "gaussian_p_sj", "gaussian_p_sk", "gaussian_p_s") + } + else if (clusterValidGammaNames(models[i])) + { + # simulate Gamma + all = c( "gamma_p_ajk_bjk", "gamma_p_ajk_bk", "gamma_p_ajk_bj", "gamma_p_ajk_b" + , "gamma_p_ak_bjk", "gamma_p_ak_bk", "gamma_p_ak_bj", "gamma_p_ak_b" + , "gamma_p_aj_bjk", "gamma_p_aj_bk" + , "gamma_p_a_bjk", "gamma_p_a_bk" + , "gamma_pk_ajk_bjk", "gamma_pk_ajk_bk", "gamma_pk_ajk_bj", "gamma_pk_ajk_b" + , "gamma_pk_ak_bjk", "gamma_pk_ak_bk", "gamma_pk_ak_bj", "gamma_pk_ak_b" + , "gamma_pk_aj_bjk", "gamma_pk_aj_bk" + , "gamma_pk_a_bjk", "gamma_pk_a_bk" + ) + } + else if (clusterValidPoissonNames(models[i])) + { + # simulate Poisson + all = c( "poisson_pk_ljk", "poisson_pk_lk", "poisson_pk_ljlk", "poisson_p_ljk", "poisson_p_lk", "poisson_p_ljlk") + } + else + { + stop("Invalid model name") + } + } + res + } > > proc.time() user system elapsed 0.14 0.09 0.21