# library(DescTools) # library(haven) #for read_dta # library(MASS) #for polr # library(nnet) #for multinom # library(VGAM) # library(plyr) #for testing recode of factor, using revalue # # source("R/LinMod.R") # # # # ==== notes === # # #checks needed: # # A) Ensure data parameter should work it is 1) explicitly defined, 2 found in environment, 3) not found # # B) Check "special" parameters (substitute, weight, and na.action parameters) # # C) check non-literal variables # # hsb2 <- as.data.frame(read_dta("https://stats.idre.ucla.edu/stat/stata/notes/hsb2.dta")) # hsb2$honcomp <- hsb2$write >= 60 # # hsb2$write_cat <- cut(hsb2$write, breaks = c(30,40,50,60,70)) # hsb2$race_cat <- factor(hsb2$race) # # # # ==== GLM ==== # # #"Data" and "model" object components are both usable (we give priority to model) # base.logit <- glm(honcomp ~ female + read + science + ses, hsb2, family="binomial") # PseudoR2(base.logit) # # #"Data" object is reference to global environment (but we have a model object) # base2.logit <- glm(hsb2$honcomp ~ hsb2$female + hsb2$read + hsb2$science + hsb2$ses, family="binomial") # PseudoR2(base2.logit) # # #POSSIBLE ISSUE: no model object (only data), and a non-literal DV (eg, read > 60) # #A1 tests are covered above # # #A2a - variables in global environment # y <- hsb2$honcomp # test_a2.logit <- glm(y ~ hsb2$female + hsb2$read + hsb2$science + hsb2$ses, family="binomial", model = FALSE) # PseudoR2(test_a2.logit) # #NB: doesn't give useful name of object that needs new evaluation # # #A3a - "data" object is reference to global environment # z <- hsb2$honcomp # test_a3a.logit <- glm(z ~ hsb2$female + hsb2$read + hsb2$science + hsb2$ses, family="binomial", model = FALSE, x = TRUE, y = TRUE) # rm(z) # PseudoR2(test_a3a.logit) # # #A3b - "data" object is contained in data frame # tempdf <- hsb2 # test_a3b.logit <- glm(honcomp ~ female + read + science + ses, tempdf, family="binomial", model = FALSE) # PseudoR2(test_a3b.logit) # tempdf <- tempdf[1:100,] # PseudoR2(test_a3b.logit) # rm(tempdf) # PseudoR2(test_a3b.logit) # # # # ---- B ---- # #WEIGHTS # #Weights are created on-the-fly via runif # test_b1.logit <- glm(honcomp ~ female + read + science + ses, hsb2, family="binomial", weights = runif(nrow(hsb2)), model = FALSE) # PseudoR2(test_b1.logit) # PseudoR2(test_b1.logit) # # #Weights are saved # test_weights <- runif(nrow(hsb2)) # test_b2.logit <- glm(honcomp ~ female + read + science + ses, data = hsb2, family="binomial", weights = test_weights, model = FALSE) # rm(test_weights) # PseudoR2(test_b2.logit) # # withna.df <- rbind(hsb2[1:100,], NA, NA, hsb2[101:200,]) # test_b3.logit <- glm(honcomp ~ female + read + science + ses, data = withna.df, family="binomial", weights = runif(nrow(withna.df)), model = FALSE) # PseudoR2(test_b3.logit) # # #NA.ACTION # #Could try using the na.omit attribute of glms here to handle, but it's a lot of work for little return # test_b4.logit <- glm(honcomp ~ female + read + science + ses, data = rbind(hsb2, NA), family="binomial", model = FALSE, na.action = na.omit) # PseudoR2(test_b4.logit) # # test_naAction <- na.omit # test_b5.logit <- glm(honcomp ~ female + read + science + ses, data = rbind(hsb2, NA), family="binomial", model = TRUE, na.action = test_naAction) # rm(test_naAction) # PseudoR2(test_b5.logit) # # test_naAction <- na.omit # test_b6.logit <- glm(honcomp ~ female + read + science + ses, data = rbind(hsb2, NA), family="binomial", model = FALSE, na.action = test_naAction) # rm(test_naAction) # PseudoR2(test_b6.logit) # # # # ---- C ---- # # #DV, With model # test_c1.logit <- glm((hsb2$write_cat == "(30,40]" | hsb2$write_cat == "(40,50]") ~ female + read + science + ses, hsb2, family="binomial") # PseudoR2(test_c1.logit) # # #DV, without model, out of data frame # test_c2.logit <- glm((hsb2$write_cat == "(30,40]" | hsb2$write_cat == "(40,50]") ~ hsb2$female + hsb2$read + hsb2$science + hsb2$ses, family="binomial", model = FALSE) # PseudoR2(test_c2.logit) # # #DV, without model, in data frmae # test_c3.logit <- glm((write_cat == "(30,40]" | hsb2$write_cat == "(40,50]") ~ female + read + science + ses, hsb2, family="binomial", model = FALSE) # PseudoR2(test_c3.logit) # # #IV, without model, no data frame # test_c4.logit <- glm(hsb2$honcomp ~ hsb2$female + (hsb2$read > 50) + hsb2$science + hsb2$ses, family="binomial", model = FALSE) # PseudoR2(test_c4.logit) # # #IV, without model, with data frame # test_c5.logit <- glm(honcomp ~ female + (read > 50) + science + ses, family="binomial", data = hsb2, model = FALSE) # PseudoR2(test_c5.logit) # # # # ==== POLR ==== # # #"Data" and "model" object components are both usable (we give priority to model) # base.polr <- polr(write_cat ~ female + read + science + ses, hsb2) # PseudoR2(base.polr) # # # ---- A ---- # # #A1: polr does not return "data" component, so explicit reference is impossible # # #A2: polr object "call" component references valid data frame # #Unlike in glm, polr doesn't save a data object # test_a2.polr <- polr(write_cat ~ female + read + science + ses, hsb2, model = FALSE) # PseudoR2(test_a2.polr) # # #POSSIBLE ISSUE: no model object (only data), and a non-literal DV (eg, read > 60) # # #A3 # #"call" references objects in global environment # y <- cut(hsb2$write, breaks = c(30,40,50,60,70)) # test_a3a.polr <- polr(y ~ hsb2$female + hsb2$read + hsb2$science + hsb2$ses, model = FALSE) # PseudoR2(test_a3a.polr) # # #A3b - "call" references invalid data frame # tempdf <- hsb2 # test_a3b.polr <- polr(write_cat ~ female + read + science + ses, data = tempdf, model = FALSE) # rm(tempdf) # PseudoR2(test_a3b.polr) # # # # # ---- B ---- # #WEIGHTS # #Weights are created on-the-fly via runif # test_b1.polr <- polr(write_cat ~ female + read + science + ses, hsb2, weights = runif(nrow(hsb2)), model = FALSE) # PseudoR2(test_b1.polr) # # #Weights are saved # test_weights <- runif(nrow(hsb2)) # test_b2.polr <- polr(write_cat ~ female + read + science + ses, weights = test_weights, data = hsb2, model = FALSE) # PseudoR2(test_b2.polr) # rm(test_weights) # PseudoR2(test_b2.polr) # # withna.df <- rbind(hsb2[1:100,], NA, NA, hsb2[101:200,]) # test_b3.polr <- polr(write_cat ~ female + read + science + ses, data = withna.df, weights = runif(nrow(withna.df)), model = FALSE) # PseudoR2(test_b3.polr) # # #NA.ACTION # test_b4.polr <- polr(write_cat ~ female + read + science + ses, data = rbind(hsb2, NA), model = FALSE, na.action = na.omit) # PseudoR2(test_b4.polr) # # test_naAction <- na.omit # test_b5.polr <- polr(write_cat~ female + read + science + ses, data = rbind(hsb2, NA), model = TRUE, na.action = test_naAction) # PseudoR2(test_b5.polr) # test_naAction <- na.fail # PseudoR2(test_b5.polr) # # test_naAction <- na.omit # test_b6.logit <- glm(honcomp ~ female + read + science + ses, data = rbind(hsb2, NA), family="binomial", model = FALSE, na.action = test_naAction) # PseudoR2(test_b6.logit) # test_naAction <- na.fail # PseudoR2(test_b6.logit) # # # ---- C ---- # # # #DV, without model, out of data frame # test_c1.polr <- polr(revalue(write_cat, c("(30,40]" = "(30,50]", "(40,50]" = "(30,50]")) ~ female + read + science + ses, hsb2, model = FALSE) # PseudoR2(test_c1.polr) # # #DV, without model, in data frmae # test_c2.polr <- polr(revalue(hsb2$write_cat, c("(30,40]" = "(30,50]", "(40,50]" = "(30,50]")) ~ hsb2$female + hsb2$read + hsb2$science + hsb2$ses, model = FALSE) # PseudoR2(test_c2.polr) # # #IV, without model, no data frame # test_c3.polr <- polr(hsb2$write_cat ~ hsb2$female + (hsb2$read > 50) + hsb2$science + hsb2$ses, model = FALSE) # PseudoR2(test_c3.polr) # # #IV, without model, with data frame # test_c4.polr <- polr(write_cat ~ female + (read > 50) + science + ses, hsb2, model = FALSE) # PseudoR2(test_c4.polr) # # # # ==== Multinom ==== # # #"Data" and "model" object components are both usable (we give priority to model) # base.multinom <- multinom(race_cat ~ female + read + science + ses, hsb2, model = TRUE) # PseudoR2(base.multinom) # # # ---- A ---- # # #A1: multinom does not return "data" component, so expciCit reference is impossible # # #A2: multinom object "call" component references valid data frame # test_a2.multinom <- multinom(race_cat ~ female + read + science + ses, hsb2, model = FALSE) # PseudoR2(test_a2.multinom) # # #A3 # #Unlike in glm, multinom won't save an enviornment labelled as "data" # #"call" references objects in global environment # #NOTE: could theoretically get the variables from call$formula, althiugh this would be risky # y_nominal <- hsb2$race_cat # test_a3a.multinom <- multinom(y_nominal ~ hsb2$female + hsb2$read + hsb2$science + hsb2$ses, model = FALSE) # PseudoR2(test_a3a.multinom) # # #A3b - "call" references invalid data frame # tempdf <- hsb2 # test_a3b.multinom <- multinom(race_cat ~ female + read + science + ses, data = tempdf, model = FALSE) # PseudoR2(test_a3b.multinom) # rm(tempdf) # PseudoR2(test_a3b.multinom) # # # # # ---- B ---- # #WEIGHTS # #Weights are created on-the-fly via runif # #Multinom saves a "weights" element, which is equivalent to the glm "prior.weights" element # test_b1.multinom <- multinom(race_cat ~ female + read + science + ses, hsb2, weights = runif(nrow(hsb2)), model = TRUE) # PseudoR2(test_b1.multinom) # # #Weights are saved # test_weights <- runif(nrow(hsb2)) # test_b2.multinom <- multinom(race_cat ~ female + read + science + ses, weights = test_weights, data = hsb2, model = FALSE) # PseudoR2(test_b2.multinom) # rm(test_weights) # PseudoR2(test_b2.multinom) # # withna.df <- rbind(hsb2[1:100,], NA, NA, hsb2[101:200,]) # test_b3.multinom <- multinom(race_cat ~ female + read + science + ses, data = withna.df, weights = runif(nrow(withna.df)), model = FALSE) # PseudoR2(test_b3.multinom) # # #NA.ACTION # test_b4.multinom <- multinom(race_cat ~ female + read + science + ses, data = rbind(hsb2, NA), model = FALSE, na.action = na.omit) # PseudoR2(test_b4.multinom) # # test_naAction <- na.omit # test_b5.multinom <- multinom(race_cat ~ female + read + science + ses, data = rbind(hsb2, NA), model = TRUE, na.action = test_naAction) # PseudoR2(test_b5.multinom) # test_naAction <- na.fail # PseudoR2(test_b5.multinom) # # #QUIETLY RE-FIT MULTINOM # # # ---- C ---- # # #DV, With model # test_c1.multinom <- multinom(revalue(race_cat, c("1" = "1", "2" = "1")) ~ female + read + science + ses, hsb2, model = TRUE) # PseudoR2(test_c1.multinom) # # #DV, without model, out of data frame # test_c2.multinom <- multinom(revalue(hsb2$race_cat, c("1" = "1", "2" = "1")) ~ hsb2$female + hsb2$read + hsb2$science + hsb2$ses, model = FALSE) # PseudoR2(test_c2.multinom) # # #DV, without model, in data frame # test_c2.multinom <- multinom(revalue(race_cat, c("1" = "1", "2" = "1")) ~ female + read + science + ses, hsb2, model = FALSE) # PseudoR2(test_c2.multinom) # # #IV, without model, no data frame # test_c4.multinom <- multinom(hsb2$race_cat ~ hsb2$female + (hsb2$read > 50) + hsb2$science + hsb2$ses, model = FALSE) # PseudoR2(test_c4.multinom) # # #IV, without model, with data frame # test_c5.multinom <- multinom(race_cat ~ female + (read > 50) + science + ses, data = hsb2, model = FALSE) # PseudoR2(test_c5.multinom) # # # # # === VGLM ==== # # #Because of the very wide variety of possible VGLM models and related parameters + functional forms, we can't easily take the same testing approach as above # #We'll instead starty by testing the models listed in the VGAM help file # # # Example 1. See help(glm) # print(d.AD <- data.frame(treatment = gl(3, 3), # outcome = gl(3, 1, 9), # counts = c(18,17,15,20,10,20,25,13,12))) # vglm.D93 <- vglm(counts ~ outcome + treatment, family = poissonff, # data = d.AD, trace = TRUE, model = TRUE) # summary(vglm.D93) # PseudoR2(vglm.D93) # # # # Example 2. Multinomial logit model # pneumo <- transform(pneumo, let = log(exposure.time)) # vglm.pneumo <- vglm(cbind(normal, mild, severe) ~ let, multinomial, data = pneumo, model = TRUE) # PseudoR2(vglm.pneumo) # # # Example 3. Proportional odds model # fit3 <- vglm(cbind(normal, mild, severe) ~ let, propodds, data = pneumo, model = TRUE) # PseudoR2(fit3) # # # Example 4. Bivariate logistic model # fit4 <- vglm(cbind(nBnW, nBW, BnW, BW) ~ age, binom2.or, coalminers, model = TRUE) # PseudoR2(fit4) # # # # Example 5. The use of the xij argument (simple case). # # The constraint matrix for 'op' has one column. # nn <- 1000 # eyesdat <- round(data.frame(lop = runif(nn), # rop = runif(nn), # op = runif(nn)), digits = 2) # eyesdat <- transform(eyesdat, eta1 = -1 + 2 * lop, # eta2 = -1 + 2 * lop) # eyesdat <- transform(eyesdat, # leye = rbinom(nn, size = 1, prob = logit(eta1, inverse = TRUE)), # reye = rbinom(nn, size = 1, prob = logit(eta2, inverse = TRUE))) # fit5 <- vglm(cbind(leye, reye) ~ op, # binom2.or(exchangeable = TRUE, zero = 3), # data = eyesdat, trace = TRUE, # xij = list(op ~ lop + rop + fill(lop)), # form2 = ~ op + lop + rop + fill(lop), # model = TRUE) # PseudoR2(fit5)