library( "sampleSelection" ) options( digits = 3 ) ## Wooldridge( 2003 ): example 17.5, page 590 data(Mroz87) myProbit <- glm( lfp ~ nwifeinc + educ + exper + I( exper^2 ) + age + kids5 + kids618, family = binomial( link = "probit" ), data=Mroz87 ) Mroz87$IMR <- invMillsRatio( myProbit )$IMR1 myHeckit <- lm( log( wage ) ~ educ + exper + I( exper^2 ) + IMR, data = Mroz87[ Mroz87$lfp == 1, ] ) # using NO labor force participation as endogenous variable Mroz87$nolfp <- 1 - Mroz87$lfp myProbit2 <- glm( nolfp ~ nwifeinc + educ + exper + I( exper^2 ) + age + kids5 + kids618, family = binomial( link = "probit" ), data=Mroz87 ) all.equal( invMillsRatio( myProbit )$IMR1, invMillsRatio( myProbit2 )$IMR0 ) # should be true # example for bivariate probit suppressPackageStartupMessages( library( "mvtnorm" ) ) suppressPackageStartupMessages( library( "VGAM" ) ) set.seed( 321 ) nObs <- 10000 # error terms (trivariate normal) sigma <- symMatrix( c( 2, 0.7, 1.2, 1, 0.5, 1 ) ) myData <- as.data.frame( rmvnorm( nObs, c( 0, 0, 0 ), sigma ) ) names( myData ) <- c( "e0", "e1", "e2" ) # exogenous variables (indepently normal) myData$x0 <- rnorm( nObs ) myData$x1 <- rnorm( nObs ) myData$x2 <- rnorm( nObs ) # endogenous variables myData$y0 <- -1.5 + 0.8 * myData$x1 + myData$e0 myData$y1 <- ( 0.3 + 0.4 * myData$x1 + 0.3 * myData$x2 + myData$e1 ) > 0 myData$y2 <- ( -0.1 + 0.6 * myData$x1 + 0.7 * myData$x2 + myData$e2 ) > 0 # bivariate probit (using rhobit transformation) bProbit <- vglm( cbind( y1, y2 ) ~ x1 + x2, family = binom2.rho, data = myData ) summary( bProbit ) # inverse Mills Ratios imr <- invMillsRatio( bProbit ) # tests # E[ e0 | y1* > 0 & y2* > 0 ] mean( myData$e0[ myData$y1 & myData$y2 ] ) mean( sigma[1,2] * imr$IMR11a + sigma[1,3] * imr$IMR11b, na.rm = TRUE ) # E[ e0 | y1* > 0 & y2* <= 0 ] mean( myData$e0[ myData$y1 & !myData$y2 ] ) mean( sigma[1,2] * imr$IMR10a + sigma[1,3] * imr$IMR10b, na.rm = TRUE ) # E[ e0 | y1* <= 0 & y2* > 0 ] mean( myData$e0[ !myData$y1 & myData$y2 ] ) mean( sigma[1,2] * imr$IMR01a + sigma[1,3] * imr$IMR01b, na.rm = TRUE ) # E[ e0 | y1* <= 0 & y2* <= 0 ] mean( myData$e0[ !myData$y1 & !myData$y2 ] ) mean( sigma[1,2] * imr$IMR00a + sigma[1,3] * imr$IMR00b, na.rm = TRUE ) # E[ e0 | y1* > 0 ] mean( myData$e0[ myData$y1 ] ) mean( sigma[1,2] * imr$IMR1X, na.rm = TRUE ) # E[ e0 | y1* <= 0 ] mean( myData$e0[ !myData$y1 ] ) mean( sigma[1,2] * imr$IMR0X, na.rm = TRUE ) # E[ e0 | y2* > 0 ] mean( myData$e0[ myData$y2 ] ) mean( sigma[1,3] * imr$IMRX1, na.rm = TRUE ) # E[ e0 | y2* <= 0 ] mean( myData$e0[ !myData$y2 ] ) mean( sigma[1,3] * imr$IMRX0, na.rm = TRUE ) # estimation for y1* > 0 and y2* > 0 selection <- myData$y1 & myData$y2 # OLS estimation ols11 <- lm( y0 ~ x1, data = myData, subset = selection ) summary( ols11 ) # heckman type estimation heckit11 <- lm( y0 ~ x1 + IMR11a + IMR11b, data = cbind( myData, imr ), subset = selection ) summary( heckit11 ) # estimation for y1* > 0 and y2* <= 0 selection <- myData$y1 & !myData$y2 # OLS estimation ols10 <- lm( y0 ~ x1, data = myData, subset = selection ) summary( ols10 ) # heckman type estimation heckit10 <- lm( y0 ~ x1 + IMR10a + IMR10b, data = cbind( myData, imr ), subset = selection ) summary( heckit10 ) # estimation for y1* <= 0 and y2* > 0 selection <- !myData$y1 & myData$y2 # OLS estimation ols01 <- lm( y0 ~ x1, data = myData, subset = selection ) summary( ols01 ) # heckman type estimation heckit01 <- lm( y0 ~ x1 + IMR01a + IMR01b, data = cbind( myData, imr ), subset = selection ) summary( heckit01 ) # estimation for y1* <= 0 and y2* <= 0 selection <- !myData$y1 & !myData$y2 # OLS estimation ols00 <- lm( y0 ~ x1, data = myData, subset = selection ) summary( ols00 ) # heckman type estimation heckit00 <- lm( y0 ~ x1 + IMR00a + IMR00b, data = cbind( myData, imr ), subset = selection ) summary( heckit00 ) # estimation for y1* > 0 selection <- myData$y1 # OLS estimation ols1X <- lm( y0 ~ x1, data = myData, subset = selection ) summary( ols1X ) # heckman type estimation heckit1X <- lm( y0 ~ x1 + IMR1X, data = cbind( myData, imr ), subset = selection ) summary( heckit1X ) # estimation for y1* <= 0 selection <- !myData$y1 # OLS estimation ols0X <- lm( y0 ~ x1, data = myData, subset = selection ) summary( ols0X ) # heckman type estimation heckit0X <- lm( y0 ~ x1 + IMR0X, data = cbind( myData, imr ), subset = selection ) summary( heckit0X ) # estimation for y2* > 0 selection <- myData$y2 # OLS estimation olsX1 <- lm( y0 ~ x1, data = myData, subset = selection ) summary( olsX1 ) # heckman type estimation heckitX1 <- lm( y0 ~ x1 + IMRX1, data = cbind( myData, imr ), subset = selection ) summary( heckitX1 ) # estimation for y2* <= 0 selection <- !myData$y2 # OLS estimation olsX0 <- lm( y0 ~ x1, data = myData, subset = selection ) summary( olsX0 ) # heckman type estimation heckitX0 <- lm( y0 ~ x1 + IMRX0, data = cbind( myData, imr ), subset = selection ) summary( heckitX0 )