R Under development (unstable) (2026-01-18 r89306 ucrt) -- "Unsuffered Consequences"
Copyright (C) 2026 The R Foundation for Statistical Computing
Platform: x86_64-w64-mingw32/x64

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> library( "sampleSelection" )
Loading required package: maxLik
Loading required package: miscTools

Please cite the 'maxLik' package as:
Henningsen, Arne and Toomet, Ott (2011). maxLik: A package for maximum likelihood
estimation in R. Computational Statistics 26(3), 443-458.
DOI 10.1007/s00180-010-0217-1.

If you have questions, suggestions, or comments regarding the 'maxLik' package,
please use a forum or 'tracker' at maxLik's R-Forge site:
https://r-forge.r-project.org/projects/maxlik/
> options( digits = 3 )
> 
> ## Wooldridge( 2003 ): example 17.5, page 590
> data(Mroz87)
> myProbit <- glm( lfp ~ nwifeinc + educ + exper + I( exper^2 ) + age +
+    kids5 + kids618, family = binomial( link = "probit" ), data=Mroz87 )
> Mroz87$IMR <- invMillsRatio( myProbit )$IMR1
> myHeckit <- lm( log( wage ) ~ educ + exper + I( exper^2 ) + IMR,
+    data = Mroz87[ Mroz87$lfp == 1, ] )
> 
> # using NO labor force participation as endogenous variable
> Mroz87$nolfp <- 1 - Mroz87$lfp
> myProbit2 <- glm( nolfp ~ nwifeinc + educ + exper + I( exper^2 ) + age +
+    kids5 + kids618, family = binomial( link = "probit" ), data=Mroz87 )
> all.equal( invMillsRatio( myProbit )$IMR1, invMillsRatio( myProbit2 )$IMR0 )
[1] TRUE
> # should be true
> 
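As a cross-check of the manual two-step procedure above, the same estimator is
available as a single call; a minimal sketch (not executed in this session),
assuming the usual heckit() interface of sampleSelection and using our own
object name myHeckitPkg:

# two-step (Heckman) estimation in one call; the outcome-equation point
# estimates should match those of myHeckit above, while the reported standard
# errors additionally account for the estimated inverse Mills ratio
myHeckitPkg <- heckit( lfp ~ nwifeinc + educ + exper + I( exper^2 ) + age +
      kids5 + kids618,
   log( wage ) ~ educ + exper + I( exper^2 ), data = Mroz87 )
# summary( myHeckitPkg )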
> # example for bivariate probit
> suppressPackageStartupMessages( library( "mvtnorm" ) )
> suppressPackageStartupMessages( library( "VGAM" ) )
> set.seed( 321 )
> 
> nObs <- 10000
> 
> # error terms (trivariate normal)
> sigma <- symMatrix( c( 2, 0.7, 1.2, 1, 0.5, 1 ) )
> myData <- as.data.frame( rmvnorm( nObs, c( 0, 0, 0 ), sigma ) )
> names( myData ) <- c( "e0", "e1", "e2" )
> 
> # exogenous variables (independently normal)
> myData$x0 <- rnorm( nObs )
> myData$x1 <- rnorm( nObs )
> myData$x2 <- rnorm( nObs )
> 
> # endogenous variables
> myData$y0 <- -1.5 + 0.8 * myData$x1 + myData$e0
> myData$y1 <- ( 0.3 + 0.4 * myData$x1 + 0.3 * myData$x2 + myData$e1 ) > 0
> myData$y2 <- ( -0.1 + 0.6 * myData$x1 + 0.7 * myData$x2 + myData$e2 ) > 0
> 
> # bivariate probit (using rhobit transformation)
> bProbit <- vglm( cbind( y1, y2 ) ~ x1 + x2, family = binom2.rho,
+    data = myData )
> summary( bProbit )

Call:
vglm(formula = cbind(y1, y2) ~ x1 + x2, family = binom2.rho, 
    data = myData)

Coefficients:
              Estimate Std. Error z value Pr(>|z|)    
(Intercept):1   0.3031     0.0134   22.70  < 2e-16 ***
(Intercept):2  -0.1004     0.0141   -7.14  9.6e-13 ***
(Intercept):3   1.1049     0.0400   27.65  < 2e-16 ***
x1:1            0.4050     0.0142   28.57  < 2e-16 ***
x1:2            0.5743     0.0158   36.31  < 2e-16 ***
x2:1            0.3108     0.0137   22.70  < 2e-16 ***
x2:2            0.6796     0.0162   41.87  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Names of linear predictors: probitlink(mu1), probitlink(mu2), rhobitlink(rho)

Log-likelihood: -10858 on 29993 degrees of freedom

Number of Fisher scoring iterations: 2
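The third linear predictor is the rhobit-transformed correlation between e1 and
e2, whose true value in this simulation is sigma[2,3] / sqrt( sigma[2,2] *
sigma[3,3] ) = 0.5. A quick sanity check (not executed here), assuming VGAM's
rhobitlink is log( ( 1 + rho ) / ( 1 - rho ) ):

# back out the estimated correlation from the rhobit-scale intercept;
# the result should be close to the true value of 0.5
rhobitHat <- coef( bProbit )[ "(Intercept):3" ]
rhoHat <- ( exp( rhobitHat ) - 1 ) / ( exp( rhobitHat ) + 1 )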
> 
> # inverse Mills Ratios
> imr <- invMillsRatio( bProbit )
> 
> # tests
> # E[ e0 | y1* > 0 & y2* > 0 ]
> mean( myData$e0[ myData$y1 & myData$y2 ] )
[1] 0.834
> mean( sigma[1,2] * imr$IMR11a + sigma[1,3] * imr$IMR11b, na.rm = TRUE )
[1] 0.836
> # E[ e0 | y1* > 0 & y2* <= 0 ]
> mean( myData$e0[ myData$y1 & !myData$y2 ] )
[1] -0.361
> mean( sigma[1,2] * imr$IMR10a + sigma[1,3] * imr$IMR10b, na.rm = TRUE )
[1] -0.385
> # E[ e0 | y1* <= 0 & y2* > 0 ]
> mean( myData$e0[ !myData$y1 & myData$y2 ] )
[1] 0.385
> mean( sigma[1,2] * imr$IMR01a + sigma[1,3] * imr$IMR01b, na.rm = TRUE )
[1] 0.397
> # E[ e0 | y1* <= 0 & y2* <= 0 ]
> mean( myData$e0[ !myData$y1 & !myData$y2 ] )
[1] -0.915
> mean( sigma[1,2] * imr$IMR00a + sigma[1,3] * imr$IMR00b, na.rm = TRUE )
[1] -0.878
> # E[ e0 | y1* > 0 ]
> mean( myData$e0[ myData$y1 ] )
[1] 0.402
> mean( sigma[1,2] * imr$IMR1X, na.rm = TRUE )
[1] 0.395
> # E[ e0 | y1* <= 0 ]
> mean( myData$e0[ !myData$y1 ] )
[1] -0.642
> mean( sigma[1,2] * imr$IMR0X, na.rm = TRUE )
[1] -0.611
> # E[ e0 | y2* > 0 ]
> mean( myData$e0[ myData$y2 ] )
[1] 0.755
> mean( sigma[1,3] * imr$IMRX1, na.rm = TRUE )
[1] 0.759
> # E[ e0 | y2* <= 0 ]
> mean( myData$e0[ !myData$y2 ] )
[1] -0.686
> mean( sigma[1,3] * imr$IMRX0, na.rm = TRUE )
[1] -0.674
> 
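Each pair of lines above compares a simulated conditional mean of e0 with its
theoretical counterpart. The identity being checked, and the one that motivates
the selection-corrected regressions below, is (per observation, conditional on
the regressors and on the signs of the latent variables)

   E[ e0 | region ] = sigma[1,2] * IMRa + sigma[1,3] * IMRb

where IMRa and IMRb are the two generalized inverse Mills ratios of the region
(e.g. IMR11a and IMR11b when both latent variables are positive); for the
one-sided regions only the single corresponding ratio (e.g. IMR1X) enters. The
covariances can be used directly because e1 and e2 have unit variance in this
simulation, and the mean() calls average both sides over the observations in
the region.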
> # estimation for y1* > 0 and y2* > 0
> selection <- myData$y1 & myData$y2
> # OLS estimation
> ols11 <- lm( y0 ~ x1, data = myData, subset = selection )
> summary( ols11 )

Call:
lm(formula = y0 ~ x1, data = myData, subset = selection)

Residuals:
   Min     1Q Median     3Q    Max 
-3.913 -0.816 -0.024  0.755  4.200 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  -0.5372     0.0208   -25.9   <2e-16 ***
x1            0.4943     0.0202    24.4   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.18 on 3876 degrees of freedom
Multiple R-squared:  0.133, Adjusted R-squared:  0.133 
F-statistic: 597 on 1 and 3876 DF,  p-value: <2e-16

> # heckman type estimation
> heckit11 <- lm( y0 ~ x1 + IMR11a + IMR11b, data = cbind( myData, imr ),
+    subset = selection )
> summary( heckit11 )

Call:
lm(formula = y0 ~ x1 + IMR11a + IMR11b, data = cbind(myData, imr), 
    subset = selection)

Residuals:
   Min     1Q Median     3Q    Max 
-3.465 -0.783 -0.046  0.717  4.586 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  -0.9881     0.2414   -4.09  4.4e-05 ***
x1            0.6541     0.0628   10.41  < 2e-16 ***
IMR11a       -1.0083     0.8354   -1.21     0.23    
IMR11b        1.4290     0.1430    9.99  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.12 on 3874 degrees of freedom
Multiple R-squared:  0.218, Adjusted R-squared:  0.217 
F-statistic: 360 on 3 and 3874 DF,  p-value: <2e-16

> 
> # estimation for y1* > 0 and y2* <= 0
> selection <- myData$y1 & !myData$y2
> # OLS estimation
> ols10 <- lm( y0 ~ x1, data = myData, subset = selection )
> summary( ols10 )

Call:
lm(formula = y0 ~ x1, data = myData, subset = selection)

Residuals:
   Min     1Q Median     3Q    Max 
-4.104 -0.730  0.009  0.738  4.747 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  -1.8968     0.0239   -79.5   <2e-16 ***
x1            0.4749     0.0271    17.6   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.11 on 2189 degrees of freedom
Multiple R-squared:  0.123, Adjusted R-squared:  0.123 
F-statistic: 308 on 1 and 2189 DF,  p-value: <2e-16

> # heckman type estimation
> heckit10 <- lm( y0 ~ x1 + IMR10a + IMR10b, data = cbind( myData, imr ),
+    subset = selection )
> summary( heckit10 )

Call:
lm(formula = y0 ~ x1 + IMR10a + IMR10b, data = cbind(myData, imr), 
    subset = selection)

Residuals:
   Min     1Q Median     3Q    Max 
-4.216 -0.627  0.064  0.664  3.912 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  -1.7762     0.5802   -3.06   0.0022 ** 
x1            0.9096     0.0693   13.13   <2e-16 ***
IMR10a        0.9884     0.5211    1.90   0.0580 .  
IMR10b        1.1651     0.1030   11.31   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.995 on 2187 degrees of freedom
Multiple R-squared:  0.295, Adjusted R-squared:  0.294 
F-statistic: 304 on 3 and 2187 DF,  p-value: <2e-16

> 
> # estimation for y1* <= 0 and y2* > 0
> selection <- !myData$y1 & myData$y2
> # OLS estimation
> ols01 <- lm( y0 ~ x1, data = myData, subset = selection )
> summary( ols01 )

Call:
lm(formula = y0 ~ x1, data = myData, subset = selection)

Residuals:
   Min     1Q Median     3Q    Max 
-2.815 -0.771 -0.014  0.748  3.411 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  -1.0832     0.0382  -28.37   <2e-16 ***
x1            0.3809     0.0402    9.47   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.09 on 823 degrees of freedom
Multiple R-squared:  0.0983, Adjusted R-squared:  0.0972 
F-statistic: 89.8 on 1 and 823 DF,  p-value: <2e-16

> # heckman type estimation
> heckit01 <- lm( y0 ~ x1 + IMR01a + IMR01b, data = cbind( myData, imr ),
+    subset = selection )
> summary( heckit01 )

Call:
lm(formula = y0 ~ x1 + IMR01a + IMR01b, data = cbind(myData, imr), 
    subset = selection)

Residuals:
    Min      1Q  Median      3Q     Max 
-2.4883 -0.6542  0.0264  0.6345  3.0166 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)   -1.653      1.759   -0.94     0.35    
x1             0.772      0.159    4.85  1.5e-06 ***
IMR01a         0.619      1.043    0.59     0.55    
IMR01b         1.214      0.134    9.03  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.943 on 821 degrees of freedom
Multiple R-squared:  0.331, Adjusted R-squared:  0.329 
F-statistic: 135 on 3 and 821 DF,  p-value: <2e-16

> 
> # estimation for y1* <= 0 and y2* <= 0
> selection <- !myData$y1 & !myData$y2
> # OLS estimation
> ols00 <- lm( y0 ~ x1, data = myData, subset = selection )
> summary( ols00 )

Call:
lm(formula = y0 ~ x1, data = myData, subset = selection)

Residuals:
   Min     1Q Median     3Q    Max 
-4.693 -0.821  0.027  0.842  3.965 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  -2.5476     0.0242  -105.2   <2e-16 ***
x1            0.5084     0.0233    21.8   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.21 on 3104 degrees of freedom
Multiple R-squared:  0.133, Adjusted R-squared:  0.132 
F-statistic: 475 on 1 and 3104 DF,  p-value: <2e-16

> # heckman type estimation
> heckit00 <- lm( y0 ~ x1 + IMR00a + IMR00b, data = cbind( myData, imr ),
+    subset = selection )
> summary( heckit00 )

Call:
lm(formula = y0 ~ x1 + IMR00a + IMR00b, data = cbind(myData, imr), 
    subset = selection)

Residuals:
   Min     1Q Median     3Q    Max 
-4.804 -0.761  0.051  0.780  3.798 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) -1.91755    0.25434   -7.54  6.2e-14 ***
x1           0.74220    0.05287   14.04  < 2e-16 ***
IMR00a       0.00757    0.40554    0.02     0.99    
IMR00b       1.57196    0.17513    8.98  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.16 on 3102 degrees of freedom
Multiple R-squared:  0.209, Adjusted R-squared:  0.209 
F-statistic: 274 on 3 and 3102 DF,  p-value: <2e-16

> 
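Under the data generating process each of the four corrected regressions above
should, in large samples, recover an x1 coefficient near the true 0.8 and
coefficients near sigma[1,2] = 0.7 and sigma[1,3] = 1.2 on the 'a' and 'b'
ratios, whereas the plain OLS slopes are attenuated by the selection. Within a
single quadrant the two ratios are strongly correlated with each other and with
x1, so the individual IMR coefficients are imprecise (compare their standard
errors above). A small collecting sketch (not executed here):

# coefficients of the four corrected regressions side by side;
# columns: heckit11, heckit10, heckit01, heckit00;
# rows: intercept, x1, 'a' ratio, 'b' ratio (row names come from the first model)
round( sapply( list( heckit11, heckit10, heckit01, heckit00 ), coef ), 2 )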
> # estimation for y1* > 0
> selection <- myData$y1
> # OLS estimation
> ols1X <- lm( y0 ~ x1, data = myData, subset = selection )
> summary( ols1X )

Call:
lm(formula = y0 ~ x1, data = myData, subset = selection)

Residuals:
   Min     1Q Median     3Q    Max 
-4.873 -0.886 -0.033  0.889  4.639 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  -1.0686     0.0174   -61.6   <2e-16 ***
x1            0.6738     0.0178    37.9   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.31 on 6067 degrees of freedom
Multiple R-squared:  0.192, Adjusted R-squared:  0.191 
F-statistic: 1.44e+03 on 1 and 6067 DF,  p-value: <2e-16

> # heckman type estimation
> heckit1X <- lm( y0 ~ x1 + IMR1X, data = cbind( myData, imr ),
+    subset = selection )
> summary( heckit1X )

Call:
lm(formula = y0 ~ x1 + IMR1X, data = cbind(myData, imr), subset = selection)

Residuals:
   Min     1Q Median     3Q    Max 
-4.935 -0.882 -0.034  0.887  4.774 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  -1.4618     0.0655  -22.33  < 2e-16 ***
x1            0.8006     0.0270   29.66  < 2e-16 ***
IMR1X         0.6448     0.1035    6.23  5.1e-10 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.31 on 6066 degrees of freedom
Multiple R-squared:  0.197, Adjusted R-squared:  0.196 
F-statistic: 743 on 2 and 6066 DF,  p-value: <2e-16

> 
> # estimation for y1* <= 0
> selection <- !myData$y1
> # OLS estimation
> ols0X <- lm( y0 ~ x1, data = myData, subset = selection )
> summary( ols0X )

Call:
lm(formula = y0 ~ x1, data = myData, subset = selection)

Residuals:
   Min     1Q Median     3Q    Max 
-5.062 -0.909  0.023  0.902  4.473 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  -2.2035     0.0224   -98.2   <2e-16 ***
x1            0.6214     0.0220    28.2   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.32 on 3929 degrees of freedom
Multiple R-squared:  0.169, Adjusted R-squared:  0.169 
F-statistic: 798 on 1 and 3929 DF,  p-value: <2e-16

> # heckman type estimation
> heckit0X <- lm( y0 ~ x1 + IMR0X, data = cbind( myData, imr ),
+    subset = selection )
> summary( heckit0X )

Call:
lm(formula = y0 ~ x1 + IMR0X, data = cbind(myData, imr), subset = selection)

Residuals:
   Min     1Q Median     3Q    Max 
-5.118 -0.904  0.006  0.904  4.343 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  -1.4380     0.1048  -13.72  < 2e-16 ***
x1            0.8190     0.0343   23.88  < 2e-16 ***
IMR0X         0.7998     0.1070    7.47  9.5e-14 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.32 on 3928 degrees of freedom
Multiple R-squared:  0.18, Adjusted R-squared:  0.18 
F-statistic: 432 on 2 and 3928 DF,  p-value: <2e-16

> 
> # estimation for y2* > 0
> selection <- myData$y2
> # OLS estimation
> olsX1 <- lm( y0 ~ x1, data = myData, subset = selection )
> summary( olsX1 )

Call:
lm(formula = y0 ~ x1, data = myData, subset = selection)

Residuals:
   Min     1Q Median     3Q    Max 
-3.819 -0.821 -0.028  0.773  4.296 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  -0.6386     0.0185   -34.6   <2e-16 ***
x1            0.5055     0.0182    27.7   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.18 on 4701 degrees of freedom
Multiple R-squared:  0.141, Adjusted R-squared:  0.14 
F-statistic: 769 on 1 and 4701 DF,  p-value: <2e-16

> # heckman type estimation
> heckitX1 <- lm( y0 ~ x1 + IMRX1, data = cbind( myData, imr ),
+    subset = selection )
> summary( heckitX1 )

Call:
lm(formula = y0 ~ x1 + IMRX1, data = cbind(myData, imr), subset = selection)

Residuals:
   Min     1Q Median     3Q    Max 
-3.255 -0.766 -0.051  0.714  4.732 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  -1.4685     0.0394   -37.2   <2e-16 ***
x1            0.7702     0.0206    37.4   <2e-16 ***
IMRX1         1.1613     0.0495    23.5   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.12 on 4700 degrees of freedom
Multiple R-squared:  0.231, Adjusted R-squared:  0.23 
F-statistic: 705 on 2 and 4700 DF,  p-value: <2e-16

> 
> # estimation for y2* <= 0
> selection <- !myData$y2
> # OLS estimation
> olsX0 <- lm( y0 ~ x1, data = myData, subset = selection )
> summary( olsX0 )

Call:
lm(formula = y0 ~ x1, data = myData, subset = selection)

Residuals:
   Min     1Q Median     3Q    Max 
-4.991 -0.817  0.036  0.835  5.185 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  -2.2608     0.0176  -128.5   <2e-16 ***
x1            0.5600     0.0180    31.1   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.21 on 5295 degrees of freedom
Multiple R-squared:  0.154, Adjusted R-squared:  0.154 
F-statistic: 965 on 1 and 5295 DF,  p-value: <2e-16

> # heckman type estimation
> heckitX0 <- lm( y0 ~ x1 + IMRX0, data = cbind( myData, imr ),
+    subset = selection )
> summary( heckitX0 )

Call:
lm(formula = y0 ~ x1 + IMRX0, data = cbind(myData, imr), subset = selection)

Residuals:
   Min     1Q Median     3Q    Max 
-5.115 -0.729  0.056  0.790  4.685 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  -1.4624     0.0365   -40.1   <2e-16 ***
x1            0.8521     0.0208    41.0   <2e-16 ***
IMRX0         1.2582     0.0511    24.6   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.15 on 5294 degrees of freedom
Multiple R-squared:  0.241, Adjusted R-squared:  0.241 
F-statistic: 841 on 2 and 5294 DF,  p-value: <2e-16

> 
> proc.time()
   user  system elapsed 
   7.56    0.32    7.87 
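The single-index regressions tell the same story with less collinearity:
heckit1X and heckit0X recover an x1 coefficient close to the true 0.8 and an
IMR coefficient in the neighbourhood of sigma[1,2] = 0.7, while heckitX1 and
heckitX0 put a coefficient close to sigma[1,3] = 1.2 on their ratio. An
analogous collecting sketch (not executed here):

# coefficients of the four single-index corrected regressions side by side;
# columns: heckit1X, heckit0X, heckitX1, heckitX0
round( sapply( list( heckit1X, heckit0X, heckitX1, heckitX0 ), coef ), 2 )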