# Load ivreg package
library(ivreg)

## Load data.frame for tests
data("CigaretteDemand", package = "ivreg")

test_that("Expect error when object from ivreg(..., model=FALSE)", {
  # Fit the model with model = FALSE and confirm that fsw() signals an
  # error when handed that fit.
  fit_no_frame <- ivreg(
    packs ~ rprice + rincome | salestax + rincome,
    data = CigaretteDemand,
    model = FALSE
  )
  expect_error(fsw(fit_no_frame))
})

test_that("Check run after ivreg model", {
  # Regressors rprice and rincome, instrumented by salestax, cigtax and
  # packsdiff.
  iv_fit <- ivreg(
    packs ~ rprice + rincome | salestax + cigtax + packsdiff,
    data = CigaretteDemand
  )
  fsw_out <- fsw(iv_fit)

  # Rows 1 and 2 of the first column of the fswres table are compared with
  # stored reference values.
  expect_equal(fsw_out$fswres[1, 1], 4.884, tolerance = 1e-2)
  expect_equal(fsw_out$fswres[2, 1], 3.450, tolerance = 1e-2)
})

test_that("Check run with ivreg model object with transformations of outcome", {
  # Same regressors and instruments as the untransformed fit, but with
  # log(packs) as the outcome.
  log_outcome_fit <- ivreg(
    log(packs) ~ rprice + rincome | salestax + cigtax + packsdiff,
    data = CigaretteDemand
  )
  fsw_out <- fsw(log_outcome_fit)

  # The reference values are the same as those used for the untransformed
  # outcome.
  expect_equal(fsw_out$fswres[1, 1], 4.884, tolerance = 1e-2)
  expect_equal(fsw_out$fswres[2, 1], 3.450, tolerance = 1e-2)
})

test_that("Check error with transformation of exposure", {
  # TODO: check whether there is a way to work out if a formula contains a
  # transformed variable.
  logged_exposure_fit <- ivreg(
    packs ~ log(rprice) + rincome | salestax + cigtax + packsdiff,
    data = CigaretteDemand
  )
  expect_error(fsw(logged_exposure_fit))
})

test_that("Check error with two transformed exposures", {
  # Both regressors enter the formula as log() transformations; fsw() is
  # expected to signal an error.
  both_logged_fit <- ivreg(
    packs ~ log(rprice) + log(rincome) | salestax + cigtax + packsdiff,
    data = CigaretteDemand
  )
  expect_error(fsw(both_logged_fit))
})

test_that("Check error with transformation of instrument", {
  # The instrument set contains a transformed term, I(packsdiff^2).
  # This test asserts that fsw() signals an error for such a fit, so the
  # description reads "Check error" in line with the other error tests.
  object <- ivreg(packs ~ rprice + rincome |
                    salestax + cigtax + I(packsdiff^2),
                  data = CigaretteDemand)
  expect_error(fsw(object))
})

# Check error with a single endogenous variable
test_that("Require two or more exposures", {
  # rincome appears on both sides of `|`, leaving rprice as the only
  # regressor that is not also an instrument; fsw() should signal an error.
  single_exposure_fit <- ivreg(
    packs ~ rprice | salestax + rincome,
    data = CigaretteDemand
  )
  expect_error(fsw(single_exposure_fit))
})


# lfe package - modified example from condfstat() helpfile

test_that("Check approx. equivalence with lfe package", {
  skip_if_not_installed("lfe")
  library(lfe)

  # Simulated data; the order of the random draws is kept fixed so that the
  # seed reproduces the same data set.
  set.seed(12345)
  n <- 4000
  z1 <- rnorm(n)
  z2 <- rnorm(n)
  u <- rnorm(n)
  # x1 and x2 are both built from z1, z2 and the error term u, so they are
  # correlated with u.
  x1 <- z1 + z2 + 0.2 * u + rnorm(n)
  x2 <- z1 + 0.94 * z2 - 0.3 * u + rnorm(n)
  y <- x1 + x2 + u
  sim_data <- data.frame(x1, x2, y, z1, z2)

  # Reference fit with lfe::felm.
  felm_fit <- felm(y ~ 1 | 0 | (x1 | x2 ~ z1 + z2), data = sim_data)
  # Optional diagnostics (not run):
  #   summary(felm_fit)
  #   summary(felm_fit$stage1, lhs = 'x1')
  #   summary(felm_fit$stage1, lhs = 'x2')
  #   t(sapply(felm_fit$stage1$lhs,
  #            function(lh) waldtest(felm_fit$stage1, ~ z1|z2, lhs = lh)))
  # Those first-stage t-tests and F-tests for the excluded instruments look
  # fine, but the conditional F-test reveals that the instruments are jointly
  # weak (it is close to being only one instrument, z1 + z2, for both x1 and
  # x2).
  lfe_cond_f <- condfstat(felm_fit, quantiles = c(0.05, 0.95))

  # The same model fitted with ivreg and passed to fsw().
  ivreg_fit <- ivreg(y ~ x1 + x2 | z1 + z2, data = sim_data)
  fsw_out <- fsw(ivreg_fit)

  expect_equal(lfe_cond_f[1], fsw_out$fswres[1, 1], tolerance = 1e-2)
  expect_equal(lfe_cond_f[2], fsw_out$fswres[2, 1], tolerance = 1e-2)
})


# Stata ivreg2 example

test_that("Compare with Stata ivreg2 output", {
  skip_if_not_installed("haven")
  # NOTE(review): nothing in this test visibly calls lfe; the lfe skip and
  # library() call are kept as they were -- confirm whether they are needed.
  skip_if_not_installed("lfe")
  library(haven)
  library(ivreg)
  library(lfe)

  # The data set is fetched over the network. If the download fails (offline
  # machine, host unavailable), skip the test instead of reporting a failure
  # that has nothing to do with fsw().
  url <- "http://fmwww.bc.edu/ec-p/data/wooldridge/mroz.dta"
  dat <- tryCatch(haven::read_dta(url), error = function(e) NULL)
  skip_if(is.null(dat), "Could not download mroz.dta; skipping Stata comparison")

  mod <- ivreg(lwage ~ educ + exper | age + kidslt6 + kidsge6, data = dat)
  condf <- fsw(mod)

  # Expected values are the Sanderson-Windmeijer F statistics for educ and
  # exper reported by Stata's ivreg2 (see the copied output further down in
  # this file).
  expect_equal(condf$fswres[1, 1], 6.69, tolerance = 1e-2)
  expect_equal(condf$fswres[2, 1], 81.81, tolerance = 1e-2)
})


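# Copy-pasted code and output from some model fits (lfe in R, ivreg2 in Stata),
# kept as a reference for the expected values used in the tests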

# Using lfe package
# modst2 <- felm(lwage ~ 1 | 0 | (educ | exper ~ age + kidslt6 + kidsge6), data = dat)
# summary(modst2)
# t(sapply(modst2$stage1$lhs, function(lh) waldtest(modst2$stage1, ~ age | kidslt6 | kidsge6, lhs = lh)))
# condfstat(modst2, quantiles = c(0.025, 0.975))

# . use http://fmwww.bc.edu/ec-p/data/wooldridge/mroz.dta, clear
#
# .
# . ivreg2 lwage (educ exper = age kidslt6 kidsge6), first
#
# First-stage regressions
# -----------------------
#
#
#   First-stage regression of educ:
#
#   Statistics consistent for homoskedasticity only
# Number of obs =                    428
# ------------------------------------------------------------------------------
#   educ |      Coef.   Std. Err.      t    P>|t|     [95% Conf. Interval]
# -------------+----------------------------------------------------------------
#   age |  -.0185412   .0163449    -1.13   0.257    -.0506683    .0135859
# kidslt6 |   .6984283   .2966854     2.35   0.019     .1152709    1.281586
# kidsge6 |   -.222821   .0906154    -2.46   0.014    -.4009324   -.0447096
# _cons |   13.64009   .7644499    17.84   0.000     12.13751    15.14268
# ------------------------------------------------------------------------------
#   F test of excluded instruments:
#   F(  3,   424) =     4.47
# Prob > F      =   0.0042
# Sanderson-Windmeijer multivariate F test of excluded instruments:
#   F(  2,   424) =     6.69
# Prob > F      =   0.0014
#
#
# First-stage regression of exper:
#
#   Statistics consistent for homoskedasticity only
# Number of obs =                    428
# ------------------------------------------------------------------------------
#   exper |      Coef.   Std. Err.      t    P>|t|     [95% Conf. Interval]
# -------------+----------------------------------------------------------------
#   age |   .3948754   .0496446     7.95   0.000     .2972953    .4924555
# kidslt6 |  -.7469412   .9011267    -0.83   0.408    -2.518173    1.024291
# kidsge6 |  -1.430306   .2752275    -5.20   0.000    -1.971286   -.8893254
# _cons |  -1.500019   2.321874    -0.65   0.519    -6.063837    3.063798
# ------------------------------------------------------------------------------
#   F test of excluded instruments:
#   F(  3,   424) =    55.04
# Prob > F      =   0.0000
# Sanderson-Windmeijer multivariate F test of excluded instruments:
#   F(  2,   424) =    81.81
# Prob > F      =   0.0000
#
#
#
# Summary results for first-stage regressions
# -------------------------------------------
#
#   (Underid)            (Weak id)
# Variable     | F(  3,   424)  P-val | SW Chi-sq(  2) P-val | SW F(  2,   424)
# educ         |       4.47    0.0042 |       13.51   0.0012 |        6.69
# exper        |      55.04    0.0000 |      165.17   0.0000 |       81.81
#
# Stock-Yogo weak ID F test critical values for single endogenous regressor:
#   5% maximal IV relative bias    13.91
# 10% maximal IV relative bias     9.08
# 20% maximal IV relative bias     6.46
# 30% maximal IV relative bias     5.39
# 10% maximal IV size             22.30
# 15% maximal IV size             12.83
# 20% maximal IV size              9.54
# 25% maximal IV size              7.80
# Source: Stock-Yogo (2005).  Reproduced by permission.
# NB: Critical values are for Sanderson-Windmeijer F statistic.
#
# Underidentification test
# Ho: matrix of reduced form coefficients has rank=K1-1 (underidentified)
# Ha: matrix has rank=K1 (identified)
# Anderson canon. corr. LM statistic       Chi-sq(2)=13.10    P-val=0.0014
#
# Weak identification test
# Ho: equation is weakly identified
# Cragg-Donald Wald F statistic                                       4.46
#
# Stock-Yogo weak ID test critical values for K1=2 and L1=3:
#   10% maximal IV size             13.43
# 15% maximal IV size              8.18
# 20% maximal IV size              6.40
# 25% maximal IV size              5.45
# Source: Stock-Yogo (2005).  Reproduced by permission.
#
# Weak-instrument-robust inference
# Tests of joint significance of endogenous regressors B1 in main equation
# Ho: B1=0 and orthogonality conditions are valid
# Anderson-Rubin Wald test           F(3,424)=       2.08     P-val=0.1025
# Anderson-Rubin Wald test           Chi-sq(3)=      6.29     P-val=0.0983
# Stock-Wright LM S statistic        Chi-sq(3)=      6.20     P-val=0.1023
#
# Number of observations               N  =        428
# Number of regressors                 K  =          3
# Number of endogenous regressors      K1 =          2
# Number of instruments                L  =          4
# Number of excluded instruments       L1 =          3
#
# IV (2SLS) estimation
# --------------------
#
#   Estimates efficient for homoskedasticity only
# Statistics consistent for homoskedasticity only
#
# Number of obs =      428
# F(  2,   425) =     3.03
# Prob > F      =   0.0492
# Total (centered) SS     =  223.3274513                Centered R2   =   0.1482
# Total (uncentered) SS   =   829.594813                Uncentered R2 =   0.7707
# Residual SS             =  190.2315236                Root MSE      =    .6667
#
# ------------------------------------------------------------------------------
#   lwage |      Coef.   Std. Err.      z    P>|z|     [95% Conf. Interval]
# -------------+----------------------------------------------------------------
#   educ |   .1058361   .0806975     1.31   0.190    -.0523281    .2640003
# exper |   .0161527    .007568     2.13   0.033     .0013197    .0309858
# _cons |  -.3601821   1.029787    -0.35   0.727    -2.378528    1.658164
# ------------------------------------------------------------------------------
#   Underidentification test (Anderson canon. corr. LM statistic):          13.101
# Chi-sq(2) P-val =    0.0014
# ------------------------------------------------------------------------------
#   Weak identification test (Cragg-Donald Wald F statistic):                4.463
# Stock-Yogo weak ID test critical values: 10% maximal IV size             13.43
# 15% maximal IV size              8.18
# 20% maximal IV size              6.40
# 25% maximal IV size              5.45
# Source: Stock-Yogo (2005).  Reproduced by permission.
# ------------------------------------------------------------------------------
#   Sargan statistic (overidentification test of all instruments):           1.168
# Chi-sq(1) P-val =    0.2798
# ------------------------------------------------------------------------------
#   Instrumented:         educ exper
# Excluded instruments: age kidslt6 kidsge6
# ------------------------------------------------------------------------------
#
#   . mat list e(first)
#
# e(first)[21,2]
# educ      exper
# rmse  2.2580465  6.8583957
# sheapr2  .03061089  .28010273
# pr2  .03063228  .28029844
# F  4.4661716  55.044363
# df          3          3
# df_r        424        424
# pvalue  .00421033  4.562e-30
# SWF  6.6942505  81.812373
# SWFdf1          2          2
# SWFdf2        424        424
# SWFp  .00137303  8.961e-31
# SWchi2  13.514808  165.16838
# SWchi2p  .00116224  1.362e-36
# SWr2  .03061009  .27845108
# APF  6.6930319  82.489816
# APFdf1          2          2
# APFdf2        424        424
# APFp  .00137466  5.499e-31
# APchi2  13.512347  166.53604
# APchi2p  .00116367  6.873e-37
# APr2  .03060469  .28011093

# Code from Appendix A.3 of Sanderson and Windmeijer
# clear
# set obs 4000
# set seed 12345 // added TP
# gen w1 = rnormal()
# gen w2 = rnormal()
# gen z1 = rnormal()
# gen z2 = rnormal()
# gen z3 = rnormal()
# gen z4 = rnormal()
# gen z5 = rnormal()
# mat covmat = (1, .5, .5 \ .5, 1, .5 \ .5, .5, 1)
# drawnorm x1 x2 x3, cov(covmat)
# ivregress 2sls x1 (x2 x3 = z1 z2 z3 z4 z5) w1 w2
# predict res123, r
# reg res123 z1 z2 z3 z4 z5 w1 w2
# test z1 z2 z3 z4 z5
# scalar Fsw = r(F)*r(df)/(r(df)-2)
# di Fsw

# . clear
#
# . set obs 4000
# number of observations (_N) was 0, now 4,000
#
# . set seed 12345 // added TP
#
# . gen w1 = rnormal()
#
# . gen w2 = rnormal()
#
# . gen z1 = rnormal()
#
# . gen z2 = rnormal()
#
# . gen z3 = rnormal()
#
# . gen z4 = rnormal()
#
# . gen z5 = rnormal()
#
# . mat covmat = (1, .5, .5 \ .5, 1, .5 \ .5, .5, 1)
#
# . drawnorm x1 x2 x3, cov(covmat)
#
# .
# . ivregress 2sls x1 (x2 x3 = z1 z2 z3 z4 z5) w1 w2
#
# Instrumental variables (2SLS) regression          Number of obs   =      4,000
# Wald chi2(4)    =       4.90
# Prob > chi2     =     0.2974
# R-squared       =     0.1259
# Root MSE        =     .94388
#
# ------------------------------------------------------------------------------
#   x1 |      Coef.   Std. Err.      z    P>|z|     [95% Conf. Interval]
# -------------+----------------------------------------------------------------
#   x2 |  -.1861774   .4125931    -0.45   0.652    -.9948449    .6224902
# x3 |    .667248   .5329029     1.25   0.211    -.3772225    1.711718
# w1 |     .02143   .0174925     1.23   0.221    -.0128547    .0557147
# w2 |   .0146589    .016231     0.90   0.366    -.0171534    .0464711
# _cons |    .000549    .015748     0.03   0.972    -.0303166    .0314145
# ------------------------------------------------------------------------------
#   Instrumented:  x2 x3
# Instruments:   w1 w2 z1 z2 z3 z4 z5
#
# . predict res123, r
#
# . reg res123 z1 z2 z3 z4 z5 w1 w2
#
# Source |       SS           df       MS      Number of obs   =     4,000
# -------------+----------------------------------   F(7, 3992)      =      0.22
# Model |  1.35045648         7  .192922355   Prob > F        =    0.9818
# Residual |  3562.27268     3,992  .892352877   R-squared       =    0.0004
# -------------+----------------------------------   Adj R-squared   =   -0.0014
# Total |  3563.62314     3,999  .891128567   Root MSE        =    .94464
#
# ------------------------------------------------------------------------------
#   res123 |      Coef.   Std. Err.      t    P>|t|     [95% Conf. Interval]
# -------------+----------------------------------------------------------------
#   z1 |     .00306   .0148837     0.21   0.837    -.0261203    .0322403
# z2 |  -.0031092   .0150206    -0.21   0.836     -.032558    .0263396
# z3 |   .0123012   .0146714     0.84   0.402    -.0164629    .0410653
# z4 |   .0087919   .0148553     0.59   0.554    -.0203327    .0379166
# z5 |  -.0088948   .0150712    -0.59   0.555    -.0384428    .0206531
# w1 |   .0002732   .0149939     0.02   0.985    -.0291231    .0296695
# w2 |   .0004532   .0149238     0.03   0.976    -.0288058    .0297123
# _cons |  -.0002686   .0149467    -0.02   0.986    -.0295724    .0290353
# ------------------------------------------------------------------------------
#
#   . test z1 z2 z3 z4 z5
#
# ( 1)  z1 = 0
# ( 2)  z2 = 0
# ( 3)  z3 = 0
# ( 4)  z4 = 0
# ( 5)  z5 = 0
#
# F(  5,  3992) =    0.30
# Prob > F =    0.9115
#
# . scalar Fsw = r(F)*r(df)/(r(df)-2)
#
# . di Fsw
# .50445533
#
# .
# . ivreg2 x1 (x2 x3 = z1 z2 z3 z4 z5) w1 w2, first
#
# First-stage regressions
# -----------------------
#
#
#   First-stage regression of x2:
#
#   Statistics consistent for homoskedasticity only
# Number of obs =                   4000
# ------------------------------------------------------------------------------
#   x2 |      Coef.   Std. Err.      t    P>|t|     [95% Conf. Interval]
# -------------+----------------------------------------------------------------
#   z1 |  -.0019387   .0157783    -0.12   0.902     -.032873    .0289955
# z2 |   .0325831   .0159235     2.05   0.041     .0013641     .063802
# z3 |   .0023125   .0155533     0.15   0.882    -.0281806    .0328056
# z4 |   .0164893   .0157482     1.05   0.295     -.014386    .0473645
# z5 |   .0065758   .0159771     0.41   0.681    -.0247482    .0378998
# w1 |   .0178871   .0158951     1.13   0.261    -.0132762    .0490504
# w2 |  -.0006284   .0158209    -0.04   0.968    -.0316461    .0303894
# _cons |  -.0133442   .0158451    -0.84   0.400    -.0444094     .017721
# ------------------------------------------------------------------------------
#   F test of excluded instruments:
#   F(  5,  3992) =     1.08
# Prob > F      =   0.3715
# Sanderson-Windmeijer multivariate F test of excluded instruments:
#   F(  4,  3992) =     1.59
# Prob > F      =   0.1750
#
#
# First-stage regression of x3:
#
#   Statistics consistent for homoskedasticity only
# Number of obs =                   4000
# ------------------------------------------------------------------------------
#   x3 |      Coef.   Std. Err.      t    P>|t|     [95% Conf. Interval]
# -------------+----------------------------------------------------------------
#   z1 |  -.0271183   .0157085    -1.73   0.084    -.0579157    .0036792
# z2 |   .0049029   .0158531     0.31   0.757    -.0261779    .0359838
# z3 |   .0021507   .0154845     0.14   0.890    -.0282075    .0325088
# z4 |   .0015835   .0156785     0.10   0.920    -.0291552    .0323222
# z5 |  -.0064682   .0159064    -0.41   0.684    -.0376536    .0247172
# w1 |   .0127237   .0158248     0.80   0.421    -.0183018    .0437491
# w2 |  -.0118418   .0157509    -0.75   0.452    -.0427223    .0190387
# _cons |  -.0022251    .015775    -0.14   0.888    -.0331529    .0287027
# ------------------------------------------------------------------------------
#   F test of excluded instruments:
#   F(  5,  3992) =     0.65
# Prob > F      =   0.6610
# Sanderson-Windmeijer multivariate F test of excluded instruments:
#   F(  4,  3992) =     0.90
# Prob > F      =   0.4654
#
#
#
# Summary results for first-stage regressions
# -------------------------------------------
#
#   (Underid)            (Weak id)
# Variable     | F(  5,  3992)  P-val | SW Chi-sq(  4) P-val | SW F(  4,  3992)
# x2           |       1.08    0.3715 |        6.36   0.1739 |        1.59
# x3           |       0.65    0.6610 |        3.59   0.4642 |        0.90
#
# Stock-Yogo weak ID F test critical values for single endogenous regressor:
#   5% maximal IV relative bias    18.37
# 10% maximal IV relative bias    10.83
# 20% maximal IV relative bias     6.77
# 30% maximal IV relative bias     5.25
# 10% maximal IV size             26.87
# 15% maximal IV size             15.09
# 20% maximal IV size             10.98
# 25% maximal IV size              8.84
# Source: Stock-Yogo (2005).  Reproduced by permission.
# NB: Critical values are for Sanderson-Windmeijer F statistic.
#
# Underidentification test
# Ho: matrix of reduced form coefficients has rank=K1-1 (underidentified)
# Ha: matrix has rank=K1 (identified)
# Anderson canon. corr. LM statistic       Chi-sq(4)=2.99     P-val=0.5594
#
# Weak identification test
# Ho: equation is weakly identified
# Cragg-Donald Wald F statistic                                       0.60
#
# Stock-Yogo weak ID test critical values for K1=2 and L1=5:
#   5% maximal IV relative bias    13.97
# 10% maximal IV relative bias     8.78
# 20% maximal IV relative bias     5.91
# 30% maximal IV relative bias     4.79
# 10% maximal IV size             19.45
# 15% maximal IV size             11.22
# 20% maximal IV size              8.38
# 25% maximal IV size              6.89
# Source: Stock-Yogo (2005).  Reproduced by permission.
#
# Weak-instrument-robust inference
# Tests of joint significance of endogenous regressors B1 in main equation
# Ho: B1=0 and orthogonality conditions are valid
# Anderson-Rubin Wald test           F(5,3992)=      0.55     P-val=0.7395
# Anderson-Rubin Wald test           Chi-sq(5)=      2.75     P-val=0.7386
# Stock-Wright LM S statistic        Chi-sq(5)=      2.75     P-val=0.7389
#
# Number of observations               N  =       4000
# Number of regressors                 K  =          5
# Number of endogenous regressors      K1 =          2
# Number of instruments                L  =          8
# Number of excluded instruments       L1 =          5
#
# IV (2SLS) estimation
# --------------------
#
#   Estimates efficient for homoskedasticity only
# Statistics consistent for homoskedasticity only
#
# Number of obs =     4000
# F(  4,  3995) =     1.22
# Prob > F      =   0.2982
# Total (centered) SS     =  4076.803985                Centered R2   =   0.1259
# Total (uncentered) SS   =  4076.816472                Uncentered R2 =   0.1259
# Residual SS             =  3563.623141                Root MSE      =    .9439
#
# ------------------------------------------------------------------------------
#   x1 |      Coef.   Std. Err.      z    P>|z|     [95% Conf. Interval]
# -------------+----------------------------------------------------------------
#   x2 |  -.1861774   .4125931    -0.45   0.652    -.9948449    .6224902
# x3 |    .667248   .5329029     1.25   0.211    -.3772225    1.711718
# w1 |     .02143   .0174925     1.23   0.221    -.0128547    .0557147
# w2 |   .0146589    .016231     0.90   0.366    -.0171534    .0464711
# _cons |    .000549    .015748     0.03   0.972    -.0303166    .0314145
# ------------------------------------------------------------------------------
#   Underidentification test (Anderson canon. corr. LM statistic):           2.990
# Chi-sq(4) P-val =    0.5594
# ------------------------------------------------------------------------------
#   Weak identification test (Cragg-Donald Wald F statistic):                0.597
# Stock-Yogo weak ID test critical values:  5% maximal IV relative bias    13.97
# 10% maximal IV relative bias     8.78
# 20% maximal IV relative bias     5.91
# 30% maximal IV relative bias     4.79
# 10% maximal IV size             19.45
# 15% maximal IV size             11.22
# 20% maximal IV size              8.38
# 25% maximal IV size              6.89
# Source: Stock-Yogo (2005).  Reproduced by permission.
# ------------------------------------------------------------------------------
#   Sargan statistic (overidentification test of all instruments):           1.516
# Chi-sq(3) P-val =    0.6786
# ------------------------------------------------------------------------------
#   Instrumented:         x2 x3
# Included instruments: w1 w2
# Excluded instruments: z1 z2 z3 z4 z5
# ------------------------------------------------------------------------------
#
#   . mat list e(first)
#
# e(first)[21,2]
# x2         x3
# rmse  1.0014257  .99699546
# sheapr2   .0017641  .00106746
# pr2  .00134586  .00081439
# F  1.0759845  .65073595
# df          5          5
# df_r       3992       3992
# pvalue  .37147303  .66095627
# SWF  1.5865039  .89590568
# SWFdf1          4          4
# SWFdf2       3992       3992
# SWFp  .17498432  .46538383
# SWchi2   6.358733  3.5908043
# SWchi2p  .17391183  .46420632
# SWr2  .00158716   .0008969
# APF  1.3046394  .78902226
# APFdf1          4          4
# APFdf2       3992       3992
# APFp  .26580003  .53212677
# APchi2  5.2290154  3.1624138
# APchi2p  .26459576  .53102306
# APr2  .00130555  .00078998


# . use http://fmwww.bc.edu/ec-p/data/wooldridge/mroz.dta, clear
#
# .
# end of do-file
#
# . do "C:\Users\eptmp\AppData\Local\Temp\STD620_000000.tmp"
#
# . ivreg2 lwage (educ exper = age kidslt6), first
#
# First-stage regressions
# -----------------------
#
#
#   First-stage regression of educ:
#
#   Statistics consistent for homoskedasticity only
# Number of obs =                    428
# ------------------------------------------------------------------------------
#   educ |      Coef.   Std. Err.      t    P>|t|     [95% Conf. Interval]
# -------------+----------------------------------------------------------------
#   age |  -.0028069   .0151294    -0.19   0.853    -.0325446    .0269308
# kidslt6 |   .7354883   .2980564     2.47   0.014     .1496402    1.321336
# _cons |   12.67358   .6595434    19.22   0.000     11.37721    13.96996
# ------------------------------------------------------------------------------
#   F test of excluded instruments:
#   F(  2,   425) =     3.63
# Prob > F      =   0.0273
# Sanderson-Windmeijer multivariate F test of excluded instruments:
#   F(  1,   425) =     5.74
# Prob > F      =   0.0170
#
#
# First-stage regression of exper:
#
#   Statistics consistent for homoskedasticity only
# Number of obs =                    428
# ------------------------------------------------------------------------------
#   exper |      Coef.   Std. Err.      t    P>|t|     [95% Conf. Interval]
# -------------+----------------------------------------------------------------
#   age |   .4958752   .0470592    10.54   0.000     .4033775    .5883729
# kidslt6 |  -.5090499   .9270899    -0.55   0.583    -2.331302    1.313202
# _cons |   -7.70411   2.051478    -3.76   0.000    -11.73642   -3.671804
# ------------------------------------------------------------------------------
#   F test of excluded instruments:
#   F(  2,   425) =    65.08
# Prob > F      =   0.0000
# Sanderson-Windmeijer multivariate F test of excluded instruments:
#   F(  1,   425) =    22.57
# Prob > F      =   0.0000
#
#
#
# Summary results for first-stage regressions
# -------------------------------------------
#
#   (Underid)            (Weak id)
# Variable     | F(  2,   425)  P-val | SW Chi-sq(  1) P-val | SW F(  1,   425)
# educ         |       3.63    0.0273 |        5.78   0.0162 |        5.74
# exper        |      65.08    0.0000 |       22.73   0.0000 |       22.57
#
# Stock-Yogo weak ID F test critical values for single endogenous regressor:
#   10% maximal IV size             19.93
# 15% maximal IV size             11.59
# 20% maximal IV size              8.75
# 25% maximal IV size              7.25
# Source: Stock-Yogo (2005).  Reproduced by permission.
# NB: Critical values are for Sanderson-Windmeijer F statistic.
#
# Underidentification test
# Ho: matrix of reduced form coefficients has rank=K1-1 (underidentified)
# Ha: matrix has rank=K1 (identified)
# Anderson canon. corr. LM statistic       Chi-sq(1)=5.70     P-val=0.0170
#
# Weak identification test
# Ho: equation is weakly identified
# Cragg-Donald Wald F statistic                                       2.87
#
# Stock-Yogo weak ID test critical values for K1=2 and L1=2:
#   10% maximal IV size              7.03
# 15% maximal IV size              4.58
# 20% maximal IV size              3.95
# 25% maximal IV size              3.63
# Source: Stock-Yogo (2005).  Reproduced by permission.
#
# Weak-instrument-robust inference
# Tests of joint significance of endogenous regressors B1 in main equation
# Ho: B1=0 and orthogonality conditions are valid
# Anderson-Rubin Wald test           F(2,425)=       0.64     P-val=0.5263
# Anderson-Rubin Wald test           Chi-sq(2)=      1.29     P-val=0.5234
# Stock-Wright LM S statistic        Chi-sq(2)=      1.29     P-val=0.5244
#
# Number of observations               N  =        428
# Number of regressors                 K  =          3
# Number of endogenous regressors      K1 =          2
# Number of instruments                L  =          3
# Number of excluded instruments       L1 =          2
#
# IV (2SLS) estimation
# --------------------
#
#   Estimates efficient for homoskedasticity only
# Statistics consistent for homoskedasticity only
#
# Number of obs =      428
# F(  2,   425) =     0.67
# Prob > F      =   0.5129
# Total (centered) SS     =  223.3274513                Centered R2   =   0.0415
# Total (uncentered) SS   =   829.594813                Uncentered R2 =   0.7420
# Residual SS             =  214.0604775                Root MSE      =    .7072
#
# ------------------------------------------------------------------------------
#   lwage |      Coef.   Std. Err.      z    P>|z|     [95% Conf. Interval]
# -------------+----------------------------------------------------------------
#   educ |   .0074314   .1290546     0.06   0.954    -.2455111    .2603738
# exper |    .010421   .0098027     1.06   0.288     -.008792     .029634
# _cons |   .9602385   1.694897     0.57   0.571    -2.361699    4.282176
# ------------------------------------------------------------------------------
#   Underidentification test (Anderson canon. corr. LM statistic):           5.699
# Chi-sq(1) P-val =    0.0170
# ------------------------------------------------------------------------------
#   Weak identification test (Cragg-Donald Wald F statistic):                2.868
# Stock-Yogo weak ID test critical values: 10% maximal IV size              7.03
# 15% maximal IV size              4.58
# 20% maximal IV size              3.95
# 25% maximal IV size              3.63
# Source: Stock-Yogo (2005).  Reproduced by permission.
# ------------------------------------------------------------------------------
#   Sargan statistic (overidentification test of all instruments):           0.000
# (equation exactly identified)
# ------------------------------------------------------------------------------
#   Instrumented:         educ exper
# Excluded instruments: age kidslt6
# ------------------------------------------------------------------------------
#
#   . mat list e(first)
#
# e(first)[21,2]
# educ      exper
# rmse  2.2714133  7.0651217
# sheapr2  .01346799  .18786232
# pr2  .01680838  .23445676
# F  3.6328429  65.080662
# df          2          2
# df_r        425        425
# pvalue  .02726533  2.206e-25
# SWF  5.7388476  22.573095
# SWFdf1          1          1
# SWFdf2        425        425
# SWFp  .01702542  2.772e-06
# SWchi2  5.7793571  22.732434
# SWchi2p  .01621547  1.862e-06
# SWr2  .01332326  .05043443
# APF  5.8204037  104.26978
# APFdf1          1          1
# APFdf2        425        425
# APFp  .01626387  4.869e-22
# APchi2  5.8614889   105.0058
# APchi2p   .0154757  1.218e-24
# APr2  .01351005  .19700687

# // 2 endogenous variables
#
# . use http://fmwww.bc.edu/ec-p/data/wooldridge/mroz.dta, clear
#
# .
# . ivreg2 lwage (educ exper = age kidslt6 kidsge6) if !missing(lwage,educ,exper,age,kidslt6,kidsge6), first
#
# First-stage regressions
# -----------------------
#
#
#   First-stage regression of educ:
#
#   Statistics consistent for homoskedasticity only
# Number of obs =                    428
# ------------------------------------------------------------------------------
#   educ |      Coef.   Std. Err.      t    P>|t|     [95% Conf. Interval]
# -------------+----------------------------------------------------------------
#   age |  -.0185412   .0163449    -1.13   0.257    -.0506683    .0135859
# kidslt6 |   .6984283   .2966854     2.35   0.019     .1152709    1.281586
# kidsge6 |   -.222821   .0906154    -2.46   0.014    -.4009324   -.0447096
# _cons |   13.64009   .7644499    17.84   0.000     12.13751    15.14268
# ------------------------------------------------------------------------------
#   F test of excluded instruments:
#   F(  3,   424) =     4.47
# Prob > F      =   0.0042
# Sanderson-Windmeijer multivariate F test of excluded instruments:
#   F(  2,   424) =     6.69
# Prob > F      =   0.0014
#
#
# First-stage regression of exper:
#
#   Statistics consistent for homoskedasticity only
# Number of obs =                    428
# ------------------------------------------------------------------------------
#   exper |      Coef.   Std. Err.      t    P>|t|     [95% Conf. Interval]
# -------------+----------------------------------------------------------------
#   age |   .3948754   .0496446     7.95   0.000     .2972953    .4924555
# kidslt6 |  -.7469412   .9011267    -0.83   0.408    -2.518173    1.024291
# kidsge6 |  -1.430306   .2752275    -5.20   0.000    -1.971286   -.8893254
# _cons |  -1.500019   2.321874    -0.65   0.519    -6.063837    3.063798
# ------------------------------------------------------------------------------
#   F test of excluded instruments:
#   F(  3,   424) =    55.04
# Prob > F      =   0.0000
# Sanderson-Windmeijer multivariate F test of excluded instruments:
#   F(  2,   424) =    81.81
# Prob > F      =   0.0000
#
#
#
# Summary results for first-stage regressions
# -------------------------------------------
#
#   (Underid)            (Weak id)
# Variable     | F(  3,   424)  P-val | SW Chi-sq(  2) P-val | SW F(  2,   424)
# educ         |       4.47    0.0042 |       13.51   0.0012 |        6.69
# exper        |      55.04    0.0000 |      165.17   0.0000 |       81.81
#
# Stock-Yogo weak ID F test critical values for single endogenous regressor:
#    5% maximal IV relative bias    13.91
#   10% maximal IV relative bias     9.08
#   20% maximal IV relative bias     6.46
#   30% maximal IV relative bias     5.39
#   10% maximal IV size             22.30
#   15% maximal IV size             12.83
#   20% maximal IV size              9.54
#   25% maximal IV size              7.80
# Source: Stock-Yogo (2005).  Reproduced by permission.
# NB: Critical values are for Sanderson-Windmeijer F statistic.
#
# Underidentification test
# Ho: matrix of reduced form coefficients has rank=K1-1 (underidentified)
# Ha: matrix has rank=K1 (identified)
# Anderson canon. corr. LM statistic       Chi-sq(2)=13.10    P-val=0.0014
#
# Weak identification test
# Ho: equation is weakly identified
# Cragg-Donald Wald F statistic                                       4.46
#
# Stock-Yogo weak ID test critical values for K1=2 and L1=3:
#   10% maximal IV size             13.43
#   15% maximal IV size              8.18
#   20% maximal IV size              6.40
#   25% maximal IV size              5.45
# Source: Stock-Yogo (2005).  Reproduced by permission.
#
# Weak-instrument-robust inference
# Tests of joint significance of endogenous regressors B1 in main equation
# Ho: B1=0 and orthogonality conditions are valid
# Anderson-Rubin Wald test           F(3,424)=       2.08     P-val=0.1025
# Anderson-Rubin Wald test           Chi-sq(3)=      6.29     P-val=0.0983
# Stock-Wright LM S statistic        Chi-sq(3)=      6.20     P-val=0.1023
#
# Number of observations               N  =        428
# Number of regressors                 K  =          3
# Number of endogenous regressors      K1 =          2
# Number of instruments                L  =          4
# Number of excluded instruments       L1 =          3
#
# IV (2SLS) estimation
# --------------------
#
#   Estimates efficient for homoskedasticity only
# Statistics consistent for homoskedasticity only
#
# Number of obs =      428
# F(  2,   425) =     3.03
# Prob > F      =   0.0492
# Total (centered) SS     =  223.3274513                Centered R2   =   0.1482
# Total (uncentered) SS   =   829.594813                Uncentered R2 =   0.7707
# Residual SS             =  190.2315236                Root MSE      =    .6667
#
# ------------------------------------------------------------------------------
#        lwage |      Coef.   Std. Err.      z    P>|z|     [95% Conf. Interval]
# -------------+----------------------------------------------------------------
#         educ |   .1058361   .0806975     1.31   0.190    -.0523281    .2640003
#        exper |   .0161527    .007568     2.13   0.033     .0013197    .0309858
#        _cons |  -.3601821   1.029787    -0.35   0.727    -2.378528    1.658164
# ------------------------------------------------------------------------------
#   Underidentification test (Anderson canon. corr. LM statistic):          13.101
# Chi-sq(2) P-val =    0.0014
# ------------------------------------------------------------------------------
#   Weak identification test (Cragg-Donald Wald F statistic):                4.463
# Stock-Yogo weak ID test critical values: 10% maximal IV size             13.43
#                                          15% maximal IV size              8.18
#                                          20% maximal IV size              6.40
#                                          25% maximal IV size              5.45
# Source: Stock-Yogo (2005).  Reproduced by permission.
# ------------------------------------------------------------------------------
#   Sargan statistic (overidentification test of all instruments):           1.168
# Chi-sq(1) P-val =    0.2798
# ------------------------------------------------------------------------------
#   Instrumented:         educ exper
# Excluded instruments: age kidslt6 kidsge6
# ------------------------------------------------------------------------------
#
#   .
# . ivregress 2sls educ (exper = age kidslt6 kidsge6) if !missing(lwage,educ,exper,age,kidslt6,kidsge6)
#
# Instrumental variables (2SLS) regression          Number of obs   =        428
# Wald chi2(1)    =       0.01
# Prob > chi2     =     0.9121
# R-squared       =     0.0002
# Root MSE        =     2.2825
#
# ------------------------------------------------------------------------------
#         educ |      Coef.   Std. Err.      z    P>|z|     [95% Conf. Interval]
# -------------+----------------------------------------------------------------
#        exper |  -.0028589    .025898    -0.11   0.912     -.053618    .0479002
#        _cons |   12.69615   .3552098    35.74   0.000     11.99995    13.39235
# ------------------------------------------------------------------------------
#   Instrumented:  exper
# Instruments:   age kidslt6 kidsge6
#
# . cap noi drop res12
# variable res12 not found
#
# . predict res12 if !missing(lwage,educ,exper,age,kidslt6,kidsge6), r
# (325 missing values generated)
#
# . reg res12 age kidslt6 kidsge6 if !missing(lwage,educ,exper,age,kidslt6,kidsge6)
#
#       Source |       SS           df       MS      Number of obs   =       428
# -------------+----------------------------------   F(3, 424)       =      4.46
#        Model |  68.2525159         3  22.7508386   Prob > F        =    0.0042
#     Residual |  2161.48673       424  5.09784606   R-squared       =    0.0306
# -------------+----------------------------------   Adj R-squared   =    0.0238
#        Total |  2229.73925       427  5.22187177   Root MSE        =    2.2578
#
# ------------------------------------------------------------------------------
#        res12 |      Coef.   Std. Err.      t    P>|t|     [95% Conf. Interval]
# -------------+----------------------------------------------------------------
#          age |  -.0174123   .0163434    -1.07   0.287    -.0495365    .0147119
#      kidslt6 |   .6962929   .2966584     2.35   0.019     .1131886    1.279397
#      kidsge6 |  -.2269101   .0906072    -2.50   0.013    -.4050053   -.0488149
#        _cons |   .9396522   .7643804     1.23   0.220    -.5627945    2.442099
# ------------------------------------------------------------------------------
#
# . test age kidslt6 kidsge6
#
# ( 1)  age = 0
# ( 2)  kidslt6 = 0
# ( 3)  kidsge6 = 0
#
# F(  3,   424) =    4.46
# Prob > F =    0.0042
#
# . scalar Fsw12 = r(F)*(r(df))/(r(df) - 1)
#
# . di Fsw12
# 6.6942504
#
# .
# . ivregress 2sls exper (educ = age kidslt6 kidsge6) if !missing(lwage,educ,exper,age,kidslt6,kidsge6)
#
# Instrumental variables (2SLS) regression          Number of obs   =        428
# Wald chi2(1)    =       0.11
# Prob > chi2     =     0.7392
# R-squared       =          .
# Root MSE        =     8.0694
#
# ------------------------------------------------------------------------------
#        exper |      Coef.   Std. Err.      z    P>|z|     [95% Conf. Interval]
# -------------+----------------------------------------------------------------
#         educ |  -.3250531   .9762929    -0.33   0.739    -2.238552    1.588446
#        _cons |   17.15219   12.36493     1.39   0.165    -7.082621      41.387
# ------------------------------------------------------------------------------
#   Instrumented:  educ
# Instruments:   age kidslt6 kidsge6
#
# . cap noi drop res21
# variable res21 not found
#
# . predict res21 if !missing(lwage,educ,exper,age,kidslt6,kidsge6), r
# (325 missing values generated)
#
# . reg res21 age kidslt6 kidsge6 if !missing(lwage,educ,exper,age,kidslt6,kidsge6)
#
#       Source |       SS           df       MS      Number of obs   =       428
# -------------+----------------------------------   F(3, 424)       =     54.54
#        Model |  7760.24452         3  2586.74817   Prob > F        =    0.0000
#     Residual |  20109.0836       424  47.4270839   R-squared       =    0.2785
# -------------+----------------------------------   Adj R-squared   =    0.2733
#        Total |  27869.3281       427  65.2677473   Root MSE        =    6.8867
#
# ------------------------------------------------------------------------------
#        res21 |      Coef.   Std. Err.      t    P>|t|     [95% Conf. Interval]
# -------------+----------------------------------------------------------------
#          age |   .3888485   .0498497     7.80   0.000     .2908652    .4868318
#      kidslt6 |  -.5199149   .9048498    -0.57   0.566    -2.298465    1.258635
#      kidsge6 |  -1.502734   .2763647    -5.44   0.000     -2.04595   -.9595189
#        _cons |  -14.21846   2.331468    -6.10   0.000    -18.80113   -9.635782
# ------------------------------------------------------------------------------
#
# . test age kidslt6 kidsge6
#
# ( 1)  age = 0
# ( 2)  kidslt6 = 0
# ( 3)  kidsge6 = 0
#
# F(  3,   424) =   54.54
# Prob > F =    0.0000
#
# . scalar Fsw21 = r(F)*(r(df))/(r(df) - 1)
#
# . di Fsw21
# 81.812373