R Under development (unstable) (2023-11-17 r85550 ucrt) -- "Unsuffered Consequences"
Copyright (C) 2023 The R Foundation for Statistical Computing
Platform: x86_64-w64-mingw32/x64

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> library(rpart)
> library(rpart.LAD)
Loading required package: Rcpp
> 
> 
> # test 1
> mystate <- data.frame(state.x77, region = state.region)
> names(mystate) <- casefold(names(mystate)) #remove mixed case
> 
> # R-squared fit
> fit0 <- rpart(murder ~ ., data = mystate, minsplit = 10)
> print(summary(fit0))
Call:
rpart(formula = murder ~ ., data = mystate, minsplit = 10)
  n= 50 

          CP nsplit rel error    xerror       xstd
1 0.53620316      0 1.0000000 1.0121859 0.13359163
2 0.13626024      1 0.4637968 0.5941589 0.11843937
3 0.08363538      2 0.3275366 0.4951365 0.09253520
4 0.06040376      3 0.2439012 0.5354895 0.09767669
5 0.02249711      4 0.1834975 0.5310509 0.09993223
6 0.01350310      5 0.1610004 0.4566351 0.08152396
7 0.01000000      6 0.1474972 0.4672700 0.08369011

Variable importance
  life.exp    hs.grad illiteracy      frost     region       area population 
        31         18         14         13         10          8          4 
    income 
         2 

Node number 1: 50 observations,    complexity param=0.5362032
  mean=7.378, MSE=13.35492 
  left son=2 (21 obs) right son=3 (29 obs)
  Primary splits:
      life.exp   < 70.915  to the right, improve=0.5362032, (0 missing)
      illiteracy < 1.35    to the left,  improve=0.4486033, (0 missing)
      region     splits as  LRLL,        improve=0.3615610, (0 missing)
      frost      < 97.5    to the right, improve=0.3361042, (0 missing)
      hs.grad    < 44.3    to the right, improve=0.3305300, (0 missing)
  Surrogate splits:
      hs.grad    < 55.6    to the right, agree=0.78, adj=0.476, (0 split)
      illiteracy < 0.75    to the left,  agree=0.74, adj=0.381, (0 split)
      frost      < 125.5   to the right, agree=0.70, adj=0.286, (0 split)
      area       < 9579    to the left,  agree=0.70, adj=0.286, (0 split)
      region     splits as  LRLL,        agree=0.70, adj=0.286, (0 split)

Node number 2: 21 observations,    complexity param=0.06040376
  mean=4.233333, MSE=4.16127 
  left son=4 (13 obs) right son=5 (8 obs)
  Primary splits:
      region   splits as  LRLR,        improve=0.4615619, (0 missing)
      hs.grad  < 60.95   to the left,  improve=0.3590784, (0 missing)
      area     < 82386.5 to the left,  improve=0.3302317, (0 missing)
      frost    < 92.5    to the right, improve=0.3145708, (0 missing)
      life.exp < 71.715  to the right, improve=0.2730069, (0 missing)
  Surrogate splits:
      hs.grad  < 59.4    to the left,  agree=0.905, adj=0.750, (0 split)
      frost    < 92.5    to the right, agree=0.857, adj=0.625, (0 split)
      area     < 81941.5 to the left,  agree=0.857, adj=0.625, (0 split)
      income   < 4143    to the right, agree=0.714, adj=0.250, (0 split)
      life.exp < 72.84   to the left,  agree=0.667, adj=0.125, (0 split)

Node number 3: 29 observations,    complexity param=0.1362602
  mean=9.655172, MSE=7.665922 
  left son=6 (21 obs) right son=7 (8 obs)
  Primary splits:
      life.exp   < 69.395  to the right, improve=0.4092776, (0 missing)
      frost      < 97.5    to the right, improve=0.3890938, (0 missing)
      illiteracy < 1.45    to the left,  improve=0.3789950, (0 missing)
      hs.grad    < 47.6    to the right, improve=0.3129022, (0 missing)
      population < 2276.5  to the left,  improve=0.2388471, (0 missing)
  Surrogate splits:
      illiteracy < 1.95    to the left,  agree=0.828, adj=0.375, (0 split)
      hs.grad    < 41.45   to the right, agree=0.828, adj=0.375, (0 split)
      income     < 3573    to the right, agree=0.759, adj=0.125, (0 split)
      frost      < 62.5    to the right, agree=0.759, adj=0.125, (0 split)

Node number 4: 13 observations,    complexity param=0.0135031
  mean=3.146154, MSE=1.450178 
  left son=8 (10 obs) right son=9 (3 obs)
  Primary splits:
      life.exp   < 71.735  to the right, improve=0.47827780, (0 missing)
      frost      < 121     to the right, improve=0.29156330, (0 missing)
      area       < 31865.5 to the right, improve=0.25268720, (0 missing)
      region     splits as  R-L-,        improve=0.25268720, (0 missing)
      population < 4255    to the left,  improve=0.09768783, (0 missing)
  Surrogate splits:
      income < 4374.5  to the right, agree=0.846, adj=0.333, (0 split)

Node number 5: 8 observations
  mean=6, MSE=3.525 

Node number 6: 21 observations,    complexity param=0.08363538
  mean=8.561905, MSE=5.56712 
  left son=12 (4 obs) right son=13 (17 obs)
  Primary splits:
      population < 1101    to the left,  improve=0.4776956, (0 missing)
      hs.grad    < 52.85   to the right, improve=0.4254729, (0 missing)
      illiteracy < 0.75    to the left,  improve=0.4158945, (0 missing)
      frost      < 141     to the right, improve=0.4087926, (0 missing)
      area       < 37873.5 to the left,  improve=0.3026274, (0 missing)
  Surrogate splits:
      frost      < 141     to the right, agree=0.952, adj=0.75, (0 split)
      illiteracy < 0.65    to the left,  agree=0.905, adj=0.50, (0 split)
      hs.grad    < 53.9    to the right, agree=0.905, adj=0.50, (0 split)

Node number 7: 8 observations
  mean=12.525, MSE=1.801875 

Node number 8: 10 observations
  mean=2.69, MSE=0.6989 

Node number 9: 3 observations
  mean=4.666667, MSE=0.9488889 

Node number 12: 4 observations
  mean=5.2, MSE=2.545 

Node number 13: 17 observations,    complexity param=0.02249711
  mean=9.352941, MSE=2.99308 
  left son=26 (9 obs) right son=27 (8 obs)
  Primary splits:
      frost      < 97.5    to the right, improve=0.2952370, (0 missing)
      area       < 46398.5 to the left,  improve=0.2913309, (0 missing)
      hs.grad    < 52.85   to the right, improve=0.1881725, (0 missing)
      illiteracy < 1.15    to the left,  improve=0.1534758, (0 missing)
      income     < 4726    to the left,  improve=0.1248603, (0 missing)
  Surrogate splits:
      illiteracy < 1.15    to the left,  agree=0.882, adj=0.75, (0 split)
      hs.grad    < 48.3    to the right, agree=0.765, adj=0.50, (0 split)
      region     splits as  LRLL,        agree=0.765, adj=0.50, (0 split)
      population < 12048.5 to the left,  agree=0.647, adj=0.25, (0 split)
      income     < 4221    to the right, agree=0.647, adj=0.25, (0 split)

Node number 26: 9 observations
  mean=8.466667, MSE=2.693333 

Node number 27: 8 observations
  mean=10.35, MSE=1.4525 

n= 50 

node), split, n, deviance, yval
      * denotes terminal node

 1) root 50 667.745800  7.378000  
   2) life.exp>=70.915 21  87.386670  4.233333  
     4) region=Northeast,North Central 13  18.852310  3.146154  
       8) life.exp>=71.735 10   6.989000  2.690000 *
       9) life.exp< 71.735 3   2.846667  4.666667 *
     5) region=South,West 8  28.200000  6.000000 *
   3) life.exp< 70.915 29 222.311700  9.655172  
     6) life.exp>=69.395 21 116.909500  8.561905  
      12) population< 1101 4  10.180000  5.200000 *
      13) population>=1101 17  50.882350  9.352941  
        26) frost>=97.5 9  24.240000  8.466667 *
        27) frost< 97.5 8  11.620000 10.350000 *
     7) life.exp< 69.395 8  14.415000 12.525000 *
> plot(fit0); text(fit0)
> xerror0 <- colMeans((xpred.rpart(fit0) - mystate$murder)^2)
> print(xerror0)
0.76810158 0.27030200 0.10675287 0.07107666 0.03686340 0.01742931 0.01162029 
 13.911413   8.230116   6.673434   7.303747   7.125458   6.524766   6.413669 
> 
> #LAD fit
> fit1 <- rpart(murder ~ ., data = mystate, method = LAD, minsplit = 10)
> print(summary(fit1))
Call:
rpart(formula = murder ~ ., data = mystate, method = LAD, minsplit = 10)
  n= 50 

          CP nsplit rel error
1 0.38723942      0 1.0000000
2 0.09286166      1 0.6127606
3 0.05306380      2 0.5198989
4 0.04548326      3 0.4668351
5 0.03916614      4 0.4213519
6 0.03663929      5 0.3821857
7 0.01516109      6 0.3455464
8 0.01000000      7 0.3303853

Variable importance
  life.exp illiteracy      frost    hs.grad     income     region       area 
        24         15         14         14         12         10          9 
population 
         4 

Node number 1: 50 observations,    complexity param=0.3872394
  median= 6.8, LAD=3.1660000 
  left son=2 (21 obs) right son=3 (29 obs)
  Primary splits:
      life.exp   < 70.915  to the right, improve=1.212, (0 missing)
      illiteracy < 1.35    to the left,  improve=1.008, (0 missing)
      hs.grad    < 52.85   to the right, improve=0.820, (0 missing)
      frost      < 97.5    to the right, improve=0.788, (0 missing)
      region     splits as  LRLL,        improve=0.684, (0 missing)
  Surrogate splits:
      hs.grad    < 55.6    to the right, agree=0.78, adj=0.476, (0 split)
      illiteracy < 0.75    to the left,  agree=0.74, adj=0.381, (0 split)
      frost      < 125.5   to the right, agree=0.70, adj=0.286, (0 split)
      area       < 9579    to the left,  agree=0.70, adj=0.286, (0 split)
      region     splits as  LRLL,        agree=0.70, adj=0.286, (0 split)

Node number 2: 21 observations,    complexity param=0.0530638
  median= 4.2, LAD=1.5857140 
  left son=4 (13 obs) right son=5 (8 obs)
  Primary splits:
      hs.grad  < 59.4    to the left,  improve=0.4000000, (0 missing)
      region   splits as  LRLR,        improve=0.3714286, (0 missing)
      frost    < 126.5   to the right, improve=0.3476190, (0 missing)
      area     < 80538   to the left,  improve=0.2809524, (0 missing)
      life.exp < 71.885  to the right, improve=0.2666667, (0 missing)
  Surrogate splits:
      area     < 80538   to the left,  agree=0.905, adj=0.750, (0 split)
      frost    < 63      to the right, agree=0.810, adj=0.500, (0 split)
      income   < 4644    to the left,  agree=0.667, adj=0.125, (0 split)
      life.exp < 72.84   to the left,  agree=0.667, adj=0.125, (0 split)

Node number 3: 29 observations,    complexity param=0.09286166
  median=10.3, LAD=2.1965520 
  left son=6 (12 obs) right son=7 (17 obs)
  Primary splits:
      illiteracy < 1.15    to the left,  improve=0.5068966, (0 missing)
      hs.grad    < 52.85   to the right, improve=0.4827586, (0 missing)
      frost      < 97.5    to the right, improve=0.4724138, (0 missing)
      life.exp   < 69.395  to the right, improve=0.4413793, (0 missing)
      area       < 37873.5 to the left,  improve=0.3793103, (0 missing)
  Surrogate splits:
      frost      < 100.5   to the right, agree=0.931, adj=0.833, (0 split)
      hs.grad    < 48.3    to the right, agree=0.828, adj=0.583, (0 split)
      income     < 4221    to the right, agree=0.793, adj=0.500, (0 split)
      region     splits as  LRLR,        agree=0.793, adj=0.500, (0 split)
      population < 1101    to the left,  agree=0.724, adj=0.333, (0 split)

Node number 4: 13 observations,    complexity param=0.03663929
  median= 3.0, LAD=1.0615380 
  left son=8 (9 obs) right son=9 (4 obs)
  Primary splits:
      life.exp   < 71.735  to the right, improve=0.3923077, (0 missing)
      frost      < 121     to the right, improve=0.2307692, (0 missing)
      income     < 4224    to the right, improve=0.1923077, (0 missing)
      area       < 55202.5 to the right, improve=0.1615385, (0 missing)
      population < 746.5   to the left,  improve=0.1000000, (0 missing)
  Surrogate splits:
      income < 4075    to the right, agree=0.846, adj=0.50, (0 split)
      frost  < 121     to the right, agree=0.769, adj=0.25, (0 split)
      region splits as  LRL-,        agree=0.769, adj=0.25, (0 split)

Node number 5: 8 observations
  median= 4.5, LAD=1.3875000 

Node number 6: 12 observations,    complexity param=0.03916614
  median= 7.1, LAD=2.0083330 
  left son=12 (7 obs) right son=13 (5 obs)
  Primary splits:
      income     < 4658.5  to the left,  improve=0.5166667, (0 missing)
      hs.grad    < 52.85   to the right, improve=0.4333333, (0 missing)
      area       < 50357   to the left,  improve=0.3666667, (0 missing)
      population < 2590    to the left,  improve=0.2750000, (0 missing)
      life.exp   < 70.18   to the right, improve=0.2666667, (0 missing)
  Surrogate splits:
      life.exp   < 70.255  to the right, agree=0.917, adj=0.8, (0 split)
      illiteracy < 0.85    to the left,  agree=0.833, adj=0.6, (0 split)
      frost      < 105.5   to the right, agree=0.750, adj=0.4, (0 split)
      area       < 20405.5 to the right, agree=0.750, adj=0.4, (0 split)
      region     splits as  LRLL,        agree=0.750, adj=0.4, (0 split)

Node number 7: 17 observations,    complexity param=0.04548326
  median=11.0, LAD=1.4647060 
  left son=14 (12 obs) right son=15 (5 obs)
  Primary splits:
      life.exp   < 69.13   to the right, improve=0.42352940, (0 missing)
      illiteracy < 1.95    to the left,  improve=0.32941180, (0 missing)
      frost      < 62.5    to the right, improve=0.24117650, (0 missing)
      population < 2276.5  to the left,  improve=0.19411760, (0 missing)
      hs.grad    < 47.6    to the right, improve=0.09411765, (0 missing)
  Surrogate splits:
      illiteracy < 1.95    to the left,  agree=0.882, adj=0.6, (0 split)
      income     < 3573    to the right, agree=0.765, adj=0.2, (0 split)
      hs.grad    < 41.45   to the right, agree=0.765, adj=0.2, (0 split)
      frost      < 62.5    to the right, agree=0.765, adj=0.2, (0 split)

Node number 8: 9 observations
  median= 2.4, LAD=0.5111111 

Node number 9: 4 observations
  median= 5.2, LAD=0.8500000 

Node number 12: 7 observations
  median= 6.9, LAD=1.4285710 

Node number 13: 5 observations
  median=10.3, LAD=1.5800000 

Node number 14: 12 observations,    complexity param=0.01516109
  median=10.6, LAD=1.0666670 
  left son=28 (5 obs) right son=29 (7 obs)
  Primary splits:
      population < 2799.5  to the left,  improve=0.20000000, (0 missing)
      income     < 3664.5  to the left,  improve=0.19166670, (0 missing)
      area       < 40554   to the left,  improve=0.13333330, (0 missing)
      frost      < 83.5    to the right, improve=0.11666670, (0 missing)
      hs.grad    < 53.95   to the right, improve=0.08333333, (0 missing)
  Surrogate splits:
      income  < 3664.5  to the left,  agree=0.833, adj=0.6, (0 split)
      hs.grad < 53.95   to the right, agree=0.833, adj=0.6, (0 split)
      frost   < 97.5    to the right, agree=0.833, adj=0.6, (0 split)
      region  splits as  RR-L,        agree=0.833, adj=0.6, (0 split)
      area    < 50371.5 to the right, agree=0.750, adj=0.4, (0 split)

Node number 15: 5 observations
  median=13.2, LAD=0.9800000 

Node number 28: 5 observations
  median= 9.7, LAD=1.3800000 

Node number 29: 7 observations
  median=10.9, LAD=0.5000000 

n= 50 

node), split, n, deviance, yval
      * denotes terminal node

 1) root 50 158.3  6.8  
   2) life.exp>=70.915 21  33.3  4.2  
     4) hs.grad< 59.4 13  13.8  3.0  
       8) life.exp>=71.735 9   4.6  2.4 *
       9) life.exp< 71.735 4   3.4  5.2 *
     5) hs.grad>=59.4 8  11.1  4.5 *
   3) life.exp< 70.915 29  63.7 10.3  
     6) illiteracy< 1.15 12  24.1  7.1  
      12) income< 4658.5 7  10.0  6.9 *
      13) income>=4658.5 5   7.9 10.3 *
     7) illiteracy>=1.15 17  24.9 11.0  
      14) life.exp>=69.13 12  12.8 10.6  
        28) population< 2799.5 5   6.9  9.7 *
        29) population>=2799.5 7   3.5 10.9 *
      15) life.exp< 69.13 5   4.9 13.2 *
> plot(fit1); text(fit1)
> xerror1 <- colMeans((xpred.rpart(fit1) - mystate$murder)^2)
> print(xerror1)
0.69361971 0.18963041 0.07019681 0.04912754 0.04220668 0.03788165 0.02356887 
   14.0982    10.7026    10.2886     9.5452     9.9272     9.9272     9.9336 
0.01231304 
    8.4284 
> 
> xerror1b <- colMeans
> 
> 
> # test 2: example from book
> #library(MASS)
> #fit3 <- rpart(medv ~ ., data = Boston, method = LAD)
> #plot(fit3)
> #text(fit3)
> # Unfortunately, this does not fully reproduce the plot from the book; the reason is not yet known.
> 
> proc.time()
   user  system elapsed 
   0.43    0.15    0.56