R Under development (unstable) (2024-01-12 r85803 ucrt) -- "Unsuffered Consequences"
Copyright (C) 2024 The R Foundation for Statistical Computing
Platform: x86_64-w64-mingw32/x64

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> ####
> #### William Ryan
> #### 12 September, 2021
> #### Test script for complex robust linear fit (rlm).
> #### Modified from rlm in MASS.
> #### Test script based on Wuber's answer on stackexchange https://stats.stackexchange.com/questions/66088/analysis-with-complex-data-anything-different#
> ####
> 
> 
> library(complexlm)
Loading required package: MASS

Attaching package: 'complexlm'

The following objects are masked from 'package:MASS':

    psi.bisquare, psi.hampel, psi.huber, rlm

The following objects are masked from 'package:stats':

    cor, cov, lm, lm.fit, lm.wfit, mad, mahalanobis, var

The following object is masked from 'package:base':

    range

> library(MASS, include.only = "mvrnorm")
> library(ggplot2)
> # Synthesize data.
> # (1) the independent variable `w`.
> #
> w.max <- 6 # Max extent of the independent values
> w <- expand.grid(seq(-w.max,w.max, 3), seq(-w.max,w.max, 3))
> w <- complex(real=w[[1]], imaginary=w[[2]])
> w <- w[Mod(w) <= w.max] ### This drops any element of w that has a modulus greater than w.max
> n <- length(w)
> #
> # (2) the dependent variable `z`.
> #
> beta <- c(-20+5i, complex(argument=2*pi/3, modulus=3/2)) ### The fist is the intercept, the 2nd the slope.
> print(beta)
[1] -20.00+5.000000i  -0.75+1.299038i
> sigma <- 1.5; rho <- 0.7 # Parameters of the error distribution
> set.seed(17)
> e <- mvrnorm(n, c(0,0), matrix(c(1,rho,rho,1)*sigma^2, 2)) ### A bunch of random numbers to add errors to the example data.
> e <- complex(real=e[,1], imaginary=e[,2]) ### Make those random numbers complex.
> z <- as.vector((desX <- cbind(rep(1,n), w)) %*% beta + e) ### The design matrix desX is defined in this line.
> z.pure <- as.vector((desX <- cbind(rep(1,n), w)) %*% beta) # z data without the noise.
> # 
> # Add m outliers to z. Draw the outliers from a 
> #
> z.clean <- z # Preserve outlier free z.
> m <- 3
> outlier.pos <- sample(1:n, m) # Positions of outliers in z.
> outliers <- mvrnorm(m, c(5.8, -7.32),  matrix(c(1,rho,rho,1)*sigma^2, 2)) # The real and imaginary values of the outliers.
> outliers <- complex(real=outliers[,1], imaginary=outliers[,2]) # Make the outliers comlex numbers.
> z <- replace(z, outlier.pos, outliers)
> #
> # Collect everything into a dataframe.
> #
> fitframe <- data.frame(w, z.pure, z.clean, z)
> #
> # Whuber's ordinary complex linear fit.
> #
> print(beta.hat <- solve(Conj(t(desX)) %*% desX, Conj(t(desX)) %*% z), digits=4)
                 [,1]
[1,] -13.6046+2.0829i
[2,]  -0.1656+0.7124i
> z.whuber <- beta.hat[2] * w + beta.hat[1]
> fitframe$z.whuber <- z.whuber
> fitframe$res.whuber <- as.vector(z - desX %*% beta.hat)
> #
> # Robust complex linear fit.
> #
> print(mytestfit <- rlm(x = w, y = z, interc = TRUE)) # Uses default psi=psi.huber, the Huber objective function.
Call:
rlm(x = w, y = z, interc = TRUE)
Converged in 18 iterations

Coefficients:
          (intercept)                     w 
-18.9125204+5.294413i  -0.5504508+1.098552i

Degrees of freedom: 13 total; 11 residual
Scale estimate: 2.26 
> rbeta.hat <- mytestfit$coefficients
> fitframe$z.robust <- mytestfit$coefficients[2] * w + mytestfit$coefficients[1]
> fitframe$res.robust <- mytestfit$residuals
> # Robust complex linear fit, with Hampel objective function.
> #
> print(mytestfitHam <- rlm(x = w, y = z, psi = psi.hampel, interc = TRUE)) # Uses psi=psi.hampel, the Hampel objective function.
Call:
rlm(x = w, y = z, psi = psi.hampel, interc = TRUE)
Converged in 9 iterations

Coefficients:
           (intercept)                      w 
-13.6739082+2.1406753i  -0.1870532+0.7302799i

Degrees of freedom: 13 total; 11 residual
Scale estimate: 13.1 
> rHambeta.hat <- mytestfitHam$coefficients
> fitframe$z.robustHam <- mytestfitHam$coefficients[2] * w + mytestfitHam$coefficients[1]
> fitframe$res.robustHam <- mytestfitHam$residuals
> # Robust complex linear fit, with Tukey's bisquare objective function.
> #
> print(mytestfitBi <- rlm(x = w, y = z, psi = psi.bisquare, interc = TRUE)) # Uses psi=psi.bisquare, Tukey's bisquare objective function.
Call:
rlm(x = w, y = z, psi = psi.bisquare, interc = TRUE)
Converged in 6 iterations

Coefficients:
          (intercept)                     w 
-19.7175660+5.751964i  -0.5898994+1.139501i

Degrees of freedom: 13 total; 11 residual
Scale estimate: 2.58 
> rBibeta.hat <- mytestfitBi$coefficients
> fitframe$z.robustBi <- mytestfitBi$coefficients[2] * w + mytestfitBi$coefficients[1]
> fitframe$res.robustBi <- mytestfitBi$residuals
> 
> library(reshape2)
> meltedfitframe <- melt(fitframe, id = "w")
> meltedfitframe$variable <- factor(meltedfitframe$variable, ordered = TRUE)
> 
> betterplot2 <- ggplot(meltedfitframe[meltedfitframe$variable != "res.whuber" & meltedfitframe$variable != "res.robust" & meltedfitframe$variable != "res.robustMM" & meltedfitframe$variable != "z.clean",], aes(x = Re(value), y = Im(value), color = as.factor(variable), shape = as.factor(variable))) +
+   geom_point(size = 3) +
+   scale_shape_manual(values = c('z.pure'=19, 'z.clean'=0, 'z'=20, 'z.whuber'=18, 'z.robust'=18), labels = c('z.pure'='pure relationsip', 'z.clean'='random noise added', 'z'='noise and outliers', 'z.whuber'='from ordinary fit', 'z.robust'='from robust fit')) +
+   scale_color_manual(values = c('z.pure'="cyan3", 'z.clean'='forestgreen', 'z'='black', 'z.whuber'='red', 'z.robust'='blue'), labels = c('z.pure'='pure relationsip', 'z.clean'='random noise added', 'z'='noise and outliers', 'z.whuber'='from ordinary fit', 'z.robust'='from robust fit')) +
+   geom_line(data = meltedfitframe[meltedfitframe$variable == "z" | meltedfitframe$variable == "z.whuber",], aes(group = w, lty = "ordinary lm"), size = 0.4, color = "lightcoral") +
+   geom_line(data = meltedfitframe[meltedfitframe$variable == "z" | meltedfitframe$variable == "z.robust",], aes(group = w, lty = "robust lm"), size = 0.4, color = "royalblue") +
+   scale_linetype_manual("Residuals", values = c('dotted', 'dotted'), guide=guide_legend(override.aes = list(size = c(0.5, 0.5), color = c("lightcoral", 'royalblue')))) +
+   labs(y = "Imaginary", x = "Real", shape = "z values", color = "z values", title = "Generated and Fit Values of z") 
Warning message:
Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead. 
> betterplot2 # Need to decide if I like cyan2 or cyan3 better.
Don't know how to automatically pick scale for object of type <complex>.
Defaulting to continuous.
Don't know how to automatically pick scale for object of type <complex>.
Defaulting to continuous.
Warning message:
Removed 52 rows containing missing values (`geom_point()`). 
> 
> #### Now plot the residuals.
> residframe <- data.frame(w = w, pure.whub = fitframe$z.pure - fitframe$z.whuber, pure.rob = fitframe$z.pure - fitframe$z.robust,
+                          clean.whub = fitframe$z.clean - fitframe$z.whuber, clean.rob = fitframe$z.clean - fitframe$z.robust,
+                          zz.whuber = fitframe$res.whuber, zz.robust = fitframe$res.robust)
> meltresidframe <- melt(residframe, id = "w")
> assign_label <- function(varr) {
+   if (varr == 'pure.whub') return('z.pure - z.olm (Ordinary residuals from pure relationsip.)')
+   if (varr == 'pure.rob') return('z.pure - z.rlm (Robust residuals from pure relationship.)')
+   if (varr == 'clean.whub') return('z.clean - z.olm (Ordinary residuals from noisy data.)')
+   if (varr == 'clean.rob') return('z.clean - z.rlm (Robust residuals from noisy data.)')
+   if (varr == 'zz.whuber') return('z - z.olm (Ordinary residuals from noisy data with outliers.)')
+   if (varr == 'zz.robust') return('z - z.rlm (Robust residuals from noisy data with outliers.)')
+ }
> meltresidframe$label <- as.factor(unlist(lapply(meltresidframe$variable, assign_label))) ### This gives us more readable labels for the facets of the next plot.
> 
> residplot <- ggplot(meltresidframe, aes(x = Re(value), y = Im(value), color = as.factor(variable))) + 
+   geom_segment(aes(x = 0, y = 0, xend = Re(value), yend = Im(value)), arrow = arrow(length = unit(0.03, "npc")), size = .4) +
+   facet_wrap(vars(label), nrow = 3, ncol = 2) +
+   labs(y = "imaginary", x = "real", title = "Complex Residuals") +
+   theme(legend.position = "none") +
+   coord_fixed()
> residplot
> 
> absresidplot <- ggplot(meltresidframe, aes(y = Mod(value)^2, x = label, color = Arg(value))) +
+   geom_jitter(width = 0.1) + 
+   scale_colour_gradientn(colours=rainbow(6, start = 0, end = 1, s = 1, v = 0.8)) +
+   labs(y = "Modulus Squared", x = "Residual", color = 'Argument', title = "Squared Modulus of Residuals")
> absresidplot
> 
> ## A bunch of plots showing the transformation from w to z
> ggplot(fitframe, aes(x = Re(w), y = Im(w))) +
+   geom_point(size = 3) +
+   labs(y = "Imaginary", x = "Real", title = "Independant Variable (Current)") +
+   theme(axis.text = element_text(size = 14), axis.title = element_text(size = 16), plot.title = element_text(size = 18)) +
+   coord_fixed()
> ggplot(fitframe, aes(x = Re(z.pure), y = Im(z.pure))) +
+   geom_point(size = 3, shape = 19, color = 'cyan2') +
+   labs(y = "Imaginary", x = "Real", title = "Dependant Variable (Voltage)") +
+   theme(axis.text = element_text(size = 14), axis.title = element_text(size = 16), plot.title = element_text(size = 18)) +
+   coord_fixed() +
+   ylim(-15,15) + xlim(-28, 2)
> ggplot(fitframe, aes(x = Re(z.clean), y = Im(z.clean))) +
+   geom_point(size = 3, shape = 0, color = "forestgreen") +
+   labs(y = "Imaginary", x = "Real", title = "Noisy Dependant Variable (Voltage)") +
+   theme(axis.text = element_text(size = 14), axis.title = element_text(size = 16), plot.title = element_text(size = 18)) +
+   coord_fixed() +
+   ylim(-15,15) + xlim(-28, 2)
> ggplot(fitframe, aes(x = Re(z), y = Im(z))) +
+   geom_point(size = 3, shape = 20, color = "black") +
+   labs(y = "Imaginary", x = "Real", title = "Noisy Dependant Variable with Outliers (Voltage)") +
+   theme(axis.text = element_text(size = 14), axis.title = element_text(size = 16), plot.title = element_text(size = 18)) +
+   coord_fixed() +
+   ylim(-19,19) + xlim(-28, 8)
> 
> ####
> #### A test to compare the performance of zmodel.matrix to model.matrix
> ####
> #library(profvis)
> set.seed(4242)
> slop <- 4.23
> interc <- 1.4
> Xt <- -20:20
> tframe <- data.frame(Xt=Xt, Yt= Xt * slop + interc + rnorm(length(Xt)))
> testerms <- terms(Yt ~ Xt)
> zmodel.matrix(testerms, tframe)
   (intercept)  Xt
1            1 -20
2            1 -19
3            1 -18
4            1 -17
5            1 -16
6            1 -15
7            1 -14
8            1 -13
9            1 -12
10           1 -11
11           1 -10
12           1  -9
13           1  -8
14           1  -7
15           1  -6
16           1  -5
17           1  -4
18           1  -3
19           1  -2
20           1  -1
21           1   0
22           1   1
23           1   2
24           1   3
25           1   4
26           1   5
27           1   6
28           1   7
29           1   8
30           1   9
31           1  10
32           1  11
33           1  12
34           1  13
35           1  14
36           1  15
37           1  16
38           1  17
39           1  18
40           1  19
41           1  20
attr(,"assign")
[1] 0 1
> model.matrix(testerms, tframe)
   (Intercept)  Xt
1            1 -20
2            1 -19
3            1 -18
4            1 -17
5            1 -16
6            1 -15
7            1 -14
8            1 -13
9            1 -12
10           1 -11
11           1 -10
12           1  -9
13           1  -8
14           1  -7
15           1  -6
16           1  -5
17           1  -4
18           1  -3
19           1  -2
20           1  -1
21           1   0
22           1   1
23           1   2
24           1   3
25           1   4
26           1   5
27           1   6
28           1   7
29           1   8
30           1   9
31           1  10
32           1  11
33           1  12
34           1  13
35           1  14
36           1  15
37           1  16
38           1  17
39           1  18
40           1  19
41           1  20
attr(,"assign")
[1] 0 1
> 
> proc.time()
   user  system elapsed 
   2.59    0.21    2.79