R Under development (unstable) (2024-01-12 r85803 ucrt) -- "Unsuffered Consequences" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: x86_64-w64-mingw32/x64 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > #### > #### William Ryan > #### 12 September, 2021 > #### Test script for complex robust linear fit (rlm). > #### Modified from rlm in MASS. > #### Test script based on Wuber's answer on stackexchange https://stats.stackexchange.com/questions/66088/analysis-with-complex-data-anything-different# > #### > > > library(complexlm) Loading required package: MASS Attaching package: 'complexlm' The following objects are masked from 'package:MASS': psi.bisquare, psi.hampel, psi.huber, rlm The following objects are masked from 'package:stats': cor, cov, lm, lm.fit, lm.wfit, mad, mahalanobis, var The following object is masked from 'package:base': range > library(MASS, include.only = "mvrnorm") > library(ggplot2) > # Synthesize data. > # (1) the independent variable `w`. > # > w.max <- 6 # Max extent of the independent values > w <- expand.grid(seq(-w.max,w.max, 3), seq(-w.max,w.max, 3)) > w <- complex(real=w[[1]], imaginary=w[[2]]) > w <- w[Mod(w) <= w.max] ### This drops any element of w that has a modulus greater than w.max > n <- length(w) > # > # (2) the dependent variable `z`. > # > beta <- c(-20+5i, complex(argument=2*pi/3, modulus=3/2)) ### The fist is the intercept, the 2nd the slope. > print(beta) [1] -20.00+5.000000i -0.75+1.299038i > sigma <- 1.5; rho <- 0.7 # Parameters of the error distribution > set.seed(17) > e <- mvrnorm(n, c(0,0), matrix(c(1,rho,rho,1)*sigma^2, 2)) ### A bunch of random numbers to add errors to the example data. > e <- complex(real=e[,1], imaginary=e[,2]) ### Make those random numbers complex. > z <- as.vector((desX <- cbind(rep(1,n), w)) %*% beta + e) ### The design matrix desX is defined in this line. > z.pure <- as.vector((desX <- cbind(rep(1,n), w)) %*% beta) # z data without the noise. > # > # Add m outliers to z. Draw the outliers from a > # > z.clean <- z # Preserve outlier free z. > m <- 3 > outlier.pos <- sample(1:n, m) # Positions of outliers in z. > outliers <- mvrnorm(m, c(5.8, -7.32), matrix(c(1,rho,rho,1)*sigma^2, 2)) # The real and imaginary values of the outliers. > outliers <- complex(real=outliers[,1], imaginary=outliers[,2]) # Make the outliers comlex numbers. > z <- replace(z, outlier.pos, outliers) > # > # Collect everything into a dataframe. > # > fitframe <- data.frame(w, z.pure, z.clean, z) > # > # Whuber's ordinary complex linear fit. > # > print(beta.hat <- solve(Conj(t(desX)) %*% desX, Conj(t(desX)) %*% z), digits=4) [,1] [1,] -13.6046+2.0829i [2,] -0.1656+0.7124i > z.whuber <- beta.hat[2] * w + beta.hat[1] > fitframe$z.whuber <- z.whuber > fitframe$res.whuber <- as.vector(z - desX %*% beta.hat) > # > # Robust complex linear fit. > # > print(mytestfit <- rlm(x = w, y = z, interc = TRUE)) # Uses default psi=psi.huber, the Huber objective function. Call: rlm(x = w, y = z, interc = TRUE) Converged in 18 iterations Coefficients: (intercept) w -18.9125204+5.294413i -0.5504508+1.098552i Degrees of freedom: 13 total; 11 residual Scale estimate: 2.26 > rbeta.hat <- mytestfit$coefficients > fitframe$z.robust <- mytestfit$coefficients[2] * w + mytestfit$coefficients[1] > fitframe$res.robust <- mytestfit$residuals > # Robust complex linear fit, with Hampel objective function. > # > print(mytestfitHam <- rlm(x = w, y = z, psi = psi.hampel, interc = TRUE)) # Uses psi=psi.hampel, the Hampel objective function. Call: rlm(x = w, y = z, psi = psi.hampel, interc = TRUE) Converged in 9 iterations Coefficients: (intercept) w -13.6739082+2.1406753i -0.1870532+0.7302799i Degrees of freedom: 13 total; 11 residual Scale estimate: 13.1 > rHambeta.hat <- mytestfitHam$coefficients > fitframe$z.robustHam <- mytestfitHam$coefficients[2] * w + mytestfitHam$coefficients[1] > fitframe$res.robustHam <- mytestfitHam$residuals > # Robust complex linear fit, with Tukey's bisquare objective function. > # > print(mytestfitBi <- rlm(x = w, y = z, psi = psi.bisquare, interc = TRUE)) # Uses psi=psi.bisquare, Tukey's bisquare objective function. Call: rlm(x = w, y = z, psi = psi.bisquare, interc = TRUE) Converged in 6 iterations Coefficients: (intercept) w -19.7175660+5.751964i -0.5898994+1.139501i Degrees of freedom: 13 total; 11 residual Scale estimate: 2.58 > rBibeta.hat <- mytestfitBi$coefficients > fitframe$z.robustBi <- mytestfitBi$coefficients[2] * w + mytestfitBi$coefficients[1] > fitframe$res.robustBi <- mytestfitBi$residuals > > library(reshape2) > meltedfitframe <- melt(fitframe, id = "w") > meltedfitframe$variable <- factor(meltedfitframe$variable, ordered = TRUE) > > betterplot2 <- ggplot(meltedfitframe[meltedfitframe$variable != "res.whuber" & meltedfitframe$variable != "res.robust" & meltedfitframe$variable != "res.robustMM" & meltedfitframe$variable != "z.clean",], aes(x = Re(value), y = Im(value), color = as.factor(variable), shape = as.factor(variable))) + + geom_point(size = 3) + + scale_shape_manual(values = c('z.pure'=19, 'z.clean'=0, 'z'=20, 'z.whuber'=18, 'z.robust'=18), labels = c('z.pure'='pure relationsip', 'z.clean'='random noise added', 'z'='noise and outliers', 'z.whuber'='from ordinary fit', 'z.robust'='from robust fit')) + + scale_color_manual(values = c('z.pure'="cyan3", 'z.clean'='forestgreen', 'z'='black', 'z.whuber'='red', 'z.robust'='blue'), labels = c('z.pure'='pure relationsip', 'z.clean'='random noise added', 'z'='noise and outliers', 'z.whuber'='from ordinary fit', 'z.robust'='from robust fit')) + + geom_line(data = meltedfitframe[meltedfitframe$variable == "z" | meltedfitframe$variable == "z.whuber",], aes(group = w, lty = "ordinary lm"), size = 0.4, color = "lightcoral") + + geom_line(data = meltedfitframe[meltedfitframe$variable == "z" | meltedfitframe$variable == "z.robust",], aes(group = w, lty = "robust lm"), size = 0.4, color = "royalblue") + + scale_linetype_manual("Residuals", values = c('dotted', 'dotted'), guide=guide_legend(override.aes = list(size = c(0.5, 0.5), color = c("lightcoral", 'royalblue')))) + + labs(y = "Imaginary", x = "Real", shape = "z values", color = "z values", title = "Generated and Fit Values of z") Warning message: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0. ℹ Please use `linewidth` instead. > betterplot2 # Need to decide if I like cyan2 or cyan3 better. Don't know how to automatically pick scale for object of type . Defaulting to continuous. Don't know how to automatically pick scale for object of type . Defaulting to continuous. Warning message: Removed 52 rows containing missing values (`geom_point()`). > > #### Now plot the residuals. > residframe <- data.frame(w = w, pure.whub = fitframe$z.pure - fitframe$z.whuber, pure.rob = fitframe$z.pure - fitframe$z.robust, + clean.whub = fitframe$z.clean - fitframe$z.whuber, clean.rob = fitframe$z.clean - fitframe$z.robust, + zz.whuber = fitframe$res.whuber, zz.robust = fitframe$res.robust) > meltresidframe <- melt(residframe, id = "w") > assign_label <- function(varr) { + if (varr == 'pure.whub') return('z.pure - z.olm (Ordinary residuals from pure relationsip.)') + if (varr == 'pure.rob') return('z.pure - z.rlm (Robust residuals from pure relationship.)') + if (varr == 'clean.whub') return('z.clean - z.olm (Ordinary residuals from noisy data.)') + if (varr == 'clean.rob') return('z.clean - z.rlm (Robust residuals from noisy data.)') + if (varr == 'zz.whuber') return('z - z.olm (Ordinary residuals from noisy data with outliers.)') + if (varr == 'zz.robust') return('z - z.rlm (Robust residuals from noisy data with outliers.)') + } > meltresidframe$label <- as.factor(unlist(lapply(meltresidframe$variable, assign_label))) ### This gives us more readable labels for the facets of the next plot. > > residplot <- ggplot(meltresidframe, aes(x = Re(value), y = Im(value), color = as.factor(variable))) + + geom_segment(aes(x = 0, y = 0, xend = Re(value), yend = Im(value)), arrow = arrow(length = unit(0.03, "npc")), size = .4) + + facet_wrap(vars(label), nrow = 3, ncol = 2) + + labs(y = "imaginary", x = "real", title = "Complex Residuals") + + theme(legend.position = "none") + + coord_fixed() > residplot > > absresidplot <- ggplot(meltresidframe, aes(y = Mod(value)^2, x = label, color = Arg(value))) + + geom_jitter(width = 0.1) + + scale_colour_gradientn(colours=rainbow(6, start = 0, end = 1, s = 1, v = 0.8)) + + labs(y = "Modulus Squared", x = "Residual", color = 'Argument', title = "Squared Modulus of Residuals") > absresidplot > > ## A bunch of plots showing the transformation from w to z > ggplot(fitframe, aes(x = Re(w), y = Im(w))) + + geom_point(size = 3) + + labs(y = "Imaginary", x = "Real", title = "Independant Variable (Current)") + + theme(axis.text = element_text(size = 14), axis.title = element_text(size = 16), plot.title = element_text(size = 18)) + + coord_fixed() > ggplot(fitframe, aes(x = Re(z.pure), y = Im(z.pure))) + + geom_point(size = 3, shape = 19, color = 'cyan2') + + labs(y = "Imaginary", x = "Real", title = "Dependant Variable (Voltage)") + + theme(axis.text = element_text(size = 14), axis.title = element_text(size = 16), plot.title = element_text(size = 18)) + + coord_fixed() + + ylim(-15,15) + xlim(-28, 2) > ggplot(fitframe, aes(x = Re(z.clean), y = Im(z.clean))) + + geom_point(size = 3, shape = 0, color = "forestgreen") + + labs(y = "Imaginary", x = "Real", title = "Noisy Dependant Variable (Voltage)") + + theme(axis.text = element_text(size = 14), axis.title = element_text(size = 16), plot.title = element_text(size = 18)) + + coord_fixed() + + ylim(-15,15) + xlim(-28, 2) > ggplot(fitframe, aes(x = Re(z), y = Im(z))) + + geom_point(size = 3, shape = 20, color = "black") + + labs(y = "Imaginary", x = "Real", title = "Noisy Dependant Variable with Outliers (Voltage)") + + theme(axis.text = element_text(size = 14), axis.title = element_text(size = 16), plot.title = element_text(size = 18)) + + coord_fixed() + + ylim(-19,19) + xlim(-28, 8) > > #### > #### A test to compare the performance of zmodel.matrix to model.matrix > #### > #library(profvis) > set.seed(4242) > slop <- 4.23 > interc <- 1.4 > Xt <- -20:20 > tframe <- data.frame(Xt=Xt, Yt= Xt * slop + interc + rnorm(length(Xt))) > testerms <- terms(Yt ~ Xt) > zmodel.matrix(testerms, tframe) (intercept) Xt 1 1 -20 2 1 -19 3 1 -18 4 1 -17 5 1 -16 6 1 -15 7 1 -14 8 1 -13 9 1 -12 10 1 -11 11 1 -10 12 1 -9 13 1 -8 14 1 -7 15 1 -6 16 1 -5 17 1 -4 18 1 -3 19 1 -2 20 1 -1 21 1 0 22 1 1 23 1 2 24 1 3 25 1 4 26 1 5 27 1 6 28 1 7 29 1 8 30 1 9 31 1 10 32 1 11 33 1 12 34 1 13 35 1 14 36 1 15 37 1 16 38 1 17 39 1 18 40 1 19 41 1 20 attr(,"assign") [1] 0 1 > model.matrix(testerms, tframe) (Intercept) Xt 1 1 -20 2 1 -19 3 1 -18 4 1 -17 5 1 -16 6 1 -15 7 1 -14 8 1 -13 9 1 -12 10 1 -11 11 1 -10 12 1 -9 13 1 -8 14 1 -7 15 1 -6 16 1 -5 17 1 -4 18 1 -3 19 1 -2 20 1 -1 21 1 0 22 1 1 23 1 2 24 1 3 25 1 4 26 1 5 27 1 6 28 1 7 29 1 8 30 1 9 31 1 10 32 1 11 33 1 12 34 1 13 35 1 14 36 1 15 37 1 16 38 1 17 39 1 18 40 1 19 41 1 20 attr(,"assign") [1] 0 1 > > proc.time() user system elapsed 2.59 0.21 2.79