R Under development (unstable) (2024-10-11 r87226 ucrt) -- "Unsuffered Consequences" Copyright (C) 2024 The R Foundation for Statistical Computing Platform: x86_64-w64-mingw32/x64 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > # Model part -------------------------------------------------------------- > ## check what is loaded > # dllpath <- getLoadedDLLs() > # getDLLRegisteredRoutines(dllpath$WeightSVM[[2]]) > > ## load dataset > require('WeightSVM') Loading required package: WeightSVM > data(iris) > > ## classification mode > # default with factor response: > model1 <- wsvm(Species ~ ., weight = rep(1,150), data = iris) # same weights > model2 <- wsvm(x = iris[,1:4], y = iris[,5], weight = c(rep(0.08, 50),rep(1,100))) # less weights to setosa > # alternatively the traditional interface: > x <- subset(iris, select = -Species) > y <- iris$Species > model3 <- wsvm(x, y, weight = rep(10,150)) # similar to model 1, but larger weights for all subjects > > ## weight length, weight is not null !!! weight range!! number of zeros !! Inf > # These models provide error/warning info > # wsvm(x, y) # no weight > # wsvm(x, y, weight = rep(10,100)) # wrong lengty > # wsvm(x, y, weight = c(Inf, rep(1,149))) # contains inf weight > # wsvm(x, y, weight = rep(0,150)) # all weights are zeros > # wsvm(x, y, weight = c(rep(0,25),rep(-1,25),rep(1,100))) # drop subjects with zero or nagetive weights > > print(model1) Call: wsvm(formula = Species ~ ., weight = rep(1, 150), data = iris) Parameters: SVM-Type: C-classification SVM-Kernel: radial cost: 1 Number of Support Vectors: 51 > summary(model1) Call: wsvm(formula = Species ~ ., weight = rep(1, 150), data = iris) Parameters: SVM-Type: C-classification SVM-Kernel: radial cost: 1 Number of Support Vectors: 51 ( 8 22 21 ) Number of Classes: 3 Levels: setosa versicolor virginica > > # test with train data > pred <- predict(model1, iris[,1:4]) > # (same as:) > pred <- fitted(model1) > > # Check accuracy: > table(pred, y) # model 1, equal weights y pred setosa versicolor virginica setosa 50 0 0 versicolor 0 48 2 virginica 0 2 48 > > # compute decision values and probabilities: > pred <- predict(model1, x, decision.values = TRUE) > attr(pred, "decision.values")[1:4,] setosa/versicolor setosa/virginica versicolor/virginica 1 1.196152 1.091757 0.6708810 2 1.064621 1.056185 0.8483518 3 1.180842 1.074542 0.6439798 4 1.110699 1.053012 0.6782041 > > # visualize (classes by color, SV by crosses): > plot(cmdscale(dist(iris[,-5])), + col = as.integer(iris[,5]), + pch = c("o","+")[1:150 %in% model1$index + 1]) # model 1 > plot(cmdscale(dist(iris[,-5])), + col = as.integer(iris[,5]), + pch = c("o","+")[1:150 %in% model2$index + 1]) # In model 2, less support vectors are based on setosa > > > ## try regression mode on two dimensions > # create data > x <- seq(0.1, 5, by = 0.05) > y <- log(x) + rnorm(x, sd = 0.2) > > # estimate model and predict input values > model1 <- wsvm(x, y, weight = rep(1,99)) > model2 <- wsvm(x, y, weight = seq(99,1,length.out = 99)) # decreasing weights > > # visualize > plot(x, y) > points(x, log(x), col = 2) > points(x, fitted(model1), col = 4) > points(x, fitted(model2), col = 3) # better fit for the first few points > > ## density-estimation > # create 2-dim. normal with rho=0: > X <- data.frame(a = rnorm(1000), b = rnorm(1000)) > attach(X) > > # formula interface: > model <- wsvm(~ a + b, gamma = 0.1, weight = c(seq(5000,1,length.out = 500),1:500)) > > # test: > newdata <- data.frame(a = c(0, 4), b = c(0, 4)) > > # visualize: > plot(X, col = 1:1000 %in% model$index + 1, xlim = c(-5,5), ylim=c(-5,5)) > points(newdata, pch = "+", col = 2, cex = 5) > > ## class weights: > i2 <- iris > levels(i2$Species)[3] <- "versicolor" > summary(i2$Species) setosa versicolor 50 100 > wts <- 100 / table(i2$Species) > wts setosa versicolor 2 1 > m <- wsvm(Species ~ ., data = i2, class.weights = wts, weight=rep(1,150)) > > ## extract coefficients for linear kernel > > # a. regression > x <- 1:100 > y <- x + rnorm(100) > m <- wsvm(y ~ x, scale = FALSE, kernel = "linear", weight = rep(1,100)) > coef(m) (Intercept) x 0.4146877 0.9970113 > plot(y ~ x) > abline(m, col = "red") > > # b. classification > # transform iris data to binary problem, and scale data > setosa <- as.factor(iris$Species == "setosa") > iris2 = scale(iris[,-5]) > > # fit binary C-classification model > model1 <- wsvm(setosa ~ Petal.Width + Petal.Length, + data = iris2, kernel = "linear", weight = rep(1,150)) > model2 <- wsvm(setosa ~ Petal.Width + Petal.Length, + data = iris2, kernel = "linear", weight = c(rep(0.08, 50),rep(1,100))) # less weights to setosa > > # plot data and separating hyperplane > plot(Petal.Length ~ Petal.Width, data = iris2, col = setosa) > (cf <- coef(model1)) (Intercept) Petal.Width Petal.Length -1.658953 -1.172523 -1.358030 > abline(-cf[1]/cf[3], -cf[2]/cf[3], col = "red") > (cf2 <- coef(model2)) (Intercept) Petal.Width Petal.Length -1.5255176 -1.0815128 -0.9354167 > abline(-cf2[1]/cf2[3], -cf2[2]/cf2[3], col = "red", lty = 2) > > # plot margin and mark support vectors > abline(-(cf[1] + 1)/cf[3], -cf[2]/cf[3], col = "blue") > abline(-(cf[1] - 1)/cf[3], -cf[2]/cf[3], col = "blue") > points(model1$SV, pch = 5, cex = 2) > abline(-(cf2[1] + 1)/cf2[3], -cf2[2]/cf2[3], col = "blue", lty = 2) > abline(-(cf2[1] - 1)/cf2[3], -cf2[2]/cf2[3], col = "blue", lty = 2) > points(model2$SV, pch = 6, cex = 2) > > > # Tuning ------------------------------------------------------------------ > data(iris) > > obj <- tune_wsvm(Species~., weight = c(rep(0.8, 50),rep(1,100)), data = iris, ranges = list(gamma = 2^(-1:10), cost = 2^(2:4)), tunecontrol = tune.control(sampling = "fix")) > > # set.seed(11) > # obj <- tune_wsvm(Species~., weight = c(rep(1, 52),rep(0,98)), data = iris, use_zero_weight = TRUE, ranges = list(gamma = 2^(-1:1), cost = 2^(2:4)), tunecontrol = tune.control(sampling = "bootstrap")) > > summary(obj) Parameter tuning of 'wsvm': - sampling method: fixed training/validation set - best parameters: gamma cost 0.5 4 - best performance: 0.08438819 - Detailed performance results: gamma cost error dispersion 1 0.5 4 0.08438819 NA 2 1.0 4 0.10126582 NA 3 2.0 4 0.10126582 NA 4 4.0 4 0.10126582 NA 5 8.0 4 0.18565401 NA 6 16.0 4 0.35021097 NA 7 32.0 4 0.48945148 NA 8 64.0 4 0.62025316 NA 9 128.0 4 0.62025316 NA 10 256.0 4 0.78059072 NA 11 512.0 4 0.78059072 NA 12 1024.0 4 0.78059072 NA 13 0.5 8 0.12658228 NA 14 1.0 8 0.14345992 NA 15 2.0 8 0.10126582 NA 16 4.0 8 0.10126582 NA 17 8.0 8 0.18565401 NA 18 16.0 8 0.35021097 NA 19 32.0 8 0.48945148 NA 20 64.0 8 0.62025316 NA 21 128.0 8 0.62025316 NA 22 256.0 8 0.78059072 NA 23 512.0 8 0.78059072 NA 24 1024.0 8 0.78059072 NA 25 0.5 16 0.12658228 NA 26 1.0 16 0.14345992 NA 27 2.0 16 0.10126582 NA 28 4.0 16 0.10126582 NA 29 8.0 16 0.18565401 NA 30 16.0 16 0.35021097 NA 31 32.0 16 0.48945148 NA 32 64.0 16 0.62025316 NA 33 128.0 16 0.62025316 NA 34 256.0 16 0.78059072 NA 35 512.0 16 0.78059072 NA 36 1024.0 16 0.78059072 NA > plot(obj, transform.x = log2, transform.y = log2) > plot(obj, type = "perspective", theta = 120, phi = 45) > > > best.tune_wsvm(Species~.,weight = c(rep(0.08, 50),rep(1,100)), data = iris, ranges = list(gamma = 2^(-1:1), cost = 2^(2:4)), tunecontrol = tune.control(sampling = "fix")) Call: best.tune_wsvm(Species ~ ., weight = c(rep(0.08, 50), rep(1, 100)), data = iris, ranges = list(gamma = 2^(-1:1), cost = 2^(2:4)), tunecontrol = tune.control(sampling = "fix")) Parameters: SVM-Type: C-classification SVM-Kernel: radial cost: 8 Number of Support Vectors: 47 > > > proc.time() user system elapsed 0.89 0.10 1.07