R Under development (unstable) (2024-10-10 r87224 ucrt) -- "Unsuffered Consequences"
Copyright (C) 2024 The R Foundation for Statistical Computing
Platform: x86_64-w64-mingw32/x64

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> # Model part --------------------------------------------------------------
> ## check what is loaded
> # dllpath <- getLoadedDLLs()
> # getDLLRegisteredRoutines(dllpath$WeightSVM[[2]])
> 
> ## load dataset
> require('WeightSVM')
Loading required package: WeightSVM
> data(iris)
> 
> ## classification mode
> # default with factor response:
> model1 <- wsvm(Species ~ ., weight = rep(1,150), data = iris) # same weights
> model2 <- wsvm(x = iris[,1:4], y = iris[,5], weight = c(rep(0.08, 50),rep(1,100))) # smaller weights for setosa
> # alternatively the traditional interface:
> x <- subset(iris, select = -Species)
> y <- iris$Species
> model3 <- wsvm(x, y, weight = rep(10,150)) # similar to model 1, but larger weights for all subjects
> 
> ## Weight checks: weight must not be NULL, must have the correct length, must be finite, and must not be all zeros
> # These models provide error/warning info
> # wsvm(x, y) # no weight
> # wsvm(x, y, weight = rep(10,100)) # wrong length
> # wsvm(x, y, weight = c(Inf, rep(1,149))) # contains inf weight
> # wsvm(x, y, weight = rep(0,150)) # all weights are zeros
> # wsvm(x, y, weight = c(rep(0,25),rep(-1,25),rep(1,100))) # drop subjects with zero or negative weights
> 
> print(model1)

Call:
wsvm(formula = Species ~ ., weight = rep(1, 150), data = iris)


Parameters:
   SVM-Type:  C-classification 
 SVM-Kernel:  radial 
       cost:  1 

Number of Support Vectors:  51

> summary(model1)

Call:
wsvm(formula = Species ~ ., weight = rep(1, 150), data = iris)


Parameters:
   SVM-Type:  C-classification 
 SVM-Kernel:  radial 
       cost:  1 

Number of Support Vectors:  51

 ( 8 22 21 )


Number of Classes:  3 

Levels: 
 setosa versicolor virginica



> 
> # test with train data
> pred <- predict(model1, iris[,1:4])
> # (same as:)
> pred <- fitted(model1)
> 
> # Check accuracy:
> table(pred, y) # model 1, equal weights
            y
pred         setosa versicolor virginica
  setosa         50          0         0
  versicolor      0         48         2
  virginica       0          2        48
> 
> # compute decision values and probabilities:
> pred <- predict(model1, x, decision.values = TRUE)
> attr(pred, "decision.values")[1:4,]
  setosa/versicolor setosa/virginica versicolor/virginica
1          1.196152         1.091757            0.6708810
2          1.064621         1.056185            0.8483518
3          1.180842         1.074542            0.6439798
4          1.110699         1.053012            0.6782041
> 
> # visualize (classes by color, SV by crosses):
> plot(cmdscale(dist(iris[,-5])),
+      col = as.integer(iris[,5]),
+      pch = c("o","+")[1:150 %in% model1$index + 1]) # model 1
> plot(cmdscale(dist(iris[,-5])),
+      col = as.integer(iris[,5]),
+      pch = c("o","+")[1:150 %in% model2$index + 1]) # In model 2, fewer support vectors come from setosa
> 
> 
> ## try regression mode on two dimensions
> # create data
> x <- seq(0.1, 5, by = 0.05)
> y <- log(x) + rnorm(x, sd = 0.2)
> 
> # estimate model and predict input values
> model1 <- wsvm(x, y, weight = rep(1,99))
> model2 <- wsvm(x, y, weight = seq(99,1,length.out = 99)) # decreasing weights
> 
> # visualize
> plot(x, y)
> points(x, log(x), col = 2)
> points(x, fitted(model1), col = 4)
> points(x, fitted(model2), col = 3) # better fit for the first few points
> 
> ## density-estimation
> # create 2-dim. normal with rho=0:
> X <- data.frame(a = rnorm(1000), b = rnorm(1000))
> attach(X)
> 
> # formula interface:
> model <- wsvm(~ a + b, gamma = 0.1, weight = c(seq(5000,1,length.out = 500),1:500))
> 
> # test:
> newdata <- data.frame(a = c(0, 4), b = c(0, 4))
> 
> # visualize:
> plot(X, col = 1:1000 %in% model$index + 1, xlim = c(-5,5), ylim=c(-5,5))
> points(newdata, pch = "+", col = 2, cex = 5)
> 
> ## class weights:
> i2 <- iris
> levels(i2$Species)[3] <- "versicolor"
> summary(i2$Species)
    setosa versicolor 
        50        100 
> wts <- 100 / table(i2$Species)
> wts

    setosa versicolor 
         2          1 
> m <- wsvm(Species ~ ., data = i2, class.weights = wts, weight=rep(1,150))
> 
> ## extract coefficients for linear kernel
> 
> # a. regression
> x <- 1:100
> y <- x + rnorm(100)
> m <- wsvm(y ~ x, scale = FALSE, kernel = "linear", weight = rep(1,100))
> coef(m)
(Intercept)           x 
  0.2647816   0.9980938 
> plot(y ~ x)
> abline(m, col = "red")
> 
> # b. classification
> # transform iris data to binary problem, and scale data
> setosa <- as.factor(iris$Species == "setosa")
> iris2 = scale(iris[,-5])
> 
> # fit binary C-classification model
> model1 <- wsvm(setosa ~ Petal.Width + Petal.Length,
+           data = iris2, kernel = "linear", weight = rep(1,150))
> model2 <- wsvm(setosa ~ Petal.Width + Petal.Length,
+                data = iris2, kernel = "linear", weight = c(rep(0.08, 50),rep(1,100))) # smaller weights for setosa
> 
> # plot data and separating hyperplane
> plot(Petal.Length ~ Petal.Width, data = iris2, col = setosa)
> (cf <- coef(model1))
 (Intercept)  Petal.Width Petal.Length 
   -1.658953    -1.172523    -1.358030 
> abline(-cf[1]/cf[3], -cf[2]/cf[3], col = "red")
> (cf2 <- coef(model2))
 (Intercept)  Petal.Width Petal.Length 
  -1.5255176   -1.0815128   -0.9354167 
> abline(-cf2[1]/cf2[3], -cf2[2]/cf2[3], col = "red", lty = 2)
> 
> # plot margin and mark support vectors
> abline(-(cf[1] + 1)/cf[3], -cf[2]/cf[3], col = "blue")
> abline(-(cf[1] - 1)/cf[3], -cf[2]/cf[3], col = "blue")
> points(model1$SV, pch = 5, cex = 2)
> abline(-(cf2[1] + 1)/cf2[3], -cf2[2]/cf2[3], col = "blue", lty = 2)
> abline(-(cf2[1] - 1)/cf2[3], -cf2[2]/cf2[3], col = "blue", lty = 2)
> points(model2$SV, pch = 6, cex = 2)
> 
> 
> # Tuning ------------------------------------------------------------------
> data(iris)
> 
> obj <- tune_wsvm(Species~., weight = c(rep(0.8, 50),rep(1,100)), data = iris, ranges = list(gamma = 2^(-1:10), cost = 2^(2:4)), tunecontrol = tune.control(sampling = "fix"))
> 
> # set.seed(11)
> # obj <- tune_wsvm(Species~., weight = c(rep(1, 52),rep(0,98)), data = iris, use_zero_weight = TRUE, ranges = list(gamma = 2^(-1:1), cost = 2^(2:4)), tunecontrol = tune.control(sampling = "bootstrap"))
> 
> summary(obj)

Parameter tuning of 'wsvm':

- sampling method: fixed training/validation set 

- best parameters:
 gamma cost
   0.5    8

- best performance: 0.05932203 

- Detailed performance results:
    gamma cost      error dispersion
1     0.5    4 0.08050847         NA
2     1.0    4 0.08050847         NA
3     2.0    4 0.07627119         NA
4     4.0    4 0.11440678         NA
5     8.0    4 0.11016949         NA
6    16.0    4 0.27966102         NA
7    32.0    4 0.44491525         NA
8    64.0    4 0.52542373         NA
9   128.0    4 0.72033898         NA
10  256.0    4 0.76271186         NA
11  512.0    4 0.76271186         NA
12 1024.0    4 0.76271186         NA
13    0.5    8 0.05932203         NA
14    1.0    8 0.08050847         NA
15    2.0    8 0.07627119         NA
16    4.0    8 0.11440678         NA
17    8.0    8 0.11016949         NA
18   16.0    8 0.27966102         NA
19   32.0    8 0.44491525         NA
20   64.0    8 0.52542373         NA
21  128.0    8 0.72033898         NA
22  256.0    8 0.76271186         NA
23  512.0    8 0.76271186         NA
24 1024.0    8 0.76271186         NA
25    0.5   16 0.05932203         NA
26    1.0   16 0.08050847         NA
27    2.0   16 0.07627119         NA
28    4.0   16 0.11440678         NA
29    8.0   16 0.11016949         NA
30   16.0   16 0.27966102         NA
31   32.0   16 0.44491525         NA
32   64.0   16 0.52542373         NA
33  128.0   16 0.72033898         NA
34  256.0   16 0.76271186         NA
35  512.0   16 0.76271186         NA
36 1024.0   16 0.76271186         NA

> plot(obj, transform.x = log2, transform.y = log2)
> plot(obj, type = "perspective", theta = 120, phi = 45)
> 
> 
> best.tune_wsvm(Species~.,weight = c(rep(0.08, 50),rep(1,100)), data = iris, ranges = list(gamma = 2^(-1:1), cost = 2^(2:4)), tunecontrol = tune.control(sampling = "fix"))

Call:
best.tune_wsvm(Species ~ ., weight = c(rep(0.08, 50), rep(1, 100)), 
    data = iris, ranges = list(gamma = 2^(-1:1), cost = 2^(2:4)), 
    tunecontrol = tune.control(sampling = "fix"))


Parameters:
   SVM-Type:  C-classification 
 SVM-Kernel:  radial 
       cost:  4 

Number of Support Vectors:  54

> 
> 
> proc.time()
   user  system elapsed 
   0.67    0.10    0.76