\donttest{
# ggplot2 is only needed for the plot() calls below
if (!requireNamespace("ggplot2", quietly = TRUE)) utils::install.packages(
  "ggplot2", repos = "https://cloud.r-project.org", quiet = TRUE
)

### linear regression with a one-dimensional covariate
library(fastcpd)
set.seed(1)
p <- 1
x <- mvtnorm::rmvnorm(300, rep(0, p), diag(p))
theta_0 <- matrix(c(1, -1, 0.5))
# three segments of 100 observations each: true change points at 100 and 200
y <- c(
  x[1:100, ] * theta_0[1, ] + rnorm(100, 0, 1),
  x[101:200, ] * theta_0[2, ] + rnorm(100, 0, 1),
  x[201:300, ] * theta_0[3, ] + rnorm(100, 0, 1)
)
result <- fastcpd(
  formula = y ~ . - 1,
  data = data.frame(y = y, x = x),
  family = "lm"
)
plot(result)
summary(result)

### custom logistic regression
library(fastcpd)
set.seed(1)
p <- 5
x <- matrix(rnorm(375 * p, 0, 1), ncol = p)
theta <- rbind(rnorm(p, 0, 1), rnorm(p, 2, 1))
# two segments (200 and 175 observations): true change point at 200
y <- c(
  rbinom(200, 1, 1 / (1 + exp(-x[1:200, ] %*% theta[1, ]))),
  rbinom(175, 1, 1 / (1 + exp(-x[201:375, ] %*% theta[2, ])))
)
data <- data.frame(y = y, x = x)
result_builtin <- suppressWarnings(fastcpd(
  formula = y ~ . - 1,
  data = data,
  family = "binomial"
))
# negative log-likelihood of the whole segment
logistic_loss <- function(data, theta) {
  x <- data[, -1]
  y <- data[, 1]
  u <- x %*% theta
  nll <- -y * u + log(1 + exp(u))
  # for large u, approximate log(1 + exp(u)) by u to avoid overflow
  nll[u > 10] <- -y[u > 10] * u[u > 10] + u[u > 10]
  sum(nll)
}
# gradient of the loss at the most recent observation
logistic_loss_gradient <- function(data, theta) {
  x <- data[nrow(data), -1]
  y <- data[nrow(data), 1]
  c(-(y - 1 / (1 + exp(-x %*% theta)))) * x
}
# Hessian of the loss at the most recent observation
logistic_loss_hessian <- function(data, theta) {
  x <- data[nrow(data), -1]
  prob <- 1 / (1 + exp(-x %*% theta))
  (x %o% x) * c((1 - prob) * prob)
}
result_custom <- fastcpd(
  formula = y ~ . - 1,
  data = data,
  epsilon = 1e-5,
  cost = logistic_loss,
  cost_gradient = logistic_loss_gradient,
  cost_hessian = logistic_loss_hessian
)
cat(
  "Change points detected by built-in logistic regression model: ",
  result_builtin@cp_set, "\n",
  "Change points detected by custom logistic regression model: ",
  result_custom@cp_set, "\n",
  sep = ""
)
# k = function(x) 1 requests one additional epoch, i.e. two passes in total
result_custom_two_epochs <- fastcpd(
  formula = y ~ . - 1,
  data = data,
  k = function(x) 1,
  epsilon = 1e-5,
  cost = logistic_loss,
  cost_gradient = logistic_loss_gradient,
  cost_hessian = logistic_loss_hessian
)
summary(result_custom_two_epochs)
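
# Optional sanity check (a sketch, not part of the fastcpd API): refit each
# segment implied by the custom model's change points with stats::glm() and
# compare the estimates with the rows of the generating `theta`. This assumes
# `cp_set` stores the last index of each segment.
segment_ends <- c(result_custom@cp_set, nrow(data))
segment_starts <- c(1, result_custom@cp_set + 1)
glm_refits <- mapply(
  function(from, to) {
    coef(glm(y ~ . - 1, data = data[from:to, ], family = binomial()))
  },
  segment_starts,
  segment_ends
)
glm_refits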
### custom cost function: Huber regression
library(fastcpd)
set.seed(1)
n <- 400 + 300 + 500
p <- 5
x <- mvtnorm::rmvnorm(n, mean = rep(0, p), sigma = diag(p))
# three segments (400, 300 and 500 observations): true change points at 400
# and 700; the last three coefficients are zero in every segment
theta <- rbind(
  mvtnorm::rmvnorm(1, mean = rep(0, p - 3), sigma = diag(p - 3)),
  mvtnorm::rmvnorm(1, mean = rep(5, p - 3), sigma = diag(p - 3)),
  mvtnorm::rmvnorm(1, mean = rep(9, p - 3), sigma = diag(p - 3))
)
theta <- cbind(theta, matrix(0, 3, 3))
theta <- theta[rep(seq_len(3), c(400, 300, 500)), ]
y_true <- rowSums(x * theta)
# flip the sign of roughly 5% of the responses to create outliers
factor <- c(
  2 * stats::rbinom(400, size = 1, prob = 0.95) - 1,
  2 * stats::rbinom(300, size = 1, prob = 0.95) - 1,
  2 * stats::rbinom(500, size = 1, prob = 0.95) - 1
)
y <- factor * y_true + stats::rnorm(n)
data <- cbind.data.frame(y, x)
huber_threshold <- 1
# Huber loss over the whole segment
huber_loss <- function(data, theta) {
  residual <- data[, 1] - data[, -1, drop = FALSE] %*% theta
  indicator <- abs(residual) <= huber_threshold
  sum(
    residual^2 / 2 * indicator +
      huber_threshold * (
        abs(residual) - huber_threshold / 2
      ) * (1 - indicator)
  )
}
# gradient of the Huber loss at the most recent observation
huber_loss_gradient <- function(data, theta) {
  residual <- c(data[nrow(data), 1] - data[nrow(data), -1] %*% theta)
  if (abs(residual) <= huber_threshold) {
    -residual * data[nrow(data), -1]
  } else {
    -huber_threshold * sign(residual) * data[nrow(data), -1]
  }
}
# Hessian of the Huber loss at the most recent observation; a small ridge
# keeps the update positive definite outside the quadratic region
huber_loss_hessian <- function(data, theta) {
  residual <- c(data[nrow(data), 1] - data[nrow(data), -1] %*% theta)
  if (abs(residual) <= huber_threshold) {
    outer(data[nrow(data), -1], data[nrow(data), -1])
  } else {
    0.01 * diag(length(theta))
  }
}
huber_regression_result <- fastcpd(
  formula = y ~ . - 1,
  data = data,
  beta = (p + 1) * log(n) / 2,
  cost = huber_loss,
  cost_gradient = huber_loss_gradient,
  cost_hessian = huber_loss_hessian
)
summary(huber_regression_result)
}
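\donttest{
### follow-up sketch (an illustration, not part of the original example): refit
### each detected segment with a Huber M-estimator from MASS (a recommended
### package, assumed to be installed) and compare the fits with the generating
### coefficients in `theta`. MASS::rlm() uses its own tuning constant rather
### than `huber_threshold`, so the comparison is only approximate, and `cp_set`
### is assumed to store the last index of each segment.
if (requireNamespace("MASS", quietly = TRUE)) {
  segment_ends <- c(huber_regression_result@cp_set, n)
  segment_starts <- c(1, huber_regression_result@cp_set + 1)
  rlm_refits <- mapply(
    function(from, to) coef(MASS::rlm(x = x[from:to, ], y = y[from:to])),
    segment_starts,
    segment_ends
  )
  rlm_refits
}
}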