Title: | Robust Multi-Model Subset Selection |
---|---|
Description: | Efficient algorithms for generating ensembles of robust, sparse and diverse models via robust multi-model subset selection (RMSS). The robust ensembles are generated by minimizing the sum of the least trimmed square loss of the models in the ensembles under constraints for the size of the models and the sharing of the predictors. Tuning parameters for the robustness, sparsity and diversity of the robust ensemble are selected by cross-validation. |
Authors: | Anthony Christidis [aut, cre], Gabriela Cohen-Freue [aut] |
Maintainer: | Anthony Christidis <[email protected]> |
License: | GPL (>= 2) |
Version: | 1.1.1 |
Built: | 2024-11-01 05:40:21 UTC |
Source: | https://github.com/anthonychristidis/rmss |
coef.cv.RMSS
returns the coefficients for a cv.RMSS object.
## S3 method for class 'cv.RMSS' coef( object, h_ind = NULL, t_ind = NULL, u_ind = NULL, individual_models = FALSE, group_index = NULL, ... )
## S3 method for class 'cv.RMSS' coef( object, h_ind = NULL, t_ind = NULL, u_ind = NULL, individual_models = FALSE, group_index = NULL, ... )
object |
An object of class cv.RMSS. |
h_ind |
Index for robustness parameter. |
t_ind |
Index for sparsity parameter. |
u_ind |
Index for diversity parameter. |
individual_models |
Argument to determine whether the coefficients of each model are returned. Default is FALSE. |
group_index |
Groups included in the ensemble. Default setting includes all the groups. |
... |
Additional arguments for compatibility. |
The coefficients for the cv.RMSS object.
Anthony-Alexander Christidis, [email protected]
# Simulation parameters n <- 50 p <- 100 rho <- 0.8 rho.inactive <- 0.2 group.size <- 5 p.active <- 15 snr <- 2 contamination.prop <- 0.3 # Setting the seed set.seed(0) # Block Correlation sigma.mat <- matrix(0, p, p) sigma.mat[1:p.active, 1:p.active] <- rho.inactive for(group in 0:(p.active/group.size - 1)) sigma.mat[(group*group.size+1):(group*group.size+group.size), (group*group.size+1):(group*group.size+group.size)] <- rho diag(sigma.mat) <- 1 # Simulation of beta vector true.beta <- c(runif(p.active, 0, 5)*(-1)^rbinom(p.active, 1, 0.7), rep(0, p - p.active)) # Setting the SD of the variance sigma <- as.numeric(sqrt(t(true.beta) %*% sigma.mat %*% true.beta)/sqrt(snr)) # Simulation of test data m <- 2e3 x_test <- mvnfast::rmvn(m, mu = rep(0, p), sigma = sigma.mat) y_test <- x_test %*% true.beta + rnorm(m, 0, sigma) # Simulation of uncontaminated data x <- mvnfast::rmvn(n, mu = rep(0, p), sigma = sigma.mat) y <- x %*% true.beta + rnorm(n, 0, sigma) # Contamination of data contamination_indices <- 1:floor(n*contamination.prop) k_lev <- 2 k_slo <- 100 x_train <- x y_train <- y beta_cont <- true.beta beta_cont[true.beta!=0] <- beta_cont[true.beta!=0]*(1 + k_slo) beta_cont[true.beta==0] <- k_slo*max(abs(true.beta)) for(cont_id in contamination_indices){ a <- runif(p, min = -1, max = 1) a <- a - as.numeric((1/p)*t(a) %*% rep(1, p)) x_train[cont_id,] <- mvnfast::rmvn(1, rep(0, p), 0.1^2*diag(p)) + k_lev * a / as.numeric(sqrt(t(a) %*% solve(sigma.mat) %*% a)) y_train[cont_id] <- t(x_train[cont_id,]) %*% beta_cont } # CV RMSS rmss_fit <- cv.RMSS(x = x_train, y = y_train, n_models = 3, h_grid = c(35), t_grid = c(6, 8, 10), u_grid = c(1:3), initial_estimator = "robStepSplitReg", tolerance = 1e-1, max_iter = 1e3, neighborhood_search = FALSE, neighborhood_search_tolerance = 1e-1, n_folds = 5, alpha = 1/4, gamma = 1, n_threads = 1) rmss_coefs <- coef(rmss_fit, h_ind = rmss_fit$h_opt, t_ind = rmss_fit$t_opt, u_ind = rmss_fit$u_opt, group_index = 1:rmss_fit$n_models) sens_rmss <- sum(which((rmss_coefs[-1]!=0)) <= p.active)/p.active spec_rmss <- sum(which((rmss_coefs[-1]!=0)) <= p.active)/sum(rmss_coefs[-1]!=0) rmss_preds <- predict(rmss_fit, newx = x_test, h_ind = rmss_fit$h_opt, t_ind = rmss_fit$t_opt, u_ind = rmss_fit$u_opt, group_index = 1:rmss_fit$n_models, dynamic = FALSE) rmss_mspe <- mean((y_test - rmss_preds)^2)/sigma^2
# Simulation parameters n <- 50 p <- 100 rho <- 0.8 rho.inactive <- 0.2 group.size <- 5 p.active <- 15 snr <- 2 contamination.prop <- 0.3 # Setting the seed set.seed(0) # Block Correlation sigma.mat <- matrix(0, p, p) sigma.mat[1:p.active, 1:p.active] <- rho.inactive for(group in 0:(p.active/group.size - 1)) sigma.mat[(group*group.size+1):(group*group.size+group.size), (group*group.size+1):(group*group.size+group.size)] <- rho diag(sigma.mat) <- 1 # Simulation of beta vector true.beta <- c(runif(p.active, 0, 5)*(-1)^rbinom(p.active, 1, 0.7), rep(0, p - p.active)) # Setting the SD of the variance sigma <- as.numeric(sqrt(t(true.beta) %*% sigma.mat %*% true.beta)/sqrt(snr)) # Simulation of test data m <- 2e3 x_test <- mvnfast::rmvn(m, mu = rep(0, p), sigma = sigma.mat) y_test <- x_test %*% true.beta + rnorm(m, 0, sigma) # Simulation of uncontaminated data x <- mvnfast::rmvn(n, mu = rep(0, p), sigma = sigma.mat) y <- x %*% true.beta + rnorm(n, 0, sigma) # Contamination of data contamination_indices <- 1:floor(n*contamination.prop) k_lev <- 2 k_slo <- 100 x_train <- x y_train <- y beta_cont <- true.beta beta_cont[true.beta!=0] <- beta_cont[true.beta!=0]*(1 + k_slo) beta_cont[true.beta==0] <- k_slo*max(abs(true.beta)) for(cont_id in contamination_indices){ a <- runif(p, min = -1, max = 1) a <- a - as.numeric((1/p)*t(a) %*% rep(1, p)) x_train[cont_id,] <- mvnfast::rmvn(1, rep(0, p), 0.1^2*diag(p)) + k_lev * a / as.numeric(sqrt(t(a) %*% solve(sigma.mat) %*% a)) y_train[cont_id] <- t(x_train[cont_id,]) %*% beta_cont } # CV RMSS rmss_fit <- cv.RMSS(x = x_train, y = y_train, n_models = 3, h_grid = c(35), t_grid = c(6, 8, 10), u_grid = c(1:3), initial_estimator = "robStepSplitReg", tolerance = 1e-1, max_iter = 1e3, neighborhood_search = FALSE, neighborhood_search_tolerance = 1e-1, n_folds = 5, alpha = 1/4, gamma = 1, n_threads = 1) rmss_coefs <- coef(rmss_fit, h_ind = rmss_fit$h_opt, t_ind = rmss_fit$t_opt, u_ind = rmss_fit$u_opt, group_index = 1:rmss_fit$n_models) sens_rmss <- sum(which((rmss_coefs[-1]!=0)) <= p.active)/p.active spec_rmss <- sum(which((rmss_coefs[-1]!=0)) <= p.active)/sum(rmss_coefs[-1]!=0) rmss_preds <- predict(rmss_fit, newx = x_test, h_ind = rmss_fit$h_opt, t_ind = rmss_fit$t_opt, u_ind = rmss_fit$u_opt, group_index = 1:rmss_fit$n_models, dynamic = FALSE) rmss_mspe <- mean((y_test - rmss_preds)^2)/sigma^2
coef.RMSS
returns the coefficients for a RMSS object.
## S3 method for class 'RMSS' coef( object, h_ind, t_ind, u_ind, individual_models = FALSE, group_index = NULL, ... )
## S3 method for class 'RMSS' coef( object, h_ind, t_ind, u_ind, individual_models = FALSE, group_index = NULL, ... )
object |
An object of class RMSS. |
h_ind |
Index for robustness parameter. |
t_ind |
Index for sparsity parameter. |
u_ind |
Index for diversity parameter. |
individual_models |
Argument to determine whether the coefficients of each model are returned. Default is FALSE. |
group_index |
Groups included in the ensemble. Default setting includes all the groups. |
... |
Additional arguments for compatibility. |
The coefficients for the RMSS object.
Anthony-Alexander Christidis, [email protected]
# Simulation parameters n <- 50 p <- 100 rho <- 0.8 rho.inactive <- 0.2 group.size <- 5 p.active <- 15 snr <- 2 contamination.prop <- 0.3 # Setting the seed set.seed(0) # Block Correlation sigma.mat <- matrix(0, p, p) sigma.mat[1:p.active, 1:p.active] <- rho.inactive for(group in 0:(p.active/group.size - 1)) sigma.mat[(group*group.size+1):(group*group.size+group.size), (group*group.size+1):(group*group.size+group.size)] <- rho diag(sigma.mat) <- 1 # Simulation of beta vector true.beta <- c(runif(p.active, 0, 5)*(-1)^rbinom(p.active, 1, 0.7), rep(0, p - p.active)) # Setting the SD of the variance sigma <- as.numeric(sqrt(t(true.beta) %*% sigma.mat %*% true.beta)/sqrt(snr)) # Simulation of test data m <- 2e3 x_test <- mvnfast::rmvn(m, mu = rep(0, p), sigma = sigma.mat) y_test <- x_test %*% true.beta + rnorm(m, 0, sigma) # Simulation of uncontaminated data x <- mvnfast::rmvn(n, mu = rep(0, p), sigma = sigma.mat) y <- x %*% true.beta + rnorm(n, 0, sigma) # Contamination of data contamination_indices <- 1:floor(n*contamination.prop) k_lev <- 2 k_slo <- 100 x_train <- x y_train <- y beta_cont <- true.beta beta_cont[true.beta!=0] <- beta_cont[true.beta!=0]*(1 + k_slo) beta_cont[true.beta==0] <- k_slo*max(abs(true.beta)) for(cont_id in contamination_indices){ a <- runif(p, min = -1, max = 1) a <- a - as.numeric((1/p)*t(a) %*% rep(1, p)) x_train[cont_id,] <- mvnfast::rmvn(1, rep(0, p), 0.1^2*diag(p)) + k_lev * a / as.numeric(sqrt(t(a) %*% solve(sigma.mat) %*% a)) y_train[cont_id] <- t(x_train[cont_id,]) %*% beta_cont } # RMSS rmss_fit <- RMSS(x = x_train, y = y_train, n_models = 3, h_grid = c(35), t_grid = c(6, 8, 10), u_grid = c(1:3), initial_estimator = "robStepSplitReg", tolerance = 1e-1, max_iter = 1e3, neighborhood_search = FALSE, neighborhood_search_tolerance = 1e-1) rmss_coefs <- coef(rmss_fit, h_ind = 1, t_ind = 2, u_ind = 1, group_index = 1:rmss_fit$n_models) sens_rmss <- sum(which((rmss_coefs[-1]!=0)) <= p.active)/p.active spec_rmss <- sum(which((rmss_coefs[-1]!=0)) <= p.active)/sum(rmss_coefs[-1]!=0) rmss_preds <- predict(rmss_fit, newx = x_test, h_ind = 1, t_ind = 2, u_ind = 1, group_index = 1:rmss_fit$n_models, dynamic = FALSE) rmss_mspe <- mean((y_test - rmss_preds)^2)/sigma^2
# Simulation parameters n <- 50 p <- 100 rho <- 0.8 rho.inactive <- 0.2 group.size <- 5 p.active <- 15 snr <- 2 contamination.prop <- 0.3 # Setting the seed set.seed(0) # Block Correlation sigma.mat <- matrix(0, p, p) sigma.mat[1:p.active, 1:p.active] <- rho.inactive for(group in 0:(p.active/group.size - 1)) sigma.mat[(group*group.size+1):(group*group.size+group.size), (group*group.size+1):(group*group.size+group.size)] <- rho diag(sigma.mat) <- 1 # Simulation of beta vector true.beta <- c(runif(p.active, 0, 5)*(-1)^rbinom(p.active, 1, 0.7), rep(0, p - p.active)) # Setting the SD of the variance sigma <- as.numeric(sqrt(t(true.beta) %*% sigma.mat %*% true.beta)/sqrt(snr)) # Simulation of test data m <- 2e3 x_test <- mvnfast::rmvn(m, mu = rep(0, p), sigma = sigma.mat) y_test <- x_test %*% true.beta + rnorm(m, 0, sigma) # Simulation of uncontaminated data x <- mvnfast::rmvn(n, mu = rep(0, p), sigma = sigma.mat) y <- x %*% true.beta + rnorm(n, 0, sigma) # Contamination of data contamination_indices <- 1:floor(n*contamination.prop) k_lev <- 2 k_slo <- 100 x_train <- x y_train <- y beta_cont <- true.beta beta_cont[true.beta!=0] <- beta_cont[true.beta!=0]*(1 + k_slo) beta_cont[true.beta==0] <- k_slo*max(abs(true.beta)) for(cont_id in contamination_indices){ a <- runif(p, min = -1, max = 1) a <- a - as.numeric((1/p)*t(a) %*% rep(1, p)) x_train[cont_id,] <- mvnfast::rmvn(1, rep(0, p), 0.1^2*diag(p)) + k_lev * a / as.numeric(sqrt(t(a) %*% solve(sigma.mat) %*% a)) y_train[cont_id] <- t(x_train[cont_id,]) %*% beta_cont } # RMSS rmss_fit <- RMSS(x = x_train, y = y_train, n_models = 3, h_grid = c(35), t_grid = c(6, 8, 10), u_grid = c(1:3), initial_estimator = "robStepSplitReg", tolerance = 1e-1, max_iter = 1e3, neighborhood_search = FALSE, neighborhood_search_tolerance = 1e-1) rmss_coefs <- coef(rmss_fit, h_ind = 1, t_ind = 2, u_ind = 1, group_index = 1:rmss_fit$n_models) sens_rmss <- sum(which((rmss_coefs[-1]!=0)) <= p.active)/p.active spec_rmss <- sum(which((rmss_coefs[-1]!=0)) <= p.active)/sum(rmss_coefs[-1]!=0) rmss_preds <- predict(rmss_fit, newx = x_test, h_ind = 1, t_ind = 2, u_ind = 1, group_index = 1:rmss_fit$n_models, dynamic = FALSE) rmss_mspe <- mean((y_test - rmss_preds)^2)/sigma^2
cv.RMSS
performs the cross-validation procedure for robust multi-model subset selection.
cv.RMSS( x, y, n_models, h_grid, t_grid, u_grid, initial_estimator = c("robStepSplitReg", "srlars")[1], tolerance = 0.1, max_iter = 1000, neighborhood_search = FALSE, neighborhood_search_tolerance = 0.1, cv_criterion = c("tau", "trimmed")[1], n_folds = 5, alpha = 1/4, gamma = 1, n_threads = 1 )
cv.RMSS( x, y, n_models, h_grid, t_grid, u_grid, initial_estimator = c("robStepSplitReg", "srlars")[1], tolerance = 0.1, max_iter = 1000, neighborhood_search = FALSE, neighborhood_search_tolerance = 0.1, cv_criterion = c("tau", "trimmed")[1], n_folds = 5, alpha = 1/4, gamma = 1, n_threads = 1 )
x |
Design matrix. |
y |
Response vector. |
n_models |
Number of models into which the variables are split. |
h_grid |
Grid for robustness parameter. |
t_grid |
Grid for sparsity parameter. |
u_grid |
Grid for diversity parameter. |
initial_estimator |
Method used for initial estimator. Must be one of "robStepSplitReg" (default) or "srlars". |
tolerance |
Tolerance level for convergence of PSBGD algorithm. |
max_iter |
Maximum number of iterations in PSBGD algorithm. |
neighborhood_search |
Neighborhood search to improve solution. Default is FALSE. |
neighborhood_search_tolerance |
Tolerance parameter for neighborhood search. Default is 1e-1. |
cv_criterion |
Criterion to use for cross-validation procedure. Must be one of "tau" (default) or "trimmed". |
n_folds |
Number of folds for cross-validation procedure. Default is 5. |
alpha |
Proportion of trimmed samples for cross-validation procedure. Default is 1/4. |
gamma |
Weight parameter for ensemble MSPE (gamma) and average MSPE of individual models (1 - gamma). Default is 1. |
n_threads |
Number of threads used by OpenMP for multithreading over the folds. Default is 1. |
An object of class cv.RMSS
Anthony-Alexander Christidis, [email protected]
# Simulation parameters n <- 50 p <- 100 rho <- 0.8 rho.inactive <- 0.2 group.size <- 5 p.active <- 15 snr <- 2 contamination.prop <- 0.3 # Setting the seed set.seed(0) # Block Correlation sigma.mat <- matrix(0, p, p) sigma.mat[1:p.active, 1:p.active] <- rho.inactive for(group in 0:(p.active/group.size - 1)) sigma.mat[(group*group.size+1):(group*group.size+group.size), (group*group.size+1):(group*group.size+group.size)] <- rho diag(sigma.mat) <- 1 # Simulation of beta vector true.beta <- c(runif(p.active, 0, 5)*(-1)^rbinom(p.active, 1, 0.7), rep(0, p - p.active)) # Setting the SD of the variance sigma <- as.numeric(sqrt(t(true.beta) %*% sigma.mat %*% true.beta)/sqrt(snr)) # Simulation of test data m <- 2e3 x_test <- mvnfast::rmvn(m, mu = rep(0, p), sigma = sigma.mat) y_test <- x_test %*% true.beta + rnorm(m, 0, sigma) # Simulation of uncontaminated data x <- mvnfast::rmvn(n, mu = rep(0, p), sigma = sigma.mat) y <- x %*% true.beta + rnorm(n, 0, sigma) # Contamination of data contamination_indices <- 1:floor(n*contamination.prop) k_lev <- 2 k_slo <- 100 x_train <- x y_train <- y beta_cont <- true.beta beta_cont[true.beta!=0] <- beta_cont[true.beta!=0]*(1 + k_slo) beta_cont[true.beta==0] <- k_slo*max(abs(true.beta)) for(cont_id in contamination_indices){ a <- runif(p, min = -1, max = 1) a <- a - as.numeric((1/p)*t(a) %*% rep(1, p)) x_train[cont_id,] <- mvnfast::rmvn(1, rep(0, p), 0.1^2*diag(p)) + k_lev * a / as.numeric(sqrt(t(a) %*% solve(sigma.mat) %*% a)) y_train[cont_id] <- t(x_train[cont_id,]) %*% beta_cont } # CV RMSS rmss_fit <- cv.RMSS(x = x_train, y = y_train, n_models = 3, h_grid = c(35), t_grid = c(6, 8, 10), u_grid = c(1:3), initial_estimator = "robStepSplitReg", tolerance = 1e-1, max_iter = 1e3, neighborhood_search = FALSE, neighborhood_search_tolerance = 1e-1, n_folds = 5, alpha = 1/4, gamma = 1, n_threads = 1) rmss_coefs <- coef(rmss_fit, h_ind = rmss_fit$h_opt, t_ind = rmss_fit$t_opt, u_ind = rmss_fit$u_opt, group_index = 1:rmss_fit$n_models) sens_rmss <- sum(which((rmss_coefs[-1]!=0)) <= p.active)/p.active spec_rmss <- sum(which((rmss_coefs[-1]!=0)) <= p.active)/sum(rmss_coefs[-1]!=0) rmss_preds <- predict(rmss_fit, newx = x_test, h_ind = rmss_fit$h_opt, t_ind = rmss_fit$t_opt, u_ind = rmss_fit$u_opt, group_index = 1:rmss_fit$n_models, dynamic = FALSE) rmss_mspe <- mean((y_test - rmss_preds)^2)/sigma^2
# Simulation parameters n <- 50 p <- 100 rho <- 0.8 rho.inactive <- 0.2 group.size <- 5 p.active <- 15 snr <- 2 contamination.prop <- 0.3 # Setting the seed set.seed(0) # Block Correlation sigma.mat <- matrix(0, p, p) sigma.mat[1:p.active, 1:p.active] <- rho.inactive for(group in 0:(p.active/group.size - 1)) sigma.mat[(group*group.size+1):(group*group.size+group.size), (group*group.size+1):(group*group.size+group.size)] <- rho diag(sigma.mat) <- 1 # Simulation of beta vector true.beta <- c(runif(p.active, 0, 5)*(-1)^rbinom(p.active, 1, 0.7), rep(0, p - p.active)) # Setting the SD of the variance sigma <- as.numeric(sqrt(t(true.beta) %*% sigma.mat %*% true.beta)/sqrt(snr)) # Simulation of test data m <- 2e3 x_test <- mvnfast::rmvn(m, mu = rep(0, p), sigma = sigma.mat) y_test <- x_test %*% true.beta + rnorm(m, 0, sigma) # Simulation of uncontaminated data x <- mvnfast::rmvn(n, mu = rep(0, p), sigma = sigma.mat) y <- x %*% true.beta + rnorm(n, 0, sigma) # Contamination of data contamination_indices <- 1:floor(n*contamination.prop) k_lev <- 2 k_slo <- 100 x_train <- x y_train <- y beta_cont <- true.beta beta_cont[true.beta!=0] <- beta_cont[true.beta!=0]*(1 + k_slo) beta_cont[true.beta==0] <- k_slo*max(abs(true.beta)) for(cont_id in contamination_indices){ a <- runif(p, min = -1, max = 1) a <- a - as.numeric((1/p)*t(a) %*% rep(1, p)) x_train[cont_id,] <- mvnfast::rmvn(1, rep(0, p), 0.1^2*diag(p)) + k_lev * a / as.numeric(sqrt(t(a) %*% solve(sigma.mat) %*% a)) y_train[cont_id] <- t(x_train[cont_id,]) %*% beta_cont } # CV RMSS rmss_fit <- cv.RMSS(x = x_train, y = y_train, n_models = 3, h_grid = c(35), t_grid = c(6, 8, 10), u_grid = c(1:3), initial_estimator = "robStepSplitReg", tolerance = 1e-1, max_iter = 1e3, neighborhood_search = FALSE, neighborhood_search_tolerance = 1e-1, n_folds = 5, alpha = 1/4, gamma = 1, n_threads = 1) rmss_coefs <- coef(rmss_fit, h_ind = rmss_fit$h_opt, t_ind = rmss_fit$t_opt, u_ind = rmss_fit$u_opt, group_index = 1:rmss_fit$n_models) sens_rmss <- sum(which((rmss_coefs[-1]!=0)) <= p.active)/p.active spec_rmss <- sum(which((rmss_coefs[-1]!=0)) <= p.active)/sum(rmss_coefs[-1]!=0) rmss_preds <- predict(rmss_fit, newx = x_test, h_ind = rmss_fit$h_opt, t_ind = rmss_fit$t_opt, u_ind = rmss_fit$u_opt, group_index = 1:rmss_fit$n_models, dynamic = FALSE) rmss_mspe <- mean((y_test - rmss_preds)^2)/sigma^2
predict.cv.RMSS
returns the predictions for a cv.RMSS object.
## S3 method for class 'cv.RMSS' predict( object, newx, h_ind = NULL, t_ind = NULL, u_ind = NULL, group_index = NULL, dynamic = FALSE, ... )
## S3 method for class 'cv.RMSS' predict( object, newx, h_ind = NULL, t_ind = NULL, u_ind = NULL, group_index = NULL, dynamic = FALSE, ... )
object |
An object of class cv.RMSS. |
newx |
New data for predictions. |
h_ind |
Index for robustness parameter. |
t_ind |
Index for sparsity parameter. |
u_ind |
Index for diversity parameter. |
group_index |
Groups included in the ensemble. Default setting includes all the groups. |
dynamic |
Argument to determine whether dynamic predictions are used based on deviating cells. Default is FALSE. |
... |
Additional arguments for compatibility. |
The predictions for the cv.RMSS object.
Anthony-Alexander Christidis, [email protected]
# Simulation parameters n <- 50 p <- 100 rho <- 0.8 rho.inactive <- 0.2 group.size <- 5 p.active <- 15 snr <- 2 contamination.prop <- 0.3 # Setting the seed set.seed(0) # Block Correlation sigma.mat <- matrix(0, p, p) sigma.mat[1:p.active, 1:p.active] <- rho.inactive for(group in 0:(p.active/group.size - 1)) sigma.mat[(group*group.size+1):(group*group.size+group.size), (group*group.size+1):(group*group.size+group.size)] <- rho diag(sigma.mat) <- 1 # Simulation of beta vector true.beta <- c(runif(p.active, 0, 5)*(-1)^rbinom(p.active, 1, 0.7), rep(0, p - p.active)) # Setting the SD of the variance sigma <- as.numeric(sqrt(t(true.beta) %*% sigma.mat %*% true.beta)/sqrt(snr)) # Simulation of test data m <- 2e3 x_test <- mvnfast::rmvn(m, mu = rep(0, p), sigma = sigma.mat) y_test <- x_test %*% true.beta + rnorm(m, 0, sigma) # Simulation of uncontaminated data x <- mvnfast::rmvn(n, mu = rep(0, p), sigma = sigma.mat) y <- x %*% true.beta + rnorm(n, 0, sigma) # Contamination of data contamination_indices <- 1:floor(n*contamination.prop) k_lev <- 2 k_slo <- 100 x_train <- x y_train <- y beta_cont <- true.beta beta_cont[true.beta!=0] <- beta_cont[true.beta!=0]*(1 + k_slo) beta_cont[true.beta==0] <- k_slo*max(abs(true.beta)) for(cont_id in contamination_indices){ a <- runif(p, min = -1, max = 1) a <- a - as.numeric((1/p)*t(a) %*% rep(1, p)) x_train[cont_id,] <- mvnfast::rmvn(1, rep(0, p), 0.1^2*diag(p)) + k_lev * a / as.numeric(sqrt(t(a) %*% solve(sigma.mat) %*% a)) y_train[cont_id] <- t(x_train[cont_id,]) %*% beta_cont } # CV RMSS rmss_fit <- cv.RMSS(x = x_train, y = y_train, n_models = 3, h_grid = c(35), t_grid = c(6, 8, 10), u_grid = c(1:3), initial_estimator = "robStepSplitReg", tolerance = 1e-1, max_iter = 1e3, neighborhood_search = FALSE, neighborhood_search_tolerance = 1e-1, n_folds = 5, alpha = 1/4, gamma = 1, n_threads = 1) rmss_coefs <- coef(rmss_fit, h_ind = rmss_fit$h_opt, t_ind = rmss_fit$t_opt, u_ind = rmss_fit$u_opt, group_index = 1:rmss_fit$n_models) sens_rmss <- sum(which((rmss_coefs[-1]!=0)) <= p.active)/p.active spec_rmss <- sum(which((rmss_coefs[-1]!=0)) <= p.active)/sum(rmss_coefs[-1]!=0) rmss_preds <- predict(rmss_fit, newx = x_test, h_ind = rmss_fit$h_opt, t_ind = rmss_fit$t_opt, u_ind = rmss_fit$u_opt, group_index = 1:rmss_fit$n_models, dynamic = FALSE) rmss_mspe <- mean((y_test - rmss_preds)^2)/sigma^2
# Simulation parameters n <- 50 p <- 100 rho <- 0.8 rho.inactive <- 0.2 group.size <- 5 p.active <- 15 snr <- 2 contamination.prop <- 0.3 # Setting the seed set.seed(0) # Block Correlation sigma.mat <- matrix(0, p, p) sigma.mat[1:p.active, 1:p.active] <- rho.inactive for(group in 0:(p.active/group.size - 1)) sigma.mat[(group*group.size+1):(group*group.size+group.size), (group*group.size+1):(group*group.size+group.size)] <- rho diag(sigma.mat) <- 1 # Simulation of beta vector true.beta <- c(runif(p.active, 0, 5)*(-1)^rbinom(p.active, 1, 0.7), rep(0, p - p.active)) # Setting the SD of the variance sigma <- as.numeric(sqrt(t(true.beta) %*% sigma.mat %*% true.beta)/sqrt(snr)) # Simulation of test data m <- 2e3 x_test <- mvnfast::rmvn(m, mu = rep(0, p), sigma = sigma.mat) y_test <- x_test %*% true.beta + rnorm(m, 0, sigma) # Simulation of uncontaminated data x <- mvnfast::rmvn(n, mu = rep(0, p), sigma = sigma.mat) y <- x %*% true.beta + rnorm(n, 0, sigma) # Contamination of data contamination_indices <- 1:floor(n*contamination.prop) k_lev <- 2 k_slo <- 100 x_train <- x y_train <- y beta_cont <- true.beta beta_cont[true.beta!=0] <- beta_cont[true.beta!=0]*(1 + k_slo) beta_cont[true.beta==0] <- k_slo*max(abs(true.beta)) for(cont_id in contamination_indices){ a <- runif(p, min = -1, max = 1) a <- a - as.numeric((1/p)*t(a) %*% rep(1, p)) x_train[cont_id,] <- mvnfast::rmvn(1, rep(0, p), 0.1^2*diag(p)) + k_lev * a / as.numeric(sqrt(t(a) %*% solve(sigma.mat) %*% a)) y_train[cont_id] <- t(x_train[cont_id,]) %*% beta_cont } # CV RMSS rmss_fit <- cv.RMSS(x = x_train, y = y_train, n_models = 3, h_grid = c(35), t_grid = c(6, 8, 10), u_grid = c(1:3), initial_estimator = "robStepSplitReg", tolerance = 1e-1, max_iter = 1e3, neighborhood_search = FALSE, neighborhood_search_tolerance = 1e-1, n_folds = 5, alpha = 1/4, gamma = 1, n_threads = 1) rmss_coefs <- coef(rmss_fit, h_ind = rmss_fit$h_opt, t_ind = rmss_fit$t_opt, u_ind = rmss_fit$u_opt, group_index = 1:rmss_fit$n_models) sens_rmss <- sum(which((rmss_coefs[-1]!=0)) <= p.active)/p.active spec_rmss <- sum(which((rmss_coefs[-1]!=0)) <= p.active)/sum(rmss_coefs[-1]!=0) rmss_preds <- predict(rmss_fit, newx = x_test, h_ind = rmss_fit$h_opt, t_ind = rmss_fit$t_opt, u_ind = rmss_fit$u_opt, group_index = 1:rmss_fit$n_models, dynamic = FALSE) rmss_mspe <- mean((y_test - rmss_preds)^2)/sigma^2
predict.RMSS
returns the predictions for a RMSS object.
## S3 method for class 'RMSS' predict( object, newx, h_ind, t_ind, u_ind, group_index = NULL, dynamic = FALSE, ... )
## S3 method for class 'RMSS' predict( object, newx, h_ind, t_ind, u_ind, group_index = NULL, dynamic = FALSE, ... )
object |
An object of class RMSS. |
newx |
New data for predictions. |
h_ind |
Index for robustness parameter. |
t_ind |
Index for sparsity parameter. |
u_ind |
Index for diversity parameter. |
group_index |
Groups included in the ensemble. Default setting includes all the groups. |
dynamic |
Argument to determine whether dynamic predictions are used based on deviating cells. Default is FALSE. |
... |
Additional arguments for compatibility. |
The predictions for the RMSS object.
Anthony-Alexander Christidis, [email protected]
# Simulation parameters n <- 50 p <- 100 rho <- 0.8 rho.inactive <- 0.2 group.size <- 5 p.active <- 15 snr <- 2 contamination.prop <- 0.3 # Setting the seed set.seed(0) # Block Correlation sigma.mat <- matrix(0, p, p) sigma.mat[1:p.active, 1:p.active] <- rho.inactive for(group in 0:(p.active/group.size - 1)) sigma.mat[(group*group.size+1):(group*group.size+group.size), (group*group.size+1):(group*group.size+group.size)] <- rho diag(sigma.mat) <- 1 # Simulation of beta vector true.beta <- c(runif(p.active, 0, 5)*(-1)^rbinom(p.active, 1, 0.7), rep(0, p - p.active)) # Setting the SD of the variance sigma <- as.numeric(sqrt(t(true.beta) %*% sigma.mat %*% true.beta)/sqrt(snr)) # Simulation of test data m <- 2e3 x_test <- mvnfast::rmvn(m, mu = rep(0, p), sigma = sigma.mat) y_test <- x_test %*% true.beta + rnorm(m, 0, sigma) # Simulation of uncontaminated data x <- mvnfast::rmvn(n, mu = rep(0, p), sigma = sigma.mat) y <- x %*% true.beta + rnorm(n, 0, sigma) # Contamination of data contamination_indices <- 1:floor(n*contamination.prop) k_lev <- 2 k_slo <- 100 x_train <- x y_train <- y beta_cont <- true.beta beta_cont[true.beta!=0] <- beta_cont[true.beta!=0]*(1 + k_slo) beta_cont[true.beta==0] <- k_slo*max(abs(true.beta)) for(cont_id in contamination_indices){ a <- runif(p, min = -1, max = 1) a <- a - as.numeric((1/p)*t(a) %*% rep(1, p)) x_train[cont_id,] <- mvnfast::rmvn(1, rep(0, p), 0.1^2*diag(p)) + k_lev * a / as.numeric(sqrt(t(a) %*% solve(sigma.mat) %*% a)) y_train[cont_id] <- t(x_train[cont_id,]) %*% beta_cont } # RMSS rmss_fit <- RMSS(x = x_train, y = y_train, n_models = 3, h_grid = c(35), t_grid = c(6, 8, 10), u_grid = c(1:3), initial_estimator = "robStepSplitReg", tolerance = 1e-1, max_iter = 1e3, neighborhood_search = FALSE, neighborhood_search_tolerance = 1e-1) rmss_coefs <- coef(rmss_fit, h_ind = 1, t_ind = 2, u_ind = 1, group_index = 1:rmss_fit$n_models) sens_rmss <- sum(which((rmss_coefs[-1]!=0)) <= p.active)/p.active spec_rmss <- sum(which((rmss_coefs[-1]!=0)) <= p.active)/sum(rmss_coefs[-1]!=0) rmss_preds <- predict(rmss_fit, newx = x_test, h_ind = 1, t_ind = 2, u_ind = 1, group_index = 1:rmss_fit$n_models, dynamic = FALSE) rmss_mspe <- mean((y_test - rmss_preds)^2)/sigma^2
# Simulation parameters n <- 50 p <- 100 rho <- 0.8 rho.inactive <- 0.2 group.size <- 5 p.active <- 15 snr <- 2 contamination.prop <- 0.3 # Setting the seed set.seed(0) # Block Correlation sigma.mat <- matrix(0, p, p) sigma.mat[1:p.active, 1:p.active] <- rho.inactive for(group in 0:(p.active/group.size - 1)) sigma.mat[(group*group.size+1):(group*group.size+group.size), (group*group.size+1):(group*group.size+group.size)] <- rho diag(sigma.mat) <- 1 # Simulation of beta vector true.beta <- c(runif(p.active, 0, 5)*(-1)^rbinom(p.active, 1, 0.7), rep(0, p - p.active)) # Setting the SD of the variance sigma <- as.numeric(sqrt(t(true.beta) %*% sigma.mat %*% true.beta)/sqrt(snr)) # Simulation of test data m <- 2e3 x_test <- mvnfast::rmvn(m, mu = rep(0, p), sigma = sigma.mat) y_test <- x_test %*% true.beta + rnorm(m, 0, sigma) # Simulation of uncontaminated data x <- mvnfast::rmvn(n, mu = rep(0, p), sigma = sigma.mat) y <- x %*% true.beta + rnorm(n, 0, sigma) # Contamination of data contamination_indices <- 1:floor(n*contamination.prop) k_lev <- 2 k_slo <- 100 x_train <- x y_train <- y beta_cont <- true.beta beta_cont[true.beta!=0] <- beta_cont[true.beta!=0]*(1 + k_slo) beta_cont[true.beta==0] <- k_slo*max(abs(true.beta)) for(cont_id in contamination_indices){ a <- runif(p, min = -1, max = 1) a <- a - as.numeric((1/p)*t(a) %*% rep(1, p)) x_train[cont_id,] <- mvnfast::rmvn(1, rep(0, p), 0.1^2*diag(p)) + k_lev * a / as.numeric(sqrt(t(a) %*% solve(sigma.mat) %*% a)) y_train[cont_id] <- t(x_train[cont_id,]) %*% beta_cont } # RMSS rmss_fit <- RMSS(x = x_train, y = y_train, n_models = 3, h_grid = c(35), t_grid = c(6, 8, 10), u_grid = c(1:3), initial_estimator = "robStepSplitReg", tolerance = 1e-1, max_iter = 1e3, neighborhood_search = FALSE, neighborhood_search_tolerance = 1e-1) rmss_coefs <- coef(rmss_fit, h_ind = 1, t_ind = 2, u_ind = 1, group_index = 1:rmss_fit$n_models) sens_rmss <- sum(which((rmss_coefs[-1]!=0)) <= p.active)/p.active spec_rmss <- sum(which((rmss_coefs[-1]!=0)) <= p.active)/sum(rmss_coefs[-1]!=0) rmss_preds <- predict(rmss_fit, newx = x_test, h_ind = 1, t_ind = 2, u_ind = 1, group_index = 1:rmss_fit$n_models, dynamic = FALSE) rmss_mspe <- mean((y_test - rmss_preds)^2)/sigma^2
RMSS
performs robust multi-model subset selection.
RMSS( x, y, n_models, h_grid, t_grid, u_grid, initial_estimator = c("robStepSplitReg", "srlars")[1], tolerance = 0.1, max_iter = 1000, neighborhood_search = FALSE, neighborhood_search_tolerance = 0.1 )
RMSS( x, y, n_models, h_grid, t_grid, u_grid, initial_estimator = c("robStepSplitReg", "srlars")[1], tolerance = 0.1, max_iter = 1000, neighborhood_search = FALSE, neighborhood_search_tolerance = 0.1 )
x |
Design matrix. |
y |
Response vector. |
n_models |
Number of models into which the variables are split. |
h_grid |
Grid for robustness parameter. |
t_grid |
Grid for sparsity parameter. |
u_grid |
Grid for diversity parameter. |
initial_estimator |
Method used for initial estimator. Must be one of "robStepSplitReg" (default) or "srlars". |
tolerance |
Tolerance level for convergence of PSBGD algorithm. |
max_iter |
Maximum number of iterations in PSBGD algorithm. |
neighborhood_search |
Neighborhood search to improve solution. Default is FALSE. |
neighborhood_search_tolerance |
Tolerance parameter for neighborhood search. Default is 1e-1. |
An object of class RMSS
Anthony-Alexander Christidis, [email protected]
# Simulation parameters n <- 50 p <- 100 rho <- 0.8 rho.inactive <- 0.2 group.size <- 5 p.active <- 15 snr <- 2 contamination.prop <- 0.3 # Setting the seed set.seed(0) # Block Correlation sigma.mat <- matrix(0, p, p) sigma.mat[1:p.active, 1:p.active] <- rho.inactive for(group in 0:(p.active/group.size - 1)) sigma.mat[(group*group.size+1):(group*group.size+group.size), (group*group.size+1):(group*group.size+group.size)] <- rho diag(sigma.mat) <- 1 # Simulation of beta vector true.beta <- c(runif(p.active, 0, 5)*(-1)^rbinom(p.active, 1, 0.7), rep(0, p - p.active)) # Setting the SD of the variance sigma <- as.numeric(sqrt(t(true.beta) %*% sigma.mat %*% true.beta)/sqrt(snr)) # Simulation of test data m <- 2e3 x_test <- mvnfast::rmvn(m, mu = rep(0, p), sigma = sigma.mat) y_test <- x_test %*% true.beta + rnorm(m, 0, sigma) # Simulation of uncontaminated data x <- mvnfast::rmvn(n, mu = rep(0, p), sigma = sigma.mat) y <- x %*% true.beta + rnorm(n, 0, sigma) # Contamination of data contamination_indices <- 1:floor(n*contamination.prop) k_lev <- 2 k_slo <- 100 x_train <- x y_train <- y beta_cont <- true.beta beta_cont[true.beta!=0] <- beta_cont[true.beta!=0]*(1 + k_slo) beta_cont[true.beta==0] <- k_slo*max(abs(true.beta)) for(cont_id in contamination_indices){ a <- runif(p, min = -1, max = 1) a <- a - as.numeric((1/p)*t(a) %*% rep(1, p)) x_train[cont_id,] <- mvnfast::rmvn(1, rep(0, p), 0.1^2*diag(p)) + k_lev * a / as.numeric(sqrt(t(a) %*% solve(sigma.mat) %*% a)) y_train[cont_id] <- t(x_train[cont_id,]) %*% beta_cont } # RMSS rmss_fit <- RMSS(x = x_train, y = y_train, n_models = 3, h_grid = c(35), t_grid = c(6, 8, 10), u_grid = c(1:3), initial_estimator = "robStepSplitReg", tolerance = 1e-1, max_iter = 1e3, neighborhood_search = FALSE, neighborhood_search_tolerance = 1e-1) rmss_coefs <- coef(rmss_fit, h_ind = 1, t_ind = 2, u_ind = 1, group_index = 1:rmss_fit$n_models) sens_rmss <- sum(which((rmss_coefs[-1]!=0)) <= p.active)/p.active spec_rmss <- sum(which((rmss_coefs[-1]!=0)) <= p.active)/sum(rmss_coefs[-1]!=0) rmss_preds <- predict(rmss_fit, newx = x_test, h_ind = 1, t_ind = 2, u_ind = 1, group_index = 1:rmss_fit$n_models, dynamic = FALSE) rmss_mspe <- mean((y_test - rmss_preds)^2)/sigma^2
# Simulation parameters n <- 50 p <- 100 rho <- 0.8 rho.inactive <- 0.2 group.size <- 5 p.active <- 15 snr <- 2 contamination.prop <- 0.3 # Setting the seed set.seed(0) # Block Correlation sigma.mat <- matrix(0, p, p) sigma.mat[1:p.active, 1:p.active] <- rho.inactive for(group in 0:(p.active/group.size - 1)) sigma.mat[(group*group.size+1):(group*group.size+group.size), (group*group.size+1):(group*group.size+group.size)] <- rho diag(sigma.mat) <- 1 # Simulation of beta vector true.beta <- c(runif(p.active, 0, 5)*(-1)^rbinom(p.active, 1, 0.7), rep(0, p - p.active)) # Setting the SD of the variance sigma <- as.numeric(sqrt(t(true.beta) %*% sigma.mat %*% true.beta)/sqrt(snr)) # Simulation of test data m <- 2e3 x_test <- mvnfast::rmvn(m, mu = rep(0, p), sigma = sigma.mat) y_test <- x_test %*% true.beta + rnorm(m, 0, sigma) # Simulation of uncontaminated data x <- mvnfast::rmvn(n, mu = rep(0, p), sigma = sigma.mat) y <- x %*% true.beta + rnorm(n, 0, sigma) # Contamination of data contamination_indices <- 1:floor(n*contamination.prop) k_lev <- 2 k_slo <- 100 x_train <- x y_train <- y beta_cont <- true.beta beta_cont[true.beta!=0] <- beta_cont[true.beta!=0]*(1 + k_slo) beta_cont[true.beta==0] <- k_slo*max(abs(true.beta)) for(cont_id in contamination_indices){ a <- runif(p, min = -1, max = 1) a <- a - as.numeric((1/p)*t(a) %*% rep(1, p)) x_train[cont_id,] <- mvnfast::rmvn(1, rep(0, p), 0.1^2*diag(p)) + k_lev * a / as.numeric(sqrt(t(a) %*% solve(sigma.mat) %*% a)) y_train[cont_id] <- t(x_train[cont_id,]) %*% beta_cont } # RMSS rmss_fit <- RMSS(x = x_train, y = y_train, n_models = 3, h_grid = c(35), t_grid = c(6, 8, 10), u_grid = c(1:3), initial_estimator = "robStepSplitReg", tolerance = 1e-1, max_iter = 1e3, neighborhood_search = FALSE, neighborhood_search_tolerance = 1e-1) rmss_coefs <- coef(rmss_fit, h_ind = 1, t_ind = 2, u_ind = 1, group_index = 1:rmss_fit$n_models) sens_rmss <- sum(which((rmss_coefs[-1]!=0)) <= p.active)/p.active spec_rmss <- sum(which((rmss_coefs[-1]!=0)) <= p.active)/sum(rmss_coefs[-1]!=0) rmss_preds <- predict(rmss_fit, newx = x_test, h_ind = 1, t_ind = 2, u_ind = 1, group_index = 1:rmss_fit$n_models, dynamic = FALSE) rmss_mspe <- mean((y_test - rmss_preds)^2)/sigma^2
trimmed_samples
returns the coefficients for a RMSS or cv.RMSS object.
trimmed_samples( object, h_ind = NULL, t_ind = NULL, u_ind = NULL, group_index = NULL, ... )
trimmed_samples( object, h_ind = NULL, t_ind = NULL, u_ind = NULL, group_index = NULL, ... )
object |
An object of class RMSS |
h_ind |
Index for robustness parameter. |
t_ind |
Index for sparsity parameter. |
u_ind |
Index for diversity parameter. |
group_index |
Groups included in the ensemble. Default setting includes all the groups. |
... |
Additional arguments for compatibility. |
The trimmed samples for the RMSS or cv.RMSS object.
Anthony-Alexander Christidis, [email protected]
# Simulation parameters n <- 50 p <- 100 rho <- 0.8 rho.inactive <- 0.2 group.size <- 5 p.active <- 15 snr <- 2 contamination.prop <- 0.3 # Setting the seed set.seed(0) # Block Correlation sigma.mat <- matrix(0, p, p) sigma.mat[1:p.active, 1:p.active] <- rho.inactive for(group in 0:(p.active/group.size - 1)) sigma.mat[(group*group.size+1):(group*group.size+group.size), (group*group.size+1):(group*group.size+group.size)] <- rho diag(sigma.mat) <- 1 # Simulation of beta vector true.beta <- c(runif(p.active, 0, 5)*(-1)^rbinom(p.active, 1, 0.7), rep(0, p - p.active)) # Setting the SD of the variance sigma <- as.numeric(sqrt(t(true.beta) %*% sigma.mat %*% true.beta)/sqrt(snr)) # Simulation of test data m <- 2e3 x_test <- mvnfast::rmvn(m, mu = rep(0, p), sigma = sigma.mat) y_test <- x_test %*% true.beta + rnorm(m, 0, sigma) # Simulation of uncontaminated data x <- mvnfast::rmvn(n, mu = rep(0, p), sigma = sigma.mat) y <- x %*% true.beta + rnorm(n, 0, sigma) # Contamination of data contamination_indices <- 1:floor(n*contamination.prop) k_lev <- 2 k_slo <- 100 x_train <- x y_train <- y beta_cont <- true.beta beta_cont[true.beta!=0] <- beta_cont[true.beta!=0]*(1 + k_slo) beta_cont[true.beta==0] <- k_slo*max(abs(true.beta)) for(cont_id in contamination_indices){ a <- runif(p, min = -1, max = 1) a <- a - as.numeric((1/p)*t(a) %*% rep(1, p)) x_train[cont_id,] <- mvnfast::rmvn(1, rep(0, p), 0.1^2*diag(p)) + k_lev * a / as.numeric(sqrt(t(a) %*% solve(sigma.mat) %*% a)) y_train[cont_id] <- t(x_train[cont_id,]) %*% beta_cont } # RMSS rmss_fit <- RMSS(x = x_train, y = y_train, n_models = 3, h_grid = c(35), t_grid = c(6, 8, 10), u_grid = c(1:3), tolerance = 1e-1, max_iter = 1e3, neighborhood_search = FALSE, neighborhood_search_tolerance = 1e-1) rmss_coefs <- coef(rmss_fit, h_ind = 1, t_ind = 2, u_ind = 1, group_index = 1:rmss_fit$n_models) sens_rmss <- sum(which((rmss_coefs[-1]!=0)) <= p.active)/p.active spec_rmss <- sum(which((rmss_coefs[-1]!=0)) <= p.active)/sum(rmss_coefs[-1]!=0) rmss_preds <- predict(rmss_fit, newx = x_test, h_ind = 1, t_ind = 2, u_ind = 1, group_index = 1:rmss_fit$n_models, dynamic = FALSE) rmss_mspe <- mean((y_test - rmss_preds)^2)/sigma^2 trimmed_id <- trimmed_samples(rmss_fit, h_ind = 1, t_ind = 1, u_ind = 1)
# Simulation parameters n <- 50 p <- 100 rho <- 0.8 rho.inactive <- 0.2 group.size <- 5 p.active <- 15 snr <- 2 contamination.prop <- 0.3 # Setting the seed set.seed(0) # Block Correlation sigma.mat <- matrix(0, p, p) sigma.mat[1:p.active, 1:p.active] <- rho.inactive for(group in 0:(p.active/group.size - 1)) sigma.mat[(group*group.size+1):(group*group.size+group.size), (group*group.size+1):(group*group.size+group.size)] <- rho diag(sigma.mat) <- 1 # Simulation of beta vector true.beta <- c(runif(p.active, 0, 5)*(-1)^rbinom(p.active, 1, 0.7), rep(0, p - p.active)) # Setting the SD of the variance sigma <- as.numeric(sqrt(t(true.beta) %*% sigma.mat %*% true.beta)/sqrt(snr)) # Simulation of test data m <- 2e3 x_test <- mvnfast::rmvn(m, mu = rep(0, p), sigma = sigma.mat) y_test <- x_test %*% true.beta + rnorm(m, 0, sigma) # Simulation of uncontaminated data x <- mvnfast::rmvn(n, mu = rep(0, p), sigma = sigma.mat) y <- x %*% true.beta + rnorm(n, 0, sigma) # Contamination of data contamination_indices <- 1:floor(n*contamination.prop) k_lev <- 2 k_slo <- 100 x_train <- x y_train <- y beta_cont <- true.beta beta_cont[true.beta!=0] <- beta_cont[true.beta!=0]*(1 + k_slo) beta_cont[true.beta==0] <- k_slo*max(abs(true.beta)) for(cont_id in contamination_indices){ a <- runif(p, min = -1, max = 1) a <- a - as.numeric((1/p)*t(a) %*% rep(1, p)) x_train[cont_id,] <- mvnfast::rmvn(1, rep(0, p), 0.1^2*diag(p)) + k_lev * a / as.numeric(sqrt(t(a) %*% solve(sigma.mat) %*% a)) y_train[cont_id] <- t(x_train[cont_id,]) %*% beta_cont } # RMSS rmss_fit <- RMSS(x = x_train, y = y_train, n_models = 3, h_grid = c(35), t_grid = c(6, 8, 10), u_grid = c(1:3), tolerance = 1e-1, max_iter = 1e3, neighborhood_search = FALSE, neighborhood_search_tolerance = 1e-1) rmss_coefs <- coef(rmss_fit, h_ind = 1, t_ind = 2, u_ind = 1, group_index = 1:rmss_fit$n_models) sens_rmss <- sum(which((rmss_coefs[-1]!=0)) <= p.active)/p.active spec_rmss <- sum(which((rmss_coefs[-1]!=0)) <= p.active)/sum(rmss_coefs[-1]!=0) rmss_preds <- predict(rmss_fit, newx = x_test, h_ind = 1, t_ind = 2, u_ind = 1, group_index = 1:rmss_fit$n_models, dynamic = FALSE) rmss_mspe <- mean((y_test - rmss_preds)^2)/sigma^2 trimmed_id <- trimmed_samples(rmss_fit, h_ind = 1, t_ind = 1, u_ind = 1)