Title: | Best Split Selection Modeling for Low-Dimensional Data |
---|---|
Description: | Functions to generate or sample from all possible splits of features or variables into a number of specified groups. Also computes the best split selection estimator (for low-dimensional data) as defined in Christidis, Van Aelst and Zamar (2019) <arXiv:1812.05678>. |
Authors: | Anthony Christidis <[email protected]>, Stefan Van Aelst <[email protected]>, Ruben Zamar <[email protected]> |
Maintainer: | Anthony Christidis <[email protected]> |
License: | GPL (>= 2) |
Version: | 1.0.3 |
Built: | 2025-01-04 05:42:01 UTC |
Source: | https://github.com/anthonychristidis/splitselect |
coef.cv.splitSelect
returns the coefficients for a cv.splitSelect object.
## S3 method for class 'cv.splitSelect' coef(object, optimal.only = TRUE, ...)
## S3 method for class 'cv.splitSelect' coef(object, optimal.only = TRUE, ...)
object |
An object of class cv.splitSelect. |
optimal.only |
A boolean variable (TRUE default) to indicate if only the coefficients of the optimal split are returned. |
... |
Additional arguments for compatibility. |
A matrix with the coefficients of the cv.splitSelect
object.
Anthony-Alexander Christidis, [email protected]
# Setting the parameters p <- 4 n <- 30 n.test <- 5000 beta <- rep(5,4) rho <- 0.1 r <- 0.9 SNR <- 3 # Creating the target matrix with "kernel" set to rho target_cor <- function(r, p){ Gamma <- diag(p) for(i in 1:(p-1)){ for(j in (i+1):p){ Gamma[i,j] <- Gamma[j,i] <- r^(abs(i-j)) } } return(Gamma) } # AR Correlation Structure Sigma.r <- target_cor(r, p) Sigma.rho <- target_cor(rho, p) sigma.epsilon <- as.numeric(sqrt((t(beta) %*% Sigma.rho %*% beta)/SNR)) # Simulate some data x.train <- mvnfast::rmvn(30, mu=rep(0,p), sigma=Sigma.r) y.train <- 1 + x.train %*% beta + rnorm(n=n, mean=0, sd=sigma.epsilon) # Generating the coefficients for a fixed split split.out <- cv.splitSelect(x.train, y.train, G=2, use.all=TRUE, fix.partition=list(matrix(c(2,2), ncol=2, byrow=TRUE)), fix.split=NULL, intercept=TRUE, group.model="glmnet", alphas=0, nfolds=10) coef(split.out)
# Setting the parameters p <- 4 n <- 30 n.test <- 5000 beta <- rep(5,4) rho <- 0.1 r <- 0.9 SNR <- 3 # Creating the target matrix with "kernel" set to rho target_cor <- function(r, p){ Gamma <- diag(p) for(i in 1:(p-1)){ for(j in (i+1):p){ Gamma[i,j] <- Gamma[j,i] <- r^(abs(i-j)) } } return(Gamma) } # AR Correlation Structure Sigma.r <- target_cor(r, p) Sigma.rho <- target_cor(rho, p) sigma.epsilon <- as.numeric(sqrt((t(beta) %*% Sigma.rho %*% beta)/SNR)) # Simulate some data x.train <- mvnfast::rmvn(30, mu=rep(0,p), sigma=Sigma.r) y.train <- 1 + x.train %*% beta + rnorm(n=n, mean=0, sd=sigma.epsilon) # Generating the coefficients for a fixed split split.out <- cv.splitSelect(x.train, y.train, G=2, use.all=TRUE, fix.partition=list(matrix(c(2,2), ncol=2, byrow=TRUE)), fix.split=NULL, intercept=TRUE, group.model="glmnet", alphas=0, nfolds=10) coef(split.out)
coef.splitSelect
returns the coefficients for a splitSelect object.
## S3 method for class 'splitSelect' coef(object, ...)
## S3 method for class 'splitSelect' coef(object, ...)
object |
An object of class splitSelect. |
... |
Additional arguments for compatibility. |
A matrix with the coefficients of the splitSelect
object.
Anthony-Alexander Christidis, [email protected]
# Setting the parameters p <- 4 n <- 30 n.test <- 5000 beta <- rep(5,4) rho <- 0.1 r <- 0.9 SNR <- 3 # Creating the target matrix with "kernel" set to rho target_cor <- function(r, p){ Gamma <- diag(p) for(i in 1:(p-1)){ for(j in (i+1):p){ Gamma[i,j] <- Gamma[j,i] <- r^(abs(i-j)) } } return(Gamma) } # AR Correlation Structure Sigma.r <- target_cor(r, p) Sigma.rho <- target_cor(rho, p) sigma.epsilon <- as.numeric(sqrt((t(beta) %*% Sigma.rho %*% beta)/SNR)) # Simulate some data x.train <- mvnfast::rmvn(30, mu=rep(0,p), sigma=Sigma.r) y.train <- 1 + x.train %*% beta + rnorm(n=n, mean=0, sd=sigma.epsilon) # Generating the coefficients for a fixed partition of the variables split.out <- splitSelect(x.train, y.train, G=2, use.all=TRUE, fix.partition=list(matrix(c(2,2), ncol=2, byrow=TRUE)), fix.split=NULL, intercept=TRUE, group.model="glmnet", alphas=0) coef(split.out)
# Setting the parameters p <- 4 n <- 30 n.test <- 5000 beta <- rep(5,4) rho <- 0.1 r <- 0.9 SNR <- 3 # Creating the target matrix with "kernel" set to rho target_cor <- function(r, p){ Gamma <- diag(p) for(i in 1:(p-1)){ for(j in (i+1):p){ Gamma[i,j] <- Gamma[j,i] <- r^(abs(i-j)) } } return(Gamma) } # AR Correlation Structure Sigma.r <- target_cor(r, p) Sigma.rho <- target_cor(rho, p) sigma.epsilon <- as.numeric(sqrt((t(beta) %*% Sigma.rho %*% beta)/SNR)) # Simulate some data x.train <- mvnfast::rmvn(30, mu=rep(0,p), sigma=Sigma.r) y.train <- 1 + x.train %*% beta + rnorm(n=n, mean=0, sd=sigma.epsilon) # Generating the coefficients for a fixed partition of the variables split.out <- splitSelect(x.train, y.train, G=2, use.all=TRUE, fix.partition=list(matrix(c(2,2), ncol=2, byrow=TRUE)), fix.split=NULL, intercept=TRUE, group.model="glmnet", alphas=0) coef(split.out)
cv.splitSelect
performs the best split selection algorithm with cross-validation
cv.splitSelect( x, y, intercept = TRUE, G, use.all = TRUE, family = c("gaussian", "binomial")[1], group.model = c("glmnet", "LS", "Logistic")[1], alphas = 0, nsample = NULL, fix.partition = NULL, fix.split = NULL, nfolds = 10, parallel = FALSE, cores = getOption("mc.cores", 2L) )
cv.splitSelect( x, y, intercept = TRUE, G, use.all = TRUE, family = c("gaussian", "binomial")[1], group.model = c("glmnet", "LS", "Logistic")[1], alphas = 0, nsample = NULL, fix.partition = NULL, fix.split = NULL, nfolds = 10, parallel = FALSE, cores = getOption("mc.cores", 2L) )
x |
Design matrix. |
y |
Response vector. |
intercept |
Boolean variable to determine if there is an intercept (default is TRUE) or not. |
G |
Number of groups into which the variables are split. Can have more than one value. |
use.all |
Boolean variable to determine if all variables must be used (default is TRUE). |
family |
Description of the error distribution and link function to be used for the model. Must be one of "gaussian" or "binomial". |
group.model |
Model used for the groups. Must be one of "glmnet", "LS" or "Logistic". |
alphas |
Elastic net mixing parameter. Should be between 0 (default) and 1. |
nsample |
Number of sample splits for each value of G. If NULL, then all splits will be considered (unless there is overflow). |
fix.partition |
Optional list with G elements indicating the partitions (in each row) to be considered for the splits. |
fix.split |
Optional matrix with p columns indicating the groups (in each row) to be considered for the splits. |
nfolds |
Number of folds for the cross-validation procedure. |
parallel |
Boolean variable to determine whether the function is run in parallel (default is FALSE). |
cores |
Number of cores for the parallelization for the function. |
An object of class cv.splitSelect.
Anthony-Alexander Christidis, [email protected]
coef.cv.splitSelect
, predict.cv.splitSelect
# Setting the parameters p <- 4 n <- 30 n.test <- 5000 beta <- rep(5,4) rho <- 0.1 r <- 0.9 SNR <- 3 # Creating the target matrix with "kernel" set to rho target_cor <- function(r, p){ Gamma <- diag(p) for(i in 1:(p-1)){ for(j in (i+1):p){ Gamma[i,j] <- Gamma[j,i] <- r^(abs(i-j)) } } return(Gamma) } # AR Correlation Structure Sigma.r <- target_cor(r, p) Sigma.rho <- target_cor(rho, p) sigma.epsilon <- as.numeric(sqrt((t(beta) %*% Sigma.rho %*% beta)/SNR)) # Simulate some data x.train <- mvnfast::rmvn(30, mu=rep(0,p), sigma=Sigma.r) y.train <- 1 + x.train %*% beta + rnorm(n=n, mean=0, sd=sigma.epsilon) # Generating the coefficients for a fixed partition of the variables split.out <- cv.splitSelect(x.train, y.train, G=2, use.all=TRUE, fix.partition=list(matrix(c(2,2), ncol=2, byrow=TRUE)), fix.split=NULL, intercept=TRUE, group.model="glmnet", alphas=0, nfolds=10)
# Setting the parameters p <- 4 n <- 30 n.test <- 5000 beta <- rep(5,4) rho <- 0.1 r <- 0.9 SNR <- 3 # Creating the target matrix with "kernel" set to rho target_cor <- function(r, p){ Gamma <- diag(p) for(i in 1:(p-1)){ for(j in (i+1):p){ Gamma[i,j] <- Gamma[j,i] <- r^(abs(i-j)) } } return(Gamma) } # AR Correlation Structure Sigma.r <- target_cor(r, p) Sigma.rho <- target_cor(rho, p) sigma.epsilon <- as.numeric(sqrt((t(beta) %*% Sigma.rho %*% beta)/SNR)) # Simulate some data x.train <- mvnfast::rmvn(30, mu=rep(0,p), sigma=Sigma.r) y.train <- 1 + x.train %*% beta + rnorm(n=n, mean=0, sd=sigma.epsilon) # Generating the coefficients for a fixed partition of the variables split.out <- cv.splitSelect(x.train, y.train, G=2, use.all=TRUE, fix.partition=list(matrix(c(2,2), ncol=2, byrow=TRUE)), fix.split=NULL, intercept=TRUE, group.model="glmnet", alphas=0, nfolds=10)
generate_partitions
returns a matrix with the number of possible objects in each group using splits.
generate_partitions(p, G, use.all = TRUE)
generate_partitions(p, G, use.all = TRUE)
p |
Number of variables or objects to split. |
G |
Number of groups into which the variables are split. |
use.all |
Boolean variable to determine if all variables must be used (default is TRUE). |
A matrix or list with the number of possible objects in each group using splits.
Anthony-Alexander Christidis, [email protected]
# Generating the possible split partitions of 6 variables in 3 groups # Using all the variables split.3groups.all <- generate_partitions(6, 3) split.3groups.all # Without using all the variables split.3groups <- generate_partitions(6, 3, use.all=FALSE) split.3groups
# Generating the possible split partitions of 6 variables in 3 groups # Using all the variables split.3groups.all <- generate_partitions(6, 3) split.3groups.all # Without using all the variables split.3groups <- generate_partitions(6, 3, use.all=FALSE) split.3groups
generate_splits
returns a matrix with the different splits of the variables in each row.
generate_splits(p, G, use.all = TRUE, fix.partition = NULL, verbose = TRUE)
generate_splits(p, G, use.all = TRUE, fix.partition = NULL, verbose = TRUE)
p |
Number of variables or objects to split. |
G |
Number of groups into which the variables are split. |
use.all |
Boolean variable to determine if all variables must be used (default is TRUE). |
fix.partition |
Optional matrix with G columns (or list if more than one value of G) indicating the partitions (in each row) to be considered for the splits. |
verbose |
Boolean variable to determine if console output for cross-validation progress is printed (default is TRUE). |
A matrix with the different splits of the variables in the groups.
Anthony-Alexander Christidis, [email protected]
# Generating the possible splits of 6 variables in 3 groups # Using all the variables split.3groups.all <- generate_splits(6, 3) split.3groups.all # Without using all the variables split.3groups <- generate_splits(6, 3, use.all=FALSE) split.3groups
# Generating the possible splits of 6 variables in 3 groups # Using all the variables split.3groups.all <- generate_splits(6, 3) split.3groups.all # Without using all the variables split.3groups <- generate_splits(6, 3, use.all=FALSE) split.3groups
nsplit
returns the total number of possible splits of variables into groups.
nsplit(p, G, use.all = TRUE, fix.partition = NULL)
nsplit(p, G, use.all = TRUE, fix.partition = NULL)
p |
Number of variables or objects to split. |
G |
Number of groups into which the variables are split. |
use.all |
Boolean variable to determine if all variables must be used (default is TRUE). |
fix.partition |
Optional matrix with G columns (or list if more than one value of G) indicating the partitions (in each row) to be considered for the splits. |
A numeric vector with the total number of possible splits.
Anthony-Alexander Christidis, [email protected]
# Compute the total number of possible splits of 6 variables into 3 groups # We use all the variables out.n.splits.all <- nsplit(p=6, G=3, use.all=TRUE) out.n.splits.all # We don't enforce using all the variables out.n.splits <- nsplit(p=6, G=3, use.all=FALSE) out.n.splits
# Compute the total number of possible splits of 6 variables into 3 groups # We use all the variables out.n.splits.all <- nsplit(p=6, G=3, use.all=TRUE) out.n.splits.all # We don't enforce using all the variables out.n.splits <- nsplit(p=6, G=3, use.all=FALSE) out.n.splits
predict.cv.splitSelect
returns the predictions of a cv.splitSelect object for new data.
## S3 method for class 'cv.splitSelect' predict(object, newx, optimal.only = TRUE, ...)
## S3 method for class 'cv.splitSelect' predict(object, newx, optimal.only = TRUE, ...)
object |
An object of class cv.splitSelect. |
newx |
A matrix with the new data. |
optimal.only |
A boolean variable (TRUE default) to indicate if only the predictions of the optimal split are returned. |
... |
Additional arguments for compatibility. |
A matrix with the predictions of the cv.splitSelect
object.
Anthony-Alexander Christidis, [email protected]
# Setting the parameters p <- 4 n <- 30 n.test <- 5000 beta <- rep(5,4) rho <- 0.1 r <- 0.9 SNR <- 3 # Creating the target matrix with "kernel" set to rho target_cor <- function(r, p){ Gamma <- diag(p) for(i in 1:(p-1)){ for(j in (i+1):p){ Gamma[i,j] <- Gamma[j,i] <- r^(abs(i-j)) } } return(Gamma) } # AR Correlation Structure Sigma.r <- target_cor(r, p) Sigma.rho <- target_cor(rho, p) sigma.epsilon <- as.numeric(sqrt((t(beta) %*% Sigma.rho %*% beta)/SNR)) # Simulate some data x.train <- mvnfast::rmvn(30, mu=rep(0,p), sigma=Sigma.r) y.train <- 1 + x.train %*% beta + rnorm(n=n, mean=0, sd=sigma.epsilon) x.test <- mvnfast::rmvn(n.test, mu=rep(0,p), sigma=Sigma.rho) y.test <- 1 + x.test %*% beta + rnorm(n.test, sd=sigma.epsilon) # Generating the coefficients for a fixed split split.out <- cv.splitSelect(x.train, y.train, G=2, use.all=TRUE, fix.partition=list(matrix(c(2,2), ncol=2, byrow=TRUE)), fix.split=NULL, intercept=TRUE, group.model="glmnet", alphas=0) predict(split.out, newx=x.test)
# Setting the parameters p <- 4 n <- 30 n.test <- 5000 beta <- rep(5,4) rho <- 0.1 r <- 0.9 SNR <- 3 # Creating the target matrix with "kernel" set to rho target_cor <- function(r, p){ Gamma <- diag(p) for(i in 1:(p-1)){ for(j in (i+1):p){ Gamma[i,j] <- Gamma[j,i] <- r^(abs(i-j)) } } return(Gamma) } # AR Correlation Structure Sigma.r <- target_cor(r, p) Sigma.rho <- target_cor(rho, p) sigma.epsilon <- as.numeric(sqrt((t(beta) %*% Sigma.rho %*% beta)/SNR)) # Simulate some data x.train <- mvnfast::rmvn(30, mu=rep(0,p), sigma=Sigma.r) y.train <- 1 + x.train %*% beta + rnorm(n=n, mean=0, sd=sigma.epsilon) x.test <- mvnfast::rmvn(n.test, mu=rep(0,p), sigma=Sigma.rho) y.test <- 1 + x.test %*% beta + rnorm(n.test, sd=sigma.epsilon) # Generating the coefficients for a fixed split split.out <- cv.splitSelect(x.train, y.train, G=2, use.all=TRUE, fix.partition=list(matrix(c(2,2), ncol=2, byrow=TRUE)), fix.split=NULL, intercept=TRUE, group.model="glmnet", alphas=0) predict(split.out, newx=x.test)
predict.splitSelect
returns the predictions of a splitSelect object for new data.
## S3 method for class 'splitSelect' predict(object, newx, ...)
## S3 method for class 'splitSelect' predict(object, newx, ...)
object |
An object of class splitSelect. |
newx |
A matrix with the new data. |
... |
Additional arguments for compatibility. |
A matrix with the predictions of the splitSelect
object.
Anthony-Alexander Christidis, [email protected]
# Setting the parameters p <- 4 n <- 30 n.test <- 5000 beta <- rep(5,4) rho <- 0.1 r <- 0.9 SNR <- 3 # Creating the target matrix with "kernel" set to rho target_cor <- function(r, p){ Gamma <- diag(p) for(i in 1:(p-1)){ for(j in (i+1):p){ Gamma[i,j] <- Gamma[j,i] <- r^(abs(i-j)) } } return(Gamma) } # AR Correlation Structure Sigma.r <- target_cor(r, p) Sigma.rho <- target_cor(rho, p) sigma.epsilon <- as.numeric(sqrt((t(beta) %*% Sigma.rho %*% beta)/SNR)) # Simulate some data x.train <- mvnfast::rmvn(30, mu=rep(0,p), sigma=Sigma.r) y.train <- 1 + x.train %*% beta + rnorm(n=n, mean=0, sd=sigma.epsilon) x.test <- mvnfast::rmvn(n.test, mu=rep(0,p), sigma=Sigma.rho) y.test <- 1 + x.test %*% beta + rnorm(n.test, sd=sigma.epsilon) # Generating the coefficients for a fixed split split.out <- splitSelect(x.train, y.train, G=2, use.all=TRUE, fix.partition=list(matrix(c(2,2), ncol=2, byrow=TRUE)), fix.split=NULL, intercept=TRUE, group.model="glmnet", alphas=0) predict(split.out, newx=x.test)
# Setting the parameters p <- 4 n <- 30 n.test <- 5000 beta <- rep(5,4) rho <- 0.1 r <- 0.9 SNR <- 3 # Creating the target matrix with "kernel" set to rho target_cor <- function(r, p){ Gamma <- diag(p) for(i in 1:(p-1)){ for(j in (i+1):p){ Gamma[i,j] <- Gamma[j,i] <- r^(abs(i-j)) } } return(Gamma) } # AR Correlation Structure Sigma.r <- target_cor(r, p) Sigma.rho <- target_cor(rho, p) sigma.epsilon <- as.numeric(sqrt((t(beta) %*% Sigma.rho %*% beta)/SNR)) # Simulate some data x.train <- mvnfast::rmvn(30, mu=rep(0,p), sigma=Sigma.r) y.train <- 1 + x.train %*% beta + rnorm(n=n, mean=0, sd=sigma.epsilon) x.test <- mvnfast::rmvn(n.test, mu=rep(0,p), sigma=Sigma.rho) y.test <- 1 + x.test %*% beta + rnorm(n.test, sd=sigma.epsilon) # Generating the coefficients for a fixed split split.out <- splitSelect(x.train, y.train, G=2, use.all=TRUE, fix.partition=list(matrix(c(2,2), ncol=2, byrow=TRUE)), fix.split=NULL, intercept=TRUE, group.model="glmnet", alphas=0) predict(split.out, newx=x.test)
rsplit
returns a matrix with random splits of the variables in groups.
rsplit(n, p, G, use.all = TRUE, fix.partition = NULL, verbose = TRUE)
rsplit(n, p, G, use.all = TRUE, fix.partition = NULL, verbose = TRUE)
n |
Number of sample splits. |
p |
Number of variables or objects to split. |
G |
Number of groups into which the variables are split. |
use.all |
Boolean variable to determine if all variables must be used (default is TRUE). |
fix.partition |
Optional matrix with G columns indicating the partitions (in each row) to be considered for the splits. |
verbose |
Boolean variable to determine if console output for cross-validation progress is printed (default is TRUE). |
A matrix with the random splits of the variables into groups (one split in each row).
Anthony-Alexander Christidis, [email protected]
# Generating sample splits of 6 variables in 3 groups # Using all the variables random.splits <- rsplit(100, 6, 3) # Using fixed partitions random.splits.fixed <- rsplit(100, 6, 3, fix.partition=matrix(c(2,2,2), nrow=1))
# Generating sample splits of 6 variables in 3 groups # Using all the variables random.splits <- rsplit(100, 6, 3) # Using fixed partitions random.splits.fixed <- rsplit(100, 6, 3, fix.partition=matrix(c(2,2,2), nrow=1))
splitSelect
performs the best split selection algorithm.
splitSelect( x, y, intercept = TRUE, G, use.all = TRUE, family = c("gaussian", "binomial")[1], group.model = c("glmnet", "LS", "Logistic")[1], lambdas = NULL, alphas = 0, nsample = NULL, fix.partition = NULL, fix.split = NULL, parallel = FALSE, cores = getOption("mc.cores", 2L), verbose = TRUE )
splitSelect( x, y, intercept = TRUE, G, use.all = TRUE, family = c("gaussian", "binomial")[1], group.model = c("glmnet", "LS", "Logistic")[1], lambdas = NULL, alphas = 0, nsample = NULL, fix.partition = NULL, fix.split = NULL, parallel = FALSE, cores = getOption("mc.cores", 2L), verbose = TRUE )
x |
Design matrix. |
y |
Response vector. |
intercept |
Boolean variable to determine if there is an intercept (default is TRUE) or not. |
G |
Number of groups into which the variables are split. Can have more than one value. |
use.all |
Boolean variable to determine if all variables must be used (default is TRUE). |
family |
Description of the error distribution and link function to be used for the model. Must be one of "gaussian" or "binomial". |
group.model |
Model used for the groups. Must be one of "glmnet", "LS" or "Logistic". |
lambdas |
The shrinkage parameters for the "glmnet" regularization. If NULL (default), optimal values are chosen. |
alphas |
Elastic net mixing parameter. Should be between 0 (default) and 1. |
nsample |
Number of sample splits for each value of G. If NULL, then all splits will be considered (unless there is overflow). |
fix.partition |
Optional list with G elements indicating the partitions (in each row) to be considered for the splits. |
fix.split |
Optional matrix with p columns indicating the groups (in each row) to be considered for the splits. |
parallel |
Boolean variable to determine whether the function is run in parallel (default is FALSE). |
cores |
Number of cores for the parallelization for the function. |
verbose |
Boolean variable to determine if console output for cross-validation progress is printed (default is TRUE). |
An object of class splitSelect.
Anthony-Alexander Christidis, [email protected]
coef.splitSelect
, predict.splitSelect
# Setting the parameters p <- 4 n <- 30 n.test <- 5000 beta <- rep(5,4) rho <- 0.1 r <- 0.9 SNR <- 3 # Creating the target matrix with "kernel" set to rho target_cor <- function(r, p){ Gamma <- diag(p) for(i in 1:(p-1)){ for(j in (i+1):p){ Gamma[i,j] <- Gamma[j,i] <- r^(abs(i-j)) } } return(Gamma) } # AR Correlation Structure Sigma.r <- target_cor(r, p) Sigma.rho <- target_cor(rho, p) sigma.epsilon <- as.numeric(sqrt((t(beta) %*% Sigma.rho %*% beta)/SNR)) # Simulate some data x.train <- mvnfast::rmvn(30, mu=rep(0,p), sigma=Sigma.r) y.train <- 1 + x.train %*% beta + rnorm(n=n, mean=0, sd=sigma.epsilon) # Generating the coefficients for a fixed partition of the variables split.out <- splitSelect(x.train, y.train, G=2, use.all=TRUE, fix.partition=list(matrix(c(2,2), ncol=2, byrow=TRUE)), fix.split=NULL, intercept=TRUE, group.model="glmnet", alphas=0)
# Setting the parameters p <- 4 n <- 30 n.test <- 5000 beta <- rep(5,4) rho <- 0.1 r <- 0.9 SNR <- 3 # Creating the target matrix with "kernel" set to rho target_cor <- function(r, p){ Gamma <- diag(p) for(i in 1:(p-1)){ for(j in (i+1):p){ Gamma[i,j] <- Gamma[j,i] <- r^(abs(i-j)) } } return(Gamma) } # AR Correlation Structure Sigma.r <- target_cor(r, p) Sigma.rho <- target_cor(rho, p) sigma.epsilon <- as.numeric(sqrt((t(beta) %*% Sigma.rho %*% beta)/SNR)) # Simulate some data x.train <- mvnfast::rmvn(30, mu=rep(0,p), sigma=Sigma.r) y.train <- 1 + x.train %*% beta + rnorm(n=n, mean=0, sd=sigma.epsilon) # Generating the coefficients for a fixed partition of the variables split.out <- splitSelect(x.train, y.train, G=2, use.all=TRUE, fix.partition=list(matrix(c(2,2), ncol=2, byrow=TRUE)), fix.split=NULL, intercept=TRUE, group.model="glmnet", alphas=0)
splitSelect_coef
generates the coefficients for a particular split of variables into groups.
splitSelect_coef( x, y, variables.split, intercept = TRUE, family = c("gaussian", "binomial")[1], group.model = c("glmnet", "LS", "Logistic")[1], lambdas = NULL, alphas = 0 )
splitSelect_coef( x, y, variables.split, intercept = TRUE, family = c("gaussian", "binomial")[1], group.model = c("glmnet", "LS", "Logistic")[1], lambdas = NULL, alphas = 0 )
x |
Design matrix. |
y |
Response vector. |
variables.split |
A vector with the split of the variables into groups as values. |
intercept |
Boolean variable to determine if there is an intercept (default is TRUE) or not. |
family |
Description of the error distribution and link function to be used for the model. Must be one of "gaussian" or "binomial". |
group.model |
Model used for the groups. Must be one of "glmnet", "LS" or "Logistic". |
lambdas |
The shrinkage parameters for the "glmnet" regularization. If NULL (default), optimal values are chosen. |
alphas |
Elastic net mixing parameter. Should be between 0 (default) and 1. |
A vector with the regression coefficients for the split.
Anthony-Alexander Christidis, [email protected]
# Setting the parameters p <- 6 n <- 30 n.test <- 5000 group.beta <- -3 beta <- c(rep(1, 2), rep(group.beta, p-2)) rho <- 0.1 r <- 0.9 SNR <- 3 # Creating the target matrix with "kernel" set to rho target_cor <- function(r, p){ Gamma <- diag(p) for(i in 1:(p-1)){ for(j in (i+1):p){ Gamma[i,j] <- Gamma[j,i] <- r^(abs(i-j)) } } return(Gamma) } # AR Correlation Structure Sigma.r <- target_cor(r, p) Sigma.rho <- target_cor(rho, p) sigma.epsilon <- as.numeric(sqrt((t(beta) %*% Sigma.rho %*% beta)/SNR)) # Simulate some data x.train <- mvnfast::rmvn(30, mu=rep(0,p), sigma=Sigma.r) y.train <- 1 + x.train %*% beta + rnorm(n=n, mean=0, sd=sigma.epsilon) x.test <- mvnfast::rmvn(n.test, mu=rep(0,p), sigma=Sigma.rho) y.test <- 1 + x.test %*% beta + rnorm(n.test, sd=sigma.epsilon) # Generating the coefficients for a fixed split splitSelect_coef(x.train, y.train, variables.split=matrix(c(1,2,1,2,1,2), nrow=1))
# Setting the parameters p <- 6 n <- 30 n.test <- 5000 group.beta <- -3 beta <- c(rep(1, 2), rep(group.beta, p-2)) rho <- 0.1 r <- 0.9 SNR <- 3 # Creating the target matrix with "kernel" set to rho target_cor <- function(r, p){ Gamma <- diag(p) for(i in 1:(p-1)){ for(j in (i+1):p){ Gamma[i,j] <- Gamma[j,i] <- r^(abs(i-j)) } } return(Gamma) } # AR Correlation Structure Sigma.r <- target_cor(r, p) Sigma.rho <- target_cor(rho, p) sigma.epsilon <- as.numeric(sqrt((t(beta) %*% Sigma.rho %*% beta)/SNR)) # Simulate some data x.train <- mvnfast::rmvn(30, mu=rep(0,p), sigma=Sigma.r) y.train <- 1 + x.train %*% beta + rnorm(n=n, mean=0, sd=sigma.epsilon) x.test <- mvnfast::rmvn(n.test, mu=rep(0,p), sigma=Sigma.rho) y.test <- 1 + x.test %*% beta + rnorm(n.test, sd=sigma.epsilon) # Generating the coefficients for a fixed split splitSelect_coef(x.train, y.train, variables.split=matrix(c(1,2,1,2,1,2), nrow=1))