Generate ordinal categorical data — genOrdCat • simstudy

Ordinal categorical data is added to an existing data set. Correlations between the new variables can be specified either with a correlation matrix (corMatrix) or with a correlation coefficient and structure (rho and corstr).

genOrdCat(
  dtName,
  adjVar = NULL,
  baseprobs,
  catVar = "cat",
  asFactor = TRUE,
  idname = "id",
  prefix = "grp",
  rho = 0,
  corstr = "ind",
  corMatrix = NULL,
  npVar = NULL,
  npAdj = NULL
)

Arguments

dtName: Name of complete data set
adjVar: Adjustment variable name in dtName - determines logistic shift. This is specified assuming a cumulative logit link.
baseprobs: Baseline probabilities expressed as a vector or matrix of probabilities. The values (per row) must sum to <= 1. If rowSums(baseprobs) < 1, an additional category is added with probability 1 - rowSums(baseprobs). The number of rows represents the number of new categorical variables. The number of columns represents the number of possible responses - if a particular variable has fewer possible responses, assign zero probability to the non-relevant columns.
catVar: Name of the new categorical field. Defaults to "cat". Can be a character vector with a name for each new variable defined via baseprobs. Will be overridden by prefix if more than one variable is defined and length(catVar) == 1.
asFactor: If asFactor == TRUE (default), new field is returned as a factor. If asFactor == FALSE, new field is returned as an integer.
idname: Name of the id column in dtName.
prefix: A string. The names of the new variables will be a concatenation of the prefix and a sequence of integers indicating the variable number.
rho: Correlation coefficient, -1 < rho < 1. Use if corMatrix is not provided.
corstr: Correlation structure of the correlation matrix defined by rho. Options include "ind" for an independence structure, "cs" for a compound symmetry structure, and "ar1" for an autoregressive structure. Use if corMatrix is not provided.
corMatrix: Correlation matrix can be entered directly. It must be symmetrical and positive definite. It is not a required field; if a matrix is not provided, then a structure and correlation coefficient rho must be specified. (The matrix created via rho and corstr must also be positive definite.)
npVar: Vector of variable names that indicate which variables are to violate the proportionality assumption.
npAdj: Matrix with a row for each npVar and a column for each category. Each value represents the deviation from the proportional odds assumption on the logistic scale.

Value

Original data.table with the added categorical field(s): one new column per row of baseprobs.

Examples

# Ordinal Categorical Data ----

def1 <- defData(
  varname = "male",
  formula = 0.45, dist = "binary", id = "idG"
)
def1 <- defData(def1,
  varname = "z",
  formula = "1.2*male", dist = "nonrandom"
)
def1
#>    varname  formula variance      dist     link
#>     <char>   <char>    <num>    <char>   <char>
#> 1:    male     0.45        0    binary identity
#> 2:       z 1.2*male        0 nonrandom identity

## Generate data

set.seed(20)

dx <- genData(1000, def1)

probs <- c(0.40, 0.25, 0.15)

dx <- genOrdCat(dx,
  adjVar = "z", idname = "idG", baseprobs = probs,
  catVar = "grp"
)
#> Warning: Probabilities do not sum to 1. Adding category to all rows!
dx
#> Key: <idG>
#>         idG  male     z    grp
#>       <int> <int> <num> <fctr>
#>    1:     1     1   1.2      2
#>    2:     2     1   1.2      3
#>    3:     3     0   0.0      4
#>    4:     4     0   0.0      1
#>    5:     5     1   1.2      4
#>   ---                         
#>  996:   996     1   1.2      2
#>  997:   997     0   0.0      1
#>  998:   998     0   0.0      1
#>  999:   999     0   0.0      2
#> 1000:  1000     0   0.0      1

# Correlated Ordinal Categorical Data ----

baseprobs <- matrix(c(
  0.2, 0.1, 0.1, 0.6,
  0.7, 0.2, 0.1, 0,
  0.5, 0.2, 0.3, 0,
  0.4, 0.2, 0.4, 0,
  0.6, 0.2, 0.2, 0
),
nrow = 5, byrow = TRUE
)

set.seed(333)
dT <- genData(1000)

dX <- genOrdCat(dT,
  adjVar = NULL, baseprobs = baseprobs,
  prefix = "q", rho = .125, corstr = "cs", asFactor = FALSE
)
dX
#> Key: <id>
#>          id    q1    q2    q3    q4    q5
#>       <int> <int> <int> <int> <int> <int>
#>    1:     1     4     3     1     3     3
#>    2:     2     4     1     3     3     1
#>    3:     3     4     1     3     3     1
#>    4:     4     3     1     3     1     1
#>    5:     5     4     1     1     3     1
#>   ---                                    
#>  996:   996     4     1     2     2     1
#>  997:   997     2     1     3     2     1
#>  998:   998     4     3     1     3     1
#>  999:   999     2     2     3     3     1
#> 1000:  1000     4     2     2     3     1

dM <- data.table::melt(dX, id.vars = "id")
dProp <- dM[, prop.table(table(value)), by = variable]
dProp[, response := c(1:4, 1:3, 1:3, 1:3, 1:3)]
#>     variable      V1 response
#>       <fctr> <table>    <int>
#>  1:       q1   0.192        1
#>  2:       q1   0.096        2
#>  3:       q1   0.109        3
#>  4:       q1   0.603        4
#>  5:       q2   0.672        1
#>  6:       q2   0.209        2
#>  7:       q2   0.119        3
#>  8:       q3   0.492        1
#>  9:       q3   0.190        2
#> 10:       q3   0.318        3
#> 11:       q4   0.379        1
#> 12:       q4   0.222        2
#> 13:       q4   0.399        3
#> 14:       q5   0.592        1
#> 15:       q5   0.203        2
#> 16:       q5   0.205        3

data.table::dcast(dProp, variable ~ response,
  value.var = "V1", fill = 0
)
#> Key: <variable>
#>    variable       1       2       3       4
#>      <fctr> <table> <table> <table> <table>
#> 1:       q1   0.192   0.096   0.109   0.603
#> 2:       q2   0.672   0.209   0.119   0.000
#> 3:       q3   0.492   0.190   0.318   0.000
#> 4:       q4   0.379   0.222   0.399   0.000
#> 5:       q5   0.592   0.203   0.205   0.000

# proportional odds assumption violated

d1 <- defData(varname = "rx", formula = "1;1", dist = "trtAssign")
d1 <- defData(d1, varname = "z", formula = "0 - 1.2*rx", dist = "nonrandom")

dd <- genData(1000, d1)

baseprobs <- c(.4, .3, .2, .1)
npAdj <- c(0, 1, 0, 0)

dn <- genOrdCat(
  dtName = dd, adjVar = "z",
  baseprobs = baseprobs,
  npVar = "rx", npAdj = npAdj
)