Code
#install.packages("remotes")
#remotes::install_github("DevPsyLab/petersenlab")#install.packages("remotes")
#remotes::install_github("DevPsyLab/petersenlab")data(Oxboys, package = "nlme")
Oxboys_addNA <- data.frame(Oxboys)
Oxboys_addNA <- Oxboys_addNA %>%
rename(id = Subject)
Oxboys_addNA$id <- as.integer(Oxboys_addNA$id)
Oxboys_addNA$Occasion <- as.integer(Oxboys_addNA$Occasion)
set.seed(52242)
Oxboys_addNA[sample(1:nrow(Oxboys_addNA), 25), "height"] <- NA
dataToImpute <- Oxboys_addNAhttps://stefvanbuuren.name/fimd/sec-nonignorable.html (archived at https://perma.cc/N7WW-HDZF)
https://cran.r-project.org/web/packages/missingHE/vignettes/Fitting_MNAR_models_in_missingHE.html (archived at https://perma.cc/9X25-5D8G)
R packages:
jomopanAmeliaMplusR packages:
mice
?mice::mice.impute.pmm) can be useful to obtain bounded imputations for non-normally distributed variablesdescribe(dataToImpute)md.pattern(dataToImpute, rotate.names = TRUE) id age Occasion height
209 1 1 1 1 0
25 1 1 1 0 1
0 0 0 25 25
varsToImpute <- c("height")
Y <- varsToImputenumImputations <- 5 # generally use 100 imputations; this example uses 5 for speednumCores <- parallelly::availableCores() - 1https://simongrund1.github.io/posts/multiple-imputation-for-three-level-and-cross-classified-data/ (archived at https://perma.cc/N4PP-A3V6)
mice
https://stefvanbuuren.name/fimd/sec-multioutcome.html#methods (archived at https://perma.cc/8CDA-TS3K)
?mice::mice.impute.2l.norm
?mice::mice.impute.2l.pan
?mice::mice.impute.2l.lmer
?miceadds::mice.impute.2l.pmm
?miceadds::mice.impute.2l.contextual.pmm
?miceadds::mice.impute.2l.continuous
?micemd::mice.impute.2l.2stage.norm
?micemd::mice.impute.2l.2stage.pmm
?micemd::mice.impute.2l.glm.norm
?micemd::mice.impute.2l.jomohttps://stefvanbuuren.name/fimd/sec-catoutcome.html#methods-1 (archived at https://perma.cc/5QHF-YRP6)
?mice::mice.impute.2l.bin
?miceadds::mice.impute.2l.binary
?miceadds::mice.impute.2l.pmm
?miceadds::mice.impute.2l.contextual.pmm
?micemd::mice.impute.2l.2stage.bin
?micemd::mice.impute.2l.glm.binhttps://stefvanbuuren.name/fimd/sec-multioutcome.html#methods (archived at https://perma.cc/8CDA-TS3K)
?miceadds::mice.impute.2l.pmm
?miceadds::mice.impute.2l.contextual.pmm
?micemd::mice.impute.2l.2stage.pmmhttps://stefvanbuuren.name/fimd/sec-catoutcome.html#methods-1 (archived at https://perma.cc/5QHF-YRP6)
?micemd::mice.impute.2l.2stage.pois
?micemd::mice.impute.2l.glm.pois
?countimp::mice.impute.2l.poisson
?countimp::mice.impute.2l.nb2
?countimp::mice.impute.2l.zihnbmeth <- make.method(dataToImpute)
meth[1:length(meth)] <- ""
meth[Y] <- "2l.pmm" # specify the imputation method here; this can differ by outcome variableA predictor matrix is a matrix of values, where:
The values are:
0
-2
1
2
3
4
pred <- make.predictorMatrix(dataToImpute)
pred[1:nrow(pred), 1:ncol(pred)] <- 0
pred[Y, "id"] <- (-2) # cluster variable
pred[Y, "Occasion"] <- 1 # fixed effect predictor
pred[Y, "age"] <- 2 # random effect predictor
pred[Y, Y] <- 1 # fixed effect predictor
diag(pred) <- 0
pred id age height Occasion
id 0 0 0 0
age 0 0 0 0
height -2 2 0 1
Occasion 0 0 0 0
mi_mice <- mice(
as.data.frame(dataToImpute),
method = meth,
predictorMatrix = pred,
m = numImputations,
maxit = 5, # generally use 100 maximum iterations; this example uses 5 for speed
seed = 52242)
iter imp variable
1 1 height
1 2 height
1 3 height
1 4 height
1 5 height
2 1 height
2 2 height
2 3 height
2 4 height
2 5 height
3 1 height
3 2 height
3 3 height
3 4 height
3 5 height
4 1 height
4 2 height
4 3 height
4 4 height
4 5 height
5 1 height
5 2 height
5 3 height
5 4 height
5 5 height
jomo
level1Vars <- c("height")
level2Vars <- c("v3","v4")
clusterVars <- c("id")
fullyObservedCovariates <- c("age","Occasion")
set.seed(52242)
mi_jomo <- jomo(
Y = data.frame(dataToImpute[, level1Vars]),
#Y2 = data.frame(dataToImpute[, level2Vars]),
X = data.frame(dataToImpute[, fullyObservedCovariates]),
clus = data.frame(dataToImpute[, clusterVars]),
nimp = numImputations,
meth = "random"
)Amelia
! in the console output indicates that the current estimated complete data covariance matrix is not invertible* in the console output indicates that the likelihood has not monotonically increased in that stepboundVars <- c("height")
boundCols <- which(names(dataToImpute) %in% boundVars)
boundLower <- 100
boundUpper <- 200
varBounds <- cbind(boundCols, boundLower, boundUpper)
set.seed(52242)
mi_amelia <- amelia(
dataToImpute,
m = numImputations,
ts = "age",
cs = "id",
polytime = 1,
intercs = TRUE,
#ords = ordinalVars,
#bounds = varBounds,
parallel = "snow",
#ncpus = numCores,
empri = .01*nrow(dataToImpute)) # ridge prior for numerical stability-- Imputation 1 --
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
21
-- Imputation 2 --
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
-- Imputation 3 --
1 2 3 4 5 6 7 8
-- Imputation 4 --
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40
41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100
101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120
121 122 123
-- Imputation 5 --
1 2 3 4 5 6 7 8 9 10 11 12
Mplus
Mplus Data FileSave R object as Mplus data file:
prepareMplusData(dataToImpute, file.path("dataToImpute.dat"))Mplus Syntax for Multilevel ImputationMplus syntax for multilevel imputation:
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!!!!! MPLUS SYNTAX LINES CANNOT EXCEED 90 CHARACTERS;
!!!!! VARIABLE NAMES AND PARAMETER LABELS CANNOT EXCEED 8 CHARACTERS EACH;
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
TITLE: Model Title
DATA: FILE = "dataToImpute.dat";
VARIABLE:
NAMES = id age height Occasion;
MISSING = .;
USEVARIABLES ARE age height Occasion;
!CATEGORICAL ARE INSERT_NAMES_OF_CATEGORICAL_VARIABLES_HERE;
CLUSTER = id;
ANALYSIS:
TYPE = twolevel basic;
bseed = 52242;
PROCESSORS = 2;
DATA IMPUTATION:
IMPUTE = age(0-100) height Occasion; !put ' (c)' after categorical vars
NDATASETS = 100;
SAVE = imp*.dat
Putting a range of values after a variable (e.g., 0-100) sets the lower and upper bounds of the imputed values. This would save a implist.dat file that can be used to run the model on the multiply imputed data, as shown here.
https://www.gerkovink.com/miceVignettes/futuremice/Vignette_futuremice.html (archived at https://perma.cc/4SNE-RCSR)
mi_parallel_mice <- futuremice(
dataToImpute,
method = meth,
predictorMatrix = pred,
m = numImputations,
maxit = 5, # generally use 100 maximum iterations; this example uses 5 for speed
parallelseed = 52242,
n.core = numCores,
packages = "miceadds")mi_mice$loggedEventsNULL
On convergence, the streams of the trace plots should intermingle and be free of any trend (at the later iterations). Convergence is diagnosed when the variance between different sequences is no larger than the variance within each individual sequence.
mice
mi_mice_long <- complete(
mi_mice,
action = "long",
include = TRUE)
mi_mice_long$newVar <- mi_mice_long$age * mi_mice_long$heightjomo
Amelia
for(i in 1:length(mi_amelia$imputations)){
mi_amelia$imputations[[i]]$newVar <- mi_amelia$imputations[[i]]$age * mi_amelia$imputations[[i]]$height
}mids objectmice
mi_mice_mids <- as.mids(mi_mice_long)jomo
mi_jomo_mids <- miceadds::jomo2mids(mi_jomo)Amelia
mi_amelia_mids <- miceadds::datlist2mids(mi_amelia$imputations)Mplus
mids2mplus(mi_mice_mids, path = file.path("InsertFilePathHere", fsep = ""))
mids2mplus(mi_jomo_mids, path = file.path("InsertFilePathHere", fsep = ""))
mids2mplus(mi_amelia_mids, path = file.path("InsertFilePathHere", fsep = ""))mice::predict_mi(
fit_lm,
#newdata = # can include if want to make predictions based on another (i.e., "new") data object
se.fit = TRUE,
interval = c("prediction")
)$fit
fit lwr upr
1 143.0912 126.6977 159.4847
2 144.7508 128.3986 161.1030
3 146.0963 129.7770 162.4156
4 147.3095 130.8591 163.7599
5 149.8360 133.5256 166.1463
6 151.5224 135.2058 167.8390
$se.fit
[1] 1.1025053 0.9291539 0.7638337 1.3045752 0.7125974 0.7486208
$df
[1] 1460.8931 1415.7428 637.8606 935.7190 6385.4620 20758.0027
$residual.scale
[1] 8.290191
mice::predict_mi(
fit_lmer,
#newdata = # can include if want to make predictions based on another (i.e., "new") data object
se.fit = TRUE,
interval = c("prediction")
)Error in `predict_mi.mira()`:
! `predict_mi()` currently only works with the linear model.
Error in `predict_mi.mira()`:
! `predict_mi()` currently only works with the linear model.
https://stefvanbuuren.name/fimd (archived at https://perma.cc/46U2-QTM6)
https://www.gerkovink.com/miceVignettes/Multi_level/Multi_level_data.html (archived at https://perma.cc/SF32-D7ZF)
R version 4.5.2 (2025-10-31)
Platform: x86_64-pc-linux-gnu
Running under: Ubuntu 24.04.3 LTS
Matrix products: default
BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.26.so; LAPACK version 3.12.0
locale:
[1] LC_CTYPE=C.UTF-8 LC_NUMERIC=C LC_TIME=C.UTF-8
[4] LC_COLLATE=C.UTF-8 LC_MONETARY=C.UTF-8 LC_MESSAGES=C.UTF-8
[7] LC_PAPER=C.UTF-8 LC_NAME=C LC_ADDRESS=C
[10] LC_TELEPHONE=C LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C
time zone: UTC
tzcode source: system (glibc)
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] MplusAutomation_1.2 broom.mixed_0.2.9.6 lme4_1.1-38
[4] Matrix_1.7-4 nlme_3.1-168 furrr_0.3.1
[7] future_1.68.0 parallelly_1.46.0 jomo_2.7-6
[10] Amelia_1.8.3 Rcpp_1.1.0 mitml_0.4-5
[13] miceadds_3.18-36 micemd_1.10.1 mice_3.19.0
[16] psych_2.5.6 lubridate_1.9.4 forcats_1.0.1
[19] stringr_1.6.0 dplyr_1.1.4 purrr_1.2.0
[22] readr_2.1.6 tidyr_1.3.2 tibble_3.3.0
[25] ggplot2_4.0.1 tidyverse_2.0.0
loaded via a namespace (and not attached):
[1] RColorBrewer_1.1-3 jsonlite_2.0.0 GJRM_0.2-6.8
[4] shape_1.4.6.1 magrittr_2.0.4 farver_2.1.2
[7] nloptr_2.2.1 rmarkdown_2.30 vctrs_0.6.5
[10] minqa_1.2.8 htmltools_0.5.9 survey_4.4-8
[13] broom_1.0.11 htmlwidgets_1.6.4 gsubfn_0.7
[16] plyr_1.8.9 copula_1.1-6 sfsmisc_1.1-23
[19] lifecycle_1.0.4 startupmsg_1.0.0 iterators_1.0.14
[22] pkgconfig_2.0.3 R6_2.6.1 fastmap_1.2.0
[25] rbibutils_2.4 magic_1.6-1 digest_0.6.39
[28] numDeriv_2016.8-1.1 ismev_1.43 timechange_0.3.0
[31] pspline_1.0-21 httr_1.4.7 abind_1.4-8
[34] mgcv_1.9-3 compiler_4.5.2 mvmeta_1.0.3
[37] pander_0.6.6 withr_3.0.2 gsl_2.1-9
[40] S7_0.2.1 backports_1.5.0 DBI_1.2.3
[43] fastDummies_1.7.5 pan_1.9 MASS_7.3-65
[46] distr_2.9.7 VineCopula_2.6.1 tools_4.5.2
[49] pbivnorm_0.6.0 foreign_0.8-90 trust_0.1-8
[52] otel_0.2.0 nnet_7.3-20 glue_1.8.0
[55] stabledist_0.7-2 grid_4.5.2 checkmate_2.3.3
[58] generics_0.1.4 gtable_0.3.6 tzdb_0.5.0
[61] data.table_1.17.8 hms_1.1.4 foreach_1.5.2
[64] pillar_1.11.1 mitools_2.4 splines_4.5.2
[67] lattice_0.22-7 survival_3.8-3 gmp_0.7-5
[70] tidyselect_1.2.1 ADGofTest_0.3 knitr_1.51
[73] reformulas_0.4.3 stats4_4.5.2 xfun_0.55
[76] mixmeta_1.2.2 texreg_1.39.4 matrixStats_1.5.0
[79] proto_1.0.0 scam_1.2-20 stringi_1.8.7
[82] VGAM_1.1-14 yaml_2.3.12 boot_1.3-32
[85] evaluate_1.0.5 codetools_0.2-20 evd_2.3-7.1
[88] cli_3.6.5 rpart_4.1.24 xtable_1.8-4
[91] Rdpack_2.6.4 distrEx_2.9.6 globals_0.18.0
[94] coda_0.19-4.1 parallel_4.5.2 listenv_0.10.0
[97] gamlss.dist_6.1-1 Rmpfr_1.1-2 glmnet_4.1-10
[100] mvtnorm_1.3-3 scales_1.4.0 pcaPP_2.0-5
[103] rlang_1.1.6 mnormt_2.1.1
---
title: "Multiple Imputation"
output:
html_document:
code_folding: show
---
# Preamble
## Install Libraries
```{r}
#install.packages("remotes")
#remotes::install_github("DevPsyLab/petersenlab")
```
## Load Libraries
```{r}
library("tidyverse")
library("psych")
library("mice")
library("micemd")
library("miceadds")
library("mitml")
library("Amelia")
library("jomo")
library("parallelly")
library("future")
library("furrr")
library("nlme")
library("lme4")
library("broom.mixed")
library("MplusAutomation")
```
## Data
```{r}
data(Oxboys, package = "nlme")
Oxboys_addNA <- data.frame(Oxboys)
Oxboys_addNA <- Oxboys_addNA %>%
rename(id = Subject)
Oxboys_addNA$id <- as.integer(Oxboys_addNA$id)
Oxboys_addNA$Occasion <- as.integer(Oxboys_addNA$Occasion)
set.seed(52242)
Oxboys_addNA[sample(1:nrow(Oxboys_addNA), 25), "height"] <- NA
dataToImpute <- Oxboys_addNA
```
# Types of Missingness
1. Missing Completely at Random (MCAR)
- the probability of being missing is the same for all cases
- missingness is not related to variables in the model
1. (Conditionally) Missing at Random (MAR)
- missingness is related to variables in the model, but once we condition on the variables in the model, the missingness is haphazard
- the unobserved values do not play a role
1. Missing Not at Random (MNAR)
- missingness is related to variables that are not in the model (i.e., for reasons that are unknown)
- unobserved variables carry information about whether a case will have missing data
# Approaches to Handle Missing Data
## For MCAR/MAR Missingness
1. Full Information Maximum Likelihood (FIML)
1. Multiple Imputation
## For MNAR Missingness
<https://stefvanbuuren.name/fimd/sec-nonignorable.html> (archived at <https://perma.cc/N7WW-HDZF>)
<https://cran.r-project.org/web/packages/missingHE/vignettes/Fitting_MNAR_models_in_missingHE.html> (archived at <https://perma.cc/9X25-5D8G>)
1. find more data about the causes for the missingness
1. sensitivity analyses (what-if analyses) to see how sensitive the results are under various scenarios
- <https://stefvanbuuren.name/fimd/sec-MCAR.html> (archived at <https://perma.cc/9KM9-3NX3>)
1. selection models
- simultaneously estimate the focal model and a missingness model, where the missingness model has the missing data indicator as a dependent variable, as predicted by the original dependent variable, the original predictors, and any additional covariates etc.
- if the missingness model is approximately correct, the focal model adjusts in way that removes nonresponse bias
- similar to a mediation process
- X → Y
- Y → missingness
- X → missingness
- <https://quantitudepod.org/s4e08-craig-enders/> (archived at <https://perma.cc/FY9L-L3F7>)
1. pattern-mixture models
- missing data indicator is a predictor variable
- similar to a moderation process; subgroups of cases have different parameter estimates
# Approaches to Multiple Imputation
- multiple imputation by joint modeling
- assumes that the variables in follow a joint distribution, e.g.:
- multivariate normal (i.e., multivariate normal imputation)
- `R` packages:
- `jomo`
- `pan`
- `Amelia`
- `Mplus`
- multiple imputation by chained equations
- aka:
- fully conditional specification multiple imputation
- sequential regression multiple imputation
- `R` packages:
- `mice`
- predictive mean matching (`?mice::mice.impute.pmm`) can be useful to obtain bounded imputations for non-normally distributed variables
# Describe Missing Data
```{r}
describe(dataToImpute)
md.pattern(dataToImpute, rotate.names = TRUE)
```
# Pre-Imputation Setup
## Specify Variables to Impute
```{r}
varsToImpute <- c("height")
Y <- varsToImpute
```
## Specify Number of Imputations
```{r}
numImputations <- 5 # generally use 100 imputations; this example uses 5 for speed
```
## Detect Cores
```{r}
numCores <- parallelly::availableCores() - 1
```
# Multilevel Multiple Imputation
## Three-Level and Cross-Classified Data
<https://simongrund1.github.io/posts/multiple-imputation-for-three-level-and-cross-classified-data/> (archived at <https://perma.cc/N4PP-A3V6>)
## Methods
### `mice` {#mice}
#### Types
##### Continuous Outcomes
<https://stefvanbuuren.name/fimd/sec-multioutcome.html#methods> (archived at <https://perma.cc/8CDA-TS3K>)
```{r}
#| eval: false
?mice::mice.impute.2l.norm
?mice::mice.impute.2l.pan
?mice::mice.impute.2l.lmer
?miceadds::mice.impute.2l.pmm
?miceadds::mice.impute.2l.contextual.pmm
?miceadds::mice.impute.2l.continuous
?micemd::mice.impute.2l.2stage.norm
?micemd::mice.impute.2l.2stage.pmm
?micemd::mice.impute.2l.glm.norm
?micemd::mice.impute.2l.jomo
```
##### Binary Outcomes
<https://stefvanbuuren.name/fimd/sec-catoutcome.html#methods-1> (archived at <https://perma.cc/5QHF-YRP6>)
```{r}
#| eval: false
?mice::mice.impute.2l.bin
?miceadds::mice.impute.2l.binary
?miceadds::mice.impute.2l.pmm
?miceadds::mice.impute.2l.contextual.pmm
?micemd::mice.impute.2l.2stage.bin
?micemd::mice.impute.2l.glm.bin
```
##### Ordinal Outcomes
<https://stefvanbuuren.name/fimd/sec-multioutcome.html#methods> (archived at <https://perma.cc/8CDA-TS3K>)
```{r}
#| eval: false
?miceadds::mice.impute.2l.pmm
?miceadds::mice.impute.2l.contextual.pmm
?micemd::mice.impute.2l.2stage.pmm
```
##### Count Outcomes
<https://stefvanbuuren.name/fimd/sec-catoutcome.html#methods-1> (archived at <https://perma.cc/5QHF-YRP6>)
```{r}
#| eval: false
?micemd::mice.impute.2l.2stage.pois
?micemd::mice.impute.2l.glm.pois
?countimp::mice.impute.2l.poisson
?countimp::mice.impute.2l.nb2
?countimp::mice.impute.2l.zihnb
```
#### Specifying the Imputation Method
```{r}
meth <- make.method(dataToImpute)
meth[1:length(meth)] <- ""
meth[Y] <- "2l.pmm" # specify the imputation method here; this can differ by outcome variable
```
#### Specifying the Predictor Matrix
A predictor matrix is a matrix of values, where:
- columns with non-zero values are predictors of the variable specified in the given row
- the diagonal of the predictor matrix should be zero because a variable cannot predict itself
The values are:
- NOT a predictor of the outcome: `0`
- cluster variable: `-2`
- fixed effect of predictor: `1`
- fixed effect and random effect of predictor: `2`
- include cluster mean of predictor in addition to fixed effect of predictor: `3`
- include cluster mean of predictor in addition to fixed effect and random effect of predictor: `4`
```{r}
pred <- make.predictorMatrix(dataToImpute)
pred[1:nrow(pred), 1:ncol(pred)] <- 0
pred[Y, "id"] <- (-2) # cluster variable
pred[Y, "Occasion"] <- 1 # fixed effect predictor
pred[Y, "age"] <- 2 # random effect predictor
pred[Y, Y] <- 1 # fixed effect predictor
diag(pred) <- 0
pred
```
#### Syntax
```{r}
mi_mice <- mice(
as.data.frame(dataToImpute),
method = meth,
predictorMatrix = pred,
m = numImputations,
maxit = 5, # generally use 100 maximum iterations; this example uses 5 for speed
seed = 52242)
```
### `jomo` {#jomo}
```{r}
#| eval: false
level1Vars <- c("height")
level2Vars <- c("v3","v4")
clusterVars <- c("id")
fullyObservedCovariates <- c("age","Occasion")
set.seed(52242)
mi_jomo <- jomo(
Y = data.frame(dataToImpute[, level1Vars]),
#Y2 = data.frame(dataToImpute[, level2Vars]),
X = data.frame(dataToImpute[, fullyObservedCovariates]),
clus = data.frame(dataToImpute[, clusterVars]),
nimp = numImputations,
meth = "random"
)
```
### `Amelia` {#amelia}
- `!` in the console output indicates that the current estimated complete data covariance matrix is not invertible
- `*` in the console output indicates that the likelihood has not monotonically increased in that step
```{r}
boundVars <- c("height")
boundCols <- which(names(dataToImpute) %in% boundVars)
boundLower <- 100
boundUpper <- 200
varBounds <- cbind(boundCols, boundLower, boundUpper)
set.seed(52242)
mi_amelia <- amelia(
dataToImpute,
m = numImputations,
ts = "age",
cs = "id",
polytime = 1,
intercs = TRUE,
#ords = ordinalVars,
#bounds = varBounds,
parallel = "snow",
#ncpus = numCores,
empri = .01*nrow(dataToImpute)) # ridge prior for numerical stability
```
### `Mplus` {#mplus}
#### Save `Mplus` Data File
Save `R` object as `Mplus` data file:
```{r}
prepareMplusData(dataToImpute, file.path("dataToImpute.dat"))
```
#### `Mplus` Syntax for Multilevel Imputation
`Mplus` syntax for multilevel imputation:
```Mplus
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!!!!! MPLUS SYNTAX LINES CANNOT EXCEED 90 CHARACTERS;
!!!!! VARIABLE NAMES AND PARAMETER LABELS CANNOT EXCEED 8 CHARACTERS EACH;
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
TITLE: Model Title
DATA: FILE = "dataToImpute.dat";
VARIABLE:
NAMES = id age height Occasion;
MISSING = .;
USEVARIABLES ARE age height Occasion;
!CATEGORICAL ARE INSERT_NAMES_OF_CATEGORICAL_VARIABLES_HERE;
CLUSTER = id;
ANALYSIS:
TYPE = twolevel basic;
bseed = 52242;
PROCESSORS = 2;
DATA IMPUTATION:
IMPUTE = age(0-100) height Occasion; !put ' (c)' after categorical vars
NDATASETS = 100;
SAVE = imp*.dat
```
Putting a range of values after a variable (e.g., `0-100`) sets the lower and upper bounds of the imputed values.
This would save a `implist.dat` file that can be used to run the model on the multiply imputed data, as shown [here](mplus.qmd#multipleImputation).
# Parallel Processing
<https://www.gerkovink.com/miceVignettes/futuremice/Vignette_futuremice.html> (archived at <https://perma.cc/4SNE-RCSR>)
```{r}
mi_parallel_mice <- futuremice(
dataToImpute,
method = meth,
predictorMatrix = pred,
m = numImputations,
maxit = 5, # generally use 100 maximum iterations; this example uses 5 for speed
parallelseed = 52242,
n.core = numCores,
packages = "miceadds")
```
# Imputation Diagnostics
## Logged Events
```{r}
mi_mice$loggedEvents
```
## Trace Plots
On convergence, the streams of the trace plots should intermingle and be free of any trend (at the later iterations).
Convergence is diagnosed when the variance between different sequences is no larger than the variance within each individual sequence.
- <https://stefvanbuuren.name/fimd/sec-algoptions.html> (archived at <https://perma.cc/4S54-465R>)
```{r}
plot(mi_mice, c("height"))
```
## Density Plots
```{r}
densityplot(mi_mice)
densityplot(mi_mice, ~ height)
```
## Strip Plots
```{r}
stripplot(mi_mice)
stripplot(mi_mice, height ~ .imp)
```
# Post-Processing
## Modify/Create New Variables
### `mice`
```{r}
mi_mice_long <- complete(
mi_mice,
action = "long",
include = TRUE)
mi_mice_long$newVar <- mi_mice_long$age * mi_mice_long$height
```
### `jomo`
```{r}
#| eval: false
mi_jomo <- mi_jomo %>%
rename(height = dataToImpute...level1Vars.)
mi_jomo$newVar <- mi_jomo$age * mi_jomo$height
```
### `Amelia`
```{r}
for(i in 1:length(mi_amelia$imputations)){
mi_amelia$imputations[[i]]$newVar <- mi_amelia$imputations[[i]]$age * mi_amelia$imputations[[i]]$height
}
```
## Convert to `mids` object
### `mice`
```{r}
mi_mice_mids <- as.mids(mi_mice_long)
```
### `jomo`
```{r}
#| eval: false
mi_jomo_mids <- miceadds::jomo2mids(mi_jomo)
```
### `Amelia`
```{r}
mi_amelia_mids <- miceadds::datlist2mids(mi_amelia$imputations)
```
## Export for `Mplus`
```{r}
#| eval: false
mids2mplus(mi_mice_mids, path = file.path("InsertFilePathHere", fsep = ""))
mids2mplus(mi_jomo_mids, path = file.path("InsertFilePathHere", fsep = ""))
mids2mplus(mi_amelia_mids, path = file.path("InsertFilePathHere", fsep = ""))
```
# Fit Model to Multiply Imputed Data
## Multiple Regression
```{r}
fit_lm <- with(
data = mi_mice,
expr = lm(height ~ age + Occasion))
```
### Pool Results Across Models
```{r}
fit_lm_pooled <- mice::pool(fit_lm)
fit_lm_pooled
summary(fit_lm_pooled)
```
## Mixed Model
```{r}
fit_lmer <- with(
data = mi_mice,
expr = lme4::lmer(height ~ age + Occasion + (1|id)))
```
### Pool Results Across Models
```{r}
fit_lmer_pooled <- mice::pool(fit_lmer)
fit_lmer_pooled
summary(fit_lmer_pooled)
```
# Model-Implied Predictions
```{r}
#| output: false
mice::predict_mi(
fit_lm,
#newdata = # can include if want to make predictions based on another (i.e., "new") data object
se.fit = TRUE,
interval = c("prediction")
)
```
```{r}
#| echo: false
fit_lm_fitted <- mice::predict_mi(
fit_lm,
#newdata = # can include if want to make predictions based on another (i.e., "new") data object
se.fit = TRUE,
interval = c("prediction")
)
fit_lm_fitted$fit <- head(fit_lm_fitted$fit)
fit_lm_fitted$se.fit <- head(fit_lm_fitted$se.fit)
fit_lm_fitted$df <- head(fit_lm_fitted$df)
fit_lm_fitted
```
```{r}
#| output: false
mice::predict_mi(
fit_lmer,
#newdata = # can include if want to make predictions based on another (i.e., "new") data object
se.fit = TRUE,
interval = c("prediction")
)
```
```{r}
#| echo: false
fit_lmer_fitted <- mice::predict_mi(
fit_lmer,
#newdata = # can include if want to make predictions based on another (i.e., "new") data object
se.fit = TRUE,
interval = c("prediction")
)
```
```{r}
#| echo: false
#| eval: false
fit_lmer_fitted <- mice::predict_mi(
fit_lmer,
#newdata = # can include if want to make predictions based on another (i.e., "new") data object
se.fit = TRUE,
interval = c("prediction")
)
fit_lmer_fitted$fit <- head(fit_lmer_fitted$fit)
fit_lmer_fitted$se.fit <- head(fit_lmer_fitted$se.fit)
fit_lmer_fitted$df <- head(fit_lmer_fitted$df)
fit_lmer_fitted
```
# Resources
<https://stefvanbuuren.name/fimd> (archived at <https://perma.cc/46U2-QTM6>)
<https://www.gerkovink.com/miceVignettes/Multi_level/Multi_level_data.html> (archived at <https://perma.cc/SF32-D7ZF>)
# Session Info
```{r}
#| code-fold: true
sessionInfo()
```