Multiple imputation convergence

Author

Thomas E. Metherell

Dependencies

library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(ggplot2)
Warning: package 'ggplot2' was built under R version 4.4.1
library(purrr)
library(smcfcs)
Warning: package 'smcfcs' was built under R version 4.4.2
library(targets)
Warning: package 'targets' was built under R version 4.4.2
library(tidyr)

datalist <- tribble(
  ~id, ~name,
  "sm1", "Social media use (period 1)",
  "sm2", "Social media use (period 2)",
  "NCem1", "Number of employees at primary responding parent's place of work (period 1)",
  "NCem2", "Number of employees at primary responding parent's place of work (period 2)",
  "NCcd1", "Attitude towards copying/downloading music etc. (period 1)",
  "NCcd2", "Attitude towards copying/downloading music etc. (period 2)",
  "CXsl1", "Sleep (period 1)",
  "CXsl2", "Sleep (period 2)",
  "CXex1", "Exercise (period 1)",
  "CXex2", "Exercise (period 2)",
  "CXsi1", "In-person social interaction (period 1)",
  "CXsi2", "In-person social interaction (period 2)",
  "PCtd1", "SDQ Total Difficulties score (period 1)",
  "PCtd2", "SDQ Total Difficulties score (period 2)"
)

cat_datalist <- tribble(
  ~id, ~name,
  "sm1", "Period 1",
  "sm2", "Period 2"
)

catXh_datalist <- tribble(
  ~id, ~name,
  "sm2", "Period 2"
)

interax_datalist <- tribble(
  ~id, ~name,
  "sm1_sex", "Sex (period 1)",
  "sm1_eth", "Ethnicity (period 1)",
  "sm1_inc", "Income (period 1)",
  "sm1_mh", "Prior mental health (period 1)",
  "sm2_sex", "Sex (period 2)",
  "sm2_eth", "Ethnicity (period 2)",
  "sm2_inc", "Income (period 2)",
  "sm2_wlt", "Wealth (period 2)",
  "sm2_mh", "Prior mental health (period 2)"
)

cat_interax_datalist <- tribble(
  ~id, ~name,
  "sm1_sex", "Sex (period 1)",
  "sm1_inc", "Income (period 1)",
  "sm1_mh", "Prior mental health (period 1)",
  "sm2_sex", "Sex (period 2)",
  "sm2_inc", "Income (period 2)",
  "sm2_wlt", "Wealth (period 2)",
  "sm2_mh", "Prior mental health (period 2)"
)

# The below are hotfixes for a bug in smcfcs' plotting function. These will be patched in an upcoming release
get_coef_names <- function (smformula, dat, intercept) 
{
  rhs <- gsub(x = smformula, pattern = ".*~", replacement = "")
  smformula_matrix <- as.formula(paste0("~ +", rhs))
  if (grepl(x = rhs, pattern = "strata")) {
    strata_var <- gsub(x = rhs, pattern = ".*\\(|\\).*", 
      replacement = "")
    rm_strata <- as.formula(paste0("~ . - strata(", strata_var, 
      ")"))
    smformula_matrix <- update(smformula_matrix, rm_strata)
  }
  model_mat <- stats::model.matrix(object = smformula_matrix, 
    data = dat)
  if (intercept == FALSE) {
    model_mat <- model_mat[, !(colnames(model_mat) %in% 
      "(Intercept)")]
  }
  coef_names <- colnames(model_mat)
  return(coef_names)
}

prep_iters <- function (x) 
{
  M <- dim(x$smCoefIter)[1]
  smtype <- x$smInfo$smtype
  smformula <- if(inherits(x$smInfo$smformula, "formula")) deparse1(x$smInfo$smformula) else x$smInfo$smformula
  dat <- x$impDatasets[[1]]
  numit <- dim(x$smCoefIter)[3]
  if (numit < 2) {
    stop("Re-run smcfcs() with numit >= 2 in order to assess convergence")
  }
  coef_names <- get_coef_names(smformula, dat, intercept = FALSE)
  ests_list <- lapply(X = seq_len(M), function(m) {
    coef_dat <- as.data.frame(t(x$smCoefIter[m, , ]))
    coef_dat$iters <- seq_len(numit)
    coef_dat$imp <- m
    return(coef_dat)
  })
  ests_df <- do.call(rbind.data.frame, ests_list)
  colnames(ests_df) <- c(coef_names, "iters", "imp")
  ests_long <- stats::reshape(data = ests_df, varying = coef_names, 
    timevar = "covar", v.names = "value", idvar = c("imp", 
      "iters"), direction = "long", times = coef_names)
  return(ests_long)
}

plot_mi <- function(id, name, type){
  cat(paste("###", name, "\n"))
  
  if(type == "main"){
    imputed_data <- tar_read_raw(paste("data_imputed_list", id, sep = "_"))
  } else {
    imputed_data <- tar_read_raw(paste(type, "data_imputed_list", id, sep = "_"))
  }
  
  dfs_plot <- lapply(imputed_data, function(x) prep_iters(x))
  for(i in seq_along(1:length(dfs_plot))){
    dfs_plot[[i]] <- dfs_plot[[i]] %>%
      mutate(imp = imp + 3*(i-1))
  }
  df_plot <- reduce(dfs_plot, rbind)
  
  print(ggplot(data = df_plot, aes(x = .data$iters, y = .data$value, col = factor(.data$imp))) +
    geom_line() +
    theme(legend.position = "none") + 
    labs(x = "Iterations", y = "Coefficient") + 
    facet_wrap(~covar))
  
  cat("\n\n")
}

Convergence plots (main analyses)

Below are the plots used to assess convergence of the multiple imputation for each exposure.

Social media use (period 1)

Social media use (period 2)

Number of employees at primary responding parent’s place of work (period 1)

Number of employees at primary responding parent’s place of work (period 2)

Attitude towards copying/downloading music etc. (period 1)

Attitude towards copying/downloading music etc. (period 2)

Sleep (period 1)

Sleep (period 2)

Exercise (period 1)

Exercise (period 2)

In-person social interaction (period 1)

In-person social interaction (period 2)

SDQ Total Difficulties score (period 1)

SDQ Total Difficulties score (period 2)

Convergence plots (sensitivity analyses)

Below are the plots used to assess convergence of the multiple imputation for each exposure’s sensitivity analyses.

Social media use (period 1)

Social media use (period 2)

Number of employees at primary responding parent’s place of work (period 1)

Number of employees at primary responding parent’s place of work (period 2)

Attitude towards copying/downloading music etc. (period 1)

Attitude towards copying/downloading music etc. (period 2)

Sleep (period 1)

Sleep (period 2)

Exercise (period 1)

Exercise (period 2)

In-person social interaction (period 1)

In-person social interaction (period 2)

SDQ Total Difficulties score (period 1)

SDQ Total Difficulties score (period 2)

Convergence plots (categorical social media use)

Below are the plots used to assess convergence of the multiple imputation for the data with categorised social media use variables.

Period 1

Period 2

Alternative “high” use thresholds for period 2

1 hour

Period 2

3 hours

Period 2

4 hours

Period 2

Convergence plots (interactions)

Below are the plots used to assess convergence of the multiple imputation for analyses with interaction terms.

Sex (period 1)

Ethnicity (period 1)

Income (period 1)

Prior mental health (period 1)

Sex (period 2)

Ethnicity (period 2)

Income (period 2)

Wealth (period 2)

Prior mental health (period 2)

Convergence plots (interactions, categorised social media use)

Below are the plots used to assess convergence of the multiple imputation for analyses with interaction terms and categorised social media use.

Sex (period 1)

Income (period 1)

Prior mental health (period 1)

Sex (period 2)

Income (period 2)

Wealth (period 2)

Prior mental health (period 2)