Descriptives report

Author

Thomas E. Metherell

Dependencies

library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(ggplot2)
Warning: package 'ggplot2' was built under R version 4.4.1
library(haven)
library(knitr)
Warning: package 'knitr' was built under R version 4.4.2
library(targets)
Warning: package 'targets' was built under R version 4.4.2
# Load the descriptive dataset from the targets store
data <- tar_read(descriptive_dataset)

# Swap the 0/1 ethnicity codes for readable labels
data <- data %>%
  mutate(DDC06E00 = recode(DDC06E00, `0` = "White", `1` = "Ethnic minority"))

Social media use

Age 11

The number of participants with non-missing social media use data at age 11 is 8617.

# Keep the identifiers and the age-11 social media use item for rows where the
# item is non-missing, then convert its numeric codes to an ordered factor of
# labels so the bars appear in ascending order of use
data_smu1_plot <- data %>%
  filter(!is.na(ECQ09X00)) %>%
  select(MCSID, CNUM, ECQ09X00) %>%
  mutate(
    ECQ09X00 = case_match(
      ECQ09X00,
      1 ~ "Never",
      2 ~ "Less often than once a month",
      3 ~ "At least once a month",
      4 ~ "At least once a week",
      5 ~ "Most days"
    ),
    ECQ09X00 = factor(
      ECQ09X00,
      levels = c(
        "Never",
        "Less often than once a month",
        "At least once a month",
        "At least once a week",
        "Most days"
      ),
      ordered = TRUE
    )
  )

# Bar chart of the number of rows in each frequency category
ggplot(data_smu1_plot, aes(x = ECQ09X00)) +
  geom_bar() +
  xlab("Social media use frequency (age 11)") +
  ylab("Number of participants") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))

Numbers in categories of usage

Cross-tabulated with categorical interaction variables
# Collapse the age-11 frequency item into three ordered usage bands
data_smu1_cat <- data %>%
  mutate(
    ECQ09X00 = factor(
      case_when(
        ECQ09X00 == 1 ~ "Never",
        ECQ09X00 %in% c(2, 3, 4) ~ "Less often than most days",
        ECQ09X00 == 5 ~ "Most days"
      ),
      levels = c("Never", "Less often than most days", "Most days"),
      ordered = TRUE
    )
  )

# Usage band x sex x period-1 HES event. Counts of 0-9 are displayed as "<10".
# The two male user cells with an event are additionally replaced by "."
# NOTE(review): presumably secondary suppression so the masked counts cannot
# be recovered by subtraction - confirm.
table(
  data_smu1_cat$ECQ09X00,
  data_smu1_cat$AHCSEX00,
  data_smu1_cat$HES_ei1
) %>%
  data.frame() %>%
  rename(
    `Social media use frequency` = Var1,
    `Sex assigned at birth` = Var2,
    `HES event (period 1)?` = Var3
  ) %>%
  mutate(
    Freq = case_match(Freq, 0:9 ~ "<10", .default = as.character(Freq)),
    Freq = case_when(
      `Social media use frequency` %in%
        c("Less often than most days", "Most days") &
        `Sex assigned at birth` == "Male" &
        `HES event (period 1)?` == 1 ~ ".",
      .default = Freq
    )
  ) %>%
  kable()
Social media use frequency Sex assigned at birth HES event (period 1)? Freq
Never Male 0 2411
Less often than most days Male 0 998
Most days Male 0 639
Never Female 0 2313
Less often than most days Female 0 948
Most days Female 0 790
Never Male 1 15
Less often than most days Male 1 .
Most days Male 1 .
Never Female 1 14
Less often than most days Female 1 <10
Most days Female 1 <10
# Usage band x ethnicity x period-1 HES event; counts of 0-9 shown as "<10"
table(
  data_smu1_cat$ECQ09X00,
  data_smu1_cat$DDC06E00,
  data_smu1_cat$HES_ei1
) %>%
  data.frame() %>%
  rename(
    `Social media use frequency` = Var1,
    Ethnicity = Var2,
    `HES event (period 1)?` = Var3
  ) %>%
  mutate(
    Freq = case_match(Freq, 0:9 ~ "<10", .default = as.character(Freq))
  ) %>%
  kable()
Social media use frequency Ethnicity HES event (period 1)? Freq
Never White 0 3320
Less often than most days White 0 1470
Most days White 0 1132
Never Ethnic minority 0 1015
Less often than most days Ethnic minority 0 311
Most days Ethnic minority 0 162
Never White 1 21
Less often than most days White 1 13
Most days White 1 13
Never Ethnic minority 1 <10
Less often than most days Ethnic minority 1 <10
Most days Ethnic minority 1 <10

Characteristics of non-users vs users

Non-users
# Rows whose age-11 social media use code is 1 ("Never")
data_subset_smu1_0 <- data %>% filter(ECQ09X00 == 1)

# Print the mean and standard deviation of DDOEDE00 (1 d.p.) for this subset
cat(paste(
  "Income (DDOEDE00):\nMean:",
  round(mean(data_subset_smu1_0$DDOEDE00, na.rm = TRUE), 1),
  "\nStandard deviation:",
  round(sd(data_subset_smu1_0$DDOEDE00, na.rm = TRUE), 1)
))
Income (DDOEDE00):
Mean: 419.9 
Standard deviation: 245.2
Users
# Rows whose age-11 social media use code is anything other than 1 ("Never");
# rows with a missing code are dropped by filter()
data_subset_smu1_1 <- data %>% filter(ECQ09X00 != 1)

# Print the mean and standard deviation of DDOEDE00 (1 d.p.) for this subset
cat(paste(
  "Income (DDOEDE00):\nMean:",
  round(mean(data_subset_smu1_1$DDOEDE00, na.rm = TRUE), 1),
  "\nStandard deviation:",
  round(sd(data_subset_smu1_1$DDOEDE00, na.rm = TRUE), 1)
))
Income (DDOEDE00):
Mean: 359.1 
Standard deviation: 216.9

Age 14

The number of participants with non-missing social media use data at age 14 is 2478. The histogram is truncated at 3 hours, 7.5 minutes to prevent disclosure (number of participants with usage between 2 hours, 52.5 minutes and 3 hours, 7.5 minutes = 11).

# Histogram of daily social media use at age 14 in 15-minute bins.
# Only rows below 187.5 minutes are plotted, so the sparsely populated upper
# tail is not drawn. Rows with missing usage come through the logical subset
# as all-NA rows and are then discarded by stat_bin() with a warning.
ggplot(data[data$mcs6_sm_per_day < 187.5, ], aes(x = mcs6_sm_per_day)) +
  geom_histogram(binwidth = 15) +
  # geom_histogram() draws counts by default, so the y axis is a number of
  # participants rather than a density
  labs(
    x = "Social media use time in minutes per day (age 14)",
    y = "Number of participants"
  ) +
  scale_x_continuous(breaks = seq(0, 180, 30)) +
  theme_bw()
Warning: Removed 10108 rows containing non-finite outside the scale range
(`stat_bin()`).

Percentiles:

# Deciles from 0% to 90% plus the 99th percentile of daily use at age 14,
# ignoring missing values, held as a one-column data frame
smu_2_percentiles <- data.frame(
  quantile(
    data$mcs6_sm_per_day,
    probs = c(seq(0, 0.9, 0.1), 0.99),
    na.rm = TRUE
  )
)

# Give the single column a readable header before printing
names(smu_2_percentiles) <- "Social media use per day (minutes)"

kable(smu_2_percentiles)
Social media use per day (minutes)
0% 0.0
10% 0.0
20% 0.0
30% 0.0
40% 0.0
50% 0.0
60% 15.0
70% 30.0
80% 45.0
90% 85.0
99% 267.3

Numbers in categories of usage

Total
1-hour threshold
# Band daily use at age 14: zero, up to 60 minutes, and more than 60 minutes
data_smu2_cat1 <- mutate(
  data,
  mcs6_sm_per_day = case_when(
    mcs6_sm_per_day == 0 ~ "None",
    mcs6_sm_per_day > 0 & mcs6_sm_per_day <= 60 ~ "Moderate",
    mcs6_sm_per_day > 60 ~ "High"
  )
)

# Number of rows in each band
table(data_smu2_cat1$mcs6_sm_per_day) %>%
  data.frame() %>%
  rename(`Social media use per day` = Var1) %>%
  kable()
Social media use per day Freq
High 352
Moderate 867
None 1259
2-hour threshold
# Band daily use at age 14: zero, up to 120 minutes, and more than 120 minutes
data_smu2_cat2 <- mutate(
  data,
  mcs6_sm_per_day = case_when(
    mcs6_sm_per_day == 0 ~ "None",
    mcs6_sm_per_day > 0 & mcs6_sm_per_day <= 120 ~ "Moderate",
    mcs6_sm_per_day > 120 ~ "High"
  )
)

Suppressed to prevent disclosure.

3-hour threshold
# Band daily use at age 14: zero, up to 180 minutes, and more than 180 minutes
data_smu2_cat3 <- mutate(
  data,
  mcs6_sm_per_day = case_when(
    mcs6_sm_per_day == 0 ~ "None",
    mcs6_sm_per_day > 0 & mcs6_sm_per_day <= 180 ~ "Moderate",
    mcs6_sm_per_day > 180 ~ "High"
  )
)

# Number of rows in each band
table(data_smu2_cat3$mcs6_sm_per_day) %>%
  data.frame() %>%
  rename(`Social media use per day` = Var1) %>%
  kable()
Social media use per day Freq
High 57
Moderate 1162
None 1259
4-hour threshold
# Band daily use at age 14: zero, up to 240 minutes, and more than 240 minutes
data_smu2_cat4 <- mutate(
  data,
  mcs6_sm_per_day = case_when(
    mcs6_sm_per_day == 0 ~ "None",
    mcs6_sm_per_day > 0 & mcs6_sm_per_day <= 240 ~ "Moderate",
    mcs6_sm_per_day > 240 ~ "High"
  )
)

# Number of rows in each band
table(data_smu2_cat4$mcs6_sm_per_day) %>%
  data.frame() %>%
  rename(`Social media use per day` = Var1) %>%
  kable()
Social media use per day Freq
High 30
Moderate 1189
None 1259
Cross-tabulated with categorical interaction variables

The following are for a 2-hour threshold only.

# Usage band (2-hour threshold) x sex x period-2 HES event. Counts of 0-9 are
# displayed as "<10". The two female user cells with an event are additionally
# replaced by "."
# NOTE(review): presumably secondary suppression so the masked counts cannot
# be recovered by subtraction - confirm.
table(
  data_smu2_cat2$mcs6_sm_per_day,
  data_smu2_cat2$AHCSEX00,
  data_smu2_cat2$HES_ei2
) %>%
  data.frame() %>%
  rename(
    `Social media use per day` = Var1,
    `Sex assigned at birth` = Var2,
    `HES event (period 2)?` = Var3
  ) %>%
  mutate(
    Freq = case_match(Freq, 0:9 ~ "<10", .default = as.character(Freq)),
    Freq = case_when(
      `Social media use per day` %in% c("Moderate", "High") &
        `Sex assigned at birth` == "Female" &
        `HES event (period 2)?` == 1 ~ ".",
      .default = Freq
    )
  ) %>%
  kable()
Social media use per day Sex assigned at birth HES event (period 2)? Freq
High Male 0 26
Moderate Male 0 338
None Male 0 706
High Female 0 100
Moderate Female 0 667
None Female 0 473
High Male 1 <10
Moderate Male 1 <10
None Male 1 10
High Female 1 .
Moderate Female 1 .
None Female 1 19
# Usage band (2-hour threshold) x ethnicity x period-2 HES event; counts of
# 0-9 shown as "<10"
table(
  data_smu2_cat2$mcs6_sm_per_day,
  data_smu2_cat2$DDC06E00,
  data_smu2_cat2$HES_ei2
) %>%
  data.frame() %>%
  rename(
    `Social media use per day` = Var1,
    Ethnicity = Var2,
    `HES event (period 2)?` = Var3
  ) %>%
  mutate(
    Freq = case_match(Freq, 0:9 ~ "<10", .default = as.character(Freq))
  ) %>%
  kable()
Social media use per day Ethnicity HES event (period 2)? Freq
High White 0 102
Moderate White 0 785
None White 0 914
High Ethnic minority 0 22
Moderate Ethnic minority 0 185
None Ethnic minority 0 183
High White 1 <10
Moderate White 1 <10
None White 1 24
High Ethnic minority 1 <10
Moderate Ethnic minority 1 <10
None Ethnic minority 1 <10

Characteristics of non-users vs users

Non-users
# Rows with zero minutes of social media use per day at age 14
data_subset_smu2_0 <- data %>% filter(mcs6_sm_per_day == 0)

# Print the mean and standard deviation of EOEDE000 (1 d.p.) for this subset
cat(paste(
  "Income (EOEDE000):\nMean:",
  round(mean(data_subset_smu2_0$EOEDE000, na.rm = TRUE), 1),
  "\nStandard deviation:",
  round(sd(data_subset_smu2_0$EOEDE000, na.rm = TRUE), 1)
))
Income (EOEDE000):
Mean: 459 
Standard deviation: 186.9
# Print the mean and standard deviation of mcs5_wealth (1 d.p.) for the
# zero-use subset
cat(paste(
  "Household wealth (mcs5_wealth):\nMean:",
  round(mean(data_subset_smu2_0$mcs5_wealth, na.rm = TRUE), 1),
  "\nStandard deviation:",
  round(sd(data_subset_smu2_0$mcs5_wealth, na.rm = TRUE), 1)
))
Household wealth (mcs5_wealth):
Mean: 198377.6 
Standard deviation: 283326.9
Users
# Rows with non-zero social media use per day at age 14; rows with missing
# usage are dropped by filter()
data_subset_smu2_1 <- data %>% filter(mcs6_sm_per_day != 0)

# Print the mean and standard deviation of EOEDE000 (1 d.p.) for this subset
cat(paste(
  "Income (EOEDE000):\nMean:",
  round(mean(data_subset_smu2_1$EOEDE000, na.rm = TRUE), 1),
  "\nStandard deviation:",
  round(sd(data_subset_smu2_1$EOEDE000, na.rm = TRUE), 1)
))
Income (EOEDE000):
Mean: 450 
Standard deviation: 178
# Print the mean and standard deviation of mcs5_wealth (1 d.p.) for the
# non-zero-use subset
cat(paste(
  "Household wealth (mcs5_wealth):\nMean:",
  round(mean(data_subset_smu2_1$mcs5_wealth, na.rm = TRUE), 1),
  "\nStandard deviation:",
  round(sd(data_subset_smu2_1$mcs5_wealth, na.rm = TRUE), 1)
))
Household wealth (mcs5_wealth):
Mean: 224134.4 
Standard deviation: 479176.2

HES episodes

The total number of participants with a psychiatric episode recorded in period 1 is 65.

The total number of participants with a psychiatric episode recorded in period 2 is 173.

Cross-tabulation

# Identifiers, consent flag and both HES event indicators. The indicators are
# set to NA wherever CONSENT is 0, so those rows fall out of the table below.
data_crosstab <- data %>%
  select(MCSID, CNUM, CONSENT, HES_ei1, HES_ei2) %>%
  mutate(across(c(HES_ei1, HES_ei2), ~ if_else(CONSENT == 0, NA, .x)))

# Period-1 indicator (rows) against period-2 indicator (columns)
table(
  case_match(
    data_crosstab$HES_ei1,
    0 ~ "No (period 1)",
    1 ~ "Yes (period 1)",
    .default = NA
  ),
  case_match(
    data_crosstab$HES_ei2,
    0 ~ "No (period 2)",
    1 ~ "Yes (period 2)",
    .default = NA
  )
) %>%
  kable()
No (period 2) Yes (period 2)
No (period 1) 5975 156
Yes (period 1) 48 17

In total 6390 eligible participants did not consent to HES linkage.

Note that the totals in the subsequent tables will not add up to the numbers above, because participants with missing responses to the grouping variables are not shown.

By region

Period 1

# Rows with a period-1 HES event indicator of 1
data_HES1 <- filter(data, HES_ei1 == 1)

# Count by DAREGN00 after converting it with as_factor(); counts of 0-9 are
# shown as "<10"
table(as_factor(data_HES1$DAREGN00)) %>%
  data.frame() %>%
  rename(`Region of residence` = Var1) %>%
  mutate(
    Freq = case_match(Freq, 0:9 ~ "<10", .default = as.character(Freq))
  ) %>%
  kable()
Region of residence Freq
North East <10
North West 16
Yorkshire and the Humber <10
East Midlands <10
West Midlands <10
East of England <10
London 13
South East <10
South West <10

Period 2

# Rows with a period-2 HES event indicator of 1
data_HES2 <- filter(data, HES_ei2 == 1)

# Count by EAREGN00 after converting it with as_factor(); counts of 0-9 are
# shown as "<10"
table(as_factor(data_HES2$EAREGN00)) %>%
  data.frame() %>%
  rename(`Region of residence` = Var1) %>%
  mutate(
    Freq = case_match(Freq, 0:9 ~ "<10", .default = as.character(Freq))
  ) %>%
  kable()
Region of residence Freq
North East <10
North West 18
Yorkshire and the Humber 19
East Midlands 11
West Midlands 15
East of England 19
London 21
South East 34
South West 18

By sex assigned at birth

Period 1

# Period-1 HES events counted by AHCSEX00
table(data_HES1$AHCSEX00) %>%
  data.frame() %>%
  rename(`Sex assigned at birth` = Var1) %>%
  kable()
Sex assigned at birth Freq
Male 30
Female 30

Period 2

# Period-2 HES events counted by AHCSEX00
table(data_HES2$AHCSEX00) %>%
  data.frame() %>%
  rename(`Sex assigned at birth` = Var1) %>%
  kable()
Sex assigned at birth Freq
Male 53
Female 109

By ethnicity

Period 1

# Period-1 HES events counted by DDC06E00 (recoded ethnicity)
table(data_HES1$DDC06E00) %>%
  data.frame() %>%
  rename(Ethnicity = Var1) %>%
  kable()
Ethnicity Freq
White 49
Ethnic minority 11

Period 2

# Period-2 HES events counted by DDC06E00 (recoded ethnicity)
table(data_HES2$DDC06E00) %>%
  data.frame() %>%
  rename(Ethnicity = Var1) %>%
  kable()
Ethnicity Freq
White 128
Ethnic minority 23