Descriptives report

Author

Thomas E. Metherell

Dependencies

library(dplyr)


Attaching package: 'dplyr'

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union

library(ggplot2)

Warning: package 'ggplot2' was built under R version 4.4.1

library(haven)
library(knitr)

Warning: package 'knitr' was built under R version 4.4.2

library(targets)

Warning: package 'targets' was built under R version 4.4.2

# Read the `descriptive_dataset` target with targets::tar_read()
data <- tar_read(descriptive_dataset)

# Recode ethnicity (DDC06E00) from its 0/1 codes to text labels to make the
# tables below easier to read
data <- data %>%
  mutate(DDC06E00 = recode(DDC06E00, `0` = "White", `1` = "Ethnic minority"))

Social media use

Age 11

The number of participants with non-missing social media use data at age 11 is 8617.

# Plotting data for the age-11 bar chart: drop rows with a missing ECQ09X00,
# keep MCSID, CNUM and ECQ09X00, and convert the 1-5 codes to an ordered factor
# so that the categories are drawn from least to most frequent use.
data_smu1_plot <- data %>%
  filter(!is.na(ECQ09X00)) %>%
  select(MCSID, CNUM, ECQ09X00) %>%
  mutate(
    ECQ09X00 = case_match(
      ECQ09X00,
      1 ~ "Never",
      2 ~ "Less often than once a month",
      3 ~ "At least once a month",
      4 ~ "At least once a week",
      5 ~ "Most days"
    ),
    ECQ09X00 = factor(
      ECQ09X00,
      levels = c(
        "Never",
        "Less often than once a month",
        "At least once a month",
        "At least once a week",
        "Most days"
      ),
      ordered = TRUE
    )
  )

# Bar chart of the number of participants in each age-11 usage category;
# the x-axis labels are rotated 45 degrees so the long category names fit.
ggplot(data_smu1_plot) +
  geom_bar(mapping = aes(x = ECQ09X00)) +
  xlab("Social media use frequency (age 11)") +
  ylab("Number of participants") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))

Numbers in categories of usage

Cross-tabulated with categorical interaction variables

# Collapse the five ECQ09X00 codes into three ordered usage categories
# (code 1, codes 2-4, code 5); any other value becomes NA.
data_smu1_cat <- data %>%
  mutate(
    ECQ09X00 = case_when(
      ECQ09X00 == 1 ~ "Never",
      ECQ09X00 == 5 ~ "Most days",
      ECQ09X00 %in% c(2, 3, 4) ~ "Less often than most days"
    ),
    ECQ09X00 = factor(
      ECQ09X00,
      levels = c("Never", "Less often than most days", "Most days"),
      ordered = TRUE
    )
  )

# Usage category x sex x HES event (period 1) counts. Counts of 0-9 are shown
# as "<10"; the two male "HES event = 1" cells for users are then replaced
# with "." (presumably secondary suppression -- confirm with the author).
table(
  data_smu1_cat$ECQ09X00,
  data_smu1_cat$AHCSEX00,
  data_smu1_cat$HES_ei1
) |>
  data.frame() |>
  rename(
    `Social media use frequency` = Var1,
    `Sex assigned at birth` = Var2,
    `HES event (period 1)?` = Var3
  ) |>
  mutate(
    Freq = case_match(
      Freq,
      0:9 ~ "<10",
      .default = as.character(Freq)
    ),
    Freq = case_when(
      `Social media use frequency` %in%
        c("Less often than most days", "Most days") &
        `Sex assigned at birth` == "Male" &
        `HES event (period 1)?` == 1 ~ ".",
      .default = Freq
    )
  ) |>
  kable()

Social media use frequency	Sex assigned at birth	HES event (period 1)?	Freq
Never	Male	0	2411
Less often than most days	Male	0	998
Most days	Male	0	639
Never	Female	0	2313
Less often than most days	Female	0	948
Most days	Female	0	790
Never	Male	1	15
Less often than most days	Male	1	.
Most days	Male	1	.
Never	Female	1	14
Less often than most days	Female	1	<10
Most days	Female	1	<10

# Usage category x ethnicity x HES event (period 1) counts, with counts of
# 0-9 displayed as "<10".
table(
  data_smu1_cat$ECQ09X00,
  data_smu1_cat$DDC06E00,
  data_smu1_cat$HES_ei1
) |>
  data.frame() |>
  rename(
    `Social media use frequency` = Var1,
    Ethnicity = Var2,
    `HES event (period 1)?` = Var3
  ) |>
  mutate(
    Freq = case_match(
      Freq,
      0:9 ~ "<10",
      .default = as.character(Freq)
    )
  ) |>
  kable()

Social media use frequency	Ethnicity	HES event (period 1)?	Freq
Never	White	0	3320
Less often than most days	White	0	1470
Most days	White	0	1132
Never	Ethnic minority	0	1015
Less often than most days	Ethnic minority	0	311
Most days	Ethnic minority	0	162
Never	White	1	21
Less often than most days	White	1	13
Most days	White	1	13
Never	Ethnic minority	1	<10
Less often than most days	Ethnic minority	1	<10
Most days	Ethnic minority	1	<10

Characteristics of non-users vs users

Non-users

# Rows with ECQ09X00 == 1 (the "Never" category at age 11). The column is
# referenced by name rather than as `data$ECQ09X00`, so that filter() evaluates
# it through data masking on the rows it is actually filtering.
data_subset_smu1_0 <- data %>% filter(ECQ09X00 == 1)

# Mean and standard deviation of DDOEDE00 to 1 decimal place, ignoring
# missing values. cat() joins its arguments with single spaces.
cat(
  "Income (DDOEDE00):\nMean:",
  as.character(round(mean(data_subset_smu1_0$DDOEDE00, na.rm = TRUE), 1)),
  "\nStandard deviation:",
  as.character(round(sd(data_subset_smu1_0$DDOEDE00, na.rm = TRUE), 1))
)

Income (DDOEDE00):
Mean: 419.9 
Standard deviation: 245.2

Users

# Rows with a non-missing ECQ09X00 other than 1, i.e. any reported use at
# age 11. The column is referenced by name rather than as `data$ECQ09X00`, so
# that filter() evaluates it through data masking on the rows being filtered.
data_subset_smu1_1 <- data %>% filter(ECQ09X00 != 1)

# Mean and standard deviation of DDOEDE00 to 1 decimal place, ignoring
# missing values. cat() joins its arguments with single spaces.
cat(
  "Income (DDOEDE00):\nMean:",
  as.character(round(mean(data_subset_smu1_1$DDOEDE00, na.rm = TRUE), 1)),
  "\nStandard deviation:",
  as.character(round(sd(data_subset_smu1_1$DDOEDE00, na.rm = TRUE), 1))
)

Income (DDOEDE00):
Mean: 359.1 
Standard deviation: 216.9

Age 14

The number of participants with non-missing social media use data at age 14 is 2478. The histogram is truncated at 3 hours, 7.5 minutes to prevent disclosure (number of participants with usage between 2 hours, 52.5 minutes and 3 hours, 7.5 minutes = 11).

# Histogram of daily social media use at age 14, restricted to values below
# 187.5 minutes. filter() is used instead of `data[cond, ]` because base
# subsetting turns every NA in the condition into an all-NA row, which
# stat_bin() then has to discard with a warning; filter() drops those rows.
# The y axis shows counts (the geom_histogram() default), so it is labelled as
# a number of participants rather than a density.
data %>%
  filter(mcs6_sm_per_day < 187.5) %>%
  ggplot(aes(x = mcs6_sm_per_day)) +
  geom_histogram(binwidth = 15) +
  labs(
    x = "Social media use time in minutes per day (age 14)",
    y = "Number of participants"
  ) +
  scale_x_continuous(breaks = seq(0, 180, 30)) +
  theme_bw()

Warning: Removed 10108 rows containing non-finite outside the scale range
(`stat_bin()`).

Percentiles:

# Deciles from 0% to 90%, plus the 99th percentile, of daily social media use,
# ignoring missing values. The quantile names ("0%", "10%", ...) become the
# row names of the one-column data frame.
smu_2_percentiles <- data.frame(
  `Social media use per day (minutes)` = quantile(
    data$mcs6_sm_per_day,
    probs = c(seq(0, 0.9, 0.1), 0.99),
    na.rm = TRUE
  ),
  check.names = FALSE
)

kable(smu_2_percentiles)

	Social media use per day (minutes)
0%	0.0
10%	0.0
20%	0.0
30%	0.0
40%	0.0
50%	0.0
60%	15.0
70%	30.0
80%	45.0
90%	85.0
99%	267.3

Numbers in categories of usage

Total

1-hour threshold

# 1-hour threshold: 0 minutes = "None", up to and including 60 = "Moderate",
# above 60 = "High". Conditions are checked in order, so the second branch only
# sees values in (0, 60]. Missing or negative values stay NA.
data_smu2_cat1 <- data %>%
  mutate(
    mcs6_sm_per_day = case_when(
      mcs6_sm_per_day > 60 ~ "High",
      mcs6_sm_per_day > 0 ~ "Moderate",
      mcs6_sm_per_day == 0 ~ "None"
    )
  )

# Number of participants in each category
table(data_smu2_cat1$mcs6_sm_per_day) |>
  data.frame() |>
  rename(`Social media use per day` = Var1) |>
  kable()

Social media use per day	Freq
High	352
Moderate	867
None	1259

2-hour threshold

# 2-hour threshold: 0 minutes = "None", up to and including 120 = "Moderate",
# above 120 = "High". Conditions are checked in order, so the second branch
# only sees values in (0, 120]. Missing or negative values stay NA.
data_smu2_cat2 <- data %>%
  mutate(
    mcs6_sm_per_day = case_when(
      mcs6_sm_per_day > 120 ~ "High",
      mcs6_sm_per_day > 0 ~ "Moderate",
      mcs6_sm_per_day == 0 ~ "None"
    )
  )

Suppressed to prevent disclosure.

3-hour threshold

# 3-hour threshold: 0 minutes = "None", up to and including 180 = "Moderate",
# above 180 = "High". Conditions are checked in order, so the second branch
# only sees values in (0, 180]. Missing or negative values stay NA.
data_smu2_cat3 <- data %>%
  mutate(
    mcs6_sm_per_day = case_when(
      mcs6_sm_per_day > 180 ~ "High",
      mcs6_sm_per_day > 0 ~ "Moderate",
      mcs6_sm_per_day == 0 ~ "None"
    )
  )

# Number of participants in each category
table(data_smu2_cat3$mcs6_sm_per_day) |>
  data.frame() |>
  rename(`Social media use per day` = Var1) |>
  kable()

Social media use per day	Freq
High	57
Moderate	1162
None	1259

4-hour threshold

# 4-hour threshold: 0 minutes = "None", up to and including 240 = "Moderate",
# above 240 = "High". Conditions are checked in order, so the second branch
# only sees values in (0, 240]. Missing or negative values stay NA.
data_smu2_cat4 <- data %>%
  mutate(
    mcs6_sm_per_day = case_when(
      mcs6_sm_per_day > 240 ~ "High",
      mcs6_sm_per_day > 0 ~ "Moderate",
      mcs6_sm_per_day == 0 ~ "None"
    )
  )

# Number of participants in each category
table(data_smu2_cat4$mcs6_sm_per_day) |>
  data.frame() |>
  rename(`Social media use per day` = Var1) |>
  kable()

Social media use per day	Freq
High	30
Moderate	1189
None	1259

Cross-tabulated with categorical interaction variables

The following are for a 2-hour threshold only.

# Usage category (2-hour threshold) x sex x HES event (period 2) counts.
# Counts of 0-9 are shown as "<10"; the two female "HES event = 1" cells for
# users are then replaced with "." (presumably secondary suppression --
# confirm with the author).
table(
  data_smu2_cat2$mcs6_sm_per_day,
  data_smu2_cat2$AHCSEX00,
  data_smu2_cat2$HES_ei2
) |>
  data.frame() |>
  rename(
    `Social media use per day` = Var1,
    `Sex assigned at birth` = Var2,
    `HES event (period 2)?` = Var3
  ) |>
  mutate(
    Freq = case_match(
      Freq,
      0:9 ~ "<10",
      .default = as.character(Freq)
    ),
    Freq = case_when(
      `Social media use per day` %in% c("Moderate", "High") &
        `Sex assigned at birth` == "Female" &
        `HES event (period 2)?` == 1 ~ ".",
      .default = Freq
    )
  ) |>
  kable()

Social media use per day	Sex assigned at birth	HES event (period 2)?	Freq
High	Male	0	26
Moderate	Male	0	338
None	Male	0	706
High	Female	0	100
Moderate	Female	0	667
None	Female	0	473
High	Male	1	<10
Moderate	Male	1	<10
None	Male	1	10
High	Female	1	.
Moderate	Female	1	.
None	Female	1	19

# Usage category (2-hour threshold) x ethnicity x HES event (period 2) counts,
# with counts of 0-9 displayed as "<10".
table(
  data_smu2_cat2$mcs6_sm_per_day,
  data_smu2_cat2$DDC06E00,
  data_smu2_cat2$HES_ei2
) |>
  data.frame() |>
  rename(
    `Social media use per day` = Var1,
    Ethnicity = Var2,
    `HES event (period 2)?` = Var3
  ) |>
  mutate(
    Freq = case_match(
      Freq,
      0:9 ~ "<10",
      .default = as.character(Freq)
    )
  ) |>
  kable()

Social media use per day	Ethnicity	HES event (period 2)?	Freq
High	White	0	102
Moderate	White	0	785
None	White	0	914
High	Ethnic minority	0	22
Moderate	Ethnic minority	0	185
None	Ethnic minority	0	183
High	White	1	<10
Moderate	White	1	<10
None	White	1	24
High	Ethnic minority	1	<10
Moderate	Ethnic minority	1	<10
None	Ethnic minority	1	<10

Characteristics of non-users vs users

Non-users

# Rows with zero minutes of social media use per day at age 14. The column is
# referenced by name rather than as `data$mcs6_sm_per_day`, so that filter()
# evaluates it through data masking on the rows it is actually filtering.
data_subset_smu2_0 <- data %>% filter(mcs6_sm_per_day == 0)

# Mean and standard deviation of EOEDE000 to 1 decimal place, ignoring
# missing values. cat() joins its arguments with single spaces.
cat(
  "Income (EOEDE000):\nMean:",
  as.character(round(mean(data_subset_smu2_0$EOEDE000, na.rm = TRUE), 1)),
  "\nStandard deviation:",
  as.character(round(sd(data_subset_smu2_0$EOEDE000, na.rm = TRUE), 1))
)

Income (EOEDE000):
Mean: 459 
Standard deviation: 186.9

# Mean and standard deviation of mcs5_wealth among the age-14 non-users, to
# 1 decimal place and ignoring missing values.
cat(
  "Household wealth (mcs5_wealth):\nMean:",
  as.character(round(mean(data_subset_smu2_0$mcs5_wealth, na.rm = TRUE), 1)),
  "\nStandard deviation:",
  as.character(round(sd(data_subset_smu2_0$mcs5_wealth, na.rm = TRUE), 1))
)

Household wealth (mcs5_wealth):
Mean: 198377.6 
Standard deviation: 283326.9

Users

# Rows with a non-missing, non-zero amount of social media use per day at
# age 14. The column is referenced by name rather than as
# `data$mcs6_sm_per_day`, so that filter() evaluates it through data masking
# on the rows it is actually filtering.
data_subset_smu2_1 <- data %>% filter(mcs6_sm_per_day != 0)

# Mean and standard deviation of EOEDE000 to 1 decimal place, ignoring
# missing values. cat() joins its arguments with single spaces.
cat(
  "Income (EOEDE000):\nMean:",
  as.character(round(mean(data_subset_smu2_1$EOEDE000, na.rm = TRUE), 1)),
  "\nStandard deviation:",
  as.character(round(sd(data_subset_smu2_1$EOEDE000, na.rm = TRUE), 1))
)

Income (EOEDE000):
Mean: 450 
Standard deviation: 178

# Mean and standard deviation of mcs5_wealth among the age-14 users, to
# 1 decimal place and ignoring missing values.
cat(
  "Household wealth (mcs5_wealth):\nMean:",
  as.character(round(mean(data_subset_smu2_1$mcs5_wealth, na.rm = TRUE), 1)),
  "\nStandard deviation:",
  as.character(round(sd(data_subset_smu2_1$mcs5_wealth, na.rm = TRUE), 1))
)

Household wealth (mcs5_wealth):
Mean: 224134.4 
Standard deviation: 479176.2

HES episodes

The total number of participants with a psychiatric episode recorded in period 1 is 65.

The total number of participants with a psychiatric episode recorded in period 2 is 173.

Cross-tabulation

# Keep MCSID, CNUM, CONSENT and the two HES indicators, and blank out both
# indicators (set them to NA) for rows where CONSENT == 0.
data_crosstab <- data %>%
  select(MCSID, CNUM, CONSENT, HES_ei1, HES_ei2) %>%
  mutate(
    across(
      c(HES_ei1, HES_ei2),
      \(indicator) if_else(CONSENT == 0, NA, indicator)
    )
  )

# Period 1 x period 2 HES indicator cross-tabulation. Values other than 0/1
# (including the NAs set for CONSENT == 0) become NA and are therefore left
# out by table().
table(
  case_match(
    data_crosstab$HES_ei1,
    0 ~ "No (period 1)",
    1 ~ "Yes (period 1)",
    .default = NA
  ),
  case_match(
    data_crosstab$HES_ei2,
    0 ~ "No (period 2)",
    1 ~ "Yes (period 2)",
    .default = NA
  )
) %>%
  kable()

	No (period 2)	Yes (period 2)
No (period 1)	5975	156
Yes (period 1)	48	17

In total 6390 eligible participants did not consent to HES linkage.

Note that in the subsequent tables the totals will not add up to the numbers above, because participants with missing responses to the grouping variables are not shown.

By region

Period 1

# Rows with HES_ei1 == 1
data_HES1 <- filter(data, HES_ei1 == 1)

# Counts by DAREGN00 (converted to a factor with haven's as_factor()), with
# counts of 0-9 displayed as "<10".
table(as_factor(data_HES1$DAREGN00)) |>
  data.frame() |>
  rename(`Region of residence` = Var1) |>
  mutate(
    Freq = case_match(
      Freq,
      0:9 ~ "<10",
      .default = as.character(Freq)
    )
  ) |>
  kable()

Region of residence	Freq
North East	<10
North West	16
Yorkshire and the Humber	<10
East Midlands	<10
West Midlands	<10
East of England	<10
London	13
South East	<10
South West	<10

Period 2

# Rows with HES_ei2 == 1
data_HES2 <- filter(data, HES_ei2 == 1)

# Counts by EAREGN00 (converted to a factor with haven's as_factor()), with
# counts of 0-9 displayed as "<10".
table(as_factor(data_HES2$EAREGN00)) |>
  data.frame() |>
  rename(`Region of residence` = Var1) |>
  mutate(
    Freq = case_match(
      Freq,
      0:9 ~ "<10",
      .default = as.character(Freq)
    )
  ) |>
  kable()

Region of residence	Freq
North East	<10
North West	18
Yorkshire and the Humber	19
East Midlands	11
West Midlands	15
East of England	19
London	21
South East	34
South West	18

By sex assigned at birth

Period 1

# Counts of rows with HES_ei1 == 1 by AHCSEX00
table(data_HES1$AHCSEX00) |>
  data.frame() |>
  rename(`Sex assigned at birth` = Var1) |>
  kable()

Sex assigned at birth	Freq
Male	30
Female	30

Period 2

# Counts of rows with HES_ei2 == 1 by AHCSEX00
table(data_HES2$AHCSEX00) |>
  data.frame() |>
  rename(`Sex assigned at birth` = Var1) |>
  kable()

Sex assigned at birth	Freq
Male	53
Female	109

By ethnicity

Period 1

# Counts of rows with HES_ei1 == 1 by DDC06E00
table(data_HES1$DDC06E00) |>
  data.frame() |>
  rename(Ethnicity = Var1) |>
  kable()

Ethnicity	Freq
White	49
Ethnic minority	11

Period 2

# Counts of rows with HES_ei2 == 1 by DDC06E00
table(data_HES2$DDC06E00) |>
  data.frame() |>
  rename(Ethnicity = Var1) |>
  kable()

Ethnicity	Freq
White	128
Ethnic minority	23