# Clear work space
rm(list = ls())
# Load packages
suppressPackageStartupMessages({
library(tidyverse)
library(openxlsx)
library(Hmisc)
})
source(here::here("Code/Helper.R"))
# Load preprocessed data with invMR
dat <- readRDS(here::here("Data/data_imr.rds"))
# Set options
options(pillar.sigfig = 4)
dat %>%
summarise('Samples (N)' = length(unique(sid)),
'Interviewers (N)' = length(unique(iid)),
'Countries (N)' = length(unique(cntryISO)),
'Participants (N)' = n(),
'Sample size (Mdn)' = median(table(sid)),
'Sample size (Min)' = min(table(sid)),
'Sample size (Max)' = max(table(sid)),
'Female (N)' = sum(sex == "female"),
'Female (%)' = round(mean(sex == "female") * 100, 2),
"Age (M)" = mean(age, na.rm = TRUE),
"Age (SD)" = sd(age, na.rm = TRUE),
"Age (Min)" = min(age, na.rm = TRUE),
"Age (Max)" = max(age, na.rm = TRUE),
"Migration (%)" = round(mean(mig == "Yes") * 100, 2),
"Years education (M)" = mean(eduyrs, na.rm = TRUE),
"Years education (SD)" = sd(eduyrs, na.rm = TRUE),
"Work: Paid work (%)" = round(mean(work == "Paid work", na.rm = TRUE) * 100, 2),
"Work: Education (%)" = round(mean(work == "Education", na.rm = TRUE) * 100, 2),
"Work: Retired (%)" = round(mean(work == "Retired", na.rm = TRUE) * 100, 2),
"Work: Other (%)" = round(mean(work == "Other", na.rm = TRUE) * 100, 2),
"Day: Monday" = round(mean(dow == "Monday", na.rm = TRUE) * 100, 2),
"Day: Tueday" = round(mean(dow == "Tuesday", na.rm = TRUE) * 100, 2),
"Day: Wednesday" = round(mean(dow == "Wednesday", na.rm = TRUE) * 100, 2),
"Day: Thursday" = round(mean(dow == "Thursday", na.rm = TRUE) * 100, 2),
"Day: Friday" = round(mean(dow == "Friday", na.rm = TRUE) * 100, 2),
"Day: Saturday" = round(mean(dow == "Saturday", na.rm = TRUE) * 100, 2),
"Day: Sunday" = round(mean(dow == "Sunday", na.rm = TRUE) * 100, 2),
"Time (M)" = round(mean(tod), 2),
"Time (SD)" = round(sd(tod), 2),
"Quarter of year: 1st" = round(mean(quart == "1st", na.rm = TRUE) * 100, 2),
"Quarter of year: 2nd" = round(mean(quart == "2nd", na.rm = TRUE) * 100, 2),
"Quarter of year: 3rd" = round(mean(quart == "3rd", na.rm = TRUE) * 100, 2),
"Quarter of year: 4th" = round(mean(quart == "4th", na.rm = TRUE) * 100, 2)) %>%
mutate_at(vars(everything()), ~round(., digits = 2)) %>%
pivot_longer(everything()) %>%
print(n = 100)
## # A tibble: 33 x 2
## name value
## <chr> <dbl>
## 1 Samples (N) 221
## 2 Interviewers (N) 30615
## 3 Countries (N) 37
## 4 Participants (N) 408637
## 5 Sample size (Mdn) 1830
## 6 Sample size (Min) 103
## 7 Sample size (Max) 3008
## 8 Female (N) 220172
## 9 Female (%) 53.88
## 10 Age (M) 48.26
## 11 Age (SD) 18.57
## 12 Age (Min) 13
## 13 Age (Max) 100
## 14 Migration (%) 4.37
## 15 Years education (M) 12.29
## 16 Years education (SD) 4.020
## 17 Work: Paid work (%) 49.18
## 18 Work: Education (%) 8.24
## 19 Work: Retired (%) 24.55
## 20 Work: Other (%) 18.03
## 21 Day: Monday 15.76
## 22 Day: Tueday 16.53
## 23 Day: Wednesday 16.61
## 24 Day: Thursday 15.54
## 25 Day: Friday 13.32
## 26 Day: Saturday 13.72
## 27 Day: Sunday 8.530
## 28 Time (M) 14.67
## 29 Time (SD) 3.14
## 30 Quarter of year: 1st 26.83
## 31 Quarter of year: 2nd 7.83
## 32 Quarter of year: 3rd 12.25
## 33 Quarter of year: 4th 53.09
dat %>%
group_by(iid) %>%
summarise('N' = n(),
'isex' = mean(isex == "female"),
"iage1" = mean(iage == "-30", na.rm = TRUE),
"iage2" = mean(iage == "31-40", na.rm = TRUE),
"iage3" = mean(iage == "41-50", na.rm = TRUE),
"iage4" = mean(iage == "51-60", na.rm = TRUE),
"iage5" = mean(iage == "61-70", na.rm = TRUE),
"iage6" = mean(iage == "71+", na.rm = TRUE),
"iisMo" = mean(iisMo),
"iisFr" = mean(iisFr),
"iisSa" = mean(iisSa),
"iisSu" = mean(iisSu),
.groups = "keep") %>%
ungroup() %>%
summarise('Number of interviewers' = n(),
'Number of interviews (Mdn)' = median(N),
'Number of interviews (Min)' = min(N),
'Number of interviews (Max)' = max(N),
'Female (%)' = round(mean(isex, na.rm = TRUE) * 100, 2),
"Age: -30 (%)" = round(mean(iage1, na.rm = TRUE), 2),
"Age: 31-40 (%)" = round(mean(iage2, na.rm = TRUE), 2),
"Age: 41-50 (%)" = round(mean(iage3, na.rm = TRUE), 2),
"Age: 51-60 (%)" = round(mean(iage4, na.rm = TRUE), 2),
"Age: 61-70 (%)" = round(mean(iage5, na.rm = TRUE), 2),
"Age: 71+ (%)" = round(mean(iage6, na.rm = TRUE), 2),
"Interviews on Monday (Mdn)" = median(iisMo, na.rm = TRUE),
"Interviews on Monday (Min)" = min(iisMo, na.rm = TRUE),
"Interviews on Monday (Max)" = max(iisMo, na.rm = TRUE),
"Interviews on Friday (Mdn)" = median(iisFr, na.rm = TRUE),
"Interviews on Friday (Min)" = min(iisFr, na.rm = TRUE),
"Interviews on Friday (Max)" = max(iisFr, na.rm = TRUE),
"Interviews on Saturday (Mdn)" = median(iisSa, na.rm = TRUE),
"Interviews on Saturday (Min)" = min(iisSa, na.rm = TRUE),
"Interviews on Saturday (Max)" = max(iisSa, na.rm = TRUE),
"Interviews on Sunday (Mdn)" = median(iisSu, na.rm = TRUE),
"Interviews on Sunday (Min)" = min(iisSu, na.rm = TRUE),
"Interviews on Sunday (Max)" = max(iisSu, na.rm = TRUE)) %>%
pivot_longer(everything()) %>%
print(n = 100)
## # A tibble: 23 x 2
## name value
## <chr> <dbl>
## 1 Number of interviewers 30615
## 2 Number of interviews (Mdn) 10
## 3 Number of interviews (Min) 1
## 4 Number of interviews (Max) 176
## 5 Female (%) 68.66
## 6 Age: -30 (%) 0.13
## 7 Age: 31-40 (%) 0.13
## 8 Age: 41-50 (%) 0.21
## 9 Age: 51-60 (%) 0.290
## 10 Age: 61-70 (%) 0.2
## 11 Age: 71+ (%) 0.04
## 12 Interviews on Monday (Mdn) 1
## 13 Interviews on Monday (Min) 0
## 14 Interviews on Monday (Max) 37
## 15 Interviews on Friday (Mdn) 1
## 16 Interviews on Friday (Min) 0
## 17 Interviews on Friday (Max) 34
## 18 Interviews on Saturday (Mdn) 1
## 19 Interviews on Saturday (Min) 0
## 20 Interviews on Saturday (Max) 53
## 21 Interviews on Sunday (Mdn) 0
## 22 Interviews on Sunday (Min) 0
## 23 Interviews on Sunday (Max) 36
dat %>%
group_by(dow) %>%
summarise('N' = n(),
'M (LS)' = wtd.mean(ls, wgt),
'SD (LS)' = wtd.var(ls, wgt)^.5,
'Mdn (LS)' = wtd.quantile(ls, wgt, .5),
'M (HP)' = wtd.mean(happy, wgt),
'SD (HP)' = wtd.var(happy, wgt)^.5,
'Mdn (hp)' = wtd.quantile(happy, wgt, .5),
.groups = "keep") %>%
ungroup() %>%
bind_rows(dat %>%
summarise('dow' = 'Week',
'N' = n(),
'M (LS)' = wtd.mean(ls, wgt),
'SD (LS)' = wtd.var(ls, wgt)^.5,
'Mdn (LS)' = wtd.quantile(ls, wgt, .5),
'M (HP)' = wtd.mean(happy, wgt),
'SD (HP)' = wtd.var(happy, wgt)^.5,
'Mdn (hp)' = wtd.quantile(happy, wgt, .5)))
## ls happy
## ls 1.000000 0.691762
## happy 0.691762 1.000000
Effect sizes for the day of week effects are calculated as standardized mean differences between the mean of a specific day and the overall mean.
(sdesc <- dat %>%
group_by(sid) %>%
summarise('Country' = as.character(Mode(cntry)),
'Round' = Mode(wave),
'N' = n(),
'Female (%)' = round(mean(sex == "female") * 100, 2),
"Age (M)" = round(mean(age, na.rm = TRUE), 2),
"Age (SD)" = round(sd(age, na.rm = TRUE), 2),
"Paid work (%)" = round(mean(work == "Paid work", na.rm = TRUE) * 100, 2),
"Monday (%)" = round(mean(isMo == "Monday", na.rm = TRUE) * 100, 2),
"Friday (%)" = round(mean(isFr == "Friday", na.rm = TRUE) * 100, 2),
"Saturday (%)" = round(mean(isSa == "Saturday", na.rm = TRUE) * 100, 2),
"Sunday (%)" = round(mean(isSu == "Sunday", na.rm = TRUE) * 100, 2),
# effect sizes
"LS_Mo_d" = round((wtd.mean(ls, wgt) -
wtd.mean(ls[isMo == "Monday"],
wgt[isMo == "Monday"])) /
wtd.var(ls, wgt)^.5, 3),
"LS_Mo_sd_d" = round((LS_Mo_d^2 / sum(wgt))^.5, 3),
"LS_Fr_d" = round((wtd.mean(ls, wgt) -
wtd.mean(ls[isFr == "Friday"],
wgt[isFr == "Friday"])) /
wtd.var(ls, wgt)^.5, 3),
"LS_Fr_sd_d" = round((LS_Fr_d^2 / sum(wgt))^.5, 3),
"LS_Sa_d" = round((wtd.mean(ls, wgt) -
wtd.mean(ls[isSa == "Saturday"],
wgt[isSa == "Saturday"])) /
wtd.var(ls, wgt)^.5, 3),
"LS_Sa_sd_d" = round((LS_Sa_d^2 / sum(wgt))^.5, 3),
"LS_Su_d" = round((wtd.mean(ls, wgt) -
wtd.mean(ls[isSu == "Sunday"],
wgt[isSu == "Sunday"])) /
wtd.var(ls)^.5, 3),
"LS_Su_sd_d" = round((LS_Su_d^2 / sum(wgt))^.5, 3),
"HP_Mo_d" = round((wtd.mean(happy, wgt) -
wtd.mean(happy[isMo == "Monday"],
wgt[isMo == "Monday"])) /
wtd.var(happy)^.5, 3),
"HP_Mo_sd_d" = round((HP_Mo_d^2 / sum(wgt))^.5, 3),
"HP_Fr_d" = round((wtd.mean(happy, wgt) -
wtd.mean(happy[isFr == "Friday"],
wgt[isFr == "Friday"])) /
wtd.var(happy)^.5, 3),
"HP_Fr_sd_d" = round((HP_Fr_d^2 / sum(wgt))^.5, 3),
"HP_Sa_d" = round((wtd.mean(happy, wgt) -
wtd.mean(happy[isSa == "Saturday"],
wgt[isSa == "Saturday"])) /
wtd.var(happy)^.5, 3),
"HP_Sa_sd_d" = round((HP_Sa_d^2 / sum(wgt))^.5, 3),
"HP_Su_d" = round((wtd.mean(happy, wgt) -
wtd.mean(happy[isSu == "Sunday"],
wgt[isSu == "Sunday"])) /
wtd.var(happy)^.5, 3),
"HP_Su_sd_d" = round((HP_Su_d^2 / sum(wgt))^.5, 3),
.groups = "keep") %>%
ungroup() %>%
dplyr::select(-sid) %>%
arrange(Country, Round))
# Format numbers
x <- sdesc %>%
mutate(across(c('Female (%)', 'Age (M)', 'Age (SD)', 'Paid work (%)',
'Monday (%)', 'Friday (%)', 'Saturday (%)', 'Sunday (%)'),
~format(., digits = 4))) %>%
mutate(across(c('LS_Mo_d', 'LS_Mo_sd_d', 'LS_Fr_d', 'LS_Fr_sd_d',
'LS_Sa_d', 'LS_Sa_sd_d', 'LS_Su_d', 'LS_Su_sd_d',
'HP_Mo_d', 'HP_Mo_sd_d', 'HP_Fr_d', 'HP_Fr_sd_d',
'HP_Sa_d', 'HP_Sa_sd_d', 'HP_Su_d', 'HP_Su_sd_d'),
~format(., digits = 5))) %>%
mutate(across(everything(), as.character)) %>%
mutate(across(everything(), ~ifelse(. == "0.000", "<0.001", .)))
# Save results
write.xlsx(x,
file = here::here("Tables/Sample description.xlsx"),
sheetName = "Samples",
colNames = TRUE,
rowNames = FALSE)
rm(x)