# Clear work space
rm(list = ls())

# Load packages
suppressPackageStartupMessages({
    library(tidyverse)
    library(openxlsx)
    library(Hmisc)
})
source(here::here("Code/Helper.R"))

# Load preprocessed data with invMR
dat <- readRDS(here::here("Data/data_imr.rds"))

# Set options
options(pillar.sigfig = 4)

1 Participant characteristics

dat %>%
    summarise('Samples (N)' = length(unique(sid)),
              'Interviewers (N)' = length(unique(iid)),
              'Countries (N)' = length(unique(cntryISO)),
              'Participants (N)' = n(),
              'Sample size (Mdn)' = median(table(sid)),
              'Sample size (Min)' = min(table(sid)),
              'Sample size (Max)' = max(table(sid)),
              'Female (N)' = sum(sex == "female"),
              'Female (%)' = round(mean(sex == "female") * 100, 2),
              "Age (M)" = mean(age, na.rm = TRUE),
              "Age (SD)" = sd(age, na.rm = TRUE),
              "Age (Min)" = min(age, na.rm = TRUE),
              "Age (Max)" = max(age, na.rm = TRUE),
              "Migration (%)" = round(mean(mig == "Yes") * 100, 2),
              "Years education (M)" = mean(eduyrs, na.rm = TRUE),
              "Years education (SD)" = sd(eduyrs, na.rm = TRUE),
              "Work: Paid work (%)" = round(mean(work == "Paid work", na.rm = TRUE) * 100, 2),
              "Work: Education (%)" = round(mean(work == "Education", na.rm = TRUE) * 100, 2),
              "Work: Retired (%)" = round(mean(work == "Retired", na.rm = TRUE) * 100, 2),
              "Work: Other (%)" = round(mean(work == "Other", na.rm = TRUE) * 100, 2),
              "Day: Monday" = round(mean(dow == "Monday", na.rm = TRUE) * 100, 2),
              "Day: Tueday" = round(mean(dow == "Tuesday", na.rm = TRUE) * 100, 2),
              "Day: Wednesday" = round(mean(dow == "Wednesday", na.rm = TRUE) * 100, 2),
              "Day: Thursday" = round(mean(dow == "Thursday", na.rm = TRUE) * 100, 2),
              "Day: Friday" = round(mean(dow == "Friday", na.rm = TRUE) * 100, 2),
              "Day: Saturday" = round(mean(dow == "Saturday", na.rm = TRUE) * 100, 2),
              "Day: Sunday" = round(mean(dow == "Sunday", na.rm = TRUE) * 100, 2),
              "Time (M)" = round(mean(tod), 2),
              "Time (SD)" = round(sd(tod), 2),
              "Quarter of year: 1st" = round(mean(quart == "1st", na.rm = TRUE) * 100, 2),
              "Quarter of year: 2nd" = round(mean(quart == "2nd", na.rm = TRUE) * 100, 2),
              "Quarter of year: 3rd" = round(mean(quart == "3rd", na.rm = TRUE) * 100, 2),
              "Quarter of year: 4th" = round(mean(quart == "4th", na.rm = TRUE) * 100, 2)) %>%
    mutate_at(vars(everything()), ~round(., digits = 2)) %>%
    pivot_longer(everything()) %>%
    print(n = 100)
## # A tibble: 33 x 2
##    name                      value
##    <chr>                     <dbl>
##  1 Samples (N)             221    
##  2 Interviewers (N)      30615    
##  3 Countries (N)            37    
##  4 Participants (N)     408637    
##  5 Sample size (Mdn)      1830    
##  6 Sample size (Min)       103    
##  7 Sample size (Max)      3008    
##  8 Female (N)           220172    
##  9 Female (%)               53.88 
## 10 Age (M)                  48.26 
## 11 Age (SD)                 18.57 
## 12 Age (Min)                13    
## 13 Age (Max)               100    
## 14 Migration (%)             4.37 
## 15 Years education (M)      12.29 
## 16 Years education (SD)      4.020
## 17 Work: Paid work (%)      49.18 
## 18 Work: Education (%)       8.24 
## 19 Work: Retired (%)        24.55 
## 20 Work: Other (%)          18.03 
## 21 Day: Monday              15.76 
## 22 Day: Tueday              16.53 
## 23 Day: Wednesday           16.61 
## 24 Day: Thursday            15.54 
## 25 Day: Friday              13.32 
## 26 Day: Saturday            13.72 
## 27 Day: Sunday               8.530
## 28 Time (M)                 14.67 
## 29 Time (SD)                 3.14 
## 30 Quarter of year: 1st     26.83 
## 31 Quarter of year: 2nd      7.83 
## 32 Quarter of year: 3rd     12.25 
## 33 Quarter of year: 4th     53.09

2 Interviewer characteristics

dat %>%
    group_by(iid) %>%
    summarise('N' = n(),
              'isex'  = mean(isex == "female"),
              "iage1" = mean(iage == "-30", na.rm = TRUE),
              "iage2" = mean(iage == "31-40", na.rm = TRUE),
              "iage3" = mean(iage == "41-50", na.rm = TRUE),
              "iage4" = mean(iage == "51-60", na.rm = TRUE),
              "iage5" = mean(iage == "61-70", na.rm = TRUE),
              "iage6" = mean(iage == "71+", na.rm = TRUE),
              "iisMo" = mean(iisMo),
              "iisFr" = mean(iisFr),
              "iisSa" = mean(iisSa),
              "iisSu" = mean(iisSu),
              .groups = "keep") %>%
    ungroup() %>%
    summarise('Number of interviewers' = n(),
              'Number of interviews (Mdn)' = median(N),
              'Number of interviews (Min)' = min(N),
              'Number of interviews (Max)' = max(N),
              'Female (%)' = round(mean(isex, na.rm = TRUE) * 100, 2),
              "Age: -30 (%)" = round(mean(iage1, na.rm = TRUE), 2),
              "Age: 31-40 (%)" = round(mean(iage2, na.rm = TRUE), 2),
              "Age: 41-50 (%)" = round(mean(iage3, na.rm = TRUE), 2),
              "Age: 51-60 (%)" = round(mean(iage4, na.rm = TRUE), 2),
              "Age: 61-70 (%)" = round(mean(iage5, na.rm = TRUE), 2),
              "Age: 71+ (%)" = round(mean(iage6, na.rm = TRUE), 2),
              "Interviews on Monday (Mdn)" = median(iisMo, na.rm = TRUE),
              "Interviews on Monday (Min)" = min(iisMo, na.rm = TRUE),
              "Interviews on Monday (Max)" = max(iisMo, na.rm = TRUE),
              "Interviews on Friday (Mdn)" = median(iisFr, na.rm = TRUE),
              "Interviews on Friday (Min)" = min(iisFr, na.rm = TRUE),
              "Interviews on Friday (Max)" = max(iisFr, na.rm = TRUE),
              "Interviews on Saturday (Mdn)" = median(iisSa, na.rm = TRUE),
              "Interviews on Saturday (Min)" = min(iisSa, na.rm = TRUE),
              "Interviews on Saturday (Max)" = max(iisSa, na.rm = TRUE),
              "Interviews on Sunday (Mdn)" = median(iisSu, na.rm = TRUE),
              "Interviews on Sunday (Min)" = min(iisSu, na.rm = TRUE),
              "Interviews on Sunday (Max)" = max(iisSu, na.rm = TRUE)) %>%
    pivot_longer(everything()) %>%
    print(n = 100)
## # A tibble: 23 x 2
##    name                             value
##    <chr>                            <dbl>
##  1 Number of interviewers       30615    
##  2 Number of interviews (Mdn)      10    
##  3 Number of interviews (Min)       1    
##  4 Number of interviews (Max)     176    
##  5 Female (%)                      68.66 
##  6 Age: -30 (%)                     0.13 
##  7 Age: 31-40 (%)                   0.13 
##  8 Age: 41-50 (%)                   0.21 
##  9 Age: 51-60 (%)                   0.290
## 10 Age: 61-70 (%)                   0.2  
## 11 Age: 71+ (%)                     0.04 
## 12 Interviews on Monday (Mdn)       1    
## 13 Interviews on Monday (Min)       0    
## 14 Interviews on Monday (Max)      37    
## 15 Interviews on Friday (Mdn)       1    
## 16 Interviews on Friday (Min)       0    
## 17 Interviews on Friday (Max)      34    
## 18 Interviews on Saturday (Mdn)     1    
## 19 Interviews on Saturday (Min)     0    
## 20 Interviews on Saturday (Max)    53    
## 21 Interviews on Sunday (Mdn)       0    
## 22 Interviews on Sunday (Min)       0    
## 23 Interviews on Sunday (Max)      36

3 Well-Being scores

dat %>%
    group_by(dow) %>%
    summarise('N' = n(),
              'M (LS)' = wtd.mean(ls, wgt),
              'SD (LS)' = wtd.var(ls, wgt)^.5,
              'Mdn (LS)' = wtd.quantile(ls, wgt, .5),
              'M (HP)' = wtd.mean(happy, wgt),
              'SD (HP)' = wtd.var(happy, wgt)^.5,
              'Mdn (hp)' = wtd.quantile(happy, wgt, .5),
              .groups = "keep") %>%
    ungroup() %>%
    bind_rows(dat %>%
                  summarise('dow' = 'Week',
                            'N' = n(),
                            'M (LS)' = wtd.mean(ls, wgt),
                            'SD (LS)' = wtd.var(ls, wgt)^.5,
                            'Mdn (LS)' = wtd.quantile(ls, wgt, .5),
                            'M (HP)' = wtd.mean(happy, wgt),
                            'SD (HP)' = wtd.var(happy, wgt)^.5,
                            'Mdn (hp)' = wtd.quantile(happy, wgt, .5)))
# Correlation
cov.wt(dat[, c("ls", "happy")], wt = dat$wgt, cor = TRUE)$cor
##             ls    happy
## ls    1.000000 0.691762
## happy 0.691762 1.000000

4 Sample description

Effect sizes for the day of week effects are calculated as standardized mean differences between the mean of a specific day and the overall mean.

(sdesc <- dat %>%
            group_by(sid) %>%
            summarise('Country' = as.character(Mode(cntry)),
                      'Round' = Mode(wave),
                      'N' = n(),
                      'Female (%)' = round(mean(sex == "female") * 100, 2),
                      "Age (M)" = round(mean(age, na.rm = TRUE), 2),
                      "Age (SD)" = round(sd(age, na.rm = TRUE), 2),
                      "Paid work (%)" = round(mean(work == "Paid work", na.rm = TRUE) * 100, 2),
                      "Monday (%)" = round(mean(isMo == "Monday", na.rm = TRUE) * 100, 2),
                      "Friday (%)" = round(mean(isFr == "Friday", na.rm = TRUE) * 100, 2),
                      "Saturday (%)" = round(mean(isSa == "Saturday", na.rm = TRUE) * 100, 2),
                      "Sunday (%)" = round(mean(isSu == "Sunday", na.rm = TRUE) * 100, 2),
                      # effect sizes
                      "LS_Mo_d" = round((wtd.mean(ls, wgt) -
                                             wtd.mean(ls[isMo == "Monday"],
                                                      wgt[isMo == "Monday"])) /
                                            wtd.var(ls, wgt)^.5, 3),
                      "LS_Mo_sd_d" = round((LS_Mo_d^2 / sum(wgt))^.5, 3),
                      "LS_Fr_d" = round((wtd.mean(ls, wgt) -
                                             wtd.mean(ls[isFr == "Friday"],
                                                      wgt[isFr == "Friday"])) /
                                            wtd.var(ls, wgt)^.5, 3),
                      "LS_Fr_sd_d" = round((LS_Fr_d^2 / sum(wgt))^.5, 3),
                      "LS_Sa_d" = round((wtd.mean(ls, wgt) -
                                             wtd.mean(ls[isSa == "Saturday"],
                                                      wgt[isSa == "Saturday"])) /
                                            wtd.var(ls, wgt)^.5, 3),
                      "LS_Sa_sd_d" = round((LS_Sa_d^2 / sum(wgt))^.5, 3),
                      "LS_Su_d" = round((wtd.mean(ls, wgt) -
                                             wtd.mean(ls[isSu ==  "Sunday"],
                                                      wgt[isSu == "Sunday"])) /
                                            wtd.var(ls)^.5, 3),
                      "LS_Su_sd_d" = round((LS_Su_d^2 / sum(wgt))^.5, 3),
                      "HP_Mo_d" = round((wtd.mean(happy, wgt) -
                                             wtd.mean(happy[isMo == "Monday"],
                                                      wgt[isMo == "Monday"])) /
                                            wtd.var(happy)^.5, 3),
                      "HP_Mo_sd_d" = round((HP_Mo_d^2 / sum(wgt))^.5, 3),
                      "HP_Fr_d" = round((wtd.mean(happy, wgt) -
                                             wtd.mean(happy[isFr == "Friday"],
                                                      wgt[isFr == "Friday"])) /
                                            wtd.var(happy)^.5, 3),
                      "HP_Fr_sd_d" = round((HP_Fr_d^2 / sum(wgt))^.5, 3),
                      "HP_Sa_d" = round((wtd.mean(happy, wgt) -
                                             wtd.mean(happy[isSa == "Saturday"],
                                                      wgt[isSa == "Saturday"])) /
                                            wtd.var(happy)^.5, 3),
                      "HP_Sa_sd_d" = round((HP_Sa_d^2 / sum(wgt))^.5, 3),
                      "HP_Su_d" = round((wtd.mean(happy, wgt) -
                                             wtd.mean(happy[isSu == "Sunday"],
                                                      wgt[isSu == "Sunday"])) /
                                            wtd.var(happy)^.5, 3),
                      "HP_Su_sd_d" = round((HP_Su_d^2 / sum(wgt))^.5, 3),
                      .groups = "keep") %>%
            ungroup() %>%
            dplyr::select(-sid) %>%
            arrange(Country, Round))

5 Save descriptives

# Format numbers
x <- sdesc %>%
        mutate(across(c('Female (%)', 'Age (M)', 'Age (SD)', 'Paid work (%)',
                        'Monday (%)', 'Friday (%)', 'Saturday (%)', 'Sunday (%)'),
                      ~format(., digits = 4))) %>%
        mutate(across(c('LS_Mo_d', 'LS_Mo_sd_d', 'LS_Fr_d', 'LS_Fr_sd_d',
                        'LS_Sa_d', 'LS_Sa_sd_d', 'LS_Su_d', 'LS_Su_sd_d',
                        'HP_Mo_d', 'HP_Mo_sd_d', 'HP_Fr_d', 'HP_Fr_sd_d',
                        'HP_Sa_d', 'HP_Sa_sd_d', 'HP_Su_d', 'HP_Su_sd_d'),
                      ~format(., digits = 5))) %>%
        mutate(across(everything(), as.character)) %>%
        mutate(across(everything(), ~ifelse(. == "0.000", "<0.001", .)))

# Save results
write.xlsx(x,
           file = here::here("Tables/Sample description.xlsx"),
           sheetName = "Samples",
           colNames = TRUE,
           rowNames = FALSE)
rm(x)