# Clear work space
# Removes all non-hidden objects from the environment this script is run in
# (the global environment when run at top level). Loaded packages and
# options are not reset by this call.
rm(list = ls())

# Load packages
suppressPackageStartupMessages({
    library(tidyverse)
    library(haven)
    library(doBy)
    library(jsonlite)
    library(openxlsx)
})
source(here::here("Code/Helper.R"))

# Set options
# Date formatting (e.g. weekdays()) must yield English names. "English" is a
# Windows-only locale name; on other platforms the portable "C" locale is
# used, which also yields English day and month names.
Sys.setlocale("LC_TIME", if (.Platform$OS.type == "windows") "English" else "C")
## [1] "English_United States.1252"

1 Load rawdata

The data from the European Social Survey (ESS) were retrieved from https://europeansocialsurvey.org. The relevant information is contained in the integrated data file and in the data from the interviewer's questionnaire.

1.1 ESS round 1

ESS Round 1: European Social Survey Round 1 Data (2002). Data file edition 6.6. NSD - Norwegian Centre for Research Data, Norway – Data Archive and distributor of ESS data for ESS ERIC. doi:10.21338/NSD-ESS1-2002.

Round 1 does not provide an interviewer identification number or information on the interviewers’ sex and age. Therefore, the interviewer identification number is imputed from the contact form using the interviewer making the first contact. For missing information, the respective identification number of the second or third interviewer is imputed.

path <- "~/Datasets/ESS/Round 1/"
dat1   <- read_spss(paste0(path, "ESS1e06_6.sav"))    # integrated data
dat2   <- read_spss(paste0(path, "ess1contacts.sav")) # contact forms
rm(path)

# Build the Round 1 analysis file: harmonise the respondent variables and
# attach an interviewer number taken from the contact forms.
ess1 <- left_join(dat1  %>%
                      rename(sex = gndr,
                             age = agea,
                             work = mnactic,
                             hours = wkhct,
                             mig = ctzcntr,
                             relig = rlgdgr,
                             children = chldhm,
                             ls = stflife,
                             wgt = pspwght,
                             inwdds = inwdd,
                             inwmms = inwmm,
                             inwyys = inwyr) %>%
                      select(idno, cntry, sex, age, eduyrs, work, hours,
                             mig, relig, children, partner,
                             ipcrtiv:impfun, ls, happy, wgt,
                             inwdds, inwmms, inwyys, inwshh, inwsmm),
                  dat2 %>%
                      filter(!is.na(IDNO)) %>%
                      rename(idno = IDNO,
                             cntry = CNTRY) %>%
                      # Interviewer number: interviewer of the first contact;
                      # if missing, fall back to the second and then to the
                      # third interviewer.
                      # NOTE(review): assumes the contact file provides
                      # INTNUM3 -- confirm against the Round 1 contact data.
                      mutate(intnum = ifelse(is.na(INTNUM1), INTNUM2, INTNUM1),
                             intnum = ifelse(is.na(intnum), INTNUM3, intnum)) %>%
                      select(idno, cntry, intnum),
                  by = c("idno", "cntry")) %>%
        rename(cntryISO = cntry) %>%
        # Round 1 has no interviewer sex/age; add empty placeholders so the
        # rounds can be stacked later.
        mutate(wave = 1,
               isex = NA,
               iage = NA)
rm(dat1, dat2)

1.2 ESS round 2

ESS Round 2: European Social Survey Round 2 Data (2004). Data file edition 3.6. NSD - Norwegian Centre for Research Data, Norway – Data Archive and distributor of ESS data for ESS ERIC. doi:10.21338/NSD-ESS2-2004.

Round 2 does not provide information on the interviewers’ sex and age.

There are inconsistencies for the Icelandic respondent identification number. Because the interviewer data could not be properly matched for all respondents, the interviewer identification number is missing for about 40% of the Icelandic data.

# Read the Round 2 files
dir2   <- "~/Datasets/ESS/Round 2/"
main2  <- read_spss(paste0(dir2, "ESS2e03_6.sav"))    # integrated data
italy2 <- read_spss(paste0(dir2, "ESS2IT.sav"))       # data for Italy
intv2  <- read_spss(paste0(dir2, "ESS2INTe03_2.por")) # interviewer questionnaire
rm(dir2)

# Respondent side: stack both data files, keep and rename the analysis variables
resp2 <- bind_rows(main2, italy2) %>%
    select(idno, cntry,
           sex = gndr, age = agea, eduyrs,
           work = mnactic, hours = wkhct,
           mig = ctzcntr, relig = rlgdgr,
           children = chldhm, partner,
           ipcrtiv:impfun,
           ls = stflife, happy, wgt = pspwght,
           inwdds = inwdd, inwmms = inwmm, inwyys = inwyr,
           inwshh, inwsmm)

# Interviewer side: only the interviewer number is available in this round
keys2 <- intv2 %>%
    select(idno = IDNO, cntry = CNTRY, intnum = INTNUM)

# Attach the interviewer number; interviewer sex and age stay empty
ess2 <- resp2 %>%
    left_join(keys2, by = c("idno", "cntry")) %>%
    mutate(wave = 2,
           isex = NA,
           iage = NA) %>%
    rename(cntryISO = cntry)
rm(main2, italy2, intv2, resp2, keys2)

1.3 ESS round 3

ESS Round 3: European Social Survey Round 3 Data (2006). Data file edition 3.7. NSD - Norwegian Centre for Research Data, Norway – Data Archive and distributor of ESS data for ESS ERIC. doi:10.21338/NSD-ESS3-2006.

Round 3 does not provide information on the interviewers’ sex and age.

No design weights are available for Latvia and Romania.

# Read the Round 3 files
dir3  <- "~/Datasets/ESS/Round 3/"
main3 <- read_spss(paste0(dir3, "ESS3e03_7.sav"))  # integrated data
lvro3 <- read_spss(paste0(dir3, "ESS3LVRO.sav"))   # data for Latvia and Romania
intv3 <- read_spss(paste0(dir3, "ESS3INTe02.por")) # interviewer questionnaire
rm(dir3)

# Respondent side: the existing `age` column is dropped first so that `agea`
# can take over the name `age`
resp3 <- bind_rows(main3, lvro3) %>%
    select(-age) %>%
    select(idno, cntry,
           sex = gndr, age = agea, eduyrs,
           work = mnactic, hours = wkhct,
           mig = ctzcntr, relig = rlgdgr,
           children = chldhm, partner,
           ipcrtiv:impfun,
           ls = stflife, happy, wgt = pspwght,
           inwdds, inwmms, inwyys, inwshh, inwsmm) %>%
    # no design weights for Latvia and Romania: missing weights are set to 1
    mutate(wgt = ifelse(is.na(wgt), 1, wgt))

# Interviewer side: only the interviewer number is available in this round
keys3 <- intv3 %>%
    select(idno = IDNO, cntry = CNTRY, intnum = INTNUM)

ess3 <- resp3 %>%
    left_join(keys3, by = c("idno", "cntry")) %>%
    mutate(wave = 3,
           isex = NA,
           iage = NA) %>%
    rename(cntryISO = cntry)
rm(main3, lvro3, intv3, resp3, keys3)

1.4 ESS round 4

ESS Round 4: European Social Survey Round 4 Data (2008). Data file edition 4.5. NSD - Norwegian Centre for Research Data, Norway – Data Archive and distributor of ESS data for ESS ERIC. doi:10.21338/NSD-ESS4-2008.

Information on the interviewers’ sex and age is not provided for Sweden.

No design weights are available for Lithuania.

# Read the Round 4 files
dir4     <- "~/Datasets/ESS/Round 4/"
main4    <- read_spss(paste0(dir4, "ESS4e04_5.sav"))  # integrated data
austria4 <- read_spss(paste0(dir4, "ESS4AT.sav"))     # data for Austria
lt4      <- read_spss(paste0(dir4, "ESS4LT.sav"))     # data for Lithuania (ISO code LT)
intv4    <- read_spss(paste0(dir4, "ESS4INTe03.sav")) # interviewer questionnaire
rm(dir4)

# Respondent side: stack the three data files, keep and rename the analysis variables
resp4 <- bind_rows(main4, austria4, lt4) %>%
    select(idno, cntry,
           sex = gndr, age = agea, eduyrs,
           work = mnactic, hours = wkhct,
           mig = ctzcntr, relig = rlgdgr,
           children = chldhm, partner,
           ipcrtiv:impfun,
           ls = stflife, happy, wgt = pspwght,
           inwdds, inwmms, inwyys, inwshh, inwsmm) %>%
    # missing weights (separately delivered country file) are set to 1
    mutate(wgt = ifelse(is.na(wgt), 1, wgt),
           wave = 4)

# Interviewer side: number, age and sex of the interviewer
int4 <- intv4 %>%
    select(idno, cntry, intnum, iage = intage, isex = intgndr)

ess4 <- resp4 %>%
    left_join(int4, by = c("idno", "cntry")) %>%
    rename(cntryISO = cntry)
rm(main4, austria4, lt4, intv4, resp4, int4)

1.5 ESS round 5

ESS Round 5: European Social Survey Round 5 Data (2010). Data file edition 3.4. NSD - Norwegian Centre for Research Data, Norway – Data Archive and distributor of ESS data for ESS ERIC. doi:10.21338/NSD-ESS5-2010.

Information on the interviewers’ sex and age is not provided for Sweden.

Information on the interview date is not available for Estonia.

Information on the interview time is not available for Ireland.

# Read the Round 5 files
dir5     <- "~/Datasets/ESS/Round 5/"
main5    <- read_spss(paste0(dir5, "ESS5e03_4.sav"))   # integrated data
austria5 <- read_spss(paste0(dir5, "ESS5ATe1_1.sav"))  # data for Austria
intv5    <- read_spss(paste0(dir5, "ESS5INTe03.sav"))  # interviewer questionnaire
intv5_at <- read_spss(paste0(dir5, "ESS5intAT.sav"))   # interviewer questionnaire for Austria
rm(dir5)

# Respondent side: stack both data files, keep and rename the analysis variables
resp5 <- bind_rows(main5, austria5) %>%
    select(idno, cntry,
           sex = gndr, age = agea, eduyrs,
           work = mnactic, hours = wkhct,
           mig = ctzcntr, relig = rlgdgr,
           children = chldhm, partner = icpart1,
           ipcrtiv:impfun,
           ls = stflife, happy, wgt = pspwght,
           inwdds, inwmms, inwyys, inwshh, inwsmm) %>%
    mutate(wave = 5)

# Interviewer side: where the categorical age (`intage`) is missing, it is
# filled with the category derived from the age in `intagea`
int5 <- bind_rows(intv5, intv5_at) %>%
    mutate(age_band = as.numeric(cut(intagea, c(0, 30, 40, 50, 60, 70, 100))),
           iage = ifelse(is.na(intage), age_band, intage)) %>%
    select(idno, cntry, intnum, iage, isex = intgndr)

ess5 <- resp5 %>%
    left_join(int5, by = c("idno", "cntry")) %>%
    rename(cntryISO = cntry)
rm(main5, austria5, intv5, intv5_at, resp5, int5)

For 7 interviewers in Cyprus, inconsistent information on their sex and age is provided. These inconsistencies are corrected.

# One sex and one age value per interviewer (country x interviewer number),
# obtained with the Mode() helper over all of that interviewer's interviews
int5_mode <- ess5 %>%
    group_by(cntryISO, intnum) %>%
    summarise(isex = Mode(isex),
              iage = Mode(iage),
              .groups = "keep")

# Replace the respondent-level values by the per-interviewer values
ess5 <- ess5 %>%
    select(-isex, -iage) %>%
    left_join(int5_mode, by = c("cntryISO", "intnum"))
rm(int5_mode)

1.6 ESS round 6

ESS Round 6: European Social Survey Round 6 Data (2012). Data file edition 2.4. NSD - Norwegian Centre for Research Data, Norway – Data Archive and distributor of ESS data for ESS ERIC. doi:10.21338/NSD-ESS6-2012.

Information on the interviewers’ sex and age is not provided for Ukraine.

# Read the Round 6 files
dir6  <- "~/Datasets/ESS/Round 6/"
main6 <- read_spss(paste0(dir6, "ESS6e02_4.sav"))    # integrated data
intv6 <- read_spss(paste0(dir6, "ESS6INTe02_1.sav")) # interviewer questionnaire
rm(dir6)

# Respondent side: keep and rename the analysis variables
resp6 <- main6 %>%
    select(idno, cntry,
           sex = gndr, age = agea, eduyrs,
           work = mnactic, hours = wkhct,
           mig = ctzcntr, relig = rlgdgr,
           children = chldhm, partner = icpart1,
           ipcrtiv:impfun,
           ls = stflife, happy, wgt = pspwght,
           inwdds, inwmms, inwyys, inwshh, inwsmm) %>%
    mutate(wave = 6)

# Interviewer side: age is converted into six categories (coded 1 to 6)
int6 <- intv6 %>%
    mutate(iage = as.numeric(cut(intagea, c(0, 30, 40, 50, 60, 70, 100)))) %>%
    select(idno, cntry, intnum, iage, isex = intgndr)

ess6 <- resp6 %>%
    left_join(int6, by = c("idno", "cntry")) %>%
    rename(cntryISO = cntry)
rm(main6, intv6, resp6, int6)

For 2 interviewers in the United Kingdom, inconsistent information on their sex and age is provided. These inconsistencies are corrected.

# One sex and one age value per interviewer (country x interviewer number),
# obtained with the Mode() helper over all of that interviewer's interviews
int6_mode <- ess6 %>%
    group_by(cntryISO, intnum) %>%
    summarise(isex = Mode(isex),
              iage = Mode(iage),
              .groups = "keep")

# Replace the respondent-level values by the per-interviewer values
ess6 <- ess6 %>%
    select(-isex, -iage) %>%
    left_join(int6_mode, by = c("cntryISO", "intnum"))
rm(int6_mode)

1.7 ESS round 7

ESS Round 7: European Social Survey Round 7 Data (2014). Data file edition 2.2. NSD - Norwegian Centre for Research Data, Norway – Data Archive and distributor of ESS data for ESS ERIC. doi:10.21338/NSD-ESS7-2014.

Information on the interview time is not available for Portugal.

# Read the Round 7 files
dir7  <- "~/Datasets/ESS/Round 7/"
main7 <- read_spss(paste0(dir7, "ESS7e02_2.sav"))    # integrated data
intv7 <- read_spss(paste0(dir7, "ESS7INTe02_1.sav")) # interviewer questionnaire
rm(dir7)

# Respondent side: keep and rename the analysis variables
resp7 <- main7 %>%
    select(idno, cntry,
           sex = gndr, age = agea, eduyrs,
           work = mnactic, hours = wkhct,
           mig = ctzcntr, relig = rlgdgr,
           children = chldhm, partner = icpart1,
           ipcrtiv:impfun,
           ls = stflife, happy, wgt = pspwght,
           inwdds, inwmms, inwyys, inwshh, inwsmm) %>%
    mutate(wave = 7)

# Interviewer side: age is converted into six categories (coded 1 to 6)
int7 <- intv7 %>%
    mutate(iage = as.numeric(cut(intagea, c(0, 30, 40, 50, 60, 70, 100)))) %>%
    select(idno, cntry, intnum, iage, isex = intgndr)

ess7 <- resp7 %>%
    left_join(int7, by = c("idno", "cntry")) %>%
    rename(cntryISO = cntry)
rm(main7, intv7, resp7, int7)

1.8 ESS round 8

ESS Round 8: European Social Survey Round 8 Data (2016). Data file edition 2.1. NSD - Norwegian Centre for Research Data, Norway – Data Archive and distributor of ESS data for ESS ERIC. doi:10.21338/NSD-ESS8-2016.

# Read the Round 8 files
dir8  <- "~/Datasets/ESS/Round 8/"
main8 <- read_spss(paste0(dir8, "ESS8e02_1.sav"))  # integrated data
intv8 <- read_spss(paste0(dir8, "ESS8INTe02.sav")) # interviewer questionnaire
rm(dir8)

# Respondent side: keep and rename the analysis variables
resp8 <- main8 %>%
    select(idno, cntry,
           sex = gndr, age = agea, eduyrs,
           work = mnactic, hours = wkhct,
           mig = ctzcntr, relig = rlgdgr,
           children = chldhm, partner = icpart1,
           ipcrtiv:impfun,
           ls = stflife, happy, wgt = pspwght,
           inwdds, inwmms, inwyys, inwshh, inwsmm) %>%
    mutate(wave = 8)

# Interviewer side: age is converted into six categories (coded 1 to 6)
int8 <- intv8 %>%
    mutate(iage = as.numeric(cut(intagea, c(0, 30, 40, 50, 60, 70, 100)))) %>%
    select(idno, cntry, intnum, iage, isex = intgndr)

ess8 <- resp8 %>%
    left_join(int8, by = c("idno", "cntry")) %>%
    rename(cntryISO = cntry)
rm(main8, intv8, resp8, int8)

1.9 ESS round 9

ESS Round 9: European Social Survey Round 9 Data (2018). Data file edition 2.0. NSD - Norwegian Centre for Research Data, Norway – Data Archive and distributor of ESS data for ESS ERIC. doi:10.21338/NSD-ESS9-2018.

Information on the interviewers’ age is not provided for Montenegro.

Information on the interview date is not available for Bulgaria.

path <- "~/Datasets/ESS/Round 9/"
dat1 <- read_spss(paste0(path, "ESS9e02.sav"))    # integrated data
dat2 <- read_spss(paste0(path, "ESS9INTe02.sav")) # interviewer questionnaire
rm(path)

# Build the Round 9 analysis file. `partner` and `children` are derived from
# the household grid (rshipa2 ... rshipa10) rather than renamed from a single
# source variable.
ess9 <- left_join(dat1 %>%
                        # partner / children = 1 ("yes") if ANY household
                        # member in rshipa2:rshipa10 has relationship code 1
                        # (partner) or 2 (children), otherwise 2 ("no").
                        # The check covers every column in the range; a
                        # `first:last` expression on the row values would only
                        # look at the first and the last column.
                        # `%in%` treats missing entries as "not matching".
                        mutate(partner  = ifelse(rowSums(across(rshipa2:rshipa10,
                                                                ~ .x %in% 1)) > 0,
                                                 1, 2),
                               children = ifelse(rowSums(across(rshipa2:rshipa10,
                                                                ~ .x %in% 2)) > 0,
                                                 1, 2)) %>%
                        rename(sex = gndr,
                               age = agea,
                               work = mnactic,
                               hours = wkhct,
                               mig = ctzcntr,
                               relig = rlgdgr,
                               ls = stflife,
                               wgt = pspwght) %>%
                        select(idno, cntry, sex, age, eduyrs, work, hours,
                               mig, relig, children, partner,
                               ipcrtiv:impfun, ls, happy, wgt,
                               inwdds, inwmms, inwyys, inwshh, inwsmm) %>%
                        mutate(wave = 9),
                  dat2 %>%
                        rename(isex = intgndr,
                               iage = intagea) %>%
                        # create age categories (coded 1 to 6)
                        mutate(iage = cut(iage, c(0, 30, 40, 50, 60, 70, 100)),
                               iage = as.numeric(iage)) %>%
                        select(idno, cntry, intnum, iage, isex),
                  by = c("idno", "cntry")) %>%
        rename(cntryISO = cntry)
rm(dat1, dat2)

For 2 interviewers in Montenegro, inconsistent information on their sex and age is provided. These inconsistencies are corrected.

# One sex and one age value per interviewer (country x interviewer number),
# obtained with the Mode() helper over all of that interviewer's interviews
int9_mode <- ess9 %>%
    group_by(cntryISO, intnum) %>%
    summarise(isex = Mode(isex),
              iage = Mode(iage),
              .groups = "keep")

# Replace the respondent-level values by the per-interviewer values
ess9 <- ess9 %>%
    select(-isex, -iage) %>%
    left_join(int9_mode, by = c("cntryISO", "intnum"))
rm(int9_mode)

1.10 Merge ESS rounds

# Stack the nine rounds row-wise into one data set. The rounds are appended in
# three steps, in round order; columns are matched by name.
# NOTE(review): the stepwise order is kept as is -- it may matter for how
# labelled columns from different rounds are combined; confirm before merging
# the three calls into one.
dat <- bind_rows(ess1, ess2)
dat <- bind_rows(dat, ess3, ess4, ess5)
dat <- bind_rows(dat, ess6, ess7, ess8, ess9)
# The per-round objects are no longer needed
rm(ess1, ess2, ess3, ess4, ess5, ess6, ess7, ess8, ess9)

2 Recode variables

# Country
# Drop all attributes so that only the plain ISO codes remain
attributes(dat$cntryISO) <- NULL
# Lookup table: ISO code -> country name
country_names <- c(AL = "Albania",     AT = "Austria",    BE = "Belgium",
                   BG = "Bulgaria",    CH = "Switzerland", CY = "Cyprus",
                   CZ = "Czechia",     DE = "Germany",    DK = "Denmark",
                   EE = "Estonia",     ES = "Spain",      FI = "Finland",
                   FR = "France",      GB = "United Kingdom", GR = "Greece",
                   HR = "Croatia",     HU = "Hungary",    IE = "Ireland",
                   IL = "Israel",      IS = "Iceland",    IT = "Italy",
                   LT = "Lithuania",   LU = "Luxembourg", LV = "Latvia",
                   ME = "Montenegro",  NL = "Netherlands", NO = "Norway",
                   PL = "Poland",      PT = "Portugal",   RO = "Romania",
                   RS = "Serbia",      RU = "Russia",     SE = "Sweden",
                   SI = "Slovenia",    SK = "Slovakia",   TR = "Turkey",
                   UA = "Ukraine",     XK = "Kosovo")
dat$cntry <- factor(dat$cntryISO,
                    levels = names(country_names),
                    labels = unname(country_names))
# Germany becomes the reference (first) level
dat$cntry <- relevel(dat$cntry, ref = "Germany")
rm(country_names)
table(dat$cntry, useNA = "always")
## 
##        Germany        Albania        Austria        Belgium       Bulgaria 
##          25700           1201          17736          16110          10522 
##    Switzerland         Cyprus        Czechia        Denmark        Estonia 
##          15402           5190          17616          10836          15314 
##          Spain        Finland         France United Kingdom         Greece 
##          17169          17955          17061          19830           9759 
##        Croatia        Hungary        Ireland         Israel        Iceland 
##           4943          14793          20463          14910           2211 
##          Italy      Lithuania     Luxembourg         Latvia     Montenegro 
##           9067          11995           3187           4858           1200 
##    Netherlands         Norway         Poland       Portugal        Romania 
##          16859          14654          15624          16043           4285 
##         Serbia         Russia         Sweden       Slovenia       Slovakia 
##           2043          12458          15929          12232           9874 
##         Turkey        Ukraine         Kosovo           <NA> 
##           4272           9987           1295              0
# Respondent sex: code 1 -> "male" (reference level), code 2 -> "female";
# any other value becomes NA
dat$sex <- dat$sex %>%
    factor(levels = c(1, 2), labels = c("male", "female")) %>%
    relevel(ref = "male")
table(dat$sex, useNA = "always")
## 
##   male female   <NA> 
## 203171 237077    335
# Age (in years); values above 100 are treated as missing
too_old <- which(dat$age > 100)
dat$age[too_old] <- NA
rm(too_old)
table(dat$age, useNA = "always")
## 
##   13   14   15   16   17   18   19   20   21   22   23   24   25   26   27 
##    1    9 2833 5215 5733 5863 5880 5690 5886 5901 5902 5978 6018 6207 6215 
##   28   29   30   31   32   33   34   35   36   37   38   39   40   41   42 
## 6317 6454 6648 6842 6762 6880 7039 7214 7222 7294 7266 7317 7802 7497 7413 
##   43   44   45   46   47   48   49   50   51   52   53   54   55   56   57 
## 7342 7361 7426 7450 7321 7538 7466 7603 7324 7586 7455 7401 7505 7547 7263 
##   58   59   60   61   62   63   64   65   66   67   68   69   70   71   72 
## 7196 7362 7346 6931 7033 6845 6761 6743 6622 6387 6298 6104 5955 5576 5303 
##   73   74   75   76   77   78   79   80   81   82   83   84   85   86   87 
## 5132 4635 4626 4181 3951 3607 3429 3243 2803 2510 2187 1803 1565 1328 1058 
##   88   89   90   91   92   93   94   95   96   97   98   99  100 <NA> 
##  813  647  713  333  206  161  110   73   38   21   14   12    8 2028
# Interviewer sex: code 1 -> "male", code 2 -> "female"
# Note: Missing for several samples!
isex_labels <- c("male", "female")
dat$isex <- factor(dat$isex,
                   levels = seq_along(isex_labels),
                   labels = isex_labels)
rm(isex_labels)
table(dat$isex, useNA = "always")
## 
##   male female   <NA> 
##  97581 198308 144694
# Interviewer age: category codes 1 to 6 are turned into labelled age bands
# Note: Missing for several samples!
iage_labels <- c("-30", "31-40", "41-50", "51-60", "61-70", "71+")
dat$iage <- factor(dat$iage,
                   levels = seq_along(iage_labels),
                   labels = iage_labels)
rm(iage_labels)
table(dat$iage, useNA = "always")
## 
##    -30  31-40  41-50  51-60  61-70    71+   <NA> 
##  34589  34897  62348  84747  61233  14017 148752
# Years of full-time education completed
# Values 0 to 30 are mapped onto themselves; every other value is replaced by
# the `default`, i.e. NA (recodeVar() comes from the doBy package).
dat$eduyrs <- recodeVar(dat$eduyrs, 0:30, 0:30, default = NA)
table(dat$eduyrs, useNA = "always")
## 
##     0     1     2     3     4     5     6     7     8     9    10    11    12 
##  2911   742  1184  2675  8661  5623  9469  9779 27501 26438 30301 45830 71882 
##    13    14    15    16    17    18    19    20    21    22    23    24    25 
## 40805 30494 30260 28318 21083 16856  8127  7747  2680  2167  1226   836   813 
##    26    27    28    29    30  <NA> 
##   244   161   111    73   231  5355
# Main activity last 7 days
# Source codes:
#  1 = Paid work,
#  2 = Education,
#  3 = Unemployed, looking for job,
#  4 = Unemployed, not looking for job,
#  5 = Permanently sick or disabled,
#  6 = Retired,
#  7 = Community or military service,
#  8 = Housework, looking after children, others,
#  9 = Other
# Collapsed into four groups: 1 = Paid work, 2 = Education, 3 = Retired,
# 4 = Other (all remaining codes); values outside 1 to 9 become NA.
work_groups <- c(1, 2, 4, 4, 4, 3, 4, 4, 4)
dat$work <- recodeVar(dat$work, 1:9, work_groups, default = NA)
dat$work <- relevel(factor(dat$work,
                           levels = 1:4,
                           labels = c("Paid work", "Education",
                                      "Retired", "Other")),
                    ref = "Paid work")
rm(work_groups)
table(dat$work, useNA = "always")
## 
## Paid work Education   Retired     Other      <NA> 
##    214523     36723    107078     79667      2592
# Contractual work hours: rounded to whole hours, capped at 80, and set to 0
# for everybody whose main activity is not paid work
hrs <- round(dat$hours)
hrs[hrs > 80] <- 80
hrs[dat$work != "Paid work"] <- 0
dat$hours <- hrs
rm(hrs)
table(dat$hours, useNA = "always")
## 
##      0      1      2      3      4      5      6      7      8      9     10 
## 224308    109     80    101    216    255    261    203   1019    256   1090 
##     11     12     13     14     15     16     17     18     19     20     21 
##    121    751    177    265   1147    924    288   1044    583   5946    682 
##     22     23     24     25     26     27     28     29     30     31     32 
##    900    467   1909   2692    552    550   1226    308   7287    257   2659 
##     33     34     35     36     37     38     39     40     41     42     43 
##    557    736   9819   4908   9172  17472   5270  81724   1026   7620   1404 
##     44     45     46     47     48     49     50     51     52     53     54 
##    686   5051    458    156   4076    155   5654     31    141     44    215 
##     55     56     57     58     59     60     61     62     63     64     65 
##    775    529     22     50      7   3349      8     37     48     49    268 
##     66     67     68     69     70     71     72     73     74     75     76 
##     52     10     21      2   1071      4    268      2     18     93     10 
##     77     78     79     80   <NA> 
##     28     19      3   1192  17640
# Migration indicator, reverse coded from "citizen of country":
# source code 1 -> "No" (reference level), source code 2 -> "Yes"
dat$mig <- dat$mig %>%
    factor(levels = 1:2, labels = c("No", "Yes")) %>%
    relevel(ref = "No")
table(dat$mig, useNA = "always")
## 
##     No    Yes   <NA> 
## 421564  18694    325
# Living with a partner: code 1 -> "yes" (reference level), code 2 -> "no"
dat$partner <- dat$partner %>%
    factor(levels = c(1, 2), labels = c("yes", "no")) %>%
    relevel(ref = "yes")
table(dat$partner, useNA = "always")
## 
##    yes     no   <NA> 
## 256384 181704   2495
# Living with children at home: code 1 -> "yes" (reference level),
# code 2 -> "no"
dat$children <- dat$children %>%
    factor(levels = c(1, 2), labels = c("yes", "no")) %>%
    relevel(ref = "yes")
table(dat$children, useNA = "always")
## 
##    yes     no   <NA> 
## 151600 287886   1097
# How religious are you
# Used as delivered (no recoding); the table only checks the distribution,
# including missing values.
table(dat$relig, useNA = "always")
## 
##     0     1     2     3     4     5     6     7     8     9    10  <NA> 
## 63784 24766 30585 34258 28063 71884 42841 49795 44203 19210 26913  4281
# Schwartz higher-order values
# Frequency table (including missing values) for each single value item in
# the column range ipcrtiv:impfun; the higher-order scores are built from
# these items.
sapply(select(dat, ipcrtiv:impfun), table, useNA = "always")
##      ipcrtiv imprich ipeqopt ipshabt impsafe impdiff ipfrule ipudrst ipmodst
## 1      83940   18139  134801   48172  120907   59396   46457   81575   69768
## 2     138670   45914  172880  109207  154356  114166  117352  176245  141819
## 3     106995   80144   73682  104112   79737  103839  101790  102141  104417
## 4      52388   92966   28210   74568   40978   74439   72565   43308   61829
## 5      32281  134063   11908   68579   24028   57027   64316   16377   37547
## 6       9222   53739    3034   19216    4981   15139   19728    3874    8743
## <NA>   17087   15618   16068   16729   15596   16577   18375   17063   16460
##      ipgdtim impfree iphlppl ipsuces ipstrgv ipadvnt ipbhprp iprspot iplylfr
## 1      58444  123280  104085   46413  119412   26594   73188   48085  145706
## 2     121431  164416  177932  109892  154063   57086  153586  110360  187453
## 3     104175   82965   96493  109007   82310   79342  100570  105517   63234
## 4      70354   35312   35090   77355   40461   85167   56050   75418   20815
## 5      51962   14981    8990   63024   20357  115013   32505   65888    5726
## 6      16807    3548    2089   17403    5297   60617    7611   17402    1656
## <NA>   17410   16081   15904   17489   18683   16764   17073   17913   15993
##      impenv imptrad impfun
## 1    130278   86523  53973
## 2    168444  136440 112957
## 3     82246   94054 104959
## 4     31672   56412  76206
## 5      9530   37149  53445
## 6      2336   14193  22653
## <NA>  16077   15812  16390
# Higher-order value scores: row mean of the items of each dimension
# (ignoring missing items), reversed via 7 - mean so that higher item codes
# give lower scores
value_items <- list(
    OTC = c("impdiff", "ipadvnt", "ipcrtiv", "impfree", "impfun", "ipgdtim"),
    CON = c("ipmodst", "imptrad", "ipbhprp", "ipfrule", "impsafe", "ipstrgv"),
    STR = c("iphlppl", "iplylfr", "ipeqopt", "ipudrst", "impenv"),
    SEN = c("imprich", "iprspot", "ipshabt", "ipsuces")
)
for (dim_name in names(value_items)) {
    dat[[dim_name]] <- 7 - rowMeans(dat[value_items[[dim_name]]], na.rm = TRUE)
}
# Difference scores between opposing dimensions
dat$opendim <- dat$OTC - dat$CON
dat$selfdim <- dat$STR - dat$SEN
rm(value_items, dim_name)

# How satisfied with life as a whole
# (0 = Extremely dissatisfied,
#  10 = Extremely satisfied)
# Used as delivered; the table only checks the distribution, including
# missing values.
table(dat$ls, useNA = "always")
## 
##      0      1      2      3      4      5      6      7      8      9     10 
##   8494   5792  10837  18795  20341  51214  39725  73007 105163  59942  44817 
##   <NA> 
##   2456
# How happy are you
# (0 = Extremely unhappy,
#  10 = Extremely happy)
# Used as delivered; the table only checks the distribution, including
# missing values.
table(dat$happy, useNA = "always")
## 
##      0      1      2      3      4      5      6      7      8      9     10 
##   3551   3251   6637  12518  15657  47338  39387  78244 116651  68836  45460 
##   <NA> 
##   3053
# Country identifier: the ISO country code as a factor
dat$cid <- as.factor(dat$cntryISO)
n_distinct(dat$cid) # number of countries
## [1] 38
# Sample identifier
# Note: Country code and wave are pasted together to create a unique
#       identifier for each sample (country x round)
sample_key <- paste0(dat$cntryISO, dat$wave)
dat$sid <- factor(sample_key)
rm(sample_key)
n_distinct(dat$sid) # number of samples
## [1] 232
# Interviewer identification number
# Note: Merged with sample identification number
#       to create a unique identifier across samples.
#       Respondents without an interviewer number are set to NA *before* the
#       factor is built, so that no unused "<sample>_NA" levels remain; such
#       levels would otherwise be counted by table() as interviewers with
#       zero interviews and distort the median below.
dat$iid <- factor(ifelse(is.na(dat$intnum),
                         NA_character_,
                         paste0(dat$sid, "_", dat$intnum)))
median(c(table(dat$iid))) # median number of interviews per interviewer
## [1] 11
# Day of the week
# The interview date is assembled from day, month and year; incomplete or
# invalid dates become NA.
dat$date <- strptime(paste0(dat$inwdds, "/", dat$inwmms, "/", dat$inwyys),
                     "%d/%m/%Y")
# The weekday is derived from the numeric weekday ("%u": 1 = Monday, ...,
# 7 = Sunday) instead of the weekday name, so the result does not depend on
# the LC_TIME locale of the machine running the script.
dat$dow <- factor(as.integer(format(dat$date, "%u")),
                  levels = 1:7,
                  labels = c("Monday", "Tuesday", "Wednesday",
                             "Thursday", "Friday", "Saturday", "Sunday"))
table(dat$dow, useNA = "always")
## 
##    Monday   Tuesday Wednesday  Thursday    Friday  Saturday    Sunday 
##     69681     71811     72441     67954     57036     57856     38860 
##      <NA> 
##      4944
# Dummy indicators for day of the week
# Note: Each indicator is a factor with the levels "other day"
#       (first level and thus the reference) and the respective weekday.
#       Interviews without a valid day of the week remain NA.
flag_weekday <- function(dow, day) {
    factor(dow == day,
           levels = c(FALSE, TRUE),
           labels = c("other day", day))
}
dat$isMo <- flag_weekday(dat$dow, "Monday")
dat$isFr <- flag_weekday(dat$dow, "Friday")
dat$isSa <- flag_weekday(dat$dow, "Saturday")
dat$isSu <- flag_weekday(dat$dow, "Sunday")
rm(flag_weekday)
table(dat$isMo, useNA = "always")
## 
## other day    Monday      <NA> 
##    365958     69681      4944
table(dat$isFr, useNA = "always")
## 
## other day    Friday      <NA> 
##    378603     57036      4944
table(dat$isSa, useNA = "always")
## 
## other day  Saturday      <NA> 
##    377783     57856      4944
table(dat$isSu, useNA = "always")
## 
## other day    Sunday      <NA> 
##    396779     38860      4944
# Quarter of the year
# Note: Months 1-3 are assigned to the 1st quarter, months 4-6 to the
#       2nd quarter, and so on. Values that are not a month between
#       1 and 12 (including missing values) result in NA.
#       The 1st quarter is the first level and thus the reference.
quarter_of_month <- rep(1:4, each = 3)
dat$quart <- factor(quarter_of_month[match(dat$inwmms, 1:12)],
                    levels = 1:4,
                    labels = c("1st", "2nd", "3rd", "4th"))
rm(quarter_of_month)
table(dat$quart, useNA = "always")
## 
##    1st    2nd    3rd    4th   <NA> 
## 115169  35020  54603 233121   2670
# Time of the day (start of the interview in decimal hours)
# Note: Missing for two samples.
start_hour <- round(dat$inwshh) + round(dat$inwsmm) / 60
# Interviews starting at 22:00 or later or at 04:00 or earlier
at_night <- start_hour >= 22 | start_hour <= 4
sum(at_night, na.rm = TRUE) / sum(!is.na(start_hour)) # % recoded
## [1] 0.001416059
# Collapse night-time interviews into the category 22 and
# the remaining early interviews (before 08:00) into the category 8
start_hour[at_night] <- 22
start_hour[start_hour < 8] <- 8
dat$tod <- start_hour
rm(start_hour, at_night)
table(round(dat$tod), useNA = "always")
## 
##     8     9    10    11    12    13    14    15    16    17    18    19    20 
##  3179 12962 39696 33477 34058 32258 46275 38725 48753 42299 45732 29121 18199 
##    21    22  <NA> 
##  4711  1328  9810
# Number of interviews on specific days
#   conducted by each interviewer
# Note: The counts are calculated within each interviewer and attached
#       to all interviews of this interviewer. Interviews without an
#       interviewer identification number receive NA.
dat <- dat %>%
          group_by(iid) %>%
          mutate(iisMo = if_else(is.na(iid), NA_integer_,
                                 sum(dow %in% "Monday")),
                 iisFr = if_else(is.na(iid), NA_integer_,
                                 sum(dow %in% "Friday")),
                 iisSa = if_else(is.na(iid), NA_integer_,
                                 sum(dow %in% "Saturday")),
                 iisSu = if_else(is.na(iid), NA_integer_,
                                 sum(dow %in% "Sunday"))) %>%
          ungroup()
table(dat$iisMo, useNA = "always")
## 
##     0     1     2     3     4     5     6     7     8     9    10    11    12 
## 79951 60683 62152 54824 43904 33939 25152 18535 13580  9525  7652  4887  4387 
##    13    14    15    16    17    18    19    20    21    22    23    24    25 
##  2736  2413  1094  1300  1323   588   989   194   842    99   549   548   448 
##    27    28    29    30    31    32    33    34    35    36    37    40  <NA> 
##   295    95   261   136   127   179   120   128   176   180   297   142  6153
table(dat$iisFr, useNA = "always")
## 
##     0     1     2     3     4     5     6     7     8     9    10    11    12 
## 96117 72415 69283 55061 39263 27767 21855 14079 10594  6899  5433  3587  3076 
##    13    14    15    16    17    18    19    20    21    22    23    24    25 
##  1660  1452   935   993   923   292    82   599   317   321    39   270   526 
##    29    31    34  <NA> 
##   284   129   179  6153
table(dat$iisSa, useNA = "always")
## 
##      0      1      2      3      4      5      6      7      8      9     10 
## 135294  70065  55887  42818  33287  22576  19004  12538   9236   6196   5085 
##     11     12     13     14     15     16     17     18     19     20     21 
##   4443   3208   2832   2754   1803   1428   1047   1478    594    545    225 
##     22     23     24     25     26     27     28     29     30     31     34 
##    192    207    115    105    111    230     47    129    109    282     80 
##     37     38     47     49     53   <NA> 
##     75     67     94    133    111   6153
table(dat$iisSu, useNA = "always")
## 
##      0      1      2      3      4      5      6      7      8      9     10 
## 220199  60373  40733  27901  21718  15036  12206   8658   6725   4512   3252 
##     11     12     13     14     15     16     17     18     19     20     21 
##   2693   1677   1758   1649    843    656    745    580    319    543    255 
##     22     23     24     25     26     27     30     31     34     36   <NA> 
##     95    138    195     96    110    143     61     56    310    195   6153

3 Holidays

Interviews conducted on a public holiday should be excluded from the analyses. Therefore, all public holidays for each sample are retrieved from https://date.nager.at. Because the website does not list the respective dates for Kosovo, Israel, and Montenegro, the public holidays for these countries were retrieved manually from https://www.timeanddate.com/holidays.

# Load holidays
# Note: The public holidays are retrieved only once and cached in
#       Data/holidays.rds. Delete the cache file to retrieve them again.
if (!file.exists(here::here("Data/holidays.rds"))) {

    # Country-year combinations with at least one dated interview
    # Note: The interviews of a sample can fall into more than one
    #       calendar year. Therefore, the holidays are requested for
    #       every year in which interviews of a country took place
    #       and not only for the year of the first interview of a sample.
    #       Countries not supported by nager.at (IL, ME, XK) are
    #       excluded here and added manually below.
    cntry_years <- dat %>%
                      filter(!is.na(dow)) %>%
                      transmute(cntry = as.character(cntryISO),
                                year  = as.numeric(inwyys)) %>%
                      filter(!is.na(cntry),
                             !(cntry %in% c("IL", "ME", "XK"))) %>%
                      distinct() %>%
                      arrange(cntry, year)

    # Retrieve public holidays from nager.at
    # Note: A failed request stops the script (naming the URL) to
    #       prevent that an incomplete list of holidays is cached.
    fetched <- lapply(seq_len(nrow(cntry_years)), function(j) {

        # URL for API
        url <- paste0("https://date.nager.at/api/v2/publicholidays/",
                      cntry_years$year[j], "/", cntry_years$cntry[j])

        # Fetch data
        h <- tryCatch(fromJSON(url),
                      error = function(e) {
                          stop("Could not retrieve public holidays from ",
                               url, ": ", conditionMessage(e),
                               call. = FALSE)
                      })

        # Keep public holidays only
        h %>%
            filter(type == "Public") %>%
            rename(cntryISO = countryCode, holiday_date = date,
                   holiday_name = name) %>%
            select(cntryISO, holiday_date, holiday_name)
    })

    # Combine all country-years at once instead of growing the data
    holidays <- bind_rows(fetched)
    rm(cntry_years, fetched)

    # Retrieve manually coded public holidays from timeanddate.com
    # Note: Dates are formatted as YYYY-MM-DD to match the API format.
    h <- read.xlsx(here::here("Data/holidays_ILXKME.xlsx"),
                   sheet = "Holidays") %>%
            rename(holiday_name = name) %>%
            mutate(holiday_date = paste(year,
                                        str_pad(month, 2, "left", "0"),
                                        str_pad(day, 2, "left", "0"),
                                        sep = "-")) %>%
            select(cntryISO, holiday_name, holiday_date)
    holidays <- bind_rows(holidays, h) %>%
                    arrange(cntryISO, holiday_date)
    rm(h)

    # Save holidays
    saveRDS(holidays, here::here("Data/holidays.rds"))
}
holidays <- readRDS(here::here("Data/holidays.rds"))

# Determine holidays
# Note: An interview is flagged as conducted on a public holiday if its
#       date (YYYY-MM-DD) is listed as a holiday of the respective
#       country. Country and date are combined into a single key to
#       match all interviews in one vectorised lookup instead of
#       searching the list of holidays separately for each row.
interview_key <- paste(as.character(dat$cntryISO),
                       paste(dat$inwyys,
                             str_pad(dat$inwmms, 2, "left", "0"),
                             str_pad(dat$inwdds, 2, "left", "0"),
                             sep = "-"),
                       sep = "_")
holiday_key <- paste(as.character(holidays$cntryISO),
                     holidays$holiday_date,
                     sep = "_")
dat$isHoliday <- as.numeric(interview_key %in% holiday_key)
# Interviews without a valid interview date cannot be classified
dat$isHoliday[is.na(dat$dow)] <- NA
rm(interview_key, holiday_key)
dat$isHoliday <- factor(dat$isHoliday,
                        levels = c(0, 1),
                        labels = c("no", "yes"))
table(dat$isHoliday, useNA = "always")
## 
##     no    yes   <NA> 
## 429325   6314   4944
rm(holidays)

4 Select data

4.1 Select cases

# Available data
(N <- nrow(dat))        # sample size
## [1] 440583
(k <- n_distinct(dat$sid)) # samples
## [1] 232
# Remaining number of participants (N) and samples (k) in absolute
# numbers and as percentage of the originally available data
report_remaining <- function(data, total_n, total_k) {
    data %>%
        summarise('N' = n(),
                  'k' = length(unique(sid)),
                  'N of original' = paste0(round(N / total_n * 100, 2), "%"),
                  'k of original' = paste0(round(k / total_k * 100, 2), "%"))
}
# Samples without interview date
dat <- filter(dat,
              !(cntryISO %in% "EE" & wave == 5),
              !(cntryISO %in% "BG" & wave == 9))
report_remaining(dat, N, k)
# Samples without interviewer identification number
dat <- filter(dat, !(cntryISO %in% c("AT", "FR", "SE") & wave == 1))
report_remaining(dat, N, k)
# Samples from Israel
dat <- filter(dat, !(cntryISO %in% "IL"))
report_remaining(dat, N, k)
# Participants without interview day
dat <- filter(dat, !is.na(dow))
report_remaining(dat, N, k)
# Interviews not on a public holiday
dat <- filter(dat, isHoliday %in% "no")
report_remaining(dat, N, k)
# No interviewer identification number available
dat <- filter(dat, !is.na(iid))
report_remaining(dat, N, k)
rm(N, k, report_remaining)

4.2 Select variables

# Keep the variables required for the analyses
# (identifiers and weight, respondent characteristics, values and
#  well-being, interview timing, interviewer characteristics)
dat <- select(dat,
              all_of(c("sid", "cid", "cntryISO", "cntry", "wave", "iid", "wgt",
                       "sex", "age", "eduyrs", "work", "hours",
                       "mig", "relig", "children", "partner",
                       "opendim", "selfdim", "ls", "happy",
                       "dow", "tod", "quart", "isMo", "isFr", "isSa", "isSu",
                       "isex", "iage", "iisMo", "iisFr", "iisSa", "iisSu")))

4.3 Missing values

# Missing values by item (%)
dat %>%
    select(-c(sid, cntryISO, cntry, wave, wgt)) %>%
    summarise(across(everything(),
                     function(x) round(mean(is.na(x)) * 100, 2))) %>%
    pivot_longer(everything())
# Impute missing values (with < 5%) with mode/median
# Note: The first set of variables is imputed with Mode(),
#       the second set with the median of the observed values.
dat <- dat %>%
          mutate(across(c(sex, age, work, mig,
                          children, partner),
                        function(x) if_else(is.na(x), Mode(x), x)),
                 across(c(eduyrs, hours, relig, opendim,
                          selfdim, ls, happy, tod),
                        function(x) ifelse(is.na(x),
                                           median(x, na.rm = TRUE),
                                           x)))

# Drop empty levels from factors
dat <- dat %>%
          mutate(across(c(cntry, iid, sid), droplevels))

5 Set contrasts

For the day-of-the-week indicators (and the sample identification number), effect coding is used to examine deviations from the mean, while dummy coding is used for the remaining categorical variables.

# Assign a named contrast matrix to a factor
# (the columns of the matrix are named before it is attached)
set_coding <- function(f, coding, coding_names) {
    colnames(coding) <- coding_names
    contrasts(f) <- coding
    f
}

# Dummy coding (reference: first level)
#   Sex, work, migration, quarter of year
dat$sex   <- set_coding(dat$sex, contr.treatment(2, base = 1), "female")
dat$work  <- set_coding(dat$work, contr.treatment(4, base = 1),
                        levels(dat$work)[-1])
dat$mig   <- set_coding(dat$mig, contr.treatment(2, base = 1), "yes")
dat$quart <- set_coding(dat$quart, contr.treatment(4, base = 1),
                        levels(dat$quart)[-1])

# Dummy coding (reference: second level)
#   Children at home, living with partner
dat$children <- set_coding(dat$children, contr.treatment(2, base = 2), "yes")
dat$partner  <- set_coding(dat$partner, contr.treatment(2, base = 2), "yes")

# Effect coding (-1 = other day, 1 = respective day)
#   Indicators for Monday, Friday, Saturday, and Sunday
dat$isMo <- set_coding(dat$isMo, rbind(-1, 1), "Monday")
dat$isFr <- set_coding(dat$isFr, rbind(-1, 1), "Friday")
dat$isSa <- set_coding(dat$isSa, rbind(-1, 1), "Saturday")
dat$isSu <- set_coding(dat$isSu, rbind(-1, 1), "Sunday")
rm(set_coding)

# Sum-to-zero contrasts
#   Sample identification number
contrasts(dat$sid) <- contr.sum(length(unique(dat$sid)))

6 Save data

# Store the prepared data (including the contrasts set above)
saveRDS(object = dat, file = here::here("Data/data.rds"))