# Clear work space
# NOTE: this removes every object from the global environment, so the script
# should be run in a session that holds nothing that is still needed.
rm(list = ls())
# Load packages
suppressPackageStartupMessages({
library(tidyverse)
library(haven)
library(doBy)
library(jsonlite)
library(openxlsx)
})
source(here::here("Code/Helper.R"))
# Set options
# Weekday names are later matched against English labels ("Monday", ...), so
# LC_TIME has to be an English locale. "English" is a valid locale name on
# Windows only; on other platforms fall back to common POSIX names ("C" also
# yields English weekday names).
lc_time <- ""
for (loc in c("English", "en_US.UTF-8", "en_US", "C")) {
  lc_time <- suppressWarnings(Sys.setlocale("LC_TIME", loc))
  if (nzchar(lc_time)) break
}
lc_time
rm(loc, lc_time)
## [1] "English_United States.1252"
The data from the European Social Survey (ESS) were retrieved from https://europeansocialsurvey.org. The relevant information is contained in the integrated data file and in the data from the interviewer’s questionnaire.
ESS Round 1: European Social Survey Round 1 Data (2002). Data file edition 6.6. NSD - Norwegian Centre for Research Data, Norway – Data Archive and distributor of ESS data for ESS ERIC. doi:10.21338/NSD-ESS1-2002.
Round 1 does not provide an interviewer identification number or information on the interviewers’ sex and age. Therefore, the interviewer identification number is imputed from the contact form using the interviewer making the first contact. For missing information, the respective identification number of the second or third interviewer is imputed.
# ESS Round 1: respondent data from the integrated file; the interviewer
# number is taken from the contact forms
path <- "~/Datasets/ESS/Round 1/"
dat1 <- read_spss(paste0(path, "ESS1e06_6.sav")) # integrated data
dat2 <- read_spss(paste0(path, "ess1contacts.sav")) # contact forms
rm(path)
ess1 <- left_join(
  # Respondent-level variables under harmonised names
  dat1 %>%
    rename(sex = gndr,
           age = agea,
           work = mnactic,
           hours = wkhct,
           mig = ctzcntr,
           relig = rlgdgr,
           children = chldhm,
           ls = stflife,
           wgt = pspwght,
           inwdds = inwdd,
           inwmms = inwmm,
           inwyys = inwyr) %>%
    select(idno, cntry, sex, age, eduyrs, work, hours,
           mig, relig, children, partner,
           ipcrtiv:impfun, ls, happy, wgt,
           inwdds, inwmms, inwyys, inwshh, inwsmm),
  # Interviewer number from the contact forms
  dat2 %>%
    filter(!is.na(IDNO)) %>%
    rename(idno = IDNO,
           cntry = CNTRY) %>%
    # Use the interviewer of the first contact; if missing, fall back to the
    # second and then to the third interviewer (the second fallback
    # previously re-used INTNUM2 and therefore never filled anything)
    mutate(intnum = ifelse(is.na(INTNUM1), INTNUM2, INTNUM1),
           intnum = ifelse(is.na(intnum), INTNUM3, intnum)) %>%
    select(idno, cntry, intnum),
  by = c("idno", "cntry")) %>%
  rename(cntryISO = cntry) %>%
  # Interviewer sex and age are not set for this round
  mutate(wave = 1,
         isex = NA,
         iage = NA)
rm(dat1, dat2)
ESS Round 2: European Social Survey Round 2 Data (2004). Data file edition 3.6. NSD - Norwegian Centre for Research Data, Norway – Data Archive and distributor of ESS data for ESS ERIC. doi:10.21338/NSD-ESS2-2004.
Round 2 does not provide information on the interviewers’ sex and age.
There are inconsistencies for the Icelandic respondent identification number. Because the interviewer data could not be properly matched for all respondents, the interviewer identification number is missing for about 40% of the Icelandic data.
# ESS Round 2: integrated file, separate file for Italy, and interviewer
# questionnaire
path <- "~/Datasets/ESS/Round 2/"
dat1 <- read_spss(paste0(path, "ESS2e03_6.sav"))
dat1IT <- read_spss(paste0(path, "ESS2IT.sav"))
dat2 <- read_spss(paste0(path, "ESS2INTe03_2.por"))
rm(path)
# Respondent-level variables under harmonised names
resp <- bind_rows(dat1, dat1IT) %>%
  rename(sex = gndr, age = agea, work = mnactic, hours = wkhct,
         mig = ctzcntr, relig = rlgdgr, children = chldhm,
         ls = stflife, wgt = pspwght,
         inwdds = inwdd, inwmms = inwmm, inwyys = inwyr) %>%
  select(idno, cntry, sex, age, eduyrs, work, hours,
         mig, relig, children, partner,
         ipcrtiv:impfun, ls, happy, wgt,
         inwdds, inwmms, inwyys, inwshh, inwsmm)
# Interviewer number per respondent
intv <- dat2 %>%
  select(idno = IDNO, cntry = CNTRY, intnum = INTNUM)
# Merge; interviewer sex and age are set to missing for this round
ess2 <- resp %>%
  left_join(intv, by = c("idno", "cntry")) %>%
  mutate(wave = 2, isex = NA, iage = NA) %>%
  rename(cntryISO = cntry)
rm(dat1, dat1IT, dat2, resp, intv)
ESS Round 3: European Social Survey Round 3 Data (2006). Data file edition 3.7. NSD - Norwegian Centre for Research Data, Norway – Data Archive and distributor of ESS data for ESS ERIC. doi:10.21338/NSD-ESS3-2006.
Round 3 does not provide information on the interviewers’ sex and age.
No design weights are available for Latvia and Romania.
# ESS Round 3: integrated file, separate file for Latvia and Romania, and
# interviewer questionnaire
path <- "~/Datasets/ESS/Round 3/"
dat1 <- read_spss(paste0(path, "ESS3e03_7.sav"))
dat1LVRO <- read_spss(paste0(path, "ESS3LVRO.sav"))
dat2 <- read_spss(paste0(path, "ESS3INTe02.por"))
rm(path)
# Respondent-level variables under harmonised names; the existing `age`
# column is dropped first so that `agea` can take its name
resp <- bind_rows(dat1, dat1LVRO) %>%
  select(-age) %>%
  rename(sex = gndr, age = agea, work = mnactic, hours = wkhct,
         mig = ctzcntr, relig = rlgdgr, children = chldhm,
         ls = stflife, wgt = pspwght) %>%
  select(idno, cntry, sex, age, eduyrs, work, hours,
         mig, relig, children, partner,
         ipcrtiv:impfun, ls, happy, wgt,
         inwdds, inwmms, inwyys, inwshh, inwsmm) %>%
  # Missing weights (no design weights for Latvia and Romania) are set to 1
  mutate(wgt = ifelse(is.na(wgt), 1, wgt))
# Interviewer number per respondent
intv <- dat2 %>%
  select(idno = IDNO, cntry = CNTRY, intnum = INTNUM)
# Merge; interviewer sex and age are set to missing for this round
ess3 <- resp %>%
  left_join(intv, by = c("idno", "cntry")) %>%
  mutate(wave = 3, isex = NA, iage = NA) %>%
  rename(cntryISO = cntry)
rm(dat1, dat1LVRO, dat2, resp, intv)
ESS Round 4: European Social Survey Round 4 Data (2008). Data file edition 4.5. NSD - Norwegian Centre for Research Data, Norway – Data Archive and distributor of ESS data for ESS ERIC. doi:10.21338/NSD-ESS4-2008.
Information on the interviewers’ sex and age is not provided for Sweden.
No design weights are available for Lithuania.
# ESS Round 4: integrated file, two separately released country files, and
# interviewer questionnaire
# NOTE(review): the second country file carries the code LT, which is the ISO
# code of Lithuania in the country list of this script, although the original
# comments referred to Latvia -- confirm.
path <- "~/Datasets/ESS/Round 4/"
dat1 <- read_spss(paste0(path, "ESS4e04_5.sav"))
dat1AT <- read_spss(paste0(path, "ESS4AT.sav"))
dat1LT <- read_spss(paste0(path, "ESS4LT.sav"))
dat2 <- read_spss(paste0(path, "ESS4INTe03.sav"))
rm(path)
# Respondent-level variables under harmonised names
resp <- bind_rows(dat1, dat1AT, dat1LT) %>%
  rename(sex = gndr, age = agea, work = mnactic, hours = wkhct,
         mig = ctzcntr, relig = rlgdgr, children = chldhm,
         ls = stflife, wgt = pspwght) %>%
  select(idno, cntry, sex, age, eduyrs, work, hours,
         mig, relig, children, partner,
         ipcrtiv:impfun, ls, happy, wgt,
         inwdds, inwmms, inwyys, inwshh, inwsmm) %>%
  # Missing weights (country file without design weights) are set to 1
  mutate(wgt = ifelse(is.na(wgt), 1, wgt),
         wave = 4)
# Interviewer number, age, and sex per respondent
intv <- dat2 %>%
  select(idno, cntry, intnum, iage = intage, isex = intgndr)
ess4 <- resp %>%
  left_join(intv, by = c("idno", "cntry")) %>%
  rename(cntryISO = cntry)
rm(dat1, dat1AT, dat1LT, dat2, resp, intv)
ESS Round 5: European Social Survey Round 5 Data (2010). Data file edition 3.4. NSD - Norwegian Centre for Research Data, Norway – Data Archive and distributor of ESS data for ESS ERIC. doi:10.21338/NSD-ESS5-2010.
Information on the interviewers’ sex and age is not provided for Sweden.
Information on the interview date is not available for Estonia.
Information on the interview time is not available for Ireland.
# ESS Round 5: integrated file and interviewer questionnaire, each with a
# separate file for Austria
path <- "~/Datasets/ESS/Round 5/"
dat1 <- read_spss(paste0(path, "ESS5e03_4.sav"))
dat1AT <- read_spss(paste0(path, "ESS5ATe1_1.sav"))
dat2 <- read_spss(paste0(path, "ESS5INTe03.sav"))
dat2AT <- read_spss(paste0(path, "ESS5intAT.sav"))
rm(path)
# Respondent-level variables under harmonised names
resp <- bind_rows(dat1, dat1AT) %>%
  rename(sex = gndr, age = agea, work = mnactic, hours = wkhct,
         mig = ctzcntr, relig = rlgdgr, children = chldhm,
         partner = icpart1, ls = stflife, wgt = pspwght) %>%
  select(idno, cntry, sex, age, eduyrs, work, hours,
         mig, relig, children, partner,
         ipcrtiv:impfun, ls, happy, wgt,
         inwdds, inwmms, inwyys, inwshh, inwsmm) %>%
  mutate(wave = 5)
# Interviewer data: where `intage` is missing, fill it with `intagea` binned
# into six categories (codes 1 to 6)
intv <- bind_rows(dat2, dat2AT) %>%
  rename(isex = intgndr, iage = intage) %>%
  mutate(age_cat = as.numeric(cut(intagea, c(0, 30, 40, 50, 60, 70, 100))),
         iage = ifelse(is.na(iage), age_cat, iage)) %>%
  select(idno, cntry, intnum, iage, isex)
ess5 <- resp %>%
  left_join(intv, by = c("idno", "cntry")) %>%
  rename(cntryISO = cntry)
rm(dat1, dat1AT, dat2, dat2AT, resp, intv)
For 7 interviewers in Cyprus, inconsistent information on their sex and age is provided. These inconsistencies are corrected.
ESS Round 6: European Social Survey Round 6 Data (2012). Data file edition 2.4. NSD - Norwegian Centre for Research Data, Norway – Data Archive and distributor of ESS data for ESS ERIC. doi:10.21338/NSD-ESS6-2012.
Information on the interviewers’ sex and age is not provided for Ukraine.
# ESS Round 6: integrated file and interviewer questionnaire
path <- "~/Datasets/ESS/Round 6/"
dat1 <- read_spss(paste0(path, "ESS6e02_4.sav"))
dat2 <- read_spss(paste0(path, "ESS6INTe02_1.sav"))
rm(path)
# Respondent-level variables under harmonised names
resp <- dat1 %>%
  rename(sex = gndr, age = agea, work = mnactic, hours = wkhct,
         mig = ctzcntr, relig = rlgdgr, children = chldhm,
         partner = icpart1, ls = stflife, wgt = pspwght) %>%
  select(idno, cntry, sex, age, eduyrs, work, hours,
         mig, relig, children, partner,
         ipcrtiv:impfun, ls, happy, wgt,
         inwdds, inwmms, inwyys, inwshh, inwsmm) %>%
  mutate(wave = 6)
# Interviewer data: `intagea` is binned into six categories (codes 1 to 6)
intv <- dat2 %>%
  transmute(idno, cntry, intnum,
            iage = as.numeric(cut(intagea, c(0, 30, 40, 50, 60, 70, 100))),
            isex = intgndr)
ess6 <- resp %>%
  left_join(intv, by = c("idno", "cntry")) %>%
  rename(cntryISO = cntry)
rm(dat1, dat2, resp, intv)
For 2 interviewers in the United Kingdom, inconsistent information on their sex and age is provided. These inconsistencies are corrected.
ESS Round 7: European Social Survey Round 7 Data (2014). Data file edition 2.2. NSD - Norwegian Centre for Research Data, Norway – Data Archive and distributor of ESS data for ESS ERIC. doi:10.21338/NSD-ESS7-2014.
Information on the interview time is not available for Portugal.
# ESS Round 7: integrated file and interviewer questionnaire
path <- "~/Datasets/ESS/Round 7/"
dat1 <- read_spss(paste0(path, "ESS7e02_2.sav"))
dat2 <- read_spss(paste0(path, "ESS7INTe02_1.sav"))
rm(path)
# Respondent-level variables under harmonised names
resp <- dat1 %>%
  rename(sex = gndr, age = agea, work = mnactic, hours = wkhct,
         mig = ctzcntr, relig = rlgdgr, children = chldhm,
         partner = icpart1, ls = stflife, wgt = pspwght) %>%
  select(idno, cntry, sex, age, eduyrs, work, hours,
         mig, relig, children, partner,
         ipcrtiv:impfun, ls, happy, wgt,
         inwdds, inwmms, inwyys, inwshh, inwsmm) %>%
  mutate(wave = 7)
# Interviewer data: `intagea` is binned into six categories (codes 1 to 6)
intv <- dat2 %>%
  transmute(idno, cntry, intnum,
            iage = as.numeric(cut(intagea, c(0, 30, 40, 50, 60, 70, 100))),
            isex = intgndr)
ess7 <- resp %>%
  left_join(intv, by = c("idno", "cntry")) %>%
  rename(cntryISO = cntry)
rm(dat1, dat2, resp, intv)
ESS Round 8: European Social Survey Round 8 Data (2016). Data file edition 2.1. NSD - Norwegian Centre for Research Data, Norway – Data Archive and distributor of ESS data for ESS ERIC. doi:10.21338/NSD-ESS8-2016.
# ESS Round 8: integrated file and interviewer questionnaire
path <- "~/Datasets/ESS/Round 8/"
dat1 <- read_spss(paste0(path, "ESS8e02_1.sav"))
dat2 <- read_spss(paste0(path, "ESS8INTe02.sav"))
rm(path)
# Respondent-level variables under harmonised names
resp <- dat1 %>%
  rename(sex = gndr, age = agea, work = mnactic, hours = wkhct,
         mig = ctzcntr, relig = rlgdgr, children = chldhm,
         partner = icpart1, ls = stflife, wgt = pspwght) %>%
  select(idno, cntry, sex, age, eduyrs, work, hours,
         mig, relig, children, partner,
         ipcrtiv:impfun, ls, happy, wgt,
         inwdds, inwmms, inwyys, inwshh, inwsmm) %>%
  mutate(wave = 8)
# Interviewer data: `intagea` is binned into six categories (codes 1 to 6)
intv <- dat2 %>%
  transmute(idno, cntry, intnum,
            iage = as.numeric(cut(intagea, c(0, 30, 40, 50, 60, 70, 100))),
            isex = intgndr)
ess8 <- resp %>%
  left_join(intv, by = c("idno", "cntry")) %>%
  rename(cntryISO = cntry)
rm(dat1, dat2, resp, intv)
ESS Round 9: European Social Survey Round 9 Data (2018). Data file edition 2.0. NSD - Norwegian Centre for Research Data, Norway – Data Archive and distributor of ESS data for ESS ERIC. doi:10.21338/NSD-ESS9-2018.
Information on the interviewers’ age is not provided for Montenegro.
Information on the interview date is not available for Bulgaria.
# ESS Round 9: integrated file and interviewer questionnaire
path <- "~/Datasets/ESS/Round 9/"
dat1 <- read_spss(paste0(path, "ESS9e02.sav")) # integrated data
dat2 <- read_spss(paste0(path, "ESS9INTe02.sav")) # interviewer questionnaire
rm(path)
ess9 <- left_join(
  dat1 %>%
    # Derive partner/children (1 = yes, 2 = no) from the relationship codes
    # of household members 2 to 10: code 1 counts as partner, code 2 as
    # child. All nine columns are checked; the earlier
    # `sum(Xrshipa2:Xrshipa10)` under rowwise() applied the `:` sequence
    # operator to two scalars and thus only looked at members 2 and 10.
    mutate(partner = ifelse(rowSums(across(rshipa2:rshipa10,
                                           ~ .x %in% 1)) > 0, 1, 2),
           children = ifelse(rowSums(across(rshipa2:rshipa10,
                                            ~ .x %in% 2)) > 0, 1, 2)) %>%
    # Harmonised variable names
    rename(sex = gndr,
           age = agea,
           work = mnactic,
           hours = wkhct,
           mig = ctzcntr,
           relig = rlgdgr,
           ls = stflife,
           wgt = pspwght) %>%
    select(idno, cntry, sex, age, eduyrs, work, hours,
           mig, relig, children, partner,
           ipcrtiv:impfun, ls, happy, wgt,
           inwdds, inwmms, inwyys, inwshh, inwsmm) %>%
    mutate(wave = 9),
  dat2 %>%
    rename(isex = intgndr,
           iage = intagea) %>%
    # create age categories (codes 1 to 6)
    mutate(iage = cut(iage, c(0, 30, 40, 50, 60, 70, 100)),
           iage = as.numeric(iage)) %>%
    select(idno, cntry, intnum, iage, isex),
  by = c("idno", "cntry")) %>%
  rename(cntryISO = cntry)
rm(dat1, dat2)
For 2 interviewers in Montenegro, inconsistent information on their sex and age is provided. These inconsistencies are corrected.
# Country: plain ISO code in `cntryISO`, labelled factor in `cntry`
attributes(dat$cntryISO) <- NULL
# ISO code -> country name (the order defines the factor levels)
country_names <- c(
  AL = "Albania", AT = "Austria", BE = "Belgium", BG = "Bulgaria",
  CH = "Switzerland", CY = "Cyprus", CZ = "Czechia", DE = "Germany",
  DK = "Denmark", EE = "Estonia", ES = "Spain", FI = "Finland",
  FR = "France", GB = "United Kingdom", GR = "Greece", HR = "Croatia",
  HU = "Hungary", IE = "Ireland", IL = "Israel", IS = "Iceland",
  IT = "Italy", LT = "Lithuania", LU = "Luxembourg", LV = "Latvia",
  ME = "Montenegro", NL = "Netherlands", NO = "Norway", PL = "Poland",
  PT = "Portugal", RO = "Romania", RS = "Serbia", RU = "Russia",
  SE = "Sweden", SI = "Slovenia", SK = "Slovakia", TR = "Turkey",
  UA = "Ukraine", XK = "Kosovo"
)
# Germany becomes the first (reference) level
dat$cntry <- relevel(factor(dat$cntryISO,
                            levels = names(country_names),
                            labels = unname(country_names)),
                     ref = "Germany")
rm(country_names)
table(dat$cntry, useNA = "always")
##
## Germany Albania Austria Belgium Bulgaria
## 25700 1201 17736 16110 10522
## Switzerland Cyprus Czechia Denmark Estonia
## 15402 5190 17616 10836 15314
## Spain Finland France United Kingdom Greece
## 17169 17955 17061 19830 9759
## Croatia Hungary Ireland Israel Iceland
## 4943 14793 20463 14910 2211
## Italy Lithuania Luxembourg Latvia Montenegro
## 9067 11995 3187 4858 1200
## Netherlands Norway Poland Portugal Romania
## 16859 14654 15624 16043 4285
## Serbia Russia Sweden Slovenia Slovakia
## 2043 12458 15929 12232 9874
## Turkey Ukraine Kosovo <NA>
## 4272 9987 1295 0
# Respondent sex (1 = male, 2 = female); "male" is the reference level
dat$sex <- relevel(factor(dat$sex,
                          levels = 1:2,
                          labels = c("male", "female")),
                   ref = "male")
table(dat$sex, useNA = "always")
##
## male female <NA>
## 203171 237077 335
##
## 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27
## 1 9 2833 5215 5733 5863 5880 5690 5886 5901 5902 5978 6018 6207 6215
## 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42
## 6317 6454 6648 6842 6762 6880 7039 7214 7222 7294 7266 7317 7802 7497 7413
## 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57
## 7342 7361 7426 7450 7321 7538 7466 7603 7324 7586 7455 7401 7505 7547 7263
## 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
## 7196 7362 7346 6931 7033 6845 6761 6743 6622 6387 6298 6104 5955 5576 5303
## 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87
## 5132 4635 4626 4181 3951 3607 3429 3243 2803 2510 2187 1803 1565 1328 1058
## 88 89 90 91 92 93 94 95 96 97 98 99 100 <NA>
## 813 647 713 333 206 161 110 73 38 21 14 12 8 2028
# Interviewer sex (1 = male, 2 = female)
# Note: Missing for several samples!
isex_labels <- c("male", "female")
dat$isex <- factor(dat$isex, levels = seq_along(isex_labels),
                   labels = isex_labels)
rm(isex_labels)
table(dat$isex, useNA = "always")
##
## male female <NA>
## 97581 198308 144694
# Interviewer age (six categories, coded 1 to 6)
# Note: Missing for several samples!
iage_labels <- c("-30", "31-40", "41-50", "51-60", "61-70", "71+")
dat$iage <- factor(dat$iage, levels = seq_along(iage_labels),
                   labels = iage_labels)
rm(iage_labels)
table(dat$iage, useNA = "always")
##
## -30 31-40 41-50 51-60 61-70 71+ <NA>
## 34589 34897 62348 84747 61233 14017 148752
# Years of full-time education completed
# Values 0 to 30 are mapped onto themselves; `default = NA` is passed for all
# remaining values (presumably they become missing -- see doBy::recodeVar)
dat$eduyrs <- recodeVar(dat$eduyrs, 0:30, 0:30, default = NA)
# Frequency table including missing values
table(dat$eduyrs, useNA = "always")
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12
## 2911 742 1184 2675 8661 5623 9469 9779 27501 26438 30301 45830 71882
## 13 14 15 16 17 18 19 20 21 22 23 24 25
## 40805 30494 30260 28318 21083 16856 8127 7747 2680 2167 1226 836 813
## 26 27 28 29 30 <NA>
## 244 161 111 73 231 5355
# Main activity during the last 7 days, collapsed from nine source codes into
# four categories:
#   1 Paid work                                  -> Paid work
#   2 Education                                  -> Education
#   6 Retired                                    -> Retired
#   3 Unemployed, looking for job                -> Other
#   4 Unemployed, not looking for job            -> Other
#   5 Permanently sick or disabled               -> Other
#   7 Community or military service              -> Other
#   8 Housework, looking after children, others  -> Other
#   9 Other                                      -> Other
work_target <- c(1, 2, rep(4, 3), 3, rep(4, 3))
work_labels <- c("Paid work", "Education", "Retired", "Other")
dat$work <- recodeVar(dat$work, 1:9, work_target, default = NA)
# "Paid work" is the reference level
dat$work <- relevel(factor(dat$work,
                           levels = seq_along(work_labels),
                           labels = work_labels),
                    ref = "Paid work")
rm(work_target, work_labels)
table(dat$work, useNA = "always")
##
## Paid work Education Retired Other <NA>
## 214523 36723 107078 79667 2592
# Contractual work hours: rounded to whole hours, capped at 80, and set to 0
# for everyone whose main activity is not paid work
hrs <- round(dat$hours)
hrs <- replace(hrs, which(hrs > 80), 80)
hrs <- replace(hrs, which(dat$work != "Paid work"), 0)
dat$hours <- hrs
rm(hrs)
table(dat$hours, useNA = "always")
##
## 0 1 2 3 4 5 6 7 8 9 10
## 224308 109 80 101 216 255 261 203 1019 256 1090
## 11 12 13 14 15 16 17 18 19 20 21
## 121 751 177 265 1147 924 288 1044 583 5946 682
## 22 23 24 25 26 27 28 29 30 31 32
## 900 467 1909 2692 552 550 1226 308 7287 257 2659
## 33 34 35 36 37 38 39 40 41 42 43
## 557 736 9819 4908 9172 17472 5270 81724 1026 7620 1404
## 44 45 46 47 48 49 50 51 52 53 54
## 686 5051 458 156 4076 155 5654 31 141 44 215
## 55 56 57 58 59 60 61 62 63 64 65
## 775 529 22 50 7 3349 8 37 48 49 268
## 66 67 68 69 70 71 72 73 74 75 76
## 52 10 21 2 1071 4 268 2 18 93 10
## 77 78 79 80 <NA>
## 28 19 3 1192 17640
# Migration indicator derived from the citizenship item: source code 1 is
# labelled "No", source code 2 is labelled "Yes"; "No" is the reference level
dat$mig <- relevel(factor(dat$mig, levels = c(1, 2), labels = c("No", "Yes")),
                   ref = "No")
table(dat$mig, useNA = "always")
##
## No Yes <NA>
## 421564 18694 325
# Living with a partner (1 = yes, 2 = no); "yes" is the first level
dat$partner <- relevel(factor(dat$partner,
                              levels = 1:2,
                              labels = c("yes", "no")),
                       ref = "yes")
table(dat$partner, useNA = "always")
##
## yes no <NA>
## 256384 181704 2495
# Living with children at home (1 = yes, 2 = no); "yes" is the first level
dat$children <- relevel(factor(dat$children,
                               levels = 1:2,
                               labels = c("yes", "no")),
                        ref = "yes")
table(dat$children, useNA = "always")
##
## yes no <NA>
## 151600 287886 1097
##
## 0 1 2 3 4 5 6 7 8 9 10 <NA>
## 63784 24766 30585 34258 28063 71884 42841 49795 44203 19210 26913 4281
## ipcrtiv imprich ipeqopt ipshabt impsafe impdiff ipfrule ipudrst ipmodst
## 1 83940 18139 134801 48172 120907 59396 46457 81575 69768
## 2 138670 45914 172880 109207 154356 114166 117352 176245 141819
## 3 106995 80144 73682 104112 79737 103839 101790 102141 104417
## 4 52388 92966 28210 74568 40978 74439 72565 43308 61829
## 5 32281 134063 11908 68579 24028 57027 64316 16377 37547
## 6 9222 53739 3034 19216 4981 15139 19728 3874 8743
## <NA> 17087 15618 16068 16729 15596 16577 18375 17063 16460
## ipgdtim impfree iphlppl ipsuces ipstrgv ipadvnt ipbhprp iprspot iplylfr
## 1 58444 123280 104085 46413 119412 26594 73188 48085 145706
## 2 121431 164416 177932 109892 154063 57086 153586 110360 187453
## 3 104175 82965 96493 109007 82310 79342 100570 105517 63234
## 4 70354 35312 35090 77355 40461 85167 56050 75418 20815
## 5 51962 14981 8990 63024 20357 115013 32505 65888 5726
## 6 16807 3548 2089 17403 5297 60617 7611 17402 1656
## <NA> 17410 16081 15904 17489 18683 16764 17073 17913 15993
## impenv imptrad impfun
## 1 130278 86523 53973
## 2 168444 136440 112957
## 3 82246 94054 104959
## 4 31672 56412 76206
## 5 9530 37149 53445
## 6 2336 14193 22653
## <NA> 16077 15812 16390
# Value scores: row mean of the listed items (ignoring missing items),
# subtracted from 7 so that higher scores correspond to lower item codes
reversed_mean <- function(items) {
  7 - rowMeans(dat[, items], na.rm = TRUE)
}
dat$OTC <- reversed_mean(c("impdiff", "ipadvnt", "ipcrtiv",
                           "impfree", "impfun", "ipgdtim"))
dat$CON <- reversed_mean(c("ipmodst", "imptrad", "ipbhprp",
                           "ipfrule", "impsafe", "ipstrgv"))
dat$STR <- reversed_mean(c("iphlppl", "iplylfr", "ipeqopt",
                           "ipudrst", "impenv"))
dat$SEN <- reversed_mean(c("imprich", "iprspot", "ipshabt",
                           "ipsuces"))
# Difference scores between the value scores
dat$opendim <- dat$OTC - dat$CON
dat$selfdim <- dat$STR - dat$SEN
rm(reversed_mean)
# Satisfaction with life as a whole
# (0 = Extremely dissatisfied,
# 10 = Extremely satisfied)
table(dat$ls, useNA = "always")
##
## 0 1 2 3 4 5 6 7 8 9 10
## 8494 5792 10837 18795 20341 51214 39725 73007 105163 59942 44817
## <NA>
## 2456
# How happy are you
# (0 = Extremely unhappy,
# 10 = Extremely happy)
# Frequency table including missing values; the variable is left unchanged
table(dat$happy, useNA = "always")
##
## 0 1 2 3 4 5 6 7 8 9 10
## 3551 3251 6637 12518 15657 47338 39387 78244 116651 68836 45460
## <NA>
## 3053
# Country identifier: the ISO country code as a factor
dat$cid <- as.factor(dat$cntryISO)
n_distinct(dat$cid) # number of countries
## [1] 38
# Sample identifier
# Note: Country code and wave are concatenated to create a unique
# identifier for each sample
dat$sid <- factor(paste(dat$cntryISO, dat$wave, sep = ""))
length(unique(dat$sid)) # number of samples
## [1] 232
# Interviewer identification number
# Note: Merged with sample identification number
# to create a unique identifier across samples
# Missing interviewer numbers are set to NA *before* the factor is built.
# Otherwise the pasted "<sid>_NA" labels stay behind as empty factor levels,
# and table() would report them with a count of zero, which biases the median
# below.
dat$iid <- factor(ifelse(is.na(dat$intnum),
                         NA,
                         paste0(dat$sid, "_", dat$intnum)))
median(c(table(dat$iid))) # median number of interviews per interviewer
## [1] 11
# Day of the week of the interview, derived from interview day/month/year.
# The weekday names are matched against English labels, so this relies on an
# English LC_TIME locale.
day_names <- c("Monday", "Tuesday", "Wednesday", "Thursday",
               "Friday", "Saturday", "Sunday")
dat$date <- strptime(paste(dat$inwdds, dat$inwmms, dat$inwyys, sep = "/"),
                     format = "%d/%m/%Y")
dat$dow <- factor(weekdays(dat$date), levels = day_names)
rm(day_names)
table(dat$dow, useNA = "always")
##
## Monday Tuesday Wednesday Thursday Friday Saturday Sunday
## 69681 71811 72441 67954 57036 57856 38860
## <NA>
## 4944
# Two-level indicators for selected days of the week; "other day" is the
# first (reference) level, and rows without a weekday stay missing
day_flags <- c(isMo = "Monday", isFr = "Friday",
               isSa = "Saturday", isSu = "Sunday")
for (flag in names(day_flags)) {
  day <- day_flags[[flag]]
  dat[[flag]] <- relevel(factor(as.integer(dat$dow == day),
                                levels = 0:1,
                                labels = c("other day", day)),
                         ref = "other day")
}
rm(day_flags, flag, day)
table(dat$isMo, useNA = "always")
##
## other day Monday <NA>
## 365958 69681 4944
##
## other day Friday <NA>
## 378603 57036 4944
##
## other day Saturday <NA>
## 377783 57856 4944
##
## other day Sunday <NA>
## 396779 38860 4944
# Quarter of the year, derived from the interview month (months 1-3 -> 1st,
# 4-6 -> 2nd, 7-9 -> 3rd, 10-12 -> 4th); "1st" is the reference level
month_to_quarter <- rep(1:4, each = 3)
dat$quart <- recodeVar(dat$inwmms, 1:12, month_to_quarter, default = NA)
dat$quart <- relevel(factor(dat$quart,
                            levels = 1:4,
                            labels = c("1st", "2nd", "3rd", "4th")),
                     ref = "1st")
rm(month_to_quarter)
table(dat$quart, useNA = "always")
##
## 1st 2nd 3rd 4th <NA>
## 115169 35020 54603 233121 2670
# Time of the day: interview start hour plus start minute as fraction of an
# hour
# Note: Missing for two samples.
dat$tod <- round(dat$inwshh) + round(dat$inwsmm) / 60
# Interviews starting at 22:00 or later, or at 04:00 or earlier
night <- dat$tod >= 22 | dat$tod <= 4
sum(night, na.rm = TRUE) / sum(!is.na(dat$tod)) # share recoded
## [1] 0.001416059
# Night-time starts are set to 22, remaining starts before 08:00 to 8
dat$tod[night] <- 22
dat$tod[dat$tod < 8] <- 8
rm(night)
table(round(dat$tod), useNA = "always")
##
## 8 9 10 11 12 13 14 15 16 17 18 19 20
## 3179 12962 39696 33477 34058 32258 46275 38725 48753 42299 45732 29121 18199
## 21 22 <NA>
## 4711 1328 9810
# Number of interviews that each interviewer conducted on Mondays, Fridays,
# Saturdays, and Sundays; set to missing when the interviewer is unknown
day_counts <- dat %>%
  group_by(iid) %>%
  summarise(iisMo = sum(dow %in% "Monday"),
            iisFr = sum(dow %in% "Friday"),
            iisSa = sum(dow %in% "Saturday"),
            iisSu = sum(dow %in% "Sunday"),
            .groups = "drop") %>%
  mutate(across(c(iisMo, iisFr, iisSa, iisSu),
                ~ ifelse(is.na(iid), NA, .x)))
# Attach the interviewer-level counts to every interview
dat <- dat %>%
  left_join(day_counts, by = "iid")
rm(day_counts)
table(dat$iisMo, useNA = "always")
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12
## 79951 60683 62152 54824 43904 33939 25152 18535 13580 9525 7652 4887 4387
## 13 14 15 16 17 18 19 20 21 22 23 24 25
## 2736 2413 1094 1300 1323 588 989 194 842 99 549 548 448
## 27 28 29 30 31 32 33 34 35 36 37 40 <NA>
## 295 95 261 136 127 179 120 128 176 180 297 142 6153
##
## 0 1 2 3 4 5 6 7 8 9 10 11 12
## 96117 72415 69283 55061 39263 27767 21855 14079 10594 6899 5433 3587 3076
## 13 14 15 16 17 18 19 20 21 22 23 24 25
## 1660 1452 935 993 923 292 82 599 317 321 39 270 526
## 29 31 34 <NA>
## 284 129 179 6153
##
## 0 1 2 3 4 5 6 7 8 9 10
## 135294 70065 55887 42818 33287 22576 19004 12538 9236 6196 5085
## 11 12 13 14 15 16 17 18 19 20 21
## 4443 3208 2832 2754 1803 1428 1047 1478 594 545 225
## 22 23 24 25 26 27 28 29 30 31 34
## 192 207 115 105 111 230 47 129 109 282 80
## 37 38 47 49 53 <NA>
## 75 67 94 133 111 6153
##
## 0 1 2 3 4 5 6 7 8 9 10
## 220199 60373 40733 27901 21718 15036 12206 8658 6725 4512 3252
## 11 12 13 14 15 16 17 18 19 20 21
## 2693 1677 1758 1649 843 656 745 580 319 543 255
## 22 23 24 25 26 27 30 31 34 36 <NA>
## 95 138 195 96 110 143 61 56 310 195 6153
Interviews conducted on a public holiday should be excluded from the analyses. Therefore, all public holidays for each sample are retrieved from https://date.nager.at. Because the website does not list the respective dates for Kosovo, Israel, and Montenegro, the public holidays for these countries were manually retrieved from https://www.timeanddate.com/holidays.
# Load holidays
# The holiday list is built once and cached in Data/holidays.rds; delete that
# file to force a rebuild.
if (!file.exists(here::here("Data/holidays.rds"))) {
  # Countries that are not retrieved from nager.at (coded manually below)
  manual_countries <- c("IL", "ME", "XK")
  # Every country-by-calendar-year combination with at least one interview.
  # Using only the interview year of the first respondent of a sample would
  # miss the holidays of any other calendar year in which the same sample
  # was interviewed.
  targets <- tibble(cntryISO = as.character(unclass(dat$cntryISO)),
                    year = as.numeric(unclass(dat$inwyys))) %>%
    filter(!is.na(year), !(cntryISO %in% manual_countries)) %>%
    distinct() %>%
    arrange(cntryISO, year)
  # Retrieve public holidays from nager.at (one request per country and year)
  holidays <- lapply(seq_len(nrow(targets)), function(j) {
    # URL for API
    url <- paste0("https://date.nager.at/api/v2/publicholidays/",
                  targets$year[j], "/", targets$cntryISO[j])
    # Fetch data
    fromJSON(url) %>%
      filter(type == "Public") %>%
      rename(cntryISO = countryCode, holiday_date = date,
             holiday_name = name) %>%
      select(cntryISO, holiday_date, holiday_name)
  }) %>%
    bind_rows()
  rm(manual_countries, targets)
  # Retrieve manually coded public holidays from timeanddate.com
  h <- read.xlsx(here::here("Data/holidays_ILXKME.xlsx"),
                 sheet = "Holidays") %>%
    rename(holiday_name = name) %>%
    # Same "YYYY-MM-DD" format as the dates delivered by the API
    mutate(holiday_date = paste(year,
                                str_pad(month, 2, "left", 0),
                                str_pad(day, 2, "left", 0),
                                sep = "-")) %>%
    select(cntryISO, holiday_name, holiday_date)
  holidays <- bind_rows(holidays, h) %>%
    arrange(cntryISO, holiday_date)
  rm(h)
  # Save holidays
  saveRDS(holidays, here::here("Data/holidays.rds"))
}
holidays <- readRDS(here::here("Data/holidays.rds"))
# Determine holidays
# An interview counts as held on a public holiday if its date ("YYYY-MM-DD")
# is listed in `holidays` for the respondent's country. Country and date are
# combined into one key so that the lookup is a single vectorised match
# instead of a row-by-row apply() over the data frame.
interview_key <- paste(dat$cntryISO,
                       paste(dat$inwyys,
                             str_pad(dat$inwmms, 2, "left", 0),
                             str_pad(dat$inwdds, 2, "left", 0),
                             sep = "-"),
                       sep = "_")
holiday_key <- paste(holidays$cntryISO, holidays$holiday_date, sep = "_")
dat$isHoliday <- as.numeric(interview_key %in% holiday_key)
rm(interview_key, holiday_key)
# Unknown interview day -> holiday status unknown
dat$isHoliday[is.na(dat$dow)] <- NA
dat$isHoliday <- factor(dat$isHoliday,
                        levels = c(0, 1),
                        labels = c("no", "yes"))
table(dat$isHoliday, useNA = "always")
##
## no yes <NA>
## 429325 6314 4944
## [1] 440583
## [1] 232
# Step-wise exclusions. After every step the remaining number of respondents
# (N) and samples (k) is reported, together with their share of the original
# totals; the totals are read from the objects `N` and `k` in the calling
# environment (injected with `!!`).
retention <- function(d) {
  d %>%
    summarise(N = n(),
              k = length(unique(sid)),
              `N of original` = paste0(round(N / !!N * 100, 2), "%"),
              `k of original` = paste0(round(k / !!k * 100, 2), "%"))
}
# Samples without interview date
dat <- dat %>%
  filter(!((cntryISO %in% "EE" & wave == 5) |
             (cntryISO %in% "BG" & wave == 9)))
retention(dat)
# Samples without interviewer identification number
dat <- dat %>%
  filter(!(wave == 1 & cntryISO %in% c("AT", "FR", "SE")))
retention(dat)
# Samples from Israel
dat <- dat %>%
  filter(!(cntryISO %in% "IL"))
retention(dat)
# Participants without interview day
dat <- dat %>%
  filter(!is.na(dow))
retention(dat)
# Keep only interviews that were not held on a public holiday
dat <- dat %>%
  filter(isHoliday == "no")
retention(dat)
# Drop interviews without interviewer identification number
dat <- dat %>%
  filter(!is.na(iid))
retention(dat)
rm(retention)
# Missing values by item (%), excluding identifiers and the weight
dat %>%
  select(-c(sid, cntryISO, cntry, wave, wgt)) %>%
  summarise(across(everything(),
                   function(v) round(mean(is.na(v)) * 100, 2))) %>%
  pivot_longer(everything())
# Impute missing values (with < 5%):
# - first group: filled with Mode() from the sourced helper file (presumably
#   the most frequent value)
#   NOTE(review): `age` is part of this group although it is numeric
# - second group: filled with the median of the observed values
mode_vars <- c("sex", "age", "work", "mig", "children", "partner")
median_vars <- c("eduyrs", "hours", "relig", "opendim",
                 "selfdim", "ls", "happy", "tod")
dat <- dat %>%
  mutate(across(all_of(mode_vars),
                ~ if_else(is.na(.x), Mode(.x), .x)),
         across(all_of(median_vars),
                ~ ifelse(is.na(.x), median(.x, na.rm = TRUE), .x)))
rm(mode_vars, median_vars)
# Drop factor levels that are no longer observed after the exclusions
for (v in c("cntry", "iid", "sid")) {
  dat[[v]] <- droplevels(dat[[v]])
}
rm(v)
For the day of the week indicators effect-coding is used to examine the deviation from the mean, while dummy-coding is used for the remaining categorical variables.
# Contrast coding. Helper: label the columns of a coding matrix and attach it
# to a factor.
set_coding <- function(f, coding, labels) {
  colnames(coding) <- labels
  contrasts(f) <- coding
  f
}
# Treatment (dummy) coding -------------------------------------------------
# Sex (baseline: first level)
dat$sex <- set_coding(dat$sex, contr.treatment(2, base = 1), "female")
# Work (baseline: first level)
dat$work <- set_coding(dat$work, contr.treatment(4, base = 1),
                       levels(dat$work)[-1])
# Children at home (baseline: second level)
dat$children <- set_coding(dat$children, contr.treatment(2, base = 2), "yes")
# Living with partner (baseline: second level)
dat$partner <- set_coding(dat$partner, contr.treatment(2, base = 2), "yes")
# Migration (baseline: first level)
dat$mig <- set_coding(dat$mig, contr.treatment(2, base = 1), "yes")
# Quarter of year (baseline: first level)
dat$quart <- set_coding(dat$quart, contr.treatment(4, base = 1),
                        levels(dat$quart)[-1])
# Effect coding (-1 / 1) for the day-of-the-week indicators -----------------
effect_coding <- rbind(-1, 1)
dat$isMo <- set_coding(dat$isMo, effect_coding, "Monday")
dat$isFr <- set_coding(dat$isFr, effect_coding, "Friday")
dat$isSa <- set_coding(dat$isSa, effect_coding, "Saturday")
dat$isSu <- set_coding(dat$isSu, effect_coding, "Sunday")
rm(set_coding, effect_coding)
# Sample identification number (sum-to-zero coding)
contrasts(dat$sid) <- contr.sum(length(unique(dat$sid)))