###############################################################################
###################### DESCRIPTIVE OVERVIEW (POSS) ############################
###############################################################################
# NOTE: INCLUDES THE EXTENDED SAMPLE (ALSO INCLUDING INCOMPLETE DATA AND DATA
# FOR WHICH PARTICIPANTS DID NOT INDICATE USABILITY)

# Load packages
library(afex)
library(beeswarm)
library(dplyr)
library(emmeans)
library(ggplot2)
library(gplots)
library(heplots)
library(lm.beta)
library(mlbench)
library(papaja)
library(psych)
library(reshape2)
library(stm)
library(tidyr)
library(tidyverse)
library(tidytext)
library(writexl)

###############################################################################
####################################### READ DATA FILES #######################
###############################################################################

# Read one export file: comma-separated, with a header row, UTF-16LE encoded.
read_pilot_csv <- function(path) {
  read.csv(path, header = TRUE, sep = ",", fileEncoding = "UTF-16LE")
}

raw_data <- read_pilot_csv("pilot/data/data_poss_pilot.csv")
variables <- read_pilot_csv("pilot/data/variables_poss_pilot.csv")
values <- read_pilot_csv("pilot/data/values_poss_pilot.csv")

###############################################################################
######################## INCLUDE ALL DATA FOR THESE ANALYSES ##################
###############################################################################

# The descriptive overview uses all data sets, i.e. also incomplete data sets
# and data sets for which usability was not indicated.
all_data <- raw_data

###############################################################################
################################## DATA PROCESSING ############################
###############################################################################
################################## CREATING ID AND FACTORS ####################

# Helper: turn numeric answer codes 1..k into a factor with the given labels.
# The missing code -9 is set to NA explicitly; any other code outside 1..k
# also becomes NA because it matches no level.
code_to_factor <- function(codes, labels) {
  codes[codes == -9] <- NA
  factor(codes, levels = seq_along(labels), labels = labels)
}

# Helper: subtract 1 from the named columns so that the exported "1/2" coding
# becomes "0/1". Returns the modified data frame.
recode_checkboxes <- function(data, columns) {
  data[columns] <- data[columns] - 1
  data
}

# Create ID variable: one running number per row.
# seq_len() is used instead of 1:length() so that an empty data set yields an
# empty ID vector rather than c(1, 0).
id <- seq_len(nrow(all_data))
all_data <- cbind(id = id, all_data)

# S-1: Create factor for gender
all_data$gender <- code_to_factor(
  all_data$gender,
  c("male", "female", "other gender", "prefer not to answer")
)

# S-2: Create factor for country
# The labels come from the values file; entries 197 and 198 of that list are
# dropped (presumably non-country entries -- confirm against the values file).
# levels = 1:196 is kept explicit so that factor() fails loudly if the number
# of remaining labels is not 196.
all_data$country[all_data$country == -9] <- NA
country_list <- as.vector(values$MEANING[values$VAR == "country"])
country_list <- country_list[c(-197, -198)]
all_data$country <- factor(all_data$country, levels = 1:196, labels = country_list)

# S-3: Create factor for academic groups
all_data$academic_group <- code_to_factor(
  all_data$academic_group,
  c("Master student", "PhD student", "Post-doc", "Professor")
)

# S-5: Create factor for topic item
all_data$topic <- code_to_factor(
  all_data$topic,
  c(
    "Clinical psychology",
    "Developmental psychology",
    "Differential psychology",
    "Educational psychology",
    "General psychology",
    "Neuropsychology",
    "Organizational psychology",
    "Research methods",
    "Social psychology",
    "Other",
    "Psychology is not my main field."
  )
)

# S-6: Where did participants learn about the survey?
all_data$learned_about_survey <- code_to_factor(
  all_data$learned_about_survey,
  c(
    "I was invited based on my preregistration on the OSF.",
    "I was invited based on my article on Web of Science.",
    "I was invited based on my article on PubMed.",
    "Social media (e.g., Twitter, Facebook)",
    "E-Mail list",
    "Other"
  )
)

# G-1: Create factor for groups "PR before" vs. "No PR before"
all_data$PR_before <- code_to_factor(all_data$PR_before, c("yes", "no"))

# G-3: Recode learned item from "1/2" to "0/1"
all_data <- recode_checkboxes(all_data, c(
  "learned_lecture",
  "learned_project",
  "learned_work",
  "learned_supervisor",
  "learned_conversation",
  "learned_dontknow",
  "learned_neverheard",
  "learned_other"
))

# G-4: Create factor for template preference
all_data$template <- code_to_factor(
  all_data$template,
  c(
    "AsPredicted",
    "OSF-Standard Pre-Data Collection Registration",
    "Center for Open Science Preregistration Challenge Template (OSF)",
    "van 't Veer & Giner-Sorolla (2016)",
    "Replication Recipe (Brandt et al., 2013)",
    "I don’t use templates.",
    "I don’t know.",
    "Other template"
  )
)

# G-5: Recoding template reason from "1/2" to "0/1"
all_data <- recode_checkboxes(all_data, c(
  "template_reason_easy",
  "template_reason_efficient",
  "template_reason_comprehensive",
  "template_reason_fit",
  "template_reason_supervisor",
  "template_reason_coauthors",
  "template_reason_only_known",
  "template_reason_first",
  "template_reason_other"
))

# G-6: Recoding platform item from "1/2" to "0/1"
all_data <- recode_checkboxes(all_data, c(
  "platform_osf",
  "platform_aspredicted",
  "platform_zpid",
  "platform_personal_web",
  "platform_institutional_web",
  "platform_offline",
  "platform_other"
))

# G-7: Recoding important others item from "1/2" to "0/1"
# Note: the "none" column is spelled important_other_none (without the "s").
all_data <- recode_checkboxes(all_data, c(
  "important_others_coauthors",
  "important_others_supervisor",
  "important_others_editors",
  "important_others_peers",
  "important_others_institute",
  "important_others_funding",
  "important_other_none",
  "important_others_other"
))

# G-8: Define NA and create factor for Reading PR
all_data$reading_PR <- code_to_factor(
  all_data$reading_PR,
  c(
    "Always",
    "Most of the time",
    "Rarely",
    "Never",
    "Can't recall reading a paper that was preregistered."
  )
)

# G-9a: Recoding positive consequences item from "1/2" to "0/1"
all_data <- recode_checkboxes(all_data, c(
  "pos_con_transparency",
  "pos_con_phacking",
  "pos_con_publicationbias",
  "pos_con_quality",
  "pos_con_other"
))

# G-9b: Recoding negative consequences item from "1/2" to "0/1"
all_data <- recode_checkboxes(all_data, c(
  "neg_con_cheating",
  "neg_con_quality",
  "neg_con_exploratory",
  "neg_con_progress",
  "neg_con_other"
))

# M-11: Recoding first motivation item from "1/2" to "0/1"
# Subtract 1 from every first-motivation checkbox column ("1/2" -> "0/1").
first_motivation_cols <- c(
  "first_motivation_project",
  "first_motivation_peers",
  "first_motivation_coauthors",
  "first_motivation_supervisor",
  "first_motivation_funding",
  "first_motivation_self",
  "first_motivation_other"
)
all_data[first_motivation_cols] <- all_data[first_motivation_cols] - 1

# M-12: Create factor for motivation change (code -9 is treated as missing)
all_data$motivation_change <- factor(
  replace(all_data$motivation_change, all_data$motivation_change == -9, NA),
  levels = 1:3,
  labels = c(
    "I am now more motivated to preregister than I was before",
    "I am now less motivated to preregister than I was before",
    "My motivation did not change over time"
  )
)

# M-13: Define NA for benefits ranking (code -9 becomes NA in each column)
benefits_cols <- c(
  "benefits_trust",
  "benefits_qrps",
  "benefits_publicationbias",
  "benefits_fraud",
  "benefits_transparency",
  "benefits_documentation",
  "benefits_practice",
  "benefits_collaboration",
  "benefits_planning",
  "benefits_other1",
  "benefits_other2",
  "benefits_other3"
)
all_data[benefits_cols] <- lapply(all_data[benefits_cols], function(ranks) {
  ranks[ranks == -9] <- NA
  ranks
})

# O-11: Define NA for drawbacks ranking (first three items)
drawbacks_cols_first <- c(
  "drawbacks_scooping",
  "drawbacks_time",
  "drawbacks_effort"
)
all_data[drawbacks_cols_first] <- lapply(all_data[drawbacks_cols_first], function(ranks) {
  ranks[ranks == -9] <- NA
  ranks
})
# O-11 (continued): set the code -9 to NA in the remaining drawbacks ranking
# columns.
drawbacks_cols_rest <- c(
  "drawbacks_exploratory",
  "drawbacks_useless",
  "drawbacks_exploited",
  "drawbacks_decreases_progress",
  "drawbacks_flexibility",
  "drawbacks_reward",
  "drawbacks_other1",
  "drawbacks_other2",
  "drawbacks_other3"
)
all_data[drawbacks_cols_rest] <- lapply(all_data[drawbacks_cols_rest], function(ranks) {
  ranks[ranks == -9] <- NA
  ranks
})

# The problems (O-12a) and worries (O-12b) items use the same set of column
# suffixes, so both column lists are built from one suffix vector.
problem_worry_suffixes <- c(
  "insecurity",
  "conflict",
  "changes",
  "errors",
  "credibility",
  "flexibility",
  "scooping",
  "time",
  "none",
  "other"
)

# O-12a: Recoding problems item from "1/2" to "0/1"
problems_cols <- paste0("problems_", problem_worry_suffixes)
all_data[problems_cols] <- all_data[problems_cols] - 1

# O-12b: Recoding worries item from "1/2" to "0/1"
worries_cols <- paste0("worries_", problem_worry_suffixes)
all_data[worries_cols] <- all_data[worries_cols] - 1

# O-13: Define NA for ranking of reasons against PR
# Helper: replace the code -9 by NA in one vector of answers.
set_missing_to_na <- function(answers) {
  answers[answers == -9] <- NA
  answers
}

# Ranking columns for the reasons against preregistration.
reasons_against_cols <- paste0("reasons_against_", c(
  "time",
  "flexibility",
  "never_did",
  "neverthought",
  "submission",
  "noone_reads",
  "not_useful",
  "disadvantages",
  "other1",
  "other2",
  "other3"
))
all_data[reasons_against_cols] <- lapply(all_data[reasons_against_cols], set_missing_to_na)

# SU-1: Define NA and create factor for template format
all_data$template_format <- factor(
  set_missing_to_na(all_data$template_format),
  levels = 1:4,
  labels = c(
    "A more open preregistration template with open text input, where you are free to write what you want",
    "A more restricted template that gives a lot of suggestions and reminds you of left-out information",
    "Other",
    "I don't use templates at all, and rather just write my own text."
  )
)

# SU-2: Create factor for template process
all_data$template_process <- factor(
  set_missing_to_na(all_data$template_process),
  levels = 1:3,
  labels = c(
    "I would prefer a more automated, computer-assisted process (e.g., filling in boxes that are automatically presented).",
    "I would prefer a more open, self-administered process (e.g., word document that you can alter to fit your needs).",
    "Other"
  )
)

# SU-3: Define NA for information ranking
info_cols <- paste0("info_", c(
  "authors",
  "question",
  "study_type",
  "hypotheses",
  "variables",
  "design",
  "sample_size",
  "sample_rationale",
  "stopping",
  "inclusion",
  "exclusion",
  "procedure",
  "blinding",
  "randomization",
  "models",
  "assumptions",
  "transformations",
  "inference",
  "multiple_tests",
  "dropout",
  "missing",
  "exploratory",
  "existing_data",
  "code",
  "analysis_script",
  "material",
  "other1",
  "other2",
  "other3"
))
all_data[info_cols] <- lapply(all_data[info_cols], set_missing_to_na)

###############################################################################
################################## DESCRIPTIVE ANALYSES #######################
###############################################################################

################################# RESPONSE RATE ###############################

# Response rate
# TODO: for the main study, insert the number of people who are to be contacted
# (the denominator below).
# The numerator also includes participants that answered the social media calls.
response_rate <- length(all_data$id) / 200
response_rate

# How many participants indicated that they did not participate faithfully
table(raw_data$usability)

################################# TIME TO COMPLETE SURVEY #####################

## Analysis of total time with outliers included
# Row-wise sum of the columns TIME001 ... TIME016; results are divided by 60
# for reporting.
time_cols <- sprintf("TIME%03d", 1:16)
all_data$overall_time <- rowSums(all_data[, time_cols], na.rm = TRUE)
mean(all_data$overall_time) / 60
median(all_data$overall_time) / 60
sd(all_data$overall_time) / 60
boxplot(all_data$overall_time / 60)

## Analysis of total time without outliers
mean(all_data$TIME_SUM) / 60
median(all_data$TIME_SUM) / 60
sd(all_data$TIME_SUM) / 60
boxplot(all_data$TIME_SUM / 60)

######################## DEMOGRAPHIC INFORMATION ABOUT SAMPLE #################

# S-1: Gender
table(all_data$gender)

###############################################################################
# S-7: Age
table(all_data$age_answer)
mean(all_data$age, na.rm = TRUE)
sd(all_data$age, na.rm = TRUE)
range(all_data$age, na.rm = TRUE)

###############################################################################
# S-2: Country
# Show only the countries that occur at least once. Note that country_list is
# overwritten here with the per-country counts.
country_table <- table(all_data$country)
country_list <- as.vector(country_table)
country_table[country_list != 0]

###############################################################################
# S-3: Academic group (percentages)
table(all_data$academic_group) / length(all_data$id) * 100

# Investigation of professor types
all_data$academic_group_professor_comment

# Investigation of "other" comments
all_data$academic_group_other_comment

###############################################################################
# S-5: Topic in psychology

# Create labels
topic_labels <- c(
  "Clinical psychology",
  "Developmental psychology",
  "Differential psychology",
  "Educational psychology",
  "General psychology",
  "Neuropsychology",
  "Organizational psychology",
  "Research methods",
  "Social psychology",
  "Other",
  "Psychology is not my main field."
)

## Count frequency in each category
topic_sums <- as.vector(table(all_data$topic))

## Drawing plot
# The labels are turned into a factor whose level order equals the order of
# topic_labels, so the bars keep that order instead of being sorted
# alphabetically.
topic_dataframe <- data.frame(topic_labels, topic_sums)
topic_dataframe$topic_labels <- factor(topic_dataframe$topic_labels, levels = topic_labels)

ggplot(topic_dataframe, aes(x = topic_labels, y = topic_sums)) +
  geom_bar(stat = "identity", position = "dodge") +
  coord_flip() +
  xlab("Research topics of sample") +
  ylab("Frequency of indication")

# Analysis of "other" comments
# Are there comments?
table(all_data$topic_other)

# Code open text input
# TODO: check if there are any comments, otherwise this does not function
# (because the variable is not created by soscisurvey)
topic <- data.frame(all_data$id, all_data$topic_other)
topic$all_data.topic_other <- as.character(topic$all_data.topic_other)
# Keep only rows that contain an actual comment. The former test `> 0`
# compared the text with the string "0": it dropped comments starting with a
# character that sorts before "0" and turned missing comments into all-NA rows.
topic <- topic[!is.na(topic$all_data.topic_other) &
                 nzchar(trimws(topic$all_data.topic_other)), ]
write_xlsx(topic, "./pilot/qualitative_analysis/all/topic.xlsx")

# After coding has been done, file gets read again
topic_coded <- readxl::read_excel("./pilot/qualitative_analysis/all/coded/topic_coded.xlsx")

# Plotting of common topics
# Create labels
# TODO: add labels of coded topics
topic_labels <- c(
  "Sport psychology",
  "Cognitive psychology",
  "Evolutionary psychology",
  "Health psychology",
  "Mindfulness",
  "Food psychology"
)

# Create sum scores
# TODO: add column names of coded topics
topic_sums <- c(
  sum(topic_coded$sport_psychology),
  sum(topic_coded$cognitive_psychology),
  sum(topic_coded$evolutionary_psychology),
  sum(topic_coded$health_psychology),
  sum(topic_coded$mindfulness),
  sum(topic_coded$food_psychology)
)

# Combine and draw plot
# TODO: add labels of coded topics
# The predefined categories come first, followed by the coded "other" themes;
# the factor levels fix this order for the plot.
topic_dataframe_other <- data.frame(topic_labels, topic_sums)
topic_combined <- rbind(topic_dataframe, topic_dataframe_other)
topic_combined$topic_labels <- factor(
  topic_combined$topic_labels,
  levels = c(
    "Clinical psychology",
    "Developmental psychology",
    "Differential psychology",
    "Educational psychology",
    "General psychology",
    "Neuropsychology",
    "Organizational psychology",
    "Research methods",
    "Social psychology",
    "Other",
    "Psychology is not my main field.",
    topic_labels
  )
)

ggplot(topic_combined, aes(x = topic_labels, y = topic_sums)) +
  geom_bar(stat = "identity", position = "dodge") +
  coord_flip() +
  xlab("Research topics of sample (with \"other\" commments)") +
  ylab("Coded themes indicated by sample")

###############################################################################
# S-6: Where did participants learn about the survey
table(all_data$learned_about_survey)

# Percentages
table(all_data$learned_about_survey) / length(all_data$id) * 100

# Investigation of comments
all_data$learned_about_survey_email_list
all_data$learned_about_survey_other_comment

###############################################################################
########################## DESCRIPTIVE ANALYSIS OF GENERAL QUESTIONS ##########
###############################################################################

# G-1: Percentage of persons that preregistered before vs. had not
# preregistered before
table(all_data$PR_before) / length(all_data$id) * 100

# Same percentages within one academic group. which() drops rows whose
# academic group is NA; indexing with the raw comparison kept those rows and
# therefore inflated the denominator.
pr_before_percent <- function(group) {
  in_group <- which(all_data$academic_group == group)
  table(all_data$PR_before[in_group]) / length(in_group) * 100
}
pr_before_percent("Master student")
pr_before_percent("PhD student")
pr_before_percent("Post-doc")
pr_before_percent("Professor")

###############################################################################
# G-2: Perceived importance (scale ranges from -3 = "Disagree" to 3 = "Agree")
# NOTE(review): abs(x - 4) can only produce non-negative values, so the result
# cannot span -3..3 as stated above (codes on either side of 4 collapse onto
# the same value). Confirm whether 4 - x or x - 4 was intended.
# NOTE(review): unlike other items, the code -9 is not set to NA for this
# column before the transformation -- confirm that it cannot occur here.
all_data$importance <- abs(all_data$importance - 4)
mean(all_data$importance, na.rm = TRUE)
sd(all_data$importance, na.rm = TRUE)

# PR before yes vs. no
plotmeans(importance ~ PR_before, data = all_data, n.label = FALSE, barcol = "black",
          xlab = "Has preregistered before", ylab = "Perceived importance")

# Academic groups
plotmeans(importance ~ academic_group, data = all_data, n.label = FALSE, barcol = "black",
          xlab = "Academic group", ylab = "Perceived importance")

###############################################################################
# G-3: WHERE DID PARTICIPANTS LEARN ABOUT PREREGISTRATION

## Creating sum scores and label names
# Column sums of the 0/1 checkbox columns, i.e. how often each option was
# indicated. unname() keeps the column names from becoming row names of the
# data frame built below.
learned_cols <- c(
  "learned_lecture",
  "learned_project",
  "learned_work",
  "learned_supervisor",
  "learned_conversation",
  "learned_dontknow",
  "learned_neverheard",
  "learned_other"
)
learned_sums <- unname(colSums(all_data[learned_cols], na.rm = TRUE))
learned_labels <- c(
  "Lecture at university",
  "Project at university",
  "Official event at workplace (e.g., colloquium, talk)",
  "Supervisor",
  "Informal conversation with colleagues/peers",
  "I don't know.",
  "I have not heard about preregistration before this survey.",
  "Other"
)

# Combine and draw plot
learned_dataframe <- data.frame(learned_labels, learned_sums)
learned_dataframe$learned_labels <- factor(learned_dataframe$learned_labels, levels = learned_labels)

ggplot(learned_dataframe, aes(x = learned_labels, y = learned_sums)) +
  geom_bar(stat = "identity", position = "dodge") +
  coord_flip() +
  xlab("Where did participants learn about preregistration") +
  ylab("Frequency of indication")

# Analysis of "other" comments
# Are there comments?
table(all_data$learned_other_comment)

# Code open text input
# TODO: check if there are any comments, otherwise this does not function
# (because the variable is not created by soscisurvey)
learned <- data.frame(all_data$id, all_data$learned_other_comment)
learned$all_data.learned_other_comment <- as.character(learned$all_data.learned_other_comment)
# Keep only rows that contain an actual comment. The former test `> 0`
# compared the text with the string "0": it dropped comments starting with a
# character that sorts before "0" and turned missing comments into all-NA rows.
learned <- learned[!is.na(learned$all_data.learned_other_comment) &
                     nzchar(trimws(learned$all_data.learned_other_comment)), ]
write_xlsx(learned, "./pilot/qualitative_analysis/all/learned.xlsx")

# After coding has been done, file gets read again
learned_coded <- readxl::read_excel("./pilot/qualitative_analysis/all/coded/learned_coded.xlsx")

# Plotting of common topics
# Create labels
# TODO: add labels of coded topics
learned_labels <- c(
  "PhD Workshop",
  "Personal investigation",
  "Twitter",
  "UKRN",
  "Editor"
)

# Create sum scores
# TODO: add column names of coded topics
learned_sums <- c(
  sum(learned_coded$phd_workshop),
  sum(learned_coded$personal_investigation),
  sum(learned_coded$twitter),
  sum(learned_coded$UKRN),
  sum(learned_coded$editor)
)

# Combine and draw plot
# TODO: add labels of coded topics
learned_dataframe_other <- data.frame(learned_labels, learned_sums)
learned_combined <- rbind(learned_dataframe, learned_dataframe_other)
# The factor is built from the learned_labels column. The earlier version read
# the non-existent column topic_labels, which made this assignment fail.
learned_combined$learned_labels <- factor(
  learned_combined$learned_labels,
  levels = c(
    "Lecture at university",
    "Project at university",
    "Official event at workplace (e.g., colloquium, talk)",
    "Supervisor",
    "Informal conversation with colleagues/peers",
    "I don't know.",
    "I have not heard about preregistration before this survey.",
    "Other",
    learned_labels
  )
)
# Bar chart of the predefined options together with the coded "other" themes.
ggplot(learned_combined, aes(x = learned_labels, y = learned_sums)) +
  geom_bar(stat = "identity", position = "dodge") +
  coord_flip() +
  xlab("Where did participants learn about preregistration (with \"other\" commments)") +
  ylab("Coded themes indicated by sample")

###############################################################################
# G-4: TEMPLATE

## Label names
template_labels <- c(
  "AsPredicted",
  "OSF-Standard Pre-Data Collection Registration",
  "Center for Open Science Preregistration Challenge Template (OSF)",
  "van 't Veer & Giner-Sorolla (2016)",
  "Replication Recipe (Brandt et al., 2013)",
  "I don’t use templates.",
  "I don’t know.",
  "Other template"
)

## Count frequency in each category
template_sums <- as.vector(table(all_data$template))

## Drawing plot
# The level order follows template_labels so that the bars are not sorted
# alphabetically.
template_dataframe <- data.frame(template_labels, template_sums)
template_dataframe$template_labels <- factor(template_dataframe$template_labels, levels = template_labels)

ggplot(template_dataframe, aes(x = template_labels, y = template_sums)) +
  geom_bar(stat = "identity", position = "dodge") +
  coord_flip() +
  xlab("Preferred template") +
  ylab("Frequency of indication")

# Analysis of "other" comments
# Are there comments?
table(all_data$template_other_comment)

# Helper: TRUE for entries that contain an actual comment (not NA, not empty,
# not only whitespace). It replaces the former test `> 0`, which compared the
# text with the string "0": that test dropped comments starting with a
# character that sorts before "0" and turned missing comments into all-NA rows.
has_comment <- function(text) {
  !is.na(text) & nzchar(trimws(text))
}

# Code open text input
# TODO: check if there are any comments, otherwise this does not function
# (because the variable is not created by soscisurvey)
template <- data.frame(all_data$id, all_data$template_other_comment)
template$all_data.template_other_comment <- as.character(template$all_data.template_other_comment)
template <- template[has_comment(template$all_data.template_other_comment), ]
write_xlsx(template, "./pilot/qualitative_analysis/all/template.xlsx")

# After coding has been done, file gets read again
template_coded <- readxl::read_excel("./pilot/qualitative_analysis/all/coded/template_coded.xlsx")

# Plotting of common topics
# Create labels
# TODO: add labels of coded topics
template_labels <- c(
  "Preregistration of Secondary analysis",
  "PROSPERO",
  "OSF Preregistration"
)

# Create sum scores
# TODO: add column names of coded topics
template_sums <- c(
  sum(template_coded$preregistration_secondary_analysis),
  sum(template_coded$prospero),
  sum(template_coded$osf)
)

# Combine and draw plot
# TODO: add labels of coded topics
template_dataframe_other <- data.frame(template_labels, template_sums)
template_combined <- rbind(template_dataframe, template_dataframe_other)
# The factor is built from the template_labels column. The earlier version
# read the non-existent column topic_labels, which made this assignment fail.
template_combined$template_labels <- factor(
  template_combined$template_labels,
  levels = c(
    "AsPredicted",
    "OSF-Standard Pre-Data Collection Registration",
    "Center for Open Science Preregistration Challenge Template (OSF)",
    "van 't Veer & Giner-Sorolla (2016)",
    "Replication Recipe (Brandt et al., 2013)",
    "I don’t use templates.",
    "I don’t know.",
    "Other template",
    template_labels
  )
)

ggplot(template_combined, aes(x = template_labels, y = template_sums)) +
  geom_bar(stat = "identity", position = "dodge") +
  coord_flip() +
  xlab("Preferred template (with \"other\" commments)") +
  ylab("Coded themes indicated by sample")

###############################################################################
# G-5: Reason for the preferred template

# Create labels
template_reason_labels <- c(
  "It is easy to use",
  "It is time-efficient",
  "It is comprehensive",
  "It fits well with my research area",
  "It is recommended by my supervisor",
  "It is preferred by my co-authors/colleagues/peers",
  "It is the only template I know",
  "It was the first template I used",
  "Other"
)

# Creating sum scores for each topic (how often was each topic indicated)
# unname() keeps the column names from becoming row names of the data frame.
template_reason_cols <- c(
  "template_reason_easy",
  "template_reason_efficient",
  "template_reason_comprehensive",
  "template_reason_fit",
  "template_reason_supervisor",
  "template_reason_coauthors",
  "template_reason_only_known",
  "template_reason_first",
  "template_reason_other"
)
template_reason_sums <- unname(colSums(all_data[template_reason_cols], na.rm = TRUE))

# Combine and draw plot
template_reason_dataframe <- data.frame(template_reason_labels, template_reason_sums)
template_reason_dataframe$template_reason_labels <- factor(
  template_reason_dataframe$template_reason_labels,
  levels = template_reason_labels
)

ggplot(template_reason_dataframe, aes(x = template_reason_labels, y = template_reason_sums)) +
  geom_bar(stat = "identity", position = "dodge") +
  coord_flip() +
  xlab("Reasons for preferring the selected template") +
  ylab("Frequency of indication")

# Analysis of "other" comments
# Are there comments?
table(all_data$template_reason_other_comment)

# Code open text input
# TODO: check if there are any comments, otherwise this does not function
# (because the variable is not created by soscisurvey)
template_reason <- data.frame(all_data$id, all_data$template_reason_other_comment)
template_reason$all_data.template_reason_other_comment <-
  as.character(template_reason$all_data.template_reason_other_comment)
template_reason <- template_reason[has_comment(template_reason$all_data.template_reason_other_comment), ]
write_xlsx(template_reason, "./pilot/qualitative_analysis/all/template_reason.xlsx")

# After coding has been done, file gets read again
template_reason_coded <- readxl::read_excel("./pilot/qualitative_analysis/all/coded/template_reason_coded.xlsx")

# Plotting of common topics
# Create labels
# TODO: add labels of coded topics
template_reason_labels <- c("Used during a competition", "Flexible")

# Create sum scores
# TODO: add column names of coded topics
template_reason_sums <- c(
  sum(template_reason_coded$used_during_competition),
  sum(template_reason_coded$flexible)
)

# Combine and draw plot
# TODO: add labels of coded topics
template_reason_dataframe_other <- data.frame(template_reason_labels, template_reason_sums)
template_reason_combined <- rbind(template_reason_dataframe, template_reason_dataframe_other)
# Built from the template_reason_labels column (formerly the non-existent
# column topic_labels). Level order: predefined reasons, then coded themes.
template_reason_combined$template_reason_labels <- factor(
  template_reason_combined$template_reason_labels,
  levels = c(
    levels(template_reason_dataframe$template_reason_labels),
    template_reason_labels
  )
)

ggplot(template_reason_combined, aes(x = template_reason_labels, y = template_reason_sums)) +
  geom_bar(stat = "identity", position = "dodge") +
  coord_flip() +
  xlab("Reason for preferred template (with \"other\" commments)") +
  ylab("Coded themes indicated by sample")

###############################################################################
# G-6: PLATFORM

## Creating sum scores and label names
platform_cols <- c(
  "platform_osf",
  "platform_aspredicted",
  "platform_zpid",
  "platform_personal_web",
  "platform_institutional_web",
  "platform_offline",
  "platform_other"
)
platform_sums <- unname(colSums(all_data[platform_cols], na.rm = TRUE))
# NOTE(review): "sychArchives (ZPID)" looks like a typo for
# "PsychArchives (ZPID)". The string is left unchanged here because other parts
# of the analysis may match on this exact label -- confirm and fix everywhere.
platform_labels <- c(
  "OSF",
  "AsPredicted",
  "sychArchives (ZPID)",
  "Personal website",
  "Institutional website",
  "Offline (e.g., I only share it with my co-authors or store it at my institution)",
  "Other"
)

# Combine and draw plot
platform_dataframe <- data.frame(platform_labels, platform_sums)
platform_dataframe$platform_labels <- factor(platform_dataframe$platform_labels, levels = platform_labels)

ggplot(platform_dataframe, aes(x = platform_labels, y = platform_sums)) +
  geom_bar(stat = "identity", position = "dodge") +
  coord_flip() +
  xlab("Preferred uploading platform/repository") +
  ylab("Frequency of indication")

# Analysis of "other" comments
# Are there comments?
table(all_data$platform_other_comment)

# Code open text input
# TODO: check if there are any comments, otherwise this does not function
# (because the variable is not created by soscisurvey)
platform <- data.frame(all_data$id, all_data$platform_other_comment)
platform$all_data.platform_other_comment <- as.character(platform$all_data.platform_other_comment)
platform <- platform[has_comment(platform$all_data.platform_other_comment), ]
write_xlsx(platform, "./pilot/qualitative_analysis/all/platform.xlsx")

# After coding has been done, file gets read again
platform_coded <- readxl::read_excel("./pilot/qualitative_analysis/all/coded/platform_coded.xlsx")

# Plotting of common topics
# Create labels
# TODO: add labels of coded topics
platform_labels <- c("Journal specific", "PRISMA")

# Create sum scores
# TODO: add column names of coded topics
platform_sums <- c(
  sum(platform_coded$journal_specific),
  sum(platform_coded$PRISMA)
)

# Combine and draw plot
# TODO: add labels of coded topics
platform_dataframe_other <- data.frame(platform_labels, platform_sums)
platform_combined <- rbind(platform_dataframe, platform_dataframe_other)
# Built from the platform_labels column (formerly the non-existent column
# topic_labels). Level order: predefined platforms, then coded themes.
platform_combined$platform_labels <- factor(
  platform_combined$platform_labels,
  levels = c(
    levels(platform_dataframe$platform_labels),
    platform_labels
  )
)

# The plot call below is continued by the lines that follow this section.
ggplot(platform_combined, aes(x = platform_labels, y = platform_sums)) +
  geom_bar(stat = "identity", position = "dodge") +
coord_flip()+ xlab("Preferred uploading platform/repository (with \"other\" commments)")+ylab("Coded themes indicated by sample") ############################################################################################## # G-7: Important others ## Creating sum scores and label names important_others_sums <- c(sum(all_data$important_others_coauthors, na.rm = TRUE), sum(all_data$important_others_supervisor, na.rm = TRUE), sum(all_data$important_others_editors, na.rm = TRUE), sum(all_data$important_others_peers, na.rm = TRUE), sum(all_data$important_others_institute, na.rm = TRUE), sum(all_data$important_others_funding, na.rm = TRUE), sum(all_data$important_other_none, na.rm = TRUE), sum(all_data$important_others_other, na.rm = TRUE)) important_others_labels <- c("Co-authors", "Supervisor", "Editors", "Peers/colleagues", "Institute guidelines", "Funding guidelines", "None", "Other") # Combine and draw plot important_others_dataframe <- data.frame(important_others_labels, important_others_sums) important_others_dataframe$important_others_labels <- factor(important_others_dataframe$important_others_labels, levels = c("Co-authors", "Supervisor", "Editors", "Peers/colleagues", "Institute guidelines", "Funding guidelines", "None", "Other"), labels = c("Co-authors", "Supervisor", "Editors", "Peers/colleagues", "Institute guidelines", "Funding guidelines", "None", "Other")) ggplot(important_others_dataframe, aes(x=important_others_labels,y=important_others_sums))+ geom_bar(stat="identity",position="dodge")+ coord_flip()+ xlab("Important others")+ylab("Frequency of indication") # Analysis of "other" comments # Are there comments? 
table(all_data$important_other_other_comment)

# Code open text input
# TODO: check if there are any comments, otherwise this does not function (because the variable is not created by soscisurvey)
important_others <- data.frame(all_data$id, all_data$important_other_other_comment)
important_others$all_data.important_other_other_comment <- as.character(important_others$all_data.important_other_other_comment)
# Keep only rows with a non-empty comment. The previous `comment > 0` was a
# lexicographic string comparison: it kept NA rows and dropped comments that
# sort before "0" (e.g. those starting with a space or a bracket).
important_others <- important_others[
  !is.na(important_others$all_data.important_other_other_comment) &
    nzchar(trimws(important_others$all_data.important_other_other_comment)),
]
write_xlsx(important_others, "./pilot/qualitative_analysis/all/important_others.xlsx")

# After coding has been done, file gets read again
important_others_coded <- readxl::read_excel("./pilot/qualitative_analysis/all/coded/important_others_coded.xlsx")

# Plotting of common topics
# Create labels
# TODO: add labels of coded topics
important_others_labels <- c("Preregisters all studies")

# Create sum scores
# TODO: add column names of coded topics
important_others_sums <- c(sum(important_others_coded$preregisters_all_studies))

# Combine and draw plot
# TODO: add labels of coded topics
important_others_dataframe_other <- data.frame(important_others_labels, important_others_sums)
important_others_combined <- rbind(important_others_dataframe, important_others_dataframe_other)

# Re-level the label column of the combined data frame. The source column is
# `important_others_labels`; the previously referenced `topic_labels` does not
# exist here, which produced a zero-length factor and made the assignment fail.
important_others_combined$important_others_labels <- factor(
  important_others_combined$important_others_labels,
  levels = c("Co-authors", "Supervisor", "Editors", "Peers/colleagues", "Institute guidelines",
             "Funding guidelines", "None", "Other", "Preregisters all studies")
)

ggplot(important_others_combined, aes(x = important_others_labels, y = important_others_sums)) +
  geom_col(position = "dodge") +
  coord_flip() +
  labs(x = "Important others (with \"other\" commments)", y = "Coded themes indicated by sample")
##############################################################################################
# G-8: READING PR

# Share of the whole sample (in percent) that chose each answer category
table(all_data$reading_PR) / nrow(all_data) * 100

##############################################################################################
# G-9a: Positive consequences of mandatory preregistration

## Label names and sum scores (one entry per closed answer option)
pos_con_labels <- c("Increased transparency in research",
                    "Reduced p-hacking",
                    "Reduced publication bias",
                    "Increased quality of publications",
                    "Other")
pos_con_sums <- c(
  sum(all_data$pos_con_transparency, na.rm = TRUE),
  sum(all_data$pos_con_phacking, na.rm = TRUE),
  sum(all_data$pos_con_publicationbias, na.rm = TRUE),
  sum(all_data$pos_con_quality, na.rm = TRUE),
  sum(all_data$pos_con_other, na.rm = TRUE)
)

# Build the plotting table; the labels become a factor whose levels follow the
# order listed above, so the bars are not sorted alphabetically
pos_con_dataframe <- data.frame(
  pos_con_labels = factor(pos_con_labels, levels = pos_con_labels),
  pos_con_sums = pos_con_sums
)

ggplot(pos_con_dataframe, aes(x = pos_con_labels, y = pos_con_sums)) +
  geom_col(position = "dodge") +
  coord_flip() +
  labs(x = "Positive consequences of mandatory preregistration", y = "Frequency of indication")

# Analysis of "other" comments
# Are there comments?
table(all_data$pos_con_other_comment)

# # Counting common (often used) words
#
# # Subset only of relevant variables
# all_data_pos_con <- all_data[ , c("id", "pos_con_other_comment", "PR_before")]
#
# # Present each word in a separate line
# all_data_pos_con_tidy <- all_data_pos_con %>%
#   unnest_tokens(word, pos_con_other_comment)
#
# # Remove stop words
# data(stop_words)
# all_data_pos_con_tidy <- all_data_pos_con_tidy %>%
#   anti_join(stop_words)
#
# # Visualisation of words that were used more than x times
# all_data_pos_con_tidy %>%
#   count(word, sort = TRUE) %>%
#   filter(n > x) %>% # TODO: insert an appropriate number to count which words were used very often
#   ggplot(aes(word, n)) +
#   geom_col() +
#   coord_flip()
#
# # Comparison of participants that have preregistered before vs. have not preregistered before
# all_data_pos_con_tidy[all_data_pos_con_tidy$PR_before == "yes", ] %>%
#   count(word, sort = TRUE) %>%
#   filter(n > x) %>% # TODO: insert an appropriate number to count which words were used very often
#   ggplot(aes(word, n)) +
#   geom_col() +
#   coord_flip()
#
# all_data_pos_con_tidy[all_data_pos_con_tidy$PR_before == "no", ] %>%
#   count(word, sort = TRUE) %>%
#   filter(n > x) %>% # TODO: insert an appropriate number to count which words were used very often
#   ggplot(aes(word, n)) +
#   geom_col() +
#   coord_flip()

# Not applicable for pilot (only small number of responses)

# Coding of the answers to get their meaning
# Code open text input
# TODO: check if there are any comments, otherwise this does not function (because the variable is not created by soscisurvey)
pos_con <- data.frame(all_data$id, all_data$pos_con_other_comment)
pos_con$all_data.pos_con_other_comment <- as.character(pos_con$all_data.pos_con_other_comment)
# Keep only rows with a non-empty comment. The previous `comment > 0` was a
# lexicographic string comparison: it kept NA rows and dropped comments that
# sort before "0" (e.g. those starting with a space or a bracket).
pos_con <- pos_con[
  !is.na(pos_con$all_data.pos_con_other_comment) &
    nzchar(trimws(pos_con$all_data.pos_con_other_comment)),
]
write_xlsx(pos_con, "./pilot/qualitative_analysis/all/pos_con.xlsx")

# After coding has been done, file gets read again
pos_con_coded <- readxl::read_excel("./pilot/qualitative_analysis/all/coded/pos_con_coded.xlsx")

# Plotting of common topics
# Create labels
# TODO: add labels of coded topics
pos_con_labels <- c("Better studies", "Avoid pitfalls useless studies", "Well designed studies",
                    "Well conducted studies", "Publishing of negative findings", "None")

# Create sum scores
# TODO: add column names of coded topics
pos_con_sums <- c(sum(pos_con_coded$better_studies),
                  sum(pos_con_coded$avoid_pitfalls_useless_studies),
                  sum(pos_con_coded$well_designed),
                  sum(pos_con_coded$well_conducted),
                  sum(pos_con_coded$publishing_of_negative_findings),
                  sum(pos_con_coded$none))

# Combine and draw plot
# TODO: add labels of coded topics
pos_con_dataframe_other <- data.frame(pos_con_labels, pos_con_sums)
pos_con_combined <- rbind(pos_con_dataframe, pos_con_dataframe_other)

# Re-level the label column of the combined data frame. The source column is
# `pos_con_labels`; the previously referenced `topic_labels` does not exist
# here, which produced a zero-length factor and made the assignment fail.
pos_con_combined$pos_con_labels <- factor(
  pos_con_combined$pos_con_labels,
  levels = c("Increased transparency in research", "Reduced p-hacking", "Reduced publication bias",
             "Increased quality of publications", "Other", "Better studies",
             "Avoid pitfalls useless studies", "Well designed studies", "Well conducted studies",
             "Publishing of negative findings", "None")
)

ggplot(pos_con_combined, aes(x = pos_con_labels, y = pos_con_sums)) +
  geom_col(position = "dodge") +
  coord_flip() +
  labs(x = "Positive consequences of mandatory preregistration (with \"other\" commments)", y = "Coded themes indicated by sample")

##############################################################################################
# G-9b: Negative consequences of mandatory preregistration

## Creating sum scores and label names
neg_con_sums <- c(sum(all_data$neg_con_cheating, na.rm = TRUE),
                  sum(all_data$neg_con_quality, na.rm = TRUE),
                  sum(all_data$neg_con_exploratory, na.rm = TRUE),
                  sum(all_data$neg_con_progress, na.rm = TRUE),
                  sum(all_data$neg_con_other, na.rm = TRUE))
neg_con_labels <- c("Cheating (e.g., by uploading multiple preregistrations)",
                    "Decreasing quality of preregistrations (e.g., only vague statements)",
                    "Less exploratory analyses",
                    "Decline in scientific progress",
                    "Other")

# Combine and draw plot
neg_con_dataframe <- data.frame(neg_con_labels, neg_con_sums)

# Fix the level order to the order listed above (instead of alphabetical order)
neg_con_dataframe$neg_con_labels <- factor(neg_con_dataframe$neg_con_labels, levels = neg_con_labels)

ggplot(neg_con_dataframe, aes(x = neg_con_labels, y = neg_con_sums)) +
  geom_col(position = "dodge") +
  coord_flip() +
  labs(x = "Negative consequences of mandatory preregistration", y = "Frequency of indication")

# Analysis of "other" comments
# Are there comments?
table(all_data$neg_con_other_comment)

# Counting common (often used) words
#
# # Subset only of relevant variables
# all_data_neg_con <- all_data[ , c("id", "neg_con_other_comment", "PR_before")]
#
# # Present each word in a separate line
# all_data_neg_con_tidy <- all_data_neg_con %>%
#   unnest_tokens(word, neg_con_other_comment)
#
# # Remove stop words
# data(stop_words)
# all_data_neg_con_tidy <- all_data_neg_con_tidy %>%
#   anti_join(stop_words)
#
# # Visualisation of words that were used more than x times
# all_data_neg_con_tidy %>%
#   count(word, sort = TRUE) %>%
#   filter(n > x) %>% # TODO: insert an appropriate number to count which words were used very often
#   ggplot(aes(word, n)) +
#   geom_col() +
#   coord_flip()
#
# # Comparison of participants that have preregistered before vs. have not preregistered before
# all_data_neg_con_tidy[all_data_neg_con_tidy$PR_before == "yes", ] %>%
#   count(word, sort = TRUE) %>%
#   filter(n > x) %>% # TODO: insert an appropriate number to count which words were used very often
#   ggplot(aes(word, n)) +
#   geom_col() +
#   coord_flip()
#
# all_data_neg_con_tidy[all_data_neg_con_tidy$PR_before == "no", ] %>%
#   count(word, sort = TRUE) %>%
#   filter(n > x) %>% # TODO: insert an appropriate number to count which words were used very often
#   ggplot(aes(word, n)) +
#   geom_col() +
#   coord_flip()

# Not applicable for pilot (only small number of responses)

# Coding of the answers to get their meaning
# Code open text input
# TODO: check if there are any comments, otherwise this does not function (because the variable is not created by soscisurvey)
neg_con <- data.frame(all_data$id, all_data$neg_con_other_comment)
neg_con$all_data.neg_con_other_comment <- as.character(neg_con$all_data.neg_con_other_comment)
# Keep only rows with a non-empty comment. The previous `comment > 0` was a
# lexicographic string comparison: it kept NA rows and dropped comments that
# sort before "0" (e.g. those starting with a space or a bracket).
neg_con <- neg_con[
  !is.na(neg_con$all_data.neg_con_other_comment) &
    nzchar(trimws(neg_con$all_data.neg_con_other_comment)),
]
write_xlsx(neg_con, "./pilot/qualitative_analysis/all/neg_con.xlsx")

# After coding has been done, file gets read again
neg_con_coded <- readxl::read_excel("./pilot/qualitative_analysis/all/coded/neg_con_coded.xlsx")

# Plotting of common topics
# Create labels
# TODO: add labels of coded topics
neg_con_labels <- c("None", "Overly optimistic belief in replicability",
                    "No negative consequences", "When theory needs to be written")

# Create sum scores
# TODO: add column names of coded topics
neg_con_sums <- c(sum(neg_con_coded$none),
                  sum(neg_con_coded$overly_optimistic),
                  sum(neg_con_coded$no_negative_consequences),
                  sum(neg_con_coded$when_theory_needs_to_be_written))

# Combine and draw plot
# TODO: add labels of coded topics
neg_con_dataframe_other <- data.frame(neg_con_labels, neg_con_sums)
neg_con_combined <- rbind(neg_con_dataframe, neg_con_dataframe_other)

# Re-level the label column of the combined data frame. The source column is
# `neg_con_labels`; the previously referenced `topic_labels` does not exist
# here, which produced a zero-length factor and made the assignment fail.
neg_con_combined$neg_con_labels <- factor(
  neg_con_combined$neg_con_labels,
  levels = c("Cheating (e.g., by uploading multiple preregistrations)",
             "Decreasing quality of preregistrations (e.g., only vague statements)",
             "Less exploratory analyses", "Decline in scientific progress", "Other",
             "None", "Overly optimistic belief in replicability",
             "No negative consequences", "When theory needs to be written")
)

ggplot(neg_con_combined, aes(x = neg_con_labels, y = neg_con_sums)) +
  geom_col(position = "dodge") +
  coord_flip() +
  labs(x = "Negative consequences of mandatory preregistration (with \"other\" commments)", y = "Coded themes indicated by sample")

##############################################################################################
# G-10: INTENTION TO PREREGISTER STUDIES IN THE NEAR FUTURE
# -3 = Disagree, +3 = Agree

# Percentage of participants that indicated each category
# NOTE(review): if the raw item is coded 1-7 (as the ylim of the plots below
# suggests), abs(x - 4) folds the scale onto 0-3 and loses the direction of the
# answer; a reversal would be 8 - x and a recentring to -3..+3 would be x - 4 or
# 4 - x. Please confirm the raw coding. Also note that this line overwrites the
# column, so running it twice transforms the data twice.
all_data$intention <- abs(all_data$intention - 4)
table(all_data$intention) / length(all_data$id) * 100

# Mean and SD
mean(all_data$intention, na.rm = TRUE)
sd(all_data$intention, na.rm = TRUE)

# Plot mean differences between participants that have preregistered before vs. have never preregistered before, and between academic groups
# PR before yes vs. no
plotmeans(all_data$intention ~ PR_before, data = all_data, n.label = FALSE, barcol = "black",
          ylim = c(1, 7), xlab = "Has preregistered before", ylab = "Intention to preregister")

# Academic groups
plotmeans(all_data$intention ~ academic_group, data = all_data, n.label = FALSE, barcol = "black",
          ylim = c(1, 7), xlab = "Academic group", ylab = "Intention to preregister")

########################## DESCRIPTIVE ANALYSIS OF M- and O-QUESTIONS ########################

# M-11: First motivation to preregister

## Creating sum scores and label names
first_motivation_sums <- c(sum(all_data$first_motivation_project, na.rm = TRUE),
                           sum(all_data$first_motivation_peers, na.rm = TRUE),
                           sum(all_data$first_motivation_coauthors, na.rm = TRUE),
                           sum(all_data$first_motivation_supervisor, na.rm = TRUE),
                           sum(all_data$first_motivation_funding, na.rm = TRUE),
                           sum(all_data$first_motivation_self, na.rm = TRUE),
                           sum(all_data$first_motivation_other, na.rm = TRUE))
first_motivation_labels <- c("Preregistration was mandatory for a project",
                             "Informal conversation with colleagues/peers",
                             "Recommendation of co-authors for a specific project",
                             "Suggestion by my supervisor",
                             "Requirement to get funding",
                             "Self-motivated",
                             "Other")

# Combine and draw plot
first_motivation_dataframe <- data.frame(first_motivation_labels, first_motivation_sums)

# Fix the level order to the order listed above (instead of alphabetical order)
first_motivation_dataframe$first_motivation_labels <- factor(first_motivation_dataframe$first_motivation_labels,
                                                             levels = first_motivation_labels)

ggplot(first_motivation_dataframe, aes(x = first_motivation_labels, y = first_motivation_sums)) +
  geom_col(position = "dodge") +
  coord_flip() +
  labs(x = "First motivation to preregister", y = "Frequency of indication")

# Analysis of "other" comments
# Are there comments?
table(all_data$first_motivation_other_comment)

# Code open text input
# TODO: check if there are any comments, otherwise this does not function (because the variable is not created by soscisurvey)
first_motivation <- data.frame(all_data$id, all_data$first_motivation_other_comment)
first_motivation$all_data.first_motivation_other_comment <- as.character(first_motivation$all_data.first_motivation_other_comment)
# Keep only rows with a non-empty comment (non-NA, not just whitespace)
first_motivation <- first_motivation[
  !is.na(first_motivation$all_data.first_motivation_other_comment) &
    nzchar(trimws(first_motivation$all_data.first_motivation_other_comment)),
]
write_xlsx(first_motivation, "./pilot/qualitative_analysis/all/first_motivation.xlsx")

# After coding has been done, file gets read again
first_motivation_coded <- readxl::read_excel("./pilot/qualitative_analysis/all/coded/first_motivation_coded.xlsx")

# Plotting of common topics
# Create labels
# TODO: add labels of coded topics
first_motivation_labels <- c("PhD workshop", "Delay of data collection due to corona", "Institute", "Editor")

# Create sum scores
# TODO: add column names of coded topics
first_motivation_sums <- c(sum(first_motivation_coded$phd_workshop),
                           sum(first_motivation_coded$delay_corona),
                           sum(first_motivation_coded$institute),
                           sum(first_motivation_coded$editor))

# Combine and draw plot
# TODO: add labels of coded topics
first_motivation_dataframe_other <- data.frame(first_motivation_labels, first_motivation_sums)
first_motivation_combined <- rbind(first_motivation_dataframe, first_motivation_dataframe_other)

# Closed answer options first, then the coded "other" topics.
# Source column is `first_motivation_labels` (the former `topic_labels` does not exist).
first_motivation_combined$first_motivation_labels <- factor(
  first_motivation_combined$first_motivation_labels,
  levels = c(levels(first_motivation_dataframe$first_motivation_labels), first_motivation_labels)
)

ggplot(first_motivation_combined, aes(x = first_motivation_labels, y = first_motivation_sums)) +
  geom_col(position = "dodge") +
  coord_flip() +
  labs(x = "First motivation to preregister (with \"other\" commments)", y = "Coded themes indicated by sample")

##############################################################################################
# M-12: Motivation change

# Proportion of participants that are now more vs. less motivated (percentages)
# The denominator counts only participants with PR_before == "yes"; indexing
# with a logical vector that contains NA would add NA entries and inflate it.
table(all_data$motivation_change[which(all_data$PR_before == "yes")]) /
  sum(all_data$PR_before == "yes", na.rm = TRUE) * 100

# Analysis of open text (reasons for this motivation change)
# POSITIVE CHANGE
# Code open text input
motivation_change_more_reason <- data.frame(all_data$id, all_data$motivation_change_more_reason)
motivation_change_more_reason$all_data.motivation_change_more_reason <- as.character(motivation_change_more_reason$all_data.motivation_change_more_reason)
# Keep only rows with a non-empty comment (non-NA, not just whitespace)
motivation_change_more_reason <- motivation_change_more_reason[
  !is.na(motivation_change_more_reason$all_data.motivation_change_more_reason) &
    nzchar(trimws(motivation_change_more_reason$all_data.motivation_change_more_reason)),
]
write_xlsx(motivation_change_more_reason, "./pilot/qualitative_analysis/all/motivation_change_more_reason.xlsx")

# After coding has been done, file gets read again
motivation_change_more_reason_coded <- readxl::read_excel("./pilot/qualitative_analysis/all/coded/motivation_change_more_reason_coded.xlsx")

# Plotting of common topics
# Create labels
# TODO: add labels of coded topics
motivation_change_more_reason_labels <- c("Increasingly the norm", "Experienced helpfulness", "Saves time",
                                          "Reduces uncertainty", "Easier than anticipated",
                                          "Transparent disclosure of deviations", "Learned more about it",
                                          "It is mandatory for some journals", "Helps editors/reviewers",
                                          "Increases credibility", "Negative remark: Limits flexibility")

# Create sum scores
# TODO: add column names of coded topics
motivation_change_more_reason_sums <- c(sum(motivation_change_more_reason_coded$norm),
                                        sum(motivation_change_more_reason_coded$experienced_helpfulness),
                                        sum(motivation_change_more_reason_coded$saves_time),
                                        sum(motivation_change_more_reason_coded$reduces_uncertainty),
                                        sum(motivation_change_more_reason_coded$easier_than_anticipated),
                                        sum(motivation_change_more_reason_coded$transparent_disclosure_of_deviations),
                                        sum(motivation_change_more_reason_coded$learned_more_about_it),
                                        sum(motivation_change_more_reason_coded$mandatory_for_some_journals),
                                        sum(motivation_change_more_reason_coded$helps_editors_reviewers),
                                        sum(motivation_change_more_reason_coded$increases_credibility),
                                        sum(motivation_change_more_reason_coded$negative_limits_flexibility))

# Combine and draw plot
# TODO: add labels of coded topics
motivation_change_more_reason_dataframe <- data.frame(motivation_change_more_reason_labels, motivation_change_more_reason_sums)

# Re-level the labels of THIS data frame (the code previously read the column
# from `topic_dataframe`, a copy-paste slip from another section)
motivation_change_more_reason_dataframe$motivation_change_more_reason_labels <- factor(
  motivation_change_more_reason_dataframe$motivation_change_more_reason_labels,
  levels = motivation_change_more_reason_labels
)

ggplot(motivation_change_more_reason_dataframe, aes(x = motivation_change_more_reason_labels, y = motivation_change_more_reason_sums)) +
  geom_col(position = "dodge") +
  coord_flip() +
  labs(x = "Reasons for positive motivation change", y = "Coded themes indicated by sample")

# Plotting comparison of definitions between participants that have preregistered before vs. have not preregistered before, and between academic groups
# Aggregate coded datasets and information about PR_before yes vs. no & academic group
PR_before_group_by_id <- all_data[ , c("id", "PR_before", "academic_group")]
motivation_change_more_reason_coded$id <- motivation_change_more_reason_coded$all_data.id
motivation_change_more_reason_coded <- left_join(motivation_change_more_reason_coded, PR_before_group_by_id, by = "id")

# PR before yes vs. no
motivation_change_more_reason_sums_PR <- aggregate(motivation_change_more_reason_coded[ , c(5:15)], # TODO: add columns
                                                   by = list(motivation_change_more_reason_coded$PR_before),
                                                   sum)
motivation_change_more_reason_sums_PR$PR_before <- motivation_change_more_reason_sums_PR$Group.1
motivation_change_more_reason_sums_PR$Group.1 <- NULL

motivation_change_more_reason_sums_PR_long <- melt(motivation_change_more_reason_sums_PR, id.vars = "PR_before")

# NOTE(review): the two legend labels assume that both PR_before groups are
# present and ordered "yes" then "no" - confirm against the factor definition.
ggplot(motivation_change_more_reason_sums_PR_long, aes(x = variable, y = value, fill = factor(PR_before))) +
  geom_col(position = "dodge") +
  coord_flip() +
  scale_fill_discrete(name = "Prior preregistration?",
                      labels = c("Has preregistered before", "Has not preregistered before")) +
  labs(x = "Reasons for positive motivation change", y = "Frequency of indication")

# Academic groups
motivation_change_more_reason_sums_group <- aggregate(motivation_change_more_reason_coded[ , c(5:15)], # TODO: add columns
                                                      by = list(motivation_change_more_reason_coded$academic_group),
                                                      sum)
motivation_change_more_reason_sums_group$academic_group <- motivation_change_more_reason_sums_group$Group.1
motivation_change_more_reason_sums_group$Group.1 <- NULL

motivation_change_more_reason_sums_group_long <- melt(motivation_change_more_reason_sums_group, id.vars = "academic_group")
motivation_change_more_reason_sums_group_long$academic_group <- as.numeric(motivation_change_more_reason_sums_group_long$academic_group)

# Map the numeric group codes back to the level names of all_data$academic_group,
# so the legend is labelled from the data instead of a hard-coded label vector
# (which mislabels groups as soon as other groups are present).
academic_group_names <- levels(all_data$academic_group)
ggplot(motivation_change_more_reason_sums_group_long,
       aes(x = variable, y = value,
           fill = factor(academic_group, levels = seq_along(academic_group_names), labels = academic_group_names))) +
  geom_col(position = "dodge") +
  coord_flip() +
  scale_fill_discrete(name = "Academic group") +
  labs(x = "Reasons for positive motivation change", y = "Frequency of indication")

# NEGATIVE CHANGE
# No comments in the pilot!
# Code open text input # motivation_change_less_reason <- data.frame(all_data$id, all_data$motivation_change_more_reason_sums_group_long$academic_group) # motivation_change_less_reason$all_data.motivation_change_less_reason <- as.character(motivation_change_less_reason$all_data.motivation_change_less_reason) # motivation_change_less_reason <- motivation_change_less_reason[motivation_change_less_reason$all_data.motivation_change_less_reason > 0, ] # write_xlsx(motivation_change_less_reason, "./pilot/qualitative_analysis/all/motivation_change_less_reason.xlsx") # # After coding has been done, file gets read again # motivation_change_less_reason_coded <- readxl::read_excel("./pilot/qualitative_analysis/all/coded/motivation_change_less_reason_coded.xlsx") # # # # Plotting of common topics # # Create labels # # TODO: add labels of coded topics # motivation_change_less_reason_labels <- c("...") # # # Create sum scores # # TODO: add column names of coded topics # motivation_change_less_reason_sums <- c(sum(...), # sum(...), # sum(...)) # # # Combine and draw plot # # TODO: add labels of coded topics # motivation_change_less_reason_dataframe <- data.frame(motivation_change_less_reason_labels, motivation_change_less_reason_sums) # # motivation_change_less_reason_dataframe$motivation_change_less_reason_labels <- factor(motivation_change_less_reason_dataframe$motivation_change_less_reason_labels, # levels = c("..."), # labels = c("....")) # # ggplot(motivation_change_less_reason_dataframe, aes(x=motivation_change_less_reason_labels,y=motivation_change_less_reason_sums))+ # geom_bar(stat="identity",position="dodge")+ # coord_flip()+ # xlab("Reasons for negative motivation change")+ylab("Coded themes indicated by sample") # # # # Plotting comparison of definitions between participants that have preregistered before vs. have not preregistered before, and between academic groups # # Aggregate coded datasets and information about PR_before yes vs. 
no & academic group # motivation_change_less_reason_coded$id <- motivation_change_less_reason_coded$all_data.id # motivation_change_less_reason_coded <- left_join(motivation_change_less_reason_coded, PR_before_group_by_id, "id") # # # PR before yes vs. no # motivation_change_less_reason_sums_PR <- aggregate(motivation_change_less_reason_coded[ , c(...)], # TODO: add columns # by=list(motivation_change_less_reason_coded$PR_before), # sum) # motivation_change_less_reason_sums_PR$PR_before <- motivation_change_less_reason_sums_PR$Group.1 # motivation_change_less_reason_sums_PR$Group.1 <- NULL # # motivation_change_less_reason_sums_PR_long <- melt(motivation_change_less_reason_sums_PR,id.vars="PR_before") # # # ggplot(motivation_change_less_reason_sums_PR_long,aes(x=variable,y=value,fill=factor(PR_before)))+ # geom_bar(stat="identity",position="dodge")+ # coord_flip()+ # scale_fill_discrete(name="Prior preregistration?", # labels=c("Has preregistered before", "Has not preregistered before"))+ # xlab("Reasons for negative motivation change")+ylab("Frequency of indication") # # # Academic groups # motivation_change_less_reason_sums_group <- aggregate(motivation_change_less_reason_coded[ , c(...)], # TODO: add columns # by=list(motivation_change_less_reason_coded$academic_group), # sum) # motivation_change_less_reason_sums_group$academic_group <- motivation_change_less_reason_sums_group$Group.1 # motivation_change_less_reason_sums_group$Group.1 <- NULL # # motivation_change_less_reason_sums_group_long <- melt(motivation_change_less_reason_sums_group,id.vars="academic_group") # motivation_change_less_reason_sums_PR_long$academic_group <- as.numeric(motivation_change_less_reason_sums_PR_long$academic_group) # # # ggplot(motivation_change_less_reason_sums_group_long,aes(x=variable,y=value,fill=factor(academic_group)))+ # geom_bar(stat="identity",position="dodge")+ # coord_flip()+ # scale_fill_discrete(name="Academic group", # labels=c(...))+ # TODO: include only those labels 
# of groups that are present in the data
# xlab("Reasons for negative motivation change")+ylab("Frequency of indication")

##############################################################################################
# M-13: Benefits (Ranking)

# Mean rank per benefit. Note: this ignores how often an option was ranked at all.
benefits_data <- select(all_data, benefits_trust:benefits_planning)
benefits_data_tall <- gather(
  benefits_data,
  key = Feature,
  value = Rank,
  benefits_trust:benefits_planning,
  na.rm = TRUE
)
benefits_stats <- summarize(group_by(benefits_data_tall, Feature), Avg = mean(Rank))
benefits_stats

# Reverse-code the ranks with abs(rank - 6) so that the summed scores reflect
# both how often an option was ranked AND how highly it was ranked.
# NOTE(review): presumably ranks run from 1 to 5 (1 -> 5, ..., 5 -> 1) -- confirm.
benefit_items <- c(
  "benefits_trust",
  "benefits_qrps",
  "benefits_publicationbias",
  "benefits_fraud",
  "benefits_transparency",
  "benefits_documentation",
  "benefits_practice",
  "benefits_collaboration",
  "benefits_planning"
)
benefits_reversed <- benefits_data
benefits_reversed[benefit_items] <- lapply(
  benefits_reversed[benefit_items],
  function(ranks) abs(ranks - 6)
)

## Sum scores (one per benefit, same order as benefit_items) and plot labels
benefits_reversed_sums <- unname(colSums(benefits_reversed[benefit_items], na.rm = TRUE))
benefits_reversed_labels <- c(
  "More trust in science",
  "Prevention of questionable research practices",
  "Prevention of publication bias",
  "Better detection of fraud",
  "Higher transparency",
  "Better documentation",
  "Better scientific practice",
  "Increased collaboration",
  "More precise planning beforehand"
)

# Combine into one data frame; the factor levels fix the order of the bars
benefits_reversed_dataframe <- data.frame(benefits_reversed_labels, benefits_reversed_sums)
benefits_reversed_dataframe$benefits_reversed_labels <- factor(
  benefits_reversed_dataframe$benefits_reversed_labels,
  levels = benefits_reversed_labels
)

ggplot(
  benefits_reversed_dataframe,
  aes(x = benefits_reversed_labels, y = benefits_reversed_sums)
) +
  geom_bar(stat = "identity", position = "dodge") +
  coord_flip() +
  labs(
    x = "Benefits associated with preregistration",
    y = "Frequency of indication"
  )
# This plot shows a combination of which options were ranked more often AND the position they were ranked
# Values on the x-axis thus cannot be interpreted directly

# Analysis of "other" comments
# Are there comments?
# Tabulate the three "other" benefit slots to see whether any of them was used.
# The third call previously read `benefit_other3` (missing "s"); `$` returns
# NULL for a column that does not exist, so that table was always empty.
table(all_data$benefits_other1)
table(all_data$benefits_other2)
table(all_data$benefits_other3)

# Counting common (often used) words
# Subset only of relevant variables
# TODO: only include columns of variables where comments were given
# As there were no comments here in the pilot, this is not applicable to the pilot
# all_data_benefits <- all_data[ , c("id", "benefits_other1_comment", "benefits_other2_comment", "benefits_other3_comment", "PR_before")]
# all_data_benefits_long <- gather(all_data_benefits, option, comment, benefits_other1_comment:benefits_other3_comment)
#
# # Present each word in a separate line
# all_data_benefits_tidy <- all_data_benefits_long %>%
#   unnest_tokens(word, comment)
#
# # Remove stop words
# data(stop_words)
# all_data_benefits_tidy <- all_data_benefits_tidy %>%
#   anti_join(stop_words)
#
# # Visualisation of words that were used more than x times
# all_data_benefits_tidy %>%
#   count(word, sort = TRUE) %>%
#   filter(n > x) %>% # TODO: insert an appropriate number to count which words were used very often
#   ggplot(aes(word, n)) +
#   geom_col() +
#   coord_flip()
#
# # Comparison of participants that have preregistered before vs.
have not preregistered before # all_data_benefits_tidy[all_data_benefits_tidy$PR_before == "yes", ] %>% # count(word, sort = TRUE) %>% # filter(n > x) %>% # TODO: insert an appropiate number to count which words were used very often # ggplot(aes(word, n)) + # geom_col() + # coord_flip() # # all_data_benefits_tidy[all_data_benefits_tidy$PR_before == "no", ] %>% # count(word, sort = TRUE) %>% # filter(n > x) %>% # TODO: insert an appropiate number to count which words were used very often # ggplot(aes(word, n)) + # geom_col() + # coord_flip() # # # # # Coding of the answers to get their meaning # # Code open text input # # TODO: check if all comment options were used (if this is not the case, the variable is not included in the dataset and must be deleted from this function) # benefits_other <- data.frame(all_data$id, all_data$benefits_other1, all_data$benefits_other1_comment, all_data$benefits_other2, all_data$benefits_other2_comment, all_data$benefits_other3, all_data$benefits_other3_comment) # benefits_other$all_data.benefits_other1_comment <- as.character(benefits_other$all_data.benefits_other1_comment) # benefits_other$all_data.benefits_other2_comment <- as.character(benefits_other$all_data.benefits_other2_comment) # benefits_other$all_data.benefits_other3_comment <- as.character(benefits_other$all_data.benefits_other3_comment) # benefits_other <- benefits_other[benefits_other$all_data.benefits_other1_comment != 0 & benefits_other$all_data.benefits_other2_comment != 0 & benefits_other$all_data.benefits_other3_comment != 0, ] # # benefits_other <- benefits_other[rowSums(benefits_other[ , c("all_data.benefits_other1", "all_data.benefits_other2", "all_data.benefits_other3")], na.rm = TRUE) > 0, ] # # write_xlsx(benefits_other, "./pilot/qualitative_analysis/all/benefits_other.xlsx") # # After coding has been done, file gets read again # benefits_other_coded <- readxl::read_excel("./pilot/qualitative_analysis/all/coded/benefits_other_coded.xlsx") # # # Reverse 
# rankings to plot them
# benefits_other_coded$... <- abs(benefits_other_coded$... - 6)
# benefits_other_coded$... <- abs(benefits_other_coded$... - 6)
# benefits_other_coded$... <- abs(benefits_other_coded$... - 6)
#
# # Plotting of common topics
# # Create labels
# # TODO: add labels of coded topics
# benefits_reversed_labels <- c("...",
#                               "...",
#                               "...")
#
# # Create sum scores
# # TODO: add column names of coded topics
# benefits_reversed_sums <- c(sum(benefits_other_coded$...),
#                             sum(benefits_other_coded$...),
#                             sum(benefits_other_coded$...))
#
# # Combine and draw plot
# # TODO: add labels of coded topics
# benefits_other_dataframe <- data.frame(benefits_reversed_labels, benefits_reversed_sums)
# benefits_combined <- rbind(benefits_reversed_dataframe, benefits_other_dataframe)
#
#
# benefits_combined$benefits_other_labels <- factor(benefits_combined$benefits_reversed_labels,
#                                                   levels = c(...),
#                                                   labels = c(...))
#
# ggplot(benefits_combined, aes(x=benefits_reversed_labels,y=benefits_reversed_sums))+
#   geom_bar(stat="identity",position="dodge")+
#   coord_flip()+
#   xlab("Benefits of preregistration indicated by sample (with \"other\" commments)")+ylab("Coded themes indicated by sample")

##############################################################################################
# O-11: Drawbacks (Ranking)

# Mean rank per drawback. Note: this ignores how often an option was ranked at all.
drawbacks_data <- select(all_data, drawbacks_scooping:drawbacks_reward)
drawbacks_data_tall <- gather(
  drawbacks_data,
  key = Feature,
  value = Rank,
  drawbacks_scooping:drawbacks_reward,
  na.rm = TRUE
)
drawbacks_stats <- summarize(group_by(drawbacks_data_tall, Feature), Avg = mean(Rank))
drawbacks_stats

# Reverse-code the ranks with abs(rank - 6) so that the summed scores reflect
# both how often an option was ranked AND how highly it was ranked.
# NOTE(review): presumably ranks run from 1 to 5 (1 -> 5, ..., 5 -> 1) -- confirm.
drawback_items <- c(
  "drawbacks_scooping",
  "drawbacks_time",
  "drawbacks_effort",
  "drawbacks_exploratory",
  "drawbacks_useless",
  "drawbacks_exploited",
  "drawbacks_decreases_progress",
  "drawbacks_flexibility",
  "drawbacks_reward"
)
drawbacks_reversed <- drawbacks_data
drawbacks_reversed[drawback_items] <- lapply(
  drawbacks_reversed[drawback_items],
  function(ranks) abs(ranks - 6)
)

## Sum scores (one per drawback, same order as drawback_items) and plot labels
drawbacks_reversed_sums <- unname(colSums(drawbacks_reversed[drawback_items], na.rm = TRUE))
drawbacks_reversed_labels <- c(
  "Risk of scooping (i.e., someone taking my idea and publishing it before me)",
  "It takes time",
  "It means extra effort",
  "It hinders exploratory research",
  "It is useless",
  "It can be exploited (e.g., by uploading multiple preregistrations)",
  "It decreases scientific progress",
  "It decreases flexibility in study administration and analysis",
  "It is not sufficiently rewarded"
)

# Combine into one data frame; the factor levels fix the order of the bars
drawbacks_reversed_dataframe <- data.frame(drawbacks_reversed_labels, drawbacks_reversed_sums)
drawbacks_reversed_dataframe$drawbacks_reversed_labels <- factor(
  drawbacks_reversed_dataframe$drawbacks_reversed_labels,
  levels = drawbacks_reversed_labels
)

ggplot(
  drawbacks_reversed_dataframe,
  aes(x = drawbacks_reversed_labels, y = drawbacks_reversed_sums)
) +
  geom_bar(stat = "identity", position = "dodge") +
  coord_flip() +
  labs(
    x = "Drawbacks associated with preregistration",
    y = "Frequency of indication"
  )
# This plot shows a combination of which options were ranked more often AND the position they were ranked
# Values on the x-axis thus cannot be interpreted directly

# Analysis of "other" comments
# Are there comments?
# Tabulate the three "other" drawback slots to see whether any of them was used.
table(all_data$drawbacks_other1)
table(all_data$drawbacks_other2)
table(all_data$drawbacks_other3)

# Counting common (often used) words
# Subset only of relevant variables
# all_data_drawbacks <- all_data[ , c("id", "drawbacks_other1_comment", "drawbacks_other2_comment", "drawbacks_other3_comment", "PR_before")]
# all_data_drawbacks_long <- gather(all_data_drawbacks, option, comment, drawbacks_other1_comment:drawbacks_other3_comment)
# # Present each word in a separate line
# all_data_drawbacks_tidy <- all_data_drawbacks_long %>%
#   unnest_tokens(word, comment)
#
# # Remove stop words
# data(stop_words)
# all_data_drawbacks_tidy <- all_data_drawbacks_tidy %>%
#   anti_join(stop_words)
#
# # Visualisation of words that were used more than x times
# all_data_drawbacks_tidy %>%
#   count(word, sort = TRUE) %>%
#   filter(n > x) %>% # TODO: insert an appropriate number to count which words were used very often
#   ggplot(aes(word, n)) +
#   geom_col() +
#   coord_flip()
#
# # Comparison of participants that have preregistered before vs. have not preregistered before
# all_data_drawbacks_tidy[all_data_drawbacks_tidy$PR_before == "yes", ] %>%
#   count(word, sort = TRUE) %>%
#   filter(n > x) %>% # TODO: insert an appropriate number to count which words were used very often
#   ggplot(aes(word, n)) +
#   geom_col() +
#   coord_flip()
#
# all_data_drawbacks_tidy[all_data_drawbacks_tidy$PR_before == "no", ] %>%
#   count(word, sort = TRUE) %>%
#   filter(n > x) %>% # TODO: insert an appropriate number to count which words were used very often
#   ggplot(aes(word, n)) +
#   geom_col() +
#   coord_flip()
#
#
# Not applicable for pilot (only small number of responses)

# Coding of the answers to get their meaning
# Code open text input
# TODO: check if all comment options were used (if this is not the case, the variable is not included in the dataset and must be deleted from this function)
# data.frame() names the columns "all_data.<variable>"; these names are used below
drawbacks_other <- data.frame(
  all_data$id,
  all_data$drawbacks_other1,
  all_data$drawbacks_other1_comment,
  all_data$drawbacks_other2,
  all_data$drawbacks_other2_comment,
  all_data$drawbacks_other3,
  all_data$drawbacks_other3_comment
)
drawbacks_other$all_data.drawbacks_other1_comment <- as.character(drawbacks_other$all_data.drawbacks_other1_comment)
drawbacks_other$all_data.drawbacks_other2_comment <- as.character(drawbacks_other$all_data.drawbacks_other2_comment)
drawbacks_other$all_data.drawbacks_other3_comment <- as.character(drawbacks_other$all_data.drawbacks_other3_comment)
# NOTE(review): the comment columns are character here, so `!= 0` compares
# against the string "0"; left unchanged -- confirm the intended coding of
# empty comments in the export.
drawbacks_other <- drawbacks_other[
  drawbacks_other$all_data.drawbacks_other1_comment != 0 &
    drawbacks_other$all_data.drawbacks_other2_comment != 0 &
    drawbacks_other$all_data.drawbacks_other3_comment != 0,
]
# Keep only rows in which the three "other" variables sum to more than 0
drawbacks_other <- drawbacks_other[
  rowSums(
    drawbacks_other[, c("all_data.drawbacks_other1", "all_data.drawbacks_other2", "all_data.drawbacks_other3")],
    na.rm = TRUE
  ) > 0,
]
write_xlsx(drawbacks_other, "./pilot/qualitative_analysis/all/drawbacks_other.xlsx")

# After coding has been done, file gets read again
drawbacks_other_coded <- readxl::read_excel("./pilot/qualitative_analysis/all/coded/drawbacks_other_coded.xlsx")

# Reverse rankings to plot them
drawbacks_other_coded$errors_at_first <- abs(drawbacks_other_coded$errors_at_first - 6)
drawbacks_other_coded$registered_reports <- abs(drawbacks_other_coded$registered_reports - 6)
drawbacks_other_coded$does_not_improve_theory <- abs(drawbacks_other_coded$does_not_improve_theory - 6)

# Plotting of common topics
# Create labels
# TODO: add labels of coded topics
# The two vectors below deliberately reuse the names drawbacks_reversed_labels /
# drawbacks_reversed_sums so that the resulting data frame has the same column
# names as drawbacks_reversed_dataframe and can be appended with rbind().
drawbacks_reversed_labels <- c(
  "One is bound to make errors at first",
  "Registered Reports are better",
  "Does not improve theory"
)

# Create sum scores
# TODO: add column names of coded topics
drawbacks_reversed_sums <- c(
  sum(drawbacks_other_coded$errors_at_first, na.rm = TRUE),
  sum(drawbacks_other_coded$registered_reports, na.rm = TRUE),
  sum(drawbacks_other_coded$does_not_improve_theory, na.rm = TRUE)
)

# Combine and draw plot
# TODO: add labels of coded topics
drawbacks_other_dataframe <- data.frame(drawbacks_reversed_labels, drawbacks_reversed_sums)
drawbacks_combined <- rbind(drawbacks_reversed_dataframe, drawbacks_other_dataframe)

# Predefined options first, coded "other" topics last
drawbacks_combined_levels <- c(
  "Risk of scooping (i.e., someone taking my idea and publishing it before me)",
  "It takes time",
  "It means extra effort",
  "It hinders exploratory research",
  "It is useless",
  "It can be exploited (e.g., by uploading multiple preregistrations)",
  "It decreases scientific progress",
  "It decreases flexibility in study administration and analysis",
  "It is not sufficiently rewarded",
  "One is bound to make errors at first",
  "Registered Reports are better",
  "Does not improve theory"
)
drawbacks_combined$drawbacks_other_labels <- factor(
  drawbacks_combined$drawbacks_reversed_labels,
  levels = drawbacks_combined_levels
)

# Plot the explicitly ordered factor. The plot previously mapped
# drawbacks_reversed_labels, so the ordering defined above was never used.
ggplot(drawbacks_combined, aes(x = drawbacks_other_labels, y = drawbacks_reversed_sums)) +
  geom_bar(stat = "identity", position = "dodge") +
  coord_flip() +
  xlab("Drawbacks of preregistration indicated by sample (with \"other\" commments)") +
  ylab("Coded themes indicated by sample")

##############################################################################################
# O-12a: Problems

## Creating sum scores and label names
problems_sums <- c(
  sum(all_data$problems_insecurity, na.rm = TRUE),
  sum(all_data$problems_conflict, na.rm = TRUE),
  sum(all_data$problems_changes, na.rm = TRUE),
  sum(all_data$problems_errors, na.rm = TRUE),
  sum(all_data$problems_credibility, na.rm = TRUE),
  sum(all_data$problems_flexibility, na.rm = TRUE),
  sum(all_data$problems_scooping, na.rm = TRUE),
  sum(all_data$problems_time, na.rm = TRUE),
  sum(all_data$problems_none, na.rm = TRUE),
  sum(all_data$problems_other, na.rm = TRUE)
)
problems_labels <- c(
  "I was insecure about what needs to be included in the preregistration",
  "Conflict with supervisor/co-author",
  "Study design would have needed to be changed because details did not work, but this was not possible",
  "Errors in the preregistration could not be changed afterwards",
  "Deviations were necessary and my study lost credibility",
  "I found it problematic to not have flexibility during my analyses",
  "I got scooped (i.e., someone took my idea and published it before me)",
  "It took very long to do the preregistration",
  "None",
  "Other"
)

# Combine and draw plot; the factor levels fix the order of the bars
problems_dataframe <- data.frame(problems_labels, problems_sums)
problems_dataframe$problems_labels <- factor(
  problems_dataframe$problems_labels,
  levels = problems_labels
)

ggplot(problems_dataframe, aes(x = problems_labels, y = problems_sums)) +
  geom_bar(stat = "identity", position = "dodge") +
  coord_flip() +
  xlab("Problems encountered while preregistering") +
  ylab("Frequency of indication")

# Analysis of "other" comments
# Are there comments?
# Tabulate the "other" problem option to see whether it was used.
table(all_data$problems_other)

# Counting common (often used) words
# Subset only of relevant variables
# all_data_problems <- all_data[ , c("id", "problems_other_comment", "PR_before")]
#
# # Present each word in a separate line
# all_data_problems_tidy <- all_data_problems %>%
#   unnest_tokens(word, problems_other_comment)
#
# # Remove stop words
# data(stop_words)
# all_data_problems_tidy <- all_data_problems_tidy %>%
#   anti_join(stop_words)
#
# # Visualisation of words that were used more than x times
# all_data_problems_tidy %>%
#   count(word, sort = TRUE) %>%
#   filter(n > x) %>% # TODO: insert an appropriate number to count which words were used very often
#   ggplot(aes(word, n)) +
#   geom_col() +
#   coord_flip()
#
# # Comparison of participants that have preregistered before vs. have not preregistered before
# all_data_problems_tidy[all_data_problems_tidy$PR_before == "yes", ] %>%
#   count(word, sort = TRUE) %>%
#   filter(n > x) %>% # TODO: insert an appropriate number to count which words were used very often
#   ggplot(aes(word, n)) +
#   geom_col() +
#   coord_flip()
#
# all_data_problems_tidy[all_data_problems_tidy$PR_before == "no", ] %>%
#   count(word, sort = TRUE) %>%
#   filter(n > x) %>% # TODO: insert an appropriate number to count which words were used very often
#   ggplot(aes(word, n)) +
#   geom_col() +
#   coord_flip()
# Not applicable for pilot (only small number of responses)

# Coding of the answers to get their meaning
# Code open text input
# TODO: check if there are any comments, otherwise this does not function (because the variable is not created by soscisurvey)
# data.frame() names the columns "all_data.id" and "all_data.problems_other_comment"
problems <- data.frame(all_data$id, all_data$problems_other_comment)
problems$all_data.problems_other_comment <- as.character(problems$all_data.problems_other_comment)
# Keep only rows with a non-empty comment. The previous `> 0` compared the
# text with the string "0" (locale-dependent collation) and turned missing
# comments into all-NA rows in the export.
problems_has_comment <- !is.na(problems$all_data.problems_other_comment) &
  nzchar(trimws(problems$all_data.problems_other_comment))
problems <- problems[problems_has_comment, ]
write_xlsx(problems, "./pilot/qualitative_analysis/all/problems.xlsx")

# After coding has been done, file gets read again
problems_coded <- readxl::read_excel("./pilot/qualitative_analysis/all/coded/problems_coded.xlsx")

# Plotting of common topics
# Create labels
# TODO: add labels of coded topics
# problems_labels / problems_sums are deliberately overwritten here so that the
# new data frame has the same column names as problems_dataframe for rbind().
problems_labels <- c("Deviations were necessary, but were reported transparently (no loss of credibility)")

# Create sum scores
# TODO: add column names of coded topics
# na.rm = TRUE for consistency with the other sum scores in this script
problems_sums <- c(sum(problems_coded$deviations_but_reported_transparently, na.rm = TRUE))

# Combine and draw plot
# TODO: add labels of coded topics
problems_dataframe_other <- data.frame(problems_labels, problems_sums)
problems_combined <- rbind(problems_dataframe, problems_dataframe_other)

# Predefined options first, coded "other" topics last
problems_combined_levels <- c(
  "I was insecure about what needs to be included in the preregistration",
  "Conflict with supervisor/co-author",
  "Study design would have needed to be changed because details did not work, but this was not possible",
  "Errors in the preregistration could not be changed afterwards",
  "Deviations were necessary and my study lost credibility",
  "I found it problematic to not have flexibility during my analyses",
  "I got scooped (i.e., someone took my idea and published it before me)",
  "It took very long to do the preregistration",
  "None",
  "Other",
  "Deviations were necessary, but were reported transparently (no loss of credibility)"
)
# The factor is built from problems_labels. The previous code read the
# non-existent column `topic_labels`, i.e. NULL, which cannot be assigned
# as a column.
problems_combined$problems_labels <- factor(
  problems_combined$problems_labels,
  levels = problems_combined_levels
)

ggplot(problems_combined, aes(x = problems_labels, y = problems_sums)) +
  geom_bar(stat = "identity", position = "dodge") +
  coord_flip() +
  xlab("Problems encountered while preregistering (with \"other\" commments)") +
  ylab("Coded themes indicated by sample")

##############################################################################################
# O-12b: Worries

## Creating sum scores and label names
worries_sums <- c(
  sum(all_data$worries_insecurity, na.rm = TRUE),
  sum(all_data$worries_conflict, na.rm = TRUE),
  sum(all_data$worries_changes, na.rm = TRUE),
  sum(all_data$worries_errors, na.rm = TRUE),
  sum(all_data$worries_credibility, na.rm = TRUE),
  sum(all_data$worries_flexibility, na.rm = TRUE),
  sum(all_data$worries_scooping, na.rm = TRUE),
  sum(all_data$worries_time, na.rm = TRUE),
  sum(all_data$worries_none, na.rm = TRUE),
  sum(all_data$worries_other, na.rm = TRUE)
)
worries_labels <- c(
  "I would be insecure about what needs to be included in the preregistration",
  "My supervisor/co-author(s) would object",
  "Maybe the study design would need to be changed because details do not work, but this would not be possible",
  "Errors in the preregistration cannot be changed afterwards",
  "If deviations were necessary, my study would loose credibility",
  "Low flexibility",
  "Scooping (i.e., someone taking my idea and publishing it before me)",
  "High time costs",
  "None",
  "Other"
)

# Combine and draw plot; the factor levels fix the order of the bars
worries_dataframe <- data.frame(worries_labels, worries_sums)
worries_dataframe$worries_labels <- factor(
  worries_dataframe$worries_labels,
  levels = worries_labels
)

ggplot(worries_dataframe, aes(x = worries_labels, y = worries_sums)) +
  geom_bar(stat = "identity", position = "dodge") +
  coord_flip() +
  xlab("Worries associated with preregistration") +
  ylab("Frequency of indication")

# Analysis of "other" comments
# Are there comments?
table(all_data$worries_other_comment)

# Counting common (often used) words
# Subset only of relevant variables
# all_data_worries <- all_data[ , c("id", "worries_other_comment", "PR_before")]
#
# # Present each word in a separate line
# all_data_worries_tidy <- all_data_worries %>%
#   unnest_tokens(word, worries_other_comment)
#
# # Remove stop words
# data(stop_words)
# all_data_worries_tidy <- all_data_worries_tidy %>%
#   anti_join(stop_words)
#
# # Visualisation of words that were used more than x times
# all_data_worries_tidy %>%
#   count(word, sort = TRUE) %>%
#   filter(n > x) %>% # TODO: insert an appropriate number to count which words were used very often
#   ggplot(aes(word, n)) +
#   geom_col() +
#   coord_flip()
#
# # Comparison of participants that have preregistered before vs.
have not preregistered before # all_data_worries_tidy[all_data_worries_tidy$PR_before == "yes", ] %>% # count(word, sort = TRUE) %>% # filter(n > x) %>% # TODO: insert an appropiate number to count which words were used very often # ggplot(aes(word, n)) + # geom_col() + # coord_flip() # # all_data_worries_tidy[all_data_worries_tidy$PR_before == "no", ] %>% # count(word, sort = TRUE) %>% # filter(n > x) %>% # TODO: insert an appropiate number to count which words were used very often # ggplot(aes(word, n)) + # geom_col() + # coord_flip() # Not applicable for pilot (only small number of responses) # Coding of the answers to get their meaning # Code open text input # TODO: check if there are any comments, otherwise this does not function (because the variable is not created by soscisurvey) # Not applicable for pilot (no other comments) # worries <- data.frame(all_data$id, all_data$worries_other_comment) # worries$all_data.worries_other_comment <- as.character(learned$all_data.worries_other_comment) # worries <- worries[worries$all_data.worries_other_comment > 0, ] # write_xlsx(worries, "./pilot/qualitative_analysis/all/worries.xlsx") # # After coding has been done, file gets read again # worries_coded <- readxl::read_excel("./pilot/qualitative_analysis/all/coded/worries_coded.xlsx") # # # Plotting of common topics # # Create labels # # TODO: add labels of coded topics # worries_labels <- c("...") # # # Create sum scores # # TODO: add column names of coded topics # worries_sums <- c(sum(...), # sum(...), # sum(...)) # # # Combine and draw plot # # TODO: add labels of coded topics # worries_dataframe_other <- data.frame(worries_labels, worries_sums) # worries_combined <- rbind(worries_dataframe, worries_dataframe_other) # # worries_combined$worries_labels <- factor(worries_combined$topic_labels, # levels = c("..."), # labels = c("...")) # # ggplot(worries_combined, aes(x=worries_labels,y=worries_sums))+ # geom_bar(stat="identity",position="dodge")+ # coord_flip()+ # 
# xlab("Worries associated with preregistration (with \"other\" commments)")+ylab("Coded themes indicated by sample")

##############################################################################################
# O-13: Reasons against (Ranking)

# Mean rank per reason. Note: this ignores how often an option was ranked at all.
reasons_against_data <- select(all_data, reasons_against_time:reasons_against_disadvantages)
reasons_against_data_tall <- gather(
  reasons_against_data,
  key = Feature,
  value = Rank,
  reasons_against_time:reasons_against_disadvantages,
  na.rm = TRUE
)
reasons_against_stats <- summarize(group_by(reasons_against_data_tall, Feature), Avg = mean(Rank))
reasons_against_stats

# Reverse-code the ranks with abs(rank - 6) so that the summed scores reflect
# both how often an option was ranked AND how highly it was ranked.
# NOTE(review): presumably ranks run from 1 to 5 (1 -> 5, ..., 5 -> 1) -- confirm.
reasons_against_items <- c(
  "reasons_against_time",
  "reasons_against_flexibility",
  "reasons_against_never_did",
  "reasons_against_neverthought",
  "reasons_against_submission",
  "reasons_against_noone_reads",
  "reasons_against_not_useful",
  "reasons_against_disadvantages"
)
reasons_against_reversed <- reasons_against_data
reasons_against_reversed[reasons_against_items] <- lapply(
  reasons_against_reversed[reasons_against_items],
  function(ranks) abs(ranks - 6)
)

## Sum scores (one per reason, same order as reasons_against_items) and plot labels
reasons_against_reversed_sums <- unname(
  colSums(reasons_against_reversed[reasons_against_items], na.rm = TRUE)
)
reasons_against_reversed_labels <- c(
  "It saves time and effort",
  "I am more flexible",
  "I never had to preregister before",
  "I have never thought about preregistering my studies",
  "I don't know where I can submit my preregistration",
  "No one will read it anyway",
  "It is not useful",
  "It has only disadvantages for me"
)

# Combine into one data frame; the factor levels fix the order of the bars
reasons_against_reversed_dataframe <- data.frame(reasons_against_reversed_labels, reasons_against_reversed_sums)
reasons_against_reversed_dataframe$reasons_against_reversed_labels <- factor(
  reasons_against_reversed_dataframe$reasons_against_reversed_labels,
  levels = reasons_against_reversed_labels
)

ggplot(
  reasons_against_reversed_dataframe,
  aes(x = reasons_against_reversed_labels, y = reasons_against_reversed_sums)
) +
  geom_bar(stat = "identity", position = "dodge") +
  coord_flip() +
  labs(
    x = "Reasons against preregistration",
    y = "Frequency of indication"
  )
# This plot shows a combination of which options were ranked more often AND the position they were ranked
# Values on the x-axis thus cannot be interpreted directly

# Analysis of "other" comments
# Are there comments?
table(all_data$reasons_against_other1)
table(all_data$reasons_against_other2)
table(all_data$reasons_against_other3)

# Not applicable for pilot (no other comments)
# Counting common (often used) words
# Subset only of relevant variables
# all_data_reasons_against <- all_data[ , c("id", "reasons_against_other1_comment", "reasons_against_other2_comment", "reasons_against_other3_comment", "PR_before")]
# all_data_reasons_against_long <- gather(all_data_reasons_against, option, comment, reasons_against_other1_comment:reasons_against_other3_comment)
#
# # Present each word in a separate line
# all_data_reasons_against_tidy <- all_data_reasons_against_long %>%
#   unnest_tokens(word, comment)
#
# # Remove stop words
# data(stop_words)
# all_data_reasons_against_tidy <- all_data_reasons_against_tidy %>%
#   anti_join(stop_words)
#
# # Visualisation of words that were used more than x times
# all_data_reasons_against_tidy %>%
#   count(word, sort = TRUE) %>%
#   filter(n > x) %>% # TODO: insert an appropriate number to count which words were used very often
#   ggplot(aes(word, n)) +
#   geom_col() +
#   coord_flip()
#
# # Comparison of participants that have preregistered before vs.
have not preregistered before # all_data_reasons_against_tidy[all_data_reasons_against_tidy$PR_before == "yes", ] %>% # count(word, sort = TRUE) %>% # filter(n > x) %>% # TODO: insert an appropiate number to count which words were used very often # ggplot(aes(word, n)) + # geom_col() + # coord_flip() # # all_data_reasons_against_tidy[all_data_reasons_against_tidy$PR_before == "no", ] %>% # count(word, sort = TRUE) %>% # filter(n > x) %>% # TODO: insert an appropiate number to count which words were used very often # ggplot(aes(word, n)) + # geom_col() + # coord_flip() # Not applicable for pilot (only small number of responses) # # Coding of the answers to get their meaning # # Code open text input # # TODO: check if all comment options were used (if this is not the case, the variable is not included in the dataset and must be deleted from this function) # reasons_against_other <- data.frame(all_data$id, all_data$reasons_against_other1, all_data$reasons_against_other1_comment, all_data$reasons_against_other2, all_data$reasons_against_other2_comment, all_data$reasons_against_other3, all_data$reasons_against_other3_comment) # reasons_against_other$all_data.reasons_against_other1_comment <- as.character(reasons_against_other$all_data.reasons_against_other1_comment) # reasons_against_other$all_data.reasons_against_other2_comment <- as.character(reasons_against_other$all_data.reasons_against_other2_comment) # reasons_against_other$all_data.reasons_against_other3_comment <- as.character(reasons_against_other$all_data.reasons_against_other3_comment) # reasons_against_other <- reasons_against_other[reasons_against_other$all_data.reasons_against_other1_comment != 0 & reasons_against_other$all_data.reasons_against_other2_comment != 0 & reasons_against_other$all_data.reasons_against_other3_comment != 0, ] # # reasons_against_other <- reasons_against_other[rowSums(reasons_against_other[ , c("all_data.reasons_against_other1", "all_data.reasons_against_other2", 
"all_data.reasons_against_other3")], na.rm = TRUE) > 0, ] # # write_xlsx(reasons_against_other, "./pilot/qualitative_analysis/all/reasons_against_other.xlsx") # # After coding has been done, file gets read again # reasons_against_other_coded <- readxl::read_excel("./pilot/qualitative_analysis/all/coded/reasons_against_other_coded.xlsx") # # # Reverse rankings to plot them # reasons_against_other_coded$... <- abs(reasons_against_other_coded$... - 6) # reasons_against_other_coded$... <- abs(reasons_against_other_coded$... - 6) # reasons_against_other_coded$... <- abs(reasons_against_other_coded$... - 6) # # # Plotting of common topics # # Create labels # # TODO: add labels of coded topics # reasons_against_reversed_labels <- c(...) # # # Create sum scores # # TODO: add column names of coded topics # reasons_against_reversed_sums <- c(sum(reasons_against_other_coded$...), # sum(reasons_against_other_coded$...), # sum(reasons_against_other_coded$...)) # # # Combine and draw plot # # TODO: add labels of coded topics # reasons_against_other_dataframe <- data.frame(reasons_against_reversed_labels, reasons_against_reversed_sums) # reasons_against_combined <- rbind(reasons_against_reversed_dataframe, reasons_against_other_dataframe) # # # reasons_against_combined$reasons_against_other_labels <- factor(reasons_against_combined$reasons_against_reversed_labels, # levels = c(...), # labels = c(...)) # # ggplot(reasons_against_combined, aes(x=reasons_against_reversed_labels,y=reasons_against_reversed_sums))+ # geom_bar(stat="identity",position="dodge")+ # coord_flip()+ # xlab("Reasons against preregistration indicated by sample (with \"other\" commments)")+ylab("Coded themes indicated by sample") ############################ DESCRIPTIVE ANALYSIS OF SU-QUESTIONS ############################## # SU-1: Template format # Percentage of participants that indicated each category # 1 = open template, 2 = restricted template, 3 = other, 4 = does not use templates 
# Share (in %) of all participants per template-format category; the
# denominator is the full sample, including participants without an answer.
table(all_data$template_format) / nrow(all_data) * 100

# Analysis of "other" comments
# Are there comments?
table(all_data$template_format_other_comment)
# Not applicable for pilot (no other comments)

# Code open text input
# # TODO: check if there are any comments, otherwise this does not function (because the variable is not created by soscisurvey)
# template_format <- data.frame(all_data$id, all_data$template_format_other_comment)
# template_format$all_data.template_format_other_comment <- as.character(template_format$all_data.template_format_other_comment)
# template_format <- template_format[template_format$all_data.template_format_other_comment > 0, ]
# write_xlsx(template_format, "./pilot/qualitative_analysis/all/template_format.xlsx")
# # After coding has been done, file gets read again
# template_format_coded <- readxl::read_excel("./pilot/qualitative_analysis/all/coded/template_format_coded.xlsx")
#
# # Plotting of common topics
# # Create labels
# # TODO: add labels of coded topics
# template_format_labels <- c("...")
#
# # Create sum scores
# # TODO: add column names of coded topics
# template_format_sums <- c(sum(...),
#                           sum(...),
#                           sum(...))
#
# # Combine and draw plot
# # TODO: add labels of coded topics
# template_format_dataframe_other <- data.frame(template_format_labels, template_format_sums)
#
# template_format_other$template_format_labels <- factor(template_format_combined$topic_labels,
#                                                        levels = c("..."),
#                                                        labels = c("..."))
#
# ggplot(template_format_other, aes(x=template_format_labels,y=template_format_sums))+
#   geom_bar(stat="identity",position="dodge")+
#   coord_flip()+
#   xlab("Other comments on the template format")+ylab("Coded themes indicated by sample")

##############################################################################################

# SU-2: Template process
# Percentage of participants that indicated each category
# 1 = automated process, 2 = self-administered process, 3 = other
table(all_data$template_process) / nrow(all_data) * 100

# Analysis of "other" comments
# Are there comments?
table(all_data$template_process_other_comment)
# Not applicable for pilot (no other comments)

# Code open text input
# TODO: check if there are any comments, otherwise this does not function (because the variable is not created by soscisurvey)
# template_process <- data.frame(all_data$id, all_data$template_process_other_comment )
# template_process$all_data.template_process <- as.character(template_process$all_data.template_process_other_comment)
# template_process <- template_process[template_process$all_data.template_process > 0, ]
# write_xlsx(template_process, "./pilot/qualitative_analysis/all/template_process.xlsx")
# # After coding has been done, file gets read again
# template_process_coded <- readxl::read_excel("./pilot/qualitative_analysis/all/coded/template_process_coded.xlsx")
#
# # Plotting of common topics
# # Create labels
# # TODO: add labels of coded topics
# template_process_labels <- c("...")
#
# # Create sum scores
# # TODO: add column names of coded topics
# template_process_sums <- c(sum(...),
#                            sum(...),
#                            sum(...))
#
# # Combine and draw plot
# # TODO: add labels of coded topics
# template_process_dataframe_other <- data.frame(template_process_labels, template_process_sums)
#
# template_process_other$template_process_labels <- factor(template_process_combined$topic_labels,
#                                                          levels = c("..."),
#                                                          labels = c("..."))
#
# ggplot(template_process_other, aes(x=template_process_labels,y=template_process_sums))+
#   geom_bar(stat="identity",position="dodge")+
#   coord_flip()+
#   xlab("Other comments on the template process")+ylab("Coded themes indicated by sample")

##############################################################################################

# SU-3: Important information for preregistration

# Mean rank per item. Missing ranks are dropped before averaging, so the mean
# does not reflect how often an item was ranked at all.
info_data <- select(all_data, info_authors:info_material)
info_data_tall <- gather(
  info_data,
  key = Feature,
  value = Rank,
  info_authors:info_material,
  na.rm = TRUE
)
info_stats <- summarize(group_by(info_data_tall, Feature), Avg = mean(Rank))
info_stats

# Reverse-code the ranks (rank r becomes |r - 6|) so that the plot shows which
# items were named often AND ranked higher.
# Only the columns listed here are reversed at this point; info_analysis_script
# and info_material are reversed by the two statements that follow.
info_columns_authors_to_code <- c(
  "info_authors",
  "info_question",
  "info_study_type",
  "info_hypotheses",
  "info_variables",
  "info_design",
  "info_sample_size",
  "info_sample_rationale",
  "info_stopping",
  "info_inclusion",
  "info_exclusion",
  "info_procedure",
  "info_blinding",
  "info_randomization",
  "info_models",
  "info_assumptions",
  "info_transformations",
  "info_inference",
  "info_multiple_tests",
  "info_dropout",
  "info_missing",
  "info_exploratory",
  "info_existing_data",
  "info_code"
)
info_reversed <- info_data
info_reversed[info_columns_authors_to_code] <- lapply(
  info_data[info_columns_authors_to_code],
  function(rank) abs(rank - 6)
)
info_reversed$info_analysis_script <- abs(info_reversed$info_analysis_script - 6)
info_reversed$info_material <- abs(info_reversed$info_material - 6)

## Creating sum scores and label names
# One sum of reversed ranks per item; missing ranks are ignored.
# The order of this vector must match the order of the labels below.
info_rank_columns <- c(
  "info_authors",
  "info_question",
  "info_study_type",
  "info_hypotheses",
  "info_variables",
  "info_design",
  "info_sample_size",
  "info_sample_rationale",
  "info_stopping",
  "info_inclusion",
  "info_exclusion",
  "info_procedure",
  "info_blinding",
  "info_randomization",
  "info_models",
  "info_assumptions",
  "info_transformations",
  "info_inference",
  "info_multiple_tests",
  "info_dropout",
  "info_missing",
  "info_exploratory",
  "info_existing_data",
  "info_code",
  "info_analysis_script",
  "info_material"
)
info_reversed_sums <- unname(vapply(
  info_reversed[info_rank_columns],
  sum,
  numeric(1),
  na.rm = TRUE
))
info_reversed_labels <- c(
  "Authors",
  "Research question",
  "Study type (experimental, correlational, ...)",
  "Hypotheses",
  "Variables/conditions",
  "Design",
  "Planned sample size",
  "Sample size rationale",
  "Stopping rule",
  "Inclusion criteria",
  "Exclusion criteria",
  "Procedure",
  "Blinding",
  "Randomization",
  "Statistical models",
  "Assumptions of analyses (and alternative plans if violated)",
  "Anticipated data transformations",
  "Inference criteria",
  "Correction for multiple tests",
  "Handling of drop-outs",
  "Handling of missing data",
  "Ideas for exploratory analyses",
  "Existing data",
  "Experimental code",
  "Analysis script",
  "Material (e.g., used images)"
)

# Combine and draw plot
info_reversed_dataframe <- data.frame(info_reversed_labels, info_reversed_sums)
# Fix the level order to the order of the label vector (labels default to the levels)
info_reversed_dataframe$info_reversed_labels <- factor(
  info_reversed_dataframe$info_reversed_labels,
  levels = info_reversed_labels
)
ggplot(info_reversed_dataframe, aes(x = info_reversed_labels, y = info_reversed_sums)) +
  geom_bar(stat = "identity", position = "dodge") +
  coord_flip() +
  xlab("Important info to preregister") +
  ylab("Frequency of indication")
# This plot shows a combination of how often an item was ranked AND the position it was ranked at
# Values on the x-axis thus cannot be interpreted directly

# Analysis of "other" comments
# Are there comments?
table(all_data$info_other1_comment)
table(all_data$info_other2_comment)
table(all_data$info_other3_comment)
# Not applicable for pilot (no comments)

# # Counting common (often used) words
#
# # Subset only of relevant variables
# all_data_info <- all_data[ , c("id", "info_other1_comment", "info_other2_comment", "info_other3_comment", "PR_before")]
# all_data_info_long <- gather(all_data_info, option, comment, info_other1_comment:info_other3_comment)
#
# # Present each word in a separate line
# all_data_info_tidy <- all_data_info_long %>%
#   unnest_tokens(word, comment)
#
# # Remove stop words
# data(stop_words)
# all_data_info_tidy <- all_data_info_tidy %>%
#   anti_join(stop_words)
#
# # Visualisation of words that were used more than x times
# all_data_info_tidy %>%
#   count(word, sort = TRUE) %>%
#   filter(n > x) %>% # TODO: insert an appropriate number to count which words were used very often
#   ggplot(aes(word, n)) +
#   geom_col() +
#   coord_flip()
#
# # Comparison of participants that have preregistered before vs. have not preregistered before
# all_data_info_tidy[all_data_info_tidy$PR_before == "yes", ] %>%
#   count(word, sort = TRUE) %>%
#   filter(n > x) %>% # TODO: insert an appropriate number to count which words were used very often
#   ggplot(aes(word, n)) +
#   geom_col() +
#   coord_flip()
#
# all_data_info_tidy[all_data_info_tidy$PR_before == "no", ] %>%
#   count(word, sort = TRUE) %>%
#   filter(n > x) %>% # TODO: insert an appropriate number to count which words were used very often
#   ggplot(aes(word, n)) +
#   geom_col() +
#   coord_flip()

# # Coding of the answers to get their meaning
# # Code open text input
# # TODO: check if all comment options were used (if this is not the case, the variable is not included in the dataset and must be deleted from this function)
# info_other <- data.frame(all_data$id, all_data$info_other1, all_data$info_other1_comment, all_data$info_other2, all_data$info_other2_comment, all_data$info_other3, all_data$info_other3_comment)
# info_other$all_data.info_other1_comment <- as.character(info_other$all_data.info_other1_comment)
# info_other$all_data.info_other2_comment <- as.character(info_other$all_data.info_other2_comment)
# info_other$all_data.info_other3_comment <- as.character(info_other$all_data.info_other3_comment)
# info_other <- info_other[info_other$all_data.info_other1_comment != 0 & info_other$all_data.info_other2_comment != 0 & info_other$all_data.info_other3_comment != 0, ]
#
# info_other <- info_other[rowSums(info_other[ , c("all_data.info_other1", "all_data.info_other2", "all_data.info_other3")], na.rm = TRUE) > 0, ]
#
# write_xlsx(info_other, "./pilot/qualitative_analysis/all/info_other.xlsx")
# # After coding has been done, file gets read again
# info_other_coded <- readxl::read_excel("./pilot/qualitative_analysis/all/coded/info_other_coded.xlsx")
#
# # Reverse rankings to plot them
# info_other_coded$... <- abs(info_other_coded$... - 6)
# info_other_coded$... <- abs(info_other_coded$... - 6)
# info_other_coded$... <- abs(info_other_coded$... - 6)
#
# # Plotting of common topics
# # Create labels
# # TODO: add labels of coded topics
# info_reversed_labels <- c(...)
#
# # Create sum scores
# # TODO: add column names of coded topics
# info_reversed_sums <- c(sum(info_other_coded$...),
#                         sum(info_other_coded$...),
#                         sum(info_other_coded$...))
#
# # Combine and draw plot
# # TODO: add labels of coded topics
# info_other_dataframe <- data.frame(info_reversed_labels, info_reversed_sums)
# info_combined <- rbind(info_reversed_dataframe, info_other_dataframe)
#
#
# info_combined$info_other_labels <- factor(info_combined$info_reversed_labels,
#                                           levels = c(...),
#                                           labels = c(...))
#
# ggplot(info_combined, aes(x=info_reversed_labels,y=info_reversed_sums))+
#   geom_bar(stat="identity",position="dodge")+
#   coord_flip()+
#   xlab("Most important information to preregister indicated by sample (with \"other\" commments)")+ylab("Coded themes indicated by sample")

##############################################################################################

# SU-4: Increase motivation

# Code open text input
increase_motivation <- data.frame(all_data$id, all_data$increase_motivation)
increase_motivation$all_data.increase_motivation <- as.character(increase_motivation$all_data.increase_motivation)
# Keep only participants who actually wrote something. The text is tested
# explicitly for non-missing, non-blank content: comparing a character column
# with `> 0` is a locale-dependent string comparison against "0", and an NA in
# a logical row index produces all-NA rows in the exported sheet.
increase_motivation <- increase_motivation[
  !is.na(increase_motivation$all_data.increase_motivation) &
    nzchar(trimws(increase_motivation$all_data.increase_motivation)),
]
write_xlsx(increase_motivation, "./pilot/qualitative_analysis/all/increase_motivation.xlsx")
# After coding has been done, file gets read again
increase_motivation_coded <- readxl::read_excel("./pilot/qualitative_analysis/all/coded/increase_motivation_coded.xlsx")

# Plotting of common topics
# Create labels
# TODO: add labels of coded topics
increase_motivation_labels <- c(
  "Badges",
  "Rewards at conferences",
  "Reassure that exploratory analyses are possible",
  "Advantage for publication",
  "Requirement for publication",
  "Criteria for jobs",
  "More education",
  "More acknowledgement"
)

# Create sum scores (same order as the labels; empty cells in the coded sheet are ignored)
# TODO: add column names of coded topics
increase_motivation_sums <- c(
  sum(increase_motivation_coded$badges, na.rm = TRUE),
  sum(increase_motivation_coded$rewards_at_conferences, na.rm = TRUE),
  sum(increase_motivation_coded$reassuring_exploratory_is_possible, na.rm = TRUE),
  sum(increase_motivation_coded$advantage_publication, na.rm = TRUE),
  sum(increase_motivation_coded$requirement_publication, na.rm = TRUE),
  sum(increase_motivation_coded$criteria_jobs, na.rm = TRUE),
  sum(increase_motivation_coded$education, na.rm = TRUE),
  sum(increase_motivation_coded$acknowledgement, na.rm = TRUE)
)

# Combine and draw plot
# TODO: add labels of coded topics
increase_motivation_dataframe <- data.frame(increase_motivation_labels, increase_motivation_sums)
increase_motivation_dataframe$increase_motivation_labels <- factor(
  increase_motivation_dataframe$increase_motivation_labels,
  levels = increase_motivation_labels
)
ggplot(increase_motivation_dataframe, aes(x = increase_motivation_labels, y = increase_motivation_sums)) +
  geom_bar(stat = "identity", position = "dodge") +
  coord_flip() +
  xlab("Suggestions to improve the motivation to preregister") +
  ylab("Coded themes indicated by sample")

##############################################################################################

# SU-5: Decrease obstacles

# Code open text input
decrease_obstacles <- data.frame(all_data$id, all_data$decrease_obstacles)
decrease_obstacles$all_data.decrease_obstacles <- as.character(decrease_obstacles$all_data.decrease_obstacles)
# Keep only non-missing, non-blank answers (see the note in SU-4 on why `> 0` is not used)
decrease_obstacles <- decrease_obstacles[
  !is.na(decrease_obstacles$all_data.decrease_obstacles) &
    nzchar(trimws(decrease_obstacles$all_data.decrease_obstacles)),
]
write_xlsx(decrease_obstacles, "./pilot/qualitative_analysis/all/decrease_obstacles.xlsx")
# After coding has been done, file gets read again
decrease_obstacles_coded <- readxl::read_excel("./pilot/qualitative_analysis/all/coded/decrease_obstacles_coded.xlsx")

# Plotting of common topics
# Create labels
# TODO: add labels of coded topics
decrease_obstacles_labels <- c(
  "Keep talking about preregistration",
  "Highlight benefits",
  "Restricted possibility for modification (with timestamp)",
  "Communicate clearer that deviations are acceptable (if disclosed)",
  "Set it as the norm",
  "Make forms easier"
)

# Create sum scores (same order as the labels; empty cells in the coded sheet are ignored)
# TODO: add column names of coded topics
decrease_obstacles_sums <- c(
  sum(decrease_obstacles_coded$keep_talking_about_it, na.rm = TRUE),
  sum(decrease_obstacles_coded$highlight_benefits, na.rm = TRUE),
  sum(decrease_obstacles_coded$modification, na.rm = TRUE),
  sum(decrease_obstacles_coded$deviations_possible, na.rm = TRUE),
  sum(decrease_obstacles_coded$norm, na.rm = TRUE),
  sum(decrease_obstacles_coded$forms_easier, na.rm = TRUE)
)

# Combine and draw plot
# TODO: add labels of coded topics
decrease_obstacles_dataframe <- data.frame(decrease_obstacles_labels, decrease_obstacles_sums)
decrease_obstacles_dataframe$decrease_obstacles_labels <- factor(
  decrease_obstacles_dataframe$decrease_obstacles_labels,
  levels = decrease_obstacles_labels
)
# The axis label names the obstacles question; it previously repeated the SU-4 label.
ggplot(decrease_obstacles_dataframe, aes(x = decrease_obstacles_labels, y = decrease_obstacles_sums)) +
  geom_bar(stat = "identity", position = "dodge") +
  coord_flip() +
  xlab("Suggestions to decrease obstacles to preregistration") +
  ylab("Coded themes indicated by sample")

##############################################################################################

# SU-6: Improvements

# Improvements regarding templates
# Code open text input
improvements_template <- data.frame(all_data$id, all_data$improvements_template)
improvements_template$all_data.improvements_template <- as.character(improvements_template$all_data.improvements_template)
# Keep only non-missing, non-blank answers (see the note in SU-4 on why `> 0` is not used)
improvements_template <- improvements_template[
  !is.na(improvements_template$all_data.improvements_template) &
    nzchar(trimws(improvements_template$all_data.improvements_template)),
]
write_xlsx(improvements_template, "./pilot/qualitative_analysis/all/improvements_template.xlsx")
# After coding has been done, file gets read again
improvements_template_coded <- readxl::read_excel("./pilot/qualitative_analysis/all/coded/improvements_template_coded.xlsx")

# Plotting of common topics
# Create labels
# TODO: add labels of coded topics
improvements_template_labels <- c(
  "Gap text, which you fill in with your own details",
  "Only include relevant questions",
  "Fork preregistrations to make changes",
  "More room for analyses that were difficult to foresee (e.g., for junior scientists)",
  "Improve editing by multiple collaborators"
)

# Create sum scores (same order as the labels; empty cells in the coded sheet are ignored)
# TODO: add column names of coded topics
improvements_template_sums <- c(
  sum(improvements_template_coded$gap_text, na.rm = TRUE),
  sum(improvements_template_coded$shorter, na.rm = TRUE),
  sum(improvements_template_coded$fork, na.rm = TRUE),
  sum(improvements_template_coded$more_room, na.rm = TRUE),
  sum(improvements_template_coded$improve_edits_by_collaborators, na.rm = TRUE)
)

# Combine and draw plot
# TODO: add labels of coded topics
improvements_template_dataframe <- data.frame(improvements_template_labels, improvements_template_sums)
improvements_template_dataframe$improvements_template_labels <- factor(
  improvements_template_dataframe$improvements_template_labels,
  levels = improvements_template_labels
)
ggplot(improvements_template_dataframe, aes(x = improvements_template_labels, y = improvements_template_sums)) +
  geom_bar(stat = "identity", position = "dodge") +
  coord_flip() +
  xlab("Suggestions for improving preregistration templates") +
  ylab("Coded themes indicated by sample")

# Improvements regarding repositories and publication
# Code open text input
improvements_repositories_publication <- data.frame(all_data$id, all_data$improvements_repositories_publication)
improvements_repositories_publication$all_data.improvements_repositories_publication <- as.character(improvements_repositories_publication$all_data.improvements_repositories_publication)
# Keep only non-missing, non-blank answers (see the note in SU-4 on why `> 0` is not used)
improvements_repositories_publication <- improvements_repositories_publication[
  !is.na(improvements_repositories_publication$all_data.improvements_repositories_publication) &
    nzchar(trimws(improvements_repositories_publication$all_data.improvements_repositories_publication)),
]
write_xlsx(improvements_repositories_publication, "./pilot/qualitative_analysis/all/improvements_repositories_publication.xlsx")
# After coding has been done, file gets read again
improvements_repositories_publication_coded <- readxl::read_excel("./pilot/qualitative_analysis/all/coded/improvements_repositories_publication_coded.xlsx")

# Plotting of common topics
# Create labels
# TODO: add labels of coded topics
improvements_repositories_publication_labels <- c("More guidance on preregistering different studies (e.g., qualitative, mixed-methods)")

# Create sum scores (empty cells in the coded sheet are ignored)
# TODO: add column names of coded topics
improvements_repositories_publication_sums <- c(sum(improvements_repositories_publication_coded$more_guidance, na.rm = TRUE))

# Combine and draw plot
# TODO: add labels of coded topics
improvements_repositories_publication_dataframe <- data.frame(improvements_repositories_publication_labels, improvements_repositories_publication_sums)
# Fix the level order of the repositories/publication labels (labels default to the levels)
improvements_repositories_publication_dataframe$improvements_repositories_publication_labels <- factor(
  improvements_repositories_publication_dataframe$improvements_repositories_publication_labels,
  levels = c("More guidance on preregistering different studies (e.g., qualitative, mixed-methods)")
)
ggplot(
  improvements_repositories_publication_dataframe,
  aes(x = improvements_repositories_publication_labels, y = improvements_repositories_publication_sums)
) +
  geom_bar(stat = "identity", position = "dodge") +
  coord_flip() +
  xlab("Suggestions for improving repositories and publication of preregistration") +
  ylab("Coded themes indicated by sample")

# Improvements regarding review process
# Code open text input
improvements_review <- data.frame(all_data$id, all_data$improvements_review)
improvements_review$all_data.improvements_review <- as.character(improvements_review$all_data.improvements_review)
# Keep only participants who actually wrote something. The text is tested
# explicitly for non-missing, non-blank content: comparing a character column
# with `> 0` is a locale-dependent string comparison against "0", and an NA in
# a logical row index produces all-NA rows in the exported sheet.
improvements_review <- improvements_review[
  !is.na(improvements_review$all_data.improvements_review) &
    nzchar(trimws(improvements_review$all_data.improvements_review)),
]
write_xlsx(improvements_review, "./pilot/qualitative_analysis/all/improvements_review.xlsx")
# After coding has been done, file gets read again
improvements_review_coded <- readxl::read_excel("./pilot/qualitative_analysis/all/coded/improvements_review_coded.xlsx")

# Plotting of common topics
# Create labels
# TODO: add labels of coded topics
improvements_review_labels <- c(
  "Increase awareness in reviewers/editors",
  "Make it a requirement to publish"
)

# Create sum scores (same order as the labels; empty cells in the coded sheet are ignored)
# TODO: add column names of coded topics
improvements_review_sums <- c(
  sum(improvements_review_coded$awareness_reviewers_editors, na.rm = TRUE),
  sum(improvements_review_coded$requirement_for_publication, na.rm = TRUE)
)

# Combine and draw plot
# TODO: add labels of coded topics
improvements_review_dataframe <- data.frame(improvements_review_labels, improvements_review_sums)
improvements_review_dataframe$improvements_review_labels <- factor(
  improvements_review_dataframe$improvements_review_labels,
  levels = improvements_review_labels
)
ggplot(improvements_review_dataframe, aes(x = improvements_review_labels, y = improvements_review_sums)) +
  geom_bar(stat = "identity", position = "dodge") +
  coord_flip() +
  xlab("Suggestions for improving the review process for preregistration") +
  ylab("Coded themes indicated by sample")

# Improvements regarding the integration of preregistrations in papers
# Code open text input
improvements_papers <- data.frame(all_data$id, all_data$improvements_papers)
improvements_papers$all_data.improvements_papers <- as.character(improvements_papers$all_data.improvements_papers)
# Keep only non-missing, non-blank answers (same rationale as for the review answers)
improvements_papers <- improvements_papers[
  !is.na(improvements_papers$all_data.improvements_papers) &
    nzchar(trimws(improvements_papers$all_data.improvements_papers)),
]
write_xlsx(improvements_papers, "./pilot/qualitative_analysis/all/improvements_papers.xlsx")
# After coding has been done, file gets read again
improvements_papers_coded <- readxl::read_excel("./pilot/qualitative_analysis/all/coded/improvements_papers_coded.xlsx")

# Plotting of common topics
# Create labels
# TODO: add labels of coded topics
improvements_papers_labels <- c("Articles should show not only that a study was preregistered, but also which aspects, and in which there were deviations")

# Create sum scores (empty cells in the coded sheet are ignored)
# TODO: add column names of coded topics
improvements_papers_sums <- c(sum(improvements_papers_coded$articles_show, na.rm = TRUE))

# Combine and draw plot
# TODO: add labels of coded topics
improvements_papers_dataframe <- data.frame(improvements_papers_labels, improvements_papers_sums)
improvements_papers_dataframe$improvements_papers_labels <- factor(
  improvements_papers_dataframe$improvements_papers_labels,
  levels = improvements_papers_labels
)
ggplot(improvements_papers_dataframe, aes(x = improvements_papers_labels, y = improvements_papers_sums)) +
  geom_bar(stat = "identity", position = "dodge") +
  coord_flip() +
  xlab("Suggestions for improving the integration of preregistrations in papers") +
  ylab("Coded themes indicated by sample")

# Improvements regarding education about preregistration
# Code open text input
# improvements_education <- data.frame(all_data$id, all_data$improvements_education)
# improvements_education$all_data.improvements_education <- as.character(improvements_education$all_data.improvements_education)
# improvements_education <- improvements_education[improvements_education$all_data.improvements_education > 0, ]
# write_xlsx(improvements_education, "./pilot/qualitative_analysis/all/improvements_education.xlsx")

# NOTE(review): the column is read under the misspelled name
# `impovements_education` and copied to a correctly spelled column; presumably
# the pilot export carries that typo - confirm before renaming anything.
improvements_education <- data.frame(all_data$id, all_data$impovements_education)
improvements_education$all_data.improvements_education <- improvements_education$all_data.impovements_education
improvements_education$all_data.improvements_education <- as.character(improvements_education$all_data.improvements_education)
# Keep only non-missing, non-blank answers (same rationale as for the review answers)
improvements_education <- improvements_education[
  !is.na(improvements_education$all_data.improvements_education) &
    nzchar(trimws(improvements_education$all_data.improvements_education)),
]
write_xlsx(improvements_education, "./pilot/qualitative_analysis/all/improvements_education.xlsx")
# After coding has been done, file gets read again
improvements_education_coded <- readxl::read_excel("./pilot/qualitative_analysis/all/coded/improvements_education_coded.xlsx")

# Plotting of common topics
# Create labels
# TODO: add labels of coded topics
improvements_education_labels <- c(
  "Promotion by universities",
  "More webinars/lectures"
)

# Create sum scores (same order as the labels; empty cells in the coded sheet are ignored)
# TODO: add column names of coded topics
improvements_education_sums <- c(
  sum(improvements_education_coded$universities, na.rm = TRUE),
  sum(improvements_education_coded$webinars_lectures, na.rm = TRUE)
)

# Combine and draw plot
# TODO: add labels of coded topics
improvements_education_dataframe <- data.frame(improvements_education_labels, improvements_education_sums)
improvements_education_dataframe$improvements_education_labels <- factor(
  improvements_education_dataframe$improvements_education_labels,
  levels = improvements_education_labels
)
ggplot(improvements_education_dataframe, aes(x = improvements_education_labels, y = improvements_education_sums)) +
  geom_bar(stat = "identity", position = "dodge") +
  coord_flip() +
  xlab("Suggestions for improving education of preregistration") +
  ylab("Coded themes indicated by sample")

# NOTE:
# Measures for inter-rater reliability will be added