#install.packages("remotes")
#remotes::install_github("DevPsyLab/petersenlab")
#remotes::install_github("paulhendricks/anonymizer")
library("anonymizer")
library("tidyverse")
# Simulate a small example dataset: one row per participant, with a
# sequential participant ID and two independently drawn normal variables.
set.seed(52242) # fix the RNG seed so the simulated values are reproducible
sampleSize <- 100

# seq_len() (rather than 1:sampleSize) yields an empty vector when
# sampleSize is 0 instead of counting down as c(1, 0), so ID always has the
# same length as X and Y.
ID <- seq_len(sampleSize)
X <- rnorm(sampleSize)
Y <- rnorm(sampleSize)

# Assemble the simulated variables into the data frame used below.
mydata <- data.frame(
  ID = ID,
  X = X,
  Y = Y)
To help protect participant anonymity, it is important to anonymize participant IDs so their data cannot be stitched together across papers. To anonymize participant IDs, use the following script and change the seed for every paper so that a given participant gets a different anonymized code each time.
# Load the packages used by the anonymization script.
library("tidyverse")
library("remotes")

# Install `anonymizer` from GitHub only when it is not already installed, so
# that re-running the script does not need network access or trigger a
# reinstall on every run.
#install.packages("anonymizer")
if (!requireNamespace("anonymizer", quietly = TRUE)) {
  remotes::install_github("paulhendricks/anonymizer")
}
library("anonymizer")
# Anonymize the participant IDs in a single pipeline:
#   1. hash each original ID into `anonymizedID`
#      (change the seed for every paper, e.g., based on the date, so that each
#      participant gets a new code each time);
#   2. move `anonymizedID` to the first column;
#   3. sort rows by `anonymizedID` to mix up participants, so they are not in
#      the same order for every paper;
#   4. drop the original `ID` column.
mydata <- mydata %>%
  mutate(
    anonymizedID = anonymize(
      ID,
      .algo = "crc32",
      .seed = 20230426
    )
  ) %>%
  select(anonymizedID, everything()) %>%
  arrange(anonymizedID) %>%
  select(-ID)
# Generate Random Anonymized ID
# Only hash when the original `ID` column is still present. If `ID` has
# already been dropped, `mydata$ID` is NULL, so there is nothing valid to
# anonymize and the existing `anonymizedID` values are kept as they are.
if ("ID" %in% names(mydata)) {
  mydata$anonymizedID <- anonymize(
    mydata$ID,
    .algo = "crc32",
    .seed = 20230426) # change seed for every paper (based on the date) so that each participant gets a new code each time
}

# Re-Sort Data by Random Anonymized ID to Mix-Up Participants (so they are not in the same order for every paper)
mydata <- mydata %>%
  select(anonymizedID, everything()) %>%
  arrange(anonymizedID)

# Print the Data
# NOTE(review): this prints before `ID` is dropped, so the output shows the
# original ID next to its anonymized code whenever `ID` is still present --
# presumably intentional for demonstration; confirm before sharing output.
mydata

# Remove the Original ID Column
# any_of() makes this a no-op (instead of an error) when `ID` is already gone.
mydata <- mydata %>%
  select(-any_of("ID"))
# Report the R version, platform, locale, and attached/loaded package versions
# so the analysis environment is recorded alongside the results.
sessionInfo()
R version 4.4.2 (2024-10-31)
Platform: x86_64-pc-linux-gnu
Running under: Ubuntu 22.04.5 LTS
Matrix products: default
BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so; LAPACK version 3.10.0
locale:
[1] LC_CTYPE=C.UTF-8 LC_NUMERIC=C LC_TIME=C.UTF-8
[4] LC_COLLATE=C.UTF-8 LC_MONETARY=C.UTF-8 LC_MESSAGES=C.UTF-8
[7] LC_PAPER=C.UTF-8 LC_NAME=C LC_ADDRESS=C
[10] LC_TELEPHONE=C LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C
time zone: UTC
tzcode source: system (glibc)
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] lubridate_1.9.3 forcats_1.0.0 stringr_1.5.1 dplyr_1.1.4
[5] purrr_1.0.2 readr_2.1.5 tidyr_1.3.1 tibble_3.2.1
[9] ggplot2_3.5.1 tidyverse_2.0.0 anonymizer_0.2.2
loaded via a namespace (and not attached):
[1] gtable_0.3.6 jsonlite_1.8.9 compiler_4.4.2 tidyselect_1.2.1
[5] jquerylib_0.1.4 scales_1.3.0 yaml_2.3.10 fastmap_1.2.0
[9] R6_2.5.1 generics_0.1.3 knitr_1.49 munsell_0.5.1
[13] bslib_0.8.0 pillar_1.9.0 tzdb_0.4.0 rlang_1.1.4
[17] utf8_1.2.4 stringi_1.8.4 cachem_1.1.0 xfun_0.49
[21] sass_0.4.9 timechange_0.3.0 cli_3.6.3 withr_3.0.2
[25] magrittr_2.0.3 digest_0.6.37 grid_4.4.2 hms_1.1.3
[29] lifecycle_1.0.4 vctrs_0.6.5 evaluate_1.0.1 glue_1.8.0
[33] fansi_1.0.6 colorspace_2.1-1 rmarkdown_2.29 tools_4.4.2
[37] pkgconfig_2.0.3 htmltools_0.5.8.1