ggplot2
ggplot2
ggplot2
ggplot2
ggplot2
ggplot2
Exploratory data analysis is important for understanding your data, checking for data issues/errors, and checking assumptions for different statistical models.
LOOK AT YOUR DATA—this is one of the most overlooked steps in data analysis!
#install.packages("remotes")
#remotes::install_github("DevPsyLab/petersenlab")
library("petersenlab")
library("car")
library("vioplot")
library("ellipse")
library("nlme")
library("effects")
library("corrplot")
library("ggplot2")
library("psych")
library("tidyverse")
library("purrr")
library("naniar")
library("mvnormtest")
library("ggExtra")
library("XICOR")
# Simulate an example dataset: 100 IDs x 10 repeated observations each,
# a right-skewed predictor, an outcome linearly related to it, two discrete
# variables prone to overplotting, and two 5-level categorical variables.
# NOTE: statement order matters — each call advances the RNG stream, so the
# exact values are reproducible only with this sequence after set.seed().
set.seed(52242)
n <- 1000
ID <- rep(1:100, each = 10)
predictor <- rbeta(n, 1.5, 5) * 100
outcome <- predictor + rnorm(n, mean = 0, sd = 20) + 50
predictorOverplot <- sample(1:50, n, replace = TRUE)
outcomeOverplot <- predictorOverplot + sample(1:75, n, replace = TRUE)
categorical1 <- sample(1:5, size = n, replace = TRUE)
categorical2 <- sample(1:5, size = n, replace = TRUE)
mydata <- data.frame(
ID = ID,
predictor = predictor,
outcome = outcome,
predictorOverplot = predictorOverplot,
outcomeOverplot = outcomeOverplot,
categorical1 = categorical1,
categorical2 = categorical2)
# Introduce missingness at random: 1% for the continuous/overplot variables,
# 3% and 7% for the two categorical variables.
mydata[sample(1:n, size = 10), "predictor"] <- NA
mydata[sample(1:n, size = 10), "outcome"] <- NA
mydata[sample(1:n, size = 10), "predictorOverplot"] <- NA
mydata[sample(1:n, size = 10), "outcomeOverplot"] <- NA
mydata[sample(1:n, size = 30), "categorical1"] <- NA
mydata[sample(1:n, size = 70), "categorical2"] <- NA
round(data.frame(psych::describe(mydata)), 2)
vars <dbl> | n <dbl> | mean <dbl> | sd <dbl> | median <dbl> | trimmed <dbl> | mad <dbl> | min <dbl> | max <dbl> | ||
---|---|---|---|---|---|---|---|---|---|---|
ID | 1 | 1000 | 50.50 | 28.88 | 50.50 | 50.50 | 37.06 | 1.00 | 100.00 | |
predictor | 2 | 990 | 22.64 | 15.42 | 19.61 | 21.02 | 14.40 | 0.11 | 81.69 | |
outcome | 3 | 990 | 73.36 | 25.88 | 73.17 | 73.02 | 25.71 | 5.82 | 171.90 | |
predictorOverplot | 4 | 990 | 25.54 | 14.45 | 26.00 | 25.62 | 17.79 | 1.00 | 50.00 | |
outcomeOverplot | 5 | 990 | 63.53 | 25.58 | 63.00 | 63.65 | 28.17 | 2.00 | 123.00 | |
categorical1 | 6 | 970 | 2.96 | 1.40 | 3.00 | 2.95 | 1.48 | 1.00 | 5.00 | |
categorical2 | 7 | 930 | 2.95 | 1.39 | 3.00 | 2.94 | 1.48 | 1.00 | 5.00 |
length(unique(mydata$ID))
[1] 100
map(mydata, ~mean(is.na(.))) %>% t %>% t
[,1]
ID 0
predictor 0.01
outcome 0.01
predictorOverplot 0.01
outcomeOverplot 0.01
categorical1 0.03
categorical2 0.07
# Frequency table (including an NA count) for each categorical variable.
# Pin the factor levels so every column yields an equal-length table:
# otherwise sapply() would silently return a ragged list (and t() would
# garble it) if a level happened to be absent from one column.
mydata %>%
select(categorical1, categorical2) %>%
sapply(function(x) table(factor(x, levels = 1:5), useNA = "always")) %>%
t()
1 2 3 4 5 <NA>
categorical1 196 204 191 199 180 30
categorical2 195 179 193 202 161 70
round(colMeans(mydata, na.rm = TRUE), 2)
ID predictor outcome predictorOverplot
50.50 22.64 73.36 25.54
outcomeOverplot categorical1 categorical2
63.53 2.96 2.95
round(apply(mydata, 2, function(x) mean(x, na.rm = TRUE)), 2)
ID predictor outcome predictorOverplot
50.50 22.64 73.36 25.54
outcomeOverplot categorical1 categorical2
63.53 2.96 2.95
# Variable means via dplyr: one "<var>_mean" column per variable.
mydata |>
  summarise(across(
    everything(),
    list(mean = \(x) mean(x, na.rm = TRUE))
  )) |>
  round(2)
ID_mean <dbl> | predictor_mean <dbl> | outcome_mean <dbl> | predictorOverplot_mean <dbl> | outcomeOverplot_mean <dbl> | categorical1_mean <dbl> | categorical2_mean <dbl> |
---|---|---|---|---|---|---|
50.5 | 22.64 | 73.36 | 25.54 | 63.53 | 2.96 | 2.95 |
round(apply(mydata, 2, function(x) median(x, na.rm = TRUE)), 2)
ID predictor outcome predictorOverplot
50.50 19.61 73.17 26.00
outcomeOverplot categorical1 categorical2
63.00 3.00 3.00
# Variable medians via dplyr: one "<var>_median" column per variable.
mydata |>
  summarise(across(
    everything(),
    list(median = \(x) median(x, na.rm = TRUE))
  )) |>
  round(2)
ID_median <dbl> | predictor_median <dbl> | outcome_median <dbl> | predictorOverplot_median <dbl> | outcomeOverplot_median <dbl> | categorical1_median <dbl> | |
---|---|---|---|---|---|---|
50.5 | 19.61 | 73.17 | 26 | 63 | 3 |
round(apply(mydata, 2, function(x) Mode(x, multipleModes = "mean")), 2)
ID predictor outcome predictorOverplot
50.50 22.64 73.36 35.00
outcomeOverplot categorical1 categorical2
63.33 2.00 4.00
# Variable modes via dplyr: one "<var>_mode" column per variable
# (petersenlab::Mode with tied modes averaged).
mydata |>
  summarise(across(
    everything(),
    list(mode = \(x) Mode(x, multipleModes = "mean"))
  )) |>
  round(2)
ID_mode <dbl> | predictor_mode <dbl> | outcome_mode <dbl> | predictorOverplot_mode <dbl> | outcomeOverplot_mode <dbl> | categorical1_mode <dbl> | categorical2_mode <dbl> |
---|---|---|---|---|---|---|
50.5 | 22.64 | 73.36 | 35 | 63.33 | 2 | 4 |
Compute all of these measures of central tendency:
# Mean, median, and mode for every variable, then reshape so each row is a
# variable and each column a measure of central tendency. The "." separator
# in .names is what names_sep splits on below.
mydata |>
  summarise(across(
    everything(),
    list(
      mean = \(x) mean(x, na.rm = TRUE),
      median = \(x) median(x, na.rm = TRUE),
      mode = \(x) Mode(x, multipleModes = "mean")
    ),
    .names = "{.col}.{.fn}"
  )) |>
  round(2) |>
  pivot_longer(
    everything(),
    names_to = c("variable", "index"),
    names_sep = "\\."
  ) |>
  pivot_wider(names_from = index, values_from = value)
variable <chr> | mean <dbl> | median <dbl> | mode <dbl> | |
---|---|---|---|---|
ID | 50.50 | 50.50 | 50.50 | |
predictor | 22.64 | 19.61 | 22.64 | |
outcome | 73.36 | 73.17 | 73.36 | |
predictorOverplot | 25.54 | 26.00 | 35.00 | |
outcomeOverplot | 63.53 | 63.00 | 63.33 | |
categorical1 | 2.96 | 3.00 | 2.00 | |
categorical2 | 2.95 | 3.00 | 4.00 |
Compute all of these measures of dispersion:
# SD, min, max, skewness, and kurtosis for every variable, then reshape so
# each row is a variable and each column a measure of dispersion/shape
# (skew and kurtosi come from the psych package).
mydata |>
  summarise(across(
    everything(),
    list(
      SD = \(x) sd(x, na.rm = TRUE),
      min = \(x) min(x, na.rm = TRUE),
      max = \(x) max(x, na.rm = TRUE),
      skewness = \(x) skew(x, na.rm = TRUE),
      kurtosis = \(x) kurtosi(x, na.rm = TRUE)
    ),
    .names = "{.col}.{.fn}"
  )) |>
  round(2) |>
  pivot_longer(
    everything(),
    names_to = c("variable", "index"),
    names_sep = "\\."
  ) |>
  pivot_wider(names_from = index, values_from = value)
variable <chr> | SD <dbl> | min <dbl> | max <dbl> | skewness <dbl> | kurtosis <dbl> |
---|---|---|---|---|---|
ID | 28.88 | 1.00 | 100.00 | 0.00 | -1.20 |
predictor | 15.42 | 0.11 | 81.69 | 0.97 | 0.71 |
outcome | 25.88 | 5.82 | 171.90 | 0.18 | 0.11 |
predictorOverplot | 14.45 | 1.00 | 50.00 | -0.05 | -1.19 |
outcomeOverplot | 25.58 | 2.00 | 123.00 | -0.04 | -0.75 |
categorical1 | 1.40 | 1.00 | 5.00 | 0.03 | -1.29 |
categorical2 | 1.39 | 1.00 | 5.00 | 0.01 | -1.27 |
Consider transforming data if |skewness| > 0.8 or if |kurtosis| > 3.0.
Add summary statistics to the bottom of correlation matrices in papers:
cor.table(mydata, type = "manuscript")
ID <chr> | predictor <chr> | outcome <chr> | predictorOverplot <chr> | outcomeOverplot <chr> | categorical1 <chr> | categorical2 <chr> | |
---|---|---|---|---|---|---|---|
1. ID | 1.00 | ||||||
2. predictor | -.02 | 1.00 | |||||
3. outcome | -.03 | .64*** | 1.00 | ||||
4. predictorOverplot | .03 | .04 | .03 | 1.00 | |||
5. outcomeOverplot | .00 | .04 | .06† | .57*** | 1.00 | ||
6. categorical1 | .04 | .00 | -.02 | .02 | .01 | 1.00 | |
7. categorical2 | .01 | -.05 | -.02 | .04 | .04 | -.09* | 1.00 |
# Build a summary table — n, % missing, M, SD, min, max, skewness, kurtosis —
# as one "<var>.<stat>" column per combination, then reshape to one row per
# variable with one column per statistic.
summaryTable <- mydata %>%
summarise(across(everything(),
.fns = list(n = ~ length(na.omit(.)),
missingness = ~ mean(is.na(.)) * 100,
M = ~ mean(., na.rm = TRUE),
SD = ~ sd(., na.rm = TRUE),
min = ~ min(., na.rm = TRUE),
max = ~ max(., na.rm = TRUE),
skewness = ~ skew(., na.rm = TRUE),
kurtosis = ~ kurtosi(., na.rm = TRUE)),
.names = "{.col}.{.fn}")) %>%
pivot_longer(cols = everything(),
names_to = c("variable","index"),
names_sep = "\\.") %>%
pivot_wider(names_from = index,
values_from = value)
# Transpose so statistics form the rows and variables the columns — the
# layout used beneath correlation matrices in papers. Dropping column 1
# ([-1]) removes the character "variable" column so t() stays numeric.
summaryTableTransposed <- summaryTable[-1] %>%
t() %>%
as.data.frame() %>%
setNames(summaryTable$variable) %>%
round(., digits = 2)
summaryTableTransposed
ID <dbl> | predictor <dbl> | outcome <dbl> | predictorOverplot <dbl> | outcomeOverplot <dbl> | categorical1 <dbl> | categorical2 <dbl> | |
---|---|---|---|---|---|---|---|
n | 1000.00 | 990.00 | 990.00 | 990.00 | 990.00 | 970.00 | 930.00 |
missingness | 0.00 | 1.00 | 1.00 | 1.00 | 1.00 | 3.00 | 7.00 |
M | 50.50 | 22.64 | 73.36 | 25.54 | 63.53 | 2.96 | 2.95 |
SD | 28.88 | 15.42 | 25.88 | 14.45 | 25.58 | 1.40 | 1.39 |
min | 1.00 | 0.11 | 5.82 | 1.00 | 2.00 | 1.00 | 1.00 |
max | 100.00 | 81.69 | 171.90 | 50.00 | 123.00 | 5.00 | 5.00 |
skewness | 0.00 | 0.97 | 0.18 | -0.05 | -0.04 | 0.03 | 0.01 |
kurtosis | -1.20 | 0.71 | 0.11 | -1.19 | -0.75 | -1.29 | -1.27 |
See here for resources for creating figures in R.
# Histogram (base graphics)
hist(mydata$outcome)
ggplot2
# Histogram (ggplot2); color = 1 draws black bin outlines
ggplot(mydata, aes(x = outcome)) +
geom_histogram(color = 1)
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Warning: Removed 10 rows containing non-finite outside the scale range
(`stat_bin()`).
# Density-scaled histogram with kernel density curve and rug (base graphics)
hist(mydata$outcome, prob = TRUE)
lines(density(mydata$outcome, na.rm = TRUE))
rug(mydata$outcome)
ggplot2
# Density-scaled histogram with density curve and rug (ggplot2)
ggplot(mydata, aes(x = outcome)) +
geom_histogram(aes(y = after_stat(density)), color = 1) +
geom_density() +
geom_rug()
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Warning: Removed 10 rows containing non-finite outside the scale range
(`stat_bin()`).
Warning: Removed 10 rows containing non-finite outside the scale range
(`stat_density()`).
# Kernel density plot (base graphics)
plot(density(mydata$outcome, na.rm = TRUE))
ggplot2
# Kernel density plot (ggplot2)
ggplot(mydata, aes(x = outcome)) +
geom_density()
Warning: Removed 10 rows containing non-finite outside the scale range
(`stat_density()`).
# Box plot (base graphics)
boxplot(mydata$outcome, horizontal = TRUE)
ggplot2
# Box plot (ggplot2)
ggplot(mydata, aes(x = outcome)) +
geom_boxplot()
Warning: Removed 10 rows containing non-finite outside the scale range
(`stat_boxplot()`).
# Violin plot (vioplot); NAs must be removed manually, unlike ggplot2
vioplot(na.omit(mydata$outcome), horizontal = TRUE)
ggplot2
# Violin plot (ggplot2)
ggplot(mydata, aes(x = "", y = outcome)) +
geom_violin()
Warning: Removed 10 rows containing non-finite outside the scale range
(`stat_ydensity()`).
For more advanced scatterplots, see here.
# Pearson correlation matrix using pairwise deletion for missing data
cor(mydata, use = "pairwise.complete.obs")
ID predictor outcome predictorOverplot
ID 1.000000000 -0.019094589 -0.02501796 0.02984671
predictor -0.019094589 1.000000000 0.63517610 0.04350431
outcome -0.025017962 0.635176098 1.00000000 0.02976820
predictorOverplot 0.029846711 0.043504314 0.02976820 1.00000000
outcomeOverplot 0.003150872 0.043344473 0.06289224 0.56564445
categorical1 0.040442534 -0.002422711 -0.01532232 0.02498938
categorical2 0.008920415 -0.046927177 -0.01868628 0.03880135
outcomeOverplot categorical1 categorical2
ID 0.003150872 0.040442534 0.008920415
predictor 0.043344473 -0.002422711 -0.046927177
outcome 0.062892238 -0.015322318 -0.018686282
predictorOverplot 0.565644447 0.024989385 0.038801346
outcomeOverplot 1.000000000 0.011871644 0.043682632
categorical1 0.011871644 1.000000000 -0.086310192
categorical2 0.043682632 -0.086310192 1.000000000
# Significance test (with 95% CI) of a single Pearson correlation
cor.test( ~ predictor + outcome, data = mydata)
Pearson's product-moment correlation
data: predictor and outcome
t = 25.718, df = 978, p-value < 2.2e-16
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
0.5962709 0.6711047
sample estimates:
cor
0.6351761
# Correlation matrix with p-values and pairwise n (petersenlab::cor.table)
cor.table(mydata)
ID <chr> | predictor <chr> | outcome <chr> | predictorOverplot <chr> | outcomeOverplot <chr> | categorical1 <chr> | categorical2 <chr> | |
---|---|---|---|---|---|---|---|
1. ID.r | 1.00 | -.02 | -.03 | .03 | .00 | .04 | .01 |
2. sig | NA | .55 | .43 | .35 | .92 | .21 | .79 |
3. n | 1000 | 990 | 990 | 990 | 990 | 970 | 930 |
4. predictor.r | -.02 | 1.00 | .64*** | .04 | .04 | .00 | -.05 |
5. sig | .55 | NA | .00 | .17 | .18 | .94 | .15 |
6. n | 990 | 990 | 980 | 981 | 980 | 960 | 922 |
7. outcome.r | -.03 | .64*** | 1.00 | .03 | .06† | -.02 | -.02 |
8. sig | .43 | .00 | NA | .35 | .05 | .64 | .57 |
9. n | 990 | 980 | 990 | 980 | 980 | 960 | 920 |
10. predictorOverplot.r | .03 | .04 | .03 | 1.00 | .57*** | .02 | .04 |
# Spearman (rank-order) correlations: robust to monotonic nonlinearity
cor(mydata, use = "pairwise.complete.obs", method = "spearman")
ID predictor outcome predictorOverplot
ID 1.000000000 -0.01181085 -0.02805450 0.02925155
predictor -0.011810853 1.00000000 0.60003760 0.02672008
outcome -0.028054497 0.60003760 1.00000000 0.02460831
predictorOverplot 0.029251546 0.02672008 0.02460831 1.00000000
outcomeOverplot 0.001416626 0.03278463 0.07073642 0.54841297
categorical1 0.040888369 -0.01162260 -0.01906631 0.02350886
categorical2 0.009137467 -0.03763251 -0.02273699 0.04062487
outcomeOverplot categorical1 categorical2
ID 0.001416626 0.04088837 0.009137467
predictor 0.032784628 -0.01162260 -0.037632515
outcome 0.070736421 -0.01906631 -0.022736992
predictorOverplot 0.548412967 0.02350886 0.040624873
outcomeOverplot 1.000000000 0.01505255 0.044184959
categorical1 0.015052553 1.00000000 -0.086290083
categorical2 0.044184959 -0.08629008 1.000000000
# Significance test of a single Spearman correlation
cor.test( ~ predictor + outcome, data = mydata, method = "spearman")
Spearman's rank correlation rho
data: predictor and outcome
S = 62740170, p-value < 2.2e-16
alternative hypothesis: true rho is not equal to 0
sample estimates:
rho
0.6000376
# Spearman correlation matrix with p-values and pairwise n
cor.table(mydata, correlation = "spearman")
ID <chr> | predictor <chr> | outcome <chr> | predictorOverplot <chr> | outcomeOverplot <chr> | categorical1 <chr> | categorical2 <chr> | |
---|---|---|---|---|---|---|---|
1. ID.r | 1.00 | -.01 | -.03 | .03 | .00 | .04 | .01 |
2. sig | NA | .71 | .38 | .36 | .96 | .20 | .78 |
3. n | 1000 | 990 | 990 | 990 | 990 | 970 | 930 |
4. predictor.r | -.01 | 1.00 | .60*** | .03 | .03 | -.01 | -.04 |
5. sig | .71 | NA | .00 | .40 | .31 | .72 | .25 |
6. n | 990 | 990 | 980 | 981 | 980 | 960 | 922 |
7. outcome.r | -.03 | .60*** | 1.00 | .02 | .07* | -.02 | -.02 |
8. sig | .38 | .00 | NA | .44 | .03 | .56 | .49 |
9. n | 990 | 980 | 990 | 980 | 980 | 960 | 920 |
10. predictorOverplot.r | .03 | .03 | .02 | 1.00 | .55*** | .02 | .04 |
Xi (ξ) is an index of the degree of dependence between two variables, which is useful as an index of nonlinear correlation.
Chatterjee, S. (2021). A new coefficient of correlation. Journal of the American Statistical Association, 116(536), 2009-2022. https://doi.org/10.1080/01621459.2020.1758115
# Chatterjee's xi coefficient (XICOR package); note xi is asymmetric in
# its arguments, unlike Pearson/Spearman correlations
calculateXI(
mydata$predictor,
mydata$outcome)
[1] 0.2319062
# Scatterplot (base graphics) of predictor vs. outcome with the fitted
# least-squares regression line overlaid.
# BUG FIX: abline() previously fit lm(outcomeOverplot ~ predictorOverplot),
# i.e., a regression line from different variables than the ones plotted;
# the overlaid line must come from outcome ~ predictor.
plot(
mydata$predictor,
mydata$outcome)
abline(lm(
outcome ~ predictor,
data = mydata,
na.action = "na.exclude"))
ggplot2
# Scatterplot (ggplot2) with linear fit and 95% confidence band
ggplot(mydata, aes(x = predictor, y = outcome)) +
geom_point() +
geom_smooth(method = "lm", se = TRUE)
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 20 rows containing non-finite outside the scale range
(`stat_smooth()`).
Warning: Removed 20 rows containing missing values or values outside the scale range
(`geom_point()`).
# Scatterplot with marginal density plots (ggExtra::ggMarginal)
scatterplot <-
ggplot(mydata, aes(x = predictor, y = outcome)) +
geom_point() +
geom_smooth(method = "lm", se = TRUE)
densityMarginal <- ggMarginal(
scatterplot,
type = "density",
xparams = list(fill = "gray"),
yparams = list(fill = "gray"))
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 20 rows containing non-finite outside the scale range
(`stat_smooth()`).
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 20 rows containing non-finite outside the scale range
(`stat_smooth()`).
Warning: Removed 20 rows containing missing values or values outside the scale range
(`geom_point()`).
print(densityMarginal, newpage = TRUE)
# Overplotted data: jittered, semi-transparent points plus 2D density contours
ggplot(mydata, aes(x = predictorOverplot, y = outcomeOverplot)) +
geom_point(position = "jitter", alpha = 0.3) +
geom_density2d()
Warning: Removed 20 rows containing non-finite outside the scale range
(`stat_density2d()`).
Warning: Removed 20 rows containing missing values or values outside the scale range
(`geom_point()`).
# Smoothed color-density scatterplot (base graphics)
smoothScatter(mydata$predictorOverplot, mydata$outcomeOverplot)
# Data ellipses (car::dataEllipse) at the 50% and 95% levels; requires
# complete cases, hence the listwise deletion first
mydata_nomissing <- na.omit(mydata[,c("predictor","outcome")])
dataEllipse(mydata_nomissing$predictor, mydata_nomissing$outcome, levels = c(0.5, .95))
# Watercolor regression plot with bootstrapped smoothers (petersenlab::vwReg)
vwReg(outcome ~ predictor, data = mydata)
The Shapiro–Wilk test of normality does not accept more than 5000 cases; moreover, with very large samples it will reject the hypothesis that data come from a normal distribution even for slight, practically unimportant deviations from normality.
# Univariate normality: Shapiro-Wilk test of the outcome (NAs removed);
# non-significant p suggests no detectable departure from normality
shapiro.test(na.omit(mydata$outcome)) #subset to keep only the first 5000 rows: mydata$outcome[1:5000]
Shapiro-Wilk normality test
data: na.omit(mydata$outcome)
W = 0.9971, p-value = 0.06998
# Multivariate normality: mvnormtest::mshapiro.test() expects variables in
# rows, so transpose the listwise-deleted data before testing.
mshapiro.test(t(na.omit(mydata)))
Shapiro-Wilk normality test
data: Z
W = 0.98542, p-value = 1.339e-07
https://upload.wikimedia.org/wikipedia/commons/7/74/InferentialStatisticalDecisionMakingTrees.pdf (archived at https://perma.cc/L2QR-ALFA)
# Little's MCAR test (naniar); a non-significant p-value is consistent
# with data missing completely at random (MCAR)
mcar_test(mydata)
statistic <dbl> | df <dbl> | p.value <dbl> | missing.patterns <int> | |
---|---|---|---|---|
59.68147 | 61 | 0.5238153 | 12 |
# Pearson correlation matrix with p-values and pairwise n, for a subset of
# variables (petersenlab::cor.table)
cor.table(mydata[,c("predictor","outcome","predictorOverplot","outcomeOverplot")])
predictor <chr> | outcome <chr> | predictorOverplot <chr> | outcomeOverplot <chr> | |
---|---|---|---|---|
1. predictor.r | 1.00 | .64*** | .04 | .04 |
2. sig | NA | .00 | .17 | .18 |
3. n | 990 | 980 | 981 | 980 |
4. outcome.r | .64*** | 1.00 | .03 | .06† |
5. sig | .00 | NA | .35 | .05 |
6. n | 980 | 990 | 980 | 980 |
7. predictorOverplot.r | .04 | .03 | 1.00 | .57*** |
8. sig | .17 | .35 | NA | .00 |
9. n | 981 | 980 | 990 | 980 |
10. outcomeOverplot.r | .04 | .06† | .57*** | 1.00 |
# Manuscript format: lower triangle with significance asterisks
cor.table(mydata[,c("predictor","outcome","predictorOverplot","outcomeOverplot")], type = "manuscript")
predictor <chr> | outcome <chr> | predictorOverplot <chr> | outcomeOverplot <chr> | |
---|---|---|---|---|
1. predictor | 1.00 | |||
2. outcome | .64*** | 1.00 | ||
3. predictorOverplot | .04 | .03 | 1.00 | |
4. outcomeOverplot | .04 | .06† | .57*** | 1.00 |
# manuscriptBig format: coefficients only, no significance markers
cor.table(mydata[,c("predictor","outcome","predictorOverplot","outcomeOverplot")], type = "manuscriptBig")
predictor <chr> | outcome <chr> | predictorOverplot <chr> | outcomeOverplot <chr> | |
---|---|---|---|---|
1. predictor | 1.00 | |||
2. outcome | .64 | 1.00 | ||
3. predictorOverplot | .04 | .03 | 1.00 | |
4. outcomeOverplot | .04 | .06 | .57 | 1.00 |
# Same three formats, using Spearman (rank-order) correlations
cor.table(mydata[,c("predictor","outcome","predictorOverplot","outcomeOverplot")], correlation = "spearman")
predictor <chr> | outcome <chr> | predictorOverplot <chr> | outcomeOverplot <chr> | |
---|---|---|---|---|
1. predictor.r | 1.00 | .60*** | .03 | .03 |
2. sig | NA | .00 | .40 | .31 |
3. n | 990 | 980 | 981 | 980 |
4. outcome.r | .60*** | 1.00 | .02 | .07* |
5. sig | .00 | NA | .44 | .03 |
6. n | 980 | 990 | 980 | 980 |
7. predictorOverplot.r | .03 | .02 | 1.00 | .55*** |
8. sig | .40 | .44 | NA | .00 |
9. n | 981 | 980 | 990 | 980 |
10. outcomeOverplot.r | .03 | .07* | .55*** | 1.00 |
cor.table(mydata[,c("predictor","outcome","predictorOverplot","outcomeOverplot")], type = "manuscript", correlation = "spearman")
predictor <chr> | outcome <chr> | predictorOverplot <chr> | outcomeOverplot <chr> | |
---|---|---|---|---|
1. predictor | 1.00 | |||
2. outcome | .60*** | 1.00 | ||
3. predictorOverplot | .03 | .02 | 1.00 | |
4. outcomeOverplot | .03 | .07* | .55*** | 1.00 |
cor.table(mydata[,c("predictor","outcome","predictorOverplot","outcomeOverplot")], type = "manuscriptBig", correlation = "spearman")
predictor <chr> | outcome <chr> | predictorOverplot <chr> | outcomeOverplot <chr> | |
---|---|---|---|---|
1. predictor | 1.00 | |||
2. outcome | .60 | 1.00 | ||
3. predictorOverplot | .03 | .02 | 1.00 | |
4. outcomeOverplot | .03 | .07 | .55 | 1.00 |
Examine the associations among variables controlling for a covariate (outcomeOverplot).
# Partial correlations among the three variables, partialling out the
# covariate supplied via z (petersenlab::partialcor.table)
partialcor.table(mydata[,c("predictor","outcome","predictorOverplot")], z = mydata[,c("outcomeOverplot")])
predictor <chr> | outcome <chr> | predictorOverplot <chr> | ||
---|---|---|---|---|
1. predictor.r | 1.00 | .63*** | .02 | |
2. sig | NA | .00 | .47 | |
3. n | 980 | 970 | 971 | |
4. outcome.r | .63*** | 1.00 | -.01 | |
5. sig | .00 | NA | .83 | |
6. n | 970 | 980 | 970 | |
7. predictorOverplot.r | .02 | -.01 | 1.00 | |
8. sig | .47 | .83 | NA | |
9. n | 971 | 970 | 980 |
# Manuscript format: lower triangle with significance asterisks
partialcor.table(mydata[,c("predictor","outcome","predictorOverplot")], z = mydata[,c("outcomeOverplot")], type = "manuscript")
predictor <chr> | outcome <chr> | predictorOverplot <chr> | ||
---|---|---|---|---|
1. predictor | 1.00 | |||
2. outcome | .63*** | 1.00 | ||
3. predictorOverplot | .02 | -.01 | 1.00 |
# manuscriptBig format: coefficients only, no significance markers
partialcor.table(mydata[,c("predictor","outcome","predictorOverplot")], z = mydata[,c("outcomeOverplot")], type = "manuscriptBig")
predictor <chr> | outcome <chr> | predictorOverplot <chr> | ||
---|---|---|---|---|
1. predictor | 1.00 | |||
2. outcome | .63 | 1.00 | ||
3. predictorOverplot | .02 | -.01 | 1.00 |
# Correlogram of the pairwise-deletion correlation matrix (corrplot)
corrplot(cor(mydata[,c("predictor","outcome","predictorOverplot","outcomeOverplot")], use = "pairwise.complete.obs"))
# Scatterplot matrix (car::scatterplotMatrix)
# NOTE(review): `use` is not a documented scatterplotMatrix() argument —
# it is presumably absorbed by `...`; confirm whether it has any effect.
scatterplotMatrix(~ predictor + outcome + predictorOverplot + outcomeOverplot, data = mydata, use = "pairwise.complete.obs")
# SPLOM with histograms on the diagonal and correlations above it (psych)
pairs.panels(mydata[,c("predictor","outcome","predictorOverplot","outcomeOverplot")])
# Multiple regression: predict outcome from predictor and predictorOverplot.
# na.action = "na.exclude" keeps fitted values/residuals padded to the
# original row order (NAs retained), so they align with mydata.
multipleRegressionModel <- lm(outcome ~ predictor + predictorOverplot,
data = mydata,
na.action = "na.exclude")
# Model-implied effect of each predictor, holding the others constant
allEffects(multipleRegressionModel)
model: outcome ~ predictor + predictorOverplot
predictor effect
predictor
0.1 20 40 60 80
49.23335 70.37778 91.62847 112.87915 134.12983
predictorOverplot effect
predictorOverplot
1 10 30 40 50
73.13937 73.15070 73.17586 73.18845 73.20103
# Effect-display plots for each term in the model
plot(allEffects(multipleRegressionModel))
# Multilevel (mixed-effects) regression with a random intercept per ID to
# account for the 10 repeated observations nested within each participant;
# ML estimation allows likelihood-based comparison of nested fixed effects.
multilevelRegressionModel <- lme(outcome ~ predictor + predictorOverplot, random = ~ 1|ID,
method = "ML",
data = mydata,
na.action = "na.exclude")
# Model-implied effect of each predictor, holding the others constant
allEffects(multilevelRegressionModel)
model: outcome ~ predictor + predictorOverplot
predictor effect
predictor
0.1 20 40 60 80
49.23335 70.37778 91.62847 112.87915 134.12983
predictorOverplot effect
predictorOverplot
1 10 30 40 50
73.13937 73.15070 73.17586 73.18845 73.20103
# Effect-display plots for each term in the model
plot(allEffects(multilevelRegressionModel))
sessionInfo()
R version 4.5.0 (2025-04-11)
Platform: x86_64-pc-linux-gnu
Running under: Ubuntu 24.04.2 LTS
Matrix products: default
BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.26.so; LAPACK version 3.12.0
locale:
[1] LC_CTYPE=C.UTF-8 LC_NUMERIC=C LC_TIME=C.UTF-8
[4] LC_COLLATE=C.UTF-8 LC_MONETARY=C.UTF-8 LC_MESSAGES=C.UTF-8
[7] LC_PAPER=C.UTF-8 LC_NAME=C LC_ADDRESS=C
[10] LC_TELEPHONE=C LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C
time zone: UTC
tzcode source: system (glibc)
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] XICOR_0.4.1 ggExtra_0.10.1 mvnormtest_0.1-9-3 naniar_1.1.0
[5] lubridate_1.9.4 forcats_1.0.0 stringr_1.5.1 dplyr_1.1.4
[9] purrr_1.0.4 readr_2.1.5 tidyr_1.3.1 tibble_3.2.1
[13] tidyverse_2.0.0 psych_2.5.3 ggplot2_3.5.2 corrplot_0.95
[17] effects_4.2-2 nlme_3.1-168 ellipse_0.5.0 vioplot_0.5.1
[21] zoo_1.8-14 sm_2.2-6.0 car_3.1-3 carData_3.0-5
[25] petersenlab_1.1.5
loaded via a namespace (and not attached):
[1] Rdpack_2.6.4 DBI_1.2.3 mnormt_2.1.1 gridExtra_2.3
[5] rlang_1.1.6 magrittr_2.0.3 compiler_4.5.0 mgcv_1.9-1
[9] vctrs_0.6.5 reshape2_1.4.4 quadprog_1.5-8 pkgconfig_2.0.3
[13] fastmap_1.2.0 backports_1.5.0 labeling_0.4.3 pbivnorm_0.6.0
[17] promises_1.3.2 rmarkdown_2.29 psychTools_2.5.3 tzdb_0.5.0
[21] nloptr_2.2.1 visdat_0.6.0 xfun_0.52 cachem_1.1.0
[25] jsonlite_2.0.0 later_1.4.2 parallel_4.5.0 lavaan_0.6-19
[29] cluster_2.1.8.1 R6_2.6.1 bslib_0.9.0 stringi_1.8.7
[33] RColorBrewer_1.1-3 boot_1.3-31 rpart_4.1.24 estimability_1.5.1
[37] jquerylib_0.1.4 Rcpp_1.0.14 knitr_1.50 base64enc_0.1-3
[41] httpuv_1.6.16 Matrix_1.7-3 splines_4.5.0 nnet_7.3-20
[45] timechange_0.3.0 tidyselect_1.2.1 rstudioapi_0.17.1 abind_1.4-8
[49] yaml_2.3.10 miniUI_0.1.2 lattice_0.22-6 plyr_1.8.9
[53] shiny_1.10.0 withr_3.0.2 evaluate_1.0.3 foreign_0.8-90
[57] survival_3.8-3 isoband_0.2.7 survey_4.4-2 norm_1.0-11.1
[61] pillar_1.10.2 KernSmooth_2.23-26 rtf_0.4-14.1 checkmate_2.3.2
[65] stats4_4.5.0 reformulas_0.4.1 insight_1.2.0 generics_0.1.4
[69] mix_1.0-13 hms_1.1.3 scales_1.4.0 minqa_1.2.8
[73] xtable_1.8-4 glue_1.8.0 Hmisc_5.2-3 tools_4.5.0
[77] data.table_1.17.2 lme4_1.1-37 mvtnorm_1.3-3 grid_4.5.0
[81] mitools_2.4 rbibutils_2.3 colorspace_2.1-1 htmlTable_2.4.3
[85] Formula_1.2-5 cli_3.6.5 viridisLite_0.4.2 gtable_0.3.6
[89] R.methodsS3_1.8.2 sass_0.4.10 digest_0.6.37 htmlwidgets_1.6.4
[93] farver_2.1.2 R.oo_1.27.1 htmltools_0.5.8.1 lifecycle_1.0.4
[97] mime_0.13 MASS_7.3-65