Linear Mixed Models: application to data in cognitive science

Module 1: A reminder on linear models


Gabriel Weindel, PhD, April 2021 at AMU

Caution

  1. High statistical content taught by a non-statistician
  2. Ask for guidance/consulting when in doubt
  3. R code is kept simple -> sometimes inefficient (also, the instructor is a pythonista)

Organization of the course

  • Module 1: Linear models and necessary "reminders"
  • Module 2: Hierarchies in the data and mixed models
  • Module 3: Bayesian estimation and Generalized Linear Mixed Models

Why are we interested in linear models?

When we can perform t-tests, ANOVA, MANOVA, ANCOVA, or $\chi^2$ tests?

Because each of these statistical tests boils down to a linear model.
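For instance, a two-sample t-test is just a linear model with one binary predictor; a minimal illustration with made-up data:

In [ ]:
set.seed(1)#made-up data for illustration
group = rep(c("a", "b"), each = 20)
score = rnorm(40, mean = ifelse(group == "a", 0, 1))
t.test(score ~ group, var.equal = TRUE)#classical two-sample t-test
summary(lm(score ~ group))#same |t| and p-value on the groupb slope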

Linear Mixed Models are extensions of linear models that allow us to account for hierarchies in the data (e.g. participants, electrodes, labs, etc.).

First we are going to refresh your memory on linear models, and build on that knowledge to talk about linear mixed models!

Organization of the first module

  1. Generalities on linear models
  2. Implementing a simple linear model in R + plotting the model and checking its assumptions
  3. Moving to LMs with multiple predictors
  4. Collinearity between predictors
  5. Contrast coding of the predictors

Generalities on linear models

A normal distribution is defined by a mean/location ($\mu$) and a standard deviation/scale ($\sigma$):

$x \sim \mathcal{N}(\mu, \sigma)$
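To make this concrete, here is how to draw such a density in R (the values $\mu = 6$ and $\sigma = 1$ are arbitrary):

In [ ]:
curve(dnorm(x, mean = 6, sd = 1), from = 2, to = 10,
      xlab = "x", ylab = "Density")#normal density with mu = 6 and sigma = 1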

(Classical) Linear models are built on top of such distributions, e.g. for the normal distribution:

The classical formula:

$y = \alpha + \beta IV + e$

Where:

$e \sim \mathcal{N}(0,\sigma)$


This can be rewritten as:

$y \sim \mathcal{N}(\mu,\sigma)$

Where:

$\mu = \alpha + \beta IV$

$y \sim \mathcal{N}(\mu,\sigma) ~~~~~~\text{where}~~~ \mu = \alpha + \beta IV$

This formulation provides an overview of some of the model's assumptions:

  • Only $\mu$ is a function of the IV ($\sigma$ is constant) -> homoscedasticity
  • Data points are independently and identically distributed (i.i.d.)
  • The residuals are normally distributed

https://www.khstats.com/blog/tmle/tutorial/

Two parameters for $\mu$:

  • $\alpha$ the intercept
  • $\beta$ the slope
In [53]:
plot(0,0)#a single point at the origin
abline(a=1,b=0)#draw a line with intercept a = 1 and slope b = 0

Let's draw some data points and see how the regression line evolves:

https://observablehq.com/@tmcw/bring-your-own-doodles-linear-regression

Parameter estimation

  • Ordinary least squares (you know that one): the estimates minimize
$$\sum_{i=1}^n(y_i - \alpha - \beta IV_i)^2$$
  • Maximum Likelihood Estimation (see the excellent chapter/paper by Myung, 2016)

This will be discussed when we get to LMMs.


Note that we would need to go through linear algebra to get the complete picture of the math behind OLS and LMs.
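For reference, the OLS solution in matrix notation:

$$\hat{\beta} = (X^TX)^{-1}X^Ty$$

A minimal sketch of this computation (ols_by_hand is a made-up helper; lm() itself uses a numerically more stable QR decomposition):

In [ ]:
ols_by_hand = function(x, y) {
  X = cbind(1, x)#design matrix: a column of 1s (intercept) + the predictor
  solve(t(X) %*% X) %*% t(X) %*% y#(X'X)^-1 X'y
}
#compare ols_by_hand(x, y) with coef(lm(y ~ x)) on any data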

Implementation in R

To implement linear models we use the base R function lm().

Let's use an example:

In [54]:
set.seed(234)#I don't like randomness in my lectures
In [55]:
obs = 1000 #We sample the WM span of 1000 persons
mean_wm = 6 #magical number 6
sd_wm = 1 #+/- 1
wm_v = rnorm(obs, mean_wm,sd_wm)#Random number generator
youngest = 1
oldest = 99
age_v = runif(obs, youngest,oldest)#Random number generator
data = data.frame(wm_v, age_v)
In [56]:
plot(data$age_v, data$wm_v, xlab= "Age",ylab="WM span")

But we want data with a linear relationship between the DV (WM span) and the IV (age):

In [57]:
b_age = -0.04 #aging by one unit (e.g. a year) decreases WM span by 0.04 units
data$wm_v = data$wm_v + b_age * data$age_v
In [58]:
plot(age_v, wm_v + (age_v*b_age), xlab="Age (year)", ylab="WM span")
#above is an operation on vectors (equivalent to plotting data$wm_v);
# for each indiv i : wm_i = wm_v[i] + (age_v[i] * b_age)

Let's fit an lm

In [59]:
model <- lm(wm_v ~ age_v, data=data)
In [60]:
summary(model)
Call:
lm(formula = wm_v ~ age_v, data = data)

Residuals:
     Min       1Q   Median       3Q      Max 
-2.96585 -0.64057  0.04763  0.64576  3.14293 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept)  5.891183   0.062811   93.79   <2e-16 ***
age_v       -0.038256   0.001091  -35.07   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.9842 on 998 degrees of freedom
Multiple R-squared:  0.552,	Adjusted R-squared:  0.5516 
F-statistic:  1230 on 1 and 998 DF,  p-value: < 2.2e-16

The coefficients are not perfectly recovered, but we could increase the number of observations even further.

Now remember: we should always plot the data and the model's predictions.

Source: XKCD

In [61]:
model$coeff#accessing coefficients
(Intercept)            age_v 
 5.89118292468627  -0.0382562780830815 
In [62]:
#plot(data$age_v, data$wm_v)
#abline(model$coeff[1], model$coeff[2], lwd=3, col="blue")
#We can even extend the prediction :
plot(data$age_v, data$wm_v,xlim = c(1,150))
abline(model$coeff[1], model$coeff[2], lwd=3, col="blue")

Next step: we want to check the model's assumptions.

We could do this by hand:

In [63]:
resid = data$wm_v - (model$coeff[1] + model$coeff[2] * data$age_v)
hist(resid)
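Equivalently, R already stores the residuals for us:

In [ ]:
hist(residuals(model))#same values as the manual computation above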

But in R there is a package for almost everything:

In [64]:
require("performance")#Useful automated assumption check package 
check_model(model)
Loading required namespace: qqplotr

For confidence bands, please install `qqplotr`.

Now what if we feed the model data from the wrong distribution?

In [65]:
data_exp = data
data_exp$wm_v = exp(data_exp$wm_v)
exp_model <- lm(wm_v ~ age_v, data=data_exp)
In [66]:
summary(exp_model)
Call:
lm(formula = wm_v ~ age_v, data = data_exp)

Residuals:
    Min      1Q  Median      3Q     Max 
-329.31 -105.46  -25.38   51.80 2610.69 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 371.9083    14.3770   25.87   <2e-16 ***
age_v        -4.5319     0.2497  -18.15   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 225.3 on 998 degrees of freedom
Multiple R-squared:  0.2481,	Adjusted R-squared:  0.2474 
F-statistic: 329.4 on 1 and 998 DF,  p-value: < 2.2e-16
In [67]:
plot(data_exp$age_v, data_exp$wm_v)
abline(exp_model$coeff[1], exp_model$coeff[2], lwd=3, col="blue")
In [68]:
check_model(exp_model)
Loading required namespace: qqplotr

For confidence bands, please install `qqplotr`.
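Since data_exp was built by exponentiating wm_v, modelling log(wm_v) recovers the original, well-behaved model; more generally, Generalized Linear Models (module 3) are the principled fix for non-normal data. A quick check:

In [ ]:
log_model <- lm(log(wm_v) ~ age_v, data = data_exp)#undo the exp()
summary(log_model)#same coefficients as the original model
check_model(log_model)#assumption checks look healthy again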

Moving to linear models with multiple predictors

Let's add another predictor, e.g. performing the working memory task with or without a simultaneous (double) task.

In [69]:
mean_double_task = 2
wm_v = rnorm(obs, mean_double_task,sd_wm)#Different mean
data2 = data.frame(wm_v, age_v)#age is the same
In [70]:
# Inducing an interaction :
b_age_double_task = -0.02 #aging by one unit (e.g. a year) decreases WM span by 0.02 units in the double-task condition
data2$wm_v = data2$wm_v + b_age_double_task * data2$age_v
In [71]:
#Merging 
data$condition = "single"
data2$condition = "double"
data_int = rbind(data, data2)
In [72]:
subset = data_int[data_int$condition == "single",]#readability/clarity purposes
plot(subset$age_v, subset$wm_v, xlab="Age (year)", ylab="WM span", col="red")

subset = data_int[data_int$condition == "double",]#readability/clarity purposes
points(subset$age_v, subset$wm_v, col="blue")

We can first fit the model without an interaction.

The formula is now: $\mu = \alpha + \beta_{age} * Age + \beta_{task} * Task$

In [73]:
model_add <- lm(wm_v ~ age_v + condition, data=data_int)

Let's first plot our model using an automated R library:

In [74]:
#install.packages("sjPlot")
library("sjPlot")
library(sjlabelled)
library(sjmisc)
library(ggplot2)
In [75]:
plot_model(model_add, type = "pred", terms = c("age_v", "condition"), 
           show.data = TRUE, ci.lvl = NA, line.size=3)

R's default behavior dummy-coded single as 1 and double as 0, i.e. double is the reference level since levels are ordered alphabetically (but see later).
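You can inspect the coding R applied:

In [ ]:
contrasts(factor(data_int$condition))#default treatment coding
#        single
# double      0
# single      1

Now let's print the model output: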

In [76]:
summary(model_add)
Call:
lm(formula = wm_v ~ age_v + condition, data = data_int)

Residuals:
    Min      1Q  Median      3Q     Max 
-3.4833 -0.7029  0.0307  0.7009  3.2708 

Coefficients:
                  Estimate Std. Error t value Pr(>|t|)    
(Intercept)      2.4564125  0.0520062   47.23   <2e-16 ***
age_v           -0.0292721  0.0008094  -36.17   <2e-16 ***
conditionsingle  2.9854702  0.0461787   64.65   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 1.033 on 1997 degrees of freedom
Multiple R-squared:  0.7332,	Adjusted R-squared:  0.7329 
F-statistic:  2744 on 2 and 1997 DF,  p-value: < 2.2e-16

We can now fit the model with an interaction.

The formula is now: $\mu = \alpha + \beta_{age} * Age + \beta_{task} * Task + \beta_{age \times task} * Age * Task$

In [77]:
model_int <- lm(wm_v ~ age_v * condition, data=data_int)#We switched to "*"
#Or more explicitly:
model_int <- lm(wm_v ~ age_v + condition + age_v:condition, data=data_int)#same as above
In [78]:
plot_model(model_int, type = "pred", terms = c("age_v", "condition"), 
           show.data = TRUE, ci.lvl = NA, line.size=3)

Now let's analyze the summary table:

In [79]:
summary(model_int)
Call:
lm(formula = wm_v ~ age_v + condition + age_v:condition, data = data_int)

Residuals:
    Min      1Q  Median      3Q     Max 
-3.2730 -0.6653  0.0310  0.6679  3.1429 

Coefficients:
                       Estimate Std. Error t value Pr(>|t|)    
(Intercept)            2.007112   0.063852   31.43   <2e-16 ***
age_v                 -0.020288   0.001109  -18.29   <2e-16 ***
conditionsingle        3.884071   0.090301   43.01   <2e-16 ***
age_v:conditionsingle -0.017968   0.001568  -11.46   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 1 on 1996 degrees of freedom
Multiple R-squared:  0.7496,	Adjusted R-squared:  0.7493 
F-statistic:  1992 on 3 and 1996 DF,  p-value: < 2.2e-16

But the default R contrast is not consistent with our research question.

Building on contrast coding of factors

Until now:

  • $\alpha$ = predicted WM at age 0 (in the double condition, the reference)
  • $\beta_{age}$ = effect of age on WM in the double condition
  • $\beta_{condition}$ = difference in intercept when going from double to single task
  • $\beta_{age \times condition}$ = change in the effect of age on WM under single task

We would rather like:

  • $\alpha$ = predicted WM at age 1
  • $\beta_{age}$ = effect of age on WM in the single condition (the reference), scaled to (roughly) the difference between the lowest and highest age
  • $\beta_{condition}$ = difference in intercept when going from single to double task
  • $\beta_{age \times condition}$ = change in the effect of age on WM under double task (equivalently, the change in the double-task effect with age)

We then recode the data:

In [80]:
data_int$age_v_recoded = (data_int$age_v - 1)/100
data_int$condition_recoded = ifelse(data_int$condition == "single", 0, 1)
In [81]:
head(data_int)
A data.frame: 6 × 5
    wm_v       age_v      condition  age_v_recoded  condition_recoded
1   6.3150246   8.643629  single     0.076436287    0
2   2.6518688  32.378705  single     0.313787047    0
3   0.6671325  95.841536  single     0.948415365    0
4   6.7755663  17.391671  single     0.163916705    0
5   5.9695408  37.239943  single     0.362399432    0
6   6.0742064   1.648316  single     0.006483155    0
In [82]:
tail(data_int)
A data.frame: 6 × 5
       wm_v        age_v     condition  age_v_recoded  condition_recoded
1995   0.6745658  34.98079   double     0.3398079      1
1996   3.6278110  12.01254   double     0.1101254      1
1997   0.9268044  92.42589   double     0.9142589      1
1998   0.7432773  12.68647   double     0.1168647      1
1999  -0.6180795  50.42658   double     0.4942658      1
2000   0.3038832  98.45748   double     0.9745748      1
In [83]:
model_int_recoded <- lm(wm_v ~ age_v_recoded * condition_recoded, data=data_int)
summary(model_int_recoded)
Call:
lm(formula = wm_v ~ age_v_recoded * condition_recoded, data = data_int)

Residuals:
    Min      1Q  Median      3Q     Max 
-3.2730 -0.6653  0.0310  0.6679  3.1429 

Coefficients:
                                Estimate Std. Error t value Pr(>|t|)    
(Intercept)                      5.85293    0.06289   93.06   <2e-16 ***
age_v_recoded                   -3.82563    0.11090  -34.49   <2e-16 ***
condition_recoded               -3.86610    0.08894  -43.47   <2e-16 ***
age_v_recoded:condition_recoded  1.79683    0.15684   11.46   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 1 on 1996 degrees of freedom
Multiple R-squared:  0.7496,	Adjusted R-squared:  0.7493 
F-statistic:  1992 on 3 and 1996 DF,  p-value: < 2.2e-16

Now we have meaningful parameters, e.g.:

WM span decreased with age ($\beta_{age} = -3.83, SE = 0.11, p < .05$; i.e. about 0.04 units per year), as did the presence of a double task ($\beta_{task} = -3.87, SE = 0.09, p < .05$). An interaction was present between age and task: the age effect was reduced under double task ($\beta_{age \times task} = 1.80, SE = 0.16, p < .05$).

Think about your contrasts

For binary predictors we can choose between:

  • "sum" contrast (-1, 1)
  • "treatment" contrast (0,1)

The linear model is the same but the interpretation of the parameters changes:

Parameter   Sum (-1, 1)                             Treatment (0, 1)
Intercept   mean of the two conditions              mean in the reference condition
Slope       half the difference between conditions  difference between conditions

(Check this in the output below: the sum-coded slope, -1.94, is half the treatment-coded slope, -3.88.)
In [196]:
data_int$condition_treatment = ifelse(data_int$condition =="single",0,1)
data_int$condition_sum = ifelse(data_int$condition =="single",-1,1 )
summary(lm(wm_v ~ age_v * condition_treatment, data=data_int))$coefficients
summary(lm(wm_v ~ age_v * condition_sum, data=data_int))$coefficients
A matrix: 4 × 4 of type dbl
                            Estimate     Std. Error    t value    Pr(>|t|)
(Intercept)                 5.89118292   0.063852450    92.26244  0.000000e+00
age_v                      -0.03825628   0.001109038   -34.49500  6.297549e-205
condition_treatment        -3.88407058   0.090301001   -43.01249  1.327709e-286
age_v:condition_treatment   0.01796830   0.001568417    11.45633  1.783488e-29
A matrix: 4 × 4 of type dbl
                      Estimate     Std. Error     t value    Pr(>|t|)
(Intercept)           3.94914763   0.0451505007    87.46631  0.000000e+00
age_v                -0.02927213   0.0007842086   -37.32697  9.007915e-232
condition_sum        -1.94203529   0.0451505007   -43.01249  1.327709e-286
age_v:condition_sum   0.00898415   0.0007842086    11.45633  1.783488e-29

Now imagine our condition factor has three levels:

  • single task
  • double task same modality (verbal - verbal)
  • double task different modalities (verbal - non-verbal)
In [192]:
mean_double_task_same = 5
mean_double_task_diff = 2

wm_v = rnorm(obs, mean_double_task_same,sd_wm)#Different mean
data_same = data.frame(wm_v, age_v)

wm_v = rnorm(obs, mean_double_task_diff,sd_wm)#Different mean
data_diff = data.frame(wm_v, age_v)

#Merging 
data$condition = "a-single"
data_same$condition = "d-same"
data_diff$condition = "d-diff"
data_threemod = rbind(data, data_same, data_diff)
data_threemod$condition = as.factor(data_threemod$condition)#We are working with the factor class
print(levels(data_threemod$condition))
[1] "a-single" "d-diff"   "d-same"  

Sum-contrasting your n levels

In [195]:
data_threemod$condition_sum = data_threemod$condition
contrasts(data_threemod$condition_sum) = contr.sum(3)#Defining the contrast matrix
print(contrasts(data_threemod$condition_sum)) #printing it
sum_cont = lm(wm_v ~ age_v * condition_sum, data=data_threemod)
summary(sum_cont)$coefficients
         [,1] [,2]
a-single    1    0
d-diff      0    1
d-same     -1   -1
A matrix: 6 × 4 of type dbl
                       Estimate     Std. Error     t value    Pr(>|t|)
(Intercept)            4.31972552   0.0368106263   117.34996  0.000000e+00
age_v                 -0.01283485   0.0006393552   -20.07468  3.356279e-84
condition_sum1         1.57145741   0.0520580870    30.18661  5.407327e-175
condition_sum2         0.72329773   0.0520580870    13.89405  1.403789e-42
age_v:condition_sum1  -0.02542142   0.0009041848   -28.11529  1.508955e-154
age_v:condition_sum2   0.01225058   0.0009041848    13.54875  1.239886e-40

Treatment-contrasting your n levels

In [177]:
data_threemod$condition_treatment = data_threemod$condition
contrasts(data_threemod$condition_treatment) = contr.treatment(3)
print(contrasts(data_threemod$condition_treatment))
treatment_cont = lm(wm_v ~ age_v * condition_treatment, data=data_threemod)
summary(treatment_cont)$coefficients
         2 3
a-single 0 0
d-diff   1 0
d-same   0 1
A matrix: 6 × 4 of type dbl
                             Estimate     Std. Error    t value    Pr(>|t|)
(Intercept)                  5.89118292   0.063363297    92.97469  0.000000e+00
age_v                       -0.03825628   0.001100542   -34.76129  1.036410e-222
condition_treatment2        -0.96246299   0.089609234   -10.74067  1.974392e-26
condition_treatment3        -3.92146477   0.089609234   -43.76184  9.881313e-324
age_v:condition_treatment2   0.04044263   0.001556402    25.98469  2.067890e-134
age_v:condition_treatment3   0.03937246   0.001556402    25.29710  4.009068e-128

But we can define custom contrasts

In [187]:
data_threemod$condition_sum = data_threemod$condition
contrasts(data_threemod$condition_sum,how.many=2) = matrix(c(-1,0,1,-1,1,0),ncol=2)
print(contrasts(data_threemod$condition_sum))
sum_cont = lm(wm_v ~ age_v * condition_sum, data=data_threemod)
summary(sum_cont)$coefficients
         [,1] [,2]
a-single   -1   -1
d-diff      0    1
d-same      1    0
A matrix: 6 × 4 of type dbl
                       Estimate     Std. Error     t value    Pr(>|t|)
(Intercept)            4.26320701   0.0365828164   116.53578  0.000000e+00
age_v                 -0.01165125   0.0006353985   -18.33692  2.903960e-71
condition_sum1        -2.29348885   0.0517359152   -44.33069  0.000000e+00
condition_sum2         0.66551293   0.0517359152    12.86365  6.719274e-37
age_v:condition_sum1   0.01276743   0.0008985891    14.20831  2.183827e-44
age_v:condition_sum2   0.01383760   0.0008985891    15.39925  1.489765e-51

We could build more complex contrast coding schemes (see the paper by Schad, Vasishth, Hohenstein, & Kliegl, 2020); a minimal sketch of their hypothesis-matrix approach follows.
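The idea: write each comparison you want as a row of weights over the condition means; the generalized inverse then gives the contrast matrix to assign (a sketch under those assumptions; condition_hyp is a new column created here for illustration):

In [ ]:
library(MASS)#for ginv()
H = rbind(c(-1, 1, 0),#d-diff vs a-single
          c(-1, 0, 1))#d-same vs a-single (columns: a-single, d-diff, d-same)
data_threemod$condition_hyp = data_threemod$condition
contrasts(data_threemod$condition_hyp, how.many=2) = ginv(H)#generalized inverse
summary(lm(wm_v ~ age_v * condition_hyp, data=data_threemod))$coefficients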

Collinearity

Using multiple predictors raises the question of collinearity, i.e. when one predictor adds little information beyond what another predictor already provides.

As an example, let's simulate data where we predict reaction times (RTs) from IQ and memory scores:

In [36]:
library('MASS')
varcovmat = matrix(c(1, .97, .97, 1), nrow=2)
data = mvrnorm(n=100, mu=c(0, 0), Sigma=varcovmat, empirical=TRUE)
iq = data[, 1]*15 +100#Just rescaling for "natural" scale
mem = data[, 2]*15 +100 #same
In [37]:
plot(iq,mem)
In [38]:
rt = rnorm(100, 500,50) + -1*iq #Only relationship simulated is with IQ
In [39]:
plot(iq, rt)
In [40]:
model_rt = lm(rt ~ iq + mem)#Additive, we did not include an interaction
In [41]:
summary(model_rt)
Call:
lm(formula = rt ~ iq + mem)

Residuals:
     Min       1Q   Median       3Q      Max 
-107.392  -31.466   -1.463   24.007   94.377 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 483.2304    29.1289  16.589   <2e-16 ***
iq           -0.7652     1.1764  -0.651    0.517    
mem          -0.0725     1.1764  -0.062    0.951    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 42.68 on 97 degrees of freedom
Multiple R-squared:  0.08092,	Adjusted R-squared:  0.06197 
F-statistic:  4.27 on 2 and 97 DF,  p-value: 0.0167
In [42]:
check_collinearity(model_rt)
A check_collinearity: 2 × 3
  Term  VIF       SE_factor
1 iq    16.92047  4.11345
2 mem   16.92047  4.11345
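These VIF values match the textbook formula

$$VIF_j = \frac{1}{1 - R^2_j}$$

where $R^2_j$ is obtained by regressing predictor $j$ on the other predictors: with a correlation of .97 between iq and mem, $1/(1 - .97^2) \approx 16.9$, and the standard errors are inflated by $\sqrt{VIF} \approx 4.1$ (the SE_factor column). Let's drop one of the two collinear predictors: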
In [43]:
model_rt = lm(rt ~ iq)
In [44]:
summary(model_rt)
Call:
lm(formula = rt ~ iq)

Residuals:
     Min       1Q   Median       3Q      Max 
-107.806  -31.207   -1.267   23.974   93.890 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 483.0129    28.7670  16.791  < 2e-16 ***
iq           -0.8356     0.2845  -2.937  0.00413 ** 
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 42.46 on 98 degrees of freedom
Multiple R-squared:  0.08089,	Adjusted R-squared:  0.07151 
F-statistic: 8.624 on 1 and 98 DF,  p-value: 0.004134

But collinearity is not a pathology of the model per se (e.g. IQ and memory indices are genuinely correlated in real populations).

Statistical inference on LM

  • Use stepwise regression (a sketch of how to do it in R follows below)
  • Use the estimates and associated p-values (but see the last module on Bayesian estimation of LMMs)
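For stepwise regression, base R provides step(); a minimal sketch reusing the collinearity example above (keeping in mind that stepwise selection has well-known pitfalls):

In [ ]:
full_model = lm(rt ~ iq * mem)#start from the full model
step(full_model, direction = "backward")#drops terms by AIC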

Limitations of the classical LM

The problem is that in psychology/neuroscience we typically have:

  • Within-subject designs (e.g. the same participant performing both the single and the double task) -> non-i.i.d. data
  • Repeated measurements (e.g. we sample several scores per participant for precision) -> non-i.i.d. data
  • Non-normal data (e.g. reaction times)

The last two points could be alleviated by taking a central tendency index (e.g. the mean per participant), but we would lose information.

Using Linear Mixed Models

  • Directly account for within-subject designs -> preserve the higher precision of within designs while accounting for the non-i.i.d. data they induce
  • Account for repeated measurements -> no pre-averaging
  • Imbalance in repeated measures (e.g. one participant with fewer trials) is compensated for by the LMM
  • Provide estimates in terms of effect sizes rather than abstract statistical values (t, F); see the teaser sketch below
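As a teaser for module 2, a minimal sketch of a mixed-model call (assuming the lme4 package is installed; toy is simulated data with a made-up participant column):

In [ ]:
library(lme4)
toy = data.frame(participant = factor(rep(1:20, each = 10)),#20 participants, 10 trials each
                 age_v = rep(runif(20, 1, 99), each = 10))#one age per participant
toy$wm_v = 6 - 0.04 * toy$age_v +
           rnorm(20, 0, 0.5)[toy$participant] +#participant-level deviation
           rnorm(200, 0, 1)#trial-level noise
lmer(wm_v ~ age_v + (1 | participant), data = toy)#random intercept per participant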

But... there is no free lunch

Moving to module 2