-
Notifications
You must be signed in to change notification settings - Fork 0
/
strint_train.Rmd
223 lines (196 loc) · 9.81 KB
/
strint_train.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
---
title: "Structural Learning and Transfer - Training Phase"
output: html_document
---
```{r setup, include=FALSE}
#knitr::opts_chunk$set(echo = TRUE)
```
## Install new packages
```{r}
#install.packages('plotrix')
#install.packages("grid")
#install.packages("reshape")
#install.packages("ez")
#install.packages("Cairo")
```
## Load packages
```{r Load packages}
library(plyr)
library(ggplot2)
library(plotrix)
#library(grid)
#library(gridExtra)
#library(lattice)
#library(reshape) # This one interferes with dplyr...don't use it.
library(ez)
library(dplyr)
library(afex)
```
## Column rearranger
```{r}
##arrange df vars by position
##'vars' must be a named vector, e.g. c("var.name"=1)
arrange.vars <- function(data, vars){
##stop if not a data.frame (but should work for matrices as well)
stopifnot(is.data.frame(data))
##sort out inputs
data.nms <- names(data)
var.nr <- length(data.nms)
var.nms <- names(vars)
var.pos <- vars
##sanity checks
stopifnot( !any(duplicated(var.nms)),
!any(duplicated(var.pos)) )
stopifnot( is.character(var.nms),
is.numeric(var.pos) )
stopifnot( all(var.nms %in% data.nms) )
stopifnot( all(var.pos > 0),
all(var.pos <= var.nr) )
##prepare output
out.vec <- character(var.nr)
out.vec[var.pos] <- var.nms
out.vec[-var.pos] <- data.nms[ !(data.nms %in% var.nms) ]
stopifnot( length(out.vec)==var.nr )
##re-arrange vars by position
data <- data[ , out.vec]
return(data)
}
```
## Load in Data
```{r Load in Data, include=FALSE}
setwd("//Volumes//mnl//Data//Adaptation//structural_interference//Post_Step_3_train") # Mac
#setwd("Z:\\Data\\Adaptation\\structural_interference\\Post_Step_3_train") # PC
trainData = read.delim('train_raw',header = FALSE, sep = ",", na.strings = 'NaN')
numGroup = 2 # Number of groups (works only for my current dataset)
colnames(trainData) = c('group', 'subjectID', 'trial', 'target_theta',
'MT', 'MT_c', 'MT_st',
'rmse', 'rmse_c', 'rmse_st',
'ide', 'ide_c', 'ide_st',
'norm_jerk', 'norm_jerk_c', 'norm_jerk_st',
'mov_int', 'mov_int_c', 'mov_int_st',
'end_X_pos', 'end_Y_pos',
'tstamp_start', 'tstamp_end',
'velPeak', 'velPeak_c', 'velPeak_st',
'velPeakTime', 'velPeakTime_c', 'velPeakTime_st',
'RT', 'RT_c', 'RT_st',
'wrong_trial')
factors = c('group', 'subjectID')
trainData[,factors] = lapply(trainData[,factors], factor)
# This will get rid of oulier subjects BEFORE generating the dataset to follow.
outliers = c(0)
for (i in 1:length(outliers)){
trainData = subset(trainData, trainData$subjectID != outliers[i])
}
numSub = nrow(trainData)/272 # number of subjects in the data.frame (scrubbed of outliers)
numTrial = nrow(trainData)/numSub # number of trials per subject
# Rename entries for target_theta to indicate whether trial was "up" (90 deg), or "down" (270 deg)
trainData$target_theta = as.character(trainData$target_theta)
trainData$target_theta = revalue(trainData$target_theta, c("4.712389" = "down", "1.570796" = "up"))
trainData$target_theta = as.factor(trainData$target_theta)
# Normalize to baseline. I will make a new variable (*.bc) that subtracts the mean of all KB trials from all values.
# WARNING: If you get this error 'replacement has length zero' it is because plyr was loaded and is interfering with dplyr. You can fix it by putting "dplyr::" in front of the function. EXAMPLE - dplyr::group_by() %>% dplyr::summarise(). A better fix is to load dplyr AFTER plyr in the package installation chunk.
# Apply to ide_c
temp = trainData %>% select(c(group, subjectID, trial, ide_c)) %>% filter(trial %in% 1:16) %>% group_by(subjectID) %>% summarise(avg.ide.kb = mean(ide_c, na.rm = TRUE))
temp2 = data.frame(matrix(ncol = 2, nrow = numSub*272))
for(i in 1:numSub){
temp2$X1[(272*(i-1)+1):(272*i)] = rep(temp$subjectID[i],272)
temp2$X2[(272*(i-1)+1):(272*i)] = rep(temp$avg.ide.kb[i],272)
}
trainData$ide.bc = trainData$ide_c - temp2$X2
rm(temp, temp2) # Clean up workspace
# Apply to rmse_c
temp = trainData %>% select(c(group, subjectID, trial, rmse_c)) %>% filter(trial %in% 1:16) %>% group_by(subjectID) %>% summarise(avg.rmse.kb = mean(rmse_c, na.rm = TRUE))
temp2 = data.frame(matrix(ncol = 2, nrow = numSub*272))
for(i in 1:numSub){
temp2$X1[(272*(i-1)+1):(272*i)] = rep(temp$subjectID[i],272)
temp2$X2[(272*(i-1)+1):(272*i)] = rep(temp$avg.rmse.kb[i],272)
}
trainData$rmse.bc = trainData$rmse_c - temp2$X2
rm(temp, temp2) # Clean up workspace
# Apply to mov_int_c
temp = trainData %>% select(c(group, subjectID, trial, mov_int_c)) %>% filter(trial %in% 1:16) %>% group_by(subjectID) %>% summarise(avg.mov_int.kb = mean(mov_int_c, na.rm = TRUE))
temp2 = data.frame(matrix(ncol = 2, nrow = numSub*272))
for(i in 1:numSub){
temp2$X1[(272*(i-1)+1):(272*i)] = rep(temp$subjectID[i],272)
temp2$X2[(272*(i-1)+1):(272*i)] = rep(temp$avg.mov_int.kb[i],272)
}
trainData$mov_int.bc = trainData$mov_int_c - temp2$X2
rm(temp, temp2) # Clean up workspace
# Apply to norm_jerk_c
temp = trainData %>% select(c(group, subjectID, trial, norm_jerk_c)) %>% filter(trial %in% 1:16) %>% group_by(subjectID) %>% summarise(avg.norm_jerk.kb = mean(norm_jerk_c, na.rm = TRUE))
temp2 = data.frame(matrix(ncol = 2, nrow = numSub*272))
for(i in 1:numSub){
temp2$X1[(272*(i-1)+1):(272*i)] = rep(temp$subjectID[i],272)
temp2$X2[(272*(i-1)+1):(272*i)] = rep(temp$avg.norm_jerk.kb[i],272)
}
trainData$norm_jerk.bc = trainData$norm_jerk_c - temp2$X2
rm(temp, temp2) # Clean up workspace
# Some trials have RT less than 100ms (which is inhuman). Need to replace these values with NA, as well as velPeakTime
trainData = trainData %>%
mutate(velPeakTime_c = replace(velPeakTime_c, which(RT_c < 100), NA)) %>%
mutate(MT_c = replace(MT_c, which(RT_c < 100), NA)) %>%
mutate(mov_int_c = replace(mov_int_c, which(RT_c < 100), NA)) %>%
mutate(RT_c = replace(RT_c, which(RT_c < 100), NA)) # WARNING: RT_c has to be last in this chain.
# Define trial numbers for the different phases
vb = 1:16
ex = 17:256
pe = 257:272
```
## Data Wrangling
```{r Caclulate means}
# Create 'block' factor
nr = nrow(trainData)
blockSize = 4
trainData$block = as.factor(rep(1:(numTrial/blockSize), each = blockSize, times = numSub))
trainData = arrange.vars(trainData, c("block" = 4))
# Take mean by block
trainData_mbb = trainData %>% group_by(subjectID, group, block) %>% summarise_each(funs(mean(., na.rm = TRUE))) %>% select(-c(trial, target_theta, wrong_trial))
# Take absolute value to get |CE| for each 10 trial block
#trainData_mbb[4:ncol(trainData_mbb)] = lapply(trainData_mbb[4:ncol(trainData_mbb)], function(x) abs(x)) # NOTE: this assumes columns 1-3 are the only factors
# Find mean by group
trainData_mbg = trainData_mbb %>% group_by(group, block) %>% summarise_all(funs(mean(., na.rm = TRUE), std.error(., na.rm = TRUE))) %>% select(-c(subjectID_mean))
# Create a phase variable (phase1 = vb, phase2 = kb, phase3 = ex, phase4 = pe)
phase1 = rep(1,16/blockSize)
phase2 = rep(2,240/blockSize)
phase3 = rep(3,16/blockSize)
phase = rep(c(phase1,phase2,phase3),numGroup)
trainData_mbg$phase = as.factor(phase)
trainData_mbg = arrange.vars(trainData_mbg, c("phase"= 3))
trainData_mbg$block = as.numeric(trainData_mbg$block) # make block numeric
```
## Plot individual P Data
```{r Plot individual data and color by uptrial vs. downtrial. Consider this for determining mirror/iso}
#Data = subset(trainData, trainData$subjectID == 4)
#exposure_phase = 17:56
#ggplot(data = Data[exposure_phase,], aes(x = trial, y = ide.bc, color = target_theta))+
# geom_point()+
# geom_smooth()
```
## Plot grouped, baseline corrected data POST-EXPOSURE ONLY
```{r Plot ide.bc}
pd = position_dodge(width = 0.4)
block_ex = subset(trainData_mbg, trainData_mbg$phase==3)$block
group_ex = subset(trainData_mbg, trainData_mbg$phase==3)$group
ggplot(data = subset(trainData_mbg, trainData_mbg$phase==3), aes(x = block, y = ide.bc_mean, group = group_ex))+
geom_point(aes(color = group_ex), position = pd)+
geom_errorbar(data = subset(trainData_mbg, trainData_mbg$phase==3), aes(x = block, ymin = ide.bc_mean-ide.bc_std.error, ymax = ide.bc_mean+ide.bc_std.error, color = group),
position = pd, width = 0)+
geom_line(aes(color = group_ex), position = pd)
```
```{r Plot rmse.bc}
pd = position_dodge(width = 0.4)
block_ex = subset(trainData_mbg, trainData_mbg$phase==3)$block
group_ex = subset(trainData_mbg, trainData_mbg$phase==3)$group
ggplot(data = subset(trainData_mbg, trainData_mbg$phase==3), aes(x = block, y = rmse.bc_mean, group = group_ex))+
geom_point(aes(color = group_ex), position = pd)+
geom_errorbar(data = subset(trainData_mbg, trainData_mbg$phase==3), aes(x = block, ymin = rmse.bc_mean-rmse.bc_std.error, ymax = rmse.bc_mean+rmse.bc_std.error, color = group),
position = pd, width = 0)+
geom_line(aes(color = group_ex), position = pd)
```
## Stats
```{r t-test during post-exposure}
t.test(subset(trainData_mbb, trainData_mbb$block %in% c(65:68) & trainData_mbb$group %in% c(1))$ide.bc, subset(trainData_mbb, trainData_mbb$block %in% c(65:68) & trainData_mbb$group %in% c(2))$ide.bc, paired = FALSE)
t.test(subset(trainData_mbb, trainData_mbb$block %in% c(65:68) & trainData_mbb$group %in% c(1))$rmse.bc, subset(trainData_mbb, trainData_mbb$block %in% c(65:68) & trainData_mbb$group %in% c(2))$rmse.bc, paired = FALSE)
t.test(subset(trainData_mbb, trainData_mbb$block %in% c(65:68) & trainData_mbb$group %in% c(1))$mov_int.bc, subset(trainData_mbb, trainData_mbb$block %in% c(65:68) & trainData_mbb$group %in% c(2))$mov_int.bc, paired = FALSE)
t.test(subset(trainData_mbb, trainData_mbb$block %in% c(65:68) & trainData_mbb$group %in% c(1))$MT_c, subset(trainData_mbb, trainData_mbb$block %in% c(65:68) & trainData_mbb$group %in% c(2))$MT_c, paired = FALSE)
```