-
Notifications
You must be signed in to change notification settings - Fork 0
/
final_report_analysis.Rmd
269 lines (222 loc) · 10 KB
/
final_report_analysis.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
---
title: "Midterm-Paper"
author: "Team 4"
date: "2023-06-08"
output:
html_document:
df_print: paged
---
```{r hidden=TRUE}
setwd(".../GitHub/statistics_healthcare")
```
H1_sit: μ(sit_phone) ≠ μ(sit_no_phone)
H0_sit: μ(sit_phone) = μ(sit_no_phone)
H1_walk: μ(walk_phone) < μ(walk_no_phone)
H0_walk: μ(walk_phone) >= μ(walk_no_phone)
## Data Preparation
```{r}
# Prepare dataframe for analysis
testing_dat <- data.frame(
Id=integer(),
SitP=double(),
SitNP=double(),
WalkP=double(),
WalkNP=double(),
stringsAsFactors=FALSE
)
for (index in c(1:15)){
# Imported preprocessed data
dat <- read.csv(sprintf("Data_clean_15/log_p%s.txt", index), sep="|")
# Acceleration
dat$acceleration <- sqrt(dat$x^2 + dat$y^2 + dat$z^2)
# "Stratify" data
sitting_phone <- dat[dat$statusId == 2, ] # this is the id for sitting on phone
sitting_no_phone <- dat[dat$statusId == 1, ] # this is the id for sitting
walking_phone <- dat[dat$statusId == 4, ] # this is the id for walking on phone
walking_no_phone <- dat[dat$statusId == 3, ] # this is the id for walking
# We will minimize the effects of starting and ending phase on the statistical analysis by
# eliminating the first and latter 2500 timestamps:
sitting_phone <- sitting_phone[2500:(nrow(sitting_phone)-2500),]
sitting_no_phone <- sitting_no_phone[2500:(nrow(sitting_no_phone)-2500),]
walking_phone <- walking_phone[2500:(nrow(walking_phone)-2500),]
walking_no_phone <- walking_no_phone[2500:(nrow(walking_no_phone)-2500),]
# Normalize timestamps to beginnings of recordings
min_sp <- min(sitting_phone$timestamp)
min_snp <- min(sitting_no_phone$timestamp)
min_wp <- min(walking_phone$timestamp)
min_wnp <- min(walking_no_phone$timestamp)
sitting_phone$timestamp <- (sitting_phone$timestamp - min_sp)
sitting_no_phone$timestamp <- (sitting_no_phone$timestamp - min_snp)
walking_phone$timestamp <- (walking_phone$timestamp - min_wp)
walking_no_phone$timestamp <- (walking_no_phone$timestamp - min_wnp)
# Plot sitting data
{plot(sitting_phone$timestamp/1000, sitting_phone$acceleration,type = "l",col = "red",xlab = "Timestamp [s]",xlim = c(0, 55), ylim=c(0,30),ylab = "Acceleration magnitude sitting [m/s^2]")
lines(sitting_no_phone$timestamp/1000, sitting_no_phone$acceleration,col = "blue")
legend("topleft", legend=c("on phone", "off phone"),col=c("red", "blue"), lty=1, cex=0.8)}
# Plot walking phone data
{plot(walking_phone$timestamp/1000, walking_phone$acceleration,type = "l",col = 2,xlab = "Timestamp [s]",xlim = c(0, 55), ylim=c(0,30),ylab = "Acceleration magnitude walking [m/s^2]")
lines(walking_no_phone$timestamp/1000, walking_no_phone$acceleration,col = 4)
legend("topleft", legend=c("on phone", "off phone"),col=c("red", "blue"), lty=1:2, cex=0.8)}
# Add entry for this specific participant to testing data
testing_dat[nrow(testing_dat) + 1,] = c(
index,
mean(sitting_phone$acceleration),
mean(sitting_no_phone$acceleration),
mean(walking_phone$acceleration),
mean(walking_no_phone$acceleration)
)
}
```
## Normal Distribution Check for 15 Participants - Histograms
```{r}
{par(mfrow=c(2,2))
# Histogram Sit with phone
hist(testing_dat$SitP, breaks = seq(min(testing_dat$SitP), max(testing_dat$SitP), length.out = 30), ylim = c(1, 8),main = "Histogram of Sitting with Phone", xlab = "Mean Acceleration Magnitude [m/s^2]", ylab = "Frequency", xlim = c(9.5, 13), col = "black")
# Histogram Sit without phone
hist(testing_dat$SitNP, breaks = seq(min(testing_dat$SitNP), max(testing_dat$SitNP), length.out = 10), ylim = c(1, 8),main = "Histogram of Sitting without Phone", xlab = "Mean Acceleration Magnitude [m/s^2]", ylab = "Frequency", xlim = c(9.5, 13), col = "black")
# Histogram Walk with phone
hist(testing_dat$WalkP, breaks = seq(min(testing_dat$WalkP), max(testing_dat$WalkP), length.out = 10), ylim = c(1, 8),main = "Histogram of Walking with Phone", xlab = "Mean Acceleration Magnitude [m/s^2]", ylab = "Frequency", xlim = c(9.5, 13), col = "blue")
#Histogram Walk without phone
hist(testing_dat$WalkNP, breaks = seq(min(testing_dat$WalkNP), max(testing_dat$WalkNP),length.out =10), ylim = c(1, 8),main = "Histogram of Walking without Phone", xlab = "Mean Acceleration Magnitude [m/s^2]", ylab = "Frequency", xlim = c(9.5, 13), col = "blue")}
# Use Shapiro-Wilk test to see if they might be normally distributed
shapiro.test(testing_dat$SitP)
shapiro.test(testing_dat$SitNP)
shapiro.test(testing_dat$WalkP)
shapiro.test(testing_dat$WalkNP)
```
## Normal Distribution Check for 15 Participants - Empirical Cumulative Distribution Function
```{r}
# ECDF Sit with phone
n <- 15
plot(sort(testing_dat$SitP), (1:n)/n, type="s", ylim=c(0,1), xlim=c(9.5,12))
# ECDF Sit without phone
plot(sort(testing_dat$SitNP), (1:n)/n, type="s", ylim=c(0,1), xlim=c(9.5,12))
# ECDF Walk with phone
plot(sort(testing_dat$WalkP), (1:n)/n, type="s", ylim=c(0,1), xlim=c(9.5,12))
# ECDF Walk without phone
plot(sort(testing_dat$WalkNP), (1:n)/n, type="s", ylim=c(0,1), xlim=c(9.5,12))
```
## Testing Wilcox
Hypotheses:
> When sitting, people induce different movement in their phone while using it than while not using it.
> When walking, people induce less movement in their phone while using it than while not using it.
```{r}
# For none of the categories we can really assume normality of accelerations
# from this small data. This very likely might change when the experiment is
# conducted with more people
# But for now this means we use Mann-Whitney test
# Test first hypothesis: Different movement sitting while using than not
wilcox.test(testing_dat$SitP, testing_dat$SitNP, alternative = "two.sided",paired=TRUE)
# We can reject Null-Hypothesis, and confirm there is substantially different
# movement in the phone while using it sitting than not using it sitting
wilcox.test(testing_dat$WalkP, testing_dat$WalkNP, alternative="less",paired=TRUE)
# We can reject Null-Hypothesis, and confirm there is substantially higher
# movement in the phone while using it walking than not using it walking
```
## Testing T-test
```{r}
SitP.mean <- mean(testing_dat$SitP)
SitNP.mean <- mean(testing_dat$SitNP)
WalkP.mean <- mean(testing_dat$WalkP)
WalkNP.mean <- mean(testing_dat$WalkNP)
SitNP.mean - SitP.mean
n <- 15
sqrt(n)*(SitNP.mean - SitP.mean)/sd(testing_dat$SitNP - testing_dat$SitP)
# Reject H0_sit => Accept H1_sit => μ(sit_phone) ≠ μ(sit_no_phone)
t.test(testing_dat$SitNP, testing_dat$SitP,alternative="two.sided", paired=TRUE)
# Reject H0_walk => Accept H1_walk => μ(walk_phone) < μ(walk_no_phone)
t.test(testing_dat$WalkP, testing_dat$WalkNP,alternative="less", paired=TRUE)
```
# Variance Tests - split data to independent participants
```{r}
# 7 participants
SitP_7p <- testing_dat$SitP[1:7]
# Next 7 participants
SitNP_7p <- testing_dat$SitNP[8:14]
# 7 participants
WalkP_7p <- testing_dat$WalkP[1:7]
# Next 7 participants
WalkNP_7p <- testing_dat$WalkNP[8:14]
var(SitP_7p)
var(SitNP_7p)
var(WalkP_7p)
var(WalkNP_7p)
boxplot(SitP_7p, SitNP_7p,names=c("SitP_7p", "SitNP_7p"))
boxplot(SitP_7p, SitNP_7p,names=c("SitP_7p", "SitNP_7p"), ylim=c(9.5,12.5))
boxplot(WalkP_7p, WalkNP_7p,names=c("WalkP_7p", "WalkNP_7p"), ylim=c(9.5,12.5))
#Unequal variance Var(SitP_7p)!=Var(SitNP_7p) && Var(WalkP_7p)!=Var(WalkNP_7p)
```
## F-Test
```{r}
# H1_sit_independent: var(sit_phone) ≠ var(sit_no_phone)
var(SitP_7p) / var(SitNP_7p)
var.test(SitP_7p, SitNP_7p)
# H1_walk_independent: var(walk_phone) ≠var(walk_no_phone)
var(WalkP_7p) / var(WalkNP_7p)
var.test(WalkP_7p, WalkNP_7p)
```
## Prepare Data Frames for Upcoming Tests
```{r}
# Prepare data frames special format needed for this test
#Sit
SitNP_P_7p <- data.frame(mag=c(SitP_7p,SitNP_7p), group=factor(c(rep("Sit_P", 7), rep("Sit_NP",7))))
#walk
WalkNP_P_7p <- data.frame(mag=c(WalkP_7p,WalkNP_7p), group=factor(c(rep("Walk_P", 7), rep("Walk_NP",7))))
```
## Levene’s Test
```{r}
#load libraries
#install.packages("car")
library(car)
#center = mean
leveneTest(mag ~ group, data=SitNP_P_7p,center=mean)
leveneTest(mag ~ group, data=WalkNP_P_7p,center=mean)
#center = median
leveneTest(mag ~ group, data=SitNP_P_7p,center=median)
leveneTest(mag ~ group, data=WalkNP_P_7p,center=median)
```
## Fligner-Killeen test
```{r}
fligner.test(mag ~ group, data=SitNP_P_7p)
fligner.test(mag ~ group, data=WalkNP_P_7p)
```
## Power
```{r}
# Calculate the differences between the two modes sitting
differences_sit <- testing_dat$SitP - testing_dat$SitNP
differences_walk <- testing_dat$WalkNP -testing_dat$WalkP
# Compute the standard deviation of the differences
sd_differences_sit <- sd(differences_sit)
sd_differences_walk <- sd(differences_walk)
# Calculate the mean difference
mean_difference_sit <- mean(differences_sit)
mean_difference_walk <- mean(differences_walk)
# data and sd from feasibility study
power.t.test(delta=mean_difference_sit, sd=sd_differences_sit*sqrt(2), sig.level = 0.05,power=0.80, type="paired", alternative="two.sided")
power.t.test(delta=mean_difference_walk, sd=sd_differences_walk*sqrt(2), sig.level = 0.05,power=0.80, type="paired", alternative="one.sided")
# data and sd from feasibility study
power.t.test(delta=mean_difference_sit, sd=sd_differences_sit*sqrt(2), sig.level = 0.01,power=0.80, type="paired", alternative="two.sided")
power.t.test(delta=mean_difference_walk, sd=sd_differences_walk*sqrt(2), sig.level = 0.01,power=0.80, type="paired", alternative="one.sided")
```
## Binomial Test
```{r}
par(mfrow=c(1,2))
plot(testing_dat$WalkP, type="h", main="WalkingP",ylim=c(9.8,10),xlim=c(0,15), ylab = "Acceleration Magnitude", xlab = "Participant")
sum(testing_dat$WalkP > 10)
binom.test(3, 15, 0.1)
```
## Two proportions
```{r}
par(mfrow=c(1,2))
plot(WalkP_7p, type="h", main="WalkingP",ylim=c(9.8,10),xlim=c(0,15), ylab = "Acceleration Magnitude", xlab = "Participant")
plot(WalkNP_7p, type="h", main="WalkingNP",ylim=c(9.8,10),xlim=c(0,15), ylab = "Acceleration Magnitude", xlab = "Participant")
par(mfrow=c(1,2))
count1 = sum(WalkP_7p > 9.888)
count2 = sum(WalkNP_7p > 9.888)
positive <- c(count1, count2)
total <- c(7, 7)
# We compute the proportions for Group 1 and Group 2
positive/total
# Test
prop.test(positive, total)
```