-
Notifications
You must be signed in to change notification settings - Fork 0
/
run_analysis.R
116 lines (104 loc) · 4.33 KB
/
run_analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
require(dplyr)
#############
# Functions #
#############
#
# Concatenate the training and test data sets into a single data.frame.
#
# Arguments:
# * features, activities: kept only so that existing calls keep working.
#     They are never evaluated; the feature and activity tables are always
#     read from the files found under `data_dir`.
# * data_dir: directory holding the unpacked data set, i.e. "features.txt",
#     "activity_labels.txt" and the "train" and "test" sub-directories.
#
# Returns a data.frame with columns:
#   * subject: the subject identifier
#   * ...: one column per selected feature, named with the feature label
#   * activity: the activity label (a factor)
# Training rows come first, followed by the test rows.
#
concatenate_sets <- function(features, activities,
                             data_dir = "UCI HAR Dataset") {
  #
  # Reads the feature list from "features.txt" and keeps only the features
  # whose label contains "-mean()" or "-std()".
  #
  # Returns a data.frame with columns:
  #   * index: the position of that feature in the feature vector
  #   * label: the label of that feature (character)
  #
  read_features <- function() {
    read.table(file.path(data_dir, "features.txt"),
               col.names = c("index", "label"),
               stringsAsFactors = FALSE) %>%
      filter(grepl("-(mean|std)\\(\\)", label))
  }

  #
  # Reads the activities from "activity_labels.txt".
  #
  # Returns a data.frame with columns:
  #   * id: the activity identifier
  #   * label: the label of that activity
  #
  read_activities <- function() {
    read.table(file.path(data_dir, "activity_labels.txt"),
               col.names = c("id", "label"),
               colClasses = c("integer", "factor"))
  }

  #
  # Merges the files with feature vector values, activity ids and subjects
  # into a single data.frame.
  #
  # Returns a data.frame with columns:
  #   * subject: the subject identifier
  #   * ...: one column per selected feature, named with the feature label
  #   * activity: the activity label (appended by the join, hence last)
  #
  merge_files <- function(filename_x, filename_y, filename_subjects) {
    # Read the feature vector values and keep only the selected features.
    # drop = FALSE keeps a data.frame even when a single feature is selected.
    measurements <- read.table(filename_x, stringsAsFactors = FALSE)
    measurements <- measurements[, feature_table$index, drop = FALSE]
    names(measurements) <- as.character(feature_table$label)

    # Read the subject ids and the activity ids (one row per measurement)
    subjects <- read.table(filename_subjects,
                           col.names = c("subject"),
                           stringsAsFactors = FALSE)
    activity_ids <- read.table(filename_y,
                               col.names = c("activity"),
                               stringsAsFactors = FALSE)

    # cbind() would silently recycle a shorter column whose length divides
    # the number of rows, so check the three files line up first.
    stopifnot(nrow(subjects) == nrow(measurements),
              nrow(activity_ids) == nrow(measurements))

    # Combine the columns, then replace the activity id by its label
    cbind(activity_id = activity_ids$activity,
          subject = subjects$subject,
          measurements) %>%
      inner_join(activity_table, by = c("activity_id" = "id")) %>%
      select(-activity_id) %>%
      rename(activity = label)
  }

  # Lookup tables shared by both merge_files() calls below
  feature_table <- read_features()
  activity_table <- read_activities()

  # Read training data
  training_set <- merge_files(
    file.path(data_dir, "train", "X_train.txt"),
    file.path(data_dir, "train", "y_train.txt"),
    file.path(data_dir, "train", "subject_train.txt"))

  # Read test data
  test_set <- merge_files(
    file.path(data_dir, "test", "X_test.txt"),
    file.path(data_dir, "test", "y_test.txt"),
    file.path(data_dir, "test", "subject_test.txt"))

  # Combine rows of the former data.frames
  rbind(training_set, test_set)
}
#
# For each activity, for each subject, computes the mean of every remaining
# column (the feature vector values).
#
# Arguments:
# * to_summarise: a data.frame with `activity` and `subject` columns; every
#     other column is averaged with mean().
#
# Returns an ungrouped table with one row per (activity, subject) pair. The
# `activity` and `subject` columns come first and the averaged columns keep
# their original names.
#
summarise_set <- function(to_summarise) {
  to_summarise %>%
    group_by(activity, subject) %>%
    # summarise_all() replaces the deprecated summarise_each(funs(mean))
    summarise_all(mean) %>%
    # summarising two grouping levels leaves the result grouped by activity;
    # drop that so later verbs are not applied per group by surprise
    ungroup()
}
#############
# MAIN PART #
#############
# Gather the training and test data into one data.frame. No arguments are
# passed: concatenate_sets() reads everything it needs from disk and never
# uses its `features` / `activities` parameters, and no objects with those
# names exist at this level.
full_set <- concatenate_sets()
# Compute the means by activity, by subject
summary_set <- summarise_set(full_set)
# Write the result in a flat file called "feature_means.txt"
write.table(summary_set, file = "feature_means.txt", row.names = FALSE)