forked from trasapong/R
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Rbasics-2-1-titanic.R
70 lines (50 loc) · 1.49 KB
/
Rbasics-2-1-titanic.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# Rbasics-2-1-titanic.R
# Titanic Data
# https://www.kaggle.com/c/titanic
# or search for "titanic dataset csv download"
# see the data dictionary on that page
# read in the data
# (alternatively, the data can be imported through the GUI)
# Working directory ----
# Show the directory that relative paths are currently resolved against.
getwd()
# Note: a Windows path looks like
#   C:\Users\trasa\Google Drive\work\Courses\R_teaching_resources\R_code
# Inside an R string write each \ as / or as \\ (a single \ starts an escape
# sequence, so the path cannot be pasted unchanged).
course_dir <- "C:/Users/trasa/Google Drive/work/Courses/R_teaching_resources/R_code"
# or
# course_dir <- "C:\\Users\\trasa\\Google Drive\\work\\Courses\\R_teaching_resources\\R_code"
# Only switch directory when the folder exists; otherwise setwd() raises an
# error and the rest of the script never runs on other machines.
if (dir.exists(course_dir)) {
  setwd(course_dir)
} else {
  message("Course folder not found; staying in ", getwd())
}
getwd()
# List the files in the working directory, then keep the names in `files`.
dir()
files <- dir()
files
# Load the data ----
# Open the help page for read.csv.
?read.csv
# Read the passenger data; the relative path is resolved against the
# current working directory.
titanic <- read.csv("titanic.csv")
# View() opens a spreadsheet-style viewer and needs an interactive session
# with a display, so skip it when the script is run in batch mode.
if (interactive()) {
  View(titanic)
}
# Per-column summary and structure before any type conversion.
summary(titanic)
str(titanic)
# Store Sex and Embarked as factors.
titanic$Sex <- as.factor(titanic$Sex)
titanic$Embarked <- as.factor(titanic$Embarked)
# Same overview again, now showing the factor columns.
str(titanic)
summary(titanic)
# First rows (default count), first three rows, last rows (default count).
head(titanic)
head(titanic, 3)
tail(titanic)
# Derived columns ----
# Text label for the outcome: Survived == 1 gives "Survived", any other
# non-missing value gives "Died".
titanic[["SurvivedLabel"]] <- ifelse(
  titanic[["Survived"]] == 1,
  "Survived",
  "Died"
)
str(titanic)
# Convert the text labels into a factor.
titanic[["SurvivedLabel"]] <- as.factor(titanic[["SurvivedLabel"]])
str(titanic)
# Family size: the passenger (1) plus the SibSp and Parch counts.
titanic[["familySize"]] <- 1 + titanic[["SibSp"]] + titanic[["Parch"]]
str(titanic)
# Basic questions ----
# Numeric summary of the Fare column.
summary(titanic[["Fare"]])
# Keep the argument written as titanic$Fare: hist() builds its default
# title and axis label from this expression.
hist(titanic$Fare)
# Mean of Survived within each Sex, then within each Sex/Pclass combination.
aggregate(Survived ~ Sex, data = titanic, FUN = mean)
aggregate(Survived ~ Sex + Pclass, data = titanic, FUN = mean)
# Filtering ----
# Keep only the rows whose Sex is "male".
male <- titanic[titanic[["Sex"]] == "male", ]
# Plot ----
library(ggplot2)
# Histogram of familySize with bins one unit wide, bars filled by
# SurvivedLabel, one panel per Sex/Pclass combination, black-and-white theme.
ggplot(data = titanic, mapping = aes(x = familySize, fill = SurvivedLabel)) +
  geom_histogram(binwidth = 1) +
  facet_wrap(Sex ~ Pclass) +
  theme_bw()
# Exercise: copy the command above and change familySize to Fare
#################################################################################