-
Notifications
You must be signed in to change notification settings - Fork 0
/
combine_tri_data.R
63 lines (44 loc) · 1.33 KB
/
combine_tri_data.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
# combine_tri_data
# Takes the original trinomial CSV file and combines it with the split-trinomial CSV file generated
# by separate_trinomial.py. Removes non-trinomials.
# Grab the command line arguments.
# Usage: Rscript combine_tri_data.R <originalfile> <splitfile> <newfile>
args <- commandArgs(TRUE)
if (length(args) < 3) {
  stop(
    "Usage: Rscript combine_tri_data.R <originalfile> <splitfile> <newfile>",
    call. = FALSE
  )
}

# Original file is first arg, split trinomials are the second, new data is third.
originalfile <- args[1]
splitfile <- args[2]
newfile <- args[3]

# Read in the original file and the split-trinomial file.
original.set <- read.csv(originalfile)
split.set <- read.csv(splitfile)

# Fetch a column by name without attach()ing the data frames to the search
# path. The split file is consulted first and the original file second, so a
# column name present in both resolves to the split file's column.
# Stops with a clear error if neither file has the column.
pick_column <- function(name) {
  if (name %in% names(split.set)) {
    return(split.set[[name]])
  }
  if (name %in% names(original.set)) {
    return(original.set[[name]])
  }
  stop(
    "Column '", name, "' not found in '", splitfile, "' or '", originalfile, "'",
    call. = FALSE
  )
}

# Make a new data frame with short column names.
# NOTE(review): assumes the two files are row-aligned (row i of the split file
# describes row i of the original file) -- confirm against
# separate_trinomial.py.
tri_data_split <- data.frame(
  title = pick_column("Title"),
  year = pick_column("Year"),
  volnum = pick_column("Volume.Number"),
  issuenum = pick_column("Issue.Number"),
  pagenum = pick_column("Page.Number"),
  primauth = pick_column("Primary.Author.s.Last.Name"),
  tri_instance = pick_column("Trinomial.Instance"),
  st = pick_column("state"),
  ct = pick_column("county"),
  site = pick_column("row"),
  jstor = pick_column("JSTOR.LINK"),
  ststring = pick_column("State"),
  ctstring = pick_column("County"),
  comm = pick_column("Comments"),
  avoid = pick_column("Avoid..H.M.L.")
)
# Echo the combined table to stdout.
print(tri_data_split)

# Add full combined trinomial back to the data set.
tri_data_split$fulltri <- paste0(
  tri_data_split$st, tri_data_split$ct, tri_data_split$site
)

# Filter out site codes that take the format of ST's but are in fact not.
# Both conditions are combined into one logical mask: a second positional
# argument to subset() would be interpreted as `select` (a column selection),
# not as an additional row filter. Rows whose county code is missing are
# dropped as well, since read.csv() reads the text "NA" as a missing value by
# default.
county_code <- tri_data_split$ct
is_trinomial <- !is.na(county_code) & !(county_code %in% c("NA", "ML"))
tri_cleaned <- tri_data_split[is_trinomial, ]

# Write cleaned version to CSV.
write.csv(tri_cleaned, newfile)