-
Notifications
You must be signed in to change notification settings - Fork 2
/
Importing and redefining keywords to search facebook posts
67 lines (54 loc) · 2.3 KB
/
Importing and redefining keywords to search facebook posts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
### Workflow for importing and processing the keywords###
### The code was developed as part of the Ancient Identities in Modern Britain (IARH) project
### It imports and redefines the keywords that are going to be used to search Facebook posts
### Project: Ancient Identities in Modern Britain (IARH); ancientidentities.org
### Author: Marta Krzyzanska
# Set working directory. Replace the placeholder below with the folder that
# holds the keyword spreadsheet, since the file is read by a relative name.
setwd("~path")
# Load the xlsx package, which provides read.xlsx(). library() stops at once
# with a clear error when the package is missing, whereas require() would only
# warn and let the script fail later. If you have not installed the package
# already, run install.packages("xlsx") first.
library(xlsx)
### Part 1: Importing the keywords
# Load the list of keywords from the first sheet of the workbook.
keywords <- read.xlsx("#Enter_file_name.xlsx", 1)

# Make separate lists of the keywords to look for in the pages (first column)
# and in the posts (second column). A column may contain NA entries; they are
# removed explicitly wherever they occur, so that no genuine keyword is lost
# when a column has no NA at all or when the NA is not the last unique value.
pageKeywords <- unique(as.character(keywords[[1]]))
pageKeywords <- pageKeywords[!is.na(pageKeywords)]
# Number of page keywords that were kept.
pkn <- length(pageKeywords)

postKeywords <- unique(as.character(keywords[[2]]))
postKeywords <- postKeywords[!is.na(postKeywords)]
# Number of post keywords that were kept.
pokn <- length(postKeywords)
### Part 2: Redefining keywords
# Keywords shorter than 5 characters, and the word "Penda", are wrapped in
# spaces so that the search function will not pick them up unless they appear
# as a separate word. For each such keyword six extra variants are appended to
# the end of the list, in which the keyword is followed by a punctuation mark
# (. , ! ? ; :) and a space.
# The word "Penda" is included as it frequently features as part of another
# word.
# The word "Celt" (compared case-sensitively) is excluded as it is not likely
# to form a part of another word in English and is one of the most relevant
# keywords.
punctuation_suffixes <- c("\\. ", "\\, ", "\\! ", "\\? ", "\\; ", "\\: ")

# Flag the keywords that need padding. NA entries are never flagged, so they
# are left untouched instead of aborting the script.
needs_padding <- !is.na(postKeywords) &
  ((nchar(postKeywords) < 5 & postKeywords != "Celt") |
     postKeywords == "Penda")
short_keywords <- postKeywords[needs_padding]

# Build the punctuated variants from the unpadded keywords: one group of six
# per keyword, in keyword order. lapply() over an empty vector yields nothing,
# so no stray variants are created when no keyword qualifies.
punctuated_variants <- unlist(
  lapply(short_keywords, function(keyword) {
    paste0(" ", keyword, punctuation_suffixes)
  }),
  use.names = FALSE
)

# Replace the flagged keywords with their space-padded form, then append the
# punctuated variants after the original entries.
padded_keywords <- paste0(" ", postKeywords, " ")
postKeywords[needs_padding] <- padded_keywords[needs_padding]
postKeywords <- c(postKeywords, punctuated_variants)
### Part 3: Keeping only the posts that feature the keywords
# my_posts is a list of lists of posts extracted from pages.
# Every non-empty entry is passed through searchPosts() together with the
# redefined post keywords, and only the rows whose `keywords` field is not an
# empty string are kept. Empty entries are skipped and left as they are.
for (i in seq_along(my_posts)) {
  if (length(my_posts[[i]]) == 0) {
    next
  }
  # Progress indicator: index of the entry currently being processed.
  print(i)
  my_posts[[i]] <- searchPosts(my_posts[[i]], postKeywords)
  my_posts[[i]] <- subset(my_posts[[i]], my_posts[[i]]$keywords != "")
}