-
Notifications
You must be signed in to change notification settings - Fork 0
/
CAwithFactominer_lexicon.r
executable file
·65 lines (35 loc) · 1.45 KB
/
CAwithFactominer_lexicon.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
library(FactoMineR)
# Open the PDF device; every plot drawn from here on is written to
# Results.pdf until the device is closed with dev.off().
pdf("Results.pdf")

# ---- Read data ----
# list.files() treats `pattern` as a regular expression, not a shell glob:
# a leading "*" is not a wildcard and "." matches any character, so the former
# '*.count' could also pick up names that merely contain "count".
# Anchor on the literal ".count" extension instead.
files <- list.files(pattern = "\\.count$")
files
nr_texts <- length(files)
if (nr_texts == 0L) {
  # Fail with a clear message (and release the PDF device) instead of the
  # obscure subsetting error that the empty merge result would trigger below.
  dev.off()
  stop("No '.count' files found in ", getwd(), call. = FALSE)
}

# Every file is read as a tab-separated table with a header row; quoting is
# disabled so quote characters inside a field are read literally.
# NOTE(review): read.table() keeps its default comment.char = "#", so a field
# containing "#" would cut its line short -- confirm whether that is intended.
import.list <- lapply(
  files, read.table,
  sep = "\t", header = TRUE, quote = "", stringsAsFactors = FALSE
)
# import.list

# Full outer join of all tables on the "Word" column; a word that is missing
# from one of the files gets NA in that file's column(s).
# NOTE(review): if the files share non-key column names, merge() disambiguates
# them with .x/.y suffixes -- verify the resulting column labels are useful.
data <- Reduce(
  function(x, y) merge(x, y, all = TRUE, by = "Word"),
  import.list
)

# Move the first column (the words) into the row names and convert the
# remaining columns to a numeric matrix. drop = FALSE keeps a data frame (and
# the column name) even when only a single count column is present.
data_rownames <- data.frame(data[, -1, drop = FALSE], row.names = data[, 1])
matrix <- data.matrix(data_rownames, rownames.force = NA)

# TODO: replace this zero-filling with a 1/n smoothing algorithm
matrix[is.na(matrix)] <- 0
data[is.na(data)] <- 0

# Return at most the top-left n x n corner of a table. Unlike x[1:3, 1:3],
# this does not fail when fewer than n rows or columns are present.
preview_corner <- function(x, n = 3L) {
  x[seq_len(min(n, nrow(x))), seq_len(min(n, ncol(x))), drop = FALSE]
}

preview_corner(data)
pattern_ids <- data[, 1]
rownames(data) <- pattern_ids
preview_corner(data)
preview_corner(matrix)
# Normalize matrix: divide every column by its column total so that each
# column sums to 1.
col_totals <- colSums(matrix)
zero_cols <- which(col_totals == 0)
if (length(zero_cols) > 0L) {
  # A zero total would turn the whole column into NaN (0 / 0).
  stop(
    "Cannot normalize: column(s) with a zero total: ",
    paste(colnames(matrix)[zero_cols], collapse = ", "),
    call. = FALSE
  )
}
matrix <- sweep(matrix, 2, col_totals, FUN = "/")
# The former second pass, scale(matrix, center = FALSE, scale = colSums(matrix)),
# was dropped: after the sweep every column total is already 1, so it only
# divided by ~1 again.
# matrix[1:3, 1:3]
# Transpose so that the former columns become the rows.
matrix <- t(matrix)
# matrix[1:3, 1:3]
########################################################################################################
# ANALYSIS
########################################################################################################
# Correspondence analysis of the prepared matrix, using FactoMineR's CA() with
# its default settings.
# All calls below stay at top level on purpose, so any value they return is
# auto-printed exactly as before.
ca_fit <- CA(matrix)

# Factor map with the column points hidden.
plot(
  ca_fit,
  invisible = "col",
  title = "just characters"
)

# Factor maps restricted via selectCol = "contrib 10" / "contrib 50";
# unselected points are drawn with unselect = 1.
# NOTE(review): presumably these keep the 10 / 50 most contributing columns --
# confirm against the plot.CA documentation.
plot(
  ca_fit,
  selectCol = "contrib 10",
  unselect = 1,
  title = "CA Factor Map - Contrib 10",
  autoLab = "yes",
  col.row = "black",
  col.col = "grey70"
)
plot(
  ca_fit,
  selectCol = "contrib 50",
  unselect = 1,
  title = "CA Factor Map - Contrib 50",
  autoLab = "yes",
  col.row = "black",
  col.col = "grey70"
)

# Close the active graphics device.
dev.off()