-
Notifications
You must be signed in to change notification settings - Fork 0
/
pps_exist_KNZ_CommonAndPseudo_bf.R
259 lines (214 loc) · 7.64 KB
/
pps_exist_KNZ_CommonAndPseudo_bf.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
#This script looks for Pearson-preserving surrogates for a dataset
#
#The script is currently written to work on the KNZ dataset, with
#non-common species lumped into one pseudo-species. To change the
#dataset on which the script operates, change the first section,
#then the rest should run, perhaps with a bit of hand-holding.
#
#Dan Reuman
#2019 04 12
rm(list=ls())
#***Change this section for different data and other settings
#**Data
#Importing the dataset matrix of years by species
d<-readRDS("./Results/knz_results/skewness_results/ts_knz_CP.RDS")# common species, all other combined as a pseudospecies
#**Settings
p<-seq(from=-1,to=1,by=0.1)
numreps<-250
resloc<-"./Results/Results_pps_exist_KNZ_CommonAndPseudo/"
theseed1<-103
theseed2<-1001
numpd<-100000
numsimsforcheck<-1000
#***Code imports, packages, and other setup
library(matrixcalc)
library(mvtnorm)
source("copmap.R")
source("getinv.R")
source("getinvrg.R")
source("alignranks.R")
if (!dir.exists(resloc))
{
dir.create(resloc)
}
#***For each pair of species, find the preimage of the corelation
#under the map defined by copmap.R
set.seed(theseed1)
numsp<-dim(d)[2]
#receptacle for results, the preimage in allparms[,,2], the range
#specified by the other values
allparms<-array(NA,c(numsp,numsp,3))
#loop through each pair of species
for (c1 in 1:(numsp-1))
{
print(paste0("c1: ",c1))
for (c2 in (c1+1):numsp)
{
#get the time series for these two species
x<-d[,c1]
y<-d[,c2]
#get the (stochastic) map
thisres<-copmap(x,y,p,numreps,"cor")
#find the inverse range
allparms[c1,c2,c(1,3)]<-getinvrg(p,thisres,cor(x,y))
#find the actual preimage
allparms[c1,c2,2]<-getinv(p,thisres,cor(x,y),"median")
#make and save a plot
mn<-apply(FUN=mean,MARGIN=2,X=thisres)
quants<-apply(FUN=quantile,MARGIN=2,X=thisres,probs=c(.25,.5,.75))
pdf(paste0(resloc,"mapplot_",c1,"_",c2,".pdf"))
plot(p,mn,type='l',ylim=range(mn,quants))
lines(p,quants[2,],type='l',lty="dashed")
lines(p,quants[1,],type='l',lty="dotted")
lines(p,quants[3,],type='l',lty="dotted")
lines(range(p),rep(cor(x,y),2))
lines(rep(allparms[c1,c2,1],2),c(-1,1))
lines(rep(allparms[c1,c2,2],2),c(-1,1))
lines(rep(allparms[c1,c2,3],2),c(-1,1))
dev.off()
}
}
#make sure the preimage is within the preimage range in all cases,
#and you got a sensible range and a single-point preimage in all cases
sum(allparms[,,1]<allparms[,,2],na.rm=TRUE)
sum(allparms[,,2]<allparms[,,3],na.rm=TRUE)
(numsp^2-numsp)/2
#***Now look for positive definite
#First check to see if we got lucky and the preimage itself is pos def
m1<-allparms[,,2]
m1[is.na(m1)]<-0
m1<-m1+t(m1)
diag(m1)<-1
m2<-p2P(P2p(m1))
sum(m1==m2)
prod(dim(m1))
is.positive.semi.definite(m1)
eigen(m1)$values
#If m1 is pos semi def, we can skip some of the next steps
#now use nearPD and see if you get a result between hlo and hhi
hlo<-P2p(t(allparms[,,1]))
hpre<-P2p(t(allparms[,,2]))
hhi<-P2p(t(allparms[,,3]))
hmid<-(hlo+hhi)/2
bestmatNPD<-matrix(as.numeric(Matrix::nearPD(m2,corr=TRUE)$mat),numsp,numsp)
bestmatNPDvect<-P2p(bestmatNPD)
sum(bestmatNPDvect<=hhi & bestmatNPDvect>=hlo)
length(bestmatNPDvect)
inds<-which(bestmatNPDvect>hhi | bestmatNPDvect<hlo)
cbind(hlo[inds],bestmatNPDvect[inds],hhi[inds])
#so the nearPD matrix is outside the bounds given by hlo and hhi for
#a couple of the coords, so I won't bother with it, but there are only two
#bad coords and they are not that bad so I may be able to make it work
#somehow
#prepare to search the space of parameters desribed by allparms for
#pos def
set.seed(theseed2)
allposdef<-matrix(NA,numpd,length(hlo)+1)
rowcount<-1
fn<-function(h)
{
res<-min(eigen(p2P(h))$values)
if (res>0)
{
allposdef[rowcount,]<<-c(h,res)
rowcount<<-rowcount+1 #note the use of global variables, watch out!
print("Found one!")
}
return(res)
}
#The below optimizations will crash out once they have found
#numpd positive definite results. This is intentional.
#do a search starting with the preimage itself
optim(hpre,fn,method="L-BFGS-B",lower=hlo,upper=hhi,
control=list(fnscale=-1))
#do a search starting from the midpoint of the preimage range
optim(hmid,fn,method="L-BFGS-B",lower=hlo,upper=hhi,
control=list(fnscale=-1))
#now do a bunch of searches from random start points
while (all(is.na(allposdef[numpd,])))
{
startvec<-(hhi-hlo)*runif(length(hlo))+hlo
optim(startvec,fn,method="L-BFGS-B",lower=hlo,upper=hhi,
control=list(fnscale=-1))
}
#***
saveRDS(allposdef,file=paste0(resloc,"PosDefMats.RDS"))
#Now find the one that is closest to hpre
allposdef<-allposdef[1:sum(!is.na(allposdef[,1])),1:(dim(allposdef)[2]-1)]
hpremat<-matrix(hpre,dim(allposdef)[1],length(hpre),byrow = TRUE)
sizemat<-matrix((hhi-hlo)/2,dim(allposdef)[1],length(hlo),byrow = TRUE)
dist<-apply(X=((allposdef-hpremat)/sizemat)^2,FUN=sum,MARGIN=1)
besth<-allposdef[which(dist==min(dist)),]
sum(besth>=hlo)
sum(besth<=hhi)
length(hhi)
bestmat<-p2P(besth) #this is supposed to be my parameter matrix for a normal copula!
isSymmetric(bestmat)
is.positive.semi.definite(bestmat)
eigen(bestmat)$values
#***Now make a plot showing where in the ranges given by hlo to hhi
#the selected values sit
inds<-order(besth)
plot(1:length(besth),besth[inds],type='l',ylim=range(besth,hlo,hhi))
lines(1:length(besth),hlo[inds],type='l',lty="dashed")
lines(1:length(besth),hhi[inds],type='l',lty="dashed")
#looks pretty good
#***Now assess whether you get reasonable pearson-preserving surrogates
#using bestmat
sims<-rmvnorm((dim(d)[1])*numsimsforcheck,sigma=bestmat)
sims<-aperm(array(sims,c(dim(d)[1],numsimsforcheck,dim(d)[2])),c(1,3,2))
dim(sims)
sd<-d
for (counter in 1:numsp)
{
sd[,counter]<-sort(d[,counter])
}#need to sort the columns of d to use alignranks
remapd<-alignranks(sd,sims)
holder<-apply(FUN=cor,MARGIN=3,X=remapd)
allcors<-array(holder,c(numsp,numsp,numsimsforcheck))
gtres<-apply(FUN=sum,MARGIN=c(1,2),X=(allcors>array(cor(d),c(numsp,numsp,numsimsforcheck))))
ltres<-apply(FUN=sum,MARGIN=c(1,2),X=(allcors<array(cor(d),c(numsp,numsp,numsimsforcheck))))
eqres<-apply(FUN=sum,MARGIN=c(1,2),X=(allcors==array(cor(d),c(numsp,numsp,numsimsforcheck))))
sum(diag(eqres)==numsimsforcheck)
length(diag(eqres))
diag(eqres)<-NA
sum(eqres==0,na.rm=TRUE)
(numsp^2-numsp)
inds<-which(eqres!=0,arr.ind = TRUE)
sum(diag(gtres)==0)
sum(diag(ltres)==0)
diag(gtres)<-NA
diag(ltres)<-NA
hist(gtres)#very few of these should be outside the range 250-750,
#and if they are not outside that range you have pretty
#good surrogates
hist(ltres)#very few of these should be outside the range 250-750,
#and if they are not outside that range you have pretty
#good surrogates
saveRDS(bestmat,file=paste0(resloc,"FinalParameterMatrix.RDS"))
#This is the final result. If you generate data from a normal copula
#with this parameter matrix and then use alignranks, that will give
#ok Pearson-preserving surrogates.
#***Now produce 100000 surrogates and save for later use
sims<-rmvnorm((dim(d)[1])*100000,sigma=bestmat)
sims<-aperm(array(sims,c(dim(d)[1],100000,dim(d)[2])),c(1,3,2))
dim(sims)
sd<-d
for (counter in 1:numsp)
{
sd[,counter]<-sort(d[,counter])
}#need to sort the columns of d to use aligncall
surrogs<-alignranks(sd,sims)
saveRDS(surrogs,file=paste0(resloc,"SomeSurrogates.RDS"))
#***compute CV_com^2 for surrogs
totpop<-apply(FUN=sum,X=surrogs,MARGIN=c(1,3))
allvars<-apply(FUN=var,X=totpop,MARGIN=2)
allmnsq<-apply(FUN=function(x){(mean(x))^2},X=totpop,MARGIN=2)
allCVcomsq<-allvars/allmnsq
hist(allCVcomsq,50)
totpop<-apply(FUN=sum,X=d,MARGIN=1)
CVd<-var(totpop)/((mean(totpop))^2)
lines(rep(CVd,2),c(1,1000),col="red")
sum(allCVcomsq<CVd)/length(allCVcomsq)
#really good!
save.image(file=paste0(resloc,"WholeWorkspace.RDS"))