-
Notifications
You must be signed in to change notification settings - Fork 3
/
precompute_G.py
80 lines (66 loc) · 1.88 KB
/
precompute_G.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import os
import random
import numpy as np
import copy
import torch
import sys
import torch.optim as optim
import pprint as pp
import pandas as pd
import utils.hg_ops as hgo
import pickle as pkl
from config import get_config
import getopt
# --- Command line handling -------------------------------------------------
# Recognized options:
#   -c / --config <file>   configuration file handed to get_config (required)
#   -s / --seed   <int>    integer seed (optional; parsed here, but nothing in
#                          the visible part of this script reads it)
_USAGE = "Use: python precompute_G.py -c config.SHINE.yaml"

if len(sys.argv) < 2:
    sys.exit(_USAGE)

try:
    opts, extraparams = getopt.getopt(sys.argv[1:], 's:c:',
                                      ['seed=', 'config='])
except getopt.GetoptError as err:
    # Unknown option or missing option argument: report it with the usage
    # line instead of dumping a traceback.
    sys.exit(f"{err}\n{_USAGE}")

# Defaults so both names are always bound, even when an option is omitted.
seed = None
fncfg = None
for o, p in opts:
    if o in ('-s', '--seed'):
        seed = int(p)
    elif o in ('-c', '--config'):
        fncfg = p

# The config file is mandatory: without it there is nothing to load, and the
# get_config call below would otherwise fail on an unbound name.
if fncfg is None:
    sys.exit(_USAGE)

cfg = get_config(fncfg)
# --- Settings read from the loaded configuration ---------------------------
# File locations (data root, H input, G output, pickled data, training ids).
ddn, fn_H, fn_G, fn_m, fn_train = (
    cfg[key] for key in ('data_root', 'fn_H', 'fn_G', 'fn_m', 'fn_train')
)

# Dataset name and the switches that select how H is constructed below.
dataset, use_subj_edge, tfidf_H = (
    cfg[key] for key in ('on_dataset', 'use_subj_edge', 'tfidf_H')
)

# 'transductive' is an optional key; it is treated as False when absent.
transductive = cfg['transductive'] if 'transductive' in cfg else False
# --- Load training indices and the pickled data ----------------------------
# First column of a header-less CSV; used further down as positional row
# indices into `m` (via `m.iloc[train_idx]`).
train_idx = pd.read_csv(fn_train, header=None)[0]

# NOTE(security): unpickling can execute arbitrary code, so fn_m must come
# from a trusted source.
# The context manager guarantees the handle is closed even when unpickling
# fails or the dataset name is rejected.
with open(fn_m, 'rb') as f:
    if dataset == 'MC3':
        # MC3 pickles hold three objects: m, cf and y.
        m, cf, y = pkl.load(f)
    elif dataset == 'disgenet':
        # disgenet pickles hold two objects: m and y.
        m, y = pkl.load(f)
    else:
        sys.exit(f'unrecognized dataset {dataset}')
# --- Build H according to the configured mode, then derive G from it -------
print('Computing G...')

if not transductive:
    if use_subj_edge:
        # Inductive construction restricted to the training rows of m.
        H, pathway_idx, subj_idx = hgo.construct_Hexp_inductive(
            fn_H, m.iloc[train_idx])
    else:
        # Plain construction from the H file only.
        H = hgo.construct_H(fn_H, tfidf=tfidf_H)
elif dataset == 'MC3':
    # Transductive, MC3: the helper also returns cf, y and yuniques, which
    # rebind the cf / y loaded from the pickle earlier.
    (H, gene_idx, subj_idx,
     cf, y, yuniques) = hgo.construct_Hexp(fn_H, fn_m, dataset)
elif dataset == 'disgenet':
    # Transductive, disgenet: same call, but no cf in the result.
    (H, gene_idx, subj_idx,
     y, yuniques) = hgo.construct_Hexp(fn_H, fn_m, dataset)

G = hgo.generate_G_from_H(H)
# --- Serialize G and its companion objects to fn_G -------------------------
# The list is assembled before the output file is opened, so a problem while
# collecting the pieces cannot leave a freshly truncated, empty file behind.
# The list layout depends on the mode that produced G:
#   transductive, MC3      -> [G, gene_idx, subj_idx, cf, y, yuniques]
#   transductive, disgenet -> [G, gene_idx, subj_idx, y, yuniques]
#   use_subj_edge          -> [G, pathway_idx, subj_idx]
#   otherwise              -> [G]
if transductive:
    if dataset == 'MC3':
        payload = [G, gene_idx, subj_idx, cf, y, yuniques]
    elif dataset == 'disgenet':
        payload = [G, gene_idx, subj_idx, y, yuniques]
    else:
        # Defensive: refuse to write an empty file for an unknown dataset.
        sys.exit(f'unrecognized dataset {dataset}')
elif use_subj_edge:
    payload = [G, pathway_idx, subj_idx]
else:
    payload = [G]

# The context manager closes the file even if pickling raises.
# HIGHEST_PROTOCOL is what a negative protocol number selects.
with open(fn_G, 'wb') as f:
    pkl.dump(payload, f, pkl.HIGHEST_PROTOCOL)