From df30c67723dd16c0153e409befdc53a98fd20d6c Mon Sep 17 00:00:00 2001 From: Sourcery AI <> Date: Sat, 11 Jun 2022 08:15:50 +0000 Subject: [PATCH] 'Refactored by Sourcery' --- apps/kg/dataloader/KGDataset.py | 71 +++++++-------- apps/kg/dataloader/sampler.py | 31 +++---- apps/kg/eval.py | 19 ++-- apps/kg/kvserver.py | 28 +++--- apps/kg/models/general_models.py | 33 +++---- apps/kg/models/mxnet/score_fun.py | 10 +-- apps/kg/models/mxnet/tensor_models.py | 4 +- apps/kg/models/pytorch/score_fun.py | 10 +-- apps/kg/models/pytorch/tensor_models.py | 12 ++- apps/kg/partition.py | 49 +++++------ apps/kg/tests/test_score.py | 19 ++-- apps/kg/train.py | 4 +- apps/kg/train_mxnet.py | 16 ++-- apps/kg/train_pytorch.py | 42 ++++----- apps/kg/utils.py | 6 +- .../benchmarks/api/bench_add_self_loop.py | 6 +- benchmarks/benchmarks/api/bench_batch.py | 8 +- .../api/bench_builtin_apply_edges.py | 6 +- .../api/bench_builtin_apply_edges_hetero.py | 11 ++- .../api/bench_builtin_multi_update_all.py | 17 ++-- .../api/bench_builtin_update_all_coo.py | 2 +- .../api/bench_builtin_update_all_csc.py | 6 +- benchmarks/benchmarks/api/bench_edge_ids.py | 4 +- .../benchmarks/api/bench_edge_subgraph.py | 6 +- benchmarks/benchmarks/api/bench_find_edges.py | 4 +- .../benchmarks/api/bench_format_conversion.py | 2 +- .../api/bench_heterograph_construction.py | 6 +- .../api/bench_homograph_edge_construction.py | 2 +- .../api/bench_homograph_scipy_construction.py | 2 +- benchmarks/benchmarks/api/bench_in_degrees.py | 3 +- benchmarks/benchmarks/api/bench_in_edges.py | 3 +- .../benchmarks/api/bench_in_subgraph.py | 4 +- benchmarks/benchmarks/api/bench_khop.py | 2 +- benchmarks/benchmarks/api/bench_knn_graph.py | 4 +- .../benchmarks/api/bench_metis_partition.py | 2 +- .../benchmarks/api/bench_nn_graphconv.py | 4 +- .../api/bench_nn_heterographconv.py | 14 +-- .../benchmarks/api/bench_node_subgraph.py | 4 +- .../benchmarks/api/bench_random_walk.py | 4 +- benchmarks/benchmarks/api/bench_readout.py | 10 +-- benchmarks/benchmarks/api/bench_reverse.py | 2 +- .../benchmarks/api/bench_sample_neighbors.py | 4 +- benchmarks/benchmarks/api/bench_to_block.py | 2 +- .../benchmarks/api/bench_udf_apply_edges.py | 2 +- .../api/bench_udf_multi_update_all.py | 18 ++-- .../benchmarks/api/bench_udf_update_all.py | 2 +- benchmarks/benchmarks/api/bench_unbatch.py | 6 +- .../benchmarks/kernel/bench_edgesoftmax.py | 4 +- .../benchmarks/kernel/bench_gsddmm_u_dot_v.py | 4 +- .../benchmarks/kernel/bench_gspmm_copy_u.py | 4 +- .../kernel/bench_gspmm_u_mul_e_sum.py | 4 +- benchmarks/benchmarks/model_acc/bench_gat.py | 9 +- benchmarks/benchmarks/model_acc/bench_gcn.py | 7 +- .../benchmarks/model_acc/bench_gcn_udf.py | 7 +- benchmarks/benchmarks/model_acc/bench_rgcn.py | 9 +- .../benchmarks/model_acc/bench_rgcn_ns.py | 19 ++-- benchmarks/benchmarks/model_acc/bench_sage.py | 7 +- .../benchmarks/model_acc/bench_sage_ns.py | 8 +- .../benchmarks/model_speed/bench_gat.py | 8 +- .../benchmarks/model_speed/bench_gat_ns.py | 2 +- .../benchmarks/model_speed/bench_gcn_udf.py | 6 +- .../benchmarks/model_speed/bench_pinsage.py | 6 +- .../benchmarks/model_speed/bench_rgcn.py | 12 +-- .../model_speed/bench_rgcn_hetero_ns.py | 19 ++-- .../model_speed/bench_rgcn_homogeneous_ns.py | 7 +- .../benchmarks/model_speed/bench_sage.py | 6 +- .../benchmarks/model_speed/bench_sage_ns.py | 2 +- .../model_speed/bench_sage_unsupervised_ns.py | 11 +-- .../multigpu/bench_multigpu_rgcn.py | 2 +- .../multigpu/bench_multigpu_sage.py | 7 +- benchmarks/benchmarks/multigpu/rgcn_model.py | 6 +- 
benchmarks/benchmarks/utils.py | 33 +++---- benchmarks/scripts/generate_excel.py | 40 +++++---- benchmarks/scripts/replace_branch.py | 14 ++- dglgo/dglgo/cli/export_cli.py | 4 +- dglgo/dglgo/cli/recipe_cli.py | 2 +- dglgo/dglgo/model/edge_encoder/bilinear.py | 3 +- dglgo/dglgo/model/edge_encoder/dot.py | 3 +- dglgo/dglgo/model/graph_encoder/pna.py | 16 ++-- dglgo/dglgo/model/node_encoder/gat.py | 7 +- dglgo/dglgo/pipeline/graphpred/gen.py | 7 +- dglgo/dglgo/pipeline/linkpred/gen.py | 7 +- dglgo/dglgo/pipeline/nodepred/gen.py | 7 +- dglgo/dglgo/pipeline/nodepred_sample/gen.py | 8 +- dglgo/dglgo/utils/factory.py | 87 ++++++------------- dglgo/tests/test_pipeline.py | 16 ++-- docs/source/conf.py | 2 +- examples/mxnet/_deprecated/gcmc/data.py | 59 +++++++------ examples/mxnet/_deprecated/gcmc/model.py | 57 +++++++----- examples/mxnet/_deprecated/gcmc/train.py | 3 +- examples/mxnet/_deprecated/gcmc/utils.py | 20 ++--- .../mxnet/_deprecated/sampling/gcn_cv_sc.py | 20 ++--- .../mxnet/_deprecated/sampling/gcn_ns_sc.py | 6 +- .../_deprecated/sampling/graphsage_cv.py | 4 +- .../sampling/multi_process_train.py | 6 +- .../_deprecated/sampling/run_store_server.py | 2 +- examples/mxnet/_deprecated/sampling/train.py | 6 +- 97 files changed, 539 insertions(+), 602 deletions(-) diff --git a/apps/kg/dataloader/KGDataset.py b/apps/kg/dataloader/KGDataset.py index 29c77c84bd61..add6eade662e 100644 --- a/apps/kg/dataloader/KGDataset.py +++ b/apps/kg/dataloader/KGDataset.py @@ -36,7 +36,7 @@ def _download_and_extract(url, path, filename): os.makedirs(path, exist_ok=True) f_remote = requests.get(url, stream=True) sz = f_remote.headers.get('content-length') - assert f_remote.status_code == 200, 'fail to open {}'.format(url) + assert f_remote.status_code == 200, f'fail to open {url}' with open(fn, 'wb') as writer: for chunk in f_remote.iter_content(chunk_size=1024*1024): writer.write(chunk) @@ -65,8 +65,6 @@ def _parse_srd_format(format): def _file_line(path): with open(path) as f: - for i, l in enumerate(f): - pass - return i + 1 + return sum(1 for _ in f) class KGDataset: @@ -117,7 +116,7 @@ def read_triple(self, path, mode, skip_first_line=False, format=[0,1,2]): if path is None: return None - print('Reading {} triples....'.format(mode)) + print(f'Reading {mode} triples....') heads = [] tails = [] rels = [] @@ -134,7 +133,7 @@ def read_triple(self, path, mode, skip_first_line=False, format=[0,1,2]): heads = np.array(heads, dtype=np.int64) tails = np.array(tails, dtype=np.int64) rels = np.array(rels, dtype=np.int64) - print('Finished. Read {} {} triples.'.format(len(heads), mode)) + print(f'Finished. Read {len(heads)} {mode} triples.') return (heads, rels, tails) @@ -164,7 +163,7 @@ def read_triple(self, path, mode): heads = [] tails = [] rels = [] - print('Reading {} triples....'.format(mode)) + print(f'Reading {mode} triples....') with open(path) as f: for line in f: h, r, t = line.strip().split('\t') @@ -175,7 +174,7 @@ def read_triple(self, path, mode): heads = np.array(heads, dtype=np.int64) tails = np.array(tails, dtype=np.int64) rels = np.array(rels, dtype=np.int64) - print('Finished. Read {} {} triples.'.format(len(heads), mode)) + print(f'Finished. Read {len(heads)} {mode} triples.') return (heads, rels, tails) @@ -195,11 +194,11 @@ class KGDatasetFB15k(KGDataset): ''' def __init__(self, path, name='FB15k'): self.name = name - url = 'https://data.dgl.ai/dataset/{}.zip'.format(name) + url = f'https://data.dgl.ai/dataset/{name}.zip' if not os.path.exists(os.path.join(path, name)): print('File not found. 
Downloading from', url) - _download_and_extract(url, path, name + '.zip') + _download_and_extract(url, path, f'{name}.zip') self.path = os.path.join(path, name) super(KGDatasetFB15k, self).__init__(os.path.join(self.path, 'entities.dict'), @@ -224,11 +223,11 @@ class KGDatasetFB15k237(KGDataset): ''' def __init__(self, path, name='FB15k-237'): self.name = name - url = 'https://data.dgl.ai/dataset/{}.zip'.format(name) + url = f'https://data.dgl.ai/dataset/{name}.zip' if not os.path.exists(os.path.join(path, name)): print('File not found. Downloading from', url) - _download_and_extract(url, path, name + '.zip') + _download_and_extract(url, path, f'{name}.zip') self.path = os.path.join(path, name) super(KGDatasetFB15k237, self).__init__(os.path.join(self.path, 'entities.dict'), @@ -253,11 +252,11 @@ class KGDatasetWN18(KGDataset): ''' def __init__(self, path, name='wn18'): self.name = name - url = 'https://data.dgl.ai/dataset/{}.zip'.format(name) + url = f'https://data.dgl.ai/dataset/{name}.zip' if not os.path.exists(os.path.join(path, name)): print('File not found. Downloading from', url) - _download_and_extract(url, path, name + '.zip') + _download_and_extract(url, path, f'{name}.zip') self.path = os.path.join(path, name) super(KGDatasetWN18, self).__init__(os.path.join(self.path, 'entities.dict'), @@ -282,11 +281,11 @@ class KGDatasetWN18rr(KGDataset): ''' def __init__(self, path, name='wn18rr'): self.name = name - url = 'https://data.dgl.ai/dataset/{}.zip'.format(name) + url = f'https://data.dgl.ai/dataset/{name}.zip' if not os.path.exists(os.path.join(path, name)): print('File not found. Downloading from', url) - _download_and_extract(url, path, name + '.zip') + _download_and_extract(url, path, f'{name}.zip') self.path = os.path.join(path, name) super(KGDatasetWN18rr, self).__init__(os.path.join(self.path, 'entities.dict'), @@ -310,11 +309,11 @@ class KGDatasetFreebase(KGDataset): ''' def __init__(self, path, name='Freebase'): self.name = name - url = 'https://data.dgl.ai/dataset/{}.zip'.format(name) + url = f'https://data.dgl.ai/dataset/{name}.zip' if not os.path.exists(os.path.join(path, name)): print('File not found. Downloading from', url) - _download_and_extract(url, path, '{}.zip'.format(name)) + _download_and_extract(url, path, f'{name}.zip') self.path = os.path.join(path, name) super(KGDatasetFreebase, self).__init__(os.path.join(self.path, 'entity2id.txt'), @@ -337,7 +336,7 @@ def read_triple(self, path, mode, skip_first_line=False, format=None): heads = [] tails = [] rels = [] - print('Reading {} triples....'.format(mode)) + print(f'Reading {mode} triples....') with open(path) as f: if skip_first_line: _ = f.readline() @@ -350,7 +349,7 @@ def read_triple(self, path, mode, skip_first_line=False, format=None): heads = np.array(heads, dtype=np.int64) tails = np.array(tails, dtype=np.int64) rels = np.array(rels, dtype=np.int64) - print('Finished. Read {} {} triples.'.format(len(heads), mode)) + print(f'Finished. 
Read {len(heads)} {mode} triples.') return (heads, rels, tails) class KGDatasetUDDRaw(KGDataset): @@ -369,8 +368,7 @@ class KGDatasetUDDRaw(KGDataset): def __init__(self, path, name, files, format): self.name = name for f in files: - assert os.path.exists(os.path.join(path, f)), \ - 'File {} now exist in {}'.format(f, path) + assert os.path.exists(os.path.join(path, f)), f'File {f} does not exist in {path}' assert len(format) == 3 format = _parse_srd_format(format) @@ -437,8 +435,7 @@ class KGDatasetUDD(KGDataset): def __init__(self, path, name, files, format): self.name = name for f in files: - assert os.path.exists(os.path.join(path, f)), \ - 'File {} now exist in {}'.format(f, path) + assert os.path.exists(os.path.join(path, f)), f'File {f} does not exist in {path}' format = _parse_srd_format(format) if len(files) == 3: @@ -458,14 +455,14 @@ def __init__(self, path, name, files, format): def read_entity(self, entity_path): n_entities = 0 with open(entity_path) as f_ent: - for line in f_ent: + for _ in f_ent: n_entities += 1 return None, n_entities def read_relation(self, relation_path): n_relations = 0 with open(relation_path) as f_rel: - for line in f_rel: + for _ in f_rel: n_relations += 1 return None, n_relations @@ -473,7 +470,7 @@ def read_triple(self, path, mode, skip_first_line=False, format=[0,1,2]): heads = [] tails = [] rels = [] - print('Reading {} triples....'.format(mode)) + print(f'Reading {mode} triples....') with open(path) as f: if skip_first_line: _ = f.readline() @@ -486,7 +483,7 @@ def read_triple(self, path, mode, skip_first_line=False, format=[0,1,2]): heads = np.array(heads, dtype=np.int64) tails = np.array(tails, dtype=np.int64) rels = np.array(rels, dtype=np.int64) - print('Finished. Read {} {} triples.'.format(len(heads), mode)) + print(f'Finished. 
Read {len(heads)} {mode} triples.') return (heads, rels, tails) def get_dataset(data_path, data_name, format_str, files=None): @@ -502,7 +499,7 @@ def get_dataset(data_path, data_name, format_str, files=None): elif data_name == 'wn18rr': dataset = KGDatasetWN18rr(data_path) else: - assert False, "Unknown dataset {}".format(data_name) + assert False, f"Unknown dataset {data_name}" elif format_str.startswith('raw_udd'): # user defined dataset format = format_str[8:] @@ -512,13 +509,13 @@ def get_dataset(data_path, data_name, format_str, files=None): format = format_str[4:] dataset = KGDatasetUDD(data_path, data_name, files, format) else: - assert False, "Unknown format {}".format(format_str) + assert False, f"Unknown format {format_str}" return dataset def get_partition_dataset(data_path, data_name, part_id): - part_name = os.path.join(data_name, 'partition_'+str(part_id)) + part_name = os.path.join(data_name, f'partition_{str(part_id)}') path = os.path.join(data_path, part_name) if not os.path.exists(path): @@ -547,19 +544,15 @@ def get_partition_dataset(data_path, data_name, part_id): partition_book = [] with open(partition_book_path) as f: - for line in f: - partition_book.append(int(line)) - + partition_book.extend(int(line) for line in f) local_to_global = [] with open(local2global_path) as f: - for line in f: - local_to_global.append(int(line)) - + local_to_global.extend(int(line) for line in f) return dataset, partition_book, local_to_global def get_server_partition_dataset(data_path, data_name, part_id): - part_name = os.path.join(data_name, 'partition_'+str(part_id)) + part_name = os.path.join(data_name, f'partition_{str(part_id)}') path = os.path.join(data_path, part_name) if not os.path.exists(path): @@ -589,9 +582,7 @@ def get_server_partition_dataset(data_path, data_name, part_id): local_to_global = [] with open(local2global_path) as f: - for line in f: - local_to_global.append(int(line)) - + local_to_global.extend(int(line) for line in f) global_to_local = [0] * n_entities for i in range(len(local_to_global)): global_id = local_to_global[i] diff --git a/apps/kg/dataloader/sampler.py b/apps/kg/dataloader/sampler.py index c125f3ac49b9..2b4ebd74ecc4 100644 --- a/apps/kg/dataloader/sampler.py +++ b/apps/kg/dataloader/sampler.py @@ -64,7 +64,7 @@ def SoftRelationPartition(edges, n, threshold=0.05): Whether there exists some relations belongs to multiple partitions """ heads, rels, tails = edges - print('relation partition {} edges into {} parts'.format(len(heads), n)) + print(f'relation partition {len(heads)} edges into {n} parts') uniq, cnts = np.unique(rels, return_counts=True) idx = np.flip(np.argsort(cnts)) cnts = cnts[idx] @@ -73,16 +73,12 @@ def SoftRelationPartition(edges, n, threshold=0.05): edge_cnts = np.zeros(shape=(n,), dtype=np.int64) rel_cnts = np.zeros(shape=(n,), dtype=np.int64) rel_dict = {} - rel_parts = [] cross_rel_part = [] - for _ in range(n): - rel_parts.append([]) - + rel_parts = [[] for _ in range(n)] large_threshold = int(len(rels) * threshold) capacity_per_partition = int(len(rels) / n) # ensure any relation larger than the partition capacity will be split - large_threshold = capacity_per_partition if capacity_per_partition < large_threshold \ - else large_threshold + large_threshold = min(capacity_per_partition, large_threshold) num_cross_part = 0 for i in range(len(cnts)): cnt = cnts[i] @@ -108,8 +104,8 @@ def SoftRelationPartition(edges, n, threshold=0.05): rel_dict[r] = r_parts for i, edge_cnt in enumerate(edge_cnts): - print('part {} has {} edges and 
{} relations'.format(i, edge_cnt, rel_cnts[i])) - print('{}/{} duplicated relation across partitions'.format(num_cross_part, len(cnts))) + print(f'part {i} has {edge_cnt} edges and {rel_cnts[i]} relations') + print(f'{num_cross_part}/{len(cnts)} duplicated relation across partitions') parts = [] for i in range(n): @@ -171,7 +167,7 @@ def BalancedRelationPartition(edges, n): Whether there exists some relations belongs to multiple partitions """ heads, rels, tails = edges - print('relation partition {} edges into {} parts'.format(len(heads), n)) + print(f'relation partition {len(heads)} edges into {n} parts') uniq, cnts = np.unique(rels, return_counts=True) idx = np.flip(np.argsort(cnts)) cnts = cnts[idx] @@ -180,10 +176,7 @@ def BalancedRelationPartition(edges, n): edge_cnts = np.zeros(shape=(n,), dtype=np.int64) rel_cnts = np.zeros(shape=(n,), dtype=np.int64) rel_dict = {} - rel_parts = [] - for _ in range(n): - rel_parts.append([]) - + rel_parts = [[] for _ in range(n)] max_edges = (len(rels) // n) + 1 num_cross_part = 0 for i in range(len(cnts)): @@ -210,8 +203,8 @@ def BalancedRelationPartition(edges, n): rel_dict[r] = r_parts for i, edge_cnt in enumerate(edge_cnts): - print('part {} has {} edges and {} relations'.format(i, edge_cnt, rel_cnts[i])) - print('{}/{} duplicated relation across partitions'.format(num_cross_part, len(cnts))) + print(f'part {i} has {edge_cnt} edges and {rel_cnts[i]} relations') + print(f'{num_cross_part}/{len(cnts)} duplicated relation across partitions') parts = [] for i in range(n): @@ -259,7 +252,7 @@ def RandomPartition(edges, n): Edges of each partition """ heads, rels, tails = edges - print('random partition {} edges into {} parts'.format(len(heads), n)) + print(f'random partition {len(heads)} edges into {n} parts') idx = np.random.permutation(len(heads)) heads[:] = heads[idx] rels[:] = rels[idx] @@ -271,7 +264,7 @@ def RandomPartition(edges, n): start = part_size * i end = min(part_size * (i + 1), len(idx)) parts.append(idx[start:end]) - print('part {} has {} edges'.format(i, len(parts[-1]))) + print(f'part {i} has {len(parts[-1])} edges') return parts def ConstructGraph(edges, n_entities, args): @@ -624,7 +617,7 @@ def get_edges(self, eval_type): elif eval_type == 'test': return self.test else: - raise Exception('get invalid type: ' + eval_type) + raise Exception(f'get invalid type: {eval_type}') def create_sampler(self, eval_type, batch_size, neg_sample_size, neg_chunk_size, filter_false_neg, mode='head', num_workers=32, rank=0, ranks=1): diff --git a/apps/kg/eval.py b/apps/kg/eval.py index 1daa0d79b330..1be14965ca82 100644 --- a/apps/kg/eval.py +++ b/apps/kg/eval.py @@ -85,12 +85,11 @@ def __init__(self): help='number of thread used') def parse_args(self): - args = super().parse_args() - return args + return super().parse_args() def get_logger(args): if not os.path.exists(args.model_path): - raise Exception('No existing model_path: ' + args.model_path) + raise Exception(f'No existing model_path: {args.model_path}') log_file = os.path.join(args.model_path, 'eval.log') @@ -103,7 +102,7 @@ def get_logger(args): ) logger = logging.getLogger(__name__) - print("Logs are being recorded at: {}".format(log_file)) + print(f"Logs are being recorded at: {log_file}") return logger @@ -194,16 +193,18 @@ def main(args): proc.start() total_metrics = {} - metrics = {} logs = [] - for i in range(args.num_proc): + for _ in range(args.num_proc): log = queue.get() logs = logs + log - for metric in logs[0].keys(): - metrics[metric] = sum([log[metric] for log in logs]) / 
len(logs) + metrics = { + metric: sum(log[metric] for log in logs) / len(logs) + for metric in logs[0].keys() + } + for k, v in metrics.items(): - print('Test average {} at [{}/{}]: {}'.format(k, args.step, args.max_step, v)) + print(f'Test average {k} at [{args.step}/{args.max_step}]: {v}') for proc in procs: proc.join() diff --git a/apps/kg/kvserver.py b/apps/kg/kvserver.py index 16d836a8b292..5c85ede48bcd 100644 --- a/apps/kg/kvserver.py +++ b/apps/kg/kvserver.py @@ -38,8 +38,8 @@ class KGEServer(KVServer): def _push_handler(self, name, ID, data, target): """Row-Sparse Adagrad updater """ - original_name = name[0:-6] - state_sum = target[original_name+'_state-data-'] + original_name = name[:-6] + state_sum = target[f'{original_name}_state-data-'] grad_sum = (data * data).mean(1) state_sum.index_add_(0, ID, grad_sum) std = state_sum[ID] # _sparse_mask @@ -95,7 +95,7 @@ def __init__(self): def get_server_data(args, machine_id): - """Get data from data_path/dataset/part_machine_id + """Get data from data_path/dataset/part_machine_id Return: glocal2local, entity_emb, @@ -103,21 +103,21 @@ relation_emb, relation_emb_state """ - g2l, dataset = get_server_partition_dataset( - args.data_path, - args.dataset, - machine_id) + g2l, dataset = get_server_partition_dataset( + args.data_path, + args.dataset, + machine_id) - # Note that the dataset doesn't ccontain the triple - print('n_entities: ' + str(dataset.n_entities)) - print('n_relations: ' + str(dataset.n_relations)) + # Note that the dataset doesn't contain the triples + print(f'n_entities: {str(dataset.n_entities)}') + print(f'n_relations: {str(dataset.n_relations)}') - args.soft_rel_part = False - args.strict_rel_part = False + args.soft_rel_part = False + args.strict_rel_part = False - model = load_model(None, args, dataset.n_entities, dataset.n_relations) + model = load_model(None, args, dataset.n_entities, dataset.n_relations) - return g2l, model.entity_emb.emb, model.entity_emb.state_sum, model.relation_emb.emb, model.relation_emb.state_sum + return g2l, model.entity_emb.emb, model.entity_emb.state_sum, model.relation_emb.emb, model.relation_emb.state_sum def start_server(args): diff --git a/apps/kg/models/general_models.py b/apps/kg/models/general_models.py index a9c08c138612..d5efe0e0c6d5 100644 --- a/apps/kg/models/general_models.py +++ b/apps/kg/models/general_models.py @@ -92,11 +92,7 @@ def __init__(self, args, model_name, n_entities, n_relations, hidden_dim, gamma, self.entity_emb = ExternalEmbedding(args, n_entities, entity_dim, F.cpu() if args.mix_cpu_gpu else device) # For RESCAL, relation_emb = relation_dim * entity_dim - if model_name == 'RESCAL': - rel_dim = relation_dim * entity_dim - else: - rel_dim = relation_dim - + rel_dim = relation_dim * entity_dim if model_name == 'RESCAL' else relation_dim self.rel_dim = rel_dim self.entity_dim = entity_dim self.strict_rel_part = args.strict_rel_part @@ -107,7 +103,7 @@ def __init__(self, args, model_name, n_entities, n_relations, hidden_dim, gamma, else: self.global_relation_emb = ExternalEmbedding(args, n_relations, rel_dim, F.cpu()) - if model_name == 'TransE' or model_name == 'TransE_l2': + if model_name in ['TransE', 'TransE_l2']: self.score_func = TransEScore(gamma, 'l2') elif model_name == 'TransE_l1': self.score_func = TransEScore(gamma, 'l1') @@ -126,7 +122,7 @@ def __init__(self, args, model_name, n_entities, n_relations, hidden_dim, gamma, self.score_func = RESCALScore(relation_dim, entity_dim) elif model_name == 'RotatE': 
self.score_func = RotatEScore(gamma, self.emb_init) - + self.model_name = model_name self.head_neg_score = self.score_func.create_neg(True) self.tail_neg_score = self.score_func.create_neg(False) @@ -157,13 +153,13 @@ def save_emb(self, path, dataset): dataset : str Dataset name as prefix to the saved embeddings. """ - self.entity_emb.save(path, dataset+'_'+self.model_name+'_entity') + self.entity_emb.save(path, f'{dataset}_{self.model_name}_entity') if self.strict_rel_part or self.soft_rel_part: - self.global_relation_emb.save(path, dataset+'_'+self.model_name+'_relation') + self.global_relation_emb.save(path, f'{dataset}_{self.model_name}_relation') else: - self.relation_emb.save(path, dataset+'_'+self.model_name+'_relation') + self.relation_emb.save(path, f'{dataset}_{self.model_name}_relation') - self.score_func.save(path, dataset+'_'+self.model_name) + self.score_func.save(path, f'{dataset}_{self.model_name}') def load_emb(self, path, dataset): """Load the model. @@ -175,9 +171,9 @@ def load_emb(self, path, dataset): dataset : str Dataset name as prefix to the saved embeddings. """ - self.entity_emb.load(path, dataset+'_'+self.model_name+'_entity') - self.relation_emb.load(path, dataset+'_'+self.model_name+'_relation') - self.score_func.load(path, dataset+'_'+self.model_name) + self.entity_emb.load(path, f'{dataset}_{self.model_name}_entity') + self.relation_emb.load(path, f'{dataset}_{self.model_name}_relation') + self.score_func.load(path, f'{dataset}_{self.model_name}') def reset_parameters(self): """Re-initialize the model. @@ -286,12 +282,11 @@ def predict_neg_score(self, pos_g, neg_g, to_device=None, gpu_id=-1, trace=False neg_score = self.tail_neg_score(head, rel, neg_tail, num_chunks, chunk_size, neg_sample_size) - if neg_deg_sample: - neg_g.neg_sample_size = neg_sample_size - mask = mask.reshape(num_chunks, chunk_size, neg_sample_size) - return neg_score * mask - else: + if not neg_deg_sample: return neg_score + neg_g.neg_sample_size = neg_sample_size + mask = mask.reshape(num_chunks, chunk_size, neg_sample_size) + return neg_score * mask def forward_test(self, pos_g, neg_g, logs, gpu_id=-1): """Do the forward and generate ranking results. 
diff --git a/apps/kg/models/mxnet/score_fun.py b/apps/kg/models/mxnet/score_fun.py index 51637073b758..3312ad1a4379 100644 --- a/apps/kg/models/mxnet/score_fun.py +++ b/apps/kg/models/mxnet/score_fun.py @@ -30,14 +30,12 @@ def batched_l2_dist(a, b): squared_res = nd.add(nd.linalg_gemm( a, nd.transpose(b, axes=(0, 2, 1)), nd.broadcast_axes(nd.expand_dims(b_squared, axis=-2), axis=1, size=a.shape[1]), alpha=-2 ), nd.expand_dims(a_squared, axis=-1)) - res = nd.sqrt(nd.clip(squared_res, 1e-30, np.finfo(np.float32).max)) - return res + return nd.sqrt(nd.clip(squared_res, 1e-30, np.finfo(np.float32).max)) def batched_l1_dist(a, b): a = nd.expand_dims(a, axis=-2) b = nd.expand_dims(b, axis=-3) - res = nd.norm(a - b, ord=1, axis=-1) - return res + return nd.norm(a - b, ord=1, axis=-1) class TransEScore(nn.Block): """ TransE score function @@ -200,10 +198,10 @@ def update(self, gpu_id=-1): self.projection_emb.update(gpu_id) def save(self, path, name): - self.projection_emb.save(path, name+'projection') + self.projection_emb.save(path, f'{name}projection') def load(self, path, name): - self.projection_emb.load(path, name+'projection') + self.projection_emb.load(path, f'{name}projection') def prepare_local_emb(self, projection_emb): self.global_projection_emb = self.projection_emb diff --git a/apps/kg/models/mxnet/tensor_models.py b/apps/kg/models/mxnet/tensor_models.py index 58757184bd3f..aa4d5ff4afc9 100644 --- a/apps/kg/models/mxnet/tensor_models.py +++ b/apps/kg/models/mxnet/tensor_models.py @@ -162,7 +162,7 @@ def save(self, path, name): name : str Embedding name. """ - emb_fname = os.path.join(path, name+'.npy') + emb_fname = os.path.join(path, f'{name}.npy') np.save(emb_fname, self.emb.asnumpy()) def load(self, path, name): @@ -175,5 +175,5 @@ def load(self, path, name): name : str Embedding name. 
""" - emb_fname = os.path.join(path, name+'.npy') + emb_fname = os.path.join(path, f'{name}.npy') self.emb = nd.array(np.load(emb_fname)) diff --git a/apps/kg/models/pytorch/score_fun.py b/apps/kg/models/pytorch/score_fun.py index bece2001f78a..27fb24e0d20a 100644 --- a/apps/kg/models/pytorch/score_fun.py +++ b/apps/kg/models/pytorch/score_fun.py @@ -30,12 +30,10 @@ def batched_l2_dist(a, b): squared_res = th.baddbmm( b_squared.unsqueeze(-2), a, b.transpose(-2, -1), alpha=-2 ).add_(a_squared.unsqueeze(-1)) - res = squared_res.clamp_min_(1e-30).sqrt_() - return res + return squared_res.clamp_min_(1e-30).sqrt_() def batched_l1_dist(a, b): - res = th.cdist(a, b, p=1) - return res + return th.cdist(a, b, p=1) class TransEScore(nn.Module): """TransE score function @@ -167,10 +165,10 @@ def update(self, gpu_id=-1): self.projection_emb.update(gpu_id) def save(self, path, name): - self.projection_emb.save(path, name+'projection') + self.projection_emb.save(path, f'{name}projection') def load(self, path, name): - self.projection_emb.load(path, name+'projection') + self.projection_emb.load(path, f'{name}projection') def prepare_local_emb(self, projection_emb): self.global_projection_emb = self.projection_emb diff --git a/apps/kg/models/pytorch/tensor_models.py b/apps/kg/models/pytorch/tensor_models.py index 3f215850128a..f9d65401ae63 100644 --- a/apps/kg/models/pytorch/tensor_models.py +++ b/apps/kg/models/pytorch/tensor_models.py @@ -39,7 +39,11 @@ logsigmoid = functional.logsigmoid def get_device(args): - return th.device('cpu') if args.gpu[0] < 0 else th.device('cuda:' + str(args.gpu[0])) + return ( + th.device('cpu') + if args.gpu[0] < 0 + else th.device(f'cuda:{str(args.gpu[0])}') + ) norm = lambda x, p: x.norm(p=p)**p get_scalar = lambda x: x.detach().item() @@ -161,7 +165,7 @@ def init(self, emb_init): def setup_cross_rels(self, cross_rels, global_emb): cpu_bitmap = th.zeros((self.num,), dtype=th.bool) - for i, rel in enumerate(cross_rels): + for rel in cross_rels: cpu_bitmap[rel] = 1 self.cpu_bitmap = cpu_bitmap self.has_cross_rel = True @@ -301,7 +305,7 @@ def save(self, path, name): name : str Embedding name. """ - file_name = os.path.join(path, name+'.npy') + file_name = os.path.join(path, f'{name}.npy') np.save(file_name, self.emb.cpu().detach().numpy()) def load(self, path, name): @@ -314,5 +318,5 @@ def load(self, path, name): name : str Embedding name. 
""" - file_name = os.path.join(path, name+'.npy') + file_name = os.path.join(path, f'{name}.npy') self.emb = th.Tensor(np.load(file_name)) diff --git a/apps/kg/partition.py b/apps/kg/partition.py index f6b0997e4d27..fd29763b2b65 100644 --- a/apps/kg/partition.py +++ b/apps/kg/partition.py @@ -35,24 +35,22 @@ def write_txt_graph(path, file_name, part_dict, total_nodes): if not os.path.exists(partition_path): os.mkdir(partition_path) triple_file = os.path.join(partition_path, file_name) - f = open(triple_file, 'w') - graph = part_dict[part_id] - src, dst = graph.all_edges(form='uv', order='eid') - rel = graph.edata['tid'] - assert len(src) == len(rel) - src = F.asnumpy(src) - dst = F.asnumpy(dst) - rel = F.asnumpy(rel) - for i in range(len(src)): - f.write(str(src[i])+'\t'+str(rel[i])+'\t'+str(dst[i])+'\n') - f.close() + with open(triple_file, 'w') as f: + graph = part_dict[part_id] + src, dst = graph.all_edges(form='uv', order='eid') + rel = graph.edata['tid'] + assert len(src) == len(rel) + src = F.asnumpy(src) + dst = F.asnumpy(dst) + rel = F.asnumpy(rel) + for i in range(len(src)): + f.write(str(src[i])+'\t'+str(rel[i])+'\t'+str(dst[i])+'\n') # Get local2global l2g_file = os.path.join(partition_path, 'local_to_global.txt') - f = open(l2g_file, 'w') - pid = F.asnumpy(graph.parent_nid) - for i in range(len(pid)): - f.write(str(pid[i])+'\n') - f.close() + with open(l2g_file, 'w') as f: + pid = F.asnumpy(graph.parent_nid) + for i in range(len(pid)): + f.write(str(pid[i])+'\n') # Update partition_book partition = F.asnumpy(graph.ndata['part_id']) for i in range(len(pid)): @@ -61,10 +59,9 @@ def write_txt_graph(path, file_name, part_dict, total_nodes): for part_id in part_dict: partition_path = path + str(part_id) pb_file = os.path.join(partition_path, 'partition_book.txt') - f = open(pb_file, 'w') - for i in range(len(partition_book)): - f.write(str(partition_book[i])+'\n') - f.close() + with open(pb_file, 'w') as f: + for i in range(len(partition_book)): + f.write(str(partition_book[i])+'\n') def main(): parser = argparse.ArgumentParser(description='Partition a knowledge graph') @@ -105,9 +102,10 @@ def main(): num_inner_nodes = len(np.nonzero(F.asnumpy(part.ndata['inner_node']))[0]) num_inner_edges = len(np.nonzero(F.asnumpy(part.edata['inner_edge']))[0]) - print('part {} has {} nodes and {} edges. {} nodes and {} edges are inside the partition'.format( - part_id, part.number_of_nodes(), part.number_of_edges(), - num_inner_nodes, num_inner_edges)) + print( + f'part {part_id} has {part.number_of_nodes()} nodes and {part.number_of_edges()} edges. {num_inner_nodes} nodes and {num_inner_edges} edges are inside the partition' + ) + tot_num_inner_edges += num_inner_edges part.copy_from_parent() @@ -118,8 +116,9 @@ def main(): txt_file_graph = os.path.join(txt_file_graph, 'partition_') write_txt_graph(txt_file_graph, 'train.txt', part_dict, g.number_of_nodes()) - print('there are {} edges in the graph and {} edge cuts for {} partitions.'.format( - g.number_of_edges(), g.number_of_edges() - tot_num_inner_edges, len(part_dict))) + print( + f'there are {g.number_of_edges()} edges in the graph and {g.number_of_edges() - tot_num_inner_edges} edge cuts for {len(part_dict)} partitions.' 
+ ) if __name__ == '__main__': main() \ No newline at end of file diff --git a/apps/kg/tests/test_score.py b/apps/kg/tests/test_score.py index bd09b8a15496..252220188fea 100644 --- a/apps/kg/tests/test_score.py +++ b/apps/kg/tests/test_score.py @@ -28,17 +28,15 @@ if backend.lower() == 'mxnet': import mxnet as mx mx.random.seed(42) - np.random.seed(42) - from models.mxnet.score_fun import * from models.mxnet.tensor_models import ExternalEmbedding else: import torch as th th.manual_seed(42) - np.random.seed(42) - from models.pytorch.score_fun import * from models.pytorch.tensor_models import ExternalEmbedding +np.random.seed(42) + from models.general_models import KEModel from dataloader.sampler import create_neg_subgraph @@ -124,8 +122,10 @@ def predict_neg_score(self, pos_g, neg_g): rel = pos_g.edata['emb'] neg_head, tail = self.head_neg_prepare(pos_g.edata['id'], num_chunks, neg_head, tail, -1, False) - neg_score = self.head_neg_score(neg_head, rel, tail, - num_chunks, chunk_size, neg_sample_size) + return self.head_neg_score( + neg_head, rel, tail, num_chunks, chunk_size, neg_sample_size + ) + else: neg_tail_ids = neg_g.ndata['id'][neg_g.tail_nid] neg_tail = self.entity_emb[neg_tail_ids] @@ -134,10 +134,9 @@ def predict_neg_score(self, pos_g, neg_g): rel = pos_g.edata['emb'] head, neg_tail = self.tail_neg_prepare(pos_g.edata['id'], num_chunks, head, neg_tail, -1, False) - neg_score = self.tail_neg_score(head, rel, neg_tail, - num_chunks, chunk_size, neg_sample_size) - - return neg_score + return self.tail_neg_score( + head, rel, neg_tail, num_chunks, chunk_size, neg_sample_size + ) def check_score_func(func_name): batch_size = 10 diff --git a/apps/kg/train.py b/apps/kg/train.py index 631b133d815c..44a3e44a9fb6 100644 --- a/apps/kg/train.py +++ b/apps/kg/train.py @@ -131,7 +131,7 @@ def get_logger(args): if not os.path.exists(args.save_path): os.mkdir(args.save_path) - folder = '{}_{}_'.format(args.model_name, args.dataset) + folder = f'{args.model_name}_{args.dataset}_' n = len([x for x in os.listdir(args.save_path) if x.startswith(folder)]) folder += str(n) args.save_path = os.path.join(args.save_path, folder) @@ -149,7 +149,7 @@ def get_logger(args): ) logger = logging.getLogger(__name__) - print("Logs are being recorded at: {}".format(log_file)) + print(f"Logs are being recorded at: {log_file}") return logger diff --git a/apps/kg/train_mxnet.py b/apps/kg/train_mxnet.py index eedfa8393816..867fd4bd19bd 100644 --- a/apps/kg/train_mxnet.py +++ b/apps/kg/train_mxnet.py @@ -35,7 +35,7 @@ def load_model(logger, args, n_entities, n_relations, ckpt=None): if ckpt is not None: assert False, "We do not support loading model emb for genernal Embedding" - logger.info('Load model {}'.format(args.model_name)) + logger.info(f'Load model {args.model_name}') return model def load_model_from_checkpoint(logger, args, n_entities, n_relations, ckpt_path): @@ -60,7 +60,7 @@ def train(args, model, train_sampler, valid_samplers=None, rank=0, rel_parts=Non model.prepare_relation(mx.gpu(gpu_id)) start = time.time() - for step in range(0, args.max_step): + for step in range(args.max_step): pos_g, neg_g = next(train_sampler) args.step = step with mx.autograd.record(): @@ -72,7 +72,7 @@ def train(args, model, train_sampler, valid_samplers=None, rank=0, rel_parts=Non if step % args.log_interval == 0: for k in logs[0].keys(): v = sum(l[k] for l in logs) / len(logs) - print('[Train]({}/{}) average {}: {}'.format(step, args.max_step, k, v)) + print(f'[Train]({step}/{args.max_step}) average {k}: {v}') logs = [] 
print(time.time() - start) start = time.time() @@ -99,18 +99,18 @@ def test(args, model, test_samplers, rank=0, mode='Test', queue=None): if args.strict_rel_part: model.load_relation(mx.gpu(gpu_id)) + #print('Number of tests: ' + len(sampler)) + count = 0 for sampler in test_samplers: - #print('Number of tests: ' + len(sampler)) - count = 0 for pos_g, neg_g in sampler: model.forward_test(pos_g, neg_g, logs, gpu_id) metrics = {} - if len(logs) > 0: + if logs: for metric in logs[0].keys(): - metrics[metric] = sum([log[metric] for log in logs]) / len(logs) + metrics[metric] = sum(log[metric] for log in logs) / len(logs) for k, v in metrics.items(): - print('{} average {}: {}'.format(mode, k, v)) + print(f'{mode} average {k}: {v}') for i in range(len(test_samplers)): test_samplers[i] = test_samplers[i].reset() diff --git a/apps/kg/train_pytorch.py b/apps/kg/train_pytorch.py index 56c297bbdea6..6b5e75a69c11 100644 --- a/apps/kg/train_pytorch.py +++ b/apps/kg/train_pytorch.py @@ -48,8 +48,8 @@ class KGEClient(KVClient): def _push_handler(self, name, ID, data, target): """Row-Sparse Adagrad updater """ - original_name = name[0:-6] - state_sum = target[original_name+'_state-data-'] + original_name = name[:-6] + state_sum = target[f'{original_name}_state-data-'] grad_sum = (data * data).mean(1) state_sum.index_add_(0, ID, grad_sum) std = state_sum[ID] # _sparse_mask @@ -122,7 +122,7 @@ def train(args, model, train_sampler, valid_samplers=None, rank=0, rel_parts=Non if args.async_update: model.create_async_update() if args.strict_rel_part or args.soft_rel_part: - model.prepare_relation(th.device('cuda:' + str(gpu_id))) + model.prepare_relation(th.device(f'cuda:{str(gpu_id)}')) if args.soft_rel_part: model.prepare_cross_rels(cross_rels) @@ -131,7 +131,7 @@ def train(args, model, train_sampler, valid_samplers=None, rank=0, rel_parts=Non update_time = 0 forward_time = 0 backward_time = 0 - for step in range(0, args.max_step): + for step in range(args.max_step): start1 = time.time() pos_g, neg_g = next(train_sampler) sample_time += time.time() - start1 @@ -163,7 +163,7 @@ def train(args, model, train_sampler, valid_samplers=None, rank=0, rel_parts=Non if (step + 1) % args.log_interval == 0: for k in logs[0].keys(): v = sum(l[k] for l in logs) / len(logs) - print('[{}][Train]({}/{}) average {}: {}'.format(rank, (step + 1), args.max_step, k, v)) + print(f'[{rank}][Train]({step + 1}/{args.max_step}) average {k}: {v}') logs = [] print('[{}][Train] {} steps take {:.3f} seconds'.format(rank, args.log_interval, time.time() - start)) @@ -202,7 +202,7 @@ def test(args, model, test_samplers, rank=0, mode='Test', queue=None): gpu_id = -1 if args.strict_rel_part or args.soft_rel_part: - model.load_relation(th.device('cuda:' + str(gpu_id))) + model.load_relation(th.device(f'cuda:{str(gpu_id)}')) with th.no_grad(): logs = [] @@ -211,14 +211,14 @@ def test(args, model, test_samplers, rank=0, mode='Test', queue=None): model.forward_test(pos_g, neg_g, logs, gpu_id) metrics = {} - if len(logs) > 0: + if logs: for metric in logs[0].keys(): - metrics[metric] = sum([log[metric] for log in logs]) / len(logs) + metrics[metric] = sum(log[metric] for log in logs) / len(logs) if queue is not None: queue.put(logs) else: for k, v in metrics.items(): - print('[{}]{} average {}: {}'.format(rank, mode, k, v)) + print(f'[{rank}]{mode} average {k}: {v}') test_samplers[0] = test_samplers[0].reset() test_samplers[1] = test_samplers[1].reset() @@ -246,15 +246,13 @@ def dist_train_test(args, model, train_sampler, entity_pb, relation_pb, 
l2g, ran client.barrier() print('Total train time {:.3f} seconds'.format(time.time() - train_time_start)) - model = None - if client.get_id() % args.num_client == 0: # pull full model from kvstore args.num_test_proc = args.num_client dataset_full = get_dataset(args.data_path, args.dataset, args.format) - print('Full data n_entities: ' + str(dataset_full.n_entities)) - print("Full data n_relations: " + str(dataset_full.n_relations)) + print(f'Full data n_entities: {str(dataset_full.n_entities)}') + print(f"Full data n_relations: {str(dataset_full.n_relations)}") model_test = load_model(None, args, dataset_full.n_entities, dataset_full.n_relations) eval_dataset = EvalDataset(dataset_full, args) @@ -272,7 +270,7 @@ def dist_train_test(args, model, train_sampler, entity_pb, relation_pb, l2g, ran relation_id = F.arange(0, model_test.n_relations) relation_data = client.pull(name='relation_emb', id_tensor=relation_id) model_test.relation_emb.emb[relation_id] = relation_data - + print("Pull entity_emb ... ") # split model into 100 small parts start = 0 @@ -280,6 +278,8 @@ def dist_train_test(args, model, train_sampler, entity_pb, relation_pb, l2g, ran entity_id = F.arange(0, model_test.n_entities) count = int(model_test.n_entities / 100) end = start + count + model = None + while True: print("Pull %d / 100 ..." % percent) if end >= model_test.n_entities: @@ -338,16 +338,18 @@ def dist_train_test(args, model, train_sampler, entity_pb, relation_pb, l2g, ran proc.start() total_metrics = {} - metrics = {} logs = [] - for i in range(args.num_test_proc): + for _ in range(args.num_test_proc): log = queue.get() logs = logs + log - - for metric in logs[0].keys(): - metrics[metric] = sum([log[metric] for log in logs]) / len(logs) + + metrics = { + metric: sum(log[metric] for log in logs) / len(logs) + for metric in logs[0].keys() + } + for k, v in metrics.items(): - print('Test average {} : {}'.format(k, v)) + print(f'Test average {k} : {v}') for proc in procs: proc.join() diff --git a/apps/kg/utils.py b/apps/kg/utils.py index 21f23673a04a..56cb15468e75 100644 --- a/apps/kg/utils.py +++ b/apps/kg/utils.py @@ -23,6 +23,8 @@ def get_compatible_batch_size(batch_size, neg_sample_size): if neg_sample_size < batch_size and batch_size % neg_sample_size != 0: old_batch_size = batch_size batch_size = int(math.ceil(batch_size / neg_sample_size) * neg_sample_size) - print('batch size ({}) is incompatible to the negative sample size ({}). Change the batch size to {}'.format( - old_batch_size, neg_sample_size, batch_size)) + print( + f'batch size ({old_batch_size}) is incompatible to the negative sample size ({neg_sample_size}). 
Change the batch size to {batch_size}' + ) + return batch_size diff --git a/benchmarks/benchmarks/api/bench_add_self_loop.py b/benchmarks/benchmarks/api/bench_add_self_loop.py index 0b64dab684cb..f1fa9b221959 100644 --- a/benchmarks/benchmarks/api/bench_add_self_loop.py +++ b/benchmarks/benchmarks/api/bench_add_self_loop.py @@ -17,13 +17,13 @@ def track_time(graph_name, format): graph = graph.to(device) # dry run - for i in range(3): + for _ in range(3): g = graph.add_self_loop() # timing - + with utils.Timer() as t: - for i in range(3): + for _ in range(3): edges = graph.add_self_loop() return t.elapsed_secs / 3 diff --git a/benchmarks/benchmarks/api/bench_batch.py b/benchmarks/benchmarks/api/bench_batch.py index 330d15cb394c..e26a950c8f7c 100644 --- a/benchmarks/benchmarks/api/bench_batch.py +++ b/benchmarks/benchmarks/api/bench_batch.py @@ -11,18 +11,18 @@ def track_time(batch_size): ds = dgl.data.QM7bDataset() # prepare graph graphs = [] - for graph in ds[0:batch_size][0]: + for graph in ds[:batch_size][0]: g = graph.to(device) graphs.append(g) # dry run - for i in range(10): + for _ in range(10): g = dgl.batch(graphs) # timing - + with utils.Timer() as t: - for i in range(100): + for _ in range(100): g = dgl.batch(graphs) return t.elapsed_secs / 100 diff --git a/benchmarks/benchmarks/api/bench_builtin_apply_edges.py b/benchmarks/benchmarks/api/bench_builtin_apply_edges.py index 7c265972e17a..ac52bee4745c 100644 --- a/benchmarks/benchmarks/api/bench_builtin_apply_edges.py +++ b/benchmarks/benchmarks/api/bench_builtin_apply_edges.py @@ -25,13 +25,13 @@ def track_time(graph_name, format, feat_size, reduce_type): } # dry run - for i in range(3): + for _ in range(3): graph.apply_edges(reduce_builtin_dict[reduce_type]) # timing - + with utils.Timer() as t: - for i in range(10): + for _ in range(10): graph.apply_edges(reduce_builtin_dict[reduce_type]) return t.elapsed_secs / 10 diff --git a/benchmarks/benchmarks/api/bench_builtin_apply_edges_hetero.py b/benchmarks/benchmarks/api/bench_builtin_apply_edges_hetero.py index c48f765711e9..0331eb514f3d 100644 --- a/benchmarks/benchmarks/api/bench_builtin_apply_edges_hetero.py +++ b/benchmarks/benchmarks/api/bench_builtin_apply_edges_hetero.py @@ -12,15 +12,14 @@ @utils.parametrize('format', ['coo', 'csr']) @utils.parametrize('feat_size', [8, 128, 512]) @utils.parametrize('reduce_type', ['u->e']) #, 'e->u']) - def track_time( num_relations, format, feat_size, reduce_type): device = utils.get_bench_device() dd = {} candidate_edges = [dgl.data.CoraGraphDataset(verbose=False)[0].edges(), dgl.data.PubmedGraphDataset(verbose=False)[ 0].edges(), dgl.data.CiteseerGraphDataset(verbose=False)[0].edges()] for i in range(num_relations): - dd[('n1', 'e_{}'.format(i), 'n2')] = candidate_edges[i % - len(candidate_edges)] + dd['n1', f'e_{i}', 'n2'] = candidate_edges[i % len(candidate_edges)] + graph = dgl.heterograph(dd) graph = graph.to(device) @@ -36,13 +35,13 @@ def track_time( num_relations, format, feat_size, reduce_type): } # dry run - for i in range(3): + for _ in range(3): graph.apply_edges(reduce_builtin_dict[reduce_type]) # timing - + with utils.Timer() as t: - for i in range(10): + for _ in range(10): graph.apply_edges(reduce_builtin_dict[reduce_type]) return t.elapsed_secs / 10 diff --git a/benchmarks/benchmarks/api/bench_builtin_multi_update_all.py b/benchmarks/benchmarks/api/bench_builtin_multi_update_all.py index 32dddc465fb6..e08134903033 100644 --- a/benchmarks/benchmarks/api/bench_builtin_multi_update_all.py +++ 
b/benchmarks/benchmarks/api/bench_builtin_multi_update_all.py @@ -17,8 +17,8 @@ def track_time(feat_size, num_relations, multi_reduce_type): candidate_edges = [dgl.data.CoraGraphDataset(verbose=False)[0].edges(), dgl.data.PubmedGraphDataset(verbose=False)[ 0].edges(), dgl.data.CiteseerGraphDataset(verbose=False)[0].edges()] for i in range(num_relations): - dd[('n1', 'e_{}'.format(i), 'n2')] = candidate_edges[i % - len(candidate_edges)] + dd['n1', f'e_{i}', 'n2'] = candidate_edges[i % len(candidate_edges)] + graph = dgl.heterograph(dd) graph = graph.to(device) @@ -28,18 +28,19 @@ def track_time(feat_size, num_relations, multi_reduce_type): (graph.num_nodes('n2'), feat_size), device=device) # dry run - update_dict = {} - for i in range(num_relations): - update_dict['e_{}'.format(i)] = ( - fn.copy_src('h', 'm'), fn.sum('m', 'h')) + update_dict = { + f'e_{i}': (fn.copy_src('h', 'm'), fn.sum('m', 'h')) + for i in range(num_relations) + } + graph.multi_update_all( update_dict, multi_reduce_type) # timing - + with utils.Timer() as t: - for i in range(3): + for _ in range(3): graph.multi_update_all( update_dict, multi_reduce_type) diff --git a/benchmarks/benchmarks/api/bench_builtin_update_all_coo.py b/benchmarks/benchmarks/api/bench_builtin_update_all_coo.py index 7290e3f09619..393db40776cd 100644 --- a/benchmarks/benchmarks/api/bench_builtin_update_all_coo.py +++ b/benchmarks/benchmarks/api/bench_builtin_update_all_coo.py @@ -40,7 +40,7 @@ def track_time(graph_name, format, feat_size, msg_type, reduce_type): # timing with utils.Timer() as t: - for i in range(3): + for _ in range(3): graph.update_all( msg_builtin_dict[msg_type], reduce_builtin_dict[reduce_type]) diff --git a/benchmarks/benchmarks/api/bench_builtin_update_all_csc.py b/benchmarks/benchmarks/api/bench_builtin_update_all_csc.py index ad286dabff89..db5fb7f02f12 100644 --- a/benchmarks/benchmarks/api/bench_builtin_update_all_csc.py +++ b/benchmarks/benchmarks/api/bench_builtin_update_all_csc.py @@ -34,15 +34,15 @@ def track_time(graph_name, format, feat_size, msg_type, reduce_type): } # dry run - - for i in range(3): + + for _ in range(3): graph.update_all(msg_builtin_dict[msg_type], reduce_builtin_dict[reduce_type]) # timing with utils.Timer() as t: - for i in range(10): + for _ in range(10): graph.update_all( msg_builtin_dict[msg_type], reduce_builtin_dict[reduce_type]) diff --git a/benchmarks/benchmarks/api/bench_edge_ids.py b/benchmarks/benchmarks/api/bench_edge_ids.py index 8e1e3fd69a51..f3c28cd7c524 100644 --- a/benchmarks/benchmarks/api/bench_edge_ids.py +++ b/benchmarks/benchmarks/api/bench_edge_ids.py @@ -25,13 +25,13 @@ def track_time(graph_name, format, fraction, return_uv): u = u.to(device) v = v.to(device) # dry run - for i in range(10): + for _ in range(10): out = graph.edge_ids(u[0], v[0]) # timing with utils.Timer() as t: - for i in range(3): + for _ in range(3): edges = graph.edge_ids(u, v, return_uv=return_uv) return t.elapsed_secs / 3 diff --git a/benchmarks/benchmarks/api/bench_edge_subgraph.py b/benchmarks/benchmarks/api/bench_edge_subgraph.py index e3307d1b2888..4a9865cae0d4 100644 --- a/benchmarks/benchmarks/api/bench_edge_subgraph.py +++ b/benchmarks/benchmarks/api/bench_edge_subgraph.py @@ -21,13 +21,13 @@ def track_time(graph_name, format, seed_egdes_num): seed_edges = np.random.randint(0, graph.num_edges(), seed_egdes_num) # dry run - for i in range(3): + for _ in range(3): dgl.edge_subgraph(graph, seed_edges) # timing - + with utils.Timer() as t: - for i in range(3): + for _ in range(3): 
dgl.edge_subgraph(graph, seed_edges) return t.elapsed_secs / 3 diff --git a/benchmarks/benchmarks/api/bench_find_edges.py b/benchmarks/benchmarks/api/bench_find_edges.py index da0611290040..0573d6ca74d4 100644 --- a/benchmarks/benchmarks/api/bench_find_edges.py +++ b/benchmarks/benchmarks/api/bench_find_edges.py @@ -25,9 +25,9 @@ def track_time(graph_name, format, fraction): i*10, dtype=torch.int64, device=device)) # timing - + with utils.Timer() as t: - for i in range(10): + for _ in range(10): edges = graph.find_edges(eids) return t.elapsed_secs / 10 diff --git a/benchmarks/benchmarks/api/bench_format_conversion.py b/benchmarks/benchmarks/api/bench_format_conversion.py index 8a1385a1887d..c10d569abee3 100644 --- a/benchmarks/benchmarks/api/bench_format_conversion.py +++ b/benchmarks/benchmarks/api/bench_format_conversion.py @@ -24,7 +24,7 @@ def track_time(graph_name, format): # timing with utils.Timer() as t: - for i in range(10): + for _ in range(10): gg = graph.formats([to_format]) return t.elapsed_secs / 10 diff --git a/benchmarks/benchmarks/api/bench_heterograph_construction.py b/benchmarks/benchmarks/api/bench_heterograph_construction.py index 0e34b1e040d8..3c4353c0918a 100644 --- a/benchmarks/benchmarks/api/bench_heterograph_construction.py +++ b/benchmarks/benchmarks/api/bench_heterograph_construction.py @@ -16,15 +16,15 @@ def track_time(num_relations): candidate_edges = [dgl.data.CoraGraphDataset(verbose=False)[0].edges(), dgl.data.PubmedGraphDataset(verbose=False)[ 0].edges(), dgl.data.CiteseerGraphDataset(verbose=False)[0].edges()] for i in range(num_relations): - dd[('n1', 'e_{}'.format(i), 'n2')] = candidate_edges[i % - len(candidate_edges)] + dd['n1', f'e_{i}', 'n2'] = candidate_edges[i % len(candidate_edges)] + # dry run graph = dgl.heterograph(dd) # timing with utils.Timer() as t: - for i in range(3): + for _ in range(3): graph = dgl.heterograph(dd) return t.elapsed_secs / 3 diff --git a/benchmarks/benchmarks/api/bench_homograph_edge_construction.py b/benchmarks/benchmarks/api/bench_homograph_edge_construction.py index 2cf9f8ccc6a2..7e2236c65229 100644 --- a/benchmarks/benchmarks/api/bench_homograph_edge_construction.py +++ b/benchmarks/benchmarks/api/bench_homograph_edge_construction.py @@ -23,7 +23,7 @@ def track_time(size): # timing with utils.Timer() as t: - for i in range(10): + for _ in range(10): g = dgl.graph(edge_list[size]) return t.elapsed_secs / 10 diff --git a/benchmarks/benchmarks/api/bench_homograph_scipy_construction.py b/benchmarks/benchmarks/api/bench_homograph_scipy_construction.py index f1a51446afca..fee8ff48bce7 100644 --- a/benchmarks/benchmarks/api/bench_homograph_scipy_construction.py +++ b/benchmarks/benchmarks/api/bench_homograph_scipy_construction.py @@ -24,7 +24,7 @@ def track_time(size, scipy_format): # timing with utils.Timer() as t: - for i in range(3): + for _ in range(3): dgl.from_scipy(matrix_dict[size]) return t.elapsed_secs / 3 diff --git a/benchmarks/benchmarks/api/bench_in_degrees.py b/benchmarks/benchmarks/api/bench_in_degrees.py index 63c088d37a05..b0b1803a08c0 100644 --- a/benchmarks/benchmarks/api/bench_in_degrees.py +++ b/benchmarks/benchmarks/api/bench_in_degrees.py @@ -9,7 +9,6 @@ @utils.benchmark('time', timeout=1200) @utils.parametrize_cpu('graph_name', ['cora', 'livejournal', 'friendster']) @utils.parametrize_gpu('graph_name', ['cora', 'livejournal']) -# in_degrees on coo is not supported on cuda @utils.parametrize_cpu('format', ['coo', 'csc']) @utils.parametrize_gpu('format', ['csc']) @utils.parametrize('fraction', [0.01, 
0.1]) @@ -27,7 +26,7 @@ def track_time(graph_name, format, fraction): # timing with utils.Timer() as t: - for i in range(10): + for _ in range(10): edges = graph.in_degrees(nids) return t.elapsed_secs / 10 diff --git a/benchmarks/benchmarks/api/bench_in_edges.py b/benchmarks/benchmarks/api/bench_in_edges.py index 339349e2a432..f39e249d7224 100644 --- a/benchmarks/benchmarks/api/bench_in_edges.py +++ b/benchmarks/benchmarks/api/bench_in_edges.py @@ -9,7 +9,6 @@ @utils.benchmark('time', timeout=1200) @utils.parametrize_cpu('graph_name', ['cora', 'livejournal', 'friendster']) @utils.parametrize_gpu('graph_name', ['cora', 'livejournal']) -# in_edges on coo is not supported on cuda @utils.parametrize_cpu('format', ['coo', 'csc']) @utils.parametrize_gpu('format', ['csc']) @utils.parametrize('fraction', [0.01, 0.1]) @@ -28,7 +27,7 @@ def track_time(graph_name, format, fraction): # timing with utils.Timer() as t: - for i in range(10): + for _ in range(10): edges = graph.in_edges(nids) return t.elapsed_secs / 10 diff --git a/benchmarks/benchmarks/api/bench_in_subgraph.py b/benchmarks/benchmarks/api/bench_in_subgraph.py index ca19d45b01f2..2bede8d524a5 100644 --- a/benchmarks/benchmarks/api/bench_in_subgraph.py +++ b/benchmarks/benchmarks/api/bench_in_subgraph.py @@ -20,12 +20,12 @@ def track_time(graph_name, format, seed_nodes_num): seed_nodes = np.random.randint(0, graph.num_nodes(), seed_nodes_num) # dry run - for i in range(3): + for _ in range(3): dgl.in_subgraph(graph, seed_nodes) # timing with utils.Timer() as t: - for i in range(3): + for _ in range(3): dgl.in_subgraph(graph, seed_nodes) return t.elapsed_secs / 3 diff --git a/benchmarks/benchmarks/api/bench_khop.py b/benchmarks/benchmarks/api/bench_khop.py index 2c9975e4bb7b..412062881d7f 100644 --- a/benchmarks/benchmarks/api/bench_khop.py +++ b/benchmarks/benchmarks/api/bench_khop.py @@ -20,7 +20,7 @@ def track_time(graph_name, format, k): # timing with utils.Timer() as t: - for i in range(10): + for _ in range(10): gg = dgl.khop_graph(graph, k) return t.elapsed_secs / 10 diff --git a/benchmarks/benchmarks/api/bench_knn_graph.py b/benchmarks/benchmarks/api/bench_knn_graph.py index eb26896d4458..74364d817f5f 100644 --- a/benchmarks/benchmarks/api/bench_knn_graph.py +++ b/benchmarks/benchmarks/api/bench_knn_graph.py @@ -16,11 +16,11 @@ def track_time(size, dim, k, algorithm): features = np.random.RandomState(42).randn(size, dim) feat = torch.tensor(features, dtype=torch.float, device=device) # dry run - for i in range(1): + for _ in range(1): dgl.knn_graph(feat, k, algorithm=algorithm) # timing with utils.Timer() as t: - for i in range(5): + for _ in range(5): dgl.knn_graph(feat, k, algorithm=algorithm) return t.elapsed_secs / 5 diff --git a/benchmarks/benchmarks/api/bench_metis_partition.py b/benchmarks/benchmarks/api/bench_metis_partition.py index 32f9bdf685b8..cec2a16afd3d 100644 --- a/benchmarks/benchmarks/api/bench_metis_partition.py +++ b/benchmarks/benchmarks/api/bench_metis_partition.py @@ -19,7 +19,7 @@ def track_time(graph_name, k): # timing with utils.Timer() as t: - for i in range(3): + for _ in range(3): gg = dgl.transforms.metis_partition(graph, k) return t.elapsed_secs / 3 diff --git a/benchmarks/benchmarks/api/bench_nn_graphconv.py b/benchmarks/benchmarks/api/bench_nn_graphconv.py index 923f7641f326..8794df12338a 100644 --- a/benchmarks/benchmarks/api/bench_nn_graphconv.py +++ b/benchmarks/benchmarks/api/bench_nn_graphconv.py @@ -21,11 +21,11 @@ def track_time(graph_name, feat_dim, aggr_type): model = SAGEConv(feat_dim, 
feat_dim, aggr_type, activation=F.relu, bias=False).to(device) # dry run - for i in range(3): + for _ in range(3): model(graph, feat) # timing with utils.Timer() as t: - for i in range(50): + for _ in range(50): model(graph, feat) return t.elapsed_secs / 50 diff --git a/benchmarks/benchmarks/api/bench_nn_heterographconv.py b/benchmarks/benchmarks/api/bench_nn_heterographconv.py index c3451ea6c3b3..1ff3a092d08f 100644 --- a/benchmarks/benchmarks/api/bench_nn_heterographconv.py +++ b/benchmarks/benchmarks/api/bench_nn_heterographconv.py @@ -13,31 +13,31 @@ @utils.parametrize('feat_dim', [4, 32, 256]) @utils.parametrize('num_relations', [5, 50, 200]) def track_time(feat_dim, num_relations): - device = utils.get_bench_device() + device = utils.get_bench_device() dd = {} nn_dict = {} candidate_edges = [dgl.data.CoraGraphDataset(verbose=False)[0].edges(), dgl.data.PubmedGraphDataset(verbose=False)[ 0].edges(), dgl.data.CiteseerGraphDataset(verbose=False)[0].edges()] for i in range(num_relations): - dd[('n1', 'e_{}'.format(i), 'n2')] = candidate_edges[i % - len(candidate_edges)] - nn_dict['e_{}'.format(i)] = SAGEConv(feat_dim, feat_dim, 'mean', activation=F.relu) + dd['n1', f'e_{i}', 'n2'] = candidate_edges[i % len(candidate_edges)] + + nn_dict[f'e_{i}'] = SAGEConv(feat_dim, feat_dim, 'mean', activation=F.relu) # dry run feat_dict = {} graph = dgl.heterograph(dd) for i in range(num_relations): - etype = 'e_{}'.format(i) + etype = f'e_{i}' feat_dict[etype] = torch.randn((graph[etype].num_nodes(), feat_dim), device=device) conv = HeteroGraphConv(nn_dict).to(device) # dry run - for i in range(3): + for _ in range(3): conv(graph, feat_dict) # timing with utils.Timer() as t: - for i in range(50): + for _ in range(50): conv(graph, feat_dict) return t.elapsed_secs / 50 diff --git a/benchmarks/benchmarks/api/bench_node_subgraph.py b/benchmarks/benchmarks/api/bench_node_subgraph.py index d0789fac0e3a..83c6faf7212e 100644 --- a/benchmarks/benchmarks/api/bench_node_subgraph.py +++ b/benchmarks/benchmarks/api/bench_node_subgraph.py @@ -20,12 +20,12 @@ def track_time(graph_name, format, seed_nodes_num): seed_nodes = np.random.randint(0, graph.num_nodes(), seed_nodes_num) # dry run - for i in range(3): + for _ in range(3): dgl.node_subgraph(graph, seed_nodes) # timing with utils.Timer() as t: - for i in range(3): + for _ in range(3): dgl.node_subgraph(graph, seed_nodes) return t.elapsed_secs / 3 diff --git a/benchmarks/benchmarks/api/bench_random_walk.py b/benchmarks/benchmarks/api/bench_random_walk.py index 7196a6258abb..f6157aba01cb 100644 --- a/benchmarks/benchmarks/api/bench_random_walk.py +++ b/benchmarks/benchmarks/api/bench_random_walk.py @@ -22,12 +22,12 @@ def track_time(graph_name, num_seeds, length, algorithm): print(graph_name, num_seeds, length) alg = globals()[algorithm] # dry run - for i in range(5): + for _ in range(5): _ = alg(graph, seeds, length=length) # timing with utils.Timer() as t: - for i in range(50): + for _ in range(50): _ = alg(graph, seeds, length=length) return t.elapsed_secs / 50 diff --git a/benchmarks/benchmarks/api/bench_readout.py b/benchmarks/benchmarks/api/bench_readout.py index e3936c5cb6a9..b57f9d858a35 100644 --- a/benchmarks/benchmarks/api/bench_readout.py +++ b/benchmarks/benchmarks/api/bench_readout.py @@ -14,22 +14,22 @@ def track_time(batch_size, feat_size, readout_op, type): device = utils.get_bench_device() ds = dgl.data.QM7bDataset() # prepare graph - graphs = ds[0:batch_size][0] + graphs = ds[:batch_size][0] g = dgl.batch(graphs).to(device) if type == 'node': 
g.ndata['h'] = torch.randn((g.num_nodes(), feat_size), device=device) - for i in range(10): + for _ in range(10): out = dgl.readout_nodes(g, 'h', op=readout_op) with utils.Timer() as t: - for i in range(50): + for _ in range(50): out = dgl.readout_nodes(g, 'h', op=readout_op) elif type == 'edge': g.edata['h'] = torch.randn((g.num_edges(), feat_size), device=device) - for i in range(10): + for _ in range(10): out = dgl.readout_edges(g, 'h', op=readout_op) with utils.Timer() as t: - for i in range(50): + for _ in range(50): out = dgl.readout_edges(g, 'h', op=readout_op) else: raise Exception("Unknown type") diff --git a/benchmarks/benchmarks/api/bench_reverse.py b/benchmarks/benchmarks/api/bench_reverse.py index aa00043ff983..7b108af41610 100644 --- a/benchmarks/benchmarks/api/bench_reverse.py +++ b/benchmarks/benchmarks/api/bench_reverse.py @@ -20,7 +20,7 @@ def track_time(graph_name, format): # timing with utils.Timer() as t: - for i in range(100): + for _ in range(100): gg = dgl.reverse(graph) return t.elapsed_secs / 100 diff --git a/benchmarks/benchmarks/api/bench_sample_neighbors.py b/benchmarks/benchmarks/api/bench_sample_neighbors.py index b3e1ce1045ee..2b82a05bf138 100644 --- a/benchmarks/benchmarks/api/bench_sample_neighbors.py +++ b/benchmarks/benchmarks/api/bench_sample_neighbors.py @@ -21,13 +21,13 @@ def track_time(graph_name, format, seed_nodes_num, fanout): seed_nodes = np.random.randint(0, graph.num_nodes(), seed_nodes_num) # dry run - for i in range(3): + for _ in range(3): dgl.sampling.sample_neighbors( graph, seed_nodes, fanout, edge_dir=edge_dir) # timing with utils.Timer() as t: - for i in range(50): + for _ in range(50): dgl.sampling.sample_neighbors( graph, seed_nodes, fanout, edge_dir=edge_dir) diff --git a/benchmarks/benchmarks/api/bench_to_block.py b/benchmarks/benchmarks/api/bench_to_block.py index 0ce99ccebd90..7290786e9bf8 100644 --- a/benchmarks/benchmarks/api/bench_to_block.py +++ b/benchmarks/benchmarks/api/bench_to_block.py @@ -20,7 +20,7 @@ def track_time(graph_name, num_seed_nodes, fanout): dgl.sampling.sample_neighbors(graph, [1, 2, 3], fanout) subg_list = [] - for i in range(10): + for _ in range(10): seed_nodes = np.random.randint( 0, graph.num_nodes(), size=num_seed_nodes) subg = dgl.sampling.sample_neighbors(graph, seed_nodes, fanout) diff --git a/benchmarks/benchmarks/api/bench_udf_apply_edges.py b/benchmarks/benchmarks/api/bench_udf_apply_edges.py index 15af226d7e34..7ac3f20669c2 100644 --- a/benchmarks/benchmarks/api/bench_udf_apply_edges.py +++ b/benchmarks/benchmarks/api/bench_udf_apply_edges.py @@ -29,7 +29,7 @@ def track_time(graph_name, format, feat_size, reduce_type): # timing with utils.Timer() as t: - for i in range(3): + for _ in range(3): graph.apply_edges(reduce_udf_dict[reduce_type]) return t.elapsed_secs / 3 diff --git a/benchmarks/benchmarks/api/bench_udf_multi_update_all.py b/benchmarks/benchmarks/api/bench_udf_multi_update_all.py index 4a73f2340803..32289898d020 100644 --- a/benchmarks/benchmarks/api/bench_udf_multi_update_all.py +++ b/benchmarks/benchmarks/api/bench_udf_multi_update_all.py @@ -19,8 +19,8 @@ def track_time(feat_size, num_relations, multi_reduce_type): candidate_edges = [dgl.data.CoraGraphDataset(verbose=False)[0].edges(), dgl.data.PubmedGraphDataset(verbose=False)[ 0].edges(), dgl.data.CiteseerGraphDataset(verbose=False)[0].edges()] for i in range(num_relations): - dd[('n1', 'e_{}'.format(i), 'n2')] = candidate_edges[i % - len(candidate_edges)] + dd['n1', f'e_{i}', 'n2'] = candidate_edges[i % len(candidate_edges)] + 
graph = dgl.heterograph(dd) graph = graph.to(device) @@ -30,17 +30,21 @@ def track_time(feat_size, num_relations, multi_reduce_type): (graph.num_nodes('n2'), feat_size), device=device) # dry run - update_dict = {} - for i in range(num_relations): - update_dict['e_{}'.format(i)] = ( - lambda edges: {'x': edges.src['h']}, lambda nodes: {'h_new': torch.sum(nodes.mailbox['x'], dim=1)}) + update_dict = { + f'e_{i}': ( + lambda edges: {'x': edges.src['h']}, + lambda nodes: {'h_new': torch.sum(nodes.mailbox['x'], dim=1)}, + ) + for i in range(num_relations) + } + graph.multi_update_all( update_dict, multi_reduce_type) # timing with utils.Timer() as t: - for i in range(3): + for _ in range(3): graph.multi_update_all( update_dict, multi_reduce_type) diff --git a/benchmarks/benchmarks/api/bench_udf_update_all.py b/benchmarks/benchmarks/api/bench_udf_update_all.py index 0cef1cbb60ac..8af715fc5c01 100644 --- a/benchmarks/benchmarks/api/bench_udf_update_all.py +++ b/benchmarks/benchmarks/api/bench_udf_update_all.py @@ -38,7 +38,7 @@ def track_time(graph_name, format, feat_size, msg_type, reduce_type): # timing with utils.Timer() as t: - for i in range(3): + for _ in range(3): graph.update_all(msg_udf_dict[msg_type], reduct_udf_dict[reduce_type]) diff --git a/benchmarks/benchmarks/api/bench_unbatch.py b/benchmarks/benchmarks/api/bench_unbatch.py index 17360a682536..88e420680757 100644 --- a/benchmarks/benchmarks/api/bench_unbatch.py +++ b/benchmarks/benchmarks/api/bench_unbatch.py @@ -10,16 +10,16 @@ def track_time(batch_size): device = utils.get_bench_device() ds = dgl.data.QM7bDataset() # prepare graph - graphs = ds[0:batch_size][0] + graphs = ds[:batch_size][0] bg = dgl.batch(graphs).to(device) # dry run - for i in range(10): + for _ in range(10): glist = dgl.unbatch(bg) # timing with utils.Timer() as t: - for i in range(100): + for _ in range(100): glist = dgl.unbatch(bg) return t.elapsed_secs / 100 diff --git a/benchmarks/benchmarks/kernel/bench_edgesoftmax.py b/benchmarks/benchmarks/kernel/bench_edgesoftmax.py index 0ee6c10ed4f0..25653fe78cc5 100644 --- a/benchmarks/benchmarks/kernel/bench_edgesoftmax.py +++ b/benchmarks/benchmarks/kernel/bench_edgesoftmax.py @@ -14,12 +14,12 @@ def track_time(graph, num_heads): score = torch.randn((graph.num_edges(),num_heads)).requires_grad_(True).float().to(device) # dry run - for i in range(3): + for _ in range(3): y = dgl.ops.edge_softmax(graph, score) # timing with utils.Timer(device) as t: - for i in range(100): + for _ in range(100): y = dgl.ops.edge_softmax(graph, score) return t.elapsed_secs / 100 diff --git a/benchmarks/benchmarks/kernel/bench_gsddmm_u_dot_v.py b/benchmarks/benchmarks/kernel/bench_gsddmm_u_dot_v.py index 111e12965e56..1cbfb36f2f9a 100644 --- a/benchmarks/benchmarks/kernel/bench_gsddmm_u_dot_v.py +++ b/benchmarks/benchmarks/kernel/bench_gsddmm_u_dot_v.py @@ -27,12 +27,12 @@ def track_flops(graph, feat_size, num_heads): x = torch.randn(graph.num_nodes(), num_heads, feat_size // num_heads, device=device) # dry run - for i in range(3): + for _ in range(3): y = dgl.ops.u_dot_v(graph, x, x) # timing with utils.Timer(device) as t: - for i in range(10): + for _ in range(10): y = dgl.ops.u_dot_v(graph, x, x) return calc_gflops(graph, feat_size, num_heads, t.elapsed_secs / 10) diff --git a/benchmarks/benchmarks/kernel/bench_gspmm_copy_u.py b/benchmarks/benchmarks/kernel/bench_gspmm_copy_u.py index fd6d827b834b..d528f60b3320 100644 --- a/benchmarks/benchmarks/kernel/bench_gspmm_copy_u.py +++ b/benchmarks/benchmarks/kernel/bench_gspmm_copy_u.py @@ 
-24,12 +24,12 @@ def track_flops(graph, feat_size, reducer): raise ValueError('Invalid reducer', reducer) # dry run - for i in range(3): + for _ in range(3): y = op(graph, x) # timing with utils.Timer(device) as t: - for i in range(10): + for _ in range(10): y = op(graph, x) return calc_gflops(graph, feat_size, t.elapsed_secs / 10) diff --git a/benchmarks/benchmarks/kernel/bench_gspmm_u_mul_e_sum.py b/benchmarks/benchmarks/kernel/bench_gspmm_u_mul_e_sum.py index 00cd6ba0b12c..6fbca48b4e06 100644 --- a/benchmarks/benchmarks/kernel/bench_gspmm_u_mul_e_sum.py +++ b/benchmarks/benchmarks/kernel/bench_gspmm_u_mul_e_sum.py @@ -29,12 +29,12 @@ def track_flops(graph, feat_size, num_heads): w = torch.randn(graph.num_edges(), num_heads, 1, device=device) # dry run - for i in range(3): + for _ in range(3): y = dgl.ops.u_mul_e_sum(graph, x, w) # timing with utils.Timer(device) as t: - for i in range(10): + for _ in range(10): y = dgl.ops.u_mul_e_sum(graph, x, w) return calc_gflops(graph, feat_size, num_heads, t.elapsed_secs / 10) diff --git a/benchmarks/benchmarks/model_acc/bench_gat.py b/benchmarks/benchmarks/model_acc/bench_gat.py index c2cbfacdde78..1987b4c04334 100644 --- a/benchmarks/benchmarks/model_acc/bench_gat.py +++ b/benchmarks/benchmarks/model_acc/bench_gat.py @@ -41,9 +41,7 @@ def forward(self, g, inputs): h = inputs for l in range(self.num_layers): h = self.gat_layers[l](g, h).flatten(1) - # output projection - logits = self.gat_layers[-1](g, h).mean(1) - return logits + return self.gat_layers[-1](g, h).mean(1) def evaluate(model, g, features, labels, mask): model.eval() @@ -87,12 +85,11 @@ def track_acc(data): optimizer = torch.optim.Adam(model.parameters(), lr=1e-2, weight_decay=5e-4) - for epoch in range(200): + for _ in range(200): logits = model(g, features) loss = loss_fcn(logits[train_mask], labels[train_mask]) optimizer.zero_grad() loss.backward() optimizer.step() - acc = evaluate(model, g, features, labels, test_mask) - return acc + return evaluate(model, g, features, labels, test_mask) diff --git a/benchmarks/benchmarks/model_acc/bench_gcn.py b/benchmarks/benchmarks/model_acc/bench_gcn.py index 0c43a852c390..e44a32b8adc1 100644 --- a/benchmarks/benchmarks/model_acc/bench_gcn.py +++ b/benchmarks/benchmarks/model_acc/bench_gcn.py @@ -19,7 +19,7 @@ def __init__(self, # input layer self.layers.append(GraphConv(in_feats, n_hidden, activation=activation)) # hidden layers - for i in range(n_layers - 1): + for _ in range(n_layers - 1): self.layers.append(GraphConv(n_hidden, n_hidden, activation=activation)) # output layer self.layers.append(GraphConv(n_hidden, n_classes)) @@ -80,12 +80,11 @@ def track_acc(data): optimizer = torch.optim.Adam(model.parameters(), lr=1e-2, weight_decay=5e-4) - for epoch in range(200): + for _ in range(200): logits = model(g, features) loss = loss_fcn(logits[train_mask], labels[train_mask]) optimizer.zero_grad() loss.backward() optimizer.step() - acc = evaluate(model, g, features, labels, test_mask) - return acc + return evaluate(model, g, features, labels, test_mask) diff --git a/benchmarks/benchmarks/model_acc/bench_gcn_udf.py b/benchmarks/benchmarks/model_acc/bench_gcn_udf.py index 880296bbe323..8b49e2fa7a38 100644 --- a/benchmarks/benchmarks/model_acc/bench_gcn_udf.py +++ b/benchmarks/benchmarks/model_acc/bench_gcn_udf.py @@ -49,7 +49,7 @@ def __init__(self, # input layer self.layers.append(GraphConv(in_feats, n_hidden, activation=activation)) # hidden layers - for i in range(n_layers - 1): + for _ in range(n_layers - 1): 
self.layers.append(GraphConv(n_hidden, n_hidden, activation=activation)) # output layer self.layers.append(GraphConv(n_hidden, n_classes)) @@ -110,12 +110,11 @@ def track_acc(data): optimizer = torch.optim.Adam(model.parameters(), lr=1e-2, weight_decay=5e-4) - for epoch in range(200): + for _ in range(200): logits = model(g, features) loss = loss_fcn(logits[train_mask], labels[train_mask]) optimizer.zero_grad() loss.backward() optimizer.step() - acc = evaluate(model, g, features, labels, test_mask) - return acc + return evaluate(model, g, features, labels, test_mask) diff --git a/benchmarks/benchmarks/model_acc/bench_rgcn.py b/benchmarks/benchmarks/model_acc/bench_rgcn.py index b9b37e26ac81..7604ff611bfd 100644 --- a/benchmarks/benchmarks/model_acc/bench_rgcn.py +++ b/benchmarks/benchmarks/model_acc/bench_rgcn.py @@ -24,7 +24,7 @@ def __init__(self, num_bases, activation=F.relu, dropout=dropout, low_mem=low_mem)) # h2h - for i in range(num_hidden_layers): + for _ in range(num_hidden_layers): self.layers.append(RelGraphConv(n_hidden, n_hidden, num_rels, "basis", num_bases, activation=F.relu, dropout=dropout, low_mem=low_mem)) @@ -75,7 +75,7 @@ def track_acc(data, lowmem, use_type_count): train_mask = g.nodes[category].data.pop('train_mask').bool().to(device) test_mask = g.nodes[category].data.pop('test_mask').bool().to(device) labels = g.nodes[category].data.pop('labels').to(device) - + # calculate norm for each edge type and store in edge for canonical_etype in g.canonical_etypes: u, v, eid = g.all_edges(form='all', etype=canonical_etype) @@ -126,12 +126,11 @@ def track_acc(data, lowmem, use_type_count): weight_decay=l2norm) model.train() - for epoch in range(30): + for _ in range(30): logits = model(g, feats, edge_type, edge_norm) loss = F.cross_entropy(logits[train_idx], train_labels) optimizer.zero_grad() loss.backward() optimizer.step() - acc = evaluate(model, g, feats, edge_type, edge_norm, test_labels, test_idx) - return acc + return evaluate(model, g, feats, edge_type, edge_norm, test_labels, test_idx) diff --git a/benchmarks/benchmarks/model_acc/bench_rgcn_ns.py b/benchmarks/benchmarks/model_acc/bench_rgcn_ns.py index e23255d43351..10319d1c9473 100644 --- a/benchmarks/benchmarks/model_acc/bench_rgcn_ns.py +++ b/benchmarks/benchmarks/model_acc/bench_rgcn_ns.py @@ -70,7 +70,7 @@ def __init__(self, self.num_bases, activation=F.relu, self_loop=self.use_self_loop, low_mem=self.low_mem, dropout=self.dropout, layer_norm = layer_norm)) # h2h - for idx in range(self.num_hidden_layers): + for _ in range(self.num_hidden_layers): self.layers.append(RelGraphConv( self.h_dim, self.h_dim, self.num_rels, "basis", self.num_bases, activation=F.relu, self_loop=self.use_self_loop, @@ -164,11 +164,10 @@ def forward(self, node_ids, node_tids, type_ids, features): tsd_ids = node_ids.to(self.node_embeds.weight.device) embeds = th.empty(node_ids.shape[0], self.embed_size, device=self.device) for ntype in range(self.num_of_ntype): + loc = node_tids == ntype if features[ntype] is not None: - loc = node_tids == ntype embeds[loc] = features[ntype][type_ids[loc]].to(self.device) @ self.embeds[str(ntype)].to(self.device) else: - loc = node_tids == ntype embeds[loc] = self.node_embeds(tsd_ids[loc]).to(self.device) return embeds @@ -178,7 +177,7 @@ def evaluate(model, embed_layer, eval_loader, node_feats): embed_layer.eval() eval_logits = [] eval_seeds = [] - + with th.no_grad(): for sample_data in eval_loader: th.cuda.empty_cache() @@ -192,7 +191,7 @@ def evaluate(model, embed_layer, eval_loader, node_feats): 
eval_seeds.append(blocks[-1].dstdata['type_id'].cpu().detach()) eval_logits = th.cat(eval_logits) eval_seeds = th.cat(eval_seeds) - + return eval_logits, eval_seeds @@ -317,11 +316,11 @@ def track_acc(data): emb_optimizer = th.optim.SparseAdam(list(embed_layer.node_embeds.parameters()), lr=lr) print("start training...") - for epoch in range(n_epochs): + for _ in range(n_epochs): model.train() embed_layer.train() - for i, sample_data in enumerate(train_loader): + for sample_data in train_loader: input_nodes, output_nodes, blocks = sample_data feats = embed_layer(input_nodes, blocks[0].srcdata['ntype'], @@ -341,6 +340,6 @@ def track_acc(data): test_logits, test_seeds = evaluate(model, embed_layer, test_loader, node_feats) test_loss = F.cross_entropy(test_logits, labels[test_seeds].cpu()).item() - test_acc = th.sum(test_logits.argmax(dim=1) == labels[test_seeds].cpu()).item() / len(test_seeds) - - return test_acc + return th.sum( + test_logits.argmax(dim=1) == labels[test_seeds].cpu() + ).item() / len(test_seeds) diff --git a/benchmarks/benchmarks/model_acc/bench_sage.py b/benchmarks/benchmarks/model_acc/bench_sage.py index c85ec7165575..7938698a0f9c 100644 --- a/benchmarks/benchmarks/model_acc/bench_sage.py +++ b/benchmarks/benchmarks/model_acc/bench_sage.py @@ -23,7 +23,7 @@ def __init__(self, # input layer self.layers.append(SAGEConv(in_feats, n_hidden, aggregator_type)) # hidden layers - for i in range(n_layers - 1): + for _ in range(n_layers - 1): self.layers.append(SAGEConv(n_hidden, n_hidden, aggregator_type)) # output layer self.layers.append(SAGEConv(n_hidden, n_classes, aggregator_type)) # activation None @@ -78,12 +78,11 @@ def track_acc(data): optimizer = torch.optim.Adam(model.parameters(), lr=1e-2, weight_decay=5e-4) - for epoch in range(200): + for _ in range(200): logits = model(g, features) loss = loss_fcn(logits[train_mask], labels[train_mask]) optimizer.zero_grad() loss.backward() optimizer.step() - acc = evaluate(model, g, features, labels, test_mask) - return acc + return evaluate(model, g, features, labels, test_mask) diff --git a/benchmarks/benchmarks/model_acc/bench_sage_ns.py b/benchmarks/benchmarks/model_acc/bench_sage_ns.py index 6f99d94ad32d..179090e9a5ca 100644 --- a/benchmarks/benchmarks/model_acc/bench_sage_ns.py +++ b/benchmarks/benchmarks/model_acc/bench_sage_ns.py @@ -25,7 +25,7 @@ def __init__(self, self.n_classes = n_classes self.layers = nn.ModuleList() self.layers.append(dglnn.SAGEConv(in_feats, n_hidden, 'mean')) - for i in range(1, n_layers - 1): + for _ in range(1, n_layers - 1): self.layers.append(dglnn.SAGEConv(n_hidden, n_hidden, 'mean')) self.layers.append(dglnn.SAGEConv(n_hidden, n_classes, 'mean')) self.dropout = nn.Dropout(dropout) @@ -164,7 +164,7 @@ def track_acc(data): optimizer = optim.Adam(model.parameters(), lr=lr) # dry run one epoch - for step, (input_nodes, seeds, blocks) in enumerate(dataloader): + for input_nodes, seeds, blocks in dataloader: # Load the input features as well as output labels #batch_inputs, batch_labels = load_subtensor(g, seeds, input_nodes, device) blocks = [block.int().to(device) for block in blocks] @@ -179,10 +179,10 @@ def track_acc(data): optimizer.step() # Training loop - for epoch in range(num_epochs): + for _ in range(num_epochs): # Loop over the dataloader to sample the computation dependency graph as a list of # blocks. 
- for step, (input_nodes, seeds, blocks) in enumerate(dataloader): + for input_nodes, seeds, blocks in dataloader: # Load the input features as well as output labels #batch_inputs, batch_labels = load_subtensor(g, seeds, input_nodes, device) blocks = [block.int().to(device) for block in blocks] diff --git a/benchmarks/benchmarks/model_speed/bench_gat.py b/benchmarks/benchmarks/model_speed/bench_gat.py index 7a5450f37a89..82842f9e9dd9 100644 --- a/benchmarks/benchmarks/model_speed/bench_gat.py +++ b/benchmarks/benchmarks/model_speed/bench_gat.py @@ -42,9 +42,7 @@ def forward(self, g, inputs): h = inputs for l in range(self.num_layers): h = self.gat_layers[l](g, h).flatten(1) - # output projection - logits = self.gat_layers[-1](g, h).mean(1) - return logits + return self.gat_layers[-1](g, h).mean(1) @utils.benchmark('time') @utils.parametrize('data', ['cora', 'pubmed']) @@ -81,7 +79,7 @@ def track_time(data): weight_decay=5e-4) # dry run - for epoch in range(10): + for _ in range(10): logits = model(g, features) loss = loss_fcn(logits[train_mask], labels[train_mask]) optimizer.zero_grad() @@ -90,7 +88,7 @@ def track_time(data): # timing t0 = time.time() - for epoch in range(num_epochs): + for _ in range(num_epochs): logits = model(g, features) loss = loss_fcn(logits[train_mask], labels[train_mask]) optimizer.zero_grad() diff --git a/benchmarks/benchmarks/model_speed/bench_gat_ns.py b/benchmarks/benchmarks/model_speed/bench_gat_ns.py index f3dfbba4d160..79f8e6f552bd 100644 --- a/benchmarks/benchmarks/model_speed/bench_gat_ns.py +++ b/benchmarks/benchmarks/model_speed/bench_gat_ns.py @@ -33,7 +33,7 @@ def __init__(self, attn_drop=dropout, activation=activation, negative_slope=0.2)) - for i in range(1, n_layers - 1): + for _ in range(1, n_layers - 1): self.layers.append(dglnn.GATConv(n_hidden * num_heads, n_hidden, num_heads=num_heads, diff --git a/benchmarks/benchmarks/model_speed/bench_gcn_udf.py b/benchmarks/benchmarks/model_speed/bench_gcn_udf.py index cfadcdb5da19..1c195caf3f18 100644 --- a/benchmarks/benchmarks/model_speed/bench_gcn_udf.py +++ b/benchmarks/benchmarks/model_speed/bench_gcn_udf.py @@ -50,7 +50,7 @@ def __init__(self, # input layer self.layers.append(GraphConv(in_feats, n_hidden, activation=activation)) # hidden layers - for i in range(n_layers - 1): + for _ in range(n_layers - 1): self.layers.append(GraphConv(n_hidden, n_hidden, activation=activation)) # output layer self.layers.append(GraphConv(n_hidden, n_classes)) @@ -102,7 +102,7 @@ def track_time(data): lr=1e-2, weight_decay=5e-4) # dry run - for epoch in range(5): + for _ in range(5): logits = model(g, features) loss = loss_fcn(logits[train_mask], labels[train_mask]) optimizer.zero_grad() @@ -110,7 +110,7 @@ def track_time(data): optimizer.step() with utils.Timer(device) as t: - for epoch in range(200): + for _ in range(200): logits = model(g, features) loss = loss_fcn(logits[train_mask], labels[train_mask]) optimizer.zero_grad() diff --git a/benchmarks/benchmarks/model_speed/bench_pinsage.py b/benchmarks/benchmarks/model_speed/bench_pinsage.py index bdc8d82e18a3..e3bc674a7b56 100644 --- a/benchmarks/benchmarks/model_speed/bench_pinsage.py +++ b/benchmarks/benchmarks/model_speed/bench_pinsage.py @@ -134,7 +134,7 @@ def __init__(self, hidden_dims, n_layers): def forward(self, blocks, h): for layer, block in zip(self.convs, blocks): - h_dst = h[:block.number_of_nodes('DST/' + block.ntypes[0])] + h_dst = h[:block.number_of_nodes(f'DST/{block.ntypes[0]}')] h = layer(block, (h, h_dst), block.edata['weights']) return h @@ 
-159,7 +159,7 @@ def forward(self, ndata): module = self.inputs[feature] if isinstance(module, (BagOfWords, BagOfWordsPretrained)): # Textual feature; find the length and pass it to the textual module. - length = ndata[feature + '__len'] + length = ndata[f'{feature}__len'] result = module(data, length) else: result = module(data) @@ -327,7 +327,7 @@ def assign_textual_node_features(ndata, textset, ntype): tokens = tokens.t() ndata[field_name] = tokens - ndata[field_name + '__len'] = lengths + ndata[f'{field_name}__len'] = lengths def assign_features_to_blocks(blocks, g, textset, ntype): # For the first block (which is closest to the input), copy the features from diff --git a/benchmarks/benchmarks/model_speed/bench_rgcn.py b/benchmarks/benchmarks/model_speed/bench_rgcn.py index 61dca9f2208c..eac9bf2b1525 100644 --- a/benchmarks/benchmarks/model_speed/bench_rgcn.py +++ b/benchmarks/benchmarks/model_speed/bench_rgcn.py @@ -23,7 +23,7 @@ def __init__(self, self.layers.append(RelGraphConv(num_nodes, n_hidden, num_rels, "basis", num_bases, activation=F.relu, dropout=dropout)) # h2h - for i in range(num_hidden_layers): + for _ in range(num_hidden_layers): self.layers.append(RelGraphConv(n_hidden, n_hidden, num_rels, "basis", num_bases, activation=F.relu, dropout=dropout)) # o2h @@ -41,11 +41,7 @@ def forward(self, g, h, r, norm): def track_time(data, use_type_count): # args if data == 'aifb': - if dgl.__version__.startswith("0.8"): - num_bases = None - else: - num_bases = -1 - + num_bases = None if dgl.__version__.startswith("0.8") else -1 l2norm = 0. elif data == 'am': num_bases = 40 @@ -65,7 +61,7 @@ def track_time(data, use_type_count): train_mask = g.nodes[category].data.pop('train_mask').bool().to(device) test_mask = g.nodes[category].data.pop('test_mask').bool().to(device) labels = g.nodes[category].data.pop('labels').to(device) - + # calculate norm for each edge type and store in edge for canonical_etype in g.canonical_etypes: u, v, eid = g.all_edges(form='all', etype=canonical_etype) @@ -116,7 +112,7 @@ def track_time(data, use_type_count): model.train() t0 = time.time() - for epoch in range(num_epochs): + for _ in range(num_epochs): logits = model(g, feats, edge_type, edge_norm) loss = F.cross_entropy(logits[train_idx], train_labels) optimizer.zero_grad() diff --git a/benchmarks/benchmarks/model_speed/bench_rgcn_hetero_ns.py b/benchmarks/benchmarks/model_speed/bench_rgcn_hetero_ns.py index a44aad5bfc7f..359b6e412d64 100644 --- a/benchmarks/benchmarks/model_speed/bench_rgcn_hetero_ns.py +++ b/benchmarks/benchmarks/model_speed/bench_rgcn_hetero_ns.py @@ -174,13 +174,13 @@ def forward(self, block=None): DGLHeteroGraph The block graph fed with embeddings. 
""" - embeds = {} - for ntype in block.ntypes: - if self.node_feats[ntype] is None: - embeds[ntype] = self.node_embeds[ntype](block.nodes(ntype)).to(self.device) - else: - embeds[ntype] = self.node_feats[ntype][block.nodes(ntype)].to(self.device) @ self.embeds[ntype] - return embeds + return { + ntype: self.node_embeds[ntype](block.nodes(ntype)).to(self.device) + if self.node_feats[ntype] is None + else self.node_feats[ntype][block.nodes(ntype)].to(self.device) + @ self.embeds[ntype] + for ntype in block.ntypes + } class EntityClassify(nn.Module): def __init__(self, @@ -194,8 +194,7 @@ def __init__(self, self.g = g self.h_dim = h_dim self.out_dim = out_dim - self.rel_names = list(set(g.etypes)) - self.rel_names.sort() + self.rel_names = sorted(set(g.etypes)) if num_bases < 0 or num_bases > len(self.rel_names): self.num_bases = len(self.rel_names) else: @@ -211,7 +210,7 @@ def __init__(self, self.num_bases, activation=F.relu, self_loop=self.use_self_loop, dropout=self.dropout, weight=False)) # h2h - for i in range(self.num_hidden_layers): + for _ in range(self.num_hidden_layers): self.layers.append(RelGraphConvLayer( self.h_dim, self.h_dim, self.rel_names, self.num_bases, activation=F.relu, self_loop=self.use_self_loop, diff --git a/benchmarks/benchmarks/model_speed/bench_rgcn_homogeneous_ns.py b/benchmarks/benchmarks/model_speed/bench_rgcn_homogeneous_ns.py index 95517cce48f2..6669235e6e3e 100644 --- a/benchmarks/benchmarks/model_speed/bench_rgcn_homogeneous_ns.py +++ b/benchmarks/benchmarks/model_speed/bench_rgcn_homogeneous_ns.py @@ -70,7 +70,7 @@ def __init__(self, self.num_bases, activation=F.relu, self_loop=self.use_self_loop, low_mem=self.low_mem, dropout=self.dropout, layer_norm = layer_norm)) # h2h - for idx in range(self.num_hidden_layers): + for _ in range(self.num_hidden_layers): self.layers.append(RelGraphConv( self.h_dim, self.h_dim, self.num_rels, "basis", self.num_bases, activation=F.relu, self_loop=self.use_self_loop, @@ -164,11 +164,10 @@ def forward(self, node_ids, node_tids, type_ids, features): tsd_ids = node_ids.to(self.node_embeds.weight.device) embeds = th.empty(node_ids.shape[0], self.embed_size, device=self.device) for ntype in range(self.num_of_ntype): + loc = node_tids == ntype if features[ntype] is not None: - loc = node_tids == ntype embeds[loc] = features[ntype][type_ids[loc]].to(self.device) @ self.embeds[str(ntype)].to(self.device) else: - loc = node_tids == ntype embeds[loc] = self.node_embeds(tsd_ids[loc]).to(self.device) return embeds @@ -305,7 +304,7 @@ def track_time(data): loss.backward() optimizer.step() emb_optimizer.step() - + # start timer at before iter_start if step == iter_start - 1: t0 = time.time() diff --git a/benchmarks/benchmarks/model_speed/bench_sage.py b/benchmarks/benchmarks/model_speed/bench_sage.py index eaa36aa1f729..eefa74e8245f 100644 --- a/benchmarks/benchmarks/model_speed/bench_sage.py +++ b/benchmarks/benchmarks/model_speed/bench_sage.py @@ -24,7 +24,7 @@ def __init__(self, # input layer self.layers.append(SAGEConv(in_feats, n_hidden, aggregator_type)) # hidden layers - for i in range(n_layers - 1): + for _ in range(n_layers - 1): self.layers.append(SAGEConv(n_hidden, n_hidden, aggregator_type)) # output layer self.layers.append(SAGEConv(n_hidden, n_classes, aggregator_type)) # activation None @@ -72,7 +72,7 @@ def track_time(data): weight_decay=5e-4) # dry run - for i in range(10): + for _ in range(10): logits = model(g, features) loss = loss_fcn(logits[train_mask], labels[train_mask]) optimizer.zero_grad() @@ -81,7 +81,7 @@ 
def track_time(data): # timing t0 = time.time() - for epoch in range(num_epochs): + for _ in range(num_epochs): logits = model(g, features) loss = loss_fcn(logits[train_mask], labels[train_mask]) optimizer.zero_grad() diff --git a/benchmarks/benchmarks/model_speed/bench_sage_ns.py b/benchmarks/benchmarks/model_speed/bench_sage_ns.py index 6db74a21d013..8f89f15df3cd 100644 --- a/benchmarks/benchmarks/model_speed/bench_sage_ns.py +++ b/benchmarks/benchmarks/model_speed/bench_sage_ns.py @@ -24,7 +24,7 @@ def __init__(self, self.n_classes = n_classes self.layers = nn.ModuleList() self.layers.append(dglnn.SAGEConv(in_feats, n_hidden, 'mean')) - for i in range(1, n_layers - 1): + for _ in range(1, n_layers - 1): self.layers.append(dglnn.SAGEConv(n_hidden, n_hidden, 'mean')) self.layers.append(dglnn.SAGEConv(n_hidden, n_classes, 'mean')) self.dropout = nn.Dropout(dropout) diff --git a/benchmarks/benchmarks/model_speed/bench_sage_unsupervised_ns.py b/benchmarks/benchmarks/model_speed/bench_sage_unsupervised_ns.py index 843f5c1df722..d8630529014c 100644 --- a/benchmarks/benchmarks/model_speed/bench_sage_unsupervised_ns.py +++ b/benchmarks/benchmarks/model_speed/bench_sage_unsupervised_ns.py @@ -34,8 +34,7 @@ def load_subtensor(g, input_nodes, device): """ Copys features and labels of a set of nodes onto GPU. """ - batch_inputs = g.ndata['features'][input_nodes].to(device) - return batch_inputs + return g.ndata['features'][input_nodes].to(device) class SAGE(nn.Module): def __init__(self, @@ -51,7 +50,7 @@ def __init__(self, self.n_classes = n_classes self.layers = nn.ModuleList() self.layers.append(dglnn.SAGEConv(in_feats, n_hidden, 'mean')) - for i in range(1, n_layers - 1): + for _ in range(1, n_layers - 1): self.layers.append(dglnn.SAGEConv(n_hidden, n_hidden, 'mean')) self.layers.append(dglnn.SAGEConv(n_hidden, n_classes, 'mean')) self.dropout = nn.Dropout(dropout) @@ -70,8 +69,7 @@ def load_subtensor(g, input_nodes, device): """ Copys features and labels of a set of nodes onto GPU. 
""" - batch_inputs = g.ndata['features'][input_nodes].to(device) - return batch_inputs + return g.ndata['features'][input_nodes].to(device) class CrossEntropyLoss(nn.Module): def forward(self, block_outputs, pos_graph, neg_graph): @@ -86,8 +84,7 @@ def forward(self, block_outputs, pos_graph, neg_graph): score = th.cat([pos_score, neg_score]) label = th.cat([th.ones_like(pos_score), th.zeros_like(neg_score)]).long() - loss = F.binary_cross_entropy_with_logits(score, label.float()) - return loss + return F.binary_cross_entropy_with_logits(score, label.float()) @utils.benchmark('time', 600) @utils.parametrize('data', ['reddit']) diff --git a/benchmarks/benchmarks/multigpu/bench_multigpu_rgcn.py b/benchmarks/benchmarks/multigpu/bench_multigpu_rgcn.py index 7e323f9d28f0..e21f6b419339 100644 --- a/benchmarks/benchmarks/multigpu/bench_multigpu_rgcn.py +++ b/benchmarks/benchmarks/multigpu/bench_multigpu_rgcn.py @@ -65,7 +65,7 @@ def __init__(self, self.num_bases, activation=F.relu, self_loop=self.use_self_loop, low_mem=self.low_mem, dropout=self.dropout, layer_norm=layer_norm)) # h2h - for idx in range(self.num_hidden_layers): + for _ in range(self.num_hidden_layers): self.layers.append(RelGraphConv( self.h_dim, self.h_dim, self.num_rels, "basis", self.num_bases, activation=F.relu, self_loop=self.use_self_loop, diff --git a/benchmarks/benchmarks/multigpu/bench_multigpu_sage.py b/benchmarks/benchmarks/multigpu/bench_multigpu_sage.py index 6ab7cca687b0..0e1b03246a2b 100644 --- a/benchmarks/benchmarks/multigpu/bench_multigpu_sage.py +++ b/benchmarks/benchmarks/multigpu/bench_multigpu_sage.py @@ -30,7 +30,7 @@ def __init__(self, self.n_classes = n_classes self.layers = nn.ModuleList() self.layers.append(dglnn.SAGEConv(in_feats, n_hidden, 'mean')) - for i in range(1, n_layers - 1): + for _ in range(1, n_layers - 1): self.layers.append(dglnn.SAGEConv(n_hidden, n_hidden, 'mean')) self.layers.append(dglnn.SAGEConv(n_hidden, n_classes, 'mean')) self.dropout = nn.Dropout(dropout) @@ -174,8 +174,5 @@ def track_time(data): for p in procs: p.join() time_records = result_queue.get(block=False) - num_exclude = 10 # exclude first 10 iterations - if len(time_records) < 15: - # exclude less if less records - num_exclude = int(len(time_records)*0.3) + num_exclude = int(len(time_records)*0.3) if len(time_records) < 15 else 10 return np.mean(time_records[num_exclude:]) diff --git a/benchmarks/benchmarks/multigpu/rgcn_model.py b/benchmarks/benchmarks/multigpu/rgcn_model.py index 327ffdf24b9e..51e56625e261 100644 --- a/benchmarks/benchmarks/multigpu/rgcn_model.py +++ b/benchmarks/benchmarks/multigpu/rgcn_model.py @@ -112,11 +112,7 @@ def __init__(self, def dgl_emb(self): """ """ - if self.dgl_sparse: - embs = [emb for emb in self.node_embeds.values()] - return embs - else: - return [] + return list(self.node_embeds.values()) if self.dgl_sparse else [] def forward(self, node_ids, node_tids, type_ids, features): """Forward computation diff --git a/benchmarks/benchmarks/utils.py b/benchmarks/benchmarks/utils.py index 9475be409e41..7aabe2241a42 100644 --- a/benchmarks/benchmarks/utils.py +++ b/benchmarks/benchmarks/utils.py @@ -24,7 +24,7 @@ def _download(url, path, filename): os.makedirs(path, exist_ok=True) f_remote = requests.get(url, stream=True) sz = f_remote.headers.get('content-length') - assert f_remote.status_code == 200, 'fail to open {}'.format(url) + assert f_remote.status_code == 200, f'fail to open {url}' with open(fn, 'wb') as writer: for chunk in f_remote.iter_content(chunk_size=1024*1024): 
writer.write(chunk) @@ -78,7 +78,7 @@ def get_graph(name, format = None): elif name == 'pubmed': g = dgl.data.PubmedGraphDataset(verbose=False)[0] elif name == 'livejournal': - bin_path = "/tmp/dataset/livejournal/livejournal_{}.bin".format(format) + bin_path = f"/tmp/dataset/livejournal/livejournal_{format}.bin" if os.path.exists(bin_path): g_list, _ = dgl.load_graphs(bin_path) g = g_list[0] @@ -86,7 +86,7 @@ def get_graph(name, format = None): g = get_livejournal().formats(format) dgl.save_graphs(bin_path, [g]) elif name == "friendster": - bin_path = "/tmp/dataset/friendster/friendster_{}.bin".format(format) + bin_path = f"/tmp/dataset/friendster/friendster_{format}.bin" if os.path.exists(bin_path): g_list, _ = dgl.load_graphs(bin_path) g = g_list[0] @@ -95,7 +95,7 @@ def get_graph(name, format = None): g = dgl.compact_graphs(get_friendster()).formats(format) dgl.save_graphs(bin_path, [g]) elif name == "reddit": - bin_path = "/tmp/dataset/reddit/reddit_{}.bin".format(format) + bin_path = f"/tmp/dataset/reddit/reddit_{format}.bin" if os.path.exists(bin_path): g_list, _ = dgl.load_graphs(bin_path) g = g_list[0] @@ -274,7 +274,7 @@ def load_nowplaying_rs(): # Prepare torchtext dataset and vocabulary fields = {} examples = [] - for i in range(g.number_of_nodes(item_ntype)): + for _ in range(g.number_of_nodes(item_ntype)): example = torchtext.data.Example.fromlist( [], []) examples.append(example) @@ -310,10 +310,7 @@ def process_data(name): def get_bench_device(): device = os.environ.get('DGL_BENCH_DEVICE', 'cpu') - if device.lower() == "gpu": - return "cuda:0" - else: - return device + return "cuda:0" if device.lower() == "gpu" else device def setup_track_time(*args, **kwargs): @@ -442,10 +439,10 @@ def check(self, func): if self.conf is None: return True else: - for enabled_testname in self.enabled_tests: - if enabled_testname in funcfullname: - return True - return False + return any( + enabled_testname in funcfullname + for enabled_testname in self.enabled_tests + ) filter = TestFilter() @@ -461,7 +458,8 @@ def check(self, func): parametrize_gpu = parametrize else: raise Exception( - "Unknown device. Must be one of ['cpu', 'gpu'], but got {}".format(device)) + f"Unknown device. 
Must be one of ['cpu', 'gpu'], but got {device}" + ) def skip_if_gpu(): @@ -497,7 +495,7 @@ def skip_if_not_4gpu(): def _wrapper(func): if GPU_COUNT != 4: # skip if not enabled - print("Skip {}".format(func.__name__)) + print(f"Skip {func.__name__}") func.benchmark_name = "skip_" + func.__name__ return func return _wrapper @@ -545,10 +543,7 @@ def _wrapper(func): class Timer: def __init__(self, device=None): self.timer = default_timer - if device is None: - self.device = get_bench_device() - else: - self.device = device + self.device = get_bench_device() if device is None else device def __enter__(self): if self.device == 'cuda:0': diff --git a/benchmarks/scripts/generate_excel.py b/benchmarks/scripts/generate_excel.py index c6b730f9cc44..ff43e2cdb225 100644 --- a/benchmarks/scripts/generate_excel.py +++ b/benchmarks/scripts/generate_excel.py @@ -10,10 +10,7 @@ def get_branch_name_from_hash(hash): stdout=subprocess.PIPE, stderr=subprocess.PIPE) stdout, stderr = process.communicate() - if len(stderr) > 0: - return hash[:10] - else: - return stdout.decode("utf-8") .strip("\n") + return hash[:10] if len(stderr) > 0 else stdout.decode("utf-8").strip("\n") def main(): @@ -52,22 +49,27 @@ def main(): def dict_to_csv(output_results_dict): with open("../results/benchmarks.json") as f: benchmark_conf = json.load(f) - unit_dict = {} - for k, v in benchmark_conf.items(): - if k != 'version': - unit_dict[k] = v['unit'] + unit_dict = {k: v['unit'] for k, v in benchmark_conf.items() if k != 'version'} result_list = [] for machine, per_machine_result in output_results_dict.items(): for commit, test_cases in per_machine_result.items(): branch_name = get_branch_name_from_hash(commit) - result_column_name = "number_{}".format(branch_name) + result_column_name = f"number_{branch_name}" # per_commit_result_list = [] for test_case_name, results in test_cases.items(): - for result in results: - result_list.append( - {"test_name": test_case_name, 'params': result['params'], 'unit': unit_dict[test_case_name], "number": result['result'], 'commit': branch_name, 'machine': machine}) - df = pd.DataFrame(result_list) - return df + result_list.extend( + { + "test_name": test_case_name, + 'params': result['params'], + 'unit': unit_dict[test_case_name], + "number": result['result'], + 'commit': branch_name, + 'machine': machine, + } + for result in results + ) + + return pd.DataFrame(result_list) def side_by_side_view(df): @@ -76,7 +78,15 @@ def side_by_side_view(df): for commit in commits[1:]: per_commit_df = df.loc[df['commit'] == commit] full_df: pd.DataFrame = full_df.merge( - per_commit_df, on=['test_name', 'params', 'machine', 'unit'], how='outer', suffixes=("_{}".format(full_df.iloc[0]["commit"]), "_{}".format(per_commit_df.iloc[0]["commit"]))) + per_commit_df, + on=['test_name', 'params', 'machine', 'unit'], + how='outer', + suffixes=( + f'_{full_df.iloc[0]["commit"]}', + f'_{per_commit_df.iloc[0]["commit"]}', + ), + ) + full_df = full_df.loc[:, ~full_df.columns.str.startswith('commit')] return full_df diff --git a/benchmarks/scripts/replace_branch.py b/benchmarks/scripts/replace_branch.py index f8f8e79a055c..f6401e7d75b4 100644 --- a/benchmarks/scripts/replace_branch.py +++ b/benchmarks/scripts/replace_branch.py @@ -45,9 +45,9 @@ def json_minify(string, strip_space=True): in_multi = True elif val == '//': in_single = True - elif val == '*/' and in_multi and not (in_string or in_single): + elif val == '*/' and in_multi and not in_string and not in_single: in_multi = False - elif val in '\r\n' and not (in_multi 
or in_string) and in_single: + elif val in '\r\n' and not in_multi and not in_string and in_single: in_single = False elif not ((in_multi or in_single) or (val in ' \r\n\t' and strip_space)): new_str.append(val) @@ -60,10 +60,7 @@ def add_prefix(branch_name): - if '/' not in branch_name: - return "origin/"+branch_name - else: - return branch_name + return f"origin/{branch_name}" if '/' not in branch_name else branch_name def change_branch(branch_str: str): @@ -76,6 +73,5 @@ json.dump(config_json, f) -if __name__ == "__main__": - if "BRANCH_STR" in os.environ: - change_branch(os.environ["BRANCH_STR"]) +if __name__ == "__main__" and "BRANCH_STR" in os.environ: + change_branch(os.environ["BRANCH_STR"]) diff --git a/dglgo/dglgo/cli/export_cli.py b/dglgo/dglgo/cli/export_cli.py index 1dc01234e116..20e9924c14a0 100644 --- a/dglgo/dglgo/cli/export_cli.py +++ b/dglgo/dglgo/cli/export_cli.py @@ -21,7 +21,9 @@ def export( f_code = isort.code(f_code) with open(output, "w") as f: f.write(f_code) - print("The python script is generated at {}, based on config file {}".format(Path(output).absolute(), Path(cfg).absolute())) + print( + f"The python script is generated at {Path(output).absolute()}, based on config file {Path(cfg).absolute()}" + ) if __name__ == "__main__": export_app = typer.Typer() diff --git a/dglgo/dglgo/cli/recipe_cli.py b/dglgo/dglgo/cli/recipe_cli.py index 4ad97088e73d..cfb1c29f0e21 100644 --- a/dglgo/dglgo/cli/recipe_cli.py +++ b/dglgo/dglgo/cli/recipe_cli.py @@ -41,7 +41,7 @@ def get_recipe(recipe_name: Optional[str] = typer.Argument(None, help="The recip current_dir = Path(os.getcwd()) recipe_path = recipe_dir / recipe_name shutil.copy(recipe_path, current_dir) - print("Recipe {} is copied to {}".format(recipe_path.absolute(), current_dir.absolute())) + print(f"Recipe {recipe_path.absolute()} is copied to {current_dir.absolute()}") recipe_app = typer.Typer(help="Get example recipes") diff --git a/dglgo/dglgo/model/edge_encoder/bilinear.py b/dglgo/dglgo/model/edge_encoder/bilinear.py index 6a4b24ac79d8..c42931b8979c 100644 --- a/dglgo/dglgo/model/edge_encoder/bilinear.py +++ b/dglgo/dglgo/model/edge_encoder/bilinear.py @@ -27,8 +27,7 @@ def __init__(self, self.bilinear = nn.Bilinear(in_size, in_size, hidden_size, bias=bias) lins_list = [] for _ in range(num_layers-2): - lins_list.append(nn.Linear(hidden_size, hidden_size, bias=bias)) - lins_list.append(nn.ReLU()) + lins_list.extend((nn.Linear(hidden_size, hidden_size, bias=bias), nn.ReLU())) lins_list.append(nn.Linear(hidden_size, out_size, bias=bias)) self.linear = nn.Sequential(*lins_list) diff --git a/dglgo/dglgo/model/edge_encoder/dot.py b/dglgo/dglgo/model/edge_encoder/dot.py index 6ef08ff2ebd7..9cc59c0f652e 100644 --- a/dglgo/dglgo/model/edge_encoder/dot.py +++ b/dglgo/dglgo/model/edge_encoder/dot.py @@ -12,8 +12,7 @@ def __init__(self, super(DotPredictor, self).__init__() lins_list = [] for _ in range(num_layers-2): - lins_list.append(nn.Linear(in_size, hidden_size, bias=bias)) - lins_list.append(nn.ReLU()) + lins_list.extend((nn.Linear(in_size, hidden_size, bias=bias), nn.ReLU())) lins_list.append(nn.Linear(hidden_size, out_size, bias=bias)) self.linear = nn.Sequential(*lins_list) diff --git a/dglgo/dglgo/model/graph_encoder/pna.py b/dglgo/dglgo/model/graph_encoder/pna.py index 0864a5aa8d90..6e2cfc4234d6 100644 --- a/dglgo/dglgo/model/graph_encoder/pna.py +++ b/dglgo/dglgo/model/graph_encoder/pna.py @@ -27,8 +27,7 @@ def aggregate_var(h): """variance 
aggregation""" h_mean_squares = torch.mean(h * h, dim=1) h_mean = torch.mean(h, dim=1) - var = torch.relu(h_mean_squares - h_mean * h_mean) - return var + return torch.relu(h_mean_squares - h_mean * h_mean) def aggregate_std(h): """standard deviation aggregation""" @@ -105,10 +104,7 @@ def __init__(self, self.dropout = nn.Dropout(dropout) self.residual = residual - if batch_norm: - self.bn = nn.BatchNorm1d(feat_size) - else: - self.bn = None + self.bn = nn.BatchNorm1d(feat_size) if batch_norm else None def reduce(self, nodes): h = nodes.mailbox['m'] @@ -213,12 +209,12 @@ def __init__(self, num_mlp_layers=num_mlp_layers) for _ in range(num_layers)]) - if readout == 'sum': - self.pool = SumPooling() - elif readout == 'mean': + if readout == 'mean': self.pool = AvgPooling() + elif readout == 'sum': + self.pool = SumPooling() else: - raise ValueError("Expect readout to be 'sum' or 'mean', got {}".format(readout)) + raise ValueError(f"Expect readout to be 'sum' or 'mean', got {readout}") self.pred = MLP(embed_size, data_info['out_size'], decreasing_hidden_size=True) def forward(self, graph, node_feat, edge_feat=None): diff --git a/dglgo/dglgo/model/node_encoder/gat.py b/dglgo/dglgo/model/node_encoder/gat.py index 5c148d047436..14141604cc65 100644 --- a/dglgo/dglgo/model/node_encoder/gat.py +++ b/dglgo/dglgo/model/node_encoder/gat.py @@ -75,13 +75,10 @@ def forward(self, graph, node_feat, edge_feat=None): h = node_feat for l in range(self.num_layers - 1): h = self.gat_layers[l](graph, h).flatten(1) - # output projection - logits = self.gat_layers[-1](graph, h).mean(1) - return logits + return self.gat_layers[-1](graph, h).mean(1) def forward_block(self, blocks, node_feat, edge_feat=None): h = node_feat for l in range(self.num_layers - 1): h = self.gat_layers[l](blocks[l], h).flatten(1) - logits = self.gat_layers[-1](blocks[-1], h).mean(1) - return logits + return self.gat_layers[-1](blocks[-1], h).mean(1) diff --git a/dglgo/dglgo/pipeline/graphpred/gen.py b/dglgo/dglgo/pipeline/graphpred/gen.py index e66f8d178115..dbe67f7c6a61 100644 --- a/dglgo/dglgo/pipeline/graphpred/gen.py +++ b/dglgo/dglgo/pipeline/graphpred/gen.py @@ -80,7 +80,7 @@ def config( if cfg is None: cfg = "_".join(["graphpred", data.value, model.value]) + ".yaml" yaml.dump(comment_dict, Path(cfg).open("w")) - print("Configuration file is generated at {}".format(Path(cfg).absolute())) + print(f"Configuration file is generated at {Path(cfg).absolute()}") return config @@ -113,7 +113,10 @@ def gen_script(cls, user_cfg_dict): generated_train_cfg["lr_scheduler"].pop("name") if user_cfg_dict["data"].get("split_ratio", None) is not None: - render_cfg["data_initialize_code"] = "{}, split_ratio={}".format(render_cfg["data_initialize_code"], user_cfg_dict["data"]["split_ratio"]) + render_cfg[ + "data_initialize_code" + ] = f'{render_cfg["data_initialize_code"]}, split_ratio={user_cfg_dict["data"]["split_ratio"]}' + render_cfg["user_cfg_str"] = f"cfg = {str(generated_user_cfg)}" render_cfg["user_cfg"] = user_cfg_dict return template.render(**render_cfg) diff --git a/dglgo/dglgo/pipeline/linkpred/gen.py b/dglgo/dglgo/pipeline/linkpred/gen.py index 723e055ad097..18a909eead69 100644 --- a/dglgo/dglgo/pipeline/linkpred/gen.py +++ b/dglgo/dglgo/pipeline/linkpred/gen.py @@ -105,7 +105,7 @@ def config( cfg = "_".join(["linkpred", data.value, node_model.value, edge_model.value]) + ".yaml" yaml = ruamel.yaml.YAML() yaml.dump(comment_dict, Path(cfg).open("w")) - print("Configuration file is generated at {}".format(Path(cfg).absolute())) + 
print(f"Configuration file is generated at {Path(cfg).absolute()}") return config @@ -149,7 +149,10 @@ def gen_script(cls, user_cfg_dict): if user_cfg_dict["data"].get("split_ratio", None) is not None: assert user_cfg_dict["data"].get("neg_ratio", None) is not None, "Please specify both split_ratio and neg_ratio" - render_cfg["data_initialize_code"] = "{}, split_ratio={}, neg_ratio={}".format(render_cfg["data_initialize_code"], user_cfg_dict["data"]["split_ratio"], user_cfg_dict["data"]["neg_ratio"]) + render_cfg[ + "data_initialize_code" + ] = f'{render_cfg["data_initialize_code"]}, split_ratio={user_cfg_dict["data"]["split_ratio"]}, neg_ratio={user_cfg_dict["data"]["neg_ratio"]}' + generated_user_cfg["data"].pop("split_ratio") generated_user_cfg["data"].pop("neg_ratio") diff --git a/dglgo/dglgo/pipeline/nodepred/gen.py b/dglgo/dglgo/pipeline/nodepred/gen.py index 79f4945e5d96..7de44f7246b7 100644 --- a/dglgo/dglgo/pipeline/nodepred/gen.py +++ b/dglgo/dglgo/pipeline/nodepred/gen.py @@ -84,7 +84,7 @@ def config( if cfg is None: cfg = "_".join(["nodepred", data.value, model.value]) + ".yaml" yaml.dump(comment_dict, Path(cfg).open("w")) - print("Configuration file is generated at {}".format(Path(cfg).absolute())) + print(f"Configuration file is generated at {Path(cfg).absolute()}") return config @@ -121,7 +121,10 @@ def gen_script(cls, user_cfg_dict): if user_cfg_dict["data"].get("split_ratio", None) is not None: - render_cfg["data_initialize_code"] = "{}, split_ratio={}".format(render_cfg["data_initialize_code"], user_cfg_dict["data"]["split_ratio"]) + render_cfg[ + "data_initialize_code" + ] = f'{render_cfg["data_initialize_code"]}, split_ratio={user_cfg_dict["data"]["split_ratio"]}' + render_cfg["user_cfg_str"] = f"cfg = {str(generated_user_cfg)}" render_cfg["user_cfg"] = user_cfg_dict return template.render(**render_cfg) diff --git a/dglgo/dglgo/pipeline/nodepred_sample/gen.py b/dglgo/dglgo/pipeline/nodepred_sample/gen.py index ae49f3137ce9..ebcb7b3925f6 100644 --- a/dglgo/dglgo/pipeline/nodepred_sample/gen.py +++ b/dglgo/dglgo/pipeline/nodepred_sample/gen.py @@ -113,8 +113,7 @@ def config( cfg = "_".join(["nodepred-ns", data.value, model.value]) + ".yaml" yaml = ruamel.yaml.YAML() yaml.dump(comment_dict, Path(cfg).open("w")) - print("Configuration file is generated at {}".format( - Path(cfg).absolute())) + print(f"Configuration file is generated at {Path(cfg).absolute()}") return config @@ -154,7 +153,10 @@ def gen_script(user_cfg_dict): if user_cfg_dict["data"].get("split_ratio", None) is not None: - render_cfg["data_initialize_code"] = "{}, split_ratio={}".format(render_cfg["data_initialize_code"], user_cfg_dict["data"]["split_ratio"]) + render_cfg[ + "data_initialize_code" + ] = f'{render_cfg["data_initialize_code"]}, split_ratio={user_cfg_dict["data"]["split_ratio"]}' + render_cfg["user_cfg_str"] = f"cfg = {str(generated_user_cfg)}" render_cfg["user_cfg"] = user_cfg_dict diff --git a/dglgo/dglgo/utils/factory.py b/dglgo/dglgo/utils/factory.py index de849b1d1517..f66d99d3aacd 100644 --- a/dglgo/dglgo/utils/factory.py +++ b/dglgo/dglgo/utils/factory.py @@ -25,7 +25,7 @@ def get_cfg_func(self): pass @abstractstaticmethod - def gen_script(user_cfg_dict: dict): + def gen_script(self): pass @abstractstaticmethod @@ -60,19 +60,16 @@ def register(self, return self def get_dataset_enum(self): - enum_class = enum.Enum( - "DatasetName", {v["name"]: k for k, v in self.registry.items()}) - return enum_class + return enum.Enum( + "DatasetName", {v["name"]: k for k, v in self.registry.items()} + ) 
def get_dataset_classname(self, name): return self.registry[name]["class_name"] def get_constructor_arg_type(self, model_name): sigs = inspect.signature(self.registry[model_name].__init__) - type_annotation_dict = {} - for k, param in dict(sigs.parameters).items(): - type_annotation_dict[k] = param.annotation - return type_annotation_dict + return {k: param.annotation for k, param in dict(sigs.parameters).items()} def get_pydantic_config(self): @@ -105,8 +102,7 @@ def get_class_name(self, name): return self.registry[name]["class_name"] def get_generated_code_dict(self, name, args='**cfg["data"]'): - d = {} - d["data_import_code"] = self.registry[name]["import_code"] + d = {"data_import_code": self.registry[name]["import_code"]} data_initialize_code = self.registry[name]["class_name"] extra_args_dict = self.registry[name]["extra_args"] if len(extra_args_dict) > 0: @@ -261,9 +257,7 @@ def call_generator(cls, generator_name, cfg): @classmethod def get_pipeline_enum(cls): - enum_class = enum.Enum( - "PipelineName", {k: k for k, v in cls.registry.items()}) - return enum_class + return enum.Enum("PipelineName", {k: k for k, v in cls.registry.items()}) model_dir = Path(__file__).parent.parent / "model" @@ -278,9 +272,7 @@ def __init__(self): """ Internal registry for available executors """ def get_model_enum(self): - enum_class = enum.Enum( - "ModelName", {k: k for k, v in self.registry.items()}) - return enum_class + return enum.Enum("ModelName", {k: k for k, v in self.registry.items()}) def register(self, model_name: str) -> Callable: @@ -301,20 +293,16 @@ def get_source_code(self, model_name): def get_constructor_default_args(self, model_name): sigs = inspect.signature(self.registry[model_name].__init__) - default_map = {} - for k, param in dict(sigs.parameters).items(): - default_map[k] = param.default - return default_map + return {k: param.default for k, param in dict(sigs.parameters).items()} def get_pydantic_constructor_arg_type(self, model_name: str): model_enum = self.get_model_enum() arg_dict = self.get_constructor_default_args(model_name) - type_annotation_dict = {} # type_annotation_dict["name"] = Literal[""] exempt_keys = ['self', 'in_size', 'out_size', 'data_info'] - for k, param in arg_dict.items(): - if k not in exempt_keys: - type_annotation_dict[k] = arg_dict[k] + type_annotation_dict = { + k: arg_dict[k] for k, param in arg_dict.items() if k not in exempt_keys + } class Base(DGLBaseModel): name: Literal[model_name] @@ -324,15 +312,10 @@ def get_constructor_doc_dict(self, name): model_class = self.registry[name] docs = inspect.getdoc(model_class.__init__) param_docs = docscrape.NumpyDocString(docs) - param_docs_dict = {} - for param in param_docs["Parameters"]: - param_docs_dict[param.name] = param.desc[0] - return param_docs_dict + return {param.name: param.desc[0] for param in param_docs["Parameters"]} def get_pydantic_model_config(self): - model_list = [] - for k in self.registry: - model_list.append(self.get_pydantic_constructor_arg_type(k)) + model_list = [self.get_pydantic_constructor_arg_type(k) for k in self.registry] output = model_list[0] for m in model_list[1:]: output = Union[output, m] @@ -343,10 +326,7 @@ def get_model_class_name(self, model_name): def get_constructor_arg_type(self, model_name): sigs = inspect.signature(self.registry[model_name].__init__) - type_annotation_dict = {} - for k, param in dict(sigs.parameters).items(): - type_annotation_dict[k] = param.annotation - return type_annotation_dict + return {k: param.annotation for k, param in 
dict(sigs.parameters).items()} def filter(self, filter_func): new_fac = ModelFactory() @@ -364,9 +344,9 @@ def __init__(self): self.registry = {} def get_model_enum(self): - enum_class = enum.Enum( - "NegativeSamplerName", {k: k for k, v in self.registry.items()}) - return enum_class + return enum.Enum( + "NegativeSamplerName", {k: k for k, v in self.registry.items()} + ) def register(self, sampler_name: str) -> Callable: @@ -381,32 +361,25 @@ def inner_wrapper(wrapped_class) -> Callable: def get_constructor_default_args(self, sampler_name): sigs = inspect.signature(self.registry[sampler_name].__init__) - default_map = {} - for k, param in dict(sigs.parameters).items(): - default_map[k] = param.default - return default_map + return {k: param.default for k, param in dict(sigs.parameters).items()} def get_pydantic_constructor_arg_type(self, sampler_name: str): model_enum = self.get_model_enum() arg_dict = self.get_constructor_default_args(sampler_name) - type_annotation_dict = {} # type_annotation_dict["name"] = Literal[""] exempt_keys = ['self', 'in_size', 'out_size', 'redundancy'] - for k, param in arg_dict.items(): - if k not in exempt_keys or param is None: - if k == 'k' or k == 'redundancy': - type_annotation_dict[k] = 3 - else: - type_annotation_dict[k] = arg_dict[k] + type_annotation_dict = { + k: 3 if k in ['k', 'redundancy'] else arg_dict[k] + for k, param in arg_dict.items() + if k not in exempt_keys or param is None + } class Base(DGLBaseModel): name: Literal[sampler_name] return create_model(f'{sampler_name.upper()}SamplerConfig', **type_annotation_dict, __base__=Base) def get_pydantic_model_config(self): - model_list = [] - for k in self.registry: - model_list.append(self.get_pydantic_constructor_arg_type(k)) + model_list = [self.get_pydantic_constructor_arg_type(k) for k in self.registry] output = model_list[0] for m in model_list[1:]: output = Union[output, m] @@ -417,19 +390,13 @@ def get_model_class_name(self, model_name): def get_constructor_arg_type(self, model_name): sigs = inspect.signature(self.registry[model_name].__init__) - type_annotation_dict = {} - for k, param in dict(sigs.parameters).items(): - type_annotation_dict[k] = param.annotation - return type_annotation_dict + return {k: param.annotation for k, param in dict(sigs.parameters).items()} def get_constructor_doc_dict(self, name): model_class = self.registry[name] docs = inspect.getdoc(model_class) param_docs = docscrape.NumpyDocString(docs) - param_docs_dict = {} - for param in param_docs["Parameters"]: - param_docs_dict[param.name] = param.desc[0] - return param_docs_dict + return {param.name: param.desc[0] for param in param_docs["Parameters"]} NegativeSamplerFactory = SamplerFactory() diff --git a/dglgo/tests/test_pipeline.py b/dglgo/tests/test_pipeline.py index 55bfe20cdef5..56fad7650064 100644 --- a/dglgo/tests/test_pipeline.py +++ b/dglgo/tests/test_pipeline.py @@ -23,12 +23,18 @@ class ExperimentSpec(NamedTuple): def test_train(spec): cfg_path = "/tmp/test.yaml" run = subprocess.run(["dgl", "config", spec.pipeline, "--data", spec.dataset, "--model", spec.model, "--cfg", cfg_path], timeout=spec.timeout, capture_output=True) - assert run.stderr is None or len(run.stderr) == 0, "Found error message: {}".format(run.stderr) + assert ( + run.stderr is None or len(run.stderr) == 0 + ), f"Found error message: {run.stderr}" + output = run.stdout.decode("utf-8") print(output) run = subprocess.run(["dgl", "train", "--cfg", cfg_path], timeout=spec.timeout, capture_output=True) - assert run.stderr is None or 
len(run.stderr) == 0, "Found error message: {}".format(run.stderr) + assert ( + run.stderr is None or len(run.stderr) == 0 + ), f"Found error message: {run.stderr}" + output = run.stdout.decode("utf-8") print(output) @@ -40,7 +46,7 @@ def setup_recipe_folder(): @pytest.mark.parametrize("file", [str(f) for f in Path(TEST_RECIPE_FOLDER).glob("*.yaml")]) def test_recipe(file, setup_recipe_folder): - print("DGL enter train {}".format(file)) + print(f"DGL enter train {file}") try: run = subprocess.run(["dgl", "train", "--cfg", file], timeout=5, capture_output=True) sh_stdout, sh_stderr = run.stdout, run.stderr @@ -56,7 +62,7 @@ def test_recipe(file, setup_recipe_folder): continue else: assert len(line) == 0, error_str - print("{} stdout: {}".format(file, sh_stdout)) - print("{} stderr: {}".format(file, sh_stderr)) + print(f"{file} stdout: {sh_stdout}") + print(f"{file} stderr: {sh_stderr}") # test_recipe( , None) \ No newline at end of file diff --git a/docs/source/conf.py b/docs/source/conf.py index b226039eb17b..261ddf1b8687 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -231,7 +231,7 @@ dglbackend = os.environ.get("DGLBACKEND", "") if dglbackend == 'mxnet': sphinx_gallery_conf['filename_pattern'] = "/*(?<=mx)\.py" -if dglbackend == 'pytorch': +elif dglbackend == 'pytorch': sphinx_gallery_conf['filename_pattern'] = "/*(?<!mx)\.py" diff --git a/examples/mxnet/_deprecated/sampling/multi_process_train.py b/examples/mxnet/_deprecated/sampling/multi_process_train.py --- a/examples/mxnet/_deprecated/sampling/multi_process_train.py +++ b/examples/mxnet/_deprecated/sampling/multi_process_train.py - if args.num_gpus > 0: - ctx = mx.gpu(g.worker_id % args.num_gpus) - else: - ctx = mx.cpu() - + ctx = mx.gpu(g.worker_id % args.num_gpus) if args.num_gpus > 0 else mx.cpu() train_nid = mx.nd.array(np.nonzero(train_mask.asnumpy())[0]).astype(np.int64) test_nid = mx.nd.array(np.nonzero(test_mask.asnumpy())[0]).astype(np.int64) diff --git a/examples/mxnet/_deprecated/sampling/run_store_server.py b/examples/mxnet/_deprecated/sampling/run_store_server.py index 9af75184107e..6086b08720d4 100644 --- a/examples/mxnet/_deprecated/sampling/run_store_server.py +++ b/examples/mxnet/_deprecated/sampling/run_store_server.py @@ -64,7 +64,7 @@ def main(args): n_test_samples)) # create GCN model - print('graph name: ' + graph_name) + print(f'graph name: {graph_name}') g = dgl.contrib.graph_store.create_graph_store_server(data.graph, graph_name, "shared_mem", args.num_workers, False, edge_dir='in') g.ndata['features'] = features diff --git a/examples/mxnet/_deprecated/sampling/train.py b/examples/mxnet/_deprecated/sampling/train.py index 5019be0595d6..5fe8df3471fc 100644 --- a/examples/mxnet/_deprecated/sampling/train.py +++ b/examples/mxnet/_deprecated/sampling/train.py @@ -16,11 +16,7 @@ def main(args): # load and preprocess dataset data = load_data(args) - if args.gpu >= 0: - ctx = mx.gpu(args.gpu) - else: - ctx = mx.cpu() - + ctx = mx.gpu(args.gpu) if args.gpu >= 0 else mx.cpu() if args.self_loop and not args.dataset.startswith('reddit'): data.graph.add_edges_from([(i,i) for i in range(len(data.graph))])