-
Notifications
You must be signed in to change notification settings - Fork 7
/
utils.py
148 lines (127 loc) · 4.55 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
import json
import numpy as np
import pandas as pd
from tensorflow.keras.utils import to_categorical
def load_multi_csv(filenames, concat=True, discard_empty=False, verbose=True):
    """
    Read several CSV sources with pandas and optionally merge them.

    Args:
        filenames: Iterable of paths or file-like objects, each handed to
            `pd.read_csv` as-is.
        concat: When True (default) the loaded frames are concatenated into
            one DataFrame with a fresh 0..n-1 index. When False the frames
            are returned as a list, in input order.
        discard_empty: When True, frames for which `DataFrame.empty` is true
            are left out of the result.
        verbose: When True, print each source with its shape (suffixed with
            "- discarded" if it was dropped) and, when concatenating, the
            shape of the merged frame.

    Returns:
        A list of DataFrames if `concat` is False, otherwise one DataFrame
        (an empty one when nothing was kept).
    """
    frames = []
    for source in filenames:
        frame = pd.read_csv(source)
        dropped = discard_empty and frame.empty
        if not dropped:
            frames.append(frame)
        if verbose:
            if dropped:
                print(source, frame.shape, "- discarded")
            else:
                print(source, frame.shape)

    if not concat:
        return frames

    # pd.concat refuses an empty list, so fall back to an empty frame.
    if frames:
        combined = pd.concat(frames, ignore_index=True)
    else:
        combined = pd.DataFrame()
    if verbose:
        print(combined.shape)
    return combined
def make_xy_3d(
    big_df,
    numsteps,
    skip_size=1,
    categorical=True,
    add_time_y=False,
    y_dim=1,
    num_classes=None,
):
    """
    Extract `X` and `y` chunks from `big_df`, assuming `big_df` is time-series
    data whose last `y_dim` columns hold the targets.

    A window of `numsteps` consecutive rows is taken every `skip_size` rows.
    Extraction stops at the first window that would run past the end of the
    data, so trailing rows that cannot fill a window are dropped.

    Args:
        big_df: 2D DataFrame or array of shape (n_rows, n_features). A
            DataFrame is converted with `.values`.
        numsteps: Number of rows in each window.
        skip_size: Distance in rows between the starts of successive windows.
        categorical: If True (and `y_dim` is not 0), `y` is passed through
            `to_categorical` before being returned.
        add_time_y: If True, `y` keeps the targets of every row in the window
            (an extra time dimension) instead of only the last row's.
        y_dim: Number of trailing columns used as targets. 0 means no target.
        num_classes: Forwarded to `to_categorical`; provide it when the data
            is too small for the number of classes to be inferred.

    Returns:
        A tuple `(X, y)`. `X` has shape
        (n_samples, numsteps, n_features - y_dim). Before any categorical
        encoding, `y` has shape (n_samples,) for `y_dim=1` and
        (n_samples, y_dim) for `y_dim>1`, with an additional `numsteps` axis
        after the first one when `add_time_y` is True. `y` is None when
        `y_dim=0`.

    Raises:
        ValueError: If not a single complete window can be extracted, e.g.
            when `big_df` has fewer than `numsteps` rows.
    """
    if isinstance(big_df, pd.DataFrame):
        big_df = big_df.values

    X, y = [], []
    for i in range(0, big_df.shape[0], skip_size):
        arr = big_df[i : i + numsteps]  # 2d array of shape (numsteps, n_features)
        if arr.shape[0] != numsteps:
            # Slicing past the end yields a short window; later ones are short too.
            break
        if y_dim == 0:
            # `:-0` would select nothing, so the no-target case keeps every column.
            X.append(arr[np.newaxis, :, :])
            continue
        X.append(arr[np.newaxis, :, :-y_dim])
        if y_dim == 1:
            y.append(arr[:, -1] if add_time_y else arr[-1, -1])
        else:
            y.append(arr[:, -y_dim:] if add_time_y else arr[-1, -y_dim:])

    if not X:
        # np.concatenate([]) would fail with a message that hides the real cause.
        raise ValueError(
            "no complete window could be extracted: "
            f"numsteps={numsteps}, skip_size={skip_size}, "
            f"rows available={big_df.shape[0]}"
        )

    X_arr = np.concatenate(X)
    if y_dim == 0:
        return X_arr, None

    y_arr = np.array(y)
    if categorical:
        y_arr = to_categorical(y_arr, num_classes=num_classes)
    return X_arr, y_arr
def is_jsonable(x):
    """
    Report whether `x` can be encoded with `json.dumps` using default settings.

    Args:
        x: Any object.

    Returns:
        True if `json.dumps(x)` succeeds, False if it fails because the value
        is not encodable (unsupported type, circular reference, or nesting
        that is too deep).
    """
    try:
        json.dumps(x)
    # Only encoding failures mean "not jsonable"; a bare except would also
    # swallow KeyboardInterrupt and SystemExit.
    except (TypeError, ValueError, RecursionError):
        return False
    return True
def make_json_serializable(obj, serializer=None, inplace=False):
    """
    Return a version of `obj` in which every non-JSON-serializable leaf has
    been replaced by a serializable stand-in.

    Dicts and lists are walked recursively. Any other value is kept if
    `is_jsonable` accepts it, and otherwise replaced by `serializer(value)`;
    each replacement is reported with `print`.

    Args:
        obj: The object to convert.
        serializer: Function that receives a non-serializable object and
            returns its replacement. If None, the replacement is the object's
            `name` attribute, else its `__name__` attribute, else the name of
            its type.
        inplace: If true, dicts and lists are mutated and returned
            themselves; otherwise new dicts and lists are built.

    Returns:
        The converted object.
    """
    if isinstance(obj, dict):
        if not inplace:
            return {
                key: make_json_serializable(
                    val, serializer=serializer, inplace=inplace
                )
                for key, val in obj.items()
            }
        # Only existing keys are reassigned, so the dict's size never
        # changes while it is being iterated.
        for key, val in obj.items():
            obj[key] = make_json_serializable(
                val, serializer=serializer, inplace=inplace
            )
        return obj

    if isinstance(obj, list):
        if not inplace:
            return [
                make_json_serializable(val, serializer=serializer, inplace=inplace)
                for val in obj
            ]
        for i, val in enumerate(obj):
            obj[i] = make_json_serializable(
                val, serializer=serializer, inplace=inplace
            )
        return obj

    if is_jsonable(obj):
        return obj

    def _fallback(x):
        # Prefer an explicit name, then a definition name, then the type name.
        if hasattr(x, "name"):
            return x.name
        if hasattr(x, "__name__"):
            return x.__name__
        return type(x).__name__

    convert = _fallback if serializer is None else serializer
    obj_ser = convert(obj)
    print("serialize", repr(obj), "as", repr(obj_ser))
    return obj_ser
def summarize_to_list(model, **summary_kwargs):
    """
    Call `model.summary()` and collect each emitted line in a list.

    Args:
        model: Object exposing `summary(print_fn=..., **kwargs)`; every call
            it makes to `print_fn` contributes one entry to the result.
        **summary_kwargs: Extra keyword arguments forwarded to
            `model.summary`.

    Returns:
        The list of lines, in the order they were emitted.
    """
    summary = []

    def add_to_summary(line, *_args, **_kwargs):
        # Extra arguments are accepted and ignored so collection keeps working
        # if the summary routine passes formatting options along with the line.
        summary.append(line)

    model.summary(print_fn=add_to_summary, **summary_kwargs)
    return summary