-
Notifications
You must be signed in to change notification settings - Fork 4
/
spp_layer.py
66 lines (49 loc) · 2.35 KB
/
spp_layer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import numpy as np
import math
import tensorflow as tf
# Input feature maps are laid out as N-C-H-W (batch, channels, height, width).
# Example spatial_pyramid:
# [[1, 1], [2, 2], [3, 3], [4, 5]]
# Each row is one pyramid level (n, m): n bins along the height, m bins along the width.
def np_spatial_pyramid_pooling(input_feature_maps, spatial_pyramid, dtype=np.float32):
    """Max-pool N-C-H-W feature maps over a spatial pyramid of bin grids.

    For every pyramid level ``(n_h, n_w)`` each feature map is covered by
    ``n_h * n_w`` windows and the maximum of each window is kept.  Along a
    dimension of size ``d`` split into ``n`` bins the window length is
    ``ceil(d / n)`` and the stride is ``floor(d / n)``.  Consequently:

    * when ``d`` is not a multiple of ``n`` some trailing pixels may not be
      covered by any window (e.g. ``d = 5, n = 3``);
    * when ``n > d`` the stride is 0, so every bin along that dimension
      covers the first pixel only.

    Args:
        input_feature_maps: 4-D array laid out as
            (batch, channels, height, width).  Any memory layout is accepted.
        spatial_pyramid: 2-D integer array of shape (levels, 2); row ``i``
            holds ``(n_h, n_w)``, the number of bins along height and width.
        dtype: Kept for backward compatibility only.  The element size used
            for the strided views is taken from the arrays themselves, so a
            mismatch between ``dtype`` and the input can no longer corrupt
            the result.

    Returns:
        Array of shape (batch, channels, sum of n_h * n_w over all levels)
        with the dtype of the input.  Levels are concatenated in pyramid
        order; within a level the bins are flattened row-major
        (height bin index varies slowest).

    Raises:
        AssertionError: if the inputs do not have the ranks/shapes above.
        ValueError: if any bin count is smaller than 1.
    """
    feature_maps = np.asarray(input_feature_maps)
    pyramid = np.asarray(spatial_pyramid)
    assert feature_maps.ndim == 4
    assert pyramid.ndim == 2
    assert pyramid.shape[1] == 2

    batch_size, num_channels, h, w = feature_maps.shape
    as_strided = np.lib.stride_tricks.as_strided
    # Use the array's real byte strides instead of deriving them from an
    # assumed item size; this is what makes non-float32 and non-contiguous
    # inputs safe.
    stride_n, stride_c, stride_h, stride_w = feature_maps.strides

    pooled_levels = []
    for n_h, n_w in pyramid:
        n_h, n_w = int(n_h), int(n_w)
        if n_h < 1 or n_w < 1:
            raise ValueError(
                "spatial_pyramid bin counts must be >= 1, got (%d, %d)" % (n_h, n_w)
            )

        # Pool along the width: view each row as n_w windows of win_w pixels.
        # (n_w - 1) * step_w + win_w <= w, so the view never leaves the buffer.
        win_w, step_w = -(-w // n_w), w // n_w
        width_windows = as_strided(
            feature_maps,
            (batch_size, num_channels, h, n_w, win_w),
            (stride_n, stride_c, stride_h, step_w * stride_w, stride_w),
        )
        row_max = np.amax(width_windows, axis=4)  # (N, C, h, n_w)

        # Pool along the height on the freshly computed (owned) array.
        win_h, step_h = -(-h // n_h), h // n_h
        r_n, r_c, r_h, r_w = row_max.strides
        height_windows = as_strided(
            row_max,
            (batch_size, num_channels, n_h, win_h, n_w),
            (r_n, r_c, step_h * r_h, r_h, r_w),
        )
        level = np.amax(height_windows, axis=3)  # (N, C, n_h, n_w)

        pooled_levels.append(np.reshape(level, (batch_size, num_channels, -1)))

    return np.concatenate(pooled_levels, axis=2)
def tf_spatial_pyramid_pooling(tf_input_feature_maps, tf_spatial_pyramid, dtype=tf.float32):
    """Run `np_spatial_pyramid_pooling` inside a TensorFlow graph via `tf.py_func`.

    Args:
        tf_input_feature_maps: Tensor passed as the first argument of
            `np_spatial_pyramid_pooling` (the feature maps).
        tf_spatial_pyramid: Tensor passed as the second argument of
            `np_spatial_pyramid_pooling` (the pyramid levels).
        dtype: TensorFlow dtype declared as the output type of the py_func op.
            It is also forwarded, as the matching NumPy dtype, to the NumPy
            implementation so both sides agree on the element type.

    Returns:
        The tensor produced by `tf.py_func`.
    """
    # Previously the NumPy function always ran with its float32 default, even
    # when a different `dtype` was requested here; forward it explicitly.
    np_dtype = tf.as_dtype(dtype).as_numpy_dtype

    def _pool(feature_maps, spatial_pyramid):
        return np_spatial_pyramid_pooling(feature_maps, spatial_pyramid, np_dtype)

    return tf.py_func(_pool, [tf_input_feature_maps, tf_spatial_pyramid], dtype)