-
Notifications
You must be signed in to change notification settings - Fork 1
/
cutter.py
64 lines (53 loc) · 2.51 KB
/
cutter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import torch
import torch.nn.functional as F
import cv2
import numpy as np
def is_enough_presentable(image, threshold=0.5):
    """
    Filter image by histogram value

    The image is converted to grayscale, a 100-bin histogram is built over
    the intensity range [0.0, 1.0], and the share of counted pixels that
    land in the first 80 bins (intensity below 0.8) is compared with
    ``threshold``.

    :param image: image to check; passed to ``cv2.cvtColor`` with
        ``cv2.COLOR_RGB2GRAY``. NOTE(review): only intensities inside
        [0.0, 1.0] are counted, so this presumably expects a float RGB image
        scaled to [0..1] -- confirm against callers.
    :param threshold: threshold of filtering [0..1]
    :return: boolean value is image enough presentable or not
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    histogram, _ = np.histogram(grayscale, bins=100, range=(0.0, 1.0))

    counted = histogram.sum()
    if counted == 0:
        # No pixel fell inside the histogram range. The ratio would be 0/0
        # (NaN plus a RuntimeWarning); NaN never satisfies ">= threshold",
        # so the answer is False -- return it without dividing.
        return False

    return bool(histogram[:80].sum() / counted >= threshold)
class Cutter:
    """
    Split an image into fixed-size patches and merge patches back

    ``split`` stores the original image size and the patch grid on the
    instance; ``merge`` reads them, so ``split`` has to be called first.

    NOTE: padding is derived from the kernel size only and ``merge`` lays
    the patches side by side on a grid, so the split/merge round trip is
    exact only when ``stride`` equals ``kernel_size`` (the default).
    """

    def __init__(self, kernel_size=(1024, 1024), stride=(1024, 1024)):
        """
        :param kernel_size: patch size as (height, width)
        :param stride: step between patches as (vertical, horizontal)
        """
        self.kernel_h, self.kernel_w = kernel_size
        self.stride_h, self.stride_w = stride

    def split(self, image):
        """
        Split image to many little patches

        The right and bottom sides are zero-padded up to the next multiple
        of the kernel size before the image is cut.

        :param image: torch.Tensor of (H, W, C) format
        :return: torch.Tensor of (N, C, H, W) format, patches ordered
            row by row over the patch grid
        """
        self.orig_size = image.shape[:2]
        orig_h, orig_w = self.orig_size

        # padding of right and bottom sides of image
        pad_w = (self.kernel_w - orig_w % self.kernel_w) % self.kernel_w
        pad_h = (self.kernel_h - orig_h % self.kernel_h) % self.kernel_h

        # (H, W, C) -> (C, H, W); F.pad takes pairs starting from the last
        # dimension: (left, right, top, bottom)
        padded_image = F.pad(
            image.permute(2, 0, 1), (0, pad_w, 0, pad_h), "constant", 0
        )

        # dimension correction (to (B, C, H, W) format)
        tensor_image = padded_image.unsqueeze(0)

        # splitting: result is (B, C, rows, cols, kernel_h, kernel_w)
        windows = tensor_image.unfold(2, self.kernel_h, self.stride_h)
        windows = windows.unfold(3, self.kernel_w, self.stride_w)
        self.unfold_shape = windows.size()

        # dimension correction (to (N, C, H, W) format); the channel count
        # is taken from the tensor so non-RGB images are handled as well
        channels = windows.size(1)
        windows = windows.permute(2, 3, 0, 1, 4, 5)
        return windows.reshape(-1, channels, self.kernel_h, self.kernel_w)

    def merge(self, windows):
        """
        Merge many mask patches to mask of original image size

        Uses the patch grid and image size remembered by the last ``split``
        call. The patch size and channel count are read from ``windows``
        itself, so they may differ from the ones used in ``split``.

        :param windows: patches to merge of format (N, C, H, W), in the
            order produced by ``split``
        :return: mask of format (C, H, W) cropped to the original image size
        """
        window_number_h = self.unfold_shape[2]
        window_number_w = self.unfold_shape[3]
        _, c, window_h, window_w = windows.size()

        # reshape (not view) so non-contiguous patch tensors are accepted
        x_image = windows.reshape(
            1, window_number_h, window_number_w, c, window_h, window_w
        )
        # (1, rows, cols, C, h, w) -> (1, C, rows, h, cols, w)
        x_image = x_image.permute(0, 3, 1, 4, 2, 5)
        x_image = x_image.reshape(
            1, c, window_number_h * window_h, window_number_w * window_w
        )

        # drop the batch dimension and the padding added by split
        return x_image[0, :, :self.orig_size[0], :self.orig_size[1]]