val_detection.py
import os
from typing import Tuple

import albumentations as A
import cv2
import numpy as np
import torch
from torch import Tensor
from torch.utils.data import Dataset

from bbox_augmentor import *  # provides Bbox_Augmentor and the global `device` used below
from utils.utils import imagenet_fill


class Validate_Detection(Dataset):
    __doc__ = r"""
    Images are resized and padded to square inputs, and loaded without labels.

    Output:
        img_id: file name of the image without its extension
        image: 3D tensor, square-sized and RGB, in CHW format
        scale: how much the image was scaled from the original
        pad: how much the image was padded from the original
        * scale and pad are needed to map a bbox prediction back to the
          original-scale label.
    """
    def __init__(self,
                 root: str,
                 img_size: int,
                 dataset_stat: Tuple = ((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
                 ):
        self.root = root
        self.img_paths = os.listdir(self.root)
        self.img_paths.sort()
        # Normalize with the dataset statistics and convert to a tensor;
        # no bbox labels are carried through at validation time.
        self.augmentor = Bbox_Augmentor(total_prob=1, min_area=0, min_visibility=0,
                                        dataset_stat=dataset_stat, ToTensor=True, with_label=False)
        # Resize so the longest side equals img_size, then pad the shorter
        # side with the ImageNet mean color to reach a square input.
        self.augmentor.append(A.LongestMaxSize(img_size, p=1))
        self.augmentor.append(A.PadIfNeeded(img_size, img_size,
                                            border_mode=cv2.BORDER_CONSTANT,
                                            value=imagenet_fill(), p=1))
        self.augmentor.make_compose()
        self.img_size = img_size
        self.dataset_stat = dataset_stat

    def __len__(self):
        return len(self.img_paths)

    def __getitem__(self, index: int) -> Tuple[str, Tensor, float, Tensor]:
        img_path = self.img_paths[index]
        img_id = img_path.split(".")[0]
        image = cv2.imread(os.path.join(self.root, img_path))
        h, w, _ = image.shape
        # Factor by which LongestMaxSize scales the original image.
        scale = self.img_size / max(h, w)
        # PadIfNeeded centers the image, splitting the difference between the
        # two sides; express that padding in original-image pixels as an
        # (x1, y1, x2, y2) offset so it can be subtracted from a rescaled bbox.
        diff = np.abs(h - w)
        p1 = diff // 2
        p2 = diff - diff // 2
        pad = (0, p1, 0, p2) if w >= h else (p1, 0, p2, 0)
        pad = torch.tensor(pad, device=device)
        image = self.augmentor(image, None, None)['image']
        image = image.to(device=device)
        return img_id, image, scale, pad
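

# Usage sketch (not part of the original file): how scale and pad map a
# prediction back to original-image coordinates. The directory path, the
# image size, and the xyxy box layout are assumptions for illustration.
if __name__ == "__main__":
    dataset = Validate_Detection(root="path/to/images", img_size=608)
    img_id, image, scale, pad = dataset[0]
    # For a predicted box (x1, y1, x2, y2) on the square network input,
    # undo the resize first, then the centered padding; pad above is
    # expressed in original-image pixels, so:
    #   box_original = box_pred / scale - pad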