-
Notifications
You must be signed in to change notification settings - Fork 13
/
preprocessing.py
232 lines (195 loc) · 8.82 KB
/
preprocessing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
# -*- coding: utf-8 -*-
"""
Created on Wed Apr 1 14:12:22 2020
@author: danish
"""
import numpy as np
import glob
import os
import math
import cv2
from tqdm import tqdm
import json
def Frame_Extractor(v_file, path='./', ext='.avi', frames_dir='train_1', extract_rate='all', frames_ext='.jpg'):
"""
A method which extracts the frames from the guven video. It can ex
Parameters
----------
v_file : str
Name of the video file, without extension.
path : str
Path to the video file, if the video is in the current working directory do not specify this argument.
ext : str, optional
Extension of the given Video File e.g `.avi`, `.mp4`. The default is '.avi'.
frames_dir : str, optional
Path to the directory where frames will be saved. The default is 'train_1'.
extract_rate : int or str, optional
This argument specifies how many frames should be extrcated from each 1 second of video. If the value is
`all` it will etract all the frames in every second i.e if the frame rate of video is 25 fps it will
extrcat all 25 frames. Other wise specify a number if you want to etract specific numbers of frames
per each second e.g if 5 is given it will extrcat 5 frames from each 1 second. The default is `all`.
frames_ext : str, optional
The extension for the extracted frames/images e.h '.tif' or '.jpg'. The default is '.jpg'.
Returns
-------
None.
"""
os.makedirs(frames_dir, exist_ok=True)
# capturing the video from the given path
if ext not in v_file:
v_file += ext
cap = cv2.VideoCapture(path+v_file)
frameRate = cap.get(5) #frame rate
#duration = int(cap.get(7)/frameRate)
os.makedirs(frames_dir+'/'+v_file, exist_ok=True)
count = 0
while(cap.isOpened()):
frameId = cap.get(1) #current frame number
ret, frame = cap.read()
if (ret != True):
break
if type(extract_rate)==int:
if extract_rate>frameRate:
print('Frame rate of Given Video: {0} fps'.format(frameRate))
raise ValueError('The value of `extract_rate` argument can not be greater than the Frame Rate of the video.')
if (frameId % extract_rate == 0) and extract_rate>1:
# storing the frames in a new folder named train_1
filename = frames_dir + '/' + v_file+ '/'+"_frame{0}".format(count)+frames_ext;count+=1
cv2.imwrite(filename, frame)
elif extract_rate==1:
if (frameId % math.floor(frameRate) == 0):
filename = frames_dir + '/' + v_file+ '/'+"_frame{0}".format(count)+frames_ext;count+=1
cv2.imwrite(filename, frame)
elif type(extract_rate)==str:
if extract_rate=='all':
# storing the frames in a new folder named train_1
filename = frames_dir + '/' + v_file+ '/'+ v_file + "_frame{0}".format(count)+frames_ext;count+=1
cv2.imwrite(filename, frame)
else:
raise ValueError('Invalid Value for argument `extract_rate`, it can be either `all` or an integer value.')
cap.release()
def ReadFileNames(path, frames_ext='.tif'):
"""
This method will retrieve the Folder/File names from the dataset.
Parameters
----------
path : string
Location of the data set that needs to be preprocessed.
Returns
-------
onlyfiles : list
A list containing all the subfolder names in the dataset.
file_names : list
A list containing all the names of the frames/images from 'onlyfiles'.
directories: list
A list containing all the names of the folders containing the images.
"""
directories = [name for name in os.listdir(path) if os.path.isdir(path+'/'+name)]
onlyfiles = []
file_names = []
for i in range (len(directories)):
files = glob.glob(path+'/'+directories[i]+'/*{0}'.format(frames_ext))
names = []
for file in files:
names.append(file.split("\\")[1])
file_names.append(names)
onlyfiles.append(files)
return onlyfiles, file_names, directories
def ToJson(obj, name, path='./', json_dir=False):
""" Write the Given Object to JSON file on the given path and name.
Arguments:
----------
obj: The object/variable you want to write to JSON file, it can be list or dictionary.
name: Name for the JSON file.
path: Path in which you want to save the JSON file. If want to save the JSON file in the current
directory, do not specify the value of this argument.
json_dir: Boolean value if set to `True` a new directory with the name of `JSON` will be added at
the end of the given path.
Returns:
--------
None"""
if json_dir:
os.makedirs(path+'/JSON', exist_ok=True)
with open(path+'/JSON/{0}.json'.format(name), 'w') as f:
json.dump(obj, f)
f.close()
elif not json_dir:
if '.json' in name:
pass
else:
name=name+'.json'
with open(path+'/'+name, 'w') as f:
json.dump(obj, f)
f.close()
def ProcessImg(img_name, read_path, write=True, write_path=None, res_shape=(128,128)):
if write and write_path is None:
raise TypeError('The value of argument cannot be `None` when, `write` is set to True. Provide a valid path, where processed image should be stored!')
img=cv2.imread(read_path)
#img=img_to_array(img)
#Resize the Image to (227,227)
img=cv2.resize(img,res_shape)
rgb_weights = [0.2989, 0.5870, 0.1140]
gray = np.dot(img, rgb_weights)
if write:
os.makedirs(write_path, exist_ok=True)
cv2.imwrite(write_path+'/'+img_name, gray)
return gray
def GlobalNormalization(img_list, name=None, path='Train_Data', save_data=True):
img_arr = np.array(img_list)
batch,height,width = img_arr.shape
#Reshape to (227,227,batch_size)
img_arr.resize(height,width,batch)
#Normalize
img_arr=(img_arr-img_arr.mean())/(img_arr.std())
#Clip negative Values
img_arr=np.clip(img_arr,0,1)
if save_data:
if name==None:
raise TypeError('The value of the `name` argument cannot be `None` type, when `save_data` is set to True. Provide value with `str` datatype.')
if '.npy' not in name:
name += '.npy'
os.makedirs(path, exist_ok=True)
np.save(path+'/'+name, img_arr)
print('\n------ Data Save Succefully at this path: {0} ------\n'.format(path))
return img_arr
def Vid2Frame(vid_path, frames_dir, ext_vid='.avi', frames_ext='.tif'):
vids = glob.glob(vid_path+'/*{0}'.format(ext_vid))
for vid in tqdm(vids):
path = vid.split('\\')[0]+'/'
v_file = vid.split('\\')[1]
Frame_Extractor(v_file, path=path, ext=ext_vid, frames_dir=frames_dir,
extract_rate='all', frames_ext=frames_ext)
def Fit_Preprocessing(path, frames_ext):
if frames_ext is None:
raise TypeError('Invalid Value for argument `frames_ext`, it cannot be None. Give proper extensions of the frames e.g: `.tif` or `.png` etc!')
print('\n\nProcessing Images in this Dataset Path: {0}\n'.format(path))
onlyfiles, file_names, dirs = ReadFileNames(path, frames_ext)
img_list = []
for i in tqdm(range(len(onlyfiles))):
images = onlyfiles[i]
count = 0
for img in images:
img.split('/')
img_name = dirs[i]+'_'+file_names[i][count]
write_path = 'ProcessedImages/'+path.split('/')[1]
gray = ProcessImg(img_name, read_path=img, write=True,
write_path=write_path, res_shape=(227,227))
img_list.append(gray)
count += 1
return img_list
if __name__=='__main__':
# vid_paths = ['AvenueDataset/training_videos', 'AvenueDataset/testing_videos']
# for vid_path in vid_paths:
# print('\n\nExtracting Frame from the videos, Path: {0}\n'.format(vid_path))
# frames_dir = 'Datasets/'+vid_path
# Vid2Frame(vid_path, frames_dir, ext_vid='.avi', frames_ext='.tif')
# print('\n-------- Frames are Extracted from All Videos Succesfully! --------\n')
#path = frames_dir
# paths = ['Datasets/UCSDped1/Train', 'Datasets/UCSDped2/Train',
# 'Datasets/AvenueDataset/training_videos']
paths = ['Datasets/UCSDped1/Test', 'Datasets/UCSDped2/Test',
'Datasets/AvenueDataset/testing_videos']
for path in paths:
img_list = Fit_Preprocessing(path, frames_ext='.tif')
name='Test_{0}.npy'.format(path.split('/')[1])
img_arr = GlobalNormalization(img_list, name, path='Test_Data', save_data=True)