Add RGB intensity regularization #18

Open · wants to merge 5 commits into base: master

Changes from 3 commits
118 changes: 118 additions & 0 deletions get_cov_matrix.py
@@ -0,0 +1,118 @@
'''
Compute the RGB covariance matrix and per-channel means over the training set
(companion script to the parallel loader in train.py)
'''

import time
import math
import yaml
import glob
import numpy as np

import hickle as hkl

def unpack_configs(config, ext_data='.hkl', ext_label='.npy'):
    flag_para_load = config['para_load']

    # Load Training/Validation Filenames and Labels
    train_folder = config['train_folder']
    val_folder = config['val_folder']
    label_folder = config['label_folder']
    train_filenames = sorted(glob.glob(train_folder + '/*' + ext_data))
    val_filenames = sorted(glob.glob(val_folder + '/*' + ext_data))
    train_labels = np.load(label_folder + 'train_labels' + ext_label)
    val_labels = np.load(label_folder + 'val_labels' + ext_label)
    img_mean = np.load(config['mean_file'])
    img_mean = img_mean[:, :, :, np.newaxis].astype('float32')
    return (flag_para_load,
            train_filenames, val_filenames, train_labels, val_labels, img_mean)


if __name__ == '__main__':

    with open('config.yaml', 'r') as f:
        config = yaml.load(f)
    with open('spec_2gpu.yaml', 'r') as f:
        config = dict(config.items() + yaml.load(f).items())

    # UNPACK CONFIGS
    (flag_para_load, train_filenames, val_filenames,
     train_labels, val_labels, img_mean) = unpack_configs(config)

    train_filenames = train_filenames[:]

    batch_size = config['batch_size']

    img_size = 256
    n_train_filenames = len(train_filenames)

    print 'batch_size: %d, %d images' % (batch_size, n_train_filenames)

    # total number of pixels per channel over the whole training set
    div_const = 1.0 * img_size * img_size * batch_size * n_train_filenames

    # accumulators: the six unique entries of the symmetric second-moment
    # matrix, plus the per-channel means
    RR = 0.0
    RG = 0.0
    RB = 0.0
    GG = 0.0
    GB = 0.0
    BB = 0.0
    R_mean = 0.0
    G_mean = 0.0
    B_mean = 0.0

    for hkl_name in train_filenames:

        print hkl_name
Contributor: indentation doesn't seem right
        data = hkl.load(hkl_name).astype('int64')  # c01b (3,256,256,batch_size)

        R = data[0, :, :, :].flatten()
        G = data[1, :, :, :].flatten()
        B = data[2, :, :, :].flatten()
Contributor: indentation doesn't seem right


        RR += np.dot(R, R) / div_const
        RG += np.dot(R, G) / div_const
        RB += np.dot(R, B) / div_const
        GG += np.dot(G, G) / div_const
        GB += np.dot(G, B) / div_const
        BB += np.dot(B, B) / div_const
Contributor: you should be able to calculate this using matrix multiplication instead of calculating each element individually
Contributor (author): Right, that would be something like:

    RGB_matrix = np.zeros((3,3))
    for hkl_name in train_filenames:
        data = hkl.load(hkl_name).astype('int64') # c01b (3,256,256,batch_size)

        R = data[0,:,:,:].flatten()
        G = data[1,:,:,:].flatten()
        B = data[2,:,:,:].flatten()

        RGB = [R, G, B]
        RGB_matrix += np.dot(RGB, np.transpose(RGB))

But when I tried to run this, it gave MemoryError:

    theano_alexnet $ python get_cov_matrix.py
    batch_size: 128, 10008 images
    Traceback (most recent call last):
      File "get_cov_matrix.py", line 73, in <module>
        RGB_matrix += np.dot(np.transpose(RGB), RGB)
    MemoryError

Then I tried changing the data type from int64 to int and to uint8, which still gave MemoryError. I guess one batch of data (3,256,256,128) is too large for a matrix multiplication: each element of the matrix is a dot product of two vectors of shape (256*256*128,) (previously computed separately as RR, RG, RB, GG, ...).
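An editorial aside, not part of the PR thread: the traceback shows np.dot(np.transpose(RGB), RGB), an (N, 3)-by-(3, N) product, which tries to materialize an (N, N) array with N = 256*256*128; that alone explains the MemoryError, independent of dtype. A minimal sketch of a memory-friendlier accumulation (batch_gram and the toy shapes are illustrative, not names from the PR): reshape the c01b batch to a (3, N) matrix and let one product produce the (3, 3) Gram matrix.

    import numpy as np

    def batch_gram(data):
        # data: c01b array, shape (3, H, W, batch); returns the (3, 3) Gram
        # matrix sum(x x^T) over all pixels in the batch
        rgb = data.reshape(3, -1).astype('float64')  # (3, N), one row per channel
        return np.dot(rgb, rgb.T)                    # (3, N) x (N, 3) -> (3, 3)

    # toy cross-check against the per-element version above
    data = np.random.randint(0, 256, size=(3, 4, 4, 2))
    gram = batch_gram(data)
    R = data[0].flatten().astype('float64')
    G = data[1].flatten().astype('float64')
    assert np.allclose(gram[0, 0], np.dot(R, R))
    assert np.allclose(gram[0, 1], np.dot(R, G))

Since reshape returns a view, the only large allocation is the single float64 copy of the batch; nothing bigger than (3, N) is ever formed.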


        R_mean += np.mean(R)
        G_mean += np.mean(G)
        B_mean += np.mean(B)



    R_mean /= n_train_filenames
    G_mean /= n_train_filenames
    B_mean /= n_train_filenames

    print RR, RG, RB, GG, GB, BB, R_mean, G_mean, B_mean

    # subtract the products of the means: Cov(X, Y) = E[XY] - E[X]E[Y]
    RR = RR - R_mean * R_mean
    RG = RG - R_mean * G_mean
    RB = RB - R_mean * B_mean
    GG = GG - G_mean * G_mean
    GB = GB - G_mean * B_mean
    BB = BB - B_mean * B_mean

    # the covariance matrix is symmetric, so only 6 elements are calculated:
    #
    #           | sum(R*R)/N - rr   sum(R*G)/N - rg   sum(R*B)/N - rb |
    # RGB_Cov = |                   sum(G*G)/N - gg   sum(G*B)/N - gb |
    #           |                                     sum(B*B)/N - bb |
    #
    RGB_Cov = np.asarray([[RR, RG, RB],
                          [RG, GG, GB],
                          [RB, GB, BB]])
    print RGB_Cov

    np.save('./RGB_Cov_matrix.npy', RGB_Cov)
    np.save('./RGB_mean.npy', [R_mean, G_mean, B_mean])
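For reference, the quantity the script saves is the standard covariance identity written out in matrix form (notation mine, consistent with the comment block above; N is the script's div_const):

    \Sigma \;=\; \frac{1}{N}\sum_{i=1}^{N} \mathbf{x}_i \mathbf{x}_i^{\top}
           \;-\; \boldsymbol{\mu}\,\boldsymbol{\mu}^{\top},
    \qquad
    \mathbf{x}_i = \begin{pmatrix} R_i \\ G_i \\ B_i \end{pmatrix},
    \qquad
    \boldsymbol{\mu} = \frac{1}{N}\sum_{i=1}^{N} \mathbf{x}_i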
21 changes: 18 additions & 3 deletions proc_load.py
@@ -82,6 +82,9 @@ def crop_and_mirror(data, param_rand, flag_batch=True, cropsize=227):

def fun_load(config, sock_data=5000):

    RGB_cov_matrix = np.load('./RGB_Cov_matrix.npy')
    # Lambda is the array of eigenvalues, P the matrix whose columns are the
    # corresponding eigenvectors
    Lambda, P = np.linalg.eigh(RGB_cov_matrix)

    send_queue = config['queue_l2t']
    recv_queue = config['queue_t2l']
    # recv_queue and send_queue are multiprocessing.Queue
@@ -116,9 +119,21 @@ def fun_load(config, sock_data=5000):
        hkl_name = recv_queue.get()

        # print hkl_name
        # print 'load ', time.time() - bgn_time

        data = hkl.load(hkl_name) - img_mean  # c01b (3,256,256,batch_size)

        # RGB intensity regularization: shift each image along the principal
        # components of the training-set RGB covariance
        for img_index in range(config['batch_size']):

            Alpha = np.random.normal(0, 0.01, 3)

            Q = Lambda * Alpha  # elementwise multiplication
            Z = np.dot(P, Q)    # (3,) offset, one scalar per channel

            data[0, :, :, img_index] += Z[0]
            data[1, :, :, img_index] += Z[1]
            data[2, :, :, img_index] += Z[2]
            # print Z

        param_rand = recv_queue.get()

        data = crop_and_mirror(data, param_rand, flag_batch=flag_batch)
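For context (editorial note): the loop added above matches the PCA color augmentation from Krizhevsky et al.'s AlexNet paper, where each image receives the per-channel offset Z = P · (α ∘ λ), a random combination of the covariance eigenvectors weighted by their eigenvalues (the paper draws α with standard deviation 0.1; this PR uses 0.01). A self-contained sketch under those assumptions (rgb_jitter_offsets is an illustrative name, not from the PR):

    import numpy as np

    def rgb_jitter_offsets(cov, batch_size, sigma=0.01):
        # eigh gives eigenvalues lam and eigenvectors P (as columns) of a
        # symmetric matrix
        lam, P = np.linalg.eigh(cov)
        # one alpha triple per image; row i of the result is P.dot(alpha[i] * lam)
        alpha = np.random.normal(0.0, sigma, size=(batch_size, 3))
        return np.dot(alpha * lam, P.T)  # shape (batch_size, 3)

    # toy symmetric positive semi-definite "covariance"
    A = np.random.rand(3, 3)
    Z = rgb_jitter_offsets(np.dot(A, A.T), batch_size=4)
    print(Z.shape)  # (4, 3): one (R, G, B) offset per image

Computing all offsets in one matrix product like this would also let the per-image Python loop in the hunk above be replaced by a single broadcast add.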
140 changes: 140 additions & 0 deletions proc_load_old.py
@@ -0,0 +1,140 @@
'''
Load data in parallel with train.py
'''

Contributor: Is this file necessary to keep? How about making the RGB augmentation an option?
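A sketch of what that option could look like (editorial; the rgb_jitter config key and the maybe_rgb_jitter helper are hypothetical, not names from the PR or the repo): keep a single proc_load.py and make the augmentation a no-op unless the flag is set, calling the helper right after data = hkl.load(hkl_name) - img_mean.

    import numpy as np

    def maybe_rgb_jitter(data, config, Lambda, P):
        # no-op unless the hypothetical 'rgb_jitter' key is set in the config
        if not config.get('rgb_jitter', False):
            return data
        for img_index in range(data.shape[3]):
            Alpha = np.random.normal(0, 0.01, 3)
            Z = np.dot(P, Lambda * Alpha)  # (3,) per-channel offset
            data[:, :, :, img_index] += Z[:, np.newaxis, np.newaxis]
        return data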

import time
import math

import numpy as np
import zmq
import pycuda.driver as drv
import pycuda.gpuarray as gpuarray
import hickle as hkl


def get_params_crop_and_mirror(param_rand, data_shape, cropsize):

    center_margin = (data_shape[2] - cropsize) / 2
    crop_xs = round(param_rand[0] * center_margin * 2)
    crop_ys = round(param_rand[1] * center_margin * 2)
    if False:
        # if this is True, exactly replicate Ryan's code in the batch case
        crop_xs = math.floor(param_rand[0] * center_margin * 2)
        crop_ys = math.floor(param_rand[1] * center_margin * 2)

    flag_mirror = bool(round(param_rand[2]))

    return crop_xs, crop_ys, flag_mirror


def crop_and_mirror(data, param_rand, flag_batch=True, cropsize=227):
    '''
    when param_rand == (0.5, 0.5, 0), it means no randomness
    '''
    # print param_rand

    # if param_rand == (0.5, 0.5, 0), there is no randomness: do validation
    if param_rand[0] == 0.5 and param_rand[1] == 0.5 and param_rand[2] == 0:
        flag_batch = True

    if flag_batch:
        # mirror and crop the whole batch
        crop_xs, crop_ys, flag_mirror = \
            get_params_crop_and_mirror(param_rand, data.shape, cropsize)

        # random mirror
        if flag_mirror:
            data = data[:, :, ::-1, :]

        # random crop
        data = data[:, crop_xs:crop_xs + cropsize,
                    crop_ys:crop_ys + cropsize, :]

    else:
        # mirror and crop each image individually;
        # to ensure consistency, use param_rand[1] as the seed
        np.random.seed(int(10000 * param_rand[1]))

        data_out = np.zeros((data.shape[0], cropsize, cropsize,
                             data.shape[3])).astype('float32')

        for ind in range(data.shape[3]):
            # generate random numbers
            tmp_rand = np.float32(np.random.rand(3))
            tmp_rand[2] = round(tmp_rand[2])

            # get mirror/crop parameters
            crop_xs, crop_ys, flag_mirror = \
                get_params_crop_and_mirror(tmp_rand, data.shape, cropsize)

            # do image crop/mirror
            img = data[:, :, :, ind]
            if flag_mirror:
                img = img[:, :, ::-1]
            img = img[:, crop_xs:crop_xs + cropsize,
                      crop_ys:crop_ys + cropsize]
            data_out[:, :, :, ind] = img

        data = data_out

    return np.ascontiguousarray(data, dtype='float32')


def fun_load(config, sock_data=5000):

    send_queue = config['queue_l2t']
    recv_queue = config['queue_t2l']
    # recv_queue and send_queue are multiprocessing.Queue
    # recv_queue is only for receiving
    # send_queue is only for sending

    # whether to do random crop and mirror on the whole batch
    flag_batch = config['batch_crop_mirror']

    drv.init()
    dev = drv.Device(int(config['gpu'][-1]))
    ctx = dev.make_context()
    sock = zmq.Context().socket(zmq.PAIR)
    sock.bind('tcp://*:{0}'.format(sock_data))

    shape, dtype, h = sock.recv_pyobj()
    print 'shared_x information received'

    gpu_data_remote = gpuarray.GPUArray(shape, dtype,
                                        gpudata=drv.IPCMemoryHandle(h))
    gpu_data = gpuarray.GPUArray(shape, dtype)

    img_mean = recv_queue.get()
    print 'img_mean received'

    # the first time, do the setup and other preparation;
    # then receive information for loading

    while True:
        # get the hkl file name to load
        hkl_name = recv_queue.get()

        # print hkl_name
        data = hkl.load(hkl_name) - img_mean
        # print 'load ', time.time() - bgn_time

        param_rand = recv_queue.get()

        data = crop_and_mirror(data, param_rand, flag_batch=flag_batch)

        gpu_data.set(data)

        # wait for computation on the last minibatch to finish
        msg = recv_queue.get()
        assert msg == 'calc_finished'

        drv.memcpy_peer(gpu_data_remote.ptr,
                        gpu_data.ptr,
                        gpu_data.dtype.itemsize *
                        gpu_data.size,
                        ctx, ctx)

        ctx.synchronize()

        send_queue.put('copy_finished')
2 changes: 1 addition & 1 deletion spec_2gpu.yaml
@@ -1,7 +1,7 @@
# If want to input None, use !!null

 gpu0: 'gpu0'
-gpu1: 'gpu2'
+gpu1: 'gpu1'
sock_gpu: 5000
sock_data0: 5001
sock_data1: 5002
2 changes: 1 addition & 1 deletion train_2gpu.py
@@ -191,7 +191,7 @@ def train_net(config, private_config):
                            minibatch_range, batch_size,
                            train_filenames, train_labels,
                            flag_para_load,
-                           config['batch_crop_mirror']
+                           config['batch_crop_mirror'],
                            send_queue=load_send_queue,
                            recv_queue=load_recv_queue)
