This repository has been archived by the owner on Jun 2, 2023. It is now read-only.

Adding State Updating and # of Tasks to LSTM and RGCN models #104

Merged - 34 commits, Jun 7, 2021

Commits (the diff below shows changes from 3 of the 34 commits)
94d0454 - adding DA capabilities to LSTM (May 25, 2021)
b9f5393 - updating loss functions and adding tasks to RGCN (May 26, 2021)
a1ff04f - updating parameter definitions (May 26, 2021)
b7e9046 - updating dropout argument names (Jun 2, 2021)
494b931 - cleaning LSTM (jzwart, Jun 2, 2021)
b3f8df9 - Being explicit about the number of tasks (jzwart, Jun 2, 2021)
34e9983 - cleaning up init states call (jzwart, Jun 2, 2021)
10ffdbd - Updating rgcn for clarity (Jun 2, 2021)
c97f179 - Merge branch 'adding_da_to_models' of https://github.com/USGS-R/river… (Jun 2, 2021)
cf95324 - updating lambda_aux argument (Jun 2, 2021)
088d4a7 - making stateful based on return_state argument (Jun 2, 2021)
4101879 - separating output, rgcn layers (jsadler2, Jun 2, 2021)
6fc687d - making a more generic multitask loss function (jsadler2, Jun 2, 2021)
7c36a4f - adding SingletaskLSTMModel (jsadler2, Jun 2, 2021)
c582d3d - Convert to MultitaskLSTM; update GRU classes (jsadler2, Jun 2, 2021)
ab4cff1 - explicit about number of tasks for y_data_components (jzwart, Jun 3, 2021)
3129bd5 - convenience fxn weighted_masked_rmse (jsadler2, Jun 4, 2021)
c8c140a - lambda_aux -> lambdas in rnns (jsadler2, Jun 4, 2021)
3e54465 - num_tasks, lambdas in train functions (jsadler2, Jun 4, 2021)
e939460 - Merge branch 'adding_da_to_models' of github.com:USGS-R/river-dl into… (jsadler2, Jun 4, 2021)
17bfab8 - [#106] taking train_step out in rnns (jsadler2, Jun 4, 2021)
2c7580b - [#106] provide loss_func to train func; compiles rnns (jsadler2, Jun 4, 2021)
3799509 - [#98] multitask nse, kge functions; rm weights (jsadler2, Jun 4, 2021)
a4d54e2 - [#106] match train cli with train.py fxn (jsadler2, Jun 4, 2021)
0f568ae - take out unneeded check on h_/c_init in RGCN (jsadler2, Jun 4, 2021)
6cd7b52 - [#98] don't pass weights to `fit` call (jsadler2, Jun 4, 2021)
9bc45ca - add `num_tasks` to predict fxns (jsadler2, Jun 4, 2021)
63bb515 - Snakefile updates for lambdas, num_tasks, loss_func (jsadler2, Jun 4, 2021)
8c80003 - RGCN `states` attribute; just final states (jsadler2, Jun 4, 2021)
3eca6f2 - typo in predict (jsadler2, Jun 4, 2021)
4527d67 - Black formatting and docstring corrections (jsadler2, Jun 4, 2021)
8d061f8 - attr for rnns (jsadler2, Jun 4, 2021)
1c1641a - "outputs" -> "variables" in `num_task` docstring (jsadler2, Jun 7, 2021)
8362981 - can provide h_/c_init to initalize rnn (jsadler2, Jun 7, 2021)
117 changes: 103 additions & 14 deletions river_dl/RGCN.py
@@ -11,22 +11,36 @@


class RGCN(layers.Layer):
def __init__(self, hidden_size, A, flow_in_temp=False, rand_seed=None):
def __init__(
self,
hidden_size,
A,
tasks=1,
dropout=0, # I propose changing this to 'recurrent_dropout' and adding another option for 'dropout' since these will map to the options for the tf LSTM layers https://www.tensorflow.org/api_docs/python/tf/keras/layers/LSTMCell ; and also https://arxiv.org/pdf/1512.05287.pdf
flow_in_temp=False,
rand_seed=None,
return_state=False
):
"""

:param hidden_size: [int] the number of hidden units
:param A: [numpy array] adjacency matrix
:param tasks: [int] number of prediction tasks to perform - currently supports either 1 or 2 prediction tasks
:param dropout: [float] value between 0 and 1 for the probability of a recurrent element being set to zero
:param flow_in_temp: [bool] whether the flow predictions should feed
into the temp predictions
:param rand_seed: [int] the random seed for initialization
:param return_state: [bool] whether the hidden (h) and cell (c) states of the LSTM should be returned
"""

Member (on flow_in_temp): This parameter name suggests it'd be hard to generalize to other variables. Prod mostly to @jsadler2 to think about whether/how this parameter name and/or functionality should be adjusted to accommodate, say, DO predictions.

Collaborator: I think this should be moved to the model level, not the layer level, because we will be separating out the output layer from the RGCN layer.

super().__init__()
self.hidden_size = hidden_size
self.A = A.astype("float32")
self.tasks = tasks
self.flow_in_temp = flow_in_temp
self.return_state = return_state

# set up the layer
self.lstm = tf.keras.layers.LSTMCell(hidden_size)
self.lstm = tf.keras.layers.LSTMCell(hidden_size, recurrent_dropout=dropout)
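
Relating to the 'recurrent_dropout' proposal in the signature above, a hedged sketch of the two standard tf.keras.layers.LSTMCell dropout options that the proposal maps onto (the placeholder values and the separate recurrent_dropout argument are assumptions, not current code in this PR):

import tensorflow as tf

hidden_size, dropout, recurrent_dropout = 20, 0.1, 0.3   # placeholder values
lstm_cell = tf.keras.layers.LSTMCell(
    hidden_size,
    dropout=dropout,                      # dropout applied to the inputs
    recurrent_dropout=recurrent_dropout,  # dropout applied to the recurrent state
)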

### set up the weights ###
w_initializer = tf.random_normal_initializer(
@@ -88,6 +102,7 @@ def __init__(self, hidden_size, A, flow_in_temp=False, rand_seed=None):
shape=[hidden_size], initializer="zeros", name="b_c"
)

# will be doing two task predictions if flow_in_temp == True
if self.flow_in_temp:
# was W2
self.W_out_flow = self.add_weight(
Member: Could this variable be W_out_task0 instead of specifying flow? And then W_out_temp could be W_out_task1? Or the reverse, if that's how it goes - is temp task 1 or 2 in your conventions, @jsadler2?
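
Taken together with the earlier suggestion to move the output layer up to the model level, one hedged sketch of a task-generic output head (the class name, argument names, and use of a Dense layer are assumptions for illustration, not code from this PR):

import tensorflow as tf

class MultitaskOutput(tf.keras.layers.Layer):
    # a single dense head sized by the number of tasks, instead of separate flow/temp weight matrices
    def __init__(self, num_tasks):
        super().__init__()
        self.out = tf.keras.layers.Dense(num_tasks)

    def call(self, h):
        # h: [batch, time, hidden_size] -> predictions: [batch, time, num_tasks]
        return self.out(h)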

@@ -108,25 +123,45 @@ def __init__(self, hidden_size, A, flow_in_temp=False, rand_seed=None):
shape=[1], initializer="zeros", name="b_out"
)
else:
# was W2
self.W_out = self.add_weight(
shape=[hidden_size, 2], initializer=w_initializer, name="W_out"
)
# was b2
self.b_out = self.add_weight(
shape=[2], initializer="zeros", name="b_out"
)
if self.tasks == 2:
# was W2
self.W_out = self.add_weight(
shape=[hidden_size, 2], initializer=w_initializer, name="W_out"
)
# was b2
self.b_out = self.add_weight(
shape=[2], initializer="zeros", name="b_out"
)
else:
# was W2
self.W_out = self.add_weight(
shape=[hidden_size, 1], initializer=w_initializer, name="W_out"
)
# was b2
self.b_out = self.add_weight(
shape=[1], initializer="zeros", name="b_out"
)

@tf.function
def call(self, inputs, **kwargs):
Collaborator: We probably need to add a docstring to this function so that we are clear on what the arguments are.

h_list = []
c_list = []
graph_size = self.A.shape[0]
hidden_state_prev, cell_state_prev = (
tf.zeros([graph_size, self.hidden_size]),
tf.zeros([graph_size, self.hidden_size]),
)
out = []
n_steps = inputs.shape[1]
h_update = tf.cast(kwargs['h_init'], tf.float32)
c_update = tf.cast(kwargs['c_init'], tf.float32)
if self.return_state:
# set the initial h & c states to the supplied h and c states if using DA
hidden_state_prev = h_update
cell_state_prev = c_update
for t in range(n_steps):
seq, state = self.lstm(inputs[:, t, :], states=[h_update, c_update])
h, c = state # are these used anywhere?
h_graph = tf.nn.tanh(
tf.matmul(
self.A,
@@ -176,23 +211,77 @@ def call(self, inputs, **kwargs):

hidden_state_prev = h_update
cell_state_prev = c_update

h_list.append(h_update)
c_list.append(c_update)

h_list = tf.stack(h_list)
c_list = tf.stack(c_list)
h_list = tf.transpose(h_list, [1, 0, 2])
c_list = tf.transpose(c_list, [1, 0, 2])
out = tf.stack(out)
out = tf.transpose(out, [1, 0, 2])
Member: Can you add a comment explaining how lines 218-223 reshape h_list, c_list, and out (i.e., from what initial shapes to what final shapes)?
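
A hedged reading of those lines, inferred from the loop above (the shape names are assumptions, not stated in the PR): each appended h_update/c_update is an [n_segments, hidden_size] state and each appended prediction is [n_segments, n_out], so the stack/transpose pairs move the time axis into the middle, e.g.:

# shapes inferred from the loop above (assumed):
# h_list: python list of n_steps tensors, each [n_segments, hidden_size]
h_list = tf.stack(h_list)                 # -> [n_steps, n_segments, hidden_size]
h_list = tf.transpose(h_list, [1, 0, 2])  # -> [n_segments, n_steps, hidden_size]
# c_list and out are reshaped the same way, yielding a [batch, time, features]-style layout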

return out

if self.return_state:
return out, h_list, c_list
else:
return out


class RGCNModel(tf.keras.Model):
def __init__(self, hidden_size, A, flow_in_temp=False, rand_seed=None):
def __init__(
self,
hidden_size,
A,
tasks=1,
Member: again I think num_tasks would be slightly clearer

dropout=0, # I propose changing this to 'recurrent_dropout' and adding another option for 'dropout' since these will map to the options for the tf LSTM layers https://www.tensorflow.org/api_docs/python/tf/keras/layers/LSTMCell ; and also https://arxiv.org/pdf/1512.05287.pdf

Member: Yeah do it

flow_in_temp=False,

Member: See renaming comment above. This could become something like task0_in_task1 (or the reverse?)

rand_seed=None,
return_state=False
):
"""
:param hidden_size: [int] the number of hidden units
:param A: [numpy array] adjacency matrix
:param tasks: [int] number of prediction tasks to perform - currently supports either 1 or 2 prediction tasks
:param dropout: [float] value between 0 and 1 for the probability of a recurrent element being set to zero
:param flow_in_temp: [bool] whether the flow predictions should feed
into the temp predictions
:param rand_seed: [int] the random seed for initialization
:param return_state: [bool] whether the hidden (h) and cell (c) states of the LSTM should be returned
"""
super().__init__()
self.rgcn_layer = RGCN(hidden_size, A, flow_in_temp, rand_seed)
self.return_state = return_state
self.hidden_size = hidden_size
self.tasks = tasks
self.dropout = dropout
self.rnn_layer = tf.keras.layers.LSTM(
hidden_size,
return_sequences=True,
stateful=True,
return_state=return_state,
recurrent_dropout=dropout)
Member: How is it that this wasn't being called before? And where does self.rnn_layer get used?


self.rgcn_layer = RGCN(
hidden_size,
A,
tasks,
dropout,
flow_in_temp,
rand_seed,
return_state)

self.h_gr = None
self.c_gr = None
Member: Consider setting up a states property rather than h_gr and c_gr properties, to be more similar to the LSTM states setup.

Collaborator: What do you think about this idea, @jzwart? If we do this, you'd access it like

model = RGCNModel(bla, bla, bla)
h, c = model.states # would just be the final states

Member Author: Yeah I think that would be good.
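
Following that exchange, a minimal sketch of what the suggested states property might look like, assuming it simply exposes the attributes set in call below (an illustration, not code from the PR):

@property
def states(self):
    # final hidden and cell states captured by the most recent forward pass
    return self.h_gr, self.c_gr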


def call(self, inputs, **kwargs):
output = self.rgcn_layer(inputs)
batch_size = inputs.shape[0]
h_init = kwargs.get('h_init', tf.zeros([batch_size, self.hidden_size]))
c_init = kwargs.get('c_init', tf.zeros([batch_size, self.hidden_size]))
if self.return_state:
output, h_gr, c_gr = self.rgcn_layer(inputs, h_init=h_init, c_init=c_init)
self.h_gr = h_gr
self.c_gr = c_gr
Member: Does it work to make these three lines a one-liner? If so, I think in a way that'd be clearer (b/c we don't have to wonder where else h_gr might get used).

Suggested change (collapsing the three lines above into one):
output, self.h_gr, self.c_gr = self.rgcn_layer(inputs, h_init=h_init, c_init=c_init)

else:
output = self.rgcn_layer(inputs, h_init=h_init, c_init=c_init)

return output
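
To make the new arguments concrete, a hedged usage sketch of the model as defined in this diff; the import path, array shapes, and hyperparameter values are assumptions for illustration:

import numpy as np
from river_dl.RGCN import RGCNModel

n_segments, seq_len, n_features, hidden_size = 42, 365, 10, 20   # placeholder sizes
A = np.eye(n_segments, dtype=np.float32)                          # placeholder adjacency matrix
x = np.random.rand(n_segments, seq_len, n_features).astype(np.float32)

model = RGCNModel(hidden_size, A, tasks=2, dropout=0.1, return_state=True)

# initial states, e.g. carried over from a data assimilation update; shape [n_segments, hidden_size]
h0 = np.zeros((n_segments, hidden_size), dtype=np.float32)
c0 = np.zeros((n_segments, hidden_size), dtype=np.float32)

preds = model(x, h_init=h0, c_init=c0)       # predictions for both tasks
h_states, c_states = model.h_gr, model.c_gr  # states stored because return_state=True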
40 changes: 24 additions & 16 deletions river_dl/loss_functions.py
@@ -69,19 +69,23 @@ def samplewise_nnse_loss(y_true, y_pred):
return 1 - nnse_val


def nnse_masked_one_var(data, y_pred, var_idx):
y_true, y_pred, weights = y_data_components(data, y_pred, var_idx)
def nnse_masked_one_var(data, y_pred, var_idx, tasks):
y_true, y_pred, weights = y_data_components(data, y_pred, var_idx, tasks)
return nnse_loss(y_true, y_pred)


def nnse_one_var_samplewise(data, y_pred, var_idx):
y_true, y_pred, weights = y_data_components(data, y_pred, var_idx)
def nnse_one_var_samplewise(data, y_pred, var_idx, tasks):
y_true, y_pred, weights = y_data_components(data, y_pred, var_idx, tasks)
return samplewise_nnse_loss(y_true, y_pred)


def y_data_components(data, y_pred, var_idx):
weights = data[:, :, -2:]
y_true = data[:, :, :-2]
def y_data_components(data, y_pred, var_idx, tasks):
if tasks == 2:
weights = data[:, :, -2:]
y_true = data[:, :, :-2]
else:
weights = data[:, :, -1:]
y_true = data[:, :, :-1]

# ensure y_pred, weights, and y_true are all tensors the same data type
y_true = tf.convert_to_tensor(y_true)
@@ -99,23 +103,27 @@ def y_data_components(data, y_pred, var_idx):
return y_true, y_pred, weights
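
For readers new to this layout, a hedged illustration of the array structure y_data_components appears to assume (the variable names and the masking-weight interpretation are assumptions, not from the PR): the observed targets come first along the last axis, and the per-variable weights occupy the final tasks slices.

import numpy as np

n_batch, seq_len, tasks = 4, 365, 2
y_obs = np.random.rand(n_batch, seq_len, tasks)       # observed values for each task
weights = np.ones((n_batch, seq_len, tasks))          # e.g. 0 where observations are missing
data = np.concatenate([y_obs, weights], axis=-1)      # shape [n_batch, seq_len, 2 * tasks]

# y_data_components then splits this back apart:
w = data[:, :, -tasks:]
y_true = data[:, :, :-tasks]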


def rmse_masked_one_var(data, y_pred, var_idx):
y_true, y_pred, weights = y_data_components(data, y_pred, var_idx)
def rmse_masked_one_var(data, y_pred, var_idx, tasks):
y_true, y_pred, weights = y_data_components(data, y_pred, var_idx, tasks)
return rmse(y_true, y_pred)


def weighted_masked_rmse(lamb=0.5):
def weighted_masked_rmse(lamb=0.5, tasks=1):
"""
calculate a weighted, masked rmse.
:param lamb: [float] (short for lambda). The factor that the auxiliary loss
will be multiplied by before being added to the main loss.
:param tasks: [int] number of prediction tasks to perform - currently supports either 1 or 2 prediction tasks
"""

def rmse_masked_combined(data, y_pred):
rmse_main = rmse_masked_one_var(data, y_pred, 0)
rmse_aux = rmse_masked_one_var(data, y_pred, 1)
rmse_loss = rmse_main + lamb * rmse_aux
return rmse_loss
rmse_main = rmse_masked_one_var(data, y_pred, 0, tasks)
if tasks == 2:
rmse_aux = rmse_masked_one_var(data, y_pred, 1, tasks)
rmse_loss = rmse_main + lamb * rmse_aux
return rmse_loss
else:
return rmse_main

Member: I like this change. I suggest renaming rmse_main to rmse_1task and renaming rmse_loss to rmse_2tasks.

return rmse_masked_combined
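
A hedged sketch of how this closure might be wired into training; the compile-based flow and parameter values are assumptions based on the PR's commit messages (e.g. "provide loss_func to train func; compiles rnns"), and `model` is assumed to be the RGCNModel built earlier:

import tensorflow as tf
from river_dl.loss_functions import weighted_masked_rmse

loss_func = weighted_masked_rmse(lamb=0.5, tasks=2)   # returns rmse_masked_combined(data, y_pred)
model.compile(optimizer=tf.keras.optimizers.Adam(1e-2), loss=loss_func)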

@@ -181,8 +189,8 @@ def kge_norm_loss(y_true, y_pred):
return 1 - norm_kge(y_true, y_pred)


def kge_loss_one_var(data, y_pred, var_idx):
y_true, y_pred, weights = y_data_components(data, y_pred, var_idx)
def kge_loss_one_var(data, y_pred, var_idx, tasks):
y_true, y_pred, weights = y_data_components(data, y_pred, var_idx, tasks)
return kge_loss(y_true, y_pred)

