Feat/softmax/per item (#884)
* adding a 'per_element' option to the softmax function

* removing NormalizingFunction logic on transfer mechanism execution now that the per_element option on SoftMax handles this within the function

* updating pytests that were affected by the new per_element option on SoftMax

* renaming per_element --> per_item

* documenting the new per_item attr on SoftMax function
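
To make the new option concrete, here is a minimal sketch of the per_item semantics in plain NumPy (illustration only, not the PsyNeuLink API; the helper softmax_1d is hypothetical): with per_item=True, a 2d variable is treated as a list of items and each row is normalized on its own; with per_item=False, the same computation is applied to the variable as a whole.

import numpy as np

def softmax_1d(x, gain=1.0):
    # mirrors SoftMax.apply_softmax for a single 1d item with output_type == ALL
    v = gain * np.asarray(x, dtype=float)
    v = np.exp(v - np.max(v))        # shift by the max to avoid overflow
    return v / np.sum(v)

variable = [[1.0, 2.0], [3.0, 4.0]]

# per_item=True (the new default): each item (row) is normalized independently
per_item_result = np.array([softmax_1d(item) for item in variable])
print(per_item_result.sum(axis=1))   # -> [1. 1.]; each row sums to 1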
KristenManning authored Jul 20, 2018
1 parent 99d78b3 commit c508833
Showing 5 changed files with 54 additions and 63 deletions.
58 changes: 39 additions & 19 deletions psyneulink/components/functions/function.py
@@ -216,8 +216,8 @@
MAX_ABS_INDICATOR, MAX_ABS_VAL, MAX_ABS_DIFF, MAX_INDICATOR, MAX_VAL, \
NOISE, NORMALIZING_FUNCTION_TYPE, NORMAL_DIST_FUNCTION, \
OBJECTIVE_FUNCTION_TYPE, OFFSET, ONE_HOT_FUNCTION, OPERATION, ORNSTEIN_UHLENBECK_INTEGRATOR_FUNCTION, \
OUTPUT_STATES, OUTPUT_TYPE, \
PARAMETER_STATE_PARAMS, PARAMS, PEARSON, PREDICTION_ERROR_DELTA_FUNCTION, PROB, PROB_INDICATOR, PRODUCT, \
OUTPUT_STATES, OUTPUT_TYPE, PARAMETER_STATE_PARAMS, PARAMS, PEARSON, PER_ITEM, \
PREDICTION_ERROR_DELTA_FUNCTION, PROB, PROB_INDICATOR, PRODUCT, \
RANDOM_CONNECTIVITY_MATRIX, RATE, RECEIVER, BUFFER_FUNCTION, REDUCE_FUNCTION, RELU_FUNCTION, RL_FUNCTION, \
SCALE, SIMPLE_INTEGRATOR_FUNCTION, SLOPE, SOFTMAX_FUNCTION, STABILITY_FUNCTION, STANDARD_DEVIATION, SUM, \
TDLEARNING_FUNCTION, TIME_STEP_SIZE, TRANSFER_FUNCTION_TYPE, \
@@ -3874,6 +3874,10 @@ class SoftMax(NormalizingFunction):
specifies the format of the array returned by `function <SoftMax.function>`
(see `output <SoftMax.output>` for details).

per_item : boolean : default True
for 2d variables, determines whether the SoftMax function is applied to the entire variable (per_item=False)
or to each item of the variable separately (per_item=True).

params : Dict[param keyword: param value] : default None
a `parameter dictionary <ParameterState_Specification>` that specifies the parameters for the
function. Values specified for parameters in the dictionary override any assigned to those parameters in
@@ -3908,6 +3912,10 @@ class SoftMax(NormalizingFunction):
sum of values to 1 (i.e., their `Luce Ratio <https://en.wikipedia.org/wiki/Luce%27s_choice_axiom>`_),
0 for all others.

per_item : boolean : default True
for 2d variables, determines whether the SoftMax function is applied to the entire variable (per_item=False)
or to each item of the variable separately (per_item=True).

bounds : None if `output <SoftMax.output>` == MAX_VAL, else (0,1) : default (0,1)

owner : Component
@@ -3939,12 +3947,14 @@ def __init__(self,
default_variable=None,
gain: parameter_spec = 1.0,
output: tc.enum(ALL, MAX_VAL, MAX_INDICATOR, PROB) = ALL,
per_item=True,
params: tc.optional(dict) = None,
owner=None,
prefs: is_pref_set = None):

# Assign args to params and functionParams dicts (kwConstants must == arg names)
params = self._assign_args_to_param_dicts(gain=gain,
per_item=per_item,
output=output,
params=params)

@@ -3974,6 +3984,25 @@ def _instantiate_function(self, function, function_params=None, context=None):

super()._instantiate_function(function, function_params=function_params, context=context)

def apply_softmax(self, input_value, gain, output_type):
# Modulate input_value by gain
v = gain * input_value
# Shift by max to avoid extreme values:
v = v - np.max(v)
# Exponentiate
v = np.exp(v)
# Normalize (to sum to 1)
sm = v / np.sum(v, axis=0)

# Generate one-hot encoding based on selected output_type

if output_type in {MAX_VAL, MAX_INDICATOR}:
return self.one_hot_function(sm)
elif output_type in {PROB, PROB_INDICATOR}:
return self.one_hot_function([input_value, sm])
else:
return sm

def function(self,
variable=None,
params=None,
@@ -4006,26 +4035,17 @@ def function(self,
# Assign the params and return the result
output_type = self.get_current_function_param(OUTPUT_TYPE)
gain = self.get_current_function_param(GAIN)

per_item = self.get_current_function_param(PER_ITEM)
# Compute softmax and assign to sm

# Modulate variable by gain
v = gain * variable
# Shift by max to avoid extreme values:
v = v - np.max(v)
# Exponentiate
v = np.exp(v)
# Normalize (to sum to 1)
sm = v / np.sum(v, axis=0)

# Generate one-hot encoding based on selected output_type

if output_type in {MAX_VAL, MAX_INDICATOR}:
return self.one_hot_function(sm)
elif output_type in {PROB, PROB_INDICATOR}:
return self.one_hot_function([variable, sm])
if per_item and len(np.shape(variable)) > 1:
output = []
for item in variable:
output.append(self.apply_softmax(item, gain, output_type))
else:
return sm
output = self.apply_softmax(variable, gain, output_type)

return output

def derivative(self, output, input=None):
"""
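One detail of the new apply_softmax helper worth spelling out: subtracting np.max(v) before exponentiating is the standard overflow guard and leaves the result unchanged, because softmax of (v - c) equals softmax of v for any constant c. A quick standalone check in plain NumPy (illustration only; naive_softmax and shifted_softmax are hypothetical helpers, not part of the codebase):

import numpy as np

def naive_softmax(v):
    e = np.exp(v)
    return e / np.sum(e)

def shifted_softmax(v):
    e = np.exp(v - np.max(v))   # same shift used by SoftMax.apply_softmax
    return e / np.sum(e)

v = np.array([1.0, 5.0, 10.0])
assert np.allclose(naive_softmax(v), shifted_softmax(v))   # identical results

big = np.array([1000.0, 1001.0, 1002.0])
print(naive_softmax(big))     # overflows: [nan nan nan] (with a RuntimeWarning)
print(shifted_softmax(big))   # finite, well-defined values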
52 changes: 11 additions & 41 deletions psyneulink/components/mechanisms/processing/transfermechanism.py
@@ -963,13 +963,7 @@ def _instantiate_output_states(self, context=None):
super()._instantiate_output_states(context=context)

def _get_instantaneous_function_input(self, function_variable, noise):
if isinstance(self.function_object, NormalizingFunction):
if self._current_variable_index == 0:
self._current_noise = self._try_execute_param(noise, function_variable)
noise = self._current_noise[self._current_variable_index]
function_variable = function_variable[self._current_variable_index]
else:
noise = self._try_execute_param(noise, function_variable)
noise = self._try_execute_param(noise, function_variable)
if (np.array(noise) != 0).any():
current_input = function_variable + noise
else:
@@ -1061,25 +1055,11 @@ def _execute(self,
# Clip outputs
clip = self.get_current_mechanism_param("clip")

if isinstance(self.function_object, NormalizingFunction):
# Apply TransferMechanism's function to each input state separately
value = []
for i in range(len(variable)):
self._current_variable_index = i
value_item = super(Mechanism, self)._execute(variable=variable,
runtime_params=runtime_params,
context=context)
value_item = self._clip_result(clip, value_item)
# execute returns 2d even though we passed in 1d
# (we passed in one item of a 2d variable)
value.append(np.squeeze(value_item))

else:
value = super(Mechanism, self)._execute(variable=variable,
runtime_params=runtime_params,
context=context
)
value = self._clip_result(clip, value)
value = super(Mechanism, self)._execute(variable=variable,
runtime_params=runtime_params,
context=context
)
value = self._clip_result(clip, value)

# Used by update_previous_value, convergence_function and delta
self._current_value = np.atleast_2d(value)
@@ -1110,22 +1090,12 @@ def _parse_function_variable(self, variable, context=None):
# Update according to time-scale of integration
if integrator_mode:
initial_value = self.get_current_mechanism_param("initial_value")
if isinstance(self.function_object, NormalizingFunction):
# only execute integrator function once, even though component.execute is called for each item in var
if self._current_variable_index == 0:
self.integrator_function_value = self._get_integrated_function_input(variable,
initial_value,
noise,
context)
# grab the item of integrator function value that corresponds to current iteration through variable
return self.integrator_function_value[self._current_variable_index]

else:
self.integrator_function_value = self._get_integrated_function_input(variable,
initial_value,
noise,
context)
return self.integrator_function_value
self.integrator_function_value = self._get_integrated_function_input(variable,
initial_value,
noise,
context)
return self.integrator_function_value

else:
return self._get_instantaneous_function_input(variable, noise)
1 change: 1 addition & 0 deletions psyneulink/globals/keywords.py
@@ -868,6 +868,7 @@ def _is_metric(metric):
PROB = 'PROB'
PROB_INDICATOR = 'PROB_INDICATOR'
MUTUAL_ENTROPY = 'mutual entropy'
PER_ITEM = 'per_item'

INITIALIZER = 'initializer'
INITIAL_V = 'initial_v'
2 changes: 1 addition & 1 deletion tests/mechanisms/test_processing_mechanism.py
@@ -43,7 +43,7 @@ def test_processing_mechanism_Logistic_function(self):
# assert np.allclose(PM1.value, 1.0)

def test_processing_mechanism_SoftMax_function(self):
PM1 = ProcessingMechanism(function=SoftMax)
PM1 = ProcessingMechanism(function=SoftMax(per_item=False))
PM1.execute(1.0)
# assert np.allclose(PM1.value, 1.0)

4 changes: 2 additions & 2 deletions tests/mechanisms/test_transfer_mechanism.py
@@ -1248,10 +1248,10 @@ def test_initial_values_softmax(self):
expected_result_integrator = integrator_fn.function([[1.5, 2.5], [3.5, 4.5]])

S1 = SoftMax()
expected_result_s1 = S1.function([1.25, 2.25])
expected_result_s1 = S1.function([[1.25, 2.25]])

S2 = SoftMax()
expected_result_s2 = S2.function([3.25, 4.25])
expected_result_s2 = S2.function([[3.25, 4.25]])

assert np.allclose(expected_result_integrator, T.integrator_function_value)
assert np.allclose(expected_result_s1, result[0])
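A hedged note on why these expected-value calls now pass 2d inputs: with per_item=True as the new SoftMax default, the outer axis of the variable is treated as a list of items, so each input vector is wrapped in an extra list. The numerical values match the old 1d calls; only a leading item axis is added. A plain NumPy sketch of the equivalence (illustration only; softmax_1d is a hypothetical helper):

import numpy as np

def softmax_1d(x):
    e = np.exp(x - np.max(x))
    return e / np.sum(e)

old_style = softmax_1d(np.array([1.25, 2.25]))                       # shape (2,)
new_style = [softmax_1d(row) for row in np.array([[1.25, 2.25]])]    # one item -> shape (1, 2)
assert np.allclose(old_style, new_style)   # same values; only the leading item axis differs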
