From 9cafd2133c69c1ac33ef92a0d0e62bb31cc0e596 Mon Sep 17 00:00:00 2001 From: Katherine Mantel Date: Thu, 2 Jul 2020 20:31:33 -0400 Subject: [PATCH 01/25] Composition: fix UnboundLocalError when .run arg num_trials is 0 --- psyneulink/core/compositions/composition.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/psyneulink/core/compositions/composition.py b/psyneulink/core/compositions/composition.py index 7829ff5cdc9..aaecaa83d58 100644 --- a/psyneulink/core/compositions/composition.py +++ b/psyneulink/core/compositions/composition.py @@ -8288,6 +8288,8 @@ def run( # Reset gym forager environment for the current trial if self.env: trial_output = np.atleast_2d(self.env.reset()) + else: + trial_output = None # Loop over the length of the list of inputs - each input represents a TRIAL for trial_num in range(num_trials): From 7abba888ff061dfcf9edf9b5cfe35a600587083e Mon Sep 17 00:00:00 2001 From: Katherine Mantel Date: Wed, 8 Jul 2020 23:11:52 -0400 Subject: [PATCH 02/25] KWTAMechanism: fix wrong variable name in k_param validation --- .../components/mechanisms/processing/transfer/kwtamechanism.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/psyneulink/library/components/mechanisms/processing/transfer/kwtamechanism.py b/psyneulink/library/components/mechanisms/processing/transfer/kwtamechanism.py index 44f910e0f50..c1c4c95aaf8 100644 --- a/psyneulink/library/components/mechanisms/processing/transfer/kwtamechanism.py +++ b/psyneulink/library/components/mechanisms/processing/transfer/kwtamechanism.py @@ -493,7 +493,7 @@ def _validate_params(self, request_set, target_set=None, context=None): if not isinstance(k_param, numbers.Real): if not (isinstance(k_param, (np.ndarray, list)) and len(k_param) == 1): raise KWTAError("k-value parameter ({}) for {} must be a single number".format(k_param, self)) - if (isinstance(ratio_param, (np.ndarray, list)) and len(ratio_param) == 1): + if (isinstance(k_param, (np.ndarray, list)) and len(k_param) == 1): k_num = k_param[0] else: k_num = k_param From e897c280755895880d6006290a6de8f82fe63fad Mon Sep 17 00:00:00 2001 From: "Samyak K. G" Date: Tue, 9 Jun 2020 10:22:01 -0400 Subject: [PATCH 03/25] learning: Prevent infinite hang on small paths --- psyneulink/core/compositions/composition.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/psyneulink/core/compositions/composition.py b/psyneulink/core/compositions/composition.py index aaecaa83d58..693f8c9d8a4 100644 --- a/psyneulink/core/compositions/composition.py +++ b/psyneulink/core/compositions/composition.py @@ -6624,7 +6624,9 @@ def bfs(start): p.insert(0, curr_node) curr_node = prev[curr_node] p.insert(0, curr_node) - pathways.append(p) + # we only consider input -> projection -> ... -> output pathways (since we can't learn on only one mechanism) + if len(p) >= 3: + pathways.append(p) continue for projection, efferent_node in [(p, p.receiver.owner) for p in curr_node.efferents]: if (not hasattr(projection,'learnable')) or (projection.learnable is False): From d16e10ffd2191d7dc866871d99c4275fe625f0af Mon Sep 17 00:00:00 2001 From: "Samyak K. 
G" Date: Tue, 9 Jun 2020 10:23:20 -0400 Subject: [PATCH 04/25] llvm/autodiff: Consolidate autodiff exec generation --- psyneulink/core/llvm/codegen.py | 21 ++----------------- .../compositions/autodiffcomposition.py | 2 -- 2 files changed, 2 insertions(+), 21 deletions(-) diff --git a/psyneulink/core/llvm/codegen.py b/psyneulink/core/llvm/codegen.py index 936ef0f60e4..9dd47f9fd13 100644 --- a/psyneulink/core/llvm/codegen.py +++ b/psyneulink/core/llvm/codegen.py @@ -544,23 +544,6 @@ def gen_multirun_wrapper(ctx, function: ir.Function) -> ir.Function: return multirun_f -def gen_autodiffcomp_learning_exec(ctx, composition, *, tags:frozenset): - composition._build_pytorch_representation(composition.default_execution_id) - pytorch_model = composition.parameters.pytorch_representation.get(composition.default_execution_id) - with _gen_composition_exec_context(ctx, composition, tags=tags) as (builder, data, params, cond_gen): - state, _, comp_in, data, cond, = builder.function.args - pytorch_model._gen_llvm_training_function_body(ctx, builder, state, - params, data) - node_tags = tags.union({"node_wrapper"}) - # Call output CIM - output_cim_w = ctx.get_node_wrapper(composition, composition.output_CIM) - output_cim_f = ctx.import_llvm_function(output_cim_w, tags=node_tags) - builder.block.name = "invoke_" + output_cim_f.name - builder.call(output_cim_f, [state, params, comp_in, data, data]) - - return builder.function - - def gen_autodiffcomp_exec(ctx, composition, *, tags:frozenset): """Creates llvm bin execute for autodiffcomp""" assert composition.controller is None @@ -569,8 +552,8 @@ def gen_autodiffcomp_exec(ctx, composition, *, tags:frozenset): with _gen_composition_exec_context(ctx, composition, tags=tags) as (builder, data, params, cond_gen): state, _, comp_in, _, cond = builder.function.args - pytorch_forward_func = ctx.import_llvm_function(pytorch_model, tags=tags) - builder.call(pytorch_forward_func, [state, params, data]) + pytorch_func = ctx.import_llvm_function(pytorch_model, tags=tags) + builder.call(pytorch_func, [state, params, data]) node_tags = tags.union({"node_wrapper"}) # Call output CIM diff --git a/psyneulink/library/compositions/autodiffcomposition.py b/psyneulink/library/compositions/autodiffcomposition.py index ddc2a1c5dab..1879275bf97 100644 --- a/psyneulink/library/compositions/autodiffcomposition.py +++ b/psyneulink/library/compositions/autodiffcomposition.py @@ -423,8 +423,6 @@ def _update_learning_parameters(self, context): def _gen_llvm_function(self, *, ctx:pnlvm.LLVMBuilderContext, tags:frozenset): if "run" in tags: return pnlvm.codegen.gen_composition_run(ctx, self, tags=tags) - elif "learning" in tags: - return pnlvm.codegen.gen_autodiffcomp_learning_exec(ctx, self, tags=tags) else: return pnlvm.codegen.gen_autodiffcomp_exec(ctx, self, tags=tags) From 39efd1210f0954228582f057179286a80dc506d3 Mon Sep 17 00:00:00 2001 From: "Samyak K. G" Date: Tue, 9 Jun 2020 10:24:03 -0400 Subject: [PATCH 05/25] llvm/pytorch: Remove dead code --- psyneulink/library/compositions/compiledloss.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/psyneulink/library/compositions/compiledloss.py b/psyneulink/library/compositions/compiledloss.py index 9ee6a47aff4..afb93a59a46 100644 --- a/psyneulink/library/compositions/compiledloss.py +++ b/psyneulink/library/compositions/compiledloss.py @@ -7,8 +7,6 @@ class Loss(): def __init__(self): - self._structs = [] - self._DELTA_W_NUM = 0 From 2e2eb6ff202582b737ce981eb8605419e28aadc1 Mon Sep 17 00:00:00 2001 From: "Samyak K. 
G" Date: Tue, 9 Jun 2020 10:32:08 -0400 Subject: [PATCH 06/25] llvm/pytorch: Rewrite compiled learning --- .../core/components/mechanisms/mechanism.py | 6 +- .../processing/transfermechanism.py | 2 +- .../compositions/autodiffcomposition.py | 21 +- .../library/compositions/compiledoptimizer.py | 157 ++--- .../library/compositions/pytorchcomponents.py | 257 ++++++++ .../compositions/pytorchmodelcreator.py | 617 +++++------------- tests/composition/test_autodiffcomposition.py | 44 +- 7 files changed, 543 insertions(+), 561 deletions(-) create mode 100644 psyneulink/library/compositions/pytorchcomponents.py diff --git a/psyneulink/core/components/mechanisms/mechanism.py b/psyneulink/core/components/mechanisms/mechanism.py index df3bf385726..11a5a239dae 100644 --- a/psyneulink/core/components/mechanisms/mechanism.py +++ b/psyneulink/core/components/mechanisms/mechanism.py @@ -2945,8 +2945,8 @@ def _fill_input(b, s_input, i): mech_params, mech_state, mech_in) return builder - def _gen_llvm_invoke_function(self, ctx, builder, function, params, state, variable): - fun = ctx.import_llvm_function(function) + def _gen_llvm_invoke_function(self, ctx, builder, function, params, state, variable, *, tags:frozenset): + fun = ctx.import_llvm_function(function, tags=tags) fun_in, builder = self._gen_llvm_function_input_parse(builder, ctx, fun, variable) fun_out = builder.alloca(fun.args[3].type.pointee) @@ -2957,7 +2957,7 @@ def _gen_llvm_invoke_function(self, ctx, builder, function, params, state, varia def _gen_llvm_is_finished_cond(self, ctx, builder, params, state, current): return pnlvm.ir.IntType(1)(1) - def _gen_llvm_function_internal(self, ctx, builder, params, state, arg_in, arg_out): + def _gen_llvm_function_internal(self, ctx, builder, params, state, arg_in, arg_out, *, tags:frozenset): ip_output, builder = self._gen_llvm_input_ports(ctx, builder, params, state, arg_in) diff --git a/psyneulink/core/components/mechanisms/processing/transfermechanism.py b/psyneulink/core/components/mechanisms/processing/transfermechanism.py index 139c4d0c414..309ecdbb430 100644 --- a/psyneulink/core/components/mechanisms/processing/transfermechanism.py +++ b/psyneulink/core/components/mechanisms/processing/transfermechanism.py @@ -1632,7 +1632,7 @@ def _gen_llvm_is_finished_cond(self, ctx, builder, params, state, current): cmp_str = self.parameters.termination_comparison_op.get(None) return builder.fcmp_ordered(cmp_str, cmp_val, threshold) - def _gen_llvm_function_internal(self, ctx, builder, params, state, arg_in, arg_out): + def _gen_llvm_function_internal(self, ctx, builder, params, state, arg_in, arg_out, *, tags:frozenset): ip_out, builder = self._gen_llvm_input_ports(ctx, builder, params, state, arg_in) if self.integrator_mode: diff --git a/psyneulink/library/compositions/autodiffcomposition.py b/psyneulink/library/compositions/autodiffcomposition.py index 1879275bf97..5739d2de9f1 100644 --- a/psyneulink/library/compositions/autodiffcomposition.py +++ b/psyneulink/library/compositions/autodiffcomposition.py @@ -143,7 +143,6 @@ import copy import numpy as np import ctypes -import warnings from collections.abc import Iterable from toposort import toposort from inspect import isgenerator @@ -281,17 +280,11 @@ def __init__(self, def _build_pytorch_representation(self, context=None): if self.scheduler is None: self.scheduler = Scheduler(graph=self.graph_processing) - if self.execution_sets is None: - self.execution_sets = [ x - set(self.get_nodes_by_role(NodeRole.LEARNING)) for x in 
self.scheduler.run(context=context)] - self.execution_sets = [x for x in self.execution_sets if len(x) > 0] - if self.parameters.pytorch_representation._get(context) is None: - model = PytorchModelCreator(self.graph_processing, - self.param_init_from_pnl, - self.execution_sets, - self.device, - context=context, - composition = self, - ) + if self.parameters.pytorch_representation._get(context=context) is None: + model = PytorchModelCreator(composition=self, + device=self.device, + context=context) + self.parameters.pytorch_representation._set(model, context, skip_history=True, skip_log=True) # Set up optimizer function @@ -379,7 +372,6 @@ def autodiff_training(self, inputs, targets, context=None, scheduler=None): curr_tensor_outputs = self.parameters.pytorch_representation._get(context).forward( curr_tensor_inputs, context, - scheduler=scheduler, ) for component in curr_tensor_outputs.keys(): @@ -529,9 +521,6 @@ def execute(self, context.execution_phase = execution_phase - # note that output[-1] might not be the truly most recent value - # HACK CW 2/5/19: the line below is a hack. In general, the output_CIM of an AutodiffComposition - # is not having its parameters populated correctly, and this should be fixed in the long run. scheduler.get_clock(context)._increment_time(TimeScale.TRIAL) return output diff --git a/psyneulink/library/compositions/compiledoptimizer.py b/psyneulink/library/compositions/compiledoptimizer.py index 545d1227712..5e585af59b0 100644 --- a/psyneulink/library/compositions/compiledoptimizer.py +++ b/psyneulink/library/compositions/compiledoptimizer.py @@ -1,39 +1,32 @@ from psyneulink.core import llvm as pnlvm from psyneulink.library.compositions.pytorchllvmhelper import * -from psyneulink.core.compositions.composition import NodeRole __all__ = ['AdamOptimizer', 'SGDOptimizer'] - class Optimizer(): def __init__(self, pytorch_model): self._pytorch_model = pytorch_model self._composition = pytorch_model._composition - self._structs = [] self._DELTA_W_NUM = 0 # gets the type of the delta_w struct def _get_delta_w_struct_type(self, ctx): - delta_w = [None] * len(set(self._composition.nodes) - set(self._composition.get_nodes_by_role(NodeRole.LEARNING))) - for node in set(self._composition.nodes) - set(self._composition.get_nodes_by_role(NodeRole.LEARNING)): - node_idx = self._composition._get_node_index(node) - afferent_nodes = self._pytorch_model._get_afferent_nodes(node) - delta_w[node_idx] = [None] * len(afferent_nodes) - for (afferent_node, matrix) in afferent_nodes: - afferent_node_index = self._pytorch_model._get_afferent_node_index( - node, afferent_node) - weights_dim_x, weights_dim_y = matrix.shape - delta_w_array = pnlvm.ir.ArrayType( - pnlvm.ir.ArrayType( - ctx.float_ty, - weights_dim_y - ), - weights_dim_x - ) - delta_w[node_idx][afferent_node_index] = delta_w_array - delta_w[node_idx] = pnlvm.ir.types.LiteralStructType(delta_w[node_idx]) + delta_w = [None] * len(self._pytorch_model.projections) + for idx, proj in enumerate(self._pytorch_model.projections): + proj_matrix = proj.matrix + dim_x, dim_y = proj_matrix.shape + + matrix = pnlvm.ir.ArrayType( + pnlvm.ir.ArrayType( + ctx.float_ty, + dim_y + ), + dim_x + ) + delta_w[idx] = matrix + delta_w = pnlvm.ir.types.LiteralStructType(delta_w) return delta_w @@ -41,20 +34,7 @@ def _get_optimizer_struct_type(self, ctx, extra_types=[]): structs = (self._get_delta_w_struct_type(ctx), *extra_types) return pnlvm.ir.types.LiteralStructType(structs) - def _get_listof_gradient_struct_values(self): - values = [] - for node 
in set(self._composition.nodes) - set(self._composition.get_nodes_by_role(NodeRole.LEARNING)): - node_idx = self._composition._get_node_index(node) - afferent_nodes = self._pytorch_model._get_afferent_nodes(node) - for (afferent_node, matrix) in afferent_nodes: - afferent_node_index = self._pytorch_model._get_afferent_node_index( - node, afferent_node) - weights_dim_x, weights_dim_y = matrix.shape - values.append((node, node_idx, afferent_node, - afferent_node_index, matrix, weights_dim_x, weights_dim_y)) - return values # inserts logic that zeroes out a gradient struct - def _gen_zero_gradient_struct(self, ctx, builder, grad_struct): builder.store(grad_struct.type.pointee(None),grad_struct) @@ -73,9 +53,6 @@ def zero_grad(self, ctx): return llvm_func - def initialize_optimizer_struct(self, ctx, builder, optim_struct): - builder.store(optim_struct.type.pointee(None), optim_struct) - def _gen_llvm_function(self, *, ctx:pnlvm.LLVMBuilderContext, tags:frozenset): return self.step(ctx) @@ -146,31 +123,16 @@ def step(self, ctx): b2_pow = builder.call(pow, [b2, t_val]) one_minus_b1_pow = builder.fsub(one_float, b1_pow) one_minus_b2_pow = builder.fsub(one_float, b2_pow) - + pnlvm.helpers.printf( builder, f"%f b1_pow_sub %f\nb2 pow sub %f\n",t_val, one_minus_b1_pow, one_minus_b2_pow) - alpha_mult = builder.call(sqrt, [one_minus_b2_pow]) - pnlvm.helpers.printf( - builder, f"%f\n",alpha_mult) - alpha_mult = builder.fdiv(alpha_mult, one_minus_b1_pow) - - # this is the new learning rate to use - alpha_t = builder.fmul(alpha_mult, lr) - - gradient_struct_values = self._get_listof_gradient_struct_values() # 2) update first moments - for (node, node_idx, afferent_node, afferent_node_index, matrix, weights_dim_x, weights_dim_y) in gradient_struct_values: - pnlvm.helpers.printf( - builder, f"\t\t\t\tOPTIM UPDATE FIRST MOMENT {afferent_node.name} {node.name}\n") + for idx, proj in enumerate(self._pytorch_model.projections): + proj_idx_ir = ctx.int32_ty(idx) - node_idx_ir = ctx.int32_ty(node_idx) - afferent_node_index_ir = ctx.int32_ty(afferent_node_index) - - m_t_ptr = builder.gep( - m_t, [zero, node_idx_ir, afferent_node_index_ir]) - delta_w_ptr = builder.gep( - delta_w, [zero, node_idx_ir, afferent_node_index_ir]) + m_t_ptr = builder.gep(m_t, [zero, proj_idx_ir]) + delta_w_ptr = builder.gep(delta_w, [zero, proj_idx_ir]) # m_t = m_t * b1 gen_inject_mat_scalar_mult(ctx, builder, m_t_ptr, b1, m_t_ptr) @@ -181,18 +143,13 @@ def step(self, ctx): # m_t = m_t + (1-b1)*g_t gen_inject_mat_add(ctx, builder, m_t_ptr, tmp_val, m_t_ptr) + pnlvm.helpers.printf_float_matrix(builder, m_t_ptr, prefix=f"mt val: {proj.sender._mechanism} -> {proj.receiver._mechanism}\n", override_debug=False) # 3) update second moments - for (node, node_idx, afferent_node, afferent_node_index, matrix, weights_dim_x, weights_dim_y) in gradient_struct_values: - pnlvm.helpers.printf( - builder, f"\t\t\t\tOPTIM UPDATE SECOND MOMENT {afferent_node.name} {node.name}\n") + for idx, proj in enumerate(self._pytorch_model.projections): + proj_idx_ir = ctx.int32_ty(idx) - node_idx_ir = ctx.int32_ty(node_idx) - afferent_node_index_ir = ctx.int32_ty(afferent_node_index) - - v_t_ptr = builder.gep( - v_t, [zero, node_idx_ir, afferent_node_index_ir]) - delta_w_ptr = builder.gep( - delta_w, [zero, node_idx_ir, afferent_node_index_ir]) + v_t_ptr = builder.gep(v_t, [zero, proj_idx_ir]) + delta_w_ptr = builder.gep(delta_w, [zero, proj_idx_ir]) # v_t = v_t * b2 gen_inject_mat_scalar_mult(ctx, builder, v_t_ptr, b2, v_t_ptr) @@ -207,51 +164,56 @@ def step(self, 
ctx): gen_inject_mat_add(ctx, builder, v_t_ptr, delta_w_sqrd, v_t_ptr) # 4) update weights + # this is the new learning rate to use + # NOTE: This differs from the version specified in the paper to numerically match with pytorch's implementation + step_size = builder.fdiv(lr, one_minus_b1_pow) + step_size = pnlvm.helpers.fneg(builder, step_size) + + for idx, proj in enumerate(self._pytorch_model.projections): + proj_idx_ir = ctx.int32_ty(idx) - for (node, node_idx, afferent_node, afferent_node_index, matrix, weights_dim_x, weights_dim_y) in gradient_struct_values: - node_idx_ir = ctx.int32_ty(node_idx) - afferent_node_index_ir = ctx.int32_ty(afferent_node_index) - m_t_ptr = builder.gep( - m_t, [zero, node_idx_ir, afferent_node_index_ir]) + m_t, [zero, proj_idx_ir]) v_t_ptr = builder.gep( - v_t, [zero, node_idx_ir, afferent_node_index_ir]) + v_t, [zero, proj_idx_ir]) delta_w_ptr = builder.gep( - delta_w, [zero, node_idx_ir, afferent_node_index_ir]) + delta_w, [zero, proj_idx_ir]) + + pnlvm.helpers.printf_float_matrix(builder, delta_w_ptr, prefix=f"grad val: {proj.sender._mechanism} -> {proj.receiver._mechanism}\n", override_debug=False) + # this is messy - #TODO - cleanup this - weights_llvmlite, weights_dim_x, weights_dim_y = self._pytorch_model._gen_get_node_weight_ptr( - ctx, builder, params, node, afferent_node) - pnlvm.helpers.printf( - builder, f"OPTIM UPDATE WEIGHTS {afferent_node.name} {node.name}\n",override_debug=False) + weights_llvmlite = proj._extract_llvm_matrix(ctx, builder, params) + dim_x, dim_y = proj.matrix.shape + weight_row = None - with pnlvm.helpers.for_loop_zero_inc(builder, ctx.int32_ty(weights_dim_x), "optimizer_w_upd_outer") as (b1, weight_row): + pnlvm.helpers.printf(builder, "biascorr2 %.20f\n", one_minus_b2_pow, override_debug=False) + with pnlvm.helpers.for_loop_zero_inc(builder, ctx.int32_ty(dim_x), "optimizer_w_upd_outer") as (b1, weight_row): weight_column = None - with pnlvm.helpers.for_loop_zero_inc(b1, ctx.int32_ty(weights_dim_y), "optimizer_w_upd_inner") as (b2, weight_column): + with pnlvm.helpers.for_loop_zero_inc(b1, ctx.int32_ty(dim_y), "optimizer_w_upd_inner") as (b2, weight_column): # sqrt(v_t) + eps - v_t_value = b2.load(b2.gep( - v_t_ptr, [zero, weight_row, weight_column])) + v_t_value = b2.load(b2.gep(v_t_ptr, [zero, weight_row, weight_column])) value = b2.call(sqrt, [v_t_value]) + denom = b2.call(sqrt, [one_minus_b2_pow]) + value = b2.fdiv(value, denom) value = b2.fadd(value, eps) - + pnlvm.helpers.printf(builder, "val %.20f\n", value, override_debug=False) # alpha_t * m_t m_t_value = b2.load(b2.gep( m_t_ptr, [zero, weight_row, weight_column])) - m_t_value = b2.fmul(alpha_t, m_t_value) # value = alpha_t * m_t / (sqrt(v_t) + eps) value = b2.fdiv(m_t_value, value) + value = b2.fmul(step_size, value) old_weight_ptr = b2.gep( weights_llvmlite, [zero, weight_row, weight_column]) - + # new_weight = old_weight - value - value = b2.fsub(b2.load(old_weight_ptr), value) + value = b2.fadd(b2.load(old_weight_ptr), value) b2.store(value, old_weight_ptr) - delta_w_val = b2.load(b2.gep(delta_w_ptr,[zero, weight_row, weight_column])) - pnlvm.helpers.printf(b2,"%f ",delta_w_val,override_debug=False) - pnlvm.helpers.printf(b1,"\n",override_debug=False) - + pnlvm.helpers.printf(b1, "\n", override_debug=False) + pnlvm.helpers.printf(builder, f"\t\t\tOPTIM DONE UPDATE\n",override_debug=False) builder.ret_void() @@ -280,20 +242,15 @@ def step(self, ctx): delta_w = builder.gep(optim_struct, [zero, ctx.int32_ty(self._DELTA_W_NUM)]) lr = ctx.float_ty(self.lr) - - 
gradient_struct_values = self._get_listof_gradient_struct_values() - + # update weights - for (node, node_idx, afferent_node, afferent_node_index, matrix, _, _) in gradient_struct_values: - node_idx_ir = ctx.int32_ty(node_idx) - afferent_node_index_ir = ctx.int32_ty(afferent_node_index) + for idx, proj in enumerate(self._pytorch_model.projections): + delta_w_ptr = builder.gep(delta_w, [zero, ctx.int32_ty(idx)]) + weights_llvmlite = proj._extract_llvm_matrix(ctx, builder, params) - delta_w_ptr = builder.gep(delta_w,[zero,node_idx_ir,afferent_node_index_ir]) - weights_llvmlite, _, _ = self._pytorch_model._gen_get_node_weight_ptr(ctx, builder, params, node, afferent_node) - multiplied_delta_w = gen_inject_mat_scalar_mult(ctx, builder, delta_w_ptr, lr) gen_inject_mat_sub(ctx, builder, weights_llvmlite, multiplied_delta_w, weights_llvmlite) - + builder.ret_void() return llvm_func diff --git a/psyneulink/library/compositions/pytorchcomponents.py b/psyneulink/library/compositions/pytorchcomponents.py new file mode 100644 index 00000000000..45b62255b78 --- /dev/null +++ b/psyneulink/library/compositions/pytorchcomponents.py @@ -0,0 +1,257 @@ +from psyneulink.core.components.functions.transferfunctions import Linear, Logistic, ReLU +from psyneulink.library.compositions.pytorchllvmhelper import * +from psyneulink.core.globals.log import LogCondition +from psyneulink.core import llvm as pnlvm + +import torch + +__all__ = ['PytorchMechanismWrapper', 'PytorchProjectionWrapper'] + +def pytorch_function_creator(function, device, context=None): + """ + Converts a PsyNeuLink function into an equivalent PyTorch lambda function. + NOTE: This is needed due to PyTorch limitations (see: https://github.com/PrincetonUniversity/PsyNeuLink/pull/1657#discussion_r437489990) + """ + def get_fct_param_value(param_name): + val = function._get_current_function_param( + param_name, context=context) + if val is None: + val = getattr(function.defaults, param_name) + + return float(val) + + if isinstance(function, Linear): + slope = get_fct_param_value('slope') + intercept = get_fct_param_value('intercept') + return lambda x: x * slope + intercept + + elif isinstance(function, Logistic): + gain = get_fct_param_value('gain') + bias = get_fct_param_value('bias') + offset = get_fct_param_value('offset') + return lambda x: 1 / (1 + torch.exp(-gain * (x + bias) + offset)) + + elif isinstance(function, ReLU): + gain = get_fct_param_value('gain') + bias = get_fct_param_value('bias') + leak = get_fct_param_value('leak') + return lambda x: (torch.max(input=(x - bias), other=torch.tensor([0], device=device).double()) * gain + + torch.min(input=(x - bias), other=torch.tensor([0], device=device).double()) * leak) + + else: + raise Exception(f"Function {function} is not currently supported in AutodiffCompositions!") + +def bin_function_derivative_creator(ctx, node, context=None): + """ + Returns the compiled derivative version of a PsyNeuLink node + TODO: Add functionality for derivitives into base PsyNeuLink Functions, and move this functionality there + """ + # first try to get cached func + name = node.name + "_" + node.function.name + "_derivative" + try: + llvm_func = ctx.import_llvm_function(name) + return llvm_func + except Exception as e: + pass + + # args: 1) ptr to input vector + # 2) sizeof vector + # 3) ptr to output vector + float_ptr_ty = ctx.float_ty.as_pointer() + args = [float_ptr_ty, ctx.int32_ty, float_ptr_ty] + builder = ctx.create_llvm_function(args, node, name) + llvm_func = builder.function + + input_vector, dim, 
output_vector = llvm_func.args + + def get_fct_param_value(param_name): + val = node.function._get_current_function_param( + param_name, context) + if val is None: + val = node.function._get_current_function_param( + param_name, None) + return ctx.float_ty(val[0]) + + if isinstance(node.function, Linear): # f(x) = mx + b, f'(x) = m + slope = get_fct_param_value('slope') + + def modify_value(x): + return slope + + elif isinstance(node.function, Logistic): # f'(x) = f(x)(1-f(x)) + gain = pnlvm.helpers.fneg(builder, get_fct_param_value('gain')) + bias = get_fct_param_value('bias') + offset = get_fct_param_value('offset') + one = ctx.float_ty(1) + exp = ctx.import_llvm_function("__pnl_builtin_exp") + + def modify_value(x): + arg = builder.fadd(x, bias) + arg = builder.fmul(gain, arg) + arg = builder.fadd(arg, offset) + + f_x = builder.call(exp, [arg]) + f_x = builder.fadd(one, f_x) + f_x = builder.fdiv(one, f_x) + + ret = builder.fsub(one, f_x) + ret = builder.fmul(f_x, ret) + return ret + + else: + raise Exception( + f"Function type {node.function} is currently unsupported by compiled execution!") + + # do computations + with pnlvm.helpers.for_loop_zero_inc(builder, dim, "derivative_loop") as (builder, iterator): + val_ptr = builder.gep(input_vector, [iterator]) + val = builder.load(val_ptr) + val = modify_value(val) + output_location = builder.gep(output_vector, [iterator]) + builder.store(val, output_location) + + builder.ret_void() + + return llvm_func + + +class PytorchMechanismWrapper(): + """ + An interpretation of a mechanism as an equivalent pytorch object + """ + def __init__(self, mechanism, component_idx, device, context=None): + self._mechanism = mechanism + self._idx = component_idx + self._context = context + + self.function = pytorch_function_creator(mechanism.function, device, context) + self._context = context + self.value = None + self.afferents = [] + self.efferents = [] + + self._target_mechanism = None + + def add_efferent(self, efferent): + assert efferent not in self.efferents + self.efferents.append(efferent) + + def add_afferent(self, afferent): + assert afferent not in self.afferents + self.afferents.append(afferent) + + + def collate_afferents(self): + """ + Returns weight-multiplied sum of all afferent projections + """ + return sum((proj.execute(proj.sender.value) for proj in self.afferents)) + + def execute(self, variable): + self.value = self.function(variable) + + return self.value + + def _gen_execute_llvm(self, ctx, builder, state, params, mech_input, data): + mech_func = ctx.import_llvm_function(self._mechanism) + + mech_param = builder.gep(params, [ctx.int32_ty(0), + ctx.int32_ty(0), + ctx.int32_ty(self._idx)]) + + mech_state = builder.gep(state, [ctx.int32_ty(0), + ctx.int32_ty(0), + ctx.int32_ty(self._idx)]) + + mech_output = builder.gep(data, [ctx.int32_ty(0), + ctx.int32_ty(0), + ctx.int32_ty(self._idx)]) + + builder.call(mech_func, [mech_param, + mech_state, + mech_input, + mech_output]) + + pnlvm.helpers.printf_float_array(builder, builder.gep(mech_output, [ctx.int32_ty(0), ctx.int32_ty(0)]), prefix=f"{self} output:\n", override_debug=False) + + return mech_output + + def log_value(self): + if self._mechanism.parameters.value.log_condition != LogCondition.OFF: + detached_value = self.value.detach().cpu().numpy() + self._mechanism.output_port.parameters.value._set(detached_value, self._context) + self._mechanism.parameters.value._set(detached_value, self._context) + + def _gen_execute_derivative_func_llvm(self, ctx, builder, mech_input): + 
derivative_func = ctx.import_llvm_function( + bin_function_derivative_creator(ctx, self._mechanism, context=self._context).name) + return gen_inject_unary_function_call(ctx, builder, derivative_func, mech_input) + + def __repr__(self): + return "PytorchWrapper for: " +self._mechanism.__repr__() + +class PytorchProjectionWrapper(): + """ + An interpretation of a projection as an equivalent pytorch object + """ + def __init__(self, projection, component_idx, port_idx, device, sender=None, receiver=None, context=None): + self._projection = projection + self._idx = component_idx + self._context = context + + self.sender = sender + self.receiver = receiver + self._port_idx = port_idx + + matrix = projection.parameters.matrix.get( + context=context) + if matrix is None: + matrix = projection.parameters.matrix.get( + context=None + ) + self.matrix = torch.nn.Parameter(torch.tensor(matrix.copy(), + device=device, + dtype=torch.double)) + + if projection.learnable is False: + self.matrix.requires_grad = False + + def execute(self, variable): + return torch.matmul(variable, self.matrix) + + def log_matrix(self): + if self._projection.parameters.matrix.log_condition != LogCondition.OFF: + detached_matrix = self.matrix.detach().cpu().numpy() + self._projection.parameters.matrix._set(detached_matrix, context=self._context) + self._projection.parameter_ports['matrix'].parameters.value._set(detached_matrix, context=self._context) + + def _extract_llvm_matrix(self, ctx, builder, params): + proj_params = builder.gep(params, [ctx.int32_ty(0), + ctx.int32_ty(1), + ctx.int32_ty(self._idx)]) + + dim_x, dim_y = self.matrix.detach().numpy().shape + proj_matrix = pnlvm.helpers.get_param_ptr(builder, self._projection, proj_params, "matrix") + proj_matrix = builder.bitcast(proj_matrix, pnlvm.ir.types.ArrayType( + pnlvm.ir.types.ArrayType(ctx.float_ty, dim_y), dim_x).as_pointer()) + + return proj_matrix + + def _gen_execute_llvm(self, ctx, builder, state, params, data): + proj_matrix = self._extract_llvm_matrix(ctx, builder, params) + + input_vec = builder.gep(data, [ctx.int32_ty(0), + ctx.int32_ty(0), + ctx.int32_ty(self.sender._idx), + ctx.int32_ty(self._port_idx)]) + + output_vec = gen_inject_vxm(ctx, builder, input_vec, proj_matrix) + + pnlvm.helpers.printf_float_array(builder, input_vec, prefix=f"{self.sender._mechanism} -> {self.receiver._mechanism} input:\n", override_debug=False) + pnlvm.helpers.printf_float_matrix(builder, proj_matrix, prefix=f"{self.sender._mechanism} -> {self.receiver._mechanism} mat:\n", override_debug=False) + pnlvm.helpers.printf_float_array(builder, output_vec, prefix=f"{self.sender._mechanism} -> {self.receiver._mechanism} output:\n", override_debug=False) + + return output_vec + + def __repr__(self): + return "PytorchWrapper for: " +self._projection.__repr__() diff --git a/psyneulink/library/compositions/pytorchmodelcreator.py b/psyneulink/library/compositions/pytorchmodelcreator.py index 37c2460d0a8..034f67037e3 100644 --- a/psyneulink/library/compositions/pytorchmodelcreator.py +++ b/psyneulink/library/compositions/pytorchmodelcreator.py @@ -1,17 +1,13 @@ -import numpy as np from psyneulink.core.scheduling.time import TimeScale from psyneulink.core.compositions.composition import NodeRole from psyneulink.core.components.functions.transferfunctions import Linear, Logistic, ReLU -from psyneulink.core.globals.context import Context, ContextFlags, handle_external_context +from psyneulink.core.globals.context import ContextFlags, handle_external_context from psyneulink.core import llvm 
as pnlvm -from psyneulink.library.compositions.compiledoptimizer import AdamOptimizer,SGDOptimizer +from psyneulink.library.compositions.compiledoptimizer import AdamOptimizer, SGDOptimizer from psyneulink.library.compositions.compiledloss import MSELoss -import ctypes -from collections import deque from psyneulink.library.compositions.pytorchllvmhelper import * from psyneulink.core.globals.keywords import TARGET_MECHANISM -from psyneulink.core.globals.log import LogCondition -debug_env = pnlvm.debug_env +from .pytorchcomponents import * try: import torch @@ -21,19 +17,10 @@ torch_available = False __all__ = ['PytorchModelCreator'] -# Class that is called to create pytorch representations of autodiff compositions based on their processing graphs. -# Called to do so when the composition is run for the first time. - -# Note on notation: the "nodes" that are constantly referred to are vertices of the composition's processing -# graph. For general compositions, the component a node represents can be a mechanism or a nested composition, -# but for autodiff compositions, nodes always represent mechanisms. "Nodes" can be thought of as -# (but are not literally) mechanisms. - class PytorchModelCreator(torch.nn.Module): - # sets up parameters of model & the information required for forward computation - def __init__(self, processing_graph, param_init_from_pnl, execution_sets, device, context=None, composition=None): + def __init__(self, composition, device, context=None): if not torch_available: raise Exception('Pytorch python module (torch) is not installed. Please install it with ' @@ -41,89 +28,53 @@ def __init__(self, processing_graph, param_init_from_pnl, execution_sets, device super(PytorchModelCreator, self).__init__() - self.execution_sets = execution_sets # saved for use in the forward method - # dict mapping PNL nodes to their forward computation information - self.component_to_forward_info = {} - # dict mapping PNL projections to Pytorch weights - self.projections_to_pytorch_weights = {} - # list that Pytorch optimizers will use to keep track of parameters + # Maps Mechanism -> PytorchMechanismWrapper + self.nodes = [] + self.component_map = {} + + # Maps Projections -> PytorchProjectionWrappers + self.projections = [] + self.projection_map = {} + self.params = nn.ParameterList() self.device = device - self._composition = composition - - for i, current_exec_set in enumerate(self.execution_sets): - self.execution_sets[i] = current_exec_set - set(composition.get_nodes_by_role(NodeRole.LEARNING)) - for component in current_exec_set: - value = None # the node's (its mechanism's) value - function = self.function_creator( - component, context) # the node's function - afferents = {} # dict for keeping track of afferent nodes and their connecting weights - if param_init_from_pnl: - if component.parameters.value._get(context) is None: - value = torch.tensor(component.parameters.value.get(None)[0], device=self.device) - else: - value = torch.tensor(component.parameters.value._get(context)[0], device=self.device) - else: - input_length = len( - component.input_ports[0].parameters.value.get(None)) - value = torch.zeros( - input_length, device=self.device).double() - - # if `node` is not an origin node (origin nodes don't have biases or afferent connections) - if i != 0: - # iterate over incoming projections and set up pytorch weights for them - for mapping_proj in component.path_afferents: - - # get projection, sender node--pdb for projection - input_component = mapping_proj.sender.owner - 
input_node = processing_graph.comp_to_vertex[input_component] - - # CW 12/3/18: Check this logic later - proj_matrix = mapping_proj.parameters.matrix._get( - context) - if proj_matrix is None: - proj_matrix = mapping_proj.parameters.matrix.get( - None) - # set up pytorch weights that correspond to projection. If copying params from psyneulink, - # copy weight values from projection. Otherwise, use random values. - if param_init_from_pnl: - weights = nn.Parameter( - torch.tensor( - proj_matrix.copy(), - device=self.device).double(), - requires_grad=mapping_proj.learnable) - else: - weights = nn.Parameter(torch.rand( - np.shape(proj_matrix), device=self.device).double()) - afferents[input_node] = weights - self.params.append(weights) - self.projections_to_pytorch_weights[mapping_proj] = weights - - node_forward_info = { - 'value':value, - 'function':function, - 'afferents':afferents, - 'component':component} - - self.component_to_forward_info[component] = node_forward_info - - # CW 12/3/18: this copies by reference so in theory it only needs to be called during init - # but we call copy_weights_to_psyneulink after every run in order to make Autodiff less stateful - self.copy_weights_to_psyneulink(context) - + + # Instantiate pytorch mechanisms + for node in set(composition.nodes) - set(composition.get_nodes_by_role(NodeRole.LEARNING)): + pytorch_node = PytorchMechanismWrapper(node, self._composition._get_node_index(node), device, context=context) + self.component_map[node] = pytorch_node + self.nodes.append(pytorch_node) + + # Instantiate pytorch projections + for projection in composition.projections: + if projection.sender.owner in self.component_map and projection.receiver.owner in self.component_map: + proj_send = self.component_map[projection.sender.owner] + proj_recv = self.component_map[projection.receiver.owner] + + port_idx = projection.sender.owner.output_ports.index(projection.sender) + new_proj = PytorchProjectionWrapper(projection, list(self._composition._inner_projections).index(projection), port_idx, device, sender=proj_send, receiver=proj_recv, context=context) + proj_send.add_efferent(new_proj) + proj_recv.add_afferent(new_proj) + self.projection_map[projection] = new_proj + self.projections.append(new_proj) + self.params.append(new_proj.matrix) + + # Setup execution sets + # 1) Remove all learning-specific nodes + self.execution_sets = [x - set(composition.get_nodes_by_role(NodeRole.LEARNING)) for x in composition.scheduler.run(context=context)] + # 2) Convert to pytorchcomponent representation + self.execution_sets = [{self.component_map[comp] for comp in s if comp in self.component_map} for s in self.execution_sets] + # 3) Remove empty execution sets + self.execution_sets = [x for x in self.execution_sets if len(x) > 0] # gets the index of 'afferent_node' in the forward info weights list - def _get_afferent_node_index(self,node,afferent_node): - forward_info_weights = self.component_to_forward_info[node]['afferents'] - for (idx,vertex) in enumerate(forward_info_weights): - if vertex.component == afferent_node: - return idx + def _get_afferent_node_index(self, node, afferent_node): + return [proj.receiver for proj in node.afferents].index(self.component_map[afferent_node]) - # returns a list of all efferent nodes and weights stored in component_to_forward_info - def _get_afferent_nodes(self,node): - forward_info_weights = self.component_to_forward_info[node]['afferents'] - return [(vertex.component,weights) for (vertex,weights) in forward_info_weights.items()] + def 
_get_afferent_nodes(self, node): + forward_info_weights = self.component_map[node].afferents + return [(vertex.component, weights) for (vertex, weights) in forward_info_weights.items()] # generates llvm function for self.forward def _gen_llvm_function(self, *, ctx:pnlvm.LLVMBuilderContext, tags:frozenset): @@ -143,93 +94,31 @@ def _gen_llvm_function(self, *, ctx:pnlvm.LLVMBuilderContext, tags:frozenset): builder.ret_void() return builder.function - # gets a pointer for the weights matrix between node and afferent_node - def _gen_get_node_weight_ptr(self, ctx, builder, params, node, afferent_node): - node_idx = self._composition._get_node_index(node) - forward_info_weights = self.component_to_forward_info[node]['afferents'] - afferent_node_index = self._get_afferent_node_index(node,afferent_node) - projection = [i for i in afferent_node.efferents if i.receiver.owner == node][0] - inner_projections = list(self._composition._inner_projections) - projection_idx = inner_projections.index(projection) - projection_params = builder.gep(params, [ctx.int32_ty(0), ctx.int32_ty(1), ctx.int32_ty(projection_idx)]) - - for (vertex,matrix) in forward_info_weights.items(): - if vertex.component == afferent_node: - weight_matrix = matrix - break - dim_x,dim_y = weight_matrix.detach().numpy().shape - node_weights = pnlvm.helpers.get_param_ptr(builder, projection, projection_params, "matrix") - node_weights = builder.bitcast(node_weights, pnlvm.ir.types.ArrayType( - pnlvm.ir.types.ArrayType(ctx.float_ty, dim_y), dim_x).as_pointer()) - - return node_weights,dim_x,dim_y - - def _gen_llvm_forward_function_body(self, ctx, builder, state, params, arg_in, arg_out, store_z_values=False): - out_t = arg_out.type.pointee - if isinstance(out_t, pnlvm.ir.ArrayType) and isinstance(out_t.element, pnlvm.ir.ArrayType): - assert len(out_t) == 1 - z_values = {} - for i, current_exec_set in enumerate(self.execution_sets): + def _gen_llvm_forward_function_body(self, ctx, builder, state, params, arg_in, data): + z_values = {} # dict for storing values of terminal (output) nodes + for current_exec_set in self.execution_sets: for component in current_exec_set: - component_id = self._composition._get_node_index(component) - value = self._get_output_value_ptr(ctx, builder, arg_out, component_id) - afferents = self.component_to_forward_info[component]['afferents'] - - mech_input_ty = ctx.get_input_struct_type(component) - mech_input = builder.alloca(mech_input_ty) - - if i == 0: - # input struct provides data for input nodes - input_id = self._composition.get_nodes_by_role(NodeRole.INPUT).index(component) - cmp_arg = builder.gep(arg_in, [ctx.int32_ty(0), ctx.int32_ty(input_id)]) - # node inputs are 2d arrays in a struct - input_ptr = builder.gep(mech_input, [ctx.int32_ty(0), ctx.int32_ty(0), ctx.int32_ty(0)]) - builder.store(builder.load(cmp_arg), input_ptr) - else: - # is_set keeps track of if we already have valid (i.e. 
non-garbage) values inside the alloc'd value - is_set = False - for j, (input_vertex, weights) in enumerate(afferents.items()): - source_node = input_vertex.component - source_node_idx = self._composition._get_node_index(source_node) - input_value = self._get_output_value_ptr(ctx, builder, arg_out, source_node_idx) - - # We cast the ctype weights array to llvmlite pointer - weights_llvmlite, _, _ = self._gen_get_node_weight_ptr(ctx, builder, params, component, source_node) - pnlvm.helpers.printf_float_matrix(builder, weights_llvmlite, prefix=f"{source_node} -> {component}\tweight:\n") - # node inputs are 2d arrays in a struct - input_ptr = builder.gep(mech_input, [ctx.int32_ty(0), ctx.int32_ty(0), ctx.int32_ty(j)]) - gen_inject_vxm(ctx, builder, input_value, weights_llvmlite, input_ptr) - if store_z_values: - if is_set == False: - # copy weighted_inp to value - gen_inject_vec_copy(ctx, builder, input_ptr, value) - is_set = True - else: - # add to value - gen_inject_vec_add(ctx, builder, input_ptr, value, value) - - cmp_arg = value - # Apply Activation Func to values - if store_z_values is True: - z_values[component] = gen_inject_vec_copy(ctx, builder, cmp_arg) - - mech_func = ctx.import_llvm_function(component) - mech_param = builder.gep(params, [ctx.int32_ty(0), - ctx.int32_ty(0), - ctx.int32_ty(component_id)]) - mech_state = builder.gep(state, [ctx.int32_ty(0), - ctx.int32_ty(0), - ctx.int32_ty(component_id)]) - mech_output = builder.gep(arg_out, [ctx.int32_ty(0), - ctx.int32_ty(0), - ctx.int32_ty(component_id)]) - builder.call(mech_func, [mech_param, mech_state, - mech_input, mech_output]) - - if store_z_values is True: - pnlvm.helpers.printf_float_array(builder, z_values[component], prefix=f"{component}\tforward input:\t") - pnlvm.helpers.printf_float_array(builder, value, prefix=f"{component}\tforward output:\t", suffix="\t") - pnlvm.helpers.printf(builder, "\n") + mech_input_ty = ctx.get_input_struct_type(component._mechanism) + variable = builder.alloca(mech_input_ty) + z_values[component] = builder.alloca(mech_input_ty.elements[0].elements[0]) + builder.store(z_values[component].type.pointee(None),z_values[component]) + + if NodeRole.INPUT in self._composition.get_roles_by_node(component._mechanism): + input_ptr = builder.gep( + variable, [ctx.int32_ty(0), ctx.int32_ty(0), ctx.int32_ty(0)]) + input_id = component._idx + mech_in = builder.gep(arg_in, [ctx.int32_ty(0), ctx.int32_ty(input_id)]) + builder.store(builder.load(mech_in), input_ptr) + for (proj_idx, proj) in enumerate(component.afferents): + input_ptr = builder.gep( + variable, [ctx.int32_ty(0), ctx.int32_ty(0), ctx.int32_ty(proj_idx)]) + proj_output = proj._gen_execute_llvm(ctx, builder, state, params, data) + # store in input ports struct + builder.store(builder.load(proj_output), input_ptr) + # HACK: Add to z_values struct + gen_inject_vec_add(ctx, builder, proj_output, z_values[component], z_values[component]) + component._gen_execute_llvm(ctx, builder, state, params, variable, data) + return z_values # generates a function responsible for a single epoch of the training @@ -247,110 +136,111 @@ def _gen_llvm_training_backprop(self, ctx, optimizer, loss): if isinstance(a.type, pnlvm.ir.PointerType): a.attributes.add('noalias') - context, params, model_output, optim_struct = llvm_func.args - model_input = builder.gep(model_output, [ctx.int32_ty(0), ctx.int32_ty(0), ctx.int32_ty(self._composition._get_node_index(self._composition.input_CIM))]) + context, params, data, optim_struct = llvm_func.args + model_input = 
builder.gep(data, [ctx.int32_ty(0), + ctx.int32_ty(0), + ctx.int32_ty(self._composition._get_node_index(self._composition.input_CIM))]) + model_output = data # setup useful mappings - input_nodes = composition.get_nodes_by_role(NodeRole.INPUT) - output_nodes = composition.get_nodes_by_role(NodeRole.OUTPUT) + input_nodes = set(self._composition.get_nodes_by_role(NodeRole.INPUT)) # initialize optimizer params: delta_w = builder.gep(optim_struct, [ctx.int32_ty(0), ctx.int32_ty(optimizer._DELTA_W_NUM)]) - # 2) call forward computation z_values = self._gen_llvm_forward_function_body( - ctx, builder, context, params, model_input, model_output, store_z_values=True) + ctx, builder, context, params, model_input, data) + # 3) compute errors - - error_dict = {} - backprop_queue = deque() - for node in set(output_nodes) - set(self._composition.get_nodes_by_role(NodeRole.LEARNING)): - backprop_queue.append(node) - loss_fn = ctx.import_llvm_function(loss) total_loss = builder.alloca(ctx.float_ty) - builder.store(ctx.float_ty(0),total_loss) - - while(len(backprop_queue) > 0): - node = backprop_queue.popleft() - if node in error_dict or not hasattr(node, "afferents") or node == composition.input_CIM or node in input_nodes: - continue - - for (afferent_node,weights) in self._get_afferent_nodes(node): - backprop_queue.append(afferent_node) - - node_idx = composition._get_node_index(node) - - activation_func_derivative_bin_func = ctx.import_llvm_function(self.bin_function_derivative_creator(ctx,node).name) - activation_func_derivative = gen_inject_unary_function_call(ctx, builder, activation_func_derivative_bin_func, z_values[node]) - - error_val = builder.alloca(z_values[node].type.pointee) - - error_dict[node] = error_val - - if node in set(output_nodes) - set(self._composition.get_nodes_by_role(NodeRole.LEARNING)): - # We handle output layer here - # compute dC/da = a_l - y(x) (TODO: Allow other cost functions! This only applies to MSE) - out_node_idx = output_nodes.index(node) - node_target = self._get_target_value_ptr(ctx, builder, model_input, node ) - node_output = self._get_output_value_ptr(ctx, builder, model_output, node_idx) - - tmp_loss = loss.gen_inject_lossfunc_call(ctx, builder, loss_fn, node_output, node_target) - - pnlvm.helpers.printf_float_array(builder, node_target, prefix=f"{node}\ttarget:\t") - pnlvm.helpers.printf_float_array(builder, node_output, prefix=f"{node}\tvalue:\t") - - pnlvm.helpers.printf(builder,f"{node}\tloss:\t%f\n",tmp_loss) - builder.store(builder.fadd(builder.load(total_loss),tmp_loss),total_loss) - loss_derivative = loss._gen_inject_loss_differential(ctx, builder, node_output, node_target) - # compute δ_l = dσ/da ⊙ σ'(z) - - gen_inject_vec_hadamard(ctx, builder, activation_func_derivative, loss_derivative, error_val) - - else: - # We propagate error backwards from next layer - - is_set = False + builder.store(ctx.float_ty(0), total_loss) + + error_dict = {} + for exec_set in reversed(self.execution_sets): + for node in exec_set: + if node._mechanism in input_nodes: + continue + node_z_value = z_values[node] + activation_func_derivative = node._gen_execute_derivative_func_llvm(ctx, builder, node_z_value) + error_val = builder.alloca(z_values[node].type.pointee) + error_dict[node] = error_val + + if NodeRole.OUTPUT in self._composition.get_roles_by_node(node._mechanism): + # We handle output layer here + # compute dC/da = a_l - y(x) (TODO: Allow other cost functions! 
This only applies to MSE) + + # 1) Lookup desired target value + terminal_sequence = self._composition._terminal_backprop_sequences[node._mechanism] + target_idx = self._composition.get_nodes_by_role( + NodeRole.INPUT).index(terminal_sequence[TARGET_MECHANISM]) + node_target = builder.gep(model_input, [ctx.int32_ty(0), ctx.int32_ty(target_idx)]) + + # 2) Lookup desired output value + node_output = builder.gep(model_output, [ctx.int32_ty(0), ctx.int32_ty(0), ctx.int32_ty(node._idx), ctx.int32_ty(0)]) + + tmp_loss = loss.gen_inject_lossfunc_call( + ctx, builder, loss_fn, node_output, node_target) + + pnlvm.helpers.printf_float_array( + builder, node_target, prefix=f"{node}\ttarget:\t") + pnlvm.helpers.printf_float_array( + builder, node_output, prefix=f"{node}\tvalue:\t") + + pnlvm.helpers.printf( + builder, f"{node}\tloss:\t%f\n", tmp_loss, override_debug=False) + builder.store(builder.fadd(builder.load( + total_loss), tmp_loss), total_loss) + loss_derivative = loss._gen_inject_loss_differential( + ctx, builder, node_output, node_target) + # compute δ_l = dσ/da ⊙ σ'(z) + + gen_inject_vec_hadamard( + ctx, builder, activation_func_derivative, loss_derivative, error_val) - # We calculate δ_(l-1) = sum (a_(l-1) W^T) ⊙ δ_l, where (l-1) is the current layer, l is layer of efferents, summed over all efferents - efferents = [ - proj.receiver._owner for proj in node.efferents] - for efferent_node in set(efferents) - set(self._composition.get_nodes_by_role(NodeRole.LEARNING)): - efferent_node_error = error_dict[efferent_node] + else: + # We propagate error backwards from next layer + for proj_idx, proj in enumerate(node.efferents): + efferent_node = proj.receiver + efferent_node_error = error_dict[efferent_node] - weights_llvmlite, _, _ = self._gen_get_node_weight_ptr(ctx, builder, params, efferent_node, node) + weights_llvmlite = proj._extract_llvm_matrix(ctx, builder, params) - if is_set is False: - gen_inject_vxm_transposed(ctx, builder, efferent_node_error, weights_llvmlite, error_val) - is_set = True - else: - new_val = gen_inject_vxm_transposed(ctx, builder, efferent_node_error, weights_llvmlite) + if proj_idx == 0: + gen_inject_vxm_transposed( + ctx, builder, efferent_node_error, weights_llvmlite, error_val) + else: + new_val = gen_inject_vxm_transposed( + ctx, builder, efferent_node_error, weights_llvmlite) - gen_inject_vec_add(ctx, builder, new_val, error_val, error_val) + gen_inject_vec_add( + ctx, builder, new_val, error_val, error_val) - gen_inject_vec_hadamard(ctx, builder, activation_func_derivative, error_val, error_val) + gen_inject_vec_hadamard( + ctx, builder, activation_func_derivative, error_val, error_val) - pnlvm.helpers.printf_float_array(builder, activation_func_derivative, prefix=f"{node}\tdSigma:\t") - pnlvm.helpers.printf_float_array(builder, error_val, prefix=f"{node}\terror:\t") + pnlvm.helpers.printf_float_array( + builder, activation_func_derivative, prefix=f"{node}\tdSigma:\t") + pnlvm.helpers.printf_float_array( + builder, error_val, prefix=f"{node}\terror:\t") # 4) compute weight gradients for (node, err_val) in error_dict.items(): if node in input_nodes: continue - node_idx = self._composition._get_node_index(node) - for (afferent_node,weight) in self._get_afferent_nodes(node): + for proj in node.afferents: # get a_(l-1) - afferent_node_idx = self._get_afferent_node_index(node,afferent_node) - - afferent_node_activation = self._get_output_value_ptr(ctx,builder,model_output,self._composition._get_node_index(afferent_node)) + afferent_node_activation = 
builder.gep(model_output, [ctx.int32_ty(0), ctx.int32_ty(0), ctx.int32_ty(proj.sender._idx), ctx.int32_ty(0)]) # get dimensions of weight matrix - weights_llvmlite,weights_dim_x,weights_dim_y = self._gen_get_node_weight_ptr(ctx, builder, params, node, afferent_node) + weights_llvmlite = proj._extract_llvm_matrix(ctx, builder, params) + pnlvm.helpers.printf_float_matrix(builder, weights_llvmlite, prefix= f"{proj.sender._mechanism} -> {proj.receiver._mechanism}\n", override_debug=False) # update delta_W - node_delta_w = builder.gep(delta_w,[ctx.int32_ty(0),ctx.int32_ty(node_idx), ctx.int32_ty(afferent_node_idx)]) + node_delta_w = builder.gep(delta_w, [ctx.int32_ty(0), ctx.int32_ty(proj._idx)]) - with pnlvm.helpers.for_loop_zero_inc(builder, ctx.int32_ty(weights_dim_x), "weight_update_loop_outer") as (b1, weight_row): - with pnlvm.helpers.for_loop_zero_inc(b1, ctx.int32_ty(weights_dim_y), "weight_update_loop_inner") as (b2, weight_column): + dim_x, dim_y = proj.matrix.shape + with pnlvm.helpers.for_loop_zero_inc(builder, ctx.int32_ty(dim_x), "weight_update_loop_outer") as (b1, weight_row): + with pnlvm.helpers.for_loop_zero_inc(b1, ctx.int32_ty(dim_y), "weight_update_loop_inner") as (b2, weight_column): a_val = b2.load(b2.gep(afferent_node_activation, [ctx.int32_ty(0), weight_row])) d_val = b2.load(b2.gep(err_val, @@ -360,9 +250,9 @@ def _gen_llvm_training_backprop(self, ctx, optimizer, loss): new_val = b2.fadd(old_val, b2.fmul(a_val, d_val)) b2.store(new_val, b2.gep(node_delta_w, [ctx.int32_ty(0), weight_row, weight_column])) - - builder.store(builder.fmul(ctx.float_ty(.5),builder.load(total_loss)),total_loss) - pnlvm.helpers.printf(builder,"TOTAL LOSS:\t%f\n",builder.load(total_loss)) + + pnlvm.helpers.printf(builder, "TOTAL LOSS:\t%.20f\n", + builder.load(total_loss), override_debug=False) builder.ret_void() return builder.function @@ -376,102 +266,67 @@ def _gen_llvm_training_function_body(self, ctx, builder, state, params, data): if loss_type == 'mse': loss = MSELoss() else: - raise Exception("LOSS TYPE",loss_type,"NOT SUPPORTED") + raise Exception("LOSS TYPE", loss_type, "NOT SUPPORTED") optimizer_step_f = ctx.import_llvm_function(optimizer) optimizer_struct_idx = len(state.type.pointee.elements) - 1 optimizer_struct = builder.gep(state, [ctx.int32_ty(0), ctx.int32_ty(optimizer_struct_idx)]) optimizer_zero_grad = ctx.import_llvm_function(optimizer.zero_grad(ctx).name) backprop = ctx.import_llvm_function(self._gen_llvm_training_backprop(ctx, optimizer, loss).name) - + # # FIXME: converting this call to inlined code results in # # significant longer compilation times builder.call(optimizer_zero_grad, [optimizer_struct]) builder.call(backprop, [state, params, data, - optimizer_struct]) + optimizer_struct]) builder.call(optimizer_step_f, [optimizer_struct, params]) - - def _get_output_value_ptr(self, ctx, builder, arg_out, index): - return builder.gep(arg_out, [ctx.int32_ty(0), ctx.int32_ty(0), ctx.int32_ty(index), ctx.int32_ty(0)]) - - def _get_target_value_ptr(self, ctx, builder, arg_in, output_node): - terminal_sequence = self._composition._terminal_backprop_sequences[output_node] - idx = self._composition.get_nodes_by_role(NodeRole.INPUT).index(terminal_sequence[TARGET_MECHANISM]) - return builder.gep(arg_in, [ctx.int32_ty(0), ctx.int32_ty(idx)]) - def _get_compiled_optimizer(self): # setup optimizer optimizer_type = self._composition.optimizer_type if optimizer_type == 'adam': - optimizer = AdamOptimizer(self,lr = self._composition.learning_rate) + optimizer = AdamOptimizer(self, 
lr=self._composition.learning_rate) elif optimizer_type == 'sgd': - optimizer = SGDOptimizer(self,lr = self._composition.learning_rate) + optimizer = SGDOptimizer(self, lr=self._composition.learning_rate) else: - raise Exception("OPTIMIZER TYPE",optimizer_type,"NOT SUPPORTED") + raise Exception("OPTIMIZER TYPE", optimizer_type, "NOT SUPPORTED") return optimizer + # performs forward computation for the model @handle_external_context() - def forward(self, inputs, context=None, do_logging=True, scheduler=None): + def forward(self, inputs, context=None): outputs = {} # dict for storing values of terminal (output) nodes - - for i, current_exec_set in enumerate(self.execution_sets): - frozen_values = {} - for component in current_exec_set: - if NodeRole.LEARNING in self._composition.get_roles_by_node(component) or NodeRole.TARGET in self._composition.get_roles_by_node(component): - continue - frozen_values[component] = self.component_to_forward_info[component]['value'] + for current_exec_set in self.execution_sets: for component in current_exec_set: - if NodeRole.LEARNING in self._composition.get_roles_by_node(component) or NodeRole.TARGET in self._composition.get_roles_by_node(component): - continue - # get forward computation info for current component - function = self.component_to_forward_info[component]['function'] - afferents = self.component_to_forward_info[component]['afferents'] - # forward computation if we have origin node - if i == 0: - value = function(inputs[component]) - # forward computation if we do not have origin node + if NodeRole.INPUT in self._composition.get_roles_by_node(component._mechanism): + component.execute(inputs[component._mechanism]) else: - value = torch.zeros( - len(component.input_ports[0].defaults.value), device=self.device).double() - for input_node, weights in afferents.items(): - if input_node.component in current_exec_set: - input_value = frozen_values[input_node.component] - else: - input_value = self.component_to_forward_info[input_node.component]['value'] - value += torch.matmul(input_value, weights) - value = function(value) - # store the current value of the node - self.component_to_forward_info[component]['value'] = value - old_source = context.source - context.source = ContextFlags.COMMAND_LINE - detached_value = value.detach().cpu().numpy() - component.parameters.value._set(detached_value, context) - context.source = old_source + variable = component.collate_afferents() + component.execute(variable) # save value in output list if we're at a node in the last execution set - if i == len(self.execution_sets) - 1: - outputs[component] = value - - # Maybe need to comment this out! 
- # self.copy_outputs_to_psyneulink(outputs, context) + if NodeRole.OUTPUT in self._composition.get_roles_by_node(component._mechanism): + outputs[component._mechanism] = component.value + # NOTE: Context source needs to be set to COMMAND_LINE to force logs to update independantly of timesteps old_source = context.source context.source = ContextFlags.COMMAND_LINE - self.log_weights(context) - self.copy_outputs_to_psyneulink(outputs, context) + self.log_values() + self.log_weights() context.source = old_source return outputs def detach_all(self): - for component, info in self.component_to_forward_info.items(): - info['value'].detach_() + for projection in self.projection_map.values(): + projection.matrix.detach() def copy_weights_to_psyneulink(self, context=None): - for projection, weights in self.projections_to_pytorch_weights.items(): + for projection, pytorch_rep in self.projection_map.items(): projection.parameters.matrix._set( - weights.detach().cpu().numpy(), context) + pytorch_rep.matrix.detach().cpu().numpy(), context) + projection.parameter_ports['matrix'].parameters.value._set( + pytorch_rep.matrix.detach().cpu().numpy(), context) def copy_outputs_to_psyneulink(self, outputs, context=None): for component, value in outputs.items(): @@ -481,120 +336,10 @@ def copy_outputs_to_psyneulink(self, outputs, context=None): component.output_port.parameters.value._set( detached_value, context, skip_history=True, skip_log=True) - @handle_external_context() - def log_weights(self, context=None): - for projection, weights in self.projections_to_pytorch_weights.items(): - if projection.parameters.matrix.log_condition != LogCondition.OFF: - projection.parameters.matrix._set( - weights.detach().cpu().numpy(), context) - - # Helper method that creates a bin func that returns the derivative of the function into the builder - # FIXME: Add compiled derivative functions, and move these calls there - @handle_external_context() - def bin_function_derivative_creator(self, ctx, node, context=None): - # first try to get cached func - name = node.name + "_" + node.function.name + "_derivative" - try: - llvm_func = ctx.import_llvm_function(name) - return llvm_func - except Exception as e: - pass - - - # args: 1) ptr to input vector - # 2) sizeof vector - # 3) ptr to output vector - float_ptr_ty = ctx.float_ty.as_pointer() - args = [float_ptr_ty, ctx.int32_ty, float_ptr_ty] - builder = ctx.create_llvm_function(args, self,name ) - llvm_func = builder.function - - input_vector, dim, output_vector = llvm_func.args - def get_fct_param_value(param_name): - val = node.function._get_current_function_param( - param_name, context) - if val is None: - val = node.function._get_current_function_param( - param_name, None) - return ctx.float_ty(val[0]) - - if isinstance(node.function, Linear): # f(x) = mx + b, f'(x) = m - slope = get_fct_param_value('slope') - def modify_value(x): - return slope - - elif isinstance(node.function, Logistic):# f'(x) = f(x)(1-f(x)) - - neg_one = ctx.float_ty(-1) - gain = builder.fmul(neg_one, get_fct_param_value('gain')) - bias = get_fct_param_value('bias') - offset = get_fct_param_value('offset') - one = ctx.float_ty(1) - exp = ctx.import_llvm_function("__pnl_builtin_exp") - - def modify_value(x): - arg = builder.fadd(x, bias) - arg = builder.fmul(gain, arg) - arg = builder.fadd(arg, offset) - - f_x = builder.call(exp, [arg]) - f_x = builder.fadd(one, f_x) - f_x = builder.fdiv(one, f_x) - - ret = builder.fsub(one ,f_x) - ret = builder.fmul(f_x, ret) - return ret - - else: - raise 
Exception(f"Function type {node.function} is currently unsupported by compiled execution!") - - # do computations - with pnlvm.helpers.for_loop_zero_inc(builder, dim, "derivative_loop") as (builder, iterator): - val_ptr = builder.gep(input_vector,[iterator]) - val = builder.load(val_ptr) - val = modify_value(val) - output_location = builder.gep(output_vector,[iterator]) - builder.store(val,output_location) - - builder.ret_void() + def log_weights(self): + for proj in self.projections: + proj.log_matrix() - return llvm_func - - # helper method that identifies the type of function used by a node, gets the function - # parameters and uses them to create a function object representing the function, then returns it - def function_creator(self, node, context=None): - def get_fct_param_value(param_name): - val = node.function._get_current_function_param( - param_name, context) - if val is None: - val = node.function._get_current_function_param( - param_name, Context(execution_id=None)) - return float(val) - - if isinstance(node.function, Linear): - slope = get_fct_param_value('slope') - intercept = get_fct_param_value('intercept') - return lambda x: x * slope + intercept - - elif isinstance(node.function, Logistic): - gain = get_fct_param_value('gain') - bias = get_fct_param_value('bias') - offset = get_fct_param_value('offset') - return lambda x: 1 / (1 + torch.exp(-gain * (x + bias) + offset)) - - # if we have relu function (the only other kind of function allowed by the autodiff composition) - else: - gain = get_fct_param_value('gain') - bias = get_fct_param_value('bias') - leak = get_fct_param_value('leak') - return lambda x: (torch.max(input=(x - bias), other=torch.tensor([0], device=self.device).double()) * gain + - torch.min(input=(x - bias), other=torch.tensor([0], device=self.device).double()) * leak) - - # returns dict mapping psyneulink projections to corresponding pytorch weights. Pytorch weights are copied - # over from tensors inside Pytorch's Parameter data type to numpy arrays (and thus copied to a different - # memory location). 
This keeps the weights - and Pytorch in general - away from the user - def get_weights_for_projections(self): - weights_in_numpy = {} - for projection, weights in self.projections_to_pytorch_weights.items(): - weights_in_numpy[projection] = weights.detach().cpu().numpy().copy() - return weights_in_numpy + def log_values(self): + for node in self.nodes: + node.log_value() diff --git a/tests/composition/test_autodiffcomposition.py b/tests/composition/test_autodiffcomposition.py index 57a7a0fdbe2..c9aa5a49b19 100644 --- a/tests/composition/test_autodiffcomposition.py +++ b/tests/composition/test_autodiffcomposition.py @@ -25,6 +25,38 @@ # Unit tests for functions of AutodiffComposition class that are new (not in Composition) # or override functions in Composition +@pytest.mark.pytorch +@pytest.mark.parametrize("mode", ['Python', + pytest.param('LLVMRun', marks=pytest.mark.llvm), + ]) +def test_autodiff_forward(mode): + # create xor model mechanisms and projections + xor_in = TransferMechanism(name='xor_in', + default_variable=np.zeros(2)) + + xor_hid = TransferMechanism(name='xor_hid', + default_variable=np.zeros(10), + function=Logistic()) + + xor_out = TransferMechanism(name='xor_out', + default_variable=np.zeros(1), + function=Logistic()) + + hid_map = MappingProjection(matrix=np.random.rand(2,10)) + out_map = MappingProjection(matrix=np.random.rand(10,1)) + + # put the mechanisms and projections together in an autodiff composition (AC) + xor = AutodiffComposition(param_init_from_pnl=True) + + xor.add_node(xor_in) + xor.add_node(xor_hid) + xor.add_node(xor_out) + + xor.add_projection(sender=xor_in, projection=hid_map, receiver=xor_hid) + xor.add_projection(sender=xor_hid, projection=out_map, receiver=xor_out) + + outputs = xor.run(inputs=[0,0], bin_execute=mode) + assert np.allclose(outputs, [[0.9479085241082691]]) @pytest.mark.pytorch @pytest.mark.acconstructor @@ -463,11 +495,9 @@ class TestTrainingCorrectness: # test whether xor model created as autodiff composition learns properly @pytest.mark.benchmark(group="XOR") @pytest.mark.parametrize( - 'eps, calls, opt, from_pnl_or_not, expected', [ - (100, 'single', 'adam', True, [[[0.09823965]], [[0.81092879]], [[0.78179557]], [[0.25593583]]]), - (50, 'multiple', 'adam', True, [[[0.31200036]], [[0.59406178]], [[0.60417587]], [[0.52347365]]]), - (100, 'single', 'adam', False, [[[0.12697489]], [[0.74632817]], [[0.80712739]], [[0.28699516]]]), - (50, 'multiple', 'adam', False, [[[0.2935138]], [[0.60503794]], [[0.57901045]], [[0.57705371]]]) + 'eps, calls, opt, expected', [ + (100, 'single', 'adam', [[[0.09823965]], [[0.81092879]], [[0.78179557]], [[0.25593583]]]), + (50, 'multiple', 'adam', [[[0.31200036]], [[0.59406178]], [[0.60417587]], [[0.52347365]]]), ] ) @pytest.mark.parametrize("mode", ['Python', @@ -2319,6 +2349,10 @@ def test_autodiff_logging(self): xor.add_projection(sender=xor_in, projection=hid_map, receiver=xor_hid) xor.add_projection(sender=xor_hid, projection=out_map, receiver=xor_out) + xor_in.set_log_conditions('value', pnl.LogCondition.TRIAL) + xor_hid.set_log_conditions('value', pnl.LogCondition.TRIAL) + xor_out.set_log_conditions('value', pnl.LogCondition.TRIAL) + hid_map.set_log_conditions('matrix', pnl.LogCondition.TRIAL) out_map.set_log_conditions('matrix', pnl.LogCondition.TRIAL) From fcc79cd85a1925140693f2131d4af8c4d4f579f5 Mon Sep 17 00:00:00 2001 From: "Samyak K. 
G" Date: Tue, 9 Jun 2020 10:25:29 -0400 Subject: [PATCH 07/25] tests/autodiff: Add forward test --- tests/composition/test_autodiffcomposition.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tests/composition/test_autodiffcomposition.py b/tests/composition/test_autodiffcomposition.py index c9aa5a49b19..ee400474d28 100644 --- a/tests/composition/test_autodiffcomposition.py +++ b/tests/composition/test_autodiffcomposition.py @@ -27,12 +27,12 @@ @pytest.mark.pytorch @pytest.mark.parametrize("mode", ['Python', - pytest.param('LLVMRun', marks=pytest.mark.llvm), - ]) + pytest.param('LLVMRun', marks=pytest.mark.llvm), + ]) def test_autodiff_forward(mode): - # create xor model mechanisms and projections + # create xor model mechanisms and projections xor_in = TransferMechanism(name='xor_in', - default_variable=np.zeros(2)) + default_variable=np.zeros(2)) xor_hid = TransferMechanism(name='xor_hid', default_variable=np.zeros(10), @@ -94,7 +94,6 @@ def test_report_prefs(self): # comp = AutodiffComposition() # assert comp.patience == 10 - @pytest.mark.pytorch @pytest.mark.acmisc class TestMiscTrainingFunctionality: From a6819db308806e028a0d778f911586f4b421eb31 Mon Sep 17 00:00:00 2001 From: "Samyak K. G" Date: Tue, 9 Jun 2020 10:28:22 -0400 Subject: [PATCH 08/25] tests/autodiff: Enforce tighter correctness bounds --- tests/composition/test_autodiffcomposition.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/tests/composition/test_autodiffcomposition.py b/tests/composition/test_autodiffcomposition.py index ee400474d28..b41df9fd73b 100644 --- a/tests/composition/test_autodiffcomposition.py +++ b/tests/composition/test_autodiffcomposition.py @@ -546,11 +546,9 @@ def test_xor_training_correctness(self, eps, calls, opt, from_pnl_or_not, mode, for i in range(eps): results = xor.learn(inputs=input_dict, bin_execute=mode) - # FIXME: Improve accuracy - atol = 0.1 if not from_pnl_or_not and mode == 'LLVMRun' else 0.001 assert len(results) == len(expected) for r, t in zip(results, expected): - assert np.allclose(r[0], t, atol=atol) + assert np.allclose(r[0], t) benchmark(xor.learn, inputs={"inputs": {xor_in: xor_inputs}, "targets": {xor_out: xor_targets}, @@ -834,10 +832,7 @@ def test_semantic_net_training_correctness(self, eps, opt, from_pnl_or_not, mode for res, exp in zip(results, expected): for r, e in zip(res, exp): - if mode == 'Python': - assert np.allclose(r, e) - else: - assert np.allclose(r, e, atol=0.01) + assert np.allclose(r, e) benchmark(sem_net.learn, inputs={'inputs': inputs_dict, 'targets': targets_dict, 'epochs': eps}, bin_execute=mode) From 478ad9d8e9eb32eff157c8476836a7b1a78d385e Mon Sep 17 00:00:00 2001 From: "Samyak K. 
G" Date: Tue, 9 Jun 2020 10:34:19 -0400 Subject: [PATCH 09/25] tests/autodiff: Remove autodiff.get_parameters --- .../compositions/autodiffcomposition.py | 17 ---- tests/composition/test_autodiffcomposition.py | 97 ++----------------- tests/composition/test_learning.py | 19 ++-- 3 files changed, 15 insertions(+), 118 deletions(-) diff --git a/psyneulink/library/compositions/autodiffcomposition.py b/psyneulink/library/compositions/autodiffcomposition.py index 5739d2de9f1..1bc15e7ca21 100644 --- a/psyneulink/library/compositions/autodiffcomposition.py +++ b/psyneulink/library/compositions/autodiffcomposition.py @@ -539,23 +539,6 @@ def execute(self, bin_execute=bin_execute, ) - # gives user weights and biases of the model (from the pytorch representation) - @handle_external_context(execution_id=NotImplemented) - def get_parameters(self, context=None): - if context.execution_id is NotImplemented: - context.execution_id = self.default_execution_id - - pytorch_representation = self.parameters.pytorch_representation._get(context) - - if pytorch_representation is None: - raise AutodiffCompositionError("{0} has not been run yet so parameters have not been created " - "in Pytorch." - .format(self.name)) - - weights = pytorch_representation.get_weights_for_projections() - - return weights - def _get_state_struct_type(self, ctx): node_state_type_list = (ctx.get_state_struct_type(m) for m in self._all_nodes) proj_state_type_list = (ctx.get_state_struct_type(p) for p in self._inner_projections) diff --git a/tests/composition/test_autodiffcomposition.py b/tests/composition/test_autodiffcomposition.py index b41df9fd73b..564e4f5ba9f 100644 --- a/tests/composition/test_autodiffcomposition.py +++ b/tests/composition/test_autodiffcomposition.py @@ -410,83 +410,6 @@ def test_params_stay_separate(self,mode): assert not np.allclose(pt_weights_hid, hid_map.parameters.matrix.get(None)) assert not np.allclose(pt_weights_out, out_map.parameters.matrix.get(None)) - # test whether the autodiff composition's get_parameters method works as desired - @pytest.mark.parametrize("mode", ['Python', - pytest.param('LLVMRun', marks=pytest.mark.llvm), - # LLVM test is disabled since parameters are currently not written back - - ]) - def test_get_params(self, mode): - - xor_in = TransferMechanism(name='xor_in', - default_variable=np.zeros(2)) - - xor_hid = TransferMechanism(name='xor_hid', - default_variable=np.zeros(10), - function=Logistic()) - - xor_out = TransferMechanism(name='xor_out', - default_variable=np.zeros(1), - function=Logistic()) - - hid_map = MappingProjection(matrix=np.random.rand(2,10)) - out_map = MappingProjection(matrix=np.random.rand(10,1)) - - xor = AutodiffComposition(param_init_from_pnl=True, - learning_rate=1.0) - - xor.add_node(xor_in) - xor.add_node(xor_hid) - xor.add_node(xor_out) - - xor.add_projection(sender=xor_in, projection=hid_map, receiver=xor_hid) - xor.add_projection(sender=xor_hid, projection=out_map, receiver=xor_out) - - xor_inputs = np.array( # the inputs we will provide to the model - [[0, 0], [0, 1], [1, 0], [1, 1]]) - - xor_targets = np.array( # the outputs we wish to see from the model - [[0], [1], [1], [0]]) - - # call run to only process the inputs, so that pytorch representation of AC gets created - # results = xor.run(inputs={xor_in:xor_inputs}) - - #KAM Changed 11/1/18 - - # mini version of xor.execute just to build up pytorch representation - xor._analyze_graph() - # CW changed 12/3/18 - xor._build_pytorch_representation(xor.default_execution_id) - # OLD - # 
xor._build_pytorch_representation() - - # call get_parameters to obtain a copy of the pytorch parameters in numpy arrays, - # and get the parameters straight from pytorch - weights_get_params = xor.get_parameters() - weights_straight_1 = xor.parameters.pytorch_representation.get(xor).params[0] - weights_straight_2 = xor.parameters.pytorch_representation.get(xor).params[1] - - # check that parameter copies obtained from get_parameters are the same as the - # projections and parameters from pytorch - assert np.allclose(hid_map.parameters.matrix.get(None), weights_get_params[hid_map]) - assert np.allclose(weights_straight_1.detach().numpy(), weights_get_params[hid_map]) - assert np.allclose(out_map.parameters.matrix.get(None), weights_get_params[out_map]) - assert np.allclose(weights_straight_2.detach().numpy(), weights_get_params[out_map]) - - # call run to train the pytorch parameters - results = xor.learn(inputs={"inputs": {xor_in:xor_inputs}, - "targets": {xor_out:xor_targets}, - "epochs": 10}, bin_execute=mode) - - - # check that the parameter copies obtained from get_parameters have not changed with the - # pytorch parameters during training (and are thus at a different memory location) - # (only makes sense in Python mode) - if mode == 'Python': - assert not np.allclose(weights_straight_1.detach().numpy(), weights_get_params[hid_map]) - assert not np.allclose(weights_straight_2.detach().numpy(), weights_get_params[out_map]) - - @pytest.mark.pytorch @pytest.mark.accorrectness class TestTrainingCorrectness: @@ -1992,8 +1915,6 @@ def test_semantic_net_training_identicalness(self, eps, opt): result = sem_net.run(inputs=inputs_dict) - # comp_weights = sem_net.get_parameters()[0] - # TRAIN COMPOSITION def g_f(): yield {"inputs": inputs_dict, @@ -2002,8 +1923,6 @@ def g_f(): g = g_f() result = sem_net.learn(inputs=g_f) - comp_weights = sem_net.get_parameters() - # SET UP SYSTEM sem_net_sys = Composition() @@ -2059,13 +1978,13 @@ def g_f(): # CHECK THAT PARAMETERS FOR COMPOSITION, SYSTEM ARE SAME - assert np.allclose(comp_weights[map_nouns_h1], map_nouns_h1_sys.get_mod_matrix(sem_net_sys)) - assert np.allclose(comp_weights[map_rels_h2], map_rels_h2_sys.get_mod_matrix(sem_net_sys)) - assert np.allclose(comp_weights[map_h1_h2], map_h1_h2_sys.get_mod_matrix(sem_net_sys)) - assert np.allclose(comp_weights[map_h2_I], map_h2_I_sys.get_mod_matrix(sem_net_sys)) - assert np.allclose(comp_weights[map_h2_is], map_h2_is_sys.get_mod_matrix(sem_net_sys)) - assert np.allclose(comp_weights[map_h2_has], map_h2_has_sys.get_mod_matrix(sem_net_sys)) - assert np.allclose(comp_weights[map_h2_can], map_h2_can_sys.get_mod_matrix(sem_net_sys)) + assert np.allclose(map_nouns_h1.parameters.matrix.get(sem_net), map_nouns_h1_sys.get_mod_matrix(sem_net_sys)) + assert np.allclose(map_rels_h2.parameters.matrix.get(sem_net), map_rels_h2_sys.get_mod_matrix(sem_net_sys)) + assert np.allclose(map_h1_h2.parameters.matrix.get(sem_net), map_h1_h2_sys.get_mod_matrix(sem_net_sys)) + assert np.allclose(map_h2_I.parameters.matrix.get(sem_net), map_h2_I_sys.get_mod_matrix(sem_net_sys)) + assert np.allclose(map_h2_is.parameters.matrix.get(sem_net), map_h2_is_sys.get_mod_matrix(sem_net_sys)) + assert np.allclose(map_h2_has.parameters.matrix.get(sem_net), map_h2_has_sys.get_mod_matrix(sem_net_sys)) + assert np.allclose(map_h2_can.parameters.matrix.get(sem_net), map_h2_can_sys.get_mod_matrix(sem_net_sys)) def test_identicalness_of_input_types(self): # SET UP MECHANISMS FOR COMPOSITION @@ -2922,8 +2841,6 @@ def test_semantic_net_nested(self, 
eps, opt, mode): targets_dict[out_sig_has].append(truth_has[i]) targets_dict[out_sig_can].append(truth_can[i]) - # comp_weights = sem_net.get_parameters()[0] - # TRAIN COMPOSITION input_dict = {"inputs": inputs_dict, "targets": targets_dict, diff --git a/tests/composition/test_learning.py b/tests/composition/test_learning.py index 59c0fc0b7ff..592bd4efb6c 100644 --- a/tests/composition/test_learning.py +++ b/tests/composition/test_learning.py @@ -2020,6 +2020,10 @@ def test_xor_training_identicalness_standard_composition_vs_autodiff(self, model output_comp], learning_rate=10) target_mech = backprop_pathway.target + inputs_dict = {"inputs": {input_comp:xor_inputs}, + "targets": {output_comp:xor_targets}, + "epochs": num_epochs} + result_comp = xor_comp.learn(inputs=inputs_dict) # AutodiffComposition if 'AUTODIFF' in models: @@ -2060,20 +2064,13 @@ def test_xor_training_identicalness_standard_composition_vs_autodiff(self, model inputs_dict = {"inputs": {input_autodiff:xor_inputs}, "targets": {output_autodiff:xor_targets}, "epochs": num_epochs} - # RUN MODELS ----------------------------------------------------------------------------------- - if pnl.COMPOSITION in models: - result = xor_comp.learn(inputs={input_comp:xor_inputs, - target_mech:xor_targets}, - num_trials=(num_epochs * xor_inputs.shape[0]), - ) - if 'AUTODIFF' in models: - result = xor_autodiff.learn(inputs=inputs_dict) - autodiff_weights = xor_autodiff.get_parameters() + result_autodiff = xor_autodiff.learn(inputs=inputs_dict) # COMPARE WEIGHTS FOR PAIRS OF MODELS ---------------------------------------------------------- if all(m in models for m in {pnl.COMPOSITION, 'AUTODIFF'}): - assert np.allclose(autodiff_weights[in_to_hidden_autodiff], in_to_hidden_comp.get_mod_matrix(xor_comp)) - assert np.allclose(autodiff_weights[hidden_to_out_autodiff], hidden_to_out_comp.get_mod_matrix(xor_comp)) + assert np.allclose(in_to_hidden_autodiff.parameters.matrix.get(xor_autodiff), in_to_hidden_comp.get_mod_matrix(xor_comp)) + assert np.allclose(hidden_to_out_autodiff.parameters.matrix.get(xor_autodiff), hidden_to_out_comp.get_mod_matrix(xor_comp)) + assert np.allclose(result_comp, result_autodiff) @pytest.mark.parametrize('configuration', [ 'Y UP', From 64598fe54954a7483786a45b490107179f1a25f8 Mon Sep 17 00:00:00 2001 From: "Samyak K. 
G" Date: Mon, 15 Jun 2020 16:06:19 -0400 Subject: [PATCH 10/25] autodiff: Remove `param_init_from_pnl` --- Scripts/Debug/Yotam LCA Model.py | 4 +- .../Rumelhart Semantic Network (autodiff).py | 1 - .../compositions/autodiffcomposition.py | 6 -- tests/composition/test_autodiffcomposition.py | 76 ++++++++----------- tests/composition/test_learning.py | 5 +- 5 files changed, 34 insertions(+), 58 deletions(-) diff --git a/Scripts/Debug/Yotam LCA Model.py b/Scripts/Debug/Yotam LCA Model.py index e5c699976a6..6d2d58c76f9 100644 --- a/Scripts/Debug/Yotam LCA Model.py +++ b/Scripts/Debug/Yotam LCA Model.py @@ -147,7 +147,7 @@ def get_trained_network(bipartite_graph, num_features=3, num_hidden=200, epochs= } # Build network - mnet = pnl.AutodiffComposition(param_init_from_pnl=True, + mnet = pnl.AutodiffComposition( patience=patience, min_delta=min_delt, learning_rate=learning_rate, @@ -273,7 +273,7 @@ def get_trained_network_multLCA(bipartite_graph, num_features=3, num_hidden=200, } # Build network - mnet = pnl.AutodiffComposition(param_init_from_pnl=True, + mnet = pnl.AutodiffComposition( patience=patience, min_delta=min_delt, learning_rate=learning_rate, diff --git a/Scripts/Examples/Tutorial/Rumelhart Semantic Network (autodiff).py b/Scripts/Examples/Tutorial/Rumelhart Semantic Network (autodiff).py index a62372fd2e4..6c6b780bf89 100644 --- a/Scripts/Examples/Tutorial/Rumelhart Semantic Network (autodiff).py +++ b/Scripts/Examples/Tutorial/Rumelhart Semantic Network (autodiff).py @@ -180,7 +180,6 @@ def gen_input_vals(nouns, relations): #This block of code constructs the network RumelNet = pnl.AutodiffComposition( - param_init_from_pnl=True, patience=10, min_delta=0.00001, learning_rate=1, diff --git a/psyneulink/library/compositions/autodiffcomposition.py b/psyneulink/library/compositions/autodiffcomposition.py index 1bc15e7ca21..c3b44f2595a 100644 --- a/psyneulink/library/compositions/autodiffcomposition.py +++ b/psyneulink/library/compositions/autodiffcomposition.py @@ -225,7 +225,6 @@ class Parameters(Composition.Parameters): # TODO (CW 9/28/18): add compositions to registry so default arg for name is no longer needed def __init__(self, - param_init_from_pnl=True, learning_rate=None, optimizer_type='sgd', weight_decay=0, @@ -255,11 +254,6 @@ def __init__(self, self.force_no_retain_graph = force_no_retain_graph self.loss = None self.disable_learning = disable_learning - # user indication of how to initialize pytorch parameters - self.param_init_from_pnl = param_init_from_pnl - - if param_init_from_pnl is False: - warnings.warn("WARNING: Autodiffcomposition.param_init_from_pnl is deprecated! 
Please do not use it!") # keeps track of average loss per epoch self.losses = [] diff --git a/tests/composition/test_autodiffcomposition.py b/tests/composition/test_autodiffcomposition.py index 564e4f5ba9f..20636134588 100644 --- a/tests/composition/test_autodiffcomposition.py +++ b/tests/composition/test_autodiffcomposition.py @@ -46,7 +46,7 @@ def test_autodiff_forward(mode): out_map = MappingProjection(matrix=np.random.rand(10,1)) # put the mechanisms and projections together in an autodiff composition (AC) - xor = AutodiffComposition(param_init_from_pnl=True) + xor = AutodiffComposition() xor.add_node(xor_in) xor.add_node(xor_hid) @@ -99,8 +99,7 @@ def test_report_prefs(self): class TestMiscTrainingFunctionality: # test whether pytorch parameters are initialized to be identical to the Autodiff Composition's - # projections when AC is initialized with the "param_init_from_pnl" argument set to True - def test_param_init_from_pnl(self): + def test_weight_initialization(self): # create xor model mechanisms and projections xor_in = TransferMechanism(name='xor_in', @@ -118,7 +117,7 @@ def test_param_init_from_pnl(self): out_map = MappingProjection(matrix=np.random.rand(10,1)) # put the mechanisms and projections together in an autodiff composition (AC) - xor = AutodiffComposition(param_init_from_pnl=True) + xor = AutodiffComposition() xor.add_node(xor_in) xor.add_node(xor_hid) @@ -155,7 +154,7 @@ def test_training_then_processing(self, mode): hid_map = MappingProjection() out_map = MappingProjection() - xor = AutodiffComposition(param_init_from_pnl=True) + xor = AutodiffComposition() xor.add_node(xor_in) xor.add_node(xor_hid) @@ -222,7 +221,7 @@ def test_various_loss_specs(self, loss, mode): hid_map = MappingProjection() out_map = MappingProjection() - xor = AutodiffComposition(param_init_from_pnl=True, loss_spec=loss) + xor = AutodiffComposition(loss_spec=loss) xor.add_node(xor_in) xor.add_node(xor_hid) @@ -268,7 +267,7 @@ def test_pytorch_loss_spec(self, mode): hid_map = MappingProjection() out_map = MappingProjection() - xor = AutodiffComposition(param_init_from_pnl=True, loss_spec=ls) + xor = AutodiffComposition(loss_spec=ls) xor.add_node(xor_in) xor.add_node(xor_hid) @@ -314,8 +313,7 @@ def test_optimizer_specs(self, learning_rate, weight_decay, optimizer_type, mode hid_map = MappingProjection() out_map = MappingProjection() - xor = AutodiffComposition(param_init_from_pnl=True, - learning_rate=learning_rate, + xor = AutodiffComposition(learning_rate=learning_rate, optimizer_type=optimizer_type, weight_decay=weight_decay) @@ -375,8 +373,7 @@ def test_params_stay_separate(self,mode): sender=xor_hid, receiver=xor_out) - xor = AutodiffComposition(param_init_from_pnl=True, - learning_rate=10.0, + xor = AutodiffComposition(learning_rate=10.0, optimizer_type="sgd") xor.add_node(xor_in) @@ -425,7 +422,7 @@ class TestTrainingCorrectness: @pytest.mark.parametrize("mode", ['Python', pytest.param('LLVMRun', marks=pytest.mark.llvm), ]) - def test_xor_training_correctness(self, eps, calls, opt, from_pnl_or_not, mode, benchmark, expected): + def test_xor_training_correctness(self, eps, calls, opt, mode, benchmark, expected): xor_in = TransferMechanism(name='xor_in', default_variable=np.zeros(2)) @@ -440,8 +437,7 @@ def test_xor_training_correctness(self, eps, calls, opt, from_pnl_or_not, mode, hid_map = MappingProjection(matrix=np.random.rand(2, 10)) out_map = MappingProjection(matrix=np.random.rand(10, 1)) - xor = AutodiffComposition(param_init_from_pnl=from_pnl_or_not, - optimizer_type=opt, + xor = 
AutodiffComposition(optimizer_type=opt, learning_rate=0.1) xor.add_node(xor_in) @@ -481,15 +477,14 @@ def test_xor_training_correctness(self, eps, calls, opt, from_pnl_or_not, mode, # tests whether semantic network created as autodiff composition learns properly @pytest.mark.benchmark(group="Semantic net") @pytest.mark.parametrize( - 'eps, opt, from_pnl_or_not', [ - (500, 'adam', True), - # (300, 'adam', False) + 'eps, opt', [ + (500, 'adam'), ] ) @pytest.mark.parametrize("mode", ['Python', pytest.param('LLVMRun', marks=pytest.mark.llvm), ]) - def test_semantic_net_training_correctness(self, eps, opt, from_pnl_or_not, mode, benchmark): + def test_semantic_net_training_correctness(self, eps, opt, mode, benchmark): # MECHANISMS FOR SEMANTIC NET: @@ -561,8 +556,7 @@ def test_semantic_net_training_correctness(self, eps, opt, from_pnl_or_not, mode receiver=out_sig_can) # COMPOSITION FOR SEMANTIC NET - sem_net = AutodiffComposition(param_init_from_pnl=from_pnl_or_not, - optimizer_type=opt, learning_rate=.001) + sem_net = AutodiffComposition(optimizer_type=opt, learning_rate=.001) sem_net.add_node(nouns_in) sem_net.add_node(rels_in) @@ -903,8 +897,7 @@ def test_pytorch_equivalence_with_autodiff_composition(self, mode): pco = MappingProjection(matrix=wco) pho = MappingProjection(matrix=who) - mnet = AutodiffComposition(param_init_from_pnl=True, - learning_rate=learning_rate) + mnet = AutodiffComposition(learning_rate=learning_rate) mnet.add_node(il) mnet.add_node(cl) @@ -1118,8 +1111,7 @@ def test_pytorch_equivalence_with_autodiff_training_disabled_on_proj(self): pco = MappingProjection(matrix=wco) pho = MappingProjection(matrix=who, learnable=False) - mnet = AutodiffComposition(param_init_from_pnl=True, - learning_rate=learning_rate) + mnet = AutodiffComposition(learning_rate=learning_rate) mnet.add_node(il) mnet.add_node(cl) @@ -1245,7 +1237,7 @@ def test_and_training_time(self, eps, opt,mode): # SET UP COMPOSITION - and_net = AutodiffComposition(param_init_from_pnl=True) + and_net = AutodiffComposition() and_net.add_node(and_in) and_net.add_node(and_out) @@ -1348,7 +1340,7 @@ def test_xor_training_time(self, eps, opt,mode): # SET UP COMPOSITION - xor = AutodiffComposition(param_init_from_pnl=True,bin_execute=mode) + xor = AutodiffComposition(bin_execute=mode) xor.add_node(xor_in) xor.add_node(xor_hid) @@ -1553,7 +1545,7 @@ def test_semantic_net_training_time(self, eps, opt): # COMPOSITION FOR SEMANTIC NET - sem_net = AutodiffComposition(param_init_from_pnl=True) + sem_net = AutodiffComposition() sem_net.add_node(nouns_in) sem_net.add_node(rels_in) @@ -1819,8 +1811,7 @@ def test_semantic_net_training_identicalness(self, eps, opt): receiver=out_sig_can_sys) # SET UP COMPOSITION FOR SEMANTIC NET - sem_net = AutodiffComposition(param_init_from_pnl=True, - learning_rate=0.5, + sem_net = AutodiffComposition(learning_rate=0.5, optimizer_type=opt, ) @@ -2016,7 +2007,7 @@ def test_identicalness_of_input_types(self): # SET UP COMPOSITION - xor_dict = AutodiffComposition(param_init_from_pnl=True) + xor_dict = AutodiffComposition() xor_dict.add_node(xor_in_dict) xor_dict.add_node(xor_hid_dict) @@ -2075,7 +2066,7 @@ def test_identicalness_of_input_types(self): # SET UP COMPOSITION - xor_func = AutodiffComposition(param_init_from_pnl=True) + xor_func = AutodiffComposition() xor_func.add_node(xor_in_func) xor_func.add_node(xor_hid_func) @@ -2136,7 +2127,7 @@ def get_inputs(idx): # SET UP COMPOSITION - xor_gen = AutodiffComposition(param_init_from_pnl=True) + xor_gen = AutodiffComposition() 
xor_gen.add_node(xor_in_gen) xor_gen.add_node(xor_hid_gen) @@ -2198,7 +2189,7 @@ def get_inputs_gen(): # SET UP COMPOSITION - xor_gen_func = AutodiffComposition(param_init_from_pnl=True) + xor_gen_func = AutodiffComposition() xor_gen_func.add_node(xor_in_gen_func) xor_gen_func.add_node(xor_hid_gen_func) @@ -2253,7 +2244,7 @@ def test_autodiff_logging(self): hid_map = MappingProjection() out_map = MappingProjection() - xor = AutodiffComposition(param_init_from_pnl=True) + xor = AutodiffComposition() xor.add_node(xor_in) xor.add_node(xor_hid) @@ -2337,7 +2328,7 @@ def test_autodiff_loss_tracking(self): hid_map = MappingProjection() out_map = MappingProjection() - xor = AutodiffComposition(param_init_from_pnl=True) + xor = AutodiffComposition() xor.add_node(xor_in) xor.add_node(xor_hid) @@ -2421,7 +2412,6 @@ def test_xor_nested_train_then_no_train(self, num_epochs, learning_rate, # ----------------------------------------------------------------- xor_autodiff = AutodiffComposition( - param_init_from_pnl=True, learning_rate=learning_rate, ) @@ -2493,7 +2483,6 @@ def test_xor_nested_no_train_then_train(self, num_epochs, learning_rate, # ----------------------------------------------------------------- xor_autodiff = AutodiffComposition( - param_init_from_pnl=True, learning_rate=learning_rate, ) @@ -2565,7 +2554,6 @@ def test_xor_nested_no_train_then_train(self, num_epochs, learning_rate, # # ----------------------------------------------------------------- # # xor_autodiff = AutodiffComposition( - # param_init_from_pnl=True, # patience=patience, # min_delta=min_delta, # learning_rate=learning_rate, @@ -2751,8 +2739,7 @@ def test_semantic_net_nested(self, eps, opt, mode): # SET UP COMPOSITION FOR SEMANTIC NET - sem_net = AutodiffComposition(param_init_from_pnl=True, - learning_rate=0.5, + sem_net = AutodiffComposition(learning_rate=0.5, optimizer_type=opt) sem_net.add_node(nouns_in) @@ -2890,8 +2877,7 @@ def test_call_before_minibatch(self): # SET UP COMPOSITION - xor = AutodiffComposition(param_init_from_pnl=True, - learning_rate=10) + xor = AutodiffComposition(learning_rate=10) xor.add_node(xor_in) xor.add_node(xor_hid) @@ -2958,8 +2944,7 @@ def test_call_after_minibatch(self): # SET UP COMPOSITION - xor = AutodiffComposition(param_init_from_pnl=True, - learning_rate=10) + xor = AutodiffComposition(learning_rate=10) xor.add_node(xor_in) xor.add_node(xor_hid) @@ -3030,8 +3015,7 @@ def test_batching_with_epochs_specified(self, eps): # SET UP COMPOSITION - xor = AutodiffComposition(param_init_from_pnl=True, - learning_rate=10, + xor = AutodiffComposition(learning_rate=10, # optimizer_type=opt ) diff --git a/tests/composition/test_learning.py b/tests/composition/test_learning.py index 592bd4efb6c..e11936c369d 100644 --- a/tests/composition/test_learning.py +++ b/tests/composition/test_learning.py @@ -2049,9 +2049,8 @@ def test_xor_training_identicalness_standard_composition_vs_autodiff(self, model sender=hidden_autodiff, receiver=output_autodiff) - xor_autodiff = pnl.AutodiffComposition(param_init_from_pnl=True, - learning_rate=10, - optimizer_type='sgd') + xor_autodiff = pnl.AutodiffComposition(learning_rate=10, + optimizer_type='sgd') xor_autodiff.add_node(input_autodiff) xor_autodiff.add_node(hidden_autodiff) From 616848954477fde33f784035b947cf8928fc3f56 Mon Sep 17 00:00:00 2001 From: "Samyak K. 
G" Date: Sun, 21 Jun 2020 01:18:33 -0400 Subject: [PATCH 11/25] transferfunction: Add support for vector RELU derivative --- psyneulink/core/components/functions/transferfunctions.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/psyneulink/core/components/functions/transferfunctions.py b/psyneulink/core/components/functions/transferfunctions.py index 18a7f9ab249..ba399c6f4b5 100644 --- a/psyneulink/core/components/functions/transferfunctions.py +++ b/psyneulink/core/components/functions/transferfunctions.py @@ -1511,7 +1511,11 @@ def derivative(self, input, output=None, context=None): gain = self._get_current_function_param(GAIN, context) leak = self._get_current_function_param(LEAK, context) - return gain if input > 0 else gain * leak + input = np.asarray(input).copy() + input[input>0] = gain + input[input<=0] = gain * leak + + return input # ********************************************************************************************************************** From 7199a2b0c15e1909d2dac72441cbbf8e693a4403 Mon Sep 17 00:00:00 2001 From: "Samyak K. G" Date: Sun, 21 Jun 2020 01:21:41 -0400 Subject: [PATCH 12/25] llvm/execution: Add support for tags --- psyneulink/core/components/mechanisms/mechanism.py | 4 ++-- .../modulatory/control/optimizationcontrolmechanism.py | 2 +- .../components/mechanisms/processing/transfermechanism.py | 4 ++-- psyneulink/core/llvm/execution.py | 4 ++-- .../mechanisms/modulatory/control/agt/lccontrolmechanism.py | 4 ++-- .../components/mechanisms/processing/integrator/ddm.py | 4 ++-- 6 files changed, 11 insertions(+), 11 deletions(-) diff --git a/psyneulink/core/components/mechanisms/mechanism.py b/psyneulink/core/components/mechanisms/mechanism.py index 11a5a239dae..6c25d76733d 100644 --- a/psyneulink/core/components/mechanisms/mechanism.py +++ b/psyneulink/core/components/mechanisms/mechanism.py @@ -2967,7 +2967,7 @@ def _gen_llvm_function_internal(self, ctx, builder, params, state, arg_in, arg_o self.function, f_params_ptr, ctx, builder, params, state, arg_in) f_state = pnlvm.helpers.get_state_ptr(builder, self, state, "function") - value, builder = self._gen_llvm_invoke_function(ctx, builder, self.function, f_params, f_state, ip_output) + value, builder = self._gen_llvm_invoke_function(ctx, builder, self.function, f_params, f_state, ip_output, tags=tags) # Update execution counter exec_count_ptr = pnlvm.helpers.get_state_ptr(builder, self, state, "execution_count") @@ -3030,7 +3030,7 @@ def _gen_llvm_function_body(self, ctx, builder, params, state, arg_in, arg_out, return_type=pnlvm.ir.IntType(1)) iparams, istate, iin, iout = internal_builder.function.args[:4] internal_builder, is_finished = self._gen_llvm_function_internal(ctx, internal_builder, - iparams, istate, iin, iout) + iparams, istate, iin, iout, tags=tags) internal_builder.ret(is_finished) # Call Internal Function diff --git a/psyneulink/core/components/mechanisms/modulatory/control/optimizationcontrolmechanism.py b/psyneulink/core/components/mechanisms/modulatory/control/optimizationcontrolmechanism.py index deccf0eefb4..74af3fcc5b0 100644 --- a/psyneulink/core/components/mechanisms/modulatory/control/optimizationcontrolmechanism.py +++ b/psyneulink/core/components/mechanisms/modulatory/control/optimizationcontrolmechanism.py @@ -1228,7 +1228,7 @@ def _gen_llvm_function(self, *, ctx:pnlvm.LLVMBuilderContext, tags:frozenset): return f - def _gen_llvm_invoke_function(self, ctx, builder, function, params, context, variable): + def _gen_llvm_invoke_function(self, ctx, builder, 
function, params, context, variable, *, tags:frozenset): fun = ctx.import_llvm_function(function) fun_in, builder = self._gen_llvm_function_input_parse(builder, ctx, fun, variable) fun_out = builder.alloca(fun.args[3].type.pointee) diff --git a/psyneulink/core/components/mechanisms/processing/transfermechanism.py b/psyneulink/core/components/mechanisms/processing/transfermechanism.py index 309ecdbb430..01af496170f 100644 --- a/psyneulink/core/components/mechanisms/processing/transfermechanism.py +++ b/psyneulink/core/components/mechanisms/processing/transfermechanism.py @@ -1645,7 +1645,7 @@ def _gen_llvm_function_internal(self, ctx, builder, params, state, arg_in, arg_o params, state, arg_in) mf_in, builder = self._gen_llvm_invoke_function( - ctx, builder, self.integrator_function, if_params, if_state, ip_out) + ctx, builder, self.integrator_function, if_params, if_state, ip_out, tags=tags) else: mf_in = ip_out @@ -1654,7 +1654,7 @@ def _gen_llvm_function_internal(self, ctx, builder, params, state, arg_in, arg_o mf_params, builder = self._gen_llvm_param_ports_for_obj( self.function, mf_param_ptr, ctx, builder, params, state, arg_in) - mf_out, builder = self._gen_llvm_invoke_function(ctx, builder, self.function, mf_params, mf_state, mf_in) + mf_out, builder = self._gen_llvm_invoke_function(ctx, builder, self.function, mf_params, mf_state, mf_in, tags=tags) # FIXME: Convert to runtime instead of compile time clip = self.parameters.clip.get() diff --git a/psyneulink/core/llvm/execution.py b/psyneulink/core/llvm/execution.py index 6f9bba1773a..108fd6fff72 100644 --- a/psyneulink/core/llvm/execution.py +++ b/psyneulink/core/llvm/execution.py @@ -151,9 +151,9 @@ def cuda_execute(self, variable): class FuncExecution(CUDAExecution): - def __init__(self, component, execution_ids=[None]): + def __init__(self, component, execution_ids=[None], *, tags=frozenset()): super().__init__() - self._bin_func = pnlvm.LLVMBinaryFunction.from_obj(component) + self._bin_func = pnlvm.LLVMBinaryFunction.from_obj(component, tags=tags) self._execution_contexts = [ Context(execution_id=eid) for eid in execution_ids ] diff --git a/psyneulink/library/components/mechanisms/modulatory/control/agt/lccontrolmechanism.py b/psyneulink/library/components/mechanisms/modulatory/control/agt/lccontrolmechanism.py index 1d2559a0a71..477aa0bdc1f 100644 --- a/psyneulink/library/components/mechanisms/modulatory/control/agt/lccontrolmechanism.py +++ b/psyneulink/library/components/mechanisms/modulatory/control/agt/lccontrolmechanism.py @@ -836,9 +836,9 @@ def _execute( return gain_t, output_values[0], output_values[1], output_values[2] - def _gen_llvm_invoke_function(self, ctx, builder, function, params, state, variable): + def _gen_llvm_invoke_function(self, ctx, builder, function, params, state, variable, *, tags:frozenset): assert function is self.function - mf_out, builder = super()._gen_llvm_invoke_function(ctx, builder, function, params, state, variable) + mf_out, builder = super()._gen_llvm_invoke_function(ctx, builder, function, params, state, variable, tags=tags) # prepend gain type (matches output[1] type) gain_ty = mf_out.type.pointee.elements[1] diff --git a/psyneulink/library/components/mechanisms/processing/integrator/ddm.py b/psyneulink/library/components/mechanisms/processing/integrator/ddm.py index e0d266154b6..27aa40a77ec 100644 --- a/psyneulink/library/components/mechanisms/processing/integrator/ddm.py +++ b/psyneulink/library/components/mechanisms/processing/integrator/ddm.py @@ -1078,8 +1078,8 @@ def _execute( 
return_value[self.DECISION_VARIABLE_INDEX] = threshold return return_value - def _gen_llvm_invoke_function(self, ctx, builder, function, params, state, variable): - mf_out, builder = super()._gen_llvm_invoke_function(ctx, builder, function, params, state, variable) + def _gen_llvm_invoke_function(self, ctx, builder, function, params, state, variable, *, tags:frozenset): + mf_out, builder = super()._gen_llvm_invoke_function(ctx, builder, function, params, state, variable, tags=tags) mech_out_ty = ctx.convert_python_struct_to_llvm_ir(self.defaults.value) mech_out = builder.alloca(mech_out_ty) From aafa80b2c03e1957adbb10174b84ae03f3e7a728 Mon Sep 17 00:00:00 2001 From: "Samyak K. G" Date: Sun, 21 Jun 2020 01:22:10 -0400 Subject: [PATCH 13/25] transferfunction: Add compiled function derivatives --- .../components/functions/transferfunctions.py | 76 ++++++++++++++----- 1 file changed, 57 insertions(+), 19 deletions(-) diff --git a/psyneulink/core/components/functions/transferfunctions.py b/psyneulink/core/components/functions/transferfunctions.py index ba399c6f4b5..fb14b2a2d5d 100644 --- a/psyneulink/core/components/functions/transferfunctions.py +++ b/psyneulink/core/components/functions/transferfunctions.py @@ -68,7 +68,7 @@ from psyneulink.core.globals.parameters import \ Parameter, get_validator_by_function from psyneulink.core.globals.utilities import parameter_spec, get_global_seed, safe_len -from psyneulink.core.globals.context import ContextFlags +from psyneulink.core.globals.context import ContextFlags, handle_external_context from psyneulink.core.globals.preferences.basepreferenceset import \ REPORT_OUTPUT_PREF, PreferenceEntry, PreferenceLevel, is_pref_set @@ -122,10 +122,10 @@ def _gen_llvm_function_body(self, ctx, builder, params, state, arg_in, arg_out, vo = b.gep(arg_out, [ctx.int32_ty(0), idx]) with pnlvm.helpers.array_ptr_loop(b, vi, "nested_transfer_loop") as args: self._gen_llvm_transfer(ctx=ctx, vi=vi, vo=vo, - params=params, state=state, *args) + params=params, state=state, *args, tags=tags) else: self._gen_llvm_transfer(b, idx, ctx=ctx, vi=arg_in, vo=arg_out, - params=params, state=state) + params=params, state=state, tags=tags) return builder @@ -384,7 +384,7 @@ def __init__(self, prefs=prefs, ) - def _gen_llvm_transfer(self, builder, index, ctx, vi, vo, params, state): + def _gen_llvm_transfer(self, builder, index, ctx, vi, vo, params, state, *, tags:frozenset): ptri = builder.gep(vi, [ctx.int32_ty(0), index]) ptro = builder.gep(vo, [ctx.int32_ty(0), index]) slope_ptr = pnlvm.helpers.get_param_ptr(builder, self, params, SLOPE) @@ -393,9 +393,15 @@ def _gen_llvm_transfer(self, builder, index, ctx, vi, vo, params, state): slope = pnlvm.helpers.load_extract_scalar_array_one(builder, slope_ptr) intercept = pnlvm.helpers.load_extract_scalar_array_one(builder, intercept_ptr) - val = builder.load(ptri) - val = builder.fmul(val, slope) - val = builder.fadd(val, intercept) + + if "derivative" in tags: + # f'(x) = m + val = slope + else: + # f(x) = mx + b + val = builder.load(ptri) + val = builder.fmul(val, slope) + val = builder.fadd(val, intercept) builder.store(val, ptro) @@ -451,6 +457,7 @@ def _function(self, return self.convert_output_type(result) + @handle_external_context() def derivative(self, input=None, output=None, context=None): """ derivative(input) @@ -638,7 +645,7 @@ def __init__(self, prefs=prefs, ) - def _gen_llvm_transfer(self, builder, index, ctx, vi, vo, params, state): + def _gen_llvm_transfer(self, builder, index, ctx, vi, vo, params, state, *, 
tags:frozenset): ptri = builder.gep(vi, [ctx.int32_ty(0), index]) ptro = builder.gep(vo, [ctx.int32_ty(0), index]) @@ -657,8 +664,15 @@ def _gen_llvm_transfer(self, builder, index, ctx, vi, vo, params, state): val = builder.fmul(val, rate) val = builder.fadd(val, bias) val = builder.call(exp_f, [val]) - val = builder.fmul(val, scale) - val = builder.fadd(val, offset) + + if "derivative" in tags: + # f'(x) = s*r*e^(r*x + b) + val = builder.fmul(val, scale) + val = builder.fmul(val, rate) + else: + # f(x) = s*e^(r*x + b) + o + val = builder.fmul(val, scale) + val = builder.fadd(val, offset) builder.store(val, ptro) @@ -697,6 +711,7 @@ def _function(self, result = scale * e**(rate * variable + bias) + offset return self.convert_output_type(result) + @handle_external_context() def derivative(self, input, output=None, context=None): """ derivative(input) @@ -908,7 +923,7 @@ def __init__(self, prefs=prefs, ) - def _gen_llvm_transfer(self, builder, index, ctx, vi, vo, params, state): + def _gen_llvm_transfer(self, builder, index, ctx, vi, vo, params, state, *, tags:frozenset): ptri = builder.gep(vi, [ctx.int32_ty(0), index]) ptro = builder.gep(vo, [ctx.int32_ty(0), index]) @@ -926,6 +941,7 @@ def _gen_llvm_transfer(self, builder, index, ctx, vi, vo, params, state): exp_f = ctx.get_builtin("exp", [ctx.float_ty]) val = builder.load(ptri) + val = builder.fadd(val, bias) val = builder.fsub(val, x_0) val = builder.fmul(val, gain) @@ -935,6 +951,14 @@ def _gen_llvm_transfer(self, builder, index, ctx, vi, vo, params, state): val = builder.fdiv(ctx.float_ty(1), val) val = builder.fmul(val, scale) + if "derivative" in tags: + # f(x) = g * s * o * (1-o) + function_val = val + val = builder.fsub(ctx.float_ty(1), function_val) + val = builder.fmul(function_val, val) + val = builder.fmul(gain, val) + val = builder.fmul(scale, val) + builder.store(val, ptro) def _function(self, @@ -974,6 +998,7 @@ def _function(self, return self.convert_output_type(result) + @handle_external_context() def derivative(self, input=None, output=None, context=None): """ derivative(input=None, output=None) @@ -1204,7 +1229,7 @@ def __init__(self, prefs=prefs, ) - def _gen_llvm_transfer(self, builder, index, ctx, vi, vo, params, state): + def _gen_llvm_transfer(self, builder, index, ctx, vi, vo, params, state, *, tags:frozenset): ptri = builder.gep(vi, [ctx.int32_ty(0), index]) ptro = builder.gep(vo, [ctx.int32_ty(0), index]) @@ -1212,12 +1237,16 @@ def _gen_llvm_transfer(self, builder, index, ctx, vi, vo, params, state): bias_ptr = pnlvm.helpers.get_param_ptr(builder, self, params, BIAS) x_0_ptr = pnlvm.helpers.get_param_ptr(builder, self, params, X_0) offset_ptr = pnlvm.helpers.get_param_ptr(builder, self, params, OFFSET) + scale_ptr = pnlvm.helpers.get_param_ptr(builder, self, params, SCALE) gain = pnlvm.helpers.load_extract_scalar_array_one(builder, gain_ptr) bias = pnlvm.helpers.load_extract_scalar_array_one(builder, bias_ptr) x_0 = pnlvm.helpers.load_extract_scalar_array_one(builder, x_0_ptr) offset = pnlvm.helpers.load_extract_scalar_array_one(builder, offset_ptr) exp_f = ctx.get_builtin("exp", [ctx.float_ty]) + + assert "derivative" not in tags, f"Compiled derivatives are not currently supported for {self}!" 
+ exp_val = builder.load(ptri) exp_val = builder.fadd(exp_val, bias) exp_val = builder.fsub(exp_val, x_0) @@ -1271,6 +1300,7 @@ def _function(self, return self.convert_output_type(result) + @handle_external_context() def derivative(self, input, output=None, context=None): """ derivative(input) @@ -1467,7 +1497,7 @@ def _function(self, return self.convert_output_type(result) - def _gen_llvm_transfer(self, builder, index, ctx, vi, vo, params, state): + def _gen_llvm_transfer(self, builder, index, ctx, vi, vo, params, state, *, tags:frozenset): ptri = builder.gep(vi, [ctx.int32_ty(0), index]) ptro = builder.gep(vo, [ctx.int32_ty(0), index]) @@ -1482,14 +1512,20 @@ def _gen_llvm_transfer(self, builder, index, ctx, vi, vo, params, state): # Maxnum for some reason needs full function prototype max_f = ctx.get_builtin("maxnum", [ctx.float_ty]) var = builder.load(ptri) - val = builder.fsub(var, bias) - val1 = builder.fmul(val, gain) - val2 = builder.fmul(val1, leak) - val = builder.call(max_f, [val1, val2]) + if "derivative" in tags: + predicate = builder.fcmp_ordered('>', var, var.type(0)) + val = builder.select(predicate, gain, builder.fmul(gain, leak)) + else: + val = builder.fsub(var, bias) + val1 = builder.fmul(val, gain) + val2 = builder.fmul(val1, leak) + + val = builder.call(max_f, [val1, val2]) builder.store(val, ptro) + @handle_external_context() def derivative(self, input, output=None, context=None): """ derivative(input) @@ -1678,7 +1714,7 @@ def __init__(self, prefs=prefs, ) - def _gen_llvm_transfer(self, builder, index, ctx, vi, vo, params, state): + def _gen_llvm_transfer(self, builder, index, ctx, vi, vo, params, state, *, tags:frozenset): ptri = builder.gep(vi, [ctx.int32_ty(0), index]) ptro = builder.gep(vo, [ctx.int32_ty(0), index]) @@ -1751,6 +1787,7 @@ def _function(self, return self.convert_output_type(result) + @handle_external_context() def derivative(self, input, output=None, context=None): """ derivative(input) @@ -1961,7 +1998,7 @@ def __init__(self, prefs=prefs, ) - def _gen_llvm_transfer(self, builder, index, ctx, vi, vo, params, state): + def _gen_llvm_transfer(self, builder, index, ctx, vi, vo, params, state, *, tags:frozenset): ptri = builder.gep(vi, [ctx.int32_ty(0), index]) ptro = builder.gep(vo, [ctx.int32_ty(0), index]) @@ -2425,6 +2462,7 @@ def _function(self, return self.convert_output_type(output) + @handle_external_context() def derivative(self, output, input=None, context=None): """ derivative(output) From 35a63b68040e5fd668b0a4c5b3330cc54c9256f5 Mon Sep 17 00:00:00 2001 From: "Samyak K. 
G" Date: Sun, 21 Jun 2020 01:22:24 -0400 Subject: [PATCH 14/25] tests/transferfunction: Add function derivative tests --- tests/functions/test_transfer.py | 36 ++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/tests/functions/test_transfer.py b/tests/functions/test_transfer.py index 6ffd0934cb8..2fedebd5458 100644 --- a/tests/functions/test_transfer.py +++ b/tests/functions/test_transfer.py @@ -51,6 +51,15 @@ def gaussian_distort_helper(seed): (Functions.LinearMatrix, test_var.tolist(), {'matrix':test_matrix_s.tolist()}, None, np.dot(test_var, test_matrix_s)), ] +relu_derivative_helper = lambda x : RAND1 if x > 0 else RAND1 * RAND3 +logistic_helper = RAND4 / (1 + np.exp(-(RAND1 * (test_var - RAND2)) + RAND3)) +derivative_test_data = [ + (Functions.Linear, test_var, {'slope':RAND1, 'intercept':RAND2}, RAND1), + (Functions.Exponential, test_var, {'scale':RAND1, 'rate':RAND2}, RAND1 * RAND2 * np.exp(RAND2 * test_var)), + (Functions.Logistic, test_var, {'gain':RAND1, 'x_0':RAND2, 'offset':RAND3, 'scale':RAND4}, RAND1 * RAND4 * logistic_helper * (1 - logistic_helper)), + (Functions.ReLU, test_var, {'gain':RAND1, 'bias':RAND2, 'leak':RAND3}, list(map(relu_derivative_helper, test_var))), +] + # use list, naming function produces ugly names names = [ "LINEAR", @@ -69,6 +78,13 @@ def gaussian_distort_helper(seed): "LINEAR_MATRIX TALL", ] +derivative_names = [ + "LINEAR_DERIVATIVE", + "EXPONENTIAL_DERIVATIVE", + "LOGISTIC_DERIVATIVE", + "RELU_DERIVATIVE", +] + @pytest.mark.function @pytest.mark.transfer_function @pytest.mark.benchmark @@ -90,6 +106,26 @@ def test_execute(func, variable, params, fail, expected, benchmark, mode): assert np.allclose(res, expected) benchmark(f.function, variable) +@pytest.mark.function +@pytest.mark.transfer_function +@pytest.mark.benchmark +@pytest.mark.parametrize("func, variable, params, expected", derivative_test_data, ids=derivative_names) +@pytest.mark.parametrize("mode", [ + 'Python', + pytest.param('LLVM', marks=pytest.mark.llvm), + pytest.param('PTX', marks=[pytest.mark.llvm, pytest.mark.cuda])]) +def test_execute_derivative(func, variable, params, expected, benchmark, mode): + f = func(default_variable=variable, **params) + benchmark.group = "TransferFunction " + func.componentName + " Derivative" + if mode == 'Python': + ex = f.derivative + elif mode == 'LLVM': + ex = pnlvm.execution.FuncExecution(f, tags=frozenset({"derivative"})).execute + elif mode == 'PTX': + ex = pnlvm.execution.FuncExecution(f, tags=frozenset({"derivative"})).cuda_execute + res = ex(variable) + assert np.allclose(res, expected) + benchmark(ex, variable) def test_transfer_with_costs_function(): f = Functions.TransferWithCosts() From 6ad655c42d71f9060802e8827e303960a1c3039f Mon Sep 17 00:00:00 2001 From: "Samyak K. 
G" Date: Sun, 21 Jun 2020 16:53:15 -0400 Subject: [PATCH 15/25] pytorch/llvm: Use compiled PsyNeuLink derivatives --- .../library/compositions/pytorchcomponents.py | 108 +++++------------- .../compositions/pytorchmodelcreator.py | 10 +- 2 files changed, 33 insertions(+), 85 deletions(-) diff --git a/psyneulink/library/compositions/pytorchcomponents.py b/psyneulink/library/compositions/pytorchcomponents.py index 45b62255b78..4ef2ebdff58 100644 --- a/psyneulink/library/compositions/pytorchcomponents.py +++ b/psyneulink/library/compositions/pytorchcomponents.py @@ -41,80 +41,6 @@ def get_fct_param_value(param_name): else: raise Exception(f"Function {function} is not currently supported in AutodiffCompositions!") -def bin_function_derivative_creator(ctx, node, context=None): - """ - Returns the compiled derivative version of a PsyNeuLink node - TODO: Add functionality for derivitives into base PsyNeuLink Functions, and move this functionality there - """ - # first try to get cached func - name = node.name + "_" + node.function.name + "_derivative" - try: - llvm_func = ctx.import_llvm_function(name) - return llvm_func - except Exception as e: - pass - - # args: 1) ptr to input vector - # 2) sizeof vector - # 3) ptr to output vector - float_ptr_ty = ctx.float_ty.as_pointer() - args = [float_ptr_ty, ctx.int32_ty, float_ptr_ty] - builder = ctx.create_llvm_function(args, node, name) - llvm_func = builder.function - - input_vector, dim, output_vector = llvm_func.args - - def get_fct_param_value(param_name): - val = node.function._get_current_function_param( - param_name, context) - if val is None: - val = node.function._get_current_function_param( - param_name, None) - return ctx.float_ty(val[0]) - - if isinstance(node.function, Linear): # f(x) = mx + b, f'(x) = m - slope = get_fct_param_value('slope') - - def modify_value(x): - return slope - - elif isinstance(node.function, Logistic): # f'(x) = f(x)(1-f(x)) - gain = pnlvm.helpers.fneg(builder, get_fct_param_value('gain')) - bias = get_fct_param_value('bias') - offset = get_fct_param_value('offset') - one = ctx.float_ty(1) - exp = ctx.import_llvm_function("__pnl_builtin_exp") - - def modify_value(x): - arg = builder.fadd(x, bias) - arg = builder.fmul(gain, arg) - arg = builder.fadd(arg, offset) - - f_x = builder.call(exp, [arg]) - f_x = builder.fadd(one, f_x) - f_x = builder.fdiv(one, f_x) - - ret = builder.fsub(one, f_x) - ret = builder.fmul(f_x, ret) - return ret - - else: - raise Exception( - f"Function type {node.function} is currently unsupported by compiled execution!") - - # do computations - with pnlvm.helpers.for_loop_zero_inc(builder, dim, "derivative_loop") as (builder, iterator): - val_ptr = builder.gep(input_vector, [iterator]) - val = builder.load(val_ptr) - val = modify_value(val) - output_location = builder.gep(output_vector, [iterator]) - builder.store(val, output_location) - - builder.ret_void() - - return llvm_func - - class PytorchMechanismWrapper(): """ An interpretation of a mechanism as an equivalent pytorch object @@ -152,7 +78,7 @@ def execute(self, variable): return self.value - def _gen_execute_llvm(self, ctx, builder, state, params, mech_input, data): + def _gen_llvm_execute(self, ctx, builder, state, params, mech_input, data): mech_func = ctx.import_llvm_function(self._mechanism) mech_param = builder.gep(params, [ctx.int32_ty(0), @@ -182,10 +108,32 @@ def log_value(self): self._mechanism.output_port.parameters.value._set(detached_value, self._context) self._mechanism.parameters.value._set(detached_value, self._context) 
- def _gen_execute_derivative_func_llvm(self, ctx, builder, mech_input): - derivative_func = ctx.import_llvm_function( - bin_function_derivative_creator(ctx, self._mechanism, context=self._context).name) - return gen_inject_unary_function_call(ctx, builder, derivative_func, mech_input) + def _gen_llvm_execute_derivative_func(self, ctx, builder, state, params, arg_in): + # psyneulink functions expect a 2d input, where index 0 is the vector + fun = ctx.import_llvm_function(self._mechanism.function, tags=frozenset({"derivative"})) + fun_input_ty = fun.args[2].type.pointee + + mech_input = builder.alloca(fun_input_ty) + mech_input_ptr = builder.gep(mech_input, [ctx.int32_ty(0), + ctx.int32_ty(0)]) + builder.store(builder.load(arg_in), mech_input_ptr) + + mech_params = builder.gep(params, [ctx.int32_ty(0), + ctx.int32_ty(0), + ctx.int32_ty(self._idx)]) + + mech_state = builder.gep(state, [ctx.int32_ty(0), + ctx.int32_ty(0), + ctx.int32_ty(self._idx)]) + + f_params_ptr = pnlvm.helpers.get_param_ptr(builder, self._mechanism, mech_params, "function") + f_params, builder = self._mechanism._gen_llvm_param_ports_for_obj( + self._mechanism.function, f_params_ptr, ctx, builder, mech_params, mech_state, mech_input) + f_state = pnlvm.helpers.get_state_ptr(builder, self._mechanism, mech_state, "function") + + output, _ = self._mechanism._gen_llvm_invoke_function(ctx, builder, self._mechanism.function, f_params, f_state, mech_input, tags=frozenset({"derivative"})) + return builder.gep(output, [ctx.int32_ty(0), + ctx.int32_ty(0)]) def __repr__(self): return "PytorchWrapper for: " +self._mechanism.__repr__() @@ -237,7 +185,7 @@ def _extract_llvm_matrix(self, ctx, builder, params): return proj_matrix - def _gen_execute_llvm(self, ctx, builder, state, params, data): + def _gen_llvm_execute(self, ctx, builder, state, params, data): proj_matrix = self._extract_llvm_matrix(ctx, builder, params) input_vec = builder.gep(data, [ctx.int32_ty(0), diff --git a/psyneulink/library/compositions/pytorchmodelcreator.py b/psyneulink/library/compositions/pytorchmodelcreator.py index 034f67037e3..5548f40fe1f 100644 --- a/psyneulink/library/compositions/pytorchmodelcreator.py +++ b/psyneulink/library/compositions/pytorchmodelcreator.py @@ -112,12 +112,12 @@ def _gen_llvm_forward_function_body(self, ctx, builder, state, params, arg_in, d for (proj_idx, proj) in enumerate(component.afferents): input_ptr = builder.gep( variable, [ctx.int32_ty(0), ctx.int32_ty(0), ctx.int32_ty(proj_idx)]) - proj_output = proj._gen_execute_llvm(ctx, builder, state, params, data) + proj_output = proj._gen_llvm_execute(ctx, builder, state, params, data) # store in input ports struct builder.store(builder.load(proj_output), input_ptr) # HACK: Add to z_values struct gen_inject_vec_add(ctx, builder, proj_output, z_values[component], z_values[component]) - component._gen_execute_llvm(ctx, builder, state, params, variable, data) + component._gen_llvm_execute(ctx, builder, state, params, variable, data) return z_values @@ -136,7 +136,7 @@ def _gen_llvm_training_backprop(self, ctx, optimizer, loss): if isinstance(a.type, pnlvm.ir.PointerType): a.attributes.add('noalias') - context, params, data, optim_struct = llvm_func.args + state, params, data, optim_struct = llvm_func.args model_input = builder.gep(data, [ctx.int32_ty(0), ctx.int32_ty(0), ctx.int32_ty(self._composition._get_node_index(self._composition.input_CIM))]) @@ -149,7 +149,7 @@ def _gen_llvm_training_backprop(self, ctx, optimizer, loss): # 2) call forward computation z_values = 
self._gen_llvm_forward_function_body( - ctx, builder, context, params, model_input, data) + ctx, builder, state, params, model_input, data) # 3) compute errors loss_fn = ctx.import_llvm_function(loss) @@ -162,7 +162,7 @@ def _gen_llvm_training_backprop(self, ctx, optimizer, loss): if node._mechanism in input_nodes: continue node_z_value = z_values[node] - activation_func_derivative = node._gen_execute_derivative_func_llvm(ctx, builder, node_z_value) + activation_func_derivative = node._gen_llvm_execute_derivative_func(ctx, builder, state, params, node_z_value) error_val = builder.alloca(z_values[node].type.pointee) error_dict[node] = error_val From ef3c5a301796445669d1f6fa544c6c86058a8f76 Mon Sep 17 00:00:00 2001 From: "Samyak K. G" Date: Sun, 21 Jun 2020 18:46:59 -0400 Subject: [PATCH 16/25] pytorch/llvm: Add __all__ to pytorchllvmhelper --- .../library/compositions/pytorchllvmhelper.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/psyneulink/library/compositions/pytorchllvmhelper.py b/psyneulink/library/compositions/pytorchllvmhelper.py index d2ff00cd3d9..97856c497df 100644 --- a/psyneulink/library/compositions/pytorchllvmhelper.py +++ b/psyneulink/library/compositions/pytorchllvmhelper.py @@ -1,5 +1,19 @@ from psyneulink.core import llvm as pnlvm +__all__ = ["gen_inject_unary_function_call", + "gen_inject_vec_copy", + "gen_inject_vec_binop", + "gen_inject_vec_add", + "gen_inject_vec_sub", + "gen_inject_vec_hadamard", + "gen_inject_mat_binop", + "gen_inject_mat_add", + "gen_inject_mat_sub", + "gen_inject_mat_hadamard", + "gen_inject_mat_scalar_mult", + "gen_inject_vxm", + "gen_inject_vxm_transposed"] + def gen_inject_unary_function_call(ctx, builder, unary_func, vector, output_vec=None): dim = len(vector.type.pointee) if output_vec is None: From 79bf44fa3417049235c728a463f2e19669c7355b Mon Sep 17 00:00:00 2001 From: "Samyak K. G" Date: Mon, 22 Jun 2020 20:37:06 -0400 Subject: [PATCH 17/25] transferfunction: Fix exponential derivative --- psyneulink/core/components/functions/transferfunctions.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/psyneulink/core/components/functions/transferfunctions.py b/psyneulink/core/components/functions/transferfunctions.py index fb14b2a2d5d..999d901018f 100644 --- a/psyneulink/core/components/functions/transferfunctions.py +++ b/psyneulink/core/components/functions/transferfunctions.py @@ -731,8 +731,12 @@ def derivative(self, input, output=None, context=None): """ - return self._get_current_function_param(RATE, context) * input + self._get_current_function_param(BIAS, context) + from math import e + rate = self._get_current_function_param(RATE, context) + scale = self._get_current_function_param(SCALE, context) + bias = self._get_current_function_param(BIAS, context) + return rate * scale * e**(rate * input + bias) # ********************************************************************************************************************** # Logistic From b787767ada083ea73a48da553307f505c88e2a66 Mon Sep 17 00:00:00 2001 From: "Samyak K. 
G" Date: Sat, 11 Jul 2020 16:43:48 -0400 Subject: [PATCH 18/25] transferfunctions/tanh: Remove dead code --- psyneulink/core/components/functions/transferfunctions.py | 1 - 1 file changed, 1 deletion(-) diff --git a/psyneulink/core/components/functions/transferfunctions.py b/psyneulink/core/components/functions/transferfunctions.py index 999d901018f..b914bd8f004 100644 --- a/psyneulink/core/components/functions/transferfunctions.py +++ b/psyneulink/core/components/functions/transferfunctions.py @@ -1241,7 +1241,6 @@ def _gen_llvm_transfer(self, builder, index, ctx, vi, vo, params, state, *, tags bias_ptr = pnlvm.helpers.get_param_ptr(builder, self, params, BIAS) x_0_ptr = pnlvm.helpers.get_param_ptr(builder, self, params, X_0) offset_ptr = pnlvm.helpers.get_param_ptr(builder, self, params, OFFSET) - scale_ptr = pnlvm.helpers.get_param_ptr(builder, self, params, SCALE) gain = pnlvm.helpers.load_extract_scalar_array_one(builder, gain_ptr) bias = pnlvm.helpers.load_extract_scalar_array_one(builder, bias_ptr) From 8b273e1aaec067e91e335359ecce186e69f2c504 Mon Sep 17 00:00:00 2001 From: "Samyak K. G" Date: Sun, 12 Jul 2020 05:10:39 -0400 Subject: [PATCH 19/25] transferfunction/tanh: Add multiplicative scale param --- psyneulink/core/components/functions/transferfunctions.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/psyneulink/core/components/functions/transferfunctions.py b/psyneulink/core/components/functions/transferfunctions.py index b914bd8f004..58577c63b94 100644 --- a/psyneulink/core/components/functions/transferfunctions.py +++ b/psyneulink/core/components/functions/transferfunctions.py @@ -1073,7 +1073,7 @@ class Tanh(TransferFunction): # ----------------------------------------------- .. math:: - \\frac{1 - e^{-2(gain*(variable+bias-x\\_0)+offset)}}{1 + e^{-2(gain*(variable+bias-x\\_0)+offset)}} + \\scale*frac{1 - e^{-2(gain*(variable+bias-x\\_0)+offset)}}{1 + e^{-2(gain*(variable+bias-x\\_0)+offset)}} .. note:: @@ -1292,13 +1292,14 @@ def _function(self, bias = self._get_current_function_param(BIAS, context) x_0 = self._get_current_function_param(X_0, context) offset = self._get_current_function_param(OFFSET, context) + scale = self._get_current_function_param(SCALE, context) # The following probably doesn't work with autograd (https://github.com/HIPS/autograd/issues/416) # (since np.exp doesn't work) # result = 1. / (1 + np.tanh(-gain * (variable - bias) + offset)) from math import e exponent = -2 * (gain * (variable + bias - x_0) + offset) - result = (1 - e**exponent)/ (1 + e**exponent) + result = scale * (1 - e**exponent)/ (1 + e**exponent) return self.convert_output_type(result) From 1ac7b862351bb5f0012ac933efa73d0c5c4df6ae Mon Sep 17 00:00:00 2001 From: "Samyak K. 
G" Date: Sun, 12 Jul 2020 05:11:23 -0400 Subject: [PATCH 20/25] transferfunctions/tanh: Cleanup derivative --- psyneulink/core/components/functions/transferfunctions.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/psyneulink/core/components/functions/transferfunctions.py b/psyneulink/core/components/functions/transferfunctions.py index 58577c63b94..39acbc43fdb 100644 --- a/psyneulink/core/components/functions/transferfunctions.py +++ b/psyneulink/core/components/functions/transferfunctions.py @@ -1328,8 +1328,13 @@ def derivative(self, input, output=None, context=None): offset = self._get_current_function_param(OFFSET, context) scale = self._get_current_function_param(SCALE, context) + exponent = -2 * (gain * (input + bias - x_0) + offset) + mult = -2 * gain * scale from math import e - return gain * scale / ((1 + e**(-2 * (gain * (input + bias - x_0) + offset))) / (2 * e**(-gain * (input + bias - x_0) + offset)))**2 + numerator = -2 * e**(exponent) + denominator = (1 + e**(exponent))**2 + + return mult * (numerator / denominator) # ********************************************************************************************************************** From 3e530dc2f5551abf049c901a5742dd8442de988f Mon Sep 17 00:00:00 2001 From: "Samyak K. G" Date: Sun, 12 Jul 2020 05:11:38 -0400 Subject: [PATCH 21/25] transferfunction/tanh: Add compiled derivative --- .../components/functions/transferfunctions.py | 47 ++++++++++++++----- 1 file changed, 34 insertions(+), 13 deletions(-) diff --git a/psyneulink/core/components/functions/transferfunctions.py b/psyneulink/core/components/functions/transferfunctions.py index 39acbc43fdb..b20ce505d7a 100644 --- a/psyneulink/core/components/functions/transferfunctions.py +++ b/psyneulink/core/components/functions/transferfunctions.py @@ -1241,26 +1241,47 @@ def _gen_llvm_transfer(self, builder, index, ctx, vi, vo, params, state, *, tags bias_ptr = pnlvm.helpers.get_param_ptr(builder, self, params, BIAS) x_0_ptr = pnlvm.helpers.get_param_ptr(builder, self, params, X_0) offset_ptr = pnlvm.helpers.get_param_ptr(builder, self, params, OFFSET) + scale_ptr = pnlvm.helpers.get_param_ptr(builder, self, params, SCALE) gain = pnlvm.helpers.load_extract_scalar_array_one(builder, gain_ptr) bias = pnlvm.helpers.load_extract_scalar_array_one(builder, bias_ptr) x_0 = pnlvm.helpers.load_extract_scalar_array_one(builder, x_0_ptr) offset = pnlvm.helpers.load_extract_scalar_array_one(builder, offset_ptr) - exp_f = ctx.get_builtin("exp", [ctx.float_ty]) - - assert "derivative" not in tags, f"Compiled derivatives are not currently supported for {self}!" 
+ scale = pnlvm.helpers.load_extract_scalar_array_one(builder, scale_ptr) - exp_val = builder.load(ptri) - exp_val = builder.fadd(exp_val, bias) - exp_val = builder.fsub(exp_val, x_0) - exp_val = builder.fmul(exp_val, gain) - exp_val = builder.fadd(exp_val, offset) - exp_val = builder.fmul(exp_val.type(-2), exp_val) + variable = builder.load(ptri) + exp_f = ctx.get_builtin("exp", [ctx.float_ty]) - val = builder.call(exp_f, [exp_val]) - val1 = builder.fsub(val.type(1), val) - val2 = builder.fadd(val.type(1), val) - val = builder.fdiv(val1, val2) + if "derivative" in tags: + exponent = builder.fadd(variable, bias) + exponent = builder.fsub(exponent, x_0) + exponent = builder.fmul(gain, exponent) + exponent = builder.fadd(exponent, offset) + exponent = builder.fmul(exponent.type(-2), exponent) + + mult = builder.fmul(gain, scale) + mult = builder.fmul(mult.type(-2), mult) + + exp_val = builder.call(exp_f, [exponent]) + numerator = builder.fmul(exp_val.type(-2), exp_val) + + denominator = builder.fadd(exp_val.type(1), exp_val) + denominator = builder.fmul(denominator, denominator) + + val = builder.fdiv(numerator, denominator) + val = builder.fmul(val, mult) + else: + exp_val = builder.fadd(variable, bias) + exp_val = builder.fsub(exp_val, x_0) + exp_val = builder.fmul(exp_val, gain) + exp_val = builder.fadd(exp_val, offset) + exp_val = builder.fmul(exp_val.type(-2), exp_val) + + val = builder.call(exp_f, [exp_val]) + val1 = builder.fsub(val.type(1), val) + val2 = builder.fadd(val.type(1), val) + val = builder.fdiv(val1, val2) + val = builder.fmul(val, scale) builder.store(val, ptro) From 31e95705a7b05d1ce92170af80cc61d4120fcdac Mon Sep 17 00:00:00 2001 From: "Samyak K. G" Date: Sun, 12 Jul 2020 05:11:51 -0400 Subject: [PATCH 22/25] tests/transferfunction: Add tanh derivative test --- tests/functions/test_transfer.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/functions/test_transfer.py b/tests/functions/test_transfer.py index 2fedebd5458..ef68a2e6415 100644 --- a/tests/functions/test_transfer.py +++ b/tests/functions/test_transfer.py @@ -53,11 +53,14 @@ def gaussian_distort_helper(seed): relu_derivative_helper = lambda x : RAND1 if x > 0 else RAND1 * RAND3 logistic_helper = RAND4 / (1 + np.exp(-(RAND1 * (test_var - RAND2)) + RAND3)) +tanh_derivative_helper = (RAND1 * (test_var + RAND2) + RAND3) +tanh_derivative_helper = (1 - np.tanh(tanh_derivative_helper)**2) * RAND4 * RAND1 derivative_test_data = [ (Functions.Linear, test_var, {'slope':RAND1, 'intercept':RAND2}, RAND1), (Functions.Exponential, test_var, {'scale':RAND1, 'rate':RAND2}, RAND1 * RAND2 * np.exp(RAND2 * test_var)), (Functions.Logistic, test_var, {'gain':RAND1, 'x_0':RAND2, 'offset':RAND3, 'scale':RAND4}, RAND1 * RAND4 * logistic_helper * (1 - logistic_helper)), (Functions.ReLU, test_var, {'gain':RAND1, 'bias':RAND2, 'leak':RAND3}, list(map(relu_derivative_helper, test_var))), + (Functions.Tanh, test_var, {'gain':RAND1, 'bias':RAND2, 'offset':RAND3, 'scale':RAND4}, tanh_derivative_helper), ] # use list, naming function produces ugly names @@ -83,6 +86,7 @@ def gaussian_distort_helper(seed): "EXPONENTIAL_DERIVATIVE", "LOGISTIC_DERIVATIVE", "RELU_DERIVATIVE", + "TANH_DERIVATIVE", ] @pytest.mark.function From 3ed624137b9d856b484f3858d2a1063bbb568669 Mon Sep 17 00:00:00 2001 From: "Samyak K. 
G" Date: Sun, 12 Jul 2020 22:53:06 -0400 Subject: [PATCH 23/25] tests/transferfunction: Use numpy tanh as test baseline --- tests/functions/test_transfer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/functions/test_transfer.py b/tests/functions/test_transfer.py index ef68a2e6415..61446dcb245 100644 --- a/tests/functions/test_transfer.py +++ b/tests/functions/test_transfer.py @@ -21,8 +21,8 @@ softmax_helper = softmax_helper - np.max(softmax_helper) softmax_helper = np.exp(softmax_helper) / np.sum(np.exp(softmax_helper)) -tanh_helper = -2 * (RAND1 * (test_var + RAND2 - RAND3) + RAND4) -tanh_helper = (1 - e**tanh_helper)/ (1 + e**tanh_helper) +tanh_helper = (RAND1 * (test_var + RAND2 - RAND3) + RAND4) +tanh_helper = np.tanh(tanh_helper) gaussian_helper = e**(-(test_var - RAND2)**2 / (2 * RAND1**2)) / sqrt(2 * pi * RAND1) gaussian_helper = RAND3 * gaussian_helper + RAND4 From 3f11d131e4948f98d68bd4ffc034a749dae38322 Mon Sep 17 00:00:00 2001 From: Katherine Mantel Date: Wed, 8 Jul 2020 21:11:47 -0400 Subject: [PATCH 24/25] treewide: specify parameter defaults in Parameters, not __init__ --- psyneulink/core/components/component.py | 4 - .../functions/combinationfunctions.py | 34 ++-- .../functions/distributionfunctions.py | 48 ++--- .../core/components/functions/function.py | 2 +- .../functions/interfacefunctions.py | 2 +- .../components/functions/learningfunctions.py | 32 ++-- .../functions/objectivefunctions.py | 24 +-- .../functions/optimizationfunctions.py | 32 ++-- .../functions/selectionfunctions.py | 6 +- .../statefulfunctions/integratorfunctions.py | 166 +++++++++--------- .../statefulfunctions/memoryfunctions.py | 32 ++-- .../statefulfunctions/statefulfunction.py | 10 +- .../components/functions/transferfunctions.py | 92 +++++----- .../functions/userdefinedfunction.py | 2 +- .../modulatory/control/controlmechanism.py | 6 +- .../control/gating/gatingmechanism.py | 4 +- .../control/optimizationcontrolmechanism.py | 8 +- .../modulatory/learning/learningmechanism.py | 6 +- .../compositioninterfacemechanism.py | 2 +- .../processing/objectivemechanism.py | 2 +- .../processing/processingmechanism.py | 2 +- .../processing/transfermechanism.py | 22 +-- psyneulink/core/components/ports/inputport.py | 2 +- .../ports/modulatorysignals/controlsignal.py | 12 +- .../ports/modulatorysignals/learningsignal.py | 2 +- .../core/components/ports/parameterport.py | 4 +- .../modulatory/controlprojection.py | 2 +- .../modulatory/learningprojection.py | 4 +- .../projections/pathway/mappingprojection.py | 2 +- psyneulink/core/compositions/composition.py | 8 +- .../control/agt/agtcontrolmechanism.py | 4 +- .../control/agt/lccontrolmechanism.py | 8 +- .../autoassociativelearningmechanism.py | 6 +- .../learning/kohonenlearningmechanism.py | 6 +- .../mechanisms/processing/integrator/ddm.py | 11 +- .../integrator/episodicmemorymechanism.py | 8 +- .../mechanisms/processing/leabramechanism.py | 39 ++-- .../objective/comparatormechanism.py | 2 +- .../objective/predictionerrormechanism.py | 8 +- .../transfer/contrastivehebbianmechanism.py | 37 ++-- .../processing/transfer/kohonenmechanism.py | 18 +- .../processing/transfer/kwtamechanism.py | 47 ++--- .../processing/transfer/lcamechanism.py | 58 +++--- .../transfer/recurrenttransfermechanism.py | 35 ++-- .../pathway/autoassociativeprojection.py | 4 +- .../pathway/maskedmappingprojection.py | 6 +- .../library/compositions/compositionrunner.py | 2 +- .../library/compositions/regressioncfa.py | 2 +- 48 files changed, 447 insertions(+), 428 
deletions(-) diff --git a/psyneulink/core/components/component.py b/psyneulink/core/components/component.py index 0379eff019f..52637896f2f 100644 --- a/psyneulink/core/components/component.py +++ b/psyneulink/core/components/component.py @@ -3370,10 +3370,6 @@ def _variable_shape_flexibility(self): def _variable_shape_flexibility(self, value): self.__variable_shape_flexibility = value - @classmethod - def get_constructor_defaults(cls): - return {arg_name: arg.default for (arg_name, arg) in inspect.signature(cls.__init__).parameters.items()} - @property def class_parameters(self): return self.__class__.parameters diff --git a/psyneulink/core/components/functions/combinationfunctions.py b/psyneulink/core/components/functions/combinationfunctions.py index 64b1773138e..0a0fae9e70b 100644 --- a/psyneulink/core/components/functions/combinationfunctions.py +++ b/psyneulink/core/components/functions/combinationfunctions.py @@ -197,11 +197,11 @@ class Parameters(CombinationFunction.Parameters): @tc.typecheck def __init__(self, default_variable=None, - scale: parameter_spec = 1.0, - offset: parameter_spec = 0.0, + scale: tc.optional(parameter_spec) = None, + offset: tc.optional(parameter_spec) = None, params=None, owner=None, - prefs: is_pref_set = None): + prefs: tc.optional(is_pref_set) = None): super().__init__( default_variable=default_variable, @@ -416,12 +416,12 @@ class Parameters(CombinationFunction.Parameters): @tc.typecheck def __init__(self, default_variable=None, - scale: parameter_spec = 1.0, - offset: parameter_spec = 0.0, + scale: tc.optional(parameter_spec) = None, + offset: tc.optional(parameter_spec) = None, arrangement:tc.optional(tc.any(int, tuple, list))=None, params=None, owner=None, - prefs: is_pref_set = None): + prefs: tc.optional(is_pref_set) = None): super().__init__( default_variable=default_variable, @@ -716,12 +716,12 @@ def __init__(self, weights=None, exponents=None, default_variable=None, - operation: tc.enum(SUM, PRODUCT) = SUM, - scale: parameter_spec = 1.0, - offset: parameter_spec = 0.0, + operation: tc.optional(tc.enum(SUM, PRODUCT)) = None, + scale: tc.optional(parameter_spec) = None, + offset: tc.optional(parameter_spec) = None, params=None, owner=None, - prefs: is_pref_set = None): + prefs: tc.optional(is_pref_set) = None): super().__init__( default_variable=default_variable, @@ -1151,12 +1151,12 @@ def __init__(self, # exponents: tc.optional(parameter_spec)=None, weights=None, exponents=None, - operation: tc.enum(SUM, PRODUCT) = SUM, + operation: tc.optional(tc.enum(SUM, PRODUCT)) = None, scale=None, offset=None, params=None, owner=None, - prefs: is_pref_set = None): + prefs: tc.optional(is_pref_set) = None): super().__init__( default_variable=default_variable, @@ -1675,12 +1675,12 @@ def __init__(self, # exponents:tc.optional(parameter_spec)=None, weights=None, exponents=None, - operation: tc.enum(SUM, PRODUCT) = SUM, + operation: tc.optional(tc.enum(SUM, PRODUCT)) = None, scale=None, offset=None, params=None, owner=None, - prefs: is_pref_set = None): + prefs: tc.optional(is_pref_set) = None): super().__init__( default_variable=default_variable, @@ -1930,10 +1930,10 @@ class Parameters(CombinationFunction.Parameters): @tc.typecheck def __init__(self, default_variable=None, - gamma: tc.optional(float) = 1.0, + gamma: tc.optional(tc.optional(float)) = None, params=None, owner=None, - prefs: is_pref_set = None): + prefs: tc.optional(is_pref_set) = None): super().__init__( default_variable=default_variable, @@ -1943,8 +1943,6 @@ def __init__(self, prefs=prefs, 
) - self.gamma = gamma - def _validate_variable(self, variable, context=None): """ Insure that all items of variable are numeric diff --git a/psyneulink/core/components/functions/distributionfunctions.py b/psyneulink/core/components/functions/distributionfunctions.py index 2ca91ce86ee..1de497abb38 100644 --- a/psyneulink/core/components/functions/distributionfunctions.py +++ b/psyneulink/core/components/functions/distributionfunctions.py @@ -154,12 +154,12 @@ class Parameters(DistributionFunction.Parameters): @tc.typecheck def __init__(self, default_variable=None, - mean=0.0, - standard_deviation=1.0, + mean=None, + standard_deviation=None, params=None, owner=None, seed=None, - prefs: is_pref_set = None): + prefs: tc.optional(is_pref_set) = None): if seed is None: seed = get_global_seed() @@ -179,7 +179,7 @@ def __init__(self, def _validate_params(self, request_set, target_set=None, context=None): super()._validate_params(request_set=request_set, target_set=target_set, context=context) - if STANDARD_DEVIATION in target_set: + if STANDARD_DEVIATION in target_set and target_set[STANDARD_DEVIATION] is not None: if target_set[STANDARD_DEVIATION] < 0.0: raise FunctionError("The standard_deviation parameter ({}) of {} must be greater than zero.". format(target_set[STANDARD_DEVIATION], self.name)) @@ -340,12 +340,12 @@ class Parameters(DistributionFunction.Parameters): @tc.typecheck def __init__(self, default_variable=None, - mean=0.0, - standard_deviation=1.0, + mean=None, + standard_deviation=None, params=None, owner=None, seed=None, - prefs: is_pref_set = None): + prefs: tc.optional(is_pref_set) = None): if seed is None: seed = get_global_seed() @@ -470,11 +470,11 @@ class Parameters(DistributionFunction.Parameters): @tc.typecheck def __init__(self, default_variable=None, - beta=1.0, + beta=None, seed=None, params=None, owner=None, - prefs: is_pref_set = None): + prefs: tc.optional(is_pref_set) = None): if seed is None: seed = get_global_seed() @@ -599,12 +599,12 @@ class Parameters(DistributionFunction.Parameters): @tc.typecheck def __init__(self, default_variable=None, - low=0.0, - high=1.0, + low=None, + high=None, seed=None, params=None, owner=None, - prefs: is_pref_set = None): + prefs: tc.optional(is_pref_set) = None): if seed is None: seed = get_global_seed() @@ -738,12 +738,12 @@ class Parameters(DistributionFunction.Parameters): @tc.typecheck def __init__(self, default_variable=None, - scale=1.0, - dist_shape=1.0, + scale=None, + dist_shape=None, seed=None, params=None, owner=None, - prefs: is_pref_set = None): + prefs: tc.optional(is_pref_set) = None): if seed is None: seed = get_global_seed() @@ -874,12 +874,12 @@ class Parameters(DistributionFunction.Parameters): @tc.typecheck def __init__(self, default_variable=None, - scale=1.0, - mean=1.0, + scale=None, + mean=None, seed=None, params=None, owner=None, - prefs: is_pref_set = None): + prefs: tc.optional(is_pref_set) = None): if seed is None: seed = get_global_seed() @@ -1113,14 +1113,14 @@ class Parameters(DistributionFunction.Parameters): @tc.typecheck def __init__(self, default_variable=None, - drift_rate: parameter_spec = 1.0, - starting_point: parameter_spec = 0.0, - threshold: parameter_spec = 1.0, - noise: parameter_spec = 0.5, - t0: parameter_spec = .200, + drift_rate: tc.optional(parameter_spec) = None, + starting_point: tc.optional(parameter_spec) = None, + threshold: tc.optional(parameter_spec) = None, + noise: tc.optional(parameter_spec) = None, + t0: tc.optional(parameter_spec) = None, params=None, owner=None, - prefs: 
is_pref_set = None, + prefs: tc.optional(is_pref_set) = None, shenhav_et_al_compat_mode=False): self._shenhav_et_al_compat_mode = shenhav_et_al_compat_mode diff --git a/psyneulink/core/components/functions/function.py b/psyneulink/core/components/functions/function.py index 77ede13772e..6814c544c1c 100644 --- a/psyneulink/core/components/functions/function.py +++ b/psyneulink/core/components/functions/function.py @@ -822,7 +822,7 @@ def __init__(self, pertincacity=Manner.CONTRARIAN, params=None, owner=None, - prefs: is_pref_set = None): + prefs: tc.optional(is_pref_set) = None): super().__init__( default_variable=default_variable, diff --git a/psyneulink/core/components/functions/interfacefunctions.py b/psyneulink/core/components/functions/interfacefunctions.py index e4e4f8baaba..66e055d724b 100644 --- a/psyneulink/core/components/functions/interfacefunctions.py +++ b/psyneulink/core/components/functions/interfacefunctions.py @@ -122,7 +122,7 @@ def __init__(self, corresponding_input_port=None, params=None, owner=None, - prefs: is_pref_set = None): + prefs: tc.optional(is_pref_set) = None): super().__init__( default_variable=default_variable, diff --git a/psyneulink/core/components/functions/learningfunctions.py b/psyneulink/core/components/functions/learningfunctions.py index 16b8bd2a76c..68edbecbd06 100644 --- a/psyneulink/core/components/functions/learningfunctions.py +++ b/psyneulink/core/components/functions/learningfunctions.py @@ -435,14 +435,14 @@ class Parameters(LearningFunction.Parameters): def __init__(self, default_variable=None, - mu_0=0, - sigma_0=1, - gamma_shape_0=1, - gamma_size_0=1, + mu_0=None, + sigma_0=None, + gamma_shape_0=None, + gamma_size_0=None, params=None, owner=None, seed=None, - prefs: is_pref_set = None): + prefs: tc.optional(is_pref_set) = None): self.user_specified_default_variable = default_variable @@ -761,12 +761,12 @@ def _validate_distance_function(self, distance_function): def __init__(self, default_variable=None, - # learning_rate: tc.optional(parameter_spec) = None, + # learning_rate: tc.optional(tc.optional(parameter_spec)) = None, learning_rate=None, - distance_function:tc.any(tc.enum(GAUSSIAN, LINEAR, EXPONENTIAL), is_function_type)=GAUSSIAN, + distance_function:tc.any(tc.enum(GAUSSIAN, LINEAR, EXPONENTIAL), is_function_type)=None, params=None, owner=None, - prefs: is_pref_set = None): + prefs: tc.optional(is_pref_set) = None): super().__init__( default_variable=default_variable, @@ -1028,7 +1028,7 @@ def __init__(self, learning_rate=None, params=None, owner=None, - prefs: is_pref_set = None): + prefs: tc.optional(is_pref_set) = None): super().__init__( default_variable=default_variable, @@ -1253,11 +1253,11 @@ class Parameters(LearningFunction.Parameters): def __init__(self, default_variable=None, - # learning_rate: tc.optional(parameter_spec) = None, + # learning_rate: tc.optional(tc.optional(parameter_spec)) = None, learning_rate=None, params=None, owner=None, - prefs: is_pref_set = None): + prefs: tc.optional(is_pref_set) = None): super().__init__( default_variable=default_variable, @@ -1548,11 +1548,11 @@ class Parameters(LearningFunction.Parameters): def __init__(self, default_variable=None, - # learning_rate: tc.optional(parameter_spec) = None, + # learning_rate: tc.optional(tc.optional(parameter_spec)) = None, learning_rate=None, params=None, owner=None, - prefs: is_pref_set = None): + prefs: tc.optional(is_pref_set) = None): super().__init__( default_variable=default_variable, @@ -1900,13 +1900,13 @@ class 
Parameters(LearningFunction.Parameters): @tc.typecheck def __init__(self, default_variable=None, - activation_derivative_fct: tc.optional(tc.any(types.FunctionType, types.MethodType)) = None, - # learning_rate: tc.optional(parameter_spec) = None, + activation_derivative_fct: tc.optional(tc.optional(tc.any(types.FunctionType, types.MethodType))) = None, + # learning_rate: tc.optional(tc.optional(parameter_spec)) = None, learning_rate=None, loss_function=None, params=None, owner=None, - prefs: is_pref_set = None): + prefs: tc.optional(is_pref_set) = None): error_matrix = np.zeros((len(default_variable[LEARNING_ACTIVATION_OUTPUT]), len(default_variable[LEARNING_ERROR_OUTPUT]))) diff --git a/psyneulink/core/components/functions/objectivefunctions.py b/psyneulink/core/components/functions/objectivefunctions.py index cb5b6d8ab22..aa09a2ab03b 100644 --- a/psyneulink/core/components/functions/objectivefunctions.py +++ b/psyneulink/core/components/functions/objectivefunctions.py @@ -212,14 +212,14 @@ class Parameters(ObjectiveFunction.Parameters): def __init__(self, default_variable=None, size=None, - matrix=HOLLOW_MATRIX, - # metric:is_distance_metric=ENERGY, - metric: tc.any(tc.enum(ENERGY, ENTROPY), is_distance_metric) = ENERGY, - transfer_fct: tc.optional(tc.any(types.FunctionType, types.MethodType)) = None, - normalize: bool = False, + matrix=None, + # metric:is_distance_metric=None, + metric: tc.optional(tc.any(tc.enum(ENERGY, ENTROPY), is_distance_metric)) = None, + transfer_fct: tc.optional(tc.optional(tc.any(types.FunctionType, types.MethodType))) = None, + normalize: tc.optional(bool) = None, params=None, owner=None, - prefs: is_pref_set = None): + prefs: tc.optional(is_pref_set) = None): if size: if default_variable is None: @@ -560,9 +560,9 @@ class Energy(Stability): def __init__(self, default_variable=None, size=None, - normalize:bool=False, + normalize:bool=None, # transfer_fct=None, - matrix=HOLLOW_MATRIX, + matrix=None, params=None, owner=None, prefs=None): @@ -668,7 +668,7 @@ class Entropy(Stability): def __init__(self, default_variable=None, - normalize:bool=False, + normalize:bool=None, transfer_fct=None, params=None, owner=None, @@ -781,11 +781,11 @@ class Parameters(ObjectiveFunction.Parameters): @tc.typecheck def __init__(self, default_variable=None, - metric: DistanceMetrics._is_metric = DIFFERENCE, - normalize: bool = False, + metric: tc.optional(DistanceMetrics._is_metric) = None, + normalize: tc.optional(bool) = None, params=None, owner=None, - prefs: is_pref_set = None): + prefs: tc.optional(is_pref_set) = None): super().__init__( default_variable=default_variable, metric=metric, diff --git a/psyneulink/core/components/functions/optimizationfunctions.py b/psyneulink/core/components/functions/optimizationfunctions.py index 802b15d70f6..06247f6ba1a 100644 --- a/psyneulink/core/components/functions/optimizationfunctions.py +++ b/psyneulink/core/components/functions/optimizationfunctions.py @@ -345,8 +345,8 @@ def __init__( search_function:tc.optional(is_function_type)=None, search_space=None, search_termination_function:tc.optional(is_function_type)=None, - save_samples:tc.optional(bool)=False, - save_values:tc.optional(bool)=False, + save_samples:tc.optional(bool)=None, + save_values:tc.optional(bool)=None, max_iterations:tc.optional(int)=None, params=None, owner=None, @@ -857,15 +857,15 @@ def __init__(self, default_variable=None, objective_function:tc.optional(is_function_type)=None, gradient_function:tc.optional(is_function_type)=None, - 
direction:tc.optional(tc.enum(ASCENT, DESCENT))=ASCENT, + direction:tc.optional(tc.enum(ASCENT, DESCENT))=None, search_space=None, - step_size:tc.optional(tc.any(int, float))=1.0, + step_size:tc.optional(tc.any(int, float))=None, annealing_function:tc.optional(is_function_type)=None, - convergence_criterion:tc.optional(tc.enum(VARIABLE, VALUE))=VALUE, - convergence_threshold:tc.optional(tc.any(int, float))=.001, - max_iterations:tc.optional(int)=1000, - save_samples:tc.optional(bool)=False, - save_values:tc.optional(bool)=False, + convergence_criterion:tc.optional(tc.enum(VARIABLE, VALUE))=None, + convergence_threshold:tc.optional(tc.any(int, float))=None, + max_iterations:tc.optional(int)=None, + save_samples:tc.optional(bool)=None, + save_values:tc.optional(bool)=None, params=None, owner=None, prefs=None): @@ -1254,10 +1254,10 @@ def __init__(self, default_variable=None, objective_function:tc.optional(is_function_type)=None, search_space=None, - direction:tc.optional(tc.enum(MAXIMIZE, MINIMIZE))=MAXIMIZE, - save_values:tc.optional(bool)=False, + direction:tc.optional(tc.enum(MAXIMIZE, MINIMIZE))=None, + save_values:tc.optional(bool)=None, # tolerance=0., - select_randomly_from_optimal_values=False, + select_randomly_from_optimal_values=None, seed=None, params=None, owner=None, @@ -1288,7 +1288,7 @@ def __init__(self, search_space=search_space, select_randomly_from_optimal_values=select_randomly_from_optimal_values, save_samples=True, - save_values=True, + save_values=save_values, random_state=random_state, direction=direction, params=params, @@ -2013,8 +2013,8 @@ def __init__(self, default_variable=None, objective_function:tc.optional(is_function_type)=None, search_space=None, - direction:tc.optional(tc.enum(MAXIMIZE, MINIMIZE))=MAXIMIZE, - save_values:tc.optional(bool)=False, + direction:tc.optional(tc.enum(MAXIMIZE, MINIMIZE))=None, + save_values:tc.optional(bool)=None, params=None, owner=None, prefs=None, @@ -2033,7 +2033,7 @@ def __init__(self, search_space=search_space, search_termination_function=search_termination_function, save_samples=True, - save_values=True, + save_values=save_values, params=params, owner=owner, prefs=prefs, diff --git a/psyneulink/core/components/functions/selectionfunctions.py b/psyneulink/core/components/functions/selectionfunctions.py index bb630090068..9ab2cd66d7f 100644 --- a/psyneulink/core/components/functions/selectionfunctions.py +++ b/psyneulink/core/components/functions/selectionfunctions.py @@ -203,13 +203,13 @@ def _validate_mode(self, mode): @tc.typecheck def __init__(self, default_variable=None, - mode: tc.enum(MAX_VAL, MAX_ABS_VAL, MAX_INDICATOR, MAX_ABS_INDICATOR, + mode: tc.optional(tc.enum(MAX_VAL, MAX_ABS_VAL, MAX_INDICATOR, MAX_ABS_INDICATOR, MIN_VAL, MIN_ABS_VAL, MIN_INDICATOR, MIN_ABS_INDICATOR, - PROB, PROB_INDICATOR)=MAX_VAL, + PROB, PROB_INDICATOR))=None, seed=None, params=None, owner=None, - prefs: is_pref_set = None): + prefs: tc.optional(is_pref_set) = None): if seed is None: seed = get_global_seed() diff --git a/psyneulink/core/components/functions/statefulfunctions/integratorfunctions.py b/psyneulink/core/components/functions/statefulfunctions/integratorfunctions.py index c33c30c7d07..1f34c01bc29 100644 --- a/psyneulink/core/components/functions/statefulfunctions/integratorfunctions.py +++ b/psyneulink/core/components/functions/statefulfunctions/integratorfunctions.py @@ -226,12 +226,12 @@ class Parameters(StatefulFunction.Parameters): @tc.typecheck def __init__(self, default_variable=None, - rate=1.0, - noise=0.0, + rate=None, 
+ noise=None, initializer=None, - params: tc.optional(dict) = None, + params: tc.optional(tc.optional(dict)) = None, owner=None, - prefs: is_pref_set = None, + prefs: tc.optional(is_pref_set) = None, context=None, **kwargs): @@ -559,11 +559,11 @@ def __init__(self, default_variable=None, rate=None, increment=None, - noise=0.0, + noise=None, initializer=None, - params: tc.optional(dict) = None, + params: tc.optional(tc.optional(dict)) = None, owner=None, - prefs: is_pref_set = None): + prefs: tc.optional(is_pref_set) = None): super().__init__( default_variable=default_variable, @@ -809,13 +809,13 @@ class Parameters(IntegratorFunction.Parameters): @tc.typecheck def __init__(self, default_variable=None, - rate: parameter_spec = 1.0, - noise=0.0, - offset=0.0, + rate: tc.optional(parameter_spec) = None, + noise=None, + offset=None, initializer=None, - params: tc.optional(dict) = None, + params: tc.optional(tc.optional(dict)) = None, owner=None, - prefs: is_pref_set = None): + prefs: tc.optional(is_pref_set) = None): super().__init__( default_variable=default_variable, rate=rate, @@ -1043,13 +1043,13 @@ class Parameters(IntegratorFunction.Parameters): @tc.typecheck def __init__(self, default_variable=None, - rate=1.0, - noise=0.0, - offset=0.0, + rate=None, + noise=None, + offset=None, initializer=None, - params: tc.optional(dict) = None, + params: tc.optional(tc.optional(dict)) = None, owner=None, - prefs: is_pref_set = None): + prefs: tc.optional(is_pref_set) = None): super().__init__( default_variable=default_variable, @@ -1556,19 +1556,19 @@ def __init__(self, # rate: parameter_spec = 0.5, # noise=0.0, initializer=None, - initial_short_term_avg=0.0, - initial_long_term_avg=0.0, - short_term_gain=1.0, - long_term_gain=1.0, - short_term_bias=0.0, - long_term_bias=0.0, - short_term_rate=0.9, - long_term_rate=0.1, - operation=PRODUCT, - offset=0.0, - params: tc.optional(dict) = None, + initial_short_term_avg=None, + initial_long_term_avg=None, + short_term_gain=None, + long_term_gain=None, + short_term_bias=None, + long_term_bias=None, + short_term_rate=None, + long_term_rate=None, + operation=None, + offset=None, + params: tc.optional(tc.optional(dict)) = None, owner=None, - prefs: is_pref_set = None): + prefs: tc.optional(is_pref_set) = None): if not hasattr(self, "initializers"): self.initializers = ["initial_long_term_avg", "initial_short_term_avg"] @@ -1655,7 +1655,7 @@ def _validate_params(self, request_set, target_set=None, context=None): # raise FunctionError( # "The rate parameter ({}) (or all of its elements) of {} must be between 0.0 and " # "1.0 when integration_type is set to ADAPTIVE.".format(target_set[RATE], self.name)) - if not all_within_range(target_set[RATE], 0, 1): + if target_set[RATE] is not None and not all_within_range(target_set[RATE], 0, 1): raise FunctionError("The rate parameter ({}) (or all of its elements) of {} " "must be in the interval [0,1]".format(target_set[RATE], self.name)) @@ -1669,7 +1669,7 @@ def _validate_params(self, request_set, target_set=None, context=None): # self._validate_initializer(target_set[INITIALIZER]) if OPERATION in target_set: - if not target_set[OPERATION] in OPERATIONS: + if target_set[OPERATION] is not None and not target_set[OPERATION] in OPERATIONS: raise FunctionError("\'{}\' arg for {} must be one of the following: {}". 
format(OPERATION, self.name, OPERATIONS)) @@ -2006,16 +2006,16 @@ class Parameters(IntegratorFunction.Parameters): @tc.typecheck def __init__(self, default_variable=None, - rate: parameter_spec = 1.0, - decay: parameter_spec = 0.0, - rest: parameter_spec = 0.0, - max_val: parameter_spec = 1.0, - min_val: parameter_spec = -1.0, - noise=0.0, + rate: tc.optional(parameter_spec) = None, + decay: tc.optional(parameter_spec) = None, + rest: tc.optional(parameter_spec) = None, + max_val: tc.optional(parameter_spec) = None, + min_val: tc.optional(parameter_spec) = None, + noise=None, initializer=None, - params: tc.optional(dict) = None, + params: tc.optional(tc.optional(dict)) = None, owner=None, - prefs: is_pref_set = None, + prefs: tc.optional(is_pref_set) = None, # **kwargs ): @@ -2403,17 +2403,17 @@ class Parameters(IntegratorFunction.Parameters): @tc.typecheck def __init__(self, default_variable=None, - rate: parameter_spec = 1.0, - noise=0.0, - offset: parameter_spec = 0.0, - starting_point=0.0, - threshold=100.0, - time_step_size=1.0, + rate: tc.optional(parameter_spec) = None, + noise=None, + offset: tc.optional(parameter_spec) = None, + starting_point=None, + threshold=None, + time_step_size=None, initializer=None, seed=None, - params: tc.optional(dict) = None, + params: tc.optional(tc.optional(dict)) = None, owner=None, - prefs: is_pref_set = None): + prefs: tc.optional(is_pref_set) = None): if seed is None: seed = get_global_seed() @@ -2445,7 +2445,7 @@ def __init__(self, self.has_initializers = True def _validate_noise(self, noise): - if not isinstance(noise, float) and not(isinstance(noise, np.ndarray) and np.issubdtype(noise.dtype, np.floating)): + if noise is not None and not isinstance(noise, float) and not(isinstance(noise, np.ndarray) and np.issubdtype(noise.dtype, np.floating)): raise FunctionError( "Invalid noise parameter for {}: {}. DriftDiffusionIntegrator requires noise parameter to be a float or float array." " Noise parameter is used to construct the standard DDM noise distribution".format(self.name, type(noise))) @@ -2820,17 +2820,17 @@ class Parameters(IntegratorFunction.Parameters): @tc.typecheck def __init__(self, default_variable=None, - rate: parameter_spec = 1.0, - decay=1.0, - noise=0.0, - offset: parameter_spec = 0.0, - starting_point=0.0, - time_step_size=1.0, + rate: tc.optional(parameter_spec) = None, + decay=None, + noise=None, + offset: tc.optional(parameter_spec) = None, + starting_point=None, + time_step_size=None, initializer=None, - params: tc.optional(dict) = None, + params: tc.optional(tc.optional(dict)) = None, seed=None, owner=None, - prefs: is_pref_set = None): + prefs: tc.optional(is_pref_set) = None): if not hasattr(self, "initializers"): self.initializers = ["initializer", "starting_point"] @@ -2863,7 +2863,7 @@ def __init__(self, self.has_initializers = True def _validate_noise(self, noise): - if not isinstance(noise, float): + if noise is not None and not isinstance(noise, float): raise FunctionError( "Invalid noise parameter for {}. OrnsteinUhlenbeckIntegrator requires noise parameter to be a float. 
" "Noise parameter is used to construct the standard DDM noise distribution".format(self.name)) @@ -3116,14 +3116,14 @@ class Parameters(IntegratorFunction.Parameters): @tc.typecheck def __init__(self, default_variable=None, - leak: parameter_spec = 1.0, - noise=0.0, + leak: tc.optional(parameter_spec) = None, + noise=None, offset=None, - time_step_size=0.1, + time_step_size=None, initializer=None, - params: tc.optional(dict) = None, + params: tc.optional(tc.optional(dict)) = None, owner=None, - prefs: is_pref_set = None, + prefs: tc.optional(is_pref_set) = None, **kwargs): # IMPLEMENTATION NOTE: For backward compatibility of LeakyFun in tests/functions/test_integrator.py @@ -3794,28 +3794,28 @@ def __init__(self, default_variable=None, # scale=1.0, # offset=0.0, - initial_w=0.0, - initial_v=0.0, - a_v=-1 / 3, - b_v=0.0, - c_v=1.0, - d_v=0.0, - e_v=-1.0, - f_v=1.0, - time_constant_v=1.0, - a_w=1.0, - b_w=-0.8, - c_w=0.7, - time_constant_w=12.5, - t_0=0.0, - threshold=-1.0, - time_step_size=0.05, - mode=1.0, - uncorrelated_activity=0.0, - integration_method="RK4", - params: tc.optional(dict) = None, + initial_w=None, + initial_v=None, + a_v=None, + b_v=None, + c_v=None, + d_v=None, + e_v=None, + f_v=None, + time_constant_v=None, + a_w=None, + b_w=None, + c_w=None, + time_constant_w=None, + t_0=None, + threshold=None, + time_step_size=None, + mode=None, + uncorrelated_activity=None, + integration_method=None, + params: tc.optional(tc.optional(dict)) = None, owner=None, - prefs: is_pref_set = None, + prefs: tc.optional(is_pref_set) = None, **kwargs): # These may be passed (as standard IntegratorFunction args) but are not used by FitzHughNagumo diff --git a/psyneulink/core/components/functions/statefulfunctions/memoryfunctions.py b/psyneulink/core/components/functions/statefulfunctions/memoryfunctions.py index be48bbb2f2e..b822e31c54c 100644 --- a/psyneulink/core/components/functions/statefulfunctions/memoryfunctions.py +++ b/psyneulink/core/components/functions/statefulfunctions/memoryfunctions.py @@ -207,19 +207,19 @@ def __init__(self, # was failing. 
# For now, updated default_variable, noise, and Alternatively, we can change validation on # default_variable=None, # Changed to [] because None conflicts with initializer - rate=1.0, - noise=0.0, + rate=None, + noise=None, # rate: parameter_spec=1.0, # noise: parameter_spec=0.0, - # rate: tc.optional(tc.any(int, float)) = None, # Changed to 1.0 because None fails validation - # noise: tc.optional(tc.any(int, float, callable)) = None, # Changed to 0.0 - None fails validation + # rate: tc.optional(tc.optional(tc.any(int, float))) = None, # Changed to 1.0 because None fails validation + # noise: tc.optional(tc.optional(tc.any(int, float, callable))) = None, # Changed to 0.0 - None fails validation # rate: tc.optional(tc.any(int, float, list, np.ndarray)) = 1.0, # noise: tc.optional(tc.any(int, float, list, np.ndarray, callable)) = 0.0, - history: tc.optional(int) = None, + history: tc.optional(tc.optional(int)) = None, initializer=None, - params: tc.optional(dict) = None, + params: tc.optional(tc.optional(dict)) = None, owner=None, - prefs: is_pref_set = None): + prefs: tc.optional(is_pref_set) = None): super().__init__( default_variable=default_variable, @@ -687,20 +687,20 @@ class Parameters(StatefulFunction.Parameters): @tc.typecheck def __init__(self, default_variable=None, - retrieval_prob: tc.optional(tc.any(int, float))=1.0, - storage_prob: tc.optional(tc.any(int, float))=1.0, - noise: tc.optional(tc.any(int, float, list, np.ndarray, callable))=0.0, - rate: tc.optional(tc.any(int, float, list, np.ndarray))=1.0, + retrieval_prob: tc.optional(tc.any(int, float))=None, + storage_prob: tc.optional(tc.any(int, float))=None, + noise: tc.optional(tc.any(int, float, list, np.ndarray, callable))=None, + rate: tc.optional(tc.any(int, float, list, np.ndarray))=None, initializer=None, distance_function:tc.optional(tc.any(Distance, is_function_type))=None, selection_function:tc.optional(tc.any(OneHot, is_function_type))=None, - duplicate_keys:tc.any(bool, tc.enum(OVERWRITE))=False, - equidistant_keys_select:tc.enum(RANDOM, OLDEST, NEWEST)=RANDOM, - max_entries=1000, + duplicate_keys:tc.optional(tc.any(bool, tc.enum(OVERWRITE)))=None, + equidistant_keys_select:tc.optional(tc.enum(RANDOM, OLDEST, NEWEST))=None, + max_entries=None, seed=None, - params: tc.optional(tc.any(list, np.ndarray)) = None, + params: tc.optional(tc.optional(tc.any(list, np.ndarray))) = None, owner=None, - prefs: is_pref_set = None): + prefs: tc.optional(is_pref_set) = None): if initializer is None: initializer = [] diff --git a/psyneulink/core/components/functions/statefulfunctions/statefulfunction.py b/psyneulink/core/components/functions/statefulfunctions/statefulfunction.py index 17f60f6e497..e734b28492b 100644 --- a/psyneulink/core/components/functions/statefulfunctions/statefulfunction.py +++ b/psyneulink/core/components/functions/statefulfunctions/statefulfunction.py @@ -201,12 +201,12 @@ class Parameters(Function_Base.Parameters): @tc.typecheck def __init__(self, default_variable=None, - rate=1.0, - noise=0.0, + rate=None, + noise=None, initializer=None, - params: tc.optional(dict) = None, + params: tc.optional(tc.optional(dict)) = None, owner=None, - prefs: is_pref_set = None, + prefs: tc.optional(is_pref_set) = None, context=None, **kwargs ): @@ -391,7 +391,7 @@ def _validate_noise(self, noise): "{} is not a valid noise element for {}".format(noise[i], self.name)) # Otherwise, must be a float, int or function - elif not isinstance(noise, (float, int)) and not callable(noise): + elif noise is not None and not 
isinstance(noise, (float, int)) and not callable(noise): raise FunctionError( "Noise parameter ({}) for {} must be a float, function, or array/list of these." .format(noise, self.name)) diff --git a/psyneulink/core/components/functions/transferfunctions.py b/psyneulink/core/components/functions/transferfunctions.py index b20ce505d7a..a2fab9742a6 100644 --- a/psyneulink/core/components/functions/transferfunctions.py +++ b/psyneulink/core/components/functions/transferfunctions.py @@ -202,7 +202,7 @@ def __init__(self, default_variable=None, params=None, owner=None, - prefs: is_pref_set = None): + prefs: tc.optional(is_pref_set) = None): super().__init__(default_variable=default_variable, params=params, owner=owner, @@ -369,11 +369,11 @@ class Parameters(TransferFunction.Parameters): @tc.typecheck def __init__(self, default_variable=None, - slope: tc.optional(parameter_spec) = None, - intercept: tc.optional(parameter_spec) = None, + slope: tc.optional(tc.optional(parameter_spec)) = None, + intercept: tc.optional(tc.optional(parameter_spec)) = None, params=None, owner=None, - prefs: is_pref_set = None): + prefs: tc.optional(is_pref_set) = None): super().__init__( default_variable=default_variable, @@ -627,13 +627,13 @@ class Parameters(TransferFunction.Parameters): @tc.typecheck def __init__(self, default_variable=None, - rate: parameter_spec = 1.0, - scale: parameter_spec = 1.0, - bias: parameter_spec = 0.0, - offset: parameter_spec = 0.0, + rate: tc.optional(parameter_spec) = None, + scale: tc.optional(parameter_spec) = None, + bias: tc.optional(parameter_spec) = None, + offset: tc.optional(parameter_spec) = None, params=None, owner=None, - prefs: is_pref_set = None): + prefs: tc.optional(is_pref_set) = None): super().__init__( default_variable=default_variable, rate=rate, @@ -907,14 +907,14 @@ class Parameters(TransferFunction.Parameters): @tc.typecheck def __init__(self, default_variable=None, - gain: parameter_spec = 1.0, - x_0=0.0, - bias=0.0, - offset: parameter_spec = 0.0, - scale: parameter_spec = 1.0, + gain: tc.optional(parameter_spec) = None, + x_0=None, + bias=None, + offset: tc.optional(parameter_spec) = None, + scale: tc.optional(parameter_spec) = None, params=None, owner=None, - prefs: is_pref_set = None): + prefs: tc.optional(is_pref_set) = None): super().__init__( default_variable=default_variable, gain=gain, @@ -1213,14 +1213,14 @@ class Parameters(TransferFunction.Parameters): @tc.typecheck def __init__(self, default_variable=None, - gain: parameter_spec = 1.0, - x_0=0.0, - bias=0.0, - offset: parameter_spec = 0.0, - scale: parameter_spec = 1.0, + gain: tc.optional(parameter_spec) = None, + x_0=None, + bias=None, + offset: tc.optional(parameter_spec) = None, + scale: tc.optional(parameter_spec) = None, params=None, owner=None, - prefs: is_pref_set = None): + prefs: tc.optional(is_pref_set) = None): super().__init__( default_variable=default_variable, gain=gain, @@ -1479,12 +1479,12 @@ class Parameters(TransferFunction.Parameters): @tc.typecheck def __init__(self, default_variable=None, - gain: parameter_spec = 1.0, - bias: parameter_spec = 0.0, - leak: parameter_spec = 0.0, + gain: tc.optional(parameter_spec) = None, + bias: tc.optional(parameter_spec) = None, + leak: tc.optional(parameter_spec) = None, params=None, owner=None, - prefs: is_pref_set = None): + prefs: tc.optional(is_pref_set) = None): super().__init__( default_variable=default_variable, gain=gain, @@ -1726,13 +1726,13 @@ class Parameters(TransferFunction.Parameters): @tc.typecheck def __init__(self, 
default_variable=None, - standard_deviation: parameter_spec = 1.0, - bias: parameter_spec = 0.0, - scale: parameter_spec = 1.0, - offset: parameter_spec = 0.0, + standard_deviation: tc.optional(parameter_spec) = None, + bias: tc.optional(parameter_spec) = None, + scale: tc.optional(parameter_spec) = None, + offset: tc.optional(parameter_spec) = None, params=None, owner=None, - prefs: is_pref_set = None): + prefs: tc.optional(is_pref_set) = None): super().__init__( default_variable=default_variable, standard_deviation=standard_deviation, @@ -2002,14 +2002,14 @@ class Parameters(TransferFunction.Parameters): @tc.typecheck def __init__(self, default_variable=None, - variance: parameter_spec = 1.0, - bias: parameter_spec = 0.0, - scale: parameter_spec = 1.0, - offset: parameter_spec = 0.0, + variance: tc.optional(parameter_spec) = None, + bias: tc.optional(parameter_spec) = None, + scale: tc.optional(parameter_spec) = None, + offset: tc.optional(parameter_spec) = None, seed=None, params=None, owner=None, - prefs: is_pref_set = None): + prefs: tc.optional(is_pref_set) = None): if seed is None: seed = get_global_seed() @@ -2289,12 +2289,12 @@ def _validate_output(self, output): @tc.typecheck def __init__(self, default_variable=None, - gain: parameter_spec = 1.0, + gain: tc.optional(parameter_spec) = None, output=None, - per_item=True, - params: tc.optional(dict) = None, + per_item=None, + params: tc.optional(tc.optional(dict)) = None, owner=None, - prefs: is_pref_set = None): + prefs: tc.optional(is_pref_set) = None): try: # needed because one_hot_function is initialized here based @@ -2693,7 +2693,7 @@ def __init__(self, matrix=None, params=None, owner=None, - prefs: is_pref_set = None): + prefs: tc.optional(is_pref_set) = None): # Note: this calls _validate_variable and _validate_params which are overridden below; # the latter implements the matrix if required @@ -3811,15 +3811,15 @@ class Parameters(TransferFunction.Parameters): def __init__(self, default_variable=None, size=None, - transfer_fct:(is_function_type)=Linear, + transfer_fct:tc.optional(is_function_type)=None, enabled_cost_functions:tc.optional(tc.any(CostFunctions, list))=None, - intensity_cost_fct:(is_function_type)=Exponential, - adjustment_cost_fct:tc.optional(is_function_type)=Linear, - duration_cost_fct:tc.optional(is_function_type)=SimpleIntegrator, - combine_costs_fct:tc.optional(is_function_type)=LinearCombination, + intensity_cost_fct:tc.optional(is_function_type)=None, + adjustment_cost_fct:tc.optional(is_function_type)=None, + duration_cost_fct:tc.optional(is_function_type)=None, + combine_costs_fct:tc.optional(is_function_type)=None, params=None, owner=None, - prefs: is_pref_set = None): + prefs: tc.optional(is_pref_set) = None): # if size: # if default_variable is None: diff --git a/psyneulink/core/components/functions/userdefinedfunction.py b/psyneulink/core/components/functions/userdefinedfunction.py index c59cf739346..2b9c1468b1f 100644 --- a/psyneulink/core/components/functions/userdefinedfunction.py +++ b/psyneulink/core/components/functions/userdefinedfunction.py @@ -384,7 +384,7 @@ def __init__(self, default_variable=None, params=None, owner=None, - prefs: is_pref_set = None, + prefs: tc.optional(is_pref_set) = None, **kwargs): def get_cust_fct_args(custom_function): diff --git a/psyneulink/core/components/mechanisms/modulatory/control/controlmechanism.py b/psyneulink/core/components/mechanisms/modulatory/control/controlmechanism.py index 43434629c3c..2d16070cd18 100644 --- 
a/psyneulink/core/components/mechanisms/modulatory/control/controlmechanism.py +++ b/psyneulink/core/components/mechanisms/modulatory/control/controlmechanism.py @@ -1189,13 +1189,13 @@ def __init__(self, InputPort, OutputPort, ControlSignal))=None, - modulation:tc.optional(str)=MULTIPLICATIVE, - combine_costs:is_function_type=np.sum, + modulation:tc.optional(str)=None, + combine_costs:tc.optional(is_function_type)=None, compute_reconfiguration_cost:tc.optional(is_function_type)=None, compute_net_outcome=None, params=None, name=None, - prefs:is_pref_set=None, + prefs:tc.optional(is_pref_set)=None, **kwargs ): diff --git a/psyneulink/core/components/mechanisms/modulatory/control/gating/gatingmechanism.py b/psyneulink/core/components/mechanisms/modulatory/control/gating/gatingmechanism.py index bb156db4020..93b0c1bf787 100644 --- a/psyneulink/core/components/mechanisms/modulatory/control/gating/gatingmechanism.py +++ b/psyneulink/core/components/mechanisms/modulatory/control/gating/gatingmechanism.py @@ -447,8 +447,8 @@ def __init__(self, monitor_for_gating=None, function=None, default_allocation:tc.optional(tc.any(int, float, list, np.ndarray))=None, - gate:tc.optional(list) = None, - modulation:tc.optional(str)=MULTIPLICATIVE, + gate:tc.optional(tc.optional(list)) = None, + modulation:tc.optional(str)=None, params=None, name=None, prefs:is_pref_set=None, diff --git a/psyneulink/core/components/mechanisms/modulatory/control/optimizationcontrolmechanism.py b/psyneulink/core/components/mechanisms/modulatory/control/optimizationcontrolmechanism.py index 74af3fcc5b0..133dce6786e 100644 --- a/psyneulink/core/components/mechanisms/modulatory/control/optimizationcontrolmechanism.py +++ b/psyneulink/core/components/mechanisms/modulatory/control/optimizationcontrolmechanism.py @@ -713,11 +713,11 @@ class Parameters(ControlMechanism.Parameters): def __init__(self, agent_rep=None, function=None, - features: tc.optional(tc.any(Iterable, Mechanism, OutputPort, InputPort)) = None, - feature_function: tc.optional(tc.any(is_function_type)) = None, + features: tc.optional(tc.optional(tc.any(Iterable, Mechanism, OutputPort, InputPort))) = None, + feature_function: tc.optional(tc.optional(tc.any(is_function_type))) = None, num_estimates = None, - search_function: tc.optional(tc.any(is_function_type)) = None, - search_termination_function: tc.optional(tc.any(is_function_type)) = None, + search_function: tc.optional(tc.optional(tc.any(is_function_type))) = None, + search_termination_function: tc.optional(tc.optional(tc.any(is_function_type))) = None, search_statefulness=None, context=None, **kwargs): diff --git a/psyneulink/core/components/mechanisms/modulatory/learning/learningmechanism.py b/psyneulink/core/components/mechanisms/modulatory/learning/learningmechanism.py index 905fe5de23f..020d07d5fb5 100644 --- a/psyneulink/core/components/mechanisms/modulatory/learning/learningmechanism.py +++ b/psyneulink/core/components/mechanisms/modulatory/learning/learningmechanism.py @@ -1023,11 +1023,11 @@ def __init__(self, size=None, error_sources:tc.optional(tc.any(Mechanism, list))=None, function=None, - learning_signals:tc.optional(list) = None, + learning_signals:tc.optional(tc.optional(list)) = None, output_ports=None, - modulation:tc.optional(str)=ADDITIVE, + modulation:tc.optional(str)=None, learning_rate:tc.optional(parameter_spec)=None, - learning_enabled:tc.optional(tc.any(bool, tc.enum(ONLINE, AFTER)))=True, + learning_enabled:tc.optional(tc.any(bool, tc.enum(ONLINE, AFTER)))=None, in_composition=False, 
params=None, name=None, diff --git a/psyneulink/core/components/mechanisms/processing/compositioninterfacemechanism.py b/psyneulink/core/components/mechanisms/processing/compositioninterfacemechanism.py index 1359856070a..ddd4ffb0dc9 100644 --- a/psyneulink/core/components/mechanisms/processing/compositioninterfacemechanism.py +++ b/psyneulink/core/components/mechanisms/processing/compositioninterfacemechanism.py @@ -122,7 +122,7 @@ class Parameters(ProcessingMechanism_Base.Parameters): def __init__(self, default_variable=None, size=None, - input_ports: tc.optional(tc.any(Iterable, Mechanism, OutputPort, InputPort)) = None, + input_ports: tc.optional(tc.optional(tc.any(Iterable, Mechanism, OutputPort, InputPort))) = None, function=None, composition=None, port_map=None, diff --git a/psyneulink/core/components/mechanisms/processing/objectivemechanism.py b/psyneulink/core/components/mechanisms/processing/objectivemechanism.py index 1324ab613b9..7d93a5ac4b6 100644 --- a/psyneulink/core/components/mechanisms/processing/objectivemechanism.py +++ b/psyneulink/core/components/mechanisms/processing/objectivemechanism.py @@ -567,7 +567,7 @@ def __init__(self, monitor=None, default_variable=None, size=None, - function=LinearCombination, + function=None, output_ports:tc.optional(tc.any(str, Iterable))=None, params=None, name=None, diff --git a/psyneulink/core/components/mechanisms/processing/processingmechanism.py b/psyneulink/core/components/mechanisms/processing/processingmechanism.py index 1588718edae..aede053ea3f 100644 --- a/psyneulink/core/components/mechanisms/processing/processingmechanism.py +++ b/psyneulink/core/components/mechanisms/processing/processingmechanism.py @@ -285,7 +285,7 @@ def __init__(self, size=None, input_ports:tc.optional(tc.any(list, dict))=None, output_ports:tc.optional(tc.any(str, Iterable))=None, - function=Linear, + function=None, params=None, name=None, prefs:is_pref_set=None, diff --git a/psyneulink/core/components/mechanisms/processing/transfermechanism.py b/psyneulink/core/components/mechanisms/processing/transfermechanism.py index 01af496170f..f18359397ad 100644 --- a/psyneulink/core/components/mechanisms/processing/transfermechanism.py +++ b/psyneulink/core/components/mechanisms/processing/transfermechanism.py @@ -609,7 +609,6 @@ import numbers import warnings import logging -import operator import types from collections.abc import Iterable @@ -1069,7 +1068,7 @@ class Parameters(ProcessingMechanism_Base.Parameters): loggable=False ) termination_threshold = Parameter(None, modulable=True) - termination_comparison_op = Parameter(operator.le, modulable=False, loggable=False) + termination_comparison_op = Parameter(LESS_THAN_OR_EQUAL, modulable=False, loggable=False) termination_measure_value = Parameter(0.0, modulable=False, read_only=True) output_ports = Parameter( @@ -1113,21 +1112,21 @@ def __init__(self, default_variable=None, size=None, input_ports:tc.optional(tc.any(Iterable, Mechanism, OutputPort, InputPort))=None, - function=Linear, - integrator_mode=False, + function=None, + integrator_mode=None, integrator_function=None, initial_value=None, - integration_rate=0.5, - on_resume_integrator_mode=INSTANTANEOUS_MODE_VALUE, - noise=0.0, + integration_rate=None, + on_resume_integrator_mode=None, + noise=None, clip=None, termination_measure=None, termination_threshold:tc.optional(tc.any(int, float))=None, - termination_comparison_op:tc.any(str, is_comparison_operator)=LESS_THAN_OR_EQUAL, + termination_comparison_op: tc.optional(tc.any(str, is_comparison_operator)) 
= None, output_ports:tc.optional(tc.any(str, Iterable))=None, params=None, name=None, - prefs:is_pref_set=None, + prefs: tc.optional(is_pref_set) = None, **kwargs): """Assign type-level preferences and call super.__init__ """ @@ -1138,9 +1137,6 @@ def __init__(self, if output_ports is None or output_ports == RESULTS: output_ports = [RESULTS] - if termination_measure is None: - Distance(metric=MAX_ABS_DIFF) - initial_value = self._parse_arg_initial_value(initial_value) # self.integrator_function = None @@ -1304,7 +1300,7 @@ def _validate_noise(self, noise): pass # Otherwise, must be a float, int or function - elif not isinstance(noise, (float, int)) and not callable(noise): + elif noise is not None and not isinstance(noise, (float, int)) and not callable(noise): raise MechanismError("Noise parameter ({}) for {} must be a float, " "function, or array/list of these.".format(noise, self.name)) diff --git a/psyneulink/core/components/ports/inputport.py b/psyneulink/core/components/ports/inputport.py index 632669ca5a8..f98f1781570 100644 --- a/psyneulink/core/components/ports/inputport.py +++ b/psyneulink/core/components/ports/inputport.py @@ -764,7 +764,7 @@ def __init__(self, combine:tc.optional(tc.enum(SUM,PRODUCT))=None, weight=None, exponent=None, - internal_only:bool=False, + internal_only: tc.optional(bool) = None, params=None, name=None, prefs:is_pref_set=None, diff --git a/psyneulink/core/components/ports/modulatorysignals/controlsignal.py b/psyneulink/core/components/ports/modulatorysignals/controlsignal.py index 9a1f62f44f5..bc135a51028 100644 --- a/psyneulink/core/components/ports/modulatorysignals/controlsignal.py +++ b/psyneulink/core/components/ports/modulatorysignals/controlsignal.py @@ -838,7 +838,7 @@ class Parameters(ModulatorySignal.Parameters): getter=_duration_cost_function_getter ) combine_costs_function = Parameter( - Reduce(operation=SUM), + Reduce, stateful=False, loggable=False, function_parameter=True, @@ -898,11 +898,11 @@ def __init__(self, size=None, transfer_function=None, cost_options:tc.optional(tc.any(CostFunctions, list))=None, - intensity_cost_function:(is_function_type)=Exponential, - adjustment_cost_function:tc.optional(is_function_type)=Linear, - duration_cost_function:tc.optional(is_function_type)=SimpleIntegrator, - combine_costs_function:tc.optional(is_function_type)=Reduce, - allocation_samples=Parameters.allocation_samples.default_value, + intensity_cost_function:tc.optional(is_function_type)=None, + adjustment_cost_function:tc.optional(is_function_type)=None, + duration_cost_function:tc.optional(is_function_type)=None, + combine_costs_function:tc.optional(is_function_type)=None, + allocation_samples=None, modulation:tc.optional(str)=None, modulates=None, params=None, diff --git a/psyneulink/core/components/ports/modulatorysignals/learningsignal.py b/psyneulink/core/components/ports/modulatorysignals/learningsignal.py index fe55e9531d5..ff0699c6743 100644 --- a/psyneulink/core/components/ports/modulatorysignals/learningsignal.py +++ b/psyneulink/core/components/ports/modulatorysignals/learningsignal.py @@ -342,7 +342,7 @@ def __init__(self, index=PRIMARY, assign=None, function=None, - learning_rate: tc.optional(parameter_spec) = None, + learning_rate: tc.optional(tc.optional(parameter_spec)) = None, modulation:tc.optional(str)=None, modulates=None, params=None, diff --git a/psyneulink/core/components/ports/parameterport.py b/psyneulink/core/components/ports/parameterport.py index 2aaffc07c42..b22fc40e5c4 100644 --- 
a/psyneulink/core/components/ports/parameterport.py +++ b/psyneulink/core/components/ports/parameterport.py @@ -871,13 +871,13 @@ def _get_tuple_for_single_item_modulatory_spec(obj, name, value): """Return (, ) for modulatory spec """ try: - param_default_value = obj.get_constructor_defaults()[name] + param_default_value = getattr(obj.defaults, name) # Only assign default value if it is not None if param_default_value is not None: return (param_default_value, value) else: return value - except KeyError: + except AttributeError: raise ParameterPortError("Unrecognized specification for {} paramater of {} ({})". format(param_name, owner.name, param_value)) diff --git a/psyneulink/core/components/projections/modulatory/controlprojection.py b/psyneulink/core/components/projections/modulatory/controlprojection.py index 316a47bd5fa..2e896dbac1a 100644 --- a/psyneulink/core/components/projections/modulatory/controlprojection.py +++ b/psyneulink/core/components/projections/modulatory/controlprojection.py @@ -252,7 +252,7 @@ def __init__(self, receiver=None, weight=None, exponent=None, - function=Linear, + function=None, control_signal_params:tc.optional(dict)=None, params=None, name=None, diff --git a/psyneulink/core/components/projections/modulatory/learningprojection.py b/psyneulink/core/components/projections/modulatory/learningprojection.py index d522f1ec356..00c5e35764a 100644 --- a/psyneulink/core/components/projections/modulatory/learningprojection.py +++ b/psyneulink/core/components/projections/modulatory/learningprojection.py @@ -442,8 +442,8 @@ class Parameters(ModulatoryProjection_Base.Parameters): def __init__(self, sender:tc.optional(tc.any(LearningSignal, LearningMechanism))=None, receiver:tc.optional(tc.any(ParameterPort, MappingProjection))=None, - error_function:tc.optional(is_function_type)=LinearCombination(weights=[[-1], [1]]), - learning_function:tc.optional(is_function_type)=BackPropagation, + error_function:tc.optional(is_function_type)=None, + learning_function:tc.optional(is_function_type)=None, # FIX: 10/3/17 - TEST IF THIS OK AND REINSTATE IF SO # learning_signal_params:tc.optional(dict)=None, learning_rate:tc.optional(tc.any(parameter_spec))=None, diff --git a/psyneulink/core/components/projections/pathway/mappingprojection.py b/psyneulink/core/components/projections/pathway/mappingprojection.py index 0354654d9f0..1634190abd7 100644 --- a/psyneulink/core/components/projections/pathway/mappingprojection.py +++ b/psyneulink/core/components/projections/pathway/mappingprojection.py @@ -450,7 +450,7 @@ def __init__(self, receiver=None, weight=None, exponent=None, - matrix=DEFAULT_MATRIX, + matrix=None, function=None, params=None, name=None, diff --git a/psyneulink/core/compositions/composition.py b/psyneulink/core/compositions/composition.py index 693f8c9d8a4..08a14196edc 100644 --- a/psyneulink/core/compositions/composition.py +++ b/psyneulink/core/compositions/composition.py @@ -8392,13 +8392,13 @@ def run( def learn( self, inputs: dict, - targets: dict = None, - num_trials: int = None, + targets: tc.optional(dict) = None, + num_trials: tc.optional(int) = None, epochs: int = 1, minibatch_size: int = 1, - patience: int = None, + patience: tc.optional(int) = None, min_delta: int = 0, - context: Context = None, + context: tc.optional(Context) = None, bin_execute=False, randomize_minibatches=False, call_before_minibatch = None, diff --git a/psyneulink/library/components/mechanisms/modulatory/control/agt/agtcontrolmechanism.py 
b/psyneulink/library/components/mechanisms/modulatory/control/agt/agtcontrolmechanism.py index 4263adfb8a9..f5e46de9981 100644 --- a/psyneulink/library/components/mechanisms/modulatory/control/agt/agtcontrolmechanism.py +++ b/psyneulink/library/components/mechanisms/modulatory/control/agt/agtcontrolmechanism.py @@ -249,9 +249,9 @@ class AGTControlMechanism(ControlMechanism): def __init__(self, monitored_output_ports=None, function=None, - # control_signals:tc.optional(list) = None, + # control_signals:tc.optional(tc.optional(list)) = None, control_signals= None, - modulation:tc.optional(str)=MULTIPLICATIVE, + modulation:tc.optional(str)=None, params=None, name=None, prefs:is_pref_set=None): diff --git a/psyneulink/library/components/mechanisms/modulatory/control/agt/lccontrolmechanism.py b/psyneulink/library/components/mechanisms/modulatory/control/agt/lccontrolmechanism.py index 477aa0bdc1f..6b2a0fa4eeb 100644 --- a/psyneulink/library/components/mechanisms/modulatory/control/agt/lccontrolmechanism.py +++ b/psyneulink/library/components/mechanisms/modulatory/control/agt/lccontrolmechanism.py @@ -669,9 +669,9 @@ def __init__(self, default_variable=None, objective_mechanism:tc.optional(tc.any(ObjectiveMechanism, list))=None, monitor_for_control:tc.optional(tc.any(is_iterable, Mechanism, OutputPort))=None, - # modulated_mechanisms:tc.optional(tc.any(list,str)) = None, + # modulated_mechanisms:tc.optional(tc.optional(tc.any(list,str))) = None, modulated_mechanisms=None, - modulation:tc.optional(str)=MULTIPLICATIVE, + modulation:tc.optional(str)=None, integration_method="RK4", initial_w_FitzHughNagumo=0.0, initial_v_FitzHughNagumo=0.0, @@ -691,8 +691,8 @@ def __init__(self, time_constant_w_FitzHughNagumo=12.5, mode_FitzHughNagumo=1.0, uncorrelated_activity_FitzHughNagumo=0.0, - base_level_gain=0.5, - scaling_factor_gain=3.0, + base_level_gain=None, + scaling_factor_gain=None, params=None, name=None, prefs:is_pref_set=None diff --git a/psyneulink/library/components/mechanisms/modulatory/learning/autoassociativelearningmechanism.py b/psyneulink/library/components/mechanisms/modulatory/learning/autoassociativelearningmechanism.py index d22c6646eff..85410aa43e2 100644 --- a/psyneulink/library/components/mechanisms/modulatory/learning/autoassociativelearningmechanism.py +++ b/psyneulink/library/components/mechanisms/modulatory/learning/autoassociativelearningmechanism.py @@ -325,9 +325,9 @@ class Parameters(LearningMechanism.Parameters): def __init__(self, default_variable:tc.any(list, np.ndarray), size=None, - function:is_function_type=Hebbian, - learning_signals:tc.optional(list) = None, - modulation:tc.optional(str)=ADDITIVE, + function: tc.optional(is_function_type) = None, + learning_signals:tc.optional(tc.optional(list)) = None, + modulation:tc.optional(str)=None, learning_rate:tc.optional(parameter_spec)=None, params=None, name=None, diff --git a/psyneulink/library/components/mechanisms/modulatory/learning/kohonenlearningmechanism.py b/psyneulink/library/components/mechanisms/modulatory/learning/kohonenlearningmechanism.py index ce3ac899d35..dea5294a497 100644 --- a/psyneulink/library/components/mechanisms/modulatory/learning/kohonenlearningmechanism.py +++ b/psyneulink/library/components/mechanisms/modulatory/learning/kohonenlearningmechanism.py @@ -328,9 +328,9 @@ def __init__(self, default_variable:tc.any(list, np.ndarray), size=None, matrix:tc.optional(ParameterPort)=None, - function:is_function_type=Hebbian, - learning_signals:tc.optional(list) = None, - 
modulation:tc.optional(str)=ADDITIVE, + function: tc.optional(is_function_type) = None, + learning_signals:tc.optional(tc.optional(list)) = None, + modulation:tc.optional(str)=None, learning_rate:tc.optional(parameter_spec)=None, params=None, name=None, diff --git a/psyneulink/library/components/mechanisms/processing/integrator/ddm.py b/psyneulink/library/components/mechanisms/processing/integrator/ddm.py index 27aa40a77ec..5513efb05b5 100644 --- a/psyneulink/library/components/mechanisms/processing/integrator/ddm.py +++ b/psyneulink/library/components/mechanisms/processing/integrator/ddm.py @@ -719,6 +719,13 @@ class Parameters(ProcessingMechanism.Parameters): initializer = np.array([[0]]) random_state = Parameter(None, stateful=True, loggable=False) + output_ports = Parameter( + [DECISION_VARIABLE, RESPONSE_TIME], + stateful=False, + loggable=False, + read_only=True, + structural=True, + ) standard_output_ports =[{NAME: DECISION_VARIABLE,}, # Upper or lower threshold for Analtyic function {NAME: RESPONSE_TIME}, # TIME_STEP within TRIAL for Integrator function @@ -740,11 +747,11 @@ def __init__(self, input_format:tc.optional(tc.enum(SCALAR, ARRAY, VECTOR))=None, function=None, input_ports=None, - output_ports:tc.optional(tc.any(str, Iterable))=(DECISION_VARIABLE, RESPONSE_TIME), + output_ports: tc.optional(tc.any(str, Iterable)) = None, seed=None, params=None, name=None, - prefs: is_pref_set = None, + prefs: tc.optional(is_pref_set) = None, **kwargs): # Override instantiation of StandardOutputPorts usually done in _instantiate_output_ports diff --git a/psyneulink/library/components/mechanisms/processing/integrator/episodicmemorymechanism.py b/psyneulink/library/components/mechanisms/processing/integrator/episodicmemorymechanism.py index 74b65d7cbf8..96d0b11d2d7 100644 --- a/psyneulink/library/components/mechanisms/processing/integrator/episodicmemorymechanism.py +++ b/psyneulink/library/components/mechanisms/processing/integrator/episodicmemorymechanism.py @@ -237,8 +237,8 @@ class Parameters(ProcessingMechanism_Base.Parameters): ) def __init__(self, - content_size:int=1, - assoc_size:int=0, + content_size:int=None, + assoc_size:int=None, function:Function=None, params=None, name=None, @@ -249,10 +249,10 @@ def __init__(self, input_ports = None output_ports = None - if content_size != self.defaults.content_size: + if content_size is not None and content_size != self.defaults.content_size: input_ports = _generate_content_input_port_spec(content_size) - if assoc_size != self.defaults.assoc_size: + if assoc_size is not None and assoc_size != self.defaults.assoc_size: try: input_ports.append({NAME: ASSOC_INPUT, SIZE: assoc_size}) except AttributeError: diff --git a/psyneulink/library/components/mechanisms/processing/leabramechanism.py b/psyneulink/library/components/mechanisms/processing/leabramechanism.py index f62c60b5491..ae45b7fa91a 100644 --- a/psyneulink/library/components/mechanisms/processing/leabramechanism.py +++ b/psyneulink/library/components/mechanisms/processing/leabramechanism.py @@ -114,7 +114,7 @@ from psyneulink.core.components.mechanisms.processing.processingmechanism import ProcessingMechanism_Base from psyneulink.core.globals.keywords import FUNCTION, INPUT_PORTS, LEABRA_FUNCTION, LEABRA_FUNCTION_TYPE, LEABRA_MECHANISM, NETWORK, OUTPUT_PORTS, PREFERENCE_SET_NAME from psyneulink.core.globals.parameters import Parameter -from psyneulink.core.globals.preferences.basepreferenceset import is_pref_set, REPORT_OUTPUT_PREF +from 
psyneulink.core.globals.preferences.basepreferenceset import REPORT_OUTPUT_PREF from psyneulink.core.globals.preferences.preferenceset import PreferenceEntry, PreferenceLevel from psyneulink.core.scheduling.time import TimeScale @@ -490,19 +490,20 @@ class Parameters(ProcessingMechanism_Base.Parameters): quarter_size = 50 network = Parameter(None, getter=_network_getter, setter=_network_setter) - training_flag = Parameter(None, setter=_training_flag_setter) + training_flag = Parameter(False, setter=_training_flag_setter) def __init__(self, network=None, - input_size=1, - output_size=1, - hidden_layers=0, + input_size=None, + output_size=None, + hidden_layers=None, hidden_sizes=None, training_flag=None, - quarter_size=50, + quarter_size=None, params=None, name=None, - prefs: is_pref_set = None): + prefs=None + ): if not leabra_available: raise LeabraError('leabra python module is not installed. Please install it from ' 'https://github.com/benureau/leabra') @@ -517,14 +518,28 @@ def __init__(self, else: if hidden_sizes is None: hidden_sizes = input_size - if training_flag is None: - training_flag = False - network = build_leabra_network(input_size, output_size, hidden_layers, hidden_sizes, - training_flag, quarter_size) + + # don't directly assign defaults to their corresponding variable + # because that may cause their parameter to be incorrectly assigned + # _user_specified=True + network = build_leabra_network( + input_size if input_size is not None else self.class_defaults.input_size, + output_size if output_size is not None else self.class_defaults.output_size, + hidden_layers if hidden_layers is not None else self.class_defaults.hidden_layers, + hidden_sizes if hidden_sizes is not None else self.class_defaults.hidden_sizes, + training_flag if training_flag is not None else self.class_defaults.training_flag, + quarter_size if quarter_size is not None else self.class_defaults.quarter_size, + ) + + size = [ + input_size if input_size is not None else self.class_defaults.input_size, + output_size if output_size is not None else self.class_defaults.output_size + ] super().__init__( + # override instantiate_function instead of doing this? 
function=LeabraFunction(network=network), - size=[input_size, output_size], + size=size, network=network, input_size=input_size, output_size=output_size, diff --git a/psyneulink/library/components/mechanisms/processing/objective/comparatormechanism.py b/psyneulink/library/components/mechanisms/processing/objective/comparatormechanism.py index 6132e796f42..1b39a088548 100644 --- a/psyneulink/library/components/mechanisms/processing/objective/comparatormechanism.py +++ b/psyneulink/library/components/mechanisms/processing/objective/comparatormechanism.py @@ -330,7 +330,7 @@ def __init__(self, sample: tc.optional(tc.any(OutputPort, Mechanism_Base, dict, is_numeric, str))=None, target: tc.optional(tc.any(OutputPort, Mechanism_Base, dict, is_numeric, str))=None, function=None, - output_ports:tc.optional(tc.any(str, Iterable)) = None, + output_ports:tc.optional(tc.optional(tc.any(str, Iterable))) = None, params=None, name=None, prefs:is_pref_set=None, diff --git a/psyneulink/library/components/mechanisms/processing/objective/predictionerrormechanism.py b/psyneulink/library/components/mechanisms/processing/objective/predictionerrormechanism.py index 69a782c2e0a..4d06bcffcde 100644 --- a/psyneulink/library/components/mechanisms/processing/objective/predictionerrormechanism.py +++ b/psyneulink/library/components/mechanisms/processing/objective/predictionerrormechanism.py @@ -291,12 +291,12 @@ def __init__(self, target: tc.optional(tc.any(OutputPort, Mechanism_Base, dict, is_numeric, str)) = None, - function=PredictionErrorDeltaFunction(), - output_ports: tc.optional(tc.any(str, Iterable)) = None, - learning_rate: is_numeric = 0.3, + function=None, + output_ports: tc.optional(tc.optional(tc.any(str, Iterable))) = None, + learning_rate: tc.optional(is_numeric) = None, params=None, name=None, - prefs: is_pref_set = None, + prefs: tc.optional(is_pref_set) = None, **kwargs ): diff --git a/psyneulink/library/components/mechanisms/processing/transfer/contrastivehebbianmechanism.py b/psyneulink/library/components/mechanisms/processing/transfer/contrastivehebbianmechanism.py index fe5c1131ca1..6d71ee6932b 100644 --- a/psyneulink/library/components/mechanisms/processing/transfer/contrastivehebbianmechanism.py +++ b/psyneulink/library/components/mechanisms/processing/transfer/contrastivehebbianmechanism.py @@ -337,7 +337,6 @@ from psyneulink.core.components.functions.function import get_matrix, is_function_type from psyneulink.core.components.functions.learningfunctions import ContrastiveHebbian, Hebbian from psyneulink.core.components.functions.objectivefunctions import Distance -from psyneulink.core.components.functions.transferfunctions import Linear from psyneulink.core.components.mechanisms.mechanism import Mechanism from psyneulink.core.globals.context import ContextFlags, handle_external_context from psyneulink.core.globals.keywords import \ @@ -926,7 +925,7 @@ class Parameters(RecurrentTransferMechanism.Parameters): continuous = Parameter(True, stateful=False, loggable=False) clamp = Parameter(HARD_CLAMP, stateful=False, loggable=False) combination_function = Parameter(None, stateful=False, loggable=False) - phase_convergence_function = Parameter(Distance, stateful=False, pnl_internal=True, loggable=False) + phase_convergence_function = Parameter(Distance(metric=MAX_ABS_DIFF), stateful=False, pnl_internal=True, loggable=False) phase_convergence_threshold = Parameter(0.01, modulable=True, pnl_internal=True, loggable=False) minus_phase_termination_condition = Parameter(CONVERGENCE, stateful=False, 
loggable=False) @@ -972,31 +971,31 @@ def __init__(self, input_size:int, hidden_size:tc.optional(int)=None, target_size:tc.optional(int)=None, - separated:bool=True, + separated: tc.optional(bool) = None, mode:tc.optional(tc.enum(SIMPLE_HEBBIAN))=None, - continuous:bool=True, - clamp:tc.enum(SOFT_CLAMP, HARD_CLAMP)=HARD_CLAMP, + continuous: tc.optional(bool) = None, + clamp:tc.optional(tc.enum(SOFT_CLAMP, HARD_CLAMP))=None, combination_function:tc.optional(is_function_type)=None, - function=Linear, - matrix=HOLLOW_MATRIX, + function=None, + matrix=None, auto=None, hetero=None, integrator_function=None, initial_value=None, - noise=0.0, - integration_rate: is_numeric_or_none=0.5, - integrator_mode:bool=False, + noise=None, + integration_rate: is_numeric_or_none=None, + integrator_mode: tc.optional(bool) = None, clip=None, - minus_phase_termination_condition:tc.enum(CONVERGENCE, COUNT)=CONVERGENCE, - minus_phase_termination_threshold:float=0.01, - plus_phase_termination_condition:tc.enum(CONVERGENCE, COUNT)=CONVERGENCE, - plus_phase_termination_threshold:float=0.01, - phase_convergence_function:tc.any(is_function_type)=Distance(metric=MAX_ABS_DIFF), - max_passes:tc.optional(int)=1000, - enable_learning:bool=False, + minus_phase_termination_condition:tc.optional(tc.enum(CONVERGENCE, COUNT))=None, + minus_phase_termination_threshold: tc.optional(float) = None, + plus_phase_termination_condition:tc.optional(tc.enum(CONVERGENCE, COUNT))=None, + plus_phase_termination_threshold: tc.optional(float) = None, + phase_convergence_function: tc.optional(tc.any(is_function_type)) = None, + max_passes:tc.optional(int)=None, + enable_learning: tc.optional(bool) = None, learning_rate:tc.optional(tc.any(parameter_spec, bool))=None, - learning_function: tc.any(is_function_type) = ContrastiveHebbian, - additional_input_ports:tc.optional(tc.any(list, dict)) = None, + learning_function: tc.optional(tc.any(is_function_type)) = None, + additional_input_ports:tc.optional(tc.optional(tc.any(list, dict))) = None, additional_output_ports:tc.optional(tc.any(str, Iterable))=None, params=None, name=None, diff --git a/psyneulink/library/components/mechanisms/processing/transfer/kohonenmechanism.py b/psyneulink/library/components/mechanisms/processing/transfer/kohonenmechanism.py index e7c7c313d96..b06e4cee52a 100644 --- a/psyneulink/library/components/mechanisms/processing/transfer/kohonenmechanism.py +++ b/psyneulink/library/components/mechanisms/processing/transfer/kohonenmechanism.py @@ -79,8 +79,6 @@ from psyneulink.core.components.functions.function import is_function_type from psyneulink.core.components.functions.learningfunctions import Kohonen -from psyneulink.core.components.functions.transferfunctions import Linear -from psyneulink.core.components.functions.statefulfunctions.integratorfunctions import AdaptiveIntegrator from psyneulink.core.components.functions.selectionfunctions import OneHot from psyneulink.core.components.mechanisms.modulatory.learning.learningmechanism import \ ACTIVATION_INPUT, ACTIVATION_OUTPUT, LearningMechanism @@ -282,21 +280,21 @@ class Parameters(TransferMechanism.Parameters): def __init__(self, default_variable=None, size=None, - function=Linear, + function=None, # selection_function=OneHot(mode=MAX_INDICATOR), # RE-INSTATE WHEN IMPLEMENT NHot function - integrator_function=AdaptiveIntegrator, + integrator_function=None, initial_value=None, - noise: is_numeric_or_none = 0.0, - integration_rate: is_numeric_or_none = 0.5, - integrator_mode=False, + noise: tc.optional(is_numeric_or_none) 
= None, + integration_rate: tc.optional(is_numeric_or_none) = None, + integrator_mode=None, clip=None, - enable_learning=True, + enable_learning=None, learning_rate:tc.optional(tc.any(parameter_spec, bool))=None, - learning_function:is_function_type=Kohonen(distance_function=GAUSSIAN), + learning_function: tc.optional(is_function_type) = None, learned_projection:tc.optional(MappingProjection)=None, additional_output_ports:tc.optional(tc.any(str, Iterable))=None, name=None, - prefs: is_pref_set = None, + prefs: tc.optional(is_pref_set) = None, **kwargs ): # # Default output_ports is specified in constructor as a string rather than a list diff --git a/psyneulink/library/components/mechanisms/processing/transfer/kwtamechanism.py b/psyneulink/library/components/mechanisms/processing/transfer/kwtamechanism.py index c1c4c95aaf8..b9d42f6ff00 100644 --- a/psyneulink/library/components/mechanisms/processing/transfer/kwtamechanism.py +++ b/psyneulink/library/components/mechanisms/processing/transfer/kwtamechanism.py @@ -186,7 +186,6 @@ import typecheck as tc from psyneulink.core.components.functions.transferfunctions import Logistic -from psyneulink.core.components.functions.statefulfunctions.integratorfunctions import AdaptiveIntegrator from psyneulink.core.globals.keywords import INITIALIZING, KWTA_MECHANISM, K_VALUE, RATIO, RESULT, THRESHOLD from psyneulink.core.globals.parameters import Parameter from psyneulink.core.globals.preferences.basepreferenceset import is_pref_set @@ -332,6 +331,14 @@ class Parameters(RecurrentTransferMechanism.Parameters): threshold = Parameter(0.0, modulable=True) ratio = Parameter(0.5, modulable=True) + output_ports = Parameter( + [RESULT], + stateful=False, + loggable=False, + read_only=True, + structural=True, + ) + average_based = False inhibition_only = True @@ -339,34 +346,28 @@ class Parameters(RecurrentTransferMechanism.Parameters): def __init__(self, default_variable=None, size=None, - function=Logistic, + function=None, matrix=None, auto: is_numeric_or_none=None, hetero: is_numeric_or_none=None, - integrator_function=AdaptiveIntegrator, + integrator_function=None, initial_value=None, - noise: is_numeric_or_none = 0.0, - integration_rate: is_numeric_or_none = 0.5, - integrator_mode=False, - k_value: is_numeric_or_none = 0.5, - threshold: is_numeric_or_none = 0, - ratio: is_numeric_or_none = 0.5, - average_based=False, - inhibition_only=True, + noise: tc.optional(is_numeric_or_none) = None, + integration_rate: tc.optional(is_numeric_or_none) = None, + integrator_mode=None, + k_value: tc.optional(is_numeric_or_none) = None, + threshold: tc.optional(is_numeric_or_none) = None, + ratio: tc.optional(is_numeric_or_none) = None, + average_based=None, + inhibition_only=None, clip=None, - input_ports:tc.optional(tc.any(list, dict)) = None, - output_ports:tc.optional(tc.any(str, Iterable))=RESULT, + input_ports:tc.optional(tc.optional(tc.any(list, dict))) = None, + output_ports:tc.optional(tc.any(str, Iterable))=None, params=None, name=None, - prefs: is_pref_set = None, + prefs: tc.optional(is_pref_set) = None, **kwargs ): - # Default output_ports is specified in constructor as a string rather than a list - # to avoid "gotcha" associated with mutable default arguments - # (see: bit.ly/2uID3s3 and http://docs.python-guide.org/en/latest/writing/gotchas/) - if output_ports is None: - output_ports = [RESULT] - # this defaults the matrix to be an identity matrix (self excitation) if matrix is None: if auto is None: @@ -479,7 +480,7 @@ def _validate_params(self, request_set, 
target_set=None, context=None): super()._validate_params(request_set=request_set, target_set=target_set, context=context) - if RATIO in target_set: + if RATIO in target_set and target_set[RATIO] is not None: ratio_param = target_set[RATIO] if not isinstance(ratio_param, numbers.Real): if not (isinstance(ratio_param, (np.ndarray, list)) and len(ratio_param) == 1): @@ -488,7 +489,7 @@ def _validate_params(self, request_set, target_set=None, context=None): if ratio_param > 1 or ratio_param < 0: raise KWTAError("ratio parameter ({}) for {} must be between 0 and 1".format(ratio_param, self)) - if K_VALUE in target_set: + if K_VALUE in target_set and target_set[K_VALUE] is not None: k_param = target_set[K_VALUE] if not isinstance(k_param, numbers.Real): if not (isinstance(k_param, (np.ndarray, list)) and len(k_param) == 1): @@ -508,7 +509,7 @@ def _validate_params(self, request_set, target_set=None, context=None): raise KWTAError("k-value parameter ({}) for {} was larger than the total number of elements.". format(k_param, self)) - if THRESHOLD in target_set: + if THRESHOLD in target_set and target_set[THRESHOLD] is not None: threshold_param = target_set[THRESHOLD] if not isinstance(threshold_param, numbers.Real): if not (isinstance(threshold_param, (np.ndarray, list)) and len(threshold_param) == 1): diff --git a/psyneulink/library/components/mechanisms/processing/transfer/lcamechanism.py b/psyneulink/library/components/mechanisms/processing/transfer/lcamechanism.py index e0e22a5effe..ae65ac9b8e9 100644 --- a/psyneulink/library/components/mechanisms/processing/transfer/lcamechanism.py +++ b/psyneulink/library/components/mechanisms/processing/transfer/lcamechanism.py @@ -199,12 +199,12 @@ from psyneulink.core.components.mechanisms.processing.transfermechanism import _integrator_mode_setter from psyneulink.core.globals.keywords import \ CONVERGENCE, FUNCTION, GREATER_THAN_OR_EQUAL, INITIALIZER, LCA_MECHANISM, LEAK, LESS_THAN_OR_EQUAL, MATRIX, NAME, \ - NOISE, RATE, RESULT, TERMINATION_THRESHOLD, TERMINATION_MEASURE, TERMINATION_COMPARISION_OP, TIME_STEP_SIZE, VALUE + NOISE, RATE, RESULT, TERMINATION_THRESHOLD, TERMINATION_MEASURE, TERMINATION_COMPARISION_OP, TIME_STEP_SIZE, VALUE, INVERSE_HOLLOW_MATRIX from psyneulink.core.globals.parameters import Parameter from psyneulink.core.globals.context import ContextFlags from psyneulink.core.globals.preferences.basepreferenceset import is_pref_set from psyneulink.library.components.mechanisms.processing.transfer.recurrenttransfermechanism import \ - RecurrentTransferMechanism + RecurrentTransferMechanism, _recurrent_transfer_mechanism_matrix_getter, _recurrent_transfer_mechanism_matrix_setter __all__ = ['LCAMechanism', 'LCAError', 'CONVERGENCE'] @@ -391,6 +391,12 @@ class Parameters(RecurrentTransferMechanism.Parameters): function = Parameter(Logistic, stateful=False, loggable=False) + matrix = Parameter( + INVERSE_HOLLOW_MATRIX, + modulable=True, + getter=_recurrent_transfer_mechanism_matrix_getter, + setter=_recurrent_transfer_mechanism_matrix_setter + ) leak = Parameter(0.5, modulable=True) auto = Parameter(0.0, modulable=True, aliases='self_excitation') hetero = Parameter(-1.0, modulable=True) @@ -402,6 +408,24 @@ class Parameters(RecurrentTransferMechanism.Parameters): integrator_function = Parameter(LeakyCompetingIntegrator, stateful=False, loggable=False) termination_measure = Parameter(max, stateful=False, loggable=False) + output_ports = Parameter( + [RESULT], + stateful=False, + loggable=False, + read_only=True, + structural=True, + ) + + def 
_validate_competition(self, competition): + if competition < 0: + warnings.warn( + f"The 'competition' arg specified for {self.name} is a negative value ({competition}); " + f"note that this will result in a matrix that has positive off-diagonal elements " + f"since 'competition' is assumed to specify the magnitude of inhibition." + ) + + return None + standard_output_ports = RecurrentTransferMechanism.standard_output_ports.copy() standard_output_ports.extend([{NAME:MAX_VS_NEXT, FUNCTION:max_vs_next}, @@ -413,30 +437,23 @@ def __init__(self, default_variable=None, size:tc.optional(tc.any(int, list, np.array))=None, input_ports:tc.optional(tc.any(list, dict))=None, - function=Logistic, + function=None, initial_value=None, - leak=0.5, - competition=1.0, + leak=None, + competition=None, hetero=None, self_excitation=None, - noise=0.0, - integrator_mode=True, - time_step_size=0.1, + noise=None, + integrator_mode=None, + time_step_size=None, clip=None, - output_ports:tc.optional(tc.any(str, Iterable))=RESULT, + output_ports:tc.optional(tc.any(str, Iterable))=None, params=None, name=None, prefs:is_pref_set=None, **kwargs): """Instantiate LCAMechanism """ - - # Default output_ports is specified in constructor as a string rather than a list - # to avoid "gotcha" associated with mutable default arguments - # (see: bit.ly/2uID3s3 and http://docs.python-guide.org/en/latest/writing/gotchas/) - if output_ports is None or output_ports == RESULT: - output_ports = [RESULT] - # MODIFIED 1/22/20 NEW: [JDC] if MATRIX in kwargs: matrix = kwargs[MATRIX] @@ -467,8 +484,6 @@ def __init__(self, termination_threshold, termination_measure, termination_comparison_op = self._parse_threshold_args(kwargs) # MODIFIED 10/26/19 END - integrator_function = LeakyCompetingIntegrator - super().__init__( default_variable=default_variable, size=size, @@ -477,7 +492,7 @@ def __init__(self, auto=self_excitation, hetero=hetero, function=function, - integrator_function=LeakyCompetingIntegrator, + integrator_function=None, initial_value=initial_value, noise=noise, clip=clip, @@ -507,11 +522,6 @@ def __init__(self, f"so its 'self_excitation' and 'competition' arguments will be ignored.") # MODIFIED 1/22/20 END - elif competition < 0: - warnings.warn(f"The 'competition' arg specified for {self.name} is a negative value ({competition}); " - f"note that this will result in a matrix that has positive off-diagonal elements " - f"since 'competition' is assumed to specify the magnitude of inhibition.") - def _parse_threshold_args(self, kwargs): """Implements convenience arguments threshold and threshold_criterion diff --git a/psyneulink/library/components/mechanisms/processing/transfer/recurrenttransfermechanism.py b/psyneulink/library/components/mechanisms/processing/transfer/recurrenttransfermechanism.py index da1900b2d38..760b7e76cdd 100644 --- a/psyneulink/library/components/mechanisms/processing/transfer/recurrenttransfermechanism.py +++ b/psyneulink/library/components/mechanisms/processing/transfer/recurrenttransfermechanism.py @@ -193,7 +193,6 @@ from psyneulink.core.components.functions.function import Function, get_matrix, is_function_type from psyneulink.core.components.functions.learningfunctions import Hebbian from psyneulink.core.components.functions.objectivefunctions import Stability -from psyneulink.core.components.functions.transferfunctions import Linear from psyneulink.core.components.functions.combinationfunctions import LinearCombination from psyneulink.core.components.functions.userdefinedfunction import 
UserDefinedFunction from psyneulink.core.components.mechanisms.modulatory.learning.learningmechanism import \ @@ -634,8 +633,15 @@ class Parameters(TransferMechanism.Parameters): ) learning_rate = Parameter(None, setter=_recurrent_transfer_mechanism_learning_rate_setter) learning_condition = Parameter(None, stateful=False, loggable=False) - has_recurrent_input_port = Parameter(None, stateful=False, loggable=False) + has_recurrent_input_port = Parameter(False, stateful=False, loggable=False) + output_ports = Parameter( + [RESULT], + stateful=False, + loggable=False, + read_only=True, + structural=True, + ) standard_output_ports = TransferMechanism.standard_output_ports.copy() standard_output_ports.extend([{NAME:ENERGY_OUTPUT_PORT_NAME}, {NAME:ENTROPY_OUTPUT_PORT_NAME}]) @@ -646,22 +652,22 @@ class Parameters(TransferMechanism.Parameters): def __init__(self, default_variable=None, size=None, - input_ports:tc.optional(tc.any(list, dict)) = None, - has_recurrent_input_port=False, - combination_function:is_function_type=LinearCombination, - function=Linear, + input_ports:tc.optional(tc.optional(tc.any(list, dict))) = None, + has_recurrent_input_port=None, + combination_function: tc.optional(is_function_type) = None, + function=None, matrix=None, auto=None, hetero=None, - integrator_mode=False, + integrator_mode=None, integrator_function=None, initial_value=None, - integration_rate: is_numeric_or_none=0.5, - noise=0.0, + integration_rate: is_numeric_or_none=None, + noise=None, clip=None, - enable_learning:bool=False, + enable_learning: tc.optional(bool) = None, learning_rate:tc.optional(tc.any(parameter_spec, bool))=None, - learning_function: tc.any(is_function_type) = Hebbian, + learning_function: tc.optional(tc.any(is_function_type)) = None, learning_condition:tc.optional(tc.any(Condition, TimeScale, tc.enum(UPDATE, CONVERGENCE)))=None, output_ports:tc.optional(tc.any(str, Iterable))=None, @@ -671,13 +677,6 @@ def __init__(self, **kwargs): """Instantiate RecurrentTransferMechanism """ - - # Default output_ports is specified in constructor as a string rather than a list - # to avoid "gotcha" associated with mutable default arguments - # (see: bit.ly/2uID3s3 and http://docs.python-guide.org/en/latest/writing/gotchas/) - if output_ports is None or output_ports == RESULT: - output_ports = [RESULT] - if isinstance(hetero, (list, np.matrix)): hetero = np.array(hetero) diff --git a/psyneulink/library/components/projections/pathway/autoassociativeprojection.py b/psyneulink/library/components/projections/pathway/autoassociativeprojection.py index c10bedfab89..ba33b102a08 100644 --- a/psyneulink/library/components/projections/pathway/autoassociativeprojection.py +++ b/psyneulink/library/components/projections/pathway/autoassociativeprojection.py @@ -269,11 +269,11 @@ def __init__(self, owner=None, sender=None, receiver=None, - matrix=DEFAULT_MATRIX, + matrix=None, function=None, params=None, name=None, - prefs: is_pref_set = None, + prefs: tc.optional(is_pref_set) = None, **kwargs ): diff --git a/psyneulink/library/components/projections/pathway/maskedmappingprojection.py b/psyneulink/library/components/projections/pathway/maskedmappingprojection.py index 45aa2bf73de..4fc809f6cbb 100644 --- a/psyneulink/library/components/projections/pathway/maskedmappingprojection.py +++ b/psyneulink/library/components/projections/pathway/maskedmappingprojection.py @@ -179,13 +179,13 @@ def _validate_mask_operation(self, mode): def __init__(self, sender=None, receiver=None, - matrix=DEFAULT_MATRIX, + matrix=None, 
mask:tc.optional(tc.any(int,float,list,np.ndarray,np.matrix))=None, - mask_operation:tc.enum(ADD, MULTIPLY, EXPONENTIATE)=MULTIPLY, + mask_operation: tc.optional(tc.enum(ADD, MULTIPLY, EXPONENTIATE)) = None, function=None, params=None, name=None, - prefs: is_pref_set = None, + prefs: tc.optional(is_pref_set) = None, **kwargs): super().__init__( diff --git a/psyneulink/library/compositions/compositionrunner.py b/psyneulink/library/compositions/compositionrunner.py index a1645b066ed..f5342907825 100644 --- a/psyneulink/library/compositions/compositionrunner.py +++ b/psyneulink/library/compositions/compositionrunner.py @@ -70,7 +70,7 @@ def _batch_inputs(self, yield chunk if call_after_minibatch: call_after_minibatch() - + if not self._is_llvm_mode: self._composition._update_learning_parameters(context) if (not self._is_llvm_mode and early_stopper is not None diff --git a/psyneulink/library/compositions/regressioncfa.py b/psyneulink/library/compositions/regressioncfa.py index 21bc71dbe56..41ea47f7917 100644 --- a/psyneulink/library/compositions/regressioncfa.py +++ b/psyneulink/library/compositions/regressioncfa.py @@ -246,7 +246,7 @@ class Parameters(CompositionFunctionApproximator.Parameters): def __init__(self, name=None, - update_weights=BayesGLM, + update_weights=None, prediction_terms:tc.optional(list)=None): self._instantiate_prediction_terms(prediction_terms) From bdebed38012eb98521a9113425320d6172a67c43 Mon Sep 17 00:00:00 2001 From: Katherine Mantel Date: Fri, 12 Jun 2020 20:07:33 -0400 Subject: [PATCH 25/25] tests: add test to identify clobbering _user_specified assignments in __init__ --- tests/components/test_general.py | 34 ++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/tests/components/test_general.py b/tests/components/test_general.py index 7c93f75747e..9436fc28158 100644 --- a/tests/components/test_general.py +++ b/tests/components/test_general.py @@ -1,3 +1,4 @@ +import inspect import psyneulink as pnl import pytest @@ -6,6 +7,7 @@ # gather all Component classes (a set to ensure no duplicates) component_classes = [] +component_class_constructor_arguments = {} for item in pnl.__all__: evaled = eval(f'pnl.{item}') @@ -14,6 +16,9 @@ pnl.core.components.component.ComponentsMeta ): component_classes.append(evaled) + component_class_constructor_arguments[evaled] = inspect.signature( + evaled.__init__ + ).parameters component_classes.sort(key=lambda x: x.__name__) @@ -47,3 +52,32 @@ def test_function_parameters_stateless(class_): ) except AttributeError: pass + + +@pytest.mark.parametrize( + 'class_', + component_classes +) +def test_parameters_user_specified(class_): + violators = set() + constructor_parameters = inspect.signature(class_.__init__).parameters + for name, param in constructor_parameters.items(): + if ( + param.kind in { + inspect.Parameter.POSITIONAL_OR_KEYWORD, + inspect.Parameter.KEYWORD_ONLY + } + and name in class_.parameters.names() + and param.default is not inspect.Parameter.empty + and param.default is not None + ): + violators.add(name) + + message = ( + "If a value other than None is used as the default value in a class's" + + ' constructor/__init__, for an argument corresponding to a Parameter,' + + ' _user_specified will always be True. The default value should be' + + " specified in the class's Parameters inner class. Violators for" + + f' {class_.__name__}: {violators}' + ) + assert violators == set(), message
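
Editorial note on the convention the final patch enforces: the test added in PATCH 25/25 flags any constructor argument that (a) corresponds to a Parameter and (b) has a non-None default in __init__, because such a default is indistinguishable from a caller-supplied value and so marks the Parameter as _user_specified on every instantiation. That is why the preceding patches move concrete defaults into each class's Parameters inner class and change the matching __init__ defaults to None. The sketch below is a minimal, framework-agnostic illustration of that idea, assuming nothing about PsyNeuLink internals; the class and attribute names are hypothetical and are not the PsyNeuLink API.

class Example:
    # Canonical defaults live in one place (in PsyNeuLink, the Parameters inner class).
    class Defaults:
        rate = 0.5
        noise = 0.0

    def __init__(self, rate=None, noise=None):
        # None is reserved to mean "not supplied by the caller";
        # any other value is recorded as user-specified.
        self._user_specified = {
            'rate': rate is not None,
            'noise': noise is not None,
        }
        self.rate = rate if rate is not None else self.Defaults.rate
        self.noise = noise if noise is not None else self.Defaults.noise


ex = Example(rate=0.1)
assert ex._user_specified == {'rate': True, 'noise': False}
assert ex.noise == 0.0

Had __init__ declared noise=0.0 directly, the constructor could no longer tell an explicit noise=0.0 apart from the default, which is exactly the clobbering of _user_specified that the new test is meant to catch.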