diff --git a/cleanrl/sac_continuous_action.py b/cleanrl/sac_continuous_action.py index d28cb8e2..b200071a 100644 --- a/cleanrl/sac_continuous_action.py +++ b/cleanrl/sac_continuous_action.py @@ -284,7 +284,7 @@ def get_action(self, x): a_optimizer.zero_grad() alpha_loss.backward() a_optimizer.step() - alpha = log_alpha.exp().item() + self.alpha = log_alpha.exp().item() # update the target networks if global_step % args.target_network_frequency == 0: