Increase training time and number of samples to compare in the PC perf…

…ormance test.
HumanCompatibleAI · Oct 11, 2023 · b1b1f13 · b1b1f13
1 parent 5552245
commit b1b1f13
Showing 1 changed file with 3 additions and 3 deletions.
diff --git a/tests/algorithms/test_preference_comparisons.py b/tests/algorithms/test_preference_comparisons.py
@@ -1068,7 +1068,7 @@ def test_that_trainer_improves(
     novice_agent_rewards, _ = evaluation.evaluate_policy(
         agent_trainer.algorithm.policy,
         action_is_reward_venv,
-        25,
+        50,
         return_episode_rewards=True,
     )
 
@@ -1077,7 +1077,7 @@ def test_that_trainer_improves(
     # after this training, and thus `later_rewards` should have lower loss.
     first_reward_network_stats = main_trainer.train(20, 20)
 
-    later_reward_network_stats = main_trainer.train(50, 20)
+    later_reward_network_stats = main_trainer.train(100, 40)
     assert (
         first_reward_network_stats["reward_loss"]
         > later_reward_network_stats["reward_loss"]
@@ -1087,7 +1087,7 @@ def test_that_trainer_improves(
     trained_agent_rewards, _ = evaluation.evaluate_policy(
         agent_trainer.algorithm.policy,
         action_is_reward_venv,
-        25,
+        50,
         return_episode_rewards=True,
     )