Commit

commit stuff
codekansas committed Apr 2, 2024
1 parent fa55377 commit f8e1655
Showing 4 changed files with 84 additions and 65 deletions.
8 changes: 4 additions & 4 deletions Makefile
@@ -26,13 +26,13 @@ all:
# Train #
# ------------------------ #

train-one:
train-one-vis:
@python -m sim.humanoid_gym.train --task humanoid_ppo --run_name v1 --num_envs 1

train:
@python -m sim.humanoid_gym.train --task humanoid_ppo --run_name v1 --num_envs 1024
train-many-vis:
@python -m sim.humanoid_gym.train --task humanoid_ppo --run_name v1 --num_envs 16

train-full:
train:
@python -m sim.humanoid_gym.train --task humanoid_ppo --run_name v1 --num_envs 2048 --headless

play:
58 changes: 32 additions & 26 deletions sim/humanoid_gym/envs/humanoid_config.py
@@ -21,7 +21,7 @@ class env(LeggedRobotCfg.env):  # noqa: N801
num_privileged_obs = int(c_frame_stack * single_num_privileged_obs)
num_actions = NUM_JOINTS # Torque command for each joint.
num_envs = 4096
episode_length_s = 24 # Maximum episode length
episode_length_s = 8 # Maximum episode length
use_ref_actions = False

class safety: # noqa: N801
@@ -53,20 +53,20 @@ class terrain(LeggedRobotCfg.terrain):  # noqa: N801
curriculum = False
# For rough terrain only:
measure_heights = False
static_friction = 0.6
dynamic_friction = 0.6
static_friction = 1.0
dynamic_friction = 1.0
terrain_length = 8.0
terrain_width = 8.0
num_rows = 20 # Number of terrain rows (levels)
num_cols = 20 # Number of terrain cols (types)
max_init_terrain_level = 10 # Starting curriculum state
# plane; obstacles; uniform; slope_up; slope_down, stair_up, stair_down
terrain_proportions = [0.2, 0.2, 0.4, 0.1, 0.1, 0, 0]
restitution = 0.0
restitution = 1.0

class noise: # noqa: N801
add_noise = True
noise_level = 0.6 # Scales other values
noise_level = 0.1 # Scales other values

class noise_scales: # noqa: N801
dof_pos = 0.05
@@ -82,18 +82,18 @@ class init_state(LeggedRobotCfg.init_state):  # noqa: N801

class sim(LeggedRobotCfg.sim): # noqa: N801
dt = 0.005
substeps = 2
substeps = 1

class physx(LeggedRobotCfg.sim.physx): # noqa: N801
num_threads = 10
solver_type = 1
num_position_iterations = 4
num_velocity_iterations = 1
contact_offset = 0.01
rest_offset = -0.02
contact_offset = -0.01
rest_offset = -0.015
bounce_threshold_velocity = 0.5
max_depenetration_velocity = 1.0
max_gpu_contact_pairs = 2**24
max_depenetration_velocity = 10.0
max_gpu_contact_pairs = 2**23
default_buffer_size_multiplier = 5
contact_collection = 2 # gymapi.CC_ALL_SUBSTEPS

@@ -113,7 +113,7 @@ class commands(LeggedRobotCfg.commands):  # noqa: N801
resampling_time = 8.0

class rewards: # noqa: N801
base_height_target = 1.1
# base_height_target = 1.1
min_dist = 0.2
max_dist = 0.5

@@ -122,8 +122,8 @@ class rewards:  # noqa: N801
target_feet_height = 0.06 # m
cycle_time = 0.64 # sec

# If truem negative total rewards are clipped at zero.
only_positive_rewards = True
# If true, negative total rewards are clipped at zero.
only_positive_rewards = False

# Max contact force should be a bit above the weight of the robot. In
# our case the robot weighs 62 Kg, so we set it to 700.
@@ -153,13 +153,18 @@ class scales:  # noqa: N801

# Base position
base_height = 1.0
base_acc = 0.1
# base_acc = 0.1
base_acc = 0.0

# Energy
torques = -1e-5
dof_vel = -5e-4
dof_acc = -1e-7
collision = -1e-2
# torques = -1e-6
# dof_vel = -5e-5
# dof_acc = -1e-8
# collision = -1e-2
torques = 0.0
dof_vel = 0.0
dof_acc = 0.0
collision = 0.0

class normalization: # noqa: N801
class obs_scales: # noqa: N801
@@ -170,8 +175,8 @@ class obs_scales:  # noqa: N801
quat = 1.0
height_measurements = 5.0

clip_observations = 18.0
clip_actions = 18.0
clip_observations = 100.0
clip_actions = 100.0


class StompyPPO(LeggedRobotCfgPPO):
@@ -180,16 +185,17 @@ class StompyPPO(LeggedRobotCfgPPO):

class policy: # noqa: N801
init_noise_std = 1.0
actor_hidden_dims = [512, 256, 128]
critic_hidden_dims = [768, 256, 128]
actor_hidden_dims = [512, 512, 256]
critic_hidden_dims = [1024, 512, 256]

class algorithm(LeggedRobotCfgPPO.algorithm): # noqa: N801
entropy_coef = 0.001
learning_rate = 1e-5
entropy_coef = 0.01
learning_rate = 3e-4
num_learning_epochs = 2
gamma = 0.994
lam = 0.9
gamma = 0.99
lam = 0.95
num_mini_batches = 4
max_grad_norm = 0.5

class runner: # noqa: N801
policy_class_name = "ActorCritic"
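As an aside on how the reward scales and clip values above are typically consumed in legged_gym-style pipelines (a minimal standalone sketch with illustrative values, not this repository's exact code): a scale of 0.0 disables a reward term, and the clip values bound observations and actions element-wise, so raising them from 18 to 100 makes the clamp far less aggressive.

import torch

# Illustrative values lifted from the config above; the consuming code below is
# an assumption for illustration, not taken from this repository.
clip_observations = 100.0
clip_actions = 100.0
reward_scales = {"base_height": 1.0, "base_acc": 0.0, "torques": 0.0}

def clip_io(obs: torch.Tensor, actions: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
    # Element-wise clamping of observations and actions.
    return (
        torch.clip(obs, -clip_observations, clip_observations),
        torch.clip(actions, -clip_actions, clip_actions),
    )

def total_reward(terms: dict[str, torch.Tensor]) -> torch.Tensor:
    # Terms whose scale is 0.0 (or which are commented out) contribute nothing.
    total = torch.zeros_like(next(iter(terms.values())))
    for name, scale in reward_scales.items():
        if scale != 0.0:
            total += scale * terms[name]
    return total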
72 changes: 42 additions & 30 deletions sim/humanoid_gym/envs/humanoid_env.py
@@ -6,6 +6,7 @@
import torch
from humanoid.envs import LeggedRobot
from humanoid.utils.terrain import HumanoidTerrain
from isaacgym import gymtorch
from torch import Tensor

from sim.humanoid_gym.envs.humanoid_config import StompyCfg
@@ -78,31 +79,19 @@ def __init__(
self.compute_observations()

def _push_robots(self) -> None:
# """Random pushes the robots. Emulates an impulse by setting a randomized base velocity."""
# max_vel = self.cfg.domain_rand.max_push_vel_xy
# max_push_angular = self.cfg.domain_rand.max_push_ang_vel

# # Linear velocity in the X / Y axes.
# self.rand_push_force[:, :2] = torch_rand_float(
# -max_vel,
# max_vel,
# (self.num_envs, 2),
# device=self.device,
# )
# self.root_states[:, 7:9] = self.rand_push_force[:, :2]

# # Random torques in all three axes.
# self.rand_push_torque = torch_rand_float(
# -max_push_angular,
# max_push_angular,
# (self.num_envs, 3),
# device=self.device,
# )
# self.root_states[:, 10:13] = self.rand_push_torque

# self.gym.set_actor_root_state_tensor(self.sim, gymtorch.unwrap_tensor(self.root_states))

pass
"""Random pushes the robots. Emulates an impulse by setting a randomized base velocity."""
max_vel = self.cfg.domain_rand.max_push_vel_xy
max_push_angular = self.cfg.domain_rand.max_push_ang_vel

# Random forces in the X and Y axes.
self.rand_push_force[:, :2] = (torch.rand((self.num_envs, 2), device=self.device) * 2 - 1) * max_vel
self.root_states[:, 7:9] = self.rand_push_force[:, :2]

# Random torques in all three axes.
self.rand_push_torque = (torch.rand((self.num_envs, 3), device=self.device) * 2 - 1) * max_push_angular
self.root_states[:, 10:13] = self.rand_push_torque

self.gym.set_actor_root_state_tensor(self.sim, gymtorch.unwrap_tensor(self.root_states))

def create_sim(self) -> None:
"""Creates simulation, terrain and evironments."""
@@ -219,19 +208,42 @@ def reset_idx(self, env_ids: Tensor) -> None:
for i in range(self.critic_history.maxlen):
self.critic_history[i][env_ids] *= 0

def _compute_torques(self, actions: Tensor) -> Tensor:
# Need to override this so we can just use the motor torques directly.
# return torch.clip(actions * self.cfg.control.action_scale, -self.torque_limits, self.torque_limits)
return torch.tanh(actions * self.cfg.control.action_scale) * self.torque_limits

def check_termination(self) -> None:
self.reset_buf = self.episode_length_buf > self.max_episode_length

def _reset_dofs(self, env_ids: Tensor) -> Tensor:
# Resets the DOF positions to random positions within their limits.
min_pos, max_pos = self.dof_pos_limits.unbind(1)
rand_pos = torch.rand((len(env_ids), self.num_dof), device=self.device)
self.dof_pos[env_ids] = min_pos[None, :] + rand_pos * (max_pos - min_pos)[None, :]

self.dof_vel[env_ids] = 0.0

env_ids_int32 = env_ids.to(dtype=torch.int32)
self.gym.set_dof_state_tensor_indexed(
self.sim,
gymtorch.unwrap_tensor(self.dof_state),
gymtorch.unwrap_tensor(env_ids_int32),
len(env_ids_int32),
)

def _reward_base_height(self) -> Tensor:
"""Calculates the reward based on the robot's base height.
This rewards the robot for being close to the target height without
going over (we actually penalise it for going over).
This rewards the robot for keeping the feet as far below the base as
possible.
Returns:
The reward for maximizing the base height.
"""
base_height = self.root_states[:, 2]
reward = base_height / self.cfg.rewards.base_height_target
reward[reward > 1.0] = 0.0
return reward
max_foot_height = self.rigid_state[:, self.feet_indices, 2].max(dim=1).values
return base_height - max_foot_height

def _reward_base_acc(self) -> Tensor:
"""Computes the reward based on the base's acceleration.
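For readers skimming the new _push_robots and _compute_torques logic above, here is a minimal self-contained sketch (with made-up sizes and limits) of the two transforms: torch.rand(...) * 2 - 1 yields uniform samples in [-1, 1], so scaling by the maximum push magnitude gives velocities in [-max, max], and the tanh mapping keeps commanded torques within the per-joint torque limits.

import torch

num_envs, num_joints = 4, 12                      # made-up sizes for illustration
max_push_vel_xy = 0.5                             # example maximum push velocity (m/s)
action_scale = 0.25                               # example action scale
torque_limits = torch.full((num_joints,), 30.0)   # example per-joint limits (N*m)

# Uniform push velocities in [-max_push_vel_xy, max_push_vel_xy].
push_xy = (torch.rand((num_envs, 2)) * 2 - 1) * max_push_vel_xy

# tanh squashes to (-1, 1), so the scaled torque never exceeds the joint limit.
actions = torch.randn(num_envs, num_joints)
torques = torch.tanh(actions * action_scale) * torque_limits

assert push_xy.abs().max() <= max_push_vel_xy
assert (torques.abs() <= torque_limits).all()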
11 changes: 6 additions & 5 deletions sim/scripts/drop_urdf.py
@@ -66,11 +66,11 @@ def load_gym() -> GymParams:

sim_params.physx.solver_type = 1
sim_params.physx.num_position_iterations = 4
sim_params.physx.num_velocity_iterations = 0
sim_params.physx.contact_offset = 0.01
sim_params.physx.rest_offset = -0.02
sim_params.physx.num_velocity_iterations = 1
sim_params.physx.contact_offset = -0.01
sim_params.physx.rest_offset = -0.015
sim_params.physx.bounce_threshold_velocity = 0.5
sim_params.physx.max_depenetration_velocity = 1.0
sim_params.physx.max_depenetration_velocity = 10.0
sim_params.physx.max_gpu_contact_pairs = 2**24
sim_params.physx.default_buffer_size_multiplier = 5
sim_params.physx.contact_collection = gymapi.CC_ALL_SUBSTEPS
@@ -99,6 +99,7 @@ def load_gym() -> GymParams:

# Add ground plane.
plane_params = gymapi.PlaneParams()
plane_params.restitution = 1.0
gym.add_ground(sim, plane_params)

# Set up the environment grid.
@@ -152,7 +153,7 @@ def run_gym(gym: GymParams, mode: Literal["one_at_a_time", "all_at_once"] = "all
body_ids: List[str] = gym.gym.get_actor_rigid_body_names(gym.env, gym.robot)

joint_id = 0
effort = 10.0
effort = 5.0

while not gym.gym.query_viewer_has_closed(gym.viewer):
gym.gym.simulate(gym.sim)
