update hz freq, add xbot updates

kscalelabs · Sep 25, 2024 · 343179d · 343179d
1 parent be131fe
commit 343179d
Show file tree

Hide file tree

Showing 14 changed files with 52 additions and 73 deletions.
diff --git a/sim/envs/__init__.py b/sim/envs/__init__.py
@@ -16,7 +16,7 @@
 from sim.envs.humanoids.h1_env import H1FreeEnv
 from sim.envs.humanoids.stompymini_config import MiniCfg, MiniCfgPPO
 from sim.envs.humanoids.stompymini_env import MiniFreeEnv
-from sim.envs.humanoids.stompypro_config import StompyProCfg, StompyProCfgPPO
+from sim.envs.humanoids.stompypro_config import StompyProCfg, StompyProStandingCfg, StompyProCfgPPO
 from sim.envs.humanoids.stompypro_env import StompyProFreeEnv
 from sim.envs.humanoids.xbot_config import XBotCfg, XBotCfgPPO
 from sim.envs.humanoids.xbot_env import XBotLFreeEnv
@@ -25,6 +25,7 @@
 task_registry = TaskRegistry()
 task_registry.register("stompymini", MiniFreeEnv, MiniCfg(), MiniCfgPPO())
 task_registry.register("stompypro", StompyProFreeEnv, StompyProCfg(), StompyProCfgPPO())
+task_registry.register("stompypro_standing", StompyProFreeEnv, StompyProStandingCfg(), StompyProCfgPPO())
 task_registry.register("dora", DoraFreeEnv, DoraCfg(), DoraCfgPPO())
 task_registry.register("h1", H1FreeEnv, H1Cfg(), H1CfgPPO())
 task_registry.register("g1", G1FreeEnv, G1Cfg(), G1CfgPPO())

diff --git a/sim/envs/base/legged_robot.py b/sim/envs/base/legged_robot.py
@@ -48,6 +48,15 @@ def step(self, actions):
         Args:
             actions (torch.Tensor): Tensor of shape (num_envs, num_actions_per_env)
         """
+        if self.cfg.env.use_ref_actions:
+            actions += self.ref_action
+        actions = torch.clip(actions, -self.cfg.normalization.clip_actions, self.cfg.normalization.clip_actions)
+
+        # dynamic randomization
+        delay = torch.rand((self.num_envs, 1), device=self.device) * self.cfg.domain_rand.action_delay
+        actions = (1 - delay) * actions + delay * self.actions
+        actions += self.cfg.domain_rand.action_noise * torch.randn_like(actions) * actions
+
         clip_actions = self.cfg.normalization.clip_actions
         self.actions = torch.clip(actions, -clip_actions, clip_actions).to(self.device)
         # step physics and render each frame
@@ -238,6 +247,9 @@ def _process_rigid_shape_props(self, props, env_id):
 
             for s in range(len(props)):
                 props[s].friction = self.friction_coeffs[env_id]
+
+            self.env_frictions[env_id] = self.friction_coeffs[env_id]
+
         return props
 
     def _process_dof_props(self, props, env_id):
@@ -263,6 +275,7 @@ def _process_dof_props(self, props, env_id):
                 self.dof_pos_limits[i, 1] = props["upper"][i].item() * self.cfg.safety.pos_limit
                 self.dof_vel_limits[i] = props["velocity"][i].item() * self.cfg.safety.vel_limit
                 self.torque_limits[i] = props["effort"][i].item() * self.cfg.safety.torque_limit
+
         return props
 
     def _process_rigid_body_props(self, props, env_id):
@@ -271,6 +284,9 @@ def _process_rigid_body_props(self, props, env_id):
             rng = self.cfg.domain_rand.added_mass_range
             props[0].mass += np.random.uniform(rng[0], rng[1])
 
+        for prop in props:
+            self.body_mass[env_id] += prop.mass
+
         return props
 
     def _post_physics_step_callback(self):

diff --git a/sim/envs/humanoids/dora_config.py b/sim/envs/humanoids/dora_config.py
@@ -126,7 +126,9 @@ class domain_rand(LeggedRobotCfg.domain_rand):
         push_interval_s = 4
         max_push_vel_xy = 0.2
         max_push_ang_vel = 0.4
-        dynamic_randomization = 0.02
+        # dynamic randomization
+        action_delay = 0.5
+        action_noise = 0.02
 
     class commands(LeggedRobotCfg.commands):
         # Vers: lin_vel_x, lin_vel_y, ang_vel_yaw, heading (in heading mode ang_vel_yaw is recomputed from heading error)
@@ -230,7 +232,7 @@ class runner:
 
         # logging
         save_interval = 100  # check for potential saves every this many iterations
-        experiment_name = "Dora"
+        experiment_name = "dora"
         run_name = ""
         # load and resume
         resume = False

diff --git a/sim/envs/humanoids/dora_env.py b/sim/envs/humanoids/dora_env.py
@@ -187,15 +187,6 @@ def _get_noise_scale_vec(self, cfg):
         )  # euler x,y
         return noise_vec
 
-    def step(self, actions):
-        if self.cfg.env.use_ref_actions:
-            actions += self.ref_action
-        # dynamic randomization
-        delay = torch.rand((self.num_envs, 1), device=self.device)
-        actions = (1 - delay) * actions + delay * self.actions
-        actions += self.cfg.domain_rand.dynamic_randomization * torch.randn_like(actions) * actions
-        return super().step(actions)
-
     def compute_observations(self):
         phase = self._get_phase()
         self.compute_ref_state()
@@ -309,7 +300,7 @@ def _reward_foot_slip(self):
         with the ground. The speed of the foot is calculated and scaled by the contact condition.
         """
         contact = self.contact_forces[:, self.feet_indices, 2] > 5.0
-        foot_speed_norm = torch.norm(self.rigid_state[:, self.feet_indices, 10:12], dim=2)
+        foot_speed_norm = torch.norm(self.rigid_state[:, self.feet_indices, 7:9], dim=2)
         rew = torch.sqrt(foot_speed_norm)
         rew *= contact
         return torch.sum(rew, dim=1)

diff --git a/sim/envs/humanoids/g1_config.py b/sim/envs/humanoids/g1_config.py
@@ -124,7 +124,9 @@ class domain_rand(LeggedRobotCfg.domain_rand):
         push_interval_s = 4
         max_push_vel_xy = 0.2
         max_push_ang_vel = 0.4
-        dynamic_randomization = 0.02
+        # dynamic randomization
+        action_delay = 0.5
+        action_noise = 0.02
 
     class commands(LeggedRobotCfg.commands):
         # Vers: lin_vel_x, lin_vel_y, ang_vel_yaw, heading (in heading mode ang_vel_yaw is recomputed from heading error)
@@ -228,7 +230,7 @@ class runner:
 
         # logging
         save_interval = 100  # check for potential saves every this many iterations
-        experiment_name = "Full"
+        experiment_name = "g1"
         run_name = ""
         # load and resume
         resume = False

diff --git a/sim/envs/humanoids/g1_env.py b/sim/envs/humanoids/g1_env.py
@@ -186,15 +186,6 @@ def _get_noise_scale_vec(self, cfg):
         )  # euler x,y
         return noise_vec
 
-    def step(self, actions):
-        if self.cfg.env.use_ref_actions:
-            actions += self.ref_action
-        # dynamic randomization
-        delay = torch.rand((self.num_envs, 1), device=self.device)
-        actions = (1 - delay) * actions + delay * self.actions
-        actions += self.cfg.domain_rand.dynamic_randomization * torch.randn_like(actions) * actions
-        return super().step(actions)
-
     def compute_observations(self):
         phase = self._get_phase()
         self.compute_ref_state()
@@ -308,7 +299,7 @@ def _reward_foot_slip(self):
         with the ground. The speed of the foot is calculated and scaled by the contact condition.
         """
         contact = self.contact_forces[:, self.feet_indices, 2] > 5.0
-        foot_speed_norm = torch.norm(self.rigid_state[:, self.feet_indices, 10:12], dim=2)
+        foot_speed_norm = torch.norm(self.rigid_state[:, self.feet_indices, 7:9], dim=2)
         rew = torch.sqrt(foot_speed_norm)
         rew *= contact
         return torch.sum(rew, dim=1)

diff --git a/sim/envs/humanoids/h1_config.py b/sim/envs/humanoids/h1_config.py
@@ -128,7 +128,9 @@ class domain_rand(LeggedRobotCfg.domain_rand):
         push_interval_s = 4
         max_push_vel_xy = 0.2
         max_push_ang_vel = 0.4
-        dynamic_randomization = 0.02
+        # dynamic randomization
+        action_delay = 0.5
+        action_noise = 0.02
 
     class commands(LeggedRobotCfg.commands):
         # Vers: lin_vel_x, lin_vel_y, ang_vel_yaw, heading (in heading mode ang_vel_yaw is recomputed from heading error)
@@ -232,7 +234,7 @@ class runner:
 
         # logging
         save_interval = 100  # check for potential saves every this many iterations
-        experiment_name = "Full"
+        experiment_name = "h1"
         run_name = ""
         # load and resume
         resume = False

diff --git a/sim/envs/humanoids/h1_env.py b/sim/envs/humanoids/h1_env.py
@@ -187,15 +187,6 @@ def _get_noise_scale_vec(self, cfg):
         )  # euler x,y
         return noise_vec
 
-    def step(self, actions):
-        if self.cfg.env.use_ref_actions:
-            actions += self.ref_action
-        # dynamic randomization
-        delay = torch.rand((self.num_envs, 1), device=self.device)
-        actions = (1 - delay) * actions + delay * self.actions
-        actions += self.cfg.domain_rand.dynamic_randomization * torch.randn_like(actions) * actions
-        return super().step(actions)
-
     def compute_observations(self):
         phase = self._get_phase()
         self.compute_ref_state()
@@ -309,7 +300,7 @@ def _reward_foot_slip(self):
         with the ground. The speed of the foot is calculated and scaled by the contact condition.
         """
         contact = self.contact_forces[:, self.feet_indices, 2] > 5.0
-        foot_speed_norm = torch.norm(self.rigid_state[:, self.feet_indices, 10:12], dim=2)
+        foot_speed_norm = torch.norm(self.rigid_state[:, self.feet_indices, 7:9], dim=2)
         rew = torch.sqrt(foot_speed_norm)
         rew *= contact
         return torch.sum(rew, dim=1)

diff --git a/sim/envs/humanoids/stompymini_config.py b/sim/envs/humanoids/stompymini_config.py
@@ -128,7 +128,9 @@ class domain_rand(LeggedRobotCfg.domain_rand):
         push_interval_s = 4
         max_push_vel_xy = 0.2
         max_push_ang_vel = 0.4
-        dynamic_randomization = 0.05
+        # dynamic randomization
+        action_delay = 0.5
+        action_noise = 0.02
 
     class commands(LeggedRobotCfg.commands):
         # Vers: lin_vel_x, lin_vel_y, ang_vel_yaw, heading (in heading mode ang_vel_yaw is recomputed from heading error)

diff --git a/sim/envs/humanoids/stompymini_env.py b/sim/envs/humanoids/stompymini_env.py
@@ -188,15 +188,6 @@ def _get_noise_scale_vec(self, cfg):
         )  # euler x,y
         return noise_vec
 
-    def step(self, actions):
-        if self.cfg.env.use_ref_actions:
-            actions += self.ref_action
-        # dynamic randomization
-        delay = torch.rand((self.num_envs, 1), device=self.device)
-        actions = (1 - delay) * actions + delay * self.actions
-        actions += self.cfg.domain_rand.dynamic_randomization * torch.randn_like(actions) * actions
-        return super().step(actions)
-
     def compute_observations(self):
         phase = self._get_phase()
         self.compute_ref_state()
@@ -310,7 +301,7 @@ def _reward_foot_slip(self):
         with the ground. The speed of the foot is calculated and scaled by the contact condition.
         """
         contact = self.contact_forces[:, self.feet_indices, 2] > 5.0
-        foot_speed_norm = torch.norm(self.rigid_state[:, self.feet_indices, 10:12], dim=2)
+        foot_speed_norm = torch.norm(self.rigid_state[:, self.feet_indices, 7:9], dim=2)
         rew = torch.sqrt(foot_speed_norm)
         rew *= contact
         return torch.sum(rew, dim=1)

diff --git a/sim/envs/humanoids/stompypro_config.py b/sim/envs/humanoids/stompypro_config.py
@@ -114,10 +114,10 @@ class control(LeggedRobotCfg.control):
         # action scale: target angle = actionScale * action + defaultAngle
         action_scale = 0.25
         # decimation: Number of control action updates @ sim DT per policy DT
-        decimation = 10  # 100hz
+        decimation = 20  # 100hz
 
     class sim(LeggedRobotCfg.sim):
-        dt = 0.002  # 1000 Hz
+        dt = 0.001  # 1000 Hz
         substeps = 1  # 2
         up_axis = 1  # 0 is y, 1 is z
 
@@ -145,7 +145,9 @@ class domain_rand(LeggedRobotCfg.domain_rand):
         push_interval_s = 4
         max_push_vel_xy = 0.3  # 0.2
         max_push_ang_vel = 0.4
-        dynamic_randomization = 0.02
+        # dynamic randomization
+        action_delay = 0.5
+        action_noise = 0.02
 
     class commands(LeggedRobotCfg.commands):
         # Vers: lin_vel_x, lin_vel_y, ang_vel_yaw, heading (in heading mode ang_vel_yaw is recomputed from heading error)
@@ -181,7 +183,7 @@ class scales:
             joint_pos = 1.6  # 1.6
             feet_clearance = 1.5  # 1.0
             feet_contact_number = 2.1  # 1.2
-            # # gait
+            # gait
             feet_air_time = 2.5  # 1.0
             foot_slip = -0.05
             feet_distance = 0.2  # 0.2

diff --git a/sim/envs/humanoids/stompypro_env.py b/sim/envs/humanoids/stompypro_env.py
@@ -190,14 +190,9 @@ def _get_noise_scale_vec(self, cfg):
         )  # euler x,y
         return noise_vec
 
-    def step(self, actions):
-        if self.cfg.env.use_ref_actions:
-            actions += self.ref_action
-        # dynamic randomization
-        delay = torch.rand((self.num_envs, 1), device=self.device)
-        actions = (1 - delay) * actions + delay * self.actions
-        actions += self.cfg.domain_rand.dynamic_randomization * torch.randn_like(actions) * actions
-        return super().step(actions)
+    # def _compute_torques(self, actions):
+    #     # Override the default torque computation so that the actions are interpreted as torques directly.
+    #     return torch.clip(actions, -self.torque_limits, self.torque_limits)
 
     def compute_observations(self):
         phase = self._get_phase()
@@ -311,7 +306,7 @@ def _reward_foot_slip(self):
         with the ground. The speed of the foot is calculated and scaled by the contact condition.
         """
         contact = self.contact_forces[:, self.feet_indices, 2] > 5.0
-        foot_speed_norm = torch.norm(self.rigid_state[:, self.feet_indices, 10:12], dim=2)
+        foot_speed_norm = torch.norm(self.rigid_state[:, self.feet_indices, 7:9], dim=2)
         rew = torch.sqrt(foot_speed_norm)
         rew *= contact
         return torch.sum(rew, dim=1)

diff --git a/sim/envs/humanoids/xbot_config.py b/sim/envs/humanoids/xbot_config.py
@@ -125,7 +125,9 @@ class domain_rand(LeggedRobotCfg.domain_rand):
         push_interval_s = 4
         max_push_vel_xy = 0.2
         max_push_ang_vel = 0.4
-        dynamic_randomization = 0.05
+        # dynamic randomization
+        action_delay = 0.5
+        action_noise = 0.02
 
     class commands(LeggedRobotCfg.commands):
         # Vers: lin_vel_x, lin_vel_y, ang_vel_yaw, heading (in heading mode ang_vel_yaw is recomputed from heading error)
@@ -227,7 +229,7 @@ class runner:
 
         # logging
         save_interval = 100  # check for potential saves every this many iterations
-        experiment_name = "Full"
+        experiment_name = "g1"
         run_name = ""
         # load and resume
         resume = False

diff --git a/sim/envs/humanoids/xbot_env.py b/sim/envs/humanoids/xbot_env.py
@@ -188,15 +188,6 @@ def _get_noise_scale_vec(self, cfg):
         )  # euler x,y
         return noise_vec
 
-    def step(self, actions):
-        if self.cfg.env.use_ref_actions:
-            actions += self.ref_action
-        # dynamic randomization
-        delay = torch.rand((self.num_envs, 1), device=self.device)
-        actions = (1 - delay) * actions + delay * self.actions
-        actions += self.cfg.domain_rand.dynamic_randomization * torch.randn_like(actions) * actions
-        return super().step(actions)
-
     def compute_observations(self):
         phase = self._get_phase()
         self.compute_ref_state()
@@ -310,7 +301,7 @@ def _reward_foot_slip(self):
         with the ground. The speed of the foot is calculated and scaled by the contact condition.
         """
         contact = self.contact_forces[:, self.feet_indices, 2] > 5.0
-        foot_speed_norm = torch.norm(self.rigid_state[:, self.feet_indices, 10:12], dim=2)
+        foot_speed_norm = torch.norm(self.rigid_state[:, self.feet_indices, 7:9], dim=2)
         rew = torch.sqrt(foot_speed_norm)
         rew *= contact
         return torch.sum(rew, dim=1)