Commit af86a44

Merge branch 'master' of https://github.com/kscalelabs/sim into quadruped

aaronxie0000 committed Sep 26, 2024
2 parents d6ef15a + 67f40d1
Showing 39 changed files with 1,946 additions and 153 deletions.
13 changes: 5 additions & 8 deletions README.md
@@ -66,21 +66,18 @@ make install-third-party-external
```

### Running experiments
1. Run training with the following command:
1. Run walking training with the following command:
```bash
python sim/train.py --task=stompymini --num_envs=4096 --headless
```
or for full body:
or standing policy:
```bash
python sim/train.py --task=stompymini --num_envs=4096 --headless
python sim/train.py --task=stompymini_standing --num_envs=4096 --headless
```
3. Run evaluation with the following command:
2. Run evaluation with the following command:
```bash
python sim/play.py --task stompymini --sim_device cpu
```
See [this doc](https://docs.google.com/document/d/1YZzBqIXO7oq7vIKu-BZr5ssNsi3nKtxpRPnfSnTXojA/edit?usp=sharing) for more beginner tips.

### Contributing
See the [contributing guide](CONTRIBUTING.md) to get started.
Binary file added examples/standing.onnx
Binary file added examples/standing.pt
1 change: 1 addition & 0 deletions pyproject.toml
@@ -43,6 +43,7 @@ module = [
[tool.isort]

profile = "black"
known_third_party = ["wandb"]

[tool.ruff]

2 changes: 2 additions & 0 deletions sim/env.py
@@ -9,6 +9,8 @@


def model_dir(robotname: str) -> Path:
print(os.getcwd())

return Path(os.environ.get("MODEL_DIR", "sim/resources/" + robotname))
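Aside from the added debug print, a small sketch of how `model_dir` resolves paths (mirroring the function above): the `MODEL_DIR` environment variable, when set, overrides the default `sim/resources/<robotname>` location. The robot name and override path below are illustrative.

```python
import os
from pathlib import Path

def model_dir(robotname: str) -> Path:
    # MODEL_DIR, when set, takes precedence over the per-robot default.
    return Path(os.environ.get("MODEL_DIR", "sim/resources/" + robotname))

print(model_dir("stompypro"))             # sim/resources/stompypro
os.environ["MODEL_DIR"] = "/tmp/models"
print(model_dir("stompypro"))             # /tmp/models (override ignores the robot name)
```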


3 changes: 2 additions & 1 deletion sim/envs/__init__.py
@@ -18,7 +18,7 @@
from sim.envs.humanoids.quadruped_env import QuadrupedFreeEnv
from sim.envs.humanoids.stompymini_config import MiniCfg, MiniCfgPPO
from sim.envs.humanoids.stompymini_env import MiniFreeEnv
from sim.envs.humanoids.stompypro_config import StompyProCfg, StompyProCfgPPO
from sim.envs.humanoids.stompypro_config import StompyProCfg, StompyProStandingCfg, StompyProCfgPPO
from sim.envs.humanoids.stompypro_env import StompyProFreeEnv
from sim.envs.humanoids.xbot_config import XBotCfg, XBotCfgPPO
from sim.envs.humanoids.xbot_env import XBotLFreeEnv
@@ -27,6 +27,7 @@
task_registry = TaskRegistry()
task_registry.register("stompymini", MiniFreeEnv, MiniCfg(), MiniCfgPPO())
task_registry.register("stompypro", StompyProFreeEnv, StompyProCfg(), StompyProCfgPPO())
task_registry.register("stompypro_standing", StompyProFreeEnv, StompyProStandingCfg(), StompyProCfgPPO())
task_registry.register("dora", DoraFreeEnv, DoraCfg(), DoraCfgPPO())
task_registry.register("h1", H1FreeEnv, H1Cfg(), H1CfgPPO())
task_registry.register("g1", G1FreeEnv, G1Cfg(), G1CfgPPO())
Empty file added sim/envs/base/__init__.py
Empty file.
Empty file modified sim/envs/base/base_config.py
100755 → 100644
Empty file.
Empty file modified sim/envs/base/base_task.py
100755 → 100644
Empty file.
17 changes: 17 additions & 0 deletions sim/envs/base/legged_robot.py
100755 → 100644
@@ -48,6 +48,15 @@ def step(self, actions):
Args:
actions (torch.Tensor): Tensor of shape (num_envs, num_actions_per_env)
"""
if self.cfg.env.use_ref_actions:
actions += self.ref_action
actions = torch.clip(actions, -self.cfg.normalization.clip_actions, self.cfg.normalization.clip_actions)

# dynamic randomization
delay = torch.rand((self.num_envs, 1), device=self.device) * self.cfg.domain_rand.action_delay
actions = (1 - delay) * actions + delay * self.actions
actions += self.cfg.domain_rand.action_noise * torch.randn_like(actions) * actions

clip_actions = self.cfg.normalization.clip_actions
self.actions = torch.clip(actions, -clip_actions, clip_actions).to(self.device)
# step physics and render each frame
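As a reading aid (not part of the diff), a minimal sketch of what the added randomization does: the fresh policy action is blended with the previous step's action using a per-environment delay factor, then multiplicative Gaussian noise proportional to the action magnitude is added; `action_delay` and `action_noise` correspond to the new `domain_rand` config fields.

```python
import torch

# Stand-alone sketch with dummy tensors; shapes and cfg values are illustrative.
num_envs, num_actions = 4, 12
action_delay, action_noise = 0.5, 0.02  # cfg.domain_rand.action_delay / action_noise

new_actions = torch.randn(num_envs, num_actions)   # policy output this step
prev_actions = torch.randn(num_envs, num_actions)  # self.actions from the previous step

# Per-env delay in [0, action_delay): 0 keeps the new action unchanged,
# larger values hold more of the previous action (a crude latency model).
delay = torch.rand((num_envs, 1)) * action_delay
blended = (1 - delay) * new_actions + delay * prev_actions

# Multiplicative noise scaled by the action magnitude.
blended = blended + action_noise * torch.randn_like(blended) * blended
```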
@@ -238,6 +247,9 @@ def _process_rigid_shape_props(self, props, env_id):

for s in range(len(props)):
props[s].friction = self.friction_coeffs[env_id]

self.env_frictions[env_id] = self.friction_coeffs[env_id]

return props

def _process_dof_props(self, props, env_id):
@@ -263,6 +275,7 @@ def _process_dof_props(self, props, env_id):
self.dof_pos_limits[i, 1] = props["upper"][i].item() * self.cfg.safety.pos_limit
self.dof_vel_limits[i] = props["velocity"][i].item() * self.cfg.safety.vel_limit
self.torque_limits[i] = props["effort"][i].item() * self.cfg.safety.torque_limit

return props

def _process_rigid_body_props(self, props, env_id):
@@ -271,6 +284,9 @@
rng = self.cfg.domain_rand.added_mass_range
props[0].mass += np.random.uniform(rng[0], rng[1])

for prop in props:
self.body_mass[env_id] += prop.mass

return props

def _post_physics_step_callback(self):
@@ -565,6 +581,7 @@ def _prepare_reward_function(self):
"""Prepares a list of reward functions, which will be called to compute the total reward.
Looks for self._reward_<REWARD_NAME>, where <REWARD_NAME> are names of all non zero reward scales in the cfg.
"""

# remove zero scales + multiply non-zero ones by dt
for key in list(self.reward_scales.keys()):
scale = self.reward_scales[key]
2 changes: 2 additions & 0 deletions sim/envs/base/legged_robot_config.py
100755 → 100644
@@ -29,6 +29,8 @@
#
# Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved.

from enum import Enum

from sim.envs.base.base_config import BaseConfig


Empty file added sim/envs/humanoids/__init__.py
Empty file.
10 changes: 6 additions & 4 deletions sim/envs/humanoids/dora_config.py
100755 → 100644
@@ -1,4 +1,4 @@
"""Defines the environment configuration for the Getting up task"""
"""Defines the environment configuration for the walking task"""

from sim.env import robot_urdf_path
from sim.envs.base.legged_robot_config import ( # type: ignore
@@ -32,6 +32,7 @@ class safety:
pos_limit = 1.0
vel_limit = 1.0
torque_limit = 0.85
terminate_after_contacts_on = []

class asset(LeggedRobotCfg.asset):
name = "dora"
@@ -42,7 +43,6 @@ class asset(LeggedRobotCfg.asset):

termination_height = 0.5
default_feet_height = 0.0
terminate_after_contacts_on = []

penalize_contacts_on = []
self_collisions = 1 # 1 to disable, 0 to enable...bitwise filter
@@ -126,7 +126,9 @@ class domain_rand(LeggedRobotCfg.domain_rand):
push_interval_s = 4
max_push_vel_xy = 0.2
max_push_ang_vel = 0.4
dynamic_randomization = 0.02
# dynamic randomization
action_delay = 0.5
action_noise = 0.02

class commands(LeggedRobotCfg.commands):
# Vers: lin_vel_x, lin_vel_y, ang_vel_yaw, heading (in heading mode ang_vel_yaw is recomputed from heading error)
@@ -230,7 +232,7 @@ class runner:

# logging
save_interval = 100 # check for potential saves every this many iterations
experiment_name = "Dora"
experiment_name = "dora"
run_name = ""
# load and resume
resume = False
11 changes: 1 addition & 10 deletions sim/envs/humanoids/dora_env.py
100755 → 100644
@@ -187,15 +187,6 @@ def _get_noise_scale_vec(self, cfg):
) # euler x,y
return noise_vec

def step(self, actions):
if self.cfg.env.use_ref_actions:
actions += self.ref_action
# dynamic randomization
delay = torch.rand((self.num_envs, 1), device=self.device)
actions = (1 - delay) * actions + delay * self.actions
actions += self.cfg.domain_rand.dynamic_randomization * torch.randn_like(actions) * actions
return super().step(actions)

def compute_observations(self):
phase = self._get_phase()
self.compute_ref_state()
@@ -309,7 +300,7 @@ def _reward_foot_slip(self):
with the ground. The speed of the foot is calculated and scaled by the contact condition.
"""
contact = self.contact_forces[:, self.feet_indices, 2] > 5.0
foot_speed_norm = torch.norm(self.rigid_state[:, self.feet_indices, 10:12], dim=2)
foot_speed_norm = torch.norm(self.rigid_state[:, self.feet_indices, 7:9], dim=2)
rew = torch.sqrt(foot_speed_norm)
rew *= contact
return torch.sum(rew, dim=1)
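As an aside on the index change: assuming Isaac Gym's usual 13-value rigid-body state layout (position [0:3], quaternion [3:7], linear velocity [7:10], angular velocity [10:13]), switching the slice from 10:12 to 7:9 makes the penalty use the feet's planar linear velocity rather than angular-velocity components. A self-contained sketch of the term with dummy tensors:

```python
import torch

# Dummy data; body count and foot indices are illustrative.
num_envs, num_bodies = 2, 12
rigid_state = torch.randn(num_envs, num_bodies, 13)
contact_forces = torch.randn(num_envs, num_bodies, 3).abs()
feet_indices = torch.tensor([10, 11])

contact = contact_forces[:, feet_indices, 2] > 5.0                      # feet in contact
foot_speed_norm = torch.norm(rigid_state[:, feet_indices, 7:9], dim=2)  # xy linear speed
rew = torch.sqrt(foot_speed_norm) * contact                             # penalize slip only while in contact
foot_slip_penalty = torch.sum(rew, dim=1)                               # per-env value
```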
11 changes: 6 additions & 5 deletions sim/envs/humanoids/g1_config.py
100755 → 100644
@@ -1,4 +1,4 @@
"""Defines the environment configuration for the Getting up task"""
"""Defines the environment configuration for the walking task"""

from sim.env import robot_urdf_path
from sim.envs.base.legged_robot_config import ( # type: ignore
@@ -27,7 +27,7 @@ class env(LeggedRobotCfg.env):
episode_length_s = 24 # episode length in seconds
use_ref_actions = False

class safety:
class safety(LeggedRobotCfg.safety):
# safety factors
pos_limit = 1.0
vel_limit = 1.0
@@ -42,7 +42,6 @@ class asset(LeggedRobotCfg.asset):

termination_height = 0.55
default_feet_height = 0.0
terminate_after_contacts_on = []

penalize_contacts_on = []
self_collisions = 1 # 1 to disable, 0 to enable...bitwise filter
@@ -125,7 +124,9 @@ class domain_rand(LeggedRobotCfg.domain_rand):
push_interval_s = 4
max_push_vel_xy = 0.2
max_push_ang_vel = 0.4
dynamic_randomization = 0.02
# dynamic randomization
action_delay = 0.5
action_noise = 0.02

class commands(LeggedRobotCfg.commands):
# Vers: lin_vel_x, lin_vel_y, ang_vel_yaw, heading (in heading mode ang_vel_yaw is recomputed from heading error)
@@ -229,7 +230,7 @@ class runner:

# logging
save_interval = 100 # check for potential saves every this many iterations
experiment_name = "Full"
experiment_name = "g1"
run_name = ""
# load and resume
resume = False
11 changes: 1 addition & 10 deletions sim/envs/humanoids/g1_env.py
100755 → 100644
@@ -186,15 +186,6 @@ def _get_noise_scale_vec(self, cfg):
) # euler x,y
return noise_vec

def step(self, actions):
if self.cfg.env.use_ref_actions:
actions += self.ref_action
# dynamic randomization
delay = torch.rand((self.num_envs, 1), device=self.device)
actions = (1 - delay) * actions + delay * self.actions
actions += self.cfg.domain_rand.dynamic_randomization * torch.randn_like(actions) * actions
return super().step(actions)

def compute_observations(self):
phase = self._get_phase()
self.compute_ref_state()
@@ -308,7 +299,7 @@ def _reward_foot_slip(self):
with the ground. The speed of the foot is calculated and scaled by the contact condition.
"""
contact = self.contact_forces[:, self.feet_indices, 2] > 5.0
foot_speed_norm = torch.norm(self.rigid_state[:, self.feet_indices, 10:12], dim=2)
foot_speed_norm = torch.norm(self.rigid_state[:, self.feet_indices, 7:9], dim=2)
rew = torch.sqrt(foot_speed_norm)
rew *= contact
return torch.sum(rew, dim=1)
10 changes: 6 additions & 4 deletions sim/envs/humanoids/h1_config.py
100755 → 100644
@@ -1,4 +1,4 @@
"""Defines the environment configuration for the Getting up task"""
"""Defines the environment configuration for the walking task"""

from sim.env import robot_urdf_path
from sim.envs.base.legged_robot_config import ( # type: ignore
@@ -34,6 +34,7 @@ class safety:
pos_limit = 1.0
vel_limit = 1.0
torque_limit = 0.85
terminate_after_contacts_on = []

class asset(LeggedRobotCfg.asset):
name = "h1_2"
@@ -45,7 +46,6 @@ class asset(LeggedRobotCfg.asset):
termination_height = 0.35
default_feet_height = 0.0
terminate_after_contacts_on = []

penalize_contacts_on = []
self_collisions = 1 # 1 to disable, 0 to enable...bitwise filter
flip_visual_attachments = False
@@ -128,7 +128,9 @@ class domain_rand(LeggedRobotCfg.domain_rand):
push_interval_s = 4
max_push_vel_xy = 0.2
max_push_ang_vel = 0.4
dynamic_randomization = 0.02
# dynamic randomization
action_delay = 0.5
action_noise = 0.02

class commands(LeggedRobotCfg.commands):
# Vers: lin_vel_x, lin_vel_y, ang_vel_yaw, heading (in heading mode ang_vel_yaw is recomputed from heading error)
@@ -232,7 +234,7 @@ class runner:

# logging
save_interval = 100 # check for potential saves every this many iterations
experiment_name = "Full"
experiment_name = "h1"
run_name = ""
# load and resume
resume = False
11 changes: 1 addition & 10 deletions sim/envs/humanoids/h1_env.py
100755 → 100644
@@ -187,15 +187,6 @@ def _get_noise_scale_vec(self, cfg):
) # euler x,y
return noise_vec

def step(self, actions):
if self.cfg.env.use_ref_actions:
actions += self.ref_action
# dynamic randomization
delay = torch.rand((self.num_envs, 1), device=self.device)
actions = (1 - delay) * actions + delay * self.actions
actions += self.cfg.domain_rand.dynamic_randomization * torch.randn_like(actions) * actions
return super().step(actions)

def compute_observations(self):
phase = self._get_phase()
self.compute_ref_state()
@@ -309,7 +300,7 @@ def _reward_foot_slip(self):
with the ground. The speed of the foot is calculated and scaled by the contact condition.
"""
contact = self.contact_forces[:, self.feet_indices, 2] > 5.0
foot_speed_norm = torch.norm(self.rigid_state[:, self.feet_indices, 10:12], dim=2)
foot_speed_norm = torch.norm(self.rigid_state[:, self.feet_indices, 7:9], dim=2)
rew = torch.sqrt(foot_speed_norm)
rew *= contact
return torch.sum(rew, dim=1)