Commit af86a44

Merge branch 'master' of https://github.com/kscalelabs/sim into quadruped

aaronxie0000 committed Sep 26, 2024
2 parents d6ef15a + 67f40d1
Showing 39 changed files with 1,946 additions and 153 deletions.
13 changes: 5 additions & 8 deletions README.md
@@ -66,21 +66,18 @@ make install-third-party-external
```

### Running experiments
1. Run training with the following command:
1. Run walking training with the following command:
```bash
python sim/train.py --task=stompymini --num_envs=4096 --headless
```
or for full body:
or standing policy:
```bash
python sim/train.py --task=stompymini --num_envs=4096 --headless
python sim/train.py --task=stompymini_standing --num_envs=4096 --headless
```
3. Run evaluation with the following command:
2. Run evaluation with the following command:
```bash
python sim/play.py --task stompymini --sim_device cpu
```
See [this doc](https://docs.google.com/document/d/1YZzBqIXO7oq7vIKu-BZr5ssNsi3nKtxpRPnfSnTXojA/edit?usp=sharing) for more beginner tips.

### Contributing
See the [contributing guide](CONTRIBUTING.md) to get started.
Binary file added examples/standing.onnx
Binary file added examples/standing.pt
1 change: 1 addition & 0 deletions pyproject.toml
@@ -43,6 +43,7 @@ module = [
[tool.isort]

profile = "black"
known_third_party = ["wandb"]

[tool.ruff]

2 changes: 2 additions & 0 deletions sim/env.py
@@ -9,6 +9,8 @@


def model_dir(robotname: str) -> Path:
print(os.getcwd())

return Path(os.environ.get("MODEL_DIR", "sim/resources/" + robotname))
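Aside from the added debug print, a small sketch of how `model_dir` resolves paths (mirroring the function above): the `MODEL_DIR` environment variable, when set, overrides the default `sim/resources/<robotname>` location. The robot name and override path below are illustrative.

```python
import os
from pathlib import Path

def model_dir(robotname: str) -> Path:
    # MODEL_DIR, when set, takes precedence over the per-robot default.
    return Path(os.environ.get("MODEL_DIR", "sim/resources/" + robotname))

print(model_dir("stompypro"))             # sim/resources/stompypro
os.environ["MODEL_DIR"] = "/tmp/models"
print(model_dir("stompypro"))             # /tmp/models (override ignores the robot name)
```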


3 changes: 2 additions & 1 deletion sim/envs/__init__.py
@@ -18,7 +18,7 @@
from sim.envs.humanoids.quadruped_env import QuadrupedFreeEnv
from sim.envs.humanoids.stompymini_config import MiniCfg, MiniCfgPPO
from sim.envs.humanoids.stompymini_env import MiniFreeEnv
from sim.envs.humanoids.stompypro_config import StompyProCfg, StompyProCfgPPO
from sim.envs.humanoids.stompypro_config import StompyProCfg, StompyProStandingCfg, StompyProCfgPPO
from sim.envs.humanoids.stompypro_env import StompyProFreeEnv
from sim.envs.humanoids.xbot_config import XBotCfg, XBotCfgPPO
from sim.envs.humanoids.xbot_env import XBotLFreeEnv
@@ -27,6 +27,7 @@
task_registry = TaskRegistry()
task_registry.register("stompymini", MiniFreeEnv, MiniCfg(), MiniCfgPPO())
task_registry.register("stompypro", StompyProFreeEnv, StompyProCfg(), StompyProCfgPPO())
task_registry.register("stompypro_standing", StompyProFreeEnv, StompyProStandingCfg(), StompyProCfgPPO())
task_registry.register("dora", DoraFreeEnv, DoraCfg(), DoraCfgPPO())
task_registry.register("h1", H1FreeEnv, H1Cfg(), H1CfgPPO())
task_registry.register("g1", G1FreeEnv, G1Cfg(), G1CfgPPO())
Empty file added sim/envs/base/__init__.py
Empty file.
Empty file modified sim/envs/base/base_config.py
100755 → 100644
Empty file.
Empty file modified sim/envs/base/base_task.py
100755 → 100644
Empty file.
17 changes: 17 additions & 0 deletions sim/envs/base/legged_robot.py
100755 → 100644
@@ -48,6 +48,15 @@ def step(self, actions):
Args:
actions (torch.Tensor): Tensor of shape (num_envs, num_actions_per_env)
"""
if self.cfg.env.use_ref_actions:
actions += self.ref_action
actions = torch.clip(actions, -self.cfg.normalization.clip_actions, self.cfg.normalization.clip_actions)

# dynamic randomization
delay = torch.rand((self.num_envs, 1), device=self.device) * self.cfg.domain_rand.action_delay
actions = (1 - delay) * actions + delay * self.actions
actions += self.cfg.domain_rand.action_noise * torch.randn_like(actions) * actions

clip_actions = self.cfg.normalization.clip_actions
self.actions = torch.clip(actions, -clip_actions, clip_actions).to(self.device)
# step physics and render each frame
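As a reading aid (not part of the diff), a minimal sketch of what the added randomization does: the fresh policy action is blended with the previous step's action using a per-environment delay factor, then multiplicative Gaussian noise proportional to the action magnitude is added; `action_delay` and `action_noise` correspond to the new `domain_rand` config fields.

```python
import torch

# Stand-alone sketch with dummy tensors; shapes and cfg values are illustrative.
num_envs, num_actions = 4, 12
action_delay, action_noise = 0.5, 0.02  # cfg.domain_rand.action_delay / action_noise

new_actions = torch.randn(num_envs, num_actions)   # policy output this step
prev_actions = torch.randn(num_envs, num_actions)  # self.actions from the previous step

# Per-env delay in [0, action_delay): 0 keeps the new action unchanged,
# larger values hold more of the previous action (a crude latency model).
delay = torch.rand((num_envs, 1)) * action_delay
blended = (1 - delay) * new_actions + delay * prev_actions

# Multiplicative noise scaled by the action magnitude.
blended = blended + action_noise * torch.randn_like(blended) * blended
```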
@@ -238,6 +247,9 @@ def _process_rigid_shape_props(self, props, env_id):

for s in range(len(props)):
props[s].friction = self.friction_coeffs[env_id]

self.env_frictions[env_id] = self.friction_coeffs[env_id]

return props

def _process_dof_props(self, props, env_id):
@@ -263,6 +275,7 @@ def _process_dof_props(self, props, env_id):
self.dof_pos_limits[i, 1] = props["upper"][i].item() * self.cfg.safety.pos_limit
self.dof_vel_limits[i] = props["velocity"][i].item() * self.cfg.safety.vel_limit
self.torque_limits[i] = props["effort"][i].item() * self.cfg.safety.torque_limit

return props

def _process_rigid_body_props(self, props, env_id):
@@ -271,6 +284,9 @@
rng = self.cfg.domain_rand.added_mass_range
props[0].mass += np.random.uniform(rng[0], rng[1])

for prop in props:
self.body_mass[env_id] += prop.mass

return props

def _post_physics_step_callback(self):
@@ -565,6 +581,7 @@ def _prepare_reward_function(self):
"""Prepares a list of reward functions, which will be called to compute the total reward.
Looks for self._reward_<REWARD_NAME>, where <REWARD_NAME> are names of all non zero reward scales in the cfg.
"""

# remove zero scales + multiply non-zero ones by dt
for key in list(self.reward_scales.keys()):
scale = self.reward_scales[key]
2 changes: 2 additions & 0 deletions sim/envs/base/legged_robot_config.py
100755 → 100644
@@ -29,6 +29,8 @@
#
# Copyright (c) 2024 Beijing RobotEra TECHNOLOGY CO.,LTD. All rights reserved.

from enum import Enum

from sim.envs.base.base_config import BaseConfig


Empty file added sim/envs/humanoids/__init__.py
Empty file.
10 changes: 6 additions & 4 deletions sim/envs/humanoids/dora_config.py
100755 → 100644
@@ -1,4 +1,4 @@
"""Defines the environment configuration for the Getting up task"""
"""Defines the environment configuration for the walking task"""

from sim.env import robot_urdf_path
from sim.envs.base.legged_robot_config import ( # type: ignore
@@ -32,6 +32,7 @@ class safety:
pos_limit = 1.0
vel_limit = 1.0
torque_limit = 0.85
terminate_after_contacts_on = []

class asset(LeggedRobotCfg.asset):
name = "dora"
@@ -42,7 +43,6 @@ class asset(LeggedRobotCfg.asset):

termination_height = 0.5
default_feet_height = 0.0
terminate_after_contacts_on = []

penalize_contacts_on = []
self_collisions = 1 # 1 to disable, 0 to enable...bitwise filter
@@ -126,7 +126,9 @@ class domain_rand(LeggedRobotCfg.domain_rand):
push_interval_s = 4
max_push_vel_xy = 0.2
max_push_ang_vel = 0.4
dynamic_randomization = 0.02
# dynamic randomization
action_delay = 0.5
action_noise = 0.02

class commands(LeggedRobotCfg.commands):
# Vers: lin_vel_x, lin_vel_y, ang_vel_yaw, heading (in heading mode ang_vel_yaw is recomputed from heading error)
@@ -230,7 +232,7 @@ class runner:

# logging
save_interval = 100 # check for potential saves every this many iterations
experiment_name = "Dora"
experiment_name = "dora"
run_name = ""
# load and resume
resume = False
11 changes: 1 addition & 10 deletions sim/envs/humanoids/dora_env.py
100755 → 100644
@@ -187,15 +187,6 @@ def _get_noise_scale_vec(self, cfg):
) # euler x,y
return noise_vec

def step(self, actions):
if self.cfg.env.use_ref_actions:
actions += self.ref_action
# dynamic randomization
delay = torch.rand((self.num_envs, 1), device=self.device)
actions = (1 - delay) * actions + delay * self.actions
actions += self.cfg.domain_rand.dynamic_randomization * torch.randn_like(actions) * actions
return super().step(actions)

def compute_observations(self):
phase = self._get_phase()
self.compute_ref_state()
@@ -309,7 +300,7 @@ def _reward_foot_slip(self):
with the ground. The speed of the foot is calculated and scaled by the contact condition.
"""
contact = self.contact_forces[:, self.feet_indices, 2] > 5.0
foot_speed_norm = torch.norm(self.rigid_state[:, self.feet_indices, 10:12], dim=2)
foot_speed_norm = torch.norm(self.rigid_state[:, self.feet_indices, 7:9], dim=2)
rew = torch.sqrt(foot_speed_norm)
rew *= contact
return torch.sum(rew, dim=1)
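As an aside on the index change: assuming Isaac Gym's usual 13-value rigid-body state layout (position [0:3], quaternion [3:7], linear velocity [7:10], angular velocity [10:13]), switching the slice from 10:12 to 7:9 makes the penalty use the feet's planar linear velocity rather than angular-velocity components. A self-contained sketch of the term with dummy tensors:

```python
import torch

# Dummy data; body count and foot indices are illustrative.
num_envs, num_bodies = 2, 12
rigid_state = torch.randn(num_envs, num_bodies, 13)
contact_forces = torch.randn(num_envs, num_bodies, 3).abs()
feet_indices = torch.tensor([10, 11])

contact = contact_forces[:, feet_indices, 2] > 5.0                      # feet in contact
foot_speed_norm = torch.norm(rigid_state[:, feet_indices, 7:9], dim=2)  # xy linear speed
rew = torch.sqrt(foot_speed_norm) * contact                             # penalize slip only while in contact
foot_slip_penalty = torch.sum(rew, dim=1)                               # per-env value
```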
11 changes: 6 additions & 5 deletions sim/envs/humanoids/g1_config.py
100755 → 100644
@@ -1,4 +1,4 @@
"""Defines the environment configuration for the Getting up task"""
"""Defines the environment configuration for the walking task"""

from sim.env import robot_urdf_path
from sim.envs.base.legged_robot_config import ( # type: ignore
@@ -27,7 +27,7 @@ class env(LeggedRobotCfg.env):
episode_length_s = 24 # episode length in seconds
use_ref_actions = False

class safety:
class safety(LeggedRobotCfg.safety):
# safety factors
pos_limit = 1.0
vel_limit = 1.0
@@ -42,7 +42,6 @@ class asset(LeggedRobotCfg.asset):

termination_height = 0.55
default_feet_height = 0.0
terminate_after_contacts_on = []

penalize_contacts_on = []
self_collisions = 1 # 1 to disable, 0 to enable...bitwise filter
@@ -125,7 +124,9 @@ class domain_rand(LeggedRobotCfg.domain_rand):
push_interval_s = 4
max_push_vel_xy = 0.2
max_push_ang_vel = 0.4
dynamic_randomization = 0.02
# dynamic randomization
action_delay = 0.5
action_noise = 0.02

class commands(LeggedRobotCfg.commands):
# Vers: lin_vel_x, lin_vel_y, ang_vel_yaw, heading (in heading mode ang_vel_yaw is recomputed from heading error)
@@ -229,7 +230,7 @@ class runner:

# logging
save_interval = 100 # check for potential saves every this many iterations
experiment_name = "Full"
experiment_name = "g1"
run_name = ""
# load and resume
resume = False
11 changes: 1 addition & 10 deletions sim/envs/humanoids/g1_env.py
100755 → 100644
@@ -186,15 +186,6 @@ def _get_noise_scale_vec(self, cfg):
) # euler x,y
return noise_vec

def step(self, actions):
if self.cfg.env.use_ref_actions:
actions += self.ref_action
# dynamic randomization
delay = torch.rand((self.num_envs, 1), device=self.device)
actions = (1 - delay) * actions + delay * self.actions
actions += self.cfg.domain_rand.dynamic_randomization * torch.randn_like(actions) * actions
return super().step(actions)

def compute_observations(self):
phase = self._get_phase()
self.compute_ref_state()
@@ -308,7 +299,7 @@ def _reward_foot_slip(self):
with the ground. The speed of the foot is calculated and scaled by the contact condition.
"""
contact = self.contact_forces[:, self.feet_indices, 2] > 5.0
foot_speed_norm = torch.norm(self.rigid_state[:, self.feet_indices, 10:12], dim=2)
foot_speed_norm = torch.norm(self.rigid_state[:, self.feet_indices, 7:9], dim=2)
rew = torch.sqrt(foot_speed_norm)
rew *= contact
return torch.sum(rew, dim=1)
10 changes: 6 additions & 4 deletions sim/envs/humanoids/h1_config.py
100755 → 100644
@@ -1,4 +1,4 @@
"""Defines the environment configuration for the Getting up task"""
"""Defines the environment configuration for the walking task"""

from sim.env import robot_urdf_path
from sim.envs.base.legged_robot_config import ( # type: ignore
@@ -34,6 +34,7 @@ class safety:
pos_limit = 1.0
vel_limit = 1.0
torque_limit = 0.85
terminate_after_contacts_on = []

class asset(LeggedRobotCfg.asset):
name = "h1_2"
@@ -45,7 +46,6 @@ class asset(LeggedRobotCfg.asset):
termination_height = 0.35
default_feet_height = 0.0
terminate_after_contacts_on = []

penalize_contacts_on = []
self_collisions = 1 # 1 to disable, 0 to enable...bitwise filter
flip_visual_attachments = False
@@ -128,7 +128,9 @@ class domain_rand(LeggedRobotCfg.domain_rand):
push_interval_s = 4
max_push_vel_xy = 0.2
max_push_ang_vel = 0.4
dynamic_randomization = 0.02
# dynamic randomization
action_delay = 0.5
action_noise = 0.02

class commands(LeggedRobotCfg.commands):
# Vers: lin_vel_x, lin_vel_y, ang_vel_yaw, heading (in heading mode ang_vel_yaw is recomputed from heading error)
@@ -232,7 +234,7 @@ class runner:

# logging
save_interval = 100 # check for potential saves every this many iterations
experiment_name = "Full"
experiment_name = "h1"
run_name = ""
# load and resume
resume = False
11 changes: 1 addition & 10 deletions sim/envs/humanoids/h1_env.py
100755 → 100644
@@ -187,15 +187,6 @@ def _get_noise_scale_vec(self, cfg):
) # euler x,y
return noise_vec

def step(self, actions):
if self.cfg.env.use_ref_actions:
actions += self.ref_action
# dynamic randomization
delay = torch.rand((self.num_envs, 1), device=self.device)
actions = (1 - delay) * actions + delay * self.actions
actions += self.cfg.domain_rand.dynamic_randomization * torch.randn_like(actions) * actions
return super().step(actions)

def compute_observations(self):
phase = self._get_phase()
self.compute_ref_state()
@@ -309,7 +300,7 @@ def _reward_foot_slip(self):
with the ground. The speed of the foot is calculated and scaled by the contact condition.
"""
contact = self.contact_forces[:, self.feet_indices, 2] > 5.0
foot_speed_norm = torch.norm(self.rigid_state[:, self.feet_indices, 10:12], dim=2)
foot_speed_norm = torch.norm(self.rigid_state[:, self.feet_indices, 7:9], dim=2)
rew = torch.sqrt(foot_speed_norm)
rew *= contact
return torch.sum(rew, dim=1)