Commit

commit stuff
codekansas committed Apr 2, 2024
1 parent fa55377 commit f8e1655
Showing 4 changed files with 84 additions and 65 deletions.
8 changes: 4 additions & 4 deletions Makefile
@@ -26,13 +26,13 @@ all:
# Train #
# ------------------------ #

train-one:
train-one-vis:
@python -m sim.humanoid_gym.train --task humanoid_ppo --run_name v1 --num_envs 1

train:
@python -m sim.humanoid_gym.train --task humanoid_ppo --run_name v1 --num_envs 1024
train-many-vis:
@python -m sim.humanoid_gym.train --task humanoid_ppo --run_name v1 --num_envs 16

train-full:
train:
@python -m sim.humanoid_gym.train --task humanoid_ppo --run_name v1 --num_envs 2048 --headless

play:
58 changes: 32 additions & 26 deletions sim/humanoid_gym/envs/humanoid_config.py
@@ -21,7 +21,7 @@ class env(LeggedRobotCfg.env):  # noqa: N801
num_privileged_obs = int(c_frame_stack * single_num_privileged_obs)
num_actions = NUM_JOINTS # Torque command for each joint.
num_envs = 4096
episode_length_s = 24 # Maximum episode length
episode_length_s = 8 # Maximum episode length
use_ref_actions = False

class safety: # noqa: N801
@@ -53,20 +53,20 @@ class terrain(LeggedRobotCfg.terrain):  # noqa: N801
curriculum = False
# For rough terrain only:
measure_heights = False
static_friction = 0.6
dynamic_friction = 0.6
static_friction = 1.0
dynamic_friction = 1.0
terrain_length = 8.0
terrain_width = 8.0
num_rows = 20 # Number of terrain rows (levels)
num_cols = 20 # Number of terrain cols (types)
max_init_terrain_level = 10 # Starting curriculum state
# plane; obstacles; uniform; slope_up; slope_down, stair_up, stair_down
terrain_proportions = [0.2, 0.2, 0.4, 0.1, 0.1, 0, 0]
restitution = 0.0
restitution = 1.0

class noise: # noqa: N801
add_noise = True
noise_level = 0.6 # Scales other values
noise_level = 0.1 # Scales other values

class noise_scales: # noqa: N801
dof_pos = 0.05
@@ -82,18 +82,18 @@ class init_state(LeggedRobotCfg.init_state):  # noqa: N801

class sim(LeggedRobotCfg.sim): # noqa: N801
dt = 0.005
substeps = 2
substeps = 1

class physx(LeggedRobotCfg.sim.physx): # noqa: N801
num_threads = 10
solver_type = 1
num_position_iterations = 4
num_velocity_iterations = 1
contact_offset = 0.01
rest_offset = -0.02
contact_offset = -0.01
rest_offset = -0.015
bounce_threshold_velocity = 0.5
max_depenetration_velocity = 1.0
max_gpu_contact_pairs = 2**24
max_depenetration_velocity = 10.0
max_gpu_contact_pairs = 2**23
default_buffer_size_multiplier = 5
contact_collection = 2 # gymapi.CC_ALL_SUBSTEPS

@@ -113,7 +113,7 @@ class commands(LeggedRobotCfg.commands):  # noqa: N801
resampling_time = 8.0

class rewards: # noqa: N801
base_height_target = 1.1
# base_height_target = 1.1
min_dist = 0.2
max_dist = 0.5

@@ -122,8 +122,8 @@ class rewards:  # noqa: N801
target_feet_height = 0.06 # m
cycle_time = 0.64 # sec

# If truem negative total rewards are clipped at zero.
only_positive_rewards = True
# If true, negative total rewards are clipped at zero.
only_positive_rewards = False

# Max contact force should be a bit above the weight of the robot. In
# our case the robot weighs 62 Kg, so we set it to 700.
@@ -153,13 +153,18 @@ class scales:  # noqa: N801

# Base position
base_height = 1.0
base_acc = 0.1
# base_acc = 0.1
base_acc = 0.0

# Energy
torques = -1e-5
dof_vel = -5e-4
dof_acc = -1e-7
collision = -1e-2
# torques = -1e-6
# dof_vel = -5e-5
# dof_acc = -1e-8
# collision = -1e-2
torques = 0.0
dof_vel = 0.0
dof_acc = 0.0
collision = 0.0

class normalization: # noqa: N801
class obs_scales: # noqa: N801
@@ -170,8 +175,8 @@ class obs_scales:  # noqa: N801
quat = 1.0
height_measurements = 5.0

clip_observations = 18.0
clip_actions = 18.0
clip_observations = 100.0
clip_actions = 100.0


class StompyPPO(LeggedRobotCfgPPO):
@@ -180,16 +185,17 @@ class StompyPPO(LeggedRobotCfgPPO):

class policy: # noqa: N801
init_noise_std = 1.0
actor_hidden_dims = [512, 256, 128]
critic_hidden_dims = [768, 256, 128]
actor_hidden_dims = [512, 512, 256]
critic_hidden_dims = [1024, 512, 256]

class algorithm(LeggedRobotCfgPPO.algorithm): # noqa: N801
entropy_coef = 0.001
learning_rate = 1e-5
entropy_coef = 0.01
learning_rate = 3e-4
num_learning_epochs = 2
gamma = 0.994
lam = 0.9
gamma = 0.99
lam = 0.95
num_mini_batches = 4
max_grad_norm = 0.5

class runner: # noqa: N801
policy_class_name = "ActorCritic"
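As an aside on how the reward scales and clip values above are typically consumed in legged_gym-style pipelines (a minimal standalone sketch with illustrative values, not this repository's exact code): a scale of 0.0 disables a reward term, and the clip values bound observations and actions element-wise, so raising them from 18 to 100 makes the clamp far less aggressive.

import torch

# Illustrative values lifted from the config above; the consuming code below is
# an assumption for illustration, not taken from this repository.
clip_observations = 100.0
clip_actions = 100.0
reward_scales = {"base_height": 1.0, "base_acc": 0.0, "torques": 0.0}

def clip_io(obs: torch.Tensor, actions: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
    # Element-wise clamping of observations and actions.
    return (
        torch.clip(obs, -clip_observations, clip_observations),
        torch.clip(actions, -clip_actions, clip_actions),
    )

def total_reward(terms: dict[str, torch.Tensor]) -> torch.Tensor:
    # Terms whose scale is 0.0 (or which are commented out) contribute nothing.
    total = torch.zeros_like(next(iter(terms.values())))
    for name, scale in reward_scales.items():
        if scale != 0.0:
            total += scale * terms[name]
    return total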
72 changes: 42 additions & 30 deletions sim/humanoid_gym/envs/humanoid_env.py
@@ -6,6 +6,7 @@
import torch
from humanoid.envs import LeggedRobot
from humanoid.utils.terrain import HumanoidTerrain
from isaacgym import gymtorch
from torch import Tensor

from sim.humanoid_gym.envs.humanoid_config import StompyCfg
@@ -78,31 +79,19 @@ def __init__(
self.compute_observations()

def _push_robots(self) -> None:
# """Random pushes the robots. Emulates an impulse by setting a randomized base velocity."""
# max_vel = self.cfg.domain_rand.max_push_vel_xy
# max_push_angular = self.cfg.domain_rand.max_push_ang_vel

# # Linear velocity in the X / Y axes.
# self.rand_push_force[:, :2] = torch_rand_float(
# -max_vel,
# max_vel,
# (self.num_envs, 2),
# device=self.device,
# )
# self.root_states[:, 7:9] = self.rand_push_force[:, :2]

# # Random torques in all three axes.
# self.rand_push_torque = torch_rand_float(
# -max_push_angular,
# max_push_angular,
# (self.num_envs, 3),
# device=self.device,
# )
# self.root_states[:, 10:13] = self.rand_push_torque

# self.gym.set_actor_root_state_tensor(self.sim, gymtorch.unwrap_tensor(self.root_states))

pass
"""Random pushes the robots. Emulates an impulse by setting a randomized base velocity."""
max_vel = self.cfg.domain_rand.max_push_vel_xy
max_push_angular = self.cfg.domain_rand.max_push_ang_vel

# Random forces in the X and Y axes.
self.rand_push_force[:, :2] = (torch.rand((self.num_envs, 2), device=self.device) * 2 - 1) * max_vel
self.root_states[:, 7:9] = self.rand_push_force[:, :2]

# Random torques in all three axes.
self.rand_push_torque = (torch.rand((self.num_envs, 3), device=self.device) * 2 - 1) * max_push_angular
self.root_states[:, 10:13] = self.rand_push_torque

self.gym.set_actor_root_state_tensor(self.sim, gymtorch.unwrap_tensor(self.root_states))

def create_sim(self) -> None:
"""Creates simulation, terrain and evironments."""
@@ -219,19 +208,42 @@ def reset_idx(self, env_ids: Tensor) -> None:
for i in range(self.critic_history.maxlen):
self.critic_history[i][env_ids] *= 0

def _compute_torques(self, actions: Tensor) -> Tensor:
# Need to override this so we can just use the motor torques directly.
# return torch.clip(actions * self.cfg.control.action_scale, -self.torque_limits, self.torque_limits)
return torch.tanh(actions * self.cfg.control.action_scale) * self.torque_limits

def check_termination(self) -> None:
self.reset_buf = self.episode_length_buf > self.max_episode_length

def _reset_dofs(self, env_ids: Tensor) -> Tensor:
# Resets the DOF positions to random positions within their limits.
min_pos, max_pos = self.dof_pos_limits.unbind(1)
rand_pos = torch.rand((len(env_ids), self.num_dof), device=self.device)
self.dof_pos[env_ids] = min_pos[None, :] + rand_pos * (max_pos - min_pos)[None, :]

self.dof_vel[env_ids] = 0.0

env_ids_int32 = env_ids.to(dtype=torch.int32)
self.gym.set_dof_state_tensor_indexed(
self.sim,
gymtorch.unwrap_tensor(self.dof_state),
gymtorch.unwrap_tensor(env_ids_int32),
len(env_ids_int32),
)

def _reward_base_height(self) -> Tensor:
"""Calculates the reward based on the robot's base height.
This rewards the robot for being close to the target height without
going over (we actually penalise it for going over).
This rewards the robot for keeping the feet as far below the base as
possible.
Returns:
The reward for maximizing the base height.
"""
base_height = self.root_states[:, 2]
reward = base_height / self.cfg.rewards.base_height_target
reward[reward > 1.0] = 0.0
return reward
max_foot_height = self.rigid_state[:, self.feet_indices, 2].max(dim=1).values
return base_height - max_foot_height

def _reward_base_acc(self) -> Tensor:
"""Computes the reward based on the base's acceleration.
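For readers skimming the new _push_robots and _compute_torques logic above, here is a minimal self-contained sketch (with made-up sizes and limits) of the two transforms: torch.rand(...) * 2 - 1 yields uniform samples in [-1, 1], so scaling by the maximum push magnitude gives velocities in [-max, max], and the tanh mapping keeps commanded torques within the per-joint torque limits.

import torch

num_envs, num_joints = 4, 12                      # made-up sizes for illustration
max_push_vel_xy = 0.5                             # example maximum push velocity (m/s)
action_scale = 0.25                               # example action scale
torque_limits = torch.full((num_joints,), 30.0)   # example per-joint limits (N*m)

# Uniform push velocities in [-max_push_vel_xy, max_push_vel_xy].
push_xy = (torch.rand((num_envs, 2)) * 2 - 1) * max_push_vel_xy

# tanh squashes to (-1, 1), so the scaled torque never exceeds the joint limit.
actions = torch.randn(num_envs, num_joints)
torques = torch.tanh(actions * action_scale) * torque_limits

assert push_xy.abs().max() <= max_push_vel_xy
assert (torques.abs() <= torque_limits).all()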
11 changes: 6 additions & 5 deletions sim/scripts/drop_urdf.py
@@ -66,11 +66,11 @@ def load_gym() -> GymParams:

sim_params.physx.solver_type = 1
sim_params.physx.num_position_iterations = 4
sim_params.physx.num_velocity_iterations = 0
sim_params.physx.contact_offset = 0.01
sim_params.physx.rest_offset = -0.02
sim_params.physx.num_velocity_iterations = 1
sim_params.physx.contact_offset = -0.01
sim_params.physx.rest_offset = -0.015
sim_params.physx.bounce_threshold_velocity = 0.5
sim_params.physx.max_depenetration_velocity = 1.0
sim_params.physx.max_depenetration_velocity = 10.0
sim_params.physx.max_gpu_contact_pairs = 2**24
sim_params.physx.default_buffer_size_multiplier = 5
sim_params.physx.contact_collection = gymapi.CC_ALL_SUBSTEPS
@@ -99,6 +99,7 @@ def load_gym() -> GymParams:

# Add ground plane.
plane_params = gymapi.PlaneParams()
plane_params.restitution = 1.0
gym.add_ground(sim, plane_params)

# Set up the environment grid.
@@ -152,7 +153,7 @@ def run_gym(gym: GymParams, mode: Literal["one_at_a_time", "all_at_once"] = "all
body_ids: List[str] = gym.gym.get_actor_rigid_body_names(gym.env, gym.robot)

joint_id = 0
effort = 10.0
effort = 5.0

while not gym.gym.query_viewer_has_closed(gym.viewer):
gym.gym.simulate(gym.sim)
