Adds Simulator from OpenQuadruped/spot_mini_mini
This commit is contained in:
@@ -0,0 +1,275 @@
|
||||
""" This file implements the gym environment of SpotMicro with Bezier Curve.
|
||||
"""
|
||||
|
||||
import math
|
||||
import time
|
||||
import gym
|
||||
import numpy as np
|
||||
import pybullet
|
||||
import pybullet_data
|
||||
from gym import spaces
|
||||
from gym.utils import seeding
|
||||
from pkg_resources import parse_version
|
||||
import spot
|
||||
import pybullet_utils.bullet_client as bullet_client
|
||||
from gym.envs.registration import register
|
||||
from OpenLoopSM.SpotOL import BezierStepper
|
||||
from spot_gym_env import spotGymEnv
|
||||
import Kinematics.LieAlgebra as LA
|
||||
from spot_env_randomizer import SpotEnvRandomizer
|
||||
|
||||
SENSOR_NOISE_STDDEV = spot.SENSOR_NOISE_STDDEV
|
||||
|
||||
# Register as OpenAI Gym Environment
|
||||
register(
|
||||
id="SpotMicroEnv-v1",
|
||||
entry_point="spotmicro.GymEnvs.spot_bezier_env:spotBezierEnv",
|
||||
max_episode_steps=1000,
|
||||
)
|
||||
|
||||
|
||||
class spotBezierEnv(spotGymEnv):
|
||||
"""The gym environment for spot.
|
||||
|
||||
It simulates the locomotion of spot, a quadruped robot. The state space
|
||||
include the angles, velocities and torques for all the motors and the action
|
||||
space is the desired motor angle for each motor. The reward function is based
|
||||
on how far spot walks in 1000 steps and penalizes the energy
|
||||
expenditure.
|
||||
|
||||
"""
|
||||
|
||||
metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": 50}
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
distance_weight=1.0,
|
||||
rotation_weight=0.0,
|
||||
energy_weight=0.000,
|
||||
shake_weight=0.00,
|
||||
drift_weight=0.0,
|
||||
rp_weight=10.0,
|
||||
rate_weight=0.03,
|
||||
urdf_root=pybullet_data.getDataPath(),
|
||||
urdf_version=None,
|
||||
distance_limit=float("inf"),
|
||||
observation_noise_stdev=SENSOR_NOISE_STDDEV,
|
||||
self_collision_enabled=True,
|
||||
motor_velocity_limit=np.inf,
|
||||
pd_control_enabled=False,
|
||||
leg_model_enabled=False,
|
||||
accurate_motor_model_enabled=False,
|
||||
remove_default_joint_damping=False,
|
||||
motor_kp=2.0,
|
||||
motor_kd=0.03,
|
||||
control_latency=0.0,
|
||||
pd_latency=0.0,
|
||||
torque_control_enabled=False,
|
||||
motor_overheat_protection=False,
|
||||
hard_reset=False,
|
||||
on_rack=False,
|
||||
render=True,
|
||||
num_steps_to_log=1000,
|
||||
action_repeat=1,
|
||||
control_time_step=None,
|
||||
env_randomizer=SpotEnvRandomizer(),
|
||||
forward_reward_cap=float("inf"),
|
||||
reflection=True,
|
||||
log_path=None,
|
||||
desired_velocity=0.5,
|
||||
desired_rate=0.0,
|
||||
lateral=False,
|
||||
draw_foot_path=False,
|
||||
height_field=False,
|
||||
AutoStepper=True,
|
||||
action_dim=14,
|
||||
contacts=True,
|
||||
):
|
||||
|
||||
super(spotBezierEnv, self).__init__(
|
||||
distance_weight=distance_weight,
|
||||
rotation_weight=rotation_weight,
|
||||
energy_weight=energy_weight,
|
||||
shake_weight=shake_weight,
|
||||
drift_weight=drift_weight,
|
||||
rp_weight=rp_weight,
|
||||
rate_weight=rate_weight,
|
||||
urdf_root=urdf_root,
|
||||
urdf_version=urdf_version,
|
||||
distance_limit=distance_limit,
|
||||
observation_noise_stdev=observation_noise_stdev,
|
||||
self_collision_enabled=self_collision_enabled,
|
||||
motor_velocity_limit=motor_velocity_limit,
|
||||
pd_control_enabled=pd_control_enabled,
|
||||
leg_model_enabled=leg_model_enabled,
|
||||
accurate_motor_model_enabled=accurate_motor_model_enabled,
|
||||
remove_default_joint_damping=remove_default_joint_damping,
|
||||
motor_kp=motor_kp,
|
||||
motor_kd=motor_kd,
|
||||
control_latency=control_latency,
|
||||
pd_latency=pd_latency,
|
||||
torque_control_enabled=torque_control_enabled,
|
||||
motor_overheat_protection=motor_overheat_protection,
|
||||
hard_reset=hard_reset,
|
||||
on_rack=on_rack,
|
||||
render=render,
|
||||
num_steps_to_log=num_steps_to_log,
|
||||
action_repeat=action_repeat,
|
||||
control_time_step=control_time_step,
|
||||
env_randomizer=env_randomizer,
|
||||
forward_reward_cap=forward_reward_cap,
|
||||
reflection=reflection,
|
||||
log_path=log_path,
|
||||
desired_velocity=desired_velocity,
|
||||
desired_rate=desired_rate,
|
||||
lateral=lateral,
|
||||
draw_foot_path=draw_foot_path,
|
||||
height_field=height_field,
|
||||
AutoStepper=AutoStepper,
|
||||
contacts=contacts,
|
||||
)
|
||||
|
||||
# Residuals + Clearance Height + Penetration Depth
|
||||
action_high = np.array([self._action_bound] * action_dim)
|
||||
self.action_space = spaces.Box(-action_high, action_high)
|
||||
print("Action SPACE: {}".format(self.action_space))
|
||||
|
||||
self.prev_pos = np.array([0.0, 0.0, 0.0])
|
||||
|
||||
self.yaw = 0.0
|
||||
|
||||
def pass_joint_angles(self, ja):
|
||||
"""For executing joint angles"""
|
||||
self.ja = ja
|
||||
|
||||
def step(self, action):
|
||||
"""Step forward the simulation, given the action.
|
||||
|
||||
Args:
|
||||
action: A list of desired motor angles for eight motors.
|
||||
smach: the bezier state machine containing simulated
|
||||
random controll inputs
|
||||
|
||||
Returns:
|
||||
observations: The angles, velocities and torques of all motors.
|
||||
reward: The reward for the current state-action pair.
|
||||
done: Whether the episode has ended.
|
||||
info: A dictionary that stores diagnostic information.
|
||||
|
||||
Raises:
|
||||
ValueError: The action dimension is not the same as the number of motors.
|
||||
ValueError: The magnitude of actions is out of bounds.
|
||||
"""
|
||||
# Discard all but joint angles
|
||||
action = self.ja
|
||||
|
||||
self._last_base_position = self.spot.GetBasePosition()
|
||||
self._last_base_orientation = self.spot.GetBaseOrientation()
|
||||
# print("ACTION:")
|
||||
# print(action)
|
||||
if self._is_render:
|
||||
# Sleep, otherwise the computation takes less time than real time,
|
||||
# which will make the visualization like a fast-forward video.
|
||||
time_spent = time.time() - self._last_frame_time
|
||||
self._last_frame_time = time.time()
|
||||
time_to_sleep = self.control_time_step - time_spent
|
||||
if time_to_sleep > 0:
|
||||
time.sleep(time_to_sleep)
|
||||
base_pos = self.spot.GetBasePosition()
|
||||
# Keep the previous orientation of the camera set by the user.
|
||||
[yaw, pitch, dist] = self._pybullet_client.getDebugVisualizerCamera()[8:11]
|
||||
self._pybullet_client.resetDebugVisualizerCamera(dist, yaw, pitch, base_pos)
|
||||
|
||||
action = self._transform_action_to_motor_command(action)
|
||||
self.spot.Step(action)
|
||||
# NOTE: SMACH is passed to the reward method
|
||||
reward = self._reward()
|
||||
done = self._termination()
|
||||
self._env_step_counter += 1
|
||||
|
||||
# DRAW FOOT PATH
|
||||
if self.draw_foot_path:
|
||||
self.DrawFootPath()
|
||||
|
||||
return np.array(self._get_observation()), reward, done, {}
|
||||
|
||||
def return_state(self):
|
||||
return np.array(self._get_observation())
|
||||
|
||||
def return_yaw(self):
|
||||
return self.yaw
|
||||
|
||||
def _reward(self):
|
||||
# get observation
|
||||
obs = self._get_observation()
|
||||
|
||||
orn = self.spot.GetBaseOrientation()
|
||||
|
||||
# Return StepVelocity with the sign of StepLength
|
||||
DesiredVelicty = math.copysign(
|
||||
self.spot.StepVelocity / 4.0, self.spot.StepLength
|
||||
)
|
||||
|
||||
fwd_speed = self.spot.prev_lin_twist[0] # vx
|
||||
lat_speed = self.spot.prev_lin_twist[1] # vy
|
||||
|
||||
# DEBUG
|
||||
lt, at = self.spot.GetBaseTwist()
|
||||
|
||||
# ONLY WORKS FOR MOVING PURELY FORWARD
|
||||
pos = self.spot.GetBasePosition()
|
||||
|
||||
forward_reward = pos[0] - self.prev_pos[0]
|
||||
|
||||
# yaw_rate = obs[4]
|
||||
|
||||
rot_reward = 0.0
|
||||
|
||||
roll, pitch, yaw = self._pybullet_client.getEulerFromQuaternion(
|
||||
[orn[0], orn[1], orn[2], orn[3]]
|
||||
)
|
||||
|
||||
# if yaw < 0.0:
|
||||
# yaw += np.pi
|
||||
# else:
|
||||
# yaw -= np.pi
|
||||
|
||||
# For auto correct
|
||||
self.yaw = yaw
|
||||
|
||||
# penalty for nonzero PITCH and YAW(hidden) ONLY
|
||||
# NOTE: Added Yaw mult
|
||||
rp_reward = -(abs(obs[0]) + abs(obs[1]))
|
||||
|
||||
# print("YAW: {}".format(yaw))
|
||||
# print("RP RWD: {:.2f}".format(rp_reward))
|
||||
# print("ROLL: {} \t PITCH: {}".format(obs[0], obs[1]))
|
||||
|
||||
# penalty for nonzero acc(z) - UNRELIABLE ON IMU
|
||||
shake_reward = 0
|
||||
|
||||
# penalty for nonzero rate (x,y,z)
|
||||
rate_reward = -(abs(obs[2]) + abs(obs[3]))
|
||||
|
||||
# print("RATES: {}".format(obs[2:5]))
|
||||
|
||||
drift_reward = -abs(pos[1])
|
||||
energy_reward = (
|
||||
-np.abs(np.dot(self.spot.GetMotorTorques(), self.spot.GetMotorVelocities()))
|
||||
* self._time_step
|
||||
)
|
||||
reward = (
|
||||
self._distance_weight * forward_reward
|
||||
+ self._rotation_weight * rot_reward
|
||||
+ self._energy_weight * energy_reward
|
||||
+ self._drift_weight * drift_reward
|
||||
+ self._shake_weight * shake_reward
|
||||
+ self._rp_weight * rp_reward
|
||||
+ self._rate_weight * rate_reward
|
||||
)
|
||||
self._objectives.append(
|
||||
[forward_reward, energy_reward, drift_reward, shake_reward]
|
||||
)
|
||||
# print("REWARD: ", reward)
|
||||
# NOTE: return yaw for automatic correction (not part of RL)
|
||||
return reward
|
||||
Reference in New Issue
Block a user