274 lines
9.0 KiB
Python
274 lines
9.0 KiB
Python
""" This file implements the gym environment of SpotMicro with Bezier Curve.
|
|
"""
|
|
|
|
import math
|
|
import time
|
|
import gym
|
|
import numpy as np
|
|
import pybullet
|
|
import pybullet_data
|
|
from gym import spaces
|
|
from gym.utils import seeding
|
|
from pkg_resources import parse_version
|
|
from .. import spot
|
|
import pybullet_utils.bullet_client as bullet_client
|
|
from gym.envs.registration import register
|
|
from ..spot_gym_env import spotGymEnv
|
|
from ..spot_env_randomizer import SpotEnvRandomizer
|
|
|
|
SENSOR_NOISE_STDDEV = spot.SENSOR_NOISE_STDDEV
|
|
|
|
# Register as OpenAI Gym Environment
|
|
register(
|
|
id="SpotMicroEnv-v1",
|
|
entry_point="spotmicro.GymEnvs.spot_bezier_env:spotBezierEnv",
|
|
max_episode_steps=1000,
|
|
)
|
|
|
|
|
|
class spotBezierEnv(spotGymEnv):
|
|
"""The gym environment for spot.
|
|
|
|
It simulates the locomotion of spot, a quadruped robot. The state space
|
|
include the angles, velocities and torques for all the motors and the action
|
|
space is the desired motor angle for each motor. The reward function is based
|
|
on how far spot walks in 1000 steps and penalizes the energy
|
|
expenditure.
|
|
|
|
"""
|
|
|
|
metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": 50}
|
|
|
|
def __init__(
|
|
self,
|
|
distance_weight=1.0,
|
|
rotation_weight=0.0,
|
|
energy_weight=0.000,
|
|
shake_weight=0.00,
|
|
drift_weight=0.0,
|
|
rp_weight=10.0,
|
|
rate_weight=0.03,
|
|
urdf_root=pybullet_data.getDataPath(),
|
|
urdf_version=None,
|
|
distance_limit=float("inf"),
|
|
observation_noise_stdev=SENSOR_NOISE_STDDEV,
|
|
self_collision_enabled=True,
|
|
motor_velocity_limit=np.inf,
|
|
pd_control_enabled=False,
|
|
leg_model_enabled=False,
|
|
accurate_motor_model_enabled=False,
|
|
remove_default_joint_damping=False,
|
|
motor_kp=2.0,
|
|
motor_kd=0.03,
|
|
control_latency=0.0,
|
|
pd_latency=0.0,
|
|
torque_control_enabled=False,
|
|
motor_overheat_protection=False,
|
|
hard_reset=False,
|
|
on_rack=False,
|
|
render=True,
|
|
num_steps_to_log=1000,
|
|
action_repeat=1,
|
|
control_time_step=None,
|
|
env_randomizer=SpotEnvRandomizer(),
|
|
forward_reward_cap=float("inf"),
|
|
reflection=True,
|
|
log_path=None,
|
|
desired_velocity=0.5,
|
|
desired_rate=0.0,
|
|
lateral=False,
|
|
draw_foot_path=False,
|
|
height_field=False,
|
|
AutoStepper=True,
|
|
action_dim=14,
|
|
contacts=True,
|
|
):
|
|
|
|
super(spotBezierEnv, self).__init__(
|
|
distance_weight=distance_weight,
|
|
rotation_weight=rotation_weight,
|
|
energy_weight=energy_weight,
|
|
shake_weight=shake_weight,
|
|
drift_weight=drift_weight,
|
|
rp_weight=rp_weight,
|
|
rate_weight=rate_weight,
|
|
urdf_root=urdf_root,
|
|
urdf_version=urdf_version,
|
|
distance_limit=distance_limit,
|
|
observation_noise_stdev=observation_noise_stdev,
|
|
self_collision_enabled=self_collision_enabled,
|
|
motor_velocity_limit=motor_velocity_limit,
|
|
pd_control_enabled=pd_control_enabled,
|
|
leg_model_enabled=leg_model_enabled,
|
|
accurate_motor_model_enabled=accurate_motor_model_enabled,
|
|
remove_default_joint_damping=remove_default_joint_damping,
|
|
motor_kp=motor_kp,
|
|
motor_kd=motor_kd,
|
|
control_latency=control_latency,
|
|
pd_latency=pd_latency,
|
|
torque_control_enabled=torque_control_enabled,
|
|
motor_overheat_protection=motor_overheat_protection,
|
|
hard_reset=hard_reset,
|
|
on_rack=on_rack,
|
|
render=render,
|
|
num_steps_to_log=num_steps_to_log,
|
|
action_repeat=action_repeat,
|
|
control_time_step=control_time_step,
|
|
env_randomizer=env_randomizer,
|
|
forward_reward_cap=forward_reward_cap,
|
|
reflection=reflection,
|
|
log_path=log_path,
|
|
desired_velocity=desired_velocity,
|
|
desired_rate=desired_rate,
|
|
lateral=lateral,
|
|
draw_foot_path=draw_foot_path,
|
|
height_field=height_field,
|
|
AutoStepper=AutoStepper,
|
|
contacts=contacts,
|
|
)
|
|
|
|
# Residuals + Clearance Height + Penetration Depth
|
|
action_high = np.array([self._action_bound] * action_dim)
|
|
self.action_space = spaces.Box(-action_high, action_high)
|
|
print("Action SPACE: {}".format(self.action_space))
|
|
|
|
self.prev_pos = np.array([0.0, 0.0, 0.0])
|
|
|
|
self.yaw = 0.0
|
|
|
|
def pass_joint_angles(self, ja):
|
|
"""For executing joint angles"""
|
|
self.ja = ja
|
|
|
|
def step(self, action):
|
|
"""Step forward the simulation, given the action.
|
|
|
|
Args:
|
|
action: A list of desired motor angles for eight motors.
|
|
smach: the bezier state machine containing simulated
|
|
random controll inputs
|
|
|
|
Returns:
|
|
observations: The angles, velocities and torques of all motors.
|
|
reward: The reward for the current state-action pair.
|
|
done: Whether the episode has ended.
|
|
info: A dictionary that stores diagnostic information.
|
|
|
|
Raises:
|
|
ValueError: The action dimension is not the same as the number of motors.
|
|
ValueError: The magnitude of actions is out of bounds.
|
|
"""
|
|
# Discard all but joint angles
|
|
action = self.ja
|
|
|
|
self._last_base_position = self.spot.GetBasePosition()
|
|
self._last_base_orientation = self.spot.GetBaseOrientation()
|
|
# print("ACTION:")
|
|
# print(action)
|
|
if self._is_render:
|
|
# Sleep, otherwise the computation takes less time than real time,
|
|
# which will make the visualization like a fast-forward video.
|
|
time_spent = time.time() - self._last_frame_time
|
|
self._last_frame_time = time.time()
|
|
time_to_sleep = self.control_time_step - time_spent
|
|
if time_to_sleep > 0:
|
|
time.sleep(time_to_sleep)
|
|
base_pos = self.spot.GetBasePosition()
|
|
# Keep the previous orientation of the camera set by the user.
|
|
[yaw, pitch, dist] = self._pybullet_client.getDebugVisualizerCamera()[8:11]
|
|
self._pybullet_client.resetDebugVisualizerCamera(dist, yaw, pitch, base_pos)
|
|
|
|
action = self._transform_action_to_motor_command(action)
|
|
self.spot.Step(action)
|
|
# NOTE: SMACH is passed to the reward method
|
|
reward = self._reward()
|
|
done = self._termination()
|
|
self._env_step_counter += 1
|
|
|
|
# DRAW FOOT PATH
|
|
if self.draw_foot_path:
|
|
self.DrawFootPath()
|
|
|
|
return np.array(self._get_observation()), reward, done, {}
|
|
|
|
def return_state(self):
|
|
return np.array(self._get_observation())
|
|
|
|
def return_yaw(self):
|
|
return self.yaw
|
|
|
|
def _reward(self):
|
|
# get observation
|
|
obs = self._get_observation()
|
|
|
|
orn = self.spot.GetBaseOrientation()
|
|
|
|
# Return StepVelocity with the sign of StepLength
|
|
DesiredVelicty = math.copysign(
|
|
self.spot.StepVelocity / 4.0, self.spot.StepLength
|
|
)
|
|
|
|
fwd_speed = self.spot.prev_lin_twist[0] # vx
|
|
lat_speed = self.spot.prev_lin_twist[1] # vy
|
|
|
|
# DEBUG
|
|
lt, at = self.spot.GetBaseTwist()
|
|
|
|
# ONLY WORKS FOR MOVING PURELY FORWARD
|
|
pos = self.spot.GetBasePosition()
|
|
|
|
forward_reward = pos[0] - self.prev_pos[0]
|
|
|
|
# yaw_rate = obs[4]
|
|
|
|
rot_reward = 0.0
|
|
|
|
roll, pitch, yaw = self._pybullet_client.getEulerFromQuaternion(
|
|
[orn[0], orn[1], orn[2], orn[3]]
|
|
)
|
|
|
|
# if yaw < 0.0:
|
|
# yaw += np.pi
|
|
# else:
|
|
# yaw -= np.pi
|
|
|
|
# For auto correct
|
|
self.yaw = yaw
|
|
|
|
# penalty for nonzero PITCH and YAW(hidden) ONLY
|
|
# NOTE: Added Yaw mult
|
|
rp_reward = -(abs(obs[0]) + abs(obs[1]))
|
|
|
|
# print("YAW: {}".format(yaw))
|
|
# print("RP RWD: {:.2f}".format(rp_reward))
|
|
# print("ROLL: {} \t PITCH: {}".format(obs[0], obs[1]))
|
|
|
|
# penalty for nonzero acc(z) - UNRELIABLE ON IMU
|
|
shake_reward = 0
|
|
|
|
# penalty for nonzero rate (x,y,z)
|
|
rate_reward = -(abs(obs[2]) + abs(obs[3]))
|
|
|
|
# print("RATES: {}".format(obs[2:5]))
|
|
|
|
drift_reward = -abs(pos[1])
|
|
energy_reward = (
|
|
-np.abs(np.dot(self.spot.GetMotorTorques(), self.spot.GetMotorVelocities()))
|
|
* self._time_step
|
|
)
|
|
reward = (
|
|
self._distance_weight * forward_reward
|
|
+ self._rotation_weight * rot_reward
|
|
+ self._energy_weight * energy_reward
|
|
+ self._drift_weight * drift_reward
|
|
+ self._shake_weight * shake_reward
|
|
+ self._rp_weight * rp_reward
|
|
+ self._rate_weight * rate_reward
|
|
)
|
|
self._objectives.append(
|
|
[forward_reward, energy_reward, drift_reward, shake_reward]
|
|
)
|
|
# print("REWARD: ", reward)
|
|
# NOTE: return yaw for automatic correction (not part of RL)
|
|
return reward
|