Files
SpotMicroESP32-Leika/mock/simulator/GymEnvs/spot_bezier_env.py
T
2024-03-04 18:43:07 +01:00

274 lines
9.0 KiB
Python

""" This file implements the gym environment of SpotMicro with Bezier Curve.
"""
import math
import time
import gym
import numpy as np
import pybullet
import pybullet_data
from gym import spaces
from gym.utils import seeding
from pkg_resources import parse_version
from .. import spot
import pybullet_utils.bullet_client as bullet_client
from gym.envs.registration import register
from ..spot_gym_env import spotGymEnv
from ..spot_env_randomizer import SpotEnvRandomizer
SENSOR_NOISE_STDDEV = spot.SENSOR_NOISE_STDDEV
# Register as OpenAI Gym Environment
register(
id="SpotMicroEnv-v1",
entry_point="spotmicro.GymEnvs.spot_bezier_env:spotBezierEnv",
max_episode_steps=1000,
)
class spotBezierEnv(spotGymEnv):
"""The gym environment for spot.
It simulates the locomotion of spot, a quadruped robot. The state space
include the angles, velocities and torques for all the motors and the action
space is the desired motor angle for each motor. The reward function is based
on how far spot walks in 1000 steps and penalizes the energy
expenditure.
"""
metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": 50}
def __init__(
self,
distance_weight=1.0,
rotation_weight=0.0,
energy_weight=0.000,
shake_weight=0.00,
drift_weight=0.0,
rp_weight=10.0,
rate_weight=0.03,
urdf_root=pybullet_data.getDataPath(),
urdf_version=None,
distance_limit=float("inf"),
observation_noise_stdev=SENSOR_NOISE_STDDEV,
self_collision_enabled=True,
motor_velocity_limit=np.inf,
pd_control_enabled=False,
leg_model_enabled=False,
accurate_motor_model_enabled=False,
remove_default_joint_damping=False,
motor_kp=2.0,
motor_kd=0.03,
control_latency=0.0,
pd_latency=0.0,
torque_control_enabled=False,
motor_overheat_protection=False,
hard_reset=False,
on_rack=False,
render=True,
num_steps_to_log=1000,
action_repeat=1,
control_time_step=None,
env_randomizer=SpotEnvRandomizer(),
forward_reward_cap=float("inf"),
reflection=True,
log_path=None,
desired_velocity=0.5,
desired_rate=0.0,
lateral=False,
draw_foot_path=False,
height_field=False,
AutoStepper=True,
action_dim=14,
contacts=True,
):
super(spotBezierEnv, self).__init__(
distance_weight=distance_weight,
rotation_weight=rotation_weight,
energy_weight=energy_weight,
shake_weight=shake_weight,
drift_weight=drift_weight,
rp_weight=rp_weight,
rate_weight=rate_weight,
urdf_root=urdf_root,
urdf_version=urdf_version,
distance_limit=distance_limit,
observation_noise_stdev=observation_noise_stdev,
self_collision_enabled=self_collision_enabled,
motor_velocity_limit=motor_velocity_limit,
pd_control_enabled=pd_control_enabled,
leg_model_enabled=leg_model_enabled,
accurate_motor_model_enabled=accurate_motor_model_enabled,
remove_default_joint_damping=remove_default_joint_damping,
motor_kp=motor_kp,
motor_kd=motor_kd,
control_latency=control_latency,
pd_latency=pd_latency,
torque_control_enabled=torque_control_enabled,
motor_overheat_protection=motor_overheat_protection,
hard_reset=hard_reset,
on_rack=on_rack,
render=render,
num_steps_to_log=num_steps_to_log,
action_repeat=action_repeat,
control_time_step=control_time_step,
env_randomizer=env_randomizer,
forward_reward_cap=forward_reward_cap,
reflection=reflection,
log_path=log_path,
desired_velocity=desired_velocity,
desired_rate=desired_rate,
lateral=lateral,
draw_foot_path=draw_foot_path,
height_field=height_field,
AutoStepper=AutoStepper,
contacts=contacts,
)
# Residuals + Clearance Height + Penetration Depth
action_high = np.array([self._action_bound] * action_dim)
self.action_space = spaces.Box(-action_high, action_high)
print("Action SPACE: {}".format(self.action_space))
self.prev_pos = np.array([0.0, 0.0, 0.0])
self.yaw = 0.0
def pass_joint_angles(self, ja):
"""For executing joint angles"""
self.ja = ja
def step(self, action):
"""Step forward the simulation, given the action.
Args:
action: A list of desired motor angles for eight motors.
smach: the bezier state machine containing simulated
random controll inputs
Returns:
observations: The angles, velocities and torques of all motors.
reward: The reward for the current state-action pair.
done: Whether the episode has ended.
info: A dictionary that stores diagnostic information.
Raises:
ValueError: The action dimension is not the same as the number of motors.
ValueError: The magnitude of actions is out of bounds.
"""
# Discard all but joint angles
action = self.ja
self._last_base_position = self.spot.GetBasePosition()
self._last_base_orientation = self.spot.GetBaseOrientation()
# print("ACTION:")
# print(action)
if self._is_render:
# Sleep, otherwise the computation takes less time than real time,
# which will make the visualization like a fast-forward video.
time_spent = time.time() - self._last_frame_time
self._last_frame_time = time.time()
time_to_sleep = self.control_time_step - time_spent
if time_to_sleep > 0:
time.sleep(time_to_sleep)
base_pos = self.spot.GetBasePosition()
# Keep the previous orientation of the camera set by the user.
[yaw, pitch, dist] = self._pybullet_client.getDebugVisualizerCamera()[8:11]
self._pybullet_client.resetDebugVisualizerCamera(dist, yaw, pitch, base_pos)
action = self._transform_action_to_motor_command(action)
self.spot.Step(action)
# NOTE: SMACH is passed to the reward method
reward = self._reward()
done = self._termination()
self._env_step_counter += 1
# DRAW FOOT PATH
if self.draw_foot_path:
self.DrawFootPath()
return np.array(self._get_observation()), reward, done, {}
def return_state(self):
return np.array(self._get_observation())
def return_yaw(self):
return self.yaw
def _reward(self):
# get observation
obs = self._get_observation()
orn = self.spot.GetBaseOrientation()
# Return StepVelocity with the sign of StepLength
DesiredVelicty = math.copysign(
self.spot.StepVelocity / 4.0, self.spot.StepLength
)
fwd_speed = self.spot.prev_lin_twist[0] # vx
lat_speed = self.spot.prev_lin_twist[1] # vy
# DEBUG
lt, at = self.spot.GetBaseTwist()
# ONLY WORKS FOR MOVING PURELY FORWARD
pos = self.spot.GetBasePosition()
forward_reward = pos[0] - self.prev_pos[0]
# yaw_rate = obs[4]
rot_reward = 0.0
roll, pitch, yaw = self._pybullet_client.getEulerFromQuaternion(
[orn[0], orn[1], orn[2], orn[3]]
)
# if yaw < 0.0:
# yaw += np.pi
# else:
# yaw -= np.pi
# For auto correct
self.yaw = yaw
# penalty for nonzero PITCH and YAW(hidden) ONLY
# NOTE: Added Yaw mult
rp_reward = -(abs(obs[0]) + abs(obs[1]))
# print("YAW: {}".format(yaw))
# print("RP RWD: {:.2f}".format(rp_reward))
# print("ROLL: {} \t PITCH: {}".format(obs[0], obs[1]))
# penalty for nonzero acc(z) - UNRELIABLE ON IMU
shake_reward = 0
# penalty for nonzero rate (x,y,z)
rate_reward = -(abs(obs[2]) + abs(obs[3]))
# print("RATES: {}".format(obs[2:5]))
drift_reward = -abs(pos[1])
energy_reward = (
-np.abs(np.dot(self.spot.GetMotorTorques(), self.spot.GetMotorVelocities()))
* self._time_step
)
reward = (
self._distance_weight * forward_reward
+ self._rotation_weight * rot_reward
+ self._energy_weight * energy_reward
+ self._drift_weight * drift_reward
+ self._shake_weight * shake_reward
+ self._rp_weight * rp_reward
+ self._rate_weight * rate_reward
)
self._objectives.append(
[forward_reward, energy_reward, drift_reward, shake_reward]
)
# print("REWARD: ", reward)
# NOTE: return yaw for automatic correction (not part of RL)
return reward