""" This file implements the gym environment of SpotMicro with Bezier Curve. """ import math import time import gym import numpy as np import pybullet import pybullet_data from gym import spaces from gym.utils import seeding from pkg_resources import parse_version import spot import pybullet_utils.bullet_client as bullet_client from gym.envs.registration import register from OpenLoopSM.SpotOL import BezierStepper from spot_gym_env import spotGymEnv import Kinematics.LieAlgebra as LA from spot_env_randomizer import SpotEnvRandomizer SENSOR_NOISE_STDDEV = spot.SENSOR_NOISE_STDDEV # Register as OpenAI Gym Environment register( id="SpotMicroEnv-v1", entry_point="spotmicro.GymEnvs.spot_bezier_env:spotBezierEnv", max_episode_steps=1000, ) class spotBezierEnv(spotGymEnv): """The gym environment for spot. It simulates the locomotion of spot, a quadruped robot. The state space include the angles, velocities and torques for all the motors and the action space is the desired motor angle for each motor. The reward function is based on how far spot walks in 1000 steps and penalizes the energy expenditure. """ metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": 50} def __init__( self, distance_weight=1.0, rotation_weight=0.0, energy_weight=0.000, shake_weight=0.00, drift_weight=0.0, rp_weight=10.0, rate_weight=0.03, urdf_root=pybullet_data.getDataPath(), urdf_version=None, distance_limit=float("inf"), observation_noise_stdev=SENSOR_NOISE_STDDEV, self_collision_enabled=True, motor_velocity_limit=np.inf, pd_control_enabled=False, leg_model_enabled=False, accurate_motor_model_enabled=False, remove_default_joint_damping=False, motor_kp=2.0, motor_kd=0.03, control_latency=0.0, pd_latency=0.0, torque_control_enabled=False, motor_overheat_protection=False, hard_reset=False, on_rack=False, render=True, num_steps_to_log=1000, action_repeat=1, control_time_step=None, env_randomizer=SpotEnvRandomizer(), forward_reward_cap=float("inf"), reflection=True, log_path=None, desired_velocity=0.5, desired_rate=0.0, lateral=False, draw_foot_path=False, height_field=False, AutoStepper=True, action_dim=14, contacts=True, ): super(spotBezierEnv, self).__init__( distance_weight=distance_weight, rotation_weight=rotation_weight, energy_weight=energy_weight, shake_weight=shake_weight, drift_weight=drift_weight, rp_weight=rp_weight, rate_weight=rate_weight, urdf_root=urdf_root, urdf_version=urdf_version, distance_limit=distance_limit, observation_noise_stdev=observation_noise_stdev, self_collision_enabled=self_collision_enabled, motor_velocity_limit=motor_velocity_limit, pd_control_enabled=pd_control_enabled, leg_model_enabled=leg_model_enabled, accurate_motor_model_enabled=accurate_motor_model_enabled, remove_default_joint_damping=remove_default_joint_damping, motor_kp=motor_kp, motor_kd=motor_kd, control_latency=control_latency, pd_latency=pd_latency, torque_control_enabled=torque_control_enabled, motor_overheat_protection=motor_overheat_protection, hard_reset=hard_reset, on_rack=on_rack, render=render, num_steps_to_log=num_steps_to_log, action_repeat=action_repeat, control_time_step=control_time_step, env_randomizer=env_randomizer, forward_reward_cap=forward_reward_cap, reflection=reflection, log_path=log_path, desired_velocity=desired_velocity, desired_rate=desired_rate, lateral=lateral, draw_foot_path=draw_foot_path, height_field=height_field, AutoStepper=AutoStepper, contacts=contacts, ) # Residuals + Clearance Height + Penetration Depth action_high = np.array([self._action_bound] * action_dim) self.action_space = spaces.Box(-action_high, action_high) print("Action SPACE: {}".format(self.action_space)) self.prev_pos = np.array([0.0, 0.0, 0.0]) self.yaw = 0.0 def pass_joint_angles(self, ja): """For executing joint angles""" self.ja = ja def step(self, action): """Step forward the simulation, given the action. Args: action: A list of desired motor angles for eight motors. smach: the bezier state machine containing simulated random controll inputs Returns: observations: The angles, velocities and torques of all motors. reward: The reward for the current state-action pair. done: Whether the episode has ended. info: A dictionary that stores diagnostic information. Raises: ValueError: The action dimension is not the same as the number of motors. ValueError: The magnitude of actions is out of bounds. """ # Discard all but joint angles action = self.ja self._last_base_position = self.spot.GetBasePosition() self._last_base_orientation = self.spot.GetBaseOrientation() # print("ACTION:") # print(action) if self._is_render: # Sleep, otherwise the computation takes less time than real time, # which will make the visualization like a fast-forward video. time_spent = time.time() - self._last_frame_time self._last_frame_time = time.time() time_to_sleep = self.control_time_step - time_spent if time_to_sleep > 0: time.sleep(time_to_sleep) base_pos = self.spot.GetBasePosition() # Keep the previous orientation of the camera set by the user. [yaw, pitch, dist] = self._pybullet_client.getDebugVisualizerCamera()[8:11] self._pybullet_client.resetDebugVisualizerCamera(dist, yaw, pitch, base_pos) action = self._transform_action_to_motor_command(action) self.spot.Step(action) # NOTE: SMACH is passed to the reward method reward = self._reward() done = self._termination() self._env_step_counter += 1 # DRAW FOOT PATH if self.draw_foot_path: self.DrawFootPath() return np.array(self._get_observation()), reward, done, {} def return_state(self): return np.array(self._get_observation()) def return_yaw(self): return self.yaw def _reward(self): # get observation obs = self._get_observation() orn = self.spot.GetBaseOrientation() # Return StepVelocity with the sign of StepLength DesiredVelicty = math.copysign( self.spot.StepVelocity / 4.0, self.spot.StepLength ) fwd_speed = self.spot.prev_lin_twist[0] # vx lat_speed = self.spot.prev_lin_twist[1] # vy # DEBUG lt, at = self.spot.GetBaseTwist() # ONLY WORKS FOR MOVING PURELY FORWARD pos = self.spot.GetBasePosition() forward_reward = pos[0] - self.prev_pos[0] # yaw_rate = obs[4] rot_reward = 0.0 roll, pitch, yaw = self._pybullet_client.getEulerFromQuaternion( [orn[0], orn[1], orn[2], orn[3]] ) # if yaw < 0.0: # yaw += np.pi # else: # yaw -= np.pi # For auto correct self.yaw = yaw # penalty for nonzero PITCH and YAW(hidden) ONLY # NOTE: Added Yaw mult rp_reward = -(abs(obs[0]) + abs(obs[1])) # print("YAW: {}".format(yaw)) # print("RP RWD: {:.2f}".format(rp_reward)) # print("ROLL: {} \t PITCH: {}".format(obs[0], obs[1])) # penalty for nonzero acc(z) - UNRELIABLE ON IMU shake_reward = 0 # penalty for nonzero rate (x,y,z) rate_reward = -(abs(obs[2]) + abs(obs[3])) # print("RATES: {}".format(obs[2:5])) drift_reward = -abs(pos[1]) energy_reward = ( -np.abs(np.dot(self.spot.GetMotorTorques(), self.spot.GetMotorVelocities())) * self._time_step ) reward = ( self._distance_weight * forward_reward + self._rotation_weight * rot_reward + self._energy_weight * energy_reward + self._drift_weight * drift_reward + self._shake_weight * shake_reward + self._rp_weight * rp_reward + self._rate_weight * rate_reward ) self._objectives.append( [forward_reward, energy_reward, drift_reward, shake_reward] ) # print("REWARD: ", reward) # NOTE: return yaw for automatic correction (not part of RL) return reward