diff --git a/simulation/simulation/environment.py b/simulation/simulation/environment.py index 4901845..d5805f9 100644 --- a/simulation/simulation/environment.py +++ b/simulation/simulation/environment.py @@ -4,6 +4,8 @@ import numpy as np from simulation.robot import QuadrupedRobot +roll_pitch_reward_weight = 0.1 + class QuadrupedEnv: def __init__(self, urdf_path): p.connect(p.GUI) @@ -11,11 +13,15 @@ class QuadrupedEnv: p.setGravity(0, 0, -9.8) p.setTimeStep(1 / 240) + self.plane_id = p.loadURDF("plane.urdf") + self.robot = QuadrupedRobot(urdf_path) self.reset() def reset(self): p.resetSimulation() + p.setGravity(0, 0, -9.8) + self.plane_id = p.loadURDF("plane.urdf") self.robot.load() return self.robot.get_observation() @@ -28,8 +34,11 @@ class QuadrupedEnv: return obs, reward, done def calculate_reward(self, observation): - # Define your reward function here - return 0 + reward = 0 + reward += ( + -(abs(observation[0]) + abs(observation[1])) * roll_pitch_reward_weight + ) + return reward def is_done(self, observation): roll, pitch = observation[0:2] diff --git a/simulation/train.py b/simulation/train.py index 6136ccf..dbfc985 100644 --- a/simulation/train.py +++ b/simulation/train.py @@ -3,10 +3,11 @@ from training.trainer import Trainer import resources as resources +render = True def main(): env = QuadrupedEnv(resources.getDataPath() + "/spot.urdf") - trainer = Trainer(env) + trainer = Trainer(env, render) trainer.train() diff --git a/simulation/training/trainer.py b/simulation/training/trainer.py index 3a01398..29ffa53 100644 --- a/simulation/training/trainer.py +++ b/simulation/training/trainer.py @@ -1,3 +1,4 @@ +from time import sleep import torch import torch.optim as optim import pybullet as p @@ -11,8 +12,10 @@ Experience = namedtuple("Experience", ["observation", "action", "reward", "log_p class Trainer: - def __init__(self, env): + + def __init__(self, env, render): self.env = env + self.should_render = render self.model = SimpleNN( input_size=env.robot.get_observation().shape[0], output_size=p.getNumJoints(env.robot.robot_id), @@ -30,6 +33,9 @@ class Trainer: observation, reward, done = self.env.step(action) total_reward += reward + if self.should_render: + sleep(0.005) + # Train the neural network # loss = self.compute_loss(observation, action, reward) # self.optimizer.zero_grad() @@ -42,9 +48,7 @@ class Trainer: with torch.no_grad(): observation_tensor = torch.tensor(observation, dtype=torch.float32) action = self.model(observation_tensor) - return np.array( - [-0.4, -1.5, 6, 0.4, -1.5, 6, -0.4, -1.5, 6, 0.4, -1.5, 6] - ) # action.numpy() + return action.numpy() def compute_loss(self, observation, action, reward): # Define your loss function here