52 lines
1.7 KiB
Python
52 lines
1.7 KiB
Python
import torch
|
|
import torch.optim as optim
|
|
import pybullet as p
|
|
import numpy as np
|
|
from tqdm import tqdm, trange
|
|
from collections import namedtuple
|
|
|
|
from training.model import SimpleNN
|
|
|
|
# Immutable record with four positional fields: observation, action, reward
# and log_prob. Nothing in the visible part of this module instantiates it yet.
Experience = namedtuple(
    "Experience",
    ("observation", "action", "reward", "log_prob"),
)
|
|
|
|
|
|
class Trainer:
    """Run a policy network through episodes of a robot environment.

    The trainer owns a ``SimpleNN`` model and an Adam optimizer. At the
    moment it only rolls out episodes and reports their total reward: the
    gradient update in ``train`` is disabled, ``compute_loss`` is a stub and
    ``select_action`` returns a fixed action instead of the model output.
    """

    # Fixed 12-element action returned by ``select_action`` while the model
    # output is not wired in.
    # NOTE(review): presumably per-joint targets for the robot - confirm.
    _PLACEHOLDER_ACTION = (
        -0.4, -1.5, 6, 0.4, -1.5, 6, -0.4, -1.5, 6, 0.4, -1.5, 6,
    )

    def __init__(self, env):
        """Create the model and optimizer for *env*.

        Args:
            env: Environment object. It must provide ``reset()``,
                ``step(action)`` returning ``(observation, reward, done)``,
                and a ``robot`` attribute exposing ``get_observation()``
                (an array with a ``shape``) and ``robot_id``.
        """
        self.env = env
        # The network maps one observation vector to one output per joint
        # reported by PyBullet for this robot body.
        self.model = SimpleNN(
            input_size=env.robot.get_observation().shape[0],
            output_size=p.getNumJoints(env.robot.robot_id),
        )
        self.optimizer = optim.Adam(self.model.parameters(), lr=0.001)

    def train(self, episodes=1000):
        """Roll out *episodes* episodes and report each one's total reward.

        Args:
            episodes: Number of episodes to run.
        """
        for episode in trange(episodes):
            observation = self.env.reset()
            done = False
            total_reward = 0

            while not done:
                action = self.select_action(observation)
                observation, reward, done = self.env.step(action)
                total_reward += reward

                # The gradient update is currently disabled. Sketch of the
                # step to enable once compute_loss is implemented:
                #   loss = self.compute_loss(observation, action, reward)
                #   self.optimizer.zero_grad()
                #   loss.backward()
                #   self.optimizer.step()

            # tqdm.write prints without overlapping the live progress bar,
            # which a bare print() inside a trange loop does.
            tqdm.write(f"Episode {episode}: Total Reward: {total_reward}")

    def select_action(self, observation):
        """Return the action to apply for *observation*.

        The model is evaluated on the observation, but its output is not
        used yet: a fresh copy of the fixed placeholder action is returned.

        Args:
            observation: Array-like, NumPy array or tensor accepted by the
                model after conversion to ``float32``.

        Returns:
            A new ``numpy.ndarray`` holding the 12 placeholder values.
        """
        with torch.no_grad():
            # as_tensor accepts lists, arrays and tensors alike and does not
            # emit the copy-construct warning torch.tensor raises on tensors.
            observation_tensor = torch.as_tensor(observation, dtype=torch.float32)
            # The forward pass is kept so a shape mismatch between the
            # observation and the model still fails here; its result is
            # discarded until the return below becomes ``<output>.numpy()``.
            self.model(observation_tensor)
        return np.array(self._PLACEHOLDER_ACTION)

    def compute_loss(self, observation, action, reward):
        """Placeholder loss: ignore all arguments and return a zero scalar tensor."""
        # TODO: define the actual loss function here.
        return torch.tensor(0.0)
|