🤖 Adds initial sim structure
This commit is contained in:
@@ -0,0 +1,51 @@
|
||||
import torch
|
||||
import torch.optim as optim
|
||||
import pybullet as p
|
||||
import numpy as np
|
||||
from tqdm import tqdm, trange
|
||||
from collections import namedtuple
|
||||
|
||||
from training.model import SimpleNN
|
||||
|
||||
# Immutable record with four named fields. It is not referenced by any other
# code visible in this file; presumably it will hold one environment step for
# a policy-gradient update — confirm once training is implemented.
Experience = namedtuple(
    "Experience",
    ["observation", "action", "reward", "log_prob"],
)
|
||||
|
||||
|
||||
class Trainer:
    """Roll a policy network through episodes of a simulated environment.

    The trainer owns a ``SimpleNN`` model and an Adam optimizer for its
    parameters. Learning is not wired up yet: ``select_action`` returns a
    fixed command and ``compute_loss`` returns a constant zero, so ``train``
    currently only steps the environment and reports rewards.
    """

    # Fixed command returned by ``select_action`` while the network output is
    # unused. Kept as a tuple so every call can build a fresh array.
    # NOTE(review): 12 values — presumably one target per actuated joint;
    # confirm against the robot model.
    _PLACEHOLDER_ACTION = (
        -0.4, -1.5, 6,
        0.4, -1.5, 6,
        -0.4, -1.5, 6,
        0.4, -1.5, 6,
    )

    def __init__(self, env):
        """Build the model and optimizer for ``env``.

        Args:
            env: Environment object. The code here uses ``env.reset()``,
                ``env.step(action)`` (unpacked as observation, reward, done),
                ``env.robot.get_observation()`` and ``env.robot.robot_id``.
        """
        self.env = env
        # Input width is the first dimension of the robot's observation;
        # output width is the joint count PyBullet reports for the robot body.
        self.model = SimpleNN(
            input_size=env.robot.get_observation().shape[0],
            output_size=p.getNumJoints(env.robot.robot_id),
        )
        self.optimizer = optim.Adam(self.model.parameters(), lr=0.001)

    def train(self, episodes=1000):
        """Run ``episodes`` rollouts and report each episode's total reward.

        Each episode resets the environment, then repeatedly selects an
        action and steps the environment until it reports ``done``.

        Args:
            episodes: Number of episodes to run.
        """
        for episode in trange(episodes):
            observation = self.env.reset()
            done = False
            total_reward = 0

            while not done:
                action = self.select_action(observation)
                observation, reward, done = self.env.step(action)
                total_reward += reward

                # TODO: train the neural network, e.g.
                #   loss = self.compute_loss(observation, action, reward)
                #   self.optimizer.zero_grad()
                #   loss.backward()
                #   self.optimizer.step()

            # tqdm.write prints above the live progress bar; a plain print()
            # inside a trange loop interleaves with the bar and garbles it.
            tqdm.write(f"Episode {episode}: Total Reward: {total_reward}")

    def select_action(self, observation):
        """Return the action to apply for ``observation``.

        The observation is converted to a float32 tensor and passed through
        the model with gradient tracking disabled, but the model output is
        currently discarded and a fixed placeholder command is returned
        instead. The forward pass is kept so the model is still exercised on
        every step exactly as before.

        Args:
            observation: Anything ``torch.tensor`` can convert to float32.

        Returns:
            A new ``numpy.ndarray`` holding the placeholder command.
        """
        with torch.no_grad():
            observation_tensor = torch.tensor(observation, dtype=torch.float32)
            self.model(observation_tensor)  # result unused for now
        # TODO: return the model output (``action.numpy()``) once it is usable.
        return np.array(self._PLACEHOLDER_ACTION)

    def compute_loss(self, observation, action, reward):
        """Return the training loss; currently a constant zero placeholder.

        Args:
            observation: Unused for now.
            action: Unused for now.
            reward: Unused for now.

        Returns:
            A scalar ``torch.Tensor`` equal to ``0.0``.
        """
        # TODO: define the real loss function here.
        return torch.tensor(0.0)
|
||||
Reference in New Issue
Block a user