🤖 Adds initial sim structure

This commit is contained in:
Rune Harlyk
2024-08-09 22:27:48 +02:00
committed by Rune Harlyk
parent 2face72aee
commit 33e7fac74c
23 changed files with 798 additions and 0 deletions
View File
+16
View File
@@ -0,0 +1,16 @@
import torch
import torch.nn as nn
class SimpleNN(nn.Module):
    """Three-layer fully connected network with ReLU activations.

    The layout is ``input_size -> hidden_size -> hidden_size -> output_size``.
    ReLU is applied after the first two layers; the last layer is returned
    without an activation.
    """

    def __init__(self, input_size, output_size, hidden_size=128):
        """Build the three linear layers.

        Args:
            input_size: Number of features in the last dimension of the input.
            output_size: Number of values produced per input row.
            hidden_size: Width of both hidden layers. Defaults to 128, the
                value that used to be hard-coded.
        """
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Return the network output for ``x``.

        Args:
            x: Tensor whose last dimension has ``input_size`` elements.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of ``output_size``.
        """
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        # No activation on the output layer: the raw linear output is returned.
        return self.fc3(x)
+51
View File
@@ -0,0 +1,51 @@
import torch
import torch.optim as optim
import pybullet as p
import numpy as np
from tqdm import tqdm, trange
from collections import namedtuple
from training.model import SimpleNN
# Immutable record with the fields observation, action, reward and log_prob.
# NOTE(review): not referenced by any other code visible here; presumably
# meant to hold one rollout step for a policy-gradient update -- confirm.
Experience = namedtuple(
    "Experience",
    ("observation", "action", "reward", "log_prob"),
)
class Trainer:
    """Run episodes of an environment with a ``SimpleNN`` policy attached.

    The trainer owns the environment, the network and an Adam optimizer.
    At present the episode loop only collects rewards: no optimisation step
    is performed and ``compute_loss`` is a stub.
    """

    # Fixed joint command returned by ``select_action`` unless the caller opts
    # into the network output.
    # NOTE(review): 12 values, presumably one target per joint of the
    # simulated robot -- confirm against the environment's action layout.
    _FIXED_ACTION = (-0.4, -1.5, 6, 0.4, -1.5, 6, -0.4, -1.5, 6, 0.4, -1.5, 6)

    def __init__(self, env):
        """Create the policy network and its optimizer for ``env``.

        Args:
            env: Environment exposing ``reset()``, ``step(action)`` and a
                ``robot`` with ``get_observation()`` and ``robot_id``.
        """
        self.env = env
        # Input width comes from the observation vector, output width from
        # the number of joints PyBullet reports for the robot body.
        self.model = SimpleNN(
            input_size=env.robot.get_observation().shape[0],
            output_size=p.getNumJoints(env.robot.robot_id),
        )
        self.optimizer = optim.Adam(self.model.parameters(), lr=0.001)

    def train(self, episodes=1000):
        """Roll out ``episodes`` episodes and report each episode's reward.

        Args:
            episodes: Number of episodes to run.
        """
        for episode in trange(episodes):
            observation = self.env.reset()
            done = False
            total_reward = 0
            while not done:
                action = self.select_action(observation)
                observation, reward, done = self.env.step(action)
                total_reward += reward
                # TODO: add the optimisation step here once compute_loss is
                # implemented (zero_grad -> loss.backward -> optimizer.step).
            # tqdm.write prints above the progress bar; a bare print() would
            # interleave with the bar that trange is drawing.
            tqdm.write(f"Episode {episode}: Total Reward: {total_reward}")

    def select_action(self, observation, *, use_policy=False):
        """Return the action to apply for ``observation``.

        Args:
            observation: Array-like observation convertible to a float32
                tensor.
            use_policy: When False (the default) the fixed placeholder
                command is returned and the network is not evaluated. When
                True the network output is returned instead.

        Returns:
            A NumPy array holding the action. A new array is created on
            every call, so callers may modify it freely.
        """
        if not use_policy:
            return np.array(self._FIXED_ACTION)
        # Inference only: no gradients are needed to pick an action.
        with torch.no_grad():
            observation_tensor = torch.tensor(observation, dtype=torch.float32)
            return self.model(observation_tensor).numpy()

    def compute_loss(self, observation, action, reward):
        """Return the training loss (stub: always a zero tensor).

        Args:
            observation: Currently unused.
            action: Currently unused.
            reward: Currently unused.
        """
        # TODO: define the loss function here.
        return torch.tensor(0.0)