🤖 Adds initial sim structure

2024-08-09 22:27:48 +02:00
parent 2face72aee
commit 33e7fac74c
23 changed files with 798 additions and 0 deletions
@@ -0,0 +1,16 @@
+import torch
+import torch.nn as nn
+
+
+class SimpleNN(nn.Module):
+    def __init__(self, input_size, output_size):
+        super(SimpleNN, self).__init__()
+        self.fc1 = nn.Linear(input_size, 128)
+        self.fc2 = nn.Linear(128, 128)
+        self.fc3 = nn.Linear(128, output_size)
+
+    def forward(self, x):
+        x = torch.relu(self.fc1(x))
+        x = torch.relu(self.fc2(x))
+        x = self.fc3(x)
+        return x
@@ -0,0 +1,51 @@
+import torch
+import torch.optim as optim
+import pybullet as p
+import numpy as np
+from tqdm import tqdm, trange
+from collections import namedtuple
+
+from training.model import SimpleNN
+
+Experience = namedtuple("Experience", ["observation", "action", "reward", "log_prob"])
+
+
+class Trainer:
+    def __init__(self, env):
+        self.env = env
+        self.model = SimpleNN(
+            input_size=env.robot.get_observation().shape[0],
+            output_size=p.getNumJoints(env.robot.robot_id),
+        )
+        self.optimizer = optim.Adam(self.model.parameters(), lr=0.001)
+
+    def train(self, episodes=1000):
+        for episode in trange(episodes):
+            observation = self.env.reset()
+            done = False
+            total_reward = 0
+
+            while not done:
+                action = self.select_action(observation)
+                observation, reward, done = self.env.step(action)
+                total_reward += reward
+
+                # Train the neural network
+                # loss = self.compute_loss(observation, action, reward)
+                # self.optimizer.zero_grad()
+                # loss.backward()
+                # self.optimizer.step()
+
+            print(f"Episode {episode}: Total Reward: {total_reward}")
+
+    def select_action(self, observation):
+        with torch.no_grad():
+            observation_tensor = torch.tensor(observation, dtype=torch.float32)
+            action = self.model(observation_tensor)
+            return np.array(
+                [-0.4, -1.5, 6, 0.4, -1.5, 6, -0.4, -1.5, 6, 0.4, -1.5, 6]
+            )  # action.numpy()
+
+    def compute_loss(self, observation, action, reward):
+        # Define your loss function here
+        return torch.tensor(0.0)