diff --git a/simulation/simulation/environment.py b/simulation/simulation/environment.py
index 4901845..d5805f9 100644
--- a/simulation/simulation/environment.py
+++ b/simulation/simulation/environment.py
@@ -4,6 +4,8 @@ import numpy as np
 from simulation.robot import QuadrupedRobot
 
 
+roll_pitch_reward_weight = 0.1
+
 class QuadrupedEnv:
     def __init__(self, urdf_path):
         p.connect(p.GUI)
@@ -11,11 +13,15 @@ class QuadrupedEnv:
         p.setGravity(0, 0, -9.8)
         p.setTimeStep(1 / 240)
 
+        self.plane_id = p.loadURDF("plane.urdf")
+
         self.robot = QuadrupedRobot(urdf_path)
         self.reset()
 
     def reset(self):
         p.resetSimulation()
+        p.setGravity(0, 0, -9.8)
+        self.plane_id = p.loadURDF("plane.urdf")
         self.robot.load()
         return self.robot.get_observation()
 
@@ -28,8 +34,11 @@ class QuadrupedEnv:
         return obs, reward, done
 
     def calculate_reward(self, observation):
-        # Define your reward function here
-        return 0
+        reward = 0
+        reward += (
+            -(abs(observation[0]) + abs(observation[1])) * roll_pitch_reward_weight
+        )
+        return reward
 
     def is_done(self, observation):
         roll, pitch = observation[0:2]
diff --git a/simulation/train.py b/simulation/train.py
index 6136ccf..dbfc985 100644
--- a/simulation/train.py
+++ b/simulation/train.py
@@ -3,10 +3,11 @@ from training.trainer import Trainer
 
 import resources as resources
 
+render = True
 
 def main():
     env = QuadrupedEnv(resources.getDataPath() + "/spot.urdf")
-    trainer = Trainer(env)
+    trainer = Trainer(env, render)
     trainer.train()
 
 
diff --git a/simulation/training/trainer.py b/simulation/training/trainer.py
index 3a01398..29ffa53 100644
--- a/simulation/training/trainer.py
+++ b/simulation/training/trainer.py
@@ -1,3 +1,4 @@
+from time import sleep
 import torch
 import torch.optim as optim
 import pybullet as p
@@ -11,8 +12,10 @@ Experience = namedtuple("Experience", ["observation", "action", "reward", "log_p
 
 
 class Trainer:
-    def __init__(self, env):
+
+    def __init__(self, env, render):
         self.env = env
+        self.should_render = render
         self.model = SimpleNN(
             input_size=env.robot.get_observation().shape[0],
             output_size=p.getNumJoints(env.robot.robot_id),
@@ -30,6 +33,9 @@ class Trainer:
                 observation, reward, done = self.env.step(action)
                 total_reward += reward
 
+                if self.should_render:
+                    sleep(0.005)
+
                 # Train the neural network
                 # loss = self.compute_loss(observation, action, reward)
                 # self.optimizer.zero_grad()
@@ -42,9 +48,7 @@ class Trainer:
         with torch.no_grad():
             observation_tensor = torch.tensor(observation, dtype=torch.float32)
             action = self.model(observation_tensor)
-            return np.array(
-                [-0.4, -1.5, 6, 0.4, -1.5, 6, -0.4, -1.5, 6, 0.4, -1.5, 6]
-            )  # action.numpy()
+            return action.numpy()
 
     def compute_loss(self, observation, action, reward):
         # Define your loss function here