diff --git a/simulation/src/envs/quadruped_env.py b/simulation/src/envs/quadruped_env.py
index e6f7657..580fa92 100644
--- a/simulation/src/envs/quadruped_env.py
+++ b/simulation/src/envs/quadruped_env.py
@@ -95,6 +95,8 @@ class QuadrupedEnv(gym.Env):
         self.max_steps = max_steps
         self.current_step = 0
 
+        self.prev_velocity = None
+
         self._setup_world()
         if render_mode == "human":
             self.env_start_state = p.saveState()
@@ -143,6 +145,7 @@ class QuadrupedEnv(gym.Env):
             p.resetSimulation()
             self._setup_world()
         self.current_step = 0
+        self.prev_velocity = None
         return self.robot.get_observation(), {}
 
     def step(self, action):
@@ -165,17 +168,42 @@ class QuadrupedEnv(gym.Env):
 
     def calculate_reward(self, obs):
         position = obs[:3]
+        orientation = obs[3:6]  # unpacked below as (roll, pitch, yaw)
         velocity = obs[6:9]
         angular_velocity = obs[9:12]
 
         forward_velocity = velocity[0]
         velocity_reward = -abs(forward_velocity - self.target_velocity)
 
-        height_penalty = -abs(position[2] - 0.3)
+        height_penalty = -abs(position[2] - 0.3) * 0.5  # weight now applied here (was 0.1 in the sum)
 
-        angular_penalty = -np.sum(np.square(angular_velocity))
+        roll, pitch, yaw = orientation  # yaw is unpacked but never used below
+        orientation_penalty = -(abs(roll) + abs(pitch)) * 1.0  # |roll| + |pitch|, weight 1.0
 
-        total_reward = velocity_reward + 0.1 * height_penalty + 0.01 * angular_penalty
+        angular_penalty = -np.sum(np.square(angular_velocity)) * 0.05  # weight 0.05 (was 0.01 in the sum)
+        # Velocity component 1 is treated as sideways motion; weight 0.3.
+        sideways_velocity_penalty = -abs(velocity[1]) * 0.3
+        # Acceleration terms need the previous call's velocity; they are zero while it is None.
+        if self.prev_velocity is not None:
+            dt = 1.0 / 240.0  # NOTE(review): assumes one reward call per 1/240 s of simulated time - confirm
+            acceleration = (velocity - self.prev_velocity) / dt  # finite difference between consecutive calls
+            lateral_acc_penalty = -abs(acceleration[1]) * 0.01
+            vertical_acc_penalty = -abs(acceleration[2]) * 0.01
+        else:
+            lateral_acc_penalty = 0
+            vertical_acc_penalty = 0
+        # Side effect: store this call's velocity for the next call's finite difference.
+        self.prev_velocity = velocity.copy()  # presumably obs is a NumPy array, so the slice is a view - verify
+        # Every term already carries its own weight, so the total is a plain sum.
+        total_reward = (
+            velocity_reward
+            + height_penalty
+            + orientation_penalty
+            + angular_penalty
+            + sideways_velocity_penalty
+            + lateral_acc_penalty
+            + vertical_acc_penalty
+        )
         return total_reward
 
     def is_done(self, obs):
diff --git a/simulation/train.py b/simulation/train.py
index 88594aa..a10a203 100644
--- a/simulation/train.py
+++ b/simulation/train.py
@@ -3,7 +3,7 @@ import os
 import gymnasium as gym
 from stable_baselines3 import PPO, SAC
 from stable_baselines3.common.callbacks import CheckpointCallback, EvalCallback
-from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
+from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv, VecNormalize
 from stable_baselines3.common.monitor import Monitor
 import numpy as np
 import torch
@@ -37,13 +37,15 @@ def train_ppo(
     eval_freq=10000,
     save_freq=50000,
     terrain_type=TerrainType.FLAT,
+    n_envs=8,
+    use_gpu=True,
 ):
     os.makedirs(save_dir, exist_ok=True)
     os.makedirs(log_dir, exist_ok=True)
     os.makedirs(f"{log_dir}/eval", exist_ok=True)
 
-    print("Creating training environment...")
-    env = DummyVecEnv([make_env(terrain_type=terrain_type)])
+    print(f"Creating {n_envs} parallel training environments...")
+    env = SubprocVecEnv([make_env(terrain_type=terrain_type) for _ in range(n_envs)])
     env = VecNormalize(env, norm_obs=True, norm_reward=True, clip_obs=10.0)
 
     print("Creating evaluation environment...")
@@ -66,7 +68,8 @@ def train_ppo(
         render=False,
     )
 
-    print("Creating PPO model...")
+    device = "cuda" if use_gpu and torch.cuda.is_available() else "cpu"
+    print(f"Creating PPO model on device: {device}")
     model = PPO(
         "MlpPolicy",
         env,
@@ -82,6 +85,7 @@ def train_ppo(
         max_grad_norm=max_grad_norm,
         verbose=1,
         tensorboard_log=log_dir,
+        device=device,
         policy_kwargs=dict(
             net_arch=[dict(pi=[256, 256], vf=[256, 256])],
             activation_fn=torch.nn.ReLU,
@@ -119,13 +123,15 @@ def train_sac(
     eval_freq=10000,
     save_freq=50000,
     terrain_type=TerrainType.FLAT,
+    n_envs=8,
+    use_gpu=True,
 ):
     os.makedirs(save_dir, exist_ok=True)
     os.makedirs(log_dir, exist_ok=True)
     os.makedirs(f"{log_dir}/eval", exist_ok=True)
 
-    print("Creating training environment...")
-    env = DummyVecEnv([make_env(terrain_type=terrain_type)])
+    print(f"Creating {n_envs} parallel training environments...")
+    env = SubprocVecEnv([make_env(terrain_type=terrain_type) for _ in range(n_envs)])
     env = VecNormalize(env, norm_obs=True, norm_reward=True, clip_obs=10.0)
 
     print("Creating evaluation environment...")
@@ -148,7 +154,8 @@ def train_sac(
         render=False,
     )
 
-    print("Creating SAC model...")
+    device = "cuda" if use_gpu and torch.cuda.is_available() else "cpu"
+    print(f"Creating SAC model on device: {device}")
     model = SAC(
         "MlpPolicy",
         env,
@@ -163,6 +170,7 @@ def train_sac(
         ent_coef=ent_coef,
         verbose=1,
         tensorboard_log=log_dir,
+        device=device,
         policy_kwargs=dict(
             net_arch=dict(pi=[256, 256], qf=[256, 256]),
             activation_fn=torch.nn.ReLU,
@@ -224,6 +232,17 @@ def main():
         default="logs",
         help="Directory to save logs",
     )
+    parser.add_argument(
+        "--n-envs",
+        type=int,
+        default=8,
+        help="Number of parallel environments (default: 8, max: 16)",
+    )
+    parser.add_argument(
+        "--cpu-only",
+        action="store_true",
+        help="Force CPU training even if GPU is available",
+    )
 
     args = parser.parse_args()
 
@@ -235,13 +254,17 @@ def main():
     }
     terrain_type = terrain_map[args.terrain]
 
+    use_gpu = not args.cpu_only and torch.cuda.is_available()
+
     print(f"\n{'='*50}")
     print(f"Training Configuration:")
     print(f"  Algorithm: {args.algo}")
     print(f"  Total timesteps: {args.timesteps:,}")
     print(f"  Learning rate: {args.learning_rate}")
     print(f"  Terrain: {args.terrain}")
-    print(f"  Device: {'cuda' if torch.cuda.is_available() else 'cpu'}")
+    print(f"  Parallel environments: {args.n_envs}")
+    print(f"  Device: {'CUDA (GPU)' if use_gpu else 'CPU'}")
+    print(f"  CPU cores available: {os.cpu_count()}")
     print(f"{'='*50}\n")
 
     if args.algo == "ppo" or args.algo == "both":
@@ -252,6 +275,8 @@ def main():
             save_dir=f"{args.save_dir}/ppo",
             log_dir=f"{args.log_dir}/ppo",
             terrain_type=terrain_type,
+            n_envs=args.n_envs,
+            use_gpu=use_gpu,
         )
 
     if args.algo == "sac" or args.algo == "both":
@@ -262,6 +287,8 @@ def main():
             save_dir=f"{args.save_dir}/sac",
             log_dir=f"{args.log_dir}/sac",
             terrain_type=terrain_type,
+            n_envs=args.n_envs,
+            use_gpu=use_gpu,
         )