#!/usr/bin/env python3
"""Tank battle game (pygame) that doubles as a reinforcement-learning environment.

The player tank can be driven from the keyboard or by a ``PolicyNetwork``
loaded from JSON; enemy tanks can optionally be driven by a second network
("red team").  ``TankGame.step`` exposes a gym-like interface for training.

NOTE(review): this file was reconstructed from a whitespace-collapsed source.
Block nesting that could not be recovered unambiguously is marked with
``NOTE(review)`` comments and should be confirmed against the original.
"""
import json
import math
import random
import sys
import tkinter as tk
from tkinter import filedialog

import numpy as np
import pygame
import tensorflow as tf
import tensorflow_probability as tfp
from tensorflow import keras
from tensorflow.keras import layers, Model

# --- Constants ---
WIDTH, HEIGHT = 800, 600
TANK_RADIUS = 15
TANK_SPEED = 0.07         # pixels per ms
HELI_SPEED = 0.12         # pixels per ms
ROTATION_SPEED = 0.002    # radians per ms
RELOAD_TIME = 1000        # ms
PROJECTILE_SPEED = 0.5    # pixels per ms
PROJECTILE_RANGE = 200
SPAWN_INTERVAL = 3000     # ms
MOVE_SOUND_DELAY = 150    # ms
NUM_OBSTACLES = 3
GREEN = "\033[92m"
RED = "\033[91m"
RESET = "\033[0m"


# --- Helper: Generate a tone as a pygame Sound ---
def generate_tone(frequency, duration_ms, volume=0.4, sample_rate=44100):
    """Return a pygame ``Sound`` holding a sine tone.

    Args:
        frequency: Tone frequency in Hz.
        duration_ms: Tone length in milliseconds.
        volume: Amplitude scale applied to the 16-bit full-scale value.
        sample_rate: Samples per second used to synthesise the waveform.
    """
    n_samples = int(sample_rate * duration_ms / 1000)
    t = np.linspace(0, duration_ms / 1000, n_samples, False)
    waveform = (np.sin(2 * math.pi * frequency * t) * 32767 * volume).astype(np.int16)
    # Duplicate the mono signal into two identical columns for stereo output.
    stereo_waveform = np.column_stack((waveform, waveform))
    return pygame.sndarray.make_sound(stereo_waveform)


# -------------------------------
# Define Policy Network
# -------------------------------
class PolicyNetwork(Model):
    """Dense policy network with two tanh direction outputs and one sigmoid shoot output."""

    def __init__(self, input_dim, num_hidden_layers, first_hidden_neurons, pyramidal=False):
        """Build the layer stack.

        Args:
            input_dim: Size of the input vector (not used while constructing
                the layers; Keras infers it on the first call).
            num_hidden_layers: Number of ReLU hidden layers.
            first_hidden_neurons: Width of the first hidden layer.
            pyramidal: When true, each following hidden layer is 20% narrower
                than the previous one (never below one neuron).
        """
        super(PolicyNetwork, self).__init__()
        self.hidden_layers = []
        neurons = first_hidden_neurons
        for i in range(num_hidden_layers):
            dense = layers.Dense(neurons, activation='relu')
            setattr(self, f"hidden_layer_{i}", dense)  # ensure layer is tracked
            self.hidden_layers.append(dense)
            if pyramidal:
                neurons = max(1, int(neurons * 0.8))  # Reduce by 20% each layer
        # Output: 2 directional outputs (tanh) and 1 shooting output (sigmoid)
        self.direction_layer = layers.Dense(2, activation='tanh')
        self.shoot_layer = layers.Dense(1, activation='sigmoid')

    def call(self, inputs):
        """Run the forward pass and return ``[direction_0, direction_1, shoot]`` per row."""
        x = inputs
        for layer in self.hidden_layers:
            x = layer(x)
        direction = self.direction_layer(x)
        shoot = self.shoot_layer(x)
        return tf.concat([direction, shoot], axis=-1)


# --- Main Game Class ---
class TankGame:
    """Game state, simulation, rendering and the training ``step`` interface."""

    def __init__(self):
        """Initialise pygame, the window, fonts, sounds and control flags."""
        pygame.init()
        pygame.mixer.init()
        self.screen = pygame.display.set_mode((WIDTH, HEIGHT))
        self.width = WIDTH    # Define game area width
        self.height = HEIGHT  # Define game area height
        self.steps = 0
        self.done = False
        pygame.display.set_caption("Tank Battle Game")
        self.clock = pygame.time.Clock()
        self.font = pygame.font.SysFont("sans", 16)
        # Created once here instead of on every rendered game-over frame.
        self.game_over_font = pygame.font.SysFont("sans", 40)
        self.prompt_font = pygame.font.SysFont("sans", 20)
        # Preload sounds:
        self.move_sound = generate_tone(500, 5)
        self.shoot_sound = generate_tone(200, 50)
        self.explosion_sound = generate_tone(800, 50)
        self.heli_explosion_sound = generate_tone(1200, 50)
        self.player_lost_sound = generate_tone(1000, 50)
        self.last_move_sound_time = 0
        # Neural Net Control.  Despite the "_params" names these attributes
        # hold a loaded PolicyNetwork (see load_neural_net) or None.
        self.nn_control = False
        self.nn_params = None
        self.red_team_control = False
        self.red_team_nn_params = None

    def reset(self, difficulty):
        """Start a new round.

        Args:
            difficulty: Difficulty factor; ``0`` picks a random value in
                ``[0.5, 1.0]``.
        """
        if difficulty == 0:
            self.difficulty = random.uniform(0.5, 1.0)
        else:
            self.difficulty = difficulty
        # projectile dicts: x, y, angle, speed, rangeLeft, owner, ignoreObs
        self.projectiles = []
        self.score = 0
        self.tank_kills = 0
        self.game_over = False
        self.steps = 0      # Reset step counter
        self.done = False   # Allow training to continue
        self.last_spawn_time = 0
        if not self.red_team_control:
            print(f"Difficulty: {self.difficulty:.2f}")
        self.generate_obstacles()
        self.player = {
            'x': TANK_RADIUS,
            'y': TANK_RADIUS,
            'w': TANK_RADIUS * 2,
            'h': TANK_RADIUS * 2,
            'angle': 2,
            'reload': 0,
            'disabled': False,
            'lives': 3,
            'dead': False,
            'respawn_timer': 0,
        }
        # enemy dicts: x, y, angle, reload, disabled, type ("tank" or "helicopter")
        self.enemies = []
        self.spawn_unit(enemy_type=None, is_Player=True)
        # Always spawn at least one enemy at the start
        self.spawn_unit(enemy_type="tank", is_Player=False)

    def generate_obstacles(self):
        """Fill ``self.obstacles`` with random rectangles.

        The obstacle count depends on ``self.difficulty``.  Rectangles are
        re-rolled until they leave the point ``(WIDTH/2, HEIGHT-50)`` clear.
        """
        margin = TANK_RADIUS + 5
        self.obstacles = []
        if self.difficulty == 1:
            obstacles = 1
        elif self.difficulty == 0.5 or self.difficulty == 0.95:
            obstacles = 5
        else:
            obstacles = NUM_OBSTACLES
        for _ in range(obstacles):
            attempts = 0
            while True:
                x = random.randint(50, WIDTH - 100)
                y = random.randint(50, HEIGHT - 100)
                w = random.randint(60, 100)
                h = random.randint(20, 80)
                attempts += 1
                if not attempts % 50:
                    print("trouble figuring out obstacle spawn points ...")
                if not self._rect_overlaps_point(x, y, w, h, WIDTH / 2, HEIGHT - 50, margin):
                    self.obstacles.append({'x': x, 'y': y, 'w': w, 'h': h})
                    break

    def _rect_overlaps_point(self, rx, ry, rw, rh, px, py, margin):
        """Return True if point ``(px, py)`` lies in the rectangle grown by ``margin``."""
        return (rx - margin <= px <= rx + rw + margin
                and ry - margin <= py <= ry + rh + margin)

    def spawn_unit(self, enemy_type, is_Player=True):
        """Place the player or a new enemy at a random collision-free location.

        Enemies are additionally kept at least 1.5 projectile ranges away from
        the player.  Candidates are re-rolled until one is valid.

        Args:
            enemy_type: ``"tank"`` or ``"helicopter"``; ignored for the player.
            is_Player: When true, reposition and revive ``self.player``;
                otherwise append a new enemy to ``self.enemies``.
        """
        margin = TANK_RADIUS + 10
        while True:
            x = random.uniform(margin, WIDTH - margin)
            y = random.uniform(margin, HEIGHT - margin)
            if is_Player:
                invalid = (self.collides_with_obstacles(x, y)
                           or self.collides_with_tanks(x, y, self.player))
            else:
                dist = math.hypot(x - self.player['x'], y - self.player['y'])
                # spawn enemy 1.5 projectile ranges away
                invalid = (dist < PROJECTILE_RANGE * 1.5
                           or self.collides_with_obstacles(x, y)
                           or self.collides_with_tanks(x, y, self_tank=None))
            if not invalid:
                break  # Found a valid spawn location
        angle = random.uniform(0, 2 * math.pi)
        if is_Player:
            self.player['x'] = x
            self.player['y'] = y
            self.player['w'] = TANK_RADIUS * 2
            self.player['h'] = TANK_RADIUS * 2
            self.player['angle'] = angle
            self.player['disabled'] = False
            self.player['dead'] = False
            self.player['reload'] = RELOAD_TIME
        else:
            self.enemies.append({
                'x': x,
                'y': y,
                'angle': angle,
                'reload': RELOAD_TIME,
                'disabled': False,
                'type': enemy_type,
            })

    def shoot_projectile(self, x, y, angle, owner, ignoreObs):
        """Append a new projectile dict to ``self.projectiles``.

        Args:
            x, y: Start position.
            angle: Travel direction in radians.
            owner: ``"player"`` or ``"enemy"``; decides who the shot can hit.
            ignoreObs: When true the projectile is not stopped by obstacles.
        """
        self.projectiles.append({
            'x': x,
            'y': y,
            'angle': angle,
            'speed': PROJECTILE_SPEED,
            'rangeLeft': PROJECTILE_RANGE,
            'owner': owner,
            'ignoreObs': ignoreObs,
        })

    def collides_with_obstacles(self, x, y):
        """Return True if a tank centred at ``(x, y)`` hits a wall or an obstacle."""
        # Check world boundaries first
        margin = TANK_RADIUS * 1.5
        if x <= margin or x >= self.width - margin or y <= margin or y >= self.height - margin:
            return True
        return any(self.circle_rect_collision(x, y, margin, obs) for obs in self.obstacles)

    def collides_with_tanks(self, x, y, self_tank):
        """Return True if ``(x, y)`` is within two tank radii of another ground unit.

        Args:
            x, y: Candidate position.
            self_tank: The unit being moved (skipped in the check), or None.

        Helicopters and a dead player are ignored.  Units are compared by
        identity so two units that happen to hold equal values are still
        treated as distinct.
        """
        min_dist = TANK_RADIUS * 2
        if self_tank is not self.player and not self.player.get('dead', False):
            if math.hypot(self.player['x'] - x, self.player['y'] - y) < min_dist:
                return True
        for enemy in self.enemies:
            if enemy is self_tank or enemy.get('type') == "helicopter":
                continue
            if math.hypot(enemy['x'] - x, enemy['y'] - y) < min_dist:
                return True
        return False

    def circle_rect_collision(self, cx, cy, radius, rect):
        """Return True if the circle ``(cx, cy, radius)`` overlaps the rectangle dict."""
        half_w = rect['w'] / 2
        half_h = rect['h'] / 2
        dist_x = abs(cx - (rect['x'] + half_w))
        dist_y = abs(cy - (rect['y'] + half_h))
        if dist_x > half_w + radius or dist_y > half_h + radius:
            return False
        if dist_x <= half_w or dist_y <= half_h:
            return True
        # Only the corner region is left: compare against the corner distance.
        dx = dist_x - half_w
        dy = dist_y - half_h
        return dx * dx + dy * dy <= radius * radius

    def _apply_action(self, actor, action, delta, role, play_sound=False):
        """Apply one ``[turn, move, shoot]`` action to ``actor``.

        Each component triggers when its magnitude exceeds a fresh random draw
        or 0.8 (the shoot component is compared signed against the draw).

        Args:
            actor: Unit dict to control (player or enemy).
            action: Sequence of three numbers: turn, move, shoot.
            delta: Elapsed time in ms used to scale rotation and movement.
            role: Projectile owner tag, ``"player"`` or ``"enemy"``.
            play_sound: Play the shoot sound when a shot is fired.

        Returns:
            ``(fired, blocked)``: whether a projectile was fired, and whether
            an attempted move was rejected by a collision.
        """
        fired = False
        blocked = False
        # Shooting
        if (action[2] > random.random() or abs(action[2]) > 0.8) and actor['reload'] <= 0:
            self.shoot_projectile(actor['x'], actor['y'], actor['angle'], role, False)
            actor['reload'] = RELOAD_TIME
            fired = True
            if play_sound:
                self.shoot_sound.play()
        # Turning
        if abs(action[0]) > random.random() or abs(action[0]) > 0.8:
            if action[0] > 0:
                actor['angle'] += ROTATION_SPEED * delta
            elif action[0] < 0:
                actor['angle'] -= ROTATION_SPEED * delta
        # Moving
        if abs(action[1]) > random.random() or abs(action[1]) > 0.8:
            # The trigger above cannot pass for a zero value, so the sign is
            # always well defined here.
            signed_step = TANK_SPEED * delta if action[1] > 0 else -(TANK_SPEED * delta)
            nx = actor['x'] + math.cos(actor['angle']) * signed_step
            ny = actor['y'] + math.sin(actor['angle']) * signed_step
            if not self.collides_with_obstacles(nx, ny) and not self.collides_with_tanks(nx, ny, actor):
                actor['x'], actor['y'] = nx, ny
            else:
                blocked = True
        return fired, blocked

    def apply_neural_net_control(self, delta, is_Player=True, enemy=None):
        """Let the loaded network drive the player or the given enemy for one frame.

        Does nothing when the relevant network has not been loaded.

        Args:
            delta: Elapsed time in ms.
            is_Player: Control ``self.player`` when true, ``enemy`` otherwise.
            enemy: Opposing unit (player control) or the controlled unit
                (enemy control).

        Raises:
            ValueError: If the network does not produce exactly 3 outputs.
        """
        actor = self.player if is_Player else enemy
        state = self.get_state(enemy, is_Player)
        policy = self.nn_params if is_Player else self.red_team_nn_params
        if policy is None:
            return
        action = self.neural_net_predict_generic(state, policy)
        if len(action) != 3:
            raise ValueError(f"Expected 3 outputs from neural network, but got {len(action)}")
        role = "player" if is_Player else "enemy"
        self._apply_action(actor, action, delta, role, play_sound=is_Player)

    def process_events(self):
        """Handle window close and the B / R hotkeys.

        B toggles network control of the player.  R restarts after game over;
        otherwise it toggles network control of the enemy tanks.  The relevant
        network is loaded on first activation.
        """
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                pygame.quit()
                sys.exit()
            elif event.type == pygame.KEYDOWN:
                if event.key == pygame.K_b:
                    self.nn_control = not self.nn_control
                    if self.nn_control and self.nn_params is None:
                        self.load_neural_net("player")
                elif event.key == pygame.K_r and self.game_over:
                    self.reset(0)
                elif event.key == pygame.K_r:
                    self.red_team_control = not self.red_team_control
                    if self.red_team_control and self.red_team_nn_params is None:
                        self.load_neural_net("red")

    def load_neural_net(self, team="player"):
        """Load a ``PolicyNetwork`` from JSON and store it on the game.

        Reads ``red_team.json`` into ``red_team_nn_params`` for ``team ==
        "red"``, otherwise ``tank_000.json`` into ``nn_params``.

        Returns:
            The loaded network, or None if loading failed (the error is
            printed rather than raised so a bad file cannot crash the game).
        """
        if team == "red":
            path = "red_team.json"
            attr = "red_team_nn_params"
        else:
            path = "tank_000.json"
            attr = "nn_params"
        try:
            with open(path, "r") as f:
                params = json.load(f)
            # Validate JSON structure.
            for key in ["input_shape", "hidden_layers", "direction_layer", "shoot_layer"]:
                if key not in params:
                    raise ValueError(f"Invalid network file: missing '{key}'.")
            for layer in params["hidden_layers"]:
                if "weights" not in layer or "biases" not in layer:
                    raise ValueError("Invalid network file: each hidden layer must have 'weights' and 'biases'.")
            for key in ["direction_layer", "shoot_layer"]:
                if "weights" not in params[key] or "biases" not in params[key]:
                    raise ValueError(f"Invalid network file: {key} missing weights or biases.")
            if params["input_shape"][0] != 13:
                raise ValueError(f"Expected input size 13, but got {params['input_shape'][0]}")
            # Determine network architecture.
            input_dim = params.get("input_shape", [13])[0]
            hidden_layers = params["hidden_layers"]
            num_hidden_layers = len(hidden_layers)
            if num_hidden_layers > 0:
                neurons_counts = [len(layer["biases"]) for layer in hidden_layers]
                neurons_per_layer = neurons_counts[0]
                # Differing widths are taken to mean the 20%-shrinking layout;
                # any other layout makes set_weights below fail with a shape
                # error, which is reported by the except clause.
                pyramidal = len(set(neurons_counts)) > 1
            else:
                neurons_per_layer = 12  # default value
                pyramidal = False
            policy_net = PolicyNetwork(input_dim, num_hidden_layers, neurons_per_layer, pyramidal)
            # Perform a dummy forward pass to initialize the layers.
            dummy = tf.convert_to_tensor(np.zeros((1, input_dim), dtype=np.float32))
            policy_net(dummy)
            # Load hidden layers weights.
            for layer, saved in zip(policy_net.hidden_layers, hidden_layers):
                layer.set_weights([np.array(saved["weights"]), np.array(saved["biases"])])
            # Load weights for the direction output layer.
            policy_net.direction_layer.set_weights([
                np.array(params["direction_layer"]["weights"]),
                np.array(params["direction_layer"]["biases"]),
            ])
            # Load weights for the shoot output layer.
            policy_net.shoot_layer.set_weights([
                np.array(params["shoot_layer"]["weights"]),
                np.array(params["shoot_layer"]["biases"]),
            ])
            # Save the model to an attribute and return it.
            setattr(self, attr, policy_net)
            print("Successfully loaded neural network from", path)
            return policy_net
        except Exception as e:
            print("Failed to load neural network:", e)
            return None

    def update_enemy_movement(self, enemy, delta):
        """Scripted movement for one enemy tank, scaled by ``self.difficulty``.

        The enemy steers toward a target direction blended with a push away
        from the screen edges, then tries to drive forward.  A blocked enemy
        accumulates ``stuck_time`` and eventually turns by a random angle.
        """
        tank_speed = TANK_SPEED * self.difficulty
        rotation_speed = ROTATION_SPEED * self.difficulty
        # Determine target vector based on difficulty
        if self.difficulty > 0.91:
            # Flee: move away from player
            target_x = enemy['x'] - self.player['x']
            target_y = enemy['y'] - self.player['y']
        elif self.difficulty > 0.71:
            # Chase player (0.71 < difficulty <= 0.91)
            target_x = self.player['x'] - enemy['x']
            target_y = self.player['y'] - enemy['y']
        else:
            # Head for a random point, re-drawn on every call.
            target_x = random.uniform(TANK_RADIUS, WIDTH - TANK_RADIUS) - enemy['x']
            target_y = random.uniform(TANK_RADIUS, HEIGHT - TANK_RADIUS) - enemy['y']
        # Normalize target vector
        dist = math.hypot(target_x, target_y)
        if dist:
            target_x /= dist
            target_y /= dist
        # Add boundary avoidance force
        avoid_margin = TANK_RADIUS * 5  # adjust as needed
        avoid_x, avoid_y = 0, 0
        if enemy['x'] < avoid_margin:
            avoid_x = 1
        elif enemy['x'] > WIDTH - avoid_margin:
            avoid_x = -1
        if enemy['y'] < avoid_margin:
            avoid_y = 1
        elif enemy['y'] > HEIGHT - avoid_margin:
            avoid_y = -1
        # Combine the target and avoidance vectors
        combined_x = target_x + avoid_x
        combined_y = target_y + avoid_y
        combined_dist = math.hypot(combined_x, combined_y)
        if combined_dist:
            combined_x /= combined_dist
            combined_y /= combined_dist
        # Rotate enemy toward the combined vector, limited by the turn rate.
        desired_angle = math.atan2(combined_y, combined_x)
        diff = math.atan2(math.sin(desired_angle - enemy['angle']),
                          math.cos(desired_angle - enemy['angle']))
        enemy['angle'] += math.copysign(min(abs(diff), rotation_speed * delta), diff)
        # Attempt to move forward
        nx = enemy['x'] + math.cos(enemy['angle']) * tank_speed * delta
        ny = enemy['y'] + math.sin(enemy['angle']) * tank_speed * delta
        if not self.collides_with_obstacles(nx, ny) and not self.collides_with_tanks(nx, ny, enemy):
            # NOTE(review): nesting reconstructed -- the difficulty gate is
            # assumed to guard only the position update, and the else below is
            # assumed to belong to the collision check.  Confirm.
            if self.difficulty >= 0.6:  # otherwise disabled
                enemy['x'], enemy['y'] = nx, ny
            enemy['stuck_time'] = 0
        else:
            enemy.setdefault('stuck_time', 0)
            enemy['stuck_time'] += delta
            if enemy['stuck_time'] > 100 / self.difficulty:
                enemy['angle'] += random.uniform(-math.pi, math.pi)
                enemy['stuck_time'] = 0

    def neural_net_predict_generic(self, input_vec, policy_net):
        """Sample an action from a normal distribution centred on the network output.

        Args:
            input_vec: 1-D numpy state vector.
            policy_net: Callable network returning one row of outputs.

        Returns:
            Numpy array with the sampled action.
        """
        epsilon = 0.1  # standard deviation of the exploration noise
        # Convert the input vector to a 2D float32 Tensor.
        input_tensor = tf.convert_to_tensor(input_vec[None, :], dtype=tf.float32)
        # Run the policy network to get the mean.
        mean = policy_net(input_tensor)[0]
        if tf.math.reduce_any(tf.math.is_nan(mean)):
            tf.print("WARNING: mean contains NaN!", mean)
        std = tf.convert_to_tensor(epsilon, dtype=tf.float32)
        # Create a normal distribution with the computed mean and std, and sample an action.
        dist = tfp.distributions.Normal(loc=mean, scale=std)
        return dist.sample().numpy()

    def _play_player_lost_sound(self):
        """Play the life-lost tone twice in a row.

        ``Sound.play()`` can return None when the mixer has no free channel;
        in that case the repeat is skipped instead of raising AttributeError.
        """
        channel = self.player_lost_sound.play()
        if channel is not None:
            channel.queue(self.player_lost_sound)

    def _player_loses_life(self):
        """Take one life from the player and start a respawn or end the game."""
        self.player['lives'] -= 1
        self._play_player_lost_sound()
        if self.player['lives'] > 0:
            self.player['dead'] = True
            self.player['respawn_timer'] = 1000
        else:
            self.game_over = True

    def update(self, delta):
        """Advance the simulation by ``delta`` milliseconds.

        Handles player input (keyboard or network), enemy spawning and
        behaviour, projectile motion and hits, and the player respawn timer.
        """
        now = pygame.time.get_ticks()
        keys = pygame.key.get_pressed()
        moved = False
        red_team_applied = False

        # --- Player control ---
        if not self.nn_control:
            if keys[pygame.K_LEFT]:
                self.player['angle'] -= ROTATION_SPEED * delta
                moved = True
            if keys[pygame.K_RIGHT]:
                self.player['angle'] += ROTATION_SPEED * delta
                moved = True
            if not self.player.get('disabled', False):
                # Keyboard driving is three times faster than network driving.
                drive = TANK_SPEED * delta * 3
                # NOTE(review): nesting reconstructed -- `moved` is assumed to
                # be set only when the move succeeds.  Confirm.
                if keys[pygame.K_UP]:
                    nx = self.player['x'] + math.cos(self.player['angle']) * drive
                    ny = self.player['y'] + math.sin(self.player['angle']) * drive
                    if not self.collides_with_obstacles(nx, ny) and not self.collides_with_tanks(nx, ny, self.player):
                        self.player['x'], self.player['y'] = nx, ny
                        moved = True
                if keys[pygame.K_DOWN]:
                    nx = self.player['x'] - math.cos(self.player['angle']) * drive
                    ny = self.player['y'] - math.sin(self.player['angle']) * drive
                    if not self.collides_with_obstacles(nx, ny) and not self.collides_with_tanks(nx, ny, self.player):
                        self.player['x'], self.player['y'] = nx, ny
                        moved = True
            # NOTE(review): nesting reconstructed -- shooting is assumed to
            # stay available while the tank is disabled.  Confirm.
            if keys[pygame.K_SPACE] and self.player['reload'] <= 0:
                self.shoot_projectile(self.player['x'], self.player['y'], self.player['angle'], "player", False)
                self.player['reload'] = RELOAD_TIME
                self.shoot_sound.play()
        else:
            enemy = self.enemies[0] if self.enemies else None
            self.apply_neural_net_control(delta, is_Player=True, enemy=enemy)

        if moved and now - self.last_move_sound_time > MOVE_SOUND_DELAY:
            self.move_sound.play()
            self.last_move_sound_time = now
        if self.player['reload'] > 0:
            self.player['reload'] -= delta

        # --- Enemy spawning ---
        self.last_spawn_time += delta
        if self.last_spawn_time > SPAWN_INTERVAL:
            if not any(e['type'] == "tank" for e in self.enemies):
                self.spawn_unit(enemy_type="tank", is_Player=False)
            self.last_spawn_time = 0

        # --- Update enemies ---
        for enemy in self.enemies[:]:
            # Only the first non-helicopter enemy is driven by the red-team net.
            if enemy['type'] != "helicopter" and self.red_team_control and not red_team_applied:
                self.apply_neural_net_control(delta, is_Player=False, enemy=enemy)
                red_team_applied = True
            if enemy['reload'] > 0:
                enemy['reload'] -= delta
            if enemy.get('disabled', False):
                continue
            if enemy['type'] == "helicopter":
                # Turn toward the player, limited by the rotation rate.
                desired = math.atan2(self.player['y'] - enemy['y'], self.player['x'] - enemy['x'])
                diff = math.atan2(math.sin(desired - enemy['angle']), math.cos(desired - enemy['angle']))
                enemy['angle'] += math.copysign(min(abs(diff), ROTATION_SPEED * delta), diff)
                nx = enemy['x'] + math.cos(enemy['angle']) * HELI_SPEED * delta
                ny = enemy['y'] + math.sin(enemy['angle']) * HELI_SPEED * delta
                # Check boundaries and adjust angle if needed
                if nx < TANK_RADIUS or nx > WIDTH - TANK_RADIUS:
                    enemy['angle'] = math.pi - enemy['angle']
                    nx = enemy['x'] + math.cos(enemy['angle']) * HELI_SPEED * delta
                if ny < TANK_RADIUS or ny > HEIGHT - TANK_RADIUS:
                    enemy['angle'] = -enemy['angle']
                    ny = enemy['y'] + math.sin(enemy['angle']) * HELI_SPEED * delta
                enemy['x'], enemy['y'] = nx, ny
            elif not self.red_team_control:
                self.update_enemy_movement(enemy, delta)
            dist = math.hypot(self.player['x'] - enemy['x'], self.player['y'] - enemy['y'])
            scripted_shooter = not self.red_team_control or enemy['type'] == "helicopter"
            if enemy['reload'] <= 0 and scripted_shooter and dist <= PROJECTILE_RANGE * 1.1:
                ignoreObs = (enemy['type'] == "helicopter")
                # NOTE(review): nesting reconstructed -- the cooldown is
                # assumed to reset even when difficulty 1 suppresses the shot.
                if self.difficulty < 1:
                    self.shoot_projectile(enemy['x'], enemy['y'], enemy['angle'], "enemy", ignoreObs)
                enemy['reload'] = RELOAD_TIME / self.difficulty

        # --- Update projectiles ---
        for proj in self.projectiles[:]:
            distTravel = proj['speed'] * delta
            proj['x'] += math.cos(proj['angle']) * distTravel
            proj['y'] += math.sin(proj['angle']) * distTravel
            proj['rangeLeft'] -= distTravel
            if proj['rangeLeft'] <= 0:
                self.projectiles.remove(proj)
                continue
            if not proj['ignoreObs']:
                if any(self.point_in_rect(proj['x'], proj['y'], obs) for obs in self.obstacles):
                    self.projectiles.remove(proj)
                    continue
            if proj['owner'] == "player":
                hit_enemy = None
                for enemy in self.enemies:
                    if math.hypot(enemy['x'] - proj['x'], enemy['y'] - proj['y']) < TANK_RADIUS:
                        hit_enemy = enemy
                        break
                if hit_enemy:
                    if hit_enemy['type'] == "tank":
                        self.tank_kills += 1
                        # Every third tank kill brings in a helicopter.
                        if self.tank_kills % 3 == 0:
                            self.spawn_unit(enemy_type="helicopter", is_Player=False)
                        self.enemies.remove(hit_enemy)
                        self.score += 1
                        self.explosion_sound.play()
                    elif hit_enemy['type'] == "helicopter":
                        self.enemies.remove(hit_enemy)
                        self.score += 1
                        self.heli_explosion_sound.play()
                        self.spawn_unit(enemy_type="tank", is_Player=False)
                    if proj in self.projectiles:
                        self.projectiles.remove(proj)
                    continue
            else:
                hit_player = (math.hypot(self.player['x'] - proj['x'], self.player['y'] - proj['y']) < TANK_RADIUS
                              and not self.player.get('dead', False))
                if hit_player:
                    # A healthy tank has a 10% chance of only being disabled;
                    # an already disabled tank always loses a life.
                    if not self.player.get('disabled', False) and random.random() < 0.1:
                        self.player['disabled'] = True
                    else:
                        self._player_loses_life()
                    if proj in self.projectiles:
                        self.projectiles.remove(proj)
                    continue

        # --- Player respawn ---
        if self.player.get('dead', False):
            self.player['respawn_timer'] -= delta
            if self.player['respawn_timer'] <= 0:
                self.spawn_unit(enemy_type=None, is_Player=True)

    def render(self):
        """Draw obstacles, units, projectiles, HUD and the game-over overlay."""
        self.screen.fill((238, 238, 238))
        for obs in self.obstacles:
            pygame.draw.rect(self.screen, (128, 128, 128),
                             pygame.Rect(obs['x'], obs['y'], obs['w'], obs['h']))
        if not self.player.get('dead', False):
            self.draw_tank(self.player['x'], self.player['y'], self.player['angle'], (0, 0, 255))
        for enemy in self.enemies:
            if enemy['type'] == "helicopter":
                self.draw_helicopter(enemy['x'], enemy['y'], enemy['angle'], (0, 255, 0))
            else:
                self.draw_tank(enemy['x'], enemy['y'], enemy['angle'], (255, 0, 0))
        for proj in self.projectiles:
            pygame.draw.circle(self.screen, (0, 0, 0), (int(proj['x']), int(proj['y'])), 3)
        score_text = self.font.render("Score: " + str(self.score), True, (0, 0, 0))
        lives_text = self.font.render("Lives: " + str(self.player.get('lives', 3)), True, (0, 0, 0))
        self.screen.blit(score_text, (10, 10))
        self.screen.blit(lives_text, (10, 30))
        if self.game_over:
            overlay = pygame.Surface((WIDTH, HEIGHT))
            overlay.set_alpha(180)
            overlay.fill((0, 0, 0))
            self.screen.blit(overlay, (0, 0))
            go_text = self.game_over_font.render("Game Over", True, (255, 255, 255))
            prompt_text = self.prompt_font.render("Press R to restart", True, (255, 255, 255))
            self.screen.blit(go_text, go_text.get_rect(center=(WIDTH / 2, HEIGHT / 2)))
            self.screen.blit(prompt_text, prompt_text.get_rect(center=(WIDTH / 2, HEIGHT / 2 + 30)))
        pygame.display.flip()

    def draw_tank(self, x, y, angle, color):
        """Draw a square tank with a black turret, rotated to ``angle`` radians."""
        tank_surf = pygame.Surface((TANK_RADIUS * 2, TANK_RADIUS * 2), pygame.SRCALPHA)
        tank_surf.fill(color)
        turret = pygame.Rect(TANK_RADIUS, TANK_RADIUS - 4, TANK_RADIUS, 8)
        pygame.draw.rect(tank_surf, (0, 0, 0), turret)
        rotated = pygame.transform.rotate(tank_surf, -math.degrees(angle))
        self.screen.blit(rotated, rotated.get_rect(center=(x, y)))

    def draw_helicopter(self, x, y, angle, color):
        """Draw a helicopter as a filled circle with a heading line."""
        pygame.draw.circle(self.screen, color, (int(x), int(y)), TANK_RADIUS)
        end_x = x + math.cos(angle) * TANK_RADIUS
        end_y = y + math.sin(angle) * TANK_RADIUS
        pygame.draw.line(self.screen, (0, 0, 0), (int(x), int(y)), (int(end_x), int(end_y)), 2)

    def run(self):
        """Run the interactive loop, capped at 30 frames per second."""
        while True:
            delta = self.clock.tick(30)
            self.process_events()
            if not self.game_over:
                self.update(delta)
            self.render()

    def get_obstacle_sensors(self, enemy, is_Player=True):
        """Return 8 ray-cast readings around the player or the given enemy.

        Sensor 0 points straight ahead; each following sensor subtracts 45
        degrees from the heading.  Pressing RIGHT adds to the heading, so the
        sweep runs toward the unit's left.
        """
        origin = self.player if is_Player else enemy
        return [self.cast_ray(origin, origin['angle'] - i * math.radians(45)) for i in range(8)]

    def cast_ray(self, tank, angle, sensor_range=160, sensor_step=8):
        """Probe along ``angle`` from ``tank`` for a wall or obstacle.

        Returns:
            A proximity value: ``(sensor_range - distance) / sensor_range`` at
            the first sample that is out of bounds or inside an obstacle
            (1.0 when the hit is at distance 0), or 0 if nothing is found
            within ``sensor_range``.
        """
        distance = 0
        while distance < sensor_range:
            test_x = tank['x'] + math.cos(angle) * distance
            test_y = tank['y'] + math.sin(angle) * distance
            # Check if ray goes out of bounds
            out_of_bounds = (test_x <= 0 or test_x >= self.width
                             or test_y <= 0 or test_y >= self.height)
            if out_of_bounds or any(self.point_in_rect(test_x, test_y, obs) for obs in self.obstacles):
                return (sensor_range - distance) / sensor_range
            distance += sensor_step
        return 0

    def point_in_rect(self, x, y, rect):
        """Return True if ``(x, y)`` lies inside the rectangle dict (edges included)."""
        return (rect['x'] <= x <= rect['x'] + rect['w']
                and rect['y'] <= y <= rect['y'] + rect['h'])

    def get_state(self, enemy, is_Player):
        """Build the 13-element observation vector.

        Layout: 8 obstacle sensors, bearing to the opponent relative to own
        heading, bearing to self relative to the opponent's heading, distance
        divided by screen width, own reload time divided by ``RELOAD_TIME``,
        and a 0/1 flag for blocked line of sight.

        Args:
            enemy: Enemy unit dict, or None to use a placeholder near the top
                centre of the screen.
            is_Player: Build the vector from the player's point of view when
                true, from the enemy's otherwise.

        Returns:
            ``numpy.ndarray`` of dtype float32.
        """
        if not enemy:
            # if no enemies, set default location
            enemy = {'x': self.width / 2, 'y': 50, 'angle': math.pi / 2, 'reload': 0}
        sensors = self.get_obstacle_sensors(enemy, is_Player)  # 8 ray-casts
        # Compute the relative angle from the front of the player to the enemy.
        dx = enemy['x'] - self.player['x']
        dy = enemy['y'] - self.player['y']
        angle_to_enemy = math.atan2(dy, dx)
        angle_from_player = math.atan2(math.sin(angle_to_enemy - self.player['angle']),
                                       math.cos(angle_to_enemy - self.player['angle']))
        # Compute the relative angle from the enemy's front to the player.
        angle_to_player = math.atan2(-dy, -dx)
        angle_from_enemy = math.atan2(math.sin(angle_to_player - enemy['angle']),
                                      math.cos(angle_to_player - enemy['angle']))
        # Compute normalized distance (here divided by screen width)
        distance = math.hypot(dx, dy) / self.width
        # Reload status divided by RELOAD_TIME
        reload_status = self.player['reload'] / RELOAD_TIME
        e_reload_status = enemy['reload'] / RELOAD_TIME
        # Binary flag for line-of-sight blocked.
        blocked = self.line_of_sight_blocked(self.player['x'], self.player['y'], enemy['x'], enemy['y'])
        los_blocked = 1.0 if blocked else 0.0
        # Total state vector:
        # 8 sensors + own bearing + opponent bearing + distance + reload_status + los_blocked = 13 inputs
        if is_Player:
            state = sensors + [angle_from_player, angle_from_enemy, distance, reload_status, los_blocked]
        else:
            state = sensors + [angle_from_enemy, angle_from_player, distance, e_reload_status, los_blocked]
        return np.array(state, dtype=np.float32)

    def line_of_sight_blocked(self, x1, y1, x2, y2):
        """Return True if the segment between the two points crosses any obstacle edge."""
        return any(self.line_intersects_rect(x1, y1, x2, y2, obs) for obs in self.obstacles)

    def line_intersects_rect(self, x1, y1, x2, y2, rect):
        """Return True if the segment crosses one of the rectangle's four edges."""
        left, top = rect['x'], rect['y']
        right, bottom = left + rect['w'], top + rect['h']
        edges = [
            (left, top, right, top),
            (left, top, left, bottom),
            (right, top, right, bottom),
            (left, bottom, right, bottom),
        ]
        return any(self.lines_intersect(x1, y1, x2, y2, *edge) for edge in edges)

    def lines_intersect(self, x1, y1, x2, y2, x3, y3, x4, y4):
        """Return True if segment (x1,y1)-(x2,y2) crosses segment (x3,y3)-(x4,y4).

        Uses the orientation test: the segments cross when each one's
        endpoints lie on opposite sides of the other.
        """
        def ccw(a, b, c):
            return (c[1] - a[1]) * (b[0] - a[0]) > (b[1] - a[1]) * (c[0] - a[0])

        a = (x1, y1)
        b = (x2, y2)
        c = (x3, y3)
        d = (x4, y4)
        return ccw(a, c, d) != ccw(b, c, d) and ccw(a, b, c) != ccw(a, b, d)

    def normalize_angle(self, angle):
        """Wrap ``angle`` (radians) into the range ``[-pi, pi]``."""
        while angle > math.pi:
            angle -= 2 * math.pi
        while angle < -math.pi:
            angle += 2 * math.pi
        return angle

    def step(self, action, phase):
        """Apply one training action to the player and advance the game 33 ms.

        Args:
            action: Sequence ``[turn, move, shoot]``.
            phase: Curriculum phase; higher phases enable more reward terms
                (``> 0`` proximity, ``> 1`` collision penalty, ``> 2`` shot
                quality, ``> 3`` kill bonus and hit penalty).

        Returns:
            ``(state, reward, done, info)``.  ``done`` is set when an enemy is
            destroyed, the player loses a life, or no enemy exists.
        """
        delta = 33
        self.done = False
        if not self.enemies:
            print("No enemies found. Skipping step.")
            return self.get_state(enemy=None, is_Player=True), -1, True, {}

        # Geometry is measured before the action is applied.
        enemy = self.enemies[0]
        dx = enemy['x'] - self.player['x']
        dy = enemy['y'] - self.player['y']
        dist_nom = math.hypot(dx, dy) / PROJECTILE_RANGE
        angle_to_enemy = math.atan2(dy, dx)
        diff = abs(self.normalize_angle(angle_to_enemy - self.player['angle']))
        initial_score = self.score
        initial_lives = self.player['lives']
        reward = 0.0

        fired, blocked = self._apply_action(self.player, action, delta, "player")

        # Shot-quality reward
        if fired:
            if dist_nom <= 1.0:
                dist_factor = 1.0
            elif dist_nom <= 1.7:
                x = (dist_nom - 1.0) / 0.7
                dist_factor = 1.0 - x ** 2
            else:
                slope = -0.2 / (4.5 - 1.7)
                dist_factor = slope * (dist_nom - 1.7)
            if diff <= 0.8:
                a, b, c = -0.25, -1.05, 1.0
                diff_factor = a * (diff ** 2) + b * diff + c
            else:
                slope = -0.2 / (3.14 - 0.8)
                diff_factor = slope * (diff - 0.8)
            raw_reward = 0.64 * dist_factor * diff_factor
            # Two negative factors (too far AND aimed away) would multiply
            # into a positive reward; keep such a shot a penalty.
            if dist_factor < 0 and diff_factor < 0:
                raw_reward = -raw_reward
            # Clamp: 41 shots at the cap sum to 26.24, at the floor to -4.92.
            raw_reward = max(-0.12, min(0.64, raw_reward))
            # NOTE(review): nesting reconstructed -- the phase gate is assumed
            # to cover only the reward, not the log messages.  Confirm.
            if phase > 2:
                reward += raw_reward
            if dist_nom <= 1.0 and diff < 0.2:
                print("Nice Shot")
            elif dist_nom <= 1.368 and diff < 0.4:
                print("Near Miss")

        # Collision penalty (-5 over 1250 blocked steps)
        if blocked and phase > 1:
            reward -= 0.004

        # Reward for being close: logistic in the normalised distance,
        # crossing zero at `midpoint`.
        k = 6.0
        midpoint = 1.75
        f = 2.0 * (1.0 / (1.0 + math.exp(k * (dist_nom - midpoint))) - 0.5)
        if phase > 0:
            reward += 0.004 * f  # reward = +/-5 for 1250 steps

        # Scoring
        self.update(delta)
        if self.score > initial_score:
            if phase > 3:
                reward += 1.5
            print("Enemy Destroyed")
            self.done = True
        if self.player['lives'] < initial_lives:
            if phase > 3:
                reward -= 0.75
            print("NN Tank was hit!")
            self.done = True

        enemy = self.enemies[0] if self.enemies else None
        return self.get_state(enemy=enemy, is_Player=True), float(reward), self.done, {}


def main():
    """Create the game, start a round at random difficulty and run the loop."""
    game = TankGame()
    game.reset(0)
    game.run()


if __name__ == "__main__":
    main()