diff --git a/ant_state.json b/ant_state.json index 052bf25..4703733 100644 --- a/ant_state.json +++ b/ant_state.json @@ -1,7 +1,8 @@ { "grid_size": 100, - "food": [61, 59], - "ants": [[55,59,0.81],[33,64,0.81],[30,84,0.82],[21,90,0.89],[49,59,0.98],[37,59,0.85],[20,56,0.96],[54,57,0.81],[27,83,0.96],[33,24,0.83],[29,59,0.84],[22,59,0.94],[23,58,0.80],[26,90,0.81],[30,74,0.78],[33,60,0.89],[24,90,0.98],[52,59,0.90],[25,59,0.91],[26,73,0.83]], - "episode": 415780, - "epsilon": 0.050 + "food": [31, 32], + "predator": [45, 66], + "ants": [[45,80,1.00]], + "episode": 1209, + "epsilon": 0.020 } \ No newline at end of file diff --git a/ant_viewer.html b/ant_viewer.html index c69ca26..668ece4 100644 --- a/ant_viewer.html +++ b/ant_viewer.html @@ -53,104 +53,91 @@ + setInterval(update, 50); + \ No newline at end of file diff --git a/best_brain.bin b/best_brain.bin new file mode 100644 index 0000000..73a8412 Binary files /dev/null and b/best_brain.bin differ diff --git a/src/env.zig b/src/env.zig index 8e47c3d..d012db6 100644 --- a/src/env.zig +++ b/src/env.zig @@ -2,21 +2,20 @@ const std = @import("std"); const Allocator = std.mem.Allocator; pub const GRID_SIZE = 100; -pub const NUM_ANTS = 20; - +pub const NUM_ANTS = 1; pub const TILE_EMPTY = 0; pub const TILE_WALL = 1; pub const TILE_FOOD = 2; +// Aggiungiamo il tipo visuale per il predatore (non usato nella grid array, ma per logica) +pub const TILE_PREDATOR = 3; pub const Ant = struct { x: usize, y: usize, alive: bool, energy: f32, - gene_speed: f32, gene_metabolism: f32, - generation: usize, score: usize, }; @@ -28,6 +27,10 @@ pub const World = struct { food_x: usize, food_y: usize, + // NUOVO: Coordinate del predatore + pred_x: usize, + pred_y: usize, + best_ant_idx: usize, prng: std.Random.DefaultPrng, @@ -38,6 +41,8 @@ pub const World = struct { .ants = undefined, .food_x = 0, .food_y = 0, + .pred_x = 0, // Init + .pred_y = 0, // Init .best_ant_idx = 0, .prng = std.Random.DefaultPrng.init(seed), }; @@ -46,8 +51,8 @@ pub const World = struct { } pub fn reset(self: *World) void { - const random = self.prng.random(); - + //const random = self.prng.random(); + // Reset Grid (codice invariato...) for (0..GRID_SIZE) |y| { for (0..GRID_SIZE) |x| { self.pheromones[y][x] = 0.0; @@ -60,6 +65,10 @@ pub const World = struct { } self.respawnFood(); + // NUOVO: Respawna il predatore in un angolo lontano + self.pred_x = GRID_SIZE - 5; + self.pred_y = GRID_SIZE - 5; + const center = GRID_SIZE / 2; for (0..NUM_ANTS) |i| { self.ants[i] = Ant{ @@ -67,14 +76,18 @@ pub const World = struct { .y = center, .alive = true, .energy = 100.0, - .gene_speed = 0.8 + random.float(f32) * 0.4, - .gene_metabolism = 1.0, + + // PRIMA ERA: .gene_speed = 0.8 + random.float(f32) * 0.4, + .gene_speed = 1.0, // Velocità fissa e garantita + .gene_metabolism = 1.0, // Consumo fisso + .generation = 0, .score = 0, }; } } + // respawnFood e getScent invariati... fn respawnFood(self: *World) void { const random = self.prng.random(); while (true) { @@ -102,41 +115,89 @@ pub const World = struct { return false; } + // spawnChild invariato... pub fn spawnChild(self: *World, parent_idx: usize) Ant { const parent = self.ants[parent_idx]; - const random = self.prng.random(); - var new_speed = parent.gene_speed + (random.float(f32) * 0.2 - 0.1); - - if (new_speed < 0.5) new_speed = 0.5; - if (new_speed > 3.0) new_speed = 3.0; - - const new_metabolism = new_speed * new_speed; + // Abbiamo rimosso tutta la logica del random e le mutazioni return Ant{ .x = GRID_SIZE / 2, .y = GRID_SIZE / 2, .alive = true, .energy = 100.0, - .gene_speed = new_speed, - .gene_metabolism = new_metabolism, + .gene_speed = 1.0, // Il figlio ha sempre velocità 1.0 + .gene_metabolism = 1.0, // Il figlio ha sempre metabolismo 1.0 .generation = parent.generation + 1, .score = 0, }; } + // updateBestAnt invariato... pub fn updateBestAnt(self: *World) void { var max_score: usize = 0; var best_idx: usize = 0; - for (self.ants, 0..) |ant, i| { if (ant.alive and ant.score > max_score) { max_score = ant.score; best_idx = i; } } - if (max_score > 0) { - self.best_ant_idx = best_idx; + if (max_score > 0) self.best_ant_idx = best_idx; + } + + // NUOVO: Logica movimento predatore + pub fn stepPredator(self: *World) void { + const random = self.prng.random(); + + // Il predatore si muove verso la formica più vicina con una certa probabilità, + // altrimenti si muove a caso (per non renderlo impossibile da battere). + + var target_x = self.pred_x; + var target_y = self.pred_y; + + // Trova formica più vicina + var min_dist: usize = 9999; + var has_target = false; + + for (self.ants) |ant| { + if (!ant.alive) continue; + // Distanza Manhattan + const dx = if (ant.x > self.pred_x) ant.x - self.pred_x else self.pred_x - ant.x; + const dy = if (ant.y > self.pred_y) ant.y - self.pred_y else self.pred_y - ant.y; + const dist = dx + dy; + + if (dist < min_dist and dist < 200) { // Insegue solo se entro 20 caselle + min_dist = dist; + target_x = ant.x; + target_y = ant.y; + has_target = true; + } + } + + var move_x: isize = 0; + var move_y: isize = 0; + + // 60% insegue, 40% random + if (has_target and random.float(f32) < 0.6) { + if (target_x > self.pred_x) move_x = 1 else if (target_x < self.pred_x) move_x = -1; + if (target_y > self.pred_y) move_y = 1 else if (target_y < self.pred_y) move_y = -1; + } else { + // Random walk + const r = random.intRangeAtMost(u8, 0, 3); + if (r == 0) move_y = -1; + if (r == 1) move_y = 1; + if (r == 2) move_x = -1; + if (r == 3) move_x = 1; + } + + const new_px = @as(usize, @intCast(@as(isize, @intCast(self.pred_x)) + move_x)); + const new_py = @as(usize, @intCast(@as(isize, @intCast(self.pred_y)) + move_y)); + + // Controllo muri + if (self.grid[new_py][new_px] != TILE_WALL) { + self.pred_x = new_px; + self.pred_y = new_py; } } @@ -144,11 +205,16 @@ pub const World = struct { var ant = &self.ants[ant_idx]; if (!ant.alive) return .{ 0.0, true }; - const random = self.prng.random(); + // NUOVO: Controllo morte immediata da predatore (inizio turno) + if (ant.x == self.pred_x and ant.y == self.pred_y) { + self.ants[ant_idx] = self.spawnChild(self.best_ant_idx); // Respawn + return .{ -500.0, true }; // Morte dolorosa + } + const random = self.prng.random(); + // ... (Logica energia/movimento invariata fino al loop) ... var moves_to_make: usize = 0; var chance = ant.gene_speed; - while (chance > 0) { if (chance >= 1.0) { moves_to_make += 1; @@ -158,7 +224,6 @@ pub const World = struct { chance = 0; } } - if (moves_to_make == 0) { ant.energy -= 0.1; if (ant.energy <= 0) return .{ -10.0, true }; @@ -169,7 +234,6 @@ pub const World = struct { for (0..moves_to_make) |_| { if (ant.energy <= 0) break; - ant.energy -= (0.2 * ant.gene_metabolism); const old_dist_x = if (ant.x > self.food_x) ant.x - self.food_x else self.food_x - ant.x; @@ -191,21 +255,37 @@ pub const World = struct { ant.x = new_x; ant.y = new_y; + + // NUOVO: Controllo collisione predatore DOPO il movimento + if (ant.x == self.pred_x and ant.y == self.pred_y) { + self.ants[ant_idx] = self.spawnChild(self.best_ant_idx); + return .{ -500.0, true }; // Morte + } + + const dist_pred_x = if (ant.x > self.pred_x) ant.x - self.pred_x else self.pred_x - ant.x; + const dist_pred_y = if (ant.y > self.pred_y) ant.y - self.pred_y else self.pred_y - ant.y; + const dist_pred = dist_pred_x + dist_pred_y; + + if (dist_pred == 1) { + total_reward -= 5.0; // Troppo vicina! Rischio altissimo. + } else if (dist_pred == 2) { + total_reward -= 2.0; // Zona di allerta. + } + self.pheromones[new_y][new_x] = 1.0; const new_dist = if (new_x > self.food_x) new_x - self.food_x else self.food_x - new_x; const total_new_dist = new_dist + (if (new_y > self.food_y) new_y - self.food_y else self.food_y - new_y); - const scent = self.getScent(new_x, new_y); + if (total_new_dist < old_dist) total_reward += 1.5 + scent; - if (total_new_dist > old_dist) total_reward -= 1.0; + if (total_new_dist > old_dist) total_reward -= 0.1; if (new_x == self.food_x and new_y == self.food_y) { self.respawnFood(); ant.energy = 100.0; ant.score += 1; total_reward += 50.0; - self.updateBestAnt(); } } @@ -218,6 +298,7 @@ pub const World = struct { return .{ total_reward, false }; } + // evaporatePheromones invariato... pub fn evaporatePheromones(self: *World) void { for (0..GRID_SIZE) |y| { for (0..GRID_SIZE) |x| { @@ -226,14 +307,17 @@ pub const World = struct { } } + // Modificato per includere il predatore pub fn getAntObservation(self: *World, allocator: Allocator, ant_idx: usize) ![]f32 { - var obs = try allocator.alloc(f32, 15); + // Aumentiamo la dimensione a 20 floats + var obs = try allocator.alloc(f32, 20); const ant = self.ants[ant_idx]; var idx: usize = 0; const ax = @as(i32, @intCast(ant.x)); const ay = @as(i32, @intCast(ant.y)); + // 3x3 Grid locale (9 inputs) var dy: i32 = -1; while (dy <= 1) : (dy += 1) { var dx: i32 = -1; @@ -241,10 +325,13 @@ pub const World = struct { const py = @as(usize, @intCast(ay + dy)); const px = @as(usize, @intCast(ax + dx)); var val: f32 = 0.0; + if (self.grid[py][px] == TILE_WALL) { val = -1.0; } else if (px == self.food_x and py == self.food_y) { val = 1.0; + } else if (px == self.pred_x and py == self.pred_y) { + val = -5.0; // PREDATORE VICINO! PAURA! } else if (self.isOccupied(px, py) and (dx != 0 or dy != 0)) { val = -0.5; } @@ -252,12 +339,28 @@ pub const World = struct { idx += 1; } } + + // Input originali (9..14) obs[9] = self.getScent(ant.x, ant.y); obs[10] = self.pheromones[ant.y][ant.x]; - obs[11] = if (self.food_y < ant.y) 1.0 else 0.0; - obs[12] = if (self.food_y > ant.y) 1.0 else 0.0; - obs[13] = if (self.food_x < ant.x) 1.0 else 0.0; - obs[14] = if (self.food_x > ant.x) 1.0 else 0.0; + obs[11] = if (self.food_y < ant.y) 1.0 else 0.0; // Cibo Su + obs[12] = if (self.food_y > ant.y) 1.0 else 0.0; // Cibo Giù + obs[13] = if (self.food_x < ant.x) 1.0 else 0.0; // Cibo SX + obs[14] = if (self.food_x > ant.x) 1.0 else 0.0; // Cibo DX + + // NUOVI INPUT: Sensori Predatore (15..19) + // 15: Prossimità predatore (1.0 = vicinissimo, 0.0 = lontano) + const pdx = if (ant.x > self.pred_x) ant.x - self.pred_x else self.pred_x - ant.x; + const pdy = if (ant.y > self.pred_y) ant.y - self.pred_y else self.pred_y - ant.y; + const dist_pred = @as(f32, @floatFromInt(pdx + pdy)); + obs[15] = if (dist_pred == 0) 1.0 else 1.0 / dist_pred; + + // 16-19: Direzione Predatore + obs[16] = if (self.pred_y < ant.y) 1.0 else 0.0; // Predatore è SOPRA + obs[17] = if (self.pred_y > ant.y) 1.0 else 0.0; // Predatore è SOTTO + obs[18] = if (self.pred_x < ant.x) 1.0 else 0.0; // Predatore è SINISTRA + obs[19] = if (self.pred_x > ant.x) 1.0 else 0.0; // Predatore è DESTRA + return obs; } }; diff --git a/src/main.zig b/src/main.zig index 1d21208..8403533 100644 --- a/src/main.zig +++ b/src/main.zig @@ -3,12 +3,41 @@ const World = @import("env.zig").World; const env = @import("env.zig"); const Network = @import("modular_network.zig").Network; -const GAMMA: f32 = 0.9; -const LR: f32 = 0.005; +// --- PARAMETRI DI TRAINING --- +const GAMMA: f32 = 0.98; +const LR: f32 = 0.001; const EPSILON_START: f32 = 1.0; const EPSILON_END: f32 = 0.05; const DECAY_RATE: f32 = 0.0001; +const SAVE_INTERVAL: usize = 500; +// Export JSON +fn exportAntJSON(world: *World, file_path: []const u8, episode: usize, avg_epsilon: f32) !void { + const file = try std.fs.cwd().createFile(file_path, .{}); + defer file.close(); + + var buffer: [65536]u8 = undefined; + var fba = std.heap.FixedBufferAllocator.init(&buffer); + const allocator = fba.allocator(); + + var ants_json = std.ArrayList(u8){}; + defer ants_json.deinit(allocator); + try ants_json.appendSlice(allocator, "["); + + var first = true; + for (world.ants) |ant| { + if (!ant.alive) continue; // Esporta solo le formiche vive + if (!first) try ants_json.appendSlice(allocator, ","); + try std.fmt.format(ants_json.writer(allocator), "[{d},{d},{d:.2}]", .{ ant.x, ant.y, ant.gene_speed }); + first = false; + } + try ants_json.appendSlice(allocator, "]"); + + const json = try std.fmt.allocPrint(allocator, "{{\n \"grid_size\": {d},\n \"food\": [{d}, {d}],\n \"predator\": [{d}, {d}],\n \"ants\": {s},\n \"episode\": {d},\n \"epsilon\": {d:.3}\n}}", .{ env.GRID_SIZE, world.food_x, world.food_y, world.pred_x, world.pred_y, ants_json.items, episode, avg_epsilon }); + try file.writeAll(json); +} + +// Funzioni helper fn maxVal(slice: []const f32) f32 { var m: f32 = -1.0e20; for (slice) |v| if (v > m) { @@ -29,97 +58,166 @@ fn argmax(slice: []const f32) usize { return idx; } -fn exportAntJSON(world: *World, file_path: []const u8, episode: usize, avg_epsilon: f32) !void { - const file = try std.fs.cwd().createFile(file_path, .{}); - defer file.close(); - - var buffer: [65536]u8 = undefined; - var fba = std.heap.FixedBufferAllocator.init(&buffer); - const allocator = fba.allocator(); - - var ants_json = std.ArrayList(u8){}; - defer ants_json.deinit(allocator); - try ants_json.appendSlice(allocator, "["); - - for (world.ants, 0..) |ant, i| { - if (i > 0) try ants_json.appendSlice(allocator, ","); - try std.fmt.format(ants_json.writer(allocator), "[{d},{d},{d:.2}]", .{ ant.x, ant.y, ant.gene_speed }); - } - try ants_json.appendSlice(allocator, "]"); - - const json = try std.fmt.allocPrint(allocator, "{{\n \"grid_size\": {d},\n \"food\": [{d}, {d}],\n \"ants\": {s},\n \"episode\": {d},\n \"epsilon\": {d:.3}\n}}", .{ env.GRID_SIZE, world.food_x, world.food_y, ants_json.items, episode, avg_epsilon }); - try file.writeAll(json); -} - pub fn main() !void { var gpa = std.heap.GeneralPurposeAllocator(.{}){}; const allocator = gpa.allocator(); defer _ = gpa.deinit(); - var world = World.init(12345); - var net = Network.init(allocator); - defer net.deinit(); + // ========================================== + // 🎛️ PANNELLO DI CONTROLLO PRINCIPALE + // ========================================== + const INFERENCE_MODE = true; // TRUE = Duello 1vs1. FALSE = Training massivo. + const LOAD_EXISTING = false; // (Valido solo se INFERENCE_MODE=false) + // ========================================== - try net.addLayer(15, 40, 111, true); - try net.addLayer(40, 4, 222, false); + var world = World.init(12345); + var net: Network = undefined; + + if (INFERENCE_MODE) { + std.debug.print(">> CARICAMENTO CERVELLO PER ARENA 1vs1...\n", .{}); + net = Network.load(allocator, "best_brain.bin") catch |err| { + std.debug.print("ERRORE: Non trovo 'best_brain.bin'. Fai prima il training!\nErrore: {}\n", .{err}); + return; + }; + } else if (LOAD_EXISTING) { + std.debug.print(">> RIPRESA TRAINING: Caricamento 'best_brain.bin'...\n", .{}); + net = try Network.load(allocator, "best_brain.bin"); + } else { + std.debug.print(">> NUOVO TRAINING: Creazione rete neurale...\n", .{}); + net = Network.init(allocator); + try net.addLayer(20, 45, 111, true); + try net.addLayer(45, 4, 222, false); + } + defer net.deinit(); var prng = std.Random.DefaultPrng.init(999); const random = prng.random(); - - std.debug.print("--- HIVE MIND TRAINING START ---\n", .{}); - std.debug.print("Mappa: {d}x{d} | Formiche: {d}\n", .{ env.GRID_SIZE, env.GRID_SIZE, env.NUM_ANTS }); - var global_step: usize = 0; - var epsilon: f32 = EPSILON_START; - while (true) { - world.evaporatePheromones(); + // ========================================== + // MODALITÀ: INFERENZA (DUELLO 1 VS 1) + // ========================================== + if (INFERENCE_MODE) { + std.debug.print("--- ARENA 1 VS 1: INIZIO DUELLO ---\n", .{}); - for (0..env.NUM_ANTS) |i| { - const current_obs = try world.getAntObservation(allocator, i); + const margin = 5; + world.ants[0].energy = 500.0; + world.ants[0].x = margin; + world.ants[0].y = margin; + world.pred_x = env.GRID_SIZE - margin; + world.pred_y = env.GRID_SIZE - margin; + + var score: usize = 0; + + while (world.ants[0].alive) { + + // FIX 1: Stessa velocità del training! + if (global_step % 2 == 0) world.stepPredator(); + + const current_obs = try world.getAntObservation(allocator, 0); defer allocator.free(current_obs); - var action: usize = 0; const q_values = net.forward(current_obs); + var action: usize = 0; - if (random.float(f32) < epsilon) { + // FIX 3: Lasciamo un 2% di "istinto" (epsilon) per non farla incantare sui muri + if (random.float(f32) < 0.02) { action = random.intRangeAtMost(usize, 0, 3); } else { action = argmax(q_values); } - const result = world.stepAnt(i, action); + const result = world.stepAnt(0, action); const reward = result[0]; - var target_val = reward; - - const next_obs = try world.getAntObservation(allocator, i); - defer allocator.free(next_obs); - const next_q_values = net.forward(next_obs); - target_val += GAMMA * maxVal(next_q_values); - - var target_vector = try allocator.alloc(f32, 4); - defer allocator.free(target_vector); - for (0..4) |j| target_vector[j] = q_values[j]; - target_vector[action] = target_val; - - _ = try net.train(current_obs, target_vector, LR); - } - - global_step += 1; - - if (epsilon > EPSILON_END) { - epsilon -= DECAY_RATE; - } - - if (global_step % 10 == 0) { - try exportAntJSON(&world, "ant_state.json", global_step, epsilon); - - if (global_step % 100 == 0) { - std.debug.print("Step: {d} | Epsilon: {d:.3} | Cibo: [{d},{d}]\r", .{ global_step, epsilon, world.food_x, world.food_y }); + if (reward > 10.0) { + score += 1; + std.debug.print("SCORE: {d} | Cibo preso!\n", .{score}); + world.ants[0].energy = 500.0; } - std.Thread.sleep(100 * 1_000_000); + try exportAntJSON(&world, "ant_state.json", global_step, 0.02); + global_step += 1; + + std.Thread.sleep(50 * 1_000_000); + + if (!world.ants[0].alive) { + std.debug.print("\n>>> GAME OVER! La formica e' durata {d} step e ha mangiato {d} cibi. <<<\n", .{ global_step, score }); + break; + } + } + } + // ========================================== + // MODALITÀ: TRAINING (SCIAME) + // ========================================== + else { + std.debug.print("--- HIVE MIND: INIZIO TRAINING ---\n", .{}); + var epsilon: f32 = EPSILON_START; + var record_score: usize = 0; + + while (true) { + world.evaporatePheromones(); + + if (global_step % 2 == 0) world.stepPredator(); + + for (0..env.NUM_ANTS) |i| { + if (!world.ants[i].alive) continue; + + const current_obs = try world.getAntObservation(allocator, i); + defer allocator.free(current_obs); + + var action: usize = 0; + const q_values = net.forward(current_obs); + + if (random.float(f32) < epsilon) { + action = random.intRangeAtMost(usize, 0, 3); + } else { + action = argmax(q_values); + } + + const result = world.stepAnt(i, action); + const reward = result[0]; + const died = result[1] and reward <= -50.0; + + var target_val = reward; + if (!died) { + const next_obs = try world.getAntObservation(allocator, i); + defer allocator.free(next_obs); + const next_q_values = net.forward(next_obs); + target_val += GAMMA * maxVal(next_q_values); + } + + var target_vector = try allocator.alloc(f32, 4); + defer allocator.free(target_vector); + for (0..4) |j| target_vector[j] = q_values[j]; + target_vector[action] = target_val; + + _ = try net.train(current_obs, target_vector, LR); + } + + global_step += 1; + if (epsilon > EPSILON_END) epsilon -= DECAY_RATE; + + if (global_step % SAVE_INTERVAL == 0) { + var current_total_score: usize = 0; + for (world.ants) |ant| { + if (ant.alive) { + current_total_score += ant.score; + } + } + if (current_total_score > record_score) { + record_score = current_total_score; + try net.save("best_brain.bin"); + } + } + + if (global_step % 10 == 0) { + try exportAntJSON(&world, "ant_state.json", global_step, epsilon); + if (global_step % 100 == 0) { + std.debug.print("Step: {d} | Eps: {d:.3} | Record: {d} | Pred: [{d},{d}]\r", .{ global_step, epsilon, record_score, world.pred_x, world.pred_y }); + } + std.Thread.sleep(20 * 1_000_000); // Veloce per il training + } } } }