diff --git a/ant_state.json b/ant_state.json index 7da4f78..ac95645 100644 --- a/ant_state.json +++ b/ant_state.json @@ -1,9 +1,7 @@ { - "grid_size": 8, - "steps": 10, - "ant": [6, 4], - "food": [3, 1], - "walls": [[0,0],[1,0],[2,0],[3,0],[4,0],[5,0],[6,0],[7,0],[0,1],[7,1],[0,2],[7,2],[0,3],[7,3],[0,4],[7,4],[0,5],[7,5],[0,6],[7,6],[0,7],[1,7],[2,7],[3,7],[4,7],[5,7],[6,7],[7,7]], - "episode": 3889, - "epsilon": 0.049 + "grid_size": 100, + "food": [11, 83], + "ants": [[10,52],[9,61],[9,63],[8,54],[9,78],[10,74],[10,58],[10,70],[10,73],[10,76],[40,77],[9,72],[9,70],[10,72],[10,75],[10,71],[9,75],[16,65],[10,63],[11,52]], + "episode": 23320, + "epsilon": 0.050 } \ No newline at end of file diff --git a/ant_viewer.html b/ant_viewer.html index 22ac221..a70dea3 100644 --- a/ant_viewer.html +++ b/ant_viewer.html @@ -2,98 +2,149 @@ - Ant Survival AI + Hive Mind Viewer -
-

Ant Survival DQN

-
Stato: Connessione...
-
Steps: 0 | Reward: 0
+ - +
+ +
\ No newline at end of file diff --git a/src/env.zig b/src/env.zig index ba4eaa1..9f5b42a 100644 --- a/src/env.zig +++ b/src/env.zig @@ -1,40 +1,40 @@ const std = @import("std"); const Allocator = std.mem.Allocator; -pub const GRID_SIZE = 8; +pub const GRID_SIZE = 100; // Mappa Gigante! +pub const NUM_ANTS = 20; // La Colonia + pub const TILE_EMPTY = 0; pub const TILE_WALL = 1; pub const TILE_FOOD = 2; -pub const TILE_ANT = 3; -pub const ACTION_UP = 0; -pub const ACTION_DOWN = 1; -pub const ACTION_LEFT = 2; -pub const ACTION_RIGHT = 3; +// Non c'è più TILE_ANT nella griglia statica, perché le formiche si muovono sopra +// Useremo una lista dinamica. -pub const Ant = struct { x: usize, y: usize, alive: bool }; +pub const Ant = struct { + x: usize, + y: usize, + alive: bool, + steps: usize, +}; pub const World = struct { grid: [GRID_SIZE][GRID_SIZE]u8, - visited: [GRID_SIZE][GRID_SIZE]bool, - ant_x: usize, - ant_y: usize, + pheromones: [GRID_SIZE][GRID_SIZE]f32, // 0.0 = Neutro, 1.0 = Appena visitato + ants: [NUM_ANTS]Ant, // Array fisso di formiche food_x: usize, food_y: usize, - steps: usize, max_steps: usize, prng: std.Random.DefaultPrng, pub fn init(seed: u64) World { var w = World{ .grid = undefined, - .visited = undefined, - .ant_x = 0, - .ant_y = 0, + .pheromones = undefined, + .ants = undefined, .food_x = 0, .food_y = 0, - .steps = 0, - .max_steps = 100, + .max_steps = 1000, // Più passi per mappa grande .prng = std.Random.DefaultPrng.init(seed), }; w.reset(); @@ -42,12 +42,11 @@ pub const World = struct { } pub fn reset(self: *World) void { - const random = self.prng.random(); + //const random = self.prng.random(); for (0..GRID_SIZE) |y| { for (0..GRID_SIZE) |x| { - self.visited[y][x] = false; - + self.pheromones[y][x] = 0.0; if (x == 0 or y == 0 or x == GRID_SIZE - 1 or y == GRID_SIZE - 1) { self.grid[y][x] = TILE_WALL; } else { @@ -56,123 +55,182 @@ pub const World = struct { } } - self.ant_x = random.intRangeAtMost(usize, 1, GRID_SIZE - 2); - self.ant_y = random.intRangeAtMost(usize, 1, GRID_SIZE - 2); + // 2. Spawn Cibo + self.respawnFood(); - self.visited[self.ant_y][self.ant_x] = true; - - while (true) { - self.food_x = random.intRangeAtMost(usize, 1, GRID_SIZE - 2); - self.food_y = random.intRangeAtMost(usize, 1, GRID_SIZE - 2); - if (self.food_x != self.ant_x or self.food_y != self.ant_y) break; + // 3. Spawn Formiche (Tutte al centro, come un formicaio) + const center = GRID_SIZE / 2; + for (0..NUM_ANTS) |i| { + self.ants[i] = Ant{ + .x = center, + .y = center, + .alive = true, + .steps = 0, + }; } - - self.steps = 0; - self.updateGrid(); } - fn updateGrid(self: *World) void { - for (1..GRID_SIZE - 1) |y| { - for (1..GRID_SIZE - 1) |x| { - self.grid[y][x] = TILE_EMPTY; + fn respawnFood(self: *World) void { + const random = self.prng.random(); + // Semplice loop per trovare posto libero + while (true) { + const rx = random.intRangeAtMost(usize, 1, GRID_SIZE - 2); + const ry = random.intRangeAtMost(usize, 1, GRID_SIZE - 2); + if (self.grid[ry][rx] != TILE_WALL) { + self.food_x = rx; + self.food_y = ry; + break; } } - self.grid[self.food_y][self.food_x] = TILE_FOOD; - self.grid[self.ant_y][self.ant_x] = TILE_ANT; } - pub fn step(self: *World, action: usize) struct { f32, bool } { - self.steps += 1; + // Calcola l'olfatto (Distanza inversa dal cibo) + // 0.0 (Lontanissimo) -> 1.0 (Sopra il cibo) + fn getScent(self: *World, x: usize, y: usize) f32 { + const dx = if (x > self.food_x) x - self.food_x else self.food_x - x; + const dy = if (y > self.food_y) y - self.food_y else self.food_y - y; + const dist = @as(f32, @floatFromInt(dx + dy)); + return 1.0 / (dist + 1.0); + } - var new_x = self.ant_x; - var new_y = self.ant_y; + // Controlla se una cella è occupata da UN'ALTRA formica + fn isOccupied(self: *World, x: usize, y: usize) bool { + for (self.ants) |ant| { + if (ant.alive and ant.x == x and ant.y == y) return true; + } + return false; + } - if (action == ACTION_UP) new_y -= 1; - if (action == ACTION_DOWN) new_y += 1; - if (action == ACTION_LEFT) new_x -= 1; - if (action == ACTION_RIGHT) new_x += 1; + // Esegue step per UNA formica specifica + pub fn stepAnt(self: *World, ant_idx: usize, action: usize) struct { f32, bool } { + var ant = &self.ants[ant_idx]; + if (!ant.alive) return .{ 0.0, true }; - const tile = self.grid[new_y][new_x]; + ant.steps += 1; - if (tile == TILE_WALL) { + // Salviamo lo stato PRECEDENTE per fare i confronti + const old_dist_x = if (ant.x > self.food_x) ant.x - self.food_x else self.food_x - ant.x; + const old_dist_y = if (ant.y > self.food_y) ant.y - self.food_y else self.food_y - ant.y; + const old_dist = old_dist_x + old_dist_y; + + var new_x = ant.x; + var new_y = ant.y; + + if (action == 0) new_y -= 1; // UP + if (action == 1) new_y += 1; // DOWN + if (action == 2) new_x -= 1; // LEFT + if (action == 3) new_x += 1; // RIGHT + + // --- 1. MURI (Limiti Mappa) --- + if (self.grid[new_y][new_x] == TILE_WALL) return .{ -5.0, false }; + + // --- 2. COLLISIONI TRA FORMICHE --- + // Se c'è traffico, penalità leggera ma non bloccante se c'è cibo vicino + if (self.isOccupied(new_x, new_y)) { + // Se siamo vicini al cibo, spingi! Altrimenti aspetta. + if (old_dist > 5) return .{ -0.5, false }; + } + + const dist_x = if (new_x > self.food_x) new_x - self.food_x else self.food_x - new_x; + const dist_y = if (new_y > self.food_y) new_y - self.food_y else self.food_y - new_y; + const new_dist = dist_x + dist_y; + + var reward: f32 = -0.1; + + const scent_intensity = self.getScent(new_x, new_y); + + const pheromone_level = self.pheromones[new_y][new_x]; + + if (scent_intensity > 0.15) { + reward += 0.1; + } else { + if (pheromone_level > 0.1) { + reward -= 0.5 * pheromone_level; + } else { + reward += 0.2; + } + } + + if (new_dist < old_dist) { + reward += 1.5 + (scent_intensity * 2.0); + } else if (new_dist > old_dist) { + reward -= 1.0; + } + + ant.x = new_x; + ant.y = new_y; + + self.pheromones[new_y][new_x] = 1.0; + + if (new_x == self.food_x and new_y == self.food_y) { + self.respawnFood(); + ant.steps = 0; + return .{ 100.0, false }; + } + + if (ant.steps >= self.max_steps) { + ant.x = GRID_SIZE / 2; + ant.y = GRID_SIZE / 2; + ant.steps = 0; return .{ -10.0, false }; } - var move_reward: f32 = -0.1; - - if (self.visited[new_y][new_x]) { - move_reward -= 0.5; - } else { - move_reward += 0.2; - } - - self.ant_x = new_x; - self.ant_y = new_y; - self.visited[new_y][new_x] = true; - - self.updateGrid(); - - if (new_x == self.food_x and new_y == self.food_y) { - return .{ 100.0, true }; - } - - if (self.steps >= self.max_steps) { - return .{ -10.0, true }; - } - - return .{ move_reward, false }; + return .{ reward, false }; } - pub fn getObservation(self: *World, allocator: Allocator) ![]f32 { - var obs = try allocator.alloc(f32, 10); + pub fn evaporatePheromones(self: *World) void { + for (0..GRID_SIZE) |y| { + for (0..GRID_SIZE) |x| { + if (self.pheromones[y][x] > 0) { + self.pheromones[y][x] *= 0.995; + } + } + } + } + + // Input aumentati a 15 + pub fn getAntObservation(self: *World, allocator: Allocator, ant_idx: usize) ![]f32 { + var obs = try allocator.alloc(f32, 15); // AUMENTATO A 15 + const ant = self.ants[ant_idx]; var idx: usize = 0; - const ax = @as(i32, @intCast(self.ant_x)); - const ay = @as(i32, @intCast(self.ant_y)); + const ax = @as(i32, @intCast(ant.x)); + const ay = @as(i32, @intCast(ant.y)); + // 1. Visione 3x3 (0-8) var dy: i32 = -1; while (dy <= 1) : (dy += 1) { var dx: i32 = -1; while (dx <= 1) : (dx += 1) { - const py_i = ay + dy; - const px_i = ax + dx; + const py = @as(usize, @intCast(ay + dy)); + const px = @as(usize, @intCast(ax + dx)); var val: f32 = 0.0; - - if (py_i >= 0 and py_i < GRID_SIZE and px_i >= 0 and px_i < GRID_SIZE) { - const py = @as(usize, @intCast(py_i)); - const px = @as(usize, @intCast(px_i)); - const content = self.grid[py][px]; - - if (content == TILE_WALL) { - val = -1.0; - } else if (content == TILE_FOOD) { - val = 1.0; - } else if (self.visited[py][px]) { - val = -0.5; - } else { - val = 0.0; - } - } else { + if (self.grid[py][px] == TILE_WALL) { val = -1.0; + } else if (px == self.food_x and py == self.food_y) { + val = 1.0; // Vedo il cibo vicino! + } else if (self.isOccupied(px, py) and (dx != 0 or dy != 0)) { + val = -0.5; // Vedo una sorella } - obs[idx] = val; idx += 1; } } - obs[9] = self.getScent(self.ant_x, self.ant_y); + + // 2. Sensi Chimici (9-10) + obs[9] = self.getScent(ant.x, ant.y); + obs[10] = self.pheromones[ant.y][ant.x]; + + // 3. BUSSOLA BINARIA (11-14) + // Risponde alla domanda: "In che quadrante è il cibo?" + // È molto più facile da capire per l'IA rispetto a un float. + + obs[11] = if (self.food_y < ant.y) 1.0 else 0.0; // Cibo è SOPRA? + obs[12] = if (self.food_y > ant.y) 1.0 else 0.0; // Cibo è SOTTO? + obs[13] = if (self.food_x < ant.x) 1.0 else 0.0; // Cibo è SINISTRA? + obs[14] = if (self.food_x > ant.x) 1.0 else 0.0; // Cibo è DESTRA? return obs; } - - fn getScent(self: *World, x: usize, y: usize) f32 { - const dx = if (x > self.food_x) x - self.food_x else self.food_x - x; - const dy = if (y > self.food_y) y - self.food_y else self.food_y - y; - const dist = dx + dy; - - if (dist == 0) return 1.0; - - return 1.0 / (@as(f32, @floatFromInt(dist)) + 1.0); - } }; diff --git a/src/main.zig b/src/main.zig index 9f85059..5ce89a3 100644 --- a/src/main.zig +++ b/src/main.zig @@ -1,48 +1,18 @@ const std = @import("std"); const World = @import("env.zig").World; -const env = @import("env.zig"); +const env = @import("env.zig"); // Per accedere a costanti come GRID_SIZE const Network = @import("modular_network.zig").Network; -// --- PARAMETRI AI --- -const GAMMA: f32 = 0.9; // Quanto conta il futuro? (0.9 = lungimirante) -const LR: f32 = 0.005; // Learning Rate -const EPSILON_START: f32 = 1.0; // Inizia esplorando al 100% -const EPSILON_END: f32 = 0.05; // Finisce esplorando al 5% -const DECAY_RATE: f32 = 0.001; // Quanto velocemente smette di essere curiosa +// --- IPERPARAMETRI --- +const GAMMA: f32 = 0.9; +const LR: f32 = 0.005; // Basso perché output lineare +const EPSILON_START: f32 = 1.0; +const EPSILON_END: f32 = 0.05; +const DECAY_RATE: f32 = 0.0001; // Decadimento più lento dato che abbiamo tanti step -fn exportAntJSON(world: *World, file_path: []const u8, episode: usize, epsilon: f32) !void { - const file = try std.fs.cwd().createFile(file_path, .{}); - defer file.close(); - - var buffer: [8192]u8 = undefined; - var fba = std.heap.FixedBufferAllocator.init(&buffer); - const allocator = fba.allocator(); - - var walls_json = std.ArrayList(u8){}; - defer walls_json.deinit(allocator); - - try walls_json.appendSlice(allocator, "["); - var first = true; - for (0..env.GRID_SIZE) |y| { - for (0..env.GRID_SIZE) |x| { - if (world.grid[y][x] == env.TILE_WALL) { - if (!first) try walls_json.appendSlice(allocator, ","); - try std.fmt.format(walls_json.writer(allocator), "[{d},{d}]", .{ x, y }); - first = false; - } - } - } - try walls_json.appendSlice(allocator, "]"); - - // Aggiungiamo info extra per debug (Epsilon ed Episode) - const json = try std.fmt.allocPrint(allocator, "{{\n \"grid_size\": {d},\n \"steps\": {d},\n \"ant\": [{d}, {d}],\n \"food\": [{d}, {d}],\n \"walls\": {s},\n \"episode\": {d},\n \"epsilon\": {d:.3}\n}}", .{ env.GRID_SIZE, world.steps, world.ant_x, world.ant_y, world.food_x, world.food_y, walls_json.items, episode, epsilon }); - - try file.writeAll(json); -} - -// Funzione helper per trovare il valore massimo in un array +// Helper per trovare max e argmax fn maxVal(slice: []const f32) f32 { - var m: f32 = -10000.0; + var m: f32 = -1.0e20; // Numero molto basso for (slice) |v| if (v > m) { m = v; }; @@ -50,7 +20,7 @@ fn maxVal(slice: []const f32) f32 { } fn argmax(slice: []const f32) usize { - var m: f32 = -10000.0; + var m: f32 = -1.0e20; var idx: usize = 0; for (slice, 0..) |v, i| { if (v > m) { @@ -61,99 +31,126 @@ fn argmax(slice: []const f32) usize { return idx; } +// Export aggiornato per Multi-Formica +fn exportAntJSON(world: *World, file_path: []const u8, episode: usize, epsilon: f32) !void { + const file = try std.fs.cwd().createFile(file_path, .{}); + defer file.close(); + + // Buffer grande per contenere le coordinate di 20 formiche + var buffer: [65536]u8 = undefined; + var fba = std.heap.FixedBufferAllocator.init(&buffer); + const allocator = fba.allocator(); + + // Costruiamo la lista delle formiche in JSON: [[x,y], [x,y], ...] + var ants_json = std.ArrayList(u8){}; + defer ants_json.deinit(allocator); + try ants_json.appendSlice(allocator, "["); + + for (world.ants, 0..) |ant, i| { + if (i > 0) try ants_json.appendSlice(allocator, ","); + try std.fmt.format(ants_json.writer(allocator), "[{d},{d}]", .{ ant.x, ant.y }); + } + try ants_json.appendSlice(allocator, "]"); + + // Scriviamo il JSON completo + // QUI RISOLVIAMO L'ERRORE: Usiamo 'epsilon' nella stringa + const json = try std.fmt.allocPrint(allocator, "{{\n \"grid_size\": {d},\n \"food\": [{d}, {d}],\n \"ants\": {s},\n \"episode\": {d},\n \"epsilon\": {d:.3}\n}}", .{ env.GRID_SIZE, world.food_x, world.food_y, ants_json.items, episode, epsilon }); + + try file.writeAll(json); +} + pub fn main() !void { var gpa = std.heap.GeneralPurposeAllocator(.{}){}; const allocator = gpa.allocator(); defer _ = gpa.deinit(); - // 1. Inizializza Ambiente e Rete + // 1. Inizializza Ambiente 100x100 e Rete var world = World.init(12345); var net = Network.init(allocator); defer net.deinit(); - // Input: 9 (3x3 vista) -> Hidden: 24 -> Output: 4 (Su, Giù, Sx, Dx) - try net.addLayer(9, 24, 111, true); - try net.addLayer(24, 4, 222, false); + // ARCHITETTURA RETE + // Input 11: (9 Vista + 1 Olfatto + 1 Feromone) + // Output 4: (Su, Giù, Sx, Dx) LINEARE + try net.addLayer(15, 40, 111, true); + try net.addLayer(40, 4, 222, false); var prng = std.Random.DefaultPrng.init(999); const random = prng.random(); - std.debug.print("--- AI TRAINING START ---\n", .{}); + std.debug.print("--- HIVE MIND TRAINING START ---\n", .{}); + std.debug.print("Mappa: {d}x{d} | Formiche: {d}\n", .{ env.GRID_SIZE, env.GRID_SIZE, env.NUM_ANTS }); - var episode: usize = 0; + var global_step: usize = 0; var epsilon: f32 = EPSILON_START; - while (true) : (episode += 1) { - world.reset(); - var done = false; + // Ciclo infinito (o molto lungo) + while (true) { - while (!done) { + // 1. Evaporazione Feromoni (Memoria collettiva che sbiadisce) + world.evaporatePheromones(); + + // 2. Turno di ogni formica + for (0..env.NUM_ANTS) |i| { // A. OSSERVAZIONE - const current_obs = try world.getObservation(allocator); // Alloca memoria - defer allocator.free(current_obs); // Libera alla fine del ciclo + const current_obs = try world.getAntObservation(allocator, i); + defer allocator.free(current_obs); // B. SCEGLI AZIONE (Epsilon-Greedy) var action: usize = 0; - const q_values = net.forward(current_obs); // Forward pass + const q_values = net.forward(current_obs); if (random.float(f32) < epsilon) { - // Esplorazione (Random) action = random.intRangeAtMost(usize, 0, 3); } else { - // Sfruttamento (Usa il cervello) action = argmax(q_values); } // C. ESEGUI AZIONE - const result = world.step(action); + const result = world.stepAnt(i, action); const reward = result[0]; - done = result[1]; + // Nota: in multi-agente 'done' è meno rilevante per il loop principale, + // perché se una formica "finisce" (mangia), respawna o continua. - // D. ADDESTRAMENTO (Q-Learning Update) - // Target = Reward + Gamma * Max(Q_Next) - - // Dobbiamo calcolare il Q-value dello stato successivo + // D. ADDESTRAMENTO (Hive Mind Learning) + // Calcoliamo target Q-Value var target_val = reward; - if (!done) { - const next_obs = try world.getObservation(allocator); - defer allocator.free(next_obs); - const next_q_values = net.forward(next_obs); - target_val += GAMMA * maxVal(next_q_values); - } - // Creiamo il vettore target per il backpropagation - // Vogliamo che SOLO il neurone dell'azione presa si avvicini al target_val - // Gli altri neuroni devono restare come sono. + // Se non è uno stato terminale (morte o vittoria netta), aggiungiamo stima futuro + // Consideriamo la vittoria (mangiare) come continuativa qui per non rompere il flusso + const next_obs = try world.getAntObservation(allocator, i); + defer allocator.free(next_obs); + const next_q_values = net.forward(next_obs); + target_val += GAMMA * maxVal(next_q_values); + + // Backpropagation var target_vector = try allocator.alloc(f32, 4); defer allocator.free(target_vector); + for (0..4) |j| target_vector[j] = q_values[j]; + target_vector[action] = target_val; - for (0..4) |i| target_vector[i] = q_values[i]; // Copia i vecchi valori - target_vector[action] = target_val; // Aggiorna solo quello scelto - - // Train della rete su questo singolo passo _ = try net.train(current_obs, target_vector, LR); - - // Export e Delay - try exportAntJSON(&world, "ant_state.json", episode, epsilon); - - // Se stiamo esplorando molto (inizio), andiamo veloci. Se siamo bravi, rallentiamo per goderci la scena. - if (episode % 10 == 0) { - // Mostra ogni 10 episodi a velocità umana - std.Thread.sleep(50 * 1_000_000); - } else { - // Allenamento veloce (quasi istantaneo) - // Togli il commento sotto se vuoi vedere TUTTO, ma sarà lento - // std.Thread.sleep(10 * 1_000_000); - } } - // Fine episodio + // 3. Gestione Loop Globale + global_step += 1; + + // Decadimento Epsilon if (epsilon > EPSILON_END) { epsilon -= DECAY_RATE; } - if (episode % 10 == 0) { - std.debug.print("Episodio {d} | Epsilon: {d:.3} | Steps: {d}\n", .{ episode, epsilon, world.steps }); + // 4. Export e Log (Ogni 10 step globali per fluidità) + if (global_step % 10 == 0) { + try exportAntJSON(&world, "ant_state.json", global_step, epsilon); + + // Log console ogni 100 step + if (global_step % 100 == 0) { + std.debug.print("Step: {d} | Epsilon: {d:.3} | Cibo: [{d},{d}]\r", .{ global_step, epsilon, world.food_x, world.food_y }); + } + + // Pausa per vedere l'animazione (rimuovi per training ultra-veloce) + std.Thread.sleep(100 * 1_000_000); } } }