diff --git a/ant_state.json b/ant_state.json
index 7da4f78..ac95645 100644
--- a/ant_state.json
+++ b/ant_state.json
@@ -1,9 +1,7 @@
{
- "grid_size": 8,
- "steps": 10,
- "ant": [6, 4],
- "food": [3, 1],
- "walls": [[0,0],[1,0],[2,0],[3,0],[4,0],[5,0],[6,0],[7,0],[0,1],[7,1],[0,2],[7,2],[0,3],[7,3],[0,4],[7,4],[0,5],[7,5],[0,6],[7,6],[0,7],[1,7],[2,7],[3,7],[4,7],[5,7],[6,7],[7,7]],
- "episode": 3889,
- "epsilon": 0.049
+ "grid_size": 100,
+ "food": [11, 83],
+ "ants": [[10,52],[9,61],[9,63],[8,54],[9,78],[10,74],[10,58],[10,70],[10,73],[10,76],[40,77],[9,72],[9,70],[10,72],[10,75],[10,71],[9,75],[16,65],[10,63],[11,52]],
+ "episode": 23320,
+ "epsilon": 0.050
}
\ No newline at end of file
diff --git a/ant_viewer.html b/ant_viewer.html
index 22ac221..a70dea3 100644
--- a/ant_viewer.html
+++ b/ant_viewer.html
@@ -2,98 +2,149 @@
- Ant Survival AI
+ Hive Mind Viewer
-
-
Ant Survival DQN
-
Stato: Connessione...
-
Steps: 0 | Reward: 0
+
-
+
+
+
\ No newline at end of file
diff --git a/src/env.zig b/src/env.zig
index ba4eaa1..9f5b42a 100644
--- a/src/env.zig
+++ b/src/env.zig
@@ -1,40 +1,40 @@
const std = @import("std");
const Allocator = std.mem.Allocator;
-pub const GRID_SIZE = 8;
+pub const GRID_SIZE = 100; // Mappa Gigante!
+pub const NUM_ANTS = 20; // La Colonia
+
pub const TILE_EMPTY = 0;
pub const TILE_WALL = 1;
pub const TILE_FOOD = 2;
-pub const TILE_ANT = 3;
-pub const ACTION_UP = 0;
-pub const ACTION_DOWN = 1;
-pub const ACTION_LEFT = 2;
-pub const ACTION_RIGHT = 3;
+// Non c'è più TILE_ANT nella griglia statica, perché le formiche si muovono sopra
+// Ants are stored in the fixed-size World.ants array ([NUM_ANTS]Ant) instead.
-pub const Ant = struct { x: usize, y: usize, alive: bool };
+pub const Ant = struct {
+ x: usize,
+ y: usize,
+ alive: bool,
+ steps: usize,
+};
pub const World = struct {
grid: [GRID_SIZE][GRID_SIZE]u8,
- visited: [GRID_SIZE][GRID_SIZE]bool,
- ant_x: usize,
- ant_y: usize,
+ pheromones: [GRID_SIZE][GRID_SIZE]f32, // 0.0 = Neutro, 1.0 = Appena visitato
+ ants: [NUM_ANTS]Ant, // Array fisso di formiche
food_x: usize,
food_y: usize,
- steps: usize,
max_steps: usize,
prng: std.Random.DefaultPrng,
pub fn init(seed: u64) World {
var w = World{
.grid = undefined,
- .visited = undefined,
- .ant_x = 0,
- .ant_y = 0,
+ .pheromones = undefined,
+ .ants = undefined,
.food_x = 0,
.food_y = 0,
- .steps = 0,
- .max_steps = 100,
+ .max_steps = 1000, // Più passi per mappa grande
.prng = std.Random.DefaultPrng.init(seed),
};
w.reset();
@@ -42,12 +42,11 @@ pub const World = struct {
}
pub fn reset(self: *World) void {
- const random = self.prng.random();
+ //const random = self.prng.random();
for (0..GRID_SIZE) |y| {
for (0..GRID_SIZE) |x| {
- self.visited[y][x] = false;
-
+ self.pheromones[y][x] = 0.0;
if (x == 0 or y == 0 or x == GRID_SIZE - 1 or y == GRID_SIZE - 1) {
self.grid[y][x] = TILE_WALL;
} else {
@@ -56,123 +55,182 @@ pub const World = struct {
}
}
- self.ant_x = random.intRangeAtMost(usize, 1, GRID_SIZE - 2);
- self.ant_y = random.intRangeAtMost(usize, 1, GRID_SIZE - 2);
+ // 2. Spawn Cibo
+ self.respawnFood();
- self.visited[self.ant_y][self.ant_x] = true;
-
- while (true) {
- self.food_x = random.intRangeAtMost(usize, 1, GRID_SIZE - 2);
- self.food_y = random.intRangeAtMost(usize, 1, GRID_SIZE - 2);
- if (self.food_x != self.ant_x or self.food_y != self.ant_y) break;
+ // 3. Spawn Formiche (Tutte al centro, come un formicaio)
+ const center = GRID_SIZE / 2;
+ for (0..NUM_ANTS) |i| {
+ self.ants[i] = Ant{
+ .x = center,
+ .y = center,
+ .alive = true,
+ .steps = 0,
+ };
}
-
- self.steps = 0;
- self.updateGrid();
}
- fn updateGrid(self: *World) void {
- for (1..GRID_SIZE - 1) |y| {
- for (1..GRID_SIZE - 1) |x| {
- self.grid[y][x] = TILE_EMPTY;
+ fn respawnFood(self: *World) void {
+ const random = self.prng.random();
+ // Semplice loop per trovare posto libero
+ while (true) {
+ const rx = random.intRangeAtMost(usize, 1, GRID_SIZE - 2);
+ const ry = random.intRangeAtMost(usize, 1, GRID_SIZE - 2);
+ if (self.grid[ry][rx] != TILE_WALL) {
+ self.food_x = rx;
+ self.food_y = ry;
+ break;
}
}
- self.grid[self.food_y][self.food_x] = TILE_FOOD;
- self.grid[self.ant_y][self.ant_x] = TILE_ANT;
}
- pub fn step(self: *World, action: usize) struct { f32, bool } {
- self.steps += 1;
+ // Calcola l'olfatto (Distanza inversa dal cibo)
+ // 0.0 (Lontanissimo) -> 1.0 (Sopra il cibo)
+ fn getScent(self: *World, x: usize, y: usize) f32 {
+ const dx = if (x > self.food_x) x - self.food_x else self.food_x - x;
+ const dy = if (y > self.food_y) y - self.food_y else self.food_y - y;
+ const dist = @as(f32, @floatFromInt(dx + dy));
+ return 1.0 / (dist + 1.0);
+ }
- var new_x = self.ant_x;
- var new_y = self.ant_y;
+ // Returns true if any living ant stands on cell (x, y); the calling ant is NOT excluded.
+ fn isOccupied(self: *World, x: usize, y: usize) bool {
+ for (self.ants) |ant| {
+ if (ant.alive and ant.x == x and ant.y == y) return true;
+ }
+ return false;
+ }
- if (action == ACTION_UP) new_y -= 1;
- if (action == ACTION_DOWN) new_y += 1;
- if (action == ACTION_LEFT) new_x -= 1;
- if (action == ACTION_RIGHT) new_x += 1;
+ // Esegue step per UNA formica specifica
+ pub fn stepAnt(self: *World, ant_idx: usize, action: usize) struct { f32, bool } {
+ var ant = &self.ants[ant_idx];
+ if (!ant.alive) return .{ 0.0, true };
- const tile = self.grid[new_y][new_x];
+ ant.steps += 1;
- if (tile == TILE_WALL) {
+ // Salviamo lo stato PRECEDENTE per fare i confronti
+ const old_dist_x = if (ant.x > self.food_x) ant.x - self.food_x else self.food_x - ant.x;
+ const old_dist_y = if (ant.y > self.food_y) ant.y - self.food_y else self.food_y - ant.y;
+ const old_dist = old_dist_x + old_dist_y;
+
+ var new_x = ant.x;
+ var new_y = ant.y;
+
+ if (action == 0) new_y -= 1; // UP
+ if (action == 1) new_y += 1; // DOWN
+ if (action == 2) new_x -= 1; // LEFT
+ if (action == 3) new_x += 1; // RIGHT
+
+ // --- 1. MURI (Limiti Mappa) ---
+ if (self.grid[new_y][new_x] == TILE_WALL) return .{ -5.0, false };
+
+ // --- 2. COLLISIONI TRA FORMICHE ---
+ // Se c'è traffico, penalità leggera ma non bloccante se c'è cibo vicino
+ if (self.isOccupied(new_x, new_y)) {
+ // Se siamo vicini al cibo, spingi! Altrimenti aspetta.
+ if (old_dist > 5) return .{ -0.5, false };
+ }
+
+ const dist_x = if (new_x > self.food_x) new_x - self.food_x else self.food_x - new_x;
+ const dist_y = if (new_y > self.food_y) new_y - self.food_y else self.food_y - new_y;
+ const new_dist = dist_x + dist_y;
+
+ var reward: f32 = -0.1;
+
+ const scent_intensity = self.getScent(new_x, new_y);
+
+ const pheromone_level = self.pheromones[new_y][new_x];
+
+ if (scent_intensity > 0.15) {
+ reward += 0.1;
+ } else {
+ if (pheromone_level > 0.1) {
+ reward -= 0.5 * pheromone_level;
+ } else {
+ reward += 0.2;
+ }
+ }
+
+ if (new_dist < old_dist) {
+ reward += 1.5 + (scent_intensity * 2.0);
+ } else if (new_dist > old_dist) {
+ reward -= 1.0;
+ }
+
+ ant.x = new_x;
+ ant.y = new_y;
+
+ self.pheromones[new_y][new_x] = 1.0;
+
+ if (new_x == self.food_x and new_y == self.food_y) {
+ self.respawnFood();
+ ant.steps = 0;
+ return .{ 100.0, false };
+ }
+
+ if (ant.steps >= self.max_steps) {
+ ant.x = GRID_SIZE / 2;
+ ant.y = GRID_SIZE / 2;
+ ant.steps = 0;
return .{ -10.0, false };
}
- var move_reward: f32 = -0.1;
-
- if (self.visited[new_y][new_x]) {
- move_reward -= 0.5;
- } else {
- move_reward += 0.2;
- }
-
- self.ant_x = new_x;
- self.ant_y = new_y;
- self.visited[new_y][new_x] = true;
-
- self.updateGrid();
-
- if (new_x == self.food_x and new_y == self.food_y) {
- return .{ 100.0, true };
- }
-
- if (self.steps >= self.max_steps) {
- return .{ -10.0, true };
- }
-
- return .{ move_reward, false };
+ return .{ reward, false };
}
- pub fn getObservation(self: *World, allocator: Allocator) ![]f32 {
- var obs = try allocator.alloc(f32, 10);
+ pub fn evaporatePheromones(self: *World) void {
+ for (0..GRID_SIZE) |y| {
+ for (0..GRID_SIZE) |x| {
+ if (self.pheromones[y][x] > 0) {
+ self.pheromones[y][x] *= 0.995;
+ }
+ }
+ }
+ }
+
+ // Input aumentati a 15
+ pub fn getAntObservation(self: *World, allocator: Allocator, ant_idx: usize) ![]f32 {
+ var obs = try allocator.alloc(f32, 15); // AUMENTATO A 15
+ const ant = self.ants[ant_idx];
var idx: usize = 0;
- const ax = @as(i32, @intCast(self.ant_x));
- const ay = @as(i32, @intCast(self.ant_y));
+ const ax = @as(i32, @intCast(ant.x));
+ const ay = @as(i32, @intCast(ant.y));
+ // 1. Visione 3x3 (0-8)
var dy: i32 = -1;
while (dy <= 1) : (dy += 1) {
var dx: i32 = -1;
while (dx <= 1) : (dx += 1) {
- const py_i = ay + dy;
- const px_i = ax + dx;
+ const py = @as(usize, @intCast(ay + dy));
+ const px = @as(usize, @intCast(ax + dx));
var val: f32 = 0.0;
-
- if (py_i >= 0 and py_i < GRID_SIZE and px_i >= 0 and px_i < GRID_SIZE) {
- const py = @as(usize, @intCast(py_i));
- const px = @as(usize, @intCast(px_i));
- const content = self.grid[py][px];
-
- if (content == TILE_WALL) {
- val = -1.0;
- } else if (content == TILE_FOOD) {
- val = 1.0;
- } else if (self.visited[py][px]) {
- val = -0.5;
- } else {
- val = 0.0;
- }
- } else {
+ if (self.grid[py][px] == TILE_WALL) {
val = -1.0;
+ } else if (px == self.food_x and py == self.food_y) {
+ val = 1.0; // Vedo il cibo vicino!
+ } else if (self.isOccupied(px, py) and (dx != 0 or dy != 0)) {
+ val = -0.5; // Vedo una sorella
}
-
obs[idx] = val;
idx += 1;
}
}
- obs[9] = self.getScent(self.ant_x, self.ant_y);
+
+ // 2. Sensi Chimici (9-10)
+ obs[9] = self.getScent(ant.x, ant.y);
+ obs[10] = self.pheromones[ant.y][ant.x];
+
+ // 3. BUSSOLA BINARIA (11-14)
+ // Risponde alla domanda: "In che quadrante è il cibo?"
+ // È molto più facile da capire per l'IA rispetto a un float.
+
+ obs[11] = if (self.food_y < ant.y) 1.0 else 0.0; // Cibo è SOPRA?
+ obs[12] = if (self.food_y > ant.y) 1.0 else 0.0; // Cibo è SOTTO?
+ obs[13] = if (self.food_x < ant.x) 1.0 else 0.0; // Cibo è SINISTRA?
+ obs[14] = if (self.food_x > ant.x) 1.0 else 0.0; // Cibo è DESTRA?
return obs;
}
-
- fn getScent(self: *World, x: usize, y: usize) f32 {
- const dx = if (x > self.food_x) x - self.food_x else self.food_x - x;
- const dy = if (y > self.food_y) y - self.food_y else self.food_y - y;
- const dist = dx + dy;
-
- if (dist == 0) return 1.0;
-
- return 1.0 / (@as(f32, @floatFromInt(dist)) + 1.0);
- }
};
diff --git a/src/main.zig b/src/main.zig
index 9f85059..5ce89a3 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -1,48 +1,18 @@
const std = @import("std");
const World = @import("env.zig").World;
-const env = @import("env.zig");
+const env = @import("env.zig"); // Per accedere a costanti come GRID_SIZE
const Network = @import("modular_network.zig").Network;
-// --- PARAMETRI AI ---
-const GAMMA: f32 = 0.9; // Quanto conta il futuro? (0.9 = lungimirante)
-const LR: f32 = 0.005; // Learning Rate
-const EPSILON_START: f32 = 1.0; // Inizia esplorando al 100%
-const EPSILON_END: f32 = 0.05; // Finisce esplorando al 5%
-const DECAY_RATE: f32 = 0.001; // Quanto velocemente smette di essere curiosa
+// --- IPERPARAMETRI ---
+const GAMMA: f32 = 0.9;
+const LR: f32 = 0.005; // Basso perché output lineare
+const EPSILON_START: f32 = 1.0;
+const EPSILON_END: f32 = 0.05;
+const DECAY_RATE: f32 = 0.0001; // Decadimento più lento dato che abbiamo tanti step
-fn exportAntJSON(world: *World, file_path: []const u8, episode: usize, epsilon: f32) !void {
- const file = try std.fs.cwd().createFile(file_path, .{});
- defer file.close();
-
- var buffer: [8192]u8 = undefined;
- var fba = std.heap.FixedBufferAllocator.init(&buffer);
- const allocator = fba.allocator();
-
- var walls_json = std.ArrayList(u8){};
- defer walls_json.deinit(allocator);
-
- try walls_json.appendSlice(allocator, "[");
- var first = true;
- for (0..env.GRID_SIZE) |y| {
- for (0..env.GRID_SIZE) |x| {
- if (world.grid[y][x] == env.TILE_WALL) {
- if (!first) try walls_json.appendSlice(allocator, ",");
- try std.fmt.format(walls_json.writer(allocator), "[{d},{d}]", .{ x, y });
- first = false;
- }
- }
- }
- try walls_json.appendSlice(allocator, "]");
-
- // Aggiungiamo info extra per debug (Epsilon ed Episode)
- const json = try std.fmt.allocPrint(allocator, "{{\n \"grid_size\": {d},\n \"steps\": {d},\n \"ant\": [{d}, {d}],\n \"food\": [{d}, {d}],\n \"walls\": {s},\n \"episode\": {d},\n \"epsilon\": {d:.3}\n}}", .{ env.GRID_SIZE, world.steps, world.ant_x, world.ant_y, world.food_x, world.food_y, walls_json.items, episode, epsilon });
-
- try file.writeAll(json);
-}
-
-// Funzione helper per trovare il valore massimo in un array
+// Helper per trovare max e argmax
fn maxVal(slice: []const f32) f32 {
- var m: f32 = -10000.0;
+ var m: f32 = -1.0e20; // Numero molto basso
for (slice) |v| if (v > m) {
m = v;
};
@@ -50,7 +20,7 @@ fn maxVal(slice: []const f32) f32 {
}
fn argmax(slice: []const f32) usize {
- var m: f32 = -10000.0;
+ var m: f32 = -1.0e20;
var idx: usize = 0;
for (slice, 0..) |v, i| {
if (v > m) {
@@ -61,99 +31,126 @@ fn argmax(slice: []const f32) usize {
return idx;
}
+// Export aggiornato per Multi-Formica
+fn exportAntJSON(world: *World, file_path: []const u8, episode: usize, epsilon: f32) !void {
+ const file = try std.fs.cwd().createFile(file_path, .{});
+ defer file.close();
+
+ // Buffer grande per contenere le coordinate di 20 formiche
+ var buffer: [65536]u8 = undefined;
+ var fba = std.heap.FixedBufferAllocator.init(&buffer);
+ const allocator = fba.allocator();
+
+ // Costruiamo la lista delle formiche in JSON: [[x,y], [x,y], ...]
+ var ants_json = std.ArrayList(u8){};
+ defer ants_json.deinit(allocator);
+ try ants_json.appendSlice(allocator, "[");
+
+ for (world.ants, 0..) |ant, i| {
+ if (i > 0) try ants_json.appendSlice(allocator, ",");
+ try std.fmt.format(ants_json.writer(allocator), "[{d},{d}]", .{ ant.x, ant.y });
+ }
+ try ants_json.appendSlice(allocator, "]");
+
+ // Scriviamo il JSON completo
+ // 'epsilon' is written into the "epsilon" field with three decimals, so the parameter is used.
+ const json = try std.fmt.allocPrint(allocator, "{{\n \"grid_size\": {d},\n \"food\": [{d}, {d}],\n \"ants\": {s},\n \"episode\": {d},\n \"epsilon\": {d:.3}\n}}", .{ env.GRID_SIZE, world.food_x, world.food_y, ants_json.items, episode, epsilon });
+
+ try file.writeAll(json);
+}
+
pub fn main() !void {
var gpa = std.heap.GeneralPurposeAllocator(.{}){};
const allocator = gpa.allocator();
defer _ = gpa.deinit();
- // 1. Inizializza Ambiente e Rete
+ // 1. Inizializza Ambiente 100x100 e Rete
var world = World.init(12345);
var net = Network.init(allocator);
defer net.deinit();
- // Input: 9 (3x3 vista) -> Hidden: 24 -> Output: 4 (Su, Giù, Sx, Dx)
- try net.addLayer(9, 24, 111, true);
- try net.addLayer(24, 4, 222, false);
+ // ARCHITETTURA RETE
+ // Input 15: (9 vision cells + 1 scent + 1 pheromone + 4 compass flags)
+ // Output 4: (Su, Giù, Sx, Dx) LINEARE
+ try net.addLayer(15, 40, 111, true);
+ try net.addLayer(40, 4, 222, false);
var prng = std.Random.DefaultPrng.init(999);
const random = prng.random();
- std.debug.print("--- AI TRAINING START ---\n", .{});
+ std.debug.print("--- HIVE MIND TRAINING START ---\n", .{});
+ std.debug.print("Mappa: {d}x{d} | Formiche: {d}\n", .{ env.GRID_SIZE, env.GRID_SIZE, env.NUM_ANTS });
- var episode: usize = 0;
+ var global_step: usize = 0;
var epsilon: f32 = EPSILON_START;
- while (true) : (episode += 1) {
- world.reset();
- var done = false;
+ // Ciclo infinito (o molto lungo)
+ while (true) {
- while (!done) {
+ // 1. Evaporazione Feromoni (Memoria collettiva che sbiadisce)
+ world.evaporatePheromones();
+
+ // 2. Turno di ogni formica
+ for (0..env.NUM_ANTS) |i| {
// A. OSSERVAZIONE
- const current_obs = try world.getObservation(allocator); // Alloca memoria
- defer allocator.free(current_obs); // Libera alla fine del ciclo
+ const current_obs = try world.getAntObservation(allocator, i);
+ defer allocator.free(current_obs);
// B. SCEGLI AZIONE (Epsilon-Greedy)
var action: usize = 0;
- const q_values = net.forward(current_obs); // Forward pass
+ const q_values = net.forward(current_obs);
if (random.float(f32) < epsilon) {
- // Esplorazione (Random)
action = random.intRangeAtMost(usize, 0, 3);
} else {
- // Sfruttamento (Usa il cervello)
action = argmax(q_values);
}
// C. ESEGUI AZIONE
- const result = world.step(action);
+ const result = world.stepAnt(i, action);
const reward = result[0];
- done = result[1];
+ // Nota: in multi-agente 'done' è meno rilevante per il loop principale,
+ // perché se una formica "finisce" (mangia), respawna o continua.
- // D. ADDESTRAMENTO (Q-Learning Update)
- // Target = Reward + Gamma * Max(Q_Next)
-
- // Dobbiamo calcolare il Q-value dello stato successivo
+ // D. ADDESTRAMENTO (Hive Mind Learning)
+ // Calcoliamo target Q-Value
var target_val = reward;
- if (!done) {
- const next_obs = try world.getObservation(allocator);
- defer allocator.free(next_obs);
- const next_q_values = net.forward(next_obs);
- target_val += GAMMA * maxVal(next_q_values);
- }
- // Creiamo il vettore target per il backpropagation
- // Vogliamo che SOLO il neurone dell'azione presa si avvicini al target_val
- // Gli altri neuroni devono restare come sono.
+ // No state is treated as terminal here: the discounted next-state estimate is always added.
+ // Consideriamo la vittoria (mangiare) come continuativa qui per non rompere il flusso
+ const next_obs = try world.getAntObservation(allocator, i);
+ defer allocator.free(next_obs);
+ const next_q_values = net.forward(next_obs);
+ target_val += GAMMA * maxVal(next_q_values);
+
+ // Backpropagation
var target_vector = try allocator.alloc(f32, 4);
defer allocator.free(target_vector);
+ for (0..4) |j| target_vector[j] = q_values[j];
+ target_vector[action] = target_val;
- for (0..4) |i| target_vector[i] = q_values[i]; // Copia i vecchi valori
- target_vector[action] = target_val; // Aggiorna solo quello scelto
-
- // Train della rete su questo singolo passo
_ = try net.train(current_obs, target_vector, LR);
-
- // Export e Delay
- try exportAntJSON(&world, "ant_state.json", episode, epsilon);
-
- // Se stiamo esplorando molto (inizio), andiamo veloci. Se siamo bravi, rallentiamo per goderci la scena.
- if (episode % 10 == 0) {
- // Mostra ogni 10 episodi a velocità umana
- std.Thread.sleep(50 * 1_000_000);
- } else {
- // Allenamento veloce (quasi istantaneo)
- // Togli il commento sotto se vuoi vedere TUTTO, ma sarà lento
- // std.Thread.sleep(10 * 1_000_000);
- }
}
- // Fine episodio
+ // 3. Gestione Loop Globale
+ global_step += 1;
+
+ // Decadimento Epsilon
if (epsilon > EPSILON_END) {
epsilon -= DECAY_RATE;
}
- if (episode % 10 == 0) {
- std.debug.print("Episodio {d} | Epsilon: {d:.3} | Steps: {d}\n", .{ episode, epsilon, world.steps });
+ // 4. Export e Log (Ogni 10 step globali per fluidità)
+ if (global_step % 10 == 0) {
+ try exportAntJSON(&world, "ant_state.json", global_step, epsilon);
+
+ // Log console ogni 100 step
+ if (global_step % 100 == 0) {
+ std.debug.print("Step: {d} | Epsilon: {d:.3} | Cibo: [{d},{d}]\r", .{ global_step, epsilon, world.food_x, world.food_y });
+ }
+
+ // Pausa per vedere l'animazione (rimuovi per training ultra-veloce)
+ std.Thread.sleep(100 * 1_000_000);
}
}
}