Le formiche riescono ad imparare a raggiungere il cibo

This commit is contained in:
Riccardo Forese 2026-02-03 14:48:59 +01:00
parent 76f80f0bd7
commit 9653eada03
4 changed files with 360 additions and 256 deletions

View file

@ -1,9 +1,7 @@
{ {
"grid_size": 8, "grid_size": 100,
"steps": 10, "food": [11, 83],
"ant": [6, 4], "ants": [[10,52],[9,61],[9,63],[8,54],[9,78],[10,74],[10,58],[10,70],[10,73],[10,76],[40,77],[9,72],[9,70],[10,72],[10,75],[10,71],[9,75],[16,65],[10,63],[11,52]],
"food": [3, 1], "episode": 23320,
"walls": [[0,0],[1,0],[2,0],[3,0],[4,0],[5,0],[6,0],[7,0],[0,1],[7,1],[0,2],[7,2],[0,3],[7,3],[0,4],[7,4],[0,5],[7,5],[0,6],[7,6],[0,7],[1,7],[2,7],[3,7],[4,7],[5,7],[6,7],[7,7]], "epsilon": 0.050
"episode": 3889,
"epsilon": 0.049
} }

View file

@ -2,98 +2,149 @@
<html lang="it"> <html lang="it">
<head> <head>
<meta charset="UTF-8"> <meta charset="UTF-8">
<title>Ant Survival AI</title> <title>Hive Mind Viewer</title>
<style> <style>
body { margin: 0; background-color: #222; color: #eee; font-family: monospace; display: flex; flex-direction: column; align-items: center; justify-content: center; height: 100vh; } body {
#gameInfo { margin-bottom: 10px; text-align: center; } margin: 0;
canvas { border: 2px solid #555; box-shadow: 0 0 20px rgba(0,0,0,0.5); } background-color: #111;
color: #eee;
font-family: monospace;
display: flex;
flex-direction: column;
align-items: center;
height: 100vh;
overflow: hidden;
}
#header {
width: 100%;
padding: 10px;
background: #222;
display: flex;
justify-content: space-between;
border-bottom: 1px solid #444;
box-sizing: border-box;
z-index: 10;
}
#container {
flex-grow: 1;
width: 100%;
display: flex;
justify-content: center;
align-items: center;
position: relative;
}
canvas {
box-shadow: 0 0 50px rgba(0,0,0,0.8);
image-rendering: pixelated;
}
</style> </style>
</head> </head>
<body> <body>
<div id="gameInfo"> <div id="header">
<h1>Ant Survival DQN</h1> <span><b>HIVE MIND AI</b></span>
<div>Stato: <span id="status">Connessione...</span></div> <span>Episodio: <span id="episode">0</span></span>
<div>Steps: <span id="steps">0</span> | Reward: <span id="reward">0</span></div> <span>Epsilon: <span id="epsilon">0.00</span></span>
<span>Cibo: <span id="food-coords">?</span></span>
</div> </div>
<canvas id="gridCanvas" width="600" height="600"></canvas> <div id="container">
<canvas id="gridCanvas"></canvas>
</div>
<script> <script>
const canvas = document.getElementById('gridCanvas'); const canvas = document.getElementById('gridCanvas');
const ctx = canvas.getContext('2d'); const ctx = canvas.getContext('2d');
const statusEl = document.getElementById('status');
const stepsEl = document.getElementById('steps');
const rewardEl = document.getElementById('reward');
const TILE_SIZE = 40; // Dimensione in pixel di ogni cella const epEl = document.getElementById('episode');
const epsEl = document.getElementById('epsilon');
const foodEl = document.getElementById('food-coords');
let gridSize = 100;
function drawWorld(data) { function drawWorld(data) {
const gridSize = data.grid_size; gridSize = data.grid_size;
canvas.width = gridSize * TILE_SIZE;
canvas.height = gridSize * TILE_SIZE;
// Disegna Sfondo const margin = 40;
ctx.fillStyle = "#333"; const availableW = document.getElementById('container').clientWidth - margin;
const availableH = document.getElementById('container').clientHeight - margin;
const canvasSize = Math.min(availableW, availableH);
canvas.width = canvasSize;
canvas.height = canvasSize;
const cellSize = canvasSize / gridSize;
ctx.fillStyle = "#1a1a1a";
ctx.fillRect(0, 0, canvas.width, canvas.height); ctx.fillRect(0, 0, canvas.width, canvas.height);
// Disegna Griglia if (cellSize > 4) {
ctx.strokeStyle = "#444"; ctx.strokeStyle = "#2a2a2a";
ctx.lineWidth = 1;
ctx.beginPath();
for(let i=0; i<=gridSize; i++) { for(let i=0; i<=gridSize; i++) {
ctx.beginPath(); const pos = i * cellSize;
ctx.moveTo(i*TILE_SIZE, 0); ctx.lineTo(i*TILE_SIZE, canvas.height); ctx.moveTo(pos, 0); ctx.lineTo(pos, canvas.height);
ctx.stroke(); ctx.moveTo(0, pos); ctx.lineTo(canvas.width, pos);
ctx.beginPath(); }
ctx.moveTo(0, i*TILE_SIZE); ctx.lineTo(canvas.width, i*TILE_SIZE);
ctx.stroke(); ctx.stroke();
} }
// Disegna Oggetti if (data.food) {
// data.ant = [x, y], data.food = [x, y], data.walls = [[x,y], ...] const fx = data.food[0] * cellSize;
const fy = data.food[1] * cellSize;
// CIBO (Mela Verde) ctx.shadowBlur = 15;
ctx.fillStyle = "#4caf50"; ctx.shadowColor = "#0f0";
ctx.fillStyle = "#00ff00";
ctx.beginPath(); ctx.beginPath();
ctx.arc(data.food[0] * TILE_SIZE + TILE_SIZE/2, data.food[1] * TILE_SIZE + TILE_SIZE/2, TILE_SIZE/3, 0, Math.PI*2); ctx.arc(fx + cellSize/2, fy + cellSize/2, cellSize/1.5, 0, Math.PI*2);
ctx.fill(); ctx.fill();
// FORMICA (Pallino Rosso o Sprite) ctx.shadowBlur = 0;
ctx.fillStyle = "#ff5722"; }
ctx.beginPath();
ctx.arc(data.ant[0] * TILE_SIZE + TILE_SIZE/2, data.ant[1] * TILE_SIZE + TILE_SIZE/2, TILE_SIZE/3, 0, Math.PI*2);
ctx.fill();
// Occhi della formica
ctx.fillStyle = "#fff";
ctx.beginPath();
ctx.arc(data.ant[0] * TILE_SIZE + TILE_SIZE/2 - 5, data.ant[1] * TILE_SIZE + TILE_SIZE/2 - 5, 3, 0, Math.PI*2);
ctx.arc(data.ant[0] * TILE_SIZE + TILE_SIZE/2 + 5, data.ant[1] * TILE_SIZE + TILE_SIZE/2 - 5, 3, 0, Math.PI*2);
ctx.fill();
// MURI (Blocchi Grigi) if (data.ants) {
ctx.fillStyle = "#777"; ctx.fillStyle = "#ff8800";
if (data.walls) { data.ants.forEach(ant => {
data.walls.forEach(w => { const ax = ant[0] * cellSize;
ctx.fillRect(w[0] * TILE_SIZE, w[1] * TILE_SIZE, TILE_SIZE, TILE_SIZE); const ay = ant[1] * cellSize;
if (cellSize < 3) {
ctx.fillRect(ax, ay, cellSize, cellSize);
} else {
ctx.beginPath();
ctx.arc(ax + cellSize/2, ay + cellSize/2, cellSize/2, 0, Math.PI*2);
ctx.fill();
}
}); });
} }
} }
async function update() { async function update() {
try { try {
const response = await fetch('ant_state.json?t=' + new Date().getTime()); const response = await fetch('ant_state.json?t=' + Date.now());
if (!response.ok) throw new Error("File missing"); if (!response.ok) return;
const data = await response.json(); const data = await response.json();
statusEl.innerText = "Running"; epEl.innerText = data.episode;
stepsEl.innerText = data.steps; epsEl.innerText = data.epsilon;
if(data.food) foodEl.innerText = `[${data.food[0]}, ${data.food[1]}]`;
drawWorld(data); drawWorld(data);
} catch (e) { } catch (e) {
// console.log(e); console.error(e);
} }
} }
setInterval(update, 100); // 10 FPS setInterval(update, 50);
window.addEventListener('resize', () => {
});
</script> </script>
</body> </body>
</html> </html>

View file

@ -1,40 +1,40 @@
const std = @import("std"); const std = @import("std");
const Allocator = std.mem.Allocator; const Allocator = std.mem.Allocator;
pub const GRID_SIZE = 8; pub const GRID_SIZE = 100; // Mappa Gigante!
pub const NUM_ANTS = 20; // La Colonia
pub const TILE_EMPTY = 0; pub const TILE_EMPTY = 0;
pub const TILE_WALL = 1; pub const TILE_WALL = 1;
pub const TILE_FOOD = 2; pub const TILE_FOOD = 2;
pub const TILE_ANT = 3;
pub const ACTION_UP = 0; // Non c'è più TILE_ANT nella griglia statica, perché le formiche si muovono sopra
pub const ACTION_DOWN = 1; // Useremo una lista dinamica.
pub const ACTION_LEFT = 2;
pub const ACTION_RIGHT = 3;
pub const Ant = struct { x: usize, y: usize, alive: bool }; pub const Ant = struct {
x: usize,
y: usize,
alive: bool,
steps: usize,
};
pub const World = struct { pub const World = struct {
grid: [GRID_SIZE][GRID_SIZE]u8, grid: [GRID_SIZE][GRID_SIZE]u8,
visited: [GRID_SIZE][GRID_SIZE]bool, pheromones: [GRID_SIZE][GRID_SIZE]f32, // 0.0 = Neutro, 1.0 = Appena visitato
ant_x: usize, ants: [NUM_ANTS]Ant, // Array fisso di formiche
ant_y: usize,
food_x: usize, food_x: usize,
food_y: usize, food_y: usize,
steps: usize,
max_steps: usize, max_steps: usize,
prng: std.Random.DefaultPrng, prng: std.Random.DefaultPrng,
pub fn init(seed: u64) World { pub fn init(seed: u64) World {
var w = World{ var w = World{
.grid = undefined, .grid = undefined,
.visited = undefined, .pheromones = undefined,
.ant_x = 0, .ants = undefined,
.ant_y = 0,
.food_x = 0, .food_x = 0,
.food_y = 0, .food_y = 0,
.steps = 0, .max_steps = 1000, // Più passi per mappa grande
.max_steps = 100,
.prng = std.Random.DefaultPrng.init(seed), .prng = std.Random.DefaultPrng.init(seed),
}; };
w.reset(); w.reset();
@ -42,12 +42,11 @@ pub const World = struct {
} }
pub fn reset(self: *World) void { pub fn reset(self: *World) void {
const random = self.prng.random(); //const random = self.prng.random();
for (0..GRID_SIZE) |y| { for (0..GRID_SIZE) |y| {
for (0..GRID_SIZE) |x| { for (0..GRID_SIZE) |x| {
self.visited[y][x] = false; self.pheromones[y][x] = 0.0;
if (x == 0 or y == 0 or x == GRID_SIZE - 1 or y == GRID_SIZE - 1) { if (x == 0 or y == 0 or x == GRID_SIZE - 1 or y == GRID_SIZE - 1) {
self.grid[y][x] = TILE_WALL; self.grid[y][x] = TILE_WALL;
} else { } else {
@ -56,123 +55,182 @@ pub const World = struct {
} }
} }
self.ant_x = random.intRangeAtMost(usize, 1, GRID_SIZE - 2); // 2. Spawn Cibo
self.ant_y = random.intRangeAtMost(usize, 1, GRID_SIZE - 2); self.respawnFood();
self.visited[self.ant_y][self.ant_x] = true; // 3. Spawn Formiche (Tutte al centro, come un formicaio)
const center = GRID_SIZE / 2;
for (0..NUM_ANTS) |i| {
self.ants[i] = Ant{
.x = center,
.y = center,
.alive = true,
.steps = 0,
};
}
}
fn respawnFood(self: *World) void {
const random = self.prng.random();
// Semplice loop per trovare posto libero
while (true) { while (true) {
self.food_x = random.intRangeAtMost(usize, 1, GRID_SIZE - 2); const rx = random.intRangeAtMost(usize, 1, GRID_SIZE - 2);
self.food_y = random.intRangeAtMost(usize, 1, GRID_SIZE - 2); const ry = random.intRangeAtMost(usize, 1, GRID_SIZE - 2);
if (self.food_x != self.ant_x or self.food_y != self.ant_y) break; if (self.grid[ry][rx] != TILE_WALL) {
} self.food_x = rx;
self.food_y = ry;
self.steps = 0; break;
self.updateGrid();
}
fn updateGrid(self: *World) void {
for (1..GRID_SIZE - 1) |y| {
for (1..GRID_SIZE - 1) |x| {
self.grid[y][x] = TILE_EMPTY;
} }
} }
self.grid[self.food_y][self.food_x] = TILE_FOOD;
self.grid[self.ant_y][self.ant_x] = TILE_ANT;
} }
pub fn step(self: *World, action: usize) struct { f32, bool } { // Calcola l'olfatto (Distanza inversa dal cibo)
self.steps += 1; // 0.0 (Lontanissimo) -> 1.0 (Sopra il cibo)
fn getScent(self: *World, x: usize, y: usize) f32 {
const dx = if (x > self.food_x) x - self.food_x else self.food_x - x;
const dy = if (y > self.food_y) y - self.food_y else self.food_y - y;
const dist = @as(f32, @floatFromInt(dx + dy));
return 1.0 / (dist + 1.0);
}
var new_x = self.ant_x; // Controlla se una cella è occupata da UN'ALTRA formica
var new_y = self.ant_y; fn isOccupied(self: *World, x: usize, y: usize) bool {
for (self.ants) |ant| {
if (ant.alive and ant.x == x and ant.y == y) return true;
}
return false;
}
if (action == ACTION_UP) new_y -= 1; // Esegue step per UNA formica specifica
if (action == ACTION_DOWN) new_y += 1; pub fn stepAnt(self: *World, ant_idx: usize, action: usize) struct { f32, bool } {
if (action == ACTION_LEFT) new_x -= 1; var ant = &self.ants[ant_idx];
if (action == ACTION_RIGHT) new_x += 1; if (!ant.alive) return .{ 0.0, true };
const tile = self.grid[new_y][new_x]; ant.steps += 1;
if (tile == TILE_WALL) { // Salviamo lo stato PRECEDENTE per fare i confronti
const old_dist_x = if (ant.x > self.food_x) ant.x - self.food_x else self.food_x - ant.x;
const old_dist_y = if (ant.y > self.food_y) ant.y - self.food_y else self.food_y - ant.y;
const old_dist = old_dist_x + old_dist_y;
var new_x = ant.x;
var new_y = ant.y;
if (action == 0) new_y -= 1; // UP
if (action == 1) new_y += 1; // DOWN
if (action == 2) new_x -= 1; // LEFT
if (action == 3) new_x += 1; // RIGHT
// --- 1. MURI (Limiti Mappa) ---
if (self.grid[new_y][new_x] == TILE_WALL) return .{ -5.0, false };
// --- 2. COLLISIONI TRA FORMICHE ---
// Se c'è traffico, penalità leggera ma non bloccante se c'è cibo vicino
if (self.isOccupied(new_x, new_y)) {
// Se siamo vicini al cibo, spingi! Altrimenti aspetta.
if (old_dist > 5) return .{ -0.5, false };
}
const dist_x = if (new_x > self.food_x) new_x - self.food_x else self.food_x - new_x;
const dist_y = if (new_y > self.food_y) new_y - self.food_y else self.food_y - new_y;
const new_dist = dist_x + dist_y;
var reward: f32 = -0.1;
const scent_intensity = self.getScent(new_x, new_y);
const pheromone_level = self.pheromones[new_y][new_x];
if (scent_intensity > 0.15) {
reward += 0.1;
} else {
if (pheromone_level > 0.1) {
reward -= 0.5 * pheromone_level;
} else {
reward += 0.2;
}
}
if (new_dist < old_dist) {
reward += 1.5 + (scent_intensity * 2.0);
} else if (new_dist > old_dist) {
reward -= 1.0;
}
ant.x = new_x;
ant.y = new_y;
self.pheromones[new_y][new_x] = 1.0;
if (new_x == self.food_x and new_y == self.food_y) {
self.respawnFood();
ant.steps = 0;
return .{ 100.0, false };
}
if (ant.steps >= self.max_steps) {
ant.x = GRID_SIZE / 2;
ant.y = GRID_SIZE / 2;
ant.steps = 0;
return .{ -10.0, false }; return .{ -10.0, false };
} }
var move_reward: f32 = -0.1; return .{ reward, false };
if (self.visited[new_y][new_x]) {
move_reward -= 0.5;
} else {
move_reward += 0.2;
} }
self.ant_x = new_x; pub fn evaporatePheromones(self: *World) void {
self.ant_y = new_y; for (0..GRID_SIZE) |y| {
self.visited[new_y][new_x] = true; for (0..GRID_SIZE) |x| {
if (self.pheromones[y][x] > 0) {
self.updateGrid(); self.pheromones[y][x] *= 0.995;
}
if (new_x == self.food_x and new_y == self.food_y) { }
return .{ 100.0, true }; }
} }
if (self.steps >= self.max_steps) { // Input aumentati a 15
return .{ -10.0, true }; pub fn getAntObservation(self: *World, allocator: Allocator, ant_idx: usize) ![]f32 {
} var obs = try allocator.alloc(f32, 15); // AUMENTATO A 15
const ant = self.ants[ant_idx];
return .{ move_reward, false };
}
pub fn getObservation(self: *World, allocator: Allocator) ![]f32 {
var obs = try allocator.alloc(f32, 10);
var idx: usize = 0; var idx: usize = 0;
const ax = @as(i32, @intCast(self.ant_x)); const ax = @as(i32, @intCast(ant.x));
const ay = @as(i32, @intCast(self.ant_y)); const ay = @as(i32, @intCast(ant.y));
// 1. Visione 3x3 (0-8)
var dy: i32 = -1; var dy: i32 = -1;
while (dy <= 1) : (dy += 1) { while (dy <= 1) : (dy += 1) {
var dx: i32 = -1; var dx: i32 = -1;
while (dx <= 1) : (dx += 1) { while (dx <= 1) : (dx += 1) {
const py_i = ay + dy; const py = @as(usize, @intCast(ay + dy));
const px_i = ax + dx; const px = @as(usize, @intCast(ax + dx));
var val: f32 = 0.0; var val: f32 = 0.0;
if (self.grid[py][px] == TILE_WALL) {
if (py_i >= 0 and py_i < GRID_SIZE and px_i >= 0 and px_i < GRID_SIZE) {
const py = @as(usize, @intCast(py_i));
const px = @as(usize, @intCast(px_i));
const content = self.grid[py][px];
if (content == TILE_WALL) {
val = -1.0; val = -1.0;
} else if (content == TILE_FOOD) { } else if (px == self.food_x and py == self.food_y) {
val = 1.0; val = 1.0; // Vedo il cibo vicino!
} else if (self.visited[py][px]) { } else if (self.isOccupied(px, py) and (dx != 0 or dy != 0)) {
val = -0.5; val = -0.5; // Vedo una sorella
} else {
val = 0.0;
} }
} else {
val = -1.0;
}
obs[idx] = val; obs[idx] = val;
idx += 1; idx += 1;
} }
} }
obs[9] = self.getScent(self.ant_x, self.ant_y);
// 2. Sensi Chimici (9-10)
obs[9] = self.getScent(ant.x, ant.y);
obs[10] = self.pheromones[ant.y][ant.x];
// 3. BUSSOLA BINARIA (11-14)
// Risponde alla domanda: "In che quadrante è il cibo?"
// È molto più facile da capire per l'IA rispetto a un float.
obs[11] = if (self.food_y < ant.y) 1.0 else 0.0; // Cibo è SOPRA?
obs[12] = if (self.food_y > ant.y) 1.0 else 0.0; // Cibo è SOTTO?
obs[13] = if (self.food_x < ant.x) 1.0 else 0.0; // Cibo è SINISTRA?
obs[14] = if (self.food_x > ant.x) 1.0 else 0.0; // Cibo è DESTRA?
return obs; return obs;
} }
fn getScent(self: *World, x: usize, y: usize) f32 {
const dx = if (x > self.food_x) x - self.food_x else self.food_x - x;
const dy = if (y > self.food_y) y - self.food_y else self.food_y - y;
const dist = dx + dy;
if (dist == 0) return 1.0;
return 1.0 / (@as(f32, @floatFromInt(dist)) + 1.0);
}
}; };

View file

@ -1,48 +1,18 @@
const std = @import("std"); const std = @import("std");
const World = @import("env.zig").World; const World = @import("env.zig").World;
const env = @import("env.zig"); const env = @import("env.zig"); // Per accedere a costanti come GRID_SIZE
const Network = @import("modular_network.zig").Network; const Network = @import("modular_network.zig").Network;
// --- PARAMETRI AI --- // --- IPERPARAMETRI ---
const GAMMA: f32 = 0.9; // Quanto conta il futuro? (0.9 = lungimirante) const GAMMA: f32 = 0.9;
const LR: f32 = 0.005; // Learning Rate const LR: f32 = 0.005; // Basso perché output lineare
const EPSILON_START: f32 = 1.0; // Inizia esplorando al 100% const EPSILON_START: f32 = 1.0;
const EPSILON_END: f32 = 0.05; // Finisce esplorando al 5% const EPSILON_END: f32 = 0.05;
const DECAY_RATE: f32 = 0.001; // Quanto velocemente smette di essere curiosa const DECAY_RATE: f32 = 0.0001; // Decadimento più lento dato che abbiamo tanti step
fn exportAntJSON(world: *World, file_path: []const u8, episode: usize, epsilon: f32) !void { // Helper per trovare max e argmax
const file = try std.fs.cwd().createFile(file_path, .{});
defer file.close();
var buffer: [8192]u8 = undefined;
var fba = std.heap.FixedBufferAllocator.init(&buffer);
const allocator = fba.allocator();
var walls_json = std.ArrayList(u8){};
defer walls_json.deinit(allocator);
try walls_json.appendSlice(allocator, "[");
var first = true;
for (0..env.GRID_SIZE) |y| {
for (0..env.GRID_SIZE) |x| {
if (world.grid[y][x] == env.TILE_WALL) {
if (!first) try walls_json.appendSlice(allocator, ",");
try std.fmt.format(walls_json.writer(allocator), "[{d},{d}]", .{ x, y });
first = false;
}
}
}
try walls_json.appendSlice(allocator, "]");
// Aggiungiamo info extra per debug (Epsilon ed Episode)
const json = try std.fmt.allocPrint(allocator, "{{\n \"grid_size\": {d},\n \"steps\": {d},\n \"ant\": [{d}, {d}],\n \"food\": [{d}, {d}],\n \"walls\": {s},\n \"episode\": {d},\n \"epsilon\": {d:.3}\n}}", .{ env.GRID_SIZE, world.steps, world.ant_x, world.ant_y, world.food_x, world.food_y, walls_json.items, episode, epsilon });
try file.writeAll(json);
}
// Funzione helper per trovare il valore massimo in un array
fn maxVal(slice: []const f32) f32 { fn maxVal(slice: []const f32) f32 {
var m: f32 = -10000.0; var m: f32 = -1.0e20; // Numero molto basso
for (slice) |v| if (v > m) { for (slice) |v| if (v > m) {
m = v; m = v;
}; };
@ -50,7 +20,7 @@ fn maxVal(slice: []const f32) f32 {
} }
fn argmax(slice: []const f32) usize { fn argmax(slice: []const f32) usize {
var m: f32 = -10000.0; var m: f32 = -1.0e20;
var idx: usize = 0; var idx: usize = 0;
for (slice, 0..) |v, i| { for (slice, 0..) |v, i| {
if (v > m) { if (v > m) {
@ -61,99 +31,126 @@ fn argmax(slice: []const f32) usize {
return idx; return idx;
} }
// Export aggiornato per Multi-Formica
fn exportAntJSON(world: *World, file_path: []const u8, episode: usize, epsilon: f32) !void {
const file = try std.fs.cwd().createFile(file_path, .{});
defer file.close();
// Buffer grande per contenere le coordinate di 20 formiche
var buffer: [65536]u8 = undefined;
var fba = std.heap.FixedBufferAllocator.init(&buffer);
const allocator = fba.allocator();
// Costruiamo la lista delle formiche in JSON: [[x,y], [x,y], ...]
var ants_json = std.ArrayList(u8){};
defer ants_json.deinit(allocator);
try ants_json.appendSlice(allocator, "[");
for (world.ants, 0..) |ant, i| {
if (i > 0) try ants_json.appendSlice(allocator, ",");
try std.fmt.format(ants_json.writer(allocator), "[{d},{d}]", .{ ant.x, ant.y });
}
try ants_json.appendSlice(allocator, "]");
// Scriviamo il JSON completo
// QUI RISOLVIAMO L'ERRORE: Usiamo 'epsilon' nella stringa
const json = try std.fmt.allocPrint(allocator, "{{\n \"grid_size\": {d},\n \"food\": [{d}, {d}],\n \"ants\": {s},\n \"episode\": {d},\n \"epsilon\": {d:.3}\n}}", .{ env.GRID_SIZE, world.food_x, world.food_y, ants_json.items, episode, epsilon });
try file.writeAll(json);
}
pub fn main() !void { pub fn main() !void {
var gpa = std.heap.GeneralPurposeAllocator(.{}){}; var gpa = std.heap.GeneralPurposeAllocator(.{}){};
const allocator = gpa.allocator(); const allocator = gpa.allocator();
defer _ = gpa.deinit(); defer _ = gpa.deinit();
// 1. Inizializza Ambiente e Rete // 1. Inizializza Ambiente 100x100 e Rete
var world = World.init(12345); var world = World.init(12345);
var net = Network.init(allocator); var net = Network.init(allocator);
defer net.deinit(); defer net.deinit();
// Input: 9 (3x3 vista) -> Hidden: 24 -> Output: 4 (Su, Giù, Sx, Dx) // ARCHITETTURA RETE
try net.addLayer(9, 24, 111, true); // Input 11: (9 Vista + 1 Olfatto + 1 Feromone)
try net.addLayer(24, 4, 222, false); // Output 4: (Su, Giù, Sx, Dx) LINEARE
try net.addLayer(15, 40, 111, true);
try net.addLayer(40, 4, 222, false);
var prng = std.Random.DefaultPrng.init(999); var prng = std.Random.DefaultPrng.init(999);
const random = prng.random(); const random = prng.random();
std.debug.print("--- AI TRAINING START ---\n", .{}); std.debug.print("--- HIVE MIND TRAINING START ---\n", .{});
std.debug.print("Mappa: {d}x{d} | Formiche: {d}\n", .{ env.GRID_SIZE, env.GRID_SIZE, env.NUM_ANTS });
var episode: usize = 0; var global_step: usize = 0;
var epsilon: f32 = EPSILON_START; var epsilon: f32 = EPSILON_START;
while (true) : (episode += 1) { // Ciclo infinito (o molto lungo)
world.reset(); while (true) {
var done = false;
while (!done) { // 1. Evaporazione Feromoni (Memoria collettiva che sbiadisce)
world.evaporatePheromones();
// 2. Turno di ogni formica
for (0..env.NUM_ANTS) |i| {
// A. OSSERVAZIONE // A. OSSERVAZIONE
const current_obs = try world.getObservation(allocator); // Alloca memoria const current_obs = try world.getAntObservation(allocator, i);
defer allocator.free(current_obs); // Libera alla fine del ciclo defer allocator.free(current_obs);
// B. SCEGLI AZIONE (Epsilon-Greedy) // B. SCEGLI AZIONE (Epsilon-Greedy)
var action: usize = 0; var action: usize = 0;
const q_values = net.forward(current_obs); // Forward pass const q_values = net.forward(current_obs);
if (random.float(f32) < epsilon) { if (random.float(f32) < epsilon) {
// Esplorazione (Random)
action = random.intRangeAtMost(usize, 0, 3); action = random.intRangeAtMost(usize, 0, 3);
} else { } else {
// Sfruttamento (Usa il cervello)
action = argmax(q_values); action = argmax(q_values);
} }
// C. ESEGUI AZIONE // C. ESEGUI AZIONE
const result = world.step(action); const result = world.stepAnt(i, action);
const reward = result[0]; const reward = result[0];
done = result[1]; // Nota: in multi-agente 'done' è meno rilevante per il loop principale,
// perché se una formica "finisce" (mangia), respawna o continua.
// D. ADDESTRAMENTO (Q-Learning Update) // D. ADDESTRAMENTO (Hive Mind Learning)
// Target = Reward + Gamma * Max(Q_Next) // Calcoliamo target Q-Value
// Dobbiamo calcolare il Q-value dello stato successivo
var target_val = reward; var target_val = reward;
if (!done) {
const next_obs = try world.getObservation(allocator); // Se non è uno stato terminale (morte o vittoria netta), aggiungiamo stima futuro
// Consideriamo la vittoria (mangiare) come continuativa qui per non rompere il flusso
const next_obs = try world.getAntObservation(allocator, i);
defer allocator.free(next_obs); defer allocator.free(next_obs);
const next_q_values = net.forward(next_obs); const next_q_values = net.forward(next_obs);
target_val += GAMMA * maxVal(next_q_values); target_val += GAMMA * maxVal(next_q_values);
}
// Creiamo il vettore target per il backpropagation // Backpropagation
// Vogliamo che SOLO il neurone dell'azione presa si avvicini al target_val
// Gli altri neuroni devono restare come sono.
var target_vector = try allocator.alloc(f32, 4); var target_vector = try allocator.alloc(f32, 4);
defer allocator.free(target_vector); defer allocator.free(target_vector);
for (0..4) |j| target_vector[j] = q_values[j];
target_vector[action] = target_val;
for (0..4) |i| target_vector[i] = q_values[i]; // Copia i vecchi valori
target_vector[action] = target_val; // Aggiorna solo quello scelto
// Train della rete su questo singolo passo
_ = try net.train(current_obs, target_vector, LR); _ = try net.train(current_obs, target_vector, LR);
// Export e Delay
try exportAntJSON(&world, "ant_state.json", episode, epsilon);
// Se stiamo esplorando molto (inizio), andiamo veloci. Se siamo bravi, rallentiamo per goderci la scena.
if (episode % 10 == 0) {
// Mostra ogni 10 episodi a velocità umana
std.Thread.sleep(50 * 1_000_000);
} else {
// Allenamento veloce (quasi istantaneo)
// Togli il commento sotto se vuoi vedere TUTTO, ma sarà lento
// std.Thread.sleep(10 * 1_000_000);
}
} }
// Fine episodio // 3. Gestione Loop Globale
global_step += 1;
// Decadimento Epsilon
if (epsilon > EPSILON_END) { if (epsilon > EPSILON_END) {
epsilon -= DECAY_RATE; epsilon -= DECAY_RATE;
} }
if (episode % 10 == 0) { // 4. Export e Log (Ogni 10 step globali per fluidità)
std.debug.print("Episodio {d} | Epsilon: {d:.3} | Steps: {d}\n", .{ episode, epsilon, world.steps }); if (global_step % 10 == 0) {
try exportAntJSON(&world, "ant_state.json", global_step, epsilon);
// Log console ogni 100 step
if (global_step % 100 == 0) {
std.debug.print("Step: {d} | Epsilon: {d:.3} | Cibo: [{d},{d}]\r", .{ global_step, epsilon, world.food_x, world.food_y });
}
// Pausa per vedere l'animazione (rimuovi per training ultra-veloce)
std.Thread.sleep(100 * 1_000_000);
} }
} }
} }