From 44248db15e759bf06a4f7f1db2f938076dff10c0 Mon Sep 17 00:00:00 2001 From: Fores Date: Thu, 19 Feb 2026 15:58:52 +0100 Subject: [PATCH] Nuove regole --- ant_state.json | 9 +- ant_viewer.html | 161 +++++++++++++++------------------ best_brain.bin | Bin 0 -> 4564 bytes src/env.zig | 167 +++++++++++++++++++++++++++------- src/main.zig | 234 ++++++++++++++++++++++++++++++++++-------------- 5 files changed, 380 insertions(+), 191 deletions(-) create mode 100644 best_brain.bin diff --git a/ant_state.json b/ant_state.json index 052bf25..4703733 100644 --- a/ant_state.json +++ b/ant_state.json @@ -1,7 +1,8 @@ { "grid_size": 100, - "food": [61, 59], - "ants": [[55,59,0.81],[33,64,0.81],[30,84,0.82],[21,90,0.89],[49,59,0.98],[37,59,0.85],[20,56,0.96],[54,57,0.81],[27,83,0.96],[33,24,0.83],[29,59,0.84],[22,59,0.94],[23,58,0.80],[26,90,0.81],[30,74,0.78],[33,60,0.89],[24,90,0.98],[52,59,0.90],[25,59,0.91],[26,73,0.83]], - "episode": 415780, - "epsilon": 0.050 + "food": [31, 32], + "predator": [45, 66], + "ants": [[45,80,1.00]], + "episode": 1209, + "epsilon": 0.020 } \ No newline at end of file diff --git a/ant_viewer.html b/ant_viewer.html index c69ca26..668ece4 100644 --- a/ant_viewer.html +++ b/ant_viewer.html @@ -53,104 +53,91 @@ + setInterval(update, 50); + \ No newline at end of file diff --git a/best_brain.bin b/best_brain.bin new file mode 100644 index 0000000000000000000000000000000000000000..73a8412a3ca5c124349c31d36e82eda9dde2b48b GIT binary patch literal 4564 zcmXw6X*iYr*Oh}J8ctEkLj#pEg`zn3?>-qDXhdls6wQ)oMy3!M9z=gV#>#QsA3f6HG?{XY`Z{4c+M*iMc_ z#Nnit@p$rOpJ1D{0hWbWVaUltWJP`m8P+Oij1o3+a?u-5W|JIBtJiYxr{ut^m-nD+ zy)qkE;s(8)3vrT9C4@GWaGN?PlUB2zNUm2WLvPlTxvHCRQJ)fVIQxgm*PTI*8Qx~j z{nq1#bceyP_XXIQXhZ$bzwl1!280jpWe!C6L*Irp`0_Ote%=g&;9L2O#q_PDW#l{A zT)u&vIlc`uzVtHYCVcWea27f2o6JPa*}++7i{ZE>e?ZD#3l22LgWkOypl!+m+kgb{ z;0HqKz%^)W3V_|=H<+`RjLF5#3rW&z1EOiX8QV->GEN5@n2r%XF`il{Xdg-AENp(j z^#h+_FkcT;YEDDX9Dr&g$4b{DDbQw=4(^Ge@H@aA{$0C{>^bRy3O{y`UAwwT-MW(~ z*>aCjYgHj>6;GIqpUarKYC{pPb`icb=HcTO9ysmOfDbKyz|1a@vGjF?Ev^%>Jo6Gf zIhO`|+r%Ns%bn;L8nZWtk1>9YLHOSH1Znc|=kh%LjKxP!!`W&zE<4a3nOY-U(^JNE zbjrfZCLN42Ef*Ec{|wD{cv$*&4oKTh!er&Q5NbXP1D#z^IXxE!GoL|Tye@uk4Is~5 z^`Jw^ofufJAn~&6*+v}&5S(l#i*~*s+r1o#(oieg^0bizpKBu)Qc@(xmJo8h9kN&3 zv90p<#93wo*7RB7@6XEE1RAWuU+oOeX$I>cA8t>GfM?^;Xg1k5L z$YE^@*w8hOXusp3tB(-te6BJt`AR~F-NO3+*^LvIDRHW+4&c>gmV(0jTS!OLIC!w_ zA(QRpjN|6D30~aa1J|`*2}+*66U{kl%xzk_3PU<1vCQNUe0rpbC(3-d=L2;_T9gE{ zlVniebpU=P1TmMUEh0;Y<`K_Gc}8rJ9wDaoxb}A%b94D6CTP_MhW<_x8E;qPyu>AN z?X~ydeMS)m7kEQYhzKnG&T_Hq3&6&|5@yZu1B;+NP?hn3lZz|H8tG03cc(JyiFP>S zv@AO;)<(FuMnpZEk6tGuxhZb5aP<>u>>H6~+-#n46(kf^yv`wox6g}$)B52&|2Bj+ zYJ+pFA?a#yCDwKCnQp^)lK;pSRa+&Ng)+1>&@rwh`zC zL})O@k(^e0NlF!cVOqo}XJTZ{{3m1u>B(szUj|J`+F1vD!lT5%?i(|%O`gQ`Y+!8l zb2&3tIrQDv4IeWm!StUOpiurEe9AH8HcU)_WYq+yof-{x>yp4kYz3yI*`tp87}jI& z7m&j_IIkiH!QoJ+xaHp4s`iGC0}d}0N;8QvhMov%c;hh67ZAN9r6 zvn-J?&D{Fg?x1sg2^u_%hS8!(SQs(|y*i3uRn}Js*`W?r?KepN1aq>kUXnTI<$%wo zBQp=X3DYu*>5?npmK~eUnQE=VM>ob|g#C6HeI5uOO{-zBe3ElO0YUZw~W) zRLi>_twdp!3b}Ll7TB~nL+wLPSnFee%avjnhwM-cuu?!5m4jH6ppN!@Z>WunW_922J&d~IQbOE(!A`AT$h+?hwI4#YX7jBFo0 zzxI?-cQ=M$*>x_5uBQ)2G|yA za#j^~cp`Ncu52-Ztm3CIMp^<*q(jL7wS)R3b$qC;j_v#`(Z!?6B-hG|91^?43_hF8 z&Mq}a^^e9xZ|qsp&tuW;u@h+y=;PW3c#tV2fx5xkqOB|M!})n-+}XMFSmF74mF>uf!2|NZvVf#=Fxb-N4%XdpSS{woE#0Mb7unr2xUjV3&ie-uU-4VyaiJL-vZjqlx9ugjm37IP8UDn^ zxP!b1Sb+0aZV+s`Jr%R($)KfF2rKbuloYomvBK8P*qgeQ%;5P!!(-otIx@lIkrOm5>w? zdCrK;{~pFkX->wUTSuVXNeg6oJ3;qNGq`v^<%ayz!RR-IZvRjym=p*aPyC_(!Y14% zZiJS74*0XW4SJ0yV7`1DJl2hX_N-=T59?M2bgt z^0qLU+gB2;Gt0?Nzj(4e*$$1pIA&4XOXmKFA~9W+#c0@F=Tb+5X2@a{k)4nwx>cS)R>B0%VeEEnD`W8KPbmQT>EQ5D6U_%5xwMVX zVVb%e9*^;ds*!Ox^_d&#`n8u)oIam4{=Ca5{_sJmnIa;W;7$yAiH!9z0#=#YT$;!f zd1;3D>+fyqc=nT<+_4bDoH#*rPYOh=9s$j1w?MBP;2WbXGU=*iqCENBoaQ9L>RiAr z-N)IAN=LY>Tp~cP^{i`On5bgZ8B^2BrW=<8FOy(68d!(`P`NnKWuO$iq(i zrFgf^gptck#^$}_*~E>(DJ#n z;6>)yEM$8226FSyOu%13Pr$KC2#Pu}(Bym@92|I@E$abWTFSw~Dhd?-xWLeYBc$== z2>CEvOKb)M2!C)ZI*$({J1!cKY{k{&SxX;N6zj>g9%+FJ6PaLKU56kzt)3aap0{c7o3O9qFDXI}SWnVUY z6NPWvj$d-LP`ht3d13#_p~XF1A8&BOjtbENK3 zs5}~qOuZR4s67YO$9{OFY8Bow^2HXh`Eay374Mt5Vf|z?G*7d{ZdQ%$sNal}RvF{a z3_JE$ID-c_8er$-jemDV3cIC)*j2+&K3e)5;d9DGb#oFMT{f?+p zrjEmvA(4=BmC-L-6?>Civ!lz(RiS#DiQ6J0r)Cn}~N&3JT4_HCLV zy;(S=uAbY}D@JCo@uD5y3u&oD4IV70ph{7B^yC3kRG3mp_n7*i<-}YX^x!i6lynLo zTs}tb4PT{2w&!Tgw1qUOrJU~k_g`VG=0|q@^*kCm_lnR#E1fnj{F_(fVd-)4SZ7!wjUc?IhM-$lS{grfCU9|9*Y=BUl zZ%9{rT&24v2U5;CmmXdiFB}cXqnQ%Pv|TBTwl5iGnSd(#AFVQ~^7|S+EpJQho|aSZ z*0a>y?+VshR?yFFKpj69)6gqLxG^-FJ{WUIXnUv>Hru+fW2YXZx6F#@!6(re9wDMv zWB<-wCxVeIpgSb$@xi(qG~fCv&D3}cV^}%T_b{EBeDtBp%SLhTCXPBQ#|wX#MzM9> zx%BP01mQsEabb<+R9Z3q2DKVEM}6j2(I@1hu+HixP56;X&nL!F=O&&oGC7i#of1*w ji+Oa-3Lza0%b{XH!8H6urEpjH1>y1E=Y;1MCkX!sWM>}! literal 0 HcmV?d00001 diff --git a/src/env.zig b/src/env.zig index 8e47c3d..d012db6 100644 --- a/src/env.zig +++ b/src/env.zig @@ -2,21 +2,20 @@ const std = @import("std"); const Allocator = std.mem.Allocator; pub const GRID_SIZE = 100; -pub const NUM_ANTS = 20; - +pub const NUM_ANTS = 1; pub const TILE_EMPTY = 0; pub const TILE_WALL = 1; pub const TILE_FOOD = 2; +// Aggiungiamo il tipo visuale per il predatore (non usato nella grid array, ma per logica) +pub const TILE_PREDATOR = 3; pub const Ant = struct { x: usize, y: usize, alive: bool, energy: f32, - gene_speed: f32, gene_metabolism: f32, - generation: usize, score: usize, }; @@ -28,6 +27,10 @@ pub const World = struct { food_x: usize, food_y: usize, + // NUOVO: Coordinate del predatore + pred_x: usize, + pred_y: usize, + best_ant_idx: usize, prng: std.Random.DefaultPrng, @@ -38,6 +41,8 @@ pub const World = struct { .ants = undefined, .food_x = 0, .food_y = 0, + .pred_x = 0, // Init + .pred_y = 0, // Init .best_ant_idx = 0, .prng = std.Random.DefaultPrng.init(seed), }; @@ -46,8 +51,8 @@ pub const World = struct { } pub fn reset(self: *World) void { - const random = self.prng.random(); - + //const random = self.prng.random(); + // Reset Grid (codice invariato...) for (0..GRID_SIZE) |y| { for (0..GRID_SIZE) |x| { self.pheromones[y][x] = 0.0; @@ -60,6 +65,10 @@ pub const World = struct { } self.respawnFood(); + // NUOVO: Respawna il predatore in un angolo lontano + self.pred_x = GRID_SIZE - 5; + self.pred_y = GRID_SIZE - 5; + const center = GRID_SIZE / 2; for (0..NUM_ANTS) |i| { self.ants[i] = Ant{ @@ -67,14 +76,18 @@ pub const World = struct { .y = center, .alive = true, .energy = 100.0, - .gene_speed = 0.8 + random.float(f32) * 0.4, - .gene_metabolism = 1.0, + + // PRIMA ERA: .gene_speed = 0.8 + random.float(f32) * 0.4, + .gene_speed = 1.0, // Velocità fissa e garantita + .gene_metabolism = 1.0, // Consumo fisso + .generation = 0, .score = 0, }; } } + // respawnFood e getScent invariati... fn respawnFood(self: *World) void { const random = self.prng.random(); while (true) { @@ -102,41 +115,89 @@ pub const World = struct { return false; } + // spawnChild invariato... pub fn spawnChild(self: *World, parent_idx: usize) Ant { const parent = self.ants[parent_idx]; - const random = self.prng.random(); - var new_speed = parent.gene_speed + (random.float(f32) * 0.2 - 0.1); - - if (new_speed < 0.5) new_speed = 0.5; - if (new_speed > 3.0) new_speed = 3.0; - - const new_metabolism = new_speed * new_speed; + // Abbiamo rimosso tutta la logica del random e le mutazioni return Ant{ .x = GRID_SIZE / 2, .y = GRID_SIZE / 2, .alive = true, .energy = 100.0, - .gene_speed = new_speed, - .gene_metabolism = new_metabolism, + .gene_speed = 1.0, // Il figlio ha sempre velocità 1.0 + .gene_metabolism = 1.0, // Il figlio ha sempre metabolismo 1.0 .generation = parent.generation + 1, .score = 0, }; } + // updateBestAnt invariato... pub fn updateBestAnt(self: *World) void { var max_score: usize = 0; var best_idx: usize = 0; - for (self.ants, 0..) |ant, i| { if (ant.alive and ant.score > max_score) { max_score = ant.score; best_idx = i; } } - if (max_score > 0) { - self.best_ant_idx = best_idx; + if (max_score > 0) self.best_ant_idx = best_idx; + } + + // NUOVO: Logica movimento predatore + pub fn stepPredator(self: *World) void { + const random = self.prng.random(); + + // Il predatore si muove verso la formica più vicina con una certa probabilità, + // altrimenti si muove a caso (per non renderlo impossibile da battere). + + var target_x = self.pred_x; + var target_y = self.pred_y; + + // Trova formica più vicina + var min_dist: usize = 9999; + var has_target = false; + + for (self.ants) |ant| { + if (!ant.alive) continue; + // Distanza Manhattan + const dx = if (ant.x > self.pred_x) ant.x - self.pred_x else self.pred_x - ant.x; + const dy = if (ant.y > self.pred_y) ant.y - self.pred_y else self.pred_y - ant.y; + const dist = dx + dy; + + if (dist < min_dist and dist < 200) { // Insegue solo se entro 20 caselle + min_dist = dist; + target_x = ant.x; + target_y = ant.y; + has_target = true; + } + } + + var move_x: isize = 0; + var move_y: isize = 0; + + // 60% insegue, 40% random + if (has_target and random.float(f32) < 0.6) { + if (target_x > self.pred_x) move_x = 1 else if (target_x < self.pred_x) move_x = -1; + if (target_y > self.pred_y) move_y = 1 else if (target_y < self.pred_y) move_y = -1; + } else { + // Random walk + const r = random.intRangeAtMost(u8, 0, 3); + if (r == 0) move_y = -1; + if (r == 1) move_y = 1; + if (r == 2) move_x = -1; + if (r == 3) move_x = 1; + } + + const new_px = @as(usize, @intCast(@as(isize, @intCast(self.pred_x)) + move_x)); + const new_py = @as(usize, @intCast(@as(isize, @intCast(self.pred_y)) + move_y)); + + // Controllo muri + if (self.grid[new_py][new_px] != TILE_WALL) { + self.pred_x = new_px; + self.pred_y = new_py; } } @@ -144,11 +205,16 @@ pub const World = struct { var ant = &self.ants[ant_idx]; if (!ant.alive) return .{ 0.0, true }; - const random = self.prng.random(); + // NUOVO: Controllo morte immediata da predatore (inizio turno) + if (ant.x == self.pred_x and ant.y == self.pred_y) { + self.ants[ant_idx] = self.spawnChild(self.best_ant_idx); // Respawn + return .{ -500.0, true }; // Morte dolorosa + } + const random = self.prng.random(); + // ... (Logica energia/movimento invariata fino al loop) ... var moves_to_make: usize = 0; var chance = ant.gene_speed; - while (chance > 0) { if (chance >= 1.0) { moves_to_make += 1; @@ -158,7 +224,6 @@ pub const World = struct { chance = 0; } } - if (moves_to_make == 0) { ant.energy -= 0.1; if (ant.energy <= 0) return .{ -10.0, true }; @@ -169,7 +234,6 @@ pub const World = struct { for (0..moves_to_make) |_| { if (ant.energy <= 0) break; - ant.energy -= (0.2 * ant.gene_metabolism); const old_dist_x = if (ant.x > self.food_x) ant.x - self.food_x else self.food_x - ant.x; @@ -191,21 +255,37 @@ pub const World = struct { ant.x = new_x; ant.y = new_y; + + // NUOVO: Controllo collisione predatore DOPO il movimento + if (ant.x == self.pred_x and ant.y == self.pred_y) { + self.ants[ant_idx] = self.spawnChild(self.best_ant_idx); + return .{ -500.0, true }; // Morte + } + + const dist_pred_x = if (ant.x > self.pred_x) ant.x - self.pred_x else self.pred_x - ant.x; + const dist_pred_y = if (ant.y > self.pred_y) ant.y - self.pred_y else self.pred_y - ant.y; + const dist_pred = dist_pred_x + dist_pred_y; + + if (dist_pred == 1) { + total_reward -= 5.0; // Troppo vicina! Rischio altissimo. + } else if (dist_pred == 2) { + total_reward -= 2.0; // Zona di allerta. + } + self.pheromones[new_y][new_x] = 1.0; const new_dist = if (new_x > self.food_x) new_x - self.food_x else self.food_x - new_x; const total_new_dist = new_dist + (if (new_y > self.food_y) new_y - self.food_y else self.food_y - new_y); - const scent = self.getScent(new_x, new_y); + if (total_new_dist < old_dist) total_reward += 1.5 + scent; - if (total_new_dist > old_dist) total_reward -= 1.0; + if (total_new_dist > old_dist) total_reward -= 0.1; if (new_x == self.food_x and new_y == self.food_y) { self.respawnFood(); ant.energy = 100.0; ant.score += 1; total_reward += 50.0; - self.updateBestAnt(); } } @@ -218,6 +298,7 @@ pub const World = struct { return .{ total_reward, false }; } + // evaporatePheromones invariato... pub fn evaporatePheromones(self: *World) void { for (0..GRID_SIZE) |y| { for (0..GRID_SIZE) |x| { @@ -226,14 +307,17 @@ pub const World = struct { } } + // Modificato per includere il predatore pub fn getAntObservation(self: *World, allocator: Allocator, ant_idx: usize) ![]f32 { - var obs = try allocator.alloc(f32, 15); + // Aumentiamo la dimensione a 20 floats + var obs = try allocator.alloc(f32, 20); const ant = self.ants[ant_idx]; var idx: usize = 0; const ax = @as(i32, @intCast(ant.x)); const ay = @as(i32, @intCast(ant.y)); + // 3x3 Grid locale (9 inputs) var dy: i32 = -1; while (dy <= 1) : (dy += 1) { var dx: i32 = -1; @@ -241,10 +325,13 @@ pub const World = struct { const py = @as(usize, @intCast(ay + dy)); const px = @as(usize, @intCast(ax + dx)); var val: f32 = 0.0; + if (self.grid[py][px] == TILE_WALL) { val = -1.0; } else if (px == self.food_x and py == self.food_y) { val = 1.0; + } else if (px == self.pred_x and py == self.pred_y) { + val = -5.0; // PREDATORE VICINO! PAURA! } else if (self.isOccupied(px, py) and (dx != 0 or dy != 0)) { val = -0.5; } @@ -252,12 +339,28 @@ pub const World = struct { idx += 1; } } + + // Input originali (9..14) obs[9] = self.getScent(ant.x, ant.y); obs[10] = self.pheromones[ant.y][ant.x]; - obs[11] = if (self.food_y < ant.y) 1.0 else 0.0; - obs[12] = if (self.food_y > ant.y) 1.0 else 0.0; - obs[13] = if (self.food_x < ant.x) 1.0 else 0.0; - obs[14] = if (self.food_x > ant.x) 1.0 else 0.0; + obs[11] = if (self.food_y < ant.y) 1.0 else 0.0; // Cibo Su + obs[12] = if (self.food_y > ant.y) 1.0 else 0.0; // Cibo Giù + obs[13] = if (self.food_x < ant.x) 1.0 else 0.0; // Cibo SX + obs[14] = if (self.food_x > ant.x) 1.0 else 0.0; // Cibo DX + + // NUOVI INPUT: Sensori Predatore (15..19) + // 15: Prossimità predatore (1.0 = vicinissimo, 0.0 = lontano) + const pdx = if (ant.x > self.pred_x) ant.x - self.pred_x else self.pred_x - ant.x; + const pdy = if (ant.y > self.pred_y) ant.y - self.pred_y else self.pred_y - ant.y; + const dist_pred = @as(f32, @floatFromInt(pdx + pdy)); + obs[15] = if (dist_pred == 0) 1.0 else 1.0 / dist_pred; + + // 16-19: Direzione Predatore + obs[16] = if (self.pred_y < ant.y) 1.0 else 0.0; // Predatore è SOPRA + obs[17] = if (self.pred_y > ant.y) 1.0 else 0.0; // Predatore è SOTTO + obs[18] = if (self.pred_x < ant.x) 1.0 else 0.0; // Predatore è SINISTRA + obs[19] = if (self.pred_x > ant.x) 1.0 else 0.0; // Predatore è DESTRA + return obs; } }; diff --git a/src/main.zig b/src/main.zig index 1d21208..8403533 100644 --- a/src/main.zig +++ b/src/main.zig @@ -3,12 +3,41 @@ const World = @import("env.zig").World; const env = @import("env.zig"); const Network = @import("modular_network.zig").Network; -const GAMMA: f32 = 0.9; -const LR: f32 = 0.005; +// --- PARAMETRI DI TRAINING --- +const GAMMA: f32 = 0.98; +const LR: f32 = 0.001; const EPSILON_START: f32 = 1.0; const EPSILON_END: f32 = 0.05; const DECAY_RATE: f32 = 0.0001; +const SAVE_INTERVAL: usize = 500; +// Export JSON +fn exportAntJSON(world: *World, file_path: []const u8, episode: usize, avg_epsilon: f32) !void { + const file = try std.fs.cwd().createFile(file_path, .{}); + defer file.close(); + + var buffer: [65536]u8 = undefined; + var fba = std.heap.FixedBufferAllocator.init(&buffer); + const allocator = fba.allocator(); + + var ants_json = std.ArrayList(u8){}; + defer ants_json.deinit(allocator); + try ants_json.appendSlice(allocator, "["); + + var first = true; + for (world.ants) |ant| { + if (!ant.alive) continue; // Esporta solo le formiche vive + if (!first) try ants_json.appendSlice(allocator, ","); + try std.fmt.format(ants_json.writer(allocator), "[{d},{d},{d:.2}]", .{ ant.x, ant.y, ant.gene_speed }); + first = false; + } + try ants_json.appendSlice(allocator, "]"); + + const json = try std.fmt.allocPrint(allocator, "{{\n \"grid_size\": {d},\n \"food\": [{d}, {d}],\n \"predator\": [{d}, {d}],\n \"ants\": {s},\n \"episode\": {d},\n \"epsilon\": {d:.3}\n}}", .{ env.GRID_SIZE, world.food_x, world.food_y, world.pred_x, world.pred_y, ants_json.items, episode, avg_epsilon }); + try file.writeAll(json); +} + +// Funzioni helper fn maxVal(slice: []const f32) f32 { var m: f32 = -1.0e20; for (slice) |v| if (v > m) { @@ -29,97 +58,166 @@ fn argmax(slice: []const f32) usize { return idx; } -fn exportAntJSON(world: *World, file_path: []const u8, episode: usize, avg_epsilon: f32) !void { - const file = try std.fs.cwd().createFile(file_path, .{}); - defer file.close(); - - var buffer: [65536]u8 = undefined; - var fba = std.heap.FixedBufferAllocator.init(&buffer); - const allocator = fba.allocator(); - - var ants_json = std.ArrayList(u8){}; - defer ants_json.deinit(allocator); - try ants_json.appendSlice(allocator, "["); - - for (world.ants, 0..) |ant, i| { - if (i > 0) try ants_json.appendSlice(allocator, ","); - try std.fmt.format(ants_json.writer(allocator), "[{d},{d},{d:.2}]", .{ ant.x, ant.y, ant.gene_speed }); - } - try ants_json.appendSlice(allocator, "]"); - - const json = try std.fmt.allocPrint(allocator, "{{\n \"grid_size\": {d},\n \"food\": [{d}, {d}],\n \"ants\": {s},\n \"episode\": {d},\n \"epsilon\": {d:.3}\n}}", .{ env.GRID_SIZE, world.food_x, world.food_y, ants_json.items, episode, avg_epsilon }); - try file.writeAll(json); -} - pub fn main() !void { var gpa = std.heap.GeneralPurposeAllocator(.{}){}; const allocator = gpa.allocator(); defer _ = gpa.deinit(); - var world = World.init(12345); - var net = Network.init(allocator); - defer net.deinit(); + // ========================================== + // 🎛️ PANNELLO DI CONTROLLO PRINCIPALE + // ========================================== + const INFERENCE_MODE = true; // TRUE = Duello 1vs1. FALSE = Training massivo. + const LOAD_EXISTING = false; // (Valido solo se INFERENCE_MODE=false) + // ========================================== - try net.addLayer(15, 40, 111, true); - try net.addLayer(40, 4, 222, false); + var world = World.init(12345); + var net: Network = undefined; + + if (INFERENCE_MODE) { + std.debug.print(">> CARICAMENTO CERVELLO PER ARENA 1vs1...\n", .{}); + net = Network.load(allocator, "best_brain.bin") catch |err| { + std.debug.print("ERRORE: Non trovo 'best_brain.bin'. Fai prima il training!\nErrore: {}\n", .{err}); + return; + }; + } else if (LOAD_EXISTING) { + std.debug.print(">> RIPRESA TRAINING: Caricamento 'best_brain.bin'...\n", .{}); + net = try Network.load(allocator, "best_brain.bin"); + } else { + std.debug.print(">> NUOVO TRAINING: Creazione rete neurale...\n", .{}); + net = Network.init(allocator); + try net.addLayer(20, 45, 111, true); + try net.addLayer(45, 4, 222, false); + } + defer net.deinit(); var prng = std.Random.DefaultPrng.init(999); const random = prng.random(); - - std.debug.print("--- HIVE MIND TRAINING START ---\n", .{}); - std.debug.print("Mappa: {d}x{d} | Formiche: {d}\n", .{ env.GRID_SIZE, env.GRID_SIZE, env.NUM_ANTS }); - var global_step: usize = 0; - var epsilon: f32 = EPSILON_START; - while (true) { - world.evaporatePheromones(); + // ========================================== + // MODALITÀ: INFERENZA (DUELLO 1 VS 1) + // ========================================== + if (INFERENCE_MODE) { + std.debug.print("--- ARENA 1 VS 1: INIZIO DUELLO ---\n", .{}); - for (0..env.NUM_ANTS) |i| { - const current_obs = try world.getAntObservation(allocator, i); + const margin = 5; + world.ants[0].energy = 500.0; + world.ants[0].x = margin; + world.ants[0].y = margin; + world.pred_x = env.GRID_SIZE - margin; + world.pred_y = env.GRID_SIZE - margin; + + var score: usize = 0; + + while (world.ants[0].alive) { + + // FIX 1: Stessa velocità del training! + if (global_step % 2 == 0) world.stepPredator(); + + const current_obs = try world.getAntObservation(allocator, 0); defer allocator.free(current_obs); - var action: usize = 0; const q_values = net.forward(current_obs); + var action: usize = 0; - if (random.float(f32) < epsilon) { + // FIX 3: Lasciamo un 2% di "istinto" (epsilon) per non farla incantare sui muri + if (random.float(f32) < 0.02) { action = random.intRangeAtMost(usize, 0, 3); } else { action = argmax(q_values); } - const result = world.stepAnt(i, action); + const result = world.stepAnt(0, action); const reward = result[0]; - var target_val = reward; - - const next_obs = try world.getAntObservation(allocator, i); - defer allocator.free(next_obs); - const next_q_values = net.forward(next_obs); - target_val += GAMMA * maxVal(next_q_values); - - var target_vector = try allocator.alloc(f32, 4); - defer allocator.free(target_vector); - for (0..4) |j| target_vector[j] = q_values[j]; - target_vector[action] = target_val; - - _ = try net.train(current_obs, target_vector, LR); - } - - global_step += 1; - - if (epsilon > EPSILON_END) { - epsilon -= DECAY_RATE; - } - - if (global_step % 10 == 0) { - try exportAntJSON(&world, "ant_state.json", global_step, epsilon); - - if (global_step % 100 == 0) { - std.debug.print("Step: {d} | Epsilon: {d:.3} | Cibo: [{d},{d}]\r", .{ global_step, epsilon, world.food_x, world.food_y }); + if (reward > 10.0) { + score += 1; + std.debug.print("SCORE: {d} | Cibo preso!\n", .{score}); + world.ants[0].energy = 500.0; } - std.Thread.sleep(100 * 1_000_000); + try exportAntJSON(&world, "ant_state.json", global_step, 0.02); + global_step += 1; + + std.Thread.sleep(50 * 1_000_000); + + if (!world.ants[0].alive) { + std.debug.print("\n>>> GAME OVER! La formica e' durata {d} step e ha mangiato {d} cibi. <<<\n", .{ global_step, score }); + break; + } + } + } + // ========================================== + // MODALITÀ: TRAINING (SCIAME) + // ========================================== + else { + std.debug.print("--- HIVE MIND: INIZIO TRAINING ---\n", .{}); + var epsilon: f32 = EPSILON_START; + var record_score: usize = 0; + + while (true) { + world.evaporatePheromones(); + + if (global_step % 2 == 0) world.stepPredator(); + + for (0..env.NUM_ANTS) |i| { + if (!world.ants[i].alive) continue; + + const current_obs = try world.getAntObservation(allocator, i); + defer allocator.free(current_obs); + + var action: usize = 0; + const q_values = net.forward(current_obs); + + if (random.float(f32) < epsilon) { + action = random.intRangeAtMost(usize, 0, 3); + } else { + action = argmax(q_values); + } + + const result = world.stepAnt(i, action); + const reward = result[0]; + const died = result[1] and reward <= -50.0; + + var target_val = reward; + if (!died) { + const next_obs = try world.getAntObservation(allocator, i); + defer allocator.free(next_obs); + const next_q_values = net.forward(next_obs); + target_val += GAMMA * maxVal(next_q_values); + } + + var target_vector = try allocator.alloc(f32, 4); + defer allocator.free(target_vector); + for (0..4) |j| target_vector[j] = q_values[j]; + target_vector[action] = target_val; + + _ = try net.train(current_obs, target_vector, LR); + } + + global_step += 1; + if (epsilon > EPSILON_END) epsilon -= DECAY_RATE; + + if (global_step % SAVE_INTERVAL == 0) { + var current_total_score: usize = 0; + for (world.ants) |ant| { + if (ant.alive) { + current_total_score += ant.score; + } + } + if (current_total_score > record_score) { + record_score = current_total_score; + try net.save("best_brain.bin"); + } + } + + if (global_step % 10 == 0) { + try exportAntJSON(&world, "ant_state.json", global_step, epsilon); + if (global_step % 100 == 0) { + std.debug.print("Step: {d} | Eps: {d:.3} | Record: {d} | Pred: [{d},{d}]\r", .{ global_step, epsilon, record_score, world.pred_x, world.pred_y }); + } + std.Thread.sleep(20 * 1_000_000); // Veloce per il training + } } } }