Nuove regole
This commit is contained in:
parent
1980e7996f
commit
44248db15e
|
|
@ -1,7 +1,8 @@
|
|||
{
|
||||
"grid_size": 100,
|
||||
"food": [61, 59],
|
||||
"ants": [[55,59,0.81],[33,64,0.81],[30,84,0.82],[21,90,0.89],[49,59,0.98],[37,59,0.85],[20,56,0.96],[54,57,0.81],[27,83,0.96],[33,24,0.83],[29,59,0.84],[22,59,0.94],[23,58,0.80],[26,90,0.81],[30,74,0.78],[33,60,0.89],[24,90,0.98],[52,59,0.90],[25,59,0.91],[26,73,0.83]],
|
||||
"episode": 415780,
|
||||
"epsilon": 0.050
|
||||
"food": [31, 32],
|
||||
"predator": [45, 66],
|
||||
"ants": [[45,80,1.00]],
|
||||
"episode": 1209,
|
||||
"epsilon": 0.020
|
||||
}
|
||||
153
ant_viewer.html
153
ant_viewer.html
|
|
@ -53,104 +53,91 @@
|
|||
</div>
|
||||
|
||||
<script>
|
||||
const canvas = document.getElementById('gridCanvas');
|
||||
const ctx = canvas.getContext('2d');
|
||||
// ... setup canvas e context esistente ...
|
||||
const canvas = document.getElementById('gridCanvas');
|
||||
const ctx = canvas.getContext('2d');
|
||||
const epEl = document.getElementById('episode');
|
||||
const epsEl = document.getElementById('epsilon');
|
||||
const foodEl = document.getElementById('food-coords');
|
||||
|
||||
const epEl = document.getElementById('episode');
|
||||
const epsEl = document.getElementById('epsilon');
|
||||
const foodEl = document.getElementById('food-coords');
|
||||
function drawWorld(data) {
|
||||
// ... (setup grid size e clear rect come prima) ...
|
||||
gridSize = data.grid_size;
|
||||
const margin = 40;
|
||||
const availableW = document.getElementById('container').clientWidth - margin;
|
||||
const availableH = document.getElementById('container').clientHeight - margin;
|
||||
const canvasSize = Math.min(availableW, availableH);
|
||||
canvas.width = canvasSize;
|
||||
canvas.height = canvasSize;
|
||||
const cellSize = canvasSize / gridSize;
|
||||
|
||||
let gridSize = 100;
|
||||
ctx.fillStyle = "#1a1a1a";
|
||||
ctx.fillRect(0, 0, canvas.width, canvas.height);
|
||||
|
||||
function drawWorld(data) {
|
||||
gridSize = data.grid_size;
|
||||
// ... (disegno griglia opzionale come prima) ...
|
||||
|
||||
const margin = 40;
|
||||
const availableW = document.getElementById('container').clientWidth - margin;
|
||||
const availableH = document.getElementById('container').clientHeight - margin;
|
||||
// Disegno CIBO
|
||||
if (data.food) {
|
||||
const fx = data.food[0] * cellSize;
|
||||
const fy = data.food[1] * cellSize;
|
||||
ctx.shadowBlur = 15;
|
||||
ctx.shadowColor = "#0f0";
|
||||
ctx.fillStyle = "#00ff00";
|
||||
ctx.beginPath();
|
||||
ctx.arc(fx + cellSize/2, fy + cellSize/2, cellSize/1.5, 0, Math.PI*2);
|
||||
ctx.fill();
|
||||
ctx.shadowBlur = 0;
|
||||
}
|
||||
|
||||
const canvasSize = Math.min(availableW, availableH);
|
||||
// NUOVO: Disegno PREDATORE
|
||||
if (data.predator) {
|
||||
const px = data.predator[0] * cellSize;
|
||||
const py = data.predator[1] * cellSize;
|
||||
|
||||
canvas.width = canvasSize;
|
||||
canvas.height = canvasSize;
|
||||
ctx.shadowBlur = 20;
|
||||
ctx.shadowColor = "#f00";
|
||||
ctx.fillStyle = "#ff0000";
|
||||
|
||||
const cellSize = canvasSize / gridSize;
|
||||
// Disegno come un quadrato "cattivo"
|
||||
ctx.fillRect(px, py, cellSize, cellSize);
|
||||
|
||||
ctx.fillStyle = "#1a1a1a";
|
||||
ctx.fillRect(0, 0, canvas.width, canvas.height);
|
||||
ctx.shadowBlur = 0;
|
||||
}
|
||||
|
||||
if (cellSize > 4) {
|
||||
ctx.strokeStyle = "#2a2a2a";
|
||||
ctx.lineWidth = 1;
|
||||
// Disegno FORMICHE
|
||||
if (data.ants) {
|
||||
data.ants.forEach(ant => {
|
||||
const ax = ant[0] * cellSize;
|
||||
const ay = ant[1] * cellSize;
|
||||
const speed = ant[2];
|
||||
|
||||
let color = "orange";
|
||||
if (speed > 1.3) color = `rgb(255, ${255 - (speed*80)}, 0)`; // Giallo/Rosso veloce
|
||||
else if (speed < 0.9) color = `rgb(${speed*100}, ${speed*100}, 255)`; // Blu lento
|
||||
|
||||
ctx.fillStyle = color;
|
||||
ctx.beginPath();
|
||||
for(let i=0; i<=gridSize; i++) {
|
||||
const pos = i * cellSize;
|
||||
ctx.moveTo(pos, 0); ctx.lineTo(pos, canvas.height);
|
||||
ctx.moveTo(0, pos); ctx.lineTo(canvas.width, pos);
|
||||
}
|
||||
ctx.stroke();
|
||||
}
|
||||
|
||||
if (data.food) {
|
||||
const fx = data.food[0] * cellSize;
|
||||
const fy = data.food[1] * cellSize;
|
||||
|
||||
ctx.shadowBlur = 15;
|
||||
ctx.shadowColor = "#0f0";
|
||||
|
||||
ctx.fillStyle = "#00ff00";
|
||||
ctx.beginPath();
|
||||
ctx.arc(fx + cellSize/2, fy + cellSize/2, cellSize/1.5, 0, Math.PI*2);
|
||||
ctx.arc(ax + cellSize/2, ay + cellSize/2, cellSize/2, 0, Math.PI*2);
|
||||
ctx.fill();
|
||||
|
||||
ctx.shadowBlur = 0;
|
||||
}
|
||||
|
||||
if (data.ants) {
|
||||
data.ants.forEach(ant => {
|
||||
const ax = ant[0] * cellSize;
|
||||
const ay = ant[1] * cellSize;
|
||||
const speed = ant[2];
|
||||
|
||||
let color = "orange";
|
||||
if (speed > 1.3) color = `rgb(255, ${255 - (speed*80)}, 0)`;
|
||||
else if (speed < 0.9) color = `rgb(${speed*100}, ${speed*100}, 255)`;
|
||||
|
||||
ctx.fillStyle = color;
|
||||
|
||||
if (cellSize < 3) {
|
||||
ctx.fillRect(ax, ay, cellSize, cellSize);
|
||||
} else {
|
||||
ctx.beginPath();
|
||||
ctx.arc(ax + cellSize/2, ay + cellSize/2, cellSize/2, 0, Math.PI*2);
|
||||
ctx.fill();
|
||||
}
|
||||
});
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
async function update() {
|
||||
try {
|
||||
const response = await fetch('ant_state.json?t=' + Date.now());
|
||||
if (!response.ok) return;
|
||||
async function update() {
|
||||
try {
|
||||
const response = await fetch('ant_state.json?t=' + Date.now());
|
||||
if (!response.ok) return;
|
||||
const data = await response.json();
|
||||
|
||||
const data = await response.json();
|
||||
epEl.innerText = data.episode;
|
||||
epsEl.innerText = data.epsilon;
|
||||
if(data.food) foodEl.innerText = `[${data.food[0]}, ${data.food[1]}]`;
|
||||
|
||||
epEl.innerText = data.episode;
|
||||
epsEl.innerText = data.epsilon;
|
||||
if(data.food) foodEl.innerText = `[${data.food[0]}, ${data.food[1]}]`;
|
||||
drawWorld(data);
|
||||
} catch (e) { console.error(e); }
|
||||
}
|
||||
|
||||
drawWorld(data);
|
||||
} catch (e) {
|
||||
console.error(e);
|
||||
}
|
||||
}
|
||||
|
||||
setInterval(update, 50);
|
||||
|
||||
window.addEventListener('resize', () => {
|
||||
|
||||
});
|
||||
</script>
|
||||
setInterval(update, 50);
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
BIN
best_brain.bin
Normal file
BIN
best_brain.bin
Normal file
Binary file not shown.
167
src/env.zig
167
src/env.zig
|
|
@ -2,21 +2,20 @@ const std = @import("std");
|
|||
const Allocator = std.mem.Allocator;
|
||||
|
||||
pub const GRID_SIZE = 100;
|
||||
pub const NUM_ANTS = 20;
|
||||
|
||||
pub const NUM_ANTS = 1;
|
||||
pub const TILE_EMPTY = 0;
|
||||
pub const TILE_WALL = 1;
|
||||
pub const TILE_FOOD = 2;
|
||||
// Aggiungiamo il tipo visuale per il predatore (non usato nella grid array, ma per logica)
|
||||
pub const TILE_PREDATOR = 3;
|
||||
|
||||
pub const Ant = struct {
|
||||
x: usize,
|
||||
y: usize,
|
||||
alive: bool,
|
||||
energy: f32,
|
||||
|
||||
gene_speed: f32,
|
||||
gene_metabolism: f32,
|
||||
|
||||
generation: usize,
|
||||
score: usize,
|
||||
};
|
||||
|
|
@ -28,6 +27,10 @@ pub const World = struct {
|
|||
food_x: usize,
|
||||
food_y: usize,
|
||||
|
||||
// NUOVO: Coordinate del predatore
|
||||
pred_x: usize,
|
||||
pred_y: usize,
|
||||
|
||||
best_ant_idx: usize,
|
||||
prng: std.Random.DefaultPrng,
|
||||
|
||||
|
|
@ -38,6 +41,8 @@ pub const World = struct {
|
|||
.ants = undefined,
|
||||
.food_x = 0,
|
||||
.food_y = 0,
|
||||
.pred_x = 0, // Init
|
||||
.pred_y = 0, // Init
|
||||
.best_ant_idx = 0,
|
||||
.prng = std.Random.DefaultPrng.init(seed),
|
||||
};
|
||||
|
|
@ -46,8 +51,8 @@ pub const World = struct {
|
|||
}
|
||||
|
||||
pub fn reset(self: *World) void {
|
||||
const random = self.prng.random();
|
||||
|
||||
//const random = self.prng.random();
|
||||
// Reset Grid (codice invariato...)
|
||||
for (0..GRID_SIZE) |y| {
|
||||
for (0..GRID_SIZE) |x| {
|
||||
self.pheromones[y][x] = 0.0;
|
||||
|
|
@ -60,6 +65,10 @@ pub const World = struct {
|
|||
}
|
||||
self.respawnFood();
|
||||
|
||||
// NUOVO: Respawna il predatore in un angolo lontano
|
||||
self.pred_x = GRID_SIZE - 5;
|
||||
self.pred_y = GRID_SIZE - 5;
|
||||
|
||||
const center = GRID_SIZE / 2;
|
||||
for (0..NUM_ANTS) |i| {
|
||||
self.ants[i] = Ant{
|
||||
|
|
@ -67,14 +76,18 @@ pub const World = struct {
|
|||
.y = center,
|
||||
.alive = true,
|
||||
.energy = 100.0,
|
||||
.gene_speed = 0.8 + random.float(f32) * 0.4,
|
||||
.gene_metabolism = 1.0,
|
||||
|
||||
// PRIMA ERA: .gene_speed = 0.8 + random.float(f32) * 0.4,
|
||||
.gene_speed = 1.0, // Velocità fissa e garantita
|
||||
.gene_metabolism = 1.0, // Consumo fisso
|
||||
|
||||
.generation = 0,
|
||||
.score = 0,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
// respawnFood e getScent invariati...
|
||||
fn respawnFood(self: *World) void {
|
||||
const random = self.prng.random();
|
||||
while (true) {
|
||||
|
|
@ -102,41 +115,89 @@ pub const World = struct {
|
|||
return false;
|
||||
}
|
||||
|
||||
// spawnChild invariato...
|
||||
pub fn spawnChild(self: *World, parent_idx: usize) Ant {
|
||||
const parent = self.ants[parent_idx];
|
||||
const random = self.prng.random();
|
||||
|
||||
var new_speed = parent.gene_speed + (random.float(f32) * 0.2 - 0.1);
|
||||
|
||||
if (new_speed < 0.5) new_speed = 0.5;
|
||||
if (new_speed > 3.0) new_speed = 3.0;
|
||||
|
||||
const new_metabolism = new_speed * new_speed;
|
||||
// Abbiamo rimosso tutta la logica del random e le mutazioni
|
||||
|
||||
return Ant{
|
||||
.x = GRID_SIZE / 2,
|
||||
.y = GRID_SIZE / 2,
|
||||
.alive = true,
|
||||
.energy = 100.0,
|
||||
.gene_speed = new_speed,
|
||||
.gene_metabolism = new_metabolism,
|
||||
.gene_speed = 1.0, // Il figlio ha sempre velocità 1.0
|
||||
.gene_metabolism = 1.0, // Il figlio ha sempre metabolismo 1.0
|
||||
.generation = parent.generation + 1,
|
||||
.score = 0,
|
||||
};
|
||||
}
|
||||
|
||||
// updateBestAnt invariato...
|
||||
pub fn updateBestAnt(self: *World) void {
|
||||
var max_score: usize = 0;
|
||||
var best_idx: usize = 0;
|
||||
|
||||
for (self.ants, 0..) |ant, i| {
|
||||
if (ant.alive and ant.score > max_score) {
|
||||
max_score = ant.score;
|
||||
best_idx = i;
|
||||
}
|
||||
}
|
||||
if (max_score > 0) {
|
||||
self.best_ant_idx = best_idx;
|
||||
if (max_score > 0) self.best_ant_idx = best_idx;
|
||||
}
|
||||
|
||||
// NUOVO: Logica movimento predatore
|
||||
/// Advances the predator by one step.
///
/// The predator looks for the closest living ant (Manhattan distance) within
/// the chase limit. When such an ant exists it steps towards it with 60%
/// probability, moving one cell on each axis that differs (so the step may be
/// diagonal). Otherwise it takes one random orthogonal step. The move is
/// dropped when the destination is outside the grid or is a wall tile.
pub fn stepPredator(self: *World) void {
    const random = self.prng.random();

    // NOTE(review): the previous comment said "within 20 cells", but the
    // value compared against is 200 — confirm which one is intended. The
    // value is kept as-is so the predator's behaviour does not change.
    const chase_limit: usize = 200;

    var target_x = self.pred_x;
    var target_y = self.pred_y;

    // Starting from the limit makes `dist < min_dist` cover both the
    // "closest so far" and the "within range" conditions.
    var min_dist: usize = chase_limit;
    var has_target = false;

    for (self.ants) |ant| {
        if (!ant.alive) continue;

        // Manhattan distance, computed without unsigned underflow.
        const dx = if (ant.x > self.pred_x) ant.x - self.pred_x else self.pred_x - ant.x;
        const dy = if (ant.y > self.pred_y) ant.y - self.pred_y else self.pred_y - ant.y;
        const dist = dx + dy;

        if (dist < min_dist) {
            min_dist = dist;
            target_x = ant.x;
            target_y = ant.y;
            has_target = true;
        }
    }

    var move_x: isize = 0;
    var move_y: isize = 0;

    // 60% chase, 40% random walk. The random draw only happens when a
    // target exists, exactly as before, so the PRNG stream is unchanged.
    if (has_target and random.float(f32) < 0.6) {
        if (target_x > self.pred_x) move_x = 1 else if (target_x < self.pred_x) move_x = -1;
        if (target_y > self.pred_y) move_y = 1 else if (target_y < self.pred_y) move_y = -1;
    } else {
        switch (random.intRangeAtMost(u8, 0, 3)) {
            0 => move_y = -1,
            1 => move_y = 1,
            2 => move_x = -1,
            else => move_x = 1,
        }
    }

    const next_x = @as(isize, @intCast(self.pred_x)) + move_x;
    const next_y = @as(isize, @intCast(self.pred_y)) + move_y;

    // Reject moves that leave the grid BEFORE converting back to usize:
    // casting a negative value or indexing past the last row/column would
    // otherwise be illegal behaviour.
    if (next_x < 0 or next_y < 0 or next_x >= GRID_SIZE or next_y >= GRID_SIZE) return;

    const new_px: usize = @intCast(next_x);
    const new_py: usize = @intCast(next_y);

    // Walls block the predator; it simply stays where it is.
    if (self.grid[new_py][new_px] != TILE_WALL) {
        self.pred_x = new_px;
        self.pred_y = new_py;
    }
}
|
||||
|
||||
|
|
@ -144,11 +205,16 @@ pub const World = struct {
|
|||
var ant = &self.ants[ant_idx];
|
||||
if (!ant.alive) return .{ 0.0, true };
|
||||
|
||||
const random = self.prng.random();
|
||||
// NUOVO: Controllo morte immediata da predatore (inizio turno)
|
||||
if (ant.x == self.pred_x and ant.y == self.pred_y) {
|
||||
self.ants[ant_idx] = self.spawnChild(self.best_ant_idx); // Respawn
|
||||
return .{ -500.0, true }; // Morte dolorosa
|
||||
}
|
||||
|
||||
const random = self.prng.random();
|
||||
// ... (Logica energia/movimento invariata fino al loop) ...
|
||||
var moves_to_make: usize = 0;
|
||||
var chance = ant.gene_speed;
|
||||
|
||||
while (chance > 0) {
|
||||
if (chance >= 1.0) {
|
||||
moves_to_make += 1;
|
||||
|
|
@ -158,7 +224,6 @@ pub const World = struct {
|
|||
chance = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (moves_to_make == 0) {
|
||||
ant.energy -= 0.1;
|
||||
if (ant.energy <= 0) return .{ -10.0, true };
|
||||
|
|
@ -169,7 +234,6 @@ pub const World = struct {
|
|||
|
||||
for (0..moves_to_make) |_| {
|
||||
if (ant.energy <= 0) break;
|
||||
|
||||
ant.energy -= (0.2 * ant.gene_metabolism);
|
||||
|
||||
const old_dist_x = if (ant.x > self.food_x) ant.x - self.food_x else self.food_x - ant.x;
|
||||
|
|
@ -191,21 +255,37 @@ pub const World = struct {
|
|||
|
||||
ant.x = new_x;
|
||||
ant.y = new_y;
|
||||
|
||||
// NUOVO: Controllo collisione predatore DOPO il movimento
|
||||
if (ant.x == self.pred_x and ant.y == self.pred_y) {
|
||||
self.ants[ant_idx] = self.spawnChild(self.best_ant_idx);
|
||||
return .{ -500.0, true }; // Morte
|
||||
}
|
||||
|
||||
const dist_pred_x = if (ant.x > self.pred_x) ant.x - self.pred_x else self.pred_x - ant.x;
|
||||
const dist_pred_y = if (ant.y > self.pred_y) ant.y - self.pred_y else self.pred_y - ant.y;
|
||||
const dist_pred = dist_pred_x + dist_pred_y;
|
||||
|
||||
if (dist_pred == 1) {
|
||||
total_reward -= 5.0; // Troppo vicina! Rischio altissimo.
|
||||
} else if (dist_pred == 2) {
|
||||
total_reward -= 2.0; // Zona di allerta.
|
||||
}
|
||||
|
||||
self.pheromones[new_y][new_x] = 1.0;
|
||||
|
||||
const new_dist = if (new_x > self.food_x) new_x - self.food_x else self.food_x - new_x;
|
||||
const total_new_dist = new_dist + (if (new_y > self.food_y) new_y - self.food_y else self.food_y - new_y);
|
||||
|
||||
const scent = self.getScent(new_x, new_y);
|
||||
|
||||
if (total_new_dist < old_dist) total_reward += 1.5 + scent;
|
||||
if (total_new_dist > old_dist) total_reward -= 1.0;
|
||||
if (total_new_dist > old_dist) total_reward -= 0.1;
|
||||
|
||||
if (new_x == self.food_x and new_y == self.food_y) {
|
||||
self.respawnFood();
|
||||
ant.energy = 100.0;
|
||||
ant.score += 1;
|
||||
total_reward += 50.0;
|
||||
|
||||
self.updateBestAnt();
|
||||
}
|
||||
}
|
||||
|
|
@ -218,6 +298,7 @@ pub const World = struct {
|
|||
return .{ total_reward, false };
|
||||
}
|
||||
|
||||
// evaporatePheromones invariato...
|
||||
pub fn evaporatePheromones(self: *World) void {
|
||||
for (0..GRID_SIZE) |y| {
|
||||
for (0..GRID_SIZE) |x| {
|
||||
|
|
@ -226,14 +307,17 @@ pub const World = struct {
|
|||
}
|
||||
}
|
||||
|
||||
// Modificato per includere il predatore
|
||||
pub fn getAntObservation(self: *World, allocator: Allocator, ant_idx: usize) ![]f32 {
|
||||
var obs = try allocator.alloc(f32, 15);
|
||||
// Aumentiamo la dimensione a 20 floats
|
||||
var obs = try allocator.alloc(f32, 20);
|
||||
const ant = self.ants[ant_idx];
|
||||
var idx: usize = 0;
|
||||
|
||||
const ax = @as(i32, @intCast(ant.x));
|
||||
const ay = @as(i32, @intCast(ant.y));
|
||||
|
||||
// 3x3 Grid locale (9 inputs)
|
||||
var dy: i32 = -1;
|
||||
while (dy <= 1) : (dy += 1) {
|
||||
var dx: i32 = -1;
|
||||
|
|
@ -241,10 +325,13 @@ pub const World = struct {
|
|||
const py = @as(usize, @intCast(ay + dy));
|
||||
const px = @as(usize, @intCast(ax + dx));
|
||||
var val: f32 = 0.0;
|
||||
|
||||
if (self.grid[py][px] == TILE_WALL) {
|
||||
val = -1.0;
|
||||
} else if (px == self.food_x and py == self.food_y) {
|
||||
val = 1.0;
|
||||
} else if (px == self.pred_x and py == self.pred_y) {
|
||||
val = -5.0; // PREDATORE VICINO! PAURA!
|
||||
} else if (self.isOccupied(px, py) and (dx != 0 or dy != 0)) {
|
||||
val = -0.5;
|
||||
}
|
||||
|
|
@ -252,12 +339,28 @@ pub const World = struct {
|
|||
idx += 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Input originali (9..14)
|
||||
obs[9] = self.getScent(ant.x, ant.y);
|
||||
obs[10] = self.pheromones[ant.y][ant.x];
|
||||
obs[11] = if (self.food_y < ant.y) 1.0 else 0.0;
|
||||
obs[12] = if (self.food_y > ant.y) 1.0 else 0.0;
|
||||
obs[13] = if (self.food_x < ant.x) 1.0 else 0.0;
|
||||
obs[14] = if (self.food_x > ant.x) 1.0 else 0.0;
|
||||
obs[11] = if (self.food_y < ant.y) 1.0 else 0.0; // Cibo Su
|
||||
obs[12] = if (self.food_y > ant.y) 1.0 else 0.0; // Cibo Giù
|
||||
obs[13] = if (self.food_x < ant.x) 1.0 else 0.0; // Cibo SX
|
||||
obs[14] = if (self.food_x > ant.x) 1.0 else 0.0; // Cibo DX
|
||||
|
||||
// NUOVI INPUT: Sensori Predatore (15..19)
|
||||
// 15: Prossimità predatore (1.0 = vicinissimo, 0.0 = lontano)
|
||||
const pdx = if (ant.x > self.pred_x) ant.x - self.pred_x else self.pred_x - ant.x;
|
||||
const pdy = if (ant.y > self.pred_y) ant.y - self.pred_y else self.pred_y - ant.y;
|
||||
const dist_pred = @as(f32, @floatFromInt(pdx + pdy));
|
||||
obs[15] = if (dist_pred == 0) 1.0 else 1.0 / dist_pred;
|
||||
|
||||
// 16-19: Direzione Predatore
|
||||
obs[16] = if (self.pred_y < ant.y) 1.0 else 0.0; // Predatore è SOPRA
|
||||
obs[17] = if (self.pred_y > ant.y) 1.0 else 0.0; // Predatore è SOTTO
|
||||
obs[18] = if (self.pred_x < ant.x) 1.0 else 0.0; // Predatore è SINISTRA
|
||||
obs[19] = if (self.pred_x > ant.x) 1.0 else 0.0; // Predatore è DESTRA
|
||||
|
||||
return obs;
|
||||
}
|
||||
};
|
||||
|
|
|
|||
234
src/main.zig
234
src/main.zig
|
|
@ -3,12 +3,41 @@ const World = @import("env.zig").World;
|
|||
const env = @import("env.zig");
|
||||
const Network = @import("modular_network.zig").Network;
|
||||
|
||||
const GAMMA: f32 = 0.9;
|
||||
const LR: f32 = 0.005;
|
||||
// --- PARAMETRI DI TRAINING ---
|
||||
const GAMMA: f32 = 0.98;
|
||||
const LR: f32 = 0.001;
|
||||
const EPSILON_START: f32 = 1.0;
|
||||
const EPSILON_END: f32 = 0.05;
|
||||
const DECAY_RATE: f32 = 0.0001;
|
||||
const SAVE_INTERVAL: usize = 500;
|
||||
|
||||
// Export JSON
|
||||
/// Writes a JSON snapshot of the world to `file_path`.
///
/// The snapshot contains the grid size, the food and predator coordinates,
/// one `[x, y, gene_speed]` triple per living ant, and the `episode` and
/// `avg_epsilon` values passed by the caller.
///
/// The whole payload is built in a stack buffer before the destination file
/// is created. `createFile` truncates an existing file, so building first
/// means a formatting/allocation failure no longer leaves an empty file
/// behind, and a reader polling the file sees it truncated for the shortest
/// possible time.
///
/// Returns any error from formatting (e.g. the fixed buffer running out of
/// space) or from creating/writing the file.
fn exportAntJSON(world: *World, file_path: []const u8, episode: usize, avg_epsilon: f32) !void {
    // Scratch memory for both the ants array and the final document.
    var buffer: [65536]u8 = undefined;
    var fba = std.heap.FixedBufferAllocator.init(&buffer);
    const allocator = fba.allocator();

    var ants_json = std.ArrayList(u8){};
    defer ants_json.deinit(allocator);
    try ants_json.appendSlice(allocator, "[");

    // `first` tracks whether a separator is needed; the loop index cannot be
    // used because dead ants are skipped.
    var first = true;
    for (world.ants) |ant| {
        if (!ant.alive) continue; // export living ants only
        if (!first) try ants_json.appendSlice(allocator, ",");
        try std.fmt.format(ants_json.writer(allocator), "[{d},{d},{d:.2}]", .{ ant.x, ant.y, ant.gene_speed });
        first = false;
    }
    try ants_json.appendSlice(allocator, "]");

    const json = try std.fmt.allocPrint(allocator, "{{\n \"grid_size\": {d},\n \"food\": [{d}, {d}],\n \"predator\": [{d}, {d}],\n \"ants\": {s},\n \"episode\": {d},\n \"epsilon\": {d:.3}\n}}", .{ env.GRID_SIZE, world.food_x, world.food_y, world.pred_x, world.pred_y, ants_json.items, episode, avg_epsilon });

    // Only touch the file once the full document is ready.
    const file = try std.fs.cwd().createFile(file_path, .{});
    defer file.close();
    try file.writeAll(json);
}
|
||||
|
||||
// Funzioni helper
|
||||
fn maxVal(slice: []const f32) f32 {
|
||||
var m: f32 = -1.0e20;
|
||||
for (slice) |v| if (v > m) {
|
||||
|
|
@ -29,97 +58,166 @@ fn argmax(slice: []const f32) usize {
|
|||
return idx;
|
||||
}
|
||||
|
||||
fn exportAntJSON(world: *World, file_path: []const u8, episode: usize, avg_epsilon: f32) !void {
|
||||
const file = try std.fs.cwd().createFile(file_path, .{});
|
||||
defer file.close();
|
||||
|
||||
var buffer: [65536]u8 = undefined;
|
||||
var fba = std.heap.FixedBufferAllocator.init(&buffer);
|
||||
const allocator = fba.allocator();
|
||||
|
||||
var ants_json = std.ArrayList(u8){};
|
||||
defer ants_json.deinit(allocator);
|
||||
try ants_json.appendSlice(allocator, "[");
|
||||
|
||||
for (world.ants, 0..) |ant, i| {
|
||||
if (i > 0) try ants_json.appendSlice(allocator, ",");
|
||||
try std.fmt.format(ants_json.writer(allocator), "[{d},{d},{d:.2}]", .{ ant.x, ant.y, ant.gene_speed });
|
||||
}
|
||||
try ants_json.appendSlice(allocator, "]");
|
||||
|
||||
const json = try std.fmt.allocPrint(allocator, "{{\n \"grid_size\": {d},\n \"food\": [{d}, {d}],\n \"ants\": {s},\n \"episode\": {d},\n \"epsilon\": {d:.3}\n}}", .{ env.GRID_SIZE, world.food_x, world.food_y, ants_json.items, episode, avg_epsilon });
|
||||
try file.writeAll(json);
|
||||
}
|
||||
|
||||
pub fn main() !void {
|
||||
var gpa = std.heap.GeneralPurposeAllocator(.{}){};
|
||||
const allocator = gpa.allocator();
|
||||
defer _ = gpa.deinit();
|
||||
|
||||
var world = World.init(12345);
|
||||
var net = Network.init(allocator);
|
||||
defer net.deinit();
|
||||
// ==========================================
|
||||
// 🎛️ PANNELLO DI CONTROLLO PRINCIPALE
|
||||
// ==========================================
|
||||
const INFERENCE_MODE = true; // TRUE = Duello 1vs1. FALSE = Training massivo.
|
||||
const LOAD_EXISTING = false; // (Valido solo se INFERENCE_MODE=false)
|
||||
// ==========================================
|
||||
|
||||
try net.addLayer(15, 40, 111, true);
|
||||
try net.addLayer(40, 4, 222, false);
|
||||
var world = World.init(12345);
|
||||
var net: Network = undefined;
|
||||
|
||||
if (INFERENCE_MODE) {
|
||||
std.debug.print(">> CARICAMENTO CERVELLO PER ARENA 1vs1...\n", .{});
|
||||
net = Network.load(allocator, "best_brain.bin") catch |err| {
|
||||
std.debug.print("ERRORE: Non trovo 'best_brain.bin'. Fai prima il training!\nErrore: {}\n", .{err});
|
||||
return;
|
||||
};
|
||||
} else if (LOAD_EXISTING) {
|
||||
std.debug.print(">> RIPRESA TRAINING: Caricamento 'best_brain.bin'...\n", .{});
|
||||
net = try Network.load(allocator, "best_brain.bin");
|
||||
} else {
|
||||
std.debug.print(">> NUOVO TRAINING: Creazione rete neurale...\n", .{});
|
||||
net = Network.init(allocator);
|
||||
try net.addLayer(20, 45, 111, true);
|
||||
try net.addLayer(45, 4, 222, false);
|
||||
}
|
||||
defer net.deinit();
|
||||
|
||||
var prng = std.Random.DefaultPrng.init(999);
|
||||
const random = prng.random();
|
||||
|
||||
std.debug.print("--- HIVE MIND TRAINING START ---\n", .{});
|
||||
std.debug.print("Mappa: {d}x{d} | Formiche: {d}\n", .{ env.GRID_SIZE, env.GRID_SIZE, env.NUM_ANTS });
|
||||
|
||||
var global_step: usize = 0;
|
||||
var epsilon: f32 = EPSILON_START;
|
||||
|
||||
while (true) {
|
||||
world.evaporatePheromones();
|
||||
// ==========================================
|
||||
// MODALITÀ: INFERENZA (DUELLO 1 VS 1)
|
||||
// ==========================================
|
||||
if (INFERENCE_MODE) {
|
||||
std.debug.print("--- ARENA 1 VS 1: INIZIO DUELLO ---\n", .{});
|
||||
|
||||
for (0..env.NUM_ANTS) |i| {
|
||||
const current_obs = try world.getAntObservation(allocator, i);
|
||||
const margin = 5;
|
||||
world.ants[0].energy = 500.0;
|
||||
world.ants[0].x = margin;
|
||||
world.ants[0].y = margin;
|
||||
world.pred_x = env.GRID_SIZE - margin;
|
||||
world.pred_y = env.GRID_SIZE - margin;
|
||||
|
||||
var score: usize = 0;
|
||||
|
||||
while (world.ants[0].alive) {
|
||||
|
||||
// FIX 1: Stessa velocità del training!
|
||||
if (global_step % 2 == 0) world.stepPredator();
|
||||
|
||||
const current_obs = try world.getAntObservation(allocator, 0);
|
||||
defer allocator.free(current_obs);
|
||||
|
||||
var action: usize = 0;
|
||||
const q_values = net.forward(current_obs);
|
||||
var action: usize = 0;
|
||||
|
||||
if (random.float(f32) < epsilon) {
|
||||
// FIX 3: Lasciamo un 2% di "istinto" (epsilon) per non farla incantare sui muri
|
||||
if (random.float(f32) < 0.02) {
|
||||
action = random.intRangeAtMost(usize, 0, 3);
|
||||
} else {
|
||||
action = argmax(q_values);
|
||||
}
|
||||
|
||||
const result = world.stepAnt(i, action);
|
||||
const result = world.stepAnt(0, action);
|
||||
const reward = result[0];
|
||||
|
||||
var target_val = reward;
|
||||
|
||||
const next_obs = try world.getAntObservation(allocator, i);
|
||||
defer allocator.free(next_obs);
|
||||
const next_q_values = net.forward(next_obs);
|
||||
target_val += GAMMA * maxVal(next_q_values);
|
||||
|
||||
var target_vector = try allocator.alloc(f32, 4);
|
||||
defer allocator.free(target_vector);
|
||||
for (0..4) |j| target_vector[j] = q_values[j];
|
||||
target_vector[action] = target_val;
|
||||
|
||||
_ = try net.train(current_obs, target_vector, LR);
|
||||
}
|
||||
|
||||
global_step += 1;
|
||||
|
||||
if (epsilon > EPSILON_END) {
|
||||
epsilon -= DECAY_RATE;
|
||||
}
|
||||
|
||||
if (global_step % 10 == 0) {
|
||||
try exportAntJSON(&world, "ant_state.json", global_step, epsilon);
|
||||
|
||||
if (global_step % 100 == 0) {
|
||||
std.debug.print("Step: {d} | Epsilon: {d:.3} | Cibo: [{d},{d}]\r", .{ global_step, epsilon, world.food_x, world.food_y });
|
||||
if (reward > 10.0) {
|
||||
score += 1;
|
||||
std.debug.print("SCORE: {d} | Cibo preso!\n", .{score});
|
||||
world.ants[0].energy = 500.0;
|
||||
}
|
||||
|
||||
std.Thread.sleep(100 * 1_000_000);
|
||||
try exportAntJSON(&world, "ant_state.json", global_step, 0.02);
|
||||
global_step += 1;
|
||||
|
||||
std.Thread.sleep(50 * 1_000_000);
|
||||
|
||||
if (!world.ants[0].alive) {
|
||||
std.debug.print("\n>>> GAME OVER! La formica e' durata {d} step e ha mangiato {d} cibi. <<<\n", .{ global_step, score });
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
// ==========================================
|
||||
// MODALITÀ: TRAINING (SCIAME)
|
||||
// ==========================================
|
||||
else {
|
||||
std.debug.print("--- HIVE MIND: INIZIO TRAINING ---\n", .{});
|
||||
var epsilon: f32 = EPSILON_START;
|
||||
var record_score: usize = 0;
|
||||
|
||||
while (true) {
|
||||
world.evaporatePheromones();
|
||||
|
||||
if (global_step % 2 == 0) world.stepPredator();
|
||||
|
||||
for (0..env.NUM_ANTS) |i| {
|
||||
if (!world.ants[i].alive) continue;
|
||||
|
||||
const current_obs = try world.getAntObservation(allocator, i);
|
||||
defer allocator.free(current_obs);
|
||||
|
||||
var action: usize = 0;
|
||||
const q_values = net.forward(current_obs);
|
||||
|
||||
if (random.float(f32) < epsilon) {
|
||||
action = random.intRangeAtMost(usize, 0, 3);
|
||||
} else {
|
||||
action = argmax(q_values);
|
||||
}
|
||||
|
||||
const result = world.stepAnt(i, action);
|
||||
const reward = result[0];
|
||||
const died = result[1] and reward <= -50.0;
|
||||
|
||||
var target_val = reward;
|
||||
if (!died) {
|
||||
const next_obs = try world.getAntObservation(allocator, i);
|
||||
defer allocator.free(next_obs);
|
||||
const next_q_values = net.forward(next_obs);
|
||||
target_val += GAMMA * maxVal(next_q_values);
|
||||
}
|
||||
|
||||
var target_vector = try allocator.alloc(f32, 4);
|
||||
defer allocator.free(target_vector);
|
||||
for (0..4) |j| target_vector[j] = q_values[j];
|
||||
target_vector[action] = target_val;
|
||||
|
||||
_ = try net.train(current_obs, target_vector, LR);
|
||||
}
|
||||
|
||||
global_step += 1;
|
||||
if (epsilon > EPSILON_END) epsilon -= DECAY_RATE;
|
||||
|
||||
if (global_step % SAVE_INTERVAL == 0) {
|
||||
var current_total_score: usize = 0;
|
||||
for (world.ants) |ant| {
|
||||
if (ant.alive) {
|
||||
current_total_score += ant.score;
|
||||
}
|
||||
}
|
||||
if (current_total_score > record_score) {
|
||||
record_score = current_total_score;
|
||||
try net.save("best_brain.bin");
|
||||
}
|
||||
}
|
||||
|
||||
if (global_step % 10 == 0) {
|
||||
try exportAntJSON(&world, "ant_state.json", global_step, epsilon);
|
||||
if (global_step % 100 == 0) {
|
||||
std.debug.print("Step: {d} | Eps: {d:.3} | Record: {d} | Pred: [{d},{d}]\r", .{ global_step, epsilon, record_score, world.pred_x, world.pred_y });
|
||||
}
|
||||
std.Thread.sleep(20 * 1_000_000); // Veloce per il training
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue