Test per la formica che cerca il cibo

This commit is contained in:
Riccardo Forese 2026-02-03 14:05:40 +01:00
parent 56f9afd031
commit 76f80f0bd7
6 changed files with 452 additions and 102 deletions

9
ant_state.json Normal file
View file

@@ -0,0 +1,9 @@
{
"grid_size": 8,
"steps": 10,
"ant": [6, 4],
"food": [3, 1],
"walls": [[0,0],[1,0],[2,0],[3,0],[4,0],[5,0],[6,0],[7,0],[0,1],[7,1],[0,2],[7,2],[0,3],[7,3],[0,4],[7,4],[0,5],[7,5],[0,6],[7,6],[0,7],[1,7],[2,7],[3,7],[4,7],[5,7],[6,7],[7,7]],
"episode": 3889,
"epsilon": 0.049
}

99
ant_viewer.html Normal file
View file

@@ -0,0 +1,99 @@
<!DOCTYPE html>
<html lang="it">
<head>
<meta charset="UTF-8">
<title>Ant Survival AI</title>
<style>
body { margin: 0; background-color: #222; color: #eee; font-family: monospace; display: flex; flex-direction: column; align-items: center; justify-content: center; height: 100vh; }
#gameInfo { margin-bottom: 10px; text-align: center; }
canvas { border: 2px solid #555; box-shadow: 0 0 20px rgba(0,0,0,0.5); }
</style>
</head>
<body>
<div id="gameInfo">
<h1>Ant Survival DQN</h1>
<div>Stato: <span id="status">Connessione...</span></div>
<div>Steps: <span id="steps">0</span> | Reward: <span id="reward">0</span></div>
</div>
<canvas id="gridCanvas" width="600" height="600"></canvas>
<script>
const canvas = document.getElementById('gridCanvas');
const ctx = canvas.getContext('2d');
const statusEl = document.getElementById('status');
const stepsEl = document.getElementById('steps');
const rewardEl = document.getElementById('reward');
const TILE_SIZE = 40; // Pixel size of each grid cell

// Renders one frame of the world described by the polled JSON state.
// data: { grid_size, steps, ant:[x,y], food:[x,y], walls:[[x,y],...], episode, epsilon }
function drawWorld(data) {
    const gridSize = data.grid_size;
    canvas.width = gridSize * TILE_SIZE;
    canvas.height = gridSize * TILE_SIZE;

    // Background
    ctx.fillStyle = "#333";
    ctx.fillRect(0, 0, canvas.width, canvas.height);

    // Grid lines
    ctx.strokeStyle = "#444";
    for (let i = 0; i <= gridSize; i++) {
        ctx.beginPath();
        ctx.moveTo(i * TILE_SIZE, 0); ctx.lineTo(i * TILE_SIZE, canvas.height);
        ctx.stroke();
        ctx.beginPath();
        ctx.moveTo(0, i * TILE_SIZE); ctx.lineTo(canvas.width, i * TILE_SIZE);
        ctx.stroke();
    }

    // FIX: walls are drawn FIRST so the ant/food sprites are never hidden
    // underneath them (the original painted walls last).
    ctx.fillStyle = "#777";
    if (data.walls) {
        data.walls.forEach(w => {
            ctx.fillRect(w[0] * TILE_SIZE, w[1] * TILE_SIZE, TILE_SIZE, TILE_SIZE);
        });
    }

    // FOOD (green dot)
    ctx.fillStyle = "#4caf50";
    ctx.beginPath();
    ctx.arc(data.food[0] * TILE_SIZE + TILE_SIZE/2, data.food[1] * TILE_SIZE + TILE_SIZE/2, TILE_SIZE/3, 0, Math.PI*2);
    ctx.fill();

    // ANT (red dot)
    ctx.fillStyle = "#ff5722";
    ctx.beginPath();
    ctx.arc(data.ant[0] * TILE_SIZE + TILE_SIZE/2, data.ant[1] * TILE_SIZE + TILE_SIZE/2, TILE_SIZE/3, 0, Math.PI*2);
    ctx.fill();

    // Ant eyes
    ctx.fillStyle = "#fff";
    ctx.beginPath();
    ctx.arc(data.ant[0] * TILE_SIZE + TILE_SIZE/2 - 5, data.ant[1] * TILE_SIZE + TILE_SIZE/2 - 5, 3, 0, Math.PI*2);
    ctx.arc(data.ant[0] * TILE_SIZE + TILE_SIZE/2 + 5, data.ant[1] * TILE_SIZE + TILE_SIZE/2 - 5, 3, 0, Math.PI*2);
    ctx.fill();
}

// Polls ant_state.json and refreshes the canvas and the info line.
async function update() {
    try {
        // Cache-busting query so the browser always re-fetches the file.
        const response = await fetch('ant_state.json?t=' + new Date().getTime());
        if (!response.ok) throw new Error("File missing");
        const data = await response.json();
        // FIX: surface the episode/epsilon debug fields that the trainer
        // already exports (the original ignored them).
        if (data.episode !== undefined && data.epsilon !== undefined) {
            statusEl.innerText = "Running | Ep. " + data.episode + " | eps " + data.epsilon;
        } else {
            statusEl.innerText = "Running";
        }
        stepsEl.innerText = data.steps;
        // NOTE(review): the state file carries no reward field, so the
        // "Reward" span keeps its initial value — confirm intended payload.
        drawWorld(data);
    } catch (e) {
        // FIX: the original left the status frozen on "Connessione..." when
        // the state file was missing; report the disconnected state instead.
        statusEl.innerText = "In attesa di ant_state.json...";
    }
}
setInterval(update, 100); // 10 FPS
</script>
</body>
</html>

178
src/env.zig Normal file
View file

@@ -0,0 +1,178 @@
const std = @import("std");
const Allocator = std.mem.Allocator;

/// Side length of the square world; the outermost ring of cells is wall.
pub const GRID_SIZE = 8;

// Tile codes stored in `World.grid`.
pub const TILE_EMPTY = 0;
pub const TILE_WALL = 1;
pub const TILE_FOOD = 2;
pub const TILE_ANT = 3;

// Discrete actions accepted by `World.step`.
pub const ACTION_UP = 0;
pub const ACTION_DOWN = 1;
pub const ACTION_LEFT = 2;
pub const ACTION_RIGHT = 3;

/// NOTE(review): not referenced anywhere in this file — presumably kept
/// for external callers; confirm before removing.
pub const Ant = struct { x: usize, y: usize, alive: bool };

/// Grid-world RL environment: an ant must reach a food cell inside a
/// walled GRID_SIZE x GRID_SIZE arena within `max_steps` moves.
pub const World = struct {
    grid: [GRID_SIZE][GRID_SIZE]u8,
    /// Cells the ant has stepped on this episode (revisits are penalized).
    visited: [GRID_SIZE][GRID_SIZE]bool,
    ant_x: usize,
    ant_y: usize,
    food_x: usize,
    food_y: usize,
    /// Moves taken in the current episode.
    steps: usize,
    /// Per-episode step budget.
    max_steps: usize,
    prng: std.Random.DefaultPrng,

    /// Creates a world seeded with `seed` and performs an initial reset.
    pub fn init(seed: u64) World {
        var w = World{
            .grid = undefined,
            .visited = undefined,
            .ant_x = 0,
            .ant_y = 0,
            .food_x = 0,
            .food_y = 0,
            .steps = 0,
            .max_steps = 100,
            .prng = std.Random.DefaultPrng.init(seed),
        };
        w.reset();
        return w;
    }

    /// Starts a new episode: rebuilds the wall ring, clears `visited`,
    /// and places ant and food at distinct random interior cells.
    pub fn reset(self: *World) void {
        const random = self.prng.random();
        for (0..GRID_SIZE) |y| {
            for (0..GRID_SIZE) |x| {
                self.visited[y][x] = false;
                if (x == 0 or y == 0 or x == GRID_SIZE - 1 or y == GRID_SIZE - 1) {
                    self.grid[y][x] = TILE_WALL;
                } else {
                    self.grid[y][x] = TILE_EMPTY;
                }
            }
        }
        self.ant_x = random.intRangeAtMost(usize, 1, GRID_SIZE - 2);
        self.ant_y = random.intRangeAtMost(usize, 1, GRID_SIZE - 2);
        self.visited[self.ant_y][self.ant_x] = true;
        // Re-draw the food position until it differs from the ant's cell.
        while (true) {
            self.food_x = random.intRangeAtMost(usize, 1, GRID_SIZE - 2);
            self.food_y = random.intRangeAtMost(usize, 1, GRID_SIZE - 2);
            if (self.food_x != self.ant_x or self.food_y != self.ant_y) break;
        }
        self.steps = 0;
        self.updateGrid();
    }

    /// Rewrites the grid interior from scratch: empty cells plus the
    /// current food and ant markers (the ant marker wins on overlap).
    fn updateGrid(self: *World) void {
        for (1..GRID_SIZE - 1) |y| {
            for (1..GRID_SIZE - 1) |x| {
                self.grid[y][x] = TILE_EMPTY;
            }
        }
        self.grid[self.food_y][self.food_x] = TILE_FOOD;
        self.grid[self.ant_y][self.ant_x] = TILE_ANT;
    }

    /// Applies `action` and returns `.{ reward, done }`.
    /// Rewards: wall bump -10 (no movement), food +100 (episode ends),
    /// normal step -0.1 with an extra -0.5 on revisited cells or +0.2 on
    /// fresh cells; exhausting `max_steps` yields -10 and ends the episode.
    pub fn step(self: *World, action: usize) struct { f32, bool } {
        self.steps += 1;
        var new_x = self.ant_x;
        var new_y = self.ant_y;
        // Invariant: the ant is always strictly inside the wall ring, so
        // these usize adjustments cannot underflow or leave the grid.
        if (action == ACTION_UP) new_y -= 1;
        if (action == ACTION_DOWN) new_y += 1;
        if (action == ACTION_LEFT) new_x -= 1;
        if (action == ACTION_RIGHT) new_x += 1;
        const tile = self.grid[new_y][new_x];
        if (tile == TILE_WALL) {
            // FIX: the original always returned done=false on a wall bump,
            // so an agent that kept walking into walls could extend an
            // episode past max_steps forever. Terminate the episode here
            // too once the step budget is exhausted.
            return .{ -10.0, self.steps >= self.max_steps };
        }
        var move_reward: f32 = -0.1;
        if (self.visited[new_y][new_x]) {
            move_reward -= 0.5;
        } else {
            move_reward += 0.2;
        }
        self.ant_x = new_x;
        self.ant_y = new_y;
        self.visited[new_y][new_x] = true;
        self.updateGrid();
        if (new_x == self.food_x and new_y == self.food_y) {
            return .{ 100.0, true };
        }
        if (self.steps >= self.max_steps) {
            return .{ -10.0, true };
        }
        return .{ move_reward, false };
    }

    /// Returns a caller-owned slice of 10 floats: the 3x3 neighbourhood
    /// around the ant in row-major order (wall or out-of-bounds = -1,
    /// food = 1, visited = -0.5, fresh = 0) followed by the food "scent"
    /// at index 9. Caller frees with the same allocator.
    /// NOTE(review): this emits 10 features while the network in main.zig
    /// appears to be built with 9 inputs, which would silently drop the
    /// scent value — confirm the intended input width.
    pub fn getObservation(self: *World, allocator: Allocator) ![]f32 {
        var obs = try allocator.alloc(f32, 10);
        var idx: usize = 0;
        const ax = @as(i32, @intCast(self.ant_x));
        const ay = @as(i32, @intCast(self.ant_y));
        var dy: i32 = -1;
        while (dy <= 1) : (dy += 1) {
            var dx: i32 = -1;
            while (dx <= 1) : (dx += 1) {
                const py_i = ay + dy;
                const px_i = ax + dx;
                var val: f32 = 0.0;
                if (py_i >= 0 and py_i < GRID_SIZE and px_i >= 0 and px_i < GRID_SIZE) {
                    const py = @as(usize, @intCast(py_i));
                    const px = @as(usize, @intCast(px_i));
                    const content = self.grid[py][px];
                    if (content == TILE_WALL) {
                        val = -1.0;
                    } else if (content == TILE_FOOD) {
                        val = 1.0;
                    } else if (self.visited[py][px]) {
                        val = -0.5;
                    } else {
                        val = 0.0;
                    }
                } else {
                    // Cells outside the grid read as walls.
                    val = -1.0;
                }
                obs[idx] = val;
                idx += 1;
            }
        }
        obs[9] = self.getScent(self.ant_x, self.ant_y);
        return obs;
    }

    /// Food "scent" at (x, y): 1 / (manhattan_distance + 1), except
    /// exactly 1.0 when standing on the food. Monotonically increases
    /// as the ant approaches the food.
    fn getScent(self: *World, x: usize, y: usize) f32 {
        const dx = if (x > self.food_x) x - self.food_x else self.food_x - x;
        const dy = if (y > self.food_y) y - self.food_y else self.food_y - y;
        const dist = dx + dy;
        if (dist == 0) return 1.0;
        return 1.0 / (@as(f32, @floatFromInt(dist)) + 1.0);
    }
};

View file

@ -1,7 +1,6 @@
const std = @import("std");
const Allocator = std.mem.Allocator;
// Definiamo la larghezza del vettore SIMD (8 float alla volta = 256 bit)
const SimdWidth = 8;
const Vec = @Vector(SimdWidth, f32);
@ -11,6 +10,7 @@ pub const DenseLayer = struct {
output: []f32,
inputs_count: usize,
neurons_count: usize,
use_sigmoid: bool, // NUOVO CAMPO
allocator: Allocator,
fn sigmoid(x: f32) f32 {
@ -21,19 +21,17 @@ pub const DenseLayer = struct {
return x * (1.0 - x);
}
pub fn init(allocator: Allocator, inputs: usize, neurons: usize, seed: u64) !DenseLayer {
// Aggiunto parametro 'use_sigmoid'
pub fn init(allocator: Allocator, inputs: usize, neurons: usize, seed: u64, use_sigmoid: bool) !DenseLayer {
const weights = try allocator.alloc(f32, inputs * neurons);
const biases = try allocator.alloc(f32, neurons);
const output = try allocator.alloc(f32, neurons);
// --- CORREZIONE QUI SOTTO ---
// Prima era: std.rand.DefaultPrng
// Ora è: std.Random.DefaultPrng
var prng = std.Random.DefaultPrng.init(seed);
const random = prng.random();
// Inizializzazione Xavier/Glorot
for (weights) |*w| w.* = (random.float(f32) * 2.0 - 1.0) * 0.5;
// Pesi più piccoli per stabilità iniziale
for (weights) |*w| w.* = (random.float(f32) * 2.0 - 1.0) * 0.1;
for (biases) |*b| b.* = 0.0;
return DenseLayer{
@ -42,6 +40,7 @@ pub const DenseLayer = struct {
.output = output,
.inputs_count = inputs,
.neurons_count = neurons,
.use_sigmoid = use_sigmoid,
.allocator = allocator,
};
}
@ -52,46 +51,55 @@ pub const DenseLayer = struct {
self.allocator.free(self.output);
}
// --- FORWARD PASS CON SIMD ---
pub fn forward(self: *DenseLayer, input: []const f32) []const f32 {
for (0..self.neurons_count) |n| {
var sum: f32 = self.biases[n];
const w_start = n * self.inputs_count;
// 1. Processiamo a blocchi di 8 (SIMD)
// SIMD
var vec_sum: Vec = @splat(0.0);
var i: usize = 0;
while (i + SimdWidth <= self.inputs_count) : (i += SimdWidth) {
const v_in: Vec = input[i..][0..SimdWidth].*;
const v_w: Vec = self.weights[w_start + i ..][0..SimdWidth].*;
vec_sum += v_in * v_w;
}
sum += @reduce(.Add, vec_sum);
// 2. Tail Loop
// Tail Loop
while (i < self.inputs_count) : (i += 1) {
sum += input[i] * self.weights[w_start + i];
}
self.output[n] = sigmoid(sum);
// CORREZIONE: Se non usiamo sigmoide, è lineare (passa 'sum' diretto)
if (self.use_sigmoid) {
self.output[n] = sigmoid(sum);
} else {
self.output[n] = sum;
}
}
return self.output;
}
// --- BACKWARD PASS CON SIMD ---
pub fn backward(self: *DenseLayer, output_gradient: []const f32, input_vals: []const f32, learning_rate: f32) []f32 {
const input_gradient = self.allocator.alloc(f32, self.inputs_count) catch @panic("OOM");
@memset(input_gradient, 0.0);
for (0..self.neurons_count) |n| {
const delta = output_gradient[n] * sigmoidDerivative(self.output[n]);
// CORREZIONE DERIVATA:
// Se Sigmoide: f'(x) = out * (1 - out)
// Se Lineare: f'(x) = 1.0
var derivative: f32 = 1.0;
if (self.use_sigmoid) {
derivative = sigmoidDerivative(self.output[n]);
}
const delta = output_gradient[n] * derivative;
const w_start = n * self.inputs_count;
self.biases[n] -= learning_rate * delta;
// SIMD LOOP
// SIMD LOOP (Backprop)
const v_delta: Vec = @splat(delta);
const v_lr: Vec = @splat(learning_rate);
const v_change_factor = v_delta * v_lr;
@ -101,26 +109,21 @@ pub const DenseLayer = struct {
var v_w: Vec = self.weights[w_start + i ..][0..SimdWidth].*;
const v_in: Vec = input_vals[i..][0..SimdWidth].*;
// Backprop error
var v_in_grad: Vec = input_gradient[i..][0..SimdWidth].*;
v_in_grad += v_w * v_delta;
input_gradient[i..][0..SimdWidth].* = v_in_grad;
// Update weights
v_w -= v_in * v_change_factor;
self.weights[w_start + i ..][0..SimdWidth].* = v_w;
}
// TAIL LOOP
while (i < self.inputs_count) : (i += 1) {
const w_idx = w_start + i;
const old_weight = self.weights[w_idx];
input_gradient[i] += old_weight * delta;
self.weights[w_idx] -= input_vals[i] * delta * learning_rate;
}
}
return input_gradient;
}
};

View file

@ -1,100 +1,159 @@
const std = @import("std");
const World = @import("env.zig").World;
const env = @import("env.zig");
const Network = @import("modular_network.zig").Network;
const MnistData = @import("mnist.zig").MnistData;
// --- PARAMETRI AI ---
const GAMMA: f32 = 0.9; // Quanto conta il futuro? (0.9 = lungimirante)
const LR: f32 = 0.005; // Learning Rate
const EPSILON_START: f32 = 1.0; // Inizia esplorando al 100%
const EPSILON_END: f32 = 0.05; // Finisce esplorando al 5%
const DECAY_RATE: f32 = 0.001; // Quanto velocemente smette di essere curiosa
// Writes the current world state to `file_path` as a small JSON document
// (grid size, steps, ant/food positions, wall list, episode, epsilon) that
// the browser viewer polls. All JSON assembly happens inside a fixed 8 KiB
// stack buffer; if the output outgrows it the FixedBufferAllocator fails
// and the error propagates to the caller.
// NOTE(review): `std.ArrayList(u8){}` with the allocator passed per call is
// the unmanaged-list style of newer Zig std — verify against the project's
// pinned Zig version.
fn exportAntJSON(world: *World, file_path: []const u8, episode: usize, epsilon: f32) !void {
// Create/truncate the output file; closed on every exit path.
const file = try std.fs.cwd().createFile(file_path, .{});
defer file.close();
var buffer: [8192]u8 = undefined;
var fba = std.heap.FixedBufferAllocator.init(&buffer);
const allocator = fba.allocator();
// Build the "walls" array as raw JSON text: [[x,y],[x,y],...]
var walls_json = std.ArrayList(u8){};
defer walls_json.deinit(allocator);
try walls_json.appendSlice(allocator, "[");
var first = true;
for (0..env.GRID_SIZE) |y| {
for (0..env.GRID_SIZE) |x| {
if (world.grid[y][x] == env.TILE_WALL) {
// Comma-separate every wall entry after the first.
if (!first) try walls_json.appendSlice(allocator, ",");
try std.fmt.format(walls_json.writer(allocator), "[{d},{d}]", .{ x, y });
first = false;
}
}
}
try walls_json.appendSlice(allocator, "]");
// Extra debug info (epsilon and episode) is included for the viewer.
const json = try std.fmt.allocPrint(allocator, "{{\n \"grid_size\": {d},\n \"steps\": {d},\n \"ant\": [{d}, {d}],\n \"food\": [{d}, {d}],\n \"walls\": {s},\n \"episode\": {d},\n \"epsilon\": {d:.3}\n}}", .{ env.GRID_SIZE, world.steps, world.ant_x, world.ant_y, world.food_x, world.food_y, walls_json.items, episode, epsilon });
try file.writeAll(json);
}
// Returns the largest value in `slice` (used for max Q-value lookup).
// FIX: the original seeded the running maximum with the sentinel -10000.0,
// which returns the wrong answer whenever every element is below -10000.
// Seed with -inf instead; an empty slice now yields -inf (was -10000.0).
fn maxVal(slice: []const f32) f32 {
    var m: f32 = -std.math.inf(f32);
    for (slice) |v| {
        if (v > m) m = v;
    }
    return m;
}
// Returns the index of the largest element in `slice` (first index on ties,
// matching the original's strict `>` comparison; 0 for an empty slice).
// FIX: the original seeded the maximum with the sentinel -10000.0, which
// mis-handled slices whose values are all below -10000. Use -inf instead.
fn argmax(slice: []const f32) usize {
    var best: f32 = -std.math.inf(f32);
    var best_idx: usize = 0;
    for (slice, 0..) |v, i| {
        if (v > best) {
            best = v;
            best_idx = i;
        }
    }
    return best_idx;
}
pub fn main() !void {
var gpa = std.heap.GeneralPurposeAllocator(.{}){};
const allocator = gpa.allocator();
defer _ = gpa.deinit();
std.debug.print("--- CARICAMENTO MNIST ---\n", .{});
var dataset = try MnistData.init(allocator, "data/train-images-idx3-ubyte", "data/train-labels-idx1-ubyte", 5000);
defer dataset.deinit();
const save_path = "brain.bin";
var net: Network = undefined;
var needs_training = true;
// Load or Init
if (Network.load(allocator, save_path)) |loaded_net| {
std.debug.print(">>> SALVATAGGIO TROVATO! Skip training.\n", .{});
net = loaded_net;
needs_training = false;
} else |_| {
std.debug.print(">>> NUOVA RETE.\n", .{});
net = Network.init(allocator);
try net.addLayer(784, 64, 111);
try net.addLayer(64, 32, 222);
try net.addLayer(32, 10, 333);
}
// 1. Inizializza Ambiente e Rete
var world = World.init(12345);
var net = Network.init(allocator);
defer net.deinit();
// --- TRAINING ---
if (needs_training) {
std.debug.print("--- INIZIO TRAINING (SIMD ACCELERATED) ---\n", .{});
const lr: f32 = 0.1;
const epochs = 10; // Bastano meno epoche ora
// Input: 9 (3x3 vista) -> Hidden: 24 -> Output: 4 (Su, Giù, Sx, Dx)
try net.addLayer(9, 24, 111, true);
try net.addLayer(24, 4, 222, false);
var epoch: usize = 0;
while (epoch < epochs) : (epoch += 1) {
var total_loss: f32 = 0.0;
var correct: usize = 0;
for (dataset.images, 0..) |img, i| {
total_loss += try net.train(img, dataset.labels[i], lr);
const out = net.forward(img);
if (argmax(out) == argmax(dataset.labels[i])) correct += 1;
}
const accuracy = @as(f32, @floatFromInt(correct)) / @as(f32, @floatFromInt(dataset.images.len)) * 100.0;
std.debug.print("Epoca {d}: Acc: {d:.2}%\n", .{ epoch, accuracy });
// Durante il training passiamo 'null' come immagine per non rallentare troppo
try net.exportJSON("network_state.json", epoch, total_loss / 5000.0, null);
}
try net.save(save_path);
}
// --- SHOWCASE MODE (Il Gran Finale) ---
std.debug.print("\n--- AVVIO DEMO VISUALE ---\n", .{});
std.debug.print("Guarda il browser! (CTRL+C per uscire)\n", .{});
var prng = std.Random.DefaultPrng.init(0);
var prng = std.Random.DefaultPrng.init(999);
const random = prng.random();
while (true) {
// 1. Pesca un'immagine a caso
const idx = random.intRangeAtMost(usize, 0, dataset.images.len - 1);
const img = dataset.images[idx];
const label = argmax(dataset.labels[idx]);
std.debug.print("--- AI TRAINING START ---\n", .{});
// 2. Fai la previsione
const out = net.forward(img);
const prediction = argmax(out);
var episode: usize = 0;
var epsilon: f32 = EPSILON_START;
// Calcoliamo una "Loss" finta solo per il grafico
const loss: f32 = if (prediction == label) 0.0 else 1.0;
while (true) : (episode += 1) {
world.reset();
var done = false;
// 3. Stampa su console
const result_str = if (prediction == label) "CORRETTO" else "SBAGLIATO";
std.debug.print("Input: {d} | AI Dice: {d} -> {s}\r", .{ label, prediction, result_str });
while (!done) {
// A. OSSERVAZIONE
const current_obs = try world.getObservation(allocator); // Alloca memoria
defer allocator.free(current_obs); // Libera alla fine del ciclo
// 4. ESPORTA TUTTO (inclusa l'immagine) per il browser
try net.exportJSON("network_state.json", 999, loss, img);
// B. SCEGLI AZIONE (Epsilon-Greedy)
var action: usize = 0;
const q_values = net.forward(current_obs); // Forward pass
// 5. Aspetta un secondo per farci godere la scena
std.Thread.sleep(1000 * 1_000_000);
}
}
if (random.float(f32) < epsilon) {
// Esplorazione (Random)
action = random.intRangeAtMost(usize, 0, 3);
} else {
// Sfruttamento (Usa il cervello)
action = argmax(q_values);
}
fn argmax(slice: []const f32) usize {
var max_val: f32 = -1000.0;
var max_idx: usize = 0;
for (slice, 0..) |val, i| {
if (val > max_val) {
max_val = val;
max_idx = i;
// C. ESEGUI AZIONE
const result = world.step(action);
const reward = result[0];
done = result[1];
// D. ADDESTRAMENTO (Q-Learning Update)
// Target = Reward + Gamma * Max(Q_Next)
// Dobbiamo calcolare il Q-value dello stato successivo
var target_val = reward;
if (!done) {
const next_obs = try world.getObservation(allocator);
defer allocator.free(next_obs);
const next_q_values = net.forward(next_obs);
target_val += GAMMA * maxVal(next_q_values);
}
// Creiamo il vettore target per il backpropagation
// Vogliamo che SOLO il neurone dell'azione presa si avvicini al target_val
// Gli altri neuroni devono restare come sono.
var target_vector = try allocator.alloc(f32, 4);
defer allocator.free(target_vector);
for (0..4) |i| target_vector[i] = q_values[i]; // Copia i vecchi valori
target_vector[action] = target_val; // Aggiorna solo quello scelto
// Train della rete su questo singolo passo
_ = try net.train(current_obs, target_vector, LR);
// Export e Delay
try exportAntJSON(&world, "ant_state.json", episode, epsilon);
// Se stiamo esplorando molto (inizio), andiamo veloci. Se siamo bravi, rallentiamo per goderci la scena.
if (episode % 10 == 0) {
// Mostra ogni 10 episodi a velocità umana
std.Thread.sleep(50 * 1_000_000);
} else {
// Allenamento veloce (quasi istantaneo)
// Togli il commento sotto se vuoi vedere TUTTO, ma sarà lento
// std.Thread.sleep(10 * 1_000_000);
}
}
// Fine episodio
if (epsilon > EPSILON_END) {
epsilon -= DECAY_RATE;
}
if (episode % 10 == 0) {
std.debug.print("Episodio {d} | Epsilon: {d:.3} | Steps: {d}\n", .{ episode, epsilon, world.steps });
}
}
return max_idx;
}

View file

@ -20,8 +20,8 @@ pub const Network = struct {
self.layers.deinit(self.allocator);
}
pub fn addLayer(self: *Network, input_size: usize, output_size: usize, seed: u64) !void {
const layer = try DenseLayer.init(self.allocator, input_size, output_size, seed);
pub fn addLayer(self: *Network, input_size: usize, output_size: usize, seed: u64, use_sigmoid: bool) !void {
const layer = try DenseLayer.init(self.allocator, input_size, output_size, seed, use_sigmoid);
try self.layers.append(self.allocator, layer);
}
@ -148,7 +148,9 @@ pub const Network = struct {
_ = try file.readAll(std.mem.asBytes(&inputs));
_ = try file.readAll(std.mem.asBytes(&neurons));
try net.addLayer(@intCast(inputs), @intCast(neurons), 0);
// SE è l'ultimo layer, niente sigmoide!
const is_last = (i == layer_count - 1);
try net.addLayer(@intCast(inputs), @intCast(neurons), 0, !is_last);
const layer = &net.layers.items[net.layers.items.len - 1];