Test per la formica che cerca il cibo

This commit is contained in:
Riccardo Forese 2026-02-03 14:05:40 +01:00
parent 56f9afd031
commit 76f80f0bd7
6 changed files with 452 additions and 102 deletions

9
ant_state.json Normal file
View file

@@ -0,0 +1,9 @@
{
"grid_size": 8,
"steps": 10,
"ant": [6, 4],
"food": [3, 1],
"walls": [[0,0],[1,0],[2,0],[3,0],[4,0],[5,0],[6,0],[7,0],[0,1],[7,1],[0,2],[7,2],[0,3],[7,3],[0,4],[7,4],[0,5],[7,5],[0,6],[7,6],[0,7],[1,7],[2,7],[3,7],[4,7],[5,7],[6,7],[7,7]],
"episode": 3889,
"epsilon": 0.049
}

99
ant_viewer.html Normal file
View file

@@ -0,0 +1,99 @@
<!DOCTYPE html>
<html lang="it">
<head>
<meta charset="UTF-8">
<title>Ant Survival AI</title>
<style>
body { margin: 0; background-color: #222; color: #eee; font-family: monospace; display: flex; flex-direction: column; align-items: center; justify-content: center; height: 100vh; }
#gameInfo { margin-bottom: 10px; text-align: center; }
canvas { border: 2px solid #555; box-shadow: 0 0 20px rgba(0,0,0,0.5); }
</style>
</head>
<body>
<div id="gameInfo">
<h1>Ant Survival DQN</h1>
<div>Stato: <span id="status">Connessione...</span></div>
<div>Steps: <span id="steps">0</span> | Reward: <span id="reward">0</span></div>
</div>
<canvas id="gridCanvas" width="600" height="600"></canvas>
<script>
const canvas = document.getElementById('gridCanvas');
const ctx = canvas.getContext('2d');
const statusEl = document.getElementById('status');
const stepsEl = document.getElementById('steps');
const rewardEl = document.getElementById('reward');
const TILE_SIZE = 40; // Pixel size of each grid cell

// Renders one frame of the world described by the polled JSON state.
// data: { grid_size, steps, ant:[x,y], food:[x,y], walls:[[x,y],...], episode, epsilon }
function drawWorld(data) {
    const gridSize = data.grid_size;
    canvas.width = gridSize * TILE_SIZE;
    canvas.height = gridSize * TILE_SIZE;

    // Background
    ctx.fillStyle = "#333";
    ctx.fillRect(0, 0, canvas.width, canvas.height);

    // Grid lines
    ctx.strokeStyle = "#444";
    for (let i = 0; i <= gridSize; i++) {
        ctx.beginPath();
        ctx.moveTo(i * TILE_SIZE, 0); ctx.lineTo(i * TILE_SIZE, canvas.height);
        ctx.stroke();
        ctx.beginPath();
        ctx.moveTo(0, i * TILE_SIZE); ctx.lineTo(canvas.width, i * TILE_SIZE);
        ctx.stroke();
    }

    // FIX: walls are drawn FIRST so the ant/food sprites are never hidden
    // underneath them (the original painted walls last).
    ctx.fillStyle = "#777";
    if (data.walls) {
        data.walls.forEach(w => {
            ctx.fillRect(w[0] * TILE_SIZE, w[1] * TILE_SIZE, TILE_SIZE, TILE_SIZE);
        });
    }

    // FOOD (green dot)
    ctx.fillStyle = "#4caf50";
    ctx.beginPath();
    ctx.arc(data.food[0] * TILE_SIZE + TILE_SIZE/2, data.food[1] * TILE_SIZE + TILE_SIZE/2, TILE_SIZE/3, 0, Math.PI*2);
    ctx.fill();

    // ANT (red dot)
    ctx.fillStyle = "#ff5722";
    ctx.beginPath();
    ctx.arc(data.ant[0] * TILE_SIZE + TILE_SIZE/2, data.ant[1] * TILE_SIZE + TILE_SIZE/2, TILE_SIZE/3, 0, Math.PI*2);
    ctx.fill();

    // Ant eyes
    ctx.fillStyle = "#fff";
    ctx.beginPath();
    ctx.arc(data.ant[0] * TILE_SIZE + TILE_SIZE/2 - 5, data.ant[1] * TILE_SIZE + TILE_SIZE/2 - 5, 3, 0, Math.PI*2);
    ctx.arc(data.ant[0] * TILE_SIZE + TILE_SIZE/2 + 5, data.ant[1] * TILE_SIZE + TILE_SIZE/2 - 5, 3, 0, Math.PI*2);
    ctx.fill();
}

// Polls ant_state.json and refreshes the canvas and the info line.
async function update() {
    try {
        // Cache-busting query so the browser always re-fetches the file.
        const response = await fetch('ant_state.json?t=' + new Date().getTime());
        if (!response.ok) throw new Error("File missing");
        const data = await response.json();
        // FIX: surface the episode/epsilon debug fields that the trainer
        // already exports (the original ignored them).
        if (data.episode !== undefined && data.epsilon !== undefined) {
            statusEl.innerText = "Running | Ep. " + data.episode + " | eps " + data.epsilon;
        } else {
            statusEl.innerText = "Running";
        }
        stepsEl.innerText = data.steps;
        // NOTE(review): the state file carries no reward field, so the
        // "Reward" span keeps its initial value — confirm intended payload.
        drawWorld(data);
    } catch (e) {
        // FIX: the original left the status frozen on "Connessione..." when
        // the state file was missing; report the disconnected state instead.
        statusEl.innerText = "In attesa di ant_state.json...";
    }
}
setInterval(update, 100); // 10 FPS
</script>
</body>
</html>

178
src/env.zig Normal file
View file

@@ -0,0 +1,178 @@
const std = @import("std");
const Allocator = std.mem.Allocator;

/// Side length of the square world; the outermost ring of cells is wall.
pub const GRID_SIZE = 8;

// Tile codes stored in `World.grid`.
pub const TILE_EMPTY = 0;
pub const TILE_WALL = 1;
pub const TILE_FOOD = 2;
pub const TILE_ANT = 3;

// Discrete actions accepted by `World.step`.
pub const ACTION_UP = 0;
pub const ACTION_DOWN = 1;
pub const ACTION_LEFT = 2;
pub const ACTION_RIGHT = 3;

/// NOTE(review): not referenced anywhere in this file — presumably kept
/// for external callers; confirm before removing.
pub const Ant = struct { x: usize, y: usize, alive: bool };

/// Grid-world RL environment: an ant must reach a food cell inside a
/// walled GRID_SIZE x GRID_SIZE arena within `max_steps` moves.
pub const World = struct {
    grid: [GRID_SIZE][GRID_SIZE]u8,
    /// Cells the ant has stepped on this episode (revisits are penalized).
    visited: [GRID_SIZE][GRID_SIZE]bool,
    ant_x: usize,
    ant_y: usize,
    food_x: usize,
    food_y: usize,
    /// Moves taken in the current episode.
    steps: usize,
    /// Per-episode step budget.
    max_steps: usize,
    prng: std.Random.DefaultPrng,

    /// Creates a world seeded with `seed` and performs an initial reset.
    pub fn init(seed: u64) World {
        var w = World{
            .grid = undefined,
            .visited = undefined,
            .ant_x = 0,
            .ant_y = 0,
            .food_x = 0,
            .food_y = 0,
            .steps = 0,
            .max_steps = 100,
            .prng = std.Random.DefaultPrng.init(seed),
        };
        w.reset();
        return w;
    }

    /// Starts a new episode: rebuilds the wall ring, clears `visited`,
    /// and places ant and food at distinct random interior cells.
    pub fn reset(self: *World) void {
        const random = self.prng.random();
        for (0..GRID_SIZE) |y| {
            for (0..GRID_SIZE) |x| {
                self.visited[y][x] = false;
                if (x == 0 or y == 0 or x == GRID_SIZE - 1 or y == GRID_SIZE - 1) {
                    self.grid[y][x] = TILE_WALL;
                } else {
                    self.grid[y][x] = TILE_EMPTY;
                }
            }
        }
        self.ant_x = random.intRangeAtMost(usize, 1, GRID_SIZE - 2);
        self.ant_y = random.intRangeAtMost(usize, 1, GRID_SIZE - 2);
        self.visited[self.ant_y][self.ant_x] = true;
        // Re-draw the food position until it differs from the ant's cell.
        while (true) {
            self.food_x = random.intRangeAtMost(usize, 1, GRID_SIZE - 2);
            self.food_y = random.intRangeAtMost(usize, 1, GRID_SIZE - 2);
            if (self.food_x != self.ant_x or self.food_y != self.ant_y) break;
        }
        self.steps = 0;
        self.updateGrid();
    }

    /// Rewrites the grid interior from scratch: empty cells plus the
    /// current food and ant markers (the ant marker wins on overlap).
    fn updateGrid(self: *World) void {
        for (1..GRID_SIZE - 1) |y| {
            for (1..GRID_SIZE - 1) |x| {
                self.grid[y][x] = TILE_EMPTY;
            }
        }
        self.grid[self.food_y][self.food_x] = TILE_FOOD;
        self.grid[self.ant_y][self.ant_x] = TILE_ANT;
    }

    /// Applies `action` and returns `.{ reward, done }`.
    /// Rewards: wall bump -10 (no movement), food +100 (episode ends),
    /// normal step -0.1 with an extra -0.5 on revisited cells or +0.2 on
    /// fresh cells; exhausting `max_steps` yields -10 and ends the episode.
    pub fn step(self: *World, action: usize) struct { f32, bool } {
        self.steps += 1;
        var new_x = self.ant_x;
        var new_y = self.ant_y;
        // Invariant: the ant is always strictly inside the wall ring, so
        // these usize adjustments cannot underflow or leave the grid.
        if (action == ACTION_UP) new_y -= 1;
        if (action == ACTION_DOWN) new_y += 1;
        if (action == ACTION_LEFT) new_x -= 1;
        if (action == ACTION_RIGHT) new_x += 1;
        const tile = self.grid[new_y][new_x];
        if (tile == TILE_WALL) {
            // FIX: the original always returned done=false on a wall bump,
            // so an agent that kept walking into walls could extend an
            // episode past max_steps forever. Terminate the episode here
            // too once the step budget is exhausted.
            return .{ -10.0, self.steps >= self.max_steps };
        }
        var move_reward: f32 = -0.1;
        if (self.visited[new_y][new_x]) {
            move_reward -= 0.5;
        } else {
            move_reward += 0.2;
        }
        self.ant_x = new_x;
        self.ant_y = new_y;
        self.visited[new_y][new_x] = true;
        self.updateGrid();
        if (new_x == self.food_x and new_y == self.food_y) {
            return .{ 100.0, true };
        }
        if (self.steps >= self.max_steps) {
            return .{ -10.0, true };
        }
        return .{ move_reward, false };
    }

    /// Returns a caller-owned slice of 10 floats: the 3x3 neighbourhood
    /// around the ant in row-major order (wall or out-of-bounds = -1,
    /// food = 1, visited = -0.5, fresh = 0) followed by the food "scent"
    /// at index 9. Caller frees with the same allocator.
    /// NOTE(review): this emits 10 features while the network in main.zig
    /// appears to be built with 9 inputs, which would silently drop the
    /// scent value — confirm the intended input width.
    pub fn getObservation(self: *World, allocator: Allocator) ![]f32 {
        var obs = try allocator.alloc(f32, 10);
        var idx: usize = 0;
        const ax = @as(i32, @intCast(self.ant_x));
        const ay = @as(i32, @intCast(self.ant_y));
        var dy: i32 = -1;
        while (dy <= 1) : (dy += 1) {
            var dx: i32 = -1;
            while (dx <= 1) : (dx += 1) {
                const py_i = ay + dy;
                const px_i = ax + dx;
                var val: f32 = 0.0;
                if (py_i >= 0 and py_i < GRID_SIZE and px_i >= 0 and px_i < GRID_SIZE) {
                    const py = @as(usize, @intCast(py_i));
                    const px = @as(usize, @intCast(px_i));
                    const content = self.grid[py][px];
                    if (content == TILE_WALL) {
                        val = -1.0;
                    } else if (content == TILE_FOOD) {
                        val = 1.0;
                    } else if (self.visited[py][px]) {
                        val = -0.5;
                    } else {
                        val = 0.0;
                    }
                } else {
                    // Cells outside the grid read as walls.
                    val = -1.0;
                }
                obs[idx] = val;
                idx += 1;
            }
        }
        obs[9] = self.getScent(self.ant_x, self.ant_y);
        return obs;
    }

    /// Food "scent" at (x, y): 1 / (manhattan_distance + 1), except
    /// exactly 1.0 when standing on the food. Monotonically increases
    /// as the ant approaches the food.
    fn getScent(self: *World, x: usize, y: usize) f32 {
        const dx = if (x > self.food_x) x - self.food_x else self.food_x - x;
        const dy = if (y > self.food_y) y - self.food_y else self.food_y - y;
        const dist = dx + dy;
        if (dist == 0) return 1.0;
        return 1.0 / (@as(f32, @floatFromInt(dist)) + 1.0);
    }
};

View file

@ -1,7 +1,6 @@
const std = @import("std");
const Allocator = std.mem.Allocator;
// Definiamo la larghezza del vettore SIMD (8 float alla volta = 256 bit)
const SimdWidth = 8;
const Vec = @Vector(SimdWidth, f32);
@ -11,6 +10,7 @@ pub const DenseLayer = struct {
output: []f32,
inputs_count: usize,
neurons_count: usize,
use_sigmoid: bool, // NUOVO CAMPO
allocator: Allocator,
fn sigmoid(x: f32) f32 {
@ -21,19 +21,17 @@ pub const DenseLayer = struct {
return x * (1.0 - x);
}
pub fn init(allocator: Allocator, inputs: usize, neurons: usize, seed: u64) !DenseLayer {
// Aggiunto parametro 'use_sigmoid'
pub fn init(allocator: Allocator, inputs: usize, neurons: usize, seed: u64, use_sigmoid: bool) !DenseLayer {
const weights = try allocator.alloc(f32, inputs * neurons);
const biases = try allocator.alloc(f32, neurons);
const output = try allocator.alloc(f32, neurons);
// --- CORREZIONE QUI SOTTO ---
// Prima era: std.rand.DefaultPrng
// Ora è: std.Random.DefaultPrng
var prng = std.Random.DefaultPrng.init(seed);
const random = prng.random();
// Inizializzazione Xavier/Glorot
for (weights) |*w| w.* = (random.float(f32) * 2.0 - 1.0) * 0.5;
// Pesi più piccoli per stabilità iniziale
for (weights) |*w| w.* = (random.float(f32) * 2.0 - 1.0) * 0.1;
for (biases) |*b| b.* = 0.0;
return DenseLayer{
@ -42,6 +40,7 @@ pub const DenseLayer = struct {
.output = output,
.inputs_count = inputs,
.neurons_count = neurons,
.use_sigmoid = use_sigmoid,
.allocator = allocator,
};
}
@ -52,46 +51,55 @@ pub const DenseLayer = struct {
self.allocator.free(self.output);
}
// --- FORWARD PASS CON SIMD ---
pub fn forward(self: *DenseLayer, input: []const f32) []const f32 {
for (0..self.neurons_count) |n| {
var sum: f32 = self.biases[n];
const w_start = n * self.inputs_count;
// 1. Processiamo a blocchi di 8 (SIMD)
// SIMD
var vec_sum: Vec = @splat(0.0);
var i: usize = 0;
while (i + SimdWidth <= self.inputs_count) : (i += SimdWidth) {
const v_in: Vec = input[i..][0..SimdWidth].*;
const v_w: Vec = self.weights[w_start + i ..][0..SimdWidth].*;
vec_sum += v_in * v_w;
}
sum += @reduce(.Add, vec_sum);
// 2. Tail Loop
// Tail Loop
while (i < self.inputs_count) : (i += 1) {
sum += input[i] * self.weights[w_start + i];
}
self.output[n] = sigmoid(sum);
// CORREZIONE: Se non usiamo sigmoide, è lineare (passa 'sum' diretto)
if (self.use_sigmoid) {
self.output[n] = sigmoid(sum);
} else {
self.output[n] = sum;
}
}
return self.output;
}
// --- BACKWARD PASS CON SIMD ---
pub fn backward(self: *DenseLayer, output_gradient: []const f32, input_vals: []const f32, learning_rate: f32) []f32 {
const input_gradient = self.allocator.alloc(f32, self.inputs_count) catch @panic("OOM");
@memset(input_gradient, 0.0);
for (0..self.neurons_count) |n| {
const delta = output_gradient[n] * sigmoidDerivative(self.output[n]);
// CORREZIONE DERIVATA:
// Se Sigmoide: f'(x) = out * (1 - out)
// Se Lineare: f'(x) = 1.0
var derivative: f32 = 1.0;
if (self.use_sigmoid) {
derivative = sigmoidDerivative(self.output[n]);
}
const delta = output_gradient[n] * derivative;
const w_start = n * self.inputs_count;
self.biases[n] -= learning_rate * delta;
// SIMD LOOP
// SIMD LOOP (Backprop)
const v_delta: Vec = @splat(delta);
const v_lr: Vec = @splat(learning_rate);
const v_change_factor = v_delta * v_lr;
@ -101,26 +109,21 @@ pub const DenseLayer = struct {
var v_w: Vec = self.weights[w_start + i ..][0..SimdWidth].*;
const v_in: Vec = input_vals[i..][0..SimdWidth].*;
// Backprop error
var v_in_grad: Vec = input_gradient[i..][0..SimdWidth].*;
v_in_grad += v_w * v_delta;
input_gradient[i..][0..SimdWidth].* = v_in_grad;
// Update weights
v_w -= v_in * v_change_factor;
self.weights[w_start + i ..][0..SimdWidth].* = v_w;
}
// TAIL LOOP
while (i < self.inputs_count) : (i += 1) {
const w_idx = w_start + i;
const old_weight = self.weights[w_idx];
input_gradient[i] += old_weight * delta;
self.weights[w_idx] -= input_vals[i] * delta * learning_rate;
}
}
return input_gradient;
}
};

View file

@ -1,100 +1,159 @@
const std = @import("std");
const World = @import("env.zig").World;
const env = @import("env.zig");
const Network = @import("modular_network.zig").Network;
const MnistData = @import("mnist.zig").MnistData;
// --- PARAMETRI AI ---
const GAMMA: f32 = 0.9; // Quanto conta il futuro? (0.9 = lungimirante)
const LR: f32 = 0.005; // Learning Rate
const EPSILON_START: f32 = 1.0; // Inizia esplorando al 100%
const EPSILON_END: f32 = 0.05; // Finisce esplorando al 5%
const DECAY_RATE: f32 = 0.001; // Quanto velocemente smette di essere curiosa
// Writes the current world state to `file_path` as a small JSON document
// (grid size, steps, ant/food positions, wall list, episode, epsilon) that
// the browser viewer polls. All JSON assembly happens inside a fixed 8 KiB
// stack buffer; if the output outgrows it the FixedBufferAllocator fails
// and the error propagates to the caller.
// NOTE(review): `std.ArrayList(u8){}` with the allocator passed per call is
// the unmanaged-list style of newer Zig std — verify against the project's
// pinned Zig version.
fn exportAntJSON(world: *World, file_path: []const u8, episode: usize, epsilon: f32) !void {
// Create/truncate the output file; closed on every exit path.
const file = try std.fs.cwd().createFile(file_path, .{});
defer file.close();
var buffer: [8192]u8 = undefined;
var fba = std.heap.FixedBufferAllocator.init(&buffer);
const allocator = fba.allocator();
// Build the "walls" array as raw JSON text: [[x,y],[x,y],...]
var walls_json = std.ArrayList(u8){};
defer walls_json.deinit(allocator);
try walls_json.appendSlice(allocator, "[");
var first = true;
for (0..env.GRID_SIZE) |y| {
for (0..env.GRID_SIZE) |x| {
if (world.grid[y][x] == env.TILE_WALL) {
// Comma-separate every wall entry after the first.
if (!first) try walls_json.appendSlice(allocator, ",");
try std.fmt.format(walls_json.writer(allocator), "[{d},{d}]", .{ x, y });
first = false;
}
}
}
try walls_json.appendSlice(allocator, "]");
// Extra debug info (epsilon and episode) is included for the viewer.
const json = try std.fmt.allocPrint(allocator, "{{\n \"grid_size\": {d},\n \"steps\": {d},\n \"ant\": [{d}, {d}],\n \"food\": [{d}, {d}],\n \"walls\": {s},\n \"episode\": {d},\n \"epsilon\": {d:.3}\n}}", .{ env.GRID_SIZE, world.steps, world.ant_x, world.ant_y, world.food_x, world.food_y, walls_json.items, episode, epsilon });
try file.writeAll(json);
}
// Returns the largest value in `slice` (used for max Q-value lookup).
// FIX: the original seeded the running maximum with the sentinel -10000.0,
// which returns the wrong answer whenever every element is below -10000.
// Seed with -inf instead; an empty slice now yields -inf (was -10000.0).
fn maxVal(slice: []const f32) f32 {
    var m: f32 = -std.math.inf(f32);
    for (slice) |v| {
        if (v > m) m = v;
    }
    return m;
}
// Returns the index of the largest element in `slice` (first index on ties,
// matching the original's strict `>` comparison; 0 for an empty slice).
// FIX: the original seeded the maximum with the sentinel -10000.0, which
// mis-handled slices whose values are all below -10000. Use -inf instead.
fn argmax(slice: []const f32) usize {
    var best: f32 = -std.math.inf(f32);
    var best_idx: usize = 0;
    for (slice, 0..) |v, i| {
        if (v > best) {
            best = v;
            best_idx = i;
        }
    }
    return best_idx;
}
pub fn main() !void {
var gpa = std.heap.GeneralPurposeAllocator(.{}){};
const allocator = gpa.allocator();
defer _ = gpa.deinit();
std.debug.print("--- CARICAMENTO MNIST ---\n", .{});
var dataset = try MnistData.init(allocator, "data/train-images-idx3-ubyte", "data/train-labels-idx1-ubyte", 5000);
defer dataset.deinit();
const save_path = "brain.bin";
var net: Network = undefined;
var needs_training = true;
// Load or Init
if (Network.load(allocator, save_path)) |loaded_net| {
std.debug.print(">>> SALVATAGGIO TROVATO! Skip training.\n", .{});
net = loaded_net;
needs_training = false;
} else |_| {
std.debug.print(">>> NUOVA RETE.\n", .{});
net = Network.init(allocator);
try net.addLayer(784, 64, 111);
try net.addLayer(64, 32, 222);
try net.addLayer(32, 10, 333);
}
// 1. Inizializza Ambiente e Rete
var world = World.init(12345);
var net = Network.init(allocator);
defer net.deinit();
// --- TRAINING ---
if (needs_training) {
std.debug.print("--- INIZIO TRAINING (SIMD ACCELERATED) ---\n", .{});
const lr: f32 = 0.1;
const epochs = 10; // Bastano meno epoche ora
// Input: 9 (3x3 vista) -> Hidden: 24 -> Output: 4 (Su, Giù, Sx, Dx)
try net.addLayer(9, 24, 111, true);
try net.addLayer(24, 4, 222, false);
var epoch: usize = 0;
while (epoch < epochs) : (epoch += 1) {
var total_loss: f32 = 0.0;
var correct: usize = 0;
for (dataset.images, 0..) |img, i| {
total_loss += try net.train(img, dataset.labels[i], lr);
const out = net.forward(img);
if (argmax(out) == argmax(dataset.labels[i])) correct += 1;
}
const accuracy = @as(f32, @floatFromInt(correct)) / @as(f32, @floatFromInt(dataset.images.len)) * 100.0;
std.debug.print("Epoca {d}: Acc: {d:.2}%\n", .{ epoch, accuracy });
// Durante il training passiamo 'null' come immagine per non rallentare troppo
try net.exportJSON("network_state.json", epoch, total_loss / 5000.0, null);
}
try net.save(save_path);
}
// --- SHOWCASE MODE (Il Gran Finale) ---
std.debug.print("\n--- AVVIO DEMO VISUALE ---\n", .{});
std.debug.print("Guarda il browser! (CTRL+C per uscire)\n", .{});
var prng = std.Random.DefaultPrng.init(0);
var prng = std.Random.DefaultPrng.init(999);
const random = prng.random();
while (true) {
// 1. Pesca un'immagine a caso
const idx = random.intRangeAtMost(usize, 0, dataset.images.len - 1);
const img = dataset.images[idx];
const label = argmax(dataset.labels[idx]);
std.debug.print("--- AI TRAINING START ---\n", .{});
// 2. Fai la previsione
const out = net.forward(img);
const prediction = argmax(out);
var episode: usize = 0;
var epsilon: f32 = EPSILON_START;
// Calcoliamo una "Loss" finta solo per il grafico
const loss: f32 = if (prediction == label) 0.0 else 1.0;
while (true) : (episode += 1) {
world.reset();
var done = false;
// 3. Stampa su console
const result_str = if (prediction == label) "CORRETTO" else "SBAGLIATO";
std.debug.print("Input: {d} | AI Dice: {d} -> {s}\r", .{ label, prediction, result_str });
while (!done) {
// A. OSSERVAZIONE
const current_obs = try world.getObservation(allocator); // Alloca memoria
defer allocator.free(current_obs); // Libera alla fine del ciclo
// 4. ESPORTA TUTTO (inclusa l'immagine) per il browser
try net.exportJSON("network_state.json", 999, loss, img);
// B. SCEGLI AZIONE (Epsilon-Greedy)
var action: usize = 0;
const q_values = net.forward(current_obs); // Forward pass
// 5. Aspetta un secondo per farci godere la scena
std.Thread.sleep(1000 * 1_000_000);
}
}
if (random.float(f32) < epsilon) {
// Esplorazione (Random)
action = random.intRangeAtMost(usize, 0, 3);
} else {
// Sfruttamento (Usa il cervello)
action = argmax(q_values);
}
fn argmax(slice: []const f32) usize {
var max_val: f32 = -1000.0;
var max_idx: usize = 0;
for (slice, 0..) |val, i| {
if (val > max_val) {
max_val = val;
max_idx = i;
// C. ESEGUI AZIONE
const result = world.step(action);
const reward = result[0];
done = result[1];
// D. ADDESTRAMENTO (Q-Learning Update)
// Target = Reward + Gamma * Max(Q_Next)
// Dobbiamo calcolare il Q-value dello stato successivo
var target_val = reward;
if (!done) {
const next_obs = try world.getObservation(allocator);
defer allocator.free(next_obs);
const next_q_values = net.forward(next_obs);
target_val += GAMMA * maxVal(next_q_values);
}
// Creiamo il vettore target per il backpropagation
// Vogliamo che SOLO il neurone dell'azione presa si avvicini al target_val
// Gli altri neuroni devono restare come sono.
var target_vector = try allocator.alloc(f32, 4);
defer allocator.free(target_vector);
for (0..4) |i| target_vector[i] = q_values[i]; // Copia i vecchi valori
target_vector[action] = target_val; // Aggiorna solo quello scelto
// Train della rete su questo singolo passo
_ = try net.train(current_obs, target_vector, LR);
// Export e Delay
try exportAntJSON(&world, "ant_state.json", episode, epsilon);
// Se stiamo esplorando molto (inizio), andiamo veloci. Se siamo bravi, rallentiamo per goderci la scena.
if (episode % 10 == 0) {
// Mostra ogni 10 episodi a velocità umana
std.Thread.sleep(50 * 1_000_000);
} else {
// Allenamento veloce (quasi istantaneo)
// Togli il commento sotto se vuoi vedere TUTTO, ma sarà lento
// std.Thread.sleep(10 * 1_000_000);
}
}
// Fine episodio
if (epsilon > EPSILON_END) {
epsilon -= DECAY_RATE;
}
if (episode % 10 == 0) {
std.debug.print("Episodio {d} | Epsilon: {d:.3} | Steps: {d}\n", .{ episode, epsilon, world.steps });
}
}
return max_idx;
}

View file

@ -20,8 +20,8 @@ pub const Network = struct {
self.layers.deinit(self.allocator);
}
pub fn addLayer(self: *Network, input_size: usize, output_size: usize, seed: u64) !void {
const layer = try DenseLayer.init(self.allocator, input_size, output_size, seed);
pub fn addLayer(self: *Network, input_size: usize, output_size: usize, seed: u64, use_sigmoid: bool) !void {
const layer = try DenseLayer.init(self.allocator, input_size, output_size, seed, use_sigmoid);
try self.layers.append(self.allocator, layer);
}
@ -148,7 +148,9 @@ pub const Network = struct {
_ = try file.readAll(std.mem.asBytes(&inputs));
_ = try file.readAll(std.mem.asBytes(&neurons));
try net.addLayer(@intCast(inputs), @intCast(neurons), 0);
// SE è l'ultimo layer, niente sigmoide!
const is_last = (i == layer_count - 1);
try net.addLayer(@intCast(inputs), @intCast(neurons), 0, !is_last);
const layer = &net.layers.items[net.layers.items.len - 1];