diff --git a/ant_state.json b/ant_state.json new file mode 100644 index 0000000..7da4f78 --- /dev/null +++ b/ant_state.json @@ -0,0 +1,9 @@ +{ + "grid_size": 8, + "steps": 10, + "ant": [6, 4], + "food": [3, 1], + "walls": [[0,0],[1,0],[2,0],[3,0],[4,0],[5,0],[6,0],[7,0],[0,1],[7,1],[0,2],[7,2],[0,3],[7,3],[0,4],[7,4],[0,5],[7,5],[0,6],[7,6],[0,7],[1,7],[2,7],[3,7],[4,7],[5,7],[6,7],[7,7]], + "episode": 3889, + "epsilon": 0.049 +} \ No newline at end of file diff --git a/ant_viewer.html b/ant_viewer.html new file mode 100644 index 0000000..22ac221 --- /dev/null +++ b/ant_viewer.html @@ -0,0 +1,99 @@ + + + + + Ant Survival AI + + + + +
+

Ant Survival DQN

+
Stato: Connessione...
+
Steps: 0 | Reward: 0
+
+ + + + + + \ No newline at end of file diff --git a/src/env.zig b/src/env.zig new file mode 100644 index 0000000..ba4eaa1 --- /dev/null +++ b/src/env.zig @@ -0,0 +1,178 @@ +const std = @import("std"); +const Allocator = std.mem.Allocator; + +pub const GRID_SIZE = 8; +pub const TILE_EMPTY = 0; +pub const TILE_WALL = 1; +pub const TILE_FOOD = 2; +pub const TILE_ANT = 3; + +pub const ACTION_UP = 0; +pub const ACTION_DOWN = 1; +pub const ACTION_LEFT = 2; +pub const ACTION_RIGHT = 3; + +pub const Ant = struct { x: usize, y: usize, alive: bool }; + +pub const World = struct { + grid: [GRID_SIZE][GRID_SIZE]u8, + visited: [GRID_SIZE][GRID_SIZE]bool, + ant_x: usize, + ant_y: usize, + food_x: usize, + food_y: usize, + steps: usize, + max_steps: usize, + prng: std.Random.DefaultPrng, + + pub fn init(seed: u64) World { + var w = World{ + .grid = undefined, + .visited = undefined, + .ant_x = 0, + .ant_y = 0, + .food_x = 0, + .food_y = 0, + .steps = 0, + .max_steps = 100, + .prng = std.Random.DefaultPrng.init(seed), + }; + w.reset(); + return w; + } + + pub fn reset(self: *World) void { + const random = self.prng.random(); + + for (0..GRID_SIZE) |y| { + for (0..GRID_SIZE) |x| { + self.visited[y][x] = false; + + if (x == 0 or y == 0 or x == GRID_SIZE - 1 or y == GRID_SIZE - 1) { + self.grid[y][x] = TILE_WALL; + } else { + self.grid[y][x] = TILE_EMPTY; + } + } + } + + self.ant_x = random.intRangeAtMost(usize, 1, GRID_SIZE - 2); + self.ant_y = random.intRangeAtMost(usize, 1, GRID_SIZE - 2); + + self.visited[self.ant_y][self.ant_x] = true; + + while (true) { + self.food_x = random.intRangeAtMost(usize, 1, GRID_SIZE - 2); + self.food_y = random.intRangeAtMost(usize, 1, GRID_SIZE - 2); + if (self.food_x != self.ant_x or self.food_y != self.ant_y) break; + } + + self.steps = 0; + self.updateGrid(); + } + + fn updateGrid(self: *World) void { + for (1..GRID_SIZE - 1) |y| { + for (1..GRID_SIZE - 1) |x| { + self.grid[y][x] = TILE_EMPTY; + } + } + self.grid[self.food_y][self.food_x] = TILE_FOOD; + self.grid[self.ant_y][self.ant_x] = TILE_ANT; + } + + pub fn step(self: *World, action: usize) struct { f32, bool } { + self.steps += 1; + + var new_x = self.ant_x; + var new_y = self.ant_y; + + if (action == ACTION_UP) new_y -= 1; + if (action == ACTION_DOWN) new_y += 1; + if (action == ACTION_LEFT) new_x -= 1; + if (action == ACTION_RIGHT) new_x += 1; + + const tile = self.grid[new_y][new_x]; + + if (tile == TILE_WALL) { + return .{ -10.0, false }; + } + + var move_reward: f32 = -0.1; + + if (self.visited[new_y][new_x]) { + move_reward -= 0.5; + } else { + move_reward += 0.2; + } + + self.ant_x = new_x; + self.ant_y = new_y; + self.visited[new_y][new_x] = true; + + self.updateGrid(); + + if (new_x == self.food_x and new_y == self.food_y) { + return .{ 100.0, true }; + } + + if (self.steps >= self.max_steps) { + return .{ -10.0, true }; + } + + return .{ move_reward, false }; + } + + pub fn getObservation(self: *World, allocator: Allocator) ![]f32 { + var obs = try allocator.alloc(f32, 10); + var idx: usize = 0; + + const ax = @as(i32, @intCast(self.ant_x)); + const ay = @as(i32, @intCast(self.ant_y)); + + var dy: i32 = -1; + while (dy <= 1) : (dy += 1) { + var dx: i32 = -1; + while (dx <= 1) : (dx += 1) { + const py_i = ay + dy; + const px_i = ax + dx; + + var val: f32 = 0.0; + + if (py_i >= 0 and py_i < GRID_SIZE and px_i >= 0 and px_i < GRID_SIZE) { + const py = @as(usize, @intCast(py_i)); + const px = @as(usize, @intCast(px_i)); + const content = self.grid[py][px]; + + if (content == TILE_WALL) { + val = -1.0; + } else if (content == TILE_FOOD) { + val = 1.0; + } else if (self.visited[py][px]) { + val = -0.5; + } else { + val = 0.0; + } + } else { + val = -1.0; + } + + obs[idx] = val; + idx += 1; + } + } + obs[9] = self.getScent(self.ant_x, self.ant_y); + + return obs; + } + + fn getScent(self: *World, x: usize, y: usize) f32 { + const dx = if (x > self.food_x) x - self.food_x else self.food_x - x; + const dy = if (y > self.food_y) y - self.food_y else self.food_y - y; + const dist = dx + dy; + + if (dist == 0) return 1.0; + + return 1.0 / (@as(f32, @floatFromInt(dist)) + 1.0); + } +}; diff --git a/src/layer.zig b/src/layer.zig index 7acdeb8..7d76801 100644 --- a/src/layer.zig +++ b/src/layer.zig @@ -1,7 +1,6 @@ const std = @import("std"); const Allocator = std.mem.Allocator; -// Definiamo la larghezza del vettore SIMD (8 float alla volta = 256 bit) const SimdWidth = 8; const Vec = @Vector(SimdWidth, f32); @@ -11,6 +10,7 @@ pub const DenseLayer = struct { output: []f32, inputs_count: usize, neurons_count: usize, + use_sigmoid: bool, // NUOVO CAMPO allocator: Allocator, fn sigmoid(x: f32) f32 { @@ -21,19 +21,17 @@ pub const DenseLayer = struct { return x * (1.0 - x); } - pub fn init(allocator: Allocator, inputs: usize, neurons: usize, seed: u64) !DenseLayer { + // Aggiunto parametro 'use_sigmoid' + pub fn init(allocator: Allocator, inputs: usize, neurons: usize, seed: u64, use_sigmoid: bool) !DenseLayer { const weights = try allocator.alloc(f32, inputs * neurons); const biases = try allocator.alloc(f32, neurons); const output = try allocator.alloc(f32, neurons); - // --- CORREZIONE QUI SOTTO --- - // Prima era: std.rand.DefaultPrng - // Ora è: std.Random.DefaultPrng var prng = std.Random.DefaultPrng.init(seed); const random = prng.random(); - // Inizializzazione Xavier/Glorot - for (weights) |*w| w.* = (random.float(f32) * 2.0 - 1.0) * 0.5; + // Pesi più piccoli per stabilità iniziale + for (weights) |*w| w.* = (random.float(f32) * 2.0 - 1.0) * 0.1; for (biases) |*b| b.* = 0.0; return DenseLayer{ @@ -42,6 +40,7 @@ pub const DenseLayer = struct { .output = output, .inputs_count = inputs, .neurons_count = neurons, + .use_sigmoid = use_sigmoid, .allocator = allocator, }; } @@ -52,46 +51,55 @@ pub const DenseLayer = struct { self.allocator.free(self.output); } - // --- FORWARD PASS CON SIMD --- pub fn forward(self: *DenseLayer, input: []const f32) []const f32 { for (0..self.neurons_count) |n| { var sum: f32 = self.biases[n]; const w_start = n * self.inputs_count; - // 1. Processiamo a blocchi di 8 (SIMD) + // SIMD var vec_sum: Vec = @splat(0.0); var i: usize = 0; - while (i + SimdWidth <= self.inputs_count) : (i += SimdWidth) { const v_in: Vec = input[i..][0..SimdWidth].*; const v_w: Vec = self.weights[w_start + i ..][0..SimdWidth].*; vec_sum += v_in * v_w; } - sum += @reduce(.Add, vec_sum); - // 2. Tail Loop + // Tail Loop while (i < self.inputs_count) : (i += 1) { sum += input[i] * self.weights[w_start + i]; } - self.output[n] = sigmoid(sum); + // CORREZIONE: Se non usiamo sigmoide, è lineare (passa 'sum' diretto) + if (self.use_sigmoid) { + self.output[n] = sigmoid(sum); + } else { + self.output[n] = sum; + } } return self.output; } - // --- BACKWARD PASS CON SIMD --- pub fn backward(self: *DenseLayer, output_gradient: []const f32, input_vals: []const f32, learning_rate: f32) []f32 { const input_gradient = self.allocator.alloc(f32, self.inputs_count) catch @panic("OOM"); @memset(input_gradient, 0.0); for (0..self.neurons_count) |n| { - const delta = output_gradient[n] * sigmoidDerivative(self.output[n]); + // CORREZIONE DERIVATA: + // Se Sigmoide: f'(x) = out * (1 - out) + // Se Lineare: f'(x) = 1.0 + var derivative: f32 = 1.0; + if (self.use_sigmoid) { + derivative = sigmoidDerivative(self.output[n]); + } + + const delta = output_gradient[n] * derivative; const w_start = n * self.inputs_count; self.biases[n] -= learning_rate * delta; - // SIMD LOOP + // SIMD LOOP (Backprop) const v_delta: Vec = @splat(delta); const v_lr: Vec = @splat(learning_rate); const v_change_factor = v_delta * v_lr; @@ -101,26 +109,21 @@ pub const DenseLayer = struct { var v_w: Vec = self.weights[w_start + i ..][0..SimdWidth].*; const v_in: Vec = input_vals[i..][0..SimdWidth].*; - // Backprop error var v_in_grad: Vec = input_gradient[i..][0..SimdWidth].*; v_in_grad += v_w * v_delta; input_gradient[i..][0..SimdWidth].* = v_in_grad; - // Update weights v_w -= v_in * v_change_factor; self.weights[w_start + i ..][0..SimdWidth].* = v_w; } - // TAIL LOOP while (i < self.inputs_count) : (i += 1) { const w_idx = w_start + i; const old_weight = self.weights[w_idx]; - input_gradient[i] += old_weight * delta; self.weights[w_idx] -= input_vals[i] * delta * learning_rate; } } - return input_gradient; } }; diff --git a/src/main.zig b/src/main.zig index 92f9043..9f85059 100644 --- a/src/main.zig +++ b/src/main.zig @@ -1,100 +1,159 @@ const std = @import("std"); +const World = @import("env.zig").World; +const env = @import("env.zig"); const Network = @import("modular_network.zig").Network; -const MnistData = @import("mnist.zig").MnistData; + +// --- PARAMETRI AI --- +const GAMMA: f32 = 0.9; // Quanto conta il futuro? (0.9 = lungimirante) +const LR: f32 = 0.005; // Learning Rate +const EPSILON_START: f32 = 1.0; // Inizia esplorando al 100% +const EPSILON_END: f32 = 0.05; // Finisce esplorando al 5% +const DECAY_RATE: f32 = 0.001; // Quanto velocemente smette di essere curiosa + +fn exportAntJSON(world: *World, file_path: []const u8, episode: usize, epsilon: f32) !void { + const file = try std.fs.cwd().createFile(file_path, .{}); + defer file.close(); + + var buffer: [8192]u8 = undefined; + var fba = std.heap.FixedBufferAllocator.init(&buffer); + const allocator = fba.allocator(); + + var walls_json = std.ArrayList(u8){}; + defer walls_json.deinit(allocator); + + try walls_json.appendSlice(allocator, "["); + var first = true; + for (0..env.GRID_SIZE) |y| { + for (0..env.GRID_SIZE) |x| { + if (world.grid[y][x] == env.TILE_WALL) { + if (!first) try walls_json.appendSlice(allocator, ","); + try std.fmt.format(walls_json.writer(allocator), "[{d},{d}]", .{ x, y }); + first = false; + } + } + } + try walls_json.appendSlice(allocator, "]"); + + // Aggiungiamo info extra per debug (Epsilon ed Episode) + const json = try std.fmt.allocPrint(allocator, "{{\n \"grid_size\": {d},\n \"steps\": {d},\n \"ant\": [{d}, {d}],\n \"food\": [{d}, {d}],\n \"walls\": {s},\n \"episode\": {d},\n \"epsilon\": {d:.3}\n}}", .{ env.GRID_SIZE, world.steps, world.ant_x, world.ant_y, world.food_x, world.food_y, walls_json.items, episode, epsilon }); + + try file.writeAll(json); +} + +// Funzione helper per trovare il valore massimo in un array +fn maxVal(slice: []const f32) f32 { + var m: f32 = -10000.0; + for (slice) |v| if (v > m) { + m = v; + }; + return m; +} + +fn argmax(slice: []const f32) usize { + var m: f32 = -10000.0; + var idx: usize = 0; + for (slice, 0..) |v, i| { + if (v > m) { + m = v; + idx = i; + } + } + return idx; +} pub fn main() !void { var gpa = std.heap.GeneralPurposeAllocator(.{}){}; const allocator = gpa.allocator(); defer _ = gpa.deinit(); - std.debug.print("--- CARICAMENTO MNIST ---\n", .{}); - var dataset = try MnistData.init(allocator, "data/train-images-idx3-ubyte", "data/train-labels-idx1-ubyte", 5000); - defer dataset.deinit(); - - const save_path = "brain.bin"; - var net: Network = undefined; - var needs_training = true; - - // Load or Init - if (Network.load(allocator, save_path)) |loaded_net| { - std.debug.print(">>> SALVATAGGIO TROVATO! Skip training.\n", .{}); - net = loaded_net; - needs_training = false; - } else |_| { - std.debug.print(">>> NUOVA RETE.\n", .{}); - net = Network.init(allocator); - try net.addLayer(784, 64, 111); - try net.addLayer(64, 32, 222); - try net.addLayer(32, 10, 333); - } + // 1. Inizializza Ambiente e Rete + var world = World.init(12345); + var net = Network.init(allocator); defer net.deinit(); - // --- TRAINING --- - if (needs_training) { - std.debug.print("--- INIZIO TRAINING (SIMD ACCELERATED) ---\n", .{}); - const lr: f32 = 0.1; - const epochs = 10; // Bastano meno epoche ora + // Input: 9 (3x3 vista) -> Hidden: 24 -> Output: 4 (Su, Giù, Sx, Dx) + try net.addLayer(9, 24, 111, true); + try net.addLayer(24, 4, 222, false); - var epoch: usize = 0; - while (epoch < epochs) : (epoch += 1) { - var total_loss: f32 = 0.0; - var correct: usize = 0; - - for (dataset.images, 0..) |img, i| { - total_loss += try net.train(img, dataset.labels[i], lr); - const out = net.forward(img); - if (argmax(out) == argmax(dataset.labels[i])) correct += 1; - } - - const accuracy = @as(f32, @floatFromInt(correct)) / @as(f32, @floatFromInt(dataset.images.len)) * 100.0; - std.debug.print("Epoca {d}: Acc: {d:.2}%\n", .{ epoch, accuracy }); - - // Durante il training passiamo 'null' come immagine per non rallentare troppo - try net.exportJSON("network_state.json", epoch, total_loss / 5000.0, null); - } - try net.save(save_path); - } - - // --- SHOWCASE MODE (Il Gran Finale) --- - std.debug.print("\n--- AVVIO DEMO VISUALE ---\n", .{}); - std.debug.print("Guarda il browser! (CTRL+C per uscire)\n", .{}); - - var prng = std.Random.DefaultPrng.init(0); + var prng = std.Random.DefaultPrng.init(999); const random = prng.random(); - while (true) { - // 1. Pesca un'immagine a caso - const idx = random.intRangeAtMost(usize, 0, dataset.images.len - 1); - const img = dataset.images[idx]; - const label = argmax(dataset.labels[idx]); + std.debug.print("--- AI TRAINING START ---\n", .{}); - // 2. Fai la previsione - const out = net.forward(img); - const prediction = argmax(out); + var episode: usize = 0; + var epsilon: f32 = EPSILON_START; - // Calcoliamo una "Loss" finta solo per il grafico - const loss: f32 = if (prediction == label) 0.0 else 1.0; + while (true) : (episode += 1) { + world.reset(); + var done = false; - // 3. Stampa su console - const result_str = if (prediction == label) "CORRETTO" else "SBAGLIATO"; - std.debug.print("Input: {d} | AI Dice: {d} -> {s}\r", .{ label, prediction, result_str }); + while (!done) { + // A. OSSERVAZIONE + const current_obs = try world.getObservation(allocator); // Alloca memoria + defer allocator.free(current_obs); // Libera alla fine del ciclo - // 4. ESPORTA TUTTO (inclusa l'immagine) per il browser - try net.exportJSON("network_state.json", 999, loss, img); + // B. SCEGLI AZIONE (Epsilon-Greedy) + var action: usize = 0; + const q_values = net.forward(current_obs); // Forward pass - // 5. Aspetta un secondo per farci godere la scena - std.Thread.sleep(1000 * 1_000_000); - } -} + if (random.float(f32) < epsilon) { + // Esplorazione (Random) + action = random.intRangeAtMost(usize, 0, 3); + } else { + // Sfruttamento (Usa il cervello) + action = argmax(q_values); + } -fn argmax(slice: []const f32) usize { - var max_val: f32 = -1000.0; - var max_idx: usize = 0; - for (slice, 0..) |val, i| { - if (val > max_val) { - max_val = val; - max_idx = i; + // C. ESEGUI AZIONE + const result = world.step(action); + const reward = result[0]; + done = result[1]; + + // D. ADDESTRAMENTO (Q-Learning Update) + // Target = Reward + Gamma * Max(Q_Next) + + // Dobbiamo calcolare il Q-value dello stato successivo + var target_val = reward; + if (!done) { + const next_obs = try world.getObservation(allocator); + defer allocator.free(next_obs); + const next_q_values = net.forward(next_obs); + target_val += GAMMA * maxVal(next_q_values); + } + + // Creiamo il vettore target per il backpropagation + // Vogliamo che SOLO il neurone dell'azione presa si avvicini al target_val + // Gli altri neuroni devono restare come sono. + var target_vector = try allocator.alloc(f32, 4); + defer allocator.free(target_vector); + + for (0..4) |i| target_vector[i] = q_values[i]; // Copia i vecchi valori + target_vector[action] = target_val; // Aggiorna solo quello scelto + + // Train della rete su questo singolo passo + _ = try net.train(current_obs, target_vector, LR); + + // Export e Delay + try exportAntJSON(&world, "ant_state.json", episode, epsilon); + + // Se stiamo esplorando molto (inizio), andiamo veloci. Se siamo bravi, rallentiamo per goderci la scena. + if (episode % 10 == 0) { + // Mostra ogni 10 episodi a velocità umana + std.Thread.sleep(50 * 1_000_000); + } else { + // Allenamento veloce (quasi istantaneo) + // Togli il commento sotto se vuoi vedere TUTTO, ma sarà lento + // std.Thread.sleep(10 * 1_000_000); + } + } + + // Fine episodio + if (epsilon > EPSILON_END) { + epsilon -= DECAY_RATE; + } + + if (episode % 10 == 0) { + std.debug.print("Episodio {d} | Epsilon: {d:.3} | Steps: {d}\n", .{ episode, epsilon, world.steps }); } } - return max_idx; } diff --git a/src/modular_network.zig b/src/modular_network.zig index 2fdf114..0aae241 100644 --- a/src/modular_network.zig +++ b/src/modular_network.zig @@ -20,8 +20,8 @@ pub const Network = struct { self.layers.deinit(self.allocator); } - pub fn addLayer(self: *Network, input_size: usize, output_size: usize, seed: u64) !void { - const layer = try DenseLayer.init(self.allocator, input_size, output_size, seed); + pub fn addLayer(self: *Network, input_size: usize, output_size: usize, seed: u64, use_sigmoid: bool) !void { + const layer = try DenseLayer.init(self.allocator, input_size, output_size, seed, use_sigmoid); try self.layers.append(self.allocator, layer); } @@ -148,7 +148,9 @@ pub const Network = struct { _ = try file.readAll(std.mem.asBytes(&inputs)); _ = try file.readAll(std.mem.asBytes(&neurons)); - try net.addLayer(@intCast(inputs), @intCast(neurons), 0); + // SE è l'ultimo layer, niente sigmoide! + const is_last = (i == layer_count - 1); + try net.addLayer(@intCast(inputs), @intCast(neurons), 0, !is_last); const layer = &net.layers.items[net.layers.items.len - 1];