diff --git a/ant_state.json b/ant_state.json
new file mode 100644
index 0000000..7da4f78
--- /dev/null
+++ b/ant_state.json
@@ -0,0 +1,9 @@
+{
+ "grid_size": 8,
+ "steps": 10,
+ "ant": [6, 4],
+ "food": [3, 1],
+ "walls": [[0,0],[1,0],[2,0],[3,0],[4,0],[5,0],[6,0],[7,0],[0,1],[7,1],[0,2],[7,2],[0,3],[7,3],[0,4],[7,4],[0,5],[7,5],[0,6],[7,6],[0,7],[1,7],[2,7],[3,7],[4,7],[5,7],[6,7],[7,7]],
+ "episode": 3889,
+ "epsilon": 0.049
+}
\ No newline at end of file
diff --git a/ant_viewer.html b/ant_viewer.html
new file mode 100644
index 0000000..22ac221
--- /dev/null
+++ b/ant_viewer.html
@@ -0,0 +1,99 @@
+
+
+
+
+ Ant Survival AI
+
+
+
+
+
+
Ant Survival DQN
+
Stato: Connessione...
+
Steps: 0 | Reward: 0
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/src/env.zig b/src/env.zig
new file mode 100644
index 0000000..ba4eaa1
--- /dev/null
+++ b/src/env.zig
@@ -0,0 +1,178 @@
+const std = @import("std");
+const Allocator = std.mem.Allocator;
+
+pub const GRID_SIZE = 8;
+pub const TILE_EMPTY = 0;
+pub const TILE_WALL = 1;
+pub const TILE_FOOD = 2;
+pub const TILE_ANT = 3;
+
+pub const ACTION_UP = 0;
+pub const ACTION_DOWN = 1;
+pub const ACTION_LEFT = 2;
+pub const ACTION_RIGHT = 3;
+
+pub const Ant = struct { x: usize, y: usize, alive: bool };
+
+pub const World = struct {
+ grid: [GRID_SIZE][GRID_SIZE]u8,
+ visited: [GRID_SIZE][GRID_SIZE]bool,
+ ant_x: usize,
+ ant_y: usize,
+ food_x: usize,
+ food_y: usize,
+ steps: usize,
+ max_steps: usize,
+ prng: std.Random.DefaultPrng,
+
+ pub fn init(seed: u64) World {
+ var w = World{
+ .grid = undefined,
+ .visited = undefined,
+ .ant_x = 0,
+ .ant_y = 0,
+ .food_x = 0,
+ .food_y = 0,
+ .steps = 0,
+ .max_steps = 100,
+ .prng = std.Random.DefaultPrng.init(seed),
+ };
+ w.reset();
+ return w;
+ }
+
+ pub fn reset(self: *World) void {
+ const random = self.prng.random();
+
+ for (0..GRID_SIZE) |y| {
+ for (0..GRID_SIZE) |x| {
+ self.visited[y][x] = false;
+
+ if (x == 0 or y == 0 or x == GRID_SIZE - 1 or y == GRID_SIZE - 1) {
+ self.grid[y][x] = TILE_WALL;
+ } else {
+ self.grid[y][x] = TILE_EMPTY;
+ }
+ }
+ }
+
+ self.ant_x = random.intRangeAtMost(usize, 1, GRID_SIZE - 2);
+ self.ant_y = random.intRangeAtMost(usize, 1, GRID_SIZE - 2);
+
+ self.visited[self.ant_y][self.ant_x] = true;
+
+ while (true) {
+ self.food_x = random.intRangeAtMost(usize, 1, GRID_SIZE - 2);
+ self.food_y = random.intRangeAtMost(usize, 1, GRID_SIZE - 2);
+ if (self.food_x != self.ant_x or self.food_y != self.ant_y) break;
+ }
+
+ self.steps = 0;
+ self.updateGrid();
+ }
+
+ fn updateGrid(self: *World) void {
+ for (1..GRID_SIZE - 1) |y| {
+ for (1..GRID_SIZE - 1) |x| {
+ self.grid[y][x] = TILE_EMPTY;
+ }
+ }
+ self.grid[self.food_y][self.food_x] = TILE_FOOD;
+ self.grid[self.ant_y][self.ant_x] = TILE_ANT;
+ }
+
+ pub fn step(self: *World, action: usize) struct { f32, bool } {
+ self.steps += 1;
+
+ var new_x = self.ant_x;
+ var new_y = self.ant_y;
+
+ if (action == ACTION_UP) new_y -= 1;
+ if (action == ACTION_DOWN) new_y += 1;
+ if (action == ACTION_LEFT) new_x -= 1;
+ if (action == ACTION_RIGHT) new_x += 1;
+
+ const tile = self.grid[new_y][new_x];
+
+        if (tile == TILE_WALL) {
+            return .{ -10.0, self.steps >= self.max_steps };
+        }
+
+ var move_reward: f32 = -0.1;
+
+ if (self.visited[new_y][new_x]) {
+ move_reward -= 0.5;
+ } else {
+ move_reward += 0.2;
+ }
+
+ self.ant_x = new_x;
+ self.ant_y = new_y;
+ self.visited[new_y][new_x] = true;
+
+ self.updateGrid();
+
+ if (new_x == self.food_x and new_y == self.food_y) {
+ return .{ 100.0, true };
+ }
+
+ if (self.steps >= self.max_steps) {
+ return .{ -10.0, true };
+ }
+
+ return .{ move_reward, false };
+ }
+
+ pub fn getObservation(self: *World, allocator: Allocator) ![]f32 {
+        const obs = try allocator.alloc(f32, 10);
+ var idx: usize = 0;
+
+ const ax = @as(i32, @intCast(self.ant_x));
+ const ay = @as(i32, @intCast(self.ant_y));
+
+ var dy: i32 = -1;
+ while (dy <= 1) : (dy += 1) {
+ var dx: i32 = -1;
+ while (dx <= 1) : (dx += 1) {
+ const py_i = ay + dy;
+ const px_i = ax + dx;
+
+ var val: f32 = 0.0;
+
+ if (py_i >= 0 and py_i < GRID_SIZE and px_i >= 0 and px_i < GRID_SIZE) {
+ const py = @as(usize, @intCast(py_i));
+ const px = @as(usize, @intCast(px_i));
+ const content = self.grid[py][px];
+
+ if (content == TILE_WALL) {
+ val = -1.0;
+ } else if (content == TILE_FOOD) {
+ val = 1.0;
+ } else if (self.visited[py][px]) {
+ val = -0.5;
+ } else {
+ val = 0.0;
+ }
+ } else {
+ val = -1.0;
+ }
+
+ obs[idx] = val;
+ idx += 1;
+ }
+ }
+ obs[9] = self.getScent(self.ant_x, self.ant_y);
+
+ return obs;
+ }
+
+ fn getScent(self: *World, x: usize, y: usize) f32 {
+ const dx = if (x > self.food_x) x - self.food_x else self.food_x - x;
+ const dy = if (y > self.food_y) y - self.food_y else self.food_y - y;
+ const dist = dx + dy;
+
+ if (dist == 0) return 1.0;
+
+ return 1.0 / (@as(f32, @floatFromInt(dist)) + 1.0);
+ }
+};
diff --git a/src/layer.zig b/src/layer.zig
index 7acdeb8..7d76801 100644
--- a/src/layer.zig
+++ b/src/layer.zig
@@ -1,7 +1,6 @@
const std = @import("std");
const Allocator = std.mem.Allocator;
-// Definiamo la larghezza del vettore SIMD (8 float alla volta = 256 bit)
const SimdWidth = 8;
const Vec = @Vector(SimdWidth, f32);
@@ -11,6 +10,7 @@ pub const DenseLayer = struct {
output: []f32,
inputs_count: usize,
neurons_count: usize,
+ use_sigmoid: bool, // NUOVO CAMPO
allocator: Allocator,
fn sigmoid(x: f32) f32 {
@@ -21,19 +21,17 @@ pub const DenseLayer = struct {
return x * (1.0 - x);
}
- pub fn init(allocator: Allocator, inputs: usize, neurons: usize, seed: u64) !DenseLayer {
+ // Aggiunto parametro 'use_sigmoid'
+ pub fn init(allocator: Allocator, inputs: usize, neurons: usize, seed: u64, use_sigmoid: bool) !DenseLayer {
const weights = try allocator.alloc(f32, inputs * neurons);
const biases = try allocator.alloc(f32, neurons);
const output = try allocator.alloc(f32, neurons);
- // --- CORREZIONE QUI SOTTO ---
- // Prima era: std.rand.DefaultPrng
- // Ora è: std.Random.DefaultPrng
var prng = std.Random.DefaultPrng.init(seed);
const random = prng.random();
- // Inizializzazione Xavier/Glorot
- for (weights) |*w| w.* = (random.float(f32) * 2.0 - 1.0) * 0.5;
+ // Pesi più piccoli per stabilità iniziale
+ for (weights) |*w| w.* = (random.float(f32) * 2.0 - 1.0) * 0.1;
for (biases) |*b| b.* = 0.0;
return DenseLayer{
@@ -42,6 +40,7 @@ pub const DenseLayer = struct {
.output = output,
.inputs_count = inputs,
.neurons_count = neurons,
+ .use_sigmoid = use_sigmoid,
.allocator = allocator,
};
}
@@ -52,46 +51,55 @@ pub const DenseLayer = struct {
self.allocator.free(self.output);
}
- // --- FORWARD PASS CON SIMD ---
pub fn forward(self: *DenseLayer, input: []const f32) []const f32 {
for (0..self.neurons_count) |n| {
var sum: f32 = self.biases[n];
const w_start = n * self.inputs_count;
- // 1. Processiamo a blocchi di 8 (SIMD)
+ // SIMD
var vec_sum: Vec = @splat(0.0);
var i: usize = 0;
-
while (i + SimdWidth <= self.inputs_count) : (i += SimdWidth) {
const v_in: Vec = input[i..][0..SimdWidth].*;
const v_w: Vec = self.weights[w_start + i ..][0..SimdWidth].*;
vec_sum += v_in * v_w;
}
-
sum += @reduce(.Add, vec_sum);
- // 2. Tail Loop
+ // Tail Loop
while (i < self.inputs_count) : (i += 1) {
sum += input[i] * self.weights[w_start + i];
}
- self.output[n] = sigmoid(sum);
+ // CORREZIONE: Se non usiamo sigmoide, è lineare (passa 'sum' diretto)
+ if (self.use_sigmoid) {
+ self.output[n] = sigmoid(sum);
+ } else {
+ self.output[n] = sum;
+ }
}
return self.output;
}
- // --- BACKWARD PASS CON SIMD ---
pub fn backward(self: *DenseLayer, output_gradient: []const f32, input_vals: []const f32, learning_rate: f32) []f32 {
const input_gradient = self.allocator.alloc(f32, self.inputs_count) catch @panic("OOM");
@memset(input_gradient, 0.0);
for (0..self.neurons_count) |n| {
- const delta = output_gradient[n] * sigmoidDerivative(self.output[n]);
+ // CORREZIONE DERIVATA:
+ // Se Sigmoide: f'(x) = out * (1 - out)
+ // Se Lineare: f'(x) = 1.0
+ var derivative: f32 = 1.0;
+ if (self.use_sigmoid) {
+ derivative = sigmoidDerivative(self.output[n]);
+ }
+
+ const delta = output_gradient[n] * derivative;
const w_start = n * self.inputs_count;
self.biases[n] -= learning_rate * delta;
- // SIMD LOOP
+ // SIMD LOOP (Backprop)
const v_delta: Vec = @splat(delta);
const v_lr: Vec = @splat(learning_rate);
const v_change_factor = v_delta * v_lr;
@@ -101,26 +109,21 @@ pub const DenseLayer = struct {
var v_w: Vec = self.weights[w_start + i ..][0..SimdWidth].*;
const v_in: Vec = input_vals[i..][0..SimdWidth].*;
- // Backprop error
var v_in_grad: Vec = input_gradient[i..][0..SimdWidth].*;
v_in_grad += v_w * v_delta;
input_gradient[i..][0..SimdWidth].* = v_in_grad;
- // Update weights
v_w -= v_in * v_change_factor;
self.weights[w_start + i ..][0..SimdWidth].* = v_w;
}
- // TAIL LOOP
while (i < self.inputs_count) : (i += 1) {
const w_idx = w_start + i;
const old_weight = self.weights[w_idx];
-
input_gradient[i] += old_weight * delta;
self.weights[w_idx] -= input_vals[i] * delta * learning_rate;
}
}
-
return input_gradient;
}
};
diff --git a/src/main.zig b/src/main.zig
index 92f9043..9f85059 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -1,100 +1,159 @@
const std = @import("std");
+const World = @import("env.zig").World;
+const env = @import("env.zig");
const Network = @import("modular_network.zig").Network;
-const MnistData = @import("mnist.zig").MnistData;
+
+// --- PARAMETRI AI ---
+const GAMMA: f32 = 0.9; // Quanto conta il futuro? (0.9 = lungimirante)
+const LR: f32 = 0.005; // Learning Rate
+const EPSILON_START: f32 = 1.0; // Inizia esplorando al 100%
+const EPSILON_END: f32 = 0.05; // Finisce esplorando al 5%
+const DECAY_RATE: f32 = 0.001; // Quanto velocemente smette di essere curiosa
+
+fn exportAntJSON(world: *World, file_path: []const u8, episode: usize, epsilon: f32) !void {
+ const file = try std.fs.cwd().createFile(file_path, .{});
+ defer file.close();
+
+ var buffer: [8192]u8 = undefined;
+ var fba = std.heap.FixedBufferAllocator.init(&buffer);
+ const allocator = fba.allocator();
+
+ var walls_json = std.ArrayList(u8){};
+ defer walls_json.deinit(allocator);
+
+ try walls_json.appendSlice(allocator, "[");
+ var first = true;
+ for (0..env.GRID_SIZE) |y| {
+ for (0..env.GRID_SIZE) |x| {
+ if (world.grid[y][x] == env.TILE_WALL) {
+ if (!first) try walls_json.appendSlice(allocator, ",");
+ try std.fmt.format(walls_json.writer(allocator), "[{d},{d}]", .{ x, y });
+ first = false;
+ }
+ }
+ }
+ try walls_json.appendSlice(allocator, "]");
+
+ // Aggiungiamo info extra per debug (Epsilon ed Episode)
+ const json = try std.fmt.allocPrint(allocator, "{{\n \"grid_size\": {d},\n \"steps\": {d},\n \"ant\": [{d}, {d}],\n \"food\": [{d}, {d}],\n \"walls\": {s},\n \"episode\": {d},\n \"epsilon\": {d:.3}\n}}", .{ env.GRID_SIZE, world.steps, world.ant_x, world.ant_y, world.food_x, world.food_y, walls_json.items, episode, epsilon });
+
+ try file.writeAll(json);
+}
+
+// Funzione helper per trovare il valore massimo in un array
+fn maxVal(slice: []const f32) f32 {
+    var m: f32 = -std.math.inf(f32);
+    for (slice) |v| if (v > m) {
+        m = v;
+    };
+    return m;
+}
+
+fn argmax(slice: []const f32) usize {
+    var m: f32 = -std.math.inf(f32);
+    var idx: usize = 0;
+    for (slice, 0..) |v, i| {
+        if (v > m) {
+            m = v;
+            idx = i;
+        }
+    }
+    return idx;
+}
pub fn main() !void {
var gpa = std.heap.GeneralPurposeAllocator(.{}){};
const allocator = gpa.allocator();
defer _ = gpa.deinit();
- std.debug.print("--- CARICAMENTO MNIST ---\n", .{});
- var dataset = try MnistData.init(allocator, "data/train-images-idx3-ubyte", "data/train-labels-idx1-ubyte", 5000);
- defer dataset.deinit();
-
- const save_path = "brain.bin";
- var net: Network = undefined;
- var needs_training = true;
-
- // Load or Init
- if (Network.load(allocator, save_path)) |loaded_net| {
- std.debug.print(">>> SALVATAGGIO TROVATO! Skip training.\n", .{});
- net = loaded_net;
- needs_training = false;
- } else |_| {
- std.debug.print(">>> NUOVA RETE.\n", .{});
- net = Network.init(allocator);
- try net.addLayer(784, 64, 111);
- try net.addLayer(64, 32, 222);
- try net.addLayer(32, 10, 333);
- }
+ // 1. Inizializza Ambiente e Rete
+ var world = World.init(12345);
+ var net = Network.init(allocator);
defer net.deinit();
- // --- TRAINING ---
- if (needs_training) {
- std.debug.print("--- INIZIO TRAINING (SIMD ACCELERATED) ---\n", .{});
- const lr: f32 = 0.1;
- const epochs = 10; // Bastano meno epoche ora
+    // Input: 10 (3x3 view + scent, see getObservation) -> Hidden: 24 -> Output: 4 (Up, Down, Left, Right)
+    try net.addLayer(10, 24, 111, true);
+ try net.addLayer(24, 4, 222, false);
- var epoch: usize = 0;
- while (epoch < epochs) : (epoch += 1) {
- var total_loss: f32 = 0.0;
- var correct: usize = 0;
-
- for (dataset.images, 0..) |img, i| {
- total_loss += try net.train(img, dataset.labels[i], lr);
- const out = net.forward(img);
- if (argmax(out) == argmax(dataset.labels[i])) correct += 1;
- }
-
- const accuracy = @as(f32, @floatFromInt(correct)) / @as(f32, @floatFromInt(dataset.images.len)) * 100.0;
- std.debug.print("Epoca {d}: Acc: {d:.2}%\n", .{ epoch, accuracy });
-
- // Durante il training passiamo 'null' come immagine per non rallentare troppo
- try net.exportJSON("network_state.json", epoch, total_loss / 5000.0, null);
- }
- try net.save(save_path);
- }
-
- // --- SHOWCASE MODE (Il Gran Finale) ---
- std.debug.print("\n--- AVVIO DEMO VISUALE ---\n", .{});
- std.debug.print("Guarda il browser! (CTRL+C per uscire)\n", .{});
-
- var prng = std.Random.DefaultPrng.init(0);
+ var prng = std.Random.DefaultPrng.init(999);
const random = prng.random();
- while (true) {
- // 1. Pesca un'immagine a caso
- const idx = random.intRangeAtMost(usize, 0, dataset.images.len - 1);
- const img = dataset.images[idx];
- const label = argmax(dataset.labels[idx]);
+ std.debug.print("--- AI TRAINING START ---\n", .{});
- // 2. Fai la previsione
- const out = net.forward(img);
- const prediction = argmax(out);
+ var episode: usize = 0;
+ var epsilon: f32 = EPSILON_START;
- // Calcoliamo una "Loss" finta solo per il grafico
- const loss: f32 = if (prediction == label) 0.0 else 1.0;
+ while (true) : (episode += 1) {
+ world.reset();
+ var done = false;
- // 3. Stampa su console
- const result_str = if (prediction == label) "CORRETTO" else "SBAGLIATO";
- std.debug.print("Input: {d} | AI Dice: {d} -> {s}\r", .{ label, prediction, result_str });
+ while (!done) {
+ // A. OSSERVAZIONE
+ const current_obs = try world.getObservation(allocator); // Alloca memoria
+ defer allocator.free(current_obs); // Libera alla fine del ciclo
- // 4. ESPORTA TUTTO (inclusa l'immagine) per il browser
- try net.exportJSON("network_state.json", 999, loss, img);
+ // B. SCEGLI AZIONE (Epsilon-Greedy)
+ var action: usize = 0;
+ const q_values = net.forward(current_obs); // Forward pass
- // 5. Aspetta un secondo per farci godere la scena
- std.Thread.sleep(1000 * 1_000_000);
- }
-}
+ if (random.float(f32) < epsilon) {
+ // Esplorazione (Random)
+ action = random.intRangeAtMost(usize, 0, 3);
+ } else {
+ // Sfruttamento (Usa il cervello)
+ action = argmax(q_values);
+ }
-fn argmax(slice: []const f32) usize {
- var max_val: f32 = -1000.0;
- var max_idx: usize = 0;
- for (slice, 0..) |val, i| {
- if (val > max_val) {
- max_val = val;
- max_idx = i;
+ // C. ESEGUI AZIONE
+ const result = world.step(action);
+ const reward = result[0];
+ done = result[1];
+
+ // D. ADDESTRAMENTO (Q-Learning Update)
+ // Target = Reward + Gamma * Max(Q_Next)
+
+ // Dobbiamo calcolare il Q-value dello stato successivo
+            // Snapshot the current Q-values BEFORE the next-state forward pass:
+            // net.forward reuses the same output buffer, so q_values would
+            // otherwise be overwritten by the next-state values.
+            const target_vector = try allocator.alloc(f32, 4);
+            defer allocator.free(target_vector);
+            for (0..4) |i| target_vector[i] = q_values[i];
+
+            var target_val = reward;
+            if (!done) {
+                const next_obs = try world.getObservation(allocator);
+                defer allocator.free(next_obs);
+                const next_q_values = net.forward(next_obs);
+                target_val += GAMMA * maxVal(next_q_values);
+            }
+
+            target_vector[action] = target_val; // Only the chosen action moves toward the target
+
+ // Train della rete su questo singolo passo
+ _ = try net.train(current_obs, target_vector, LR);
+
+ // Export e Delay
+ try exportAntJSON(&world, "ant_state.json", episode, epsilon);
+
+ // Se stiamo esplorando molto (inizio), andiamo veloci. Se siamo bravi, rallentiamo per goderci la scena.
+ if (episode % 10 == 0) {
+ // Mostra ogni 10 episodi a velocità umana
+ std.Thread.sleep(50 * 1_000_000);
+ } else {
+ // Allenamento veloce (quasi istantaneo)
+ // Togli il commento sotto se vuoi vedere TUTTO, ma sarà lento
+ // std.Thread.sleep(10 * 1_000_000);
+ }
+ }
+
+ // Fine episodio
+ if (epsilon > EPSILON_END) {
+ epsilon -= DECAY_RATE;
+ }
+
+ if (episode % 10 == 0) {
+ std.debug.print("Episodio {d} | Epsilon: {d:.3} | Steps: {d}\n", .{ episode, epsilon, world.steps });
}
}
- return max_idx;
}
diff --git a/src/modular_network.zig b/src/modular_network.zig
index 2fdf114..0aae241 100644
--- a/src/modular_network.zig
+++ b/src/modular_network.zig
@@ -20,8 +20,8 @@ pub const Network = struct {
self.layers.deinit(self.allocator);
}
- pub fn addLayer(self: *Network, input_size: usize, output_size: usize, seed: u64) !void {
- const layer = try DenseLayer.init(self.allocator, input_size, output_size, seed);
+ pub fn addLayer(self: *Network, input_size: usize, output_size: usize, seed: u64, use_sigmoid: bool) !void {
+ const layer = try DenseLayer.init(self.allocator, input_size, output_size, seed, use_sigmoid);
try self.layers.append(self.allocator, layer);
}
@@ -148,7 +148,9 @@ pub const Network = struct {
_ = try file.readAll(std.mem.asBytes(&inputs));
_ = try file.readAll(std.mem.asBytes(&neurons));
- try net.addLayer(@intCast(inputs), @intCast(neurons), 0);
+ // SE è l'ultimo layer, niente sigmoide!
+ const is_last = (i == layer_count - 1);
+ try net.addLayer(@intCast(inputs), @intCast(neurons), 0, !is_last);
const layer = &net.layers.items[net.layers.items.len - 1];