AI_Zig/get_data.py

38 lines
1 KiB
Python

import urllib.request
import gzip
import os
import shutil
# Gzip-compressed archives to fetch from ``base_url``.
files = [
    "train-images-idx3-ubyte.gz",
    "train-labels-idx1-ubyte.gz",
    "t10k-images-idx3-ubyte.gz",
    "t10k-labels-idx1-ubyte.gz",
]
# Remote location the archive names are appended to.
base_url = "https://storage.googleapis.com/cvdf-datasets/mnist/"
# Local directory that receives the decompressed files.
data_dir = "data"


def extract_gzip(archive_path, target_path):
    """Decompress the gzip file *archive_path* into *target_path*.

    The data is first written to ``target_path + ".part"`` and only renamed
    to *target_path* once decompression has finished, so an interrupted or
    failed extraction never leaves a truncated file under the final name.

    Raises:
        OSError: if the archive cannot be read (including invalid gzip
            data) or the target cannot be written.
    """
    partial_path = target_path + ".part"
    try:
        with gzip.open(archive_path, "rb") as f_in, \
                open(partial_path, "wb") as f_out:
            shutil.copyfileobj(f_in, f_out)
        os.replace(partial_path, target_path)
    finally:
        # After a successful rename the partial file is gone; this only
        # cleans up when something above raised.
        if os.path.exists(partial_path):
            os.remove(partial_path)


def fetch_file(name, dest_dir=None, url_root=None):
    """Download the archive *name* and store it decompressed in *dest_dir*.

    Args:
        name: Archive file name; must end with ``.gz``.
        dest_dir: Destination directory. Defaults to ``data_dir``.
        url_root: URL prefix that *name* is appended to. Defaults to
            ``base_url``.

    Returns:
        True if the file was downloaded and extracted, False if the
        decompressed file was already present and nothing was done.

    Raises:
        ValueError: if *name* does not end with ``.gz``.
        OSError: if the download or the extraction fails
            (``urllib.error.URLError`` is a subclass of ``OSError``).
    """
    if dest_dir is None:
        dest_dir = data_dir
    if url_root is None:
        url_root = base_url

    # Strip only the trailing extension, and only from the file name, so a
    # ".gz" appearing elsewhere in the path is left alone.
    stem, extension = os.path.splitext(name)
    if extension != ".gz":
        raise ValueError(f"expected a .gz archive name, got {name!r}")

    os.makedirs(dest_dir, exist_ok=True)
    archive_path = os.path.join(dest_dir, name)
    target_path = os.path.join(dest_dir, stem)

    if os.path.exists(target_path):
        print(f"{target_path} esiste già.")
        return False

    print(f"Scaricando {name}...")
    try:
        urllib.request.urlretrieve(url_root + name, archive_path)
        print(f"Estraendo {name}...")
        extract_gzip(archive_path, target_path)
    finally:
        # Remove the .gz file to keep the directory clean, also when the
        # download or the extraction failed part-way.
        if os.path.exists(archive_path):
            os.remove(archive_path)
    print("Fatto.")
    return True


def main():
    """Fetch every archive listed in ``files`` into ``data_dir``."""
    os.makedirs(data_dir, exist_ok=True)
    for name in files:
        fetch_file(name)
    print("\nTutti i dati sono nella cartella 'data/'!")


if __name__ == "__main__":
    main()