cvicenie 3 hotove

2026-03-12 22:06:29 +01:00
parent b0778cfe69
commit f6b9d79062
4 changed files with 2102 additions and 128 deletions
--- a/hod_1/Cargo.lock
+++ b/hod_1/Cargo.lock
--- a/hod_1/Cargo.toml
+++ b/hod_1/Cargo.toml
@@ -4,7 +4,8 @@ version = "0.1.0"
 edition = "2024"
 [dependencies]
-burn = { version = "0.20.1", default-features = false, features = ["ndarray"] }
+burn = { version = "0.20.1", default-features = false, features = ["ndarray", "train"] }
 burn-autodiff = "0.20.1"
 burn-ndarray = "0.20.1"
 clap = { version = "4.5.60", features = ["derive"] }
 ndarray = "0.17.2"
--- a/hod_1/src/lib.rs
+++ b/hod_1/src/lib.rs
@@ -1,57 +1,177 @@
 use burn::tensor::Tensor;
-use burn::tensor::linalg::diag;
+use burn::optim::Optimizer;
-use burn::tensor::Shape;
+use burn::nn::loss::CrossEntropyLossConfig;
 use burn::tensor::Int;
 use burn::tensor::backend::AutodiffBackend;
 use burn::tensor::backend::Backend;
 use burn::optim::GradientsParams;
 use burn::tensor::activation;
 use std::str::FromStr;
 use burn::module::Module;
 use burn::nn::{Linear, LinearConfig};
-pub type B = burn_ndarray::NdArray<f64>;
+pub type B = burn_autodiff::Autodiff<burn_ndarray::NdArray<f64>>;
-fn l2_norm(v: Tensor<B, 1>) -> f64 {
+#[derive(Module, Debug)]
-    v.clone()
+pub struct MnistClassifier<B: Backend> {
-        .mul(v)           // element-wise: v_i * v_i
+    hidden: Vec<Linear<B>>,
-        .sum()            // suma všetkých v_i^2
+    output: Linear<B>,
-        .sqrt()           // odmocnina
+    activation: Activation,
        .into_scalar()    // na f32
 }
-/// Input: [N, 784], Output: [N, 784]
+impl<B: Backend<FloatElem = f64, IntElem = i64>> MnistClassifier<B> {
-pub fn center(x: Tensor<B, 2>) -> Tensor<B, 2> {
+    pub fn new(
-   let mean = x.clone().mean_dim(0);
+        device: &B::Device,
-    x.sub(mean)
+        hidden_layers: usize,
        hidden_layer_size: usize,
        activation: Activation,
    ) -> Self {
        let mut hidden = Vec::new();
        let mut current_input_size = 784;
        if hidden_layers > 0 {
            hidden.push(LinearConfig::new(current_input_size, hidden_layer_size).init(device));
            current_input_size = hidden_layer_size;
            for _ in 1..hidden_layers {
                hidden.push(LinearConfig::new(hidden_layer_size, hidden_layer_size).init(device));
            }
        }
-/// Input: [N, 784], Output: [784, 784]
+        let output = LinearConfig::new(current_input_size, 10).init(device);
 pub fn covariance(x: Tensor<B, 2>) -> Tensor<B, 2> {
    let cen = center(x);
    let transpose = cen.clone().transpose();
    let n = cen.dims()[0] as f64;
    let mul = transpose.matmul(cen);
    mul.div_scalar(n - 1.0)
        Self { hidden, output, activation }
    }
-pub fn total_variance(x: Tensor<B, 2>) -> f64 {
+    pub fn forward(&self, images: Tensor<B, 2>) -> Tensor<B, 2> {
-    let cov: Tensor<B, 2> = covariance(x);
+        let mut result = images;
-    let diag: Tensor<B, 1> = diag(cov);
+        for layer in &self.hidden {
-    let sum = diag.sum().into_scalar();
+            result = layer.forward(result);
-    sum
+            result = self.activation.forward(result);
        }
        self.output.forward(result)
    }
-/// Input: [784, 784], scalar, Output: [784]
+    pub fn train_step(
-pub fn power_iteration(cov: Tensor<B, 2>, iterations: usize) -> (Tensor<B, 1>, f64) {
+        &self,
-    let n = cov.dims()[0];
+        images: Tensor<B, 2>,
-    let device = cov.device();
+        labels: Tensor<B, 1, Int>,
-    let mut v: Tensor<B, 1> = Tensor::ones(Shape::new([n]), &device);
+        optimizer: &mut impl Optimizer<Self, B>,
-    let mut s: f64 = 0.0;
+        lr: f64
    ) -> (Self, f64, usize) where B: AutodiffBackend {
        // Forward pass
        let logits = self.forward(images);
-    for _ in 0..iterations {
+        // Loss calculation
-        let v_new_2d = cov.clone().matmul(v.reshape([n, 1]));
+        let loss_fn = CrossEntropyLossConfig::new().init(&logits.device());
-        let v_new = v_new_2d.squeeze::<1>();
+        let loss = loss_fn.forward(logits.clone(), labels.clone());
-        s = l2_norm(v_new.clone());
+
-        v = v_new.div_scalar(s);
+        // Accuracy
-    }
+        let correct = logits.argmax(1)
-    return (v, s);
+            .flatten::<1>(0, 1)
            .equal(labels)
            .int()
            .sum()
            .into_scalar() as usize;
        let loss_val = loss.clone().into_scalar();
        // Backprop
        let grads = loss.backward();
        let grads = GradientsParams::from_grads(grads, self);
        let updated_model = optimizer.step(lr, self.clone(), grads);
        (updated_model, loss_val, correct)
    }
-/// Input: [784, 784], [784], Output: f32
+    pub fn train_and_evaluate(
-pub fn explained_variance(total_var: f64, s: f64) -> f64 {
+        &mut self,
-    s / total_var
+        images: Tensor<B, 2>,
        labels: Tensor<B, 1, Int>,
        optimizer: &mut impl Optimizer<Self, B>,
        args_epochs: usize,
        args_batch_size: usize,
    ) where B: AutodiffBackend {
        eprintln!("images shape: {:?}", images.shape());
        eprintln!("labels shape: {:?}", labels.shape());
        let train_size = 50000;
        let x_train = images.clone().slice([0..train_size]);
        let y_train = labels.clone().slice([0..train_size]);
        let x_dev = images.slice([train_size..55000]);
        let y_dev = labels.slice([train_size..55000]);
        let target_epochs = [1, 5, 10];
        for epoch in target_epochs {
            let start = std::time::Instant::now();
            let mut train_loss = 0.0;
            let mut train_correct = 0;
            for i in (0..train_size).step_by(args_batch_size) {
                let end = (i + args_batch_size).min(train_size);
                if i >= end { continue; }
                let b_x = x_train.clone().slice([i..end]);
                let b_y = y_train.clone().slice([i..end]);
                if i == 0 {
                    eprintln!("first batch shape: {:?}", b_x.shape());
                    eprintln!("output layer: input={:?} output=10", self.output.weight.shape());
                }
                let (updated_model, loss_val, correct) = self.train_step(b_x, b_y, optimizer, 1e-3);
                *self = updated_model;
                train_loss += loss_val;
                train_correct += correct;
            }
            // Dev metrics
            let dev_logits = self.forward(x_dev.clone());
            let loss_fn = CrossEntropyLossConfig::new().init(&dev_logits.device());
            let dev_loss = loss_fn.forward(dev_logits.clone(), y_dev.clone()).into_scalar();
            let dev_acc = dev_logits.argmax(1).flatten::<1>(0, 1).equal(y_dev.clone()).int().sum().into_scalar() as f64 / 5000.0;
            println!(
                "Epoch {:2}/{} {:.1}s loss={:.4} accuracy={:.4} dev:loss={:.4} dev:accuracy={:.4}",
                epoch, args_epochs, start.elapsed().as_secs_f32(),
                train_loss / (train_size as f64 / args_batch_size as f64),
                train_correct as f64 / train_size as f64,
                dev_loss, dev_acc
            );
        }
    }
 }
 /// Element-wise activation function that can be selected for the classifier.
 ///
 /// A value can be parsed from its lowercase name through the `FromStr`
 /// implementation and applied to a tensor with `Activation::forward`.
 #[derive(Debug, Clone, Copy, Module, Default)]
 pub enum Activation {
    /// No activation: `forward` returns its input unchanged. This is the default variant.
    #[default]
    None,
    /// Rectified linear unit, applied via `activation::relu`.
    ReLU,
    /// Hyperbolic tangent, applied via `activation::tanh`.
    Tanh,
    /// Sigmoid, applied via `activation::sigmoid`.
    Sigmoid,
 }
 impl FromStr for Activation {
    type Err = String;
    /// Parses an activation from its exact, lowercase name.
    ///
    /// Accepted names are `"none"`, `"relu"`, `"tanh"` and `"sigmoid"`.
    ///
    /// # Errors
    ///
    /// Returns `Unknown activation: <input>` for any other string; matching is
    /// case-sensitive and no trimming is performed.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        // Name -> variant table; the lookup is an exact string comparison.
        const KNOWN: [(&str, Activation); 4] = [
            ("none", Activation::None),
            ("relu", Activation::ReLU),
            ("tanh", Activation::Tanh),
            ("sigmoid", Activation::Sigmoid),
        ];
        KNOWN
            .iter()
            .find(|(label, _)| *label == s)
            .map(|&(_, variant)| variant)
            .ok_or_else(|| format!("Unknown activation: {}", s))
    }
 }
 impl Activation {
    /// Applies the selected activation to `x` and returns the resulting tensor.
    ///
    /// `Activation::None` hands `x` back untouched; the other variants delegate
    /// to the matching function in the `activation` module (`relu`, `tanh`,
    /// `sigmoid`). The tensor's backend `B` and rank `D` are preserved.
    pub fn forward<B: Backend, const D: usize>(&self, x: Tensor<B, D>) -> Tensor<B, D> {
        // `Activation` is `Copy`, so matching on the dereferenced value is free.
        match *self {
            Self::None => x,
            Self::ReLU => activation::relu(x),
            Self::Tanh => activation::tanh(x),
            Self::Sigmoid => activation::sigmoid(x),
        }
    }
 }
--- a/hod_1/src/main.rs
+++ b/hod_1/src/main.rs
@@ -1,86 +1,133 @@
 use burn::tensor::backend::Backend;
 use burn::tensor::Tensor;
 use clap::Parser;
-use hod_1::{covariance, explained_variance, power_iteration, total_variance, B};
+use hod_1::B;
 use std::fs::File;
 use std::io::Read;
 use std::str::FromStr;
 use hod_1::*;
 use burn::optim::AdamConfig;
 use burn::optim::Optimizer;
 use burn::nn::loss::CrossEntropyLossConfig;
 #[derive(Parser, Debug)]
 #[command(author, version, about, long_about = None)]
 struct Args {
-    #[arg(long = "examples", default_value = "1024")]
+    #[arg(long = "activation", default_value = "none")]
-    examples: usize,
+    activation: String,
-    #[arg(long = "iterations", default_value = "64")]
+
-    iterations: usize,
+    #[arg(long = "batch_size", default_value = "50")]
    batch_size: usize,
    #[arg(long = "epochs", default_value = "10")]
    epochs: usize,
    #[arg(long = "hidden_layer_size", default_value = "100")]
    hidden_layer_size: usize,
    #[arg(long = "hidden_layers", default_value = "1")]
    hidden_layers: usize,
    #[arg(long = "seed", default_value = "42")]
    seed: u64,
    #[arg(long = "threads", default_value = "1")]
    threads: usize,
 }
-fn load_mnist(examples: usize, device: &<B as Backend>::Device) -> Tensor<B, 2> {
+/// Load MNIST images and labels for training.
 /// Returns (images [N, 784], labels [N]) where labels are class indices 0-9.
 fn load_mnist_labeled(
    examples: usize,
    device: &<B as Backend>::Device,
 ) -> (Tensor<B, 2>, Tensor<B, 1, burn::tensor::Int>) {
    let file = File::open("mnist.npz").expect("Cannot open mnist.npz");
    let mut archive = zip::ZipArchive::new(file).expect("Cannot read zip");
-    // Print all available array names so you can see what's inside
+    // Load images
-    eprintln!("Arrays in mnist.npz:");
+    let image_candidates = [
    for i in 0..archive.len() {
        eprintln!("  {}", archive.by_index(i).unwrap().name());
    }
    // Try the most common key names used for MNIST train images
    let candidates = [
        "train_images.npy",
        "train.images.npy",
        "x_train.npy",
        "images.npy",
    ];
-
+    let mut image_bytes = Vec::new();
-    let mut bytes = Vec::new();
+    let mut found_images = false;
-    let mut found_name = "";
+    for name in &image_candidates {
-    for name in &candidates {
+        if let Ok(mut entry) = archive.by_name(name) {
-        if archive.by_name(name).is_ok() {
+            entry.read_to_end(&mut image_bytes).expect("Failed to read images");
-            archive
+            found_images = true;
                .by_name(name)
                .unwrap()
                .read_to_end(&mut bytes)
                .expect("Failed to read entry");
            found_name = name;
            break;
        }
    }
-    assert!(!bytes.is_empty(), "Could not find train images — check the printed names above and update candidates[]");
+    assert!(found_images, "Could not find train images in mnist.npz");
    eprintln!("Loaded from: {found_name}");
-    // Parse the .npy header to get the shape
+    // Load labels
-    let npy = npyz::NpyFile::new(&bytes[..]).expect("Cannot parse npy");
+    let label_candidates = [
-    let shape = npy.shape().to_vec();
+        "train_labels.npy",
-    eprintln!("Raw array shape: {shape:?}");
+        "train.labels.npy",
        "y_train.npy",
        "labels.npy",
    ];
    let mut label_bytes = Vec::new();
    let mut found_labels = false;
    for name in &label_candidates {
        if let Ok(mut entry) = archive.by_name(name) {
            entry.read_to_end(&mut label_bytes).expect("Failed to read labels");
            found_labels = true;
            break;
        }
    }
    assert!(found_labels, "Could not find train labels in mnist.npz");
-    // MNIST is stored as uint8 (0–255); we normalise to [0.0, 1.0]
+    // Parse images
-    let raw: Vec<u8> = npy.into_vec().expect("Failed to read as u8 — dtype mismatch?");
+    let image_npy = npyz::NpyFile::new(&image_bytes[..]).expect("Cannot parse images npy");
    let image_shape = image_npy.shape().to_vec();
    let image_raw: Vec<u8> = image_npy.into_vec().expect("Failed to read images as u8");
    let n = examples.min(image_shape[0] as usize);
    let pixels = image_raw.len() / image_shape[0] as usize;
-    let n = examples.min(shape[0] as usize);
+    let image_data: Vec<f64> = image_raw[..n * pixels]
    let pixels = raw.len() / shape[0] as usize; // 784 = 1*28*28, regardless of how axes are ordered
    let data: Vec<f64> = raw[..n * pixels]
        .iter()
        .map(|&p| p as f64 / 255.0)
        .collect();
-    eprintln!("Loaded {n} examples, {pixels} pixels each");
+    let image_tensor_data = burn::tensor::TensorData::new(image_data, [n, pixels]);
    let images = Tensor::<B, 2>::from_data(image_tensor_data, device);
-    let tensor_data = burn::tensor::TensorData::new(data, [n, pixels]);
+    // Parse labels
-    Tensor::<B, 2>::from_data(tensor_data, device)
+    let label_npy = npyz::NpyFile::new(&label_bytes[..]).expect("Cannot parse labels npy");
    let label_raw: Vec<u8> = label_npy.into_vec().expect("Failed to read labels as u8");
    let label_data: Vec<i64> = label_raw[..n]
        .iter()
        .map(|&p| p as i64)
        .collect();
    let label_tensor_data = burn::tensor::TensorData::new(label_data, [n]);
    let labels = Tensor::<B, 1, burn::tensor::Int>::from_data(label_tensor_data, device);
    (images, labels)
 }
 fn main() {
    let args = Args::parse();
-    let device = <B as Backend>::Device::default();
+    let device = burn_ndarray::NdArrayDevice::Cpu;
    let activation = Activation::from_str(&args.activation).unwrap_or_default();
-    let x = load_mnist(args.examples, &device);
+    let mut model = MnistClassifier::<B>::new(
        &device,
        args.hidden_layers,
        args.hidden_layer_size,
        activation,
    );
-    let cov = covariance(x.clone());
+    let mut optim = AdamConfig::new().init::<B, MnistClassifier<B>>();
-    let total_var = total_variance(x.clone());
+    let (images, labels) = load_mnist_labeled(60000, &device);
    let (_pc, s) = power_iteration(cov, args.iterations);
    let ev = explained_variance(total_var, s);
-    println!("Total variance: {:.2}", total_var);
+    println!("Starting training...");
-    println!("Explained variance: {:.2}%", 100.0 * ev);
+
    // Main just tells the model to run the process
    model.train_and_evaluate(images, labels, &mut optim, args.epochs, args.batch_size);
 }