cvicenie 3 hotove

gitignore before pushing to git
working pca_first
2026-03-12 22:06:29 +01:00 · 2026-03-12 15:29:01 +01:00 · 2026-03-12 15:20:07 +01:00
5 changed files with 2853 additions and 103 deletions
--- a/hod_1/.gitignore
+++ b/hod_1/.gitignore
@@ -1 +1,2 @@
 target/
+mnist.npz
--- a/hod_1/Cargo.lock
+++ b/hod_1/Cargo.lock
--- a/hod_1/Cargo.toml
+++ b/hod_1/Cargo.toml
@@ -4,7 +4,10 @@ version = "0.1.0"
 edition = "2024"

 [dependencies]
-burn = { version = "0.20.1", default-features = false, features = ["ndarray"] }
+burn = { version = "0.20.1", default-features = false, features = ["ndarray", "train"] }
+burn-autodiff = "0.20.1"
 burn-ndarray = "0.20.1"
 clap = { version = "4.5.60", features = ["derive"] }
 ndarray = "0.17.2"
+npyz = { version = "0.8.4", features = ["npz"] }
+zip = { version = "8.2.0", features = ["deflate"] }
--- a/hod_1/src/lib.rs
+++ b/hod_1/src/lib.rs
@@ -1,63 +1,177 @@
 use burn::tensor::Tensor;
-use std::collections::HashMap;
-use std::fs::File;
-use std::io::{BufRead, BufReader};
+use burn::optim::Optimizer;
+use burn::nn::loss::CrossEntropyLossConfig;
+use burn::tensor::Int;
+use burn::tensor::backend::AutodiffBackend;
+use burn::tensor::backend::Backend;
+use burn::optim::GradientsParams;
+use burn::tensor::activation;
+use std::str::FromStr;
+use burn::module::Module;
+use burn::nn::{Linear, LinearConfig};

-pub type B = burn_ndarray::NdArray<f64>;
+pub type B = burn_autodiff::Autodiff<burn_ndarray::NdArray<f64>>;

-// Funkcia na načítanie a zarovnanie dát
-pub fn load_and_align(data_path: &str, model_path: &str) -> (Vec<f64>, Vec<f64>) {
-    let mut counts = HashMap::new();
-    let mut total_count = 0.0;
-
-    let file_p = File::open(data_path).expect("Nepodarilo sa otvoriť dáta");
-    for line in BufReader::new(file_p).lines() {
-        let point = line.unwrap().trim().to_string();
-        if point.is_empty() { continue; }
-        *counts.entry(point).or_insert(0.0) += 1.0;
-        total_count += 1.0;
-    }
-
-    let mut model_map = HashMap::new();
-    let file_q = File::open(model_path).expect("Nepodarilo sa otvoriť model");
-    for line in BufReader::new(file_q).lines() {
-        let l = line.unwrap();
-        let parts: Vec<&str> = l.split('\t').collect();
-        if parts.len() >= 2 {
-            model_map.insert(parts[0].to_string(), parts[1].parse::<f64>().unwrap());
-        }
-    }
-
-    let mut p_vals = Vec::new();
-    let mut q_vals = Vec::new();
-    for (point, count) in counts.iter() {
-        p_vals.push(count / total_count);
-        q_vals.push(*model_map.get(point).unwrap_or(&0.0));
-    }
-    (p_vals, q_vals)
+/// Fully connected classifier: a stack of hidden `Linear` layers followed by
+/// one output `Linear` layer, with a single shared `Activation` applied after
+/// every hidden layer.
+#[derive(Module, Debug)]
+pub struct MnistClassifier<B: Backend> {
+    /// Hidden layers, applied in order by `forward`.
+    hidden: Vec<Linear<B>>,
+    /// Final layer; its output is what `forward` returns.
+    output: Linear<B>,
+    /// Activation applied to the output of each hidden layer.
+    activation: Activation,
 }

-pub fn entropy(p: Tensor<B, 1>) -> f64 {
-    let zero_mask = p.clone().equal_elem(0.0);
-    let p_safe = p.clone().mask_fill(zero_mask, 1.0);
-    let terms = p * p_safe.log();
-    -terms.sum().into_scalar()
-}
+impl<B: Backend<FloatElem = f64, IntElem = i64>> MnistClassifier<B> {
+    /// Builds a classifier with `hidden_layers` hidden layers, each
+    /// `hidden_layer_size` units wide, followed by a 10-unit output layer.
+    ///
+    /// The first layer always takes 784 inputs. With `hidden_layers == 0` the
+    /// output layer is connected directly to those 784 inputs.
+    /// All layers are initialised on `device`.
+    pub fn new(
+        device: &B::Device,
+        hidden_layers: usize,
+        hidden_layer_size: usize,
+        activation: Activation,
+    ) -> Self {
+        let mut hidden = Vec::new();
+        // Width of whatever feeds the next layer; starts at the 784 inputs.
+        let mut current_input_size = 784;
+        if hidden_layers > 0 {
+            // First hidden layer maps the inputs to the hidden width.
+            hidden.push(LinearConfig::new(current_input_size, hidden_layer_size).init(device));
+            current_input_size = hidden_layer_size;

-pub fn cross_entropy(p: Tensor<B, 1>, q: Tensor<B, 1>) -> f64 {
-    let zero_mask_q = q.clone().equal_elem(0.0);
-    let p_exists = p.clone().greater_elem(0.0);
-    if p_exists.bool_and(zero_mask_q.clone()).any().into_scalar() {
-        return f64::INFINITY;
+            // Remaining hidden layers keep the hidden width on both sides.
+            for _ in 1..hidden_layers {
+                hidden.push(LinearConfig::new(hidden_layer_size, hidden_layer_size).init(device));
+            }
+        }
+
+        let output = LinearConfig::new(current_input_size, 10).init(device);
+
+        Self { hidden, output, activation }
+    }
+
+    /// Runs a batch through the network and returns the output-layer result.
+    ///
+    /// Every hidden layer is applied in order and followed by the configured
+    /// activation; the output layer is applied last, without an activation.
+    pub fn forward(&self, images: Tensor<B, 2>) -> Tensor<B, 2> {
+        let features = self.hidden.iter().fold(images, |acc, layer| {
+            self.activation.forward(layer.forward(acc))
+        });
+        self.output.forward(features)
+    }
+
+    /// Performs one optimizer step on a single batch.
+    ///
+    /// Returns the model produced by `optimizer.step`, the scalar cross-entropy
+    /// loss of the batch (computed before the update), and the number of rows
+    /// whose arg-max prediction equals the label.
+    pub fn train_step(
+        &self,
+        images: Tensor<B, 2>,
+        labels: Tensor<B, 1, Int>,
+        optimizer: &mut impl Optimizer<Self, B>,
+        lr: f64
+    ) -> (Self, f64, usize) where B: AutodiffBackend {
+        let logits = self.forward(images);
+
+        // Cross-entropy between the raw outputs and the integer class labels.
+        let criterion = CrossEntropyLossConfig::new().init(&logits.device());
+        let loss = criterion.forward(logits.clone(), labels.clone());
+        let loss_val = loss.clone().into_scalar();
+
+        // Number of rows where the highest-scoring class matches the label.
+        let predictions = logits.argmax(1).flatten::<1>(0, 1);
+        let correct = predictions.equal(labels).int().sum().into_scalar() as usize;
+
+        // The optimizer consumes a model by value, hence the clone of `self`.
+        let grads = GradientsParams::from_grads(loss.backward(), self);
+        let updated_model = optimizer.step(lr, self.clone(), grads);
+
+        (updated_model, loss_val, correct)
+    }
+
+    /// Trains the model in place for `args_epochs` epochs, printing one line of
+    /// training and development metrics to stdout after every epoch.
+    ///
+    /// The first 50 000 rows of `images`/`labels` form the training split and
+    /// the following 5 000 rows the development split. Training walks the
+    /// training split in order in batches of `args_batch_size` (the last batch
+    /// may be shorter) with a fixed learning rate of `1e-3`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `args_batch_size` is zero or if fewer than 55 000 examples
+    /// are supplied.
+    pub fn train_and_evaluate(
+        &mut self,
+        images: Tensor<B, 2>,
+        labels: Tensor<B, 1, Int>,
+        optimizer: &mut impl Optimizer<Self, B>,
+        args_epochs: usize,
+        args_batch_size: usize,
+    ) where B: AutodiffBackend {
+        const TRAIN_SIZE: usize = 50_000;
+        const DEV_SIZE: usize = 5_000;
+        const LEARNING_RATE: f64 = 1e-3;
+
+        eprintln!("images shape: {:?}", images.shape());
+        eprintln!("labels shape: {:?}", labels.shape());
+
+        // Fail with a readable message instead of a `step_by`/slice panic.
+        assert!(args_batch_size > 0, "batch size must be at least 1");
+        let available = images.dims()[0].min(labels.dims()[0]);
+        assert!(
+            available >= TRAIN_SIZE + DEV_SIZE,
+            "need at least {} labeled examples, got {}",
+            TRAIN_SIZE + DEV_SIZE,
+            available
+        );
+
+        let x_train = images.clone().slice([0..TRAIN_SIZE]);
+        let y_train = labels.clone().slice([0..TRAIN_SIZE]);
+        let x_dev = images.slice([TRAIN_SIZE..TRAIN_SIZE + DEV_SIZE]);
+        let y_dev = labels.slice([TRAIN_SIZE..TRAIN_SIZE + DEV_SIZE]);
+
+        // Run every requested epoch; the previous fixed `[1, 5, 10]` list
+        // trained exactly three epochs regardless of `args_epochs`.
+        for epoch in 1..=args_epochs {
+            let start = std::time::Instant::now();
+            let mut train_loss = 0.0;
+            let mut train_correct = 0;
+            let mut batches = 0usize;
+
+            for i in (0..TRAIN_SIZE).step_by(args_batch_size) {
+                let end = i.saturating_add(args_batch_size).min(TRAIN_SIZE);
+
+                let b_x = x_train.clone().slice([i..end]);
+                let b_y = y_train.clone().slice([i..end]);
+
+                if i == 0 {
+                    eprintln!("first batch shape: {:?}", b_x.shape());
+                    eprintln!("output layer: input={:?} output=10", self.output.weight.shape());
+                }
+
+                let (updated_model, loss_val, correct) =
+                    self.train_step(b_x, b_y, optimizer, LEARNING_RATE);
+                *self = updated_model;
+
+                train_loss += loss_val;
+                train_correct += correct;
+                batches += 1;
+            }
+
+            // Dev metrics
+            let dev_logits = self.forward(x_dev.clone());
+            let loss_fn = CrossEntropyLossConfig::new().init(&dev_logits.device());
+            let dev_loss = loss_fn.forward(dev_logits.clone(), y_dev.clone()).into_scalar();
+            let dev_correct = dev_logits
+                .argmax(1)
+                .flatten::<1>(0, 1)
+                .equal(y_dev.clone())
+                .int()
+                .sum()
+                .into_scalar();
+            let dev_acc = dev_correct as f64 / DEV_SIZE as f64;
+
+            println!(
+                "Epoch {:2}/{} {:.1}s loss={:.4} accuracy={:.4} dev:loss={:.4} dev:accuracy={:.4}",
+                epoch, args_epochs, start.elapsed().as_secs_f32(),
+                // Mean of the per-batch losses over the batches actually run.
+                train_loss / batches as f64,
+                train_correct as f64 / TRAIN_SIZE as f64,
+                dev_loss, dev_acc
+            );
+        }
    }
-    let q_safe = q.mask_fill(zero_mask_q, 1.0);
-    let terms = p * q_safe.log();
-    -terms.sum().into_scalar()
 }

-pub fn kl_div2(p: Tensor<B, 1>, q: Tensor<B, 1>) -> f64 {
-    let ce = cross_entropy(p.clone(), q);
-    let e = entropy(p);
-    let result = ce - e;
-    if result < 0.0 { 0.0 } else { result }
+
+/// Selects which activation function `Activation::forward` applies.
+#[derive(Debug, Clone, Copy, Module, Default)]
+pub enum Activation {
+    /// Leaves the input unchanged; parsed from `"none"`. This is the default.
+    #[default]
+    None,
+    /// Applies `activation::relu`; parsed from `"relu"`.
+    ReLU,
+    /// Applies `activation::tanh`; parsed from `"tanh"`.
+    Tanh,
+    /// Applies `activation::sigmoid`; parsed from `"sigmoid"`.
+    Sigmoid,
+}
+
+/// Parses the exact lowercase names `none`, `relu`, `tanh` and `sigmoid`.
+impl FromStr for Activation {
+    type Err = String;
+
+    /// Returns the variant named by `s`, or an `Unknown activation: …` message
+    /// for any other input (matching is case-sensitive).
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        const NAMES: [(&str, Activation); 4] = [
+            ("none", Activation::None),
+            ("relu", Activation::ReLU),
+            ("tanh", Activation::Tanh),
+            ("sigmoid", Activation::Sigmoid),
+        ];
+
+        NAMES
+            .iter()
+            .find(|(name, _)| *name == s)
+            .map(|&(_, variant)| variant)
+            .ok_or_else(|| format!("Unknown activation: {}", s))
+    }
+}
+
+impl Activation {
+    /// Applies the selected activation function to `x`.
+    ///
+    /// `Activation::None` hands `x` back untouched; the other variants
+    /// delegate to the function of the same name in `burn::tensor::activation`.
+    pub fn forward<B: Backend, const D: usize>(&self, x: Tensor<B, D>) -> Tensor<B, D> {
+        match *self {
+            Self::None => x,
+            Self::ReLU => activation::relu(x),
+            Self::Tanh => activation::tanh(x),
+            Self::Sigmoid => activation::sigmoid(x),
+        }
+    }
 }
--- a/hod_1/src/main.rs
+++ b/hod_1/src/main.rs
@@ -1,30 +1,133 @@
-use burn::tensor::{backend::Backend, Tensor};
+use burn::tensor::backend::Backend;
+use burn::tensor::Tensor;
 use clap::Parser;
-// Nahraď 'hod_1' názvom tvojho projektu v Cargo.toml
-use hod_1::{load_and_align, entropy, cross_entropy, kl_div2, B};
+use hod_1::B;
+use std::fs::File;
+use std::io::Read;
+use std::str::FromStr;
+use hod_1::*;
+
+use burn::optim::AdamConfig;
+use burn::optim::Optimizer;
+use burn::nn::loss::CrossEntropyLossConfig;

+// Command-line options for the training binary.
 #[derive(Parser, Debug)]
 #[command(author, version, about, long_about = None)]
 struct Args {
-    #[arg(long = "data_path")]
-    data_path: String,
+    // Activation name; `main` parses it with `Activation::from_str`.
+    #[arg(long = "activation", default_value = "none")]
+    activation: String,

-    #[arg(long = "model_path")]
-    model_path: String,
+    // Batch size forwarded to `train_and_evaluate`.
+    #[arg(long = "batch_size", default_value = "50")]
+    batch_size: usize,
+
+    // Epoch count forwarded to `train_and_evaluate`.
+    #[arg(long = "epochs", default_value = "10")]
+    epochs: usize,
+
+    // Width of each hidden layer, forwarded to `MnistClassifier::new`.
+    #[arg(long = "hidden_layer_size", default_value = "100")]
+    hidden_layer_size: usize,
+
+    // Number of hidden layers, forwarded to `MnistClassifier::new`.
+    #[arg(long = "hidden_layers", default_value = "1")]
+    hidden_layers: usize,
+
+    // NOTE(review): parsed but not read anywhere in the visible `main`.
+    #[arg(long = "seed", default_value = "42")]
+    seed: u64,
+
+    // NOTE(review): parsed but not read anywhere in the visible `main`.
+    #[arg(long = "threads", default_value = "1")]
+    threads: usize,
+}
+
+/// Returns the bytes of the first archive entry whose name is in `candidates`,
+/// or `None` when the archive contains none of them.
+///
+/// Panics with `read_error` if a matching entry exists but cannot be read.
+fn read_first_entry(
+    archive: &mut zip::ZipArchive<File>,
+    candidates: &[&str],
+    read_error: &str,
+) -> Option<Vec<u8>> {
+    for name in candidates {
+        if let Ok(mut entry) = archive.by_name(name) {
+            let mut bytes = Vec::new();
+            entry.read_to_end(&mut bytes).expect(read_error);
+            return Some(bytes);
+        }
+    }
+    None
+}
+
+/// Loads labeled training examples from `mnist.npz` in the working directory.
+///
+/// At most `examples` rows are loaded. Images are read as `u8`, scaled to
+/// `[0, 1]` by dividing by 255 and returned as an `[N, pixels]` tensor, where
+/// `pixels` is the number of stored bytes per image. Labels are read as `u8`
+/// and returned as an `[N]` integer tensor.
+///
+/// # Panics
+///
+/// Panics if the archive cannot be opened or parsed, if no known image or
+/// label entry is present, if the image array is empty, or if fewer labels
+/// than requested images are stored.
+fn load_mnist_labeled(
+    examples: usize,
+    device: &<B as Backend>::Device,
+) -> (Tensor<B, 2>, Tensor<B, 1, burn::tensor::Int>) {
+    let file = File::open("mnist.npz").expect("Cannot open mnist.npz");
+    let mut archive = zip::ZipArchive::new(file).expect("Cannot read zip");
+
+    // The entry names differ between distributions of the file, so try several.
+    let image_bytes = read_first_entry(
+        &mut archive,
+        &["train_images.npy", "train.images.npy", "x_train.npy", "images.npy"],
+        "Failed to read images",
+    )
+    .expect("Could not find train images in mnist.npz");
+    let label_bytes = read_first_entry(
+        &mut archive,
+        &["train_labels.npy", "train.labels.npy", "y_train.npy", "labels.npy"],
+        "Failed to read labels",
+    )
+    .expect("Could not find train labels in mnist.npz");
+
+    // Parse images
+    let image_npy = npyz::NpyFile::new(&image_bytes[..]).expect("Cannot parse images npy");
+    let image_shape = image_npy.shape().to_vec();
+    let image_raw: Vec<u8> = image_npy.into_vec().expect("Failed to read images as u8");
+
+    // Guard the leading dimension: it is used as a divisor below.
+    let stored = image_shape.first().map_or(0, |&rows| rows as usize);
+    assert!(stored > 0, "mnist.npz contains no images");
+    let n = examples.min(stored);
+    let pixels = image_raw.len() / stored;
+
+    let image_data: Vec<f64> = image_raw[..n * pixels]
+        .iter()
+        .map(|&p| f64::from(p) / 255.0)
+        .collect();
+
+    let image_tensor_data = burn::tensor::TensorData::new(image_data, [n, pixels]);
+    let images = Tensor::<B, 2>::from_data(image_tensor_data, device);
+
+    // Parse labels
+    let label_npy = npyz::NpyFile::new(&label_bytes[..]).expect("Cannot parse labels npy");
+    let label_raw: Vec<u8> = label_npy.into_vec().expect("Failed to read labels as u8");
+    assert!(
+        label_raw.len() >= n,
+        "mnist.npz holds {} labels but {} images were requested",
+        label_raw.len(),
+        n
+    );
+
+    let label_data: Vec<i64> = label_raw[..n].iter().map(|&p| i64::from(p)).collect();
+
+    let label_tensor_data = burn::tensor::TensorData::new(label_data, [n]);
+    let labels = Tensor::<B, 1, burn::tensor::Int>::from_data(label_tensor_data, device);
+
+    (images, labels)
 }

 fn main() {
    let args = Args::parse();
-    let device = <B as Backend>::Device::default();
+    let device = burn_ndarray::NdArrayDevice::Cpu;
+    let activation = Activation::from_str(&args.activation).unwrap_or_default();

-    // Použijeme funkciu z lib.rs
-    let (p_vec, q_vec) = load_and_align(&args.data_path, &args.model_path);
+    let mut model = MnistClassifier::<B>::new(
+        &device,
+        args.hidden_layers,
+        args.hidden_layer_size,
+        activation,
+    );

-    let p = Tensor::<B, 1>::from_data(p_vec.as_slice(), &device);
-    let q = Tensor::<B, 1>::from_data(q_vec.as_slice(), &device);
+    let mut optim = AdamConfig::new().init::<B, MnistClassifier<B>>();
+    let (images, labels) = load_mnist_labeled(60000, &device);

-    // Výpočty
-    println!("{}", entropy(p.clone()));
-    println!("{}", cross_entropy(p.clone(), q.clone()));
-    println!("{}", kl_div2(p, q));
+    println!("Starting training...");
+
+    // Main just tells the model to run the process
+    model.train_and_evaluate(images, labels, &mut optim, args.epochs, args.batch_size);
 }
Author	SHA1	Message	Date
Priec	f6b9d79062	cvicenie 3 hotove	2026-03-12 22:06:29 +01:00
Priec	b0778cfe69	gitignore before pushing to git	2026-03-12 15:29:01 +01:00
Priec	2174d4e506	working pca_first	2026-03-12 15:20:07 +01:00