cvicenie 3 hotove
This commit is contained in:
1874
hod_1/Cargo.lock
generated
1874
hod_1/Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@@ -4,7 +4,8 @@ version = "0.1.0"
|
|||||||
edition = "2024"
|
edition = "2024"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
burn = { version = "0.20.1", default-features = false, features = ["ndarray"] }
|
burn = { version = "0.20.1", default-features = false, features = ["ndarray", "train"] }
|
||||||
|
burn-autodiff = "0.20.1"
|
||||||
burn-ndarray = "0.20.1"
|
burn-ndarray = "0.20.1"
|
||||||
clap = { version = "4.5.60", features = ["derive"] }
|
clap = { version = "4.5.60", features = ["derive"] }
|
||||||
ndarray = "0.17.2"
|
ndarray = "0.17.2"
|
||||||
|
|||||||
210
hod_1/src/lib.rs
210
hod_1/src/lib.rs
@@ -1,57 +1,177 @@
|
|||||||
use burn::tensor::Tensor;
|
use burn::tensor::Tensor;
|
||||||
use burn::tensor::linalg::diag;
|
use burn::optim::Optimizer;
|
||||||
use burn::tensor::Shape;
|
use burn::nn::loss::CrossEntropyLossConfig;
|
||||||
|
use burn::tensor::Int;
|
||||||
|
use burn::tensor::backend::AutodiffBackend;
|
||||||
|
use burn::tensor::backend::Backend;
|
||||||
|
use burn::optim::GradientsParams;
|
||||||
|
use burn::tensor::activation;
|
||||||
|
use std::str::FromStr;
|
||||||
|
use burn::module::Module;
|
||||||
|
use burn::nn::{Linear, LinearConfig};
|
||||||
|
|
||||||
pub type B = burn_ndarray::NdArray<f64>;
|
pub type B = burn_autodiff::Autodiff<burn_ndarray::NdArray<f64>>;
|
||||||
|
|
||||||
fn l2_norm(v: Tensor<B, 1>) -> f64 {
|
#[derive(Module, Debug)]
|
||||||
v.clone()
|
pub struct MnistClassifier<B: Backend> {
|
||||||
.mul(v) // element-wise: v_i * v_i
|
hidden: Vec<Linear<B>>,
|
||||||
.sum() // suma všetkých v_i^2
|
output: Linear<B>,
|
||||||
.sqrt() // odmocnina
|
activation: Activation,
|
||||||
.into_scalar() // na f32
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Input: [N, 784], Output: [N, 784]
|
impl<B: Backend<FloatElem = f64, IntElem = i64>> MnistClassifier<B> {
|
||||||
pub fn center(x: Tensor<B, 2>) -> Tensor<B, 2> {
|
pub fn new(
|
||||||
let mean = x.clone().mean_dim(0);
|
device: &B::Device,
|
||||||
x.sub(mean)
|
hidden_layers: usize,
|
||||||
}
|
hidden_layer_size: usize,
|
||||||
|
activation: Activation,
|
||||||
|
) -> Self {
|
||||||
|
let mut hidden = Vec::new();
|
||||||
|
let mut current_input_size = 784;
|
||||||
|
if hidden_layers > 0 {
|
||||||
|
hidden.push(LinearConfig::new(current_input_size, hidden_layer_size).init(device));
|
||||||
|
current_input_size = hidden_layer_size;
|
||||||
|
|
||||||
/// Input: [N, 784], Output: [784, 784]
|
for _ in 1..hidden_layers {
|
||||||
pub fn covariance(x: Tensor<B, 2>) -> Tensor<B, 2> {
|
hidden.push(LinearConfig::new(hidden_layer_size, hidden_layer_size).init(device));
|
||||||
let cen = center(x);
|
}
|
||||||
let transpose = cen.clone().transpose();
|
}
|
||||||
let n = cen.dims()[0] as f64;
|
|
||||||
let mul = transpose.matmul(cen);
|
|
||||||
mul.div_scalar(n - 1.0)
|
|
||||||
|
|
||||||
}
|
let output = LinearConfig::new(current_input_size, 10).init(device);
|
||||||
|
|
||||||
pub fn total_variance(x: Tensor<B, 2>) -> f64 {
|
Self { hidden, output, activation }
|
||||||
let cov: Tensor<B, 2> = covariance(x);
|
}
|
||||||
let diag: Tensor<B, 1> = diag(cov);
|
|
||||||
let sum = diag.sum().into_scalar();
|
pub fn forward(&self, images: Tensor<B, 2>) -> Tensor<B, 2> {
|
||||||
sum
|
let mut result = images;
|
||||||
}
|
for layer in &self.hidden {
|
||||||
|
result = layer.forward(result);
|
||||||
/// Input: [784, 784], scalar, Output: [784]
|
result = self.activation.forward(result);
|
||||||
pub fn power_iteration(cov: Tensor<B, 2>, iterations: usize) -> (Tensor<B, 1>, f64) {
|
}
|
||||||
let n = cov.dims()[0];
|
self.output.forward(result)
|
||||||
let device = cov.device();
|
}
|
||||||
let mut v: Tensor<B, 1> = Tensor::ones(Shape::new([n]), &device);
|
|
||||||
let mut s: f64 = 0.0;
|
pub fn train_step(
|
||||||
|
&self,
|
||||||
for _ in 0..iterations {
|
images: Tensor<B, 2>,
|
||||||
let v_new_2d = cov.clone().matmul(v.reshape([n, 1]));
|
labels: Tensor<B, 1, Int>,
|
||||||
let v_new = v_new_2d.squeeze::<1>();
|
optimizer: &mut impl Optimizer<Self, B>,
|
||||||
s = l2_norm(v_new.clone());
|
lr: f64
|
||||||
v = v_new.div_scalar(s);
|
) -> (Self, f64, usize) where B: AutodiffBackend {
|
||||||
|
// Forward pass
|
||||||
|
let logits = self.forward(images);
|
||||||
|
|
||||||
|
// Loss calculation
|
||||||
|
let loss_fn = CrossEntropyLossConfig::new().init(&logits.device());
|
||||||
|
let loss = loss_fn.forward(logits.clone(), labels.clone());
|
||||||
|
|
||||||
|
// Accuracy
|
||||||
|
let correct = logits.argmax(1)
|
||||||
|
.flatten::<1>(0, 1)
|
||||||
|
.equal(labels)
|
||||||
|
.int()
|
||||||
|
.sum()
|
||||||
|
.into_scalar() as usize;
|
||||||
|
|
||||||
|
let loss_val = loss.clone().into_scalar();
|
||||||
|
|
||||||
|
// Backprop
|
||||||
|
let grads = loss.backward();
|
||||||
|
let grads = GradientsParams::from_grads(grads, self);
|
||||||
|
let updated_model = optimizer.step(lr, self.clone(), grads);
|
||||||
|
|
||||||
|
(updated_model, loss_val, correct)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn train_and_evaluate(
|
||||||
|
&mut self,
|
||||||
|
images: Tensor<B, 2>,
|
||||||
|
labels: Tensor<B, 1, Int>,
|
||||||
|
optimizer: &mut impl Optimizer<Self, B>,
|
||||||
|
args_epochs: usize,
|
||||||
|
args_batch_size: usize,
|
||||||
|
) where B: AutodiffBackend {
|
||||||
|
eprintln!("images shape: {:?}", images.shape());
|
||||||
|
eprintln!("labels shape: {:?}", labels.shape());
|
||||||
|
|
||||||
|
let train_size = 50000;
|
||||||
|
let x_train = images.clone().slice([0..train_size]);
|
||||||
|
let y_train = labels.clone().slice([0..train_size]);
|
||||||
|
let x_dev = images.slice([train_size..55000]);
|
||||||
|
let y_dev = labels.slice([train_size..55000]);
|
||||||
|
|
||||||
|
let target_epochs = [1, 5, 10];
|
||||||
|
for epoch in target_epochs {
|
||||||
|
let start = std::time::Instant::now();
|
||||||
|
let mut train_loss = 0.0;
|
||||||
|
let mut train_correct = 0;
|
||||||
|
|
||||||
|
for i in (0..train_size).step_by(args_batch_size) {
|
||||||
|
let end = (i + args_batch_size).min(train_size);
|
||||||
|
if i >= end { continue; }
|
||||||
|
|
||||||
|
let b_x = x_train.clone().slice([i..end]);
|
||||||
|
let b_y = y_train.clone().slice([i..end]);
|
||||||
|
|
||||||
|
if i == 0 {
|
||||||
|
eprintln!("first batch shape: {:?}", b_x.shape());
|
||||||
|
eprintln!("output layer: input={:?} output=10", self.output.weight.shape());
|
||||||
|
}
|
||||||
|
|
||||||
|
let (updated_model, loss_val, correct) = self.train_step(b_x, b_y, optimizer, 1e-3);
|
||||||
|
*self = updated_model;
|
||||||
|
|
||||||
|
train_loss += loss_val;
|
||||||
|
train_correct += correct;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Dev metrics
|
||||||
|
let dev_logits = self.forward(x_dev.clone());
|
||||||
|
let loss_fn = CrossEntropyLossConfig::new().init(&dev_logits.device());
|
||||||
|
let dev_loss = loss_fn.forward(dev_logits.clone(), y_dev.clone()).into_scalar();
|
||||||
|
let dev_acc = dev_logits.argmax(1).flatten::<1>(0, 1).equal(y_dev.clone()).int().sum().into_scalar() as f64 / 5000.0;
|
||||||
|
|
||||||
|
println!(
|
||||||
|
"Epoch {:2}/{} {:.1}s loss={:.4} accuracy={:.4} dev:loss={:.4} dev:accuracy={:.4}",
|
||||||
|
epoch, args_epochs, start.elapsed().as_secs_f32(),
|
||||||
|
train_loss / (train_size as f64 / args_batch_size as f64),
|
||||||
|
train_correct as f64 / train_size as f64,
|
||||||
|
dev_loss, dev_acc
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return (v, s);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Input: [784, 784], [784], Output: f32
|
|
||||||
pub fn explained_variance(total_var: f64, s: f64) -> f64 {
|
#[derive(Debug, Clone, Copy, Module, Default)]
|
||||||
s / total_var
|
pub enum Activation {
|
||||||
|
#[default]
|
||||||
|
None,
|
||||||
|
ReLU,
|
||||||
|
Tanh,
|
||||||
|
Sigmoid,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl FromStr for Activation {
|
||||||
|
type Err = String;
|
||||||
|
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||||
|
match s {
|
||||||
|
"none" => Ok(Activation::None),
|
||||||
|
"relu" => Ok(Activation::ReLU),
|
||||||
|
"tanh" => Ok(Activation::Tanh),
|
||||||
|
"sigmoid" => Ok(Activation::Sigmoid),
|
||||||
|
_ => Err(format!("Unknown activation: {}", s)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Activation {
|
||||||
|
pub fn forward<B: Backend, const D: usize>(&self, x: Tensor<B, D>) -> Tensor<B, D> {
|
||||||
|
match self {
|
||||||
|
Activation::None => x,
|
||||||
|
Activation::ReLU => activation::relu(x),
|
||||||
|
Activation::Tanh => activation::tanh(x),
|
||||||
|
Activation::Sigmoid => activation::sigmoid(x),
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,86 +1,133 @@
|
|||||||
use burn::tensor::backend::Backend;
|
use burn::tensor::backend::Backend;
|
||||||
use burn::tensor::Tensor;
|
use burn::tensor::Tensor;
|
||||||
use clap::Parser;
|
use clap::Parser;
|
||||||
use hod_1::{covariance, explained_variance, power_iteration, total_variance, B};
|
use hod_1::B;
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::io::Read;
|
use std::io::Read;
|
||||||
|
use std::str::FromStr;
|
||||||
|
use hod_1::*;
|
||||||
|
|
||||||
|
use burn::optim::AdamConfig;
|
||||||
|
use burn::optim::Optimizer;
|
||||||
|
use burn::nn::loss::CrossEntropyLossConfig;
|
||||||
|
|
||||||
#[derive(Parser, Debug)]
|
#[derive(Parser, Debug)]
|
||||||
#[command(author, version, about, long_about = None)]
|
#[command(author, version, about, long_about = None)]
|
||||||
struct Args {
|
struct Args {
|
||||||
#[arg(long = "examples", default_value = "1024")]
|
#[arg(long = "activation", default_value = "none")]
|
||||||
examples: usize,
|
activation: String,
|
||||||
#[arg(long = "iterations", default_value = "64")]
|
|
||||||
iterations: usize,
|
#[arg(long = "batch_size", default_value = "50")]
|
||||||
|
batch_size: usize,
|
||||||
|
|
||||||
|
#[arg(long = "epochs", default_value = "10")]
|
||||||
|
epochs: usize,
|
||||||
|
|
||||||
|
#[arg(long = "hidden_layer_size", default_value = "100")]
|
||||||
|
hidden_layer_size: usize,
|
||||||
|
|
||||||
|
#[arg(long = "hidden_layers", default_value = "1")]
|
||||||
|
hidden_layers: usize,
|
||||||
|
|
||||||
|
#[arg(long = "seed", default_value = "42")]
|
||||||
|
seed: u64,
|
||||||
|
|
||||||
|
#[arg(long = "threads", default_value = "1")]
|
||||||
|
threads: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
fn load_mnist(examples: usize, device: &<B as Backend>::Device) -> Tensor<B, 2> {
|
/// Load MNIST images and labels for training.
|
||||||
|
/// Returns (images [N, 784], labels [N]) where labels are class indices 0-9.
|
||||||
|
fn load_mnist_labeled(
|
||||||
|
examples: usize,
|
||||||
|
device: &<B as Backend>::Device,
|
||||||
|
) -> (Tensor<B, 2>, Tensor<B, 1, burn::tensor::Int>) {
|
||||||
let file = File::open("mnist.npz").expect("Cannot open mnist.npz");
|
let file = File::open("mnist.npz").expect("Cannot open mnist.npz");
|
||||||
let mut archive = zip::ZipArchive::new(file).expect("Cannot read zip");
|
let mut archive = zip::ZipArchive::new(file).expect("Cannot read zip");
|
||||||
|
|
||||||
// Print all available array names so you can see what's inside
|
// Load images
|
||||||
eprintln!("Arrays in mnist.npz:");
|
let image_candidates = [
|
||||||
for i in 0..archive.len() {
|
|
||||||
eprintln!(" {}", archive.by_index(i).unwrap().name());
|
|
||||||
}
|
|
||||||
|
|
||||||
// Try the most common key names used for MNIST train images
|
|
||||||
let candidates = [
|
|
||||||
"train_images.npy",
|
"train_images.npy",
|
||||||
"train.images.npy",
|
"train.images.npy",
|
||||||
"x_train.npy",
|
"x_train.npy",
|
||||||
"images.npy",
|
"images.npy",
|
||||||
];
|
];
|
||||||
|
let mut image_bytes = Vec::new();
|
||||||
let mut bytes = Vec::new();
|
let mut found_images = false;
|
||||||
let mut found_name = "";
|
for name in &image_candidates {
|
||||||
for name in &candidates {
|
if let Ok(mut entry) = archive.by_name(name) {
|
||||||
if archive.by_name(name).is_ok() {
|
entry.read_to_end(&mut image_bytes).expect("Failed to read images");
|
||||||
archive
|
found_images = true;
|
||||||
.by_name(name)
|
|
||||||
.unwrap()
|
|
||||||
.read_to_end(&mut bytes)
|
|
||||||
.expect("Failed to read entry");
|
|
||||||
found_name = name;
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
assert!(!bytes.is_empty(), "Could not find train images — check the printed names above and update candidates[]");
|
assert!(found_images, "Could not find train images in mnist.npz");
|
||||||
eprintln!("Loaded from: {found_name}");
|
|
||||||
|
|
||||||
// Parse the .npy header to get the shape
|
// Load labels
|
||||||
let npy = npyz::NpyFile::new(&bytes[..]).expect("Cannot parse npy");
|
let label_candidates = [
|
||||||
let shape = npy.shape().to_vec();
|
"train_labels.npy",
|
||||||
eprintln!("Raw array shape: {shape:?}");
|
"train.labels.npy",
|
||||||
|
"y_train.npy",
|
||||||
|
"labels.npy",
|
||||||
|
];
|
||||||
|
let mut label_bytes = Vec::new();
|
||||||
|
let mut found_labels = false;
|
||||||
|
for name in &label_candidates {
|
||||||
|
if let Ok(mut entry) = archive.by_name(name) {
|
||||||
|
entry.read_to_end(&mut label_bytes).expect("Failed to read labels");
|
||||||
|
found_labels = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assert!(found_labels, "Could not find train labels in mnist.npz");
|
||||||
|
|
||||||
// MNIST is stored as uint8 (0–255); we normalise to [0.0, 1.0]
|
// Parse images
|
||||||
let raw: Vec<u8> = npy.into_vec().expect("Failed to read as u8 — dtype mismatch?");
|
let image_npy = npyz::NpyFile::new(&image_bytes[..]).expect("Cannot parse images npy");
|
||||||
|
let image_shape = image_npy.shape().to_vec();
|
||||||
|
let image_raw: Vec<u8> = image_npy.into_vec().expect("Failed to read images as u8");
|
||||||
|
let n = examples.min(image_shape[0] as usize);
|
||||||
|
let pixels = image_raw.len() / image_shape[0] as usize;
|
||||||
|
|
||||||
let n = examples.min(shape[0] as usize);
|
let image_data: Vec<f64> = image_raw[..n * pixels]
|
||||||
let pixels = raw.len() / shape[0] as usize; // 784 = 1*28*28, regardless of how axes are ordered
|
|
||||||
|
|
||||||
let data: Vec<f64> = raw[..n * pixels]
|
|
||||||
.iter()
|
.iter()
|
||||||
.map(|&p| p as f64 / 255.0)
|
.map(|&p| p as f64 / 255.0)
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
eprintln!("Loaded {n} examples, {pixels} pixels each");
|
let image_tensor_data = burn::tensor::TensorData::new(image_data, [n, pixels]);
|
||||||
|
let images = Tensor::<B, 2>::from_data(image_tensor_data, device);
|
||||||
|
|
||||||
let tensor_data = burn::tensor::TensorData::new(data, [n, pixels]);
|
// Parse labels
|
||||||
Tensor::<B, 2>::from_data(tensor_data, device)
|
let label_npy = npyz::NpyFile::new(&label_bytes[..]).expect("Cannot parse labels npy");
|
||||||
|
let label_raw: Vec<u8> = label_npy.into_vec().expect("Failed to read labels as u8");
|
||||||
|
|
||||||
|
let label_data: Vec<i64> = label_raw[..n]
|
||||||
|
.iter()
|
||||||
|
.map(|&p| p as i64)
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
let label_tensor_data = burn::tensor::TensorData::new(label_data, [n]);
|
||||||
|
let labels = Tensor::<B, 1, burn::tensor::Int>::from_data(label_tensor_data, device);
|
||||||
|
|
||||||
|
(images, labels)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
let args = Args::parse();
|
let args = Args::parse();
|
||||||
let device = <B as Backend>::Device::default();
|
let device = burn_ndarray::NdArrayDevice::Cpu;
|
||||||
|
let activation = Activation::from_str(&args.activation).unwrap_or_default();
|
||||||
|
|
||||||
let x = load_mnist(args.examples, &device);
|
let mut model = MnistClassifier::<B>::new(
|
||||||
|
&device,
|
||||||
|
args.hidden_layers,
|
||||||
|
args.hidden_layer_size,
|
||||||
|
activation,
|
||||||
|
);
|
||||||
|
|
||||||
let cov = covariance(x.clone());
|
let mut optim = AdamConfig::new().init::<B, MnistClassifier<B>>();
|
||||||
let total_var = total_variance(x.clone());
|
let (images, labels) = load_mnist_labeled(60000, &device);
|
||||||
let (_pc, s) = power_iteration(cov, args.iterations);
|
|
||||||
let ev = explained_variance(total_var, s);
|
|
||||||
|
|
||||||
println!("Total variance: {:.2}", total_var);
|
println!("Starting training...");
|
||||||
println!("Explained variance: {:.2}%", 100.0 * ev);
|
|
||||||
|
// Main just tells the model to run the process
|
||||||
|
model.train_and_evaluate(images, labels, &mut optim, args.epochs, args.batch_size);
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user