Compare commits: hod_1/nump...master

6 commits:
dabe8fe673
b86b3334d6
8fc8addcac
f6b9d79062
b0778cfe69
2174d4e506

.gitignore (vendored, new file, +2)
@@ -0,0 +1,2 @@
+*/target/
+*/mnist.npz

burn_tutorial/ch1.md (new file, +1236)
File diff suppressed because it is too large.

hod_1/.gitignore (vendored, +1)
@@ -1 +1,2 @@
 target/
+mnist.npz

hod_1/Cargo.lock (generated, 3149 changed lines)
File diff suppressed because it is too large.

hod_1/Cargo.toml
@@ -4,7 +4,11 @@ version = "0.1.0"
 edition = "2024"
 
 [dependencies]
-burn = { version = "0.20.1", default-features = false, features = ["ndarray"] }
+burn = { version = "0.20.1", default-features = false, features = ["ndarray", "std", "train"] }
+burn-autodiff = "0.20.1"
 burn-ndarray = "0.20.1"
 clap = { version = "4.5.60", features = ["derive"] }
 ndarray = "0.17.2"
+npyz = { version = "0.8.4", features = ["npz"] }
+serde = { version = "1.0.228", features = ["derive"] }
+zip = { version = "8.2.0", features = ["deflate"] }

hod_1/src/lib.rs (307 changed lines)
@@ -1,63 +1,264 @@
-use burn::tensor::Tensor;
-use std::collections::HashMap;
-use std::fs::File;
-use std::io::{BufRead, BufReader};
-
-pub type B = burn_ndarray::NdArray<f64>;
-
-// Function to load and align the data
-pub fn load_and_align(data_path: &str, model_path: &str) -> (Vec<f64>, Vec<f64>) {
-    let mut counts = HashMap::new();
-    let mut total_count = 0.0;
-
-    let file_p = File::open(data_path).expect("Failed to open the data");
-    for line in BufReader::new(file_p).lines() {
-        let point = line.unwrap().trim().to_string();
-        if point.is_empty() { continue; }
-        *counts.entry(point).or_insert(0.0) += 1.0;
-        total_count += 1.0;
-    }
-
-    let mut model_map = HashMap::new();
-    let file_q = File::open(model_path).expect("Failed to open the model");
-    for line in BufReader::new(file_q).lines() {
-        let l = line.unwrap();
-        let parts: Vec<&str> = l.split('\t').collect();
-        if parts.len() >= 2 {
-            model_map.insert(parts[0].to_string(), parts[1].parse::<f64>().unwrap());
-        }
-    }
-
-    let mut p_vals = Vec::new();
-    let mut q_vals = Vec::new();
-    for (point, count) in counts.iter() {
-        p_vals.push(count / total_count);
-        q_vals.push(*model_map.get(point).unwrap_or(&0.0));
-    }
-    (p_vals, q_vals)
-}
-
-pub fn entropy(p: Tensor<B, 1>) -> f64 {
-    let zero_mask = p.clone().equal_elem(0.0);
-    let p_safe = p.clone().mask_fill(zero_mask, 1.0);
-    let terms = p * p_safe.log();
-    -terms.sum().into_scalar()
-}
-
-pub fn cross_entropy(p: Tensor<B, 1>, q: Tensor<B, 1>) -> f64 {
-    let zero_mask_q = q.clone().equal_elem(0.0);
-    let p_exists = p.clone().greater_elem(0.0);
-    if p_exists.bool_and(zero_mask_q.clone()).any().into_scalar() {
-        return f64::INFINITY;
-    }
-    let q_safe = q.mask_fill(zero_mask_q, 1.0);
-    let terms = p * q_safe.log();
-    -terms.sum().into_scalar()
-}
-
-pub fn kl_div2(p: Tensor<B, 1>, q: Tensor<B, 1>) -> f64 {
-    let ce = cross_entropy(p.clone(), q);
-    let e = entropy(p);
-    let result = ce - e;
-    if result < 0.0 { 0.0 } else { result }
-}
+// src/lib.rs
+use burn::config::Config;
+use burn::data::dataloader::DataLoaderBuilder;
+use burn::data::dataloader::batcher::Batcher;
+use burn::data::dataset::Dataset;
+use burn::module::Module;
+use burn::nn::loss::CrossEntropyLossConfig;
+use burn::nn::{Linear, LinearConfig};
+use burn::optim::AdamConfig;
+use burn::record::CompactRecorder;
+use burn::tensor::activation;
+use burn::tensor::backend::{AutodiffBackend, Backend};
+use burn::tensor::{Int, Tensor};
+use burn::train::metric::{AccuracyMetric, LossMetric};
+use burn::lr_scheduler::constant::ConstantLr;
+use burn::train::{
+    ClassificationOutput, InferenceStep, Learner, SupervisedTraining,
+    TrainOutput, TrainStep, TrainingStrategy,
+};
+use std::str::FromStr;
+
+pub type B = burn_autodiff::Autodiff<burn_ndarray::NdArray<f64>>;
+
+// Model
+#[derive(Module, Debug)]
+pub struct MnistClassifier<B: Backend> {
+    hidden: Vec<Linear<B>>,
+    output: Linear<B>,
+    activation: Activation,
+}
+
+impl<B: Backend> MnistClassifier<B> {
+    pub fn new(
+        device: &B::Device,
+        hidden_layers: usize,
+        hidden_layer_size: usize,
+        activation: Activation,
+    ) -> Self {
+        let mut hidden = Vec::new();
+        let mut in_size = 784;
+
+        for _ in 0..hidden_layers {
+            hidden.push(LinearConfig::new(in_size, hidden_layer_size).init(device));
+            in_size = hidden_layer_size;
+        }
+
+        let output = LinearConfig::new(in_size, 10).init(device);
+        Self { hidden, output, activation }
+    }
+
+    pub fn forward(&self, images: Tensor<B, 2>) -> Tensor<B, 2> {
+        let mut x = images;
+        for layer in &self.hidden {
+            x = layer.forward(x);
+            x = self.activation.forward(x);
+        }
+        self.output.forward(x)
+    }
+}
+
+impl<B: AutodiffBackend> TrainStep for MnistClassifier<B> {
+    type Input = MnistBatch<B>;
+    type Output = ClassificationOutput<B>;
+
+    fn step(&self, batch: MnistBatch<B>) -> TrainOutput<ClassificationOutput<B>> {
+        let output = self.forward(batch.images);
+        let loss = CrossEntropyLossConfig::new()
+            .init(&output.device())
+            .forward(output.clone(), batch.targets.clone());
+
+        TrainOutput::new(
+            self,
+            loss.backward(),
+            ClassificationOutput { loss, output, targets: batch.targets },
+        )
+    }
+}
+
+impl<B: Backend> InferenceStep for MnistClassifier<B> {
+    type Input = MnistBatch<B>;
+    type Output = ClassificationOutput<B>;
+
+    fn step(&self, batch: MnistBatch<B>) -> ClassificationOutput<B> {
+        let output = self.forward(batch.images);
+        let loss = CrossEntropyLossConfig::new()
+            .init(&output.device())
+            .forward(output.clone(), batch.targets.clone());
+
+        ClassificationOutput { loss, output, targets: batch.targets }
+    }
+}
+
+// Activation
+#[derive(Debug, Clone, Copy, Module, Default, serde::Serialize, serde::Deserialize)]
+pub enum Activation {
+    #[default]
+    None,
+    ReLU,
+    Tanh,
+    Sigmoid,
+}
+
+impl FromStr for Activation {
+    type Err = String;
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        match s {
+            "none" => Ok(Activation::None),
+            "relu" => Ok(Activation::ReLU),
+            "tanh" => Ok(Activation::Tanh),
+            "sigmoid" => Ok(Activation::Sigmoid),
+            _ => Err(format!("Unknown activation: {}", s)),
+        }
+    }
+}
+
+impl Activation {
+    pub fn forward<B: Backend, const D: usize>(&self, x: Tensor<B, D>) -> Tensor<B, D> {
+        match self {
+            Activation::None => x,
+            Activation::ReLU => activation::relu(x),
+            Activation::Tanh => activation::tanh(x),
+            Activation::Sigmoid => activation::sigmoid(x),
+        }
+    }
+}
+
+// Dataset & Batch
+#[derive(Clone, Debug)]
+pub struct MnistItem {
+    pub image: [f64; 784],
+    pub label: u8,
+}
+
+pub struct MnistDataset {
+    items: Vec<MnistItem>,
+}
+
+impl MnistDataset {
+    pub fn new(items: Vec<MnistItem>) -> Self {
+        Self { items }
+    }
+}
+
+impl Dataset<MnistItem> for MnistDataset {
+    fn get(&self, index: usize) -> Option<MnistItem> {
+        self.items.get(index).cloned()
+    }
+    fn len(&self) -> usize {
+        self.items.len()
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct MnistBatch<B: Backend> {
+    pub images: Tensor<B, 2>,
+    pub targets: Tensor<B, 1, Int>,
+}
+
+#[derive(Clone)]
+pub struct MnistBatcher;
+
+impl MnistBatcher {
+    pub fn new() -> Self {
+        Self
+    }
+}
+
+impl<B: Backend<FloatElem = f64, IntElem = i64>> Batcher<B, MnistItem, MnistBatch<B>>
+    for MnistBatcher
+{
+    fn batch(&self, items: Vec<MnistItem>, device: &B::Device) -> MnistBatch<B> {
+        let n = items.len();
+        let image_data: Vec<f64> = items.iter().flat_map(|i| i.image).collect();
+        let label_data: Vec<i64> = items.iter().map(|i| i.label as i64).collect();
+
+        let images = Tensor::<B, 2>::from_data(
+            burn::tensor::TensorData::new(image_data, [n, 784]),
+            device, // ← use the passed-in device, not self.device
+        );
+        let targets = Tensor::<B, 1, Int>::from_data(
+            burn::tensor::TensorData::new(label_data, [n]),
+            device,
+        );
+
+        MnistBatch { images, targets }
+    }
+}
+
+// Config
+#[derive(Config, Debug)]
+pub struct MnistModelConfig {
+    pub hidden_layers: usize,
+    pub hidden_layer_size: usize,
+    pub activation: Activation,
+}
+
+impl MnistModelConfig {
+    pub fn init<B: Backend>(&self, device: &B::Device) -> MnistClassifier<B> {
+        MnistClassifier::new(device, self.hidden_layers, self.hidden_layer_size, self.activation)
+    }
+}
+
+#[derive(Config, Debug)]
+pub struct MnistTrainingConfig {
+    pub model: MnistModelConfig,
+    pub optimizer: AdamConfig,
+
+    #[config(default = 10)]
+    pub num_epochs: usize,
+    #[config(default = 64)]
+    pub batch_size: usize,
+    #[config(default = 4)]
+    pub num_workers: usize,
+    #[config(default = 42)]
+    pub seed: u64,
+    #[config(default = 1.0e-4)]
+    pub learning_rate: f64,
+}
+
+// Training
+impl MnistTrainingConfig {
+    pub fn train<B>(
+        &self,
+        device: B::Device,
+        train_dataset: MnistDataset,
+        valid_dataset: MnistDataset,
+    ) where
+        B: AutodiffBackend<FloatElem = f64, IntElem = i64>,
+        B::InnerBackend: Backend<FloatElem = f64, IntElem = i64>,
+    {
+        B::seed(&device, self.seed);
+
+        let model = self.model.init::<B>(&device);
+        let optim = self.optimizer.init();
+
+        let batcher_train = MnistBatcher::new();
+        let batcher_valid = MnistBatcher::new();
+
+        let dataloader_train = DataLoaderBuilder::new(batcher_train)
+            .batch_size(self.batch_size)
+            .shuffle(self.seed)
+            .num_workers(self.num_workers)
+            .build(train_dataset);
+
+        let dataloader_valid = DataLoaderBuilder::new(batcher_valid)
+            .batch_size(self.batch_size)
+            .num_workers(self.num_workers)
+            .build(valid_dataset);
+
+        let training = SupervisedTraining::new("/tmp/artifacts", dataloader_train, dataloader_valid)
+            .metrics((AccuracyMetric::new(), LossMetric::new()))
+            .with_file_checkpointer(CompactRecorder::new())
+            .num_epochs(self.num_epochs)
+            .summary()
+            .with_training_strategy(TrainingStrategy::SingleDevice(device));
+
+        let _result = training.launch(Learner::new(
+            model,
+            optim,
+            ConstantLr::new(self.learning_rate), // plain float → constant LR scheduler
+        ));
+    }
+}

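Note (illustrative, not part of the diff): the rewritten lib.rs drops the entropy/KL helpers in favor of a trainable classifier. A minimal smoke-test sketch of the API introduced above; `MnistClassifier`, `Activation`, and the `B` alias come from this diff, while the exact tensor calls (`zeros`, `dims`) are assumed from burn's published tensor API:

    use burn::tensor::Tensor;
    use hod_1::{Activation, MnistClassifier, B};

    fn smoke_test() {
        let device = burn_ndarray::NdArrayDevice::Cpu;
        // Two hidden layers of 32 units each, ReLU between them.
        let model = MnistClassifier::<B>::new(&device, 2, 32, Activation::ReLU);
        // One dummy 784-pixel image in, ten class logits out.
        let images = Tensor::<B, 2>::zeros([1, 784], &device);
        let logits = model.forward(images);
        assert_eq!(logits.dims(), [1, 10]);
    }
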
hod_1/src/main.rs
@@ -1,30 +1,115 @@
-use burn::tensor::{backend::Backend, Tensor};
 use clap::Parser;
-// Replace 'hod_1' with the name of your project from Cargo.toml
-use hod_1::{load_and_align, entropy, cross_entropy, kl_div2, B};
+use hod_1::{Activation, MnistDataset, MnistItem, MnistModelConfig, MnistTrainingConfig, B};
+use burn::optim::AdamConfig;
+use std::fs::File;
+use std::io::Read;
+use std::str::FromStr;
 
 #[derive(Parser, Debug)]
-#[command(author, version, about, long_about = None)]
+#[command(author, version, about)]
 struct Args {
-    #[arg(long = "data_path")]
-    data_path: String,
+    #[arg(long, default_value = "none")]
+    activation: String,
 
-    #[arg(long = "model_path")]
-    model_path: String,
+    #[arg(long, default_value = "64")]
+    batch_size: usize,
+
+    #[arg(long, default_value = "10")]
+    epochs: usize,
+
+    #[arg(long, default_value = "100")]
+    hidden_layer_size: usize,
+
+    #[arg(long, default_value = "1")]
+    hidden_layers: usize,
+
+    #[arg(long, default_value = "42")]
+    seed: u64,
+
+    /// Fraction of training data used for validation (e.g. 0.1 = 10 %)
+    #[arg(long, default_value = "0.1")]
+    valid_split: f64,
 }
 
+fn load_mnist_items(examples: usize) -> Vec<MnistItem> {
+    let file = File::open("mnist.npz").expect("Cannot open mnist.npz");
+    let mut archive = zip::ZipArchive::new(file).expect("Cannot read zip");
+
+    // images
+    let image_candidates = ["train_images.npy", "train.images.npy", "x_train.npy", "images.npy"];
+    let mut image_bytes = Vec::new();
+    for name in &image_candidates {
+        if let Ok(mut entry) = archive.by_name(name) {
+            entry.read_to_end(&mut image_bytes).expect("read images");
+            break;
+        }
+    }
+    assert!(!image_bytes.is_empty(), "Could not find train images in mnist.npz");
+
+    // labels
+    let label_candidates = ["train_labels.npy", "train.labels.npy", "y_train.npy", "labels.npy"];
+    let mut label_bytes = Vec::new();
+    for name in &label_candidates {
+        if let Ok(mut entry) = archive.by_name(name) {
+            entry.read_to_end(&mut label_bytes).expect("read labels");
+            break;
+        }
+    }
+    assert!(!label_bytes.is_empty(), "Could not find train labels in mnist.npz");
+
+    // parse
+    let image_npy = npyz::NpyFile::new(&image_bytes[..]).expect("parse images");
+    let image_shape = image_npy.shape().to_vec();
+    let image_raw: Vec<u8> = image_npy.into_vec().expect("images to vec");
+    let n = examples.min(image_shape[0] as usize);
+    let pixels = image_raw.len() / image_shape[0] as usize; // should be 784
+    assert_eq!(pixels, 784, "Expected 784 pixels per image, got {pixels}");
+
+    let label_npy = npyz::NpyFile::new(&label_bytes[..]).expect("parse labels");
+    let label_raw: Vec<u8> = label_npy.into_vec().expect("labels to vec");
+
+    // build items
+    (0..n)
+        .map(|i| {
+            let mut image = [0f64; 784];
+            for (j, &px) in image_raw[i * 784..(i + 1) * 784].iter().enumerate() {
+                image[j] = px as f64 / 255.0;
+            }
+            MnistItem { image, label: label_raw[i] }
+        })
+        .collect()
+}
+
 fn main() {
     let args = Args::parse();
-    let device = <B as Backend>::Device::default();
+    let device = burn_ndarray::NdArrayDevice::Cpu;
+    let activation = Activation::from_str(&args.activation).unwrap_or_default();
 
-    // Use the function from lib.rs
-    let (p_vec, q_vec) = load_and_align(&args.data_path, &args.model_path);
+    println!("Loading MNIST…");
+    let all_items = load_mnist_items(60_000);
 
-    let p = Tensor::<B, 1>::from_data(p_vec.as_slice(), &device);
-    let q = Tensor::<B, 1>::from_data(q_vec.as_slice(), &device);
+    // Split into train / validation
+    let valid_n = (all_items.len() as f64 * args.valid_split) as usize;
+    let train_n = all_items.len() - valid_n;
+    let mut items = all_items;
+    let valid_items = items.split_off(train_n); // last `valid_n` items
+    let train_items = items;
 
-    // Computations
-    println!("{}", entropy(p.clone()));
-    println!("{}", cross_entropy(p.clone(), q.clone()));
-    println!("{}", kl_div2(p, q));
+    println!("Train: {} Valid: {}", train_items.len(), valid_items.len());
+
+    let train_dataset = MnistDataset::new(train_items);
+    let valid_dataset = MnistDataset::new(valid_items);
+
+    let config = MnistTrainingConfig::new(
+        MnistModelConfig::new(args.hidden_layers, args.hidden_layer_size, activation),
+        AdamConfig::new(),
+    )
+    .with_num_epochs(args.epochs)
+    .with_batch_size(args.batch_size)
+    .with_num_workers(1) // NdArray backend is single-threaded; keep at 1
+    .with_seed(args.seed)
+    .with_learning_rate(1e-3);
+
+    println!("Starting training…");
+    config.train::<B>(device, train_dataset, valid_dataset);
 }

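Note (illustrative): clap's derive turns the underscored field names above into hyphenated flags, so a hypothetical training run would look like:

    cargo run --release -- --activation relu --hidden-layers 2 --hidden-layer-size 128 --epochs 5 --batch-size 64

Omitted flags fall back to the defaults declared in the Args struct (activation "none", 1 hidden layer of 100 units, 10 epochs, batch size 64, seed 42, 10 % validation split).
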
hod_2/Cargo.lock (generated, new file, +5987)
File diff suppressed because it is too large.

hod_2/Cargo.toml (new file, +16)
@@ -0,0 +1,16 @@
+[package]
+name = "hod_2"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
+burn = { version = "0.20.1", default-features = false, features = ["ndarray", "std", "train"] }
+burn-autodiff = "0.20.1"
+burn-ndarray = "0.20.1"
+clap = { version = "4.5.60", features = ["derive"] }
+ndarray = "0.17.2"
+npyz = { version = "0.8.4", features = ["npz"] }
+rand = "0.10.0"
+rand_distr = "0.6.0"
+serde = { version = "1.0.228", features = ["derive"] }
+zip = { version = "8.2.0", features = ["deflate"] }

hod_2/plan.md (new file, +50)
@@ -0,0 +1,50 @@
+## Phase 1: Core Data Structures
+
+**`src/model.rs`** - Manual parameter management
+- `struct Parameters<B: Backend>`: holds `w1, b1, w2, b2` as `Tensor<B, 2>`
+- `impl Parameters`: initialization with `randn(0.1)` for weights, zeros for biases
+- No `nn.Linear`—manual tensors to match the Python exercise
+
+## Phase 2: Forward Pass
+
+**`src/forward.rs`** or in `model.rs`
+- `fn forward<B: Backend>(params: &Parameters<B>, images: Tensor<B, 2>) -> Tensor<B, 2>`
+- Cast `uint8` images to `f32`, divide by 255, flatten to `[batch, 784]`
+- `hidden = tanh(images @ w1 + b1)`
+- `logits = hidden @ w2 + b2`
+- Return raw logits (no softmax here)
+
+## Phase 3: Loss Computation
+
+**`src/loss.rs`**
+- `fn cross_entropy_loss<B: Backend>(logits: Tensor<B, 2>, labels: Tensor<B, 1, Int>) -> Tensor<B, 0>`
+- Manual implementation—no `CrossEntropyLoss` module
+- `softmax = exp(logits - max) / sum(exp(logits - max))`
+- Index `softmax` by gold labels to get `p_correct`
+- `loss = -mean(log(p_correct))`
+
+## Phase 4: Backward Pass & SGD
+
+**`src/train.rs`**
+- `fn train_epoch<B: Backend>(params: &mut Parameters<B>, dataset: &[MnistItem], args: &Args)`
+- For each batch:
+  1. `let loss = cross_entropy_loss(forward(&params, images), labels)`
+  2. `let grads = loss.backward()` — automatic differentiation
+  3. **Manual SGD**: `param = param - lr * grad` for each parameter
+  4. No `Optimizer`—raw gradient descent like Python
+
+## Phase 5: Evaluation
+
+**`src/eval.rs`**
+- `fn evaluate<B: Backend>(params: &Parameters<B>, dataset: &[MnistItem]) -> f64`
+- `argmax` on logits, compare to labels, return accuracy
+
+## Phase 6: Main Training Loop
+
+**Update `src/main.rs`**
+- Parse args ✓ (done)
+- Load data ✓ (done)
+- Initialize `Parameters` with seed
+- Loop `args.epochs`: `train_epoch` → `evaluate(dev)` → print
+- Final `evaluate(test)`

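Note (illustrative, not part of the diff): Phases 3 and 4 of the plan call for a hand-rolled loss and SGD step. A rough sketch under assumptions: `log_softmax`, `gather`, `grad`, `inner`, and `from_inner` follow burn's autodiff API from earlier releases and should be checked against the pinned 0.20.1; `log_softmax` also covers the stabilized `exp(logits - max)` form the plan writes out by hand.

    use burn::tensor::{activation, backend::AutodiffBackend, Int, Tensor};

    /// Phase 3: numerically stable manual cross-entropy,
    /// loss = -mean(log(softmax(logits)[gold])).
    fn cross_entropy_loss<B: AutodiffBackend>(
        logits: Tensor<B, 2>,
        labels: Tensor<B, 1, Int>,
    ) -> Tensor<B, 1> {
        let [n, _classes] = logits.dims();
        let log_probs = activation::log_softmax(logits, 1);       // [n, 10]
        let picked = log_probs.gather(1, labels.reshape([n, 1])); // log p_correct, [n, 1]
        picked.mean().neg()
    }

    /// Phase 4: one manual SGD update, param = param - lr * grad.
    fn sgd_step<B: AutodiffBackend, const D: usize>(
        param: Tensor<B, D>,
        grads: &B::Gradients,
        lr: f64,
    ) -> Tensor<B, D> {
        let g = param.grad(grads).expect("parameter is part of the graph");
        Tensor::from_inner(param.inner() - g.mul_scalar(lr))
    }
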
hod_2/src/lib.rs (new file, +1)
@@ -0,0 +1 @@
+pub mod model;

hod_2/src/main.rs (new file, +79)
@@ -0,0 +1,79 @@
+use clap::Parser;
+use std::fs::File;
+use std::io::{Cursor, Read};
+
+#[derive(Parser, Debug)]
+#[command(author, version, about)]
+struct Args {
+    #[arg(long, default_value_t = 50)]
+    batch_size: usize,
+
+    #[arg(long, default_value_t = 10)]
+    epochs: usize,
+
+    #[arg(long, default_value_t = 100)]
+    hidden_layer_size: usize,
+
+    #[arg(long, default_value_t = 0.1)]
+    learning_rate: f64,
+
+    #[arg(long, default_value_t = 42)]
+    seed: u64,
+
+    #[arg(long, default_value_t = 1)]
+    threads: usize,
+}
+
+fn load_mnist_items(path: &str, examples: usize) -> Vec<(Vec<f32>, u8)> {
+    let file = File::open(path).expect("Cannot open mnist.npz");
+    let mut archive = zip::ZipArchive::new(file).expect("Cannot read zip");
+
+    let image_names = ["train_images.npy", "train.images.npy", "x_train.npy", "images.npy"];
+    let mut image_bytes = Vec::new();
+    for name in &image_names {
+        if let Ok(mut entry) = archive.by_name(name) {
+            entry.read_to_end(&mut image_bytes).unwrap();
+            break;
+        }
+    }
+
+    let label_names = ["train_labels.npy", "train.labels.npy", "y_train.npy", "labels.npy"];
+    let mut label_bytes = Vec::new();
+    for name in &label_names {
+        if let Ok(mut entry) = archive.by_name(name) {
+            entry.read_to_end(&mut label_bytes).unwrap();
+            break;
+        }
+    }
+
+    let images_npy = npyz::NpyFile::new(Cursor::new(&image_bytes)).unwrap();
+    let shape = images_npy.shape().to_vec();
+    let n = shape[0] as usize;
+    let pixels = shape[1..].iter().product::<u64>() as usize;
+    let image_raw: Vec<u8> = images_npy.into_vec().unwrap();
+
+    let labels_npy = npyz::NpyFile::new(Cursor::new(&label_bytes)).unwrap();
+    let label_raw: Vec<u8> = labels_npy.into_vec().unwrap();
+
+    (0..n.min(examples))
+        .map(|i| {
+            let image: Vec<f32> = image_raw[i * pixels..(i + 1) * pixels]
+                .iter()
+                .map(|&p| p as f32 / 255.0)
+                .collect();
+            (image, label_raw[i])
+        })
+        .collect()
+}
+
+fn main() {
+    let args = Args::parse();
+
+    println!("Loading MNIST...");
+    let train_items = load_mnist_items("mnist.npz", 55_000);
+    let dev_items = load_mnist_items("mnist.npz", 5_000);
+    let test_items = load_mnist_items("mnist.npz", 10_000);
+
+    println!("Train: {}, Dev: {}, Test: {}", train_items.len(), dev_items.len(), test_items.len());
+    println!("Args: {:?}", args);
+}

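Note (illustrative): a possible shape for the plan's Phase 5 evaluator, once a forward pass produces logits for a batch; `argmax`, `equal`, and the element conversion are assumed from burn's tensor API and not verified against 0.20.1:

    use burn::tensor::{backend::Backend, ElementConversion, Int, Tensor};

    /// Fraction of rows where argmax(logits) equals the gold label.
    fn accuracy<B: Backend>(logits: Tensor<B, 2>, labels: Tensor<B, 1, Int>) -> f64 {
        let n = labels.dims()[0];
        let predicted = logits.argmax(1).reshape([n]); // argmax keeps the dim: [n, 1] -> [n]
        let correct: f64 = predicted.equal(labels).int().sum().into_scalar().elem();
        correct / n as f64
    }
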
hod_2/src/model.rs (new file, +64)
@@ -0,0 +1,64 @@
+use burn::tensor::{backend::Backend, Tensor};
+use rand::{rngs::StdRng, SeedableRng};
+use rand_distr::{Distribution, Normal};
+
+/// Manual neural network parameters for SGD backpropagation.
+/// No nn.Linear — just raw tensors to match the Python exercise.
+pub struct Parameters<B: Backend> {
+    /// First layer weights: [784, hidden_layer_size]
+    pub w1: Tensor<B, 2>,
+    /// First layer biases: [hidden_layer_size]
+    pub b1: Tensor<B, 1>,
+    /// Second layer weights: [hidden_layer_size, 10]
+    pub w2: Tensor<B, 2>,
+    /// Second layer biases: [10]
+    pub b2: Tensor<B, 1>,
+}
+
+impl<B: Backend> Parameters<B> {
+    /// Initialize parameters with given hidden size and random seed.
+    /// Weights: randn * 0.1, Biases: zeros
+    pub fn new(device: &B::Device, hidden_size: usize, seed: u64) -> Self {
+        let w1 = random_tensor([784, hidden_size], 0.1, seed, device);
+        let b1 = Tensor::zeros([hidden_size], device);
+
+        let w2 = random_tensor([hidden_size, 10], 0.1, seed.wrapping_add(1), device);
+        let b2 = Tensor::zeros([10], device);
+
+        Self { w1, b1, w2, b2 }
+    }
+
+    /// Get all parameters as a vector for gradient updates.
+    /// Order: w1, b1, w2, b2
+    pub fn to_vec(&self) -> Vec<ParamRef<B>> {
+        vec![
+            ParamRef::TwoD(self.w1.clone()),
+            ParamRef::OneD(self.b1.clone()),
+            ParamRef::TwoD(self.w2.clone()),
+            ParamRef::OneD(self.b2.clone()),
+        ]
+    }
+}
+
+/// Helper enum to handle 1D and 2D parameters uniformly.
+pub enum ParamRef<B: Backend> {
+    OneD(Tensor<B, 1>),
+    TwoD(Tensor<B, 2>),
+}
+
+/// Create a random tensor with normal distribution, scaled by std_dev.
+fn random_tensor<B: Backend, const D: usize>(
+    shape: [usize; D],
+    std_dev: f64,
+    seed: u64,
+    device: &B::Device,
+) -> Tensor<B, D> {
+    let dist = Normal::new(0.0, std_dev).unwrap();
+    let mut rng = StdRng::seed_from_u64(seed);
+
+    let total: usize = shape.iter().product();
+    let data: Vec<f64> = (0..total).map(|_| dist.sample(&mut rng)).collect();
+
+    Tensor::from_floats(burn::tensor::TensorData::new(data, shape), device)
+}

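Note (illustrative, not part of the diff): the plan's Phase 2 forward pass written against the Parameters struct above; bias broadcasting via `unsqueeze` is assumed from burn's tensor API:

    use burn::tensor::{activation, backend::Backend, Tensor};
    use hod_2::model::Parameters;

    /// hidden = tanh(images @ w1 + b1); logits = hidden @ w2 + b2 (raw logits, no softmax).
    fn forward<B: Backend>(params: &Parameters<B>, images: Tensor<B, 2>) -> Tensor<B, 2> {
        let hidden = activation::tanh(
            images.matmul(params.w1.clone()) + params.b1.clone().unsqueeze(),
        );
        hidden.matmul(params.w2.clone()) + params.b2.clone().unsqueeze()
    }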