hod2
This commit is contained in:
5980
hod_2/Cargo.lock
generated
5980
hod_2/Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@@ -4,3 +4,13 @@ version = "0.1.0"
|
|||||||
edition = "2024"
|
edition = "2024"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
|
burn = { version = "0.20.1", default-features = false, features = ["ndarray", "std", "train"] }
|
||||||
|
burn-autodiff = "0.20.1"
|
||||||
|
burn-ndarray = "0.20.1"
|
||||||
|
clap = { version = "4.5.60", features = ["derive"] }
|
||||||
|
ndarray = "0.17.2"
|
||||||
|
npyz = { version = "0.8.4", features = ["npz"] }
|
||||||
|
rand = "0.10.0"
|
||||||
|
rand_distr = "0.6.0"
|
||||||
|
serde = { version = "1.0.228", features = ["derive"] }
|
||||||
|
zip = { version = "8.2.0", features = ["deflate"] }
|
||||||
|
|||||||
50
hod_2/plan.md
Normal file
50
hod_2/plan.md
Normal file
@@ -0,0 +1,50 @@
|
|||||||
|
## Phase 1: Core Data Structures
|
||||||
|
|
||||||
|
**`src/model.rs`** - Manual parameter management
|
||||||
|
- `struct Parameters<B: Backend>`: holds the weights `w1, w2` as `Tensor<B, 2>` and the biases `b1, b2` as `Tensor<B, 1>`
|
||||||
|
- `impl Parameters`: initialization with `randn(0.1)` for weights, zeros for biases
|
||||||
|
- No `nn.Linear`—manual tensors to match the Python exercise
|
||||||
|
|
||||||
|
## Phase 2: Forward Pass
|
||||||
|
|
||||||
|
**`src/forward.rs`** or in `model.rs`
|
||||||
|
- `fn forward<B: Backend>(params: &Parameters<B>, images: Tensor<B, 2>) -> Tensor<B, 2>`
|
||||||
|
- Cast `uint8` images to `f32`, divide by 255, flatten to `[batch, 784]`
|
||||||
|
- `hidden = tanh(images @ w1 + b1)`
|
||||||
|
- `logits = hidden @ w2 + b2`
|
||||||
|
- Return raw logits (no softmax here)
|
||||||
|
|
||||||
|
## Phase 3: Loss Computation
|
||||||
|
|
||||||
|
**`src/loss.rs`**
|
||||||
|
- `fn cross_entropy_loss<B: Backend>(logits: Tensor<B, 2>, labels: Tensor<B, 1, Int>) -> Tensor<B, 1>` (burn has no rank-0 tensors; the scalar loss is a one-element `Tensor<B, 1>`)
|
||||||
|
- Manual implementation—no `CrossEntropyLoss` module
|
||||||
|
- `softmax = exp(logits - max) / sum(exp(logits - max))`
|
||||||
|
- Index `softmax` by gold labels to get `p_correct`
|
||||||
|
- `loss = -mean(log(p_correct))`
|
||||||
|
|
||||||
|
## Phase 4: Backward Pass & SGD
|
||||||
|
|
||||||
|
**`src/train.rs`**
|
||||||
|
- `fn train_epoch<B: AutodiffBackend>(params: &mut Parameters<B>, dataset: &[MnistItem], args: &Args)` (`backward()` is only available on an autodiff backend)
|
||||||
|
- For each batch:
|
||||||
|
1. `let loss = cross_entropy_loss(forward(&params, images), labels)`
|
||||||
|
2. `let grads = loss.backward()` — automatic differentiation
|
||||||
|
3. **Manual SGD**: `param = param - lr * grad` for each parameter
|
||||||
|
4. No `Optimizer`—raw gradient descent like Python
|
||||||
|
|
||||||
|
## Phase 5: Evaluation
|
||||||
|
|
||||||
|
**`src/eval.rs`**
|
||||||
|
- `fn evaluate<B: Backend>(params: &Parameters<B>, dataset: &[MnistItem]) -> f64`
|
||||||
|
- `argmax` on logits, compare to labels, return accuracy
|
||||||
|
|
||||||
|
## Phase 6: Main Training Loop
|
||||||
|
|
||||||
|
**Update `src/main.rs`**
|
||||||
|
- Parse args ✓ (done)
|
||||||
|
- Load data ✓ (done)
|
||||||
|
- Initialize `Parameters` with seed
|
||||||
|
- Loop `args.epochs`: `train_epoch` → `evaluate(dev)` → print
|
||||||
|
- Final `evaluate(test)`
|
||||||
|
|
||||||
1
hod_2/src/lib.rs
Normal file
1
hod_2/src/lib.rs
Normal file
@@ -0,0 +1 @@
|
|||||||
|
pub mod model;
|
||||||
@@ -1,3 +1,79 @@
|
|||||||
fn main() {
|
use clap::Parser;
|
||||||
println!("Hello, world!");
|
use std::fs::File;
|
||||||
|
use std::io::{Cursor, Read};
|
||||||
|
|
||||||
|
#[derive(Parser, Debug)]
|
||||||
|
#[command(author, version, about)]
|
||||||
|
struct Args {
|
||||||
|
#[arg(long, default_value_t = 50)]
|
||||||
|
batch_size: usize,
|
||||||
|
|
||||||
|
#[arg(long, default_value_t = 10)]
|
||||||
|
epochs: usize,
|
||||||
|
|
||||||
|
#[arg(long, default_value_t = 100)]
|
||||||
|
hidden_layer_size: usize,
|
||||||
|
|
||||||
|
#[arg(long, default_value_t = 0.1)]
|
||||||
|
learning_rate: f64,
|
||||||
|
|
||||||
|
#[arg(long, default_value_t = 42)]
|
||||||
|
seed: u64,
|
||||||
|
|
||||||
|
#[arg(long, default_value_t = 1)]
|
||||||
|
threads: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
fn load_mnist_items(path: &str, examples: usize) -> Vec<(Vec<f32>, u8)> {
|
||||||
|
let file = File::open(path).expect("Cannot open mnist.npz");
|
||||||
|
let mut archive = zip::ZipArchive::new(file).expect("Cannot read zip");
|
||||||
|
|
||||||
|
let image_names = ["train_images.npy", "train.images.npy", "x_train.npy", "images.npy"];
|
||||||
|
let mut image_bytes = Vec::new();
|
||||||
|
for name in &image_names {
|
||||||
|
if let Ok(mut entry) = archive.by_name(name) {
|
||||||
|
entry.read_to_end(&mut image_bytes).unwrap();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let label_names = ["train_labels.npy", "train.labels.npy", "y_train.npy", "labels.npy"];
|
||||||
|
let mut label_bytes = Vec::new();
|
||||||
|
for name in &label_names {
|
||||||
|
if let Ok(mut entry) = archive.by_name(name) {
|
||||||
|
entry.read_to_end(&mut label_bytes).unwrap();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let images_npy = npyz::NpyFile::new(Cursor::new(&image_bytes)).unwrap();
|
||||||
|
let shape = images_npy.shape().to_vec();
|
||||||
|
let n = shape[0] as usize;
|
||||||
|
let pixels = shape[1..].iter().product::<u64>() as usize;
|
||||||
|
let image_raw: Vec<u8> = images_npy.into_vec().unwrap();
|
||||||
|
|
||||||
|
let labels_npy = npyz::NpyFile::new(Cursor::new(&label_bytes)).unwrap();
|
||||||
|
let label_raw: Vec<u8> = labels_npy.into_vec().unwrap();
|
||||||
|
|
||||||
|
(0..n.min(examples))
|
||||||
|
.map(|i| {
|
||||||
|
let image: Vec<f32> = image_raw[i * pixels..(i + 1) * pixels]
|
||||||
|
.iter()
|
||||||
|
.map(|&p| p as f32 / 255.0)
|
||||||
|
.collect();
|
||||||
|
(image, label_raw[i])
|
||||||
|
})
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
let args = Args::parse();
|
||||||
|
|
||||||
|
println!("Loading MNIST...");
|
||||||
|
let train_items = load_mnist_items("mnist.npz", 55_000);
|
||||||
|
let dev_items = load_mnist_items("mnist.npz", 5_000);
|
||||||
|
let test_items = load_mnist_items("mnist.npz", 10_000);
|
||||||
|
|
||||||
|
println!("Train: {}, Dev: {}, Test: {}", train_items.len(), dev_items.len(), test_items.len());
|
||||||
|
println!("Args: {:?}", args);
|
||||||
}
|
}
|
||||||
|
|||||||
64
hod_2/src/model.rs
Normal file
64
hod_2/src/model.rs
Normal file
@@ -0,0 +1,64 @@
|
|||||||
|
use burn::tensor::{backend::Backend, Tensor};
|
||||||
|
use rand::{rngs::StdRng, SeedableRng};
|
||||||
|
use rand_distr::{Distribution, Normal};
|
||||||
|
|
||||||
|
/// Manual neural network parameters for SGD backpropagation.
|
||||||
|
/// No nn.Linear — just raw tensors to match the Python exercise.
|
||||||
|
pub struct Parameters<B: Backend> {
|
||||||
|
/// First layer weights: [784, hidden_layer_size]
|
||||||
|
pub w1: Tensor<B, 2>,
|
||||||
|
/// First layer biases: [hidden_layer_size]
|
||||||
|
pub b1: Tensor<B, 1>,
|
||||||
|
/// Second layer weights: [hidden_layer_size, 10]
|
||||||
|
pub w2: Tensor<B, 2>,
|
||||||
|
/// Second layer biases: [10]
|
||||||
|
pub b2: Tensor<B, 1>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<B: Backend> Parameters<B> {
|
||||||
|
/// Initialize parameters with given hidden size and random seed.
|
||||||
|
/// Weights: randn * 0.1, Biases: zeros
|
||||||
|
pub fn new(device: &B::Device, hidden_size: usize, seed: u64) -> Self {
|
||||||
|
let w1 = random_tensor([784, hidden_size], 0.1, seed, device);
|
||||||
|
let b1 = Tensor::zeros([hidden_size], device);
|
||||||
|
|
||||||
|
let w2 = random_tensor([hidden_size, 10], 0.1, seed.wrapping_add(1), device);
|
||||||
|
let b2 = Tensor::zeros([10], device);
|
||||||
|
|
||||||
|
Self { w1, b1, w2, b2 }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get all parameters as a vector for gradient updates.
|
||||||
|
/// Order: w1, b1, w2, b2
|
||||||
|
pub fn to_vec(&self) -> Vec<ParamRef<B>> {
|
||||||
|
vec![
|
||||||
|
ParamRef::TwoD(self.w1.clone()),
|
||||||
|
ParamRef::OneD(self.b1.clone()),
|
||||||
|
ParamRef::TwoD(self.w2.clone()),
|
||||||
|
ParamRef::OneD(self.b2.clone()),
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Helper enum to handle 1D and 2D parameters uniformly.
|
||||||
|
pub enum ParamRef<B: Backend> {
|
||||||
|
OneD(Tensor<B, 1>),
|
||||||
|
TwoD(Tensor<B, 2>),
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Create a random tensor with normal distribution, scaled by std_dev.
|
||||||
|
fn random_tensor<B: Backend, const D: usize>(
|
||||||
|
shape: [usize; D],
|
||||||
|
std_dev: f64,
|
||||||
|
seed: u64,
|
||||||
|
device: &B::Device,
|
||||||
|
) -> Tensor<B, D> {
|
||||||
|
|
||||||
|
let dist = Normal::new(0.0, std_dev).unwrap();
|
||||||
|
let mut rng = StdRng::seed_from_u64(seed);
|
||||||
|
|
||||||
|
let total: usize = shape.iter().product();
|
||||||
|
let data: Vec<f64> = (0..total).map(|_| dist.sample(&mut rng)).collect();
|
||||||
|
|
||||||
|
Tensor::from_floats(burn::tensor::TensorData::new(data, shape), device)
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user