01 hotova
This commit is contained in:
67
hod_1/data/example_pytorch.py
Normal file
67
hod_1/data/example_pytorch.py
Normal file
@@ -0,0 +1,67 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
import argparse
|
||||||
|
|
||||||
|
import torch
|
||||||
|
import torchmetrics
|
||||||
|
|
||||||
|
import npfl138
|
||||||
|
from npfl138.datasets.mnist import MNIST
|
||||||
|
npfl138.require_version("2526.1")
|
||||||
|
|
||||||
|
# Parse arguments
|
||||||
|
parser = argparse.ArgumentParser()
|
||||||
|
parser.add_argument("--batch_size", default=50, type=int, help="Batch size.")
|
||||||
|
parser.add_argument("--epochs", default=10, type=int, help="Number of epochs.")
|
||||||
|
parser.add_argument("--hidden_layer_size", default=100, type=int, help="Size of the hidden layer.")
|
||||||
|
parser.add_argument("--seed", default=42, type=int, help="Random seed.")
|
||||||
|
parser.add_argument("--threads", default=1, type=int, help="Maximum number of threads to use.")
|
||||||
|
|
||||||
|
|
||||||
|
class Dataset(npfl138.TransformedDataset):
|
||||||
|
def transform(self, example):
|
||||||
|
image = example["image"] # a torch.Tensor with torch.uint8 values in [0, 255] range
|
||||||
|
image = image.to(torch.float32) / 255 # image converted to float32 and rescaled to [0, 1]
|
||||||
|
label = example["label"] # a torch.Tensor with a single integer representing the label
|
||||||
|
return image, label # return an (input, target) pair
|
||||||
|
|
||||||
|
|
||||||
|
def main(args: argparse.Namespace) -> None:
|
||||||
|
# Set the random seed and the number of threads.
|
||||||
|
npfl138.startup(args.seed, args.threads)
|
||||||
|
npfl138.global_keras_initializers()
|
||||||
|
|
||||||
|
# Load the data and create dataloaders.
|
||||||
|
mnist = MNIST()
|
||||||
|
|
||||||
|
train = torch.utils.data.DataLoader(Dataset(mnist.train), batch_size=args.batch_size, shuffle=True)
|
||||||
|
dev = torch.utils.data.DataLoader(Dataset(mnist.dev), batch_size=args.batch_size)
|
||||||
|
test = torch.utils.data.DataLoader(Dataset(mnist.test), batch_size=args.batch_size)
|
||||||
|
|
||||||
|
# Create the model.
|
||||||
|
model = torch.nn.Sequential(
|
||||||
|
torch.nn.Flatten(),
|
||||||
|
torch.nn.Linear(MNIST.C * MNIST.H * MNIST.W, args.hidden_layer_size),
|
||||||
|
torch.nn.ReLU(),
|
||||||
|
torch.nn.Linear(args.hidden_layer_size, MNIST.LABELS),
|
||||||
|
)
|
||||||
|
print("The following model has been created:", model)
|
||||||
|
|
||||||
|
# Create the TrainableModule and configure it for training.
|
||||||
|
model = npfl138.TrainableModule(model)
|
||||||
|
|
||||||
|
model.configure(
|
||||||
|
optimizer=torch.optim.Adam(model.parameters()),
|
||||||
|
loss=torch.nn.CrossEntropyLoss(),
|
||||||
|
metrics={"accuracy": torchmetrics.Accuracy("multiclass", num_classes=MNIST.LABELS)},
|
||||||
|
)
|
||||||
|
|
||||||
|
# Train the model.
|
||||||
|
model.fit(train, dev=dev, epochs=args.epochs)
|
||||||
|
|
||||||
|
# Evaluate the model on the test data.
|
||||||
|
model.evaluate(test)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main_args = parser.parse_args([] if "__file__" not in globals() else None)
|
||||||
|
main(main_args)
|
||||||
68
hod_1/data/example_pytorch_tensorboard.py
Normal file
68
hod_1/data/example_pytorch_tensorboard.py
Normal file
@@ -0,0 +1,68 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
import argparse
|
||||||
|
|
||||||
|
import torch
|
||||||
|
import torchmetrics
|
||||||
|
|
||||||
|
import npfl138
|
||||||
|
from npfl138.datasets.mnist import MNIST
|
||||||
|
npfl138.require_version("2526.1")
|
||||||
|
|
||||||
|
# Parse arguments
|
||||||
|
parser = argparse.ArgumentParser()
|
||||||
|
parser.add_argument("--batch_size", default=50, type=int, help="Batch size.")
|
||||||
|
parser.add_argument("--epochs", default=10, type=int, help="Number of epochs.")
|
||||||
|
parser.add_argument("--hidden_layer_size", default=100, type=int, help="Size of the hidden layer.")
|
||||||
|
parser.add_argument("--seed", default=42, type=int, help="Random seed.")
|
||||||
|
parser.add_argument("--threads", default=1, type=int, help="Maximum number of threads to use.")
|
||||||
|
|
||||||
|
|
||||||
|
class Dataset(npfl138.TransformedDataset):
|
||||||
|
def transform(self, example):
|
||||||
|
image = example["image"] # a torch.Tensor with torch.uint8 values in [0, 255] range
|
||||||
|
image = image.to(torch.float32) / 255 # image converted to float32 and rescaled to [0, 1]
|
||||||
|
label = example["label"] # a torch.Tensor with a single integer representing the label
|
||||||
|
return image, label # return an (input, target) pair
|
||||||
|
|
||||||
|
|
||||||
|
def main(args: argparse.Namespace) -> None:
|
||||||
|
# Set the random seed and the number of threads.
|
||||||
|
npfl138.startup(args.seed, args.threads)
|
||||||
|
npfl138.global_keras_initializers()
|
||||||
|
|
||||||
|
# Load the data and create dataloaders.
|
||||||
|
mnist = MNIST()
|
||||||
|
|
||||||
|
train = torch.utils.data.DataLoader(Dataset(mnist.train), batch_size=args.batch_size, shuffle=True)
|
||||||
|
dev = torch.utils.data.DataLoader(Dataset(mnist.dev), batch_size=args.batch_size)
|
||||||
|
test = torch.utils.data.DataLoader(Dataset(mnist.test), batch_size=args.batch_size)
|
||||||
|
|
||||||
|
# Create the model.
|
||||||
|
model = torch.nn.Sequential(
|
||||||
|
torch.nn.Flatten(),
|
||||||
|
torch.nn.Linear(MNIST.C * MNIST.H * MNIST.W, args.hidden_layer_size),
|
||||||
|
torch.nn.ReLU(),
|
||||||
|
torch.nn.Linear(args.hidden_layer_size, MNIST.LABELS),
|
||||||
|
)
|
||||||
|
print("The following model has been created:", model)
|
||||||
|
|
||||||
|
# Create the TrainableModule and configure it for training.
|
||||||
|
model = npfl138.TrainableModule(model)
|
||||||
|
|
||||||
|
model.configure(
|
||||||
|
optimizer=torch.optim.Adam(model.parameters()),
|
||||||
|
loss=torch.nn.CrossEntropyLoss(),
|
||||||
|
metrics={"accuracy": torchmetrics.Accuracy("multiclass", num_classes=MNIST.LABELS)},
|
||||||
|
logdir=npfl138.format_logdir("logs/{file-}{timestamp}{-config}", **vars(args)),
|
||||||
|
)
|
||||||
|
|
||||||
|
# Train the model.
|
||||||
|
model.fit(train, dev=dev, epochs=args.epochs, log_config=vars(args), log_graph=True)
|
||||||
|
|
||||||
|
# Evaluate the model on the test data.
|
||||||
|
model.evaluate(test)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main_args = parser.parse_args([] if "__file__" not in globals() else None)
|
||||||
|
main(main_args)
|
||||||
73
hod_1/data/mnist_layers_activations.py
Normal file
73
hod_1/data/mnist_layers_activations.py
Normal file
@@ -0,0 +1,73 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
import argparse
|
||||||
|
|
||||||
|
import torch
|
||||||
|
import torchmetrics
|
||||||
|
|
||||||
|
import npfl138
|
||||||
|
npfl138.require_version("2526.1")
|
||||||
|
from npfl138.datasets.mnist import MNIST
|
||||||
|
|
||||||
|
parser = argparse.ArgumentParser()
|
||||||
|
# These arguments will be set appropriately by ReCodEx, even if you change them.
|
||||||
|
parser.add_argument("--activation", default="none", choices=["none", "relu", "tanh", "sigmoid"], help="Activation.")
|
||||||
|
parser.add_argument("--batch_size", default=50, type=int, help="Batch size.")
|
||||||
|
parser.add_argument("--epochs", default=10, type=int, help="Number of epochs.")
|
||||||
|
parser.add_argument("--hidden_layer_size", default=100, type=int, help="Size of the hidden layer.")
|
||||||
|
parser.add_argument("--hidden_layers", default=1, type=int, help="Number of layers.")
|
||||||
|
parser.add_argument("--recodex", default=False, action="store_true", help="Evaluation in ReCodEx.")
|
||||||
|
parser.add_argument("--seed", default=42, type=int, help="Random seed.")
|
||||||
|
parser.add_argument("--threads", default=1, type=int, help="Maximum number of threads to use.")
|
||||||
|
# If you add more arguments, ReCodEx will keep them with your default values.
|
||||||
|
|
||||||
|
|
||||||
|
class Dataset(npfl138.TransformedDataset):
|
||||||
|
def transform(self, example):
|
||||||
|
image = example["image"] # a torch.Tensor with torch.uint8 values in [0, 255] range
|
||||||
|
image = image.to(torch.float32) / 255 # image converted to float32 and rescaled to [0, 1]
|
||||||
|
label = example["label"] # a torch.Tensor with a single integer representing the label
|
||||||
|
return image, label # return an (input, target) pair
|
||||||
|
|
||||||
|
|
||||||
|
def main(args: argparse.Namespace) -> dict[str, float]:
|
||||||
|
# Set the random seed and the number of threads.
|
||||||
|
npfl138.startup(args.seed, args.threads, args.recodex)
|
||||||
|
npfl138.global_keras_initializers()
|
||||||
|
|
||||||
|
# Load the data and create dataloaders.
|
||||||
|
mnist = MNIST()
|
||||||
|
|
||||||
|
train = torch.utils.data.DataLoader(Dataset(mnist.train), batch_size=args.batch_size, shuffle=True)
|
||||||
|
dev = torch.utils.data.DataLoader(Dataset(mnist.dev), batch_size=args.batch_size)
|
||||||
|
|
||||||
|
# Create the model.
|
||||||
|
model = torch.nn.Sequential()
|
||||||
|
|
||||||
|
# TODO: Finish the model. Namely:
|
||||||
|
# - start by adding the `torch.nn.Flatten()` layer;
|
||||||
|
# - then add `args.hidden_layers` number of fully connected hidden layers
|
||||||
|
# `torch.nn.Linear()`, each with `args.hidden_layer_size` neurons and followed by
|
||||||
|
# a specified `args.activation`, allowing "none", "relu", "tanh", "sigmoid";
|
||||||
|
# - finally, add an output fully connected layer with `MNIST.LABELS` units.
|
||||||
|
...
|
||||||
|
|
||||||
|
# Create the TrainableModule and configure it for training.
|
||||||
|
model = npfl138.TrainableModule(model)
|
||||||
|
|
||||||
|
model.configure(
|
||||||
|
optimizer=torch.optim.Adam(model.parameters()),
|
||||||
|
loss=torch.nn.CrossEntropyLoss(),
|
||||||
|
metrics={"accuracy": torchmetrics.Accuracy("multiclass", num_classes=MNIST.LABELS)},
|
||||||
|
logdir=npfl138.format_logdir("logs/{file-}{timestamp}{-config}", **vars(args)),
|
||||||
|
)
|
||||||
|
|
||||||
|
# Train the model.
|
||||||
|
logs = model.fit(train, dev=dev, epochs=args.epochs)
|
||||||
|
|
||||||
|
# Return development metrics for ReCodEx to validate.
|
||||||
|
return {metric: value for metric, value in logs.items() if metric.startswith("dev:")}
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main_args = parser.parse_args([] if "__file__" not in globals() else None)
|
||||||
|
main(main_args)
|
||||||
58
hod_1/data/numpy_entropy.py
Normal file
58
hod_1/data/numpy_entropy.py
Normal file
@@ -0,0 +1,58 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
import argparse
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
parser = argparse.ArgumentParser()
|
||||||
|
# These arguments will be set appropriately by ReCodEx, even if you change them.
|
||||||
|
parser.add_argument("--data_path", default="numpy_entropy_data.txt", type=str, help="Data distribution path.")
|
||||||
|
parser.add_argument("--model_path", default="numpy_entropy_model.txt", type=str, help="Model distribution path.")
|
||||||
|
parser.add_argument("--recodex", default=False, action="store_true", help="Evaluation in ReCodEx.")
|
||||||
|
# If you add more arguments, ReCodEx will keep them with your default values.
|
||||||
|
|
||||||
|
|
||||||
|
def main(args: argparse.Namespace) -> tuple[float, float, float]:
|
||||||
|
# TODO: Load data distribution, each line containing a datapoint -- a string.
|
||||||
|
with open(args.data_path, "r") as data:
|
||||||
|
for line in data:
|
||||||
|
line = line.rstrip("\n")
|
||||||
|
# TODO: Process the line, aggregating data with built-in Python
|
||||||
|
# data structures (not NumPy, which is not suitable for incremental
|
||||||
|
# addition and string mapping).
|
||||||
|
|
||||||
|
# TODO: Create a NumPy array containing the data distribution. The
|
||||||
|
# NumPy array should contain only data, not any mapping. Alternatively,
|
||||||
|
# the NumPy array might be created after loading the model distribution.
|
||||||
|
|
||||||
|
# TODO: Load model distribution, each line `string \t probability`.
|
||||||
|
with open(args.model_path, "r") as model:
|
||||||
|
for line in model:
|
||||||
|
line = line.rstrip("\n")
|
||||||
|
# TODO: Process the line, aggregating using Python data structures.
|
||||||
|
|
||||||
|
# TODO: Create a NumPy array containing the model distribution.
|
||||||
|
|
||||||
|
# TODO: Compute the entropy H(data distribution). You should not use
|
||||||
|
# manual for/while cycles, but instead use the fact that most NumPy methods
|
||||||
|
# operate on all elements (for example `*` is vector element-wise multiplication).
|
||||||
|
entropy = ...
|
||||||
|
|
||||||
|
# TODO: Compute cross-entropy H(data distribution, model distribution).
|
||||||
|
# When some data distribution elements are missing in the model distribution,
|
||||||
|
# the resulting crossentropy should be `np.inf`.
|
||||||
|
crossentropy = ...
|
||||||
|
|
||||||
|
# TODO: Compute KL-divergence D_KL(data distribution, model_distribution),
|
||||||
|
# again using `np.inf` when needed.
|
||||||
|
kl_divergence = ...
|
||||||
|
|
||||||
|
# Return the computed values for ReCodEx to validate.
|
||||||
|
return entropy, crossentropy, kl_divergence
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main_args = parser.parse_args([] if "__file__" not in globals() else None)
|
||||||
|
entropy, crossentropy, kl_divergence = main(main_args)
|
||||||
|
print(f"Entropy: {entropy:.2f} nats")
|
||||||
|
print(f"Crossentropy: {crossentropy:.2f} nats")
|
||||||
|
print(f"KL divergence: {kl_divergence:.2f} nats")
|
||||||
7
hod_1/data/numpy_entropy_data_1.txt
Normal file
7
hod_1/data/numpy_entropy_data_1.txt
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
A
|
||||||
|
BB
|
||||||
|
A
|
||||||
|
A
|
||||||
|
BB
|
||||||
|
A
|
||||||
|
CCC
|
||||||
7
hod_1/data/numpy_entropy_data_2.txt
Normal file
7
hod_1/data/numpy_entropy_data_2.txt
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
A
|
||||||
|
BB
|
||||||
|
A
|
||||||
|
A
|
||||||
|
BB
|
||||||
|
A
|
||||||
|
CCC
|
||||||
1000
hod_1/data/numpy_entropy_data_3.txt
Normal file
1000
hod_1/data/numpy_entropy_data_3.txt
Normal file
File diff suppressed because it is too large
Load Diff
10000
hod_1/data/numpy_entropy_data_4.txt
Normal file
10000
hod_1/data/numpy_entropy_data_4.txt
Normal file
File diff suppressed because it is too large
Load Diff
3
hod_1/data/numpy_entropy_model_1.txt
Normal file
3
hod_1/data/numpy_entropy_model_1.txt
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
BB 0.4
|
||||||
|
A 0.5
|
||||||
|
CCC 0.1
|
||||||
3
hod_1/data/numpy_entropy_model_2.txt
Normal file
3
hod_1/data/numpy_entropy_model_2.txt
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
BB 0.4
|
||||||
|
A 0.5
|
||||||
|
D 0.1
|
||||||
100
hod_1/data/numpy_entropy_model_3.txt
Normal file
100
hod_1/data/numpy_entropy_model_3.txt
Normal file
@@ -0,0 +1,100 @@
|
|||||||
|
ttvhw 0.0094312126288478
|
||||||
|
jvgsz 0.0010542688790326
|
||||||
|
dppgn 0.00155178394881855
|
||||||
|
gsrfj 0.00115959330379828
|
||||||
|
gonfc 0.0125594135756661
|
||||||
|
taplx 0.0130168483718944
|
||||||
|
grjbd 0.0118157969957567
|
||||||
|
ezlzh 0.000884396746443509
|
||||||
|
etbqm 0.0028883276682822
|
||||||
|
hbezl 0.0477149325045959
|
||||||
|
izndo 0.0136057551818149
|
||||||
|
emvms 0.00348555623538066
|
||||||
|
vvbgz 0.011074503885834
|
||||||
|
qlvmh 0.0324508321082636
|
||||||
|
xmjnf 0.00731021437537367
|
||||||
|
mzzkf 0.00587917090038903
|
||||||
|
qfdhm 0.0245761056970893
|
||||||
|
qwmks 0.00849433418640691
|
||||||
|
osehz 0.00337991233869235
|
||||||
|
dsgct 0.00303651707035691
|
||||||
|
mpubo 0.0133586334433875
|
||||||
|
fpnuy 4.5815030263525e-05
|
||||||
|
icwiv 0.0104882479671
|
||||||
|
usmyd 0.0143965053434422
|
||||||
|
tahse 0.00826831006992072
|
||||||
|
kpewx 0.00251578239615924
|
||||||
|
xtkrt 0.0107339052903878
|
||||||
|
iougj 0.0342521998718883
|
||||||
|
guhsk 0.0185906728923218
|
||||||
|
tykoo 0.0197009412084226
|
||||||
|
ehhoq 0.0353696768415327
|
||||||
|
vlquz 0.00115966059630282
|
||||||
|
wocje 0.0141062590065673
|
||||||
|
cpbgj 0.00219079990051944
|
||||||
|
ctugy 0.00378729517828768
|
||||||
|
aessn 0.000587148370086414
|
||||||
|
sqxvv 0.0116458117148786
|
||||||
|
rcruz 0.00226519368027114
|
||||||
|
bevod 0.00718954957464239
|
||||||
|
fufwi 0.0252245678495015
|
||||||
|
mkcbp 0.0146700693446102
|
||||||
|
fuayx 0.0131555291338195
|
||||||
|
gcltr 0.00174610698744372
|
||||||
|
esgda 0.00815650534385205
|
||||||
|
tajwq 0.00445130730008939
|
||||||
|
qcrek 0.00272711711160869
|
||||||
|
bulfd 0.00193564926804655
|
||||||
|
oyqll 0.00407246240697133
|
||||||
|
nfloq 0.00982791783138974
|
||||||
|
aiphx 0.00102055661540108
|
||||||
|
xiohq 0.000665516553375884
|
||||||
|
jnzwv 0.0159815501657243
|
||||||
|
ydniv 0.00740460630409081
|
||||||
|
zntnp 0.001347520113701
|
||||||
|
hpzog 0.00828471646149118
|
||||||
|
zpfwu 0.00259323364561152
|
||||||
|
fgnfx 0.00281632291480477
|
||||||
|
zexjt 0.00460980407970152
|
||||||
|
cqjir 0.00449780441058927
|
||||||
|
raabk 0.0168867125862489
|
||||||
|
pojke 0.000629586226187955
|
||||||
|
qhsix 0.0131909362211278
|
||||||
|
zxzaj 0.00643340353139025
|
||||||
|
dtpqp 0.0306079363057332
|
||||||
|
cgkms 0.0142372189818962
|
||||||
|
dpuri 0.0183070308798131
|
||||||
|
ysuqz 0.0121665423788785
|
||||||
|
tcclk 0.0170144702078789
|
||||||
|
tqwbr 0.00377004404546414
|
||||||
|
izoar 0.0407864649522166
|
||||||
|
egbxl 0.000800271672355991
|
||||||
|
nsjfo 0.00449051924902884
|
||||||
|
rgmnm 0.0506655416330107
|
||||||
|
lvynz 0.0159326679515115
|
||||||
|
rwlqi 0.000972123481280115
|
||||||
|
ypygl 0.00490944226804078
|
||||||
|
suddn 0.00796616445877217
|
||||||
|
yzgcc 0.000398622353240487
|
||||||
|
wtxyq 0.0067836100325681
|
||||||
|
huchd 0.00622457548430983
|
||||||
|
zhrco 0.00380565517326084
|
||||||
|
raffb 0.00216123484164195
|
||||||
|
xumnc 1.76957759973509e-05
|
||||||
|
peyxt 0.00331881717875047
|
||||||
|
keunp 0.00108034331566177
|
||||||
|
euofa 0.00273925508171951
|
||||||
|
rzacp 0.0412127910997975
|
||||||
|
mcktt 0.0122506696757427
|
||||||
|
boyoj 0.0123878873436728
|
||||||
|
wuidn 0.0109507122517976
|
||||||
|
omyfi 0.00125088875317538
|
||||||
|
fxwih 0.00289331010631559
|
||||||
|
zajpa 0.00207816129029445
|
||||||
|
qilpf 0.00757622675637339
|
||||||
|
eghhy 0.00147889398768911
|
||||||
|
xzxgz 0.0228182364589342
|
||||||
|
lfgnw 0.00765022250913878
|
||||||
|
wxaxe 0.0118444674613856
|
||||||
|
qpeyg 0.0164611528436398
|
||||||
|
ydmkt 0.00163874437311358
|
||||||
200
hod_1/data/numpy_entropy_model_4.txt
Normal file
200
hod_1/data/numpy_entropy_model_4.txt
Normal file
@@ -0,0 +1,200 @@
|
|||||||
|
aysirskl 0.000465969985327225
|
||||||
|
icjgrrej 0.00125399262934441
|
||||||
|
rpwecdml 0.00302176884392648
|
||||||
|
knqqumtg 0.00414001389703705
|
||||||
|
rijwsqio 0.00363108825124935
|
||||||
|
pjkouayb 0.000590694273926047
|
||||||
|
yhppippt 0.00281383589518343
|
||||||
|
gviqjnka 0.00596853741622785
|
||||||
|
rblbftch 0.0118925148629093
|
||||||
|
rpwoeola 0.00137284781891226
|
||||||
|
zkrxhkny 0.00514371341483877
|
||||||
|
evaytxev 0.000424463748174283
|
||||||
|
uoasbjrz 0.0368666520055359
|
||||||
|
faxbpnva 0.00413936957451408
|
||||||
|
ziyvyejl 0.00149656681939826
|
||||||
|
litrmtxc 0.00619374172395002
|
||||||
|
hlfnhahj 0.00338459417211952
|
||||||
|
xntjdryl 0.00966638405208417
|
||||||
|
gkqprfoc 0.00560911891937092
|
||||||
|
grxldlyk 0.00409549456009234
|
||||||
|
lklsancl 0.00323231239406044
|
||||||
|
syvktodv 0.0052132145962036
|
||||||
|
rqludekg 0.00644484194616499
|
||||||
|
uqsvrodw 0.00123606973097706
|
||||||
|
zryqedcy 0.00196828327955391
|
||||||
|
pslumxli 0.00183063547110444
|
||||||
|
tcuxhxaz 0.00583574881980228
|
||||||
|
nptbmfae 0.00358244966827256
|
||||||
|
wmrryjbh 0.00358044531741753
|
||||||
|
zusfguab 0.00275169767409786
|
||||||
|
nxknvpfz 0.00218936428227388
|
||||||
|
adydcnwt 0.00191009117751983
|
||||||
|
bgsrnutc 0.00243875740436847
|
||||||
|
aktfqryi 0.00274622822697218
|
||||||
|
czsayrkz 0.00245129923038723
|
||||||
|
snfnnaow 0.0170456098040683
|
||||||
|
fjffynzj 0.00460651599373612
|
||||||
|
uwkawwfh 0.000604574134472098
|
||||||
|
gdpurstk 0.0118908587120531
|
||||||
|
dqxddeox 0.00644999423047188
|
||||||
|
gjrnpmcc 0.0116106131733171
|
||||||
|
sghuynub 0.00426256348792279
|
||||||
|
vldohfjo 0.00356866558256043
|
||||||
|
mzwexwte 1.07961561013392e-05
|
||||||
|
dyslcyus 0.00128908650106376
|
||||||
|
pkonquix 0.000220433673243581
|
||||||
|
komsctkv 0.0023824578761889
|
||||||
|
opbtajsx 0.00398981801318325
|
||||||
|
nvgkafec 0.000529321417752203
|
||||||
|
bqekebug 0.00822060594854104
|
||||||
|
vzbhuxdb 0.012716052012467
|
||||||
|
onjszcec 0.0070519552054062
|
||||||
|
uyzkiyny 0.00663642136106938
|
||||||
|
lvpfctfp 0.00739145880453734
|
||||||
|
jqpyomdo 0.00602519295566007
|
||||||
|
dibzffyd 0.00166506991120574
|
||||||
|
bsvflmhn 0.00280000060286557
|
||||||
|
qeabeirk 0.00323521527495261
|
||||||
|
stnxvfgm 0.00209650697187905
|
||||||
|
pgtznfnf 0.00198824215693064
|
||||||
|
qxpfvmqq 0.00632503971872179
|
||||||
|
bqjirtjb 0.00710032224860597
|
||||||
|
ebnqbokg 0.0105976764736864
|
||||||
|
ifdqnine 0.00626844145375311
|
||||||
|
oarfqwxd 0.0061165404206265
|
||||||
|
jllhtiip 0.0132176299369683
|
||||||
|
sumgousm 0.0054742969637985
|
||||||
|
gifksbue 0.0105812688731108
|
||||||
|
zokasczg 0.00606818562710146
|
||||||
|
fwrqxmxw 0.000169467389301491
|
||||||
|
wqpfwzbj 0.000537291085070445
|
||||||
|
klhjcomg 0.0114437454569636
|
||||||
|
nfkgpovg 0.00246715557607813
|
||||||
|
vrjiljsq 0.00508852153669522
|
||||||
|
modgjwsr 0.019505200896841
|
||||||
|
lkwpaiik 0.00772415799941654
|
||||||
|
hpuguqca 0.000346480864675375
|
||||||
|
rodlhbtu 0.0119742929904463
|
||||||
|
sbvqorso 0.00291444366504115
|
||||||
|
utqcywou 0.000497243075444838
|
||||||
|
rcxcoclf 0.0183219799199796
|
||||||
|
clzquaoh 0.00875008206537465
|
||||||
|
hnpikhxo 0.00793749915555697
|
||||||
|
slfuffmf 0.00664340904976409
|
||||||
|
hmpjprva 0.0109556882297664
|
||||||
|
wrwlwwgs 0.0103183805128625
|
||||||
|
ocfnlnyk 0.00532839764792461
|
||||||
|
fgshzlkz 0.0137498707566837
|
||||||
|
llwmrzjl 0.00078175543757186
|
||||||
|
bnqqgrlp 0.0128729356486284
|
||||||
|
hcnggcmw 0.00143409059189367
|
||||||
|
wwatmyfh 0.0123189657702296
|
||||||
|
ybgaiwqm 0.00140099211351084
|
||||||
|
fpzxralq 0.00279045651387019
|
||||||
|
xlrcyrio 0.00227845165607012
|
||||||
|
cxzsyfuo 0.00267141299994114
|
||||||
|
gjhikwpn 0.000883501012657947
|
||||||
|
wlpbnexn 0.0027562191128744
|
||||||
|
jemonchd 0.00126825844735878
|
||||||
|
canuyblm 0.0216756574568504
|
||||||
|
pdbdosic 0.00180608741766137
|
||||||
|
cxorzqix 0.00212926341416582
|
||||||
|
rtledujy 0.00129871885173409
|
||||||
|
hejxlqrt 0.00573128888947967
|
||||||
|
kavwcfmu 3.54587299894723e-05
|
||||||
|
fatcfvpk 0.00723990686470467
|
||||||
|
bpplofjm 0.0041596511618454
|
||||||
|
posryrwj 0.0117680238680073
|
||||||
|
bvpnukag 0.00404497449067731
|
||||||
|
lbelzpbs 0.0062178236771778
|
||||||
|
gribnjjy 0.0109197239989149
|
||||||
|
stkfzrxm 0.00514831697438371
|
||||||
|
inbikasp 0.00284453578841593
|
||||||
|
cdxltwqx 0.00878268364294129
|
||||||
|
nvgiglpg 0.014784030751736
|
||||||
|
fmaafgvi 0.00204418798526165
|
||||||
|
qtyafvix 0.0137826948872999
|
||||||
|
erhbdnlg 0.00187229855731789
|
||||||
|
ulesdghl 0.00316583831999121
|
||||||
|
ebheqjpk 0.00266386141319316
|
||||||
|
gsmutwgq 0.00193798388627551
|
||||||
|
hfjbohcn 0.00723777956971534
|
||||||
|
kvanptal 0.00527843140542493
|
||||||
|
hsbfrqwc 0.00546974392745142
|
||||||
|
ruiquets 0.00168512464647176
|
||||||
|
veourctx 0.00180094566860724
|
||||||
|
nnshafed 0.00172147543036827
|
||||||
|
irohhwfg 0.00243495609454943
|
||||||
|
ivxoaqvg 0.000906434346262496
|
||||||
|
wnddbfsg 0.000226736591764822
|
||||||
|
osegqybm 0.00057255386567979
|
||||||
|
lrjysmbk 0.000139930203234663
|
||||||
|
sakueadt 0.00131143263623671
|
||||||
|
dhmytfyl 0.00102515492571535
|
||||||
|
fcctcaro 0.00722697313689681
|
||||||
|
bvggwzqp 0.0111959052719189
|
||||||
|
ajtczmtc 0.00214380561744269
|
||||||
|
dklpprvj 0.000723659838801874
|
||||||
|
scvfioku 0.00874660366923869
|
||||||
|
egrqonlm 0.000188777762207543
|
||||||
|
hpzlpxzg 0.000962056196345939
|
||||||
|
ntdwlyyu 0.00496070367855473
|
||||||
|
ygedcrzi 0.00182670850712668
|
||||||
|
fswersgr 0.00622626521429683
|
||||||
|
kndcdnkp 0.000674263651837786
|
||||||
|
qaoatkzw 0.003087492612335
|
||||||
|
krwmiegb 0.00855596722752998
|
||||||
|
rxpjkrob 0.00261830406705972
|
||||||
|
twtxssyg 0.013091523141205
|
||||||
|
qyalrqha 0.00503801619670401
|
||||||
|
ixmclekj 0.00107631312343053
|
||||||
|
tdfpaezp 0.0010257675376158
|
||||||
|
hvjrkgvn 0.00955381062591057
|
||||||
|
kqznhktp 0.00400096482424042
|
||||||
|
nuqwzjqi 0.000819740767639069
|
||||||
|
xjoydjkd 0.00395214578889989
|
||||||
|
zqcetrfj 0.00319829249514583
|
||||||
|
amhehfow 1.88587714546712e-05
|
||||||
|
sxmtlzlw 0.00663259259665281
|
||||||
|
qrrwjspo 0.00712841972374552
|
||||||
|
szhtpjyv 0.00156278271170568
|
||||||
|
verttiuu 0.00188167145628583
|
||||||
|
cxoftctg 0.00106191765565093
|
||||||
|
htuvrriy 0.00364447520438559
|
||||||
|
xqfvyctk 0.00449839948971503
|
||||||
|
kuipdodd 0.000411362629861999
|
||||||
|
gdqunmio 0.00264740859722539
|
||||||
|
sjyqvgwo 0.000428065184436695
|
||||||
|
rqgdglno 0.00175138550652958
|
||||||
|
aylkedwc 0.0119582316725472
|
||||||
|
ikutiphu 0.00576611726594471
|
||||||
|
qegytrys 0.018424201625129
|
||||||
|
zjfehspd 0.00139828990383295
|
||||||
|
hdbfrnha 0.00293768717793717
|
||||||
|
ssrktoar 0.000223708711653422
|
||||||
|
wcnhypxx 0.000694563090736388
|
||||||
|
egdyjbri 0.00107871204886407
|
||||||
|
stfwtrsa 0.00293697893838925
|
||||||
|
yaqvpcri 0.00303605758519147
|
||||||
|
irdnmbpc 0.0048176606001274
|
||||||
|
wtnecrbu 0.00459253359839141
|
||||||
|
lsijbdpo 0.00285817743547033
|
||||||
|
emvfrril 0.00422099180331
|
||||||
|
lmzuxenv 0.00898921604201908
|
||||||
|
ahpqjvoy 0.000549047186779937
|
||||||
|
mbpcgqyi 0.0139713446598273
|
||||||
|
cbutcdlm 0.00210694004038603
|
||||||
|
aybiktvj 0.00482170683266502
|
||||||
|
yhjoghiv 0.0024614271271427
|
||||||
|
umlwokvi 0.0012906962733002
|
||||||
|
pqxekaan 0.00585376668463907
|
||||||
|
xzdhqifz 0.00518329508250998
|
||||||
|
xpncwfyz 0.00616017953348638
|
||||||
|
hpuoygat 0.00626962514542393
|
||||||
|
ghvjpnkn 0.00674336788737429
|
||||||
|
elmvmqfx 0.00298649404059306
|
||||||
|
ewtkqwqc 0.00278733029070604
|
||||||
|
tmmjvjvw 0.0143408324546755
|
||||||
|
mxzsltag 0.00367522668308852
|
||||||
|
barlvanl 0.00468990081850378
|
||||||
82
hod_1/data/pca_first.py
Normal file
82
hod_1/data/pca_first.py
Normal file
@@ -0,0 +1,82 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
import argparse
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import torch
|
||||||
|
|
||||||
|
import npfl138
|
||||||
|
from npfl138.datasets.mnist import MNIST
|
||||||
|
npfl138.require_version("2526.1")
|
||||||
|
|
||||||
|
parser = argparse.ArgumentParser()
|
||||||
|
# These arguments will be set appropriately by ReCodEx, even if you change them.
|
||||||
|
parser.add_argument("--examples", default=256, type=int, help="MNIST examples to use.")
|
||||||
|
parser.add_argument("--iterations", default=100, type=int, help="Iterations of the power algorithm.")
|
||||||
|
parser.add_argument("--recodex", default=False, action="store_true", help="Evaluation in ReCodEx.")
|
||||||
|
parser.add_argument("--seed", default=42, type=int, help="Random seed.")
|
||||||
|
parser.add_argument("--threads", default=1, type=int, help="Maximum number of threads to use.")
|
||||||
|
# If you add more arguments, ReCodEx will keep them with your default values.
|
||||||
|
|
||||||
|
|
||||||
|
def main(args: argparse.Namespace) -> tuple[float, float]:
|
||||||
|
# Set the random seed and the number of threads.
|
||||||
|
npfl138.startup(args.seed, args.threads, args.recodex)
|
||||||
|
npfl138.global_keras_initializers()
|
||||||
|
|
||||||
|
# Prepare the data.
|
||||||
|
mnist = MNIST()
|
||||||
|
|
||||||
|
data_indices = np.random.choice(len(mnist.train), size=args.examples, replace=False)
|
||||||
|
data = mnist.train.data["images"][data_indices].to(torch.float32) / 255
|
||||||
|
|
||||||
|
# TODO: Data has shape [args.examples, MNIST.C, MNIST.H, MNIST.W].
|
||||||
|
# We want to reshape it to [args.examples, MNIST.C * MNIST.H * MNIST.W].
|
||||||
|
# We can do so using `torch.reshape(data, new_shape)` with new shape
|
||||||
|
# `[data.shape[0], data.shape[1] * data.shape[2] * data.shape[3]]`.
|
||||||
|
data = ...
|
||||||
|
|
||||||
|
# TODO: Now compute mean of every feature. Use `torch.mean`, and set
|
||||||
|
# `dim` (or `axis`) argument to zero -- therefore, the mean will be
|
||||||
|
# computed across the first dimension, so across examples.
|
||||||
|
#
|
||||||
|
# Note that for compatibility with Numpy/TF/Keras, all `dim` arguments
|
||||||
|
# in PyTorch can be also called `axis`.
|
||||||
|
mean = ...
|
||||||
|
|
||||||
|
# TODO: Compute the covariance matrix. The covariance matrix is
|
||||||
|
# (data - mean)^T @ (data - mean) / data.shape[0]
|
||||||
|
# where transpose can be computed using `torch.transpose` or `torch.t` and
|
||||||
|
# matrix multiplication using either Python operator @ or `torch.matmul`.
|
||||||
|
cov = ...
|
||||||
|
|
||||||
|
# TODO: Compute the total variance, which is the sum of the diagonal
|
||||||
|
# of the covariance matrix. To extract the diagonal use `torch.diagonal`,
|
||||||
|
# and to sum a tensor use `torch.sum`.
|
||||||
|
total_variance = ...
|
||||||
|
|
||||||
|
# TODO: Now run `args.iterations` of the power iteration algorithm.
|
||||||
|
# Start with a vector of `cov.shape[0]` ones of type `torch.float32` using `torch.ones`.
|
||||||
|
v = ...
|
||||||
|
for i in range(args.iterations):
|
||||||
|
# TODO: In the power iteration algorithm, we compute
|
||||||
|
# 1. v = cov v
|
||||||
|
# The matrix-vector multiplication can be computed as regular matrix multiplication
|
||||||
|
# or using `torch.mv`.
|
||||||
|
# 2. s = l2_norm(v)
|
||||||
|
# The l2_norm can be computed using for example `torch.linalg.vector_norm`.
|
||||||
|
# 3. v = v / s
|
||||||
|
...
|
||||||
|
|
||||||
|
# The `v` is now approximately the eigenvector of the largest eigenvalue, `s`.
|
||||||
|
# We now compute the explained variance, which is the ratio of `s` and `total_variance`.
|
||||||
|
explained_variance = s / total_variance
|
||||||
|
|
||||||
|
# Return the total and explained variance for ReCodEx to validate
|
||||||
|
return total_variance, 100 * explained_variance
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main_args = parser.parse_args([] if "__file__" not in globals() else None)
|
||||||
|
total_variance, explained_variance = main(main_args)
|
||||||
|
print(f"Total variance: {total_variance:.2f}")
|
||||||
|
print(f"Explained variance: {explained_variance:.2f}%")
|
||||||
63
hod_1/src/lib.rs
Normal file
63
hod_1/src/lib.rs
Normal file
@@ -0,0 +1,63 @@
|
|||||||
|
use burn::tensor::Tensor;
|
||||||
|
use std::collections::HashMap;
|
||||||
|
use std::fs::File;
|
||||||
|
use std::io::{BufRead, BufReader};
|
||||||
|
|
||||||
|
pub type B = burn_ndarray::NdArray<f64>;
|
||||||
|
|
||||||
|
// Load the empirical distribution from `data_path` and align it with the
// model distribution stored in `model_path`.
//
// `data_path` holds one data point per line (blank lines ignored); `model_path`
// is tab-separated `point<TAB>probability`. Returns `(p, q)` where `p[i]` is the
// empirical relative frequency of a point and `q[i]` is the model probability of
// that same point (0.0 when the model does not mention it). Points present only
// in the model are dropped — they carry p = 0 and contribute nothing. The
// pairing of `p[i]` with `q[i]` is consistent, but the ordering follows HashMap
// iteration and is therefore unspecified.
pub fn load_and_align(data_path: &str, model_path: &str) -> (Vec<f64>, Vec<f64>) {
    // Count occurrences of every data point.
    let data_file = File::open(data_path).expect("Nepodarilo sa otvoriť dáta");
    let mut frequency: HashMap<String, f64> = HashMap::new();
    let mut total = 0.0;
    for line in BufReader::new(data_file).lines() {
        let token = line.unwrap().trim().to_string();
        if !token.is_empty() {
            *frequency.entry(token).or_insert(0.0) += 1.0;
            total += 1.0;
        }
    }

    // Read the model probabilities into a lookup table; rows with fewer than
    // two tab-separated fields are ignored.
    let model_file = File::open(model_path).expect("Nepodarilo sa otvoriť model");
    let mut model: HashMap<String, f64> = HashMap::new();
    for line in BufReader::new(model_file).lines() {
        let row = line.unwrap();
        let fields: Vec<&str> = row.split('\t').collect();
        if fields.len() >= 2 {
            model.insert(fields[0].to_string(), fields[1].parse::<f64>().unwrap());
        }
    }

    // Emit the two aligned probability vectors.
    let mut p = Vec::new();
    let mut q = Vec::new();
    for (point, count) in frequency.iter() {
        p.push(count / total);
        q.push(*model.get(point).unwrap_or(&0.0));
    }
    (p, q)
}
|
||||||
|
|
||||||
|
// Shannon entropy H(p) = -Σ p_i · ln(p_i) of a 1-D probability tensor,
// using the natural logarithm. The convention 0 · ln(0) = 0 is enforced by
// replacing zero entries with 1 before taking the log (ln(1) = 0), so those
// terms vanish after the multiplication by p.
pub fn entropy(p: Tensor<B, 1>) -> f64 {
    let is_zero = p.clone().equal_elem(0.0);
    let log_p = p.clone().mask_fill(is_zero, 1.0).log();
    -(p * log_p).sum().into_scalar()
}
|
||||||
|
|
||||||
|
// Cross-entropy H(p, q) = -Σ p_i · ln(q_i) of two aligned 1-D probability
// tensors (natural logarithm). Diverges to +∞ whenever some point has
// p_i > 0 but q_i = 0; remaining zeros in q occur only where p_i = 0 and are
// masked to 1 so those terms contribute 0 · ln(1) = 0.
pub fn cross_entropy(p: Tensor<B, 1>, q: Tensor<B, 1>) -> f64 {
    let q_is_zero = q.clone().equal_elem(0.0);
    let p_positive = p.clone().greater_elem(0.0);
    if p_positive.bool_and(q_is_zero.clone()).any().into_scalar() {
        return f64::INFINITY;
    }
    let log_q = q.mask_fill(q_is_zero, 1.0).log();
    -(p * log_q).sum().into_scalar()
}
|
||||||
|
|
||||||
|
pub fn kl_div2(p: Tensor<B, 1>, q: Tensor<B, 1>) -> f64 {
|
||||||
|
let ce = cross_entropy(p.clone(), q);
|
||||||
|
let e = entropy(p);
|
||||||
|
let result = ce - e;
|
||||||
|
if result < 0.0 { 0.0 } else { result }
|
||||||
|
}
|
||||||
@@ -1,56 +1,30 @@
|
|||||||
use burn::tensor::{backend::Backend, Tensor, TensorData};
|
use burn::tensor::{backend::Backend, Tensor};
|
||||||
use burn_ndarray::NdArray;
|
use clap::Parser;
|
||||||
use ndarray::{array, Array1};
|
// Nahraď 'hod_1' názvom tvojho projektu v Cargo.toml
|
||||||
|
use hod_1::{load_and_align, entropy, cross_entropy, kl_div2, B};
|
||||||
|
|
||||||
type B = NdArray<f64>;
|
/// Command-line arguments for the entropy / cross-entropy / KL-divergence tool.
#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
struct Args {
    /// Path to the observed data file (one data point per line).
    #[arg(long = "data_path")]
    data_path: String,

    /// Path to the model file (tab-separated `point<TAB>probability` rows).
    #[arg(long = "model_path")]
    model_path: String,
}
|
||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
|
let args = Args::parse();
|
||||||
let device = <B as Backend>::Device::default();
|
let device = <B as Backend>::Device::default();
|
||||||
|
|
||||||
// ndarray::Array1<f64>
|
// Použijeme funkciu z lib.rs
|
||||||
let a: Array1<f64> = array![0.4, 0.6];
|
let (p_vec, q_vec) = load_and_align(&args.data_path, &args.model_path);
|
||||||
|
|
||||||
// Convert ndarray -> Burn tensor
|
let p = Tensor::<B, 1>::from_data(p_vec.as_slice(), &device);
|
||||||
let p = Tensor::<B, 1>::from_data(
|
let q = Tensor::<B, 1>::from_data(q_vec.as_slice(), &device);
|
||||||
TensorData::new(a.to_vec(), [a.len()]),
|
|
||||||
&device,
|
|
||||||
);
|
|
||||||
|
|
||||||
let h = entropy(p);
|
// Výpočty
|
||||||
println!("{h}");
|
println!("{}", entropy(p.clone()));
|
||||||
|
println!("{}", cross_entropy(p.clone(), q.clone()));
|
||||||
|
println!("{}", kl_div2(p, q));
|
||||||
}
|
}
|
||||||
|
|
||||||
fn entropy(p: Tensor<B, 1>) -> f64 {
|
|
||||||
// Handle p = 0 safely, because 0 * log(0) should contribute 0
|
|
||||||
let zero_mask = p.clone().equal_elem(0.0);
|
|
||||||
let p_safe = p.clone().mask_fill(zero_mask.clone(), 1.0);
|
|
||||||
|
|
||||||
let terms = (p.clone() * p_safe.log()).mask_fill(zero_mask, 0.0);
|
|
||||||
|
|
||||||
(-terms).sum().into_scalar()
|
|
||||||
}
|
|
||||||
|
|
||||||
// pub fn entropy2(p: Tensor<B, 1>) -> f64 {
|
|
||||||
// if p == 0.0 {
|
|
||||||
// return 0.0;
|
|
||||||
// }
|
|
||||||
// - p * p.ln()
|
|
||||||
// }
|
|
||||||
|
|
||||||
// pub fn cross_entropy(p: Tensor<B, 1>, q: f64) -> f64 {
|
|
||||||
// if p == 0.0 {
|
|
||||||
// return 0.0;
|
|
||||||
// }
|
|
||||||
// - p * q.ln()
|
|
||||||
// }
|
|
||||||
|
|
||||||
// pub fn kl_div(p: f64, q: f64) -> f64 {
|
|
||||||
// if p == 0.0 {
|
|
||||||
// return 0.0;
|
|
||||||
// }
|
|
||||||
// p*(p.ln()-q.ln())
|
|
||||||
// }
|
|
||||||
|
|
||||||
// pub fn kl_div2(p: f64, q: f64) -> f64 {
|
|
||||||
// cross_entropy(p,q) - entropy(p)
|
|
||||||
// }
|
|
||||||
|
|||||||
Reference in New Issue
Block a user