01 hotova
This commit is contained in:
58
hod_1/data/numpy_entropy.py
Normal file
58
hod_1/data/numpy_entropy.py
Normal file
@@ -0,0 +1,58 @@
|
||||
#!/usr/bin/env python3
|
||||
import argparse
|
||||
|
||||
import numpy as np
|
||||
|
||||
# Command-line interface of the script. The parser lives at module level so
# that both the `__main__` entry point and external callers can reuse it.
parser = argparse.ArgumentParser()
# These arguments will be set appropriately by ReCodEx, even if you change them.
parser.add_argument("--data_path", default="numpy_entropy_data.txt", type=str, help="Data distribution path.")
parser.add_argument("--model_path", default="numpy_entropy_model.txt", type=str, help="Model distribution path.")
parser.add_argument("--recodex", default=False, action="store_true", help="Evaluation in ReCodEx.")
# If you add more arguments, ReCodEx will keep them with your default values.
|
||||
|
||||
|
||||
def main(args: argparse.Namespace) -> tuple[float, float, float]:
    """Compute entropy, cross-entropy and KL divergence of two distributions.

    The data distribution is estimated empirically from ``args.data_path``
    (one datapoint string per line). The model distribution is read from
    ``args.model_path`` (one ``string \\t probability`` pair per line).

    Args:
        args: Parsed arguments; only ``data_path`` and ``model_path`` are read.

    Returns:
        A tuple ``(entropy, crossentropy, kl_divergence)`` in nats, where
        ``entropy`` is H(data), ``crossentropy`` is H(data, model) and
        ``kl_divergence`` is D_KL(data || model). The latter two are
        ``np.inf`` when some datapoint present in the data is missing from
        the model or has zero model probability.

    Raises:
        ValueError: If a non-empty model line has no tab separator or its
            probability cannot be parsed as a float.
    """
    # Load the data distribution: count occurrences with a plain dict, because
    # NumPy is not suitable for incremental addition and string mapping.
    data_counts: dict[str, int] = {}
    with open(args.data_path, "r") as data:
        for line in data:
            line = line.rstrip("\n")
            data_counts[line] = data_counts.get(line, 0) + 1

    # Load the model distribution, each line being `string \t probability`.
    model_probs: dict[str, float] = {}
    with open(args.model_path, "r") as model:
        for line in model:
            line = line.rstrip("\n")
            if not line:
                # A blank line (e.g. trailing newline) carries no entry.
                continue
            # Split on the last tab so the probability is always the final field.
            key, separator, probability = line.rpartition("\t")
            if not separator:
                raise ValueError(f"Malformed model line (missing tab): {line!r}")
            model_probs[key] = float(probability)

    # Align both distributions on the datapoints that occur in the data.
    # Model-only entries have zero data probability and contribute nothing.
    keys = list(data_counts)
    counts = np.array([data_counts[key] for key in keys], dtype=float)
    data_dist = counts / counts.sum()
    # Datapoints unknown to the model get probability 0, which yields `inf` below.
    model_dist = np.array([model_probs.get(key, 0.0) for key in keys], dtype=float)

    # Every entry of `data_dist` is strictly positive, so its log is finite.
    log_data = np.log(data_dist)
    # log(0) = -inf is the intended result for missing model entries; silence
    # only the corresponding divide-by-zero warning.
    with np.errstate(divide="ignore"):
        log_model = np.log(model_dist)

    # Entropy H(data distribution). Adding 0.0 turns a possible negative zero
    # (single-outcome distribution) into +0.0 so it does not print as "-0.00".
    entropy = float(-np.sum(data_dist * log_data)) + 0.0

    # Cross-entropy H(data distribution, model distribution); `np.inf` when a
    # data element has zero probability under the model.
    crossentropy = float(-np.sum(data_dist * log_model)) + 0.0

    # KL divergence D_KL(data distribution, model distribution). Summing the
    # per-element log ratios gives exactly 0 for identical distributions and
    # `np.inf` whenever the cross-entropy is infinite.
    kl_divergence = float(np.sum(data_dist * (log_data - log_model)))

    # Return the computed values for ReCodEx to validate.
    return entropy, crossentropy, kl_divergence
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # When `__file__` is not defined (e.g. an interactive session), parse an
    # empty argument list so the defaults are used; otherwise passing `None`
    # makes argparse read the real command line.
    main_args = parser.parse_args([] if "__file__" not in globals() else None)
    entropy, crossentropy, kl_divergence = main(main_args)
    print(f"Entropy: {entropy:.2f} nats")
    print(f"Crossentropy: {crossentropy:.2f} nats")
    print(f"KL divergence: {kl_divergence:.2f} nats")
|
||||
Reference in New Issue
Block a user