Most of functions in this homework are saved in the following notebooks.
using NBInclude
nbinclude("Basic Functions.ipynb");
nbinclude("Non-binary Huffman Coding.ipynb");
nbinclude("Tunstall Coding.ipynb");
using PyPlot
pmf = [0.6, 0.3, 0.1]
code = daryHuffman(pmf, 3)
avgLength(code, pmf)
pmf1 = pmf
print("==========================================================================================\n")
print("pmf1: ", pmf1, "\n")
print("entropy of base 3: ", entropy(pmf1)/log2(3), "\n")
pmfn = copy(pmf1)
for n in [2,3,5]
pmfn = blockPmf(pmf1, n, pmfn)
code = daryHuffman(pmfn, 3)
avg_length = avgLength(code, pmfn)
print("------------------------------\n")
print("For the block length = ", n, ", \n")
print("The entropy of base 3 is ", n*entropy(pmf1)/log2(3), "\n")
print("The average length of Huffman code is \t ", avg_length, "\n")
end
\tt ["B", "AB", "AAA", "AAB"], [0.1,0.09,0.729,0.081] \ = Tunstall([0.9, 0.1], 4), $$
\tt ["B", "AB", "AAB", "AAAA", "AAAB"], [0.2,0.16,0.128,0.4096,0.1024] \ = Tunstall([0.8, 0.2], 5). $$
src_pmf = [0.9, 0.1]
avg_lengths = zeros(10)
for k = 2:10
phrases, pmf = Tunstall(src_pmf, 2^k)
len_codewords = log2(length(phrases))
avg_len_phrases = avgLength(phrases, pmf)
avg_lengths[k] = len_codewords/avg_len_phrases
print("For num_phrases = ", 2^k, ", the length of codewords per average number of symbols is ", avg_lengths[k], ".\n")
end
plot(map(x -> 2^x, 2:10), avg_lengths[2:10])
Note that the entropy of the source pmf is
entropy(src_pmf)
num_phrases = 1024
plist = collect(0.01:0.01:0.99)
avg_lengths_Tunstall = zeros(length(plist))
for i in 1:length(plist)
phrases, pmf = Tunstall([plist[i], 1-plist[i]], num_phrases)
len_codewords = log2(length(phrases))
avg_len_phrases = avgLength(phrases, pmf)
avg_lengths_Tunstall[i] = len_codewords/avg_len_phrases
end
plot(plist, avg_lengths_Tunstall, marker = "*", label = "Tunstall")
plot(plist, map(x->entropy([x,1-x]), plist), color = "r", label = "Binary entropy") # binary entropy function
axhline(1, color = "g", linestyle = "--")
xlabel("Probability p")
ylabel("Average lengths")
legend(loc = "upper right", fancybox = "true")
Note that the average lengths are lower bounded by the binary entropy function.
Recall the last homework:
n = 10
plist = collect(0.01:0.01:0.99)
avg_lengths_Huffman = zeros(length(plist))
for i in 1:length(plist)
pmfn = blockPmf([plist[i], 1-plist[i]], n)
avg_lengths_Huffman[i] = avgLength(daryHuffman(pmfn, 2), pmfn)/n
end
Plot the results in one plot:
plot(plist, avg_lengths_Tunstall, marker = "*", label = "Tunstall")
plot(plist, avg_lengths_Huffman, marker = "*", label = "Binary Huffman")
axhline(1, color = "g", linestyle = "--")
xlabel("Probability p")
ylabel("Average lengths")
legend(loc = "upper right", fancybox = "true")