Binary Huffman coding

In [1]:
using NBInclude
nbinclude("Basic Functions.ipynb");

1) w/ sorting the pmf

In [2]:
# Binary Huffman coding: a recursive implementation I
# WITH sorting the pmf!
# The input pmf does not have to be normalized.
function binaryHuffman(pmf)
    pmf = pmf[pmf .> 0]
    n = length(pmf)
    perm = sortperm(pmf, rev=true)  # find the sorted index of the pmf
    sorted_pmf = pmf[perm]  # sort the pmf in descending order
    if n > 2
        input_pmf = sorted_pmf[1:end-2]  # remove the two smallest probabilities
        append!(input_pmf, sum(sorted_pmf[end-1:end]))  # merge the two smallest probability masses
        code = binaryHuffman(input_pmf)  # call the huffman function recursively
        last_codeword = pop!(code)  # pop the last codeword which corresponds to the merged mass
        append!(code, ["$(last_codeword)0", "$(last_codeword)1"])  # append the two codewords accordingly
#         print(map(length, code), "\n")
        return ipermute!(code, perm)  # permute back the code according to the previous permutation
    elseif n == 2
        return ["0", "1"][perm]
    else
        print("ERROR:: You have only one symbol!\n")
        return 0
    end
end
Out[2]:
binaryHuffman (generic function with 1 method)

2) w/o sorting the pmf

In [3]:
# Binary Huffman coding: a recursive implementation II
# WITHOUT sorting the pmf!
# The input pmf does not have to be normalized.
function binaryHuffman2(pmf)
    pmf = pmf[pmf .> 0]
    n = length(pmf)
    if n > 2
        input_pmf = copy(pmf)
        min_vals, idx_list = minValues(input_pmf, 2)  # pop two smallest values from input_pmf, and get the indices as well
        append!(input_pmf, sum(min_vals))  # merge the two smallest probability masses
        code = binaryHuffman2(input_pmf)  # call the huffman function recursively
        last_codeword = pop!(code)  # pop the last codeword which corresponds to the merged mass
        list_range = ["0", "1"]  # append the two codewords accordingly
        for i = 2:-1:1
            insert!(code, idx_list[i], "$(last_codeword)$(list_range[i])")
        end
#         for debugging
#         print(map(length, code), " ")
#         print(pmf, " ")
#         print(avgLength(code, pmf), " ")
#         print(map(length, code).*pmf, "\n")
        return code
    elseif n == 2
        return ["0", "1"]
    else
        print("ERROR:: You have only one symbol!\n")
        return 0
    end
end
Out[3]:
binaryHuffman2 (generic function with 1 method)