Tunstall Coding

In [1]:
# We only assume a Bernoulli source.
# A function constructs a Tunstall code, with the number of phrases specified.
# If one does not specify it, it will expand the phrases by the smallest power of 2.
function Tunstall(src_pmf, num_phrases)
    src_pmf = src_pmf[src_pmf .> 0]
    src_pmf /= sum(src_pmf)
    pmf = copy(src_pmf)
    n = length(pmf)
    symbols = map(string, collect(1:n))
    # copy symbols to phrases; we will extend each phrase by adding symbols
    phrases = copy(symbols)
#     m = floor(log2(n))+1; num_phrases = 2^m  # just for Tunstall
    num_branches = num_phrases - n  # the number of branches required
    for i = 1:num_branches
        (maxprob, idx) = findmax(pmf)
        deleteat!(pmf, idx);  # delete the highest probability from the pmf
        phrase = splice!(phrases, idx)  # get the phrase with the highest prob, and delete it from the list
        append!(pmf, maxprob*src_pmf)  # append the probabilities of size n
        append!(phrases, map(x -> "$phrase$x", symbols))  # append the extended phrases
    end
    return phrases, pmf
end
Tunstall(src_pmf) = Tunstall(src_pmf, 2^(floor(log2(length(src_pmf))+1)))
Out[1]:
Tunstall (generic function with 2 methods)