using NBInclude
nbinclude("Basic Functions.ipynb");
function ShannonFano(pmf)
pmf = pmf[pmf .> 0]
n = length(pmf)
s = sum(pmf)
perm = sortperm(pmf, rev=true)
ps = 0
if n > 2
i = 0
while(ps <= s/2)
i += 1
ps += pmf[perm[i]]
end
# decide the most balanced bipartition
(a1, b1) = (ps, s-ps)
(a2, b2) = (ps-pmf[perm[i]], s-ps+pmf[perm[i]])
idx = (abs(a1-b1) < abs(a2-b2) ? i : i-1)
# for debugging the singular case:
# print("To check the control flow: ",abs(a1-b1), " vs. ", abs(a2-b2), "\n")
# print(idx, pmf[perm[1:idx]], "\n")
# print(pmf[perm[1:idx]], " ", pmf[perm[idx+1:end]], " ", idx, "\n")
temp = [map(x -> "0$x", ShannonFano(pmf[perm[1:idx]]))..., map(x -> "1$x", ShannonFano(pmf[perm[idx+1:end]]))...]
return ipermute!(temp, perm)
elseif n == 2
return ["0", "1"][perm]
elseif n == 1
return [""]
end
end
ShannonFano(pmf, base) = ShannonFano(pmf) # for consistency in testMultiSchemes, so base means nothing.
ShannonFano([2,1,3])
A singular case: [7,6,4,3] If you normalize it, then this might give a different result.
pmf = [7,6,4,3]
ShannonFano(pmf)
ShannonFano(pmf/sum(pmf))
This is because we can break this pmf into ([7],[6,4,3]) or ([7,6],[4,3]). Clearly, the previous one gives the shorter average length. If we do not normalize it, the alogorithm will choose the previous one due to its implementation. If we normalize the input, the algorithm chooses the latter, because of the floating point operation.
qmf = pmf/sum(pmf)
qmf[1]+qmf[2]