const std = @import("std");
const assert = std.debug.assert;
const math = std.math;
const mem = std.mem;
const sort = std.sort;
const testing = std.testing;
const Allocator = std.mem.Allocator;
const bu = @import("bits_utils.zig");
const deflate_const = @import("deflate_const.zig");
// Dimension of the per-level work tables built in `HuffmanEncoder.bitCounts`.
// `bitCounts` asserts that the requested maximum code length is strictly
// less than this value.
const max_bits_limit = 16;
/// A symbol paired with how often it occurs; the unit that gets sorted
/// (by frequency or by literal) while a code is being generated.
const LiteralNode = struct {
/// Symbol value; used as an index into `HuffmanEncoder.codes`.
literal: u16,
/// Occurrence count of `literal`.
freq: u16,
};
/// Per-level bookkeeping used by `HuffmanEncoder.bitCounts` while it builds
/// the code-length distribution.
const LevelInfo = struct {
/// Index of this level; `bitCounts` sets it equal to the slot's position.
level: u32,
/// Frequency of the item most recently added to this level.
last_freq: u32,
/// Frequency of the next leaf (entry of the sorted list) this level would take.
next_char_freq: u32,
/// Frequency of the next pair this level would take from the level below;
/// written as the sum of the last two items generated on that lower level.
next_pair_freq: u32,
/// Number of items this level still has to generate before work moves up.
needed: u32,
};
/// A single Huffman code word: the bit pattern and its length in bits.
/// Both fields default to zero.
pub const HuffCode = struct {
    code: u16 = 0,
    len: u16 = 0,

    /// Overwrites both the bit pattern and the bit length in one step.
    fn set(self: *HuffCode, code: u16, length: u16) void {
        self.* = HuffCode{ .code = code, .len = length };
    }
};
/// Builds a length-limited Huffman code from symbol frequencies and stores
/// the resulting code word for every symbol in `codes`.
pub const HuffmanEncoder = struct {
    /// One entry per symbol, indexed by symbol value. Freed by `deinit`.
    codes: []HuffCode,
    /// Scratch buffer that `generate` slices to hold the symbols with a
    /// non-zero frequency. Freed by `deinit`.
    freq_cache: []LiteralNode = undefined,
    /// Backing storage for the slice returned by `bitCounts`.
    bit_count: [17]u32 = undefined,
    /// The chunk most recently sorted by literal in `assignEncodingAndSize`.
    lns: []LiteralNode = undefined,
    /// The list most recently sorted by frequency in `generate`.
    lfs: []LiteralNode = undefined,
    /// Allocator that owns `codes` and `freq_cache`.
    allocator: Allocator,

    /// Releases `codes` and `freq_cache` through the stored allocator.
    pub fn deinit(self: *HuffmanEncoder) void {
        self.allocator.free(self.codes);
        self.allocator.free(self.freq_cache);
    }

    /// Recomputes `codes` for the given frequency table.
    ///
    /// `freq[i]` is the frequency of symbol `i`; symbols with frequency zero
    /// get length 0. `max_bits` is the longest code length allowed and must be
    /// smaller than `max_bits_limit` (asserted in `bitCounts`).
    /// `freq.len + 1` entries of `freq_cache` and `freq.len` entries of
    /// `codes` are indexed, so both buffers must be at least that large.
    pub fn generate(self: *HuffmanEncoder, freq: []u16, max_bits: u32) void {
        var list = self.freq_cache[0 .. freq.len + 1];
        // Number of symbols with a non-zero frequency seen so far.
        var count: u32 = 0;
        for (freq, 0..) |f, i| {
            if (f != 0) {
                list[count] = LiteralNode{ .literal = @intCast(u16, i), .freq = f };
                count += 1;
            } else {
                list[count] = LiteralNode{ .literal = 0x00, .freq = 0 };
                self.codes[i].len = 0;
            }
        }
        list[freq.len] = LiteralNode{ .literal = 0x00, .freq = 0 };
        list = list[0..count];

        if (count <= 2) {
            // With at most two used symbols every code is one bit long; the
            // list is still in increasing symbol order here, so the codes are
            // simply 0 and 1.
            for (list, 0..) |node, i| {
                self.codes[node.literal].set(@intCast(u16, i), 1);
            }
            return;
        }

        self.lfs = list;
        sort.sort(LiteralNode, self.lfs, {}, byFreq);

        // How many symbols get each code length, then the actual code words.
        const bit_count = self.bitCounts(list, max_bits);
        self.assignEncodingAndSize(bit_count, list);
    }

    /// Returns the sum over all symbols of `freq[i] * codes[i].len`, i.e. the
    /// number of bits needed to emit every counted symbol with the current code.
    pub fn bitLength(self: *HuffmanEncoder, freq: []u16) u32 {
        var total: u32 = 0;
        for (freq, 0..) |f, i| {
            if (f != 0) {
                total += @intCast(u32, f) * @intCast(u32, self.codes[i].len);
            }
        }
        return total;
    }

    /// Computes how many symbols receive each code length.
    ///
    /// `list` must be sorted by increasing frequency and contain at least
    /// three entries (entries 0, 1 and 2 are read unconditionally).
    /// `max_bits_to_use` must be smaller than `max_bits_limit`.
    ///
    /// The returned slice aliases `self.bit_count`; element `b` (for `b >= 1`)
    /// is the number of symbols that get a `b`-bit code, and element 0 is 0.
    fn bitCounts(self: *HuffmanEncoder, list: []LiteralNode, max_bits_to_use: usize) []u32 {
        const n = list.len;
        assert(max_bits_to_use < max_bits_limit);

        // A tree over n leaves can never be deeper than n - 1.
        const max_bits = @min(max_bits_to_use, n - 1);

        // Marks "no more leaves" / "no more pairs" on a level. It has to be
        // larger than any real frequency or sum of frequencies; a u16-sized
        // marker would be mistaken for a cheap leaf as soon as a pair weighs
        // more than 65535, and would never match the exhaustion test below.
        const exhausted: u32 = math.maxInt(i32);

        // Slot 0 is a placeholder level below level 1; it stays zeroed.
        var levels: [max_bits_limit]LevelInfo = mem.zeroes([max_bits_limit]LevelInfo);
        // leaf_counts[i][j]: number of leaves counted at level j for the most
        // recent item generated on level i.
        var leaf_counts: [max_bits_limit][max_bits_limit]u32 = mem.zeroes([max_bits_limit][max_bits_limit]u32);

        {
            var level = @as(u32, 1);
            while (level <= max_bits) : (level += 1) {
                // Every level starts as if the two cheapest leaves had
                // already been placed on it.
                levels[level] = LevelInfo{
                    .level = level,
                    .last_freq = list[1].freq,
                    .next_char_freq = list[2].freq,
                    // Widen before adding: two u16 frequencies can sum past
                    // the u16 range.
                    .next_pair_freq = @as(u32, list[0].freq) + list[1].freq,
                    .needed = 0,
                };
                leaf_counts[level][level] = 2;
                if (level == 1) {
                    // Level 1 has nothing below it to pair up.
                    levels[level].next_pair_freq = exhausted;
                }
            }
        }

        // The top level needs 2n - 2 items in total and already has two.
        levels[max_bits].needed = 2 * @intCast(u32, n) - 4;

        {
            var level = max_bits;
            while (true) {
                const l = &levels[level];
                if (l.next_pair_freq == exhausted and l.next_char_freq == exhausted) {
                    // Neither leaves nor pairs are left on this level: stop
                    // producing here and make sure the level above never
                    // asks for another pair.
                    l.needed = 0;
                    levels[level + 1].next_pair_freq = exhausted;
                    level += 1;
                    continue;
                }

                const prev_freq = l.last_freq;
                if (l.next_char_freq < l.next_pair_freq) {
                    // Next item on this level is a leaf.
                    const next = leaf_counts[level][level] + 1;
                    l.last_freq = l.next_char_freq;
                    leaf_counts[level][level] = next;
                    if (next >= list.len) {
                        l.next_char_freq = exhausted;
                    } else {
                        l.next_char_freq = list[next].freq;
                    }
                } else {
                    // Next item is a pair taken from the level below; that
                    // level then has to produce two more items before its
                    // next pair is available.
                    l.last_freq = l.next_pair_freq;
                    mem.copy(u32, leaf_counts[level][0..level], leaf_counts[level - 1][0..level]);
                    levels[l.level - 1].needed = 2;
                }

                l.needed -= 1;
                if (l.needed == 0) {
                    if (l.level == max_bits) {
                        // The top level is complete.
                        break;
                    }
                    // Hand the pair just completed to the level above.
                    levels[l.level + 1].next_pair_freq = prev_freq + l.last_freq;
                    level += 1;
                } else {
                    // Drop down to whichever lower levels still owe items.
                    while (levels[level - 1].needed > 0) {
                        level -= 1;
                        if (level == 0) {
                            break;
                        }
                    }
                }
            }
        }

        // The top level must have consumed every leaf exactly once.
        assert(leaf_counts[max_bits][max_bits] == n);

        const bit_count = self.bit_count[0 .. max_bits + 1];
        // No symbol gets a zero-length code from this routine.
        bit_count[0] = 0;
        const counts = &leaf_counts[max_bits];
        var bits: u32 = 1;
        var level = max_bits;
        while (level > 0) : (level -= 1) {
            // Walking from the top level down yields the symbol count for
            // code lengths 1, 2, 3, ... in that order.
            bit_count[bits] = counts[level] - counts[level - 1];
            bits += 1;
        }
        return bit_count;
    }

    /// Assigns code words to the symbols in `list_arg` and writes them to `codes`.
    ///
    /// `bit_count[b]` is the number of symbols that get a `b`-bit code.
    /// `list_arg` is consumed from its end: the last `bit_count[1]` entries
    /// get 1-bit codes, the `bit_count[2]` entries before them 2-bit codes, and
    /// so on. Within one length, codes are handed out in increasing symbol
    /// order. Each stored code is passed through `bu.bitReverse`.
    fn assignEncodingAndSize(self: *HuffmanEncoder, bit_count: []u32, list_arg: []LiteralNode) void {
        var code = @as(u16, 0);
        var list = list_arg;

        for (bit_count, 0..) |bits, n| {
            code <<= 1;
            if (n == 0 or bits == 0) {
                continue;
            }
            const split = list.len - bits;
            const chunk = list[split..];

            self.lns = chunk;
            sort.sort(LiteralNode, self.lns, {}, byLiteral);

            for (chunk) |node| {
                self.codes[node.literal] = HuffCode{
                    .code = bu.bitReverse(u16, code, @intCast(u5, n)),
                    .len = @intCast(u16, n),
                };
                code += 1;
            }
            list = list[0..split];
        }
    }
};
/// Returns a node whose `literal` and `freq` are both the largest `u16` value.
fn maxNode() LiteralNode {
    const top = math.maxInt(u16);
    return LiteralNode{ .literal = top, .freq = top };
}
/// Creates an encoder with room for `size` code words.
///
/// Allocates `size` `HuffCode` entries plus a scratch buffer of
/// `deflate_const.max_num_frequencies + 1` nodes. The code entries are not
/// initialised here. The caller owns the result and must call `deinit`.
/// Returns the allocator's error if either allocation fails; nothing is
/// leaked in that case.
pub fn newHuffmanEncoder(allocator: Allocator, size: u32) !HuffmanEncoder {
    const codes = try allocator.alloc(HuffCode, size);
    // Without this, a failure of the second allocation would leak `codes`.
    errdefer allocator.free(codes);

    const freq_cache = try allocator.alloc(LiteralNode, deflate_const.max_num_frequencies + 1);

    return HuffmanEncoder{
        .codes = codes,
        .freq_cache = freq_cache,
        .allocator = allocator,
    };
}
/// Builds an encoder whose code table is fixed by symbol range rather than
/// derived from frequencies: symbols 0-143 use 8 bits, 144-255 use 9 bits,
/// 256-279 use 7 bits and everything above uses 8 bits.
/// The caller owns the result and must call `deinit`.
pub fn generateFixedLiteralEncoding(allocator: Allocator) !HuffmanEncoder {
    const encoder = try newHuffmanEncoder(allocator, deflate_const.max_num_frequencies);

    var symbol: u16 = 0;
    while (symbol < deflate_const.max_num_frequencies) : (symbol += 1) {
        // Code length for this symbol's range.
        const width: u16 = switch (symbol) {
            0...143 => 8,
            144...255 => 9,
            256...279 => 7,
            else => 8,
        };
        // Code value before bit reversal; each range starts at its own base.
        const pattern: u16 = switch (symbol) {
            0...143 => symbol + 48,
            144...255 => symbol + 400 - 144,
            256...279 => symbol - 256,
            else => symbol + 192 - 280,
        };
        encoder.codes[symbol] = HuffCode{
            .code = bu.bitReverse(u16, pattern, @intCast(u5, width)),
            .len = width,
        };
    }
    return encoder;
}
/// Builds an encoder with 30 entries in which entry `i` is the 5-bit code
/// `i`, passed through `bu.bitReverse`.
/// The caller owns the result and must call `deinit`.
pub fn generateFixedOffsetEncoding(allocator: Allocator) !HuffmanEncoder {
    const encoder = try newHuffmanEncoder(allocator, 30);
    for (encoder.codes, 0..) |*slot, index| {
        slot.* = HuffCode{
            .code = bu.bitReverse(u16, @intCast(u16, index), 5),
            .len = 5,
        };
    }
    return encoder;
}
/// Sort predicate: true when `a` has the smaller literal value.
fn byLiteral(context: void, a: LiteralNode, b: LiteralNode) bool {
    _ = context;
    return b.literal > a.literal;
}
/// Sort predicate: orders by increasing frequency and, for equal
/// frequencies, by increasing literal value.
fn byFreq(context: void, a: LiteralNode, b: LiteralNode) bool {
    _ = context;
    if (a.freq != b.freq) {
        return a.freq < b.freq;
    }
    // Equal frequencies: fall back to the literal value.
    return a.literal < b.literal;
}
// Generates a code for a 19-symbol frequency table with a 7-bit limit and
// checks the total encoded size, every code length and every code value.
test "generate a Huffman code from an array of frequencies" {
    var freqs = [19]u16{ 8, 1, 1, 2, 5, 10, 9, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 5 };

    var enc = try newHuffmanEncoder(testing.allocator, freqs.len);
    defer enc.deinit();
    enc.generate(freqs[0..], 7);

    try testing.expectEqual(@as(u32, 141), enc.bitLength(freqs[0..]));

    // Expected code length per symbol, indexed by symbol.
    const expected_lens = [19]usize{ 3, 6, 6, 5, 3, 2, 2, 6, 0, 0, 0, 0, 0, 0, 0, 0, 6, 5, 3 };
    for (expected_lens, 0..) |want, symbol| {
        try testing.expectEqual(want, enc.codes[symbol].len);
    }

    // Expected code value for every symbol that has a non-zero frequency.
    const expected_codes = [_]struct { symbol: usize, code: u16 }{
        .{ .symbol = 5, .code = 0x0 },
        .{ .symbol = 6, .code = 0x2 },
        .{ .symbol = 0, .code = 0x1 },
        .{ .symbol = 4, .code = 0x5 },
        .{ .symbol = 18, .code = 0x3 },
        .{ .symbol = 3, .code = 0x7 },
        .{ .symbol = 17, .code = 0x17 },
        .{ .symbol = 1, .code = 0x0f },
        .{ .symbol = 2, .code = 0x2f },
        .{ .symbol = 7, .code = 0x1f },
        .{ .symbol = 16, .code = 0x3f },
    };
    for (expected_codes) |want| {
        try testing.expectEqual(want.code, enc.codes[want.symbol].code);
    }
}
// Builds the fixed literal table and checks its size and the code length of
// every entry; `testing.allocator` additionally reports leaked allocations.
test "generate a Huffman code for the fixed litteral table specific to Deflate" {
    var enc = try generateFixedLiteralEncoding(testing.allocator);
    defer enc.deinit();

    try testing.expectEqual(@as(usize, deflate_const.max_num_frequencies), enc.codes.len);
    for (enc.codes, 0..) |hc, symbol| {
        // Code lengths of the fixed table, by symbol range.
        const want: u16 = switch (symbol) {
            0...143 => 8,
            144...255 => 9,
            256...279 => 7,
            else => 8,
        };
        try testing.expectEqual(want, hc.len);
    }
}
// Builds the fixed offset table and checks that it has 30 entries, each five
// bits long; `testing.allocator` additionally reports leaked allocations.
test "generate a Huffman code for the 30 possible relative offsets (LZ77 distances) of Deflate" {
    var enc = try generateFixedOffsetEncoding(testing.allocator);
    defer enc.deinit();

    try testing.expectEqual(@as(usize, 30), enc.codes.len);
    for (enc.codes) |hc| {
        try testing.expectEqual(@as(u16, 5), hc.len);
    }
}