const std = @import("std");
const assert = std.debug.assert;
const Allocator = std.mem.Allocator;
const RingBuffer = std.RingBuffer;
const types = @import("types.zig");
const frame = types.frame;
const LiteralsSection = types.compressed_block.LiteralsSection;
const SequencesSection = types.compressed_block.SequencesSection;
const SkippableHeader = types.frame.Skippable.Header;
const ZstandardHeader = types.frame.Zstandard.Header;
const Table = types.compressed_block.Table;
pub const block = @import("decode/block.zig");
const readers = @import("readers.zig");
const readInt = std.mem.readIntLittle;
const readIntSlice = std.mem.readIntSliceLittle;
/// Returns `true` when `magic` lies in the inclusive range
/// `frame.Skippable.magic_number_min` ... `frame.Skippable.magic_number_max`.
pub fn isSkippableMagic(magic: u32) bool {
    const lowest = frame.Skippable.magic_number_min;
    const highest = frame.Skippable.magic_number_max;
    if (magic < lowest) return false;
    return magic <= highest;
}
/// Reads a little-endian `u32` magic number from `source` and classifies it
/// with `frameType()`.
///
/// Errors:
///   - `error.EndOfStream` propagated from `source.readIntLittle()`
///   - `error.BadMagic` when the value is not a recognised frame magic number
///
/// NOTE(review): the error set is fixed, so a `source` whose own read errors
/// are not covered by it presumably fails to compile here — confirm before
/// using this with non-in-memory readers.
pub fn decodeFrameType(source: anytype) error{ BadMagic, EndOfStream }!frame.Kind {
    return frameType(try source.readIntLittle(u32));
}
/// Classifies a frame by its magic number.
///
/// Returns `.zstandard` when `magic` equals `frame.Zstandard.magic_number`,
/// `.skippable` when `isSkippableMagic(magic)` holds, and `error.BadMagic`
/// otherwise.
pub fn frameType(magic: u32) error{BadMagic}!frame.Kind {
    if (magic == frame.Zstandard.magic_number) return frame.Kind.zstandard;
    if (isSkippableMagic(magic)) return frame.Kind.skippable;
    return error.BadMagic;
}
/// A decoded frame header: exactly one of the two frame kinds handled by
/// this file. Produced by `decodeFrameHeader()`.
pub const FrameHeader = union(enum) {
/// Header of a Zstandard frame, as returned by `decodeZstandardHeader()`.
zstandard: ZstandardHeader,
/// Header of a skippable frame (magic number plus frame size).
skippable: SkippableHeader,
};
/// Errors `decodeFrameHeader()` can produce in addition to those of the reader.
pub const HeaderError = error{ BadMagic, EndOfStream, ReservedBitSet };

/// Reads the magic number from `source` and then the header that matches it.
///
/// For a Zstandard frame the rest of the header is parsed by
/// `decodeZstandardHeader()`; for a skippable frame the following
/// little-endian `u32` is stored as `frame_size`.
///
/// Errors:
///   - `error.BadMagic` if the magic number is not recognised
///   - `error.ReservedBitSet` from `decodeZstandardHeader()`
///   - `error.EndOfStream` and any error of `source` itself
pub fn decodeFrameHeader(source: anytype) (@TypeOf(source).Error || HeaderError)!FrameHeader {
    const magic = try source.readIntLittle(u32);
    return switch (try frameType(magic)) {
        .zstandard => FrameHeader{ .zstandard = try decodeZstandardHeader(source) },
        .skippable => FrameHeader{
            .skippable = .{
                .magic_number = magic,
                .frame_size = try source.readIntLittle(u32),
            },
        },
    };
}
/// Byte counts reported by the slice-based frame decoders in this file.
pub const ReadWriteCount = struct {
/// Number of bytes consumed from the source slice.
read_count: usize,
/// Number of bytes written to the destination slice.
write_count: usize,
};
/// Decodes every frame in `src`, one after another, into `dest` and returns
/// the total number of bytes written.
///
/// The output of each frame is placed directly after the output of the
/// previous one, so `dest[0..result]` holds the concatenated content of all
/// frames. Skippable frames consume input but produce no output.
///
/// `verify_checksum` is forwarded to `decodeFrame()`.
///
/// Errors:
///   - `error.UnknownContentSizeUnsupported` and
///     `error.DictionaryIdFlagUnsupported` are passed through from
///     `decodeFrame()`
///   - `error.MalformedFrame` for every other `decodeFrame()` failure; this
///     includes a frame whose content does not fit in the remaining part of
///     `dest`
pub fn decode(dest: []u8, src: []const u8, verify_checksum: bool) error{
    MalformedFrame,
    UnknownContentSizeUnsupported,
    DictionaryIdFlagUnsupported,
}!usize {
    var write_count: usize = 0;
    var read_count: usize = 0;
    while (read_count < src.len) {
        // Hand each frame only the unused tail of `dest`. Decoding into `dest`
        // from offset 0 every time would overwrite the previous frame's output
        // while `write_count` kept growing, possibly past `dest.len`.
        const counts = decodeFrame(
            dest[write_count..],
            src[read_count..],
            verify_checksum,
        ) catch |err| {
            switch (err) {
                error.UnknownContentSizeUnsupported => return error.UnknownContentSizeUnsupported,
                error.DictionaryIdFlagUnsupported => return error.DictionaryIdFlagUnsupported,
                else => return error.MalformedFrame,
            }
        };
        read_count += counts.read_count;
        write_count += counts.write_count;
    }
    return write_count;
}
/// Decodes every frame in `src` into a freshly allocated buffer and returns
/// it. The caller owns the returned slice and frees it with `allocator`.
///
/// `verify_checksum` and `window_size_max` are forwarded to
/// `decodeFrameArrayList()` for each frame.
///
/// Errors:
///   - `error.OutOfMemory` and `error.DictionaryIdFlagUnsupported` are passed
///     through from `decodeFrameArrayList()`
///   - `error.MalformedFrame` for every other decoding failure
/// On error the partially built output is released.
pub fn decodeAlloc(
    allocator: Allocator,
    src: []const u8,
    verify_checksum: bool,
    window_size_max: usize,
) error{ DictionaryIdFlagUnsupported, MalformedFrame, OutOfMemory }![]u8 {
    var output = std.ArrayList(u8).init(allocator);
    errdefer output.deinit();

    var offset: usize = 0;
    while (offset < src.len) {
        const frame_len = decodeFrameArrayList(
            allocator,
            &output,
            src[offset..],
            verify_checksum,
            window_size_max,
        ) catch |err| switch (err) {
            error.OutOfMemory => return error.OutOfMemory,
            error.DictionaryIdFlagUnsupported => return error.DictionaryIdFlagUnsupported,
            else => return error.MalformedFrame,
        };
        offset += frame_len;
    }
    return output.toOwnedSlice();
}
/// Decodes the single frame at the start of `src`.
///
/// A Zstandard frame is decoded into `dest` by `decodeZstandardFrame()`.
/// A skippable frame writes nothing; only its total length (the `u32` size
/// field plus the 8 bytes of magic number and size field) is reported as
/// consumed.
///
/// Returns the number of bytes read from `src` and written to `dest`.
///
/// Errors:
///   - `error.BadMagic` / `error.EndOfStream` while reading the magic number
///   - `error.SkippableSizeTooLarge` if a skippable frame claims more bytes
///     than `src` holds (or than fit in `usize`)
///   - anything returned by `decodeZstandardFrame()`
pub fn decodeFrame(
    dest: []u8,
    src: []const u8,
    verify_checksum: bool,
) (error{
    BadMagic,
    UnknownContentSizeUnsupported,
    ContentTooLarge,
    ContentSizeTooLarge,
    WindowSizeUnknown,
    DictionaryIdFlagUnsupported,
    SkippableSizeTooLarge,
} || FrameError)!ReadWriteCount {
    var stream = std.io.fixedBufferStream(src);
    const reader = stream.reader();
    const kind = try decodeFrameType(reader);
    switch (kind) {
        .zstandard => return decodeZstandardFrame(dest, src, verify_checksum),
        .skippable => {
            const payload_len = try reader.readIntLittle(u32);
            // Guard the `+ 8` below against overflowing `usize`.
            if (payload_len > std.math.maxInt(usize) - 8) return error.SkippableSizeTooLarge;
            const frame_len = @as(usize, payload_len) + 8;
            if (frame_len > src.len) return error.SkippableSizeTooLarge;
            return ReadWriteCount{ .read_count = frame_len, .write_count = 0 };
        },
    }
}
/// Decodes the single frame at the start of `src`, appending any output to
/// `dest`, and returns the number of bytes consumed from `src`.
///
/// A Zstandard frame is handled by `decodeZstandardFrameArrayList()`.
/// A skippable frame appends nothing; its size field plus the 8 bytes of
/// magic number and size field are reported as consumed.
///
/// Errors:
///   - `error.BadMagic` / `error.EndOfStream` while reading the magic number
///   - `error.SkippableSizeTooLarge` if a skippable frame claims more bytes
///     than `src` holds (or than fit in `usize`)
///   - anything returned by `decodeZstandardFrameArrayList()`
pub fn decodeFrameArrayList(
    allocator: Allocator,
    dest: *std.ArrayList(u8),
    src: []const u8,
    verify_checksum: bool,
    window_size_max: usize,
) (error{ BadMagic, OutOfMemory, SkippableSizeTooLarge } || FrameContext.Error || FrameError)!usize {
    var stream = std.io.fixedBufferStream(src);
    const reader = stream.reader();
    const kind = try frameType(try reader.readIntLittle(u32));
    switch (kind) {
        .zstandard => return decodeZstandardFrameArrayList(
            allocator,
            dest,
            src,
            verify_checksum,
            window_size_max,
        ),
        .skippable => {
            const payload_len = try reader.readIntLittle(u32);
            // Guard the `+ 8` below against overflowing `usize`.
            if (payload_len > std.math.maxInt(usize) - 8) return error.SkippableSizeTooLarge;
            const frame_len = @as(usize, payload_len) + 8;
            if (frame_len > src.len) return error.SkippableSizeTooLarge;
            return frame_len;
        },
    }
}
/// Finalises `hasher` and returns the low 32 bits of the resulting 64-bit
/// hash.
pub fn computeChecksum(hasher: *std.hash.XxHash64) u32 {
    return @truncate(u32, hasher.final());
}
/// Errors shared by the frame-level decoders in this file: the frame-level
/// failures listed here plus everything in `block.Error`.
const FrameError = error{
ChecksumFailure,
BadContentSize,
EndOfStream,
ReservedBitSet,
} || block.Error;
/// Decodes the Zstandard frame at the start of `src` into `dest`.
///
/// Asserts that `src` begins with the Zstandard magic number. The header is
/// parsed with `decodeZstandardHeader()`, a `FrameContext` is built from it,
/// and the blocks are decoded by `decodeZStandardFrameBlocks()`.
///
/// Returns the number of bytes read from `src` (magic number and header
/// included) and the number of bytes written to `dest`.
///
/// Errors:
///   - `error.ReservedBitSet` / `error.EndOfStream` while parsing the header
///   - `error.DictionaryIdFlagUnsupported`, `error.WindowSizeUnknown`,
///     `error.ContentSizeTooLarge` from `FrameContext.init()`;
///     `error.ContentSizeTooLarge` is also returned when the frame's window
///     size does not fit in `usize`
///   - anything returned by `decodeZStandardFrameBlocks()`
pub fn decodeZstandardFrame(
    dest: []u8,
    src: []const u8,
    verify_checksum: bool,
) (error{
    UnknownContentSizeUnsupported,
    ContentTooLarge,
    ContentSizeTooLarge,
    WindowSizeUnknown,
    DictionaryIdFlagUnsupported,
} || FrameError)!ReadWriteCount {
    assert(readInt(u32, src[0..4]) == frame.Zstandard.magic_number);
    // The 4-byte magic number has already been accounted for.
    var consumed_count: usize = 4;

    var frame_context = context: {
        var fbs = std.io.fixedBufferStream(src[consumed_count..]);
        const source = fbs.reader();
        const frame_header = try decodeZstandardHeader(source);
        consumed_count += fbs.pos;
        break :context FrameContext.init(
            frame_header,
            std.math.maxInt(usize),
            verify_checksum,
        ) catch |err| switch (err) {
            // The limit is `maxInt(usize)`, but the window size is a `u64`, so
            // this is still reachable when `usize` is narrower than 64 bits.
            // Report it as a size that does not fit instead of declaring it
            // `unreachable`, which would be undefined behaviour in release
            // builds on untrusted input.
            error.WindowTooLarge => return error.ContentSizeTooLarge,
            inline else => |e| return e,
        };
    };

    const counts = try decodeZStandardFrameBlocks(
        dest,
        src[consumed_count..],
        &frame_context,
    );
    return ReadWriteCount{
        .read_count = counts.read_count + consumed_count,
        .write_count = counts.write_count,
    };
}
/// Decodes the blocks of a Zstandard frame (everything after the frame
/// header) from `src` into `dest`, followed by the optional checksum.
///
/// The frame must declare its content size, and `dest` must be at least that
/// large; exactly `content_size` bytes are written.
///
/// Returns the number of bytes read from `src` and written to `dest`.
///
/// Errors:
///   - `error.UnknownContentSizeUnsupported` if the frame has no content size
///   - `error.ContentTooLarge` if `dest` is smaller than the content size
///   - `error.BadContentSize` if the decoded size differs from the declared one
///   - `error.EndOfStream` if the checksum field is missing
///   - `error.ChecksumFailure` if a hasher is present and the checksum differs
///   - any other error of `decodeFrameBlocksInner()`
pub fn decodeZStandardFrameBlocks(
    dest: []u8,
    src: []const u8,
    frame_context: *FrameContext,
) (error{ ContentTooLarge, UnknownContentSizeUnsupported } || FrameError)!ReadWriteCount {
    const expected_len = frame_context.content_size orelse
        return error.UnknownContentSizeUnsupported;
    if (expected_len > dest.len) return error.ContentTooLarge;

    var read_total: usize = 0;
    const hasher_ptr: ?*std.hash.XxHash64 =
        if (frame_context.hasher_opt) |*hasher| hasher else null;
    const written_total = decodeFrameBlocksInner(
        dest[0..expected_len],
        src,
        &read_total,
        hasher_ptr,
        frame_context.block_size_max,
    ) catch |err| switch (err) {
        // The output window is exactly the declared size, so running out of
        // room means the declared size was wrong.
        error.DestTooSmall => return error.BadContentSize,
        inline else => |e| return e,
    };
    if (written_total != expected_len) return error.BadContentSize;

    if (frame_context.has_checksum) {
        if (src.len < read_total + 4) return error.EndOfStream;
        const stored_checksum = readIntSlice(u32, src[read_total .. read_total + 4]);
        read_total += 4;
        // The checksum bytes are always consumed; they are only compared when
        // a hasher was set up for this frame.
        if (frame_context.hasher_opt) |*hasher| {
            if (stored_checksum != computeChecksum(hasher)) return error.ChecksumFailure;
        }
    }

    return ReadWriteCount{ .read_count = read_total, .write_count = written_total };
}
/// Per-frame decoding state derived from a Zstandard frame header.
pub const FrameContext = struct {
    /// Running content hash; `null` when the checksum is not being verified.
    hasher_opt: ?std.hash.XxHash64,
    /// Window size of the frame, in bytes.
    window_size: usize,
    /// Whether the frame header's content checksum flag is set.
    has_checksum: bool,
    /// Upper bound passed to the block decoders: the smaller of `1 << 17`
    /// and `window_size`.
    block_size_max: usize,
    /// Content size declared in the header, if any.
    content_size: ?usize,

    const Error = error{
        DictionaryIdFlagUnsupported,
        WindowSizeUnknown,
        WindowTooLarge,
        ContentSizeTooLarge,
    };

    /// Builds a `FrameContext` from `frame_header`.
    ///
    /// A hasher is created only when the header's content checksum flag is
    /// set and `verify_checksum` is true.
    ///
    /// Errors:
    ///   - `error.DictionaryIdFlagUnsupported` if the dictionary ID flag is
    ///     non-zero
    ///   - `error.WindowSizeUnknown` if `frameWindowSize()` returns `null`
    ///   - `error.WindowTooLarge` if the window size exceeds `window_size_max`
    ///   - `error.ContentSizeTooLarge` if the content size does not fit `usize`
    pub fn init(
        frame_header: ZstandardHeader,
        window_size_max: usize,
        verify_checksum: bool,
    ) Error!FrameContext {
        const descriptor = frame_header.descriptor;
        if (descriptor.dictionary_id_flag != 0) return error.DictionaryIdFlagUnsupported;

        const raw_window = frameWindowSize(frame_header) orelse return error.WindowSizeUnknown;
        if (raw_window > window_size_max) return error.WindowTooLarge;
        // Safe: `raw_window` is bounded by a `usize` value at this point.
        const window_size = @intCast(usize, raw_window);

        var content_size: ?usize = null;
        if (frame_header.content_size) |size| {
            content_size = std.math.cast(usize, size) orelse return error.ContentSizeTooLarge;
        }

        const has_checksum = descriptor.content_checksum_flag;
        return FrameContext{
            .hasher_opt = if (has_checksum and verify_checksum) std.hash.XxHash64.init(0) else null,
            .window_size = window_size,
            .has_checksum = has_checksum,
            .block_size_max = @min(1 << 17, window_size),
            .content_size = content_size,
        };
    }
};
/// Decodes the Zstandard frame at the start of `src`, appending its content
/// to `dest`, and returns the number of bytes consumed from `src` (magic
/// number and header included).
///
/// Asserts that `src` begins with the Zstandard magic number.
/// `window_size_max` is the limit handed to `FrameContext.init()`.
///
/// Errors:
///   - `error.ReservedBitSet` / `error.EndOfStream` while parsing the header
///   - any `FrameContext.Error` from `FrameContext.init()`
///   - anything returned by `decodeZstandardFrameBlocksArrayList()`
pub fn decodeZstandardFrameArrayList(
    allocator: Allocator,
    dest: *std.ArrayList(u8),
    src: []const u8,
    verify_checksum: bool,
    window_size_max: usize,
) (error{OutOfMemory} || FrameContext.Error || FrameError)!usize {
    assert(readInt(u32, src[0..4]) == frame.Zstandard.magic_number);
    const magic_len = 4;

    var header_stream = std.io.fixedBufferStream(src[magic_len..]);
    const frame_header = try decodeZstandardHeader(header_stream.reader());
    // The stream position is the number of header bytes that were read.
    const header_end: usize = magic_len + header_stream.pos;
    var frame_context = try FrameContext.init(frame_header, window_size_max, verify_checksum);

    const body_len = try decodeZstandardFrameBlocksArrayList(
        allocator,
        dest,
        src[header_end..],
        &frame_context,
    );
    return header_end + body_len;
}
/// Decodes the blocks of a Zstandard frame (everything after the frame
/// header) from `src`, appending the decoded bytes to `dest`, then consumes
/// the optional checksum. Returns the number of bytes consumed from `src`.
///
/// A ring buffer of `frame_context.window_size` bytes is allocated with
/// `allocator` for the duration of the call and freed before returning.
///
/// Errors:
///   - `error.OutOfMemory` from the ring buffer or from growing `dest`
///   - `error.BadContentSize` if the frame declares a content size and the
///     number of bytes appended does not match it
///   - `error.EndOfStream` if the checksum field is missing
///   - `error.ChecksumFailure` if a hasher is present and the checksum differs
///   - errors of `block.decodeBlockHeaderSlice()` and
///     `block.decodeBlockRingBuffer()`
/// On error, bytes already appended to `dest` are left in place.
pub fn decodeZstandardFrameBlocksArrayList(
allocator: Allocator,
dest: *std.ArrayList(u8),
src: []const u8,
frame_context: *FrameContext,
) (error{OutOfMemory} || FrameError)!usize {
// Length of `dest` on entry; only bytes appended by this call are counted
// against the declared content size.
const initial_len = dest.items.len;
var ring_buffer = try RingBuffer.init(allocator, frame_context.window_size);
defer ring_buffer.deinit(allocator);
// Backing storage for the FSE tables used by the decode state.
var literal_fse_data: [types.compressed_block.table_size_max.literal]Table.Fse = undefined;
var match_fse_data: [types.compressed_block.table_size_max.match]Table.Fse = undefined;
var offset_fse_data: [types.compressed_block.table_size_max.offset]Table.Fse = undefined;
var block_header = try block.decodeBlockHeaderSlice(src);
// The first block header accounts for the first 3 bytes of `src`.
var consumed_count: usize = 3;
var decode_state = block.DecodeState.init(&literal_fse_data, &match_fse_data, &offset_fse_data);
// The continue-expression reads the next block header; it does not run
// after the `break` on the last block.
while (true) : ({
block_header = try block.decodeBlockHeaderSlice(src[consumed_count..]);
consumed_count += 3;
}) {
const written_size = try block.decodeBlockRingBuffer(
&ring_buffer,
src[consumed_count..],
block_header,
&decode_state,
&consumed_count,
frame_context.block_size_max,
);
// This check runs before the current block's bytes are appended, so it
// only sees what earlier blocks produced; the exact-size check after the
// loop catches any remaining mismatch.
if (frame_context.content_size) |size| {
if (dest.items.len - initial_len > size) {
return error.BadContentSize;
}
}
if (written_size > 0) {
// `sliceLast` returns the newest bytes as two slices (`first`, `second`);
// presumably because the data may wrap around the ring buffer — TODO confirm.
const written_slice = ring_buffer.sliceLast(written_size);
try dest.appendSlice(written_slice.first);
try dest.appendSlice(written_slice.second);
if (frame_context.hasher_opt) |*hasher| {
hasher.update(written_slice.first);
hasher.update(written_slice.second);
}
}
if (block_header.last_block) break;
}
if (frame_context.content_size) |size| {
if (dest.items.len - initial_len != size) {
return error.BadContentSize;
}
}
if (frame_context.has_checksum) {
if (src.len < consumed_count + 4) return error.EndOfStream;
const checksum = readIntSlice(u32, src[consumed_count .. consumed_count + 4]);
consumed_count += 4;
// The checksum bytes are always consumed; they are only compared when a
// hasher was set up for this frame.
if (frame_context.hasher_opt) |*hasher| {
if (checksum != computeChecksum(hasher)) return error.ChecksumFailure;
}
}
return consumed_count;
}
/// Decodes consecutive blocks from `src` into `dest` until a block header
/// with `last_block` set has been processed. Returns the total number of
/// bytes written to `dest`.
///
/// `consumed_count.*` is increased by the number of bytes read from `src`;
/// because this happens in a `defer`, it is also updated when the function
/// returns an error.
///
/// If `hash` is non-null it is updated with the bytes each block wrote.
fn decodeFrameBlocksInner(
dest: []u8,
src: []const u8,
consumed_count: *usize,
hash: ?*std.hash.XxHash64,
block_size_max: usize,
) (error{ EndOfStream, DestTooSmall } || block.Error)!usize {
// Backing storage for the FSE tables used by the decode state.
var literal_fse_data: [types.compressed_block.table_size_max.literal]Table.Fse = undefined;
var match_fse_data: [types.compressed_block.table_size_max.match]Table.Fse = undefined;
var offset_fse_data: [types.compressed_block.table_size_max.offset]Table.Fse = undefined;
var block_header = try block.decodeBlockHeaderSlice(src);
// The first block header accounts for the first 3 bytes of `src`.
var bytes_read: usize = 3;
// Evaluated at scope exit with the final value of `bytes_read`.
defer consumed_count.* += bytes_read;
var decode_state = block.DecodeState.init(&literal_fse_data, &match_fse_data, &offset_fse_data);
// Running total of bytes written to `dest`.
var count: usize = 0;
// The continue-expression reads the next block header; it does not run
// after the `break` on the last block.
while (true) : ({
block_header = try block.decodeBlockHeaderSlice(src[bytes_read..]);
bytes_read += 3;
}) {
// `count` is passed as the last argument; presumably the offset in `dest`
// at which this block's output starts — TODO confirm against `block.decodeBlock`.
const written_size = try block.decodeBlock(
dest,
src[bytes_read..],
block_header,
&decode_state,
&bytes_read,
block_size_max,
count,
);
if (hash) |hash_state| hash_state.update(dest[count .. count + written_size]);
count += written_size;
if (block_header.last_block) break;
}
return count;
}
/// Decodes a skippable frame header from 8 bytes: a little-endian `u32`
/// magic number followed by a little-endian `u32` frame size.
///
/// Asserts that the magic number satisfies `isSkippableMagic()`.
pub fn decodeSkippableHeader(src: *const [8]u8) SkippableHeader {
    const header = SkippableHeader{
        .magic_number = readInt(u32, src[0..4]),
        .frame_size = readInt(u32, src[4..8]),
    };
    assert(isSkippableMagic(header.magic_number));
    return header;
}
/// Returns the window size of a frame in bytes.
///
/// With a window descriptor, the upper five bits are an exponent and the
/// lower three bits a mantissa: the result is `base + (base / 8) * mantissa`
/// where `base = 1 << (10 + exponent)`. Without a window descriptor the
/// header's content size is returned, which may be `null`.
pub fn frameWindowSize(header: ZstandardHeader) ?u64 {
    const descriptor = header.window_descriptor orelse return header.content_size;
    const exponent = descriptor >> 3;
    const mantissa = descriptor & 0x07;
    const window_base = @as(u64, 1) << @intCast(u6, 10 + exponent);
    const window_step = window_base >> 3;
    return window_base + window_step * mantissa;
}
/// Parses a Zstandard frame header from `source`, which must be positioned
/// just after the magic number.
///
/// Fields are read in this order: the descriptor byte, the window descriptor
/// (absent when the single-segment flag is set), the dictionary ID (present
/// when its flag is non-zero) and the content size (present when the
/// single-segment flag is set or the content size flag is non-zero).
///
/// Errors:
///   - `error.ReservedBitSet` if the descriptor's reserved bit is set
///   - `error.EndOfStream` and any error of `source` itself
pub fn decodeZstandardHeader(
    source: anytype,
) (@TypeOf(source).Error || error{ EndOfStream, ReservedBitSet })!ZstandardHeader {
    const descriptor = @bitCast(ZstandardHeader.Descriptor, try source.readByte());
    if (descriptor.reserved) return error.ReservedBitSet;

    const window_descriptor: ?u8 =
        if (descriptor.single_segment_flag) null else try source.readByte();

    const dictionary_id: ?u32 = if (descriptor.dictionary_id_flag == 0) null else id: {
        // Flag values 1, 2, 3 give field sizes of 1, 2, 4 bytes.
        const field_size = (@as(u4, 1) << descriptor.dictionary_id_flag) >> 1;
        break :id try source.readVarInt(u32, .Little, field_size);
    };

    const has_content_size =
        descriptor.single_segment_flag or descriptor.content_size_flag > 0;
    const content_size: ?u64 = if (!has_content_size) null else size: {
        // Flag values 0, 1, 2, 3 give field sizes of 1, 2, 4, 8 bytes.
        const field_size = @as(u4, 1) << descriptor.content_size_flag;
        const raw_size = try source.readVarInt(u64, .Little, field_size);
        // The two-byte encoding stores the size minus 256.
        break :size if (field_size == 2) raw_size + 256 else raw_size;
    };

    return ZstandardHeader{
        .descriptor = descriptor,
        .window_descriptor = window_descriptor,
        .dictionary_id = dictionary_id,
        .content_size = content_size,
    };
}
// References every declaration in this file so that each one is analysed
// when the tests are built.
test {
std.testing.refAllDecls(@This());
}