const std = @import("std");
const assert = std.debug.assert;
const mem = std.mem;

const Allocator = std.mem.Allocator;

// Implements the LZ77 sliding dictionary as used in decompression.

// LZ77 decompresses data through sequences of two forms of commands:

//

//  * Literal insertions: Runs of one or more symbols are inserted into the data

//  stream as is. This is accomplished through the writeByte method for a

//  single symbol, or combinations of writeSlice/writeMark for multiple symbols.

//  Any valid stream must start with a literal insertion if no preset dictionary

//  is used.

//

//  * Backward copies: Runs of one or more symbols are copied from previously

//  emitted data. Backward copies come as the tuple (dist, length) where dist

//  determines how far back in the stream to copy from and length determines how

//  many bytes to copy. Note that it is valid for the length to be greater than

//  the distance. Since LZ77 uses forward copies, that situation is used to

//  perform a form of run-length encoding on repeated runs of symbols.

//  The writeCopy and tryWriteCopy methods are used to implement this command.

//

// For performance reasons, this implementation performs few, if any, sanity

// checks on the arguments. As such, the invariants documented for each

// method call must be respected.

/// Sliding-window dictionary used while decoding an LZ77 stream.
///
/// Decoded bytes are staged in `hist`. Literals are appended with
/// `writeByte` or `writeSlice`/`writeMark`, back-references with
/// `writeCopy`/`tryWriteCopy`, and pending output is handed to the caller
/// through `readFlush`. Once the write position reaches the end of `hist`,
/// `readFlush` rewinds it to the start so the buffer is reused as a ring.
pub const DictDecoder = struct {
    const Self = @This();

    /// Allocator that owns `hist`; set by `init`, used by `deinit`.
    allocator: Allocator = undefined,

    /// Sliding window history.
    hist: []u8 = undefined,

    // Invariant: 0 <= rd_pos <= wr_pos <= hist.len

    /// Current output position in `hist`.
    wr_pos: u32 = 0,
    /// `hist[0..rd_pos]` has already been emitted.
    rd_pos: u32 = 0,
    /// Has a full window length been written yet?
    full: bool = false,

    /// Initializes the decoder with a sliding window of `size` bytes
    /// allocated from `allocator`. If a preset `dict` is provided, the window
    /// is seeded with it; when `dict` is longer than the window only its
    /// trailing `size` bytes are kept.
    ///
    /// Returns an error if the window cannot be allocated. Release the
    /// window with `deinit`.
    pub fn init(self: *Self, allocator: Allocator, size: u32, dict: ?[]const u8) !void {
        self.allocator = allocator;
        self.hist = try allocator.alloc(u8, size);
        self.wr_pos = 0;
        // Start from a clean state even if this decoder value is being reused.
        self.full = false;

        if (dict) |preset| {
            // Only the last hist.len bytes of an oversized dictionary fit.
            const tail = preset[preset.len -| self.hist.len..];
            mem.copy(u8, self.hist, tail);
            // Advance by the number of bytes actually stored. Using the full
            // dictionary length here would push wr_pos past hist.len.
            self.wr_pos = @intCast(u32, tail.len);
        }

        // A dictionary that exactly fills the window wraps immediately.
        if (self.wr_pos == self.hist.len) {
            self.wr_pos = 0;
            self.full = true;
        }
        // Preset dictionary bytes are history, not output to be flushed.
        self.rd_pos = self.wr_pos;
    }

    /// Frees the window allocated by `init`.
    pub fn deinit(self: *Self) void {
        self.allocator.free(self.hist);
    }

    /// Reports the total amount of historical data in the dictionary.
    pub fn histSize(self: *Self) u32 {
        if (self.full) {
            return @intCast(u32, self.hist.len);
        }
        return self.wr_pos;
    }

    /// Reports the number of bytes that can be flushed by `readFlush`.
    pub fn availRead(self: *Self) u32 {
        return self.wr_pos - self.rd_pos;
    }

    /// Reports the available amount of output buffer space.
    pub fn availWrite(self: *Self) u32 {
        return @intCast(u32, self.hist.len - self.wr_pos);
    }

    /// Returns the unwritten tail of the window for the caller to fill.
    /// The slice length equals `availWrite()`. Commit the bytes written
    /// with `writeMark`.
    pub fn writeSlice(self: *Self) []u8 {
        return self.hist[self.wr_pos..];
    }

    /// Advances the write position by `count`.
    ///
    /// This invariant must be kept: count <= availWrite()
    pub fn writeMark(self: *Self, count: u32) void {
        // count is unsigned, so only the upper bound needs checking.
        assert(count <= self.availWrite());
        self.wr_pos += count;
    }

    /// Writes a single byte to the dictionary.
    ///
    /// This invariant must be kept: 0 < availWrite()
    pub fn writeByte(self: *Self, byte: u8) void {
        self.hist[self.wr_pos] = byte;
        self.wr_pos += 1;
    }

    /// Copies as many leading bytes of `src` as fit into `dst` and returns
    /// the number of bytes copied, i.e. the smaller of the two lengths.
    fn copy(dst: []u8, src: []const u8) u32 {
        const n = if (src.len > dst.len) dst.len else src.len;
        mem.copy(u8, dst, src[0..n]);
        return @intCast(u32, n);
    }

    /// Copies a string at a given (dist, length) to the output.
    /// Returns the number of bytes copied, which may be less than `length`
    /// if the available space in the output buffer is too small.
    ///
    /// This invariant must be kept: 0 < dist <= histSize()
    pub fn writeCopy(self: *Self, dist: u32, length: u32) u32 {
        assert(0 < dist and dist <= self.histSize());
        const dst_base = self.wr_pos;
        var dst_pos = dst_base;
        var src_pos: i32 = @intCast(i32, dst_pos) - @intCast(i32, dist);

        // Clamp the copy to the free space without forming dst_pos + length,
        // which could overflow u32 for a very large length.
        const room = self.availWrite();
        const end_pos = dst_pos + (if (length > room) room else length);

        // Copy non-overlapping section after destination position.
        //
        // This section is non-overlapping in that the copy length for this
        // section is always less than or equal to the backwards distance.
        // This can occur if a distance refers to data that wraps around in
        // the buffer. Thus, a backwards copy is performed here; that is, the
        // exact bytes in the source prior to the copy are placed in the
        // destination.
        if (src_pos < 0) {
            src_pos += @intCast(i32, self.hist.len);
            dst_pos += copy(self.hist[dst_pos..end_pos], self.hist[@intCast(usize, src_pos)..]);
            // The source has run off the end of the window; continue from
            // the start of the buffer.
            src_pos = 0;
        }

        // Copy possibly overlapping section before destination position.
        //
        // This section can overlap if the copy length for this section is
        // larger than the backwards distance. This is allowed by LZ77 so that
        // repeated strings can be succinctly represented using (dist, length)
        // pairs. Thus, a forwards copy is performed here; that is, the bytes
        // copied are possibly dependent on the resulting bytes in the
        // destination as the copy progresses along. This is functionally
        // equivalent to the following:
        //
        //    var i = 0;
        //    while (i < end_pos - dst_pos) : (i += 1) {
        //        self.hist[dst_pos + i] = self.hist[src_pos + i];
        //    }
        //    dst_pos = end_pos;
        //
        while (dst_pos < end_pos) {
            dst_pos += copy(self.hist[dst_pos..end_pos], self.hist[@intCast(usize, src_pos)..dst_pos]);
        }

        self.wr_pos = dst_pos;
        return dst_pos - dst_base;
    }

    /// Tries to copy a string at a given (dist, length) to the output.
    /// This specialized version is optimized for short distances: it returns
    /// 0 without copying when the source would start before the beginning of
    /// the buffer or when `length` bytes do not fit in the remaining space.
    ///
    /// This method is designed to be inlined for performance reasons.
    ///
    /// This invariant must be kept: 0 < dist <= histSize()
    pub fn tryWriteCopy(self: *Self, dist: u32, length: u32) u32 {
        const dst_base = self.wr_pos;
        // Bounds are checked by subtraction so dst_base + length is only
        // formed once it is known to fit inside the window.
        if (dst_base < dist or length > self.hist.len - dst_base) {
            return 0;
        }
        var dst_pos = dst_base;
        const end_pos = dst_pos + length;
        const src_pos = dst_pos - dist;

        // Copy possibly overlapping section before destination position.
        while (dst_pos < end_pos) {
            dst_pos += copy(self.hist[dst_pos..end_pos], self.hist[src_pos..dst_pos]);
        }

        self.wr_pos = dst_pos;
        return dst_pos - dst_base;
    }

    /// Returns a slice of the historical buffer that is ready to be
    /// emitted to the user. The data returned by readFlush must be fully
    /// consumed before calling any other DictDecoder methods.
    ///
    /// When the write position has reached the end of the window, both
    /// positions are rewound to 0 and the window is marked as full.
    pub fn readFlush(self: *Self) []u8 {
        const to_read = self.hist[self.rd_pos..self.wr_pos];
        self.rd_pos = self.wr_pos;
        if (self.wr_pos == self.hist.len) {
            self.wr_pos = 0;
            self.rd_pos = 0;
            self.full = true;
        }
        return to_read;
    }
};

// tests


// Drives a DictDecoder through a mix of literal insertions and backward
// copies (including overlapping copies and copies that wrap around the
// window) and checks that the flushed output matches the expected text.
test "dictionary decoder" {
    const ArrayList = std.ArrayList;
    const testing = std.testing;

    const abc = "ABC\n";
    const fox = "The quick brown fox jumped over the lazy dog!\n";
    const poem: []const u8 =
        \\The Road Not Taken
        \\Robert Frost
        \\
        \\Two roads diverged in a yellow wood,
        \\And sorry I could not travel both
        \\And be one traveler, long I stood
        \\And looked down one as far as I could
        \\To where it bent in the undergrowth;
        \\
        \\Then took the other, as just as fair,
        \\And having perhaps the better claim,
        \\Because it was grassy and wanted wear;
        \\Though as for that the passing there
        \\Had worn them really about the same,
        \\
        \\And both that morning equally lay
        \\In leaves no step had trodden black.
        \\Oh, I kept the first for another day!
        \\Yet knowing how way leads on to way,
        \\I doubted if I should ever come back.
        \\
        \\I shall be telling this with a sigh
        \\Somewhere ages and ages hence:
        \\Two roads diverged in a wood, and I-
        \\I took the one less traveled by,
        \\And that has made all the difference.
        \\
    ;

    const uppercase: []const u8 =
        \\THE ROAD NOT TAKEN
        \\ROBERT FROST
        \\
        \\TWO ROADS DIVERGED IN A YELLOW WOOD,
        \\AND SORRY I COULD NOT TRAVEL BOTH
        \\AND BE ONE TRAVELER, LONG I STOOD
        \\AND LOOKED DOWN ONE AS FAR AS I COULD
        \\TO WHERE IT BENT IN THE UNDERGROWTH;
        \\
        \\THEN TOOK THE OTHER, AS JUST AS FAIR,
        \\AND HAVING PERHAPS THE BETTER CLAIM,
        \\BECAUSE IT WAS GRASSY AND WANTED WEAR;
        \\THOUGH AS FOR THAT THE PASSING THERE
        \\HAD WORN THEM REALLY ABOUT THE SAME,
        \\
        \\AND BOTH THAT MORNING EQUALLY LAY
        \\IN LEAVES NO STEP HAD TRODDEN BLACK.
        \\OH, I KEPT THE FIRST FOR ANOTHER DAY!
        \\YET KNOWING HOW WAY LEADS ON TO WAY,
        \\I DOUBTED IF I SHOULD EVER COME BACK.
        \\
        \\I SHALL BE TELLING THIS WITH A SIGH
        \\SOMEWHERE AGES AND AGES HENCE:
        \\TWO ROADS DIVERGED IN A WOOD, AND I-
        \\I TOOK THE ONE LESS TRAVELED BY,
        \\AND THAT HAS MADE ALL THE DIFFERENCE.
        \\
    ;

    // One decoding command: a literal run taken from `poem` when dist == 0,
    // otherwise a backward copy of `length` bytes from `dist` bytes back.
    const PoemRefs = struct {
        dist: u32, // Backward distance (0 if this is an insertion)
        length: u32, // Length of copy or insertion
    };

    // Command sequence that reconstructs `poem` when replayed in order.
    const poem_refs = [_]PoemRefs{
        .{ .dist = 0, .length = 38 },  .{ .dist = 33, .length = 3 },   .{ .dist = 0, .length = 48 },
        .{ .dist = 79, .length = 3 },  .{ .dist = 0, .length = 11 },   .{ .dist = 34, .length = 5 },
        .{ .dist = 0, .length = 6 },   .{ .dist = 23, .length = 7 },   .{ .dist = 0, .length = 8 },
        .{ .dist = 50, .length = 3 },  .{ .dist = 0, .length = 2 },    .{ .dist = 69, .length = 3 },
        .{ .dist = 34, .length = 5 },  .{ .dist = 0, .length = 4 },    .{ .dist = 97, .length = 3 },
        .{ .dist = 0, .length = 4 },   .{ .dist = 43, .length = 5 },   .{ .dist = 0, .length = 6 },
        .{ .dist = 7, .length = 4 },   .{ .dist = 88, .length = 7 },   .{ .dist = 0, .length = 12 },
        .{ .dist = 80, .length = 3 },  .{ .dist = 0, .length = 2 },    .{ .dist = 141, .length = 4 },
        .{ .dist = 0, .length = 1 },   .{ .dist = 196, .length = 3 },  .{ .dist = 0, .length = 3 },
        .{ .dist = 157, .length = 3 }, .{ .dist = 0, .length = 6 },    .{ .dist = 181, .length = 3 },
        .{ .dist = 0, .length = 2 },   .{ .dist = 23, .length = 3 },   .{ .dist = 77, .length = 3 },
        .{ .dist = 28, .length = 5 },  .{ .dist = 128, .length = 3 },  .{ .dist = 110, .length = 4 },
        .{ .dist = 70, .length = 3 },  .{ .dist = 0, .length = 4 },    .{ .dist = 85, .length = 6 },
        .{ .dist = 0, .length = 2 },   .{ .dist = 182, .length = 6 },  .{ .dist = 0, .length = 4 },
        .{ .dist = 133, .length = 3 }, .{ .dist = 0, .length = 7 },    .{ .dist = 47, .length = 5 },
        .{ .dist = 0, .length = 20 },  .{ .dist = 112, .length = 5 },  .{ .dist = 0, .length = 1 },
        .{ .dist = 58, .length = 3 },  .{ .dist = 0, .length = 8 },    .{ .dist = 59, .length = 3 },
        .{ .dist = 0, .length = 4 },   .{ .dist = 173, .length = 3 },  .{ .dist = 0, .length = 5 },
        .{ .dist = 114, .length = 3 }, .{ .dist = 0, .length = 4 },    .{ .dist = 92, .length = 5 },
        .{ .dist = 0, .length = 2 },   .{ .dist = 71, .length = 3 },   .{ .dist = 0, .length = 2 },
        .{ .dist = 76, .length = 5 },  .{ .dist = 0, .length = 1 },    .{ .dist = 46, .length = 3 },
        .{ .dist = 96, .length = 4 },  .{ .dist = 130, .length = 4 },  .{ .dist = 0, .length = 3 },
        .{ .dist = 360, .length = 3 }, .{ .dist = 0, .length = 3 },    .{ .dist = 178, .length = 5 },
        .{ .dist = 0, .length = 7 },   .{ .dist = 75, .length = 3 },   .{ .dist = 0, .length = 3 },
        .{ .dist = 45, .length = 6 },  .{ .dist = 0, .length = 6 },    .{ .dist = 299, .length = 6 },
        .{ .dist = 180, .length = 3 }, .{ .dist = 70, .length = 6 },   .{ .dist = 0, .length = 1 },
        .{ .dist = 48, .length = 3 },  .{ .dist = 66, .length = 4 },   .{ .dist = 0, .length = 3 },
        .{ .dist = 47, .length = 5 },  .{ .dist = 0, .length = 9 },    .{ .dist = 325, .length = 3 },
        .{ .dist = 0, .length = 1 },   .{ .dist = 359, .length = 3 },  .{ .dist = 318, .length = 3 },
        .{ .dist = 0, .length = 2 },   .{ .dist = 199, .length = 3 },  .{ .dist = 0, .length = 1 },
        .{ .dist = 344, .length = 3 }, .{ .dist = 0, .length = 3 },    .{ .dist = 248, .length = 3 },
        .{ .dist = 0, .length = 10 },  .{ .dist = 310, .length = 3 },  .{ .dist = 0, .length = 3 },
        .{ .dist = 93, .length = 6 },  .{ .dist = 0, .length = 3 },    .{ .dist = 252, .length = 3 },
        .{ .dist = 157, .length = 4 }, .{ .dist = 0, .length = 2 },    .{ .dist = 273, .length = 5 },
        .{ .dist = 0, .length = 14 },  .{ .dist = 99, .length = 4 },   .{ .dist = 0, .length = 1 },
        .{ .dist = 464, .length = 4 }, .{ .dist = 0, .length = 2 },    .{ .dist = 92, .length = 4 },
        .{ .dist = 495, .length = 3 }, .{ .dist = 0, .length = 1 },    .{ .dist = 322, .length = 4 },
        .{ .dist = 16, .length = 4 },  .{ .dist = 0, .length = 3 },    .{ .dist = 402, .length = 3 },
        .{ .dist = 0, .length = 2 },   .{ .dist = 237, .length = 4 },  .{ .dist = 0, .length = 2 },
        .{ .dist = 432, .length = 4 }, .{ .dist = 0, .length = 1 },    .{ .dist = 483, .length = 5 },
        .{ .dist = 0, .length = 2 },   .{ .dist = 294, .length = 4 },  .{ .dist = 0, .length = 2 },
        .{ .dist = 306, .length = 3 }, .{ .dist = 113, .length = 5 },  .{ .dist = 0, .length = 1 },
        .{ .dist = 26, .length = 4 },  .{ .dist = 164, .length = 3 },  .{ .dist = 488, .length = 4 },
        .{ .dist = 0, .length = 1 },   .{ .dist = 542, .length = 3 },  .{ .dist = 248, .length = 6 },
        .{ .dist = 0, .length = 5 },   .{ .dist = 205, .length = 3 },  .{ .dist = 0, .length = 8 },
        .{ .dist = 48, .length = 3 },  .{ .dist = 449, .length = 6 },  .{ .dist = 0, .length = 2 },
        .{ .dist = 192, .length = 3 }, .{ .dist = 328, .length = 4 },  .{ .dist = 9, .length = 5 },
        .{ .dist = 433, .length = 3 }, .{ .dist = 0, .length = 3 },    .{ .dist = 622, .length = 25 },
        .{ .dist = 615, .length = 5 }, .{ .dist = 46, .length = 5 },   .{ .dist = 0, .length = 2 },
        .{ .dist = 104, .length = 3 }, .{ .dist = 475, .length = 10 }, .{ .dist = 549, .length = 3 },
        .{ .dist = 0, .length = 4 },   .{ .dist = 597, .length = 8 },  .{ .dist = 314, .length = 3 },
        .{ .dist = 0, .length = 1 },   .{ .dist = 473, .length = 6 },  .{ .dist = 317, .length = 5 },
        .{ .dist = 0, .length = 1 },   .{ .dist = 400, .length = 3 },  .{ .dist = 0, .length = 3 },
        .{ .dist = 109, .length = 3 }, .{ .dist = 151, .length = 3 },  .{ .dist = 48, .length = 4 },
        .{ .dist = 0, .length = 4 },   .{ .dist = 125, .length = 3 },  .{ .dist = 108, .length = 3 },
        .{ .dist = 0, .length = 2 },
    };

    // `got` collects what the decoder actually emits.
    var got_list = ArrayList(u8).init(testing.allocator);
    defer got_list.deinit();
    const got = got_list.writer();

    // `want` collects the expected output, built directly from the texts.
    var want_list = ArrayList(u8).init(testing.allocator);
    defer want_list.deinit();
    const want = want_list.writer();

    var dd = DictDecoder{};
    try dd.init(testing.allocator, 1 << 11, null);
    defer dd.deinit();

    const util = struct {
        // Performs a backward copy of `length` bytes, trying the fast path
        // first and flushing the decoder into `dst` whenever it fills up.
        fn writeCopy(dst_dd: *DictDecoder, dst: anytype, dist: u32, length: u32) !void {
            var len = length;
            while (len > 0) {
                var n = dst_dd.tryWriteCopy(dist, len);
                if (n == 0) {
                    n = dst_dd.writeCopy(dist, len);
                }

                len -= n;
                if (dst_dd.availWrite() == 0) {
                    // writeAll so a short write cannot silently drop output.
                    try dst.writeAll(dst_dd.readFlush());
                }
            }
        }

        // Inserts `str` as literals, flushing the decoder into `dst`
        // whenever it fills up.
        fn writeString(dst_dd: *DictDecoder, dst: anytype, str: []const u8) !void {
            var string = str;
            while (string.len > 0) {
                const cnt = DictDecoder.copy(dst_dd.writeSlice(), string);
                dst_dd.writeMark(cnt);
                string = string[cnt..];
                if (dst_dd.availWrite() == 0) {
                    try dst.writeAll(dst_dd.readFlush());
                }
            }
        }
    };

    try util.writeString(&dd, got, ".");
    try want.writeAll(".");

    // Replay the poem as literal runs and backward copies. `str` always
    // points at the part of the poem not yet produced.
    var str = poem;
    for (poem_refs) |ref| {
        if (ref.dist == 0) {
            try util.writeString(&dd, got, str[0..ref.length]);
        } else {
            try util.writeCopy(&dd, got, ref.dist, ref.length);
        }
        str = str[ref.length..];
    }
    try want.writeAll(poem);

    // Copy from the very start of the history.
    try util.writeCopy(&dd, got, dd.histSize(), 33);
    try want.writeAll(want_list.items[0..33]);

    // Overlapping copies: length greater than distance repeats the pattern.
    try util.writeString(&dd, got, abc);
    try util.writeCopy(&dd, got, abc.len, 59 * abc.len);
    try want.writeAll(abc ** 60);

    try util.writeString(&dd, got, fox);
    try util.writeCopy(&dd, got, fox.len, 9 * fox.len);
    try want.writeAll(fox ** 10);

    try util.writeString(&dd, got, ".");
    try util.writeCopy(&dd, got, 1, 9);
    try want.writeAll("." ** 10);

    // Enough data to wrap around the 2 KiB window several times.
    try util.writeString(&dd, got, uppercase);
    try util.writeCopy(&dd, got, uppercase.len, 7 * uppercase.len);
    var i: u8 = 0;
    while (i < 8) : (i += 1) {
        try want.writeAll(uppercase);
    }

    // Copy from the maximum distance the history currently allows.
    try util.writeCopy(&dd, got, dd.histSize(), 10);
    try want.writeAll(want_list.items[want_list.items.len - dd.histSize() ..][0..10]);

    try got.writeAll(dd.readFlush());
    try testing.expectEqualSlices(u8, want_list.items, got_list.items);
}