const std = @import("../std.zig");
const testing = std.testing;
const mem = std.mem;
const assert = std.debug.assert;
/// Position of the parser within an HTTP message: first the head terminator search,
/// then the body (either a plain body or the pieces of a chunked body).
pub const State = enum {
    /// Malformed chunk framing was encountered.
    invalid,
    /// Head search: no part of a terminator is pending.
    start,
    /// Head search: the previous byte was "\n".
    seen_n,
    /// Head search: the previous byte was "\r".
    seen_r,
    /// Head search: the previous bytes were "\r\n".
    seen_rn,
    /// Head search: the previous bytes were "\r\n\r".
    seen_rnr,
    /// The head terminator has been found.
    finished,
    /// Chunked body: reading the hexadecimal chunk size.
    chunk_head_size,
    /// Chunked body: skipping the rest of the chunk head line.
    chunk_head_ext,
    /// Chunked body: saw "\r" at the end of the chunk head, expecting "\n".
    chunk_head_r,
    /// Chunked body: inside the chunk payload.
    chunk_data,
    /// Chunked body: expecting the line ending that follows a chunk payload.
    chunk_data_suffix,
    /// Chunked body: saw "\r" after a chunk payload, expecting "\n".
    chunk_data_suffix_r,

    /// Returns true once the head has been fully seen, i.e. for `.finished` and every
    /// `chunk_*` state; false for `.invalid` and all head-search states.
    pub fn isContent(self: State) bool {
        switch (self) {
            .finished,
            .chunk_head_size,
            .chunk_head_ext,
            .chunk_head_r,
            .chunk_data,
            .chunk_data_suffix,
            .chunk_data_suffix_r,
            => return true,

            .invalid,
            .start,
            .seen_n,
            .seen_r,
            .seen_rn,
            .seen_rnr,
            => return false,
        }
    }
};
/// Incremental parser that locates the end of an HTTP head, accumulates the head bytes,
/// and then reads the message body (plain or chunked) from a buffered connection.
pub const HeadersParser = struct {
/// Current position in the parsing state machine; a fresh parser begins at `.start`.
state: State = .start,
/// True when created by `initDynamic`, false when created by `initStatic`.
/// When true, `checkCompleteHead` grows `header_bytes` with the supplied allocator.
header_bytes_owned: bool,
/// Head bytes collected so far by `checkCompleteHead`.
header_bytes: std.ArrayListUnmanaged(u8),
/// Upper bound on `header_bytes.items.len`; exceeding it makes `checkCompleteHead`
/// return `error.HttpHeadersExceededSizeLimit`.
max_header_bytes: usize,
/// Number of body bytes still expected: accumulated by `findChunkedLen` while a chunk
/// size is parsed and decremented by `read` as data is consumed.
next_chunk_length: u64 = 0,
/// Set by `read` once the body has been fully consumed; `reset` asserts it.
done: bool = false,
/// Creates a parser whose head storage starts empty and is marked as owned, so that
/// `checkCompleteHead` grows it with the allocator it is given. At most `max` head
/// bytes are accepted.
pub fn initDynamic(max: usize) HeadersParser {
    return HeadersParser{
        .header_bytes_owned = true,
        .max_header_bytes = max,
        .header_bytes = .{},
    };
}
/// Creates a parser that stores head bytes in the caller-provided `buf`. The storage is
/// marked as not owned and the head size limit is `buf.len`.
pub fn initStatic(buf: []u8) HeadersParser {
    // An empty list whose capacity is the whole of `buf`.
    const storage = std.ArrayListUnmanaged(u8){
        .items = buf[0..0],
        .capacity = buf.len,
    };
    return HeadersParser{
        .header_bytes_owned = false,
        .max_header_bytes = buf.len,
        .header_bytes = storage,
    };
}
/// Prepares the parser for another message. Asserts that `r.done` is set.
/// The head storage is emptied but keeps its capacity; the size limit and ownership
/// flag are left untouched, and all other fields return to their defaults.
pub fn reset(r: *HeadersParser) void {
    assert(r.done);
    r.header_bytes.clearRetainingCapacity();
    r.state = .start;
    r.next_chunk_length = 0;
    r.done = false;
}
/// Scans `bytes` for the end of an HTTP head, resuming from the state left in `r.state`
/// by any previous call.
///
/// Returns how many bytes of `bytes` were consumed as part of the head: the index just
/// past the terminator ("\r\n\r\n" or "\n\n") when `r.state` becomes `.finished`, or
/// `bytes.len` when no terminator has been seen yet. A partial terminator at the end of
/// the input is remembered in `r.state` (`.seen_r`, `.seen_n`, `.seen_rn`, `.seen_rnr`).
/// If `r.state` is already `.finished`, returns 0 without looking at `bytes`.
///
/// `bytes.len` must fit in a `u32`. Must not be called while `r.state` is `.invalid` or
/// any of the `chunk_*` states.
///
/// NOTE(review): the windowed branches below inspect only the trailing bytes of each
/// window, so a bare "\n\n" that does not end a window can go unnoticed — confirm whether
/// bare-LF heads need to be supported.
pub fn findHeadersEnd(r: *HeadersParser, bytes: []const u8) u32 {
    // Width of the vectorized scan; never smaller than 8 bytes.
    const vector_len: comptime_int = comptime std.math.max(std.simd.suggestVectorSize(u8) orelse 1, 8);
    const len = @intCast(u32, bytes.len);
    var index: u32 = 0;

    // In every branch, `intShift` extracts the trailing byte(s) of the window just read,
    // so b8/b16/b24 are the last 1/2/3 bytes of that window.
    while (true) {
        switch (r.state) {
            .invalid => unreachable,
            .finished => return index,
            .start => switch (len - index) {
                0 => return index,
                1 => {
                    switch (bytes[index]) {
                        '\r' => r.state = .seen_r,
                        '\n' => r.state = .seen_n,
                        else => {},
                    }

                    return index + 1;
                },
                2 => {
                    const b16 = int16(bytes[index..][0..2]);
                    const b8 = intShift(u8, b16);

                    switch (b8) {
                        '\r' => r.state = .seen_r,
                        '\n' => r.state = .seen_n,
                        else => {},
                    }

                    switch (b16) {
                        int16("\r\n") => r.state = .seen_rn,
                        int16("\n\n") => r.state = .finished,
                        else => {},
                    }

                    return index + 2;
                },
                3 => {
                    const b24 = int24(bytes[index..][0..3]);
                    const b16 = intShift(u16, b24);
                    const b8 = intShift(u8, b24);

                    switch (b8) {
                        '\r' => r.state = .seen_r,
                        '\n' => r.state = .seen_n,
                        else => {},
                    }

                    switch (b16) {
                        int16("\r\n") => r.state = .seen_rn,
                        int16("\n\n") => r.state = .finished,
                        else => {},
                    }

                    switch (b24) {
                        int24("\r\n\r") => r.state = .seen_rnr,
                        else => {},
                    }

                    return index + 3;
                },
                // Too short for a full vector: advance in 4-byte windows.
                4...vector_len - 1 => {
                    const b32 = int32(bytes[index..][0..4]);
                    const b24 = intShift(u24, b32);
                    const b16 = intShift(u16, b32);
                    const b8 = intShift(u8, b32);

                    switch (b8) {
                        '\r' => r.state = .seen_r,
                        '\n' => r.state = .seen_n,
                        else => {},
                    }

                    switch (b16) {
                        int16("\r\n") => r.state = .seen_rn,
                        int16("\n\n") => r.state = .finished,
                        else => {},
                    }

                    switch (b24) {
                        int24("\r\n\r") => r.state = .seen_rnr,
                        else => {},
                    }

                    switch (b32) {
                        int32("\r\n\r\n") => r.state = .finished,
                        else => {},
                    }

                    index += 4;
                    continue;
                },
                // At least `vector_len` bytes remain: count the '\r' and '\n' bytes in one
                // vector-sized chunk and pick the cheapest follow-up check for that count.
                else => {
                    const Vector = @Vector(vector_len, u8);
                    const BitVector = @Vector(vector_len, u1);
                    const SizeVector = @Vector(vector_len, u8);

                    const chunk = bytes[index..][0..vector_len];
                    const v: Vector = chunk.*;
                    const matches_r = @bitCast(BitVector, v == @splat(vector_len, @as(u8, '\r')));
                    const matches_n = @bitCast(BitVector, v == @splat(vector_len, @as(u8, '\n')));
                    const matches_or: SizeVector = matches_r | matches_n;

                    // Number of bytes in the chunk that are '\r' or '\n'.
                    const matches = @reduce(.Add, matches_or);
                    switch (matches) {
                        0 => {},
                        1 => switch (chunk[vector_len - 1]) {
                            '\r' => r.state = .seen_r,
                            '\n' => r.state = .seen_n,
                            else => {},
                        },
                        2 => {
                            const b16 = int16(chunk[vector_len - 2 ..][0..2]);
                            const b8 = intShift(u8, b16);

                            switch (b8) {
                                '\r' => r.state = .seen_r,
                                '\n' => r.state = .seen_n,
                                else => {},
                            }

                            switch (b16) {
                                int16("\r\n") => r.state = .seen_rn,
                                int16("\n\n") => r.state = .finished,
                                else => {},
                            }
                        },
                        3 => {
                            const b24 = int24(chunk[vector_len - 3 ..][0..3]);
                            const b16 = intShift(u16, b24);
                            const b8 = intShift(u8, b24);

                            switch (b8) {
                                '\r' => r.state = .seen_r,
                                '\n' => r.state = .seen_n,
                                else => {},
                            }

                            switch (b16) {
                                int16("\r\n") => r.state = .seen_rn,
                                int16("\n\n") => r.state = .finished,
                                else => {},
                            }

                            switch (b24) {
                                int24("\r\n\r") => r.state = .seen_rnr,
                                else => {},
                            }
                        },
                        4...vector_len => {
                            // Enough line-ending bytes for a complete terminator inside the
                            // chunk: test every 4-byte window.
                            inline for (0..vector_len - 3) |i_usize| {
                                const i = @truncate(u32, i_usize);

                                const b32 = int32(chunk[i..][0..4]);
                                const b16 = intShift(u16, b32);

                                if (b32 == int32("\r\n\r\n")) {
                                    r.state = .finished;
                                    return index + i + 4;
                                } else if (b16 == int16("\n\n")) {
                                    r.state = .finished;
                                    return index + i + 2;
                                }
                            }

                            // No terminator inside: remember a partial one at the chunk's end.
                            const b24 = int24(chunk[vector_len - 3 ..][0..3]);
                            const b16 = intShift(u16, b24);
                            const b8 = intShift(u8, b24);

                            switch (b8) {
                                '\r' => r.state = .seen_r,
                                '\n' => r.state = .seen_n,
                                else => {},
                            }

                            switch (b16) {
                                int16("\r\n") => r.state = .seen_rn,
                                int16("\n\n") => r.state = .finished,
                                else => {},
                            }

                            switch (b24) {
                                int24("\r\n\r") => r.state = .seen_rnr,
                                else => {},
                            }
                        },
                        else => unreachable,
                    }

                    index += vector_len;
                    continue;
                },
            },
            .seen_n => switch (len - index) {
                0 => return index,
                else => {
                    switch (bytes[index]) {
                        '\n' => r.state = .finished,
                        else => r.state = .start,
                    }

                    index += 1;
                    continue;
                },
            },
            .seen_r => switch (len - index) {
                0 => return index,
                1 => {
                    switch (bytes[index]) {
                        '\n' => r.state = .seen_rn,
                        '\r' => r.state = .seen_r,
                        else => r.state = .start,
                    }

                    return index + 1;
                },
                2 => {
                    const b16 = int16(bytes[index..][0..2]);
                    const b8 = intShift(u8, b16);

                    // b8 is the LAST of the two bytes. A trailing '\n' on its own only means
                    // "seen \n"; the cases where the first byte is '\r' or '\n' are refined
                    // by the b16 switch below. (Previously this set `.seen_rn` even when the
                    // byte before the '\n' was not '\r'.)
                    switch (b8) {
                        '\r' => r.state = .seen_r,
                        '\n' => r.state = .seen_n,
                        else => r.state = .start,
                    }

                    switch (b16) {
                        int16("\r\n") => r.state = .seen_rn,
                        int16("\n\r") => r.state = .seen_rnr,
                        int16("\n\n") => r.state = .finished,
                        else => {},
                    }

                    return index + 2;
                },
                else => {
                    const b24 = int24(bytes[index..][0..3]);
                    const b16 = intShift(u16, b24);
                    const b8 = intShift(u8, b24);

                    switch (b8) {
                        '\r' => r.state = .seen_r,
                        '\n' => r.state = .seen_n,
                        else => r.state = .start,
                    }

                    switch (b16) {
                        int16("\r\n") => r.state = .seen_rn,
                        int16("\n\n") => r.state = .finished,
                        else => {},
                    }

                    switch (b24) {
                        int24("\n\r\n") => r.state = .finished,
                        else => {},
                    }

                    index += 3;
                    continue;
                },
            },
            .seen_rn => switch (len - index) {
                0 => return index,
                1 => {
                    switch (bytes[index]) {
                        '\r' => r.state = .seen_rnr,
                        '\n' => r.state = .seen_n,
                        else => r.state = .start,
                    }

                    return index + 1;
                },
                else => {
                    const b16 = int16(bytes[index..][0..2]);
                    const b8 = intShift(u8, b16);

                    switch (b8) {
                        '\r' => r.state = .seen_rnr,
                        '\n' => r.state = .seen_n,
                        else => r.state = .start,
                    }

                    switch (b16) {
                        int16("\r\n") => r.state = .finished,
                        int16("\n\n") => r.state = .finished,
                        else => {},
                    }

                    index += 2;
                    continue;
                },
            },
            .seen_rnr => switch (len - index) {
                0 => return index,
                else => {
                    switch (bytes[index]) {
                        '\n' => r.state = .finished,
                        else => r.state = .start,
                    }

                    index += 1;
                    continue;
                },
            },
            .chunk_head_size => unreachable,
            .chunk_head_ext => unreachable,
            .chunk_head_r => unreachable,
            .chunk_data => unreachable,
            .chunk_data_suffix => unreachable,
            .chunk_data_suffix_r => unreachable,
        }

        return index;
    }
}
/// Parses chunked-encoding framing (the line ending after a chunk payload and the chunk
/// head line) from `bytes`, accumulating the chunk size into `r.next_chunk_length`.
///
/// Returns the number of bytes consumed:
/// - one past the "\n" that ends the chunk head, with `r.state == .chunk_data`;
/// - the index of the offending byte, with `r.state == .invalid`, on malformed input
///   (unexpected byte in a line ending, a letter that is not a hexadecimal digit in the
///   size, or a size that does not fit in a `u64`);
/// - `bytes.len` when the chunk head is not complete yet.
///
/// `bytes.len` must fit in a `u32`. `r.state` must be one of `.chunk_data_suffix`,
/// `.chunk_data_suffix_r`, `.chunk_head_size`, `.chunk_head_ext` or `.chunk_head_r`.
pub fn findChunkedLen(r: *HeadersParser, bytes: []const u8) u32 {
    const len = @intCast(u32, bytes.len);
    const max_chunk_length: u64 = std.math.maxInt(u64);

    for (bytes, 0..) |c, i| {
        const index = @intCast(u32, i);
        switch (r.state) {
            .chunk_data_suffix => switch (c) {
                '\r' => r.state = .chunk_data_suffix_r,
                '\n' => r.state = .chunk_head_size,
                else => {
                    r.state = .invalid;
                    return index;
                },
            },
            .chunk_data_suffix_r => switch (c) {
                '\n' => r.state = .chunk_head_size,
                else => {
                    r.state = .invalid;
                    return index;
                },
            },
            .chunk_head_size => {
                const digit: u8 = switch (c) {
                    '0'...'9' => |b| b - '0',
                    'A'...'F' => |b| b - 'A' + 10,
                    'a'...'f' => |b| b - 'a' + 10,
                    '\r' => {
                        r.state = .chunk_head_r;
                        continue;
                    },
                    '\n' => {
                        r.state = .chunk_data;
                        return index + 1;
                    },
                    // Letters outside the hexadecimal range are not valid size digits.
                    'G'...'Z', 'g'...'z' => {
                        r.state = .invalid;
                        return index;
                    },
                    // Anything else starts the remainder of the head line, which is skipped.
                    else => {
                        r.state = .chunk_head_ext;
                        continue;
                    },
                };

                // Reject the digit if `next_chunk_length * 16 + digit` would not fit in a u64.
                if (r.next_chunk_length > (max_chunk_length - digit) / 16) {
                    r.state = .invalid;
                    return index;
                }

                r.next_chunk_length = r.next_chunk_length * 16 + digit;
            },
            .chunk_head_ext => switch (c) {
                '\r' => r.state = .chunk_head_r,
                '\n' => {
                    r.state = .chunk_data;
                    return index + 1;
                },
                else => continue,
            },
            .chunk_head_r => switch (c) {
                '\n' => {
                    r.state = .chunk_data;
                    return index + 1;
                },
                else => {
                    r.state = .invalid;
                    return index;
                },
            },
            else => unreachable,
        }
    }

    return len;
}
/// Returns true when the body has been fully read (`r.done`) and the parser is in the
/// `.finished` state.
pub fn isComplete(r: *HeadersParser) bool {
    if (!r.done) return false;
    return r.state == .finished;
}
pub const CheckCompleteHeadError = mem.Allocator.Error || error{HttpHeadersExceededSizeLimit};

/// Feeds `in` to the head search and appends the bytes that belong to the head to
/// `r.header_bytes`. Returns how many bytes of `in` were consumed; returns 0 without
/// touching anything when the head is already complete (`r.state.isContent()`).
///
/// Fails with `error.HttpHeadersExceededSizeLimit` when the stored head would grow
/// beyond `r.max_header_bytes`; note that `r.state` has already been advanced by then.
/// `allocator` is only used when the head storage is owned by the parser.
pub fn checkCompleteHead(r: *HeadersParser, allocator: std.mem.Allocator, in: []const u8) CheckCompleteHeadError!u32 {
    if (r.state.isContent()) return 0;

    const consumed = r.findHeadersEnd(in);
    const head_part = in[0..consumed];

    const total_len = r.header_bytes.items.len + head_part.len;
    if (total_len > r.max_header_bytes) return error.HttpHeadersExceededSizeLimit;

    if (r.header_bytes_owned) {
        try r.header_bytes.ensureUnusedCapacity(allocator, head_part.len);
    }
    r.header_bytes.appendSliceAssumeCapacity(head_part);

    return consumed;
}
pub const ReadError = error{
    HttpChunkInvalid,
};

/// Reads body data from `bconn` into `buffer` and returns the number of bytes written.
///
/// In the `.finished` state the body is a plain run of `r.next_chunk_length` bytes; in
/// the `chunk_*` states the chunked framing is parsed and only payload bytes are copied.
/// When `skip` is true the data is dropped from `bconn` instead of being copied, so
/// nothing is written to `buffer`. `r.done` is set once the body has been consumed;
/// further calls then return 0.
///
/// `bconn` must provide `fill()`, `peek()`, `clear(n)` and `read(buf)`.
/// Returns `error.HttpChunkInvalid` on malformed chunk framing, or any error from `bconn`.
/// Asserts that the head is complete (`r.state.isContent()`).
pub fn read(r: *HeadersParser, bconn: anytype, buffer: []u8, skip: bool) !usize {
    assert(r.state.isContent());
    if (r.done) return 0;

    var out_index: usize = 0;
    while (true) {
        switch (r.state) {
            .invalid, .start, .seen_n, .seen_r, .seen_rn, .seen_rnr => unreachable,
            .finished => {
                const data_avail = r.next_chunk_length;

                if (skip) {
                    try bconn.fill();

                    const nread = @min(bconn.peek().len, data_avail);
                    bconn.clear(@intCast(u16, nread));
                    r.next_chunk_length -= nread;

                    if (r.next_chunk_length == 0) r.done = true;

                    return 0;
                } else {
                    const out_avail = buffer.len;

                    const can_read = @intCast(usize, @min(data_avail, out_avail));
                    const nread = try bconn.read(buffer[0..can_read]);
                    r.next_chunk_length -= nread;

                    if (r.next_chunk_length == 0) r.done = true;

                    return nread;
                }
            },
            .chunk_data_suffix, .chunk_data_suffix_r, .chunk_head_size, .chunk_head_ext, .chunk_head_r => {
                try bconn.fill();

                const i = r.findChunkedLen(bconn.peek());
                bconn.clear(@intCast(u16, i));

                switch (r.state) {
                    .invalid => return error.HttpChunkInvalid,
                    // A zero-length chunk marks the end of the chunked body.
                    .chunk_data => if (r.next_chunk_length == 0) {
                        if (std.mem.eql(u8, bconn.peek(), "\r\n")) {
                            r.state = .finished;
                            // Consume the final line ending so it does not leak into
                            // whatever is parsed next from this connection.
                            bconn.clear(2);
                        } else {
                            // Something other than a lone "\r\n" follows: continue as a
                            // head search that has already seen "\r\n".
                            r.state = .seen_rn;
                        }
                        r.done = true;

                        return out_index;
                    },
                    // Chunk head still incomplete: hand back what has been read so far.
                    else => return out_index,
                }

                continue;
            },
            .chunk_data => {
                const data_avail = r.next_chunk_length;
                const out_avail = buffer.len - out_index;

                if (skip) {
                    try bconn.fill();

                    const nread = @min(bconn.peek().len, data_avail);
                    bconn.clear(@intCast(u16, nread));
                    r.next_chunk_length -= nread;
                } else if (out_avail > 0) {
                    // Only read when there is room left; a zero-length read request
                    // cannot make progress.
                    const can_read = @intCast(usize, @min(data_avail, out_avail));
                    const nread = try bconn.read(buffer[out_index..][0..can_read]);
                    r.next_chunk_length -= nread;
                    out_index += nread;
                }

                if (r.next_chunk_length == 0) {
                    r.state = .chunk_data_suffix;
                    continue;
                }

                return out_index;
            },
        }
    }
}
};
/// Reinterprets two bytes as a `u16` in native byte order.
inline fn int16(array: *const [2]u8) u16 {
    const raw: [2]u8 = array.*;
    return @bitCast(u16, raw);
}
/// Reinterprets three bytes as a `u24` in native byte order.
inline fn int24(array: *const [3]u8) u24 {
    const raw: [3]u8 = array.*;
    return @bitCast(u24, raw);
}
/// Reinterprets four bytes as a `u32` in native byte order.
inline fn int32(array: *const [4]u8) u32 {
    const raw: [4]u8 = array.*;
    return @bitCast(u32, raw);
}
/// Narrows `x` to `T`, keeping the part of `x` that holds its last bytes in memory order:
/// the high bits on little-endian targets, the low bits on big-endian targets.
inline fn intShift(comptime T: type, x: anytype) T {
    const native_endian = @import("builtin").cpu.arch.endian();
    const dropped_bits = @bitSizeOf(@TypeOf(x)) - @bitSizeOf(T);
    return switch (native_endian) {
        .Little => @truncate(T, x >> dropped_bits),
        .Big => @truncate(T, x),
    };
}
/// Test double for a buffered connection: serves bytes from an in-memory stream through
/// a fixed internal buffer, offering the `fill`/`peek`/`clear`/`read` operations that
/// `HeadersParser.read` calls on its connection argument.
const MockBufferedConnection = struct {
    pub const buffer_size = 0x2000;

    /// Source of the "network" data.
    conn: std.io.FixedBufferStream([]const u8),

    /// Internal buffer; the unread data is `buf[start..end]`.
    buf: [buffer_size]u8 = undefined,
    start: u16 = 0,
    end: u16 = 0,

    /// Refills the internal buffer from `conn`, but only when it is completely drained.
    /// Returns `error.EndOfStream` when `conn` has no more data.
    pub fn fill(bconn: *MockBufferedConnection) ReadError!void {
        if (bconn.end != bconn.start) return;

        const nread = try bconn.conn.read(bconn.buf[0..]);
        if (nread == 0) return error.EndOfStream;
        bconn.start = 0;
        bconn.end = @truncate(u16, nread);
    }

    /// Returns the unread part of the internal buffer without consuming it.
    pub fn peek(bconn: *MockBufferedConnection) []const u8 {
        return bconn.buf[bconn.start..bconn.end];
    }

    /// Marks `num` buffered bytes as consumed.
    pub fn clear(bconn: *MockBufferedConnection, num: u16) void {
        bconn.start += num;
    }

    /// Copies data into `buffer` until at least `len` bytes have been delivered or
    /// `buffer` is full, draining the internal buffer first. Returns the total number of
    /// bytes written to `buffer`.
    pub fn readAtLeast(bconn: *MockBufferedConnection, buffer: []u8, len: usize) ReadError!usize {
        var out_index: usize = 0;
        while (out_index < len) {
            const available = bconn.end - bconn.start;
            const left = buffer.len - out_index;

            // No room left in the output: stop instead of spinning on buffered data.
            if (left == 0) break;

            if (available > 0) {
                const can_read = @truncate(u16, @min(available, left));

                std.mem.copy(u8, buffer[out_index..], bconn.buf[bconn.start..][0..can_read]);
                out_index += can_read;
                bconn.start += can_read;

                continue;
            }

            if (left > bconn.buf.len) {
                // The output is larger than the internal buffer: read straight into it,
                // and account for the bytes that were already copied above.
                const nread = try bconn.conn.read(buffer[out_index..]);
                return out_index + nread;
            }

            try bconn.fill();
        }

        return out_index;
    }

    /// Reads at least one byte into `buffer` (none when `buffer` is empty).
    pub fn read(bconn: *MockBufferedConnection, buffer: []u8) ReadError!usize {
        return bconn.readAtLeast(buffer, 1);
    }

    pub const ReadError = std.io.FixedBufferStream([]const u8).ReadError || error{EndOfStream};
    pub const Reader = std.io.Reader(*MockBufferedConnection, ReadError, read);

    pub fn reader(bconn: *MockBufferedConnection) Reader {
        return Reader{ .context = bconn };
    }

    /// Forwards to the underlying stream, bypassing the internal buffer.
    pub fn writeAll(bconn: *MockBufferedConnection, buffer: []const u8) WriteError!void {
        return bconn.conn.writeAll(buffer);
    }

    /// Forwards to the underlying stream, bypassing the internal buffer.
    pub fn write(bconn: *MockBufferedConnection, buffer: []const u8) WriteError!usize {
        return bconn.conn.write(buffer);
    }

    pub const WriteError = std.io.FixedBufferStream([]const u8).WriteError;
    pub const Writer = std.io.Writer(*MockBufferedConnection, WriteError, write);

    pub fn writer(bconn: *MockBufferedConnection) Writer {
        return Writer{ .context = bconn };
    }
};
test "HeadersParser.findHeadersEnd" {
    const data = "GET / HTTP/1.1\r\nHost: localhost\r\n\r\nHello";
    // Length of the head, including the terminating blank line.
    const head_len: u32 = 35;

    // Split the input at every offset up to the end of the head: the first call must
    // consume its whole slice, the second must stop exactly at the end of the head.
    var split: usize = 0;
    while (split <= head_len) : (split += 1) {
        var parser = HeadersParser.initDynamic(0);
        const first_len = @intCast(u32, split);

        try std.testing.expectEqual(first_len, parser.findHeadersEnd(data[0..split]));
        try std.testing.expectEqual(head_len - first_len, parser.findHeadersEnd(data[split..]));
    }
}
test "HeadersParser.findChunkedLen" {
    const data = "Ff\r\nf0f000 ; ext\n0\r\nffffffffffffffffffffffffffffffffffffffff\r\n";

    // One entry per chunk head in `data`, in order.
    const Expectation = struct {
        consumed: u32,
        length: u64,
        state: State,
    };
    const expectations = [_]Expectation{
        .{ .consumed = 4, .length = 0xff, .state = .chunk_data },
        .{ .consumed = 13, .length = 0xf0f000, .state = .chunk_data },
        .{ .consumed = 3, .length = 0, .state = .chunk_data },
        // The last size overflows a u64 and must be rejected at the 17th digit.
        .{ .consumed = 16, .length = 0xffffffffffffffff, .state = .invalid },
    };

    var parser = HeadersParser.initDynamic(0);
    var offset: usize = 0;
    for (expectations) |expected| {
        parser.state = .chunk_head_size;
        parser.next_chunk_length = 0;

        const consumed = parser.findChunkedLen(data[offset..]);
        try testing.expectEqual(expected.consumed, consumed);
        try testing.expectEqual(expected.length, parser.next_chunk_length);
        try testing.expectEqual(expected.state, parser.state);

        offset += consumed;
    }
}
test "HeadersParser.read length" {
    const allocator = testing.allocator;
    const data = "GET / HTTP/1.1\r\nHost: localhost\r\nContent-Length: 5\r\n\r\nHello";

    var parser = HeadersParser.initDynamic(256);
    defer parser.header_bytes.deinit(allocator);

    const stream = std.io.fixedBufferStream(data);
    var bconn = MockBufferedConnection{ .conn = stream };

    // Feed buffered input to the parser until the whole head has been consumed.
    var head_complete = false;
    while (!head_complete) {
        try bconn.fill();

        const consumed = try parser.checkCompleteHead(allocator, bconn.peek());
        bconn.clear(@intCast(u16, consumed));

        head_complete = parser.state.isContent();
    }

    // The body length is supplied by hand, standing in for the Content-Length header.
    var body: [8]u8 = undefined;
    parser.next_chunk_length = 5;
    const body_len = try parser.read(&bconn, &body, false);

    try testing.expectEqual(@as(usize, 5), body_len);
    try testing.expectEqualStrings("Hello", body[0..body_len]);
    try testing.expectEqualStrings("GET / HTTP/1.1\r\nHost: localhost\r\nContent-Length: 5\r\n\r\n", parser.header_bytes.items);
}
test "HeadersParser.read chunked" {
    const allocator = testing.allocator;
    const data = "GET / HTTP/1.1\r\nHost: localhost\r\n\r\n2\r\nHe\r\n2\r\nll\r\n1\r\no\r\n0\r\n\r\n";

    var parser = HeadersParser.initDynamic(256);
    defer parser.header_bytes.deinit(allocator);

    const stream = std.io.fixedBufferStream(data);
    var bconn = MockBufferedConnection{ .conn = stream };

    // Feed buffered input to the parser until the whole head has been consumed.
    var head_complete = false;
    while (!head_complete) {
        try bconn.fill();

        const consumed = try parser.checkCompleteHead(allocator, bconn.peek());
        bconn.clear(@intCast(u16, consumed));

        head_complete = parser.state.isContent();
    }

    // Switch to chunked decoding by hand; all three chunks arrive in a single read.
    var body: [8]u8 = undefined;
    parser.state = .chunk_head_size;
    const body_len = try parser.read(&bconn, &body, false);

    try testing.expectEqual(@as(usize, 5), body_len);
    try testing.expectEqualStrings("Hello", body[0..body_len]);
    try testing.expectEqualStrings("GET / HTTP/1.1\r\nHost: localhost\r\n\r\n", parser.header_bytes.items);
}
test "HeadersParser.read chunked trailer" {
    const allocator = testing.allocator;
    const data = "GET / HTTP/1.1\r\nHost: localhost\r\n\r\n2\r\nHe\r\n2\r\nll\r\n1\r\no\r\n0\r\nContent-Type: text/plain\r\n\r\n";

    var parser = HeadersParser.initDynamic(256);
    defer parser.header_bytes.deinit(allocator);

    const stream = std.io.fixedBufferStream(data);
    var bconn = MockBufferedConnection{ .conn = stream };

    // Consume the head.
    var head_complete = false;
    while (!head_complete) {
        try bconn.fill();

        const consumed = try parser.checkCompleteHead(allocator, bconn.peek());
        bconn.clear(@intCast(u16, consumed));

        head_complete = parser.state.isContent();
    }

    // Switch to chunked decoding by hand and read the whole body.
    var body: [8]u8 = undefined;
    parser.state = .chunk_head_size;
    const body_len = try parser.read(&bconn, &body, false);

    try testing.expectEqual(@as(usize, 5), body_len);
    try testing.expectEqualStrings("Hello", body[0..body_len]);

    // The trailer after the last chunk goes through the same head search and is
    // appended to the stored head bytes.
    var trailer_complete = false;
    while (!trailer_complete) {
        try bconn.fill();

        const consumed = try parser.checkCompleteHead(allocator, bconn.peek());
        bconn.clear(@intCast(u16, consumed));

        trailer_complete = parser.state.isContent();
    }

    try testing.expectEqualStrings("GET / HTTP/1.1\r\nHost: localhost\r\n\r\nContent-Type: text/plain\r\n\r\n", parser.header_bytes.items);
}