From fb8e10737dfe14aaaa5fbc0295044f1f57dc9ad0 Mon Sep 17 00:00:00 2001 From: Matthew Wild Date: Fri, 26 Jun 2020 16:41:31 +0100 Subject: util.dbuffer: dynamic string buffer Similar to util.ringbuffer (and shares almost identical API). Differences: - size limit is optional and dynamic - does not allocate a fixed buffer of max_size bytes - focus on simply storing references to existing string objects where possible, avoiding unnecessary allocations - references are still stored in a ring buffer to enable use as a fast FIFO Optional second parameter to new() provides the number of ring buffer segments. On Lua 5.2 on my laptop, a segment is ~19 bytes. If the ring buffer fills up, the next write will compact all strings into a single item. --- spec/util_dbuffer_spec.lua | 95 +++++++++++++++++++++++++ util/dbuffer.lua | 171 +++++++++++++++++++++++++++++++++++++++++++++ util/queue.lua | 7 ++ 3 files changed, 273 insertions(+) create mode 100644 spec/util_dbuffer_spec.lua create mode 100644 util/dbuffer.lua diff --git a/spec/util_dbuffer_spec.lua b/spec/util_dbuffer_spec.lua new file mode 100644 index 00000000..854f3125 --- /dev/null +++ b/spec/util_dbuffer_spec.lua @@ -0,0 +1,95 @@ +local dbuffer = require "util.dbuffer"; +describe("util.dbuffer", function () + describe("#new", function () + it("has a constructor", function () + assert.Function(dbuffer.new); + end); + it("can be created", function () + assert.truthy(dbuffer.new()); + end); + it("won't create an empty buffer", function () + assert.falsy(dbuffer.new(0)); + end); + it("won't create a negatively sized buffer", function () + assert.falsy(dbuffer.new(-1)); + end); + end); + describe(":write", function () + local b = dbuffer.new(); + it("works", function () + assert.truthy(b:write("hi")); + end); + end); + + describe(":discard", function () + local b = dbuffer.new(); + it("works", function () + assert.truthy(b:write("hello world")); + assert.truthy(b:discard(6)); + assert.equal(5, #b); + assert.equal("world", b:read(5)); + end); + end); + + describe(":sub", function () + -- Helper function to compare buffer:sub() with string:sub() + local s = "hello world"; + local function test_sub(b, x, y) + local string_result, buffer_result = s:sub(x, y), b:sub(x, y); + assert.equals(string_result, buffer_result, ("buffer:sub(%d, %s) does not match string:sub()"):format(x, y and ("%d"):format(y) or "nil")); + end + + it("works", function () + local b = dbuffer.new(); + assert.truthy(b:write("hello world")); + assert.equals("hello", b:sub(1, 5)); + end); + + it("supports optional end parameter", function () + local b = dbuffer.new(); + assert.truthy(b:write("hello world")); + assert.equals("hello world", b:sub(1)); + assert.equals("world", b:sub(-5)); + end); + + it("is equivalent to string:sub", function () + local b = dbuffer.new(11); + assert.truthy(b:write(s)); + for i = -13, 13 do + for j = -13, 13 do + test_sub(b, i, j); + end + end + end); + end); + + describe(":byte", function () + -- Helper function to compare buffer:byte() with string:byte() + local s = "hello world" + local function test_byte(b, x, y) + local string_result, buffer_result = {s:byte(x, y)}, {b:byte(x, y)}; + assert.same(string_result, buffer_result, ("buffer:byte(%d, %s) does not match string:byte()"):format(x, y and ("%d"):format(y) or "nil")); + end + + it("is equivalent to string:byte", function () + local b = dbuffer.new(11); + assert.truthy(b:write(s)); + test_byte(b, 1); + test_byte(b, 3); + test_byte(b, -1); + test_byte(b, -3); + for i = -13, 13 do + for j = -13, 13 do + test_byte(b, i, j); + end + end + end); + + it("works with characters > 127", function () + local b = dbuffer.new(); + b:write(string.char(0, 140)); + local r = { b:byte(1, 2) }; + assert.same({ 0, 140 }, r); + end); + end); +end); diff --git a/util/dbuffer.lua b/util/dbuffer.lua new file mode 100644 index 00000000..c38a16f5 --- /dev/null +++ b/util/dbuffer.lua @@ -0,0 +1,171 @@ +local queue = require "util.queue"; + +local dbuffer_methods = {}; +local dynamic_buffer_mt = { __index = dbuffer_methods }; + +function dbuffer_methods:write(data) + if self.max_size and #data + self._length > self.max_size then + return nil; + end + local ok = self.items:push(data); + if not ok then + self:collapse(); + ok = self.items:push(data); + end + if not ok then + return nil; + end + self._length = self._length + #data; + return true; +end + +function dbuffer_methods:read_chunk(requested_bytes) + local chunk, consumed = self.items:peek(), self.front_consumed; + if not chunk then return; end + local chunk_length = #chunk; + local remaining_chunk_length = chunk_length - consumed; + if remaining_chunk_length <= requested_bytes then + self.front_consumed = 0; + self._length = self._length - remaining_chunk_length; + self.items:pop(); + assert(#chunk:sub(consumed + 1, -1) == remaining_chunk_length); + return chunk:sub(consumed + 1, -1), remaining_chunk_length; + end + local end_pos = consumed + requested_bytes; + self.front_consumed = end_pos; + self._length = self._length - requested_bytes; + assert(#chunk:sub(consumed + 1, end_pos) == requested_bytes); + return chunk:sub(consumed + 1, end_pos), requested_bytes; +end + +function dbuffer_methods:read(requested_bytes) + local chunks; + + if requested_bytes > self._length then + return nil; + end + + local chunk, read_bytes = self:read_chunk(requested_bytes); + if chunk then + requested_bytes = requested_bytes - read_bytes; + if requested_bytes == 0 then -- Already read everything we need + return chunk; + end + chunks = {}; + else + return nil; + end + + -- Need to keep reading more chunks + while chunk do + table.insert(chunks, chunk); + if requested_bytes > 0 then + chunk, read_bytes = self:read_chunk(requested_bytes); + requested_bytes = requested_bytes - read_bytes; + else + break; + end + end + + return table.concat(chunks); +end + +function dbuffer_methods:discard(requested_bytes) + if requested_bytes > self._length then + return nil; + end + + local chunk, read_bytes = self:read_chunk(requested_bytes); + if chunk then + requested_bytes = requested_bytes - read_bytes; + if requested_bytes == 0 then -- Already read everything we need + return true; + end + else + return nil; + end + + while chunk do + if requested_bytes > 0 then + chunk, read_bytes = self:read_chunk(requested_bytes); + requested_bytes = requested_bytes - read_bytes; + else + break; + end + end + return true; +end + +function dbuffer_methods:sub(i, j) + if j == nil then + j = -1; + end + if j < 0 then + j = self._length + (j+1); + end + if i < 0 then + i = self._length + (i+1); + end + if i < 1 then + i = 1; + end + if j > self._length then + j = self._length; + end + if i > j then + return ""; + end + + self:collapse(j); + + return self.items:peek():sub(i, j); +end + +function dbuffer_methods:byte(i, j) + i = i or 1; + j = j or i; + return string.byte(self:sub(i, j), 1, -1); +end + +function dbuffer_methods:length() + return self._length; +end +dynamic_buffer_mt.__len = dbuffer_methods.length; -- support # operator + +function dbuffer_methods:collapse(bytes) + bytes = bytes or self._length; + + local front_chunk = self.items:peek(); + + if #front_chunk - self.front_consumed >= bytes then + return; + end + + local front_chunks = { front_chunk:sub(self.front_consumed+1) }; + local front_bytes = #front_chunks[1]; + + while front_bytes < bytes do + self.items:pop(); + local chunk = self.items:peek(); + front_bytes = front_bytes + #chunk; + table.insert(front_chunks, chunk); + end + self.items:replace(table.concat(front_chunks)); + self.front_consumed = 0; +end + +local function new(max_size, max_chunks) + if max_size and max_size <= 0 then + return nil; + end + return setmetatable({ + front_consumed = 0; + _length = 0; + max_size = max_size; + items = queue.new(max_chunks or 32); + }, dynamic_buffer_mt); +end + +return { + new = new; +}; diff --git a/util/queue.lua b/util/queue.lua index 66ed098b..c94c62ae 100644 --- a/util/queue.lua +++ b/util/queue.lua @@ -51,6 +51,13 @@ local function new(size, allow_wrapping) end return t[tail]; end; + replace = function (self, data) + if items == 0 then + return self:push(data); + end + t[tail] = data; + return true; + end; items = function (self) return function (_, pos) if pos >= items then -- cgit v1.2.3