From a55ee78b737b66673d602e0a8d4f9426b21bfdf8 Mon Sep 17 00:00:00 2001 From: Benito Palacios Sanchez Date: Thu, 22 Feb 2024 08:40:35 +0100 Subject: [PATCH 1/9] :sparkles: Implement LZSS decoder --- src/Ekona/Compression/CircularBuffer.cs | 417 ++++++++++++++++++ .../DataBlockConverterExtensions.cs | 40 ++ src/Ekona/Compression/IDataBlockConverter.cs | 10 + src/Ekona/Compression/LzssDecoder.cs | 71 +++ src/Ekona/Compression/LzssDecompressor.cs | 39 ++ 5 files changed, 577 insertions(+) create mode 100644 src/Ekona/Compression/CircularBuffer.cs create mode 100644 src/Ekona/Compression/DataBlockConverterExtensions.cs create mode 100644 src/Ekona/Compression/IDataBlockConverter.cs create mode 100644 src/Ekona/Compression/LzssDecoder.cs create mode 100644 src/Ekona/Compression/LzssDecompressor.cs diff --git a/src/Ekona/Compression/CircularBuffer.cs b/src/Ekona/Compression/CircularBuffer.cs new file mode 100644 index 0000000..934525b --- /dev/null +++ b/src/Ekona/Compression/CircularBuffer.cs @@ -0,0 +1,417 @@ +using System; +using System.Collections; +using System.Collections.Generic; + +namespace SceneGate.Ekona.Compression +{ + /// + /// + /// Circular buffer. + /// + /// When writing to a full buffer: + /// PushBack -> removes this[0] / Front() + /// PushFront -> removes this[Size-1] / Back() + /// + /// this implementation is inspired by + /// http://www.boost.org/doc/libs/1_53_0/libs/circular_buffer/doc/circular_buffer.html + /// because I liked their interface. + /// + public class CircularBuffer : IEnumerable + { + private readonly T[] _buffer; + + /// + /// The _start. Index of the first element in buffer. + /// + private int _start; + + /// + /// The _end. Index after the last element in the buffer. + /// + private int _end; + + /// + /// The _size. Buffer size. + /// + private int _size; + + /// + /// Initializes a new instance of the class. + /// + /// + /// + /// Buffer capacity. Must be positive. 
+ /// + public CircularBuffer(int capacity) + : this(capacity, new T[] { }) + { + } + + /// + /// Initializes a new instance of the class. + /// + /// + /// + /// Buffer capacity. Must be positive. + /// + /// + /// Items to fill buffer with. Items length must be less than capacity. + /// Suggestion: use Skip(x).Take(y).ToArray() to build this argument from + /// any enumerable. + /// + public CircularBuffer(int capacity, T[] items) + { + if (capacity < 1) + { + throw new ArgumentException( + "Circular buffer cannot have negative or zero capacity.", nameof(capacity)); + } + if (items == null) + { + throw new ArgumentNullException(nameof(items)); + } + if (items.Length > capacity) + { + throw new ArgumentException( + "Too many items to fit circular buffer", nameof(items)); + } + + _buffer = new T[capacity]; + + Array.Copy(items, _buffer, items.Length); + _size = items.Length; + + _start = 0; + _end = _size == capacity ? 0 : _size; + } + + /// + /// Maximum capacity of the buffer. Elements pushed into the buffer after + /// maximum capacity is reached (IsFull = true), will remove an element. + /// + public int Capacity { get { return _buffer.Length; } } + + /// + /// Boolean indicating if Circular is at full capacity. + /// Adding more elements when the buffer is full will + /// cause elements to be removed from the other end + /// of the buffer. + /// + public bool IsFull + { + get + { + return Size == Capacity; + } + } + + /// + /// True if has no elements. + /// + public bool IsEmpty + { + get + { + return Size == 0; + } + } + + /// + /// Current buffer size (the number of elements that the buffer has). + /// + public int Size { get { return _size; } } + + /// + /// Element at the front of the buffer - this[0]. + /// + /// The value of the element of type T at the front of the buffer. + public T Front() + { + ThrowIfEmpty(); + return _buffer[_start]; + } + + /// + /// Element at the back of the buffer - this[Size - 1]. 
+ /// + /// The value of the element of type T at the back of the buffer. + public T Back() + { + ThrowIfEmpty(); + return _buffer[(_end != 0 ? _end : Capacity) - 1]; + } + + /// + /// Index access to elements in buffer. + /// Index does not loop around like when adding elements, + /// valid interval is [0;Size[ + /// + /// Index of element to access. + /// Thrown when index is outside of [; Size[ interval. + public T this[int index] + { + get + { + if (IsEmpty) + { + throw new IndexOutOfRangeException(string.Format("Cannot access index {0}. Buffer is empty", index)); + } + if (index >= _size) + { + throw new IndexOutOfRangeException(string.Format("Cannot access index {0}. Buffer size is {1}", index, _size)); + } + int actualIndex = InternalIndex(index); + return _buffer[actualIndex]; + } + set + { + if (IsEmpty) + { + throw new IndexOutOfRangeException(string.Format("Cannot access index {0}. Buffer is empty", index)); + } + if (index >= _size) + { + throw new IndexOutOfRangeException(string.Format("Cannot access index {0}. Buffer size is {1}", index, _size)); + } + int actualIndex = InternalIndex(index); + _buffer[actualIndex] = value; + } + } + + /// + /// Pushes a new element to the back of the buffer. Back()/this[Size-1] + /// will now return this element. + /// + /// When the buffer is full, the element at Front()/this[0] will be + /// popped to allow for this new element to fit. + /// + /// Item to push to the back of the buffer + public void PushBack(T item) + { + if (IsFull) + { + _buffer[_end] = item; + Increment(ref _end); + _start = _end; + } + else + { + _buffer[_end] = item; + Increment(ref _end); + ++_size; + } + } + + /// + /// Pushes a new element to the front of the buffer. Front()/this[0] + /// will now return this element. + /// + /// When the buffer is full, the element at Back()/this[Size-1] will be + /// popped to allow for this new element to fit. 
+ /// + /// Item to push to the front of the buffer + public void PushFront(T item) + { + if (IsFull) + { + Decrement(ref _start); + _end = _start; + _buffer[_start] = item; + } + else + { + Decrement(ref _start); + _buffer[_start] = item; + ++_size; + } + } + + /// + /// Removes the element at the back of the buffer. Decreasing the + /// Buffer size by 1. + /// + public void PopBack() + { + ThrowIfEmpty("Cannot take elements from an empty buffer."); + Decrement(ref _end); + _buffer[_end] = default(T); + --_size; + } + + /// + /// Removes the element at the front of the buffer. Decreasing the + /// Buffer size by 1. + /// + public void PopFront() + { + ThrowIfEmpty("Cannot take elements from an empty buffer."); + _buffer[_start] = default(T); + Increment(ref _start); + --_size; + } + + /// + /// Clears the contents of the array. Size = 0, Capacity is unchanged. + /// + /// + public void Clear() + { + // to clear we just reset everything. + _start = 0; + _end = 0; + _size = 0; + Array.Clear(_buffer, 0, _buffer.Length); + } + + /// + /// Copies the buffer contents to an array, according to the logical + /// contents of the buffer (i.e. independent of the internal + /// order/contents) + /// + /// A new array with a copy of the buffer contents. + public T[] ToArray() + { + T[] newArray = new T[Size]; + int newArrayOffset = 0; + var segments = ToArraySegments(); + foreach (ArraySegment segment in segments) + { + Array.Copy(segment.Array, segment.Offset, newArray, newArrayOffset, segment.Count); + newArrayOffset += segment.Count; + } + return newArray; + } + + /// + /// Get the contents of the buffer as 2 ArraySegments. + /// Respects the logical contents of the buffer, where + /// each segment and items in each segment are ordered + /// according to insertion. + /// + /// Fast: does not copy the array elements. + /// Useful for methods like Send(IList<ArraySegment<Byte>>). + /// + /// Segments may be empty. 
+ /// + /// An IList with 2 segments corresponding to the buffer content. + public IList> ToArraySegments() + { + return new [] { ArrayOne(), ArrayTwo() }; + } + + #region IEnumerable implementation + /// + /// Returns an enumerator that iterates through this buffer. + /// + /// An enumerator that can be used to iterate this collection. + public IEnumerator GetEnumerator() + { + var segments = ToArraySegments(); + foreach (ArraySegment segment in segments) + { + for (int i = 0; i < segment.Count; i++) + { + yield return segment.Array[segment.Offset + i]; + } + } + } + #endregion + #region IEnumerable implementation + IEnumerator IEnumerable.GetEnumerator() + { + return (IEnumerator)GetEnumerator(); + } + #endregion + + private void ThrowIfEmpty(string message = "Cannot access an empty buffer.") + { + if (IsEmpty) + { + throw new InvalidOperationException(message); + } + } + + /// + /// Increments the provided index variable by one, wrapping + /// around if necessary. + /// + /// + private void Increment(ref int index) + { + if (++index == Capacity) + { + index = 0; + } + } + + /// + /// Decrements the provided index variable by one, wrapping + /// around if necessary. + /// + /// + private void Decrement(ref int index) + { + if (index == 0) + { + index = Capacity; + } + index--; + } + + /// + /// Converts the index in the argument to an index in _buffer + /// + /// + /// The transformed index. + /// + /// + /// External index. + /// + private int InternalIndex(int index) + { + return _start + (index < (Capacity - _start) ? 
index : index - Capacity); + } + + // doing ArrayOne and ArrayTwo methods returning ArraySegment as seen here: + // http://www.boost.org/doc/libs/1_37_0/libs/circular_buffer/doc/circular_buffer.html#classboost_1_1circular__buffer_1957cccdcb0c4ef7d80a34a990065818d + // http://www.boost.org/doc/libs/1_37_0/libs/circular_buffer/doc/circular_buffer.html#classboost_1_1circular__buffer_1f5081a54afbc2dfc1a7fb20329df7d5b + // should help a lot with the code. + + #region Array items easy access. + // The array is composed by at most two non-contiguous segments, + // the next two methods allow easy access to those. + + private ArraySegment ArrayOne() + { + if (IsEmpty) + { + return new ArraySegment(new T[0]); + } + else if (_start < _end) + { + return new ArraySegment(_buffer, _start, _end - _start); + } + else + { + return new ArraySegment(_buffer, _start, _buffer.Length - _start); + } + } + + private ArraySegment ArrayTwo() + { + if (IsEmpty) + { + return new ArraySegment(new T[0]); + } + else if (_start < _end) + { + return new ArraySegment(_buffer, _end, 0); + } + else + { + return new ArraySegment(_buffer, 0, _end); + } + } + #endregion + } +} diff --git a/src/Ekona/Compression/DataBlockConverterExtensions.cs b/src/Ekona/Compression/DataBlockConverterExtensions.cs new file mode 100644 index 0000000..35efbf0 --- /dev/null +++ b/src/Ekona/Compression/DataBlockConverterExtensions.cs @@ -0,0 +1,40 @@ +namespace SceneGate.Ekona.Compression; + +using System; +using System.Buffers; +using System.IO; + +public static class DataBlockConverterExtensions +{ + private const int ReadBufferLength = 40 * 1024; + + public static void Convert(this IDataBlockConverter converter, Stream input, Stream output) + { + byte[] inputBuffer = ArrayPool.Shared.Rent(ReadBufferLength); + + int outputBufferLength = converter.GetOutputMaxCount(inputBuffer.Length); + byte[] outputBuffer = ArrayPool.Shared.Rent(outputBufferLength); + + try { + while (input.Position < input.Length) { + // Read from input 
stream + long inputPos = input.Position; + int read = input.Read(inputBuffer); + ReadOnlySpan inputData = inputBuffer.AsSpan(0, read); + + // Convert + int produced = converter.Convert(inputData, outputBuffer, out int consumed); + Span outputData = outputBuffer.AsSpan(0, produced); + + // Write to output stream + output.Write(outputData); + + // Advance as many bytes as we consumed + input.Position = inputPos + consumed; + } + } finally { + ArrayPool.Shared.Return(inputBuffer); + ArrayPool.Shared.Return(outputBuffer); + } + } +} diff --git a/src/Ekona/Compression/IDataBlockConverter.cs b/src/Ekona/Compression/IDataBlockConverter.cs new file mode 100644 index 0000000..3c89640 --- /dev/null +++ b/src/Ekona/Compression/IDataBlockConverter.cs @@ -0,0 +1,10 @@ +namespace SceneGate.Ekona.Compression; + +using System; + +public interface IDataBlockConverter +{ + int GetOutputMaxCount(int inputLength); + + int Convert(ReadOnlySpan input, Span output, out int consumed); +} diff --git a/src/Ekona/Compression/LzssDecoder.cs b/src/Ekona/Compression/LzssDecoder.cs new file mode 100644 index 0000000..b2dc9fb --- /dev/null +++ b/src/Ekona/Compression/LzssDecoder.cs @@ -0,0 +1,71 @@ +namespace SceneGate.Ekona.Compression; + +using System; + +public class LzssDecoder : IDataBlockConverter +{ + private readonly CircularBuffer pastBuffer = new((1 << 12) + 19); + + private byte flag = 0; + private int remainingFlagBits = 0; + + public int GetOutputMaxCount(int inputLength) => inputLength * 8 * 19; // last byte was 8-bit enabled flag to copy max + + public int Convert(ReadOnlySpan input, Span output, out int consumed) + { + int produced = 0; + consumed = 0; + while (consumed < input.Length) { + if (IsFlagRawCopy(input, ref consumed)) { + if (consumed >= input.Length) { + break; + } + + DecodeRawMode(input, ref consumed, output, ref produced); + } else { + if (consumed + 1 >= input.Length) { + break; + } + + DecodePastCopyMode(input, ref consumed, output, ref produced); + } + } + + 
return produced; + } + + private bool IsFlagRawCopy(ReadOnlySpan input, ref int consumed) + { + if (remainingFlagBits <= 0) { + remainingFlagBits = 8; + flag = input[consumed++]; + } + + remainingFlagBits--; + return ((flag >> remainingFlagBits) & 1) == 0; + } + + private void DecodeRawMode(ReadOnlySpan input, ref int consumed, Span output, ref int produced) + { + WriteOutput(input[consumed++], output, ref produced); + } + + private void DecodePastCopyMode(ReadOnlySpan input, ref int consumed, Span output, ref int produced) + { + byte info = input[consumed++]; + int bufferPos = (((info & 0x0F) << 8) | input[consumed++]) + 1; + int length = (info >> 4) + 2 + 1; + + while (length > 0) { + byte value = pastBuffer[bufferPos - 1]; + WriteOutput(value, output, ref produced); + length--; + } + } + + private void WriteOutput(byte value, Span output, ref int produced) + { + pastBuffer.PushFront(value); + output[produced++] = value; + } +} diff --git a/src/Ekona/Compression/LzssDecompressor.cs b/src/Ekona/Compression/LzssDecompressor.cs new file mode 100644 index 0000000..6df603f --- /dev/null +++ b/src/Ekona/Compression/LzssDecompressor.cs @@ -0,0 +1,39 @@ +namespace SceneGate.Ekona.Compression; + +using System; +using System.IO; +using Yarhl.FileFormat; +using Yarhl.IO; + +public class LzssDecompressor : + IConverter, + IConverter +{ + public BinaryFormat Convert(IBinary source) + { + ArgumentNullException.ThrowIfNull(source); + + DataStream decompressed = Convert(source.Stream); + return new BinaryFormat(decompressed); + } + + public DataStream Convert(Stream source) + { + ArgumentNullException.ThrowIfNull(source); + + uint header = new DataReader(source).ReadUInt32(); + uint id = header & 0xFF; + uint uncompressedLength = header >> 8; + + if (id != 0x10) { + throw new FormatException("Invalid header"); + } + + var decompressed = new DataStream(); + + var decoder = new LzssDecoder(); + decoder.Convert(source, decompressed); + + return decompressed; + } +} From 
7237773be5121e137006afb1ce388298278d08db Mon Sep 17 00:00:00 2001 From: Benito Palacios Sanchez Date: Fri, 23 Feb 2024 09:40:22 +0100 Subject: [PATCH 2/9] :umbrella: Add tests to LZSS and improve code --- .../Compression/LzssDecoderTests.cs | 166 ++++++++++++++++++ src/Ekona.Tests/Ekona.Tests.csproj | 1 + src/Ekona/Compression/CircularBuffer.cs | 8 +- .../DataBlockConverterExtensions.cs | 13 +- src/Ekona/Compression/IDataBlockConverter.cs | 18 ++ src/Ekona/Compression/LzssDecoder.cs | 70 +++++--- src/Ekona/Compression/LzssDecompressor.cs | 11 +- 7 files changed, 257 insertions(+), 30 deletions(-) create mode 100644 src/Ekona.Tests/Compression/LzssDecoderTests.cs diff --git a/src/Ekona.Tests/Compression/LzssDecoderTests.cs b/src/Ekona.Tests/Compression/LzssDecoderTests.cs new file mode 100644 index 0000000..43eafe2 --- /dev/null +++ b/src/Ekona.Tests/Compression/LzssDecoderTests.cs @@ -0,0 +1,166 @@ +namespace SceneGate.Ekona.Tests.Compression; + +using System; +using System.IO; +using System.Linq; +using NUnit.Framework; +using SceneGate.Ekona.Compression; + +[TestFixture] +public class LzssDecoderTests +{ + [Test] + public void MaxCountSizeForSmallBufferGivesNonZero() + { + const int inputLength = 1; + var decoder = new LzssDecoder(); + + int maxCount = decoder.GetOutputMaxCount(inputLength); + + Assert.That(maxCount, Is.GreaterThan(0)); + } + + [Test] + public void MaxCountForReadBufferIsLessThanLoh() + { + const int inputLength = 9 * 1024; + var decoder = new LzssDecoder(); + + int maxCount = decoder.GetOutputMaxCount(inputLength); + + Assert.That(maxCount, Is.LessThan(80 * 1024)); + } + + [Test] + public void DecodeRawToken() + { + byte[] input = [0x00, 0xBE, 0xB0, 0xCA, 0xFE, 0xC0, 0xC0, 0xBA, 0xBE]; + byte[] expected = [0xBE, 0xB0, 0xCA, 0xFE, 0xC0, 0xC0, 0xBA, 0xBE]; + + AssertConversion(input, expected); + } + + [Test] + public void DecodeCopyToken() + { + byte[] input = [ + 0b0001_0011, + 0xBE, 0xEE, 0xEF, + 0x0_0, 0x02, // full past + 0xCA, 0xFE, + 0x1_0, 
0x01, // with future + 0x0_0, 0x00, // full future + ]; + byte[] expected = [ + 0xBE, 0xEE, 0xEF, 0xBE, 0xEE, 0xEF, + 0xCA, 0xFE, 0xCA, 0xFE, 0xCA, 0xFE, + 0xFE, 0xFE, 0xFE, + ]; + + AssertConversion(input, expected); + } + + [Test] + public void DecodeStopWithMissingRawAfterReadingFlag() + { + byte[] input = [ 0b0000_0000 ]; + byte[] expected = [ ]; + + AssertConversion(input, expected); + } + + [Test] + public void DecodeStopWithMissingRawAfterIteration() + { + byte[] input = [ 0b0000_0000, 0xAA ]; + byte[] expected = [ 0xAA ]; + + AssertConversion(input, expected); + } + + [Test] + public void DecodeStopWithMissingCopyInfo() + { + byte[] input = [ 0b0100_0000, 0xAA, 0x00 ]; + byte[] expected = [ 0xAA ]; + + var decoder = new LzssDecoder(); + byte[] actual = new byte[32]; + + int produced = decoder.Convert(input, actual, out int consumed); + Assert.Multiple(() => { + Assert.That(consumed, Is.EqualTo(2)); + Assert.That(produced, Is.EqualTo(expected.Length)); + Assert.That(actual.Take(produced), Is.EquivalentTo(expected)); + }); + } + + [Test] + public void DecodeContinueKeepingInfo() + { + // Test keep flag and past buffer + byte[] input1 = [ 0b0010_0000, 0xCA, 0xFE ]; + byte[] input2 = [ 0x00, 0x00 ]; + byte[] expected = [ 0xCA, 0xFE, 0xFE, 0xFE, 0xFE ]; + + var decoder = new LzssDecoder(); + byte[] actual = new byte[32]; + + int produced1 = decoder.Convert(input1, actual, out int consumed1); + Assert.Multiple(() => { + Assert.That(consumed1, Is.EqualTo(input1.Length)); + Assert.That(produced1, Is.EqualTo(2)); + }); + + int produced2 = decoder.Convert(input2, actual.AsSpan(produced1), out int consumed2); + Assert.Multiple(() => { + Assert.That(consumed2, Is.EqualTo(input2.Length)); + Assert.That(produced2, Is.EqualTo(3)); + Assert.That(actual.Take(produced1 + produced2), Is.EquivalentTo(expected)); + }); + } + + [Test] + public void DecodeNewFlagAfter8Tokens() + { + byte[] input = [0x00, 0xBE, 0xB0, 0xCA, 0xFE, 0xC0, 0xC0, 0xBA, 0xBE, 0x00, 0xAA]; + byte[] 
expected = [0xBE, 0xB0, 0xCA, 0xFE, 0xC0, 0xC0, 0xBA, 0xBE, 0xAA]; + + AssertConversion(input, expected); + } + + [Test] + public void ThrowWhenOutputBufferIsNotLargeEnough() + { + byte[] inputFailRaw = [ 0b0000_0000, 0xAA ]; + byte[] inputFailCopy = [ 0b1000_0000, 0x00, 0x00 ]; + + Assert.That( + () => new LzssDecoder().Convert(inputFailRaw, [], out _), + Throws.InstanceOf()); + Assert.That( + () => new LzssDecoder().Convert(inputFailCopy, [], out _), + Throws.InstanceOf()); + } + + [Test] + public void DecodeEmpty() + { + byte[] input = []; + byte[] expected = []; + + AssertConversion(input, expected); + } + + private static void AssertConversion(ReadOnlySpan input, ReadOnlySpan expected) + { + var decoder = new LzssDecoder(); + byte[] actual = new byte[32]; + + int produced = decoder.Convert(input, actual, out int consumed); + + Assert.That(consumed, Is.EqualTo(input.Length)); + Assert.That(produced, Is.EqualTo(expected.Length)); + Assert.That(actual.Take(produced), Is.EquivalentTo(expected.ToArray())); + } +} diff --git a/src/Ekona.Tests/Ekona.Tests.csproj b/src/Ekona.Tests/Ekona.Tests.csproj index 54107fe..c08df10 100644 --- a/src/Ekona.Tests/Ekona.Tests.csproj +++ b/src/Ekona.Tests/Ekona.Tests.csproj @@ -5,6 +5,7 @@ Tests for Ekona. net6.0;net8.0 + 12 SceneGate.Ekona.Tests false diff --git a/src/Ekona/Compression/CircularBuffer.cs b/src/Ekona/Compression/CircularBuffer.cs index 934525b..3d356b8 100644 --- a/src/Ekona/Compression/CircularBuffer.cs +++ b/src/Ekona/Compression/CircularBuffer.cs @@ -1,6 +1,9 @@ -using System; +// +// From: https://github.com/joaoportela/CircularBuffer-CSharp +using System; using System.Collections; using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; namespace SceneGate.Ekona.Compression { @@ -16,7 +19,8 @@ namespace SceneGate.Ekona.Compression /// http://www.boost.org/doc/libs/1_53_0/libs/circular_buffer/doc/circular_buffer.html /// because I liked their interface. 
/// - public class CircularBuffer : IEnumerable + [ExcludeFromCodeCoverageAttribute(Justification = "Third-party. In their repo they have the tests.")] + internal class CircularBuffer : IEnumerable { private readonly T[] _buffer; diff --git a/src/Ekona/Compression/DataBlockConverterExtensions.cs b/src/Ekona/Compression/DataBlockConverterExtensions.cs index 35efbf0..8fec4cf 100644 --- a/src/Ekona/Compression/DataBlockConverterExtensions.cs +++ b/src/Ekona/Compression/DataBlockConverterExtensions.cs @@ -4,10 +4,19 @@ using System.Buffers; using System.IO; +/// +/// Extension methods for easily convert with . +/// public static class DataBlockConverterExtensions { - private const int ReadBufferLength = 40 * 1024; - + private const int ReadBufferLength = 9 * 1024; // so a 88% of compression rate (LZSS) is below 80 kB ( + /// Converts the input stream. + /// + /// The converter to use. + /// Data to process. + /// Stream to write the output. public static void Convert(this IDataBlockConverter converter, Stream input, Stream output) { byte[] inputBuffer = ArrayPool.Shared.Rent(ReadBufferLength); diff --git a/src/Ekona/Compression/IDataBlockConverter.cs b/src/Ekona/Compression/IDataBlockConverter.cs index 3c89640..7d18a95 100644 --- a/src/Ekona/Compression/IDataBlockConverter.cs +++ b/src/Ekona/Compression/IDataBlockConverter.cs @@ -2,9 +2,27 @@ using System; +/// +/// Interface to perform conversion of data in iterative blocks. +/// +/// Type of the input data. +/// Type of the destination data. public interface IDataBlockConverter { + /// + /// Gets the maximum size of the output for a given input length. + /// It can be used to allocate the buffer to use in the conversion. + /// + /// Size of the input buffer to convert. + /// Maximum length needed in the output buffer. int GetOutputMaxCount(int inputLength); + /// + /// Converts the next iteration of the input data. + /// + /// Buffer with data to process. + /// Buffer to write the output. 
+ /// Amount of bytes read from the input. + /// Amount of bytes written in the output. int Convert(ReadOnlySpan input, Span output, out int consumed); } diff --git a/src/Ekona/Compression/LzssDecoder.cs b/src/Ekona/Compression/LzssDecoder.cs index b2dc9fb..a8cd326 100644 --- a/src/Ekona/Compression/LzssDecoder.cs +++ b/src/Ekona/Compression/LzssDecoder.cs @@ -1,34 +1,41 @@ namespace SceneGate.Ekona.Compression; using System; +using System.IO; +/// +/// Decode / Decompress blocks of data with the LZSS DS/GBA algorithm. +/// public class LzssDecoder : IDataBlockConverter { + private const int MinSequenceLength = 2; + private readonly CircularBuffer pastBuffer = new((1 << 12) + 19); - private byte flag = 0; - private int remainingFlagBits = 0; + private byte flag; + private int remainingFlagBits; - public int GetOutputMaxCount(int inputLength) => inputLength * 8 * 19; // last byte was 8-bit enabled flag to copy max + /// + public int GetOutputMaxCount(int inputLength) + { + // best compression rate achieved by copying a sequence of bytes (already in circular buffer) + // for each bit of the token (already read) with its maximum sequence length. + return inputLength * 8 * 18 / 17; + } + /// public int Convert(ReadOnlySpan input, Span output, out int consumed) { int produced = 0; consumed = 0; - while (consumed < input.Length) { - if (IsFlagRawCopy(input, ref consumed)) { - if (consumed >= input.Length) { - break; - } - - DecodeRawMode(input, ref consumed, output, ref produced); - } else { - if (consumed + 1 >= input.Length) { - break; - } - - DecodePastCopyMode(input, ref consumed, output, ref produced); - } + + bool continueProcessing = consumed < input.Length; + while (continueProcessing) { + bool enoughDataAvailable = IsFlagRawCopy(input, ref consumed) + ? 
DecodeRawMode(input, ref consumed, output, ref produced) + : DecodePastCopyMode(input, ref consumed, output, ref produced); + + continueProcessing = enoughDataAvailable && (consumed < input.Length); } return produced; @@ -45,22 +52,41 @@ private bool IsFlagRawCopy(ReadOnlySpan input, ref int consumed) return ((flag >> remainingFlagBits) & 1) == 0; } - private void DecodeRawMode(ReadOnlySpan input, ref int consumed, Span output, ref int produced) + private bool DecodeRawMode(ReadOnlySpan input, ref int consumed, Span output, ref int produced) { + if (consumed >= input.Length) { + return false; + } + + if (produced >= output.Length) { + throw new EndOfStreamException("Output is not large enough to decompress data"); + } + WriteOutput(input[consumed++], output, ref produced); + return true; } - private void DecodePastCopyMode(ReadOnlySpan input, ref int consumed, Span output, ref int produced) + private bool DecodePastCopyMode(ReadOnlySpan input, ref int consumed, Span output, ref int produced) { + if (consumed + 1 >= input.Length) { + return false; + } + byte info = input[consumed++]; - int bufferPos = (((info & 0x0F) << 8) | input[consumed++]) + 1; - int length = (info >> 4) + 2 + 1; + int bufferPos = ((info & 0x0F) << 8) | input[consumed++]; + int length = (info >> 4) + MinSequenceLength + 1; + + if (produced + length > output.Length) { + throw new EndOfStreamException("Output is not large enough to decompress data"); + } while (length > 0) { - byte value = pastBuffer[bufferPos - 1]; + byte value = pastBuffer[bufferPos]; WriteOutput(value, output, ref produced); length--; } + + return true; } private void WriteOutput(byte value, Span output, ref int produced) diff --git a/src/Ekona/Compression/LzssDecompressor.cs b/src/Ekona/Compression/LzssDecompressor.cs index 6df603f..78134dd 100644 --- a/src/Ekona/Compression/LzssDecompressor.cs +++ b/src/Ekona/Compression/LzssDecompressor.cs @@ -1,14 +1,18 @@ -namespace SceneGate.Ekona.Compression; +namespace 
SceneGate.Ekona.Compression; using System; using System.IO; using Yarhl.FileFormat; using Yarhl.IO; +/// +/// Converter that decompress a stream with the LZSS algorithm. +/// public class LzssDecompressor : IConverter, IConverter { + /// public BinaryFormat Convert(IBinary source) { ArgumentNullException.ThrowIfNull(source); @@ -17,20 +21,19 @@ public BinaryFormat Convert(IBinary source) return new BinaryFormat(decompressed); } + /// public DataStream Convert(Stream source) { ArgumentNullException.ThrowIfNull(source); + // bit 0-3: ID (0x10), 4-31: uncompressed length uint header = new DataReader(source).ReadUInt32(); uint id = header & 0xFF; - uint uncompressedLength = header >> 8; - if (id != 0x10) { throw new FormatException("Invalid header"); } var decompressed = new DataStream(); - var decoder = new LzssDecoder(); decoder.Convert(source, decompressed); From ae51e34e547f33728ff4252dc0c0662468558b2b Mon Sep 17 00:00:00 2001 From: Benito Palacios Sanchez Date: Sun, 25 Feb 2024 09:55:50 +0100 Subject: [PATCH 3/9] =?UTF-8?q?=F0=9F=93=9A=20Document=20LZSS=20format?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/articles/specs/compression/lzss.md | 52 +++++++++++++++++++++++++ docs/articles/specs/toc.yml | 4 ++ 2 files changed, 56 insertions(+) create mode 100644 docs/articles/specs/compression/lzss.md diff --git a/docs/articles/specs/compression/lzss.md b/docs/articles/specs/compression/lzss.md new file mode 100644 index 0000000..90f2a63 --- /dev/null +++ b/docs/articles/specs/compression/lzss.md @@ -0,0 +1,52 @@ +# LZSS + +[Lempel–Ziv–Storer–Szymanski (LZSS)](https://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Storer%E2%80%93Szymanski) +is a lossless compression algorithm implemented in the BIOS of the GBA and DS. +Software can trigger the decompression functions via +[SWI calls](https://problemkaputt.de/gbatek.htm#biosdecompressionfunctions). 
+ +## Format + +The GBA/DS BIOS expects a 32-bit header before the compressed data. + +| Offset | Type | Description | +| ------ | ------ | --------------- | +| 0x00 | uint | Header | +| 0x04 | byte[] | Compressed data | + +The header bit fields are: + +- Bits 0-3: reserved (0) +- Bits 4-7: compression type `1` +- Bits 8-31: decompressed length + +### Compression format + +The compression supports two operation modes: + +- Copy the next byte from the input stream into the output stream. +- Repeat a sequence from the decompressed data in the output stream. + +The compressed data starts with a flag byte that indicates the mode for the next +8 operations. The bits are processed from most significant to least significant, that is, from bit 7 to +bit 0. + +If the next flag bit is 0, then the next byte from the input stream is written +into the output stream. + +If the next flag bit is 1, then there is a 16-bit big-endian value in the input stream +containing the repeat information: + +- Bits 0-11: backwards counting position of the start of the sequence in the + output stream. +- Bits 12-15: sequence length - 3 (minimum sequence length) + +> [!NOTE] +> The length of the sequence could be larger than the available output at the +> start of the decoding. While repeating the sequence, we may also need to copy +> bytes that we just wrote. For instance, we could repeat the last two bytes of +> the output 5 times by encoding the position 1 and a length of 10. + +After processing all the bits of a flag byte, the next input byte contains the next flags. +The operation repeats until reaching the decompressed size or running out of +input data. Note that there may be some unused bits (set to 0). 
diff --git a/docs/articles/specs/toc.yml b/docs/articles/specs/toc.yml index 58512f0..2cccb6b 100644 --- a/docs/articles/specs/toc.yml +++ b/docs/articles/specs/toc.yml @@ -11,3 +11,7 @@ href: cartridge/program.md - name: Security href: cartridge/security.md + +- name: 🗜️ Compression +- name: LZSS + href: compression/lzss.md From 2ed917cd555c8e4f800654f550979b9beb09608e Mon Sep 17 00:00:00 2001 From: Benito Palacios Sanchez Date: Mon, 26 Feb 2024 20:21:26 +0100 Subject: [PATCH 4/9] =?UTF-8?q?=F0=9F=91=95Improve=20naming=20of=20LZSS=20?= =?UTF-8?q?decompressor?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/Ekona/Compression/LzssDecoder.cs | 7 ++++--- .../{LzssDecompressor.cs => LzssFormatDecompressor.cs} | 4 ++-- 2 files changed, 6 insertions(+), 5 deletions(-) rename src/Ekona/Compression/{LzssDecompressor.cs => LzssFormatDecompressor.cs} (90%) diff --git a/src/Ekona/Compression/LzssDecoder.cs b/src/Ekona/Compression/LzssDecoder.cs index a8cd326..f0d7b2d 100644 --- a/src/Ekona/Compression/LzssDecoder.cs +++ b/src/Ekona/Compression/LzssDecoder.cs @@ -8,9 +8,10 @@ /// public class LzssDecoder : IDataBlockConverter { - private const int MinSequenceLength = 2; + private const int MinSequenceLength = 3; + private const int MaxDistance = 1 << 12; - private readonly CircularBuffer pastBuffer = new((1 << 12) + 19); + private readonly CircularBuffer pastBuffer = new(MaxDistance); private byte flag; private int remainingFlagBits; @@ -74,7 +75,7 @@ private bool DecodePastCopyMode(ReadOnlySpan input, ref int consumed, Span byte info = input[consumed++]; int bufferPos = ((info & 0x0F) << 8) | input[consumed++]; - int length = (info >> 4) + MinSequenceLength + 1; + int length = (info >> 4) + MinSequenceLength; if (produced + length > output.Length) { throw new EndOfStreamException("Output is not large enough to decompress data"); diff --git a/src/Ekona/Compression/LzssDecompressor.cs 
b/src/Ekona/Compression/LzssFormatDecompressor.cs similarity index 90% rename from src/Ekona/Compression/LzssDecompressor.cs rename to src/Ekona/Compression/LzssFormatDecompressor.cs index 78134dd..dfd5cff 100644 --- a/src/Ekona/Compression/LzssDecompressor.cs +++ b/src/Ekona/Compression/LzssFormatDecompressor.cs @@ -6,9 +6,9 @@ using Yarhl.IO; /// -/// Converter that decompress a stream with the LZSS algorithm. +/// Converter that decompress a binary format with LZSS format. /// -public class LzssDecompressor : +public class LzssFormatDecompressor : IConverter, IConverter { From 26e5adb68a7c2c0c2a8661f1c58455fce2ff40ea Mon Sep 17 00:00:00 2001 From: Benito Palacios Sanchez Date: Mon, 26 Feb 2024 21:05:59 +0100 Subject: [PATCH 5/9] =?UTF-8?q?=E2=9C=A8Convert=20LZSS=20decoder=20to=20st?= =?UTF-8?q?ream-based=20after=20perf=20tests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The difference was 20 us between span-based and stream-based, and the implementation is easier. 
--- .../Compression/LzssDecoderTests.cs | 108 +++++---------- .../DataBlockConverterExtensions.cs | 49 ------- src/Ekona/Compression/IDataBlockConverter.cs | 28 ---- src/Ekona/Compression/LzssDecoder.cs | 128 ++++++++++++------ .../Compression/LzssFormatDecompressor.cs | 42 ------ 5 files changed, 116 insertions(+), 239 deletions(-) delete mode 100644 src/Ekona/Compression/DataBlockConverterExtensions.cs delete mode 100644 src/Ekona/Compression/IDataBlockConverter.cs delete mode 100644 src/Ekona/Compression/LzssFormatDecompressor.cs diff --git a/src/Ekona.Tests/Compression/LzssDecoderTests.cs b/src/Ekona.Tests/Compression/LzssDecoderTests.cs index 43eafe2..2a336ed 100644 --- a/src/Ekona.Tests/Compression/LzssDecoderTests.cs +++ b/src/Ekona.Tests/Compression/LzssDecoderTests.cs @@ -2,35 +2,13 @@ using System; using System.IO; -using System.Linq; using NUnit.Framework; using SceneGate.Ekona.Compression; +using Yarhl.IO; [TestFixture] public class LzssDecoderTests { - [Test] - public void MaxCountSizeForSmallBufferGivesNonZero() - { - const int inputLength = 1; - var decoder = new LzssDecoder(); - - int maxCount = decoder.GetOutputMaxCount(inputLength); - - Assert.That(maxCount, Is.GreaterThan(0)); - } - - [Test] - public void MaxCountForReadBufferIsLessThanLoh() - { - const int inputLength = 9 * 1024; - var decoder = new LzssDecoder(); - - int maxCount = decoder.GetOutputMaxCount(inputLength); - - Assert.That(maxCount, Is.LessThan(80 * 1024)); - } - [Test] public void DecodeRawToken() { @@ -61,16 +39,18 @@ public void DecodeCopyToken() } [Test] - public void DecodeStopWithMissingRawAfterReadingFlag() + public void DecodeThrowsEOSWithMissingRawAfterReadingFlag() { - byte[] input = [ 0b0000_0000 ]; - byte[] expected = [ ]; + using var input = new DataStream(); + input.Write([ 0b0000_0000 ]); - AssertConversion(input, expected); + using DataStream actual = new(); + var decoder = new LzssDecoder(actual, false); + Assert.That(() => decoder.Convert(input), 
Throws.InstanceOf()); } [Test] - public void DecodeStopWithMissingRawAfterIteration() + public void DecodeStopWithPaddingFlagBits() { byte[] input = [ 0b0000_0000, 0xAA ]; byte[] expected = [ 0xAA ]; @@ -79,45 +59,13 @@ public void DecodeStopWithMissingRawAfterIteration() } [Test] - public void DecodeStopWithMissingCopyInfo() - { - byte[] input = [ 0b0100_0000, 0xAA, 0x00 ]; - byte[] expected = [ 0xAA ]; - - var decoder = new LzssDecoder(); - byte[] actual = new byte[32]; - - int produced = decoder.Convert(input, actual, out int consumed); - Assert.Multiple(() => { - Assert.That(consumed, Is.EqualTo(2)); - Assert.That(produced, Is.EqualTo(expected.Length)); - Assert.That(actual.Take(produced), Is.EquivalentTo(expected)); - }); - } - - [Test] - public void DecodeContinueKeepingInfo() + public void DecodeThrowsEOSWithMissingCopyInfo() { - // Test keep flag and past buffer - byte[] input1 = [ 0b0010_0000, 0xCA, 0xFE ]; - byte[] input2 = [ 0x00, 0x00 ]; - byte[] expected = [ 0xCA, 0xFE, 0xFE, 0xFE, 0xFE ]; - - var decoder = new LzssDecoder(); - byte[] actual = new byte[32]; + using DataStream input = new(); + input.Write([ 0b0100_0000, 0xAA, 0x00 ]); - int produced1 = decoder.Convert(input1, actual, out int consumed1); - Assert.Multiple(() => { - Assert.That(consumed1, Is.EqualTo(input1.Length)); - Assert.That(produced1, Is.EqualTo(2)); - }); - - int produced2 = decoder.Convert(input2, actual.AsSpan(produced1), out int consumed2); - Assert.Multiple(() => { - Assert.That(consumed2, Is.EqualTo(input2.Length)); - Assert.That(produced2, Is.EqualTo(3)); - Assert.That(actual.Take(produced1 + produced2), Is.EquivalentTo(expected)); - }); + var decoder = new LzssDecoder(new DataStream(), false); + Assert.That(() => decoder.Convert(input), Throws.InstanceOf()); } [Test] @@ -133,14 +81,16 @@ public void DecodeNewFlagAfter8Tokens() public void ThrowWhenOutputBufferIsNotLargeEnough() { byte[] inputFailRaw = [ 0b0000_0000, 0xAA ]; - byte[] inputFailCopy = [ 0b1000_0000, 0x00, 0x00 
]; + byte[] inputFailCopy = [ 0b0100_0000, 0x00, 0x00, 0x00 ]; + using var streamParent = new DataStream(); + using var fixedOutputStream = new DataStream(streamParent, 0, 0); Assert.That( - () => new LzssDecoder().Convert(inputFailRaw, [], out _), - Throws.InstanceOf()); + () => new LzssDecoder(fixedOutputStream, false).Convert(new MemoryStream(inputFailRaw)), + Throws.InstanceOf()); Assert.That( - () => new LzssDecoder().Convert(inputFailCopy, [], out _), - Throws.InstanceOf()); + () => new LzssDecoder(fixedOutputStream, false).Convert(new MemoryStream(inputFailCopy)), + Throws.InstanceOf()); } [Test] @@ -152,15 +102,19 @@ public void DecodeEmpty() AssertConversion(input, expected); } - private static void AssertConversion(ReadOnlySpan input, ReadOnlySpan expected) + private void AssertConversion(byte[] input, byte[] expected) { - var decoder = new LzssDecoder(); - byte[] actual = new byte[32]; + using DataStream expectedStream = DataStreamFactory.FromArray(expected); + using DataStream inputStream = DataStreamFactory.FromArray(input); - int produced = decoder.Convert(input, actual, out int consumed); + using DataStream actual = new(); + var decoder = new LzssDecoder(actual, false); + _ = decoder.Convert(inputStream); - Assert.That(consumed, Is.EqualTo(input.Length)); - Assert.That(produced, Is.EqualTo(expected.Length)); - Assert.That(actual.Take(produced), Is.EquivalentTo(expected.ToArray())); + Assert.Multiple(() => { + Assert.That(inputStream.Position, Is.EqualTo(input.Length)); + Assert.That(actual.Length, Is.EqualTo(expected.Length)); + Assert.That(expectedStream.Compare(actual), Is.True); + }); } } diff --git a/src/Ekona/Compression/DataBlockConverterExtensions.cs b/src/Ekona/Compression/DataBlockConverterExtensions.cs deleted file mode 100644 index 8fec4cf..0000000 --- a/src/Ekona/Compression/DataBlockConverterExtensions.cs +++ /dev/null @@ -1,49 +0,0 @@ -namespace SceneGate.Ekona.Compression; - -using System; -using System.Buffers; -using System.IO; - 
-/// -/// Extension methods for easily convert with . -/// -public static class DataBlockConverterExtensions -{ - private const int ReadBufferLength = 9 * 1024; // so a 88% of compression rate (LZSS) is below 80 kB ( - /// Converts the input stream. - /// - /// The converter to use. - /// Data to process. - /// Stream to write the output. - public static void Convert(this IDataBlockConverter converter, Stream input, Stream output) - { - byte[] inputBuffer = ArrayPool.Shared.Rent(ReadBufferLength); - - int outputBufferLength = converter.GetOutputMaxCount(inputBuffer.Length); - byte[] outputBuffer = ArrayPool.Shared.Rent(outputBufferLength); - - try { - while (input.Position < input.Length) { - // Read from input stream - long inputPos = input.Position; - int read = input.Read(inputBuffer); - ReadOnlySpan inputData = inputBuffer.AsSpan(0, read); - - // Convert - int produced = converter.Convert(inputData, outputBuffer, out int consumed); - Span outputData = outputBuffer.AsSpan(0, produced); - - // Write to output stream - output.Write(outputData); - - // Advance as many bytes as we consumed - input.Position = inputPos + consumed; - } - } finally { - ArrayPool.Shared.Return(inputBuffer); - ArrayPool.Shared.Return(outputBuffer); - } - } -} diff --git a/src/Ekona/Compression/IDataBlockConverter.cs b/src/Ekona/Compression/IDataBlockConverter.cs deleted file mode 100644 index 7d18a95..0000000 --- a/src/Ekona/Compression/IDataBlockConverter.cs +++ /dev/null @@ -1,28 +0,0 @@ -namespace SceneGate.Ekona.Compression; - -using System; - -/// -/// Interface to perform conversion of data in iterative blocks. -/// -/// Type of the input data. -/// Type of the destination data. -public interface IDataBlockConverter -{ - /// - /// Gets the maximum size of the output for a given input length. - /// It can be used to allocate the buffer to use in the conversion. - /// - /// Size of the input buffer to convert. - /// Maximum length needed in the output buffer. 
- int GetOutputMaxCount(int inputLength); - - /// - /// Converts the next iteration of the input data. - /// - /// Buffer with data to process. - /// Buffer to write the output. - /// Amount of bytes read from the input. - /// Amount of bytes written in the output. - int Convert(ReadOnlySpan input, Span output, out int consumed); -} diff --git a/src/Ekona/Compression/LzssDecoder.cs b/src/Ekona/Compression/LzssDecoder.cs index f0d7b2d..f8b8e02 100644 --- a/src/Ekona/Compression/LzssDecoder.cs +++ b/src/Ekona/Compression/LzssDecoder.cs @@ -1,98 +1,140 @@ -namespace SceneGate.Ekona.Compression; +namespace SceneGate.Ekona.Compression; using System; using System.IO; +using System.Runtime.CompilerServices; +using Yarhl.FileFormat; +using Yarhl.IO; /// /// Decode / Decompress blocks of data with the LZSS DS/GBA algorithm. /// -public class LzssDecoder : IDataBlockConverter +public class LzssDecoder : + IConverter, + IConverter { private const int MinSequenceLength = 3; - private const int MaxDistance = 1 << 12; + private const int MaxDistance = (1 << 12) - 1; private readonly CircularBuffer pastBuffer = new(MaxDistance); + private readonly Stream output; + private readonly bool hasHeader; + private byte flag; private int remainingFlagBits; - /// - public int GetOutputMaxCount(int inputLength) + /// + /// Initializes a new instance of the class. + /// + public LzssDecoder() + { + output = new MemoryStream(); + hasHeader = true; + } + + /// + /// Initializes a new instance of the class. + /// + /// The maximum decompressed length of the output. + public LzssDecoder(int decompressedLength) { - // best compression rate achieved by copying a sequence of bytes (already in circular buffer) - // for each bit of the token (already read) with its maximum sequence length. - return inputLength * 8 * 18 / 17; + output = new MemoryStream(decompressedLength); + hasHeader = true; + } + + /// + /// Initializes a new instance of the class. 
+ /// + /// The output stream to write the decompressed data. + /// Value indicating whether the input stream has a 4-bytes header. + public LzssDecoder(Stream output, bool hasHeader) + { + ArgumentNullException.ThrowIfNull(output); + this.output = output; + this.hasHeader = hasHeader; } /// - public int Convert(ReadOnlySpan input, Span output, out int consumed) + public BinaryFormat Convert(IBinary source) { - int produced = 0; - consumed = 0; + ArgumentNullException.ThrowIfNull(source); + return new BinaryFormat(Convert(source.Stream)); + } - bool continueProcessing = consumed < input.Length; - while (continueProcessing) { - bool enoughDataAvailable = IsFlagRawCopy(input, ref consumed) - ? DecodeRawMode(input, ref consumed, output, ref produced) - : DecodePastCopyMode(input, ref consumed, output, ref produced); + /// + public Stream Convert(Stream source) + { + ArgumentNullException.ThrowIfNull(source); + + source.Position = 0; + if (hasHeader) { + if (source.Length < 4) { + throw new EndOfStreamException(); + } + + // bit 0-3: ID (0x10), 4-31: uncompressed length + Span header = stackalloc byte[4]; + source.Read(header); + + uint id = header[0]; + if (id != 0x10) { + throw new FormatException("Invalid header"); + } + } - continueProcessing = enoughDataAvailable && (consumed < input.Length); + while (source.Position < source.Length) { + if (IsFlagRawCopy(source)) { + DecodeRawMode(source); + } else { + DecodePastCopyMode(source); + } } - return produced; + return output; } - private bool IsFlagRawCopy(ReadOnlySpan input, ref int consumed) + private bool IsFlagRawCopy(Stream input) { if (remainingFlagBits <= 0) { remainingFlagBits = 8; - flag = input[consumed++]; + flag = (byte)input.ReadByte(); } remainingFlagBits--; return ((flag >> remainingFlagBits) & 1) == 0; } - private bool DecodeRawMode(ReadOnlySpan input, ref int consumed, Span output, ref int produced) + private void DecodeRawMode(Stream input) { - if (consumed >= input.Length) { - return false; - } - - 
if (produced >= output.Length) { - throw new EndOfStreamException("Output is not large enough to decompress data"); + if (input.Position >= input.Length) { + throw new EndOfStreamException(); } - WriteOutput(input[consumed++], output, ref produced); - return true; + WriteOutput((byte)input.ReadByte()); } - private bool DecodePastCopyMode(ReadOnlySpan input, ref int consumed, Span output, ref int produced) + private void DecodePastCopyMode(Stream input) { - if (consumed + 1 >= input.Length) { - return false; + if (input.Position + 1 >= input.Length) { + throw new EndOfStreamException(); } - byte info = input[consumed++]; - int bufferPos = ((info & 0x0F) << 8) | input[consumed++]; + byte info = (byte)input.ReadByte(); + int bufferPos = ((info & 0x0F) << 8) | (byte)input.ReadByte(); int length = (info >> 4) + MinSequenceLength; - if (produced + length > output.Length) { - throw new EndOfStreamException("Output is not large enough to decompress data"); - } - while (length > 0) { byte value = pastBuffer[bufferPos]; - WriteOutput(value, output, ref produced); + WriteOutput(value); length--; } - - return true; } - private void WriteOutput(byte value, Span output, ref int produced) + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private void WriteOutput(byte value) { pastBuffer.PushFront(value); - output[produced++] = value; + output.WriteByte(value); } } diff --git a/src/Ekona/Compression/LzssFormatDecompressor.cs b/src/Ekona/Compression/LzssFormatDecompressor.cs deleted file mode 100644 index dfd5cff..0000000 --- a/src/Ekona/Compression/LzssFormatDecompressor.cs +++ /dev/null @@ -1,42 +0,0 @@ -namespace SceneGate.Ekona.Compression; - -using System; -using System.IO; -using Yarhl.FileFormat; -using Yarhl.IO; - -/// -/// Converter that decompress a binary format with LZSS format. 
-/// -public class LzssFormatDecompressor : - IConverter, - IConverter -{ - /// - public BinaryFormat Convert(IBinary source) - { - ArgumentNullException.ThrowIfNull(source); - - DataStream decompressed = Convert(source.Stream); - return new BinaryFormat(decompressed); - } - - /// - public DataStream Convert(Stream source) - { - ArgumentNullException.ThrowIfNull(source); - - // bit 0-3: ID (0x10), 4-31: uncompressed length - uint header = new DataReader(source).ReadUInt32(); - uint id = header & 0xFF; - if (id != 0x10) { - throw new FormatException("Invalid header"); - } - - var decompressed = new DataStream(); - var decoder = new LzssDecoder(); - decoder.Convert(source, decompressed); - - return decompressed; - } -} From 6b77a358a3afa76ef876e3223a07d254c173b902 Mon Sep 17 00:00:00 2001 From: Benito Palacios Sanchez Date: Mon, 26 Feb 2024 21:06:26 +0100 Subject: [PATCH 6/9] =?UTF-8?q?=E2=9C=A8First=20attempt=20of=20LZSS=20enco?= =?UTF-8?q?der?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/Ekona/Compression/LzssEncoder.cs | 135 +++++++++++++++++++++++++++ 1 file changed, 135 insertions(+) create mode 100644 src/Ekona/Compression/LzssEncoder.cs diff --git a/src/Ekona/Compression/LzssEncoder.cs b/src/Ekona/Compression/LzssEncoder.cs new file mode 100644 index 0000000..231a721 --- /dev/null +++ b/src/Ekona/Compression/LzssEncoder.cs @@ -0,0 +1,135 @@ +namespace Ekona.Compressions; + +using System; +using System.IO; +using Yarhl.FileFormat; +using Yarhl.IO; + +public class LzssEncoder : + IConverter, + IConverter +{ + private const int MinSequenceLength = 3; + private const int MaxSequenceLength = (1 << 4) + MinSequenceLength - 1; + private const int MaxDistance = (1 << 12) - 1; + + private readonly byte[] windowBuffer = new byte[MaxDistance + MaxSequenceLength]; + private readonly byte[] patternBuffer = new byte[MaxSequenceLength]; + private readonly Stream output; + + private int actionsEncoded; + private byte 
currentFlag; + private long flagPosition; + + public LzssEncoder() + { + output = new MemoryStream(); + } + + public LzssEncoder(Stream output) + { + ArgumentNullException.ThrowIfNull(output); + this.output = output; + } + + public BinaryFormat Convert(IBinary source) + { + ArgumentNullException.ThrowIfNull(source); + Stream result = Convert(source.Stream); + + return new BinaryFormat(result); + } + + public Stream Convert(Stream source) + { + while (source.Position < source.Length) { + if (actionsEncoded == 8) { + FlushFlag(); + } + + (int sequencePos, int sequenceLen) = FindSequence(source); + + if (sequenceLen >= MinSequenceLength) { + currentFlag |= (byte)(1 << (7 - actionsEncoded)); + + int encodedLength = sequenceLen - MinSequenceLength; + output.WriteByte((byte)((encodedLength << 4) | (sequencePos >> 8))); + output.WriteByte((byte)sequencePos); + + source.Position += sequenceLen; + } else { + // flag bit is 0, so no need to update it + output.WriteByte((byte)source.ReadByte()); + } + + actionsEncoded++; + } + + FlushFlag(); + return output; + } + + private void FlushFlag() + { + long currentPos = output.Position; + output.Position = flagPosition; + output.WriteByte(currentFlag); + + currentFlag = 0; + output.Position = currentPos; + flagPosition = currentPos; + } + + private (int pos, int length) FindSequence(Stream input) + { + long inputLen = input.Length; + + int maxPattern = (int)(input.Position + MaxSequenceLength > inputLen + ? inputLen - input.Position + : MaxSequenceLength); + if (maxPattern < MinSequenceLength) { + return (0, 0); + } + + long windowPos = input.Position > MaxDistance ? input.Position - MaxDistance : 0; + int windowLen = (int)(windowPos + MaxDistance > input.Position + ? 
input.Position - windowPos + : MaxDistance); + if (windowLen == 0) { + return (-1, -1); + } + + long inputPos = input.Position; + + Span window = windowBuffer.AsSpan(0, windowLen + (maxPattern - 1)); + input.Position = windowPos; + _ = input.Read(window); + + Span pattern = patternBuffer.AsSpan(0, maxPattern); + input.Position = inputPos; + _ = input.Read(pattern); + + input.Position = inputPos; + + int bestLength = -1; + int bestPos = -1; + for (int pos = windowLen - 1; pos >= 0; pos--) { + int length = 0; + for (; length < maxPattern; length++) { + if (pattern[length] != window[pos + length]) { + break; + } + } + + if (length > bestLength) { + bestLength = length; + bestPos = pos; + if (length == MaxSequenceLength) { + return (windowLen - bestPos, bestLength); + } + } + } + + return (windowLen - bestPos, bestLength); + } +} From 318b45263d09ee4fa111dc85fa4ddbe26295dc50 Mon Sep 17 00:00:00 2001 From: Benito Palacios Sanchez Date: Wed, 28 Feb 2024 19:12:34 +0100 Subject: [PATCH 7/9] :sparkles: Improve LZSS encoder and write header --- .config/dotnet-tools.json | 17 ++-- .github/workflows/build-and-release.yml | 4 +- .../LzssEncoderTests.cs | 33 ++++++++ src/Ekona.PerformanceTests/Program.cs | 4 +- src/Ekona/Compression/LzssEncoder.cs | 79 +++++++++++++------ 5 files changed, 101 insertions(+), 36 deletions(-) create mode 100644 src/Ekona.PerformanceTests/LzssEncoderTests.cs diff --git a/.config/dotnet-tools.json b/.config/dotnet-tools.json index 282ad84..5c6fb8c 100644 --- a/.config/dotnet-tools.json +++ b/.config/dotnet-tools.json @@ -6,31 +6,36 @@ "version": "5.12.0", "commands": [ "dotnet-gitversion" - ] + ], + "rollForward": false }, "thirdlicense": { "version": "1.3.1", "commands": [ "thirdlicense" - ] + ], + "rollForward": false }, "dotnet-reportgenerator-globaltool": { "version": "5.2.0", "commands": [ "reportgenerator" - ] + ], + "rollForward": false }, "docfx": { - "version": "2.75.2", + "version": "2.77.0", "commands": [ "docfx" - ] + ], + "rollForward": 
false }, "gitreleasemanager.tool": { "version": "0.16.0", "commands": [ "dotnet-gitreleasemanager" - ] + ], + "rollForward": false } } } \ No newline at end of file diff --git a/.github/workflows/build-and-release.yml b/.github/workflows/build-and-release.yml index c578b62..4cea124 100644 --- a/.github/workflows/build-and-release.yml +++ b/.github/workflows/build-and-release.yml @@ -15,7 +15,7 @@ jobs: name: "Build" uses: ./.github/workflows/build.yml with: - dotnet_version: '8.0.204' + dotnet_version: '8.0.401' secrets: test_resources: ${{ secrets.TEST_RESOURCES_URI_V1 }} @@ -27,7 +27,7 @@ jobs: needs: build uses: ./.github/workflows/deploy.yml with: - dotnet_version: '8.0.204' + dotnet_version: '8.0.401' azure_nuget_feed: 'https://pkgs.dev.azure.com/SceneGate/SceneGate/_packaging/SceneGate-Preview/nuget/v3/index.json' secrets: nuget_preview_token: "az" # Dummy values as we use Azure DevOps onlyg diff --git a/src/Ekona.PerformanceTests/LzssEncoderTests.cs b/src/Ekona.PerformanceTests/LzssEncoderTests.cs new file mode 100644 index 0000000..d440a50 --- /dev/null +++ b/src/Ekona.PerformanceTests/LzssEncoderTests.cs @@ -0,0 +1,33 @@ +namespace SceneGate.Ekona.PerformanceTests; + +using BenchmarkDotNet.Attributes; +using SceneGate.Ekona.Compression; + +[MemoryDiagnoser] +public class LzssEncoderTests +{ + private Stream inputStream = null!; + private Stream outputStream = null!; + + [GlobalSetup] + public void SetUp() + { + var input = new byte[Length]; + Random.Shared.NextBytes(input); + inputStream = new MemoryStream(input); + + var output = new byte[Length * 2]; + outputStream = new MemoryStream(output); + } + + [Params(512, 10 * 1024, 3 * 1024 * 1024)] + public int Length { get; set; } + + [Benchmark] + public Stream Encode() + { + outputStream.Position = 0; + var encoder = new LzssEncoder(outputStream); + return encoder.Convert(inputStream); + } +} diff --git a/src/Ekona.PerformanceTests/Program.cs b/src/Ekona.PerformanceTests/Program.cs index 1146461..ccd7333 
100644 --- a/src/Ekona.PerformanceTests/Program.cs +++ b/src/Ekona.PerformanceTests/Program.cs @@ -17,10 +17,10 @@ // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE // SOFTWARE. -using BenchmarkDotNet.Running; - namespace SceneGate.Ekona.PerformanceTests; +using BenchmarkDotNet.Running; + public static class Program { public static void Main(string[] args) => diff --git a/src/Ekona/Compression/LzssEncoder.cs b/src/Ekona/Compression/LzssEncoder.cs index 231a721..0059b42 100644 --- a/src/Ekona/Compression/LzssEncoder.cs +++ b/src/Ekona/Compression/LzssEncoder.cs @@ -1,37 +1,46 @@ -namespace Ekona.Compressions; +namespace SceneGate.Ekona.Compression; using System; using System.IO; using Yarhl.FileFormat; using Yarhl.IO; +/// +/// Encode / Compress blocks of data with the LZSS DS/GBA algorithm. +/// public class LzssEncoder : IConverter, IConverter { private const int MinSequenceLength = 3; - private const int MaxSequenceLength = (1 << 4) + MinSequenceLength - 1; - private const int MaxDistance = (1 << 12) - 1; + private const int MaxSequenceLength = (1 << 4) - 1 + MinSequenceLength; + private const int MaxDistance = 1 << 12; private readonly byte[] windowBuffer = new byte[MaxDistance + MaxSequenceLength]; - private readonly byte[] patternBuffer = new byte[MaxSequenceLength]; - private readonly Stream output; + private Stream output = null!; - private int actionsEncoded; private byte currentFlag; private long flagPosition; + /// + /// Initializes a new instance of the class. + /// + /// The output of the converter will be in a new memory stream each time. public LzssEncoder() { - output = new MemoryStream(); } + /// + /// Initializes a new instance of the class. + /// + /// Stream to write the output. 
public LzssEncoder(Stream output) { ArgumentNullException.ThrowIfNull(output); this.output = output; } + /// public BinaryFormat Convert(IBinary source) { ArgumentNullException.ThrowIfNull(source); @@ -40,11 +49,28 @@ public BinaryFormat Convert(IBinary source) return new BinaryFormat(result); } + /// public Stream Convert(Stream source) { + output ??= new MemoryStream(); + + long decompressedLength = source.Length; + output.WriteByte(0x10); // compression ID + output.WriteByte((byte)(decompressedLength & 0xFF)); + output.WriteByte((byte)(decompressedLength >> 8)); + output.WriteByte((byte)(decompressedLength >> 16)); + + // Prepare the initial token flag + currentFlag = 0; + flagPosition = output.Position; + output.WriteByte(0); + + int actionsEncoded = 0; + source.Position = 0; while (source.Position < source.Length) { if (actionsEncoded == 8) { - FlushFlag(); + FlushFlag(true); + actionsEncoded = 0; } (int sequencePos, int sequenceLen) = FindSequence(source); @@ -53,8 +79,9 @@ public Stream Convert(Stream source) currentFlag |= (byte)(1 << (7 - actionsEncoded)); int encodedLength = sequenceLen - MinSequenceLength; - output.WriteByte((byte)((encodedLength << 4) | (sequencePos >> 8))); - output.WriteByte((byte)sequencePos); + int encodedPos = sequencePos; + output.WriteByte((byte)((encodedLength << 4) | (encodedPos >> 8))); + output.WriteByte((byte)encodedPos); source.Position += sequenceLen; } else { @@ -65,19 +92,22 @@ public Stream Convert(Stream source) actionsEncoded++; } - FlushFlag(); + FlushFlag(false); return output; } - private void FlushFlag() + private void FlushFlag(bool hasMoreData) { long currentPos = output.Position; output.Position = flagPosition; output.WriteByte(currentFlag); - - currentFlag = 0; output.Position = currentPos; - flagPosition = currentPos; + + if (hasMoreData) { + currentFlag = 0; + flagPosition = currentPos; + output.WriteByte(0x00); + } } private (int pos, int length) FindSequence(Stream input) @@ -96,27 +126,24 @@ private void 
FlushFlag() ? input.Position - windowPos : MaxDistance); if (windowLen == 0) { - return (-1, -1); + return (0, 0); } - long inputPos = input.Position; + Span window = windowBuffer.AsSpan(0, windowLen + maxPattern); - Span window = windowBuffer.AsSpan(0, windowLen + (maxPattern - 1)); + long inputPos = input.Position; input.Position = windowPos; _ = input.Read(window); - - Span pattern = patternBuffer.AsSpan(0, maxPattern); input.Position = inputPos; - _ = input.Read(pattern); - input.Position = inputPos; + Span fullPattern = window[^maxPattern..]; int bestLength = -1; int bestPos = -1; - for (int pos = windowLen - 1; pos >= 0; pos--) { + for (int pos = 0; pos < windowLen - 1; pos++) { int length = 0; for (; length < maxPattern; length++) { - if (pattern[length] != window[pos + length]) { + if (fullPattern[length] != window[pos + length]) { break; } } @@ -125,11 +152,11 @@ private void FlushFlag() bestLength = length; bestPos = pos; if (length == MaxSequenceLength) { - return (windowLen - bestPos, bestLength); + return (windowLen - bestPos - 1, bestLength); } } } - return (windowLen - bestPos, bestLength); + return (windowLen - bestPos - 1, bestLength); } } From fd9841ab947c017c278a6d54b13fb248aeaad667 Mon Sep 17 00:00:00 2001 From: Benito Palacios Sanchez Date: Thu, 29 Feb 2024 19:12:34 +0100 Subject: [PATCH 8/9] :umbrella: LZSS encoder tests and fixes --- .../Compression/LzssEncoderTests.cs | 317 ++++++++++++++++++ src/Ekona/Compression/LzssEncoder.cs | 39 ++- 2 files changed, 347 insertions(+), 9 deletions(-) create mode 100644 src/Ekona.Tests/Compression/LzssEncoderTests.cs diff --git a/src/Ekona.Tests/Compression/LzssEncoderTests.cs b/src/Ekona.Tests/Compression/LzssEncoderTests.cs new file mode 100644 index 0000000..c350d6d --- /dev/null +++ b/src/Ekona.Tests/Compression/LzssEncoderTests.cs @@ -0,0 +1,317 @@ +namespace SceneGate.Ekona.Tests.Compression; + +using System; +using System.IO; +using NUnit.Framework; +using SceneGate.Ekona.Compression; +using 
Yarhl.IO; + +[TestFixture] +public class LzssEncoderTests +{ + private const int MaxRaw = 8; + private const int MinPattern = 3; + private const int MaxPattern = (1 << 4) + 2; + private const int MaxDistance = 1 << 12; + + private LzssEncoder encoder; + private DataStream actualOutput; + + [SetUp] + public void SetUp() + { + actualOutput = new DataStream(); + encoder = new LzssEncoder(actualOutput, false); + } + + [TearDown] + public void TearDown() + { + actualOutput.Dispose(); + } + + [Test] + public void NullDecompressionThrowsException() + { + Assert.That(() => encoder.Convert(((BinaryFormat)null)!), Throws.ArgumentNullException); + Assert.That(() => encoder.Convert(((Stream)null)!), Throws.ArgumentNullException); + } + + [Test] + public void InvalidStreamInitializationThrows() + { + Assert.That(() => new LzssEncoder(null!), Throws.ArgumentNullException); + } + + [Test] + public void ConverterUseConstructorStream() + { + using var input = new DataStream(); + input.Write([0xCA, 0xFE]); + + var myEncoder = new LzssEncoder(actualOutput); + + Stream returned = myEncoder.Convert(input); + + Assert.That(actualOutput, Is.SameAs(returned)); + } + + [Test] + public void FirstTwoBytesEncodedAsRaw() + { + byte[] input = [ 0xCA, 0xFE ]; + byte[] expected = [0x00, 0xCA, 0xFE]; + + AssertCompression(input, expected); + } + + [Test] + public void WriteMaxRawBytes() + { + byte[] input = new byte[MaxRaw]; + for (int i = 0; i < MaxRaw; i++) { + input[i] = (byte)i; + } + + byte[] expected = [0x00, .. 
input]; + AssertCompression(input, expected); + } + + [Test] + public void WriteRawAfterMaxRawBytes() + { + var stream = new DataStream(); + for (int i = 0; i < MaxRaw + 1; i++) { + stream.WriteByte((byte)i); + } + + Stream compressed = encoder.Convert(stream); + + compressed.Position = 0; + Assert.That(compressed.Length, Is.EqualTo(MaxRaw + 3)); + Assert.That(compressed.ReadByte(), Is.EqualTo(0)); + for (int i = 0; i < MaxRaw; i++) { + Assert.That(compressed.ReadByte(), Is.EqualTo(i)); + } + + Assert.That(compressed.ReadByte(), Is.EqualTo(0)); + Assert.That(compressed.ReadByte(), Is.EqualTo(MaxRaw)); + } + + [Test] + public void IgnoreSequencesSmallerEqual3() + { + byte[] input = [ 0xFE, 0xFE, 0xFE, 0xCA, 0xFE ]; + byte[] expected = [0x00, 0xFE, 0xFE, 0xFE, 0xCA, 0xFE]; + + AssertCompression(input, expected); + } + + [Test] + public void FindSequenceOfLength3() + { + byte[] input = [ 0xCA, 0xFE, 0xC0, 0xFF, 0xCA, 0xFE, 0xC0 ]; + byte[] expected = [ 0b0000_1000, 0xCA, 0xFE, 0xC0, 0xFF, 0x00, 0x03 ]; + + AssertCompression(input, expected); + } + + [Test] + public void FindSequenceStartingAtPos1() + { + // This guarantees compatibility with VRAM (units of 16-bits) + byte[] input = [ 0xAA, 0xAA, 0xAA, 0xAA, 0xAA ]; + byte[] expected = [ 0b0010_0000, 0xAA, 0xAA, 0x00, 0x01 ]; + + AssertCompression(input, expected); + } + + [Test] + public void FindSequenceInPresent() + { + byte[] input = [ 0xCA, 0xFE, 0xC0, 0xCA, 0xFE, 0xC0 ]; + byte[] expected = [0b0001_0000, 0xCA, 0xFE, 0xC0, 0x00, 0x02]; + AssertCompression(input, expected); + } + + [Test] + public void FindPatternInFutureDecompressed() + { + byte[] input = [ 0xBB, 0xBB, 0xBB, 0xBB, 0xBB ]; + byte[] expected = [ 0b0010_0000, 0xBB, 0xBB, 0x00, 0x01 ]; + + AssertCompression(input, expected); + } + + [Test] + public void FindSelfRepeatingSequence() + { + byte[] input = [ 0xCA, 0xFE, 0xCA, 0xFE, 0xCA, 0xFE ]; + byte[] expected = [0b0010_0000, 0xCA, 0xFE, 0x10, 0x01]; + AssertCompression(input, expected); + } + + [Test] 
+ public void FindLargerPatternWhenMultipleMatch() + { + byte[] input = [ 0xAA, 0xBB, 0xCC, 0xAA, 0xBB, 0xFF, 0xAA, 0xBB, 0xCC ]; + byte[] expected = [ 0b0000_0010, 0xAA, 0xBB, 0xCC, 0xAA, 0xBB, 0xFF, 0x00, 0x05 ]; + + AssertCompression(input, expected); + } + + [Test] + public void FindPatternsStartingEndOfBuffer() + { + byte[] input = [ 0xAA, 0xBB, 0xCC, 0xAA, 0xBB, 0xCC, 0xFF, 0xAA, 0xBB, 0xCC ]; + byte[] expected = [ 0b0001_0100, 0xAA, 0xBB, 0xCC, 0x00, 0x02, 0xFF, 0x00, 0x06 ]; + + AssertCompression(input, expected); + } + + [Test] + public void FindPatternWithMaxLength() + { + byte[] input = new byte[MaxPattern + 2]; + for (int i = 0; i < input.Length; i++) { + input[i] = 0xC0; + } + + byte[] expected = [0b0010_0000, 0xC0, 0xC0, 0xF0, 0x01]; + + AssertCompression(input, expected); + } + + [Test] + public void FindAnotherPatternAfterMaxLengthPattern() + { + byte[] input = new byte[2 + MaxPattern + 3]; + for (int i = 0; i < input.Length; i++) { + input[i] = 0xC0; + } + + // It finds starting the end of the buffer, not the last position + byte[] expected = [0b0011_0000, 0xC0, 0xC0, 0xF0, 0x01, 0x00, 0x13]; + + AssertCompression(input, expected); + } + + [Test] + public void FindShorterPatternAsRawCopyAfterMaxLengthPattern() + { + byte[] input = new byte[2 + MaxPattern + 2]; + for (int i = 0; i < input.Length; i++) { + input[i] = 0xC0; + } + + byte[] expected = [0b0010_0000, 0xC0, 0xC0, 0xF0, 0x01, 0xC0, 0xC0]; + + AssertCompression(input, expected); + } + + [Test] + public void DoNotFindPatternsOutsideMaxDistance() + { + byte[] input = new byte[MinPattern + MaxDistance + MinPattern]; + Array.Fill(input, 0xAA, 0, MinPattern); + Array.Fill(input, 0xBB, MinPattern, MaxDistance); + Array.Fill(input, 0xAA, MinPattern + MaxDistance, MinPattern); + + byte[] expected = [ + 0x07, 0xAA, 0xAA, 0xAA, 0xBB, 0xBB, 0xF0, 0x01, 0xF0, 0x13, 0xF0, 0x25, + 0xFF, 0xF0, 0x37, 0xF0, 0x49, 0xF0, 0x5B, 0xF0, 0x6D, 0xF0, 0x7F, 0xF0, 0x91, 0xF0, 0xA3, 0xF0, 0xB5, + 0xFF, 0xF0, 0xC7, 
0xF0, 0xD9, 0xF0, 0xEB, 0xF0, 0xFD, 0xF1, 0x0F, 0xF1, 0x21, 0xF1, 0x33, 0xF1, 0x45, + 0xFF, 0xF1, 0x57, 0xF1, 0x69, 0xF1, 0x7B, 0xF1, 0x8D, 0xF1, 0x9F, 0xF1, 0xB1, 0xF1, 0xC3, 0xF1, 0xD5, + 0xFF, 0xF1, 0xE7, 0xF1, 0xF9, 0xF2, 0x0B, 0xF2, 0x1D, 0xF2, 0x2F, 0xF2, 0x41, 0xF2, 0x53, 0xF2, 0x65, + 0xFF, 0xF2, 0x77, 0xF2, 0x89, 0xF2, 0x9B, 0xF2, 0xAD, 0xF2, 0xBF, 0xF2, 0xD1, 0xF2, 0xE3, 0xF2, 0xF5, + 0xFF, 0xF3, 0x07, 0xF3, 0x19, 0xF3, 0x2B, 0xF3, 0x3D, 0xF3, 0x4F, 0xF3, 0x61, 0xF3, 0x73, 0xF3, 0x85, + 0xFF, 0xF3, 0x97, 0xF3, 0xA9, 0xF3, 0xBB, 0xF3, 0xCD, 0xF3, 0xDF, 0xF3, 0xF1, 0xF4, 0x03, 0xF4, 0x15, + 0xFF, 0xF4, 0x27, 0xF4, 0x39, 0xF4, 0x4B, 0xF4, 0x5D, 0xF4, 0x6F, 0xF4, 0x81, 0xF4, 0x93, 0xF4, 0xA5, + 0xFF, 0xF4, 0xB7, 0xF4, 0xC9, 0xF4, 0xDB, 0xF4, 0xED, 0xF4, 0xFF, 0xF5, 0x11, 0xF5, 0x23, 0xF5, 0x35, + 0xFF, 0xF5, 0x47, 0xF5, 0x59, 0xF5, 0x6B, 0xF5, 0x7D, 0xF5, 0x8F, 0xF5, 0xA1, 0xF5, 0xB3, 0xF5, 0xC5, + 0xFF, 0xF5, 0xD7, 0xF5, 0xE9, 0xF5, 0xFB, 0xF6, 0x0D, 0xF6, 0x1F, 0xF6, 0x31, 0xF6, 0x43, 0xF6, 0x55, + 0xFF, 0xF6, 0x67, 0xF6, 0x79, 0xF6, 0x8B, 0xF6, 0x9D, 0xF6, 0xAF, 0xF6, 0xC1, 0xF6, 0xD3, 0xF6, 0xE5, + 0xFF, 0xF6, 0xF7, 0xF7, 0x09, 0xF7, 0x1B, 0xF7, 0x2D, 0xF7, 0x3F, 0xF7, 0x51, 0xF7, 0x63, 0xF7, 0x75, + 0xFF, 0xF7, 0x87, 0xF7, 0x99, 0xF7, 0xAB, 0xF7, 0xBD, 0xF7, 0xCF, 0xF7, 0xE1, 0xF7, 0xF3, 0xF8, 0x05, + 0xFF, 0xF8, 0x17, 0xF8, 0x29, 0xF8, 0x3B, 0xF8, 0x4D, 0xF8, 0x5F, 0xF8, 0x71, 0xF8, 0x83, 0xF8, 0x95, + 0xFF, 0xF8, 0xA7, 0xF8, 0xB9, 0xF8, 0xCB, 0xF8, 0xDD, 0xF8, 0xEF, 0xF9, 0x01, 0xF9, 0x13, 0xF9, 0x25, + 0xFF, 0xF9, 0x37, 0xF9, 0x49, 0xF9, 0x5B, 0xF9, 0x6D, 0xF9, 0x7F, 0xF9, 0x91, 0xF9, 0xA3, 0xF9, 0xB5, + 0xFF, 0xF9, 0xC7, 0xF9, 0xD9, 0xF9, 0xEB, 0xF9, 0xFD, 0xFA, 0x0F, 0xFA, 0x21, 0xFA, 0x33, 0xFA, 0x45, + 0xFF, 0xFA, 0x57, 0xFA, 0x69, 0xFA, 0x7B, 0xFA, 0x8D, 0xFA, 0x9F, 0xFA, 0xB1, 0xFA, 0xC3, 0xFA, 0xD5, + 0xFF, 0xFA, 0xE7, 0xFA, 0xF9, 0xFB, 0x0B, 0xFB, 0x1D, 0xFB, 0x2F, 0xFB, 0x41, 0xFB, 0x53, 0xFB, 0x65, + 0xFF, 0xFB, 0x77, 0xFB, 0x89, 0xFB, 0x9B, 
0xFB, 0xAD, 0xFB, 0xBF, 0xFB, 0xD1, 0xFB, 0xE3, 0xFB, 0xF5, + 0xFF, 0xFC, 0x07, 0xFC, 0x19, 0xFC, 0x2B, 0xFC, 0x3D, 0xFC, 0x4F, 0xFC, 0x61, 0xFC, 0x73, 0xFC, 0x85, + 0xFF, 0xFC, 0x97, 0xFC, 0xA9, 0xFC, 0xBB, 0xFC, 0xCD, 0xFC, 0xDF, 0xFC, 0xF1, 0xFD, 0x03, 0xFD, 0x15, + 0xFF, 0xFD, 0x27, 0xFD, 0x39, 0xFD, 0x4B, 0xFD, 0x5D, 0xFD, 0x6F, 0xFD, 0x81, 0xFD, 0x93, 0xFD, 0xA5, + 0xFF, 0xFD, 0xB7, 0xFD, 0xC9, 0xFD, 0xDB, 0xFD, 0xED, 0xFD, 0xFF, 0xFE, 0x11, 0xFE, 0x23, 0xFE, 0x35, + 0xFF, 0xFE, 0x47, 0xFE, 0x59, 0xFE, 0x6B, 0xFE, 0x7D, 0xFE, 0x8F, 0xFE, 0xA1, 0xFE, 0xB3, 0xFE, 0xC5, + 0xFF, 0xFE, 0xD7, 0xFE, 0xE9, 0xFE, 0xFB, 0xFF, 0x0D, 0xFF, 0x1F, 0xFF, 0x31, 0xFF, 0x43, 0xFF, 0x55, + 0xFF, 0xFF, 0x67, 0xFF, 0x79, 0xFF, 0x8B, 0xFF, 0x9D, 0xFF, 0xAF, 0xFF, 0xC1, 0xFF, 0xD3, 0xFF, 0xE5, + 0x80, 0x5F, 0xF7, 0xAA, 0xAA, 0xAA, + ]; + AssertCompression(input, expected); + } + + [Test] + public void FindPatternAtMaxDistance() + { + byte[] input = new byte[MaxDistance + MinPattern]; + Array.Fill(input, 0xAA, 0, MinPattern); + Array.Fill(input, 0xBB, MinPattern, MaxDistance - MinPattern); + Array.Fill(input, 0xAA, MaxDistance, MinPattern); + + byte[] expected = [ + 0x07, 0xAA, 0xAA, 0xAA, 0xBB, 0xBB, 0xF0, 0x01, 0xF0, 0x13, 0xF0, 0x25, + 0xFF, 0xF0, 0x37, 0xF0, 0x49, 0xF0, 0x5B, 0xF0, 0x6D, 0xF0, 0x7F, 0xF0, 0x91, 0xF0, 0xA3, 0xF0, 0xB5, + 0xFF, 0xF0, 0xC7, 0xF0, 0xD9, 0xF0, 0xEB, 0xF0, 0xFD, 0xF1, 0x0F, 0xF1, 0x21, 0xF1, 0x33, 0xF1, 0x45, + 0xFF, 0xF1, 0x57, 0xF1, 0x69, 0xF1, 0x7B, 0xF1, 0x8D, 0xF1, 0x9F, 0xF1, 0xB1, 0xF1, 0xC3, 0xF1, 0xD5, + 0xFF, 0xF1, 0xE7, 0xF1, 0xF9, 0xF2, 0x0B, 0xF2, 0x1D, 0xF2, 0x2F, 0xF2, 0x41, 0xF2, 0x53, 0xF2, 0x65, + 0xFF, 0xF2, 0x77, 0xF2, 0x89, 0xF2, 0x9B, 0xF2, 0xAD, 0xF2, 0xBF, 0xF2, 0xD1, 0xF2, 0xE3, 0xF2, 0xF5, + 0xFF, 0xF3, 0x07, 0xF3, 0x19, 0xF3, 0x2B, 0xF3, 0x3D, 0xF3, 0x4F, 0xF3, 0x61, 0xF3, 0x73, 0xF3, 0x85, + 0xFF, 0xF3, 0x97, 0xF3, 0xA9, 0xF3, 0xBB, 0xF3, 0xCD, 0xF3, 0xDF, 0xF3, 0xF1, 0xF4, 0x03, 0xF4, 0x15, + 0xFF, 0xF4, 0x27, 0xF4, 0x39, 
0xF4, 0x4B, 0xF4, 0x5D, 0xF4, 0x6F, 0xF4, 0x81, 0xF4, 0x93, 0xF4, 0xA5, + 0xFF, 0xF4, 0xB7, 0xF4, 0xC9, 0xF4, 0xDB, 0xF4, 0xED, 0xF4, 0xFF, 0xF5, 0x11, 0xF5, 0x23, 0xF5, 0x35, + 0xFF, 0xF5, 0x47, 0xF5, 0x59, 0xF5, 0x6B, 0xF5, 0x7D, 0xF5, 0x8F, 0xF5, 0xA1, 0xF5, 0xB3, 0xF5, 0xC5, + 0xFF, 0xF5, 0xD7, 0xF5, 0xE9, 0xF5, 0xFB, 0xF6, 0x0D, 0xF6, 0x1F, 0xF6, 0x31, 0xF6, 0x43, 0xF6, 0x55, + 0xFF, 0xF6, 0x67, 0xF6, 0x79, 0xF6, 0x8B, 0xF6, 0x9D, 0xF6, 0xAF, 0xF6, 0xC1, 0xF6, 0xD3, 0xF6, 0xE5, + 0xFF, 0xF6, 0xF7, 0xF7, 0x09, 0xF7, 0x1B, 0xF7, 0x2D, 0xF7, 0x3F, 0xF7, 0x51, 0xF7, 0x63, 0xF7, 0x75, + 0xFF, 0xF7, 0x87, 0xF7, 0x99, 0xF7, 0xAB, 0xF7, 0xBD, 0xF7, 0xCF, 0xF7, 0xE1, 0xF7, 0xF3, 0xF8, 0x05, + 0xFF, 0xF8, 0x17, 0xF8, 0x29, 0xF8, 0x3B, 0xF8, 0x4D, 0xF8, 0x5F, 0xF8, 0x71, 0xF8, 0x83, 0xF8, 0x95, + 0xFF, 0xF8, 0xA7, 0xF8, 0xB9, 0xF8, 0xCB, 0xF8, 0xDD, 0xF8, 0xEF, 0xF9, 0x01, 0xF9, 0x13, 0xF9, 0x25, + 0xFF, 0xF9, 0x37, 0xF9, 0x49, 0xF9, 0x5B, 0xF9, 0x6D, 0xF9, 0x7F, 0xF9, 0x91, 0xF9, 0xA3, 0xF9, 0xB5, + 0xFF, 0xF9, 0xC7, 0xF9, 0xD9, 0xF9, 0xEB, 0xF9, 0xFD, 0xFA, 0x0F, 0xFA, 0x21, 0xFA, 0x33, 0xFA, 0x45, + 0xFF, 0xFA, 0x57, 0xFA, 0x69, 0xFA, 0x7B, 0xFA, 0x8D, 0xFA, 0x9F, 0xFA, 0xB1, 0xFA, 0xC3, 0xFA, 0xD5, + 0xFF, 0xFA, 0xE7, 0xFA, 0xF9, 0xFB, 0x0B, 0xFB, 0x1D, 0xFB, 0x2F, 0xFB, 0x41, 0xFB, 0x53, 0xFB, 0x65, + 0xFF, 0xFB, 0x77, 0xFB, 0x89, 0xFB, 0x9B, 0xFB, 0xAD, 0xFB, 0xBF, 0xFB, 0xD1, 0xFB, 0xE3, 0xFB, 0xF5, + 0xFF, 0xFC, 0x07, 0xFC, 0x19, 0xFC, 0x2B, 0xFC, 0x3D, 0xFC, 0x4F, 0xFC, 0x61, 0xFC, 0x73, 0xFC, 0x85, + 0xFF, 0xFC, 0x97, 0xFC, 0xA9, 0xFC, 0xBB, 0xFC, 0xCD, 0xFC, 0xDF, 0xFC, 0xF1, 0xFD, 0x03, 0xFD, 0x15, + 0xFF, 0xFD, 0x27, 0xFD, 0x39, 0xFD, 0x4B, 0xFD, 0x5D, 0xFD, 0x6F, 0xFD, 0x81, 0xFD, 0x93, 0xFD, 0xA5, + 0xFF, 0xFD, 0xB7, 0xFD, 0xC9, 0xFD, 0xDB, 0xFD, 0xED, 0xFD, 0xFF, 0xFE, 0x11, 0xFE, 0x23, 0xFE, 0x35, + 0xFF, 0xFE, 0x47, 0xFE, 0x59, 0xFE, 0x6B, 0xFE, 0x7D, 0xFE, 0x8F, 0xFE, 0xA1, 0xFE, 0xB3, 0xFE, 0xC5, + 0xFF, 0xFE, 0xD7, 0xFE, 0xE9, 0xFE, 0xFB, 0xFF, 0x0D, 
0xFF, 0x1F, 0xFF, 0x31, 0xFF, 0x43, 0xFF, 0x55, + 0xFF, 0xFF, 0x67, 0xFF, 0x79, 0xFF, 0x8B, 0xFF, 0x9D, 0xFF, 0xAF, 0xFF, 0xC1, 0xFF, 0xD3, 0xFF, 0xE5, + 0xC0, 0x2F, 0xF7, 0x0F, 0xFF, + ]; + AssertCompression(input, expected); + } + + private void AssertCompression(byte[] input, byte[] expected) + { + using DataStream inputStream = DataStreamFactory.FromArray(input); + + Stream actual = encoder.Convert(inputStream); + + Assert.Multiple(() => { + Assert.That(actual.Length, Is.EqualTo(expected.Length), "Stream lengths does not match"); + + long checkLength = Math.Min(actual.Length, expected.Length); + actual.Position = 0; + for (int i = 0; i < checkLength; i++) { + Assert.That( + actual.ReadByte().ToString("X2"), + Is.EqualTo(expected[i].ToString("X2")), + $"Position {i} does not match"); + } + }); + } +} diff --git a/src/Ekona/Compression/LzssEncoder.cs b/src/Ekona/Compression/LzssEncoder.cs index 0059b42..386fac3 100644 --- a/src/Ekona/Compression/LzssEncoder.cs +++ b/src/Ekona/Compression/LzssEncoder.cs @@ -18,6 +18,7 @@ public class LzssEncoder : private readonly byte[] windowBuffer = new byte[MaxDistance + MaxSequenceLength]; private Stream output = null!; + private readonly bool hasHeader; private byte currentFlag; private long flagPosition; @@ -28,6 +29,7 @@ public class LzssEncoder : /// The output of the converter will be in a new memory stream each time. public LzssEncoder() { + hasHeader = true; } /// @@ -38,6 +40,18 @@ public LzssEncoder(Stream output) { ArgumentNullException.ThrowIfNull(output); this.output = output; + hasHeader = true; + } + + /// + /// Initializes a new instance of the class. + /// + /// Stream to write the output. + /// Value indicating whether the output stream will include the compression header. 
+ public LzssEncoder(Stream output, bool hasHeader) + : this(output) + { + this.hasHeader = hasHeader; } /// @@ -52,13 +66,18 @@ public BinaryFormat Convert(IBinary source) /// public Stream Convert(Stream source) { + ArgumentNullException.ThrowIfNull(source); + source.Position = 0; + output ??= new MemoryStream(); - long decompressedLength = source.Length; - output.WriteByte(0x10); // compression ID - output.WriteByte((byte)(decompressedLength & 0xFF)); - output.WriteByte((byte)(decompressedLength >> 8)); - output.WriteByte((byte)(decompressedLength >> 16)); + if (hasHeader) { + long decompressedLength = source.Length; + output.WriteByte(0x10); // compression ID + output.WriteByte((byte)(decompressedLength & 0xFF)); + output.WriteByte((byte)(decompressedLength >> 8)); + output.WriteByte((byte)(decompressedLength >> 16)); + } // Prepare the initial token flag currentFlag = 0; @@ -66,7 +85,6 @@ public Stream Convert(Stream source) output.WriteByte(0); int actionsEncoded = 0; - source.Position = 0; while (source.Position < source.Length) { if (actionsEncoded == 8) { FlushFlag(true); @@ -125,12 +143,13 @@ private void FlushFlag(bool hasMoreData) int windowLen = (int)(windowPos + MaxDistance > input.Position ? 
input.Position - windowPos : MaxDistance); - if (windowLen == 0) { + + // To be VRAM-compatible we need a window of minimum two bytes + if (windowLen <= 1) { return (0, 0); } Span window = windowBuffer.AsSpan(0, windowLen + maxPattern); - long inputPos = input.Position; input.Position = windowPos; _ = input.Read(window); @@ -138,6 +157,8 @@ private void FlushFlag(bool hasMoreData) Span fullPattern = window[^maxPattern..]; + // To be VRAM compatible we don't start sequences from the last byte + // We start searching from the bottom of the buffer not the last byte int bestLength = -1; int bestPos = -1; for (int pos = 0; pos < windowLen - 1; pos++) { @@ -151,7 +172,7 @@ private void FlushFlag(bool hasMoreData) if (length > bestLength) { bestLength = length; bestPos = pos; - if (length == MaxSequenceLength) { + if (length == maxPattern) { return (windowLen - bestPos - 1, bestLength); } } From c05c8f1567b8c7124e29df7e2b88d6bb4b77b38b Mon Sep 17 00:00:00 2001 From: Benito Palacios Sanchez Date: Thu, 29 Feb 2024 19:22:34 +0100 Subject: [PATCH 9/9] :sparkles: Implement RLE decoder --- src/Ekona/Compression/RleDecoder.cs | 101 ++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 src/Ekona/Compression/RleDecoder.cs diff --git a/src/Ekona/Compression/RleDecoder.cs b/src/Ekona/Compression/RleDecoder.cs new file mode 100644 index 0000000..1d1b3da --- /dev/null +++ b/src/Ekona/Compression/RleDecoder.cs @@ -0,0 +1,101 @@ +namespace SceneGate.Ekona.Compression; + +using System; +using System.IO; +using Yarhl.FileFormat; +using Yarhl.IO; + +/// +/// Decode / Decompress blocks of data with the RLE DS/GBA algorithm. +/// +public class RleDecoder : + IConverter, + IConverter +{ + private const int MinSequence = 2; + + private readonly Stream output; + private readonly bool hasHeader; + + /// + /// Initializes a new instance of the class. 
+    ///
+    public RleDecoder()
+    {
+        output = new MemoryStream();
+        hasHeader = true;
+    }
+
+    /// <summary>
+    /// Initializes a new instance of the <see cref="RleDecoder"/> class that
+    /// decodes into a new in-memory stream and expects the 4-byte header.
+    /// </summary>
+    /// <param name="decompressedLength">
+    /// Initial capacity, in bytes, of the in-memory output stream.
+    /// </param>
+    public RleDecoder(int decompressedLength)
+    {
+        output = new MemoryStream(decompressedLength);
+        hasHeader = true;
+    }
+
+    /// <summary>
+    /// Initializes a new instance of the <see cref="RleDecoder"/> class that
+    /// decodes into a caller-provided stream.
+    /// </summary>
+    /// <param name="output">The output stream to write the decompressed data.</param>
+    /// <param name="hasHeader">Value indicating whether the input stream has a 4-bytes header.</param>
+    /// <exception cref="ArgumentNullException"><paramref name="output"/> is null.</exception>
+    public RleDecoder(Stream output, bool hasHeader)
+    {
+        ArgumentNullException.ThrowIfNull(output);
+        this.output = output;
+        this.hasHeader = hasHeader;
+    }
+
+    /// <summary>
+    /// Decompresses the stream of a binary format.
+    /// </summary>
+    /// <param name="source">The format with the compressed stream.</param>
+    /// <returns>A new binary format wrapping the decoder output stream.</returns>
+    /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
+    public BinaryFormat Convert(IBinary source)
+    {
+        ArgumentNullException.ThrowIfNull(source);
+        return new BinaryFormat(Convert(source.Stream));
+    }
+
+    /// <summary>
+    /// Decompresses an RLE stream into the output stream of this decoder.
+    /// </summary>
+    /// <remarks>
+    /// The source is always read from position 0. When a header is expected
+    /// and it declares a non-zero length, decoding stops once that many bytes
+    /// have been produced, so trailing bytes (e.g. alignment padding) are not
+    /// decoded. Otherwise the whole source is decoded. Every call writes to
+    /// the same output stream.
+    /// </remarks>
+    /// <param name="source">The compressed stream. Its position and length are used.</param>
+    /// <returns>The output stream that received the decoded bytes.</returns>
+    /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
+    /// <exception cref="EndOfStreamException">
+    /// The source is shorter than the header or ends in the middle of a sequence.
+    /// </exception>
+    /// <exception cref="FormatException">The header cannot be read or has a wrong ID.</exception>
+    public Stream Convert(Stream source)
+    {
+        ArgumentNullException.ThrowIfNull(source);
+
+        source.Position = 0;
+
+        // 0 means "unknown": decode until the end of the source.
+        long expectedLength = 0;
+        if (hasHeader) {
+            if (source.Length < 4) {
+                throw new EndOfStreamException();
+            }
+
+            // byte 0: compression ID (0x30), bytes 1-3: uncompressed length (little endian)
+            Span<byte> header = stackalloc byte[4];
+            if (source.Read(header) != 4) {
+                throw new FormatException("Insufficient bytes");
+            }
+
+            if (header[0] != 0x30) {
+                throw new FormatException("Invalid header");
+            }
+
+            expectedLength = header[1] | (header[2] << 8) | (header[3] << 16);
+        }
+
+        long written = 0;
+        while (source.Position < source.Length) {
+            // Bytes after the declared length are not sequences; decoding
+            // them would append garbage to the output.
+            if (expectedLength > 0 && written >= expectedLength) {
+                break;
+            }
+
+            // bit 7: sequence kind (1 = repeated byte), bits 0-6: length - 1
+            int seqInfo = ReadByteOrThrow(source);
+            bool isCompressed = (seqInfo >> 7) == 1;
+            int length = (seqInfo & 0x7F) + 1;
+
+            if (isCompressed) {
+                length += MinSequence;
+                byte value = ReadByteOrThrow(source);
+                for (int i = 0; i < length; i++) {
+                    output.WriteByte(value);
+                }
+            } else {
+                for (int i = 0; i < length; i++) {
+                    output.WriteByte(ReadByteOrThrow(source));
+                }
+            }
+
+            written += length;
+        }
+
+        return output;
+    }
+
+    /// <summary>
+    /// Reads one byte, failing instead of returning the -1 end-of-stream marker.
+    /// </summary>
+    /// <param name="source">The stream to read from.</param>
+    /// <returns>The byte read.</returns>
+    /// <exception cref="EndOfStreamException">There are no more bytes to read.</exception>
+    private static byte ReadByteOrThrow(Stream source)
+    {
+        // Casting -1 to byte would silently turn truncated input into 0xFF.
+        int value = source.ReadByte();
+        if (value < 0) {
+            throw new EndOfStreamException("Unexpected end of the RLE stream");
+        }
+
+        return (byte)value;
+    }
+}