-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathkencode.c
More file actions
332 lines (286 loc) · 10.9 KB
/
Copy pathkencode.c
File metadata and controls
332 lines (286 loc) · 10.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
/*
* This file is part of the Aaru Data Preservation Suite.
* Copyright (c) 2019-2026 Natalia Portillo.
*
* This library is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of the
* License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
// KenCode decompressor
// Reverse-engineered from Apple DiskCopy 6.3.3 resource fork hdi2 ID 128
// Algorithm: LZSS with MSB-first bitstream and variable-length prefix codes
//
// Function map from PEF disassembly:
// 0x0000 plugin_entry Component Manager dispatcher (selectors 0-3)
// 0x00CC read_match_info Reads one token (match or literal run) from bitstream
// 0x01AC decompress Main decompression loop
// 0x03E4 bs_init Initialize bitstream reader
// 0x03F4 read_unary Count consecutive 1-bits (MSB-first)
// 0x046C read_bits Read N bits from bitstream
// 0x0510 read_literals Read N literal bytes from bitstream
// 0x05C0 decode_lit_count Decode literal run length
// 0x0750 decode_match_len Decode match length with prefix code
// 0x0A84 decode_offset Decode match offset (multi-level)
// 0x0C2C offset_dispatch Select distance class based on output position
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include "library.h"
#include "kencode.h"
// ============================================================================
// MSB-first bitstream reader
// ============================================================================
typedef struct
{
const uint8_t *data;
uint32_t bit_pos;
uint32_t bit_end;
} kc_bs;
static inline void bs_init(kc_bs *bs, const uint8_t *data, size_t size)
{
bs->data = data;
bs->bit_pos = 0;
bs->bit_end = (uint32_t)(size * 8);
}
// PEF 0x046C: Read N bits MSB-first.
// The PEF reads a 32-bit big-endian word at (bit_pos / 8), shifts right
// by (32 - bit_within_byte - n), and masks to n bits.
static inline uint32_t bs_read(kc_bs *bs, int n)
{
if(n <= 0 || bs->bit_pos + (uint32_t)n > bs->bit_end) return 0;
uint32_t byte_off = bs->bit_pos >> 3;
int bit_off = bs->bit_pos & 7;
int shift = 32 - bit_off - n;
// Read a 32-bit big-endian word starting at byte_off
// Need to handle reads near end of buffer
uint32_t word = 0;
size_t max_byte = (bs->bit_end + 7) / 8;
for(int i = 0; i < 4; i++)
{
word <<= 8;
if(byte_off + i < max_byte) word |= bs->data[byte_off + i];
}
uint32_t mask = ((uint32_t)1 << n) - 1;
bs->bit_pos += n;
if(shift >= 0) return (word >> shift) & mask;
// shift < 0: need bits from next word (rare: n > 24 or misaligned)
word <<= (-shift);
if(byte_off + 4 < max_byte) word |= (bs->data[byte_off + 4] >> (8 + shift));
return word & mask;
}
// PEF 0x03F4: Count consecutive 1-bits, stopping at first 0-bit or max_count.
static inline uint32_t bs_read_unary(kc_bs *bs, int max_count)
{
uint32_t count = 0;
while((int)count < max_count && bs->bit_pos < bs->bit_end)
{
uint32_t byte_off = bs->bit_pos >> 3;
int bit_off = bs->bit_pos & 7;
bs->bit_pos++;
if(!(bs->data[byte_off] & (0x80 >> bit_off))) return count; // hit 0
count++;
}
return count;
}
// PEF 0x0510: Read N literal bytes from bitstream. Each byte is 8 bits
// at the current (possibly unaligned) bit position.
static void bs_read_bytes(kc_bs *bs, uint8_t *dst, int count)
{
uint32_t byte_off = bs->bit_pos >> 3;
int bit_off = bs->bit_pos & 7;
int rshift = 8 - bit_off;
for(int i = 0; i < count; i++)
{
uint16_t w = ((uint16_t)bs->data[byte_off] << 8) | bs->data[byte_off + 1];
dst[i] = (uint8_t)(w >> rshift);
byte_off++;
}
bs->bit_pos += count * 8;
}
// ============================================================================
// VLC decoders
// ============================================================================
// PEF 0x05C0: Decode literal run length (1..63)
static int decode_lit_count(kc_bs *bs)
{
// "0" → 1
if(bs_read(bs, 1) == 0) return 1;
// "1" + 2 bits
uint32_t v = bs_read(bs, 2);
if(v == 0) return 2;
if(v == 1) return 3;
if(v == 2) return (int)bs_read(bs, 2) + 4; // 4-7
// v==3: 4 more bits
uint32_t e = bs_read(bs, 4);
if(e <= 7) return (int)e + 8; // 8-15
if(e <= 11) return (int)(e * 4 + bs_read(bs, 2)) - 16; // 16-31
return (int)(e * 8 + bs_read(bs, 3)) - 64; // 32-63
}
// PEF 0x0750: Decode match length raw value (0..2042+)
// Value 0 with had_literals=1 → literal run marker
// Otherwise: actual match length = raw + 2 (or +3 if previous was literals)
static int decode_match_len(kc_bs *bs)
{
uint32_t pfx = bs_read_unary(bs, 10);
switch(pfx)
{
case 0:
return (int)bs_read(bs, 1); // 0-1
case 1:
{
uint32_t b = bs_read(bs, 1);
if(b == 0) return 2;
return (int)bs_read(bs, 1) + 3; // 3-4
}
case 2:
{
uint32_t b = bs_read(bs, 1);
if(b == 0) return (int)bs_read(bs, 1) + 5; // 5-6
return (int)bs_read(bs, 2) + 7; // 7-10
}
case 3:
return (int)bs_read(bs, 3) + 11; // 11-18
case 4:
return (int)bs_read(bs, 3) + 19; // 19-26
case 5:
return (int)bs_read(bs, 5) + 27; // 27-58
case 6:
return (int)bs_read(bs, 6) + 59; // 59-122
case 7:
return (int)bs_read(bs, 7) + 123; // 123-250
case 8:
return (int)bs_read(bs, 8) + 251; // 251-506
case 9:
return (int)bs_read(bs, 9) + 507; // 507-1018
default:
return (int)bs_read(bs, 10) + 1019; // 1019+
}
}
// PEF 0x0A84: Decode match offset given distance class (dist_bits)
static int decode_offset(kc_bs *bs, int dist_bits, int hash_size)
{
int pw = 1 << dist_bits;
// Prefix "0" + dist_bits → short offset
if(bs_read(bs, 1) == 0) return (int)bs_read(bs, dist_bits) + 1;
// Prefix "10" + (dist_bits+2) bits → medium offset
if(bs_read(bs, 1) == 0) return pw + 1 + (int)bs_read(bs, dist_bits + 2);
// Prefix "11" → long offset, escalating levels
int pw5 = pw * 5;
if(hash_size <= pw5 + 2) return pw5 + 1 + (int)bs_read(bs, 1);
if(hash_size <= pw5 + 4) return pw5 + 1 + (int)bs_read(bs, 2);
// Escalating loop
int max_level = dist_bits + 4;
int scale = 4;
int level = 3;
int threshold = pw5 + 4;
if(max_level >= 3)
{
while(level <= max_level)
{
threshold += scale;
// 68K helper at 0x0ABA: clamp ONLY the exact value 0x680 to 0x66C
int clamped = (threshold == 0x680) ? 0x66C : threshold;
if(hash_size <= clamped || level == max_level) break;
scale <<= 1;
level++;
}
}
return pw5 + 1 + (int)bs_read(bs, level);
}
// PEF 0x0C2C / 68K 0x0D48: Select distance class based on output position and call decode_offset
// out_pos determines the distance class, hash_size (from state init) caps it for large positions
static int offset_dispatch(kc_bs *bs, int out_pos, int hash_size)
{
int dist_bits;
if(out_pos <= 0xa)
dist_bits = 0;
else if(out_pos <= 0x14)
dist_bits = 1;
else if(out_pos <= 0x28)
dist_bits = 2;
else if(out_pos <= 0x50)
dist_bits = 3;
else if(out_pos <= 0xa0)
dist_bits = 4;
else if(out_pos <= 0x2a0)
dist_bits = 5;
else if(out_pos <= 0x3e8)
dist_bits = 6;
else if(out_pos <= 0xa80 || hash_size <= 0x800)
dist_bits = 7;
else if(out_pos <= 0x1500 || hash_size <= 0x1000)
dist_bits = 8;
else if(out_pos <= 0x2a00 || hash_size <= 0x2000)
dist_bits = 9;
else if(out_pos <= 0x5400 || hash_size <= 0x4000)
dist_bits = 10;
else if(out_pos <= 0xa800 || hash_size <= 0x8000)
dist_bits = 11;
else if(out_pos <= 0x11170 || hash_size <= 0x10000)
dist_bits = 12;
else if(out_pos <= 0x2a000 || hash_size <= 0x20000)
dist_bits = 13;
else
dist_bits = 14;
// 68K passes (out_pos, dist_bits, bitstream) to decode_offset
// but decode_offset internally uses out_pos for hash_size comparisons too
return decode_offset(bs, dist_bits, out_pos);
}
// ============================================================================
// Main decompressor — PEF 0x01AC
// ============================================================================
AARU_EXPORT int32_t AARU_CALL AARU_kencode_decode_buffer(uint8_t *dst_buffer, size_t *dst_size,
const uint8_t *src_buffer, size_t src_size)
{
if(!dst_buffer || !dst_size || !src_buffer || src_size == 0) return -1;
size_t max_out = *dst_size;
kc_bs bs;
bs_init(&bs, src_buffer, src_size);
size_t out_pos = 0;
uint8_t had_literals = 1; // PEF 0x01C0-0x01C4
// Plugin init: hash_size from clamp_hash_size(0x2800) → 0x2800 (in range [0x800, 0x30000])
int hash_size = 0x2800;
while(out_pos < max_out)
{
// PEF 0x00CC: read_match_info
int raw = decode_match_len(&bs);
if(raw <= 0 && had_literals)
{
// Literal run
int lit_count = decode_lit_count(&bs);
if(out_pos + lit_count > max_out) return -4;
bs_read_bytes(&bs, dst_buffer + out_pos, lit_count);
out_pos += lit_count;
// 68K 0x03C4-0x03D2: had_literals = (lit_count >= 63) ? 1 : 0
// PPC 0x0180-0x0194: equivalent via subfc/adde/subfe/clrlwi
had_literals = (lit_count >= 63) ? 1 : 0;
}
else
{
// Match reference
int match_len;
if(had_literals == 0)
match_len = raw + 3; // PEF 0x0130: +3 when coming after non-literal
else
match_len = raw + 2; // PEF 0x011C: +2 when had_literals is set
had_literals = 1; // PEF 0x0140
int offset = offset_dispatch(&bs, (int)out_pos, hash_size);
if(out_pos + match_len > max_out) return -4;
if(offset > (int)out_pos) return -4;
// Byte-by-byte copy (may overlap)
for(int i = 0; i < match_len; i++) dst_buffer[out_pos + i] = dst_buffer[out_pos - offset + i];
out_pos += match_len;
}
}
*dst_size = out_pos;
return 0;
}