Add notes about the dictionaries used
This commit is contained in:
@@ -7,6 +7,9 @@
|
|||||||
* Andy McFadden: https://fadden.com/
|
* Andy McFadden: https://fadden.com/
|
||||||
* LZP: https://en.wikibooks.org/wiki/Data_Compression/Dictionary_compression#LZP
|
* LZP: https://en.wikibooks.org/wiki/Data_Compression/Dictionary_compression#LZP
|
||||||
*
|
*
|
||||||
|
* I wouldn't recommend using LZPF on anything but HTML and other text-based data (unless the data has many repeating bytes)
|
||||||
|
* LZPF can be replaced with gzip for LC2 and newer boxes. Classic is stuck with LZPF.
|
||||||
|
*
|
||||||
* Reverse engineered and ported by: Eric MacDonald (eMac)
|
* Reverse engineered and ported by: Eric MacDonald (eMac)
|
||||||
* Modified By: zefie
|
* Modified By: zefie
|
||||||
**/
|
**/
|
||||||
@@ -25,97 +28,123 @@ class WTVLzpf {
|
|||||||
ring_buffer = new Uint8Array(0x2000)
|
ring_buffer = new Uint8Array(0x2000)
|
||||||
encoded_data = [];
|
encoded_data = [];
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This is used to encode (one-byte) literals with no previous tracked occurrence.
|
||||||
|
*
|
||||||
|
* Bytes with best compression: SPACE and LF and e"/<>Tainoprst
|
||||||
|
* Bytes with good compression: TAB and ,-.1=ABCDEFGHILNOPRSbcdfghlmuw
|
||||||
|
* Bytes that don't change the length of the bit stream: 024:MW_kvy
|
||||||
|
* (The rest will increase the length of bit stream)
|
||||||
|
*
|
||||||
|
* I don't know what process they used to build this dictionary.
|
||||||
|
* I assume they frequency-scanned a bunch of HTML files they had.
|
||||||
|
*
|
||||||
|
* Using ISO-8859-1 character encoding.
|
||||||
|
* Didn't seem like they used a different dictionary for Japan builds (ISO-2022-JP).
|
||||||
|
**/
|
||||||
nomatchEncode = [
|
nomatchEncode = [
|
||||||
|
/* [FLATTENED HUFFMAN CODE, CODE BIT LENGTH] */
|
||||||
[0x0000, 0x10], [0x0001, 0x10], [0x0002, 0x10],
|
[0x0000, 0x10] /* NUL */, [0x0001, 0x10] /* SOH */, [0x0002, 0x10] /* STX */,
|
||||||
[0x0003, 0x10], [0x0004, 0x10], [0x009A, 0x0F],
|
[0x0003, 0x10] /* ETX */, [0x0004, 0x10] /* EOT */, [0x009A, 0x0F] /* ENQ */,
|
||||||
[0x0005, 0x10], [0x009C, 0x0F], [0x009E, 0x0F],
|
[0x0005, 0x10] /* ACK */, [0x009C, 0x0F] /* BEL */, [0x009E, 0x0F] /* BS */,
|
||||||
[0x3400, 0x06], [0x7000, 0x05], [0x00A0, 0x0F],
|
[0x3400, 0x06] /* TAB */, [0x7000, 0x05] /* LF */, [0x00A0, 0x0F] /* VT */,
|
||||||
[0x0006, 0x10], [0x0380, 0x09], [0x0007, 0x10],
|
[0x0006, 0x10] /* FF */, [0x0380, 0x09] /* CR */, [0x0007, 0x10] /* SO */,
|
||||||
[0x0008, 0x10], [0x0009, 0x10], [0x000A, 0x10],
|
[0x0008, 0x10] /* SI */, [0x0009, 0x10] /* DLE */, [0x000A, 0x10] /* DC1 */,
|
||||||
[0x000B, 0x10], [0x000C, 0x10], [0x000D, 0x10],
|
[0x000B, 0x10] /* DC2 */, [0x000C, 0x10] /* DC3 */, [0x000D, 0x10] /* DC4 */,
|
||||||
[0x000E, 0x10], [0x000F, 0x10], [0x00A2, 0x0F],
|
[0x000E, 0x10] /* NAK */, [0x000F, 0x10] /* SYN */, [0x00A2, 0x0F] /* ETB */,
|
||||||
[0x0010, 0x10], [0x0011, 0x10], [0x0012, 0x10],
|
[0x0010, 0x10] /* CAN */, [0x0011, 0x10] /* EM */, [0x0012, 0x10] /* SUB */,
|
||||||
[0x0013, 0x10], [0x0014, 0x10], [0x0015, 0x10],
|
[0x0013, 0x10] /* ESC */, [0x0014, 0x10] /* FS */, [0x0015, 0x10] /* GS */,
|
||||||
[0x0016, 0x10], [0x0017, 0x10], [0xE000, 0x04],
|
[0x0016, 0x10] /* RS */, [0x0017, 0x10] /* US */, [0xE000, 0x04] /* SPACE */,
|
||||||
[0x0200, 0x0A], [0x7800, 0x05], [0x0400, 0x09],
|
[0x0200, 0x0A] /* ! */, [0x7800, 0x05] /* " */, [0x0400, 0x09] /* # */,
|
||||||
[0x00B0, 0x0E], [0x0018, 0x10], [0x0120, 0x0B],
|
[0x00B0, 0x0E] /* $ */, [0x0018, 0x10] /* % */, [0x0120, 0x0B] /* & */,
|
||||||
[0x0480, 0x09], [0x0140, 0x0B], [0x0160, 0x0B],
|
[0x0480, 0x09] /* ' */, [0x0140, 0x0B] /* ( */, [0x0160, 0x0B] /* ) */,
|
||||||
[0x0240, 0x0A], [0x00B8, 0x0D], [0x1400, 0x07],
|
[0x0240, 0x0A] /* * */, [0x00B8, 0x0D] /* + */, [0x1400, 0x07] /* , */,
|
||||||
[0x1600, 0x07], [0x3800, 0x06], [0x8000, 0x05],
|
[0x1600, 0x07] /* - */, [0x3800, 0x06] /* . */, [0x8000, 0x05] /* / */,
|
||||||
[0x0A00, 0x08], [0x1800, 0x07], [0x0B00, 0x08],
|
[0x0A00, 0x08] /* 0 */, [0x1800, 0x07] /* 1 */, [0x0B00, 0x08] /* 2 */,
|
||||||
[0x0500, 0x09], [0x0C00, 0x08], [0x0580, 0x09],
|
[0x0500, 0x09] /* 3 */, [0x0C00, 0x08] /* 4 */, [0x0580, 0x09] /* 5 */,
|
||||||
[0x0600, 0x09], [0x0680, 0x09], [0x0700, 0x09],
|
[0x0600, 0x09] /* 6 */, [0x0680, 0x09] /* 7 */, [0x0700, 0x09] /* 8 */,
|
||||||
[0x0780, 0x09], [0x0D00, 0x08], [0x0180, 0x0B],
|
[0x0780, 0x09] /* 9 */, [0x0D00, 0x08] /* : */, [0x0180, 0x0B] /* ; */,
|
||||||
[0x8800, 0x05], [0x3C00, 0x06], [0x9000, 0x05],
|
[0x8800, 0x05] /* < */, [0x3C00, 0x06] /* = */, [0x9000, 0x05] /* > */,
|
||||||
[0x0280, 0x0A], [0x00B4, 0x0E], [0x4000, 0x06],
|
[0x0280, 0x0A] /* ? */, [0x00B4, 0x0E] /* @ */, [0x4000, 0x06] /* A */,
|
||||||
[0x1A00, 0x07], [0x1C00, 0x07], [0x1E00, 0x07],
|
[0x1A00, 0x07] /* B */, [0x1C00, 0x07] /* C */, [0x1E00, 0x07] /* D */,
|
||||||
[0x4400, 0x06], [0x2000, 0x07], [0x2200, 0x07],
|
[0x4400, 0x06] /* E */, [0x2000, 0x07] /* F */, [0x2200, 0x07] /* G */,
|
||||||
[0x2400, 0x07], [0x4800, 0x06], [0x01A0, 0x0B],
|
[0x2400, 0x07] /* H */, [0x4800, 0x06] /* I */, [0x01A0, 0x0B] /* J */,
|
||||||
[0x02C0, 0x0A], [0x2600, 0x07], [0x0E00, 0x08],
|
[0x02C0, 0x0A] /* K */, [0x2600, 0x07] /* L */, [0x0E00, 0x08] /* M */,
|
||||||
[0x4C00, 0x06], [0x5000, 0x06], [0x2800, 0x07],
|
[0x4C00, 0x06] /* N */, [0x5000, 0x06] /* O */, [0x2800, 0x07] /* P */,
|
||||||
[0x00C0, 0x0C], [0x5400, 0x06], [0x2A00, 0x07],
|
[0x00C0, 0x0C] /* Q */, [0x5400, 0x06] /* R */, [0x2A00, 0x07] /* S */,
|
||||||
[0x9800, 0x05], [0x0800, 0x09], [0x0880, 0x09],
|
[0x9800, 0x05] /* T */, [0x0800, 0x09] /* U */, [0x0880, 0x09] /* V */,
|
||||||
[0x0F00, 0x08], [0x00D0, 0x0C], [0x0300, 0x0A],
|
[0x0F00, 0x08] /* W */, [0x00D0, 0x0C] /* X */, [0x0300, 0x0A] /* Y */,
|
||||||
[0x0900, 0x09], [0x0019, 0x10], [0x001A, 0x10],
|
[0x0900, 0x09] /* Z */, [0x0019, 0x10] /* [ */, [0x001A, 0x10] /* \ */,
|
||||||
[0x001B, 0x10], [0x001C, 0x10], [0x1000, 0x08],
|
[0x001B, 0x10] /* ] */, [0x001C, 0x10] /* ^ */, [0x1000, 0x08] /* _ */,
|
||||||
[0x001D, 0x10], [0xA000, 0x05], [0x2C00, 0x07],
|
[0x001D, 0x10] /* ` */, [0xA000, 0x05] /* a */, [0x2C00, 0x07] /* b */,
|
||||||
[0x5800, 0x06], [0x5C00, 0x06], [0xF000, 0x04],
|
[0x5800, 0x06] /* c */, [0x5C00, 0x06] /* d */, [0xF000, 0x04] /* e */,
|
||||||
[0x2E00, 0x07], [0x3000, 0x07], [0x6000, 0x06],
|
[0x2E00, 0x07] /* f */, [0x3000, 0x07] /* g */, [0x6000, 0x06] /* h */,
|
||||||
[0xA800, 0x05], [0x01C0, 0x0B], [0x1100, 0x08],
|
[0xA800, 0x05] /* i */, [0x01C0, 0x0B] /* j */, [0x1100, 0x08] /* k */,
|
||||||
[0x6400, 0x06], [0x6800, 0x06], [0xB000, 0x05],
|
[0x6400, 0x06] /* l */, [0x6800, 0x06] /* m */, [0xB000, 0x05] /* n */,
|
||||||
[0xB800, 0x05], [0xC000, 0x05], [0x01E0, 0x0B],
|
[0xB800, 0x05] /* o */, [0xC000, 0x05] /* p */, [0x01E0, 0x0B] /* q */,
|
||||||
[0xC800, 0x05], [0xD000, 0x05], [0xD800, 0x05],
|
[0xC800, 0x05] /* r */, [0xD000, 0x05] /* s */, [0xD800, 0x05] /* t */,
|
||||||
[0x3200, 0x07], [0x1200, 0x08], [0x6C00, 0x06],
|
[0x3200, 0x07] /* u */, [0x1200, 0x08] /* v */, [0x6C00, 0x06] /* w */,
|
||||||
[0x0980, 0x09], [0x1300, 0x08], [0x0340, 0x0A],
|
[0x0980, 0x09] /* x */, [0x1300, 0x08] /* y */, [0x0340, 0x0A] /* z */,
|
||||||
[0x00E0, 0x0C], [0x00F0, 0x0C], [0x0100, 0x0C],
|
[0x00E0, 0x0C] /* { */, [0x00F0, 0x0C] /* | */, [0x0100, 0x0C] /* } */,
|
||||||
[0x0110, 0x0C], [0x001E, 0x10], [0x001F, 0x10],
|
[0x0110, 0x0C] /* ~ */, [0x001E, 0x10] /* DEL */, [0x001F, 0x10] /* <20> */,
|
||||||
[0x0020, 0x10], [0x0021, 0x10], [0x0022, 0x10],
|
[0x0020, 0x10] /* */, [0x0021, 0x10] /* <20> */, [0x0022, 0x10] /* <20> */,
|
||||||
[0x0023, 0x10], [0x0024, 0x10], [0x0025, 0x10],
|
[0x0023, 0x10] /* <20> */, [0x0024, 0x10] /* <20> */, [0x0025, 0x10] /* <20> */,
|
||||||
[0x0026, 0x10], [0x0027, 0x10], [0x0028, 0x10],
|
[0x0026, 0x10] /* <20> */, [0x0027, 0x10] /* <20> */, [0x0028, 0x10] /* <20> */,
|
||||||
[0x0029, 0x10], [0x002A, 0x10], [0x002B, 0x10],
|
[0x0029, 0x10] /* <20> */, [0x002A, 0x10] /* <20> */, [0x002B, 0x10] /* <20> */,
|
||||||
[0x002C, 0x10], [0x002D, 0x10], [0x002E, 0x10],
|
[0x002C, 0x10] /* */, [0x002D, 0x10] /* <20> */, [0x002E, 0x10] /* */,
|
||||||
[0x002F, 0x10], [0x00A4, 0x0F], [0x00A6, 0x0F],
|
[0x002F, 0x10] /* */, [0x00A4, 0x0F] /* <20> */, [0x00A6, 0x0F] /* <20> */,
|
||||||
[0x00A8, 0x0F], [0x0030, 0x10], [0x0031, 0x10],
|
[0x00A8, 0x0F] /* <20> */, [0x0030, 0x10] /* <20> */, [0x0031, 0x10] /* <20> */,
|
||||||
[0x0032, 0x10], [0x0033, 0x10], [0x0034, 0x10],
|
[0x0032, 0x10] /* <20> */, [0x0033, 0x10] /* <20> */, [0x0034, 0x10] /* <20> */,
|
||||||
[0x0035, 0x10], [0x0036, 0x10], [0x0037, 0x10],
|
[0x0035, 0x10] /* <20> */, [0x0036, 0x10] /* <20> */, [0x0037, 0x10] /* <20> */,
|
||||||
[0x0038, 0x10], [0x0039, 0x10], [0x003A, 0x10],
|
[0x0038, 0x10] /* <20> */, [0x0039, 0x10] /* */, [0x003A, 0x10] /* <20> */,
|
||||||
[0x003B, 0x10], [0x003C, 0x10], [0x003D, 0x10],
|
[0x003B, 0x10] /* <20> */, [0x003C, 0x10] /* NBSP*/, [0x003D, 0x10] /* <20> */,
|
||||||
[0x003E, 0x10], [0x003F, 0x10], [0x0040, 0x10],
|
[0x003E, 0x10] /* <20> */, [0x003F, 0x10] /* <20> */, [0x0040, 0x10] /* <20> */,
|
||||||
[0x0041, 0x10], [0x0042, 0x10], [0x0043, 0x10],
|
[0x0041, 0x10] /* <20> */, [0x0042, 0x10] /* <20> */, [0x0043, 0x10] /* <20> */,
|
||||||
[0x0044, 0x10], [0x0045, 0x10], [0x0046, 0x10],
|
[0x0044, 0x10] /* <20> */, [0x0045, 0x10] /* <20> */, [0x0046, 0x10] /* <20> */,
|
||||||
[0x0047, 0x10], [0x0048, 0x10], [0x0049, 0x10],
|
[0x0047, 0x10] /* <20> */, [0x0048, 0x10] /* <20> */, [0x0049, 0x10] /* SHY */,
|
||||||
[0x004A, 0x10], [0x004B, 0x10], [0x004C, 0x10],
|
[0x004A, 0x10] /* <20> */, [0x004B, 0x10] /* <20> */, [0x004C, 0x10] /* <20> */,
|
||||||
[0x004D, 0x10], [0x004E, 0x10], [0x004F, 0x10],
|
[0x004D, 0x10] /* <20> */, [0x004E, 0x10] /* <20> */, [0x004F, 0x10] /* <20> */,
|
||||||
[0x0050, 0x10], [0x0051, 0x10], [0x0052, 0x10],
|
[0x0050, 0x10] /* <20> */, [0x0051, 0x10] /* <20> */, [0x0052, 0x10] /* <20> */,
|
||||||
[0x0053, 0x10], [0x0054, 0x10], [0x0055, 0x10],
|
[0x0053, 0x10] /* <20> */, [0x0054, 0x10] /* <20> */, [0x0055, 0x10] /* <20> */,
|
||||||
[0x0056, 0x10], [0x0057, 0x10], [0x0058, 0x10],
|
[0x0056, 0x10] /* <20> */, [0x0057, 0x10] /* <20> */, [0x0058, 0x10] /* <20> */,
|
||||||
[0x0059, 0x10], [0x005A, 0x10], [0x005B, 0x10],
|
[0x0059, 0x10] /* <20> */, [0x005A, 0x10] /* <20> */, [0x005B, 0x10] /* <20> */,
|
||||||
[0x005C, 0x10], [0x005D, 0x10], [0x005E, 0x10],
|
[0x005C, 0x10] /* <20> */, [0x005D, 0x10] /* <20> */, [0x005E, 0x10] /* <20> */,
|
||||||
[0x005F, 0x10], [0x0060, 0x10], [0x0061, 0x10],
|
[0x005F, 0x10] /* <20> */, [0x0060, 0x10] /* <20> */, [0x0061, 0x10] /* <20> */,
|
||||||
[0x0062, 0x10], [0x00AA, 0x0F], [0x0063, 0x10],
|
[0x0062, 0x10] /* <20> */, [0x00AA, 0x0F] /* <20> */, [0x0063, 0x10] /* <20> */,
|
||||||
[0x0064, 0x10], [0x0065, 0x10], [0x0066, 0x10],
|
[0x0064, 0x10] /* <20> */, [0x0065, 0x10] /* <20> */, [0x0066, 0x10] /* <20> */,
|
||||||
[0x0067, 0x10], [0x0068, 0x10], [0x0069, 0x10],
|
[0x0067, 0x10] /* <20> */, [0x0068, 0x10] /* <20> */, [0x0069, 0x10] /* <20> */,
|
||||||
[0x006A, 0x10], [0x006B, 0x10], [0x006C, 0x10],
|
[0x006A, 0x10] /* <20> */, [0x006B, 0x10] /* <20> */, [0x006C, 0x10] /* <20> */,
|
||||||
[0x006D, 0x10], [0x006E, 0x10], [0x006F, 0x10],
|
[0x006D, 0x10] /* <20> */, [0x006E, 0x10] /* <20> */, [0x006F, 0x10] /* <20> */,
|
||||||
[0x0070, 0x10], [0x0071, 0x10], [0x0072, 0x10],
|
[0x0070, 0x10] /* <20> */, [0x0071, 0x10] /* <20> */, [0x0072, 0x10] /* <20> */,
|
||||||
[0x0073, 0x10], [0x0074, 0x10], [0x0075, 0x10],
|
[0x0073, 0x10] /* <20> */, [0x0074, 0x10] /* <20> */, [0x0075, 0x10] /* <20> */,
|
||||||
[0x0076, 0x10], [0x0077, 0x10], [0x0078, 0x10],
|
[0x0076, 0x10] /* <20> */, [0x0077, 0x10] /* <20> */, [0x0078, 0x10] /* <20> */,
|
||||||
[0x0079, 0x10], [0x007A, 0x10], [0x007B, 0x10],
|
[0x0079, 0x10] /* <20> */, [0x007A, 0x10] /* <20> */, [0x007B, 0x10] /* <20> */,
|
||||||
[0x007C, 0x10], [0x007D, 0x10], [0x007E, 0x10],
|
[0x007C, 0x10] /* <20> */, [0x007D, 0x10] /* <20> */, [0x007E, 0x10] /* <20> */,
|
||||||
[0x007F, 0x10], [0x0080, 0x10], [0x0081, 0x10],
|
[0x007F, 0x10] /* <20> */, [0x0080, 0x10] /* <20> */, [0x0081, 0x10] /* <20> */,
|
||||||
[0x0082, 0x10], [0x0083, 0x10], [0x0084, 0x10],
|
[0x0082, 0x10] /* <20> */, [0x0083, 0x10] /* <20> */, [0x0084, 0x10] /* <20> */,
|
||||||
[0x0085, 0x10], [0x0086, 0x10], [0x0087, 0x10],
|
[0x0085, 0x10] /* <20> */, [0x0086, 0x10] /* <20> */, [0x0087, 0x10] /* <20> */,
|
||||||
[0x0088, 0x10], [0x0089, 0x10], [0x008A, 0x10],
|
[0x0088, 0x10] /* <20> */, [0x0089, 0x10] /* <20> */, [0x008A, 0x10] /* <20> */,
|
||||||
[0x008B, 0x10], [0x008C, 0x10], [0x008D, 0x10],
|
[0x008B, 0x10] /* <20> */, [0x008C, 0x10] /* <20> */, [0x008D, 0x10] /* <20> */,
|
||||||
[0x00AC, 0x0F], [0x008E, 0x10], [0x008F, 0x10],
|
[0x00AC, 0x0F] /* <20> */, [0x008E, 0x10] /* <20> */, [0x008F, 0x10] /* <20> */,
|
||||||
[0x0090, 0x10], [0x0091, 0x10], [0x0092, 0x10],
|
[0x0090, 0x10] /* <20> */, [0x0091, 0x10] /* <20> */, [0x0092, 0x10] /* <20> */,
|
||||||
[0x0093, 0x10], [0x00AE, 0x0F], [0x0094, 0x10],
|
[0x0093, 0x10] /* <20> */, [0x00AE, 0x0F] /* <20> */, [0x0094, 0x10] /* <20> */,
|
||||||
[0x0095, 0x10], [0x0096, 0x10], [0x0097, 0x10],
|
[0x0095, 0x10] /* <20> */, [0x0096, 0x10] /* <20> */, [0x0097, 0x10] /* <20> */,
|
||||||
[0x0098, 0x10], [0x0099, 0x10]
|
[0x0098, 0x10] /* <20> */, [0x0099, 0x10]
|
||||||
];
|
];
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This is the dictionary that reduces the size based on repeated patterns in the file.
|
||||||
|
*
|
||||||
|
* When we find a byte match in the ring buffer we use this dictionary to encode the length of the matched bytes.
|
||||||
|
*
|
||||||
|
* - These are intentionally 32-bit. The leftmost bit is 1 in each of these to tell the decoder to use match decoding.
|
||||||
|
* - LZP flag bits are used to encode the position where the matched bytes start.
|
||||||
|
* - We're allowed to match up to 298 bytes before we can't encode more (we need an entry in this dictionary for each byte more).
|
||||||
|
* - We can reach for matches 65KB behind the current LZ cursor (65KB is the ring buffer size and highest a 16-bit flag can reach).
|
||||||
|
**/
|
||||||
matchEncode = [
|
matchEncode = [
|
||||||
|
/* [MATCH CODE, MATCH CODE BIT LENGTH] */
|
||||||
[0x80000000, 0x01], [0x80000000, 0x03],
|
[0x80000000, 0x01], [0x80000000, 0x03],
|
||||||
[0xA0000000, 0x03], [0xC0000000, 0x03],
|
[0xA0000000, 0x03], [0xC0000000, 0x03],
|
||||||
[0xE0000000, 0x06], [0xE4000000, 0x06],
|
[0xE0000000, 0x06], [0xE4000000, 0x06],
|
||||||
@@ -265,6 +294,7 @@ class WTVLzpf {
|
|||||||
[0xFFFF4000, 0x13], [0xFFFF6000, 0x13],
|
[0xFFFF4000, 0x13], [0xFFFF6000, 0x13],
|
||||||
[0xFFFF8000, 0x13], [0xFFFFA000, 0x13],
|
[0xFFFF8000, 0x13], [0xFFFFA000, 0x13],
|
||||||
[0xFFFFC000, 0x13], [0xFFFFE000, 0x13],
|
[0xFFFFC000, 0x13], [0xFFFFE000, 0x13],
|
||||||
|
// These entries should never be selected. They were present in the original executable, so they are included here.
|
||||||
[0x00000000, 0x00], [0x00000000, 0x00]
|
[0x00000000, 0x00], [0x00000000, 0x00]
|
||||||
];
|
];
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user