-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbwt_compress_v9.hpp
More file actions
88 lines (72 loc) · 2.75 KB
/
bwt_compress_v9.hpp
File metadata and controls
88 lines (72 loc) · 2.75 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
// BWT Compression v9 - Smart Adaptive Dispatcher
//
// Intelligently dispatches to:
// - v8 (fixed model range coding) for data <5KB
// - v5 (multi-tree Huffman with symbol bitmap) for data >=5KB
//
// v8 now uses an empirically-trained model that's 7-9% better than before.
// Crossover point moved from ~2.5KB to ~5KB thanks to the improved model.
#pragma once
#include "bwt_compress_v5.hpp"
#include "bwt_compress_v8.hpp"
namespace bwt9 {
// Size boundary, in bytes, between the two backends: inputs strictly smaller
// than this are sent to v8, everything else is sent to v5.
// The original note attributes the value to measurement (v8 stays ahead up to
// roughly 4.5-5KB). It named "v4" as the competing codec, which looks stale
// because only v5 is dispatched to in this header -- TODO confirm.
constexpr size_t CROSSOVER_SIZE = 5 * 1024;  // 5KB

// =============================================================================
// Mode Selection
// =============================================================================

// Identifies the backend codec. compress() writes the numeric value as the
// third header byte and decompress() reads it back, so the values are part of
// the stream format.
enum class Mode : uint8_t {
    // Fixed model range coding (v8)
    USE_V8 = 0,
    // Multi-tree Huffman with bitmap (v5)
    USE_V5 = 1,
};

// Selects the backend from the uncompressed size alone.
//
// original_size: number of input bytes.
// Returns Mode::USE_V5 once the size reaches CROSSOVER_SIZE, Mode::USE_V8 for
// anything smaller.
inline Mode choose_mode(size_t original_size) {
    if (original_size >= CROSSOVER_SIZE) {
        return Mode::USE_V5;
    }
    return Mode::USE_V8;
}
// =============================================================================
// Main Compress/Decompress
// =============================================================================
inline std::vector<uint8_t> compress(const uint8_t* data, size_t n) {
if (n == 0) return {};
Mode mode = choose_mode(n);
std::vector<uint8_t> output;
output.push_back('B');
output.push_back('9');
output.push_back((uint8_t)mode);
if (mode == Mode::USE_V8) {
auto result = bwt8::compress(data, n);
output.insert(output.end(), result.begin(), result.end());
} else {
auto result = bwt5::compress(data, n);
output.insert(output.end(), result.begin(), result.end());
}
return output;
}
inline std::vector<uint8_t> decompress(const uint8_t* data, size_t n) {
if (n < 4) return {};
if (data[0] != 'B' || data[1] != '9') return {};
Mode mode = (Mode)data[2];
if (mode == Mode::USE_V8) {
return bwt8::decompress(data + 3, n - 3);
} else {
return bwt5::decompress(data + 3, n - 3);
}
}
// =============================================================================
// Utility
// =============================================================================
// Returns a static, human-readable label for a Mode value.
// Values that match neither enumerator are reported as "UNKNOWN".
inline const char* mode_name(Mode m) {
    if (m == Mode::USE_V8) {
        return "USE_V8 (fixed model)";
    }
    if (m == Mode::USE_V5) {
        return "USE_V5 (bitmap)";
    }
    return "UNKNOWN";
}
// Re-export transform functions for analysis.
// These using-declarations make three bwt5 functions reachable as
// bwt9::bwt_encode, bwt9::mtf_encode and bwt9::zrle_encode; the visible part
// of this header does not call them itself.
// NOTE(review): judging by the names these are the BWT, move-to-front and
// zero-run-length stages of the v5 pipeline -- confirm in bwt_compress_v5.hpp.
using bwt5::bwt_encode;
using bwt5::mtf_encode;
using bwt5::zrle_encode;
} // namespace bwt9