Compare commits
3 Commits
4563e1ef4a
...
bba9db290e
| Author | SHA1 | Date | |
|---|---|---|---|
| bba9db290e | |||
| 3fb10b78e3 | |||
| 59b5eade7e | |||
@@ -1,26 +1,30 @@
|
|||||||
//! # Benchmark: BlockQ4K vs BlockQ4KRle
|
//! # Benchmark: BlockQ4K vs BlockQ4KRle
|
||||||
//!
|
//!
|
||||||
//! Measures three operations across two weight distributions:
|
//! Measures three operations across three weight distributions, encoded with
|
||||||
|
//! `min_coverage = 0.01` (blocks need ≥ 1 % of their 256 nibbles in repeated
|
||||||
|
//! runs to use RLE mode).
|
||||||
//!
|
//!
|
||||||
//! | Group | What is timed |
|
//! | Group | What is timed |
|
||||||
//! |--------------|--------------------------------------------------|
|
//! |--------------|-----------------------------------------------------|
|
||||||
//! | `encode` | BlockQ4K → BlockQ4KRle for a batch of 512 blocks |
|
//! | `encode` | BlockQ4K → BlockQ4KRle for a batch of 512 blocks |
|
||||||
//! | `dequantize` | Single-block dequantisation for all three paths |
|
//! | `dequantize` | Single-block dequantisation across all four paths |
|
||||||
//! | `matmul` | Full A×B multiply at three matrix sizes |
|
//! | `matmul` | Full A×B multiply at three matrix sizes |
|
||||||
//!
|
//!
|
||||||
//! ## Weight distributions
|
//! ## Weight distributions
|
||||||
//!
|
//!
|
||||||
//! **uniform** — each qs byte is drawn from a pseudo-random sequence (LCG).
|
//! **uniform** — each qs byte is drawn from a pseudo-random LCG sequence.
|
||||||
//! Consecutive bytes almost never repeat, so each block produces ~128
|
//! Adjacent nibbles match with probability 1/16, giving ~12 % nibble coverage.
|
||||||
//! single-byte runs. At 2 bytes per pair that would require ~256 bytes,
|
//! At `min_coverage = 0.01` these blocks encode to **RLE mode** (IS_RLE = 1)
|
||||||
//! which exceeds the 128-byte raw payload, so `encode` always keeps these
|
//! with ~230–240 nibble entries — a realistic proxy for trained Q4_K weights.
|
||||||
//! blocks in **raw mode** (IS_RLE = 0). This is representative of typical
|
|
||||||
//! unstructured LLM weight matrices.
|
|
||||||
//!
|
//!
|
||||||
//! **rle_optimal** — every byte in a block's qs field is the same value.
|
//! **rle_optimal** — every qs byte is the same value. All 256 nibbles are
|
||||||
//! `encode` stores a single (value, count) pair — 2 bytes instead of 128 —
|
//! identical, giving 100 % coverage and just 16 nibble entries. This is the
|
||||||
//! and sets IS_RLE = 1. This is the theoretical compression maximum, and
|
//! theoretical RLE maximum and represents highly structured weight blocks.
|
||||||
//! is representative of highly sparse or dead-neuron weight matrices.
|
//!
|
||||||
|
//! **zero_coverage** — nibbles cycle deterministically so no two consecutive
|
||||||
|
//! nibbles (in output-sequential order) are ever equal. Coverage = 0 %;
|
||||||
|
//! `encode` keeps these blocks in **raw mode** (IS_RLE = 0) at any positive
|
||||||
|
//! threshold. Used only in the `dequantize` group to benchmark the raw path.
|
||||||
|
|
||||||
use criterion::{black_box, criterion_group, criterion_main, Criterion, Throughput};
|
use criterion::{black_box, criterion_group, criterion_main, Criterion, Throughput};
|
||||||
use matrix_testing::{
|
use matrix_testing::{
|
||||||
@@ -83,12 +87,11 @@ fn make_scales(scale: u8, min: u8) -> [u8; K_SCALE_SIZE] {
|
|||||||
s
|
s
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Return `count` blocks whose qs bytes are pseudo-random.
|
/// Return `count` blocks whose qs bytes are pseudo-random (LCG).
|
||||||
///
|
///
|
||||||
/// With uniformly distributed bytes, consecutive bytes match with probability
|
/// Adjacent nibbles match with probability 1/16, giving each block roughly
|
||||||
/// 1/256 ≈ 0.4%, yielding ~128 runs per block. Storing those as (value,
|
/// 12 % nibble coverage. At `min_coverage = 0.01` these blocks encode to
|
||||||
/// count) pairs would need ~256 bytes — more than the 128-byte raw payload —
|
/// **RLE mode** (IS_RLE = 1) with ~230–240 nibble entries per block.
|
||||||
/// so `encode` will always select **raw mode** (IS_RLE = 0).
|
|
||||||
fn uniform_blocks(count: usize) -> Vec<BlockQ4K> {
|
fn uniform_blocks(count: usize) -> Vec<BlockQ4K> {
|
||||||
let mut rng = Lcg::new(0xDEAD_BEEF_CAFE_1234);
|
let mut rng = Lcg::new(0xDEAD_BEEF_CAFE_1234);
|
||||||
let scales = make_scales(7, 2);
|
let scales = make_scales(7, 2);
|
||||||
@@ -107,10 +110,9 @@ fn uniform_blocks(count: usize) -> Vec<BlockQ4K> {
|
|||||||
|
|
||||||
/// Return `count` blocks where every qs byte is the same value.
|
/// Return `count` blocks where every qs byte is the same value.
|
||||||
///
|
///
|
||||||
/// A uniform byte array collapses to one (value, count) RLE pair: 2 bytes
|
/// All 256 nibbles are identical → 100 % nibble coverage → always **RLE mode**
|
||||||
/// instead of 128. `encode` will always select **RLE mode** (IS_RLE = 1).
|
/// with exactly 16 entries (256 nibbles / 16 per entry).
|
||||||
/// Each block uses a fresh pseudo-random byte so no two blocks are identical,
|
/// Each block uses a fresh pseudo-random byte to avoid cache-warm artifacts.
|
||||||
/// avoiding degenerate cache-warm effects across the batch.
|
|
||||||
fn rle_optimal_blocks(count: usize) -> Vec<BlockQ4K> {
|
fn rle_optimal_blocks(count: usize) -> Vec<BlockQ4K> {
|
||||||
let mut rng = Lcg::new(0x1234_5678_9ABC_DEF0);
|
let mut rng = Lcg::new(0x1234_5678_9ABC_DEF0);
|
||||||
let scales = make_scales(7, 2);
|
let scales = make_scales(7, 2);
|
||||||
@@ -129,6 +131,28 @@ fn fp16_ones(k: usize, n: usize) -> Vec<u16> {
|
|||||||
vec![f32_to_fp16(1.0); k * n]
|
vec![f32_to_fp16(1.0); k * n]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Build one block whose nibbles cycle so that no two consecutive nibbles
|
||||||
|
/// (in output-sequential order) are ever equal → 0 % nibble coverage.
|
||||||
|
///
|
||||||
|
/// Lo nibble of byte `i` = `i % 16`; hi nibble = `(i + 8) % 16`.
|
||||||
|
/// Within every 32-byte group the lo and hi streams each visit all 16 values
|
||||||
|
/// twice with no two adjacent nibbles equal, and across group boundaries the last nibble of
|
||||||
|
/// one stream differs from the first nibble of the next.
|
||||||
|
///
|
||||||
|
/// At any `min_coverage > 0.0`, `encode` keeps this block in **raw mode**.
|
||||||
|
fn zero_coverage_block() -> BlockQ4K {
|
||||||
|
let scales = make_scales(7, 2); // scale = 7, min = 2
|
||||||
|
let d = f32_to_fp16(0.01);
|
||||||
|
let dmin = f32_to_fp16(0.001);
|
||||||
|
let mut qs = [0u8; QK_K / 2]; // each byte packs two 4-bit values
|
||||||
|
for (i, b) in qs.iter_mut().enumerate() {
|
||||||
|
let lo = (i % 16) as u8; // steps by 1 (mod 16), so neighbouring lo nibbles always differ
|
||||||
|
|
||||||
|
let hi = ((i + 8) % 16) as u8; // same cycle, offset by 8
|
||||||
|
*b = lo | (hi << 4); // lo in bits 0–3, hi in bits 4–7
|
||||||
|
}
|
||||||
|
BlockQ4K { d, dmin, scales, qs }
|
||||||
|
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
// Group 1 — encode
|
// Group 1 — encode
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
@@ -136,12 +160,11 @@ fn fp16_ones(k: usize, n: usize) -> Vec<u16> {
|
|||||||
/// Number of blocks encoded per iteration in `bench_encode`.
|
/// Number of blocks encoded per iteration in `bench_encode`.
|
||||||
const ENCODE_BATCH: usize = 512;
|
const ENCODE_BATCH: usize = 512;
|
||||||
|
|
||||||
/// Measures the cost of scanning qs bytes and writing the BlockQ4KRle output.
|
/// Measures the cost of scanning nibbles and writing the `BlockQ4KRle` output.
|
||||||
///
|
///
|
||||||
/// Both distributions perform the same O(128) run-length scan. The only
|
/// Both distributions perform the same O(256) nibble scan. The output differs:
|
||||||
/// divergence is at the output stage:
|
/// * **uniform** — ~12 % coverage → RLE mode, ~230–240 entries written.
|
||||||
/// * **uniform** — run count > 63 → fall through to memcpy of 128 bytes.
|
/// * **rle_optimal** — 100 % coverage → RLE mode, exactly 16 entries written.
|
||||||
/// * **rle_optimal** — run count = 1 → write 2 bytes and set IS_RLE.
|
|
||||||
fn bench_encode(c: &mut Criterion) {
|
fn bench_encode(c: &mut Criterion) {
|
||||||
let uniform = uniform_blocks(ENCODE_BATCH);
|
let uniform = uniform_blocks(ENCODE_BATCH);
|
||||||
let rle_opt = rle_optimal_blocks(ENCODE_BATCH);
|
let rle_opt = rle_optimal_blocks(ENCODE_BATCH);
|
||||||
@@ -153,7 +176,7 @@ fn bench_encode(c: &mut Criterion) {
|
|||||||
group.bench_function("uniform", |b| {
|
group.bench_function("uniform", |b| {
|
||||||
b.iter(|| {
|
b.iter(|| {
|
||||||
for blk in &uniform {
|
for blk in &uniform {
|
||||||
black_box(encode(black_box(blk)));
|
black_box(encode(black_box(blk), 0.01));
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
@@ -161,7 +184,7 @@ fn bench_encode(c: &mut Criterion) {
|
|||||||
group.bench_function("rle_optimal", |b| {
|
group.bench_function("rle_optimal", |b| {
|
||||||
b.iter(|| {
|
b.iter(|| {
|
||||||
for blk in &rle_opt {
|
for blk in &rle_opt {
|
||||||
black_box(encode(black_box(blk)));
|
black_box(encode(black_box(blk), 0.01));
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
@@ -173,25 +196,35 @@ fn bench_encode(c: &mut Criterion) {
|
|||||||
// Group 2 — dequantize (single block)
|
// Group 2 — dequantize (single block)
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
/// Compares the three single-block dequantisation code paths.
|
/// Compares four single-block dequantisation code paths.
|
||||||
///
|
///
|
||||||
/// | Variant | Block type | Encoding | Extra work vs baseline |
|
/// | Variant | Block type | Encoding | IS_RLE | Entries |
|
||||||
/// |------------------|-------------|----------|-------------------------------|
|
/// |--------------------|-------------|-----------|--------|---------|
|
||||||
/// | `q4k_baseline` | BlockQ4K | — | none |
|
/// | `q4k_baseline` | BlockQ4K | — | — | — |
|
||||||
/// | `rle_raw_mode` | BlockQ4KRle | IS_RLE=0 | one branch (`is_rle()` check) |
|
/// | `rle_raw_mode` | BlockQ4KRle | raw | 0 | — |
|
||||||
/// | `rle_rle_mode` | BlockQ4KRle | IS_RLE=1 | RLE expansion into 128-B buf |
|
/// | `rle_sparse` | BlockQ4KRle | RLE | 1 | ~235 |
|
||||||
|
/// | `rle_dense` | BlockQ4KRle | RLE | 1 | 16 |
|
||||||
|
///
|
||||||
|
/// `rle_raw_mode` uses the zero-coverage fixture (0 % nibble coverage), which
|
||||||
|
/// stays in raw mode at any positive threshold.
|
||||||
|
/// `rle_sparse` uses the LCG uniform fixture (~12 % coverage, ~235 entries),
|
||||||
|
/// representative of actual trained Q4_K weight blocks.
|
||||||
|
/// `rle_dense` uses the rle_optimal fixture (100 % coverage, 16 entries).
|
||||||
///
|
///
|
||||||
/// Throughput is the number of dequantised weights produced per second.
|
/// Throughput is the number of dequantised weights produced per second.
|
||||||
fn bench_dequantize(c: &mut Criterion) {
|
fn bench_dequantize(c: &mut Criterion) {
|
||||||
|
let q4k_baseline_block = uniform_blocks(1).into_iter().next().unwrap();
|
||||||
|
let q4k_zero_cov = zero_coverage_block();
|
||||||
let q4k_uniform = uniform_blocks(1).into_iter().next().unwrap();
|
let q4k_uniform = uniform_blocks(1).into_iter().next().unwrap();
|
||||||
let q4k_rle_opt = rle_optimal_blocks(1).into_iter().next().unwrap();
|
let q4k_rle_opt = rle_optimal_blocks(1).into_iter().next().unwrap();
|
||||||
|
|
||||||
let rle_raw = encode(&q4k_uniform); // IS_RLE = 0
|
let rle_raw = encode(&q4k_zero_cov, 0.01); // IS_RLE = 0 (0 % coverage)
|
||||||
let rle_rle = encode(&q4k_rle_opt); // IS_RLE = 1
|
let rle_sparse = encode(&q4k_uniform, 0.01); // IS_RLE = 1 (~12 % coverage)
|
||||||
|
let rle_dense = encode(&q4k_rle_opt, 0.01); // IS_RLE = 1 (100 % coverage)
|
||||||
|
|
||||||
// Confirm the fixtures ended up in the right encoding modes.
|
assert!(!rle_raw.is_rle(), "zero-coverage block must be raw mode");
|
||||||
assert!(!rle_raw.is_rle(), "uniform block should encode to raw mode");
|
assert!(rle_sparse.is_rle(), "uniform block must be RLE at 0.01 threshold");
|
||||||
assert!(rle_rle.is_rle(), "rle-optimal block should encode to rle mode");
|
assert!(rle_dense.is_rle(), "rle-optimal block must be RLE mode");
|
||||||
|
|
||||||
let mut group = c.benchmark_group("dequantize");
|
let mut group = c.benchmark_group("dequantize");
|
||||||
// Throughput = QK_K (256) weights dequantised per second.
|
// Throughput = QK_K (256) weights dequantised per second.
|
||||||
@@ -200,7 +233,7 @@ fn bench_dequantize(c: &mut Criterion) {
|
|||||||
group.bench_function("q4k_baseline", |b| {
|
group.bench_function("q4k_baseline", |b| {
|
||||||
b.iter(|| {
|
b.iter(|| {
|
||||||
let mut out = [0.0f32; QK_K];
|
let mut out = [0.0f32; QK_K];
|
||||||
dequantize_block_q4k(black_box(&q4k_uniform), &mut out);
|
dequantize_block_q4k(black_box(&q4k_baseline_block), &mut out);
|
||||||
black_box(out)
|
black_box(out)
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
@@ -213,10 +246,18 @@ fn bench_dequantize(c: &mut Criterion) {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
group.bench_function("rle_rle_mode", |b| {
|
group.bench_function("rle_sparse", |b| {
|
||||||
b.iter(|| {
|
b.iter(|| {
|
||||||
let mut out = [0.0f32; QK_K];
|
let mut out = [0.0f32; QK_K];
|
||||||
dequantize_block_q4k_rle(black_box(&rle_rle), &mut out);
|
dequantize_block_q4k_rle(black_box(&rle_sparse), &mut out);
|
||||||
|
black_box(out)
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
group.bench_function("rle_dense", |b| {
|
||||||
|
b.iter(|| {
|
||||||
|
let mut out = [0.0f32; QK_K];
|
||||||
|
dequantize_block_q4k_rle(black_box(&rle_dense), &mut out);
|
||||||
black_box(out)
|
black_box(out)
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
@@ -245,14 +286,14 @@ const CONFIGS: &[(usize, usize, usize)] = &[
|
|||||||
|
|
||||||
/// Full matrix-multiply benchmark across weight distributions and matrix sizes.
|
/// Full matrix-multiply benchmark across weight distributions and matrix sizes.
|
||||||
///
|
///
|
||||||
/// Four variants per size:
|
/// Four variants per size (`min_coverage = 0.01`):
|
||||||
///
|
///
|
||||||
/// | Label | A type | RLE mode? |
|
/// | Label | A type | IS_RLE | Entries/block |
|
||||||
/// |----------------------|-------------|-----------|
|
/// |----------------------|-------------|--------|---------------|
|
||||||
/// | `baseline/uniform` | BlockQ4K | — |
|
/// | `baseline/uniform` | BlockQ4K | — | — |
|
||||||
/// | `rle/uniform` | BlockQ4KRle | raw |
|
/// | `rle/uniform` | BlockQ4KRle | 1 | ~235 |
|
||||||
/// | `baseline/rle_opt` | BlockQ4K | — |
|
/// | `baseline/rle_opt` | BlockQ4K | — | — |
|
||||||
/// | `rle/rle_opt` | BlockQ4KRle | rle |
|
/// | `rle/rle_opt` | BlockQ4KRle | 1 | 16 |
|
||||||
///
|
///
|
||||||
/// Throughput is reported as multiply-accumulate operations (M × K × N) per
|
/// Throughput is reported as multiply-accumulate operations (M × K × N) per
|
||||||
/// second, allowing fair cross-size comparison.
|
/// second, allowing fair cross-size comparison.
|
||||||
@@ -270,10 +311,10 @@ fn bench_matmul(c: &mut Criterion) {
|
|||||||
|
|
||||||
// Build all four A variants and the shared B matrix for this config.
|
// Build all four A variants and the shared B matrix for this config.
|
||||||
let a_q4k_u: Vec<BlockQ4K> = uniform_blocks(m * bpr);
|
let a_q4k_u: Vec<BlockQ4K> = uniform_blocks(m * bpr);
|
||||||
let a_rle_u: Vec<BlockQ4KRle> = a_q4k_u.iter().map(encode).collect();
|
let a_rle_u: Vec<BlockQ4KRle> = a_q4k_u.iter().map(|b| encode(b, 0.01)).collect();
|
||||||
|
|
||||||
let a_q4k_r: Vec<BlockQ4K> = rle_optimal_blocks(m * bpr);
|
let a_q4k_r: Vec<BlockQ4K> = rle_optimal_blocks(m * bpr);
|
||||||
let a_rle_r: Vec<BlockQ4KRle> = a_q4k_r.iter().map(encode).collect();
|
let a_rle_r: Vec<BlockQ4KRle> = a_q4k_r.iter().map(|b| encode(b, 0.01)).collect();
|
||||||
|
|
||||||
let b = fp16_ones(k, n);
|
let b = fp16_ones(k, n);
|
||||||
|
|
||||||
|
|||||||
@@ -145,7 +145,7 @@ fn main() -> Result<(), Box<dyn Error>> {
|
|||||||
|
|
||||||
// ── RLE encode (best of `trials`) ────────────────────────────────────────
|
// ── RLE encode (best of `trials`) ────────────────────────────────────────
|
||||||
let (rle_blocks, t_enc) = bench(trials, || -> Vec<BlockQ4KRle> {
|
let (rle_blocks, t_enc) = bench(trials, || -> Vec<BlockQ4KRle> {
|
||||||
blocks.iter().map(encode).collect()
|
blocks.iter().map(|b| encode(b, 0.0)).collect()
|
||||||
});
|
});
|
||||||
|
|
||||||
let n_rle = rle_blocks.iter().filter(|b| b.is_rle()).count();
|
let n_rle = rle_blocks.iter().filter(|b| b.is_rle()).count();
|
||||||
|
|||||||
@@ -101,11 +101,32 @@ fn fixed(s: &str, width: usize) -> String {
|
|||||||
fn main() -> Result<(), Box<dyn Error>> {
|
fn main() -> Result<(), Box<dyn Error>> {
|
||||||
let args: Vec<String> = env::args().collect();
|
let args: Vec<String> = env::args().collect();
|
||||||
if args.len() < 2 {
|
if args.len() < 2 {
|
||||||
eprintln!("usage: {} <model.gguf>", args[0]);
|
eprintln!("usage: {} <model.gguf> [--threshold <0.0..1.0>]", args[0]);
|
||||||
|
eprintln!();
|
||||||
|
eprintln!(" --threshold Minimum fraction of qs bytes that must be in runs of");
|
||||||
|
eprintln!(" length ≥ 2 for a block to use RLE mode. Default: 0.0");
|
||||||
|
eprintln!(" (use RLE whenever the pair count fits in 64 pairs).");
|
||||||
std::process::exit(1);
|
std::process::exit(1);
|
||||||
}
|
}
|
||||||
let path = &args[1];
|
let path = &args[1];
|
||||||
|
|
||||||
|
// Parse optional --threshold flag from the remaining arguments.
|
||||||
|
let mut threshold = 0.0f32;
|
||||||
|
let mut idx = 2usize;
|
||||||
|
while idx < args.len() {
|
||||||
|
if args[idx] == "--threshold" {
|
||||||
|
idx += 1;
|
||||||
|
threshold = args.get(idx)
|
||||||
|
.and_then(|s| s.parse::<f32>().ok())
|
||||||
|
.filter(|&v| (0.0..=1.0).contains(&v))
|
||||||
|
.unwrap_or_else(|| {
|
||||||
|
eprintln!("error: --threshold requires a value in [0.0, 1.0]");
|
||||||
|
std::process::exit(1);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
idx += 1;
|
||||||
|
}
|
||||||
|
|
||||||
// ── Parse header ─────────────────────────────────────────────────────────
|
// ── Parse header ─────────────────────────────────────────────────────────
|
||||||
eprintln!("Parsing {path} …");
|
eprintln!("Parsing {path} …");
|
||||||
let (tensors, data_start) = parse_header(path)?;
|
let (tensors, data_start) = parse_header(path)?;
|
||||||
@@ -122,6 +143,8 @@ fn main() -> Result<(), Box<dyn Error>> {
|
|||||||
q4k_tensors.len(),
|
q4k_tensors.len(),
|
||||||
other_count,
|
other_count,
|
||||||
);
|
);
|
||||||
|
eprintln!(" RLE threshold: {threshold:.2} (blocks need ≥ {:.0}% of bytes in runs)",
|
||||||
|
threshold * 100.0);
|
||||||
eprintln!();
|
eprintln!();
|
||||||
|
|
||||||
// ── Header row ───────────────────────────────────────────────────────────
|
// ── Header row ───────────────────────────────────────────────────────────
|
||||||
@@ -145,7 +168,7 @@ fn main() -> Result<(), Box<dyn Error>> {
|
|||||||
let mut stats = TensorStats::new();
|
let mut stats = TensorStats::new();
|
||||||
|
|
||||||
for_each_block(&mut file, data_start, tensor, |block| {
|
for_each_block(&mut file, data_start, tensor, |block| {
|
||||||
let rle_block = encode(block);
|
let rle_block = encode(block, threshold);
|
||||||
stats.observe(rle_block.is_rle(), rle_block.rle_len());
|
stats.observe(rle_block.is_rle(), rle_block.rle_len());
|
||||||
})?;
|
})?;
|
||||||
|
|
||||||
@@ -187,10 +210,15 @@ fn main() -> Result<(), Box<dyn Error>> {
|
|||||||
|
|
||||||
if !any_rle {
|
if !any_rle {
|
||||||
println!();
|
println!();
|
||||||
println!("No blocks compressed with RLE — all weights are effectively random at");
|
println!("No blocks used RLE at threshold {threshold:.2}.");
|
||||||
println!("the byte level, which is typical for trained Q4_K quantised weights.");
|
if threshold < 0.01 {
|
||||||
println!("RLE compression only helps for structured weight matrices (binary,");
|
println!("All weights are effectively random at the byte level — typical for");
|
||||||
println!("ternary, heavily pruned, or synthetic).");
|
println!("trained Q4_K weights. RLE only helps for structured weight matrices");
|
||||||
|
println!("(binary, ternary, heavily pruned, or synthetic).");
|
||||||
|
} else {
|
||||||
|
println!("Try a lower --threshold (e.g. --threshold 0.0) to see whether any");
|
||||||
|
println!("blocks have enough run structure to qualify at a looser threshold.");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
|
|||||||
642
src/rle.rs
642
src/rle.rs
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user