in candle-core/src/quantized/k_quants.rs [1011:1092]
/// Quantizes the `f32` values in `xs` into the blocks of `ys`.
///
/// For every `(block, x)` pair produced by `group_for_quantization`:
/// 1. a float scale is computed for each run of 16 inputs via `make_q3_quants`;
/// 2. those scales are re-quantized to 6-bit codes (offset by 32) relative to
///    the scale of largest magnitude and packed into `block.scales`, with the
///    shared factor stored in `block.d`;
/// 3. the inputs are quantized to 3-bit codes in `0..=7` using the *decoded*
///    scales, so the codes match what a reader of the block reconstructs;
/// 4. the top bit of each code goes into `block.hmask` and the low two bits
///    are packed four-per-byte into `block.qs`.
///
/// # Errors
///
/// Propagates any error returned by `group_for_quantization`.
// NOTE(review): presumably `group_for_quantization` pairs each block with its
// own `QK_K`-long slice of `xs` — confirm against its definition.
fn from_float(xs: &[f32], ys: &mut [Self]) -> Result<()> {
    for (block, x) in group_for_quantization(xs, ys)? {
        // Step 1: one float scale per 16-value sub-block.
        let mut sub_scales = [0f32; QK_K / 16];
        for (idx, chunk) in x.chunks_exact(16).enumerate() {
            sub_scales[idx] = make_q3_quants(chunk, 4, true);
        }

        // The scale with the largest magnitude (sign kept); the first one wins on ties.
        let max_scale = sub_scales.iter().fold(0f32, |best, &candidate| {
            if candidate.abs() > best.abs() {
                candidate
            } else {
                best
            }
        });

        // Step 2: pack the 6-bit scale codes.
        block.scales.fill(0);
        if max_scale != 0.0 {
            let iscale = -32.0 / max_scale;
            for (idx, &scale) in sub_scales.iter().enumerate() {
                // Code in 0..=63 after the +32 offset.
                let code = nearest_int(iscale * scale).clamp(-32, 31) + 32;
                // Low 4 bits: low nibbles of bytes 0..8 for the first eight
                // sub-blocks, high nibbles of the same bytes for the rest.
                if idx < 8 {
                    block.scales[idx] = (code & 0xF) as u8;
                } else {
                    block.scales[idx - 8] |= ((code & 0xF) << 4) as u8;
                }
                // High 2 bits: bytes 8..12, four 2-bit fields per byte.
                let high_bits = code >> 4;
                block.scales[idx % 4 + 8] |= (high_bits << (2 * (idx / 4))) as u8;
            }
            block.d = f16::from_f32(1.0 / iscale);
        } else {
            block.d = f16::from_f32(0.0);
        }

        // Step 3: quantize the inputs against the scales as they will be decoded.
        let block_d = block.d.to_f32();
        let mut quants = [0i8; QK_K];
        for idx in 0..QK_K / 16 {
            let low_nibble = if idx < 8 {
                block.scales[idx] & 0xF
            } else {
                block.scales[idx - 8] >> 4
            };
            let high_pair = (block.scales[8 + idx % 4] >> (2 * (idx / 4))) & 3;
            let sc = (low_nibble | (high_pair << 4)) as i8 - 32;
            let d = block_d * sc as f32;
            if d == 0.0 {
                // A zero effective scale decodes to zero whatever the codes
                // are, so this sub-block keeps its zero-initialised codes.
                continue;
            }
            let start = 16 * idx;
            for (q, &value) in quants[start..start + 16]
                .iter_mut()
                .zip(&x[start..start + 16])
            {
                *q = (nearest_int(value / d).clamp(-4, 3) + 4) as i8;
            }
        }

        // Step 4a: move the top bit of every code into the high-bit mask.
        // Each run of `QK_K / 8` consecutive codes shares one bit position.
        block.hmask.fill(0);
        let mut bit = 1;
        for plane in quants.chunks_exact_mut(QK_K / 8) {
            for (pos, q) in plane.iter_mut().enumerate() {
                if *q > 3 {
                    block.hmask[pos] |= bit;
                    *q -= 4;
                }
            }
            bit <<= 1;
        }

        // Step 4b: pack the remaining 2-bit codes, four per byte. Within each
        // group of 128 codes, byte `k` holds codes k, k+32, k+64 and k+96.
        for base in (0..QK_K).step_by(128) {
            for k in 0..32 {
                let first = base + k;
                let packed = quants[first]
                    | (quants[first + 32] << 2)
                    | (quants[first + 64] << 4)
                    | (quants[first + 96] << 6);
                block.qs[base / 4 + k] = packed as u8;
            }
        }
    }
    Ok(())
}