in crates/ratchet-loader/src/gguf/dtype.rs [43:80]
/// Repack a slice of GGUF blocks into one contiguous buffer and wrap it in a
/// quantized [`Tensor`] of dtype `Q4_KF`.
///
/// Every block contributes to four byte streams: its `qs` bytes, its `scales`
/// bytes, and its `dmin` and `d` values (each widened to `f32` and stored as
/// little-endian bytes). Each stream is padded with `pad_to_offset`, then the
/// streams are concatenated in the order `qs | scales | dmin | d` and the
/// result is viewed as `u32` words.
///
/// * `data` - the blocks to repack.
/// * `n_blocks` - used only to pre-size the staging buffers; presumably equal
///   to `data.len()` (TODO confirm against callers).
/// * `shape` / `device` - forwarded unchanged to `Tensor::from_quantized`.
///
/// # Errors
///
/// Returns an error when the packed byte buffer cannot be reinterpreted as
/// `u32` words (misaligned storage or a length that is not a multiple of 4).
fn transcode(
    data: &[Self::GGUF_TYPE],
    n_blocks: usize,
    shape: Shape,
    device: &Device,
) -> anyhow::Result<Tensor> {
    //TODO: these should be uninit
    let mut qs_bytes = Vec::with_capacity(n_blocks * QK_K);
    let mut scales_bytes = Vec::with_capacity(n_blocks * K_SCALE_SIZE);
    let mut dmin_bytes = Vec::with_capacity(n_blocks * 4);
    let mut d_bytes = Vec::with_capacity(n_blocks * 4);

    // De-interleave: gather the same field of every block into its own stream.
    for block in data {
        dmin_bytes.extend_from_slice(&block.dmin.to_f32().to_le_bytes());
        d_bytes.extend_from_slice(&block.d.to_f32().to_le_bytes());
        // `qs` is copied to a local before being borrowed, as in the original
        // code; presumably this avoids borrowing the field in place — TODO confirm.
        let block_qs = block.qs;
        qs_bytes.extend_from_slice(bytemuck::cast_slice(&block_qs));
        scales_bytes.extend_from_slice(bytemuck::cast_slice(&block.scales));
    }

    // Pad each stream individually *before* concatenation so that every
    // section of the combined buffer starts on a padded boundary. The value
    // returned by `pad_to_offset` is not needed here.
    let _ = qs_bytes.pad_to_offset();
    let _ = scales_bytes.pad_to_offset();
    let _ = dmin_bytes.pad_to_offset();
    let _ = d_bytes.pad_to_offset();

    // Final layout: qs | scales | dmin | d.
    qs_bytes.append(&mut scales_bytes);
    qs_bytes.append(&mut dmin_bytes);
    qs_bytes.append(&mut d_bytes);

    // `bytemuck::cast_slice` panics on an alignment or size mismatch. This
    // function is fallible, so surface the failure as an error instead.
    let casted = bytemuck::try_cast_slice::<u8, u32>(&qs_bytes).map_err(|err| {
        anyhow::anyhow!(
            "Q4_KF transcode: packed buffer cannot be viewed as u32 words: {:?}",
            err
        )
    })?;

    // SAFETY: NOTE(review) — the contract of `Tensor::from_quantized` is not
    // visible from this function; presumably it requires the buffer layout to
    // match the declared dtype, which is the layout assembled above. Confirm
    // against its `# Safety` documentation.
    let tensor = unsafe {
        Tensor::from_quantized::<u32, _>(
            casted,
            DType::Q4_KF(Q4_KF::default()),
            shape,
            device.clone(),
        )
    };
    Ok(tensor)
}