in crates/ratchet-loader/src/gguf/dtype.rs [87:124]
fn transcode(
    data: &[Self::GGUF_TYPE],
    n_blocks: usize,
    shape: Shape,
    device: &Device,
) -> anyhow::Result<Tensor> {
    //TODO: these should be uninit
    let mut qs_bytes = Vec::with_capacity(n_blocks * QK_K);
    let mut scales_bytes = Vec::with_capacity(n_blocks * K_SCALE_SIZE);
    let mut dmin_bytes = Vec::with_capacity(n_blocks * 2);
    let mut d_bytes = Vec::with_capacity(n_blocks * 2);
    for block in data {
        // Serialize the per-block super-scales as little-endian bytes.
        dmin_bytes.extend_from_slice(&block.dmin.to_le_bytes());
        d_bytes.extend_from_slice(&block.d.to_le_bytes());
        // Copy `qs` out of the block before casting it to a byte slice.
        let block_qs = block.qs;
        qs_bytes.extend_from_slice(bytemuck::cast_slice(&block_qs));
        scales_bytes.extend_from_slice(bytemuck::cast_slice(&block.scales));
    }
    // Pad each segment to its required offset before concatenation.
    let _ = qs_bytes.pad_to_offset();
    let _ = scales_bytes.pad_to_offset();
    let _ = dmin_bytes.pad_to_offset();
    let _ = d_bytes.pad_to_offset();
    // Concatenate the segments in the order: qs | scales | dmin | d.
    qs_bytes.append(&mut scales_bytes);
    qs_bytes.append(&mut dmin_bytes);
    qs_bytes.append(&mut d_bytes);
    // Reinterpret the packed bytes as u32 words and upload as a quantized tensor.
    let casted = bytemuck::cast_slice::<u8, u32>(&qs_bytes);
    unsafe {
        Ok(Tensor::from_quantized::<u32, _>(
            casted,
            DType::Q4_KH(Q4_KH::default()),
            shape,
            device.clone(),
        ))
    }
}
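
// ---------------------------------------------------------------------------
// Sketch (not part of the file above): the Q4_K super-block layout that
// `transcode` assumes, mirroring ggml's `block_q4_K` with QK_K = 256 weights
// per block and K_SCALE_SIZE = 12 bytes of packed 6-bit scales/mins. Field
// names follow the accesses above (`d`, `dmin`, `scales`, `qs`); the actual
// definition in the crate may differ in type names and derives.
// ---------------------------------------------------------------------------
use half::f16;

pub const QK_K: usize = 256;
pub const K_SCALE_SIZE: usize = 12;

#[repr(C)]
#[derive(Clone, Copy)]
pub struct BlockQ4K {
    pub d: f16,                     // super-block scale applied to sub-block scales
    pub dmin: f16,                  // super-block scale applied to sub-block mins
    pub scales: [u8; K_SCALE_SIZE], // packed 6-bit sub-block scales and mins
    pub qs: [u8; QK_K / 2],         // 4-bit quants, two per byte
}

// With this layout, each block contributes 128 bytes of quants, 12 bytes of
// scales, and 2 bytes each of dmin and d. `transcode` regroups these fields
// into a struct-of-arrays buffer (all qs, then all scales, then all dmins,
// then all ds) before uploading it as a single quantized tensor.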