in candle-core/src/cpu_backend/mod.rs [676:735]
fn f<T: WithDType>(&self, v1: &[T], l1: &Layout, src: &[T], src_l: &Layout) -> Result<Vec<T>> {
let dst_len = l1.shape().elem_count();
let mut dst = vec![T::zero(); dst_len];
copy_strided_src_(v1, &mut dst, 0, l1);
let src = match src_l.contiguous_offsets() {
None => Err(Error::RequiresContiguous { op: "index-add" }.bt())?,
Some((o1, o2)) => &src[o1..o2],
};
let dim = self.dim;
let max_idx = l1.dims()[dim];
let pre_dim = src_l.dims()[..dim].iter().product::<usize>();
let src_dim_sz = src_l.dims()[dim];
let post_dim = src_l.dims()[dim + 1..].iter().product::<usize>();
if dim == 0 {
for (src_idx, dst_idx) in self.ids.iter().enumerate() {
if *dst_idx == I::max_value() {
continue;
}
let dst_idx = dst_idx.as_usize();
if dst_idx >= max_idx {
Err(Error::InvalidIndex {
index: dst_idx,
op: "index-add",
size: max_idx,
})?
}
let src_idx = src_idx * post_dim;
let dst_idx = dst_idx * post_dim;
let src = &src[src_idx..src_idx + post_dim];
let dst = &mut dst[dst_idx..dst_idx + post_dim];
for (d, &s) in dst.iter_mut().zip(src.iter()) {
*d += s
}
}
} else {
for (src_idx, dst_idx) in self.ids.iter().enumerate() {
if *dst_idx == I::max_value() {
continue;
}
let dst_idx = dst_idx.as_usize();
if dst_idx >= max_idx {
Err(Error::InvalidIndex {
index: dst_idx,
op: "index-add",
size: max_idx,
})?
}
for pre_i in 0..pre_dim {
let pre_src_i = (pre_i * src_dim_sz + src_idx) * post_dim;
let pre_dst_i = (pre_i * max_idx + dst_idx) * post_dim;
let src = &src[pre_src_i..pre_src_i + post_dim];
let dst = &mut dst[pre_dst_i..pre_dst_i + post_dim];
for (d, &s) in dst.iter_mut().zip(src.iter()) {
*d += s
}
}
}
}
Ok(dst)
}