arrow/math/int64_neon_arm64.s (60 lines of code) (raw):
//+build !noasm !appengine
// ARROW-15172:
// (C2GOASM doesn't work correctly for Arm64)
// Partly GENERATED BY asm2plan9s.
// func _sum_int64_neon(buf unsafe.Pointer, len uintptr, res unsafe.Pointer)
//
// Adds up the len 64-bit integers stored contiguously at buf and writes the
// 64-bit total to *res. When len is zero, 0 is written. The additions are
// plain 64-bit ADDs; nothing in this routine checks for overflow.
//
// Arguments are read from the caller's frame (24 bytes of arguments, no
// return values; the result is delivered through res):
//   buf+0(FP)  -> R0  address of the first element
//   len+8(FP)  -> R1  element count (compared as an unsigned value)
//   res+16(FP) -> R2  address that receives the sum
//
// Register roles inside the body:
//   R8      number of elements handled by the vector loop (len rounded down
//           to a multiple of 4); reused as the remaining count in the tail
//   R9      read cursor in the vector loop, then the scalar running sum
//   R10     vector loop counter, then the read cursor in the scalar tail
//   R11     element just loaded in the scalar tail
//   V0, V1  two independent accumulators, each holding two 64-bit lanes
//   V2, V3  the four elements loaded by the current vector iteration
// R29/R30 are pushed on entry and reloaded before each RET. The condition
// flags, R8-R11 and V0-V3 are overwritten and not restored.
//
// Stack: the declared frame size is 0, but the routine moves RSP down by
// 32 bytes by hand (16 skipped + 16 for the R29/R30 pair) and moves it back
// up by the same amount on both exit paths.
//
// NOTE(review): most instructions are emitted as raw WORD encodings; the
// trailing comment on each WORD line is its AArch64 disassembly. The hex
// value is what executes, so any edit must keep the two in sync.
TEXT ·_sum_int64_neon(SB), $0-24
// Load the three arguments into the registers the encoded body expects
// (x0 = buf, x1 = len, x2 = res).
MOVD buf+0(FP), R0
MOVD len+8(FP), R1
MOVD res+16(FP), R2
// The Go ABI saves the frame pointer register one word below the
// caller's frame. Make room so we don't overwrite it. Needs to stay
// 16-byte aligned.
SUB $16, RSP
// Push the frame pointer / link register pair (pre-decrementing SP by a
// further 16 bytes) and point x29 at the new top of stack.
WORD $0xa9bf7bfd // stp x29, x30, [sp, #-16]!
WORD $0x910003fd // mov x29, sp
// len == 0: nothing to add, go store 0.
CBZ R1, LBB0_3
// len > 3 (unsigned): at least one full group of four, use the vector loop.
WORD $0xf1000c3f // cmp x1, #3
BHI LBB0_4
// 1 <= len <= 3: no vector work. Start the scalar tail with zero elements
// already processed (x8) and a zero running sum (x9).
WORD $0xaa1f03e8 // mov x8, xzr
WORD $0xaa1f03e9 // mov x9, xzr
JMP LBB0_7
// Empty input: store 0 to *res, undo the prologue and return.
LBB0_3:
WORD $0xaa1f03e9 // mov x9, xzr
WORD $0xf9000049 // str x9, [x2]
WORD $0xa8c17bfd // ldp x29, x30, [sp], #16
// Put the stack pointer back where it was
ADD $16, RSP
RET
// Vector setup: x8 = len with the low two bits cleared (elements covered by
// the vector loop), x9 = buf + 16 so that the paired load below can address
// the current 32-byte group as [x9 - 16], x10 = loop counter, v0/v1 = 0.
LBB0_4:
WORD $0x927ef428 // and x8, x1, #0xfffffffffffffffc
WORD $0x91004009 // add x9, x0, #16
WORD $0x6f00e400 // movi v0.2d, #0000000000000000
WORD $0xaa0803ea // mov x10, x8
WORD $0x6f00e401 // movi v1.2d, #0000000000000000
// Vector loop: each pass loads four elements (two per Q register), advances
// the cursor by 32 bytes and adds the two pairs lane-wise into v0 and v1.
// x10 counts down by 4; the flags set by SUBS are consumed by the BNE, and
// the instructions in between are ones that leave the flags alone.
LBB0_5:
WORD $0xad7f8d22 // ldp q2, q3, [x9, #-16]
WORD $0xf100114a // subs x10, x10, #4
WORD $0x91008129 // add x9, x9, #32
WORD $0x4ee08440 // add v0.2d, v2.2d, v0.2d
WORD $0x4ee18461 // add v1.2d, v3.2d, v1.2d
BNE LBB0_5
// Reduce: merge the two accumulators, add the two lanes of the result into
// d0 and move that scalar into x9. The compare of x8 against len sits before
// the FMOV and feeds the BEQ after it: if every element was covered by the
// vector loop (len is a multiple of 4) the tail is skipped.
WORD $0x4ee08420 // add v0.2d, v1.2d, v0.2d
WORD $0x5ef1b800 // addp d0, v0.2d
WORD $0xeb01011f // cmp x8, x1
WORD $0x9e660009 // fmov x9, d0
BEQ LBB0_9
// Scalar tail setup: x10 = address of the first unprocessed element
// (buf + x8 * 8), x8 = number of elements still to add. On every path that
// reaches this label x8 < len, so the count is at least 1.
LBB0_7:
WORD $0x8b080c0a // add x10, x0, x8, lsl #3
WORD $0xcb080028 // sub x8, x1, x8
// Scalar tail loop: load one element with post-increment of the cursor,
// decrement the remaining count and add the element to the running sum.
LBB0_8:
WORD $0xf840854b // ldr x11, [x10], #8
WORD $0xf1000508 // subs x8, x8, #1
WORD $0x8b090169 // add x9, x11, x9
BNE LBB0_8
// Common exit for non-empty input: store the sum to *res, reload x29/x30
// and release the stack space taken in the prologue.
LBB0_9:
WORD $0xf9000049 // str x9, [x2]
WORD $0xa8c17bfd // ldp x29, x30, [sp], #16
// Put the stack pointer back where it was
ADD $16, RSP
RET