void mul_truncate_a()

in FourQ_ARM_NEON/ARM/fp2_1271_NEON.c [1530:1667]


void mul_truncate_a(uint32_t* a, uint32_t* b, uint32_t* c) 
{ // 256-bit multiplication with truncation for the scalar decomposition
  // Outputs 64-bit value c = (uint64_t)((a * b) >> 256).

asm volatile(   
    "push         {r4-r12}                 	\n\t"

    "ldm          %0!, {r3-r4}              \n\t"    
    "ldr          r5, [%1, #0]              \n\t"       
    "mov          r8, #0                    \n\t" 
    "sub          r13, r13, #12             \n\t"    // Allocating space in the stack
    "umull        r7, r6, r5, r3            \n\t" 
    "umlal        r6, r8, r5, r4            \n\t"
    "ldm          %0!, {r3-r4}              \n\t"      
    "mov          r9, #0                    \n\t"     
    "mov          r10, #0                   \n\t" 
    "str          r6, [r13], #4             \n\t"    // Store in stack  
    "umlal        r8, r9, r5, r3            \n\t"
    "umlal        r9, r10, r5, r4           \n\t"
    "ldm          %0!, {r3-r4}              \n\t"      
    "mov          r11, #0                   \n\t"       
    "mov          r12, #0                   \n\t"  
    "umlal        r10, r11, r5, r3          \n\t"
    "umlal        r11, r12, r5, r4          \n\t"
    "ldm          %0!, {r3-r4}              \n\t"     
    "mov          r6, #0                    \n\t"     
    "mov          r7, #0                    \n\t" 
    "sub          %0, %0, #32               \n\t"  
    "umlal        r12, r6, r5, r3           \n\t"
    "umlal        r6, r7, r5, r4            \n\t"
    "stm          r13, {r6-r7}              \n\t"    // Store in stack 
  
    "sub          r13, r13, #4              \n\t" 
    "ldm          %0!, {r3-r4}              \n\t"    
    "ldr          r5, [%1, #4]              \n\t"    
    "ldr          r6, [r13]                 \n\t"      
    "mov          r7, #0                    \n\t"   
    "umlal        r6, r7, r5, r3            \n\t" 
    "umaal        r7, r8, r5, r4            \n\t"
    "ldm          %0!, {r3-r4}              \n\t" 
    "str          r7, [r13], #4             \n\t"    // Store in stack
    "umaal        r8, r9, r5, r3            \n\t"
    "umaal        r9, r10, r5, r4           \n\t"
    "ldm          %0!, {r3-r4}              \n\t"  
    "umaal        r10, r11, r5, r3          \n\t"
    "umaal        r11, r12, r5, r4          \n\t"
    "ldm          %0!, {r3-r4}              \n\t"    
    "ldm          r13, {r6-r7}              \n\t" 
    "sub          %0, %0, #32               \n\t"
    "umaal        r12, r6, r5, r3           \n\t"
    "umaal        r6, r7, r5, r4            \n\t" 
    "stm          r13, {r6-r7}              \n\t"    // Store in stack 
   
    "sub          r13, r13, #4              \n\t" 
    "ldm          %0!, {r3-r4}              \n\t"    
    "ldr          r5, [%1, #8]              \n\t"    
    "ldr          r7, [r13]                 \n\t"        
    "mov          r6, #0                    \n\t"    
    "umlal        r7, r6, r5, r3            \n\t" 
    "umaal        r6, r8, r5, r4            \n\t"
    "ldm          %0!, {r3-r4}              \n\t" 
    "str          r6, [r13], #4             \n\t"    // Store in stack
    "umaal        r8, r9, r5, r3            \n\t"
    "umaal        r9, r10, r5, r4           \n\t"
    "ldm          %0!, {r3-r4}              \n\t"  
    "umaal        r10, r11, r5, r3          \n\t"
    "umaal        r11, r12, r5, r4          \n\t"
    "ldm          %0!, {r3-r4}              \n\t" 
    "ldm          r13, {r6-r7}              \n\t" 
    "sub          %0, %0, #32               \n\t"    
    "umaal        r12, r6, r5, r3           \n\t"
    "umaal        r6, r7, r5, r4            \n\t"
    "stm          r13, {r6-r7}              \n\t"    // Store in stack 
  
    "sub          r13, r13, #4              \n\t" 
    "ldm          %0!, {r3-r4}              \n\t"    
    "ldr          r5, [%1, #12]             \n\t"    
    "ldr          r6, [r13]                 \n\t"          
    "mov          r7, #0                    \n\t"    
    "umlal        r6, r7, r5, r3            \n\t" 
    "umaal        r7, r8, r5, r4            \n\t"
    "ldm          %0!, {r3-r4}              \n\t" 
    "str          r7, [r13], #4             \n\t"    // Store in stack
    "umaal        r8, r9, r5, r3            \n\t"
    "umaal        r9, r10, r5, r4           \n\t"
    "ldm          %0!, {r3-r4}              \n\t"  
    "umaal        r10, r11, r5, r3          \n\t"
    "umaal        r11, r12, r5, r4          \n\t"
    "ldm          %0!, {r3-r4}              \n\t"  
    "ldm          r13, {r6-r7}              \n\t"  
    "sub          %0, %0, #32               \n\t" 
    "umaal        r12, r6, r5, r3           \n\t"   

    "sub          r13, r13, #4              \n\t"    
    "ldm          %0!, {r3-r4}              \n\t"    
    "ldr          r5, [%1, #16]             \n\t"   
    "ldr          r7, [r13]                 \n\t"           
    "mov          r6, #0                    \n\t"      
    "umlal        r7, r6, r5, r3            \n\t" 
    "umaal        r6, r8, r5, r4            \n\t"
    "ldm          %0!, {r3-r4}              \n\t"
    "str          r6, [r13]                 \n\t"    // Store in stack 
    "umaal        r8, r9, r5, r3            \n\t"
    "umaal        r9, r10, r5, r4           \n\t"
    "ldm          %0!, {r3-r4}              \n\t" 
    "sub          %0, %0, #24               \n\t" 
    "umaal        r10, r11, r5, r3          \n\t"
    "umaal        r11, r12, r5, r4          \n\t"    
  
    "ldm          %0!, {r3-r4}              \n\t"    
    "ldr          r5, [%1, #20]             \n\t"    
    "ldr          r6, [r13], #12            \n\t"             
    "mov          r7, #0                    \n\t"     
    "umlal        r6, r7, r5, r3            \n\t" 
    "umaal        r7, r8, r5, r4            \n\t"
    "ldm          %0!, {r3-r4}              \n\t" 
    "umaal        r8, r9, r5, r3            \n\t"
    "umaal        r9, r10, r5, r4           \n\t"
    "ldm          %0!, {r3-r4}              \n\t" 
    "sub          %0, %0, #24               \n\t"  
    "umaal        r10, r11, r5, r3          \n\t"        
  
    "ldm          %0!, {r3-r4}              \n\t"    
    "ldr          r5, [%1, #24]             \n\t" 
    "ldm          %0!, {r11-r12}            \n\t"        
    "mov          r6, #0                    \n\t"    
    "umlal        r7, r6, r5, r3            \n\t" 
    "umaal        r6, r8, r5, r4            \n\t"
    "umaal        r8, r9, r5, r11           \n\t"
    "umaal        r9, r10, r5, r12          \n\t"     
    "stm          %2!, {r8-r9}              \n\t" 
    "pop          {r4-r12}                 	\n\t"
    :
    :"r"(&a[0]), "r"(&b[0]), "r"(&c[0])
    :"memory","r3","r4","r5","r6","r7","r8","r9","r10","r11","r12"
	);
	return; 
}