-rw-r--r-- 21239 lib25519-20221222/crypto_pow/inv25519/sandy2x/fe51_nsquare.S raw
// linker define fe51_nsquare
// linker use REDMASK51
/*
This file is adapted from amd64-51/fe25519_square.s;
a loop was added so that a single call performs n successive squarings.
*/
#include "fe51_namespace.h"
#include "consts_namespace.h"
# qhasm: int64 input_0
# qhasm: int64 input_1
# qhasm: int64 input_2
# qhasm: int64 input_3
# qhasm: int64 input_4
# qhasm: int64 input_5
# qhasm: stack64 input_6
# qhasm: stack64 input_7
# qhasm: int64 caller_r11
# qhasm: int64 caller_r12
# qhasm: int64 caller_r13
# qhasm: int64 caller_r14
# qhasm: int64 caller_r15
# qhasm: int64 caller_rbx
# qhasm: int64 caller_rbp
# qhasm: int64 r0
# qhasm: int64 r1
# qhasm: int64 r2
# qhasm: int64 r3
# qhasm: int64 r4
# qhasm: int64 x0
# qhasm: int64 x1
# qhasm: int64 x2
# qhasm: int64 x3
# qhasm: int64 x4
# qhasm: stack64 x119_stack
# qhasm: stack64 x219_stack
# qhasm: stack64 x319_stack
# qhasm: stack64 x419_stack
# qhasm: int64 squarer01
# qhasm: int64 squarer11
# qhasm: int64 squarer21
# qhasm: int64 squarer31
# qhasm: int64 squarer41
# qhasm: int64 squarerax
# qhasm: int64 squarerdx
# qhasm: int64 squaret
# qhasm: int64 squareredmask
# qhasm: int64 iters
# qhasm: stack64 r11_stack
# qhasm: stack64 r12_stack
# qhasm: stack64 r13_stack
# qhasm: stack64 r14_stack
# qhasm: stack64 r15_stack
# qhasm: stack64 rbx_stack
# qhasm: stack64 rbp_stack
/*
 * fe51_nsquare -- square a 5-limb field element n times in a row.
 *
 * Arguments, as read by the code below (System V AMD64 register order):
 *   rdi = out : pointer to five 64-bit output limbs
 *   rsi = in  : pointer to five 64-bit input limbs; all five are loaded
 *               before the first store, so out == in is allowed
 *   rdx = n   : number of squarings, read as a full 64-bit value
 *               NOTE(review): confirm the C prototype passes a 64-bit count
 *
 * Representation: limbs are carried in radix 2^51 (every carry below is a
 * shift by 51) and overflow past limb 4 is folded back into limb 0 with a
 * factor of 19. REDMASK51 is defined outside this file; presumably it is
 * 2^51 - 1 -- confirm against the constants file.
 *
 * Result: for n >= 1, out[1..4] are masked with REDMASK51 and out[0] is a
 * masked limb plus 19 times the final carry. For n == 0 the loop is skipped
 * and out receives an unmodified copy of the input limbs. (The earlier
 * do-while form decremented n before testing it, so n == 0 wrapped around
 * to 2^64 - 1 iterations.)
 *
 * Register roles inside the loop:
 *   rcx = x0 (later reused for r4 low), r8 = x1, out[2..4] = x2..x4
 *   r9:r10 = r0, r11:r12 = r1, r13:r14 = r2, r15:rbx = r3, rcx:rbp = r4
 *   (low:high halves of each 128-bit column sum)
 *   rsi = remaining iterations, rax/rdx = multiply scratch and mask
 *
 * Clobbers: rax, rcx, rdx, rsi, r8, r9, r10, r11, flags.
 * Preserves: rbx, rbp, r12-r15 (spilled to the local frame and reloaded).
 * Stack: a frame of 64 + (rsp & 31) bytes, which leaves rsp 32-byte aligned;
 *        the first 56 bytes hold the spilled registers. Leaf function.
 */
        .p2align 5
        .global _fe51_nsquare
        .global fe51_nsquare
_fe51_nsquare:
fe51_nsquare:
        // r11 = (rsp & 31) + 64 is the frame size; it is kept in the frame
        // itself because r11 is overwritten inside the loop.
        mov %rsp,%r11
        and $31,%r11
        add $64,%r11
        sub %r11,%rsp
        movq %r11,0(%rsp)
        movq %r12,8(%rsp)
        movq %r13,16(%rsp)
        movq %r14,24(%rsp)
        movq %r15,32(%rsp)
        movq %rbx,40(%rsp)
        movq %rbp,48(%rsp)

        // Load the input. x0 and x1 stay in registers across iterations;
        // x2..x4 are parked in out[2..4] and read back from there.
        movq 0(%rsi),%rcx               // x0
        movq 8(%rsi),%r8                // x1
        movq 16(%rsi),%r9               // x2
        movq 24(%rsi),%rax              // x3
        movq 32(%rsi),%rsi              // x4 (input pointer is dead from here)
        movq %r9,16(%rdi)
        movq %rax,24(%rdi)
        movq %rsi,32(%rdi)
        mov %rdx,%rsi                   // iters = n

        // Zero squarings: skip the loop, out becomes a copy of the input.
        test %rsi,%rsi
        je ._done

._loop:
        // ---- column sums: terms involving x0 ----
        mov %rcx,%rax
        mul %rcx                        // x0*x0
        add %rcx,%rcx                   // rcx = 2*x0 for the cross terms
        mov %rax,%r9                    // r0 (low)
        mov %rdx,%r10                   // r0 (high)
        mov %rcx,%rax
        mul %r8                         // 2*x0*x1
        mov %rax,%r11                   // r1 (low)
        mov %rdx,%r12                   // r1 (high)
        mov %rcx,%rax
        mulq 16(%rdi)                   // 2*x0*x2
        mov %rax,%r13                   // r2 (low)
        mov %rdx,%r14                   // r2 (high)
        mov %rcx,%rax
        mulq 24(%rdi)                   // 2*x0*x3
        mov %rax,%r15                   // r3 (low)
        mov %rdx,%rbx                   // r3 (high)
        mov %rcx,%rax
        mulq 32(%rdi)                   // 2*x0*x4
        mov %rax,%rcx                   // r4 (low); x0 is no longer needed
        mov %rdx,%rbp                   // r4 (high)

        // ---- terms involving x1 ----
        mov %r8,%rax
        mul %r8                         // x1*x1
        add %r8,%r8                     // r8 = 2*x1
        add %rax,%r13
        adc %rdx,%r14                   // r2 += x1*x1
        mov %r8,%rax
        mulq 16(%rdi)                   // 2*x1*x2
        add %rax,%r15
        adc %rdx,%rbx                   // r3 += 2*x1*x2
        mov %r8,%rax                    // rax = 2*x1, taken before scaling r8
        imulq $19, %r8,%r8              // r8 = 38*x1 for the wrapped term
        mulq 24(%rdi)                   // 2*x1*x3
        add %rax,%rcx
        adc %rdx,%rbp                   // r4 += 2*x1*x3
        mov %r8,%rax
        mulq 32(%rdi)                   // 38*x1*x4
        add %rax,%r9
        adc %rdx,%r10                   // r0 += 38*x1*x4

        // ---- terms among x2, x3, x4 ----
        // After the last addition to each column, shld $13 leaves bits
        // 51 and up of the 128-bit sum in the high register (the carry).
        movq 16(%rdi),%rax
        mulq 16(%rdi)                   // x2*x2
        add %rax,%rcx
        adc %rdx,%rbp                   // r4 += x2*x2
        shld $13,%rcx,%rbp              // rbp = r4 >> 51
        movq 16(%rdi),%rax
        imulq $38, %rax,%rax
        mulq 24(%rdi)                   // 38*x2*x3
        add %rax,%r9
        adc %rdx,%r10                   // r0 += 38*x2*x3
        shld $13,%r9,%r10               // r10 = r0 >> 51
        movq 16(%rdi),%rax
        imulq $38, %rax,%rax
        mulq 32(%rdi)                   // 38*x2*x4
        add %rax,%r11
        adc %rdx,%r12                   // r1 += 38*x2*x4
        movq 24(%rdi),%rax
        imulq $19, %rax,%rax
        mulq 24(%rdi)                   // 19*x3*x3
        add %rax,%r11
        adc %rdx,%r12                   // r1 += 19*x3*x3
        shld $13,%r11,%r12              // r12 = r1 >> 51
        movq 24(%rdi),%rax
        imulq $38, %rax,%rax
        mulq 32(%rdi)                   // 38*x3*x4
        add %rax,%r13
        adc %rdx,%r14                   // r2 += 38*x3*x4
        shld $13,%r13,%r14              // r14 = r2 >> 51
        movq 32(%rdi),%rax
        imulq $19, %rax,%rax
        mulq 32(%rdi)                   // 19*x4*x4
        add %rax,%r15
        adc %rdx,%rbx                   // r3 += 19*x4*x4
        shld $13,%r15,%rbx              // rbx = r3 >> 51

        // ---- first reduction: mask each low word and add the carry of
        // the column below; the carry out of r4 wraps into r0 times 19 ----
        movq REDMASK51(%rip),%rdx       // reloaded each pass: mul clobbers rdx
        and %rdx,%rcx
        add %rbx,%rcx                   // r4 = (r4 & mask) + carry(r3)
        and %rdx,%r9
        and %rdx,%r11
        add %r10,%r11                   // r1 = (r1 & mask) + carry(r0)
        and %rdx,%r13
        add %r12,%r13                   // r2 = (r2 & mask) + carry(r1)
        and %rdx,%r15
        add %r14,%r15                   // r3 = (r3 & mask) + carry(r2)
        imulq $19, %rbp,%rbp
        lea (%r9,%rbp),%r9              // t = (r0 & mask) + 19*carry(r4)

        // ---- second pass: ripple a 51-bit carry through r9 (t) and
        // write the next x0..x4 ----
        mov %r9,%rax                    // rax = unmasked r0
        shr $51,%r9
        add %r11,%r9                    // t = carry + r1
        and %rdx,%rax                   // rax = r0 & mask
        mov %r9,%r8
        shr $51,%r9
        add %r13,%r9                    // t = carry + r2
        and %rdx,%r8                    // next x1
        mov %r9,%r10
        shr $51,%r9
        add %r15,%r9                    // t = carry + r3
        and %rdx,%r10
        movq %r10,16(%rdi)              // next x2
        mov %r9,%r10
        shr $51,%r9
        add %rcx,%r9                    // t = carry + r4
        and %rdx,%r10
        movq %r10,24(%rdi)              // next x3
        mov %r9,%r10
        shr $51,%r9
        imulq $19, %r9,%r9              // top carry wraps around times 19
        lea (%rax,%r9),%rcx             // next x0 = (r0 & mask) + 19*carry
        and %rdx,%r10
        movq %r10,32(%rdi)              // next x4

        // rsi is untouched by the body above, so the count can be
        // decremented and tested in one step here.
        sub $1,%rsi
        jne ._loop

._done:
        // x0 and x1 only ever lived in registers; store them now.
        movq %rcx,0(%rdi)
        movq %r8,8(%rdi)

        // Reload the frame size and the callee-saved registers, then
        // release the frame.
        movq 0(%rsp),%r11
        movq 8(%rsp),%r12
        movq 16(%rsp),%r13
        movq 24(%rsp),%r14
        movq 32(%rsp),%r15
        movq 40(%rsp),%rbx
        movq 48(%rsp),%rbp
        add %r11,%rsp
        ret