@@ -13,7 +13,21 @@ TEXT ·block(SB), NOSPLIT, $0
1313 SUBS $ 64 , R2
1414 BMI complete
1515
16+ // Load constants table pointer
17+ MOVD $·constants(SB) , R3
18+
19+ // Cache constants table in registers v16 - v31
20+ WORD $ 0x4cdf2870 // ld1 {v16.4s - v19.4s} , [ x3 ], # 64
21+ WORD $ 0x4cdf7800 // ld1 {v0.4s} , [ x0 ], # 16
22+ WORD $ 0x4cdf2874 // ld1 {v20.4s - v23.4s} , [ x3 ], # 64
23+
24+ WORD $ 0x4c407801 // ld1 {v1.4s} , [ x0 ]
25+ WORD $ 0x4cdf2878 // ld1 {v24.4s - v27.4s} , [ x3 ], # 64
26+ WORD $ 0xd1004000 // sub x0 , x0 , # 0x10
27+ WORD $ 0x4cdf287c // ld1 {v28.4s - v31.4s} , [ x3 ], # 64
28+
1629loop :
30+ // Main loop
1731 WORD $ 0x4cdf2025 // ld1 {v5.16b - v8.16b} , [ x1 ], # 64
1832 WORD $ 0x4ea01c02 // mov v2.16b , v0.16b
1933 WORD $ 0x4ea11c23 // mov v3.16b , v1.16b
@@ -115,8 +129,46 @@ loop:
115129 SUBS $ 64 , R2
116130 BPL loop
117131
132+ // Store result
118133 WORD $ 0x4c00a800 // st1 {v0.4s , v1.4s} , [ x0 ]
119134
120135complete:
121136 RET
122137
138+
// ·constants holds the 64 SHA-256 round constants K[0..63] (FIPS 180-4,
// section 4.2.2) as one contiguous 256-byte read-only table.
//
// Layout: each 8-byte DATA entry packs two consecutive 32-bit constants.
// The even-indexed constant sits in the low 32 bits and the odd-indexed one
// in the high 32 bits, so on little-endian arm64 the table reads back as
// K[0], K[1], ..., K[63] when loaded as 4-byte lanes.
//
// Size: 64 words * 4 bytes = 256 bytes, i.e. exactly sixteen 128-bit vector
// registers' worth of data.
//
// Flags: RODATA places the table in read-only memory; NOPTR states explicitly
// that it holds no Go pointers. The named flags come from textflag.h and
// replace the previous bare numeric flag value 8 (RODATA).

// K[0..15]
DATA ·constants+0x00(SB)/8, $0x71374491428a2f98
DATA ·constants+0x08(SB)/8, $0xe9b5dba5b5c0fbcf
DATA ·constants+0x10(SB)/8, $0x59f111f13956c25b
DATA ·constants+0x18(SB)/8, $0xab1c5ed5923f82a4
DATA ·constants+0x20(SB)/8, $0x12835b01d807aa98
DATA ·constants+0x28(SB)/8, $0x550c7dc3243185be
DATA ·constants+0x30(SB)/8, $0x80deb1fe72be5d74
DATA ·constants+0x38(SB)/8, $0xc19bf1749bdc06a7

// K[16..31]
DATA ·constants+0x40(SB)/8, $0xefbe4786e49b69c1
DATA ·constants+0x48(SB)/8, $0x240ca1cc0fc19dc6
DATA ·constants+0x50(SB)/8, $0x4a7484aa2de92c6f
DATA ·constants+0x58(SB)/8, $0x76f988da5cb0a9dc
DATA ·constants+0x60(SB)/8, $0xa831c66d983e5152
DATA ·constants+0x68(SB)/8, $0xbf597fc7b00327c8
DATA ·constants+0x70(SB)/8, $0xd5a79147c6e00bf3
DATA ·constants+0x78(SB)/8, $0x1429296706ca6351

// K[32..47]
DATA ·constants+0x80(SB)/8, $0x2e1b213827b70a85
DATA ·constants+0x88(SB)/8, $0x53380d134d2c6dfc
DATA ·constants+0x90(SB)/8, $0x766a0abb650a7354
DATA ·constants+0x98(SB)/8, $0x92722c8581c2c92e
DATA ·constants+0xa0(SB)/8, $0xa81a664ba2bfe8a1
DATA ·constants+0xa8(SB)/8, $0xc76c51a3c24b8b70
DATA ·constants+0xb0(SB)/8, $0xd6990624d192e819
DATA ·constants+0xb8(SB)/8, $0x106aa070f40e3585

// K[48..63]
DATA ·constants+0xc0(SB)/8, $0x1e376c0819a4c116
DATA ·constants+0xc8(SB)/8, $0x34b0bcb52748774c
DATA ·constants+0xd0(SB)/8, $0x4ed8aa4a391c0cb3
DATA ·constants+0xd8(SB)/8, $0x682e6ff35b9cca4f
DATA ·constants+0xe0(SB)/8, $0x78a5636f748f82ee
DATA ·constants+0xe8(SB)/8, $0x8cc7020884c87814
DATA ·constants+0xf0(SB)/8, $0xa4506ceb90befffa
DATA ·constants+0xf8(SB)/8, $0xc67178f2bef9a3f7

GLOBL ·constants(SB), RODATA|NOPTR, $256
174+
0 commit comments