sha256blockAvx2_amd64.s 74 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449
  1. //+build !noasm,!appengine
  2. // SHA256 implementation for AVX2
  3. //
  4. // Minio Cloud Storage, (C) 2016 Minio, Inc.
  5. //
  6. // Licensed under the Apache License, Version 2.0 (the "License");
  7. // you may not use this file except in compliance with the License.
  8. // You may obtain a copy of the License at
  9. //
  10. // http://www.apache.org/licenses/LICENSE-2.0
  11. //
  12. // Unless required by applicable law or agreed to in writing, software
  13. // distributed under the License is distributed on an "AS IS" BASIS,
  14. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15. // See the License for the specific language governing permissions and
  16. // limitations under the License.
  17. //
  18. //
  19. // This code is based on an Intel White-Paper:
  20. // "Fast SHA-256 Implementations on Intel Architecture Processors"
  21. //
  22. // together with the reference implementation from the following authors:
  23. // James Guilford <james.guilford@intel.com>
  24. // Kirk Yap <kirk.s.yap@intel.com>
  25. // Tim Chen <tim.c.chen@linux.intel.com>
  26. //
  27. // For Golang it has been converted to Plan 9 assembly with the help of
  28. // github.com/minio/asm2plan9s to assemble Intel instructions to their Plan9
  29. // equivalents
  30. //
  // K256<> is the constant table addressed through BP by blockAvx2.
  //
  // Offsets 0x000-0x1ff: the 64 SHA-256 round constants. Each 8-byte entry
  // packs two 32-bit constants (the lower-indexed constant in the low half),
  // and every 16-byte group of four constants is stored twice in a row.
  // blockAvx2 places one message block in each 128-bit lane of a YMM
  // register, so a single 256-bit vpaddd from this table adds the same four
  // round constants to both lanes.
  31. DATA K256<>+0x000(SB)/8, $0x71374491428a2f98
  32. DATA K256<>+0x008(SB)/8, $0xe9b5dba5b5c0fbcf
  33. DATA K256<>+0x010(SB)/8, $0x71374491428a2f98
  34. DATA K256<>+0x018(SB)/8, $0xe9b5dba5b5c0fbcf
  35. DATA K256<>+0x020(SB)/8, $0x59f111f13956c25b
  36. DATA K256<>+0x028(SB)/8, $0xab1c5ed5923f82a4
  37. DATA K256<>+0x030(SB)/8, $0x59f111f13956c25b
  38. DATA K256<>+0x038(SB)/8, $0xab1c5ed5923f82a4
  39. DATA K256<>+0x040(SB)/8, $0x12835b01d807aa98
  40. DATA K256<>+0x048(SB)/8, $0x550c7dc3243185be
  41. DATA K256<>+0x050(SB)/8, $0x12835b01d807aa98
  42. DATA K256<>+0x058(SB)/8, $0x550c7dc3243185be
  43. DATA K256<>+0x060(SB)/8, $0x80deb1fe72be5d74
  44. DATA K256<>+0x068(SB)/8, $0xc19bf1749bdc06a7
  45. DATA K256<>+0x070(SB)/8, $0x80deb1fe72be5d74
  46. DATA K256<>+0x078(SB)/8, $0xc19bf1749bdc06a7
  47. DATA K256<>+0x080(SB)/8, $0xefbe4786e49b69c1
  48. DATA K256<>+0x088(SB)/8, $0x240ca1cc0fc19dc6
  49. DATA K256<>+0x090(SB)/8, $0xefbe4786e49b69c1
  50. DATA K256<>+0x098(SB)/8, $0x240ca1cc0fc19dc6
  51. DATA K256<>+0x0a0(SB)/8, $0x4a7484aa2de92c6f
  52. DATA K256<>+0x0a8(SB)/8, $0x76f988da5cb0a9dc
  53. DATA K256<>+0x0b0(SB)/8, $0x4a7484aa2de92c6f
  54. DATA K256<>+0x0b8(SB)/8, $0x76f988da5cb0a9dc
  55. DATA K256<>+0x0c0(SB)/8, $0xa831c66d983e5152
  56. DATA K256<>+0x0c8(SB)/8, $0xbf597fc7b00327c8
  57. DATA K256<>+0x0d0(SB)/8, $0xa831c66d983e5152
  58. DATA K256<>+0x0d8(SB)/8, $0xbf597fc7b00327c8
  59. DATA K256<>+0x0e0(SB)/8, $0xd5a79147c6e00bf3
  60. DATA K256<>+0x0e8(SB)/8, $0x1429296706ca6351
  61. DATA K256<>+0x0f0(SB)/8, $0xd5a79147c6e00bf3
  62. DATA K256<>+0x0f8(SB)/8, $0x1429296706ca6351
  63. DATA K256<>+0x100(SB)/8, $0x2e1b213827b70a85
  64. DATA K256<>+0x108(SB)/8, $0x53380d134d2c6dfc
  65. DATA K256<>+0x110(SB)/8, $0x2e1b213827b70a85
  66. DATA K256<>+0x118(SB)/8, $0x53380d134d2c6dfc
  67. DATA K256<>+0x120(SB)/8, $0x766a0abb650a7354
  68. DATA K256<>+0x128(SB)/8, $0x92722c8581c2c92e
  69. DATA K256<>+0x130(SB)/8, $0x766a0abb650a7354
  70. DATA K256<>+0x138(SB)/8, $0x92722c8581c2c92e
  71. DATA K256<>+0x140(SB)/8, $0xa81a664ba2bfe8a1
  72. DATA K256<>+0x148(SB)/8, $0xc76c51a3c24b8b70
  73. DATA K256<>+0x150(SB)/8, $0xa81a664ba2bfe8a1
  74. DATA K256<>+0x158(SB)/8, $0xc76c51a3c24b8b70
  75. DATA K256<>+0x160(SB)/8, $0xd6990624d192e819
  76. DATA K256<>+0x168(SB)/8, $0x106aa070f40e3585
  77. DATA K256<>+0x170(SB)/8, $0xd6990624d192e819
  78. DATA K256<>+0x178(SB)/8, $0x106aa070f40e3585
  79. DATA K256<>+0x180(SB)/8, $0x1e376c0819a4c116
  80. DATA K256<>+0x188(SB)/8, $0x34b0bcb52748774c
  81. DATA K256<>+0x190(SB)/8, $0x1e376c0819a4c116
  82. DATA K256<>+0x198(SB)/8, $0x34b0bcb52748774c
  83. DATA K256<>+0x1a0(SB)/8, $0x4ed8aa4a391c0cb3
  84. DATA K256<>+0x1a8(SB)/8, $0x682e6ff35b9cca4f
  85. DATA K256<>+0x1b0(SB)/8, $0x4ed8aa4a391c0cb3
  86. DATA K256<>+0x1b8(SB)/8, $0x682e6ff35b9cca4f
  87. DATA K256<>+0x1c0(SB)/8, $0x78a5636f748f82ee
  88. DATA K256<>+0x1c8(SB)/8, $0x8cc7020884c87814
  89. DATA K256<>+0x1d0(SB)/8, $0x78a5636f748f82ee
  90. DATA K256<>+0x1d8(SB)/8, $0x8cc7020884c87814
  91. DATA K256<>+0x1e0(SB)/8, $0xa4506ceb90befffa
  92. DATA K256<>+0x1e8(SB)/8, $0xc67178f2bef9a3f7
  93. DATA K256<>+0x1f0(SB)/8, $0xa4506ceb90befffa
  94. DATA K256<>+0x1f8(SB)/8, $0xc67178f2bef9a3f7
  // Offsets 0x200-0x21f: vpshufb control mask (loaded into YMM10 by
  // blockAvx2) that reverses the byte order inside every 32-bit word; it is
  // applied to the freshly loaded message data. Same 16 bytes in both lanes.
  95. DATA K256<>+0x200(SB)/8, $0x0405060700010203
  96. DATA K256<>+0x208(SB)/8, $0x0c0d0e0f08090a0b
  97. DATA K256<>+0x210(SB)/8, $0x0405060700010203
  98. DATA K256<>+0x218(SB)/8, $0x0c0d0e0f08090a0b
  // Offsets 0x220-0x23f: vpshufb control mask (loaded into YMM8). Within each
  // 128-bit lane it moves dwords 0 and 2 into the low 8 bytes; the 0xff
  // selector bytes make vpshufb write zeros to the high 8 bytes.
  99. DATA K256<>+0x220(SB)/8, $0x0b0a090803020100
  100. DATA K256<>+0x228(SB)/8, $0xffffffffffffffff
  101. DATA K256<>+0x230(SB)/8, $0x0b0a090803020100
  102. DATA K256<>+0x238(SB)/8, $0xffffffffffffffff
  // Offsets 0x240-0x25f: vpshufb control mask (loaded into YMM9), the mirror
  // image of the previous one: zeros in the low 8 bytes of each lane, dwords
  // 0 and 2 moved into the high 8 bytes.
  103. DATA K256<>+0x240(SB)/8, $0xffffffffffffffff
  104. DATA K256<>+0x248(SB)/8, $0x0b0a090803020100
  105. DATA K256<>+0x250(SB)/8, $0xffffffffffffffff
  106. DATA K256<>+0x258(SB)/8, $0x0b0a090803020100
  // Declare the symbol: flag 8 is RODATA in Go's textflag.h, and the size of
  // 608 (= 0x260) bytes ends exactly after the last DATA entry (0x258 + 8).
  107. GLOBL K256<>(SB), 8, $608
  108. // We need 0x220 bytes of stack space aligned on a 512-byte boundary, so for
  109. // the worst-case alignment of SP we reserve twice this amount: 1088 bytes (=0x440)
  110. //
  111. // SP aligned end-aligned stacksize
  112. // 100013d0 10001400 10001620 592
  113. // 100013d8 10001400 10001620 584
  114. // 100013e0 10001600 10001820 1088
  115. // 100013e8 10001600 10001820 1080
  116. // func blockAvx2(h []uint32, message []uint8)
  117. TEXT ·blockAvx2(SB),$1088-48
  118. MOVQ h+0(FP), DI // DI: &h
  119. MOVQ message_base+24(FP), SI // SI: &message
  120. MOVQ message_len+32(FP), DX // len(message)
  121. ADDQ SI, DX // end pointer of input
  122. MOVQ SP, R11 // copy stack pointer
  123. ADDQ $0x220, SP // sp += 0x220
  124. ANDQ $0xfffffffffffffe00, SP // align stack frame
  125. ADDQ $0x1c0, SP
  126. MOVQ DI, 0x40(SP) // save ctx
  127. MOVQ SI, 0x48(SP) // save input
  128. MOVQ DX, 0x50(SP) // save end pointer
  129. MOVQ R11, 0x58(SP) // save copy of stack pointer
  130. WORD $0xf8c5; BYTE $0x77 // vzeroupper
  131. ADDQ $0x40, SI // input++
  132. MOVL (DI), AX
  133. MOVQ SI, R12 // borrow $T1
  134. MOVL 4(DI), BX
  135. CMPQ SI, DX // $_end
  136. MOVL 8(DI), CX
  137. LONG $0xe4440f4c // cmove r12,rsp /* next block or random data */
  138. MOVL 12(DI), DX
  139. MOVL 16(DI), R8
  140. MOVL 20(DI), R9
  141. MOVL 24(DI), R10
  142. MOVL 28(DI), R11
  143. LEAQ K256<>(SB), BP
  144. LONG $0x856f7dc5; LONG $0x00000220 // VMOVDQA YMM8, 0x220[rbp] /* vmovdqa ymm8,YMMWORD PTR [rip+0x220] */
  145. LONG $0x8d6f7dc5; LONG $0x00000240 // VMOVDQA YMM9, 0x240[rbp] /* vmovdqa ymm9,YMMWORD PTR [rip+0x240] */
  146. LONG $0x956f7dc5; LONG $0x00000200 // VMOVDQA YMM10, 0x200[rbp] /* vmovdqa ymm7,YMMWORD PTR [rip+0x200] */
  147. loop0:
  148. LONG $0x6f7dc1c4; BYTE $0xfa // VMOVDQA YMM7, YMM10
  149. // Load first 16 dwords from two blocks
  150. MOVOU -64(SI), X0 // vmovdqu xmm0,XMMWORD PTR [rsi-0x40]
  151. MOVOU -48(SI), X1 // vmovdqu xmm1,XMMWORD PTR [rsi-0x30]
  152. MOVOU -32(SI), X2 // vmovdqu xmm2,XMMWORD PTR [rsi-0x20]
  153. MOVOU -16(SI), X3 // vmovdqu xmm3,XMMWORD PTR [rsi-0x10]
  154. // Byte swap data and transpose data into high/low
  155. LONG $0x387dc3c4; WORD $0x2404; BYTE $0x01 // vinserti128 ymm0,ymm0,[r12],0x1
  156. LONG $0x3875c3c4; LONG $0x0110244c // vinserti128 ymm1,ymm1,0x10[r12],0x1
  157. LONG $0x007de2c4; BYTE $0xc7 // vpshufb ymm0,ymm0,ymm7
  158. LONG $0x386dc3c4; LONG $0x01202454 // vinserti128 ymm2,ymm2,0x20[r12],0x1
  159. LONG $0x0075e2c4; BYTE $0xcf // vpshufb ymm1,ymm1,ymm7
  160. LONG $0x3865c3c4; LONG $0x0130245c // vinserti128 ymm3,ymm3,0x30[r12],0x1
  161. LEAQ K256<>(SB), BP
  162. LONG $0x006de2c4; BYTE $0xd7 // vpshufb ymm2,ymm2,ymm7
  163. LONG $0x65fefdc5; BYTE $0x00 // vpaddd ymm4,ymm0,[rbp]
  164. LONG $0x0065e2c4; BYTE $0xdf // vpshufb ymm3,ymm3,ymm7
  165. LONG $0x6dfef5c5; BYTE $0x20 // vpaddd ymm5,ymm1,0x20[rbp]
  166. LONG $0x75feedc5; BYTE $0x40 // vpaddd ymm6,ymm2,0x40[rbp]
  167. LONG $0x7dfee5c5; BYTE $0x60 // vpaddd ymm7,ymm3,0x60[rbp]
  168. LONG $0x247ffdc5; BYTE $0x24 // vmovdqa [rsp],ymm4
  169. XORQ R14, R14
  170. LONG $0x6c7ffdc5; WORD $0x2024 // vmovdqa [rsp+0x20],ymm5
  171. ADDQ $-0x40, SP
  172. MOVQ BX, DI
  173. LONG $0x347ffdc5; BYTE $0x24 // vmovdqa [rsp],ymm6
  174. XORQ CX, DI // magic
  175. LONG $0x7c7ffdc5; WORD $0x2024 // vmovdqa [rsp+0x20],ymm7
  176. MOVQ R9, R12
  177. ADDQ $0x80, BP
  178. loop1:
  179. // Schedule 48 input dwords, by doing 3 rounds of 12 each
  180. // Note: SIMD instructions are interleaved with the SHA calculations
  181. ADDQ $-0x40, SP
  182. LONG $0x0f75e3c4; WORD $0x04e0 // vpalignr ymm4,ymm1,ymm0,0x4
  183. // ROUND(AX, BX, CX, DX, R8, R9, R10, R11, R12, R13, R14, R15, DI, SP, 0x80)
  184. LONG $0x249c0344; LONG $0x00000080 // add r11d,[rsp+0x80]
  185. WORD $0x2145; BYTE $0xc4 // and r12d,r8d
  186. LONG $0xf07b43c4; WORD $0x19e8 // rorx r13d,r8d,0x19
  187. LONG $0x0f65e3c4; WORD $0x04fa // vpalignr ymm7,ymm3,ymm2,0x4
  188. LONG $0xf07b43c4; WORD $0x0bf8 // rorx r15d,r8d,0xb
  189. LONG $0x30048d42 // lea eax,[rax+r14*1]
  190. LONG $0x231c8d47 // lea r11d,[r11+r12*1]
  191. LONG $0xd472cdc5; BYTE $0x07 // vpsrld ymm6,ymm4,0x7
  192. LONG $0xf23842c4; BYTE $0xe2 // andn r12d,r8d,r10d
  193. WORD $0x3145; BYTE $0xfd // xor r13d,r15d
  194. LONG $0xf07b43c4; WORD $0x06f0 // rorx r14d,r8d,0x6
  195. LONG $0xc7fefdc5 // vpaddd ymm0,ymm0,ymm7
  196. LONG $0x231c8d47 // lea r11d,[r11+r12*1]
  197. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  198. WORD $0x8941; BYTE $0xc7 // mov r15d,eax
  199. LONG $0xd472c5c5; BYTE $0x03 // vpsrld ymm7,ymm4,0x3
  200. LONG $0xf07b63c4; WORD $0x16e0 // rorx r12d,eax,0x16
  201. LONG $0x2b1c8d47 // lea r11d,[r11+r13*1]
  202. WORD $0x3141; BYTE $0xdf // xor r15d,ebx
  203. LONG $0xf472d5c5; BYTE $0x0e // vpslld ymm5,ymm4,0xe
  204. LONG $0xf07b63c4; WORD $0x0df0 // rorx r14d,eax,0xd
  205. LONG $0xf07b63c4; WORD $0x02e8 // rorx r13d,eax,0x2
  206. LONG $0x1a148d42 // lea edx,[rdx+r11*1]
  207. LONG $0xe6efc5c5 // vpxor ymm4,ymm7,ymm6
  208. WORD $0x2144; BYTE $0xff // and edi,r15d
  209. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  210. WORD $0xdf31 // xor edi,ebx
  211. LONG $0xfb70fdc5; BYTE $0xfa // vpshufd ymm7,ymm3,0xfa
  212. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  213. LONG $0x3b1c8d45 // lea r11d,[r11+rdi*1]
  214. WORD $0x8945; BYTE $0xc4 // mov r12d,r8d
  215. LONG $0xd672cdc5; BYTE $0x0b // vpsrld ymm6,ymm6,0xb
  216. // ROUND(R11, AX, BX, CX, DX, R8, R9, R10, R12, R13, R14, DI, R15, SP, 0x84)
  217. LONG $0x24940344; LONG $0x00000084 // add r10d,[rsp+0x84]
  218. WORD $0x2141; BYTE $0xd4 // and r12d,edx
  219. LONG $0xf07b63c4; WORD $0x19ea // rorx r13d,edx,0x19
  220. LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5
  221. LONG $0xf07be3c4; WORD $0x0bfa // rorx edi,edx,0xb
  222. LONG $0x331c8d47 // lea r11d,[r11+r14*1]
  223. LONG $0x22148d47 // lea r10d,[r10+r12*1]
  224. LONG $0xf572d5c5; BYTE $0x0b // vpslld ymm5,ymm5,0xb
  225. LONG $0xf26842c4; BYTE $0xe1 // andn r12d,edx,r9d
  226. WORD $0x3141; BYTE $0xfd // xor r13d,edi
  227. LONG $0xf07b63c4; WORD $0x06f2 // rorx r14d,edx,0x6
  228. LONG $0xe6efddc5 // vpxor ymm4,ymm4,ymm6
  229. LONG $0x22148d47 // lea r10d,[r10+r12*1]
  230. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  231. WORD $0x8944; BYTE $0xdf // mov edi,r11d
  232. LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa
  233. LONG $0xf07b43c4; WORD $0x16e3 // rorx r12d,r11d,0x16
  234. LONG $0x2a148d47 // lea r10d,[r10+r13*1]
  235. WORD $0xc731 // xor edi,eax
  236. LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5
  237. LONG $0xf07b43c4; WORD $0x0df3 // rorx r14d,r11d,0xd
  238. LONG $0xf07b43c4; WORD $0x02eb // rorx r13d,r11d,0x2
  239. LONG $0x110c8d42 // lea ecx,[rcx+r10*1]
  240. LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11
  241. WORD $0x2141; BYTE $0xff // and r15d,edi
  242. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  243. WORD $0x3141; BYTE $0xc7 // xor r15d,eax
  244. LONG $0xc4fefdc5 // vpaddd ymm0,ymm0,ymm4
  245. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  246. LONG $0x3a148d47 // lea r10d,[r10+r15*1]
  247. WORD $0x8941; BYTE $0xd4 // mov r12d,edx
  248. LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7
  249. // ROUND(R10, R11, AX, BX, CX, DX, R8, R9, R12, R13, R14, R15, DI, SP, 0x88)
  250. LONG $0x248c0344; LONG $0x00000088 // add r9d,[rsp+0x88]
  251. WORD $0x2141; BYTE $0xcc // and r12d,ecx
  252. LONG $0xf07b63c4; WORD $0x19e9 // rorx r13d,ecx,0x19
  253. LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2
  254. LONG $0xf07b63c4; WORD $0x0bf9 // rorx r15d,ecx,0xb
  255. LONG $0x32148d47 // lea r10d,[r10+r14*1]
  256. LONG $0x210c8d47 // lea r9d,[r9+r12*1]
  257. LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7
  258. LONG $0xf27042c4; BYTE $0xe0 // andn r12d,ecx,r8d
  259. WORD $0x3145; BYTE $0xfd // xor r13d,r15d
  260. LONG $0xf07b63c4; WORD $0x06f1 // rorx r14d,ecx,0x6
  261. LONG $0x004dc2c4; BYTE $0xf0 // vpshufb ymm6,ymm6,ymm8
  262. LONG $0x210c8d47 // lea r9d,[r9+r12*1]
  263. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  264. WORD $0x8945; BYTE $0xd7 // mov r15d,r10d
  265. LONG $0xc6fefdc5 // vpaddd ymm0,ymm0,ymm6
  266. LONG $0xf07b43c4; WORD $0x16e2 // rorx r12d,r10d,0x16
  267. LONG $0x290c8d47 // lea r9d,[r9+r13*1]
  268. WORD $0x3145; BYTE $0xdf // xor r15d,r11d
  269. LONG $0xf870fdc5; BYTE $0x50 // vpshufd ymm7,ymm0,0x50
  270. LONG $0xf07b43c4; WORD $0x0df2 // rorx r14d,r10d,0xd
  271. LONG $0xf07b43c4; WORD $0x02ea // rorx r13d,r10d,0x2
  272. LONG $0x0b1c8d42 // lea ebx,[rbx+r9*1]
  273. LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa
  274. WORD $0x2144; BYTE $0xff // and edi,r15d
  275. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  276. WORD $0x3144; BYTE $0xdf // xor edi,r11d
  277. LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11
  278. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  279. LONG $0x390c8d45 // lea r9d,[r9+rdi*1]
  280. WORD $0x8941; BYTE $0xcc // mov r12d,ecx
  281. LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7
  282. // ROUND(R9, R10, R11, AX, BX, CX, DX, R8, R12, R13, R14, DI, R15, SP, 0x8c)
  283. LONG $0x24840344; LONG $0x0000008c // add r8d,[rsp+0x8c]
  284. WORD $0x2141; BYTE $0xdc // and r12d,ebx
  285. LONG $0xf07b63c4; WORD $0x19eb // rorx r13d,ebx,0x19
  286. LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2
  287. LONG $0xf07be3c4; WORD $0x0bfb // rorx edi,ebx,0xb
  288. LONG $0x310c8d47 // lea r9d,[r9+r14*1]
  289. LONG $0x20048d47 // lea r8d,[r8+r12*1]
  290. LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7
  291. LONG $0xf26062c4; BYTE $0xe2 // andn r12d,ebx,edx
  292. WORD $0x3141; BYTE $0xfd // xor r13d,edi
  293. LONG $0xf07b63c4; WORD $0x06f3 // rorx r14d,ebx,0x6
  294. LONG $0x004dc2c4; BYTE $0xf1 // vpshufb ymm6,ymm6,ymm9
  295. LONG $0x20048d47 // lea r8d,[r8+r12*1]
  296. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  297. WORD $0x8944; BYTE $0xcf // mov edi,r9d
  298. LONG $0xc6fefdc5 // vpaddd ymm0,ymm0,ymm6
  299. LONG $0xf07b43c4; WORD $0x16e1 // rorx r12d,r9d,0x16
  300. LONG $0x28048d47 // lea r8d,[r8+r13*1]
  301. WORD $0x3144; BYTE $0xd7 // xor edi,r10d
  302. LONG $0x75fefdc5; BYTE $0x00 // vpaddd ymm6,ymm0,[rbp+0x0]
  303. LONG $0xf07b43c4; WORD $0x0df1 // rorx r14d,r9d,0xd
  304. LONG $0xf07b43c4; WORD $0x02e9 // rorx r13d,r9d,0x2
  305. LONG $0x00048d42 // lea eax,[rax+r8*1]
  306. WORD $0x2141; BYTE $0xff // and r15d,edi
  307. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  308. WORD $0x3145; BYTE $0xd7 // xor r15d,r10d
  309. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  310. LONG $0x38048d47 // lea r8d,[r8+r15*1]
  311. WORD $0x8941; BYTE $0xdc // mov r12d,ebx
  312. LONG $0x347ffdc5; BYTE $0x24 // vmovdqa [rsp],ymm6
  313. LONG $0x0f6de3c4; WORD $0x04e1 // vpalignr ymm4,ymm2,ymm1,0x4
  314. // ROUND(R8, R9, R10, R11, AX, BX, CX, DX, R12, R13, R14, R15, DI, SP, 0xa0)
  315. LONG $0xa0249403; WORD $0x0000; BYTE $0x00 // add edx,[rsp+0xa0]
  316. WORD $0x2141; BYTE $0xc4 // and r12d,eax
  317. LONG $0xf07b63c4; WORD $0x19e8 // rorx r13d,eax,0x19
  318. LONG $0x0f7de3c4; WORD $0x04fb // vpalignr ymm7,ymm0,ymm3,0x4
  319. LONG $0xf07b63c4; WORD $0x0bf8 // rorx r15d,eax,0xb
  320. LONG $0x30048d47 // lea r8d,[r8+r14*1]
  321. LONG $0x22148d42 // lea edx,[rdx+r12*1]
  322. LONG $0xd472cdc5; BYTE $0x07 // vpsrld ymm6,ymm4,0x7
  323. LONG $0xf27862c4; BYTE $0xe1 // andn r12d,eax,ecx
  324. WORD $0x3145; BYTE $0xfd // xor r13d,r15d
  325. LONG $0xf07b63c4; WORD $0x06f0 // rorx r14d,eax,0x6
  326. LONG $0xcffef5c5 // vpaddd ymm1,ymm1,ymm7
  327. LONG $0x22148d42 // lea edx,[rdx+r12*1]
  328. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  329. WORD $0x8945; BYTE $0xc7 // mov r15d,r8d
  330. LONG $0xd472c5c5; BYTE $0x03 // vpsrld ymm7,ymm4,0x3
  331. LONG $0xf07b43c4; WORD $0x16e0 // rorx r12d,r8d,0x16
  332. LONG $0x2a148d42 // lea edx,[rdx+r13*1]
  333. WORD $0x3145; BYTE $0xcf // xor r15d,r9d
  334. LONG $0xf472d5c5; BYTE $0x0e // vpslld ymm5,ymm4,0xe
  335. LONG $0xf07b43c4; WORD $0x0df0 // rorx r14d,r8d,0xd
  336. LONG $0xf07b43c4; WORD $0x02e8 // rorx r13d,r8d,0x2
  337. LONG $0x131c8d45 // lea r11d,[r11+rdx*1]
  338. LONG $0xe6efc5c5 // vpxor ymm4,ymm7,ymm6
  339. WORD $0x2144; BYTE $0xff // and edi,r15d
  340. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  341. WORD $0x3144; BYTE $0xcf // xor edi,r9d
  342. LONG $0xf870fdc5; BYTE $0xfa // vpshufd ymm7,ymm0,0xfa
  343. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  344. WORD $0x148d; BYTE $0x3a // lea edx,[rdx+rdi*1]
  345. WORD $0x8941; BYTE $0xc4 // mov r12d,eax
  346. LONG $0xd672cdc5; BYTE $0x0b // vpsrld ymm6,ymm6,0xb
  347. // ROUND(DX, R8, R9, R10, R11, AX, BX, CX, R12, R13, R14, DI, R15, SP, 0xa4)
  348. LONG $0xa4248c03; WORD $0x0000; BYTE $0x00 // add ecx,[rsp+0xa4]
  349. WORD $0x2145; BYTE $0xdc // and r12d,r11d
  350. LONG $0xf07b43c4; WORD $0x19eb // rorx r13d,r11d,0x19
  351. LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5
  352. LONG $0xf07bc3c4; WORD $0x0bfb // rorx edi,r11d,0xb
  353. LONG $0x32148d42 // lea edx,[rdx+r14*1]
  354. LONG $0x210c8d42 // lea ecx,[rcx+r12*1]
  355. LONG $0xf572d5c5; BYTE $0x0b // vpslld ymm5,ymm5,0xb
  356. LONG $0xf22062c4; BYTE $0xe3 // andn r12d,r11d,ebx
  357. WORD $0x3141; BYTE $0xfd // xor r13d,edi
  358. LONG $0xf07b43c4; WORD $0x06f3 // rorx r14d,r11d,0x6
  359. LONG $0xe6efddc5 // vpxor ymm4,ymm4,ymm6
  360. LONG $0x210c8d42 // lea ecx,[rcx+r12*1]
  361. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  362. WORD $0xd789 // mov edi,edx
  363. LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa
  364. LONG $0xf07b63c4; WORD $0x16e2 // rorx r12d,edx,0x16
  365. LONG $0x290c8d42 // lea ecx,[rcx+r13*1]
  366. WORD $0x3144; BYTE $0xc7 // xor edi,r8d
  367. LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5
  368. LONG $0xf07b63c4; WORD $0x0df2 // rorx r14d,edx,0xd
  369. LONG $0xf07b63c4; WORD $0x02ea // rorx r13d,edx,0x2
  370. LONG $0x0a148d45 // lea r10d,[r10+rcx*1]
  371. LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11
  372. WORD $0x2141; BYTE $0xff // and r15d,edi
  373. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  374. WORD $0x3145; BYTE $0xc7 // xor r15d,r8d
  375. LONG $0xccfef5c5 // vpaddd ymm1,ymm1,ymm4
  376. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  377. LONG $0x390c8d42 // lea ecx,[rcx+r15*1]
  378. WORD $0x8945; BYTE $0xdc // mov r12d,r11d
  379. LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7
  380. // ROUND(CX, DX, R8, R9, R10, R11, AX, BX, R12, R13, R14, R15, DI, SP, 0xa8)
  381. LONG $0xa8249c03; WORD $0x0000; BYTE $0x00 // add ebx,[rsp+0xa8]
  382. WORD $0x2145; BYTE $0xd4 // and r12d,r10d
  383. LONG $0xf07b43c4; WORD $0x19ea // rorx r13d,r10d,0x19
  384. LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2
  385. LONG $0xf07b43c4; WORD $0x0bfa // rorx r15d,r10d,0xb
  386. LONG $0x310c8d42 // lea ecx,[rcx+r14*1]
  387. LONG $0x231c8d42 // lea ebx,[rbx+r12*1]
  388. LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7
  389. LONG $0xf22862c4; BYTE $0xe0 // andn r12d,r10d,eax
  390. WORD $0x3145; BYTE $0xfd // xor r13d,r15d
  391. LONG $0xf07b43c4; WORD $0x06f2 // rorx r14d,r10d,0x6
  392. LONG $0x004dc2c4; BYTE $0xf0 // vpshufb ymm6,ymm6,ymm8
  393. LONG $0x231c8d42 // lea ebx,[rbx+r12*1]
  394. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  395. WORD $0x8941; BYTE $0xcf // mov r15d,ecx
  396. LONG $0xcefef5c5 // vpaddd ymm1,ymm1,ymm6
  397. LONG $0xf07b63c4; WORD $0x16e1 // rorx r12d,ecx,0x16
  398. LONG $0x2b1c8d42 // lea ebx,[rbx+r13*1]
  399. WORD $0x3141; BYTE $0xd7 // xor r15d,edx
  400. LONG $0xf970fdc5; BYTE $0x50 // vpshufd ymm7,ymm1,0x50
  401. LONG $0xf07b63c4; WORD $0x0df1 // rorx r14d,ecx,0xd
  402. LONG $0xf07b63c4; WORD $0x02e9 // rorx r13d,ecx,0x2
  403. LONG $0x190c8d45 // lea r9d,[r9+rbx*1]
  404. LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa
  405. WORD $0x2144; BYTE $0xff // and edi,r15d
  406. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  407. WORD $0xd731 // xor edi,edx
  408. LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11
  409. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  410. WORD $0x1c8d; BYTE $0x3b // lea ebx,[rbx+rdi*1]
  411. WORD $0x8945; BYTE $0xd4 // mov r12d,r10d
  412. LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7
  413. // ROUND(BX, CX, DX, R8, R9, R10, R11, AX, R12, R13, R14, DI, R15, SP, 0xac)
  414. LONG $0xac248403; WORD $0x0000; BYTE $0x00 // add eax,[rsp+0xac]
  415. WORD $0x2145; BYTE $0xcc // and r12d,r9d
  416. LONG $0xf07b43c4; WORD $0x19e9 // rorx r13d,r9d,0x19
  417. LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2
  418. LONG $0xf07bc3c4; WORD $0x0bf9 // rorx edi,r9d,0xb
  419. LONG $0x331c8d42 // lea ebx,[rbx+r14*1]
  420. LONG $0x20048d42 // lea eax,[rax+r12*1]
  421. LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7
  422. LONG $0xf23042c4; BYTE $0xe3 // andn r12d,r9d,r11d
  423. WORD $0x3141; BYTE $0xfd // xor r13d,edi
  424. LONG $0xf07b43c4; WORD $0x06f1 // rorx r14d,r9d,0x6
  425. LONG $0x004dc2c4; BYTE $0xf1 // vpshufb ymm6,ymm6,ymm9
  426. LONG $0x20048d42 // lea eax,[rax+r12*1]
  427. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  428. WORD $0xdf89 // mov edi,ebx
  429. LONG $0xcefef5c5 // vpaddd ymm1,ymm1,ymm6
  430. LONG $0xf07b63c4; WORD $0x16e3 // rorx r12d,ebx,0x16
  431. LONG $0x28048d42 // lea eax,[rax+r13*1]
  432. WORD $0xcf31 // xor edi,ecx
  433. LONG $0x75fef5c5; BYTE $0x20 // vpaddd ymm6,ymm1,[rbp+0x20]
  434. LONG $0xf07b63c4; WORD $0x0df3 // rorx r14d,ebx,0xd
  435. LONG $0xf07b63c4; WORD $0x02eb // rorx r13d,ebx,0x2
  436. LONG $0x00048d45 // lea r8d,[r8+rax*1]
  437. WORD $0x2141; BYTE $0xff // and r15d,edi
  438. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  439. WORD $0x3141; BYTE $0xcf // xor r15d,ecx
  440. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  441. LONG $0x38048d42 // lea eax,[rax+r15*1]
  442. WORD $0x8945; BYTE $0xcc // mov r12d,r9d
  443. LONG $0x747ffdc5; WORD $0x2024 // vmovdqa [rsp+0x20],ymm6
  444. LONG $0x24648d48; BYTE $0xc0 // lea rsp,[rsp-0x40]
  445. LONG $0x0f65e3c4; WORD $0x04e2 // vpalignr ymm4,ymm3,ymm2,0x4
  446. // ROUND(AX, BX, CX, DX, R8, R9, R10, R11, R12, R13, R14, R15, DI, SP, 0x80)
  447. LONG $0x249c0344; LONG $0x00000080 // add r11d,[rsp+0x80]
  448. WORD $0x2145; BYTE $0xc4 // and r12d,r8d
  449. LONG $0xf07b43c4; WORD $0x19e8 // rorx r13d,r8d,0x19
  450. LONG $0x0f75e3c4; WORD $0x04f8 // vpalignr ymm7,ymm1,ymm0,0x4
  451. LONG $0xf07b43c4; WORD $0x0bf8 // rorx r15d,r8d,0xb
  452. LONG $0x30048d42 // lea eax,[rax+r14*1]
  453. LONG $0x231c8d47 // lea r11d,[r11+r12*1]
  454. LONG $0xd472cdc5; BYTE $0x07 // vpsrld ymm6,ymm4,0x7
  455. LONG $0xf23842c4; BYTE $0xe2 // andn r12d,r8d,r10d
  456. WORD $0x3145; BYTE $0xfd // xor r13d,r15d
  457. LONG $0xf07b43c4; WORD $0x06f0 // rorx r14d,r8d,0x6
  458. LONG $0xd7feedc5 // vpaddd ymm2,ymm2,ymm7
  459. LONG $0x231c8d47 // lea r11d,[r11+r12*1]
  460. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  461. WORD $0x8941; BYTE $0xc7 // mov r15d,eax
  462. LONG $0xd472c5c5; BYTE $0x03 // vpsrld ymm7,ymm4,0x3
  463. LONG $0xf07b63c4; WORD $0x16e0 // rorx r12d,eax,0x16
  464. LONG $0x2b1c8d47 // lea r11d,[r11+r13*1]
  465. WORD $0x3141; BYTE $0xdf // xor r15d,ebx
  466. LONG $0xf472d5c5; BYTE $0x0e // vpslld ymm5,ymm4,0xe
  467. LONG $0xf07b63c4; WORD $0x0df0 // rorx r14d,eax,0xd
  468. LONG $0xf07b63c4; WORD $0x02e8 // rorx r13d,eax,0x2
  469. LONG $0x1a148d42 // lea edx,[rdx+r11*1]
  470. LONG $0xe6efc5c5 // vpxor ymm4,ymm7,ymm6
  471. WORD $0x2144; BYTE $0xff // and edi,r15d
  472. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  473. WORD $0xdf31 // xor edi,ebx
  474. LONG $0xf970fdc5; BYTE $0xfa // vpshufd ymm7,ymm1,0xfa
  475. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  476. LONG $0x3b1c8d45 // lea r11d,[r11+rdi*1]
  477. WORD $0x8945; BYTE $0xc4 // mov r12d,r8d
  478. LONG $0xd672cdc5; BYTE $0x0b // vpsrld ymm6,ymm6,0xb
  479. // ROUND(R11, AX, BX, CX, DX, R8, R9, R10, R12, R13, R14, DI, R15, SP, 0x84)
  480. LONG $0x24940344; LONG $0x00000084 // add r10d,[rsp+0x84]
  481. WORD $0x2141; BYTE $0xd4 // and r12d,edx
  482. LONG $0xf07b63c4; WORD $0x19ea // rorx r13d,edx,0x19
  483. LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5
  484. LONG $0xf07be3c4; WORD $0x0bfa // rorx edi,edx,0xb
  485. LONG $0x331c8d47 // lea r11d,[r11+r14*1]
  486. LONG $0x22148d47 // lea r10d,[r10+r12*1]
  487. LONG $0xf572d5c5; BYTE $0x0b // vpslld ymm5,ymm5,0xb
  488. LONG $0xf26842c4; BYTE $0xe1 // andn r12d,edx,r9d
  489. WORD $0x3141; BYTE $0xfd // xor r13d,edi
  490. LONG $0xf07b63c4; WORD $0x06f2 // rorx r14d,edx,0x6
  491. LONG $0xe6efddc5 // vpxor ymm4,ymm4,ymm6
  492. LONG $0x22148d47 // lea r10d,[r10+r12*1]
  493. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  494. WORD $0x8944; BYTE $0xdf // mov edi,r11d
  495. LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa
  496. LONG $0xf07b43c4; WORD $0x16e3 // rorx r12d,r11d,0x16
  497. LONG $0x2a148d47 // lea r10d,[r10+r13*1]
  498. WORD $0xc731 // xor edi,eax
  499. LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5
  500. LONG $0xf07b43c4; WORD $0x0df3 // rorx r14d,r11d,0xd
  501. LONG $0xf07b43c4; WORD $0x02eb // rorx r13d,r11d,0x2
  502. LONG $0x110c8d42 // lea ecx,[rcx+r10*1]
  503. LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11
  504. WORD $0x2141; BYTE $0xff // and r15d,edi
  505. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  506. WORD $0x3141; BYTE $0xc7 // xor r15d,eax
  507. LONG $0xd4feedc5 // vpaddd ymm2,ymm2,ymm4
  508. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  509. LONG $0x3a148d47 // lea r10d,[r10+r15*1]
  510. WORD $0x8941; BYTE $0xd4 // mov r12d,edx
  511. LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7
  512. // ROUND(R10, R11, AX, BX, CX, DX, R8, R9, R12, R13, R14, R15, DI, SP, 0x88)
  513. LONG $0x248c0344; LONG $0x00000088 // add r9d,[rsp+0x88]
  514. WORD $0x2141; BYTE $0xcc // and r12d,ecx
  515. LONG $0xf07b63c4; WORD $0x19e9 // rorx r13d,ecx,0x19
  516. LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2
  517. LONG $0xf07b63c4; WORD $0x0bf9 // rorx r15d,ecx,0xb
  518. LONG $0x32148d47 // lea r10d,[r10+r14*1]
  519. LONG $0x210c8d47 // lea r9d,[r9+r12*1]
  520. LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7
  521. LONG $0xf27042c4; BYTE $0xe0 // andn r12d,ecx,r8d
  522. WORD $0x3145; BYTE $0xfd // xor r13d,r15d
  523. LONG $0xf07b63c4; WORD $0x06f1 // rorx r14d,ecx,0x6
  524. LONG $0x004dc2c4; BYTE $0xf0 // vpshufb ymm6,ymm6,ymm8
  525. LONG $0x210c8d47 // lea r9d,[r9+r12*1]
  526. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  527. WORD $0x8945; BYTE $0xd7 // mov r15d,r10d
  528. LONG $0xd6feedc5 // vpaddd ymm2,ymm2,ymm6
  529. LONG $0xf07b43c4; WORD $0x16e2 // rorx r12d,r10d,0x16
  530. LONG $0x290c8d47 // lea r9d,[r9+r13*1]
  531. WORD $0x3145; BYTE $0xdf // xor r15d,r11d
  532. LONG $0xfa70fdc5; BYTE $0x50 // vpshufd ymm7,ymm2,0x50
  533. LONG $0xf07b43c4; WORD $0x0df2 // rorx r14d,r10d,0xd
  534. LONG $0xf07b43c4; WORD $0x02ea // rorx r13d,r10d,0x2
  535. LONG $0x0b1c8d42 // lea ebx,[rbx+r9*1]
  536. LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa
  537. WORD $0x2144; BYTE $0xff // and edi,r15d
  538. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  539. WORD $0x3144; BYTE $0xdf // xor edi,r11d
  540. LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11
  541. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  542. LONG $0x390c8d45 // lea r9d,[r9+rdi*1]
  543. WORD $0x8941; BYTE $0xcc // mov r12d,ecx
  544. LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7
  545. // ROUND(R9, R10, R11, AX, BX, CX, DX, R8, R12, R13, R14, DI, R15, SP, 0x8c)
  546. LONG $0x24840344; LONG $0x0000008c // add r8d,[rsp+0x8c]
  547. WORD $0x2141; BYTE $0xdc // and r12d,ebx
  548. LONG $0xf07b63c4; WORD $0x19eb // rorx r13d,ebx,0x19
  549. LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2
  550. LONG $0xf07be3c4; WORD $0x0bfb // rorx edi,ebx,0xb
  551. LONG $0x310c8d47 // lea r9d,[r9+r14*1]
  552. LONG $0x20048d47 // lea r8d,[r8+r12*1]
  553. LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7
  554. LONG $0xf26062c4; BYTE $0xe2 // andn r12d,ebx,edx
  555. WORD $0x3141; BYTE $0xfd // xor r13d,edi
  556. LONG $0xf07b63c4; WORD $0x06f3 // rorx r14d,ebx,0x6
  557. LONG $0x004dc2c4; BYTE $0xf1 // vpshufb ymm6,ymm6,ymm9
  558. LONG $0x20048d47 // lea r8d,[r8+r12*1]
  559. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  560. WORD $0x8944; BYTE $0xcf // mov edi,r9d
  561. LONG $0xd6feedc5 // vpaddd ymm2,ymm2,ymm6
  562. LONG $0xf07b43c4; WORD $0x16e1 // rorx r12d,r9d,0x16
  563. LONG $0x28048d47 // lea r8d,[r8+r13*1]
  564. WORD $0x3144; BYTE $0xd7 // xor edi,r10d
  565. LONG $0x75feedc5; BYTE $0x40 // vpaddd ymm6,ymm2,[rbp+0x40]
  566. LONG $0xf07b43c4; WORD $0x0df1 // rorx r14d,r9d,0xd
  567. LONG $0xf07b43c4; WORD $0x02e9 // rorx r13d,r9d,0x2
  568. LONG $0x00048d42 // lea eax,[rax+r8*1]
  569. WORD $0x2141; BYTE $0xff // and r15d,edi
  570. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  571. WORD $0x3145; BYTE $0xd7 // xor r15d,r10d
  572. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  573. LONG $0x38048d47 // lea r8d,[r8+r15*1]
  574. WORD $0x8941; BYTE $0xdc // mov r12d,ebx
  575. LONG $0x347ffdc5; BYTE $0x24 // vmovdqa [rsp],ymm6
  576. LONG $0x0f7de3c4; WORD $0x04e3 // vpalignr ymm4,ymm0,ymm3,0x4
  577. // ROUND(R8, R9, R10, R11, AX, BX, CX, DX, R12, R13, R14, R15, DI, SP, 0xa0)
  578. LONG $0xa0249403; WORD $0x0000; BYTE $0x00 // add edx,[rsp+0xa0]
  579. WORD $0x2141; BYTE $0xc4 // and r12d,eax
  580. LONG $0xf07b63c4; WORD $0x19e8 // rorx r13d,eax,0x19
  581. LONG $0x0f6de3c4; WORD $0x04f9 // vpalignr ymm7,ymm2,ymm1,0x4
  582. LONG $0xf07b63c4; WORD $0x0bf8 // rorx r15d,eax,0xb
  583. LONG $0x30048d47 // lea r8d,[r8+r14*1]
  584. LONG $0x22148d42 // lea edx,[rdx+r12*1]
  585. LONG $0xd472cdc5; BYTE $0x07 // vpsrld ymm6,ymm4,0x7
  586. LONG $0xf27862c4; BYTE $0xe1 // andn r12d,eax,ecx
  587. WORD $0x3145; BYTE $0xfd // xor r13d,r15d
  588. LONG $0xf07b63c4; WORD $0x06f0 // rorx r14d,eax,0x6
  589. LONG $0xdffee5c5 // vpaddd ymm3,ymm3,ymm7
  590. LONG $0x22148d42 // lea edx,[rdx+r12*1]
  591. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  592. WORD $0x8945; BYTE $0xc7 // mov r15d,r8d
  593. LONG $0xd472c5c5; BYTE $0x03 // vpsrld ymm7,ymm4,0x3
  594. LONG $0xf07b43c4; WORD $0x16e0 // rorx r12d,r8d,0x16
  595. LONG $0x2a148d42 // lea edx,[rdx+r13*1]
  596. WORD $0x3145; BYTE $0xcf // xor r15d,r9d
  597. LONG $0xf472d5c5; BYTE $0x0e // vpslld ymm5,ymm4,0xe
  598. LONG $0xf07b43c4; WORD $0x0df0 // rorx r14d,r8d,0xd
  599. LONG $0xf07b43c4; WORD $0x02e8 // rorx r13d,r8d,0x2
  600. LONG $0x131c8d45 // lea r11d,[r11+rdx*1]
  601. LONG $0xe6efc5c5 // vpxor ymm4,ymm7,ymm6
  602. WORD $0x2144; BYTE $0xff // and edi,r15d
  603. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  604. WORD $0x3144; BYTE $0xcf // xor edi,r9d
  605. LONG $0xfa70fdc5; BYTE $0xfa // vpshufd ymm7,ymm2,0xfa
  606. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  607. WORD $0x148d; BYTE $0x3a // lea edx,[rdx+rdi*1]
  608. WORD $0x8941; BYTE $0xc4 // mov r12d,eax
  609. LONG $0xd672cdc5; BYTE $0x0b // vpsrld ymm6,ymm6,0xb
  610. // ROUND(DX, R8, R9, R10, R11, AX, BX, CX, R12, R13, R14, DI, R15, SP, 0xa4)
  611. LONG $0xa4248c03; WORD $0x0000; BYTE $0x00 // add ecx,[rsp+0xa4]
  612. WORD $0x2145; BYTE $0xdc // and r12d,r11d
  613. LONG $0xf07b43c4; WORD $0x19eb // rorx r13d,r11d,0x19
  614. LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5
  615. LONG $0xf07bc3c4; WORD $0x0bfb // rorx edi,r11d,0xb
  616. LONG $0x32148d42 // lea edx,[rdx+r14*1]
  617. LONG $0x210c8d42 // lea ecx,[rcx+r12*1]
  618. LONG $0xf572d5c5; BYTE $0x0b // vpslld ymm5,ymm5,0xb
  619. LONG $0xf22062c4; BYTE $0xe3 // andn r12d,r11d,ebx
  620. WORD $0x3141; BYTE $0xfd // xor r13d,edi
  621. LONG $0xf07b43c4; WORD $0x06f3 // rorx r14d,r11d,0x6
  622. LONG $0xe6efddc5 // vpxor ymm4,ymm4,ymm6
  623. LONG $0x210c8d42 // lea ecx,[rcx+r12*1]
  624. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  625. WORD $0xd789 // mov edi,edx
  626. LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa
  627. LONG $0xf07b63c4; WORD $0x16e2 // rorx r12d,edx,0x16
  628. LONG $0x290c8d42 // lea ecx,[rcx+r13*1]
  629. WORD $0x3144; BYTE $0xc7 // xor edi,r8d
  630. LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5
  631. LONG $0xf07b63c4; WORD $0x0df2 // rorx r14d,edx,0xd
  632. LONG $0xf07b63c4; WORD $0x02ea // rorx r13d,edx,0x2
  633. LONG $0x0a148d45 // lea r10d,[r10+rcx*1]
  634. LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11
  635. WORD $0x2141; BYTE $0xff // and r15d,edi
  636. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  637. WORD $0x3145; BYTE $0xc7 // xor r15d,r8d
  638. LONG $0xdcfee5c5 // vpaddd ymm3,ymm3,ymm4
  639. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  640. LONG $0x390c8d42 // lea ecx,[rcx+r15*1]
  641. WORD $0x8945; BYTE $0xdc // mov r12d,r11d
  642. LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7
  643. // ROUND(CX, DX, R8, R9, R10, R11, AX, BX, R12, R13, R14, R15, DI, SP, 0xa8)
  644. LONG $0xa8249c03; WORD $0x0000; BYTE $0x00 // add ebx,[rsp+0xa8]
  645. WORD $0x2145; BYTE $0xd4 // and r12d,r10d
  646. LONG $0xf07b43c4; WORD $0x19ea // rorx r13d,r10d,0x19
  647. LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2
  648. LONG $0xf07b43c4; WORD $0x0bfa // rorx r15d,r10d,0xb
  649. LONG $0x310c8d42 // lea ecx,[rcx+r14*1]
  650. LONG $0x231c8d42 // lea ebx,[rbx+r12*1]
  651. LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7
  652. LONG $0xf22862c4; BYTE $0xe0 // andn r12d,r10d,eax
  653. WORD $0x3145; BYTE $0xfd // xor r13d,r15d
  654. LONG $0xf07b43c4; WORD $0x06f2 // rorx r14d,r10d,0x6
  655. LONG $0x004dc2c4; BYTE $0xf0 // vpshufb ymm6,ymm6,ymm8
  656. LONG $0x231c8d42 // lea ebx,[rbx+r12*1]
  657. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  658. WORD $0x8941; BYTE $0xcf // mov r15d,ecx
  659. LONG $0xdefee5c5 // vpaddd ymm3,ymm3,ymm6
  660. LONG $0xf07b63c4; WORD $0x16e1 // rorx r12d,ecx,0x16
  661. LONG $0x2b1c8d42 // lea ebx,[rbx+r13*1]
  662. WORD $0x3141; BYTE $0xd7 // xor r15d,edx
  663. LONG $0xfb70fdc5; BYTE $0x50 // vpshufd ymm7,ymm3,0x50
  664. LONG $0xf07b63c4; WORD $0x0df1 // rorx r14d,ecx,0xd
  665. LONG $0xf07b63c4; WORD $0x02e9 // rorx r13d,ecx,0x2
  666. LONG $0x190c8d45 // lea r9d,[r9+rbx*1]
  667. LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa
  668. WORD $0x2144; BYTE $0xff // and edi,r15d
  669. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  670. WORD $0xd731 // xor edi,edx
  671. LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11
  672. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  673. WORD $0x1c8d; BYTE $0x3b // lea ebx,[rbx+rdi*1]
  674. WORD $0x8945; BYTE $0xd4 // mov r12d,r10d
  675. LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7
  676. // ROUND(BX, CX, DX, R8, R9, R10, R11, AX, R12, R13, R14, DI, R15, SP, 0xac)
  677. LONG $0xac248403; WORD $0x0000; BYTE $0x00 // add eax,[rsp+0xac]
  678. WORD $0x2145; BYTE $0xcc // and r12d,r9d
  679. LONG $0xf07b43c4; WORD $0x19e9 // rorx r13d,r9d,0x19
  680. LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2
  681. LONG $0xf07bc3c4; WORD $0x0bf9 // rorx edi,r9d,0xb
  682. LONG $0x331c8d42 // lea ebx,[rbx+r14*1]
  683. LONG $0x20048d42 // lea eax,[rax+r12*1]
  684. LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7
  685. LONG $0xf23042c4; BYTE $0xe3 // andn r12d,r9d,r11d
  686. WORD $0x3141; BYTE $0xfd // xor r13d,edi
  687. LONG $0xf07b43c4; WORD $0x06f1 // rorx r14d,r9d,0x6
  688. LONG $0x004dc2c4; BYTE $0xf1 // vpshufb ymm6,ymm6,ymm9
  689. LONG $0x20048d42 // lea eax,[rax+r12*1]
  690. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  691. WORD $0xdf89 // mov edi,ebx
  692. LONG $0xdefee5c5 // vpaddd ymm3,ymm3,ymm6
  693. LONG $0xf07b63c4; WORD $0x16e3 // rorx r12d,ebx,0x16
  694. LONG $0x28048d42 // lea eax,[rax+r13*1]
  695. WORD $0xcf31 // xor edi,ecx
  696. LONG $0x75fee5c5; BYTE $0x60 // vpaddd ymm6,ymm3,[rbp+0x60]
  697. LONG $0xf07b63c4; WORD $0x0df3 // rorx r14d,ebx,0xd
  698. LONG $0xf07b63c4; WORD $0x02eb // rorx r13d,ebx,0x2
  699. LONG $0x00048d45 // lea r8d,[r8+rax*1]
  700. WORD $0x2141; BYTE $0xff // and r15d,edi
  701. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  702. WORD $0x3141; BYTE $0xcf // xor r15d,ecx
  703. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  704. LONG $0x38048d42 // lea eax,[rax+r15*1]
  705. WORD $0x8945; BYTE $0xcc // mov r12d,r9d
  706. LONG $0x747ffdc5; WORD $0x2024 // vmovdqa [rsp+0x20],ymm6
  707. ADDQ $0x80, BP
  708. CMPB 0x3(BP), $0x0
  709. JNE loop1
  710. // ROUND(AX, BX, CX, DX, R8, R9, R10, R11, R12, R13, R14, R15, DI, SP, 0x40)
  711. LONG $0x245c0344; BYTE $0x40 // add r11d,[rsp+0x40]
  712. WORD $0x2145; BYTE $0xc4 // and r12d,r8d
  713. LONG $0xf07b43c4; WORD $0x19e8 // rorx r13d,r8d,0x19
  714. LONG $0xf07b43c4; WORD $0x0bf8 // rorx r15d,r8d,0xb
  715. LONG $0x30048d42 // lea eax,[rax+r14*1]
  716. LONG $0x231c8d47 // lea r11d,[r11+r12*1]
  717. LONG $0xf23842c4; BYTE $0xe2 // andn r12d,r8d,r10d
  718. WORD $0x3145; BYTE $0xfd // xor r13d,r15d
  719. LONG $0xf07b43c4; WORD $0x06f0 // rorx r14d,r8d,0x6
  720. LONG $0x231c8d47 // lea r11d,[r11+r12*1]
  721. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  722. WORD $0x8941; BYTE $0xc7 // mov r15d,eax
  723. LONG $0xf07b63c4; WORD $0x16e0 // rorx r12d,eax,0x16
  724. LONG $0x2b1c8d47 // lea r11d,[r11+r13*1]
  725. WORD $0x3141; BYTE $0xdf // xor r15d,ebx
  726. LONG $0xf07b63c4; WORD $0x0df0 // rorx r14d,eax,0xd
  727. LONG $0xf07b63c4; WORD $0x02e8 // rorx r13d,eax,0x2
  728. LONG $0x1a148d42 // lea edx,[rdx+r11*1]
  729. WORD $0x2144; BYTE $0xff // and edi,r15d
  730. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  731. WORD $0xdf31 // xor edi,ebx
  732. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  733. LONG $0x3b1c8d45 // lea r11d,[r11+rdi*1]
  734. WORD $0x8945; BYTE $0xc4 // mov r12d,r8d
  735. // ROUND(R11, AX, BX, CX, DX, R8, R9, R10, R12, R13, R14, DI, R15, SP, 0x44)
  736. LONG $0x24540344; BYTE $0x44 // add r10d,[rsp+0x44]
  737. WORD $0x2141; BYTE $0xd4 // and r12d,edx
  738. LONG $0xf07b63c4; WORD $0x19ea // rorx r13d,edx,0x19
  739. LONG $0xf07be3c4; WORD $0x0bfa // rorx edi,edx,0xb
  740. LONG $0x331c8d47 // lea r11d,[r11+r14*1]
  741. LONG $0x22148d47 // lea r10d,[r10+r12*1]
  742. LONG $0xf26842c4; BYTE $0xe1 // andn r12d,edx,r9d
  743. WORD $0x3141; BYTE $0xfd // xor r13d,edi
  744. LONG $0xf07b63c4; WORD $0x06f2 // rorx r14d,edx,0x6
  745. LONG $0x22148d47 // lea r10d,[r10+r12*1]
  746. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  747. WORD $0x8944; BYTE $0xdf // mov edi,r11d
  748. LONG $0xf07b43c4; WORD $0x16e3 // rorx r12d,r11d,0x16
  749. LONG $0x2a148d47 // lea r10d,[r10+r13*1]
  750. WORD $0xc731 // xor edi,eax
  751. LONG $0xf07b43c4; WORD $0x0df3 // rorx r14d,r11d,0xd
  752. LONG $0xf07b43c4; WORD $0x02eb // rorx r13d,r11d,0x2
  753. LONG $0x110c8d42 // lea ecx,[rcx+r10*1]
  754. WORD $0x2141; BYTE $0xff // and r15d,edi
  755. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  756. WORD $0x3141; BYTE $0xc7 // xor r15d,eax
  757. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  758. LONG $0x3a148d47 // lea r10d,[r10+r15*1]
  759. WORD $0x8941; BYTE $0xd4 // mov r12d,edx
  760. // ROUND(R10, R11, AX, BX, CX, DX, R8, R9, R12, R13, R14, R15, DI, SP, 0x48)
  761. LONG $0x244c0344; BYTE $0x48 // add r9d,[rsp+0x48]
  762. WORD $0x2141; BYTE $0xcc // and r12d,ecx
  763. LONG $0xf07b63c4; WORD $0x19e9 // rorx r13d,ecx,0x19
  764. LONG $0xf07b63c4; WORD $0x0bf9 // rorx r15d,ecx,0xb
  765. LONG $0x32148d47 // lea r10d,[r10+r14*1]
  766. LONG $0x210c8d47 // lea r9d,[r9+r12*1]
  767. LONG $0xf27042c4; BYTE $0xe0 // andn r12d,ecx,r8d
  768. WORD $0x3145; BYTE $0xfd // xor r13d,r15d
  769. LONG $0xf07b63c4; WORD $0x06f1 // rorx r14d,ecx,0x6
  770. LONG $0x210c8d47 // lea r9d,[r9+r12*1]
  771. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  772. WORD $0x8945; BYTE $0xd7 // mov r15d,r10d
  773. LONG $0xf07b43c4; WORD $0x16e2 // rorx r12d,r10d,0x16
  774. LONG $0x290c8d47 // lea r9d,[r9+r13*1]
  775. WORD $0x3145; BYTE $0xdf // xor r15d,r11d
  776. LONG $0xf07b43c4; WORD $0x0df2 // rorx r14d,r10d,0xd
  777. LONG $0xf07b43c4; WORD $0x02ea // rorx r13d,r10d,0x2
  778. LONG $0x0b1c8d42 // lea ebx,[rbx+r9*1]
  779. WORD $0x2144; BYTE $0xff // and edi,r15d
  780. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  781. WORD $0x3144; BYTE $0xdf // xor edi,r11d
  782. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  783. LONG $0x390c8d45 // lea r9d,[r9+rdi*1]
  784. WORD $0x8941; BYTE $0xcc // mov r12d,ecx
  785. // ROUND(R9, R10, R11, AX, BX, CX, DX, R8, R12, R13, R14, DI, R15, SP, 0x4c)
  786. LONG $0x24440344; BYTE $0x4c // add r8d,[rsp+0x4c]
  787. WORD $0x2141; BYTE $0xdc // and r12d,ebx
  788. LONG $0xf07b63c4; WORD $0x19eb // rorx r13d,ebx,0x19
  789. LONG $0xf07be3c4; WORD $0x0bfb // rorx edi,ebx,0xb
  790. LONG $0x310c8d47 // lea r9d,[r9+r14*1]
  791. LONG $0x20048d47 // lea r8d,[r8+r12*1]
  792. LONG $0xf26062c4; BYTE $0xe2 // andn r12d,ebx,edx
  793. WORD $0x3141; BYTE $0xfd // xor r13d,edi
  794. LONG $0xf07b63c4; WORD $0x06f3 // rorx r14d,ebx,0x6
  795. LONG $0x20048d47 // lea r8d,[r8+r12*1]
  796. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  797. WORD $0x8944; BYTE $0xcf // mov edi,r9d
  798. LONG $0xf07b43c4; WORD $0x16e1 // rorx r12d,r9d,0x16
  799. LONG $0x28048d47 // lea r8d,[r8+r13*1]
  800. WORD $0x3144; BYTE $0xd7 // xor edi,r10d
  801. LONG $0xf07b43c4; WORD $0x0df1 // rorx r14d,r9d,0xd
  802. LONG $0xf07b43c4; WORD $0x02e9 // rorx r13d,r9d,0x2
  803. LONG $0x00048d42 // lea eax,[rax+r8*1]
  804. WORD $0x2141; BYTE $0xff // and r15d,edi
  805. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  806. WORD $0x3145; BYTE $0xd7 // xor r15d,r10d
  807. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  808. LONG $0x38048d47 // lea r8d,[r8+r15*1]
  809. WORD $0x8941; BYTE $0xdc // mov r12d,ebx
  810. // ROUND(R8, R9, R10, R11, AX, BX, CX, DX, R12, R13, R14, R15, DI, SP, 0x60)
  811. LONG $0x60245403 // add edx,[rsp+0x60]
  812. WORD $0x2141; BYTE $0xc4 // and r12d,eax
  813. LONG $0xf07b63c4; WORD $0x19e8 // rorx r13d,eax,0x19
  814. LONG $0xf07b63c4; WORD $0x0bf8 // rorx r15d,eax,0xb
  815. LONG $0x30048d47 // lea r8d,[r8+r14*1]
  816. LONG $0x22148d42 // lea edx,[rdx+r12*1]
  817. LONG $0xf27862c4; BYTE $0xe1 // andn r12d,eax,ecx
  818. WORD $0x3145; BYTE $0xfd // xor r13d,r15d
  819. LONG $0xf07b63c4; WORD $0x06f0 // rorx r14d,eax,0x6
  820. LONG $0x22148d42 // lea edx,[rdx+r12*1]
  821. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  822. WORD $0x8945; BYTE $0xc7 // mov r15d,r8d
  823. LONG $0xf07b43c4; WORD $0x16e0 // rorx r12d,r8d,0x16
  824. LONG $0x2a148d42 // lea edx,[rdx+r13*1]
  825. WORD $0x3145; BYTE $0xcf // xor r15d,r9d
  826. LONG $0xf07b43c4; WORD $0x0df0 // rorx r14d,r8d,0xd
  827. LONG $0xf07b43c4; WORD $0x02e8 // rorx r13d,r8d,0x2
  828. LONG $0x131c8d45 // lea r11d,[r11+rdx*1]
  829. WORD $0x2144; BYTE $0xff // and edi,r15d
  830. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  831. WORD $0x3144; BYTE $0xcf // xor edi,r9d
  832. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  833. WORD $0x148d; BYTE $0x3a // lea edx,[rdx+rdi*1]
  834. WORD $0x8941; BYTE $0xc4 // mov r12d,eax
  835. // ROUND(DX, R8, R9, R10, R11, AX, BX, CX, R12, R13, R14, DI, R15, SP, 0x64)
  836. LONG $0x64244c03 // add ecx,[rsp+0x64]
  837. WORD $0x2145; BYTE $0xdc // and r12d,r11d
  838. LONG $0xf07b43c4; WORD $0x19eb // rorx r13d,r11d,0x19
  839. LONG $0xf07bc3c4; WORD $0x0bfb // rorx edi,r11d,0xb
  840. LONG $0x32148d42 // lea edx,[rdx+r14*1]
  841. LONG $0x210c8d42 // lea ecx,[rcx+r12*1]
  842. LONG $0xf22062c4; BYTE $0xe3 // andn r12d,r11d,ebx
  843. WORD $0x3141; BYTE $0xfd // xor r13d,edi
  844. LONG $0xf07b43c4; WORD $0x06f3 // rorx r14d,r11d,0x6
  845. LONG $0x210c8d42 // lea ecx,[rcx+r12*1]
  846. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  847. WORD $0xd789 // mov edi,edx
  848. LONG $0xf07b63c4; WORD $0x16e2 // rorx r12d,edx,0x16
  849. LONG $0x290c8d42 // lea ecx,[rcx+r13*1]
  850. WORD $0x3144; BYTE $0xc7 // xor edi,r8d
  851. LONG $0xf07b63c4; WORD $0x0df2 // rorx r14d,edx,0xd
  852. LONG $0xf07b63c4; WORD $0x02ea // rorx r13d,edx,0x2
  853. LONG $0x0a148d45 // lea r10d,[r10+rcx*1]
  854. WORD $0x2141; BYTE $0xff // and r15d,edi
  855. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  856. WORD $0x3145; BYTE $0xc7 // xor r15d,r8d
  857. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  858. LONG $0x390c8d42 // lea ecx,[rcx+r15*1]
  859. WORD $0x8945; BYTE $0xdc // mov r12d,r11d
  860. // ROUND(CX, DX, R8, R9, R10, R11, AX, BX, R12, R13, R14, R15, DI, SP, 0x68)
  861. LONG $0x68245c03 // add ebx,[rsp+0x68]
  862. WORD $0x2145; BYTE $0xd4 // and r12d,r10d
  863. LONG $0xf07b43c4; WORD $0x19ea // rorx r13d,r10d,0x19
  864. LONG $0xf07b43c4; WORD $0x0bfa // rorx r15d,r10d,0xb
  865. LONG $0x310c8d42 // lea ecx,[rcx+r14*1]
  866. LONG $0x231c8d42 // lea ebx,[rbx+r12*1]
  867. LONG $0xf22862c4; BYTE $0xe0 // andn r12d,r10d,eax
  868. WORD $0x3145; BYTE $0xfd // xor r13d,r15d
  869. LONG $0xf07b43c4; WORD $0x06f2 // rorx r14d,r10d,0x6
  870. LONG $0x231c8d42 // lea ebx,[rbx+r12*1]
  871. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  872. WORD $0x8941; BYTE $0xcf // mov r15d,ecx
  873. LONG $0xf07b63c4; WORD $0x16e1 // rorx r12d,ecx,0x16
  874. LONG $0x2b1c8d42 // lea ebx,[rbx+r13*1]
  875. WORD $0x3141; BYTE $0xd7 // xor r15d,edx
  876. LONG $0xf07b63c4; WORD $0x0df1 // rorx r14d,ecx,0xd
  877. LONG $0xf07b63c4; WORD $0x02e9 // rorx r13d,ecx,0x2
  878. LONG $0x190c8d45 // lea r9d,[r9+rbx*1]
  879. WORD $0x2144; BYTE $0xff // and edi,r15d
  880. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  881. WORD $0xd731 // xor edi,edx
  882. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  883. WORD $0x1c8d; BYTE $0x3b // lea ebx,[rbx+rdi*1]
  884. WORD $0x8945; BYTE $0xd4 // mov r12d,r10d
  885. // ROUND(BX, CX, DX, R8, R9, R10, R11, AX, R12, R13, R14, DI, R15, SP, 0x6c)
  886. LONG $0x6c244403 // add eax,[rsp+0x6c]
  887. WORD $0x2145; BYTE $0xcc // and r12d,r9d
  888. LONG $0xf07b43c4; WORD $0x19e9 // rorx r13d,r9d,0x19
  889. LONG $0xf07bc3c4; WORD $0x0bf9 // rorx edi,r9d,0xb
  890. LONG $0x331c8d42 // lea ebx,[rbx+r14*1]
  891. LONG $0x20048d42 // lea eax,[rax+r12*1]
  892. LONG $0xf23042c4; BYTE $0xe3 // andn r12d,r9d,r11d
  893. WORD $0x3141; BYTE $0xfd // xor r13d,edi
  894. LONG $0xf07b43c4; WORD $0x06f1 // rorx r14d,r9d,0x6
  895. LONG $0x20048d42 // lea eax,[rax+r12*1]
  896. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  897. WORD $0xdf89 // mov edi,ebx
  898. LONG $0xf07b63c4; WORD $0x16e3 // rorx r12d,ebx,0x16
  899. LONG $0x28048d42 // lea eax,[rax+r13*1]
  900. WORD $0xcf31 // xor edi,ecx
  901. LONG $0xf07b63c4; WORD $0x0df3 // rorx r14d,ebx,0xd
  902. LONG $0xf07b63c4; WORD $0x02eb // rorx r13d,ebx,0x2
  903. LONG $0x00048d45 // lea r8d,[r8+rax*1]
  904. WORD $0x2141; BYTE $0xff // and r15d,edi
  905. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  906. WORD $0x3141; BYTE $0xcf // xor r15d,ecx
  907. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  908. LONG $0x38048d42 // lea eax,[rax+r15*1]
  909. WORD $0x8945; BYTE $0xcc // mov r12d,r9d
  910. // ROUND(AX, BX, CX, DX, R8, R9, R10, R11, R12, R13, R14, R15, DI, SP, 0x00)
  911. LONG $0x241c0344 // add r11d,[rsp]
  912. WORD $0x2145; BYTE $0xc4 // and r12d,r8d
  913. LONG $0xf07b43c4; WORD $0x19e8 // rorx r13d,r8d,0x19
  914. LONG $0xf07b43c4; WORD $0x0bf8 // rorx r15d,r8d,0xb
  915. LONG $0x30048d42 // lea eax,[rax+r14*1]
  916. LONG $0x231c8d47 // lea r11d,[r11+r12*1]
  917. LONG $0xf23842c4; BYTE $0xe2 // andn r12d,r8d,r10d
  918. WORD $0x3145; BYTE $0xfd // xor r13d,r15d
  919. LONG $0xf07b43c4; WORD $0x06f0 // rorx r14d,r8d,0x6
  920. LONG $0x231c8d47 // lea r11d,[r11+r12*1]
  921. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  922. WORD $0x8941; BYTE $0xc7 // mov r15d,eax
  923. LONG $0xf07b63c4; WORD $0x16e0 // rorx r12d,eax,0x16
  924. LONG $0x2b1c8d47 // lea r11d,[r11+r13*1]
  925. WORD $0x3141; BYTE $0xdf // xor r15d,ebx
  926. LONG $0xf07b63c4; WORD $0x0df0 // rorx r14d,eax,0xd
  927. LONG $0xf07b63c4; WORD $0x02e8 // rorx r13d,eax,0x2
  928. LONG $0x1a148d42 // lea edx,[rdx+r11*1]
  929. WORD $0x2144; BYTE $0xff // and edi,r15d
  930. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  931. WORD $0xdf31 // xor edi,ebx
  932. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  933. LONG $0x3b1c8d45 // lea r11d,[r11+rdi*1]
  934. WORD $0x8945; BYTE $0xc4 // mov r12d,r8d
  935. // ROUND(R11, AX, BX, CX, DX, R8, R9, R10, R12, R13, R14, DI, R15, SP, 0x04)
  936. LONG $0x24540344; BYTE $0x04 // add r10d,[rsp+0x4]
  937. WORD $0x2141; BYTE $0xd4 // and r12d,edx
  938. LONG $0xf07b63c4; WORD $0x19ea // rorx r13d,edx,0x19
  939. LONG $0xf07be3c4; WORD $0x0bfa // rorx edi,edx,0xb
  940. LONG $0x331c8d47 // lea r11d,[r11+r14*1]
  941. LONG $0x22148d47 // lea r10d,[r10+r12*1]
  942. LONG $0xf26842c4; BYTE $0xe1 // andn r12d,edx,r9d
  943. WORD $0x3141; BYTE $0xfd // xor r13d,edi
  944. LONG $0xf07b63c4; WORD $0x06f2 // rorx r14d,edx,0x6
  945. LONG $0x22148d47 // lea r10d,[r10+r12*1]
  946. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  947. WORD $0x8944; BYTE $0xdf // mov edi,r11d
  948. LONG $0xf07b43c4; WORD $0x16e3 // rorx r12d,r11d,0x16
  949. LONG $0x2a148d47 // lea r10d,[r10+r13*1]
  950. WORD $0xc731 // xor edi,eax
  951. LONG $0xf07b43c4; WORD $0x0df3 // rorx r14d,r11d,0xd
  952. LONG $0xf07b43c4; WORD $0x02eb // rorx r13d,r11d,0x2
  953. LONG $0x110c8d42 // lea ecx,[rcx+r10*1]
  954. WORD $0x2141; BYTE $0xff // and r15d,edi
  955. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  956. WORD $0x3141; BYTE $0xc7 // xor r15d,eax
  957. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  958. LONG $0x3a148d47 // lea r10d,[r10+r15*1]
  959. WORD $0x8941; BYTE $0xd4 // mov r12d,edx
  960. // ROUND(R10, R11, AX, BX, CX, DX, R8, R9, R12, R13, R14, R15, DI, SP, 0x08)
  961. LONG $0x244c0344; BYTE $0x08 // add r9d,[rsp+0x8]
  962. WORD $0x2141; BYTE $0xcc // and r12d,ecx
  963. LONG $0xf07b63c4; WORD $0x19e9 // rorx r13d,ecx,0x19
  964. LONG $0xf07b63c4; WORD $0x0bf9 // rorx r15d,ecx,0xb
  965. LONG $0x32148d47 // lea r10d,[r10+r14*1]
  966. LONG $0x210c8d47 // lea r9d,[r9+r12*1]
  967. LONG $0xf27042c4; BYTE $0xe0 // andn r12d,ecx,r8d
  968. WORD $0x3145; BYTE $0xfd // xor r13d,r15d
  969. LONG $0xf07b63c4; WORD $0x06f1 // rorx r14d,ecx,0x6
  970. LONG $0x210c8d47 // lea r9d,[r9+r12*1]
  971. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  972. WORD $0x8945; BYTE $0xd7 // mov r15d,r10d
  973. LONG $0xf07b43c4; WORD $0x16e2 // rorx r12d,r10d,0x16
  974. LONG $0x290c8d47 // lea r9d,[r9+r13*1]
  975. WORD $0x3145; BYTE $0xdf // xor r15d,r11d
  976. LONG $0xf07b43c4; WORD $0x0df2 // rorx r14d,r10d,0xd
  977. LONG $0xf07b43c4; WORD $0x02ea // rorx r13d,r10d,0x2
  978. LONG $0x0b1c8d42 // lea ebx,[rbx+r9*1]
  979. WORD $0x2144; BYTE $0xff // and edi,r15d
  980. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  981. WORD $0x3144; BYTE $0xdf // xor edi,r11d
  982. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  983. LONG $0x390c8d45 // lea r9d,[r9+rdi*1]
  984. WORD $0x8941; BYTE $0xcc // mov r12d,ecx
  985. // ROUND(R9, R10, R11, AX, BX, CX, DX, R8, R12, R13, R14, DI, R15, SP, 0x0c)
  986. LONG $0x24440344; BYTE $0x0c // add r8d,[rsp+0xc]
  987. WORD $0x2141; BYTE $0xdc // and r12d,ebx
  988. LONG $0xf07b63c4; WORD $0x19eb // rorx r13d,ebx,0x19
  989. LONG $0xf07be3c4; WORD $0x0bfb // rorx edi,ebx,0xb
  990. LONG $0x310c8d47 // lea r9d,[r9+r14*1]
  991. LONG $0x20048d47 // lea r8d,[r8+r12*1]
  992. LONG $0xf26062c4; BYTE $0xe2 // andn r12d,ebx,edx
  993. WORD $0x3141; BYTE $0xfd // xor r13d,edi
  994. LONG $0xf07b63c4; WORD $0x06f3 // rorx r14d,ebx,0x6
  995. LONG $0x20048d47 // lea r8d,[r8+r12*1]
  996. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  997. WORD $0x8944; BYTE $0xcf // mov edi,r9d
  998. LONG $0xf07b43c4; WORD $0x16e1 // rorx r12d,r9d,0x16
  999. LONG $0x28048d47 // lea r8d,[r8+r13*1]
  1000. WORD $0x3144; BYTE $0xd7 // xor edi,r10d
  1001. LONG $0xf07b43c4; WORD $0x0df1 // rorx r14d,r9d,0xd
  1002. LONG $0xf07b43c4; WORD $0x02e9 // rorx r13d,r9d,0x2
  1003. LONG $0x00048d42 // lea eax,[rax+r8*1]
  1004. WORD $0x2141; BYTE $0xff // and r15d,edi
  1005. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  1006. WORD $0x3145; BYTE $0xd7 // xor r15d,r10d
  1007. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  1008. LONG $0x38048d47 // lea r8d,[r8+r15*1]
  1009. WORD $0x8941; BYTE $0xdc // mov r12d,ebx
  1010. // ROUND(R8, R9, R10, R11, AX, BX, CX, DX, R12, R13, R14, R15, DI, SP, 0x20)
  1011. LONG $0x20245403 // add edx,[rsp+0x20]
  1012. WORD $0x2141; BYTE $0xc4 // and r12d,eax
  1013. LONG $0xf07b63c4; WORD $0x19e8 // rorx r13d,eax,0x19
  1014. LONG $0xf07b63c4; WORD $0x0bf8 // rorx r15d,eax,0xb
  1015. LONG $0x30048d47 // lea r8d,[r8+r14*1]
  1016. LONG $0x22148d42 // lea edx,[rdx+r12*1]
  1017. LONG $0xf27862c4; BYTE $0xe1 // andn r12d,eax,ecx
  1018. WORD $0x3145; BYTE $0xfd // xor r13d,r15d
  1019. LONG $0xf07b63c4; WORD $0x06f0 // rorx r14d,eax,0x6
  1020. LONG $0x22148d42 // lea edx,[rdx+r12*1]
  1021. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  1022. WORD $0x8945; BYTE $0xc7 // mov r15d,r8d
  1023. LONG $0xf07b43c4; WORD $0x16e0 // rorx r12d,r8d,0x16
  1024. LONG $0x2a148d42 // lea edx,[rdx+r13*1]
  1025. WORD $0x3145; BYTE $0xcf // xor r15d,r9d
  1026. LONG $0xf07b43c4; WORD $0x0df0 // rorx r14d,r8d,0xd
  1027. LONG $0xf07b43c4; WORD $0x02e8 // rorx r13d,r8d,0x2
  1028. LONG $0x131c8d45 // lea r11d,[r11+rdx*1]
  1029. WORD $0x2144; BYTE $0xff // and edi,r15d
  1030. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  1031. WORD $0x3144; BYTE $0xcf // xor edi,r9d
  1032. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  1033. WORD $0x148d; BYTE $0x3a // lea edx,[rdx+rdi*1]
  1034. WORD $0x8941; BYTE $0xc4 // mov r12d,eax
  1035. // ROUND(DX, R8, R9, R10, R11, AX, BX, CX, R12, R13, R14, DI, R15, SP, 0x24)
  1036. LONG $0x24244c03 // add ecx,[rsp+0x24]
  1037. WORD $0x2145; BYTE $0xdc // and r12d,r11d
  1038. LONG $0xf07b43c4; WORD $0x19eb // rorx r13d,r11d,0x19
  1039. LONG $0xf07bc3c4; WORD $0x0bfb // rorx edi,r11d,0xb
  1040. LONG $0x32148d42 // lea edx,[rdx+r14*1]
  1041. LONG $0x210c8d42 // lea ecx,[rcx+r12*1]
  1042. LONG $0xf22062c4; BYTE $0xe3 // andn r12d,r11d,ebx
  1043. WORD $0x3141; BYTE $0xfd // xor r13d,edi
  1044. LONG $0xf07b43c4; WORD $0x06f3 // rorx r14d,r11d,0x6
  1045. LONG $0x210c8d42 // lea ecx,[rcx+r12*1]
  1046. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  1047. WORD $0xd789 // mov edi,edx
  1048. LONG $0xf07b63c4; WORD $0x16e2 // rorx r12d,edx,0x16
  1049. LONG $0x290c8d42 // lea ecx,[rcx+r13*1]
  1050. WORD $0x3144; BYTE $0xc7 // xor edi,r8d
  1051. LONG $0xf07b63c4; WORD $0x0df2 // rorx r14d,edx,0xd
  1052. LONG $0xf07b63c4; WORD $0x02ea // rorx r13d,edx,0x2
  1053. LONG $0x0a148d45 // lea r10d,[r10+rcx*1]
  1054. WORD $0x2141; BYTE $0xff // and r15d,edi
  1055. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  1056. WORD $0x3145; BYTE $0xc7 // xor r15d,r8d
  1057. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  1058. LONG $0x390c8d42 // lea ecx,[rcx+r15*1]
  1059. WORD $0x8945; BYTE $0xdc // mov r12d,r11d
  1060. // ROUND(CX, DX, R8, R9, R10, R11, AX, BX, R12, R13, R14, R15, DI, SP, 0x28)
  1061. LONG $0x28245c03 // add ebx,[rsp+0x28]
  1062. WORD $0x2145; BYTE $0xd4 // and r12d,r10d
  1063. LONG $0xf07b43c4; WORD $0x19ea // rorx r13d,r10d,0x19
  1064. LONG $0xf07b43c4; WORD $0x0bfa // rorx r15d,r10d,0xb
  1065. LONG $0x310c8d42 // lea ecx,[rcx+r14*1]
  1066. LONG $0x231c8d42 // lea ebx,[rbx+r12*1]
  1067. LONG $0xf22862c4; BYTE $0xe0 // andn r12d,r10d,eax
  1068. WORD $0x3145; BYTE $0xfd // xor r13d,r15d
  1069. LONG $0xf07b43c4; WORD $0x06f2 // rorx r14d,r10d,0x6
  1070. LONG $0x231c8d42 // lea ebx,[rbx+r12*1]
  1071. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  1072. WORD $0x8941; BYTE $0xcf // mov r15d,ecx
  1073. LONG $0xf07b63c4; WORD $0x16e1 // rorx r12d,ecx,0x16
  1074. LONG $0x2b1c8d42 // lea ebx,[rbx+r13*1]
  1075. WORD $0x3141; BYTE $0xd7 // xor r15d,edx
  1076. LONG $0xf07b63c4; WORD $0x0df1 // rorx r14d,ecx,0xd
  1077. LONG $0xf07b63c4; WORD $0x02e9 // rorx r13d,ecx,0x2
  1078. LONG $0x190c8d45 // lea r9d,[r9+rbx*1]
  1079. WORD $0x2144; BYTE $0xff // and edi,r15d
  1080. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  1081. WORD $0xd731 // xor edi,edx
  1082. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  1083. WORD $0x1c8d; BYTE $0x3b // lea ebx,[rbx+rdi*1]
  1084. WORD $0x8945; BYTE $0xd4 // mov r12d,r10d
  1085. // ROUND(BX, CX, DX, R8, R9, R10, R11, AX, R12, R13, R14, DI, R15, SP, 0x2c)
  1086. LONG $0x2c244403 // add eax,[rsp+0x2c]
  1087. WORD $0x2145; BYTE $0xcc // and r12d,r9d
  1088. LONG $0xf07b43c4; WORD $0x19e9 // rorx r13d,r9d,0x19
  1089. LONG $0xf07bc3c4; WORD $0x0bf9 // rorx edi,r9d,0xb
  1090. LONG $0x331c8d42 // lea ebx,[rbx+r14*1]
  1091. LONG $0x20048d42 // lea eax,[rax+r12*1]
  1092. LONG $0xf23042c4; BYTE $0xe3 // andn r12d,r9d,r11d
  1093. WORD $0x3141; BYTE $0xfd // xor r13d,edi
  1094. LONG $0xf07b43c4; WORD $0x06f1 // rorx r14d,r9d,0x6
  1095. LONG $0x20048d42 // lea eax,[rax+r12*1]
  1096. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  1097. WORD $0xdf89 // mov edi,ebx
  1098. LONG $0xf07b63c4; WORD $0x16e3 // rorx r12d,ebx,0x16
  1099. LONG $0x28048d42 // lea eax,[rax+r13*1]
  1100. WORD $0xcf31 // xor edi,ecx
  1101. LONG $0xf07b63c4; WORD $0x0df3 // rorx r14d,ebx,0xd
  1102. LONG $0xf07b63c4; WORD $0x02eb // rorx r13d,ebx,0x2
  1103. LONG $0x00048d45 // lea r8d,[r8+rax*1]
  1104. WORD $0x2141; BYTE $0xff // and r15d,edi
  1105. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  1106. WORD $0x3141; BYTE $0xcf // xor r15d,ecx
  1107. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  1108. LONG $0x38048d42 // lea eax,[rax+r15*1]
  1109. WORD $0x8945; BYTE $0xcc // mov r12d,r9d
  // Finish the current pass over the rounds: add the working registers
  // AX, BX, CX, DX, R8, R9, R10, R11 into the eight 32-bit words at (DI),
  // store the sums back, then either leave via done or set up the
  // registers that the first round of loop2 expects.
  1110. MOVQ 0x200(SP), DI // DI = pointer saved at 0x200(SP) ($_ctx)
  1111. ADDQ R14, AX // add the R14 term the last round left pending; earlier rounds add it at the start of the following round
  1112. LEAQ 0x1c0(SP), BP // BP = SP+0x1c0; loop2 reads its per-round values relative to BP
  1113. ADDL (DI), AX // AX..R11 += the eight 32-bit words at 0(DI)..28(DI)
  1114. ADDL 4(DI), BX
  1115. ADDL 8(DI), CX
  1116. ADDL 12(DI), DX
  1117. ADDL 16(DI), R8
  1118. ADDL 20(DI), R9
  1119. ADDL 24(DI), R10
  1120. ADDL 28(DI), R11
  1121. MOVL AX, (DI) // write the eight updated words back to 0(DI)..28(DI)
  1122. MOVL BX, 4(DI)
  1123. MOVL CX, 8(DI)
  1124. MOVL DX, 12(DI)
  1125. MOVL R8, 16(DI)
  1126. MOVL R9, 20(DI)
  1127. MOVL R10, 24(DI)
  1128. MOVL R11, 28(DI)
  1129. CMPQ SI, 0x50(BP) // compare SI with the saved end value ($_end) at 0x50(BP), i.e. 0x210(SP)
  1130. JE done // equal: leave via done
  1131. XORQ R14, R14 // clear the pending term, because the first round of loop2 adds R14 into AX
  1132. MOVQ BX, DI // DI stops being the $_ctx pointer from here on
  1133. XORQ CX, DI // DI = BX ^ CX; the first round of loop2 ANDs it with AX ^ BX, then XORs in BX
  1134. MOVQ R9, R12 // R12 = R9; the first round of loop2 starts by ANDing it with R8
  // loop2: eight unrolled rounds per pass, fed from 32-bit words already on the stack.
  // Each pass reads [BP+0x10..0x1c] and [BP+0x30..0x3c], then lowers BP by 0x40 and
  // repeats while BP >= SP. BP enters at SP+0x1c0, so there are 8 passes (64 rounds).
  // NOTE(review): the stack words are presumably precomputed W[t]+K[t] for the second
  // of the two blocks consumed per outer iteration; the code that stores them is
  // outside this view -- confirm.
  //
  // One round, for ROUND(a, b, c, d, e, f, g, h, ...):
  //   h += [BP+off]
  //   h += (e & f) + (~e & g)                  (and / andn)
  //   h += ror(e,25) ^ ror(e,11) ^ ror(e,6)
  //   d += h
  //   h += ((a ^ b) & (b ^ c)) ^ b             (equals Maj(a,b,c); b^c is carried in from the previous round)
  //   R14 = ror(a,22) ^ ror(a,13) ^ ror(a,2)   (added to this h at the start of the NEXT round)
  //   R12 = e                                  (it is f in the next round)
  // The a..h register assignment rotates by one position per round and is back to the
  // entry assignment after the eighth. DI and R15 alternate: one receives the new a^b,
  // the other still holds the carried b^c.
  1135. loop2:
  // round 1 of 8: a=AX b=BX c=CX d=DX e=R8 f=R9 g=R10 h=R11
  1136. // ROUND(AX, BX, CX, DX, R8, R9, R10, R11, R12, R13, R14, R15, DI, BP, 0x10)
  1137. LONG $0x105d0344 // add r11d,[rbp+0x10]  -- h += stack word for this round
  1138. WORD $0x2145; BYTE $0xc4 // and r12d,r8d  -- R12 (= f on entry) &= e
  1139. LONG $0xf07b43c4; WORD $0x19e8 // rorx r13d,r8d,0x19  -- ror(e,25)
  1140. LONG $0xf07b43c4; WORD $0x0bf8 // rorx r15d,r8d,0xb  -- ror(e,11)
  1141. LONG $0x30048d42 // lea eax,[rax+r14*1]  -- a += R14 pending from the previous round (zero on first entry)
  1142. LONG $0x231c8d47 // lea r11d,[r11+r12*1]  -- h += e & f
  1143. LONG $0xf23842c4; BYTE $0xe2 // andn r12d,r8d,r10d  -- R12 = ~e & g
  1144. WORD $0x3145; BYTE $0xfd // xor r13d,r15d
  1145. LONG $0xf07b43c4; WORD $0x06f0 // rorx r14d,r8d,0x6  -- ror(e,6)
  1146. LONG $0x231c8d47 // lea r11d,[r11+r12*1]  -- h += ~e & g
  1147. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d  -- R13 = ror(e,25) ^ ror(e,11) ^ ror(e,6)
  1148. WORD $0x8941; BYTE $0xc7 // mov r15d,eax
  1149. LONG $0xf07b63c4; WORD $0x16e0 // rorx r12d,eax,0x16  -- ror(a,22)
  1150. LONG $0x2b1c8d47 // lea r11d,[r11+r13*1]  -- h += R13
  1151. WORD $0x3141; BYTE $0xdf // xor r15d,ebx  -- R15 = a ^ b
  1152. LONG $0xf07b63c4; WORD $0x0df0 // rorx r14d,eax,0xd  -- ror(a,13)
  1153. LONG $0xf07b63c4; WORD $0x02e8 // rorx r13d,eax,0x2  -- ror(a,2)
  1154. LONG $0x1a148d42 // lea edx,[rdx+r11*1]  -- d += h
  1155. WORD $0x2144; BYTE $0xff // and edi,r15d  -- DI (= b ^ c on entry) &= a ^ b
  1156. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  1157. WORD $0xdf31 // xor edi,ebx  -- DI = ((a ^ b) & (b ^ c)) ^ b
  1158. WORD $0x3145; BYTE $0xee // xor r14d,r13d  -- R14 = ror(a,13) ^ ror(a,22) ^ ror(a,2), consumed next round
  1159. LONG $0x3b1c8d45 // lea r11d,[r11+rdi*1]  -- h += DI
  1160. WORD $0x8945; BYTE $0xc4 // mov r12d,r8d  -- R12 = e, which is f in the next round
  // round 2 of 8: a=R11 b=AX c=BX d=CX e=DX f=R8 g=R9 h=R10 (DI/R15 roles swapped)
  1161. // ROUND(R11, AX, BX, CX, DX, R8, R9, R10, R12, R13, R14, DI, R15, BP, 0x14)
  1162. LONG $0x14550344 // add r10d,[rbp+0x14]
  1163. WORD $0x2141; BYTE $0xd4 // and r12d,edx
  1164. LONG $0xf07b63c4; WORD $0x19ea // rorx r13d,edx,0x19
  1165. LONG $0xf07be3c4; WORD $0x0bfa // rorx edi,edx,0xb
  1166. LONG $0x331c8d47 // lea r11d,[r11+r14*1]
  1167. LONG $0x22148d47 // lea r10d,[r10+r12*1]
  1168. LONG $0xf26842c4; BYTE $0xe1 // andn r12d,edx,r9d
  1169. WORD $0x3141; BYTE $0xfd // xor r13d,edi
  1170. LONG $0xf07b63c4; WORD $0x06f2 // rorx r14d,edx,0x6
  1171. LONG $0x22148d47 // lea r10d,[r10+r12*1]
  1172. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  1173. WORD $0x8944; BYTE $0xdf // mov edi,r11d
  1174. LONG $0xf07b43c4; WORD $0x16e3 // rorx r12d,r11d,0x16
  1175. LONG $0x2a148d47 // lea r10d,[r10+r13*1]
  1176. WORD $0xc731 // xor edi,eax
  1177. LONG $0xf07b43c4; WORD $0x0df3 // rorx r14d,r11d,0xd
  1178. LONG $0xf07b43c4; WORD $0x02eb // rorx r13d,r11d,0x2
  1179. LONG $0x110c8d42 // lea ecx,[rcx+r10*1]
  1180. WORD $0x2141; BYTE $0xff // and r15d,edi
  1181. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  1182. WORD $0x3141; BYTE $0xc7 // xor r15d,eax
  1183. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  1184. LONG $0x3a148d47 // lea r10d,[r10+r15*1]
  1185. WORD $0x8941; BYTE $0xd4 // mov r12d,edx
  // round 3 of 8: a=R10 b=R11 c=AX d=BX e=CX f=DX g=R8 h=R9
  1186. // ROUND(R10, R11, AX, BX, CX, DX, R8, R9, R12, R13, R14, R15, DI, BP, 0x18)
  1187. LONG $0x184d0344 // add r9d,[rbp+0x18]
  1188. WORD $0x2141; BYTE $0xcc // and r12d,ecx
  1189. LONG $0xf07b63c4; WORD $0x19e9 // rorx r13d,ecx,0x19
  1190. LONG $0xf07b63c4; WORD $0x0bf9 // rorx r15d,ecx,0xb
  1191. LONG $0x32148d47 // lea r10d,[r10+r14*1]
  1192. LONG $0x210c8d47 // lea r9d,[r9+r12*1]
  1193. LONG $0xf27042c4; BYTE $0xe0 // andn r12d,ecx,r8d
  1194. WORD $0x3145; BYTE $0xfd // xor r13d,r15d
  1195. LONG $0xf07b63c4; WORD $0x06f1 // rorx r14d,ecx,0x6
  1196. LONG $0x210c8d47 // lea r9d,[r9+r12*1]
  1197. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  1198. WORD $0x8945; BYTE $0xd7 // mov r15d,r10d
  1199. LONG $0xf07b43c4; WORD $0x16e2 // rorx r12d,r10d,0x16
  1200. LONG $0x290c8d47 // lea r9d,[r9+r13*1]
  1201. WORD $0x3145; BYTE $0xdf // xor r15d,r11d
  1202. LONG $0xf07b43c4; WORD $0x0df2 // rorx r14d,r10d,0xd
  1203. LONG $0xf07b43c4; WORD $0x02ea // rorx r13d,r10d,0x2
  1204. LONG $0x0b1c8d42 // lea ebx,[rbx+r9*1]
  1205. WORD $0x2144; BYTE $0xff // and edi,r15d
  1206. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  1207. WORD $0x3144; BYTE $0xdf // xor edi,r11d
  1208. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  1209. LONG $0x390c8d45 // lea r9d,[r9+rdi*1]
  1210. WORD $0x8941; BYTE $0xcc // mov r12d,ecx
  // round 4 of 8: a=R9 b=R10 c=R11 d=AX e=BX f=CX g=DX h=R8
  1211. // ROUND(R9, R10, R11, AX, BX, CX, DX, R8, R12, R13, R14, DI, R15, BP, 0x1c)
  1212. LONG $0x1c450344 // add r8d,[rbp+0x1c]
  1213. WORD $0x2141; BYTE $0xdc // and r12d,ebx
  1214. LONG $0xf07b63c4; WORD $0x19eb // rorx r13d,ebx,0x19
  1215. LONG $0xf07be3c4; WORD $0x0bfb // rorx edi,ebx,0xb
  1216. LONG $0x310c8d47 // lea r9d,[r9+r14*1]
  1217. LONG $0x20048d47 // lea r8d,[r8+r12*1]
  1218. LONG $0xf26062c4; BYTE $0xe2 // andn r12d,ebx,edx
  1219. WORD $0x3141; BYTE $0xfd // xor r13d,edi
  1220. LONG $0xf07b63c4; WORD $0x06f3 // rorx r14d,ebx,0x6
  1221. LONG $0x20048d47 // lea r8d,[r8+r12*1]
  1222. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  1223. WORD $0x8944; BYTE $0xcf // mov edi,r9d
  1224. LONG $0xf07b43c4; WORD $0x16e1 // rorx r12d,r9d,0x16
  1225. LONG $0x28048d47 // lea r8d,[r8+r13*1]
  1226. WORD $0x3144; BYTE $0xd7 // xor edi,r10d
  1227. LONG $0xf07b43c4; WORD $0x0df1 // rorx r14d,r9d,0xd
  1228. LONG $0xf07b43c4; WORD $0x02e9 // rorx r13d,r9d,0x2
  1229. LONG $0x00048d42 // lea eax,[rax+r8*1]
  1230. WORD $0x2141; BYTE $0xff // and r15d,edi
  1231. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  1232. WORD $0x3145; BYTE $0xd7 // xor r15d,r10d
  1233. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  1234. LONG $0x38048d47 // lea r8d,[r8+r15*1]
  1235. WORD $0x8941; BYTE $0xdc // mov r12d,ebx
  // round 5 of 8: a=R8 b=R9 c=R10 d=R11 e=AX f=BX g=CX h=DX (stack offset jumps to 0x30)
  1236. // ROUND(R8, R9, R10, R11, AX, BX, CX, DX, R12, R13, R14, R15, DI, BP, 0x30)
  1237. WORD $0x5503; BYTE $0x30 // add edx,[rbp+0x30]
  1238. WORD $0x2141; BYTE $0xc4 // and r12d,eax
  1239. LONG $0xf07b63c4; WORD $0x19e8 // rorx r13d,eax,0x19
  1240. LONG $0xf07b63c4; WORD $0x0bf8 // rorx r15d,eax,0xb
  1241. LONG $0x30048d47 // lea r8d,[r8+r14*1]
  1242. LONG $0x22148d42 // lea edx,[rdx+r12*1]
  1243. LONG $0xf27862c4; BYTE $0xe1 // andn r12d,eax,ecx
  1244. WORD $0x3145; BYTE $0xfd // xor r13d,r15d
  1245. LONG $0xf07b63c4; WORD $0x06f0 // rorx r14d,eax,0x6
  1246. LONG $0x22148d42 // lea edx,[rdx+r12*1]
  1247. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  1248. WORD $0x8945; BYTE $0xc7 // mov r15d,r8d
  1249. LONG $0xf07b43c4; WORD $0x16e0 // rorx r12d,r8d,0x16
  1250. LONG $0x2a148d42 // lea edx,[rdx+r13*1]
  1251. WORD $0x3145; BYTE $0xcf // xor r15d,r9d
  1252. LONG $0xf07b43c4; WORD $0x0df0 // rorx r14d,r8d,0xd
  1253. LONG $0xf07b43c4; WORD $0x02e8 // rorx r13d,r8d,0x2
  1254. LONG $0x131c8d45 // lea r11d,[r11+rdx*1]
  1255. WORD $0x2144; BYTE $0xff // and edi,r15d
  1256. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  1257. WORD $0x3144; BYTE $0xcf // xor edi,r9d
  1258. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  1259. WORD $0x148d; BYTE $0x3a // lea edx,[rdx+rdi*1]
  1260. WORD $0x8941; BYTE $0xc4 // mov r12d,eax
  // round 6 of 8: a=DX b=R8 c=R9 d=R10 e=R11 f=AX g=BX h=CX
  1261. // ROUND(DX, R8, R9, R10, R11, AX, BX, CX, R12, R13, R14, DI, R15, BP, 0x34)
  1262. WORD $0x4d03; BYTE $0x34 // add ecx,[rbp+0x34]
  1263. WORD $0x2145; BYTE $0xdc // and r12d,r11d
  1264. LONG $0xf07b43c4; WORD $0x19eb // rorx r13d,r11d,0x19
  1265. LONG $0xf07bc3c4; WORD $0x0bfb // rorx edi,r11d,0xb
  1266. LONG $0x32148d42 // lea edx,[rdx+r14*1]
  1267. LONG $0x210c8d42 // lea ecx,[rcx+r12*1]
  1268. LONG $0xf22062c4; BYTE $0xe3 // andn r12d,r11d,ebx
  1269. WORD $0x3141; BYTE $0xfd // xor r13d,edi
  1270. LONG $0xf07b43c4; WORD $0x06f3 // rorx r14d,r11d,0x6
  1271. LONG $0x210c8d42 // lea ecx,[rcx+r12*1]
  1272. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  1273. WORD $0xd789 // mov edi,edx
  1274. LONG $0xf07b63c4; WORD $0x16e2 // rorx r12d,edx,0x16
  1275. LONG $0x290c8d42 // lea ecx,[rcx+r13*1]
  1276. WORD $0x3144; BYTE $0xc7 // xor edi,r8d
  1277. LONG $0xf07b63c4; WORD $0x0df2 // rorx r14d,edx,0xd
  1278. LONG $0xf07b63c4; WORD $0x02ea // rorx r13d,edx,0x2
  1279. LONG $0x0a148d45 // lea r10d,[r10+rcx*1]
  1280. WORD $0x2141; BYTE $0xff // and r15d,edi
  1281. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  1282. WORD $0x3145; BYTE $0xc7 // xor r15d,r8d
  1283. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  1284. LONG $0x390c8d42 // lea ecx,[rcx+r15*1]
  1285. WORD $0x8945; BYTE $0xdc // mov r12d,r11d
  // round 7 of 8: a=CX b=DX c=R8 d=R9 e=R10 f=R11 g=AX h=BX
  1286. // ROUND(CX, DX, R8, R9, R10, R11, AX, BX, R12, R13, R14, R15, DI, BP, 0x38)
  1287. WORD $0x5d03; BYTE $0x38 // add ebx,[rbp+0x38]
  1288. WORD $0x2145; BYTE $0xd4 // and r12d,r10d
  1289. LONG $0xf07b43c4; WORD $0x19ea // rorx r13d,r10d,0x19
  1290. LONG $0xf07b43c4; WORD $0x0bfa // rorx r15d,r10d,0xb
  1291. LONG $0x310c8d42 // lea ecx,[rcx+r14*1]
  1292. LONG $0x231c8d42 // lea ebx,[rbx+r12*1]
  1293. LONG $0xf22862c4; BYTE $0xe0 // andn r12d,r10d,eax
  1294. WORD $0x3145; BYTE $0xfd // xor r13d,r15d
  1295. LONG $0xf07b43c4; WORD $0x06f2 // rorx r14d,r10d,0x6
  1296. LONG $0x231c8d42 // lea ebx,[rbx+r12*1]
  1297. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  1298. WORD $0x8941; BYTE $0xcf // mov r15d,ecx
  1299. LONG $0xf07b63c4; WORD $0x16e1 // rorx r12d,ecx,0x16
  1300. LONG $0x2b1c8d42 // lea ebx,[rbx+r13*1]
  1301. WORD $0x3141; BYTE $0xd7 // xor r15d,edx
  1302. LONG $0xf07b63c4; WORD $0x0df1 // rorx r14d,ecx,0xd
  1303. LONG $0xf07b63c4; WORD $0x02e9 // rorx r13d,ecx,0x2
  1304. LONG $0x190c8d45 // lea r9d,[r9+rbx*1]
  1305. WORD $0x2144; BYTE $0xff // and edi,r15d
  1306. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  1307. WORD $0xd731 // xor edi,edx
  1308. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  1309. WORD $0x1c8d; BYTE $0x3b // lea ebx,[rbx+rdi*1]
  1310. WORD $0x8945; BYTE $0xd4 // mov r12d,r10d
  // round 8 of 8: a=BX b=CX c=DX d=R8 e=R9 f=R10 g=R11 h=AX
  1311. // ROUND(BX, CX, DX, R8, R9, R10, R11, AX, R12, R13, R14, DI, R15, BP, 0x3c)
  1312. WORD $0x4503; BYTE $0x3c // add eax,[rbp+0x3c]
  1313. WORD $0x2145; BYTE $0xcc // and r12d,r9d
  1314. LONG $0xf07b43c4; WORD $0x19e9 // rorx r13d,r9d,0x19
  1315. LONG $0xf07bc3c4; WORD $0x0bf9 // rorx edi,r9d,0xb
  1316. LONG $0x331c8d42 // lea ebx,[rbx+r14*1]
  1317. LONG $0x20048d42 // lea eax,[rax+r12*1]
  1318. LONG $0xf23042c4; BYTE $0xe3 // andn r12d,r9d,r11d
  1319. WORD $0x3141; BYTE $0xfd // xor r13d,edi
  1320. LONG $0xf07b43c4; WORD $0x06f1 // rorx r14d,r9d,0x6
  1321. LONG $0x20048d42 // lea eax,[rax+r12*1]
  1322. WORD $0x3145; BYTE $0xf5 // xor r13d,r14d
  1323. WORD $0xdf89 // mov edi,ebx
  1324. LONG $0xf07b63c4; WORD $0x16e3 // rorx r12d,ebx,0x16
  1325. LONG $0x28048d42 // lea eax,[rax+r13*1]
  1326. WORD $0xcf31 // xor edi,ecx
  1327. LONG $0xf07b63c4; WORD $0x0df3 // rorx r14d,ebx,0xd
  1328. LONG $0xf07b63c4; WORD $0x02eb // rorx r13d,ebx,0x2
  1329. LONG $0x00048d45 // lea r8d,[r8+rax*1]
  1330. WORD $0x2141; BYTE $0xff // and r15d,edi
  1331. WORD $0x3145; BYTE $0xe6 // xor r14d,r12d
  1332. WORD $0x3141; BYTE $0xcf // xor r15d,ecx
  1333. WORD $0x3145; BYTE $0xee // xor r14d,r13d
  1334. LONG $0x38048d42 // lea eax,[rax+r15*1]
  1335. WORD $0x8945; BYTE $0xcc // mov r12d,r9d
  // Step the read pointer down one 0x40-byte row and repeat until it drops below SP.
  // R14 still holds a pending term for AX; it is added either by the first `lea` of
  // the next pass or by the ADDQ R14, AX that follows the loop.
  1336. ADDQ $-0x40, BP
  1337. CMPQ BP, SP
  1338. JAE loop2
  // After loop2: fold the working registers into the eight 32-bit words at the ctx
  // pointer, advance the input pointer, and either loop back to loop0 or fall into done.
  // The stores are interleaved with the pointer bookkeeping; none of MOVL, MOVQ or cmove
  // modifies flags, so the JBE at the end still tests the CMPQ SI, 0x50(SP) result.
  1339. MOVQ 0x200(SP), DI // $_ctx
  // Add the term left pending in R14 by the last round of loop2.
  1340. ADDQ R14, AX
  // Raise SP by 0x1c0; the 0x50(SP) slot read below is addressed from this new SP.
  1341. ADDQ $0x1c0, SP
  1342. ADDL (DI), AX
  1343. ADDL 4(DI), BX
  1344. ADDL 8(DI), CX
  1345. ADDL 12(DI), DX
  1346. ADDL 16(DI), R8
  1347. ADDL 20(DI), R9
  1348. ADDQ $0x80, SI // input += 2 (SI advances by 0x80 bytes)
  1349. ADDL 24(DI), R10
  // R12 = advanced input pointer, unless the cmove below replaces it with SP.
  1350. MOVQ SI, R12
  1351. ADDL 28(DI), R11
  1352. CMPQ SI, 0x50(SP) // input == _end
  1353. MOVL AX, (DI)
  1354. LONG $0xe4440f4c // cmove r12,rsp /* next block or stale data */
  // NOTE(review): this repeats the store two lines up; AX and DI are not modified in
  // between, so it looks redundant -- confirm before removing.
  1355. MOVL AX, (DI)
  1356. MOVL BX, 4(DI)
  1357. MOVL CX, 8(DI)
  1358. MOVL DX, 12(DI)
  1359. MOVL R8, 16(DI)
  1360. MOVL R9, 20(DI)
  1361. MOVL R10, 24(DI)
  1362. MOVL R11, 28(DI)
  // Unsigned SI <= _end: go round again (loop0 is defined outside this view).
  1363. JBE loop0
  // Otherwise BP = SP so that done, which restores from BP, sees the same base as on
  // the JE done path.
  1364. LEAQ (SP), BP
  // done: common exit. Both ways in arrive with BP holding the base from which the
  // saved stack pointer is reachable at +0x58: the JE done path set BP = SP+0x1c0,
  // the fall-through path set BP = SP after SP had itself been raised by 0x1c0.
  1365. done:
  1366. MOVQ BP, SP
  1367. MOVQ 0x58(SP), SP // restore saved stack pointer
  // vzeroupper zeroes the upper bits of the vector registers before returning.
  1368. WORD $0xf8c5; BYTE $0x77 // vzeroupper
  1369. RET