//+build !noasm,!appengine

// ARM64 version of SHA256
//
// Minio Cloud Storage, (C) 2016 Minio, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
//
// Based on implementation as found in https://github.com/jocover/sha256-armv8
//
// Use github.com/minio/asm2plan9s on this file to assemble ARM instructions to
// their Plan9 equivalents
//

  24. TEXT ·blockArm(SB), 7, $0
  25. MOVD h+0(FP), R0
  26. MOVD message+24(FP), R1
  27. MOVD message_len+32(FP), R2 // length of message
  28. SUBS $64, R2
  29. BMI complete
  30. // Load constants table pointer
  31. MOVD $·constants(SB), R3
  32. // Cache constants table in registers v16 - v31
  33. WORD $0x4cdf2870 // ld1 {v16.4s-v19.4s}, [x3], #64
  34. WORD $0x4cdf7800 // ld1 {v0.4s}, [x0], #16
  35. WORD $0x4cdf2874 // ld1 {v20.4s-v23.4s}, [x3], #64
  36. WORD $0x4c407801 // ld1 {v1.4s}, [x0]
  37. WORD $0x4cdf2878 // ld1 {v24.4s-v27.4s}, [x3], #64
  38. WORD $0xd1004000 // sub x0, x0, #0x10
  39. WORD $0x4cdf287c // ld1 {v28.4s-v31.4s}, [x3], #64
  40. loop:
  41. // Main loop
  42. WORD $0x4cdf2025 // ld1 {v5.16b-v8.16b}, [x1], #64
  43. WORD $0x4ea01c02 // mov v2.16b, v0.16b
  44. WORD $0x4ea11c23 // mov v3.16b, v1.16b
  45. WORD $0x6e2008a5 // rev32 v5.16b, v5.16b
  46. WORD $0x6e2008c6 // rev32 v6.16b, v6.16b
  47. WORD $0x4eb084a9 // add v9.4s, v5.4s, v16.4s
  48. WORD $0x6e2008e7 // rev32 v7.16b, v7.16b
  49. WORD $0x4eb184ca // add v10.4s, v6.4s, v17.4s
  50. WORD $0x4ea21c44 // mov v4.16b, v2.16b
  51. WORD $0x5e094062 // sha256h q2, q3, v9.4s
  52. WORD $0x5e095083 // sha256h2 q3, q4, v9.4s
  53. WORD $0x5e2828c5 // sha256su0 v5.4s, v6.4s
  54. WORD $0x6e200908 // rev32 v8.16b, v8.16b
  55. WORD $0x4eb284e9 // add v9.4s, v7.4s, v18.4s
  56. WORD $0x4ea21c44 // mov v4.16b, v2.16b
  57. WORD $0x5e0a4062 // sha256h q2, q3, v10.4s
  58. WORD $0x5e0a5083 // sha256h2 q3, q4, v10.4s
  59. WORD $0x5e2828e6 // sha256su0 v6.4s, v7.4s
  60. WORD $0x5e0860e5 // sha256su1 v5.4s, v7.4s, v8.4s
  61. WORD $0x4eb3850a // add v10.4s, v8.4s, v19.4s
  62. WORD $0x4ea21c44 // mov v4.16b, v2.16b
  63. WORD $0x5e094062 // sha256h q2, q3, v9.4s
  64. WORD $0x5e095083 // sha256h2 q3, q4, v9.4s
  65. WORD $0x5e282907 // sha256su0 v7.4s, v8.4s
  66. WORD $0x5e056106 // sha256su1 v6.4s, v8.4s, v5.4s
  67. WORD $0x4eb484a9 // add v9.4s, v5.4s, v20.4s
  68. WORD $0x4ea21c44 // mov v4.16b, v2.16b
  69. WORD $0x5e0a4062 // sha256h q2, q3, v10.4s
  70. WORD $0x5e0a5083 // sha256h2 q3, q4, v10.4s
  71. WORD $0x5e2828a8 // sha256su0 v8.4s, v5.4s
  72. WORD $0x5e0660a7 // sha256su1 v7.4s, v5.4s, v6.4s
  73. WORD $0x4eb584ca // add v10.4s, v6.4s, v21.4s
  74. WORD $0x4ea21c44 // mov v4.16b, v2.16b
  75. WORD $0x5e094062 // sha256h q2, q3, v9.4s
  76. WORD $0x5e095083 // sha256h2 q3, q4, v9.4s
  77. WORD $0x5e2828c5 // sha256su0 v5.4s, v6.4s
  78. WORD $0x5e0760c8 // sha256su1 v8.4s, v6.4s, v7.4s
  79. WORD $0x4eb684e9 // add v9.4s, v7.4s, v22.4s
  80. WORD $0x4ea21c44 // mov v4.16b, v2.16b
  81. WORD $0x5e0a4062 // sha256h q2, q3, v10.4s
  82. WORD $0x5e0a5083 // sha256h2 q3, q4, v10.4s
  83. WORD $0x5e2828e6 // sha256su0 v6.4s, v7.4s
  84. WORD $0x5e0860e5 // sha256su1 v5.4s, v7.4s, v8.4s
  85. WORD $0x4eb7850a // add v10.4s, v8.4s, v23.4s
  86. WORD $0x4ea21c44 // mov v4.16b, v2.16b
  87. WORD $0x5e094062 // sha256h q2, q3, v9.4s
  88. WORD $0x5e095083 // sha256h2 q3, q4, v9.4s
  89. WORD $0x5e282907 // sha256su0 v7.4s, v8.4s
  90. WORD $0x5e056106 // sha256su1 v6.4s, v8.4s, v5.4s
  91. WORD $0x4eb884a9 // add v9.4s, v5.4s, v24.4s
  92. WORD $0x4ea21c44 // mov v4.16b, v2.16b
  93. WORD $0x5e0a4062 // sha256h q2, q3, v10.4s
  94. WORD $0x5e0a5083 // sha256h2 q3, q4, v10.4s
  95. WORD $0x5e2828a8 // sha256su0 v8.4s, v5.4s
  96. WORD $0x5e0660a7 // sha256su1 v7.4s, v5.4s, v6.4s
  97. WORD $0x4eb984ca // add v10.4s, v6.4s, v25.4s
  98. WORD $0x4ea21c44 // mov v4.16b, v2.16b
  99. WORD $0x5e094062 // sha256h q2, q3, v9.4s
  100. WORD $0x5e095083 // sha256h2 q3, q4, v9.4s
  101. WORD $0x5e2828c5 // sha256su0 v5.4s, v6.4s
  102. WORD $0x5e0760c8 // sha256su1 v8.4s, v6.4s, v7.4s
  103. WORD $0x4eba84e9 // add v9.4s, v7.4s, v26.4s
  104. WORD $0x4ea21c44 // mov v4.16b, v2.16b
  105. WORD $0x5e0a4062 // sha256h q2, q3, v10.4s
  106. WORD $0x5e0a5083 // sha256h2 q3, q4, v10.4s
  107. WORD $0x5e2828e6 // sha256su0 v6.4s, v7.4s
  108. WORD $0x5e0860e5 // sha256su1 v5.4s, v7.4s, v8.4s
  109. WORD $0x4ebb850a // add v10.4s, v8.4s, v27.4s
  110. WORD $0x4ea21c44 // mov v4.16b, v2.16b
  111. WORD $0x5e094062 // sha256h q2, q3, v9.4s
  112. WORD $0x5e095083 // sha256h2 q3, q4, v9.4s
  113. WORD $0x5e282907 // sha256su0 v7.4s, v8.4s
  114. WORD $0x5e056106 // sha256su1 v6.4s, v8.4s, v5.4s
  115. WORD $0x4ebc84a9 // add v9.4s, v5.4s, v28.4s
  116. WORD $0x4ea21c44 // mov v4.16b, v2.16b
  117. WORD $0x5e0a4062 // sha256h q2, q3, v10.4s
  118. WORD $0x5e0a5083 // sha256h2 q3, q4, v10.4s
  119. WORD $0x5e2828a8 // sha256su0 v8.4s, v5.4s
  120. WORD $0x5e0660a7 // sha256su1 v7.4s, v5.4s, v6.4s
  121. WORD $0x4ebd84ca // add v10.4s, v6.4s, v29.4s
  122. WORD $0x4ea21c44 // mov v4.16b, v2.16b
  123. WORD $0x5e094062 // sha256h q2, q3, v9.4s
  124. WORD $0x5e095083 // sha256h2 q3, q4, v9.4s
  125. WORD $0x5e0760c8 // sha256su1 v8.4s, v6.4s, v7.4s
  126. WORD $0x4ebe84e9 // add v9.4s, v7.4s, v30.4s
  127. WORD $0x4ea21c44 // mov v4.16b, v2.16b
  128. WORD $0x5e0a4062 // sha256h q2, q3, v10.4s
  129. WORD $0x5e0a5083 // sha256h2 q3, q4, v10.4s
  130. WORD $0x4ebf850a // add v10.4s, v8.4s, v31.4s
  131. WORD $0x4ea21c44 // mov v4.16b, v2.16b
  132. WORD $0x5e094062 // sha256h q2, q3, v9.4s
  133. WORD $0x5e095083 // sha256h2 q3, q4, v9.4s
  134. WORD $0x4ea21c44 // mov v4.16b, v2.16b
  135. WORD $0x5e0a4062 // sha256h q2, q3, v10.4s
  136. WORD $0x5e0a5083 // sha256h2 q3, q4, v10.4s
  137. WORD $0x4ea38421 // add v1.4s, v1.4s, v3.4s
  138. WORD $0x4ea28400 // add v0.4s, v0.4s, v2.4s
  139. SUBS $64, R2
  140. BPL loop
  141. // Store result
  142. WORD $0x4c00a800 // st1 {v0.4s, v1.4s}, [x0]
  143. complete:
  144. RET
  145. // Constants table
  146. DATA ·constants+0x0(SB)/8, $0x71374491428a2f98
  147. DATA ·constants+0x8(SB)/8, $0xe9b5dba5b5c0fbcf
  148. DATA ·constants+0x10(SB)/8, $0x59f111f13956c25b
  149. DATA ·constants+0x18(SB)/8, $0xab1c5ed5923f82a4
  150. DATA ·constants+0x20(SB)/8, $0x12835b01d807aa98
  151. DATA ·constants+0x28(SB)/8, $0x550c7dc3243185be
  152. DATA ·constants+0x30(SB)/8, $0x80deb1fe72be5d74
  153. DATA ·constants+0x38(SB)/8, $0xc19bf1749bdc06a7
  154. DATA ·constants+0x40(SB)/8, $0xefbe4786e49b69c1
  155. DATA ·constants+0x48(SB)/8, $0x240ca1cc0fc19dc6
  156. DATA ·constants+0x50(SB)/8, $0x4a7484aa2de92c6f
  157. DATA ·constants+0x58(SB)/8, $0x76f988da5cb0a9dc
  158. DATA ·constants+0x60(SB)/8, $0xa831c66d983e5152
  159. DATA ·constants+0x68(SB)/8, $0xbf597fc7b00327c8
  160. DATA ·constants+0x70(SB)/8, $0xd5a79147c6e00bf3
  161. DATA ·constants+0x78(SB)/8, $0x1429296706ca6351
  162. DATA ·constants+0x80(SB)/8, $0x2e1b213827b70a85
  163. DATA ·constants+0x88(SB)/8, $0x53380d134d2c6dfc
  164. DATA ·constants+0x90(SB)/8, $0x766a0abb650a7354
  165. DATA ·constants+0x98(SB)/8, $0x92722c8581c2c92e
  166. DATA ·constants+0xa0(SB)/8, $0xa81a664ba2bfe8a1
  167. DATA ·constants+0xa8(SB)/8, $0xc76c51a3c24b8b70
  168. DATA ·constants+0xb0(SB)/8, $0xd6990624d192e819
  169. DATA ·constants+0xb8(SB)/8, $0x106aa070f40e3585
  170. DATA ·constants+0xc0(SB)/8, $0x1e376c0819a4c116
  171. DATA ·constants+0xc8(SB)/8, $0x34b0bcb52748774c
  172. DATA ·constants+0xd0(SB)/8, $0x4ed8aa4a391c0cb3
  173. DATA ·constants+0xd8(SB)/8, $0x682e6ff35b9cca4f
  174. DATA ·constants+0xe0(SB)/8, $0x78a5636f748f82ee
  175. DATA ·constants+0xe8(SB)/8, $0x8cc7020884c87814
  176. DATA ·constants+0xf0(SB)/8, $0xa4506ceb90befffa
  177. DATA ·constants+0xf8(SB)/8, $0xc67178f2bef9a3f7
  178. GLOBL ·constants(SB), 8, $256