sha256blockAvx512_amd64.go 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500
  1. //+build !noasm,!appengine
  2. /*
  3. * Minio Cloud Storage, (C) 2017 Minio, Inc.
  4. *
  5. * Licensed under the Apache License, Version 2.0 (the "License");
  6. * you may not use this file except in compliance with the License.
  7. * You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. */
  17. package sha256
  18. import (
  19. "encoding/binary"
  20. "errors"
  21. "hash"
  22. "sort"
  23. "sync/atomic"
  24. "time"
  25. )
// sha256X16Avx512 is declared without a body, so its implementation lives
// outside this Go file (blockAvx512 below is described as the interface
// function to the assembly code). It is handed the state of all lanes
// (digests), a scratch buffer, the table of replicated round constants, a
// slice of lane masks and the 16 input messages.
// NOTE(review): how the routine consumes scratch and mask is not visible from
// this file — confirm against the assembly source.
//
//go:noescape
func sha256X16Avx512(digests *[512]byte, scratch *[512]byte, table *[512]uint64, mask []uint64, inputs [16][]byte)
// Avx512ServerUID - Do not start at 0 but at the next multiple of 16 so as to be
// able to differentiate from the default initialization value of 0
const Avx512ServerUID = 16

// uidCounter is the source of the unique ids handed out by NewAvx512; it is
// only modified through atomic operations.
var uidCounter uint64
  32. // NewAvx512 - initialize sha256 Avx512 implementation.
  33. func NewAvx512(a512srv *Avx512Server) hash.Hash {
  34. uid := atomic.AddUint64(&uidCounter, 1)
  35. return &Avx512Digest{uid: uid, a512srv: a512srv}
  36. }
// Avx512Digest - Type for computing SHA256 using Avx512
type Avx512Digest struct {
	uid     uint64        // unique id under which this digest talks to the server
	a512srv *Avx512Server // server that receives the data of this digest
	x       [chunk]byte   // input that does not yet fill a complete chunk
	nx      int           // number of valid bytes in x
	len     uint64        // total number of bytes written so far
	final   bool          // set once Sum has obtained the result
	result  [Size]byte    // checksum cached by Sum
}
  47. // Size - Return size of checksum
  48. func (d *Avx512Digest) Size() int { return Size }
  49. // BlockSize - Return blocksize of checksum
  50. func (d Avx512Digest) BlockSize() int { return BlockSize }
  51. // Reset - reset sha digest to its initial values
  52. func (d *Avx512Digest) Reset() {
  53. d.a512srv.blocksCh <- blockInput{uid: d.uid, reset: true}
  54. d.nx = 0
  55. d.len = 0
  56. d.final = false
  57. }
  58. // Write to digest
  59. func (d *Avx512Digest) Write(p []byte) (nn int, err error) {
  60. if d.final {
  61. return 0, errors.New("Avx512Digest already finalized. Reset first before writing again")
  62. }
  63. nn = len(p)
  64. d.len += uint64(nn)
  65. if d.nx > 0 {
  66. n := copy(d.x[d.nx:], p)
  67. d.nx += n
  68. if d.nx == chunk {
  69. d.a512srv.blocksCh <- blockInput{uid: d.uid, msg: d.x[:]}
  70. d.nx = 0
  71. }
  72. p = p[n:]
  73. }
  74. if len(p) >= chunk {
  75. n := len(p) &^ (chunk - 1)
  76. d.a512srv.blocksCh <- blockInput{uid: d.uid, msg: p[:n]}
  77. p = p[n:]
  78. }
  79. if len(p) > 0 {
  80. d.nx = copy(d.x[:], p)
  81. }
  82. return
  83. }
  84. // Sum - Return sha256 sum in bytes
  85. func (d *Avx512Digest) Sum(in []byte) (result []byte) {
  86. if d.final {
  87. return append(in, d.result[:]...)
  88. }
  89. trail := make([]byte, 0, 128)
  90. trail = append(trail, d.x[:d.nx]...)
  91. len := d.len
  92. // Padding. Add a 1 bit and 0 bits until 56 bytes mod 64.
  93. var tmp [64]byte
  94. tmp[0] = 0x80
  95. if len%64 < 56 {
  96. trail = append(trail, tmp[0:56-len%64]...)
  97. } else {
  98. trail = append(trail, tmp[0:64+56-len%64]...)
  99. }
  100. d.nx = 0
  101. // Length in bits.
  102. len <<= 3
  103. for i := uint(0); i < 8; i++ {
  104. tmp[i] = byte(len >> (56 - 8*i))
  105. }
  106. trail = append(trail, tmp[0:8]...)
  107. sumCh := make(chan [Size]byte)
  108. d.a512srv.blocksCh <- blockInput{uid: d.uid, msg: trail, final: true, sumCh: sumCh}
  109. d.result = <-sumCh
  110. d.final = true
  111. return append(in, d.result[:]...)
  112. }
  113. var table = [512]uint64{
  114. 0x428a2f98428a2f98, 0x428a2f98428a2f98, 0x428a2f98428a2f98, 0x428a2f98428a2f98,
  115. 0x428a2f98428a2f98, 0x428a2f98428a2f98, 0x428a2f98428a2f98, 0x428a2f98428a2f98,
  116. 0x7137449171374491, 0x7137449171374491, 0x7137449171374491, 0x7137449171374491,
  117. 0x7137449171374491, 0x7137449171374491, 0x7137449171374491, 0x7137449171374491,
  118. 0xb5c0fbcfb5c0fbcf, 0xb5c0fbcfb5c0fbcf, 0xb5c0fbcfb5c0fbcf, 0xb5c0fbcfb5c0fbcf,
  119. 0xb5c0fbcfb5c0fbcf, 0xb5c0fbcfb5c0fbcf, 0xb5c0fbcfb5c0fbcf, 0xb5c0fbcfb5c0fbcf,
  120. 0xe9b5dba5e9b5dba5, 0xe9b5dba5e9b5dba5, 0xe9b5dba5e9b5dba5, 0xe9b5dba5e9b5dba5,
  121. 0xe9b5dba5e9b5dba5, 0xe9b5dba5e9b5dba5, 0xe9b5dba5e9b5dba5, 0xe9b5dba5e9b5dba5,
  122. 0x3956c25b3956c25b, 0x3956c25b3956c25b, 0x3956c25b3956c25b, 0x3956c25b3956c25b,
  123. 0x3956c25b3956c25b, 0x3956c25b3956c25b, 0x3956c25b3956c25b, 0x3956c25b3956c25b,
  124. 0x59f111f159f111f1, 0x59f111f159f111f1, 0x59f111f159f111f1, 0x59f111f159f111f1,
  125. 0x59f111f159f111f1, 0x59f111f159f111f1, 0x59f111f159f111f1, 0x59f111f159f111f1,
  126. 0x923f82a4923f82a4, 0x923f82a4923f82a4, 0x923f82a4923f82a4, 0x923f82a4923f82a4,
  127. 0x923f82a4923f82a4, 0x923f82a4923f82a4, 0x923f82a4923f82a4, 0x923f82a4923f82a4,
  128. 0xab1c5ed5ab1c5ed5, 0xab1c5ed5ab1c5ed5, 0xab1c5ed5ab1c5ed5, 0xab1c5ed5ab1c5ed5,
  129. 0xab1c5ed5ab1c5ed5, 0xab1c5ed5ab1c5ed5, 0xab1c5ed5ab1c5ed5, 0xab1c5ed5ab1c5ed5,
  130. 0xd807aa98d807aa98, 0xd807aa98d807aa98, 0xd807aa98d807aa98, 0xd807aa98d807aa98,
  131. 0xd807aa98d807aa98, 0xd807aa98d807aa98, 0xd807aa98d807aa98, 0xd807aa98d807aa98,
  132. 0x12835b0112835b01, 0x12835b0112835b01, 0x12835b0112835b01, 0x12835b0112835b01,
  133. 0x12835b0112835b01, 0x12835b0112835b01, 0x12835b0112835b01, 0x12835b0112835b01,
  134. 0x243185be243185be, 0x243185be243185be, 0x243185be243185be, 0x243185be243185be,
  135. 0x243185be243185be, 0x243185be243185be, 0x243185be243185be, 0x243185be243185be,
  136. 0x550c7dc3550c7dc3, 0x550c7dc3550c7dc3, 0x550c7dc3550c7dc3, 0x550c7dc3550c7dc3,
  137. 0x550c7dc3550c7dc3, 0x550c7dc3550c7dc3, 0x550c7dc3550c7dc3, 0x550c7dc3550c7dc3,
  138. 0x72be5d7472be5d74, 0x72be5d7472be5d74, 0x72be5d7472be5d74, 0x72be5d7472be5d74,
  139. 0x72be5d7472be5d74, 0x72be5d7472be5d74, 0x72be5d7472be5d74, 0x72be5d7472be5d74,
  140. 0x80deb1fe80deb1fe, 0x80deb1fe80deb1fe, 0x80deb1fe80deb1fe, 0x80deb1fe80deb1fe,
  141. 0x80deb1fe80deb1fe, 0x80deb1fe80deb1fe, 0x80deb1fe80deb1fe, 0x80deb1fe80deb1fe,
  142. 0x9bdc06a79bdc06a7, 0x9bdc06a79bdc06a7, 0x9bdc06a79bdc06a7, 0x9bdc06a79bdc06a7,
  143. 0x9bdc06a79bdc06a7, 0x9bdc06a79bdc06a7, 0x9bdc06a79bdc06a7, 0x9bdc06a79bdc06a7,
  144. 0xc19bf174c19bf174, 0xc19bf174c19bf174, 0xc19bf174c19bf174, 0xc19bf174c19bf174,
  145. 0xc19bf174c19bf174, 0xc19bf174c19bf174, 0xc19bf174c19bf174, 0xc19bf174c19bf174,
  146. 0xe49b69c1e49b69c1, 0xe49b69c1e49b69c1, 0xe49b69c1e49b69c1, 0xe49b69c1e49b69c1,
  147. 0xe49b69c1e49b69c1, 0xe49b69c1e49b69c1, 0xe49b69c1e49b69c1, 0xe49b69c1e49b69c1,
  148. 0xefbe4786efbe4786, 0xefbe4786efbe4786, 0xefbe4786efbe4786, 0xefbe4786efbe4786,
  149. 0xefbe4786efbe4786, 0xefbe4786efbe4786, 0xefbe4786efbe4786, 0xefbe4786efbe4786,
  150. 0x0fc19dc60fc19dc6, 0x0fc19dc60fc19dc6, 0x0fc19dc60fc19dc6, 0x0fc19dc60fc19dc6,
  151. 0x0fc19dc60fc19dc6, 0x0fc19dc60fc19dc6, 0x0fc19dc60fc19dc6, 0x0fc19dc60fc19dc6,
  152. 0x240ca1cc240ca1cc, 0x240ca1cc240ca1cc, 0x240ca1cc240ca1cc, 0x240ca1cc240ca1cc,
  153. 0x240ca1cc240ca1cc, 0x240ca1cc240ca1cc, 0x240ca1cc240ca1cc, 0x240ca1cc240ca1cc,
  154. 0x2de92c6f2de92c6f, 0x2de92c6f2de92c6f, 0x2de92c6f2de92c6f, 0x2de92c6f2de92c6f,
  155. 0x2de92c6f2de92c6f, 0x2de92c6f2de92c6f, 0x2de92c6f2de92c6f, 0x2de92c6f2de92c6f,
  156. 0x4a7484aa4a7484aa, 0x4a7484aa4a7484aa, 0x4a7484aa4a7484aa, 0x4a7484aa4a7484aa,
  157. 0x4a7484aa4a7484aa, 0x4a7484aa4a7484aa, 0x4a7484aa4a7484aa, 0x4a7484aa4a7484aa,
  158. 0x5cb0a9dc5cb0a9dc, 0x5cb0a9dc5cb0a9dc, 0x5cb0a9dc5cb0a9dc, 0x5cb0a9dc5cb0a9dc,
  159. 0x5cb0a9dc5cb0a9dc, 0x5cb0a9dc5cb0a9dc, 0x5cb0a9dc5cb0a9dc, 0x5cb0a9dc5cb0a9dc,
  160. 0x76f988da76f988da, 0x76f988da76f988da, 0x76f988da76f988da, 0x76f988da76f988da,
  161. 0x76f988da76f988da, 0x76f988da76f988da, 0x76f988da76f988da, 0x76f988da76f988da,
  162. 0x983e5152983e5152, 0x983e5152983e5152, 0x983e5152983e5152, 0x983e5152983e5152,
  163. 0x983e5152983e5152, 0x983e5152983e5152, 0x983e5152983e5152, 0x983e5152983e5152,
  164. 0xa831c66da831c66d, 0xa831c66da831c66d, 0xa831c66da831c66d, 0xa831c66da831c66d,
  165. 0xa831c66da831c66d, 0xa831c66da831c66d, 0xa831c66da831c66d, 0xa831c66da831c66d,
  166. 0xb00327c8b00327c8, 0xb00327c8b00327c8, 0xb00327c8b00327c8, 0xb00327c8b00327c8,
  167. 0xb00327c8b00327c8, 0xb00327c8b00327c8, 0xb00327c8b00327c8, 0xb00327c8b00327c8,
  168. 0xbf597fc7bf597fc7, 0xbf597fc7bf597fc7, 0xbf597fc7bf597fc7, 0xbf597fc7bf597fc7,
  169. 0xbf597fc7bf597fc7, 0xbf597fc7bf597fc7, 0xbf597fc7bf597fc7, 0xbf597fc7bf597fc7,
  170. 0xc6e00bf3c6e00bf3, 0xc6e00bf3c6e00bf3, 0xc6e00bf3c6e00bf3, 0xc6e00bf3c6e00bf3,
  171. 0xc6e00bf3c6e00bf3, 0xc6e00bf3c6e00bf3, 0xc6e00bf3c6e00bf3, 0xc6e00bf3c6e00bf3,
  172. 0xd5a79147d5a79147, 0xd5a79147d5a79147, 0xd5a79147d5a79147, 0xd5a79147d5a79147,
  173. 0xd5a79147d5a79147, 0xd5a79147d5a79147, 0xd5a79147d5a79147, 0xd5a79147d5a79147,
  174. 0x06ca635106ca6351, 0x06ca635106ca6351, 0x06ca635106ca6351, 0x06ca635106ca6351,
  175. 0x06ca635106ca6351, 0x06ca635106ca6351, 0x06ca635106ca6351, 0x06ca635106ca6351,
  176. 0x1429296714292967, 0x1429296714292967, 0x1429296714292967, 0x1429296714292967,
  177. 0x1429296714292967, 0x1429296714292967, 0x1429296714292967, 0x1429296714292967,
  178. 0x27b70a8527b70a85, 0x27b70a8527b70a85, 0x27b70a8527b70a85, 0x27b70a8527b70a85,
  179. 0x27b70a8527b70a85, 0x27b70a8527b70a85, 0x27b70a8527b70a85, 0x27b70a8527b70a85,
  180. 0x2e1b21382e1b2138, 0x2e1b21382e1b2138, 0x2e1b21382e1b2138, 0x2e1b21382e1b2138,
  181. 0x2e1b21382e1b2138, 0x2e1b21382e1b2138, 0x2e1b21382e1b2138, 0x2e1b21382e1b2138,
  182. 0x4d2c6dfc4d2c6dfc, 0x4d2c6dfc4d2c6dfc, 0x4d2c6dfc4d2c6dfc, 0x4d2c6dfc4d2c6dfc,
  183. 0x4d2c6dfc4d2c6dfc, 0x4d2c6dfc4d2c6dfc, 0x4d2c6dfc4d2c6dfc, 0x4d2c6dfc4d2c6dfc,
  184. 0x53380d1353380d13, 0x53380d1353380d13, 0x53380d1353380d13, 0x53380d1353380d13,
  185. 0x53380d1353380d13, 0x53380d1353380d13, 0x53380d1353380d13, 0x53380d1353380d13,
  186. 0x650a7354650a7354, 0x650a7354650a7354, 0x650a7354650a7354, 0x650a7354650a7354,
  187. 0x650a7354650a7354, 0x650a7354650a7354, 0x650a7354650a7354, 0x650a7354650a7354,
  188. 0x766a0abb766a0abb, 0x766a0abb766a0abb, 0x766a0abb766a0abb, 0x766a0abb766a0abb,
  189. 0x766a0abb766a0abb, 0x766a0abb766a0abb, 0x766a0abb766a0abb, 0x766a0abb766a0abb,
  190. 0x81c2c92e81c2c92e, 0x81c2c92e81c2c92e, 0x81c2c92e81c2c92e, 0x81c2c92e81c2c92e,
  191. 0x81c2c92e81c2c92e, 0x81c2c92e81c2c92e, 0x81c2c92e81c2c92e, 0x81c2c92e81c2c92e,
  192. 0x92722c8592722c85, 0x92722c8592722c85, 0x92722c8592722c85, 0x92722c8592722c85,
  193. 0x92722c8592722c85, 0x92722c8592722c85, 0x92722c8592722c85, 0x92722c8592722c85,
  194. 0xa2bfe8a1a2bfe8a1, 0xa2bfe8a1a2bfe8a1, 0xa2bfe8a1a2bfe8a1, 0xa2bfe8a1a2bfe8a1,
  195. 0xa2bfe8a1a2bfe8a1, 0xa2bfe8a1a2bfe8a1, 0xa2bfe8a1a2bfe8a1, 0xa2bfe8a1a2bfe8a1,
  196. 0xa81a664ba81a664b, 0xa81a664ba81a664b, 0xa81a664ba81a664b, 0xa81a664ba81a664b,
  197. 0xa81a664ba81a664b, 0xa81a664ba81a664b, 0xa81a664ba81a664b, 0xa81a664ba81a664b,
  198. 0xc24b8b70c24b8b70, 0xc24b8b70c24b8b70, 0xc24b8b70c24b8b70, 0xc24b8b70c24b8b70,
  199. 0xc24b8b70c24b8b70, 0xc24b8b70c24b8b70, 0xc24b8b70c24b8b70, 0xc24b8b70c24b8b70,
  200. 0xc76c51a3c76c51a3, 0xc76c51a3c76c51a3, 0xc76c51a3c76c51a3, 0xc76c51a3c76c51a3,
  201. 0xc76c51a3c76c51a3, 0xc76c51a3c76c51a3, 0xc76c51a3c76c51a3, 0xc76c51a3c76c51a3,
  202. 0xd192e819d192e819, 0xd192e819d192e819, 0xd192e819d192e819, 0xd192e819d192e819,
  203. 0xd192e819d192e819, 0xd192e819d192e819, 0xd192e819d192e819, 0xd192e819d192e819,
  204. 0xd6990624d6990624, 0xd6990624d6990624, 0xd6990624d6990624, 0xd6990624d6990624,
  205. 0xd6990624d6990624, 0xd6990624d6990624, 0xd6990624d6990624, 0xd6990624d6990624,
  206. 0xf40e3585f40e3585, 0xf40e3585f40e3585, 0xf40e3585f40e3585, 0xf40e3585f40e3585,
  207. 0xf40e3585f40e3585, 0xf40e3585f40e3585, 0xf40e3585f40e3585, 0xf40e3585f40e3585,
  208. 0x106aa070106aa070, 0x106aa070106aa070, 0x106aa070106aa070, 0x106aa070106aa070,
  209. 0x106aa070106aa070, 0x106aa070106aa070, 0x106aa070106aa070, 0x106aa070106aa070,
  210. 0x19a4c11619a4c116, 0x19a4c11619a4c116, 0x19a4c11619a4c116, 0x19a4c11619a4c116,
  211. 0x19a4c11619a4c116, 0x19a4c11619a4c116, 0x19a4c11619a4c116, 0x19a4c11619a4c116,
  212. 0x1e376c081e376c08, 0x1e376c081e376c08, 0x1e376c081e376c08, 0x1e376c081e376c08,
  213. 0x1e376c081e376c08, 0x1e376c081e376c08, 0x1e376c081e376c08, 0x1e376c081e376c08,
  214. 0x2748774c2748774c, 0x2748774c2748774c, 0x2748774c2748774c, 0x2748774c2748774c,
  215. 0x2748774c2748774c, 0x2748774c2748774c, 0x2748774c2748774c, 0x2748774c2748774c,
  216. 0x34b0bcb534b0bcb5, 0x34b0bcb534b0bcb5, 0x34b0bcb534b0bcb5, 0x34b0bcb534b0bcb5,
  217. 0x34b0bcb534b0bcb5, 0x34b0bcb534b0bcb5, 0x34b0bcb534b0bcb5, 0x34b0bcb534b0bcb5,
  218. 0x391c0cb3391c0cb3, 0x391c0cb3391c0cb3, 0x391c0cb3391c0cb3, 0x391c0cb3391c0cb3,
  219. 0x391c0cb3391c0cb3, 0x391c0cb3391c0cb3, 0x391c0cb3391c0cb3, 0x391c0cb3391c0cb3,
  220. 0x4ed8aa4a4ed8aa4a, 0x4ed8aa4a4ed8aa4a, 0x4ed8aa4a4ed8aa4a, 0x4ed8aa4a4ed8aa4a,
  221. 0x4ed8aa4a4ed8aa4a, 0x4ed8aa4a4ed8aa4a, 0x4ed8aa4a4ed8aa4a, 0x4ed8aa4a4ed8aa4a,
  222. 0x5b9cca4f5b9cca4f, 0x5b9cca4f5b9cca4f, 0x5b9cca4f5b9cca4f, 0x5b9cca4f5b9cca4f,
  223. 0x5b9cca4f5b9cca4f, 0x5b9cca4f5b9cca4f, 0x5b9cca4f5b9cca4f, 0x5b9cca4f5b9cca4f,
  224. 0x682e6ff3682e6ff3, 0x682e6ff3682e6ff3, 0x682e6ff3682e6ff3, 0x682e6ff3682e6ff3,
  225. 0x682e6ff3682e6ff3, 0x682e6ff3682e6ff3, 0x682e6ff3682e6ff3, 0x682e6ff3682e6ff3,
  226. 0x748f82ee748f82ee, 0x748f82ee748f82ee, 0x748f82ee748f82ee, 0x748f82ee748f82ee,
  227. 0x748f82ee748f82ee, 0x748f82ee748f82ee, 0x748f82ee748f82ee, 0x748f82ee748f82ee,
  228. 0x78a5636f78a5636f, 0x78a5636f78a5636f, 0x78a5636f78a5636f, 0x78a5636f78a5636f,
  229. 0x78a5636f78a5636f, 0x78a5636f78a5636f, 0x78a5636f78a5636f, 0x78a5636f78a5636f,
  230. 0x84c8781484c87814, 0x84c8781484c87814, 0x84c8781484c87814, 0x84c8781484c87814,
  231. 0x84c8781484c87814, 0x84c8781484c87814, 0x84c8781484c87814, 0x84c8781484c87814,
  232. 0x8cc702088cc70208, 0x8cc702088cc70208, 0x8cc702088cc70208, 0x8cc702088cc70208,
  233. 0x8cc702088cc70208, 0x8cc702088cc70208, 0x8cc702088cc70208, 0x8cc702088cc70208,
  234. 0x90befffa90befffa, 0x90befffa90befffa, 0x90befffa90befffa, 0x90befffa90befffa,
  235. 0x90befffa90befffa, 0x90befffa90befffa, 0x90befffa90befffa, 0x90befffa90befffa,
  236. 0xa4506ceba4506ceb, 0xa4506ceba4506ceb, 0xa4506ceba4506ceb, 0xa4506ceba4506ceb,
  237. 0xa4506ceba4506ceb, 0xa4506ceba4506ceb, 0xa4506ceba4506ceb, 0xa4506ceba4506ceb,
  238. 0xbef9a3f7bef9a3f7, 0xbef9a3f7bef9a3f7, 0xbef9a3f7bef9a3f7, 0xbef9a3f7bef9a3f7,
  239. 0xbef9a3f7bef9a3f7, 0xbef9a3f7bef9a3f7, 0xbef9a3f7bef9a3f7, 0xbef9a3f7bef9a3f7,
  240. 0xc67178f2c67178f2, 0xc67178f2c67178f2, 0xc67178f2c67178f2, 0xc67178f2c67178f2,
  241. 0xc67178f2c67178f2, 0xc67178f2c67178f2, 0xc67178f2c67178f2, 0xc67178f2c67178f2}
  242. // Interface function to assembly ode
  243. func blockAvx512(digests *[512]byte, input [16][]byte, mask []uint64) [16][Size]byte {
  244. scratch := [512]byte{}
  245. sha256X16Avx512(digests, &scratch, &table, mask, input)
  246. output := [16][Size]byte{}
  247. for i := 0; i < 16; i++ {
  248. output[i] = getDigest(i, digests[:])
  249. }
  250. return output
  251. }
  252. func getDigest(index int, state []byte) (sum [Size]byte) {
  253. for j := 0; j < 16; j += 2 {
  254. for i := index*4 + j*Size; i < index*4+(j+1)*Size; i += Size {
  255. binary.BigEndian.PutUint32(sum[j*2:], binary.LittleEndian.Uint32(state[i:i+4]))
  256. }
  257. }
  258. return
  259. }
// Message to send across input channel
type blockInput struct {
	uid   uint64          // id of the digest this message belongs to
	msg   []byte          // data to be hashed
	reset bool            // when set, the server only drops its state for uid
	final bool            // when set, the result for msg is reported on sumCh
	sumCh chan [Size]byte // channel for the result of a final message
}
// Avx512Server - Type to implement 16x parallel handling of SHA256 invocations
//
// Within this file the fields below are only touched by Process and the
// helpers it calls; everything else communicates with the server by sending
// on blocksCh.
type Avx512Server struct {
	blocksCh chan blockInput       // Input channel
	totalIn  int                   // Total number of inputs waiting to be processed
	lanes    [16]Avx512LaneInfo    // Array with info per lane (out of 16)
	digests  map[uint64][Size]byte // Map of uids to (interim) digest results
}
// Avx512LaneInfo - Info for each lane
type Avx512LaneInfo struct {
	uid      uint64          // unique identification for this SHA processing
	block    []byte          // input block to be processed (nil while the lane is free)
	outputCh chan [Size]byte // channel for output result (only set for a final block)
}
  281. // NewAvx512Server - Create new object for parallel processing handling
  282. func NewAvx512Server() *Avx512Server {
  283. a512srv := &Avx512Server{}
  284. a512srv.digests = make(map[uint64][Size]byte)
  285. a512srv.blocksCh = make(chan blockInput)
  286. // Start a single thread for reading from the input channel
  287. go a512srv.Process()
  288. return a512srv
  289. }
  290. // Process - Sole handler for reading from the input channel
  291. func (a512srv *Avx512Server) Process() {
  292. for {
  293. select {
  294. case block := <-a512srv.blocksCh:
  295. if block.reset {
  296. a512srv.reset(block.uid)
  297. continue
  298. }
  299. index := block.uid & 0xf
  300. // fmt.Println("Adding message:", block.uid, index)
  301. if a512srv.lanes[index].block != nil { // If slot is already filled, process all inputs
  302. //fmt.Println("Invoking Blocks()")
  303. a512srv.blocks()
  304. }
  305. a512srv.totalIn++
  306. a512srv.lanes[index] = Avx512LaneInfo{uid: block.uid, block: block.msg}
  307. if block.final {
  308. a512srv.lanes[index].outputCh = block.sumCh
  309. }
  310. if a512srv.totalIn == len(a512srv.lanes) {
  311. // fmt.Println("Invoking Blocks() while FULL: ")
  312. a512srv.blocks()
  313. }
  314. // TODO: test with larger timeout
  315. case <-time.After(1 * time.Microsecond):
  316. for _, lane := range a512srv.lanes {
  317. if lane.block != nil { // check if there is any input to process
  318. // fmt.Println("Invoking Blocks() on TIMEOUT: ")
  319. a512srv.blocks()
  320. break // we are done
  321. }
  322. }
  323. }
  324. }
  325. }
  326. // Do a reset for this calculation
  327. func (a512srv *Avx512Server) reset(uid uint64) {
  328. // Check if there is a message still waiting to be processed (and remove if so)
  329. for i, lane := range a512srv.lanes {
  330. if lane.uid == uid {
  331. if lane.block != nil {
  332. a512srv.lanes[i] = Avx512LaneInfo{} // clear message
  333. a512srv.totalIn--
  334. }
  335. }
  336. }
  337. // Delete entry from hash map
  338. delete(a512srv.digests, uid)
  339. }
  340. // Invoke assembly and send results back
  341. func (a512srv *Avx512Server) blocks() {
  342. inputs := [16][]byte{}
  343. for i := range inputs {
  344. inputs[i] = a512srv.lanes[i].block
  345. }
  346. mask := expandMask(genMask(inputs))
  347. outputs := blockAvx512(a512srv.getDigests(), inputs, mask)
  348. a512srv.totalIn = 0
  349. for i := 0; i < len(outputs); i++ {
  350. uid, outputCh := a512srv.lanes[i].uid, a512srv.lanes[i].outputCh
  351. a512srv.digests[uid] = outputs[i]
  352. a512srv.lanes[i] = Avx512LaneInfo{}
  353. if outputCh != nil {
  354. // Send back result
  355. outputCh <- outputs[i]
  356. delete(a512srv.digests, uid) // Delete entry from hashmap
  357. }
  358. }
  359. }
  360. func (a512srv *Avx512Server) Write(uid uint64, p []byte) (nn int, err error) {
  361. a512srv.blocksCh <- blockInput{uid: uid, msg: p}
  362. return len(p), nil
  363. }
  364. // Sum - return sha256 sum in bytes for a given sum id.
  365. func (a512srv *Avx512Server) Sum(uid uint64, p []byte) [32]byte {
  366. sumCh := make(chan [32]byte)
  367. a512srv.blocksCh <- blockInput{uid: uid, msg: p, final: true, sumCh: sumCh}
  368. return <-sumCh
  369. }
  370. func (a512srv *Avx512Server) getDigests() *[512]byte {
  371. digests := [512]byte{}
  372. for i, lane := range a512srv.lanes {
  373. a, ok := a512srv.digests[lane.uid]
  374. if ok {
  375. binary.BigEndian.PutUint32(digests[(i+0*16)*4:], binary.LittleEndian.Uint32(a[0:4]))
  376. binary.BigEndian.PutUint32(digests[(i+1*16)*4:], binary.LittleEndian.Uint32(a[4:8]))
  377. binary.BigEndian.PutUint32(digests[(i+2*16)*4:], binary.LittleEndian.Uint32(a[8:12]))
  378. binary.BigEndian.PutUint32(digests[(i+3*16)*4:], binary.LittleEndian.Uint32(a[12:16]))
  379. binary.BigEndian.PutUint32(digests[(i+4*16)*4:], binary.LittleEndian.Uint32(a[16:20]))
  380. binary.BigEndian.PutUint32(digests[(i+5*16)*4:], binary.LittleEndian.Uint32(a[20:24]))
  381. binary.BigEndian.PutUint32(digests[(i+6*16)*4:], binary.LittleEndian.Uint32(a[24:28]))
  382. binary.BigEndian.PutUint32(digests[(i+7*16)*4:], binary.LittleEndian.Uint32(a[28:32]))
  383. } else {
  384. binary.LittleEndian.PutUint32(digests[(i+0*16)*4:], init0)
  385. binary.LittleEndian.PutUint32(digests[(i+1*16)*4:], init1)
  386. binary.LittleEndian.PutUint32(digests[(i+2*16)*4:], init2)
  387. binary.LittleEndian.PutUint32(digests[(i+3*16)*4:], init3)
  388. binary.LittleEndian.PutUint32(digests[(i+4*16)*4:], init4)
  389. binary.LittleEndian.PutUint32(digests[(i+5*16)*4:], init5)
  390. binary.LittleEndian.PutUint32(digests[(i+6*16)*4:], init6)
  391. binary.LittleEndian.PutUint32(digests[(i+7*16)*4:], init7)
  392. }
  393. }
  394. return &digests
  395. }
// Helper struct for sorting blocks based on length
type lane struct {
	len uint // length in bytes of the input of this lane
	pos uint // index of the lane (0..15) before sorting
}
  401. type lanes []lane
  402. func (lns lanes) Len() int { return len(lns) }
  403. func (lns lanes) Swap(i, j int) { lns[i], lns[j] = lns[j], lns[i] }
  404. func (lns lanes) Less(i, j int) bool { return lns[i].len < lns[j].len }
// Helper struct for describing which lanes take part in a run of consecutive
// rounds (as generated by genMask)
type maskRounds struct {
	mask   uint64 // bit i is set when lane i still has input during these rounds
	rounds uint64 // number of consecutive rounds (64-byte blocks) the mask applies to
}
  410. func genMask(input [16][]byte) [16]maskRounds {
  411. // Sort on blocks length small to large
  412. var sorted [16]lane
  413. for c, inpt := range input {
  414. sorted[c] = lane{uint(len(inpt)), uint(c)}
  415. }
  416. sort.Sort(lanes(sorted[:]))
  417. // Create mask array including 'rounds' between masks
  418. m, round, index := uint64(0xffff), uint64(0), 0
  419. var mr [16]maskRounds
  420. for _, s := range sorted {
  421. if s.len > 0 {
  422. if uint64(s.len)>>6 > round {
  423. mr[index] = maskRounds{m, (uint64(s.len) >> 6) - round}
  424. index++
  425. }
  426. round = uint64(s.len) >> 6
  427. }
  428. m = m & ^(1 << uint(s.pos))
  429. }
  430. return mr
  431. }
  432. // TODO: remove function
  433. func expandMask(mr [16]maskRounds) []uint64 {
  434. size := uint64(0)
  435. for _, r := range mr {
  436. size += r.rounds
  437. }
  438. result, index := make([]uint64, size), 0
  439. for _, r := range mr {
  440. for j := uint64(0); j < r.rounds; j++ {
  441. result[index] = r.mask
  442. index++
  443. }
  444. }
  445. return result
  446. }