cpuid.go 40 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504
  1. // Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
  2. // Package cpuid provides information about the CPU running the current program.
  3. //
  4. // CPU features are detected on startup, and kept for fast access through the life of the application.
  5. // Currently x86 / x64 (AMD64) as well as arm64 is supported.
  6. //
  7. // You can access the CPU information by accessing the shared CPU variable of the cpuid library.
  8. //
  9. // Package home: https://github.com/klauspost/cpuid
  10. package cpuid
  11. import (
  12. "math"
  13. "strings"
  14. )
  15. // AMD reference: https://www.amd.com/system/files/TechDocs/25481.pdf
  16. // and Processor Programming Reference (PPR)
  17. // Vendor is a representation of a CPU vendor.
  18. type Vendor int
  19. const (
  20. Other Vendor = iota
  21. Intel
  22. AMD
  23. VIA
  24. Transmeta
  25. NSC
  26. KVM // Kernel-based Virtual Machine
  27. MSVM // Microsoft Hyper-V or Windows Virtual PC
  28. VMware
  29. XenHVM
  30. Bhyve
  31. Hygon
  32. SiS
  33. RDC
  34. )
  35. const (
  36. CMOV = 1 << iota // i686 CMOV
  37. NX // NX (No-Execute) bit
  38. AMD3DNOW // AMD 3DNOW
  39. AMD3DNOWEXT // AMD 3DNowExt
  40. MMX // standard MMX
  41. MMXEXT // SSE integer functions or AMD MMX ext
  42. SSE // SSE functions
  43. SSE2 // P4 SSE functions
  44. SSE3 // Prescott SSE3 functions
  45. SSSE3 // Conroe SSSE3 functions
  46. SSE4 // Penryn SSE4.1 functions
  47. SSE4A // AMD Barcelona microarchitecture SSE4a instructions
  48. SSE42 // Nehalem SSE4.2 functions
  49. AVX // AVX functions
  50. AVX2 // AVX2 functions
  51. FMA3 // Intel FMA 3
  52. FMA4 // Bulldozer FMA4 functions
  53. XOP // Bulldozer XOP functions
  54. F16C // Half-precision floating-point conversion
  55. BMI1 // Bit Manipulation Instruction Set 1
  56. BMI2 // Bit Manipulation Instruction Set 2
  57. TBM // AMD Trailing Bit Manipulation
  58. LZCNT // LZCNT instruction
  59. POPCNT // POPCNT instruction
  60. AESNI // Advanced Encryption Standard New Instructions
  61. CLMUL // Carry-less Multiplication
  62. HTT // Hyperthreading (enabled)
  63. HLE // Hardware Lock Elision
  64. RTM // Restricted Transactional Memory
  65. RDRAND // RDRAND instruction is available
  66. RDSEED // RDSEED instruction is available
  67. ADX // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
  68. SHA // Intel SHA Extensions
  69. AVX512F // AVX-512 Foundation
  70. AVX512DQ // AVX-512 Doubleword and Quadword Instructions
  71. AVX512IFMA // AVX-512 Integer Fused Multiply-Add Instructions
  72. AVX512PF // AVX-512 Prefetch Instructions
  73. AVX512ER // AVX-512 Exponential and Reciprocal Instructions
  74. AVX512CD // AVX-512 Conflict Detection Instructions
  75. AVX512BW // AVX-512 Byte and Word Instructions
  76. AVX512VL // AVX-512 Vector Length Extensions
  77. AVX512VBMI // AVX-512 Vector Bit Manipulation Instructions
  78. AVX512VBMI2 // AVX-512 Vector Bit Manipulation Instructions, Version 2
  79. AVX512VNNI // AVX-512 Vector Neural Network Instructions
  80. AVX512VPOPCNTDQ // AVX-512 Vector Population Count Doubleword and Quadword
  81. GFNI // Galois Field New Instructions
  82. VAES // Vector AES
  83. AVX512BITALG // AVX-512 Bit Algorithms
  84. VPCLMULQDQ // Carry-Less Multiplication Quadword
  85. AVX512BF16 // AVX-512 BFLOAT16 Instructions
  86. AVX512VP2INTERSECT // AVX-512 Intersect for D/Q
  87. MPX // Intel MPX (Memory Protection Extensions)
  88. ERMS // Enhanced REP MOVSB/STOSB
  89. RDTSCP // RDTSCP Instruction
  90. CX16 // CMPXCHG16B Instruction
  91. SGX // Software Guard Extensions
  92. SGXLC // Software Guard Extensions Launch Control
  93. IBPB // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB)
  94. STIBP // Single Thread Indirect Branch Predictors
  95. VMX // Virtual Machine Extensions
  96. // Performance indicators
  97. SSE2SLOW // SSE2 is supported, but usually not faster
  98. SSE3SLOW // SSE3 is supported, but usually not faster
  99. ATOM // Atom processor, some SSSE3 instructions are slower
  100. )
  101. var flagNames = map[Flags]string{
  102. CMOV: "CMOV", // i686 CMOV
  103. NX: "NX", // NX (No-Execute) bit
  104. AMD3DNOW: "AMD3DNOW", // AMD 3DNOW
  105. AMD3DNOWEXT: "AMD3DNOWEXT", // AMD 3DNowExt
  106. MMX: "MMX", // Standard MMX
  107. MMXEXT: "MMXEXT", // SSE integer functions or AMD MMX ext
  108. SSE: "SSE", // SSE functions
  109. SSE2: "SSE2", // P4 SSE2 functions
  110. SSE3: "SSE3", // Prescott SSE3 functions
  111. SSSE3: "SSSE3", // Conroe SSSE3 functions
  112. SSE4: "SSE4.1", // Penryn SSE4.1 functions
  113. SSE4A: "SSE4A", // AMD Barcelona microarchitecture SSE4a instructions
  114. SSE42: "SSE4.2", // Nehalem SSE4.2 functions
  115. AVX: "AVX", // AVX functions
  116. AVX2: "AVX2", // AVX functions
  117. FMA3: "FMA3", // Intel FMA 3
  118. FMA4: "FMA4", // Bulldozer FMA4 functions
  119. XOP: "XOP", // Bulldozer XOP functions
  120. F16C: "F16C", // Half-precision floating-point conversion
  121. BMI1: "BMI1", // Bit Manipulation Instruction Set 1
  122. BMI2: "BMI2", // Bit Manipulation Instruction Set 2
  123. TBM: "TBM", // AMD Trailing Bit Manipulation
  124. LZCNT: "LZCNT", // LZCNT instruction
  125. POPCNT: "POPCNT", // POPCNT instruction
  126. AESNI: "AESNI", // Advanced Encryption Standard New Instructions
  127. CLMUL: "CLMUL", // Carry-less Multiplication
  128. HTT: "HTT", // Hyperthreading (enabled)
  129. HLE: "HLE", // Hardware Lock Elision
  130. RTM: "RTM", // Restricted Transactional Memory
  131. RDRAND: "RDRAND", // RDRAND instruction is available
  132. RDSEED: "RDSEED", // RDSEED instruction is available
  133. ADX: "ADX", // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
  134. SHA: "SHA", // Intel SHA Extensions
  135. AVX512F: "AVX512F", // AVX-512 Foundation
  136. AVX512DQ: "AVX512DQ", // AVX-512 Doubleword and Quadword Instructions
  137. AVX512IFMA: "AVX512IFMA", // AVX-512 Integer Fused Multiply-Add Instructions
  138. AVX512PF: "AVX512PF", // AVX-512 Prefetch Instructions
  139. AVX512ER: "AVX512ER", // AVX-512 Exponential and Reciprocal Instructions
  140. AVX512CD: "AVX512CD", // AVX-512 Conflict Detection Instructions
  141. AVX512BW: "AVX512BW", // AVX-512 Byte and Word Instructions
  142. AVX512VL: "AVX512VL", // AVX-512 Vector Length Extensions
  143. AVX512VBMI: "AVX512VBMI", // AVX-512 Vector Bit Manipulation Instructions
  144. AVX512VBMI2: "AVX512VBMI2", // AVX-512 Vector Bit Manipulation Instructions, Version 2
  145. AVX512VNNI: "AVX512VNNI", // AVX-512 Vector Neural Network Instructions
  146. AVX512VPOPCNTDQ: "AVX512VPOPCNTDQ", // AVX-512 Vector Population Count Doubleword and Quadword
  147. GFNI: "GFNI", // Galois Field New Instructions
  148. VAES: "VAES", // Vector AES
  149. AVX512BITALG: "AVX512BITALG", // AVX-512 Bit Algorithms
  150. VPCLMULQDQ: "VPCLMULQDQ", // Carry-Less Multiplication Quadword
  151. AVX512BF16: "AVX512BF16", // AVX-512 BFLOAT16 Instruction
  152. AVX512VP2INTERSECT: "AVX512VP2INTERSECT", // AVX-512 Intersect for D/Q
  153. MPX: "MPX", // Intel MPX (Memory Protection Extensions)
  154. ERMS: "ERMS", // Enhanced REP MOVSB/STOSB
  155. RDTSCP: "RDTSCP", // RDTSCP Instruction
  156. CX16: "CX16", // CMPXCHG16B Instruction
  157. SGX: "SGX", // Software Guard Extensions
  158. SGXLC: "SGXLC", // Software Guard Extensions Launch Control
  159. IBPB: "IBPB", // Indirect Branch Restricted Speculation and Indirect Branch Predictor Barrier
  160. STIBP: "STIBP", // Single Thread Indirect Branch Predictors
  161. VMX: "VMX", // Virtual Machine Extensions
  162. // Performance indicators
  163. SSE2SLOW: "SSE2SLOW", // SSE2 supported, but usually not faster
  164. SSE3SLOW: "SSE3SLOW", // SSE3 supported, but usually not faster
  165. ATOM: "ATOM", // Atom processor, some SSSE3 instructions are slower
  166. }
  167. /* all special features for arm64 should be defined here */
  168. const (
  169. /* extension instructions */
  170. FP ArmFlags = 1 << iota
  171. ASIMD
  172. EVTSTRM
  173. AES
  174. PMULL
  175. SHA1
  176. SHA2
  177. CRC32
  178. ATOMICS
  179. FPHP
  180. ASIMDHP
  181. ARMCPUID
  182. ASIMDRDM
  183. JSCVT
  184. FCMA
  185. LRCPC
  186. DCPOP
  187. SHA3
  188. SM3
  189. SM4
  190. ASIMDDP
  191. SHA512
  192. SVE
  193. GPA
  194. )
  195. var flagNamesArm = map[ArmFlags]string{
  196. FP: "FP", // Single-precision and double-precision floating point
  197. ASIMD: "ASIMD", // Advanced SIMD
  198. EVTSTRM: "EVTSTRM", // Generic timer
  199. AES: "AES", // AES instructions
  200. PMULL: "PMULL", // Polynomial Multiply instructions (PMULL/PMULL2)
  201. SHA1: "SHA1", // SHA-1 instructions (SHA1C, etc)
  202. SHA2: "SHA2", // SHA-2 instructions (SHA256H, etc)
  203. CRC32: "CRC32", // CRC32/CRC32C instructions
  204. ATOMICS: "ATOMICS", // Large System Extensions (LSE)
  205. FPHP: "FPHP", // Half-precision floating point
  206. ASIMDHP: "ASIMDHP", // Advanced SIMD half-precision floating point
  207. ARMCPUID: "CPUID", // Some CPU ID registers readable at user-level
  208. ASIMDRDM: "ASIMDRDM", // Rounding Double Multiply Accumulate/Subtract (SQRDMLAH/SQRDMLSH)
  209. JSCVT: "JSCVT", // Javascript-style double->int convert (FJCVTZS)
  210. FCMA: "FCMA", // Floatin point complex number addition and multiplication
  211. LRCPC: "LRCPC", // Weaker release consistency (LDAPR, etc)
  212. DCPOP: "DCPOP", // Data cache clean to Point of Persistence (DC CVAP)
  213. SHA3: "SHA3", // SHA-3 instructions (EOR3, RAXI, XAR, BCAX)
  214. SM3: "SM3", // SM3 instructions
  215. SM4: "SM4", // SM4 instructions
  216. ASIMDDP: "ASIMDDP", // SIMD Dot Product
  217. SHA512: "SHA512", // SHA512 instructions
  218. SVE: "SVE", // Scalable Vector Extension
  219. GPA: "GPA", // Generic Pointer Authentication
  220. }
  221. // CPUInfo contains information about the detected system CPU.
  222. type CPUInfo struct {
  223. BrandName string // Brand name reported by the CPU
  224. VendorID Vendor // Comparable CPU vendor ID
  225. VendorString string // Raw vendor string.
  226. Features Flags // Features of the CPU (x64)
  227. Arm ArmFlags // Features of the CPU (arm)
  228. PhysicalCores int // Number of physical processor cores in your CPU. Will be 0 if undetectable.
  229. ThreadsPerCore int // Number of threads per physical core. Will be 1 if undetectable.
  230. LogicalCores int // Number of physical cores times threads that can run on each core through the use of hyperthreading. Will be 0 if undetectable.
  231. Family int // CPU family number
  232. Model int // CPU model number
  233. CacheLine int // Cache line size in bytes. Will be 0 if undetectable.
  234. Hz int64 // Clock speed, if known
  235. Cache struct {
  236. L1I int // L1 Instruction Cache (per core or shared). Will be -1 if undetected
  237. L1D int // L1 Data Cache (per core or shared). Will be -1 if undetected
  238. L2 int // L2 Cache (per core or shared). Will be -1 if undetected
  239. L3 int // L3 Cache (per core, per ccx or shared). Will be -1 if undetected
  240. }
  241. SGX SGXSupport
  242. maxFunc uint32
  243. maxExFunc uint32
  244. }
  245. var cpuid func(op uint32) (eax, ebx, ecx, edx uint32)
  246. var cpuidex func(op, op2 uint32) (eax, ebx, ecx, edx uint32)
  247. var xgetbv func(index uint32) (eax, edx uint32)
  248. var rdtscpAsm func() (eax, ebx, ecx, edx uint32)
  249. // CPU contains information about the CPU as detected on startup,
  250. // or when Detect last was called.
  251. //
  252. // Use this as the primary entry point to you data.
  253. var CPU CPUInfo
  254. func init() {
  255. initCPU()
  256. Detect()
  257. }
  258. // Detect will re-detect current CPU info.
  259. // This will replace the content of the exported CPU variable.
  260. //
  261. // Unless you expect the CPU to change while you are running your program
  262. // you should not need to call this function.
  263. // If you call this, you must ensure that no other goroutine is accessing the
  264. // exported CPU variable.
  265. func Detect() {
  266. // Set defaults
  267. CPU.ThreadsPerCore = 1
  268. CPU.Cache.L1I = -1
  269. CPU.Cache.L1D = -1
  270. CPU.Cache.L2 = -1
  271. CPU.Cache.L3 = -1
  272. addInfo(&CPU)
  273. }
  274. // Generated here: http://play.golang.org/p/BxFH2Gdc0G
  275. // Cmov indicates support of CMOV instructions
  276. func (c CPUInfo) Cmov() bool {
  277. return c.Features&CMOV != 0
  278. }
  279. // Amd3dnow indicates support of AMD 3DNOW! instructions
  280. func (c CPUInfo) Amd3dnow() bool {
  281. return c.Features&AMD3DNOW != 0
  282. }
  283. // Amd3dnowExt indicates support of AMD 3DNOW! Extended instructions
  284. func (c CPUInfo) Amd3dnowExt() bool {
  285. return c.Features&AMD3DNOWEXT != 0
  286. }
  287. // VMX indicates support of VMX
  288. func (c CPUInfo) VMX() bool {
  289. return c.Features&VMX != 0
  290. }
  291. // MMX indicates support of MMX instructions
  292. func (c CPUInfo) MMX() bool {
  293. return c.Features&MMX != 0
  294. }
  295. // MMXExt indicates support of MMXEXT instructions
  296. // (SSE integer functions or AMD MMX ext)
  297. func (c CPUInfo) MMXExt() bool {
  298. return c.Features&MMXEXT != 0
  299. }
  300. // SSE indicates support of SSE instructions
  301. func (c CPUInfo) SSE() bool {
  302. return c.Features&SSE != 0
  303. }
  304. // SSE2 indicates support of SSE 2 instructions
  305. func (c CPUInfo) SSE2() bool {
  306. return c.Features&SSE2 != 0
  307. }
  308. // SSE3 indicates support of SSE 3 instructions
  309. func (c CPUInfo) SSE3() bool {
  310. return c.Features&SSE3 != 0
  311. }
  312. // SSSE3 indicates support of SSSE 3 instructions
  313. func (c CPUInfo) SSSE3() bool {
  314. return c.Features&SSSE3 != 0
  315. }
  316. // SSE4 indicates support of SSE 4 (also called SSE 4.1) instructions
  317. func (c CPUInfo) SSE4() bool {
  318. return c.Features&SSE4 != 0
  319. }
  320. // SSE42 indicates support of SSE4.2 instructions
  321. func (c CPUInfo) SSE42() bool {
  322. return c.Features&SSE42 != 0
  323. }
  324. // AVX indicates support of AVX instructions
  325. // and operating system support of AVX instructions
  326. func (c CPUInfo) AVX() bool {
  327. return c.Features&AVX != 0
  328. }
  329. // AVX2 indicates support of AVX2 instructions
  330. func (c CPUInfo) AVX2() bool {
  331. return c.Features&AVX2 != 0
  332. }
  333. // FMA3 indicates support of FMA3 instructions
  334. func (c CPUInfo) FMA3() bool {
  335. return c.Features&FMA3 != 0
  336. }
  337. // FMA4 indicates support of FMA4 instructions
  338. func (c CPUInfo) FMA4() bool {
  339. return c.Features&FMA4 != 0
  340. }
  341. // XOP indicates support of XOP instructions
  342. func (c CPUInfo) XOP() bool {
  343. return c.Features&XOP != 0
  344. }
  345. // F16C indicates support of F16C instructions
  346. func (c CPUInfo) F16C() bool {
  347. return c.Features&F16C != 0
  348. }
  349. // BMI1 indicates support of BMI1 instructions
  350. func (c CPUInfo) BMI1() bool {
  351. return c.Features&BMI1 != 0
  352. }
  353. // BMI2 indicates support of BMI2 instructions
  354. func (c CPUInfo) BMI2() bool {
  355. return c.Features&BMI2 != 0
  356. }
  357. // TBM indicates support of TBM instructions
  358. // (AMD Trailing Bit Manipulation)
  359. func (c CPUInfo) TBM() bool {
  360. return c.Features&TBM != 0
  361. }
  362. // Lzcnt indicates support of LZCNT instruction
  363. func (c CPUInfo) Lzcnt() bool {
  364. return c.Features&LZCNT != 0
  365. }
  366. // Popcnt indicates support of POPCNT instruction
  367. func (c CPUInfo) Popcnt() bool {
  368. return c.Features&POPCNT != 0
  369. }
  370. // HTT indicates the processor has Hyperthreading enabled
  371. func (c CPUInfo) HTT() bool {
  372. return c.Features&HTT != 0
  373. }
  374. // SSE2Slow indicates that SSE2 may be slow on this processor
  375. func (c CPUInfo) SSE2Slow() bool {
  376. return c.Features&SSE2SLOW != 0
  377. }
  378. // SSE3Slow indicates that SSE3 may be slow on this processor
  379. func (c CPUInfo) SSE3Slow() bool {
  380. return c.Features&SSE3SLOW != 0
  381. }
  382. // AesNi indicates support of AES-NI instructions
  383. // (Advanced Encryption Standard New Instructions)
  384. func (c CPUInfo) AesNi() bool {
  385. return c.Features&AESNI != 0
  386. }
  387. // Clmul indicates support of CLMUL instructions
  388. // (Carry-less Multiplication)
  389. func (c CPUInfo) Clmul() bool {
  390. return c.Features&CLMUL != 0
  391. }
  392. // NX indicates support of NX (No-Execute) bit
  393. func (c CPUInfo) NX() bool {
  394. return c.Features&NX != 0
  395. }
  396. // SSE4A indicates support of AMD Barcelona microarchitecture SSE4a instructions
  397. func (c CPUInfo) SSE4A() bool {
  398. return c.Features&SSE4A != 0
  399. }
  400. // HLE indicates support of Hardware Lock Elision
  401. func (c CPUInfo) HLE() bool {
  402. return c.Features&HLE != 0
  403. }
  404. // RTM indicates support of Restricted Transactional Memory
  405. func (c CPUInfo) RTM() bool {
  406. return c.Features&RTM != 0
  407. }
  408. // Rdrand indicates support of RDRAND instruction is available
  409. func (c CPUInfo) Rdrand() bool {
  410. return c.Features&RDRAND != 0
  411. }
  412. // Rdseed indicates support of RDSEED instruction is available
  413. func (c CPUInfo) Rdseed() bool {
  414. return c.Features&RDSEED != 0
  415. }
  416. // ADX indicates support of Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
  417. func (c CPUInfo) ADX() bool {
  418. return c.Features&ADX != 0
  419. }
  420. // SHA indicates support of Intel SHA Extensions
  421. func (c CPUInfo) SHA() bool {
  422. return c.Features&SHA != 0
  423. }
  424. // AVX512F indicates support of AVX-512 Foundation
  425. func (c CPUInfo) AVX512F() bool {
  426. return c.Features&AVX512F != 0
  427. }
  428. // AVX512DQ indicates support of AVX-512 Doubleword and Quadword Instructions
  429. func (c CPUInfo) AVX512DQ() bool {
  430. return c.Features&AVX512DQ != 0
  431. }
  432. // AVX512IFMA indicates support of AVX-512 Integer Fused Multiply-Add Instructions
  433. func (c CPUInfo) AVX512IFMA() bool {
  434. return c.Features&AVX512IFMA != 0
  435. }
  436. // AVX512PF indicates support of AVX-512 Prefetch Instructions
  437. func (c CPUInfo) AVX512PF() bool {
  438. return c.Features&AVX512PF != 0
  439. }
  440. // AVX512ER indicates support of AVX-512 Exponential and Reciprocal Instructions
  441. func (c CPUInfo) AVX512ER() bool {
  442. return c.Features&AVX512ER != 0
  443. }
  444. // AVX512CD indicates support of AVX-512 Conflict Detection Instructions
  445. func (c CPUInfo) AVX512CD() bool {
  446. return c.Features&AVX512CD != 0
  447. }
  448. // AVX512BW indicates support of AVX-512 Byte and Word Instructions
  449. func (c CPUInfo) AVX512BW() bool {
  450. return c.Features&AVX512BW != 0
  451. }
  452. // AVX512VL indicates support of AVX-512 Vector Length Extensions
  453. func (c CPUInfo) AVX512VL() bool {
  454. return c.Features&AVX512VL != 0
  455. }
  456. // AVX512VBMI indicates support of AVX-512 Vector Bit Manipulation Instructions
  457. func (c CPUInfo) AVX512VBMI() bool {
  458. return c.Features&AVX512VBMI != 0
  459. }
  460. // AVX512VBMI2 indicates support of AVX-512 Vector Bit Manipulation Instructions, Version 2
  461. func (c CPUInfo) AVX512VBMI2() bool {
  462. return c.Features&AVX512VBMI2 != 0
  463. }
  464. // AVX512VNNI indicates support of AVX-512 Vector Neural Network Instructions
  465. func (c CPUInfo) AVX512VNNI() bool {
  466. return c.Features&AVX512VNNI != 0
  467. }
  468. // AVX512VPOPCNTDQ indicates support of AVX-512 Vector Population Count Doubleword and Quadword
  469. func (c CPUInfo) AVX512VPOPCNTDQ() bool {
  470. return c.Features&AVX512VPOPCNTDQ != 0
  471. }
  472. // GFNI indicates support of Galois Field New Instructions
  473. func (c CPUInfo) GFNI() bool {
  474. return c.Features&GFNI != 0
  475. }
  476. // VAES indicates support of Vector AES
  477. func (c CPUInfo) VAES() bool {
  478. return c.Features&VAES != 0
  479. }
  480. // AVX512BITALG indicates support of AVX-512 Bit Algorithms
  481. func (c CPUInfo) AVX512BITALG() bool {
  482. return c.Features&AVX512BITALG != 0
  483. }
  484. // VPCLMULQDQ indicates support of Carry-Less Multiplication Quadword
  485. func (c CPUInfo) VPCLMULQDQ() bool {
  486. return c.Features&VPCLMULQDQ != 0
  487. }
  488. // AVX512BF16 indicates support of
  489. func (c CPUInfo) AVX512BF16() bool {
  490. return c.Features&AVX512BF16 != 0
  491. }
  492. // AVX512VP2INTERSECT indicates support of
  493. func (c CPUInfo) AVX512VP2INTERSECT() bool {
  494. return c.Features&AVX512VP2INTERSECT != 0
  495. }
  496. // MPX indicates support of Intel MPX (Memory Protection Extensions)
  497. func (c CPUInfo) MPX() bool {
  498. return c.Features&MPX != 0
  499. }
  500. // ERMS indicates support of Enhanced REP MOVSB/STOSB
  501. func (c CPUInfo) ERMS() bool {
  502. return c.Features&ERMS != 0
  503. }
  504. // RDTSCP Instruction is available.
  505. func (c CPUInfo) RDTSCP() bool {
  506. return c.Features&RDTSCP != 0
  507. }
  508. // CX16 indicates if CMPXCHG16B instruction is available.
  509. func (c CPUInfo) CX16() bool {
  510. return c.Features&CX16 != 0
  511. }
  512. // TSX is split into HLE (Hardware Lock Elision) and RTM (Restricted Transactional Memory) detection.
  513. // So TSX simply checks that.
  514. func (c CPUInfo) TSX() bool {
  515. return c.Features&(HLE|RTM) == HLE|RTM
  516. }
  517. // Atom indicates an Atom processor
  518. func (c CPUInfo) Atom() bool {
  519. return c.Features&ATOM != 0
  520. }
  521. // Intel returns true if vendor is recognized as Intel
  522. func (c CPUInfo) Intel() bool {
  523. return c.VendorID == Intel
  524. }
  525. // AMD returns true if vendor is recognized as AMD
  526. func (c CPUInfo) AMD() bool {
  527. return c.VendorID == AMD
  528. }
  529. // Hygon returns true if vendor is recognized as Hygon
  530. func (c CPUInfo) Hygon() bool {
  531. return c.VendorID == Hygon
  532. }
  533. // Transmeta returns true if vendor is recognized as Transmeta
  534. func (c CPUInfo) Transmeta() bool {
  535. return c.VendorID == Transmeta
  536. }
  537. // NSC returns true if vendor is recognized as National Semiconductor
  538. func (c CPUInfo) NSC() bool {
  539. return c.VendorID == NSC
  540. }
  541. // VIA returns true if vendor is recognized as VIA
  542. func (c CPUInfo) VIA() bool {
  543. return c.VendorID == VIA
  544. }
  545. // RTCounter returns the 64-bit time-stamp counter
  546. // Uses the RDTSCP instruction. The value 0 is returned
  547. // if the CPU does not support the instruction.
  548. func (c CPUInfo) RTCounter() uint64 {
  549. if !c.RDTSCP() {
  550. return 0
  551. }
  552. a, _, _, d := rdtscpAsm()
  553. return uint64(a) | (uint64(d) << 32)
  554. }
  555. // Ia32TscAux returns the IA32_TSC_AUX part of the RDTSCP.
  556. // This variable is OS dependent, but on Linux contains information
  557. // about the current cpu/core the code is running on.
  558. // If the RDTSCP instruction isn't supported on the CPU, the value 0 is returned.
  559. func (c CPUInfo) Ia32TscAux() uint32 {
  560. if !c.RDTSCP() {
  561. return 0
  562. }
  563. _, _, ecx, _ := rdtscpAsm()
  564. return ecx
  565. }
  566. // LogicalCPU will return the Logical CPU the code is currently executing on.
  567. // This is likely to change when the OS re-schedules the running thread
  568. // to another CPU.
  569. // If the current core cannot be detected, -1 will be returned.
  570. func (c CPUInfo) LogicalCPU() int {
  571. if c.maxFunc < 1 {
  572. return -1
  573. }
  574. _, ebx, _, _ := cpuid(1)
  575. return int(ebx >> 24)
  576. }
  577. // hertz tries to compute the clock speed of the CPU. If leaf 15 is
  578. // supported, use it, otherwise parse the brand string. Yes, really.
  579. func hertz(model string) int64 {
  580. mfi := maxFunctionID()
  581. if mfi >= 0x15 {
  582. eax, ebx, ecx, _ := cpuid(0x15)
  583. if eax != 0 && ebx != 0 && ecx != 0 {
  584. return int64((int64(ecx) * int64(ebx)) / int64(eax))
  585. }
  586. }
  587. // computeHz determines the official rated speed of a CPU from its brand
  588. // string. This insanity is *actually the official documented way to do
  589. // this according to Intel*, prior to leaf 0x15 existing. The official
  590. // documentation only shows this working for exactly `x.xx` or `xxxx`
  591. // cases, e.g., `2.50GHz` or `1300MHz`; this parser will accept other
  592. // sizes.
  593. hz := strings.LastIndex(model, "Hz")
  594. if hz < 3 {
  595. return -1
  596. }
  597. var multiplier int64
  598. switch model[hz-1] {
  599. case 'M':
  600. multiplier = 1000 * 1000
  601. case 'G':
  602. multiplier = 1000 * 1000 * 1000
  603. case 'T':
  604. multiplier = 1000 * 1000 * 1000 * 1000
  605. }
  606. if multiplier == 0 {
  607. return -1
  608. }
  609. freq := int64(0)
  610. divisor := int64(0)
  611. decimalShift := int64(1)
  612. var i int
  613. for i = hz - 2; i >= 0 && model[i] != ' '; i-- {
  614. if model[i] >= '0' && model[i] <= '9' {
  615. freq += int64(model[i]-'0') * decimalShift
  616. decimalShift *= 10
  617. } else if model[i] == '.' {
  618. if divisor != 0 {
  619. return -1
  620. }
  621. divisor = decimalShift
  622. } else {
  623. return -1
  624. }
  625. }
  626. // we didn't find a space
  627. if i < 0 {
  628. return -1
  629. }
  630. if divisor != 0 {
  631. return (freq * multiplier) / divisor
  632. }
  633. return freq * multiplier
  634. }
  635. // VM Will return true if the cpu id indicates we are in
  636. // a virtual machine. This is only a hint, and will very likely
  637. // have many false negatives.
  638. func (c CPUInfo) VM() bool {
  639. switch c.VendorID {
  640. case MSVM, KVM, VMware, XenHVM, Bhyve:
  641. return true
  642. }
  643. return false
  644. }
  645. // Flags contains detected cpu features and characteristics
  646. type Flags uint64
  647. // ArmFlags contains detected ARM cpu features and characteristics
  648. type ArmFlags uint64
  649. // String returns a string representation of the detected
  650. // CPU features.
  651. func (f Flags) String() string {
  652. return strings.Join(f.Strings(), ",")
  653. }
  654. // Strings returns an array of the detected features.
  655. func (f Flags) Strings() []string {
  656. r := make([]string, 0, 20)
  657. for i := uint(0); i < 64; i++ {
  658. key := Flags(1 << i)
  659. val := flagNames[key]
  660. if f&key != 0 {
  661. r = append(r, val)
  662. }
  663. }
  664. return r
  665. }
  666. // String returns a string representation of the detected
  667. // CPU features.
  668. func (f ArmFlags) String() string {
  669. return strings.Join(f.Strings(), ",")
  670. }
  671. // Strings returns an array of the detected features.
  672. func (f ArmFlags) Strings() []string {
  673. r := make([]string, 0, 20)
  674. for i := uint(0); i < 64; i++ {
  675. key := ArmFlags(1 << i)
  676. val := flagNamesArm[key]
  677. if f&key != 0 {
  678. r = append(r, val)
  679. }
  680. }
  681. return r
  682. }
  683. func maxExtendedFunction() uint32 {
  684. eax, _, _, _ := cpuid(0x80000000)
  685. return eax
  686. }
  687. func maxFunctionID() uint32 {
  688. a, _, _, _ := cpuid(0)
  689. return a
  690. }
  691. func brandName() string {
  692. if maxExtendedFunction() >= 0x80000004 {
  693. v := make([]uint32, 0, 48)
  694. for i := uint32(0); i < 3; i++ {
  695. a, b, c, d := cpuid(0x80000002 + i)
  696. v = append(v, a, b, c, d)
  697. }
  698. return strings.Trim(string(valAsString(v...)), " ")
  699. }
  700. return "unknown"
  701. }
  702. func threadsPerCore() int {
  703. mfi := maxFunctionID()
  704. vend, _ := vendorID()
  705. if mfi < 0x4 || (vend != Intel && vend != AMD) {
  706. return 1
  707. }
  708. if mfi < 0xb {
  709. if vend != Intel {
  710. return 1
  711. }
  712. _, b, _, d := cpuid(1)
  713. if (d & (1 << 28)) != 0 {
  714. // v will contain logical core count
  715. v := (b >> 16) & 255
  716. if v > 1 {
  717. a4, _, _, _ := cpuid(4)
  718. // physical cores
  719. v2 := (a4 >> 26) + 1
  720. if v2 > 0 {
  721. return int(v) / int(v2)
  722. }
  723. }
  724. }
  725. return 1
  726. }
  727. _, b, _, _ := cpuidex(0xb, 0)
  728. if b&0xffff == 0 {
  729. return 1
  730. }
  731. return int(b & 0xffff)
  732. }
  733. func logicalCores() int {
  734. mfi := maxFunctionID()
  735. v, _ := vendorID()
  736. switch v {
  737. case Intel:
  738. // Use this on old Intel processors
  739. if mfi < 0xb {
  740. if mfi < 1 {
  741. return 0
  742. }
  743. // CPUID.1:EBX[23:16] represents the maximum number of addressable IDs (initial APIC ID)
  744. // that can be assigned to logical processors in a physical package.
  745. // The value may not be the same as the number of logical processors that are present in the hardware of a physical package.
  746. _, ebx, _, _ := cpuid(1)
  747. logical := (ebx >> 16) & 0xff
  748. return int(logical)
  749. }
  750. _, b, _, _ := cpuidex(0xb, 1)
  751. return int(b & 0xffff)
  752. case AMD, Hygon:
  753. _, b, _, _ := cpuid(1)
  754. return int((b >> 16) & 0xff)
  755. default:
  756. return 0
  757. }
  758. }
  759. func familyModel() (int, int) {
  760. if maxFunctionID() < 0x1 {
  761. return 0, 0
  762. }
  763. eax, _, _, _ := cpuid(1)
  764. family := ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff)
  765. model := ((eax >> 4) & 0xf) + ((eax >> 12) & 0xf0)
  766. return int(family), int(model)
  767. }
  768. func physicalCores() int {
  769. v, _ := vendorID()
  770. switch v {
  771. case Intel:
  772. return logicalCores() / threadsPerCore()
  773. case AMD, Hygon:
  774. lc := logicalCores()
  775. tpc := threadsPerCore()
  776. if lc > 0 && tpc > 0 {
  777. return lc / tpc
  778. }
  779. // The following is inaccurate on AMD EPYC 7742 64-Core Processor
  780. if maxExtendedFunction() >= 0x80000008 {
  781. _, _, c, _ := cpuid(0x80000008)
  782. return int(c&0xff) + 1
  783. }
  784. }
  785. return 0
  786. }
  787. // Except from http://en.wikipedia.org/wiki/CPUID#EAX.3D0:_Get_vendor_ID
  788. var vendorMapping = map[string]Vendor{
  789. "AMDisbetter!": AMD,
  790. "AuthenticAMD": AMD,
  791. "CentaurHauls": VIA,
  792. "GenuineIntel": Intel,
  793. "TransmetaCPU": Transmeta,
  794. "GenuineTMx86": Transmeta,
  795. "Geode by NSC": NSC,
  796. "VIA VIA VIA ": VIA,
  797. "KVMKVMKVMKVM": KVM,
  798. "Microsoft Hv": MSVM,
  799. "VMwareVMware": VMware,
  800. "XenVMMXenVMM": XenHVM,
  801. "bhyve bhyve ": Bhyve,
  802. "HygonGenuine": Hygon,
  803. "Vortex86 SoC": SiS,
  804. "SiS SiS SiS ": SiS,
  805. "RiseRiseRise": SiS,
  806. "Genuine RDC": RDC,
  807. }
  808. func vendorID() (Vendor, string) {
  809. _, b, c, d := cpuid(0)
  810. v := string(valAsString(b, d, c))
  811. vend, ok := vendorMapping[v]
  812. if !ok {
  813. return Other, v
  814. }
  815. return vend, v
  816. }
  817. func cacheLine() int {
  818. if maxFunctionID() < 0x1 {
  819. return 0
  820. }
  821. _, ebx, _, _ := cpuid(1)
  822. cache := (ebx & 0xff00) >> 5 // cflush size
  823. if cache == 0 && maxExtendedFunction() >= 0x80000006 {
  824. _, _, ecx, _ := cpuid(0x80000006)
  825. cache = ecx & 0xff // cacheline size
  826. }
  827. // TODO: Read from Cache and TLB Information
  828. return int(cache)
  829. }
  830. func (c *CPUInfo) cacheSize() {
  831. c.Cache.L1D = -1
  832. c.Cache.L1I = -1
  833. c.Cache.L2 = -1
  834. c.Cache.L3 = -1
  835. vendor, _ := vendorID()
  836. switch vendor {
  837. case Intel:
  838. if maxFunctionID() < 4 {
  839. return
  840. }
  841. for i := uint32(0); ; i++ {
  842. eax, ebx, ecx, _ := cpuidex(4, i)
  843. cacheType := eax & 15
  844. if cacheType == 0 {
  845. break
  846. }
  847. cacheLevel := (eax >> 5) & 7
  848. coherency := int(ebx&0xfff) + 1
  849. partitions := int((ebx>>12)&0x3ff) + 1
  850. associativity := int((ebx>>22)&0x3ff) + 1
  851. sets := int(ecx) + 1
  852. size := associativity * partitions * coherency * sets
  853. switch cacheLevel {
  854. case 1:
  855. if cacheType == 1 {
  856. // 1 = Data Cache
  857. c.Cache.L1D = size
  858. } else if cacheType == 2 {
  859. // 2 = Instruction Cache
  860. c.Cache.L1I = size
  861. } else {
  862. if c.Cache.L1D < 0 {
  863. c.Cache.L1I = size
  864. }
  865. if c.Cache.L1I < 0 {
  866. c.Cache.L1I = size
  867. }
  868. }
  869. case 2:
  870. c.Cache.L2 = size
  871. case 3:
  872. c.Cache.L3 = size
  873. }
  874. }
  875. case AMD, Hygon:
  876. // Untested.
  877. if maxExtendedFunction() < 0x80000005 {
  878. return
  879. }
  880. _, _, ecx, edx := cpuid(0x80000005)
  881. c.Cache.L1D = int(((ecx >> 24) & 0xFF) * 1024)
  882. c.Cache.L1I = int(((edx >> 24) & 0xFF) * 1024)
  883. if maxExtendedFunction() < 0x80000006 {
  884. return
  885. }
  886. _, _, ecx, _ = cpuid(0x80000006)
  887. c.Cache.L2 = int(((ecx >> 16) & 0xFFFF) * 1024)
  888. // CPUID Fn8000_001D_EAX_x[N:0] Cache Properties
  889. if maxExtendedFunction() < 0x8000001D {
  890. return
  891. }
  892. for i := uint32(0); i < math.MaxUint32; i++ {
  893. eax, ebx, ecx, _ := cpuidex(0x8000001D, i)
  894. level := (eax >> 5) & 7
  895. cacheNumSets := ecx + 1
  896. cacheLineSize := 1 + (ebx & 2047)
  897. cachePhysPartitions := 1 + ((ebx >> 12) & 511)
  898. cacheNumWays := 1 + ((ebx >> 22) & 511)
  899. typ := eax & 15
  900. size := int(cacheNumSets * cacheLineSize * cachePhysPartitions * cacheNumWays)
  901. if typ == 0 {
  902. return
  903. }
  904. switch level {
  905. case 1:
  906. switch typ {
  907. case 1:
  908. // Data cache
  909. c.Cache.L1D = size
  910. case 2:
  911. // Inst cache
  912. c.Cache.L1I = size
  913. default:
  914. if c.Cache.L1D < 0 {
  915. c.Cache.L1I = size
  916. }
  917. if c.Cache.L1I < 0 {
  918. c.Cache.L1I = size
  919. }
  920. }
  921. case 2:
  922. c.Cache.L2 = size
  923. case 3:
  924. c.Cache.L3 = size
  925. }
  926. }
  927. }
  928. return
  929. }
  930. type SGXEPCSection struct {
  931. BaseAddress uint64
  932. EPCSize uint64
  933. }
  934. type SGXSupport struct {
  935. Available bool
  936. LaunchControl bool
  937. SGX1Supported bool
  938. SGX2Supported bool
  939. MaxEnclaveSizeNot64 int64
  940. MaxEnclaveSize64 int64
  941. EPCSections []SGXEPCSection
  942. }
  943. func hasSGX(available, lc bool) (rval SGXSupport) {
  944. rval.Available = available
  945. if !available {
  946. return
  947. }
  948. rval.LaunchControl = lc
  949. a, _, _, d := cpuidex(0x12, 0)
  950. rval.SGX1Supported = a&0x01 != 0
  951. rval.SGX2Supported = a&0x02 != 0
  952. rval.MaxEnclaveSizeNot64 = 1 << (d & 0xFF) // pow 2
  953. rval.MaxEnclaveSize64 = 1 << ((d >> 8) & 0xFF) // pow 2
  954. rval.EPCSections = make([]SGXEPCSection, 0)
  955. for subleaf := uint32(2); subleaf < 2+8; subleaf++ {
  956. eax, ebx, ecx, edx := cpuidex(0x12, subleaf)
  957. leafType := eax & 0xf
  958. if leafType == 0 {
  959. // Invalid subleaf, stop iterating
  960. break
  961. } else if leafType == 1 {
  962. // EPC Section subleaf
  963. baseAddress := uint64(eax&0xfffff000) + (uint64(ebx&0x000fffff) << 32)
  964. size := uint64(ecx&0xfffff000) + (uint64(edx&0x000fffff) << 32)
  965. section := SGXEPCSection{BaseAddress: baseAddress, EPCSize: size}
  966. rval.EPCSections = append(rval.EPCSections, section)
  967. }
  968. }
  969. return
  970. }
  971. func support() Flags {
  972. mfi := maxFunctionID()
  973. vend, _ := vendorID()
  974. if mfi < 0x1 {
  975. return 0
  976. }
  977. rval := uint64(0)
  978. _, _, c, d := cpuid(1)
  979. if (d & (1 << 15)) != 0 {
  980. rval |= CMOV
  981. }
  982. if (d & (1 << 23)) != 0 {
  983. rval |= MMX
  984. }
  985. if (d & (1 << 25)) != 0 {
  986. rval |= MMXEXT
  987. }
  988. if (d & (1 << 25)) != 0 {
  989. rval |= SSE
  990. }
  991. if (d & (1 << 26)) != 0 {
  992. rval |= SSE2
  993. }
  994. if (c & 1) != 0 {
  995. rval |= SSE3
  996. }
  997. if (c & (1 << 5)) != 0 {
  998. rval |= VMX
  999. }
  1000. if (c & 0x00000200) != 0 {
  1001. rval |= SSSE3
  1002. }
  1003. if (c & 0x00080000) != 0 {
  1004. rval |= SSE4
  1005. }
  1006. if (c & 0x00100000) != 0 {
  1007. rval |= SSE42
  1008. }
  1009. if (c & (1 << 25)) != 0 {
  1010. rval |= AESNI
  1011. }
  1012. if (c & (1 << 1)) != 0 {
  1013. rval |= CLMUL
  1014. }
  1015. if c&(1<<23) != 0 {
  1016. rval |= POPCNT
  1017. }
  1018. if c&(1<<30) != 0 {
  1019. rval |= RDRAND
  1020. }
  1021. if c&(1<<29) != 0 {
  1022. rval |= F16C
  1023. }
  1024. if c&(1<<13) != 0 {
  1025. rval |= CX16
  1026. }
  1027. if vend == Intel && (d&(1<<28)) != 0 && mfi >= 4 {
  1028. if threadsPerCore() > 1 {
  1029. rval |= HTT
  1030. }
  1031. }
  1032. if vend == AMD && (d&(1<<28)) != 0 && mfi >= 4 {
  1033. if threadsPerCore() > 1 {
  1034. rval |= HTT
  1035. }
  1036. }
  1037. // Check XGETBV, OXSAVE and AVX bits
  1038. if c&(1<<26) != 0 && c&(1<<27) != 0 && c&(1<<28) != 0 {
  1039. // Check for OS support
  1040. eax, _ := xgetbv(0)
  1041. if (eax & 0x6) == 0x6 {
  1042. rval |= AVX
  1043. if (c & 0x00001000) != 0 {
  1044. rval |= FMA3
  1045. }
  1046. }
  1047. }
  1048. // Check AVX2, AVX2 requires OS support, but BMI1/2 don't.
  1049. if mfi >= 7 {
  1050. _, ebx, ecx, edx := cpuidex(7, 0)
  1051. eax1, _, _, _ := cpuidex(7, 1)
  1052. if (rval&AVX) != 0 && (ebx&0x00000020) != 0 {
  1053. rval |= AVX2
  1054. }
  1055. if (ebx & 0x00000008) != 0 {
  1056. rval |= BMI1
  1057. if (ebx & 0x00000100) != 0 {
  1058. rval |= BMI2
  1059. }
  1060. }
  1061. if ebx&(1<<2) != 0 {
  1062. rval |= SGX
  1063. }
  1064. if ebx&(1<<4) != 0 {
  1065. rval |= HLE
  1066. }
  1067. if ebx&(1<<9) != 0 {
  1068. rval |= ERMS
  1069. }
  1070. if ebx&(1<<11) != 0 {
  1071. rval |= RTM
  1072. }
  1073. if ebx&(1<<14) != 0 {
  1074. rval |= MPX
  1075. }
  1076. if ebx&(1<<18) != 0 {
  1077. rval |= RDSEED
  1078. }
  1079. if ebx&(1<<19) != 0 {
  1080. rval |= ADX
  1081. }
  1082. if ebx&(1<<29) != 0 {
  1083. rval |= SHA
  1084. }
  1085. if edx&(1<<26) != 0 {
  1086. rval |= IBPB
  1087. }
  1088. if ecx&(1<<30) != 0 {
  1089. rval |= SGXLC
  1090. }
  1091. if edx&(1<<27) != 0 {
  1092. rval |= STIBP
  1093. }
  1094. // Only detect AVX-512 features if XGETBV is supported
  1095. if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) {
  1096. // Check for OS support
  1097. eax, _ := xgetbv(0)
  1098. // Verify that XCR0[7:5] = ‘111b’ (OPMASK state, upper 256-bit of ZMM0-ZMM15 and
  1099. // ZMM16-ZMM31 state are enabled by OS)
  1100. /// and that XCR0[2:1] = ‘11b’ (XMM state and YMM state are enabled by OS).
  1101. if (eax>>5)&7 == 7 && (eax>>1)&3 == 3 {
  1102. if ebx&(1<<16) != 0 {
  1103. rval |= AVX512F
  1104. }
  1105. if ebx&(1<<17) != 0 {
  1106. rval |= AVX512DQ
  1107. }
  1108. if ebx&(1<<21) != 0 {
  1109. rval |= AVX512IFMA
  1110. }
  1111. if ebx&(1<<26) != 0 {
  1112. rval |= AVX512PF
  1113. }
  1114. if ebx&(1<<27) != 0 {
  1115. rval |= AVX512ER
  1116. }
  1117. if ebx&(1<<28) != 0 {
  1118. rval |= AVX512CD
  1119. }
  1120. if ebx&(1<<30) != 0 {
  1121. rval |= AVX512BW
  1122. }
  1123. if ebx&(1<<31) != 0 {
  1124. rval |= AVX512VL
  1125. }
  1126. // ecx
  1127. if ecx&(1<<1) != 0 {
  1128. rval |= AVX512VBMI
  1129. }
  1130. if ecx&(1<<6) != 0 {
  1131. rval |= AVX512VBMI2
  1132. }
  1133. if ecx&(1<<8) != 0 {
  1134. rval |= GFNI
  1135. }
  1136. if ecx&(1<<9) != 0 {
  1137. rval |= VAES
  1138. }
  1139. if ecx&(1<<10) != 0 {
  1140. rval |= VPCLMULQDQ
  1141. }
  1142. if ecx&(1<<11) != 0 {
  1143. rval |= AVX512VNNI
  1144. }
  1145. if ecx&(1<<12) != 0 {
  1146. rval |= AVX512BITALG
  1147. }
  1148. if ecx&(1<<14) != 0 {
  1149. rval |= AVX512VPOPCNTDQ
  1150. }
  1151. // edx
  1152. if edx&(1<<8) != 0 {
  1153. rval |= AVX512VP2INTERSECT
  1154. }
  1155. // cpuid eax 07h,ecx=1
  1156. if eax1&(1<<5) != 0 {
  1157. rval |= AVX512BF16
  1158. }
  1159. }
  1160. }
  1161. }
  1162. if maxExtendedFunction() >= 0x80000001 {
  1163. _, _, c, d := cpuid(0x80000001)
  1164. if (c & (1 << 5)) != 0 {
  1165. rval |= LZCNT
  1166. rval |= POPCNT
  1167. }
  1168. if (d & (1 << 31)) != 0 {
  1169. rval |= AMD3DNOW
  1170. }
  1171. if (d & (1 << 30)) != 0 {
  1172. rval |= AMD3DNOWEXT
  1173. }
  1174. if (d & (1 << 23)) != 0 {
  1175. rval |= MMX
  1176. }
  1177. if (d & (1 << 22)) != 0 {
  1178. rval |= MMXEXT
  1179. }
  1180. if (c & (1 << 6)) != 0 {
  1181. rval |= SSE4A
  1182. }
  1183. if d&(1<<20) != 0 {
  1184. rval |= NX
  1185. }
  1186. if d&(1<<27) != 0 {
  1187. rval |= RDTSCP
  1188. }
  1189. /* Allow for selectively disabling SSE2 functions on AMD processors
  1190. with SSE2 support but not SSE4a. This includes Athlon64, some
  1191. Opteron, and some Sempron processors. MMX, SSE, or 3DNow! are faster
  1192. than SSE2 often enough to utilize this special-case flag.
  1193. AV_CPU_FLAG_SSE2 and AV_CPU_FLAG_SSE2SLOW are both set in this case
  1194. so that SSE2 is used unless explicitly disabled by checking
  1195. AV_CPU_FLAG_SSE2SLOW. */
  1196. if vend != Intel &&
  1197. rval&SSE2 != 0 && (c&0x00000040) == 0 {
  1198. rval |= SSE2SLOW
  1199. }
  1200. /* XOP and FMA4 use the AVX instruction coding scheme, so they can't be
  1201. * used unless the OS has AVX support. */
  1202. if (rval & AVX) != 0 {
  1203. if (c & 0x00000800) != 0 {
  1204. rval |= XOP
  1205. }
  1206. if (c & 0x00010000) != 0 {
  1207. rval |= FMA4
  1208. }
  1209. }
  1210. if vend == Intel {
  1211. family, model := familyModel()
  1212. if family == 6 && (model == 9 || model == 13 || model == 14) {
  1213. /* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and
  1214. * 6/14 (core1 "yonah") theoretically support sse2, but it's
  1215. * usually slower than mmx. */
  1216. if (rval & SSE2) != 0 {
  1217. rval |= SSE2SLOW
  1218. }
  1219. if (rval & SSE3) != 0 {
  1220. rval |= SSE3SLOW
  1221. }
  1222. }
  1223. /* The Atom processor has SSSE3 support, which is useful in many cases,
  1224. * but sometimes the SSSE3 version is slower than the SSE2 equivalent
  1225. * on the Atom, but is generally faster on other processors supporting
  1226. * SSSE3. This flag allows for selectively disabling certain SSSE3
  1227. * functions on the Atom. */
  1228. if family == 6 && model == 28 {
  1229. rval |= ATOM
  1230. }
  1231. }
  1232. }
  1233. return Flags(rval)
  1234. }
  1235. func valAsString(values ...uint32) []byte {
  1236. r := make([]byte, 4*len(values))
  1237. for i, v := range values {
  1238. dst := r[i*4:]
  1239. dst[0] = byte(v & 0xff)
  1240. dst[1] = byte((v >> 8) & 0xff)
  1241. dst[2] = byte((v >> 16) & 0xff)
  1242. dst[3] = byte((v >> 24) & 0xff)
  1243. switch {
  1244. case dst[0] == 0:
  1245. return r[:i*4]
  1246. case dst[1] == 0:
  1247. return r[:i*4+1]
  1248. case dst[2] == 0:
  1249. return r[:i*4+2]
  1250. case dst[3] == 0:
  1251. return r[:i*4+3]
  1252. }
  1253. }
  1254. return r
  1255. }
  1256. // Single-precision and double-precision floating point
  1257. func (c CPUInfo) ArmFP() bool {
  1258. return c.Arm&FP != 0
  1259. }
  1260. // Advanced SIMD
  1261. func (c CPUInfo) ArmASIMD() bool {
  1262. return c.Arm&ASIMD != 0
  1263. }
  1264. // Generic timer
  1265. func (c CPUInfo) ArmEVTSTRM() bool {
  1266. return c.Arm&EVTSTRM != 0
  1267. }
  1268. // AES instructions
  1269. func (c CPUInfo) ArmAES() bool {
  1270. return c.Arm&AES != 0
  1271. }
  1272. // Polynomial Multiply instructions (PMULL/PMULL2)
  1273. func (c CPUInfo) ArmPMULL() bool {
  1274. return c.Arm&PMULL != 0
  1275. }
  1276. // SHA-1 instructions (SHA1C, etc)
  1277. func (c CPUInfo) ArmSHA1() bool {
  1278. return c.Arm&SHA1 != 0
  1279. }
  1280. // SHA-2 instructions (SHA256H, etc)
  1281. func (c CPUInfo) ArmSHA2() bool {
  1282. return c.Arm&SHA2 != 0
  1283. }
  1284. // CRC32/CRC32C instructions
  1285. func (c CPUInfo) ArmCRC32() bool {
  1286. return c.Arm&CRC32 != 0
  1287. }
  1288. // Large System Extensions (LSE)
  1289. func (c CPUInfo) ArmATOMICS() bool {
  1290. return c.Arm&ATOMICS != 0
  1291. }
  1292. // Half-precision floating point
  1293. func (c CPUInfo) ArmFPHP() bool {
  1294. return c.Arm&FPHP != 0
  1295. }
  1296. // Advanced SIMD half-precision floating point
  1297. func (c CPUInfo) ArmASIMDHP() bool {
  1298. return c.Arm&ASIMDHP != 0
  1299. }
  1300. // Rounding Double Multiply Accumulate/Subtract (SQRDMLAH/SQRDMLSH)
  1301. func (c CPUInfo) ArmASIMDRDM() bool {
  1302. return c.Arm&ASIMDRDM != 0
  1303. }
  1304. // Javascript-style double->int convert (FJCVTZS)
  1305. func (c CPUInfo) ArmJSCVT() bool {
  1306. return c.Arm&JSCVT != 0
  1307. }
  1308. // Floatin point complex number addition and multiplication
  1309. func (c CPUInfo) ArmFCMA() bool {
  1310. return c.Arm&FCMA != 0
  1311. }
  1312. // Weaker release consistency (LDAPR, etc)
  1313. func (c CPUInfo) ArmLRCPC() bool {
  1314. return c.Arm&LRCPC != 0
  1315. }
  1316. // Data cache clean to Point of Persistence (DC CVAP)
  1317. func (c CPUInfo) ArmDCPOP() bool {
  1318. return c.Arm&DCPOP != 0
  1319. }
  1320. // SHA-3 instructions (EOR3, RAXI, XAR, BCAX)
  1321. func (c CPUInfo) ArmSHA3() bool {
  1322. return c.Arm&SHA3 != 0
  1323. }
  1324. // SM3 instructions
  1325. func (c CPUInfo) ArmSM3() bool {
  1326. return c.Arm&SM3 != 0
  1327. }
  1328. // SM4 instructions
  1329. func (c CPUInfo) ArmSM4() bool {
  1330. return c.Arm&SM4 != 0
  1331. }
  1332. // SIMD Dot Product
  1333. func (c CPUInfo) ArmASIMDDP() bool {
  1334. return c.Arm&ASIMDDP != 0
  1335. }
  1336. // SHA512 instructions
  1337. func (c CPUInfo) ArmSHA512() bool {
  1338. return c.Arm&SHA512 != 0
  1339. }
  1340. // Scalable Vector Extension
  1341. func (c CPUInfo) ArmSVE() bool {
  1342. return c.Arm&SVE != 0
  1343. }
  1344. // Generic Pointer Authentication
  1345. func (c CPUInfo) ArmGPA() bool {
  1346. return c.Arm&GPA != 0
  1347. }