|  | /** | 
|  | * Identify the characteristics of the host CPU, providing information | 
|  | * about cache sizes and assembly optimisation hints. This module is | 
|  | * provided primarily for assembly language programmers. | 
|  | * | 
|  | * References: | 
|  | * Some of this information was extremely difficult to track down. Some of the | 
|  | * documents below were found only in cached versions stored by search engines! | 
|  | * This code relies on information found in: | 
|  | * | 
|  | * $(UL | 
|  | * $(LI "Intel(R) 64 and IA-32 Architectures Software Developers Manual, | 
|  | *    Volume 2A: Instruction Set Reference, A-M" (2007). | 
|  | * ) | 
|  | * $(LI "AMD CPUID Specification", Advanced Micro Devices, Rev 2.28 (2008). | 
|  | * ) | 
|  | * $(LI "AMD Processor Recognition Application Note For Processors Prior to AMD | 
|  | *    Family 0Fh Processors", Advanced Micro Devices, Rev 3.13 (2005). | 
|  | * ) | 
|  | * $(LI "AMD Geode(TM) GX Processors Data Book", | 
|  | *    Advanced Micro Devices, Publication ID 31505E, (2005). | 
|  | * ) | 
|  | * $(LI "AMD K6 Processor Code Optimisation", Advanced Micro Devices, Rev D (2000). | 
|  | * ) | 
|  | * $(LI "Application note 106: Software Customization for the 6x86 Family", | 
|  | *    Cyrix Corporation, Rev 1.5 (1998) | 
|  | * ) | 
|  | * $(LI $(LINK http://www.datasheetcatalog.org/datasheet/nationalsemiconductor/GX1.pdf)) | 
|  | * $(LI "Geode(TM) GX1 Processor Series Low Power Integrated X86 Solution", | 
|  | *   National Semiconductor, (2002) | 
|  | * ) | 
|  | * $(LI "The VIA Isaiah Architecture", G. Glenn Henry, Centaur Technology, Inc (2008). | 
|  | * ) | 
|  | * $(LI $(LINK http://www.sandpile.org/ia32/cpuid.htm)) | 
|  | * $(LI $(LINK http://www.akkadia.org/drepper/cpumemory.pdf)) | 
|  | * $(LI "What every programmer should know about memory", | 
|  | *    Ulrich Drepper, Red Hat, Inc., (2007). | 
|  | * ) | 
|  | * $(LI "CPU Identification by the Windows Kernel", G. Chappell (2009). | 
|  | *   $(LINK http://www.geoffchappell.com/viewer.htm?doc=studies/windows/km/cpu/cx8.htm) | 
|  | * ) | 
|  | * $(LI "Intel(R) Processor Identification and the CPUID Instruction, Application | 
|  | *    Note 485" (2009). | 
|  | * ) | 
|  | * ) | 
|  | * | 
|  | * Bugs: Currently only works on x86 and Itanium CPUs. | 
|  | *      Many processors have bugs in their microcode for the CPUID instruction, | 
|  | *      so sometimes the cache information may be incorrect. | 
|  | * | 
|  | * Copyright: Copyright Don Clugston 2007 - 2009. | 
|  | * License:   $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0) | 
|  | * Authors:   Don Clugston, Tomas Lindquist Olsen <tomas@famolsen.dk> | 
|  | * Source:    $(DRUNTIMESRC core/_cpuid.d) | 
|  | */ | 
|  |  | 
|  | module core.cpuid; | 
|  |  | 
|  | // GDC (version GNU) and LDC both take the GCC-style extended inline assembly | 
|  | // paths in this module, so they are folded into one version identifier. | 
|  | version (GNU) version = GNU_OR_LDC; | 
|  | version (LDC) version = GNU_OR_LDC; | 
|  |  | 
|  | // Default attributes applied to the declarations that follow in this module. | 
|  | @trusted: | 
|  | nothrow: | 
|  | @nogc: | 
|  |  | 
|  | // If optimizing for a particular processor, it is generally better | 
|  | // to identify based on features rather than model. NOTE: Normally | 
|  | // it's only worthwhile to optimise for the latest Intel and AMD CPU, | 
|  | // with a backup for other CPUs. | 
|  | // Pentium    -- preferPentium1() | 
|  | // PMMX       --   + mmx() | 
|  | // PPro       -- default | 
|  | // PII        --   + mmx() | 
|  | // PIII       --   + mmx() + sse() | 
|  | // PentiumM   --   + mmx() + sse() + sse2() | 
|  | // Pentium4   -- preferPentium4() | 
|  | // PentiumD   --   + isX86_64() | 
|  | // Core2      -- default + isX86_64() | 
|  | // AMD K5     -- preferPentium1() | 
|  | // AMD K6     --   + mmx() | 
|  | // AMD K6-II  --   + mmx() + 3dnow() | 
|  | // AMD K7     -- preferAthlon() | 
|  | // AMD K8     --   + sse2() | 
|  | // AMD K10    --   + isX86_64() | 
|  | // Cyrix 6x86 -- preferPentium1() | 
|  | //    6x86MX  --   + mmx() | 
|  |  | 
|  | // GDC support uses extended inline assembly: | 
|  | //   https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html        (general information and hints) | 
|  | //   https://gcc.gnu.org/onlinedocs/gcc/Simple-Constraints.html  (binding variables to registers) | 
|  | //   https://gcc.gnu.org/onlinedocs/gcc/Machine-Constraints.html (x86 specific register short names) | 
|  |  | 
|  | public: | 
|  |  | 
|  | /// Cache size and behaviour. | 
|  | /// Describes a single data-cache level: its size, associativity and line size. | 
|  | struct CacheInfo | 
|  | { | 
|  | /// Size of the cache, in kilobytes, per CPU. | 
|  | /// For L1 unified (data + code) caches, this size is half the physical size. | 
|  | /// (We don't halve it for larger sizes, since normally | 
|  | /// data size is much greater than code size for critical loops.) | 
|  | size_t size; | 
|  | /// Number of ways of associativity, eg: | 
|  | /// $(UL | 
|  | /// $(LI 1 = direct mapped) | 
|  | /// $(LI 2 = 2-way set associative) | 
|  | /// $(LI 3 = 3-way set associative) | 
|  | /// $(LI ubyte.max = fully associative) | 
|  | /// ) | 
|  | ubyte associativity; | 
|  | /// Number of bytes read into the cache when a cache miss occurs. | 
|  | uint lineSize; | 
|  | } | 
|  |  | 
|  | public: | 
|  | /// $(RED Scheduled for deprecation. Please use $(D dataCaches) instead.) | 
|  | // Note: When we deprecate it, we simply make it private. | 
|  | // One entry per cache level (index 0 = L1); written by the cache-detection | 
|  | // routines of this module (getcacheinfoCPUID2/4, getAMDcacheinfo). | 
|  | __gshared CacheInfo[5] datacache; | 
|  |  | 
|  | // Read-only query properties. Each one simply returns the matching private | 
|  | // immutable field of this module. | 
|  | @property pure | 
|  | { | 
|  | /// The data caches. If there are fewer than 5 physical cache levels, | 
|  | /// the remaining levels are set to size_t.max (== entire memory space) | 
|  | const(CacheInfo)[5] dataCaches() { return _dataCaches; } | 
|  |  | 
|  | /// Returns vendor string, for display purposes only. | 
|  | /// Do NOT use this to determine features! | 
|  | /// Note that some CPUs have programmable vendorIDs. | 
|  | string vendor()     {return _vendor;} | 
|  | /// Returns processor string, for display purposes only | 
|  | string processor()  {return _processor;} | 
|  |  | 
|  | /// Does it have an x87 FPU on-chip? | 
|  | bool x87onChip()    {return _x87onChip;} | 
|  | /// Is MMX supported? | 
|  | bool mmx()          {return _mmx;} | 
|  | /// Is SSE supported? | 
|  | bool sse()          {return _sse;} | 
|  | /// Is SSE2 supported? | 
|  | bool sse2()         {return _sse2;} | 
|  | /// Is SSE3 supported? | 
|  | bool sse3()         {return _sse3;} | 
|  | /// Is SSSE3 supported? | 
|  | bool ssse3()         {return _ssse3;} | 
|  | /// Is SSE4.1 supported? | 
|  | bool sse41()        {return _sse41;} | 
|  | /// Is SSE4.2 supported? | 
|  | bool sse42()        {return _sse42;} | 
|  | /// Is SSE4a supported? | 
|  | bool sse4a()        {return _sse4a;} | 
|  | /// Is AES supported | 
|  | bool aes()          {return _aes;} | 
|  | /// Is pclmulqdq supported | 
|  | bool hasPclmulqdq() {return _hasPclmulqdq;} | 
|  | /// Is rdrand supported | 
|  | bool hasRdrand()    {return _hasRdrand;} | 
|  | /// Is AVX supported | 
|  | bool avx()          {return _avx;} | 
|  | /// Is VEX-Encoded AES supported | 
|  | bool vaes()         {return _vaes;} | 
|  | /// Is vpclmulqdq supported | 
|  | bool hasVpclmulqdq(){return _hasVpclmulqdq; } | 
|  | /// Is FMA supported | 
|  | bool fma()          {return _fma;} | 
|  | /// Is FP16C supported | 
|  | bool fp16c()        {return _fp16c;} | 
|  | /// Is AVX2 supported | 
|  | bool avx2()         {return _avx2;} | 
|  | /// Is HLE (hardware lock elision) supported | 
|  | bool hle()          {return _hle;} | 
|  | /// Is RTM (restricted transactional memory) supported | 
|  | bool rtm()          {return _rtm;} | 
|  | /// Is AVX512F supported | 
|  | bool avx512f()      {return _avx512f;} | 
|  | /// Is rdseed supported | 
|  | bool hasRdseed()    {return _hasRdseed;} | 
|  | /// Is SHA supported | 
|  | bool hasSha()       {return _hasSha;} | 
|  | /// Is AMD 3DNOW supported? | 
|  | bool amd3dnow()     {return _amd3dnow;} | 
|  | /// Is AMD 3DNOW Ext supported? | 
|  | bool amd3dnowExt()  {return _amd3dnowExt;} | 
|  | /// Are AMD extensions to MMX supported? | 
|  | bool amdMmx()       {return _amdMmx;} | 
|  | /// Is fxsave/fxrstor supported? | 
|  | bool hasFxsr()          {return _hasFxsr;} | 
|  | /// Is cmov supported? | 
|  | bool hasCmov()          {return _hasCmov;} | 
|  | /// Is rdtsc supported? | 
|  | bool hasRdtsc()         {return _hasRdtsc;} | 
|  | /// Is cmpxchg8b supported? | 
|  | bool hasCmpxchg8b()     {return _hasCmpxchg8b;} | 
|  | /// Is cmpxchg16b supported? | 
|  | bool hasCmpxchg16b()    {return _hasCmpxchg16b;} | 
|  | /// Is SYSENTER/SYSEXIT supported? | 
|  | bool hasSysEnterSysExit() {return _hasSysEnterSysExit;} | 
|  | /// Is 3DNow prefetch supported? | 
|  | bool has3dnowPrefetch()   {return _has3dnowPrefetch;} | 
|  | /// Are LAHF and SAHF supported in 64-bit mode? | 
|  | bool hasLahfSahf()        {return _hasLahfSahf;} | 
|  | /// Is POPCNT supported? | 
|  | bool hasPopcnt()        {return _hasPopcnt;} | 
|  | /// Is LZCNT supported? | 
|  | bool hasLzcnt()         {return _hasLzcnt;} | 
|  | /// Is this an Intel64 or AMD 64? | 
|  | bool isX86_64()         {return _isX86_64;} | 
|  |  | 
|  | /// Is this an IA64 (Itanium) processor? | 
|  | bool isItanium()        { return _isItanium; } | 
|  |  | 
|  | /// Is hyperthreading supported? | 
|  | bool hyperThreading()   { return _hyperThreading; } | 
|  | /// Returns number of threads per CPU | 
|  | uint threadsPerCPU()    {return _threadsPerCPU;} | 
|  | /// Returns number of cores in CPU | 
|  | uint coresPerCPU()      {return _coresPerCPU;} | 
|  |  | 
|  | /// Optimisation hints for assembly code. | 
|  | /// | 
|  | /// For forward compatibility, the CPU is compared against different | 
|  | /// microarchitectures. For 32-bit x86, comparisons are made against | 
|  | /// the Intel PPro/PII/PIII/PM family. | 
|  | /// | 
|  | /// The major 32-bit x86 microarchitecture 'dynasties' have been: | 
|  | /// | 
|  | /// $(UL | 
|  | /// $(LI Intel P6 (PentiumPro, PII, PIII, PM, Core, Core2). ) | 
|  | /// $(LI AMD Athlon (K7, K8, K10). ) | 
|  | /// $(LI Intel NetBurst (Pentium 4, Pentium D). ) | 
|  | /// $(LI In-order Pentium (Pentium1, PMMX, Atom) ) | 
|  | /// ) | 
|  | /// | 
|  | /// Other early CPUs (Nx586, AMD K5, K6, Centaur C3, Transmeta, | 
|  | /// Cyrix, Rise) were mostly in-order. | 
|  | /// | 
|  | /// Some new processors do not fit into the existing categories: | 
|  | /// | 
|  | /// $(UL | 
|  | /// $(LI Intel Atom 230/330 (family 6, model 0x1C) is an in-order core. ) | 
|  | /// $(LI Centaur Isaiah = VIA Nano (family 6, model F) is an out-of-order core. ) | 
|  | /// ) | 
|  | /// | 
|  | /// Within each dynasty, the optimisation techniques are largely | 
|  | /// identical (eg, use instruction pairing for group 4). Major | 
|  | /// instruction set improvements occur within each dynasty. | 
|  |  | 
|  | /// Does this CPU perform better on AMD K7 code than PentiumPro..Core2 code? | 
|  | bool preferAthlon() { return _preferAthlon; } | 
|  | /// Does this CPU perform better on Pentium4 code than PentiumPro..Core2 code? | 
|  | bool preferPentium4() { return _preferPentium4; } | 
|  | /// Does this CPU perform better on Pentium I code than Pentium Pro code? | 
|  | bool preferPentium1() { return _preferPentium1; } | 
|  | } | 
|  |  | 
|  | // Backing storage for the public query properties: each `_name` field here | 
|  | // is returned by the property `name` declared in the @property block. | 
|  | private immutable | 
|  | { | 
|  | /* These exist as immutables so that the query property functions can | 
|  | * be backwards compatible with code that called them with (). | 
|  | * Also, immutables can only be set by the static this(). | 
|  | */ | 
|  | const(CacheInfo)[5] _dataCaches; | 
|  | string _vendor; | 
|  | string _processor; | 
|  | bool _x87onChip; | 
|  | bool _mmx; | 
|  | bool _sse; | 
|  | bool _sse2; | 
|  | bool _sse3; | 
|  | bool _ssse3; | 
|  | bool _sse41; | 
|  | bool _sse42; | 
|  | bool _sse4a; | 
|  | bool _aes; | 
|  | bool _hasPclmulqdq; | 
|  | bool _hasRdrand; | 
|  | bool _avx; | 
|  | bool _vaes; | 
|  | bool _hasVpclmulqdq; | 
|  | bool _fma; | 
|  | bool _fp16c; | 
|  | bool _avx2; | 
|  | bool _hle; | 
|  | bool _rtm; | 
|  | bool _avx512f; | 
|  | bool _hasRdseed; | 
|  | bool _hasSha; | 
|  | bool _amd3dnow; | 
|  | bool _amd3dnowExt; | 
|  | bool _amdMmx; | 
|  | bool _hasFxsr; | 
|  | bool _hasCmov; | 
|  | bool _hasRdtsc; | 
|  | bool _hasCmpxchg8b; | 
|  | bool _hasCmpxchg16b; | 
|  | bool _hasSysEnterSysExit; | 
|  | bool _has3dnowPrefetch; | 
|  | bool _hasLahfSahf; | 
|  | bool _hasPopcnt; | 
|  | bool _hasLzcnt; | 
|  | bool _isX86_64; | 
|  | bool _isItanium; | 
|  | bool _hyperThreading; | 
|  | uint _threadsPerCPU; | 
|  | uint _coresPerCPU; | 
|  | bool _preferAthlon; | 
|  | bool _preferPentium4; | 
|  | bool _preferPentium1; | 
|  | } | 
|  |  | 
|  | // From here on, module-level variables are __gshared globals. | 
|  | __gshared: | 
|  | // All these values are set only once, and never subsequently modified. | 
|  | public: | 
|  | /// $(RED Warning: This field will be turned into a property in a future release.) | 
|  | /// | 
|  | /// Processor type (vendor-dependent). | 
|  | /// This should be visible ONLY for display purposes. | 
|  | uint stepping, model, family; | 
|  | /// $(RED This field has been deprecated. Please use $(D cacheLevels) instead.) | 
|  | uint numCacheLevels = 1; | 
|  | /// The number of cache levels in the CPU. | 
|  | /// Returns the current value of $(D numCacheLevels). | 
|  | @property uint cacheLevels() { return numCacheLevels; } | 
|  | private: | 
|  |  | 
|  | // Raw CPUID results gathered during detection: vendor/brand strings, the | 
|  | // unprocessed feature words, and core/thread counts. | 
|  | struct CpuFeatures | 
|  | { | 
|  | bool probablyIntel; // true = _probably_ an Intel processor, might be faking | 
|  | bool probablyAMD; // true = _probably_ an AMD or Hygon processor | 
|  | string processorName; | 
|  | char [12] vendorID = 0; | 
|  | char [48] processorNameBuffer = 0; | 
|  | uint features = 0;     // mmx, sse, sse2, hyperthreading, etc | 
|  | uint miscfeatures = 0; // sse3, etc. | 
|  | uint extfeatures = 0;  // HLE, AVX2, RTM, etc. | 
|  | uint amdfeatures = 0;  // 3DNow!, mmxext, etc | 
|  | uint amdmiscfeatures = 0; // sse4a, sse5, svm, etc | 
|  | ulong xfeatures = 0;   // XFEATURES_ENABLED_MASK | 
|  | uint maxCores = 1; | 
|  | uint maxThreads = 1; | 
|  | } | 
|  |  | 
|  | // The single module-wide instance that the detection routines fill in. | 
|  | CpuFeatures cpuFeatures; | 
|  |  | 
|  | /* Hide from the optimizer where cf (a register) is coming from, so that | 
|  | * cf doesn't get "optimized away". The idea is to  reference | 
|  | * the global data through cf so not so many fixups are inserted | 
|  | * into the executable image. | 
|  | * | 
|  | * Returns: the address of the module-level cpuFeatures instance. | 
|  | */ | 
|  | CpuFeatures* getCpuFeatures() @nogc nothrow | 
|  | { | 
|  | // Must not be inlined, otherwise the caller would see the global directly. | 
|  | pragma(inline, false); | 
|  | return &cpuFeatures; | 
|  | } | 
|  |  | 
|  | // Reports whether the HTT flag is set in the recorded feature word. | 
|  | // Note that this may indicate multi-core rather than hyperthreading. | 
|  | @property bool hyperThreadingBit() | 
|  | { | 
|  |     immutable uint httFlag = cpuFeatures.features & HTT_BIT; | 
|  |     return httFlag != 0; | 
|  | } | 
|  |  | 
|  | // feature flags CPUID1_EDX | 
|  | // Bit masks for CpuFeatures.features, which cpuidX86 loads from EDX of | 
|  | // CPUID leaf 1. | 
|  | enum : uint | 
|  | { | 
|  | FPU_BIT = 1, | 
|  | TIMESTAMP_BIT = 1<<4, // rdtsc | 
|  | MDSR_BIT = 1<<5,      // RDMSR/WRMSR | 
|  | CMPXCHG8B_BIT = 1<<8, | 
|  | SYSENTERSYSEXIT_BIT = 1<<11, | 
|  | CMOV_BIT = 1<<15, | 
|  | MMX_BIT = 1<<23, | 
|  | FXSR_BIT = 1<<24, | 
|  | SSE_BIT = 1<<25, | 
|  | SSE2_BIT = 1<<26, | 
|  | HTT_BIT = 1<<28, | 
|  | IA64_BIT = 1<<30 | 
|  | } | 
|  | // feature flags misc CPUID1_ECX | 
|  | // Bit masks for CpuFeatures.miscfeatures, which cpuidX86 loads from ECX of | 
|  | // CPUID leaf 1. | 
|  | enum : uint | 
|  | { | 
|  | SSE3_BIT = 1, | 
|  | PCLMULQDQ_BIT = 1<<1, // from AVX | 
|  | MWAIT_BIT = 1<<3, | 
|  | SSSE3_BIT = 1<<9, | 
|  | FMA_BIT = 1<<12,     // from AVX | 
|  | CMPXCHG16B_BIT = 1<<13, | 
|  | SSE41_BIT = 1<<19, | 
|  | SSE42_BIT = 1<<20, | 
|  | POPCNT_BIT = 1<<23, | 
|  | AES_BIT = 1<<25, // AES instructions from AVX | 
|  | OSXSAVE_BIT = 1<<27, // Used for AVX | 
|  | AVX_BIT = 1<<28, | 
|  | FP16C_BIT = 1<<29, | 
|  | RDRAND_BIT = 1<<30, | 
|  | } | 
|  | // Feature flags for cpuid.{EAX = 7, ECX = 0}.EBX. | 
|  | // Bit masks for CpuFeatures.extfeatures. | 
|  | enum : uint | 
|  | { | 
|  | FSGSBASE_BIT = 1 << 0, | 
|  | SGX_BIT = 1 << 2, | 
|  | BMI1_BIT = 1 << 3, | 
|  | HLE_BIT = 1 << 4, | 
|  | AVX2_BIT = 1 << 5, | 
|  | SMEP_BIT = 1 << 7, | 
|  | BMI2_BIT = 1 << 8, | 
|  | ERMS_BIT = 1 << 9, | 
|  | INVPCID_BIT = 1 << 10, | 
|  | RTM_BIT = 1 << 11, | 
|  | AVX512F_BIT = 1 << 16, | 
|  | AVX512DQ_BIT = 1 << 17, | 
|  | RDSEED_BIT = 1 << 18, | 
|  | ADX_BIT = 1 << 19, | 
|  | AVX512IFMA_BIT = 1 << 21, | 
|  | CLFLUSHOPT_BIT = 1 << 23, | 
|  | CLWB_BIT = 1 << 24, | 
|  | AVX512PF_BIT = 1 << 26, | 
|  | AVX512ER_BIT = 1 << 27, | 
|  | AVX512CD_BIT = 1 << 28, | 
|  | SHA_BIT = 1 << 29, | 
|  | AVX512BW_BIT = 1 << 30, | 
|  | AVX512VL_BIT = 1 << 31, | 
|  | } | 
|  | // feature flags XFEATURES_ENABLED_MASK | 
|  | // Bit masks for CpuFeatures.xfeatures, the 64-bit value cpuidX86 reads | 
|  | // with xgetbv (ECX = 0) when OSXSAVE_BIT is set. | 
|  | enum : ulong | 
|  | { | 
|  | XF_FP_BIT  = 0x1, | 
|  | XF_SSE_BIT = 0x2, | 
|  | XF_YMM_BIT = 0x4, | 
|  | } | 
|  | // AMD feature flags CPUID80000001_EDX | 
|  | // Bit masks for CpuFeatures.amdfeatures. | 
|  | enum : uint | 
|  | { | 
|  | AMD_MMX_BIT = 1<<22, | 
|  | //      FXR_OR_CYRIXMMX_BIT = 1<<24, // Cyrix/NS: 6x86MMX instructions. | 
|  | FFXSR_BIT = 1<<25, | 
|  | PAGE1GB_BIT = 1<<26, // support for 1GB pages | 
|  | RDTSCP_BIT = 1<<27, | 
|  | AMD64_BIT = 1<<29, | 
|  | AMD_3DNOW_EXT_BIT = 1<<30, | 
|  | AMD_3DNOW_BIT = 1<<31 | 
|  | } | 
|  | // AMD misc feature flags CPUID80000001_ECX | 
|  | // Bit masks for CpuFeatures.amdmiscfeatures. | 
|  | enum : uint | 
|  | { | 
|  | LAHFSAHF_BIT = 1, | 
|  | LZCNT_BIT = 1<<5, | 
|  | SSE4A_BIT = 1<<6, | 
|  | AMD_3DNOW_PREFETCH_BIT = 1<<8, | 
|  | } | 
|  |  | 
|  |  | 
|  | // supportedX86 selects whether the CPUID-based detection code is compiled: | 
|  | // - GDC/LDC: true when the target is X86 or X86_64; | 
|  | // - other compilers: true when D-style inline asm for x86/x86-64 exists; | 
|  | // - false otherwise. | 
|  | version (GNU_OR_LDC) { | 
|  | version (X86) | 
|  | enum supportedX86 = true; | 
|  | else version (X86_64) | 
|  | enum supportedX86 = true; | 
|  | else | 
|  | enum supportedX86 = false; | 
|  | } else version (D_InlineAsm_X86) { | 
|  | enum supportedX86 = true; | 
|  | } else version (D_InlineAsm_X86_64) { | 
|  | enum supportedX86 = true; | 
|  | } else { | 
|  | enum supportedX86 = false; | 
|  | } | 
|  |  | 
|  | static if (supportedX86) { | 
|  | // Note that this code will also work for Itanium in x86 mode. | 
|  |  | 
|  | // EAX results of CPUID leaf 0 and leaf 0x8000_0000 respectively, i.e. the | 
|  | // highest standard / extended leaf; both are assigned in cpuidX86(). | 
|  | __gshared uint max_cpuid, max_extended_cpuid; | 
|  |  | 
|  | // CPUID2: "cache and tlb information" | 
|  | // | 
|  | // Fills the global `datacache` table from the one-byte cache descriptors | 
|  | // returned by CPUID leaf 2. Takes no parameters and returns nothing. | 
|  | // If the register pattern of a Cyrix MediaGX MMX Enhanced is seen, only | 
|  | // datacache[0] is written and the function returns early. | 
|  | void getcacheinfoCPUID2() | 
|  | { | 
|  |     // We are only interested in the data caches | 
|  |     // Looks up one descriptor byte and, if known, records its cache | 
|  |     // parameters in datacache[level]. | 
|  |     void decipherCpuid2(ubyte x) @nogc nothrow { | 
|  |         if (x==0) return; | 
|  |         // Values from http://www.sandpile.org/ia32/cpuid.htm. | 
|  |         // Includes Itanium and non-Intel CPUs. | 
|  |         // | 
|  |         // The three tables below are parallel: ids[i] has size sizes[i] | 
|  |         // (in KB) and associativity ways[i]. | 
|  |         static immutable ubyte [63] ids = [ | 
|  |             // level 1 data cache (9 entries) | 
|  |             0x0A, 0x0C, 0x0D, 0x2C, 0x60, 0x0E, 0x66, 0x67, 0x68, | 
|  |             // level 2 cache (29 entries) | 
|  |             0x41, 0x42, 0x43, 0x44, 0x45, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7F, | 
|  |             0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x49, 0x4E, | 
|  |             0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x48, 0x80, 0x81, | 
|  |             // level 3 cache | 
|  |             0x22, 0x23, 0x25, 0x29, 0x46, 0x47, 0x4A, 0x4B, 0x4C, 0x4D, | 
|  |  | 
|  |             0xD0, 0xD1, 0xD2, 0xD6, 0xD7, 0xD8, 0xDC, 0xDD, 0xDE, | 
|  |             0xE2, 0xE3, 0xE4, 0xEA, 0xEB, 0xEC | 
|  |         ]; | 
|  |         // NOTE(review): sizes[3] gives 64 for descriptor 0x2C; confirm | 
|  |         // against the Intel descriptor table before relying on it. | 
|  |         static immutable uint [63] sizes = [ | 
|  |             8, 16, 16, 64, 16, 24, 8, 16, 32, | 
|  |             128, 256, 512, 1024, 2048, 1024, 128, 256, 512, 1024, 2048, 512, | 
|  |             256, 512, 1024, 2048, 512, 1024, 4096, 6*1024, | 
|  |             128, 192, 128, 256, 384, 512, 3072, 512, 128, | 
|  |             512, 1024, 2048, 4096, 4096, 8192, 6*1024, 8192, 12*1024, 16*1024, | 
|  |  | 
|  |             512, 1024, 2048, 1024, 2048, 4096, 1024+512, 3*1024, 6*1024, | 
|  |             2*1024, 4*1024, 8*1024, 12*1024, 28*1024, 24*1024 | 
|  |         ]; | 
|  |         // CPUBUG: Pentium M reports 0x2C but tests show it is only 4-way associative | 
|  |         static immutable ubyte [63] ways = [ | 
|  |             2, 4, 4, 8, 8, 6, 4, 4, 4, | 
|  |             4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 2, | 
|  |             8, 8, 8, 8, 4, 8, 16, 24, | 
|  |             4, 6, 2, 4, 6, 4, 12, 8, 8, | 
|  |             4, 8, 8, 8, 4, 8, 12, 16, 12, 16, | 
|  |             4, 4, 4, 8, 8, 8, 12, 12, 12, | 
|  |             16, 16, 16, 24, 24, 24 | 
|  |         ]; | 
|  |         // Index of the first L2 and first L3 entry in the tables above. | 
|  |         // There are 9 L1 entries followed by 29 L2 entries; the previous | 
|  |         // values (8 and 28+9) were each one too small, which filed 0x68 | 
|  |         // under L2 and 0x81 under L3. | 
|  |         enum { FIRSTDATA2 = 9, FIRSTDATA3 = 9+29 } | 
|  |         for (size_t i=0; i< ids.length; ++i) { | 
|  |             if (x==ids[i]) { | 
|  |                 // level is an index into datacache: 0 = L1, 1 = L2, 2 = L3. | 
|  |                 int level = i< FIRSTDATA2 ? 0: i<FIRSTDATA3 ? 1 : 2; | 
|  |                 // Descriptor 0x49 sits in the L2 group, but on family 0xF | 
|  |                 // model 6 it is recorded as the third-level cache. | 
|  |                 if (x==0x49 && family==0xF && model==0x6) level=2; | 
|  |                 datacache[level].size=sizes[i]; | 
|  |                 datacache[level].associativity=ways[i]; | 
|  |                 // Third-level caches (index 2) and the listed descriptors | 
|  |                 // use 64-byte lines; everything else uses 32. The test | 
|  |                 // used to read `level == 3`, which can never be true here. | 
|  |                 if (level == 2 || x==0x2C || x==0x0D || (x>=0x48 && x<=0x80) | 
|  |                     || x==0x86 || x==0x87 | 
|  |                     || (x>=0x66 && x<=0x68) || (x>=0x39 && x<=0x3E)){ | 
|  |                     datacache[level].lineSize = 64; | 
|  |                 } else datacache[level].lineSize = 32; | 
|  |             } | 
|  |         } | 
|  |     } | 
|  |  | 
|  |     uint[4] a; | 
|  |     bool firstTime = true; | 
|  |     // On a multi-core system, this could theoretically fail, but it's only used | 
|  |     // for old single-core CPUs. | 
|  |     uint numinfos = 1; | 
|  |     for (;;) { | 
|  |         version (GNU_OR_LDC) asm pure nothrow @nogc { | 
|  |             "cpuid" : "=a" (a[0]), "=b" (a[1]), "=c" (a[2]), "=d" (a[3]) : "a" (2); | 
|  |         } else asm pure nothrow @nogc { | 
|  |             mov EAX, 2; | 
|  |             cpuid; | 
|  |             mov a+0, EAX; | 
|  |             mov a+4, EBX; | 
|  |             mov a+8, ECX; | 
|  |             mov a+12, EDX; | 
|  |         } | 
|  |         if (firstTime) { | 
|  |             if (a[0]==0x0000_7001 && a[3]==0x80 && a[1]==0 && a[2]==0) { | 
|  |                 // Cyrix MediaGX MMXEnhanced returns: EAX= 00007001, EDX=00000080. | 
|  |                 // These are NOT standard Intel values | 
|  |                 // (TLB = 32 entry, 4 way associative, 4K pages) | 
|  |                 // (L1 cache = 16K, 4way, linesize16) | 
|  |                 datacache[0].size=8; | 
|  |                 datacache[0].associativity=4; | 
|  |                 datacache[0].lineSize=16; | 
|  |                 return; | 
|  |             } | 
|  |             // lsb of a is how many times to loop. | 
|  |             numinfos = a[0] & 0xFF; | 
|  |             // and otherwise it should be ignored | 
|  |             a[0] &= 0xFFFF_FF00; | 
|  |             firstTime = false; | 
|  |         } | 
|  |         for (int c=0; c<4;++c) { | 
|  |             // high bit set == no info. | 
|  |             if (a[c] & 0x8000_0000) continue; | 
|  |             decipherCpuid2(cast(ubyte)(a[c] & 0xFF)); | 
|  |             decipherCpuid2(cast(ubyte)((a[c]>>8) & 0xFF)); | 
|  |             decipherCpuid2(cast(ubyte)((a[c]>>16) & 0xFF)); | 
|  |             decipherCpuid2(cast(ubyte)((a[c]>>24) & 0xFF)); | 
|  |         } | 
|  |         // Stop once the requested number of reads is done. A reported | 
|  |         // count of 0 is treated like 1: the former `while (--numinfos)` | 
|  |         // wrapped the unsigned counter and kept looping. | 
|  |         if (numinfos <= 1) break; | 
|  |         --numinfos; | 
|  |     } | 
|  | } | 
|  |  | 
|  | // CPUID4: "Deterministic cache parameters" leaf | 
|  | // | 
|  | // Walks the sub-leaves of CPUID leaf 4 until a sub-leaf reports cache type 0, | 
|  | // recording every data or unified cache in the global `datacache` table and | 
|  | // raising cpuFeatures.maxCores when a sub-leaf reports a larger core count. | 
|  | // Takes no parameters and returns nothing. | 
|  | void getcacheinfoCPUID4() | 
|  | { | 
|  |     int cachenum = 0; | 
|  |     for (;;) { | 
|  |         uint a, b, number_of_sets; | 
|  |         version (GNU_OR_LDC) asm pure nothrow @nogc { | 
|  |             "cpuid" : "=a" (a), "=b" (b), "=c" (number_of_sets) : "a" (4), "c" (cachenum) : "edx"; | 
|  |         } else asm pure nothrow @nogc { | 
|  |             mov EAX, 4; | 
|  |             mov ECX, cachenum; | 
|  |             cpuid; | 
|  |             mov a, EAX; | 
|  |             mov b, EBX; | 
|  |             mov number_of_sets, ECX; | 
|  |         } | 
|  |         ++cachenum; | 
|  |         // Low five bits of EAX: 0 ends the list, 1 and 3 are the data and | 
|  |         // unified caches we keep. | 
|  |         immutable uint cacheType = a & 0x1F; | 
|  |         if (cacheType == 0) break; // no more caches | 
|  |         immutable uint numthreads = ((a>>14) & 0xFFF)  + 1; | 
|  |         immutable uint numcores = ((a>>26) & 0x3F) + 1; | 
|  |         if (numcores > cpuFeatures.maxCores) cpuFeatures.maxCores = numcores; | 
|  |         if (cacheType != 1 && cacheType != 3) continue; // we only want data & unified caches | 
|  |  | 
|  |         // ECX holds the number of sets minus one. | 
|  |         ++number_of_sets; | 
|  |         // Zero-based index into datacache; a level field of 0 wraps to 255. | 
|  |         immutable ubyte level = cast(ubyte)(((a>>5)&7)-1); | 
|  |         // Valid indices are 0 .. datacache.length-1. The old `>` test let | 
|  |         // level == datacache.length through and indexed past the array. | 
|  |         if (level >= datacache.length) continue; // ignore deep caches | 
|  |         // NOTE(review): the way count can exceed ubyte range and is | 
|  |         // truncated by this cast; the field type cannot hold more. | 
|  |         datacache[level].associativity = a & 0x200 ? ubyte.max :cast(ubyte)((b>>22)+1); | 
|  |         datacache[level].lineSize = (b & 0xFFF)+ 1; // system coherency line size | 
|  |         immutable uint line_partitions = ((b >> 12)& 0x3FF) + 1; | 
|  |         // Size = number of sets * associativity * cachelinesize * linepartitions | 
|  |         // and must convert to Kb, also dividing by the number of hyperthreads using this cache. | 
|  |         // The product is formed in 64 bits so it cannot wrap before the division. | 
|  |         immutable ulong sz = (datacache[level].associativity< ubyte.max) | 
|  |             ? cast(ulong) number_of_sets * datacache[level].associativity | 
|  |             : number_of_sets; | 
|  |         datacache[level].size = cast(size_t)( | 
|  |             (sz * datacache[level].lineSize * line_partitions ) / (numthreads *1024)); | 
|  |         if (level == 0 && cacheType == 3) { | 
|  |             // Halve the size for unified L1 caches | 
|  |             datacache[level].size/=2; | 
|  |         } | 
|  |     } | 
|  | } | 
|  |  | 
|  | // CPUID8000_0005 & 6 | 
|  | // | 
|  | // Fills datacache[0] from ECX of leaf 0x8000_0005 and, when leaf 0x8000_0006 | 
|  | // is available, datacache[1] and datacache[2] from its ECX and EDX. | 
|  | // May also raise cpuFeatures.maxCores using leaf 0x8000_0008. | 
|  | // Takes no parameters and returns nothing. | 
|  | void getAMDcacheinfo() | 
|  | { | 
|  | uint dummy, c5, c6, d6; | 
|  | version (GNU_OR_LDC) asm pure nothrow @nogc { | 
|  | "cpuid" : "=a" (dummy), "=c" (c5) : "a" (0x8000_0005) : "ebx", "edx"; | 
|  | } else asm pure nothrow @nogc { | 
|  | mov EAX, 0x8000_0005; // L1 cache | 
|  | cpuid; | 
|  | // EAX has L1_TLB_4M. | 
|  | // EBX has L1_TLB_4K | 
|  | // EDX has L1 instruction cache | 
|  | mov c5, ECX; | 
|  | } | 
|  |  | 
|  | // c5 layout as decoded here: bits 31..24 size, 23..16 associativity, | 
|  | // 7..0 line size. | 
|  | datacache[0].size = ( (c5>>24) & 0xFF); | 
|  | datacache[0].associativity = cast(ubyte)( (c5 >> 16) & 0xFF); | 
|  | datacache[0].lineSize = c5 & 0xFF; | 
|  |  | 
|  | if (max_extended_cpuid >= 0x8000_0006) { | 
|  | // AMD K6-III or K6-2+ or later. | 
|  | uint numcores = 1; | 
|  | if (max_extended_cpuid >= 0x8000_0008) { | 
|  | // read the number of physical cores (minus 1) from the 8 lowest ECX bits | 
|  | version (GNU_OR_LDC) asm pure nothrow @nogc { | 
|  | "cpuid" : "=a" (dummy), "=c" (numcores) : "a" (0x8000_0008) : "ebx", "edx"; | 
|  | } else asm pure nothrow @nogc { | 
|  | mov EAX, 0x8000_0008; | 
|  | cpuid; | 
|  | mov numcores, ECX; | 
|  | } | 
|  | numcores = (numcores & 0xFF) + 1; | 
|  | if (numcores>cpuFeatures.maxCores) cpuFeatures.maxCores = numcores; | 
|  | } | 
|  |  | 
|  | version (GNU_OR_LDC) asm pure nothrow @nogc { | 
|  | "cpuid" : "=a" (dummy), "=c" (c6), "=d" (d6) : "a" (0x8000_0006) : "ebx"; | 
|  | } else asm pure nothrow @nogc { | 
|  | mov EAX, 0x8000_0006; // L2/L3 cache | 
|  | cpuid; | 
|  | mov c6, ECX; // L2 cache info | 
|  | mov d6, EDX; // L3 cache info | 
|  | } | 
|  |  | 
|  | // Translates the 4-bit associativity code (bits 15..12) into a way | 
|  | // count; 0xFF (== ubyte.max) stands for fully associative. | 
|  | static immutable ubyte [] assocmap = [ 0, 1, 2, 0, 4, 0, 8, 0, 16, 0, 32, 48, 64, 96, 128, 0xFF ]; | 
|  | datacache[1].size = (c6>>16) & 0xFFFF; | 
|  | datacache[1].associativity = assocmap[(c6>>12)&0xF]; | 
|  | datacache[1].lineSize = c6 & 0xFF; | 
|  |  | 
|  | // The L3 cache value is TOTAL, not per core. | 
|  | // The field above bit 18 counts 512 KB units; divide by the core count | 
|  | // to get a per-core figure. | 
|  | datacache[2].size = ((d6>>18)*512)/numcores; // could be up to 2 * this, -1. | 
|  | datacache[2].associativity = assocmap[(d6>>12)&0xF]; | 
|  | datacache[2].lineSize = d6 & 0xFF; | 
|  | } | 
|  | } | 
|  |  | 
|  | // For Intel CoreI7 and later, use function 0x0B | 
|  | // to determine number of processors. | 
|  | // | 
|  | // Queries sub-leaves 0 and 1 of CPUID leaf 0x0B. Sub-leaf 0 yields the | 
|  | // number of threads per core; sub-leaf 1 yields cpuFeatures.maxThreads, | 
|  | // from which cpuFeatures.maxCores is derived. Takes no parameters and | 
|  | // returns nothing. | 
|  | void getCpuInfo0B() | 
|  | { | 
|  |     int threadsPerCore; | 
|  |     uint a, b, c, d; | 
|  |     // I'm not sure about this. The docs state that there | 
|  |     // are 2 hyperthreads per core if HT is factory enabled. | 
|  |     for (int level = 0; level < 2; level++) | 
|  |     { | 
|  |         version (GNU_OR_LDC) asm pure nothrow @nogc { | 
|  |             "cpuid" : "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (0x0B), "c" (level); | 
|  |         } else asm pure nothrow @nogc { | 
|  |             mov EAX, 0x0B; | 
|  |             mov ECX, level; | 
|  |             cpuid; | 
|  |             mov a, EAX; | 
|  |             mov b, EBX; | 
|  |             mov c, ECX; | 
|  |             mov d, EDX; | 
|  |         } | 
|  |         if (b != 0) | 
|  |         { | 
|  |             if (level == 0) | 
|  |                 threadsPerCore = b & 0xFFFF; | 
|  |             else if (level == 1) | 
|  |             { | 
|  |                 cpuFeatures.maxThreads = b & 0xFFFF; | 
|  |                 // threadsPerCore is still 0 if sub-leaf 0 reported no | 
|  |                 // logical processors (or only in the upper EBX bits). | 
|  |                 // Dividing then would trap, so maxCores is left as is. | 
|  |                 if (threadsPerCore > 0) | 
|  |                     cpuFeatures.maxCores = cpuFeatures.maxThreads / threadsPerCore; | 
|  |             } | 
|  |         } | 
|  |         // Got "invalid domain" returned from cpuid | 
|  |         if (a == 0 && b == 0) | 
|  |             break; | 
|  |     } | 
|  | } | 
|  |  | 
|  | void cpuidX86() | 
|  | { | 
|  | auto cf = getCpuFeatures(); | 
|  |  | 
|  | uint a, b, c, d; | 
|  | uint* venptr = cast(uint*)cf.vendorID.ptr; | 
|  | version (GNU_OR_LDC) | 
|  | { | 
|  | asm pure nothrow @nogc { | 
|  | "cpuid" : "=a" (max_cpuid), "=b" (venptr[0]), "=d" (venptr[1]), "=c" (venptr[2]) : "a" (0); | 
|  | "cpuid" : "=a" (max_extended_cpuid) : "a" (0x8000_0000) : "ebx", "ecx", "edx"; | 
|  | } | 
|  | } | 
|  | else | 
|  | { | 
|  | uint a2; | 
|  | version (D_InlineAsm_X86) | 
|  | { | 
|  | asm pure nothrow @nogc { | 
|  | mov EAX, 0; | 
|  | cpuid; | 
|  | mov a, EAX; | 
|  | mov EAX, venptr; | 
|  | mov [EAX], EBX; | 
|  | mov [EAX + 4], EDX; | 
|  | mov [EAX + 8], ECX; | 
|  | } | 
|  | } | 
|  | else version (D_InlineAsm_X86_64) | 
|  | { | 
|  | asm pure nothrow @nogc { | 
|  | mov EAX, 0; | 
|  | cpuid; | 
|  | mov a, EAX; | 
|  | mov RAX, venptr; | 
|  | mov [RAX], EBX; | 
|  | mov [RAX + 4], EDX; | 
|  | mov [RAX + 8], ECX; | 
|  | } | 
|  | } | 
|  | asm pure nothrow @nogc { | 
|  | mov EAX, 0x8000_0000; | 
|  | cpuid; | 
|  | mov a2, EAX; | 
|  | } | 
|  | max_cpuid = a; | 
|  | max_extended_cpuid = a2; | 
|  | } | 
|  |  | 
|  |  | 
|  | cf.probablyIntel = cf.vendorID == "GenuineIntel"; | 
|  | cf.probablyAMD = (cf.vendorID == "AuthenticAMD" || cf.vendorID == "HygonGenuine"); | 
|  | uint apic = 0; // brand index, apic id | 
|  | version (GNU_OR_LDC) asm pure nothrow @nogc { | 
|  | "cpuid" : "=a" (a), "=b" (apic), "=c" (cf.miscfeatures), "=d" (cf.features) : "a" (1); | 
|  | } else { | 
|  | asm pure nothrow @nogc { | 
|  | mov EAX, 1; // model, stepping | 
|  | cpuid; | 
|  | mov a, EAX; | 
|  | mov apic, EBX; | 
|  | mov c, ECX; | 
|  | mov d, EDX; | 
|  | } | 
|  | cf.features = d; | 
|  | cf.miscfeatures = c; | 
|  | } | 
|  | stepping = a & 0xF; | 
|  | immutable uint fbase = (a >> 8) & 0xF; | 
|  | immutable uint mbase = (a >> 4) & 0xF; | 
|  | family = ((fbase == 0xF) || (fbase == 0)) ? fbase + (a >> 20) & 0xFF : fbase; | 
|  | model = ((fbase == 0xF) || (fbase == 6 && cf.probablyIntel) ) ? | 
|  | mbase + ((a >> 12) & 0xF0) : mbase; | 
|  |  | 
|  | if (max_cpuid >= 7) | 
|  | { | 
|  | version (GNU_OR_LDC) asm pure nothrow @nogc { | 
|  | "cpuid" : "=a" (a), "=b" (cf.extfeatures), "=c" (c) : "a" (7), "c" (0) : "edx"; | 
|  | } else { | 
|  | uint ext; | 
|  | asm pure nothrow @nogc { | 
|  | mov EAX, 7; // Structured extended feature leaf. | 
|  | mov ECX, 0; // Main leaf. | 
|  | cpuid; | 
|  | mov ext, EBX; // HLE, AVX2, RTM, etc. | 
|  | } | 
|  | cf.extfeatures = ext; | 
|  | } | 
|  | } | 
|  |  | 
|  | if (cf.miscfeatures & OSXSAVE_BIT) | 
|  | { | 
|  | version (GNU_OR_LDC) asm pure nothrow @nogc { | 
|  | /* Old assemblers do not recognize xgetbv, and there is no easy way | 
|  | * to conditionally compile based on the assembler used, so use the | 
|  | * raw .byte sequence instead.  */ | 
|  | ".byte 0x0f, 0x01, 0xd0" : "=a" (a), "=d" (d) : "c" (0); | 
|  | } else asm pure nothrow @nogc { | 
|  | mov ECX, 0; | 
|  | xgetbv; | 
|  | mov d, EDX; | 
|  | mov a, EAX; | 
|  | } | 
|  | cf.xfeatures = cast(ulong)d << 32 | a; | 
|  | } | 
|  |  | 
|  | cf.amdfeatures = 0; | 
|  | cf.amdmiscfeatures = 0; | 
|  | if (max_extended_cpuid >= 0x8000_0001) { | 
|  | version (GNU_OR_LDC) asm pure nothrow @nogc { | 
|  | "cpuid" : "=a" (a), "=c" (cf.amdmiscfeatures), "=d" (cf.amdfeatures) : "a" (0x8000_0001) : "ebx"; | 
|  | } else { | 
|  | asm pure nothrow @nogc { | 
|  | mov EAX, 0x8000_0001; | 
|  | cpuid; | 
|  | mov c, ECX; | 
|  | mov d, EDX; | 
|  | } | 
|  | cf.amdmiscfeatures = c; | 
|  | cf.amdfeatures = d; | 
|  | } | 
|  | } | 
|  | // Try to detect fraudulent vendorIDs | 
|  | if (amd3dnow) cf.probablyIntel = false; | 
|  |  | 
|  | if (!cf.probablyIntel && max_extended_cpuid >= 0x8000_0008) { | 
|  | //http://support.amd.com/TechDocs/25481.pdf pg.36 | 
|  | cf.maxCores = 1; | 
|  | if (hyperThreadingBit) { | 
|  | // determine max number of cores for AMD | 
|  | version (GNU_OR_LDC) asm pure nothrow @nogc { | 
|  | "cpuid" : "=a" (a), "=c" (c) : "a" (0x8000_0008) : "ebx", "edx"; | 
|  | } else asm pure nothrow @nogc { | 
|  | mov EAX, 0x8000_0008; | 
|  | cpuid; | 
|  | mov c, ECX; | 
|  | } | 
|  | cf.maxCores += c & 0xFF; | 
|  | } | 
|  | } | 
|  |  | 
|  | if (max_extended_cpuid >= 0x8000_0004) { | 
|  | uint* pnb = cast(uint*)cf.processorNameBuffer.ptr; | 
|  | version (GNU_OR_LDC) | 
|  | { | 
|  | asm pure nothrow @nogc { | 
|  | "cpuid" : "=a" (pnb[0]), "=b" (pnb[1]), "=c" (pnb[ 2]), "=d" (pnb[ 3]) : "a" (0x8000_0002); | 
|  | "cpuid" : "=a" (pnb[4]), "=b" (pnb[5]), "=c" (pnb[ 6]), "=d" (pnb[ 7]) : "a" (0x8000_0003); | 
|  | "cpuid" : "=a" (pnb[8]), "=b" (pnb[9]), "=c" (pnb[10]), "=d" (pnb[11]) : "a" (0x8000_0004); | 
|  | } | 
|  | } | 
|  | else version (D_InlineAsm_X86) | 
|  | { | 
|  | asm pure nothrow @nogc { | 
|  | push ESI; | 
|  | mov ESI, pnb; | 
|  | mov EAX, 0x8000_0002; | 
|  | cpuid; | 
|  | mov [ESI], EAX; | 
|  | mov [ESI+4], EBX; | 
|  | mov [ESI+8], ECX; | 
|  | mov [ESI+12], EDX; | 
|  | mov EAX, 0x8000_0003; | 
|  | cpuid; | 
|  | mov [ESI+16], EAX; | 
|  | mov [ESI+20], EBX; | 
|  | mov [ESI+24], ECX; | 
|  | mov [ESI+28], EDX; | 
|  | mov EAX, 0x8000_0004; | 
|  | cpuid; | 
|  | mov [ESI+32], EAX; | 
|  | mov [ESI+36], EBX; | 
|  | mov [ESI+40], ECX; | 
|  | mov [ESI+44], EDX; | 
|  | pop ESI; | 
|  | } | 
|  | } | 
|  | else version (D_InlineAsm_X86_64) | 
|  | { | 
|  | asm pure nothrow @nogc { | 
|  | push RSI; | 
|  | mov RSI, pnb; | 
|  | mov EAX, 0x8000_0002; | 
|  | cpuid; | 
|  | mov [RSI], EAX; | 
|  | mov [RSI+4], EBX; | 
|  | mov [RSI+8], ECX; | 
|  | mov [RSI+12], EDX; | 
|  | mov EAX, 0x8000_0003; | 
|  | cpuid; | 
|  | mov [RSI+16], EAX; | 
|  | mov [RSI+20], EBX; | 
|  | mov [RSI+24], ECX; | 
|  | mov [RSI+28], EDX; | 
|  | mov EAX, 0x8000_0004; | 
|  | cpuid; | 
|  | mov [RSI+32], EAX; | 
|  | mov [RSI+36], EBX; | 
|  | mov [RSI+40], ECX; | 
|  | mov [RSI+44], EDX; | 
|  | pop RSI; | 
|  | } | 
|  | } | 
|  | // Intel P4 and PM pad at front with spaces. | 
|  | // Other CPUs pad at end with nulls. | 
|  | int start = 0, end = 0; | 
|  | while (cf.processorNameBuffer[start] == ' ') { ++start; } | 
|  | while (cf.processorNameBuffer[cf.processorNameBuffer.length-end-1] == 0) { ++end; } | 
|  | cf.processorName = cast(string)(cf.processorNameBuffer[start..$-end]); | 
|  | } else { | 
|  | cf.processorName = "Unknown CPU"; | 
|  | } | 
|  | // Determine cache sizes | 
|  |  | 
|  | // Intel docs specify that they return 0 for 0x8000_0005. | 
|  | // AMD docs do not specify the behaviour for 0004 and 0002. | 
|  | // Centaur/VIA and most other manufacturers use the AMD method, | 
|  | // except Cyrix MediaGX MMX Enhanced uses their OWN form of CPUID2! | 
|  | // NS Geode GX1 provides CyrixCPUID2 _and_ does the same wrong behaviour | 
|  | // for CPUID80000005. But Geode GX uses the AMD method | 
|  |  | 
|  | // Deal with Geode GX1 - make it same as MediaGX MMX. | 
|  | if (max_extended_cpuid==0x8000_0005 && max_cpuid==2) { | 
|  | max_extended_cpuid = 0x8000_0004; | 
|  | } | 
|  | // Therefore, we try the AMD method unless it's an Intel chip. | 
|  | // If we still have no info, try the Intel methods. | 
|  | datacache[0].size = 0; | 
|  | if (max_cpuid<2 || !cf.probablyIntel) { | 
|  | if (max_extended_cpuid >= 0x8000_0005) { | 
|  | getAMDcacheinfo(); | 
|  | } else if (cf.probablyAMD) { | 
|  | // According to AMDProcRecognitionAppNote, this means CPU | 
|  | // K5 model 0, or Am5x86 (model 4), or Am4x86DX4 (model 4) | 
|  | // Am5x86 has 16Kb 4-way unified data & code cache. | 
|  | datacache[0].size = 8; | 
|  | datacache[0].associativity = 4; | 
|  | datacache[0].lineSize = 32; | 
|  | } else { | 
|  | // Some obscure CPU. | 
|  | // Values for Cyrix 6x86MX (family 6, model 0) | 
|  | datacache[0].size = 64; | 
|  | datacache[0].associativity = 4; | 
|  | datacache[0].lineSize = 32; | 
|  | } | 
|  | } | 
|  | if ((datacache[0].size == 0) && max_cpuid>=4) { | 
|  | getcacheinfoCPUID4(); | 
|  | } | 
|  | if ((datacache[0].size == 0) && max_cpuid>=2) { | 
|  | getcacheinfoCPUID2(); | 
|  | } | 
|  | if (datacache[0].size == 0) { | 
|  | // Pentium, PMMX, late model 486, or an obscure CPU | 
|  | if (mmx) { // Pentium MMX. Also has 8kB code cache. | 
|  | datacache[0].size = 16; | 
|  | datacache[0].associativity = 4; | 
|  | datacache[0].lineSize = 32; | 
|  | } else { // Pentium 1 (which also has 8kB code cache) | 
|  | // or 486. | 
|  | // Cyrix 6x86: 16, 4way, 32 linesize | 
|  | datacache[0].size = 8; | 
|  | datacache[0].associativity = 2; | 
|  | datacache[0].lineSize = 32; | 
|  | } | 
|  | } | 
|  | if (cf.probablyIntel && max_cpuid >= 0x0B) { | 
|  | // For Intel i7 and later, use function 0x0B to determine | 
|  | // cores and hyperthreads. | 
|  | getCpuInfo0B(); | 
|  | } else { | 
|  | if (hyperThreadingBit) cf.maxThreads = (apic>>>16) & 0xFF; | 
|  | else cf.maxThreads = cf.maxCores; | 
|  |  | 
|  | if (cf.probablyAMD && max_extended_cpuid >= 0x8000_001E) { | 
|  | version (GNU_OR_LDC) asm pure nothrow @nogc { | 
|  | "cpuid" : "=a" (a), "=b" (b) : "a" (0x8000_001E) : "ecx", "edx"; | 
|  | } else { | 
|  | asm pure nothrow @nogc { | 
|  | mov EAX, 0x8000_001e; | 
|  | cpuid; | 
|  | mov b, EBX; | 
|  | } | 
|  | } | 
|  | ubyte coresPerComputeUnit = ((b >> 8) & 3) + 1; | 
|  | cf.maxCores = cf.maxThreads / coresPerComputeUnit; | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
/**
 * Reports whether the CPUID instruction is supported.
 *
 * X86_64 builds always answer true. Other builds try to flip the ID bit
 * (mask 0x0020_0000) of EFLAGS and report whether the flip was observed.
 *
 * BUG(WONTFIX): Returns false for Cyrix 6x86 and 6x86L. They will be treated
 * as 486 machines.
 *
 * Returns: true if CPUID can be used, false otherwise.
 */
bool hasCPUID()
{
    version (X86_64)
    {
        return true;
    }
    else
    {
        // The EFLAGS bit that both assembly variants below try to toggle.
        enum uint idBit = 0x0020_0000;

        // Bits of EFLAGS that changed during the toggle attempt. Stays 0
        // (D's default initialiser) when neither asm variant is compiled in.
        uint toggled;

        version (GNU_OR_LDC)
        {
            // http://wiki.osdev.org/CPUID#Checking_CPUID_availability
            asm nothrow @nogc { "
                pushfl                    # Save EFLAGS
                pushfl                    # Store EFLAGS
                xorl $0x00200000, (%%esp) # Invert the ID bit in stored EFLAGS
                popfl                     # Load stored EFLAGS (with ID bit inverted)
                pushfl                    # Store EFLAGS again (ID bit may or may not be inverted)
                popl %%eax                # eax = modified EFLAGS (ID bit may or may not be inverted)
                xorl (%%esp), %%eax       # eax = whichever bits were changed
                popfl                     # Restore original EFLAGS
                " : "=a" (toggled);
            }
        }
        else version (D_InlineAsm_X86)
        {
            // Same probe in DMD syntax. Note that this variant has no final
            // popfd putting the initial EFLAGS value back.
            asm nothrow @nogc {
                pushfd;
                pop EAX;                // EAX = EFLAGS before the attempt
                mov toggled, EAX;       // remember it for the comparison
                xor EAX, 0x0020_0000;   // flip the ID bit
                push EAX;
                popfd;                  // try to load the flipped value
                pushfd;
                pop EAX;                // EAX = EFLAGS after the attempt
                xor toggled, EAX;       // toggled = bits that differ
            }
        }

        return (toggled & idBit) != 0;
    }
}
|  |  | 
|  | } else { // supported X86 | 
|  |  | 
/// Stub for this branch of the version block: CPUID is reported as unavailable.
bool hasCPUID()
{
    return false;
}
|  |  | 
/// Stub for this branch of the version block: performs no detection and only
/// stores fixed values (size 8, associativity 2, line size 32) in datacache[0].
void cpuidX86()
{
    auto firstLevel = &datacache[0];

    firstLevel.lineSize      = 32;
    firstLevel.associativity = 2;
    firstLevel.size          = 8;
}
|  | } | 
|  |  | 
|  | /* | 
|  | // TODO: Implement this function with OS support | 
|  | void cpuidPPC() | 
|  | { | 
|  | enum :int  { PPC601, PPC603, PPC603E, PPC604, | 
|  | PPC604E, PPC620, PPCG3, PPCG4, PPCG5 } | 
|  |  | 
|  | // TODO: | 
|  | // asm { mfpvr; } returns the CPU version but unfortunately it can | 
|  | // only be used in kernel mode. So OS support is required. | 
|  | int cputype = PPC603; | 
|  |  | 
|  | // 601 has an 8KB combined data & code L1 cache. | 
|  | uint sizes[] = [4, 8, 16, 16, 32, 32, 32, 32, 64]; | 
|  | ubyte ways[] = [8, 2,  4,  4,  4,  8,  8,  8,  8]; | 
|  | uint L2size[]= [0, 0,  0,  0,  0,  0,  0,  256,  512]; | 
|  | uint L3size[]= [0, 0,  0,  0,  0,  0,  0,  2048,  0]; | 
|  |  | 
|  | datacache[0].size = sizes[cputype]; | 
|  | datacache[0].associativity = ways[cputype]; | 
|  | datacache[0].lineSize = (cputype==PPCG5)? 128 : | 
|  | (cputype == PPC620 || cputype == PPCG3)? 64 : 32; | 
|  | datacache[1].size = L2size[cputype]; | 
|  | datacache[2].size = L3size[cputype]; | 
|  | datacache[1].lineSize = datacache[0].lineSize; | 
|  | datacache[2].lineSize = datacache[0].lineSize; | 
|  | } | 
|  |  | 
|  | // TODO: Implement this function with OS support | 
|  | void cpuidSparc() | 
|  | { | 
|  | // UltraSparcIIi : L1 = 16,  2way. L2 = 512, 4 way. | 
|  | // UltraSparcIII : L1 = 64,  4way. L2= 4096 or 8192. | 
|  | // UltraSparcIIIi: L1 = 64,  4way. L2= 1024, 4 way | 
|  | // UltraSparcIV  : L1 = 64,  4way. L2 = 16*1024. | 
|  | // UltraSparcIV+ : L1 = 64,  4way. L2 = 2048, L3=32*1024. | 
|  | // Sparc64V      : L1 = 128, 2way. L2 = 4096 4way. | 
|  | } | 
|  | */ | 
|  |  | 
/**
 * C-runtime constructor: probes the CPU once and publishes the results.
 *
 * Steps, in order:
 * $(UL
 * $(LI run cpuidX86() when hasCPUID() reports that CPUID is usable;)
 * $(LI make sure datacache[0] holds a non-zero size, guessing if necessary;)
 * $(LI count the populated cache levels into numCacheLevels and overwrite
 *      every unpopulated level with a "whole address space" entry;)
 * $(LI copy the gathered data into the underscore-prefixed module variables.)
 * )
 */
pragma(crt_constructor) void cpuid_initialization()
{
    auto cf = getCpuFeatures();

    // Without CPUID it's a 386 or 486, or a Cyrix 6x86, and nothing is
    // probed. Such a machine probably still has an external cache.
    if (hasCPUID())
        cpuidX86();

    // Nothing reported for the first level: guess same as Pentium 1.
    if (datacache[0].size == 0)
    {
        datacache[0].size = 8;
        datacache[0].associativity = 2;
        datacache[0].lineSize = 32;
    }

    // Level 0 always counts. Every further level either counts as well, or
    // is unused and gets filled up with the full memory space.
    numCacheLevels = 1;
    foreach (level; 1 .. datacache.length)
    {
        if (datacache[level].size != 0)
        {
            ++numCacheLevels;
            continue;
        }
        // Set this remaining level of cache equal to full address space,
        // inheriting the line size of the level before it.
        datacache[level].size = size_t.max/1024;
        datacache[level].associativity = 1;
        datacache[level].lineSize = datacache[level-1].lineSize;
    }

    // Set the immortals.

    // Local copies of the feature words; cf is not written from here on.
    const stdBits     = cf.features;
    const miscBits    = cf.miscfeatures;
    const extBits     = cf.extfeatures;
    const amdBits     = cf.amdfeatures;
    const amdMiscBits = cf.amdmiscfeatures;
    const stateBits   = cf.xfeatures;

    // --- identification and cache table ---
    _dataCaches = datacache;
    _vendor     = cast(string)cf.vendorID;
    _processor  = cf.processorName;

    // --- flags taken from cf.features ---
    _x87onChip    = (stdBits & FPU_BIT) != 0;
    _mmx          = (stdBits & MMX_BIT) != 0;
    _sse          = (stdBits & SSE_BIT) != 0;
    _sse2         = (stdBits & SSE2_BIT) != 0;
    _hasFxsr      = (stdBits & FXSR_BIT) != 0;
    _hasCmov      = (stdBits & CMOV_BIT) != 0;
    _hasRdtsc     = (stdBits & TIMESTAMP_BIT) != 0;
    _hasCmpxchg8b = (stdBits & CMPXCHG8B_BIT) != 0;
    _isItanium    = (stdBits & IA64_BIT) != 0;

    // The SYSENTER/SYSEXIT features were buggy on Pentium Pro and early
    // PentiumII (REF: www.geoffchappell.com), so the feature bit is ignored
    // on those Intel parts.
    const sysEnterUnreliable = cf.probablyIntel
        && (family < 6 || (family == 6 && (model < 3 || (model == 3 && stepping < 3))));
    _hasSysEnterSysExit = !sysEnterUnreliable && (stdBits & SYSENTERSYSEXIT_BIT) != 0;

    // --- flags taken from cf.miscfeatures ---
    _sse3          = (miscBits & SSE3_BIT) != 0;
    _ssse3         = (miscBits & SSSE3_BIT) != 0;
    _sse41         = (miscBits & SSE41_BIT) != 0;
    _sse42         = (miscBits & SSE42_BIT) != 0;
    _aes           = (miscBits & AES_BIT) != 0;
    _hasPclmulqdq  = (miscBits & PCLMULQDQ_BIT) != 0;
    _hasRdrand     = (miscBits & RDRAND_BIT) != 0;
    _hasCmpxchg16b = (miscBits & CMPXCHG16B_BIT) != 0;
    _hasPopcnt     = (miscBits & POPCNT_BIT) != 0;

    // --- AVX: needs both state bits in cf.xfeatures plus the CPU flag ---
    enum avxStateMask = XF_SSE_BIT|XF_YMM_BIT;
    _avx = (stateBits & avxStateMask) == avxStateMask && (miscBits & AVX_BIT) != 0;

    // These go through the avx / aes / hasPclmulqdq accessors, so they must
    // stay after the assignments to _avx, _aes and _hasPclmulqdq above.
    _vaes          = avx && aes;
    _hasVpclmulqdq = avx && hasPclmulqdq;
    _fma           = avx && (miscBits & FMA_BIT) != 0;
    _fp16c         = avx && (miscBits & FP16C_BIT) != 0;
    _avx2          = avx && (extBits & AVX2_BIT) != 0;

    // --- flags taken from cf.extfeatures ---
    _hle       = (extBits & HLE_BIT) != 0;
    _rtm       = (extBits & RTM_BIT) != 0;
    // NOTE(review): unlike _avx, this does not consult cf.xfeatures - confirm
    // whether OS support for the wider register state is checked elsewhere.
    _avx512f   = (extBits & AVX512F_BIT) != 0;
    _hasRdseed = (extBits & RDSEED_BIT) != 0;
    _hasSha    = (extBits & SHA_BIT) != 0;

    // --- flags taken from cf.amdfeatures ---
    _amd3dnow    = (amdBits & AMD_3DNOW_BIT) != 0;
    _amd3dnowExt = (amdBits & AMD_3DNOW_EXT_BIT) != 0;
    _amdMmx      = (amdBits & AMD_MMX_BIT) != 0;
    _isX86_64    = (amdBits & AMD64_BIT) != 0;

    // --- flags taken from cf.amdmiscfeatures ---
    _sse4a            = (amdMiscBits & SSE4A_BIT) != 0;
    _has3dnowPrefetch = (amdMiscBits & AMD_3DNOW_PREFETCH_BIT) != 0;
    _hasLahfSahf      = (amdMiscBits & LAHFSAHF_BIT) != 0;
    _hasLzcnt         = (amdMiscBits & LZCNT_BIT) != 0;

    // --- core / thread counts ---
    _hyperThreading = cf.maxThreads > cf.maxCores;
    _threadsPerCPU  = cf.maxThreads;
    _coresPerCPU    = cf.maxCores;

    // --- optimisation hints derived from vendor guess, family and model ---
    _preferAthlon   = cf.probablyAMD && family >= 6;
    _preferPentium4 = cf.probablyIntel && family == 0xF;
    _preferPentium1 = family < 6 || (family == 6 && model < 0xF && !cf.probablyIntel);
}