Actual source code: sseenabled.c


  2: #include <petscsys.h>

  4: #if defined(PETSC_HAVE_SSE)

  6:   #include PETSC_HAVE_SSE
  7:   #define SSE_FEATURE_FLAG 0x2000000 /* Mask for bit 25 (from bit 0) */

  9: PetscErrorCode PetscSSEHardwareTest(PetscBool *flag)
 10: {
 11:   char      vendor[13];
 12:   char      Intel[13] = "GenuineIntel";
 13:   char      AMD[13]   = "AuthenticAMD";
 14:   char      Hygon[13] = "HygonGenuine";
 15:   PetscBool flg;

 17:   PetscFunctionBegin;
 18:   PetscCall(PetscStrncpy(vendor, "************", sizeof(vendor)));
 19:   CPUID_GET_VENDOR(vendor);
 20:   PetscCall(PetscStrcmp(vendor, Intel, &flg));
 21:   if (!flg) PetscCall(PetscStrcmp(vendor, AMD, &flg));
 22:   if (!flg) {
 23:     PetscCall(PetscStrcmp(vendor, Hygon, &flg));
 24:     if (flg) {
 25:       /* Intel, AMD, and Hygon use bit 25 of CPUID_FEATURES */
 26:       /* to denote availability of SSE Support */
 27:       unsigned long myeax, myebx, myecx, myedx;
 28:       CPUID(CPUID_FEATURES, &myeax, &myebx, &myecx, &myedx);
 29:       if (myedx & SSE_FEATURE_FLAG) *flag = PETSC_TRUE;
 30:       else *flag = PETSC_FALSE;
 31:     }
 32:     PetscFunctionReturn(PETSC_SUCCESS);
 33:   }
 34: }

 36:   #if defined(PETSC_HAVE_FORK)
 37:     #include <signal.h>
 38:     /*
  39:    Early versions of the Linux kernel disable SSE hardware because
  40:    they do not know how to preserve the SSE state at a context switch.
 41:    To detect this feature, try an sse instruction in another process.
 42:    If it works, great!  If not, an illegal instruction signal will be thrown,
 43:    so catch it and return an error code.
 44: */
 45:     #define PetscSSEOSEnabledTest(arg) PetscSSEOSEnabledTest_Linux(arg)

 47: static void PetscSSEDisabledHandler(int sig)
 48: {
 49:   signal(SIGILL, SIG_IGN);
 50:   exit(-1);
 51: }

 53: PetscErrorCode PetscSSEOSEnabledTest_Linux(PetscBool *flag)
 54: {
 55:   int status, pid = 0;

 57:   PetscFunctionBegin;
 58:   signal(SIGILL, PetscSSEDisabledHandler);
 59:   pid = fork();
 60:   if (pid == 0) {
 61:     SSE_SCOPE_BEGIN;
 62:     XOR_PS(XMM0, XMM0);
 63:     SSE_SCOPE_END;
 64:     exit(0);
 65:   } else wait(&status);
 66:   if (!status) *flag = PETSC_TRUE;
 67:   else *flag = PETSC_FALSE;
 68:   PetscFunctionReturn(PETSC_SUCCESS);
 69: }

 71:   #else
 72:     /*
 73:    Windows 95/98/NT4 should have a Windows Update/Service Patch which enables this hardware.
 74:    Windows ME/2000 doesn't disable SSE Hardware
 75: */
 76:     #define PetscSSEOSEnabledTest(arg) PetscSSEOSEnabledTest_TRUE(arg)
 77:   #endif

 79: PetscErrorCode PetscSSEOSEnabledTest_TRUE(PetscBool *flag)
 80: {
 81:   PetscFunctionBegin;
 82:   if (flag) *flag = PETSC_TRUE;
 83:   PetscFunctionReturn(PETSC_SUCCESS);
 84: }

 86: #else /* Not defined PETSC_HAVE_SSE */

 88:   #define PetscSSEHardwareTest(arg)  PetscSSEEnabledTest_FALSE(arg)
 89:   #define PetscSSEOSEnabledTest(arg) PetscSSEEnabledTest_FALSE(arg)

 91: PetscErrorCode PetscSSEEnabledTest_FALSE(PetscBool *flag)
 92: {
 93:   PetscFunctionBegin;
 94:   if (flag) *flag = PETSC_FALSE;
 95:   PetscFunctionReturn(PETSC_SUCCESS);
 96: }

 98: #endif /* defined PETSC_HAVE_SSE */

100: static PetscBool petsc_sse_local_is_untested  = PETSC_TRUE;
101: static PetscBool petsc_sse_enabled_local      = PETSC_FALSE;
102: static PetscBool petsc_sse_global_is_untested = PETSC_TRUE;
103: static PetscBool petsc_sse_enabled_global     = PETSC_FALSE;
104: /*@C
105:      PetscSSEIsEnabled - Determines if Intel Streaming SIMD Extensions (SSE) to the x86 instruction
106:      set can be used.  Some operating systems do not allow the use of these instructions despite
107:      hardware availability.

109:      Collective

111:      Input Parameter:
112: .    comm - the MPI Communicator

114:      Output Parameters:
115: +    lflag - Local Flag  `PETSC_TRUE` if enabled in this process
116: -    gflag - Global Flag `PETSC_TRUE` if enabled for all processes in comm

118:      Options Database Key:
119: .    -disable_sse - Disable use of hand tuned Intel SSE implementations

121:      Level: developer

123:      Note:
124:      `NULL` can be specified for `lflag` or `gflag` if either of these values are not desired.
125: @*/
126: PetscErrorCode PetscSSEIsEnabled(MPI_Comm comm, PetscBool *lflag, PetscBool *gflag)
127: {
128:   PetscBool disabled_option;

130:   PetscFunctionBegin;
131:   if (petsc_sse_local_is_untested && petsc_sse_global_is_untested) {
132:     disabled_option = PETSC_FALSE;

134:     PetscCall(PetscOptionsGetBool(NULL, NULL, "-disable_sse", &disabled_option, NULL));
135:     if (disabled_option) {
136:       petsc_sse_local_is_untested  = PETSC_FALSE;
137:       petsc_sse_enabled_local      = PETSC_FALSE;
138:       petsc_sse_global_is_untested = PETSC_FALSE;
139:       petsc_sse_enabled_global     = PETSC_FALSE;
140:     }

142:     if (petsc_sse_local_is_untested) {
143:       PetscCall(PetscSSEHardwareTest(&petsc_sse_enabled_local));
144:       if (petsc_sse_enabled_local) { PetscCall(PetscSSEOSEnabledTest(&petsc_sse_enabled_local)); }
145:       petsc_sse_local_is_untested = PETSC_FALSE;
146:     }

148:     if (gflag && petsc_sse_global_is_untested) {
149:       PetscCall(MPIU_Allreduce(&petsc_sse_enabled_local, &petsc_sse_enabled_global, 1, MPIU_BOOL, MPI_LAND, comm));

151:       petsc_sse_global_is_untested = PETSC_FALSE;
152:     }
153:   }

155:   if (lflag) *lflag = petsc_sse_enabled_local;
156:   if (gflag) *gflag = petsc_sse_enabled_global;
157:   PetscFunctionReturn(PETSC_SUCCESS);
158: }