Actual source code: sseenabled.c
2: #include <petscsys.h>
4: #if defined(PETSC_HAVE_SSE)
6: #include PETSC_HAVE_SSE
7: #define SSE_FEATURE_FLAG 0x2000000 /* Mask for bit 25 (from bit 0) */
9: PetscErrorCode PetscSSEHardwareTest(PetscBool *flag)
10: {
11: char vendor[13];
12: char Intel[13] = "GenuineIntel";
13: char AMD[13] = "AuthenticAMD";
14: char Hygon[13] = "HygonGenuine";
15: PetscBool flg;
17: PetscFunctionBegin;
18: PetscCall(PetscStrncpy(vendor, "************", sizeof(vendor)));
19: CPUID_GET_VENDOR(vendor);
20: PetscCall(PetscStrcmp(vendor, Intel, &flg));
21: if (!flg) PetscCall(PetscStrcmp(vendor, AMD, &flg));
22: if (!flg) {
23: PetscCall(PetscStrcmp(vendor, Hygon, &flg));
24: if (flg) {
25: /* Intel, AMD, and Hygon use bit 25 of CPUID_FEATURES */
26: /* to denote availability of SSE Support */
27: unsigned long myeax, myebx, myecx, myedx;
28: CPUID(CPUID_FEATURES, &myeax, &myebx, &myecx, &myedx);
29: if (myedx & SSE_FEATURE_FLAG) *flag = PETSC_TRUE;
30: else *flag = PETSC_FALSE;
31: }
32: PetscFunctionReturn(PETSC_SUCCESS);
33: }
34: }
36: #if defined(PETSC_HAVE_FORK)
#include <errno.h>
#include <signal.h>
#include <stdlib.h>
#include <sys/wait.h>
#include <unistd.h>
/*
   Early versions of the Linux kernel disabled the SSE hardware because
   they did not know how to preserve the SSE state across a context switch.
   To detect this, try an SSE instruction in another process.
   If it works, great!  If not, an illegal-instruction signal (SIGILL) is raised,
   so catch it and report the failure through the process exit status.
*/
45: #define PetscSSEOSEnabledTest(arg) PetscSSEOSEnabledTest_Linux(arg)
/*
  PetscSSEDisabledHandler - SIGILL handler for the SSE probe: terminates the process with a
  non-zero exit status so the process waiting on it can tell that the instruction trapped.

  Input Parameter:
. sig - the signal number (unused)
*/
static void PetscSSEDisabledHandler(int sig)
{
  (void)sig;
  /* Ignore any further SIGILL while the process is going down */
  signal(SIGILL, SIG_IGN);
  /*
     _Exit() rather than exit(): exit() is not async-signal-safe, and in a forked process it
     would run the atexit() handlers and flush the stdio buffers inherited from the parent.
  */
  _Exit(-1);
}
53: PetscErrorCode PetscSSEOSEnabledTest_Linux(PetscBool *flag)
54: {
55: int status, pid = 0;
57: PetscFunctionBegin;
58: signal(SIGILL, PetscSSEDisabledHandler);
59: pid = fork();
60: if (pid == 0) {
61: SSE_SCOPE_BEGIN;
62: XOR_PS(XMM0, XMM0);
63: SSE_SCOPE_END;
64: exit(0);
65: } else wait(&status);
66: if (!status) *flag = PETSC_TRUE;
67: else *flag = PETSC_FALSE;
68: PetscFunctionReturn(PETSC_SUCCESS);
69: }
71: #else
72: /*
73: Windows 95/98/NT4 should have a Windows Update/Service Patch which enables this hardware.
74: Windows ME/2000 doesn't disable SSE Hardware
75: */
76: #define PetscSSEOSEnabledTest(arg) PetscSSEOSEnabledTest_TRUE(arg)
77: #endif
79: PetscErrorCode PetscSSEOSEnabledTest_TRUE(PetscBool *flag)
80: {
81: PetscFunctionBegin;
82: if (flag) *flag = PETSC_TRUE;
83: PetscFunctionReturn(PETSC_SUCCESS);
84: }
86: #else /* Not defined PETSC_HAVE_SSE */
88: #define PetscSSEHardwareTest(arg) PetscSSEEnabledTest_FALSE(arg)
89: #define PetscSSEOSEnabledTest(arg) PetscSSEEnabledTest_FALSE(arg)
91: PetscErrorCode PetscSSEEnabledTest_FALSE(PetscBool *flag)
92: {
93: PetscFunctionBegin;
94: if (flag) *flag = PETSC_FALSE;
95: PetscFunctionReturn(PETSC_SUCCESS);
96: }
98: #endif /* defined PETSC_HAVE_SSE */
100: static PetscBool petsc_sse_local_is_untested = PETSC_TRUE;
101: static PetscBool petsc_sse_enabled_local = PETSC_FALSE;
102: static PetscBool petsc_sse_global_is_untested = PETSC_TRUE;
103: static PetscBool petsc_sse_enabled_global = PETSC_FALSE;
104: /*@C
105: PetscSSEIsEnabled - Determines if Intel Streaming SIMD Extensions (SSE) to the x86 instruction
106: set can be used. Some operating systems do not allow the use of these instructions despite
107: hardware availability.
109: Collective
111: Input Parameter:
112: . comm - the MPI Communicator
114: Output Parameters:
115: + lflag - Local Flag `PETSC_TRUE` if enabled in this process
116: - gflag - Global Flag `PETSC_TRUE` if enabled for all processes in comm
118: Options Database Key:
119: . -disable_sse - Disable use of hand tuned Intel SSE implementations
121: Level: developer
123: Note:
124: `NULL` can be specified for `lflag` or `gflag` if either of these values are not desired.
125: @*/
126: PetscErrorCode PetscSSEIsEnabled(MPI_Comm comm, PetscBool *lflag, PetscBool *gflag)
127: {
128: PetscBool disabled_option;
130: PetscFunctionBegin;
131: if (petsc_sse_local_is_untested && petsc_sse_global_is_untested) {
132: disabled_option = PETSC_FALSE;
134: PetscCall(PetscOptionsGetBool(NULL, NULL, "-disable_sse", &disabled_option, NULL));
135: if (disabled_option) {
136: petsc_sse_local_is_untested = PETSC_FALSE;
137: petsc_sse_enabled_local = PETSC_FALSE;
138: petsc_sse_global_is_untested = PETSC_FALSE;
139: petsc_sse_enabled_global = PETSC_FALSE;
140: }
142: if (petsc_sse_local_is_untested) {
143: PetscCall(PetscSSEHardwareTest(&petsc_sse_enabled_local));
144: if (petsc_sse_enabled_local) { PetscCall(PetscSSEOSEnabledTest(&petsc_sse_enabled_local)); }
145: petsc_sse_local_is_untested = PETSC_FALSE;
146: }
148: if (gflag && petsc_sse_global_is_untested) {
149: PetscCall(MPIU_Allreduce(&petsc_sse_enabled_local, &petsc_sse_enabled_global, 1, MPIU_BOOL, MPI_LAND, comm));
151: petsc_sse_global_is_untested = PETSC_FALSE;
152: }
153: }
155: if (lflag) *lflag = petsc_sse_enabled_local;
156: if (gflag) *gflag = petsc_sse_enabled_global;
157: PetscFunctionReturn(PETSC_SUCCESS);
158: }