Actual source code: ex7.c

  1: static const char help[] = "Tests PetscDeviceAllocate().\n\n";

  3: #include "petscdevicetestcommon.h"

     // DebugPrintf(comm, fmt, ...) - forwards to PetscPrintf() on comm with "[DEBUG OUTPUT] " prepended
     // to the format string. The tag is attached by string-literal concatenation, so the first
     // variadic argument (the format) must itself be a string literal. The tag is what the
     // `grep -v "\[DEBUG OUTPUT\]"` filters in the TEST block at the end of this file match on.
  5: #define DebugPrintf(comm, ...) PetscPrintf((comm), "[DEBUG OUTPUT] " __VA_ARGS__)

  7: static PetscErrorCode IncrementSize(PetscRandom rand, PetscInt *value)
  8: {
  9:   PetscReal rval;

 11:   PetscFunctionBegin;
 12:   // set the interval such that *value += rval never goes below 0 or above 500
 13:   PetscCall(PetscRandomSetInterval(rand, -(*value), 500 - (*value)));
 14:   PetscCall(PetscRandomGetValueReal(rand, &rval));
 15:   *value += (PetscInt)rval;
 16:   PetscCall(DebugPrintf(PetscObjectComm((PetscObject)rand), "n: %" PetscInt_FMT "\n", *value));
 17:   PetscFunctionReturn(PETSC_SUCCESS);
 18: }

 20: static PetscErrorCode TestAllocate(PetscDeviceContext dctx, PetscRandom rand, PetscMemType mtype)
 21: {
 22:   PetscScalar *ptr, *tmp_ptr;
 23:   PetscInt     n = 10;

 25:   PetscFunctionBegin;
 26:   if (PetscMemTypeDevice(mtype)) {
 27:     PetscDeviceType dtype;

 29:     PetscCall(PetscDeviceContextGetDeviceType(dctx, &dtype));
 30:     // host device context cannot handle this
 31:     if (dtype == PETSC_DEVICE_HOST) PetscFunctionReturn(PETSC_SUCCESS);
 32:   }
 33:   // test basic allocation, deallocation
 34:   PetscCall(IncrementSize(rand, &n));
 35:   PetscCall(PetscDeviceMalloc(dctx, mtype, n, &ptr));
 36:   PetscCheck(ptr, PETSC_COMM_SELF, PETSC_ERR_POINTER, "PetscDeviceMalloc() return NULL pointer for %s allocation size %" PetscInt_FMT, PetscMemTypeToString(mtype), n);
 37:   // this ensures the host pointer is at least valid
 38:   if (PetscMemTypeHost(mtype)) {
 39:     for (PetscInt i = 0; i < n; ++i) ptr[i] = (PetscScalar)i;
 40:   }
 41:   PetscCall(PetscDeviceFree(dctx, ptr));

 43:   // test alignment of various types
 44:   {
 45:     char     *char_ptr;
 46:     short    *short_ptr;
 47:     int      *int_ptr;
 48:     double   *double_ptr;
 49:     long int *long_int_ptr;

 51:     PetscCall(PetscDeviceMalloc(dctx, mtype, 1, &char_ptr));
 52:     PetscCall(PetscDeviceMalloc(dctx, mtype, 1, &short_ptr));
 53:     PetscCall(PetscDeviceMalloc(dctx, mtype, 1, &int_ptr));
 54:     PetscCall(PetscDeviceMalloc(dctx, mtype, 1, &double_ptr));
 55:     PetscCall(PetscDeviceMalloc(dctx, mtype, 1, &long_int_ptr));

 57:     // if an error occurs here, it means the alignment system is broken!
 58:     PetscCall(PetscDeviceFree(dctx, char_ptr));
 59:     PetscCall(PetscDeviceFree(dctx, short_ptr));
 60:     PetscCall(PetscDeviceFree(dctx, int_ptr));
 61:     PetscCall(PetscDeviceFree(dctx, double_ptr));
 62:     PetscCall(PetscDeviceFree(dctx, long_int_ptr));
 63:   }

 65:   // test that calloc() produces cleared memory
 66:   PetscCall(IncrementSize(rand, &n));
 67:   PetscCall(PetscDeviceCalloc(dctx, mtype, n, &ptr));
 68:   PetscCheck(ptr, PETSC_COMM_SELF, PETSC_ERR_POINTER, "PetscDeviceCalloc() returned NULL pointer for %s allocation size %" PetscInt_FMT, PetscMemTypeToString(mtype), n);
 69:   if (PetscMemTypeHost(mtype)) {
 70:     tmp_ptr = ptr;
 71:   } else {
 72:     PetscCall(PetscDeviceMalloc(dctx, PETSC_MEMTYPE_HOST, n, &tmp_ptr));
 73:     PetscCall(PetscDeviceArrayCopy(dctx, tmp_ptr, ptr, n));
 74:   }
 75:   PetscCall(PetscDeviceContextSynchronize(dctx));
 76:   for (PetscInt i = 0; i < n; ++i) PetscCheck(tmp_ptr[i] == (PetscScalar)0.0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PetscDeviceCalloc() returned memory that was not cleared, ptr[%" PetscInt_FMT "] %g != 0", i, (double)PetscAbsScalar(tmp_ptr[i]));
 77:   if (tmp_ptr == ptr) {
 78:     tmp_ptr = NULL;
 79:   } else {
 80:     PetscCall(PetscDeviceFree(dctx, tmp_ptr));
 81:   }
 82:   PetscCall(PetscDeviceFree(dctx, ptr));

 84:   // test that devicearrayzero produces cleared memory
 85:   PetscCall(IncrementSize(rand, &n));
 86:   PetscCall(PetscDeviceMalloc(dctx, mtype, n, &ptr));
 87:   PetscCall(PetscDeviceArrayZero(dctx, ptr, n));
 88:   PetscCall(PetscMalloc1(n, &tmp_ptr));
 89:   PetscCall(PetscDeviceRegisterMemory(tmp_ptr, PETSC_MEMTYPE_HOST, n * sizeof(*tmp_ptr)));
 90:   for (PetscInt i = 0; i < n; ++i) tmp_ptr[i] = (PetscScalar)i;
 91:   PetscCall(PetscDeviceArrayCopy(dctx, tmp_ptr, ptr, n));
 92:   PetscCall(PetscDeviceContextSynchronize(dctx));
 93:   for (PetscInt i = 0; i < n; ++i) PetscCheck(tmp_ptr[i] == (PetscScalar)0.0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PetscDeviceArrayZero() did not not clear memory, ptr[%" PetscInt_FMT "] %g != 0", i, (double)PetscAbsScalar(tmp_ptr[i]));
 94:   PetscCall(PetscDeviceFree(dctx, tmp_ptr));
 95:   PetscCall(PetscDeviceFree(dctx, ptr));
 96:   PetscFunctionReturn(PETSC_SUCCESS);
 97: }

 99: static PetscErrorCode TestAsyncCoherence(PetscDeviceContext dctx, PetscRandom rand)
100: {
101:   const PetscInt      nsub = 2;
102:   const PetscInt      n    = 1024;
103:   PetscScalar        *ptr, *tmp_ptr;
104:   PetscDeviceType     dtype;
105:   PetscDeviceContext *sub;

107:   PetscFunctionBegin;
108:   PetscCall(PetscDeviceContextGetDeviceType(dctx, &dtype));
109:   // ensure the streams are nonblocking
110:   PetscCall(PetscDeviceContextForkWithStreamType(dctx, PETSC_STREAM_GLOBAL_NONBLOCKING, nsub, &sub));
111:   // do a warmup to ensure each context acquires any necessary data structures
112:   for (PetscInt i = 0; i < nsub; ++i) {
113:     PetscCall(PetscDeviceMalloc(sub[i], PETSC_MEMTYPE_HOST, n, &ptr));
114:     PetscCall(PetscDeviceFree(sub[i], ptr));
115:     if (dtype != PETSC_DEVICE_HOST) {
116:       PetscCall(PetscDeviceMalloc(sub[i], PETSC_MEMTYPE_DEVICE, n, &ptr));
117:       PetscCall(PetscDeviceFree(sub[i], ptr));
118:     }
119:   }

121:   // allocate on one
122:   PetscCall(PetscDeviceMalloc(sub[0], PETSC_MEMTYPE_HOST, n, &ptr));
123:   // free on the other
124:   PetscCall(PetscDeviceFree(sub[1], ptr));

126:   // allocate on one
127:   PetscCall(PetscDeviceMalloc(sub[0], PETSC_MEMTYPE_HOST, n, &ptr));
128:   // zero on the other
129:   PetscCall(PetscDeviceArrayZero(sub[1], ptr, n));
130:   PetscCall(PetscDeviceContextSynchronize(sub[1]));
131:   for (PetscInt i = 0; i < n; ++i) {
132:     for (PetscInt i = 0; i < n; ++i) PetscCheck(ptr[i] == (PetscScalar)0.0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PetscDeviceArrayZero() was not properly serialized, ptr[%" PetscInt_FMT "] %g != 0", i, (double)PetscAbsScalar(ptr[i]));
133:   }
134:   PetscCall(PetscDeviceFree(sub[1], ptr));

136:   // test the transfers are serialized
137:   if (dtype != PETSC_DEVICE_HOST) {
138:     PetscCall(PetscDeviceCalloc(dctx, PETSC_MEMTYPE_DEVICE, n, &ptr));
139:     PetscCall(PetscDeviceMalloc(dctx, PETSC_MEMTYPE_HOST, n, &tmp_ptr));
140:     PetscCall(PetscDeviceArrayCopy(sub[0], tmp_ptr, ptr, n));
141:     PetscCall(PetscDeviceContextSynchronize(sub[0]));
142:     for (PetscInt i = 0; i < n; ++i) {
143:       for (PetscInt i = 0; i < n; ++i) PetscCheck(tmp_ptr[i] == (PetscScalar)0.0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PetscDeviceArrayCopt() was not properly serialized, ptr[%" PetscInt_FMT "] %g != 0", i, (double)PetscAbsScalar(tmp_ptr[i]));
144:     }
145:     PetscCall(PetscDeviceFree(sub[1], ptr));
146:   }

148:   PetscCall(PetscDeviceContextJoin(dctx, nsub, PETSC_DEVICE_CONTEXT_JOIN_DESTROY, &sub));
149:   PetscFunctionReturn(PETSC_SUCCESS);
150: }

152: int main(int argc, char *argv[])
153: {
154:   PetscDeviceContext dctx;
155:   PetscRandom        rand;

157:   PetscFunctionBeginUser;
158:   PetscCall(PetscInitialize(&argc, &argv, NULL, help));

160:   // A vile hack. The -info output is used to test correctness in this test which prints --
161:   // among other things -- the PetscObjectId of the PetscDevicContext and the allocated memory.
162:   //
163:   // Due to device and host creating slightly different number of objects on startup there will
164:   // be a mismatch in the ID's. So for the tests involving the host we sit here creating
165:   // PetscContainers (and incrementing the global PetscObjectId counter) until it reaches some
166:   // arbitrarily high number to ensure that our first PetscDeviceContext has the same ID across
167:   // systems.
168:   {
169:     PetscObjectId prev_id = 0;

171:     do {
172:       PetscContainer c;
173:       PetscObjectId  id;

175:       PetscCall(PetscContainerCreate(PETSC_COMM_WORLD, &c));
176:       PetscCall(PetscObjectGetId((PetscObject)c, &id));
177:       // sanity check, in case PetscContainer ever stops being a PetscObject
178:       PetscCheck(id > prev_id, PETSC_COMM_SELF, PETSC_ERR_PLIB, "PetscObjectIds are not increasing for successively created PetscContainers! current: %" PetscInt64_FMT ", previous: %" PetscInt64_FMT, id, prev_id);
179:       prev_id = id;
180:       PetscCall(PetscContainerDestroy(&c));
181:     } while (prev_id < 50);
182:   }
183:   PetscCall(PetscDeviceContextGetCurrentContext(&dctx));

185:   PetscCall(PetscRandomCreate(PETSC_COMM_WORLD, &rand));
186:   // this seed just so happens to keep the allocation size increasing
187:   PetscCall(PetscRandomSetSeed(rand, 123));
188:   PetscCall(PetscRandomSeed(rand));
189:   PetscCall(PetscRandomSetFromOptions(rand));

191:   PetscCall(TestAllocate(dctx, rand, PETSC_MEMTYPE_HOST));
192:   PetscCall(TestAllocate(dctx, rand, PETSC_MEMTYPE_DEVICE));
193:   PetscCall(TestAsyncCoherence(dctx, rand));

195:   PetscCall(PetscRandomDestroy(&rand));
196:   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "EXIT_SUCCESS\n"));
197:   PetscCall(PetscFinalize());
198:   return 0;
199: }

201: /*TEST

203:   testset:
204:     requires: defined(PETSC_USE_INFO), defined(PETSC_USE_DEBUG), cxx
205:     args: -info :device
206:     suffix: with_info
207:     test:
208:       requires: !device
209:       suffix: host_no_device
210:     test:
211:       requires: device
212:       args: -default_device_type host
213:       filter: sed -e 's/host/IMPL/g' -e 's/cuda/IMPL/g' -e 's/hip/IMPL/g' -e 's/sycl/IMPL/g'
214:       suffix: host_with_device
215:     test:
216:       requires: cuda
217:       args: -default_device_type cuda
218:       suffix: cuda
219:     test:
220:       requires: hip
221:       args: -default_device_type hip
222:       suffix: hip
223:     test:
224:       requires: sycl
225:       args: -default_device_type sycl
226:       suffix: sycl

228:   testset:
229:     output_file: ./output/ExitSuccess.out
230:     requires: !defined(PETSC_USE_DEBUG)
231:     filter: grep -v "\[DEBUG OUTPUT\]"
232:     suffix: no_info
233:     test:
234:       requires: !device
235:       suffix: host_no_device
236:     test:
237:       requires: device
238:       args: -default_device_type host
239:       suffix: host_with_device
240:     test:
241:       requires: cuda
242:       args: -default_device_type cuda
243:       suffix: cuda
244:     test:
245:       requires: hip
246:       args: -default_device_type hip
247:       suffix: hip
248:     test:
249:       requires: sycl
250:       args: -default_device_type sycl
251:       suffix: sycl

253:   test:
254:     requires: !cxx
255:     output_file: ./output/ExitSuccess.out
256:     filter: grep -v "\[DEBUG OUTPUT\]"
257:     suffix: no_cxx

259: TEST*/