Actual source code: bench_spmv.c

  1: static char help[] = "Driver for benchmarking SpMV.";

  3: #include <petscmat.h>
  4: #include "cJSON.h"
  5: #include "mmloader.h"

  7: char *read_file(const char *filename)
  8: {
  9:   FILE  *file       = NULL;
 10:   long   length     = 0;
 11:   char  *content    = NULL;
 12:   size_t read_chars = 0;

 14:   /* open in read binary mode */
 15:   file = fopen(filename, "rb");
 16:   if (file) {
 17:     /* get the length */
 18:     fseek(file, 0, SEEK_END);
 19:     length = ftell(file);
 20:     fseek(file, 0, SEEK_SET);
 21:     /* allocate content buffer */
 22:     content = (char *)malloc((size_t)length + sizeof(""));
 23:     /* read the file into memory */
  24:     if (content) read_chars = fread(content, sizeof(char), (size_t)length, file); /* skip the read if the allocation failed */
  25:     if (content) content[read_chars] = '\0';
 26:     fclose(file);
 27:   }
 28:   return content;
 29: }

 31: void write_file(const char *filename, const char *content)
 32: {
 33:   FILE *file = NULL;
 34:   file       = fopen(filename, "w");
 35:   if (file) { fputs(content, file); }
  36:   if (file) fclose(file);
 37: }

 39: int ParseJSON(const char *const inputjsonfile, char ***outputfilenames, char ***outputgroupnames, char ***outputmatnames, int *nmat)
 40: {
 41:   char        *content     = read_file(inputjsonfile);
 42:   cJSON       *matrix_json = NULL;
 43:   const cJSON *problem = NULL, *elem = NULL;
 44:   const cJSON *item = NULL;
 45:   char       **filenames, **groupnames, **matnames;
 46:   int          i, n;
 47:   if (!content) return 0;
 48:   matrix_json = cJSON_Parse(content);
  49:   if (!matrix_json) { free(content); return 0; }
 50:   n          = cJSON_GetArraySize(matrix_json);
 51:   *nmat      = n;
 52:   filenames  = (char **)malloc(sizeof(char *) * n);
 53:   groupnames = (char **)malloc(sizeof(char *) * n);
 54:   matnames   = (char **)malloc(sizeof(char *) * n);
 55:   for (i = 0; i < n; i++) {
 56:     elem         = cJSON_GetArrayItem(matrix_json, i);
 57:     item         = cJSON_GetObjectItemCaseSensitive(elem, "filename");
 58:     filenames[i] = (char *)malloc(sizeof(char) * (strlen(item->valuestring) + 1));
 59:     strcpy(filenames[i], item->valuestring);
 60:     problem       = cJSON_GetObjectItemCaseSensitive(elem, "problem");
 61:     item          = cJSON_GetObjectItemCaseSensitive(problem, "group");
  62:     groupnames[i] = (char *)malloc(sizeof(char) * (strlen(item->valuestring) + 1));
 63:     strcpy(groupnames[i], item->valuestring);
 64:     item        = cJSON_GetObjectItemCaseSensitive(problem, "name");
  65:     matnames[i] = (char *)malloc(sizeof(char) * (strlen(item->valuestring) + 1));
 66:     strcpy(matnames[i], item->valuestring);
 67:   }
 68:   cJSON_Delete(matrix_json);
 69:   free(content);
 70:   *outputfilenames  = filenames;
 71:   *outputgroupnames = groupnames;
 72:   *outputmatnames   = matnames;
 73:   return 0;
 74: }
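
/*
  A minimal sketch (not part of the build) of the JSON layout that ParseJSON() assumes:
  a top-level array whose entries each provide a "filename" string and a "problem"
  object with "group" and "name" strings. The concrete values below are hypothetical.

    [
      {
        "filename": "/path/to/1138_bus.mtx",
        "problem": { "group": "HB", "name": "1138_bus" }
      }
    ]
*/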

 76: int UpdateJSON(const char *const inputjsonfile, PetscReal *spmv_times, PetscReal starting_spmv_time, const char *const matformat, PetscBool use_gpu, PetscInt repetitions)
 77: {
 78:   char  *content     = read_file(inputjsonfile);
 79:   cJSON *matrix_json = NULL;
 80:   cJSON *elem        = NULL;
 81:   int    i, n;
 82:   if (!content) return 0;
 83:   matrix_json = cJSON_Parse(content);
  84:   if (!matrix_json) { free(content); return 0; }
 85:   n = cJSON_GetArraySize(matrix_json);
 86:   for (i = 0; i < n; i++) {
 87:     cJSON *spmv   = NULL;
 88:     cJSON *format = NULL;
 89:     elem          = cJSON_GetArrayItem(matrix_json, i);
 90:     spmv          = cJSON_GetObjectItem(elem, "spmv");
 91:     if (spmv) {
 92:       format = cJSON_GetObjectItem(spmv, matformat);
 93:       if (format) {
 94:         cJSON_SetNumberValue(cJSON_GetObjectItem(format, "time"), (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions);
 95:         cJSON_SetIntValue(cJSON_GetObjectItem(format, "repetitions"), repetitions);
 96:       } else {
 97:         format = cJSON_CreateObject();
 98:         cJSON_AddItemToObject(spmv, matformat, format);
 99:         cJSON_AddNumberToObject(format, "time", (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions);
100:         cJSON_AddNumberToObject(format, "repetitions", repetitions);
101:       }
102:     } else {
103:       spmv = cJSON_CreateObject();
104:       cJSON_AddItemToObject(elem, "spmv", spmv);
105:       format = cJSON_CreateObject();
106:       cJSON_AddItemToObject(spmv, matformat, format);
107:       cJSON_AddNumberToObject(format, "time", (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions);
108:       cJSON_AddNumberToObject(format, "repetitions", repetitions);
109:     }
110:   }
111:   free(content);
112:   content = cJSON_Print(matrix_json);
113:   write_file(inputjsonfile, content);
114:   cJSON_Delete(matrix_json);
115:   free(content);
116:   return 0;
117: }
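
/*
  For reference, UpdateJSON() writes its results back into the same file by adding (or
  updating) an "spmv" object in each array entry, keyed by the matrix format. A
  hypothetical entry after a "csr" run with 10 repetitions might look like

    "spmv": { "csr": { "time": 1.2e-4, "repetitions": 10 } }

  where "time" is the per-call average, i.e. the difference of cumulative MatMult times
  divided by the repetition count.
*/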

119: /*
 120:   For GPU formats, we keep two copies of the matrix on the CPU and one copy on the GPU.
 121:   The extra CPU copy allows us to destroy the GPU matrix and recreate it efficiently
 122:   in each repetition. As a result, each MatMult call starts fresh, so we capture the
 123:   first-time overhead (e.g. of the cuSPARSE SpMV) and avoid cache effects from
 124:   consecutive calls.
125: */
126: PetscErrorCode TimedSpMV(Mat A, Vec b, PetscReal *time, const char *petscmatformat, PetscBool use_gpu, PetscInt repetitions)
127: {
128:   Mat            A2 = NULL;
129:   PetscInt       i;
130:   Vec            u;
131:   PetscLogDouble vstart = 0, vend = 0;
132:   PetscBool      isaijcusparse, isaijkokkos;

134:   PetscFunctionBeginUser;
135:   PetscCall(PetscStrcmp(petscmatformat, MATAIJCUSPARSE, &isaijcusparse));
136:   PetscCall(PetscStrcmp(petscmatformat, MATAIJKOKKOS, &isaijkokkos));
137:   if (isaijcusparse) PetscCall(VecSetType(b, VECCUDA));
138:   if (isaijkokkos) PetscCall(VecSetType(b, VECKOKKOS));
139:   PetscCall(VecDuplicate(b, &u));
140:   if (time) *time = 0.0;
141:   for (i = 0; i < repetitions; i++) {
142:     if (use_gpu) {
143:       PetscCall(MatDestroy(&A2));
144:       PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &A2));
145:       PetscCall(MatConvert(A2, petscmatformat, MAT_INPLACE_MATRIX, &A2));
146:     } else A2 = A;
147:     /* Timing MatMult */
148:     if (time) PetscCall(PetscTime(&vstart));

150:     PetscCall(MatMult(A2, b, u));

152:     if (time) {
153:       PetscCall(PetscTime(&vend));
154:       *time += (PetscReal)(vend - vstart);
155:     }
156:   }
157:   PetscCall(VecDestroy(&u));
158:   if (repetitions > 0 && use_gpu) PetscCall(MatDestroy(&A2));
159:   PetscFunctionReturn(PETSC_SUCCESS);
160: }
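
/*
  Usage sketch (hypothetical, assuming A and b are already assembled): *time accumulates
  the wall time over all repetitions, so a per-call average still requires dividing by
  the repetition count, as UpdateJSON() does.

    PetscReal total = 0.0;
    PetscCall(TimedSpMV(A, b, &total, MATAIJ, PETSC_FALSE, 10));
    PetscCall(PetscPrintf(PETSC_COMM_WORLD, "average SpMV time: %g s\n", (double)(total / 10)));
*/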

162: PetscErrorCode PetscLogSpMVTime(PetscReal *gputime, PetscReal *cputime, PetscReal *gpuflops, const char *petscmatformat)
163: {
164:   PetscLogEvent      event;
165:   PetscEventPerfInfo eventInfo;
166:   //PetscReal          gpuflopRate;

168:   // if (matformat) {
169:   //   PetscCall(PetscLogEventGetId("MatCUDACopyTo", &event));
170:   // } else {
171:   //  PetscCall(PetscLogEventGetId("MatCUSPARSCopyTo", &event));
172:   // }
173:   // PetscCall(PetscLogEventGetPerfInfo(PETSC_DETERMINE, event, &eventInfo));
174:   // PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%.4e ", eventInfo.time));

176:   PetscFunctionBeginUser;
177:   PetscCall(PetscLogEventGetId("MatMult", &event));
178:   PetscCall(PetscLogEventGetPerfInfo(PETSC_DETERMINE, event, &eventInfo));
179:   //gpuflopRate = eventInfo.GpuFlops/eventInfo.GpuTime;
180:   // PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%.2f %.4e %.4e\n", gpuflopRate/1.e6, eventInfo.GpuTime, eventInfo.time));
181:   if (cputime) *cputime = eventInfo.time;
182: #if defined(PETSC_HAVE_DEVICE)
183:   if (gputime) *gputime = eventInfo.GpuTime;
184:   if (gpuflops) *gpuflops = eventInfo.GpuFlops / 1.e6;
185: #endif
186:   PetscFunctionReturn(PETSC_SUCCESS);
187: }
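
/*
  Note: the "MatMult" event counters are cumulative over the whole run, which is why
  main() subtracts the previous reading (starting_spmv_time or spmv_times[i-1]) to
  isolate a single matrix; gpuflops is reported in MFlops (GpuFlops / 1.e6).
*/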

189: PetscErrorCode MapToPetscMatType(const char *matformat, PetscBool use_gpu, char **petscmatformat)
190: {
191:   PetscBool iscsr, issell, iscsrkokkos;

193:   PetscFunctionBeginUser;
194:   PetscCall(PetscStrcmp(matformat, "csr", &iscsr));
195:   if (iscsr) {
196:     if (use_gpu) PetscCall(PetscStrallocpy(MATAIJCUSPARSE, petscmatformat));
197:     else PetscCall(PetscStrallocpy(MATAIJ, petscmatformat));
198:   } else {
199:     PetscCall(PetscStrcmp(matformat, "sell", &issell));
200:     if (issell) {
201:       if (use_gpu) PetscCall(PetscStrallocpy(MATSELL, petscmatformat)); // placeholder for SELLCUDA
202:       else PetscCall(PetscStrallocpy(MATSELL, petscmatformat));
203:     } else {
204:       PetscCall(PetscStrcmp(matformat, "csrkokkos", &iscsrkokkos));
205:       if (iscsrkokkos) PetscCall(PetscStrallocpy(MATAIJKOKKOS, petscmatformat));
206:     }
207:   }
208:   PetscFunctionReturn(PETSC_SUCCESS);
209: }
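
/*
  Summary of the mapping above:
    "csr"       -> MATAIJ (CPU) or MATAIJCUSPARSE (GPU)
    "sell"      -> MATSELL (the GPU case is a placeholder until a SELLCUDA type is used)
    "csrkokkos" -> MATAIJKOKKOS
  An unrecognized string leaves *petscmatformat untouched; main() initializes it to NULL
  and rejects that case with PetscCheck().
*/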

211: int main(int argc, char **args)
212: {
213:   PetscInt    nmat = 1, nformats = 5, i, j, repetitions = 1;
214:   Mat         A;
215:   Vec         b;
216:   char        jfilename[PETSC_MAX_PATH_LEN];
217:   char        filename[PETSC_MAX_PATH_LEN], bfilename[PETSC_MAX_PATH_LEN];
218:   char        groupname[PETSC_MAX_PATH_LEN], matname[PETSC_MAX_PATH_LEN];
219:   char       *matformats[5];
220:   char      **filenames = NULL, **groupnames = NULL, **matnames = NULL;
221:   char        ordering[256] = MATORDERINGRCM;
222:   PetscBool   bflg, flg1, flg2, flg3, use_gpu = PETSC_FALSE, permute = PETSC_FALSE;
223:   IS          rowperm = NULL, colperm = NULL;
224:   PetscViewer fd;
225:   PetscReal   starting_spmv_time = 0, *spmv_times;

227:   PetscCall(PetscOptionsInsertString(NULL, "-log_view_gpu_time -log_view :/dev/null"));
228:   PetscCall(PetscInitialize(&argc, &args, (char *)0, help));
229:   PetscCall(PetscOptionsGetStringArray(NULL, NULL, "-formats", matformats, &nformats, &flg1));
230:   if (!flg1) {
231:     nformats = 1;
232:     PetscCall(PetscStrallocpy("csr", &matformats[0]));
233:   }
234:   PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_gpu", &use_gpu, NULL));
235:   PetscCall(PetscOptionsGetInt(NULL, NULL, "-repetitions", &repetitions, NULL));
236:   /* Read matrix and RHS */
237:   PetscCall(PetscOptionsGetString(NULL, NULL, "-groupname", groupname, PETSC_MAX_PATH_LEN, NULL));
238:   PetscCall(PetscOptionsGetString(NULL, NULL, "-matname", matname, PETSC_MAX_PATH_LEN, NULL));
239:   PetscCall(PetscOptionsGetString(NULL, NULL, "-ABIN", filename, PETSC_MAX_PATH_LEN, &flg1));
240:   PetscCall(PetscOptionsGetString(NULL, NULL, "-AMTX", filename, PETSC_MAX_PATH_LEN, &flg2));
241:   PetscCall(PetscOptionsGetString(NULL, NULL, "-AJSON", jfilename, PETSC_MAX_PATH_LEN, &flg3));
242:   PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "Extra options", "");
 243:   PetscCall(PetscOptionsFList("-permute", "Permute the matrix and vector to solve in a new ordering", "", MatOrderingList, ordering, ordering, sizeof(ordering), &permute));
244:   PetscOptionsEnd();
245: #if !defined(PETSC_HAVE_DEVICE)
246:   PetscCheck(!use_gpu, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "To use the option -use_gpu 1, PETSc must be configured with GPU support");
247: #endif
 248:   PetscCheck(flg1 || flg2 || flg3, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must indicate an input file with -ABIN, -AMTX, or -AJSON, depending on the file format");
249:   if (flg3) {
250:     ParseJSON(jfilename, &filenames, &groupnames, &matnames, &nmat);
251:     PetscCall(PetscCalloc1(nmat, &spmv_times));
252:   } else if (flg2) {
253:     PetscCall(MatCreateFromMTX(&A, filename, PETSC_TRUE));
254:   } else if (flg1) {
255:     PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, filename, FILE_MODE_READ, &fd));
256:     PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
257:     PetscCall(MatSetType(A, MATAIJ));
258:     PetscCall(MatSetFromOptions(A));
259:     PetscCall(MatLoad(A, fd));
260:     PetscCall(PetscViewerDestroy(&fd));
261:   }
262:   if (permute) {
263:     Mat Aperm;
264:     PetscCall(MatGetOrdering(A, ordering, &rowperm, &colperm));
265:     PetscCall(MatPermute(A, rowperm, colperm, &Aperm));
266:     PetscCall(MatDestroy(&A));
267:     A = Aperm; /* Replace original operator with permuted version */
268:   }
 269:   /* Let the Vec object trigger the first CUDA call, since initializing CUDA takes a relatively long time */
270:   PetscCall(PetscOptionsGetString(NULL, NULL, "-b", bfilename, PETSC_MAX_PATH_LEN, &bflg));
271:   if (bflg) {
272:     PetscViewer fb;
273:     PetscCall(VecCreate(PETSC_COMM_WORLD, &b));
274:     PetscCall(VecSetFromOptions(b));
275:     PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, bfilename, FILE_MODE_READ, &fb));
276:     PetscCall(VecLoad(b, fb));
277:     PetscCall(PetscViewerDestroy(&fb));
278:   }

280:   for (j = 0; j < nformats; j++) {
281:     char *petscmatformat = NULL;
282:     PetscCall(MapToPetscMatType(matformats[j], use_gpu, &petscmatformat));
 283:     PetscCheck(petscmatformat, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Invalid mat format %s; supported options are csr, sell, and csrkokkos.", matformats[j]);
284:     if (flg3) { // mat names specified in a JSON file
285:       for (i = 0; i < nmat; i++) {
286:         PetscCall(MatCreateFromMTX(&A, filenames[i], PETSC_TRUE));
287:         if (!bflg) {
288:           PetscCall(MatCreateVecs(A, &b, NULL));
289:           PetscCall(VecSet(b, 1.0));
290:         }
291:         PetscCall(TimedSpMV(A, b, NULL, petscmatformat, use_gpu, repetitions));
292:         if (use_gpu) PetscCall(PetscLogSpMVTime(&spmv_times[i], NULL, NULL, petscmatformat));
293:         else PetscCall(PetscLogSpMVTime(NULL, &spmv_times[i], NULL, petscmatformat));
294:         PetscCall(MatDestroy(&A));
295:         if (!bflg) PetscCall(VecDestroy(&b));
296:       }
297:       UpdateJSON(jfilename, spmv_times, starting_spmv_time, matformats[j], use_gpu, repetitions);
298:       starting_spmv_time = spmv_times[nmat - 1];
299:     } else {
300:       PetscReal spmv_time;
301:       if (!bflg) {
302:         PetscCall(MatCreateVecs(A, &b, NULL));
303:         PetscCall(VecSet(b, 1.0));
304:       }
305:       PetscCall(TimedSpMV(A, b, &spmv_time, petscmatformat, use_gpu, repetitions));
306:       if (!bflg) PetscCall(VecDestroy(&b));
307:     }
308:     PetscCall(PetscFree(petscmatformat));
309:   }
310:   if (flg3) {
311:     for (i = 0; i < nmat; i++) {
312:       free(filenames[i]);
313:       free(groupnames[i]);
314:       free(matnames[i]);
315:     }
316:     free(filenames);
317:     free(groupnames);
318:     free(matnames);
319:     PetscCall(PetscFree(spmv_times));
320:   }
321:   for (j = 0; j < nformats; j++) PetscCall(PetscFree(matformats[j]));
322:   if (flg1 || flg2) PetscCall(MatDestroy(&A));
323:   if (bflg) PetscCall(VecDestroy(&b));
324:   PetscCall(ISDestroy(&rowperm));
325:   PetscCall(ISDestroy(&colperm));
326:   PetscCall(PetscFinalize());
327:   return 0;
328: }
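
/*
  Example invocations (file names are hypothetical; the option names match those parsed
  in main(), and -use_gpu requires a GPU-enabled PETSc build):

    ./bench_spmv -AMTX matrix.mtx -repetitions 10
    ./bench_spmv -AJSON matrices.json -formats csr,sell -repetitions 10
    ./bench_spmv -ABIN matrix.dat -b rhs.dat -permute rcm -formats csr -use_gpu 1
*/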
329: /*TEST

331:    build:
 332:       requires: !complex double !windows_compilers !defined(PETSC_USE_64BIT_INDICES)
333:       depends: mmloader.c mmio.c cJSON.c

335:    test:
336:       suffix: 1
337:       args: -AMTX ${wPETSC_DIR}/share/petsc/datafiles/matrices/amesos2_test_mat0.mtx

339:    test:
340:       suffix: 2
 341:       args: -AMTX ${wPETSC_DIR}/share/petsc/datafiles/matrices/amesos2_test_mat0.mtx -use_gpu
342:       output_file: output/bench_spmv_1.out
343:       requires: cuda

345: TEST*/