Actual source code: bench_spmv.c

static char help[] = "Driver for benchmarking SpMV.";

#include <petscmat.h>
#include "cJSON.h"
#include "mmloader.h"

char *read_file(const char *filename)
{
  FILE *file = NULL;
  long length = 0;
  char *content = NULL;
  size_t read_chars = 0;

  /* open in read binary mode */
  file = fopen(filename, "rb");
  if (file) {
    /* get the length */
    fseek(file, 0, SEEK_END);
    length = ftell(file);
    fseek(file, 0, SEEK_SET);
    /* allocate content buffer */
    content = (char *)malloc((size_t)length + sizeof(""));
    /* read the file into memory */
    read_chars = fread(content, sizeof(char), (size_t)length, file);
    content[read_chars] = '\0';
    fclose(file);
  }
  return content;
}

void write_file(const char *filename, const char *content)
{
  FILE *file = NULL;

  file = fopen(filename, "w");
  if (file) {
    fputs(content, file);
    fclose(file); /* only close the stream if fopen() succeeded */
  }
}
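
/*
  ParseJSON reads the benchmark description file. Judging from the lookups below, the expected
  input is a top-level JSON array whose entries look roughly like
    { "filename": "path/to/matrix.mtx", "problem": { "group": "GroupName", "name": "MatrixName" } }
  The file names, group names, and matrix names are returned in newly allocated arrays of length *nmat.
*/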
int ParseJSON(const char *const inputjsonfile, char ***outputfilenames, char ***outputgroupnames, char ***outputmatnames, int *nmat)
{
  char *content = read_file(inputjsonfile);
  cJSON *matrix_json = NULL;
  const cJSON *problem = NULL, *elem = NULL;
  const cJSON *item = NULL;
  char **filenames, **groupnames, **matnames;
  int i, n;

  if (!content) return 0;
  matrix_json = cJSON_Parse(content);
  if (!matrix_json) {
    free(content); /* avoid leaking the file buffer when parsing fails */
    return 0;
  }
  n = cJSON_GetArraySize(matrix_json);
  *nmat = n;
  filenames = (char **)malloc(sizeof(char *) * n);
  groupnames = (char **)malloc(sizeof(char *) * n);
  matnames = (char **)malloc(sizeof(char *) * n);
  for (i = 0; i < n; i++) {
    elem = cJSON_GetArrayItem(matrix_json, i);
    item = cJSON_GetObjectItemCaseSensitive(elem, "filename");
    filenames[i] = (char *)malloc(sizeof(char) * (strlen(item->valuestring) + 1));
    strcpy(filenames[i], item->valuestring);
    problem = cJSON_GetObjectItemCaseSensitive(elem, "problem");
    item = cJSON_GetObjectItemCaseSensitive(problem, "group");
    groupnames[i] = (char *)malloc(sizeof(char) * (strlen(item->valuestring) + 1));
    strcpy(groupnames[i], item->valuestring);
    item = cJSON_GetObjectItemCaseSensitive(problem, "name");
    matnames[i] = (char *)malloc(sizeof(char) * (strlen(item->valuestring) + 1));
    strcpy(matnames[i], item->valuestring);
  }
  cJSON_Delete(matrix_json);
  free(content);
  *outputfilenames = filenames;
  *outputgroupnames = groupnames;
  *outputmatnames = matnames;
  return 0;
}
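
/*
  UpdateJSON writes the measured timings back into the same JSON file. For each matrix entry it
  creates or updates an object of the form
    "spmv": { "<matformat>": { "time": <average seconds per MatMult>, "repetitions": <count> } }
  Because spmv_times[] holds cumulative MatMult times, the per-matrix average is obtained by
  differencing consecutive entries and dividing by the number of repetitions.
*/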
int UpdateJSON(const char *const inputjsonfile, PetscReal *spmv_times, PetscReal starting_spmv_time, const char *const matformat, PetscBool use_gpu, PetscInt repetitions)
{
  char *content = read_file(inputjsonfile);
  cJSON *matrix_json = NULL;
  cJSON *elem = NULL;
  int i, n;

  if (!content) return 0;
  matrix_json = cJSON_Parse(content);
  if (!matrix_json) {
    free(content); /* avoid leaking the file buffer when parsing fails */
    return 0;
  }
  n = cJSON_GetArraySize(matrix_json);
  for (i = 0; i < n; i++) {
    cJSON *spmv = NULL;
    cJSON *format = NULL;
    elem = cJSON_GetArrayItem(matrix_json, i);
    spmv = cJSON_GetObjectItem(elem, "spmv");
    if (spmv) {
      format = cJSON_GetObjectItem(spmv, matformat);
      if (format) {
        cJSON_SetNumberValue(cJSON_GetObjectItem(format, "time"), (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions);
        cJSON_SetIntValue(cJSON_GetObjectItem(format, "repetitions"), repetitions);
      } else {
        format = cJSON_CreateObject();
        cJSON_AddItemToObject(spmv, matformat, format);
        cJSON_AddNumberToObject(format, "time", (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions);
        cJSON_AddNumberToObject(format, "repetitions", repetitions);
      }
    } else {
      spmv = cJSON_CreateObject();
      cJSON_AddItemToObject(elem, "spmv", spmv);
      format = cJSON_CreateObject();
      cJSON_AddItemToObject(spmv, matformat, format);
      cJSON_AddNumberToObject(format, "time", (spmv_times[i] - ((i == 0) ? starting_spmv_time : spmv_times[i - 1])) / repetitions);
      cJSON_AddNumberToObject(format, "repetitions", repetitions);
    }
  }
  free(content);
  content = cJSON_Print(matrix_json);
  write_file(inputjsonfile, content);
  cJSON_Delete(matrix_json);
  free(content);
  return 0;
}

/*
  For GPU formats, we keep two copies of the matrix on the CPU and one copy on the GPU.
  The extra CPU copy allows us to destroy the GPU matrix and recreate it efficiently in
  each repetition. As a result, each MatMult call is fresh, so we capture the first-time
  overhead (e.g. of the cuSPARSE SpMV) and avoid cache effects from consecutive calls.
*/
PetscErrorCode TimedSpMV(Mat A, Vec b, PetscReal *time, const char *petscmatformat, PetscBool use_gpu, PetscInt repetitions)
{
  Mat A2 = NULL;
  PetscInt i;
  Vec u;
  PetscLogDouble vstart = 0, vend = 0;
  PetscBool isaijcusparse, isaijkokkos;

  PetscFunctionBeginUser;
  PetscCall(PetscStrcmp(petscmatformat, MATAIJCUSPARSE, &isaijcusparse));
  PetscCall(PetscStrcmp(petscmatformat, MATAIJKOKKOS, &isaijkokkos));
  if (isaijcusparse) PetscCall(VecSetType(b, VECCUDA));
  if (isaijkokkos) PetscCall(VecSetType(b, VECKOKKOS));
  PetscCall(VecDuplicate(b, &u));
  if (time) *time = 0.0;
  for (i = 0; i < repetitions; i++) {
    if (use_gpu) {
      PetscCall(MatDestroy(&A2));
      PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &A2));
      PetscCall(MatConvert(A2, petscmatformat, MAT_INPLACE_MATRIX, &A2));
    } else A2 = A;
    /* Timing MatMult */
    if (time) PetscCall(PetscTime(&vstart));
    PetscCall(MatMult(A2, b, u));
    if (time) {
      PetscCall(PetscTime(&vend));
      *time += (PetscReal)(vend - vstart);
    }
  }
  PetscCall(VecDestroy(&u));
  if (repetitions > 0 && use_gpu) PetscCall(MatDestroy(&A2));
  PetscFunctionReturn(PETSC_SUCCESS);
}
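
/*
  PetscLogSpMVTime queries PETSc's performance log for the accumulated "MatMult" event and
  returns its CPU time (and, when PETSc is configured with device support, the GPU time and
  the GPU flop count in Mflop). The values are cumulative over all MatMult calls so far,
  which is why UpdateJSON() differences successive readings.
*/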
PetscErrorCode PetscLogSpMVTime(PetscReal *gputime, PetscReal *cputime, PetscReal *gpuflops, const char *petscmatformat)
{
  PetscLogEvent event;
  PetscEventPerfInfo eventInfo;
  //PetscReal gpuflopRate;

  // if (matformat) {
  //   PetscCall(PetscLogEventGetId("MatCUDACopyTo", &event));
  // } else {
  //   PetscCall(PetscLogEventGetId("MatCUSPARSCopyTo", &event));
  // }
  // PetscCall(PetscLogEventGetPerfInfo(PETSC_DETERMINE, event, &eventInfo));
  // PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%.4e ", eventInfo.time));

  PetscFunctionBeginUser;
  PetscCall(PetscLogEventGetId("MatMult", &event));
  PetscCall(PetscLogEventGetPerfInfo(PETSC_DETERMINE, event, &eventInfo));
  //gpuflopRate = eventInfo.GpuFlops/eventInfo.GpuTime;
  // PetscCall(PetscPrintf(PETSC_COMM_WORLD, "%.2f %.4e %.4e\n", gpuflopRate/1.e6, eventInfo.GpuTime, eventInfo.time));
  if (cputime) *cputime = eventInfo.time;
#if defined(PETSC_HAVE_DEVICE)
  if (gputime) *gputime = eventInfo.GpuTime;
  if (gpuflops) *gpuflops = eventInfo.GpuFlops / 1.e6;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
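
/*
  Map the format names accepted by -formats (csr, sell, csrkokkos) to PETSc Mat type names:
  "csr" becomes MATAIJCUSPARSE with -use_gpu and MATAIJ otherwise, "sell" currently maps to
  MATSELL on both CPU and GPU (see the SELLCUDA placeholder below), and "csrkokkos" becomes
  MATAIJKOKKOS. Unrecognized names leave *petscmatformat unset, which the caller reports as
  a user-input error.
*/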
PetscErrorCode MapToPetscMatType(const char *matformat, PetscBool use_gpu, char **petscmatformat)
{
  PetscBool iscsr, issell, iscsrkokkos;

  PetscFunctionBeginUser;
  PetscCall(PetscStrcmp(matformat, "csr", &iscsr));
  if (iscsr) {
    if (use_gpu) PetscCall(PetscStrallocpy(MATAIJCUSPARSE, petscmatformat));
    else PetscCall(PetscStrallocpy(MATAIJ, petscmatformat));
  } else {
    PetscCall(PetscStrcmp(matformat, "sell", &issell));
    if (issell) {
      if (use_gpu) PetscCall(PetscStrallocpy(MATSELL, petscmatformat)); // placeholder for SELLCUDA
      else PetscCall(PetscStrallocpy(MATSELL, petscmatformat));
    } else {
      PetscCall(PetscStrcmp(matformat, "csrkokkos", &iscsrkokkos));
      if (iscsrkokkos) PetscCall(PetscStrallocpy(MATAIJKOKKOS, petscmatformat));
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
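
/*
  A sketch of typical invocations (the file names below are only illustrative, not shipped
  with the benchmark):
    ./bench_spmv -AJSON matrices.json -formats csr,sell -repetitions 10
    ./bench_spmv -AMTX matrix.mtx -use_gpu 1 -formats csr -permute rcm
  With -AJSON, every matrix listed in the JSON file is benchmarked and the timings are
  written back into that file; -AMTX and -ABIN benchmark a single MatrixMarket or PETSc
  binary matrix.
*/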
int main(int argc, char **args)
{
  PetscInt nmat = 1, nformats = 5, i, j, repetitions = 1;
  Mat A;
  Vec b;
  char jfilename[PETSC_MAX_PATH_LEN];
  char filename[PETSC_MAX_PATH_LEN], bfilename[PETSC_MAX_PATH_LEN];
  char groupname[PETSC_MAX_PATH_LEN], matname[PETSC_MAX_PATH_LEN];
  char *matformats[5];
  char **filenames = NULL, **groupnames = NULL, **matnames = NULL;
  char ordering[256] = MATORDERINGRCM;
  PetscBool bflg, flg1, flg2, flg3, use_gpu = PETSC_FALSE, permute = PETSC_FALSE;
  IS rowperm = NULL, colperm = NULL;
  PetscViewer fd;
  PetscReal starting_spmv_time = 0, *spmv_times;

  PetscCall(PetscOptionsInsertString(NULL, "-log_view_gpu_time -log_view :/dev/null"));
  PetscCall(PetscInitialize(&argc, &args, (char *)0, help));
  PetscCall(PetscOptionsGetStringArray(NULL, NULL, "-formats", matformats, &nformats, &flg1));
  if (!flg1) {
    nformats = 1;
    PetscCall(PetscStrallocpy("csr", &matformats[0]));
  }
  PetscCall(PetscOptionsGetBool(NULL, NULL, "-use_gpu", &use_gpu, NULL));
  PetscCall(PetscOptionsGetInt(NULL, NULL, "-repetitions", &repetitions, NULL));
  /* Read matrix and RHS */
  PetscCall(PetscOptionsGetString(NULL, NULL, "-groupname", groupname, PETSC_MAX_PATH_LEN, NULL));
  PetscCall(PetscOptionsGetString(NULL, NULL, "-matname", matname, PETSC_MAX_PATH_LEN, NULL));
  PetscCall(PetscOptionsGetString(NULL, NULL, "-ABIN", filename, PETSC_MAX_PATH_LEN, &flg1));
  PetscCall(PetscOptionsGetString(NULL, NULL, "-AMTX", filename, PETSC_MAX_PATH_LEN, &flg2));
  PetscCall(PetscOptionsGetString(NULL, NULL, "-AJSON", jfilename, PETSC_MAX_PATH_LEN, &flg3));
  PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "Extra options", "");
  PetscCall(PetscOptionsFList("-permute", "Permute matrix and vector to solve in a new ordering", "", MatOrderingList, ordering, ordering, sizeof(ordering), &permute));
  PetscOptionsEnd();
#if !defined(PETSC_HAVE_DEVICE)
  PetscCheck(!use_gpu, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "To use the option -use_gpu 1, PETSc must be configured with GPU support");
#endif
  PetscCheck(flg1 || flg2 || flg3, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Must indicate an input file with -ABIN, -AMTX, or -AJSON depending on the file format");
  if (flg3) {
    ParseJSON(jfilename, &filenames, &groupnames, &matnames, &nmat);
    PetscCall(PetscCalloc1(nmat, &spmv_times));
  } else if (flg2) {
    PetscCall(MatCreateFromMTX(&A, filename, PETSC_TRUE));
  } else if (flg1) {
    PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, filename, FILE_MODE_READ, &fd));
    PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
    PetscCall(MatSetType(A, MATAIJ));
    PetscCall(MatSetFromOptions(A));
    PetscCall(MatLoad(A, fd));
    PetscCall(PetscViewerDestroy(&fd));
  }
  if (permute) {
    Mat Aperm;
    PetscCall(MatGetOrdering(A, ordering, &rowperm, &colperm));
    PetscCall(MatPermute(A, rowperm, colperm, &Aperm));
    PetscCall(MatDestroy(&A));
    A = Aperm; /* Replace original operator with permuted version */
  }
  /* Let the Vec object trigger the first CUDA call, which takes a relatively long time to initialize CUDA */
  PetscCall(PetscOptionsGetString(NULL, NULL, "-b", bfilename, PETSC_MAX_PATH_LEN, &bflg));
  if (bflg) {
    PetscViewer fb;
    PetscCall(VecCreate(PETSC_COMM_WORLD, &b));
    PetscCall(VecSetFromOptions(b));
    PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, bfilename, FILE_MODE_READ, &fb));
    PetscCall(VecLoad(b, fb));
    PetscCall(PetscViewerDestroy(&fb));
  }

  for (j = 0; j < nformats; j++) {
    char *petscmatformat = NULL;
    PetscCall(MapToPetscMatType(matformats[j], use_gpu, &petscmatformat));
    PetscCheck(petscmatformat, PETSC_COMM_WORLD, PETSC_ERR_USER_INPUT, "Invalid mat format %s, supported options include csr and sell.", matformats[j]);
    if (flg3) { // mat names specified in a JSON file
      for (i = 0; i < nmat; i++) {
        PetscCall(MatCreateFromMTX(&A, filenames[i], PETSC_TRUE));
        if (!bflg) {
          PetscCall(MatCreateVecs(A, &b, NULL));
          PetscCall(VecSet(b, 1.0));
        }
        PetscCall(TimedSpMV(A, b, NULL, petscmatformat, use_gpu, repetitions));
        if (use_gpu) PetscCall(PetscLogSpMVTime(&spmv_times[i], NULL, NULL, petscmatformat));
        else PetscCall(PetscLogSpMVTime(NULL, &spmv_times[i], NULL, petscmatformat));
        PetscCall(MatDestroy(&A));
        if (!bflg) PetscCall(VecDestroy(&b));
      }
      UpdateJSON(jfilename, spmv_times, starting_spmv_time, matformats[j], use_gpu, repetitions);
      starting_spmv_time = spmv_times[nmat - 1];
    } else {
      PetscReal spmv_time;
      if (!bflg) {
        PetscCall(MatCreateVecs(A, &b, NULL));
        PetscCall(VecSet(b, 1.0));
      }
      PetscCall(TimedSpMV(A, b, &spmv_time, petscmatformat, use_gpu, repetitions));
      if (!bflg) PetscCall(VecDestroy(&b));
    }
    PetscCall(PetscFree(petscmatformat));
  }
  if (flg3) {
    for (i = 0; i < nmat; i++) {
      free(filenames[i]);
      free(groupnames[i]);
      free(matnames[i]);
    }
    free(filenames);
    free(groupnames);
    free(matnames);
    PetscCall(PetscFree(spmv_times));
  }
  for (j = 0; j < nformats; j++) PetscCall(PetscFree(matformats[j]));
  if (flg1 || flg2) PetscCall(MatDestroy(&A));
  if (bflg) PetscCall(VecDestroy(&b));
  PetscCall(ISDestroy(&rowperm));
  PetscCall(ISDestroy(&colperm));
  PetscCall(PetscFinalize());
  return 0;
}

/*TEST

  build:
    requires: !complex double !windows_compilers !defined(PETSC_USE_64BIT_INDICES)
    depends: mmloader.c mmio.c cJSON.c

  test:
    suffix: 1
    args: -AMTX ${wPETSC_DIR}/share/petsc/datafiles/matrices/amesos2_test_mat0.mtx

  test:
    suffix: 2
    args: -AMTX ${wPETSC_DIR}/share/petsc/datafiles/matrices/amesos2_test_mat0.mtx -use_gpu
    output_file: output/bench_spmv_1.out
    requires: cuda

TEST*/