Actual source code: aijmkl.c
2: /*
3: Defines basic operations for the MATSEQAIJMKL matrix class.
4: This class is derived from the MATSEQAIJ class and retains the
5: compressed row storage (aka Yale sparse matrix format) but uses
6: sparse BLAS operations from the Intel Math Kernel Library (MKL)
7: wherever possible.
8: */
10: #include <../src/mat/impls/aij/seq/aij.h>
11: #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
12: #if defined(PETSC_HAVE_MKL_INTEL_ILP64)
13: #define MKL_ILP64
14: #endif
15: #include <mkl_spblas.h>
17: typedef struct {
18: PetscBool no_SpMV2; /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
19: PetscBool eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
20: PetscBool sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
21: PetscObjectState state;
22: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
23: sparse_matrix_t csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
24: struct matrix_descr descr;
25: #endif
26: } Mat_SeqAIJMKL;
28: extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat, MatAssemblyType);
30: PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A, MatType type, MatReuse reuse, Mat *newmat)
31: {
32: /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
33: /* so we will ignore 'MatType type'. */
34: Mat B = *newmat;
35: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
36: Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
37: #endif
39: PetscFunctionBegin;
40: if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
42: /* Reset the original function pointers. */
43: B->ops->duplicate = MatDuplicate_SeqAIJ;
44: B->ops->assemblyend = MatAssemblyEnd_SeqAIJ;
45: B->ops->destroy = MatDestroy_SeqAIJ;
46: B->ops->mult = MatMult_SeqAIJ;
47: B->ops->multtranspose = MatMultTranspose_SeqAIJ;
48: B->ops->multadd = MatMultAdd_SeqAIJ;
49: B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJ;
50: B->ops->productsetfromoptions = MatProductSetFromOptions_SeqAIJ;
51: B->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJ_SeqAIJ;
52: B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJ_SeqAIJ;
53: B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ;
54: B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ;
55: B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqAIJ;
57: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", NULL));
59: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
60: /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
61: * simply involves destroying the MKL sparse matrix handle and then freeing
62: * the spptr pointer. */
63: if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL *)B->spptr;
65: if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
66: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
67: PetscCall(PetscFree(B->spptr));
69: /* Change the type of B to MATSEQAIJ. */
70: PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ));
72: *newmat = B;
73: PetscFunctionReturn(PETSC_SUCCESS);
74: }
76: PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
77: {
78: Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
80: PetscFunctionBegin;
82: /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an spptr pointer. */
83: if (aijmkl) {
84: /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
85: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
86: if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
87: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
88: PetscCall(PetscFree(A->spptr));
89: }
91: /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
92: * to destroy everything that remains. */
93: PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ));
94: /* I don't call MatSetType(). I believe this is because that
95: * is only to be called when *building* a matrix. I could be wrong, but
96: * that is how things work for the SuperLU matrix class. */
97: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaijmkl_seqaij_C", NULL));
98: PetscCall(MatDestroy_SeqAIJ(A));
99: PetscFunctionReturn(PETSC_SUCCESS);
100: }
102: /* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
103: * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
104: * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
105: * handle, creates a new one, and then calls mkl_sparse_optimize().
106: * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
107: * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
108: * an unoptimized matrix handle here. */
109: PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
110: {
111: #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
112: /* If the MKL library does not have mkl_sparse_optimize(), then this routine
113: * does nothing. We make it callable anyway in this case because it cuts
114: * down on littering the code with #ifdefs. */
115: PetscFunctionBegin;
116: PetscFunctionReturn(PETSC_SUCCESS);
117: #else
118: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
119: Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
120: PetscInt m, n;
121: MatScalar *aa;
122: PetscInt *aj, *ai;
124: PetscFunctionBegin;
125: #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
126: /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2
127: * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must
128: * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling
129: * mkl_sparse_optimize() later. */
130: if (aijmkl->no_SpMV2) PetscFunctionReturn(PETSC_SUCCESS);
131: #endif
133: if (aijmkl->sparse_optimized) {
134: /* Matrix has been previously assembled and optimized. Must destroy old
135: * matrix handle before running the optimization step again. */
136: PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
137: }
138: aijmkl->sparse_optimized = PETSC_FALSE;
140: /* Now perform the SpMV2 setup and matrix optimization. */
141: aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
142: aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
143: aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
144: m = A->rmap->n;
145: n = A->cmap->n;
146: aj = a->j; /* aj[k] gives column index for element aa[k]. */
147: aa = a->a; /* Nonzero elements stored row-by-row. */
148: ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
149: if (a->nz && aa && !A->structure_only) {
150: /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
151: * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
152: PetscCallExternal(mkl_sparse_x_create_csr, &aijmkl->csrA, SPARSE_INDEX_BASE_ZERO, (MKL_INT)m, (MKL_INT)n, (MKL_INT *)ai, (MKL_INT *)(ai + 1), (MKL_INT *)aj, aa);
153: PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000);
154: PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE);
155: if (!aijmkl->no_SpMV2) PetscCallExternal(mkl_sparse_optimize, aijmkl->csrA);
156: aijmkl->sparse_optimized = PETSC_TRUE;
157: PetscCall(PetscObjectStateGet((PetscObject)A, &(aijmkl->state)));
158: } else {
159: aijmkl->csrA = NULL;
160: }
162: PetscFunctionReturn(PETSC_SUCCESS);
163: #endif
164: }
166: #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
167: /* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */
168: static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm, sparse_matrix_t csrA, PetscInt nrows, PetscInt ncols, Mat A)
169: {
170: sparse_index_base_t indexing;
171: PetscInt m, n;
172: PetscInt *aj, *ai, *dummy;
173: MatScalar *aa;
174: Mat_SeqAIJMKL *aijmkl;
176: PetscFunctionBegin;
177: if (csrA) {
178: /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
179: PetscCallExternal(mkl_sparse_x_export_csr, csrA, &indexing, (MKL_INT *)&m, (MKL_INT *)&n, (MKL_INT **)&ai, (MKL_INT **)&dummy, (MKL_INT **)&aj, &aa);
180: PetscCheck((m == nrows) && (n == ncols), PETSC_COMM_SELF, PETSC_ERR_LIB, "Number of rows/columns does not match those from mkl_sparse_x_export_csr()");
181: } else {
182: aj = ai = NULL;
183: aa = NULL;
184: }
186: PetscCall(MatSetType(A, MATSEQAIJ));
187: PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, nrows, ncols));
188: /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
189: * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
190: * they will be destroyed when the MKL handle is destroyed.
191: * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
192: if (csrA) {
193: PetscCall(MatSeqAIJSetPreallocationCSR(A, ai, aj, NULL));
194: } else {
195: /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */
196: PetscCall(MatSetUp(A));
197: PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
198: PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
199: }
201: /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
202: * Now turn it into a MATSEQAIJMKL. */
203: PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A));
205: aijmkl = (Mat_SeqAIJMKL *)A->spptr;
206: aijmkl->csrA = csrA;
208: /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but
209: * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
210: * and just need to be able to run the MKL optimization step. */
211: aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
212: aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
213: aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
214: if (csrA) {
215: PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000);
216: PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE);
217: }
218: PetscCall(PetscObjectStateGet((PetscObject)A, &(aijmkl->state)));
219: PetscFunctionReturn(PETSC_SUCCESS);
220: }
221: #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
223: /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
224: * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
225: * MatMatMultNumeric(). */
226: #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
227: static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
228: {
229: PetscInt i;
230: PetscInt nrows, ncols;
231: PetscInt nz;
232: PetscInt *ai, *aj, *dummy;
233: PetscScalar *aa;
234: Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
235: sparse_index_base_t indexing;
237: PetscFunctionBegin;
238: /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
239: if (!aijmkl->csrA) PetscFunctionReturn(PETSC_SUCCESS);
241: /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
242: PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, (MKL_INT *)&nrows, (MKL_INT *)&ncols, (MKL_INT **)&ai, (MKL_INT **)&dummy, (MKL_INT **)&aj, &aa);
244: /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
245: * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
246: for (i = 0; i < nrows; i++) {
247: nz = ai[i + 1] - ai[i];
248: PetscCall(MatSetValues_SeqAIJ(A, 1, &i, nz, aj + ai[i], aa + ai[i], INSERT_VALUES));
249: }
251: PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
252: PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
254: PetscCall(PetscObjectStateGet((PetscObject)A, &(aijmkl->state)));
255: /* At this point our matrix has a valid MKL handle, the contents of which match the PETSc AIJ representation.
256: * The MKL handle has *not* had mkl_sparse_optimize() called on it, though -- the MKL developers have confirmed
257: * that the matrix inspection/optimization step is not performed when matrix-matrix multiplication is finalized. */
258: aijmkl->sparse_optimized = PETSC_FALSE;
259: PetscFunctionReturn(PETSC_SUCCESS);
260: }
261: #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
263: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
264: PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A, PetscViewer viewer)
265: {
266: PetscInt i, j, k;
267: PetscInt nrows, ncols;
268: PetscInt nz;
269: PetscInt *ai, *aj, *dummy;
270: PetscScalar *aa;
271: Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
272: sparse_index_base_t indexing;
274: PetscFunctionBegin;
275: PetscCall(PetscViewerASCIIPrintf(viewer, "Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n"));
277: /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
278: if (!aijmkl->csrA) {
279: PetscCall(PetscViewerASCIIPrintf(viewer, "MKL matrix handle is NULL\n"));
280: PetscFunctionReturn(PETSC_SUCCESS);
281: }
283: /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
284: PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, (MKL_INT *)&nrows, (MKL_INT *)&ncols, (MKL_INT **)&ai, (MKL_INT **)&dummy, (MKL_INT **)&aj, &aa);
286: k = 0;
287: for (i = 0; i < nrows; i++) {
288: PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ": ", i));
289: nz = ai[i + 1] - ai[i];
290: for (j = 0; j < nz; j++) {
291: if (aa) {
292: PetscCall(PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", %g) ", aj[k], PetscRealPart(aa[k])));
293: } else {
294: PetscCall(PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", NULL)", aj[k]));
295: }
296: k++;
297: }
298: PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
299: }
300: PetscFunctionReturn(PETSC_SUCCESS);
301: }
302: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
304: PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
305: {
306: Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
307: Mat_SeqAIJMKL *aijmkl_dest;
309: PetscFunctionBegin;
310: PetscCall(MatDuplicate_SeqAIJ(A, op, M));
311: aijmkl_dest = (Mat_SeqAIJMKL *)(*M)->spptr;
312: PetscCall(PetscArraycpy(aijmkl_dest, aijmkl, 1));
313: aijmkl_dest->sparse_optimized = PETSC_FALSE;
314: if (aijmkl->eager_inspection) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
315: PetscFunctionReturn(PETSC_SUCCESS);
316: }
318: PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
319: {
320: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
321: Mat_SeqAIJMKL *aijmkl;
323: PetscFunctionBegin;
324: if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS);
326: /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
327: * extra information and some different methods, call the AssemblyEnd
328: * routine for a MATSEQAIJ.
329: * I'm not sure if this is the best way to do this, but it avoids
330: * a lot of code duplication. */
331: a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */
332: PetscCall(MatAssemblyEnd_SeqAIJ(A, mode));
334: /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
335: * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
336: aijmkl = (Mat_SeqAIJMKL *)A->spptr;
337: if (aijmkl->eager_inspection) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
339: PetscFunctionReturn(PETSC_SUCCESS);
340: }
342: #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
343: PetscErrorCode MatMult_SeqAIJMKL(Mat A, Vec xx, Vec yy)
344: {
345: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
346: const PetscScalar *x;
347: PetscScalar *y;
348: const MatScalar *aa;
349: PetscInt m = A->rmap->n;
350: PetscInt n = A->cmap->n;
351: PetscScalar alpha = 1.0;
352: PetscScalar beta = 0.0;
353: const PetscInt *aj, *ai;
354: char matdescra[6];
356: /* Variables not in MatMult_SeqAIJ. */
357: char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */
359: PetscFunctionBegin;
360: matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
361: matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
362: PetscCall(VecGetArrayRead(xx, &x));
363: PetscCall(VecGetArray(yy, &y));
364: aj = a->j; /* aj[k] gives column index for element aa[k]. */
365: aa = a->a; /* Nonzero elements stored row-by-row. */
366: ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
368: /* Call MKL sparse BLAS routine to do the MatMult. */
369: mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y);
371: PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
372: PetscCall(VecRestoreArrayRead(xx, &x));
373: PetscCall(VecRestoreArray(yy, &y));
374: PetscFunctionReturn(PETSC_SUCCESS);
375: }
376: #endif
378: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
379: PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy)
380: {
381: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
382: Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
383: const PetscScalar *x;
384: PetscScalar *y;
385: PetscObjectState state;
387: PetscFunctionBegin;
389: /* If there are no nonzero entries, zero yy and return immediately. */
390: if (!a->nz) {
391: PetscCall(VecGetArray(yy, &y));
392: PetscCall(PetscArrayzero(y, A->rmap->n));
393: PetscCall(VecRestoreArray(yy, &y));
394: PetscFunctionReturn(PETSC_SUCCESS);
395: }
397: PetscCall(VecGetArrayRead(xx, &x));
398: PetscCall(VecGetArray(yy, &y));
400: /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
401: * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
402: * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
403: PetscCall(PetscObjectStateGet((PetscObject)A, &state));
404: if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
406: /* Call MKL SpMV2 executor routine to do the MatMult. */
407: PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y);
409: PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
410: PetscCall(VecRestoreArrayRead(xx, &x));
411: PetscCall(VecRestoreArray(yy, &y));
412: PetscFunctionReturn(PETSC_SUCCESS);
413: }
414: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
416: #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
417: PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A, Vec xx, Vec yy)
418: {
419: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
420: const PetscScalar *x;
421: PetscScalar *y;
422: const MatScalar *aa;
423: PetscInt m = A->rmap->n;
424: PetscInt n = A->cmap->n;
425: PetscScalar alpha = 1.0;
426: PetscScalar beta = 0.0;
427: const PetscInt *aj, *ai;
428: char matdescra[6];
430: /* Variables not in MatMultTranspose_SeqAIJ. */
431: char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */
433: PetscFunctionBegin;
434: matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
435: matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
436: PetscCall(VecGetArrayRead(xx, &x));
437: PetscCall(VecGetArray(yy, &y));
438: aj = a->j; /* aj[k] gives column index for element aa[k]. */
439: aa = a->a; /* Nonzero elements stored row-by-row. */
440: ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
442: /* Call MKL sparse BLAS routine to do the MatMult. */
443: mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y);
445: PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
446: PetscCall(VecRestoreArrayRead(xx, &x));
447: PetscCall(VecRestoreArray(yy, &y));
448: PetscFunctionReturn(PETSC_SUCCESS);
449: }
450: #endif
452: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
453: PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy)
454: {
455: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
456: Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
457: const PetscScalar *x;
458: PetscScalar *y;
459: PetscObjectState state;
461: PetscFunctionBegin;
463: /* If there are no nonzero entries, zero yy and return immediately. */
464: if (!a->nz) {
465: PetscCall(VecGetArray(yy, &y));
466: PetscCall(PetscArrayzero(y, A->cmap->n));
467: PetscCall(VecRestoreArray(yy, &y));
468: PetscFunctionReturn(PETSC_SUCCESS);
469: }
471: PetscCall(VecGetArrayRead(xx, &x));
472: PetscCall(VecGetArray(yy, &y));
474: /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
475: * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
476: * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
477: PetscCall(PetscObjectStateGet((PetscObject)A, &state));
478: if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
480: /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
481: PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y);
483: PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
484: PetscCall(VecRestoreArrayRead(xx, &x));
485: PetscCall(VecRestoreArray(yy, &y));
486: PetscFunctionReturn(PETSC_SUCCESS);
487: }
488: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
490: #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
491: PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz)
492: {
493: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
494: const PetscScalar *x;
495: PetscScalar *y, *z;
496: const MatScalar *aa;
497: PetscInt m = A->rmap->n;
498: PetscInt n = A->cmap->n;
499: const PetscInt *aj, *ai;
500: PetscInt i;
502: /* Variables not in MatMultAdd_SeqAIJ. */
503: char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */
504: PetscScalar alpha = 1.0;
505: PetscScalar beta;
506: char matdescra[6];
508: PetscFunctionBegin;
509: matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
510: matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
512: PetscCall(VecGetArrayRead(xx, &x));
513: PetscCall(VecGetArrayPair(yy, zz, &y, &z));
514: aj = a->j; /* aj[k] gives column index for element aa[k]. */
515: aa = a->a; /* Nonzero elements stored row-by-row. */
516: ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
518: /* Call MKL sparse BLAS routine to do the MatMult. */
519: if (zz == yy) {
520: /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
521: beta = 1.0;
522: mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
523: } else {
524: /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
525: * MKL sparse BLAS does not have a MatMultAdd equivalent. */
526: beta = 0.0;
527: mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
528: for (i = 0; i < m; i++) z[i] += y[i];
529: }
531: PetscCall(PetscLogFlops(2.0 * a->nz));
532: PetscCall(VecRestoreArrayRead(xx, &x));
533: PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
534: PetscFunctionReturn(PETSC_SUCCESS);
535: }
536: #endif
538: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
539: PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz)
540: {
541: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
542: Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
543: const PetscScalar *x;
544: PetscScalar *y, *z;
545: PetscInt m = A->rmap->n;
546: PetscInt i;
548: /* Variables not in MatMultAdd_SeqAIJ. */
549: PetscObjectState state;
551: PetscFunctionBegin;
553: /* If there are no nonzero entries, set zz = yy and return immediately. */
554: if (!a->nz) {
555: PetscCall(VecGetArrayPair(yy, zz, &y, &z));
556: PetscCall(PetscArraycpy(z, y, m));
557: PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
558: PetscFunctionReturn(PETSC_SUCCESS);
559: }
561: PetscCall(VecGetArrayRead(xx, &x));
562: PetscCall(VecGetArrayPair(yy, zz, &y, &z));
564: /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
565: * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
566: * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
567: PetscCall(PetscObjectStateGet((PetscObject)A, &state));
568: if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
570: /* Call MKL sparse BLAS routine to do the MatMult. */
571: if (zz == yy) {
572: /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
573: * with alpha and beta both set to 1.0. */
574: PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z);
575: } else {
576: /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
577: * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
578: PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z);
579: for (i = 0; i < m; i++) z[i] += y[i];
580: }
582: PetscCall(PetscLogFlops(2.0 * a->nz));
583: PetscCall(VecRestoreArrayRead(xx, &x));
584: PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
585: PetscFunctionReturn(PETSC_SUCCESS);
586: }
587: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
589: #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
590: PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz)
591: {
592: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
593: const PetscScalar *x;
594: PetscScalar *y, *z;
595: const MatScalar *aa;
596: PetscInt m = A->rmap->n;
597: PetscInt n = A->cmap->n;
598: const PetscInt *aj, *ai;
599: PetscInt i;
601: /* Variables not in MatMultTransposeAdd_SeqAIJ. */
602: char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */
603: PetscScalar alpha = 1.0;
604: PetscScalar beta;
605: char matdescra[6];
607: PetscFunctionBegin;
608: matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
609: matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
611: PetscCall(VecGetArrayRead(xx, &x));
612: PetscCall(VecGetArrayPair(yy, zz, &y, &z));
613: aj = a->j; /* aj[k] gives column index for element aa[k]. */
614: aa = a->a; /* Nonzero elements stored row-by-row. */
615: ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */
617: /* Call MKL sparse BLAS routine to do the MatMult. */
618: if (zz == yy) {
619: /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
620: beta = 1.0;
621: mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
622: } else {
623: /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
624: * MKL sparse BLAS does not have a MatMultAdd equivalent. */
625: beta = 0.0;
626: mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
627: for (i = 0; i < n; i++) z[i] += y[i];
628: }
630: PetscCall(PetscLogFlops(2.0 * a->nz));
631: PetscCall(VecRestoreArrayRead(xx, &x));
632: PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
633: PetscFunctionReturn(PETSC_SUCCESS);
634: }
635: #endif
637: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
638: PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz)
639: {
640: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
641: Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
642: const PetscScalar *x;
643: PetscScalar *y, *z;
644: PetscInt n = A->cmap->n;
645: PetscInt i;
646: PetscObjectState state;
648: /* Variables not in MatMultTransposeAdd_SeqAIJ. */
650: PetscFunctionBegin;
652: /* If there are no nonzero entries, set zz = yy and return immediately. */
653: if (!a->nz) {
654: PetscCall(VecGetArrayPair(yy, zz, &y, &z));
655: PetscCall(PetscArraycpy(z, y, n));
656: PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
657: PetscFunctionReturn(PETSC_SUCCESS);
658: }
660: PetscCall(VecGetArrayRead(xx, &x));
661: PetscCall(VecGetArrayPair(yy, zz, &y, &z));
663: /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
664: * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
665: * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
666: PetscCall(PetscObjectStateGet((PetscObject)A, &state));
667: if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
669: /* Call MKL sparse BLAS routine to do the MatMult. */
670: if (zz == yy) {
671: /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
672: * with alpha and beta both set to 1.0. */
673: PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z);
674: } else {
675: /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
676: * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
677: PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z);
678: for (i = 0; i < n; i++) z[i] += y[i];
679: }
681: PetscCall(PetscLogFlops(2.0 * a->nz));
682: PetscCall(VecRestoreArrayRead(xx, &x));
683: PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
684: PetscFunctionReturn(PETSC_SUCCESS);
685: }
686: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
688: #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
689: static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C)
690: {
691: Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr;
692: sparse_matrix_t csrA, csrB, csrC;
693: PetscInt nrows, ncols;
694: struct matrix_descr descr_type_gen;
695: PetscObjectState state;
697: PetscFunctionBegin;
698: /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does
699: * not handle sparse matrices with zero rows or columns. */
700: if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N;
701: else nrows = A->cmap->N;
702: if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N;
703: else ncols = B->rmap->N;
705: PetscCall(PetscObjectStateGet((PetscObject)A, &state));
706: if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
707: PetscCall(PetscObjectStateGet((PetscObject)B, &state));
708: if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B));
709: csrA = a->csrA;
710: csrB = b->csrA;
711: descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
713: if (csrA && csrB) {
714: PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FULL_MULT_NO_VAL, &csrC);
715: } else {
716: csrC = NULL;
717: }
719: PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, nrows, ncols, C));
721: PetscFunctionReturn(PETSC_SUCCESS);
722: }
724: PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C)
725: {
726: Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr, *c = (Mat_SeqAIJMKL *)C->spptr;
727: sparse_matrix_t csrA, csrB, csrC;
728: struct matrix_descr descr_type_gen;
729: PetscObjectState state;
731: PetscFunctionBegin;
732: PetscCall(PetscObjectStateGet((PetscObject)A, &state));
733: if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
734: PetscCall(PetscObjectStateGet((PetscObject)B, &state));
735: if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B));
736: csrA = a->csrA;
737: csrB = b->csrA;
738: csrC = c->csrA;
739: descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;
741: if (csrA && csrB) {
742: PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FINALIZE_MULT, &csrC);
743: } else {
744: csrC = NULL;
745: }
747: /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
748: PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C));
750: PetscFunctionReturn(PETSC_SUCCESS);
751: }
753: PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
754: {
755: PetscFunctionBegin;
756: PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
757: PetscFunctionReturn(PETSC_SUCCESS);
758: }
760: PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
761: {
762: PetscFunctionBegin;
763: PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
764: PetscFunctionReturn(PETSC_SUCCESS);
765: }
767: PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
768: {
769: PetscFunctionBegin;
770: PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
771: PetscFunctionReturn(PETSC_SUCCESS);
772: }
774: PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
775: {
776: PetscFunctionBegin;
777: PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
778: PetscFunctionReturn(PETSC_SUCCESS);
779: }
781: PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
782: {
783: PetscFunctionBegin;
784: PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C));
785: PetscFunctionReturn(PETSC_SUCCESS);
786: }
788: PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
789: {
790: PetscFunctionBegin;
791: PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C));
792: PetscFunctionReturn(PETSC_SUCCESS);
793: }
795: static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
796: {
797: Mat_Product *product = C->product;
798: Mat A = product->A, B = product->B;
800: PetscFunctionBegin;
801: PetscCall(MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A, B, C));
802: PetscFunctionReturn(PETSC_SUCCESS);
803: }
805: static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
806: {
807: Mat_Product *product = C->product;
808: Mat A = product->A, B = product->B;
809: PetscReal fill = product->fill;
811: PetscFunctionBegin;
812: PetscCall(MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A, B, fill, C));
813: C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL;
814: PetscFunctionReturn(PETSC_SUCCESS);
815: }
817: PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A, Mat P, Mat C)
818: {
819: Mat Ct;
820: Vec zeros;
821: Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr, *c = (Mat_SeqAIJMKL *)C->spptr;
822: sparse_matrix_t csrA, csrP, csrC;
823: PetscBool set, flag;
824: struct matrix_descr descr_type_sym;
825: PetscObjectState state;
827: PetscFunctionBegin;
828: PetscCall(MatIsSymmetricKnown(A, &set, &flag));
829: PetscCheck(set && flag, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal() called on matrix A not marked as symmetric");
831: PetscCall(PetscObjectStateGet((PetscObject)A, &state));
832: if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
833: PetscCall(PetscObjectStateGet((PetscObject)P, &state));
834: if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P));
835: csrA = a->csrA;
836: csrP = p->csrA;
837: csrC = c->csrA;
838: descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
839: descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
840: descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
842: /* the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
843: PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FINALIZE_MULT);
845: /* Update the PETSc AIJ representation for matrix C from contents of MKL handle.
846: * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix,
847: * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix.
848: * We have to fill in the missing portion, which we currently do below by forming the transpose and performing at MatAXPY
849: * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might
850: * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output
851: * the full matrix. */
852: PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C));
853: PetscCall(MatTranspose(C, MAT_INITIAL_MATRIX, &Ct));
854: PetscCall(MatCreateVecs(C, &zeros, NULL));
855: PetscCall(VecSetFromOptions(zeros));
856: PetscCall(VecZeroEntries(zeros));
857: PetscCall(MatDiagonalSet(Ct, zeros, INSERT_VALUES));
858: PetscCall(MatAXPY(C, 1.0, Ct, DIFFERENT_NONZERO_PATTERN));
859: /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */
860: PetscCall(MatProductCreateWithMat(A, P, NULL, C));
861: PetscCall(MatProductSetType(C, MATPRODUCT_PtAP));
862: PetscCall(MatSeqAIJMKL_create_mkl_handle(C));
863: PetscCall(VecDestroy(&zeros));
864: PetscCall(MatDestroy(&Ct));
865: PetscFunctionReturn(PETSC_SUCCESS);
866: }
868: PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C)
869: {
870: Mat_Product *product = C->product;
871: Mat A = product->A, P = product->B;
872: Mat_SeqAIJMKL *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr;
873: sparse_matrix_t csrA, csrP, csrC;
874: struct matrix_descr descr_type_sym;
875: PetscObjectState state;
877: PetscFunctionBegin;
878: PetscCall(PetscObjectStateGet((PetscObject)A, &state));
879: if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
880: PetscCall(PetscObjectStateGet((PetscObject)P, &state));
881: if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P));
882: csrA = a->csrA;
883: csrP = p->csrA;
884: descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
885: descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
886: descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;
888: /* the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
889: if (csrP && csrA) {
890: PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FULL_MULT_NO_VAL);
891: } else {
892: csrC = NULL;
893: }
895: /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle.
896: * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain
897: * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). I believe that
898: * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this
899: * is guaranteed. */
900: PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, P->cmap->N, P->cmap->N, C));
902: C->ops->productnumeric = MatProductNumeric_PtAP;
903: PetscFunctionReturn(PETSC_SUCCESS);
904: }
906: static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C)
907: {
908: PetscFunctionBegin;
909: C->ops->productsymbolic = MatProductSymbolic_AB;
910: C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
911: PetscFunctionReturn(PETSC_SUCCESS);
912: }
914: static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C)
915: {
916: PetscFunctionBegin;
917: C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL;
918: PetscFunctionReturn(PETSC_SUCCESS);
919: }
921: static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C)
922: {
923: PetscFunctionBegin;
924: C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ;
925: C->ops->productsymbolic = MatProductSymbolic_ABt;
926: PetscFunctionReturn(PETSC_SUCCESS);
927: }
929: static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C)
930: {
931: Mat_Product *product = C->product;
932: Mat A = product->A;
933: PetscBool set, flag;
935: PetscFunctionBegin;
936: if (PetscDefined(USE_COMPLEX)) {
937: /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Unsafe() will be used.
938: * We do this in several other locations in this file. This works for the time being, but these
939: * routines are considered unsafe and may be removed from the MatProduct code in the future.
940: * TODO: Add proper MATSEQAIJMKL implementations */
941: C->ops->productsymbolic = NULL;
942: } else {
943: /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */
944: PetscCall(MatIsSymmetricKnown(A, &set, &flag));
945: if (set && flag) C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
946: else C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
947: /* we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_XXX(),
948: * depending on whether the algorithm for the general case vs. the real symmetric one is used. */
949: }
950: PetscFunctionReturn(PETSC_SUCCESS);
951: }
953: static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C)
954: {
955: PetscFunctionBegin;
956: C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
957: PetscFunctionReturn(PETSC_SUCCESS);
958: }
960: static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C)
961: {
962: PetscFunctionBegin;
963: C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
964: PetscFunctionReturn(PETSC_SUCCESS);
965: }
967: PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C)
968: {
969: Mat_Product *product = C->product;
971: PetscFunctionBegin;
972: switch (product->type) {
973: case MATPRODUCT_AB:
974: PetscCall(MatProductSetFromOptions_SeqAIJMKL_AB(C));
975: break;
976: case MATPRODUCT_AtB:
977: PetscCall(MatProductSetFromOptions_SeqAIJMKL_AtB(C));
978: break;
979: case MATPRODUCT_ABt:
980: PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABt(C));
981: break;
982: case MATPRODUCT_PtAP:
983: PetscCall(MatProductSetFromOptions_SeqAIJMKL_PtAP(C));
984: break;
985: case MATPRODUCT_RARt:
986: PetscCall(MatProductSetFromOptions_SeqAIJMKL_RARt(C));
987: break;
988: case MATPRODUCT_ABC:
989: PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABC(C));
990: break;
991: default:
992: break;
993: }
994: PetscFunctionReturn(PETSC_SUCCESS);
995: }
996: #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */
998: /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
999: * SeqAIJMKL matrix. This routine is called by the MatCreate_SeqAIJMKL()
1000: * routine, but can also be used to convert an assembled SeqAIJ matrix
1001: * into a SeqAIJMKL one. */
1002: PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A, MatType type, MatReuse reuse, Mat *newmat)
1003: {
1004: Mat B = *newmat;
1005: Mat_SeqAIJMKL *aijmkl;
1006: PetscBool set;
1007: PetscBool sametype;
1009: PetscFunctionBegin;
1010: if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
1012: PetscCall(PetscObjectTypeCompare((PetscObject)A, type, &sametype));
1013: if (sametype) PetscFunctionReturn(PETSC_SUCCESS);
1015: PetscCall(PetscNew(&aijmkl));
1016: B->spptr = (void *)aijmkl;
1018: /* Set function pointers for methods that we inherit from AIJ but override.
1019: * We also parse some command line options below, since those determine some of the methods we point to. */
1020: B->ops->duplicate = MatDuplicate_SeqAIJMKL;
1021: B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL;
1022: B->ops->destroy = MatDestroy_SeqAIJMKL;
1024: aijmkl->sparse_optimized = PETSC_FALSE;
1025: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1026: aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */
1027: #else
1028: aijmkl->no_SpMV2 = PETSC_TRUE;
1029: #endif
1030: aijmkl->eager_inspection = PETSC_FALSE;
1032: /* Parse command line options. */
1033: PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "AIJMKL Options", "Mat");
1034: PetscCall(PetscOptionsBool("-mat_aijmkl_no_spmv2", "Disable use of inspector-executor (SpMV 2) routines", "None", (PetscBool)aijmkl->no_SpMV2, (PetscBool *)&aijmkl->no_SpMV2, &set));
1035: PetscCall(PetscOptionsBool("-mat_aijmkl_eager_inspection", "Run inspection at matrix assembly time, instead of waiting until needed by an operation", "None", (PetscBool)aijmkl->eager_inspection, (PetscBool *)&aijmkl->eager_inspection, &set));
1036: PetscOptionsEnd();
1037: #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1038: if (!aijmkl->no_SpMV2) {
1039: PetscCall(PetscInfo(B, "User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize(); defaulting to non-SpMV2 routines.\n"));
1040: aijmkl->no_SpMV2 = PETSC_TRUE;
1041: }
1042: #endif
1044: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1045: B->ops->mult = MatMult_SeqAIJMKL_SpMV2;
1046: B->ops->multtranspose = MatMultTranspose_SeqAIJMKL_SpMV2;
1047: B->ops->multadd = MatMultAdd_SeqAIJMKL_SpMV2;
1048: B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
1049: #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
1050: B->ops->productsetfromoptions = MatProductSetFromOptions_SeqAIJMKL;
1051: B->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
1052: B->ops->matmultnumeric = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1053: B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL;
1054: B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1055: #if !defined(PETSC_USE_COMPLEX)
1056: B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
1057: #else
1058: B->ops->ptapnumeric = NULL;
1059: #endif
1060: #endif
1061: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
1063: #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
1064: /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the
1065: * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not
1066: * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For
1067: * versions in which the older interface has not been deprecated, we use the old interface. */
1068: if (aijmkl->no_SpMV2) {
1069: B->ops->mult = MatMult_SeqAIJMKL;
1070: B->ops->multtranspose = MatMultTranspose_SeqAIJMKL;
1071: B->ops->multadd = MatMultAdd_SeqAIJMKL;
1072: B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
1073: }
1074: #endif
1076: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", MatConvert_SeqAIJMKL_SeqAIJ));
1078: PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJMKL));
1079: *newmat = B;
1080: PetscFunctionReturn(PETSC_SUCCESS);
1081: }
1083: /*@C
1084: MatCreateSeqAIJMKL - Creates a sparse matrix of type `MATSEQAIJMKL`.
1086: Collective
1088: Input Parameters:
1089: + comm - MPI communicator, set to `PETSC_COMM_SELF`
1090: . m - number of rows
1091: . n - number of columns
1092: . nz - number of nonzeros per row (same for all rows)
1093: - nnz - array containing the number of nonzeros in the various rows
1094: (possibly different for each row) or `NULL`
1096: Output Parameter:
1097: . A - the matrix
1099: Options Database Keys:
1100: + -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
1101: - -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection,
1102: performing this step the first time the matrix is applied
1104: Level: intermediate
1106: Notes:
1107: If `nnz` is given then `nz` is ignored
1109: This type inherits from `MATSEQAIJ` and is largely identical, but uses sparse BLAS
1110: routines from Intel MKL whenever possible.
1112: If the installed version of MKL supports the "SpMV2" sparse
1113: inspector-executor routines, then those are used by default.
1115: `MatMult()`, `MatMultAdd()`, `MatMultTranspose()`, `MatMultTransposeAdd()`, `MatMatMult()`, `MatTransposeMatMult()`, and `MatPtAP()`
1116: (for symmetric A) operations are currently supported.
1117: MKL version 18, update 2 or later is required for `MatPtAP()`, `MatPtAPNumeric()` and `MatMatMultNumeric()`.
1119: .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateMPIAIJMKL()`, `MatSetValues()`
1120: @*/
1121: PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
1122: {
1123: PetscFunctionBegin;
1124: PetscCall(MatCreate(comm, A));
1125: PetscCall(MatSetSizes(*A, m, n, m, n));
1126: PetscCall(MatSetType(*A, MATSEQAIJMKL));
1127: PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A, nz, nnz));
1128: PetscFunctionReturn(PETSC_SUCCESS);
1129: }
1131: PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
1132: {
1133: PetscFunctionBegin;
1134: PetscCall(MatSetType(A, MATSEQAIJ));
1135: PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A));
1136: PetscFunctionReturn(PETSC_SUCCESS);
1137: }