Actual source code: aijmkl.c


  2: /*
  3:   Defines basic operations for the MATSEQAIJMKL matrix class.
  4:   This class is derived from the MATSEQAIJ class and retains the
  5:   compressed row storage (aka Yale sparse matrix format) but uses
  6:   sparse BLAS operations from the Intel Math Kernel Library (MKL)
  7:   wherever possible.
  8: */

 10: #include <../src/mat/impls/aij/seq/aij.h>
 11: #include <../src/mat/impls/aij/seq/aijmkl/aijmkl.h>
 12: #if defined(PETSC_HAVE_MKL_INTEL_ILP64)
 13:   #define MKL_ILP64
 14: #endif
 15: #include <mkl_spblas.h>

 17: typedef struct {
 18:   PetscBool        no_SpMV2;         /* If PETSC_TRUE, then don't use the MKL SpMV2 inspector-executor routines. */
 19:   PetscBool        eager_inspection; /* If PETSC_TRUE, then call mkl_sparse_optimize() in MatDuplicate()/MatAssemblyEnd(). */
 20:   PetscBool        sparse_optimized; /* If PETSC_TRUE, then mkl_sparse_optimize() has been called. */
 21:   PetscObjectState state;
 22: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
 23:   sparse_matrix_t     csrA; /* "Handle" used by SpMV2 inspector-executor routines. */
 24:   struct matrix_descr descr;
 25: #endif
 26: } Mat_SeqAIJMKL;

 28: extern PetscErrorCode MatAssemblyEnd_SeqAIJ(Mat, MatAssemblyType);

 30: PETSC_INTERN PetscErrorCode MatConvert_SeqAIJMKL_SeqAIJ(Mat A, MatType type, MatReuse reuse, Mat *newmat)
 31: {
 32:   /* This routine is only called to convert a MATAIJMKL to its base PETSc type, */
 33:   /* so we will ignore 'MatType type'. */
 34:   Mat B = *newmat;
 35: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
 36:   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
 37: #endif

 39:   PetscFunctionBegin;
 40:   if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));

 42:   /* Reset the original function pointers. */
 43:   B->ops->duplicate               = MatDuplicate_SeqAIJ;
 44:   B->ops->assemblyend             = MatAssemblyEnd_SeqAIJ;
 45:   B->ops->destroy                 = MatDestroy_SeqAIJ;
 46:   B->ops->mult                    = MatMult_SeqAIJ;
 47:   B->ops->multtranspose           = MatMultTranspose_SeqAIJ;
 48:   B->ops->multadd                 = MatMultAdd_SeqAIJ;
 49:   B->ops->multtransposeadd        = MatMultTransposeAdd_SeqAIJ;
 50:   B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJ;
 51:   B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJ_SeqAIJ;
 52:   B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJ_SeqAIJ;
 53:   B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJ_SeqAIJ;
 54:   B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJ_SeqAIJ;
 55:   B->ops->ptapnumeric             = MatPtAPNumeric_SeqAIJ_SeqAIJ;

 57:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", NULL));

 59: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
 60:   /* Free everything in the Mat_SeqAIJMKL data structure. Currently, this
 61:    * simply involves destroying the MKL sparse matrix handle and then freeing
 62:    * the spptr pointer. */
 63:   if (reuse == MAT_INITIAL_MATRIX) aijmkl = (Mat_SeqAIJMKL *)B->spptr;

 65:   if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
 66: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
 67:   PetscCall(PetscFree(B->spptr));

 69:   /* Change the type of B to MATSEQAIJ. */
 70:   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJ));

 72:   *newmat = B;
 73:   PetscFunctionReturn(PETSC_SUCCESS);
 74: }

 76: PetscErrorCode MatDestroy_SeqAIJMKL(Mat A)
 77: {
 78:   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;

 80:   PetscFunctionBegin;

 82:   /* If MatHeaderMerge() was used, then this SeqAIJMKL matrix will not have an spptr pointer. */
 83:   if (aijmkl) {
 84:     /* Clean up everything in the Mat_SeqAIJMKL data structure, then free A->spptr. */
 85: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
 86:     if (aijmkl->sparse_optimized) PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
 87: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */
 88:     PetscCall(PetscFree(A->spptr));
 89:   }

 91:   /* Change the type of A back to SEQAIJ and use MatDestroy_SeqAIJ()
 92:    * to destroy everything that remains. */
 93:   PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ));
 94:   /* I don't call MatSetType().  I believe this is because that
 95:    * is only to be called when *building* a matrix.  I could be wrong, but
 96:    * that is how things work for the SuperLU matrix class. */
 97:   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqaijmkl_seqaij_C", NULL));
 98:   PetscCall(MatDestroy_SeqAIJ(A));
 99:   PetscFunctionReturn(PETSC_SUCCESS);
100: }

102: /* MatSeqAIJMKL_create_mkl_handle(), if called with an AIJMKL matrix that has not had mkl_sparse_optimize() called for it,
103:  * creates an MKL sparse matrix handle from the AIJ arrays and calls mkl_sparse_optimize().
104:  * If called with an AIJMKL matrix for which aijmkl->sparse_optimized == PETSC_TRUE, then it destroys the old matrix
105:  * handle, creates a new one, and then calls mkl_sparse_optimize().
106:  * Although in normal MKL usage it is possible to have a valid matrix handle on which mkl_sparse_optimize() has not been
107:  * called, for AIJMKL the handle creation and optimization step always occur together, so we don't handle the case of
108:  * an unoptimized matrix handle here. */
109: PETSC_INTERN PetscErrorCode MatSeqAIJMKL_create_mkl_handle(Mat A)
110: {
111: #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
112:   /* If the MKL library does not have mkl_sparse_optimize(), then this routine
113:    * does nothing. We make it callable anyway in this case because it cuts
114:    * down on littering the code with #ifdefs. */
115:   PetscFunctionBegin;
116:   PetscFunctionReturn(PETSC_SUCCESS);
117: #else
118:   Mat_SeqAIJ    *a      = (Mat_SeqAIJ *)A->data;
119:   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
120:   PetscInt       m, n;
121:   MatScalar     *aa;
122:   PetscInt      *aj, *ai;

124:   PetscFunctionBegin;
125:   #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
126:   /* For MKL versions that still support the old, non-inspector-executor interfaces versions, we simply exit here if the no_SpMV2
127:    * option has been specified. For versions that have deprecated the old interfaces (version 18, update 2 and later), we must
128:    * use the new inspector-executor interfaces, but we can still use the old, non-inspector-executor code by not calling
129:    * mkl_sparse_optimize() later. */
130:   if (aijmkl->no_SpMV2) PetscFunctionReturn(PETSC_SUCCESS);
131:   #endif

133:   if (aijmkl->sparse_optimized) {
134:     /* Matrix has been previously assembled and optimized. Must destroy old
135:      * matrix handle before running the optimization step again. */
136:     PetscCallExternal(mkl_sparse_destroy, aijmkl->csrA);
137:   }
138:   aijmkl->sparse_optimized = PETSC_FALSE;

140:   /* Now perform the SpMV2 setup and matrix optimization. */
141:   aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
142:   aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
143:   aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
144:   m                  = A->rmap->n;
145:   n                  = A->cmap->n;
146:   aj                 = a->j; /* aj[k] gives column index for element aa[k]. */
147:   aa                 = a->a; /* Nonzero elements stored row-by-row. */
148:   ai                 = a->i; /* ai[k] is the position in aa and aj where row k starts. */
149:   if (a->nz && aa && !A->structure_only) {
150:     /* Create a new, optimized sparse matrix handle only if the matrix has nonzero entries.
151:      * The MKL sparse-inspector executor routines don't like being passed an empty matrix. */
152:     PetscCallExternal(mkl_sparse_x_create_csr, &aijmkl->csrA, SPARSE_INDEX_BASE_ZERO, (MKL_INT)m, (MKL_INT)n, (MKL_INT *)ai, (MKL_INT *)(ai + 1), (MKL_INT *)aj, aa);
153:     PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000);
154:     PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE);
155:     if (!aijmkl->no_SpMV2) PetscCallExternal(mkl_sparse_optimize, aijmkl->csrA);
156:     aijmkl->sparse_optimized = PETSC_TRUE;
157:     PetscCall(PetscObjectStateGet((PetscObject)A, &(aijmkl->state)));
158:   } else {
159:     aijmkl->csrA = NULL;
160:   }

162:   PetscFunctionReturn(PETSC_SUCCESS);
163: #endif
164: }

166: #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
167: /* Take an already created but empty matrix and set up the nonzero structure from an MKL sparse matrix handle. */
168: static PetscErrorCode MatSeqAIJMKL_setup_structure_from_mkl_handle(MPI_Comm comm, sparse_matrix_t csrA, PetscInt nrows, PetscInt ncols, Mat A)
169: {
170:   sparse_index_base_t indexing;
171:   PetscInt            m, n;
172:   PetscInt           *aj, *ai, *dummy;
173:   MatScalar          *aa;
174:   Mat_SeqAIJMKL      *aijmkl;

176:   PetscFunctionBegin;
177:   if (csrA) {
178:     /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
179:     PetscCallExternal(mkl_sparse_x_export_csr, csrA, &indexing, (MKL_INT *)&m, (MKL_INT *)&n, (MKL_INT **)&ai, (MKL_INT **)&dummy, (MKL_INT **)&aj, &aa);
180:     PetscCheck((m == nrows) && (n == ncols), PETSC_COMM_SELF, PETSC_ERR_LIB, "Number of rows/columns does not match those from mkl_sparse_x_export_csr()");
181:   } else {
182:     aj = ai = NULL;
183:     aa      = NULL;
184:   }

186:   PetscCall(MatSetType(A, MATSEQAIJ));
187:   PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, nrows, ncols));
188:   /* We use MatSeqAIJSetPreallocationCSR() instead of MatCreateSeqAIJWithArrays() because we must copy the arrays exported
189:    * from MKL; MKL developers tell us that modifying the arrays may cause unexpected results when using the MKL handle, and
190:    * they will be destroyed when the MKL handle is destroyed.
191:    * (In the interest of reducing memory consumption in future, can we figure out good ways to deal with this?) */
192:   if (csrA) {
193:     PetscCall(MatSeqAIJSetPreallocationCSR(A, ai, aj, NULL));
194:   } else {
195:     /* Since MatSeqAIJSetPreallocationCSR does initial set up and assembly begin/end, we must do that ourselves here. */
196:     PetscCall(MatSetUp(A));
197:     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
198:     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
199:   }

201:   /* We now have an assembled sequential AIJ matrix created from copies of the exported arrays from the MKL matrix handle.
202:    * Now turn it into a MATSEQAIJMKL. */
203:   PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A));

205:   aijmkl       = (Mat_SeqAIJMKL *)A->spptr;
206:   aijmkl->csrA = csrA;

208:   /* The below code duplicates much of what is in MatSeqAIJKL_create_mkl_handle(). I dislike this code duplication, but
209:    * MatSeqAIJMKL_create_mkl_handle() cannot be used because we don't need to create a handle -- we've already got one,
210:    * and just need to be able to run the MKL optimization step. */
211:   aijmkl->descr.type = SPARSE_MATRIX_TYPE_GENERAL;
212:   aijmkl->descr.mode = SPARSE_FILL_MODE_LOWER;
213:   aijmkl->descr.diag = SPARSE_DIAG_NON_UNIT;
214:   if (csrA) {
215:     PetscCallExternal(mkl_sparse_set_mv_hint, aijmkl->csrA, SPARSE_OPERATION_NON_TRANSPOSE, aijmkl->descr, 1000);
216:     PetscCallExternal(mkl_sparse_set_memory_hint, aijmkl->csrA, SPARSE_MEMORY_AGGRESSIVE);
217:   }
218:   PetscCall(PetscObjectStateGet((PetscObject)A, &(aijmkl->state)));
219:   PetscFunctionReturn(PETSC_SUCCESS);
220: }
221: #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */

223: /* MatSeqAIJMKL_update_from_mkl_handle() updates the matrix values array from the contents of the associated MKL sparse matrix handle.
224:  * This is needed after mkl_sparse_sp2m() with SPARSE_STAGE_FINALIZE_MULT has been used to compute new values of the matrix in
225:  * MatMatMultNumeric(). */
226: #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
227: static PetscErrorCode MatSeqAIJMKL_update_from_mkl_handle(Mat A)
228: {
229:   PetscInt            i;
230:   PetscInt            nrows, ncols;
231:   PetscInt            nz;
232:   PetscInt           *ai, *aj, *dummy;
233:   PetscScalar        *aa;
234:   Mat_SeqAIJMKL      *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
235:   sparse_index_base_t indexing;

237:   PetscFunctionBegin;
238:   /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
239:   if (!aijmkl->csrA) PetscFunctionReturn(PETSC_SUCCESS);

241:   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
242:   PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, (MKL_INT *)&nrows, (MKL_INT *)&ncols, (MKL_INT **)&ai, (MKL_INT **)&dummy, (MKL_INT **)&aj, &aa);

244:   /* We can't just do a copy from the arrays exported by MKL to those used for the PETSc AIJ storage, because the MKL and PETSc
245:    * representations differ in small ways (e.g., more explicit nonzeros per row due to preallocation). */
246:   for (i = 0; i < nrows; i++) {
247:     nz = ai[i + 1] - ai[i];
248:     PetscCall(MatSetValues_SeqAIJ(A, 1, &i, nz, aj + ai[i], aa + ai[i], INSERT_VALUES));
249:   }

251:   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
252:   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

254:   PetscCall(PetscObjectStateGet((PetscObject)A, &(aijmkl->state)));
255:   /* At this point our matrix has a valid MKL handle, the contents of which match the PETSc AIJ representation.
256:    * The MKL handle has *not* had mkl_sparse_optimize() called on it, though -- the MKL developers have confirmed
257:    * that the matrix inspection/optimization step is not performed when matrix-matrix multiplication is finalized. */
258:   aijmkl->sparse_optimized = PETSC_FALSE;
259:   PetscFunctionReturn(PETSC_SUCCESS);
260: }
261: #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */

263: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
264: PETSC_INTERN PetscErrorCode MatSeqAIJMKL_view_mkl_handle(Mat A, PetscViewer viewer)
265: {
266:   PetscInt            i, j, k;
267:   PetscInt            nrows, ncols;
268:   PetscInt            nz;
269:   PetscInt           *ai, *aj, *dummy;
270:   PetscScalar        *aa;
271:   Mat_SeqAIJMKL      *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
272:   sparse_index_base_t indexing;

274:   PetscFunctionBegin;
275:   PetscCall(PetscViewerASCIIPrintf(viewer, "Contents of MKL sparse matrix handle for MATSEQAIJMKL object:\n"));

277:   /* Exit immediately in case of the MKL matrix handle being NULL; this will be the case for empty matrices (zero rows or columns). */
278:   if (!aijmkl->csrA) {
279:     PetscCall(PetscViewerASCIIPrintf(viewer, "MKL matrix handle is NULL\n"));
280:     PetscFunctionReturn(PETSC_SUCCESS);
281:   }

283:   /* Note: Must pass in &dummy below since MKL can't accept NULL for this output array we don't actually want. */
284:   PetscCallExternal(mkl_sparse_x_export_csr, aijmkl->csrA, &indexing, (MKL_INT *)&nrows, (MKL_INT *)&ncols, (MKL_INT **)&ai, (MKL_INT **)&dummy, (MKL_INT **)&aj, &aa);

286:   k = 0;
287:   for (i = 0; i < nrows; i++) {
288:     PetscCall(PetscViewerASCIIPrintf(viewer, "row %" PetscInt_FMT ": ", i));
289:     nz = ai[i + 1] - ai[i];
290:     for (j = 0; j < nz; j++) {
291:       if (aa) {
292:         PetscCall(PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", %g)  ", aj[k], PetscRealPart(aa[k])));
293:       } else {
294:         PetscCall(PetscViewerASCIIPrintf(viewer, "(%" PetscInt_FMT ", NULL)", aj[k]));
295:       }
296:       k++;
297:     }
298:     PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
299:   }
300:   PetscFunctionReturn(PETSC_SUCCESS);
301: }
302: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

304: PetscErrorCode MatDuplicate_SeqAIJMKL(Mat A, MatDuplicateOption op, Mat *M)
305: {
306:   Mat_SeqAIJMKL *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
307:   Mat_SeqAIJMKL *aijmkl_dest;

309:   PetscFunctionBegin;
310:   PetscCall(MatDuplicate_SeqAIJ(A, op, M));
311:   aijmkl_dest = (Mat_SeqAIJMKL *)(*M)->spptr;
312:   PetscCall(PetscArraycpy(aijmkl_dest, aijmkl, 1));
313:   aijmkl_dest->sparse_optimized = PETSC_FALSE;
314:   if (aijmkl->eager_inspection) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
315:   PetscFunctionReturn(PETSC_SUCCESS);
316: }

318: PetscErrorCode MatAssemblyEnd_SeqAIJMKL(Mat A, MatAssemblyType mode)
319: {
320:   Mat_SeqAIJ    *a = (Mat_SeqAIJ *)A->data;
321:   Mat_SeqAIJMKL *aijmkl;

323:   PetscFunctionBegin;
324:   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS);

326:   /* Since a MATSEQAIJMKL matrix is really just a MATSEQAIJ with some
327:    * extra information and some different methods, call the AssemblyEnd
328:    * routine for a MATSEQAIJ.
329:    * I'm not sure if this is the best way to do this, but it avoids
330:    * a lot of code duplication. */
331:   a->inode.use = PETSC_FALSE; /* Must disable: otherwise the MKL routines won't get used. */
332:   PetscCall(MatAssemblyEnd_SeqAIJ(A, mode));

334:   /* If the user has requested "eager" inspection, create the optimized MKL sparse handle (if needed; the function checks).
335:    * (The default is to do "lazy" inspection, deferring this until something like MatMult() is called.) */
336:   aijmkl = (Mat_SeqAIJMKL *)A->spptr;
337:   if (aijmkl->eager_inspection) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));

339:   PetscFunctionReturn(PETSC_SUCCESS);
340: }

342: #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
343: PetscErrorCode MatMult_SeqAIJMKL(Mat A, Vec xx, Vec yy)
344: {
345:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
346:   const PetscScalar *x;
347:   PetscScalar       *y;
348:   const MatScalar   *aa;
349:   PetscInt           m     = A->rmap->n;
350:   PetscInt           n     = A->cmap->n;
351:   PetscScalar        alpha = 1.0;
352:   PetscScalar        beta  = 0.0;
353:   const PetscInt    *aj, *ai;
354:   char               matdescra[6];

356:   /* Variables not in MatMult_SeqAIJ. */
357:   char transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */

359:   PetscFunctionBegin;
360:   matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
361:   matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
362:   PetscCall(VecGetArrayRead(xx, &x));
363:   PetscCall(VecGetArray(yy, &y));
364:   aj = a->j; /* aj[k] gives column index for element aa[k]. */
365:   aa = a->a; /* Nonzero elements stored row-by-row. */
366:   ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */

368:   /* Call MKL sparse BLAS routine to do the MatMult. */
369:   mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y);

371:   PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
372:   PetscCall(VecRestoreArrayRead(xx, &x));
373:   PetscCall(VecRestoreArray(yy, &y));
374:   PetscFunctionReturn(PETSC_SUCCESS);
375: }
376: #endif

378: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
379: PetscErrorCode MatMult_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy)
380: {
381:   Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
382:   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
383:   const PetscScalar *x;
384:   PetscScalar       *y;
385:   PetscObjectState   state;

387:   PetscFunctionBegin;

389:   /* If there are no nonzero entries, zero yy and return immediately. */
390:   if (!a->nz) {
391:     PetscCall(VecGetArray(yy, &y));
392:     PetscCall(PetscArrayzero(y, A->rmap->n));
393:     PetscCall(VecRestoreArray(yy, &y));
394:     PetscFunctionReturn(PETSC_SUCCESS);
395:   }

397:   PetscCall(VecGetArrayRead(xx, &x));
398:   PetscCall(VecGetArray(yy, &y));

400:   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
401:    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
402:    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
403:   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
404:   if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));

406:   /* Call MKL SpMV2 executor routine to do the MatMult. */
407:   PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y);

409:   PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
410:   PetscCall(VecRestoreArrayRead(xx, &x));
411:   PetscCall(VecRestoreArray(yy, &y));
412:   PetscFunctionReturn(PETSC_SUCCESS);
413: }
414: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

416: #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
417: PetscErrorCode MatMultTranspose_SeqAIJMKL(Mat A, Vec xx, Vec yy)
418: {
419:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
420:   const PetscScalar *x;
421:   PetscScalar       *y;
422:   const MatScalar   *aa;
423:   PetscInt           m     = A->rmap->n;
424:   PetscInt           n     = A->cmap->n;
425:   PetscScalar        alpha = 1.0;
426:   PetscScalar        beta  = 0.0;
427:   const PetscInt    *aj, *ai;
428:   char               matdescra[6];

430:   /* Variables not in MatMultTranspose_SeqAIJ. */
431:   char transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */

433:   PetscFunctionBegin;
434:   matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
435:   matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */
436:   PetscCall(VecGetArrayRead(xx, &x));
437:   PetscCall(VecGetArray(yy, &y));
438:   aj = a->j; /* aj[k] gives column index for element aa[k]. */
439:   aa = a->a; /* Nonzero elements stored row-by-row. */
440:   ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */

442:   /* Call MKL sparse BLAS routine to do the MatMult. */
443:   mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, y);

445:   PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
446:   PetscCall(VecRestoreArrayRead(xx, &x));
447:   PetscCall(VecRestoreArray(yy, &y));
448:   PetscFunctionReturn(PETSC_SUCCESS);
449: }
450: #endif

452: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
453: PetscErrorCode MatMultTranspose_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy)
454: {
455:   Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
456:   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
457:   const PetscScalar *x;
458:   PetscScalar       *y;
459:   PetscObjectState   state;

461:   PetscFunctionBegin;

463:   /* If there are no nonzero entries, zero yy and return immediately. */
464:   if (!a->nz) {
465:     PetscCall(VecGetArray(yy, &y));
466:     PetscCall(PetscArrayzero(y, A->cmap->n));
467:     PetscCall(VecRestoreArray(yy, &y));
468:     PetscFunctionReturn(PETSC_SUCCESS);
469:   }

471:   PetscCall(VecGetArrayRead(xx, &x));
472:   PetscCall(VecGetArray(yy, &y));

474:   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
475:    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
476:    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
477:   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
478:   if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));

480:   /* Call MKL SpMV2 executor routine to do the MatMultTranspose. */
481:   PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, y);

483:   PetscCall(PetscLogFlops(2.0 * a->nz - a->nonzerorowcnt));
484:   PetscCall(VecRestoreArrayRead(xx, &x));
485:   PetscCall(VecRestoreArray(yy, &y));
486:   PetscFunctionReturn(PETSC_SUCCESS);
487: }
488: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

490: #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
491: PetscErrorCode MatMultAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz)
492: {
493:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
494:   const PetscScalar *x;
495:   PetscScalar       *y, *z;
496:   const MatScalar   *aa;
497:   PetscInt           m = A->rmap->n;
498:   PetscInt           n = A->cmap->n;
499:   const PetscInt    *aj, *ai;
500:   PetscInt           i;

502:   /* Variables not in MatMultAdd_SeqAIJ. */
503:   char        transa = 'n'; /* Used to indicate to MKL that we are not computing the transpose product. */
504:   PetscScalar alpha  = 1.0;
505:   PetscScalar beta;
506:   char        matdescra[6];

508:   PetscFunctionBegin;
509:   matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
510:   matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */

512:   PetscCall(VecGetArrayRead(xx, &x));
513:   PetscCall(VecGetArrayPair(yy, zz, &y, &z));
514:   aj = a->j; /* aj[k] gives column index for element aa[k]. */
515:   aa = a->a; /* Nonzero elements stored row-by-row. */
516:   ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */

518:   /* Call MKL sparse BLAS routine to do the MatMult. */
519:   if (zz == yy) {
520:     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
521:     beta = 1.0;
522:     mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
523:   } else {
524:     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
525:      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
526:     beta = 0.0;
527:     mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
528:     for (i = 0; i < m; i++) z[i] += y[i];
529:   }

531:   PetscCall(PetscLogFlops(2.0 * a->nz));
532:   PetscCall(VecRestoreArrayRead(xx, &x));
533:   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
534:   PetscFunctionReturn(PETSC_SUCCESS);
535: }
536: #endif

538: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
539: PetscErrorCode MatMultAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz)
540: {
541:   Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
542:   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
543:   const PetscScalar *x;
544:   PetscScalar       *y, *z;
545:   PetscInt           m = A->rmap->n;
546:   PetscInt           i;

548:   /* Variables not in MatMultAdd_SeqAIJ. */
549:   PetscObjectState state;

551:   PetscFunctionBegin;

553:   /* If there are no nonzero entries, set zz = yy and return immediately. */
554:   if (!a->nz) {
555:     PetscCall(VecGetArrayPair(yy, zz, &y, &z));
556:     PetscCall(PetscArraycpy(z, y, m));
557:     PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
558:     PetscFunctionReturn(PETSC_SUCCESS);
559:   }

561:   PetscCall(VecGetArrayRead(xx, &x));
562:   PetscCall(VecGetArrayPair(yy, zz, &y, &z));

564:   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
565:    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
566:    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
567:   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
568:   if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));

570:   /* Call MKL sparse BLAS routine to do the MatMult. */
571:   if (zz == yy) {
572:     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
573:      * with alpha and beta both set to 1.0. */
574:     PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z);
575:   } else {
576:     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
577:      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
578:     PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_NON_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z);
579:     for (i = 0; i < m; i++) z[i] += y[i];
580:   }

582:   PetscCall(PetscLogFlops(2.0 * a->nz));
583:   PetscCall(VecRestoreArrayRead(xx, &x));
584:   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
585:   PetscFunctionReturn(PETSC_SUCCESS);
586: }
587: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

589: #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
590: PetscErrorCode MatMultTransposeAdd_SeqAIJMKL(Mat A, Vec xx, Vec yy, Vec zz)
591: {
592:   Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data;
593:   const PetscScalar *x;
594:   PetscScalar       *y, *z;
595:   const MatScalar   *aa;
596:   PetscInt           m = A->rmap->n;
597:   PetscInt           n = A->cmap->n;
598:   const PetscInt    *aj, *ai;
599:   PetscInt           i;

601:   /* Variables not in MatMultTransposeAdd_SeqAIJ. */
602:   char        transa = 't'; /* Used to indicate to MKL that we are computing the transpose product. */
603:   PetscScalar alpha  = 1.0;
604:   PetscScalar beta;
605:   char        matdescra[6];

607:   PetscFunctionBegin;
608:   matdescra[0] = 'g'; /* Indicates to MKL that we using a general CSR matrix. */
609:   matdescra[3] = 'c'; /* Indicates to MKL that we use C-style (0-based) indexing. */

611:   PetscCall(VecGetArrayRead(xx, &x));
612:   PetscCall(VecGetArrayPair(yy, zz, &y, &z));
613:   aj = a->j; /* aj[k] gives column index for element aa[k]. */
614:   aa = a->a; /* Nonzero elements stored row-by-row. */
615:   ai = a->i; /* ai[k] is the position in aa and aj where row k starts. */

617:   /* Call MKL sparse BLAS routine to do the MatMult. */
618:   if (zz == yy) {
619:     /* If zz and yy are the same vector, we can use MKL's mkl_xcsrmv(), which calculates y = alpha*A*x + beta*y. */
620:     beta = 1.0;
621:     mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
622:   } else {
623:     /* zz and yy are different vectors, so call MKL's mkl_xcsrmv() with beta=0, then add the result to z.
624:      * MKL sparse BLAS does not have a MatMultAdd equivalent. */
625:     beta = 0.0;
626:     mkl_xcsrmv(&transa, &m, &n, &alpha, matdescra, aa, aj, ai, ai + 1, x, &beta, z);
627:     for (i = 0; i < n; i++) z[i] += y[i];
628:   }

630:   PetscCall(PetscLogFlops(2.0 * a->nz));
631:   PetscCall(VecRestoreArrayRead(xx, &x));
632:   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
633:   PetscFunctionReturn(PETSC_SUCCESS);
634: }
635: #endif

637: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
638: PetscErrorCode MatMultTransposeAdd_SeqAIJMKL_SpMV2(Mat A, Vec xx, Vec yy, Vec zz)
639: {
640:   Mat_SeqAIJ        *a      = (Mat_SeqAIJ *)A->data;
641:   Mat_SeqAIJMKL     *aijmkl = (Mat_SeqAIJMKL *)A->spptr;
642:   const PetscScalar *x;
643:   PetscScalar       *y, *z;
644:   PetscInt           n = A->cmap->n;
645:   PetscInt           i;
646:   PetscObjectState   state;

648:   /* Variables not in MatMultTransposeAdd_SeqAIJ. */

650:   PetscFunctionBegin;

652:   /* If there are no nonzero entries, set zz = yy and return immediately. */
653:   if (!a->nz) {
654:     PetscCall(VecGetArrayPair(yy, zz, &y, &z));
655:     PetscCall(PetscArraycpy(z, y, n));
656:     PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
657:     PetscFunctionReturn(PETSC_SUCCESS);
658:   }

660:   PetscCall(VecGetArrayRead(xx, &x));
661:   PetscCall(VecGetArrayPair(yy, zz, &y, &z));

663:   /* In some cases, we get to this point without mkl_sparse_optimize() having been called, so we check and then call
664:    * it if needed. Eventually, when everything in PETSc is properly updating the matrix state, we should probably
665:    * take a "lazy" approach to creation/updating of the MKL matrix handle and plan to always do it here (when needed). */
666:   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
667:   if (!aijmkl->sparse_optimized || aijmkl->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));

669:   /* Call MKL sparse BLAS routine to do the MatMult. */
670:   if (zz == yy) {
671:     /* If zz and yy are the same vector, we can use mkl_sparse_x_mv, which calculates y = alpha*A*x + beta*y,
672:      * with alpha and beta both set to 1.0. */
673:     PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 1.0, z);
674:   } else {
675:     /* zz and yy are different vectors, so we call mkl_sparse_x_mv with alpha=1.0 and beta=0.0, and then
676:      * we add the contents of vector yy to the result; MKL sparse BLAS does not have a MatMultAdd equivalent. */
677:     PetscCallExternal(mkl_sparse_x_mv, SPARSE_OPERATION_TRANSPOSE, 1.0, aijmkl->csrA, aijmkl->descr, x, 0.0, z);
678:     for (i = 0; i < n; i++) z[i] += y[i];
679:   }

681:   PetscCall(PetscLogFlops(2.0 * a->nz));
682:   PetscCall(VecRestoreArrayRead(xx, &x));
683:   PetscCall(VecRestoreArrayPair(yy, zz, &y, &z));
684:   PetscFunctionReturn(PETSC_SUCCESS);
685: }
686: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

688: #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
689: static PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C)
690: {
691:   Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr;
692:   sparse_matrix_t     csrA, csrB, csrC;
693:   PetscInt            nrows, ncols;
694:   struct matrix_descr descr_type_gen;
695:   PetscObjectState    state;

697:   PetscFunctionBegin;
698:   /* Determine the number of rows and columns that the result matrix C will have. We have to do this ourselves because MKL does
699:    * not handle sparse matrices with zero rows or columns. */
700:   if (transA == SPARSE_OPERATION_NON_TRANSPOSE) nrows = A->rmap->N;
701:   else nrows = A->cmap->N;
702:   if (transB == SPARSE_OPERATION_NON_TRANSPOSE) ncols = B->cmap->N;
703:   else ncols = B->rmap->N;

705:   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
706:   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
707:   PetscCall(PetscObjectStateGet((PetscObject)B, &state));
708:   if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B));
709:   csrA                = a->csrA;
710:   csrB                = b->csrA;
711:   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;

713:   if (csrA && csrB) {
714:     PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FULL_MULT_NO_VAL, &csrC);
715:   } else {
716:     csrC = NULL;
717:   }

719:   PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, nrows, ncols, C));

721:   PetscFunctionReturn(PETSC_SUCCESS);
722: }

724: PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(Mat A, const sparse_operation_t transA, Mat B, const sparse_operation_t transB, Mat C)
725: {
726:   Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *b = (Mat_SeqAIJMKL *)B->spptr, *c = (Mat_SeqAIJMKL *)C->spptr;
727:   sparse_matrix_t     csrA, csrB, csrC;
728:   struct matrix_descr descr_type_gen;
729:   PetscObjectState    state;

731:   PetscFunctionBegin;
732:   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
733:   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
734:   PetscCall(PetscObjectStateGet((PetscObject)B, &state));
735:   if (!b->sparse_optimized || b->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(B));
736:   csrA                = a->csrA;
737:   csrB                = b->csrA;
738:   csrC                = c->csrA;
739:   descr_type_gen.type = SPARSE_MATRIX_TYPE_GENERAL;

741:   if (csrA && csrB) {
742:     PetscCallExternal(mkl_sparse_sp2m, transA, descr_type_gen, csrA, transB, descr_type_gen, csrB, SPARSE_STAGE_FINALIZE_MULT, &csrC);
743:   } else {
744:     csrC = NULL;
745:   }

747:   /* Have to update the PETSc AIJ representation for matrix C from contents of MKL handle. */
748:   PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C));

750:   PetscFunctionReturn(PETSC_SUCCESS);
751: }

753: PetscErrorCode MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
754: {
755:   PetscFunctionBegin;
756:   PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
757:   PetscFunctionReturn(PETSC_SUCCESS);
758: }

760: PetscErrorCode MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
761: {
762:   PetscFunctionBegin;
763:   PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
764:   PetscFunctionReturn(PETSC_SUCCESS);
765: }

767: PetscErrorCode MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
768: {
769:   PetscFunctionBegin;
770:   PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
771:   PetscFunctionReturn(PETSC_SUCCESS);
772: }

774: PetscErrorCode MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
775: {
776:   PetscFunctionBegin;
777:   PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_TRANSPOSE, B, SPARSE_OPERATION_NON_TRANSPOSE, C));
778:   PetscFunctionReturn(PETSC_SUCCESS);
779: }

781: PetscErrorCode MatMatTransposeMultSymbolic_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, PetscReal fill, Mat C)
782: {
783:   PetscFunctionBegin;
784:   PetscCall(MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C));
785:   PetscFunctionReturn(PETSC_SUCCESS);
786: }

788: PetscErrorCode MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL(Mat A, Mat B, Mat C)
789: {
790:   PetscFunctionBegin;
791:   PetscCall(MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL_Private(A, SPARSE_OPERATION_NON_TRANSPOSE, B, SPARSE_OPERATION_TRANSPOSE, C));
792:   PetscFunctionReturn(PETSC_SUCCESS);
793: }

795: static PetscErrorCode MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
796: {
797:   Mat_Product *product = C->product;
798:   Mat          A = product->A, B = product->B;

800:   PetscFunctionBegin;
801:   PetscCall(MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL(A, B, C));
802:   PetscFunctionReturn(PETSC_SUCCESS);
803: }

805: static PetscErrorCode MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL(Mat C)
806: {
807:   Mat_Product *product = C->product;
808:   Mat          A = product->A, B = product->B;
809:   PetscReal    fill = product->fill;

811:   PetscFunctionBegin;
812:   PetscCall(MatTransposeMatMultSymbolic_SeqAIJMKL_SeqAIJMKL(A, B, fill, C));
813:   C->ops->productnumeric = MatProductNumeric_AtB_SeqAIJMKL_SeqAIJMKL;
814:   PetscFunctionReturn(PETSC_SUCCESS);
815: }

817: PetscErrorCode MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat A, Mat P, Mat C)
818: {
819:   Mat                 Ct;
820:   Vec                 zeros;
821:   Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr, *c = (Mat_SeqAIJMKL *)C->spptr;
822:   sparse_matrix_t     csrA, csrP, csrC;
823:   PetscBool           set, flag;
824:   struct matrix_descr descr_type_sym;
825:   PetscObjectState    state;

827:   PetscFunctionBegin;
828:   PetscCall(MatIsSymmetricKnown(A, &set, &flag));
829:   PetscCheck(set && flag, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal() called on matrix A not marked as symmetric");

831:   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
832:   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
833:   PetscCall(PetscObjectStateGet((PetscObject)P, &state));
834:   if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P));
835:   csrA                = a->csrA;
836:   csrP                = p->csrA;
837:   csrC                = c->csrA;
838:   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
839:   descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
840:   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;

842:   /* the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
843:   PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FINALIZE_MULT);

845:   /* Update the PETSc AIJ representation for matrix C from contents of MKL handle.
846:    * This is more complicated than it should be: it turns out that, though mkl_sparse_sypr() will accept a full AIJ/CSR matrix,
847:    * the output matrix only contains the upper or lower triangle (we arbitrarily have chosen upper) of the symmetric matrix.
848:    * We have to fill in the missing portion, which we currently do below by forming the transpose and performing at MatAXPY
849:    * operation. This may kill any performance benefit of using the optimized mkl_sparse_sypr() routine. Performance might
850:    * improve if we come up with a more efficient way to do this, or we can convince the MKL team to provide an option to output
851:    * the full matrix. */
852:   PetscCall(MatSeqAIJMKL_update_from_mkl_handle(C));
853:   PetscCall(MatTranspose(C, MAT_INITIAL_MATRIX, &Ct));
854:   PetscCall(MatCreateVecs(C, &zeros, NULL));
855:   PetscCall(VecSetFromOptions(zeros));
856:   PetscCall(VecZeroEntries(zeros));
857:   PetscCall(MatDiagonalSet(Ct, zeros, INSERT_VALUES));
858:   PetscCall(MatAXPY(C, 1.0, Ct, DIFFERENT_NONZERO_PATTERN));
859:   /* Note: The MatAXPY() call destroys the MatProduct, so we must recreate it. */
860:   PetscCall(MatProductCreateWithMat(A, P, NULL, C));
861:   PetscCall(MatProductSetType(C, MATPRODUCT_PtAP));
862:   PetscCall(MatSeqAIJMKL_create_mkl_handle(C));
863:   PetscCall(VecDestroy(&zeros));
864:   PetscCall(MatDestroy(&Ct));
865:   PetscFunctionReturn(PETSC_SUCCESS);
866: }

868: PetscErrorCode MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal(Mat C)
869: {
870:   Mat_Product        *product = C->product;
871:   Mat                 A = product->A, P = product->B;
872:   Mat_SeqAIJMKL      *a = (Mat_SeqAIJMKL *)A->spptr, *p = (Mat_SeqAIJMKL *)P->spptr;
873:   sparse_matrix_t     csrA, csrP, csrC;
874:   struct matrix_descr descr_type_sym;
875:   PetscObjectState    state;

877:   PetscFunctionBegin;
878:   PetscCall(PetscObjectStateGet((PetscObject)A, &state));
879:   if (!a->sparse_optimized || a->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(A));
880:   PetscCall(PetscObjectStateGet((PetscObject)P, &state));
881:   if (!p->sparse_optimized || p->state != state) PetscCall(MatSeqAIJMKL_create_mkl_handle(P));
882:   csrA                = a->csrA;
883:   csrP                = p->csrA;
884:   descr_type_sym.type = SPARSE_MATRIX_TYPE_SYMMETRIC;
885:   descr_type_sym.mode = SPARSE_FILL_MODE_UPPER;
886:   descr_type_sym.diag = SPARSE_DIAG_NON_UNIT;

888:   /* the call below won't work for complex matrices. (We protect this when pointers are assigned in MatConvert.) */
889:   if (csrP && csrA) {
890:     PetscCallExternal(mkl_sparse_sypr, SPARSE_OPERATION_TRANSPOSE, csrP, csrA, descr_type_sym, &csrC, SPARSE_STAGE_FULL_MULT_NO_VAL);
891:   } else {
892:     csrC = NULL;
893:   }

895:   /* Update the I and J arrays of the PETSc AIJ representation for matrix C from contents of MKL handle.
896:    * Note that, because mkl_sparse_sypr() only computes one triangle of the symmetric matrix, this representation will only contain
897:    * the upper triangle of the symmetric matrix. We fix this in MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal(). I believe that
898:    * leaving things in this incomplete state is OK because the numeric product should follow soon after, but am not certain if this
899:    * is guaranteed. */
900:   PetscCall(MatSeqAIJMKL_setup_structure_from_mkl_handle(PETSC_COMM_SELF, csrC, P->cmap->N, P->cmap->N, C));

902:   C->ops->productnumeric = MatProductNumeric_PtAP;
903:   PetscFunctionReturn(PETSC_SUCCESS);
904: }

906: static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AB(Mat C)
907: {
908:   PetscFunctionBegin;
909:   C->ops->productsymbolic = MatProductSymbolic_AB;
910:   C->ops->matmultsymbolic = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
911:   PetscFunctionReturn(PETSC_SUCCESS);
912: }

914: static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_AtB(Mat C)
915: {
916:   PetscFunctionBegin;
917:   C->ops->productsymbolic = MatProductSymbolic_AtB_SeqAIJMKL_SeqAIJMKL;
918:   PetscFunctionReturn(PETSC_SUCCESS);
919: }

921: static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABt(Mat C)
922: {
923:   PetscFunctionBegin;
924:   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_SeqAIJ_SeqAIJ;
925:   C->ops->productsymbolic          = MatProductSymbolic_ABt;
926:   PetscFunctionReturn(PETSC_SUCCESS);
927: }

929: static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_PtAP(Mat C)
930: {
931:   Mat_Product *product = C->product;
932:   Mat          A       = product->A;
933:   PetscBool    set, flag;

935:   PetscFunctionBegin;
936:   if (PetscDefined(USE_COMPLEX)) {
937:     /* By setting C->ops->productsymbolic to NULL, we ensure that MatProductSymbolic_Unsafe() will be used.
938:      * We do this in several other locations in this file. This works for the time being, but these
939:      * routines are considered unsafe and may be removed from the MatProduct code in the future.
940:      * TODO: Add proper MATSEQAIJMKL implementations */
941:     C->ops->productsymbolic = NULL;
942:   } else {
943:     /* AIJMKL only has an optimized routine for PtAP when A is symmetric and real. */
944:     PetscCall(MatIsSymmetricKnown(A, &set, &flag));
945:     if (set && flag) C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
946:     else C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
947:     /* we don't set C->ops->productnumeric here, as this must happen in MatProductSymbolic_PtAP_XXX(),
948:      * depending on whether the algorithm for the general case vs. the real symmetric one is used. */
949:   }
950:   PetscFunctionReturn(PETSC_SUCCESS);
951: }

953: static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_RARt(Mat C)
954: {
955:   PetscFunctionBegin;
956:   C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
957:   PetscFunctionReturn(PETSC_SUCCESS);
958: }

960: static PetscErrorCode MatProductSetFromOptions_SeqAIJMKL_ABC(Mat C)
961: {
962:   PetscFunctionBegin;
963:   C->ops->productsymbolic = NULL; /* MatProductSymbolic_Unsafe() will be used. */
964:   PetscFunctionReturn(PETSC_SUCCESS);
965: }

967: PetscErrorCode MatProductSetFromOptions_SeqAIJMKL(Mat C)
968: {
969:   Mat_Product *product = C->product;

971:   PetscFunctionBegin;
972:   switch (product->type) {
973:   case MATPRODUCT_AB:
974:     PetscCall(MatProductSetFromOptions_SeqAIJMKL_AB(C));
975:     break;
976:   case MATPRODUCT_AtB:
977:     PetscCall(MatProductSetFromOptions_SeqAIJMKL_AtB(C));
978:     break;
979:   case MATPRODUCT_ABt:
980:     PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABt(C));
981:     break;
982:   case MATPRODUCT_PtAP:
983:     PetscCall(MatProductSetFromOptions_SeqAIJMKL_PtAP(C));
984:     break;
985:   case MATPRODUCT_RARt:
986:     PetscCall(MatProductSetFromOptions_SeqAIJMKL_RARt(C));
987:     break;
988:   case MATPRODUCT_ABC:
989:     PetscCall(MatProductSetFromOptions_SeqAIJMKL_ABC(C));
990:     break;
991:   default:
992:     break;
993:   }
994:   PetscFunctionReturn(PETSC_SUCCESS);
995: }
996: #endif /* PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE */

998: /* MatConvert_SeqAIJ_SeqAIJMKL converts a SeqAIJ matrix into a
999:  * SeqAIJMKL matrix.  This routine is called by the MatCreate_SeqAIJMKL()
1000:  * routine, but can also be used to convert an assembled SeqAIJ matrix
1001:  * into a SeqAIJMKL one. */
1002: PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJMKL(Mat A, MatType type, MatReuse reuse, Mat *newmat)
1003: {
1004:   Mat            B = *newmat;
1005:   Mat_SeqAIJMKL *aijmkl;
1006:   PetscBool      set;
1007:   PetscBool      sametype;

1009:   PetscFunctionBegin;
1010:   if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));

1012:   PetscCall(PetscObjectTypeCompare((PetscObject)A, type, &sametype));
1013:   if (sametype) PetscFunctionReturn(PETSC_SUCCESS);

1015:   PetscCall(PetscNew(&aijmkl));
1016:   B->spptr = (void *)aijmkl;

1018:   /* Set function pointers for methods that we inherit from AIJ but override.
1019:    * We also parse some command line options below, since those determine some of the methods we point to. */
1020:   B->ops->duplicate   = MatDuplicate_SeqAIJMKL;
1021:   B->ops->assemblyend = MatAssemblyEnd_SeqAIJMKL;
1022:   B->ops->destroy     = MatDestroy_SeqAIJMKL;

1024:   aijmkl->sparse_optimized = PETSC_FALSE;
1025: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1026:   aijmkl->no_SpMV2 = PETSC_FALSE; /* Default to using the SpMV2 routines if our MKL supports them. */
1027: #else
1028:   aijmkl->no_SpMV2 = PETSC_TRUE;
1029: #endif
1030:   aijmkl->eager_inspection = PETSC_FALSE;

1032:   /* Parse command line options. */
1033:   PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "AIJMKL Options", "Mat");
1034:   PetscCall(PetscOptionsBool("-mat_aijmkl_no_spmv2", "Disable use of inspector-executor (SpMV 2) routines", "None", (PetscBool)aijmkl->no_SpMV2, (PetscBool *)&aijmkl->no_SpMV2, &set));
1035:   PetscCall(PetscOptionsBool("-mat_aijmkl_eager_inspection", "Run inspection at matrix assembly time, instead of waiting until needed by an operation", "None", (PetscBool)aijmkl->eager_inspection, (PetscBool *)&aijmkl->eager_inspection, &set));
1036:   PetscOptionsEnd();
1037: #if !defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1038:   if (!aijmkl->no_SpMV2) {
1039:     PetscCall(PetscInfo(B, "User requested use of MKL SpMV2 routines, but MKL version does not support mkl_sparse_optimize();  defaulting to non-SpMV2 routines.\n"));
1040:     aijmkl->no_SpMV2 = PETSC_TRUE;
1041:   }
1042: #endif

1044: #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
1045:   B->ops->mult             = MatMult_SeqAIJMKL_SpMV2;
1046:   B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL_SpMV2;
1047:   B->ops->multadd          = MatMultAdd_SeqAIJMKL_SpMV2;
1048:   B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL_SpMV2;
1049:   #if defined(PETSC_HAVE_MKL_SPARSE_SP2M_FEATURE)
1050:   B->ops->productsetfromoptions   = MatProductSetFromOptions_SeqAIJMKL;
1051:   B->ops->matmultsymbolic         = MatMatMultSymbolic_SeqAIJMKL_SeqAIJMKL;
1052:   B->ops->matmultnumeric          = MatMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1053:   B->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_SeqAIJMKL_SeqAIJMKL;
1054:   B->ops->transposematmultnumeric = MatTransposeMatMultNumeric_SeqAIJMKL_SeqAIJMKL;
1055:     #if !defined(PETSC_USE_COMPLEX)
1056:   B->ops->ptapnumeric = MatPtAPNumeric_SeqAIJMKL_SeqAIJMKL_SymmetricReal;
1057:     #else
1058:   B->ops->ptapnumeric = NULL;
1059:     #endif
1060:   #endif
1061: #endif /* PETSC_HAVE_MKL_SPARSE_OPTIMIZE */

1063: #if !defined(PETSC_MKL_SPBLAS_DEPRECATED)
1064:   /* In MKL version 18, update 2, the old sparse BLAS interfaces were marked as deprecated. If "no_SpMV2" has been specified by the
1065:    * user and the old SpBLAS interfaces are deprecated in our MKL version, we use the new _SpMV2 routines (set above), but do not
1066:    * call mkl_sparse_optimize(), which results in the old numerical kernels (without the inspector-executor model) being used. For
1067:    * versions in which the older interface has not been deprecated, we use the old interface. */
1068:   if (aijmkl->no_SpMV2) {
1069:     B->ops->mult             = MatMult_SeqAIJMKL;
1070:     B->ops->multtranspose    = MatMultTranspose_SeqAIJMKL;
1071:     B->ops->multadd          = MatMultAdd_SeqAIJMKL;
1072:     B->ops->multtransposeadd = MatMultTransposeAdd_SeqAIJMKL;
1073:   }
1074: #endif

1076:   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqaijmkl_seqaij_C", MatConvert_SeqAIJMKL_SeqAIJ));

1078:   PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJMKL));
1079:   *newmat = B;
1080:   PetscFunctionReturn(PETSC_SUCCESS);
1081: }

1083: /*@C
1084:    MatCreateSeqAIJMKL - Creates a sparse matrix of type `MATSEQAIJMKL`.

1086:    Collective

1088:    Input Parameters:
1089: +  comm - MPI communicator, set to `PETSC_COMM_SELF`
1090: .  m - number of rows
1091: .  n - number of columns
1092: .  nz - number of nonzeros per row (same for all rows)
1093: -  nnz - array containing the number of nonzeros in the various rows
1094:          (possibly different for each row) or `NULL`

1096:    Output Parameter:
1097: .  A - the matrix

1099:    Options Database Keys:
1100: +  -mat_aijmkl_no_spmv2 - disable use of the SpMV2 inspector-executor routines
1101: -  -mat_aijmkl_eager_inspection - perform MKL "inspection" phase upon matrix assembly; default is to do "lazy" inspection,
1102:                                   performing this step the first time the matrix is applied

1104:    Level: intermediate

1106:    Notes:
1107:    If `nnz` is given then `nz` is ignored

1109:    This type inherits from `MATSEQAIJ` and is largely identical, but uses sparse BLAS
1110:    routines from Intel MKL whenever possible.

1112:   If the installed version of MKL supports the "SpMV2" sparse
1113:    inspector-executor routines, then those are used by default.

1115:   `MatMult()`, `MatMultAdd()`, `MatMultTranspose()`, `MatMultTransposeAdd()`, `MatMatMult()`, `MatTransposeMatMult()`, and `MatPtAP()`
1116:    (for symmetric A) operations are currently supported.
1117:    MKL version 18, update 2 or later is required for `MatPtAP()`, `MatPtAPNumeric()` and `MatMatMultNumeric()`.

1119: .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateMPIAIJMKL()`, `MatSetValues()`
1120: @*/
1121: PetscErrorCode MatCreateSeqAIJMKL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
1122: {
1123:   PetscFunctionBegin;
1124:   PetscCall(MatCreate(comm, A));
1125:   PetscCall(MatSetSizes(*A, m, n, m, n));
1126:   PetscCall(MatSetType(*A, MATSEQAIJMKL));
1127:   PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A, nz, nnz));
1128:   PetscFunctionReturn(PETSC_SUCCESS);
1129: }

1131: PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJMKL(Mat A)
1132: {
1133:   PetscFunctionBegin;
1134:   PetscCall(MatSetType(A, MATSEQAIJ));
1135:   PetscCall(MatConvert_SeqAIJ_SeqAIJMKL(A, MATSEQAIJMKL, MAT_INPLACE_MATRIX, &A));
1136:   PetscFunctionReturn(PETSC_SUCCESS);
1137: }