Actual source code: ispai.c


  2: /*
  3:    3/99 Modified by Stephen Barnard to support SPAI version 3.0
  4: */

  6: /*
  7:       Provides an interface to the SPAI Sparse Approximate Inverse Preconditioner
  8:    Code written by Stephen Barnard.

 10:       Note: there is some BAD memory bleeding below!

 12:       This code needs work

 14:    1) get rid of all memory bleeding
 15:    2) fix PETSc/interface so that it gets if the matrix is symmetric from the matrix
 16:       rather than having the sp flag for PC_SPAI
 17:    3) fix to set the block size based on the matrix block size

 19: */
 20: #if !defined(PETSC_SKIP_COMPLEX)
 21:   #define PETSC_SKIP_COMPLEX /* since spai uses I which conflicts with some complex implementations */
 22: #endif

 24: #include <petsc/private/pcimpl.h>
 25: #include <../src/ksp/pc/impls/spai/petscspai.h>

 27: /*
 28:     These are the SPAI include files
 29: */
 30: EXTERN_C_BEGIN
 31: #define SPAI_USE_MPI /* required for setting SPAI_Comm correctly in basics.h */
 32: #include <spai.h>
 33: #include <matrix.h>
 34: EXTERN_C_END

 36: extern PetscErrorCode ConvertMatToMatrix(MPI_Comm, Mat, Mat, matrix **);
 37: extern PetscErrorCode ConvertMatrixToMat(MPI_Comm, matrix *, Mat *);
 38: extern PetscErrorCode ConvertVectorToVec(MPI_Comm, vector *, Vec *);
 39: extern PetscErrorCode MM_to_PETSC(char *, char *, char *);

 41: typedef struct {
 42:   matrix *B;  /* matrix in SPAI format */
 43:   matrix *BT; /* transpose of matrix in SPAI format */
 44:   matrix *M;  /* the approximate inverse in SPAI format */

 46:   Mat PM; /* the approximate inverse PETSc format */

 48:   double epsilon;    /* tolerance */
 49:   int    nbsteps;    /* max number of "improvement" steps per line */
 50:   int    max;        /* max dimensions of is_I, q, etc. */
 51:   int    maxnew;     /* max number of new entries per step */
 52:   int    block_size; /* constant block size */
 53:   int    cache_size; /* one of (1,2,3,4,5,6) indicting size of cache */
 54:   int    verbose;    /* SPAI prints timing and statistics */

 56:   int      sp;        /* symmetric nonzero pattern */
 57:   MPI_Comm comm_spai; /* communicator to be used with spai */
 58: } PC_SPAI;

 60: static PetscErrorCode PCSetUp_SPAI(PC pc)
 61: {
 62:   PC_SPAI *ispai = (PC_SPAI *)pc->data;
 63:   Mat      AT;

 65:   PetscFunctionBegin;
 66:   init_SPAI();

 68:   if (ispai->sp) {
 69:     PetscCall(ConvertMatToMatrix(ispai->comm_spai, pc->pmat, pc->pmat, &ispai->B));
 70:   } else {
 71:     /* Use the transpose to get the column nonzero structure. */
 72:     PetscCall(MatTranspose(pc->pmat, MAT_INITIAL_MATRIX, &AT));
 73:     PetscCall(ConvertMatToMatrix(ispai->comm_spai, pc->pmat, AT, &ispai->B));
 74:     PetscCall(MatDestroy(&AT));
 75:   }

 77:   /* Destroy the transpose */
 78:   /* Don't know how to do it. PETSc developers? */

 80:   /* construct SPAI preconditioner */
 81:   /* FILE *messages */    /* file for warning messages */
 82:   /* double epsilon */    /* tolerance */
 83:   /* int nbsteps */       /* max number of "improvement" steps per line */
 84:   /* int max */           /* max dimensions of is_I, q, etc. */
 85:   /* int maxnew */        /* max number of new entries per step */
 86:   /* int block_size */    /* block_size == 1 specifies scalar elements
 87:                               block_size == n specifies nxn constant-block elements
 88:                               block_size == 0 specifies variable-block elements */
 89:   /* int cache_size */    /* one of (1,2,3,4,5,6) indicting size of cache. cache_size == 0 indicates no caching */
 90:   /* int    verbose    */ /* verbose == 0 specifies that SPAI is silent
 91:                               verbose == 1 prints timing and matrix statistics */

 93:   PetscCallExternal(bspai, ispai->B, &ispai->M, stdout, ispai->epsilon, ispai->nbsteps, ispai->max, ispai->maxnew, ispai->block_size, ispai->cache_size, ispai->verbose);

 95:   PetscCall(ConvertMatrixToMat(PetscObjectComm((PetscObject)pc), ispai->M, &ispai->PM));

 97:   /* free the SPAI matrices */
 98:   sp_free_matrix(ispai->B);
 99:   sp_free_matrix(ispai->M);
100:   PetscFunctionReturn(PETSC_SUCCESS);
101: }

103: static PetscErrorCode PCApply_SPAI(PC pc, Vec xx, Vec y)
104: {
105:   PC_SPAI *ispai = (PC_SPAI *)pc->data;

107:   PetscFunctionBegin;
108:   /* Now using PETSc's multiply */
109:   PetscCall(MatMult(ispai->PM, xx, y));
110:   PetscFunctionReturn(PETSC_SUCCESS);
111: }

113: static PetscErrorCode PCMatApply_SPAI(PC pc, Mat X, Mat Y)
114: {
115:   PC_SPAI *ispai = (PC_SPAI *)pc->data;

117:   PetscFunctionBegin;
118:   /* Now using PETSc's multiply */
119:   PetscCall(MatMatMult(ispai->PM, X, MAT_REUSE_MATRIX, PETSC_DEFAULT, &Y));
120:   PetscFunctionReturn(PETSC_SUCCESS);
121: }

123: static PetscErrorCode PCDestroy_SPAI(PC pc)
124: {
125:   PC_SPAI *ispai = (PC_SPAI *)pc->data;

127:   PetscFunctionBegin;
128:   PetscCall(MatDestroy(&ispai->PM));
129:   PetscCallMPI(MPI_Comm_free(&(ispai->comm_spai)));
130:   PetscCall(PetscFree(pc->data));
131:   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetEpsilon_C", NULL));
132:   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetNBSteps_C", NULL));
133:   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMax_C", NULL));
134:   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMaxNew_C", NULL));
135:   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetBlockSize_C", NULL));
136:   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetCacheSize_C", NULL));
137:   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetVerbose_C", NULL));
138:   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetSp_C", NULL));
139:   PetscFunctionReturn(PETSC_SUCCESS);
140: }

142: static PetscErrorCode PCView_SPAI(PC pc, PetscViewer viewer)
143: {
144:   PC_SPAI  *ispai = (PC_SPAI *)pc->data;
145:   PetscBool iascii;

147:   PetscFunctionBegin;
148:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
149:   if (iascii) {
150:     PetscCall(PetscViewerASCIIPrintf(viewer, "    epsilon %g\n", ispai->epsilon));
151:     PetscCall(PetscViewerASCIIPrintf(viewer, "    nbsteps %d\n", ispai->nbsteps));
152:     PetscCall(PetscViewerASCIIPrintf(viewer, "    max %d\n", ispai->max));
153:     PetscCall(PetscViewerASCIIPrintf(viewer, "    maxnew %d\n", ispai->maxnew));
154:     PetscCall(PetscViewerASCIIPrintf(viewer, "    block_size %d\n", ispai->block_size));
155:     PetscCall(PetscViewerASCIIPrintf(viewer, "    cache_size %d\n", ispai->cache_size));
156:     PetscCall(PetscViewerASCIIPrintf(viewer, "    verbose %d\n", ispai->verbose));
157:     PetscCall(PetscViewerASCIIPrintf(viewer, "    sp %d\n", ispai->sp));
158:   }
159:   PetscFunctionReturn(PETSC_SUCCESS);
160: }

162: static PetscErrorCode PCSPAISetEpsilon_SPAI(PC pc, PetscReal epsilon1)
163: {
164:   PC_SPAI *ispai = (PC_SPAI *)pc->data;

166:   PetscFunctionBegin;
167:   ispai->epsilon = (double)epsilon1;
168:   PetscFunctionReturn(PETSC_SUCCESS);
169: }

171: static PetscErrorCode PCSPAISetNBSteps_SPAI(PC pc, PetscInt nbsteps1)
172: {
173:   PC_SPAI *ispai = (PC_SPAI *)pc->data;

175:   PetscFunctionBegin;
176:   ispai->nbsteps = (int)nbsteps1;
177:   PetscFunctionReturn(PETSC_SUCCESS);
178: }

180: /* added 1/7/99 g.h. */
181: static PetscErrorCode PCSPAISetMax_SPAI(PC pc, PetscInt max1)
182: {
183:   PC_SPAI *ispai = (PC_SPAI *)pc->data;

185:   PetscFunctionBegin;
186:   ispai->max = (int)max1;
187:   PetscFunctionReturn(PETSC_SUCCESS);
188: }

190: static PetscErrorCode PCSPAISetMaxNew_SPAI(PC pc, PetscInt maxnew1)
191: {
192:   PC_SPAI *ispai = (PC_SPAI *)pc->data;

194:   PetscFunctionBegin;
195:   ispai->maxnew = (int)maxnew1;
196:   PetscFunctionReturn(PETSC_SUCCESS);
197: }

199: static PetscErrorCode PCSPAISetBlockSize_SPAI(PC pc, PetscInt block_size1)
200: {
201:   PC_SPAI *ispai = (PC_SPAI *)pc->data;

203:   PetscFunctionBegin;
204:   ispai->block_size = (int)block_size1;
205:   PetscFunctionReturn(PETSC_SUCCESS);
206: }

208: static PetscErrorCode PCSPAISetCacheSize_SPAI(PC pc, PetscInt cache_size)
209: {
210:   PC_SPAI *ispai = (PC_SPAI *)pc->data;

212:   PetscFunctionBegin;
213:   ispai->cache_size = (int)cache_size;
214:   PetscFunctionReturn(PETSC_SUCCESS);
215: }

217: static PetscErrorCode PCSPAISetVerbose_SPAI(PC pc, PetscInt verbose)
218: {
219:   PC_SPAI *ispai = (PC_SPAI *)pc->data;

221:   PetscFunctionBegin;
222:   ispai->verbose = (int)verbose;
223:   PetscFunctionReturn(PETSC_SUCCESS);
224: }

226: static PetscErrorCode PCSPAISetSp_SPAI(PC pc, PetscInt sp)
227: {
228:   PC_SPAI *ispai = (PC_SPAI *)pc->data;

230:   PetscFunctionBegin;
231:   ispai->sp = (int)sp;
232:   PetscFunctionReturn(PETSC_SUCCESS);
233: }

235: /*@
236:   PCSPAISetEpsilon -- Set the tolerance for the `PCSPAI` preconditioner

238:   Input Parameters:
239: + pc - the preconditioner
240: - eps - epsilon (default .4)

242:   Note:
243:     Espilon must be between 0 and 1. It controls the
244:                  quality of the approximation of M to the inverse of
245:                  A. Higher values of epsilon lead to more work, more
246:                  fill, and usually better preconditioners. In many
247:                  cases the best choice of epsilon is the one that
248:                  divides the total solution time equally between the
249:                  preconditioner and the solver.

251:   Level: intermediate

253: .seealso: `PCSPAI`, `PCSetType()`
254:   @*/
255: PetscErrorCode PCSPAISetEpsilon(PC pc, PetscReal epsilon1)
256: {
257:   PetscFunctionBegin;
258:   PetscTryMethod(pc, "PCSPAISetEpsilon_C", (PC, PetscReal), (pc, epsilon1));
259:   PetscFunctionReturn(PETSC_SUCCESS);
260: }

262: /*@
263:   PCSPAISetNBSteps - set maximum number of improvement steps per row in
264:         the `PCSPAI` preconditioner

266:   Input Parameters:
267: + pc - the preconditioner
268: - n - number of steps (default 5)

270:   Note:
271:     `PCSPAI` constructs to approximation to every column of
272:                  the exact inverse of A in a series of improvement
273:                  steps. The quality of the approximation is determined
274:                  by epsilon. If an approximation achieving an accuracy
275:                  of epsilon is not obtained after ns steps, SPAI simply
276:                  uses the best approximation constructed so far.

278:   Level: intermediate

280: .seealso: `PCSPAI`, `PCSetType()`, `PCSPAISetMaxNew()`
281: @*/
282: PetscErrorCode PCSPAISetNBSteps(PC pc, PetscInt nbsteps1)
283: {
284:   PetscFunctionBegin;
285:   PetscTryMethod(pc, "PCSPAISetNBSteps_C", (PC, PetscInt), (pc, nbsteps1));
286:   PetscFunctionReturn(PETSC_SUCCESS);
287: }

289: /* added 1/7/99 g.h. */
290: /*@
291:   PCSPAISetMax - set the size of various working buffers in
292:         the `PCSPAI` preconditioner

294:   Input Parameters:
295: + pc - the preconditioner
296: - n - size (default is 5000)

298:   Level: intermediate

300: .seealso: `PCSPAI`, `PCSetType()`
301: @*/
302: PetscErrorCode PCSPAISetMax(PC pc, PetscInt max1)
303: {
304:   PetscFunctionBegin;
305:   PetscTryMethod(pc, "PCSPAISetMax_C", (PC, PetscInt), (pc, max1));
306:   PetscFunctionReturn(PETSC_SUCCESS);
307: }

309: /*@
310:   PCSPAISetMaxNew - set maximum number of new nonzero candidates per step
311:    in `PCSPAI` preconditioner

313:   Input Parameters:
314: + pc - the preconditioner
315: - n - maximum number (default 5)

317:   Level: intermediate

319: .seealso: `PCSPAI`, `PCSetType()`, `PCSPAISetNBSteps()`
320: @*/
321: PetscErrorCode PCSPAISetMaxNew(PC pc, PetscInt maxnew1)
322: {
323:   PetscFunctionBegin;
324:   PetscTryMethod(pc, "PCSPAISetMaxNew_C", (PC, PetscInt), (pc, maxnew1));
325:   PetscFunctionReturn(PETSC_SUCCESS);
326: }

328: /*@
329:   PCSPAISetBlockSize - set the block size for the `PCSPAI` preconditioner

331:   Input Parameters:
332: + pc - the preconditioner
333: - n - block size (default 1)

335:   Notes:
336:     A block
337:                  size of 1 treats A as a matrix of scalar elements. A
338:                  block size of s > 1 treats A as a matrix of sxs
339:                  blocks. A block size of 0 treats A as a matrix with
340:                  variable sized blocks, which are determined by
341:                  searching for dense square diagonal blocks in A.
342:                  This can be very effective for finite-element
343:                  matrices.

345:                  SPAI will convert A to block form, use a block
346:                  version of the preconditioner algorithm, and then
347:                  convert the result back to scalar form.

349:                  In many cases the a block-size parameter other than 1
350:                  can lead to very significant improvement in
351:                  performance.

353:   Level: intermediate

355: .seealso: `PCSPAI`, `PCSetType()`
356: @*/
357: PetscErrorCode PCSPAISetBlockSize(PC pc, PetscInt block_size1)
358: {
359:   PetscFunctionBegin;
360:   PetscTryMethod(pc, "PCSPAISetBlockSize_C", (PC, PetscInt), (pc, block_size1));
361:   PetscFunctionReturn(PETSC_SUCCESS);
362: }

364: /*@
365:   PCSPAISetCacheSize - specify cache size in the `PCSPAI` preconditioner

367:   Input Parameters:
368: + pc - the preconditioner
369: - n -  cache size {0,1,2,3,4,5} (default 5)

371:   Note:
372:     `PCSPAI` uses a hash table to cache messages and avoid
373:                  redundant communication. If suggest always using
374:                  5. This parameter is irrelevant in the serial
375:                  version.

377:   Level: intermediate

379: .seealso: `PCSPAI`, `PCSetType()`
380: @*/
381: PetscErrorCode PCSPAISetCacheSize(PC pc, PetscInt cache_size)
382: {
383:   PetscFunctionBegin;
384:   PetscTryMethod(pc, "PCSPAISetCacheSize_C", (PC, PetscInt), (pc, cache_size));
385:   PetscFunctionReturn(PETSC_SUCCESS);
386: }

388: /*@
389:   PCSPAISetVerbose - verbosity level for the `PCSPAI` preconditioner

391:   Input Parameters:
392: + pc - the preconditioner
393: - n - level (default 1)

395:   Note:
396:     print parameters, timings and matrix statistics

398:   Level: intermediate

400: .seealso: `PCSPAI`, `PCSetType()`
401: @*/
402: PetscErrorCode PCSPAISetVerbose(PC pc, PetscInt verbose)
403: {
404:   PetscFunctionBegin;
405:   PetscTryMethod(pc, "PCSPAISetVerbose_C", (PC, PetscInt), (pc, verbose));
406:   PetscFunctionReturn(PETSC_SUCCESS);
407: }

409: /*@
410:   PCSPAISetSp - specify a symmetric matrix sparsity pattern in the `PCSPAI` preconditioner

412:   Input Parameters:
413: + pc - the preconditioner
414: - n - 0 or 1

416:   Note:
417:     If A has a symmetric nonzero pattern use -sp 1 to
418:                  improve performance by eliminating some communication
419:                  in the parallel version. Even if A does not have a
420:                  symmetric nonzero pattern -sp 1 may well lead to good
421:                  results, but the code will not follow the published
422:                  SPAI algorithm exactly.

424:   Level: intermediate

426: .seealso: `PCSPAI`, `PCSetType()`
427: @*/
428: PetscErrorCode PCSPAISetSp(PC pc, PetscInt sp)
429: {
430:   PetscFunctionBegin;
431:   PetscTryMethod(pc, "PCSPAISetSp_C", (PC, PetscInt), (pc, sp));
432:   PetscFunctionReturn(PETSC_SUCCESS);
433: }

435: static PetscErrorCode PCSetFromOptions_SPAI(PC pc, PetscOptionItems *PetscOptionsObject)
436: {
437:   PC_SPAI  *ispai = (PC_SPAI *)pc->data;
438:   int       nbsteps1, max1, maxnew1, block_size1, cache_size, verbose, sp;
439:   double    epsilon1;
440:   PetscBool flg;

442:   PetscFunctionBegin;
443:   PetscOptionsHeadBegin(PetscOptionsObject, "SPAI options");
444:   PetscCall(PetscOptionsReal("-pc_spai_epsilon", "", "PCSPAISetEpsilon", ispai->epsilon, &epsilon1, &flg));
445:   if (flg) PetscCall(PCSPAISetEpsilon(pc, epsilon1));
446:   PetscCall(PetscOptionsInt("-pc_spai_nbsteps", "", "PCSPAISetNBSteps", ispai->nbsteps, &nbsteps1, &flg));
447:   if (flg) PetscCall(PCSPAISetNBSteps(pc, nbsteps1));
448:   /* added 1/7/99 g.h. */
449:   PetscCall(PetscOptionsInt("-pc_spai_max", "", "PCSPAISetMax", ispai->max, &max1, &flg));
450:   if (flg) PetscCall(PCSPAISetMax(pc, max1));
451:   PetscCall(PetscOptionsInt("-pc_spai_maxnew", "", "PCSPAISetMaxNew", ispai->maxnew, &maxnew1, &flg));
452:   if (flg) PetscCall(PCSPAISetMaxNew(pc, maxnew1));
453:   PetscCall(PetscOptionsInt("-pc_spai_block_size", "", "PCSPAISetBlockSize", ispai->block_size, &block_size1, &flg));
454:   if (flg) PetscCall(PCSPAISetBlockSize(pc, block_size1));
455:   PetscCall(PetscOptionsInt("-pc_spai_cache_size", "", "PCSPAISetCacheSize", ispai->cache_size, &cache_size, &flg));
456:   if (flg) PetscCall(PCSPAISetCacheSize(pc, cache_size));
457:   PetscCall(PetscOptionsInt("-pc_spai_verbose", "", "PCSPAISetVerbose", ispai->verbose, &verbose, &flg));
458:   if (flg) PetscCall(PCSPAISetVerbose(pc, verbose));
459:   PetscCall(PetscOptionsInt("-pc_spai_sp", "", "PCSPAISetSp", ispai->sp, &sp, &flg));
460:   if (flg) PetscCall(PCSPAISetSp(pc, sp));
461:   PetscOptionsHeadEnd();
462:   PetscFunctionReturn(PETSC_SUCCESS);
463: }

465: /*MC
466:    PCSPAI - Use the Sparse Approximate Inverse method

468:    Options Database Keys:
469: +  -pc_spai_epsilon <eps> - set tolerance
470: .  -pc_spai_nbstep <n> - set nbsteps
471: .  -pc_spai_max <m> - set max
472: .  -pc_spai_max_new <m> - set maxnew
473: .  -pc_spai_block_size <n> - set block size
474: .  -pc_spai_cache_size <n> - set cache size
475: .  -pc_spai_sp <m> - set sp
476: -  -pc_spai_set_verbose <true,false> - verbose output

478:    Level: beginner

480:    Note:
481:     This only works with `MATAIJ` matrices.

483:    References:
484:  . * -  Grote and Barnard (SIAM J. Sci. Comput.; vol 18, nr 3)

486: .seealso: `PCCreate()`, `PCSetType()`, `PCType`, `PC`,
487:           `PCSPAISetEpsilon()`, `PCSPAISetMax()`, `PCSPAISetMaxNew()`, `PCSPAISetBlockSize()`,
488:           `PCSPAISetVerbose()`, `PCSPAISetSp()`, `PCSPAISetNBSteps()`, `PCSPAISetCacheSize()`
489: M*/

491: PETSC_EXTERN PetscErrorCode PCCreate_SPAI(PC pc)
492: {
493:   PC_SPAI *ispai;

495:   PetscFunctionBegin;
496:   PetscCall(PetscNew(&ispai));
497:   pc->data = ispai;

499:   pc->ops->destroy         = PCDestroy_SPAI;
500:   pc->ops->apply           = PCApply_SPAI;
501:   pc->ops->matapply        = PCMatApply_SPAI;
502:   pc->ops->applyrichardson = 0;
503:   pc->ops->setup           = PCSetUp_SPAI;
504:   pc->ops->view            = PCView_SPAI;
505:   pc->ops->setfromoptions  = PCSetFromOptions_SPAI;

507:   ispai->epsilon    = .4;
508:   ispai->nbsteps    = 5;
509:   ispai->max        = 5000;
510:   ispai->maxnew     = 5;
511:   ispai->block_size = 1;
512:   ispai->cache_size = 5;
513:   ispai->verbose    = 0;

515:   ispai->sp = 1;
516:   PetscCallMPI(MPI_Comm_dup(PetscObjectComm((PetscObject)pc), &(ispai->comm_spai)));

518:   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetEpsilon_C", PCSPAISetEpsilon_SPAI));
519:   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetNBSteps_C", PCSPAISetNBSteps_SPAI));
520:   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMax_C", PCSPAISetMax_SPAI));
521:   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMaxNew_C", PCSPAISetMaxNew_SPAI));
522:   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetBlockSize_C", PCSPAISetBlockSize_SPAI));
523:   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetCacheSize_C", PCSPAISetCacheSize_SPAI));
524:   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetVerbose_C", PCSPAISetVerbose_SPAI));
525:   PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetSp_C", PCSPAISetSp_SPAI));
526:   PetscFunctionReturn(PETSC_SUCCESS);
527: }

529: /*
530:    Converts from a PETSc matrix to an SPAI matrix
531: */
532: PetscErrorCode ConvertMatToMatrix(MPI_Comm comm, Mat A, Mat AT, matrix **B)
533: {
534:   matrix                  *M;
535:   int                      i, j, col;
536:   int                      row_indx;
537:   int                      len, pe, local_indx, start_indx;
538:   int                     *mapping;
539:   const int               *cols;
540:   const double            *vals;
541:   int                      n, mnl, nnl, nz, rstart, rend;
542:   PetscMPIInt              size, rank;
543:   struct compressed_lines *rows;

545:   PetscFunctionBegin;
546:   PetscCallMPI(MPI_Comm_size(comm, &size));
547:   PetscCallMPI(MPI_Comm_rank(comm, &rank));
548:   PetscCall(MatGetSize(A, &n, &n));
549:   PetscCall(MatGetLocalSize(A, &mnl, &nnl));

551:   /*
552:     not sure why a barrier is required. commenting out
553:   PetscCallMPI(MPI_Barrier(comm));
554:   */

556:   M = new_matrix((SPAI_Comm)comm);

558:   M->n              = n;
559:   M->bs             = 1;
560:   M->max_block_size = 1;

562:   M->mnls          = (int *)malloc(sizeof(int) * size);
563:   M->start_indices = (int *)malloc(sizeof(int) * size);
564:   M->pe            = (int *)malloc(sizeof(int) * n);
565:   M->block_sizes   = (int *)malloc(sizeof(int) * n);
566:   for (i = 0; i < n; i++) M->block_sizes[i] = 1;

568:   PetscCallMPI(MPI_Allgather(&mnl, 1, MPI_INT, M->mnls, 1, MPI_INT, comm));

570:   M->start_indices[0] = 0;
571:   for (i = 1; i < size; i++) M->start_indices[i] = M->start_indices[i - 1] + M->mnls[i - 1];

573:   M->mnl            = M->mnls[M->myid];
574:   M->my_start_index = M->start_indices[M->myid];

576:   for (i = 0; i < size; i++) {
577:     start_indx = M->start_indices[i];
578:     for (j = 0; j < M->mnls[i]; j++) M->pe[start_indx + j] = i;
579:   }

581:   if (AT) {
582:     M->lines = new_compressed_lines(M->mnls[rank], 1);
583:   } else {
584:     M->lines = new_compressed_lines(M->mnls[rank], 0);
585:   }

587:   rows = M->lines;

589:   /* Determine the mapping from global indices to pointers */
590:   PetscCall(PetscMalloc1(M->n, &mapping));
591:   pe         = 0;
592:   local_indx = 0;
593:   for (i = 0; i < M->n; i++) {
594:     if (local_indx >= M->mnls[pe]) {
595:       pe++;
596:       local_indx = 0;
597:     }
598:     mapping[i] = local_indx + M->start_indices[pe];
599:     local_indx++;
600:   }

602:   /************** Set up the row structure *****************/

604:   PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
605:   for (i = rstart; i < rend; i++) {
606:     row_indx = i - rstart;
607:     PetscCall(MatGetRow(A, i, &nz, &cols, &vals));
608:     /* allocate buffers */
609:     rows->ptrs[row_indx] = (int *)malloc(nz * sizeof(int));
610:     rows->A[row_indx]    = (double *)malloc(nz * sizeof(double));
611:     /* copy the matrix */
612:     for (j = 0; j < nz; j++) {
613:       col = cols[j];
614:       len = rows->len[row_indx]++;

616:       rows->ptrs[row_indx][len] = mapping[col];
617:       rows->A[row_indx][len]    = vals[j];
618:     }
619:     rows->slen[row_indx] = rows->len[row_indx];

621:     PetscCall(MatRestoreRow(A, i, &nz, &cols, &vals));
622:   }

624:   /************** Set up the column structure *****************/

626:   if (AT) {
627:     for (i = rstart; i < rend; i++) {
628:       row_indx = i - rstart;
629:       PetscCall(MatGetRow(AT, i, &nz, &cols, &vals));
630:       /* allocate buffers */
631:       rows->rptrs[row_indx] = (int *)malloc(nz * sizeof(int));
632:       /* copy the matrix (i.e., the structure) */
633:       for (j = 0; j < nz; j++) {
634:         col = cols[j];
635:         len = rows->rlen[row_indx]++;

637:         rows->rptrs[row_indx][len] = mapping[col];
638:       }
639:       PetscCall(MatRestoreRow(AT, i, &nz, &cols, &vals));
640:     }
641:   }

643:   PetscCall(PetscFree(mapping));

645:   order_pointers(M);
646:   M->maxnz = calc_maxnz(M);
647:   *B       = M;
648:   PetscFunctionReturn(PETSC_SUCCESS);
649: }

651: /*
652:    Converts from an SPAI matrix B  to a PETSc matrix PB.
653:    This assumes that the SPAI matrix B is stored in
654:    COMPRESSED-ROW format.
655: */
656: PetscErrorCode ConvertMatrixToMat(MPI_Comm comm, matrix *B, Mat *PB)
657: {
658:   PetscMPIInt size, rank;
659:   int         m, n, M, N;
660:   int         d_nz, o_nz;
661:   int        *d_nnz, *o_nnz;
662:   int         i, k, global_row, global_col, first_diag_col, last_diag_col;
663:   PetscScalar val;

665:   PetscFunctionBegin;
666:   PetscCallMPI(MPI_Comm_size(comm, &size));
667:   PetscCallMPI(MPI_Comm_rank(comm, &rank));

669:   m = n = B->mnls[rank];
670:   d_nz = o_nz = 0;

672:   /* Determine preallocation for MatCreateAIJ */
673:   PetscCall(PetscMalloc1(m, &d_nnz));
674:   PetscCall(PetscMalloc1(m, &o_nnz));
675:   for (i = 0; i < m; i++) d_nnz[i] = o_nnz[i] = 0;
676:   first_diag_col = B->start_indices[rank];
677:   last_diag_col  = first_diag_col + B->mnls[rank];
678:   for (i = 0; i < B->mnls[rank]; i++) {
679:     for (k = 0; k < B->lines->len[i]; k++) {
680:       global_col = B->lines->ptrs[i][k];
681:       if ((global_col >= first_diag_col) && (global_col < last_diag_col)) d_nnz[i]++;
682:       else o_nnz[i]++;
683:     }
684:   }

686:   M = N = B->n;
687:   /* Here we only know how to create AIJ format */
688:   PetscCall(MatCreate(comm, PB));
689:   PetscCall(MatSetSizes(*PB, m, n, M, N));
690:   PetscCall(MatSetType(*PB, MATAIJ));
691:   PetscCall(MatSeqAIJSetPreallocation(*PB, d_nz, d_nnz));
692:   PetscCall(MatMPIAIJSetPreallocation(*PB, d_nz, d_nnz, o_nz, o_nnz));

694:   for (i = 0; i < B->mnls[rank]; i++) {
695:     global_row = B->start_indices[rank] + i;
696:     for (k = 0; k < B->lines->len[i]; k++) {
697:       global_col = B->lines->ptrs[i][k];

699:       val = B->lines->A[i][k];
700:       PetscCall(MatSetValues(*PB, 1, &global_row, 1, &global_col, &val, ADD_VALUES));
701:     }
702:   }

704:   PetscCall(PetscFree(d_nnz));
705:   PetscCall(PetscFree(o_nnz));

707:   PetscCall(MatAssemblyBegin(*PB, MAT_FINAL_ASSEMBLY));
708:   PetscCall(MatAssemblyEnd(*PB, MAT_FINAL_ASSEMBLY));
709:   PetscFunctionReturn(PETSC_SUCCESS);
710: }

712: /*
713:    Converts from an SPAI vector v  to a PETSc vec Pv.
714: */
715: PetscErrorCode ConvertVectorToVec(MPI_Comm comm, vector *v, Vec *Pv)
716: {
717:   PetscMPIInt size, rank;
718:   int         m, M, i, *mnls, *start_indices, *global_indices;

720:   PetscFunctionBegin;
721:   PetscCallMPI(MPI_Comm_size(comm, &size));
722:   PetscCallMPI(MPI_Comm_rank(comm, &rank));

724:   m = v->mnl;
725:   M = v->n;

727:   PetscCall(VecCreateMPI(comm, m, M, Pv));

729:   PetscCall(PetscMalloc1(size, &mnls));
730:   PetscCallMPI(MPI_Allgather(&v->mnl, 1, MPI_INT, mnls, 1, MPI_INT, comm));

732:   PetscCall(PetscMalloc1(size, &start_indices));

734:   start_indices[0] = 0;
735:   for (i = 1; i < size; i++) start_indices[i] = start_indices[i - 1] + mnls[i - 1];

737:   PetscCall(PetscMalloc1(v->mnl, &global_indices));
738:   for (i = 0; i < v->mnl; i++) global_indices[i] = start_indices[rank] + i;

740:   PetscCall(PetscFree(mnls));
741:   PetscCall(PetscFree(start_indices));

743:   PetscCall(VecSetValues(*Pv, v->mnl, global_indices, v->v, INSERT_VALUES));
744:   PetscCall(VecAssemblyBegin(*Pv));
745:   PetscCall(VecAssemblyEnd(*Pv));

747:   PetscCall(PetscFree(global_indices));
748:   PetscFunctionReturn(PETSC_SUCCESS);
749: }