Actual source code: ispai.c
/*
   3/99 Modified by Stephen Barnard to support SPAI version 3.0
*/

/*
   Provides an interface to the SPAI Sparse Approximate Inverse Preconditioner.
   Code written by Stephen Barnard.

   Note: there are some BAD memory leaks below!

   This code needs work:

   1) get rid of all the memory leaks
   2) fix the PETSc interface so that it determines whether the matrix is symmetric
      from the matrix itself rather than from the sp flag in PC_SPAI
   3) set the block size based on the matrix block size
*/
#if !defined(PETSC_SKIP_COMPLEX)
#define PETSC_SKIP_COMPLEX /* since spai uses I which conflicts with some complex implementations */
#endif

#include <petsc/private/pcimpl.h>
#include <../src/ksp/pc/impls/spai/petscspai.h>

/*
   These are the SPAI include files
*/
EXTERN_C_BEGIN
#define SPAI_USE_MPI /* required for setting SPAI_Comm correctly in basics.h */
#include <spai.h>
#include <matrix.h>
EXTERN_C_END

extern PetscErrorCode ConvertMatToMatrix(MPI_Comm, Mat, Mat, matrix **);
extern PetscErrorCode ConvertMatrixToMat(MPI_Comm, matrix *, Mat *);
extern PetscErrorCode ConvertVectorToVec(MPI_Comm, vector *, Vec *);
extern PetscErrorCode MM_to_PETSC(char *, char *, char *);

typedef struct {
  matrix *B;  /* matrix in SPAI format */
  matrix *BT; /* transpose of matrix in SPAI format */
  matrix *M;  /* the approximate inverse in SPAI format */

  Mat PM; /* the approximate inverse in PETSc format */

  double epsilon;    /* tolerance */
  int    nbsteps;    /* max number of "improvement" steps per line */
  int    max;        /* max dimensions of is_I, q, etc. */
  int    maxnew;     /* max number of new entries per step */
  int    block_size; /* constant block size */
  int    cache_size; /* one of (1,2,3,4,5,6) indicating size of cache */
  int    verbose;    /* SPAI prints timing and statistics */

  int      sp;        /* symmetric nonzero pattern */
  MPI_Comm comm_spai; /* communicator to be used with spai */
} PC_SPAI;

static PetscErrorCode PCSetUp_SPAI(PC pc)
{
  PC_SPAI *ispai = (PC_SPAI *)pc->data;
  Mat      AT;

  PetscFunctionBegin;
  init_SPAI();

  if (ispai->sp) {
    PetscCall(ConvertMatToMatrix(ispai->comm_spai, pc->pmat, pc->pmat, &ispai->B));
  } else {
    /* Use the transpose to get the column nonzero structure. */
    PetscCall(MatTranspose(pc->pmat, MAT_INITIAL_MATRIX, &AT));
    PetscCall(ConvertMatToMatrix(ispai->comm_spai, pc->pmat, AT, &ispai->B));
    PetscCall(MatDestroy(&AT));
  }

  /* construct SPAI preconditioner */
  /* FILE *messages */ /* file for warning messages */
  /* double epsilon */ /* tolerance */
  /* int nbsteps */    /* max number of "improvement" steps per line */
  /* int max */        /* max dimensions of is_I, q, etc. */
  /* int maxnew */     /* max number of new entries per step */
  /* int block_size */ /* block_size == 1 specifies scalar elements
                          block_size == n specifies nxn constant-block elements
                          block_size == 0 specifies variable-block elements */
  /* int cache_size */ /* one of (1,2,3,4,5,6) indicating size of cache; cache_size == 0 indicates no caching */
  /* int verbose */    /* verbose == 0 specifies that SPAI is silent
                          verbose == 1 prints timing and matrix statistics */

  PetscCallExternal(bspai, ispai->B, &ispai->M, stdout, ispai->epsilon, ispai->nbsteps, ispai->max, ispai->maxnew, ispai->block_size, ispai->cache_size, ispai->verbose);

  PetscCall(ConvertMatrixToMat(PetscObjectComm((PetscObject)pc), ispai->M, &ispai->PM));

  /* free the SPAI matrices */
  sp_free_matrix(ispai->B);
  sp_free_matrix(ispai->M);
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCApply_SPAI(PC pc, Vec xx, Vec y)
{
  PC_SPAI *ispai = (PC_SPAI *)pc->data;

  PetscFunctionBegin;
  /* Now using PETSc's multiply */
  PetscCall(MatMult(ispai->PM, xx, y));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCMatApply_SPAI(PC pc, Mat X, Mat Y)
{
  PC_SPAI *ispai = (PC_SPAI *)pc->data;

  PetscFunctionBegin;
  /* Now using PETSc's multiply */
  PetscCall(MatMatMult(ispai->PM, X, MAT_REUSE_MATRIX, PETSC_DEFAULT, &Y));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCDestroy_SPAI(PC pc)
{
  PC_SPAI *ispai = (PC_SPAI *)pc->data;

  PetscFunctionBegin;
  PetscCall(MatDestroy(&ispai->PM));
  PetscCallMPI(MPI_Comm_free(&(ispai->comm_spai)));
  PetscCall(PetscFree(pc->data));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetEpsilon_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetNBSteps_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMax_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMaxNew_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetBlockSize_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetCacheSize_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetVerbose_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetSp_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCView_SPAI(PC pc, PetscViewer viewer)
{
  PC_SPAI  *ispai = (PC_SPAI *)pc->data;
  PetscBool iascii;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  if (iascii) {
    PetscCall(PetscViewerASCIIPrintf(viewer, " epsilon %g\n", ispai->epsilon));
    PetscCall(PetscViewerASCIIPrintf(viewer, " nbsteps %d\n", ispai->nbsteps));
    PetscCall(PetscViewerASCIIPrintf(viewer, " max %d\n", ispai->max));
    PetscCall(PetscViewerASCIIPrintf(viewer, " maxnew %d\n", ispai->maxnew));
    PetscCall(PetscViewerASCIIPrintf(viewer, " block_size %d\n", ispai->block_size));
    PetscCall(PetscViewerASCIIPrintf(viewer, " cache_size %d\n", ispai->cache_size));
    PetscCall(PetscViewerASCIIPrintf(viewer, " verbose %d\n", ispai->verbose));
    PetscCall(PetscViewerASCIIPrintf(viewer, " sp %d\n", ispai->sp));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCSPAISetEpsilon_SPAI(PC pc, PetscReal epsilon1)
{
  PC_SPAI *ispai = (PC_SPAI *)pc->data;

  PetscFunctionBegin;
  ispai->epsilon = (double)epsilon1;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCSPAISetNBSteps_SPAI(PC pc, PetscInt nbsteps1)
{
  PC_SPAI *ispai = (PC_SPAI *)pc->data;

  PetscFunctionBegin;
  ispai->nbsteps = (int)nbsteps1;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* added 1/7/99 g.h. */
static PetscErrorCode PCSPAISetMax_SPAI(PC pc, PetscInt max1)
{
  PC_SPAI *ispai = (PC_SPAI *)pc->data;

  PetscFunctionBegin;
  ispai->max = (int)max1;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCSPAISetMaxNew_SPAI(PC pc, PetscInt maxnew1)
{
  PC_SPAI *ispai = (PC_SPAI *)pc->data;

  PetscFunctionBegin;
  ispai->maxnew = (int)maxnew1;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCSPAISetBlockSize_SPAI(PC pc, PetscInt block_size1)
{
  PC_SPAI *ispai = (PC_SPAI *)pc->data;

  PetscFunctionBegin;
  ispai->block_size = (int)block_size1;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCSPAISetCacheSize_SPAI(PC pc, PetscInt cache_size)
{
  PC_SPAI *ispai = (PC_SPAI *)pc->data;

  PetscFunctionBegin;
  ispai->cache_size = (int)cache_size;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCSPAISetVerbose_SPAI(PC pc, PetscInt verbose)
{
  PC_SPAI *ispai = (PC_SPAI *)pc->data;

  PetscFunctionBegin;
  ispai->verbose = (int)verbose;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCSPAISetSp_SPAI(PC pc, PetscInt sp)
{
  PC_SPAI *ispai = (PC_SPAI *)pc->data;

  PetscFunctionBegin;
  ispai->sp = (int)sp;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PCSPAISetEpsilon - Set the tolerance for the `PCSPAI` preconditioner

  Input Parameters:
+ pc       - the preconditioner
- epsilon1 - the tolerance (default .4)

  Note:
  Epsilon must be between 0 and 1. It controls the quality of the
  approximation of M to the inverse of A. Higher values of epsilon lead
  to more work, more fill, and usually better preconditioners. In many
  cases the best choice of epsilon is the one that divides the total
  solution time equally between the preconditioner and the solver.

  Level: intermediate

.seealso: `PCSPAI`, `PCSetType()`
@*/
PetscErrorCode PCSPAISetEpsilon(PC pc, PetscReal epsilon1)
{
  PetscFunctionBegin;
  PetscTryMethod(pc, "PCSPAISetEpsilon_C", (PC, PetscReal), (pc, epsilon1));
  PetscFunctionReturn(PETSC_SUCCESS);
}
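
/*
   A minimal calling sketch (an illustration, not part of the original file;
   it assumes a KSP named `ksp` whose operators have already been set):

     PC pc;
     PetscCall(KSPGetPC(ksp, &pc));
     PetscCall(PCSetType(pc, PCSPAI));
     PetscCall(PCSPAISetEpsilon(pc, 0.25)); // tighter tolerance than the default .4

   The remaining PCSPAISet*() functions below follow the same pattern, and each
   setting is also available from the options database (see PCSPAI).
*/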

/*@
  PCSPAISetNBSteps - set the maximum number of improvement steps per row in
  the `PCSPAI` preconditioner

  Input Parameters:
+ pc       - the preconditioner
- nbsteps1 - number of steps (default 5)

  Note:
  `PCSPAI` constructs an approximation to every column of the exact
  inverse of A in a series of improvement steps. The quality of the
  approximation is determined by epsilon. If an approximation achieving
  an accuracy of epsilon is not obtained after nbsteps1 steps, SPAI simply
  uses the best approximation constructed so far.

  Level: intermediate

.seealso: `PCSPAI`, `PCSetType()`, `PCSPAISetMaxNew()`
@*/
PetscErrorCode PCSPAISetNBSteps(PC pc, PetscInt nbsteps1)
{
  PetscFunctionBegin;
  PetscTryMethod(pc, "PCSPAISetNBSteps_C", (PC, PetscInt), (pc, nbsteps1));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* added 1/7/99 g.h. */
/*@
  PCSPAISetMax - set the size of various working buffers in
  the `PCSPAI` preconditioner

  Input Parameters:
+ pc   - the preconditioner
- max1 - size (default 5000)

  Level: intermediate

.seealso: `PCSPAI`, `PCSetType()`
@*/
PetscErrorCode PCSPAISetMax(PC pc, PetscInt max1)
{
  PetscFunctionBegin;
  PetscTryMethod(pc, "PCSPAISetMax_C", (PC, PetscInt), (pc, max1));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PCSPAISetMaxNew - set the maximum number of new nonzero candidates per step
  in the `PCSPAI` preconditioner

  Input Parameters:
+ pc      - the preconditioner
- maxnew1 - maximum number (default 5)

  Level: intermediate

.seealso: `PCSPAI`, `PCSetType()`, `PCSPAISetNBSteps()`
@*/
PetscErrorCode PCSPAISetMaxNew(PC pc, PetscInt maxnew1)
{
  PetscFunctionBegin;
  PetscTryMethod(pc, "PCSPAISetMaxNew_C", (PC, PetscInt), (pc, maxnew1));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PCSPAISetBlockSize - set the block size for the `PCSPAI` preconditioner

  Input Parameters:
+ pc          - the preconditioner
- block_size1 - block size (default 1)

  Notes:
  A block size of 1 treats A as a matrix of scalar elements.
  A block size of s > 1 treats A as a matrix of sxs blocks.
  A block size of 0 treats A as a matrix with variable-sized blocks,
  which are determined by searching for dense square diagonal blocks
  in A. This can be very effective for finite-element matrices.

  SPAI will convert A to block form, use a block version of the
  preconditioner algorithm, and then convert the result back to
  scalar form.

  In many cases a block size other than 1 can lead to very
  significant improvements in performance.

  Level: intermediate

.seealso: `PCSPAI`, `PCSetType()`
@*/
PetscErrorCode PCSPAISetBlockSize(PC pc, PetscInt block_size1)
{
  PetscFunctionBegin;
  PetscTryMethod(pc, "PCSPAISetBlockSize_C", (PC, PetscInt), (pc, block_size1));
  PetscFunctionReturn(PETSC_SUCCESS);
}
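
/*
   For example (an illustrative sketch, not prescriptive): for a matrix from a
   finite-element discretization with three degrees of freedom per grid point
   one might try

     PetscCall(PCSPAISetBlockSize(pc, 3)); // treat A as a matrix of 3x3 blocks

   or -pc_spai_block_size 3 on the command line; the best value is problem dependent.
*/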

/*@
  PCSPAISetCacheSize - specify the cache size used by the `PCSPAI` preconditioner

  Input Parameters:
+ pc         - the preconditioner
- cache_size - cache size {0,1,2,3,4,5} (default 5)

  Note:
  `PCSPAI` uses a hash table to cache messages and avoid redundant
  communication. We suggest always using 5. This parameter is
  irrelevant in the serial version.

  Level: intermediate

.seealso: `PCSPAI`, `PCSetType()`
@*/
PetscErrorCode PCSPAISetCacheSize(PC pc, PetscInt cache_size)
{
  PetscFunctionBegin;
  PetscTryMethod(pc, "PCSPAISetCacheSize_C", (PC, PetscInt), (pc, cache_size));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PCSPAISetVerbose - set the verbosity level for the `PCSPAI` preconditioner

  Input Parameters:
+ pc      - the preconditioner
- verbose - level (default 0)

  Note:
  A nonzero level prints parameters, timings, and matrix statistics.

  Level: intermediate

.seealso: `PCSPAI`, `PCSetType()`
@*/
PetscErrorCode PCSPAISetVerbose(PC pc, PetscInt verbose)
{
  PetscFunctionBegin;
  PetscTryMethod(pc, "PCSPAISetVerbose_C", (PC, PetscInt), (pc, verbose));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PCSPAISetSp - specify a symmetric matrix sparsity pattern in the `PCSPAI` preconditioner

  Input Parameters:
+ pc - the preconditioner
- sp - 0 or 1

  Note:
  If A has a symmetric nonzero pattern, use `-pc_spai_sp 1` to improve
  performance by eliminating some communication in the parallel version.
  Even if A does not have a symmetric nonzero pattern, `-pc_spai_sp 1`
  may well lead to good results, but the code will not follow the
  published SPAI algorithm exactly.

  Level: intermediate

.seealso: `PCSPAI`, `PCSetType()`
@*/
PetscErrorCode PCSPAISetSp(PC pc, PetscInt sp)
{
  PetscFunctionBegin;
  PetscTryMethod(pc, "PCSPAISetSp_C", (PC, PetscInt), (pc, sp));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCSetFromOptions_SPAI(PC pc, PetscOptionItems *PetscOptionsObject)
{
  PC_SPAI  *ispai = (PC_SPAI *)pc->data;
  int       nbsteps1, max1, maxnew1, block_size1, cache_size, verbose, sp;
  double    epsilon1;
  PetscBool flg;

  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject, "SPAI options");
  PetscCall(PetscOptionsReal("-pc_spai_epsilon", "", "PCSPAISetEpsilon", ispai->epsilon, &epsilon1, &flg));
  if (flg) PetscCall(PCSPAISetEpsilon(pc, epsilon1));
  PetscCall(PetscOptionsInt("-pc_spai_nbsteps", "", "PCSPAISetNBSteps", ispai->nbsteps, &nbsteps1, &flg));
  if (flg) PetscCall(PCSPAISetNBSteps(pc, nbsteps1));
  /* added 1/7/99 g.h. */
  PetscCall(PetscOptionsInt("-pc_spai_max", "", "PCSPAISetMax", ispai->max, &max1, &flg));
  if (flg) PetscCall(PCSPAISetMax(pc, max1));
  PetscCall(PetscOptionsInt("-pc_spai_maxnew", "", "PCSPAISetMaxNew", ispai->maxnew, &maxnew1, &flg));
  if (flg) PetscCall(PCSPAISetMaxNew(pc, maxnew1));
  PetscCall(PetscOptionsInt("-pc_spai_block_size", "", "PCSPAISetBlockSize", ispai->block_size, &block_size1, &flg));
  if (flg) PetscCall(PCSPAISetBlockSize(pc, block_size1));
  PetscCall(PetscOptionsInt("-pc_spai_cache_size", "", "PCSPAISetCacheSize", ispai->cache_size, &cache_size, &flg));
  if (flg) PetscCall(PCSPAISetCacheSize(pc, cache_size));
  PetscCall(PetscOptionsInt("-pc_spai_verbose", "", "PCSPAISetVerbose", ispai->verbose, &verbose, &flg));
  if (flg) PetscCall(PCSPAISetVerbose(pc, verbose));
  PetscCall(PetscOptionsInt("-pc_spai_sp", "", "PCSPAISetSp", ispai->sp, &sp, &flg));
  if (flg) PetscCall(PCSPAISetSp(pc, sp));
  PetscOptionsHeadEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   PCSPAI - Use the Sparse Approximate Inverse method

   Options Database Keys:
+  -pc_spai_epsilon <eps>    - set tolerance
.  -pc_spai_nbsteps <n>      - set nbsteps
.  -pc_spai_max <m>          - set max
.  -pc_spai_maxnew <m>       - set maxnew
.  -pc_spai_block_size <n>   - set block size
.  -pc_spai_cache_size <n>   - set cache size
.  -pc_spai_sp <m>           - set sp
-  -pc_spai_verbose <n>      - verbose output

   Level: beginner

   Note:
   This only works with `MATAIJ` matrices.

   References:
.  * - M. Grote and T. Huckle, Parallel preconditioning with sparse approximate inverses, SIAM J. Sci. Comput., 18(3), 1997

.seealso: `PCCreate()`, `PCSetType()`, `PCType`, `PC`,
          `PCSPAISetEpsilon()`, `PCSPAISetMax()`, `PCSPAISetMaxNew()`, `PCSPAISetBlockSize()`,
          `PCSPAISetVerbose()`, `PCSPAISetSp()`, `PCSPAISetNBSteps()`, `PCSPAISetCacheSize()`
M*/
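
/*
   A typical command line (illustrative; `./app` stands for any PETSc
   application, and the option values shown are only a starting point):

     mpiexec -n 4 ./app -ksp_type gmres -pc_type spai \
       -pc_spai_epsilon 0.4 -pc_spai_nbsteps 5 -pc_spai_maxnew 5 \
       -pc_spai_block_size 1 -pc_spai_sp 1 -pc_spai_verbose 1
*/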

PETSC_EXTERN PetscErrorCode PCCreate_SPAI(PC pc)
{
  PC_SPAI *ispai;

  PetscFunctionBegin;
  PetscCall(PetscNew(&ispai));
  pc->data = ispai;

  pc->ops->destroy         = PCDestroy_SPAI;
  pc->ops->apply           = PCApply_SPAI;
  pc->ops->matapply        = PCMatApply_SPAI;
  pc->ops->applyrichardson = NULL;
  pc->ops->setup           = PCSetUp_SPAI;
  pc->ops->view            = PCView_SPAI;
  pc->ops->setfromoptions  = PCSetFromOptions_SPAI;

  ispai->epsilon    = .4;
  ispai->nbsteps    = 5;
  ispai->max        = 5000;
  ispai->maxnew     = 5;
  ispai->block_size = 1;
  ispai->cache_size = 5;
  ispai->verbose    = 0;

  ispai->sp = 1;
  PetscCallMPI(MPI_Comm_dup(PetscObjectComm((PetscObject)pc), &(ispai->comm_spai)));

  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetEpsilon_C", PCSPAISetEpsilon_SPAI));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetNBSteps_C", PCSPAISetNBSteps_SPAI));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMax_C", PCSPAISetMax_SPAI));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMaxNew_C", PCSPAISetMaxNew_SPAI));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetBlockSize_C", PCSPAISetBlockSize_SPAI));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetCacheSize_C", PCSPAISetCacheSize_SPAI));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetVerbose_C", PCSPAISetVerbose_SPAI));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetSp_C", PCSPAISetSp_SPAI));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Converts from a PETSc matrix to an SPAI matrix
*/
PetscErrorCode ConvertMatToMatrix(MPI_Comm comm, Mat A, Mat AT, matrix **B)
{
  matrix                  *M;
  int                      i, j, col;
  int                      row_indx;
  int                      len, pe, local_indx, start_indx;
  int                     *mapping;
  const int               *cols;
  const double            *vals;
  int                      n, mnl, nnl, nz, rstart, rend;
  PetscMPIInt              size, rank;
  struct compressed_lines *rows;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(MatGetSize(A, &n, &n));
  PetscCall(MatGetLocalSize(A, &mnl, &nnl));

  /*
    not sure why a barrier is required; commenting it out
    PetscCallMPI(MPI_Barrier(comm));
  */

  M = new_matrix((SPAI_Comm)comm);

  M->n              = n;
  M->bs             = 1;
  M->max_block_size = 1;

  M->mnls          = (int *)malloc(sizeof(int) * size);
  M->start_indices = (int *)malloc(sizeof(int) * size);
  M->pe            = (int *)malloc(sizeof(int) * n);
  M->block_sizes   = (int *)malloc(sizeof(int) * n);
  for (i = 0; i < n; i++) M->block_sizes[i] = 1;

  PetscCallMPI(MPI_Allgather(&mnl, 1, MPI_INT, M->mnls, 1, MPI_INT, comm));

  M->start_indices[0] = 0;
  for (i = 1; i < size; i++) M->start_indices[i] = M->start_indices[i - 1] + M->mnls[i - 1];

  M->mnl            = M->mnls[M->myid];
  M->my_start_index = M->start_indices[M->myid];

  for (i = 0; i < size; i++) {
    start_indx = M->start_indices[i];
    for (j = 0; j < M->mnls[i]; j++) M->pe[start_indx + j] = i;
  }

  if (AT) {
    M->lines = new_compressed_lines(M->mnls[rank], 1);
  } else {
    M->lines = new_compressed_lines(M->mnls[rank], 0);
  }

  rows = M->lines;

  /* Determine the mapping from global indices to pointers */
  PetscCall(PetscMalloc1(M->n, &mapping));
  pe         = 0;
  local_indx = 0;
  for (i = 0; i < M->n; i++) {
    if (local_indx >= M->mnls[pe]) {
      pe++;
      local_indx = 0;
    }
    mapping[i] = local_indx + M->start_indices[pe];
    local_indx++;
  }
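  /* Note (editorial): with PETSc's contiguous, rank-ordered row distribution
     this mapping works out to the identity; the loop is kept in the general
     form that mirrors SPAI's own index bookkeeping. */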

  /************** Set up the row structure *****************/

  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
  for (i = rstart; i < rend; i++) {
    row_indx = i - rstart;
    PetscCall(MatGetRow(A, i, &nz, &cols, &vals));
    /* allocate buffers */
    rows->ptrs[row_indx] = (int *)malloc(nz * sizeof(int));
    rows->A[row_indx]    = (double *)malloc(nz * sizeof(double));
    /* copy the matrix */
    for (j = 0; j < nz; j++) {
      col = cols[j];
      len = rows->len[row_indx]++;

      rows->ptrs[row_indx][len] = mapping[col];
      rows->A[row_indx][len]    = vals[j];
    }
    rows->slen[row_indx] = rows->len[row_indx];

    PetscCall(MatRestoreRow(A, i, &nz, &cols, &vals));
  }

  /************** Set up the column structure *****************/

  if (AT) {
    for (i = rstart; i < rend; i++) {
      row_indx = i - rstart;
      PetscCall(MatGetRow(AT, i, &nz, &cols, &vals));
      /* allocate buffers */
      rows->rptrs[row_indx] = (int *)malloc(nz * sizeof(int));
      /* copy the matrix (i.e., the structure) */
      for (j = 0; j < nz; j++) {
        col = cols[j];
        len = rows->rlen[row_indx]++;

        rows->rptrs[row_indx][len] = mapping[col];
      }
      PetscCall(MatRestoreRow(AT, i, &nz, &cols, &vals));
    }
  }

  PetscCall(PetscFree(mapping));

  order_pointers(M);
  M->maxnz = calc_maxnz(M);
  *B       = M;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Converts from an SPAI matrix B to a PETSc matrix PB.
   This assumes that the SPAI matrix B is stored in
   COMPRESSED-ROW format.
*/
PetscErrorCode ConvertMatrixToMat(MPI_Comm comm, matrix *B, Mat *PB)
{
  PetscMPIInt size, rank;
  int         m, n, M, N;
  int         d_nz, o_nz;
  int        *d_nnz, *o_nnz;
  int         i, k, global_row, global_col, first_diag_col, last_diag_col;
  PetscScalar val;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  m = n = B->mnls[rank];
  d_nz = o_nz = 0;

  /* Determine preallocation for MatCreateAIJ */
  PetscCall(PetscMalloc1(m, &d_nnz));
  PetscCall(PetscMalloc1(m, &o_nnz));
  for (i = 0; i < m; i++) d_nnz[i] = o_nnz[i] = 0;
  first_diag_col = B->start_indices[rank];
  last_diag_col  = first_diag_col + B->mnls[rank];
  for (i = 0; i < B->mnls[rank]; i++) {
    for (k = 0; k < B->lines->len[i]; k++) {
      global_col = B->lines->ptrs[i][k];
      if ((global_col >= first_diag_col) && (global_col < last_diag_col)) d_nnz[i]++;
      else o_nnz[i]++;
    }
  }
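  /* d_nnz[i] and o_nnz[i] now hold, for each local row, the number of
     nonzeros inside and outside this rank's diagonal block, which is the
     convention MatMPIAIJSetPreallocation() expects. */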

  M = N = B->n;
  /* Here we only know how to create AIJ format */
  PetscCall(MatCreate(comm, PB));
  PetscCall(MatSetSizes(*PB, m, n, M, N));
  PetscCall(MatSetType(*PB, MATAIJ));
  PetscCall(MatSeqAIJSetPreallocation(*PB, d_nz, d_nnz));
  PetscCall(MatMPIAIJSetPreallocation(*PB, d_nz, d_nnz, o_nz, o_nnz));

  for (i = 0; i < B->mnls[rank]; i++) {
    global_row = B->start_indices[rank] + i;
    for (k = 0; k < B->lines->len[i]; k++) {
      global_col = B->lines->ptrs[i][k];

      val = B->lines->A[i][k];
      PetscCall(MatSetValues(*PB, 1, &global_row, 1, &global_col, &val, ADD_VALUES));
    }
  }

  PetscCall(PetscFree(d_nnz));
  PetscCall(PetscFree(o_nnz));

  PetscCall(MatAssemblyBegin(*PB, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*PB, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Converts from an SPAI vector v to a PETSc Vec Pv.
*/
PetscErrorCode ConvertVectorToVec(MPI_Comm comm, vector *v, Vec *Pv)
{
  PetscMPIInt size, rank;
  int         m, M, i, *mnls, *start_indices, *global_indices;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  m = v->mnl;
  M = v->n;

  PetscCall(VecCreateMPI(comm, m, M, Pv));

  PetscCall(PetscMalloc1(size, &mnls));
  PetscCallMPI(MPI_Allgather(&v->mnl, 1, MPI_INT, mnls, 1, MPI_INT, comm));

  PetscCall(PetscMalloc1(size, &start_indices));

  start_indices[0] = 0;
  for (i = 1; i < size; i++) start_indices[i] = start_indices[i - 1] + mnls[i - 1];

  PetscCall(PetscMalloc1(v->mnl, &global_indices));
  for (i = 0; i < v->mnl; i++) global_indices[i] = start_indices[rank] + i;

  PetscCall(PetscFree(mnls));
  PetscCall(PetscFree(start_indices));

  PetscCall(VecSetValues(*Pv, v->mnl, global_indices, v->v, INSERT_VALUES));
  PetscCall(VecAssemblyBegin(*Pv));
  PetscCall(VecAssemblyEnd(*Pv));

  PetscCall(PetscFree(global_indices));
  PetscFunctionReturn(PETSC_SUCCESS);
}