Actual source code: mpisbaij.c
1: #include <../src/mat/impls/baij/mpi/mpibaij.h>
2: #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
3: #include <../src/mat/impls/sbaij/seq/sbaij.h>
4: #include <petscblaslapack.h>
6: PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
7: {
8: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
10: PetscFunctionBegin;
11: #if defined(PETSC_USE_LOG)
12: PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
13: #endif
14: PetscCall(MatStashDestroy_Private(&mat->stash));
15: PetscCall(MatStashDestroy_Private(&mat->bstash));
16: PetscCall(MatDestroy(&baij->A));
17: PetscCall(MatDestroy(&baij->B));
18: #if defined(PETSC_USE_CTABLE)
19: PetscCall(PetscHMapIDestroy(&baij->colmap));
20: #else
21: PetscCall(PetscFree(baij->colmap));
22: #endif
23: PetscCall(PetscFree(baij->garray));
24: PetscCall(VecDestroy(&baij->lvec));
25: PetscCall(VecScatterDestroy(&baij->Mvctx));
26: PetscCall(VecDestroy(&baij->slvec0));
27: PetscCall(VecDestroy(&baij->slvec0b));
28: PetscCall(VecDestroy(&baij->slvec1));
29: PetscCall(VecDestroy(&baij->slvec1a));
30: PetscCall(VecDestroy(&baij->slvec1b));
31: PetscCall(VecScatterDestroy(&baij->sMvctx));
32: PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
33: PetscCall(PetscFree(baij->barray));
34: PetscCall(PetscFree(baij->hd));
35: PetscCall(VecDestroy(&baij->diag));
36: PetscCall(VecDestroy(&baij->bb1));
37: PetscCall(VecDestroy(&baij->xx1));
38: #if defined(PETSC_USE_REAL_MAT_SINGLE)
39: PetscCall(PetscFree(baij->setvaluescopy));
40: #endif
41: PetscCall(PetscFree(baij->in_loc));
42: PetscCall(PetscFree(baij->v_loc));
43: PetscCall(PetscFree(baij->rangebs));
44: PetscCall(PetscFree(mat->data));
46: PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
47: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
48: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
49: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocation_C", NULL));
50: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPISBAIJSetPreallocationCSR_C", NULL));
51: #if defined(PETSC_HAVE_ELEMENTAL)
52: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_elemental_C", NULL));
53: #endif
54: #if defined(PETSC_HAVE_SCALAPACK)
55: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_scalapack_C", NULL));
56: #endif
57: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpiaij_C", NULL));
58: PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpisbaij_mpibaij_C", NULL));
59: PetscFunctionReturn(PETSC_SUCCESS);
60: }
62: /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), MatAssemblyEnd_MPI_Hash(), MatSetUp_MPI_Hash() */
63: #define TYPE SBAIJ
64: #define TYPE_SBAIJ
65: #include "../src/mat/impls/aij/mpi/mpihashmat.h"
66: #undef TYPE
67: #undef TYPE_SBAIJ
69: #if defined(PETSC_HAVE_ELEMENTAL)
70: PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
71: #endif
72: #if defined(PETSC_HAVE_SCALAPACK)
73: PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
74: #endif
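75: /* Preallocate B from the nonzero pattern(s) of the matrices X[]; if symm[i] is set, the transposed pattern of X[i] is added as well */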
76: /* This could be moved to matimpl.h */
77: static PetscErrorCode MatPreallocateWithMats_Private(Mat B, PetscInt nm, Mat X[], PetscBool symm[], PetscBool fill)
78: {
79: Mat preallocator;
80: PetscInt r, rstart, rend;
81: PetscInt bs, i, m, n, M, N;
82: PetscBool cong = PETSC_TRUE;
84: PetscFunctionBegin;
87: for (i = 0; i < nm; i++) {
89: PetscCall(PetscLayoutCompare(B->rmap, X[i]->rmap, &cong));
90: PetscCheck(cong, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for different layouts");
91: }
93: PetscCall(MatGetBlockSize(B, &bs));
94: PetscCall(MatGetSize(B, &M, &N));
95: PetscCall(MatGetLocalSize(B, &m, &n));
96: PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &preallocator));
97: PetscCall(MatSetType(preallocator, MATPREALLOCATOR));
98: PetscCall(MatSetBlockSize(preallocator, bs));
99: PetscCall(MatSetSizes(preallocator, m, n, M, N));
100: PetscCall(MatSetUp(preallocator));
101: PetscCall(MatGetOwnershipRange(preallocator, &rstart, &rend));
102: for (r = rstart; r < rend; ++r) {
103: PetscInt ncols;
104: const PetscInt *row;
105: const PetscScalar *vals;
107: for (i = 0; i < nm; i++) {
108: PetscCall(MatGetRow(X[i], r, &ncols, &row, &vals));
109: PetscCall(MatSetValues(preallocator, 1, &r, ncols, row, vals, INSERT_VALUES));
110: if (symm && symm[i]) PetscCall(MatSetValues(preallocator, ncols, row, 1, &r, vals, INSERT_VALUES));
111: PetscCall(MatRestoreRow(X[i], r, &ncols, &row, &vals));
112: }
113: }
114: PetscCall(MatAssemblyBegin(preallocator, MAT_FINAL_ASSEMBLY));
115: PetscCall(MatAssemblyEnd(preallocator, MAT_FINAL_ASSEMBLY));
116: PetscCall(MatPreallocatorPreallocate(preallocator, fill, B));
117: PetscCall(MatDestroy(&preallocator));
118: PetscFunctionReturn(PETSC_SUCCESS);
119: }
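120: /* Basic conversion from MPISBAIJ (only the upper triangle is stored) to a full-storage type: every stored entry (i,j) is inserted together with its transpose (j,i), conjugated in the complex Hermitian case */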
121: PETSC_INTERN PetscErrorCode MatConvert_MPISBAIJ_Basic(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
122: {
123: Mat B;
124: PetscInt r;
126: PetscFunctionBegin;
127: if (reuse != MAT_REUSE_MATRIX) {
128: PetscBool symm = PETSC_TRUE, isdense;
129: PetscInt bs;
131: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
132: PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
133: PetscCall(MatSetType(B, newtype));
134: PetscCall(MatGetBlockSize(A, &bs));
135: PetscCall(MatSetBlockSize(B, bs));
136: PetscCall(PetscLayoutSetUp(B->rmap));
137: PetscCall(PetscLayoutSetUp(B->cmap));
138: PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isdense, MATSEQDENSE, MATMPIDENSE, MATSEQDENSECUDA, ""));
139: if (!isdense) {
140: PetscCall(MatGetRowUpperTriangular(A));
141: PetscCall(MatPreallocateWithMats_Private(B, 1, &A, &symm, PETSC_TRUE));
142: PetscCall(MatRestoreRowUpperTriangular(A));
143: } else {
144: PetscCall(MatSetUp(B));
145: }
146: } else {
147: B = *newmat;
148: PetscCall(MatZeroEntries(B));
149: }
151: PetscCall(MatGetRowUpperTriangular(A));
152: for (r = A->rmap->rstart; r < A->rmap->rend; r++) {
153: PetscInt ncols;
154: const PetscInt *row;
155: const PetscScalar *vals;
157: PetscCall(MatGetRow(A, r, &ncols, &row, &vals));
158: PetscCall(MatSetValues(B, 1, &r, ncols, row, vals, INSERT_VALUES));
159: #if defined(PETSC_USE_COMPLEX)
160: if (A->hermitian == PETSC_BOOL3_TRUE) {
161: PetscInt i;
162: for (i = 0; i < ncols; i++) PetscCall(MatSetValue(B, row[i], r, PetscConj(vals[i]), INSERT_VALUES));
163: } else {
164: PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
165: }
166: #else
167: PetscCall(MatSetValues(B, ncols, row, 1, &r, vals, INSERT_VALUES));
168: #endif
169: PetscCall(MatRestoreRow(A, r, &ncols, &row, &vals));
170: }
171: PetscCall(MatRestoreRowUpperTriangular(A));
172: PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
173: PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
175: if (reuse == MAT_INPLACE_MATRIX) {
176: PetscCall(MatHeaderReplace(A, &B));
177: } else {
178: *newmat = B;
179: }
180: PetscFunctionReturn(PETSC_SUCCESS);
181: }
183: PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
184: {
185: Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
187: PetscFunctionBegin;
188: PetscCall(MatStoreValues(aij->A));
189: PetscCall(MatStoreValues(aij->B));
190: PetscFunctionReturn(PETSC_SUCCESS);
191: }
193: PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
194: {
195: Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
197: PetscFunctionBegin;
198: PetscCall(MatRetrieveValues(aij->A));
199: PetscCall(MatRetrieveValues(aij->B));
200: PetscFunctionReturn(PETSC_SUCCESS);
201: }
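202: /* The two macros below insert one scalar into the diagonal (A) or off-diagonal (B) local block: binary-search the block row for the target block column, update in place if the block exists, otherwise shift the row (reallocating if necessary) and insert a new zeroed block */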
203: #define MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, orow, ocol) \
204: { \
205: brow = row / bs; \
206: rp = aj + ai[brow]; \
207: ap = aa + bs2 * ai[brow]; \
208: rmax = aimax[brow]; \
209: nrow = ailen[brow]; \
210: bcol = col / bs; \
211: ridx = row % bs; \
212: cidx = col % bs; \
213: low = 0; \
214: high = nrow; \
215: while (high - low > 3) { \
216: t = (low + high) / 2; \
217: if (rp[t] > bcol) high = t; \
218: else low = t; \
219: } \
220: for (_i = low; _i < high; _i++) { \
221: if (rp[_i] > bcol) break; \
222: if (rp[_i] == bcol) { \
223: bap = ap + bs2 * _i + bs * cidx + ridx; \
224: if (addv == ADD_VALUES) *bap += value; \
225: else *bap = value; \
226: goto a_noinsert; \
227: } \
228: } \
229: if (a->nonew == 1) goto a_noinsert; \
230: PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
231: MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
232: N = nrow++ - 1; \
233: /* shift up all the later entries in this row */ \
234: PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
235: PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
236: PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
237: rp[_i] = bcol; \
238: ap[bs2 * _i + bs * cidx + ridx] = value; \
239: A->nonzerostate++; \
240: a_noinsert:; \
241: ailen[brow] = nrow; \
242: }
244: #define MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, orow, ocol) \
245: { \
246: brow = row / bs; \
247: rp = bj + bi[brow]; \
248: ap = ba + bs2 * bi[brow]; \
249: rmax = bimax[brow]; \
250: nrow = bilen[brow]; \
251: bcol = col / bs; \
252: ridx = row % bs; \
253: cidx = col % bs; \
254: low = 0; \
255: high = nrow; \
256: while (high - low > 3) { \
257: t = (low + high) / 2; \
258: if (rp[t] > bcol) high = t; \
259: else low = t; \
260: } \
261: for (_i = low; _i < high; _i++) { \
262: if (rp[_i] > bcol) break; \
263: if (rp[_i] == bcol) { \
264: bap = ap + bs2 * _i + bs * cidx + ridx; \
265: if (addv == ADD_VALUES) *bap += value; \
266: else *bap = value; \
267: goto b_noinsert; \
268: } \
269: } \
270: if (b->nonew == 1) goto b_noinsert; \
271: PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
272: MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
273: N = nrow++ - 1; \
274: /* shift up all the later entries in this row */ \
275: PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
276: PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
277: PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
278: rp[_i] = bcol; \
279: ap[bs2 * _i + bs * cidx + ridx] = value; \
280: B->nonzerostate++; \
281: b_noinsert:; \
282: bilen[brow] = nrow; \
283: }
285: /* Only add/insert a(i,j) with i<=j (block indices).
286: Any a(i,j) with i>j supplied by the user is either ignored or generates an error, depending on MAT_IGNORE_LOWER_TRIANGULAR
287: */
288: PetscErrorCode MatSetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
289: {
290: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
291: MatScalar value;
292: PetscBool roworiented = baij->roworiented;
293: PetscInt i, j, row, col;
294: PetscInt rstart_orig = mat->rmap->rstart;
295: PetscInt rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
296: PetscInt cend_orig = mat->cmap->rend, bs = mat->rmap->bs;
298: /* Some Variables required in the macro */
299: Mat A = baij->A;
300: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)(A)->data;
301: PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
302: MatScalar *aa = a->a;
304: Mat B = baij->B;
305: Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)(B)->data;
306: PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
307: MatScalar *ba = b->a;
309: PetscInt *rp, ii, nrow, _i, rmax, N, brow, bcol;
310: PetscInt low, high, t, ridx, cidx, bs2 = a->bs2;
311: MatScalar *ap, *bap;
313: /* for stash */
314: PetscInt n_loc, *in_loc = NULL;
315: MatScalar *v_loc = NULL;
317: PetscFunctionBegin;
318: if (!baij->donotstash) {
319: if (n > baij->n_loc) {
320: PetscCall(PetscFree(baij->in_loc));
321: PetscCall(PetscFree(baij->v_loc));
322: PetscCall(PetscMalloc1(n, &baij->in_loc));
323: PetscCall(PetscMalloc1(n, &baij->v_loc));
325: baij->n_loc = n;
326: }
327: in_loc = baij->in_loc;
328: v_loc = baij->v_loc;
329: }
331: for (i = 0; i < m; i++) {
332: if (im[i] < 0) continue;
333: PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
334: if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
335: row = im[i] - rstart_orig; /* local row index */
336: for (j = 0; j < n; j++) {
337: if (im[i] / bs > in[j] / bs) {
338: if (a->ignore_ltriangular) {
339: continue; /* ignore lower triangular blocks */
340: } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
341: }
342: if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diag entry (A) */
343: col = in[j] - cstart_orig; /* local col index */
344: brow = row / bs;
345: bcol = col / bs;
346: if (brow > bcol) continue; /* ignore lower triangular blocks of A */
347: if (roworiented) value = v[i * n + j];
348: else value = v[i + j * m];
349: MatSetValues_SeqSBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
350: /* PetscCall(MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv)); */
351: } else if (in[j] < 0) {
352: continue;
353: } else {
354: PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
355: /* off-diag entry (B) */
356: if (mat->was_assembled) {
357: if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
358: #if defined(PETSC_USE_CTABLE)
359: PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
360: col = col - 1;
361: #else
362: col = baij->colmap[in[j] / bs] - 1;
363: #endif
364: if (col < 0 && !((Mat_SeqSBAIJ *)(baij->A->data))->nonew) {
365: PetscCall(MatDisAssemble_MPISBAIJ(mat));
366: col = in[j];
367: /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
368: B = baij->B;
369: b = (Mat_SeqBAIJ *)(B)->data;
370: bimax = b->imax;
371: bi = b->i;
372: bilen = b->ilen;
373: bj = b->j;
374: ba = b->a;
375: } else col += in[j] % bs;
376: } else col = in[j];
377: if (roworiented) value = v[i * n + j];
378: else value = v[i + j * m];
379: MatSetValues_SeqSBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
380: /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
381: }
382: }
383: } else { /* off processor entry */
384: PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
385: if (!baij->donotstash) {
386: mat->assembled = PETSC_FALSE;
387: n_loc = 0;
388: for (j = 0; j < n; j++) {
389: if (im[i] / bs > in[j] / bs) continue; /* ignore lower triangular blocks */
390: in_loc[n_loc] = in[j];
391: if (roworiented) {
392: v_loc[n_loc] = v[i * n + j];
393: } else {
394: v_loc[n_loc] = v[j * m + i];
395: }
396: n_loc++;
397: }
398: PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n_loc, in_loc, v_loc, PETSC_FALSE));
399: }
400: }
401: }
402: PetscFunctionReturn(PETSC_SUCCESS);
403: }
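404: /* Insert one bs x bs block at block position (row,col) of a SeqSBAIJ matrix; blocks with col < row are ignored or rejected depending on ignore_ltriangular */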
405: static inline PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
406: {
407: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)A->data;
408: PetscInt *rp, low, high, t, ii, jj, nrow, i, rmax, N;
409: PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen;
410: PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
411: PetscBool roworiented = a->roworiented;
412: const PetscScalar *value = v;
413: MatScalar *ap, *aa = a->a, *bap;
415: PetscFunctionBegin;
416: if (col < row) {
417: if (a->ignore_ltriangular) PetscFunctionReturn(PETSC_SUCCESS); /* ignore lower triangular block */
418: else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
419: }
420: rp = aj + ai[row];
421: ap = aa + bs2 * ai[row];
422: rmax = imax[row];
423: nrow = ailen[row];
424: value = v;
425: low = 0;
426: high = nrow;
428: while (high - low > 7) {
429: t = (low + high) / 2;
430: if (rp[t] > col) high = t;
431: else low = t;
432: }
433: for (i = low; i < high; i++) {
434: if (rp[i] > col) break;
435: if (rp[i] == col) {
436: bap = ap + bs2 * i;
437: if (roworiented) {
438: if (is == ADD_VALUES) {
439: for (ii = 0; ii < bs; ii++) {
440: for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
441: }
442: } else {
443: for (ii = 0; ii < bs; ii++) {
444: for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
445: }
446: }
447: } else {
448: if (is == ADD_VALUES) {
449: for (ii = 0; ii < bs; ii++) {
450: for (jj = 0; jj < bs; jj++) *bap++ += *value++;
451: }
452: } else {
453: for (ii = 0; ii < bs; ii++) {
454: for (jj = 0; jj < bs; jj++) *bap++ = *value++;
455: }
456: }
457: }
458: goto noinsert2;
459: }
460: }
461: if (nonew == 1) goto noinsert2;
462: PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new block index nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
463: MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
464: N = nrow++ - 1;
465: high++;
466: /* shift up all the later entries in this row */
467: PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
468: PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
469: rp[i] = col;
470: bap = ap + bs2 * i;
471: if (roworiented) {
472: for (ii = 0; ii < bs; ii++) {
473: for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
474: }
475: } else {
476: for (ii = 0; ii < bs; ii++) {
477: for (jj = 0; jj < bs; jj++) *bap++ = *value++;
478: }
479: }
480: noinsert2:;
481: ailen[row] = nrow;
482: PetscFunctionReturn(PETSC_SUCCESS);
483: }
485: /*
486: This routine is exactly duplicated in mpibaij.c
487: */
488: static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
489: {
490: Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data;
491: PetscInt *rp, low, high, t, ii, jj, nrow, i, rmax, N;
492: PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen;
493: PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
494: PetscBool roworiented = a->roworiented;
495: const PetscScalar *value = v;
496: MatScalar *ap, *aa = a->a, *bap;
498: PetscFunctionBegin;
499: rp = aj + ai[row];
500: ap = aa + bs2 * ai[row];
501: rmax = imax[row];
502: nrow = ailen[row];
503: low = 0;
504: high = nrow;
505: value = v;
506: while (high - low > 7) {
507: t = (low + high) / 2;
508: if (rp[t] > col) high = t;
509: else low = t;
510: }
511: for (i = low; i < high; i++) {
512: if (rp[i] > col) break;
513: if (rp[i] == col) {
514: bap = ap + bs2 * i;
515: if (roworiented) {
516: if (is == ADD_VALUES) {
517: for (ii = 0; ii < bs; ii++) {
518: for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
519: }
520: } else {
521: for (ii = 0; ii < bs; ii++) {
522: for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
523: }
524: }
525: } else {
526: if (is == ADD_VALUES) {
527: for (ii = 0; ii < bs; ii++, value += bs) {
528: for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
529: bap += bs;
530: }
531: } else {
532: for (ii = 0; ii < bs; ii++, value += bs) {
533: for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
534: bap += bs;
535: }
536: }
537: }
538: goto noinsert2;
539: }
540: }
541: if (nonew == 1) goto noinsert2;
542: PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
543: MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
544: N = nrow++ - 1;
545: high++;
546: /* shift up all the later entries in this row */
547: PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
548: PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
549: rp[i] = col;
550: bap = ap + bs2 * i;
551: if (roworiented) {
552: for (ii = 0; ii < bs; ii++) {
553: for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
554: }
555: } else {
556: for (ii = 0; ii < bs; ii++) {
557: for (jj = 0; jj < bs; jj++) *bap++ = *value++;
558: }
559: }
560: noinsert2:;
561: ailen[row] = nrow;
562: PetscFunctionReturn(PETSC_SUCCESS);
563: }
565: /*
566: This routine could be optimized by removing the need for the block copy below and passing stride information
567: to the above inline routines; similarly in MatSetValuesBlocked_MPIBAIJ()
568: */
569: PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const MatScalar v[], InsertMode addv)
570: {
571: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
572: const MatScalar *value;
573: MatScalar *barray = baij->barray;
574: PetscBool roworiented = baij->roworiented, ignore_ltriangular = ((Mat_SeqSBAIJ *)baij->A->data)->ignore_ltriangular;
575: PetscInt i, j, ii, jj, row, col, rstart = baij->rstartbs;
576: PetscInt rend = baij->rendbs, cstart = baij->cstartbs, stepval;
577: PetscInt cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;
579: PetscFunctionBegin;
580: if (!barray) {
581: PetscCall(PetscMalloc1(bs2, &barray));
582: baij->barray = barray;
583: }
585: if (roworiented) {
586: stepval = (n - 1) * bs;
587: } else {
588: stepval = (m - 1) * bs;
589: }
590: for (i = 0; i < m; i++) {
591: if (im[i] < 0) continue;
592: PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
593: if (im[i] >= rstart && im[i] < rend) {
594: row = im[i] - rstart;
595: for (j = 0; j < n; j++) {
596: if (im[i] > in[j]) {
597: if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
598: else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_USER, "Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
599: }
600: /* A copy is not required if there is only one block column (row-oriented) or one block row (column-oriented) */
601: if ((roworiented) && (n == 1)) {
602: barray = (MatScalar *)v + i * bs2;
603: } else if ((!roworiented) && (m == 1)) {
604: barray = (MatScalar *)v + j * bs2;
605: } else { /* Here a copy is required */
606: if (roworiented) {
607: value = v + i * (stepval + bs) * bs + j * bs;
608: } else {
609: value = v + j * (stepval + bs) * bs + i * bs;
610: }
611: for (ii = 0; ii < bs; ii++, value += stepval) {
612: for (jj = 0; jj < bs; jj++) *barray++ = *value++;
613: }
614: barray -= bs2;
615: }
617: if (in[j] >= cstart && in[j] < cend) {
618: col = in[j] - cstart;
619: PetscCall(MatSetValuesBlocked_SeqSBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
620: } else if (in[j] < 0) {
621: continue;
622: } else {
623: PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
624: if (mat->was_assembled) {
625: if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
627: #if defined(PETSC_USE_DEBUG)
628: #if defined(PETSC_USE_CTABLE)
629: {
630: PetscInt data;
631: PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
632: PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
633: }
634: #else
635: PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
636: #endif
637: #endif
638: #if defined(PETSC_USE_CTABLE)
639: PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
640: col = (col - 1) / bs;
641: #else
642: col = (baij->colmap[in[j]] - 1) / bs;
643: #endif
644: if (col < 0 && !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
645: PetscCall(MatDisAssemble_MPISBAIJ(mat));
646: col = in[j];
647: }
648: } else col = in[j];
649: PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
650: }
651: }
652: } else {
653: PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
654: if (!baij->donotstash) {
655: if (roworiented) {
656: PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
657: } else {
658: PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
659: }
660: }
661: }
662: }
663: PetscFunctionReturn(PETSC_SUCCESS);
664: }
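665: /* Only locally owned rows are supported: columns in the diagonal block are read from A, all others are located in B through colmap/garray (entries not stored return 0.0) */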
666: PetscErrorCode MatGetValues_MPISBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
667: {
668: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
669: PetscInt bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
670: PetscInt bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;
672: PetscFunctionBegin;
673: for (i = 0; i < m; i++) {
674: if (idxm[i] < 0) continue; /* negative row */
675: PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
676: if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
677: row = idxm[i] - bsrstart;
678: for (j = 0; j < n; j++) {
679: if (idxn[j] < 0) continue; /* negative column */
680: PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
681: if (idxn[j] >= bscstart && idxn[j] < bscend) {
682: col = idxn[j] - bscstart;
683: PetscCall(MatGetValues_SeqSBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
684: } else {
685: if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
686: #if defined(PETSC_USE_CTABLE)
687: PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
688: data--;
689: #else
690: data = baij->colmap[idxn[j] / bs] - 1;
691: #endif
692: if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
693: else {
694: col = data + idxn[j] % bs;
695: PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
696: }
697: }
698: }
699: } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
700: }
701: PetscFunctionReturn(PETSC_SUCCESS);
702: }
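703: /* Frobenius norm is computed as sqrt(sum over ranks of ||A||^2 + 2*||B||^2), since the off-diagonal part B represents entries that appear twice in the full symmetric matrix */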
704: PetscErrorCode MatNorm_MPISBAIJ(Mat mat, NormType type, PetscReal *norm)
705: {
706: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
707: PetscReal sum[2], *lnorm2;
709: PetscFunctionBegin;
710: if (baij->size == 1) {
711: PetscCall(MatNorm(baij->A, type, norm));
712: } else {
713: if (type == NORM_FROBENIUS) {
714: PetscCall(PetscMalloc1(2, &lnorm2));
715: PetscCall(MatNorm(baij->A, type, lnorm2));
716: *lnorm2 = (*lnorm2) * (*lnorm2);
717: lnorm2++; /* square of norm(A) */
718: PetscCall(MatNorm(baij->B, type, lnorm2));
719: *lnorm2 = (*lnorm2) * (*lnorm2);
720: lnorm2--; /* square of norm(B) */
721: PetscCall(MPIU_Allreduce(lnorm2, sum, 2, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
722: *norm = PetscSqrtReal(sum[0] + 2 * sum[1]);
723: PetscCall(PetscFree(lnorm2));
724: } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
725: Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ *)baij->A->data;
726: Mat_SeqBAIJ *bmat = (Mat_SeqBAIJ *)baij->B->data;
727: PetscReal *rsum, *rsum2, vabs;
728: PetscInt *jj, *garray = baij->garray, rstart = baij->rstartbs, nz;
729: PetscInt brow, bcol, col, bs = baij->A->rmap->bs, row, grow, gcol, mbs = amat->mbs;
730: MatScalar *v;
732: PetscCall(PetscMalloc2(mat->cmap->N, &rsum, mat->cmap->N, &rsum2));
733: PetscCall(PetscArrayzero(rsum, mat->cmap->N));
734: /* Amat */
735: v = amat->a;
736: jj = amat->j;
737: for (brow = 0; brow < mbs; brow++) {
738: grow = bs * (rstart + brow);
739: nz = amat->i[brow + 1] - amat->i[brow];
740: for (bcol = 0; bcol < nz; bcol++) {
741: gcol = bs * (rstart + *jj);
742: jj++;
743: for (col = 0; col < bs; col++) {
744: for (row = 0; row < bs; row++) {
745: vabs = PetscAbsScalar(*v);
746: v++;
747: rsum[gcol + col] += vabs;
748: /* non-diagonal block */
749: if (bcol > 0 && vabs > 0.0) rsum[grow + row] += vabs;
750: }
751: }
752: }
753: PetscCall(PetscLogFlops(nz * bs * bs));
754: }
755: /* Bmat */
756: v = bmat->a;
757: jj = bmat->j;
758: for (brow = 0; brow < mbs; brow++) {
759: grow = bs * (rstart + brow);
760: nz = bmat->i[brow + 1] - bmat->i[brow];
761: for (bcol = 0; bcol < nz; bcol++) {
762: gcol = bs * garray[*jj];
763: jj++;
764: for (col = 0; col < bs; col++) {
765: for (row = 0; row < bs; row++) {
766: vabs = PetscAbsScalar(*v);
767: v++;
768: rsum[gcol + col] += vabs;
769: rsum[grow + row] += vabs;
770: }
771: }
772: }
773: PetscCall(PetscLogFlops(nz * bs * bs));
774: }
775: PetscCall(MPIU_Allreduce(rsum, rsum2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
776: *norm = 0.0;
777: for (col = 0; col < mat->cmap->N; col++) {
778: if (rsum2[col] > *norm) *norm = rsum2[col];
779: }
780: PetscCall(PetscFree2(rsum, rsum2));
781: } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for this norm yet");
782: }
783: PetscFunctionReturn(PETSC_SUCCESS);
784: }
786: PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat, MatAssemblyType mode)
787: {
788: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
789: PetscInt nstash, reallocs;
791: PetscFunctionBegin;
792: if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
794: PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
795: PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
796: PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
797: PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
798: PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
799: PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
800: PetscFunctionReturn(PETSC_SUCCESS);
801: }
803: PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat, MatAssemblyType mode)
804: {
805: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
806: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)baij->A->data;
807: PetscInt i, j, rstart, ncols, flg, bs2 = baij->bs2;
808: PetscInt *row, *col;
809: PetscBool other_disassembled;
810: PetscMPIInt n;
811: PetscBool r1, r2, r3;
812: MatScalar *val;
814: /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
815: PetscFunctionBegin;
816: if (!baij->donotstash && !mat->nooffprocentries) {
817: while (1) {
818: PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
819: if (!flg) break;
821: for (i = 0; i < n;) {
822: /* Now identify the consecutive vals belonging to the same row */
823: for (j = i, rstart = row[j]; j < n; j++) {
824: if (row[j] != rstart) break;
825: }
826: if (j < n) ncols = j - i;
827: else ncols = n - i;
828: /* Now assemble all these values with a single function call */
829: PetscCall(MatSetValues_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
830: i = j;
831: }
832: }
833: PetscCall(MatStashScatterEnd_Private(&mat->stash));
834: /* Now process the block-stash. Since the values are stashed column-oriented,
835: temporarily switch the roworiented flags to column-oriented, and restore the
836: original flags after the MatSetValuesBlocked() calls */
837: r1 = baij->roworiented;
838: r2 = a->roworiented;
839: r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;
841: baij->roworiented = PETSC_FALSE;
842: a->roworiented = PETSC_FALSE;
844: ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
845: while (1) {
846: PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
847: if (!flg) break;
849: for (i = 0; i < n;) {
850: /* Now identify the consecutive vals belonging to the same row */
851: for (j = i, rstart = row[j]; j < n; j++) {
852: if (row[j] != rstart) break;
853: }
854: if (j < n) ncols = j - i;
855: else ncols = n - i;
856: PetscCall(MatSetValuesBlocked_MPISBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
857: i = j;
858: }
859: }
860: PetscCall(MatStashScatterEnd_Private(&mat->bstash));
862: baij->roworiented = r1;
863: a->roworiented = r2;
865: ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworiented */
866: }
868: PetscCall(MatAssemblyBegin(baij->A, mode));
869: PetscCall(MatAssemblyEnd(baij->A, mode));
871: /* determine if any processor has disassembled, if so we must
872: also disassemble ourselves, in order that we may reassemble. */
873: /*
874: if nonzero structure of submatrix B cannot change then we know that
875: no processor disassembled thus we can skip this stuff
876: */
877: if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
878: PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
879: if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPISBAIJ(mat));
880: }
882: if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { PetscCall(MatSetUpMultiply_MPISBAIJ(mat)); /* setup Mvctx and sMvctx */ }
883: PetscCall(MatAssemblyBegin(baij->B, mode));
884: PetscCall(MatAssemblyEnd(baij->B, mode));
886: PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
888: baij->rowvalues = NULL;
890: /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
891: if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
892: PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
893: PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
894: }
895: PetscFunctionReturn(PETSC_SUCCESS);
896: }
898: extern PetscErrorCode MatSetValues_MPIBAIJ(Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[], const PetscScalar[], InsertMode);
899: #include <petscdraw.h>
900: static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
901: {
902: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
903: PetscInt bs = mat->rmap->bs;
904: PetscMPIInt rank = baij->rank;
905: PetscBool iascii, isdraw;
906: PetscViewer sviewer;
907: PetscViewerFormat format;
909: PetscFunctionBegin;
910: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
911: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
912: if (iascii) {
913: PetscCall(PetscViewerGetFormat(viewer, &format));
914: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
915: MatInfo info;
916: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
917: PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
918: PetscCall(PetscViewerASCIIPushSynchronized(viewer));
919: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
920: mat->rmap->bs, (double)info.memory));
921: PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
922: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
923: PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
924: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
925: PetscCall(PetscViewerFlush(viewer));
926: PetscCall(PetscViewerASCIIPopSynchronized(viewer));
927: PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
928: PetscCall(VecScatterView(baij->Mvctx, viewer));
929: PetscFunctionReturn(PETSC_SUCCESS);
930: } else if (format == PETSC_VIEWER_ASCII_INFO) {
931: PetscCall(PetscViewerASCIIPrintf(viewer, " block size is %" PetscInt_FMT "\n", bs));
932: PetscFunctionReturn(PETSC_SUCCESS);
933: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
934: PetscFunctionReturn(PETSC_SUCCESS);
935: }
936: }
938: if (isdraw) {
939: PetscDraw draw;
940: PetscBool isnull;
941: PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
942: PetscCall(PetscDrawIsNull(draw, &isnull));
943: if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
944: }
946: {
947: /* assemble the entire matrix onto first processor. */
948: Mat A;
949: Mat_SeqSBAIJ *Aloc;
950: Mat_SeqBAIJ *Bloc;
951: PetscInt M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
952: MatScalar *a;
953: const char *matname;
955: /* Should this be the same type as mat? */
956: PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
957: if (rank == 0) {
958: PetscCall(MatSetSizes(A, M, N, M, N));
959: } else {
960: PetscCall(MatSetSizes(A, 0, 0, M, N));
961: }
962: PetscCall(MatSetType(A, MATMPISBAIJ));
963: PetscCall(MatMPISBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
964: PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
966: /* copy over the A part */
967: Aloc = (Mat_SeqSBAIJ *)baij->A->data;
968: ai = Aloc->i;
969: aj = Aloc->j;
970: a = Aloc->a;
971: PetscCall(PetscMalloc1(bs, &rvals));
973: for (i = 0; i < mbs; i++) {
974: rvals[0] = bs * (baij->rstartbs + i);
975: for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
976: for (j = ai[i]; j < ai[i + 1]; j++) {
977: col = (baij->cstartbs + aj[j]) * bs;
978: for (k = 0; k < bs; k++) {
979: PetscCall(MatSetValues_MPISBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
980: col++;
981: a += bs;
982: }
983: }
984: }
985: /* copy over the B part */
986: Bloc = (Mat_SeqBAIJ *)baij->B->data;
987: ai = Bloc->i;
988: aj = Bloc->j;
989: a = Bloc->a;
990: for (i = 0; i < mbs; i++) {
991: rvals[0] = bs * (baij->rstartbs + i);
992: for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
993: for (j = ai[i]; j < ai[i + 1]; j++) {
994: col = baij->garray[aj[j]] * bs;
995: for (k = 0; k < bs; k++) {
996: PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
997: col++;
998: a += bs;
999: }
1000: }
1001: }
1002: PetscCall(PetscFree(rvals));
1003: PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
1004: PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
1005: /*
1006: Everyone has to call to draw the matrix since the graphics waits are
1007: synchronized across all processors that share the PetscDraw object
1008: */
1009: PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1010: if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
1011: if (rank == 0) {
1012: if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPISBAIJ *)(A->data))->A, matname));
1013: PetscCall(MatView_SeqSBAIJ(((Mat_MPISBAIJ *)(A->data))->A, sviewer));
1014: }
1015: PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
1016: PetscCall(PetscViewerFlush(viewer));
1017: PetscCall(MatDestroy(&A));
1018: }
1019: PetscFunctionReturn(PETSC_SUCCESS);
1020: }
1022: /* Used for both MPIBAIJ and MPISBAIJ matrices */
1023: #define MatView_MPISBAIJ_Binary MatView_MPIBAIJ_Binary
1025: PetscErrorCode MatView_MPISBAIJ(Mat mat, PetscViewer viewer)
1026: {
1027: PetscBool iascii, isdraw, issocket, isbinary;
1029: PetscFunctionBegin;
1030: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1031: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
1032: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
1033: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
1034: if (iascii || isdraw || issocket) {
1035: PetscCall(MatView_MPISBAIJ_ASCIIorDraworSocket(mat, viewer));
1036: } else if (isbinary) PetscCall(MatView_MPISBAIJ_Binary(mat, viewer));
1037: PetscFunctionReturn(PETSC_SUCCESS);
1038: }
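1039: /* y = A*x with the split storage: slvec1a = A_diag*x and slvec0b = B^H*x are computed locally, the sMvctx scatter adds the remote contributions into slvec1, and the final multadd applies B to the gathered ghost values and adds slvec1a */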
1040: PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy)
1041: {
1042: Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1043: PetscInt mbs = a->mbs, bs = A->rmap->bs;
1044: PetscScalar *from;
1045: const PetscScalar *x;
1047: PetscFunctionBegin;
1048: /* diagonal part */
1049: PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
1050: PetscCall(VecSet(a->slvec1b, 0.0));
1052: /* subdiagonal part */
1053: PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
1054: PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));
1056: /* copy x into the vec slvec0 */
1057: PetscCall(VecGetArray(a->slvec0, &from));
1058: PetscCall(VecGetArrayRead(xx, &x));
1060: PetscCall(PetscArraycpy(from, x, bs * mbs));
1061: PetscCall(VecRestoreArray(a->slvec0, &from));
1062: PetscCall(VecRestoreArrayRead(xx, &x));
1064: PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1065: PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1066: /* superdiagonal part */
1067: PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
1068: PetscFunctionReturn(PETSC_SUCCESS);
1069: }
1071: PetscErrorCode MatMult_MPISBAIJ(Mat A, Vec xx, Vec yy)
1072: {
1073: Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1074: PetscInt mbs = a->mbs, bs = A->rmap->bs;
1075: PetscScalar *from;
1076: const PetscScalar *x;
1078: PetscFunctionBegin;
1079: /* diagonal part */
1080: PetscCall((*a->A->ops->mult)(a->A, xx, a->slvec1a));
1081: PetscCall(VecSet(a->slvec1b, 0.0));
1083: /* subdiagonal part */
1084: PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));
1086: /* copy x into the vec slvec0 */
1087: PetscCall(VecGetArray(a->slvec0, &from));
1088: PetscCall(VecGetArrayRead(xx, &x));
1090: PetscCall(PetscArraycpy(from, x, bs * mbs));
1091: PetscCall(VecRestoreArray(a->slvec0, &from));
1092: PetscCall(VecRestoreArrayRead(xx, &x));
1094: PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1095: PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1096: /* superdiagonal part */
1097: PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, yy));
1098: PetscFunctionReturn(PETSC_SUCCESS);
1099: }
1101: PetscErrorCode MatMultAdd_MPISBAIJ_Hermitian(Mat A, Vec xx, Vec yy, Vec zz)
1102: {
1103: Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1104: PetscInt mbs = a->mbs, bs = A->rmap->bs;
1105: PetscScalar *from, zero = 0.0;
1106: const PetscScalar *x;
1108: PetscFunctionBegin;
1109: /* diagonal part */
1110: PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
1111: PetscCall(VecSet(a->slvec1b, zero));
1113: /* subdiagonal part */
1114: PetscCheck(a->B->ops->multhermitiantranspose, PetscObjectComm((PetscObject)a->B), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)a->B)->type_name);
1115: PetscCall((*a->B->ops->multhermitiantranspose)(a->B, xx, a->slvec0b));
1117: /* copy x into the vec slvec0 */
1118: PetscCall(VecGetArray(a->slvec0, &from));
1119: PetscCall(VecGetArrayRead(xx, &x));
1120: PetscCall(PetscArraycpy(from, x, bs * mbs));
1121: PetscCall(VecRestoreArray(a->slvec0, &from));
1123: PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1124: PetscCall(VecRestoreArrayRead(xx, &x));
1125: PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1127: /* superdiagonal part */
1128: PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
1129: PetscFunctionReturn(PETSC_SUCCESS);
1130: }
1132: PetscErrorCode MatMultAdd_MPISBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
1133: {
1134: Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1135: PetscInt mbs = a->mbs, bs = A->rmap->bs;
1136: PetscScalar *from, zero = 0.0;
1137: const PetscScalar *x;
1139: PetscFunctionBegin;
1140: /* diagonal part */
1141: PetscCall((*a->A->ops->multadd)(a->A, xx, yy, a->slvec1a));
1142: PetscCall(VecSet(a->slvec1b, zero));
1144: /* subdiagonal part */
1145: PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->slvec0b));
1147: /* copy x into the vec slvec0 */
1148: PetscCall(VecGetArray(a->slvec0, &from));
1149: PetscCall(VecGetArrayRead(xx, &x));
1150: PetscCall(PetscArraycpy(from, x, bs * mbs));
1151: PetscCall(VecRestoreArray(a->slvec0, &from));
1153: PetscCall(VecScatterBegin(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1154: PetscCall(VecRestoreArrayRead(xx, &x));
1155: PetscCall(VecScatterEnd(a->sMvctx, a->slvec0, a->slvec1, ADD_VALUES, SCATTER_FORWARD));
1157: /* superdiagonal part */
1158: PetscCall((*a->B->ops->multadd)(a->B, a->slvec1b, a->slvec1a, zz));
1159: PetscFunctionReturn(PETSC_SUCCESS);
1160: }
1162: /*
1163: This only works correctly for square matrices where the subblock A->A is the
1164: diagonal block
1165: */
1166: PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A, Vec v)
1167: {
1168: Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1170: PetscFunctionBegin;
1171: /* PetscCheck(a->rmap->N == a->cmap->N,PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1172: PetscCall(MatGetDiagonal(a->A, v));
1173: PetscFunctionReturn(PETSC_SUCCESS);
1174: }
1176: PetscErrorCode MatScale_MPISBAIJ(Mat A, PetscScalar aa)
1177: {
1178: Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1180: PetscFunctionBegin;
1181: PetscCall(MatScale(a->A, aa));
1182: PetscCall(MatScale(a->B, aa));
1183: PetscFunctionReturn(PETSC_SUCCESS);
1184: }
1186: PetscErrorCode MatGetRow_MPISBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1187: {
1188: Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data;
1189: PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
1190: PetscInt bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
1191: PetscInt nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
1192: PetscInt *cmap, *idx_p, cstart = mat->rstartbs;
1194: PetscFunctionBegin;
1195: PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
1196: mat->getrowactive = PETSC_TRUE;
1198: if (!mat->rowvalues && (idx || v)) {
1199: /*
1200: allocate enough space to hold information from the longest row.
1201: */
1202: Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ *)mat->A->data;
1203: Mat_SeqBAIJ *Ba = (Mat_SeqBAIJ *)mat->B->data;
1204: PetscInt max = 1, mbs = mat->mbs, tmp;
1205: for (i = 0; i < mbs; i++) {
1206: tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; /* row length */
1207: if (max < tmp) max = tmp;
1208: }
1209: PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
1210: }
1212: PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
1213: lrow = row - brstart; /* local row index */
1215: pvA = &vworkA;
1216: pcA = &cworkA;
1217: pvB = &vworkB;
1218: pcB = &cworkB;
1219: if (!v) {
1220: pvA = NULL;
1221: pvB = NULL;
1222: }
1223: if (!idx) {
1224: pcA = NULL;
1225: if (!v) pcB = NULL;
1226: }
1227: PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
1228: PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
1229: nztot = nzA + nzB;
1231: cmap = mat->garray;
1232: if (v || idx) {
1233: if (nztot) {
1234: /* Sort by increasing column numbers, assuming A and B already sorted */
1235: PetscInt imark = -1;
1236: if (v) {
1237: *v = v_p = mat->rowvalues;
1238: for (i = 0; i < nzB; i++) {
1239: if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
1240: else break;
1241: }
1242: imark = i;
1243: for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
1244: for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
1245: }
1246: if (idx) {
1247: *idx = idx_p = mat->rowindices;
1248: if (imark > -1) {
1249: for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1250: } else {
1251: for (i = 0; i < nzB; i++) {
1252: if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1253: else break;
1254: }
1255: imark = i;
1256: }
1257: for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
1258: for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
1259: }
1260: } else {
1261: if (idx) *idx = NULL;
1262: if (v) *v = NULL;
1263: }
1264: }
1265: *nz = nztot;
1266: PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
1267: PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
1268: PetscFunctionReturn(PETSC_SUCCESS);
1269: }
1271: PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
1272: {
1273: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
1275: PetscFunctionBegin;
1276: PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
1277: baij->getrowactive = PETSC_FALSE;
1278: PetscFunctionReturn(PETSC_SUCCESS);
1279: }
1281: PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1282: {
1283: Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1284: Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1286: PetscFunctionBegin;
1287: aA->getrow_utriangular = PETSC_TRUE;
1288: PetscFunctionReturn(PETSC_SUCCESS);
1289: }
1290: PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1291: {
1292: Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1293: Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1295: PetscFunctionBegin;
1296: aA->getrow_utriangular = PETSC_FALSE;
1297: PetscFunctionReturn(PETSC_SUCCESS);
1298: }
1300: PetscErrorCode MatConjugate_MPISBAIJ(Mat mat)
1301: {
1302: PetscFunctionBegin;
1303: if (PetscDefined(USE_COMPLEX)) {
1304: Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)mat->data;
1306: PetscCall(MatConjugate(a->A));
1307: PetscCall(MatConjugate(a->B));
1308: }
1309: PetscFunctionReturn(PETSC_SUCCESS);
1310: }
1312: PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1313: {
1314: Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1316: PetscFunctionBegin;
1317: PetscCall(MatRealPart(a->A));
1318: PetscCall(MatRealPart(a->B));
1319: PetscFunctionReturn(PETSC_SUCCESS);
1320: }
1322: PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1323: {
1324: Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1326: PetscFunctionBegin;
1327: PetscCall(MatImaginaryPart(a->A));
1328: PetscCall(MatImaginaryPart(a->B));
1329: PetscFunctionReturn(PETSC_SUCCESS);
1330: }
1332: /* Check if isrow is a subset of iscol_local, called by MatCreateSubMatrix_MPISBAIJ()
1333: Input: isrow - distributed (parallel)
1334: iscol_local - locally owned (sequential)
1335: */
1336: PetscErrorCode ISEqual_private(IS isrow, IS iscol_local, PetscBool *flg)
1337: {
1338: PetscInt sz1, sz2, *a1, *a2, i, j, k, nmatch;
1339: const PetscInt *ptr1, *ptr2;
1341: PetscFunctionBegin;
1342: PetscCall(ISGetLocalSize(isrow, &sz1));
1343: PetscCall(ISGetLocalSize(iscol_local, &sz2));
1344: if (sz1 > sz2) {
1345: *flg = PETSC_FALSE;
1346: PetscFunctionReturn(PETSC_SUCCESS);
1347: }
1349: PetscCall(ISGetIndices(isrow, &ptr1));
1350: PetscCall(ISGetIndices(iscol_local, &ptr2));
1352: PetscCall(PetscMalloc1(sz1, &a1));
1353: PetscCall(PetscMalloc1(sz2, &a2));
1354: PetscCall(PetscArraycpy(a1, ptr1, sz1));
1355: PetscCall(PetscArraycpy(a2, ptr2, sz2));
1356: PetscCall(PetscSortInt(sz1, a1));
1357: PetscCall(PetscSortInt(sz2, a2));
1359: nmatch = 0;
1360: k = 0;
1361: for (i = 0; i < sz1; i++) {
1362: for (j = k; j < sz2; j++) {
1363: if (a1[i] == a2[j]) {
1364: k = j;
1365: nmatch++;
1366: break;
1367: }
1368: }
1369: }
1370: PetscCall(ISRestoreIndices(isrow, &ptr1));
1371: PetscCall(ISRestoreIndices(iscol_local, &ptr2));
1372: PetscCall(PetscFree(a1));
1373: PetscCall(PetscFree(a2));
1374: if (nmatch < sz1) {
1375: *flg = PETSC_FALSE;
1376: } else {
1377: *flg = PETSC_TRUE;
1378: }
1379: PetscFunctionReturn(PETSC_SUCCESS);
1380: }
1382: PetscErrorCode MatCreateSubMatrix_MPISBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
1383: {
1384: IS iscol_local;
1385: PetscInt csize;
1386: PetscBool isequal;
1388: PetscFunctionBegin;
1389: PetscCall(ISGetLocalSize(iscol, &csize));
1390: if (call == MAT_REUSE_MATRIX) {
1391: PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
1392: PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1393: } else {
1394: PetscBool issorted;
1396: PetscCall(ISAllGather(iscol, &iscol_local));
1397: PetscCall(ISEqual_private(isrow, iscol_local, &isequal));
1398: PetscCall(ISSorted(iscol_local, &issorted));
1399: PetscCheck(isequal && issorted, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "For symmetric format, iscol must equal isrow and be sorted");
1400: }
1402: /* now call MatCreateSubMatrix_MPIBAIJ() */
1403: PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat));
1404: if (call == MAT_INITIAL_MATRIX) {
1405: PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
1406: PetscCall(ISDestroy(&iscol_local));
1407: }
1408: PetscFunctionReturn(PETSC_SUCCESS);
1409: }
1411: PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1412: {
1413: Mat_MPISBAIJ *l = (Mat_MPISBAIJ *)A->data;
1415: PetscFunctionBegin;
1416: PetscCall(MatZeroEntries(l->A));
1417: PetscCall(MatZeroEntries(l->B));
1418: PetscFunctionReturn(PETSC_SUCCESS);
1419: }
1421: PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
1422: {
1423: Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)matin->data;
1424: Mat A = a->A, B = a->B;
1425: PetscLogDouble isend[5], irecv[5];
1427: PetscFunctionBegin;
1428: info->block_size = (PetscReal)matin->rmap->bs;
1430: PetscCall(MatGetInfo(A, MAT_LOCAL, info));
1432: isend[0] = info->nz_used;
1433: isend[1] = info->nz_allocated;
1434: isend[2] = info->nz_unneeded;
1435: isend[3] = info->memory;
1436: isend[4] = info->mallocs;
1438: PetscCall(MatGetInfo(B, MAT_LOCAL, info));
1440: isend[0] += info->nz_used;
1441: isend[1] += info->nz_allocated;
1442: isend[2] += info->nz_unneeded;
1443: isend[3] += info->memory;
1444: isend[4] += info->mallocs;
1445: if (flag == MAT_LOCAL) {
1446: info->nz_used = isend[0];
1447: info->nz_allocated = isend[1];
1448: info->nz_unneeded = isend[2];
1449: info->memory = isend[3];
1450: info->mallocs = isend[4];
1451: } else if (flag == MAT_GLOBAL_MAX) {
1452: PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));
1454: info->nz_used = irecv[0];
1455: info->nz_allocated = irecv[1];
1456: info->nz_unneeded = irecv[2];
1457: info->memory = irecv[3];
1458: info->mallocs = irecv[4];
1459: } else if (flag == MAT_GLOBAL_SUM) {
1460: PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));
1462: info->nz_used = irecv[0];
1463: info->nz_allocated = irecv[1];
1464: info->nz_unneeded = irecv[2];
1465: info->memory = irecv[3];
1466: info->mallocs = irecv[4];
1467: } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
1468: info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */
1469: info->fill_ratio_needed = 0;
1470: info->factor_mallocs = 0;
1471: PetscFunctionReturn(PETSC_SUCCESS);
1472: }
1474: PetscErrorCode MatSetOption_MPISBAIJ(Mat A, MatOption op, PetscBool flg)
1475: {
1476: Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1477: Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ *)a->A->data;
1479: PetscFunctionBegin;
1480: switch (op) {
1481: case MAT_NEW_NONZERO_LOCATIONS:
1482: case MAT_NEW_NONZERO_ALLOCATION_ERR:
1483: case MAT_UNUSED_NONZERO_LOCATION_ERR:
1484: case MAT_KEEP_NONZERO_PATTERN:
1485: case MAT_SUBMAT_SINGLEIS:
1486: case MAT_NEW_NONZERO_LOCATION_ERR:
1487: MatCheckPreallocated(A, 1);
1488: PetscCall(MatSetOption(a->A, op, flg));
1489: PetscCall(MatSetOption(a->B, op, flg));
1490: break;
1491: case MAT_ROW_ORIENTED:
1492: MatCheckPreallocated(A, 1);
1493: a->roworiented = flg;
1495: PetscCall(MatSetOption(a->A, op, flg));
1496: PetscCall(MatSetOption(a->B, op, flg));
1497: break;
1498: case MAT_FORCE_DIAGONAL_ENTRIES:
1499: case MAT_SORTED_FULL:
1500: PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1501: break;
1502: case MAT_IGNORE_OFF_PROC_ENTRIES:
1503: a->donotstash = flg;
1504: break;
1505: case MAT_USE_HASH_TABLE:
1506: a->ht_flag = flg;
1507: break;
1508: case MAT_HERMITIAN:
1509: MatCheckPreallocated(A, 1);
1510: PetscCall(MatSetOption(a->A, op, flg));
1511: #if defined(PETSC_USE_COMPLEX)
1512: if (flg) { /* need different mat-vec ops */
1513: A->ops->mult = MatMult_MPISBAIJ_Hermitian;
1514: A->ops->multadd = MatMultAdd_MPISBAIJ_Hermitian;
1515: A->ops->multtranspose = NULL;
1516: A->ops->multtransposeadd = NULL;
1517: A->symmetric = PETSC_BOOL3_FALSE;
1518: }
1519: #endif
1520: break;
1521: case MAT_SPD:
1522: case MAT_SYMMETRIC:
1523: MatCheckPreallocated(A, 1);
1524: PetscCall(MatSetOption(a->A, op, flg));
1525: #if defined(PETSC_USE_COMPLEX)
1526: if (flg) { /* restore to use default mat-vec ops */
1527: A->ops->mult = MatMult_MPISBAIJ;
1528: A->ops->multadd = MatMultAdd_MPISBAIJ;
1529: A->ops->multtranspose = MatMult_MPISBAIJ;
1530: A->ops->multtransposeadd = MatMultAdd_MPISBAIJ;
1531: }
1532: #endif
1533: break;
1534: case MAT_STRUCTURALLY_SYMMETRIC:
1535: MatCheckPreallocated(A, 1);
1536: PetscCall(MatSetOption(a->A, op, flg));
1537: break;
1538: case MAT_SYMMETRY_ETERNAL:
1539: case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
1540: PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "Matrix must be symmetric");
1541: PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
1542: break;
1543: case MAT_SPD_ETERNAL:
1544: break;
1545: case MAT_IGNORE_LOWER_TRIANGULAR:
1546: aA->ignore_ltriangular = flg;
1547: break;
1548: case MAT_ERROR_LOWER_TRIANGULAR:
1549: aA->ignore_ltriangular = flg;
1550: break;
1551: case MAT_GETROW_UPPERTRIANGULAR:
1552: aA->getrow_utriangular = flg;
1553: break;
1554: default:
1555: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
1556: }
1557: PetscFunctionReturn(PETSC_SUCCESS);
1558: }
1560: PetscErrorCode MatTranspose_MPISBAIJ(Mat A, MatReuse reuse, Mat *B)
1561: {
1562: PetscFunctionBegin;
1563: if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
1564: if (reuse == MAT_INITIAL_MATRIX) {
1565: PetscCall(MatDuplicate(A, MAT_COPY_VALUES, B));
1566: } else if (reuse == MAT_REUSE_MATRIX) {
1567: PetscCall(MatCopy(A, *B, SAME_NONZERO_PATTERN));
1568: }
1569: PetscFunctionReturn(PETSC_SUCCESS);
1570: }
1572: PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat, Vec ll, Vec rr)
1573: {
1574: Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)mat->data;
1575: Mat a = baij->A, b = baij->B;
1576: PetscInt nv, m, n;
1577: PetscBool flg;
1579: PetscFunctionBegin;
1580: if (ll != rr) {
1581: PetscCall(VecEqual(ll, rr, &flg));
1582: PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "For symmetric format, left and right scaling vectors must be same");
1583: }
1584: if (!ll) PetscFunctionReturn(PETSC_SUCCESS);
1586: PetscCall(MatGetLocalSize(mat, &m, &n));
1587: PetscCheck(m == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "For symmetric format, local size %" PetscInt_FMT " %" PetscInt_FMT " must be same", m, n);
1589: PetscCall(VecGetLocalSize(rr, &nv));
1590: PetscCheck(nv == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Left and right vector non-conforming local size");
1592: PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1594: /* left diagonalscale the off-diagonal part */
1595: PetscUseTypeMethod(b, diagonalscale, ll, NULL);
1597: /* scale the diagonal part */
1598: PetscUseTypeMethod(a, diagonalscale, ll, rr);
1600: /* right diagonalscale the off-diagonal part */
1601: PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
1602: PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
1603: PetscFunctionReturn(PETSC_SUCCESS);
1604: }
1606: PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1607: {
1608: Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1610: PetscFunctionBegin;
1611: PetscCall(MatSetUnfactored(a->A));
1612: PetscFunctionReturn(PETSC_SUCCESS);
1613: }
1615: static PetscErrorCode MatDuplicate_MPISBAIJ(Mat, MatDuplicateOption, Mat *);
1617: PetscErrorCode MatEqual_MPISBAIJ(Mat A, Mat B, PetscBool *flag)
1618: {
1619: Mat_MPISBAIJ *matB = (Mat_MPISBAIJ *)B->data, *matA = (Mat_MPISBAIJ *)A->data;
1620: Mat a, b, c, d;
1621: PetscBool flg;
1623: PetscFunctionBegin;
1624: a = matA->A;
1625: b = matA->B;
1626: c = matB->A;
1627: d = matB->B;
1629: PetscCall(MatEqual(a, c, &flg));
1630: if (flg) PetscCall(MatEqual(b, d, &flg));
1631: PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
1632: PetscFunctionReturn(PETSC_SUCCESS);
1633: }
1635: PetscErrorCode MatCopy_MPISBAIJ(Mat A, Mat B, MatStructure str)
1636: {
1637: PetscBool isbaij;
1639: PetscFunctionBegin;
1640: PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &isbaij, MATSEQSBAIJ, MATMPISBAIJ, ""));
1641: PetscCheck(isbaij, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "Not for matrix type %s", ((PetscObject)B)->type_name);
1642: /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1643: if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1644: PetscCall(MatGetRowUpperTriangular(A));
1645: PetscCall(MatCopy_Basic(A, B, str));
1646: PetscCall(MatRestoreRowUpperTriangular(A));
1647: } else {
1648: Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1649: Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data;
1651: PetscCall(MatCopy(a->A, b->A, str));
1652: PetscCall(MatCopy(a->B, b->B, str));
1653: }
1654: PetscCall(PetscObjectStateIncrease((PetscObject)B));
1655: PetscFunctionReturn(PETSC_SUCCESS);
1656: }
1658: PetscErrorCode MatAXPY_MPISBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
1659: {
1660: Mat_MPISBAIJ *xx = (Mat_MPISBAIJ *)X->data, *yy = (Mat_MPISBAIJ *)Y->data;
1661: PetscBLASInt bnz, one = 1;
1662: Mat_SeqSBAIJ *xa, *ya;
1663: Mat_SeqBAIJ *xb, *yb;
1665: PetscFunctionBegin;
1666: if (str == SAME_NONZERO_PATTERN) {
1667: PetscScalar alpha = a;
1668: xa = (Mat_SeqSBAIJ *)xx->A->data;
1669: ya = (Mat_SeqSBAIJ *)yy->A->data;
1670: PetscCall(PetscBLASIntCast(xa->nz, &bnz));
1671: PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xa->a, &one, ya->a, &one));
1672: xb = (Mat_SeqBAIJ *)xx->B->data;
1673: yb = (Mat_SeqBAIJ *)yy->B->data;
1674: PetscCall(PetscBLASIntCast(xb->nz, &bnz));
1675: PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, xb->a, &one, yb->a, &one));
1676: PetscCall(PetscObjectStateIncrease((PetscObject)Y));
1677: } else if (str == SUBSET_NONZERO_PATTERN) { /* the nonzeros of X are a subset of Y's */
1678: PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
1679: PetscCall(MatAXPY_Basic(Y, a, X, str));
1680: PetscCall(MatSetOption(X, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
1681: } else {
1682: Mat B;
1683: PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
1684: PetscCheck(bs == X->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrices must have same block size");
1685: PetscCall(MatGetRowUpperTriangular(X));
1686: PetscCall(MatGetRowUpperTriangular(Y));
1687: PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
1688: PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
1689: PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
1690: PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
1691: PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
1692: PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
1693: PetscCall(MatSetType(B, MATMPISBAIJ));
1694: PetscCall(MatAXPYGetPreallocation_SeqSBAIJ(yy->A, xx->A, nnz_d));
1695: PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
1696: PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
1697: PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
1698: PetscCall(MatHeaderMerge(Y, &B));
1699: PetscCall(PetscFree(nnz_d));
1700: PetscCall(PetscFree(nnz_o));
1701: PetscCall(MatRestoreRowUpperTriangular(X));
1702: PetscCall(MatRestoreRowUpperTriangular(Y));
1703: }
1704: PetscFunctionReturn(PETSC_SUCCESS);
1705: }
1707: PetscErrorCode MatCreateSubMatrices_MPISBAIJ(Mat A, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *B[])
1708: {
1709: PetscInt i;
1710: PetscBool flg;
1712: PetscFunctionBegin;
1713: PetscCall(MatCreateSubMatrices_MPIBAIJ(A, n, irow, icol, scall, B)); /* B[] are sbaij matrices */
1714: for (i = 0; i < n; i++) {
1715: PetscCall(ISEqual(irow[i], icol[i], &flg));
1716: if (!flg) PetscCall(MatSeqSBAIJZeroOps_Private(*B[i]));
1717: }
1718: PetscFunctionReturn(PETSC_SUCCESS);
1719: }
1721: PetscErrorCode MatShift_MPISBAIJ(Mat Y, PetscScalar a)
1722: {
1723: Mat_MPISBAIJ *maij = (Mat_MPISBAIJ *)Y->data;
1724: Mat_SeqSBAIJ *aij = (Mat_SeqSBAIJ *)maij->A->data;
1726: PetscFunctionBegin;
1727: if (!Y->preallocated) {
1728: PetscCall(MatMPISBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
1729: } else if (!aij->nz) {
1730: PetscInt nonew = aij->nonew;
1731: PetscCall(MatSeqSBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
1732: aij->nonew = nonew;
1733: }
1734: PetscCall(MatShift_Basic(Y, a));
1735: PetscFunctionReturn(PETSC_SUCCESS);
1736: }
1738: PetscErrorCode MatMissingDiagonal_MPISBAIJ(Mat A, PetscBool *missing, PetscInt *d)
1739: {
1740: Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
1742: PetscFunctionBegin;
1743: PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
1744: PetscCall(MatMissingDiagonal(a->A, missing, d));
1745: if (d) {
1746: PetscInt rstart;
1747: PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
1748: *d += rstart / A->rmap->bs;
1749: }
1750: PetscFunctionReturn(PETSC_SUCCESS);
1751: }
1753: PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A, Mat *a)
1754: {
1755: PetscFunctionBegin;
1756: *a = ((Mat_MPISBAIJ *)A->data)->A;
1757: PetscFunctionReturn(PETSC_SUCCESS);
1758: }
1760: static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1761: MatGetRow_MPISBAIJ,
1762: MatRestoreRow_MPISBAIJ,
1763: MatMult_MPISBAIJ,
1764: /* 4*/ MatMultAdd_MPISBAIJ,
1765: MatMult_MPISBAIJ, /* transpose versions are same as non-transpose */
1766: MatMultAdd_MPISBAIJ,
1767: NULL,
1768: NULL,
1769: NULL,
1770: /* 10*/ NULL,
1771: NULL,
1772: NULL,
1773: MatSOR_MPISBAIJ,
1774: MatTranspose_MPISBAIJ,
1775: /* 15*/ MatGetInfo_MPISBAIJ,
1776: MatEqual_MPISBAIJ,
1777: MatGetDiagonal_MPISBAIJ,
1778: MatDiagonalScale_MPISBAIJ,
1779: MatNorm_MPISBAIJ,
1780: /* 20*/ MatAssemblyBegin_MPISBAIJ,
1781: MatAssemblyEnd_MPISBAIJ,
1782: MatSetOption_MPISBAIJ,
1783: MatZeroEntries_MPISBAIJ,
1784: /* 24*/ NULL,
1785: NULL,
1786: NULL,
1787: NULL,
1788: NULL,
1789: /* 29*/ MatSetUp_MPI_Hash,
1790: NULL,
1791: NULL,
1792: MatGetDiagonalBlock_MPISBAIJ,
1793: NULL,
1794: /* 34*/ MatDuplicate_MPISBAIJ,
1795: NULL,
1796: NULL,
1797: NULL,
1798: NULL,
1799: /* 39*/ MatAXPY_MPISBAIJ,
1800: MatCreateSubMatrices_MPISBAIJ,
1801: MatIncreaseOverlap_MPISBAIJ,
1802: MatGetValues_MPISBAIJ,
1803: MatCopy_MPISBAIJ,
1804: /* 44*/ NULL,
1805: MatScale_MPISBAIJ,
1806: MatShift_MPISBAIJ,
1807: NULL,
1808: NULL,
1809: /* 49*/ NULL,
1810: NULL,
1811: NULL,
1812: NULL,
1813: NULL,
1814: /* 54*/ NULL,
1815: NULL,
1816: MatSetUnfactored_MPISBAIJ,
1817: NULL,
1818: MatSetValuesBlocked_MPISBAIJ,
1819: /* 59*/ MatCreateSubMatrix_MPISBAIJ,
1820: NULL,
1821: NULL,
1822: NULL,
1823: NULL,
1824: /* 64*/ NULL,
1825: NULL,
1826: NULL,
1827: NULL,
1828: NULL,
1829: /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1830: NULL,
1831: MatConvert_MPISBAIJ_Basic,
1832: NULL,
1833: NULL,
1834: /* 74*/ NULL,
1835: NULL,
1836: NULL,
1837: NULL,
1838: NULL,
1839: /* 79*/ NULL,
1840: NULL,
1841: NULL,
1842: NULL,
1843: MatLoad_MPISBAIJ,
1844: /* 84*/ NULL,
1845: NULL,
1846: NULL,
1847: NULL,
1848: NULL,
1849: /* 89*/ NULL,
1850: NULL,
1851: NULL,
1852: NULL,
1853: NULL,
1854: /* 94*/ NULL,
1855: NULL,
1856: NULL,
1857: NULL,
1858: NULL,
1859: /* 99*/ NULL,
1860: NULL,
1861: NULL,
1862: MatConjugate_MPISBAIJ,
1863: NULL,
1864: /*104*/ NULL,
1865: MatRealPart_MPISBAIJ,
1866: MatImaginaryPart_MPISBAIJ,
1867: MatGetRowUpperTriangular_MPISBAIJ,
1868: MatRestoreRowUpperTriangular_MPISBAIJ,
1869: /*109*/ NULL,
1870: NULL,
1871: NULL,
1872: NULL,
1873: MatMissingDiagonal_MPISBAIJ,
1874: /*114*/ NULL,
1875: NULL,
1876: NULL,
1877: NULL,
1878: NULL,
1879: /*119*/ NULL,
1880: NULL,
1881: NULL,
1882: NULL,
1883: NULL,
1884: /*124*/ NULL,
1885: NULL,
1886: NULL,
1887: NULL,
1888: NULL,
1889: /*129*/ NULL,
1890: NULL,
1891: NULL,
1892: NULL,
1893: NULL,
1894: /*134*/ NULL,
1895: NULL,
1896: NULL,
1897: NULL,
1898: NULL,
1899: /*139*/ MatSetBlockSizes_Default,
1900: NULL,
1901: NULL,
1902: NULL,
1903: NULL,
1904: /*144*/ MatCreateMPIMatConcatenateSeqMat_MPISBAIJ,
1905: NULL,
1906: NULL,
1907: NULL,
1908: NULL,
1909: NULL,
1910: /*150*/ NULL,
1911: NULL};
1913: PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
1914: {
1915: Mat_MPISBAIJ *b = (Mat_MPISBAIJ *)B->data;
1916: PetscInt i, mbs, Mbs;
1917: PetscMPIInt size;
1919: PetscFunctionBegin;
1920: if (B->hash_active) {
1921: PetscCall(PetscMemcpy(&B->ops, &b->cops, sizeof(*(B->ops))));
1922: B->hash_active = PETSC_FALSE;
1923: }
1924: if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
1925: PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
1926: PetscCall(PetscLayoutSetUp(B->rmap));
1927: PetscCall(PetscLayoutSetUp(B->cmap));
1928: PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
1929: PetscCheck(B->rmap->N <= B->cmap->N, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->N, B->cmap->N);
1930: PetscCheck(B->rmap->n <= B->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "MPISBAIJ matrix cannot have more local rows %" PetscInt_FMT " than columns %" PetscInt_FMT, B->rmap->n, B->cmap->n);
1932: mbs = B->rmap->n / bs;
1933: Mbs = B->rmap->N / bs;
1934: PetscCheck(mbs * bs == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of local rows %" PetscInt_FMT " must be divisible by blocksize %" PetscInt_FMT, B->rmap->n, bs);
1936: B->rmap->bs = bs;
1937: b->bs2 = bs * bs;
1938: b->mbs = mbs;
1939: b->Mbs = Mbs;
1940: b->nbs = B->cmap->n / bs;
1941: b->Nbs = B->cmap->N / bs;
1943: for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
1944: b->rstartbs = B->rmap->rstart / bs;
1945: b->rendbs = B->rmap->rend / bs;
1947: b->cstartbs = B->cmap->rstart / bs;
1948: b->cendbs = B->cmap->rend / bs;
1950: #if defined(PETSC_USE_CTABLE)
1951: PetscCall(PetscHMapIDestroy(&b->colmap));
1952: #else
1953: PetscCall(PetscFree(b->colmap));
1954: #endif
1955: PetscCall(PetscFree(b->garray));
1956: PetscCall(VecDestroy(&b->lvec));
1957: PetscCall(VecScatterDestroy(&b->Mvctx));
1958: PetscCall(VecDestroy(&b->slvec0));
1959: PetscCall(VecDestroy(&b->slvec0b));
1960: PetscCall(VecDestroy(&b->slvec1));
1961: PetscCall(VecDestroy(&b->slvec1a));
1962: PetscCall(VecDestroy(&b->slvec1b));
1963: PetscCall(VecScatterDestroy(&b->sMvctx));
1965: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
1966: PetscCall(MatDestroy(&b->B));
1967: PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
1968: PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
1969: PetscCall(MatSetType(b->B, MATSEQBAIJ));
1971: PetscCall(MatDestroy(&b->A));
1972: PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
1973: PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
1974: PetscCall(MatSetType(b->A, MATSEQSBAIJ));
1976: PetscCall(MatSeqSBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
1977: PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));
1979: B->preallocated = PETSC_TRUE;
1980: B->was_assembled = PETSC_FALSE;
1981: B->assembled = PETSC_FALSE;
1982: PetscFunctionReturn(PETSC_SUCCESS);
1983: }
1985: PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
1986: {
1987: PetscInt m, rstart, cend;
1988: PetscInt i, j, d, nz, bd, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
1989: const PetscInt *JJ = NULL;
1990: PetscScalar *values = NULL;
1991: PetscBool roworiented = ((Mat_MPISBAIJ *)B->data)->roworiented;
1992: PetscBool nooffprocentries;
1994: PetscFunctionBegin;
1995: PetscCheck(bs >= 1, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_OUTOFRANGE, "Invalid block size specified, must be positive but it is %" PetscInt_FMT, bs);
1996: PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
1997: PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
1998: PetscCall(PetscLayoutSetUp(B->rmap));
1999: PetscCall(PetscLayoutSetUp(B->cmap));
2000: PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
2001: m = B->rmap->n / bs;
2002: rstart = B->rmap->rstart / bs;
2003: cend = B->cmap->rend / bs;
2005: PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
2006: PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
2007: for (i = 0; i < m; i++) {
2008: nz = ii[i + 1] - ii[i];
2009: PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
2010: /* count the ones on the diagonal and above, split into diagonal and off diagonal portions. */
2011: JJ = jj + ii[i];
2012: bd = 0;
2013: for (j = 0; j < nz; j++) {
2014: if (*JJ >= i + rstart) break;
2015: JJ++;
2016: bd++;
2017: }
2018: d = 0;
2019: for (; j < nz; j++) {
2020: if (*JJ++ >= cend) break;
2021: d++;
2022: }
2023: d_nnz[i] = d;
2024: o_nnz[i] = nz - d - bd;
2025: nz = nz - bd;
2026: nz_max = PetscMax(nz_max, nz);
2027: }
2028: PetscCall(MatMPISBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
2029: PetscCall(MatSetOption(B, MAT_IGNORE_LOWER_TRIANGULAR, PETSC_TRUE));
2030: PetscCall(PetscFree2(d_nnz, o_nnz));
2032: values = (PetscScalar *)V;
2033: if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
2034: for (i = 0; i < m; i++) {
2035: PetscInt row = i + rstart;
2036: PetscInt ncols = ii[i + 1] - ii[i];
2037: const PetscInt *icols = jj + ii[i];
2038: if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
2039: const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
2040: PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
2041: } else { /* block ordering does not match so we can only insert one block at a time. */
2042: PetscInt j;
2043: for (j = 0; j < ncols; j++) {
2044: const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
2045: PetscCall(MatSetValuesBlocked_MPISBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
2046: }
2047: }
2048: }
2050: if (!V) PetscCall(PetscFree(values));
2051: nooffprocentries = B->nooffprocentries;
2052: B->nooffprocentries = PETSC_TRUE;
2053: PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2054: PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2055: B->nooffprocentries = nooffprocentries;
2057: PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2058: PetscFunctionReturn(PETSC_SUCCESS);
2059: }
2061: /*MC
2062: MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
2063: based on block compressed sparse row format. Only the upper triangular portion of the "diagonal" portion of
2064: the matrix is stored.
2066: For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
2067: can call `MatSetOption`(`Mat`, `MAT_HERMITIAN`);
2069: Options Database Key:
2070: . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to `MatSetFromOptions()`
2072: Level: beginner
2074: Note:
2075: The number of rows in the matrix must be less than or equal to the number of columns. Similarly, the number of rows in the
2076: diagonal portion of the matrix on each process must be less than or equal to the number of columns.
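A minimal creation sketch (the global size, block size, and preallocation counts below are illustrative assumptions, not requirements):
.vb
Mat A;
PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, 100, 100));
PetscCall(MatSetType(A, MATMPISBAIJ));
PetscCall(MatMPISBAIJSetPreallocation(A, 2, 5, NULL, 2, NULL)); /* bs = 2; 5 diagonal and 2 off-diagonal block nonzeros per block row (assumed) */
PetscCall(MatSetOption(A, MAT_HERMITIAN, PETSC_TRUE));          /* only if Hermitian rather than symmetric storage is wanted (complex case) */
/* ... MatSetValuesBlocked(), MatAssemblyBegin(), MatAssemblyEnd() ... */
.ve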
2078: .seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MATBAIJ`, `MatCreateBAIJ()`, `MATSEQSBAIJ`, `MatType`
2079: M*/
2081: PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
2082: {
2083: Mat_MPISBAIJ *b;
2084: PetscBool flg = PETSC_FALSE;
2086: PetscFunctionBegin;
2087: PetscCall(PetscNew(&b));
2088: B->data = (void *)b;
2089: PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps)));
2091: B->ops->destroy = MatDestroy_MPISBAIJ;
2092: B->ops->view = MatView_MPISBAIJ;
2093: B->assembled = PETSC_FALSE;
2094: B->insertmode = NOT_SET_VALUES;
2096: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
2097: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));
2099: /* build local table of row and column ownerships */
2100: PetscCall(PetscMalloc1(b->size + 2, &b->rangebs));
2102: /* build cache for off array entries formed */
2103: PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
2105: b->donotstash = PETSC_FALSE;
2106: b->colmap = NULL;
2107: b->garray = NULL;
2108: b->roworiented = PETSC_TRUE;
2110: /* stuff used in block assembly */
2111: b->barray = NULL;
2113: /* stuff used for matrix vector multiply */
2114: b->lvec = NULL;
2115: b->Mvctx = NULL;
2116: b->slvec0 = NULL;
2117: b->slvec0b = NULL;
2118: b->slvec1 = NULL;
2119: b->slvec1a = NULL;
2120: b->slvec1b = NULL;
2121: b->sMvctx = NULL;
2123: /* stuff for MatGetRow() */
2124: b->rowindices = NULL;
2125: b->rowvalues = NULL;
2126: b->getrowactive = PETSC_FALSE;
2128: /* hash table stuff */
2129: b->ht = NULL;
2130: b->hd = NULL;
2131: b->ht_size = 0;
2132: b->ht_flag = PETSC_FALSE;
2133: b->ht_fact = 0;
2134: b->ht_total_ct = 0;
2135: b->ht_insert_ct = 0;
2137: /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2138: b->ijonly = PETSC_FALSE;
2140: b->in_loc = NULL;
2141: b->v_loc = NULL;
2142: b->n_loc = 0;
2144: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPISBAIJ));
2145: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPISBAIJ));
2146: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocation_C", MatMPISBAIJSetPreallocation_MPISBAIJ));
2147: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPISBAIJSetPreallocationCSR_C", MatMPISBAIJSetPreallocationCSR_MPISBAIJ));
2148: #if defined(PETSC_HAVE_ELEMENTAL)
2149: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_elemental_C", MatConvert_MPISBAIJ_Elemental));
2150: #endif
2151: #if defined(PETSC_HAVE_SCALAPACK)
2152: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_scalapack_C", MatConvert_SBAIJ_ScaLAPACK));
2153: #endif
2154: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpiaij_C", MatConvert_MPISBAIJ_Basic));
2155: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpisbaij_mpibaij_C", MatConvert_MPISBAIJ_Basic));
2157: B->symmetric = PETSC_BOOL3_TRUE;
2158: B->structurally_symmetric = PETSC_BOOL3_TRUE;
2159: B->symmetry_eternal = PETSC_TRUE;
2160: B->structural_symmetry_eternal = PETSC_TRUE;
2161: #if defined(PETSC_USE_COMPLEX)
2162: B->hermitian = PETSC_BOOL3_FALSE;
2163: #else
2164: B->hermitian = PETSC_BOOL3_TRUE;
2165: #endif
2167: PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPISBAIJ));
2168: PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPISBAIJ matrix 1", "Mat");
2169: PetscCall(PetscOptionsBool("-mat_use_hash_table", "Use hash table to save memory in constructing matrix", "MatSetOption", flg, &flg, NULL));
2170: if (flg) {
2171: PetscReal fact = 1.39;
2172: PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
2173: PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
2174: if (fact <= 1.0) fact = 1.39;
2175: PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
2176: PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
2177: }
2178: PetscOptionsEnd();
2179: PetscFunctionReturn(PETSC_SUCCESS);
2180: }
2182: /*MC
2183: MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
2185: This matrix type is identical to `MATSEQSBAIJ` when constructed with a single process communicator,
2186: and `MATMPISBAIJ` otherwise.
2188: Options Database Key:
2189: . -mat_type sbaij - sets the matrix type to `MATSBAIJ` during a call to `MatSetFromOptions()`
2191: Level: beginner
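A sketch of communicator-independent use (sizes and counts are illustrative assumptions); only the preallocation call matching the resolved type takes effect:
.vb
Mat A;
PetscCall(MatCreate(comm, &A)); /* comm is an assumed MPI_Comm */
PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, 100, 100));
PetscCall(MatSetType(A, MATSBAIJ)); /* MATSEQSBAIJ on one process, MATMPISBAIJ otherwise */
PetscCall(MatSeqSBAIJSetPreallocation(A, 1, 5, NULL));
PetscCall(MatMPISBAIJSetPreallocation(A, 1, 5, NULL, 2, NULL));
.ve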
2193: .seealso: [](ch_matrices), `Mat`, `MATSEQSBAIJ`, `MATMPISBAIJ`, `MatCreateSBAIJ()`
2194: M*/
2196: /*@C
2197: MatMPISBAIJSetPreallocation - For good matrix assembly performance
2198: the user should preallocate the matrix storage by setting the parameters
2199: d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
2200: performance can be increased by more than a factor of 50.
2202: Collective
2204: Input Parameters:
2205: + B - the matrix
2206: . bs - size of block; the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize, but the row
2207: blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
2208: . d_nz - number of block nonzeros per block row in diagonal portion of local
2209: submatrix (same for all local rows)
2210: . d_nnz - array containing the number of block nonzeros in the various block rows
2211: in the upper triangular and diagonal part of the diagonal portion of the local submatrix
2212: (possibly different for each block row) or `NULL`. If you plan to factor the matrix you must leave room
2213: for the diagonal entry and set a value even if it is zero.
2214: . o_nz - number of block nonzeros per block row in the off-diagonal portion of local
2215: submatrix (same for all local rows).
2216: - o_nnz - array containing the number of nonzeros in the various block rows of the
2217: off-diagonal portion of the local submatrix that is right of the diagonal
2218: (possibly different for each block row) or `NULL`.
2220: Options Database Keys:
2221: + -mat_no_unroll - uses code that does not unroll the loops in the
2222: block calculations (much slower)
2223: - -mat_block_size - size of the blocks to use
2225: Level: intermediate
2227: Notes:
2229: If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
2230: then it must be used on all processors that share the object for that argument.
2232: If the *_nnz parameter is given then the *_nz parameter is ignored
2234: Storage Information:
2235: For a square global matrix we define each processor's diagonal portion
2236: to be its local rows and the corresponding columns (a square submatrix);
2237: each processor's off-diagonal portion encompasses the remainder of the
2238: local matrix (a rectangular submatrix).
2240: The user can specify preallocated storage for the diagonal part of
2241: the local submatrix with either `d_nz` or `d_nnz` (not both). Set
2242: `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
2243: memory allocation. Likewise, specify preallocated storage for the
2244: off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
2246: You can call `MatGetInfo()` to get information on how effective the preallocation was;
2247: for example the fields `mallocs`, `nz_allocated`, `nz_used`, and `nz_unneeded`.
2248: You can also run with the option `-info` and look for messages with the string
2249: malloc in them to see if additional memory allocation was needed.
2251: Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2252: the figure below we depict these three local rows and all columns (0-11).
2254: .vb
2255: 0 1 2 3 4 5 6 7 8 9 10 11
2256: --------------------------
2257: row 3 |. . . d d d o o o o o o
2258: row 4 |. . . d d d o o o o o o
2259: row 5 |. . . d d d o o o o o o
2260: --------------------------
2261: .ve
2263: Thus, any entries in the d locations are stored in the d (diagonal)
2264: submatrix, and any entries in the o locations are stored in the
2265: o (off-diagonal) submatrix. Note that the d matrix is stored in
2266: `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format.
2268: Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular
2269: plus the diagonal part of the d matrix,
2270: and `o_nz` should indicate the number of block nonzeros per row in the o matrix
2272: In general, for PDE problems in which most nonzeros are near the diagonal,
2273: one expects `d_nz` >> `o_nz`.
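For the three local block rows pictured above (with bs = 1), a per-row preallocation sketch would be (counts read off the figure; the variable names are illustrative):
.vb
PetscInt d_nnz[] = {3, 2, 1}; /* upper triangular plus diagonal entries of the d submatrix for rows 3, 4, 5 */
PetscInt o_nnz[] = {6, 6, 6}; /* entries of the o submatrix for rows 3, 4, 5 */
PetscCall(MatMPISBAIJSetPreallocation(B, 1, 0, d_nnz, 0, o_nnz));
.ve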
2275: .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `PetscSplitOwnership()`
2276: @*/
2277: PetscErrorCode MatMPISBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
2278: {
2279: PetscFunctionBegin;
2283: PetscTryMethod(B, "MatMPISBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
2284: PetscFunctionReturn(PETSC_SUCCESS);
2285: }
2287: /*@C
2288: MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format, `MATSBAIJ`,
2289: (block compressed row). For good matrix assembly performance
2290: the user should preallocate the matrix storage by setting the parameters
2291: `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).
2293: Collective
2295: Input Parameters:
2296: + comm - MPI communicator
2297: . bs - size of block; the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize, but the row
2298: blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
2299: . m - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
2300: This value should be the same as the local size used in creating the
2301: y vector for the matrix-vector product y = Ax.
2302: . n - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
2303: This value should be the same as the local size used in creating the
2304: x vector for the matrix-vector product y = Ax.
2305: . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
2306: . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
2307: . d_nz - number of block nonzeros per block row in diagonal portion of local
2308: submatrix (same for all local rows)
2309: . d_nnz - array containing the number of block nonzeros in the various block rows
2310: in the upper triangular portion of the diagonal portion of the local submatrix
2311: (possibly different for each block row) or `NULL`.
2312: If you plan to factor the matrix you must leave room for the diagonal entry and
2313: set its value even if it is zero.
2314: . o_nz - number of block nonzeros per block row in the off-diagonal portion of local
2315: submatrix (same for all local rows).
2316: - o_nnz - array containing the number of nonzeros in the various block rows of the
2317: off-diagonal portion of the local submatrix (possibly different for
2318: each block row) or `NULL`.
2320: Output Parameter:
2321: . A - the matrix
2323: Options Database Keys:
2324: + -mat_no_unroll - uses code that does not unroll the loops in the
2325: block calculations (much slower)
2326: . -mat_block_size - size of the blocks to use
2327: - -mat_mpi - use the parallel matrix data structures even on one processor
2328: (defaults to using SeqBAIJ format on one processor)
2330: Level: intermediate
2332: Notes:
2333: It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
2334: MatXXXXSetPreallocation() paradigm instead of this routine directly.
2335: [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
2337: The number of rows and columns must be divisible by blocksize.
2338: This matrix type does not support complex Hermitian operations.
2340: The user MUST specify either the local or global matrix dimensions
2341: (possibly both).
2343: If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
2344: then it must be used on all processors that share the object for that argument.
2346: If the *_nnz parameter is given then the *_nz parameter is ignored
2348: Storage Information:
2349: For a square global matrix we define each processor's diagonal portion
2350: to be its local rows and the corresponding columns (a square submatrix);
2351: each processor's off-diagonal portion encompasses the remainder of the
2352: local matrix (a rectangular submatrix).
2354: The user can specify preallocated storage for the diagonal part of
2355: the local submatrix with either `d_nz` or `d_nnz` (not both). Set
2356: `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
2357: memory allocation. Likewise, specify preallocated storage for the
2358: off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
2360: Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2361: the figure below we depict these three local rows and all columns (0-11).
2363: .vb
2364: 0 1 2 3 4 5 6 7 8 9 10 11
2365: --------------------------
2366: row 3 |. . . d d d o o o o o o
2367: row 4 |. . . d d d o o o o o o
2368: row 5 |. . . d d d o o o o o o
2369: --------------------------
2370: .ve
2372: Thus, any entries in the d locations are stored in the d (diagonal)
2373: submatrix, and any entries in the o locations are stored in the
2374: o (off-diagonal) submatrix. Note that the d matrix is stored in
2375: `MATSEQSBAIJ` format and the o submatrix in `MATSEQBAIJ` format.
2377: Now `d_nz` should indicate the number of block nonzeros per row in the upper triangular
2378: plus the diagonal part of the d matrix,
2379: and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
2380: In general, for PDE problems in which most nonzeros are near the diagonal,
2381: one expects `d_nz` >> `o_nz`.
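A minimal call sketch (block size, global size, and preallocation counts are illustrative assumptions):
.vb
Mat A;
PetscCall(MatCreateSBAIJ(PETSC_COMM_WORLD, 2, PETSC_DECIDE, PETSC_DECIDE, 100, 100, 5, NULL, 2, NULL, &A));
.ve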
2383: .seealso: [](ch_matrices), `Mat`, `MATSBAIJ`, `MatCreate()`, `MatCreateSeqSBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`
2384: @*/
2386: PetscErrorCode MatCreateSBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
2387: {
2388: PetscMPIInt size;
2390: PetscFunctionBegin;
2391: PetscCall(MatCreate(comm, A));
2392: PetscCall(MatSetSizes(*A, m, n, M, N));
2393: PetscCallMPI(MPI_Comm_size(comm, &size));
2394: if (size > 1) {
2395: PetscCall(MatSetType(*A, MATMPISBAIJ));
2396: PetscCall(MatMPISBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
2397: } else {
2398: PetscCall(MatSetType(*A, MATSEQSBAIJ));
2399: PetscCall(MatSeqSBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
2400: }
2401: PetscFunctionReturn(PETSC_SUCCESS);
2402: }
2404: static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
2405: {
2406: Mat mat;
2407: Mat_MPISBAIJ *a, *oldmat = (Mat_MPISBAIJ *)matin->data;
2408: PetscInt len = 0, nt, bs = matin->rmap->bs, mbs = oldmat->mbs;
2409: PetscScalar *array;
2411: PetscFunctionBegin;
2412: *newmat = NULL;
2414: PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
2415: PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
2416: PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
2417: PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
2418: PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
2420: mat->factortype = matin->factortype;
2421: mat->preallocated = PETSC_TRUE;
2422: mat->assembled = PETSC_TRUE;
2423: mat->insertmode = NOT_SET_VALUES;
2425: a = (Mat_MPISBAIJ *)mat->data;
2426: a->bs2 = oldmat->bs2;
2427: a->mbs = oldmat->mbs;
2428: a->nbs = oldmat->nbs;
2429: a->Mbs = oldmat->Mbs;
2430: a->Nbs = oldmat->Nbs;
2432: a->size = oldmat->size;
2433: a->rank = oldmat->rank;
2434: a->donotstash = oldmat->donotstash;
2435: a->roworiented = oldmat->roworiented;
2436: a->rowindices = NULL;
2437: a->rowvalues = NULL;
2438: a->getrowactive = PETSC_FALSE;
2439: a->barray = NULL;
2440: a->rstartbs = oldmat->rstartbs;
2441: a->rendbs = oldmat->rendbs;
2442: a->cstartbs = oldmat->cstartbs;
2443: a->cendbs = oldmat->cendbs;
2445: /* hash table stuff */
2446: a->ht = NULL;
2447: a->hd = NULL;
2448: a->ht_size = 0;
2449: a->ht_flag = oldmat->ht_flag;
2450: a->ht_fact = oldmat->ht_fact;
2451: a->ht_total_ct = 0;
2452: a->ht_insert_ct = 0;
2454: PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 2));
2455: if (oldmat->colmap) {
2456: #if defined(PETSC_USE_CTABLE)
2457: PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
2458: #else
2459: PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
2460: PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
2461: #endif
2462: } else a->colmap = NULL;
2464: if (oldmat->garray && (len = ((Mat_SeqBAIJ *)(oldmat->B->data))->nbs)) {
2465: PetscCall(PetscMalloc1(len, &a->garray));
2466: PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
2467: } else a->garray = NULL;
2469: PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
2470: PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
2471: PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
2473: PetscCall(VecDuplicate(oldmat->slvec0, &a->slvec0));
2474: PetscCall(VecDuplicate(oldmat->slvec1, &a->slvec1));
2476: PetscCall(VecGetLocalSize(a->slvec1, &nt));
2477: PetscCall(VecGetArray(a->slvec1, &array));
2478: PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, array, &a->slvec1a));
2479: PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec1b));
2480: PetscCall(VecRestoreArray(a->slvec1, &array));
2481: PetscCall(VecGetArray(a->slvec0, &array));
2482: PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, array + bs * mbs, &a->slvec0b));
2483: PetscCall(VecRestoreArray(a->slvec0, &array));
2485: /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2486: PetscCall(PetscObjectReference((PetscObject)oldmat->sMvctx));
2487: a->sMvctx = oldmat->sMvctx;
2489: PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
2490: PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
2491: PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
2492: *newmat = mat;
2493: PetscFunctionReturn(PETSC_SUCCESS);
2494: }
2496: /* Used for both MPIBAIJ and MPISBAIJ matrices */
2497: #define MatLoad_MPISBAIJ_Binary MatLoad_MPIBAIJ_Binary
2499: PetscErrorCode MatLoad_MPISBAIJ(Mat mat, PetscViewer viewer)
2500: {
2501: PetscBool isbinary;
2503: PetscFunctionBegin;
2504: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
2505: PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
2506: PetscCall(MatLoad_MPISBAIJ_Binary(mat, viewer));
2507: PetscFunctionReturn(PETSC_SUCCESS);
2508: }
2510: PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A, Vec v, PetscInt idx[])
2511: {
2512: Mat_MPISBAIJ *a = (Mat_MPISBAIJ *)A->data;
2513: Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)(a->B)->data;
2514: PetscReal atmp;
2515: PetscReal *work, *svalues, *rvalues;
2516: PetscInt i, bs, mbs, *bi, *bj, brow, j, ncols, krow, kcol, col, row, Mbs, bcol;
2517: PetscMPIInt rank, size;
2518: PetscInt *rowners_bs, dest, count, source;
2519: PetscScalar *va;
2520: MatScalar *ba;
2521: MPI_Status stat;
2523: PetscFunctionBegin;
2524: PetscCheck(!idx, PETSC_COMM_SELF, PETSC_ERR_SUP, "Send email to petsc-maint@mcs.anl.gov");
2525: PetscCall(MatGetRowMaxAbs(a->A, v, NULL));
2526: PetscCall(VecGetArray(v, &va));
2528: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
2529: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
2531: bs = A->rmap->bs;
2532: mbs = a->mbs;
2533: Mbs = a->Mbs;
2534: ba = b->a;
2535: bi = b->i;
2536: bj = b->j;
2538: /* find ownerships */
2539: rowners_bs = A->rmap->range;
2541: /* each proc creates an array to be distributed */
2542: PetscCall(PetscCalloc1(bs * Mbs, &work));
2544: /* row_max for B */
2545: if (rank != size - 1) {
2546: for (i = 0; i < mbs; i++) {
2547: ncols = bi[1] - bi[0];
2548: bi++;
2549: brow = bs * i;
2550: for (j = 0; j < ncols; j++) {
2551: bcol = bs * (*bj);
2552: for (kcol = 0; kcol < bs; kcol++) {
2553: col = bcol + kcol; /* local col index */
2554: col += rowners_bs[rank + 1]; /* global col index */
2555: for (krow = 0; krow < bs; krow++) {
2556: atmp = PetscAbsScalar(*ba);
2557: ba++;
2558: row = brow + krow; /* local row index */
2559: if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2560: if (work[col] < atmp) work[col] = atmp;
2561: }
2562: }
2563: bj++;
2564: }
2565: }
2567: /* send values to its owners */
2568: for (dest = rank + 1; dest < size; dest++) {
2569: svalues = work + rowners_bs[dest];
2570: count = rowners_bs[dest + 1] - rowners_bs[dest];
2571: PetscCallMPI(MPI_Send(svalues, count, MPIU_REAL, dest, rank, PetscObjectComm((PetscObject)A)));
2572: }
2573: }
2575: /* receive values */
2576: if (rank) {
2577: rvalues = work;
2578: count = rowners_bs[rank + 1] - rowners_bs[rank];
2579: for (source = 0; source < rank; source++) {
2580: PetscCallMPI(MPI_Recv(rvalues, count, MPIU_REAL, MPI_ANY_SOURCE, MPI_ANY_TAG, PetscObjectComm((PetscObject)A), &stat));
2581: /* process values */
2582: for (i = 0; i < count; i++) {
2583: if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2584: }
2585: }
2586: }
2588: PetscCall(VecRestoreArray(v, &va));
2589: PetscCall(PetscFree(work));
2590: PetscFunctionReturn(PETSC_SUCCESS);
2591: }
2593: PetscErrorCode MatSOR_MPISBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
2594: {
2595: Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)matin->data;
2596: PetscInt mbs = mat->mbs, bs = matin->rmap->bs;
2597: PetscScalar *x, *ptr, *from;
2598: Vec bb1;
2599: const PetscScalar *b;
2601: PetscFunctionBegin;
2602: PetscCheck(its > 0 && lits > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits);
2603: PetscCheck(bs <= 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "SSOR for block size > 1 is not yet implemented");
2605: if (flag == SOR_APPLY_UPPER) {
2606: PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2607: PetscFunctionReturn(PETSC_SUCCESS);
2608: }
2610: if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2611: if (flag & SOR_ZERO_INITIAL_GUESS) {
2612: PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, lits, xx));
2613: its--;
2614: }
2616: PetscCall(VecDuplicate(bb, &bb1));
2617: while (its--) {
2618: /* lower triangular part: slvec0b = - B^T*xx */
2619: PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));
2621: /* copy xx into slvec0a */
2622: PetscCall(VecGetArray(mat->slvec0, &ptr));
2623: PetscCall(VecGetArray(xx, &x));
2624: PetscCall(PetscArraycpy(ptr, x, bs * mbs));
2625: PetscCall(VecRestoreArray(mat->slvec0, &ptr));
2627: PetscCall(VecScale(mat->slvec0, -1.0));
2629: /* copy bb into slvec1a */
2630: PetscCall(VecGetArray(mat->slvec1, &ptr));
2631: PetscCall(VecGetArrayRead(bb, &b));
2632: PetscCall(PetscArraycpy(ptr, b, bs * mbs));
2633: PetscCall(VecRestoreArray(mat->slvec1, &ptr));
2635: /* set slvec1b = 0 */
2636: PetscCall(VecSet(mat->slvec1b, 0.0));
2638: PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2639: PetscCall(VecRestoreArray(xx, &x));
2640: PetscCall(VecRestoreArrayRead(bb, &b));
2641: PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2643: /* upper triangular part: bb1 = bb1 - B*x */
2644: PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, bb1));
2646: /* local diagonal sweep */
2647: PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, lits, xx));
2648: }
2649: PetscCall(VecDestroy(&bb1));
2650: } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2651: PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2652: } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2653: PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
2654: } else if (flag & SOR_EISENSTAT) {
2655: Vec xx1;
2656: PetscBool hasop;
2657: const PetscScalar *diag;
2658: PetscScalar *sl, scale = (omega - 2.0) / omega;
2659: PetscInt i, n;
2661: if (!mat->xx1) {
2662: PetscCall(VecDuplicate(bb, &mat->xx1));
2663: PetscCall(VecDuplicate(bb, &mat->bb1));
2664: }
2665: xx1 = mat->xx1;
2666: bb1 = mat->bb1;
2668: PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));
2670: if (!mat->diag) {
2671: /* this is wrong for same matrix with new nonzero values */
2672: PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
2673: PetscCall(MatGetDiagonal(matin, mat->diag));
2674: }
2675: PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
2677: if (hasop) {
2678: PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
2679: PetscCall(VecAYPX(mat->slvec1a, scale, bb));
2680: } else {
2681: /*
2682: These two lines are replaced by code that may be a bit faster for a good compiler
2683: PetscCall(VecPointwiseMult(mat->slvec1a,mat->diag,xx));
2684: PetscCall(VecAYPX(mat->slvec1a,scale,bb));
2685: */
2686: PetscCall(VecGetArray(mat->slvec1a, &sl));
2687: PetscCall(VecGetArrayRead(mat->diag, &diag));
2688: PetscCall(VecGetArrayRead(bb, &b));
2689: PetscCall(VecGetArray(xx, &x));
2690: PetscCall(VecGetLocalSize(xx, &n));
2691: if (omega == 1.0) {
2692: for (i = 0; i < n; i++) sl[i] = b[i] - diag[i] * x[i];
2693: PetscCall(PetscLogFlops(2.0 * n));
2694: } else {
2695: for (i = 0; i < n; i++) sl[i] = b[i] + scale * diag[i] * x[i];
2696: PetscCall(PetscLogFlops(3.0 * n));
2697: }
2698: PetscCall(VecRestoreArray(mat->slvec1a, &sl));
2699: PetscCall(VecRestoreArrayRead(mat->diag, &diag));
2700: PetscCall(VecRestoreArrayRead(bb, &b));
2701: PetscCall(VecRestoreArray(xx, &x));
2702: }
2704: /* multiply off-diagonal portion of matrix */
2705: PetscCall(VecSet(mat->slvec1b, 0.0));
2706: PetscCall((*mat->B->ops->multtranspose)(mat->B, xx, mat->slvec0b));
2707: PetscCall(VecGetArray(mat->slvec0, &from));
2708: PetscCall(VecGetArray(xx, &x));
2709: PetscCall(PetscArraycpy(from, x, bs * mbs));
2710: PetscCall(VecRestoreArray(mat->slvec0, &from));
2711: PetscCall(VecRestoreArray(xx, &x));
2712: PetscCall(VecScatterBegin(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2713: PetscCall(VecScatterEnd(mat->sMvctx, mat->slvec0, mat->slvec1, ADD_VALUES, SCATTER_FORWARD));
2714: PetscCall((*mat->B->ops->multadd)(mat->B, mat->slvec1b, mat->slvec1a, mat->slvec1a));
2716: /* local sweep */
2717: PetscCall((*mat->A->ops->sor)(mat->A, mat->slvec1a, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
2718: PetscCall(VecAXPY(xx, 1.0, xx1));
2719: } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSORType is not supported for SBAIJ matrix format");
2720: PetscFunctionReturn(PETSC_SUCCESS);
2721: }
2723: /*@
2724: MatCreateMPISBAIJWithArrays - creates a `MATMPISBAIJ` matrix using arrays that contain the local rows
2725: in standard CSR format.
2727: Collective
2729: Input Parameters:
2730: + comm - MPI communicator
2731: . bs - the block size, only a block size of 1 is supported
2732: . m - number of local rows (Cannot be `PETSC_DECIDE`)
2733: . n - number of local columns; this value should be the same as the local size used in creating the
2734: x vector for the matrix-vector product y = Ax (or `PETSC_DECIDE` to have it
2735: calculated if `N` is given). For square matrices `n` is almost always `m`.
2736: . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
2737: . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
2738: . i - row indices; that is, i[0] = 0 and i[row] = i[row-1] + the number of block elements in block row `row-1` of the matrix
2739: . j - column indices
2740: - a - matrix values
2742: Output Parameter:
2743: . mat - the matrix
2745: Level: intermediate
2747: Notes:
2748: The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
2749: thus you CANNOT change the matrix entries by changing the values of `a` after you have
2750: called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
2752: The `i` and `j` indices are 0 based, and the entries of `i` are offsets into the local `j` array.
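A single-process sketch for bs = 1, passing the upper triangle of a 2 x 2 matrix (the values are illustrative):
.vb
PetscInt    i[] = {0, 2, 3};       /* row 0 stores 2 entries, row 1 stores 1 */
PetscInt    j[] = {0, 1, 1};       /* only the diagonal and above are given */
PetscScalar a[] = {2.0, 1.0, 2.0};
Mat         A;
PetscCall(MatCreateMPISBAIJWithArrays(PETSC_COMM_SELF, 1, 2, 2, PETSC_DETERMINE, PETSC_DETERMINE, i, j, a, &A));
.ve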
2754: .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
2755: `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
2756: @*/
2757: PetscErrorCode MatCreateMPISBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
2758: {
2759: PetscFunctionBegin;
2760: PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
2761: PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
2762: PetscCall(MatCreate(comm, mat));
2763: PetscCall(MatSetSizes(*mat, m, n, M, N));
2764: PetscCall(MatSetType(*mat, MATMPISBAIJ));
2765: PetscCall(MatMPISBAIJSetPreallocationCSR(*mat, bs, i, j, a));
2766: PetscFunctionReturn(PETSC_SUCCESS);
2767: }
2769: /*@C
2770: MatMPISBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATMPISBAIJ` format using the given nonzero structure and (optional) numerical values
2772: Collective
2774: Input Parameters:
2775: + B - the matrix
2776: . bs - the block size
2777: . i - the indices into `j` for the start of each local row (starts with zero)
2778: . j - the column indices for each local row (starts with zero) these must be sorted for each row
2779: - v - optional values in the matrix
2781: Level: advanced
2783: Notes:
2784: Though this routine has Preallocation() in the name, it also sets the exact nonzero locations of the matrix entries
2785: and usually the numerical values as well
2787: Any entries below the diagonal are ignored
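A minimal sketch for bs = 1 on one process, assuming `B` was created beforehand with `MatCreate()`, `MatSetSizes()` (two local rows), and `MatSetType()` set to `MATMPISBAIJ`; the arrays hold only the diagonal and above, and the values are illustrative:
.vb
PetscInt    i[] = {0, 2, 3};
PetscInt    j[] = {0, 1, 1};
PetscScalar v[] = {4.0, 1.0, 4.0};
PetscCall(MatMPISBAIJSetPreallocationCSR(B, 1, i, j, v));
.ve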
2789: .seealso: [](ch_matrices), `Mat`, `MATMPISBAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ`
2790: @*/
2791: PetscErrorCode MatMPISBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
2792: {
2793: PetscFunctionBegin;
2794: PetscTryMethod(B, "MatMPISBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
2795: PetscFunctionReturn(PETSC_SUCCESS);
2796: }
2798: PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPISBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
2799: {
2800: PetscInt m, N, i, rstart, nnz, Ii, bs, cbs;
2801: PetscInt *indx;
2802: PetscScalar *values;
2804: PetscFunctionBegin;
2805: PetscCall(MatGetSize(inmat, &m, &N));
2806: if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
2807: Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ *)inmat->data;
2808: PetscInt *dnz, *onz, mbs, Nbs, nbs;
2809: PetscInt *bindx, rmax = a->rmax, j;
2810: PetscMPIInt rank, size;
2812: PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
2813: mbs = m / bs;
2814: Nbs = N / cbs;
2815: if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
2816: nbs = n / cbs;
2818: PetscCall(PetscMalloc1(rmax, &bindx));
2819: MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */
2821: PetscCallMPI(MPI_Comm_rank(comm, &rank));
2822: PetscCallMPI(MPI_Comm_size(comm, &size));
2823: if (rank == size - 1) {
2824: /* Check sum(nbs) = Nbs */
2825: PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
2826: }
2828: rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
2829: PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
2830: for (i = 0; i < mbs; i++) {
2831: PetscCall(MatGetRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
2832: nnz = nnz / bs;
2833: for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
2834: PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
2835: PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i * bs, &nnz, &indx, NULL));
2836: }
2837: PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
2838: PetscCall(PetscFree(bindx));
2840: PetscCall(MatCreate(comm, outmat));
2841: PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
2842: PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
2843: PetscCall(MatSetType(*outmat, MATSBAIJ));
2844: PetscCall(MatSeqSBAIJSetPreallocation(*outmat, bs, 0, dnz));
2845: PetscCall(MatMPISBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
2846: MatPreallocateEnd(dnz, onz);
2847: }
2849: /* numeric phase */
2850: PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
2851: PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
2853: PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_TRUE));
2854: for (i = 0; i < m; i++) {
2855: PetscCall(MatGetRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values));
2856: Ii = i + rstart;
2857: PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
2858: PetscCall(MatRestoreRow_SeqSBAIJ(inmat, i, &nnz, &indx, &values));
2859: }
2860: PetscCall(MatSetOption(inmat, MAT_GETROW_UPPERTRIANGULAR, PETSC_FALSE));
2861: PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
2862: PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
2863: PetscFunctionReturn(PETSC_SUCCESS);
2864: }