Actual source code: mmsbaij.c
/*
   Support for the parallel SBAIJ matrix vector multiply
*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>

PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ *sbaij = (Mat_MPISBAIJ *)mat->data;
  Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)(sbaij->B->data);
  PetscInt i, j, *aj = B->j, ec = 0, *garray, *sgarray;
  PetscInt bs = mat->rmap->bs, *stmp, mbs = sbaij->mbs, vec_size, nt;
  IS from, to;
  Vec gvec;
  PetscMPIInt rank = sbaij->rank, lsize;
  PetscInt *owners = sbaij->rangebs, *ec_owner, k;
  const PetscInt *sowners;
  PetscScalar *ptr;
#if defined(PETSC_USE_CTABLE)
  PetscHMapI gid1_lid1 = NULL; /* one-based gid to lid table */
  PetscHashIter tpos;
  PetscInt gid, lid;
#else
  PetscInt Nbs = sbaij->Nbs;
  PetscInt *indices;
#endif

  PetscFunctionBegin;
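  /* First pass: collect the global block-column indices that appear in the off-diagonal part sbaij->B.
     With PETSC_USE_CTABLE a hash table maps one-based global ids to local ids; otherwise a dense marker
     array of length Nbs is used. Either way the result is garray[], the ascending list of the ec needed
     global block columns, and B's column indices are compacted to the range 0..ec-1. */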
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapICreateWithSize(mbs, &gid1_lid1));
  for (i = 0; i < B->mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) {
      PetscInt data, gid1 = aj[B->i[i] + j] + 1;
      PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &data));
      if (!data) PetscCall(PetscHMapISet(gid1_lid1, gid1, ++ec));
    }
  }
  /* form array of columns we need */
  PetscCall(PetscMalloc1(ec, &garray));
  PetscHashIterBegin(gid1_lid1, tpos);
  while (!PetscHashIterAtEnd(gid1_lid1, tpos)) {
    PetscHashIterGetKey(gid1_lid1, tpos, gid);
    PetscHashIterGetVal(gid1_lid1, tpos, lid);
    PetscHashIterNext(gid1_lid1, tpos);
    gid--;
    lid--;
    garray[lid] = gid;
  }
  PetscCall(PetscSortInt(ec, garray));
  PetscCall(PetscHMapIClear(gid1_lid1));
  for (i = 0; i < ec; i++) PetscCall(PetscHMapISet(gid1_lid1, garray[i] + 1, i + 1));
  /* compact out the extra columns in B */
  for (i = 0; i < B->mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &lid));
      lid--;
      aj[B->i[i] + j] = lid;
    }
  }
  PetscCall(PetscHMapIDestroy(&gid1_lid1));
  PetscCall(PetscMalloc2(2 * ec, &sgarray, ec, &ec_owner));
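  /* record the owning process of each needed global block column (owners[] = sbaij->rangebs gives the
     block ownership ranges) */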
  for (i = j = 0; i < ec; i++) {
    while (garray[i] >= owners[j + 1]) j++;
    ec_owner[i] = j;
  }
#else
  /* For the first stab we make an array as long as the number of columns */
  /* mark those columns that are in sbaij->B */
  PetscCall(PetscCalloc1(Nbs, &indices));
  for (i = 0; i < mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form arrays of columns we need */
  PetscCall(PetscMalloc1(ec, &garray));
  PetscCall(PetscMalloc2(2 * ec, &sgarray, ec, &ec_owner));

  ec = 0;
  for (j = 0; j < sbaij->size; j++) {
    for (i = owners[j]; i < owners[j + 1]; i++) {
      if (indices[i]) {
        garray[ec] = i;
        ec_owner[ec] = j;
        ec++;
      }
    }
  }

  /* make indices now point into garray */
  for (i = 0; i < ec; i++) indices[garray[i]] = i;

  /* compact out the extra columns in B */
  for (i = 0; i < mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
  }
  PetscCall(PetscFree(indices));
#endif
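  /* B has been compacted to ec block columns; rebuild its (sequential) column layout to match */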
  B->nbs = ec;
  PetscCall(PetscLayoutDestroy(&sbaij->B->cmap));
  PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sbaij->B), ec * mat->rmap->bs, ec * mat->rmap->bs, mat->rmap->bs, &sbaij->B->cmap));

  PetscCall(VecScatterDestroy(&sbaij->sMvctx));
  /* create local vector that is used to scatter into */
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, ec * bs, &sbaij->lvec));

  /* create two temporary index sets for building scatter-gather */
  PetscCall(PetscMalloc1(2 * ec, &stmp));
  PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, garray, PETSC_COPY_VALUES, &from));
  for (i = 0; i < ec; i++) stmp[i] = i;
  PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, stmp, PETSC_COPY_VALUES, &to));

  /* generate the scatter context
     -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but have other uses, such as in MatDiagonalScale_MPISBAIJ */
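  /* gvec is created with a NULL array: it holds no data and only supplies the parallel layout that
     VecScatterCreate() needs to interpret the global indices in "from", so it is destroyed right away */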
  PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec));
  PetscCall(VecScatterCreate(gvec, from, sbaij->lvec, to, &sbaij->Mvctx));
  PetscCall(VecDestroy(&gvec));

  sbaij->garray = garray;

  PetscCall(ISDestroy(&from));
  PetscCall(ISDestroy(&to));

  /* create parallel vector that is used by SBAIJ matrix to scatter from/into */
  lsize = (mbs + ec) * bs;
  PetscCall(VecCreateMPI(PetscObjectComm((PetscObject)mat), lsize, PETSC_DETERMINE, &sbaij->slvec0));
  PetscCall(VecDuplicate(sbaij->slvec0, &sbaij->slvec1));
  PetscCall(VecGetSize(sbaij->slvec0, &vec_size));
  PetscCall(VecGetOwnershipRanges(sbaij->slvec0, &sowners));
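  /* Each process's part of slvec0/slvec1 holds mbs*bs "x" entries followed by ec*bs "b" entries.
     The index sets built next define sMvctx, the scatter that in one communication step both gathers
     the off-process x values this process needs (into its b segment of slvec1) and adds the
     off-process contributions this process computed (its b segment of slvec0) into the x segment of
     slvec1 on the owning processes; see its use with ADD_VALUES in MatMult_MPISBAIJ(). */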

  /* x index in the IS sfrom */
  for (i = 0; i < ec; i++) {
    j = ec_owner[i];
    sgarray[i] = garray[i] + (sowners[j] / bs - owners[j]);
  }
  /* b index in the IS sfrom */
  k = sowners[rank] / bs + mbs;
  for (i = ec, j = 0; i < 2 * ec; i++, j++) sgarray[i] = k + j;
  PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, 2 * ec, sgarray, PETSC_COPY_VALUES, &from));

  /* x index in the IS sto */
  k = sowners[rank] / bs + mbs;
  for (i = 0; i < ec; i++) stmp[i] = (k + i);
  /* b index in the IS sto */
  for (i = ec; i < 2 * ec; i++) stmp[i] = sgarray[i - ec];

  PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, 2 * ec, stmp, PETSC_COPY_VALUES, &to));

  PetscCall(VecScatterCreate(sbaij->slvec0, from, sbaij->slvec1, to, &sbaij->sMvctx));
  PetscCall(VecScatterViewFromOptions(sbaij->sMvctx, (PetscObject)mat, "-matmult_vecscatter_view"));

  PetscCall(VecGetLocalSize(sbaij->slvec1, &nt));
  PetscCall(VecGetArray(sbaij->slvec1, &ptr));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, ptr, &sbaij->slvec1a));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, ptr + bs * mbs, &sbaij->slvec1b));
  PetscCall(VecRestoreArray(sbaij->slvec1, &ptr));

  PetscCall(VecGetArray(sbaij->slvec0, &ptr));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, ptr + bs * mbs, &sbaij->slvec0b));
  PetscCall(VecRestoreArray(sbaij->slvec0, &ptr));
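  /* slvec1a and slvec1b are sequential views of the x and b segments of slvec1, and slvec0b of the
     b segment of slvec0; they share storage with the parallel vectors, so the diagonal and
     off-diagonal pieces of the multiply can be addressed separately without copying */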

  PetscCall(PetscFree(stmp));

  PetscCall(ISDestroy(&from));
  PetscCall(ISDestroy(&to));
  PetscCall(PetscFree2(sgarray, ec_owner));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Takes the local part of an already assembled MPISBAIJ matrix
   and disassembles it. This is to allow new nonzeros into the matrix
   that require more communication in the matrix vector multiply.
   Thus certain data-structures must be rebuilt.

   Kind of slow! But that's what application programmers get when
   they are sloppy.
*/
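/* This is typically triggered from the MatSetValues() path when, after the matrix has already been
   assembled, a value is inserted at an off-process column location that the compacted B does not yet
   contain, so B must be widened back to the full column space before the insertion can proceed. */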
PetscErrorCode MatDisAssemble_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ *)A->data;
  Mat B = baij->B, Bnew;
  Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ *)B->data;
  PetscInt i, j, mbs = Bbaij->mbs, n = A->cmap->N, col, *garray = baij->garray;
  PetscInt k, bs = A->rmap->bs, bs2 = baij->bs2, *rvals, *nz, m = A->rmap->n;
  MatScalar *a = Bbaij->a;
  PetscScalar *atmp;
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  PetscInt l;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  PetscCall(PetscMalloc1(A->rmap->bs, &atmp));
#endif
  /* free stuff related to matrix-vec multiply */
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));

  PetscCall(VecDestroy(&baij->slvec0));
  PetscCall(VecDestroy(&baij->slvec0b));
  PetscCall(VecDestroy(&baij->slvec1));
  PetscCall(VecDestroy(&baij->slvec1a));
  PetscCall(VecDestroy(&baij->slvec1b));

  if (baij->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscHMapIDestroy(&baij->colmap));
#else
    PetscCall(PetscFree(baij->colmap));
#endif
  }

  /* make sure that B is assembled so we can access its values */
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  /* invent new B and copy stuff over */
  PetscCall(PetscMalloc1(mbs, &nz));
  for (i = 0; i < mbs; i++) nz[i] = Bbaij->i[i + 1] - Bbaij->i[i];
  PetscCall(MatCreate(PETSC_COMM_SELF, &Bnew));
  PetscCall(MatSetSizes(Bnew, m, n, m, n));
  PetscCall(MatSetType(Bnew, ((PetscObject)B)->type_name));
  PetscCall(MatSeqBAIJSetPreallocation(Bnew, B->rmap->bs, 0, nz));
  PetscCall(PetscFree(nz));

  if (Bbaij->nonew >= 0) { /* Inherit insertion error options (if positive). */
    ((Mat_SeqSBAIJ *)Bnew->data)->nonew = Bbaij->nonew;
  }

  /*
     Ensure that B's nonzerostate is monotonically increasing.
     Or should this follow the MatSetValues() loop to preserve B's nonzerostate across a MatDisAssemble() call?
  */
  Bnew->nonzerostate = B->nonzerostate;
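  /* copy the values of the old B into Bnew, translating each compacted local block-column index back
     to a global column index through garray[]; values are inserted one bs-long column of each block
     at a time */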
  PetscCall(PetscMalloc1(bs, &rvals));
  for (i = 0; i < mbs; i++) {
    rvals[0] = bs * i;
    for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
    for (j = Bbaij->i[i]; j < Bbaij->i[i + 1]; j++) {
      col = garray[Bbaij->j[j]] * bs;
      for (k = 0; k < bs; k++) {
#if defined(PETSC_USE_REAL_MAT_SINGLE)
        for (l = 0; l < bs; l++) atmp[l] = a[j * bs2 + l];
#else
        atmp = a + j * bs2 + k * bs;
#endif
        PetscCall(MatSetValues_SeqSBAIJ(Bnew, bs, rvals, 1, &col, atmp, B->insertmode));
        col++;
      }
    }
  }
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  PetscCall(PetscFree(atmp));
#endif
  PetscCall(PetscFree(baij->garray));
  baij->garray = NULL;

  PetscCall(PetscFree(rvals));
  PetscCall(MatDestroy(&B));
  baij->B = Bnew;

  A->was_assembled = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}