Actual source code: mmbaij.c
/*
   Support for the parallel BAIJ matrix vector multiply
*/
#include <../src/mat/impls/baij/mpi/mpibaij.h>
#include <petsc/private/isimpl.h>
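/*
   The routine below builds the communication machinery (garray, lvec, Mvctx) used by the
   MPIBAIJ matrix-vector multiply. A minimal, hedged user-level sketch of code that exercises
   this path (standard PETSc public API assumed; the sizes bs and M are placeholders, and this
   program is not part of this file):

     #include <petscmat.h>
     int main(int argc, char **argv)
     {
       Mat      A;
       Vec      x, y;
       PetscInt bs = 2, M = 8; // hypothetical block size and number of block rows

       PetscCall(PetscInitialize(&argc, &argv, NULL, NULL));
       PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
       PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, M * bs, M * bs));
       PetscCall(MatSetType(A, MATMPIBAIJ));
       PetscCall(MatMPIBAIJSetPreallocation(A, bs, 2, NULL, 1, NULL));
       // ... MatSetValuesBlocked() calls here ...
       PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
       PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); // for MPIBAIJ this ends up invoking MatSetUpMultiply_MPIBAIJ() below
       PetscCall(MatCreateVecs(A, &x, &y));
       PetscCall(MatMult(A, x, y)); // uses the scatter built below
       PetscCall(MatDestroy(&A));
       PetscCall(VecDestroy(&x));
       PetscCall(VecDestroy(&y));
       PetscCall(PetscFinalize());
       return 0;
     }
*/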
PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)(baij->B->data);
  PetscInt     i, j, *aj = B->j, ec = 0, *garray;
  PetscInt     bs = mat->rmap->bs, *stmp;
  IS           from, to;
  Vec          gvec;
#if defined(PETSC_USE_CTABLE)
  PetscHMapI    gid1_lid1 = NULL;
  PetscHashIter tpos;
  PetscInt      gid, lid;
#else
  PetscInt Nbs = baij->Nbs, *indices;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  /* use a table - Mark Adams */
  PetscCall(PetscHMapICreateWithSize(B->mbs, &gid1_lid1));
  for (i = 0; i < B->mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) {
      PetscInt data, gid1 = aj[B->i[i] + j] + 1;
      PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &data));
      if (!data) {
        /* one based table */
        PetscCall(PetscHMapISet(gid1_lid1, gid1, ++ec));
      }
    }
  }

  /* form array of columns we need */
  PetscCall(PetscMalloc1(ec, &garray));
  PetscHashIterBegin(gid1_lid1, tpos);
  while (!PetscHashIterAtEnd(gid1_lid1, tpos)) {
    PetscHashIterGetKey(gid1_lid1, tpos, gid);
    PetscHashIterGetVal(gid1_lid1, tpos, lid);
    PetscHashIterNext(gid1_lid1, tpos);
    gid--;
    lid--;
    garray[lid] = gid;
  }
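  /*
     Sort the needed global block columns and rebuild the (one-based) global-to-local table so
     that the local column numbering of B follows increasing global order (descriptive comment
     added for readability)
  */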
  PetscCall(PetscSortInt(ec, garray));
  PetscCall(PetscHMapIClear(gid1_lid1));
  for (i = 0; i < ec; i++) PetscCall(PetscHMapISet(gid1_lid1, garray[i] + 1, i + 1));

  /* compact out the extra columns in B */
  for (i = 0; i < B->mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &lid));
      lid--;
      aj[B->i[i] + j] = lid;
    }
  }
  B->nbs = ec;
  PetscCall(PetscLayoutDestroy(&baij->B->cmap));
  PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)baij->B), ec * mat->rmap->bs, ec * mat->rmap->bs, mat->rmap->bs, &baij->B->cmap));
  PetscCall(PetscHMapIDestroy(&gid1_lid1));
#else
  /* Make an array as long as the number of columns */
  /* mark those columns that are in baij->B */
  PetscCall(PetscCalloc1(Nbs, &indices));
  for (i = 0; i < B->mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form array of columns we need */
  PetscCall(PetscMalloc1(ec, &garray));
  ec = 0;
  for (i = 0; i < Nbs; i++) {
    if (indices[i]) garray[ec++] = i;
  }

  /* make indices now point into garray */
  for (i = 0; i < ec; i++) indices[garray[i]] = i;

  /* compact out the extra columns in B */
  for (i = 0; i < B->mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
  }
  B->nbs = ec;
  PetscCall(PetscLayoutDestroy(&baij->B->cmap));
  PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)baij->B), ec * mat->rmap->bs, ec * mat->rmap->bs, mat->rmap->bs, &baij->B->cmap));
  PetscCall(PetscFree(indices));
#endif

  /* create local vector that is used to scatter into */
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, ec * bs, &baij->lvec));

  /* create two temporary index sets for building scatter-gather */
  PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, garray, PETSC_COPY_VALUES, &from));

  PetscCall(PetscMalloc1(ec, &stmp));
  for (i = 0; i < ec; i++) stmp[i] = i;
  PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, stmp, PETSC_OWN_POINTER, &to));

  /* create temporary global vector to generate scatter context */
  PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec));

  PetscCall(VecScatterCreate(gvec, from, baij->lvec, to, &baij->Mvctx));
  PetscCall(VecScatterViewFromOptions(baij->Mvctx, (PetscObject)mat, "-matmult_vecscatter_view"));

  baij->garray = garray;

  PetscCall(ISDestroy(&from));
  PetscCall(ISDestroy(&to));
  PetscCall(VecDestroy(&gvec));
  PetscFunctionReturn(PETSC_SUCCESS);
}
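/*
   Schematic use of what was just built (a hedged sketch, not the actual MatMult_MPIBAIJ()
   source): the "diagonal" part baij->A multiplies the locally owned piece of x, while
   baij->Mvctx gathers the needed off-process block columns (those listed in baij->garray)
   into baij->lvec, which baij->B then multiplies and adds:

     PetscCall(VecScatterBegin(baij->Mvctx, x, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
     PetscCall((*baij->A->ops->mult)(baij->A, x, y));
     PetscCall(VecScatterEnd(baij->Mvctx, x, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
     PetscCall((*baij->B->ops->multadd)(baij->B, baij->lvec, y, y));
*/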
/*
   Takes the local part of an already assembled MPIBAIJ matrix
   and disassembles it. This is to allow new nonzeros into the matrix
   that require more communication in the matrix vector multiply.
   Thus certain data-structures must be rebuilt.

   Kind of slow! But that's what application programmers get when
   they are sloppy.
*/
PetscErrorCode MatDisAssemble_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data;
  Mat          B    = baij->B, Bnew;
  Mat_SeqBAIJ *Bbaij;
  PetscInt     i, j, mbs, n = A->cmap->N, col, *garray = baij->garray;
  PetscInt     bs2 = baij->bs2, *nz = NULL, m = A->rmap->n;
  MatScalar   *a, *atmp;

  PetscFunctionBegin;
  /* free stuff related to matrix-vec multiply */
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));
  if (baij->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscHMapIDestroy(&baij->colmap));
#else
    PetscCall(PetscFree(baij->colmap));
#endif
  }

  /* make sure that B is assembled so we can access its values */
  if (B) {
    PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
    Bbaij = (Mat_SeqBAIJ *)B->data;
    mbs   = Bbaij->mbs;
    a     = Bbaij->a;

    /* invent new B and copy stuff over */
    PetscCall(PetscMalloc1(mbs, &nz));
    for (i = 0; i < mbs; i++) nz[i] = Bbaij->i[i + 1] - Bbaij->i[i];
    PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Bnew));
    PetscCall(MatSetSizes(Bnew, m, n, m, n));
    PetscCall(MatSetType(Bnew, ((PetscObject)B)->type_name));
    PetscCall(MatSeqBAIJSetPreallocation(Bnew, B->rmap->bs, 0, nz));
    /*
      Ensure that B's nonzerostate is monotonically increasing.
      Or should this follow the MatSetValuesBlocked() loop to preserve B's nonzerostate across a MatDisAssemble() call?
    */
    Bnew->nonzerostate = B->nonzerostate;
    if (Bbaij->nonew >= 0) { /* Inherit insertion error options (if positive). */
      ((Mat_SeqBAIJ *)Bnew->data)->nonew = Bbaij->nonew;
    }

    PetscCall(MatSetOption(Bnew, MAT_ROW_ORIENTED, PETSC_FALSE));
    for (i = 0; i < mbs; i++) {
      for (j = Bbaij->i[i]; j < Bbaij->i[i + 1]; j++) {
        col  = garray[Bbaij->j[j]];
        atmp = a + j * bs2;
        PetscCall(MatSetValuesBlocked_SeqBAIJ(Bnew, 1, &i, 1, &col, atmp, B->insertmode));
      }
    }
    PetscCall(MatSetOption(Bnew, MAT_ROW_ORIENTED, PETSC_TRUE));

    PetscCall(PetscFree(nz));
    PetscCall(MatDestroy(&B));

    baij->B = Bnew;
  }
  PetscCall(PetscFree(baij->garray));
  A->was_assembled = PETSC_FALSE;
  A->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
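/*
   Hedged illustration (not part of this file) of the situation the routine above handles:
   after a matrix has been assembled and used, inserting a value whose block column is owned
   by another process forces the off-diagonal part B back to global column numbering before
   the new entry can be accepted. The names row, col_in_new_offprocess_block, and v are
   placeholders for the user's data:

     PetscCall(MatSetValue(A, row, col_in_new_offprocess_block, v, INSERT_VALUES)); // may trigger MatDisAssemble_MPIBAIJ()
     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); // scatter machinery is then rebuilt by MatSetUpMultiply_MPIBAIJ()
*/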
/* ugly stuff added for Glenn; someday we should fix this up */

static PetscInt *uglyrmapd = NULL, *uglyrmapo = NULL; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */
static Vec       uglydd = NULL, uglyoo = NULL;        /* work vectors used to scale the two parts of the local matrix */
PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA, Vec scale)
{
  Mat_MPIBAIJ *ina = (Mat_MPIBAIJ *)inA->data; /* access private part of matrix */
  Mat_SeqBAIJ *B   = (Mat_SeqBAIJ *)ina->B->data;
  PetscInt     bs = inA->rmap->bs, i, n, nt, j, cstart, cend, no, *garray = ina->garray, *lindices;
  PetscInt    *r_rmapd, *r_rmapo;

  PetscFunctionBegin;
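  /*
     Plan (descriptive comment added for readability): build uglyrmapd, which for each position
     of the "diagonal" (locally owned) part gives the corresponding index in the local ordering
     of inA->rmap->mapping, and uglyrmapo, which does the same for the "off-diagonal" part whose
     block columns are listed in garray; also create the work vectors uglydd and uglyoo of the
     matching sizes.
  */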
  PetscCall(MatGetOwnershipRange(inA, &cstart, &cend));
  PetscCall(MatGetSize(ina->A, NULL, &n));
  PetscCall(PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapd));
  nt = 0;
  for (i = 0; i < inA->rmap->mapping->n; i++) {
    if (inA->rmap->mapping->indices[i] * bs >= cstart && inA->rmap->mapping->indices[i] * bs < cend) {
      nt++;
      r_rmapd[i] = inA->rmap->mapping->indices[i] + 1;
    }
  }
  PetscCheck(nt * bs == n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt*bs %" PetscInt_FMT " n %" PetscInt_FMT, nt * bs, n);
  PetscCall(PetscMalloc1(n + 1, &uglyrmapd));
  for (i = 0; i < inA->rmap->mapping->n; i++) {
    if (r_rmapd[i]) {
      for (j = 0; j < bs; j++) uglyrmapd[(r_rmapd[i] - 1) * bs + j - cstart] = i * bs + j;
    }
  }
  PetscCall(PetscFree(r_rmapd));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, n, &uglydd));

  PetscCall(PetscCalloc1(ina->Nbs + 1, &lindices));
  for (i = 0; i < B->nbs; i++) lindices[garray[i]] = i + 1;
  no = inA->rmap->mapping->n - nt;
  PetscCall(PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapo));
  nt = 0;
  for (i = 0; i < inA->rmap->mapping->n; i++) {
    if (lindices[inA->rmap->mapping->indices[i]]) {
      nt++;
      r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]];
    }
  }
  PetscCheck(nt <= no, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt %" PetscInt_FMT " no %" PetscInt_FMT, nt, no);
  PetscCall(PetscFree(lindices));
  PetscCall(PetscMalloc1(nt * bs + 1, &uglyrmapo));
  for (i = 0; i < inA->rmap->mapping->n; i++) {
    if (r_rmapo[i]) {
      for (j = 0; j < bs; j++) uglyrmapo[(r_rmapo[i] - 1) * bs + j] = i * bs + j;
    }
  }
  PetscCall(PetscFree(r_rmapo));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, nt * bs, &uglyoo));
  PetscFunctionReturn(PETSC_SUCCESS);
}
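/*
   Tiny worked illustration of the maps built above (hypothetical numbers, bs = 1): suppose
   this rank owns global columns 0..2 (cstart = 0, cend = 3), the local-to-global mapping has
   n = 4 indices {2, 0, 3, 1} (global 3 lives on another rank), and garray = {3} so B->nbs = 1.
   The setup then produces

     uglyrmapd = {1, 3, 0}   entry k of the "diagonal" part is scaled by scale[uglyrmapd[k]]
     uglyrmapo = {2}         entry k of the "off-diagonal" part is scaled by scale[uglyrmapo[k]]

   and uglydd, uglyoo have lengths 3 and 1 respectively.
*/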
PetscErrorCode MatMPIBAIJDiagonalScaleLocal(Mat A, Vec scale)
{
  /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */

  PetscFunctionBegin;
  PetscTryMethod(A, "MatDiagonalScaleLocal_C", (Mat, Vec), (A, scale));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat A, Vec scale)
{
  Mat_MPIBAIJ       *a = (Mat_MPIBAIJ *)A->data; /* access private part of matrix */
  PetscInt           n, i;
  PetscScalar       *d, *o;
  const PetscScalar *s;

  PetscFunctionBegin;
  if (!uglyrmapd) PetscCall(MatMPIBAIJDiagonalScaleLocalSetUp(A, scale));

  PetscCall(VecGetArrayRead(scale, &s));

  PetscCall(VecGetLocalSize(uglydd, &n));
  PetscCall(VecGetArray(uglydd, &d));
  for (i = 0; i < n; i++) { d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ }
  PetscCall(VecRestoreArray(uglydd, &d));
  /* column scale "diagonal" portion of local matrix */
  PetscCall(MatDiagonalScale(a->A, NULL, uglydd));

  PetscCall(VecGetLocalSize(uglyoo, &n));
  PetscCall(VecGetArray(uglyoo, &o));
  for (i = 0; i < n; i++) { o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ }
  PetscCall(VecRestoreArrayRead(scale, &s));
  PetscCall(VecRestoreArray(uglyoo, &o));
  /* column scale "off-diagonal" portion of local matrix */
  PetscCall(MatDiagonalScale(a->B, NULL, uglyoo));
  PetscFunctionReturn(PETSC_SUCCESS);
}
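/*
   Hedged usage sketch (not part of this file): `scale` is a sequential vector in the local
   (ghosted) ordering defined by the matrix's local-to-global mapping, so the matrix must carry
   such a mapping (for example set with MatSetLocalToGlobalMapping() or obtained from a DM).
   The names A and nlocal below are placeholders:

     Vec scale;
     PetscCall(VecCreateSeq(PETSC_COMM_SELF, nlocal, &scale));
     // ... fill scale with one entry per local (including ghost) point ...
     PetscCall(MatMPIBAIJDiagonalScaleLocal(A, scale)); // column-scales both local pieces of A
     PetscCall(VecDestroy(&scale));
*/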