Actual source code: fdmpiaij.c

  1: #include <../src/mat/impls/sell/mpi/mpisell.h>
  2: #include <../src/mat/impls/aij/mpi/mpiaij.h>
  3: #include <../src/mat/impls/baij/mpi/mpibaij.h>
  4: #include <petsc/private/isimpl.h>

  6: static PetscErrorCode MatFDColoringMarkHost_AIJ(Mat J)
  7: {
  8:   PetscBool    isseqAIJ, ismpiAIJ;
  9:   PetscScalar *v;

 11:   PetscFunctionBegin;
 12:   PetscCall(PetscObjectBaseTypeCompare((PetscObject)J, MATMPIAIJ, &ismpiAIJ));
 13:   PetscCall(PetscObjectBaseTypeCompare((PetscObject)J, MATSEQAIJ, &isseqAIJ));
 14:   if (isseqAIJ) {
 15:     PetscCall(MatSeqAIJGetArrayWrite(J, &v));
 16:     PetscCall(MatSeqAIJRestoreArrayWrite(J, &v));
 17:   } else if (ismpiAIJ) {
 18:     Mat dJ, oJ;

 20:     PetscCall(MatMPIAIJGetSeqAIJ(J, &dJ, &oJ, NULL));
 21:     PetscCall(MatSeqAIJGetArrayWrite(dJ, &v));
 22:     PetscCall(MatSeqAIJRestoreArrayWrite(dJ, &v));
 23:     PetscCall(MatSeqAIJGetArrayWrite(oJ, &v));
 24:     PetscCall(MatSeqAIJRestoreArrayWrite(oJ, &v));
 25:   }
 26:   PetscFunctionReturn(PETSC_SUCCESS);
 27: }

 29: PetscErrorCode MatFDColoringApply_BAIJ(Mat J, MatFDColoring coloring, Vec x1, void *sctx)
 30: {
 31:   PetscErrorCode (*f)(void *, Vec, Vec, void *) = (PetscErrorCode(*)(void *, Vec, Vec, void *))coloring->f;
 32:   PetscInt           k, cstart, cend, l, row, col, nz, spidx, i, j;
 33:   PetscScalar        dx = 0.0, *w3_array, *dy_i, *dy = coloring->dy;
 34:   PetscScalar       *vscale_array;
 35:   const PetscScalar *xx;
 36:   PetscReal          epsilon = coloring->error_rel, umin = coloring->umin, unorm;
 37:   Vec                w1 = coloring->w1, w2 = coloring->w2, w3, vscale = coloring->vscale;
 38:   void              *fctx  = coloring->fctx;
 39:   PetscInt           ctype = coloring->ctype, nxloc, nrows_k;
 40:   PetscScalar       *valaddr;
 41:   MatEntry          *Jentry  = coloring->matentry;
 42:   MatEntry2         *Jentry2 = coloring->matentry2;
 43:   const PetscInt     ncolors = coloring->ncolors, *ncolumns = coloring->ncolumns, *nrows = coloring->nrows;
 44:   PetscInt           bs = J->rmap->bs;

 46:   PetscFunctionBegin;
 47:   PetscCall(VecBindToCPU(x1, PETSC_TRUE));
 48:   /* (1) Set w1 = F(x1) */
 49:   if (!coloring->fset) {
 50:     PetscCall(PetscLogEventBegin(MAT_FDColoringFunction, coloring, 0, 0, 0));
 51:     PetscCall((*f)(sctx, x1, w1, fctx));
 52:     PetscCall(PetscLogEventEnd(MAT_FDColoringFunction, coloring, 0, 0, 0));
 53:   } else {
 54:     coloring->fset = PETSC_FALSE;
 55:   }

 57:   /* (2) Compute vscale = 1./dx - the local scale factors, including ghost points */
 58:   PetscCall(VecGetLocalSize(x1, &nxloc));
 59:   if (coloring->htype[0] == 'w') {
 60:     /* vscale = dx is a constant scalar */
 61:     PetscCall(VecNorm(x1, NORM_2, &unorm));
 62:     dx = 1.0 / (PetscSqrtReal(1.0 + unorm) * epsilon);
 63:   } else {
 64:     PetscCall(VecGetArrayRead(x1, &xx));
 65:     PetscCall(VecGetArray(vscale, &vscale_array));
 66:     for (col = 0; col < nxloc; col++) {
 67:       dx = xx[col];
 68:       if (PetscAbsScalar(dx) < umin) {
 69:         if (PetscRealPart(dx) >= 0.0) dx = umin;
 70:         else if (PetscRealPart(dx) < 0.0) dx = -umin;
 71:       }
 72:       dx *= epsilon;
 73:       vscale_array[col] = 1.0 / dx;
 74:     }
 75:     PetscCall(VecRestoreArrayRead(x1, &xx));
 76:     PetscCall(VecRestoreArray(vscale, &vscale_array));
 77:   }
 78:   if (ctype == IS_COLORING_GLOBAL && coloring->htype[0] == 'd') {
 79:     PetscCall(VecGhostUpdateBegin(vscale, INSERT_VALUES, SCATTER_FORWARD));
 80:     PetscCall(VecGhostUpdateEnd(vscale, INSERT_VALUES, SCATTER_FORWARD));
 81:   }

 83:   /* (3) Loop over each color */
 84:   if (!coloring->w3) {
 85:     PetscCall(VecDuplicate(x1, &coloring->w3));
 86:     /* Vec is used intensively in particular piece of scalar CPU code; won't benefit from bouncing back and forth to the GPU */
 87:     PetscCall(VecBindToCPU(coloring->w3, PETSC_TRUE));
 88:   }
 89:   w3 = coloring->w3;

 91:   PetscCall(VecGetOwnershipRange(x1, &cstart, &cend)); /* used by ghosted vscale */
 92:   if (vscale) PetscCall(VecGetArray(vscale, &vscale_array));
 93:   nz = 0;
 94:   for (k = 0; k < ncolors; k++) {
 95:     coloring->currentcolor = k;

 97:     /*
 98:       (3-1) Loop over each column associated with color
 99:       adding the perturbation to the vector w3 = x1 + dx.
100:     */
101:     PetscCall(VecCopy(x1, w3));
102:     dy_i = dy;
103:     for (i = 0; i < bs; i++) { /* Loop over a block of columns */
104:       PetscCall(VecGetArray(w3, &w3_array));
105:       if (ctype == IS_COLORING_GLOBAL) w3_array -= cstart; /* shift pointer so global index can be used */
106:       if (coloring->htype[0] == 'w') {
107:         for (l = 0; l < ncolumns[k]; l++) {
108:           col = i + bs * coloring->columns[k][l]; /* local column (in global index!) of the matrix we are probing for */
109:           w3_array[col] += 1.0 / dx;
110:           if (i) w3_array[col - 1] -= 1.0 / dx; /* resume original w3[col-1] */
111:         }
112:       } else {                  /* htype == 'ds' */
113:         vscale_array -= cstart; /* shift pointer so global index can be used */
114:         for (l = 0; l < ncolumns[k]; l++) {
115:           col = i + bs * coloring->columns[k][l]; /* local column (in global index!) of the matrix we are probing for */
116:           w3_array[col] += 1.0 / vscale_array[col];
117:           if (i) w3_array[col - 1] -= 1.0 / vscale_array[col - 1]; /* resume original w3[col-1] */
118:         }
119:         vscale_array += cstart;
120:       }
121:       if (ctype == IS_COLORING_GLOBAL) w3_array += cstart;
122:       PetscCall(VecRestoreArray(w3, &w3_array));

124:       /*
125:        (3-2) Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
126:                            w2 = F(x1 + dx) - F(x1)
127:        */
128:       PetscCall(PetscLogEventBegin(MAT_FDColoringFunction, 0, 0, 0, 0));
129:       PetscCall(VecPlaceArray(w2, dy_i)); /* place w2 to the array dy_i */
130:       PetscCall((*f)(sctx, w3, w2, fctx));
131:       PetscCall(PetscLogEventEnd(MAT_FDColoringFunction, 0, 0, 0, 0));
132:       PetscCall(VecAXPY(w2, -1.0, w1));
133:       PetscCall(VecResetArray(w2));
134:       dy_i += nxloc; /* points to dy+i*nxloc */
135:     }

137:     /*
138:      (3-3) Loop over rows of vector, putting results into Jacobian matrix
139:     */
140:     nrows_k = nrows[k];
141:     if (coloring->htype[0] == 'w') {
142:       for (l = 0; l < nrows_k; l++) {
143:         row     = bs * Jentry2[nz].row; /* local row index */
144:         valaddr = Jentry2[nz++].valaddr;
145:         spidx   = 0;
146:         dy_i    = dy;
147:         for (i = 0; i < bs; i++) {   /* column of the block */
148:           for (j = 0; j < bs; j++) { /* row of the block */
149:             valaddr[spidx++] = dy_i[row + j] * dx;
150:           }
151:           dy_i += nxloc; /* points to dy+i*nxloc */
152:         }
153:       }
154:     } else { /* htype == 'ds' */
155:       for (l = 0; l < nrows_k; l++) {
156:         row     = bs * Jentry[nz].row; /* local row index */
157:         col     = bs * Jentry[nz].col; /* local column index */
158:         valaddr = Jentry[nz++].valaddr;
159:         spidx   = 0;
160:         dy_i    = dy;
161:         for (i = 0; i < bs; i++) {   /* column of the block */
162:           for (j = 0; j < bs; j++) { /* row of the block */
163:             valaddr[spidx++] = dy_i[row + j] * vscale_array[col + i];
164:           }
165:           dy_i += nxloc; /* points to dy+i*nxloc */
166:         }
167:       }
168:     }
169:   }
170:   PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
171:   PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
172:   if (vscale) PetscCall(VecRestoreArray(vscale, &vscale_array));

174:   coloring->currentcolor = -1;
175:   PetscCall(VecBindToCPU(x1, PETSC_FALSE));
176:   PetscFunctionReturn(PETSC_SUCCESS);
177: }

179: /* this is declared PETSC_EXTERN because it is used by MatFDColoringUseDM() which is in the DM library */
180: PetscErrorCode MatFDColoringApply_AIJ(Mat J, MatFDColoring coloring, Vec x1, void *sctx)
181: {
182:   PetscErrorCode (*f)(void *, Vec, Vec, void *) = (PetscErrorCode(*)(void *, Vec, Vec, void *))coloring->f;
183:   PetscInt           k, cstart, cend, l, row, col, nz;
184:   PetscScalar        dx = 0.0, *y, *w3_array;
185:   const PetscScalar *xx;
186:   PetscScalar       *vscale_array;
187:   PetscReal          epsilon = coloring->error_rel, umin = coloring->umin, unorm;
188:   Vec                w1 = coloring->w1, w2 = coloring->w2, w3, vscale = coloring->vscale;
189:   void              *fctx  = coloring->fctx;
190:   ISColoringType     ctype = coloring->ctype;
191:   PetscInt           nxloc, nrows_k;
192:   MatEntry          *Jentry  = coloring->matentry;
193:   MatEntry2         *Jentry2 = coloring->matentry2;
194:   const PetscInt     ncolors = coloring->ncolors, *ncolumns = coloring->ncolumns, *nrows = coloring->nrows;
195:   PetscBool          alreadyboundtocpu;

197:   PetscFunctionBegin;
198:   PetscCall(MatFDColoringMarkHost_AIJ(J));
199:   PetscCall(VecBoundToCPU(x1, &alreadyboundtocpu));
200:   PetscCall(VecBindToCPU(x1, PETSC_TRUE));
201:   PetscCheck(!(ctype == IS_COLORING_LOCAL) || !(J->ops->fdcoloringapply == MatFDColoringApply_AIJ), PetscObjectComm((PetscObject)J), PETSC_ERR_SUP, "Must call MatColoringUseDM() with IS_COLORING_LOCAL");
202:   /* (1) Set w1 = F(x1) */
203:   if (!coloring->fset) {
204:     PetscCall(PetscLogEventBegin(MAT_FDColoringFunction, 0, 0, 0, 0));
205:     PetscCall((*f)(sctx, x1, w1, fctx));
206:     PetscCall(PetscLogEventEnd(MAT_FDColoringFunction, 0, 0, 0, 0));
207:   } else {
208:     coloring->fset = PETSC_FALSE;
209:   }

211:   /* (2) Compute vscale = 1./dx - the local scale factors, including ghost points */
212:   if (coloring->htype[0] == 'w') {
213:     /* vscale = 1./dx is a constant scalar */
214:     PetscCall(VecNorm(x1, NORM_2, &unorm));
215:     dx = 1.0 / (PetscSqrtReal(1.0 + unorm) * epsilon);
216:   } else {
217:     PetscCall(VecGetLocalSize(x1, &nxloc));
218:     PetscCall(VecGetArrayRead(x1, &xx));
219:     PetscCall(VecGetArray(vscale, &vscale_array));
220:     for (col = 0; col < nxloc; col++) {
221:       dx = xx[col];
222:       if (PetscAbsScalar(dx) < umin) {
223:         if (PetscRealPart(dx) >= 0.0) dx = umin;
224:         else if (PetscRealPart(dx) < 0.0) dx = -umin;
225:       }
226:       dx *= epsilon;
227:       vscale_array[col] = 1.0 / dx;
228:     }
229:     PetscCall(VecRestoreArrayRead(x1, &xx));
230:     PetscCall(VecRestoreArray(vscale, &vscale_array));
231:   }
232:   if (ctype == IS_COLORING_GLOBAL && coloring->htype[0] == 'd') {
233:     PetscCall(VecGhostUpdateBegin(vscale, INSERT_VALUES, SCATTER_FORWARD));
234:     PetscCall(VecGhostUpdateEnd(vscale, INSERT_VALUES, SCATTER_FORWARD));
235:   }

237:   /* (3) Loop over each color */
238:   if (!coloring->w3) PetscCall(VecDuplicate(x1, &coloring->w3));
239:   w3 = coloring->w3;

241:   PetscCall(VecGetOwnershipRange(x1, &cstart, &cend)); /* used by ghosted vscale */
242:   if (vscale) PetscCall(VecGetArray(vscale, &vscale_array));
243:   nz = 0;

245:   if (coloring->bcols > 1) { /* use blocked insertion of Jentry */
246:     PetscInt     i, m = J->rmap->n, nbcols, bcols = coloring->bcols;
247:     PetscScalar *dy = coloring->dy, *dy_k;

249:     nbcols = 0;
250:     for (k = 0; k < ncolors; k += bcols) {
251:       /*
252:        (3-1) Loop over each column associated with color
253:        adding the perturbation to the vector w3 = x1 + dx.
254:        */

256:       dy_k = dy;
257:       if (k + bcols > ncolors) bcols = ncolors - k;
258:       for (i = 0; i < bcols; i++) {
259:         coloring->currentcolor = k + i;

261:         PetscCall(VecCopy(x1, w3));
262:         PetscCall(VecGetArray(w3, &w3_array));
263:         if (ctype == IS_COLORING_GLOBAL) w3_array -= cstart; /* shift pointer so global index can be used */
264:         if (coloring->htype[0] == 'w') {
265:           for (l = 0; l < ncolumns[k + i]; l++) {
266:             col = coloring->columns[k + i][l]; /* local column (in global index!) of the matrix we are probing for */
267:             w3_array[col] += 1.0 / dx;
268:           }
269:         } else {                  /* htype == 'ds' */
270:           vscale_array -= cstart; /* shift pointer so global index can be used */
271:           for (l = 0; l < ncolumns[k + i]; l++) {
272:             col = coloring->columns[k + i][l]; /* local column (in global index!) of the matrix we are probing for */
273:             w3_array[col] += 1.0 / vscale_array[col];
274:           }
275:           vscale_array += cstart;
276:         }
277:         if (ctype == IS_COLORING_GLOBAL) w3_array += cstart;
278:         PetscCall(VecRestoreArray(w3, &w3_array));

280:         /*
281:          (3-2) Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
282:                            w2 = F(x1 + dx) - F(x1)
283:          */
284:         PetscCall(PetscLogEventBegin(MAT_FDColoringFunction, 0, 0, 0, 0));
285:         PetscCall(VecPlaceArray(w2, dy_k)); /* place w2 to the array dy_i */
286:         PetscCall((*f)(sctx, w3, w2, fctx));
287:         PetscCall(PetscLogEventEnd(MAT_FDColoringFunction, 0, 0, 0, 0));
288:         PetscCall(VecAXPY(w2, -1.0, w1));
289:         PetscCall(VecResetArray(w2));
290:         dy_k += m; /* points to dy+i*nxloc */
291:       }

293:       /*
294:        (3-3) Loop over block rows of vector, putting results into Jacobian matrix
295:        */
296:       nrows_k = nrows[nbcols++];

298:       if (coloring->htype[0] == 'w') {
299:         for (l = 0; l < nrows_k; l++) {
300:           row = Jentry2[nz].row; /* local row index */
301:                                  /* The 'useless' ifdef is due to a bug in NVIDIA nvc 21.11, which triggers a segfault on this line. We write it in
302:              another way, and it seems work. See https://lists.mcs.anl.gov/pipermail/petsc-users/2021-December/045158.html
303:            */
304: #if defined(PETSC_USE_COMPLEX)
305:           PetscScalar *tmp = Jentry2[nz].valaddr;
306:           *tmp             = dy[row] * dx;
307: #else
308:           *(Jentry2[nz].valaddr) = dy[row] * dx;
309: #endif
310:           nz++;
311:         }
312:       } else { /* htype == 'ds' */
313:         for (l = 0; l < nrows_k; l++) {
314:           row = Jentry[nz].row; /* local row index */
315: #if defined(PETSC_USE_COMPLEX)  /* See https://lists.mcs.anl.gov/pipermail/petsc-users/2021-December/045158.html */
316:           PetscScalar *tmp = Jentry[nz].valaddr;
317:           *tmp             = dy[row] * vscale_array[Jentry[nz].col];
318: #else
319:           *(Jentry[nz].valaddr)  = dy[row] * vscale_array[Jentry[nz].col];
320: #endif
321:           nz++;
322:         }
323:       }
324:     }
325:   } else { /* bcols == 1 */
326:     for (k = 0; k < ncolors; k++) {
327:       coloring->currentcolor = k;

329:       /*
330:        (3-1) Loop over each column associated with color
331:        adding the perturbation to the vector w3 = x1 + dx.
332:        */
333:       PetscCall(VecCopy(x1, w3));
334:       PetscCall(VecGetArray(w3, &w3_array));
335:       if (ctype == IS_COLORING_GLOBAL) w3_array -= cstart; /* shift pointer so global index can be used */
336:       if (coloring->htype[0] == 'w') {
337:         for (l = 0; l < ncolumns[k]; l++) {
338:           col = coloring->columns[k][l]; /* local column (in global index!) of the matrix we are probing for */
339:           w3_array[col] += 1.0 / dx;
340:         }
341:       } else {                  /* htype == 'ds' */
342:         vscale_array -= cstart; /* shift pointer so global index can be used */
343:         for (l = 0; l < ncolumns[k]; l++) {
344:           col = coloring->columns[k][l]; /* local column (in global index!) of the matrix we are probing for */
345:           w3_array[col] += 1.0 / vscale_array[col];
346:         }
347:         vscale_array += cstart;
348:       }
349:       if (ctype == IS_COLORING_GLOBAL) w3_array += cstart;
350:       PetscCall(VecRestoreArray(w3, &w3_array));

352:       /*
353:        (3-2) Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
354:                            w2 = F(x1 + dx) - F(x1)
355:        */
356:       PetscCall(PetscLogEventBegin(MAT_FDColoringFunction, 0, 0, 0, 0));
357:       PetscCall((*f)(sctx, w3, w2, fctx));
358:       PetscCall(PetscLogEventEnd(MAT_FDColoringFunction, 0, 0, 0, 0));
359:       PetscCall(VecAXPY(w2, -1.0, w1));

361:       /*
362:        (3-3) Loop over rows of vector, putting results into Jacobian matrix
363:        */
364:       nrows_k = nrows[k];
365:       PetscCall(VecGetArray(w2, &y));
366:       if (coloring->htype[0] == 'w') {
367:         for (l = 0; l < nrows_k; l++) {
368:           row = Jentry2[nz].row; /* local row index */
369: #if defined(PETSC_USE_COMPLEX)   /* See https://lists.mcs.anl.gov/pipermail/petsc-users/2021-December/045158.html */
370:           PetscScalar *tmp = Jentry2[nz].valaddr;
371:           *tmp             = y[row] * dx;
372: #else
373:           *(Jentry2[nz].valaddr) = y[row] * dx;
374: #endif
375:           nz++;
376:         }
377:       } else { /* htype == 'ds' */
378:         for (l = 0; l < nrows_k; l++) {
379:           row = Jentry[nz].row; /* local row index */
380: #if defined(PETSC_USE_COMPLEX)  /* See https://lists.mcs.anl.gov/pipermail/petsc-users/2021-December/045158.html */
381:           PetscScalar *tmp = Jentry[nz].valaddr;
382:           *tmp             = y[row] * vscale_array[Jentry[nz].col];
383: #else
384:           *(Jentry[nz].valaddr)  = y[row] * vscale_array[Jentry[nz].col];
385: #endif
386:           nz++;
387:         }
388:       }
389:       PetscCall(VecRestoreArray(w2, &y));
390:     }
391:   }

393:   PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
394:   PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
395:   if (vscale) PetscCall(VecRestoreArray(vscale, &vscale_array));
396:   coloring->currentcolor = -1;
397:   if (!alreadyboundtocpu) PetscCall(VecBindToCPU(x1, PETSC_FALSE));
398:   PetscFunctionReturn(PETSC_SUCCESS);
399: }

401: PetscErrorCode MatFDColoringSetUp_MPIXAIJ(Mat mat, ISColoring iscoloring, MatFDColoring c)
402: {
403:   PetscMPIInt            size, *ncolsonproc, *disp, nn;
404:   PetscInt               i, n, nrows, nrows_i, j, k, m, ncols, col, *rowhit, cstart, cend, colb;
405:   const PetscInt        *is, *A_ci, *A_cj, *B_ci, *B_cj, *row = NULL, *ltog = NULL;
406:   PetscInt               nis = iscoloring->n, nctot, *cols, tmp = 0;
407:   ISLocalToGlobalMapping map   = mat->cmap->mapping;
408:   PetscInt               ctype = c->ctype, *spidxA, *spidxB, nz, bs, bs2, spidx;
409:   Mat                    A, B;
410:   PetscScalar           *A_val, *B_val, **valaddrhit;
411:   MatEntry              *Jentry;
412:   MatEntry2             *Jentry2;
413:   PetscBool              isBAIJ, isSELL;
414:   PetscInt               bcols = c->bcols;
415: #if defined(PETSC_USE_CTABLE)
416:   PetscHMapI colmap = NULL;
417: #else
418:   PetscInt *colmap = NULL;      /* local col number of off-diag col */
419: #endif

421:   PetscFunctionBegin;
422:   if (ctype == IS_COLORING_LOCAL) {
423:     PetscCheck(map, PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_INCOMP, "When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMapping");
424:     PetscCall(ISLocalToGlobalMappingGetIndices(map, &ltog));
425:   }

427:   PetscCall(MatGetBlockSize(mat, &bs));
428:   PetscCall(PetscObjectBaseTypeCompare((PetscObject)mat, MATMPIBAIJ, &isBAIJ));
429:   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISELL, &isSELL));
430:   if (isBAIJ) {
431:     Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
432:     Mat_SeqBAIJ *spA, *spB;
433:     A     = baij->A;
434:     spA   = (Mat_SeqBAIJ *)A->data;
435:     A_val = spA->a;
436:     B     = baij->B;
437:     spB   = (Mat_SeqBAIJ *)B->data;
438:     B_val = spB->a;
439:     nz    = spA->nz + spB->nz; /* total nonzero entries of mat */
440:     if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
441:     colmap = baij->colmap;
442:     PetscCall(MatGetColumnIJ_SeqBAIJ_Color(A, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &A_ci, &A_cj, &spidxA, NULL));
443:     PetscCall(MatGetColumnIJ_SeqBAIJ_Color(B, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &B_ci, &B_cj, &spidxB, NULL));

445:     if (ctype == IS_COLORING_GLOBAL && c->htype[0] == 'd') { /* create vscale for storing dx */
446:       PetscInt *garray;
447:       PetscCall(PetscMalloc1(B->cmap->n, &garray));
448:       for (i = 0; i < baij->B->cmap->n / bs; i++) {
449:         for (j = 0; j < bs; j++) garray[i * bs + j] = bs * baij->garray[i] + j;
450:       }
451:       PetscCall(VecCreateGhost(PetscObjectComm((PetscObject)mat), mat->cmap->n, PETSC_DETERMINE, B->cmap->n, garray, &c->vscale));
452:       PetscCall(VecBindToCPU(c->vscale, PETSC_TRUE));
453:       PetscCall(PetscFree(garray));
454:     }
455:   } else if (isSELL) {
456:     Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;
457:     Mat_SeqSELL *spA, *spB;
458:     A     = sell->A;
459:     spA   = (Mat_SeqSELL *)A->data;
460:     A_val = spA->val;
461:     B     = sell->B;
462:     spB   = (Mat_SeqSELL *)B->data;
463:     B_val = spB->val;
464:     nz    = spA->nz + spB->nz; /* total nonzero entries of mat */
465:     if (!sell->colmap) {
466:       /* Allow access to data structures of local part of matrix
467:        - creates aij->colmap which maps global column number to local number in part B */
468:       PetscCall(MatCreateColmap_MPISELL_Private(mat));
469:     }
470:     colmap = sell->colmap;
471:     PetscCall(MatGetColumnIJ_SeqSELL_Color(A, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &A_ci, &A_cj, &spidxA, NULL));
472:     PetscCall(MatGetColumnIJ_SeqSELL_Color(B, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &B_ci, &B_cj, &spidxB, NULL));

474:     bs = 1; /* only bs=1 is supported for non MPIBAIJ matrix */

476:     if (ctype == IS_COLORING_GLOBAL && c->htype[0] == 'd') { /* create vscale for storing dx */
477:       PetscCall(VecCreateGhost(PetscObjectComm((PetscObject)mat), mat->cmap->n, PETSC_DETERMINE, B->cmap->n, sell->garray, &c->vscale));
478:       PetscCall(VecBindToCPU(c->vscale, PETSC_TRUE));
479:     }
480:   } else {
481:     Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
482:     Mat_SeqAIJ *spA, *spB;
483:     A     = aij->A;
484:     spA   = (Mat_SeqAIJ *)A->data;
485:     A_val = spA->a;
486:     B     = aij->B;
487:     spB   = (Mat_SeqAIJ *)B->data;
488:     B_val = spB->a;
489:     nz    = spA->nz + spB->nz; /* total nonzero entries of mat */
490:     if (!aij->colmap) {
491:       /* Allow access to data structures of local part of matrix
492:        - creates aij->colmap which maps global column number to local number in part B */
493:       PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
494:     }
495:     colmap = aij->colmap;
496:     PetscCall(MatGetColumnIJ_SeqAIJ_Color(A, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &A_ci, &A_cj, &spidxA, NULL));
497:     PetscCall(MatGetColumnIJ_SeqAIJ_Color(B, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &B_ci, &B_cj, &spidxB, NULL));

499:     bs = 1; /* only bs=1 is supported for non MPIBAIJ matrix */

501:     if (ctype == IS_COLORING_GLOBAL && c->htype[0] == 'd') { /* create vscale for storing dx */
502:       PetscCall(VecCreateGhost(PetscObjectComm((PetscObject)mat), mat->cmap->n, PETSC_DETERMINE, B->cmap->n, aij->garray, &c->vscale));
503:       PetscCall(VecBindToCPU(c->vscale, PETSC_TRUE));
504:     }
505:   }

507:   m      = mat->rmap->n / bs;
508:   cstart = mat->cmap->rstart / bs;
509:   cend   = mat->cmap->rend / bs;

511:   PetscCall(PetscMalloc2(nis, &c->ncolumns, nis, &c->columns));
512:   PetscCall(PetscMalloc1(nis, &c->nrows));

514:   if (c->htype[0] == 'd') {
515:     PetscCall(PetscMalloc1(nz, &Jentry));
516:     c->matentry = Jentry;
517:   } else if (c->htype[0] == 'w') {
518:     PetscCall(PetscMalloc1(nz, &Jentry2));
519:     c->matentry2 = Jentry2;
520:   } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "htype is not supported");

522:   PetscCall(PetscMalloc2(m + 1, &rowhit, m + 1, &valaddrhit));
523:   nz = 0;
524:   PetscCall(ISColoringGetIS(iscoloring, PETSC_OWN_POINTER, PETSC_IGNORE, &c->isa));

526:   if (ctype == IS_COLORING_GLOBAL) {
527:     PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)mat), &size));
528:     PetscCall(PetscMalloc2(size, &ncolsonproc, size, &disp));
529:   }

531:   for (i = 0; i < nis; i++) { /* for each local color */
532:     PetscCall(ISGetLocalSize(c->isa[i], &n));
533:     PetscCall(ISGetIndices(c->isa[i], &is));

535:     c->ncolumns[i] = n; /* local number of columns of this color on this process */
536:     c->columns[i]  = (PetscInt *)is;

538:     if (ctype == IS_COLORING_GLOBAL) {
539:       /* Determine nctot, the total (parallel) number of columns of this color */
540:       /* ncolsonproc[j]: local ncolumns on proc[j] of this color */
541:       PetscCall(PetscMPIIntCast(n, &nn));
542:       PetscCallMPI(MPI_Allgather(&nn, 1, MPI_INT, ncolsonproc, 1, MPI_INT, PetscObjectComm((PetscObject)mat)));
543:       nctot = 0;
544:       for (j = 0; j < size; j++) nctot += ncolsonproc[j];
545:       if (!nctot) PetscCall(PetscInfo(mat, "Coloring of matrix has some unneeded colors with no corresponding rows\n"));

547:       disp[0] = 0;
548:       for (j = 1; j < size; j++) disp[j] = disp[j - 1] + ncolsonproc[j - 1];

550:       /* Get cols, the complete list of columns for this color on each process */
551:       PetscCall(PetscMalloc1(nctot + 1, &cols));
552:       PetscCallMPI(MPI_Allgatherv((void *)is, n, MPIU_INT, cols, ncolsonproc, disp, MPIU_INT, PetscObjectComm((PetscObject)mat)));
553:     } else if (ctype == IS_COLORING_LOCAL) {
554:       /* Determine local number of columns of this color on this process, including ghost points */
555:       nctot = n;
556:       cols  = (PetscInt *)is;
557:     } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not provided for this MatFDColoring type");

559:     /* Mark all rows affect by these columns */
560:     PetscCall(PetscArrayzero(rowhit, m));
561:     bs2     = bs * bs;
562:     nrows_i = 0;
563:     for (j = 0; j < nctot; j++) { /* loop over columns*/
564:       if (ctype == IS_COLORING_LOCAL) {
565:         col = ltog[cols[j]];
566:       } else {
567:         col = cols[j];
568:       }
569:       if (col >= cstart && col < cend) { /* column is in A, diagonal block of mat */
570:         tmp   = A_ci[col - cstart];
571:         row   = A_cj + tmp;
572:         nrows = A_ci[col - cstart + 1] - tmp;
573:         nrows_i += nrows;

575:         /* loop over columns of A marking them in rowhit */
576:         for (k = 0; k < nrows; k++) {
577:           /* set valaddrhit for part A */
578:           spidx            = bs2 * spidxA[tmp + k];
579:           valaddrhit[*row] = &A_val[spidx];
580:           rowhit[*row++]   = col - cstart + 1; /* local column index */
581:         }
582:       } else { /* column is in B, off-diagonal block of mat */
583: #if defined(PETSC_USE_CTABLE)
584:         PetscCall(PetscHMapIGetWithDefault(colmap, col + 1, 0, &colb));
585:         colb--;
586: #else
587:         colb = colmap[col] - 1; /* local column index */
588: #endif
589:         if (colb == -1) {
590:           nrows = 0;
591:         } else {
592:           colb  = colb / bs;
593:           tmp   = B_ci[colb];
594:           row   = B_cj + tmp;
595:           nrows = B_ci[colb + 1] - tmp;
596:         }
597:         nrows_i += nrows;
598:         /* loop over columns of B marking them in rowhit */
599:         for (k = 0; k < nrows; k++) {
600:           /* set valaddrhit for part B */
601:           spidx            = bs2 * spidxB[tmp + k];
602:           valaddrhit[*row] = &B_val[spidx];
603:           rowhit[*row++]   = colb + 1 + cend - cstart; /* local column index */
604:         }
605:       }
606:     }
607:     c->nrows[i] = nrows_i;

609:     if (c->htype[0] == 'd') {
610:       for (j = 0; j < m; j++) {
611:         if (rowhit[j]) {
612:           Jentry[nz].row     = j;             /* local row index */
613:           Jentry[nz].col     = rowhit[j] - 1; /* local column index */
614:           Jentry[nz].valaddr = valaddrhit[j]; /* address of mat value for this entry */
615:           nz++;
616:         }
617:       }
618:     } else { /* c->htype == 'wp' */
619:       for (j = 0; j < m; j++) {
620:         if (rowhit[j]) {
621:           Jentry2[nz].row     = j;             /* local row index */
622:           Jentry2[nz].valaddr = valaddrhit[j]; /* address of mat value for this entry */
623:           nz++;
624:         }
625:       }
626:     }
627:     if (ctype == IS_COLORING_GLOBAL) PetscCall(PetscFree(cols));
628:   }
629:   if (ctype == IS_COLORING_GLOBAL) PetscCall(PetscFree2(ncolsonproc, disp));

631:   if (bcols > 1) { /* reorder Jentry for faster MatFDColoringApply() */
632:     PetscCall(MatFDColoringSetUpBlocked_AIJ_Private(mat, c, nz));
633:   }

635:   if (isBAIJ) {
636:     PetscCall(MatRestoreColumnIJ_SeqBAIJ_Color(A, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &A_ci, &A_cj, &spidxA, NULL));
637:     PetscCall(MatRestoreColumnIJ_SeqBAIJ_Color(B, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &B_ci, &B_cj, &spidxB, NULL));
638:     PetscCall(PetscMalloc1(bs * mat->rmap->n, &c->dy));
639:   } else if (isSELL) {
640:     PetscCall(MatRestoreColumnIJ_SeqSELL_Color(A, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &A_ci, &A_cj, &spidxA, NULL));
641:     PetscCall(MatRestoreColumnIJ_SeqSELL_Color(B, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &B_ci, &B_cj, &spidxB, NULL));
642:   } else {
643:     PetscCall(MatRestoreColumnIJ_SeqAIJ_Color(A, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &A_ci, &A_cj, &spidxA, NULL));
644:     PetscCall(MatRestoreColumnIJ_SeqAIJ_Color(B, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &B_ci, &B_cj, &spidxB, NULL));
645:   }

647:   PetscCall(ISColoringRestoreIS(iscoloring, PETSC_OWN_POINTER, &c->isa));
648:   PetscCall(PetscFree2(rowhit, valaddrhit));

650:   if (ctype == IS_COLORING_LOCAL) PetscCall(ISLocalToGlobalMappingRestoreIndices(map, &ltog));
651:   PetscCall(PetscInfo(c, "ncolors %" PetscInt_FMT ", brows %" PetscInt_FMT " and bcols %" PetscInt_FMT " are used.\n", c->ncolors, c->brows, c->bcols));
652:   PetscFunctionReturn(PETSC_SUCCESS);
653: }

655: PetscErrorCode MatFDColoringCreate_MPIXAIJ(Mat mat, ISColoring iscoloring, MatFDColoring c)
656: {
657:   PetscInt  bs, nis = iscoloring->n, m = mat->rmap->n;
658:   PetscBool isBAIJ, isSELL;

660:   PetscFunctionBegin;
661:   /* set default brows and bcols for speedup inserting the dense matrix into sparse Jacobian;
662:    bcols is chosen s.t. dy-array takes 50% of memory space as mat */
663:   PetscCall(MatGetBlockSize(mat, &bs));
664:   PetscCall(PetscObjectBaseTypeCompare((PetscObject)mat, MATMPIBAIJ, &isBAIJ));
665:   PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISELL, &isSELL));
666:   if (isBAIJ || m == 0) {
667:     c->brows = m;
668:     c->bcols = 1;
669:   } else if (isSELL) {
670:     /* bcols is chosen s.t. dy-array takes 50% of local memory space as mat */
671:     Mat_MPISELL *sell = (Mat_MPISELL *)mat->data;
672:     Mat_SeqSELL *spA, *spB;
673:     Mat          A, B;
674:     PetscInt     nz, brows, bcols;
675:     PetscReal    mem;

677:     bs = 1; /* only bs=1 is supported for MPISELL matrix */

679:     A     = sell->A;
680:     spA   = (Mat_SeqSELL *)A->data;
681:     B     = sell->B;
682:     spB   = (Mat_SeqSELL *)B->data;
683:     nz    = spA->nz + spB->nz; /* total local nonzero entries of mat */
684:     mem   = nz * (sizeof(PetscScalar) + sizeof(PetscInt)) + 3 * m * sizeof(PetscInt);
685:     bcols = (PetscInt)(0.5 * mem / (m * sizeof(PetscScalar)));
686:     brows = 1000 / bcols;
687:     if (bcols > nis) bcols = nis;
688:     if (brows == 0 || brows > m) brows = m;
689:     c->brows = brows;
690:     c->bcols = bcols;
691:   } else { /* mpiaij matrix */
692:     /* bcols is chosen s.t. dy-array takes 50% of local memory space as mat */
693:     Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
694:     Mat_SeqAIJ *spA, *spB;
695:     Mat         A, B;
696:     PetscInt    nz, brows, bcols;
697:     PetscReal   mem;

699:     bs = 1; /* only bs=1 is supported for MPIAIJ matrix */

701:     A     = aij->A;
702:     spA   = (Mat_SeqAIJ *)A->data;
703:     B     = aij->B;
704:     spB   = (Mat_SeqAIJ *)B->data;
705:     nz    = spA->nz + spB->nz; /* total local nonzero entries of mat */
706:     mem   = nz * (sizeof(PetscScalar) + sizeof(PetscInt)) + 3 * m * sizeof(PetscInt);
707:     bcols = (PetscInt)(0.5 * mem / (m * sizeof(PetscScalar)));
708:     brows = 1000 / bcols;
709:     if (bcols > nis) bcols = nis;
710:     if (brows == 0 || brows > m) brows = m;
711:     c->brows = brows;
712:     c->bcols = bcols;
713:   }

715:   c->M       = mat->rmap->N / bs; /* set the global rows and columns and local rows */
716:   c->N       = mat->cmap->N / bs;
717:   c->m       = mat->rmap->n / bs;
718:   c->rstart  = mat->rmap->rstart / bs;
719:   c->ncolors = nis;
720:   PetscFunctionReturn(PETSC_SUCCESS);
721: }

723: /*@C

725:     MatFDColoringSetValues - takes a matrix in compressed color format and enters the matrix into a PETSc `Mat`

727:    Collective

729:    Input Parameters:
730: +    J - the sparse matrix
731: .    coloring - created with `MatFDColoringCreate()` and a local coloring
732: -    y - column major storage of matrix values with one color of values per column, the number of rows of y should match
733:          the number of local rows of `J` and the number of columns is the number of colors.

735:    Level: intermediate

737:    Notes:
738:    The matrix in compressed color format may come from an automatic differentiation code

740:    The code will be slightly faster if `MatFDColoringSetBlockSize`(coloring,`PETSC_DEFAULT`,nc); is called immediately after creating the coloring

742: .seealso: [](ch_matrices), `Mat`, `MatFDColoringCreate()`, `ISColoring`, `ISColoringCreate()`, `ISColoringSetType()`, `IS_COLORING_LOCAL`, `MatFDColoringSetBlockSize()`
743: @*/
744: PetscErrorCode MatFDColoringSetValues(Mat J, MatFDColoring coloring, const PetscScalar *y)
745: {
746:   MatEntry2      *Jentry2;
747:   PetscInt        row, i, nrows_k, l, ncolors, nz = 0, bcols, nbcols = 0;
748:   const PetscInt *nrows;
749:   PetscBool       eq;

751:   PetscFunctionBegin;
754:   PetscCall(PetscObjectCompareId((PetscObject)J, coloring->matid, &eq));
755:   PetscCheck(eq, PetscObjectComm((PetscObject)J), PETSC_ERR_ARG_WRONG, "Matrix used with MatFDColoringSetValues() must be that used with MatFDColoringCreate()");
756:   Jentry2 = coloring->matentry2;
757:   nrows   = coloring->nrows;
758:   ncolors = coloring->ncolors;
759:   bcols   = coloring->bcols;

761:   for (i = 0; i < ncolors; i += bcols) {
762:     nrows_k = nrows[nbcols++];
763:     for (l = 0; l < nrows_k; l++) {
764:       row                      = Jentry2[nz].row; /* local row index */
765:       *(Jentry2[nz++].valaddr) = y[row];
766:     }
767:     y += bcols * coloring->m;
768:   }
769:   PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY));
770:   PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY));
771:   PetscFunctionReturn(PETSC_SUCCESS);
772: }