Actual source code: matscalapack.c
1: #include <petsc/private/petscscalapack.h>
3: const char ScaLAPACKCitation[] = "@BOOK{scalapack-user-guide,\n"
4: " AUTHOR = {L. S. Blackford and J. Choi and A. Cleary and E. D'Azevedo and\n"
5: " J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and\n"
6: " G. Henry and A. Petitet and K. Stanley and D. Walker and R. C. Whaley},\n"
7: " TITLE = {Sca{LAPACK} Users' Guide},\n"
8: " PUBLISHER = {SIAM},\n"
9: " ADDRESS = {Philadelphia, PA},\n"
10: " YEAR = 1997\n"
11: "}\n";
12: static PetscBool ScaLAPACKCite = PETSC_FALSE;
14: #define DEFAULT_BLOCKSIZE 64
16: /*
17: The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that
18: is attached to a communicator; in this case the attribute is a Mat_ScaLAPACK_Grid
19: */
20: static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID;
22: static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void)
23: {
24: PetscFunctionBegin;
25: PetscCall(PetscInfo(NULL, "Freeing Petsc_ScaLAPACK_keyval\n"));
26: PetscCallMPI(MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval));
27: PetscFunctionReturn(PETSC_SUCCESS);
28: }
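
The attribute-caching pattern this keyval supports, shown as a standalone sketch (not part of matscalapack.c): look up the attribute on the communicator and create-and-attach it only when it is missing. The payload type GridCache and the helper name are hypothetical; plain MPI/stdlib calls stand in for the PETSc wrappers used later in this file.

   /* illustrative sketch; assumes <mpi.h> and <stdlib.h>, and that keyval was
      created earlier with MPI_Comm_create_keyval() */
   typedef struct {
     int nprow, npcol; /* hypothetical cached grid data */
   } GridCache;

   static int GridCacheGetOrCreate(MPI_Comm comm, int keyval, GridCache **cache)
   {
     int found;
     MPI_Comm_get_attr(comm, keyval, cache, &found);         /* is a cache already attached? */
     if (!found) {
       *cache = (GridCache *)calloc(1, sizeof(GridCache));   /* no: create it ...            */
       MPI_Comm_set_attr(comm, keyval, *cache);               /* ... and attach it            */
     }
     return 0;
   }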
30: static PetscErrorCode MatView_ScaLAPACK(Mat A, PetscViewer viewer)
31: {
32: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
33: PetscBool iascii;
34: PetscViewerFormat format;
35: Mat Adense;
37: PetscFunctionBegin;
38: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
39: if (iascii) {
40: PetscCall(PetscViewerGetFormat(viewer, &format));
41: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
42: PetscCall(PetscViewerASCIIPrintf(viewer, "block sizes: %d,%d\n", (int)a->mb, (int)a->nb));
43: PetscCall(PetscViewerASCIIPrintf(viewer, "grid height=%d, grid width=%d\n", (int)a->grid->nprow, (int)a->grid->npcol));
44: PetscCall(PetscViewerASCIIPrintf(viewer, "coordinates of process owning first row and column: (%d,%d)\n", (int)a->rsrc, (int)a->csrc));
45: PetscCall(PetscViewerASCIIPrintf(viewer, "dimension of largest local matrix: %d x %d\n", (int)a->locr, (int)a->locc));
46: PetscFunctionReturn(PETSC_SUCCESS);
47: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
48: PetscFunctionReturn(PETSC_SUCCESS);
49: }
50: }
51: /* convert to dense format and call MatView() */
52: PetscCall(MatConvert(A, MATDENSE, MAT_INITIAL_MATRIX, &Adense));
53: PetscCall(MatView(Adense, viewer));
54: PetscCall(MatDestroy(&Adense));
55: PetscFunctionReturn(PETSC_SUCCESS);
56: }
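
For reference, a minimal way to reach the info branch above from user code, assuming an already assembled MATSCALAPACK matrix A (sketch, not part of this file):

   PetscViewer viewer = PETSC_VIEWER_STDOUT_WORLD;
   PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO));
   PetscCall(MatView(A, viewer)); /* prints block sizes, grid shape, largest local dimensions */
   PetscCall(PetscViewerPopFormat(viewer));

The same output can be requested on the command line with -mat_view ::ascii_info; any other ASCII format falls through to the dense conversion at the end of the routine.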
58: static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A, MatInfoType flag, MatInfo *info)
59: {
60: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
61: PetscLogDouble isend[2], irecv[2];
63: PetscFunctionBegin;
64: info->block_size = 1.0;
66: isend[0] = a->lld * a->locc; /* locally allocated */
67: isend[1] = a->locr * a->locc; /* used submatrix */
68: if (flag == MAT_LOCAL) {
69: info->nz_allocated = isend[0];
70: info->nz_used = isend[1];
71: } else if (flag == MAT_GLOBAL_MAX) {
72: PetscCall(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)A)));
73: info->nz_allocated = irecv[0];
74: info->nz_used = irecv[1];
75: } else if (flag == MAT_GLOBAL_SUM) {
76: PetscCall(MPIU_Allreduce(isend, irecv, 2, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)A)));
77: info->nz_allocated = irecv[0];
78: info->nz_used = irecv[1];
79: }
81: info->nz_unneeded = 0;
82: info->assemblies = A->num_ass;
83: info->mallocs = 0;
84: info->memory = 0; /* REVIEW ME */
85: info->fill_ratio_given = 0;
86: info->fill_ratio_needed = 0;
87: info->factor_mallocs = 0;
88: PetscFunctionReturn(PETSC_SUCCESS);
89: }
91: PetscErrorCode MatSetOption_ScaLAPACK(Mat A, MatOption op, PetscBool flg)
92: {
93: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
95: PetscFunctionBegin;
96: switch (op) {
97: case MAT_NEW_NONZERO_LOCATIONS:
98: case MAT_NEW_NONZERO_LOCATION_ERR:
99: case MAT_NEW_NONZERO_ALLOCATION_ERR:
100: case MAT_SYMMETRIC:
101: case MAT_SORTED_FULL:
102: case MAT_HERMITIAN:
103: break;
104: case MAT_ROW_ORIENTED:
105: a->roworiented = flg;
106: break;
107: default:
108: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unsupported option %s", MatOptions[op]);
109: }
110: PetscFunctionReturn(PETSC_SUCCESS);
111: }
113: static PetscErrorCode MatSetValues_ScaLAPACK(Mat A, PetscInt nr, const PetscInt *rows, PetscInt nc, const PetscInt *cols, const PetscScalar *vals, InsertMode imode)
114: {
115: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
116: PetscInt i, j;
117: PetscBLASInt gridx, gcidx, lridx, lcidx, rsrc, csrc;
118: PetscBool roworiented = a->roworiented;
120: PetscFunctionBegin;
121: PetscCheck(imode == INSERT_VALUES || imode == ADD_VALUES, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)imode);
122: for (i = 0; i < nr; i++) {
123: if (rows[i] < 0) continue;
124: PetscCall(PetscBLASIntCast(rows[i] + 1, &gridx));
125: for (j = 0; j < nc; j++) {
126: if (cols[j] < 0) continue;
127: PetscCall(PetscBLASIntCast(cols[j] + 1, &gcidx));
128: PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
129: if (rsrc == a->grid->myrow && csrc == a->grid->mycol) {
130: if (roworiented) {
131: switch (imode) {
132: case INSERT_VALUES:
133: a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i * nc + j];
134: break;
135: default:
136: a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i * nc + j];
137: break;
138: }
139: } else {
140: switch (imode) {
141: case INSERT_VALUES:
142: a->loc[lridx - 1 + (lcidx - 1) * a->lld] = vals[i + j * nr];
143: break;
144: default:
145: a->loc[lridx - 1 + (lcidx - 1) * a->lld] += vals[i + j * nr];
146: break;
147: }
148: }
149: } else {
150: PetscCheck(!A->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process entry even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set");
151: A->assembled = PETSC_FALSE;
152: PetscCall(MatStashValuesRow_Private(&A->stash, rows[i], 1, cols + j, roworiented ? vals + i * nc + j : vals + i + j * nr, (PetscBool)(imode == ADD_VALUES)));
153: }
154: }
155: }
156: PetscFunctionReturn(PETSC_SUCCESS);
157: }
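
An assembly sketch that drives the routine above (illustrative, not part of this file): the PETSc layout of a MATSCALAPACK matrix is a contiguous, roughly equal split of rows and columns, so entries whose block-cyclic owner is another process are stashed here and applied later in MatAssemblyEnd_ScaLAPACK(). PETSC_DECIDE for the block sizes is assumed to select the default block size.

   Mat         A;
   PetscInt    i, rstart, rend, N = 100;
   PetscScalar one = 1.0;

   PetscCall(MatCreateScaLAPACK(PETSC_COMM_WORLD, PETSC_DECIDE, PETSC_DECIDE, N, N, 0, 0, &A));
   PetscCall(MatGetOwnershipRange(A, &rstart, &rend));  /* contiguous ownership as seen by PETSc */
   for (i = rstart; i < rend; i++) PetscCall(MatSetValues(A, 1, &i, 1, &i, &one, INSERT_VALUES));
   PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));  /* ships stashed off-process entries */
   PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
   PetscCall(MatDestroy(&A));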
159: static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A, PetscBool transpose, PetscScalar beta, const PetscScalar *x, PetscScalar *y)
160: {
161: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
162: PetscScalar *x2d, *y2d, alpha = 1.0;
163: const PetscInt *ranges;
164: PetscBLASInt xdesc[9], ydesc[9], x2desc[9], y2desc[9], mb, nb, lszx, lszy, zero = 0, one = 1, xlld, ylld, info;
166: PetscFunctionBegin;
167: if (transpose) {
168: /* create ScaLAPACK descriptors for vectors (1d block distribution) */
169: PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
170: PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */
171: xlld = PetscMax(1, A->rmap->n);
172: PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info));
173: PetscCheckScaLapackInfo("descinit", info);
174: PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
175: PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* y block size */
176: ylld = 1;
177: PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &ylld, &info));
178: PetscCheckScaLapackInfo("descinit", info);
180: /* allocate 2d vectors */
181: lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
182: lszy = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
183: PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d));
184: xlld = PetscMax(1, lszx);
186: /* create ScaLAPACK descriptors for vectors (2d block distribution) */
187: PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info));
188: PetscCheckScaLapackInfo("descinit", info);
189: PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &ylld, &info));
190: PetscCheckScaLapackInfo("descinit", info);
192: /* redistribute x as a column of a 2d matrix */
193: PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, (PetscScalar *)x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol));
195: /* redistribute y as a row of a 2d matrix */
196: if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxrow));
198: /* call PBLAS subroutine */
199: PetscCallBLAS("PBLASgemv", PBLASgemv_("T", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));
201: /* redistribute y from a row of a 2d matrix */
202: PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxrow));
204: } else { /* non-transpose */
206: /* create ScaLAPACK descriptors for vectors (1d block distribution) */
207: PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
208: PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* x block size */
209: xlld = 1;
210: PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &xlld, &info));
211: PetscCheckScaLapackInfo("descinit", info);
212: PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
213: PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* y block size */
214: ylld = PetscMax(1, A->rmap->n);
215: PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ydesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &ylld, &info));
216: PetscCheckScaLapackInfo("descinit", info);
218: /* allocate 2d vectors */
219: lszy = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
220: lszx = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
221: PetscCall(PetscMalloc2(lszx, &x2d, lszy, &y2d));
222: ylld = PetscMax(1, lszy);
224: /* create ScaLAPACK descriptors for vectors (2d block distribution) */
225: PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &xlld, &info));
226: PetscCheckScaLapackInfo("descinit", info);
227: PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(y2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &ylld, &info));
228: PetscCheckScaLapackInfo("descinit", info);
230: /* redistribute x as a row of a 2d matrix */
231: PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, (PetscScalar *)x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxrow));
233: /* redistribute y as a column of a 2d matrix */
234: if (beta != 0.0) PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y, &one, &one, ydesc, y2d, &one, &one, y2desc, &a->grid->ictxcol));
236: /* call PBLAS subroutine */
237: PetscCallBLAS("PBLASgemv", PBLASgemv_("N", &a->M, &a->N, &alpha, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &one, &beta, y2d, &one, &one, y2desc, &one));
239: /* redistribute y from a column of a 2d matrix */
240: PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, y2d, &one, &one, y2desc, y, &one, &one, ydesc, &a->grid->ictxcol));
241: }
242: PetscCall(PetscFree2(x2d, y2d));
243: PetscFunctionReturn(PETSC_SUCCESS);
244: }
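
As a reading aid for the descinit calls above and below (standard ScaLAPACK convention, restated here rather than defined by this file), an array descriptor is a 9-entry integer array:

   /* desc[0] = DTYPE (1 for a dense matrix)   desc[1] = CTXT (BLACS context)
      desc[2] = M  (global rows)               desc[3] = N  (global columns)
      desc[4] = MB (row block size)            desc[5] = NB (column block size)
      desc[6] = RSRC (process row of row 0)    desc[7] = CSRC (process column of column 0)
      desc[8] = LLD (local leading dimension) */

So the routine above builds 1d descriptors for the PETSc vectors on the row/column subcontexts (ictxrow/ictxcol), redistributes them onto the 2d grid with the SCALAPACKgemr2d wrapper, applies PBLASgemv, and redistributes the result back.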
246: static PetscErrorCode MatMult_ScaLAPACK(Mat A, Vec x, Vec y)
247: {
248: const PetscScalar *xarray;
249: PetscScalar *yarray;
251: PetscFunctionBegin;
252: PetscCall(VecGetArrayRead(x, &xarray));
253: PetscCall(VecGetArray(y, &yarray));
254: PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, 0.0, xarray, yarray));
255: PetscCall(VecRestoreArrayRead(x, &xarray));
256: PetscCall(VecRestoreArray(y, &yarray));
257: PetscFunctionReturn(PETSC_SUCCESS);
258: }
260: static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A, Vec x, Vec y)
261: {
262: const PetscScalar *xarray;
263: PetscScalar *yarray;
265: PetscFunctionBegin;
266: PetscCall(VecGetArrayRead(x, &xarray));
267: PetscCall(VecGetArray(y, &yarray));
268: PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, 0.0, xarray, yarray));
269: PetscCall(VecRestoreArrayRead(x, &xarray));
270: PetscCall(VecRestoreArray(y, &yarray));
271: PetscFunctionReturn(PETSC_SUCCESS);
272: }
274: static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z)
275: {
276: const PetscScalar *xarray;
277: PetscScalar *zarray;
279: PetscFunctionBegin;
280: if (y != z) PetscCall(VecCopy(y, z));
281: PetscCall(VecGetArrayRead(x, &xarray));
282: PetscCall(VecGetArray(z, &zarray));
283: PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_FALSE, 1.0, xarray, zarray));
284: PetscCall(VecRestoreArrayRead(x, &xarray));
285: PetscCall(VecRestoreArray(z, &zarray));
286: PetscFunctionReturn(PETSC_SUCCESS);
287: }
289: static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A, Vec x, Vec y, Vec z)
290: {
291: const PetscScalar *xarray;
292: PetscScalar *zarray;
294: PetscFunctionBegin;
295: if (y != z) PetscCall(VecCopy(y, z));
296: PetscCall(VecGetArrayRead(x, &xarray));
297: PetscCall(VecGetArray(z, &zarray));
298: PetscCall(MatMultXXXYYY_ScaLAPACK(A, PETSC_TRUE, 1.0, xarray, zarray));
299: PetscCall(VecRestoreArrayRead(x, &xarray));
300: PetscCall(VecRestoreArray(z, &zarray));
301: PetscFunctionReturn(PETSC_SUCCESS);
302: }
304: PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C)
305: {
306: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
307: Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data;
308: Mat_ScaLAPACK *c = (Mat_ScaLAPACK *)C->data;
309: PetscScalar sone = 1.0, zero = 0.0;
310: PetscBLASInt one = 1;
312: PetscFunctionBegin;
313: PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "N", &a->M, &b->N, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc));
314: C->assembled = PETSC_TRUE;
315: PetscFunctionReturn(PETSC_SUCCESS);
316: }
318: PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C)
319: {
320: PetscFunctionBegin;
321: PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
322: PetscCall(MatSetType(C, MATSCALAPACK));
323: PetscCall(MatSetUp(C));
324: C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK;
325: PetscFunctionReturn(PETSC_SUCCESS);
326: }
328: static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A, Mat B, Mat C)
329: {
330: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
331: Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data;
332: Mat_ScaLAPACK *c = (Mat_ScaLAPACK *)C->data;
333: PetscScalar sone = 1.0, zero = 0.0;
334: PetscBLASInt one = 1;
336: PetscFunctionBegin;
337: PetscCallBLAS("PBLASgemm", PBLASgemm_("N", "T", &a->M, &b->M, &a->N, &sone, a->loc, &one, &one, a->desc, b->loc, &one, &one, b->desc, &zero, c->loc, &one, &one, c->desc));
338: C->assembled = PETSC_TRUE;
339: PetscFunctionReturn(PETSC_SUCCESS);
340: }
342: static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A, Mat B, PetscReal fill, Mat C)
343: {
344: PetscFunctionBegin;
345: PetscCall(MatSetSizes(C, A->rmap->n, B->rmap->n, PETSC_DECIDE, PETSC_DECIDE));
346: PetscCall(MatSetType(C, MATSCALAPACK));
347: PetscCall(MatSetUp(C));
348: PetscFunctionReturn(PETSC_SUCCESS);
349: }
351: static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C)
352: {
353: PetscFunctionBegin;
354: C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK;
355: C->ops->productsymbolic = MatProductSymbolic_AB;
356: PetscFunctionReturn(PETSC_SUCCESS);
357: }
359: static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C)
360: {
361: PetscFunctionBegin;
362: C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK;
363: C->ops->productsymbolic = MatProductSymbolic_ABt;
364: PetscFunctionReturn(PETSC_SUCCESS);
365: }
367: PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C)
368: {
369: Mat_Product *product = C->product;
371: PetscFunctionBegin;
372: switch (product->type) {
373: case MATPRODUCT_AB:
374: PetscCall(MatProductSetFromOptions_ScaLAPACK_AB(C));
375: break;
376: case MATPRODUCT_ABt:
377: PetscCall(MatProductSetFromOptions_ScaLAPACK_ABt(C));
378: break;
379: default:
380: SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices", MatProductTypes[product->type]);
381: }
382: PetscFunctionReturn(PETSC_SUCCESS);
383: }
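
Usage sketch for the product paths registered above (assuming two assembled MATSCALAPACK matrices A and B of compatible sizes; not part of this file):

   Mat C, D;
   PetscCall(MatMatMult(A, B, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &C));          /* C = A*B   -> AB path  */
   PetscCall(MatMatTransposeMult(A, B, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &D)); /* D = A*B^T -> ABt path */
   PetscCall(MatDestroy(&C));
   PetscCall(MatDestroy(&D));

Other product types fall into the default branch above and raise the unsupported-type error.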
385: static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A, Vec D)
386: {
387: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
388: PetscScalar *darray, *d2d, v;
389: const PetscInt *ranges;
390: PetscBLASInt j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info;
392: PetscFunctionBegin;
393: PetscCall(VecGetArray(D, &darray));
395: if (A->rmap->N <= A->cmap->N) { /* row version */
397: /* create ScaLAPACK descriptor for vector (1d block distribution) */
398: PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
399: PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */
400: dlld = PetscMax(1, A->rmap->n);
401: PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info));
402: PetscCheckScaLapackInfo("descinit", info);
404: /* allocate 2d vector */
405: lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
406: PetscCall(PetscCalloc1(lszd, &d2d));
407: dlld = PetscMax(1, lszd);
409: /* create ScaLAPACK descriptor for vector (2d block distribution) */
410: PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info));
411: PetscCheckScaLapackInfo("descinit", info);
413: /* collect diagonal */
414: for (j = 1; j <= a->M; j++) {
415: PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("R", " ", &v, a->loc, &j, &j, a->desc));
416: PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &j, &one, d2desc, &v));
417: }
419: /* redistribute d from a column of a 2d matrix */
420: PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxcol));
421: PetscCall(PetscFree(d2d));
423: } else { /* column version */
425: /* create ScaLAPACK descriptor for vector (1d block distribution) */
426: PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
427: PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */
428: dlld = 1;
429: PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info));
430: PetscCheckScaLapackInfo("descinit", info);
432: /* allocate 2d vector */
433: lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
434: PetscCall(PetscCalloc1(lszd, &d2d));
436: /* create ScaLAPACK descriptor for vector (2d block distribution) */
437: PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info));
438: PetscCheckScaLapackInfo("descinit", info);
440: /* collect diagonal */
441: for (j = 1; j <= a->N; j++) {
442: PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("C", " ", &v, a->loc, &j, &j, a->desc));
443: PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(d2d, &one, &j, d2desc, &v));
444: }
446: /* redistribute d from a row of a 2d matrix */
447: PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, d2d, &one, &one, d2desc, darray, &one, &one, ddesc, &a->grid->ictxrow));
448: PetscCall(PetscFree(d2d));
449: }
451: PetscCall(VecRestoreArray(D, &darray));
452: PetscCall(VecAssemblyBegin(D));
453: PetscCall(VecAssemblyEnd(D));
454: PetscFunctionReturn(PETSC_SUCCESS);
455: }
457: static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A, Vec L, Vec R)
458: {
459: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
460: const PetscScalar *d;
461: const PetscInt *ranges;
462: PetscScalar *d2d;
463: PetscBLASInt i, j, ddesc[9], d2desc[9], mb, nb, lszd, zero = 0, one = 1, dlld, info;
465: PetscFunctionBegin;
466: if (R) {
467: PetscCall(VecGetArrayRead(R, (const PetscScalar **)&d));
468: /* create ScaLAPACK descriptor for vector (1d block distribution) */
469: PetscCall(PetscLayoutGetRanges(A->cmap, &ranges));
470: PetscCall(PetscBLASIntCast(ranges[1], &nb)); /* D block size */
471: dlld = 1;
472: PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &one, &a->N, &one, &nb, &zero, &zero, &a->grid->ictxrow, &dlld, &info));
473: PetscCheckScaLapackInfo("descinit", info);
475: /* allocate 2d vector */
476: lszd = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
477: PetscCall(PetscCalloc1(lszd, &d2d));
479: /* create ScaLAPACK descriptor for vector (2d block distribution) */
480: PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &one, &a->N, &one, &a->nb, &zero, &zero, &a->grid->ictxt, &dlld, &info));
481: PetscCheckScaLapackInfo("descinit", info);
483: /* redistribute d to a row of a 2d matrix */
484: PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&one, &a->N, (PetscScalar *)d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxrow));
486: /* broadcast along process columns */
487: if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld);
488: else Cdgebr2d(a->grid->ictxt, "C", " ", 1, lszd, d2d, dlld, 0, a->grid->mycol);
490: /* local scaling */
491: for (j = 0; j < a->locc; j++)
492: for (i = 0; i < a->locr; i++) a->loc[i + j * a->lld] *= d2d[j];
494: PetscCall(PetscFree(d2d));
495: PetscCall(VecRestoreArrayRead(R, (const PetscScalar **)&d));
496: }
497: if (L) {
498: PetscCall(VecGetArrayRead(L, (const PetscScalar **)&d));
499: /* create ScaLAPACK descriptor for vector (1d block distribution) */
500: PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
501: PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* D block size */
502: dlld = PetscMax(1, A->rmap->n);
503: PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(ddesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &dlld, &info));
504: PetscCheckScaLapackInfo("descinit", info);
506: /* allocate 2d vector */
507: lszd = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
508: PetscCall(PetscCalloc1(lszd, &d2d));
509: dlld = PetscMax(1, lszd);
511: /* create ScaLAPACK descriptor for vector (2d block distribution) */
512: PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(d2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &dlld, &info));
513: PetscCheckScaLapackInfo("descinit", info);
515: /* redistribute d to a column of a 2d matrix */
516: PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, (PetscScalar *)d, &one, &one, ddesc, d2d, &one, &one, d2desc, &a->grid->ictxcol));
518: /* broadcast along process rows */
519: if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld);
520: else Cdgebr2d(a->grid->ictxt, "R", " ", lszd, 1, d2d, dlld, a->grid->myrow, 0);
522: /* local scaling */
523: for (i = 0; i < a->locr; i++)
524: for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] *= d2d[i];
526: PetscCall(PetscFree(d2d));
527: PetscCall(VecRestoreArrayRead(L, (const PetscScalar **)&d));
528: }
529: PetscFunctionReturn(PETSC_SUCCESS);
530: }
532: static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A, PetscBool *missing, PetscInt *d)
533: {
534: PetscFunctionBegin;
535: *missing = PETSC_FALSE;
536: PetscFunctionReturn(PETSC_SUCCESS);
537: }
539: static PetscErrorCode MatScale_ScaLAPACK(Mat X, PetscScalar a)
540: {
541: Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data;
542: PetscBLASInt n, one = 1;
544: PetscFunctionBegin;
545: n = x->lld * x->locc;
546: PetscCallBLAS("BLASscal", BLASscal_(&n, &a, x->loc, &one));
547: PetscFunctionReturn(PETSC_SUCCESS);
548: }
550: static PetscErrorCode MatShift_ScaLAPACK(Mat X, PetscScalar alpha)
551: {
552: Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data;
553: PetscBLASInt i, n;
554: PetscScalar v;
556: PetscFunctionBegin;
557: n = PetscMin(x->M, x->N);
558: for (i = 1; i <= n; i++) {
559: PetscCallBLAS("SCALAPACKelget", SCALAPACKelget_("-", " ", &v, x->loc, &i, &i, x->desc));
560: v += alpha;
561: PetscCallBLAS("SCALAPACKelset", SCALAPACKelset_(x->loc, &i, &i, x->desc, &v));
562: }
563: PetscFunctionReturn(PETSC_SUCCESS);
564: }
566: static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y, PetscScalar alpha, Mat X, MatStructure str)
567: {
568: Mat_ScaLAPACK *x = (Mat_ScaLAPACK *)X->data;
569: Mat_ScaLAPACK *y = (Mat_ScaLAPACK *)Y->data;
570: PetscBLASInt one = 1;
571: PetscScalar beta = 1.0;
573: PetscFunctionBegin;
574: MatScaLAPACKCheckDistribution(Y, 1, X, 3);
575: PetscCallBLAS("SCALAPACKmatadd", SCALAPACKmatadd_(&x->M, &x->N, &alpha, x->loc, &one, &one, x->desc, &beta, y->loc, &one, &one, y->desc));
576: PetscCall(PetscObjectStateIncrease((PetscObject)Y));
577: PetscFunctionReturn(PETSC_SUCCESS);
578: }
580: static PetscErrorCode MatCopy_ScaLAPACK(Mat A, Mat B, MatStructure str)
581: {
582: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
583: Mat_ScaLAPACK *b = (Mat_ScaLAPACK *)B->data;
585: PetscFunctionBegin;
586: PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc));
587: PetscCall(PetscObjectStateIncrease((PetscObject)B));
588: PetscFunctionReturn(PETSC_SUCCESS);
589: }
591: static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A, MatDuplicateOption op, Mat *B)
592: {
593: Mat Bs;
594: MPI_Comm comm;
595: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b;
597: PetscFunctionBegin;
598: PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
599: PetscCall(MatCreate(comm, &Bs));
600: PetscCall(MatSetSizes(Bs, A->rmap->n, A->cmap->n, PETSC_DECIDE, PETSC_DECIDE));
601: PetscCall(MatSetType(Bs, MATSCALAPACK));
602: b = (Mat_ScaLAPACK *)Bs->data;
603: b->M = a->M;
604: b->N = a->N;
605: b->mb = a->mb;
606: b->nb = a->nb;
607: b->rsrc = a->rsrc;
608: b->csrc = a->csrc;
609: PetscCall(MatSetUp(Bs));
610: *B = Bs;
611: if (op == MAT_COPY_VALUES) PetscCall(PetscArraycpy(b->loc, a->loc, a->lld * a->locc));
612: Bs->assembled = PETSC_TRUE;
613: PetscFunctionReturn(PETSC_SUCCESS);
614: }
616: static PetscErrorCode MatTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B)
617: {
618: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b;
619: Mat Bs = *B;
620: PetscBLASInt one = 1;
621: PetscScalar sone = 1.0, zero = 0.0;
622: #if defined(PETSC_USE_COMPLEX)
623: PetscInt i, j;
624: #endif
626: PetscFunctionBegin;
627: if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *B));
628: PetscCheck(reuse == MAT_INITIAL_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported");
629: PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs));
630: *B = Bs;
631: b = (Mat_ScaLAPACK *)Bs->data;
632: PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc));
633: #if defined(PETSC_USE_COMPLEX)
634: /* undo conjugation */
635: for (i = 0; i < b->locr; i++)
636: for (j = 0; j < b->locc; j++) b->loc[i + j * b->lld] = PetscConj(b->loc[i + j * b->lld]);
637: #endif
638: Bs->assembled = PETSC_TRUE;
639: PetscFunctionReturn(PETSC_SUCCESS);
640: }
642: static PetscErrorCode MatConjugate_ScaLAPACK(Mat A)
643: {
644: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
645: PetscInt i, j;
647: PetscFunctionBegin;
648: for (i = 0; i < a->locr; i++)
649: for (j = 0; j < a->locc; j++) a->loc[i + j * a->lld] = PetscConj(a->loc[i + j * a->lld]);
650: PetscFunctionReturn(PETSC_SUCCESS);
651: }
653: static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A, MatReuse reuse, Mat *B)
654: {
655: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b;
656: Mat Bs = *B;
657: PetscBLASInt one = 1;
658: PetscScalar sone = 1.0, zero = 0.0;
660: PetscFunctionBegin;
661: PetscCheck(reuse == MAT_INITIAL_MATRIX, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Only MAT_INITIAL_MATRIX supported");
662: PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->nb, a->mb, a->N, a->M, a->csrc, a->rsrc, &Bs));
663: *B = Bs;
664: b = (Mat_ScaLAPACK *)Bs->data;
665: PetscCallBLAS("PBLAStran", PBLAStran_(&a->N, &a->M, &sone, a->loc, &one, &one, a->desc, &zero, b->loc, &one, &one, b->desc));
666: Bs->assembled = PETSC_TRUE;
667: PetscFunctionReturn(PETSC_SUCCESS);
668: }
670: static PetscErrorCode MatSolve_ScaLAPACK(Mat A, Vec B, Vec X)
671: {
672: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
673: PetscScalar *x, *x2d;
674: const PetscInt *ranges;
675: PetscBLASInt xdesc[9], x2desc[9], mb, lszx, zero = 0, one = 1, xlld, nrhs = 1, info;
677: PetscFunctionBegin;
678: PetscCall(VecCopy(B, X));
679: PetscCall(VecGetArray(X, &x));
681: /* create ScaLAPACK descriptor for a vector (1d block distribution) */
682: PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
683: PetscCall(PetscBLASIntCast(ranges[1], &mb)); /* x block size */
684: xlld = PetscMax(1, A->rmap->n);
685: PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(xdesc, &a->M, &one, &mb, &one, &zero, &zero, &a->grid->ictxcol, &xlld, &info));
686: PetscCheckScaLapackInfo("descinit", info);
688: /* allocate 2d vector */
689: lszx = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
690: PetscCall(PetscMalloc1(lszx, &x2d));
691: xlld = PetscMax(1, lszx);
693: /* create ScaLAPACK descriptor for a vector (2d block distribution) */
694: PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(x2desc, &a->M, &one, &a->mb, &one, &zero, &zero, &a->grid->ictxt, &xlld, &info));
695: PetscCheckScaLapackInfo("descinit", info);
697: /* redistribute x as a column of a 2d matrix */
698: PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x, &one, &one, xdesc, x2d, &one, &one, x2desc, &a->grid->ictxcol));
700: /* call ScaLAPACK subroutine */
701: switch (A->factortype) {
702: case MAT_FACTOR_LU:
703: PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &nrhs, a->loc, &one, &one, a->desc, a->pivots, x2d, &one, &one, x2desc, &info));
704: PetscCheckScaLapackInfo("getrs", info);
705: break;
706: case MAT_FACTOR_CHOLESKY:
707: PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &nrhs, a->loc, &one, &one, a->desc, x2d, &one, &one, x2desc, &info));
708: PetscCheckScaLapackInfo("potrs", info);
709: break;
710: default:
711: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType");
712: }
714: /* redistribute x from a column of a 2d matrix */
715: PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &one, x2d, &one, &one, x2desc, x, &one, &one, xdesc, &a->grid->ictxcol));
717: PetscCall(PetscFree(x2d));
718: PetscCall(VecRestoreArray(X, &x));
719: PetscFunctionReturn(PETSC_SUCCESS);
720: }
722: static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A, Vec B, Vec Y, Vec X)
723: {
724: PetscFunctionBegin;
725: PetscCall(MatSolve_ScaLAPACK(A, B, X));
726: PetscCall(VecAXPY(X, 1, Y));
727: PetscFunctionReturn(PETSC_SUCCESS);
728: }
730: static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A, Mat B, Mat X)
731: {
732: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data, *b, *x;
733: PetscBool flg1, flg2;
734: PetscBLASInt one = 1, info;
736: PetscFunctionBegin;
737: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSCALAPACK, &flg1));
738: PetscCall(PetscObjectTypeCompare((PetscObject)X, MATSCALAPACK, &flg2));
739: PetscCheck((flg1 && flg2), PETSC_COMM_SELF, PETSC_ERR_SUP, "Both B and X must be of type MATSCALAPACK");
740: MatScaLAPACKCheckDistribution(B, 1, X, 2);
741: b = (Mat_ScaLAPACK *)B->data;
742: x = (Mat_ScaLAPACK *)X->data;
743: PetscCall(PetscArraycpy(x->loc, b->loc, b->lld * b->locc));
745: switch (A->factortype) {
746: case MAT_FACTOR_LU:
747: PetscCallBLAS("SCALAPACKgetrs", SCALAPACKgetrs_("N", &a->M, &x->N, a->loc, &one, &one, a->desc, a->pivots, x->loc, &one, &one, x->desc, &info));
748: PetscCheckScaLapackInfo("getrs", info);
749: break;
750: case MAT_FACTOR_CHOLESKY:
751: PetscCallBLAS("SCALAPACKpotrs", SCALAPACKpotrs_("L", &a->M, &x->N, a->loc, &one, &one, a->desc, x->loc, &one, &one, x->desc, &info));
752: PetscCheckScaLapackInfo("potrs", info);
753: break;
754: default:
755: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Unfactored Matrix or Unsupported MatFactorType");
756: }
757: PetscFunctionReturn(PETSC_SUCCESS);
758: }
760: static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A, IS row, IS col, const MatFactorInfo *factorinfo)
761: {
762: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
763: PetscBLASInt one = 1, info;
765: PetscFunctionBegin;
766: if (!a->pivots) PetscCall(PetscMalloc1(a->locr + a->mb, &a->pivots));
767: PetscCallBLAS("SCALAPACKgetrf", SCALAPACKgetrf_(&a->M, &a->N, a->loc, &one, &one, a->desc, a->pivots, &info));
768: PetscCheckScaLapackInfo("getrf", info);
769: A->factortype = MAT_FACTOR_LU;
770: A->assembled = PETSC_TRUE;
772: PetscCall(PetscFree(A->solvertype));
773: PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype));
774: PetscFunctionReturn(PETSC_SUCCESS);
775: }
777: static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info)
778: {
779: PetscFunctionBegin;
780: PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN));
781: PetscCall(MatLUFactor_ScaLAPACK(F, 0, 0, info));
782: PetscFunctionReturn(PETSC_SUCCESS);
783: }
785: static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
786: {
787: PetscFunctionBegin;
788: /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
789: PetscFunctionReturn(PETSC_SUCCESS);
790: }
792: static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A, IS perm, const MatFactorInfo *factorinfo)
793: {
794: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
795: PetscBLASInt one = 1, info;
797: PetscFunctionBegin;
798: PetscCallBLAS("SCALAPACKpotrf", SCALAPACKpotrf_("L", &a->M, a->loc, &one, &one, a->desc, &info));
799: PetscCheckScaLapackInfo("potrf", info);
800: A->factortype = MAT_FACTOR_CHOLESKY;
801: A->assembled = PETSC_TRUE;
803: PetscCall(PetscFree(A->solvertype));
804: PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &A->solvertype));
805: PetscFunctionReturn(PETSC_SUCCESS);
806: }
808: static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F, Mat A, const MatFactorInfo *info)
809: {
810: PetscFunctionBegin;
811: PetscCall(MatCopy(A, F, SAME_NONZERO_PATTERN));
812: PetscCall(MatCholeskyFactor_ScaLAPACK(F, 0, info));
813: PetscFunctionReturn(PETSC_SUCCESS);
814: }
816: static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F, Mat A, IS perm, const MatFactorInfo *info)
817: {
818: PetscFunctionBegin;
819: /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
820: PetscFunctionReturn(PETSC_SUCCESS);
821: }
823: PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A, MatSolverType *type)
824: {
825: PetscFunctionBegin;
826: *type = MATSOLVERSCALAPACK;
827: PetscFunctionReturn(PETSC_SUCCESS);
828: }
830: static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A, MatFactorType ftype, Mat *F)
831: {
832: Mat B;
833: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
835: PetscFunctionBegin;
836: /* Create the factorization matrix */
837: PetscCall(MatCreateScaLAPACK(PetscObjectComm((PetscObject)A), a->mb, a->nb, a->M, a->N, a->rsrc, a->csrc, &B));
838: B->trivialsymbolic = PETSC_TRUE;
839: B->factortype = ftype;
840: PetscCall(PetscFree(B->solvertype));
841: PetscCall(PetscStrallocpy(MATSOLVERSCALAPACK, &B->solvertype));
843: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_scalapack_scalapack));
844: *F = B;
845: PetscFunctionReturn(PETSC_SUCCESS);
846: }
848: PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void)
849: {
850: PetscFunctionBegin;
851: PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_LU, MatGetFactor_scalapack_scalapack));
852: PetscCall(MatSolverTypeRegister(MATSOLVERSCALAPACK, MATSCALAPACK, MAT_FACTOR_CHOLESKY, MatGetFactor_scalapack_scalapack));
853: PetscFunctionReturn(PETSC_SUCCESS);
854: }
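
A direct-solve sketch using the solver type registered above (assuming an assembled square MATSCALAPACK matrix A and vectors b, x; illustrative, not part of this file). The same path is reached through KSP with -ksp_type preonly -pc_type lu -pc_factor_mat_solver_type scalapack.

   Mat           F;
   MatFactorInfo finfo;

   PetscCall(MatFactorInfoInitialize(&finfo));
   PetscCall(MatGetFactor(A, MATSOLVERSCALAPACK, MAT_FACTOR_LU, &F));
   PetscCall(MatLUFactorSymbolic(F, A, NULL, NULL, &finfo)); /* trivial here, see above */
   PetscCall(MatLUFactorNumeric(F, A, &finfo));              /* copies A into F and calls the getrf wrapper */
   PetscCall(MatSolve(F, b, x));                             /* calls the getrs wrapper */
   PetscCall(MatDestroy(&F));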
856: static PetscErrorCode MatNorm_ScaLAPACK(Mat A, NormType type, PetscReal *nrm)
857: {
858: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
859: PetscBLASInt one = 1, lwork = 0;
860: const char *ntype;
861: PetscScalar *work = NULL, dummy;
863: PetscFunctionBegin;
864: switch (type) {
865: case NORM_1:
866: ntype = "1";
867: lwork = PetscMax(a->locr, a->locc);
868: break;
869: case NORM_FROBENIUS:
870: ntype = "F";
871: work = &dummy;
872: break;
873: case NORM_INFINITY:
874: ntype = "I";
875: lwork = PetscMax(a->locr, a->locc);
876: break;
877: default:
878: SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Unsupported norm type");
879: }
880: if (lwork) PetscCall(PetscMalloc1(lwork, &work));
881: *nrm = SCALAPACKlange_(ntype, &a->M, &a->N, a->loc, &one, &one, a->desc, work);
882: if (lwork) PetscCall(PetscFree(work));
883: PetscFunctionReturn(PETSC_SUCCESS);
884: }
886: static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A)
887: {
888: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
890: PetscFunctionBegin;
891: PetscCall(PetscArrayzero(a->loc, a->lld * a->locc));
892: PetscFunctionReturn(PETSC_SUCCESS);
893: }
895: static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A, IS *rows, IS *cols)
896: {
897: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
898: PetscInt i, n, nb, isrc, nproc, iproc, *idx;
900: PetscFunctionBegin;
901: if (rows) {
902: n = a->locr;
903: nb = a->mb;
904: isrc = a->rsrc;
905: nproc = a->grid->nprow;
906: iproc = a->grid->myrow;
907: PetscCall(PetscMalloc1(n, &idx));
908: for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb;
909: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, rows));
910: }
911: if (cols) {
912: n = a->locc;
913: nb = a->nb;
914: isrc = a->csrc;
915: nproc = a->grid->npcol;
916: iproc = a->grid->mycol;
917: PetscCall(PetscMalloc1(n, &idx));
918: for (i = 0; i < n; i++) idx[i] = nproc * nb * (i / nb) + i % nb + ((nproc + iproc - isrc) % nproc) * nb;
919: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx, PETSC_OWN_POINTER, cols));
920: }
921: PetscFunctionReturn(PETSC_SUCCESS);
922: }
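
A worked example of the index formula above, with hypothetical numbers: for nb = 2, nproc = 3, isrc = 0 and iproc = 1, local indices i = 0, 1, 2, 3 map to global indices 2, 3, 8, 9, i.e. this process owns global rows {2, 3, 8, 9, 14, 15, ...} of the block-cyclic distribution. From user code the same information is obtained with:

   IS rows, cols;
   PetscCall(MatGetOwnershipIS(A, &rows, &cols)); /* global rows/columns stored on this process */
   /* ... inspect with ISGetLocalSize()/ISGetIndices() ... */
   PetscCall(ISDestroy(&rows));
   PetscCall(ISDestroy(&cols));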
924: static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A, MatType newtype, MatReuse reuse, Mat *B)
925: {
926: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
927: Mat Bmpi;
928: MPI_Comm comm;
929: PetscInt i, M = A->rmap->N, N = A->cmap->N, m, n, rstart, rend, nz;
930: const PetscInt *ranges, *branges, *cwork;
931: const PetscScalar *vwork;
932: PetscBLASInt bdesc[9], bmb, zero = 0, one = 1, lld, info;
933: PetscScalar *barray;
934: PetscBool differ = PETSC_FALSE;
935: PetscMPIInt size;
937: PetscFunctionBegin;
938: PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
939: PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
941: if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */
942: PetscCallMPI(MPI_Comm_size(comm, &size));
943: PetscCall(PetscLayoutGetRanges((*B)->rmap, &branges));
944: for (i = 0; i < size; i++)
945: if (ranges[i + 1] != branges[i + 1]) {
946: differ = PETSC_TRUE;
947: break;
948: }
949: }
951: if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */
952: PetscCall(MatCreate(comm, &Bmpi));
953: m = PETSC_DECIDE;
954: PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
955: n = PETSC_DECIDE;
956: PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
957: PetscCall(MatSetSizes(Bmpi, m, n, M, N));
958: PetscCall(MatSetType(Bmpi, MATDENSE));
959: PetscCall(MatSetUp(Bmpi));
961: /* create ScaLAPACK descriptor for B (1d block distribution) */
962: PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */
963: lld = PetscMax(A->rmap->n, 1); /* local leading dimension */
964: PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info));
965: PetscCheckScaLapackInfo("descinit", info);
967: /* redistribute matrix */
968: PetscCall(MatDenseGetArray(Bmpi, &barray));
969: PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol));
970: PetscCall(MatDenseRestoreArray(Bmpi, &barray));
971: PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
972: PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
974: /* transfer rows of auxiliary matrix to the final matrix B */
975: PetscCall(MatGetOwnershipRange(Bmpi, &rstart, &rend));
976: for (i = rstart; i < rend; i++) {
977: PetscCall(MatGetRow(Bmpi, i, &nz, &cwork, &vwork));
978: PetscCall(MatSetValues(*B, 1, &i, nz, cwork, vwork, INSERT_VALUES));
979: PetscCall(MatRestoreRow(Bmpi, i, &nz, &cwork, &vwork));
980: }
981: PetscCall(MatAssemblyBegin(*B, MAT_FINAL_ASSEMBLY));
982: PetscCall(MatAssemblyEnd(*B, MAT_FINAL_ASSEMBLY));
983: PetscCall(MatDestroy(&Bmpi));
985: } else { /* normal cases */
987: if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
988: else {
989: PetscCall(MatCreate(comm, &Bmpi));
990: m = PETSC_DECIDE;
991: PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
992: n = PETSC_DECIDE;
993: PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
994: PetscCall(MatSetSizes(Bmpi, m, n, M, N));
995: PetscCall(MatSetType(Bmpi, MATDENSE));
996: PetscCall(MatSetUp(Bmpi));
997: }
999: /* create ScaLAPACK descriptor for B (1d block distribution) */
1000: PetscCall(PetscBLASIntCast(ranges[1], &bmb)); /* row block size */
1001: lld = PetscMax(A->rmap->n, 1); /* local leading dimension */
1002: PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(bdesc, &a->M, &a->N, &bmb, &a->N, &zero, &zero, &a->grid->ictxcol, &lld, &info));
1003: PetscCheckScaLapackInfo("descinit", info);
1005: /* redistribute matrix */
1006: PetscCall(MatDenseGetArray(Bmpi, &barray));
1007: PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&a->M, &a->N, a->loc, &one, &one, a->desc, barray, &one, &one, bdesc, &a->grid->ictxcol));
1008: PetscCall(MatDenseRestoreArray(Bmpi, &barray));
1010: PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
1011: PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
1012: if (reuse == MAT_INPLACE_MATRIX) {
1013: PetscCall(MatHeaderReplace(A, &Bmpi));
1014: } else *B = Bmpi;
1015: }
1016: PetscFunctionReturn(PETSC_SUCCESS);
1017: }
1019: static inline PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map, PetscBool *correct)
1020: {
1021: const PetscInt *ranges;
1022: PetscMPIInt size;
1023: PetscInt i, n;
1025: PetscFunctionBegin;
1026: *correct = PETSC_TRUE;
1027: PetscCallMPI(MPI_Comm_size(map->comm, &size));
1028: if (size > 1) {
1029: PetscCall(PetscLayoutGetRanges(map, &ranges));
1030: n = ranges[1] - ranges[0];
1031: for (i = 1; i < size; i++)
1032: if (ranges[i + 1] - ranges[i] != n) break;
1033: *correct = (PetscBool)(i == size || (i == size - 1 && ranges[i + 1] - ranges[i] <= n));
1034: }
1035: PetscFunctionReturn(PETSC_SUCCESS);
1036: }
1038: PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *B)
1039: {
1040: Mat_ScaLAPACK *b;
1041: Mat Bmpi;
1042: MPI_Comm comm;
1043: PetscInt M = A->rmap->N, N = A->cmap->N, m, n;
1044: const PetscInt *ranges, *rows, *cols;
1045: PetscBLASInt adesc[9], amb, zero = 0, one = 1, lld, info;
1046: PetscScalar *aarray;
1047: IS ir, ic;
1048: PetscInt lda;
1049: PetscBool flg;
1051: PetscFunctionBegin;
1052: PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
1054: if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
1055: else {
1056: PetscCall(MatCreate(comm, &Bmpi));
1057: m = PETSC_DECIDE;
1058: PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
1059: n = PETSC_DECIDE;
1060: PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
1061: PetscCall(MatSetSizes(Bmpi, m, n, M, N));
1062: PetscCall(MatSetType(Bmpi, MATSCALAPACK));
1063: PetscCall(MatSetUp(Bmpi));
1064: }
1065: b = (Mat_ScaLAPACK *)Bmpi->data;
1067: PetscCall(MatDenseGetLDA(A, &lda));
1068: PetscCall(MatDenseGetArray(A, &aarray));
1069: PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg));
1070: if (flg) PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg));
1071: if (flg) { /* if the input Mat has a ScaLAPACK-compatible layout, use ScaLAPACK for the redistribution */
1072: /* create ScaLAPACK descriptor for A (1d block distribution) */
1073: PetscCall(PetscLayoutGetRanges(A->rmap, &ranges));
1074: PetscCall(PetscBLASIntCast(ranges[1], &amb)); /* row block size */
1075: lld = PetscMax(lda, 1); /* local leading dimension */
1076: PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(adesc, &b->M, &b->N, &amb, &b->N, &zero, &zero, &b->grid->ictxcol, &lld, &info));
1077: PetscCheckScaLapackInfo("descinit", info);
1079: /* redistribute matrix */
1080: PetscCallBLAS("SCALAPACKgemr2d", SCALAPACKgemr2d_(&b->M, &b->N, aarray, &one, &one, adesc, b->loc, &one, &one, b->desc, &b->grid->ictxcol));
1081: Bmpi->nooffprocentries = PETSC_TRUE;
1082: } else { /* if the input Mat has a ScaLAPACK-incompatible layout, redistribute via MatSetValues() */
1083: PetscCheck(lda == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Leading dimension (%" PetscInt_FMT ") different than local number of rows (%" PetscInt_FMT ")", lda, A->rmap->n);
1084: b->roworiented = PETSC_FALSE;
1085: PetscCall(MatGetOwnershipIS(A, &ir, &ic));
1086: PetscCall(ISGetIndices(ir, &rows));
1087: PetscCall(ISGetIndices(ic, &cols));
1088: PetscCall(MatSetValues(Bmpi, A->rmap->n, rows, A->cmap->N, cols, aarray, INSERT_VALUES));
1089: PetscCall(ISRestoreIndices(ir, &rows));
1090: PetscCall(ISRestoreIndices(ic, &cols));
1091: PetscCall(ISDestroy(&ic));
1092: PetscCall(ISDestroy(&ir));
1093: }
1094: PetscCall(MatDenseRestoreArray(A, &aarray));
1095: PetscCall(MatAssemblyBegin(Bmpi, MAT_FINAL_ASSEMBLY));
1096: PetscCall(MatAssemblyEnd(Bmpi, MAT_FINAL_ASSEMBLY));
1097: if (reuse == MAT_INPLACE_MATRIX) {
1098: PetscCall(MatHeaderReplace(A, &Bmpi));
1099: } else *B = Bmpi;
1100: PetscFunctionReturn(PETSC_SUCCESS);
1101: }
1103: PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1104: {
1105: Mat mat_scal;
1106: PetscInt M = A->rmap->N, N = A->cmap->N, rstart = A->rmap->rstart, rend = A->rmap->rend, m, n, row, ncols;
1107: const PetscInt *cols;
1108: const PetscScalar *vals;
1110: PetscFunctionBegin;
1111: if (reuse == MAT_REUSE_MATRIX) {
1112: mat_scal = *newmat;
1113: PetscCall(MatZeroEntries(mat_scal));
1114: } else {
1115: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal));
1116: m = PETSC_DECIDE;
1117: PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M));
1118: n = PETSC_DECIDE;
1119: PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N));
1120: PetscCall(MatSetSizes(mat_scal, m, n, M, N));
1121: PetscCall(MatSetType(mat_scal, MATSCALAPACK));
1122: PetscCall(MatSetUp(mat_scal));
1123: }
1124: for (row = rstart; row < rend; row++) {
1125: PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
1126: PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, INSERT_VALUES));
1127: PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
1128: }
1129: PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY));
1130: PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY));
1132: if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal));
1133: else *newmat = mat_scal;
1134: PetscFunctionReturn(PETSC_SUCCESS);
1135: }
1137: PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
1138: {
1139: Mat mat_scal;
1140: PetscInt M = A->rmap->N, N = A->cmap->N, m, n, row, ncols, j, rstart = A->rmap->rstart, rend = A->rmap->rend;
1141: const PetscInt *cols;
1142: const PetscScalar *vals;
1143: PetscScalar v;
1145: PetscFunctionBegin;
1146: if (reuse == MAT_REUSE_MATRIX) {
1147: mat_scal = *newmat;
1148: PetscCall(MatZeroEntries(mat_scal));
1149: } else {
1150: PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_scal));
1151: m = PETSC_DECIDE;
1152: PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &m, &M));
1153: n = PETSC_DECIDE;
1154: PetscCall(PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A), &n, &N));
1155: PetscCall(MatSetSizes(mat_scal, m, n, M, N));
1156: PetscCall(MatSetType(mat_scal, MATSCALAPACK));
1157: PetscCall(MatSetUp(mat_scal));
1158: }
1159: PetscCall(MatGetRowUpperTriangular(A));
1160: for (row = rstart; row < rend; row++) {
1161: PetscCall(MatGetRow(A, row, &ncols, &cols, &vals));
1162: PetscCall(MatSetValues(mat_scal, 1, &row, ncols, cols, vals, ADD_VALUES));
1163: for (j = 0; j < ncols; j++) { /* lower triangular part */
1164: if (cols[j] == row) continue;
1165: v = A->hermitian == PETSC_BOOL3_TRUE ? PetscConj(vals[j]) : vals[j];
1166: PetscCall(MatSetValues(mat_scal, 1, &cols[j], 1, &row, &v, ADD_VALUES));
1167: }
1168: PetscCall(MatRestoreRow(A, row, &ncols, &cols, &vals));
1169: }
1170: PetscCall(MatRestoreRowUpperTriangular(A));
1171: PetscCall(MatAssemblyBegin(mat_scal, MAT_FINAL_ASSEMBLY));
1172: PetscCall(MatAssemblyEnd(mat_scal, MAT_FINAL_ASSEMBLY));
1174: if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &mat_scal));
1175: else *newmat = mat_scal;
1176: PetscFunctionReturn(PETSC_SUCCESS);
1177: }
1179: static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A)
1180: {
1181: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1182: PetscInt sz = 0;
1184: PetscFunctionBegin;
1185: PetscCall(PetscLayoutSetUp(A->rmap));
1186: PetscCall(PetscLayoutSetUp(A->cmap));
1187: if (!a->lld) a->lld = a->locr;
1189: PetscCall(PetscFree(a->loc));
1190: PetscCall(PetscIntMultError(a->lld, a->locc, &sz));
1191: PetscCall(PetscCalloc1(sz, &a->loc));
1193: A->preallocated = PETSC_TRUE;
1194: PetscFunctionReturn(PETSC_SUCCESS);
1195: }
1197: static PetscErrorCode MatDestroy_ScaLAPACK(Mat A)
1198: {
1199: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1200: Mat_ScaLAPACK_Grid *grid;
1201: PetscBool flg;
1202: MPI_Comm icomm;
1204: PetscFunctionBegin;
1205: PetscCall(MatStashDestroy_Private(&A->stash));
1206: PetscCall(PetscFree(a->loc));
1207: PetscCall(PetscFree(a->pivots));
1208: PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL));
1209: PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg));
1210: if (--grid->grid_refct == 0) {
1211: Cblacs_gridexit(grid->ictxt);
1212: Cblacs_gridexit(grid->ictxrow);
1213: Cblacs_gridexit(grid->ictxcol);
1214: PetscCall(PetscFree(grid));
1215: PetscCallMPI(MPI_Comm_delete_attr(icomm, Petsc_ScaLAPACK_keyval));
1216: }
1217: PetscCall(PetscCommDestroy(&icomm));
1218: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", NULL));
1219: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
1220: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", NULL));
1221: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", NULL));
1222: PetscCall(PetscFree(A->data));
1223: PetscFunctionReturn(PETSC_SUCCESS);
1224: }
1226: PetscErrorCode MatSetUp_ScaLAPACK(Mat A)
1227: {
1228: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1229: PetscBLASInt info = 0;
1230: PetscBool flg;
1232: PetscFunctionBegin;
1233: PetscCall(PetscLayoutSetUp(A->rmap));
1234: PetscCall(PetscLayoutSetUp(A->cmap));
1236: /* check that the layout is as enforced by MatCreateScaLAPACK() */
1237: PetscCall(MatScaLAPACKCheckLayout(A->rmap, &flg));
1238: PetscCheck(flg, A->rmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local row sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
1239: PetscCall(MatScaLAPACKCheckLayout(A->cmap, &flg));
1240: PetscCheck(flg, A->cmap->comm, PETSC_ERR_SUP, "MATSCALAPACK must have equal local column sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK");
1242: /* compute local sizes */
1243: PetscCall(PetscBLASIntCast(A->rmap->N, &a->M));
1244: PetscCall(PetscBLASIntCast(A->cmap->N, &a->N));
1245: a->locr = SCALAPACKnumroc_(&a->M, &a->mb, &a->grid->myrow, &a->rsrc, &a->grid->nprow);
1246: a->locc = SCALAPACKnumroc_(&a->N, &a->nb, &a->grid->mycol, &a->csrc, &a->grid->npcol);
1247: a->lld = PetscMax(1, a->locr);
1249: /* allocate local array */
1250: PetscCall(MatScaLAPACKSetPreallocation(A));
1252: /* set up ScaLAPACK descriptor */
1253: PetscCallBLAS("SCALAPACKdescinit", SCALAPACKdescinit_(a->desc, &a->M, &a->N, &a->mb, &a->nb, &a->rsrc, &a->csrc, &a->grid->ictxt, &a->lld, &info));
1254: PetscCheckScaLapackInfo("descinit", info);
1255: PetscFunctionReturn(PETSC_SUCCESS);
1256: }
1258: PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A, MatAssemblyType type)
1259: {
1260: PetscInt nstash, reallocs;
1262: PetscFunctionBegin;
1263: if (A->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
1264: PetscCall(MatStashScatterBegin_Private(A, &A->stash, NULL));
1265: PetscCall(MatStashGetInfo_Private(&A->stash, &nstash, &reallocs));
1266: PetscCall(PetscInfo(A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
1267: PetscFunctionReturn(PETSC_SUCCESS);
1268: }
1270: PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A, MatAssemblyType type)
1271: {
1272: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1273: PetscMPIInt n;
1274: PetscInt i, flg, *row, *col;
1275: PetscScalar *val;
1276: PetscBLASInt gridx, gcidx, lridx, lcidx, rsrc, csrc;
1278: PetscFunctionBegin;
1279: if (A->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);
1280: while (1) {
1281: PetscCall(MatStashScatterGetMesg_Private(&A->stash, &n, &row, &col, &val, &flg));
1282: if (!flg) break;
1283: for (i = 0; i < n; i++) {
1284: PetscCall(PetscBLASIntCast(row[i] + 1, &gridx));
1285: PetscCall(PetscBLASIntCast(col[i] + 1, &gcidx));
1286: PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
1287: PetscCheck(rsrc == a->grid->myrow && csrc == a->grid->mycol, PetscObjectComm((PetscObject)A), PETSC_ERR_LIB, "Something went wrong, received value does not belong to this process");
1288: switch (A->insertmode) {
1289: case INSERT_VALUES:
1290: a->loc[lridx - 1 + (lcidx - 1) * a->lld] = val[i];
1291: break;
1292: case ADD_VALUES:
1293: a->loc[lridx - 1 + (lcidx - 1) * a->lld] += val[i];
1294: break;
1295: default:
1296: SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "No support for InsertMode %d", (int)A->insertmode);
1297: }
1298: }
1299: }
1300: PetscCall(MatStashScatterEnd_Private(&A->stash));
1301: PetscFunctionReturn(PETSC_SUCCESS);
1302: }
1304: PetscErrorCode MatLoad_ScaLAPACK(Mat newMat, PetscViewer viewer)
1305: {
1306: Mat Adense, As;
1307: MPI_Comm comm;
1309: PetscFunctionBegin;
1310: PetscCall(PetscObjectGetComm((PetscObject)newMat, &comm));
1311: PetscCall(MatCreate(comm, &Adense));
1312: PetscCall(MatSetType(Adense, MATDENSE));
1313: PetscCall(MatLoad(Adense, viewer));
1314: PetscCall(MatConvert(Adense, MATSCALAPACK, MAT_INITIAL_MATRIX, &As));
1315: PetscCall(MatDestroy(&Adense));
1316: PetscCall(MatHeaderReplace(newMat, &As));
1317: PetscFunctionReturn(PETSC_SUCCESS);
1318: }
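
A load sketch for the routine above (the file name "matrix.dat" is hypothetical; assumes a matrix previously written with MatView() on a binary viewer):

   Mat         A;
   PetscViewer viewer;

   PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, "matrix.dat", FILE_MODE_READ, &viewer));
   PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
   PetscCall(MatSetType(A, MATSCALAPACK));
   PetscCall(MatLoad(A, viewer)); /* reads as MATDENSE, converts, and replaces the header, as above */
   PetscCall(PetscViewerDestroy(&viewer));
   PetscCall(MatDestroy(&A));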
1320: static struct _MatOps MatOps_Values = {MatSetValues_ScaLAPACK,
1321: 0,
1322: 0,
1323: MatMult_ScaLAPACK,
1324: /* 4*/ MatMultAdd_ScaLAPACK,
1325: MatMultTranspose_ScaLAPACK,
1326: MatMultTransposeAdd_ScaLAPACK,
1327: MatSolve_ScaLAPACK,
1328: MatSolveAdd_ScaLAPACK,
1329: 0,
1330: /*10*/ 0,
1331: MatLUFactor_ScaLAPACK,
1332: MatCholeskyFactor_ScaLAPACK,
1333: 0,
1334: MatTranspose_ScaLAPACK,
1335: /*15*/ MatGetInfo_ScaLAPACK,
1336: 0,
1337: MatGetDiagonal_ScaLAPACK,
1338: MatDiagonalScale_ScaLAPACK,
1339: MatNorm_ScaLAPACK,
1340: /*20*/ MatAssemblyBegin_ScaLAPACK,
1341: MatAssemblyEnd_ScaLAPACK,
1342: MatSetOption_ScaLAPACK,
1343: MatZeroEntries_ScaLAPACK,
1344: /*24*/ 0,
1345: MatLUFactorSymbolic_ScaLAPACK,
1346: MatLUFactorNumeric_ScaLAPACK,
1347: MatCholeskyFactorSymbolic_ScaLAPACK,
1348: MatCholeskyFactorNumeric_ScaLAPACK,
1349: /*29*/ MatSetUp_ScaLAPACK,
1350: 0,
1351: 0,
1352: 0,
1353: 0,
1354: /*34*/ MatDuplicate_ScaLAPACK,
1355: 0,
1356: 0,
1357: 0,
1358: 0,
1359: /*39*/ MatAXPY_ScaLAPACK,
1360: 0,
1361: 0,
1362: 0,
1363: MatCopy_ScaLAPACK,
1364: /*44*/ 0,
1365: MatScale_ScaLAPACK,
1366: MatShift_ScaLAPACK,
1367: 0,
1368: 0,
1369: /*49*/ 0,
1370: 0,
1371: 0,
1372: 0,
1373: 0,
1374: /*54*/ 0,
1375: 0,
1376: 0,
1377: 0,
1378: 0,
1379: /*59*/ 0,
1380: MatDestroy_ScaLAPACK,
1381: MatView_ScaLAPACK,
1382: 0,
1383: 0,
1384: /*64*/ 0,
1385: 0,
1386: 0,
1387: 0,
1388: 0,
1389: /*69*/ 0,
1390: 0,
1391: MatConvert_ScaLAPACK_Dense,
1392: 0,
1393: 0,
1394: /*74*/ 0,
1395: 0,
1396: 0,
1397: 0,
1398: 0,
1399: /*79*/ 0,
1400: 0,
1401: 0,
1402: 0,
1403: MatLoad_ScaLAPACK,
1404: /*84*/ 0,
1405: 0,
1406: 0,
1407: 0,
1408: 0,
1409: /*89*/ 0,
1410: 0,
1411: MatMatMultNumeric_ScaLAPACK,
1412: 0,
1413: 0,
1414: /*94*/ 0,
1415: 0,
1416: 0,
1417: MatMatTransposeMultNumeric_ScaLAPACK,
1418: 0,
1419: /*99*/ MatProductSetFromOptions_ScaLAPACK,
1420: 0,
1421: 0,
1422: MatConjugate_ScaLAPACK,
1423: 0,
1424: /*104*/ 0,
1425: 0,
1426: 0,
1427: 0,
1428: 0,
1429: /*109*/ MatMatSolve_ScaLAPACK,
1430: 0,
1431: 0,
1432: 0,
1433: MatMissingDiagonal_ScaLAPACK,
1434: /*114*/ 0,
1435: 0,
1436: 0,
1437: 0,
1438: 0,
1439: /*119*/ 0,
1440: MatHermitianTranspose_ScaLAPACK,
1441: 0,
1442: 0,
1443: 0,
1444: /*124*/ 0,
1445: 0,
1446: 0,
1447: 0,
1448: 0,
1449: /*129*/ 0,
1450: 0,
1451: 0,
1452: 0,
1453: 0,
1454: /*134*/ 0,
1455: 0,
1456: 0,
1457: 0,
1458: 0,
1459: 0,
1460: /*140*/ 0,
1461: 0,
1462: 0,
1463: 0,
1464: 0,
1465: /*145*/ 0,
1466: 0,
1467: 0,
1468: 0,
1469: 0,
1470: /*150*/ 0,
1471: 0};
1473: static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat, MatStash *stash, PetscInt *owners)
1474: {
1475: PetscInt *owner, *startv, *starti, tag1 = stash->tag1, tag2 = stash->tag2, bs2;
1476: PetscInt size = stash->size, nsends;
1477: PetscInt count, *sindices, **rindices, i, j, l;
1478: PetscScalar **rvalues, *svalues;
1479: MPI_Comm comm = stash->comm;
1480: MPI_Request *send_waits, *recv_waits, *recv_waits1, *recv_waits2;
1481: PetscMPIInt *sizes, *nlengths, nreceives;
1482: PetscInt *sp_idx, *sp_idy;
1483: PetscScalar *sp_val;
1484: PetscMatStashSpace space, space_next;
1485: PetscBLASInt gridx, gcidx, lridx, lcidx, rsrc, csrc;
1486: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)mat->data;
1488: PetscFunctionBegin;
1489:   { /* make sure all processes use the same InsertMode, either INSERT_VALUES or ADD_VALUES */
1490: InsertMode addv;
1491: PetscCall(MPIU_Allreduce((PetscEnum *)&mat->insertmode, (PetscEnum *)&addv, 1, MPIU_ENUM, MPI_BOR, PetscObjectComm((PetscObject)mat)));
1492:     PetscCheck(addv != (ADD_VALUES | INSERT_VALUES), PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONGSTATE, "Some processors inserted, others added");
1493: mat->insertmode = addv; /* in case this processor had no cache */
1494: }
1496: bs2 = stash->bs * stash->bs;
1498: /* first count number of contributors to each processor */
1499: PetscCall(PetscCalloc1(size, &nlengths));
1500: PetscCall(PetscMalloc1(stash->n + 1, &owner));
1502: i = j = 0;
1503: space = stash->space_head;
1504: while (space) {
1505: space_next = space->next;
1506: for (l = 0; l < space->local_used; l++) {
1507: PetscCall(PetscBLASIntCast(space->idx[l] + 1, &gridx));
1508: PetscCall(PetscBLASIntCast(space->idy[l] + 1, &gcidx));
1509: PetscCallBLAS("SCALAPACKinfog2l", SCALAPACKinfog2l_(&gridx, &gcidx, a->desc, &a->grid->nprow, &a->grid->npcol, &a->grid->myrow, &a->grid->mycol, &lridx, &lcidx, &rsrc, &csrc));
1510: j = Cblacs_pnum(a->grid->ictxt, rsrc, csrc);
1511: nlengths[j]++;
1512: owner[i] = j;
1513: i++;
1514: }
1515: space = space_next;
1516: }
1518: /* Now check what procs get messages - and compute nsends. */
1519: PetscCall(PetscCalloc1(size, &sizes));
1520: for (i = 0, nsends = 0; i < size; i++) {
1521: if (nlengths[i]) {
1522: sizes[i] = 1;
1523: nsends++;
1524: }
1525: }
1527: {
1528: PetscMPIInt *onodes, *olengths;
1529:     /* Determine the number of messages to expect, their lengths, and the from-ids */
1530: PetscCall(PetscGatherNumberOfMessages(comm, sizes, nlengths, &nreceives));
1531: PetscCall(PetscGatherMessageLengths(comm, nsends, nreceives, nlengths, &onodes, &olengths));
1532:     /* row and column indices are sent together, so lengths are multiplied by 2 */
1533: for (i = 0; i < nreceives; i++) olengths[i] *= 2;
1534: PetscCall(PetscPostIrecvInt(comm, tag1, nreceives, onodes, olengths, &rindices, &recv_waits1));
1535:     /* values have 'bs2' entries each (and remove the earlier factor of 2) */
1536: for (i = 0; i < nreceives; i++) olengths[i] = olengths[i] * bs2 / 2;
1537: PetscCall(PetscPostIrecvScalar(comm, tag2, nreceives, onodes, olengths, &rvalues, &recv_waits2));
1538: PetscCall(PetscFree(onodes));
1539: PetscCall(PetscFree(olengths));
1540: }
1542:   /* do sends:
1543:       1) startv[i] gives the starting index in svalues for the data going to
1544:          the ith processor
1545:   */
1546: PetscCall(PetscMalloc2(bs2 * stash->n, &svalues, 2 * (stash->n + 1), &sindices));
1547: PetscCall(PetscMalloc1(2 * nsends, &send_waits));
1548: PetscCall(PetscMalloc2(size, &startv, size, &starti));
1549:   /* use 2 sends per destination: one with all the values, the other with all the row and column indices */
1550: startv[0] = 0;
1551: starti[0] = 0;
1552: for (i = 1; i < size; i++) {
1553: startv[i] = startv[i - 1] + nlengths[i - 1];
1554: starti[i] = starti[i - 1] + 2 * nlengths[i - 1];
1555: }
1557: i = 0;
1558: space = stash->space_head;
1559: while (space) {
1560: space_next = space->next;
1561: sp_idx = space->idx;
1562: sp_idy = space->idy;
1563: sp_val = space->val;
1564: for (l = 0; l < space->local_used; l++) {
1565: j = owner[i];
1566: if (bs2 == 1) {
1567: svalues[startv[j]] = sp_val[l];
1568: } else {
1569: PetscInt k;
1570: PetscScalar *buf1, *buf2;
1571: buf1 = svalues + bs2 * startv[j];
1572: buf2 = space->val + bs2 * l;
1573: for (k = 0; k < bs2; k++) buf1[k] = buf2[k];
1574: }
1575: sindices[starti[j]] = sp_idx[l];
1576: sindices[starti[j] + nlengths[j]] = sp_idy[l];
1577: startv[j]++;
1578: starti[j]++;
1579: i++;
1580: }
1581: space = space_next;
1582: }
1583: startv[0] = 0;
1584: for (i = 1; i < size; i++) startv[i] = startv[i - 1] + nlengths[i - 1];
1586: for (i = 0, count = 0; i < size; i++) {
1587: if (sizes[i]) {
1588: PetscCallMPI(MPI_Isend(sindices + 2 * startv[i], 2 * nlengths[i], MPIU_INT, i, tag1, comm, send_waits + count++));
1589: PetscCallMPI(MPI_Isend(svalues + bs2 * startv[i], bs2 * nlengths[i], MPIU_SCALAR, i, tag2, comm, send_waits + count++));
1590: }
1591: }
1592: #if defined(PETSC_USE_INFO)
1593: PetscCall(PetscInfo(NULL, "No of messages: %" PetscInt_FMT "\n", nsends));
1594: for (i = 0; i < size; i++) {
1595: if (sizes[i]) PetscCall(PetscInfo(NULL, "Mesg_to: %" PetscInt_FMT ": size: %zu bytes\n", i, (size_t)(nlengths[i] * (bs2 * sizeof(PetscScalar) + 2 * sizeof(PetscInt)))));
1596: }
1597: #endif
1598: PetscCall(PetscFree(nlengths));
1599: PetscCall(PetscFree(owner));
1600: PetscCall(PetscFree2(startv, starti));
1601: PetscCall(PetscFree(sizes));
1603: /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
1604: PetscCall(PetscMalloc1(2 * nreceives, &recv_waits));
1606: for (i = 0; i < nreceives; i++) {
1607: recv_waits[2 * i] = recv_waits1[i];
1608: recv_waits[2 * i + 1] = recv_waits2[i];
1609: }
1610: stash->recv_waits = recv_waits;
1612: PetscCall(PetscFree(recv_waits1));
1613: PetscCall(PetscFree(recv_waits2));
1615: stash->svalues = svalues;
1616: stash->sindices = sindices;
1617: stash->rvalues = rvalues;
1618: stash->rindices = rindices;
1619: stash->send_waits = send_waits;
1620: stash->nsends = nsends;
1621: stash->nrecvs = nreceives;
1622: stash->reproduce_count = 0;
1623: PetscFunctionReturn(PETSC_SUCCESS);
1624: }
1626: static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A, PetscInt mb, PetscInt nb)
1627: {
1628: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1630: PetscFunctionBegin;
1631: PetscCheck(!A->preallocated, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Cannot change block sizes after MatSetUp");
1632: PetscCheck(mb >= 1 || mb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "mb %" PetscInt_FMT " must be at least 1", mb);
1633: PetscCheck(nb >= 1 || nb == PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "nb %" PetscInt_FMT " must be at least 1", nb);
1634: PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb));
1635: PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb));
1636: PetscFunctionReturn(PETSC_SUCCESS);
1637: }
1639: /*@
1640: MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of
1641: the `MATSCALAPACK` matrix
1643: Logically Collective
1645: Input Parameters:
1646: + A - a `MATSCALAPACK` matrix
1647: . mb - the row block size
1648: - nb - the column block size
1650: Level: intermediate
1652: Note:
1653: This block size has a different meaning from the block size associated with `MatSetBlockSize()` used for sparse matrices
1655: .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKGetBlockSizes()`
1656: @*/
1657: PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A, PetscInt mb, PetscInt nb)
1658: {
1659: PetscFunctionBegin;
1663: PetscTryMethod(A, "MatScaLAPACKSetBlockSizes_C", (Mat, PetscInt, PetscInt), (A, mb, nb));
1664: PetscFunctionReturn(PETSC_SUCCESS);
1665: }
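/*
   Illustrative sketch (not part of the original source): typical use of MatScaLAPACKSetBlockSizes().
   The block sizes must be set before MatSetUp(); the 32x32 value and the helper name are assumptions
   made for this example only.
*/
static PetscErrorCode ExampleSetBlockSizes(MPI_Comm comm, PetscInt M, PetscInt N, Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetType(*A, MATSCALAPACK));
  PetscCall(MatSetSizes(*A, PETSC_DECIDE, PETSC_DECIDE, M, N));
  PetscCall(MatScaLAPACKSetBlockSizes(*A, 32, 32)); /* row and column block sizes; must precede MatSetUp() */
  PetscCall(MatSetUp(*A));
  PetscFunctionReturn(PETSC_SUCCESS);
}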
1667: static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A, PetscInt *mb, PetscInt *nb)
1668: {
1669: Mat_ScaLAPACK *a = (Mat_ScaLAPACK *)A->data;
1671: PetscFunctionBegin;
1672: if (mb) *mb = a->mb;
1673: if (nb) *nb = a->nb;
1674: PetscFunctionReturn(PETSC_SUCCESS);
1675: }
1677: /*@
1678: MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of
1679: the `MATSCALAPACK` matrix
1681: Not Collective
1683: Input Parameter:
1684: . A - a `MATSCALAPACK` matrix
1686: Output Parameters:
1687: + mb - the row block size
1688: - nb - the column block size
1690: Level: intermediate
1692: Note:
1693: This block size has a different meaning from the block size associated with `MatSetBlockSize()` used for sparse matrices
1695: .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MatCreateScaLAPACK()`, `MatScaLAPACKSetBlockSizes()`
1696: @*/
1697: PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A, PetscInt *mb, PetscInt *nb)
1698: {
1699: PetscFunctionBegin;
1701: PetscUseMethod(A, "MatScaLAPACKGetBlockSizes_C", (Mat, PetscInt *, PetscInt *), (A, mb, nb));
1702: PetscFunctionReturn(PETSC_SUCCESS);
1703: }
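/*
   Illustrative sketch (not part of the original source): querying the block sizes actually in use,
   e.g. after defaults or the -mat_scalapack_block_sizes option were applied. The helper name is
   hypothetical.
*/
static PetscErrorCode ExampleReportBlockSizes(Mat A)
{
  PetscInt mb, nb;

  PetscFunctionBegin;
  PetscCall(MatScaLAPACKGetBlockSizes(A, &mb, &nb));
  PetscCall(PetscPrintf(PetscObjectComm((PetscObject)A), "ScaLAPACK block sizes: %" PetscInt_FMT " x %" PetscInt_FMT "\n", mb, nb));
  PetscFunctionReturn(PETSC_SUCCESS);
}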
1705: PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *, PetscMPIInt *, PetscInt **, PetscInt **, PetscScalar **, PetscInt *);
1706: PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash *);
1708: /*MC
1709: MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package
1711:   Use `./configure --download-scalapack` to have PETSc installed with ScaLAPACK support
1713: Options Database Keys:
1714: + -mat_type scalapack - sets the matrix type to `MATSCALAPACK`
1715: . -pc_factor_mat_solver_type scalapack - to use this direct solver with the option `-pc_type lu`
1716: . -mat_scalapack_grid_height - sets the process grid height used for the 2D block-cyclic distribution of the internal matrix
1717: - -mat_scalapack_block_sizes - size of the blocks to use (one or two comma-separated integers)
1719: Level: intermediate
1721: Note:
1722:   Unlike most matrix formats, this format does not store all the matrix entries for a contiguous
1723:   range of rows on an MPI rank. Use `MatGetOwnershipIS()` to determine which entries are stored on
1724:   a given rank.
1726: .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatGetOwnershipIS()`, `MatCreateScaLAPACK()`
1727: M*/
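/*
   Illustrative sketch (not part of the original source): selecting MATSCALAPACK through the options
   database, as described in the man page above. Running with -mat_type scalapack (and optionally
   -mat_scalapack_block_sizes 16,32) picks this type at runtime; the global size 500 and the helper
   name are assumptions for this example only.
*/
static PetscErrorCode ExampleCreateFromOptions(MPI_Comm comm, Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, PETSC_DECIDE, PETSC_DECIDE, 500, 500));
  PetscCall(MatSetFromOptions(*A)); /* honors -mat_type scalapack and the -mat_scalapack_* options */
  PetscCall(MatSetUp(*A));
  PetscFunctionReturn(PETSC_SUCCESS);
}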
1729: PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A)
1730: {
1731: Mat_ScaLAPACK *a;
1732: PetscBool flg, flg1;
1733: Mat_ScaLAPACK_Grid *grid;
1734: MPI_Comm icomm;
1735: PetscBLASInt nprow, npcol, myrow, mycol;
1736: PetscInt optv1, k = 2, array[2] = {0, 0};
1737: PetscMPIInt size;
1739: PetscFunctionBegin;
1740: PetscCall(PetscMemcpy(A->ops, &MatOps_Values, sizeof(struct _MatOps)));
1741: A->insertmode = NOT_SET_VALUES;
1743: PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)A), 1, &A->stash));
1744: A->stash.ScatterBegin = MatStashScatterBegin_ScaLAPACK;
1745: A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref;
1746: A->stash.ScatterEnd = MatStashScatterEnd_Ref;
1747: A->stash.ScatterDestroy = NULL;
1749: PetscCall(PetscNew(&a));
1750: A->data = (void *)a;
1752:   /* Grid needs to be shared between multiple Mats on the same communicator, implemented via attribute caching on the MPI_Comm */
1753: if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) {
1754: PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, MPI_COMM_NULL_DELETE_FN, &Petsc_ScaLAPACK_keyval, (void *)0));
1755: PetscCall(PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free));
1756: PetscCall(PetscCitationsRegister(ScaLAPACKCitation, &ScaLAPACKCite));
1757: }
1758: PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)A), &icomm, NULL));
1759: PetscCallMPI(MPI_Comm_get_attr(icomm, Petsc_ScaLAPACK_keyval, (void **)&grid, (int *)&flg));
1760: if (!flg) {
1761: PetscCall(PetscNew(&grid));
1763: PetscCallMPI(MPI_Comm_size(icomm, &size));
1764: grid->nprow = (PetscInt)(PetscSqrtReal((PetscReal)size) + 0.001);
1766: PetscOptionsBegin(PetscObjectComm((PetscObject)A), ((PetscObject)A)->prefix, "ScaLAPACK Grid Options", "Mat");
1767: PetscCall(PetscOptionsInt("-mat_scalapack_grid_height", "Grid Height", "None", grid->nprow, &optv1, &flg1));
1768: if (flg1) {
1769: PetscCheck(size % optv1 == 0, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_INCOMP, "Grid Height %" PetscInt_FMT " must evenly divide CommSize %d", optv1, size);
1770: grid->nprow = optv1;
1771: }
1772: PetscOptionsEnd();
1774: if (size % grid->nprow) grid->nprow = 1; /* cannot use a squarish grid, use a 1d grid */
1775: grid->npcol = size / grid->nprow;
1776: PetscCall(PetscBLASIntCast(grid->nprow, &nprow));
1777: PetscCall(PetscBLASIntCast(grid->npcol, &npcol));
1778: grid->ictxt = Csys2blacs_handle(icomm);
1779: Cblacs_gridinit(&grid->ictxt, "R", nprow, npcol);
1780: Cblacs_gridinfo(grid->ictxt, &nprow, &npcol, &myrow, &mycol);
1781: grid->grid_refct = 1;
1782: grid->nprow = nprow;
1783: grid->npcol = npcol;
1784: grid->myrow = myrow;
1785: grid->mycol = mycol;
1786: /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */
1787: grid->ictxrow = Csys2blacs_handle(icomm);
1788: Cblacs_gridinit(&grid->ictxrow, "R", 1, size);
1789: grid->ictxcol = Csys2blacs_handle(icomm);
1790: Cblacs_gridinit(&grid->ictxcol, "R", size, 1);
1791: PetscCallMPI(MPI_Comm_set_attr(icomm, Petsc_ScaLAPACK_keyval, (void *)grid));
1793: } else grid->grid_refct++;
1794: PetscCall(PetscCommDestroy(&icomm));
1795: a->grid = grid;
1796: a->mb = DEFAULT_BLOCKSIZE;
1797: a->nb = DEFAULT_BLOCKSIZE;
1799: PetscOptionsBegin(PetscObjectComm((PetscObject)A), NULL, "ScaLAPACK Options", "Mat");
1800: PetscCall(PetscOptionsIntArray("-mat_scalapack_block_sizes", "Size of the blocks to use (one or two comma-separated integers)", "MatCreateScaLAPACK", array, &k, &flg));
1801: if (flg) {
1802: a->mb = array[0];
1803: a->nb = (k > 1) ? array[1] : a->mb;
1804: }
1805: PetscOptionsEnd();
1807: a->roworiented = PETSC_TRUE;
1808: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetOwnershipIS_C", MatGetOwnershipIS_ScaLAPACK));
1809: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKSetBlockSizes_C", MatScaLAPACKSetBlockSizes_ScaLAPACK));
1810: PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatScaLAPACKGetBlockSizes_C", MatScaLAPACKGetBlockSizes_ScaLAPACK));
1811: PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSCALAPACK));
1812: PetscFunctionReturn(PETSC_SUCCESS);
1813: }
1815: /*@C
1816: MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format
1817: (2D block cyclic distribution) for a `MATSCALAPACK` matrix
1819: Collective
1821: Input Parameters:
1822: + comm - MPI communicator
1823: . mb - row block size (or `PETSC_DECIDE` to have it set)
1824: . nb - column block size (or `PETSC_DECIDE` to have it set)
1825: . M - number of global rows
1826: . N - number of global columns
1827: . rsrc - coordinate of process that owns the first row of the distributed matrix
1828: - csrc - coordinate of process that owns the first column of the distributed matrix
1830: Output Parameter:
1831: . A - the matrix
1833: Options Database Key:
1834: . -mat_scalapack_block_sizes - size of the blocks to use (one or two comma-separated integers)
1836: Level: intermediate
1838: Notes:
1839: If `PETSC_DECIDE` is used for the block sizes, then an appropriate value is chosen
1841: It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
1842: MatXXXXSetPreallocation() paradigm instead of this routine directly.
1843: [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
1845:   Storage is completely managed by ScaLAPACK, so this requires PETSc to be
1846:   configured with ScaLAPACK. In particular, PETSc's local sizes lose
1847:   significance and are thus ignored. The block sizes refer to the values
1848:   used for the 2D block-cyclic distribution and do not have the same meaning as the block size in `MATBAIJ`.
1850: .seealso: [](ch_matrices), `Mat`, `MATSCALAPACK`, `MATDENSE`, `MATELEMENTAL`, `MatCreate()`, `MatCreateDense()`, `MatSetValues()`
1851: @*/
1852: PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm, PetscInt mb, PetscInt nb, PetscInt M, PetscInt N, PetscInt rsrc, PetscInt csrc, Mat *A)
1853: {
1854: Mat_ScaLAPACK *a;
1855: PetscInt m, n;
1857: PetscFunctionBegin;
1858: PetscCall(MatCreate(comm, A));
1859: PetscCall(MatSetType(*A, MATSCALAPACK));
1860: PetscCheck(M != PETSC_DECIDE && N != PETSC_DECIDE, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot use PETSC_DECIDE for matrix dimensions");
1861: /* rows and columns are NOT distributed according to PetscSplitOwnership */
1862: m = PETSC_DECIDE;
1863: PetscCall(PetscSplitOwnershipEqual(comm, &m, &M));
1864: n = PETSC_DECIDE;
1865: PetscCall(PetscSplitOwnershipEqual(comm, &n, &N));
1866: PetscCall(MatSetSizes(*A, m, n, M, N));
1867: a = (Mat_ScaLAPACK *)(*A)->data;
1868: PetscCall(PetscBLASIntCast(M, &a->M));
1869: PetscCall(PetscBLASIntCast(N, &a->N));
1870: PetscCall(PetscBLASIntCast((mb == PETSC_DECIDE) ? DEFAULT_BLOCKSIZE : mb, &a->mb));
1871: PetscCall(PetscBLASIntCast((nb == PETSC_DECIDE) ? a->mb : nb, &a->nb));
1872: PetscCall(PetscBLASIntCast(rsrc, &a->rsrc));
1873: PetscCall(PetscBLASIntCast(csrc, &a->csrc));
1874: PetscCall(MatSetUp(*A));
1875: PetscFunctionReturn(PETSC_SUCCESS);
1876: }
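/*
   Illustrative sketch (not part of the original source): creating a matrix with MatCreateScaLAPACK()
   and filling only the locally owned entries obtained from MatGetOwnershipIS(), since this format
   does not own contiguous row blocks. The global size 100, source coordinates (0,0), the entry
   formula, and the helper name are assumptions for this example only.
*/
static PetscErrorCode ExampleAssembleScaLAPACK(MPI_Comm comm)
{
  Mat             A;
  IS              isrows, iscols;
  const PetscInt *rows, *cols;
  PetscInt        i, j, nrows, ncols;
  PetscScalar     v;

  PetscFunctionBegin;
  PetscCall(MatCreateScaLAPACK(comm, PETSC_DECIDE, PETSC_DECIDE, 100, 100, 0, 0, &A));
  PetscCall(MatGetOwnershipIS(A, &isrows, &iscols));
  PetscCall(ISGetLocalSize(isrows, &nrows));
  PetscCall(ISGetLocalSize(iscols, &ncols));
  PetscCall(ISGetIndices(isrows, &rows));
  PetscCall(ISGetIndices(iscols, &cols));
  for (i = 0; i < nrows; i++) {
    for (j = 0; j < ncols; j++) {
      v = (PetscScalar)(rows[i] + cols[j]); /* arbitrary value for illustration */
      PetscCall(MatSetValues(A, 1, &rows[i], 1, &cols[j], &v, INSERT_VALUES));
    }
  }
  PetscCall(ISRestoreIndices(isrows, &rows));
  PetscCall(ISRestoreIndices(iscols, &cols));
  PetscCall(ISDestroy(&isrows));
  PetscCall(ISDestroy(&iscols));
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatDestroy(&A));
  PetscFunctionReturn(PETSC_SUCCESS);
}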