Actual source code: ex1.c


  2: static char help[] = "Tests LU, Cholesky, and QR factorization and MatMatSolve() for a sequential dense matrix. \n\
  3:                       For MATSEQDENSE matrix, the factorization is just a thin wrapper to LAPACK.       \n\
  4:                       For MATSEQDENSECUDA, it uses cusolverDn routines \n\n";

  6: #include <petscmat.h>

  8: static PetscErrorCode createMatsAndVecs(PetscInt m, PetscInt n, PetscInt nrhs, PetscBool full, Mat *_mat, Mat *_RHS, Mat *_SOLU, Vec *_x, Vec *_y, Vec *_b)
  9: {
 10:   PetscRandom rand;
 11:   Mat         mat, RHS, SOLU;
 12:   PetscInt    rstart, rend;
 13:   PetscInt    cstart, cend;
 14:   PetscScalar value = 1.0;
 15:   Vec         x, y, b;

 17:   PetscFunctionBegin;
 18:   /* create multiple vectors RHS and SOLU */
 19:   PetscCall(MatCreate(PETSC_COMM_WORLD, &RHS));
 20:   PetscCall(MatSetSizes(RHS, PETSC_DECIDE, PETSC_DECIDE, m, nrhs));
 21:   PetscCall(MatSetType(RHS, MATDENSE));
 22:   PetscCall(MatSetOptionsPrefix(RHS, "rhs_"));
 23:   PetscCall(MatSetFromOptions(RHS));
 24:   PetscCall(MatSeqDenseSetPreallocation(RHS, NULL));

 26:   PetscCall(PetscRandomCreate(PETSC_COMM_WORLD, &rand));
 27:   PetscCall(PetscRandomSetFromOptions(rand));
 28:   PetscCall(MatSetRandom(RHS, rand));

 30:   if (m == n) {
 31:     PetscCall(MatDuplicate(RHS, MAT_DO_NOT_COPY_VALUES, &SOLU));
 32:   } else {
 33:     PetscCall(MatCreate(PETSC_COMM_WORLD, &SOLU));
 34:     PetscCall(MatSetSizes(SOLU, PETSC_DECIDE, PETSC_DECIDE, n, nrhs));
 35:     PetscCall(MatSetType(SOLU, MATDENSE));
 36:     PetscCall(MatSeqDenseSetPreallocation(SOLU, NULL));
 37:   }
 38:   PetscCall(MatSetRandom(SOLU, rand));

 40:   /* create matrix */
 41:   PetscCall(MatCreate(PETSC_COMM_WORLD, &mat));
 42:   PetscCall(MatSetSizes(mat, PETSC_DECIDE, PETSC_DECIDE, m, n));
 43:   PetscCall(MatSetType(mat, MATDENSE));
 44:   PetscCall(MatSetFromOptions(mat));
 45:   PetscCall(MatSetUp(mat));
 46:   PetscCall(MatGetOwnershipRange(mat, &rstart, &rend));
 47:   PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));
 48:   if (!full) {
 49:     for (PetscInt i = rstart; i < rend; i++) {
 50:       if (m == n) {
 51:         value = (PetscReal)i + 1;
 52:         PetscCall(MatSetValues(mat, 1, &i, 1, &i, &value, INSERT_VALUES));
 53:       } else {
 54:         for (PetscInt j = cstart; j < cend; j++) {
 55:           value = ((PetscScalar)i + 1.) / (PetscSqr(i - j) + 1.);
 56:           PetscCall(MatSetValues(mat, 1, &i, 1, &j, &value, INSERT_VALUES));
 57:         }
 58:       }
 59:     }
 60:     PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
 61:     PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
 62:   } else {
 63:     PetscCall(MatSetRandom(mat, rand));
 64:     if (m == n) {
 65:       Mat T;

 67:       PetscCall(MatMatTransposeMult(mat, mat, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &T));
 68:       PetscCall(MatDestroy(&mat));
 69:       mat = T;
 70:     }
 71:   }

 73:   /* create single vectors */
 74:   PetscCall(MatCreateVecs(mat, &x, &b));
 75:   PetscCall(VecDuplicate(x, &y));
 76:   PetscCall(VecSet(x, value));
 77:   PetscCall(PetscRandomDestroy(&rand));
 78:   *_mat  = mat;
 79:   *_RHS  = RHS;
 80:   *_SOLU = SOLU;
 81:   *_x    = x;
 82:   *_y    = y;
 83:   *_b    = b;
 84:   PetscFunctionReturn(PETSC_SUCCESS);
 85: }

 87: int main(int argc, char **argv)
 88: {
 89:   Mat         mat, F, RHS, SOLU;
 90:   MatInfo     info;
 91:   PetscInt    m = 15, n = 10, i, j, nrhs = 2;
 92:   Vec         x, y, b, ytmp;
 93:   IS          perm;
 94:   PetscReal   norm, tol = PETSC_SMALL;
 95:   PetscMPIInt size;
 96:   char        solver[64];
 97:   PetscBool   inplace, full = PETSC_FALSE, ldl = PETSC_TRUE, qr = PETSC_TRUE;

 99:   PetscFunctionBeginUser;
100:   PetscCall(PetscInitialize(&argc, &argv, (char *)0, help));
101:   PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
102:   PetscCheck(size == 1, PETSC_COMM_WORLD, PETSC_ERR_WRONG_MPI_SIZE, "This is a uniprocessor example only!");
103:   PetscCall(PetscStrncpy(solver, MATSOLVERPETSC, sizeof(solver)));
104:   PetscCall(PetscOptionsGetInt(NULL, NULL, "-m", &m, NULL));
105:   PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &n, NULL));
106:   PetscCall(PetscOptionsGetInt(NULL, NULL, "-nrhs", &nrhs, NULL));
107:   PetscCall(PetscOptionsGetBool(NULL, NULL, "-ldl", &ldl, NULL));
108:   PetscCall(PetscOptionsGetBool(NULL, NULL, "-qr", &qr, NULL));
109:   PetscCall(PetscOptionsGetBool(NULL, NULL, "-full", &full, NULL));
110:   PetscCall(PetscOptionsGetString(NULL, NULL, "-solver_type", solver, sizeof(solver), NULL));

112:   PetscCall(createMatsAndVecs(n, n, nrhs, full, &mat, &RHS, &SOLU, &x, &y, &b));
113:   PetscCall(VecDuplicate(y, &ytmp));

115:   /* Only SeqDense* support in-place factorizations and NULL permutations */
116:   PetscCall(PetscObjectBaseTypeCompare((PetscObject)mat, MATSEQDENSE, &inplace));
117:   PetscCall(MatGetLocalSize(mat, &i, NULL));
118:   PetscCall(MatGetOwnershipRange(mat, &j, NULL));
119:   PetscCall(ISCreateStride(PETSC_COMM_WORLD, i, j, 1, &perm));

121:   PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
122:   PetscCall(PetscPrintf(PETSC_COMM_WORLD, "matrix nonzeros = %" PetscInt_FMT ", allocated nonzeros = %" PetscInt_FMT "\n", (PetscInt)info.nz_used, (PetscInt)info.nz_allocated));
123:   PetscCall(MatMult(mat, x, b));

125:   /* Cholesky factorization - perm and factinfo are ignored by LAPACK */
126:   /* in-place Cholesky */
127:   if (inplace) {
128:     Mat RHS2;

130:     PetscCall(MatDuplicate(mat, MAT_COPY_VALUES, &F));
131:     if (!ldl) PetscCall(MatSetOption(F, MAT_SPD, PETSC_TRUE));
132:     PetscCall(MatCholeskyFactor(F, perm, 0));
133:     PetscCall(MatSolve(F, b, y));
134:     PetscCall(VecAXPY(y, -1.0, x));
135:     PetscCall(VecNorm(y, NORM_2, &norm));
136:     if (norm > tol) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Warning: Norm of error for in-place Cholesky %g\n", (double)norm));

138:     PetscCall(MatMatSolve(F, RHS, SOLU));
139:     PetscCall(MatMatMult(mat, SOLU, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &RHS2));
140:     PetscCall(MatAXPY(RHS, -1.0, RHS2, SAME_NONZERO_PATTERN));
141:     PetscCall(MatNorm(RHS, NORM_FROBENIUS, &norm));
142:     if (norm > tol) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Error: Norm of residual for in-place Cholesky (MatMatSolve) %g\n", (double)norm));
143:     PetscCall(MatDestroy(&F));
144:     PetscCall(MatDestroy(&RHS2));
145:   }

147:   /* out-of-place Cholesky */
148:   PetscCall(MatGetFactor(mat, solver, MAT_FACTOR_CHOLESKY, &F));
149:   if (!ldl) PetscCall(MatSetOption(F, MAT_SPD, PETSC_TRUE));
150:   PetscCall(MatCholeskyFactorSymbolic(F, mat, perm, 0));
151:   PetscCall(MatCholeskyFactorNumeric(F, mat, 0));
152:   PetscCall(MatSolve(F, b, y));
153:   PetscCall(VecAXPY(y, -1.0, x));
154:   PetscCall(VecNorm(y, NORM_2, &norm));
155:   if (norm > tol) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Warning: Norm of error for out-of-place Cholesky %g\n", (double)norm));
156:   PetscCall(MatDestroy(&F));

158:   /* LU factorization - perms and factinfo are ignored by LAPACK */
159:   i = n - 1;
160:   PetscCall(MatZeroRows(mat, 1, &i, -1.0, NULL, NULL));
161:   PetscCall(MatMult(mat, x, b));

163:   /* in-place LU */
164:   if (inplace) {
165:     Mat RHS2;

167:     PetscCall(MatDuplicate(mat, MAT_COPY_VALUES, &F));
168:     PetscCall(MatLUFactor(F, perm, perm, 0));
169:     PetscCall(MatSolve(F, b, y));
170:     PetscCall(VecAXPY(y, -1.0, x));
171:     PetscCall(VecNorm(y, NORM_2, &norm));
172:     if (norm > tol) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Warning: Norm of error for in-place LU %g\n", (double)norm));
173:     PetscCall(MatMatSolve(F, RHS, SOLU));
174:     PetscCall(MatMatMult(mat, SOLU, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &RHS2));
175:     PetscCall(MatAXPY(RHS, -1.0, RHS2, SAME_NONZERO_PATTERN));
176:     PetscCall(MatNorm(RHS, NORM_FROBENIUS, &norm));
177:     if (norm > tol) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Error: Norm of residual for in-place LU (MatMatSolve) %g\n", (double)norm));
178:     PetscCall(MatDestroy(&F));
179:     PetscCall(MatDestroy(&RHS2));
180:   }

182:   /* out-of-place LU */
183:   PetscCall(MatGetFactor(mat, solver, MAT_FACTOR_LU, &F));
184:   PetscCall(MatLUFactorSymbolic(F, mat, perm, perm, 0));
185:   PetscCall(MatLUFactorNumeric(F, mat, 0));
186:   PetscCall(MatSolve(F, b, y));
187:   PetscCall(VecAXPY(y, -1.0, x));
188:   PetscCall(VecNorm(y, NORM_2, &norm));
189:   if (norm > tol) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Warning: Norm of error for out-of-place LU %g\n", (double)norm));

191:   /* free space */
192:   PetscCall(ISDestroy(&perm));
193:   PetscCall(MatDestroy(&F));
194:   PetscCall(MatDestroy(&mat));
195:   PetscCall(MatDestroy(&RHS));
196:   PetscCall(MatDestroy(&SOLU));
197:   PetscCall(VecDestroy(&x));
198:   PetscCall(VecDestroy(&b));
199:   PetscCall(VecDestroy(&y));
200:   PetscCall(VecDestroy(&ytmp));

202:   if (qr) {
203:     /* setup rectangular */
204:     PetscCall(createMatsAndVecs(m, n, nrhs, full, &mat, &RHS, &SOLU, &x, &y, &b));
205:     PetscCall(VecDuplicate(y, &ytmp));

207:     /* QR factorization - perms and factinfo are ignored by LAPACK */
208:     PetscCall(MatMult(mat, x, b));

210:     /* in-place QR */
211:     if (inplace) {
212:       Mat SOLU2;

214:       PetscCall(MatDuplicate(mat, MAT_COPY_VALUES, &F));
215:       PetscCall(MatQRFactor(F, NULL, 0));
216:       PetscCall(MatSolve(F, b, y));
217:       PetscCall(VecAXPY(y, -1.0, x));
218:       PetscCall(VecNorm(y, NORM_2, &norm));
219:       if (norm > tol) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Warning: Norm of error for in-place QR %g\n", (double)norm));
220:       PetscCall(MatMatMult(mat, SOLU, MAT_REUSE_MATRIX, PETSC_DEFAULT, &RHS));
221:       PetscCall(MatDuplicate(SOLU, MAT_DO_NOT_COPY_VALUES, &SOLU2));
222:       PetscCall(MatMatSolve(F, RHS, SOLU2));
223:       PetscCall(MatAXPY(SOLU2, -1.0, SOLU, SAME_NONZERO_PATTERN));
224:       PetscCall(MatNorm(SOLU2, NORM_FROBENIUS, &norm));
225:       if (norm > tol) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Error: Norm of error for in-place QR (MatMatSolve) %g\n", (double)norm));
226:       PetscCall(MatDestroy(&F));
227:       PetscCall(MatDestroy(&SOLU2));
228:     }

230:     /* out-of-place QR */
231:     PetscCall(MatGetFactor(mat, solver, MAT_FACTOR_QR, &F));
232:     PetscCall(MatQRFactorSymbolic(F, mat, NULL, NULL));
233:     PetscCall(MatQRFactorNumeric(F, mat, NULL));
234:     PetscCall(MatSolve(F, b, y));
235:     PetscCall(VecAXPY(y, -1.0, x));
236:     PetscCall(VecNorm(y, NORM_2, &norm));
237:     if (norm > tol) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Warning: Norm of error for out-of-place QR %g\n", (double)norm));

239:     if (m == n) {
240:       /* out-of-place MatSolveTranspose */
241:       PetscCall(MatMultTranspose(mat, x, b));
242:       PetscCall(MatSolveTranspose(F, b, y));
243:       PetscCall(VecAXPY(y, -1.0, x));
244:       PetscCall(VecNorm(y, NORM_2, &norm));
245:       if (norm > tol) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "Warning: Norm of error for out-of-place QR %g\n", (double)norm));
246:     }

248:     /* free space */
249:     PetscCall(MatDestroy(&F));
250:     PetscCall(MatDestroy(&mat));
251:     PetscCall(MatDestroy(&RHS));
252:     PetscCall(MatDestroy(&SOLU));
253:     PetscCall(VecDestroy(&x));
254:     PetscCall(VecDestroy(&b));
255:     PetscCall(VecDestroy(&y));
256:     PetscCall(VecDestroy(&ytmp));
257:   }
258:   PetscCall(PetscFinalize());
259:   return 0;
260: }

262: /*TEST

264:    test:

266:    test:
267:      requires: cuda
268:      suffix: seqdensecuda
269:      args: -mat_type seqdensecuda -rhs_mat_type seqdensecuda -ldl 0 -solver_type {{petsc cuda}}
270:      output_file: output/ex1_1.out

272:    test:
273:      requires: cuda
274:      suffix: seqdensecuda_2
275:      args: -ldl 0 -solver_type cuda
276:      output_file: output/ex1_1.out

278:    test:
279:      requires: cuda
280:      suffix: seqdensecuda_seqaijcusparse
281:      args: -mat_type seqaijcusparse -rhs_mat_type seqdensecuda -qr 0
282:      output_file: output/ex1_2.out

284:    test:
285:      requires: cuda viennacl
286:      suffix: seqdensecuda_seqaijviennacl
287:      args: -mat_type seqaijviennacl -rhs_mat_type seqdensecuda -qr 0
288:      output_file: output/ex1_2.out

290:    test:
291:      suffix: 4
292:      args: -m 10 -n 10
293:      output_file: output/ex1_1.out

295: TEST*/