Actual source code: ex32.c


  2: static char help[] = "Tests MATSEQDENSECUDA\n\n";

  4: #include <petscmat.h>

  6: int main(int argc, char **argv)
  7: {
  8:   Mat       A, AC, B;
  9:   PetscInt  m = 10, n = 10;
 10:   PetscReal r, tol    = 10 * PETSC_SMALL;

 12:   PetscFunctionBeginUser;
 13:   PetscCall(PetscInitialize(&argc, &argv, (char *)0, help));
 14:   PetscCall(PetscOptionsGetInt(NULL, NULL, "-m", &m, NULL));
 15:   PetscCall(PetscOptionsGetInt(NULL, NULL, "-n", &n, NULL));
 16:   PetscCall(MatCreate(PETSC_COMM_SELF, &A));
 17:   PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, m, n));
 18:   PetscCall(MatSetType(A, MATSEQDENSE));
 19:   PetscCall(MatSetFromOptions(A));
 20:   PetscCall(MatSeqDenseSetPreallocation(A, NULL));
 21:   PetscCall(MatSetRandom(A, NULL));
 22: #if 0
 23:   PetscInt       i,j;
 24:   PetscScalar    val;
 25:   for (i=0; i<m; i++) {
 26:     for (j=0; j<n; j++) {
 27:       val = (PetscScalar)(i+j);
 28:       PetscCall(MatSetValues(A,1,&i,1,&j,&val,INSERT_VALUES));
 29:     }
 30:   }
 31:   PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
 32:   PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
 33: #endif

 35:   /* Create a CUDA version of A */
 36: #if defined(PETSC_HAVE_CUDA)
 37:   PetscCall(MatConvert(A, MATSEQDENSECUDA, MAT_INITIAL_MATRIX, &AC));
 38: #else
 39:   PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &AC));
 40: #endif
 41:   PetscCall(MatDuplicate(AC, MAT_COPY_VALUES, &B));

 43:   /* full CUDA AXPY */
 44:   PetscCall(MatAXPY(B, -1.0, AC, SAME_NONZERO_PATTERN));
 45:   PetscCall(MatNorm(B, NORM_INFINITY, &r));
 46:   PetscCheck(r == 0.0, PetscObjectComm((PetscObject)B), PETSC_ERR_PLIB, "Error MatDuplicate + MatCopy + MatAXPY %g", (double)r);

 48:   /* test Copy */
 49:   PetscCall(MatCopy(AC, B, SAME_NONZERO_PATTERN));

 51:   /* call MatAXPY_Basic since B is CUDA, A is CPU,  */
 52:   PetscCall(MatAXPY(B, -1.0, A, SAME_NONZERO_PATTERN));
 53:   PetscCall(MatNorm(B, NORM_INFINITY, &r));
 54:   PetscCheck(r == 0.0, PetscObjectComm((PetscObject)B), PETSC_ERR_PLIB, "Error MatDuplicate + MatCopy + MatAXPY_Basic %g", (double)r);

 56:   if (m == n) {
 57:     Mat B1, B2;

 59:     PetscCall(MatCopy(AC, B, SAME_NONZERO_PATTERN));
 60:     /* full CUDA PtAP */
 61:     PetscCall(MatPtAP(B, AC, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &B1));

 63:     /* CPU PtAP since A is on the CPU only */
 64:     PetscCall(MatPtAP(B, A, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &B2));

 66:     PetscCall(MatAXPY(B2, -1.0, B1, SAME_NONZERO_PATTERN));
 67:     PetscCall(MatNorm(B2, NORM_INFINITY, &r));
 68:     PetscCheck(r <= tol, PetscObjectComm((PetscObject)B), PETSC_ERR_PLIB, "Error MatPtAP %g", (double)r);

 70:     /* test reuse */
 71:     PetscCall(MatPtAP(B, AC, MAT_REUSE_MATRIX, PETSC_DEFAULT, &B1));
 72:     PetscCall(MatPtAP(B, A, MAT_REUSE_MATRIX, PETSC_DEFAULT, &B2));
 73:     PetscCall(MatAXPY(B2, -1.0, B1, SAME_NONZERO_PATTERN));
 74:     PetscCall(MatNorm(B2, NORM_INFINITY, &r));
 75:     PetscCheck(r <= tol, PetscObjectComm((PetscObject)B), PETSC_ERR_PLIB, "Error MatPtAP %g", (double)r);

 77:     PetscCall(MatDestroy(&B1));
 78:     PetscCall(MatDestroy(&B2));
 79:   }

 81:   PetscCall(MatDestroy(&B));
 82:   PetscCall(MatDestroy(&AC));
 83:   PetscCall(MatDestroy(&A));
 84:   PetscCall(PetscFinalize());
 85:   return 0;
 86: }

 88: /*TEST

 90:    build:
 91:      requires: cuda

 93:    test:
 94:      output_file: output/ex32_1.out
 95:      args: -m {{3 5 12}} -n {{3 5 12}}
 96:      suffix: seqdensecuda

 98: TEST*/