Actual source code: ex183.c

static char help[] = "Example of extracting an array of MPI submatrices from a given MPI matrix.\n"
                     "This test can only be run in parallel.\n"
                     "\n";

#include <petscmat.h>

int main(int argc, char **args)
{
  Mat          A, *submats;
  MPI_Comm     subcomm;
  PetscMPIInt  rank, size, subrank, subsize, color;
  PetscInt     m, n, N, bs, rstart, rend, i, j, k, total_subdomains, hash, nsubdomains = 1;
  PetscInt     nis, *cols, gnsubdomains, gsubdomainnums[1], gsubdomainperm[1], s, gs;
  PetscInt    *rowindices, *colindices, idx, rep;
  PetscScalar *vals;
  IS           rowis[1], colis[1];
  PetscViewer  viewer;
  PetscBool    permute_indices, flg;

  PetscFunctionBeginUser;
  PetscCall(PetscInitialize(&argc, &args, (char *)0, help));
  PetscCallMPI(MPI_Comm_size(PETSC_COMM_WORLD, &size));
  PetscCallMPI(MPI_Comm_rank(PETSC_COMM_WORLD, &rank));

  PetscOptionsBegin(PETSC_COMM_WORLD, NULL, "ex183", "Mat");
  m = 5;
  PetscCall(PetscOptionsInt("-m", "Local matrix size", "MatSetSizes", m, &m, &flg));
  total_subdomains = size - 1;
  PetscCall(PetscOptionsInt("-total_subdomains", "Number of submatrices where 0 < n <= comm size", "MatCreateSubMatricesMPI", total_subdomains, &total_subdomains, &flg));
  permute_indices = PETSC_FALSE;
  PetscCall(PetscOptionsBool("-permute_indices", "Whether to permute indices before breaking them into subdomains", "ISCreateGeneral", permute_indices, &permute_indices, &flg));
  hash = 7;
  PetscCall(PetscOptionsInt("-hash", "Permutation factor, which has to be relatively prime to M = size*m (total matrix size)", "ISCreateGeneral", hash, &hash, &flg));
  rep = 2;
  PetscCall(PetscOptionsInt("-rep", "Number of times to carry out submatrix extractions; currently only 1 & 2 are supported", NULL, rep, &rep, &flg));
  PetscOptionsEnd();

  PetscCheck(total_subdomains <= size, PETSC_COMM_WORLD, PETSC_ERR_ARG_WRONG, "Number of subdomains %" PetscInt_FMT " must not exceed comm size %d", total_subdomains, size);
  PetscCheck(total_subdomains >= 1 && total_subdomains <= size, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Number of subdomains must be > 0 and <= %d (comm size), got total_subdomains = %" PetscInt_FMT, size, total_subdomains);
  PetscCheck(rep == 1 || rep == 2, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid number of test repetitions: %" PetscInt_FMT "; must be 1 or 2", rep);

  viewer = PETSC_VIEWER_STDOUT_WORLD;
  /* Create a logically sparse, but effectively dense, matrix for easy verification of submatrix extraction correctness. */
  PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
  PetscCall(MatSetSizes(A, m, m, PETSC_DECIDE, PETSC_DECIDE));
  PetscCall(MatSetFromOptions(A));
  PetscCall(MatSetUp(A));
  PetscCall(MatGetSize(A, NULL, &N));
  PetscCall(MatGetLocalSize(A, NULL, &n));
  PetscCall(MatGetBlockSize(A, &bs));
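  /* Preallocate for every supported matrix type; each Mat*SetPreallocation call is ignored unless A is actually of that type, so only the call matching the chosen -mat_type takes effect. */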
  PetscCall(MatSeqAIJSetPreallocation(A, n, NULL));
  PetscCall(MatMPIAIJSetPreallocation(A, n, NULL, N - n, NULL));
  PetscCall(MatSeqBAIJSetPreallocation(A, bs, n / bs, NULL));
  PetscCall(MatMPIBAIJSetPreallocation(A, bs, n / bs, NULL, (N - n) / bs, NULL));
  PetscCall(MatSeqSBAIJSetPreallocation(A, bs, n / bs, NULL));
  PetscCall(MatMPISBAIJSetPreallocation(A, bs, n / bs, NULL, (N - n) / bs, NULL));

  PetscCall(PetscMalloc2(N, &cols, N, &vals));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
  for (j = 0; j < N; ++j) cols[j] = j;
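  /* Fill each local row densely: entry (i,j) receives the value i*10000 + j, so every value encodes its global row and column, which makes the extracted submatrices easy to verify by inspection. */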
  for (i = rstart; i < rend; i++) {
    for (j = 0; j < N; ++j) vals[j] = i * 10000 + j;
    PetscCall(MatSetValues(A, 1, &i, N, cols, vals, INSERT_VALUES));
  }
  PetscCall(PetscFree2(cols, vals));
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

  PetscCall(PetscViewerASCIIPrintf(viewer, "Initial matrix:\n"));
  PetscCall(MatView(A, viewer));

  /*
     Create subcomms and ISs so that each rank participates in one IS.
     The IS either coalesces adjacent rank indices (contiguous),
     or selects indices by scrambling them using a hash.
  */
  k     = size / total_subdomains + (size % total_subdomains > 0); /* There are up to k ranks to a color */
  color = rank / k;
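  /* For example, with 6 ranks and -total_subdomains 2 this gives k = 3, so ranks 0-2 get color 0 and ranks 3-5 get color 1. */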
  PetscCallMPI(MPI_Comm_split(PETSC_COMM_WORLD, color, rank, &subcomm));
  PetscCallMPI(MPI_Comm_size(subcomm, &subsize));
  PetscCallMPI(MPI_Comm_rank(subcomm, &subrank));
  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
  nis = 1;
  PetscCall(PetscMalloc2(rend - rstart, &rowindices, rend - rstart, &colindices));

  for (j = rstart; j < rend; ++j) {
    if (permute_indices) {
      idx = (j * hash);
    } else {
      idx = j;
    }
    rowindices[j - rstart] = idx % N;
    colindices[j - rstart] = (idx + m) % N;
  }
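  /* With -permute_indices the local index sets are scrambled multiples of hash modulo N. For example, with the defaults m = 5 and hash = 7 on two ranks (N = 10), rank 0 generates row indices {0, 7, 4, 1, 8} and column indices {5, 2, 9, 6, 3} before sorting. */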
  PetscCall(ISCreateGeneral(subcomm, rend - rstart, rowindices, PETSC_COPY_VALUES, &rowis[0]));
  PetscCall(ISCreateGeneral(subcomm, rend - rstart, colindices, PETSC_COPY_VALUES, &colis[0]));
  PetscCall(ISSort(rowis[0]));
  PetscCall(ISSort(colis[0]));
  PetscCall(PetscFree2(rowindices, colindices));
  /*
    Now view the ISs.  To avoid deadlock when viewing a list of objects on different subcomms,
    we need to obtain the global numbers of our local objects and wait for the corresponding global
    number to be viewed.
  */
  PetscCall(PetscViewerASCIIPrintf(viewer, "Subdomains"));
  if (permute_indices) PetscCall(PetscViewerASCIIPrintf(viewer, " (hash=%" PetscInt_FMT ")", hash));
  PetscCall(PetscViewerASCIIPrintf(viewer, ":\n"));
  PetscCall(PetscViewerFlush(viewer));

  nsubdomains = 1;
  for (s = 0; s < nsubdomains; ++s) gsubdomainperm[s] = s;
  PetscCall(PetscObjectsListGetGlobalNumbering(PETSC_COMM_WORLD, 1, (PetscObject *)rowis, &gnsubdomains, gsubdomainnums));
  PetscCall(PetscSortIntWithPermutation(nsubdomains, gsubdomainnums, gsubdomainperm));
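  /* gsubdomainnums[s] is the global number assigned to this rank's s-th IS; walking global numbers 0..gnsubdomains-1 in order below, each rank views its own ISs only when their turn comes, keeping all ranks synchronized. */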
  for (gs = 0, s = 0; gs < gnsubdomains; ++gs) {
    if (s < nsubdomains) {
      PetscInt ss;
      ss = gsubdomainperm[s];
      if (gs == gsubdomainnums[ss]) { /* Global subdomain gs being viewed is my subdomain with local number ss. */
        PetscViewer subviewer = NULL;
        PetscCall(PetscViewerGetSubViewer(viewer, PetscObjectComm((PetscObject)rowis[ss]), &subviewer));
        PetscCall(PetscViewerASCIIPrintf(subviewer, "Row IS %" PetscInt_FMT "\n", gs));
        PetscCall(ISView(rowis[ss], subviewer));
        PetscCall(PetscViewerFlush(subviewer));
        PetscCall(PetscViewerASCIIPrintf(subviewer, "Col IS %" PetscInt_FMT "\n", gs));
        PetscCall(ISView(colis[ss], subviewer));
        PetscCall(PetscViewerRestoreSubViewer(viewer, PetscObjectComm((PetscObject)rowis[ss]), &subviewer));
        ++s;
      }
    }
    PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD));
  }
  PetscCall(PetscViewerFlush(viewer));
  PetscCall(ISSort(rowis[0]));
  PetscCall(ISSort(colis[0]));
  nsubdomains = 1;
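  /* Extract one parallel submatrix per subdomain; each submatrix lives on the subcommunicator of its row/column ISs, which is what distinguishes MatCreateSubMatricesMPI from MatCreateSubMatrices (the latter returns sequential matrices). */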
  PetscCall(MatCreateSubMatricesMPI(A, nsubdomains, rowis, colis, MAT_INITIAL_MATRIX, &submats));
  /*
    Now view the matrices.  To avoid deadlock when viewing a list of objects on different subcomms,
    we need to obtain the global numbers of our local objects and wait for the corresponding global
    number to be viewed.
  */
  PetscCall(PetscViewerASCIIPrintf(viewer, "Submatrices (repetition 1):\n"));
  for (s = 0; s < nsubdomains; ++s) gsubdomainperm[s] = s;
  PetscCall(PetscObjectsListGetGlobalNumbering(PETSC_COMM_WORLD, 1, (PetscObject *)submats, &gnsubdomains, gsubdomainnums));
  PetscCall(PetscSortIntWithPermutation(nsubdomains, gsubdomainnums, gsubdomainperm));
  for (gs = 0, s = 0; gs < gnsubdomains; ++gs) {
    if (s < nsubdomains) {
      PetscInt ss;
      ss = gsubdomainperm[s];
      if (gs == gsubdomainnums[ss]) { /* Global subdomain gs being viewed is my subdomain with local number ss. */
        PetscViewer subviewer = NULL;
        PetscCall(PetscViewerGetSubViewer(viewer, PetscObjectComm((PetscObject)submats[ss]), &subviewer));
        PetscCall(MatView(submats[ss], subviewer));
        PetscCall(PetscViewerRestoreSubViewer(viewer, PetscObjectComm((PetscObject)submats[ss]), &subviewer));
        ++s;
      }
    }
    PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD));
  }
  PetscCall(PetscViewerFlush(viewer));
  if (rep == 1) goto cleanup;
  nsubdomains = 1;
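  /* Repeat the extraction with MAT_REUSE_MATRIX: the submatrices allocated in the first pass are refilled in place rather than recreated, exercising the reuse code path. */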
  PetscCall(MatCreateSubMatricesMPI(A, nsubdomains, rowis, colis, MAT_REUSE_MATRIX, &submats));
  /*
    Now view the matrices.  To avoid deadlock when viewing a list of objects on different subcomms,
    we need to obtain the global numbers of our local objects and wait for the corresponding global
    number to be viewed.
  */
  PetscCall(PetscViewerASCIIPrintf(viewer, "Submatrices (repetition 2):\n"));
  for (s = 0; s < nsubdomains; ++s) gsubdomainperm[s] = s;
  PetscCall(PetscObjectsListGetGlobalNumbering(PETSC_COMM_WORLD, 1, (PetscObject *)submats, &gnsubdomains, gsubdomainnums));
  PetscCall(PetscSortIntWithPermutation(nsubdomains, gsubdomainnums, gsubdomainperm));
  for (gs = 0, s = 0; gs < gnsubdomains; ++gs) {
    if (s < nsubdomains) {
      PetscInt ss;
      ss = gsubdomainperm[s];
      if (gs == gsubdomainnums[ss]) { /* Global subdomain gs being viewed is my subdomain with local number ss. */
        PetscViewer subviewer = NULL;
        PetscCall(PetscViewerGetSubViewer(viewer, PetscObjectComm((PetscObject)submats[ss]), &subviewer));
        PetscCall(MatView(submats[ss], subviewer));
        PetscCall(PetscViewerRestoreSubViewer(viewer, PetscObjectComm((PetscObject)submats[ss]), &subviewer));
        ++s;
      }
    }
    PetscCallMPI(MPI_Barrier(PETSC_COMM_WORLD));
  }
  PetscCall(PetscViewerFlush(viewer));
cleanup:
  for (k = 0; k < nsubdomains; ++k) PetscCall(MatDestroy(submats + k));
  PetscCall(PetscFree(submats));
  for (k = 0; k < nis; ++k) {
    PetscCall(ISDestroy(rowis + k));
    PetscCall(ISDestroy(colis + k));
  }
  PetscCall(MatDestroy(&A));
  PetscCallMPI(MPI_Comm_free(&subcomm));
  PetscCall(PetscFinalize());
  return 0;
}

/*TEST

   test:
      nsize: 2
      args: -total_subdomains 1
      output_file: output/ex183_2_1.out

   test:
      suffix: 2
      nsize: 3
      args: -total_subdomains 2
      output_file: output/ex183_3_2.out

   test:
      suffix: 3
      nsize: 4
      args: -total_subdomains 2
      output_file: output/ex183_4_2.out

   test:
      suffix: 4
      nsize: 6
      args: -total_subdomains 2
      output_file: output/ex183_6_2.out

TEST*/