Actual source code: crl.c
2: /*
3: Defines a matrix-vector product for the MATSEQAIJCRL matrix class.
4: This class is derived from the MATSEQAIJ class and retains the
5: compressed row storage (aka Yale sparse matrix format) but augments
6: it with a column oriented storage that is more efficient for
7: matrix vector products on Vector machines.
9: CRL stands for constant row length (that is the same number of columns
10: is kept (padded with zeros) for each row of the sparse matrix.
11: */
12: #include <../src/mat/impls/aij/seq/crl/crl.h>
14: PetscErrorCode MatDestroy_SeqAIJCRL(Mat A)
15: {
16: Mat_AIJCRL *aijcrl = (Mat_AIJCRL *)A->spptr;
18: PetscFunctionBegin;
19: /* Free everything in the Mat_AIJCRL data structure. */
20: if (aijcrl) PetscCall(PetscFree2(aijcrl->acols, aijcrl->icols));
21: PetscCall(PetscFree(A->spptr));
22: PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQAIJ));
23: PetscCall(MatDestroy_SeqAIJ(A));
24: PetscFunctionReturn(PETSC_SUCCESS);
25: }
27: PetscErrorCode MatDuplicate_AIJCRL(Mat A, MatDuplicateOption op, Mat *M)
28: {
29: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot duplicate AIJCRL matrices yet");
30: }
32: PetscErrorCode MatSeqAIJCRL_create_aijcrl(Mat A)
33: {
34: Mat_SeqAIJ *a = (Mat_SeqAIJ *)(A)->data;
35: Mat_AIJCRL *aijcrl = (Mat_AIJCRL *)A->spptr;
36: PetscInt m = A->rmap->n; /* Number of rows in the matrix. */
37: PetscInt *aj = a->j; /* From the CSR representation; points to the beginning of each row. */
38: PetscInt i, j, rmax = a->rmax, *icols, *ilen = a->ilen;
39: MatScalar *aa = a->a;
40: PetscScalar *acols;
42: PetscFunctionBegin;
43: aijcrl->nz = a->nz;
44: aijcrl->m = A->rmap->n;
45: aijcrl->rmax = rmax;
47: PetscCall(PetscFree2(aijcrl->acols, aijcrl->icols));
48: PetscCall(PetscMalloc2(rmax * m, &aijcrl->acols, rmax * m, &aijcrl->icols));
49: acols = aijcrl->acols;
50: icols = aijcrl->icols;
51: for (i = 0; i < m; i++) {
52: for (j = 0; j < ilen[i]; j++) {
53: acols[j * m + i] = *aa++;
54: icols[j * m + i] = *aj++;
55: }
56: for (; j < rmax; j++) { /* empty column entries */
57: acols[j * m + i] = 0.0;
58: icols[j * m + i] = (j) ? icols[(j - 1) * m + i] : 0; /* handle case where row is EMPTY */
59: }
60: }
61: PetscCall(PetscInfo(A, "Percentage of 0's introduced for vectorized multiply %g. Rmax= %" PetscInt_FMT "\n", 1.0 - ((double)a->nz) / ((double)(rmax * m)), rmax));
62: PetscFunctionReturn(PETSC_SUCCESS);
63: }
65: PetscErrorCode MatAssemblyEnd_SeqAIJCRL(Mat A, MatAssemblyType mode)
66: {
67: Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
69: PetscFunctionBegin;
70: a->inode.use = PETSC_FALSE;
72: PetscCall(MatAssemblyEnd_SeqAIJ(A, mode));
73: if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(PETSC_SUCCESS);
75: /* Now calculate the permutation and grouping information. */
76: PetscCall(MatSeqAIJCRL_create_aijcrl(A));
77: PetscFunctionReturn(PETSC_SUCCESS);
78: }
80: #include <../src/mat/impls/aij/seq/crl/ftn-kernels/fmultcrl.h>
82: /*
83: Shared by both sequential and parallel versions of CRL matrix: MATMPIAIJCRL and MATSEQAIJCRL
84: - the scatter is used only in the parallel version
86: */
87: PetscErrorCode MatMult_AIJCRL(Mat A, Vec xx, Vec yy)
88: {
89: Mat_AIJCRL *aijcrl = (Mat_AIJCRL *)A->spptr;
90: PetscInt m = aijcrl->m; /* Number of rows in the matrix. */
91: PetscInt rmax = aijcrl->rmax, *icols = aijcrl->icols;
92: PetscScalar *acols = aijcrl->acols;
93: PetscScalar *y;
94: const PetscScalar *x;
95: #if !defined(PETSC_USE_FORTRAN_KERNEL_MULTCRL)
96: PetscInt i, j, ii;
97: #endif
99: #if defined(PETSC_HAVE_PRAGMA_DISJOINT)
100: #pragma disjoint(*x, *y, *aa)
101: #endif
103: PetscFunctionBegin;
104: if (aijcrl->xscat) {
105: PetscCall(VecCopy(xx, aijcrl->xwork));
106: /* get remote values needed for local part of multiply */
107: PetscCall(VecScatterBegin(aijcrl->xscat, xx, aijcrl->fwork, INSERT_VALUES, SCATTER_FORWARD));
108: PetscCall(VecScatterEnd(aijcrl->xscat, xx, aijcrl->fwork, INSERT_VALUES, SCATTER_FORWARD));
109: xx = aijcrl->xwork;
110: }
112: PetscCall(VecGetArrayRead(xx, &x));
113: PetscCall(VecGetArray(yy, &y));
115: #if defined(PETSC_USE_FORTRAN_KERNEL_MULTCRL)
116: fortranmultcrl_(&m, &rmax, x, y, icols, acols);
117: #else
119: /* first column */
120: for (j = 0; j < m; j++) y[j] = acols[j] * x[icols[j]];
122: /* other columns */
123: #if defined(PETSC_HAVE_CRAY_VECTOR)
124: #pragma _CRI preferstream
125: #endif
126: for (i = 1; i < rmax; i++) {
127: ii = i * m;
128: #if defined(PETSC_HAVE_CRAY_VECTOR)
129: #pragma _CRI prefervector
130: #endif
131: for (j = 0; j < m; j++) y[j] = y[j] + acols[ii + j] * x[icols[ii + j]];
132: }
133: #endif
134: PetscCall(PetscLogFlops(2.0 * aijcrl->nz - m));
135: PetscCall(VecRestoreArrayRead(xx, &x));
136: PetscCall(VecRestoreArray(yy, &y));
137: PetscFunctionReturn(PETSC_SUCCESS);
138: }
140: /* MatConvert_SeqAIJ_SeqAIJCRL converts a SeqAIJ matrix into a
141: * SeqAIJCRL matrix. This routine is called by the MatCreate_SeqAIJCRL()
142: * routine, but can also be used to convert an assembled SeqAIJ matrix
143: * into a SeqAIJCRL one. */
144: PETSC_INTERN PetscErrorCode MatConvert_SeqAIJ_SeqAIJCRL(Mat A, MatType type, MatReuse reuse, Mat *newmat)
145: {
146: Mat B = *newmat;
147: Mat_AIJCRL *aijcrl;
148: PetscBool sametype;
150: PetscFunctionBegin;
151: if (reuse == MAT_INITIAL_MATRIX) PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
152: PetscCall(PetscObjectTypeCompare((PetscObject)A, type, &sametype));
153: if (sametype) PetscFunctionReturn(PETSC_SUCCESS);
155: PetscCall(PetscNew(&aijcrl));
156: B->spptr = (void *)aijcrl;
158: /* Set function pointers for methods that we inherit from AIJ but override. */
159: B->ops->duplicate = MatDuplicate_AIJCRL;
160: B->ops->assemblyend = MatAssemblyEnd_SeqAIJCRL;
161: B->ops->destroy = MatDestroy_SeqAIJCRL;
162: B->ops->mult = MatMult_AIJCRL;
164: /* If A has already been assembled, compute the permutation. */
165: if (A->assembled) PetscCall(MatSeqAIJCRL_create_aijcrl(B));
166: PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATSEQAIJCRL));
167: *newmat = B;
168: PetscFunctionReturn(PETSC_SUCCESS);
169: }
171: /*@C
172: MatCreateSeqAIJCRL - Creates a sparse matrix of type `MATSEQAIJCRL`.
173: This type inherits from `MATSEQAIJ`, but stores some additional
174: information that is used to allow better vectorization of
175: the matrix-vector product. At the cost of increased storage, the `MATSEQAIJ` formatted
176: matrix can be copied to a format in which pieces of the matrix are
177: stored in ELLPACK format, allowing the vectorized matrix multiply
178: routine to use stride-1 memory accesses.
180: Collective
182: Input Parameters:
183: + comm - MPI communicator, set to `PETSC_COMM_SELF`
184: . m - number of rows
185: . n - number of columns
186: . nz - number of nonzeros per row (same for all rows), ignored if `nnz` is given
187: - nnz - array containing the number of nonzeros in the various rows
188: (possibly different for each row) or `NULL`
190: Output Parameter:
191: . A - the matrix
193: Level: intermediate
195: .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateMPIAIJPERM()`, `MatSetValues()`
196: @*/
197: PetscErrorCode MatCreateSeqAIJCRL(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt nz, const PetscInt nnz[], Mat *A)
198: {
199: PetscFunctionBegin;
200: PetscCall(MatCreate(comm, A));
201: PetscCall(MatSetSizes(*A, m, n, m, n));
202: PetscCall(MatSetType(*A, MATSEQAIJCRL));
203: PetscCall(MatSeqAIJSetPreallocation_SeqAIJ(*A, nz, nnz));
204: PetscFunctionReturn(PETSC_SUCCESS);
205: }
207: PETSC_EXTERN PetscErrorCode MatCreate_SeqAIJCRL(Mat A)
208: {
209: PetscFunctionBegin;
210: PetscCall(MatSetType(A, MATSEQAIJ));
211: PetscCall(MatConvert_SeqAIJ_SeqAIJCRL(A, MATSEQAIJCRL, MAT_INPLACE_MATRIX, &A));
212: PetscFunctionReturn(PETSC_SUCCESS);
213: }