Actual source code: baijsolvnat2.c

  1: #include <../src/mat/impls/baij/seq/baij.h>
  2: #include <petsc/private/kernels/blockinvert.h>

  4: /*
  5:       Special case where the matrix was ILU(0) factored in the natural
  6:    ordering. This eliminates the need for the column and row permutation.
  7: */
  8: PetscErrorCode MatSolve_SeqBAIJ_2_NaturalOrdering_inplace(Mat A, Vec bb, Vec xx)
  9: {
 10:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
 11:   const PetscInt     n = a->mbs, *vi, *ai = a->i, *aj = a->j, *diag = a->diag;
 12:   const MatScalar   *aa = a->a, *v;
 13:   PetscScalar       *x, s1, s2, x1, x2;
 14:   const PetscScalar *b;
 15:   PetscInt           jdx, idt, idx, nz, i;

 17:   PetscFunctionBegin;
 18:   PetscCall(VecGetArrayRead(bb, &b));
 19:   PetscCall(VecGetArray(xx, &x));

 21:   /* forward solve the lower triangular */
 22:   idx  = 0;
 23:   x[0] = b[0];
 24:   x[1] = b[1];
 25:   for (i = 1; i < n; i++) {
 26:     v  = aa + 4 * ai[i];
 27:     vi = aj + ai[i];
 28:     nz = diag[i] - ai[i];
 29:     idx += 2;
 30:     s1 = b[idx];
 31:     s2 = b[1 + idx];
 32:     while (nz--) {
 33:       jdx = 2 * (*vi++);
 34:       x1  = x[jdx];
 35:       x2  = x[1 + jdx];
 36:       s1 -= v[0] * x1 + v[2] * x2;
 37:       s2 -= v[1] * x1 + v[3] * x2;
 38:       v += 4;
 39:     }
 40:     x[idx]     = s1;
 41:     x[1 + idx] = s2;
 42:   }
 43:   /* backward solve the upper triangular */
 44:   for (i = n - 1; i >= 0; i--) {
 45:     v   = aa + 4 * diag[i] + 4;
 46:     vi  = aj + diag[i] + 1;
 47:     nz  = ai[i + 1] - diag[i] - 1;
 48:     idt = 2 * i;
 49:     s1  = x[idt];
 50:     s2  = x[1 + idt];
 51:     while (nz--) {
 52:       idx = 2 * (*vi++);
 53:       x1  = x[idx];
 54:       x2  = x[1 + idx];
 55:       s1 -= v[0] * x1 + v[2] * x2;
 56:       s2 -= v[1] * x1 + v[3] * x2;
 57:       v += 4;
 58:     }
 59:     v          = aa + 4 * diag[i];
 60:     x[idt]     = v[0] * s1 + v[2] * s2;
 61:     x[1 + idt] = v[1] * s1 + v[3] * s2;
 62:   }

 64:   PetscCall(VecRestoreArrayRead(bb, &b));
 65:   PetscCall(VecRestoreArray(xx, &x));
 66:   PetscCall(PetscLogFlops(2.0 * 4 * (a->nz) - 2.0 * A->cmap->n));
 67:   PetscFunctionReturn(PETSC_SUCCESS);
 68: }

 70: PetscErrorCode MatSolve_SeqBAIJ_2_NaturalOrdering(Mat A, Vec bb, Vec xx)
 71: {
 72:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
 73:   const PetscInt     n = a->mbs, *vi, *ai = a->i, *aj = a->j, *adiag = a->diag;
 74:   PetscInt           i, k, nz, idx, idt, jdx;
 75:   const MatScalar   *aa = a->a, *v;
 76:   PetscScalar       *x, s1, s2, x1, x2;
 77:   const PetscScalar *b;

 79:   PetscFunctionBegin;
 80:   PetscCall(VecGetArrayRead(bb, &b));
 81:   PetscCall(VecGetArray(xx, &x));
 82:   /* forward solve the lower triangular */
 83:   idx  = 0;
 84:   x[0] = b[idx];
 85:   x[1] = b[1 + idx];
 86:   for (i = 1; i < n; i++) {
 87:     v   = aa + 4 * ai[i];
 88:     vi  = aj + ai[i];
 89:     nz  = ai[i + 1] - ai[i];
 90:     idx = 2 * i;
 91:     s1  = b[idx];
 92:     s2  = b[1 + idx];
 93:     PetscPrefetchBlock(vi + nz, nz, 0, PETSC_PREFETCH_HINT_NTA);
 94:     PetscPrefetchBlock(v + 4 * nz, 4 * nz, 0, PETSC_PREFETCH_HINT_NTA);
 95:     for (k = 0; k < nz; k++) {
 96:       jdx = 2 * vi[k];
 97:       x1  = x[jdx];
 98:       x2  = x[1 + jdx];
 99:       s1 -= v[0] * x1 + v[2] * x2;
100:       s2 -= v[1] * x1 + v[3] * x2;
101:       v += 4;
102:     }
103:     x[idx]     = s1;
104:     x[1 + idx] = s2;
105:   }

107:   /* backward solve the upper triangular */
108:   for (i = n - 1; i >= 0; i--) {
109:     v   = aa + 4 * (adiag[i + 1] + 1);
110:     vi  = aj + adiag[i + 1] + 1;
111:     nz  = adiag[i] - adiag[i + 1] - 1;
112:     idt = 2 * i;
113:     s1  = x[idt];
114:     s2  = x[1 + idt];
115:     PetscPrefetchBlock(vi + nz, nz, 0, PETSC_PREFETCH_HINT_NTA);
116:     PetscPrefetchBlock(v + 4 * nz, 4 * nz, 0, PETSC_PREFETCH_HINT_NTA);
117:     for (k = 0; k < nz; k++) {
118:       idx = 2 * vi[k];
119:       x1  = x[idx];
120:       x2  = x[1 + idx];
121:       s1 -= v[0] * x1 + v[2] * x2;
122:       s2 -= v[1] * x1 + v[3] * x2;
123:       v += 4;
124:     }
125:     /* x = inv_diagonal*x */
126:     x[idt]     = v[0] * s1 + v[2] * s2;
127:     x[1 + idt] = v[1] * s1 + v[3] * s2;
128:   }

130:   PetscCall(VecRestoreArrayRead(bb, &b));
131:   PetscCall(VecRestoreArray(xx, &x));
132:   PetscCall(PetscLogFlops(2.0 * 4 * (a->nz) - 2.0 * A->cmap->n));
133:   PetscFunctionReturn(PETSC_SUCCESS);
134: }

136: PetscErrorCode MatForwardSolve_SeqBAIJ_2_NaturalOrdering(Mat A, Vec bb, Vec xx)
137: {
138:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
139:   const PetscInt     n = a->mbs, *vi, *ai = a->i, *aj = a->j;
140:   PetscInt           i, k, nz, idx, jdx;
141:   const MatScalar   *aa = a->a, *v;
142:   PetscScalar       *x, s1, s2, x1, x2;
143:   const PetscScalar *b;

145:   PetscFunctionBegin;
146:   PetscCall(VecGetArrayRead(bb, &b));
147:   PetscCall(VecGetArray(xx, &x));
148:   /* forward solve the lower triangular */
149:   idx  = 0;
150:   x[0] = b[idx];
151:   x[1] = b[1 + idx];
152:   for (i = 1; i < n; i++) {
153:     v   = aa + 4 * ai[i];
154:     vi  = aj + ai[i];
155:     nz  = ai[i + 1] - ai[i];
156:     idx = 2 * i;
157:     s1  = b[idx];
158:     s2  = b[1 + idx];
159:     PetscPrefetchBlock(vi + nz, nz, 0, PETSC_PREFETCH_HINT_NTA);
160:     PetscPrefetchBlock(v + 4 * nz, 4 * nz, 0, PETSC_PREFETCH_HINT_NTA);
161:     for (k = 0; k < nz; k++) {
162:       jdx = 2 * vi[k];
163:       x1  = x[jdx];
164:       x2  = x[1 + jdx];
165:       s1 -= v[0] * x1 + v[2] * x2;
166:       s2 -= v[1] * x1 + v[3] * x2;
167:       v += 4;
168:     }
169:     x[idx]     = s1;
170:     x[1 + idx] = s2;
171:   }

173:   PetscCall(VecRestoreArrayRead(bb, &b));
174:   PetscCall(VecRestoreArray(xx, &x));
175:   PetscCall(PetscLogFlops(4.0 * (a->nz) - A->cmap->n));
176:   PetscFunctionReturn(PETSC_SUCCESS);
177: }

179: PetscErrorCode MatBackwardSolve_SeqBAIJ_2_NaturalOrdering(Mat A, Vec bb, Vec xx)
180: {
181:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
182:   const PetscInt     n = a->mbs, *vi, *aj = a->j, *adiag = a->diag;
183:   PetscInt           i, k, nz, idx, idt;
184:   const MatScalar   *aa = a->a, *v;
185:   PetscScalar       *x, s1, s2, x1, x2;
186:   const PetscScalar *b;

188:   PetscFunctionBegin;
189:   PetscCall(VecGetArrayRead(bb, &b));
190:   PetscCall(VecGetArray(xx, &x));

192:   /* backward solve the upper triangular */
193:   for (i = n - 1; i >= 0; i--) {
194:     v   = aa + 4 * (adiag[i + 1] + 1);
195:     vi  = aj + adiag[i + 1] + 1;
196:     nz  = adiag[i] - adiag[i + 1] - 1;
197:     idt = 2 * i;
198:     s1  = b[idt];
199:     s2  = b[1 + idt];
200:     PetscPrefetchBlock(vi + nz, nz, 0, PETSC_PREFETCH_HINT_NTA);
201:     PetscPrefetchBlock(v + 4 * nz, 4 * nz, 0, PETSC_PREFETCH_HINT_NTA);
202:     for (k = 0; k < nz; k++) {
203:       idx = 2 * vi[k];
204:       x1  = x[idx];
205:       x2  = x[1 + idx];
206:       s1 -= v[0] * x1 + v[2] * x2;
207:       s2 -= v[1] * x1 + v[3] * x2;
208:       v += 4;
209:     }
210:     /* x = inv_diagonal*x */
211:     x[idt]     = v[0] * s1 + v[2] * s2;
212:     x[1 + idt] = v[1] * s1 + v[3] * s2;
213:   }

215:   PetscCall(VecRestoreArrayRead(bb, &b));
216:   PetscCall(VecRestoreArray(xx, &x));
217:   PetscCall(PetscLogFlops(4.0 * a->nz - A->cmap->n));
218:   PetscFunctionReturn(PETSC_SUCCESS);
219: }