Actual source code: nrefine.c
slepc-3.14.1 2020-12-08
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-2020, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
10: /*
11: Newton refinement for polynomial eigenproblems.
13: References:
15: [1] T. Betcke and D. Kressner, "Perturbation, extraction and refinement
16: of invariant pairs for matrix polynomials", Linear Algebra Appl.
17: 435(3):514-536, 2011.
19: [2] C. Campos and J.E. Roman, "Parallel iterative refinement in
20: polynomial eigenvalue problems", Numer. Linear Algebra Appl. 23(4):
21: 730-745, 2016.
22: */
24: #include <slepc/private/pepimpl.h>
25: #include <slepcblaslapack.h>
27: typedef struct {
28: Mat *A,M1;
29: BV V,M2,M3,W;
30: PetscInt k,nmat;
31: PetscScalar *fih,*work,*M4;
32: PetscBLASInt *pM4;
33: PetscBool compM1;
34: Vec t;
35: } PEP_REFINE_MATSHELL;
37: typedef struct {
38: Mat E[2],M1;
39: Vec tN,ttN,t1,vseq;
40: VecScatter scatterctx;
41: PetscBool compM1;
42: PetscInt *map0,*map1,*idxg,*idxp;
43: PetscSubcomm subc;
44: VecScatter scatter_sub;
45: VecScatter *scatter_id,*scatterp_id;
46: Mat *A;
47: BV V,W,M2,M3,Wt;
48: PetscScalar *M4,*w,*wt,*d,*dt;
49: Vec t,tg,Rv,Vi,tp,tpg;
50: PetscInt idx,*cols;
51: } PEP_REFINE_EXPLICIT;
53: static PetscErrorCode MatMult_FS(Mat M ,Vec x,Vec y)
54: {
55: PetscErrorCode ierr;
56: PEP_REFINE_MATSHELL *ctx;
57: PetscInt k,i;
58: PetscScalar *c;
59: PetscBLASInt k_,one=1,info;
62: MatShellGetContext(M,&ctx);
63: VecCopy(x,ctx->t);
64: k = ctx->k;
65: c = ctx->work;
66: PetscBLASIntCast(k,&k_);
67: MatMult(ctx->M1,x,y);
68: VecConjugate(ctx->t);
69: BVDotVec(ctx->M3,ctx->t,c);
70: for (i=0;i<k;i++) c[i] = PetscConj(c[i]);
71: PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("N",&k_,&one,ctx->M4,&k_,ctx->pM4,c,&k_,&info));
72: SlepcCheckLapackInfo("getrs",info);
73: BVMultVec(ctx->M2,-1.0,1.0,y,c);
74: return(0);
75: }
77: /*
78: Evaluates the first d elements of the polynomial basis
79: on a given matrix H which is considered to be triangular
80: */
81: static PetscErrorCode PEPEvaluateBasisforMatrix(PEP pep,PetscInt nm,PetscInt k,PetscScalar *H,PetscInt ldh,PetscScalar *fH)
82: {
84: PetscInt i,j,ldfh=nm*k,off,nmat=pep->nmat;
85: PetscReal *a=pep->pbc,*b=pep->pbc+nmat,*g=pep->pbc+2*nmat,t;
86: PetscScalar corr=0.0,alpha,beta;
87: PetscBLASInt k_,ldh_,ldfh_;
90: PetscBLASIntCast(ldh,&ldh_);
91: PetscBLASIntCast(k,&k_);
92: PetscBLASIntCast(ldfh,&ldfh_);
93: PetscArrayzero(fH,nm*k*k);
94: if (nm>0) for (j=0;j<k;j++) fH[j+j*ldfh] = 1.0;
95: if (nm>1) {
96: t = b[0]/a[0];
97: off = k;
98: for (j=0;j<k;j++) {
99: for (i=0;i<k;i++) fH[off+i+j*ldfh] = H[i+j*ldh]/a[0];
100: fH[j+j*ldfh] -= t;
101: }
102: }
103: for (i=2;i<nm;i++) {
104: off = i*k;
105: if (i==2) {
106: for (j=0;j<k;j++) {
107: fH[off+j+j*ldfh] = 1.0;
108: H[j+j*ldh] -= b[1];
109: }
110: } else {
111: for (j=0;j<k;j++) {
112: PetscArraycpy(fH+off+j*ldfh,fH+(i-2)*k+j*ldfh,k);
113: H[j+j*ldh] += corr-b[i-1];
114: }
115: }
116: corr = b[i-1];
117: beta = -g[i-1]/a[i-1];
118: alpha = 1/a[i-1];
119: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&alpha,H,&ldh_,fH+(i-1)*k,&ldfh_,&beta,fH+off,&ldfh_));
120: }
121: for (j=0;j<k;j++) H[j+j*ldh] += corr;
122: return(0);
123: }
125: static PetscErrorCode NRefSysSetup_shell(PEP pep,PetscInt k,PetscScalar *fH,PetscScalar *S,PetscInt lds,PetscScalar *fh,PetscScalar h,PEP_REFINE_MATSHELL *ctx)
126: {
127: PetscErrorCode ierr;
128: PetscScalar *DHii,*T12,*Tr,*Ts,*array,s,ss,sone=1.0,zero=0.0,*M4=ctx->M4,t,*v,*T;
129: const PetscScalar *m3,*m2;
130: PetscInt i,d,j,nmat=pep->nmat,lda=nmat*k,deg=nmat-1,nloc;
131: PetscReal *a=pep->pbc,*b=pep->pbc+nmat,*g=pep->pbc+2*nmat;
132: PetscBLASInt k_,lda_,lds_,nloc_,one=1,info;
133: Mat *A=ctx->A,Mk,M1=ctx->M1,P;
134: BV V=ctx->V,M2=ctx->M2,M3=ctx->M3,W=ctx->W;
135: MatStructure str;
136: Vec vc;
139: STGetMatStructure(pep->st,&str);
140: PetscMalloc3(nmat*k*k,&T12,k*k,&Tr,PetscMax(k*k,nmat),&Ts);
141: DHii = T12;
142: PetscArrayzero(DHii,k*k*nmat);
143: for (i=0;i<k;i++) DHii[k+i+i*lda] = 1.0/a[0];
144: for (d=2;d<nmat;d++) {
145: for (j=0;j<k;j++) {
146: for (i=0;i<k;i++) {
147: DHii[d*k+i+j*lda] = ((h-b[d-1])*DHii[(d-1)*k+i+j*lda]+fH[(d-1)*k+i+j*lda]-g[d-1]*DHii[(d-2)*k+i+j*lda])/(a[d-1]);
148: }
149: }
150: }
151: /* T11 */
152: if (!ctx->compM1) {
153: MatCopy(A[0],M1,DIFFERENT_NONZERO_PATTERN);
154: PEPEvaluateBasis(pep,h,0,Ts,NULL);
155: for (j=1;j<nmat;j++) {
156: MatAXPY(M1,Ts[j],A[j],str);
157: }
158: }
160: /* T22 */
161: PetscBLASIntCast(lds,&lds_);
162: PetscBLASIntCast(k,&k_);
163: PetscBLASIntCast(lda,&lda_);
164: PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,S,&lds_,S,&lds_,&zero,Tr,&k_));
165: for (i=1;i<deg;i++) {
166: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,Tr,&k_,DHii+i*k,&lda_,&zero,Ts,&k_));
167: s = (i==1)?0.0:1.0;
168: PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,fH+i*k,&lda_,Ts,&k_,&s,M4,&k_));
169: }
170: for (i=0;i<k;i++) for (j=0;j<i;j++) { t=M4[i+j*k];M4[i+j*k]=M4[j+i*k];M4[j+i*k]=t; }
172: /* T12 */
173: MatCreateSeqDense(PETSC_COMM_SELF,k,k,NULL,&Mk);
174: for (i=1;i<nmat;i++) {
175: MatDenseGetArray(Mk,&array);
176: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,S,&lds_,DHii+i*k,&lda_,&zero,array,&k_));
177: MatDenseRestoreArray(Mk,&array);
178: BVSetActiveColumns(W,0,k);
179: BVMult(W,1.0,0.0,V,Mk);
180: if (i==1) {
181: BVMatMult(W,A[i],M2);
182: } else {
183: BVMatMult(W,A[i],M3); /* using M3 as work space */
184: BVMult(M2,1.0,1.0,M3,NULL);
185: }
186: }
188: /* T21 */
189: MatDenseGetArray(Mk,&array);
190: for (i=1;i<deg;i++) {
191: s = (i==1)?0.0:1.0;
192: ss = PetscConj(fh[i]);
193: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&ss,S,&lds_,fH+i*k,&lda_,&s,array,&k_));
194: }
195: MatDenseRestoreArray(Mk,&array);
196: BVSetActiveColumns(M3,0,k);
197: BVMult(M3,1.0,0.0,V,Mk);
198: for (i=0;i<k;i++) {
199: BVGetColumn(M3,i,&vc);
200: VecConjugate(vc);
201: BVRestoreColumn(M3,i,&vc);
202: }
203: MatDestroy(&Mk);
204: PetscFree3(T12,Tr,Ts);
206: VecGetLocalSize(ctx->t,&nloc);
207: PetscBLASIntCast(nloc,&nloc_);
208: PetscMalloc1(nloc*k,&T);
209: KSPGetOperators(pep->refineksp,NULL,&P);
210: if (!ctx->compM1) { MatCopy(ctx->M1,P,SAME_NONZERO_PATTERN); }
211: BVGetArrayRead(ctx->M2,&m2);
212: BVGetArrayRead(ctx->M3,&m3);
213: VecGetArray(ctx->t,&v);
214: for (i=0;i<nloc;i++) for (j=0;j<k;j++) T[j+i*k] = m3[i+j*nloc];
215: PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&k_,&nloc_,ctx->M4,&k_,ctx->pM4,T,&k_,&info));
216: SlepcCheckLapackInfo("gesv",info);
217: for (i=0;i<nloc;i++) v[i] = BLASdot_(&k_,m2+i,&nloc_,T+i*k,&one);
218: VecRestoreArray(ctx->t,&v);
219: BVRestoreArrayRead(ctx->M2,&m2);
220: BVRestoreArrayRead(ctx->M3,&m3);
221: MatDiagonalSet(P,ctx->t,ADD_VALUES);
222: PetscFree(T);
223: KSPSetUp(pep->refineksp);
224: return(0);
225: }
227: static PetscErrorCode NRefSysSolve_shell(KSP ksp,PetscInt nmat,Vec Rv,PetscScalar *Rh,PetscInt k,Vec dVi,PetscScalar *dHi)
228: {
229: PetscErrorCode ierr;
230: PetscScalar *t0;
231: PetscBLASInt k_,one=1,info,lda_;
232: PetscInt i,lda=nmat*k;
233: Mat M;
234: PEP_REFINE_MATSHELL *ctx;
237: KSPGetOperators(ksp,&M,NULL);
238: MatShellGetContext(M,&ctx);
239: PetscCalloc1(k,&t0);
240: PetscBLASIntCast(lda,&lda_);
241: PetscBLASIntCast(k,&k_);
242: for (i=0;i<k;i++) t0[i] = Rh[i];
243: PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("N",&k_,&one,ctx->M4,&k_,ctx->pM4,t0,&k_,&info));
244: SlepcCheckLapackInfo("getrs",info);
245: BVMultVec(ctx->M2,-1.0,1.0,Rv,t0);
246: KSPSolve(ksp,Rv,dVi);
247: VecConjugate(dVi);
248: BVDotVec(ctx->M3,dVi,dHi);
249: VecConjugate(dVi);
250: for (i=0;i<k;i++) dHi[i] = Rh[i]-PetscConj(dHi[i]);
251: PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("N",&k_,&one,ctx->M4,&k_,ctx->pM4,dHi,&k_,&info));
252: SlepcCheckLapackInfo("getrs",info);
253: PetscFree(t0);
254: return(0);
255: }
257: /*
258: Computes the residual P(H,V*S)*e_j for the polynomial
259: */
260: static PetscErrorCode NRefRightSide(PetscInt nmat,PetscReal *pcf,Mat *A,PetscInt k,BV V,PetscScalar *S,PetscInt lds,PetscInt j,PetscScalar *H,PetscInt ldh,PetscScalar *fH,PetscScalar *DfH,PetscScalar *dH,BV dV,PetscScalar *dVS,PetscInt rds,Vec Rv,PetscScalar *Rh,BV W,Vec t)
261: {
263: PetscScalar *DS0,*DS1,*F,beta=0.0,sone=1.0,none=-1.0,tt=0.0,*h,zero=0.0,*Z,*c0;
264: PetscReal *a=pcf,*b=pcf+nmat,*g=b+nmat;
265: PetscInt i,ii,jj,lda;
266: PetscBLASInt lda_,k_,ldh_,lds_,nmat_,k2_,krds_,j_,one=1;
267: Mat M0;
268: Vec w;
271: PetscMalloc4(k*nmat,&h,k*k,&DS0,k*k,&DS1,k*k,&Z);
272: lda = k*nmat;
273: PetscBLASIntCast(k,&k_);
274: PetscBLASIntCast(lds,&lds_);
275: PetscBLASIntCast(lda,&lda_);
276: PetscBLASIntCast(nmat,&nmat_);
277: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&nmat_,&k_,&sone,S,&lds_,fH+j*lda,&k_,&zero,h,&k_));
278: MatCreateSeqDense(PETSC_COMM_SELF,k,nmat,h,&M0);
279: BVSetActiveColumns(W,0,nmat);
280: BVMult(W,1.0,0.0,V,M0);
281: MatDestroy(&M0);
283: BVGetColumn(W,0,&w);
284: MatMult(A[0],w,Rv);
285: BVRestoreColumn(W,0,&w);
286: for (i=1;i<nmat;i++) {
287: BVGetColumn(W,i,&w);
288: MatMult(A[i],w,t);
289: BVRestoreColumn(W,i,&w);
290: VecAXPY(Rv,1.0,t);
291: }
292: /* Update right-hand side */
293: if (j) {
294: PetscBLASIntCast(ldh,&ldh_);
295: PetscArrayzero(Z,k*k);
296: PetscArrayzero(DS0,k*k);
297: PetscArraycpy(Z+(j-1)*k,dH+(j-1)*k,k);
298: /* Update DfH */
299: for (i=1;i<nmat;i++) {
300: if (i>1) {
301: beta = -g[i-1];
302: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,fH+(i-1)*k,&lda_,Z,&k_,&beta,DS0,&k_));
303: tt += -b[i-1];
304: for (ii=0;ii<k;ii++) H[ii+ii*ldh] += tt;
305: tt = b[i-1];
306: beta = 1.0/a[i-1];
307: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&beta,DS1,&k_,H,&ldh_,&beta,DS0,&k_));
308: F = DS0; DS0 = DS1; DS1 = F;
309: } else {
310: PetscArrayzero(DS1,k*k);
311: for (ii=0;ii<k;ii++) DS1[ii+(j-1)*k] = Z[ii+(j-1)*k]/a[0];
312: }
313: for (jj=j;jj<k;jj++) {
314: for (ii=0;ii<k;ii++) DfH[k*i+ii+jj*lda] += DS1[ii+jj*k];
315: }
316: }
317: for (ii=0;ii<k;ii++) H[ii+ii*ldh] += tt;
318: /* Update right-hand side */
319: PetscBLASIntCast(2*k,&k2_);
320: PetscBLASIntCast(j,&j_);
321: PetscBLASIntCast(k+rds,&krds_);
322: c0 = DS0;
323: PetscArrayzero(Rh,k);
324: for (i=0;i<nmat;i++) {
325: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&krds_,&j_,&sone,dVS,&k2_,fH+j*lda+i*k,&one,&zero,h,&one));
326: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&k_,&k_,&sone,S,&lds_,DfH+i*k+j*lda,&one,&sone,h,&one));
327: BVMultVec(V,1.0,0.0,t,h);
328: BVSetActiveColumns(dV,0,rds);
329: BVMultVec(dV,1.0,1.0,t,h+k);
330: BVGetColumn(W,i,&w);
331: MatMult(A[i],t,w);
332: BVRestoreColumn(W,i,&w);
333: if (i>0 && i<nmat-1) {
334: PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&k_,&k_,&sone,S,&lds_,h,&one,&zero,c0,&one));
335: PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&k_,&k_,&none,fH+i*k,&lda_,c0,&one,&sone,Rh,&one));
336: }
337: }
339: for (i=0;i<nmat;i++) h[i] = -1.0;
340: BVMultVec(W,1.0,1.0,Rv,h);
341: }
342: PetscFree4(h,DS0,DS1,Z);
343: return(0);
344: }
346: static PetscErrorCode NRefSysSolve_mbe(PetscInt k,PetscInt sz,BV W,PetscScalar *w,BV Wt,PetscScalar *wt,PetscScalar *d,PetscScalar *dt,KSP ksp,BV T2,BV T3 ,PetscScalar *T4,PetscBool trans,Vec x1,PetscScalar *x2,Vec sol1,PetscScalar *sol2,Vec vw)
347: {
349: PetscInt i,j,incf,incc;
350: PetscScalar *y,*g,*xx2,*ww,y2,*dd;
351: Vec v,t,xx1;
352: BV WW,T;
355: PetscMalloc3(sz,&y,sz,&g,k,&xx2);
356: if (trans) {
357: WW = W; ww = w; dd = d; T = T3; incf = 0; incc = 1;
358: } else {
359: WW = Wt; ww = wt; dd = dt; T = T2; incf = 1; incc = 0;
360: }
361: xx1 = vw;
362: VecCopy(x1,xx1);
363: PetscArraycpy(xx2,x2,sz);
364: PetscArrayzero(sol2,k);
365: for (i=sz-1;i>=0;i--) {
366: BVGetColumn(WW,i,&v);
367: VecConjugate(v);
368: VecDot(xx1,v,y+i);
369: VecConjugate(v);
370: BVRestoreColumn(WW,i,&v);
371: for (j=0;j<i;j++) y[i] += ww[j+i*k]*xx2[j];
372: y[i] = -(y[i]-xx2[i])/dd[i];
373: BVGetColumn(T,i,&t);
374: VecAXPY(xx1,-y[i],t);
375: BVRestoreColumn(T,i,&t);
376: for (j=0;j<=i;j++) xx2[j] -= y[i]*T4[j*incf+incc*i+(i*incf+incc*j)*k];
377: g[i] = xx2[i];
378: }
379: if (trans) {
380: KSPSolveTranspose(ksp,xx1,sol1);
381: } else {
382: KSPSolve(ksp,xx1,sol1);
383: }
384: if (trans) {
385: WW = Wt; ww = wt; dd = dt; T = T2; incf = 1; incc = 0;
386: } else {
387: WW = W; ww = w; dd = d; T = T3; incf = 0; incc = 1;
388: }
389: for (i=0;i<sz;i++) {
390: BVGetColumn(T,i,&t);
391: VecConjugate(t);
392: VecDot(sol1,t,&y2);
393: VecConjugate(t);
394: BVRestoreColumn(T,i,&t);
395: for (j=0;j<i;j++) y2 += sol2[j]*T4[j*incf+incc*i+(i*incf+incc*j)*k];
396: y2 = (g[i]-y2)/dd[i];
397: BVGetColumn(WW,i,&v);
398: VecAXPY(sol1,-y2,v);
399: for (j=0;j<i;j++) sol2[j] -= ww[j+i*k]*y2;
400: sol2[i] = y[i]+y2;
401: BVRestoreColumn(WW,i,&v);
402: }
403: PetscFree3(y,g,xx2);
404: return(0);
405: }
407: static PetscErrorCode NRefSysSetup_mbe(PEP pep,PetscInt k,KSP ksp,PetscScalar *fH,PetscScalar *S,PetscInt lds,PetscScalar *fh,PetscScalar h,BV V,PEP_REFINE_EXPLICIT *matctx)
408: {
410: PetscInt i,j,l,nmat=pep->nmat,lda=nmat*k,deg=nmat-1;
411: Mat M1=matctx->M1,*A,*At,Mk;
412: PetscReal *a=pep->pbc,*b=pep->pbc+nmat,*g=pep->pbc+2*nmat;
413: PetscScalar s,ss,*DHii,*T12,*array,*Ts,*Tr,*M4=matctx->M4,sone=1.0,zero=0.0;
414: PetscScalar *w=matctx->w,*wt=matctx->wt,*d=matctx->d,*dt=matctx->dt;
415: PetscBLASInt lds_,lda_,k_;
416: MatStructure str;
417: PetscBool flg;
418: BV M2=matctx->M2,M3=matctx->M3,W=matctx->W,Wt=matctx->Wt;
419: Vec vc,vc2;
422: PetscMalloc3(nmat*k*k,&T12,k*k,&Tr,PetscMax(k*k,nmat),&Ts);
423: STGetMatStructure(pep->st,&str);
424: STGetTransform(pep->st,&flg);
425: if (flg) {
426: PetscMalloc1(pep->nmat,&At);
427: for (i=0;i<pep->nmat;i++) {
428: STGetMatrixTransformed(pep->st,i,&At[i]);
429: }
430: } else At = pep->A;
431: if (matctx->subc) A = matctx->A;
432: else A = At;
433: /* Form the explicit system matrix */
434: DHii = T12;
435: PetscArrayzero(DHii,k*k*nmat);
436: for (i=0;i<k;i++) DHii[k+i+i*lda] = 1.0/a[0];
437: for (l=2;l<nmat;l++) {
438: for (j=0;j<k;j++) {
439: for (i=0;i<k;i++) {
440: DHii[l*k+i+j*lda] = ((h-b[l-1])*DHii[(l-1)*k+i+j*lda]+fH[(l-1)*k+i+j*lda]-g[l-1]*DHii[(l-2)*k+i+j*lda])/a[l-1];
441: }
442: }
443: }
445: /* T11 */
446: if (!matctx->compM1) {
447: MatCopy(A[0],M1,DIFFERENT_NONZERO_PATTERN);
448: PEPEvaluateBasis(pep,h,0,Ts,NULL);
449: for (j=1;j<nmat;j++) {
450: MatAXPY(M1,Ts[j],A[j],str);
451: }
452: }
454: /* T22 */
455: PetscBLASIntCast(lds,&lds_);
456: PetscBLASIntCast(k,&k_);
457: PetscBLASIntCast(lda,&lda_);
458: PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,S,&lds_,S,&lds_,&zero,Tr,&k_));
459: for (i=1;i<deg;i++) {
460: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,Tr,&k_,DHii+i*k,&lda_,&zero,Ts,&k_));
461: s = (i==1)?0.0:1.0;
462: PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,fH+i*k,&lda_,Ts,&k_,&s,M4,&k_));
463: }
465: /* T12 */
466: MatCreateSeqDense(PETSC_COMM_SELF,k,k,NULL,&Mk);
467: for (i=1;i<nmat;i++) {
468: MatDenseGetArray(Mk,&array);
469: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,S,&lds_,DHii+i*k,&lda_,&zero,array,&k_));
470: MatDenseRestoreArray(Mk,&array);
471: BVSetActiveColumns(W,0,k);
472: BVMult(W,1.0,0.0,V,Mk);
473: if (i==1) {
474: BVMatMult(W,A[i],M2);
475: } else {
476: BVMatMult(W,A[i],M3); /* using M3 as work space */
477: BVMult(M2,1.0,1.0,M3,NULL);
478: }
479: }
481: /* T21 */
482: MatDenseGetArray(Mk,&array);
483: for (i=1;i<deg;i++) {
484: s = (i==1)?0.0:1.0;
485: ss = PetscConj(fh[i]);
486: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&ss,S,&lds_,fH+i*k,&lda_,&s,array,&k_));
487: }
488: MatDenseRestoreArray(Mk,&array);
489: BVSetActiveColumns(M3,0,k);
490: BVMult(M3,1.0,0.0,V,Mk);
491: for (i=0;i<k;i++) {
492: BVGetColumn(M3,i,&vc);
493: VecConjugate(vc);
494: BVRestoreColumn(M3,i,&vc);
495: }
497: KSPSetOperators(ksp,M1,M1);
498: KSPSetUp(ksp);
499: MatDestroy(&Mk);
501: /* Set up for BEMW */
502: for (i=0;i<k;i++) {
503: BVGetColumn(M2,i,&vc);
504: BVGetColumn(W,i,&vc2);
505: NRefSysSolve_mbe(k,i,W,w,Wt,wt,d,dt,ksp,M2,M3,M4,PETSC_FALSE,vc,M4+i*k,vc2,w+i*k,matctx->t);
506: BVRestoreColumn(M2,i,&vc);
507: BVGetColumn(M3,i,&vc);
508: VecConjugate(vc);
509: VecDot(vc2,vc,&d[i]);
510: VecConjugate(vc);
511: BVRestoreColumn(M3,i,&vc);
512: for (j=0;j<i;j++) d[i] += M4[i+j*k]*w[j+i*k];
513: d[i] = M4[i+i*k]-d[i];
514: BVRestoreColumn(W,i,&vc2);
516: BVGetColumn(M3,i,&vc);
517: BVGetColumn(Wt,i,&vc2);
518: for (j=0;j<=i;j++) Ts[j] = M4[i+j*k];
519: NRefSysSolve_mbe(k,i,W,w,Wt,wt,d,dt,ksp,M2,M3,M4,PETSC_TRUE,vc,Ts,vc2,wt+i*k,matctx->t);
520: BVRestoreColumn(M3,i,&vc);
521: BVGetColumn(M2,i,&vc);
522: VecConjugate(vc2);
523: VecDot(vc,vc2,&dt[i]);
524: VecConjugate(vc2);
525: BVRestoreColumn(M2,i,&vc);
526: for (j=0;j<i;j++) dt[i] += M4[j+i*k]*wt[j+i*k];
527: dt[i] = M4[i+i*k]-dt[i];
528: BVRestoreColumn(Wt,i,&vc2);
529: }
531: if (flg) {
532: PetscFree(At);
533: }
534: PetscFree3(T12,Tr,Ts);
535: return(0);
536: }
538: static PetscErrorCode NRefSysSetup_explicit(PEP pep,PetscInt k,KSP ksp,PetscScalar *fH,PetscScalar *S,PetscInt lds,PetscScalar *fh,PetscScalar h,BV V,PEP_REFINE_EXPLICIT *matctx,BV W)
539: {
540: PetscErrorCode ierr;
541: PetscInt i,j,d,n,n0,m0,n1,m1,nmat=pep->nmat,lda=nmat*k,deg=nmat-1;
542: PetscInt *idxg=matctx->idxg,*idxp=matctx->idxp,idx,ncols;
543: Mat M,*E=matctx->E,*A,*At,Mk,Md;
544: PetscReal *a=pep->pbc,*b=pep->pbc+nmat,*g=pep->pbc+2*nmat;
545: PetscScalar s,ss,*DHii,*T22,*T21,*T12,*Ts,*Tr,*array,*ts,sone=1.0,zero=0.0;
546: PetscBLASInt lds_,lda_,k_;
547: const PetscInt *idxmc;
548: const PetscScalar *valsc,*carray;
549: MatStructure str;
550: Vec vc,vc0;
551: PetscBool flg;
554: PetscMalloc5(k*k,&T22,k*k,&T21,nmat*k*k,&T12,k*k,&Tr,k*k,&Ts);
555: STGetMatStructure(pep->st,&str);
556: KSPGetOperators(ksp,&M,NULL);
557: MatGetOwnershipRange(E[1],&n1,&m1);
558: MatGetOwnershipRange(E[0],&n0,&m0);
559: MatGetOwnershipRange(M,&n,NULL);
560: PetscMalloc1(nmat,&ts);
561: STGetTransform(pep->st,&flg);
562: if (flg) {
563: PetscMalloc1(pep->nmat,&At);
564: for (i=0;i<pep->nmat;i++) {
565: STGetMatrixTransformed(pep->st,i,&At[i]);
566: }
567: } else At = pep->A;
568: if (matctx->subc) A = matctx->A;
569: else A = At;
570: /* Form the explicit system matrix */
571: DHii = T12;
572: PetscArrayzero(DHii,k*k*nmat);
573: for (i=0;i<k;i++) DHii[k+i+i*lda] = 1.0/a[0];
574: for (d=2;d<nmat;d++) {
575: for (j=0;j<k;j++) {
576: for (i=0;i<k;i++) {
577: DHii[d*k+i+j*lda] = ((h-b[d-1])*DHii[(d-1)*k+i+j*lda]+fH[(d-1)*k+i+j*lda]-g[d-1]*DHii[(d-2)*k+i+j*lda])/a[d-1];
578: }
579: }
580: }
582: /* T11 */
583: if (!matctx->compM1) {
584: MatCopy(A[0],E[0],DIFFERENT_NONZERO_PATTERN);
585: PEPEvaluateBasis(pep,h,0,Ts,NULL);
586: for (j=1;j<nmat;j++) {
587: MatAXPY(E[0],Ts[j],A[j],str);
588: }
589: }
590: for (i=n0;i<m0;i++) {
591: MatGetRow(E[0],i,&ncols,&idxmc,&valsc);
592: idx = n+i-n0;
593: for (j=0;j<ncols;j++) {
594: idxg[j] = matctx->map0[idxmc[j]];
595: }
596: MatSetValues(M,1,&idx,ncols,idxg,valsc,INSERT_VALUES);
597: MatRestoreRow(E[0],i,&ncols,&idxmc,&valsc);
598: }
600: /* T22 */
601: PetscBLASIntCast(lds,&lds_);
602: PetscBLASIntCast(k,&k_);
603: PetscBLASIntCast(lda,&lda_);
604: PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,S,&lds_,S,&lds_,&zero,Tr,&k_));
605: for (i=1;i<deg;i++) {
606: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,Tr,&k_,DHii+i*k,&lda_,&zero,Ts,&k_));
607: s = (i==1)?0.0:1.0;
608: PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,fH+i*k,&lda_,Ts,&k_,&s,T22,&k_));
609: }
610: for (j=0;j<k;j++) idxp[j] = matctx->map1[j];
611: for (i=0;i<m1-n1;i++) {
612: idx = n+m0-n0+i;
613: for (j=0;j<k;j++) {
614: Tr[j] = T22[n1+i+j*k];
615: }
616: MatSetValues(M,1,&idx,k,idxp,Tr,INSERT_VALUES);
617: }
619: /* T21 */
620: for (i=1;i<deg;i++) {
621: s = (i==1)?0.0:1.0;
622: ss = PetscConj(fh[i]);
623: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&ss,S,&lds_,fH+i*k,&lda_,&s,T21,&k_));
624: }
625: BVSetActiveColumns(W,0,k);
626: MatCreateSeqDense(PETSC_COMM_SELF,k,k,T21,&Mk);
627: BVMult(W,1.0,0.0,V,Mk);
628: for (i=0;i<k;i++) {
629: BVGetColumn(W,i,&vc);
630: VecConjugate(vc);
631: VecGetArrayRead(vc,&carray);
632: idx = matctx->map1[i];
633: MatSetValues(M,1,&idx,m0-n0,matctx->map0+n0,carray,INSERT_VALUES);
634: VecRestoreArrayRead(vc,&carray);
635: BVRestoreColumn(W,i,&vc);
636: }
638: /* T12 */
639: for (i=1;i<nmat;i++) {
640: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,S,&lds_,DHii+i*k,&lda_,&zero,Ts,&k_));
641: for (j=0;j<k;j++) {
642: PetscArraycpy(T12+i*k+j*lda,Ts+j*k,k);
643: }
644: }
645: MatCreateSeqDense(PETSC_COMM_SELF,k,nmat-1,NULL,&Md);
646: for (i=0;i<nmat;i++) ts[i] = 1.0;
647: for (j=0;j<k;j++) {
648: MatDenseGetArray(Md,&array);
649: PetscArraycpy(array,T12+k+j*lda,(nmat-1)*k);
650: MatDenseRestoreArray(Md,&array);
651: BVSetActiveColumns(W,0,nmat-1);
652: BVMult(W,1.0,0.0,V,Md);
653: for (i=nmat-1;i>0;i--) {
654: BVGetColumn(W,i-1,&vc0);
655: BVGetColumn(W,i,&vc);
656: MatMult(A[i],vc0,vc);
657: BVRestoreColumn(W,i-1,&vc0);
658: BVRestoreColumn(W,i,&vc);
659: }
660: BVSetActiveColumns(W,1,nmat);
661: BVGetColumn(W,0,&vc0);
662: BVMultVec(W,1.0,0.0,vc0,ts);
663: VecGetArrayRead(vc0,&carray);
664: idx = matctx->map1[j];
665: MatSetValues(M,m0-n0,matctx->map0+n0,1,&idx,carray,INSERT_VALUES);
666: VecRestoreArrayRead(vc0,&carray);
667: BVRestoreColumn(W,0,&vc0);
668: }
669: MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);
670: MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);
671: KSPSetOperators(ksp,M,M);
672: KSPSetUp(ksp);
673: PetscFree(ts);
674: MatDestroy(&Mk);
675: MatDestroy(&Md);
676: if (flg) {
677: PetscFree(At);
678: }
679: PetscFree5(T22,T21,T12,Tr,Ts);
680: return(0);
681: }
683: static PetscErrorCode NRefSysSolve_explicit(PetscInt k,KSP ksp,Vec Rv,PetscScalar *Rh,Vec dVi,PetscScalar *dHi,PEP_REFINE_EXPLICIT *matctx)
684: {
685: PetscErrorCode ierr;
686: PetscInt n0,m0,n1,m1,i;
687: PetscScalar *arrayV;
688: const PetscScalar *array;
691: MatGetOwnershipRange(matctx->E[1],&n1,&m1);
692: MatGetOwnershipRange(matctx->E[0],&n0,&m0);
694: /* Right side */
695: VecGetArrayRead(Rv,&array);
696: VecSetValues(matctx->tN,m0-n0,matctx->map0+n0,array,INSERT_VALUES);
697: VecRestoreArrayRead(Rv,&array);
698: VecSetValues(matctx->tN,m1-n1,matctx->map1+n1,Rh+n1,INSERT_VALUES);
699: VecAssemblyBegin(matctx->tN);
700: VecAssemblyEnd(matctx->tN);
702: /* Solve */
703: KSPSolve(ksp,matctx->tN,matctx->ttN);
705: /* Retrieve solution */
706: VecGetArray(dVi,&arrayV);
707: VecGetArrayRead(matctx->ttN,&array);
708: PetscArraycpy(arrayV,array,m0-n0);
709: VecRestoreArray(dVi,&arrayV);
710: if (!matctx->subc) {
711: VecGetArray(matctx->t1,&arrayV);
712: for (i=0;i<m1-n1;i++) arrayV[i] = array[m0-n0+i];
713: VecRestoreArray(matctx->t1,&arrayV);
714: VecRestoreArrayRead(matctx->ttN,&array);
715: VecScatterBegin(matctx->scatterctx,matctx->t1,matctx->vseq,INSERT_VALUES,SCATTER_FORWARD);
716: VecScatterEnd(matctx->scatterctx,matctx->t1,matctx->vseq,INSERT_VALUES,SCATTER_FORWARD);
717: VecGetArrayRead(matctx->vseq,&array);
718: for (i=0;i<k;i++) dHi[i] = array[i];
719: VecRestoreArrayRead(matctx->vseq,&array);
720: }
721: return(0);
722: }
724: static PetscErrorCode NRefSysIter(PetscInt i,PEP pep,PetscInt k,KSP ksp,PetscScalar *fH,PetscScalar *S,PetscInt lds,PetscScalar *fh,PetscScalar *H,PetscInt ldh,Vec Rv,PetscScalar *Rh,BV V,Vec dVi,PetscScalar *dHi,PEP_REFINE_EXPLICIT *matctx,BV W)
725: {
726: PetscErrorCode ierr;
727: PetscInt j,m,lda=pep->nmat*k,n0,m0,idx;
728: PetscMPIInt root,len;
729: PetscScalar *array2,h;
730: const PetscScalar *array;
731: Vec R,Vi;
732: PEP_REFINE_MATSHELL *ctx;
733: Mat M;
736: if (!matctx || !matctx->subc) {
737: for (j=0;j<pep->nmat;j++) fh[j] = fH[j*k+i+i*lda];
738: h = H[i+i*ldh];
739: idx = i;
740: R = Rv;
741: Vi = dVi;
742: switch (pep->scheme) {
743: case PEP_REFINE_SCHEME_EXPLICIT:
744: NRefSysSetup_explicit(pep,k,ksp,fH,S,lds,fh,h,V,matctx,W);
745: matctx->compM1 = PETSC_FALSE;
746: break;
747: case PEP_REFINE_SCHEME_MBE:
748: NRefSysSetup_mbe(pep,k,ksp,fH,S,lds,fh,h,V,matctx);
749: matctx->compM1 = PETSC_FALSE;
750: break;
751: case PEP_REFINE_SCHEME_SCHUR:
752: KSPGetOperators(ksp,&M,NULL);
753: MatShellGetContext(M,&ctx);
754: NRefSysSetup_shell(pep,k,fH,S,lds,fh,h,ctx);
755: ctx->compM1 = PETSC_FALSE;
756: break;
757: }
758: } else {
759: if (i%matctx->subc->n==0 && (idx=i+matctx->subc->color)<k) {
760: for (j=0;j<pep->nmat;j++) fh[j] = fH[j*k+idx+idx*lda];
761: h = H[idx+idx*ldh];
762: matctx->idx = idx;
763: switch (pep->scheme) {
764: case PEP_REFINE_SCHEME_EXPLICIT:
765: NRefSysSetup_explicit(pep,k,ksp,fH,S,lds,fh,h,matctx->V,matctx,matctx->W);
766: matctx->compM1 = PETSC_FALSE;
767: break;
768: case PEP_REFINE_SCHEME_MBE:
769: NRefSysSetup_mbe(pep,k,ksp,fH,S,lds,fh,h,matctx->V,matctx);
770: matctx->compM1 = PETSC_FALSE;
771: break;
772: case PEP_REFINE_SCHEME_SCHUR:
773: break;
774: }
775: } else idx = matctx->idx;
776: VecScatterBegin(matctx->scatter_id[i%matctx->subc->n],Rv,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
777: VecScatterEnd(matctx->scatter_id[i%matctx->subc->n],Rv,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
778: VecGetArrayRead(matctx->tg,&array);
779: VecPlaceArray(matctx->t,array);
780: VecCopy(matctx->t,matctx->Rv);
781: VecResetArray(matctx->t);
782: VecRestoreArrayRead(matctx->tg,&array);
783: R = matctx->Rv;
784: Vi = matctx->Vi;
785: }
786: if (idx==i && idx<k) {
787: switch (pep->scheme) {
788: case PEP_REFINE_SCHEME_EXPLICIT:
789: NRefSysSolve_explicit(k,ksp,R,Rh,Vi,dHi,matctx);
790: break;
791: case PEP_REFINE_SCHEME_MBE:
792: NRefSysSolve_mbe(k,k,matctx->W,matctx->w,matctx->Wt,matctx->wt,matctx->d,matctx->dt,ksp,matctx->M2,matctx->M3 ,matctx->M4,PETSC_FALSE,R,Rh,Vi,dHi,matctx->t);
793: break;
794: case PEP_REFINE_SCHEME_SCHUR:
795: NRefSysSolve_shell(ksp,pep->nmat,R,Rh,k,Vi,dHi);
796: break;
797: }
798: }
799: if (matctx && matctx->subc) {
800: VecGetLocalSize(Vi,&m);
801: VecGetArrayRead(Vi,&array);
802: VecGetArray(matctx->tg,&array2);
803: PetscArraycpy(array2,array,m);
804: VecRestoreArray(matctx->tg,&array2);
805: VecRestoreArrayRead(Vi,&array);
806: VecScatterBegin(matctx->scatter_id[i%matctx->subc->n],matctx->tg,dVi,INSERT_VALUES,SCATTER_REVERSE);
807: VecScatterEnd(matctx->scatter_id[i%matctx->subc->n],matctx->tg,dVi,INSERT_VALUES,SCATTER_REVERSE);
808: switch (pep->scheme) {
809: case PEP_REFINE_SCHEME_EXPLICIT:
810: MatGetOwnershipRange(matctx->E[0],&n0,&m0);
811: VecGetArrayRead(matctx->ttN,&array);
812: VecPlaceArray(matctx->tp,array+m0-n0);
813: VecScatterBegin(matctx->scatterp_id[i%matctx->subc->n],matctx->tp,matctx->tpg,INSERT_VALUES,SCATTER_FORWARD);
814: VecScatterEnd(matctx->scatterp_id[i%matctx->subc->n],matctx->tp,matctx->tpg,INSERT_VALUES,SCATTER_FORWARD);
815: VecResetArray(matctx->tp);
816: VecRestoreArrayRead(matctx->ttN,&array);
817: VecGetArrayRead(matctx->tpg,&array);
818: for (j=0;j<k;j++) dHi[j] = array[j];
819: VecRestoreArrayRead(matctx->tpg,&array);
820: break;
821: case PEP_REFINE_SCHEME_MBE:
822: root = 0;
823: for (j=0;j<i%matctx->subc->n;j++) root += matctx->subc->subsize[j];
824: PetscMPIIntCast(k,&len);
825: MPI_Bcast(dHi,len,MPIU_SCALAR,root,matctx->subc->dupparent);
826: break;
827: case PEP_REFINE_SCHEME_SCHUR:
828: break;
829: }
830: }
831: return(0);
832: }
834: static PetscErrorCode PEPNRefForwardSubstitution(PEP pep,PetscInt k,PetscScalar *S,PetscInt lds,PetscScalar *H,PetscInt ldh,PetscScalar *fH,BV dV,PetscScalar *dVS,PetscInt *rds,PetscScalar *dH,PetscInt lddh,KSP ksp,PEP_REFINE_EXPLICIT *matctx)
835: {
836: PetscErrorCode ierr;
837: PetscInt i,nmat=pep->nmat,lda=nmat*k;
838: PetscScalar *fh,*Rh,*DfH;
839: PetscReal norm;
840: BV W;
841: Vec Rv,t,dvi;
842: PEP_REFINE_MATSHELL *ctx;
843: Mat M,*At;
844: PetscBool flg,lindep;
847: PetscMalloc2(nmat*k*k,&DfH,k,&Rh);
848: *rds = 0;
849: BVCreateVec(pep->V,&Rv);
850: switch (pep->scheme) {
851: case PEP_REFINE_SCHEME_EXPLICIT:
852: BVCreateVec(pep->V,&t);
853: BVDuplicateResize(pep->V,PetscMax(k,nmat),&W);
854: PetscMalloc1(nmat,&fh);
855: break;
856: case PEP_REFINE_SCHEME_MBE:
857: if (matctx->subc) {
858: BVCreateVec(pep->V,&t);
859: BVDuplicateResize(pep->V,PetscMax(k,nmat),&W);
860: } else {
861: W = matctx->W;
862: PetscObjectReference((PetscObject)W);
863: t = matctx->t;
864: PetscObjectReference((PetscObject)t);
865: }
866: BVScale(matctx->W,0.0);
867: BVScale(matctx->Wt,0.0);
868: BVScale(matctx->M2,0.0);
869: BVScale(matctx->M3,0.0);
870: PetscMalloc1(nmat,&fh);
871: break;
872: case PEP_REFINE_SCHEME_SCHUR:
873: KSPGetOperators(ksp,&M,NULL);
874: MatShellGetContext(M,&ctx);
875: BVCreateVec(pep->V,&t);
876: BVDuplicateResize(pep->V,PetscMax(k,nmat),&W);
877: fh = ctx->fih;
878: break;
879: }
880: PetscArrayzero(dVS,2*k*k);
881: PetscArrayzero(DfH,lda*k);
882: STGetTransform(pep->st,&flg);
883: if (flg) {
884: PetscMalloc1(pep->nmat,&At);
885: for (i=0;i<pep->nmat;i++) {
886: STGetMatrixTransformed(pep->st,i,&At[i]);
887: }
888: } else At = pep->A;
890: /* Main loop for computing the ith columns of dX and dS */
891: for (i=0;i<k;i++) {
892: /* Compute and update i-th column of the right hand side */
893: PetscArrayzero(Rh,k);
894: NRefRightSide(nmat,pep->pbc,At,k,pep->V,S,lds,i,H,ldh,fH,DfH,dH,dV,dVS,*rds,Rv,Rh,W,t);
896: /* Update and solve system */
897: BVGetColumn(dV,i,&dvi);
898: NRefSysIter(i,pep,k,ksp,fH,S,lds,fh,H,ldh,Rv,Rh,pep->V,dvi,dH+i*k,matctx,W);
899: /* Orthogonalize computed solution */
900: BVOrthogonalizeVec(pep->V,dvi,dVS+i*2*k,&norm,&lindep);
901: BVRestoreColumn(dV,i,&dvi);
902: if (!lindep) {
903: BVOrthogonalizeColumn(dV,i,dVS+k+i*2*k,&norm,&lindep);
904: if (!lindep) {
905: dVS[k+i+i*2*k] = norm;
906: BVScaleColumn(dV,i,1.0/norm);
907: (*rds)++;
908: }
909: }
910: }
911: BVSetActiveColumns(dV,0,*rds);
912: VecDestroy(&t);
913: VecDestroy(&Rv);
914: BVDestroy(&W);
915: if (flg) {
916: PetscFree(At);
917: }
918: PetscFree2(DfH,Rh);
919: if (pep->scheme!=PEP_REFINE_SCHEME_SCHUR) { PetscFree(fh); }
920: return(0);
921: }
923: static PetscErrorCode NRefOrthogStep(PEP pep,PetscInt k,PetscScalar *H,PetscInt ldh,PetscScalar *fH,PetscScalar *S,PetscInt lds)
924: {
926: PetscInt j,nmat=pep->nmat,deg=nmat-1,lda=nmat*k,ldg;
927: PetscScalar *G,*tau,sone=1.0,zero=0.0,*work;
928: PetscBLASInt lds_,k_,ldh_,info,ldg_,lda_;
931: PetscMalloc3(k,&tau,k,&work,deg*k*k,&G);
932: PetscBLASIntCast(lds,&lds_);
933: PetscBLASIntCast(lda,&lda_);
934: PetscBLASIntCast(k,&k_);
936: /* Form auxiliary matrix for the orthogonalization step */
937: ldg = deg*k;
938: PEPEvaluateBasisforMatrix(pep,nmat,k,H,ldh,fH);
939: PetscBLASIntCast(ldg,&ldg_);
940: PetscBLASIntCast(ldh,&ldh_);
941: for (j=0;j<deg;j++) {
942: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,S,&lds_,fH+j*k,&lda_,&zero,G+j*k,&ldg_));
943: }
944: /* Orthogonalize and update S */
945: PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&ldg_,&k_,G,&ldg_,tau,work,&k_,&info));
946: SlepcCheckLapackInfo("geqrf",info);
947: PetscStackCallBLAS("BLAStrsm",BLAStrsm_("R","U","N","N",&k_,&k_,&sone,G,&ldg_,S,&lds_));
949: /* Update H */
950: PetscStackCallBLAS("BLAStrmm",BLAStrmm_("L","U","N","N",&k_,&k_,&sone,G,&ldg_,H,&ldh_));
951: PetscStackCallBLAS("BLAStrsm",BLAStrsm_("R","U","N","N",&k_,&k_,&sone,G,&ldg_,H,&ldh_));
952: PetscFree3(tau,work,G);
953: return(0);
954: }
956: static PetscErrorCode PEPNRefUpdateInvPair(PEP pep,PetscInt k,PetscScalar *H,PetscInt ldh,PetscScalar *fH,PetscScalar *dH,PetscScalar *S,PetscInt lds,BV dV,PetscScalar *dVS,PetscInt rds)
957: {
959: PetscInt i,j,nmat=pep->nmat,lda=nmat*k;
960: PetscScalar *tau,*array,*work;
961: PetscBLASInt lds_,k_,lda_,ldh_,kdrs_,info,k2_;
962: Mat M0;
965: PetscMalloc2(k,&tau,k,&work);
966: PetscBLASIntCast(lds,&lds_);
967: PetscBLASIntCast(lda,&lda_);
968: PetscBLASIntCast(ldh,&ldh_);
969: PetscBLASIntCast(k,&k_);
970: PetscBLASIntCast(2*k,&k2_);
971: PetscBLASIntCast((k+rds),&kdrs_);
972: /* Update H */
973: for (j=0;j<k;j++) {
974: for (i=0;i<k;i++) H[i+j*ldh] -= dH[i+j*k];
975: }
976: /* Update V */
977: for (j=0;j<k;j++) {
978: for (i=0;i<k;i++) dVS[i+j*2*k] = -dVS[i+j*2*k]+S[i+j*lds];
979: for (i=k;i<2*k;i++) dVS[i+j*2*k] = -dVS[i+j*2*k];
980: }
981: PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&kdrs_,&k_,dVS,&k2_,tau,work,&k_,&info));
982: SlepcCheckLapackInfo("geqrf",info);
983: /* Copy triangular matrix in S */
984: for (j=0;j<k;j++) {
985: for (i=0;i<=j;i++) S[i+j*lds] = dVS[i+j*2*k];
986: for (i=j+1;i<k;i++) S[i+j*lds] = 0.0;
987: }
988: PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&k2_,&k_,&k_,dVS,&k2_,tau,work,&k_,&info));
989: SlepcCheckLapackInfo("orgqr",info);
990: MatCreateSeqDense(PETSC_COMM_SELF,k,k,NULL,&M0);
991: MatDenseGetArray(M0,&array);
992: for (j=0;j<k;j++) {
993: PetscArraycpy(array+j*k,dVS+j*2*k,k);
994: }
995: MatDenseRestoreArray(M0,&array);
996: BVMultInPlace(pep->V,M0,0,k);
997: if (rds) {
998: MatDenseGetArray(M0,&array);
999: for (j=0;j<k;j++) {
1000: PetscArraycpy(array+j*k,dVS+k+j*2*k,rds);
1001: }
1002: MatDenseRestoreArray(M0,&array);
1003: BVMultInPlace(dV,M0,0,k);
1004: BVMult(pep->V,1.0,1.0,dV,NULL);
1005: }
1006: MatDestroy(&M0);
1007: NRefOrthogStep(pep,k,H,ldh,fH,S,lds);
1008: PetscFree2(tau,work);
1009: return(0);
1010: }
1012: static PetscErrorCode PEPNRefSetUp(PEP pep,PetscInt k,PetscScalar *H,PetscInt ldh,PEP_REFINE_EXPLICIT *matctx,PetscBool ini)
1013: {
1014: PetscErrorCode ierr;
1015: PEP_REFINE_MATSHELL *ctx;
1016: PetscScalar t,*coef;
1017: const PetscScalar *array;
1018: MatStructure str;
1019: PetscInt j,nmat=pep->nmat,n0,m0,n1,m1,n0_,m0_,n1_,m1_,N0,N1,p,*idx1,*idx2,count,si,i,l0;
1020: MPI_Comm comm;
1021: PetscMPIInt np;
1022: const PetscInt *rgs0,*rgs1;
1023: Mat B,C,*E,*A,*At;
1024: IS is1,is2;
1025: Vec v;
1026: PetscBool flg;
1027: Mat M,P;
1030: PetscMalloc1(nmat,&coef);
1031: STGetTransform(pep->st,&flg);
1032: if (flg) {
1033: PetscMalloc1(pep->nmat,&At);
1034: for (i=0;i<pep->nmat;i++) {
1035: STGetMatrixTransformed(pep->st,i,&At[i]);
1036: }
1037: } else At = pep->A;
1038: switch (pep->scheme) {
1039: case PEP_REFINE_SCHEME_EXPLICIT:
1040: if (ini) {
1041: if (matctx->subc) {
1042: A = matctx->A;
1043: comm = PetscSubcommChild(matctx->subc);
1044: } else {
1045: A = At;
1046: PetscObjectGetComm((PetscObject)pep,&comm);
1047: }
1048: E = matctx->E;
1049: STGetMatStructure(pep->st,&str);
1050: MatDuplicate(A[0],MAT_COPY_VALUES,&E[0]);
1051: j = (matctx->subc)?matctx->subc->color:0;
1052: PEPEvaluateBasis(pep,H[j+j*ldh],0,coef,NULL);
1053: for (j=1;j<nmat;j++) {
1054: MatAXPY(E[0],coef[j],A[j],str);
1055: }
1056: MatCreateDense(comm,PETSC_DECIDE,PETSC_DECIDE,k,k,NULL,&E[1]);
1057: MatAssemblyBegin(E[1],MAT_FINAL_ASSEMBLY);
1058: MatAssemblyEnd(E[1],MAT_FINAL_ASSEMBLY);
1059: MatGetOwnershipRange(E[0],&n0,&m0);
1060: MatGetOwnershipRange(E[1],&n1,&m1);
1061: MatGetOwnershipRangeColumn(E[0],&n0_,&m0_);
1062: MatGetOwnershipRangeColumn(E[1],&n1_,&m1_);
1063: /* T12 and T21 are computed from V and V*, so,
1064: they must have the same column and row ranges */
1065: if (m0_-n0_ != m0-n0) SETERRQ(PETSC_COMM_SELF,1,"Inconsistent dimensions");
1066: MatCreateDense(comm,m0-n0,m1_-n1_,PETSC_DECIDE,PETSC_DECIDE,NULL,&B);
1067: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
1068: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
1069: MatCreateDense(comm,m1-n1,m0_-n0_,PETSC_DECIDE,PETSC_DECIDE,NULL,&C);
1070: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
1071: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
1072: MatCreateTile(1.0,E[0],1.0,B,1.0,C,1.0,E[1],&M);
1073: MatDestroy(&B);
1074: MatDestroy(&C);
1075: matctx->compM1 = PETSC_TRUE;
1076: MatGetSize(E[0],NULL,&N0);
1077: MatGetSize(E[1],NULL,&N1);
1078: MPI_Comm_size(PetscObjectComm((PetscObject)M),&np);
1079: MatGetOwnershipRanges(E[0],&rgs0);
1080: MatGetOwnershipRanges(E[1],&rgs1);
1081: PetscMalloc4(PetscMax(k,N1),&matctx->idxp,N0,&matctx->idxg,N0,&matctx->map0,N1,&matctx->map1);
1082: /* Create column (and row) mapping */
1083: for (p=0;p<np;p++) {
1084: for (j=rgs0[p];j<rgs0[p+1];j++) matctx->map0[j] = j+rgs1[p];
1085: for (j=rgs1[p];j<rgs1[p+1];j++) matctx->map1[j] = j+rgs0[p+1];
1086: }
1087: MatCreateVecs(M,NULL,&matctx->tN);
1088: MatCreateVecs(matctx->E[1],NULL,&matctx->t1);
1089: VecDuplicate(matctx->tN,&matctx->ttN);
1090: if (matctx->subc) {
1091: MPI_Comm_size(PetscObjectComm((PetscObject)pep),&np);
1092: count = np*k;
1093: PetscMalloc2(count,&idx1,count,&idx2);
1094: VecCreateMPI(PetscObjectComm((PetscObject)pep),m1-n1,PETSC_DECIDE,&matctx->tp);
1095: VecGetOwnershipRange(matctx->tp,&l0,NULL);
1096: VecCreateMPI(PetscObjectComm((PetscObject)pep),k,PETSC_DECIDE,&matctx->tpg);
1097: for (si=0;si<matctx->subc->n;si++) {
1098: if (matctx->subc->color==si) {
1099: j=0;
1100: if (matctx->subc->color==si) {
1101: for (p=0;p<np;p++) {
1102: for (i=n1;i<m1;i++) {
1103: idx1[j] = l0+i-n1;
1104: idx2[j++] =p*k+i;
1105: }
1106: }
1107: }
1108: count = np*(m1-n1);
1109: } else count =0;
1110: ISCreateGeneral(PetscObjectComm((PetscObject)pep),count,idx1,PETSC_COPY_VALUES,&is1);
1111: ISCreateGeneral(PetscObjectComm((PetscObject)pep),count,idx2,PETSC_COPY_VALUES,&is2);
1112: VecScatterCreate(matctx->tp,is1,matctx->tpg,is2,&matctx->scatterp_id[si]);
1113: ISDestroy(&is1);
1114: ISDestroy(&is2);
1115: }
1116: PetscFree2(idx1,idx2);
1117: } else {
1118: VecScatterCreateToAll(matctx->t1,&matctx->scatterctx,&matctx->vseq);
1119: }
1120: P = M;
1121: } else {
1122: if (matctx->subc) {
1123: /* Scatter vectors pep->V */
1124: for (i=0;i<k;i++) {
1125: BVGetColumn(pep->V,i,&v);
1126: VecScatterBegin(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
1127: VecScatterEnd(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
1128: BVRestoreColumn(pep->V,i,&v);
1129: VecGetArrayRead(matctx->tg,&array);
1130: VecPlaceArray(matctx->t,(const PetscScalar*)array);
1131: BVInsertVec(matctx->V,i,matctx->t);
1132: VecResetArray(matctx->t);
1133: VecRestoreArrayRead(matctx->tg,&array);
1134: }
1135: }
1136: }
1137: break;
1138: case PEP_REFINE_SCHEME_MBE:
1139: if (ini) {
1140: if (matctx->subc) {
1141: A = matctx->A;
1142: comm = PetscSubcommChild(matctx->subc);
1143: } else {
1144: matctx->V = pep->V;
1145: A = At;
1146: PetscObjectGetComm((PetscObject)pep,&comm);
1147: MatCreateVecs(pep->A[0],&matctx->t,NULL);
1148: }
1149: STGetMatStructure(pep->st,&str);
1150: MatDuplicate(A[0],MAT_COPY_VALUES,&matctx->M1);
1151: j = (matctx->subc)?matctx->subc->color:0;
1152: PEPEvaluateBasis(pep,H[j+j*ldh],0,coef,NULL);
1153: for (j=1;j<nmat;j++) {
1154: MatAXPY(matctx->M1,coef[j],A[j],str);
1155: }
1156: BVDuplicateResize(matctx->V,PetscMax(k,pep->nmat),&matctx->W);
1157: BVDuplicateResize(matctx->V,k,&matctx->M2);
1158: BVDuplicate(matctx->M2,&matctx->M3);
1159: BVDuplicate(matctx->M2,&matctx->Wt);
1160: PetscMalloc5(k*k,&matctx->M4,k*k,&matctx->w,k*k,&matctx->wt,k,&matctx->d,k,&matctx->dt);
1161: matctx->compM1 = PETSC_TRUE;
1162: M = matctx->M1;
1163: P = M;
1164: }
1165: break;
1166: case PEP_REFINE_SCHEME_SCHUR:
1167: if (ini) {
1168: PetscObjectGetComm((PetscObject)pep,&comm);
1169: MatGetSize(At[0],&m0,&n0);
1170: PetscMalloc1(1,&ctx);
1171: STGetMatStructure(pep->st,&str);
1172: /* Create a shell matrix to solve the linear system */
1173: ctx->V = pep->V;
1174: ctx->k = k; ctx->nmat = nmat;
1175: PetscMalloc5(nmat,&ctx->A,k*k,&ctx->M4,k,&ctx->pM4,2*k*k,&ctx->work,nmat,&ctx->fih);
1176: for (i=0;i<nmat;i++) ctx->A[i] = At[i];
1177: PetscArrayzero(ctx->M4,k*k);
1178: MatCreateShell(comm,PETSC_DECIDE,PETSC_DECIDE,m0,n0,ctx,&M);
1179: MatShellSetOperation(M,MATOP_MULT,(void(*)(void))MatMult_FS);
1180: BVDuplicateResize(ctx->V,PetscMax(k,pep->nmat),&ctx->W);
1181: BVDuplicateResize(ctx->V,k,&ctx->M2);
1182: BVDuplicate(ctx->M2,&ctx->M3);
1183: BVCreateVec(pep->V,&ctx->t);
1184: MatDuplicate(At[0],MAT_COPY_VALUES,&ctx->M1);
1185: PEPEvaluateBasis(pep,H[0],0,coef,NULL);
1186: for (j=1;j<nmat;j++) {
1187: MatAXPY(ctx->M1,coef[j],At[j],str);
1188: }
1189: MatDuplicate(At[0],MAT_COPY_VALUES,&P);
1190: /* Compute a precond matrix for the system */
1191: t = H[0];
1192: PEPEvaluateBasis(pep,t,0,coef,NULL);
1193: for (j=1;j<nmat;j++) {
1194: MatAXPY(P,coef[j],At[j],str);
1195: }
1196: ctx->compM1 = PETSC_TRUE;
1197: }
1198: break;
1199: }
1200: if (ini) {
1201: PEPRefineGetKSP(pep,&pep->refineksp);
1202: KSPSetErrorIfNotConverged(pep->refineksp,PETSC_TRUE);
1203: KSPSetOperators(pep->refineksp,M,P);
1204: KSPSetFromOptions(pep->refineksp);
1205: }
1207: if (!ini && matctx && matctx->subc) {
1208: /* Scatter vectors pep->V */
1209: for (i=0;i<k;i++) {
1210: BVGetColumn(pep->V,i,&v);
1211: VecScatterBegin(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
1212: VecScatterEnd(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
1213: BVRestoreColumn(pep->V,i,&v);
1214: VecGetArrayRead(matctx->tg,&array);
1215: VecPlaceArray(matctx->t,(const PetscScalar*)array);
1216: BVInsertVec(matctx->V,i,matctx->t);
1217: VecResetArray(matctx->t);
1218: VecRestoreArrayRead(matctx->tg,&array);
1219: }
1220: }
1221: PetscFree(coef);
1222: if (flg) {
1223: PetscFree(At);
1224: }
1225: return(0);
1226: }
1228: static PetscErrorCode NRefSubcommSetup(PEP pep,PetscInt k,PEP_REFINE_EXPLICIT *matctx,PetscInt nsubc)
1229: {
1230: PetscErrorCode ierr;
1231: PetscInt i,si,j,m0,n0,nloc0,nloc_sub,*idx1,*idx2;
1232: IS is1,is2;
1233: BVType type;
1234: Vec v;
1235: const PetscScalar *array;
1236: Mat *A;
1237: PetscBool flg;
1240: STGetTransform(pep->st,&flg);
1241: if (flg) {
1242: PetscMalloc1(pep->nmat,&A);
1243: for (i=0;i<pep->nmat;i++) {
1244: STGetMatrixTransformed(pep->st,i,&A[i]);
1245: }
1246: } else A = pep->A;
1248: /* Duplicate pep matrices */
1249: PetscMalloc3(pep->nmat,&matctx->A,nsubc,&matctx->scatter_id,nsubc,&matctx->scatterp_id);
1250: for (i=0;i<pep->nmat;i++) {
1251: MatCreateRedundantMatrix(A[i],0,PetscSubcommChild(matctx->subc),MAT_INITIAL_MATRIX,&matctx->A[i]);
1252: }
1254: /* Create Scatter */
1255: MatCreateVecs(matctx->A[0],&matctx->t,NULL);
1256: MatGetLocalSize(matctx->A[0],&nloc_sub,NULL);
1257: VecCreateMPI(PetscSubcommContiguousParent(matctx->subc),nloc_sub,PETSC_DECIDE,&matctx->tg);
1258: BVGetColumn(pep->V,0,&v);
1259: VecGetOwnershipRange(v,&n0,&m0);
1260: nloc0 = m0-n0;
1261: PetscMalloc2(matctx->subc->n*nloc0,&idx1,matctx->subc->n*nloc0,&idx2);
1262: j = 0;
1263: for (si=0;si<matctx->subc->n;si++) {
1264: for (i=n0;i<m0;i++) {
1265: idx1[j] = i;
1266: idx2[j++] = i+pep->n*si;
1267: }
1268: }
1269: ISCreateGeneral(PetscObjectComm((PetscObject)pep),matctx->subc->n*nloc0,idx1,PETSC_COPY_VALUES,&is1);
1270: ISCreateGeneral(PetscObjectComm((PetscObject)pep),matctx->subc->n*nloc0,idx2,PETSC_COPY_VALUES,&is2);
1271: VecScatterCreate(v,is1,matctx->tg,is2,&matctx->scatter_sub);
1272: ISDestroy(&is1);
1273: ISDestroy(&is2);
1274: for (si=0;si<matctx->subc->n;si++) {
1275: j=0;
1276: for (i=n0;i<m0;i++) {
1277: idx1[j] = i;
1278: idx2[j++] = i+pep->n*si;
1279: }
1280: ISCreateGeneral(PetscObjectComm((PetscObject)pep),nloc0,idx1,PETSC_COPY_VALUES,&is1);
1281: ISCreateGeneral(PetscObjectComm((PetscObject)pep),nloc0,idx2,PETSC_COPY_VALUES,&is2);
1282: VecScatterCreate(v,is1,matctx->tg,is2,&matctx->scatter_id[si]);
1283: ISDestroy(&is1);
1284: ISDestroy(&is2);
1285: }
1286: BVRestoreColumn(pep->V,0,&v);
1287: PetscFree2(idx1,idx2);
1289: /* Duplicate pep->V vecs */
1290: BVGetType(pep->V,&type);
1291: BVCreate(PetscSubcommChild(matctx->subc),&matctx->V);
1292: BVSetType(matctx->V,type);
1293: BVSetSizesFromVec(matctx->V,matctx->t,k);
1294: if (pep->scheme==PEP_REFINE_SCHEME_EXPLICIT) {
1295: BVDuplicateResize(matctx->V,PetscMax(k,pep->nmat),&matctx->W);
1296: }
1297: for (i=0;i<k;i++) {
1298: BVGetColumn(pep->V,i,&v);
1299: VecScatterBegin(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
1300: VecScatterEnd(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
1301: BVRestoreColumn(pep->V,i,&v);
1302: VecGetArrayRead(matctx->tg,&array);
1303: VecPlaceArray(matctx->t,(const PetscScalar*)array);
1304: BVInsertVec(matctx->V,i,matctx->t);
1305: VecResetArray(matctx->t);
1306: VecRestoreArrayRead(matctx->tg,&array);
1307: }
1309: VecDuplicate(matctx->t,&matctx->Rv);
1310: VecDuplicate(matctx->t,&matctx->Vi);
1311: if (flg) {
1312: PetscFree(A);
1313: }
1314: return(0);
1315: }
1317: static PetscErrorCode NRefSubcommDestroy(PEP pep,PEP_REFINE_EXPLICIT *matctx)
1318: {
1320: PetscInt i;
1323: VecScatterDestroy(&matctx->scatter_sub);
1324: for (i=0;i<matctx->subc->n;i++) {
1325: VecScatterDestroy(&matctx->scatter_id[i]);
1326: }
1327: for (i=0;i<pep->nmat;i++) {
1328: MatDestroy(&matctx->A[i]);
1329: }
1330: if (pep->scheme==PEP_REFINE_SCHEME_EXPLICIT) {
1331: for (i=0;i<matctx->subc->n;i++) {
1332: VecScatterDestroy(&matctx->scatterp_id[i]);
1333: }
1334: VecDestroy(&matctx->tp);
1335: VecDestroy(&matctx->tpg);
1336: BVDestroy(&matctx->W);
1337: }
1338: PetscFree3(matctx->A,matctx->scatter_id,matctx->scatterp_id);
1339: BVDestroy(&matctx->V);
1340: VecDestroy(&matctx->t);
1341: VecDestroy(&matctx->tg);
1342: VecDestroy(&matctx->Rv);
1343: VecDestroy(&matctx->Vi);
1344: return(0);
1345: }
1347: PetscErrorCode PEPNewtonRefinement_TOAR(PEP pep,PetscScalar sigma,PetscInt *maxits,PetscReal *tol,PetscInt k,PetscScalar *S,PetscInt lds)
1348: {
1349: PetscErrorCode ierr;
1350: PetscScalar *H,*work,*dH,*fH,*dVS;
1351: PetscInt ldh,i,j,its=1,nmat=pep->nmat,nsubc=pep->npart,rds;
1352: PetscBLASInt k_,ld_,*p,info;
1353: BV dV;
1354: PetscBool sinvert,flg;
1355: PEP_REFINE_EXPLICIT *matctx=NULL;
1356: Vec v;
1357: Mat M,P;
1358: PEP_REFINE_MATSHELL *ctx;
1361: PetscLogEventBegin(PEP_Refine,pep,0,0,0);
1362: if (k > pep->n) SETERRQ1(PetscObjectComm((PetscObject)pep),1,"Multiple Refinement available only for invariant pairs of dimension smaller than n=%D",pep->n);
1363: /* the input tolerance is not being taken into account (by the moment) */
1364: its = *maxits;
1365: PetscMalloc3(k*k,&dH,nmat*k*k,&fH,k,&work);
1366: DSGetLeadingDimension(pep->ds,&ldh);
1367: DSGetArray(pep->ds,DS_MAT_A,&H);
1368: DSRestoreArray(pep->ds,DS_MAT_A,&H);
1369: PetscMalloc1(2*k*k,&dVS);
1370: STGetTransform(pep->st,&flg);
1371: if (!flg && pep->st && pep->ops->backtransform) { /* BackTransform */
1372: PetscBLASIntCast(k,&k_);
1373: PetscBLASIntCast(ldh,&ld_);
1374: PetscObjectTypeCompare((PetscObject)pep->st,STSINVERT,&sinvert);
1375: if (sinvert) {
1376: DSGetArray(pep->ds,DS_MAT_A,&H);
1377: PetscMalloc1(k,&p);
1378: PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&k_,&k_,H,&ld_,p,&info));
1379: SlepcCheckLapackInfo("getrf",info);
1380: PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&k_,H,&ld_,p,work,&k_,&info));
1381: SlepcCheckLapackInfo("getri",info);
1382: DSRestoreArray(pep->ds,DS_MAT_A,&H);
1383: pep->ops->backtransform = NULL;
1384: }
1385: if (sigma!=0.0) {
1386: DSGetArray(pep->ds,DS_MAT_A,&H);
1387: for (i=0;i<k;i++) H[i+ldh*i] += sigma;
1388: DSRestoreArray(pep->ds,DS_MAT_A,&H);
1389: pep->ops->backtransform = NULL;
1390: }
1391: }
1392: if ((pep->scale==PEP_SCALE_BOTH || pep->scale==PEP_SCALE_SCALAR) && pep->sfactor!=1.0) {
1393: DSGetArray(pep->ds,DS_MAT_A,&H);
1394: for (j=0;j<k;j++) {
1395: for (i=0;i<k;i++) H[i+j*ldh] *= pep->sfactor;
1396: }
1397: DSRestoreArray(pep->ds,DS_MAT_A,&H);
1398: if (!flg) {
1399: /* Restore original values */
1400: for (i=0;i<pep->nmat;i++){
1401: pep->pbc[pep->nmat+i] *= pep->sfactor;
1402: pep->pbc[2*pep->nmat+i] *= pep->sfactor*pep->sfactor;
1403: }
1404: }
1405: }
1406: if ((pep->scale==PEP_SCALE_DIAGONAL || pep->scale==PEP_SCALE_BOTH) && pep->Dr) {
1407: for (i=0;i<k;i++) {
1408: BVGetColumn(pep->V,i,&v);
1409: VecPointwiseMult(v,v,pep->Dr);
1410: BVRestoreColumn(pep->V,i,&v);
1411: }
1412: }
1413: DSGetArray(pep->ds,DS_MAT_A,&H);
1415: NRefOrthogStep(pep,k,H,ldh,fH,S,lds);
1416: /* check if H is in Schur form */
1417: for (i=0;i<k-1;i++) {
1418: if (H[i+1+i*ldh]!=0.0) {
1419: #if !defined(PETSC_USE_COMPLEX)
1420: SETERRQ(PetscObjectComm((PetscObject)pep),1,"Iterative Refinement requires the complex Schur form of the projected matrix");
1421: #else
1422: SETERRQ(PetscObjectComm((PetscObject)pep),1,"Iterative Refinement requires an upper triangular projected matrix");
1423: #endif
1424: }
1425: }
1426: if (nsubc>k) SETERRQ(PetscObjectComm((PetscObject)pep),1,"Amount of subcommunicators should not be larger than the invariant pair dimension");
1427: BVSetActiveColumns(pep->V,0,k);
1428: BVDuplicateResize(pep->V,k,&dV);
1429: PetscLogObjectParent((PetscObject)pep,(PetscObject)dV);
1430: if (pep->scheme!=PEP_REFINE_SCHEME_SCHUR) {
1431: PetscMalloc1(1,&matctx);
1432: if (nsubc>1) { /* spliting in subcommunicators */
1433: matctx->subc = pep->refinesubc;
1434: NRefSubcommSetup(pep,k,matctx,nsubc);
1435: } else matctx->subc=NULL;
1436: }
1438: /* Loop performing iterative refinements */
1439: for (i=0;i<its;i++) {
1440: /* Pre-compute the polynomial basis evaluated in H */
1441: PEPEvaluateBasisforMatrix(pep,nmat,k,H,ldh,fH);
1442: PEPNRefSetUp(pep,k,H,ldh,matctx,PetscNot(i));
1443: /* Solve the linear system */
1444: PEPNRefForwardSubstitution(pep,k,S,lds,H,ldh,fH,dV,dVS,&rds,dH,k,pep->refineksp,matctx);
1445: /* Update X (=V*S) and H, and orthogonalize [X;X*fH1;...;XfH(deg-1)] */
1446: PEPNRefUpdateInvPair(pep,k,H,ldh,fH,dH,S,lds,dV,dVS,rds);
1447: }
1448: DSRestoreArray(pep->ds,DS_MAT_A,&H);
1449: if (!flg && sinvert) {
1450: PetscFree(p);
1451: }
1452: PetscFree3(dH,fH,work);
1453: PetscFree(dVS);
1454: BVDestroy(&dV);
1455: switch (pep->scheme) {
1456: case PEP_REFINE_SCHEME_EXPLICIT:
1457: for (i=0;i<2;i++) {
1458: MatDestroy(&matctx->E[i]);
1459: }
1460: PetscFree4(matctx->idxp,matctx->idxg,matctx->map0,matctx->map1);
1461: VecDestroy(&matctx->tN);
1462: VecDestroy(&matctx->ttN);
1463: VecDestroy(&matctx->t1);
1464: if (nsubc>1) {
1465: NRefSubcommDestroy(pep,matctx);
1466: } else {
1467: VecDestroy(&matctx->vseq);
1468: VecScatterDestroy(&matctx->scatterctx);
1469: }
1470: PetscFree(matctx);
1471: KSPGetOperators(pep->refineksp,&M,NULL);
1472: MatDestroy(&M);
1473: break;
1474: case PEP_REFINE_SCHEME_MBE:
1475: BVDestroy(&matctx->W);
1476: BVDestroy(&matctx->Wt);
1477: BVDestroy(&matctx->M2);
1478: BVDestroy(&matctx->M3);
1479: MatDestroy(&matctx->M1);
1480: VecDestroy(&matctx->t);
1481: PetscFree5(matctx->M4,matctx->w,matctx->wt,matctx->d,matctx->dt);
1482: if (nsubc>1) {
1483: NRefSubcommDestroy(pep,matctx);
1484: }
1485: PetscFree(matctx);
1486: break;
1487: case PEP_REFINE_SCHEME_SCHUR:
1488: KSPGetOperators(pep->refineksp,&M,&P);
1489: MatShellGetContext(M,&ctx);
1490: PetscFree5(ctx->A,ctx->M4,ctx->pM4,ctx->work,ctx->fih);
1491: MatDestroy(&ctx->M1);
1492: BVDestroy(&ctx->M2);
1493: BVDestroy(&ctx->M3);
1494: BVDestroy(&ctx->W);
1495: VecDestroy(&ctx->t);
1496: PetscFree(ctx);
1497: MatDestroy(&M);
1498: MatDestroy(&P);
1499: break;
1500: }
1501: PetscLogEventEnd(PEP_Refine,pep,0,0,0);
1502: return(0);
1503: }