Actual source code: pjd.c

slepc-3.10.1 2018-10-23
Report Typos and Errors
  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-2018, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.
  7:    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
  8:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  9: */
 10: /*
 11:    SLEPc polynomial eigensolver: "jd"

 13:    Method: Jacobi-Davidson

 15:    Algorithm:

 17:        Jacobi-Davidson for polynomial eigenvalue problems.
 18:        Based on code contributed by the authors of [2] below.

 20:    References:

 22:        [1] G.L.G. Sleijpen et al., "Jacobi-Davidson type methods for
 23:            generalized eigenproblems and polynomial eigenproblems", BIT
 24:            36(3):595-633, 1996.

 26:        [2] Feng-Nan Hwang, Zih-Hao Wei, Tsung-Ming Huang, Weichung Wang,
 27:            "A Parallel Additive Schwarz Preconditioned Jacobi-Davidson
 28:            Algorithm for Polynomial Eigenvalue Problems in Quantum Dot
 29:            Simulation", J. Comput. Phys. 229(8):2932-2947, 2010.
 30: */

 32: #include <slepc/private/pepimpl.h>    /*I "slepcpep.h" I*/
 33: #include <slepcblaslapack.h>

 35: typedef struct {
 36:   PetscReal   keep;          /* restart parameter */
 37:   PetscReal   fix;           /* fix parameter */
 38:   BV          V;             /* work basis vectors to store the search space */
 39:   BV          W;             /* work basis vectors to store the test space */
 40:   BV          *TV;           /* work basis vectors to store T*V (each TV[i] is the coefficient for \lambda^i of T*V for the extended T) */
 41:   BV          *AX;           /* work basis vectors to store A_i*X for locked eigenvectors */
 42:   BV          N[2];          /* auxiliary work BVs */
 43:   BV          X;             /* locked eigenvectors */
 44:   PetscScalar *T;            /* matrix of the invariant pair */
 45:   PetscScalar *Tj;           /* matrix containing the powers of the invariant pair matrix */
 46:   PetscScalar *XpX;          /* X^H*X */
 47:   PetscInt    ld;            /* leading dimension for Tj and XpX */
 48:   PC          pcshell;       /* preconditioner including basic precond+projector */
 49:   Mat         Pshell;        /* auxiliary shell matrix */
 50:   PetscInt    nlock;         /* number of locked vectors in the invariant pair */
 51: } PEP_JD;

 53: typedef struct {
 54:   PC          pc;            /* basic preconditioner */
 55:   Vec         Bp;            /* preconditioned residual of derivative polynomial, B\p */
 56:   Vec         u;             /* Ritz vector */
 57:   PetscScalar gamma;         /* precomputed scalar u'*B\p */
 58:   PetscScalar *M;
 59:   PetscScalar *ps;
 60:   PetscInt    ld;
 61:   Vec         *work;
 62:   BV          X;
 63:   PetscInt    n;
 64: } PEP_JD_PCSHELL;

 66: typedef struct {
 67:   Mat         P;             /*  */
 68:   PEP         pep;
 69:   Vec         *work;
 70:   PetscScalar theta;
 71: } PEP_JD_MATSHELL;

 73: /*
 74:    Duplicate and resize auxiliary basis
 75: */
 76: static PetscErrorCode PEPJDDuplicateBasis(PEP pep,BV *basis)
 77: {
 78:   PetscErrorCode     ierr;
 79:   PEP_JD             *pjd = (PEP_JD*)pep->data;
 80:   PetscInt           nloc,m;
 81:   PetscMPIInt        rank,nproc;
 82:   BVType             type;
 83:   BVOrthogType       otype;
 84:   BVOrthogRefineType oref;
 85:   PetscReal          oeta;
 86:   BVOrthogBlockType  oblock;

 89:   if (pjd->ld>1) {
 90:     BVCreate(PetscObjectComm((PetscObject)pep),basis);
 91:     MPI_Comm_rank(PetscObjectComm((PetscObject)pep),&rank);
 92:     MPI_Comm_size(PetscObjectComm((PetscObject)pep),&nproc);
 93:     BVGetSizes(pep->V,&nloc,NULL,&m);
 94:     if (rank==nproc-1) nloc += pjd->ld-1;
 95:     BVSetSizes(*basis,nloc,PETSC_DECIDE,m);
 96:     BVGetType(pep->V,&type);
 97:     BVSetType(*basis,type);
 98:     BVGetOrthogonalization(pep->V,&otype,&oref,&oeta,&oblock);
 99:     BVSetOrthogonalization(*basis,otype,oref,oeta,oblock);
100:     PetscObjectStateIncrease((PetscObject)*basis);
101:   } else {
102:     BVDuplicate(pep->V,basis);
103:   }
104:   return(0);
105: }

107: PetscErrorCode PEPSetUp_JD(PEP pep)
108: {
110:   PEP_JD         *pjd = (PEP_JD*)pep->data;
111:   PetscBool      isprecond,flg;
112:   PetscInt       i;

115:   pep->lineariz = PETSC_FALSE;
116:   PEPSetDimensions_Default(pep,pep->nev,&pep->ncv,&pep->mpd);
117:   if (!pep->max_it) pep->max_it = PetscMax(100,2*pep->n/pep->ncv);
118:   if (!pep->which) pep->which = PEP_TARGET_MAGNITUDE;
119:   if (pep->which != PEP_TARGET_MAGNITUDE) SETERRQ(PetscObjectComm((PetscObject)pep),PETSC_ERR_SUP,"PEPJD only supports which=target_magnitude");

121:   PetscObjectTypeCompare((PetscObject)pep->st,STPRECOND,&isprecond);
122:   if (!isprecond) SETERRQ(PetscObjectComm((PetscObject)pep),PETSC_ERR_SUP,"JD only works with PRECOND spectral transformation");

124:   STGetTransform(pep->st,&flg);
125:   if (flg) SETERRQ(PetscObjectComm((PetscObject)pep),PETSC_ERR_SUP,"Solver requires the ST transformation flag unset, see STSetTransform()");

127:   if (!pjd->keep) pjd->keep = 0.5;
128:   PEPBasisCoefficients(pep,pep->pbc);
129:   PEPAllocateSolution(pep,0);
130:   PEPSetWorkVecs(pep,5);
131:   pjd->ld = pep->nev;
132: #if !defined (PETSC_USE_COMPLEX)
133:   pjd->ld++;
134: #endif
135:   PetscMalloc2(pep->nmat,&pjd->TV,pep->nmat,&pjd->AX);
136:   for (i=0;i<pep->nmat;i++) {
137:     PEPJDDuplicateBasis(pep,pjd->TV+i);
138:   }
139:   PEPJDDuplicateBasis(pep,&pjd->W);
140:   if (pjd->ld>1) {
141:     PEPJDDuplicateBasis(pep,&pjd->V);
142:     BVSetFromOptions(pjd->V);
143:     for (i=0;i<pep->nmat;i++) {
144:       BVDuplicateResize(pep->V,pjd->ld-1,pjd->AX+i);
145:     }
146:     BVDuplicateResize(pep->V,pjd->ld-1,pjd->N);
147:     BVDuplicateResize(pep->V,pjd->ld-1,pjd->N+1);
148:     pjd->X = pep->V;
149:     PetscCalloc3((pjd->ld)*(pjd->ld),&pjd->XpX,pep->ncv*pep->ncv,&pjd->T,pjd->ld*pjd->ld*pep->nmat,&pjd->Tj);
150:   } else pjd->V = pep->V;
151:   DSSetType(pep->ds,DSPEP);
152:   DSPEPSetDegree(pep->ds,pep->nmat-1);
153:   if (pep->basis!=PEP_BASIS_MONOMIAL) {
154:     DSPEPSetCoefficients(pep->ds,pep->pbc);
155:   }
156:   DSAllocate(pep->ds,pep->ncv);
157:   return(0);
158: }

160: /*
161:    Updates columns (low to (high-1)) of TV[i]
162: */
163: static PetscErrorCode PEPJDUpdateTV(PEP pep,PetscInt low,PetscInt high,Vec *w)
164: {
166:   PEP_JD         *pjd = (PEP_JD*)pep->data;
167:   PetscInt       pp,col,i,nloc,nconv;
168:   Vec            v1,v2,t1,t2;
169:   PetscScalar    *array1,*array2,*x2,*xx,*N,*Np,*y2=NULL,zero=0.0,sone=1.0,*pT,fact,*psc;
170:   PetscReal      *cg,*ca,*cb;
171:   PetscMPIInt    rk,np,count;
172:   PetscBLASInt   n_,ld_,one=1;
173:   Mat            T;
174:   BV             pbv;

177:   ca = pep->pbc; cb = ca+pep->nmat; cg = cb + pep->nmat;
178:   nconv = pjd->nlock;
179:   PetscMalloc5(nconv,&x2,nconv,&xx,nconv*nconv,&pT,nconv*nconv,&N,nconv*nconv,&Np);
180:   MPI_Comm_rank(PetscObjectComm((PetscObject)pep),&rk);
181:   MPI_Comm_size(PetscObjectComm((PetscObject)pep),&np);
182:   BVGetSizes(pep->V,&nloc,NULL,NULL);
183:   t1 = w[0];
184:   t2 = w[1];
185:   PetscBLASIntCast(pjd->nlock,&n_);
186:   PetscBLASIntCast(pjd->ld,&ld_);
187:   if (nconv){
188:     for (i=0;i<nconv;i++) {
189:       PetscMemcpy(pT+i*nconv,pjd->T+i*pep->ncv,nconv*sizeof(PetscScalar));
190:     }
191:     MatCreateSeqDense(PETSC_COMM_SELF,nconv,nconv,pT,&T);
192:   }
193:   for (col=low;col<high;col++) {
194:     BVGetColumn(pjd->V,col,&v1);
195:     VecGetArray(v1,&array1);
196:     if (nconv>0) {
197:       if (rk==np-1) { for (i=0;i<nconv;i++) x2[i] = array1[nloc+i]; }
198:       PetscMPIIntCast(nconv,&count);
199:       MPI_Bcast(x2,count,MPIU_SCALAR,np-1,PetscObjectComm((PetscObject)pep));
200:     }
201:     VecPlaceArray(t1,array1);
202:     if (nconv) {
203:       BVSetActiveColumns(pjd->N[0],0,nconv);
204:       BVSetActiveColumns(pjd->N[1],0,nconv);
205:       BVDotVec(pjd->X,t1,xx);
206:     }
207:     for (pp=pep->nmat-1;pp>=0;pp--) {
208:       BVGetColumn(pjd->TV[pp],col,&v2);
209:       VecGetArray(v2,&array2);
210:       VecPlaceArray(t2,array2);
211:       MatMult(pep->A[pp],t1,t2);
212:       if (nconv) {
213:         if (rk==np-1 && pp<pep->nmat-1) {
214:           y2 = array2+nloc;
215:           PetscMemcpy(y2,xx,nconv*sizeof(PetscScalar));
216:           PetscStackCallBLAS("BLAStrmv",BLAStrmv_("U","C","N",&n_,pjd->Tj+pjd->ld*pjd->ld*pp,&ld_,y2,&one));
217:         }
218:         if (pp<pep->nmat-3) {
219:           BVMult(pjd->N[0],1.0,-cg[pp+2],pjd->AX[pp+1],NULL);
220:           MatShift(T,-cb[pp+1]);
221:           BVMult(pjd->N[0],1.0/ca[pp],1.0/ca[pp],pjd->N[1],T);
222:           pbv = pjd->N[0]; pjd->N[0] = pjd->N[1]; pjd->N[1] = pbv;
223:           BVMultVec(pjd->N[1],1.0,1.0,t2,x2);
224:           if (rk==np-1) {
225:             fact = -cg[pp+2];
226:             PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&n_,&n_,&n_,&sone,pjd->Tj+(pp+1)*pjd->ld*pjd->ld,&ld_,pjd->XpX,&ld_,&fact,Np,&n_));
227:             fact = 1/ca[pp];
228:             PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&fact,N,&n_,pT,&n_,&fact,Np,&n_));
229:             psc = Np; Np = N; N = psc;
230:             PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,N,&n_,x2,&one,&sone,y2,&one));
231:           }
232:           MatShift(T,cb[pp+1]);
233:         } else if (pp==pep->nmat-3) {
234:           BVCopy(pjd->AX[pp+2],pjd->N[0]);
235:           BVScale(pjd->N[0],1/ca[pp+1]);
236:           BVCopy(pjd->AX[pp+1],pjd->N[1]);
237:           MatShift(T,-cb[pp+1]);
238:           BVMult(pjd->N[1],1.0/ca[pp],1.0/ca[pp],pjd->N[0],T);
239:           BVMultVec(pjd->N[1],1.0,1.0,t2,x2);
240:           if (rk==np-1) {
241:             fact = 1/ca[pp];
242:             PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&n_,&n_,&n_,&fact,pjd->Tj+(pp+1)*pjd->ld*pjd->ld,&ld_,pjd->XpX,&ld_,&zero,N,&n_));
243:             PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,N,&n_,x2,&one,&sone,y2,&one));
244:           }
245:           MatShift(T,cb[pp+1]);
246:         } else if (pp==pep->nmat-2) {
247:           BVMultVec(pjd->AX[pp+1],1.0/ca[pp],1.0,t2,x2);
248:           if (rk==np-1) {
249:             PetscMemzero(Np,nconv*nconv*sizeof(PetscScalar));
250:           }
251:         }
252:       }
253:       VecResetArray(t2);
254:       VecRestoreArray(v2,&array2);
255:       BVRestoreColumn(pjd->TV[pp],col,&v2);
256:     }
257:     VecResetArray(t1);
258:     VecRestoreArray(v1,&array1);
259:     BVRestoreColumn(pjd->V,col,&v1);
260:   }
261:   if (nconv) {MatDestroy(&T);}
262:   PetscFree5(x2,xx,pT,N,Np);
263:   return(0);
264: }

266: /*
267:    RRQR of X. Xin*P=Xou*R. Rank of R is rk
268: */
269: static PetscErrorCode PEPJDOrthogonalize(PetscInt row,PetscInt col,PetscScalar *X,PetscInt ldx,PetscInt *rk,PetscInt *P,PetscScalar *R,PetscInt ldr)
270: {
271: #if defined(SLEPC_MISSING_LAPACK_GEQP3) || defined(PETSC_MISSING_LAPACK_ORGQR)
273:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GEQP3/QRGQR - Lapack routines are unavailable");
274: #else
276:   PetscInt       i,j,n,r;
277:   PetscBLASInt   row_,col_,ldx_,*p,lwork,info,n_;
278:   PetscScalar    *tau,*work;
279:   PetscReal      tol,*rwork;

282:   PetscBLASIntCast(row,&row_);
283:   PetscBLASIntCast(col,&col_);
284:   PetscBLASIntCast(ldx,&ldx_);
285:   n = PetscMin(row,col);
286:   PetscBLASIntCast(n,&n_);
287:   lwork = 3*col_+1;
288:   PetscMalloc4(col,&p,n,&tau,lwork,&work,2*col,&rwork);
289:   for (i=1;i<col;i++) p[i] = 0;
290:   p[0] = 1;

292:   /* rank revealing QR */
293: #if defined(PETSC_USE_COMPLEX)
294:   PetscStackCallBLAS("LAPACKgeqp3",LAPACKgeqp3_(&row_,&col_,X,&ldx_,p,tau,work,&lwork,rwork,&info));
295: #else
296:   PetscStackCallBLAS("LAPACKgeqp3",LAPACKgeqp3_(&row_,&col_,X,&ldx_,p,tau,work,&lwork,&info));
297: #endif
298:   SlepcCheckLapackInfo("geqp3",info);
299:   if (P) for (i=0;i<col;i++) P[i] = p[i];

301:   /* rank computation */
302:   tol = PetscMax(row,col)*PETSC_MACHINE_EPSILON*PetscAbsScalar(X[0]);
303:   r = 1;
304:   for (i=1;i<n;i++) {
305:     if (PetscAbsScalar(X[i+ldx*i])>tol) r++;
306:     else break;
307:   }
308:   if (rk) *rk=r;

310:   /* copy upper triangular matrix if requested */
311:   if (R) {
312:      for (i=0;i<r;i++) {
313:        PetscMemzero(R+i*ldr,r*sizeof(PetscScalar));
314:        for (j=0;j<=i;j++) R[i*ldr+j] = X[i*ldx+j];
315:      }
316:   }
317:   PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&row_,&n_,&n_,X,&ldx_,tau,work,&lwork,&info));
318:   SlepcCheckLapackInfo("orgqr",info);
319:   PetscFree4(p,tau,work,rwork);
320:   return(0);
321: #endif
322: }

324: /*
325:    Application of extended preconditioner
326: */
327: static PetscErrorCode PEPJDExtendedPCApply(PC pc,Vec x,Vec y)
328: {
329:   PetscInt          i,j,nloc,n,ld;
330:   PetscMPIInt       rk,np,count;
331:   Vec               tx,ty;
332:   PEP_JD_PCSHELL    *ctx;
333:   PetscErrorCode    ierr;
334:   const PetscScalar *array1;
335:   PetscScalar       *x2=NULL,*t=NULL,*ps,*array2;
336:   PetscBLASInt      one=1.0,ld_,n_;

339:   PCShellGetContext(pc,(void**)&ctx);
340:   n  = ctx->n;
341:   ps = ctx->ps;
342:   ld = ctx->ld;
343:   if (n) {
344:     PetscMalloc2(n,&x2,n,&t);
345:     MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rk);
346:     MPI_Comm_size(PetscObjectComm((PetscObject)pc),&np);
347:     if (rk==np-1) {
348:       VecGetLocalSize(ctx->work[0],&nloc);
349:       VecGetArrayRead(x,&array1);
350:       for (i=0;i<n;i++) x2[i] = array1[nloc+i];
351:       VecRestoreArrayRead(x,&array1);
352:     }
353:     PetscMPIIntCast(n,&count);
354:     MPI_Bcast(x2,count,MPIU_SCALAR,np-1,PetscObjectComm((PetscObject)pc));
355:   }

357:   /* y = B\x apply PC */
358:   tx = ctx->work[0];
359:   ty = ctx->work[1];
360:   VecGetArrayRead(x,&array1);
361:   VecPlaceArray(tx,array1);
362:   VecGetArray(y,&array2);
363:   VecPlaceArray(ty,array2);
364:   PCApply(ctx->pc,tx,ty);
365:   if (n) {
366:     for (j=0;j<n;j++) {
367:       t[j] = 0.0;
368:       for (i=0;i<n;i++) t[j] += ctx->M[i+j*ld]*x2[i];
369:     }
370:     if (rk==np-1) for (i=0;i<n;i++) array2[nloc+i] = t[i];
371:     PetscBLASIntCast(ld,&ld_);
372:     PetscBLASIntCast(n,&n_);
373:     PetscStackCallBLAS("BLAStrmv",BLAStrmv_("U","N","N",&n_,ps,&ld_,t,&one));
374:     BVMultVec(ctx->X,-1.0,1.0,ty,t);
375:     PetscFree2(x2,t);
376:   }
377:   VecResetArray(tx);
378:   VecResetArray(ty);
379:   VecRestoreArrayRead(x,&array1);
380:   VecRestoreArray(y,&array2);
381:   return(0);
382: }

384: /*
385:    Application of shell preconditioner:
386:       y = B\x - eta*B\p,  with eta = (u'*B\x)/(u'*B\p)
387: */
388: static PetscErrorCode PCShellApply_PEPJD(PC pc,Vec x,Vec y)
389: {
391:   PetscScalar    eta;
392:   PEP_JD_PCSHELL *ctx;

395:   PCShellGetContext(pc,(void**)&ctx);

397:   /* y = B\x apply extended PC */
398:   PEPJDExtendedPCApply(pc,x,y);

400:   /* Compute eta = u'*y / u'*Bp */
401:   VecDot(y,ctx->u,&eta);
402:   eta /= ctx->gamma;

404:   /* y = y - eta*Bp */
405:   VecAXPY(y,-eta,ctx->Bp);
406:   return(0);
407: }

409: static PetscErrorCode PEPJDCopyToExtendedVec(PEP pep,Vec v,PetscScalar *a,PetscInt na,PetscInt off,Vec vex,PetscBool back)
410: {
412:   PetscMPIInt    np,rk,count;
413:   PetscScalar    *array1,*array2;
414:   PetscInt       nloc;

417:   MPI_Comm_rank(PetscObjectComm((PetscObject)pep),&rk);
418:   MPI_Comm_size(PetscObjectComm((PetscObject)pep),&np);
419:   BVGetSizes(pep->V,&nloc,NULL,NULL);
420:   if (v) {
421:     VecGetArray(v,&array1);
422:     VecGetArray(vex,&array2);
423:     if (back) {
424:       PetscMemcpy(array1,array2,nloc*sizeof(PetscScalar));
425:     } else {
426:       PetscMemcpy(array2,array1,nloc*sizeof(PetscScalar));
427:     }
428:     VecRestoreArray(v,&array1);
429:     VecRestoreArray(vex,&array2);
430:   }
431:   if (a) {
432:     if (rk==np-1) {
433:       VecGetArray(vex,&array2);
434:       if (back) {
435:         PetscMemcpy(a,array2+nloc+off,na*sizeof(PetscScalar));
436:       } else {
437:         PetscMemcpy(array2+nloc+off,a,na*sizeof(PetscScalar));
438:       }
439:       VecRestoreArray(vex,&array2);
440:     }
441:     if (back) {
442:       PetscMPIIntCast(na,&count);
443:       MPI_Bcast(a,count,MPIU_SCALAR,np-1,PetscObjectComm((PetscObject)pep));
444:     }
445:   }
446:   return(0);
447: }

449: /* Computes Phi^hat(lambda) times a vector or its derivative (depends on beval)
450:      if no vector is provided returns a matrix
451:  */
452: static PetscErrorCode PEPJDEvaluateHatBasis(PEP pep,PetscInt n,PetscScalar *H,PetscInt ldh,PetscScalar *beval,PetscScalar *t,PetscInt idx,PetscScalar *qpp,PetscScalar *qp,PetscScalar *q)
453: {
455:   PetscInt       j,i;
456:   PetscBLASInt   n_,ldh_,one=1;
457:   PetscReal      *a,*b,*g;
458:   PetscScalar    sone=1.0;

461:   a = pep->pbc; b=a+pep->nmat; g=b+pep->nmat;
462:   PetscBLASIntCast(n,&n_);
463:   PetscBLASIntCast(ldh,&ldh_);
464:   if (idx<1) {
465:     PetscMemzero(q,(t?n:n*n)*sizeof(PetscScalar));
466:   } else if (idx==1) {
467:     if (t) {for (j=0;j<n;j++) q[j] = t[j]*beval[idx-1]/a[0];}
468:     else {
469:       PetscMemzero(q,n*n*sizeof(PetscScalar));
470:       for (j=0;j<n;j++) q[(j+1)*n] = beval[idx-1]/a[0];
471:     }
472:   } else {
473:     if (t) {
474:       PetscMemcpy(q,qp,n*sizeof(PetscScalar));
475:       PetscStackCallBLAS("BLAStrmv",BLAStrmv_("U","N","N",&n_,H,&ldh_,q,&one));
476:       for (j=0;j<n;j++) {
477:         q[j] += beval[idx-1]*t[j]-b[idx-1]*qp[j]-g[idx-1]*qpp[j];
478:         q[j] /= a[idx-1]; 
479:       }
480:     } else {
481:       PetscMemcpy(q,qp,n*n*sizeof(PetscScalar));
482:       PetscStackCallBLAS("BLAStrmm",BLAStrmm_("L","U","N","N",&n_,&n_,&sone,H,&ldh_,q,&n_));
483:       for (j=0;j<n;j++) {
484:         q[j+n*j] += beval[idx-1];
485:         for (i=0;i<n;i++) {
486:           q[i+n*j] += -b[idx-1]*qp[j*n+i]-g[idx-1]*qpp[j*n+i];
487:           q[i+n*j] /= a[idx-1]; 
488:         }
489:       }
490:     }
491:   }
492:   return(0);
493: }

495: static PetscErrorCode PEPJDComputeResidual(PEP pep,PetscBool derivative,Vec u,PetscScalar theta,Vec p,Vec *work)
496: {
497:   PEP_JD         *pjd = (PEP_JD*)pep->data;
499:   PetscMPIInt    rk,np,count;
500:   Vec            tu,tp,w;
501:   PetscScalar    *dval,*array1,*array2,*x2=NULL,*y2,*qj=NULL,*tt=NULL,*xx=NULL,sone=1.0;
502:   PetscInt       i,j,nconv,nloc,deg=pep->nmat-1;
503:   PetscBLASInt   n,ld,one=1;

506:   nconv = pjd->nlock;
507:   if (!nconv) {
508:     PetscMalloc1(pep->nmat,&dval);
509:   } else {
510:     PetscMalloc5(pep->nmat,&dval,nconv,&xx,nconv,&tt,nconv,&x2,nconv*pep->nmat,&qj);
511:     MPI_Comm_rank(PetscObjectComm((PetscObject)pep),&rk);
512:     MPI_Comm_size(PetscObjectComm((PetscObject)pep),&np);
513:     if (rk==np-1) {
514:       BVGetSizes(pep->V,&nloc,NULL,NULL);
515:       VecGetArray(u,&array1);
516:       for (i=0;i<nconv;i++) x2[i] = array1[nloc+i];
517:       VecRestoreArray(u,&array1);
518:     }
519:     PetscMPIIntCast(nconv,&count);
520:     MPI_Bcast(x2,count,MPIU_SCALAR,np-1,PetscObjectComm((PetscObject)pep));
521:   }
522:   tu = work[0];
523:   tp = work[1];
524:   w  = work[2];
525:   VecGetArray(u,&array1);
526:   VecPlaceArray(tu,array1);
527:   VecGetArray(p,&array2);
528:   VecPlaceArray(tp,array2);
529:   VecSet(tp,0.0);
530:   if (derivative) {
531:     PEPEvaluateBasisDerivative(pep,theta,0.0,dval,NULL);
532:   } else {
533:     PEPEvaluateBasis(pep,theta,0.0,dval,NULL);
534:   }
535:   for (i=derivative?1:0;i<pep->nmat;i++) {
536:     MatMult(pep->A[i],tu,w);
537:     VecAXPY(tp,dval[i],w);
538:   }
539:   if (nconv) {
540:     for (i=0;i<pep->nmat;i++) {
541:       PEPJDEvaluateHatBasis(pep,nconv,pjd->T,pep->ncv,dval,x2,i,i>1?qj+(i-2)*nconv:NULL,i>0?qj+(i-1)*nconv:NULL,qj+i*nconv);
542:     }
543:     for (i=derivative?2:1;i<pep->nmat;i++) {
544:       BVMultVec(pjd->AX[i],1.0,1.0,tp,qj+i*nconv);
545:     }
546:     BVSetActiveColumns(pjd->X,0,nconv);
547:     BVDotVec(pjd->X,tu,xx);
548:     if (rk==np-1) {
549:       PetscBLASIntCast(nconv,&n);
550:       PetscBLASIntCast(pjd->ld,&ld);
551:       y2 = array2+nloc;
552:       PetscMemzero(y2,nconv*sizeof(PetscScalar));
553:       for (j=derivative?1:0;j<deg;j++) {
554:         PetscMemcpy(tt,xx,nconv*sizeof(PetscScalar));
555:         PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&n,&sone,pjd->XpX,&ld,qj+j*nconv,&one,&dval[j],tt,&one));
556:         PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n,&n,&sone,pjd->Tj+j*ld*ld,&ld,tt,&one,&sone,y2,&one)); 
557:       }
558:     }
559:     PetscFree5(dval,xx,tt,x2,qj);
560:   } else {
561:     PetscFree(dval);
562:   }
563:   VecResetArray(tu);
564:   VecRestoreArray(u,&array1);
565:   VecResetArray(tp);
566:   VecRestoreArray(p,&array2);
567:   return(0);
568: }

570: static PetscErrorCode PEPJDProcessInitialSpace(PEP pep,Vec *w)
571: {
572:   PEP_JD         *pjd = (PEP_JD*)pep->data;
574:   PetscScalar    *tt;
575:   Vec            vg,wg;
576:   PetscInt       i;
577:   PetscReal      norm;

580:   PetscMalloc1(pjd->ld-1,&tt);
581:   if (pep->nini==0) {
582:     BVSetRandomColumn(pjd->V,0);
583:     for (i=0;i<pjd->ld-1;i++) tt[i] = 0.0;
584:     BVGetColumn(pjd->V,0,&vg);
585:     PEPJDCopyToExtendedVec(pep,NULL,tt,pjd->ld-1,0,vg,PETSC_FALSE);
586:     BVRestoreColumn(pjd->V,0,&vg);
587:     BVNormColumn(pjd->V,0,NORM_2,&norm);
588:     BVScaleColumn(pjd->V,0,1.0/norm);
589:     BVGetColumn(pjd->V,0,&vg);
590:     BVGetColumn(pjd->W,0,&wg);
591:     VecSet(wg,0.0);
592:     /* W = P(target)*V */
593:     PEPJDComputeResidual(pep,PETSC_FALSE,vg,pep->target,wg,w);
594:     BVRestoreColumn(pjd->W,0,&wg);
595:     BVRestoreColumn(pjd->V,0,&vg);
596:     BVNormColumn(pjd->W,0,NORM_2,&norm);
597:     BVScaleColumn(pjd->W,0,1.0/norm);
598:   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Support for initial vectors not implemented yet");
599:   PetscFree(tt);
600:   return(0);
601: }

603: static PetscErrorCode PEPJDShellMatMult(Mat P,Vec x,Vec y)
604: {
605:   PetscErrorCode    ierr;
606:   PEP_JD_MATSHELL   *matctx;
607:   PEP_JD            *pjd;
608:   PetscMPIInt       rk,np,count;
609:   PetscInt          i,j,nconv,nloc,nmat,ldt,deg,ncv;
610:   Vec               tx,ty;
611:   PetscScalar       *array2,*x2=NULL,*y2,*tt=NULL,*xx=NULL,theta,sone=1.0,*qj,*val;
612:   PetscBLASInt      n,ld,one=1;
613:   const PetscScalar *array1;

616:   MatShellGetContext(P,(void**)&matctx);
617:   pjd   = (PEP_JD*)(matctx->pep->data);
618:   nconv = pjd->nlock;
619:   theta = matctx->theta;
620:   nmat  = matctx->pep->nmat;
621:   ncv   = matctx->pep->ncv;
622:   deg   = nmat-1;
623:   ldt   = pjd->ld;
624:   if (nconv>0) {
625:     PetscMalloc5(nconv,&tt,nconv,&x2,nconv*nmat,&qj,nconv,&xx,nmat,&val);
626:     MPI_Comm_rank(PetscObjectComm((PetscObject)P),&rk);
627:     MPI_Comm_size(PetscObjectComm((PetscObject)P),&np);
628:     if (rk==np-1) {
629:       BVGetSizes(matctx->pep->V,&nloc,NULL,NULL);
630:       VecGetArrayRead(x,&array1);
631:       for (i=0;i<nconv;i++) x2[i] = array1[nloc+i];
632:       VecRestoreArrayRead(x,&array1);
633:     }
634:     PetscMPIIntCast(nconv,&count);
635:     MPI_Bcast(x2,count,MPIU_SCALAR,np-1,PetscObjectComm((PetscObject)P));
636:   }
637:   tx = matctx->work[0];
638:   ty = matctx->work[1];
639:   VecGetArrayRead(x,&array1);
640:   VecPlaceArray(tx,array1);
641:   VecGetArray(y,&array2);
642:   VecPlaceArray(ty,array2);
643:   VecSet(ty,0.0);
644:   MatMult(matctx->P,tx,ty);
645:   if (nconv) {
646:     PEPEvaluateBasis(matctx->pep,theta,0.0,val,NULL);
647:     for (i=0;i<nmat;i++) {
648:       PEPJDEvaluateHatBasis(matctx->pep,nconv,pjd->T,ncv,val,x2,i,i>1?qj+(i-2)*nconv:NULL,i>0?qj+(i-1)*nconv:NULL,qj+i*nconv);
649:     }
650:     for (i=1;i<nmat;i++) {
651:       BVMultVec(pjd->AX[i],1.0,1.0,ty,qj+i*nconv);
652:     }
653:     BVSetActiveColumns(pjd->X,0,nconv);
654:     BVDotVec(pjd->X,tx,xx);
655:     if (rk==np-1) {
656:       PetscBLASIntCast(pjd->nlock,&n);
657:       PetscBLASIntCast(ldt,&ld);
658:       y2 = array2+nloc;
659:       PetscMemzero(y2,nconv*sizeof(PetscScalar));
660:       for (j=0;j<deg;j++) {
661:         PetscMemcpy(tt,xx,nconv*sizeof(PetscScalar));
662:         PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&n,&sone,pjd->XpX,&ld,qj+j*nconv,&one,&val[j],tt,&one));
663:         PetscStackCallBLAS("BLAStrmv",BLAStrmv_("U","C","N",&n,pjd->Tj+ld*ld*j,&ld,tt,&one));
664:         for (i=0;i<nconv;i++) y2[i] += tt[i];
665:       }
666:     }
667:     PetscFree5(tt,x2,qj,xx,val);
668:   }
669:   VecResetArray(tx);
670:   VecRestoreArrayRead(x,&array1);
671:   VecResetArray(ty);
672:   VecRestoreArray(y,&array2);
673:   return(0);
674: }

676: static PetscErrorCode PEPJDMatSetUp(PEP pep,PetscScalar theta)
677: {
678:   PetscErrorCode  ierr;
679:   PEP_JD          *pjd = (PEP_JD*)pep->data;
680:   PEP_JD_MATSHELL *matctx;
681:   MatStructure    str;
682:   PetscScalar     *vals;
683:   PetscInt        i;

686:   MatShellGetContext(pjd->Pshell,(void**)&matctx);
687:   if (matctx->P && matctx->theta==theta)
688:     return(0);
689:   PetscMalloc1(pep->nmat,&vals);
690:   STGetMatStructure(pep->st,&str);
691:   if (!matctx->P) {
692:     MatDuplicate(pep->A[0],MAT_COPY_VALUES,&matctx->P);
693:   } else {
694:     MatCopy(pep->A[0],matctx->P,str);
695:   }
696:   PEPEvaluateBasis(pep,theta,0.0,vals,NULL);
697:   for (i=1;i<pep->nmat;i++) {
698:     MatAXPY(matctx->P,vals[i],pep->A[i],str);
699:   }
700:   matctx->theta = theta;
701:   PetscFree(vals);
702:   return(0);
703: }

705: static PetscErrorCode PEPJDCreateShellPC(PEP pep,Vec *ww)
706: {
707:   PEP_JD          *pjd = (PEP_JD*)pep->data;
708:   PEP_JD_PCSHELL  *pcctx;
709:   PEP_JD_MATSHELL *matctx;
710:   KSP             ksp;
711:   PetscInt        nloc,mloc;
712:   PetscMPIInt     np,rk;
713:   PetscErrorCode  ierr;

716:   PCCreate(PetscObjectComm((PetscObject)pep),&pjd->pcshell);
717:   PCSetType(pjd->pcshell,PCSHELL);
718:   PCShellSetName(pjd->pcshell,"PCPEPJD");
719:   PCShellSetApply(pjd->pcshell,PCShellApply_PEPJD);
720:   PetscNew(&pcctx);
721:   PCShellSetContext(pjd->pcshell,pcctx);
722:   STGetKSP(pep->st,&ksp);
723:   BVCreateVec(pjd->V,&pcctx->Bp);
724:   KSPGetPC(ksp,&pcctx->pc);
725:   PetscObjectReference((PetscObject)pcctx->pc);
726:   MatGetLocalSize(pep->A[0],&mloc,&nloc);
727:   if (pjd->ld>1) {
728:     MPI_Comm_rank(PetscObjectComm((PetscObject)pep),&rk);
729:     MPI_Comm_size(PetscObjectComm((PetscObject)pep),&np);
730:     if (rk==np-1) { nloc += pjd->ld-1; mloc += pjd->ld-1; }
731:   }
732:   PetscNew(&matctx);
733:   MatCreateShell(PetscObjectComm((PetscObject)pep),nloc,mloc,PETSC_DETERMINE,PETSC_DETERMINE,matctx,&pjd->Pshell);
734:   MatShellSetOperation(pjd->Pshell,MATOP_MULT,(void(*)())PEPJDShellMatMult);
735:   matctx->pep = pep;
736:   PEPJDMatSetUp(pep,pep->target);
737:   PCSetOperators(pcctx->pc,matctx->P,matctx->P);
738:   PCSetErrorIfFailure(pcctx->pc,PETSC_TRUE);
739:   PCSetReusePreconditioner(pcctx->pc,PETSC_TRUE);
740:   KSPSetPC(ksp,pjd->pcshell);
741:   KSPSetReusePreconditioner(ksp,PETSC_TRUE);
742:   KSPSetOperators(ksp,pjd->Pshell,pjd->Pshell);
743:   if (pjd->ld>1) {
744:     PetscMalloc2(pjd->ld*pjd->ld,&pcctx->M,pjd->ld*pjd->ld,&pcctx->ps);
745:     pcctx->X  = pjd->X;
746:     pcctx->ld = pjd->ld;
747:   }
748:   matctx->work = ww;
749:   pcctx->work  = ww;
750:   return(0);
751: }

753: static PetscErrorCode PEPJDUpdateExtendedPC(PEP pep,PetscScalar theta)
754: {
755: #if defined(PETSC_MISSING_LAPACK_GESVD) || defined(PETSC_MISSING_LAPACK_GETRI) || defined(PETSC_MISSING_LAPACK_GETRF)
757:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GESVD/GETRI/GETRF - Lapack routines are unavailable");
758: #else
760:   PEP_JD         *pjd = (PEP_JD*)pep->data;
761:   PEP_JD_PCSHELL *pcctx;
762:   PetscInt       i,j,k,n=pjd->nlock,ld=pjd->ld,deg=pep->nmat-1;
763:   PetscScalar    *M,*ps,*work,*U,*V,*S,*Sp,*Spp,snone=-1.0,sone=1.0,zero=0.0,*val;
764:   PetscReal      tol,maxeig=0.0,*sg,*rwork;
765:   PetscBLASInt   n_,info,ld_,*p,lw_,rk=0;

768:   if (n) {
769:     PCShellGetContext(pjd->pcshell,(void**)&pcctx);
770:     pcctx->n = n;
771:     M  = pcctx->M;
772:     ps = pcctx->ps;
773:     PetscCalloc7(2*n*n,&U,3*n*n,&S,n,&sg,10*n,&work,5*n,&rwork,n,&p,deg+1,&val);
774:     V = U+n*n;
775:     /* pseudo-inverse */
776:     for (j=0;j<n;j++) {
777:       for (i=0;i<j;i++) S[n*j+i] = -pjd->T[pep->ncv*j+i];
778:       S[n*j+j] = theta-pjd->T[pep->ncv*j+j];
779:     }
780:     PetscBLASIntCast(n,&n_);
781:     PetscBLASIntCast(ld,&ld_);
782:     lw_ = 10*n_;
783: #if !defined (PETSC_USE_COMPLEX)
784:     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","S",&n_,&n_,S,&n_,sg,U,&n_,V,&n_,work,&lw_,&info));
785: #else
786:     PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","S",&n_,&n_,S,&n_,sg,U,&n_,V,&n_,work,&lw_,rwork,&info));
787: #endif
788:     SlepcCheckLapackInfo("gesvd",info);
789:     for (i=0;i<n;i++) maxeig = PetscMax(maxeig,sg[i]);
790:     tol = 10*PETSC_MACHINE_EPSILON*n*maxeig;
791:     for (j=0;j<n;j++) {
792:       if (sg[j]>tol) {
793:         for (i=0;i<n;i++) U[j*n+i] /= sg[j];
794:         rk++;
795:       } else break;
796:     }
797:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&rk,&sone,U,&n_,V,&n_,&zero,ps,&ld_));

799:     /* compute M */
800:     PEPEvaluateBasis(pep,theta,0.0,val,NULL);
801:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&snone,pjd->XpX,&ld_,ps,&ld_,&zero,M,&ld_));
802:     PetscMemzero(S,2*n*n*sizeof(PetscScalar));
803:     Sp = S+n*n;
804:     for (j=0;j<n;j++) S[j*(n+1)] = 1.0; 
805:     for (k=1;k<deg;k++) {
806:       for (j=0;j<n;j++) for (i=0;i<n;i++) V[j*n+i] = S[j*n+i] - ps[j*ld+i]*val[k];
807:       PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,pjd->XpX,&ld_,V,&n_,&zero,U,&n_));
808:       PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&n_,&n_,&n_,&sone,pjd->Tj+k*ld*ld,&ld_,U,&n_,&sone,M,&ld_));
809:       Spp = Sp; Sp = S;
810:       PEPJDEvaluateHatBasis(pep,n,pjd->T,pep->ncv,val,NULL,k+1,Spp,Sp,S);
811:     }
812:     /* inverse */
813:     PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n_,&n_,M,&ld_,p,&info));
814:     SlepcCheckLapackInfo("getrf",info);
815:     PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&n_,M,&ld_,p,work,&n_,&info));
816:     SlepcCheckLapackInfo("getri",info);
817:     PetscFree7(U,S,sg,work,rwork,p,val);
818:   }
819:   return(0);
820: #endif
821: }

823: static PetscErrorCode PEPJDEigenvectors(PEP pep)
824: {
825: #if defined(SLEPC_MISSING_LAPACK_TREVC)
827:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"TREVC - Lapack routine is unavailable");
828: #else
830:   PEP_JD         *pjd = (PEP_JD*)pep->data;
831:   PetscBLASInt   ld,nconv,info,nc;
832:   PetscScalar    *Z,*w;
833:   PetscReal      *wr,norm;
834:   PetscInt       i;
835:   Mat            U;

838:   PetscMalloc3(pep->nconv*pep->nconv,&Z,3*pep->ncv,&wr,2*pep->ncv,&w);
839:   PetscBLASIntCast(pep->ncv,&ld);
840:   PetscBLASIntCast(pep->nconv,&nconv);
841: #if !defined(PETSC_USE_COMPLEX)
842:   PetscStackCallBLAS("LAPACKtrevc",LAPACKtrevc_("R","A",NULL,&nconv,pjd->T,&ld,NULL,&nconv,Z,&nconv,&nconv,&nc,wr,&info));
843: #else
844:   PetscStackCallBLAS("LAPACKtrevc",LAPACKtrevc_("R","A",NULL,&nconv,pjd->T,&ld,NULL,&nconv,Z,&nconv,&nconv,&nc,w,wr,&info));
845: #endif
846:   SlepcCheckLapackInfo("trevc",info);
847:   MatCreateSeqDense(PETSC_COMM_SELF,nconv,nconv,Z,&U);
848:   BVSetActiveColumns(pjd->X,0,pep->nconv);
849:   BVMultInPlace(pjd->X,U,0,pep->nconv);
850:   for (i=0;i<pep->nconv;i++) {
851:     BVNormColumn(pjd->X,i,NORM_2,&norm);
852:     BVScaleColumn(pjd->X,i,1.0/norm);
853:   }
854:   MatDestroy(&U);
855:   PetscFree3(Z,wr,w);
856:   return(0);
857: #endif
858: }

860: static PetscErrorCode PEPJDLockConverged(PEP pep,PetscInt *nv,PetscInt sz)
861: {
863:   PEP_JD         *pjd = (PEP_JD*)pep->data;
864:   PetscInt       j,i,ldds,rk=0,nvv=*nv;
865:   Vec            v,x,w;
866:   PetscScalar    *R,*pX;
867:   Mat            X;

870:   /* update AX and XpX */
871:   for (i=sz;i>0;i--) {
872:     BVGetColumn(pjd->X,pjd->nlock-i,&x);
873:     for (j=0;j<pep->nmat;j++) {
874:       BVGetColumn(pjd->AX[j],pjd->nlock-i,&v);
875:       MatMult(pep->A[j],x,v);
876:       BVRestoreColumn(pjd->AX[j],pjd->nlock-i,&v);
877:       BVSetActiveColumns(pjd->AX[j],0,pjd->nlock-i+1);
878:     }
879:     BVRestoreColumn(pjd->X,pjd->nlock-i,&x);
880:     BVDotColumn(pjd->X,(pjd->nlock-i),pjd->XpX+(pjd->nlock-i)*(pep->nev));
881:     pjd->XpX[(pjd->nlock-i)*(1+pep->nev)] = 1.0;
882:     for (j=0;j<pjd->nlock-i;j++) pjd->XpX[j*(pep->nev)+pjd->nlock-i] = PetscConj(pjd->XpX[(pjd->nlock-i)*(pep->nev)+j]);
883:   }

885:   /* evaluate the polynomial basis in T */
886:   PetscMemzero(pjd->Tj,pep->nev*pep->nev*pep->nmat*sizeof(PetscScalar));
887:   for (j=0;j<pep->nmat;j++) {
888:     PEPEvaluateBasisMat(pep,pjd->nlock,pjd->T,pep->ncv,j,(j>1)?pjd->Tj+(j-2)*pep->nev*pep->nev:NULL,pep->nev,j?pjd->Tj+(j-1)*pep->nev*pep->nev:NULL,pep->nev,pjd->Tj+j*pep->nev*pep->nev,pep->nev);
889:   }

891:   /* Extend search space */
892:   PetscCalloc1(nvv*nvv,&R);
893:   DSGetLeadingDimension(pep->ds,&ldds);
894:   DSGetArray(pep->ds,DS_MAT_X,&pX);
895:   PEPJDOrthogonalize(nvv,nvv,pX+ldds,ldds,&rk,NULL,NULL,0);
896:   rk -= sz;
897:   for (j=0;j<rk;j++) {
898:     PetscMemcpy(R+j*nvv,pX+(j+sz)*ldds,nvv*sizeof(PetscScalar));
899:   }
900:   DSRestoreArray(pep->ds,DS_MAT_X,&pX);
901:   MatCreateSeqDense(PETSC_COMM_SELF,nvv,rk,R,&X);
902:   BVMultInPlace(pjd->V,X,0,rk);
903:   MatDestroy(&X);
904:   BVSetActiveColumns(pjd->V,0,rk);
905:   for (j=0;j<rk;j++) {
906:     /* W = P(target)*V */
907:     BVGetColumn(pjd->W,j,&w);
908:     BVGetColumn(pjd->V,j,&v);
909:     PEPJDComputeResidual(pep,PETSC_FALSE,v,pep->target,w,pep->work);
910:     BVRestoreColumn(pjd->V,j,&v);
911:     BVRestoreColumn(pjd->W,j,&w);
912:   }
913:   BVSetActiveColumns(pjd->W,0,rk);
914:   BVOrthogonalize(pjd->W,NULL);
915:   *nv = rk;
916:   PetscFree(R);
917:   return(0);
918: }

920: PetscErrorCode PEPSolve_JD(PEP pep)
921: {
922:   PetscErrorCode  ierr;
923:   PEP_JD          *pjd = (PEP_JD*)pep->data;
924:   PetscInt        k,nv,ld,minv,dim,bupdated=0,sz=1,idx;
925:   PetscScalar     theta=0.0,*pX,*eig,ritz;
926:   PetscReal       norm,*res;
927:   PetscBool       lindep;
928:   Vec             t,u,p,r,*ww=pep->work,v;
929:   Mat             G,X,Y;
930:   KSP             ksp;
931:   PEP_JD_PCSHELL  *pcctx;
932:   PEP_JD_MATSHELL *matctx;

935:   DSGetLeadingDimension(pep->ds,&ld);
936:   PetscMalloc2(pep->ncv,&eig,pep->ncv,&res);
937:   BVCreateVec(pjd->V,&u);
938:   VecDuplicate(u,&p);
939:   VecDuplicate(u,&r);
940:   pjd->nlock = 0;
941:   STGetKSP(pep->st,&ksp);
942:   PEPJDProcessInitialSpace(pep,ww);
943:   nv = (pep->nini)?pep->nini:1;
944:   BVCopyVec(pjd->V,0,u);
945:   
946:   /* Replace preconditioner with one containing projectors */
947:   PEPJDCreateShellPC(pep,ww);
948:   PCShellGetContext(pjd->pcshell,(void**)&pcctx);

950:   /* Restart loop */
951:   while (pep->reason == PEP_CONVERGED_ITERATING) {
952:     pep->its++;
953:     DSSetDimensions(pep->ds,nv,0,0,0);
954:     BVSetActiveColumns(pjd->V,bupdated,nv);
955:     PEPJDUpdateTV(pep,bupdated,nv,ww);
956:     BVSetActiveColumns(pjd->W,bupdated,nv);
957:     for (k=0;k<pep->nmat;k++) {
958:       BVSetActiveColumns(pjd->TV[k],bupdated,nv);
959:       DSGetMat(pep->ds,DSMatExtra[k],&G);
960:       BVMatProject(pjd->TV[k],NULL,pjd->W,G);
961:       DSRestoreMat(pep->ds,DSMatExtra[k],&G);
962:     }
963:     BVSetActiveColumns(pjd->V,0,nv);
964:     BVSetActiveColumns(pjd->W,0,nv);

966:     /* Solve projected problem */
967:     DSSetState(pep->ds,DS_STATE_RAW);
968:     DSSolve(pep->ds,pep->eigr,pep->eigi);
969:     DSSort(pep->ds,pep->eigr,pep->eigi,NULL,NULL,NULL);
970:     DSSynchronize(pep->ds,pep->eigr,pep->eigi);
971:     idx = 0;
972:     do {
973:       ritz = pep->eigr[idx];
974: #if !defined(PETSC_USE_COMPLEX)
975:       ritzi = pep->eigi[idx];
976:       if (PetscAbsScalar(ritzi!=0.0)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PJD solver not implemented for complex Ritz values in real arithmetic");
977: #endif
978:       /* Compute Ritz vector u=V*X(:,1) */
979:       DSGetArray(pep->ds,DS_MAT_X,&pX);
980:       BVSetActiveColumns(pjd->V,0,nv);
981:       BVMultVec(pjd->V,1.0,0.0,u,pX+idx*ld);
982:       DSRestoreArray(pep->ds,DS_MAT_X,&pX);
983:       PEPJDComputeResidual(pep,PETSC_FALSE,u,ritz,r,ww);
984:       /* Check convergence */
985:       VecNorm(r,NORM_2,&norm);
986:       (*pep->converged)(pep,ritz,0,norm,&pep->errest[pep->nconv],pep->convergedctx);
987:       (*pep->stopping)(pep,pep->its,pep->max_it,(pep->errest[pep->nconv]<pep->tol)?pep->nconv+1:pep->nconv,pep->nev,&pep->reason,pep->stoppingctx);
988:       if (pep->errest[pep->nconv]<pep->tol) {
989:         /* Ritz pair converged */
990:         minv = PetscMin(nv,(PetscInt)(pjd->keep*pep->ncv));
991:         if (pjd->ld>1) {
992:           BVGetColumn(pjd->X,pep->nconv,&v);
993:           PEPJDCopyToExtendedVec(pep,v,pjd->T+pep->ncv*pep->nconv,pjd->ld-1,0,u,PETSC_TRUE);
994:           BVRestoreColumn(pjd->X,pep->nconv,&v);
995:           BVSetActiveColumns(pjd->X,0,pep->nconv+1);
996:           BVNormColumn(pjd->X,pep->nconv,NORM_2,&norm);
997:           BVScaleColumn(pjd->X,pep->nconv,1.0/norm);
998:           for (k=0;k<pep->nconv;k++) pjd->T[pep->ncv*pep->nconv+k] /= norm;
999:           pjd->T[(pep->ncv+1)*pep->nconv] = ritz;
1000:           eig[pep->nconv] = ritz;
1001:           idx++;
1002:         } else {
1003:           BVInsertVec(pep->V,pep->nconv,u);
1004:         }
1005:         pep->nconv++;
1006:       }
1007:     } while (pep->errest[pep->nconv]<pep->tol && pep->nconv<nv);  

1009:     if (pep->reason==PEP_CONVERGED_ITERATING) {
1010:       if (idx) {
1011:         pjd->nlock +=idx;
1012:         PEPJDLockConverged(pep,&nv,idx);
1013:         PEPJDUpdateExtendedPC(pep,pep->target);
1014:       }
1015:       if (nv+sz>=pep->ncv-1) {
1016:         /* Basis full, force restart */
1017:         minv = PetscMin(nv,(PetscInt)(pjd->keep*pep->ncv));
1018:         DSGetDimensions(pep->ds,&dim,NULL,NULL,NULL,NULL);
1019:         DSGetArray(pep->ds,DS_MAT_X,&pX);
1020:         PEPJDOrthogonalize(dim,minv,pX,ld,&minv,NULL,NULL,ld);
1021:         DSRestoreArray(pep->ds,DS_MAT_X,&pX);
1022:         DSGetArray(pep->ds,DS_MAT_Y,&pX);
1023:         PEPJDOrthogonalize(dim,minv,pX,ld,&minv,NULL,NULL,ld);
1024:         DSRestoreArray(pep->ds,DS_MAT_Y,&pX);
1025:         DSGetMat(pep->ds,DS_MAT_X,&X);
1026:         BVMultInPlace(pjd->V,X,0,minv);
1027:         MatDestroy(&X);
1028:         DSGetMat(pep->ds,DS_MAT_Y,&Y);
1029:         BVMultInPlace(pjd->W,Y,0,minv);
1030:         MatDestroy(&Y);
1031:         nv = minv;
1032:         bupdated = 0;
1033:       } else {
1034:         theta = pep->errest[pep->nconv]<pjd->fix?ritz:pep->target;
1035:         /* Update system mat */
1036:         PEPJDMatSetUp(pep,theta);
1037:         /* Compute r' */
1038:         PEPJDComputeResidual(pep,PETSC_TRUE,u,theta,p,ww);
1039:         pcctx->u = u;
1040:         /* Solve correction equation to expand basis */  
1041:         PEPJDExtendedPCApply(pjd->pcshell,p,pcctx->Bp);
1042:         VecDot(pcctx->Bp,u,&pcctx->gamma);
1043:         BVGetColumn(pjd->V,nv,&t);
1044:         KSPSolve(ksp,r,t);
1045:         BVRestoreColumn(pjd->V,nv,&t);
1046:         BVOrthogonalizeColumn(pjd->V,nv,NULL,&norm,&lindep);
1047:         if (lindep || norm==0.0) SETERRQ(PETSC_COMM_SELF,1,"Linearly dependent continuation vector");
1048:         BVScaleColumn(pjd->V,nv,1.0/norm);
1049:         /* W = P(target)*V */
1050:         BVGetColumn(pjd->V,nv,&t);
1051:         BVGetColumn(pjd->W,nv,&v);
1052:         PEPJDComputeResidual(pep,PETSC_FALSE,t,pep->target,v,ww);
1053:         BVRestoreColumn(pjd->W,nv,&v);
1054:         BVRestoreColumn(pjd->V,nv,&t);
1055:         BVOrthogonalizeColumn(pjd->W,nv,NULL,&norm,&lindep);
1056:         if (lindep) SETERRQ(PETSC_COMM_SELF,1,"Linearly dependent continuation vector");
1057:         BVScaleColumn(pjd->W,nv,1.0/norm);
1058:         bupdated = idx?0:nv;
1059:         nv++;
1060:       } 
1061:     }
1062:     for (k=pep->nconv;k<nv;k++) {
1063:       eig[k] = pep->eigr[idx+k-pep->nconv];
1064: #if !defined(PETSC_USE_COMPLEX)
1065:       pep->eigi[k-pep->nconv] = 0.0;
1066: #endif
1067:     }
1068:     PEPMonitor(pep,pep->its,pep->nconv,eig,pep->eigi,pep->errest,pep->nconv+1);
1069:   }
1070:   if (pjd->ld>1) {
1071:     if (pep->nconv>0) { PEPJDEigenvectors(pep); }
1072:     for (k=0;k<pep->nconv;k++) {
1073:       pep->eigr[k] = eig[k];
1074:       pep->eigi[k] = 0.0;
1075:     }
1076:     PetscFree2(pcctx->M,pcctx->ps);
1077:   }
1078:   KSPSetPC(ksp,pcctx->pc);
1079:   MatShellGetContext(pjd->Pshell,(void**)&matctx);
1080:   MatDestroy(&matctx->P);
1081:   VecDestroy(&pcctx->Bp);
1082:   MatDestroy(&pjd->Pshell);
1083:   PCDestroy(&pcctx->pc);
1084:   PetscFree(pcctx);
1085:   PetscFree(matctx);
1086:   PCDestroy(&pjd->pcshell);
1087:   PetscFree2(eig,res);
1088:   VecDestroy(&u);
1089:   VecDestroy(&r);
1090:   VecDestroy(&p);
1091:   return(0);
1092: }

1094: PetscErrorCode PEPJDSetRestart_JD(PEP pep,PetscReal keep)
1095: {
1096:   PEP_JD *pjd = (PEP_JD*)pep->data;

1099:   if (keep==PETSC_DEFAULT) pjd->keep = 0.5;
1100:   else {
1101:     if (keep<0.1 || keep>0.9) SETERRQ(PetscObjectComm((PetscObject)pep),PETSC_ERR_ARG_OUTOFRANGE,"The keep argument must be in the range [0.1,0.9]");
1102:     pjd->keep = keep;
1103:   }
1104:   return(0);
1105: }

1107: /*@
1108:    PEPJDSetRestart - Sets the restart parameter for the Jacobi-Davidson
1109:    method, in particular the proportion of basis vectors that must be kept
1110:    after restart.

1112:    Logically Collective on PEP

1114:    Input Parameters:
1115: +  pep  - the eigenproblem solver context
1116: -  keep - the number of vectors to be kept at restart

1118:    Options Database Key:
1119: .  -pep_jd_restart - Sets the restart parameter

1121:    Notes:
1122:    Allowed values are in the range [0.1,0.9]. The default is 0.5.

1124:    Level: advanced

1126: .seealso: PEPJDGetRestart()
1127: @*/
1128: PetscErrorCode PEPJDSetRestart(PEP pep,PetscReal keep)
1129: {

1135:   PetscTryMethod(pep,"PEPJDSetRestart_C",(PEP,PetscReal),(pep,keep));
1136:   return(0);
1137: }

1139: PetscErrorCode PEPJDGetRestart_JD(PEP pep,PetscReal *keep)
1140: {
1141:   PEP_JD *pjd = (PEP_JD*)pep->data;

1144:   *keep = pjd->keep;
1145:   return(0);
1146: }

1148: /*@
1149:    PEPJDGetRestart - Gets the restart parameter used in the Jacobi-Davidson method.

1151:    Not Collective

1153:    Input Parameter:
1154: .  pep - the eigenproblem solver context

1156:    Output Parameter:
1157: .  keep - the restart parameter

1159:    Level: advanced

1161: .seealso: PEPJDSetRestart()
1162: @*/
1163: PetscErrorCode PEPJDGetRestart(PEP pep,PetscReal *keep)
1164: {

1170:   PetscUseMethod(pep,"PEPJDGetRestart_C",(PEP,PetscReal*),(pep,keep));
1171:   return(0);
1172: }

1174: PetscErrorCode PEPJDSetFix_JD(PEP pep,PetscReal fix)
1175: {
1176:   PEP_JD *pjd = (PEP_JD*)pep->data;

1179:   if (fix == PETSC_DEFAULT || fix == PETSC_DECIDE) pjd->fix = 0.01;
1180:   else {
1181:     if (fix < 0.0) SETERRQ(PetscObjectComm((PetscObject)pep),PETSC_ERR_ARG_OUTOFRANGE,"Invalid fix value");
1182:     pjd->fix = fix;
1183:   }
1184:   return(0);
1185: }

1187: /*@
1188:    PEPJDSetFix - Sets the threshold for changing the target in the correction
1189:    equation.

1191:    Logically Collective on PEP

1193:    Input Parameters:
1194: +  pep - the eigenproblem solver context
1195: -  fix - threshold for changing the target

1197:    Options Database Key:
1198: .  -pep_jd_fix - the fix value

1200:    Note:
1201:    The target in the correction equation is fixed at the first iterations.
1202:    When the norm of the residual vector is lower than the fix value,
1203:    the target is set to the corresponding eigenvalue.

1205:    Level: advanced

1207: .seealso: PEPJDGetFix()
1208: @*/
1209: PetscErrorCode PEPJDSetFix(PEP pep,PetscReal fix)
1210: {

1216:   PetscTryMethod(pep,"PEPJDSetFix_C",(PEP,PetscReal),(pep,fix));
1217:   return(0);
1218: }

1220: PetscErrorCode PEPJDGetFix_JD(PEP pep,PetscReal *fix)
1221: {
1222:   PEP_JD *pjd = (PEP_JD*)pep->data;

1225:   *fix = pjd->fix;
1226:   return(0);
1227: }

1229: /*@
1230:    PEPJDGetFix - Returns the threshold for changing the target in the correction
1231:    equation.

1233:    Not Collective

1235:    Input Parameter:
1236: .  pep - the eigenproblem solver context

1238:    Output Parameter:
1239: .  fix - threshold for changing the target

1241:    Note:
1242:    The target in the correction equation is fixed at the first iterations.
1243:    When the norm of the residual vector is lower than the fix value,
1244:    the target is set to the corresponding eigenvalue.

1246:    Level: advanced

1248: .seealso: PEPJDSetFix()
1249: @*/
1250: PetscErrorCode PEPJDGetFix(PEP pep,PetscReal *fix)
1251: {

1257:   PetscUseMethod(pep,"PEPJDGetFix_C",(PEP,PetscReal*),(pep,fix));
1258:   return(0);
1259: }

1261: PetscErrorCode PEPSetFromOptions_JD(PetscOptionItems *PetscOptionsObject,PEP pep)
1262: {
1264:   PetscBool      flg;
1265:   PetscReal      r1;

1268:   PetscOptionsHead(PetscOptionsObject,"PEP JD Options");

1270:     PetscOptionsReal("-pep_jd_restart","Proportion of vectors kept after restart","PEPJDSetRestart",0.5,&r1,&flg);
1271:     if (flg) { PEPJDSetRestart(pep,r1); }

1273:     PetscOptionsReal("-pep_jd_fix","Tolerance for changing the target in the correction equation","PEPJDSetFix",0.01,&r1,&flg);
1274:     if (flg) { PEPJDSetFix(pep,r1); }

1276:   PetscOptionsTail();
1277:   return(0);
1278: }

1280: PetscErrorCode PEPView_JD(PEP pep,PetscViewer viewer)
1281: {
1283:   PEP_JD         *pjd = (PEP_JD*)pep->data;
1284:   PetscBool      isascii;

1287:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
1288:   if (isascii) {
1289:     PetscViewerASCIIPrintf(viewer,"  %d%% of basis vectors kept after restart\n",(int)(100*pjd->keep));
1290:     PetscViewerASCIIPrintf(viewer,"  threshold for changing the target in the correction equation (fix): %g\n",(double)pjd->fix);
1291:   }
1292:   return(0);
1293: }

1295: PetscErrorCode PEPSetDefaultST_JD(PEP pep)
1296: {
1298:   KSP            ksp;

1301:   if (!((PetscObject)pep->st)->type_name) {
1302:     STSetType(pep->st,STPRECOND);
1303:     STPrecondSetKSPHasMat(pep->st,PETSC_TRUE);
1304:   }
1305:   STSetTransform(pep->st,PETSC_FALSE);
1306:   STGetKSP(pep->st,&ksp);
1307:   if (!((PetscObject)ksp)->type_name) {
1308:     KSPSetType(ksp,KSPBCGSL);
1309:     KSPSetTolerances(ksp,1e-5,PETSC_DEFAULT,PETSC_DEFAULT,100);
1310:   }
1311:   return(0);
1312: }

1314: PetscErrorCode PEPReset_JD(PEP pep)
1315: {
1317:   PEP_JD         *pjd = (PEP_JD*)pep->data;
1318:   PetscInt       i;

1321:   for (i=0;i<pep->nmat;i++) {
1322:     BVDestroy(pjd->TV+i);
1323:   }
1324:   BVDestroy(&pjd->W);
1325:   if (pjd->ld>1) {
1326:     BVDestroy(&pjd->V);
1327:     for (i=0;i<pep->nmat;i++) {
1328:       BVDestroy(pjd->AX+i);
1329:     }
1330:     BVDestroy(&pjd->N[0]);
1331:     BVDestroy(&pjd->N[1]);
1332:     PetscFree3(pjd->XpX,pjd->T,pjd->Tj);
1333:   }
1334:   PetscFree2(pjd->TV,pjd->AX);
1335:   return(0);
1336: }

1338: PetscErrorCode PEPDestroy_JD(PEP pep)
1339: {

1343:   PetscFree(pep->data);
1344:   PetscObjectComposeFunction((PetscObject)pep,"PEPJDSetRestart_C",NULL);
1345:   PetscObjectComposeFunction((PetscObject)pep,"PEPJDGetRestart_C",NULL);
1346:   PetscObjectComposeFunction((PetscObject)pep,"PEPJDSetFix_C",NULL);
1347:   PetscObjectComposeFunction((PetscObject)pep,"PEPJDGetFix_C",NULL);
1348:   return(0);
1349: }

1351: PETSC_EXTERN PetscErrorCode PEPCreate_JD(PEP pep)
1352: {
1353:   PEP_JD         *pjd;

1357:   PetscNewLog(pep,&pjd);
1358:   pep->data = (void*)pjd;

1360:   pjd->fix = 0.01;

1362:   pep->ops->solve          = PEPSolve_JD;
1363:   pep->ops->setup          = PEPSetUp_JD;
1364:   pep->ops->setfromoptions = PEPSetFromOptions_JD;
1365:   pep->ops->destroy        = PEPDestroy_JD;
1366:   pep->ops->reset          = PEPReset_JD;
1367:   pep->ops->view           = PEPView_JD;
1368:   pep->ops->setdefaultst   = PEPSetDefaultST_JD;

1370:   PetscObjectComposeFunction((PetscObject)pep,"PEPJDSetRestart_C",PEPJDSetRestart_JD);
1371:   PetscObjectComposeFunction((PetscObject)pep,"PEPJDGetRestart_C",PEPJDGetRestart_JD);
1372:   PetscObjectComposeFunction((PetscObject)pep,"PEPJDSetFix_C",PEPJDSetFix_JD);
1373:   PetscObjectComposeFunction((PetscObject)pep,"PEPJDGetFix_C",PEPJDGetFix_JD);
1374:   return(0);
1375: }