Actual source code: ks-slice.c
slepc-3.9.2 2018-07-02
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-2018, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
10: /*
11: SLEPc eigensolver: "krylovschur"
13: Method: Krylov-Schur with spectrum slicing for symmetric eigenproblems
15: References:
17: [1] R.G. Grimes et al., "A shifted block Lanczos algorithm for
18: solving sparse symmetric generalized eigenproblems", SIAM J.
19: Matrix Anal. Appl. 15(1):228-272, 1994.
21: [2] C. Campos and J.E. Roman, "Spectrum slicing strategies based
22: on restarted Lanczos methods", Numer. Algor. 60(2):279-295,
23: 2012.
24: */
26: #include <slepc/private/epsimpl.h>
27: #include "krylovschur.h"
29: static PetscBool cited = PETSC_FALSE;
30: static const char citation[] =
31: "@Article{slepc-slice,\n"
32: " author = \"C. Campos and J. E. Roman\",\n"
33: " title = \"Strategies for spectrum slicing based on restarted {Lanczos} methods\",\n"
34: " journal = \"Numer. Algorithms\",\n"
35: " volume = \"60\",\n"
36: " number = \"2\",\n"
37: " pages = \"279--295\",\n"
38: " year = \"2012,\"\n"
39: " doi = \"https://doi.org/10.1007/s11075-012-9564-z\"\n"
40: "}\n";
42: #define SLICE_PTOL PETSC_SQRT_MACHINE_EPSILON
44: static PetscErrorCode EPSSliceResetSR(EPS eps) {
45: PetscErrorCode ierr;
46: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
47: EPS_SR sr=ctx->sr;
48: EPS_shift s;
51: if (sr) {
52: if (ctx->npart>1) {
53: BVDestroy(&sr->V);
54: PetscFree4(sr->eigr,sr->eigi,sr->errest,sr->perm);
55: }
56: /* Reviewing list of shifts to free memory */
57: s = sr->s0;
58: if (s) {
59: while (s->neighb[1]) {
60: s = s->neighb[1];
61: PetscFree(s->neighb[0]);
62: }
63: PetscFree(s);
64: }
65: PetscFree(sr);
66: }
67: ctx->sr = NULL;
68: return(0);
69: }
71: PetscErrorCode EPSReset_KrylovSchur_Slice(EPS eps)
72: {
73: PetscErrorCode ierr;
74: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
77: if (!ctx->global) return(0);
78: /* Destroy auxiliary EPS */
79: EPSSliceResetSR(ctx->eps);
80: EPSDestroy(&ctx->eps);
81: if (ctx->npart>1) {
82: PetscSubcommDestroy(&ctx->subc);
83: if (ctx->commset) {
84: MPI_Comm_free(&ctx->commrank);
85: ctx->commset = PETSC_FALSE;
86: }
87: }
88: PetscFree(ctx->subintervals);
89: PetscFree(ctx->nconv_loc);
90: EPSSliceResetSR(eps);
91: PetscFree(ctx->inertias);
92: PetscFree(ctx->shifts);
93: if (ctx->npart>1) {
94: ISDestroy(&ctx->isrow);
95: ISDestroy(&ctx->iscol);
96: MatDestroyMatrices(1,&ctx->submata);
97: MatDestroyMatrices(1,&ctx->submatb);
98: }
99: return(0);
100: }
102: /*
103: EPSSliceAllocateSolution - Allocate memory storage for common variables such
104: as eigenvalues and eigenvectors. The argument extra is used for methods
105: that require a working basis slightly larger than ncv.
106: */
107: static PetscErrorCode EPSSliceAllocateSolution(EPS eps,PetscInt extra)
108: {
109: PetscErrorCode ierr;
110: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
111: PetscReal eta;
112: PetscInt k;
113: PetscLogDouble cnt;
114: BVType type;
115: BVOrthogType orthog_type;
116: BVOrthogRefineType orthog_ref;
117: BVOrthogBlockType ob_type;
118: Mat matrix;
119: Vec t;
120: EPS_SR sr = ctx->sr;
123: /* allocate space for eigenvalues and friends */
124: k = PetscMax(1,sr->numEigs);
125: PetscFree4(sr->eigr,sr->eigi,sr->errest,sr->perm);
126: PetscMalloc4(k,&sr->eigr,k,&sr->eigi,k,&sr->errest,k,&sr->perm);
127: cnt = 2*k*sizeof(PetscScalar) + 2*k*sizeof(PetscReal) + k*sizeof(PetscInt);
128: PetscLogObjectMemory((PetscObject)eps,cnt);
130: /* allocate sr->V and transfer options from eps->V */
131: BVDestroy(&sr->V);
132: BVCreate(PetscObjectComm((PetscObject)eps),&sr->V);
133: PetscLogObjectParent((PetscObject)eps,(PetscObject)sr->V);
134: if (!eps->V) { EPSGetBV(eps,&eps->V); }
135: if (!((PetscObject)(eps->V))->type_name) {
136: BVSetType(sr->V,BVSVEC);
137: } else {
138: BVGetType(eps->V,&type);
139: BVSetType(sr->V,type);
140: }
141: STMatCreateVecsEmpty(eps->st,&t,NULL);
142: BVSetSizesFromVec(sr->V,t,k);
143: VecDestroy(&t);
144: EPS_SetInnerProduct(eps);
145: BVGetMatrix(eps->V,&matrix,NULL);
146: BVSetMatrix(sr->V,matrix,PETSC_FALSE);
147: BVGetOrthogonalization(eps->V,&orthog_type,&orthog_ref,&eta,&ob_type);
148: BVSetOrthogonalization(sr->V,orthog_type,orthog_ref,eta,ob_type);
149: return(0);
150: }
152: static PetscErrorCode EPSSliceGetEPS(EPS eps)
153: {
154: PetscErrorCode ierr;
155: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data,*ctx_local;
156: BV V;
157: BVType type;
158: PetscReal eta;
159: BVOrthogType orthog_type;
160: BVOrthogRefineType orthog_ref;
161: BVOrthogBlockType ob_type;
162: Mat A,B=NULL,Ar=NULL,Br=NULL;
163: PetscInt i;
164: PetscReal h,a,b,zero;
165: PetscMPIInt rank;
166: EPS_SR sr=ctx->sr;
167: PC pc;
168: PCType pctype;
169: KSP ksp;
170: KSPType ksptype;
171: STType sttype;
172: PetscObjectState Astate,Bstate=0;
173: PetscObjectId Aid,Bid=0;
174: MatSolverType stype;
177: EPSGetOperators(eps,&A,&B);
178: if (ctx->npart==1) {
179: if (!ctx->eps) { EPSCreate(((PetscObject)eps)->comm,&ctx->eps); }
180: EPSSetType(ctx->eps,((PetscObject)eps)->type_name);
181: EPSSetOperators(ctx->eps,A,B);
182: a = eps->inta; b = eps->intb;
183: } else {
184: PetscObjectStateGet((PetscObject)A,&Astate);
185: PetscObjectGetId((PetscObject)A,&Aid);
186: if (B) {
187: PetscObjectStateGet((PetscObject)B,&Bstate);
188: PetscObjectGetId((PetscObject)B,&Bid);
189: }
190: if (!ctx->subc) {
191: /* Create context for subcommunicators */
192: PetscSubcommCreate(PetscObjectComm((PetscObject)eps),&ctx->subc);
193: PetscSubcommSetNumber(ctx->subc,ctx->npart);
194: PetscSubcommSetType(ctx->subc,PETSC_SUBCOMM_CONTIGUOUS);
195: PetscLogObjectMemory((PetscObject)eps,sizeof(PetscSubcomm));
197: /* Duplicate matrices */
198: MatCreateRedundantMatrix(A,0,PetscSubcommChild(ctx->subc),MAT_INITIAL_MATRIX,&Ar);
199: ctx->Astate = Astate;
200: ctx->Aid = Aid;
201: if (B) {
202: MatCreateRedundantMatrix(B,0,PetscSubcommChild(ctx->subc),MAT_INITIAL_MATRIX,&Br);
203: ctx->Bstate = Bstate;
204: ctx->Bid = Bid;
205: }
206: } else {
207: if (ctx->Astate != Astate || (B && ctx->Bstate != Bstate) || ctx->Aid != Aid || (B && ctx->Bid != Bid)) {
208: EPSGetOperators(ctx->eps,&Ar,&Br);
209: MatCreateRedundantMatrix(A,0,PetscSubcommChild(ctx->subc),MAT_INITIAL_MATRIX,&Ar);
210: ctx->Astate = Astate;
211: ctx->Aid = Aid;
212: if (B) {
213: MatCreateRedundantMatrix(B,0,PetscSubcommChild(ctx->subc),MAT_INITIAL_MATRIX,&Br);
214: ctx->Bstate = Bstate;
215: ctx->Bid = Bid;
216: }
217: EPSSetOperators(ctx->eps,Ar,Br);
218: MatDestroy(&Ar);
219: MatDestroy(&Br);
220: }
221: }
223: /* Determine subintervals */
224: if (!ctx->subintset) { /* uniform distribution if no set by user */
225: if (!sr->hasEnd) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_WRONG,"Global interval must be bounded for splitting it in uniform subintervals");
226: h = (eps->intb-eps->inta)/ctx->npart;
227: a = eps->inta+ctx->subc->color*h;
228: b = (ctx->subc->color==ctx->npart-1)?eps->intb:eps->inta+(ctx->subc->color+1)*h;
229: PetscFree(ctx->subintervals);
230: PetscMalloc1(ctx->npart+1,&ctx->subintervals);
231: for (i=0;i<ctx->npart;i++) ctx->subintervals[i] = eps->inta+h*i;
232: ctx->subintervals[ctx->npart] = eps->intb;
233: } else {
234: a = ctx->subintervals[ctx->subc->color];
235: b = ctx->subintervals[ctx->subc->color+1];
236: }
238: if (!ctx->eps) {
239: /* Create auxiliary EPS */
240: EPSCreate(PetscSubcommChild(ctx->subc),&ctx->eps);
241: EPSSetOperators(ctx->eps,Ar,Br);
242: MatDestroy(&Ar);
243: MatDestroy(&Br);
244: }
246: /* Create subcommunicator grouping processes with same rank */
247: if (ctx->commset) { MPI_Comm_free(&ctx->commrank); }
248: MPI_Comm_rank(PetscSubcommChild(ctx->subc),&rank);
249: MPI_Comm_split(((PetscObject)eps)->comm,rank,ctx->subc->color,&ctx->commrank);
250: ctx->commset = PETSC_TRUE;
251: }
252: EPSSetType(ctx->eps,((PetscObject)eps)->type_name);
254: /* Transfer options for ST, KSP and PC */
255: STGetType(eps->st,&sttype);
256: STSetType(ctx->eps->st,sttype);
257: STGetKSP(eps->st,&ksp);
258: KSPGetType(ksp,&ksptype);
259: KSPGetPC(ksp,&pc);
260: PCGetType(pc,&pctype);
261: PCFactorGetMatSolverType(pc,&stype);
262: PCFactorGetZeroPivot(pc,&zero);
263: STGetKSP(ctx->eps->st,&ksp);
264: KSPSetType(ksp,ksptype);
265: KSPGetPC(ksp,&pc);
266: PCSetType(pc,pctype);
267: PCFactorSetZeroPivot(pc,zero);
268: if (stype) { PCFactorSetMatSolverType(pc,stype); }
270: EPSSetConvergenceTest(ctx->eps,eps->conv);
271: EPSSetInterval(ctx->eps,a,b);
272: ctx_local = (EPS_KRYLOVSCHUR*)ctx->eps->data;
273: ctx_local->npart = ctx->npart;
274: ctx_local->detect = ctx->detect;
275: ctx_local->global = PETSC_FALSE;
276: ctx_local->eps = eps;
277: ctx_local->subc = ctx->subc;
278: ctx_local->commrank = ctx->commrank;
280: EPSSetDimensions(ctx->eps,ctx->nev,ctx->ncv,ctx->mpd);
281: EPSKrylovSchurSetLocking(ctx->eps,ctx->lock);
283: /* transfer options from eps->V */
284: EPSGetBV(ctx->eps,&V);
285: if (!eps->V) { EPSGetBV(eps,&eps->V); }
286: if (!((PetscObject)(eps->V))->type_name) {
287: BVSetType(V,BVSVEC);
288: } else {
289: BVGetType(eps->V,&type);
290: BVSetType(V,type);
291: }
292: BVGetOrthogonalization(eps->V,&orthog_type,&orthog_ref,&eta,&ob_type);
293: BVSetOrthogonalization(V,orthog_type,orthog_ref,eta,ob_type);
294: ctx->eps->which = eps->which;
295: ctx->eps->max_it = eps->max_it;
296: ctx->eps->tol = eps->tol;
297: ctx->eps->purify = eps->purify;
298: if (eps->tol==PETSC_DEFAULT) eps->tol = SLEPC_DEFAULT_TOL;
299: EPSSetProblemType(ctx->eps,eps->problem_type);
300: EPSSetUp(ctx->eps);
301: ctx->eps->nconv = 0;
302: ctx->eps->its = 0;
303: for (i=0;i<ctx->eps->ncv;i++) {
304: ctx->eps->eigr[i] = 0.0;
305: ctx->eps->eigi[i] = 0.0;
306: ctx->eps->errest[i] = 0.0;
307: }
308: return(0);
309: }
311: static PetscErrorCode EPSSliceGetInertia(EPS eps,PetscReal shift,PetscInt *inertia,PetscInt *zeros)
312: {
314: KSP ksp;
315: PC pc;
316: Mat F;
317: PetscReal nzshift;
320: if (shift >= PETSC_MAX_REAL) { /* Right-open interval */
321: if (inertia) *inertia = eps->n;
322: } else if (shift <= PETSC_MIN_REAL) {
323: if (inertia) *inertia = 0;
324: if (zeros) *zeros = 0;
325: } else {
326: /* If the shift is zero, perturb it to a very small positive value.
327: The goal is that the nonzero pattern is the same in all cases and reuse
328: the symbolic factorizations */
329: nzshift = (shift==0.0)? 10.0/PETSC_MAX_REAL: shift;
330: STSetShift(eps->st,nzshift);
331: STSetUp(eps->st);
332: STGetKSP(eps->st,&ksp);
333: KSPGetPC(ksp,&pc);
334: PCFactorGetMatrix(pc,&F);
335: MatGetInertia(F,inertia,zeros,NULL);
336: }
337: return(0);
338: }
340: /*
341: Dummy backtransform operation
342: */
343: static PetscErrorCode EPSBackTransform_Skip(EPS eps)
344: {
346: return(0);
347: }
349: PetscErrorCode EPSSetUp_KrylovSchur_Slice(EPS eps)
350: {
351: PetscErrorCode ierr;
352: PetscBool issinv;
353: EPS_KRYLOVSCHUR *ctx = (EPS_KRYLOVSCHUR*)eps->data,*ctx_glob;
354: EPS_SR sr,sr_loc,sr_glob;
355: PetscInt nEigs,dssz=1,i,zeros=0,off=0;
356: PetscMPIInt nproc,rank=0,aux;
357: PetscReal r;
358: MPI_Request req;
359: Mat A,B=NULL;
360: SlepcSC sc;
361: PetscInt flg=0;
362: DSParallelType ptype;
365: if (ctx->global) {
366: if (eps->intb >= PETSC_MAX_REAL && eps->inta <= PETSC_MIN_REAL) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_WRONG,"The defined computational interval should have at least one of their sides bounded");
367: if (!eps->ishermitian) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"Spectrum slicing only available for symmetric/Hermitian eigenproblems");
368: if (eps->arbitrary) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"Arbitrary selection of eigenpairs cannot be used with spectrum slicing");
369: PetscObjectTypeCompareAny((PetscObject)eps->st,&issinv,STSINVERT,STCAYLEY,"");
370: if (!issinv) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_SUP,"Shift-and-invert or Cayley ST is needed for spectrum slicing");
371: if (eps->tol==PETSC_DEFAULT) eps->tol = SLEPC_DEFAULT_TOL*1e-2; /* use tighter tolerance */
372: if (!eps->max_it) eps->max_it = 100;
373: if (ctx->nev==1) ctx->nev = PetscMin(40,eps->n); /* nev not set, use default value */
374: if (eps->n>10 && ctx->nev<10) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_WRONG,"nev cannot be less than 10 in spectrum slicing runs");
375: }
376: eps->ops->backtransform = EPSBackTransform_Skip;
378: /* create spectrum slicing context and initialize it */
379: EPSSliceResetSR(eps);
380: PetscNewLog(eps,&sr);
381: ctx->sr = sr;
382: sr->itsKs = 0;
383: sr->nleap = 0;
384: sr->nMAXCompl = eps->nev/4;
385: sr->iterCompl = eps->max_it/4;
386: sr->sPres = NULL;
387: sr->nS = 0;
389: if (ctx->npart==1 || ctx->global) {
390: /* check presence of ends and finding direction */
391: if ((eps->inta > PETSC_MIN_REAL && !(ctx->subintervals && ctx->subintervals[0]==ctx->subintervals[1])) || eps->intb >= PETSC_MAX_REAL) {
392: sr->int0 = eps->inta;
393: sr->int1 = eps->intb;
394: sr->dir = 1;
395: if (eps->intb >= PETSC_MAX_REAL) { /* Right-open interval */
396: sr->hasEnd = PETSC_FALSE;
397: } else sr->hasEnd = PETSC_TRUE;
398: } else {
399: sr->int0 = eps->intb;
400: sr->int1 = eps->inta;
401: sr->dir = -1;
402: sr->hasEnd = PetscNot(eps->inta <= PETSC_MIN_REAL);
403: }
404: }
405: if (ctx->global) {
406: /* prevent computation of factorization in global eps */
407: STSetTransform(eps->st,PETSC_FALSE);
408: EPSSetDimensions_Default(eps,ctx->nev,&ctx->ncv,&ctx->mpd);
409: /* create subintervals and initialize auxiliary eps for slicing runs */
410: EPSSliceGetEPS(eps);
411: sr_loc = ((EPS_KRYLOVSCHUR*)ctx->eps->data)->sr;
412: if (ctx->npart>1) {
413: if ((sr->dir>0&&ctx->subc->color==0)||(sr->dir<0&&ctx->subc->color==ctx->npart-1)) sr->inertia0 = sr_loc->inertia0;
414: MPI_Comm_rank(PetscSubcommChild(ctx->subc),&rank);
415: if (!rank) {
416: MPI_Bcast(&sr->inertia0,1,MPIU_INT,(sr->dir>0)?0:ctx->npart-1,ctx->commrank);
417: }
418: MPI_Bcast(&sr->inertia0,1,MPIU_INT,0,PetscSubcommChild(ctx->subc));
419: PetscFree(ctx->nconv_loc);
420: PetscMalloc1(ctx->npart,&ctx->nconv_loc);
421: MPI_Comm_size(((PetscObject)eps)->comm,&nproc);
422: if (sr->dir<0) off = 1;
423: if (nproc%ctx->npart==0) { /* subcommunicators with the same size */
424: PetscMPIIntCast(sr_loc->numEigs,&aux);
425: MPI_Allgather(&aux,1,MPI_INT,ctx->nconv_loc,1,MPI_INT,ctx->commrank);
426: MPI_Allgather(sr_loc->dir==sr->dir?&sr_loc->int0:&sr_loc->int1,1,MPIU_REAL,ctx->subintervals+off,1,MPIU_REAL,ctx->commrank);
427: } else {
428: MPI_Comm_rank(PetscSubcommChild(ctx->subc),&rank);
429: if (!rank) {
430: PetscMPIIntCast(sr_loc->numEigs,&aux);
431: MPI_Allgather(&aux,1,MPI_INT,ctx->nconv_loc,1,MPI_INT,ctx->commrank);
432: MPI_Allgather(sr_loc->dir==sr->dir?&sr_loc->int0:&sr_loc->int1,1,MPIU_REAL,ctx->subintervals+off,1,MPIU_REAL,ctx->commrank);
433: }
434: PetscMPIIntCast(ctx->npart,&aux);
435: MPI_Bcast(ctx->nconv_loc,aux,MPI_INT,0,PetscSubcommChild(ctx->subc));
436: MPI_Bcast(ctx->subintervals+off,aux,MPIU_REAL,0,PetscSubcommChild(ctx->subc));
437: }
438: nEigs = 0;
439: for (i=0;i<ctx->npart;i++) nEigs += ctx->nconv_loc[i];
440: } else {
441: nEigs = sr_loc->numEigs;
442: sr->inertia0 = sr_loc->inertia0;
443: }
444: sr->inertia1 = sr->inertia0+sr->dir*nEigs;
445: sr->numEigs = nEigs;
446: eps->nev = nEigs;
447: eps->ncv = nEigs;
448: eps->mpd = nEigs;
449: } else {
450: ctx_glob = (EPS_KRYLOVSCHUR*)ctx->eps->data;
451: sr_glob = ctx_glob->sr;
452: if (ctx->npart>1) {
453: sr->dir = sr_glob->dir;
454: sr->int0 = (sr->dir==1)?eps->inta:eps->intb;
455: sr->int1 = (sr->dir==1)?eps->intb:eps->inta;
456: if ((sr->dir>0&&ctx->subc->color==ctx->npart-1)||(sr->dir<0&&ctx->subc->color==0)) sr->hasEnd = sr_glob->hasEnd;
457: else sr->hasEnd = PETSC_TRUE;
458: }
460: /* compute inertia0 */
461: EPSSliceGetInertia(eps,sr->int0,&sr->inertia0,ctx->detect?&zeros:NULL);
462: PetscOptionsGetInt(NULL,NULL,"-eps_krylovschur_hiteigenvalue",&flg,NULL);
463: if (zeros) { /* error in factorization */
464: if (sr->int0==ctx->eps->inta || sr->int0==ctx->eps->intb) SETERRQ(((PetscObject)eps)->comm,PETSC_ERR_USER,"Found singular matrix for the transformed problem in the interval endpoint");
465: else if(ctx_glob->subintset && !flg) SETERRQ(((PetscObject)eps)->comm,PETSC_ERR_USER,"Found singular matrix for the transformed problem in an interval endpoint defined by user");
466: else {
467: if (flg==1) { /* idle subgroup */
468: sr->inertia0 = -1;
469: } else { /* perturb shift */
470: sr->int0 *= (1.0+SLICE_PTOL);
471: EPSSliceGetInertia(eps,sr->int0,&sr->inertia0,&zeros);
472: if (zeros) SETERRQ1(((PetscObject)eps)->comm,PETSC_ERR_CONV_FAILED,"Inertia computation fails in %g",sr->int1);
473: }
474: }
475: }
476: if (ctx->npart>1) {
477: /* inertia1 is received from neighbour */
478: MPI_Comm_rank(PetscSubcommChild(ctx->subc),&rank);
479: if (!rank) {
480: if ( sr->inertia0!=-1 && ((sr->dir>0 && ctx->subc->color>0) || (sr->dir<0 && ctx->subc->color<ctx->npart-1)) ) { /* send inertia0 to neighbour0 */
481: MPI_Isend(&(sr->inertia0),1,MPIU_INT,ctx->subc->color-sr->dir,0,ctx->commrank,&req);
482: MPI_Isend(&(sr->int0),1,MPIU_REAL,ctx->subc->color-sr->dir,0,ctx->commrank,&req);
483: }
484: if ((sr->dir>0 && ctx->subc->color<ctx->npart-1)|| (sr->dir<0 && ctx->subc->color>0)) { /* receive inertia1 from neighbour1 */
485: MPI_Recv(&(sr->inertia1),1,MPIU_INT,ctx->subc->color+sr->dir,0,ctx->commrank,MPI_STATUS_IGNORE);
486: MPI_Recv(&(sr->int1),1,MPIU_REAL,ctx->subc->color+sr->dir,0,ctx->commrank,MPI_STATUS_IGNORE);
487: }
488: if ( sr->inertia0==-1 && !(sr->dir>0 && ctx->subc->color==ctx->npart-1) && !(sr->dir<0 && ctx->subc->color==0)) {
489: sr->inertia0 = sr->inertia1; sr->int0 = sr->int1;
490: MPI_Isend(&(sr->inertia0),1,MPIU_INT,ctx->subc->color-sr->dir,0,ctx->commrank,&req);
491: MPI_Isend(&(sr->int0),1,MPIU_REAL,ctx->subc->color-sr->dir,0,ctx->commrank,&req);
492: }
493: }
494: if ((sr->dir>0 && ctx->subc->color<ctx->npart-1)||(sr->dir<0 && ctx->subc->color>0)) {
495: MPI_Bcast(&sr->inertia1,1,MPIU_INT,0,PetscSubcommChild(ctx->subc));
496: MPI_Bcast(&sr->int1,1,MPIU_REAL,0,PetscSubcommChild(ctx->subc));
497: } else sr_glob->inertia1 = sr->inertia1;
498: }
500: /* last process in eps comm computes inertia1 */
501: if (ctx->npart==1 || ((sr->dir>0 && ctx->subc->color==ctx->npart-1) || (sr->dir<0 && ctx->subc->color==0))) {
502: EPSSliceGetInertia(eps,sr->int1,&sr->inertia1,ctx->detect?&zeros:NULL);
503: if (zeros) SETERRQ(((PetscObject)eps)->comm,PETSC_ERR_USER,"Found singular matrix for the transformed problem in an interval endpoint defined by user");
504: if (!rank && sr->inertia0==-1) {
505: sr->inertia0 = sr->inertia1; sr->int0 = sr->int1;
506: MPI_Isend(&(sr->inertia0),1,MPIU_INT,ctx->subc->color-sr->dir,0,ctx->commrank,&req);
507: MPI_Isend(&(sr->int0),1,MPIU_REAL,ctx->subc->color-sr->dir,0,ctx->commrank,&req);
508: }
509: if (sr->hasEnd) {
510: sr->dir = -sr->dir; r = sr->int0; sr->int0 = sr->int1; sr->int1 = r;
511: i = sr->inertia0; sr->inertia0 = sr->inertia1; sr->inertia1 = i;
512: }
513: }
515: /* number of eigenvalues in interval */
516: sr->numEigs = (sr->dir)*(sr->inertia1 - sr->inertia0);
517: if (ctx->npart>1) {
518: /* memory allocate for subinterval eigenpairs */
519: EPSSliceAllocateSolution(eps,1);
520: }
521: dssz = eps->ncv+1;
522: if (sr->numEigs>0) {
523: DSGetSlepcSC(eps->ds,&sc);
524: sc->rg = NULL;
525: sc->comparison = SlepcCompareLargestMagnitude;
526: sc->comparisonctx = NULL;
527: sc->map = NULL;
528: sc->mapobj = NULL;
529: }
530: DSGetParallel(ctx->eps->ds,&ptype);
531: DSSetParallel(eps->ds,ptype);
532: }
533: DSSetType(eps->ds,DSHEP);
534: DSSetCompact(eps->ds,PETSC_TRUE);
535: DSAllocate(eps->ds,dssz);
536: /* keep state of subcomm matrices to check that the user does not modify them */
537: EPSGetOperators(eps,&A,&B);
538: PetscObjectStateGet((PetscObject)A,&ctx->Astate);
539: PetscObjectGetId((PetscObject)A,&ctx->Aid);
540: if (B) {
541: PetscObjectStateGet((PetscObject)B,&ctx->Bstate);
542: PetscObjectGetId((PetscObject)B,&ctx->Bid);
543: } else {
544: ctx->Bstate=0;
545: ctx->Bid=0;
546: }
547: return(0);
548: }
550: static PetscErrorCode EPSSliceGatherEigenVectors(EPS eps)
551: {
552: PetscErrorCode ierr;
553: Vec v,vg,v_loc;
554: IS is1,is2;
555: VecScatter vec_sc;
556: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
557: PetscInt nloc,m0,n0,i,si,idx,*idx1,*idx2,j;
558: PetscScalar *array;
559: EPS_SR sr_loc;
560: BV V_loc;
563: sr_loc = ((EPS_KRYLOVSCHUR*)ctx->eps->data)->sr;
564: V_loc = sr_loc->V;
566: /* Gather parallel eigenvectors */
567: BVGetColumn(eps->V,0,&v);
568: VecGetOwnershipRange(v,&n0,&m0);
569: BVRestoreColumn(eps->V,0,&v);
570: BVGetColumn(ctx->eps->V,0,&v);
571: VecGetLocalSize(v,&nloc);
572: BVRestoreColumn(ctx->eps->V,0,&v);
573: PetscMalloc2(m0-n0,&idx1,m0-n0,&idx2);
574: VecCreateMPI(PetscObjectComm((PetscObject)eps),nloc,PETSC_DECIDE,&vg);
575: idx = -1;
576: for (si=0;si<ctx->npart;si++) {
577: j = 0;
578: for (i=n0;i<m0;i++) {
579: idx1[j] = i;
580: idx2[j++] = i+eps->n*si;
581: }
582: ISCreateGeneral(PetscObjectComm((PetscObject)eps),(m0-n0),idx1,PETSC_COPY_VALUES,&is1);
583: ISCreateGeneral(PetscObjectComm((PetscObject)eps),(m0-n0),idx2,PETSC_COPY_VALUES,&is2);
584: BVGetColumn(eps->V,0,&v);
585: VecScatterCreate(v,is1,vg,is2,&vec_sc);
586: BVRestoreColumn(eps->V,0,&v);
587: ISDestroy(&is1);
588: ISDestroy(&is2);
589: for (i=0;i<ctx->nconv_loc[si];i++) {
590: BVGetColumn(eps->V,++idx,&v);
591: if (ctx->subc->color==si) {
592: BVGetColumn(V_loc,i,&v_loc);
593: VecGetArray(v_loc,&array);
594: VecPlaceArray(vg,array);
595: }
596: VecScatterBegin(vec_sc,vg,v,INSERT_VALUES,SCATTER_REVERSE);
597: VecScatterEnd(vec_sc,vg,v,INSERT_VALUES,SCATTER_REVERSE);
598: if (ctx->subc->color==si) {
599: VecResetArray(vg);
600: VecRestoreArray(v_loc,&array);
601: BVRestoreColumn(V_loc,i,&v_loc);
602: }
603: BVRestoreColumn(eps->V,idx,&v);
604: }
605: VecScatterDestroy(&vec_sc);
606: }
607: PetscFree2(idx1,idx2);
608: VecDestroy(&vg);
609: return(0);
610: }
612: /*
613: EPSComputeVectors_Slice - Recover Eigenvectors from subcomunicators
614: */
615: PetscErrorCode EPSComputeVectors_Slice(EPS eps)
616: {
617: PetscErrorCode ierr;
618: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
621: if (ctx->global && ctx->npart>1) {
622: EPSComputeVectors(ctx->eps);
623: EPSSliceGatherEigenVectors(eps);
624: }
625: return(0);
626: }
628: #define SWAP(a,b,t) {t=a;a=b;b=t;}
630: static PetscErrorCode EPSSliceGetInertias(EPS eps,PetscInt *n,PetscReal **shifts,PetscInt **inertias)
631: {
632: PetscErrorCode ierr;
633: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
634: PetscInt i=0,j,tmpi;
635: PetscReal v,tmpr;
636: EPS_shift s;
639: if (!eps->state) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_WRONGSTATE,"Must call EPSSetUp() first");
640: if (!ctx->sr) SETERRQ(PetscObjectComm((PetscObject)eps),PETSC_ERR_ARG_WRONGSTATE,"Only available in interval computations, see EPSSetInterval()");
641: if (!ctx->sr->s0) { /* EPSSolve not called yet */
642: *n = 2;
643: } else {
644: *n = 1;
645: s = ctx->sr->s0;
646: while (s) {
647: (*n)++;
648: s = s->neighb[1];
649: }
650: }
651: PetscMalloc1(*n,shifts);
652: PetscMalloc1(*n,inertias);
653: if (!ctx->sr->s0) { /* EPSSolve not called yet */
654: (*shifts)[0] = ctx->sr->int0;
655: (*shifts)[1] = ctx->sr->int1;
656: (*inertias)[0] = ctx->sr->inertia0;
657: (*inertias)[1] = ctx->sr->inertia1;
658: } else {
659: s = ctx->sr->s0;
660: while (s) {
661: (*shifts)[i] = s->value;
662: (*inertias)[i++] = s->inertia;
663: s = s->neighb[1];
664: }
665: (*shifts)[i] = ctx->sr->int1;
666: (*inertias)[i] = ctx->sr->inertia1;
667: }
668: /* remove possible duplicate in last position */
669: if ((*shifts)[(*n)-1]==(*shifts)[(*n)-2]) (*n)--;
670: /* sort result */
671: for (i=0;i<*n;i++) {
672: v = (*shifts)[i];
673: for (j=i+1;j<*n;j++) {
674: if (v > (*shifts)[j]) {
675: SWAP((*shifts)[i],(*shifts)[j],tmpr);
676: SWAP((*inertias)[i],(*inertias)[j],tmpi);
677: v = (*shifts)[i];
678: }
679: }
680: }
681: return(0);
682: }
684: static PetscErrorCode EPSSliceGatherSolution(EPS eps)
685: {
686: PetscErrorCode ierr;
687: PetscMPIInt rank,nproc;
688: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
689: PetscInt i,idx,j;
690: PetscInt *perm_loc,off=0,*inertias_loc,ns;
691: PetscScalar *eigr_loc;
692: EPS_SR sr_loc;
693: PetscReal *shifts_loc;
694: PetscMPIInt *disp,*ns_loc,aux;
697: eps->nconv = 0;
698: for (i=0;i<ctx->npart;i++) eps->nconv += ctx->nconv_loc[i];
699: sr_loc = ((EPS_KRYLOVSCHUR*)ctx->eps->data)->sr;
701: /* Gather the shifts used and the inertias computed */
702: EPSSliceGetInertias(ctx->eps,&ns,&shifts_loc,&inertias_loc);
703: if (ctx->sr->dir>0 && shifts_loc[ns-1]==sr_loc->int1 && ctx->subc->color<ctx->npart-1) ns--;
704: if (ctx->sr->dir<0 && shifts_loc[ns-1]==sr_loc->int0 && ctx->subc->color>0) {
705: ns--;
706: for (i=0;i<ns;i++) {
707: inertias_loc[i] = inertias_loc[i+1];
708: shifts_loc[i] = shifts_loc[i+1];
709: }
710: }
711: PetscMalloc1(ctx->npart,&ns_loc);
712: MPI_Comm_rank(PetscSubcommChild(ctx->subc),&rank);
713: PetscMPIIntCast(ns,&aux);
714: if (!rank) { MPI_Allgather(&aux,1,MPI_INT,ns_loc,1,MPI_INT,ctx->commrank); }
715: PetscMPIIntCast(ctx->npart,&aux);
716: MPI_Bcast(ns_loc,aux,MPI_INT,0,PetscSubcommChild(ctx->subc));
717: ctx->nshifts = 0;
718: for (i=0;i<ctx->npart;i++) ctx->nshifts += ns_loc[i];
719: PetscFree(ctx->inertias);
720: PetscFree(ctx->shifts);
721: PetscMalloc1(ctx->nshifts,&ctx->inertias);
722: PetscMalloc1(ctx->nshifts,&ctx->shifts);
724: /* Gather eigenvalues (same ranks have fully set of eigenvalues)*/
725: eigr_loc = sr_loc->eigr;
726: perm_loc = sr_loc->perm;
727: MPI_Comm_size(((PetscObject)eps)->comm,&nproc);
728: PetscMalloc1(ctx->npart,&disp);
729: disp[0] = 0;
730: for (i=1;i<ctx->npart;i++) disp[i] = disp[i-1]+ctx->nconv_loc[i-1];
731: if (nproc%ctx->npart==0) { /* subcommunicators with the same size */
732: PetscMPIIntCast(sr_loc->numEigs,&aux);
733: MPI_Allgatherv(eigr_loc,aux,MPIU_SCALAR,eps->eigr,ctx->nconv_loc,disp,MPIU_SCALAR,ctx->commrank); /* eigenvalues */
734: MPI_Allgatherv(perm_loc,aux,MPIU_INT,eps->perm,ctx->nconv_loc,disp,MPIU_INT,ctx->commrank); /* perm */
735: for (i=1;i<ctx->npart;i++) disp[i] = disp[i-1]+ns_loc[i-1];
736: PetscMPIIntCast(ns,&aux);
737: MPI_Allgatherv(shifts_loc,aux,MPIU_REAL,ctx->shifts,ns_loc,disp,MPIU_REAL,ctx->commrank); /* shifts */
738: MPI_Allgatherv(inertias_loc,aux,MPIU_INT,ctx->inertias,ns_loc,disp,MPIU_INT,ctx->commrank); /* inertias */
739: MPI_Allreduce(&sr_loc->itsKs,&eps->its,1,MPIU_INT,MPI_SUM,ctx->commrank);
740: } else { /* subcommunicators with different size */
741: MPI_Comm_rank(PetscSubcommChild(ctx->subc),&rank);
742: if (!rank) {
743: PetscMPIIntCast(sr_loc->numEigs,&aux);
744: MPI_Allgatherv(eigr_loc,aux,MPIU_SCALAR,eps->eigr,ctx->nconv_loc,disp,MPIU_SCALAR,ctx->commrank); /* eigenvalues */
745: MPI_Allgatherv(perm_loc,aux,MPIU_INT,eps->perm,ctx->nconv_loc,disp,MPIU_INT,ctx->commrank); /* perm */
746: for (i=1;i<ctx->npart;i++) disp[i] = disp[i-1]+ns_loc[i-1];
747: PetscMPIIntCast(ns,&aux);
748: MPI_Allgatherv(shifts_loc,aux,MPIU_REAL,ctx->shifts,ns_loc,disp,MPIU_REAL,ctx->commrank); /* shifts */
749: MPI_Allgatherv(inertias_loc,aux,MPIU_INT,ctx->inertias,ns_loc,disp,MPIU_INT,ctx->commrank); /* inertias */
750: MPI_Allreduce(&sr_loc->itsKs,&eps->its,1,MPIU_INT,MPI_SUM,ctx->commrank);
751: }
752: PetscMPIIntCast(eps->nconv,&aux);
753: MPI_Bcast(eps->eigr,aux,MPIU_SCALAR,0,PetscSubcommChild(ctx->subc));
754: MPI_Bcast(eps->perm,aux,MPIU_INT,0,PetscSubcommChild(ctx->subc));
755: MPI_Bcast(ctx->shifts,ctx->nshifts,MPIU_REAL,0,PetscSubcommChild(ctx->subc));
756: PetscMPIIntCast(ctx->nshifts,&aux);
757: MPI_Bcast(ctx->inertias,aux,MPIU_INT,0,PetscSubcommChild(ctx->subc));
758: MPI_Bcast(&eps->its,1,MPIU_INT,0,PetscSubcommChild(ctx->subc));
759: }
760: /* Update global array eps->perm */
761: idx = ctx->nconv_loc[0];
762: for (i=1;i<ctx->npart;i++) {
763: off += ctx->nconv_loc[i-1];
764: for (j=0;j<ctx->nconv_loc[i];j++) eps->perm[idx++] += off;
765: }
767: /* Gather parallel eigenvectors */
768: PetscFree(ns_loc);
769: PetscFree(disp);
770: PetscFree(shifts_loc);
771: PetscFree(inertias_loc);
772: return(0);
773: }
775: /*
776: Fills the fields of a shift structure
777: */
778: static PetscErrorCode EPSCreateShift(EPS eps,PetscReal val,EPS_shift neighb0,EPS_shift neighb1)
779: {
780: PetscErrorCode ierr;
781: EPS_shift s,*pending2;
782: PetscInt i;
783: EPS_SR sr;
784: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
787: sr = ctx->sr;
788: PetscNewLog(eps,&s);
789: s->value = val;
790: s->neighb[0] = neighb0;
791: if (neighb0) neighb0->neighb[1] = s;
792: s->neighb[1] = neighb1;
793: if (neighb1) neighb1->neighb[0] = s;
794: s->comp[0] = PETSC_FALSE;
795: s->comp[1] = PETSC_FALSE;
796: s->index = -1;
797: s->neigs = 0;
798: s->nconv[0] = s->nconv[1] = 0;
799: s->nsch[0] = s->nsch[1]=0;
800: /* Inserts in the stack of pending shifts */
801: /* If needed, the array is resized */
802: if (sr->nPend >= sr->maxPend) {
803: sr->maxPend *= 2;
804: PetscMalloc1(sr->maxPend,&pending2);
805: PetscLogObjectMemory((PetscObject)eps,sizeof(EPS_shift));
806: for (i=0;i<sr->nPend;i++) pending2[i] = sr->pending[i];
807: PetscFree(sr->pending);
808: sr->pending = pending2;
809: }
810: sr->pending[sr->nPend++]=s;
811: return(0);
812: }
814: /* Prepare for Rational Krylov update */
815: static PetscErrorCode EPSPrepareRational(EPS eps)
816: {
817: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
818: PetscErrorCode ierr;
819: PetscInt dir,i,k,ld,nv;
820: PetscScalar *A;
821: EPS_SR sr = ctx->sr;
822: Vec v;
825: DSGetLeadingDimension(eps->ds,&ld);
826: dir = (sr->sPres->neighb[0] == sr->sPrev)?1:-1;
827: dir*=sr->dir;
828: k = 0;
829: for (i=0;i<sr->nS;i++) {
830: if (dir*PetscRealPart(sr->S[i])>0.0) {
831: sr->S[k] = sr->S[i];
832: sr->S[sr->nS+k] = sr->S[sr->nS+i];
833: BVGetColumn(sr->Vnext,k,&v);
834: BVCopyVec(eps->V,eps->nconv+i,v);
835: BVRestoreColumn(sr->Vnext,k,&v);
836: k++;
837: if (k>=sr->nS/2)break;
838: }
839: }
840: /* Copy to DS */
841: DSGetArray(eps->ds,DS_MAT_A,&A);
842: PetscMemzero(A,ld*ld*sizeof(PetscScalar));
843: for (i=0;i<k;i++) {
844: A[i*(1+ld)] = sr->S[i];
845: A[k+i*ld] = sr->S[sr->nS+i];
846: }
847: sr->nS = k;
848: DSRestoreArray(eps->ds,DS_MAT_A,&A);
849: DSGetDimensions(eps->ds,&nv,NULL,NULL,NULL,NULL);
850: DSSetDimensions(eps->ds,nv,0,0,k);
851: /* Append u to V */
852: BVGetColumn(sr->Vnext,sr->nS,&v);
853: BVCopyVec(eps->V,sr->nv,v);
854: BVRestoreColumn(sr->Vnext,sr->nS,&v);
855: return(0);
856: }
858: /* Provides next shift to be computed */
859: static PetscErrorCode EPSExtractShift(EPS eps)
860: {
861: PetscErrorCode ierr;
862: PetscInt iner,zeros=0;
863: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
864: EPS_SR sr;
865: PetscReal newShift;
866: EPS_shift sPres;
869: sr = ctx->sr;
870: if (sr->nPend > 0) {
871: sr->sPrev = sr->sPres;
872: sr->sPres = sr->pending[--sr->nPend];
873: sPres = sr->sPres;
874: EPSSliceGetInertia(eps,sPres->value,&iner,ctx->detect?&zeros:NULL);
875: if (zeros) {
876: newShift = sPres->value*(1.0+SLICE_PTOL);
877: if (sr->dir*(sPres->neighb[0] && newShift-sPres->neighb[0]->value) < 0) newShift = (sPres->value+sPres->neighb[0]->value)/2;
878: else if (sPres->neighb[1] && sr->dir*(sPres->neighb[1]->value-newShift) < 0) newShift = (sPres->value+sPres->neighb[1]->value)/2;
879: EPSSliceGetInertia(eps,newShift,&iner,&zeros);
880: if (zeros) SETERRQ1(((PetscObject)eps)->comm,PETSC_ERR_CONV_FAILED,"Inertia computation fails in %g",newShift);
881: sPres->value = newShift;
882: }
883: sr->sPres->inertia = iner;
884: eps->target = sr->sPres->value;
885: eps->reason = EPS_CONVERGED_ITERATING;
886: eps->its = 0;
887: } else sr->sPres = NULL;
888: return(0);
889: }
891: /*
892: Symmetric KrylovSchur adapted to spectrum slicing:
893: Allows searching an specific amount of eigenvalues in the subintervals left and right.
894: Returns whether the search has succeeded
895: */
896: static PetscErrorCode EPSKrylovSchur_Slice(EPS eps)
897: {
898: PetscErrorCode ierr;
899: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
900: PetscInt i,k,l,ld,nv,*iwork,j;
901: Mat U;
902: PetscScalar *Q,*A;
903: PetscReal *a,*b,beta;
904: PetscBool breakdown;
905: PetscInt count0,count1;
906: PetscReal lambda;
907: EPS_shift sPres;
908: PetscBool complIterating;
909: PetscBool sch0,sch1;
910: PetscInt iterCompl=0,n0,n1;
911: EPS_SR sr = ctx->sr;
914: /* Spectrum slicing data */
915: sPres = sr->sPres;
916: complIterating =PETSC_FALSE;
917: sch1 = sch0 = PETSC_TRUE;
918: DSGetLeadingDimension(eps->ds,&ld);
919: PetscMalloc1(2*ld,&iwork);
920: count0=0;count1=0; /* Found on both sides */
921: if (sr->nS > 0 && (sPres->neighb[0] == sr->sPrev || sPres->neighb[1] == sr->sPrev)) {
922: /* Rational Krylov */
923: DSTranslateRKS(eps->ds,sr->sPrev->value-sPres->value);
924: DSGetDimensions(eps->ds,NULL,NULL,NULL,&l,NULL);
925: DSSetDimensions(eps->ds,l+1,0,0,0);
926: BVSetActiveColumns(eps->V,0,l+1);
927: DSGetMat(eps->ds,DS_MAT_Q,&U);
928: BVMultInPlace(eps->V,U,0,l+1);
929: MatDestroy(&U);
930: } else {
931: /* Get the starting Lanczos vector */
932: EPSGetStartVector(eps,0,NULL);
933: l = 0;
934: }
935: /* Restart loop */
936: while (eps->reason == EPS_CONVERGED_ITERATING) {
937: eps->its++; sr->itsKs++;
938: /* Compute an nv-step Lanczos factorization */
939: nv = PetscMin(eps->nconv+eps->mpd,eps->ncv);
940: DSGetArrayReal(eps->ds,DS_MAT_T,&a);
941: b = a + ld;
942: EPSFullLanczos(eps,a,b,eps->nconv+l,&nv,&breakdown);
943: sr->nv = nv;
944: beta = b[nv-1];
945: DSRestoreArrayReal(eps->ds,DS_MAT_T,&a);
946: DSSetDimensions(eps->ds,nv,0,eps->nconv,eps->nconv+l);
947: if (l==0) {
948: DSSetState(eps->ds,DS_STATE_INTERMEDIATE);
949: } else {
950: DSSetState(eps->ds,DS_STATE_RAW);
951: }
952: BVSetActiveColumns(eps->V,eps->nconv,nv);
954: /* Solve projected problem and compute residual norm estimates */
955: if (eps->its == 1 && l > 0) {/* After rational update */
956: DSGetArray(eps->ds,DS_MAT_A,&A);
957: DSGetArrayReal(eps->ds,DS_MAT_T,&a);
958: b = a + ld;
959: k = eps->nconv+l;
960: A[k*ld+k-1] = A[(k-1)*ld+k];
961: A[k*ld+k] = a[k];
962: for (j=k+1; j< nv; j++) {
963: A[j*ld+j] = a[j];
964: A[j*ld+j-1] = b[j-1] ;
965: A[(j-1)*ld+j] = b[j-1];
966: }
967: DSRestoreArray(eps->ds,DS_MAT_A,&A);
968: DSRestoreArrayReal(eps->ds,DS_MAT_T,&a);
969: DSSolve(eps->ds,eps->eigr,NULL);
970: DSSort(eps->ds,eps->eigr,NULL,NULL,NULL,NULL);
971: DSSetCompact(eps->ds,PETSC_TRUE);
972: } else { /* Restart */
973: DSSolve(eps->ds,eps->eigr,NULL);
974: DSSort(eps->ds,eps->eigr,NULL,NULL,NULL,NULL);
975: }
976: DSSynchronize(eps->ds,eps->eigr,NULL);
978: /* Residual */
979: EPSKrylovConvergence(eps,PETSC_TRUE,eps->nconv,nv-eps->nconv,beta,1.0,&k);
980: /* Checking values obtained for completing */
981: for (i=0;i<k;i++) {
982: sr->back[i]=eps->eigr[i];
983: }
984: STBackTransform(eps->st,k,sr->back,eps->eigi);
985: count0=count1=0;
986: for (i=0;i<k;i++) {
987: lambda = PetscRealPart(sr->back[i]);
988: if (((sr->dir)*(sPres->value - lambda) > 0) && ((sr->dir)*(lambda - sPres->ext[0]) > 0)) count0++;
989: if (((sr->dir)*(lambda - sPres->value) > 0) && ((sr->dir)*(sPres->ext[1] - lambda) > 0)) count1++;
990: }
991: if (k>eps->nev && eps->ncv-k<5) eps->reason = EPS_CONVERGED_TOL;
992: else {
993: /* Checks completion */
994: if ((!sch0||count0 >= sPres->nsch[0]) && (!sch1 ||count1 >= sPres->nsch[1])) {
995: eps->reason = EPS_CONVERGED_TOL;
996: } else {
997: if (!complIterating && eps->its >= eps->max_it) eps->reason = EPS_DIVERGED_ITS;
998: if (complIterating) {
999: if (--iterCompl <= 0) eps->reason = EPS_DIVERGED_ITS;
1000: } else if (k >= eps->nev) {
1001: n0 = sPres->nsch[0]-count0;
1002: n1 = sPres->nsch[1]-count1;
1003: if (sr->iterCompl>0 && ((n0>0 && n0<= sr->nMAXCompl)||(n1>0&&n1<=sr->nMAXCompl))) {
1004: /* Iterating for completion*/
1005: complIterating = PETSC_TRUE;
1006: if (n0 >sr->nMAXCompl)sch0 = PETSC_FALSE;
1007: if (n1 >sr->nMAXCompl)sch1 = PETSC_FALSE;
1008: iterCompl = sr->iterCompl;
1009: } else eps->reason = EPS_CONVERGED_TOL;
1010: }
1011: }
1012: }
1013: /* Update l */
1014: if (eps->reason == EPS_CONVERGED_ITERATING) l = PetscMax(1,(PetscInt)((nv-k)*ctx->keep));
1015: else l = nv-k;
1016: if (breakdown) l=0;
1017: if (!ctx->lock && l>0 && eps->reason == EPS_CONVERGED_ITERATING) { l += k; k = 0; } /* non-locking variant: reset no. of converged pairs */
1019: if (eps->reason == EPS_CONVERGED_ITERATING) {
1020: if (breakdown) {
1021: /* Start a new Lanczos factorization */
1022: PetscInfo2(eps,"Breakdown in Krylov-Schur method (it=%D norm=%g)\n",eps->its,(double)beta);
1023: EPSGetStartVector(eps,k,&breakdown);
1024: if (breakdown) {
1025: eps->reason = EPS_DIVERGED_BREAKDOWN;
1026: PetscInfo(eps,"Unable to generate more start vectors\n");
1027: }
1028: } else {
1029: /* Prepare the Rayleigh quotient for restart */
1030: DSGetArrayReal(eps->ds,DS_MAT_T,&a);
1031: DSGetArray(eps->ds,DS_MAT_Q,&Q);
1032: b = a + ld;
1033: for (i=k;i<k+l;i++) {
1034: a[i] = PetscRealPart(eps->eigr[i]);
1035: b[i] = PetscRealPart(Q[nv-1+i*ld]*beta);
1036: }
1037: DSRestoreArrayReal(eps->ds,DS_MAT_T,&a);
1038: DSRestoreArray(eps->ds,DS_MAT_Q,&Q);
1039: }
1040: }
1041: /* Update the corresponding vectors V(:,idx) = V*Q(:,idx) */
1042: DSGetMat(eps->ds,DS_MAT_Q,&U);
1043: BVMultInPlace(eps->V,U,eps->nconv,k+l);
1044: MatDestroy(&U);
1046: /* Normalize u and append it to V */
1047: if (eps->reason == EPS_CONVERGED_ITERATING && !breakdown) {
1048: BVCopyColumn(eps->V,nv,k+l);
1049: }
1050: eps->nconv = k;
1051: if (eps->reason != EPS_CONVERGED_ITERATING) {
1052: /* Store approximated values for next shift */
1053: DSGetArray(eps->ds,DS_MAT_Q,&Q);
1054: sr->nS = l;
1055: for (i=0;i<l;i++) {
1056: sr->S[i] = eps->eigr[i+k];/* Diagonal elements */
1057: sr->S[i+l] = Q[nv-1+(i+k)*ld]*beta; /* Out of diagonal elements */
1058: }
1059: DSRestoreArray(eps->ds,DS_MAT_Q,&Q);
1060: }
1061: }
1062: /* Check for completion */
1063: for (i=0;i< eps->nconv; i++) {
1064: if ((sr->dir)*PetscRealPart(eps->eigr[i])>0) sPres->nconv[1]++;
1065: else sPres->nconv[0]++;
1066: }
1067: sPres->comp[0] = PetscNot(count0 < sPres->nsch[0]);
1068: sPres->comp[1] = PetscNot(count1 < sPres->nsch[1]);
1069: if (count0 > sPres->nsch[0] || count1 > sPres->nsch[1]) SETERRQ(PetscObjectComm((PetscObject)eps),1,"Mismatch between number of values found and information from inertia, consider using EPSKrylovSchurSetDetectZeros()");
1070: PetscFree(iwork);
1071: return(0);
1072: }
1074: /*
1075: Obtains value of subsequent shift
1076: */
1077: static PetscErrorCode EPSGetNewShiftValue(EPS eps,PetscInt side,PetscReal *newS)
1078: {
1079: PetscReal lambda,d_prev;
1080: PetscInt i,idxP;
1081: EPS_SR sr;
1082: EPS_shift sPres,s;
1083: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
1086: sr = ctx->sr;
1087: sPres = sr->sPres;
1088: if (sPres->neighb[side]) {
1089: /* Completing a previous interval */
1090: if (!sPres->neighb[side]->neighb[side] && sPres->neighb[side]->nconv[side]==0) { /* One of the ends might be too far from eigenvalues */
1091: if (side) *newS = (sPres->value + PetscRealPart(sr->eigr[sr->perm[sr->indexEig-1]]))/2;
1092: else *newS = (sPres->value + PetscRealPart(sr->eigr[sr->perm[0]]))/2;
1093: } else *newS=(sPres->value + sPres->neighb[side]->value)/2;
1094: } else { /* (Only for side=1). Creating a new interval. */
1095: if (sPres->neigs==0) {/* No value has been accepted*/
1096: if (sPres->neighb[0]) {
1097: /* Multiplying by 10 the previous distance */
1098: *newS = sPres->value + 10*(sr->dir)*PetscAbsReal(sPres->value - sPres->neighb[0]->value);
1099: sr->nleap++;
1100: /* Stops when the interval is open and no values are found in the last 5 shifts (there might be infinite eigenvalues) */
1101: if (!sr->hasEnd && sr->nleap > 5) SETERRQ(PetscObjectComm((PetscObject)eps),1,"Unable to compute the wanted eigenvalues with open interval");
1102: } else { /* First shift */
1103: if (eps->nconv != 0) {
1104: /* Unaccepted values give information for next shift */
1105: idxP=0;/* Number of values left from shift */
1106: for (i=0;i<eps->nconv;i++) {
1107: lambda = PetscRealPart(eps->eigr[i]);
1108: if ((sr->dir)*(lambda - sPres->value) <0) idxP++;
1109: else break;
1110: }
1111: /* Avoiding subtraction of eigenvalues (might be the same).*/
1112: if (idxP>0) {
1113: d_prev = PetscAbsReal(sPres->value - PetscRealPart(eps->eigr[0]))/(idxP+0.3);
1114: } else {
1115: d_prev = PetscAbsReal(sPres->value - PetscRealPart(eps->eigr[eps->nconv-1]))/(eps->nconv+0.3);
1116: }
1117: *newS = sPres->value + ((sr->dir)*d_prev*eps->nev)/2;
1118: } else { /* No values found, no information for next shift */
1119: SETERRQ(PetscObjectComm((PetscObject)eps),1,"First shift renders no information");
1120: }
1121: }
1122: } else { /* Accepted values found */
1123: sr->nleap = 0;
1124: /* Average distance of values in previous subinterval */
1125: s = sPres->neighb[0];
1126: while (s && PetscAbs(s->inertia - sPres->inertia)==0) {
1127: s = s->neighb[0];/* Looking for previous shifts with eigenvalues within */
1128: }
1129: if (s) {
1130: d_prev = PetscAbsReal((sPres->value - s->value)/(sPres->inertia - s->inertia));
1131: } else { /* First shift. Average distance obtained with values in this shift */
1132: /* first shift might be too far from first wanted eigenvalue (no values found outside the interval)*/
1133: if ((sr->dir)*(PetscRealPart(sr->eigr[0])-sPres->value)>0 && PetscAbsReal((PetscRealPart(sr->eigr[sr->indexEig-1]) - PetscRealPart(sr->eigr[0]))/PetscRealPart(sr->eigr[0])) > PetscSqrtReal(eps->tol)) {
1134: d_prev = PetscAbsReal((PetscRealPart(sr->eigr[sr->indexEig-1]) - PetscRealPart(sr->eigr[0])))/(sPres->neigs+0.3);
1135: } else {
1136: d_prev = PetscAbsReal(PetscRealPart(sr->eigr[sr->indexEig-1]) - sPres->value)/(sPres->neigs+0.3);
1137: }
1138: }
1139: /* Average distance is used for next shift by adding it to value on the right or to shift */
1140: if ((sr->dir)*(PetscRealPart(sr->eigr[sPres->index + sPres->neigs -1]) - sPres->value)>0) {
1141: *newS = PetscRealPart(sr->eigr[sPres->index + sPres->neigs -1])+ ((sr->dir)*d_prev*(eps->nev))/2;
1142: } else { /* Last accepted value is on the left of shift. Adding to shift */
1143: *newS = sPres->value + ((sr->dir)*d_prev*(eps->nev))/2;
1144: }
1145: }
1146: /* End of interval can not be surpassed */
1147: if ((sr->dir)*(sr->int1 - *newS) < 0) *newS = sr->int1;
1148: }/* of neighb[side]==null */
1149: return(0);
1150: }
1152: /*
1153: Function for sorting an array of real values
1154: */
1155: static PetscErrorCode sortRealEigenvalues(PetscScalar *r,PetscInt *perm,PetscInt nr,PetscBool prev,PetscInt dir)
1156: {
1157: PetscReal re;
1158: PetscInt i,j,tmp;
1161: if (!prev) for (i=0;i<nr;i++) perm[i] = i;
1162: /* Insertion sort */
1163: for (i=1;i<nr;i++) {
1164: re = PetscRealPart(r[perm[i]]);
1165: j = i-1;
1166: while (j>=0 && dir*(re - PetscRealPart(r[perm[j]])) <= 0) {
1167: tmp = perm[j]; perm[j] = perm[j+1]; perm[j+1] = tmp; j--;
1168: }
1169: }
1170: return(0);
1171: }
1173: /* Stores the pairs obtained since the last shift in the global arrays */
1174: static PetscErrorCode EPSStoreEigenpairs(EPS eps)
1175: {
1176: PetscErrorCode ierr;
1177: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
1178: PetscReal lambda,err,norm;
1179: PetscInt i,count;
1180: PetscBool iscayley;
1181: EPS_SR sr = ctx->sr;
1182: EPS_shift sPres;
1183: Vec v,w;
1186: sPres = sr->sPres;
1187: sPres->index = sr->indexEig;
1188: count = sr->indexEig;
1189: /* Back-transform */
1190: STBackTransform(eps->st,eps->nconv,eps->eigr,eps->eigi);
1191: PetscObjectTypeCompare((PetscObject)eps->st,STCAYLEY,&iscayley);
1192: /* Sort eigenvalues */
1193: sortRealEigenvalues(eps->eigr,eps->perm,eps->nconv,PETSC_FALSE,sr->dir);
1194: /* Values stored in global array */
1195: for (i=0;i<eps->nconv;i++) {
1196: lambda = PetscRealPart(eps->eigr[eps->perm[i]]);
1197: err = eps->errest[eps->perm[i]];
1199: if ((sr->dir)*(lambda - sPres->ext[0]) > 0 && (sr->dir)*(sPres->ext[1] - lambda) > 0) {/* Valid value */
1200: if (count>=sr->numEigs) SETERRQ(PetscObjectComm((PetscObject)eps),1,"Unexpected error in Spectrum Slicing");
1201: sr->eigr[count] = lambda;
1202: sr->errest[count] = err;
1203: /* Explicit purification */
1204: BVGetColumn(eps->V,eps->perm[i],&w);
1205: if (eps->purify) {
1206: BVGetColumn(sr->V,count,&v);
1207: STApply(eps->st,w,v);
1208: BVRestoreColumn(sr->V,count,&v);
1209: } else {
1210: BVInsertVec(sr->V,count,w);
1211: }
1212: BVRestoreColumn(eps->V,eps->perm[i],&w);
1213: BVNormColumn(sr->V,count,NORM_2,&norm);
1214: BVScaleColumn(sr->V,count,1.0/norm);
1215: count++;
1216: }
1217: }
1218: sPres->neigs = count - sr->indexEig;
1219: sr->indexEig = count;
1220: /* Global ordering array updating */
1221: sortRealEigenvalues(sr->eigr,sr->perm,count,PETSC_TRUE,sr->dir);
1222: return(0);
1223: }
1225: static PetscErrorCode EPSLookForDeflation(EPS eps)
1226: {
1227: PetscErrorCode ierr;
1228: PetscReal val;
1229: PetscInt i,count0=0,count1=0;
1230: EPS_shift sPres;
1231: PetscInt ini,fin,k,idx0,idx1;
1232: EPS_SR sr;
1233: Vec v;
1234: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
1237: sr = ctx->sr;
1238: sPres = sr->sPres;
1240: if (sPres->neighb[0]) ini = (sr->dir)*(sPres->neighb[0]->inertia - sr->inertia0);
1241: else ini = 0;
1242: fin = sr->indexEig;
1243: /* Selection of ends for searching new values */
1244: if (!sPres->neighb[0]) sPres->ext[0] = sr->int0;/* First shift */
1245: else sPres->ext[0] = sPres->neighb[0]->value;
1246: if (!sPres->neighb[1]) {
1247: if (sr->hasEnd) sPres->ext[1] = sr->int1;
1248: else sPres->ext[1] = (sr->dir > 0)?PETSC_MAX_REAL:PETSC_MIN_REAL;
1249: } else sPres->ext[1] = sPres->neighb[1]->value;
1250: /* Selection of values between right and left ends */
1251: for (i=ini;i<fin;i++) {
1252: val=PetscRealPart(sr->eigr[sr->perm[i]]);
1253: /* Values to the right of left shift */
1254: if ((sr->dir)*(val - sPres->ext[1]) < 0) {
1255: if ((sr->dir)*(val - sPres->value) < 0) count0++;
1256: else count1++;
1257: } else break;
1258: }
1259: /* The number of values on each side are found */
1260: if (sPres->neighb[0]) {
1261: sPres->nsch[0] = (sr->dir)*(sPres->inertia - sPres->neighb[0]->inertia)-count0;
1262: if (sPres->nsch[0]<0) SETERRQ(PetscObjectComm((PetscObject)eps),1,"Mismatch between number of values found and information from inertia, consider using EPSKrylovSchurSetDetectZeros()");
1263: } else sPres->nsch[0] = 0;
1265: if (sPres->neighb[1]) {
1266: sPres->nsch[1] = (sr->dir)*(sPres->neighb[1]->inertia - sPres->inertia) - count1;
1267: if (sPres->nsch[1]<0) SETERRQ(PetscObjectComm((PetscObject)eps),1,"Mismatch between number of values found and information from inertia, consider using EPSKrylovSchurSetDetectZeros()");
1268: } else sPres->nsch[1] = (sr->dir)*(sr->inertia1 - sPres->inertia);
1270: /* Completing vector of indexes for deflation */
1271: idx0 = ini;
1272: idx1 = ini+count0+count1;
1273: k=0;
1274: for (i=idx0;i<idx1;i++) sr->idxDef[k++]=sr->perm[i];
1275: BVDuplicateResize(eps->V,k+eps->ncv+1,&sr->Vnext);
1276: BVSetNumConstraints(sr->Vnext,k);
1277: for (i=0;i<k;i++) {
1278: BVGetColumn(sr->Vnext,-i-1,&v);
1279: BVCopyVec(sr->V,sr->idxDef[i],v);
1280: BVRestoreColumn(sr->Vnext,-i-1,&v);
1281: }
1283: /* For rational Krylov */
1284: if (sr->nS>0 && (sr->sPrev == sr->sPres->neighb[0] || sr->sPrev == sr->sPres->neighb[1])) {
1285: EPSPrepareRational(eps);
1286: }
1287: eps->nconv = 0;
1288: /* Get rid of temporary Vnext */
1289: BVDestroy(&eps->V);
1290: eps->V = sr->Vnext;
1291: sr->Vnext = NULL;
1292: return(0);
1293: }
1295: PetscErrorCode EPSSolve_KrylovSchur_Slice(EPS eps)
1296: {
1297: PetscErrorCode ierr;
1298: PetscInt i,lds,ti;
1299: PetscReal newS;
1300: EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
1301: EPS_SR sr=ctx->sr;
1302: Mat A,B=NULL;
1303: PetscObjectState Astate,Bstate=0;
1304: PetscObjectId Aid,Bid=0;
1307: PetscCitationsRegister(citation,&cited);
1308: if (ctx->global) {
1309: EPSSolve_KrylovSchur_Slice(ctx->eps);
1310: ctx->eps->state = EPS_STATE_SOLVED;
1311: eps->reason = EPS_CONVERGED_TOL;
1312: if (ctx->npart>1) {
1313: /* Gather solution from subsolvers */
1314: EPSSliceGatherSolution(eps);
1315: } else {
1316: eps->nconv = sr->numEigs;
1317: eps->its = ctx->eps->its;
1318: PetscFree(ctx->inertias);
1319: PetscFree(ctx->shifts);
1320: EPSSliceGetInertias(ctx->eps,&ctx->nshifts,&ctx->shifts,&ctx->inertias);
1321: }
1322: } else {
1323: if (ctx->npart==1) {
1324: sr->eigr = ctx->eps->eigr;
1325: sr->eigi = ctx->eps->eigi;
1326: sr->perm = ctx->eps->perm;
1327: sr->errest = ctx->eps->errest;
1328: sr->V = ctx->eps->V;
1329: }
1330: /* Check that the user did not modify subcomm matrices */
1331: EPSGetOperators(eps,&A,&B);
1332: PetscObjectStateGet((PetscObject)A,&Astate);
1333: PetscObjectGetId((PetscObject)A,&Aid);
1334: if (B) {
1335: PetscObjectStateGet((PetscObject)B,&Bstate);
1336: PetscObjectGetId((PetscObject)B,&Bid);
1337: }
1338: if (Astate!=ctx->Astate || (B && Bstate!=ctx->Bstate) || Aid!=ctx->Aid || (B && Bid!=ctx->Bid)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Subcomm matrices have been modified by user");
1339: /* Only with eigenvalues present in the interval ...*/
1340: if (sr->numEigs==0) {
1341: eps->reason = EPS_CONVERGED_TOL;
1342: return(0);
1343: }
1344: /* Array of pending shifts */
1345: sr->maxPend = 100; /* Initial size */
1346: sr->nPend = 0;
1347: PetscMalloc1(sr->maxPend,&sr->pending);
1348: PetscLogObjectMemory((PetscObject)eps,(sr->maxPend)*sizeof(EPS_shift));
1349: EPSCreateShift(eps,sr->int0,NULL,NULL);
1350: /* extract first shift */
1351: sr->sPrev = NULL;
1352: sr->sPres = sr->pending[--sr->nPend];
1353: sr->sPres->inertia = sr->inertia0;
1354: eps->target = sr->sPres->value;
1355: sr->s0 = sr->sPres;
1356: sr->indexEig = 0;
1357: /* Memory reservation for auxiliary variables */
1358: lds = PetscMin(eps->mpd,eps->ncv);
1359: PetscCalloc1(lds*lds,&sr->S);
1360: PetscMalloc1(eps->ncv,&sr->back);
1361: PetscLogObjectMemory((PetscObject)eps,(sr->numEigs+2*eps->ncv)*sizeof(PetscScalar));
1362: for (i=0;i<sr->numEigs;i++) {
1363: sr->eigr[i] = 0.0;
1364: sr->eigi[i] = 0.0;
1365: sr->errest[i] = 0.0;
1366: sr->perm[i] = i;
1367: }
1368: /* Vectors for deflation */
1369: PetscMalloc1(sr->numEigs,&sr->idxDef);
1370: PetscLogObjectMemory((PetscObject)eps,sr->numEigs*sizeof(PetscInt));
1371: sr->indexEig = 0;
1372: /* Main loop */
1373: while (sr->sPres) {
1374: /* Search for deflation */
1375: EPSLookForDeflation(eps);
1376: /* KrylovSchur */
1377: EPSKrylovSchur_Slice(eps);
1379: EPSStoreEigenpairs(eps);
1380: /* Select new shift */
1381: if (!sr->sPres->comp[1]) {
1382: EPSGetNewShiftValue(eps,1,&newS);
1383: EPSCreateShift(eps,newS,sr->sPres,sr->sPres->neighb[1]);
1384: }
1385: if (!sr->sPres->comp[0]) {
1386: /* Completing earlier interval */
1387: EPSGetNewShiftValue(eps,0,&newS);
1388: EPSCreateShift(eps,newS,sr->sPres->neighb[0],sr->sPres);
1389: }
1390: /* Preparing for a new search of values */
1391: EPSExtractShift(eps);
1392: }
1394: /* Updating eps values prior to exit */
1395: PetscFree(sr->S);
1396: PetscFree(sr->idxDef);
1397: PetscFree(sr->pending);
1398: PetscFree(sr->back);
1399: BVDuplicateResize(eps->V,eps->ncv+1,&sr->Vnext);
1400: BVSetNumConstraints(sr->Vnext,0);
1401: BVDestroy(&eps->V);
1402: eps->V = sr->Vnext;
1403: eps->nconv = sr->indexEig;
1404: eps->reason = EPS_CONVERGED_TOL;
1405: eps->its = sr->itsKs;
1406: eps->nds = 0;
1407: if (sr->dir<0) {
1408: for (i=0;i<eps->nconv/2;i++) {
1409: ti = sr->perm[i]; sr->perm[i] = sr->perm[eps->nconv-1-i]; sr->perm[eps->nconv-1-i] = ti;
1410: }
1411: }
1412: }
1413: return(0);
1414: }