Actual source code: lmedense.c
slepc-3.14.2 2021-02-01
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-2020, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
10: /*
11: Routines for solving dense matrix equations, in some cases calling SLICOT
12: */
14: #include <slepc/private/lmeimpl.h>
15: #include <slepcblaslapack.h>
17: /*
18: LMEDenseRankSVD - given a square matrix A, compute its SVD U*S*V', and determine the
19: numerical rank. On exit, U contains U*S and A is overwritten with V'
20: */
21: PetscErrorCode LMEDenseRankSVD(LME lme,PetscInt n,PetscScalar *A,PetscInt lda,PetscScalar *U,PetscInt ldu,PetscInt *rank)
22: {
24: PetscInt i,j,rk=0;
25: PetscScalar *work;
26: PetscReal tol,*sg,*rwork;
27: PetscBLASInt n_,lda_,ldu_,info,lw_;
30: PetscCalloc3(n,&sg,10*n,&work,5*n,&rwork);
31: PetscBLASIntCast(n,&n_);
32: PetscBLASIntCast(lda,&lda_);
33: PetscBLASIntCast(ldu,&ldu_);
34: lw_ = 10*n_;
35: #if !defined (PETSC_USE_COMPLEX)
36: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","O",&n_,&n_,A,&lda_,sg,U,&ldu_,NULL,&n_,work,&lw_,&info));
37: #else
38: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","O",&n_,&n_,A,&lda_,sg,U,&ldu_,NULL,&n_,work,&lw_,rwork,&info));
39: #endif
40: SlepcCheckLapackInfo("gesvd",info);
41: tol = 10*PETSC_MACHINE_EPSILON*n*sg[0];
42: for (j=0;j<n;j++) {
43: if (sg[j]>tol) {
44: for (i=0;i<n;i++) U[i+j*n] *= sg[j];
45: rk++;
46: } else break;
47: }
48: *rank = rk;
49: PetscFree3(sg,work,rwork);
50: return(0);
51: }
53: #if defined(PETSC_USE_INFO)
54: /*
55: LyapunovCholResidual - compute the residual norm ||A*U'*U+U'*U*A'+B*B'||
56: */
57: static PetscErrorCode LyapunovCholResidual(PetscInt m,PetscScalar *A,PetscInt lda,PetscInt k,PetscScalar *B,PetscInt ldb,PetscScalar *U,PetscInt ldu,PetscReal *res)
58: {
60: PetscBLASInt n,kk,la,lb,lu;
61: PetscScalar *M,*R,zero=0.0,done=1.0;
64: *res = 0;
65: PetscBLASIntCast(lda,&la);
66: PetscBLASIntCast(ldb,&lb);
67: PetscBLASIntCast(ldu,&lu);
68: PetscBLASIntCast(m,&n);
69: PetscBLASIntCast(k,&kk);
70: PetscMalloc2(m*m,&M,m*m,&R);
72: /* R = B*B' */
73: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","C",&n,&n,&kk,&done,B,&lb,B,&lb,&zero,R,&n));
74: /* M = A*U' */
75: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","C",&n,&n,&n,&done,A,&la,U,&lu,&zero,M,&n));
76: /* R = R+M*U */
77: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&done,M,&n,U,&lu,&done,R,&n));
78: /* R = R+U'*M' */
79: PetscStackCallBLAS("BLASgemm",BLASgemm_("C","C",&n,&n,&n,&done,U,&lu,M,&n,&done,R,&n));
81: *res = LAPACKlange_("F",&n,&n,R,&n,NULL);
82: PetscFree2(M,R);
83: return(0);
84: }
86: /*
87: LyapunovResidual - compute the residual norm ||A*X+X*A'+B||
88: */
89: static PetscErrorCode LyapunovResidual(PetscInt m,PetscScalar *A,PetscInt lda,PetscScalar *B,PetscInt ldb,PetscScalar *X,PetscInt ldx,PetscReal *res)
90: {
92: PetscInt i;
93: PetscBLASInt n,la,lb,lx;
94: PetscScalar *R,done=1.0;
97: *res = 0;
98: PetscBLASIntCast(lda,&la);
99: PetscBLASIntCast(ldb,&lb);
100: PetscBLASIntCast(ldx,&lx);
101: PetscBLASIntCast(m,&n);
102: PetscMalloc1(m*m,&R);
104: /* R = B+A*X */
105: for (i=0;i<m;i++) {
106: PetscArraycpy(R+i*m,B+i*ldb,m);
107: }
108: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&done,A,&la,X,&lx,&done,R,&n));
109: /* R = R+X*A' */
110: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","C",&n,&n,&n,&done,X,&lx,A,&la,&done,R,&n));
112: *res = LAPACKlange_("F",&n,&n,R,&n,NULL);
113: PetscFree(R);
114: return(0);
115: }
116: #endif
118: #if defined(SLEPC_HAVE_SLICOT)
119: /*
120: HessLyapunovChol_SLICOT - implementation used when SLICOT is available
121: */
122: static PetscErrorCode HessLyapunovChol_SLICOT(PetscInt m,PetscScalar *H,PetscInt ldh,PetscInt k,PetscScalar *B,PetscInt ldb,PetscScalar *U,PetscInt ldu,PetscReal *res)
123: {
125: PetscBLASInt lwork,info,n,kk,lu,ione=1,sdim;
126: PetscInt i,j;
127: PetscReal scal;
128: PetscScalar *Q,*W,*wr,*wi,*work;
131: PetscBLASIntCast(ldu,&lu);
132: PetscBLASIntCast(m,&n);
133: PetscBLASIntCast(k,&kk);
134: PetscBLASIntCast(6*m,&lwork);
136: /* transpose W = H' */
137: PetscMalloc5(m*m,&W,m*m,&Q,m,&wr,m,&wi,lwork,&work);
138: for (j=0;j<m;j++) {
139: for (i=0;i<m;i++) W[i+j*m] = H[j+i*ldh];
140: }
142: /* compute the real Schur form of W */
143: PetscStackCallBLAS("LAPACKgees",LAPACKgees_("V","N",NULL,&n,W,&n,&sdim,wr,wi,Q,&n,work,&lwork,NULL,&info));
144: SlepcCheckLapackInfo("gees",info);
145: #if defined(PETSC_USE_DEBUG)
146: for (i=0;i<m;i++) if (PetscRealPart(wr[i])>=0.0) SETERRQ(PETSC_COMM_SELF,1,"Eigenvalue with non-negative real part, the coefficient matrix is not stable");
147: #endif
149: /* copy B' into first rows of U */
150: for (i=0;i<k;i++) {
151: for (j=0;j<m;j++) U[i+j*ldu] = B[j+i*ldb];
152: }
154: /* solve Lyapunov equation (Hammarling) */
155: PetscStackCallBLAS("SLICOTsb03od",SLICOTsb03od_("C","F","N",&n,&kk,W,&n,Q,&n,U,&lu,&scal,wr,wi,work,&lwork,&info));
156: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SLICOT subroutine SB03OD: info=%d",(int)info);
157: if (scal!=1.0) SETERRQ1(PETSC_COMM_SELF,1,"Current implementation cannot handle scale factor %g",scal);
159: /* resnorm = norm(H(m+1,:)*U'*U), use Q(:,1) = U'*U(:,m) */
160: if (res) {
161: for (j=0;j<m;j++) Q[j] = U[j+(m-1)*ldu];
162: PetscStackCallBLAS("BLAStrmv",BLAStrmv_("U","C","N",&n,U,&lu,Q,&ione));
163: if (k!=1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Residual error is intended for k=1 only, but you set k=%d",(int)k);
164: *res *= BLASnrm2_(&n,Q,&ione);
165: }
167: PetscFree5(W,Q,wr,wi,work);
168: return(0);
169: }
171: #else
173: /*
174: Compute the upper Cholesky factor of A
175: */
176: static PetscErrorCode CholeskyFactor(PetscInt m,PetscScalar *A,PetscInt lda)
177: {
179: PetscInt i;
180: PetscScalar *S;
181: PetscBLASInt info,n,ld;
184: PetscBLASIntCast(m,&n);
185: PetscBLASIntCast(lda,&ld);
186: PetscMalloc1(m*m,&S);
188: /* save a copy of matrix in S */
189: for (i=0;i<m;i++) {
190: PetscArraycpy(S+i*m,A+i*lda,m);
191: }
193: /* compute upper Cholesky factor in R */
194: PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("U",&n,A,&ld,&info));
195: PetscLogFlops((1.0*n*n*n)/3.0);
197: if (info) {
198: PetscInfo(NULL,"potrf failed, retry on diagonally perturbed matrix\n");
199: for (i=0;i<m;i++) {
200: PetscArraycpy(A+i*lda,S+i*m,m);
201: A[i+i*lda] += 50.0*PETSC_MACHINE_EPSILON;
202: }
203: PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("U",&n,A,&ld,&info));
204: SlepcCheckLapackInfo("potrf",info);
205: PetscLogFlops((1.0*n*n*n)/3.0);
206: }
208: /* Zero out entries below the diagonal */
209: for (i=0;i<m-1;i++) {
210: PetscArrayzero(A+i*lda+i+1,m-i-1);
211: }
212: PetscFree(S);
213: return(0);
214: }
216: /*
217: HessLyapunovChol_LAPACK - alternative implementation when SLICOT is not available
218: */
219: static PetscErrorCode HessLyapunovChol_LAPACK(PetscInt m,PetscScalar *H,PetscInt ldh,PetscInt k,PetscScalar *B,PetscInt ldb,PetscScalar *U,PetscInt ldu,PetscReal *res)
220: {
222: PetscBLASInt ilo=1,lwork,info,n,kk,lu,lb,ione=1;
223: PetscInt i,j;
224: PetscReal scal;
225: PetscScalar *Q,*C,*W,*Z,*wr,*work,zero=0.0,done=1.0,dmone=-1.0;
226: #if !defined(PETSC_USE_COMPLEX)
227: PetscScalar *wi;
228: #endif
231: PetscBLASIntCast(ldb,&lb);
232: PetscBLASIntCast(ldu,&lu);
233: PetscBLASIntCast(m,&n);
234: PetscBLASIntCast(k,&kk);
235: PetscBLASIntCast(6*m,&lwork);
236: C = U;
238: #if !defined(PETSC_USE_COMPLEX)
239: PetscMalloc6(m*m,&Q,m*m,&W,m*k,&Z,m,&wr,m,&wi,lwork,&work);
240: #else
241: PetscMalloc5(m*m,&Q,m*m,&W,m*k,&Z,m,&wr,lwork,&work);
242: #endif
244: /* save a copy W = H */
245: for (j=0;j<m;j++) {
246: for (i=0;i<m;i++) W[i+j*m] = H[i+j*ldh];
247: }
249: /* compute the (real) Schur form of W */
250: #if !defined(PETSC_USE_COMPLEX)
251: PetscStackCallBLAS("LAPACKhseqr",LAPACKhseqr_("S","I",&n,&ilo,&n,W,&n,wr,wi,Q,&n,work,&lwork,&info));
252: #else
253: PetscStackCallBLAS("LAPACKhseqr",LAPACKhseqr_("S","I",&n,&ilo,&n,W,&n,wr,Q,&n,work,&lwork,&info));
254: #endif
255: SlepcCheckLapackInfo("hseqr",info);
256: #if defined(PETSC_USE_DEBUG)
257: for (i=0;i<m;i++) if (PetscRealPart(wr[i])>=0.0) SETERRQ1(PETSC_COMM_SELF,1,"Eigenvalue with non-negative real part %g, the coefficient matrix is not stable",PetscRealPart(wr[i]));
258: #endif
260: /* C = -Z*Z', Z = Q'*B */
261: PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&n,&kk,&n,&done,Q,&n,B,&lb,&zero,Z,&n));
262: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","C",&n,&n,&kk,&dmone,Z,&n,Z,&n,&zero,C,&lu));
264: /* solve triangular Sylvester equation */
265: PetscStackCallBLAS("LAPACKtrsyl",LAPACKtrsyl_("N","C",&ione,&n,&n,W,&n,W,&n,C,&lu,&scal,&info));
266: SlepcCheckLapackInfo("trsyl",info);
267: if (scal!=1.0) SETERRQ1(PETSC_COMM_SELF,1,"Current implementation cannot handle scale factor %g",scal);
269: /* back-transform C = Q*C*Q' */
270: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&done,Q,&n,C,&n,&zero,W,&n));
271: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","C",&n,&n,&n,&done,W,&n,Q,&n,&zero,C,&lu));
273: /* resnorm = norm(H(m+1,:)*Y) */
274: if (res) {
275: if (k!=1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Residual error is intended for k=1 only, but you set k=%d",(int)k);
276: *res *= BLASnrm2_(&n,C+m-1,&n);
277: }
279: /* U = chol(C) */
280: CholeskyFactor(m,C,ldu);
282: #if !defined(PETSC_USE_COMPLEX)
283: PetscFree6(Q,W,Z,wr,wi,work);
284: #else
285: PetscFree5(Q,W,Z,wr,work);
286: #endif
287: return(0);
288: }
290: #endif /* SLEPC_HAVE_SLICOT */
292: /*@C
293: LMEDenseHessLyapunovChol - Computes the Cholesky factor of the solution of a
294: dense Lyapunov equation with an upper Hessenberg coefficient matrix.
296: Logically Collective on lme
298: Input Parameters:
299: + lme - linear matrix equation solver context
300: . m - number of rows and columns of H
301: . H - coefficient matrix
302: . ldh - leading dimension of H
303: . k - number of columns of B
304: . B - right-hand side matrix
305: . ldb - leading dimension of B
306: - ldu - leading dimension of U
308: Output Parameter:
309: . U - Cholesky factor of the solution
311: Input/Output Parameter:
312: . res - (optional) residual norm, on input it should contain H(m+1,m)
314: Note:
315: The Lyapunov equation has the form H*X + X*H' = -B*B', where H is an mxm
316: upper Hessenberg matrix, B is an mxk matrix and the solution is expressed
317: as X = U'*U, where U is upper triangular. H is supposed to be stable.
319: When k=1 and the res argument is provided, the last row of X is used to
320: compute the residual norm of a Lyapunov equation projected via Arnoldi.
322: Level: developer
324: .seealso: LMEDenseLyapunov(), LMESolve()
325: @*/
326: PetscErrorCode LMEDenseHessLyapunovChol(LME lme,PetscInt m,PetscScalar *H,PetscInt ldh,PetscInt k,PetscScalar *B,PetscInt ldb,PetscScalar *U,PetscInt ldu,PetscReal *res)
327: {
329: #if defined(PETSC_USE_INFO)
330: PetscReal error;
331: #endif
345: #if defined(SLEPC_HAVE_SLICOT)
346: HessLyapunovChol_SLICOT(m,H,ldh,k,B,ldb,U,ldu,res);
347: #else
348: HessLyapunovChol_LAPACK(m,H,ldh,k,B,ldb,U,ldu,res);
349: #endif
351: #if defined(PETSC_USE_INFO)
352: if (PetscLogPrintInfo) {
353: LyapunovCholResidual(m,H,ldh,k,B,ldb,U,ldu,&error);
354: PetscInfo1(lme,"Residual norm of dense Lyapunov equation = %g\n",error);
355: }
356: #endif
357: return(0);
358: }
360: #if defined(SLEPC_HAVE_SLICOT)
361: /*
362: Lyapunov_SLICOT - implementation used when SLICOT is available
363: */
364: static PetscErrorCode Lyapunov_SLICOT(PetscInt m,PetscScalar *H,PetscInt ldh,PetscScalar *B,PetscInt ldb,PetscScalar *X,PetscInt ldx)
365: {
367: PetscBLASInt sdim,lwork,info,n,lx,*iwork;
368: PetscInt i,j;
369: PetscReal scal,sep,ferr,*work;
370: PetscScalar *Q,*W,*wr,*wi;
373: PetscBLASIntCast(ldx,&lx);
374: PetscBLASIntCast(m,&n);
375: PetscBLASIntCast(PetscMax(20,m*m),&lwork);
377: /* transpose W = H' */
378: PetscMalloc6(m*m,&W,m*m,&Q,m,&wr,m,&wi,m*m,&iwork,lwork,&work);
379: for (j=0;j<m;j++) {
380: for (i=0;i<m;i++) W[i+j*m] = H[j+i*ldh];
381: }
383: /* compute the real Schur form of W */
384: PetscStackCallBLAS("LAPACKgees",LAPACKgees_("V","N",NULL,&n,W,&n,&sdim,wr,wi,Q,&n,work,&lwork,NULL,&info));
385: SlepcCheckLapackInfo("gees",info);
387: /* copy -B into X */
388: for (i=0;i<m;i++) {
389: for (j=0;j<m;j++) X[i+j*ldx] = -B[i+j*ldb];
390: }
392: /* solve Lyapunov equation (Hammarling) */
393: PetscStackCallBLAS("SLICOTsb03od",SLICOTsb03md_("C","X","F","N",&n,W,&n,Q,&n,X,&lx,&scal,&sep,&ferr,wr,wi,iwork,work,&lwork,&info));
394: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SLICOT subroutine SB03OD: info=%d",(int)info);
395: if (scal!=1.0) SETERRQ1(PETSC_COMM_SELF,1,"Current implementation cannot handle scale factor %g",scal);
397: PetscFree6(W,Q,wr,wi,iwork,work);
398: return(0);
399: }
401: #else
403: /*
404: Lyapunov_LAPACK - alternative implementation when SLICOT is not available
405: */
406: static PetscErrorCode Lyapunov_LAPACK(PetscInt m,PetscScalar *A,PetscInt lda,PetscScalar *B,PetscInt ldb,PetscScalar *X,PetscInt ldx)
407: {
409: PetscBLASInt sdim,lwork,info,n,lx,lb,ione=1;
410: PetscInt i,j;
411: PetscReal scal;
412: PetscScalar *Q,*W,*Z,*wr,*work,zero=0.0,done=1.0,dmone=-1.0;
413: #if defined(PETSC_USE_COMPLEX)
414: PetscReal *rwork;
415: #else
416: PetscScalar *wi;
417: #endif
420: PetscBLASIntCast(ldb,&lb);
421: PetscBLASIntCast(ldx,&lx);
422: PetscBLASIntCast(m,&n);
423: PetscBLASIntCast(6*m,&lwork);
425: #if !defined(PETSC_USE_COMPLEX)
426: PetscMalloc6(m*m,&Q,m*m,&W,m*m,&Z,m,&wr,m,&wi,lwork,&work);
427: #else
428: PetscMalloc6(m*m,&Q,m*m,&W,m*m,&Z,m,&wr,lwork,&work,m,&rwork);
429: #endif
431: /* save a copy W = A */
432: for (j=0;j<m;j++) {
433: for (i=0;i<m;i++) W[i+j*m] = A[i+j*lda];
434: }
436: /* compute the (real) Schur form of W */
437: #if !defined(PETSC_USE_COMPLEX)
438: PetscStackCallBLAS("LAPACKgees",LAPACKgees_("V","N",NULL,&n,W,&n,&sdim,wr,wi,Q,&n,work,&lwork,NULL,&info));
439: #else
440: PetscStackCallBLAS("LAPACKgees",LAPACKgees_("V","N",NULL,&n,W,&n,&sdim,wr,Q,&n,work,&lwork,rwork,NULL,&info));
441: #endif
442: SlepcCheckLapackInfo("gees",info);
444: /* X = -Q'*B*Q */
445: PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&n,&n,&n,&done,Q,&n,B,&lb,&zero,Z,&n));
446: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&dmone,Z,&n,Q,&n,&zero,X,&lx));
448: /* solve triangular Sylvester equation */
449: PetscStackCallBLAS("LAPACKtrsyl",LAPACKtrsyl_("N","C",&ione,&n,&n,W,&n,W,&n,X,&lx,&scal,&info));
450: SlepcCheckLapackInfo("trsyl",info);
451: if (scal!=1.0) SETERRQ1(PETSC_COMM_SELF,1,"Current implementation cannot handle scale factor %g",scal);
453: /* back-transform X = Q*X*Q' */
454: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&done,Q,&n,X,&n,&zero,W,&n));
455: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","C",&n,&n,&n,&done,W,&n,Q,&n,&zero,X,&lx));
457: #if !defined(PETSC_USE_COMPLEX)
458: PetscFree6(Q,W,Z,wr,wi,work);
459: #else
460: PetscFree6(Q,W,Z,wr,work,rwork);
461: #endif
462: return(0);
463: }
465: #endif /* SLEPC_HAVE_SLICOT */
467: /*@C
468: LMEDenseLyapunov - Computes the solution of a dense continuous-time Lyapunov
469: equation.
471: Logically Collective on lme
473: Input Parameters:
474: + lme - linear matrix equation solver context
475: . m - number of rows and columns of A
476: . A - coefficient matrix
477: . lda - leading dimension of A
478: . B - right-hand side matrix
479: . ldb - leading dimension of B
480: - ldx - leading dimension of X
482: Output Parameter:
483: . X - the solution
485: Note:
486: The Lyapunov equation has the form A*X + X*A' = -B, where all are mxm
487: matrices, and B is symmetric.
489: Level: developer
491: .seealso: LMEDenseHessLyapunovChol(), LMESolve()
492: @*/
493: PetscErrorCode LMEDenseLyapunov(LME lme,PetscInt m,PetscScalar *A,PetscInt lda,PetscScalar *B,PetscInt ldb,PetscScalar *X,PetscInt ldx)
494: {
496: #if defined(PETSC_USE_INFO)
497: PetscReal error;
498: #endif
510: #if defined(SLEPC_HAVE_SLICOT)
511: Lyapunov_SLICOT(m,A,lda,B,ldb,X,ldx);
512: #else
513: Lyapunov_LAPACK(m,A,lda,B,ldb,X,ldx);
514: #endif
516: #if defined(PETSC_USE_INFO)
517: if (PetscLogPrintInfo) {
518: LyapunovResidual(m,A,lda,B,ldb,X,ldx,&error);
519: PetscInfo1(lme,"Residual norm of dense Lyapunov equation = %g\n",error);
520: }
521: #endif
522: return(0);
523: }