Actual source code: lmedense.c
slepc-3.18.1 2022-11-02
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
10: /*
11: Routines for solving dense matrix equations, in some cases calling SLICOT
12: */
14: #include <slepc/private/lmeimpl.h>
15: #include <slepcblaslapack.h>
17: /*
18: LMEDenseRankSVD - given a square matrix A, compute its SVD U*S*V', and determine the
19: numerical rank. On exit, U contains U*S and A is overwritten with V'
20: */
21: PetscErrorCode LMEDenseRankSVD(LME lme,PetscInt n,PetscScalar *A,PetscInt lda,PetscScalar *U,PetscInt ldu,PetscInt *rank)
22: {
23: PetscInt i,j,rk=0;
24: PetscScalar *work;
25: PetscReal tol,*sg,*rwork;
26: PetscBLASInt n_,lda_,ldu_,info,lw_;
28: PetscCalloc3(n,&sg,10*n,&work,5*n,&rwork);
29: PetscBLASIntCast(n,&n_);
30: PetscBLASIntCast(lda,&lda_);
31: PetscBLASIntCast(ldu,&ldu_);
32: lw_ = 10*n_;
33: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
34: #if !defined (PETSC_USE_COMPLEX)
35: PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","O",&n_,&n_,A,&lda_,sg,U,&ldu_,NULL,&n_,work,&lw_,&info));
36: #else
37: PetscCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","O",&n_,&n_,A,&lda_,sg,U,&ldu_,NULL,&n_,work,&lw_,rwork,&info));
38: #endif
39: PetscFPTrapPop();
40: SlepcCheckLapackInfo("gesvd",info);
41: tol = 10*PETSC_MACHINE_EPSILON*n*sg[0];
42: for (j=0;j<n;j++) {
43: if (sg[j]>tol) {
44: for (i=0;i<n;i++) U[i+j*n] *= sg[j];
45: rk++;
46: } else break;
47: }
48: *rank = rk;
49: PetscFree3(sg,work,rwork);
50: return 0;
51: }
53: #if defined(PETSC_USE_INFO)
54: /*
55: LyapunovCholResidual - compute the residual norm ||A*U'*U+U'*U*A'+B*B'||
56: */
57: static PetscErrorCode LyapunovCholResidual(PetscInt m,PetscScalar *A,PetscInt lda,PetscInt k,PetscScalar *B,PetscInt ldb,PetscScalar *U,PetscInt ldu,PetscReal *res)
58: {
59: PetscBLASInt n,kk,la,lb,lu;
60: PetscScalar *M,*R,zero=0.0,done=1.0;
62: *res = 0;
63: PetscBLASIntCast(lda,&la);
64: PetscBLASIntCast(ldb,&lb);
65: PetscBLASIntCast(ldu,&lu);
66: PetscBLASIntCast(m,&n);
67: PetscBLASIntCast(k,&kk);
68: PetscMalloc2(m*m,&M,m*m,&R);
70: /* R = B*B' */
71: PetscCallBLAS("BLASgemm",BLASgemm_("N","C",&n,&n,&kk,&done,B,&lb,B,&lb,&zero,R,&n));
72: /* M = A*U' */
73: PetscCallBLAS("BLASgemm",BLASgemm_("N","C",&n,&n,&n,&done,A,&la,U,&lu,&zero,M,&n));
74: /* R = R+M*U */
75: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&done,M,&n,U,&lu,&done,R,&n));
76: /* R = R+U'*M' */
77: PetscCallBLAS("BLASgemm",BLASgemm_("C","C",&n,&n,&n,&done,U,&lu,M,&n,&done,R,&n));
79: *res = LAPACKlange_("F",&n,&n,R,&n,NULL);
80: PetscFree2(M,R);
81: return 0;
82: }
84: /*
85: LyapunovResidual - compute the residual norm ||A*X+X*A'+B||
86: */
87: static PetscErrorCode LyapunovResidual(PetscInt m,PetscScalar *A,PetscInt lda,PetscScalar *B,PetscInt ldb,PetscScalar *X,PetscInt ldx,PetscReal *res)
88: {
89: PetscInt i;
90: PetscBLASInt n,la,lb,lx;
91: PetscScalar *R,done=1.0;
93: *res = 0;
94: PetscBLASIntCast(lda,&la);
95: PetscBLASIntCast(ldb,&lb);
96: PetscBLASIntCast(ldx,&lx);
97: PetscBLASIntCast(m,&n);
98: PetscMalloc1(m*m,&R);
100: /* R = B+A*X */
101: for (i=0;i<m;i++) PetscArraycpy(R+i*m,B+i*ldb,m);
102: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&done,A,&la,X,&lx,&done,R,&n));
103: /* R = R+X*A' */
104: PetscCallBLAS("BLASgemm",BLASgemm_("N","C",&n,&n,&n,&done,X,&lx,A,&la,&done,R,&n));
106: *res = LAPACKlange_("F",&n,&n,R,&n,NULL);
107: PetscFree(R);
108: return 0;
109: }
110: #endif
112: #if defined(SLEPC_HAVE_SLICOT)
113: /*
114: HessLyapunovChol_SLICOT - implementation used when SLICOT is available
115: */
116: static PetscErrorCode HessLyapunovChol_SLICOT(PetscInt m,PetscScalar *H,PetscInt ldh,PetscInt k,PetscScalar *B,PetscInt ldb,PetscScalar *U,PetscInt ldu,PetscReal *res)
117: {
118: PetscBLASInt lwork,info,n,kk,lu,ione=1,sdim;
119: PetscInt i,j;
120: PetscReal scal;
121: PetscScalar *Q,*W,*wr,*wi,*work;
123: PetscBLASIntCast(ldu,&lu);
124: PetscBLASIntCast(m,&n);
125: PetscBLASIntCast(k,&kk);
126: PetscBLASIntCast(6*m,&lwork);
127: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
129: /* transpose W = H' */
130: PetscMalloc5(m*m,&W,m*m,&Q,m,&wr,m,&wi,lwork,&work);
131: for (j=0;j<m;j++) {
132: for (i=0;i<m;i++) W[i+j*m] = H[j+i*ldh];
133: }
135: /* compute the real Schur form of W */
136: PetscCallBLAS("LAPACKgees",LAPACKgees_("V","N",NULL,&n,W,&n,&sdim,wr,wi,Q,&n,work,&lwork,NULL,&info));
137: SlepcCheckLapackInfo("gees",info);
138: #if defined(PETSC_USE_DEBUG)
140: #endif
142: /* copy B' into first rows of U */
143: for (i=0;i<k;i++) {
144: for (j=0;j<m;j++) U[i+j*ldu] = B[j+i*ldb];
145: }
147: /* solve Lyapunov equation (Hammarling) */
148: PetscCallBLAS("SLICOTsb03od",SLICOTsb03od_("C","F","N",&n,&kk,W,&n,Q,&n,U,&lu,&scal,wr,wi,work,&lwork,&info));
152: /* resnorm = norm(H(m+1,:)*U'*U), use Q(:,1) = U'*U(:,m) */
153: if (res) {
154: for (j=0;j<m;j++) Q[j] = U[j+(m-1)*ldu];
155: PetscCallBLAS("BLAStrmv",BLAStrmv_("U","C","N",&n,U,&lu,Q,&ione));
157: *res *= BLASnrm2_(&n,Q,&ione);
158: }
160: PetscFPTrapPop();
161: PetscFree5(W,Q,wr,wi,work);
162: return 0;
163: }
165: #else
167: /*
168: Compute the upper Cholesky factor of A
169: */
170: static PetscErrorCode CholeskyFactor(PetscInt m,PetscScalar *A,PetscInt lda)
171: {
172: PetscInt i;
173: PetscScalar *S;
174: PetscBLASInt info,n,ld;
176: PetscBLASIntCast(m,&n);
177: PetscBLASIntCast(lda,&ld);
178: PetscMalloc1(m*m,&S);
179: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
181: /* save a copy of matrix in S */
182: for (i=0;i<m;i++) PetscArraycpy(S+i*m,A+i*lda,m);
184: /* compute upper Cholesky factor in R */
185: PetscCallBLAS("LAPACKpotrf",LAPACKpotrf_("U",&n,A,&ld,&info));
186: PetscLogFlops((1.0*n*n*n)/3.0);
188: if (info) {
189: for (i=0;i<m;i++) {
190: PetscArraycpy(A+i*lda,S+i*m,m);
191: A[i+i*lda] += 50.0*PETSC_MACHINE_EPSILON;
192: }
193: PetscCallBLAS("LAPACKpotrf",LAPACKpotrf_("U",&n,A,&ld,&info));
194: SlepcCheckLapackInfo("potrf",info);
195: PetscLogFlops((1.0*n*n*n)/3.0);
196: }
198: /* Zero out entries below the diagonal */
199: for (i=0;i<m-1;i++) PetscArrayzero(A+i*lda+i+1,m-i-1);
200: PetscFPTrapPop();
201: PetscFree(S);
202: return 0;
203: }
205: /*
206: HessLyapunovChol_LAPACK - alternative implementation when SLICOT is not available
207: */
208: static PetscErrorCode HessLyapunovChol_LAPACK(PetscInt m,PetscScalar *H,PetscInt ldh,PetscInt k,PetscScalar *B,PetscInt ldb,PetscScalar *U,PetscInt ldu,PetscReal *res)
209: {
210: PetscBLASInt ilo=1,lwork,info,n,kk,lu,lb,ione=1;
211: PetscInt i,j;
212: PetscReal scal;
213: PetscScalar *Q,*C,*W,*Z,*wr,*work,zero=0.0,done=1.0,dmone=-1.0;
214: #if !defined(PETSC_USE_COMPLEX)
215: PetscScalar *wi;
216: #endif
218: PetscBLASIntCast(ldb,&lb);
219: PetscBLASIntCast(ldu,&lu);
220: PetscBLASIntCast(m,&n);
221: PetscBLASIntCast(k,&kk);
222: PetscBLASIntCast(6*m,&lwork);
223: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
224: C = U;
226: #if !defined(PETSC_USE_COMPLEX)
227: PetscMalloc6(m*m,&Q,m*m,&W,m*k,&Z,m,&wr,m,&wi,lwork,&work);
228: #else
229: PetscMalloc5(m*m,&Q,m*m,&W,m*k,&Z,m,&wr,lwork,&work);
230: #endif
232: /* save a copy W = H */
233: for (j=0;j<m;j++) {
234: for (i=0;i<m;i++) W[i+j*m] = H[i+j*ldh];
235: }
237: /* compute the (real) Schur form of W */
238: #if !defined(PETSC_USE_COMPLEX)
239: PetscCallBLAS("LAPACKhseqr",LAPACKhseqr_("S","I",&n,&ilo,&n,W,&n,wr,wi,Q,&n,work,&lwork,&info));
240: #else
241: PetscCallBLAS("LAPACKhseqr",LAPACKhseqr_("S","I",&n,&ilo,&n,W,&n,wr,Q,&n,work,&lwork,&info));
242: #endif
243: SlepcCheckLapackInfo("hseqr",info);
244: #if defined(PETSC_USE_DEBUG)
246: #endif
248: /* C = -Z*Z', Z = Q'*B */
249: PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&n,&kk,&n,&done,Q,&n,B,&lb,&zero,Z,&n));
250: PetscCallBLAS("BLASgemm",BLASgemm_("N","C",&n,&n,&kk,&dmone,Z,&n,Z,&n,&zero,C,&lu));
252: /* solve triangular Sylvester equation */
253: PetscCallBLAS("LAPACKtrsyl",LAPACKtrsyl_("N","C",&ione,&n,&n,W,&n,W,&n,C,&lu,&scal,&info));
254: SlepcCheckLapackInfo("trsyl",info);
257: /* back-transform C = Q*C*Q' */
258: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&done,Q,&n,C,&n,&zero,W,&n));
259: PetscCallBLAS("BLASgemm",BLASgemm_("N","C",&n,&n,&n,&done,W,&n,Q,&n,&zero,C,&lu));
261: /* resnorm = norm(H(m+1,:)*Y) */
262: if (res) {
264: *res *= BLASnrm2_(&n,C+m-1,&n);
265: }
267: /* U = chol(C) */
268: CholeskyFactor(m,C,ldu);
270: PetscFPTrapPop();
271: #if !defined(PETSC_USE_COMPLEX)
272: PetscFree6(Q,W,Z,wr,wi,work);
273: #else
274: PetscFree5(Q,W,Z,wr,work);
275: #endif
276: return 0;
277: }
279: #endif /* SLEPC_HAVE_SLICOT */
281: /*@C
282: LMEDenseHessLyapunovChol - Computes the Cholesky factor of the solution of a
283: dense Lyapunov equation with an upper Hessenberg coefficient matrix.
285: Logically Collective on lme
287: Input Parameters:
288: + lme - linear matrix equation solver context
289: . m - number of rows and columns of H
290: . H - coefficient matrix
291: . ldh - leading dimension of H
292: . k - number of columns of B
293: . B - right-hand side matrix
294: . ldb - leading dimension of B
295: - ldu - leading dimension of U
297: Output Parameters:
298: + U - Cholesky factor of the solution
299: - res - (optional) residual norm, on input it should contain H(m+1,m)
301: Note:
302: The Lyapunov equation has the form H*X + X*H' = -B*B', where H is an mxm
303: upper Hessenberg matrix, B is an mxk matrix and the solution is expressed
304: as X = U'*U, where U is upper triangular. H is supposed to be stable.
306: When k=1 and the res argument is provided, the last row of X is used to
307: compute the residual norm of a Lyapunov equation projected via Arnoldi.
309: Level: developer
311: .seealso: LMEDenseLyapunov(), LMESolve()
312: @*/
313: PetscErrorCode LMEDenseHessLyapunovChol(LME lme,PetscInt m,PetscScalar *H,PetscInt ldh,PetscInt k,PetscScalar *B,PetscInt ldb,PetscScalar *U,PetscInt ldu,PetscReal *res)
314: {
315: #if defined(PETSC_USE_INFO)
316: PetscReal error;
317: #endif
330: #if defined(SLEPC_HAVE_SLICOT)
331: HessLyapunovChol_SLICOT(m,H,ldh,k,B,ldb,U,ldu,res);
332: #else
333: HessLyapunovChol_LAPACK(m,H,ldh,k,B,ldb,U,ldu,res);
334: #endif
336: #if defined(PETSC_USE_INFO)
337: if (PetscLogPrintInfo) {
338: LyapunovCholResidual(m,H,ldh,k,B,ldb,U,ldu,&error);
339: PetscInfo(lme,"Residual norm of dense Lyapunov equation = %g\n",(double)error);
340: }
341: #endif
342: return 0;
343: }
345: #if defined(SLEPC_HAVE_SLICOT)
346: /*
347: Lyapunov_SLICOT - implementation used when SLICOT is available
348: */
349: static PetscErrorCode Lyapunov_SLICOT(PetscInt m,PetscScalar *H,PetscInt ldh,PetscScalar *B,PetscInt ldb,PetscScalar *X,PetscInt ldx)
350: {
351: PetscBLASInt sdim,lwork,info,n,lx,*iwork;
352: PetscInt i,j;
353: PetscReal scal,sep,ferr,*work;
354: PetscScalar *Q,*W,*wr,*wi;
356: PetscBLASIntCast(ldx,&lx);
357: PetscBLASIntCast(m,&n);
358: PetscBLASIntCast(PetscMax(20,m*m),&lwork);
359: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
361: /* transpose W = H' */
362: PetscMalloc6(m*m,&W,m*m,&Q,m,&wr,m,&wi,m*m,&iwork,lwork,&work);
363: for (j=0;j<m;j++) {
364: for (i=0;i<m;i++) W[i+j*m] = H[j+i*ldh];
365: }
367: /* compute the real Schur form of W */
368: PetscCallBLAS("LAPACKgees",LAPACKgees_("V","N",NULL,&n,W,&n,&sdim,wr,wi,Q,&n,work,&lwork,NULL,&info));
369: SlepcCheckLapackInfo("gees",info);
371: /* copy -B into X */
372: for (i=0;i<m;i++) {
373: for (j=0;j<m;j++) X[i+j*ldx] = -B[i+j*ldb];
374: }
376: /* solve Lyapunov equation (Hammarling) */
377: PetscCallBLAS("SLICOTsb03md",SLICOTsb03md_("C","X","F","N",&n,W,&n,Q,&n,X,&lx,&scal,&sep,&ferr,wr,wi,iwork,work,&lwork,&info));
381: PetscFPTrapPop();
382: PetscFree6(W,Q,wr,wi,iwork,work);
383: return 0;
384: }
386: #else
388: /*
389: Lyapunov_LAPACK - alternative implementation when SLICOT is not available
390: */
391: static PetscErrorCode Lyapunov_LAPACK(PetscInt m,PetscScalar *A,PetscInt lda,PetscScalar *B,PetscInt ldb,PetscScalar *X,PetscInt ldx)
392: {
393: PetscBLASInt sdim,lwork,info,n,lx,lb,ione=1;
394: PetscInt i,j;
395: PetscReal scal;
396: PetscScalar *Q,*W,*Z,*wr,*work,zero=0.0,done=1.0,dmone=-1.0;
397: #if defined(PETSC_USE_COMPLEX)
398: PetscReal *rwork;
399: #else
400: PetscScalar *wi;
401: #endif
403: PetscBLASIntCast(ldb,&lb);
404: PetscBLASIntCast(ldx,&lx);
405: PetscBLASIntCast(m,&n);
406: PetscBLASIntCast(6*m,&lwork);
407: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
409: #if !defined(PETSC_USE_COMPLEX)
410: PetscMalloc6(m*m,&Q,m*m,&W,m*m,&Z,m,&wr,m,&wi,lwork,&work);
411: #else
412: PetscMalloc6(m*m,&Q,m*m,&W,m*m,&Z,m,&wr,lwork,&work,m,&rwork);
413: #endif
415: /* save a copy W = A */
416: for (j=0;j<m;j++) {
417: for (i=0;i<m;i++) W[i+j*m] = A[i+j*lda];
418: }
420: /* compute the (real) Schur form of W */
421: #if !defined(PETSC_USE_COMPLEX)
422: PetscCallBLAS("LAPACKgees",LAPACKgees_("V","N",NULL,&n,W,&n,&sdim,wr,wi,Q,&n,work,&lwork,NULL,&info));
423: #else
424: PetscCallBLAS("LAPACKgees",LAPACKgees_("V","N",NULL,&n,W,&n,&sdim,wr,Q,&n,work,&lwork,rwork,NULL,&info));
425: #endif
426: SlepcCheckLapackInfo("gees",info);
428: /* X = -Q'*B*Q */
429: PetscCallBLAS("BLASgemm",BLASgemm_("C","N",&n,&n,&n,&done,Q,&n,B,&lb,&zero,Z,&n));
430: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&dmone,Z,&n,Q,&n,&zero,X,&lx));
432: /* solve triangular Sylvester equation */
433: PetscCallBLAS("LAPACKtrsyl",LAPACKtrsyl_("N","C",&ione,&n,&n,W,&n,W,&n,X,&lx,&scal,&info));
434: SlepcCheckLapackInfo("trsyl",info);
437: /* back-transform X = Q*X*Q' */
438: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&done,Q,&n,X,&n,&zero,W,&n));
439: PetscCallBLAS("BLASgemm",BLASgemm_("N","C",&n,&n,&n,&done,W,&n,Q,&n,&zero,X,&lx));
441: PetscFPTrapPop();
442: #if !defined(PETSC_USE_COMPLEX)
443: PetscFree6(Q,W,Z,wr,wi,work);
444: #else
445: PetscFree6(Q,W,Z,wr,work,rwork);
446: #endif
447: return 0;
448: }
450: #endif /* SLEPC_HAVE_SLICOT */
452: /*@C
453: LMEDenseLyapunov - Computes the solution of a dense continuous-time Lyapunov
454: equation.
456: Logically Collective on lme
458: Input Parameters:
459: + lme - linear matrix equation solver context
460: . m - number of rows and columns of A
461: . A - coefficient matrix
462: . lda - leading dimension of A
463: . B - right-hand side matrix
464: . ldb - leading dimension of B
465: - ldx - leading dimension of X
467: Output Parameter:
468: . X - the solution
470: Note:
471: The Lyapunov equation has the form A*X + X*A' = -B, where all are mxm
472: matrices, and B is symmetric.
474: Level: developer
476: .seealso: LMEDenseHessLyapunovChol(), LMESolve()
477: @*/
478: PetscErrorCode LMEDenseLyapunov(LME lme,PetscInt m,PetscScalar *A,PetscInt lda,PetscScalar *B,PetscInt ldb,PetscScalar *X,PetscInt ldx)
479: {
480: #if defined(PETSC_USE_INFO)
481: PetscReal error;
482: #endif
493: #if defined(SLEPC_HAVE_SLICOT)
494: Lyapunov_SLICOT(m,A,lda,B,ldb,X,ldx);
495: #else
496: Lyapunov_LAPACK(m,A,lda,B,ldb,X,ldx);
497: #endif
499: #if defined(PETSC_USE_INFO)
500: if (PetscLogPrintInfo) {
501: LyapunovResidual(m,A,lda,B,ldb,X,ldx,&error);
502: PetscInfo(lme,"Residual norm of dense Lyapunov equation = %g\n",(double)error);
503: }
504: #endif
505: return 0;
506: }