From eb3c651b3b53c5060e4ed3f18ccee6bd26cdc0ac Mon Sep 17 00:00:00 2001 From: Daniel Bielich Date: Sat, 30 Dec 2023 23:56:26 -0600 Subject: [PATCH] Beginning the work in progress phase for symmetric DMD --- LAPACKE/include/lapack.h | 132 ++++ LAPACKE/include/lapacke.h | 114 +++ LAPACKE/src/CMakeLists.txt | 16 + LAPACKE/src/Makefile | 16 + LAPACKE/src/lapacke_chedmd.c | 115 +++ LAPACKE/src/lapacke_chedmd_work.c | 180 +++++ LAPACKE/src/lapacke_chedmdq.c | 123 +++ LAPACKE/src/lapacke_chedmdq_work.c | 205 +++++ LAPACKE/src/lapacke_dsydmd.c | 112 +++ LAPACKE/src/lapacke_dsydmd_work.c | 179 +++++ LAPACKE/src/lapacke_dsydmdq.c | 119 +++ LAPACKE/src/lapacke_dsydmdq_work.c | 200 +++++ LAPACKE/src/lapacke_ssydmd.c | 112 +++ LAPACKE/src/lapacke_ssydmd_work.c | 179 +++++ LAPACKE/src/lapacke_ssydmdq.c | 119 +++ LAPACKE/src/lapacke_ssydmdq_work.c | 200 +++++ LAPACKE/src/lapacke_zhedmd.c | 116 +++ LAPACKE/src/lapacke_zhedmd_work.c | 182 +++++ LAPACKE/src/lapacke_zhedmdq.c | 123 +++ LAPACKE/src/lapacke_zhedmdq_work.c | 205 +++++ SRC/CMakeLists.txt | 8 +- SRC/Makefile | 8 +- SRC/chedmd.f90 | 1170 +++++++++++++++++++++++++++ SRC/chedmdq.f90 | 725 +++++++++++++++++ SRC/dsydmd.f90 | 1154 +++++++++++++++++++++++++++ SRC/dsydmdq.f90 | 699 +++++++++++++++++ SRC/la_constants.mod | Bin 0 -> 1563 bytes SRC/ssydmd.f90 | 1151 +++++++++++++++++++++++++++ SRC/ssydmdq.f90 | 699 +++++++++++++++++ SRC/zhedmd.f90 | 1171 ++++++++++++++++++++++++++++ SRC/zhedmdq.f90 | 726 +++++++++++++++++ TESTING/EIG/Makefile | 16 +- TESTING/EIG/cchkhedmd.f90 | 761 ++++++++++++++++++ TESTING/EIG/dchksydmd.f90 | 707 +++++++++++++++++ TESTING/EIG/schksydmd.f90 | 702 +++++++++++++++++ TESTING/EIG/zchkhedmd.f90 | 735 +++++++++++++++++ 36 files changed, 13167 insertions(+), 12 deletions(-) create mode 100644 LAPACKE/src/lapacke_chedmd.c create mode 100644 LAPACKE/src/lapacke_chedmd_work.c create mode 100644 LAPACKE/src/lapacke_chedmdq.c create mode 100644 LAPACKE/src/lapacke_chedmdq_work.c create mode 100644 LAPACKE/src/lapacke_dsydmd.c create mode 100644 LAPACKE/src/lapacke_dsydmd_work.c create mode 100644 LAPACKE/src/lapacke_dsydmdq.c create mode 100644 LAPACKE/src/lapacke_dsydmdq_work.c create mode 100644 LAPACKE/src/lapacke_ssydmd.c create mode 100644 LAPACKE/src/lapacke_ssydmd_work.c create mode 100644 LAPACKE/src/lapacke_ssydmdq.c create mode 100644 LAPACKE/src/lapacke_ssydmdq_work.c create mode 100644 LAPACKE/src/lapacke_zhedmd.c create mode 100644 LAPACKE/src/lapacke_zhedmd_work.c create mode 100644 LAPACKE/src/lapacke_zhedmdq.c create mode 100644 LAPACKE/src/lapacke_zhedmdq_work.c create mode 100644 SRC/chedmd.f90 create mode 100644 SRC/chedmdq.f90 create mode 100644 SRC/dsydmd.f90 create mode 100644 SRC/dsydmdq.f90 create mode 100644 SRC/la_constants.mod create mode 100644 SRC/ssydmd.f90 create mode 100644 SRC/ssydmdq.f90 create mode 100644 SRC/zhedmd.f90 create mode 100644 SRC/zhedmdq.f90 create mode 100644 TESTING/EIG/cchkhedmd.f90 create mode 100644 TESTING/EIG/dchksydmd.f90 create mode 100644 TESTING/EIG/schksydmd.f90 create mode 100644 TESTING/EIG/zchkhedmd.f90 diff --git a/LAPACKE/include/lapack.h b/LAPACKE/include/lapack.h index 0b637afb23..f25d27d0fb 100644 --- a/LAPACKE/include/lapack.h +++ b/LAPACKE/include/lapack.h @@ -17088,6 +17088,138 @@ void LAPACK_zsyconv_base( #define LAPACK_zsyconv(...) LAPACK_zsyconv_base(__VA_ARGS__) #endif +#define LAPACK_chedmd LAPACK_GLOBAL(chedmd,CHEDMD) +void LAPACK_chedmd( + char const* jobs, char const* jobz, char const* jobf, + lapack_int const* whtsvd, lapack_int const* m, lapack_int const* n, + lapack_complex_float* x, lapack_int const* ldx, + lapack_complex_float* y, lapack_int const* ldy, lapack_int const* k, + lapack_complex_float* reig, lapack_complex_float* imeig, + lapack_complex_float* z, lapack_int const* ldz, lapack_complex_float* res, + lapack_complex_float* b, lapack_int const* ldb, + lapack_complex_float* w, lapack_int const* ldw, + lapack_complex_float* s, lapack_int const* lds, + lapack_complex_float* work, lapack_int const* lwork, + lapack_int* iwork, lapack_int const* liwork, + lapack_int* info ); + +#define LAPACK_dsydmd LAPACK_GLOBAL(dsydmd,DSYDMD) +void LAPACK_dsydmd( + char const* jobs, char const* jobz, char const* jobf, + lapack_int const* whtsvd, lapack_int const* m, lapack_int const* n, + double* x, lapack_int const* ldx, + double* y, lapack_int const* ldy, lapack_int const* k, + double* reig, double* imeig, + double* z, lapack_int const* ldz, double* res, + double* b, lapack_int const* ldb, + double* w, lapack_int const* ldw, + double* s, lapack_int const* lds, + double* work, lapack_int const* lwork, + lapack_int* iwork, lapack_int const* liwork, + lapack_int* info ); + +#define LAPACK_ssydmd LAPACK_GLOBAL(ssydmd,SSYDMD) +void LAPACK_ssydmd( + char const* jobs, char const* jobz, char const* jobf, + lapack_int const* whtsvd, lapack_int const* m, lapack_int const* n, + float* x, lapack_int const* ldx, + float* y, lapack_int const* ldy, lapack_int const* k, + float* reig, float* imeig, + float* z, lapack_int const* ldz, float* res, + float* b, lapack_int const* ldb, + float* w, lapack_int const* ldw, + float* s, lapack_int const* lds, + float* work, lapack_int const* lwork, + lapack_int* iwork, lapack_int const* liwork, + lapack_int* info ); + +#define LAPACK_zhedmd LAPACK_GLOBAL(zhedmd,ZHEDMD) +void LAPACK_zhedmd( + char const* jobs, char const* jobz, char const* jobf, + lapack_int const* whtsvd, lapack_int const* m, lapack_int const* n, + lapack_complex_double* x, lapack_int const* ldx, + lapack_complex_double* y, lapack_int const* ldy, lapack_int const* k, + lapack_complex_double* reig, lapack_complex_double* imeig, + lapack_complex_double* z, lapack_int const* ldz, lapack_complex_double* res, + lapack_complex_double* b, lapack_int const* ldb, + lapack_complex_double* w, lapack_int const* ldw, + lapack_complex_double* s, lapack_int const* lds, + lapack_complex_double* work, lapack_int const* lwork, + lapack_int* iwork, lapack_int const* liwork, + lapack_int* info ); + +#define LAPACK_chedmdq LAPACK_GLOBAL(chedmdq,CHEDMDQ) +void LAPACK_chedmdq( + char const* jobs, char const* jobz, char const* jobr, char const* jobq, + char const* jobt, char const* jobf, lapack_int const* whtsvd, + lapack_int const* m, lapack_int const* n, + lapack_complex_float* f, lapack_int const* ldf, + lapack_complex_float* x, lapack_int const* ldx, + lapack_complex_float* y, lapack_int const* ldy, lapack_int const* nrnk, + float const* tol, lapack_int const* k, + lapack_complex_float* reig, lapack_complex_float* imeig, + lapack_complex_float* z, lapack_int const* ldz, lapack_complex_float* res, + lapack_complex_float* b, lapack_int const* ldb, + lapack_complex_float* v, lapack_int const* ldv, + lapack_complex_float* s, lapack_int const* lds, + lapack_complex_float* work, lapack_int const* lwork, + lapack_int* iwork, lapack_int const* liwork, + lapack_int* info ); + +#define LAPACK_dsydmdq LAPACK_GLOBAL(dsydmdq,DSYDMDQ) +void LAPACK_dsydmdq( + char const* jobs, char const* jobz, char const* jobr, char const* jobq, + char const* jobt, char const* jobf, lapack_int const* whtsvd, + lapack_int const* m, lapack_int const* n, + double* f, lapack_int const* ldf, + double* x, lapack_int const* ldx, + double* y, lapack_int const* ldy, lapack_int const* nrnk, + double const* tol, lapack_int const* k, + double* reig, double* imeig, + double* z, lapack_int const* ldz, double* res, + double* b, lapack_int const* ldb, + double* v, lapack_int const* ldv, + double* s, lapack_int const* lds, + double* work, lapack_int const* lwork, + lapack_int* iwork, lapack_int const* liwork, + lapack_int* info ); + +#define LAPACK_ssydmdq LAPACK_GLOBAL(ssydmdq,SSYDMDQ) +void LAPACK_ssydmdq( + char const* jobs, char const* jobz, char const* jobr, char const* jobq, + char const* jobt, char const* jobf, lapack_int const* whtsvd, + lapack_int const* m, lapack_int const* n, + float* f, lapack_int const* ldf, + float* x, lapack_int const* ldx, + float* y, lapack_int const* ldy, lapack_int const* nrnk, + float const* tol, lapack_int const* k, + float* reig, float* imeig, + float* z, lapack_int const* ldz, float* res, + float* b, lapack_int const* ldb, + float* v, lapack_int const* ldv, + float* s, lapack_int const* lds, + float* work, lapack_int const* lwork, + lapack_int* iwork, lapack_int const* liwork, + lapack_int* info ); + +#define LAPACK_zhedmdq LAPACK_GLOBAL(zhedmdq,ZHEDMDQ) +void LAPACK_zhedmdq( + char const* jobs, char const* jobz, char const* jobr, char const* jobq, + char const* jobt, char const* jobf, lapack_int const* whtsvd, + lapack_int const* m, lapack_int const* n, + lapack_complex_double* f, lapack_int const* ldf, + lapack_complex_double* x, lapack_int const* ldx, + lapack_complex_double* y, lapack_int const* ldy, lapack_int const* nrnk, + double const* tol, lapack_int const* k, + lapack_complex_double* reig, lapack_complex_double* imeig, + lapack_complex_double* z, lapack_int const* ldz, lapack_complex_double* res, + lapack_complex_double* b, lapack_int const* ldb, + lapack_complex_double* v, lapack_int const* ldv, + lapack_complex_double* s, lapack_int const* lds, + lapack_complex_double* work, lapack_int const* lwork, + lapack_int* iwork, lapack_int const* liwork, + lapack_int* info ); + #define LAPACK_csyequb_base LAPACK_GLOBAL(csyequb,CSYEQUB) void LAPACK_csyequb_base( char const* uplo, diff --git a/LAPACKE/include/lapacke.h b/LAPACKE/include/lapacke.h index 9a9ab47538..850c53bea6 100644 --- a/LAPACKE/include/lapacke.h +++ b/LAPACKE/include/lapacke.h @@ -9387,6 +9387,120 @@ lapack_int LAPACKE_zsycon_work( int matrix_layout, char uplo, lapack_int n, const lapack_int* ipiv, double anorm, double* rcond, lapack_complex_double* work ); +lapack_int LAPACKE_ssydmd_work( int matrix_layout, char jobs, char jobz, + char jobf, lapack_int whtsvd, lapack_int m, + lapack_int n, float* x, lapack_int ldx, + float* y, lapack_int ldy, lapack_int k, + float* reig, float* imeig, float* z, + lapack_int ldz, float* res, float* b, + lapack_int ldb, float* w, lapack_int ldw, + float* s, lapack_int lds, float* work, + lapack_int lwork, lapack_int* iwork, + lapack_int liwork ); + +lapack_int LAPACKE_dsydmd_work( int matrix_layout, char jobs, char jobz, + char jobf, lapack_int whtsvd, lapack_int m, + lapack_int n, double* x, lapack_int ldx, + double* y, lapack_int ldy, lapack_int k, + double* reig, double* imeig, double* z, + lapack_int ldz, double* res, double* b, + lapack_int ldb, double* w, lapack_int ldw, + double* s, lapack_int lds, double* work, + lapack_int lwork, lapack_int* iwork, + lapack_int liwork ); + +lapack_int LAPACKE_chedmd_work( int matrix_layout, char jobs, char jobz, + char jobf, lapack_int whtsvd, lapack_int m, + lapack_int n, lapack_complex_float* x, + lapack_int ldx, lapack_complex_float* y, + lapack_int ldy, lapack_int k, + lapack_complex_float* reig, + lapack_complex_float* imeig, + lapack_complex_float* z, lapack_int ldz, + lapack_complex_float* res, + lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* w, lapack_int ldw, + lapack_complex_float* s, lapack_int lds, + lapack_complex_float* work, lapack_int lwork, + lapack_int* iwork, lapack_int liwork ); + +lapack_int LAPACKE_zhedmd_work( int matrix_layout, char jobs, char jobz, + char jobf, lapack_int whtsvd, lapack_int m, + lapack_int n, lapack_complex_double* x, + lapack_int ldx, lapack_complex_double* y, + lapack_int ldy, lapack_int k, + lapack_complex_double* reig, + lapack_complex_double* imeig, + lapack_complex_double* z, lapack_int ldz, + lapack_complex_double* res, + lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* w, lapack_int ldw, + lapack_complex_double* s, lapack_int lds, + lapack_complex_double* work, lapack_int lwork, + lapack_int* iwork, lapack_int liwork ); + +lapack_int LAPACKE_ssydmdq_work( int matrix_layout, char jobs, char jobz, + char jobr, char jobq, char jobt, char jobf, + lapack_int whtsvd, lapack_int m, lapack_int n, + float* f, lapack_int ldf, float* x, + lapack_int ldx, float* y, lapack_int ldy, + lapack_int nrnk, float tol, lapack_int k, + float* reig, float* imeig, float* z, + lapack_int ldz, float* res, float* b, + lapack_int ldb, float* v, lapack_int ldv, + float* s, lapack_int lds, float* work, + lapack_int lwork, lapack_int* iwork, + lapack_int liwork ); + +lapack_int LAPACKE_dsydmdq_work( int matrix_layout, char jobs, char jobz, + char jobr, char jobq, char jobt, char jobf, + lapack_int whtsvd, lapack_int m, lapack_int n, + double* f, lapack_int ldf, double* x, + lapack_int ldx, double* y, lapack_int ldy, + lapack_int nrnk, double tol, lapack_int k, + double* reig, double* imeig, double* z, + lapack_int ldz, double* res, double* b, + lapack_int ldb, double* v, lapack_int ldv, + double* s, lapack_int lds, double* work, + lapack_int lwork, lapack_int* iwork, + lapack_int liwork ); + +lapack_int LAPACKE_chedmdq_work( int matrix_layout, char jobs, char jobz, + char jobr, char jobq, char jobt, char jobf, + lapack_int whtsvd, lapack_int m, lapack_int n, + lapack_complex_float* f, lapack_int ldf, + lapack_complex_float* x, lapack_int ldx, + lapack_complex_float* y, lapack_int ldy, + lapack_int nrnk, float tol, lapack_int k, + lapack_complex_float* reig, + lapack_complex_float* imeig, + lapack_complex_float* z, lapack_int ldz, + lapack_complex_float* res, + lapack_complex_float* b, lapack_int ldb, + lapack_complex_float* v, lapack_int ldv, + lapack_complex_float* s, lapack_int lds, + lapack_complex_float* work, lapack_int lwork, + lapack_int* iwork, + lapack_int liwork ); + +lapack_int LAPACKE_zhedmdq_work( int matrix_layout, char jobs, char jobz, + char jobr, char jobq, char jobt, char jobf, + lapack_int whtsvd, lapack_int m, lapack_int n, + lapack_complex_double* f, lapack_int ldf, + lapack_complex_double* x, lapack_int ldx, + lapack_complex_double* y, lapack_int ldy, + lapack_int nrnk, double tol, lapack_int k, + lapack_complex_double* reig, + lapack_complex_double* imeig, + lapack_complex_double* z, lapack_int ldz, + lapack_complex_double* res, + lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* v, lapack_int ldv, + lapack_complex_double* s, lapack_int lds, + lapack_complex_double* work, lapack_int lwork, + lapack_int* iwork, + lapack_int liwork ); + lapack_int LAPACKE_ssyequb_work( int matrix_layout, char uplo, lapack_int n, const float* a, lapack_int lda, float* s, float* scond, float* amax, float* work ); diff --git a/LAPACKE/src/CMakeLists.txt b/LAPACKE/src/CMakeLists.txt index eebc5f869f..7fbb7a1d3c 100644 --- a/LAPACKE/src/CMakeLists.txt +++ b/LAPACKE/src/CMakeLists.txt @@ -453,6 +453,10 @@ lapacke_csycon_3.c lapacke_csycon_3_work.c lapacke_csyconv.c lapacke_csyconv_work.c +lapacke_chedmd.c +lapacke_chedmd_work.c +lapacke_chedmdq.c +lapacke_chedmdq_work.c lapacke_csyequb.c lapacke_csyequb_work.c lapacke_csyr.c @@ -1051,6 +1055,10 @@ lapacke_dsycon_3.c lapacke_dsycon_3_work.c lapacke_dsyconv.c lapacke_dsyconv_work.c +lapacke_dsydmd.c +lapacke_dsydmd_work.c +lapacke_dsydmdq.c +lapacke_dsydmdq_work.c lapacke_dsyequb.c lapacke_dsyequb_work.c lapacke_dsyev.c @@ -1632,6 +1640,10 @@ lapacke_ssycon_3.c lapacke_ssycon_3_work.c lapacke_ssyconv.c lapacke_ssyconv_work.c +lapacke_ssydmd.c +lapacke_ssydmd_work.c +lapacke_ssydmdq.c +lapacke_ssydmdq_work.c lapacke_ssyequb.c lapacke_ssyequb_work.c lapacke_ssyev.c @@ -2240,6 +2252,10 @@ lapacke_zsycon_3.c lapacke_zsycon_3_work.c lapacke_zsyconv.c lapacke_zsyconv_work.c +lapacke_zhedmd.c +lapacke_zhedmd_work.c +lapacke_zhedmdq.c +lapacke_zhedmdq_work.c lapacke_zsyequb.c lapacke_zsyequb_work.c lapacke_zsyr.c diff --git a/LAPACKE/src/Makefile b/LAPACKE/src/Makefile index fece21af48..1afbf6d806 100644 --- a/LAPACKE/src/Makefile +++ b/LAPACKE/src/Makefile @@ -500,6 +500,10 @@ lapacke_csycon_3.o \ lapacke_csycon_3_work.o \ lapacke_csyconv.o \ lapacke_csyconv_work.o \ +lapacke_chedmd.o \ +lapacke_chedmd_work.o \ +lapacke_chedmdq.o \ +lapacke_chedmdq_work.o \ lapacke_csyequb.o \ lapacke_csyequb_work.o \ lapacke_csyr.o \ @@ -1098,6 +1102,10 @@ lapacke_dsycon_3.o \ lapacke_dsycon_3_work.o \ lapacke_dsyconv.o \ lapacke_dsyconv_work.o \ +lapacke_dsydmd.o \ +lapacke_dsydmd_work.o \ +lapacke_dsydmdq.o \ +lapacke_dsydmdq_work.o \ lapacke_dsyequb.o \ lapacke_dsyequb_work.o \ lapacke_dsyev.o \ @@ -1674,6 +1682,10 @@ lapacke_ssycon_3.o \ lapacke_ssycon_3_work.o \ lapacke_ssyconv.o \ lapacke_ssyconv_work.o \ +lapacke_ssydmd.o \ +lapacke_ssydmd_work.o \ +lapacke_ssydmdq.o \ +lapacke_ssydmdq_work.o \ lapacke_ssyequb.o \ lapacke_ssyequb_work.o \ lapacke_ssyev.o \ @@ -2282,6 +2294,10 @@ lapacke_zsycon_3.o \ lapacke_zsycon_3_work.o \ lapacke_zsyconv.o \ lapacke_zsyconv_work.o \ +lapacke_zhedmd.o \ +lapacke_zhedmd_work.o \ +lapacke_zhedmdq.o \ +lapacke_zhedmdq_work.o \ lapacke_zsyequb.o \ lapacke_zsyequb_work.o \ lapacke_zsyr.o \ diff --git a/LAPACKE/src/lapacke_chedmd.c b/LAPACKE/src/lapacke_chedmd.c new file mode 100644 index 0000000000..5b466101b0 --- /dev/null +++ b/LAPACKE/src/lapacke_chedmd.c @@ -0,0 +1,115 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native high-level C interface to LAPACK function chedmd +* Author: Intel Corporation +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_chedmd( int matrix_layout, char jobs, char jobz, char jobf, + lapack_int whtsvd, lapack_int m, lapack_int n, + lapack_complex_float* x, lapack_int ldx, + lapack_complex_float* y, lapack_int ldy, lapack_int k, + lapack_complex_float* reig, lapack_complex_float* imeig, + lapack_complex_float* z, lapack_int ldz, + lapack_complex_float* res, lapack_complex_float* b, + lapack_int ldb, lapack_complex_float* w, + lapack_int ldw, lapack_complex_float* s, lapack_int lds) +{ + lapack_int info = 0; + lapack_int lwork = -1; + lapack_int liwork = -1; + lapack_complex_float* work = NULL; + lapack_int* iwork = NULL; + lapack_complex_float work_query; + lapack_int iwork_query; + if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { + LAPACKE_xerbla( "LAPACKE_chedmd", -1 ); + return -1; + } +#ifndef LAPACK_DISABLE_NAN_CHECK + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_che_nancheck( matrix_layout, m, n, x, ldx ) ) { + return -8; + } + if( LAPACKE_che_nancheck( matrix_layout, m, n, y, ldy ) ) { + return -10; + } + if( LAPACKE_che_nancheck( matrix_layout, m, n, z, ldz ) ) { + return -15; + } + if( LAPACKE_che_nancheck( matrix_layout, m, n, b, ldb ) ) { + return -18; + } + if( LAPACKE_che_nancheck( matrix_layout, m, n, w, ldw ) ) { + return -20; + } + if( LAPACKE_che_nancheck( matrix_layout, m, n, s, lds ) ) { + return -22; + } + } +#endif + /* Query optimal working array(s) size */ + info = LAPACKE_chedmd_work( matrix_layout, jobs, jobz, jobf, whtsvd, m, n, + x, ldx, y, ldy, k, reig, imeig, z, ldz, res, + b, ldb, w, ldw, s, lds, &work_query, lwork, + &iwork_query, liwork ); + + if( info != 0 ) { + goto exit_level_0; + } + lwork = LAPACK_C2INT( work_query ); + liwork = iwork_query; + /* Allocate memory for work arrays */ + work = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * lwork ); + if( work == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_0; + } + iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork ); + if( iwork == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_1; + } + /* Call middle-level interface */ + info = LAPACKE_chedmd_work( matrix_layout, jobs, jobz, jobf, whtsvd, m, n, + x, ldx, y, ldy, k, reig, imeig, z, ldz, res, + b, ldb, w, ldw, s, lds, work, lwork, iwork, + liwork ); + /* Release memory and exit */ + LAPACKE_free( iwork ); +exit_level_1: + LAPACKE_free( work ); +exit_level_0: + if( info == LAPACK_WORK_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_chedmd", info ); + } + return info; +} diff --git a/LAPACKE/src/lapacke_chedmd_work.c b/LAPACKE/src/lapacke_chedmd_work.c new file mode 100644 index 0000000000..dd6c997e45 --- /dev/null +++ b/LAPACKE/src/lapacke_chedmd_work.c @@ -0,0 +1,180 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native middle-level C interface to LAPACK function chedmd +* Author: Intel Corporation +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_chedmd_work( int matrix_layout, char jobs, char jobz, + char jobf, lapack_int whtsvd, lapack_int m, + lapack_int n, lapack_complex_float* x, lapack_int ldx, + lapack_complex_float* y, lapack_int ldy, lapack_int k, + lapack_complex_float* reig, lapack_complex_float* imeig, + lapack_complex_float* z, lapack_int ldz, + lapack_complex_float* res, lapack_complex_float* b, + lapack_int ldb, lapack_complex_float* w, + lapack_int ldw, lapack_complex_float* s, lapack_int lds, + lapack_complex_float* work, lapack_int lwork, + lapack_int* iwork, lapack_int liwork ) +{ + lapack_int info = 0; + if( matrix_layout == LAPACK_COL_MAJOR ) { + /* Call LAPACK function and adjust info */ + LAPACK_chedmd( &jobs, &jobz, &jobf, &whtsvd, &m, &n, x, &ldx, y, &ldy, + &k, reig, imeig, z, &ldz, res, b, &ldb, w, &ldw, s, &lds, + work, &lwork, iwork, &liwork, &info ); + if( info < 0 ) { + info = info - 1; + } + } else if( matrix_layout == LAPACK_ROW_MAJOR ) { + lapack_int ldx_t = MAX(1,m); + lapack_int ldy_t = MAX(1,m); + lapack_int ldz_t = MAX(1,m); + lapack_int ldb_t = MAX(1,m); + lapack_int ldw_t = MAX(1,m); + lapack_int lds_t = MAX(1,m); + lapack_complex_float* x_t = NULL; + lapack_complex_float* y_t = NULL; + lapack_complex_float* z_t = NULL; + lapack_complex_float* b_t = NULL; + lapack_complex_float* w_t = NULL; + lapack_complex_float* s_t = NULL; + /* Check leading dimension(s) */ + if( ldx < n ) { + info = -9; + LAPACKE_xerbla( "LAPACKE_chedmd_work", info ); + return info; + } + if( ldy < n ) { + info = -11; + LAPACKE_xerbla( "LAPACKE_chedmd_work", info ); + return info; + } + if( ldz < n ) { + info = -16; + LAPACKE_xerbla( "LAPACKE_chedmd_work", info ); + return info; + } + if( ldb < n ) { + info = -19; + LAPACKE_xerbla( "LAPACKE_chedmd_work", info ); + return info; + } + if( ldw < n ) { + info = -21; + LAPACKE_xerbla( "LAPACKE_chedmd_work", info ); + return info; + } + if( lds < n ) { + info = -23; + LAPACKE_xerbla( "LAPACKE_chedmd_work", info ); + return info; + } + /* Query optimal working array(s) size if requested */ + if( lwork == -1 ) { + LAPACK_chedmd( &jobs, &jobz, &jobf, &whtsvd, &m, &n, x, &ldx, y, &ldy, + &k, reig, imeig, z, &ldz, res, b, &ldb, w, &ldw, s, &lds, + work, &lwork, iwork, &liwork, &info ); + return (info < 0) ? (info - 1) : info; + } + /* Allocate memory for temporary array(s) */ + x_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * ldx_t * MAX(1,n) ); + if( x_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_0; + } + y_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * ldy_t * MAX(1,n) ); + if( y_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_1; + } + z_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * ldz_t * MAX(1,n) ); + if( z_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_2; + } + b_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * ldb_t * MAX(1,n) ); + if( b_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_3; + } + w_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * ldw_t * MAX(1,n) ); + if( w_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_4; + } + s_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * lds_t * MAX(1,n) ); + if( s_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_5; + } + /* Transpose input matrices */ + LAPACKE_che_trans( matrix_layout, m, n, x, ldx, x_t, ldx_t ); + LAPACKE_che_trans( matrix_layout, m, n, y, ldy, y_t, ldy_t ); + LAPACKE_che_trans( matrix_layout, m, n, z, ldz, z_t, ldz_t ); + LAPACKE_che_trans( matrix_layout, m, n, b, ldb, b_t, ldb_t ); + LAPACKE_che_trans( matrix_layout, m, n, w, ldw, w_t, ldw_t ); + LAPACKE_che_trans( matrix_layout, m, n, s, lds, s_t, lds_t ); + /* Call LAPACK function and adjust info */ + LAPACK_chedmd( &jobs, &jobz, &jobf, &whtsvd, &m, &n, x_t, &ldx_t, y_t, + &ldy_t, &k, reig, imeig, z_t, &ldz_t, res, b_t, &ldb_t, + w_t, &ldw_t, s_t, &lds_t, work, &lwork, iwork, &liwork, &info ); + if( info < 0 ) { + info = info - 1; + } + /* Transpose output matrices */ + LAPACKE_che_trans( LAPACK_COL_MAJOR, m, n, x_t, ldx_t, x, ldx ); + LAPACKE_che_trans( LAPACK_COL_MAJOR, m, n, y_t, ldy_t, y, ldy ); + LAPACKE_che_trans( LAPACK_COL_MAJOR, m, n, z_t, ldz_t, z, ldz ); + LAPACKE_che_trans( LAPACK_COL_MAJOR, m, n, b_t, ldb_t, b, ldb ); + LAPACKE_che_trans( LAPACK_COL_MAJOR, m, n, w_t, ldw_t, w, ldw ); + LAPACKE_che_trans( LAPACK_COL_MAJOR, m, n, s_t, lds_t, s, lds ); + /* Release memory and exit */ + LAPACKE_free( s_t ); +exit_level_5: + LAPACKE_free( w_t ); +exit_level_4: + LAPACKE_free( b_t ); +exit_level_3: + LAPACKE_free( z_t ); +exit_level_2: + LAPACKE_free( y_t ); +exit_level_1: + LAPACKE_free( x_t ); +exit_level_0: + if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_chedmd_work", info ); + } + } else { + info = -1; + LAPACKE_xerbla( "LAPACKE_chedmd_work", info ); + } + return info; +} diff --git a/LAPACKE/src/lapacke_chedmdq.c b/LAPACKE/src/lapacke_chedmdq.c new file mode 100644 index 0000000000..d0dfba3c33 --- /dev/null +++ b/LAPACKE/src/lapacke_chedmdq.c @@ -0,0 +1,123 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native high-level C interface to LAPACK function chedmdq +* Author: Intel Corporation +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_chedmdq( int matrix_layout, char jobs, char jobz, char jobr, + char jobq, char jobt, char jobf, lapack_int whtsvd, + lapack_int m, lapack_int n, lapack_complex_float* f, + lapack_int ldf, lapack_complex_float* x, + lapack_int ldx, lapack_complex_float* y, + lapack_int ldy, lapack_int nrnk, float tol, + lapack_int k, lapack_complex_float* reig, + lapack_complex_float* imeig, + lapack_complex_float* z, lapack_int ldz, + lapack_complex_float* res, lapack_complex_float* b, + lapack_int ldb, lapack_complex_float* v, + lapack_int ldv, lapack_complex_float* s, lapack_int lds) +{ + lapack_int info = 0; + lapack_int lwork = -1; + lapack_int liwork = -1; + lapack_complex_float* work = NULL; + lapack_int* iwork = NULL; + lapack_complex_float work_query; + lapack_int iwork_query; + if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { + LAPACKE_xerbla( "LAPACKE_chedmdq", -1 ); + return -1; + } +#ifndef LAPACK_DISABLE_NAN_CHECK + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_che_nancheck( matrix_layout, m, n, f, ldf ) ) { + return -11; + } + if( LAPACKE_che_nancheck( matrix_layout, m, n, x, ldx ) ) { + return -13; + } + if( LAPACKE_che_nancheck( matrix_layout, m, n, y, ldy ) ) { + return -15; + } + if( LAPACKE_che_nancheck( matrix_layout, m, n, z, ldz ) ) { + return -22; + } + if( LAPACKE_che_nancheck( matrix_layout, m, n, b, ldb ) ) { + return -25; + } + if( LAPACKE_che_nancheck( matrix_layout, m, n, v, ldv ) ) { + return -27; + } + if( LAPACKE_che_nancheck( matrix_layout, m, n, s, lds ) ) { + return -29; + } + } +#endif + /* Query optimal working array(s) size */ + info = LAPACKE_chedmdq_work( matrix_layout, jobs, jobz, jobr, jobq, jobt, + jobf, whtsvd, m, n, f, ldf, x, ldx, y, ldy, + nrnk, tol, k, reig, imeig, z, ldz, res, + b, ldb, v, ldv, s, lds, &work_query, lwork, + &iwork_query, liwork ); + + if( info != 0 ) { + goto exit_level_0; + } + lwork = LAPACK_C2INT( work_query ); + liwork = iwork_query; + /* Allocate memory for work arrays */ + work = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * lwork ); + if( work == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_0; + } + iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork ); + if( iwork == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_1; + } + /* Call middle-level interface */ + info = LAPACKE_chedmdq_work( matrix_layout, jobs, jobz, jobr, jobq, jobt, + jobf, whtsvd, m, n, f, ldf, x, ldx, y, ldy, + nrnk, tol, k, reig, imeig, z, ldz, res, + b, ldb, v, ldv, s, lds, work, lwork, iwork, + liwork ); + /* Release memory and exit */ + LAPACKE_free( iwork ); +exit_level_1: + LAPACKE_free( work ); +exit_level_0: + if( info == LAPACK_WORK_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_chedmdq", info ); + } + return info; +} diff --git a/LAPACKE/src/lapacke_chedmdq_work.c b/LAPACKE/src/lapacke_chedmdq_work.c new file mode 100644 index 0000000000..576a4549af --- /dev/null +++ b/LAPACKE/src/lapacke_chedmdq_work.c @@ -0,0 +1,205 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native middle-level C interface to LAPACK function chedmdq +* Author: Intel Corporation +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_chedmdq_work( int matrix_layout, char jobs, char jobz, + char jobr, char jobq, char jobt, char jobf, + lapack_int whtsvd, lapack_int m, lapack_int n, + lapack_complex_float* f, lapack_int ldf, + lapack_complex_float* x, lapack_int ldx, + lapack_complex_float* y, lapack_int ldy, + lapack_int nrnk, float tol, lapack_int k, + lapack_complex_float* reig, + lapack_complex_float* imeig, + lapack_complex_float* z, + lapack_int ldz, lapack_complex_float* res, + lapack_complex_float* b, + lapack_int ldb, lapack_complex_float* v, + lapack_int ldv, lapack_complex_float* s, + lapack_int lds, lapack_complex_float* work, + lapack_int lwork, lapack_int* iwork, + lapack_int liwork ) +{ + lapack_int info = 0; + if( matrix_layout == LAPACK_COL_MAJOR ) { + /* Call LAPACK function and adjust info */ + LAPACK_chedmdq( &jobs, &jobz, &jobr, &jobq, &jobt, &jobf, &whtsvd, &m, + &n, f, &ldf, x, &ldx, y, &ldy, &nrnk, &tol, &k, reig, + imeig, z, &ldz, res, b, &ldb, v, &ldv, s, &lds, + work, &lwork, iwork, &liwork, &info ); + if( info < 0 ) { + info = info - 1; + } + } else if( matrix_layout == LAPACK_ROW_MAJOR ) { + lapack_int ldf_t = MAX(1,m); + lapack_int ldx_t = MAX(1,m); + lapack_int ldy_t = MAX(1,m); + lapack_int ldz_t = MAX(1,m); + lapack_int ldb_t = MAX(1,m); + lapack_int ldv_t = MAX(1,m); + lapack_int lds_t = MAX(1,m); + lapack_complex_float* f_t = NULL; + lapack_complex_float* x_t = NULL; + lapack_complex_float* y_t = NULL; + lapack_complex_float* z_t = NULL; + lapack_complex_float* b_t = NULL; + lapack_complex_float* v_t = NULL; + lapack_complex_float* s_t = NULL; + /* Check leading dimension(s) */ + if( ldf < n ) { + info = -12; + LAPACKE_xerbla( "LAPACKE_chedmdq_work", info ); + return info; + } + if( ldx < n ) { + info = -14; + LAPACKE_xerbla( "LAPACKE_chedmdq_work", info ); + return info; + } + if( ldy < n ) { + info = -16; + LAPACKE_xerbla( "LAPACKE_chedmdq_work", info ); + return info; + } + if( ldz < n ) { + info = -23; + LAPACKE_xerbla( "LAPACKE_chedmdq_work", info ); + return info; + } + if( ldb < n ) { + info = -26; + LAPACKE_xerbla( "LAPACKE_chedmdq_work", info ); + return info; + } + if( ldv < n ) { + info = -28; + LAPACKE_xerbla( "LAPACKE_chedmdq_work", info ); + return info; + } + if( lds < n ) { + info = -30; + LAPACKE_xerbla( "LAPACKE_chedmdq_work", info ); + return info; + } + /* Query optimal working array(s) size if requested */ + if( lwork == -1 || liwork == -1 ) { + LAPACK_chedmdq( &jobs, &jobz, &jobr, &jobq, &jobt, &jobf, &whtsvd, &m, + &n, f, &ldf, x, &ldx, y, &ldy, &nrnk, &tol, &k, reig, + imeig, z, &ldz, res, b, &ldb, v, &ldv, s, &lds, + work, &lwork, iwork, &liwork, &info ); + return (info < 0) ? (info - 1) : info; + } + /* Allocate memory for temporary array(s) */ + f_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * ldf_t * MAX(1,n) ); + if( f_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_0; + } + x_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * ldx_t * MAX(1,n) ); + if( x_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_1; + } + y_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * ldy_t * MAX(1,n) ); + if( y_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_2; + } + z_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * ldz_t * MAX(1,n) ); + if( z_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_3; + } + b_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * ldb_t * MAX(1,n) ); + if( b_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_4; + } + v_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * ldv_t * MAX(1,n) ); + if( v_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_5; + } + s_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * lds_t * MAX(1,n) ); + if( s_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_6; + } + /* Transpose input matrices */ + LAPACKE_che_trans( matrix_layout, m, n, f, ldf, f_t, ldf_t ); + LAPACKE_che_trans( matrix_layout, m, n, x, ldx, x_t, ldx_t ); + LAPACKE_che_trans( matrix_layout, m, n, y, ldy, y_t, ldy_t ); + LAPACKE_che_trans( matrix_layout, m, n, z, ldz, z_t, ldz_t ); + LAPACKE_che_trans( matrix_layout, m, n, b, ldb, b_t, ldb_t ); + LAPACKE_che_trans( matrix_layout, m, n, v, ldv, v_t, ldv_t ); + LAPACKE_che_trans( matrix_layout, m, n, s, lds, s_t, lds_t ); + /* Call LAPACK function and adjust info */ + LAPACK_chedmdq( &jobs, &jobz, &jobr, &jobq, &jobt, &jobf, &whtsvd, &m, + &n, f, &ldf, x, &ldx, y, &ldy, &nrnk, &tol, &k, reig, + imeig, z, &ldz, res, b, &ldb, v, &ldv, s, &lds, + work, &lwork, iwork, &liwork, &info ); + if( info < 0 ) { + info = info - 1; + } + /* Transpose output matrices */ + LAPACKE_che_trans( LAPACK_COL_MAJOR, m, n, f_t, ldf_t, f, ldf ); + LAPACKE_che_trans( LAPACK_COL_MAJOR, m, n, x_t, ldx_t, x, ldx ); + LAPACKE_che_trans( LAPACK_COL_MAJOR, m, n, y_t, ldy_t, y, ldy ); + LAPACKE_che_trans( LAPACK_COL_MAJOR, m, n, z_t, ldz_t, z, ldz ); + LAPACKE_che_trans( LAPACK_COL_MAJOR, m, n, b_t, ldb_t, b, ldb ); + LAPACKE_che_trans( LAPACK_COL_MAJOR, m, n, v_t, ldv_t, v, ldv ); + LAPACKE_che_trans( LAPACK_COL_MAJOR, m, n, s_t, lds_t, s, lds ); + /* Release memory and exit */ + LAPACKE_free( s_t ); +exit_level_6: + LAPACKE_free( v_t ); +exit_level_5: + LAPACKE_free( b_t ); +exit_level_4: + LAPACKE_free( z_t ); +exit_level_3: + LAPACKE_free( y_t ); +exit_level_2: + LAPACKE_free( x_t ); +exit_level_1: + LAPACKE_free( f_t ); +exit_level_0: + if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_chedmdq_work", info ); + } + } else { + info = -1; + LAPACKE_xerbla( "LAPACKE_chedmdq_work", info ); + } + return info; +} diff --git a/LAPACKE/src/lapacke_dsydmd.c b/LAPACKE/src/lapacke_dsydmd.c new file mode 100644 index 0000000000..24ccbfe206 --- /dev/null +++ b/LAPACKE/src/lapacke_dsydmd.c @@ -0,0 +1,112 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native high-level C interface to LAPACK function dsydmd +* Author: Intel Corporation +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_dsydmd( int matrix_layout, char jobs, char jobz, char jobf, + lapack_int whtsvd, lapack_int m, lapack_int n, + double* x, lapack_int ldx, double* y, lapack_int ldy, + lapack_int k, double* reig, double* imeig, double* z, + lapack_int ldz, double* res, double* b, lapack_int ldb, + double* w, lapack_int ldw, double* s, lapack_int lds) +{ + lapack_int info = 0; + lapack_int lwork = -1; + lapack_int liwork = -1; + double* work = NULL; + lapack_int* iwork = NULL; + double work_query; + lapack_int iwork_query; + if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { + LAPACKE_xerbla( "LAPACKE_dsydmd", -1 ); + return -1; + } +#ifndef LAPACK_DISABLE_NAN_CHECK + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dsy_nancheck( matrix_layout, m, n, x, ldx ) ) { + return -8; + } + if( LAPACKE_dsy_nancheck( matrix_layout, m, n, y, ldy ) ) { + return -10; + } + if( LAPACKE_dsy_nancheck( matrix_layout, m, n, z, ldz ) ) { + return -15; + } + if( LAPACKE_dsy_nancheck( matrix_layout, m, n, b, ldb ) ) { + return -18; + } + if( LAPACKE_dsy_nancheck( matrix_layout, m, n, s, lds ) ) { + return -20; + } + if( LAPACKE_dsy_nancheck( matrix_layout, m, n, w, ldw ) ) { + return -22; + } + } +#endif + /* Query optimal working array(s) size */ + info = LAPACKE_dsydmd_work( matrix_layout, jobs, jobz, jobf, whtsvd, m, n, + x, ldx, y, ldy, k, reig, imeig, z, ldz, res, + b, ldb, w, ldw, s, lds, &work_query, lwork, + &iwork_query, liwork ); + + if( info != 0 ) { + goto exit_level_0; + } + lwork = (lapack_int) work_query; + liwork = iwork_query; + /* Allocate memory for work arrays */ + work = (double*)LAPACKE_malloc( sizeof(double) * lwork ); + if( work == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_0; + } + iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork ); + if( iwork == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_1; + } + /* Call middle-level interface */ + info = LAPACKE_dsydmd_work( matrix_layout, jobs, jobz, jobf, whtsvd, m, n, + x, ldx, y, ldy, k, reig, imeig, z, ldz, res, + b, ldb, w, ldw, s, lds, work, lwork, iwork, + liwork ); + /* Release memory and exit */ + LAPACKE_free( iwork ); +exit_level_1: + LAPACKE_free( work ); +exit_level_0: + if( info == LAPACK_WORK_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_dsydmd", info ); + } + return info; +} diff --git a/LAPACKE/src/lapacke_dsydmd_work.c b/LAPACKE/src/lapacke_dsydmd_work.c new file mode 100644 index 0000000000..d55962fd26 --- /dev/null +++ b/LAPACKE/src/lapacke_dsydmd_work.c @@ -0,0 +1,179 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native middle-level C interface to LAPACK function dsydmd +* Author: Intel Corporation +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_dsydmd_work( int matrix_layout, char jobs, char jobz, + char jobf, lapack_int whtsvd, lapack_int m, + lapack_int n, double* x, lapack_int ldx, + double* y, lapack_int ldy, lapack_int k, + double* reig, double* imeig, double* z, + lapack_int ldz, double* res, double* b, + lapack_int ldb, double* w, lapack_int ldw, + double* s, lapack_int lds, double* work, + lapack_int lwork, lapack_int* iwork, + lapack_int liwork ) +{ + lapack_int info = 0; + if( matrix_layout == LAPACK_COL_MAJOR ) { + /* Call LAPACK function and adjust info */ + LAPACK_dsydmd( &jobs, &jobz, &jobf, &whtsvd, &m, &n, x, &ldx, y, &ldy, + &k, reig, imeig, z, &ldz, res, b, &ldb, w, &ldw, s, &lds, + work, &lwork, iwork, &liwork, &info ); + if( info < 0 ) { + info = info - 1; + } + } else if( matrix_layout == LAPACK_ROW_MAJOR ) { + lapack_int ldx_t = MAX(1,m); + lapack_int ldy_t = MAX(1,m); + lapack_int ldz_t = MAX(1,m); + lapack_int ldb_t = MAX(1,m); + lapack_int ldw_t = MAX(1,m); + lapack_int lds_t = MAX(1,m); + double* x_t = NULL; + double* y_t = NULL; + double* z_t = NULL; + double* b_t = NULL; + double* w_t = NULL; + double* s_t = NULL; + /* Check leading dimension(s) */ + if( ldx < n ) { + info = -9; + LAPACKE_xerbla( "LAPACKE_dsydmd_work", info ); + return info; + } + if( ldy < n ) { + info = -11; + LAPACKE_xerbla( "LAPACKE_dsydmd_work", info ); + return info; + } + if( ldz < n ) { + info = -16; + LAPACKE_xerbla( "LAPACKE_dsydmd_work", info ); + return info; + } + if( ldb < n ) { + info = -19; + LAPACKE_xerbla( "LAPACKE_dsydmd_work", info ); + return info; + } + if( ldw < n ) { + info = -21; + LAPACKE_xerbla( "LAPACKE_dsydmd_work", info ); + return info; + } + if( lds < n ) { + info = -23; + LAPACKE_xerbla( "LAPACKE_dsydmd_work", info ); + return info; + } + /* Query optimal working array(s) size if requested */ + if( lwork == -1 ) { + LAPACK_dsydmd( &jobs, &jobz, &jobf, &whtsvd, &m, &n, x, &ldx, y, &ldy, + &k, reig, imeig, z, &ldz, res, b, &ldb, w, &ldw, s, &lds, + work, &lwork, iwork, &liwork, &info ); + return (info < 0) ? (info - 1) : info; + } + /* Allocate memory for temporary array(s) */ + x_t = (double*)LAPACKE_malloc( sizeof(double) * ldx_t * MAX(1,n) ); + if( x_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_0; + } + y_t = (double*)LAPACKE_malloc( sizeof(double) * ldy_t * MAX(1,n) ); + if( y_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_1; + } + z_t = (double*)LAPACKE_malloc( sizeof(double) * ldz_t * MAX(1,n) ); + if( z_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_2; + } + b_t = (double*)LAPACKE_malloc( sizeof(double) * ldb_t * MAX(1,n) ); + if( b_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_3; + } + w_t = (double*)LAPACKE_malloc( sizeof(double) * ldw_t * MAX(1,n) ); + if( w_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_4; + } + s_t = (double*)LAPACKE_malloc( sizeof(double) * lds_t * MAX(1,n) ); + if( s_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_5; + } + /* Transpose input matrices */ + LAPACKE_dsy_trans( matrix_layout, m, n, x, ldx, x_t, ldx_t ); + LAPACKE_dsy_trans( matrix_layout, m, n, y, ldy, y_t, ldy_t ); + LAPACKE_dsy_trans( matrix_layout, m, n, z, ldz, z_t, ldz_t ); + LAPACKE_dsy_trans( matrix_layout, m, n, b, ldb, b_t, ldb_t ); + LAPACKE_dsy_trans( matrix_layout, m, n, w, ldw, w_t, ldw_t ); + LAPACKE_dsy_trans( matrix_layout, m, n, s, lds, s_t, lds_t ); + /* Call LAPACK function and adjust info */ + LAPACK_dsydmd( &jobs, &jobz, &jobf, &whtsvd, &m, &n, x_t, &ldx_t, y_t, + &ldy_t, &k, reig, imeig, z_t, &ldz_t, res, b_t, &ldb_t, + w_t, &ldw_t, s_t, &lds_t, work, &lwork, iwork, &liwork, &info ); + if( info < 0 ) { + info = info - 1; + } + /* Transpose output matrices */ + LAPACKE_dsy_trans( LAPACK_COL_MAJOR, m, n, x_t, ldx_t, x, ldx ); + LAPACKE_dsy_trans( LAPACK_COL_MAJOR, m, n, y_t, ldy_t, y, ldy ); + LAPACKE_dsy_trans( LAPACK_COL_MAJOR, m, n, z_t, ldz_t, z, ldz ); + LAPACKE_dsy_trans( LAPACK_COL_MAJOR, m, n, b_t, ldb_t, b, ldb ); + LAPACKE_dsy_trans( LAPACK_COL_MAJOR, m, n, w_t, ldw_t, w, ldw ); + LAPACKE_dsy_trans( LAPACK_COL_MAJOR, m, n, s_t, lds_t, s, lds ); + /* Release memory and exit */ + LAPACKE_free( s_t ); +exit_level_5: + LAPACKE_free( w_t ); +exit_level_4: + LAPACKE_free( b_t ); +exit_level_3: + LAPACKE_free( z_t ); +exit_level_2: + LAPACKE_free( y_t ); +exit_level_1: + LAPACKE_free( x_t ); +exit_level_0: + if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_dsydmd_work", info ); + } + } else { + info = -1; + LAPACKE_xerbla( "LAPACKE_dsydmd_work", info ); + } + return info; +} diff --git a/LAPACKE/src/lapacke_dsydmdq.c b/LAPACKE/src/lapacke_dsydmdq.c new file mode 100644 index 0000000000..4f994f9e6b --- /dev/null +++ b/LAPACKE/src/lapacke_dsydmdq.c @@ -0,0 +1,119 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native high-level C interface to LAPACK function dsydmdq +* Author: Intel Corporation +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_dsydmdq( int matrix_layout, char jobs, char jobz, char jobr, + char jobq, char jobt, char jobf, lapack_int whtsvd, + lapack_int m, lapack_int n, double* f, lapack_int ldf, + double* x, lapack_int ldx, double* y, lapack_int ldy, + lapack_int nrnk, double tol, lapack_int k, + double* reig, double* imeig, double* z, + lapack_int ldz, double* res, double* b, lapack_int ldb, + double* v, lapack_int ldv, double* s, lapack_int lds) +{ + lapack_int info = 0; + lapack_int lwork = -1; + lapack_int liwork = -1; + double* work = NULL; + lapack_int* iwork = NULL; + double work_query; + lapack_int iwork_query; + if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { + LAPACKE_xerbla( "LAPACKE_dsydmdq", -1 ); + return -1; + } +#ifndef LAPACK_DISABLE_NAN_CHECK + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_dsy_nancheck( matrix_layout, m, n, f, ldf ) ) { + return -11; + } + if( LAPACKE_dsy_nancheck( matrix_layout, m, n, x, ldx ) ) { + return -13; + } + if( LAPACKE_dsy_nancheck( matrix_layout, m, n, y, ldy ) ) { + return -15; + } + if( LAPACKE_dsy_nancheck( matrix_layout, m, n, z, ldz ) ) { + return -22; + } + if( LAPACKE_dsy_nancheck( matrix_layout, m, n, b, ldb ) ) { + return -25; + } + if( LAPACKE_dsy_nancheck( matrix_layout, m, n, v, ldv ) ) { + return -27; + } + if( LAPACKE_dsy_nancheck( matrix_layout, m, n, s, lds ) ) { + return -29; + } + } +#endif + /* Query optimal working array(s) size */ + info = LAPACKE_dsydmdq_work( matrix_layout, jobs, jobz, jobr, jobq, jobt, + jobf, whtsvd, m, n, f, ldf, x, ldx, y, ldy, + nrnk, tol, k, reig, imeig, z, ldz, res, + b, ldb, v, ldv, s, lds, &work_query, lwork, + &iwork_query, liwork ); + + if( info != 0 ) { + goto exit_level_0; + } + lwork = (lapack_int) work_query; + liwork = iwork_query; + /* Allocate memory for work arrays */ + work = (double*)LAPACKE_malloc( sizeof(double) * lwork ); + if( work == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_0; + } + iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork ); + if( iwork == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_1; + } + /* Call middle-level interface */ + info = LAPACKE_dsydmdq_work( matrix_layout, jobs, jobz, jobr, jobq, jobt, + jobf, whtsvd, m, n, f, ldf, x, ldx, y, ldy, + nrnk, tol, k, reig, imeig, z, ldz, res, + b, ldb, v, ldv, s, lds, work, lwork, iwork, + liwork ); + /* Release memory and exit */ + LAPACKE_free( iwork ); +exit_level_1: + LAPACKE_free( work ); +exit_level_0: + if( info == LAPACK_WORK_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_dsydmdq", info ); + } + return info; +} diff --git a/LAPACKE/src/lapacke_dsydmdq_work.c b/LAPACKE/src/lapacke_dsydmdq_work.c new file mode 100644 index 0000000000..0e9d57da34 --- /dev/null +++ b/LAPACKE/src/lapacke_dsydmdq_work.c @@ -0,0 +1,200 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native middle-level C interface to LAPACK function dsydmdq +* Author: Intel Corporation +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_dsydmdq_work( int matrix_layout, char jobs, char jobz, + char jobr, char jobq, char jobt, char jobf, + lapack_int whtsvd, lapack_int m, lapack_int n, + double* f, lapack_int ldf, double* x, + lapack_int ldx, double* y, lapack_int ldy, + lapack_int nrnk, double tol, lapack_int k, + double* reig, double* imeig, double* z, + lapack_int ldz, double* res, double* b, + lapack_int ldb, double* v, lapack_int ldv, + double* s, lapack_int lds, double* work, + lapack_int lwork, lapack_int* iwork, + lapack_int liwork ) +{ + lapack_int info = 0; + if( matrix_layout == LAPACK_COL_MAJOR ) { + /* Call LAPACK function and adjust info */ + LAPACK_dsydmdq( &jobs, &jobz, &jobr, &jobq, &jobt, &jobf, &whtsvd, &m, + &n, f, &ldf, x, &ldx, y, &ldy, &nrnk, &tol, &k, reig, + imeig, z, &ldz, res, b, &ldb, v, &ldv, s, &lds, + work, &lwork, iwork, &liwork, &info ); + if( info < 0 ) { + info = info - 1; + } + } else if( matrix_layout == LAPACK_ROW_MAJOR ) { + lapack_int ldf_t = MAX(1,m); + lapack_int ldx_t = MAX(1,m); + lapack_int ldy_t = MAX(1,m); + lapack_int ldz_t = MAX(1,m); + lapack_int ldb_t = MAX(1,m); + lapack_int ldv_t = MAX(1,m); + lapack_int lds_t = MAX(1,m); + double* f_t = NULL; + double* x_t = NULL; + double* y_t = NULL; + double* z_t = NULL; + double* b_t = NULL; + double* v_t = NULL; + double* s_t = NULL; + /* Check leading dimension(s) */ + if( ldf < n ) { + info = -12; + LAPACKE_xerbla( "LAPACKE_dsydmdq_work", info ); + return info; + } + if( ldx < n ) { + info = -14; + LAPACKE_xerbla( "LAPACKE_dsydmdq_work", info ); + return info; + } + if( ldy < n ) { + info = -16; + LAPACKE_xerbla( "LAPACKE_dsydmdq_work", info ); + return info; + } + if( ldz < n ) { + info = -23; + LAPACKE_xerbla( "LAPACKE_dsydmdq_work", info ); + return info; + } + if( ldb < n ) { + info = -26; + LAPACKE_xerbla( "LAPACKE_dsydmdq_work", info ); + return info; + } + if( ldv < n ) { + info = -28; + LAPACKE_xerbla( "LAPACKE_dsydmdq_work", info ); + return info; + } + if( lds < n ) { + info = -30; + LAPACKE_xerbla( "LAPACKE_dsydmdq_work", info ); + return info; + } + /* Query optimal working array(s) size if requested */ + if( lwork == -1 || liwork == -1 ) { + LAPACK_dsydmdq( &jobs, &jobz, &jobr, &jobq, &jobt, &jobf, &whtsvd, &m, + &n, f, &ldf, x, &ldx, y, &ldy, &nrnk, &tol, &k, reig, + imeig, z, &ldz, res, b, &ldb, v, &ldv, s, &lds, + work, &lwork, iwork, &liwork, &info ); + return (info < 0) ? (info - 1) : info; + } + /* Allocate memory for temporary array(s) */ + f_t = (double*)LAPACKE_malloc( sizeof(double) * ldf_t * MAX(1,n) ); + if( f_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_0; + } + x_t = (double*)LAPACKE_malloc( sizeof(double) * ldx_t * MAX(1,n) ); + if( x_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_1; + } + y_t = (double*)LAPACKE_malloc( sizeof(double) * ldy_t * MAX(1,n) ); + if( y_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_2; + } + z_t = (double*)LAPACKE_malloc( sizeof(double) * ldz_t * MAX(1,n) ); + if( z_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_3; + } + b_t = (double*)LAPACKE_malloc( sizeof(double) * ldb_t * MAX(1,n) ); + if( b_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_4; + } + v_t = (double*)LAPACKE_malloc( sizeof(double) * ldv_t * MAX(1,n) ); + if( v_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_5; + } + s_t = (double*)LAPACKE_malloc( sizeof(double) * lds_t * MAX(1,n) ); + if( s_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_6; + } + /* Transpose input matrices */ + LAPACKE_dsy_trans( matrix_layout, m, n, f, ldf, f_t, ldf_t ); + LAPACKE_dsy_trans( matrix_layout, m, n, x, ldx, x_t, ldx_t ); + LAPACKE_dsy_trans( matrix_layout, m, n, y, ldy, y_t, ldy_t ); + LAPACKE_dsy_trans( matrix_layout, m, n, z, ldz, z_t, ldz_t ); + LAPACKE_dsy_trans( matrix_layout, m, n, b, ldb, b_t, ldb_t ); + LAPACKE_dsy_trans( matrix_layout, m, n, v, ldv, v_t, ldv_t ); + LAPACKE_dsy_trans( matrix_layout, m, n, s, lds, s_t, lds_t ); + /* Call LAPACK function and adjust info */ + LAPACK_dsydmdq( &jobs, &jobz, &jobr, &jobq, &jobt, &jobf, &whtsvd, &m, + &n, f, &ldf, x, &ldx, y, &ldy, &nrnk, &tol, &k, reig, + imeig, z, &ldz, res, b, &ldb, v, &ldv, s, &lds, + work, &lwork, iwork, &liwork, &info ); + if( info < 0 ) { + info = info - 1; + } + /* Transpose output matrices */ + LAPACKE_dsy_trans( LAPACK_COL_MAJOR, m, n, f_t, ldf_t, f, ldf ); + LAPACKE_dsy_trans( LAPACK_COL_MAJOR, m, n, x_t, ldx_t, x, ldx ); + LAPACKE_dsy_trans( LAPACK_COL_MAJOR, m, n, y_t, ldy_t, y, ldy ); + LAPACKE_dsy_trans( LAPACK_COL_MAJOR, m, n, z_t, ldz_t, z, ldz ); + LAPACKE_dsy_trans( LAPACK_COL_MAJOR, m, n, b_t, ldb_t, b, ldb ); + LAPACKE_dsy_trans( LAPACK_COL_MAJOR, m, n, v_t, ldv_t, v, ldv ); + LAPACKE_dsy_trans( LAPACK_COL_MAJOR, m, n, s_t, lds_t, s, lds ); + /* Release memory and exit */ + LAPACKE_free( s_t ); +exit_level_6: + LAPACKE_free( v_t ); +exit_level_5: + LAPACKE_free( b_t ); +exit_level_4: + LAPACKE_free( z_t ); +exit_level_3: + LAPACKE_free( y_t ); +exit_level_2: + LAPACKE_free( x_t ); +exit_level_1: + LAPACKE_free( f_t ); +exit_level_0: + if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_dsydmdq_work", info ); + } + } else { + info = -1; + LAPACKE_xerbla( "LAPACKE_dsydmdq_work", info ); + } + return info; +} diff --git a/LAPACKE/src/lapacke_ssydmd.c b/LAPACKE/src/lapacke_ssydmd.c new file mode 100644 index 0000000000..40a12a2409 --- /dev/null +++ b/LAPACKE/src/lapacke_ssydmd.c @@ -0,0 +1,112 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native high-level C interface to LAPACK function ssydmd +* Author: Intel Corporation +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_ssydmd( int matrix_layout, char jobs, char jobz, char jobf, + lapack_int whtsvd, lapack_int m, lapack_int n, + float* x, lapack_int ldx, float* y, lapack_int ldy, + lapack_int k, float* reig, float* imeig, float* z, + lapack_int ldz, float* res, float* b, lapack_int ldb, + float* w, lapack_int ldw, float* s, lapack_int lds) +{ + lapack_int info = 0; + lapack_int lwork = -1; + lapack_int liwork = -1; + float* work = NULL; + lapack_int* iwork = NULL; + float work_query; + lapack_int iwork_query; + if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { + LAPACKE_xerbla( "LAPACKE_ssydmd", -1 ); + return -1; + } +#ifndef LAPACK_DISABLE_NAN_CHECK + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ssy_nancheck( matrix_layout, m, n, x, ldx ) ) { + return -8; + } + if( LAPACKE_ssy_nancheck( matrix_layout, m, n, y, ldy ) ) { + return -10; + } + if( LAPACKE_ssy_nancheck( matrix_layout, m, n, z, ldz ) ) { + return -15; + } + if( LAPACKE_ssy_nancheck( matrix_layout, m, n, b, ldb ) ) { + return -18; + } + if( LAPACKE_ssy_nancheck( matrix_layout, m, n, s, lds ) ) { + return -20; + } + if( LAPACKE_ssy_nancheck( matrix_layout, m, n, w, ldw ) ) { + return -22; + } + } +#endif + /* Query optimal working array(s) size */ + info = LAPACKE_ssydmd_work( matrix_layout, jobs, jobz, jobf, whtsvd, m, n, + x, ldx, y, ldy, k, reig, imeig, z, ldz, res, + b, ldb, w, ldw, s, lds, &work_query, lwork, + &iwork_query, liwork ); + + if( info != 0 ) { + goto exit_level_0; + } + lwork = (lapack_int) work_query; + liwork = iwork_query; + /* Allocate memory for work arrays */ + work = (float*)LAPACKE_malloc( sizeof(float) * lwork ); + if( work == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_0; + } + iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork ); + if( iwork == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_1; + } + /* Call middle-level interface */ + info = LAPACKE_ssydmd_work( matrix_layout, jobs, jobz, jobf, whtsvd, m, n, + x, ldx, y, ldy, k, reig, imeig, z, ldz, res, + b, ldb, w, ldw, s, lds, work, lwork, iwork, + liwork ); + /* Release memory and exit */ + LAPACKE_free( iwork ); +exit_level_1: + LAPACKE_free( work ); +exit_level_0: + if( info == LAPACK_WORK_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_ssydmd", info ); + } + return info; +} diff --git a/LAPACKE/src/lapacke_ssydmd_work.c b/LAPACKE/src/lapacke_ssydmd_work.c new file mode 100644 index 0000000000..fc185990a1 --- /dev/null +++ b/LAPACKE/src/lapacke_ssydmd_work.c @@ -0,0 +1,179 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native middle-level C interface to LAPACK function ssydmd +* Author: Intel Corporation +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_ssydmd_work( int matrix_layout, char jobs, char jobz, + char jobf, lapack_int whtsvd, lapack_int m, + lapack_int n, float* x, lapack_int ldx, + float* y, lapack_int ldy, lapack_int k, + float* reig, float* imeig, float* z, + lapack_int ldz, float* res, float* b, + lapack_int ldb, float* w, lapack_int ldw, + float* s, lapack_int lds, float* work, + lapack_int lwork, lapack_int* iwork, + lapack_int liwork ) +{ + lapack_int info = 0; + if( matrix_layout == LAPACK_COL_MAJOR ) { + /* Call LAPACK function and adjust info */ + LAPACK_ssydmd( &jobs, &jobz, &jobf, &whtsvd, &m, &n, x, &ldx, y, &ldy, + &k, reig, imeig, z, &ldz, res, b, &ldb, w, &ldw, s, &lds, + work, &lwork, iwork, &liwork, &info ); + if( info < 0 ) { + info = info - 1; + } + } else if( matrix_layout == LAPACK_ROW_MAJOR ) { + lapack_int ldx_t = MAX(1,m); + lapack_int ldy_t = MAX(1,m); + lapack_int ldz_t = MAX(1,m); + lapack_int ldb_t = MAX(1,m); + lapack_int ldw_t = MAX(1,m); + lapack_int lds_t = MAX(1,m); + float* x_t = NULL; + float* y_t = NULL; + float* z_t = NULL; + float* b_t = NULL; + float* w_t = NULL; + float* s_t = NULL; + /* Check leading dimension(s) */ + if( ldx < n ) { + info = -9; + LAPACKE_xerbla( "LAPACKE_ssydmd_work", info ); + return info; + } + if( ldy < n ) { + info = -11; + LAPACKE_xerbla( "LAPACKE_ssydmd_work", info ); + return info; + } + if( ldz < n ) { + info = -16; + LAPACKE_xerbla( "LAPACKE_ssydmd_work", info ); + return info; + } + if( ldb < n ) { + info = -19; + LAPACKE_xerbla( "LAPACKE_ssydmd_work", info ); + return info; + } + if( ldw < n ) { + info = -21; + LAPACKE_xerbla( "LAPACKE_ssydmd_work", info ); + return info; + } + if( lds < n ) { + info = -23; + LAPACKE_xerbla( "LAPACKE_ssydmd_work", info ); + return info; + } + /* Query optimal working array(s) size if requested */ + if( lwork == -1 ) { + LAPACK_ssydmd( &jobs, &jobz, &jobf, &whtsvd, &m, &n, x, &ldx, y, &ldy, + &k, reig, imeig, z, &ldz, res, b, &ldb, w, &ldw, s, &lds, + work, &lwork, iwork, &liwork, &info ); + return (info < 0) ? (info - 1) : info; + } + /* Allocate memory for temporary array(s) */ + x_t = (float*)LAPACKE_malloc( sizeof(float) * ldx_t * MAX(1,n) ); + if( x_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_0; + } + y_t = (float*)LAPACKE_malloc( sizeof(float) * ldy_t * MAX(1,n) ); + if( y_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_1; + } + z_t = (float*)LAPACKE_malloc( sizeof(float) * ldz_t * MAX(1,n) ); + if( z_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_2; + } + b_t = (float*)LAPACKE_malloc( sizeof(float) * ldb_t * MAX(1,n) ); + if( b_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_3; + } + w_t = (float*)LAPACKE_malloc( sizeof(float) * ldw_t * MAX(1,n) ); + if( w_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_4; + } + s_t = (float*)LAPACKE_malloc( sizeof(float) * lds_t * MAX(1,n) ); + if( s_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_5; + } + /* Transpose input matrices */ + LAPACKE_ssy_trans( matrix_layout, m, n, x, ldx, x_t, ldx_t ); + LAPACKE_ssy_trans( matrix_layout, m, n, y, ldy, y_t, ldy_t ); + LAPACKE_ssy_trans( matrix_layout, m, n, z, ldz, z_t, ldz_t ); + LAPACKE_ssy_trans( matrix_layout, m, n, b, ldb, b_t, ldb_t ); + LAPACKE_ssy_trans( matrix_layout, m, n, w, ldw, w_t, ldw_t ); + LAPACKE_ssy_trans( matrix_layout, m, n, s, lds, s_t, lds_t ); + /* Call LAPACK function and adjust info */ + LAPACK_ssydmd( &jobs, &jobz, &jobf, &whtsvd, &m, &n, x_t, &ldx_t, y_t, + &ldy_t, &k, reig, imeig, z_t, &ldz_t, res, b_t, &ldb_t, + w_t, &ldw_t, s_t, &lds_t, work, &lwork, iwork, &liwork, &info ); + if( info < 0 ) { + info = info - 1; + } + /* Transpose output matrices */ + LAPACKE_ssy_trans( LAPACK_COL_MAJOR, m, n, x_t, ldx_t, x, ldx ); + LAPACKE_ssy_trans( LAPACK_COL_MAJOR, m, n, y_t, ldy_t, y, ldy ); + LAPACKE_ssy_trans( LAPACK_COL_MAJOR, m, n, z_t, ldz_t, z, ldz ); + LAPACKE_ssy_trans( LAPACK_COL_MAJOR, m, n, b_t, ldb_t, b, ldb ); + LAPACKE_ssy_trans( LAPACK_COL_MAJOR, m, n, w_t, ldw_t, w, ldw ); + LAPACKE_ssy_trans( LAPACK_COL_MAJOR, m, n, s_t, lds_t, s, lds ); + /* Release memory and exit */ + LAPACKE_free( s_t ); +exit_level_5: + LAPACKE_free( w_t ); +exit_level_4: + LAPACKE_free( b_t ); +exit_level_3: + LAPACKE_free( z_t ); +exit_level_2: + LAPACKE_free( y_t ); +exit_level_1: + LAPACKE_free( x_t ); +exit_level_0: + if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_ssydmd_work", info ); + } + } else { + info = -1; + LAPACKE_xerbla( "LAPACKE_ssydmd_work", info ); + } + return info; +} diff --git a/LAPACKE/src/lapacke_ssydmdq.c b/LAPACKE/src/lapacke_ssydmdq.c new file mode 100644 index 0000000000..9147dc0557 --- /dev/null +++ b/LAPACKE/src/lapacke_ssydmdq.c @@ -0,0 +1,119 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native high-level C interface to LAPACK function ssydmdq +* Author: Intel Corporation +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_ssydmdq( int matrix_layout, char jobs, char jobz, char jobr, + char jobq, char jobt, char jobf, lapack_int whtsvd, + lapack_int m, lapack_int n, float* f, lapack_int ldf, + float* x, lapack_int ldx, float* y, lapack_int ldy, + lapack_int nrnk, float tol, lapack_int k, + float* reig, float* imeig, float* z, + lapack_int ldz, float* res, float* b, lapack_int ldb, + float* v, lapack_int ldv, float* s, lapack_int lds) +{ + lapack_int info = 0; + lapack_int lwork = -1; + lapack_int liwork = -1; + float* work = NULL; + lapack_int* iwork = NULL; + float work_query; + lapack_int iwork_query; + if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { + LAPACKE_xerbla( "LAPACKE_ssydmdq", -1 ); + return -1; + } +#ifndef LAPACK_DISABLE_NAN_CHECK + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_ssy_nancheck( matrix_layout, m, n, f, ldf ) ) { + return -11; + } + if( LAPACKE_ssy_nancheck( matrix_layout, m, n, x, ldx ) ) { + return -13; + } + if( LAPACKE_ssy_nancheck( matrix_layout, m, n, y, ldy ) ) { + return -15; + } + if( LAPACKE_ssy_nancheck( matrix_layout, m, n, z, ldz ) ) { + return -22; + } + if( LAPACKE_ssy_nancheck( matrix_layout, m, n, b, ldb ) ) { + return -25; + } + if( LAPACKE_ssy_nancheck( matrix_layout, m, n, v, ldv ) ) { + return -27; + } + if( LAPACKE_ssy_nancheck( matrix_layout, m, n, s, lds ) ) { + return -29; + } + } +#endif + /* Query optimal working array(s) size */ + info = LAPACKE_ssydmdq_work( matrix_layout, jobs, jobz, jobr, jobq, jobt, + jobf, whtsvd, m, n, f, ldf, x, ldx, y, ldy, + nrnk, tol, k, reig, imeig, z, ldz, res, + b, ldb, v, ldv, s, lds, &work_query, lwork, + &iwork_query, liwork ); + + if( info != 0 ) { + goto exit_level_0; + } + lwork = (lapack_int) work_query; + liwork = iwork_query; + /* Allocate memory for work arrays */ + work = (float*)LAPACKE_malloc( sizeof(float) * lwork ); + if( work == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_0; + } + iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork ); + if( iwork == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_1; + } + /* Call middle-level interface */ + info = LAPACKE_ssydmdq_work( matrix_layout, jobs, jobz, jobr, jobq, jobt, + jobf, whtsvd, m, n, f, ldf, x, ldx, y, ldy, + nrnk, tol, k, reig, imeig, z, ldz, res, + b, ldb, v, ldv, s, lds, work, lwork, iwork, + liwork ); + /* Release memory and exit */ + LAPACKE_free( iwork ); +exit_level_1: + LAPACKE_free( work ); +exit_level_0: + if( info == LAPACK_WORK_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_ssydmdq", info ); + } + return info; +} diff --git a/LAPACKE/src/lapacke_ssydmdq_work.c b/LAPACKE/src/lapacke_ssydmdq_work.c new file mode 100644 index 0000000000..3b04aa00bb --- /dev/null +++ b/LAPACKE/src/lapacke_ssydmdq_work.c @@ -0,0 +1,200 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native middle-level C interface to LAPACK function ssydmdq +* Author: Intel Corporation +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_ssydmdq_work( int matrix_layout, char jobs, char jobz, + char jobr, char jobq, char jobt, char jobf, + lapack_int whtsvd, lapack_int m, lapack_int n, + float* f, lapack_int ldf, float* x, + lapack_int ldx, float* y, lapack_int ldy, + lapack_int nrnk, float tol, lapack_int k, + float* reig, float* imeig, float* z, + lapack_int ldz, float* res, float* b, + lapack_int ldb, float* v, lapack_int ldv, + float* s, lapack_int lds, float* work, + lapack_int lwork, lapack_int* iwork, + lapack_int liwork ) +{ + lapack_int info = 0; + if( matrix_layout == LAPACK_COL_MAJOR ) { + /* Call LAPACK function and adjust info */ + LAPACK_ssydmdq( &jobs, &jobz, &jobr, &jobq, &jobt, &jobf, &whtsvd, &m, + &n, f, &ldf, x, &ldx, y, &ldy, &nrnk, &tol, &k, reig, + imeig, z, &ldz, res, b, &ldb, v, &ldv, s, &lds, + work, &lwork, iwork, &liwork, &info ); + if( info < 0 ) { + info = info - 1; + } + } else if( matrix_layout == LAPACK_ROW_MAJOR ) { + lapack_int ldf_t = MAX(1,m); + lapack_int ldx_t = MAX(1,m); + lapack_int ldy_t = MAX(1,m); + lapack_int ldz_t = MAX(1,m); + lapack_int ldb_t = MAX(1,m); + lapack_int ldv_t = MAX(1,m); + lapack_int lds_t = MAX(1,m); + float* f_t = NULL; + float* x_t = NULL; + float* y_t = NULL; + float* z_t = NULL; + float* b_t = NULL; + float* v_t = NULL; + float* s_t = NULL; + /* Check leading dimension(s) */ + if( ldf < n ) { + info = -12; + LAPACKE_xerbla( "LAPACKE_ssydmdq_work", info ); + return info; + } + if( ldx < n ) { + info = -14; + LAPACKE_xerbla( "LAPACKE_ssydmdq_work", info ); + return info; + } + if( ldy < n ) { + info = -16; + LAPACKE_xerbla( "LAPACKE_ssydmdq_work", info ); + return info; + } + if( ldz < n ) { + info = -23; + LAPACKE_xerbla( "LAPACKE_ssydmdq_work", info ); + return info; + } + if( ldb < n ) { + info = -26; + LAPACKE_xerbla( "LAPACKE_ssydmdq_work", info ); + return info; + } + if( ldv < n ) { + info = -28; + LAPACKE_xerbla( "LAPACKE_ssydmdq_work", info ); + return info; + } + if( lds < n ) { + info = -30; + LAPACKE_xerbla( "LAPACKE_ssydmdq_work", info ); + return info; + } + /* Query optimal working array(s) size if requested */ + if( lwork == -1 || liwork == -1 ) { + LAPACK_ssydmdq( &jobs, &jobz, &jobr, &jobq, &jobt, &jobf, &whtsvd, &m, + &n, f, &ldf, x, &ldx, y, &ldy, &nrnk, &tol, &k, reig, + imeig, z, &ldz, res, b, &ldb, v, &ldv, s, &lds, + work, &lwork, iwork, &liwork, &info ); + return (info < 0) ? (info - 1) : info; + } + /* Allocate memory for temporary array(s) */ + f_t = (float*)LAPACKE_malloc( sizeof(float) * ldf_t * MAX(1,n) ); + if( f_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_0; + } + x_t = (float*)LAPACKE_malloc( sizeof(float) * ldx_t * MAX(1,n) ); + if( x_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_1; + } + y_t = (float*)LAPACKE_malloc( sizeof(float) * ldy_t * MAX(1,n) ); + if( y_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_2; + } + z_t = (float*)LAPACKE_malloc( sizeof(float) * ldz_t * MAX(1,n) ); + if( z_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_3; + } + b_t = (float*)LAPACKE_malloc( sizeof(float) * ldb_t * MAX(1,n) ); + if( b_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_4; + } + v_t = (float*)LAPACKE_malloc( sizeof(float) * ldv_t * MAX(1,n) ); + if( v_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_5; + } + s_t = (float*)LAPACKE_malloc( sizeof(float) * lds_t * MAX(1,n) ); + if( s_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_6; + } + /* Transpose input matrices */ + LAPACKE_ssy_trans( matrix_layout, m, n, f, ldf, f_t, ldf_t ); + LAPACKE_ssy_trans( matrix_layout, m, n, x, ldx, x_t, ldx_t ); + LAPACKE_ssy_trans( matrix_layout, m, n, y, ldy, y_t, ldy_t ); + LAPACKE_ssy_trans( matrix_layout, m, n, z, ldz, z_t, ldz_t ); + LAPACKE_ssy_trans( matrix_layout, m, n, b, ldb, b_t, ldb_t ); + LAPACKE_ssy_trans( matrix_layout, m, n, v, ldv, v_t, ldv_t ); + LAPACKE_ssy_trans( matrix_layout, m, n, s, lds, s_t, lds_t ); + /* Call LAPACK function and adjust info */ + LAPACK_ssydmdq( &jobs, &jobz, &jobr, &jobq, &jobt, &jobf, &whtsvd, &m, + &n, f, &ldf, x, &ldx, y, &ldy, &nrnk, &tol, &k, reig, + imeig, z, &ldz, res, b, &ldb, v, &ldv, s, &lds, + work, &lwork, iwork, &liwork, &info ); + if( info < 0 ) { + info = info - 1; + } + /* Transpose output matrices */ + LAPACKE_ssy_trans( LAPACK_COL_MAJOR, m, n, f_t, ldf_t, f, ldf ); + LAPACKE_ssy_trans( LAPACK_COL_MAJOR, m, n, x_t, ldx_t, x, ldx ); + LAPACKE_ssy_trans( LAPACK_COL_MAJOR, m, n, y_t, ldy_t, y, ldy ); + LAPACKE_ssy_trans( LAPACK_COL_MAJOR, m, n, z_t, ldz_t, z, ldz ); + LAPACKE_ssy_trans( LAPACK_COL_MAJOR, m, n, b_t, ldb_t, b, ldb ); + LAPACKE_ssy_trans( LAPACK_COL_MAJOR, m, n, v_t, ldv_t, v, ldv ); + LAPACKE_ssy_trans( LAPACK_COL_MAJOR, m, n, s_t, lds_t, s, lds ); + /* Release memory and exit */ + LAPACKE_free( s_t ); +exit_level_6: + LAPACKE_free( v_t ); +exit_level_5: + LAPACKE_free( b_t ); +exit_level_4: + LAPACKE_free( z_t ); +exit_level_3: + LAPACKE_free( y_t ); +exit_level_2: + LAPACKE_free( x_t ); +exit_level_1: + LAPACKE_free( f_t ); +exit_level_0: + if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_ssydmdq_work", info ); + } + } else { + info = -1; + LAPACKE_xerbla( "LAPACKE_ssydmdq_work", info ); + } + return info; +} diff --git a/LAPACKE/src/lapacke_zhedmd.c b/LAPACKE/src/lapacke_zhedmd.c new file mode 100644 index 0000000000..b6ecc05b7b --- /dev/null +++ b/LAPACKE/src/lapacke_zhedmd.c @@ -0,0 +1,116 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native high-level C interface to LAPACK function zhedmd +* Author: Intel Corporation +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_zhedmd( int matrix_layout, char jobs, char jobz, char jobf, + lapack_int whtsvd, lapack_int m, lapack_int n, + lapack_complex_double* x, lapack_int ldx, + lapack_complex_double* y, lapack_int ldy, + lapack_int k, lapack_complex_double* reig, + lapack_complex_double* imeig, lapack_complex_double* z, + lapack_int ldz, lapack_complex_double* res, + lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* w, lapack_int ldw, + lapack_complex_double* s, lapack_int lds) +{ + lapack_int info = 0; + lapack_int lwork = -1; + lapack_int liwork = -1; + lapack_complex_double* work = NULL; + lapack_int* iwork = NULL; + lapack_complex_double work_query; + lapack_int iwork_query; + if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { + LAPACKE_xerbla( "LAPACKE_zhedmd", -1 ); + return -1; + } +#ifndef LAPACK_DISABLE_NAN_CHECK + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zhe_nancheck( matrix_layout, m, n, x, ldx ) ) { + return -8; + } + if( LAPACKE_zhe_nancheck( matrix_layout, m, n, y, ldy ) ) { + return -10; + } + if( LAPACKE_zhe_nancheck( matrix_layout, m, n, z, ldz ) ) { + return -15; + } + if( LAPACKE_zhe_nancheck( matrix_layout, m, n, b, ldb ) ) { + return -18; + } + if( LAPACKE_zhe_nancheck( matrix_layout, m, n, s, lds ) ) { + return -20; + } + if( LAPACKE_zhe_nancheck( matrix_layout, m, n, w, ldw ) ) { + return -22; + } + } +#endif + /* Query optimal working array(s) size */ + info = LAPACKE_zhedmd_work( matrix_layout, jobs, jobz, jobf, whtsvd, m, n, + x, ldx, y, ldy, k, reig, imeig, z, ldz, res, + b, ldb, w, ldw, s, lds, &work_query, lwork, + &iwork_query, liwork ); + + if( info != 0 ) { + goto exit_level_0; + } + lwork = LAPACK_Z2INT( work_query ); + liwork = iwork_query; + /* Allocate memory for work arrays */ + work = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * lwork ); + if( work == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_0; + } + iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork ); + if( iwork == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_1; + } + /* Call middle-level interface */ + info = LAPACKE_zhedmd_work( matrix_layout, jobs, jobz, jobf, whtsvd, m, n, + x, ldx, y, ldy, k, reig, imeig, z, ldz, res, + b, ldb, w, ldw, s, lds, work, lwork, iwork, + liwork ); + /* Release memory and exit */ + LAPACKE_free( iwork ); +exit_level_1: + LAPACKE_free( work ); +exit_level_0: + if( info == LAPACK_WORK_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_zhedmd", info ); + } + return info; +} diff --git a/LAPACKE/src/lapacke_zhedmd_work.c b/LAPACKE/src/lapacke_zhedmd_work.c new file mode 100644 index 0000000000..1255ba0e9f --- /dev/null +++ b/LAPACKE/src/lapacke_zhedmd_work.c @@ -0,0 +1,182 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native middle-level C interface to LAPACK function zhedmd +* Author: Intel Corporation +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_zhedmd_work( int matrix_layout, char jobs, char jobz, + char jobf, lapack_int whtsvd, lapack_int m, + lapack_int n, lapack_complex_double* x, + lapack_int ldx, lapack_complex_double* y, + lapack_int ldy, lapack_int k, + lapack_complex_double* reig, + lapack_complex_double* imeig, lapack_complex_double* z, + lapack_int ldz, lapack_complex_double* res, + lapack_complex_double* b, lapack_int ldb, + lapack_complex_double* w, lapack_int ldw, + lapack_complex_double* s, lapack_int lds, + lapack_complex_double* work, lapack_int lwork, + lapack_int* iwork, lapack_int liwork ) +{ + lapack_int info = 0; + if( matrix_layout == LAPACK_COL_MAJOR ) { + /* Call LAPACK function and adjust info */ + LAPACK_zhedmd( &jobs, &jobz, &jobf, &whtsvd, &m, &n, x, &ldx, y, &ldy, + &k, reig, imeig, z, &ldz, res, b, &ldb, w, &ldw, s, &lds, + work, &lwork, iwork, &liwork, &info ); + if( info < 0 ) { + info = info - 1; + } + } else if( matrix_layout == LAPACK_ROW_MAJOR ) { + lapack_int ldx_t = MAX(1,m); + lapack_int ldy_t = MAX(1,m); + lapack_int ldz_t = MAX(1,m); + lapack_int ldb_t = MAX(1,m); + lapack_int ldw_t = MAX(1,m); + lapack_int lds_t = MAX(1,m); + lapack_complex_double* x_t = NULL; + lapack_complex_double* y_t = NULL; + lapack_complex_double* z_t = NULL; + lapack_complex_double* b_t = NULL; + lapack_complex_double* w_t = NULL; + lapack_complex_double* s_t = NULL; + /* Check leading dimension(s) */ + if( ldx < n ) { + info = -9; + LAPACKE_xerbla( "LAPACKE_zhedmd_work", info ); + return info; + } + if( ldy < n ) { + info = -11; + LAPACKE_xerbla( "LAPACKE_zhedmd_work", info ); + return info; + } + if( ldz < n ) { + info = -16; + LAPACKE_xerbla( "LAPACKE_zhedmd_work", info ); + return info; + } + if( ldb < n ) { + info = -19; + LAPACKE_xerbla( "LAPACKE_zhedmd_work", info ); + return info; + } + if( ldw < n ) { + info = -21; + LAPACKE_xerbla( "LAPACKE_zhedmd_work", info ); + return info; + } + if( lds < n ) { + info = -23; + LAPACKE_xerbla( "LAPACKE_zhedmd_work", info ); + return info; + } + /* Query optimal working array(s) size if requested */ + if( lwork == -1 ) { + LAPACK_zhedmd( &jobs, &jobz, &jobf, &whtsvd, &m, &n, x, &ldx, y, &ldy, + &k, reig, imeig, z, &ldz, res, b, &ldb, w, &ldw, s, &lds, + work, &lwork, iwork, &liwork, &info ); + return (info < 0) ? (info - 1) : info; + } + /* Allocate memory for temporary array(s) */ + x_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * ldx_t * MAX(1,n) ); + if( x_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_0; + } + y_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * ldy_t * MAX(1,n) ); + if( y_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_1; + } + z_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * ldz_t * MAX(1,n) ); + if( z_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_2; + } + b_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * ldb_t * MAX(1,n) ); + if( b_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_3; + } + w_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * ldw_t * MAX(1,n) ); + if( w_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_4; + } + s_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * lds_t * MAX(1,n) ); + if( s_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_5; + } + /* Transpose input matrices */ + LAPACKE_zhe_trans( matrix_layout, m, n, x, ldx, x_t, ldx_t ); + LAPACKE_zhe_trans( matrix_layout, m, n, y, ldy, y_t, ldy_t ); + LAPACKE_zhe_trans( matrix_layout, m, n, z, ldz, z_t, ldz_t ); + LAPACKE_zhe_trans( matrix_layout, m, n, b, ldb, b_t, ldb_t ); + LAPACKE_zhe_trans( matrix_layout, m, n, w, ldw, w_t, ldw_t ); + LAPACKE_zhe_trans( matrix_layout, m, n, s, lds, s_t, lds_t ); + /* Call LAPACK function and adjust info */ + LAPACK_zhedmd( &jobs, &jobz, &jobf, &whtsvd, &m, &n, x_t, &ldx_t, y_t, + &ldy_t, &k, reig, imeig, z_t, &ldz_t, res, b_t, &ldb_t, + w_t, &ldw_t, s_t, &lds_t, work, &lwork, iwork, &liwork, &info ); + if( info < 0 ) { + info = info - 1; + } + /* Transpose output matrices */ + LAPACKE_zhe_trans( LAPACK_COL_MAJOR, m, n, x_t, ldx_t, x, ldx ); + LAPACKE_zhe_trans( LAPACK_COL_MAJOR, m, n, y_t, ldy_t, y, ldy ); + LAPACKE_zhe_trans( LAPACK_COL_MAJOR, m, n, z_t, ldz_t, z, ldz ); + LAPACKE_zhe_trans( LAPACK_COL_MAJOR, m, n, b_t, ldb_t, b, ldb ); + LAPACKE_zhe_trans( LAPACK_COL_MAJOR, m, n, w_t, ldw_t, w, ldw ); + LAPACKE_zhe_trans( LAPACK_COL_MAJOR, m, n, s_t, lds_t, s, lds ); + /* Release memory and exit */ + LAPACKE_free( s_t ); +exit_level_5: + LAPACKE_free( w_t ); +exit_level_4: + LAPACKE_free( b_t ); +exit_level_3: + LAPACKE_free( z_t ); +exit_level_2: + LAPACKE_free( y_t ); +exit_level_1: + LAPACKE_free( x_t ); +exit_level_0: + if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_zhedmd_work", info ); + } + } else { + info = -1; + LAPACKE_xerbla( "LAPACKE_zhedmd_work", info ); + } + return info; +} diff --git a/LAPACKE/src/lapacke_zhedmdq.c b/LAPACKE/src/lapacke_zhedmdq.c new file mode 100644 index 0000000000..374ef19627 --- /dev/null +++ b/LAPACKE/src/lapacke_zhedmdq.c @@ -0,0 +1,123 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native high-level C interface to LAPACK function zhedmdq +* Author: Intel Corporation +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_zhedmdq( int matrix_layout, char jobs, char jobz, char jobr, + char jobq, char jobt, char jobf, lapack_int whtsvd, + lapack_int m, lapack_int n, lapack_complex_double* f, + lapack_int ldf, lapack_complex_double* x, + lapack_int ldx, lapack_complex_double* y, + lapack_int ldy, lapack_int nrnk, double tol, + lapack_int k, lapack_complex_double* reig, + lapack_complex_double* imeig, + lapack_complex_double* z, lapack_int ldz, + lapack_complex_double* res, lapack_complex_double* b, + lapack_int ldb, lapack_complex_double* v, + lapack_int ldv, lapack_complex_double* s, lapack_int lds) +{ + lapack_int info = 0; + lapack_int lwork = -1; + lapack_int liwork = -1; + lapack_complex_double* work = NULL; + lapack_int* iwork = NULL; + lapack_complex_double work_query; + lapack_int iwork_query; + if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) { + LAPACKE_xerbla( "LAPACKE_chedmdq", -1 ); + return -1; + } +#ifndef LAPACK_DISABLE_NAN_CHECK + if( LAPACKE_get_nancheck() ) { + /* Optionally check input matrices for NaNs */ + if( LAPACKE_zhe_nancheck( matrix_layout, m, n, f, ldf ) ) { + return -11; + } + if( LAPACKE_zhe_nancheck( matrix_layout, m, n, x, ldx ) ) { + return -13; + } + if( LAPACKE_zhe_nancheck( matrix_layout, m, n, y, ldy ) ) { + return -15; + } + if( LAPACKE_zhe_nancheck( matrix_layout, m, n, z, ldz ) ) { + return -22; + } + if( LAPACKE_zhe_nancheck( matrix_layout, m, n, b, ldb ) ) { + return -25; + } + if( LAPACKE_zhe_nancheck( matrix_layout, m, n, v, ldv ) ) { + return -27; + } + if( LAPACKE_zhe_nancheck( matrix_layout, m, n, s, lds ) ) { + return -29; + } + } +#endif + /* Query optimal working array(s) size */ + info = LAPACKE_zhedmdq_work( matrix_layout, jobs, jobz, jobr, jobq, jobt, + jobf, whtsvd, m, n, f, ldf, x, ldx, y, ldy, + nrnk, tol, k, reig, imeig, z, ldz, res, + b, ldb, v, ldv, s, lds, &work_query, lwork, + &iwork_query, liwork ); + + if( info != 0 ) { + goto exit_level_0; + } + lwork = LAPACK_Z2INT( work_query ); + liwork = iwork_query; + /* Allocate memory for work arrays */ + work = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * lwork ); + if( work == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_0; + } + iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork ); + if( iwork == NULL ) { + info = LAPACK_WORK_MEMORY_ERROR; + goto exit_level_1; + } + /* Call middle-level interface */ + info = LAPACKE_zhedmdq_work( matrix_layout, jobs, jobz, jobr, jobq, jobt, + jobf, whtsvd, m, n, f, ldf, x, ldx, y, ldy, + nrnk, tol, k, reig, imeig, z, ldz, res, + b, ldb, v, ldv, s, lds, work, lwork, iwork, + liwork ); + /* Release memory and exit */ + LAPACKE_free( iwork ); +exit_level_1: + LAPACKE_free( work ); +exit_level_0: + if( info == LAPACK_WORK_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_zhedmdq", info ); + } + return info; +} diff --git a/LAPACKE/src/lapacke_zhedmdq_work.c b/LAPACKE/src/lapacke_zhedmdq_work.c new file mode 100644 index 0000000000..443017afb1 --- /dev/null +++ b/LAPACKE/src/lapacke_zhedmdq_work.c @@ -0,0 +1,205 @@ +/***************************************************************************** + Copyright (c) 2014, Intel Corp. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + THE POSSIBILITY OF SUCH DAMAGE. +***************************************************************************** +* Contents: Native middle-level C interface to LAPACK function zhedmdq +* Author: Intel Corporation +*****************************************************************************/ + +#include "lapacke_utils.h" + +lapack_int LAPACKE_zhedmdq_work( int matrix_layout, char jobs, char jobz, + char jobr, char jobq, char jobt, char jobf, + lapack_int whtsvd, lapack_int m, lapack_int n, + lapack_complex_double* f, lapack_int ldf, + lapack_complex_double* x, lapack_int ldx, + lapack_complex_double* y, lapack_int ldy, + lapack_int nrnk, double tol, lapack_int k, + lapack_complex_double* reig, + lapack_complex_double* imeig, + lapack_complex_double* z, + lapack_int ldz, lapack_complex_double* res, + lapack_complex_double* b, + lapack_int ldb, lapack_complex_double* v, + lapack_int ldv, lapack_complex_double* s, + lapack_int lds, lapack_complex_double* work, + lapack_int lwork, lapack_int* iwork, + lapack_int liwork ) +{ + lapack_int info = 0; + if( matrix_layout == LAPACK_COL_MAJOR ) { + /* Call LAPACK function and adjust info */ + LAPACK_zhedmdq( &jobs, &jobz, &jobr, &jobq, &jobt, &jobf, &whtsvd, &m, + &n, f, &ldf, x, &ldx, y, &ldy, &nrnk, &tol, &k, reig, + imeig, z, &ldz, res, b, &ldb, v, &ldv, s, &lds, + work, &lwork, iwork, &liwork, &info ); + if( info < 0 ) { + info = info - 1; + } + } else if( matrix_layout == LAPACK_ROW_MAJOR ) { + lapack_int ldf_t = MAX(1,m); + lapack_int ldx_t = MAX(1,m); + lapack_int ldy_t = MAX(1,m); + lapack_int ldz_t = MAX(1,m); + lapack_int ldb_t = MAX(1,m); + lapack_int ldv_t = MAX(1,m); + lapack_int lds_t = MAX(1,m); + lapack_complex_double* f_t = NULL; + lapack_complex_double* x_t = NULL; + lapack_complex_double* y_t = NULL; + lapack_complex_double* z_t = NULL; + lapack_complex_double* b_t = NULL; + lapack_complex_double* v_t = NULL; + lapack_complex_double* s_t = NULL; + /* Check leading dimension(s) */ + if( ldf < n ) { + info = -12; + LAPACKE_xerbla( "LAPACKE_zhedmdq_work", info ); + return info; + } + if( ldx < n ) { + info = -14; + LAPACKE_xerbla( "LAPACKE_zhedmdq_work", info ); + return info; + } + if( ldy < n ) { + info = -16; + LAPACKE_xerbla( "LAPACKE_zhedmdq_work", info ); + return info; + } + if( ldz < n ) { + info = -23; + LAPACKE_xerbla( "LAPACKE_zhedmdq_work", info ); + return info; + } + if( ldb < n ) { + info = -26; + LAPACKE_xerbla( "LAPACKE_zhedmdq_work", info ); + return info; + } + if( ldv < n ) { + info = -28; + LAPACKE_xerbla( "LAPACKE_zhedmdq_work", info ); + return info; + } + if( lds < n ) { + info = -30; + LAPACKE_xerbla( "LAPACKE_zhedmdq_work", info ); + return info; + } + /* Query optimal working array(s) size if requested */ + if( lwork == -1 || liwork == -1 ) { + LAPACK_zhedmdq( &jobs, &jobz, &jobr, &jobq, &jobt, &jobf, &whtsvd, &m, + &n, f, &ldf, x, &ldx, y, &ldy, &nrnk, &tol, &k, reig, + imeig, z, &ldz, res, b, &ldb, v, &ldv, s, &lds, + work, &lwork, iwork, &liwork, &info ); + return (info < 0) ? (info - 1) : info; + } + /* Allocate memory for temporary array(s) */ + f_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * ldf_t * MAX(1,n) ); + if( f_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_0; + } + x_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * ldx_t * MAX(1,n) ); + if( x_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_1; + } + y_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * ldy_t * MAX(1,n) ); + if( y_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_2; + } + z_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * ldz_t * MAX(1,n) ); + if( z_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_3; + } + b_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * ldb_t * MAX(1,n) ); + if( b_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_4; + } + v_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * ldv_t * MAX(1,n) ); + if( v_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_5; + } + s_t = (lapack_complex_double*)LAPACKE_malloc( sizeof(lapack_complex_double) * lds_t * MAX(1,n) ); + if( s_t == NULL ) { + info = LAPACK_TRANSPOSE_MEMORY_ERROR; + goto exit_level_6; + } + /* Transpose input matrices */ + LAPACKE_zhe_trans( matrix_layout, m, n, f, ldf, f_t, ldf_t ); + LAPACKE_zhe_trans( matrix_layout, m, n, x, ldx, x_t, ldx_t ); + LAPACKE_zhe_trans( matrix_layout, m, n, y, ldy, y_t, ldy_t ); + LAPACKE_zhe_trans( matrix_layout, m, n, z, ldz, z_t, ldz_t ); + LAPACKE_zhe_trans( matrix_layout, m, n, b, ldb, b_t, ldb_t ); + LAPACKE_zhe_trans( matrix_layout, m, n, v, ldv, v_t, ldv_t ); + LAPACKE_zhe_trans( matrix_layout, m, n, s, lds, s_t, lds_t ); + /* Call LAPACK function and adjust info */ + LAPACK_zhedmdq( &jobs, &jobz, &jobr, &jobq, &jobt, &jobf, &whtsvd, &m, + &n, f, &ldf, x, &ldx, y, &ldy, &nrnk, &tol, &k, reig, + imeig, z, &ldz, res, b, &ldb, v, &ldv, s, &lds, + work, &lwork, iwork, &liwork, &info ); + if( info < 0 ) { + info = info - 1; + } + /* Transpose output matrices */ + LAPACKE_zhe_trans( LAPACK_COL_MAJOR, m, n, f_t, ldf_t, f, ldf ); + LAPACKE_zhe_trans( LAPACK_COL_MAJOR, m, n, x_t, ldx_t, x, ldx ); + LAPACKE_zhe_trans( LAPACK_COL_MAJOR, m, n, y_t, ldy_t, y, ldy ); + LAPACKE_zhe_trans( LAPACK_COL_MAJOR, m, n, z_t, ldz_t, z, ldz ); + LAPACKE_zhe_trans( LAPACK_COL_MAJOR, m, n, b_t, ldb_t, b, ldb ); + LAPACKE_zhe_trans( LAPACK_COL_MAJOR, m, n, v_t, ldv_t, v, ldv ); + LAPACKE_zhe_trans( LAPACK_COL_MAJOR, m, n, s_t, lds_t, s, lds ); + /* Release memory and exit */ + LAPACKE_free( s_t ); +exit_level_6: + LAPACKE_free( v_t ); +exit_level_5: + LAPACKE_free( b_t ); +exit_level_4: + LAPACKE_free( z_t ); +exit_level_3: + LAPACKE_free( y_t ); +exit_level_2: + LAPACKE_free( x_t ); +exit_level_1: + LAPACKE_free( f_t ); +exit_level_0: + if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) { + LAPACKE_xerbla( "LAPACKE_zhedmdq_work", info ); + } + } else { + info = -1; + LAPACKE_xerbla( "LAPACKE_zhedmdq_work", info ); + } + return info; +} diff --git a/SRC/CMakeLists.txt b/SRC/CMakeLists.txt index cb33de166b..4a609243f3 100644 --- a/SRC/CMakeLists.txt +++ b/SRC/CMakeLists.txt @@ -158,7 +158,7 @@ set(SLASRC ssytrd_2stage.f ssytrd_sy2sb.f ssytrd_sb2st.F ssb2st_kernels.f ssyevd_2stage.f ssyev_2stage.f ssyevx_2stage.f ssyevr_2stage.f ssbev_2stage.f ssbevx_2stage.f ssbevd_2stage.f ssygv_2stage.f - sgesvdq.f sgedmd.f90 sgedmdq.f90) + sgesvdq.f sgedmd.f90 sgedmdq.f90 ssydmd.f90 ssydmdq.f90) set(DSLASRC sgetrf.f sgetrf2.f sgetrs.f sisnan.f slaisnan.f slaswp.f spotrf.f spotrf2.f @@ -262,7 +262,7 @@ set(CLASRC chetrd_2stage.f chetrd_he2hb.f chetrd_hb2st.F chb2st_kernels.f cheevd_2stage.f cheev_2stage.f cheevx_2stage.f cheevr_2stage.f chbev_2stage.f chbevx_2stage.f chbevd_2stage.f chegv_2stage.f - cgesvdq.f cgedmd.f90 cgedmdq.f90) + cgesvdq.f cgedmd.f90 cgedmdq.f90 chedmd.f90 chedmdq.f90) set(CXLASRC cgesvxx.f cgerfsx.f cla_gerfsx_extended.f cla_geamv.f cla_gercond_c.f cla_gercond_x.f cla_gerpvgrw.f @@ -360,7 +360,7 @@ set(DLASRC dsytrd_2stage.f dsytrd_sy2sb.f dsytrd_sb2st.F dsb2st_kernels.f dsyevd_2stage.f dsyev_2stage.f dsyevx_2stage.f dsyevr_2stage.f dsbev_2stage.f dsbevx_2stage.f dsbevd_2stage.f dsygv_2stage.f - dgesvdq.f dgedmd.f90 dgedmdq.f90) + dgesvdq.f dgedmd.f90 dgedmdq.f90 dsydmd.f90 dsydmdq.f90) set(DXLASRC dgesvxx.f dgerfsx.f dla_gerfsx_extended.f dla_geamv.f dla_gercond.f dla_gerpvgrw.f dsysvxx.f dsyrfsx.f @@ -464,7 +464,7 @@ set(ZLASRC zhetrd_2stage.f zhetrd_he2hb.f zhetrd_hb2st.F zhb2st_kernels.f zheevd_2stage.f zheev_2stage.f zheevx_2stage.f zheevr_2stage.f zhbev_2stage.f zhbevx_2stage.f zhbevd_2stage.f zhegv_2stage.f - zgesvdq.f zgedmd.f90 zgedmdq.f90) + zgesvdq.f zgedmd.f90 zgedmdq.f90 zhedmd.f90 zhedmdq.f90) set(ZXLASRC zgesvxx.f zgerfsx.f zla_gerfsx_extended.f zla_geamv.f zla_gercond_c.f zla_gercond_x.f zla_gerpvgrw.f zsysvxx.f zsyrfsx.f diff --git a/SRC/Makefile b/SRC/Makefile index 54697da0e0..92dfa6a0db 100644 --- a/SRC/Makefile +++ b/SRC/Makefile @@ -191,7 +191,7 @@ SLASRC = \ ssytrd_2stage.o ssytrd_sy2sb.o ssytrd_sb2st.o ssb2st_kernels.o \ ssyevd_2stage.o ssyev_2stage.o ssyevx_2stage.o ssyevr_2stage.o \ ssbev_2stage.o ssbevx_2stage.o ssbevd_2stage.o ssygv_2stage.o \ - sgesvdq.o sgedmd.o sgedmdq.o + sgesvdq.o sgedmd.o sgedmdq.o ssydmd.o ssydmdq.o DSLASRC = spotrs.o sgetrs.o spotrf.o sgetrf.o @@ -295,7 +295,7 @@ CLASRC = \ chetrd_2stage.o chetrd_he2hb.o chetrd_hb2st.o chb2st_kernels.o \ cheevd_2stage.o cheev_2stage.o cheevx_2stage.o cheevr_2stage.o \ chbev_2stage.o chbevx_2stage.o chbevd_2stage.o chegv_2stage.o \ - cgesvdq.o cgedmd.o cgedmdq.o + cgesvdq.o cgedmd.o cgedmdq.o chedmd.o chedmdq.o ifdef USEXBLAS CXLASRC = cgesvxx.o cgerfsx.o cla_gerfsx_extended.o cla_geamv.o \ @@ -394,7 +394,7 @@ DLASRC = \ dsytrd_2stage.o dsytrd_sy2sb.o dsytrd_sb2st.o dsb2st_kernels.o \ dsyevd_2stage.o dsyev_2stage.o dsyevx_2stage.o dsyevr_2stage.o \ dsbev_2stage.o dsbevx_2stage.o dsbevd_2stage.o dsygv_2stage.o \ - dgesvdq.o dgedmd.o dgedmdq.o + dgesvdq.o dgedmd.o dgedmdq.o dsydmd.o dsydmdq.o ifdef USEXBLAS DXLASRC = dgesvxx.o dgerfsx.o dla_gerfsx_extended.o dla_geamv.o \ @@ -501,7 +501,7 @@ ZLASRC = \ zhetrd_2stage.o zhetrd_he2hb.o zhetrd_hb2st.o zhb2st_kernels.o \ zheevd_2stage.o zheev_2stage.o zheevx_2stage.o zheevr_2stage.o \ zhbev_2stage.o zhbevx_2stage.o zhbevd_2stage.o zhegv_2stage.o \ - zgesvdq.o zgedmd.o zgedmdq.o + zgesvdq.o zgedmd.o zgedmdq.o zhedmd.o zhedmdq.o ifdef USEXBLAS ZXLASRC = zgesvxx.o zgerfsx.o zla_gerfsx_extended.o zla_geamv.o \ diff --git a/SRC/chedmd.f90 b/SRC/chedmd.f90 new file mode 100644 index 0000000000..13e86c6b37 --- /dev/null +++ b/SRC/chedmd.f90 @@ -0,0 +1,1170 @@ + SUBROUTINE CHEDMD( JOBS, JOBZ, JOBR, JOBF, & + WHTSVD, WHTSYM, WHTEIG, & + M, N, X, LDX, Y, LDY, NRNK, TOL, & + K, EIGS, Z, LDZ, RES, & + B, LDB, W, LDW, S, LDS, & + ZWORK, LZWORK, RWORK, LRWORK, & + IWORK, LIWORK, INFO ) +!..... + USE iso_fortran_env + IMPLICIT NONE + INTEGER, PARAMETER :: WP = real32 +!..... +! Scalar arguments + CHARACTER, INTENT(IN) :: JOBS, JOBZ, JOBR, JOBF + INTEGER, INTENT(IN) :: WHTSVD, WHTSYM, WHTEIG, & + M, N, LDX, LDY, & + NRNK, LDZ, LDB, LDW, LDS,& + LIWORK, LRWORK, LZWORK + INTEGER, INTENT(OUT) :: K, INFO + REAL(KIND=WP), INTENT(IN) :: TOL +! Array arguments + COMPLEX(KIND=WP), INTENT(INOUT) :: X(LDX,*), Y(LDY,*) + COMPLEX(KIND=WP), INTENT(OUT) :: Z(LDZ,*), B(LDB,*), & + W(LDW,*), S(LDS,*) + REAL(KIND=WP), INTENT(OUT) :: EIGS(*) + COMPLEX(KIND=WP), INTENT(OUT) :: ZWORK(*) + REAL(KIND=WP), INTENT(OUT) :: RES(*) + REAL(KIND=WP), INTENT(OUT) :: RWORK(*) + INTEGER, INTENT(OUT) :: IWORK(*) +!............................................................ +! Purpose +! ======= +! CHEDMD computes the Dynamic Mode Decomposition (DMD) for +! a pair of data snapshot matrices. For the input matrices +! X and Y such that Y = A*X with an unaccessible Hermitian +! matrix A, CHEDMD computes a certain number of Ritz pairs +! of A using the standard Rayleigh-Ritz extraction from a +! subspace of range(X) that is determined using the leading +! left singular vectors of X. Optionally, CHEDMD returns +! the residuals of the computed Ritz pairs, the information +! needed for a refinement of the Ritz vectors, or the +! eigenvectors of the Exact DMD. +! For furter details see the references listed below. +! For more details of the implementation see [3], [4]. +! +! References +! ========== +! [1] P. Schmid: Dynamic mode decomposition of numerical +! and experimental data, +! Journal of Fluid Mechanics 656, 5-28, 2010. +! [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal +! decompositions: analysis and enhancements, +! SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018. +! [3] Z. Drmac: A LAPACK implementation of the Dynamic +! Mode Decomposition I. Technical report. AIMDyn Inc. +! October 2022, and LAPACK Working Note 298. +! [4] Z. Drmac: A LAPACK implementation of the Dynamic +! Mode Decomposition II. The symmetric/Hermitian DMD +! (xSYDMD/xHEDMD) Technical report. AIMDyn Inc. +! November 2022. LAPACK Working Note 300. +! [5] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L. +! Brunton, N. Kutz: On Dynamic Mode Decomposition: +! Theory and Applications, Journal of Computational +! Dynamics 1(2), 391 -421, 2014. +! [6] P. J. Baddoo, B. Herrmann, B. J. McKeon, +! J. N. Kutz, S. L. Brunton: Physics-informed +! dynamic mode decomposition (piDMD), arXiv:2112.04307. +! +!...................................................................... +! Developed and supported by: +! =========================== +! Developed and coded by Zlatko Drmac, Faculty of Science, +! University of Zagreb; drmac@math.hr +! In cooperation with +! AIMdyn Inc., Santa Barbara, CA. +! and supported by +! - DARPA SBIR project "Koopman Operator-Based Forecasting +! for Nonstationary Processes from Near-Term, Limited +! Observational Data" Contract No: W31P4Q-21-C-0007 +! - DARPA PAI project "Physics-Informed Machine Learning +! Methodologies" Contract No: HR0011-18-9-0033 +! - DARPA MoDyL project "A Data-Driven, Operator-Theoretic +! Framework for Space-Time Analysis of Process Dynamics" +! Contract No: HR0011-16-C-0116 +! Any opinions, findings and conclusions or recommendations +! expressed in this material are those of the author and +! do not necessarily reflect the views of the DARPA SBIR +! Program Office +!============================================================ +! Distribution Statement A: +! Approved for Public Release, Distribution Unlimited. +! +!============================================================ +!............................................................ +! Arguments +! ========= +! JOBS (input) CHARACTER*1 +! Determines whether the initial data snapshots are scaled +! by a diagonal matrix. +! 'S' :: The data snapshots matrices X and Y are multiplied +! with a diagonal matrix D so that X*D has unit +! nonzero columns (in the Euclidean 2-norm) +! 'C' :: The snapshots are scaled as with the 'S' option. +! If it is found that an i-th column of X is zero +! vector and the corresponding i-th column of Y is +! non-zero, then the i-th column of Y is set to +! zero and a warning flag is raised. +! 'Y' :: The data snapshots matrices X and Y are multiplied +! by a diagonal matrix D so that Y*D has unit +! nonzero columns (in the Euclidean 2-norm) +! 'N' :: No data scaling. +!..... +! JOBZ (input) CHARACTER*1 +! Determines whether the eigenvectors (Koopman modes) will +! be computed. +! 'V' :: The eigenvectors (Koopman modes) will be computed +! and returned in the matrix Z. +! See the description of Z. +! 'F' :: The eigenvectors (Koopman modes) will be returned +! in factored form as the product X(:,1:K)*W, where X +! contains a POD basis (leading left singular vectors +! of the data matrix X) and W contains the eigenvectors +! of the corresponding Rayleigh quotient. +! See the descriptions of K, X, W, Z. +! 'N' :: The eigenvectors are not computed. +!..... +! JOBR (input) CHARACTER*1 +! Determines whether to compute the residuals. +! 'R' :: The residuals for the computed eigenpairs will be +! computed and stored in the array RES. +! See the description of RES. +! For this option to be legal, JOBZ must be 'V'. +! 'N' :: The residuals are not computed. +!..... +! JOBF (input) CHARACTER*1 +! Specifies whether to store information needed for post- +! processing (e.g. computing refined Ritz vectors) +! 'R' :: The matrix needed for the refinement of the Ritz +! vectors is computed and stored in the array B. +! See the description of B. +! 'E' :: The unscaled eigenvectors of the Exact DMD are +! computed and returned in the array B. See the +! description of B. +! 'X' :: The Exact DMD vectors are orthogonalized and +! returned in the array B. To preserve the +! residuals of the orthogonalized EDMD vectors +! they are reordered and the reordering permutation +! is stored and returned in the array IWORK. +! See the descriptions of B and IWORK, and [4]. +! 'N' :: No eigenvector refinement data is computed. +!..... +! WHTSVD (input) INTEGER, WHSTVD in { 1, 2, 3, 4 } +! Allows for a selection of the SVD algorithm from the +! LAPACK library. +! 1 :: CGESVD (the QR SVD algorithm) +! 2 :: CGESDD (the Divide and Conquer algortihm; if enough +! workspace available, this is the fastest option) +! 3 :: CGESVDQ (the preconditioned QR SVD ; this and 4 +! are the most accurate options) +! 4 :: CGEJSV (the preconditioned Jacobi SVD; this and 3 +! are the most accurate options) +! For the four methods above, a significant difference in +! the accuracy of small singular values is possible if +! the snapshots vary in norm so that X is severely +! ill-conditioned. If small (smaller than EPS*||X||) +! singular values are of interest and JOBS=='N', then +! the options (3, 4) give the most accurate results, where +! the option 4 is slightly better and with stronger +! theoretical background. +! If JOBS=='S', i.e. the columns of X will be normalized, +! then all methods give nearly equally accurate results. +!..... +! WHTSYM (input) INTEGER +! Specifies the method for restoring the symmetry of the +! Rayleigh quotient. +! 1 :: The lower triangle of the computed Rayleigh +! quotient is used to symmetrize the matrix, +! 2 :: The formulas for the lower triangle of a +! truncated solution of the Hermitian Procrustes +! problem are used to symmetrize the computed +! Rayleigh quotient. +!..... +! WHTEIG (input) INTEGER +! Specifies the symmetric eigensolver to compute the +! eigenvalues and eigenvectors of the Hermitian Rayleigh +! quotient. +! 1 :: CHEEV (the QR algorithm) +! 2 :: CHEEVD (the divide and conquer algorithm) +!..... +! M (input) INTEGER, M>= 0 +! The state space dimension (the row dimension of X, Y). +!..... +! N (input) INTEGER, 0 <= N <= M +! The number of data snapshot pairs +! (the number of columns of X and Y). +!..... +! X (input/output) COMPLEX(KIND=WP) M-by-N array +! > On entry, X contains the data snapshot matrix X. It is +! assumed that the column norms of X are in the range of +! the normalized floating point numbers. +! < On exit, the leading K columns of X contain a POD basis, +! i.e. the leading K left singular vectors of the input +! data matrix X, U(:,1:K). All N columns of X contain all +! left singular vectors of the input matrix X. +! See the descriptions of K, Z and W. +!..... +! LDX (input) INTEGER, LDX >= M +! The leading dimension of the array X. +!..... +! Y (input/workspace/output) COMPLEX(KIND=WP) M-by-N array +! > On entry, Y contains the data snapshot matrix Y +! < On exit, +! If JOBR == 'R', the leading K columns of Y contain +! the residual vectors for the computed Ritz pairs. +! See the description of RES. +! If JOBR == 'N', Y contains the original input data. +!..... +! LDY (input) INTEGER , LDY >= M +! The leading dimension of the array Y. +!..... +! NRNK (input) INTEGER +! Determines the mode how to compute the numerical rank, +! i.e. how to truncate small singular values of the input +! matrix X. On input, if +! NRNK = -1 :: i-th singular value sigma(i) is truncated +! if sigma(i) <= TOL*sigma(1) +! NRNK = -2 :: i-th singular value sigma(i) is truncated +! if sigma(i) <= TOL*sigma(i-1) +! The numerical rank can be enforced by using positive +! value of NRNK as follows: +! 0 < NRNK <= N :: at most NRNK largest singular values +! will be used. If the number of the computed nonzero +! singular values is less than NRNK, then only those +! nonzero values will be used and the actually used +! dimension is less than NRNK. The actual number of +! the nonzero singular values is returned in the variable +! K. See the descriptions of TOL and K. +!..... +! TOL (input) REAL(KIND=WP), 0 <= TOL < 1 +! The tolerance for truncating small singular values. +! See the description of NRNK. +!..... +! K (output) INTEGER, 0 <= K <= N +! The dimension of the POD basis for the data snapshot +! matrix X and the number of the computed Ritz pairs. +! The value of K is determinet according to the rule set +! by the parameters NRNK and TOL. +! See the descriptions of NRNK and TOL. +!..... +! EIGS (output) REAL(KIND=WP) N-by-1 array +! The leading K (K<=N) entries of EIGS contain +! the computed eigenvalues in ascending order. +! If the eigenvectors are requested, then Z(:,i) +! corresponds to EIGS(i). If JOBF == 'X', then +! orthonormalised Exact DMD vectors are stored +! in the array B and to the eigenvector B(:,i) +! the corresponding eigenvalue is EIGS(IWORK(i)). +! See the descriptions of K, Z, B and IWORK. +!..... +! Z (workspace/output) COMPLEX(KIND=WP) M-by-N array +! If JOBZ =='V' then +! Z contains Ritz vectors. +! If JOBZ == 'F', then the above descriptions hold for +! the columns of X(:,1:K)*W(1:K,1:K), where the columns +! of W(1:k,1:K) are the computed eigenvectors of the +! K-by-K Rayleigh quotient. +! See the descriptions of EIGS, X and W. +!..... +! LDZ (input) INTEGER , LDZ >= M +! The leading dimension of the array Z. +!..... +! RES (output) REAL(KIND=WP) N-by-1 array +! RES(1:K) contains the residuals for the K computed +! Ritz pairs. +! RES(i) = || A * Z(:,i) - EIGS(i)*Z(:,i))||_2. +! If JOBF == 'X', the array IWORK on exit +! contains the permutation that sorts RES in +! ascending order. +! See the description of JOBF, EIGS, Z and IWORK. +!..... +! B (output) COMPLEX(KIND=WP) M-by-N array. +! IF JOBF =='R', B(1:M,1:K) contains A*U(:,1:K), and can +! be used for computing the refined vectors; see further +! details in the provided references. +! If JOBF == 'E', B(1:M,1;K) contains +! A*U(:,1:K)*W(1:K,1:K), which are the vectors from the +! Exact DMD, up to scaling by the inverse eigenvalues. +! Note that the EDMD vectors may not be even numerically +! orthogonal and that the non-orthogonality may be +! substantial. +! If JOBF == 'X', then the EDMD vectors +! A*U(:,1:K)*W(1:K,1:K) are orthonormalized. To preserve +! information on the residuals, they are reordered and +! the reordering permutation is stored in the array IWORK. +! If JOBF =='N', then B is not referenced. +! See the descriptions of JOBF, X, W, K, IWORK. +!..... +! LDB (input) INTEGER, LDB >= M +! The leading dimension of the array B. +!..... +! W (workspace/output) COMPLEX(KIND=WP) N-by-N array +! On exit, W(1:K,1:K) contains the K computed +! eigenvectors of the matrix Rayleigh quotient. +! The Ritz vectors (returned in Z) are the +! product of X (containing a POD basis for the input +! matrix X) and W. See the descriptions of K, S, X and Z. +! W is also used as a workspace to temporarily store the +! left singular vectors of X. +!..... +! LDW (input) INTEGER, LDW >= N +! The leading dimension of the array W. +!..... +! S (workspace/output) COMPLEX(KIND=WP) N-by-N array +! The array S(1:K,1:K) is used for the matrix Rayleigh +! quotient. This content is overwritten during +! the eigenvalue decomposition. +! See the description of K. +!..... +! LDS (input) INTEGER, LDS >= N +! The leading dimension of the array S. +!..... +! ZWORK (workspace/output) COMPLEX(KIND=WP) LZWORK-by-1 array +! ZWORK is used as complex workspace in the complex SVD, as +! specified by WHTSVD (1,2, 3 or 4) and for CGEEV for computing +! the eigenvalues of a Rayleigh quotient. +! If the call to CHEDMD is only workspace query, then +! ZWORK(1) contains the minimal complex workspace length and +! ZWORK(2) is the optimal complex workspace length. +! Hence, the length of ZWORK is at least 2. +! See the description of LZWORK. +!..... +! LZWORK (input) INTEGER +! The minimal length of the workspace vector ZWORK. +! LZWORK is calculated as MAX(LZWORK_SVD, LZWORK_CHEEV), +! where +! for WHTEIG == 1 (CHEEV) LZWORK_CHEEV = MAX(1,2*N-1) +! for WHTEIG == 2 (CHEEVD) LZWORK_CHEEV = 2*N+N*N (JOBZ=='V') +! LZWORK_CHEEV = N+1 (JOBZ=='N') +! and the minimal +! LZWORK_SVD is calculated as follows +! If WHTSVD == 1 :: CGESVD :: +! LZWORK_SVD = MAX(1,2*MIN(M,N)+MAX(M,N)) +! If WHTSVD == 2 :: CGESDD :: +! LZWORK_SVD = 2*MIN(M,N)*MIN(M,N)+2*MIN(M,N)+MAX(M,N) +! If WHTSVD == 3 :: CGESVDQ :: +! LZWORK_SVD = obtainable by a query +! If WHTSVD == 4 :: CGEJSV :: +! LZWORK_SVD = obtainable by a query +! Further, if JOBF=='X', then LZWORK is +! MAX(LWORK_SVD, LWORK_CHEEV,2*N+N), where N+2*N is needed +! for CGEQRF and CUNGQR. +! If on entry LZWORK = -1, then a workspace query is +! assumed and the procedure only computes the minimal +! and the optimal workspace lengths and returns them in +! LZWORK(1) and LZORK(2), respectively. +!..... +! RWORK (workspace/output) REAL(KIND=WP) LRWORK-by-1 array +! On exit, RWORK(1:N) contains the singular values of +! X (for JOBS=='N') or column scaled X (JOBS=='S', 'C'). +! If WHTSVD==4, then RWORK(N+1) and RWORK(N+2) contain +! scaling factor RWORK(N+2)/RWORK(N+1) used to scale X +! and Y to avoid overflow in the SVD of X. +! This may be of interest if the scaling option is off +! and as many as possible smallest eigenvalues are +! desired to the highest feasible accuracy. +! If the call to CHEDMD is only workspace query, then +! RWORK(1) contains the minimal workspace length and +! RWORK(2) is the optimal workspace length. Hence, the +! length of RWORK is at least 2. +! See the description of LRWORK. +!..... +! LRWORK (input) INTEGER +! The minimal length of the workspace vector RWORK. +! LRWORK is calculated as follows: +! LRWORK = MAX(1, N+LRWORK_SVD,N+LRWORK_CHEEV), where +! RWORK_SVD is the real workspace for the SVD +! subroutine determined by the input parameter +! WHTSVD. +! If WHTSVD == 1 :: CGESVD :: +! LRWORK_SVD = 5*MIN(M,N) +! If WHTSVD == 2 :: CGESDD :: +! LRWORK_SVD = MAX(5*MIN(M,N)*MIN(M,N)+7*MIN(M,N), +! 2*MAX(M,N)*MIN(M,N)+2*MIN(M,N)*MIN(M,N)+MIN(M,N) ) ) +! If WHTSVD == 3 :: CGESVDQ :: +! LRWORK_SVD = obtainable by a query +! If WHTSVD == 4 :: CGEJSV :: +! LRWORK_SVD = obtainable by a query +! LRWORK_CHEEV is the real workspace needed in the +! Hermitian eigensolver. +! If WHTEIG == 1 :: CHEEV :: LWORK_CHEEV = 3*N-2 +! If WHTEIG == 2 :: CHEEVD :: +! If JOBZ == 'V', LWORK_CHEEV = 1+5*N+2*N*N +! If JOBZ == 'N', LWORK_CHEEV = N +! In any case, LRWORK >= 2. +! If on entry LRWORK = -1, then a workspace query is +! assumed and the procedure only computes the minimal +! and the optimal workspace lengths for both WORK and +! IWORK. See the descriptions of WORK and IWORK. +!..... +! IWORK (workspace/output) INTEGER LIWORK-by-1 array +! Workspace that is required if WHTSVD equals +! 2 , 3 or 4. Further, if JOBF=='X', it is used to return +! ordering of the orthonormalized Exact DMD eigenvectors, +! so that EIGS(IWORK(i)) is the eigenvalue that corresponds to +! the i-th EDMD vector. See the descriptions of JOBF and B. +! If on entry LWORK =-1 or LIWORK=-1, then the +! minimal length of IWORK is computed and returned in +! IWORK(1). See the description of LIWORK. +!..... +! LIWORK (input) INTEGER +! The minimal length of the workspace vector IWORK. +! LIWORK is determined in two steps. First: +! If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1 +! If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M,N)) +! If WHTSVD == 3, then LIWORK >= MAX(1,M+N-1) +! If WHTSVD == 4, then LIWORK >= MAX(3,M+3*N) +! Then, if JOBF == 'X', then LIWORK = MAX(LIWORK,N). +! If on entry LIWORK = -1, then a workspace query is +! assumed and the procedure only computes the minimal +! and the optimal workspace lengths for both WORK and +! IWORK. See the descriptions of WORK and IWORK. +!..... +! INFO (output) INTEGER +! -i < 0 :: On entry, the i-th argument had an +! illegal value +! = 0 :: Successful return. +! = 1 :: Void input. Quick exit (M=0 or N=0). +! = 2 :: The SVD computation of X did not converge. +! Suggestion: Check the input data and/or +! repeat with different WHTSVD. +! = 3 :: The computation of the eigenvalues did not +! converge. +! = 4 :: If data scaling was requested on input and +! the procedure found inconsistency in the data +! such that for some column index i, +! X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set +! to zero if JOBS=='C'. The computation proceeds +! with original or modified data and warning +! flag is set with INFO=4. +!............................................................. +!............................................................. +! Parameters +! ~~~~~~~~~~ + REAL(KIND=WP), PARAMETER :: ONE = 1.0_WP + REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP + COMPLEX(KIND=WP), PARAMETER :: ZONE = ( 1.0_WP, 0.0_WP ) + COMPLEX(KIND=WP), PARAMETER :: ZZERO = ( 0.0_WP, 0.0_WP ) + +! Local scalars +! ~~~~~~~~~~~~~ + REAL(KIND=WP) :: OFL, ROOTSC, SCALE, SMALL, & + SSUM, XSCL1, XSCL2 + INTEGER :: i, j, IMINWR, INFO1, INFO2, & + IWRSDD, LWRKEV, LWRSDD, LWRSVD, & + LWRSVJ, LWRSVQ, MLWORK, MWRKEV, & + MWRSDD, MWRSVD, MWRSVJ, MWRSVQ, & + NUMRNK, OLWORK, MLRWRK + LOGICAL :: BADXY, FORWRD, LQUERY, SCCOLX, & + SCCOLY, WNTEX, WNTREF, WNTRES, & + WNTVEC + CHARACTER :: JOBZL, T_OR_N + CHARACTER :: JSVOPT + +! Local arrays +! ~~~~~~~~~~~~ + REAL(KIND=WP) :: RDUMMY(2), RDUMMY2(2) +! External funcions (BLAS and LAPACK) +! ~~~~~~~~~~~~~~~~~ + REAL(KIND=WP) CLANGE, SLAMCH, SCNRM2 + EXTERNAL CLANGE, SLAMCH, SCNRM2, ICAMAX, ISAMIN + INTEGER ICAMAX, ISAMIN + LOGICAL SISNAN, LSAME + EXTERNAL SISNAN, LSAME + +! External subroutines (BLAS and LAPACK) +! ~~~~~~~~~~~~~~~~~~~~ + EXTERNAL CAXPY, CGEMM, CSSCAL + EXTERNAL CHEEV, CHEEVD, CGEJSV, CGEQRF, CGESDD, & + CGESVD, CGESVDQ, CLACPY, CLAPMT, CLASCL, & + CLASSQ, CUNGQR, XERBLA + +! Intrinsic functions +! ~~~~~~~~~~~~~~~~~~~ + INTRINSIC CMPLX, CONJG, DBLE, INT, MAX, SQRT +!............................................................ +! +! Test the input arguments +! + WNTRES = LSAME(JOBR,'R') + SCCOLX = LSAME(JOBS,'S') .OR. LSAME(JOBS,'C') + SCCOLY = LSAME(JOBS,'Y') + WNTVEC = LSAME(JOBZ,'V') + WNTREF = LSAME(JOBF,'R') + WNTEX = LSAME(JOBF,'E') .OR. LSAME(JOBF,'X') + INFO = 0 + LQUERY = ( ( LZWORK == -1 ) .OR. ( LIWORK == -1 ) & + .OR. ( LRWORK == -1 ) ) +! + IF ( .NOT. (SCCOLX .OR. SCCOLY .OR. & + LSAME(JOBS,'N')) ) THEN + INFO = -1 + ELSE IF ( .NOT. (WNTVEC .OR. LSAME(JOBZ,'N') & + .OR. LSAME(JOBZ,'F')) ) THEN + INFO = -2 + ELSE IF ( .NOT. (WNTRES .OR. LSAME(JOBR,'N')) .OR. & + ( WNTRES .AND. (.NOT.WNTVEC) ) ) THEN + INFO = -3 + ELSE IF ( .NOT. (WNTREF .OR. WNTEX .OR. & + LSAME(JOBF,'N') ) ) THEN + INFO = -4 + ELSE IF ( .NOT.((WHTSVD == 1) .OR. (WHTSVD == 2) .OR. & + (WHTSVD == 3) .OR. (WHTSVD == 4) ))THEN + INFO = -5 + ELSE IF ( .NOT.((WHTSYM == 1) .OR. (WHTSYM == 2))) THEN + INFO = -6 + ELSE IF ( .NOT.((WHTEIG == 1) .OR. (WHTEIG == 2))) THEN + INFO = -7 + ELSE IF ( M < 0 ) THEN + INFO = -8 + ELSE IF ( ( N < 0 ) .OR. ( N > M ) ) THEN + INFO = -9 + ELSE IF ( LDX < M ) THEN + INFO = -11 + ELSE IF ( LDY < M ) THEN + INFO = -13 + ELSE IF ( .NOT. (( NRNK == -2).OR.(NRNK == -1).OR. & + ((NRNK >= 1).AND.(NRNK <=N ))) ) THEN + INFO = -14 + ELSE IF ( ( TOL < ZERO ) .OR. ( TOL >= ONE ) ) THEN + INFO = -15 + ELSE IF ( LDZ < M ) THEN + INFO = -18 + ELSE IF ( (WNTREF .OR. WNTEX ) .AND. ( LDB < M ) ) THEN + INFO = -22 + ELSE IF ( LDW < N ) THEN + INFO = -24 + ELSE IF ( LDS < N ) THEN + INFO = -26 + END IF +! + IF ( INFO == 0 ) THEN + ! Compute the minimal and the optimal workspace + ! requirements. Simulate running the code and + ! determine minimal and optimal sizes of the + ! workspace at any moment of the run. + IF ( N == 0 ) THEN + ! Quick return. All output except K is void. + ! INFO=1 signals the void input. + ! In case of a workspace query, the default + ! minimal workspace lengths are returned. + IF ( LQUERY ) THEN + IWORK(1) = 1 + RWORK(1) = 1 + ZWORK(1) = 2 + ZWORK(2) = 2 + ELSE + K = 0 + END IF + INFO = 1 + RETURN + END IF + MLWORK = MAX(2,N) + OLWORK = MAX(2,N) + IMINWR = 1 + MLRWRK = MAX(1,N) + + SELECT CASE ( WHTSVD ) + CASE (1) + ! The following is specified as the minimal + ! length of WORK in the definition of CGESVD: + ! MWRSVD = MAX(1,2*MIN(M,N)+MAX(M,N)) + MWRSVD = MAX(1,2*MIN(M,N)+MAX(M,N)) + MLWORK = MAX(MLWORK,MWRSVD) + MLRWRK = MAX(MLRWRK,N + 5*MIN(M,N)) + IF ( LQUERY ) THEN + CALL CGESVD( 'O', 'S', M, N, X, LDX, RWORK, & + B, LDB, W, LDW, ZWORK, -1, RDUMMY, INFO1 ) + LWRSVD = INT( ZWORK(1) ) + OLWORK = MAX(OLWORK,LWRSVD) + END IF + CASE (2) + ! The following is specified as the minimal + ! length of WORK in the definition of CGESDD: + ! MWRSDD = 2*min(M,N)*min(M,N)+2*min(M,N)+max(M,N). + ! RWORK length: 5*MIN(M,N)*MIN(M,N)+7*MIN(M,N) + ! In LAPACK 3.10.1 RWORK is defined differently. + ! Below we take max over the two versions. + ! IMINWR = 8*MIN(M,N) + MWRSDD = 2*MIN(M,N)*MIN(M,N)+2*MIN(M,N)+MAX(M,N) + MLWORK = MAX(MLWORK,MWRSDD) + IMINWR = 8*MIN(M,N) + MLRWRK = MAX( MLRWRK, N + & + MAX( 5*MIN(M,N)*MIN(M,N)+7*MIN(M,N), & + 5*MIN(M,N)*MIN(M,N)+5*MIN(M,N), & + 2*MAX(M,N)*MIN(M,N)+ & + 2*MIN(M,N)*MIN(M,N)+MIN(M,N) ) ) + IF ( LQUERY ) THEN + CALL CGESDD( 'O', M, N, X, LDX, RWORK, B,LDB,& + W, LDW, ZWORK, -1, RDUMMY, IWORK, INFO1 ) + LWRSDD = MAX( MWRSDD,INT( ZWORK(1) )) + ! Possible bug in CGESDD optimal workspace size. + OLWORK = MAX(OLWORK,LWRSDD) + END IF + CASE (3) + CALL CGESVDQ( 'H', 'P', 'N', 'R', 'R', M, N, & + X, LDX, RWORK, Z, LDZ, W, LDW, NUMRNK, & + IWORK, -1, ZWORK, -1, RDUMMY, -1, INFO1 ) + IMINWR = IWORK(1) + MWRSVQ = INT(ZWORK(2)) + MLWORK = MAX(MLWORK,MWRSVQ) + MLRWRK = MAX(MLRWRK,N + INT(RDUMMY(1))) + IF ( LQUERY ) THEN + LWRSVQ = INT(ZWORK(1)) + OLWORK = MAX(OLWORK,LWRSVQ) + END IF + CASE (4) + JSVOPT = 'J' + CALL CGEJSV( 'F', 'U', JSVOPT, 'N', 'N', 'P', M, & + N, X, LDX, RWORK, Z, LDZ, W, LDW, & + ZWORK, -1, RDUMMY, -1, IWORK, INFO1 ) + IMINWR = IWORK(1) + MWRSVJ = INT(ZWORK(2)) + MLWORK = MAX(MLWORK,MWRSVJ) + MLRWRK = MAX(MLRWRK,N + MAX(7,INT(RDUMMY(1)))) + IF ( LQUERY ) THEN + LWRSVJ = INT(ZWORK(1)) + OLWORK = MAX(OLWORK,LWRSVJ) + END IF + END SELECT + + IF ( WNTVEC .OR. WNTEX .OR. LSAME(JOBZ,'F') ) THEN + JOBZL = 'V' + ELSE + JOBZL = 'N' + END IF + + SELECT CASE ( WHTEIG ) + CASE (1) + ! Workspace calculation to the CHEEV call + MWRKEV = MAX( 1, 2*N-1 ) + MLWORK = MAX(MLWORK,MWRKEV) + MLRWRK = MAX(MLRWRK,N+MAX(1,3*N-2)) + IF ( LQUERY ) THEN + CALL CHEEV( JOBZL, 'L', N, S, LDS, EIGS, ZWORK, & + -1, RDUMMY, INFO1 ) ! LAPACK CALL + LWRKEV = MAX( MWRKEV, INT(ZWORK(1)) ) + OLWORK = MAX( OLWORK, LWRKEV ) + END IF + CASE (2) + IF ( LSAME(JOBZL,'V') ) THEN + MWRKEV = MAX( 1, 2*N + N*N ) + IWRSDD = MAX( 1, 3+5*N ) + MLRWRK = MAX( MLRWRK, N + (1+5*N+2*N*N)) + ELSE + MWRKEV = MAX( 1, N+1) + IWRSDD = 1 + MLRWRK = MAX( MLRWRK, N + N ) + END IF + MLWORK = MAX(MLWORK,N+MWRKEV) + IF ( LQUERY ) THEN + CALL CHEEVD( JOBZL, 'U', N, S, LDS, EIGS, ZWORK, & + -1, RDUMMY, -1, IWORK, -1, INFO1 ) ! LAPACK CALL + LWRKEV = MAX( MWRKEV, INT(ZWORK(1)) ) + OLWORK = MAX( OLWORK, N+LWRKEV ) + IWRSDD = IWORK(1) + MLRWRK = MAX(MLRWRK,N+INT(RDUMMY(1))) + ! In CHEEVD optimal and minimal lengts of the + ! real workspace are the same. + END IF + IMINWR = MAX(IMINWR,IWRSDD) + END SELECT + + IF ( LSAME(JOBF,'X') ) THEN + MLWORK = MAX(MLWORK,N+2*N) + ! CGEQRF and CORGQR need >= 2*N locations + IF ( LQUERY ) THEN + CALL CGEQRF( M, N, B, LDB, ZWORK, ZWORK, & + -1, INFO1 ) + OLWORK = MAX( OLWORK, 2*N+INT(ZWORK(1)) ) + CALL CUNGQR( M, N, N, B, LDB, ZWORK, ZWORK, & + -1, INFO1 ) + OLWORK = MAX( OLWORK, 2*N+INT(ZWORK(1)) ) + END IF + IMINWR = MAX( IMINWR, N ) ! N locations for a permutation + END IF + + IF ( LIWORK < IMINWR .AND. (.NOT.LQUERY) ) INFO = -30 + IF ( LZWORK < MLWORK .AND. (.NOT.LQUERY) ) INFO = -28 + END IF +! + IF( INFO /= 0 ) THEN + CALL XERBLA( 'CHEDMD', -INFO ) + RETURN + ELSE IF ( LQUERY ) THEN +! Return minimal and optimal workspace sizes + IWORK(1) = IMINWR + ZWORK(1) = MLWORK + ZWORK(2) = OLWORK + RWORK(1) = MLRWRK + RETURN + END IF +!............................................................ +! + OFL = SLAMCH('O') + SMALL = SLAMCH('S') + BADXY = .FALSE. +! +! <1> Optional scaling of the snapshots (columns of X, Y) +! ========================================================== + IF ( SCCOLX ) THEN + ! The columns of X will be normalized. + ! To prevent overflows, the column norms of X are + ! carefully computed using CLASSQ. + K = 0 + DO i = 1, N + !RWORK(i) = SCNRM2( M, X(1,i), 1 ) + SCALE = ZERO + CALL CLASSQ( M, X(1,i), 1, SCALE, SSUM ) + IF ( SISNAN(SCALE) .OR. SISNAN(SSUM) ) THEN + K = 0 + INFO = -10 + CALL XERBLA('CHEDMD',-INFO) + END IF + IF ( (SCALE /= ZERO) .AND. (SSUM /= ZERO) ) THEN + ROOTSC = SQRT(SSUM) + IF ( SCALE .GE. (OFL / ROOTSC) ) THEN +! Norm of X(:,i) overflows. First, X(:,i) +! is scaled by +! ( ONE / ROOTSC ) / SCALE = 1/||X(:,i)||_2. +! Next, the norm of X(:,i) is stored without +! overflow as WORK(i) = - SCALE * (ROOTSC/M), +! the minus sign indicating the 1/M factor. +! Scaling is performed without overflow, and +! underflow may occur in the smallest entries +! of X(:,i). The relative backward and forward +! errors are small in the ell_2 norm. + CALL CLASCL( 'G', 0, 0, SCALE, ONE/ROOTSC, & + M, 1, X(1,i), M, INFO1 ) + RWORK(i) = - SCALE * ( ROOTSC / DBLE(M) ) + ELSE +! X(:,i) will be scaled to unit 2-norm + RWORK(i) = SCALE * ROOTSC + CALL CLASCL( 'G',0, 0, RWORK(i), ONE, M, 1, & + X(1,i), M, INFO1 ) ! LAPACK CALL +! X(1:M,i) = (ONE/RWORK(i)) * X(1:M,i) ! INTRINSIC + END IF + ELSE + RWORK(i) = ZERO + K = K + 1 + END IF + END DO + IF ( K == N ) THEN + ! All columns of X are zero. Return error code -8. + ! (the 8th input variable had an illegal value) + K = 0 + INFO = -8 + CALL XERBLA('CHEDMD',-INFO) + RETURN + END IF + DO i = 1, N +! Now, apply the same scaling to the columns of Y. + IF ( RWORK(i) > ZERO ) THEN + CALL CSSCAL( M, ONE/RWORK(i), Y(1,i), 1 ) ! BLAS CALL +! Y(1:M,i) = (ONE/RWORK(i)) * Y(1:M,i) ! INTRINSIC + ELSE IF ( RWORK(i) < ZERO ) THEN + CALL CLASCL( 'G', 0, 0, -RWORK(i), & + ONE/DBLE(M), M, 1, Y(1,i), M, INFO1 ) ! LAPACK CALL + ELSE IF ( ABS(Y(ICAMAX(M, Y(1,i),1),i )) & + /= ZERO ) THEN +! X(:,i) is zero vector. For consistency, +! Y(:,i) should also be zero. If Y(:,i) is not +! zero, then the data might be inconsistent or +! corrupted. If JOBS == 'C', Y(:,i) is set to +! zero and a warning flag is raised. +! The computation continues but the +! situation will be reported in the output. + BADXY = .TRUE. + IF ( LSAME(JOBS,'C')) & + CALL CSSCAL( M, ZERO, Y(1,i), 1 ) ! BLAS CALL + END IF + END DO + END IF + ! + IF ( SCCOLY ) THEN + ! The columns of Y will be normalized. + ! To prevent overflows, the column norms of Y are + ! carefully computed using CLASSQ. + DO i = 1, N + !RWORK(i) = DNRM2( M, Y(1,i), 1 ) + SCALE = ZERO + CALL CLASSQ( M, Y(1,i), 1, SCALE, SSUM ) + IF ( SISNAN(SCALE) .OR. SISNAN(SSUM) ) THEN + K = 0 + INFO = -12 + CALL XERBLA('CHEDMD',-INFO) + END IF + IF ( SCALE /= ZERO .AND. (SSUM /= ZERO) ) THEN + ROOTSC = SQRT(SSUM) + IF ( SCALE .GE. (OFL / ROOTSC) ) THEN +! Norm of Y(:,i) overflows. First, Y(:,i) +! is scaled by +! ( ONE / ROOTSC ) / SCALE = 1/||Y(:,i)||_2. +! Next, the norm of Y(:,i) is stored without +! overflow as WORK(i) = - SCALE * (ROOTSC/M), +! the minus sign indicating the 1/M factor. +! Scaling is performed without overflow, and +! underflow may occur in the smallest entries +! of Y(:,i). The relative backward and forward +! errors are small in the ell_2 norm. + CALL CLASCL( 'G', 0, 0, SCALE, ONE/ROOTSC, & + M, 1, Y(1,i), M, INFO1 ) + RWORK(i) = - SCALE * ( ROOTSC / DBLE(M) ) + ELSE +! X(:,i) will be scaled to unit 2-norm + RWORK(i) = SCALE * ROOTSC + CALL CLASCL( 'G',0, 0, RWORK(i), ONE, M, 1, & + Y(1,i), M, INFO1 ) ! LAPACK CALL +! Y(1:M,i) = (ONE/RWORK(i)) * Y(1:M,i) ! INTRINSIC + END IF + ELSE + RWORK(i) = ZERO + END IF + END DO + DO i = 1, N +! Now, apply the same scaling to the columns of X. + IF ( RWORK(i) > ZERO ) THEN + CALL CSSCAL( M, ONE/RWORK(i), X(1,i), 1 ) ! BLAS CALL +! X(1:M,i) = (ONE/RWORK(i)) * X(1:M,i) ! INTRINSIC + ELSE IF ( RWORK(i) < ZERO ) THEN + CALL CLASCL( 'G', 0, 0, -RWORK(i), & + ONE/DBLE(M), M, 1, X(1,i), M, INFO1 ) ! LAPACK CALL + ELSE IF ( ABS(X(ICAMAX(M, X(1,i),1),i )) & + /= ZERO ) THEN +! Y(:,i) is zero vector. If X(:,i) is not +! zero, then a warning flag is raised. +! The computation continues but the +! situation will be reported in the output. + BADXY = .TRUE. + END IF + END DO + END IF +! +! <2> SVD of the data snapshot matrix X. +! ===================================== +! The left singular vectors are stored in the array X. +! The right singular vectors are in the array W. +! The array W will later on contain the eigenvectors +! of a Rayleigh quotient. + NUMRNK = N + SELECT CASE ( WHTSVD ) + CASE (1) + CALL CGESVD( 'O', 'S', M, N, X, LDX, RWORK, B, & + LDB, W, LDW, ZWORK, LZWORK, RWORK(N+1), INFO1 ) ! LAPACK CALL + T_OR_N = 'C' + CASE (2) + CALL CGESDD( 'O', M, N, X, LDX, RWORK, B, LDB, W, & + LDW, ZWORK, LZWORK, RWORK(N+1), IWORK, INFO1 ) ! LAPACK CALL + T_OR_N = 'C' + CASE (3) + CALL CGESVDQ( 'H', 'P', 'N', 'R', 'R', M, N, & + X, LDX, RWORK, Z, LDZ, W, LDW, & + NUMRNK, IWORK, LIWORK, ZWORK, & + LZWORK, RWORK(N+1), LRWORK-N, INFO1) ! LAPACK CALL + CALL CLACPY( 'A', M, NUMRNK, Z, LDZ, X, LDX ) ! LAPACK CALL + T_OR_N = 'C' + CASE (4) + CALL CGEJSV( 'F', 'U', JSVOPT, 'N', 'N', 'P', M, & + N, X, LDX, RWORK, Z, LDZ, W, LDW, & + ZWORK, LZWORK, RWORK(N+1), LRWORK-N, IWORK, INFO1 ) ! LAPACK CALL + CALL CLACPY( 'A', M, N, Z, LDZ, X, LDX ) ! LAPACK CALL + T_OR_N = 'N' + XSCL1 = RWORK(N+1) + XSCL2 = RWORK(N+2) + IF ( XSCL1 /= XSCL2 ) THEN + ! This is an exceptional situation. If the + ! data matrices are not scaled and the + ! largest singular value of X overflows. + ! In that case CGEJSV can return the SVD + ! in scaled form. The scaling factor can be used + ! to rescale the data (X and Y). + CALL CLASCL( 'G', 0, 0, XSCL1, XSCL2, M, N, Y, LDY, INFO2 ) + END IF + END SELECT +! + IF ( INFO1 > 0 ) THEN + ! The SVD selected subroutine did not converge. + ! Return with an error code. + INFO = 2 + RETURN + END IF +! + IF ( RWORK(1) == ZERO ) THEN + ! The largest computed singular value of (scaled) + ! X is zero. Return error code -8 + ! (the 8th input variable had an illegal value). + K = 0 + INFO = -8 + CALL XERBLA('CHEDMD',-INFO) + RETURN + END IF +! + !<3> Determine the numerical rank of the data + ! snapshots matrix X. This depends on the + ! parameters NRNK and TOL. + + SELECT CASE ( NRNK ) + CASE ( -1 ) + K = 1 + DO i = 2, NUMRNK + IF ( ( RWORK(i) <= RWORK(1)*TOL ) .OR. & + ( RWORK(i) <= SMALL ) ) EXIT + K = K + 1 + END DO + CASE ( -2 ) + K = 1 + DO i = 1, NUMRNK-1 + IF ( ( RWORK(i+1) <= RWORK(i)*TOL ) .OR. & + ( RWORK(i) <= SMALL ) ) EXIT + K = K + 1 + END DO + CASE DEFAULT + K = 1 + DO i = 2, NRNK + IF ( RWORK(i) <= SMALL ) EXIT + K = K + 1 + END DO + END SELECT + ! Now, U = X(1:M,1:K) is the SVD/POD basis for the + ! snapshot data in the input matrix X. + !<4> Compute the Rayleigh quotient S = U^H * A * U. + ! Depending on the requsted outputs, the computation + ! is organized to compute additional auxiliary + ! matrices (for the residuals and refinements). + ! + ! In all formulas below, we need V_k*Sigma_k^(-1) + ! where either V_k is in W(1:N,1:K), or V_k^H is in + ! W(1:K,1:N). Here Sigma_k=diag(WORK(1:K)). + IF ( LSAME(T_OR_N, 'N') ) THEN + DO i = 1, K + CALL CSSCAL( N, ONE/RWORK(i), W(1,i), 1 ) ! BLAS CALL + ! W(1:N,i) = (ONE/RWORK(i)) * W(1:N,i) ! INTRINSIC + END DO + ELSE + ! This non-unit stride access is due to the fact + ! that CGESVD, CGESVDQ and CGESDD return the + ! transposed matrix of the right singular vectors. + !DO i = 1, K + ! CALL DSCAL( N, ONE/RWORK(i), W(i,1), LDW ) ! BLAS CALL + ! ! W(i,1:N) = (ONE/RWORK(i)) * W(i,1:N) ! INTRINSIC + !END DO + DO i = 1, K + RWORK(N+i) = ONE/RWORK(i) + END DO + DO j = 1, N + DO i = 1, K + W(i,j) = CMPLX(RWORK(N+i),ZERO,KIND=WP)*W(i,j) + END DO + END DO + END IF +! + IF ( WNTREF ) THEN + ! + ! Need A*U(:,1:K)=Y*V_k*inv(diag(WORK(1:K))) + ! for computing the refined Ritz vectors + ! (optionally, outside CHEDMD). + CALL CGEMM( 'N', T_OR_N, M, K, N, ONE, Y, LDY, W, & + LDW, ZZERO, Z, LDZ ) ! BLAS CALL + ! Z(1:M,1:K)=MATMUL(Y(1:M,1:N),TRANSPOSE(CONJG(W(1:K,1:N)))) ! INTRINSIC, for T_OR_N=='C' + ! Z(1:M,1:K)=MATMUL(Y(1:M,1:N),W(1:N,1:K)) ! INTRINSIC, for T_OR_N=='N' + ! + ! At this point Z contains + ! A * U(:,1:K) = Y * V_k * Sigma_k^(-1), and + ! this is needed for computing the residuals. + ! This matrix is returned in the array B and + ! it can be used to compute refined Ritz vectors. + CALL CLACPY( 'A', M, K, Z, LDZ, B, LDB ) ! BLAS CALL + ! B(1:M,1:K) = Z(1:M,1:K) ! INTRINSIC + + CALL CGEMM( 'C', 'N', K, K, M, ZONE, X, LDX, Z, & + LDZ, ZZERO, S, LDS ) ! BLAS CALL + ! S(1:K,1:K) = MATMUL(TANSPOSE(X(1:M,1:K)),Z(1:M,1:K)) ! INTRINSIC + ! At this point S = U^T * A * U is the Rayleigh quotient. + ELSE + ! A * U(:,1:K) is not explicitly needed and the + ! computation is organized differently. The Rayleigh + ! quotient is computed more efficiently. + CALL CGEMM( 'C', 'N', K, N, M, ZONE, X, LDX, Y, LDY, & + ZZERO, Z, LDZ ) ! BLAS CALL + ! Z(1:K,1:N) = MATMUL( TRANSPOSE(X(1:M,1:K)), Y(1:M,1:N) ) ! INTRINSIC + ! In the two DGEMM calls here, can use K for LDZ. + CALL CGEMM( 'N', T_OR_N, K, K, N, ZONE, Z, LDZ, W, & + LDW, ZZERO, S, LDS ) ! BLAS CALL + ! S(1:K,1:K) = MATMUL(Z(1:K,1:N),TRANSPOSE(CONJG(W(1:K,1:N)))) ! INTRINSIC, for T_OR_N=='C' + ! S(1:K,1:K) = MATMUL(Z(1:K,1:N),(W(1:N,1:K))) ! INTRINSIC, for T_OR_N=='N' + ! At this point S = U^T * A * U is the Rayleigh quotient. + ! If the residuals are requested, save scaled V_k into Z. + ! Recal that V_k or V_k^T is stored in W. + IF ( WNTRES .OR. WNTEX ) THEN + IF ( LSAME(T_OR_N, 'N') ) THEN + CALL CLACPY( 'A', N, K, W, LDW, Z, LDZ ) + ELSE + CALL CLACPY( 'A', K, N, W, LDW, Z, LDZ ) + END IF + END IF + END IF + + SELECT CASE ( WHTSYM ) + CASE (1) + CALL CLACPY( 'L', K, K, S, LDS, W, LDW ) + CASE (2) + ! This is the symmetrizer from the piDMD [6], + ! based on a solution of the symmetric Procrustes + ! problem. Here included for comparisons/study and + ! for the sake of completeness. + DO i = 1, K-1 + W(i,i) = S(i,i) + DO j = i+1, K + W(j,i) = ( RWORK(i)*(CONJG(S(j,i))*RWORK(i)) & + + RWORK(j)*(S(i,j)*RWORK(j)) ) / & + ( RWORK(i)**2 + RWORK(j)**2 ) + END DO + END DO + W(k,k) = S(k,k) + END SELECT + ! + !<5> Compute the Ritz values and (if requested) the + ! right eigenvectors of the Rayleigh quotient. + ! + ! The LAPACK eigensolvers CHEEV and CHEEVD return the + ! eigenvectors in the array that contains upper or + ! lower triangle of the symmetric Rayleigh quotient. + ! + SELECT CASE ( WHTEIG ) + CASE (1) + CALL CHEEV( JOBZL, 'L', K, W, LDW, EIGS, ZWORK, & + LZWORK, RWORK(N+1), LRWORK-N, INFO1 ) ! LAPACK CALL + CASE (2) + CALL CHEEVD( JOBZL, 'L', K, W, LDW, EIGS, ZWORK, LZWORK, & + RWORK(N+1), LRWORK-N, IWORK, LIWORK, INFO1 ) ! LAPACK CALL + END SELECT + + ! + ! W(1:K,1:K) contains the eigenvectors of the Rayleigh + ! quotient. + IF ( INFO1 > 0 ) THEN + ! CHEEV/CHEEVD failed to compute the eigenvalues and + ! eigenvectors of the Rayleigh quotient. + INFO = 3 + RETURN + END IF +! + ! <6> Compute the eigenvectors (if requested) and, + ! the residuals (if requested). + ! + IF ( WNTVEC .OR. WNTEX ) THEN + IF ( WNTRES ) THEN + IF ( WNTREF ) THEN + ! Here, if the refinement is requested, we have + ! A*U(:,1:K) already computed and stored in Z. + ! For the residuals, need Y = A * U(:,1;K) * W. + ! W is stored in S. ? copy in Q + CALL CGEMM( 'N', 'N', M, K, K, ZONE, Z, LDZ, W, & + LDW, ZZERO, Y, LDY ) ! BLAS CALL + ! Y(1:M,1:K) = Z(1:M,1:K) * W(1:K,1:K) ! INTRINSIC + ! This frees Z; Y contains A * U(:,1:K) * W. + ELSE + ! Compute S = V_k * Sigma_k^(-1) * W, where + ! V_k * Sigma_k^(-1) is stored in Z + CALL CGEMM( T_OR_N, 'N', N, K, K, ZONE, Z, LDZ, & + W, LDW, ZZERO, S, LDS) + ! Then, compute Z = Y * S = + ! = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) = + ! = A * U(:,1:K) * W(1:K,1:K) + CALL CGEMM( 'N', 'N', M, K, N, ZONE, Y, LDY, S, & + LDS, ZZERO, Z, LDZ) + ! Save a copy of Z into Y and free Z for holding + ! the Ritz vectors. + CALL CLACPY( 'A', M, K, Z, LDZ, Y, LDY ) + IF ( WNTEX ) CALL CLACPY( 'A', M, K, Z, LDZ, B, LDB ) + END IF + ELSE IF ( WNTEX ) THEN + ! Compute S = V_k * Sigma_k^(-1) * W, where + ! V_k * Sigma_k^(-1) is stored in Z + CALL CGEMM( T_OR_N, 'N', N, K, K, ZONE, Z, LDZ, & + W, LDW, ZZERO, S, LDS) + ! Then, compute Z = Y * S = + ! = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) = + ! = A * U(:,1:K) * W(1:K,1:K) + CALL CGEMM( 'N', 'N', M, K, N, ZONE, Y, LDY, S, & + LDS, ZZERO, B, LDB ) + ! The above call replaces the following two calls + ! that were used in the developing-testing phase. + ! CALL CGEMM( 'N', 'N', M, K, N, ZONE, Y, LDY, S, & + ! LDS, ZZERO, Z, LDZ ) + ! Save a copy of Z into B and free Z for holding + ! the Ritz vectors. + ! CALL CLACPY( 'A', M, K, Z, LDZ, B, LDB ) + END IF +! + ! Compute the Ritz vectors + IF ( WNTVEC ) CALL CGEMM( 'N', 'N', M, K, K, ZONE, X, LDX, W, LDW, & + ZZERO, Z, LDZ ) ! BLAS CALL + ! Z(1:M,1:K) = MATMUL(X(1:M,1:K), W(1:K,1:K)) ! INTRINSIC +! + IF ( WNTRES ) THEN + DO i = 1, K + ! Compute the residuals + CALL CAXPY( M, -CMPLX(EIGS(i),KIND=WP), Z(1,i), 1, Y(1,i), 1 ) ! BLAS CALL + ! Y(1:M,i) = Y(1:M,i) - EIGS(i) * Z(1:M,i) ! INTRINSIC + RES(i) = SCNRM2( M, Y(1,i), 1) ! BLAS CALL + END DO + END IF + END IF +! + IF ( LSAME(JOBF,'X') ) THEN + ! If the Exact DMD eigenvectors are requested, the + ! original EDMD vectors must be orthogonalized. + ! Orthogonalization may change the vector so that the + ! corresponding residuals may increase. (Data driven + ! setting does not allow recomputing the Razleigh + ! quotients.) To preseve the quality of the best EDMD + ! vectors, orthogonalization is prformed in order of + ! increasing residuals. For more details see [4]. + DO i = 1, K + IWORK(i) = i + END DO + CALL CCOPY( K, RES, 1, RWORK(N+1), 1 ) + DO i = 1, K-1 + j = ISAMIN( K-i+1, RWORK(N+i), 1 ) + i - 1 + IF ( j /= i ) THEN + INFO1 = IWORK(i) + IWORK(i) = IWORK(j) + IWORK(j) = INFO1 + SCALE = RWORK(N+i) + RWORK(N+i) = RWORK(N+j) + RWORK(N+j) = SCALE + END IF + END DO + FORWRD = .TRUE. + CALL CLAPMT( FORWRD, M, K, B, LDB, IWORK ) + ! Here we need the Gram-Schmidt orthogonalization + ! of the columns of B. The following two lines + ! use the QR factorization subroutine DGEQRF. This + ! can be replaced with a more efficient Gram-Schmidt + ! implementation. The matrix B is not expected to + ! be ill-conditioned, so Gram-Schmid will be OK. + CALL CGEQRF( M, K, B, LDB, ZWORK, ZWORK(K+1), & + LZWORK-K, INFO1 ) + CALL CUNGQR( M, K, K, B, LDB, ZWORK, & + ZWORK(K+1), LZWORK-K, INFO1 ) + END IF + + IF ( WHTSVD == 4 ) THEN + RWORK(N+1) = XSCL1 + RWORK(N+2) = XSCL2 + END IF +! +! Successful exit. + IF ( .NOT. BADXY ) THEN + INFO = 0 + ELSE + ! A warning on possible data inconsistency. + ! This shouild be a rare event. + INFO = 4 + END IF +!............................................................ + RETURN +! ...... + END SUBROUTINE CHEDMD + diff --git a/SRC/chedmdq.f90 b/SRC/chedmdq.f90 new file mode 100644 index 0000000000..b4362194db --- /dev/null +++ b/SRC/chedmdq.f90 @@ -0,0 +1,725 @@ +SUBROUTINE CHEDMDQ( JOBS, JOBZ, JOBR, JOBQ, JOBT, JOBF, & + WHTSVD, WHTSYM, WHTEIG, M, N, F, LDF, & + X, LDX, Y, LDY, NRNK, TOL, K, EIGS, & + Z, LDZ, RES, B, LDB, V, LDV, & + S, LDS, ZWORK, LZWORK, WORK, LWORK, & + IWORK, LIWORK, INFO ) +! August 2022 +!..... + USE iso_fortran_env + IMPLICIT NONE + INTEGER, PARAMETER :: WP = real32 +!..... +! Scalar arguments + CHARACTER, INTENT(IN) :: JOBS, JOBZ, JOBR, JOBQ, & + JOBT, JOBF + INTEGER, INTENT(IN) :: WHTSVD, WHTSYM, WHTEIG, M, & + N, LDF, LDX, LDY, & + NRNK, LDZ, LDB, LDV, & + LDS, LZWORK, LWORK, LIWORK + INTEGER, INTENT(OUT) :: INFO, K + REAL(KIND=WP), INTENT(IN) :: TOL +! Array arguments + COMPLEX(KIND=WP), INTENT(INOUT) :: F(LDF,*) + COMPLEX(KIND=WP), INTENT(OUT) :: X(LDX,*), Y(LDY,*), & + Z(LDZ,*), B(LDB,*), & + V(LDV,*), S(LDS,*) + REAL(KIND=WP), INTENT(OUT) :: EIGS(*) + COMPLEX(KIND=WP), INTENT(OUT) :: ZWORK(*) + REAL(KIND=WP), INTENT(OUT) :: RES(*) + REAL(KIND=WP), INTENT(OUT) :: WORK(*) + INTEGER, INTENT(OUT) :: IWORK(*) +!..... +! Purpose +! ======= +! CHEDMDQ computes the Dynamic Mode Decomposition (DMD) for +! a pair of data snapshot matrices, using a QR factorization +! based compression of the data. For the input matrices +! X and Y, with Y = A*X and an unaccessible Hermitian matrix +! A, CHEDMDQ computes a certain number of Ritz pairs of A using +! the standard Rayleigh-Ritz extraction from a subspace of +! range(X) that is determined using the leading left singular +! vectors of X. Optionally, CHEDMDQ returns the residuals +! of the computed Ritz pairs, the information needed for +! a refinement of the Ritz vectors, or the eigenvectors of +! the Exact DMD. +! For furter details see the references listed below. +! For more details of the implementation see [3]. +! +! References +! ========== +! [1] P. Schmid: Dynamic mode decomposition of numerical +! and experimental data, +! Journal of Fluid Mechanics 656, 5-28, 2010. +! [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal +! decompositions: analysis and enhancements, +! SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018. +! [3] Z. Drmac: A LAPACK implementation of the Dynamic +! Mode Decomposition I. Technical report. AIMDyn Inc. +! October 2022, and LAPACK Working Note 298. +! [4] Z. Drmac: A LAPACK implementation of the Dynamic +! Mode Decomposition II. The symmetric/Hermitian DMD +! (xSYDMD/xHEDMD) Technical report. AIMDyn Inc. +! November 2022. LAPACK Working Note 300. +! [5] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L. +! Brunton, N. Kutz: On Dynamic Mode Decomposition: +! Theory and Applications, Journal of Computational +! Dynamics 1(2), 391 -421, 2014. +! [6] P. J. Baddoo, B. Herrmann, B. J. McKeon, +! J. N. Kutz, S. L. Brunton: Physics-informed +! dynamic mode decomposition (piDMD), arXiv:2112.04307. +! +! Developed and supported by: +! =========================== +! Developed and coded by Zlatko Drmac, Faculty of Science, +! University of Zagreb; drmac@math.hr +! In cooperation with +! AIMdyn Inc., Santa Barbara, CA. +! and supported by +! - DARPA SBIR project "Koopman Operator-Based Forecasting +! for Nonstationary Processes from Near-Term, Limited +! Observational Data" Contract No: W31P4Q-21-C-0007 +! - DARPA PAI project "Physics-Informed Machine Learning +! Methodologies" Contract No: HR0011-18-9-0033 +! - DARPA MoDyL project "A Data-Driven, Operator-Theoretic +! Framework for Space-Time Analysis of Process Dynamics" +! Contract No: HR0011-16-C-0116 +! Any opinions, findings and conclusions or recommendations +! expressed in this material are those of the author and +! do not necessarily reflect the views of the DARPA SBIR +! Program Office. +!============================================================ +! Distribution Statement A: +! Approved for Public Release, Distribution Unlimited. +! +!============================================================ +!...................................................................... +! Arguments +! ========= +! JOBS (input) CHARACTER*1 +! Determines whether the initial data snapshots are scaled +! by a diagonal matrix. The data snaphots are the columns +! of F. The leading N-1 columns of F are denoted X and the +! trailing N-1 columns are denoted Y. +! 'S' :: The data snapshots matrices X and Y are multiplied +! with a diagonal matrix D so that X*D has unit +! nonzero columns (in the Euclidean 2-norm) +! 'C' :: The snapshots are scaled as with the 'S' option. +! If it is found that an i-th column of X is zero +! vector and the corresponding i-th column of Y is +! non-zero, then the i-th column of Y is set to +! zero and a warning flag is raised. +! 'Y' :: The data snapshots matrices X and Y are multiplied +! by a diagonal matrix D so that Y*D has unit +! nonzero columns (in the Euclidean 2-norm) +! 'N' :: No data scaling. +!..... +! JOBZ (input) CHARACTER*1 +! Determines whether the eigenvectors (Koopman modes) will +! be computed. +! 'V' :: The eigenvectors (Koopman modes) will be computed +! and returned in the matrix Z. +! See the description of Z. +! 'F' :: The eigenvectors (Koopman modes) will be returned +! in factored form as the product Z*V, where Z +! is orthonormal and V contains the eigenvectors +! of the corresponding Rayleigh quotient. +! See the descriptions of V, Z. +! 'Q' :: The eigenvectors (Koopman modes) will be returned +! in factored form as the product Q*Z, where Z +! contains the eigenvectors of the compression of the +! underlying discretised operator onto the span of +! the data snapshots. See the descriptions of F, V, Z. +! Q is from the initial QR factorization. +! 'N' :: The eigenvectors are not computed. +!..... +! JOBR (input) CHARACTER*1 +! Determines whether to compute the residuals. +! 'R' :: The residuals for the computed eigenpairs will +! be computed and stored in the array RES. +! See the description of RES. +! For this option to be legal, JOBZ must be 'V'. +! 'N' :: The residuals are not computed. +!..... +! JOBQ (input) CHARACTER*1 +! Specifies whether to explicitly compute and return the +! orthogonal matrix from the QR factorization. +! 'Q' :: The matrix Q of the QR factorization of the data +! snapshot matrix is computed and stored in the +! array F. See the description of F. +! 'N' :: The matrix Q is not explicitly computed. +!..... +! JOBT (input) CHARACTER*1 +! Specifies whether to return the upper triangular factor +! from the QR factorization. +! 'R' :: The matrix R of the QR factorization of the data +! snapshot matrix F is returned in the array Y. +! See the description of Y and Further details. +! 'N' :: The matrix R is not returned. +!..... +! JOBF (input) CHARACTER*1 +! Specifies whether to store information needed for post- +! processing (e.g. computing refined Ritz vectors) +! 'R' :: The matrix needed for the refinement of the Ritz +! vectors is computed and stored in the array B. +! See the description of B. +! 'E' :: The unscaled eigenvectors of the Exact DMD are +! computed and returned in the array B. See the +! description of B. +! 'X' :: The Exact DMD vectors are orthogonalized and +! returned in the array B. To preserve the +! residuals of the orthogonalized EDMD vectors +! they are reordered and the reordering permutation +! is stored and returned in the array IWORK. +! See the descriptions of B and IWORK, and [4]. +! 'N' :: No eigenvector refinement data is computed. +! To be useful on exit, this option needs JOBQ='Q'. +!..... +! WHTSVD (input) INTEGER, WHSTVD in { 1, 2, 3, 4 } +! Allows for a selection of the SVD algorithm from the +! LAPACK library. +! 1 :: CGESVD (the QR SVD algorithm) +! 2 :: CGESDD (the Divide and Conquer algortihm; if enough +! workspace available, this is the fastest option) +! 3 :: CGESVDQ (the preconditioned QR SVD ; this and 4 +! are the most accurate options) +! 4 :: CGEJSV (the precondiioned Jacobi SVD; this and 3 +! are the most accurate options) +! For the four methods above, a significant difference in +! the accuracy of small singular values is possible if +! the snapshots vary in norm so that X is severely +! ill-conditioned. If small (smaller than EPS*||X||) +! singular values are of interest and JOBS=='N', then +! the options (3, 4) give the most accurate results, where +! the option 4 is slightly better and with stronger +! theoretical background. +! If JOBS=='S', i.e. the columns of X will be normalized, +! then all methods give nearly equally accurate results. +!..... +! WHTSYM (input) INTEGER +! Specifies the method for restoring the symmetry of the +! Rayleigh quotient. +! 1 :: The lower triangle of the computed Rayleigh +! quotient is used to symmetrize the matrix, +! 2 :: The formulas for the lower triangle of a +! truncated solution of the symmetric Procrustes +! problem are used to symmetrize the computed +! Rayleigh quotient. +!..... +! WHTEIG (input) INTEGER +! Specifies the symmetric eigensolver to compute the +! eigenvalues and eigenvectors of the symmetric Rayleigh +! quotient. +! 1 :: CHEEV (the QR algorithm) +! 2 :: CHEEVD (the divide and conquer algorithm) +!..... +! M (input) INTEGER, M >= 0 +! The state space dimension (the number of rows of F). +!..... +! N (input) INTEGER, 0 <= N <= M +! The number of data snapshots from a single trajectory, +! taken at equidistant discrete times. This is the +! number of columns of F. +!..... +! F (input/output) COMPLEX(KIND=WP) M-by-N array +! > On entry, +! the columns of F are the sequence of data snapshots +! from a single trajectory, taken at equidistant discrete +! times. It is assumed that the column norms of F are +! in the range of the normalized floating point numbers. +! < On exit, +! If JOBQ == 'Q', the array F contains the unitary +! matrix/factor of the QR factorization of the initial +! data snapshots matrix F. See the description of JOBQ. +! If JOBQ == 'N', the entries in F strictly below the main +! diagonal contain, column-wise, the information on the +! Householder vectors, as returned by CGEQRF. The +! remaining information to restore the orthogonal matrix +! of the initial QR factorization is stored in ZWORK(1:N). +! See the description of ZWORK. +!..... +! LDF (input) INTEGER, LDF >= M +! The leading dimension of the array F. +!..... +! X (workspace/output) COMPLEX(KIND=WP) MIN(M,N)-by-(N-1) array +! X is used as worskpace to hold representations of the +! leading N-1 snapshots in the orthonormal basis computed +! in the QR factorization of F. +! On exit, the leading K columns of X contain the leading +! K left singular vectors of the above described content +! of X. To lift them to the space of the left singular +! vectors U(:,1:K) of the input data, pre-mutiply with the +! Q factor from the initial QR factorization. +! See the descriptions of F, K, V and Z. +!..... +! LDX (input) INTEGER, LDX >= N +! The leading dimension of the array X. +!..... +! Y (workspace/output) COMPLEX(KIND=WP) MIN(M,N)-by-(N-1) array +! Y is used as worskpace to hold representations of the +! trailing N-1 snapshots in the orthonormal basis computed +! in the QR factorization of F. +! On exit, +! If JOBT == 'R', Y contains the MIN(M,N)-by-N upper +! triangular factor from the QR factorization of the data +! snapshot matrix F. +!..... +! LDY (input) INTEGER , LDY >= N +! The leading dimension of the array Y. +!..... +! NRNK (input) INTEGER +! Determines the mode how to compute the numerical rank, +! i.e. how to truncate small singular values of the input +! matrix X. On input, if +! NRNK = -1 :: i-th singular value sigma(i) is truncated +! if sigma(i) <= TOL*sigma(1) +! NRNK = -2 :: i-th singular value sigma(i) is truncated +! if sigma(i) <= TOL*sigma(i-1) +! The numerical rank can be enforced by using positive +! value of NRNK as follows: +! 0 < NRNK <= N-1 :: at most NRNK largest singular values +! will be used. If the number of the computed nonzero +! singular values is less than NRNK, then only those +! nonzero values will be used and the actually used +! dimension is less than NRNK. The actual number of +! the nonzero singular values is returned in the variable +! K. See the description of K. +!..... +! TOL (input) REAL(KIND=WP), 0 <= TOL < 1 +! The tolerance for truncating small singular values. +! See the description of NRNK. +!..... +! K (output) INTEGER, 0 <= K <= N +! The dimension of the SVD/POD basis for the leading N-1 +! data snapshots (columns of F) and the number of the +! computed Ritz pairs. The value of K is determinet +! according to the rule set by the parameters NRNK and +! TOL. See the descriptions of NRNK and TOL. +!..... +! EIGS (output) REAL(KIND=WP) (N-1)-by-1 array +! The leading K (K<=N-1) entries of EIGS contain +! the computed eigenvalues in ascending order. +! If the eigenvectors are requested, then Z(:,i) +! corresponds to EIGS(i). If JOBF == 'X', then +! orthonormalised Exact DMD vectors are stored +! in the array B and to the eigenvector B(:,i) +! the corresponding eigenvalue is EIGS(IWORK(i)). +! See the descriptions of K, Z, B and IWORK. +!..... +! Z (workspace/output) COMPLEX(KIND=WP) M-by-(N-1) array +! If JOBZ =='V' then +! Z contains Ritz vectors. +! If JOBZ == 'F', then the above descriptions hold for +! the columns of X(:,1:K)*W(1:K,1:K), where the columns +! of W(1:k,1:K) are the computed eigenvectors of the +! K-by-K Rayleigh quotient. +! See the descriptions of EIGS, X and W. +!..... +! LDZ (input) INTEGER , LDZ >= M +! The leading dimension of the array Z. +!..... +! RES (output) REAL(KIND=WP) (N-1)-by-1 array +! RES(1:K) contains the residuals for the K computed +! Ritz pairs. +! If JOBF == 'X', the array IWORK on exit +! contains the permutation that sorts RES in +! ascending order. +! See the description of JOBF, EIGS, Z and IWORK. +!..... +! B (output) COMPLEX(KIND=WP) MIN(M,N)-by-(N-1) array. +! IF JOBF =='R', B(1:N,1:K) contains A*U(:,1:K), and can +! be used for computing the refined vectors; see further +! details in the provided references. +! If JOBF == 'E', B(1:N,1;K) contains +! A*U(:,1:K)*W(1:K,1:K), which are the vectors from the +! Exact DMD, up to scaling by the inverse eigenvalues. +! In both cases, the content of B can be lifted to the +! original dimension of the input data by pre-mutiplying +! with the Q factor from the initial QR factorization. +! Here A denotes a compression of the underlying operator. +! See the descriptions of F and X. +! If JOBF =='N', then B is not referenced. +!..... +! LDB (input) INTEGER, LDB >= MIN(M,N) +! The leading dimension of the array B. +!..... +! V (workspace/output) COMPLEX(KIND=WP) (N-1)-by-(N-1) array +! On exit, V(1:K,1:K) V contains the K eigenvectors of +! the Rayleigh quotient. The Ritz vectors +! (returned in Z) are the product of X and V; see +! the descriptions of X and Z. +!..... +! LDV (input) INTEGER, LDV >= N-1 +! The leading dimension of the array V. +!..... +! S (workspace/output) COMPLEX(KIND=WP) (N-1)-by-(N-1) array +! The array S(1:K,1:K) is used for the matrix Rayleigh +! quotient. This content is overwritten during +! the eigenvalue decomposition by CHEEV/CHEEVD. +! See the description of K. +!..... +! LDS (input) INTEGER, LDS >= N-1 +! The leading dimension of the array S. +!..... +! ZWORK (workspace/output) COMPLEX(KIND=WP) LWORK-by-1 array +! On exit, +! ZWORK(1:MIN(M,N)) contains the scalar factors of the +! elementary reflectors as returned by CGEQRF of the +! M-by-N input matrix F. +! If the call to CHEDMDQ is only workspace query, then +! ZWORK(1) contains the minimal complex workspace length and +! ZWORK(2) is the optimal complex workspace length. +! Hence, the length of ZWORK is at least 2. +! See the description of LZWORK. +!..... +! LZWORK (input) INTEGER +! The minimal length of the workspace vector ZWORK. +! LZWORK is calculated as follows: +! Let MLWQR = N (minimal workspace for CGEQRF[M,N]) +! MLWDMD = minimal workspace for CHEDMD (see the +! description of LWORK in CHEDMD) +! MLWMQR = N (minimal workspace for +! CUNMQR['L','N',M,N,N]) +! MLWGQR = N (minimal workspace for CUNGQR[M,N,N]) +! MINMN = MIN(M,N) +! Then +! LZWORK = MAX(2, MIN(M,N)+MLWQR, MINMN+MLWDMD) +! is further updated as follows: +! if JOBZ == 'V' or JOBZ == 'F' THEN +! LZWORK = MAX( LZWORK, MINMN+MLWMQR ) +! if JOBQ == 'Q' THEN +! LZWORK = MAX( ZLWORK, MINMN+MLWGQR) +!..... +! WORK (workspace/output) REAL(KIND=WP) LWORK-by-1 array +! On exit, +! WORK(1:N-1) contains the singular values of +! the input submatrix F(1:M,1:N-1). +! If the call to CHEDMDQ is only workspace query, then +! WORK(1) contains the minimal workspace length and +! WORK(2) is the optimal workspace length. Hence, the +! length of WORK is at least 2. +! See the description of LWORK. +!..... +! LWORK (input) INTEGER +! The minimal length of the workspace vector WORK. +! LWORK is the same as in CHEDMD, because in CHEDMDQ +! only CHEDMD requiers real workspace. +! If on entry LWORK = -1, then a workspace query is +! assumed and the procedure only computes the minimal +! and the optimal workspace lengths for both WORK and +! IWORK. See the descriptions of WORK and IWORK. +!..... +! IWORK (workspace/output) INTEGER LIWORK-by-1 array +! Workspace that is required if WHTSVD equals +! 2 , 3 or 4. Further, if JOBF=='X', it is used to return +! ordering of the orthonormalized Exact DMD eigenvectors, +! so that EIGS(IWORK(i)) is the eigenvalue that corresponds to +! the i-th EDMD vector. See the descriptions of JOBF and B. +! If on entry LWORK =-1 or LIWORK=-1, then the +! minimal length of IWORK is computed and returned in +! IWORK(1). See the description of LIWORK. +!..... +! LIWORK (input) INTEGER +! The minimal length of the workspace vector IWORK. +! LIWORK is determined as follows. First: Let N1=N-1 +! If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1 +! If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M,N1)) +! If WHTSVD == 3, then LIWORK >= MAX(1,M+N1-1) +! If WHTSVD == 4, then LIWORK >= MAX(3,M+3*N1) +! If WHTEIG == 2 and JOBZ == 'V', then LIWORK >= MAX(1,3+5*N1) +! Then, if JOBF == 'X', then LIWORK = MAX(LIWORK,N1). +! If on entry LIWORK = -1, then a worskpace query is +! assumed and the procedure only computes the minimal +! and the optimal workspace lengths for both WORK and +! IWORK. See the descriptions of WORK and IWORK. +!..... +! INFO (output) INTEGER +! -i < 0 :: On entry, the i-th argument had an +! illegal value +! = 0 :: Successful return. +! = 1 :: Void input. Quick exit (M=0 or N=0). +! = 2 :: The SVD computation of X did not converge. +! Suggestion: Check the input data and/or +! repeat with different WHTSVD. +! = 3 :: The computation of the eigenvalues did not +! converge. +! = 4 :: If data scaling was requested on input and +! the procedure found inconsistency in the data +! such that for some column index i, +! X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set +! to zero if JOBS=='C'. The computation proceeds +! with original or modified data and warning +! flag is set with INFO=4. +!............................................................. +!............................................................. +! Parameters +! ~~~~~~~~~~ + REAL(KIND=WP), PARAMETER :: ONE = 1.0_WP + REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP + COMPLEX(KIND=WP), PARAMETER :: ZONE = ( 1.0_WP, 0.0_WP ) + COMPLEX(KIND=WP), PARAMETER :: ZZERO = ( 0.0_WP, 0.0_WP ) +! +! Local scalars +! ~~~~~~~~~~~~~ + INTEGER :: IMINWR, INFO1, MINMN, MLRWRK, & + MLWDMD, MLWGQR, MLWMQR, MLWORK, & + MLWQR, OLWDMD, OLWGQR, OLWMQR, & + OLWORK, OLWQR + LOGICAL :: LQUERY, SCCOLX, SCCOLY, WANTQ, & + WNTTRF, WNTRES, WNTVEC, WNTVCF, & + WNTVCQ, WNTREF, WNTEX + CHARACTER(LEN=1) :: JOBVL +! +! Local array +! ~~~~~~~~~~~ +! REAL(KIND=WP) :: RDUMMY(2) +! +! External funcions (BLAS and LAPACK) +! ~~~~~~~~~~~~~~~~~ + LOGICAL LSAME + EXTERNAL LSAME +! +! External subroutines (BLAS and LAPACK) +! ~~~~~~~~~~~~~~~~~~~~ + EXTERNAL CGEQRF, CLACPY, CLASET, CUNGQR, & + CUNMQR, XERBLA + +! External subroutines +! ~~~~~~~~~~~~~~~~~~~~ + EXTERNAL CHEDMD + +! Intrinsic functions +! ~~~~~~~~~~~~~~~~~~~ + INTRINSIC MAX, MIN, INT + !.......................................................... + ! + ! Test the input arguments + WNTRES = LSAME(JOBR,'R') + SCCOLX = LSAME(JOBS,'S') .OR. LSAME( JOBS, 'C' ) + SCCOLY = LSAME(JOBS,'Y') + WNTVEC = LSAME(JOBZ,'V') + WNTREF = LSAME(JOBF,'R') + WNTEX = LSAME(JOBF,'E') .OR. LSAME(JOBF,'X') + WNTVCF = LSAME(JOBZ,'F') + WNTVCQ = LSAME(JOBZ,'Q') + WANTQ = LSAME(JOBQ,'Q') + WNTTRF = LSAME(JOBT,'R') + MINMN = MIN(M,N) + INFO = 0 + LQUERY = ( ( LWORK == -1 ) .OR. ( LIWORK == -1 ) ) +! + IF ( .NOT. (SCCOLX .OR. SCCOLY .OR. & + LSAME(JOBS,'N')) ) THEN + INFO = -1 + ELSE IF ( .NOT. (WNTVEC .OR. WNTVCF .OR. WNTVCQ & + .OR. LSAME(JOBZ,'N')) ) THEN + INFO = -2 + ELSE IF ( .NOT. (WNTRES .OR. LSAME(JOBR,'N')) .OR. & + ( WNTRES .AND. (.NOT.(WNTVEC .OR. WNTVCF)) ) ) THEN + INFO = -3 + ELSE IF ( .NOT. (WANTQ .OR. LSAME(JOBQ,'N')) ) THEN + INFO = -4 + ELSE IF ( .NOT. ( WNTTRF .OR. LSAME(JOBT,'N') ) ) THEN + INFO = -5 + ELSE IF ( .NOT. (WNTREF .OR. WNTEX .OR. & + LSAME(JOBF,'N') ) ) THEN + INFO = -6 + ELSE IF ( .NOT. ((WHTSVD == 1).OR.(WHTSVD == 2).OR. & + (WHTSVD == 3).OR.(WHTSVD == 4)) ) THEN + INFO = -7 + ELSE IF ( .NOT.((WHTSYM == 1) .OR. (WHTSYM == 2))) THEN + INFO = -8 + ELSE IF ( .NOT.((WHTEIG == 1) .OR. (WHTEIG == 2))) THEN + INFO = -9 + ELSE IF ( M < 0 ) THEN + INFO = -10 + ELSE IF ( ( N < 0 ) .OR. ( N > M+1 ) ) THEN + INFO = -11 + ELSE IF ( LDF < M ) THEN + INFO = -13 + ELSE IF ( LDX < MINMN ) THEN + INFO = -15 + ELSE IF ( LDY < MINMN ) THEN + INFO = -17 + ELSE IF ( .NOT. (( NRNK == -2).OR.(NRNK == -1).OR. & + ((NRNK >= 1).AND.(NRNK < N ))) ) THEN + INFO = -18 + ELSE IF ( ( TOL < ZERO ) .OR. ( TOL >= ONE ) ) THEN + INFO = -19 + ELSE IF ( LDZ < M ) THEN + INFO = -23 + ELSE IF ( (WNTREF.OR.WNTEX ).AND.( LDB < MINMN ) ) THEN + INFO = -26 + ELSE IF ( LDV < N-1 ) THEN + INFO = -28 + ELSE IF ( LDS < N-1 ) THEN + INFO = -30 + END IF +! + IF ( WNTVEC .OR. WNTVCF .OR. WNTVCQ ) THEN + JOBVL = 'V' + ELSE + JOBVL = 'N' + END IF + IF ( INFO == 0 ) THEN + ! Compute the minimal and the optimal workspace + ! requirements. Simulate running the code and + ! determine minimal and optimal sizes of the + ! workspace at any moment of the run. + IF ( ( N == 0 ) .OR. ( N == 1 ) ) THEN + ! All output except K is void. INFO=1 signals + ! the void input. In case of a workspace query, + ! the minimal workspace lengths are returned. + IF ( LQUERY ) THEN + IWORK(1) = 1 + WORK(1) = 2 + WORK(2) = 2 + ELSE + K = 0 + END IF + INFO = 1 + RETURN + END IF + + MLRWRK = 1 + MLWORK = 2 + OLWORK = 2 + IMINWR = 1 + MLWQR = MAX(1,N) ! Minimal workspace length for CGEQRF. + MLWORK = MAX(MLWORK,MINMN + MLWQR) + + IF ( LQUERY ) THEN + CALL CGEQRF( M, N, F, LDF, ZWORK, ZWORK, -1, & + INFO1 ) + OLWQR = INT(ZWORK(1)) + OLWORK = MAX(OLWORK,MINMN + OLWQR) + END IF + CALL CHEDMD( JOBS, JOBVL, JOBR, JOBF, WHTSVD, WHTSYM, WHTEIG, & + MINMN, N-1, X, LDX, Y, LDY, NRNK, TOL, K, & + EIGS, Z, LDZ, RES, B, LDB, V, LDV, & + S, LDS, ZWORK, -1, WORK, -1, IWORK, -1, INFO1 ) + MLWDMD = INT(ZWORK(1)) + MLWORK = MAX(MLWORK, MINMN + MLWDMD) + MLRWRK = MAX(MLRWRK,INT(WORK(1))) + IMINWR = MAX(IMINWR,IWORK(1)) + IF ( LQUERY ) THEN + OLWDMD = INT(ZWORK(2)) + OLWORK = MAX(OLWORK, MINMN+OLWDMD) + END IF + IF ( WNTVEC .OR. WNTVCF ) THEN + MLWMQR = MAX(1,N) + MLWORK = MAX(MLWORK,MINMN+MLWMQR) + IF ( LQUERY ) THEN + CALL CUNMQR( 'L','N', M, N, MINMN, F, LDF, & + ZWORK, Z, LDZ, ZWORK, -1, INFO1 ) + OLWMQR = INT(ZWORK(1)) + OLWORK = MAX(OLWORK,MINMN+OLWMQR) + END IF + END IF + IF ( WANTQ ) THEN + MLWGQR = MAX(1,N) + MLWORK = MAX(MLWORK,MINMN+MLWGQR) + IF ( LQUERY ) THEN + CALL CUNGQR( M, MINMN, MINMN, F, LDF, WORK, & + WORK, -1, INFO1 ) + OLWGQR = INT(WORK(1)) + OLWORK = MAX(OLWORK,MINMN+OLWGQR) + END IF + END IF + IF ( LIWORK < IMINWR .AND. (.NOT.LQUERY) ) INFO = -36 + IF ( LWORK < MLRWRK .AND. (.NOT.LQUERY) ) INFO = -34 + IF ( LZWORK < MLWORK .AND. (.NOT.LQUERY) ) INFO = -32 + END IF + IF( INFO /= 0 ) THEN + CALL XERBLA( 'CHEDMDQ', -INFO ) + RETURN + ELSE IF ( LQUERY ) THEN +! Return minimal and optimal workspace sizes + IWORK(1) = IMINWR + ZWORK(1) = MLWORK + ZWORK(2) = OLWORK + WORK(1) = MLRWRK + RETURN + END IF +!..... +! Initial QR factorization that is used to represent the +! snapshots as elements of lower dimensional subspace. +! For large scale computation with M >>N , at this place +! one can use an out of core QRF. +! + CALL CGEQRF( M, N, F, LDF, ZWORK, & + ZWORK(MINMN+1), LZWORK-MINMN, INFO1 ) +! +! Define X and Y as the snapshots representations in the +! orthogonal basis computed in the QR factorization. +! X corresponds to the leading N-1 and Y to the trailing +! N-1 snapshots. + CALL CLASET( 'L', MINMN, N-1, ZZERO, ZZERO, X, LDX ) + CALL CLACPY( 'U', MINMN, N-1, F, LDF, X, LDX ) + CALL CLACPY( 'A', MINMN, N-1, F(1,2), LDF, Y, LDY ) + IF ( M >= 3 ) THEN + CALL CLASET( 'L', MINMN-2, N-2, ZZERO, ZZERO, & + Y(3,1), LDY ) + END IF +! +! Compute the DMD of the projected snapshot pairs (X,Y) + CALL CHEDMD( JOBS, JOBVL, JOBR, JOBF, WHTSVD, WHTSYM, WHTEIG, MINMN, & + N-1, X, LDX, Y, LDY, NRNK, TOL, K, & + EIGS, Z, LDZ, RES, B, LDB, V, LDV, & + S, LDS, ZWORK(MINMN+1), LZWORK-MINMN, & + WORK, LWORK, IWORK, LIWORK, INFO1 ) + IF ( INFO1 == 2 .OR. INFO1 == 3 ) THEN + ! Return with error code. See CHEDMD for details. + INFO = INFO1 + RETURN + ELSE + INFO = INFO1 + END IF +! +! The Ritz vectors (Koopman modes) can be explicitly +! formed or returned in factored form. + IF ( WNTVEC ) THEN + ! Compute the eigenvectors explicitly. + IF ( M > MINMN ) CALL CLASET( 'A', M-MINMN, K, ZZERO, & + ZZERO, Z(MINMN+1,1), LDZ ) + CALL CUNMQR( 'L','N', M, K, MINMN, F, LDF, ZWORK, Z, & + LDZ, ZWORK(MINMN+1), LZWORK-MINMN, INFO1 ) + ELSE IF ( WNTVCF ) THEN + ! Return the Ritz vectors (eigenvectors) in factored + ! form Z*V, where Z contains orthonormal matrix (the + ! product of Q from the inital QR factorization and + ! the SVD/POD_basis returned by CHEDMD in X) and the + ! second factor (the eigenvectors of the Rayleigh + ! quotient) is in the array V, as returned by CHEDMD. + CALL CLACPY( 'A', N, K, X, LDX, Z, LDZ ) + IF ( M > N ) CALL CLASET( 'A', M-N, K, ZZERO, ZZERO, & + Z(N+1,1), LDZ ) + CALL CUNMQR( 'L','N', M, K, MINMN, F, LDF, ZWORK, Z, & + LDZ, ZWORK(MINMN+1), LZWORK-MINMN, INFO1 ) + END IF +! +! Some optional output variables: +! +! The upper triangular factor R in the initial QR +! factorization is optionally returned in the array Y. +! This is useful if this call to CHEDMDQ is to be +! followed by a streaming DMD that is implemented in a +! QR compressed form. + IF ( WNTTRF ) THEN ! Return the upper triangular R in Y + CALL CLASET( 'A', MINMN, N, ZZERO, ZZERO, Y, LDY ) + CALL CLACPY( 'U', MINMN, N, F, LDF, Y, LDY ) + END IF +! +! The orthonormal/unitary factor Q in the initial QR +! factorization is optionally returned in the array F. +! Same as with the triangular factor above, this is +! useful in a streaming DMD. + IF ( WANTQ ) THEN ! Q overwrites F + CALL CUNGQR( M, MINMN, MINMN, F, LDF, ZWORK, & + ZWORK(MINMN+1), LZWORK-MINMN, INFO1 ) + END IF +! + RETURN +! + END SUBROUTINE CHEDMDQ + \ No newline at end of file diff --git a/SRC/dsydmd.f90 b/SRC/dsydmd.f90 new file mode 100644 index 0000000000..dd949efe39 --- /dev/null +++ b/SRC/dsydmd.f90 @@ -0,0 +1,1154 @@ + SUBROUTINE DSYDMD( JOBS, JOBZ, JOBR, JOBF, & + WHTSVD, WHTSYM, WHTEIG, & + M, N, X, LDX, Y, LDY, NRNK, TOL, & + K, EIGS, Z, LDZ, RES, & + B, LDB, W, LDW, S, LDS, & + WORK, LWORK, IWORK, LIWORK, INFO ) +!..... + USE iso_fortran_env + IMPLICIT NONE + INTEGER, PARAMETER :: WP = real64 +!..... +! Scalar arguments + CHARACTER, INTENT(IN) :: JOBS, JOBZ, JOBR, JOBF + INTEGER, INTENT(IN) :: WHTSVD, WHTSYM, WHTEIG, & + M, N, LDX, LDY, & + NRNK, LDZ, LDB, LDW, LDS,& + LWORK, LIWORK + INTEGER, INTENT(OUT) :: K, INFO + REAL(KIND=WP), INTENT(IN) :: TOL +! Array arguments + REAL(KIND=WP), INTENT(INOUT) :: X(LDX,*), Y(LDY,*) + REAL(KIND=WP), INTENT(OUT) :: Z(LDZ,*), B(LDB,*), & + W(LDW,*), S(LDS,*) + REAL(KIND=WP), INTENT(OUT) :: EIGS(*), RES(*) + REAL(KIND=WP), INTENT(OUT) :: WORK(*) + INTEGER, INTENT(OUT) :: IWORK(*) +!............................................................ +! Purpose +! ======= +! DSYDMD computes the Dynamic Mode Decomposition (DMD) for +! a pair of data snapshot matrices. For the input matrices +! X and Y such that Y = A*X with an unaccessible symmetric +! matrix A, DSYDMD computes a certain number of Ritz pairs +! of A using the standard Rayleigh-Ritz extraction from a +! subspace of range(X) that is determined using the leading +! left singular vectors of X. Optionally, DSYDMD returns +! the residuals of the computed Ritz pairs, the information +! needed for a refinement of the Ritz vectors, or the +! eigenvectors of the Exact DMD. +! For furter details see the references listed below. +! For more details of the implementation see [3], [4]. +! +! References +! ========== +! [1] P. Schmid: Dynamic mode decomposition of numerical +! and experimental data, +! Journal of Fluid Mechanics 656, 5-28, 2010. +! [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal +! decompositions: analysis and enhancements, +! SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018. +! [3] Z. Drmac: A LAPACK implementation of the Dynamic +! Mode Decomposition I. Technical report. AIMDyn Inc. +! October 2022, and LAPACK Working Note 298. +! [4] Z. Drmac: A LAPACK implementation of the Dynamic +! Mode Decomposition II. The symmetric/Hermitian DMD +! (xSYDMD/xHEDMD) Technical report. AIMDyn Inc. +! November 2022. LAPACK Working Note 300. +! [5] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L. +! Brunton, N. Kutz: On Dynamic Mode Decomposition: +! Theory and Applications, Journal of Computational +! Dynamics 1(2), 391 -421, 2014. +! [6] P. J. Baddoo, B. Herrmann, B. J. McKeon, +! J. N. Kutz, S. L. Brunton: Physics-informed +! dynamic mode decomposition (piDMD), arXiv:2112.04307. +! +!...................................................................... +! Developed and supported by: +! =========================== +! Developed and coded by Zlatko Drmac, Faculty of Science, +! University of Zagreb; drmac@math.hr +! In cooperation with +! AIMdyn Inc., Santa Barbara, CA. +! and supported by +! - DARPA SBIR project "Koopman Operator-Based Forecasting +! for Nonstationary Processes from Near-Term, Limited +! Observational Data" Contract No: W31P4Q-21-C-0007 +! - DARPA PAI project "Physics-Informed Machine Learning +! Methodologies" Contract No: HR0011-18-9-0033 +! - DARPA MoDyL project "A Data-Driven, Operator-Theoretic +! Framework for Space-Time Analysis of Process Dynamics" +! Contract No: HR0011-16-C-0116 +! Any opinions, findings and conclusions or recommendations +! expressed in this material are those of the author and +! do not necessarily reflect the views of the DARPA SBIR +! Program Office +!============================================================ +! Distribution Statement A: +! Approved for Public Release, Distribution Unlimited. +! +!============================================================ +!............................................................ +! Arguments +! ========= +! JOBS (input) CHARACTER*1 +! Determines whether the initial data snapshots are scaled +! by a diagonal matrix. +! 'S' :: The data snapshots matrices X and Y are multiplied +! with a diagonal matrix D so that X*D has unit +! nonzero columns (in the Euclidean 2-norm) +! 'C' :: The snapshots are scaled as with the 'S' option. +! If it is found that an i-th column of X is zero +! vector and the corresponding i-th column of Y is +! non-zero, then the i-th column of Y is set to +! zero and a warning flag is raised. +! 'Y' :: The data snapshots matrices X and Y are multiplied +! by a diagonal matrix D so that Y*D has unit +! nonzero columns (in the Euclidean 2-norm) +! 'N' :: No data scaling. +!..... +! JOBZ (input) CHARACTER*1 +! Determines whether the eigenvectors (Koopman modes) will +! be computed. +! 'V' :: The eigenvectors (Koopman modes) will be computed +! and returned in the matrix Z. +! See the description of Z. +! 'F' :: The eigenvectors (Koopman modes) will be returned +! in factored form as the product X(:,1:K)*W, where X +! contains a POD basis (leading left singular vectors +! of the data matrix X) and W contains the eigenvectors +! of the corresponding Rayleigh quotient. +! See the descriptions of K, X, W, Z. +! 'N' :: The eigenvectors are not computed. +!..... +! JOBR (input) CHARACTER*1 +! Determines whether to compute the residuals. +! 'R' :: The residuals for the computed eigenpairs will be +! computed and stored in the array RES. +! See the description of RES. +! For this option to be legal, JOBZ must be 'V'. +! 'N' :: The residuals are not computed. +!..... +! JOBF (input) CHARACTER*1 +! Specifies whether to store information needed for post- +! processing (e.g. computing refined Ritz vectors) +! 'R' :: The matrix needed for the refinement of the Ritz +! vectors is computed and stored in the array B. +! See the description of B. +! 'E' :: The unscaled eigenvectors of the Exact DMD are +! computed and returned in the array B. See the +! description of B. +! 'X' :: The Exact DMD vectors are orthogonalized and +! returned in the array B. To preserve the +! residuals of the orthogonalized EDMD vectors +! they are reordered and the reordering permutation +! is stored and returned in the array IWORK. +! See the descriptions of B and IWORK, and [4]. +! 'N' :: No eigenvector refinement data is computed. +!..... +! WHTSVD (input) INTEGER, WHSTVD in { 1, 2, 3, 4 } +! Allows for a selection of the SVD algorithm from the +! LAPACK library. +! 1 :: DGESVD (the QR SVD algorithm) +! 2 :: DGESDD (the Divide and Conquer algortihm; if enough +! workspace available, this is the fastest option) +! 3 :: DGESVDQ (the preconditioned QR SVD ; this and 4 +! are the most accurate options) +! 4 :: DGEJSV (the preconditioned Jacobi SVD; this and 3 +! are the most accurate options) +! For the four methods above, a significant difference in +! the accuracy of small singular values is possible if +! the snapshots vary in norm so that X is severely +! ill-conditioned. If small (smaller than EPS*||X||) +! singular values are of interest and JOBS=='N', then +! the options (3, 4) give the most accurate results, where +! the option 4 is slightly better and with stronger +! theoretical background. +! If JOBS=='S', i.e. the columns of X will be normalized, +! then all methods give nearly equally accurate results. +!..... +! WHTSYM (input) INTEGER +! Specifies the method for restoring the symmetry of the +! Rayleigh quotient. +! 1 :: The lower triangle of the computed Rayleigh +! quotient is used to symmetrize the matrix, +! 2 :: The formulas for the lower triangle of a +! truncated solution of the symmetric Procrustes +! problem are used to symmetrize the computed +! Rayleigh quotient. +!..... +! WHTEIG (input) INTEGER +! Specifies the symmetric eigensolver to compute the +! eigenvalues and eigenvectors of the symmetric Rayleigh +! quotient. +! 1 :: DSYEV (the QR algorithm) +! 2 :: DSYEVD (the divide and conquer algorithm) +!..... +! M (input) INTEGER, M>= 0 +! The state space dimension (the row dimension of X, Y). +!..... +! N (input) INTEGER, 0 <= N <= M +! The number of data snapshot pairs +! (the number of columns of X and Y). +!..... +! X (input/output) REAL(KIND=WP) M-by-N array +! > On entry, X contains the data snapshot matrix X. It is +! assumed that the column norms of X are in the range of +! the normalized floating point numbers. +! < On exit, the leading K columns of X contain a POD basis, +! i.e. the leading K left singular vectors of the input +! data matrix X, U(:,1:K). All N columns of X contain all +! left singular vectors of the input matrix X. +! See the descriptions of K, Z and W. +!..... +! LDX (input) INTEGER, LDX >= M +! The leading dimension of the array X. +!..... +! Y (input/workspace/output) REAL(KIND=WP) M-by-N array +! > On entry, Y contains the data snapshot matrix Y +! < On exit, +! If JOBR == 'R', the leading K columns of Y contain +! the residual vectors for the computed Ritz pairs. +! See the description of RES. +! If JOBR == 'N', Y contains the original input data. +!..... +! LDY (input) INTEGER , LDY >= M +! The leading dimension of the array Y. +!..... +! NRNK (input) INTEGER +! Determines the mode how to compute the numerical rank, +! i.e. how to truncate small singular values of the input +! matrix X. On input, if +! NRNK = -1 :: i-th singular value sigma(i) is truncated +! if sigma(i) <= TOL*sigma(1) +! NRNK = -2 :: i-th singular value sigma(i) is truncated +! if sigma(i) <= TOL*sigma(i-1) +! The numerical rank can be enforced by using positive +! value of NRNK as follows: +! 0 < NRNK <= N :: at most NRNK largest singular values +! will be used. If the number of the computed nonzero +! singular values is less than NRNK, then only those +! nonzero values will be used and the actually used +! dimension is less than NRNK. The actual number of +! the nonzero singular values is returned in the variable +! K. See the descriptions of TOL and K. +!..... +! TOL (input) REAL(KIND=WP), 0 <= TOL < 1 +! The tolerance for truncating small singular values. +! See the description of NRNK. +!..... +! K (output) INTEGER, 0 <= K <= N +! The dimension of the POD basis for the data snapshot +! matrix X and the number of the computed Ritz pairs. +! The value of K is determinet according to the rule set +! by the parameters NRNK and TOL. +! See the descriptions of NRNK and TOL. +!..... +! EIGS (output) REAL(KIND=WP) N-by-1 array +! The leading K (K<=N) entries of EIGS contain +! the computed eigenvalues in ascending order. +! If the eigenvectors are requested, then Z(:,i) +! corresponds to EIGS(i). If JOBF == 'X', then +! orthonormalised Exact DMD vectors are stored +! in the array B and to the eigenvector B(:,i) +! the corresponding eigenvalue is EIGS(IWORK(i)). +! See the descriptions of K, Z, B and IWORK. +!..... +! Z (workspace/output) REAL(KIND=WP) M-by-N array +! If JOBZ =='V' then Z contains Ritz vectors. +! If JOBZ == 'F', then the above descriptions hold for +! the columns of X(:,1:K)*W(1:K,1:K), where the columns +! of W(1:k,1:K) are the computed eigenvectors of the +! K-by-K Rayleigh quotient. +! See the descriptions of EIGS, X and W. +!..... +! LDZ (input) INTEGER , LDZ >= M +! The leading dimension of the array Z. +!..... +! RES (output) REAL(KIND=WP) N-by-1 array +! RES(1:K) contains the residuals for the K computed +! Ritz pairs. +! RES(i) = || A * Z(:,i) - EIGS(i)*Z(:,i))||_2. +! If JOBF == 'X', the array IWORK on exit +! contains the permutation that sorts RES in +! ascending order. +! See the description of JOBF, EIGS, Z and IWORK. +!..... +! B (output) REAL(KIND=WP) M-by-N array. +! IF JOBF =='R', B(1:M,1:K) contains A*U(:,1:K), and can +! be used for computing the refined vectors; see further +! details in the provided references. +! If JOBF == 'E', B(1:M,1;K) contains +! A*U(:,1:K)*W(1:K,1:K), which are the vectors from the +! Exact DMD, up to scaling by the inverse eigenvalues. +! Note that the EDMD vectors may not be even numerically +! orthogonal and that the non-orthogonality may be +! substantial. +! If JOBF == 'X', then the EDMD vectors +! A*U(:,1:K)*W(1:K,1:K) are orthonormalized. To preserve +! information on the residuals, they are reordered and +! the reordering permutation is stored in the array IWORK. +! If JOBF =='N', then B is not referenced. +! See the descriptions of JOBF, X, W, K, IWORK. +!..... +! LDB (input) INTEGER, LDB >= M +! The leading dimension of the array B. +!..... +! W (workspace/output) REAL(KIND=WP) N-by-N array +! On exit, W(1:K,1:K) contains the K computed +! eigenvectors of the matrix Rayleigh quotient. +! The Ritz vectors (returned in Z) are the +! product of X (containing a POD basis for the input +! matrix X) and W. See the descriptions of K, S, X and Z. +! W is also used as a workspace to temporarily store the +! left singular vectors of X. +!..... +! LDW (input) INTEGER, LDW >= N +! The leading dimension of the array W. +!..... +! S (workspace/output) REAL(KIND=WP) N-by-N array +! The array S(1:K,1:K) is used for the matrix Rayleigh +! quotient. This content is overwritten during +! the eigenvalue decomposition. +! See the description of K. +!..... +! LDS (input) INTEGER, LDS >= N +! The leading dimension of the array S. +!..... +! WORK (workspace/output) REAL(KIND=WP) LWORK-by-1 array +! On exit, WORK(1:N) contains the singular values of +! X (for JOBS=='N') or column scaled X (JOBS=='S', 'C'). +! If WHTSVD==4, then WORK(N+1) and WORK(N+2) contain +! scaling factor WORK(N+2)/WORK(N+1) used to scale X +! and Y to avoid overflow in the SVD of X. +! This may be of interest if the scaling option is off +! and as many as possible smallest eigenvalues are +! desired to the highest feasible accuracy. +! If the call to DSYDMD is only workspace query, then +! WORK(1) contains the minimal workspace length and +! WORK(2) is the optimal workspace length. Hence, the +! length of WORK is at least 2. +! See the description of LWORK. +!..... +! LWORK (input) INTEGER +! The minimal length of the workspace vector WORK. +! LWORK is calculated as follows: +! If WHTSVD == 1 :: +! If JOBZ == 'V', then +! LWORK >= MAX(2, N + LWORK_SVD, N+LWORK_EIG), +! where LWORK_EIG is the work length for the +! symmetric eigenvalue solver. If +! - WHTEIG = 1(DSYEV), LWORK_EIG = MAX(1,3*N-1) +! - WHTEIG = 2(DSYEVD), LWORK_EIG = 1+6*N+2*N**2 +! If JOBZ == 'N' then +! LWORK >= MAX(2, N + LWORK_SVD, N+LWORK_EIG), +! where +! - if WHTEIG = 1, LWORK_EIG = MAX(1,3*N-1) +! - if WHTEIG = 2, LWORK_EIG = 2*N+1 +! Here LWORK_SVD = MAX(1,3*N+M,5*N) is the minimal +! workspace length of DGESVD +! If WHTSVD == 2 :: +! If JOBZ == 'V', then +! LWORK >= MAX(2, N + LWORK_SVD, N+LWORK_EIG) +! LWORK_EIG is the work length for the +! symmetric eigenvalue solver. If +! - WHTEIG = 1(DSYEV), LWORK_EIG = MAX(1,3*N-1) +! - WHTEIG = 2(DSYEVD), LWORK_EIG = 1+6*N+2*N**2 +! If JOBZ == 'N', then +! LWORK >= MAX(2, N + LWORK_SVD, N+LWORK_EIG), +! where +! - if WHTEIG = 1, LWORK_EIG = MAX(1,3*N-1) +! - if WHTEIG = 2, LWORK_EIG = 2*N+1 +! Here LWORK_SVD = MAX(M, 5*N*N+4*N)+3*N*N is the +! minimal workspace length of DGESDD. +! If WHTSVD == 3 :: +! If JOBZ == 'V', then +! LWORK >= MAX(2, N+LWORK_SVD,N+LOWRK_EIG) +! LWORK_EIG is the work length for the +! symmetric eigenvalue solver. If +! - WHTEIG = 1(DSYEV), LWORK_EIG = MAX(1,3*N-1) +! - WHTEIG = 2(DSYEVD), LWORK_EIG = 1+6*N+2*N**2 +! If JOBZ == 'N', then +! LWORK >= MAX(2, N+LWORK_SVD,N+LWORK_EIG), +! where +! - if WHTEIG = 1, LWORK_EIG = MAX(1,3*N-1) +! - if WHTEIG = 2, LWORK_EIG = 2*N+1 +! Here LWORK_SVD = N+M+MAX(3*N+1, +! MAX(1,3*N+M,5*N),MAX(1,N)) +! is the minimal workspace length of DGESVDQ. +! If WHTSVD == 4 :: +! If JOBZ == 'V', then +! LWORK >= MAX(2, N+LWORK_SVD,N+LWORK_EIG) +! LWORK_EIG is the work length for the +! symmetric eigenvalue solver. If +! - WHTEIG = 1(DSYEV), LWORK_EIG = MAX(1,3*N-1) +! - WHTEIG = 2(DSYEVD), LWORK_EIG = 1+6*N+2*N**2 +! If JOBZ == 'N', then +! LWORK >= MAX(2, N+LWORK_SVD,N+LWORK_EIG), +! where +! - if WHTEIG = 1, LWORK_EIG = MAX(1,3*N-1) +! - if WHTEIG = 2, LWORK_EIG = 2*N+1 +! Here LWORK_SVD = MAX(7,2*M+N,6*N+2*N*N) is the +! minimal workspace length of DGEJSV. +! Further, if JOBF=='X', then in addition +! LWORK = MAX(LWORK,2*N+N) (for DGEQRF and DORGQR). +! The above expressions are not simplified in order to +! make the usage of WORK more transparent, and for +! easier checking. In any case, LWORK >= 2. +! If on entry LWORK = -1, then a workspace query is +! assumed and the procedure only computes the minimal +! and the optimal workspace lengths for both WORK and +! IWORK. See the descriptions of WORK and IWORK. +!..... +! IWORK (workspace/output) INTEGER LIWORK-by-1 array +! Workspace that is required if WHTSVD equals +! 2 , 3 or 4. Further, if JOBF=='X', it is used to return +! ordering of the orthonormalized Exact DMD eigenvectors, +! so that EIGS(IWORK(i)) is the eigenvalue that corresponds to +! the i-th EDMD vector. See the descriptions of JOBF and B. +! If on entry LWORK =-1 or LIWORK=-1, then the +! minimal length of IWORK is computed and returned in +! IWORK(1). See the description of LIWORK. +!..... +! LIWORK (input) INTEGER +! The minimal length of the workspace vector IWORK. +! LIWORK is determined as follows. First: +! If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1 +! If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M,N)). +! If WHTSVD == 3, then LIWORK >= MAX(1,M+N-1). +! If WHTSVD == 4, then LIWORK >= MAX(3,M+3*N). +! If WHTEIG == 2 and JOBZ == 'V', then LIWORK >= MAX(1,3+5*N) +! Then, if JOBF == 'X', then LIWORK = MAX(LIWORK,N). +! If on entry LIWORK = -1, then a workspace query is +! assumed and the procedure only computes the minimal +! and the optimal workspace lengths for both WORK and +! IWORK. See the descriptions of WORK and IWORK. +!..... +! INFO (output) INTEGER +! -i < 0 :: On entry, the i-th argument had an +! illegal value +! = 0 :: Successful return. +! = 1 :: Void input. Quick exit (M=0 or N=0). +! = 2 :: The SVD computation of X did not converge. +! Suggestion: Check the input data and/or +! repeat with different WHTSVD. +! = 3 :: The computation of the eigenvalues did not +! converge. +! = 4 :: If data scaling was requested on input and +! the procedure found inconsistency in the data +! such that for some column index i, +! X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set +! to zero if JOBS=='C'. The computation proceeds +! with original or modified data and warning +! flag is set with INFO=4. +!............................................................. +!............................................................. +! Parameters +! ~~~~~~~~~~ + REAL(KIND=WP), PARAMETER :: ONE = 1.0_WP + REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP + +! Local scalars +! ~~~~~~~~~~~~~ + REAL(KIND=WP) :: OFL, ROOTSC, SCALE, SMALL, & + SSUM, XSCL1, XSCL2 + INTEGER :: i, j, IMINWR, INFO1, INFO2, & + IWRSDD, LWRKEV, LWRSDD, LWRSVD, & + LWRSVQ, MLWORK, MWRKEV, MWRSDD, & + MWRSVD, MWRSVJ, MWRSVQ, NUMRNK, & + OLWORK + LOGICAL :: BADXY, FORWRD, LQUERY, SCCOLX, & + SCCOLY, WNTEX, WNTREF, WNTRES, & + WNTVEC + CHARACTER :: JOBZL, T_OR_N + CHARACTER :: JSVOPT + +! Local arrays +! ~~~~~~~~~~~~ + REAL(KIND=WP) :: RDUMMY(2), RDUMMY2(2) +! External funcions (BLAS and LAPACK) +! ~~~~~~~~~~~~~~~~~ + REAL(KIND=WP) DLANGE, DLAMCH, DNRM2 + EXTERNAL DLANGE, DLAMCH, DNRM2, IDAMAX, IDAMIN + INTEGER IDAMAX, IDAMIN + LOGICAL DISNAN, LSAME + EXTERNAL DISNAN, LSAME + +! External subroutines (BLAS and LAPACK) +! ~~~~~~~~~~~~~~~~~~~~ + EXTERNAL DAXPY, DGEMM, DSCAL + EXTERNAL DSYEV, DSYEVD, DGEJSV, DGEQRF, DGESDD, & + DGESVD, DGESVDQ, DLACPY, DLAPMT, DLASCL, & + DLASSQ, DORGQR, XERBLA + +! Intrinsic functions +! ~~~~~~~~~~~~~~~~~~~ + INTRINSIC DBLE, INT, MAX, SQRT +!............................................................ +! +! Test the input arguments +! + WNTRES = LSAME(JOBR,'R') + SCCOLX = LSAME(JOBS,'S') .OR. LSAME(JOBS,'C') + SCCOLY = LSAME(JOBS,'Y') + WNTVEC = LSAME(JOBZ,'V') + WNTREF = LSAME(JOBF,'R') + WNTEX = LSAME(JOBF,'E') .OR. LSAME(JOBF,'X') + INFO = 0 + LQUERY = ( ( LWORK == -1 ) .OR. ( LIWORK == -1 ) ) +! + IF ( .NOT. (SCCOLX .OR. SCCOLY .OR. & + LSAME(JOBS,'N')) ) THEN + INFO = -1 + ELSE IF ( .NOT. (WNTVEC .OR. LSAME(JOBZ,'N') & + .OR. LSAME(JOBZ,'F')) ) THEN + INFO = -2 + ELSE IF ( .NOT. (WNTRES .OR. LSAME(JOBR,'N')) .OR. & + ( WNTRES .AND. (.NOT.WNTVEC) ) ) THEN + INFO = -3 + ELSE IF ( .NOT. (WNTREF .OR. WNTEX .OR. & + LSAME(JOBF,'N') ) ) THEN + INFO = -4 + ELSE IF ( .NOT.((WHTSVD == 1) .OR. (WHTSVD == 2) .OR. & + (WHTSVD == 3) .OR. (WHTSVD == 4) ))THEN + INFO = -5 + ELSE IF ( .NOT.((WHTSYM == 1) .OR. (WHTSYM == 2))) THEN + INFO = -6 + ELSE IF ( .NOT.((WHTEIG == 1) .OR. (WHTEIG == 2))) THEN + INFO = -7 + ELSE IF ( M < 0 ) THEN + INFO = -8 + ELSE IF ( ( N < 0 ) .OR. ( N > M ) ) THEN + INFO = -9 + ELSE IF ( LDX < M ) THEN + INFO = -11 + ELSE IF ( LDY < M ) THEN + INFO = -13 + ELSE IF ( .NOT. (( NRNK == -2).OR.(NRNK == -1).OR. & + ((NRNK >= 1).AND.(NRNK <=N ))) ) THEN + INFO = -14 + ELSE IF ( ( TOL < ZERO ) .OR. ( TOL >= ONE ) ) THEN + INFO = -15 + ELSE IF ( LDZ < M ) THEN + INFO = -19 + ELSE IF ( (WNTREF .OR. WNTEX ) .AND. ( LDB < M ) ) THEN + INFO = -22 + ELSE IF ( LDW < N ) THEN + INFO = -24 + ELSE IF ( LDS < N ) THEN + INFO = -26 + END IF +! + IF ( INFO == 0 ) THEN + ! Compute the minimal and the optimal workspace + ! requirements. Simulate running the code and + ! determine minimal and optimal sizes of the + ! workspace at any moment of the run. + IF ( N == 0 ) THEN + ! Quick return. All output except K is void. + ! INFO=1 signals the void input. + ! In case of a workspace query, the default + ! minimal workspace lengths are returned. + IF ( LQUERY ) THEN + IWORK(1) = 1 + WORK(1) = 2 + WORK(2) = 2 + ELSE + K = 0 + END IF + INFO = 1 + RETURN + END IF + MLWORK = MAX(2,N) + OLWORK = MAX(2,N) + IMINWR = 1 + SELECT CASE ( WHTSVD ) + CASE (1) + ! The following is specified as the minimal + ! length of WORK in the definition of DGESVD: + ! MWRSVD = MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N)) + MWRSVD = MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N)) + MLWORK = MAX(MLWORK,N + MWRSVD) + IF ( LQUERY ) THEN + CALL DGESVD( 'O', 'S', M, N, X, LDX, WORK, & + B, LDB, W, LDW, RDUMMY, -1, INFO1 ) + LWRSVD = MAX( MWRSVD, INT( RDUMMY(1) ) ) + OLWORK = MAX(OLWORK,N + LWRSVD) + END IF + CASE (2) + ! The following is specified as the minimal + ! length of WORK in the definition of DGESDD: + ! MWRSDD = 3*MIN(M,N)*MIN(M,N) + + ! MAX( MAX(M,N),5*MIN(M,N)*MIN(M,N)+4*MIN(M,N) ) + ! IMINWR = 8*MIN(M,N) + MWRSDD = 3*MIN(M,N)*MIN(M,N) + & + MAX( MAX(M,N),5*MIN(M,N)*MIN(M,N)+4*MIN(M,N) ) + MLWORK = MAX(MLWORK,N + MWRSDD) + IMINWR = MAX(IMINWR,8*MIN(M,N)) + IF ( LQUERY ) THEN + CALL DGESDD( 'O', M, N, X, LDX, WORK, B, & + LDB, W, LDW, RDUMMY, -1, IWORK, INFO1 ) + LWRSDD = MAX( MWRSDD, INT( RDUMMY(1) ) ) + OLWORK = MAX(OLWORK,N + LWRSDD) + END IF + CASE (3) + !LWQP3 = 3*N+1 + !LWORQ = MAX(N, 1) + !MWRSVD = MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N)) + !MWRSVQ = N + MAX( LWQP3, MWRSVD, LWORQ ) + MAX(M,2) + !MLWORK = N + MWRSVQ + !IMINWR = M+N-1 + CALL DGESVDQ( 'H', 'P', 'N', 'R', 'R', M, N, & + X, LDX, WORK, Z, LDZ, W, LDW, & + NUMRNK, IWORK, LIWORK, RDUMMY, & + -1, RDUMMY2, -1, INFO1 ) + IMINWR = MAX(IMINWR,IWORK(1)) + MWRSVQ = INT(RDUMMY(2)) + INT(RDUMMY2(1)) + MLWORK = MAX(MLWORK,N+MWRSVQ) + IF ( LQUERY ) THEN + LWRSVQ = MAX( MWRSVQ, INT(RDUMMY(1)) ) + OLWORK = MAX(OLWORK,N+LWRSVQ+INT(RDUMMY2(1))) + END IF + CASE (4) + !MWRSVJ = MAX( 7, 2*M+N, 6*N+2*N*N ) ! for JSVOPT='V' + !MWRSVJ = MAX( 7, 2*M+N, 4*N+N*N, 2*N+N*N+6 ) ! for JSVOPT='J' + !This code uses JSVOPT='J'. + JSVOPT = 'J' + MWRSVJ = MAX( 7, 2*M+N, 4*N+N*N, 2*N+N*N+6 ) + MLWORK = MAX(MLWORK,N+MWRSVJ) + IMINWR = MAX(IMINWR,MAX( 3, M+3*N )) + IF ( LQUERY ) THEN + OLWORK = MAX(OLWORK,N+MWRSVJ) + END IF + END SELECT + IF ( WNTVEC .OR. WNTEX .OR. LSAME(JOBZ,'F') ) THEN + JOBZL = 'V' + ELSE + JOBZL = 'N' + END IF + SELECT CASE ( WHTEIG ) + CASE (1) + ! Workspace calculation to the DSYEV call + MWRKEV = MAX( 1, 3*N-1 ) + MLWORK = MAX(MLWORK,N+MWRKEV) + IF ( LQUERY ) THEN + CALL DSYEV( JOBZL, 'U', N, S, LDS, EIGS, RDUMMY, & + -1, INFO1 ) ! LAPACK CALL + LWRKEV = MAX( MWRKEV, INT(RDUMMY(1)) ) + OLWORK = MAX( OLWORK, N+LWRKEV ) + END IF + CASE (2) + IF ( LSAME(JOBZL,'V') ) THEN + MWRKEV = 1+6*N+2*N*N + IWRSDD = 3+5*N + ELSE + MWRKEV = MAX( 1, 2*N+1) + IWRSDD = 1 + END IF + MLWORK = MAX(MLWORK,N+MWRKEV) + IF ( LQUERY ) THEN + CALL DSYEVD( JOBZL, 'U', N, S, LDS, EIGS, RDUMMY, & + -1, IWORK, -1, INFO1 ) ! LAPACK CALL + LWRKEV = MAX( MWRKEV, INT(RDUMMY(1)) ) + OLWORK = MAX( OLWORK, N+LWRKEV ) + IWRSDD = IWORK(1) + END IF + IMINWR = MAX(IMINWR,IWRSDD) + END SELECT + + IF ( LSAME(JOBF,'X') ) THEN + MLWORK = MAX(MLWORK,N+2*N) + ! DGEQRF and DORGQR need >= 2*N locations + IF ( LQUERY ) THEN + CALL DGEQRF( M, N, B, LDB, RDUMMY, RDUMMY, & + -1, INFO1 ) + OLWORK = MAX( OLWORK, 2*N+INT(RDUMMY(1)) ) + CALL DORGQR( M, N, N, B, LDB, RDUMMY, RDUMMY, & + -1, INFO1 ) + OLWORK = MAX( OLWORK, 2*N+INT(RDUMMY(1)) ) + END IF + IMINWR = MAX( IMINWR, N ) + END IF + + IF ( LIWORK < IMINWR .AND. (.NOT.LQUERY) ) INFO = -30 + IF ( LWORK < MLWORK .AND. (.NOT.LQUERY) ) INFO = -28 + END IF +! + IF( INFO /= 0 ) THEN + CALL XERBLA( 'DSYDMD', -INFO ) + RETURN + ELSE IF ( LQUERY ) THEN +! Return minimal and optimal workspace sizes + IWORK(1) = IMINWR + WORK(1) = MLWORK + WORK(2) = OLWORK + RETURN + END IF +!............................................................ +! + OFL = DLAMCH('O') + SMALL = DLAMCH('S') + BADXY = .FALSE. +! +! <1> Optional scaling of the snapshots (columns of X, Y) +! ========================================================== + IF ( SCCOLX ) THEN + ! The columns of X will be normalized. + ! To prevent overflows, the column norms of X are + ! carefully computed using DLASSQ. + K = 0 + DO i = 1, N + !WORK(i) = DNRM2( M, X(1,i), 1 ) + SCALE = ZERO + CALL DLASSQ( M, X(1,i), 1, SCALE, SSUM ) + IF ( DISNAN(SCALE) .OR. DISNAN(SSUM) ) THEN + K = 0 + INFO = -10 + CALL XERBLA('DSYDMD',-INFO) + END IF + IF ( (SCALE /= ZERO) .AND. (SSUM /= ZERO) ) THEN + ROOTSC = SQRT(SSUM) + IF ( SCALE .GE. (OFL / ROOTSC) ) THEN +! Norm of X(:,i) overflows. First, X(:,i) +! is scaled by +! ( ONE / ROOTSC ) / SCALE = 1/||X(:,i)||_2. +! Next, the norm of X(:,i) is stored without +! overflow as WORK(i) = - SCALE * (ROOTSC/M), +! the minus sign indicating the 1/M factor. +! Scaling is performed without overflow, and +! underflow may occur in the smallest entries +! of X(:,i). The relative backward and forward +! errors are small in the ell_2 norm. + CALL DLASCL( 'G', 0, 0, SCALE, ONE/ROOTSC, & + M, 1, X(1,i), M, INFO1 ) + WORK(i) = - SCALE * ( ROOTSC / DBLE(M) ) + ELSE +! X(:,i) will be scaled to unit 2-norm + WORK(i) = SCALE * ROOTSC + CALL DLASCL( 'G',0, 0, WORK(i), ONE, M, 1, & + X(1,i), M, INFO1 ) ! LAPACK CALL +! X(1:M,i) = (ONE/WORK(i)) * X(1:M,i) ! INTRINSIC + END IF + ELSE + WORK(i) = ZERO + K = K + 1 + END IF + END DO + IF ( K == N ) THEN + ! All columns of X are zero. Return error code -8. + ! (the 8th input variable had an illegal value) + K = 0 + INFO = -8 + CALL XERBLA('DSYDMD',-INFO) + RETURN + END IF + DO i = 1, N +! Now, apply the same scaling to the columns of Y. + IF ( WORK(i) > ZERO ) THEN + CALL DSCAL( M, ONE/WORK(i), Y(1,i), 1 ) ! BLAS CALL +! Y(1:M,i) = (ONE/WORK(i)) * Y(1:M,i) ! INTRINSIC + ELSE IF ( WORK(i) < ZERO ) THEN + CALL DLASCL( 'G', 0, 0, -WORK(i), & + ONE/DBLE(M), M, 1, Y(1,i), M, INFO1 ) ! LAPACK CALL + ELSE IF ( Y(IDAMAX(M, Y(1,i),1),i ) & + /= ZERO ) THEN +! X(:,i) is zero vector. For consistency, +! Y(:,i) should also be zero. If Y(:,i) is not +! zero, then the data might be inconsistent or +! corrupted. If JOBS == 'C', Y(:,i) is set to +! zero and a warning flag is raised. +! The computation continues but the +! situation will be reported in the output. + BADXY = .TRUE. + IF ( LSAME(JOBS,'C')) & + CALL DSCAL( M, ZERO, Y(1,i), 1 ) ! BLAS CALL + END IF + END DO + END IF + ! + IF ( SCCOLY ) THEN + ! The columns of Y will be normalized. + ! To prevent overflows, the column norms of Y are + ! carefully computed using DLASSQ. + DO i = 1, N + !WORK(i) = DNRM2( M, Y(1,i), 1 ) + SCALE = ZERO + CALL DLASSQ( M, Y(1,i), 1, SCALE, SSUM ) + IF ( DISNAN(SCALE) .OR. DISNAN(SSUM) ) THEN + K = 0 + INFO = -12 + CALL XERBLA('DSYDMD',-INFO) + END IF + IF ( SCALE /= ZERO .AND. (SSUM /= ZERO) ) THEN + ROOTSC = SQRT(SSUM) + IF ( SCALE .GE. (OFL / ROOTSC) ) THEN +! Norm of Y(:,i) overflows. First, Y(:,i) +! is scaled by +! ( ONE / ROOTSC ) / SCALE = 1/||Y(:,i)||_2. +! Next, the norm of Y(:,i) is stored without +! overflow as WORK(i) = - SCALE * (ROOTSC/M), +! the minus sign indicating the 1/M factor. +! Scaling is performed without overflow, and +! underflow may occur in the smallest entries +! of Y(:,i). The relative backward and forward +! errors are small in the ell_2 norm. + CALL DLASCL( 'G', 0, 0, SCALE, ONE/ROOTSC, & + M, 1, Y(1,i), M, INFO1 ) + WORK(i) = - SCALE * ( ROOTSC / DBLE(M) ) + ELSE +! X(:,i) will be scaled to unit 2-norm + WORK(i) = SCALE * ROOTSC + CALL DLASCL( 'G',0, 0, WORK(i), ONE, M, 1, & + Y(1,i), M, INFO1 ) ! LAPACK CALL +! Y(1:M,i) = (ONE/WORK(i)) * Y(1:M,i) ! INTRINSIC + END IF + ELSE + WORK(i) = ZERO + END IF + END DO + DO i = 1, N +! Now, apply the same scaling to the columns of X. + IF ( WORK(i) > ZERO ) THEN + CALL DSCAL( M, ONE/WORK(i), X(1,i), 1 ) ! BLAS CALL +! X(1:M,i) = (ONE/WORK(i)) * X(1:M,i) ! INTRINSIC + ELSE IF ( WORK(i) < ZERO ) THEN + CALL DLASCL( 'G', 0, 0, -WORK(i), & + ONE/DBLE(M), M, 1, X(1,i), M, INFO1 ) ! LAPACK CALL + ELSE IF ( X(IDAMAX(M, X(1,i),1),i ) & + /= ZERO ) THEN +! Y(:,i) is zero vector. If X(:,i) is not +! zero, then a warning flag is raised. +! The computation continues but the +! situation will be reported in the output. + BADXY = .TRUE. + END IF + END DO + END IF +! +! <2> SVD of the data snapshot matrix X. +! ===================================== +! The left singular vectors are stored in the array X. +! The right singular vectors are in the array W. +! The array W will later on contain the eigenvectors +! of a Rayleigh quotient. + NUMRNK = N + SELECT CASE ( WHTSVD ) + CASE (1) + CALL DGESVD( 'O', 'S', M, N, X, LDX, WORK, B, & + LDB, W, LDW, WORK(N+1), LWORK-N, INFO1 ) ! LAPACK CALL + T_OR_N = 'T' + CASE (2) + CALL DGESDD( 'O', M, N, X, LDX, WORK, B, LDB, W, & + LDW, WORK(N+1), LWORK-N, IWORK, INFO1 ) ! LAPACK CALL + T_OR_N = 'T' + CASE (3) + CALL DGESVDQ( 'H', 'P', 'N', 'R', 'R', M, N, & + X, LDX, WORK, Z, LDZ, W, LDW, & + NUMRNK, IWORK, LIWORK, WORK(N+MAX(2,M)+1), & + LWORK-N-MAX(2,M), WORK(N+1), MAX(2,M), INFO1) ! LAPACK CALL + CALL DLACPY( 'A', M, NUMRNK, Z, LDZ, X, LDX ) ! LAPACK CALL + T_OR_N = 'T' + CASE (4) + CALL DGEJSV( 'F', 'U', JSVOPT, 'N', 'N', 'P', M, & + N, X, LDX, WORK, Z, LDZ, W, LDW, & + WORK(N+1), LWORK-N, IWORK, INFO1 ) ! LAPACK CALL + CALL DLACPY( 'A', M, N, Z, LDZ, X, LDX ) ! LAPACK CALL + T_OR_N = 'N' + XSCL1 = WORK(N+1) + XSCL2 = WORK(N+2) + IF ( XSCL1 /= XSCL2 ) THEN + ! This is an exceptional situation. If the + ! data matrices are not scaled and the + ! largest singular value of X overflows. + ! In that case DGEJSV can return the SVD + ! in scaled form. The scaling factor can be used + ! to rescale the data (X and Y). + CALL DLASCL( 'G', 0, 0, XSCL1, XSCL2, M, N, Y, LDY, INFO2 ) + END IF + END SELECT +! + IF ( INFO1 > 0 ) THEN + ! The SVD selected subroutine did not converge. + ! Return with an error code. + INFO = 2 + RETURN + END IF +! + IF ( WORK(1) == ZERO ) THEN + ! The largest computed singular value of (scaled) + ! X is zero. Return error code -8 + ! (the 8th input variable had an illegal value). + K = 0 + INFO = -8 + CALL XERBLA('DSYDMD',-INFO) + RETURN + END IF +! + !<3> Determine the numerical rank of the data + ! snapshots matrix X. This depends on the + ! parameters NRNK and TOL. + + SELECT CASE ( NRNK ) + CASE ( -1 ) + K = 1 + DO i = 2, NUMRNK + IF ( ( WORK(i) <= WORK(1)*TOL ) .OR. & + ( WORK(i) <= SMALL ) ) EXIT + K = K + 1 + END DO + CASE ( -2 ) + K = 1 + DO i = 1, NUMRNK-1 + IF ( ( WORK(i+1) <= WORK(i)*TOL ) .OR. & + ( WORK(i) <= SMALL ) ) EXIT + K = K + 1 + END DO + CASE DEFAULT + K = 1 + DO i = 2, NRNK + IF ( WORK(i) <= SMALL ) EXIT + K = K + 1 + END DO + END SELECT + ! Now, U = X(1:M,1:K) is the SVD/POD basis for the + ! snapshot data in the input matrix X. + !<4> Compute the Rayleigh quotient S = U^T * A * U. + ! Depending on the requsted outputs, the computation + ! is organized to compute additional auxiliary + ! matrices (for the residuals and refinements). + ! + ! In all formulas below, we need V_k*Sigma_k^(-1) + ! where either V_k is in W(1:N,1:K), or V_k^T is in + ! W(1:K,1:N). Here Sigma_k=diag(WORK(1:K)). + IF ( LSAME(T_OR_N, 'N') ) THEN + DO i = 1, K + CALL DSCAL( N, ONE/WORK(i), W(1,i), 1 ) ! BLAS CALL + ! W(1:N,i) = (ONE/WORK(i)) * W(1:N,i) ! INTRINSIC + END DO + ELSE + ! This non-unit stride access is due to the fact + ! that DGESVD, DGESVDQ and DGESDD return the + ! transposed matrix of the right singular vectors. + !DO i = 1, K + ! CALL DSCAL( N, ONE/WORK(i), W(i,1), LDW ) ! BLAS CALL + ! ! W(i,1:N) = (ONE/WORK(i)) * W(i,1:N) ! INTRINSIC + !END DO + DO i = 1, K + WORK(N+i) = ONE/WORK(i) + END DO + DO j = 1, N + DO i = 1, K + W(i,j) = (WORK(N+i))*W(i,j) + END DO + END DO + END IF +! + IF ( WNTREF ) THEN + ! + ! Need A*U(:,1:K)=Y*V_k*inv(diag(WORK(1:K))) + ! for computing the refined Ritz vectors + ! (optionally, outside DSYDMD). + CALL DGEMM( 'N', T_OR_N, M, K, N, ONE, Y, LDY, W, & + LDW, ZERO, Z, LDZ ) ! BLAS CALL + ! Z(1:M,1:K)=MATMUL(Y(1:M,1:N),TRANSPOSE(W(1:K,1:N))) ! INTRINSIC, for T_OR_N=='T' + ! Z(1:M,1:K)=MATMUL(Y(1:M,1:N),W(1:N,1:K)) ! INTRINSIC, for T_OR_N=='N' + ! + ! At this point Z contains + ! A * U(:,1:K) = Y * V_k * Sigma_k^(-1), and + ! this is needed for computing the residuals. + ! This matrix is returned in the array B and + ! it can be used to compute refined Ritz vectors. + CALL DLACPY( 'A', M, K, Z, LDZ, B, LDB ) ! BLAS CALL + ! B(1:M,1:K) = Z(1:M,1:K) ! INTRINSIC + + CALL DGEMM( 'T', 'N', K, K, M, ONE, X, LDX, Z, & + LDZ, ZERO, S, LDS ) ! BLAS CALL + ! S(1:K,1:K) = MATMUL(TANSPOSE(X(1:M,1:K)),Z(1:M,1:K)) ! INTRINSIC + ! At this point S = U^T * A * U is the Rayleigh quotient. + ELSE + ! A * U(:,1:K) is not explicitly needed and the + ! computation is organized differently. The Rayleigh + ! quotient is computed more efficiently. + CALL DGEMM( 'T', 'N', K, N, M, ONE, X, LDX, Y, LDY, & + ZERO, Z, LDZ ) ! BLAS CALL + ! Z(1:K,1:N) = MATMUL( TRANSPOSE(X(1:M,1:K)), Y(1:M,1:N) ) ! INTRINSIC + ! In the two DGEMM calls here, can use K for LDZ. + CALL DGEMM( 'N', T_OR_N, K, K, N, ONE, Z, LDZ, W, & + LDW, ZERO, S, LDS ) ! BLAS CALL + ! S(1:K,1:K) = MATMUL(Z(1:K,1:N),TRANSPOSE(W(1:K,1:N))) ! INTRINSIC, for T_OR_N=='T' + ! S(1:K,1:K) = MATMUL(Z(1:K,1:N),(W(1:N,1:K))) ! INTRINSIC, for T_OR_N=='N' + ! At this point S = U^T * A * U is the Rayleigh quotient. + ! If the residuals are requested, save scaled V_k into Z. + ! Recal that V_k or V_k^T is stored in W. + IF ( WNTRES .OR. WNTEX ) THEN + IF ( LSAME(T_OR_N, 'N') ) THEN + CALL DLACPY( 'A', N, K, W, LDW, Z, LDZ ) + ELSE + CALL DLACPY( 'A', K, N, W, LDW, Z, LDZ ) + END IF + END IF + END IF + + SELECT CASE ( WHTSYM ) + CASE (1) + CALL DLACPY( 'L', K, K, S, LDS, W, LDW ) + CASE (2) + ! This is the symmetrizer from the piDMD [6], + ! based on a solution of the symmetric Procrustes + ! problem. Here included for comparisons/study and + ! for the sake of completeness. + DO i = 1, K-1 + W(i,i) = S(i,i) + DO j = i+1, K + W(j,i) = ( WORK(i)*(S(j,i)*WORK(i)) + & + WORK(j)*(S(i,j)*WORK(j)) ) / & + ( WORK(i)**2 + WORK(j)**2 ) + END DO + END DO + W(k,k) = S(k,k) + END SELECT + ! + !<5> Compute the Ritz values and (if requested) the + ! right eigenvectors of the Rayleigh quotient. + ! + ! The LAPACK eigensolvers DSYEV and DSYEVD return the + ! eigenvectors in the array that contains upper or + ! lower triangle of the symmetric Rayleigh quotient. + ! + SELECT CASE ( WHTEIG ) + CASE (1) + CALL DSYEV( JOBZL, 'L', K, W, LDW, EIGS, WORK(N+1), & + LWORK-N, INFO1 ) ! LAPACK CALL + CASE (2) + CALL DSYEVD( JOBZL, 'L', K, W, LDW, EIGS, WORK(N+1), & + LWORK-N, IWORK, LIWORK, INFO1 ) ! LAPACK CALL + END SELECT + ! + ! W(1:K,1:K) contains the eigenvectors of the Rayleigh + ! quotient. + IF ( INFO1 > 0 ) THEN + ! DSYEV/DSYEVD failed to compute the eigenvalues and + ! eigenvectors of the Rayleigh quotient. + INFO = 3 + RETURN + END IF +! + ! <6> Compute the eigenvectors (if requested) and, + ! the residuals (if requested). + ! + IF ( WNTVEC .OR. WNTEX ) THEN + IF ( WNTRES ) THEN + IF ( WNTREF ) THEN + ! Here, if the refinement is requested, we have + ! A*U(:,1:K) already computed and stored in Z. + ! For the residuals, need Y = A * U(:,1;K) * W. + ! W is stored in S. ? copy in Q + CALL DGEMM( 'N', 'N', M, K, K, ONE, Z, LDZ, W, & + LDW, ZERO, Y, LDY ) ! BLAS CALL + ! Y(1:M,1:K) = Z(1:M,1:K) * W(1:K,1:K) ! INTRINSIC + ! This frees Z; Y contains A * U(:,1:K) * W. + ELSE + ! Compute S = V_k * Sigma_k^(-1) * W, where + ! V_k * Sigma_k^(-1) is stored in Z + CALL DGEMM( T_OR_N, 'N', N, K, K, ONE, Z, LDZ, & + W, LDW, ZERO, S, LDS) + ! Then, compute Z = Y * S = + ! = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) = + ! = A * U(:,1:K) * W(1:K,1:K) + CALL DGEMM( 'N', 'N', M, K, N, ONE, Y, LDY, S, & + LDS, ZERO, Z, LDZ) + ! Save a copy of Z into Y and free Z for holding + ! the Ritz vectors. + CALL DLACPY( 'A', M, K, Z, LDZ, Y, LDY ) + IF ( WNTEX ) CALL DLACPY( 'A', M, K, Z, LDZ, B, LDB ) + END IF + ELSE IF ( WNTEX ) THEN + ! Compute S = V_k * Sigma_k^(-1) * W, where + ! V_k * Sigma_k^(-1) is stored in Z + CALL DGEMM( T_OR_N, 'N', N, K, K, ONE, Z, LDZ, & + W, LDW, ZERO, S, LDS ) + ! Then, compute Z = Y * S = + ! = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) = + ! = A * U(:,1:K) * W(1:K,1:K) + CALL DGEMM( 'N', 'N', M, K, N, ONE, Y, LDY, S, & + LDS, ZERO, B, LDB ) + ! The above call replaces the following two calls + ! that were used in the developing-testing phase. + !CALL DGEMM( 'N', 'N', M, K, N, ONE, Y, LDY, S, & + ! LDS, ZERO, Z, LDZ ) + ! Save a copy of Z into B and free Z for holding + ! the Ritz vectors. + ! CALL DLACPY( 'A', M, K, Z, LDZ, B, LDB ) + END IF +! + ! Compute the Ritz vectors + IF ( WNTVEC ) CALL DGEMM( 'N', 'N', M, K, K, ONE, X, LDX, W, LDW, & + ZERO, Z, LDZ ) ! BLAS CALL + ! Z(1:M,1:K) = MATMUL(X(1:M,1:K), W(1:K,1:K)) ! INTRINSIC +! + IF ( WNTRES ) THEN + DO i = 1, K + ! Compute the residuals + CALL DAXPY( M, -EIGS(i), Z(1,i), 1, Y(1,i), 1 ) ! BLAS CALL + ! Y(1:M,i) = Y(1:M,i) - EIGS(i) * Z(1:M,i) ! INTRINSIC + RES(i) = DNRM2( M, Y(1,i), 1) ! BLAS CALL + END DO + END IF + END IF +! + IF ( LSAME(JOBF,'X') ) THEN + ! If the Exact DMD eigenvectors are requested, the + ! original EDMD vectors must be orthogonalized. + ! Orthogonalization may change the vector so that the + ! corresponding residuals may increase. (Data driven + ! setting does not allow recomputing the Razleigh + ! quotients.) To preseve the quality of the best EDMD + ! vectors, orthogonalization is prformed in order of + ! increasing residuals. For more details see [4]. + DO i = 1, K + IWORK(i) = i + END DO + CALL DCOPY( K, RES, 1, WORK(N+1), 1 ) + DO i = 1, K-1 + j = IDAMIN( K-i+1, WORK(N+i), 1 ) + i - 1 + IF ( j /= i ) THEN + INFO1 = IWORK(i) + IWORK(i) = IWORK(j) + IWORK(j) = INFO1 + SCALE = WORK(N+i) + WORK(N+i) = WORK(N+j) + WORK(N+j) = SCALE + END IF + END DO + FORWRD = .TRUE. + CALL DLAPMT( FORWRD, M, K, B, LDB, IWORK ) + ! Here we need the Gram-Schmidt orthogonalization + ! of the columns of B. The following two lines + ! use the QR factorization subroutine DGEQRF. This + ! can be replaced with a more efficient Gram-Schmidt + ! implementation. The matrix B is not expected to + ! be ill-conditioned, so Gram-Schmid will be OK. + CALL DGEQRF( M, K, B, LDB, WORK(N+1), WORK(N+K+1), & + LWORK-(N+K), INFO1 ) + CALL DORGQR( M, K, K, B, LDB, WORK(N+1), & + WORK(N+K+1), LWORK-(N+K), INFO1 ) + END IF + + IF ( WHTSVD == 4 ) THEN + WORK(N+1) = XSCL1 + WORK(N+2) = XSCL2 + END IF +! +! Successful exit. + IF ( .NOT. BADXY ) THEN + INFO = 0 + ELSE + ! A warning on possible data inconsistency. + ! This shouild be a rare event. + INFO = 4 + END IF +!............................................................ + RETURN +! ...... + END SUBROUTINE DSYDMD + \ No newline at end of file diff --git a/SRC/dsydmdq.f90 b/SRC/dsydmdq.f90 new file mode 100644 index 0000000000..427bc14b21 --- /dev/null +++ b/SRC/dsydmdq.f90 @@ -0,0 +1,699 @@ +SUBROUTINE DSYDMDQ( JOBS, JOBZ, JOBR, JOBQ, JOBT, JOBF, & + WHTSVD, WHTSYM, WHTEIG, M, N, F, LDF, & + X, LDX, Y, LDY, NRNK, TOL, K, EIGS, & + Z, LDZ, RES, B, LDB, V, LDV, & + S, LDS, WORK, LWORK, IWORK, LIWORK, INFO ) +! August 2022 +!..... + USE iso_fortran_env + IMPLICIT NONE + INTEGER, PARAMETER :: WP = real64 +!..... +! Scalar arguments + CHARACTER, INTENT(IN) :: JOBS, JOBZ, JOBR, JOBQ, & + JOBT, JOBF + INTEGER, INTENT(IN) :: WHTSVD, WHTSYM, WHTEIG, M, N, & + LDF, LDX, LDY, NRNK, LDZ, LDB,& + LDV, LDS, LWORK, LIWORK + INTEGER, INTENT(OUT) :: INFO, K + REAL(KIND=WP), INTENT(IN) :: TOL +! Array arguments + REAL(KIND=WP), INTENT(INOUT) :: F(LDF,*) + REAL(KIND=WP), INTENT(OUT) :: X(LDX,*), Y(LDY,*), & + Z(LDZ,*), B(LDB,*), & + V(LDV,*), S(LDS,*) + REAL(KIND=WP), INTENT(OUT) :: EIGS(*), RES(*) + REAL(KIND=WP), INTENT(OUT) :: WORK(*) + INTEGER, INTENT(OUT) :: IWORK(*) +!..... +! Purpose +! ======= +! DSYDMDQ computes the Dynamic Mode Decomposition (DMD) for +! a pair of data snapshot matrices, using a QR factorization +! based compression of the data. For the input matrices X +! and Y such that Y = A*X with an unaccessible symmetric matrix +! A, DSYDMDQ computes a certain number of Ritz pairs of A using +! the standard Rayleigh-Ritz extraction from a subspace of +! range(X) that is determined using the leading left singular +! vectors of X. Optionally, DSYDMDQ returns the residuals +! of the computed Ritz pairs, the information needed for +! a refinement of the Ritz vectors, or the eigenvectors of +! the Exact DMD. +! For furter details see the references listed below. +! For more details of the implementation see [3], [4]. +! +! References +! ========== +! [1] P. Schmid: Dynamic mode decomposition of numerical +! and experimental data, +! Journal of Fluid Mechanics 656, 5-28, 2010. +! [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal +! decompositions: analysis and enhancements, +! SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018. +! [3] Z. Drmac: A LAPACK implementation of the Dynamic +! Mode Decomposition I. Technical report. AIMDyn Inc. +! October 2022, and LAPACK Working Note 298. +! [4] Z. Drmac: A LAPACK implementation of the Dynamic +! Mode Decomposition II. The symmetric/Hermitian DMD +! (xSYDMD/xHEDMD) Technical report. AIMDyn Inc. +! November 2022. LAPACK Working Note 300. +! [5] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L. +! Brunton, N. Kutz: On Dynamic Mode Decomposition: +! Theory and Applications, Journal of Computational +! Dynamics 1(2), 391 -421, 2014. +! [6] P. J. Baddoo, B. Herrmann, B. J. McKeon, +! J. N. Kutz, S. L. Brunton: Physics-informed +! dynamic mode decomposition (piDMD), arXiv:2112.04307. +! +! Developed and supported by: +! =========================== +! Developed and coded by Zlatko Drmac, Faculty of Science, +! University of Zagreb; drmac@math.hr +! In cooperation with +! AIMdyn Inc., Santa Barbara, CA. +! and supported by +! - DARPA SBIR project "Koopman Operator-Based Forecasting +! for Nonstationary Processes from Near-Term, Limited +! Observational Data" Contract No: W31P4Q-21-C-0007 +! - DARPA PAI project "Physics-Informed Machine Learning +! Methodologies" Contract No: HR0011-18-9-0033 +! - DARPA MoDyL project "A Data-Driven, Operator-Theoretic +! Framework for Space-Time Analysis of Process Dynamics" +! Contract No: HR0011-16-C-0116 +! Any opinions, findings and conclusions or recommendations +! expressed in this material are those of the author and +! do not necessarily reflect the views of the DARPA SBIR +! Program Office. +!============================================================ +! Distribution Statement A: +! Approved for Public Release, Distribution Unlimited. +! +!============================================================ +!............................................................ +! Arguments +! ========= +! JOBS (input) CHARACTER*1 +! Determines whether the initial data snapshots are scaled +! by a diagonal matrix. The data snaphots are the columns +! of F. The leading N-1 columns of F are denoted X and the +! trailing N-1 columns are denoted Y. +! 'S' :: The data snapshots matrices X and Y are multiplied +! with a diagonal matrix D so that X*D has unit +! nonzero columns (in the Euclidean 2-norm) +! 'C' :: The snapshots are scaled as with the 'S' option. +! If it is found that an i-th column of X is zero +! vector and the corresponding i-th column of Y is +! non-zero, then the i-th column of Y is set to +! zero and a warning flag is raised. +! 'Y' :: The data snapshots matrices X and Y are multiplied +! by a diagonal matrix D so that Y*D has unit +! nonzero columns (in the Euclidean 2-norm) +! 'N' :: No data scaling. +!..... +! JOBZ (input) CHARACTER*1 +! Determines whether the eigenvectors (Koopman modes) will +! be computed. +! 'V' :: The eigenvectors (Koopman modes) will be computed +! and returned in the matrix Z. +! See the description of Z. +! 'F' :: The eigenvectors (Koopman modes) will be returned +! in factored form as the product Z*V, where Z +! is orthonormal and V contains the eigenvectors +! of the corresponding Rayleigh quotient. +! See the descriptions of V, Z. +! 'Q' :: The eigenvectors (Koopman modes) will be returned +! in factored form as the product Q*Z, where Z +! contains the eigenvectors of the compression of the +! underlying discretised operator onto the span of +! the data snapshots. See the descriptions of F, V, Z. +! Q is from the initial QR factorization. +! 'N' :: The eigenvectors are not computed. +!..... +! JOBR (input) CHARACTER*1 +! Determines whether to compute the residuals. +! 'R' :: The residuals for the computed eigenpairs will +! be computed and stored in the array RES. +! See the description of RES. +! For this option to be legal, JOBZ must be 'V'. +! 'N' :: The residuals are not computed. +!..... +! JOBQ (input) CHARACTER*1 +! Specifies whether to explicitly compute and return the +! orthogonal matrix from the QR factorization. +! 'Q' :: The matrix Q of the QR factorization of the data +! snapshot matrix is computed and stored in the +! array F. See the description of F. +! 'N' :: The matrix Q is not explicitly computed. +!..... +! JOBT (input) CHARACTER*1 +! Specifies whether to return the upper triangular factor +! from the QR factorization. +! 'R' :: The matrix R of the QR factorization of the data +! snapshot matrix F is returned in the array Y. +! See the description of Y and Further details. +! 'N' :: The matrix R is not returned. +!..... +! JOBF (input) CHARACTER*1 +! Specifies whether to store information needed for post- +! processing (e.g. computing refined Ritz vectors) +! 'R' :: The matrix needed for the refinement of the Ritz +! vectors is computed and stored in the array B. +! See the description of B. +! 'E' :: The unscaled eigenvectors of the Exact DMD are +! computed and returned in the array B. See the +! description of B. +! 'X' :: The Exact DMD vectors are orthogonalized and +! returned in the array B. To preserve the +! residuals of the orthogonalized EDMD vectors +! they are reordered and the reordering permutation +! is stored and returned in the array IWORK. +! See the descriptions of B and IWORK, and [4]. +! 'N' :: No eigenvector refinement data is computed. +! To be useful on exit, this option needs JOBQ='Q'. +!..... +! WHTSVD (input) INTEGER, WHSTVD in { 1, 2, 3, 4 } +! Allows for a selection of the SVD algorithm from the +! LAPACK library. +! 1 :: DGESVD (the QR SVD algorithm) +! 2 :: DGESDD (the Divide and Conquer algortihm; if enough +! workspace available, this is the fastest option) +! 3 :: DGESVDQ (the preconditioned QR SVD ; this and 4 +! are the most accurate options) +! 4 :: DGEJSV (the precondiioned Jacobi SVD; this and 3 +! are the most accurate options) +! For the four methods above, a significant difference in +! the accuracy of small singular values is possible if +! the snapshots vary in norm so that X is severely +! ill-conditioned. If small (smaller than EPS*||X||) +! singular values are of interest and JOBS=='N', then +! the options (3, 4) give the most accurate results, where +! the option 4 is slightly better and with stronger +! theoretical background. +! If JOBS=='S', i.e. the columns of X will be normalized, +! then all methods give nearly equally accurate results. +!..... +! WHTSYM (input) INTEGER +! Specifies the method for restoring the symmetry of the +! Rayleigh quotient. +! 1 :: The lower triangle of the computed Rayleigh +! quotient is used to symmetrize the matrix, +! 2 :: The formulas for the lower triangle of a +! truncated solution of the symmetric Procrustes +! problem are used to symmetrize the computed +! Rayleigh quotient. +!..... +! WHTEIG (input) INTEGER +! Specifies the symmetric eigensolver to compute the +! eigenvalues and eigenvectors of the symmetric Rayleigh +! quotient. +! 1 :: DSYEV (the QR algorithm) +! 2 :: DSYEVD (the divide and conquer algorithm) +!..... +! M (input) INTEGER, M >= 0 +! The state space dimension (the number of rows of F). +!..... +! N (input) INTEGER, 0 <= N <= M +! The number of data snapshots from a single trajectory, +! taken at equidistant discrete times. This is the +! number of columns of F. +!..... +! F (input/output) REAL(KIND=WP) M-by-N array +! > On entry, +! the columns of F are the sequence of data snapshots +! from a single trajectory, taken at equidistant discrete +! times. It is assumed that the column norms of F are +! in the range of the normalized floating point numbers. +! < On exit, +! If JOBQ == 'Q', the array F contains the orthogonal +! matrix/factor of the QR factorization of the initial +! data snapshots matrix F. See the description of JOBQ. +! If JOBQ == 'N', the entries in F strictly below the main +! diagonal contain, column-wise, the information on the +! Householder vectors, as returned by DGEQRF. The +! remaining information to restore the orthogonal matrix +! of the initial QR factorization is stored in WORK(1:N). +! See the description of WORK. +!..... +! LDF (input) INTEGER, LDF >= M +! The leading dimension of the array F. +!..... +! X (workspace/output) REAL(KIND=WP) MIN(M,N)-by-(N-1) array +! X is used as worskpace to hold representations of the +! leading N-1 snapshots in the orthonormal basis computed +! in the QR factorization of F. +! On exit, the leading K columns of X contain the leading +! K left singular vectors of the above described content +! of X. To lift them to the space of the left singular +! vectors U(:,1:K) of the input data, pre-mutiply with the +! Q factor from the initial QR factorization. +! See the descriptions of F, K, V and Z. +!..... +! LDX (input) INTEGER, LDX >= N +! The leading dimension of the array X. +!..... +! Y (workspace/output) REAL(KIND=WP) MIN(M,N)-by-(N-1) array +! Y is used as worskpace to hold representations of the +! trailing N-1 snapshots in the orthonormal basis computed +! in the QR factorization of F. +! On exit, +! If JOBT == 'R', Y contains the MIN(M,N)-by-N upper +! triangular factor from the QR factorization of the data +! snapshot matrix F. +!..... +! LDY (input) INTEGER , LDY >= N +! The leading dimension of the array Y. +!..... +! NRNK (input) INTEGER +! Determines the mode how to compute the numerical rank, +! i.e. how to truncate small singular values of the input +! matrix X. On input, if +! NRNK = -1 :: i-th singular value sigma(i) is truncated +! if sigma(i) <= TOL*sigma(1) +! NRNK = -2 :: i-th singular value sigma(i) is truncated +! if sigma(i) <= TOL*sigma(i-1) +! The numerical rank can be enforced by using positive +! value of NRNK as follows: +! 0 < NRNK <= N-1 :: at most NRNK largest singular values +! will be used. If the number of the computed nonzero +! singular values is less than NRNK, then only those +! nonzero values will be used and the actually used +! dimension is less than NRNK. The actual number of +! the nonzero singular values is returned in the variable +! K. See the description of K. +!..... +! TOL (input) REAL(KIND=WP), 0 <= TOL < 1 +! The tolerance for truncating small singular values. +! See the description of NRNK. +!..... +! K (output) INTEGER, 0 <= K <= N +! The dimension of the SVD/POD basis for the leading N-1 +! data snapshots (columns of F) and the number of the +! computed Ritz pairs. The value of K is determinet +! according to the rule set by the parameters NRNK and +! TOL. See the descriptions of NRNK and TOL. +!..... +! EIGS (output) REAL(KIND=WP) (N-1)-by-1 array +! The leading K (K<=N-1) entries of EIGS contain +! the computed eigenvalues in ascending order. +! If the eigenvectors are requested, then Z(:,i) +! corresponds to EIGS(i). If JOBF == 'X', then +! orthonormalised Exact DMD vectors are stored +! in the array B and to the eigenvector B(:,i) +! the corresponding eigenvalue is EIGS(IWORK(i)). +! See the descriptions of K, Z, B and IWORK. +!..... +! Z (workspace/output) REAL(KIND=WP) M-by-(N-1) array +! If JOBZ =='V' then Z contains Ritz vectors. +! If JOBZ == 'F', then the above descriptions hold for +! the columns of X(:,1:K)*W(1:K,1:K), where the columns +! of W(1:k,1:K) are the computed eigenvectors of the +! K-by-K Rayleigh quotient. +! See the descriptions of EIGS, X and W. +!..... +! LDZ (input) INTEGER , LDZ >= M +! The leading dimension of the array Z. +!..... +! RES (output) REAL(KIND=WP) (N-1)-by-1 array +! RES(1:K) contains the residuals for the K computed +! Ritz pairs. +! RES(i) = || A * Z(:,i) - EIGS(i)*Z(:,i))||_2. +! If JOBF == 'X', the array IWORK on exit +! contains the permutation that sorts RES in +! ascending order. +! See the description of JOBF, EIGS, Z and IWORK. +!..... +! B (output) REAL(KIND=WP) MIN(M,N)-by-(N-1) array. +! IF JOBF =='R', B(1:N,1:K) contains A*U(:,1:K), and can +! be used for computing the refined vectors; see further +! details in the provided references. +! If JOBF == 'E', B(1:N,1;K) contains +! A*U(:,1:K)*W(1:K,1:K), which are the vectors from the +! Exact DMD, up to scaling by the inverse eigenvalues. +! In both cases, the content of B can be lifted to the +! original dimension of the input data by pre-mutiplying +! with the Q factor from the initial QR factorization. +! Here A denotes a compression of the underlying operator. +! See the descriptions of F and X. +! If JOBF =='N', then B is not referenced. +!..... +! LDB (input) INTEGER, LDB >= MIN(M,N) +! The leading dimension of the array B. +!..... +! V (workspace/output) REAL(KIND=WP) (N-1)-by-(N-1) array +! On exit, V(1:K,1:K) V contains the K eigenvectors of +! the Rayleigh quotient. The Ritz vectors +! (returned in Z) are the product of X and V; see +! the descriptions of X and Z. +!..... +! LDV (input) INTEGER, LDV >= N-1 +! The leading dimension of the array V. +!..... +! S (workspace/output) REAL(KIND=WP) (N-1)-by-(N-1) array +! The array S(1:K,1:K) is used for the matrix Rayleigh +! quotient. This content is overwritten during +! the eigenvalue decomposition by DSYEV/DSYEVD. +! See the description of K. +!..... +! LDS (input) INTEGER, LDS >= N-1 +! The leading dimension of the array S. +!..... +! WORK (workspace/output) REAL(KIND=WP) LWORK-by-1 array +! On exit, +! WORK(1:MIN(M,N)) contains the scalar factors of the +! elementary reflectors as returned by DGEQRF of the +! M-by-N input matrix F. +! WORK(MIN(M,N)+1:MIN(M,N)+N-1) contains the singular values of +! the input submatrix F(1:M,1:N-1). +! If the call to DSYDMDQ is only workspace query, then +! WORK(1) contains the minimal workspace length and +! WORK(2) is the optimal workspace length. Hence, the +! length of WORK is at least 2. +! See the description of LWORK. +!..... +! LWORK (input) INTEGER +! The minimal length of the workspace vector WORK. +! LWORK is calculated as follows: +! Let MLWQR = N (minimal workspace for DGEQRF[M,N]) +! MLWDMD = minimal workspace for DSYDMD (see the +! description of LWORK in DSYDMD) +! MLWMQR = N (minimal workspace for +! DORMQR['L','N',M,N,N]) +! MLWGQR = N (minimal workspace for DORGQR[M,N,N]) +! Then +! LWORK = MAX(MIN(M,N)+MLWQR, N+MLWDMD) +! is updated as follows: +! if JOBZ == 'V' or JOBZ == 'F' THEN +! LWORK = MAX( LWORK, MIN(M,N)+N-1+MLWMQR ) +! if JOBQ == 'Q' THEN +! LWORK = MAX( LWORK, MIN(M,N)+N-1+MLWGQR) +! If on entry LWORK = -1, then a workspace query is +! assumed and the procedure only computes the minimal +! and the optimal workspace lengths for both WORK and +! IWORK. See the descriptions of WORK and IWORK. +!..... +! IWORK (workspace/output) INTEGER LIWORK-by-1 array +! Workspace that is required if WHTSVD equals +! 2 , 3 or 4. Further, if JOBF=='X', it is used to return +! ordering of the orthonormalized Exact DMD eigenvectors, +! so that EIGS(IWORK(i)) is the eigenvalue that corresponds to +! the i-th EDMD vector. See the descriptions of JOBF and B. +! If on entry LWORK =-1 or LIWORK=-1, then the +! minimal length of IWORK is computed and returned in +! IWORK(1). See the description of LIWORK. +!..... +! LIWORK (input) INTEGER +! The minimal length of the workspace vector IWORK. +! LIWORK is determined as follows. First: Let N1=N-1 +! If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1 +! If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M,N1)) +! If WHTSVD == 3, then LIWORK >= MAX(1,M+N1-1) +! If WHTSVD == 4, then LIWORK >= MAX(3,M+3*N1) +! If WHTEIG == 2 and JOBZ == 'V', then LIWORK >= MAX(1,3+5*N1) +! Then, if JOBF == 'X', then LIWORK = MAX(LIWORK,N1). +! If on entry LIWORK = -1, then a worskpace query is +! assumed and the procedure only computes the minimal +! and the optimal workspace lengths for both WORK and +! IWORK. See the descriptions of WORK and IWORK. +!..... +! INFO (output) INTEGER +! -i < 0 :: On entry, the i-th argument had an +! illegal value +! = 0 :: Successful return. +! = 1 :: Void input. Quick exit (M=0 or N=0). +! = 2 :: The SVD computation of X did not converge. +! Suggestion: Check the input data and/or +! repeat with different WHTSVD. +! = 3 :: The computation of the eigenvalues did not +! converge. +! = 4 :: If data scaling was requested on input and +! the procedure found inconsistency in the data +! such that for some column index i, +! X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set +! to zero if JOBS=='C'. The computation proceeds +! with original or modified data and warning +! flag is set with INFO=4. +!............................................................. +!............................................................. +! Parameters +! ~~~~~~~~~~ + REAL(KIND=WP), PARAMETER :: ONE = 1.0_WP + REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP +! +! Local scalars +! ~~~~~~~~~~~~~ + INTEGER :: IMINWR, INFO1, MLWDMD, MLWGQR, & + MLWMQR, MLWORK, MLWQR, MINMN, & + OLWDMD, OLWGQR, OLWMQR, OLWORK, & + OLWQR + LOGICAL :: LQUERY, SCCOLX, SCCOLY, WANTQ, & + WNTTRF, WNTRES, WNTVEC, WNTVCF, & + WNTVCQ, WNTREF, WNTEX + CHARACTER(LEN=1) :: JOBVL +! +! Local array +! ~~~~~~~~~~~ + REAL(KIND=WP) :: RDUMMY(2) +! +! External funcions (BLAS and LAPACK) +! ~~~~~~~~~~~~~~~~~ + LOGICAL LSAME + EXTERNAL LSAME +! +! External subroutines (BLAS and LAPACK) +! ~~~~~~~~~~~~~~~~~~~~ + EXTERNAL DGEMM + EXTERNAL DGEQRF, DLACPY, DLASET, DORGQR, & + DORMQR, XERBLA + +! External subroutines +! ~~~~~~~~~~~~~~~~~~~~ + EXTERNAL DSYDMD + +! Intrinsic functions +! ~~~~~~~~~~~~~~~~~~~ + INTRINSIC MAX, MIN, INT + !.......................................................... + ! + ! Test the input arguments + WNTRES = LSAME(JOBR,'R') + SCCOLX = LSAME(JOBS,'S') .OR. LSAME( JOBS, 'C' ) + SCCOLY = LSAME(JOBS,'Y') + WNTREF = LSAME(JOBF,'R') + WNTEX = LSAME(JOBF,'E') .OR. LSAME(JOBF,'X') + WNTVEC = LSAME(JOBZ,'V') + WNTVCF = LSAME(JOBZ,'F') + WNTVCQ = LSAME(JOBZ,'Q') + WANTQ = LSAME(JOBQ,'Q') + WNTTRF = LSAME(JOBT,'R') + MINMN = MIN(M,N) + INFO = 0 + LQUERY = ( ( LWORK == -1 ) .OR. ( LIWORK == -1 ) ) +! + IF ( .NOT. (SCCOLX .OR. SCCOLY .OR. & + LSAME(JOBS,'N')) ) THEN + INFO = -1 + ELSE IF ( .NOT. (WNTVEC .OR. WNTVCF .OR. WNTVCQ & + .OR. LSAME(JOBZ,'N')) ) THEN + INFO = -2 + ELSE IF ( .NOT. (WNTRES .OR. LSAME(JOBR,'N')) .OR. & + ( WNTRES .AND. (.NOT.(WNTVEC .OR. WNTVCF)) ) ) THEN + INFO = -3 + ELSE IF ( .NOT. (WANTQ .OR. LSAME(JOBQ,'N')) ) THEN + INFO = -4 + ELSE IF ( .NOT. ( WNTTRF .OR. LSAME(JOBT,'N') ) ) THEN + INFO = -5 + ELSE IF ( .NOT. (WNTREF .OR. WNTEX .OR. & + LSAME(JOBF,'N') ) ) THEN + INFO = -6 + ELSE IF ( .NOT. ((WHTSVD == 1).OR.(WHTSVD == 2).OR. & + (WHTSVD == 3).OR.(WHTSVD == 4)) ) THEN + INFO = -7 + ELSE IF ( .NOT.((WHTSYM == 1) .OR. (WHTSYM == 2))) THEN + INFO = -8 + ELSE IF ( .NOT.((WHTEIG == 1) .OR. (WHTEIG == 2))) THEN + INFO = -9 + ELSE IF ( M < 0 ) THEN + INFO = -10 + ELSE IF ( ( N < 0 ) .OR. ( N > M+1 ) ) THEN + INFO = -11 + ELSE IF ( LDF < M ) THEN + INFO = -13 + ELSE IF ( LDX < MINMN ) THEN + INFO = -15 + ELSE IF ( LDY < MINMN ) THEN + INFO = -17 + ELSE IF ( .NOT. (( NRNK == -2).OR.(NRNK == -1).OR. & + ((NRNK >= 1).AND.(NRNK < N ))) ) THEN + INFO = -18 + ELSE IF ( ( TOL < ZERO ) .OR. ( TOL >= ONE ) ) THEN + INFO = -19 + ELSE IF ( LDZ < M ) THEN + INFO = -23 + ELSE IF ( (WNTREF.OR.WNTEX ).AND.( LDB < MINMN ) ) THEN + INFO = -26 + ELSE IF ( LDV < N-1 ) THEN + INFO = -28 + ELSE IF ( LDS < N-1 ) THEN + INFO = -30 + END IF +! + IF ( WNTVEC .OR. WNTVCF .OR. WNTVCQ ) THEN + JOBVL = 'V' + ELSE + JOBVL = 'N' + END IF + IF ( INFO == 0 ) THEN + ! Compute the minimal and the optimal workspace + ! requirements. Simulate running the code and + ! determine minimal and optimal sizes of the + ! workspace at any moment of the run. + IF ( ( N == 0 ) .OR. ( N == 1 ) ) THEN + ! All output except K is void. INFO=1 signals + ! the void input. In case of a workspace query, + ! the minimal workspace lengths are returned. + IF ( LQUERY ) THEN + IWORK(1) = 1 + WORK(1) = 2 + WORK(2) = 2 + ELSE + K = 0 + END IF + INFO = 1 + RETURN + END IF + MLWQR = MAX(1,N) ! Minimal workspace length for DGEQRF. + MLWORK = MINMN + MLWQR + IF ( LQUERY ) THEN + CALL DGEQRF( M, N, F, LDF, WORK, RDUMMY, -1, & + INFO1 ) + OLWQR = INT(RDUMMY(1)) + OLWORK = MIN(M,N) + OLWQR + END IF + CALL DSYDMD( JOBS, JOBVL, JOBR, JOBF, WHTSVD, WHTSYM, & + WHTEIG, MINMN, N-1, X, LDX, Y, LDY, NRNK,& + TOL, K, EIGS, Z, LDZ, RES, B, LDB, & + V, LDV, S, LDS, WORK, -1, IWORK, -1, INFO1 ) + MLWDMD = INT(WORK(1)) + MLWORK = MAX(MLWORK, MINMN + MLWDMD) + IMINWR = IWORK(1) + IF ( LQUERY ) THEN + OLWDMD = INT(WORK(2)) + OLWORK = MAX(OLWORK, MINMN+OLWDMD) + END IF + IF ( WNTVEC .OR. WNTVCF ) THEN + MLWMQR = MAX(1,N) + MLWORK = MAX(MLWORK,MINMN+N-1+MLWMQR) + IF ( LQUERY ) THEN + CALL DORMQR( 'L','N', M, N, MINMN, F, LDF, & + WORK, Z, LDZ, WORK, -1, INFO1 ) + OLWMQR = INT(WORK(1)) + OLWORK = MAX(OLWORK,MINMN+N-1+OLWMQR) + END IF + END IF + IF ( WANTQ ) THEN + MLWGQR = N + MLWORK = MAX(MLWORK,MINMN+N-1+MLWGQR) + IF ( LQUERY ) THEN + CALL DORGQR( M, MINMN, MINMN, F, LDF, WORK, & + WORK, -1, INFO1 ) + OLWGQR = INT(WORK(1)) + OLWORK = MAX(OLWORK,MINMN+N-1+OLWGQR) + END IF + END IF + IMINWR = MAX( 1, IMINWR ) + MLWORK = MAX( 2, MLWORK ) + IF ( LWORK < MLWORK .AND. (.NOT.LQUERY) ) INFO = -32 + IF ( LIWORK < IMINWR .AND. (.NOT.LQUERY) ) INFO = -34 + END IF + IF( INFO /= 0 ) THEN + CALL XERBLA( 'DSYDMDQ', -INFO ) + RETURN + ELSE IF ( LQUERY ) THEN +! Return minimal and optimal workspace sizes + IWORK(1) = IMINWR + WORK(1) = MLWORK + WORK(2) = OLWORK + RETURN + END IF +!..... +! Initial QR factorization that is used to represent the +! snapshots as elements of lower dimensional subspace. +! For large scale computation with M >>N , at this place +! one can use an out of core QRF. +! + CALL DGEQRF( M, N, F, LDF, WORK, & + WORK(MINMN+1), LWORK-MINMN, INFO1 ) +! +! Define X and Y as the snapshots representations in the +! orthogonal basis computed in the QR factorization. +! X corresponds to the leading N-1 and Y to the trailing +! N-1 snapshots. + CALL DLASET( 'L', MINMN, N-1, ZERO, ZERO, X, LDX ) + CALL DLACPY( 'U', MINMN, N-1, F, LDF, X, LDX ) + CALL DLACPY( 'A', MINMN, N-1, F(1,2), LDF, Y, LDY ) + IF ( M >= 3 ) THEN + CALL DLASET( 'L', MINMN-2, N-2, ZERO, ZERO, & + Y(3,1), LDY ) + END IF +! +! Compute the DMD of the projected snapshot pairs (X,Y) + CALL DSYDMD( JOBS, JOBVL, JOBR, JOBF, WHTSVD, WHTSYM, & + WHTEIG, MINMN, N-1, X, LDX, Y, LDY, NRNK,& + TOL, K, EIGS, Z, LDZ, RES, B, LDB, V, & + LDV, S, LDS, WORK(MINMN+1), LWORK-MINMN, & + IWORK, LIWORK, INFO1 ) + IF ( INFO1 == 2 .OR. INFO1 == 3 ) THEN + ! Return with error code. See DSYDMD for details. + INFO = INFO1 + RETURN + ELSE + INFO = INFO1 + END IF +! +! The Ritz vectors (Koopman modes) can be explicitly +! formed or returned in factored form. + IF ( WNTVEC ) THEN + ! Compute the eigenvectors explicitly. + IF ( M > MINMN ) CALL DLASET( 'A', M-MINMN, K, ZERO, & + ZERO, Z(MINMN+1,1), LDZ ) + CALL DORMQR( 'L','N', M, K, MINMN, F, LDF, WORK, Z, & + LDZ, WORK(MINMN+N), LWORK-(MINMN+N-1), INFO1 ) + ELSE IF ( WNTVCF ) THEN + ! Return the Ritz vectors (eigenvectors) in factored + ! form Z*V, where Z contains orthonormal matrix (the + ! product of Q from the inital QR factorization and + ! the SVD/POD_basis returned by DSYDMD in X) and the + ! second factor (the eigenvectors of the Rayleigh + ! quotient) is in the array V, as returned by DSYDMD. + CALL DLACPY( 'A', N, K, X, LDX, Z, LDZ ) + IF ( M > N ) CALL DLASET( 'A', M-N, K, ZERO, ZERO, & + Z(N+1,1), LDZ ) + CALL DORMQR( 'L','N', M, K, MINMN, F, LDF, WORK, Z, & + LDZ, WORK(MINMN+N), LWORK-(MINMN+N-1), INFO1 ) + END IF +! +! Some optional output variables: +! +! The upper triangular factor R in the initial QR +! factorization is optionally returned in the array Y. +! This is useful if this call to DSYDMDQ is to be +! followed by a streaming DMD that is implemented in a +! QR compressed form. + IF ( WNTTRF ) THEN ! Return the upper triangular R in Y + CALL DLASET( 'A', MINMN, N, ZERO, ZERO, Y, LDY ) + CALL DLACPY( 'U', MINMN, N, F, LDF, Y, LDY ) + END IF +! +! The orthonormal/orthogonal factor Q in the initial QR +! factorization is optionally returned in the array F. +! Same as with the triangular factor above, this is +! useful in a streaming DMD. + IF ( WANTQ ) THEN ! Q overwrites F + CALL DORGQR( M, MINMN, MINMN, F, LDF, WORK, & + WORK(MINMN+N), LWORK-(MINMN+N-1), INFO1 ) + END IF +! + RETURN +! + END SUBROUTINE DSYDMDQ + \ No newline at end of file diff --git a/SRC/la_constants.mod b/SRC/la_constants.mod new file mode 100644 index 0000000000000000000000000000000000000000..613e3ad070fa04464aa66bac4e7de4da730f93c9 GIT binary patch literal 1563 zcmV+$2ITo4iwFP!000006XjdqbD~HPe&?^4H_U@u12dq$t!3Cf>k>>Eb9Z|$78@{2 zoxvQC+b#e4>lqk8;-Hp5q>}7bXyr$L-80kQbT?nW%;t;v*hbGA_jT=||GY5rHvv*L zLq(6F=fs|iR^jFut)1oLCWvAuh@(%d5mtJ=D^$lnMI2lyaJc8cj?^W4Hs}^FeYy;( ziqxJT#GxNV{^J!$D$-D|cQ>C+tm}ulg&yqR?b)Ape`+r*d!Y!Coglu;{$lw3@?cLE z)0vH|8|xOX7~h~fxQ{ZP&&S`EvxkK>UtU{xmVIs6lW&Q>a|_O*{Bn%A01&v40Mc`i zSix5cDZ?0E4ve831}VXyNPYDDwI4j4s6K)}^QnD5op1n3C4gfMuk6v77Bohnu>l(Y zaMml(usDcwg~#*ptp!giD_pd9rtb9i?q)ifF8cT5KP>(L{;bIiKDYkL2857{swtD1 zeZLsPoe!L@;!i`&e?DWCNjR6-JmJW8L7D(^6L_7%sHLd1BQXvD<4fqR{9nz&{0;6h zVK>o)=GiB`UXD*;_x=hWirFLN6rjd|6l_y4P4N**`4P!A8d4|x4rq@lhG2@V7j8P` zZ+zmI9wF8J?mq>u&yB*YUI7dKmYoP-C@%hd)+v#?PJ;_v&wu(6Up%VeE-FoqNn@B^ zqOKw8YZZP1-r=q`ui?dmqDQ+4IEdSLC~Uw_SPlvJi0eQ39K=P^5v|NeG$15y)3Le^ zCum2HD_|e)OQU#1Y!$&}H`cz1Mb)RPLphWIgYuC`#stevKvn9in!_rb%9dK6(R$n&^IWER7B4D?jL!}Wa zN5u$6^t|pMW$)`$zGmo*Wkdk%;!%|sLQ*jV84rm&I0UO!cb!p@*t`72!a0^)>5P%Y zKSJ;F2_rcg6Jr)}2j>%n;DH-VZZqzflnAIlD60>#7j{!!QGZxgAA)S?xc7kM5F=^N#S(BJpxXj$k%X#(S&wNC3SHgOB+g)D zp`Fr5UF3Qs2gB$>J0+32sE?d57>qEqQVKZ}i*u0187$tnR0BB^3^@qKWL%(~;zwQc zUz5*&lW~DoiXJtAsc;PA0j*R!>hep4TNnvwr^<0An8W1_69~YzwGMu zL%h&}ky&`}1EWFq>H|i@!W#k@jk3miFnQ8u&l06@tYxj-Am~{WGzdiW;=-|!bg{yb znKyy5TMhC)NRWnk{~$=CLfaiiSr=aNZPLBQ`6NzTzEb87-;u1R2n3q-;(%a?9u_z@ z^X?Cj28!sL0AVQF4d8er3iTNAqP?&Xt9I`WAo$Es^87wVJl&t>$3fCXf*FoUmY@ZJ zW*JTpj5I|E=Hcl={^TUvmU)V2tTMZa6i8wjou3M~ literal 0 HcmV?d00001 diff --git a/SRC/ssydmd.f90 b/SRC/ssydmd.f90 new file mode 100644 index 0000000000..cc7e47fcc6 --- /dev/null +++ b/SRC/ssydmd.f90 @@ -0,0 +1,1151 @@ + SUBROUTINE SSYDMD( JOBS, JOBZ, JOBR, JOBF, & + WHTSVD, WHTSYM, WHTEIG, & + M, N, X, LDX, Y, LDY, NRNK, TOL, & + K, EIGS, Z, LDZ, RES, & + B, LDB, W, LDW, S, LDS, & + WORK, LWORK, IWORK, LIWORK, INFO ) +!..... + USE iso_fortran_env + IMPLICIT NONE + INTEGER, PARAMETER :: WP = real32 +!..... +! Scalar arguments + CHARACTER, INTENT(IN) :: JOBS, JOBZ, JOBR, JOBF + INTEGER, INTENT(IN) :: WHTSVD, WHTSYM, WHTEIG, & + M, N, LDX, LDY, & + NRNK, LDZ, LDB, LDW, LDS,& + LWORK, LIWORK + INTEGER, INTENT(OUT) :: K, INFO + REAL(KIND=WP), INTENT(IN) :: TOL +! Array arguments + REAL(KIND=WP), INTENT(INOUT) :: X(LDX,*), Y(LDY,*) + REAL(KIND=WP), INTENT(OUT) :: Z(LDZ,*), B(LDB,*), & + W(LDW,*), S(LDS,*) + REAL(KIND=WP), INTENT(OUT) :: EIGS(*), RES(*) + REAL(KIND=WP), INTENT(OUT) :: WORK(*) + INTEGER, INTENT(OUT) :: IWORK(*) +!............................................................ +! Purpose +! ======= +! SSYDMD computes the Dynamic Mode Decomposition (DMD) for +! a pair of data snapshot matrices. For the input matrices +! X and Y such that Y = A*X with an unaccessible symmetric +! matrix A, SSYDMD computes a certain number of Ritz pairs +! of A using the standard Rayleigh-Ritz extraction from a +! subspace of range(X) that is determined using the leading +! left singular vectors of X. Optionally, SSYDMD returns +! the residuals of the computed Ritz pairs, the information +! needed for a refinement of the Ritz vectors, or the +! eigenvectors of the Exact DMD. +! For furter details see the references listed below. +! For more details of the implementation see [3], [4]. +! +! References +! ========== +! [1] P. Schmid: Dynamic mode decomposition of numerical +! and experimental data, +! Journal of Fluid Mechanics 656, 5-28, 2010. +! [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal +! decompositions: analysis and enhancements, +! SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018. +! [3] Z. Drmac: A LAPACK implementation of the Dynamic +! Mode Decomposition I. Technical report. AIMDyn Inc. +! October 2022, and LAPACK Working Note 298. +! [4] Z. Drmac: A LAPACK implementation of the Dynamic +! Mode Decomposition II. The symmetric/Hermitian DMD +! (xSYDMD/xHEDMD) Technical report. AIMDyn Inc. +! November 2022. LAPACK Working Note 300. +! [5] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L. +! Brunton, N. Kutz: On Dynamic Mode Decomposition: +! Theory and Applications, Journal of Computational +! Dynamics 1(2), 391 -421, 2014. +! [6] P. J. Baddoo, B. Herrmann, B. J. McKeon, +! J. N. Kutz, S. L. Brunton: Physics-informed +! dynamic mode decomposition (piDMD), arXiv:2112.04307. +! +!...................................................................... +! Developed and supported by: +! =========================== +! Developed and coded by Zlatko Drmac, Faculty of Science, +! University of Zagreb; drmac@math.hr +! In cooperation with +! AIMdyn Inc., Santa Barbara, CA. +! and supported by +! - DARPA SBIR project "Koopman Operator-Based Forecasting +! for Nonstationary Processes from Near-Term, Limited +! Observational Data" Contract No: W31P4Q-21-C-0007 +! - DARPA PAI project "Physics-Informed Machine Learning +! Methodologies" Contract No: HR0011-18-9-0033 +! - DARPA MoDyL project "A Data-Driven, Operator-Theoretic +! Framework for Space-Time Analysis of Process Dynamics" +! Contract No: HR0011-16-C-0116 +! Any opinions, findings and conclusions or recommendations +! expressed in this material are those of the author and +! do not necessarily reflect the views of the DARPA SBIR +! Program Office +!============================================================ +! Distribution Statement A: +! Approved for Public Release, Distribution Unlimited. +! +!============================================================ +!............................................................ +! Arguments +! ========= +! JOBS (input) CHARACTER*1 +! Determines whether the initial data snapshots are scaled +! by a diagonal matrix. +! 'S' :: The data snapshots matrices X and Y are multiplied +! with a diagonal matrix D so that X*D has unit +! nonzero columns (in the Euclidean 2-norm) +! 'C' :: The snapshots are scaled as with the 'S' option. +! If it is found that an i-th column of X is zero +! vector and the corresponding i-th column of Y is +! non-zero, then the i-th column of Y is set to +! zero and a warning flag is raised. +! 'Y' :: The data snapshots matrices X and Y are multiplied +! by a diagonal matrix D so that Y*D has unit +! nonzero columns (in the Euclidean 2-norm) +! 'N' :: No data scaling. +!..... +! JOBZ (input) CHARACTER*1 +! Determines whether the eigenvectors (Koopman modes) will +! be computed. +! 'V' :: The eigenvectors (Koopman modes) will be computed +! and returned in the matrix Z. +! See the description of Z. +! 'F' :: The eigenvectors (Koopman modes) will be returned +! in factored form as the product X(:,1:K)*W, where X +! contains a POD basis (leading left singular vectors +! of the data matrix X) and W contains the eigenvectors +! of the corresponding Rayleigh quotient. +! See the descriptions of K, X, W, Z. +! 'N' :: The eigenvectors are not computed. +!..... +! JOBR (input) CHARACTER*1 +! Determines whether to compute the residuals. +! 'R' :: The residuals for the computed eigenpairs will be +! computed and stored in the array RES. +! See the description of RES. +! For this option to be legal, JOBZ must be 'V'. +! 'N' :: The residuals are not computed. +!..... +! JOBF (input) CHARACTER*1 +! Specifies whether to store information needed for post- +! processing (e.g. computing refined Ritz vectors) +! 'R' :: The matrix needed for the refinement of the Ritz +! vectors is computed and stored in the array B. +! See the description of B. +! 'E' :: The unscaled eigenvectors of the Exact DMD are +! computed and returned in the array B. See the +! description of B. +! 'X' :: The Exact DMD vectors are orthogonalized and +! returned in the array B. To preserve the +! residuals of the orthogonalized EDMD vectors +! they are reordered and the reordering permutation +! is stored and returned in the array IWORK. +! See the descriptions of B and IWORK, and [4]. +! 'N' :: No eigenvector refinement data is computed. +!..... +! WHTSVD (input) INTEGER, WHSTVD in { 1, 2, 3, 4 } +! Allows for a selection of the SVD algorithm from the +! LAPACK library. +! 1 :: SGESVD (the QR SVD algorithm) +! 2 :: SGESDD (the Divide and Conquer algortihm; if enough +! workspace available, this is the fastest option) +! 3 :: SGESVDQ (the preconditioned QR SVD ; this and 4 +! are the most accurate options) +! 4 :: SGEJSV (the preconditioned Jacobi SVD; this and 3 +! are the most accurate options) +! For the four methods above, a significant difference in +! the accuracy of small singular values is possible if +! the snapshots vary in norm so that X is severely +! ill-conditioned. If small (smaller than EPS*||X||) +! singular values are of interest and JOBS=='N', then +! the options (3, 4) give the most accurate results, where +! the option 4 is slightly better and with stronger +! theoretical background. +! If JOBS=='S', i.e. the columns of X will be normalized, +! then all methods give nearly equally accurate results. +!..... +! WHTSYM (input) INTEGER +! Specifies the method for restoring the symmetry of the +! Rayleigh quotient. +! 1 :: The lower triangle of the computed Rayleigh +! quotient is used to symmetrize the matrix, +! 2 :: The formulas for the lower triangle of a +! truncated solution of the symmetric Procrustes +! problem are used to symmetrize the computed +! Rayleigh quotient. +!..... +! WHTEIG (input) INTEGER +! Specifies the symmetric eigensolver to compute the +! eigenvalues and eigenvectors of the symmetric Rayleigh +! quotient. +! 1 :: SSYEV (the QR algorithm) +! 2 :: SSYEVD (the divide and conquer algorithm) +!..... +! M (input) INTEGER, M>= 0 +! The state space dimension (the row dimension of X, Y). +!..... +! N (input) INTEGER, 0 <= N <= M +! The number of data snapshot pairs +! (the number of columns of X and Y). +!..... +! X (input/output) REAL(KIND=WP) M-by-N array +! > On entry, X contains the data snapshot matrix X. It is +! assumed that the column norms of X are in the range of +! the normalized floating point numbers. +! < On exit, the leading K columns of X contain a POD basis, +! i.e. the leading K left singular vectors of the input +! data matrix X, U(:,1:K). All N columns of X contain all +! left singular vectors of the input matrix X. +! See the descriptions of K, Z and W. +!..... +! LDX (input) INTEGER, LDX >= M +! The leading dimension of the array X. +!..... +! Y (input/workspace/output) REAL(KIND=WP) M-by-N array +! > On entry, Y contains the data snapshot matrix Y +! < On exit, +! If JOBR == 'R', the leading K columns of Y contain +! the residual vectors for the computed Ritz pairs. +! See the description of RES. +! If JOBR == 'N', Y contains the original input data. +!..... +! LDY (input) INTEGER , LDY >= M +! The leading dimension of the array Y. +!..... +! NRNK (input) INTEGER +! Determines the mode how to compute the numerical rank, +! i.e. how to truncate small singular values of the input +! matrix X. On input, if +! NRNK = -1 :: i-th singular value sigma(i) is truncated +! if sigma(i) <= TOL*sigma(1) +! NRNK = -2 :: i-th singular value sigma(i) is truncated +! if sigma(i) <= TOL*sigma(i-1) +! The numerical rank can be enforced by using positive +! value of NRNK as follows: +! 0 < NRNK <= N :: at most NRNK largest singular values +! will be used. If the number of the computed nonzero +! singular values is less than NRNK, then only those +! nonzero values will be used and the actually used +! dimension is less than NRNK. The actual number of +! the nonzero singular values is returned in the variable +! K. See the descriptions of TOL and K. +!..... +! TOL (input) REAL(KIND=WP), 0 <= TOL < 1 +! The tolerance for truncating small singular values. +! See the description of NRNK. +!..... +! K (output) INTEGER, 0 <= K <= N +! The dimension of the POD basis for the data snapshot +! matrix X and the number of the computed Ritz pairs. +! The value of K is determinet according to the rule set +! by the parameters NRNK and TOL. +! See the descriptions of NRNK and TOL. +!..... +! EIGS (output) REAL(KIND=WP) N-by-1 array +! The leading K (K<=N) entries of EIGS contain +! the computed eigenvalues in ascending order. +! If the eigenvectors are requested, then Z(:,i) +! corresponds to EIGS(i). If JOBF == 'X', then +! orthonormalised Exact DMD vectors are stored +! in the array B and to the eigenvector B(:,i) +! the corresponding eigenvalue is EIGS(IWORK(i)). +! See the descriptions of K, Z, B and IWORK. +!..... +! Z (workspace/output) REAL(KIND=WP) M-by-N array +! If JOBZ =='V' then Z contains Ritz vectors. +! If JOBZ == 'F', then the above descriptions hold for +! the columns of X(:,1:K)*W(1:K,1:K), where the columns +! of W(1:k,1:K) are the computed eigenvectors of the +! K-by-K Rayleigh quotient. +! See the descriptions of EIGS, X and W. +!..... +! LDZ (input) INTEGER , LDZ >= M +! The leading dimension of the array Z. +!..... +! RES (output) REAL(KIND=WP) N-by-1 array +! RES(1:K) contains the residuals for the K computed +! Ritz pairs. +! RES(i) = || A * Z(:,i) - EIGS(i)*Z(:,i))||_2. +! If JOBF == 'X', the array IWORK on exit +! contains the permutation that sorts RES in +! ascending order. +! See the description of JOBF, EIGS, Z and IWORK. +!..... +! B (output) REAL(KIND=WP) M-by-N array. +! IF JOBF =='R', B(1:M,1:K) contains A*U(:,1:K), and can +! be used for computing the refined vectors; see further +! details in the provided references. +! If JOBF == 'E', B(1:M,1;K) contains +! A*U(:,1:K)*W(1:K,1:K), which are the vectors from the +! Exact DMD, up to scaling by the inverse eigenvalues. +! Note that the EDMD vectors may not be even numerically +! orthogonal and that the non-orthogonality may be +! substantial. +! If JOBF == 'X', then the EDMD vectors +! A*U(:,1:K)*W(1:K,1:K) are orthonormalized. To preserve +! information on the residuals, they are reordered and +! the reordering permutation is stored in the array IWORK. +! If JOBF =='N', then B is not referenced. +! See the descriptions of JOBF, X, W, K, IWORK. +!..... +! LDB (input) INTEGER, LDB >= M +! The leading dimension of the array B. +!..... +! W (workspace/output) REAL(KIND=WP) N-by-N array +! On exit, W(1:K,1:K) contains the K computed +! eigenvectors of the matrix Rayleigh quotient. +! The Ritz vectors (returned in Z) are the +! product of X (containing a POD basis for the input +! matrix X) and W. See the descriptions of K, S, X and Z. +! W is also used as a workspace to temporarily store the +! left singular vectors of X. +!..... +! LDW (input) INTEGER, LDW >= N +! The leading dimension of the array W. +!..... +! S (workspace/output) REAL(KIND=WP) N-by-N array +! The array S(1:K,1:K) is used for the matrix Rayleigh +! quotient. This content is overwritten during +! the eigenvalue decomposition. +! See the description of K. +!..... +! LDS (input) INTEGER, LDS >= N +! The leading dimension of the array S. +!..... +! WORK (workspace/output) REAL(KIND=WP) LWORK-by-1 array +! On exit, WORK(1:N) contains the singular values of +! X (for JOBS=='N') or column scaled X (JOBS=='S', 'C'). +! If WHTSVD==4, then WORK(N+1) and WORK(N+2) contain +! scaling factor WORK(N+2)/WORK(N+1) used to scale X +! and Y to avoid overflow in the SVD of X. +! This may be of interest if the scaling option is off +! and as many as possible smallest eigenvalues are +! desired to the highest feasible accuracy. +! If the call to SSYDMD is only workspace query, then +! WORK(1) contains the minimal workspace length and +! WORK(2) is the optimal workspace length. Hence, the +! length of WORK is at least 2. +! See the description of LWORK. +!..... +! LWORK (input) INTEGER +! The minimal length of the workspace vector WORK. +! LWORK is calculated as follows: +! If WHTSVD == 1 :: +! If JOBZ == 'V', then +! LWORK >= MAX(2, N + LWORK_SVD, N+LWORK_EIG), +! where LWORK_EIG is the work length for the +! symmetric eigenvalue solver. If +! - WHTEIG = 1(SSYEV), LWORK_EIG = MAX(1,3*N-1) +! - WHTEIG = 2(SSYEVD), LWORK_EIG = 1+6*N+2*N**2 +! If JOBZ == 'N' then +! LWORK >= MAX(2, N + LWORK_SVD, N+LWORK_EIG), +! where +! - if WHTEIG = 1, LWORK_EIG = MAX(1,3*N-1) +! - if WHTEIG = 2, LWORK_EIG = 2*N+1 +! Here LWORK_SVD = MAX(1,3*N+M,5*N) is the minimal +! workspace length of SGESVD +! If WHTSVD == 2 :: +! If JOBZ == 'V', then +! LWORK >= MAX(2, N + LWORK_SVD, N+LWORK_EIG) +! LWORK_EIG is the work length for the +! symmetric eigenvalue solver. If +! - WHTEIG = 1(SSYEV), LWORK_EIG = MAX(1,3*N-1) +! - WHTEIG = 2(SSYEVD), LWORK_EIG = 1+6*N+2*N**2 +! If JOBZ == 'N', then +! LWORK >= MAX(2, N + LWORK_SVD, N+LWORK_EIG), +! where +! - if WHTEIG = 1, LWORK_EIG = MAX(1,3*N-1) +! - if WHTEIG = 2, LWORK_EIG = 2*N+1 +! Here LWORK_SVD = MAX(M, 5*N*N+4*N)+3*N*N is the +! minimal workspace length of SGESDD. +! If WHTSVD == 3 :: +! If JOBZ == 'V', then +! LWORK >= MAX(2, N+LWORK_SVD,N+LOWRK_EIG) +! LWORK_EIG is the work length for the +! symmetric eigenvalue solver. If +! - WHTEIG = 1(SSYEV), LWORK_EIG = MAX(1,3*N-1) +! - WHTEIG = 2(SSYEVD), LWORK_EIG = 1+6*N+2*N**2 +! If JOBZ == 'N', then +! LWORK >= MAX(2, N+LWORK_SVD,N+LWORK_EIG), +! where +! - if WHTEIG = 1, LWORK_EIG = MAX(1,3*N-1) +! - if WHTEIG = 2, LWORK_EIG = 2*N+1 +! Here LWORK_SVD = N+M+MAX(3*N+1, +! MAX(1,3*N+M,5*N),MAX(1,N)) +! is the minimal workspace length of SGESVDQ. +! If WHTSVD == 4 :: +! If JOBZ == 'V', then +! LWORK >= MAX(2, N+LWORK_SVD,N+LWORK_EIG) +! LWORK_EIG is the work length for the +! symmetric eigenvalue solver. If +! - WHTEIG = 1(SSYEV), LWORK_EIG = MAX(1,3*N-1) +! - WHTEIG = 2(SSYEVD), LWORK_EIG = 1+6*N+2*N**2 +! If JOBZ == 'N', then +! LWORK >= MAX(2, N+LWORK_SVD,N+LWORK_EIG), +! where +! - if WHTEIG = 1, LWORK_EIG = MAX(1,3*N-1) +! - if WHTEIG = 2, LWORK_EIG = 2*N+1 +! Here LWORK_SVD = MAX(7,2*M+N,6*N+2*N*N) is the +! minimal workspace length of SGEJSV. +! Further, if JOBF=='X', then in addition +! LWORK = MAX(LWORK,2*N+N) (for SGEQRF and SORGQR). +! The above expressions are not simplified in order to +! make the usage of WORK more transparent, and for +! easier checking. In any case, LWORK >= 2. +! If on entry LWORK = -1, then a workspace query is +! assumed and the procedure only computes the minimal +! and the optimal workspace lengths for both WORK and +! IWORK. See the descriptions of WORK and IWORK. +!..... +! IWORK (workspace/output) INTEGER LIWORK-by-1 array +! Workspace that is required if WHTSVD equals +! 2 , 3 or 4. Further, if JOBF=='X', it is used to return +! ordering of the orthonormalized Exact DMD eigenvectors, +! so that EIGS(IWORK(i)) is the eigenvalue that corresponds to +! the i-th EDMD vector. See the descriptions of JOBF and B. +! If on entry LWORK =-1 or LIWORK=-1, then the +! minimal length of IWORK is computed and returned in +! IWORK(1). See the description of LIWORK. +!..... +! LIWORK (input) INTEGER +! The minimal length of the workspace vector IWORK. +! LIWORK is determined in two steps. First: +! If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1 +! If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M,N)). +! If WHTSVD == 3, then LIWORK >= MAX(1,M+N-1). +! If WHTSVD == 4, then LIWORK >= MAX(3,M+3*N). +! If WHTEIG == 2 and JOBZ == 'V', then LIWORK >= MAX(1,3+5*N) +! Then, if JOBF == 'X', then LIWORK = MAX(LIWORK,N). +! If on entry LIWORK = -1, then a workspace query is +! assumed and the procedure only computes the minimal +! and the optimal workspace lengths for both WORK and +! IWORK. See the descriptions of WORK and IWORK. +!..... +! INFO (output) INTEGER +! -i < 0 :: On entry, the i-th argument had an +! illegal value +! = 0 :: Successful return. +! = 1 :: Void input. Quick exit (M=0 or N=0). +! = 2 :: The SVD computation of X did not converge. +! Suggestion: Check the input data and/or +! repeat with different WHTSVD. +! = 3 :: The computation of the eigenvalues did not +! converge. +! = 4 :: If data scaling was requested on input and +! the procedure found inconsistency in the data +! such that for some column index i, +! X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set +! to zero if JOBS=='C'. The computation proceeds +! with original or modified data and warning +! flag is set with INFO=4. +!............................................................. +!............................................................. +! Parameters +! ~~~~~~~~~~ + REAL(KIND=WP), PARAMETER :: ONE = 1.0_WP + REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP + +! Local scalars +! ~~~~~~~~~~~~~ + REAL(KIND=WP) :: OFL, ROOTSC, SCALE, SMALL, & + SSUM, XSCL1, XSCL2 + INTEGER :: i, j, IMINWR, INFO1, INFO2, & + IWRSDD, LWRKEV, LWRSDD, LWRSVD, & + LWRSVQ, MLWORK, MWRKEV, MWRSDD, & + MWRSVD, MWRSVJ, MWRSVQ, NUMRNK, & + OLWORK + LOGICAL :: BADXY, FORWRD, LQUERY, SCCOLX, & + SCCOLY, WNTEX, WNTREF, WNTRES, & + WNTVEC + CHARACTER :: JOBZL, T_OR_N + CHARACTER :: JSVOPT + +! Local arrays +! ~~~~~~~~~~~~ + REAL(KIND=WP) :: RDUMMY(2), RDUMMY2(2) +! External funcions (BLAS and LAPACK) +! ~~~~~~~~~~~~~~~~~ + REAL(KIND=WP) SLANGE, SLAMCH, SNRM2 + EXTERNAL SLANGE, SLAMCH, SNRM2, ISAMAX, ISAMIN + INTEGER ISAMAX, ISAMIN + LOGICAL SISNAN, LSAME + EXTERNAL SISNAN, LSAME + +! External subroutines (BLAS and LAPACK) +! ~~~~~~~~~~~~~~~~~~~~ + EXTERNAL SAXPY, SGEMM, SSCAL + EXTERNAL SSYEV, SSYEVD, SGEJSV, SGEQRF, SGESDD, & + SGESVD, SGESVDQ, SLACPY, SLAPMT, SLASCL, & + SLASSQ, SORGQR, XERBLA + +! Intrinsic functions +! ~~~~~~~~~~~~~~~~~~~ + INTRINSIC INT, FLOAT, MAX, SQRT +!............................................................ +! +! Test the input arguments +! + WNTRES = LSAME(JOBR,'R') + SCCOLX = LSAME(JOBS,'S') .OR. LSAME(JOBS,'C') + SCCOLY = LSAME(JOBS,'Y') + WNTVEC = LSAME(JOBZ,'V') + WNTREF = LSAME(JOBF,'R') + WNTEX = LSAME(JOBF,'E') .OR. LSAME(JOBF,'X') + INFO = 0 + LQUERY = ( ( LWORK == -1 ) .OR. ( LIWORK == -1 ) ) +! + IF ( .NOT. (SCCOLX .OR. SCCOLY .OR. & + LSAME(JOBS,'N')) ) THEN + INFO = -1 + ELSE IF ( .NOT. (WNTVEC .OR. LSAME(JOBZ,'N') & + .OR. LSAME(JOBZ,'F')) ) THEN + INFO = -2 + ELSE IF ( .NOT. (WNTRES .OR. LSAME(JOBR,'N')) .OR. & + ( WNTRES .AND. (.NOT.WNTVEC) ) ) THEN + INFO = -3 + ELSE IF ( .NOT. (WNTREF .OR. WNTEX .OR. & + LSAME(JOBF,'N') ) ) THEN + INFO = -4 + ELSE IF ( .NOT.((WHTSVD == 1) .OR. (WHTSVD == 2) .OR. & + (WHTSVD == 3) .OR. (WHTSVD == 4) ))THEN + INFO = -5 + ELSE IF ( .NOT.((WHTSYM == 1) .OR. (WHTSYM == 2))) THEN + INFO = -6 + ELSE IF ( .NOT.((WHTEIG == 1) .OR. (WHTEIG == 2))) THEN + INFO = -7 + ELSE IF ( M < 0 ) THEN + INFO = -8 + ELSE IF ( ( N < 0 ) .OR. ( N > M ) ) THEN + INFO = -9 + ELSE IF ( LDX < M ) THEN + INFO = -11 + ELSE IF ( LDY < M ) THEN + INFO = -13 + ELSE IF ( .NOT. (( NRNK == -2).OR.(NRNK == -1).OR. & + ((NRNK >= 1).AND.(NRNK <=N ))) ) THEN + INFO = -14 + ELSE IF ( ( TOL < ZERO ) .OR. ( TOL >= ONE ) ) THEN + INFO = -15 + ELSE IF ( LDZ < M ) THEN + INFO = -19 + ELSE IF ( (WNTREF .OR. WNTEX ) .AND. ( LDB < M ) ) THEN + INFO = -22 + ELSE IF ( LDW < N ) THEN + INFO = -24 + ELSE IF ( LDS < N ) THEN + INFO = -26 + END IF +! + IF ( INFO == 0 ) THEN + ! Compute the minimal and the optimal workspace + ! requirements. Simulate running the code and + ! determine minimal and optimal sizes of the + ! workspace at any moment of the run. + IF ( N == 0 ) THEN + ! Quick return. All output except K is void. + ! INFO=1 signals the void input. + ! In case of a workspace query, the default + ! minimal workspace lengths are returned. + IF ( LQUERY ) THEN + IWORK(1) = 1 + WORK(1) = 2 + WORK(2) = 2 + ELSE + K = 0 + END IF + INFO = 1 + RETURN + END IF + MLWORK = MAX(2,N) + OLWORK = MAX(2,N) + IMINWR = 1 + SELECT CASE ( WHTSVD ) + CASE (1) + ! The following is specified as the minimal + ! length of WORK in the definition of SGESVD: + ! MWRSVD = MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N)) + MWRSVD = MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N)) + MLWORK = MAX(MLWORK,N + MWRSVD) + IF ( LQUERY ) THEN + CALL SGESVD( 'O', 'S', M, N, X, LDX, WORK, & + B, LDB, W, LDW, RDUMMY, -1, INFO1 ) + LWRSVD = MAX( MWRSVD, INT( RDUMMY(1) ) ) + OLWORK = MAX(OLWORK,N + LWRSVD) + END IF + CASE (2) + ! The following is specified as the minimal + ! length of WORK in the definition of SGESDD: + ! MWRSDD = 3*MIN(M,N)*MIN(M,N) + + ! MAX( MAX(M,N),5*MIN(M,N)*MIN(M,N)+4*MIN(M,N) ) + ! IMINWR = 8*MIN(M,N) + MWRSDD = 3*MIN(M,N)*MIN(M,N) + & + MAX( MAX(M,N),5*MIN(M,N)*MIN(M,N)+4*MIN(M,N) ) + MLWORK = MAX(MLWORK,N + MWRSDD) + IMINWR = 8*MIN(M,N) + IF ( LQUERY ) THEN + CALL SGESDD( 'O', M, N, X, LDX, WORK, B, & + LDB, W, LDW, RDUMMY, -1, IWORK, INFO1 ) + LWRSDD = MAX( MWRSDD, INT( RDUMMY(1) ) ) + OLWORK = MAX(OLWORK,N + LWRSDD) + END IF + CASE (3) + !LWQP3 = 3*N+1 + !LWORQ = MAX(N, 1) + !MWRSVD = MAX(1,3*MIN(M,N)+MAX(M,N),5*MIN(M,N)) + !MWRSVQ = N + MAX( LWQP3, MWRSVD, LWORQ ) + MAX(M,2) + !MLWORK = N + MWRSVQ + !IMINWR = M+N-1 + CALL SGESVDQ( 'H', 'P', 'N', 'R', 'R', M, N, & + X, LDX, WORK, Z, LDZ, W, LDW, & + NUMRNK, IWORK, LIWORK, RDUMMY, & + -1, RDUMMY2, -1, INFO1 ) + IMINWR = IWORK(1) + MWRSVQ = INT(RDUMMY(2)) + INT(RDUMMY2(1)) + MLWORK = MAX(MLWORK,N+MWRSVQ) + IF ( LQUERY ) THEN + LWRSVQ = MAX( MWRSVQ, INT(RDUMMY(1)) ) + OLWORK = MAX(OLWORK,N+LWRSVQ+INT(RDUMMY2(1))) + END IF + CASE (4) + JSVOPT = 'J' + !MWRSVJ = MAX( 7, 2*M+N, 6*N+2*N*N ) ! for JSVOPT='V' + MWRSVJ = MAX( 7, 2*M+N, 4*N+N*N, 2*N+N*N+6 ) + MLWORK = MAX(MLWORK,N+MWRSVJ) + IMINWR = MAX( 3, M+3*N ) + IF ( LQUERY ) THEN + OLWORK = MAX(OLWORK,N+MWRSVJ) + END IF + END SELECT + IF ( WNTVEC .OR. WNTEX .OR. LSAME(JOBZ,'F') ) THEN + JOBZL = 'V' + ELSE + JOBZL = 'N' + END IF + SELECT CASE ( WHTEIG ) + CASE (1) + ! Workspace calculation to the SSYEV call + MWRKEV = MAX( 1, 3*N-1 ) + MLWORK = MAX(MLWORK,N+MWRKEV) + IF ( LQUERY ) THEN + CALL SSYEV( JOBZL, 'U', N, S, LDS, EIGS, RDUMMY, & + -1, INFO1 ) ! LAPACK CALL + LWRKEV = MAX( MWRKEV, INT(RDUMMY(1)) ) + OLWORK = MAX( OLWORK, N+LWRKEV ) + END IF + CASE (2) + IF ( LSAME(JOBZL,'V') ) THEN + MWRKEV = 1 + 6*N + 2*N*N + IWRSDD = 3+5*N + ELSE + MWRKEV = MAX( 1, 2*N+1) + IWRSDD = 1 + END IF + MLWORK = MAX(MLWORK,N+MWRKEV) + IF ( LQUERY ) THEN + CALL SSYEVD( JOBZL, 'U', N, S, LDS, EIGS, RDUMMY, & + -1, IWORK, -1, INFO1 ) ! LAPACK CALL + LWRKEV = MAX( MWRKEV, INT(RDUMMY(1)) ) + OLWORK = MAX( OLWORK, N+LWRKEV ) + IWRSDD = IWORK(1) + END IF + IMINWR = MAX(IMINWR,IWRSDD) + END SELECT + IF ( LSAME(JOBF,'X') ) THEN + MLWORK = MAX(MLWORK,2*N+N) + ! SGEQRF and SORGQR need >= 2*N locations + IF ( LQUERY ) THEN + CALL SGEQRF( M, N, B, LDB, RDUMMY, RDUMMY, & + -1, INFO1 ) + OLWORK = MAX( OLWORK, 2*N+INT(RDUMMY(1)) ) + CALL SORGQR( M, N, N, B, LDB, RDUMMY, RDUMMY, & + -1, INFO1 ) + OLWORK = MAX( OLWORK, 2*N+INT(RDUMMY(1)) ) + END IF + IMINWR = MAX( IMINWR, N ) + END IF + IF ( LIWORK < IMINWR .AND. (.NOT.LQUERY) ) INFO = -30 + IF ( LWORK < MLWORK .AND. (.NOT.LQUERY) ) INFO = -28 + END IF +! + IF( INFO /= 0 ) THEN + CALL XERBLA( 'SSYDMD', -INFO ) + RETURN + ELSE IF ( LQUERY ) THEN +! Return minimal and optimal workspace sizes + IWORK(1) = IMINWR + WORK(1) = MLWORK + WORK(2) = OLWORK + RETURN + END IF +!............................................................ +! + OFL = SLAMCH('O') + SMALL = SLAMCH('S') + BADXY = .FALSE. +! +! <1> Optional scaling of the snapshots (columns of X, Y) +! ========================================================== + IF ( SCCOLX ) THEN + ! The columns of X will be normalized. + ! To prevent overflows, the column norms of X are + ! carefully computed using SLASSQ. + K = 0 + DO i = 1, N + !WORK(i) = DNRM2( M, X(1,i), 1 ) + SCALE = ZERO + CALL SLASSQ( M, X(1,i), 1, SCALE, SSUM ) + IF ( SISNAN(SCALE) .OR. SISNAN(SSUM) ) THEN + K = 0 + INFO = -10 + CALL XERBLA('SSYDMD',-INFO) + END IF + IF ( (SCALE /= ZERO) .AND. (SSUM /= ZERO) ) THEN + ROOTSC = SQRT(SSUM) + IF ( SCALE .GE. (OFL / ROOTSC) ) THEN +! Norm of X(:,i) overflows. First, X(:,i) +! is scaled by +! ( ONE / ROOTSC ) / SCALE = 1/||X(:,i)||_2. +! Next, the norm of X(:,i) is stored without +! overflow as WORK(i) = - SCALE * (ROOTSC/M), +! the minus sign indicating the 1/M factor. +! Scaling is performed without overflow, and +! underflow may occur in the smallest entries +! of X(:,i). The relative backward and forward +! errors are small in the ell_2 norm. + CALL SLASCL( 'G', 0, 0, SCALE, ONE/ROOTSC, & + M, 1, X(1,i), M, INFO1 ) + WORK(i) = - SCALE * ( ROOTSC / FLOAT(M) ) + ELSE +! X(:,i) will be scaled to unit 2-norm + WORK(i) = SCALE * ROOTSC + CALL SLASCL( 'G',0, 0, WORK(i), ONE, M, 1, & + X(1,i), M, INFO1 ) ! LAPACK CALL +! X(1:M,i) = (ONE/WORK(i)) * X(1:M,i) ! INTRINSIC + END IF + ELSE + WORK(i) = ZERO + K = K + 1 + END IF + END DO + IF ( K == N ) THEN + ! All columns of X are zero. Return error code -8. + ! (the 8th input variable had an illegal value) + K = 0 + INFO = -8 + CALL XERBLA('SSYDMD',-INFO) + RETURN + END IF + DO i = 1, N +! Now, apply the same scaling to the columns of Y. + IF ( WORK(i) > ZERO ) THEN + CALL SSCAL( M, ONE/WORK(i), Y(1,i), 1 ) ! BLAS CALL +! Y(1:M,i) = (ONE/WORK(i)) * Y(1:M,i) ! INTRINSIC + ELSE IF ( WORK(i) < ZERO ) THEN + CALL SLASCL( 'G', 0, 0, -WORK(i), & + ONE/FLOAT(M), M, 1, Y(1,i), M, INFO1 ) ! LAPACK CALL + ELSE IF ( Y(ISAMAX(M, Y(1,i),1),i ) & + /= ZERO ) THEN +! X(:,i) is zero vector. For consistency, +! Y(:,i) should also be zero. If Y(:,i) is not +! zero, then the data might be inconsistent or +! corrupted. If JOBS == 'C', Y(:,i) is set to +! zero and a warning flag is raised. +! The computation continues but the +! situation will be reported in the output. + BADXY = .TRUE. + IF ( LSAME(JOBS,'C')) & + CALL SSCAL( M, ZERO, Y(1,i), 1 ) ! BLAS CALL + END IF + END DO + END IF + ! + IF ( SCCOLY ) THEN + ! The columns of Y will be normalized. + ! To prevent overflows, the column norms of Y are + ! carefully computed using SLASSQ. + DO i = 1, N + !WORK(i) = DNRM2( M, Y(1,i), 1 ) + SCALE = ZERO + CALL SLASSQ( M, Y(1,i), 1, SCALE, SSUM ) + IF ( SISNAN(SCALE) .OR. SISNAN(SSUM) ) THEN + K = 0 + INFO = -12 + CALL XERBLA('SSYDMD',-INFO) + END IF + IF ( SCALE /= ZERO .AND. (SSUM /= ZERO) ) THEN + ROOTSC = SQRT(SSUM) + IF ( SCALE .GE. (OFL / ROOTSC) ) THEN +! Norm of Y(:,i) overflows. First, Y(:,i) +! is scaled by +! ( ONE / ROOTSC ) / SCALE = 1/||Y(:,i)||_2. +! Next, the norm of Y(:,i) is stored without +! overflow as WORK(i) = - SCALE * (ROOTSC/M), +! the minus sign indicating the 1/M factor. +! Scaling is performed without overflow, and +! underflow may occur in the smallest entries +! of Y(:,i). The relative backward and forward +! errors are small in the ell_2 norm. + CALL SLASCL( 'G', 0, 0, SCALE, ONE/ROOTSC, & + M, 1, Y(1,i), M, INFO1 ) + WORK(i) = - SCALE * ( ROOTSC / FLOAT(M) ) + ELSE +! X(:,i) will be scaled to unit 2-norm + WORK(i) = SCALE * ROOTSC + CALL SLASCL( 'G',0, 0, WORK(i), ONE, M, 1, & + Y(1,i), M, INFO1 ) ! LAPACK CALL +! Y(1:M,i) = (ONE/WORK(i)) * Y(1:M,i) ! INTRINSIC + END IF + ELSE + WORK(i) = ZERO + END IF + END DO + DO i = 1, N +! Now, apply the same scaling to the columns of X. + IF ( WORK(i) > ZERO ) THEN + CALL SSCAL( M, ONE/WORK(i), X(1,i), 1 ) ! BLAS CALL +! X(1:M,i) = (ONE/WORK(i)) * X(1:M,i) ! INTRINSIC + ELSE IF ( WORK(i) < ZERO ) THEN + CALL SLASCL( 'G', 0, 0, -WORK(i), & + ONE/FLOAT(M), M, 1, X(1,i), M, INFO1 ) ! LAPACK CALL + ELSE IF ( X(ISAMAX(M, X(1,i),1),i ) & + /= ZERO ) THEN +! Y(:,i) is zero vector. If X(:,i) is not +! zero, then a warning flag is raised. +! The computation continues but the +! situation will be reported in the output. + BADXY = .TRUE. + END IF + END DO + END IF +! +! <2> SVD of the data snapshot matrix X. +! ===================================== +! The left singular vectors are stored in the array X. +! The right singular vectors are in the array W. +! The array W will later on contain the eigenvectors +! of a Rayleigh quotient. + NUMRNK = N + SELECT CASE ( WHTSVD ) + CASE (1) + CALL SGESVD( 'O', 'S', M, N, X, LDX, WORK, B, & + LDB, W, LDW, WORK(N+1), LWORK-N, INFO1 ) ! LAPACK CALL + T_OR_N = 'T' + CASE (2) + CALL SGESDD( 'O', M, N, X, LDX, WORK, B, LDB, W, & + LDW, WORK(N+1), LWORK-N, IWORK, INFO1 ) ! LAPACK CALL + T_OR_N = 'T' + CASE (3) + CALL SGESVDQ( 'H', 'P', 'N', 'R', 'R', M, N, & + X, LDX, WORK, Z, LDZ, W, LDW, & + NUMRNK, IWORK, LIWORK, WORK(N+MAX(2,M)+1), & + LWORK-N-MAX(2,M), WORK(N+1), MAX(2,M), INFO1) ! LAPACK CALL + CALL SLACPY( 'A', M, NUMRNK, Z, LDZ, X, LDX ) ! LAPACK CALL + T_OR_N = 'T' + CASE (4) + CALL SGEJSV( 'F', 'U', JSVOPT, 'N', 'N', 'P', M, & + N, X, LDX, WORK, Z, LDZ, W, LDW, & + WORK(N+1), LWORK-N, IWORK, INFO1 ) ! LAPACK CALL + CALL SLACPY( 'A', M, N, Z, LDZ, X, LDX ) ! LAPACK CALL + T_OR_N = 'N' + XSCL1 = WORK(N+1) + XSCL2 = WORK(N+2) + IF ( XSCL1 /= XSCL2 ) THEN + ! This is an exceptional situation. If the + ! data matrices are not scaled and the + ! largest singular value of X overflows. + ! In that case SGEJSV can return the SVD + ! in scaled form. The scaling factor can be used + ! to rescale the data (X and Y). + CALL SLASCL( 'G', 0, 0, XSCL1, XSCL2, M, N, Y, LDY, INFO2 ) + END IF + END SELECT +! + IF ( INFO1 > 0 ) THEN + ! The SVD selected subroutine did not converge. + ! Return with an error code. + INFO = 2 + RETURN + END IF +! + IF ( WORK(1) == ZERO ) THEN + ! The largest computed singular value of (scaled) + ! X is zero. Return error code -8 + ! (the 8th input variable had an illegal value). + K = 0 + INFO = -8 + CALL XERBLA('SSYDMD',-INFO) + RETURN + END IF +! + !<3> Determine the numerical rank of the data + ! snapshots matrix X. This depends on the + ! parameters NRNK and TOL. + + SELECT CASE ( NRNK ) + CASE ( -1 ) + K = 1 + DO i = 2, NUMRNK + IF ( ( WORK(i) <= WORK(1)*TOL ) .OR. & + ( WORK(i) <= SMALL ) ) EXIT + K = K + 1 + END DO + CASE ( -2 ) + K = 1 + DO i = 1, NUMRNK-1 + IF ( ( WORK(i+1) <= WORK(i)*TOL ) .OR. & + ( WORK(i) <= SMALL ) ) EXIT + K = K + 1 + END DO + CASE DEFAULT + K = 1 + DO i = 2, NRNK + IF ( WORK(i) <= SMALL ) EXIT + K = K + 1 + END DO + END SELECT + ! Now, U = X(1:M,1:K) is the SVD/POD basis for the + ! snapshot data in the input matrix X. + !<4> Compute the Rayleigh quotient S = U^T * A * U. + ! Depending on the requsted outputs, the computation + ! is organized to compute additional auxiliary + ! matrices (for the residuals and refinements). + ! + ! In all formulas below, we need V_k*Sigma_k^(-1) + ! where either V_k is in W(1:N,1:K), or V_k^T is in + ! W(1:K,1:N). Here Sigma_k=diag(WORK(1:K)). + IF ( LSAME(T_OR_N, 'N') ) THEN + DO i = 1, K + CALL SSCAL( N, ONE/WORK(i), W(1,i), 1 ) ! BLAS CALL + ! W(1:N,i) = (ONE/WORK(i)) * W(1:N,i) ! INTRINSIC + END DO + ELSE + ! This non-unit stride access is due to the fact + ! that SGESVD, SGESVDQ and SGESDD return the + ! transposed matrix of the right singular vectors. + !DO i = 1, K + ! CALL SSCAL( N, ONE/WORK(i), W(i,1), LDW ) ! BLAS CALL + ! ! W(i,1:N) = (ONE/WORK(i)) * W(i,1:N) ! INTRINSIC + !END DO + DO i = 1, K + WORK(N+i) = ONE/WORK(i) + END DO + DO j = 1, N + DO i = 1, K + W(i,j) = (WORK(N+i))*W(i,j) + END DO + END DO + END IF +! + IF ( WNTREF ) THEN + ! + ! Need A*U(:,1:K)=Y*V_k*inv(diag(WORK(1:K))) + ! for computing the refined Ritz vectors + ! (optionally, outside SSYDMD). + CALL SGEMM( 'N', T_OR_N, M, K, N, ONE, Y, LDY, W, & + LDW, ZERO, Z, LDZ ) ! BLAS CALL + ! Z(1:M,1:K)=MATMUL(Y(1:M,1:N),TRANSPOSE(W(1:K,1:N))) ! INTRINSIC, for T_OR_N=='T' + ! Z(1:M,1:K)=MATMUL(Y(1:M,1:N),W(1:N,1:K)) ! INTRINSIC, for T_OR_N=='N' + ! + ! At this point Z contains + ! A * U(:,1:K) = Y * V_k * Sigma_k^(-1), and + ! this is needed for computing the residuals. + ! This matrix is returned in the array B and + ! it can be used to compute refined Ritz vectors. + CALL SLACPY( 'A', M, K, Z, LDZ, B, LDB ) ! BLAS CALL + ! B(1:M,1:K) = Z(1:M,1:K) ! INTRINSIC + + CALL SGEMM( 'T', 'N', K, K, M, ONE, X, LDX, Z, & + LDZ, ZERO, S, LDS ) ! BLAS CALL + ! S(1:K,1:K) = MATMUL(TANSPOSE(X(1:M,1:K)),Z(1:M,1:K)) ! INTRINSIC + ! At this point S = U^T * A * U is the Rayleigh quotient. + ELSE + ! A * U(:,1:K) is not explicitly needed and the + ! computation is organized differently. The Rayleigh + ! quotient is computed more efficiently. + CALL SGEMM( 'T', 'N', K, N, M, ONE, X, LDX, Y, LDY, & + ZERO, Z, LDZ ) ! BLAS CALL + ! Z(1:K,1:N) = MATMUL( TRANSPOSE(X(1:M,1:K)), Y(1:M,1:N) ) ! INTRINSIC + ! In the two SGEMM calls here, can use K for LDZ. + CALL SGEMM( 'N', T_OR_N, K, K, N, ONE, Z, LDZ, W, & + LDW, ZERO, S, LDS ) ! BLAS CALL + ! S(1:K,1:K) = MATMUL(Z(1:K,1:N),TRANSPOSE(W(1:K,1:N))) ! INTRINSIC, for T_OR_N=='T' + ! S(1:K,1:K) = MATMUL(Z(1:K,1:N),(W(1:N,1:K))) ! INTRINSIC, for T_OR_N=='N' + ! At this point S = U^T * A * U is the Rayleigh quotient. + ! If the residuals are requested, save scaled V_k into Z. + ! Recal that V_k or V_k^T is stored in W. + IF ( WNTRES .OR. WNTEX ) THEN + IF ( LSAME(T_OR_N, 'N') ) THEN + CALL SLACPY( 'A', N, K, W, LDW, Z, LDZ ) + ELSE + CALL SLACPY( 'A', K, N, W, LDW, Z, LDZ ) + END IF + END IF + END IF + + SELECT CASE ( WHTSYM ) + CASE (1) + CALL SLACPY( 'L', K, K, S, LDS, W, LDW ) + CASE (2) + ! This is the symmetrizer from the piDMD [6], + ! based on a solution of the symmetric Procrustes + ! problem. Here included for comparisons/study and + ! for the sake of completeness. + DO i = 1, K-1 + W(i,i) = S(i,i) + DO j = i+1, K + W(j,i) = ( WORK(i)*(S(j,i)*WORK(i)) + & + WORK(j)*(S(i,j)*WORK(j)) ) / & + ( WORK(i)**2 + WORK(j)**2 ) + END DO + END DO + W(k,k) = S(k,k) + END SELECT + ! + !<5> Compute the Ritz values and (if requested) the + ! right eigenvectors of the Rayleigh quotient. + ! + ! The LAPACK eigensolvers SSYEV and SSYEVD return the + ! eigenvectors in the array that contains upper or + ! lower triangle of the symmetric Rayleigh quotient. + ! + SELECT CASE ( WHTEIG ) + CASE (1) + CALL SSYEV( JOBZL, 'L', K, W, LDW, EIGS, WORK(N+1), & + LWORK-N, INFO1 ) ! LAPACK CALL + CASE (2) + CALL SSYEVD( JOBZL, 'L', K, W, LDW, EIGS, WORK(N+1), & + LWORK-N, IWORK, LIWORK, INFO1 ) ! LAPACK CALL + END SELECT + + ! + ! W(1:K,1:K) contains the eigenvectors of the Rayleigh + ! quotient. + IF ( INFO1 > 0 ) THEN + ! SSYEV/SSYEVD failed to compute the eigenvalues and + ! eigenvectors of the Rayleigh quotient. + INFO = 3 + RETURN + END IF +! + ! <6> Compute the eigenvectors (if requested) and, + ! the residuals (if requested). + ! + IF ( WNTVEC .OR. WNTEX ) THEN + IF ( WNTRES ) THEN + IF ( WNTREF ) THEN + ! Here, if the refinement is requested, we have + ! A*U(:,1:K) already computed and stored in Z. + ! For the residuals, need Y = A * U(:,1;K) * W. + ! W is stored in S. ? copy in Q + CALL SGEMM( 'N', 'N', M, K, K, ONE, Z, LDZ, W, & + LDW, ZERO, Y, LDY ) ! BLAS CALL + ! Y(1:M,1:K) = Z(1:M,1:K) * W(1:K,1:K) ! INTRINSIC + ! This frees Z; Y contains A * U(:,1:K) * W. + ELSE + ! Compute S = V_k * Sigma_k^(-1) * W, where + ! V_k * Sigma_k^(-1) is stored in Z + CALL SGEMM( T_OR_N, 'N', N, K, K, ONE, Z, LDZ, & + W, LDW, ZERO, S, LDS) + ! Then, compute Z = Y * S = + ! = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) = + ! = A * U(:,1:K) * W(1:K,1:K) + CALL SGEMM( 'N', 'N', M, K, N, ONE, Y, LDY, S, & + LDS, ZERO, Z, LDZ) + ! Save a copy of Z into Y and free Z for holding + ! the Ritz vectors. + CALL SLACPY( 'A', M, K, Z, LDZ, Y, LDY ) + IF ( WNTEX ) CALL SLACPY( 'A', M, K, Z, LDZ, B, LDB ) + END IF + ELSE IF ( WNTEX ) THEN + ! Compute S = V_k * Sigma_k^(-1) * W, where + ! V_k * Sigma_k^(-1) is stored in Z + CALL SGEMM( T_OR_N, 'N', N, K, K, ONE, Z, LDZ, & + W, LDW, ZERO, S, LDS) + ! Then, compute Z = Y * S = + ! = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) = + ! = A * U(:,1:K) * W(1:K,1:K) + CALL SGEMM( 'N', 'N', M, K, N, ONE, Y, LDY, S, & + LDS, ZERO, B, LDB ) + ! The above call replaces the following two calls + ! that were used in the developing-testing phase. + ! CALL SGEMM( 'N', 'N', M, K, N, ONE, Y, LDY, S, & + ! LDS, ZERO, Z, LDZ) + ! Save a copy of Z into B and free Z for holding + ! the Ritz vectors. + ! CALL SLACPY( 'A', M, K, Z, LDZ, B, LDB ) + END IF +! + ! Compute the Ritz vectors + IF ( WNTVEC ) CALL SGEMM( 'N', 'N', M, K, K, ONE, X, LDX, W, LDW, & + ZERO, Z, LDZ ) ! BLAS CALL + ! Z(1:M,1:K) = MATMUL(X(1:M,1:K), W(1:K,1:K)) ! INTRINSIC +! + IF ( WNTRES ) THEN + DO i = 1, K + ! Compute the residuals + CALL SAXPY( M, -EIGS(i), Z(1,i), 1, Y(1,i), 1 ) ! BLAS CALL + ! Y(1:M,i) = Y(1:M,i) - EIGS(i) * Z(1:M,i) ! INTRINSIC + RES(i) = SNRM2( M, Y(1,i), 1) ! BLAS CALL + END DO + END IF + END IF +! + IF ( LSAME(JOBF,'X') ) THEN + ! If the Exact DMD eigenvectors are requested, the + ! original EDMD vectors must be orthogonalized. + ! Orthogonalization may change the vector so that the + ! corresponding residuals may increase. (Data driven + ! setting does not allow recomputing the Razleigh + ! quotients.) To preseve the quality of the best EDMD + ! vectors, orthogonalization is prformed in order of + ! increasing residuals. For more details see [4]. + DO i = 1, K + IWORK(i) = i + END DO + CALL SCOPY( K, RES, 1, WORK(N+1), 1 ) + DO i = 1, K-1 + j = ISAMIN( K-i+1, WORK(N+i), 1 ) + i - 1 + IF ( j /= i ) THEN + INFO1 = IWORK(i) + IWORK(i) = IWORK(j) + IWORK(j) = INFO1 + SCALE = WORK(N+i) + WORK(N+i) = WORK(N+j) + WORK(N+j) = SCALE + END IF + END DO + FORWRD = .TRUE. + CALL SLAPMT( FORWRD, M, K, B, LDB, IWORK ) + ! Here we need the Gram-Schmidt orthogonalization + ! of the columns of B. The following two lines + ! use the QR factorization subroutine SGEQRF. This + ! can be replaced with a more efficient Gram-Schmidt + ! implementation. The matrix B is not expected to + ! be ill-conditioned, so Gram-Schmid will be OK. + CALL SGEQRF( M, K, B, LDB, WORK(N+1), WORK(N+K+1), & + LWORK-(N+K), INFO1 ) + CALL SORGQR( M, K, K, B, LDB, WORK(N+1), & + WORK(N+K+1), LWORK-(N+K), INFO1 ) + END IF + + IF ( WHTSVD == 4 ) THEN + WORK(N+1) = XSCL1 + WORK(N+2) = XSCL2 + END IF +! +! Successful exit. + IF ( .NOT. BADXY ) THEN + INFO = 0 + ELSE + ! A warning on possible data inconsistency. + ! This shouild be a rare event. + INFO = 4 + END IF +!............................................................ + RETURN +! ...... + END SUBROUTINE SSYDMD + \ No newline at end of file diff --git a/SRC/ssydmdq.f90 b/SRC/ssydmdq.f90 new file mode 100644 index 0000000000..580418b7bb --- /dev/null +++ b/SRC/ssydmdq.f90 @@ -0,0 +1,699 @@ +SUBROUTINE SSYDMDQ( JOBS, JOBZ, JOBR, JOBQ, JOBT, JOBF, & + WHTSVD, WHTSYM, WHTEIG, M, N, F, LDF, & + X, LDX, Y, LDY, NRNK, TOL, K, EIGS, & + Z, LDZ, RES, B, LDB, V, LDV, & + S, LDS, WORK, LWORK, IWORK, LIWORK, INFO ) +! August 2022 +!..... + USE iso_fortran_env + IMPLICIT NONE + INTEGER, PARAMETER :: WP = real32 +!..... +! Scalar arguments + CHARACTER, INTENT(IN) :: JOBS, JOBZ, JOBR, JOBQ, & + JOBT, JOBF + INTEGER, INTENT(IN) :: WHTSVD, WHTSYM, WHTEIG, M, N, & + LDF, LDX, LDY, NRNK, LDZ, LDB,& + LDV, LDS, LWORK, LIWORK + INTEGER, INTENT(OUT) :: INFO, K + REAL(KIND=WP), INTENT(IN) :: TOL +! Array arguments + REAL(KIND=WP), INTENT(INOUT) :: F(LDF,*) + REAL(KIND=WP), INTENT(OUT) :: X(LDX,*), Y(LDY,*), & + Z(LDZ,*), B(LDB,*), & + V(LDV,*), S(LDS,*) + REAL(KIND=WP), INTENT(OUT) :: EIGS(*), RES(*) + REAL(KIND=WP), INTENT(OUT) :: WORK(*) + INTEGER, INTENT(OUT) :: IWORK(*) +!..... +! Purpose +! ======= +! SSYDMDQ computes the Dynamic Mode Decomposition (DMD) for +! a pair of data snapshot matrices, using a QR factorization +! based compression of the data. For the input matrices X +! and Y such that Y = A*X with an unaccessible symmetric matrix +! A, SSYDMDQ computes a certain number of Ritz pairs of A using +! the standard Rayleigh-Ritz extraction from a subspace of +! range(X) that is determined using the leading left singular +! vectors of X. Optionally, SSYDMDQ returns the residuals +! of the computed Ritz pairs, the information needed for +! a refinement of the Ritz vectors, or the eigenvectors of +! the Exact DMD. +! For furter details see the references listed below. +! For more details of the implementation see [3], [4]. +! +! References +! ========== +! [1] P. Schmid: Dynamic mode decomposition of numerical +! and experimental data, +! Journal of Fluid Mechanics 656, 5-28, 2010. +! [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal +! decompositions: analysis and enhancements, +! SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018. +! [3] Z. Drmac: A LAPACK implementation of the Dynamic +! Mode Decomposition I. Technical report. AIMDyn Inc. +! October 2022, and LAPACK Working Note 298. +! [4] Z. Drmac: A LAPACK implementation of the Dynamic +! Mode Decomposition II. The symmetric/Hermitian DMD +! (xSYDMD/xHEDMD) Technical report. AIMDyn Inc. +! November 2022. LAPACK Working Note 300. +! [5] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L. +! Brunton, N. Kutz: On Dynamic Mode Decomposition: +! Theory and Applications, Journal of Computational +! Dynamics 1(2), 391 -421, 2014. +! [6] P. J. Baddoo, B. Herrmann, B. J. McKeon, +! J. N. Kutz, S. L. Brunton: Physics-informed +! dynamic mode decomposition (piDMD), arXiv:2112.04307. +! +! Developed and supported by: +! =========================== +! Developed and coded by Zlatko Drmac, Faculty of Science, +! University of Zagreb; drmac@math.hr +! In cooperation with +! AIMdyn Inc., Santa Barbara, CA. +! and supported by +! - DARPA SBIR project "Koopman Operator-Based Forecasting +! for Nonstationary Processes from Near-Term, Limited +! Observational Data" Contract No: W31P4Q-21-C-0007 +! - DARPA PAI project "Physics-Informed Machine Learning +! Methodologies" Contract No: HR0011-18-9-0033 +! - DARPA MoDyL project "A Data-Driven, Operator-Theoretic +! Framework for Space-Time Analysis of Process Dynamics" +! Contract No: HR0011-16-C-0116 +! Any opinions, findings and conclusions or recommendations +! expressed in this material are those of the author and +! do not necessarily reflect the views of the DARPA SBIR +! Program Office. +!============================================================ +! Distribution Statement A: +! Approved for Public Release, Distribution Unlimited. +! +!============================================================ +!............................................................ +! Arguments +! ========= +! JOBS (input) CHARACTER*1 +! Determines whether the initial data snapshots are scaled +! by a diagonal matrix. The data snaphots are the columns +! of F. The leading N-1 columns of F are denoted X and the +! trailing N-1 columns are denoted Y. +! 'S' :: The data snapshots matrices X and Y are multiplied +! with a diagonal matrix D so that X*D has unit +! nonzero columns (in the Euclidean 2-norm) +! 'C' :: The snapshots are scaled as with the 'S' option. +! If it is found that an i-th column of X is zero +! vector and the corresponding i-th column of Y is +! non-zero, then the i-th column of Y is set to +! zero and a warning flag is raised. +! 'Y' :: The data snapshots matrices X and Y are multiplied +! by a diagonal matrix D so that Y*D has unit +! nonzero columns (in the Euclidean 2-norm) +! 'N' :: No data scaling. +!..... +! JOBZ (input) CHARACTER*1 +! Determines whether the eigenvectors (Koopman modes) will +! be computed. +! 'V' :: The eigenvectors (Koopman modes) will be computed +! and returned in the matrix Z. +! See the description of Z. +! 'F' :: The eigenvectors (Koopman modes) will be returned +! in factored form as the product Z*V, where Z +! is orthonormal and V contains the eigenvectors +! of the corresponding Rayleigh quotient. +! See the descriptions of V, Z. +! 'Q' :: The eigenvectors (Koopman modes) will be returned +! in factored form as the product Q*Z, where Z +! contains the eigenvectors of the compression of the +! underlying discretised operator onto the span of +! the data snapshots. See the descriptions of F, V, Z. +! Q is from the initial QR factorization. +! 'N' :: The eigenvectors are not computed. +!..... +! JOBR (input) CHARACTER*1 +! Determines whether to compute the residuals. +! 'R' :: The residuals for the computed eigenpairs will +! be computed and stored in the array RES. +! See the description of RES. +! For this option to be legal, JOBZ must be 'V'. +! 'N' :: The residuals are not computed. +!..... +! JOBQ (input) CHARACTER*1 +! Specifies whether to explicitly compute and return the +! orthogonal matrix from the QR factorization. +! 'Q' :: The matrix Q of the QR factorization of the data +! snapshot matrix is computed and stored in the +! array F. See the description of F. +! 'N' :: The matrix Q is not explicitly computed. +!..... +! JOBT (input) CHARACTER*1 +! Specifies whether to return the upper triangular factor +! from the QR factorization. +! 'R' :: The matrix R of the QR factorization of the data +! snapshot matrix F is returned in the array Y. +! See the description of Y and Further details. +! 'N' :: The matrix R is not returned. +!..... +! JOBF (input) CHARACTER*1 +! Specifies whether to store information needed for post- +! processing (e.g. computing refined Ritz vectors) +! 'R' :: The matrix needed for the refinement of the Ritz +! vectors is computed and stored in the array B. +! See the description of B. +! 'E' :: The unscaled eigenvectors of the Exact DMD are +! computed and returned in the array B. See the +! description of B. +! 'X' :: The Exact DMD vectors are orthogonalized and +! returned in the array B. To preserve the +! residuals of the orthogonalized EDMD vectors +! they are reordered and the reordering permutation +! is stored and returned in the array IWORK. +! See the descriptions of B and IWORK, and [4]. +! 'N' :: No eigenvector refinement data is computed. +! To be useful on exit, this option needs JOBQ='Q'. +!..... +! WHTSVD (input) INTEGER, WHSTVD in { 1, 2, 3, 4 } +! Allows for a selection of the SVD algorithm from the +! LAPACK library. +! 1 :: SGESVD (the QR SVD algorithm) +! 2 :: SGESDD (the Divide and Conquer algortihm; if enough +! workspace available, this is the fastest option) +! 3 :: SGESVDQ (the preconditioned QR SVD ; this and 4 +! are the most accurate options) +! 4 :: SGEJSV (the precondiioned Jacobi SVD; this and 3 +! are the most accurate options) +! For the four methods above, a significant difference in +! the accuracy of small singular values is possible if +! the snapshots vary in norm so that X is severely +! ill-conditioned. If small (smaller than EPS*||X||) +! singular values are of interest and JOBS=='N', then +! the options (3, 4) give the most accurate results, where +! the option 4 is slightly better and with stronger +! theoretical background. +! If JOBS=='S', i.e. the columns of X will be normalized, +! then all methods give nearly equally accurate results. +!..... +! WHTSYM (input) INTEGER +! Specifies the method for restoring the symmetry of the +! Rayleigh quotient. +! 1 :: The lower triangle of the computed Rayleigh +! quotient is used to symmetrize the matrix, +! 2 :: The formulas for the lower triangle of a +! truncated solution of the symmetric Procrustes +! problem are used to symmetrize the computed +! Rayleigh quotient. +!..... +! WHTEIG (input) INTEGER +! Specifies the symmetric eigensolver to compute the +! eigenvalues and eigenvectors of the symmetric Rayleigh +! quotient. +! 1 :: SSYEV (the QR algorithm) +! 2 :: SSYEVD (the divide and conquer algorithm) +!..... +! M (input) INTEGER, M >= 0 +! The state space dimension (the number of rows of F). +!..... +! N (input) INTEGER, 0 <= N <= M +! The number of data snapshots from a single trajectory, +! taken at equidistant discrete times. This is the +! number of columns of F. +!..... +! F (input/output) REAL(KIND=WP) M-by-N array +! > On entry, +! the columns of F are the sequence of data snapshots +! from a single trajectory, taken at equidistant discrete +! times. It is assumed that the column norms of F are +! in the range of the normalized floating point numbers. +! < On exit, +! If JOBQ == 'Q', the array F contains the orthogonal +! matrix/factor of the QR factorization of the initial +! data snapshots matrix F. See the description of JOBQ. +! If JOBQ == 'N', the entries in F strictly below the main +! diagonal contain, column-wise, the information on the +! Householder vectors, as returned by SGEQRF. The +! remaining information to restore the orthogonal matrix +! of the initial QR factorization is stored in WORK(1:N). +! See the description of WORK. +!..... +! LDF (input) INTEGER, LDF >= M +! The leading dimension of the array F. +!..... +! X (workspace/output) REAL(KIND=WP) MIN(M,N)-by-(N-1) array +! X is used as worskpace to hold representations of the +! leading N-1 snapshots in the orthonormal basis computed +! in the QR factorization of F. +! On exit, the leading K columns of X contain the leading +! K left singular vectors of the above described content +! of X. To lift them to the space of the left singular +! vectors U(:,1:K) of the input data, pre-mutiply with the +! Q factor from the initial QR factorization. +! See the descriptions of F, K, V and Z. +!..... +! LDX (input) INTEGER, LDX >= N +! The leading dimension of the array X. +!..... +! Y (workspace/output) REAL(KIND=WP) MIN(M,N)-by-(N-1) array +! Y is used as worskpace to hold representations of the +! trailing N-1 snapshots in the orthonormal basis computed +! in the QR factorization of F. +! On exit, +! If JOBT == 'R', Y contains the MIN(M,N)-by-N upper +! triangular factor from the QR factorization of the data +! snapshot matrix F. +!..... +! LDY (input) INTEGER , LDY >= N +! The leading dimension of the array Y. +!..... +! NRNK (input) INTEGER +! Determines the mode how to compute the numerical rank, +! i.e. how to truncate small singular values of the input +! matrix X. On input, if +! NRNK = -1 :: i-th singular value sigma(i) is truncated +! if sigma(i) <= TOL*sigma(1) +! NRNK = -2 :: i-th singular value sigma(i) is truncated +! if sigma(i) <= TOL*sigma(i-1) +! The numerical rank can be enforced by using positive +! value of NRNK as follows: +! 0 < NRNK <= N-1 :: at most NRNK largest singular values +! will be used. If the number of the computed nonzero +! singular values is less than NRNK, then only those +! nonzero values will be used and the actually used +! dimension is less than NRNK. The actual number of +! the nonzero singular values is returned in the variable +! K. See the description of K. +!..... +! TOL (input) REAL(KIND=WP), 0 <= TOL < 1 +! The tolerance for truncating small singular values. +! See the description of NRNK. +!..... +! K (output) INTEGER, 0 <= K <= N +! The dimension of the SVD/POD basis for the leading N-1 +! data snapshots (columns of F) and the number of the +! computed Ritz pairs. The value of K is determinet +! according to the rule set by the parameters NRNK and +! TOL. See the descriptions of NRNK and TOL. +!..... +! EIGS (output) REAL(KIND=WP) (N-1)-by-1 array +! The leading K (K<=N-1) entries of EIGS contain +! the computed eigenvalues in ascending order. +! If the eigenvectors are requested, then Z(:,i) +! corresponds to EIGS(i). If JOBF == 'X', then +! orthonormalised Exact DMD vectors are stored +! in the array B and to the eigenvector B(:,i) +! the corresponding eigenvalue is EIGS(IWORK(i)). +! See the descriptions of K, Z, B and IWORK. +!..... +! Z (workspace/output) REAL(KIND=WP) M-by-(N-1) array +! If JOBZ =='V' then Z contains Ritz vectors. +! If JOBZ == 'F', then the above descriptions hold for +! the columns of X(:,1:K)*W(1:K,1:K), where the columns +! of W(1:k,1:K) are the computed eigenvectors of the +! K-by-K Rayleigh quotient. +! See the descriptions of EIGS, X and W. +!..... +! LDZ (input) INTEGER , LDZ >= M +! The leading dimension of the array Z. +!..... +! RES (output) REAL(KIND=WP) (N-1)-by-1 array +! RES(1:K) contains the residuals for the K computed +! Ritz pairs. +! RES(i) = || A * Z(:,i) - EIGS(i)*Z(:,i))||_2. +! If JOBF == 'X', the array IWORK on exit +! contains the permutation that sorts RES in +! ascending order. +! See the description of JOBF, EIGS, Z and IWORK. +!..... +! B (output) REAL(KIND=WP) MIN(M,N)-by-(N-1) array. +! IF JOBF =='R', B(1:N,1:K) contains A*U(:,1:K), and can +! be used for computing the refined vectors; see further +! details in the provided references. +! If JOBF == 'E', B(1:N,1;K) contains +! A*U(:,1:K)*W(1:K,1:K), which are the vectors from the +! Exact DMD, up to scaling by the inverse eigenvalues. +! In both cases, the content of B can be lifted to the +! original dimension of the input data by pre-mutiplying +! with the Q factor from the initial QR factorization. +! Here A denotes a compression of the underlying operator. +! See the descriptions of F and X. +! If JOBF =='N', then B is not referenced. +!..... +! LDB (input) INTEGER, LDB >= MIN(M,N) +! The leading dimension of the array B. +!..... +! V (workspace/output) REAL(KIND=WP) (N-1)-by-(N-1) array +! On exit, V(1:K,1:K) V contains the K eigenvectors of +! the Rayleigh quotient. The Ritz vectors +! (returned in Z) are the product of X and V; see +! the descriptions of X and Z. +!..... +! LDV (input) INTEGER, LDV >= N-1 +! The leading dimension of the array V. +!..... +! S (workspace/output) REAL(KIND=WP) (N-1)-by-(N-1) array +! The array S(1:K,1:K) is used for the matrix Rayleigh +! quotient. This content is overwritten during +! the eigenvalue decomposition by SSYEV/SSYEVD. +! See the description of K. +!..... +! LDS (input) INTEGER, LDS >= N-1 +! The leading dimension of the array S. +!..... +! WORK (workspace/output) REAL(KIND=WP) LWORK-by-1 array +! On exit, +! WORK(1:MIN(M,N)) contains the scalar factors of the +! elementary reflectors as returned by SGEQRF of the +! M-by-N input matrix F. +! WORK(MIN(M,N)+1:MIN(M,N)+N-1) contains the singular values of +! the input submatrix F(1:M,1:N-1). +! If the call to SSYDMDQ is only workspace query, then +! WORK(1) contains the minimal workspace length and +! WORK(2) is the optimal workspace length. Hence, the +! length of WORK is at least 2. +! See the description of LWORK. +!..... +! LWORK (input) INTEGER +! The minimal length of the workspace vector WORK. +! LWORK is calculated as follows: +! Let MLWQR = N (minimal workspace for SGEQRF[M,N]) +! MLWDMD = minimal workspace for SSYDMD (see the +! description of LWORK in SSYDMD) +! MLWMQR = N (minimal workspace for +! SORMQR['L','N',M,N,N]) +! MLWGQR = N (minimal workspace for SORGQR[M,N,N]) +! Then +! LWORK = MAX(MIN(M,N)+MLWQR, N+MLWDMD) +! is updated as follows: +! if JOBZ == 'V' or JOBZ == 'F' THEN +! LWORK = MAX( LWORK, MIN(M,N)+N-1+MLWMQR ) +! if JOBQ == 'Q' THEN +! LWORK = MAX( LWORK, MIN(M,N)+N-1+MLWGQR) +! If on entry LWORK = -1, then a workspace query is +! assumed and the procedure only computes the minimal +! and the optimal workspace lengths for both WORK and +! IWORK. See the descriptions of WORK and IWORK. +!..... +! IWORK (workspace/output) INTEGER LIWORK-by-1 array +! Workspace that is required if WHTSVD equals +! 2 , 3 or 4. Further, if JOBF=='X', it is used to return +! ordering of the orthonormalized Exact DMD eigenvectors, +! so that EIGS(IWORK(i)) is the eigenvalue that corresponds to +! the i-th EDMD vector. See the descriptions of JOBF and B. +! If on entry LWORK =-1 or LIWORK=-1, then the +! minimal length of IWORK is computed and returned in +! IWORK(1). See the description of LIWORK. +!..... +! LIWORK (input) INTEGER +! The minimal length of the workspace vector IWORK. +! LIWORK is determined as follows. First: Let N1=N-1 +! If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1 +! If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M,N1)) +! If WHTSVD == 3, then LIWORK >= MAX(1,M+N1-1) +! If WHTSVD == 4, then LIWORK >= MAX(3,M+3*N1) +! If WHTEIG == 2 and JOBZ == 'V', then LIWORK >= MAX(1,3+5*N1) +! Then, if JOBF == 'X', then LIWORK = MAX(LIWORK,N1). +! If on entry LIWORK = -1, then a worskpace query is +! assumed and the procedure only computes the minimal +! and the optimal workspace lengths for both WORK and +! IWORK. See the descriptions of WORK and IWORK. +!..... +! INFO (output) INTEGER +! -i < 0 :: On entry, the i-th argument had an +! illegal value +! = 0 :: Successful return. +! = 1 :: Void input. Quick exit (M=0 or N=0). +! = 2 :: The SVD computation of X did not converge. +! Suggestion: Check the input data and/or +! repeat with different WHTSVD. +! = 3 :: The computation of the eigenvalues did not +! converge. +! = 4 :: If data scaling was requested on input and +! the procedure found inconsistency in the data +! such that for some column index i, +! X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set +! to zero if JOBS=='C'. The computation proceeds +! with original or modified data and warning +! flag is set with INFO=4. +!............................................................. +!............................................................. +! Parameters +! ~~~~~~~~~~ + REAL(KIND=WP), PARAMETER :: ONE = 1.0_WP + REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP +! +! Local scalars +! ~~~~~~~~~~~~~ + INTEGER :: IMINWR, INFO1, MLWDMD, MLWGQR, & + MLWMQR, MLWORK, MLWQR, MINMN, & + OLWDMD, OLWGQR, OLWMQR, OLWORK, & + OLWQR + LOGICAL :: LQUERY, SCCOLX, SCCOLY, WANTQ, & + WNTTRF, WNTRES, WNTVEC, WNTVCF, & + WNTVCQ, WNTREF, WNTEX + CHARACTER(LEN=1) :: JOBVL +! +! Local array +! ~~~~~~~~~~~ + REAL(KIND=WP) :: RDUMMY(2) +! +! External funcions (BLAS and LAPACK) +! ~~~~~~~~~~~~~~~~~ + LOGICAL LSAME + EXTERNAL LSAME +! +! External subroutines (BLAS and LAPACK) +! ~~~~~~~~~~~~~~~~~~~~ + EXTERNAL SGEMM + EXTERNAL SGEQRF, SLACPY, SLASET, SORGQR, & + SORMQR, XERBLA + +! External subroutines +! ~~~~~~~~~~~~~~~~~~~~ + EXTERNAL SSYDMD + +! Intrinsic functions +! ~~~~~~~~~~~~~~~~~~~ + INTRINSIC MAX, MIN, INT + !.......................................................... + ! + ! Test the input arguments + WNTRES = LSAME(JOBR,'R') + SCCOLX = LSAME(JOBS,'S') .OR. LSAME( JOBS, 'C' ) + SCCOLY = LSAME(JOBS,'Y') + WNTVEC = LSAME(JOBZ,'V') + WNTREF = LSAME(JOBF,'R') + WNTEX = LSAME(JOBF,'E') .OR. LSAME(JOBF,'X') + WNTVCF = LSAME(JOBZ,'F') + WNTVCQ = LSAME(JOBZ,'Q') + WANTQ = LSAME(JOBQ,'Q') + WNTTRF = LSAME(JOBT,'R') + MINMN = MIN(M,N) + INFO = 0 + LQUERY = ( ( LWORK == -1 ) .OR. ( LIWORK == -1 ) ) +! + IF ( .NOT. (SCCOLX .OR. SCCOLY .OR. & + LSAME(JOBS,'N')) ) THEN + INFO = -1 + ELSE IF ( .NOT. (WNTVEC .OR. WNTVCF .OR. WNTVCQ & + .OR. LSAME(JOBZ,'N')) ) THEN + INFO = -2 + ELSE IF ( .NOT. (WNTRES .OR. LSAME(JOBR,'N')) .OR. & + ( WNTRES .AND. (.NOT.(WNTVEC .OR. WNTVCF)) ) ) THEN + INFO = -3 + ELSE IF ( .NOT. (WANTQ .OR. LSAME(JOBQ,'N')) ) THEN + INFO = -4 + ELSE IF ( .NOT. ( WNTTRF .OR. LSAME(JOBT,'N') ) ) THEN + INFO = -5 + ELSE IF ( .NOT. (WNTREF .OR. WNTEX .OR. & + LSAME(JOBF,'N') ) ) THEN + INFO = -6 + ELSE IF ( .NOT. ((WHTSVD == 1).OR.(WHTSVD == 2).OR. & + (WHTSVD == 3).OR.(WHTSVD == 4)) ) THEN + INFO = -7 + ELSE IF ( .NOT.((WHTSYM == 1) .OR. (WHTSYM == 2))) THEN + INFO = -8 + ELSE IF ( .NOT.((WHTEIG == 1) .OR. (WHTEIG == 2))) THEN + INFO = -9 + ELSE IF ( M < 0 ) THEN + INFO = -10 + ELSE IF ( ( N < 0 ) .OR. ( N > M+1 ) ) THEN + INFO = -11 + ELSE IF ( LDF < M ) THEN + INFO = -13 + ELSE IF ( LDX < MINMN ) THEN + INFO = -15 + ELSE IF ( LDY < MINMN ) THEN + INFO = -17 + ELSE IF ( .NOT. (( NRNK == -2).OR.(NRNK == -1).OR. & + ((NRNK >= 1).AND.(NRNK < N ))) ) THEN + INFO = -18 + ELSE IF ( ( TOL < ZERO ) .OR. ( TOL >= ONE ) ) THEN + INFO = -19 + ELSE IF ( LDZ < M ) THEN + INFO = -23 + ELSE IF ( (WNTREF.OR.WNTEX ).AND.( LDB < MINMN ) ) THEN + INFO = -26 + ELSE IF ( LDV < N-1 ) THEN + INFO = -28 + ELSE IF ( LDS < N-1 ) THEN + INFO = -30 + END IF +! + IF ( WNTVEC .OR. WNTVCF .OR. WNTVCQ ) THEN + JOBVL = 'V' + ELSE + JOBVL = 'N' + END IF + IF ( INFO == 0 ) THEN + ! Compute the minimal and the optimal workspace + ! requirements. Simulate running the code and + ! determine minimal and optimal sizes of the + ! workspace at any moment of the run. + IF ( ( N == 0 ) .OR. ( N == 1 ) ) THEN + ! All output except K is void. INFO=1 signals + ! the void input. In case of a workspace query, + ! the minimal workspace lengths are returned. + IF ( LQUERY ) THEN + IWORK(1) = 1 + WORK(1) = 2 + WORK(2) = 2 + ELSE + K = 0 + END IF + INFO = 1 + RETURN + END IF + MLWQR = MAX(1,N) ! Minimal workspace length for SGEQRF. + MLWORK = MINMN + MLWQR + IF ( LQUERY ) THEN + CALL SGEQRF( M, N, F, LDF, WORK, RDUMMY, -1, & + INFO1 ) + OLWQR = INT(RDUMMY(1)) + OLWORK = MIN(M,N) + OLWQR + END IF + CALL SSYDMD( JOBS, JOBVL, JOBR, JOBF, WHTSVD, WHTSYM, & + WHTEIG, MINMN, N-1, X, LDX, Y, LDY, NRNK,& + TOL, K, EIGS, Z, LDZ, RES, B, LDB, & + V, LDV, S, LDS, WORK, -1, IWORK, -1, INFO1 ) + MLWDMD = INT(WORK(1)) + MLWORK = MAX(MLWORK, MINMN + MLWDMD) + IMINWR = IWORK(1) + IF ( LQUERY ) THEN + OLWDMD = INT(WORK(2)) + OLWORK = MAX(OLWORK, MINMN+OLWDMD) + END IF + IF ( WNTVEC .OR. WNTVCF ) THEN + MLWMQR = MAX(1,N) + MLWORK = MAX(MLWORK,MINMN+N-1+MLWMQR) + IF ( LQUERY ) THEN + CALL SORMQR( 'L','N', M, N, MINMN, F, LDF, & + WORK, Z, LDZ, WORK, -1, INFO1 ) + OLWMQR = INT(WORK(1)) + OLWORK = MAX(OLWORK,MINMN+N-1+OLWMQR) + END IF + END IF + IF ( WANTQ ) THEN + MLWGQR = N + MLWORK = MAX(MLWORK,MINMN+N-1+MLWGQR) + IF ( LQUERY ) THEN + CALL SORGQR( M, MINMN, MINMN, F, LDF, WORK, & + WORK, -1, INFO1 ) + OLWGQR = INT(WORK(1)) + OLWORK = MAX(OLWORK,MINMN+N-1+OLWGQR) + END IF + END IF + IMINWR = MAX( 1, IMINWR ) + MLWORK = MAX( 2, MLWORK ) + IF ( LWORK < MLWORK .AND. (.NOT.LQUERY) ) INFO = -32 + IF ( LIWORK < IMINWR .AND. (.NOT.LQUERY) ) INFO = -34 + END IF + IF( INFO /= 0 ) THEN + CALL XERBLA( 'SSYDMDQ', -INFO ) + RETURN + ELSE IF ( LQUERY ) THEN +! Return minimal and optimal workspace sizes + IWORK(1) = IMINWR + WORK(1) = MLWORK + WORK(2) = OLWORK + RETURN + END IF +!..... +! Initial QR factorization that is used to represent the +! snapshots as elements of lower dimensional subspace. +! For large scale computation with M >>N , at this place +! one can use an out of core QRF. +! + CALL SGEQRF( M, N, F, LDF, WORK, & + WORK(MINMN+1), LWORK-MINMN, INFO1 ) +! +! Define X and Y as the snapshots representations in the +! orthogonal basis computed in the QR factorization. +! X corresponds to the leading N-1 and Y to the trailing +! N-1 snapshots. + CALL SLASET( 'L', MINMN, N-1, ZERO, ZERO, X, LDX ) + CALL SLACPY( 'U', MINMN, N-1, F, LDF, X, LDX ) + CALL SLACPY( 'A', MINMN, N-1, F(1,2), LDF, Y, LDY ) + IF ( M >= 3 ) THEN + CALL SLASET( 'L', MINMN-2, N-2, ZERO, ZERO, & + Y(3,1), LDY ) + END IF +! +! Compute the DMD of the projected snapshot pairs (X,Y) + CALL SSYDMD( JOBS, JOBVL, JOBR, JOBF, WHTSVD, WHTSYM, & + WHTEIG, MINMN, N-1, X, LDX, Y, LDY, NRNK,& + TOL, K, EIGS, Z, LDZ, RES, B, LDB, V, & + LDV, S, LDS, WORK(MINMN+1), LWORK-MINMN, & + IWORK, LIWORK, INFO1 ) + IF ( INFO1 == 2 .OR. INFO1 == 3 ) THEN + ! Return with error code. See SSYDMD for details. + INFO = INFO1 + RETURN + ELSE + INFO = INFO1 + END IF +! +! The Ritz vectors (Koopman modes) can be explicitly +! formed or returned in factored form. + IF ( WNTVEC ) THEN + ! Compute the eigenvectors explicitly. + IF ( M > MINMN ) CALL SLASET( 'A', M-MINMN, K, ZERO, & + ZERO, Z(MINMN+1,1), LDZ ) + CALL SORMQR( 'L','N', M, K, MINMN, F, LDF, WORK, Z, & + LDZ, WORK(MINMN+N), LWORK-(MINMN+N-1), INFO1 ) + ELSE IF ( WNTVCF ) THEN + ! Return the Ritz vectors (eigenvectors) in factored + ! form Z*V, where Z contains orthonormal matrix (the + ! product of Q from the inital QR factorization and + ! the SVD/POD_basis returned by SSYDMD in X) and the + ! second factor (the eigenvectors of the Rayleigh + ! quotient) is in the array V, as returned by SSYDMD. + CALL SLACPY( 'A', N, K, X, LDX, Z, LDZ ) + IF ( M > N ) CALL SLASET( 'A', M-N, K, ZERO, ZERO, & + Z(N+1,1), LDZ ) + CALL SORMQR( 'L','N', M, K, MINMN, F, LDF, WORK, Z, & + LDZ, WORK(MINMN+N), LWORK-(MINMN+N-1), INFO1 ) + END IF +! +! Some optional output variables: +! +! The upper triangular factor R in the initial QR +! factorization is optionally returned in the array Y. +! This is useful if this call to SSYDMDQ is to be +! followed by a streaming DMD that is implemented in a +! QR compressed form. + IF ( WNTTRF ) THEN ! Return the upper triangular R in Y + CALL SLASET( 'A', MINMN, N, ZERO, ZERO, Y, LDY ) + CALL SLACPY( 'U', MINMN, N, F, LDF, Y, LDY ) + END IF +! +! The orthonormal/orthogonal factor Q in the initial QR +! factorization is optionally returned in the array F. +! Same as with the triangular factor above, this is +! useful in a streaming DMD. + IF ( WANTQ ) THEN ! Q overwrites F + CALL SORGQR( M, MINMN, MINMN, F, LDF, WORK, & + WORK(MINMN+N), LWORK-(MINMN+N-1), INFO1 ) + END IF +! + RETURN +! + END SUBROUTINE SSYDMDQ + \ No newline at end of file diff --git a/SRC/zhedmd.f90 b/SRC/zhedmd.f90 new file mode 100644 index 0000000000..8c3c875c76 --- /dev/null +++ b/SRC/zhedmd.f90 @@ -0,0 +1,1171 @@ + SUBROUTINE ZHEDMD( JOBS, JOBZ, JOBR, JOBF, & + WHTSVD, WHTSYM, WHTEIG, & + M, N, X, LDX, Y, LDY, NRNK, TOL, & + K, EIGS, Z, LDZ, RES, & + B, LDB, W, LDW, S, LDS, & + ZWORK, LZWORK, RWORK, LRWORK, & + IWORK, LIWORK, INFO ) +!..... + USE iso_fortran_env + IMPLICIT NONE + INTEGER, PARAMETER :: WP = real64 +!..... +! Scalar arguments + CHARACTER, INTENT(IN) :: JOBS, JOBZ, JOBR, JOBF + INTEGER, INTENT(IN) :: WHTSVD, WHTSYM, WHTEIG, & + M, N, LDX, LDY, & + NRNK, LDZ, LDB, LDW, LDS,& + LIWORK, LRWORK, LZWORK + INTEGER, INTENT(OUT) :: K, INFO + REAL(KIND=WP), INTENT(IN) :: TOL +! Array arguments + COMPLEX(KIND=WP), INTENT(INOUT) :: X(LDX,*), Y(LDY,*) + COMPLEX(KIND=WP), INTENT(OUT) :: Z(LDZ,*), B(LDB,*), & + W(LDW,*), S(LDS,*) + REAL(KIND=WP), INTENT(OUT) :: EIGS(*) + COMPLEX(KIND=WP), INTENT(OUT) :: ZWORK(*) + REAL(KIND=WP), INTENT(OUT) :: RES(*) + REAL(KIND=WP), INTENT(OUT) :: RWORK(*) + INTEGER, INTENT(OUT) :: IWORK(*) +!............................................................ +! Purpose +! ======= +! ZHEDMD computes the Dynamic Mode Decomposition (DMD) for +! a pair of data snapshot matrices. For the input matrices +! X and Y such that Y = A*X with an unaccessible Hermitian +! matrix A, ZHEDMD computes a certain number of Ritz pairs +! of A using the standard Rayleigh-Ritz extraction from a +! subspace of range(X) that is determined using the leading +! left singular vectors of X. Optionally, ZHEDMD returns +! the residuals of the computed Ritz pairs, the information +! needed for a refinement of the Ritz vectors, or the +! eigenvectors of the Exact DMD. +! For furter details see the references listed below. +! For more details of the implementation see [3], [4]. +! +! References +! ========== +! [1] P. Schmid: Dynamic mode decomposition of numerical +! and experimental data, +! Journal of Fluid Mechanics 656, 5-28, 2010. +! [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal +! decompositions: analysis and enhancements, +! SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018. +! [3] Z. Drmac: A LAPACK implementation of the Dynamic +! Mode Decomposition I. Technical report. AIMDyn Inc. +! October 2022, and LAPACK Working Note 298. +! [4] Z. Drmac: A LAPACK implementation of the Dynamic +! Mode Decomposition II. The symmetric/Hermitian DMD +! (xSYDMD/xHEDMD) Technical report. AIMDyn Inc. +! November 2022. LAPACK Working Note 300. +! [5] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L. +! Brunton, N. Kutz: On Dynamic Mode Decomposition: +! Theory and Applications, Journal of Computational +! Dynamics 1(2), 391 -421, 2014. +! [6] P. J. Baddoo, B. Herrmann, B. J. McKeon, +! J. N. Kutz, S. L. Brunton: Physics-informed +! dynamic mode decomposition (piDMD), arXiv:2112.04307. +! +!...................................................................... +! Developed and supported by: +! =========================== +! Developed and coded by Zlatko Drmac, Faculty of Science, +! University of Zagreb; drmac@math.hr +! In cooperation with +! AIMdyn Inc., Santa Barbara, CA. +! and supported by +! - DARPA SBIR project "Koopman Operator-Based Forecasting +! for Nonstationary Processes from Near-Term, Limited +! Observational Data" Contract No: W31P4Q-21-C-0007 +! - DARPA PAI project "Physics-Informed Machine Learning +! Methodologies" Contract No: HR0011-18-9-0033 +! - DARPA MoDyL project "A Data-Driven, Operator-Theoretic +! Framework for Space-Time Analysis of Process Dynamics" +! Contract No: HR0011-16-C-0116 +! Any opinions, findings and conclusions or recommendations +! expressed in this material are those of the author and +! do not necessarily reflect the views of the DARPA SBIR +! Program Office +!============================================================ +! Distribution Statement A: +! Approved for Public Release, Distribution Unlimited. +! +!============================================================ +!............................................................ +! Arguments +! ========= +! JOBS (input) CHARACTER*1 +! Determines whether the initial data snapshots are scaled +! by a diagonal matrix. +! 'S' :: The data snapshots matrices X and Y are multiplied +! with a diagonal matrix D so that X*D has unit +! nonzero columns (in the Euclidean 2-norm) +! 'C' :: The snapshots are scaled as with the 'S' option. +! If it is found that an i-th column of X is zero +! vector and the corresponding i-th column of Y is +! non-zero, then the i-th column of Y is set to +! zero and a warning flag is raised. +! 'Y' :: The data snapshots matrices X and Y are multiplied +! by a diagonal matrix D so that Y*D has unit +! nonzero columns (in the Euclidean 2-norm) +! 'N' :: No data scaling. +!..... +! JOBZ (input) CHARACTER*1 +! Determines whether the eigenvectors (Koopman modes) will +! be computed. +! 'V' :: The eigenvectors (Koopman modes) will be computed +! and returned in the matrix Z. +! See the description of Z. +! 'F' :: The eigenvectors (Koopman modes) will be returned +! in factored form as the product X(:,1:K)*W, where X +! contains a POD basis (leading left singular vectors +! of the data matrix X) and W contains the eigenvectors +! of the corresponding Rayleigh quotient. +! See the descriptions of K, X, W, Z. +! 'N' :: The eigenvectors are not computed. +!..... +! JOBR (input) CHARACTER*1 +! Determines whether to compute the residuals. +! 'R' :: The residuals for the computed eigenpairs will be +! computed and stored in the array RES. +! See the description of RES. +! For this option to be legal, JOBZ must be 'V'. +! 'N' :: The residuals are not computed. +!..... +! JOBF (input) CHARACTER*1 +! Specifies whether to store information needed for post- +! processing (e.g. computing refined Ritz vectors) +! 'R' :: The matrix needed for the refinement of the Ritz +! vectors is computed and stored in the array B. +! See the description of B. +! 'E' :: The unscaled eigenvectors of the Exact DMD are +! computed and returned in the array B. See the +! description of B. +! 'X' :: The Exact DMD vectors are orthogonalized and +! returned in the array B. To preserve the +! residuals of the orthogonalized EDMD vectors +! they are reordered and the reordering permutation +! is stored and returned in the array IWORK. +! See the descriptions of B and IWORK, and [4]. +! 'N' :: No eigenvector refinement data is computed. +!..... +! WHTSVD (input) INTEGER, WHSTVD in { 1, 2, 3, 4 } +! Allows for a selection of the SVD algorithm from the +! LAPACK library. +! 1 :: ZGESVD (the QR SVD algorithm) +! 2 :: ZGESDD (the Divide and Conquer algortihm; if enough +! workspace available, this is the fastest option) +! 3 :: ZGESVDQ (the preconditioned QR SVD ; this and 4 +! are the most accurate options) +! 4 :: ZGEJSV (the preconditioned Jacobi SVD; this and 3 +! are the most accurate options) +! For the four methods above, a significant difference in +! the accuracy of small singular values is possible if +! the snapshots vary in norm so that X is severely +! ill-conditioned. If small (smaller than EPS*||X||) +! singular values are of interest and JOBS=='N', then +! the options (3, 4) give the most accurate results, where +! the option 4 is slightly better and with stronger +! theoretical background. +! If JOBS=='S', i.e. the columns of X will be normalized, +! then all methods give nearly equally accurate results. +!..... +! WHTSYM (input) INTEGER +! Specifies the method for restoring the symmetry of the +! Rayleigh quotient. +! 1 :: The lower triangle of the computed Rayleigh +! quotient is used to symmetrize the matrix, +! 2 :: The formulas for the lower triangle of a +! truncated solution of the Hermitian Procrustes +! problem are used to symmetrize the computed +! Rayleigh quotient. +!..... +! WHTEIG (input) INTEGER +! Specifies the symmetric eigensolver to compute the +! eigenvalues and eigenvectors of the Hermitian Rayleigh +! quotient. +! 1 :: ZHEEV (the QR algorithm) +! 2 :: ZHEEVD (the divide and conquer algorithm) +!..... +! M (input) INTEGER, M>= 0 +! The state space dimension (the row dimension of X, Y). +!..... +! N (input) INTEGER, 0 <= N <= M +! The number of data snapshot pairs +! (the number of columns of X and Y). +!..... +! X (input/output) COMPLEX(KIND=WP) M-by-N array +! > On entry, X contains the data snapshot matrix X. It is +! assumed that the column norms of X are in the range of +! the normalized floating point numbers. +! < On exit, the leading K columns of X contain a POD basis, +! i.e. the leading K left singular vectors of the input +! data matrix X, U(:,1:K). All N columns of X contain all +! left singular vectors of the input matrix X. +! See the descriptions of K, Z and W. +!..... +! LDX (input) INTEGER, LDX >= M +! The leading dimension of the array X. +!..... +! Y (input/workspace/output) COMPLEX(KIND=WP) M-by-N array +! > On entry, Y contains the data snapshot matrix Y +! < On exit, +! If JOBR == 'R', the leading K columns of Y contain +! the residual vectors for the computed Ritz pairs. +! See the description of RES. +! If JOBR == 'N', Y contains the original input data. +!..... +! LDY (input) INTEGER , LDY >= M +! The leading dimension of the array Y. +!..... +! NRNK (input) INTEGER +! Determines the mode how to compute the numerical rank, +! i.e. how to truncate small singular values of the input +! matrix X. On input, if +! NRNK = -1 :: i-th singular value sigma(i) is truncated +! if sigma(i) <= TOL*sigma(1) +! NRNK = -2 :: i-th singular value sigma(i) is truncated +! if sigma(i) <= TOL*sigma(i-1) +! The numerical rank can be enforced by using positive +! value of NRNK as follows: +! 0 < NRNK <= N :: at most NRNK largest singular values +! will be used. If the number of the computed nonzero +! singular values is less than NRNK, then only those +! nonzero values will be used and the actually used +! dimension is less than NRNK. The actual number of +! the nonzero singular values is returned in the variable +! K. See the descriptions of TOL and K. +!..... +! TOL (input) REAL(KIND=WP), 0 <= TOL < 1 +! The tolerance for truncating small singular values. +! See the description of NRNK. +!..... +! K (output) INTEGER, 0 <= K <= N +! The dimension of the POD basis for the data snapshot +! matrix X and the number of the computed Ritz pairs. +! The value of K is determinet according to the rule set +! by the parameters NRNK and TOL. +! See the descriptions of NRNK and TOL. +!..... +! EIGS (output) REAL(KIND=WP) N-by-1 array +! The leading K (K<=N) entries of EIGS contain +! the computed eigenvalues in ascending order. +! If the eigenvectors are requested, then Z(:,i) +! corresponds to EIGS(i). If JOBF == 'X', then +! orthonormalised Exact DMD vectors are stored +! in the array B and to the eigenvector B(:,i) +! the corresponding eigenvalue is EIGS(IWORK(i)). +! See the descriptions of K, Z, B and IWORK. +!..... +! Z (workspace/output) COMPLEX(KIND=WP) M-by-N array +! If JOBZ =='V' then +! Z contains Ritz vectors. +! If JOBZ == 'F', then the above descriptions hold for +! the columns of X(:,1:K)*W(1:K,1:K), where the columns +! of W(1:k,1:K) are the computed eigenvectors of the +! K-by-K Rayleigh quotient. +! See the descriptions of EIGS, X and W. +!..... +! LDZ (input) INTEGER , LDZ >= M +! The leading dimension of the array Z. +!..... +! RES (output) REAL(KIND=WP) N-by-1 array +! RES(1:K) contains the residuals for the K computed +! Ritz pairs. +! RES(i) = || A * Z(:,i) - EIGS(i)*Z(:,i))||_2. +! If JOBF == 'X', the array IWORK on exit +! contains the permutation that sorts RES in +! ascending order. +! See the description of JOBF, EIGS, Z and IWORK. +!..... +! B (output) COMPLEX(KIND=WP) M-by-N array. +! IF JOBF =='R', B(1:M,1:K) contains A*U(:,1:K), and can +! be used for computing the refined vectors; see further +! details in the provided references. +! If JOBF == 'E', B(1:M,1;K) contains +! A*U(:,1:K)*W(1:K,1:K), which are the vectors from the +! Exact DMD, up to scaling by the inverse eigenvalues. +! Note that the EDMD vectors may not be even numerically +! orthogonal and that the non-orthogonality may be +! substantial. +! If JOBF == 'X', then the EDMD vectors +! A*U(:,1:K)*W(1:K,1:K) are orthonormalized. To preserve +! information on the residuals, they are reordered and +! the reordering permutation is stored in the array IWORK. +! If JOBF =='N', then B is not referenced. +! See the descriptions of JOBF, X, W, K, IWORK. +!..... +! LDB (input) INTEGER, LDB >= M +! The leading dimension of the array B. +!..... +! W (workspace/output) COMPLEX(KIND=WP) N-by-N array +! On exit, W(1:K,1:K) contains the K computed +! eigenvectors of the matrix Rayleigh quotient. +! The Ritz vectors (returned in Z) are the +! product of X (containing a POD basis for the input +! matrix X) and W. See the descriptions of K, S, X and Z. +! W is also used as a workspace to temporarily store the +! left singular vectors of X. +!..... +! LDW (input) INTEGER, LDW >= N +! The leading dimension of the array W. +!..... +! S (workspace/output) COMPLEX(KIND=WP) N-by-N array +! The array S(1:K,1:K) is used for the matrix Rayleigh +! quotient. This content is overwritten during +! the eigenvalue decomposition. +! See the description of K. +!..... +! LDS (input) INTEGER, LDS >= N +! The leading dimension of the array S. +!..... +! ZWORK (workspace/output) COMPLEX(KIND=WP) LZWORK-by-1 array +! ZWORK is used as complex workspace in the complex SVD, as +! specified by WHTSVD (1,2, 3 or 4) and for ZGEEV for computing +! the eigenvalues of a Rayleigh quotient. +! If the call to ZHEDMD is only workspace query, then +! ZWORK(1) contains the minimal complex workspace length and +! ZWORK(2) is the optimal complex workspace length. +! Hence, the length of ZWORK is at least 2. +! See the description of LZWORK. +!..... +! LZWORK (input) INTEGER +! The minimal length of the workspace vector ZWORK. +! LZWORK is calculated as MAX(LZWORK_SVD, LZWORK_ZHEEV), +! where +! for WHTEIG == 1 (ZHEEV) LZWORK_ZHEEV = MAX(1,2*N-1) +! for WHTEIG == 2 (ZHEEVD) LZWORK_ZHEEV = 2*N+N*N (JOBZ=='V') +! LZWORK_ZHEEV = N+1 (JOBZ=='N') +! and the minimal +! LZWORK_SVD is calculated as follows +! If WHTSVD == 1 :: ZGESVD :: +! LZWORK_SVD = MAX(1,2*MIN(M,N)+MAX(M,N)) +! If WHTSVD == 2 :: ZGESDD :: +! LZWORK_SVD = 2*MIN(M,N)*MIN(M,N)+2*MIN(M,N)+MAX(M,N) +! If WHTSVD == 3 :: ZGESVDQ :: +! LZWORK_SVD = obtainable by a query +! If WHTSVD == 4 :: ZGEJSV :: +! LZWORK_SVD = obtainable by a query +! Further, if JOBF=='X', then LZWORK is +! MAX(LWORK_SVD, LWORK_ZHEEV,2*N+N), where N+2*N is needed +! for ZGEQRF and ZUNGQR. +! If on entry LZWORK = -1, then a workspace query is +! assumed and the procedure only computes the minimal +! and the optimal workspace lengths and returns them in +! LZWORK(1) and LZORK(2), respectively. +!..... +! RWORK (workspace/output) REAL(KIND=WP) LRWORK-by-1 array +! On exit, RWORK(1:N) contains the singular values of +! X (for JOBS=='N') or column scaled X (JOBS=='S', 'C'). +! If WHTSVD==4, then RWORK(N+1) and RWORK(N+2) contain +! scaling factor RWORK(N+2)/RWORK(N+1) used to scale X +! and Y to avoid overflow in the SVD of X. +! This may be of interest if the scaling option is off +! and as many as possible smallest eigenvalues are +! desired to the highest feasible accuracy. +! If the call to ZHEDMD is only workspace query, then +! RWORK(1) contains the minimal workspace length and +! RWORK(2) is the optimal workspace length. Hence, the +! length of RWORK is at least 2. +! See the description of LRWORK. +!..... +! LRWORK (input) INTEGER +! The minimal length of the workspace vector RWORK. +! LRWORK is calculated as follows: +! LRWORK = MAX(1, N+LRWORK_SVD,N+LRWORK_ZHEEV), where +! RWORK_SVD is the real workspace for the SVD +! subroutine determined by the input parameter +! WHTSVD. +! If WHTSVD == 1 :: ZGESVD :: +! LRWORK_SVD = 5*MIN(M,N) +! If WHTSVD == 2 :: ZGESDD :: +! LRWORK_SVD = MAX(5*MIN(M,N)*MIN(M,N)+7*MIN(M,N), +! 2*MAX(M,N)*MIN(M,N)+2*MIN(M,N)*MIN(M,N)+MIN(M,N) ) ) +! If WHTSVD == 3 :: ZGESVDQ :: +! LRWORK_SVD = obtainable by a query +! If WHTSVD == 4 :: ZGEJSV :: +! LRWORK_SVD = obtainable by a query +! LRWORK_ZHEEV is the real workspace needed in the +! Hermitian eigensolver. +! If WHTEIG == 1 :: ZHEEV :: LWORK_ZHEEV = 3*N-2 +! If WHTEIG == 2 :: ZHEEVD :: +! If JOBZ == 'V', LWORK_ZHEEV = 1+5*N+2*N*N +! If JOBZ == 'N', LWORK_ZHEEV = N +! In any case, LRWORK >= 2. +! If on entry LRWORK = -1, then a workspace query is +! assumed and the procedure only computes the minimal +! and the optimal workspace lengths for both WORK and +! IWORK. See the descriptions of WORK and IWORK. +!..... +! IWORK (workspace/output) INTEGER LIWORK-by-1 array +! Workspace that is required if WHTSVD equals +! 2 , 3 or 4. Further, if JOBF=='X', it is used to return +! ordering of the orthonormalized Exact DMD eigenvectors, +! so that EIGS(IWORK(i)) is the eigenvalue that corresponds to +! the i-th EDMD vector. See the descriptions of JOBF and B. +! If on entry LWORK =-1 or LIWORK=-1, then the +! minimal length of IWORK is computed and returned in +! IWORK(1). See the description of LIWORK. +!..... +! LIWORK (input) INTEGER +! The minimal length of the workspace vector IWORK. +! LIWORK is determined in two steps. First: +! If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1 +! If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M,N)) +! If WHTSVD == 3, then LIWORK >= MAX(1,M+N-1) +! If WHTSVD == 4, then LIWORK >= MAX(3,M+3*N) +! Then, if JOBF == 'X', then LIWORK = MAX(LIWORK,N). +! If on entry LIWORK = -1, then a workspace query is +! assumed and the procedure only computes the minimal +! and the optimal workspace lengths for both WORK and +! IWORK. See the descriptions of WORK and IWORK. +!..... +! INFO (output) INTEGER +! -i < 0 :: On entry, the i-th argument had an +! illegal value +! = 0 :: Successful return. +! = 1 :: Void input. Quick exit (M=0 or N=0). +! = 2 :: The SVD computation of X did not converge. +! Suggestion: Check the input data and/or +! repeat with different WHTSVD. +! = 3 :: The computation of the eigenvalues did not +! converge. +! = 4 :: If data scaling was requested on input and +! the procedure found inconsistency in the data +! such that for some column index i, +! X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set +! to zero if JOBS=='C'. The computation proceeds +! with original or modified data and warning +! flag is set with INFO=4. +!............................................................. +!............................................................. +! Parameters +! ~~~~~~~~~~ + REAL(KIND=WP), PARAMETER :: ONE = 1.0_WP + REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP + COMPLEX(KIND=WP), PARAMETER :: ZONE = ( 1.0_WP, 0.0_WP ) + COMPLEX(KIND=WP), PARAMETER :: ZZERO = ( 0.0_WP, 0.0_WP ) + +! Local scalars +! ~~~~~~~~~~~~~ + REAL(KIND=WP) :: OFL, ROOTSC, SCALE, SMALL, & + SSUM, XSCL1, XSCL2 + INTEGER :: i, j, IMINWR, INFO1, INFO2, & + IWRSDD, LWRKEV, LWRSDD, LWRSVD, & + LWRSVJ, LWRSVQ, MLWORK, MWRKEV, & + MWRSDD, MWRSVD, MWRSVJ, MWRSVQ, & + NUMRNK, OLWORK, MLRWRK + LOGICAL :: BADXY, FORWRD, LQUERY, SCCOLX, & + SCCOLY, WNTEX, WNTREF, WNTRES, & + WNTVEC + CHARACTER :: JOBZL, T_OR_N + CHARACTER :: JSVOPT + +! Local arrays +! ~~~~~~~~~~~~ + REAL(KIND=WP) :: RDUMMY(2), RDUMMY2(2) +! External funcions (BLAS and LAPACK) +! ~~~~~~~~~~~~~~~~~ + REAL(KIND=WP) ZLANGE, DLAMCH, DZNRM2 + EXTERNAL ZLANGE, DLAMCH, DZNRM2, IZAMAX, IDAMIN + INTEGER IZAMAX, IDAMIN + LOGICAL DISNAN, LSAME + EXTERNAL DISNAN, LSAME + +! External subroutines (BLAS and LAPACK) +! ~~~~~~~~~~~~~~~~~~~~ + EXTERNAL ZAXPY, ZGEMM, ZDSCAL + EXTERNAL ZHEEV, ZHEEVD, ZGEJSV, ZGEQRF, ZGESDD, & + ZGESVD, ZGESVDQ, ZLACPY, ZLAPMT, ZLASCL, & + ZLASSQ, ZUNGQR, XERBLA + +! Intrinsic functions +! ~~~~~~~~~~~~~~~~~~~ + INTRINSIC CMPLX, CONJG, DBLE, INT, MAX, SQRT +!............................................................ +! +! Test the input arguments +! + WNTRES = LSAME(JOBR,'R') + SCCOLX = LSAME(JOBS,'S') .OR. LSAME(JOBS,'C') + SCCOLY = LSAME(JOBS,'Y') + WNTVEC = LSAME(JOBZ,'V') + WNTREF = LSAME(JOBF,'R') + WNTEX = LSAME(JOBF,'E') .OR. LSAME(JOBF,'X') + INFO = 0 + LQUERY = ( ( LZWORK == -1 ) .OR. ( LIWORK == -1 ) & + .OR. ( LRWORK == -1 ) ) +! + IF ( .NOT. (SCCOLX .OR. SCCOLY .OR. & + LSAME(JOBS,'N')) ) THEN + INFO = -1 + ELSE IF ( .NOT. (WNTVEC .OR. LSAME(JOBZ,'N') & + .OR. LSAME(JOBZ,'F')) ) THEN + INFO = -2 + ELSE IF ( .NOT. (WNTRES .OR. LSAME(JOBR,'N')) .OR. & + ( WNTRES .AND. (.NOT.WNTVEC) ) ) THEN + INFO = -3 + ELSE IF ( .NOT. (WNTREF .OR. WNTEX .OR. & + LSAME(JOBF,'N') ) ) THEN + INFO = -4 + ELSE IF ( .NOT.((WHTSVD == 1) .OR. (WHTSVD == 2) .OR. & + (WHTSVD == 3) .OR. (WHTSVD == 4) ))THEN + INFO = -5 + ELSE IF ( .NOT.((WHTSYM == 1) .OR. (WHTSYM == 2))) THEN + INFO = -6 + ELSE IF ( .NOT.((WHTEIG == 1) .OR. (WHTEIG == 2))) THEN + INFO = -7 + ELSE IF ( M < 0 ) THEN + INFO = -8 + ELSE IF ( ( N < 0 ) .OR. ( N > M ) ) THEN + INFO = -9 + ELSE IF ( LDX < M ) THEN + INFO = -11 + ELSE IF ( LDY < M ) THEN + INFO = -13 + ELSE IF ( .NOT. (( NRNK == -2).OR.(NRNK == -1).OR. & + ((NRNK >= 1).AND.(NRNK <=N ))) ) THEN + INFO = -14 + ELSE IF ( ( TOL < ZERO ) .OR. ( TOL >= ONE ) ) THEN + INFO = -15 + ELSE IF ( LDZ < M ) THEN + INFO = -19 + ELSE IF ( (WNTREF .OR. WNTEX ) .AND. ( LDB < M ) ) THEN + INFO = -22 + ELSE IF ( LDW < N ) THEN + INFO = -24 + ELSE IF ( LDS < N ) THEN + INFO = -26 + END IF +! + IF ( INFO == 0 ) THEN + ! Compute the minimal and the optimal workspace + ! requirements. Simulate running the code and + ! determine minimal and optimal sizes of the + ! workspace at any moment of the run. + IF ( N == 0 ) THEN + ! Quick return. All output except K is void. + ! INFO=1 signals the void input. + ! In case of a workspace query, the default + ! minimal workspace lengths are returned. + IF ( LQUERY ) THEN + IWORK(1) = 1 + RWORK(1) = 1 + ZWORK(1) = 2 + ZWORK(2) = 2 + ELSE + K = 0 + END IF + INFO = 1 + RETURN + END IF + MLWORK = MAX(2,N) + OLWORK = MAX(2,N) + IMINWR = 1 + MLRWRK = MAX(1,N) + + SELECT CASE ( WHTSVD ) + CASE (1) + ! The following is specified as the minimal + ! length of WORK in the definition of ZGESVD: + ! MWRSVD = MAX(1,2*MIN(M,N)+MAX(M,N)) + MWRSVD = MAX(1,2*MIN(M,N)+MAX(M,N)) + MLWORK = MAX(MLWORK,MWRSVD) + MLRWRK = MAX(MLRWRK,N + 5*MIN(M,N)) + IF ( LQUERY ) THEN + CALL ZGESVD( 'O', 'S', M, N, X, LDX, RWORK, & + B, LDB, W, LDW, ZWORK, -1, RDUMMY, INFO1 ) + LWRSVD = INT( ZWORK(1) ) + OLWORK = MAX(OLWORK,LWRSVD) + END IF + CASE (2) + ! The following is specified as the minimal + ! length of WORK in the definition of ZGESDD: + ! MWRSDD = 2*min(M,N)*min(M,N)+2*min(M,N)+max(M,N). + ! RWORK length: 5*MIN(M,N)*MIN(M,N)+7*MIN(M,N) + ! In LAPACK 3.10.1 RWORK is defined differently. + ! Below we take max over the two versions. + ! IMINWR = 8*MIN(M,N) + MWRSDD = 2*MIN(M,N)*MIN(M,N)+2*MIN(M,N)+MAX(M,N) + MLWORK = MAX(MLWORK,MWRSDD) + IMINWR = 8*MIN(M,N) + MLRWRK = MAX( MLRWRK, N + & + MAX( 5*MIN(M,N)*MIN(M,N)+7*MIN(M,N), & + 5*MIN(M,N)*MIN(M,N)+5*MIN(M,N), & + 2*MAX(M,N)*MIN(M,N)+ & + 2*MIN(M,N)*MIN(M,N)+MIN(M,N) ) ) + IF ( LQUERY ) THEN + CALL ZGESDD( 'O', M, N, X, LDX, RWORK, B,LDB,& + W, LDW, ZWORK, -1, RDUMMY, IWORK, INFO1 ) + LWRSDD = MAX( MWRSDD,INT( ZWORK(1) )) + ! Possible bug in ZGESDD optimal workspace size. + OLWORK = MAX(OLWORK,LWRSDD) + END IF + CASE (3) + CALL ZGESVDQ( 'H', 'P', 'N', 'R', 'R', M, N, & + X, LDX, RWORK, Z, LDZ, W, LDW, NUMRNK, & + IWORK, -1, ZWORK, -1, RDUMMY, -1, INFO1 ) + IMINWR = IWORK(1) + MWRSVQ = INT(ZWORK(2)) + MLWORK = MAX(MLWORK,MWRSVQ) + MLRWRK = MAX(MLRWRK,N + INT(RDUMMY(1))) + IF ( LQUERY ) THEN + LWRSVQ = INT(ZWORK(1)) + OLWORK = MAX(OLWORK,LWRSVQ) + END IF + CASE (4) + JSVOPT = 'J' + CALL ZGEJSV( 'F', 'U', JSVOPT, 'N', 'N', 'P', M, & + N, X, LDX, RWORK, Z, LDZ, W, LDW, & + ZWORK, -1, RDUMMY, -1, IWORK, INFO1 ) + IMINWR = IWORK(1) + MWRSVJ = INT(ZWORK(2)) + MLWORK = MAX(MLWORK,MWRSVJ) + MLRWRK = MAX(MLRWRK,N + MAX(7,INT(RDUMMY(1)))) + IF ( LQUERY ) THEN + LWRSVJ = INT(ZWORK(1)) + OLWORK = MAX(OLWORK,LWRSVJ) + END IF + END SELECT + + IF ( WNTVEC .OR. WNTEX .OR. LSAME(JOBZ,'F') ) THEN + JOBZL = 'V' + ELSE + JOBZL = 'N' + END IF + + SELECT CASE ( WHTEIG ) + CASE (1) + ! Workspace calculation to the ZHEEV call + MWRKEV = MAX( 1, 2*N-1 ) + MLWORK = MAX(MLWORK,MWRKEV) + MLRWRK = MAX(MLRWRK,N+MAX(1,3*N-2)) + IF ( LQUERY ) THEN + CALL ZHEEV( JOBZL, 'L', N, S, LDS, EIGS, ZWORK, & + -1, RDUMMY, INFO1 ) ! LAPACK CALL + LWRKEV = MAX( MWRKEV, INT(ZWORK(1)) ) + OLWORK = MAX( OLWORK, LWRKEV ) + END IF + CASE (2) + IF ( LSAME(JOBZL,'V') ) THEN + MWRKEV = MAX( 1, 2*N + N*N ) + IWRSDD = MAX( 1, 3+5*N ) + MLRWRK = MAX( MLRWRK, N + (1+5*N+2*N*N)) + ELSE + MWRKEV = MAX( 1, N+1) + IWRSDD = 1 + MLRWRK = MAX( MLRWRK, N + N ) + END IF + MLWORK = MAX(MLWORK,N+MWRKEV) + IF ( LQUERY ) THEN + CALL ZHEEVD( JOBZL, 'U', N, S, LDS, EIGS, ZWORK, & + -1, RDUMMY, -1, IWORK, -1, INFO1 ) ! LAPACK CALL + LWRKEV = MAX( MWRKEV, INT(ZWORK(1)) ) + OLWORK = MAX( OLWORK, N+LWRKEV ) + IWRSDD = IWORK(1) + MLRWRK = MAX(MLRWRK,N+INT(RDUMMY(1))) + ! In ZHEEVD optimal and minimal lengts of the + ! real workspace are the same. + END IF + IMINWR = MAX(IMINWR,IWRSDD) + END SELECT + + IF ( LSAME(JOBF,'X') ) THEN + MLWORK = MAX(MLWORK,N+2*N) + ! ZGEQRF and ZUNGQR need >= 2*N locations + IF ( LQUERY ) THEN + CALL ZGEQRF( M, N, B, LDB, ZWORK, ZWORK, & + -1, INFO1 ) + OLWORK = MAX( OLWORK, 2*N+INT(ZWORK(1)) ) + CALL ZUNGQR( M, N, N, B, LDB, ZWORK, ZWORK, & + -1, INFO1 ) + OLWORK = MAX( OLWORK, 2*N+INT(ZWORK(1)) ) + END IF + IMINWR = MAX( IMINWR, N ) ! N locations for a permutation + END IF + + IF ( LIWORK < IMINWR .AND. (.NOT.LQUERY) ) INFO = -32 + IF ( LRWORK < MLRWRK .AND. (.NOT.LQUERY) ) INFO = -30 + IF ( LZWORK < MLWORK .AND. (.NOT.LQUERY) ) INFO = -28 + END IF +! + IF( INFO /= 0 ) THEN + CALL XERBLA( 'ZHEDMD', -INFO ) + RETURN + ELSE IF ( LQUERY ) THEN +! Return minimal and optimal workspace sizes + IWORK(1) = IMINWR + ZWORK(1) = MLWORK + ZWORK(2) = OLWORK + RWORK(1) = MLRWRK + RETURN + END IF +!............................................................ +! + OFL = DLAMCH('O') + SMALL = DLAMCH('S') + BADXY = .FALSE. +! +! <1> Optional scaling of the snapshots (columns of X, Y) +! ========================================================== + IF ( SCCOLX ) THEN + ! The columns of X will be normalized. + ! To prevent overflows, the column norms of X are + ! carefully computed using ZLASSQ. + K = 0 + DO i = 1, N + !RWORK(i) = DZNRM2( M, X(1,i), 1 ) + SCALE = ZERO + CALL ZLASSQ( M, X(1,i), 1, SCALE, SSUM ) + IF ( DISNAN(SCALE) .OR. DISNAN(SSUM) ) THEN + K = 0 + INFO = -10 + CALL XERBLA('ZHEDMD',-INFO) + END IF + IF ( (SCALE /= ZERO) .AND. (SSUM /= ZERO) ) THEN + ROOTSC = SQRT(SSUM) + IF ( SCALE .GE. (OFL / ROOTSC) ) THEN +! Norm of X(:,i) overflows. First, X(:,i) +! is scaled by +! ( ONE / ROOTSC ) / SCALE = 1/||X(:,i)||_2. +! Next, the norm of X(:,i) is stored without +! overflow as WORK(i) = - SCALE * (ROOTSC/M), +! the minus sign indicating the 1/M factor. +! Scaling is performed without overflow, and +! underflow may occur in the smallest entries +! of X(:,i). The relative backward and forward +! errors are small in the ell_2 norm. + CALL ZLASCL( 'G', 0, 0, SCALE, ONE/ROOTSC, & + M, 1, X(1,i), M, INFO1 ) + RWORK(i) = - SCALE * ( ROOTSC / DBLE(M) ) + ELSE +! X(:,i) will be scaled to unit 2-norm + RWORK(i) = SCALE * ROOTSC + CALL ZLASCL( 'G',0, 0, RWORK(i), ONE, M, 1, & + X(1,i), M, INFO1 ) ! LAPACK CALL +! X(1:M,i) = (ONE/RWORK(i)) * X(1:M,i) ! INTRINSIC + END IF + ELSE + RWORK(i) = ZERO + K = K + 1 + END IF + END DO + IF ( K == N ) THEN + ! All columns of X are zero. Return error code -8. + ! (the 8th input variable had an illegal value) + K = 0 + INFO = -8 + CALL XERBLA('ZHEDMD',-INFO) + RETURN + END IF + DO i = 1, N +! Now, apply the same scaling to the columns of Y. + IF ( RWORK(i) > ZERO ) THEN + CALL ZDSCAL( M, ONE/RWORK(i), Y(1,i), 1 ) ! BLAS CALL +! Y(1:M,i) = (ONE/RWORK(i)) * Y(1:M,i) ! INTRINSIC + ELSE IF ( RWORK(i) < ZERO ) THEN + CALL ZLASCL( 'G', 0, 0, -RWORK(i), & + ONE/DBLE(M), M, 1, Y(1,i), M, INFO1 ) ! LAPACK CALL + ELSE IF ( ABS(Y(IZAMAX(M, Y(1,i),1),i )) & + /= ZERO ) THEN +! X(:,i) is zero vector. For consistency, +! Y(:,i) should also be zero. If Y(:,i) is not +! zero, then the data might be inconsistent or +! corrupted. If JOBS == 'C', Y(:,i) is set to +! zero and a warning flag is raised. +! The computation continues but the +! situation will be reported in the output. + BADXY = .TRUE. + IF ( LSAME(JOBS,'C')) & + CALL ZDSCAL( M, ZERO, Y(1,i), 1 ) ! BLAS CALL + END IF + END DO + END IF + ! + IF ( SCCOLY ) THEN + ! The columns of Y will be normalized. + ! To prevent overflows, the column norms of Y are + ! carefully computed using ZLASSQ. + DO i = 1, N + !RWORK(i) = DNRM2( M, Y(1,i), 1 ) + SCALE = ZERO + CALL ZLASSQ( M, Y(1,i), 1, SCALE, SSUM ) + IF ( DISNAN(SCALE) .OR. DISNAN(SSUM) ) THEN + K = 0 + INFO = -12 + CALL XERBLA('ZHEDMD',-INFO) + END IF + IF ( SCALE /= ZERO .AND. (SSUM /= ZERO) ) THEN + ROOTSC = SQRT(SSUM) + IF ( SCALE .GE. (OFL / ROOTSC) ) THEN +! Norm of Y(:,i) overflows. First, Y(:,i) +! is scaled by +! ( ONE / ROOTSC ) / SCALE = 1/||Y(:,i)||_2. +! Next, the norm of Y(:,i) is stored without +! overflow as WORK(i) = - SCALE * (ROOTSC/M), +! the minus sign indicating the 1/M factor. +! Scaling is performed without overflow, and +! underflow may occur in the smallest entries +! of Y(:,i). The relative backward and forward +! errors are small in the ell_2 norm. + CALL ZLASCL( 'G', 0, 0, SCALE, ONE/ROOTSC, & + M, 1, Y(1,i), M, INFO1 ) + RWORK(i) = - SCALE * ( ROOTSC / DBLE(M) ) + ELSE +! X(:,i) will be scaled to unit 2-norm + RWORK(i) = SCALE * ROOTSC + CALL ZLASCL( 'G',0, 0, RWORK(i), ONE, M, 1, & + Y(1,i), M, INFO1 ) ! LAPACK CALL +! Y(1:M,i) = (ONE/RWORK(i)) * Y(1:M,i) ! INTRINSIC + END IF + ELSE + RWORK(i) = ZERO + END IF + END DO + DO i = 1, N +! Now, apply the same scaling to the columns of X. + IF ( RWORK(i) > ZERO ) THEN + CALL ZDSCAL( M, ONE/RWORK(i), X(1,i), 1 ) ! BLAS CALL +! X(1:M,i) = (ONE/RWORK(i)) * X(1:M,i) ! INTRINSIC + ELSE IF ( RWORK(i) < ZERO ) THEN + CALL ZLASCL( 'G', 0, 0, -RWORK(i), & + ONE/DBLE(M), M, 1, X(1,i), M, INFO1 ) ! LAPACK CALL + ELSE IF ( ABS(X(IZAMAX(M, X(1,i),1),i )) & + /= ZERO ) THEN +! Y(:,i) is zero vector. If X(:,i) is not +! zero, then a warning flag is raised. +! The computation continues but the +! situation will be reported in the output. + BADXY = .TRUE. + END IF + END DO + END IF +! +! <2> SVD of the data snapshot matrix X. +! ===================================== +! The left singular vectors are stored in the array X. +! The right singular vectors are in the array W. +! The array W will later on contain the eigenvectors +! of a Rayleigh quotient. + NUMRNK = N + SELECT CASE ( WHTSVD ) + CASE (1) + CALL ZGESVD( 'O', 'S', M, N, X, LDX, RWORK, B, & + LDB, W, LDW, ZWORK, LZWORK, RWORK(N+1), INFO1 ) ! LAPACK CALL + T_OR_N = 'C' + CASE (2) + CALL ZGESDD( 'O', M, N, X, LDX, RWORK, B, LDB, W, & + LDW, ZWORK, LZWORK, RWORK(N+1), IWORK, INFO1 ) ! LAPACK CALL + T_OR_N = 'C' + CASE (3) + CALL ZGESVDQ( 'H', 'P', 'N', 'R', 'R', M, N, & + X, LDX, RWORK, Z, LDZ, W, LDW, & + NUMRNK, IWORK, LIWORK, ZWORK, & + LZWORK, RWORK(N+1), LRWORK-N, INFO1) ! LAPACK CALL + CALL ZLACPY( 'A', M, NUMRNK, Z, LDZ, X, LDX ) ! LAPACK CALL + T_OR_N = 'C' + CASE (4) + CALL ZGEJSV( 'F', 'U', JSVOPT, 'N', 'N', 'P', M, & + N, X, LDX, RWORK, Z, LDZ, W, LDW, & + ZWORK, LZWORK, RWORK(N+1), LRWORK-N, IWORK, INFO1 ) ! LAPACK CALL + CALL ZLACPY( 'A', M, N, Z, LDZ, X, LDX ) ! LAPACK CALL + T_OR_N = 'N' + XSCL1 = RWORK(N+1) + XSCL2 = RWORK(N+2) + IF ( XSCL1 /= XSCL2 ) THEN + ! This is an exceptional situation. If the + ! data matrices are not scaled and the + ! largest singular value of X overflows. + ! In that case ZGEJSV can return the SVD + ! in scaled form. The scaling factor can be used + ! to rescale the data (X and Y). + CALL ZLASCL( 'G', 0, 0, XSCL1, XSCL2, M, N, Y, LDY, INFO2 ) + END IF + END SELECT +! + IF ( INFO1 > 0 ) THEN + ! The SVD selected subroutine did not converge. + ! Return with an error code. + INFO = 2 + RETURN + END IF +! + IF ( RWORK(1) == ZERO ) THEN + ! The largest computed singular value of (scaled) + ! X is zero. Return error code -8 + ! (the 8th input variable had an illegal value). + K = 0 + INFO = -8 + CALL XERBLA('ZHEDMD',-INFO) + RETURN + END IF +! + !<3> Determine the numerical rank of the data + ! snapshots matrix X. This depends on the + ! parameters NRNK and TOL. + + SELECT CASE ( NRNK ) + CASE ( -1 ) + K = 1 + DO i = 2, NUMRNK + IF ( ( RWORK(i) <= RWORK(1)*TOL ) .OR. & + ( RWORK(i) <= SMALL ) ) EXIT + K = K + 1 + END DO + CASE ( -2 ) + K = 1 + DO i = 1, NUMRNK-1 + IF ( ( RWORK(i+1) <= RWORK(i)*TOL ) .OR. & + ( RWORK(i) <= SMALL ) ) EXIT + K = K + 1 + END DO + CASE DEFAULT + K = 1 + DO i = 2, NRNK + IF ( RWORK(i) <= SMALL ) EXIT + K = K + 1 + END DO + END SELECT + ! Now, U = X(1:M,1:K) is the SVD/POD basis for the + ! snapshot data in the input matrix X. + !<4> Compute the Rayleigh quotient S = U^H * A * U. + ! Depending on the requsted outputs, the computation + ! is organized to compute additional auxiliary + ! matrices (for the residuals and refinements). + ! + ! In all formulas below, we need V_k*Sigma_k^(-1) + ! where either V_k is in W(1:N,1:K), or V_k^H is in + ! W(1:K,1:N). Here Sigma_k=diag(WORK(1:K)). + IF ( LSAME(T_OR_N, 'N') ) THEN + DO i = 1, K + CALL ZDSCAL( N, ONE/RWORK(i), W(1,i), 1 ) ! BLAS CALL + ! W(1:N,i) = (ONE/RWORK(i)) * W(1:N,i) ! INTRINSIC + END DO + ELSE + ! This non-unit stride access is due to the fact + ! that ZGESVD, ZGESVDQ and ZGESDD return the + ! transposed matrix of the right singular vectors. + !DO i = 1, K + ! CALL DSCAL( N, ONE/RWORK(i), W(i,1), LDW ) ! BLAS CALL + ! ! W(i,1:N) = (ONE/RWORK(i)) * W(i,1:N) ! INTRINSIC + !END DO + DO i = 1, K + RWORK(N+i) = ONE/RWORK(i) + END DO + DO j = 1, N + DO i = 1, K + W(i,j) = CMPLX(RWORK(N+i),ZERO,KIND=WP)*W(i,j) + END DO + END DO + END IF +! + IF ( WNTREF ) THEN + ! + ! Need A*U(:,1:K)=Y*V_k*inv(diag(WORK(1:K))) + ! for computing the refined Ritz vectors + ! (optionally, outside ZHEDMD). + CALL ZGEMM( 'N', T_OR_N, M, K, N, ONE, Y, LDY, W, & + LDW, ZZERO, Z, LDZ ) ! BLAS CALL + ! Z(1:M,1:K)=MATMUL(Y(1:M,1:N),TRANSPOSE(CONJG(W(1:K,1:N)))) ! INTRINSIC, for T_OR_N=='C' + ! Z(1:M,1:K)=MATMUL(Y(1:M,1:N),W(1:N,1:K)) ! INTRINSIC, for T_OR_N=='N' + ! + ! At this point Z contains + ! A * U(:,1:K) = Y * V_k * Sigma_k^(-1), and + ! this is needed for computing the residuals. + ! This matrix is returned in the array B and + ! it can be used to compute refined Ritz vectors. + CALL ZLACPY( 'A', M, K, Z, LDZ, B, LDB ) ! BLAS CALL + ! B(1:M,1:K) = Z(1:M,1:K) ! INTRINSIC + + CALL ZGEMM( 'C', 'N', K, K, M, ZONE, X, LDX, Z, & + LDZ, ZZERO, S, LDS ) ! BLAS CALL + ! S(1:K,1:K) = MATMUL(TANSPOSE(X(1:M,1:K)),Z(1:M,1:K)) ! INTRINSIC + ! At this point S = U^T * A * U is the Rayleigh quotient. + ELSE + ! A * U(:,1:K) is not explicitly needed and the + ! computation is organized differently. The Rayleigh + ! quotient is computed more efficiently. + CALL ZGEMM( 'C', 'N', K, N, M, ZONE, X, LDX, Y, LDY, & + ZZERO, Z, LDZ ) ! BLAS CALL + ! Z(1:K,1:N) = MATMUL( TRANSPOSE(X(1:M,1:K)), Y(1:M,1:N) ) ! INTRINSIC + ! In the two ZGEMM calls here, can use K for LDZ. + CALL ZGEMM( 'N', T_OR_N, K, K, N, ZONE, Z, LDZ, W, & + LDW, ZZERO, S, LDS ) ! BLAS CALL + ! S(1:K,1:K) = MATMUL(Z(1:K,1:N),TRANSPOSE(CONJG(W(1:K,1:N)))) ! INTRINSIC, for T_OR_N=='C' + ! S(1:K,1:K) = MATMUL(Z(1:K,1:N),(W(1:N,1:K))) ! INTRINSIC, for T_OR_N=='N' + ! At this point S = U^T * A * U is the Rayleigh quotient. + ! If the residuals are requested, save scaled V_k into Z. + ! Recal that V_k or V_k^T is stored in W. + IF ( WNTRES .OR. WNTEX ) THEN + IF ( LSAME(T_OR_N, 'N') ) THEN + CALL ZLACPY( 'A', N, K, W, LDW, Z, LDZ ) + ELSE + CALL ZLACPY( 'A', K, N, W, LDW, Z, LDZ ) + END IF + END IF + END IF + + SELECT CASE ( WHTSYM ) + CASE (1) + CALL ZLACPY( 'L', K, K, S, LDS, W, LDW ) + CASE (2) + ! This is the symmetrizer from the piDMD [6], + ! based on a solution of the symmetric Procrustes + ! problem. Here included for comparisons/study and + ! for the sake of completeness. + DO i = 1, K-1 + W(i,i) = S(i,i) + DO j = i+1, K + W(j,i) = ( RWORK(i)*(CONJG(S(j,i))*RWORK(i)) & + + RWORK(j)*(S(i,j)*RWORK(j)) ) / & + ( RWORK(i)**2 + RWORK(j)**2 ) + END DO + END DO + W(k,k) = S(k,k) + END SELECT + ! + !<5> Compute the Ritz values and (if requested) the + ! right eigenvectors of the Rayleigh quotient. + ! + ! The LAPACK eigensolvers ZHEEV and ZHEEVD return the + ! eigenvectors in the array that contains upper or + ! lower triangle of the symmetric Rayleigh quotient. + ! + SELECT CASE ( WHTEIG ) + CASE (1) + CALL ZHEEV( JOBZL, 'L', K, W, LDW, EIGS, ZWORK, & + LZWORK, RWORK(N+1), LRWORK-N, INFO1 ) ! LAPACK CALL + CASE (2) + CALL ZHEEVD( JOBZL, 'L', K, W, LDW, EIGS, ZWORK, & + LZWORK, RWORK(N+1), LRWORK-N, IWORK, LIWORK, INFO1 ) ! LAPACK CALL + END SELECT + + ! + ! W(1:K,1:K) contains the eigenvectors of the Rayleigh + ! quotient. + IF ( INFO1 > 0 ) THEN + ! ZHEEV/ZHEEVD failed to compute the eigenvalues and + ! eigenvectors of the Rayleigh quotient. + INFO = 3 + RETURN + END IF +! + ! <6> Compute the eigenvectors (if requested) and, + ! the residuals (if requested). + ! + IF ( WNTVEC .OR. WNTEX ) THEN + IF ( WNTRES ) THEN + IF ( WNTREF ) THEN + ! Here, if the refinement is requested, we have + ! A*U(:,1:K) already computed and stored in Z. + ! For the residuals, need Y = A * U(:,1;K) * W. + ! W is stored in S. + CALL ZGEMM( 'N', 'N', M, K, K, ZONE, Z, LDZ, W, & + LDW, ZZERO, Y, LDY ) ! BLAS CALL + ! Y(1:M,1:K) = Z(1:M,1:K) * W(1:K,1:K) ! INTRINSIC + ! This frees Z; Y contains A * U(:,1:K) * W. + ELSE + ! Compute S = V_k * Sigma_k^(-1) * W, where + ! V_k * Sigma_k^(-1) is stored in Z + CALL ZGEMM( T_OR_N, 'N', N, K, K, ZONE, Z, LDZ, & + W, LDW, ZZERO, S, LDS) + ! Then, compute Z = Y * S = + ! = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) = + ! = A * U(:,1:K) * W(1:K,1:K) + CALL ZGEMM( 'N', 'N', M, K, N, ZONE, Y, LDY, S, & + LDS, ZZERO, Z, LDZ) + ! Save a copy of Z into Y and free Z for holding + ! the Ritz vectors. + CALL ZLACPY( 'A', M, K, Z, LDZ, Y, LDY ) + IF ( WNTEX ) CALL ZLACPY( 'A', M, K, Z, LDZ, B, LDB ) + END IF + ELSE IF ( WNTEX ) THEN + ! Compute S = V_k * Sigma_k^(-1) * W, where + ! V_k * Sigma_k^(-1) is stored in Z + CALL ZGEMM( T_OR_N, 'N', N, K, K, ZONE, Z, LDZ, & + W, LDW, ZZERO, S, LDS) + ! Then, compute Z = Y * S = + ! = Y * V_k * Sigma_k^(-1) * W(1:K,1:K) = + ! = A * U(:,1:K) * W(1:K,1:K) + CALL ZGEMM( 'N', 'N', M, K, N, ZONE, Y, LDY, S, & + LDS, ZZERO, B, LDB ) + ! The above call replaces the following two calls + ! that were used in the developing-testing phase. + ! CALL ZGEMM( 'N', 'N', M, K, N, ZONE, Y, LDY, S, & + ! LDS, ZZERO, Z, LDZ ) + ! Save a copy of Z into B and free Z for holding + ! the Ritz vectors. + ! CALL ZLACPY( 'A', M, K, Z, LDZ, B, LDB ) + END IF +! + ! Compute the Ritz vectors + IF ( WNTVEC ) CALL ZGEMM( 'N', 'N', M, K, K, ZONE, X, LDX, W, LDW, & + ZZERO, Z, LDZ ) ! BLAS CALL + ! Z(1:M,1:K) = MATMUL(X(1:M,1:K), W(1:K,1:K)) ! INTRINSIC +! + IF ( WNTRES ) THEN + DO i = 1, K + ! Compute the residuals + CALL ZAXPY( M, -CMPLX(EIGS(i),KIND=WP), Z(1,i), 1, Y(1,i), 1 ) ! BLAS CALL + ! Y(1:M,i) = Y(1:M,i) - EIGS(i) * Z(1:M,i) ! INTRINSIC + RES(i) = DZNRM2( M, Y(1,i), 1) ! BLAS CALL + END DO + END IF + END IF +! + IF ( LSAME(JOBF,'X') ) THEN + ! If the Exact DMD eigenvectors are requested, the + ! original EDMD vectors must be orthogonalized. + ! Orthogonalization may change the vector so that the + ! corresponding residuals may increase. (Data driven + ! setting does not allow recomputing the Razleigh + ! quotients.) To preseve the quality of the best EDMD + ! vectors, orthogonalization is prformed in order of + ! increasing residuals. For more details see [4]. + DO i = 1, K + IWORK(i) = i + END DO + CALL ZCOPY( K, RES, 1, RWORK(N+1), 1 ) + DO i = 1, K-1 + j = IDAMIN( K-i+1, RWORK(N+i), 1 ) + i - 1 + IF ( j /= i ) THEN + INFO1 = IWORK(i) + IWORK(i) = IWORK(j) + IWORK(j) = INFO1 + SCALE = RWORK(N+i) + RWORK(N+i) = RWORK(N+j) + RWORK(N+j) = SCALE + END IF + END DO + FORWRD = .TRUE. + CALL ZLAPMT( FORWRD, M, K, B, LDB, IWORK ) + ! Here we need the Gram-Schmidt orthogonalization + ! of the columns of B. The following two lines + ! use the QR factorization subroutine ZGEQRF. This + ! can be replaced with a more efficient Gram-Schmidt + ! implementation. The matrix B is not expected to + ! be ill-conditioned, so Gram-Schmid will be OK. + CALL ZGEQRF( M, K, B, LDB, ZWORK, ZWORK(K+1), & + LZWORK-K, INFO1 ) + CALL ZUNGQR( M, K, K, B, LDB, ZWORK, & + ZWORK(K+1), LZWORK-K, INFO1 ) + END IF + + IF ( WHTSVD == 4 ) THEN + RWORK(N+1) = XSCL1 + RWORK(N+2) = XSCL2 + END IF +! +! Successful exit. + IF ( .NOT. BADXY ) THEN + INFO = 0 + ELSE + ! A warning on possible data inconsistency. + ! This shouild be a rare event. + INFO = 4 + END IF +!............................................................ + RETURN +! ...... + END SUBROUTINE ZHEDMD + \ No newline at end of file diff --git a/SRC/zhedmdq.f90 b/SRC/zhedmdq.f90 new file mode 100644 index 0000000000..4a5d6e5b68 --- /dev/null +++ b/SRC/zhedmdq.f90 @@ -0,0 +1,726 @@ +SUBROUTINE ZHEDMDQ( JOBS, JOBZ, JOBR, JOBQ, JOBT, JOBF, & + WHTSVD, WHTSYM, WHTEIG, M, N, F, LDF, & + X, LDX, Y, LDY, NRNK, TOL, K, EIGS, & + Z, LDZ, RES, B, LDB, V, LDV, & + S, LDS, ZWORK, LZWORK, WORK, LWORK, & + IWORK, LIWORK, INFO ) +! August 2022 +!..... + USE iso_fortran_env + IMPLICIT NONE + INTEGER, PARAMETER :: WP = real64 +!..... +! Scalar arguments + CHARACTER, INTENT(IN) :: JOBS, JOBZ, JOBR, JOBQ, & + JOBT, JOBF + INTEGER, INTENT(IN) :: WHTSVD, WHTSYM, WHTEIG, M, & + N, LDF, LDX, LDY, & + NRNK, LDZ, LDB, LDV, & + LDS, LZWORK, LWORK, LIWORK + INTEGER, INTENT(OUT) :: INFO, K + REAL(KIND=WP), INTENT(IN) :: TOL +! Array arguments + COMPLEX(KIND=WP), INTENT(INOUT) :: F(LDF,*) + COMPLEX(KIND=WP), INTENT(OUT) :: X(LDX,*), Y(LDY,*), & + Z(LDZ,*), B(LDB,*), & + V(LDV,*), S(LDS,*) + REAL(KIND=WP), INTENT(OUT) :: EIGS(*) + COMPLEX(KIND=WP), INTENT(OUT) :: ZWORK(*) + REAL(KIND=WP), INTENT(OUT) :: RES(*) + REAL(KIND=WP), INTENT(OUT) :: WORK(*) + INTEGER, INTENT(OUT) :: IWORK(*) +!..... +! Purpose +! ======= +! ZHEDMDQ computes the Dynamic Mode Decomposition (DMD) for +! a pair of data snapshot matrices, using a QR factorization +! based compression of the data. For the input matrices +! X and Y, with Y = A*X and an unaccessible Hermitian matrix +! A, ZHEDMDQ computes a certain number of Ritz pairs of A using +! the standard Rayleigh-Ritz extraction from a subspace of +! range(X) that is determined using the leading left singular +! vectors of X. Optionally, ZHEDMDQ returns the residuals +! of the computed Ritz pairs, the information needed for +! a refinement of the Ritz vectors, or the eigenvectors of +! the Exact DMD. +! For furter details see the references listed below. +! For more details of the implementation see [3]. +! +! References +! ========== +! [1] P. Schmid: Dynamic mode decomposition of numerical +! and experimental data, +! Journal of Fluid Mechanics 656, 5-28, 2010. +! [2] Z. Drmac, I. Mezic, R. Mohr: Data driven modal +! decompositions: analysis and enhancements, +! SIAM J. on Sci. Comp. 40 (4), A2253-A2285, 2018. +! [3] Z. Drmac: A LAPACK implementation of the Dynamic +! Mode Decomposition I. Technical report. AIMDyn Inc. +! October 2022, and LAPACK Working Note 298. +! [4] Z. Drmac: A LAPACK implementation of the Dynamic +! Mode Decomposition II. The symmetric/Hermitian DMD +! (xSYDMD/xHEDMD) Technical report. AIMDyn Inc. +! November 2022. LAPACK Working Note 300. +! [5] J. Tu, C. W. Rowley, D. M. Luchtenburg, S. L. +! Brunton, N. Kutz: On Dynamic Mode Decomposition: +! Theory and Applications, Journal of Computational +! Dynamics 1(2), 391 -421, 2014. +! [6] P. J. Baddoo, B. Herrmann, B. J. McKeon, +! J. N. Kutz, S. L. Brunton: Physics-informed +! dynamic mode decomposition (piDMD), arXiv:2112.04307. +! +! Developed and supported by: +! =========================== +! Developed and coded by Zlatko Drmac, Faculty of Science, +! University of Zagreb; drmac@math.hr +! In cooperation with +! AIMdyn Inc., Santa Barbara, CA. +! and supported by +! - DARPA SBIR project "Koopman Operator-Based Forecasting +! for Nonstationary Processes from Near-Term, Limited +! Observational Data" Contract No: W31P4Q-21-C-0007 +! - DARPA PAI project "Physics-Informed Machine Learning +! Methodologies" Contract No: HR0011-18-9-0033 +! - DARPA MoDyL project "A Data-Driven, Operator-Theoretic +! Framework for Space-Time Analysis of Process Dynamics" +! Contract No: HR0011-16-C-0116 +! Any opinions, findings and conclusions or recommendations +! expressed in this material are those of the author and +! do not necessarily reflect the views of the DARPA SBIR +! Program Office. +!============================================================ +! Distribution Statement A: +! Approved for Public Release, Distribution Unlimited. +! +!============================================================ +!...................................................................... +! Arguments +! ========= +! JOBS (input) CHARACTER*1 +! Determines whether the initial data snapshots are scaled +! by a diagonal matrix. The data snaphots are the columns +! of F. The leading N-1 columns of F are denoted X and the +! trailing N-1 columns are denoted Y. +! 'S' :: The data snapshots matrices X and Y are multiplied +! with a diagonal matrix D so that X*D has unit +! nonzero columns (in the Euclidean 2-norm) +! 'C' :: The snapshots are scaled as with the 'S' option. +! If it is found that an i-th column of X is zero +! vector and the corresponding i-th column of Y is +! non-zero, then the i-th column of Y is set to +! zero and a warning flag is raised. +! 'Y' :: The data snapshots matrices X and Y are multiplied +! by a diagonal matrix D so that Y*D has unit +! nonzero columns (in the Euclidean 2-norm) +! 'N' :: No data scaling. +!..... +! JOBZ (input) CHARACTER*1 +! Determines whether the eigenvectors (Koopman modes) will +! be computed. +! 'V' :: The eigenvectors (Koopman modes) will be computed +! and returned in the matrix Z. +! See the description of Z. +! 'F' :: The eigenvectors (Koopman modes) will be returned +! in factored form as the product Z*V, where Z +! is orthonormal and V contains the eigenvectors +! of the corresponding Rayleigh quotient. +! See the descriptions of V, Z. +! 'Q' :: The eigenvectors (Koopman modes) will be returned +! in factored form as the product Q*Z, where Z +! contains the eigenvectors of the compression of the +! underlying discretised operator onto the span of +! the data snapshots. See the descriptions of F, V, Z. +! Q is from the initial QR factorization. +! 'N' :: The eigenvectors are not computed. +!..... +! JOBR (input) CHARACTER*1 +! Determines whether to compute the residuals. +! 'R' :: The residuals for the computed eigenpairs will +! be computed and stored in the array RES. +! See the description of RES. +! For this option to be legal, JOBZ must be 'V'. +! 'N' :: The residuals are not computed. +!..... +! JOBQ (input) CHARACTER*1 +! Specifies whether to explicitly compute and return the +! orthogonal matrix from the QR factorization. +! 'Q' :: The matrix Q of the QR factorization of the data +! snapshot matrix is computed and stored in the +! array F. See the description of F. +! 'N' :: The matrix Q is not explicitly computed. +!..... +! JOBT (input) CHARACTER*1 +! Specifies whether to return the upper triangular factor +! from the QR factorization. +! 'R' :: The matrix R of the QR factorization of the data +! snapshot matrix F is returned in the array Y. +! See the description of Y and Further details. +! 'N' :: The matrix R is not returned. +!..... +! JOBF (input) CHARACTER*1 +! Specifies whether to store information needed for post- +! processing (e.g. computing refined Ritz vectors) +! 'R' :: The matrix needed for the refinement of the Ritz +! vectors is computed and stored in the array B. +! See the description of B. +! 'E' :: The unscaled eigenvectors of the Exact DMD are +! computed and returned in the array B. See the +! description of B. +! 'X' :: The Exact DMD vectors are orthogonalized and +! returned in the array B. To preserve the +! residuals of the orthogonalized EDMD vectors +! they are reordered and the reordering permutation +! is stored and returned in the array IWORK. +! See the descriptions of B and IWORK, and [4]. +! 'N' :: No eigenvector refinement data is computed. +! To be useful on exit, this option needs JOBQ='Q'. +!..... +! WHTSVD (input) INTEGER, WHSTVD in { 1, 2, 3, 4 } +! Allows for a selection of the SVD algorithm from the +! LAPACK library. +! 1 :: ZGESVD (the QR SVD algorithm) +! 2 :: ZGESDD (the Divide and Conquer algortihm; if enough +! workspace available, this is the fastest option) +! 3 :: ZGESVDQ (the preconditioned QR SVD ; this and 4 +! are the most accurate options) +! 4 :: ZGEJSV (the precondiioned Jacobi SVD; this and 3 +! are the most accurate options) +! For the four methods above, a significant difference in +! the accuracy of small singular values is possible if +! the snapshots vary in norm so that X is severely +! ill-conditioned. If small (smaller than EPS*||X||) +! singular values are of interest and JOBS=='N', then +! the options (3, 4) give the most accurate results, where +! the option 4 is slightly better and with stronger +! theoretical background. +! If JOBS=='S', i.e. the columns of X will be normalized, +! then all methods give nearly equally accurate results. +!..... +! WHTSYM (input) INTEGER +! Specifies the method for restoring the symmetry of the +! Rayleigh quotient. +! 1 :: The lower triangle of the computed Rayleigh +! quotient is used to symmetrize the matrix, +! 2 :: The formulas for the lower triangle of a +! truncated solution of the symmetric Procrustes +! problem are used to symmetrize the computed +! Rayleigh quotient. +!..... +! WHTEIG (input) INTEGER +! Specifies the symmetric eigensolver to compute the +! eigenvalues and eigenvectors of the symmetric Rayleigh +! quotient. +! 1 :: ZHEEV (the QR algorithm) +! 2 :: ZHEEVD (the divide and conquer algorithm) +!..... +! M (input) INTEGER, M >= 0 +! The state space dimension (the number of rows of F). +!..... +! N (input) INTEGER, 0 <= N <= M +! The number of data snapshots from a single trajectory, +! taken at equidistant discrete times. This is the +! number of columns of F. +!..... +! F (input/output) COMPLEX(KIND=WP) M-by-N array +! > On entry, +! the columns of F are the sequence of data snapshots +! from a single trajectory, taken at equidistant discrete +! times. It is assumed that the column norms of F are +! in the range of the normalized floating point numbers. +! < On exit, +! If JOBQ == 'Q', the array F contains the unitary +! matrix/factor of the QR factorization of the initial +! data snapshots matrix F. See the description of JOBQ. +! If JOBQ == 'N', the entries in F strictly below the main +! diagonal contain, column-wise, the information on the +! Householder vectors, as returned by ZGEQRF. The +! remaining information to restore the orthogonal matrix +! of the initial QR factorization is stored in ZWORK(1:N). +! See the description of ZWORK. +!..... +! LDF (input) INTEGER, LDF >= M +! The leading dimension of the array F. +!..... +! X (workspace/output) COMPLEX(KIND=WP) MIN(M,N)-by-(N-1) array +! X is used as worskpace to hold representations of the +! leading N-1 snapshots in the orthonormal basis computed +! in the QR factorization of F. +! On exit, the leading K columns of X contain the leading +! K left singular vectors of the above described content +! of X. To lift them to the space of the left singular +! vectors U(:,1:K) of the input data, pre-mutiply with the +! Q factor from the initial QR factorization. +! See the descriptions of F, K, V and Z. +!..... +! LDX (input) INTEGER, LDX >= N +! The leading dimension of the array X. +!..... +! Y (workspace/output) COMPLEX(KIND=WP) MIN(M,N)-by-(N-1) array +! Y is used as worskpace to hold representations of the +! trailing N-1 snapshots in the orthonormal basis computed +! in the QR factorization of F. +! On exit, +! If JOBT == 'R', Y contains the MIN(M,N)-by-N upper +! triangular factor from the QR factorization of the data +! snapshot matrix F. +!..... +! LDY (input) INTEGER , LDY >= N +! The leading dimension of the array Y. +!..... +! NRNK (input) INTEGER +! Determines the mode how to compute the numerical rank, +! i.e. how to truncate small singular values of the input +! matrix X. On input, if +! NRNK = -1 :: i-th singular value sigma(i) is truncated +! if sigma(i) <= TOL*sigma(1) +! NRNK = -2 :: i-th singular value sigma(i) is truncated +! if sigma(i) <= TOL*sigma(i-1) +! The numerical rank can be enforced by using positive +! value of NRNK as follows: +! 0 < NRNK <= N-1 :: at most NRNK largest singular values +! will be used. If the number of the computed nonzero +! singular values is less than NRNK, then only those +! nonzero values will be used and the actually used +! dimension is less than NRNK. The actual number of +! the nonzero singular values is returned in the variable +! K. See the description of K. +!..... +! TOL (input) REAL(KIND=WP), 0 <= TOL < 1 +! The tolerance for truncating small singular values. +! See the description of NRNK. +!..... +! K (output) INTEGER, 0 <= K <= N +! The dimension of the SVD/POD basis for the leading N-1 +! data snapshots (columns of F) and the number of the +! computed Ritz pairs. The value of K is determinet +! according to the rule set by the parameters NRNK and +! TOL. See the descriptions of NRNK and TOL. +!..... +! EIGS (output) REAL(KIND=WP) (N-1)-by-1 array +! The leading K (K<=N-1) entries of EIGS contain +! the computed eigenvalues in ascending order. +! If the eigenvectors are requested, then Z(:,i) +! corresponds to EIGS(i). If JOBF == 'X', then +! orthonormalised Exact DMD vectors are stored +! in the array B and to the eigenvector B(:,i) +! the corresponding eigenvalue is EIGS(IWORK(i)). +! See the descriptions of K, Z, B and IWORK. +!..... +! Z (workspace/output) COMPLEX(KIND=WP) M-by-(N-1) array +! If JOBZ =='V' then +! Z contains Ritz vectors. +! If JOBZ == 'F', then the above descriptions hold for +! the columns of X(:,1:K)*W(1:K,1:K), where the columns +! of W(1:k,1:K) are the computed eigenvectors of the +! K-by-K Rayleigh quotient. +! See the descriptions of EIGS, X and W. +!..... +! LDZ (input) INTEGER , LDZ >= M +! The leading dimension of the array Z. +!..... +! RES (output) REAL(KIND=WP) (N-1)-by-1 array +! RES(1:K) contains the residuals for the K computed +! Ritz pairs. +! RES(i) = || A * Z(:,i) - EIGS(i)*Z(:,i))||_2. +! If JOBF == 'X', the array IWORK on exit +! contains the permutation that sorts RES in +! ascending order. +! See the description of JOBF, EIGS, Z and IWORK. +!..... +! B (output) COMPLEX(KIND=WP) MIN(M,N)-by-(N-1) array. +! IF JOBF =='R', B(1:N,1:K) contains A*U(:,1:K), and can +! be used for computing the refined vectors; see further +! details in the provided references. +! If JOBF == 'E', B(1:N,1;K) contains +! A*U(:,1:K)*W(1:K,1:K), which are the vectors from the +! Exact DMD, up to scaling by the inverse eigenvalues. +! In both cases, the content of B can be lifted to the +! original dimension of the input data by pre-mutiplying +! with the Q factor from the initial QR factorization. +! Here A denotes a compression of the underlying operator. +! See the descriptions of F and X. +! If JOBF =='N', then B is not referenced. +!..... +! LDB (input) INTEGER, LDB >= MIN(M,N) +! The leading dimension of the array B. +!..... +! V (workspace/output) COMPLEX(KIND=WP) (N-1)-by-(N-1) array +! On exit, V(1:K,1:K) V contains the K eigenvectors of +! the Rayleigh quotient. The Ritz vectors +! (returned in Z) are the product of X and V; see +! the descriptions of X and Z. +!..... +! LDV (input) INTEGER, LDV >= N-1 +! The leading dimension of the array V. +!..... +! S (workspace/output) COMPLEX(KIND=WP) (N-1)-by-(N-1) array +! The array S(1:K,1:K) is used for the matrix Rayleigh +! quotient. This content is overwritten during +! the eigenvalue decomposition by ZHEEV/ZHEEVD. +! See the description of K. +!..... +! LDS (input) INTEGER, LDS >= N-1 +! The leading dimension of the array S. +!..... +! ZWORK (workspace/output) COMPLEX(KIND=WP) LWORK-by-1 array +! On exit, +! ZWORK(1:MIN(M,N)) contains the scalar factors of the +! elementary reflectors as returned by ZGEQRF of the +! M-by-N input matrix F. +! If the call to ZHEDMDQ is only workspace query, then +! ZWORK(1) contains the minimal complex workspace length and +! ZWORK(2) is the optimal complex workspace length. +! Hence, the length of ZWORK is at least 2. +! See the description of LZWORK. +!..... +! LZWORK (input) INTEGER +! The minimal length of the workspace vector ZWORK. +! LZWORK is calculated as follows: +! Let MLWQR = N (minimal workspace for ZGEQRF[M,N]) +! MLWDMD = minimal workspace for ZHEDMD (see the +! description of LWORK in ZHEDMD) +! MLWMQR = N (minimal workspace for +! ZUNMQR['L','N',M,N,N]) +! MLWGQR = N (minimal workspace for ZUNGQR[M,N,N]) +! MINMN = MIN(M,N) +! Then +! LZWORK = MAX(2, MIN(M,N)+MLWQR, MINMN+MLWDMD) +! is further updated as follows: +! if JOBZ == 'V' or JOBZ == 'F' THEN +! LZWORK = MAX( LZWORK, MINMN+MLWMQR ) +! if JOBQ == 'Q' THEN +! LZWORK = MAX( ZLWORK, MINMN+MLWGQR) +!..... +! WORK (workspace/output) REAL(KIND=WP) LWORK-by-1 array +! On exit, +! WORK(1:N-1) contains the singular values of +! the input submatrix F(1:M,1:N-1). +! If the call to ZHEDMDQ is only workspace query, then +! WORK(1) contains the minimal workspace length and +! WORK(2) is the optimal workspace length. Hence, the +! length of WORK is at least 2. +! See the description of LWORK. +!..... +! LWORK (input) INTEGER +! The minimal length of the workspace vector WORK. +! LWORK is the same as in ZHEDMD, because in ZHEDMDQ +! only ZHEDMD requiers real workspace. +! If on entry LWORK = -1, then a workspace query is +! assumed and the procedure only computes the minimal +! and the optimal workspace lengths for both WORK and +! IWORK. See the descriptions of WORK and IWORK. +!..... +! IWORK (workspace/output) INTEGER LIWORK-by-1 array +! Workspace that is required if WHTSVD equals +! 2 , 3 or 4. Further, if JOBF=='X', it is used to return +! ordering of the orthonormalized Exact DMD eigenvectors, +! so that EIGS(IWORK(i)) is the eigenvalue that corresponds to +! the i-th EDMD vector. See the descriptions of JOBF and B. +! If on entry LWORK =-1 or LIWORK=-1, then the +! minimal length of IWORK is computed and returned in +! IWORK(1). See the description of LIWORK. +!..... +! LIWORK (input) INTEGER +! The minimal length of the workspace vector IWORK. +! LIWORK is determined as follows. First: Let N1=N-1 +! If WHTSVD == 1, then only IWORK(1) is used; LIWORK >=1 +! If WHTSVD == 2, then LIWORK >= MAX(1,8*MIN(M,N1)) +! If WHTSVD == 3, then LIWORK >= MAX(1,M+N1-1) +! If WHTSVD == 4, then LIWORK >= MAX(3,M+3*N1) +! If WHTEIG == 2 and JOBZ == 'V', then LIWORK >= MAX(1,3+5*N1) +! Then, if JOBF == 'X', then LIWORK = MAX(LIWORK,N1). +! If on entry LIWORK = -1, then a worskpace query is +! assumed and the procedure only computes the minimal +! and the optimal workspace lengths for both WORK and +! IWORK. See the descriptions of WORK and IWORK. +!..... +! INFO (output) INTEGER +! -i < 0 :: On entry, the i-th argument had an +! illegal value +! = 0 :: Successful return. +! = 1 :: Void input. Quick exit (M=0 or N=0). +! = 2 :: The SVD computation of X did not converge. +! Suggestion: Check the input data and/or +! repeat with different WHTSVD. +! = 3 :: The computation of the eigenvalues did not +! converge. +! = 4 :: If data scaling was requested on input and +! the procedure found inconsistency in the data +! such that for some column index i, +! X(:,i) = 0 but Y(:,i) /= 0, then Y(:,i) is set +! to zero if JOBS=='C'. The computation proceeds +! with original or modified data and warning +! flag is set with INFO=4. +!............................................................. +!............................................................. +! Parameters +! ~~~~~~~~~~ + REAL(KIND=WP), PARAMETER :: ONE = 1.0_WP + REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP + COMPLEX(KIND=WP), PARAMETER :: ZONE = ( 1.0_WP, 0.0_WP ) + COMPLEX(KIND=WP), PARAMETER :: ZZERO = ( 0.0_WP, 0.0_WP ) +! +! Local scalars +! ~~~~~~~~~~~~~ + INTEGER :: IMINWR, INFO1, MINMN, MLRWRK, & + MLWDMD, MLWGQR, MLWMQR, MLWORK, & + MLWQR, OLWDMD, OLWGQR, OLWMQR, & + OLWORK, OLWQR + LOGICAL :: LQUERY, SCCOLX, SCCOLY, WANTQ, & + WNTTRF, WNTRES, WNTVEC, WNTVCF, & + WNTVCQ, WNTREF, WNTEX + CHARACTER(LEN=1) :: JOBVL +! +! Local array +! ~~~~~~~~~~~ +! REAL(KIND=WP) :: RDUMMY(2) +! +! External funcions (BLAS and LAPACK) +! ~~~~~~~~~~~~~~~~~ + LOGICAL LSAME + EXTERNAL LSAME +! +! External subroutines (BLAS and LAPACK) +! ~~~~~~~~~~~~~~~~~~~~ + EXTERNAL ZGEQRF, ZLACPY, ZLASET, ZUNGQR, & + ZUNMQR, XERBLA + +! External subroutines +! ~~~~~~~~~~~~~~~~~~~~ + EXTERNAL ZHEDMD + +! Intrinsic functions +! ~~~~~~~~~~~~~~~~~~~ + INTRINSIC MAX, MIN, INT + !.......................................................... + ! + ! Test the input arguments + WNTRES = LSAME(JOBR,'R') + SCCOLX = LSAME(JOBS,'S') .OR. LSAME( JOBS, 'C' ) + SCCOLY = LSAME(JOBS,'Y') + WNTVEC = LSAME(JOBZ,'V') + WNTVCF = LSAME(JOBZ,'F') + WNTVCQ = LSAME(JOBZ,'Q') + WNTREF = LSAME(JOBF,'R') + WNTEX = LSAME(JOBF,'E') .OR. LSAME(JOBF,'X') + WANTQ = LSAME(JOBQ,'Q') + WNTTRF = LSAME(JOBT,'R') + MINMN = MIN(M,N) + INFO = 0 + LQUERY = ( ( LWORK == -1 ) .OR. ( LIWORK == -1 ) ) +! + IF ( .NOT. (SCCOLX .OR. SCCOLY .OR. & + LSAME(JOBS,'N')) ) THEN + INFO = -1 + ELSE IF ( .NOT. (WNTVEC .OR. WNTVCF .OR. WNTVCQ & + .OR. LSAME(JOBZ,'N')) ) THEN + INFO = -2 + ELSE IF ( .NOT. (WNTRES .OR. LSAME(JOBR,'N')) .OR. & + ( WNTRES .AND. (.NOT.(WNTVEC .OR. WNTVCF)) ) ) THEN + INFO = -3 + ELSE IF ( .NOT. (WANTQ .OR. LSAME(JOBQ,'N')) ) THEN + INFO = -4 + ELSE IF ( .NOT. ( WNTTRF .OR. LSAME(JOBT,'N') ) ) THEN + INFO = -5 + ELSE IF ( .NOT. (WNTREF .OR. WNTEX .OR. & + LSAME(JOBF,'N') ) ) THEN + INFO = -6 + ELSE IF ( .NOT. ((WHTSVD == 1).OR.(WHTSVD == 2).OR. & + (WHTSVD == 3).OR.(WHTSVD == 4)) ) THEN + INFO = -7 + ELSE IF ( .NOT.((WHTSYM == 1) .OR. (WHTSYM == 2))) THEN + INFO = -8 + ELSE IF ( .NOT.((WHTEIG == 1) .OR. (WHTEIG == 2))) THEN + INFO = -9 + ELSE IF ( M < 0 ) THEN + INFO = -10 + ELSE IF ( ( N < 0 ) .OR. ( N > M+1 ) ) THEN + INFO = -11 + ELSE IF ( LDF < M ) THEN + INFO = -13 + ELSE IF ( LDX < MINMN ) THEN + INFO = -15 + ELSE IF ( LDY < MINMN ) THEN + INFO = -17 + ELSE IF ( .NOT. (( NRNK == -2).OR.(NRNK == -1).OR. & + ((NRNK >= 1).AND.(NRNK < N ))) ) THEN + INFO = -18 + ELSE IF ( ( TOL < ZERO ) .OR. ( TOL >= ONE ) ) THEN + INFO = -19 + ELSE IF ( LDZ < M ) THEN + INFO = -23 + ELSE IF ( (WNTREF.OR.WNTEX ).AND.( LDB < MINMN ) ) THEN + INFO = -26 + ELSE IF ( LDV < N-1 ) THEN + INFO = -28 + ELSE IF ( LDS < N-1 ) THEN + INFO = -30 + END IF +! + IF ( WNTVEC .OR. WNTVCF .OR. WNTVCQ ) THEN + JOBVL = 'V' + ELSE + JOBVL = 'N' + END IF + IF ( INFO == 0 ) THEN + ! Compute the minimal and the optimal workspace + ! requirements. Simulate running the code and + ! determine minimal and optimal sizes of the + ! workspace at any moment of the run. + IF ( ( N == 0 ) .OR. ( N == 1 ) ) THEN + ! All output except K is void. INFO=1 signals + ! the void input. In case of a workspace query, + ! the minimal workspace lengths are returned. + IF ( LQUERY ) THEN + IWORK(1) = 1 + WORK(1) = 2 + WORK(2) = 2 + ELSE + K = 0 + END IF + INFO = 1 + RETURN + END IF + + MLRWRK = 1 + MLWORK = 2 + OLWORK = 2 + IMINWR = 1 + MLWQR = MAX(1,N) ! Minimal workspace length for ZGEQRF. + MLWORK = MAX(MLWORK,MINMN + MLWQR) + + IF ( LQUERY ) THEN + CALL ZGEQRF( M, N, F, LDF, ZWORK, ZWORK, -1, & + INFO1 ) + OLWQR = INT(ZWORK(1)) + OLWORK = MAX(OLWORK,MINMN + OLWQR) + END IF + CALL ZHEDMD( JOBS, JOBVL, JOBR, JOBF, WHTSVD, WHTSYM, WHTEIG, MINMN,& + N-1, X, LDX, Y, LDY, NRNK, TOL, K, & + EIGS, Z, LDZ, RES, B, LDB, V, LDV, & + S, LDS, ZWORK, -1, WORK, -1, IWORK, -1, INFO1 ) + MLWDMD = INT(ZWORK(1)) + MLWORK = MAX(MLWORK, MINMN + MLWDMD) + MLRWRK = MAX(MLRWRK,INT(WORK(1))) + IMINWR = MAX(IMINWR,IWORK(1)) + IF ( LQUERY ) THEN + OLWDMD = INT(ZWORK(2)) + OLWORK = MAX(OLWORK, MINMN+OLWDMD) + END IF + IF ( WNTVEC .OR. WNTVCF ) THEN + MLWMQR = MAX(1,N) + MLWORK = MAX(MLWORK,MINMN+MLWMQR) + IF ( LQUERY ) THEN + CALL ZUNMQR( 'L','N', M, N, MINMN, F, LDF, & + ZWORK, Z, LDZ, ZWORK, -1, INFO1 ) + OLWMQR = INT(ZWORK(1)) + OLWORK = MAX(OLWORK,MINMN+OLWMQR) + END IF + END IF + IF ( WANTQ ) THEN + MLWGQR = MAX(1,N) + MLWORK = MAX(MLWORK,MINMN+MLWGQR) + IF ( LQUERY ) THEN + CALL ZUNGQR( M, MINMN, MINMN, F, LDF, ZWORK, & + ZWORK, -1, INFO1 ) + OLWGQR = INT(ZWORK(1)) + OLWORK = MAX(OLWORK,MINMN+OLWGQR) + END IF + END IF + IF ( LIWORK < IMINWR .AND. (.NOT.LQUERY) ) INFO = -36 + IF ( LWORK < MLRWRK .AND. (.NOT.LQUERY) ) INFO = -34 + IF ( LZWORK < MLWORK .AND. (.NOT.LQUERY) ) INFO = -32 + END IF + IF( INFO /= 0 ) THEN + CALL XERBLA( 'ZHEDMDQ', -INFO ) + RETURN + ELSE IF ( LQUERY ) THEN +! Return minimal and optimal workspace sizes + IWORK(1) = IMINWR + ZWORK(1) = MLWORK + ZWORK(2) = OLWORK + WORK(1) = MLRWRK + RETURN + END IF +!..... +! Initial QR factorization that is used to represent the +! snapshots as elements of lower dimensional subspace. +! For large scale computation with M >>N , at this place +! one can use an out of core QRF. +! + CALL ZGEQRF( M, N, F, LDF, ZWORK, & + ZWORK(MINMN+1), LZWORK-MINMN, INFO1 ) +! +! Define X and Y as the snapshots representations in the +! orthogonal basis computed in the QR factorization. +! X corresponds to the leading N-1 and Y to the trailing +! N-1 snapshots. + CALL ZLASET( 'L', MINMN, N-1, ZZERO, ZZERO, X, LDX ) + CALL ZLACPY( 'U', MINMN, N-1, F, LDF, X, LDX ) + CALL ZLACPY( 'A', MINMN, N-1, F(1,2), LDF, Y, LDY ) + IF ( M >= 3 ) THEN + CALL ZLASET( 'L', MINMN-2, N-2, ZZERO, ZZERO, & + Y(3,1), LDY ) + END IF +! +! Compute the DMD of the projected snapshot pairs (X,Y) + CALL ZHEDMD( JOBS, JOBVL, JOBR, JOBF, WHTSVD, WHTSYM, WHTEIG, & + MINMN, N-1, X, LDX, Y, LDY, NRNK, TOL, K, & + EIGS, Z, LDZ, RES, B, LDB, V, LDV, & + S, LDS, ZWORK(MINMN+1), LZWORK-MINMN, & + WORK, LWORK, IWORK, LIWORK, INFO1 ) + IF ( INFO1 == 2 .OR. INFO1 == 3 ) THEN + ! Return with error code. See ZHEDMD for details. + INFO = INFO1 + RETURN + ELSE + INFO = INFO1 + END IF +! +! The Ritz vectors (Koopman modes) can be explicitly +! formed or returned in factored form. + IF ( WNTVEC ) THEN + ! Compute the eigenvectors explicitly. + IF ( M > MINMN ) CALL ZLASET( 'A', M-MINMN, K, ZZERO, & + ZZERO, Z(MINMN+1,1), LDZ ) + CALL ZUNMQR( 'L','N', M, K, MINMN, F, LDF, ZWORK, Z, & + LDZ, ZWORK(MINMN+1), LZWORK-MINMN, INFO1 ) + ELSE IF ( WNTVCF ) THEN + ! Return the Ritz vectors (eigenvectors) in factored + ! form Z*V, where Z contains orthonormal matrix (the + ! product of Q from the inital QR factorization and + ! the SVD/POD_basis returned by ZHEDMD in X) and the + ! second factor (the eigenvectors of the Rayleigh + ! quotient) is in the array V, as returned by ZHEDMD. + CALL ZLACPY( 'A', N, K, X, LDX, Z, LDZ ) + IF ( M > N ) CALL ZLASET( 'A', M-N, K, ZZERO, ZZERO, & + Z(N+1,1), LDZ ) + CALL ZUNMQR( 'L','N', M, K, MINMN, F, LDF, ZWORK, Z, & + LDZ, ZWORK(MINMN+1), LZWORK-MINMN, INFO1 ) + END IF +! +! Some optional output variables: +! +! The upper triangular factor R in the initial QR +! factorization is optionally returned in the array Y. +! This is useful if this call to ZHEDMDQ is to be +! followed by a streaming DMD that is implemented in a +! QR compressed form. + IF ( WNTTRF ) THEN ! Return the upper triangular R in Y + CALL ZLASET( 'A', MINMN, N, ZZERO, ZZERO, Y, LDY ) + CALL ZLACPY( 'U', MINMN, N, F, LDF, Y, LDY ) + END IF +! +! The orthonormal/unitary factor Q in the initial QR +! factorization is optionally returned in the array F. +! Same as with the triangular factor above, this is +! useful in a streaming DMD. + IF ( WANTQ ) THEN ! Q overwrites F + CALL ZUNGQR( M, MINMN, MINMN, F, LDF, ZWORK, & + ZWORK(MINMN+1), LZWORK-MINMN, INFO1 ) + END IF +! + RETURN +! + END SUBROUTINE ZHEDMDQ + \ No newline at end of file diff --git a/TESTING/EIG/Makefile b/TESTING/EIG/Makefile index 5de315b6e6..cea8c78796 100644 --- a/TESTING/EIG/Makefile +++ b/TESTING/EIG/Makefile @@ -64,7 +64,7 @@ SEIGTST = schkee.o \ sort03.o ssbt21.o ssgt01.o sslect.o sspt21.o sstt21.o \ sstt22.o ssyl01.o ssyt21.o ssyt22.o -SDMDEIGTST = schkdmd.o +SDMDEIGTST = schkdmd.o schksydmd.o CEIGTST = cchkee.o \ cbdt01.o cbdt02.o cbdt03.o cbdt05.o \ @@ -83,7 +83,7 @@ CEIGTST = cchkee.o \ csgt01.o cslect.o csyl01.o\ cstt21.o cstt22.o cunt01.o cunt03.o -CDMDEIGTST = cchkdmd.o +CDMDEIGTST = cchkdmd.o cchkhedmd.o DZIGTST = dlafts.o dlahd2.o dlasum.o dlatb9.o dstech.o dstect.o \ dsvdch.o dsvdct.o dsxt1.o @@ -105,7 +105,7 @@ DEIGTST = dchkee.o \ dort03.o dsbt21.o dsgt01.o dslect.o dspt21.o dstt21.o \ dstt22.o dsyl01.o dsyt21.o dsyt22.o -DDMDEIGTST = dchkdmd.o +DDMDEIGTST = dchkdmd.o dchksydmd.o ZEIGTST = zchkee.o \ zbdt01.o zbdt02.o zbdt03.o zbdt05.o \ @@ -124,7 +124,7 @@ ZEIGTST = zchkee.o \ zsgt01.o zslect.o zsyl01.o\ zstt21.o zstt22.o zunt01.o zunt03.o -ZDMDEIGTST = zchkdmd.o +ZDMDEIGTST = zchkdmd.o zchkhedmd.o .PHONY: all all: single complex double complex16 @@ -191,9 +191,17 @@ zchkee.o: zchkee.F $(FC) $(FFLAGS_DRV) -c -o $@ $< schkdmd.o: schkdmd.f90 $(FC) $(FFLAGS_DRV) -c -o $@ $< +schksydmd.o: schksydmd.f90 + $(FC) $(FFLAGS_DRV) -c -o $@ $< cchkdmd.o: cchkdmd.f90 $(FC) $(FFLAGS_DRV) -c -o $@ $< +cchkhedmd.o: cchkhedmd.f90 + $(FC) $(FFLAGS_DRV) -c -o $@ $< dchkdmd.o: dchkdmd.f90 $(FC) $(FFLAGS_DRV) -c -o $@ $< +dchksydmd.o: dchksydmd.f90 + $(FC) $(FFLAGS_DRV) -c -o $@ $< zchkdmd.o: zchkdmd.f90 $(FC) $(FFLAGS_DRV) -c -o $@ $< +zchkhedmd.o: zchkhedmd.f90 + $(FC) $(FFLAGS_DRV) -c -o $@ $< diff --git a/TESTING/EIG/cchkhedmd.f90 b/TESTING/EIG/cchkhedmd.f90 new file mode 100644 index 0000000000..d537cd1eb2 --- /dev/null +++ b/TESTING/EIG/cchkhedmd.f90 @@ -0,0 +1,761 @@ +! This is a test program for checking the implementations of +! the implementations of the following subroutines +! +! CHEDMD, for computation of the +! Dynamic Mode Decomposition (DMD) +! CHEDMDQ, for computation of a +! QR factorization based compressed DMD +! +! Developed and supported by: +! =========================== +! Developed and coded by Zlatko Drmac, Faculty of Science, +! University of Zagreb; drmac@math.hr +! In cooperation with +! AIMdyn Inc., Santa Barbara, CA. +! ======================================================== +! How to run the code (compiler, link info) +! ======================================================== +! Compile as FORTRAN 90 (or later) and link with BLAS and +! LAPACK libraries. +! NOTE: The code is developed and tested on top of the +! Intel MKL library (versions 2022.0.3 and 2022.2.0), +! using the Intel Fortran compiler. +! +! For developers of the C++ implementation +! ======================================================== +! See the LAPACK++ and Template Numerical Toolkit (TNT) +! +! Note on a development of the GPU HP implementation +! ======================================================== +! Work in progress. See CUDA, MAGMA, SLATE. +! NOTE: The four SVD subroutines used in this code are +! included as a part of R&D and for the completeness. +! This was also an opportunity to test those SVD codes. +! If the scaling option is used all four are essentially +! equally good. For implementations on HP platforms, +! one can use whichever SVD is available. +!............................................................ + +!............................................................ +!............................................................ +! + PROGRAM DMD_TEST + + use iso_fortran_env + IMPLICIT NONE + integer, parameter :: WP = real32 +!............................................................ + REAL(KIND=WP), PARAMETER :: ONE = 1.0_WP + REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP + + COMPLEX(KIND=WP), PARAMETER :: CONE = ( 1.0_WP, 0.0_WP ) + COMPLEX(KIND=WP), PARAMETER :: CZERO = ( 0.0_WP, 0.0_WP ) +!............................................................ + REAL(KIND=WP), ALLOCATABLE, DIMENSION(:) :: RES, & + RES1, RESEX, SINGVX, SINGVQX, WORK + INTEGER , ALLOCATABLE, DIMENSION(:) :: IWORK + REAL(KIND=WP) :: WDUMMY(2) + INTEGER :: IDUMMY(4), ISEED(4) + REAL(KIND=WP) :: ANORM, COND, CONDL, CONDR, EPS, & + TOL, TOL2, SVDIFF, TMP, TMP_AU, & + TMP_FQR, TMP_REZ, TMP_REZQ, TMP_XW, & + TMP_EX +!............................................................ + COMPLEX(KIND=WP) :: CMAX + INTEGER :: LCWORK + COMPLEX(KIND=WP), ALLOCATABLE, DIMENSION(:,:) :: A, AC, & + AU, F, F0, F1, S, W, & + X, X0, Y, Y0, Y1, Z, Z1 + COMPLEX(KIND=WP), ALLOCATABLE, DIMENSION(:) :: CDA, CDR, & + CDL, CEIGS, CEIGSA, CWORK + REAL(KIND=WP), ALLOCATABLE, DIMENSION(:) :: REIG, REIGA + COMPLEX(KIND=WP) :: CDUMMY(22), CDUM2X2(2,2) +!............................................................ + INTEGER :: K, KQ, LDF, LDS, LDA, LDAU, LDW, LDX, LDY, & + LDZ, LIWORK, LWORK, M, N, LLOOP, NRNK + INTEGER :: i, iJOBREF, iJOBZ, iSCALE, INFO, j, & + NFAIL, NFAIL_AU, NFAIL_F_QR, NFAIL_REZ, & + NFAIL_REZQ, NFAIL_SVDIFF, NFAIL_TOTAL, NFAILQ_TOTAL, & + NFAIL_Z_XV, MODE, MODEL, MODER, WHTEIG, WHTSVD, WHTSYM + INTEGER :: iNRNK, iWHTEIG, iWHTSVD, iWHTSYM, K_traj, LWMINOPT + CHARACTER :: GRADE, JOBREF, JOBZ, PIVTNG, RSIGN, & + SCALE, RESIDS, WANTQ, WANTR + LOGICAL :: TEST_QRDMD + +!..... external subroutines (BLAS and LAPACK) + EXTERNAL CAXPY, CHEEV, CGEMM, CGEMV, CLASCL, SLASCL +!.....external subroutines DMD package +! subroutines under test + EXTERNAL CHEDMD, CHEDMDQ +!..... external functions (BLAS and LAPACK) + EXTERNAL SCNRM2, SLAMCH + REAL(KIND=WP) :: SCNRM2, SLAMCH + EXTERNAL CLANGE + REAL(KIND=WP) :: CLANGE + EXTERNAL ISAMAX + INTEGER ISAMAX + EXTERNAL LSAME + LOGICAL LSAME + + INTRINSIC ABS, INT, MIN, MAX, SIGN +!............................................................ + + + WRITE(*,*) 'COMPLEX CODE TESTING' + + ! The test is always in pairs : ( CHEDMD and CHEDMDQ) + ! because the test includes comparing the results (in pairs). +!..................................................................................... + ! This code by default performs tests on CHEDMDQ + ! Since the QR factorizations based algorithm is designed for + ! single trajectory data, only single trajectory tests will + ! be performed with xGEDMDQ. + + WANTQ = 'Q' + WANTR = 'R' +!................................................................................. + + EPS = SLAMCH( 'P' ) ! machine precision WP + + ! Global counters of failures of some particular tests + NFAIL = 0 + NFAIL_REZ = 0 + NFAIL_REZQ = 0 + NFAIL_Z_XV = 0 + NFAIL_F_QR = 0 + NFAIL_AU = 0 + NFAIL_SVDIFF = 0 + NFAIL_TOTAL = 0 + NFAILQ_TOTAL = 0 + + DO LLOOP = 1, 4 + + WRITE(*,*) 'L Loop Index = ', LLOOP + + ! Set the dimensions of the problem ... + WRITE(*,*) 'M = ' + READ(*,*) M + + ! ... and the number of snapshots. + WRITE(*,*) 'N = ' + READ(*,*) N + ! Test the dimensions + IF ( ( MIN(M,N) == 0 ) .OR. ( M < N ) ) THEN + WRITE(*,*) 'Bad dimensions. Required: M >= N > 0.' + STOP + END IF +!............. + ! The seed inside the LLOOP so that each pass can be reproduced easily. + ISEED(1) = 4 + ISEED(2) = 3 + ISEED(3) = 2 + ISEED(4) = 1 + + LDA = M + LDF = M + LDX = M + LDY = M + LDW = N + LDZ = M + LDAU = M + LDS = N + + TMP_XW = ZERO + TMP_AU = ZERO + TMP_REZ = ZERO + TMP_REZQ = ZERO + SVDIFF = ZERO + TMP_EX = ZERO + + ALLOCATE( A(LDA,M) ) + ALLOCATE( AC(LDA,M) ) + ALLOCATE( F(LDF,N+1) ) + ALLOCATE( F0(LDF,N+1) ) + ALLOCATE( F1(LDF,N+1) ) + ALLOCATE( X(LDX,N) ) + ALLOCATE( X0(LDX,N) ) + ALLOCATE( Y(LDY,N+1) ) + ALLOCATE( Y0(LDY,N+1) ) + ALLOCATE( Y1(LDY,N+1) ) + ALLOCATE( AU(LDAU,N) ) + ALLOCATE( W(LDW,N) ) + ALLOCATE( S(LDS,N) ) + ALLOCATE( Z(LDZ,N) ) + ALLOCATE( Z1(LDZ,N) ) + ALLOCATE( RES(N) ) + ALLOCATE( RES1(N) ) + ALLOCATE( RESEX(N) ) + ALLOCATE( REIG(N) ) + ALLOCATE( SINGVX(N) ) + ALLOCATE( SINGVQX(N) ) + + TOL = 10*M*EPS + TOL2 = 10*M*N*EPS + +!............. + + DO K_traj = 1, 2 + ! Number of intial conditions in the simulation/trajectories (1 or 2) + + COND = 1.0D4 + CMAX = (1.0D1,1.0D1) + RSIGN = 'F' + GRADE = 'N' + MODEL = 6 + CONDL = 1.0D1 + MODER = 6 + CONDR = 1.0D1 + PIVTNG = 'N' + ! Loop over all parameter MODE values for CLATMR (+-1,..,+-6) + + DO MODE = 1, 6 + + ALLOCATE( IWORK(2*M) ) + ALLOCATE( CDA(M) ) + ALLOCATE( CDL(M) ) + ALLOCATE( CDR(M) ) + + CALL CLATMR( M, M, 'N', ISEED, 'H', CDA, MODE, COND, & + CMAX, RSIGN, GRADE, CDL, MODEL, CONDL, & + CDR, MODER, CONDR, PIVTNG, IWORK, M, M, & + ZERO, -ONE, 'N', A, LDA, IWORK(M+1), INFO ) + DEALLOCATE( CDR ) + DEALLOCATE( CDL ) + DEALLOCATE( CDA ) + DEALLOCATE( IWORK ) + + ! LCWORK = MAX(1,2*M) + ! ALLOCATE( CEIGSA(M) ) + ! ALLOCATE( CWORK(LCWORK) ) + ! ALLOCATE( WORK(2*M) ) + ! AC(1:M,1:M) = A(1:M,1:M) + ! CALL CGEEV( 'N','N', M, AC, LDA, CEIGSA, CDUM2X2, 2, & + ! CDUM2X2, 2, CWORK, LCWORK, WORK, INFO ) ! LAPACK CALL + ! DEALLOCATE(WORK) + ! DEALLOCATE(CWORK) + + LCWORK = MAX(1,2*M-1) + ALLOCATE( REIGA(M) ) + ALLOCATE( CWORK(LCWORK) ) + ALLOCATE( WORK(3*M-2) ) + AC(1:M,1:M) = A(1:M,1:M) + CALL CHEEV( 'V', 'U', M, AC, LDA, REIGA, CWORK, LCWORK, WORK, INFO) ! LAPACK CALL + DEALLOCATE(WORK) + DEALLOCATE(CWORK) + + TMP = ABS(REIGA(ISAMAX(M, REIGA, 1))) ! The spectral radius of A + ! Scale the matrix A to have unit spectral radius. + CALL CLASCL( 'G',0, 0, TMP, ONE, M, M, & + A, LDA, INFO ) + CALL SLASCL( 'G',0, 0, TMP, ONE, M, 1, & + REIGA, M, INFO ) + ANORM = CLANGE( 'F', M, M, A, LDA, WDUMMY ) + + IF ( K_traj == 2 ) THEN + ! generate data as two trajectories + ! with two inital conditions + CALL CLARNV(2, ISEED, M, F(1,1) ) + DO i = 1, N/2 + CALL CGEMV( 'N', M, M, CONE, A, LDA, F(1,i), 1, & + CZERO, F(1,i+1), 1 ) + END DO + X0(1:M,1:N/2) = F(1:M,1:N/2) + Y0(1:M,1:N/2) = F(1:M,2:N/2+1) + + CALL CLARNV(2, ISEED, M, F(1,1) ) + DO i = 1, N-N/2 + CALL CGEMV( 'N', M, M, CONE, A, LDA, F(1,i), 1, & + CZERO, F(1,i+1), 1 ) + END DO + X0(1:M,N/2+1:N) = F(1:M,1:N-N/2) + Y0(1:M,N/2+1:N) = F(1:M,2:N-N/2+1) + ELSE + CALL CLARNV(2, ISEED, M, F(1,1) ) + DO i = 1, N + CALL CGEMV( 'N', M, M, CONE, A, M, F(1,i), 1, & + CZERO, F(1,i+1), 1 ) + END DO + F0(1:M,1:N+1) = F(1:M,1:N+1) + X0(1:M,1:N) = F0(1:M,1:N) + Y0(1:M,1:N) = F0(1:M,2:N+1) + END IF + + DEALLOCATE( REIGA ) +!........................................................................ + + DO iJOBZ = 1, 4 + + SELECT CASE ( iJOBZ ) + CASE(1) + JOBZ = 'V' + RESIDS = 'R' + CASE(2) + JOBZ = 'V' + RESIDS = 'N' + CASE(3) + JOBZ = 'F' + RESIDS = 'N' + CASE(4) + JOBZ = 'N' + RESIDS = 'N' + END SELECT + + DO iJOBREF = 1, 3 + + SELECT CASE ( iJOBREF ) + CASE(1) + JOBREF = 'R' + CASE(2) + JOBREF = 'E' + CASE(3) + JOBREF = 'N' + END SELECT + + DO iSCALE = 1, 4 + + SELECT CASE ( iSCALE ) + CASE(1) + SCALE = 'S' + CASE(2) + SCALE = 'C' + CASE(3) + SCALE = 'Y' + CASE(4) + SCALE = 'N' + END SELECT + + DO iNRNK = -1, -2, -1 + NRNK = iNRNK + + DO iWHTSVD = 1, 4 + ! Check all four options to compute the POD basis + ! via the SVD. + WHTSVD = iWHTSVD + + DO iWHTEIG = 1, 2 + ! Check both symmetric eigensolvers in LAPACK + WHTEIG = iWHTEIG + + DO iWHTSYM = 1, 2 + ! Check both symmetrizers of the Rayleigh quotient + WHTSYM = iWHTSYM + + DO LWMINOPT = 1, 2 + ! Workspace query for the minimal (1) and for the optimal + ! (2) workspace lengths determined by workspace query. + + ! CHEDMD is always tested and its results are also used for + ! comparisons with CHEDMDQ. + + X(1:M,1:N) = X0(1:M,1:N) + Y(1:M,1:N) = Y0(1:M,1:N) + + CALL CHEDMD( SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, WHTSYM, WHTEIG, & + M, N, X, LDX, Y, LDY, NRNK, TOL, & + K, REIG, Z, LDZ, RES, & + AU, LDAU, W, LDW, S, LDS, & + CDUMMY, -1, WDUMMY, -1, IDUMMY, -1, INFO ) + + !!CALL CGEDMD( SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, & + !! M, N, X, LDX, Y, LDY, NRNK, TOL, & + !! K, CEIGS, Z, LDZ, RES, & + !! AU, LDAU, W, LDW, S, LDS, & + !! CDUMMY, -1, WDUMMY, -1, IDUMMY, -1, INFO ) + + IF ( (INFO .EQ. 2) .OR. ( INFO .EQ. 3 ) & + .OR. ( INFO < 0 ) ) THEN + WRITE(*,*) 'Call to CHEDMD workspace query failed. & + &Check the calling sequence and the code.' + WRITE(*,*) 'The error code is ', INFO + WRITE(*,*) 'The input parameters were ', & + SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, & + M, N, LDX, LDY, NRNK, TOL, LDZ, LDAU, LDW, LDS + STOP + ELSE + !WRITE(*,*) '... done. Workspace length computed.' + END IF + + LCWORK = INT(CDUMMY(LWMINOPT)) + ALLOCATE(CWORK(LCWORK)) + LIWORK = IDUMMY(1) + ALLOCATE(IWORK(LIWORK)) + LWORK = INT(WDUMMY(1)) + ALLOCATE(WORK(LWORK)) + + CALL CHEDMD( SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, WHTSYM, WHTEIG, & + M, N, X, LDX, Y, LDY, NRNK, TOL, & + K, REIG, Z, LDZ, RES, & + AU, LDAU, W, LDW, S, LDS, & + CWORK, LCWORK, WORK, LWORK, IWORK, LIWORK, INFO ) + !!CALL CGEDMD( SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, & + !! M, N, X, LDX, Y, LDY, NRNK, TOL, & + !! K, CEIGS, Z, LDZ, RES, & + !! AU, LDAU, W, LDW, S, LDS, & + !! CWORK, LCWORK, WORK, LWORK, IWORK, LIWORK, INFO ) + IF ( INFO /= 0 ) THEN + WRITE(*,*) 'Call to CHEDMD failed. & + &Check the calling sequence and the code.' + WRITE(*,*) 'The error code is ', INFO + WRITE(*,*) 'The input parameters were ',& + SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, & + M, N, LDX, LDY, NRNK, TOL + STOP + END IF + SINGVX(1:N) = WORK(1:N) + + !...... CHEDMD check point + IF ( LSAME(JOBZ,'V') ) THEN + ! Check that Z = X*W, on return from CHEDMD + ! This checks that the returned eigenvectors in Z are + ! the product of the SVD'POD basis returned in X + ! and the eigenvectors of the Rayleigh quotient + ! returned in W + CALL CGEMM( 'N', 'N', M, K, K, CONE, X, LDX, W, LDW, & + CZERO, Z1, LDZ ) + TMP = ZERO + DO i = 1, K + CALL CAXPY( M, -CONE, Z(1,i), 1, Z1(1,i), 1) + TMP = MAX(TMP, SCNRM2( M, Z1(1,i), 1 ) ) + END DO + TMP_XW = MAX(TMP_XW, TMP ) + IF ( TMP_XW <= TOL ) THEN + !WRITE(*,*) ' :) .... OK .........CHEDMD PASSED.' + ELSE + NFAIL_Z_XV = NFAIL_Z_XV + 1 + WRITE(*,*) ':( .................CHEDMD FAILED!', & + 'Check the code for implementation errors.' + WRITE(*,*) 'The input parameters were ',& + SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, & + M, N, LDX, LDY, NRNK, TOL + END IF + END IF + !...... CHEDMD check point + + IF ( LSAME(JOBREF,'R') ) THEN + ! The matrix A*U is returned for computing refined Ritz vectors. + ! Check that A*U is computed correctly using the formula + ! A*U = Y * V * inv(SIGMA). This depends on the + ! accuracy in the computed singular values and vectors of X. + ! See the paper for an error analysis. + ! Note that the left singular vectors of the input matrix X + ! are returned in the array X. + CALL CGEMM( 'N', 'N', M, K, M, CONE, A, LDA, X, LDX, & + CZERO, Z1, LDZ ) + TMP = ZERO + DO i = 1, K + CALL CAXPY( M, -CONE, AU(1,i), 1, Z1(1,i), 1) + TMP = MAX( TMP, SCNRM2( M, Z1(1,i),1 ) * & + SINGVX(K)/(ANORM*SINGVX(1)) ) + END DO + TMP_AU = MAX( TMP_AU, TMP ) + IF ( TMP <= TOL2 ) THEN + !WRITE(*,*) ':) .... OK .........CHEDMD PASSED.' + ELSE + NFAIL_AU = NFAIL_AU + 1 + WRITE(*,*) ':( .................CHEDMD FAILED!', & + 'Check the code for implementation errors.' + WRITE(*,*) 'The input parameters were ',& + SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, & + M, N, LDX, LDY, NRNK, TOL2 + END IF + ELSEIF ( LSAME(JOBREF,'E') ) THEN + ! The unscaled vectors of the Exact DMD are computed. + ! This option is included for the sake of completeness, + ! for users who prefer the Exact DMD vectors. The + ! returned vectors are in the real form, in the same way + ! as the Ritz vectors. Here we just save the vectors + ! and test them separately using a Matlab script. + CALL CGEMM( 'N', 'N', M, K, M, CONE, A, LDA, AU, LDAU, CZERO, Y1, LDY ) + + DO i=1, K + CALL CAXPY( M, -CMPLX(REIG(i),KIND=WP), AU(1,i), 1, Y1(1,i), 1 ) + RESEX(i) = SCNRM2( M, Y1(1,i), 1) / SCNRM2(M,AU(1,i),1) + END DO + END IF + !...... CHEDMD check point + + IF ( LSAME(RESIDS, 'R') ) THEN + ! Compare the residuals returned by CHEDMD with the + ! explicitly computed residuals using the matrix A. + ! Compute explicitly Y1 = A*Z + CALL CGEMM( 'N', 'N', M, K, M, CONE, A, LDA, Z, LDZ, CZERO, Y1, LDY ) + ! ... and then A*Z(:,i) - LAMBDA(i)*Z(:,i), using the real forms + ! of the invariant subspaces that correspond to complex conjugate + ! pairs of eigencalues. (See the description of Z in CHEDMD,) + + DO i=1, K + ! have a real eigenvalue with real eigenvector + CALL CAXPY( M, -CMPLX(REIG(i),KIND=WP), Z(1,i), 1, Y1(1,i), 1 ) + RES1(i) = SCNRM2( M, Y1(1,i), 1) + END DO + TMP = ZERO + DO i = 1, K + TMP = MAX( TMP, ABS(RES(i) - RES1(i)) * & + SINGVX(K)/(ANORM*SINGVX(1)) ) + END DO + TMP_REZ = MAX( TMP_REZ, TMP ) + IF ( TMP <= TOL2 ) THEN + !WRITE(*,*) ':) .... OK ..........CHEDMD PASSED.' + ELSE + NFAIL_REZ = NFAIL_REZ + 1 + WRITE(*,*) ':( ..................CHEDMD FAILED!', & + 'Check the code for implementation errors.' + WRITE(*,*) 'The input parameters were ',& + SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, & + M, N, LDX, LDY, NRNK, TOL + END IF + + + IF ( LSAME(JOBREF,'E') ) THEN + TMP = ZERO + DO i = 1, K + TMP = MAX( TMP, ABS(RES1(i) - RESEX(i))/(RES1(i)+RESEX(i)) ) + END DO + TMP_EX = MAX(TMP_EX,TMP) + END IF + + END IF + + DEALLOCATE(CWORK) + DEALLOCATE(WORK) + DEALLOCATE(IWORK) + +!....................................................................................................... + + IF ( K_traj == 1 ) THEN + + F(1:M,1:N+1) = F0(1:M,1:N+1) + + CALL CHEDMDQ( SCALE, JOBZ, RESIDS, WANTQ, WANTR, JOBREF, & + WHTSVD, WHTSYM, WHTEIG, M, N+1, F, LDF, X, LDX, Y, LDY, & + NRNK, TOL, KQ, REIG, Z, LDZ, RES, AU, & + LDAU, W, LDW, S, LDS, CDUMMY, -1, & + WDUMMY, -1, IDUMMY, -1, INFO ) + !!CALL CGEDMDQ( SCALE, JOBZ, RESIDS, WANTQ, WANTR, JOBREF, & + !! WHTSVD, M, N+1, F, LDF, X, LDX, Y, LDY, & + !! NRNK, TOL, K, CEIGS, Z, LDZ, RES, AU, & + !! LDAU, W, LDW, S, LDS, CDUMMY, -1, & + !! WDUMMY, -1, IDUMMY, -1, INFO ) + + LCWORK = INT(CDUMMY(LWMINOPT)) + ALLOCATE(CWORK(LCWORK)) + LIWORK = IDUMMY(1) + ALLOCATE(IWORK(LIWORK)) + LWORK = INT(WDUMMY(1)) + ALLOCATE(WORK(LWORK)) + + CALL CHEDMDQ( SCALE, JOBZ, RESIDS, WANTQ, WANTR, JOBREF, & + WHTSVD, WHTSYM, WHTEIG, M, N+1, F, LDF,& + X, LDX, Y, LDY, & + NRNK, TOL, KQ, REIG, Z, LDZ, RES, AU, & + LDAU, W, LDW, S, LDS, CWORK, LCWORK, & + WORK, LWORK, IWORK, LIWORK, INFO ) + + + IF ( INFO /= 0 ) THEN + WRITE(*,*) 'Call to CHEDMDQ failed. & + &Check the calling sequence and the code.' + WRITE(*,*) 'The error code is ', INFO + WRITE(*,*) 'The input parameters were ',& + SCALE, JOBZ, RESIDS, WANTQ, WANTR, WHTSVD, & + M, N, LDX, LDY, NRNK, TOL + STOP + END IF + SINGVQX(1:N) =WORK(1:N) + + !..... CHEDMDQ check point + + TMP = ZERO + DO i = 1, MIN(K, KQ) + TMP = MAX(TMP, ABS(SINGVX(i)-SINGVQX(i)) / & + SINGVX(1) ) + END DO + SVDIFF = MAX( SVDIFF, TMP ) + IF ( TMP > TOL2 ) THEN + WRITE(*,*) 'FAILED! Something was wrong with the run.' + NFAIL_SVDIFF = NFAIL_SVDIFF + 1 + END IF + !..... CGEDMDQ check point + + !..... CHEDMDQ check point + IF ( LSAME(WANTQ,'Q') .AND. LSAME(WANTR,'R') ) THEN + ! Check that the QR factors are computed and returned + ! as requested. The residual ||F-Q*R||_F / ||F||_F + ! is compared to M*N*EPS. + F1(1:M,1:N+1) = F0(1:M,1:N+1) + CALL CGEMM( 'N', 'N', M, N+1, MIN(M,N+1), -CONE, F, & + LDF, Y, LDY, CONE, F1, LDF ) + TMP_FQR = CLANGE( 'F', M, N+1, F1, LDF, WORK ) / & + CLANGE( 'F', M, N+1, F0, LDF, WORK ) + IF ( TMP_FQR <= TOL2 ) THEN + !WRITE(*,*) ':) CHEDMDQ ........ PASSED.' + ELSE + WRITE(*,*) ':( CHEDMDQ ........ FAILED.' + NFAIL_F_QR = NFAIL_F_QR + 1 + END IF + END IF + !..... CHEDMDQ checkpoint + !..... CHEDMDQ checkpoint + IF ( LSAME(RESIDS, 'R') ) THEN + ! Compare the residuals returned by CHEDMDQ with the + ! explicitly computed residuals using the matrix A. + ! Compute explicitly Y1 = A*Z + CALL CGEMM( 'N', 'N', M, KQ, M, CONE, A, LDA, Z, LDZ, CZERO, Y1, LDY ) + ! ... and then A*Z(:,i) - LAMBDA(i)*Z(:,i), using the real forms + ! of the invariant subspaces that correspond to complex conjugate + ! pairs of eigencalues. (See the description of Z in CHEDMDQ) + DO i = 1, KQ + ! have a real eigenvalue with real eigenvector + CALL CAXPY( M, -CMPLX(REIG(i),KIND=WP), Z(1,i), 1, Y1(1,i), 1 ) + ! Y(1:M,i) = Y(1:M,i) - REIG(i)*Z(1:M,i) + RES1(i) = SCNRM2( M, Y1(1,i), 1) + END DO + TMP = ZERO + DO i = 1, KQ + TMP = MAX( TMP, ABS(RES(i) - RES1(i)) * & + SINGVQX(KQ)/(ANORM*SINGVQX(1)) ) + END DO + TMP_REZQ = MAX( TMP_REZQ, TMP ) + IF ( TMP <= TOL2 ) THEN + !WRITE(*,*) '.... OK ........ CHEDMDQ PASSED.' + ELSE + NFAIL_REZQ = NFAIL_REZQ + 1 + WRITE(*,*) '................ CHEDMDQ FAILED!', & + 'Check the code for implementation errors.' + END IF + END IF + + DEALLOCATE(CWORK) + DEALLOCATE(WORK) + DEALLOCATE(IWORK) + + END IF + + END DO ! LWMINOPT + !write(*,*) 'LWMINOPT loop completed' + END DO + END DO + END DO ! iWHTSVD + !write(*,*) 'WHTSVD loop completed' + END DO ! iNRNK -2:-1 + !write(*,*) 'NRNK loop completed' + END DO ! iSCALE 1:4 + !write(*,*) 'SCALE loop completed' + END DO + !write(*,*) 'JOBREF loop completed' + END DO ! iJOBZ + !write(*,*) 'JOBZ loop completed' + + END DO ! MODE -6:6 + !write(*,*) 'MODE loop completed' + END DO ! 1 or 2 trajectories + !write(*,*) 'trajectories loop completed' + + DEALLOCATE( A ) + DEALLOCATE( AC ) + DEALLOCATE( Z ) + DEALLOCATE( F ) + DEALLOCATE( F0 ) + DEALLOCATE( F1 ) + DEALLOCATE( X ) + DEALLOCATE( X0 ) + DEALLOCATE( Y ) + DEALLOCATE( Y0 ) + DEALLOCATE( Y1 ) + DEALLOCATE( AU ) + DEALLOCATE( W ) + DEALLOCATE( S ) + DEALLOCATE( Z1 ) + DEALLOCATE( RES ) + DEALLOCATE( RES1 ) + DEALLOCATE( RESEX ) + DEALLOCATE( REIG ) + DEALLOCATE( SINGVX ) + DEALLOCATE( SINGVQX ) + + END DO ! LLOOP + + WRITE(*,*) + WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>' + WRITE(*,*) ' Test summary for CHEDMD :' + WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>' + WRITE(*,*) + IF ( NFAIL_Z_XV == 0 ) THEN + WRITE(*,*) '>>>> Z - U*V test PASSED.' + ELSE + WRITE(*,*) 'Z - U*V test FAILED ', NFAIL_Z_XV, ' time(s)' + WRITE(*,*) 'Max error ||Z-U*V||_F was ', TMP_XW + NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_z_XV + END IF + + IF ( NFAIL_AU == 0 ) THEN + WRITE(*,*) '>>>> A*U test PASSED. ' + ELSE + WRITE(*,*) 'A*U test FAILED ', NFAIL_AU, ' time(s)' + WRITE(*,*) 'Max A*U test adjusted error measure was ', TMP_AU + WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS + NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_AU + END IF + + + IF ( NFAIL_REZ == 0 ) THEN + WRITE(*,*) '>>>> Rezidual computation test PASSED.' + ELSE + WRITE(*,*) 'Rezidual computation test FAILED ', NFAIL_REZ, 'time(s)' + WRITE(*,*) 'Max residual computing test adjusted error measure was ', TMP_REZ + WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS + NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_REZ + END IF + IF ( NFAIL_TOTAL == 0 ) THEN + WRITE(*,*) '>>>> CHEDMD :: ALL TESTS PASSED.' + ELSE + WRITE(*,*) NFAIL_TOTAL, 'FAILURES!' + WRITE(*,*) '>>>>>>>>>>>>>> CHEDMD :: TESTS FAILED. CHECK THE IMPLEMENTATION.' + END IF + + WRITE(*,*) + WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>' + WRITE(*,*) ' Test summary for CHEDMDQ :' + WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>' + WRITE(*,*) + + IF ( NFAIL_SVDIFF == 0 ) THEN + WRITE(*,*) '>>>> CHEDMD and CHEDMDQ computed singular & + &values test PASSED.' + ELSE + WRITE(*,*) 'CHEDMD and CHEDMDQ discrepancies in & + &the singular values unacceptable ', & + NFAIL_SVDIFF, ' times. Test FAILED.' + WRITE(*,*) 'The maximal discrepancy in the singular values (relative to the norm) was ', SVDIFF + WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS + NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_SVDIFF + END IF + IF ( NFAIL_F_QR == 0 ) THEN + WRITE(*,*) '>>>> F - Q*R test PASSED.' + ELSE + WRITE(*,*) 'F - Q*R test FAILED ', NFAIL_F_QR, ' time(s)' + WRITE(*,*) 'The largest relative residual was ', TMP_FQR + WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS + NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_F_QR + END IF + + IF ( NFAIL_REZQ == 0 ) THEN + WRITE(*,*) '>>>> Rezidual computation test PASSED.' + ELSE + WRITE(*,*) 'Rezidual computation test FAILED ', NFAIL_REZQ, 'time(s)' + WRITE(*,*) 'Max residual computing test adjusted error measure was ', TMP_REZQ + WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS + NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_REZQ + END IF + + IF ( NFAILQ_TOTAL == 0 ) THEN + WRITE(*,*) '>>>>>>> CHEDMDQ :: ALL TESTS PASSED.' + ELSE + WRITE(*,*) NFAILQ_TOTAL, 'FAILURES!' + WRITE(*,*) '>>>>>>> CHEDMDQ :: TESTS FAILED. CHECK THE IMPLEMENTATION.' + END IF + + WRITE(*,*) + WRITE(*,*) 'Test completed.' + STOP + END diff --git a/TESTING/EIG/dchksydmd.f90 b/TESTING/EIG/dchksydmd.f90 new file mode 100644 index 0000000000..eda219bc51 --- /dev/null +++ b/TESTING/EIG/dchksydmd.f90 @@ -0,0 +1,707 @@ +! This is a test program for checking the implementations of +! the implementations of the following subroutines +! +! DSYDMD for computation of the +! Dynamic Mode Decomposition (DMD) +! DSYDMDQ for computation of a +! QR factorization based compressed DMD +! +! Developed and supported by: +! =========================== +! Developed and coded by Zlatko Drmac, Faculty of Science, +! University of Zagreb; drmac@math.hr +! In cooperation with +! AIMdyn Inc., Santa Barbara, CA. +! ======================================================== +!............................................................ +!............................................................ +! + PROGRAM SYDMD_TEST + use iso_fortran_env, only: real64 + IMPLICIT NONE + integer, parameter :: WP = real64 + +!............................................................ + REAL(KIND=WP), PARAMETER :: ONE = 1.0_WP + REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP +!............................................................ + REAL(KIND=WP), ALLOCATABLE, DIMENSION(:,:) :: & + A, AC, EIGA, LAMBDA, LAMBDAQ, F, F1, F2,& + Z, Z1, S, AU, W, VA, X, X0, Y, Y0, Y1 + REAL(KIND=WP), ALLOCATABLE, DIMENSION(:) :: & + DA, DL, DR, REIG, REIGA, REIGQ, & + RES, RES1, RESEX, SINGVX,& + SINGVQX, WORK + INTEGER , ALLOCATABLE, DIMENSION(:) :: IWORK + REAL(KIND=WP) :: AB(2,2), WDUMMY(2) + INTEGER :: IDUMMY(2), ISEED(4) + REAL(KIND=WP) :: ANORM, COND, CONDL, CONDR, DMAX, EPS, & + TOL, TOL2, SVDIFF, TMP, TMP_AU, & + TMP_FQR, TMP_REZ, TMP_REZQ, TMP_ZXW, & + TMP_EX, XNORM, YNORM +!............................................................ + INTEGER :: K, KQ, LDF, LDS, LDA, LDAU, LDW, LDX, LDY, & + LDZ, LIWORK, LWORK, M, N, L, LLOOP, NRNK + INTEGER :: i, iJOBREF, iJOBZ, iSCALE, INFO, j, KDIFF, & + NFAIL, NFAIL_AU, NFAIL_F_QR, NFAIL_REZ, & + NFAIL_REZQ, NFAIL_SVDIFF, NFAIL_TOTAL, NFAILQ_TOTAL, & + NFAIL_Z_XV, MODE, MODEL, MODER, WHTEIG, WHTSVD, WHTSYM + INTEGER iNRNK, iWHTEIG, iWHTSVD, iWHTSYM, K_TRAJ, LWMINOPT + CHARACTER(LEN=1) GRADE, JOBREF, JOBZ, PIVTNG, RSIGN, & + SCALE, RESIDS, WANTQ, WANTR + + LOGICAL TEST_QRDMD +!..... external subroutines (BLAS and LAPACK) + EXTERNAL DAXPY, DSYEV, DGEMM, DGEMV, DLACPY, DLASCL + EXTERNAL DLARNV, DLATMR +!.....external subroutines DMD package, part 1 +! subroutines under test + EXTERNAL DSYDMD, DSYDMDQ + +!..... external functions (BLAS and LAPACK) + EXTERNAL DLAMCH, DLANGE, DNRM2 + REAL(KIND=WP) :: DLAMCH, DLANGE, DNRM2 + EXTERNAL IDAMAX + INTEGER IDAMAX + EXTERNAL LSAME + LOGICAL LSAME + + INTRINSIC ABS, INT, MIN, MAX +!............................................................ + + ! The test is always in pairs : ( DSYDMD and DSYDMDQ ) + ! because the test includes comparing the results (in pairs). +!..................................................................................... + TEST_QRDMD = .TRUE. ! This code by default performs tests on DSYDMDQ + ! Since the QR factorizations based algorithm is designed for + ! single trajectory data, only single trajectory tests will + ! be performed with xSYDMDQ. + WANTQ = 'Q' + WANTR = 'R' +!................................................................................. + + EPS = DLAMCH( 'P' ) ! machine precision DP + + ! Global counters of failures of some particular tests + NFAIL = 0 + NFAIL_REZ = 0 + NFAIL_REZQ = 0 + NFAIL_Z_XV = 0 + NFAIL_F_QR = 0 + NFAIL_AU = 0 + KDIFF = 0 + NFAIL_SVDIFF = 0 + NFAIL_TOTAL = 0 + NFAILQ_TOTAL = 0 + + + DO LLOOP = 1, 4 + + WRITE(*,*) 'L Loop Index = ', LLOOP + + ! Set the dimensions of the problem ... + WRITE(*,*) 'M = ' + READ(*,*) M + WRITE(*,*) M + ! ... and the number of snapshots. + WRITE(*,*) 'N = ' + READ(*,*) N + WRITE(*,*) N + + ! ... Test the dimensions + IF ( ( MIN(M,N) == 0 ) .OR. ( M < N ) ) THEN + WRITE(*,*) 'Bad dimensions. Required: M >= N > 0.' + STOP + END IF +!............. + ! The seed inside the LLOOP so that each pass can be reproduced easily. + + ISEED(1) = 4 + ISEED(2) = 3 + ISEED(3) = 2 + ISEED(4) = 1 + + LDA = M + LDF = M + LDX = MAX(M,N+1) + LDY = MAX(M,N+1) + LDW = N + LDZ = M + LDAU = MAX(M,N+1) + LDS = N + + TMP_ZXW = ZERO + TMP_AU = ZERO + TMP_REZ = ZERO + TMP_REZQ = ZERO + SVDIFF = ZERO + TMP_EX = ZERO + + ! + ! Test the subroutines on real data snapshots. All + ! computation is done in real arithmetic, even when + ! Koopman eigenvalues and modes are real. + ! + ! Allocate memory space + ALLOCATE( A(LDA,M) ) + ALLOCATE( AC(LDA,M) ) + ALLOCATE( DA(M) ) + ALLOCATE( DL(M) ) + ALLOCATE( F(LDF,N+1) ) + ALLOCATE( F1(LDF,N+1) ) + ALLOCATE( F2(LDF,N+1) ) + ALLOCATE( X(LDX,N) ) + ALLOCATE( X0(LDX,N) ) + ALLOCATE( SINGVX(N) ) + ALLOCATE( SINGVQX(N) ) + ALLOCATE( Y(LDY,N+1) ) + ALLOCATE( Y0(LDY,N+1) ) + ALLOCATE( Y1(M,N+1) ) + ALLOCATE( Z(LDZ,N) ) + ALLOCATE( Z1(LDZ,N) ) + ALLOCATE( RES(N) ) + ALLOCATE( RES1(N) ) + ALLOCATE( RESEX(N) ) + ALLOCATE( REIG(N) ) + ALLOCATE( REIGQ(N) ) + ALLOCATE( REIGA(M) ) + ALLOCATE( VA(LDA,M) ) + ALLOCATE( LAMBDA(N,2) ) + ALLOCATE( LAMBDAQ(N,2) ) + ALLOCATE( EIGA(M,2) ) + ALLOCATE( W(LDW,N) ) + ALLOCATE( AU(LDAU,N) ) + ALLOCATE( S(N,N) ) + + TOL = M*EPS + ! This mimics O(M*N)*EPS bound for accumulated roundoff error. + ! The factor 10 is somewhat arbitrary. + TOL2 = 10*M*N*EPS + +!............. + + DO K_TRAJ = 1, 2 + ! Number of intial conditions in the simulation/trajectories (1 or 2) + + COND = 1.0D8 + DMAX = 1.0D2 + RSIGN = 'F' + GRADE = 'N' + MODEL = 6 + CONDL = 1.0D2 + MODER = 6 + CONDR = 1.0D2 + PIVTNG = 'N' + + ! Loop over all parameter MODE values for ZLATMR (+1,..,+6) + DO MODE = 1, 6 + + ALLOCATE( IWORK(2*M) ) + ALLOCATE(DR(N)) + CALL DLATMR( M, M, 'S', ISEED, 'S', DA, MODE, COND, & + DMAX, RSIGN, GRADE, DL, MODEL, CONDL, & + DR, MODER, CONDR, PIVTNG, IWORK, M, M, & + ZERO, -ONE, 'N', A, LDA, IWORK(M+1), INFO ) + DEALLOCATE(IWORK) + DEALLOCATE(DR) + + !! Store reference eigenvalues and eigenvectors + !! WRITE(*,'(A)', advance='no') & + !!'Compute and (if needed) store the eigenvalues & + !! &and eigenvectors of A (DSYEV) . ..' + LWORK = MAX(1,3*M-1) + ALLOCATE(WORK(LWORK)) + AC(1:M,1:M) = A(1:M,1:M) + CALL DSYEV( 'V', 'U', M, AC, LDA, REIGA, WORK, LWORK, INFO ) ! LAPACK CALL + DEALLOCATE(WORK) + + TMP = ABS(REIGA(IDAMAX(M, REIGA, 1))) ! The spectral radius of A + + ! Scale A to have the desirable spectral radius. + CALL DLASCL( 'G', 0, 0, TMP, ONE, M, M, A, M, INFO ) + CALL DLASCL( 'G', 0, 0, TMP, ONE, M, 1, REIGA, M, INFO ) + + ! Compute the norm of A + ANORM = DLANGE( 'F', N, N, A, M, WDUMMY ) + + IF ( K_TRAJ == 2 ) THEN + ! generate data with two inital conditions + CALL DLARNV(2, ISEED, M, F1(1,1) ) + F1(1:M,1) = 1.0E-10*F1(1:M,1) + DO i = 1, N/2 + CALL DGEMV( 'N', M, M, ONE, A, M, F1(1,i), 1, ZERO, & + F1(1,i+1), 1 ) + END DO + X0(1:M,1:N/2) = F1(1:M,1:N/2) + Y0(1:M,1:N/2) = F1(1:M,2:N/2+1) + + CALL DLARNV(2, ISEED, M, F1(1,1) ) + DO i = 1, N-N/2 + CALL DGEMV( 'N', M, M, ONE, A, M, F1(1,i), 1, ZERO, & + F1(1,i+1), 1 ) + END DO + X0(1:M,N/2+1:N) = F1(1:M,1:N-N/2) + Y0(1:M,N/2+1:N) = F1(1:M,2:N-N/2+1) + ELSE + CALL DLARNV(2, ISEED, M, F(1,1) ) + DO i = 1, N + CALL DGEMV( 'N', M, M, ONE, A, M, F(1,i), 1, ZERO, & + F(1,i+1), 1 ) + END DO + X0(1:M,1:N) = F(1:M,1:N) + Y0(1:M,1:N) = F(1:M,2:N+1) + END IF + + XNORM = DLANGE( 'F', M, N, X0, LDX, WDUMMY ) + YNORM = DLANGE( 'F', M, N, Y0, LDX, WDUMMY ) +!............................................................ + + DO iJOBZ = 1, 4 + + SELECT CASE ( iJOBZ ) + CASE(1) + JOBZ = 'V' ! Ritz vectors will be computed + RESIDS = 'R' ! Residuals will be computed + CASE(2) + JOBZ = 'V' + RESIDS = 'N' + CASE(3) + JOBZ = 'F' ! Ritz vectors in factored form + RESIDS = 'N' + CASE(4) + JOBZ = 'N' + RESIDS = 'N' + END SELECT + + DO iJOBREF = 1, 3 + + SELECT CASE ( iJOBREF ) + CASE(1) + JOBREF = 'R' ! Data for refined Ritz vectors + CASE(2) + JOBREF = 'E' ! Exact DMD vectors + CASE(3) + JOBREF = 'N' + END SELECT + + DO iSCALE = 1, 4 + + SELECT CASE ( iSCALE ) + CASE(1) + SCALE = 'S' ! X data normalized + CASE(2) + SCALE = 'C' ! X normalized, consist. check + CASE(3) + SCALE = 'Y' ! Y data normalized + CASE(4) + SCALE = 'N' + END SELECT + + DO iNRNK = -1, -2, -1 + ! Two truncation strategies. The "-2" case for R&D + ! purposes only - it uses possibly low accuracy small + ! singular values, in which case the formulas used in + ! the DMD are highly sensitive. + NRNK = iNRNK + + DO iWHTSVD = 1, 4 + ! Check all four options to compute the POD basis + ! via the SVD. + WHTSVD = iWHTSVD + + DO iWHTEIG = 1, 2 + ! Check both symmetric eigensolvers in LAPACK + WHTEIG = iWHTEIG + + DO iWHTSYM = 1, 2 + ! Check both symmetrizers of the Rayleigh quotient + WHTSYM = iWHTSYM + + DO LWMINOPT = 1, 2 + ! Workspace query for the minimal (1) and for the optimal + ! (2) workspace lengths determined by workspace query. + + X(1:M,1:N) = X0(1:M,1:N) + Y(1:M,1:N) = Y0(1:M,1:N) + + ! DSYDMD: Workspace query and workspace allocation + CALL DSYDMD( SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, WHTSYM, WHTEIG, M, & + N, X, LDX, Y, LDY, NRNK, TOL, K, REIG, Z, & + LDZ, RES, AU, LDAU, W, LDW, S, LDS, WDUMMY, -1, & + IDUMMY, -1, INFO ) + + + LIWORK = IDUMMY(1) + ALLOCATE( IWORK(LIWORK) ) + LWORK = INT(WDUMMY(LWMINOPT)) + ALLOCATE( WORK(LWORK) ) + + ! DSYDMD test: CALL DSYDMD + CALL DSYDMD( SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, WHTSYM, WHTEIG, M, & + N, X, LDX, Y, LDY, NRNK, TOL, K, REIG, Z, & + LDZ, RES, AU, LDAU, W, LDW, S, LDS, WORK, LWORK,& + IWORK, LIWORK, INFO ) + + + SINGVX(1:N) = WORK(1:N) + + !...... DSYDMD check point + IF ( LSAME(JOBZ,'V') ) THEN + ! Check that Z = X*W, on return from DSYDMD + ! This checks that the returned aigenvectors in Z are + ! the product of the SVD'POD basis returned in X + ! and the eigenvectors of the rayleigh quotient + ! returned in W + CALL DGEMM( 'N', 'N', M, K, K, ONE, X, LDX, W, LDW, & + ZERO, Z1, LDZ ) + TMP = ZERO + DO i = 1, K + CALL DAXPY( M, -ONE, Z(1,i), 1, Z1(1,i), 1) + TMP = MAX(TMP, DNRM2( M, Z1(1,i), 1 ) ) + END DO + TMP_ZXW = MAX(TMP_ZXW, TMP ) + + IF ( TMP_ZXW > 10*M*EPS ) THEN + NFAIL_Z_XV = NFAIL_Z_XV + 1 + WRITE(*,*) ':( .................DSYDMD FAILED!', & + 'Check the code for implementation errors.' + WRITE(*,*) 'The input parameters were ',& + SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, & + M, N, LDX, LDY, NRNK, TOL + END IF + + END IF + + !...... DSYDMD check point + IF ( LSAME(JOBREF,'R') ) THEN + ! The matrix A*U is returned for computing refined Ritz vectors. + ! Check that A*U is computed correctly using the formula + ! A*U = Y * V * inv(SIGMA). This depends on the + ! accuracy in the computed singular values and vectors of X. + ! See the paper for an error analysis. + ! Note that the left singular vectors of the input matrix X + ! are returned in the array X. + CALL DGEMM( 'N', 'N', M, K, M, ONE, A, LDA, X, LDX, & + ZERO, Z1, LDZ ) + TMP = ZERO + DO i = 1, K + CALL DAXPY( M, -ONE, AU(1,i), 1, Z1(1,i), 1) + TMP = MAX( TMP, DNRM2( M, Z1(1,i),1 ) * & + SINGVX(K)/(ANORM*SINGVX(1)) ) + END DO + TMP_AU = MAX( TMP_AU, TMP ) + + IF ( TMP > TOL2 ) THEN + NFAIL_AU = NFAIL_AU + 1 + WRITE(*,*) ':( .................DSYDMD FAILED!', & + 'Check the code for implementation errors.' + WRITE(*,*) 'The input parameters were ',& + SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, & + M, N, LDX, LDY, NRNK, TOL + END IF + + ELSEIF ( LSAME(JOBREF,'E') ) THEN + ! The unscaled vectors of the Exact DMD are computed. + ! This option is included for the sake of completeness, + ! for users who prefer the Exact DMD vectors. The + ! returned vectors are in the real form, in the same way + ! as the Ritz vectors. Here we just save the vectors + ! and test them separately using a Matlab script. + + CALL DGEMM( 'N', 'N', M, K, M, ONE, A, LDA, AU, LDAU, ZERO, Y1, M ) + i=1 + DO WHILE ( i <= K ) + ! have a real eigenvalue with real eigenvector + CALL DAXPY( M, -REIG(i), AU(1,i), 1, Y1(1,i), 1 ) + RESEX(i) = DNRM2( M, Y1(1,i), 1) / DNRM2(M,AU(1,i),1) + i = i + 1 + END DO + END IF + + !...... DSYDMD check point + IF ( LSAME(RESIDS, 'R') ) THEN + ! Compare the residuals returned by DSYDMD with the + ! explicitly computed residuals using the matrix A. + ! Compute explicitly Y1 = A*Z + CALL DGEMM( 'N', 'N', M, K, M, ONE, A, LDA, Z, LDZ, ZERO, Y1, M ) + ! ... and then A*Z(:,i) - LAMBDA(i)*Z(:,i), using the real forms + ! of the invariant subspaces that correspond to complex conjugate + ! pairs of eigencalues. (See the description of Z in DSYDMD,) + i = 1 + DO WHILE ( i <= K ) + ! have a real eigenvalue with real eigenvector + CALL DAXPY( M, -REIG(i), Z(1,i), 1, Y1(1,i), 1 ) + RES1(i) = DNRM2( M, Y1(1,i), 1) + i = i + 1 + END DO + TMP = ZERO + DO i = 1, K + TMP = MAX( TMP, ABS(RES(i) - RES1(i)) * & + SINGVX(K)/(ANORM*SINGVX(1)) ) + END DO + TMP_REZ = MAX( TMP_REZ, TMP ) + + IF ( TMP > TOL2 ) THEN + NFAIL_REZ = NFAIL_REZ + 1 + WRITE(*,*) ':( ..................DSYDMD FAILED!', & + 'Check the code for implementation errors.' + WRITE(*,*) 'The input parameters were ',& + SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, & + M, N, LDX, LDY, NRNK, TOL + END IF + + IF ( LSAME(JOBREF,'E') ) THEN + TMP = ZERO + DO i = 1, K + TMP = MAX( TMP, ABS(RES1(i) - RESEX(i))/(RES1(i)+RESEX(i)) ) + END DO + TMP_EX = MAX(TMP_EX,TMP) + END IF + + END IF + + + DEALLOCATE(IWORK) + DEALLOCATE(WORK) + + !====================================================================== + ! Now test the DSYDMDQ + !====================================================================== + IF ( TEST_QRDMD .AND. (K_TRAJ == 1) ) THEN + F1 = F + + ! DSYDMDQ test: Workspace query and workspace allocation + CALL DSYDMDQ( SCALE, JOBZ, RESIDS, WANTQ, WANTR, & + JOBREF, WHTSVD, WHTSYM, WHTEIG, M, N+1, F1, LDF, X, LDX, Y, & + LDY, NRNK, TOL, KQ, REIGQ, Z, LDZ, & + RES, AU, LDAU, W, LDW, S, LDS, WDUMMY, & + -1, IDUMMY, -1, INFO ) + + LIWORK = IDUMMY(1) + ALLOCATE( IWORK(LIWORK) ) + LWORK = INT(WDUMMY(LWMINOPT)) + ALLOCATE(WORK(LWORK)) + ! DSYDMDQ test: CALL DSYDMDQ + CALL DSYDMDQ( SCALE, JOBZ, RESIDS, WANTQ, WANTR, & + JOBREF, WHTSVD, WHTSYM, WHTEIG, M, N+1, F1, LDF, X, LDX, Y, & + LDY, NRNK, TOL, KQ, REIGQ, Z, LDZ, & + RES, AU, LDAU, W, LDW, S, LDS, & + WORK, LWORK, IWORK, LIWORK, INFO ) + + + SINGVQX(1:KQ) = WORK(MIN(M,N+1)+1: MIN(M,N+1)+KQ) + + !..... DSYDMDQ check point + IF ( KQ /= K ) THEN + KDIFF = KDIFF+1 + END IF + + TMP = ZERO + DO i = 1, MIN(K, KQ) + TMP = MAX(TMP, ABS(SINGVX(i)-SINGVQX(i)) / & + SINGVX(1) ) + END DO + SVDIFF = MAX( SVDIFF, TMP ) + IF ( TMP > M*N*EPS ) THEN + WRITE(*,*) 'FAILED! Something was wrong with the run.' + NFAIL_SVDIFF = NFAIL_SVDIFF + 1 + !! DO j =1, 3 + !! write(*,*) j, SINGVX(j), SINGVQX(j) + !! read(*,*) + !! END DO + END IF + + !..... DSYDMDQ check point + IF ( LSAME(WANTQ,'Q') .AND. LSAME(WANTR,'R') ) THEN + ! Check that the QR factors are computed and returned + ! as requested. The residual ||F-Q*R||_F / ||F||_F + ! is compared to M*N*EPS. + F2 = F + CALL DGEMM( 'N', 'N', M, N+1, MIN(M,N+1), -ONE, F1, & + LDF, Y, LDY, ONE, F2, LDF ) + TMP_FQR = DLANGE( 'F', M, N+1, F2, LDF, WORK ) / & + DLANGE( 'F', M, N+1, F, LDF, WORK ) + IF ( TMP_FQR > TOL2 ) THEN + WRITE(*,*) 'FAILED! Something was wrong with the run.' + NFAIL_F_QR = NFAIL_F_QR + 1 + END IF + END IF + + !..... DSYDMDQ check point + IF ( LSAME(RESIDS, 'R') ) THEN + ! Compare the residuals returned by DSYDMDQ with the + ! explicitly computed residuals using the matrix A. + ! Compute explicitly Y1 = A*Z + CALL DGEMM( 'N', 'N', M, KQ, M, ONE, A, M, Z, M, ZERO, Y1, M ) + ! ... and then A*Z(:,i) - LAMBDA(i)*Z(:,i), using the real forms + ! of the invariant subspaces that correspond to complex conjugate + ! pairs of eigencalues. (See the description of Z in DSYDMDQ) + i = 1 + DO WHILE ( i <= KQ ) + ! have a real eigenvalue with real eigenvector + CALL DAXPY( M, -REIGQ(i), Z(1,i), 1, Y1(1,i), 1 ) + ! Y(1:M,i) = Y(1:M,i) - REIG(i)*Z(1:M,i) + RES1(i) = DNRM2( M, Y1(1,i), 1) + i = i + 1 + END DO + TMP = ZERO + DO i = 1, KQ + TMP = MAX( TMP, ABS(RES(i) - RES1(i)) * & + SINGVQX(K)/(ANORM*SINGVQX(1)) ) + END DO + TMP_REZQ = MAX( TMP_REZQ, TMP ) + IF ( TMP > TOL2 ) THEN + NFAIL_REZQ = NFAIL_REZQ + 1 + WRITE(*,*) '................ DSYDMDQ FAILED!', & + 'Check the code for implementation errors.' + STOP + END IF + + END IF + + + DEALLOCATE(WORK) + DEALLOCATE(IWORK) + END IF ! TEST_QRDMD +!====================================================================== + + END DO ! LWMINOPT + !write(*,*) 'LWMINOPT loop completed' + END DO ! WHTSYM LOOP + END DO ! WHTEIG LOOP + END DO ! WHTSVD LOOP + !write(*,*) 'WHTSVD loop completed' + END DO ! NRNK LOOP + !write(*,*) 'NRNK loop completed' + END DO ! SCALE LOOP + !write(*,*) 'SCALE loop completed' + END DO ! JOBF LOOP + !write(*,*) 'JOBREF loop completed' + END DO ! JOBZ LOOP + !write(*,*) 'JOBZ loop completed' + + END DO ! MODE -6:6 + !write(*,*) 'MODE loop completed' + END DO ! 1 or 2 trajectories + !write(*,*) 'trajectories loop completed' + + DEALLOCATE(A) + DEALLOCATE(AC) + DEALLOCATE(DA) + DEALLOCATE(DL) + DEALLOCATE(F) + DEALLOCATE(F1) + DEALLOCATE(F2) + DEALLOCATE(X) + DEALLOCATE(X0) + DEALLOCATE(SINGVX) + DEALLOCATE(SINGVQX) + DEALLOCATE(Y) + DEALLOCATE(Y0) + DEALLOCATE(Y1) + DEALLOCATE(Z) + DEALLOCATE(Z1) + DEALLOCATE(RES) + DEALLOCATE(RES1) + DEALLOCATE(RESEX) + DEALLOCATE(REIG) + DEALLOCATE(REIGQ) + DEALLOCATE(REIGA) + DEALLOCATE(VA) + DEALLOCATE(LAMBDA) + DEALLOCATE(LAMBDAQ) + DEALLOCATE(EIGA) + DEALLOCATE(W) + DEALLOCATE(AU) + DEALLOCATE(S) + +!............................................................ + ! Generate random M-by-M matrix A. Use DLATMR from + END DO ! LLOOP + + WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>' + WRITE(*,*) ' Test summary for DSYDMD :' + WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>' + WRITE(*,*) + IF ( NFAIL_Z_XV == 0 ) THEN + WRITE(*,*) '>>>> Z - U*V test PASSED.' + ELSE + WRITE(*,*) 'Z - U*V test FAILED ', NFAIL_Z_XV, ' time(s)' + WRITE(*,*) 'Max error ||Z-U*V||_F was ', TMP_ZXW + NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_Z_XV + END IF + IF ( NFAIL_AU == 0 ) THEN + WRITE(*,*) '>>>> A*U test PASSED. ' + ELSE + WRITE(*,*) 'A*U test FAILED ', NFAIL_AU, ' time(s)' + WRITE(*,*) 'Max A*U test adjusted error measure was ', TMP_AU + WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS + NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_AU + END IF + + IF ( NFAIL_REZ == 0 ) THEN + WRITE(*,*) '>>>> Rezidual computation test PASSED.' + ELSE + WRITE(*,*) 'Rezidual computation test FAILED ', NFAIL_REZ, 'time(s)' + WRITE(*,*) 'Max residual computing test adjusted error measure was ', TMP_REZ + WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS + NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_REZ + END IF + + IF ( NFAIL_TOTAL == 0 ) THEN + WRITE(*,*) '>>>> DSYDMD :: ALL TESTS PASSED.' + ELSE + WRITE(*,*) NFAIL_TOTAL, 'FAILURES!' + WRITE(*,*) '>>>>>>>>>>>>>> DSYDMD :: TESTS FAILED. CHECK THE IMPLEMENTATION.' + END IF + + IF ( TEST_QRDMD ) THEN + WRITE(*,*) + WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>' + WRITE(*,*) ' Test summary for DSYDMDQ :' + WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>' + WRITE(*,*) + + IF ( NFAIL_SVDIFF == 0 ) THEN + WRITE(*,*) '>>>> DSYDMD and DSYDMDQ computed singular & + &values test PASSED.' + ELSE + WRITE(*,*) 'DSYDMD and DSYDMDQ discrepancies in & + &the singular values unacceptable ', & + NFAIL_SVDIFF, ' times. Test FAILED.' + WRITE(*,*) 'The maximal discrepancy in the singular values (relative to the norm) was ', SVDIFF + WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS + NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_SVDIFF + END IF + + IF ( NFAIL_F_QR == 0 ) THEN + WRITE(*,*) '>>>> F - Q*R test PASSED.' + ELSE + WRITE(*,*) 'F - Q*R test FAILED ', NFAIL_F_QR, ' time(s)' + WRITE(*,*) 'The largest relative residual was ', TMP_FQR + WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS + NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_F_QR + END IF + + IF ( NFAIL_REZQ == 0 ) THEN + WRITE(*,*) '>>>> Rezidual computation test PASSED.' + ELSE + WRITE(*,*) 'Rezidual computation test FAILED ', NFAIL_REZQ, 'time(s)' + WRITE(*,*) 'Max residual computing test adjusted error measure was ', TMP_REZQ + WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS + NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_REZQ + END IF + + IF ( NFAILQ_TOTAL == 0 ) THEN + WRITE(*,*) '>>>>>>> DSYDMDQ :: ALL TESTS PASSED.' + ELSE + WRITE(*,*) NFAILQ_TOTAL, 'FAILURES!' + WRITE(*,*) '>>>>>>> DSYDMDQ :: TESTS FAILED. CHECK THE IMPLEMENTATION.' + END IF + + END IF + + WRITE(*,*) + WRITE(*,*) 'Test completed.' + STOP + END diff --git a/TESTING/EIG/schksydmd.f90 b/TESTING/EIG/schksydmd.f90 new file mode 100644 index 0000000000..9aff3bb130 --- /dev/null +++ b/TESTING/EIG/schksydmd.f90 @@ -0,0 +1,702 @@ +! This is a test program for checking the implementations of +! the implementations of the following subroutines +! +! SSYDMD for computation of the +! Dynamic Mode Decomposition (DMD) +! SSYDMDQ for computation of a +! QR factorization based compressed DMD +! +! Developed and supported by: +! =========================== +! Developed and coded by Zlatko Drmac, Faculty of Science, +! University of Zagreb; drmac@math.hr +! In cooperation with +! AIMdyn Inc., Santa Barbara, CA. +! ======================================================== +!............................................................ +!............................................................ +! + PROGRAM DMD_TEST + use iso_fortran_env, only: real32 + IMPLICIT NONE + integer, parameter :: WP = real32 + +!............................................................ + REAL(KIND=WP), PARAMETER :: ONE = 1.0_WP + REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP +!............................................................ + REAL(KIND=WP), ALLOCATABLE, DIMENSION(:,:) :: & + A, AC, EIGA, LAMBDA, LAMBDAQ, F, F1, F2,& + Z, Z1, S, AU, W, VA, X, X0, Y, Y0, Y1 + REAL(KIND=WP), ALLOCATABLE, DIMENSION(:) :: & + DA, DL, DR, REIG, REIGA, REIGQ, IEIG, & + IEIGA, IEIGQ, RES, RES1, RESEX, SINGVX,& + SINGVQX, WORK + INTEGER , ALLOCATABLE, DIMENSION(:) :: IWORK + REAL(KIND=WP) :: AB(2,2), WDUMMY(2) + INTEGER :: IDUMMY(2), ISEED(4) + REAL(KIND=WP) :: ANORM, COND, CONDL, CONDR, DMAX, EPS, & + TOL, TOL2, SVDIFF, TMP, TMP_AU, & + TMP_FQR, TMP_REZ, TMP_REZQ, TMP_ZXW, & + TMP_EX, XNORM, YNORM +!............................................................ + INTEGER :: K, KQ, LDF, LDS, LDA, LDAU, LDW, LDX, LDY, & + LDZ, LIWORK, LWORK, M, N, L, LLOOP, NRNK + INTEGER :: i, iJOBREF, iJOBZ, iSCALE, INFO, KDIFF, & + NFAIL, NFAIL_AU, NFAIL_F_QR, NFAIL_REZ, & + NFAIL_REZQ, NFAIL_SVDIFF, NFAIL_TOTAL, NFAILQ_TOTAL, & + NFAIL_Z_XV, MODE, MODEL, MODER, WHTEIG, WHTSVD, WHTSYM + INTEGER iNRNK, iWHTEIG, iWHTSVD, iWHTSYM, K_TRAJ, LWMINOPT + CHARACTER(LEN=1) GRADE, JOBREF, JOBZ, PIVTNG, RSIGN, & + SCALE, RESIDS, WANTQ, WANTR + + LOGICAL TEST_QRDMD +!..... external subroutines (BLAS and LAPACK) + EXTERNAL SAXPY, SSYEV, SGEMM, SGEMV, SLACPY, SLASCL + EXTERNAL SLARNV, SLATMR +!.....external subroutines DMD package, part 1 +! subroutines under test + EXTERNAL SSYDMD, SSYDMDQ + +!..... external functions (BLAS and LAPACK) + EXTERNAL SLAMCH, SLANGE, SNRM2 + REAL(KIND=WP) :: SLAMCH, SLANGE, SNRM2 + EXTERNAL ISAMAX + INTEGER ISAMAX + EXTERNAL LSAME + LOGICAL LSAME + + INTRINSIC ABS, INT, MIN, MAX +!............................................................ + + ! The test is always in pairs : ( SSYDMD and SSYDMDQ ) + ! because the test includes comparing the results (in pairs). +!..................................................................................... + TEST_QRDMD = .TRUE. ! This code by default performs tests on SSYDMDQ + ! Since the QR factorizations based algorithm is designed for + ! single trajectory data, only single trajectory tests will + ! be performed with xGEDMDQ. + WANTQ = 'Q' + WANTR = 'R' +!................................................................................. + + EPS = SLAMCH( 'P' ) ! machine precision SP + + ! Global counters of failures of some particular tests + NFAIL = 0 + NFAIL_REZ = 0 + NFAIL_REZQ = 0 + NFAIL_Z_XV = 0 + NFAIL_F_QR = 0 + NFAIL_AU = 0 + KDIFF = 0 + NFAIL_SVDIFF = 0 + NFAIL_TOTAL = 0 + NFAILQ_TOTAL = 0 + + + DO LLOOP = 1, 4 + + WRITE(*,*) 'L Loop Index = ', LLOOP + + ! Set the dimensions of the problem ... + WRITE(*,*) 'M = ' + READ(*,*) M + WRITE(*,*) M + ! ... and the number of snapshots. + WRITE(*,*) 'N = ' + READ(*,*) N + WRITE(*,*) N + + ! ... Test the dimensions + IF ( ( MIN(M,N) == 0 ) .OR. ( M < N ) ) THEN + WRITE(*,*) 'Bad dimensions. Required: M >= N > 0.' + STOP + END IF +!............. + ! The seed inside the LLOOP so that each pass can be reproduced easily. + + ISEED(1) = 4 + ISEED(2) = 3 + ISEED(3) = 2 + ISEED(4) = 1 + + LDA = M + LDF = M + LDX = MAX(M,N+1) + LDY = MAX(M,N+1) + LDW = N + LDZ = M + LDAU = MAX(M,N+1) + LDS = N + + TMP_ZXW = ZERO + TMP_AU = ZERO + TMP_REZ = ZERO + TMP_REZQ = ZERO + SVDIFF = ZERO + TMP_EX = ZERO + + ! + ! Test the subroutines on real data snapshots. All + ! computation is done in real arithmetic, even when + ! Koopman eigenvalues and modes are real. + ! + ! Allocate memory space + ALLOCATE( A(LDA,M) ) + ALLOCATE( AC(LDA,M) ) + ALLOCATE( DA(M) ) + ALLOCATE( DL(M) ) + ALLOCATE( F(LDF,N+1) ) + ALLOCATE( F1(LDF,N+1) ) + ALLOCATE( F2(LDF,N+1) ) + ALLOCATE( X(LDX,N) ) + ALLOCATE( X0(LDX,N) ) + ALLOCATE( SINGVX(N) ) + ALLOCATE( SINGVQX(N) ) + ALLOCATE( Y(LDY,N+1) ) + ALLOCATE( Y0(LDY,N+1) ) + ALLOCATE( Y1(LDY,N+1) ) + ALLOCATE( Z(LDZ,N) ) + ALLOCATE( Z1(LDZ,N) ) + ALLOCATE( RES(N) ) + ALLOCATE( RES1(N) ) + ALLOCATE( RESEX(N) ) + ALLOCATE( REIG(N) ) + ALLOCATE( IEIG(N) ) + ALLOCATE( REIGQ(N) ) + ALLOCATE( IEIGQ(N) ) + ALLOCATE( REIGA(M) ) + ALLOCATE( IEIGA(M) ) + ALLOCATE( VA(LDA,M) ) + ALLOCATE( LAMBDA(N,2) ) + ALLOCATE( LAMBDAQ(N,2) ) + ALLOCATE( EIGA(M,2) ) + ALLOCATE( W(LDW,N) ) + ALLOCATE( AU(LDAU,N) ) + ALLOCATE( S(LDS,LDS) ) + + TOL = M*EPS + ! This mimics O(M*N)*EPS bound for accumulated roundoff error. + ! The factor 10 is somewhat arbitrary. + TOL2 = 10*M*N*EPS + +!............. + + DO K_TRAJ = 1, 2 + ! Number of intial conditions in the simulation/trajectories (1 or 2) + + COND = 1.0D8 + DMAX = 1.0D2 + RSIGN = 'F' + GRADE = 'N' + MODEL = 6 + CONDL = 1.0D2 + MODER = 6 + CONDR = 1.0D2 + PIVTNG = 'N' + + ! Loop over all parameter MODE values for ZLATMR (+1,..,+6) + DO MODE = 1, 6 + + ALLOCATE( IWORK(2*M) ) + ALLOCATE(DR(N)) + CALL SLATMR( M, M, 'S', ISEED, 'S', DA, MODE, COND, & + DMAX, RSIGN, GRADE, DL, MODEL, CONDL, & + DR, MODER, CONDR, PIVTNG, IWORK, M, M, & + ZERO, -ONE, 'N', A, LDA, IWORK(M+1), INFO ) + DEALLOCATE(IWORK) + DEALLOCATE(DR) + + + LWORK = MAX(1,3*M-1) + ALLOCATE(WORK(LWORK)) + AC(1:M,1:M) = A(1:M,1:M) + CALL SSYEV( 'V', 'U', M, AC, LDA, REIGA, WORK, LWORK, INFO ) ! LAPACK CALL + DEALLOCATE(WORK) + + TMP = ABS(REIGA(ISAMAX(M, REIGA, 1))) ! The spectral radius of A + + + ! Scale A to have the desirable spectral radius. + CALL SLASCL( 'G', 0, 0, TMP, ONE, M, M, A, M, INFO ) + CALL SLASCL( 'G', 0, 0, TMP, ONE, M, 1, REIGA, M, INFO ) + + ! Compute the norm of A + ANORM = SLANGE( 'F', N, N, A, M, WDUMMY ) + + IF ( K_TRAJ == 2 ) THEN + ! generate data with two inital conditions + CALL SLARNV(2, ISEED, M, F1(1,1) ) + F1(1:M,1) = 1.0E-10*F1(1:M,1) + DO i = 1, N/2 + CALL SGEMV( 'N', M, M, ONE, A, M, F1(1,i), 1, ZERO, & + F1(1,i+1), 1 ) + END DO + X0(1:M,1:N/2) = F1(1:M,1:N/2) + Y0(1:M,1:N/2) = F1(1:M,2:N/2+1) + + CALL SLARNV(2, ISEED, M, F1(1,1) ) + DO i = 1, N-N/2 + CALL SGEMV( 'N', M, M, ONE, A, M, F1(1,i), 1, ZERO, & + F1(1,i+1), 1 ) + END DO + X0(1:M,N/2+1:N) = F1(1:M,1:N-N/2) + Y0(1:M,N/2+1:N) = F1(1:M,2:N-N/2+1) + ELSE + ! single trajectory + CALL SLARNV(2, ISEED, M, F(1,1) ) + DO i = 1, N + CALL SGEMV( 'N', M, M, ONE, A, M, F(1,i), 1, ZERO, & + F(1,i+1), 1 ) + END DO + X0(1:M,1:N) = F(1:M,1:N) + Y0(1:M,1:N) = F(1:M,2:N+1) + END IF + + XNORM = SLANGE( 'F', M, N, X0, LDX, WDUMMY ) + YNORM = SLANGE( 'F', M, N, Y0, LDX, WDUMMY ) +!............................................................ + + DO iJOBZ = 1, 4 + + SELECT CASE ( iJOBZ ) + CASE(1) + JOBZ = 'V' ! Ritz vectors will be computed + RESIDS = 'R' ! Residuals will be computed + CASE(2) + JOBZ = 'V' + RESIDS = 'N' + CASE(3) + JOBZ = 'F' ! Ritz vectors in factored form + RESIDS = 'N' + CASE(4) + JOBZ = 'N' + RESIDS = 'N' + END SELECT + + DO iJOBREF = 1, 3 + + SELECT CASE ( iJOBREF ) + CASE(1) + JOBREF = 'R' ! Data for refined Ritz vectors + CASE(2) + JOBREF = 'E' ! Exact DMD vectors + CASE(3) + JOBREF = 'N' + END SELECT + + DO iSCALE = 1, 4 + + SELECT CASE ( iSCALE ) + CASE(1) + SCALE = 'S' ! X data normalized + CASE(2) + SCALE = 'C' ! X normalized, consist. check + CASE(3) + SCALE = 'Y' ! Y data normalized + CASE(4) + SCALE = 'N' + END SELECT + + DO iNRNK = -1, -2, -1 + ! Two truncation strategies. The "-2" case for R&D + ! purposes only - it uses possibly low accuracy small + ! singular values, in which case the formulas used in + ! the DMD are highly sensitive. + NRNK = iNRNK + + DO iWHTSVD = 1, 4 + ! Check all four options to compute the POD basis + ! via the SVD. + WHTSVD = iWHTSVD + + DO iWHTEIG = 1, 2 + ! Check both symmetric eigensolvers in LAPACK + WHTEIG = iWHTEIG + + DO iWHTSYM = 1, 2 + ! Check both symmetrizers of the Rayleigh quotient + WHTSYM = iWHTSYM + + DO LWMINOPT = 1, 2 + ! Workspace query for the minimal (1) and for the optimal + ! (2) workspace lengths determined by workspace query. + + X(1:M,1:N) = X0(1:M,1:N) + Y(1:M,1:N) = Y0(1:M,1:N) + + ! SSYDMD: Workspace query and workspace allocation + + CALL SSYDMD( SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, WHTSYM, WHTEIG, M, & + N, X, LDX, Y, LDY, NRNK, TOL, K, REIG, Z, & + LDZ, RES, AU, LDAU, W, LDW, S, LDS, WDUMMY, -1,& + IDUMMY, -1, INFO ) + + + LIWORK = IDUMMY(1) + ALLOCATE( IWORK(LIWORK) ) + LWORK = INT(WDUMMY(LWMINOPT)) + ALLOCATE( WORK(LWORK) ) + + ! SSYDMD test: CALL SSYDMD + + CALL SSYDMD( SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, WHTSYM, WHTEIG, M, & + N, X, LDX, Y, LDY, NRNK, TOL, K, REIG, Z, & + LDZ, RES, AU, LDAU, W, LDW, S, LDS, WORK, LWORK,& + IWORK, LIWORK, INFO ) + + SINGVX(1:N) = WORK(1:N) + + !...... SSYDMD check point + IF ( LSAME(JOBZ,'V') ) THEN + ! Check that Z = X*W, on return from SSYDMD + ! This checks that the returned aigenvectors in Z are + ! the product of the SVD'POD basis returned in X + ! and the eigenvectors of the rayleigh quotient + ! returned in W + CALL SGEMM( 'N', 'N', M, K, K, ONE, X, LDX, W, LDW, & + ZERO, Z1, LDZ ) + TMP = ZERO + DO i = 1, K + CALL SAXPY( M, -ONE, Z(1,i), 1, Z1(1,i), 1) + TMP = MAX(TMP, SNRM2( M, Z1(1,i), 1 ) ) + END DO + TMP_ZXW = MAX(TMP_ZXW, TMP ) + + IF ( TMP_ZXW > 10*M*EPS ) THEN + NFAIL_Z_XV = NFAIL_Z_XV + 1 + END IF + + END IF + + !...... SSYDMD check point + IF ( LSAME(JOBREF,'R') ) THEN + ! The matrix A*U is returned for computing refined Ritz vectors. + ! Check that A*U is computed correctly using the formula + ! A*U = Y * V * inv(SIGMA). This depends on the + ! accuracy in the computed singular values and vectors of X. + ! See the paper for an error analysis. + ! Note that the left singular vectors of the input matrix X + ! are returned in the array X. + CALL SGEMM( 'N', 'N', M, K, M, ONE, A, LDA, X, LDX, & + ZERO, Z1, LDZ ) + TMP = ZERO + DO i = 1, K + CALL SAXPY( M, -ONE, AU(1,i), 1, Z1(1,i), 1) + TMP = MAX( TMP, SNRM2( M, Z1(1,i),1 ) * & + SINGVX(K)/(ANORM*SINGVX(1)) ) + END DO + TMP_AU = MAX( TMP_AU, TMP ) + + IF ( TMP > TOL2 ) THEN + NFAIL_AU = NFAIL_AU + 1 + END IF + + ELSEIF ( LSAME(JOBREF,'E') ) THEN + ! The unscaled vectors of the Exact DMD are computed. + ! This option is included for the sake of completeness, + ! for users who prefer the Exact DMD vectors. The + ! returned vectors are in the real form, in the same way + ! as the Ritz vectors. Here we just save the vectors + ! and test them separately using a Matlab script. + + CALL SGEMM( 'N', 'N', M, K, M, ONE, A, LDA, AU, LDAU, ZERO, Y1, LDY ) + i=1 + DO WHILE ( i <= K ) + ! have a real eigenvalue with real eigenvector + CALL SAXPY( M, -REIG(i), AU(1,i), 1, Y1(1,i), 1 ) + RESEX(i) = SNRM2( M, Y1(1,i), 1) / SNRM2(M,AU(1,i),1) + i = i + 1 + END DO + + END IF + + !...... SSYDMD check point + IF ( LSAME(RESIDS, 'R') ) THEN + ! Compare the residuals returned by SSYDMD with the + ! explicitly computed residuals using the matrix A. + ! Compute explicitly Y1 = A*Z + CALL SGEMM( 'N', 'N', M, K, M, ONE, A, LDA, Z, LDZ, ZERO, Y1, LDY ) + ! ... and then A*Z(:,i) - LAMBDA(i)*Z(:,i), using the real forms + ! of the invariant subspaces that correspond to complex conjugate + ! pairs of eigencalues. (See the description of Z in SSYDMD,) + i = 1 + DO WHILE ( i <= K ) + ! have a real eigenvalue with real eigenvector + CALL SAXPY( M, -REIG(i), Z(1,i), 1, Y1(1,i), 1 ) + RES1(i) = SNRM2( M, Y1(1,i), 1) + i = i + 1 + END DO + TMP = ZERO + DO i = 1, K + TMP = MAX( TMP, ABS(RES(i) - RES1(i)) * & + SINGVX(K)/(ANORM*SINGVX(1)) ) + END DO + TMP_REZ = MAX( TMP_REZ, TMP ) + + IF ( TMP > TOL2 ) THEN + NFAIL_REZ = NFAIL_REZ + 1 + END IF + + IF ( LSAME(JOBREF,'E') ) THEN + TMP = ZERO + DO i = 1, K + TMP = MAX( TMP, ABS(RES1(i) - RESEX(i))/(RES1(i)+RESEX(i)) ) + END DO + TMP_EX = MAX(TMP_EX,TMP) + END IF + + END IF + + ! ... store the results for inspection +!! DO i = 1, K +!! LAMBDA(i,1) = REIG(i) +!! LAMBDA(i,2) = IEIG(i) +!! END DO + + DEALLOCATE(IWORK) + DEALLOCATE(WORK) + + !====================================================================== + ! Now test the SSYDMDQ, if requested. + !====================================================================== + IF ( TEST_QRDMD .AND. (K_TRAJ == 1) ) THEN + + F1(1:M,1:N+1) = F(1:M,1:N+1) + + ! SSYDMDQ test: Workspace query and workspace allocation + + CALL SSYDMDQ( SCALE, JOBZ, RESIDS, WANTQ, WANTR, JOBREF, WHTSVD, WHTSYM, WHTEIG, M, N+1, & + F1, LDF, X, LDX, Y, LDY, NRNK, TOL, KQ, REIGQ, & + Z, LDZ, RES, AU, LDAU, W, LDW, S, LDS, WDUMMY, & + -1, IDUMMY, -1, INFO ) + + LIWORK = IDUMMY(1) + ALLOCATE( IWORK(LIWORK) ) + LWORK = INT(WDUMMY(LWMINOPT)) + ALLOCATE(WORK(LWORK)) + + ! SSYDMDQ test: CALL SSYDMDQ + + CALL SSYDMDQ( SCALE, JOBZ, RESIDS, WANTQ, WANTR, JOBREF, WHTSVD, WHTSYM, WHTEIG, M, N+1, & + F1, LDF, X, LDX, Y, LDY, NRNK, TOL, KQ, REIGQ, & + Z, LDZ, RES, AU, LDAU, W, LDW, S, LDS, WORK, & + LWORK, IWORK, LIWORK, INFO ) + + + SINGVQX(1:KQ) = WORK(MIN(M,N+1)+1: MIN(M,N+1)+KQ) + + !..... SSYDMDQ check point + IF ( KQ /= K ) THEN + KDIFF = KDIFF+1 + END IF + + TMP = ZERO + DO i = 1, MIN(K, KQ) + TMP = MAX(TMP, ABS(SINGVX(i)-SINGVQX(i)) / & + SINGVX(1) ) + END DO + SVDIFF = MAX( SVDIFF, TMP ) + IF ( TMP > M*N*EPS ) THEN + NFAIL_SVDIFF = NFAIL_SVDIFF + 1 + END IF + + !..... SSYDMDQ check point + IF ( LSAME(WANTQ,'Q') .AND. LSAME(WANTR,'R') ) THEN + ! Check that the QR factors are computed and returned + ! as requested. The residual ||F-Q*R||_F / ||F||_F + ! is compared to M*N*EPS. + F2 = F + CALL SGEMM( 'N', 'N', M, N+1, MIN(M,N+1), -ONE, F1, & + LDF, Y, LDY, ONE, F2, LDF ) + TMP_FQR = SLANGE( 'F', M, N+1, F2, LDF, WORK ) / & + SLANGE( 'F', M, N+1, F, LDF, WORK ) + IF ( TMP_FQR > TOL2 ) THEN + NFAIL_F_QR = NFAIL_F_QR + 1 + END IF + END IF + + !..... SSYDMDQ checkpoint + IF ( LSAME(RESIDS, 'R') ) THEN + ! Compare the residuals returned by SSYDMDQ with the + ! explicitly computed residuals using the matrix A. + ! Compute explicitly Y1 = A*Z + CALL SGEMM( 'N', 'N', M, KQ, M, ONE, A, LDA, Z, LDZ, ZERO, Y1, LDY ) + ! ... and then A*Z(:,i) - LAMBDA(i)*Z(:,i), using the real forms + ! of the invariant subspaces that correspond to complex conjugate + ! pairs of eigencalues. (See the description of Z in SSYDMDQ) + i = 1 + DO WHILE ( i <= KQ ) + ! have a real eigenvalue with real eigenvector + CALL SAXPY( M, -REIGQ(i), Z(1,i), 1, Y1(1,i), 1 ) + ! Y(1:M,i) = Y(1:M,i) - REIG(i)*Z(1:M,i) + RES1(i) = SNRM2( M, Y1(1,i), 1) + i = i + 1 + END DO + TMP = ZERO + DO i = 1, KQ + TMP = MAX( TMP, ABS(RES(i) - RES1(i)) * & + SINGVQX(K)/(ANORM*SINGVQX(1)) ) + END DO + TMP_REZQ = MAX( TMP_REZQ, TMP ) + IF ( TMP > TOL2 ) THEN + NFAIL_REZQ = NFAIL_REZQ + 1 + END IF + + END IF + + DO i = 1, KQ + LAMBDAQ(i,1) = REIGQ(i) + LAMBDAQ(i,2) = IEIGQ(i) + END DO + + DEALLOCATE(WORK) + DEALLOCATE(IWORK) + END IF ! TEST_QRDMD +!====================================================================== + + END DO ! LWMINOPT + !write(*,*) 'LWMINOPT loop completed' + END DO + END DO + END DO ! WHTSVD LOOP + !write(*,*) 'WHTSVD loop completed' + END DO ! NRNK LOOP + !write(*,*) 'NRNK loop completed' + END DO ! SCALE LOOP + !write(*,*) 'SCALE loop completed' + END DO ! JOBF LOOP + !write(*,*) 'JOBREF loop completed' + END DO ! JOBZ LOOP + !write(*,*) 'JOBZ loop completed' + + END DO ! MODE -6:6 + !write(*,*) 'MODE loop completed' + END DO ! 1 or 2 trajectories + !write(*,*) 'trajectories loop completed' + + DEALLOCATE(A) + DEALLOCATE(AC) + DEALLOCATE(DA) + DEALLOCATE(DL) + DEALLOCATE(F) + DEALLOCATE(F1) + DEALLOCATE(F2) + DEALLOCATE(X) + DEALLOCATE(X0) + DEALLOCATE(SINGVX) + DEALLOCATE(SINGVQX) + DEALLOCATE(Y) + DEALLOCATE(Y0) + DEALLOCATE(Y1) + DEALLOCATE(Z) + DEALLOCATE(Z1) + DEALLOCATE(RES) + DEALLOCATE(RES1) + DEALLOCATE(RESEX) + DEALLOCATE(REIG) + DEALLOCATE(IEIG) + DEALLOCATE(REIGQ) + DEALLOCATE(IEIGQ) + DEALLOCATE(REIGA) + DEALLOCATE(IEIGA) + DEALLOCATE(VA) + DEALLOCATE(LAMBDA) + DEALLOCATE(LAMBDAQ) + DEALLOCATE(EIGA) + DEALLOCATE(W) + DEALLOCATE(AU) + DEALLOCATE(S) + +!............................................................ + ! Generate random M-by-M matrix A. Use DLATMR from + END DO ! LLOOP + + + WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>' + WRITE(*,*) ' Test summary for SSYDMD :' + WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>' + WRITE(*,*) + IF ( NFAIL_Z_XV == 0 ) THEN + WRITE(*,*) '>>>> Z - U*V test PASSED.' + ELSE + WRITE(*,*) 'Z - U*V test FAILED ', NFAIL_Z_XV, ' time(s)' + WRITE(*,*) 'Max error ||Z-U*V||_F was ', TMP_ZXW + NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_Z_XV + END IF + IF ( NFAIL_AU == 0 ) THEN + WRITE(*,*) '>>>> A*U test PASSED. ' + ELSE + WRITE(*,*) 'A*U test FAILED ', NFAIL_AU, ' time(s)' + WRITE(*,*) 'Max A*U test adjusted error measure was ', TMP_AU + WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS + NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_AU + END IF + + IF ( NFAIL_REZ == 0 ) THEN + WRITE(*,*) '>>>> Rezidual computation test PASSED.' + ELSE + WRITE(*,*) 'Rezidual computation test FAILED ', NFAIL_REZ, 'time(s)' + WRITE(*,*) 'Max residual computing test adjusted error measure was ', TMP_REZ + WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS + NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_REZ + END IF + + IF ( NFAIL_TOTAL == 0 ) THEN + WRITE(*,*) '>>>> SSYDMD :: ALL TESTS PASSED.' + ELSE + WRITE(*,*) NFAIL_TOTAL, 'FAILURES!' + WRITE(*,*) '>>>>>>>>>>>>>> SSYDMD :: TESTS FAILED. CHECK THE IMPLEMENTATION.' + END IF + + IF ( TEST_QRDMD ) THEN + WRITE(*,*) + WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>' + WRITE(*,*) ' Test summary for SSYDMDQ :' + WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>' + WRITE(*,*) + + IF ( NFAIL_SVDIFF == 0 ) THEN + WRITE(*,*) '>>>> SSYDMD and SSYDMDQ computed singular & + &values test PASSED.' + ELSE + WRITE(*,*) 'SSYDMD and SSYDMDQ discrepancies in & + &the singular values unacceptable ', & + NFAIL_SVDIFF, ' times. Test FAILED.' + WRITE(*,*) 'The maximal discrepancy in the singular values (relative to the norm) was ', SVDIFF + WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS + NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_SVDIFF + END IF + + IF ( NFAIL_F_QR == 0 ) THEN + WRITE(*,*) '>>>> F - Q*R test PASSED.' + ELSE + WRITE(*,*) 'F - Q*R test FAILED ', NFAIL_F_QR, ' time(s)' + WRITE(*,*) 'The largest relative residual was ', TMP_FQR + WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS + NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_F_QR + END IF + + IF ( NFAIL_REZQ == 0 ) THEN + WRITE(*,*) '>>>> Rezidual computation test PASSED.' + ELSE + WRITE(*,*) 'Rezidual computation test FAILED ', NFAIL_REZQ, 'time(s)' + WRITE(*,*) 'Max residual computing test adjusted error measure was ', TMP_REZQ + WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS + NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_REZQ + END IF + + IF ( NFAILQ_TOTAL == 0 ) THEN + WRITE(*,*) '>>>>>>> SSYDMDQ :: ALL TESTS PASSED.' + ELSE + WRITE(*,*) NFAILQ_TOTAL, 'FAILURES!' + WRITE(*,*) '>>>>>>> SSYDMDQ :: TESTS FAILED. CHECK THE IMPLEMENTATION.' + END IF + + END IF + + WRITE(*,*) + WRITE(*,*) 'Test completed.' + STOP + END diff --git a/TESTING/EIG/zchkhedmd.f90 b/TESTING/EIG/zchkhedmd.f90 new file mode 100644 index 0000000000..e2599b17a7 --- /dev/null +++ b/TESTING/EIG/zchkhedmd.f90 @@ -0,0 +1,735 @@ +! This is a test program for checking the implementations of +! the implementations of the following subroutines +! +! ZHEDMD, for computation of the +! Dynamic Mode Decomposition (DMD) +! ZHEDMDQ, for computation of a +! QR factorization based compressed DMD +! +! Developed and supported by: +! =========================== +! Developed and coded by Zlatko Drmac, Faculty of Science, +! University of Zagreb; drmac@math.hr +! In cooperation with +! AIMdyn Inc., Santa Barbara, CA. +! ======================================================== +!............................................................ +!............................................................ +! + PROGRAM HEDMD_TEST + use iso_fortran_env, only: real64 + IMPLICIT NONE + integer, parameter :: WP = real64 + +!............................................................ + REAL(KIND=WP), PARAMETER :: ONE = 1.0_WP + REAL(KIND=WP), PARAMETER :: ZERO = 0.0_WP + + COMPLEX(KIND=WP), PARAMETER :: ZONE = ( 1.0_WP, 0.0_WP ) + COMPLEX(KIND=WP), PARAMETER :: ZZERO = ( 0.0_WP, 0.0_WP ) +!............................................................ + REAL(KIND=WP), ALLOCATABLE, DIMENSION(:) :: RES, & + RES1, RESEX, SINGVX, SINGVQX, WORK + INTEGER , ALLOCATABLE, DIMENSION(:) :: IWORK + REAL(KIND=WP) :: WDUMMY(2) + INTEGER :: IDUMMY(4), ISEED(4) + REAL(KIND=WP) :: ANORM, COND, CONDL, CONDR, EPS, & + TOL, TOL2, SVDIFF, TMP, TMP_AU, & + TMP_FQR, TMP_REZ, TMP_REZQ, TMP_ZXW, & + TMP_EX + +!............................................................ + COMPLEX(KIND=WP) :: ZMAX + INTEGER :: LZWORK + COMPLEX(KIND=WP), ALLOCATABLE, DIMENSION(:,:) :: ZA, ZAC, & + ZAU, ZF, ZF0, ZF1, ZS, ZW, & + ZX, ZX0, ZY, ZY0, ZY1, ZZ, ZZ1 + COMPLEX(KIND=WP), ALLOCATABLE, DIMENSION(:) :: ZDA, ZDR, & + ZDL, ZWORK + REAL(KIND=WP), ALLOCATABLE, DIMENSION(:) :: REIG, REIGA + COMPLEX(KIND=WP) :: ZDUMMY(22), ZDUM2X2(2,2) +!............................................................ + INTEGER :: K, KQ, LDF, LDS, LDA, LDAU, LDW, LDX, LDY, & + LDZ, LIWORK, LWORK, M, N, LLOOP, NRNK, NRNKsp + INTEGER :: i, iJOBREF, iJOBZ, iSCALE, INFO, j, & + NFAIL, NFAIL_AU, NFAIL_F_QR, NFAIL_REZ, & + NFAIL_REZQ, NFAIL_SVDIFF, NFAIL_TOTAL, NFAILQ_TOTAL, & + NFAIL_Z_XV, MODE, MODEL, MODER, WHTEIG, WHTSVD, WHTSYM, & + WHTSVDsp + INTEGER :: iNRNK, iWHTEIG, iWHTSVD, iWHTSYM, K_TRAJ, LWMINOPT + CHARACTER :: GRADE, JOBREF, JOBZ, PIVTNG, RSIGN, & + SCALE, RESIDS, WANTQ, WANTR + LOGICAL :: TEST_QRDMD + +!.....external subroutines (BLAS and LAPACK) + EXTERNAL DAXPY, DGEMM, DGEMV, DLACPY, DLASCL + EXTERNAL ZHEEV, ZGEMV, ZLASCL + EXTERNAL ZLARNV, ZLATMR + EXTERNAL ZAXPY, ZGEMM +!.....external subroutines DMD package, part 1 +! subroutines under test + EXTERNAL ZHEDMD, ZHEDMDQ +!.....external functions (BLAS and LAPACK) + EXTERNAL DLAMCH, DZNRM2 + REAL(KIND=WP) :: DLAMCH, DZNRM2 + REAL(KIND=WP) :: ZLANGE + EXTERNAL IDAMAX + INTEGER IDAMAX + EXTERNAL LSAME + LOGICAL LSAME + + INTRINSIC ABS, INT, MIN, MAX, SIGN +!............................................................ + + ! The test is always in pairs : ( ZHEDMD and ZHEDMDQ ) + ! because the test includes comparing the results (in pairs). +!..................................................................................... + TEST_QRDMD = .TRUE. ! This code by default performs tests on ZHEDMDQ + ! Since the QR factorizations based algorithm is designed for + ! single trajectory data, only single trajectory tests will + ! be performed with xGEDMDQ. + WANTQ = 'Q' + WANTR = 'R' +!................................................................................. + + EPS = DLAMCH( 'P' ) ! machine precision DP + + ! Global counters of failures of some particular tests + NFAIL = 0 + NFAIL_REZ = 0 + NFAIL_REZQ = 0 + NFAIL_Z_XV = 0 + NFAIL_F_QR = 0 + NFAIL_AU = 0 + NFAIL_SVDIFF = 0 + NFAIL_TOTAL = 0 + NFAILQ_TOTAL = 0 + + DO LLOOP = 1, 4 + + WRITE(*,*) 'L Loop Index = ', LLOOP + + ! Set the dimensions of the problem ... + WRITE(*,*) 'M = ' + READ(*,*) M + WRITE(*,*) M + ! ... and the number of snapshots. + WRITE(*,*) 'N = ' + READ(*,*) N + WRITE(*,*) N + + ! ... Test the dimensions + IF ( ( MIN(M,N) == 0 ) .OR. ( M < N ) ) THEN + WRITE(*,*) 'Bad dimensions. Required: M >= N > 0.' + STOP + END IF +!............. + ! The seed inside the LLOOP so that each pass can be reproduced easily. + ISEED(1) = 4 + ISEED(2) = 3 + ISEED(3) = 2 + ISEED(4) = 1 + + LDA = M + LDF = M + LDX = M + LDY = M + LDW = N + LDZ = M + LDAU = M + LDS = N + + TMP_ZXW = ZERO + TMP_AU = ZERO + TMP_REZ = ZERO + TMP_REZQ = ZERO + SVDIFF = ZERO + TMP_EX = ZERO + + ALLOCATE( ZA(LDA,M) ) + ALLOCATE( ZAC(LDA,M) ) + ALLOCATE( ZF(LDF,N+1) ) + ALLOCATE( ZF0(LDF,N+1) ) + ALLOCATE( ZF1(LDF,N+1) ) + ALLOCATE( ZX(LDX,N) ) + ALLOCATE( ZX0(LDX,N) ) + ALLOCATE( ZY(LDY,N+1) ) + ALLOCATE( ZY0(LDY,N+1) ) + ALLOCATE( ZY1(LDY,N+1) ) + ALLOCATE( ZAU(LDAU,N) ) + ALLOCATE( ZW(LDW,N) ) + ALLOCATE( ZS(LDS,N) ) + ALLOCATE( ZZ(LDZ,N) ) + ALLOCATE( ZZ1(LDZ,N) ) + ALLOCATE( RES(N) ) + ALLOCATE( RES1(N) ) + ALLOCATE( RESEX(N) ) + ALLOCATE( REIG(N) ) + ALLOCATE( SINGVX(N) ) + ALLOCATE( SINGVQX(N) ) + + TOL = M*EPS + ! This mimics O(M*N)*EPS bound for accumulated roundoff error. + ! The factor 10 is somewhat arbitrary. + TOL2 = 10*M*N*EPS + +!............. + + DO K_TRAJ = 1, 2 + ! Number of intial conditions in the simulation/trajectories (1 or 2) + + COND = 1.0D4 + ZMAX = (1.0D1,1.0D1) + RSIGN = 'F' + GRADE = 'N' + MODEL = 6 + CONDL = 1.0D1 + MODER = 6 + CONDR = 1.0D1 + PIVTNG = 'N' + + ! Loop over all parameter MODE values for ZLATMR (+1,..,+6) + DO MODE = 1, 6 + + ALLOCATE( IWORK(2*M) ) + ALLOCATE( ZDA(M) ) + ALLOCATE( ZDL(M) ) + ALLOCATE( ZDR(M) ) + + CALL ZLATMR( M, M, 'N', ISEED, 'H', ZDA, MODE, COND, & + ZMAX, RSIGN, GRADE, ZDL, MODEL, CONDL, & + ZDR, MODER, CONDR, PIVTNG, IWORK, M, M, & + ZERO, -ONE, 'N', ZA, LDA, IWORK(M+1), INFO ) + DEALLOCATE( ZDR ) + DEALLOCATE( ZDL ) + DEALLOCATE( ZDA ) + DEALLOCATE( IWORK ) + + + LZWORK = MAX(1,2*M-1) + ALLOCATE( REIGA(M) ) + ALLOCATE( ZWORK(LZWORK) ) + ALLOCATE( WORK(3*M-2) ) + ZAC(1:M,1:M) = ZA(1:M,1:M) + CALL ZHEEV( 'V', 'U', M, ZAC, LDA, REIGA, ZWORK, LZWORK, WORK, INFO) ! LAPACK CALL + DEALLOCATE(WORK) + DEALLOCATE(ZWORK) + + TMP = ABS(REIGA(IDAMAX(M, REIGA, 1))) ! The spectral radius of ZA + ! Scale the matrix ZA to have unit spectral radius. + CALL ZLASCL( 'G',0, 0, TMP, ONE, M, M, & + ZA, LDA, INFO ) + CALL DLASCL( 'G',0, 0, TMP, ONE, M, 1, & + REIGA, M, INFO ) + ANORM = ZLANGE( 'F', M, M, ZA, LDA, WDUMMY ) + + IF ( K_TRAJ == 2 ) THEN + ! generate data as two trajectories + ! with two inital conditions + CALL ZLARNV(2, ISEED, M, ZF(1,1) ) + DO i = 1, N/2 + CALL ZGEMV( 'N', M, M, ZONE, ZA, LDA, ZF(1,i), 1, & + ZZERO, ZF(1,i+1), 1 ) + END DO + ZX0(1:M,1:N/2) = ZF(1:M,1:N/2) + ZY0(1:M,1:N/2) = ZF(1:M,2:N/2+1) + + CALL ZLARNV(2, ISEED, M, ZF(1,1) ) + DO i = 1, N-N/2 + CALL ZGEMV( 'N', M, M, ZONE, ZA, LDA, ZF(1,i), 1, & + ZZERO, ZF(1,i+1), 1 ) + END DO + ZX0(1:M,N/2+1:N) = ZF(1:M,1:N-N/2) + ZY0(1:M,N/2+1:N) = ZF(1:M,2:N-N/2+1) + ELSE + CALL ZLARNV(2, ISEED, M, ZF(1,1) ) + DO i = 1, N + CALL ZGEMV( 'N', M, M, ZONE, ZA, M, ZF(1,i), 1, & + ZZERO, ZF(1,i+1), 1 ) + END DO + ZF0(1:M,1:N+1) = ZF(1:M,1:N+1) + ZX0(1:M,1:N) = ZF0(1:M,1:N) + ZY0(1:M,1:N) = ZF0(1:M,2:N+1) + END IF + + DEALLOCATE( REIGA ) +!........................................................................ + + DO iJOBZ = 1, 4 + + SELECT CASE ( iJOBZ ) + CASE(1) + JOBZ = 'V' + RESIDS = 'R' + CASE(2) + JOBZ = 'V' + RESIDS = 'N' + CASE(3) + JOBZ = 'F' + RESIDS = 'N' + CASE(4) + JOBZ = 'N' + RESIDS = 'N' + END SELECT + + DO iJOBREF = 1, 3 + + SELECT CASE ( iJOBREF ) + CASE(1) + JOBREF = 'R' + CASE(2) + JOBREF = 'E' + CASE(3) + JOBREF = 'N' + END SELECT + + DO iSCALE = 1, 4 + + SELECT CASE ( iSCALE ) + CASE(1) + SCALE = 'S' + CASE(2) + SCALE = 'C' + CASE(3) + SCALE = 'Y' + CASE(4) + SCALE = 'N' + END SELECT + + DO iNRNK = -1, -2, -1 + NRNK = iNRNK + NRNKsp = iNRNK + + DO iWHTSVD = 1, 4 + ! Check all four options to compute the POD basis + ! via the SVD. + WHTSVD = iWHTSVD + WHTSVDsp = iWHTSVD + + DO iWHTEIG = 1, 2 + ! Check both symmetric eigensolvers in LAPACK + WHTEIG = iWHTEIG + + DO iWHTSYM = 1, 2 + ! Check both symmetrizers of the Rayleigh quotient + WHTSYM = iWHTSYM + + DO LWMINOPT = 1, 2 + ! Workspace query for the minimal (1) and for the optimal + ! (2) workspace lengths determined by workspace query. + + ! ZHEDMD is always tested and its results are also used for + ! comparisons with ZHEDMDQ. + + ZX(1:M,1:N) = ZX0(1:M,1:N) + ZY(1:M,1:N) = ZY0(1:M,1:N) + + CALL ZHEDMD( SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, WHTSYM, WHTEIG, & + M, N, ZX, LDX, ZY, LDY, NRNK, TOL, & + K, REIG, ZZ, LDZ, RES, & + ZAU, LDAU, ZW, LDW, ZS, LDS, & + ZDUMMY, -1, WDUMMY, -1, IDUMMY, -1, INFO ) + + IF ( (INFO .EQ. 2) .OR. ( INFO .EQ. 3 ) & + .OR. ( INFO < 0 ) ) THEN + WRITE(*,*) 'Call to ZHEDMD workspace query failed. & + &Check the calling sequence and the code.' + WRITE(*,*) 'The error code is ', INFO + WRITE(*,*) 'The input parameters were ', & + SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, & + M, N, LDX, LDY, NRNK, TOL, LDZ, LDAU, LDW, LDS + STOP + END IF + + LZWORK = INT(ZDUMMY(LWMINOPT)) + LWORK = INT(WDUMMY(1)) + LIWORK = IDUMMY(1) + + ALLOCATE(ZWORK(LZWORK)) + ALLOCATE( WORK(LWORK)) + ALLOCATE(IWORK(LIWORK)) + + CALL ZHEDMD( SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, WHTSYM, WHTEIG, & + M, N, ZX, LDX, ZY, LDY, NRNK, TOL, & + K, REIG, ZZ, LDZ, RES, ZAU, LDAU, & + ZW, LDW, ZS, LDS,ZWORK, LZWORK, & + WORK, LWORK, IWORK, LIWORK, INFO ) + + + IF ( INFO /= 0 ) THEN + WRITE(*,*) 'Call to ZHEDMD failed. & + &Check the calling sequence and the code.' + WRITE(*,*) 'The error code is ', INFO + WRITE(*,*) 'The input parameters were ',& + SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, & + M, N, LDX, LDY, NRNK, TOL + STOP + END IF + + SINGVX(1:N) = WORK(1:N) + + !...... ZHEDMD check point + IF ( LSAME(JOBZ,'V') ) THEN + ! Check that Z = X*W, on return from ZHEDMD + ! This checks that the returned eigenvectors in Z are + ! the product of the SVD'POD basis returned in X + ! and the eigenvectors of the rayleigh quotient + ! returned in W + CALL ZGEMM( 'N', 'N', M, K, K, ZONE, ZX, LDX, ZW, LDW, & + ZZERO, ZZ1, LDZ ) + TMP = ZERO + DO i = 1, K + CALL ZAXPY( M, -ZONE, ZZ(1,i), 1, ZZ1(1,i), 1) + TMP = MAX(TMP, DZNRM2( M, ZZ1(1,i), 1 ) ) + END DO + TMP_ZXW = MAX(TMP_ZXW, TMP ) + IF ( TMP_ZXW <= 10*M*EPS ) THEN + !WRITE(*,*) ' :) .... OK .........ZHEDMD PASSED.' + ELSE + NFAIL_Z_XV = NFAIL_Z_XV + 1 + WRITE(*,*) ':( .................ZHEDMD FAILED!', & + 'Check the code for implementation errors.' + WRITE(*,*) 'The input parameters were ',& + SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, & + M, N, LDX, LDY, NRNK, TOL + END IF + END IF + + + !...... ZHEDMD check point + IF ( LSAME(JOBREF,'R') ) THEN + ! The matrix A*U is returned for computing refined Ritz vectors. + ! Check that A*U is computed correctly using the formula + ! A*U = Y * V * inv(SIGMA). This depends on the + ! accuracy in the computed singular values and vectors of X. + ! See the paper for an error analysis. + ! Note that the left singular vectors of the input matrix X + ! are returned in the array X. + CALL ZGEMM( 'N', 'N', M, K, M, ZONE, ZA, LDA, ZX, LDX, & + ZZERO, ZZ1, LDZ ) + TMP = ZERO + DO i = 1, K + CALL ZAXPY( M, -ZONE, ZAU(1,i), 1, ZZ1(1,i), 1) + TMP = MAX( TMP, DZNRM2( M, ZZ1(1,i),1 ) * & + SINGVX(K)/(ANORM*SINGVX(1)) ) + END DO + TMP_AU = MAX( TMP_AU, TMP ) + IF ( TMP <= TOL2 ) THEN + !WRITE(*,*) ':) .... OK .........ZHEDMD PASSED.' + ELSE + NFAIL_AU = NFAIL_AU + 1 + WRITE(*,*) ':( .................ZHEDMD FAILED!', & + 'Check the code for implementation errors.' + WRITE(*,*) 'The input parameters were ',& + SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, & + M, N, LDX, LDY, NRNK, TOL + END IF + ELSEIF ( LSAME(JOBREF,'E') ) THEN + ! The unscaled vectors of the Exact DMD are computed. + ! This option is included for the sake of completeness, + ! for users who prefer the Exact DMD vectors. The + ! returned vectors are in the real form, in the same way + ! as the Ritz vectors. Here we just save the vectors + ! and test them separately using a Matlab script. + + + CALL ZGEMM( 'N', 'N', M, K, M, ZONE, ZA, LDA, ZAU, LDAU, ZZERO, ZY1, LDY ) + + DO i=1, K + ! have a real eigenvalue with real eigenvector + CALL ZAXPY( M, -CMPLX(REIG(i),KIND=WP), ZAU(1,i), 1, ZY1(1,i), 1 ) + RESEX(i) = DZNRM2( M, ZY1(1,i), 1) / DZNRM2(M,ZAU(1,i),1) + END DO + END IF + !...... ZHEDMD check point + + IF ( LSAME(RESIDS, 'R') ) THEN + ! Compare the residuals returned by ZHEDMD with the + ! explicitly computed residuals using the matrix A. + ! Compute explicitly Y1 = A*Z + CALL ZGEMM( 'N', 'N', M, K, M, ZONE, ZA, LDA, ZZ, LDZ, ZZERO, ZY1, LDY ) + ! ... and then A*Z(:,i) - LAMBDA(i)*Z(:,i), using the real forms + ! of the invariant subspaces that correspond to complex conjugate + ! pairs of eigencalues. (See the description of Z in ZHEDMD,) + + DO i=1, K + ! have a real eigenvalue with real eigenvector + CALL ZAXPY( M, -CMPLX(REIG(i),KIND=WP), ZZ(1,i), 1, ZY1(1,i), 1 ) + RES1(i) = DZNRM2( M, ZY1(1,i), 1) + END DO + TMP = ZERO + DO i = 1, K + TMP = MAX( TMP, ABS(RES(i) - RES1(i)) * & + SINGVX(K)/(ANORM*SINGVX(1)) ) + END DO + TMP_REZ = MAX( TMP_REZ, TMP ) + IF ( TMP <= TOL2 ) THEN + !WRITE(*,*) ':) .... OK ..........ZHEDMD PASSED.' + ELSE + NFAIL_REZ = NFAIL_REZ + 1 + WRITE(*,*) ':( ..................ZHEDMD FAILED!', & + 'Check the code for implementation errors.' + WRITE(*,*) 'The input parameters were ',& + SCALE, JOBZ, RESIDS, JOBREF, WHTSVD, & + M, N, LDX, LDY, NRNK, TOL + END IF + + + IF ( LSAME(JOBREF,'E') ) THEN + TMP = ZERO + DO i = 1, K + TMP = MAX( TMP, ABS(RES1(i) - RESEX(i))/(RES1(i)+RESEX(i)) ) + END DO + TMP_EX = MAX(TMP_EX,TMP) + END IF + + END IF + + DEALLOCATE(ZWORK) + DEALLOCATE(WORK) + DEALLOCATE(IWORK) + + IF ( TEST_QRDMD .AND. (K_TRAJ == 1) ) THEN + + ZF(1:M,1:N+1) = ZF0(1:M,1:N+1) + CALL ZHEDMDQ( SCALE, JOBZ, RESIDS, WANTQ, WANTR, JOBREF, & + WHTSVD, WHTSYM, WHTEIG, M, N+1, ZF, LDF, ZX, LDX, ZY, LDY, & + NRNK, TOL, K, REIG, ZZ, LDZ, RES, ZAU, & + LDAU, ZW, LDW, ZS, LDS, ZDUMMY, -1, & + WDUMMY, -1, IDUMMY, -1, INFO ) + + + LZWORK = INT(ZDUMMY(LWMINOPT)) + ALLOCATE( ZWORK(LZWORK) ) + LIWORK = IDUMMY(1) + ALLOCATE(IWORK(LIWORK)) + LWORK = INT(WDUMMY(1)) + ALLOCATE(WORK(LWORK)) + CALL ZHEDMDQ( SCALE, JOBZ, RESIDS, WANTQ, WANTR, JOBREF, & + WHTSVD, WHTSYM, WHTEIG, M, N+1, ZF, LDF, ZX, LDX, ZY, LDY, & + NRNK, TOL, KQ, REIG, ZZ, LDZ, RES, ZAU, & + LDAU, ZW, LDW, ZS, LDS, ZWORK, LZWORK, & + WORK, LWORK, IWORK, LIWORK, INFO ) + + + IF ( INFO /= 0 ) THEN + WRITE(*,*) 'Call to ZHEDMDQ failed. & + &Check the calling sequence and the code.' + WRITE(*,*) 'The error code is ', INFO + WRITE(*,*) 'The input parameters were ',& + SCALE, JOBZ, RESIDS, WANTQ, WANTR, WHTSVD, & + M, N, LDX, LDY, NRNK, TOL + STOP + END IF + SINGVQX(1:N) = WORK(1:N) + + !..... ZHEDMDQ check point + + IF ( 1 == 0 ) THEN + ! Comparison of ZHEDMD and ZHEDMDQ singular values disabled + TMP = ZERO + DO i = 1, MIN(K, KQ) + TMP = MAX(TMP, ABS(SINGVX(i)-SINGVQX(i)) / & + SINGVX(1) ) + END DO + SVDIFF = MAX( SVDIFF, TMP ) + IF ( TMP > M*N*EPS ) THEN + WRITE(*,*) 'FAILED! Something was wrong with the run.' + NFAIL_SVDIFF = NFAIL_SVDIFF + 1 + DO j =1, 3 + write(*,*) j, SINGVX(j), SINGVQX(j) + read(*,*) + END DO + + END IF + END IF + + !..... ZHEDMDQ check point + IF ( LSAME(WANTQ,'Q') .AND. LSAME(WANTR,'R') ) THEN + ! Check that the QR factors are computed and returned + ! as requested. The residual ||F-Q*R||_F / ||F||_F + ! is compared to M*N*EPS. + ZF1(1:M,1:N+1) = ZF0(1:M,1:N+1) + CALL ZGEMM( 'N', 'N', M, N+1, MIN(M,N+1), -ZONE, ZF, & + LDF, ZY, LDY, ZONE, ZF1, LDF ) + TMP_FQR = ZLANGE( 'F', M, N+1, ZF1, LDF, WORK ) / & + ZLANGE( 'F', M, N+1, ZF0, LDF, WORK ) + IF ( TMP_FQR > TOL2 ) THEN + WRITE(*,*) 'FAILED! Something was wrong with the run.' + NFAIL_F_QR = NFAIL_F_QR + 1 + ELSE + !WRITE(*,*) '........ PASSED.' + END IF + END IF + + !..... ZHEDMDQ check point + IF ( LSAME(RESIDS, 'R') ) THEN + ! Compare the residuals returned by ZHEDMDQ with the + ! explicitly computed residuals using the matrix A. + ! Compute explicitly Y1 = A*Z + CALL ZGEMM( 'N', 'N', M, KQ, M, ZONE, ZA, LDA, ZZ, LDZ, ZZERO, ZY1, LDY ) + ! ... and then A*Z(:,i) - LAMBDA(i)*Z(:,i), using the real forms + ! of the invariant subspaces that correspond to complex conjugate + ! pairs of eigencalues. (See the description of Z in ZHEDMDQ) + + DO i=1, KQ + ! have a real eigenvalue with real eigenvector + CALL ZAXPY( M, -CMPLX(REIG(i),KIND=WP), ZZ(1,i), 1, ZY1(1,i), 1 ) + ! Y(1:M,i) = Y(1:M,i) - REIG(i)*Z(1:M,i) + RES1(i) = DZNRM2( M, ZY1(1,i), 1) + END DO + TMP = ZERO + DO i = 1, KQ + TMP = MAX( TMP, ABS(RES(i) - RES1(i)) * & + SINGVQX(KQ)/(ANORM*SINGVQX(1)) ) + END DO + TMP_REZQ = MAX( TMP_REZQ, TMP ) + IF ( TMP <= TOL2 ) THEN + !WRITE(*,*) '.... OK ........ ZHEDMDQ PASSED.' + ELSE + NFAIL_REZQ = NFAIL_REZQ + 1 + WRITE(*,*) '................ ZHEDMDQ FAILED!', & + 'Check the code for implementation errors.' + STOP + END IF + + END IF + + DEALLOCATE( ZWORK ) + DEALLOCATE( WORK ) + DEALLOCATE( IWORK ) + + END IF ! ZHEDMDQ + +!....................................................................................................... + + END DO ! LWMINOPT + !write(*,*) 'LWMINOPT loop completed' + END DO ! WHTSYM LOOP + END DO ! WHTEIG LOOP + END DO ! iWHTSVD + !write(*,*) 'WHTSVD loop completed' + END DO ! iNRNK -2:-1 + !write(*,*) 'NRNK loop completed' + END DO ! iSCALE 1:4 + !write(*,*) 'SCALE loop completed' + END DO + !write(*,*) 'JOBREF loop completed' + END DO ! iJOBZ + !write(*,*) 'JOBZ loop completed' + + END DO ! MODE -6:6 + !write(*,*) 'MODE loop completed' + END DO ! 1 or 2 trajectories + !write(*,*) 'trajectories loop completed' + + DEALLOCATE( ZA ) + DEALLOCATE( ZAC ) + DEALLOCATE( ZZ ) + DEALLOCATE( ZF ) + DEALLOCATE( ZF0 ) + DEALLOCATE( ZF1 ) + DEALLOCATE( ZX ) + DEALLOCATE( ZX0 ) + DEALLOCATE( ZY ) + DEALLOCATE( ZY0 ) + DEALLOCATE( ZY1 ) + DEALLOCATE( ZAU ) + DEALLOCATE( ZW ) + DEALLOCATE( ZS ) + DEALLOCATE( ZZ1 ) + DEALLOCATE( RES ) + DEALLOCATE( RES1 ) + DEALLOCATE( RESEX ) + DEALLOCATE( REIG ) + DEALLOCATE( SINGVX ) + DEALLOCATE( SINGVQX ) + + END DO ! LLOOP + + WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>' + WRITE(*,*) ' Test summary for ZHEDMD :' + WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>' + WRITE(*,*) + IF ( NFAIL_Z_XV == 0 ) THEN + WRITE(*,*) '>>>> Z - U*V test PASSED.' + ELSE + WRITE(*,*) 'Z - U*V test FAILED ', NFAIL_Z_XV, ' time(s)' + WRITE(*,*) 'Max error ||Z-U*V||_F was ', TMP_ZXW + NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_Z_XV + END IF + IF ( NFAIL_AU == 0 ) THEN + WRITE(*,*) '>>>> A*U test PASSED. ' + ELSE + WRITE(*,*) 'A*U test FAILED ', NFAIL_AU, ' time(s)' + WRITE(*,*) 'Max A*U test adjusted error measure was ', TMP_AU + WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS + NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_AU + END IF + + IF ( NFAIL_REZ == 0 ) THEN + WRITE(*,*) '>>>> Rezidual computation test PASSED.' + ELSE + WRITE(*,*) 'Rezidual computation test FAILED ', NFAIL_REZ, 'time(s)' + WRITE(*,*) 'Max residual computing test adjusted error measure was ', TMP_REZ + WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS + NFAIL_TOTAL = NFAIL_TOTAL + NFAIL_REZ + END IF + + IF ( NFAIL_TOTAL == 0 ) THEN + WRITE(*,*) '>>>> ZHEDMD :: ALL TESTS PASSED.' + ELSE + WRITE(*,*) NFAIL_TOTAL, 'FAILURES!' + WRITE(*,*) '>>>>>>>>>>>>>> ZHEDMD :: TESTS FAILED. CHECK THE IMPLEMENTATION.' + END IF + + IF ( TEST_QRDMD ) THEN + WRITE(*,*) + WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>' + WRITE(*,*) ' Test summary for ZHEDMDQ :' + WRITE(*,*) '>>>>>>>>>>>>>>>>>>>>>>>>>>' + WRITE(*,*) + + IF ( NFAIL_SVDIFF == 0 ) THEN + WRITE(*,*) '>>>> ZHEDMD and ZHEDMDQ computed singular & + &values test PASSED.' + ELSE + WRITE(*,*) 'ZHEDMD and ZHEDMDQ discrepancies in & + &the singular values unacceptable ', & + NFAIL_SVDIFF, ' times. Test FAILED.' + WRITE(*,*) 'The maximal discrepancy in the singular values (relative to the norm) was ', SVDIFF + WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS + NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_SVDIFF + END IF + + IF ( NFAIL_F_QR == 0 ) THEN + WRITE(*,*) '>>>> F - Q*R test PASSED.' + ELSE + WRITE(*,*) 'F - Q*R test FAILED ', NFAIL_F_QR, ' time(s)' + WRITE(*,*) 'The largest relative residual was ', TMP_FQR + WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS + NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_F_QR + END IF + + IF ( NFAIL_REZQ == 0 ) THEN + WRITE(*,*) '>>>> Rezidual computation test PASSED.' + ELSE + WRITE(*,*) 'Rezidual computation test FAILED ', NFAIL_REZQ, 'time(s)' + WRITE(*,*) 'Max residual computing test adjusted error measure was ', TMP_REZQ + WRITE(*,*) 'It should be up to O(M*N) times EPS, EPS = ', EPS + NFAILQ_TOTAL = NFAILQ_TOTAL + NFAIL_REZQ + END IF + + IF ( NFAILQ_TOTAL == 0 ) THEN + WRITE(*,*) '>>>>>>> ZHEDMDQ :: ALL TESTS PASSED.' + ELSE + WRITE(*,*) NFAILQ_TOTAL, 'FAILURES!' + WRITE(*,*) '>>>>>>> ZHEDMDQ :: TESTS FAILED. CHECK THE IMPLEMENTATION.' + END IF + + END IF + + WRITE(*,*) + WRITE(*,*) 'Test completed.' + STOP + END