diff --git a/SRC/CMakeLists.txt b/SRC/CMakeLists.txt
index abd05731d7..daa0ba6725 100644
--- a/SRC/CMakeLists.txt
+++ b/SRC/CMakeLists.txt
@@ -54,7 +54,7 @@ set(SCLAUX
    slasd0.f slasd1.f slasd2.f slasd3.f slasd4.f slasd5.f slasd6.f
    slasd7.f slasd8.f slasda.f slasdq.f slasdt.f
    slaset.f slasq1.f slasq2.f slasq3.f slasq4.f slasq5.f slasq6.f
-   slasr.f  slasrt.f slassq.f90 slasv2.f spttrf.f sstebz.f sstedc.f
+   slasr.f  slasrt.f slasrti.f slasrtr.f slassq.f90 slasv2.f spttrf.f sstebz.f sstedc.f
    ssteqr.f ssterf.f slaisnan.f sisnan.f
    slartgp.f slartgs.f
    ${SECOND_SRC})
@@ -73,7 +73,7 @@ set(DZLAUX
    dlasd0.f dlasd1.f dlasd2.f dlasd3.f dlasd4.f dlasd5.f dlasd6.f
    dlasd7.f dlasd8.f dlasda.f dlasdq.f dlasdt.f
    dlaset.f dlasq1.f dlasq2.f dlasq3.f dlasq4.f dlasq5.f dlasq6.f
-   dlasr.f  dlasrt.f dlassq.f90 dlasv2.f dpttrf.f dstebz.f dstedc.f
+   dlasr.f  dlasrt.f dlasrti.f dlasrtr.f dlassq.f90 dlasv2.f dpttrf.f dstebz.f dstedc.f
    dsteqr.f dsterf.f dlaisnan.f disnan.f
    dlartgp.f dlartgs.f
    ../INSTALL/dlamch.f ${DSECOND_SRC})
@@ -89,7 +89,7 @@ set(SLASRC
    sgetrf2.f sgetri.f
    sggbak.f sggbal.f
    sgges.f  sgges3.f sggesx.f sggev.f  sggev3.f sggevx.f
-   sggglm.f sgghrd.f sgghd3.f sgglse.f sggqrf.f
+   sggglm.f sgghrd.f sgghd3.f sgglse.f sggqrf.f sggqrcs.f
    sggrqf.f sggsvd3.f sggsvp3.f sgtcon.f sgtrfs.f sgtsv.f
    sgtsvx.f sgttrf.f sgttrs.f sgtts2.f shgeqz.f
    slaqz0.f slaqz1.f slaqz2.f slaqz3.f slaqz4.f
@@ -180,7 +180,7 @@ set(CLASRC
    cgetri.f
    cggbak.f cggbal.f
    cgges.f  cgges3.f cggesx.f cggev.f  cggev3.f cggevx.f
-   cggglm.f cgghrd.f cgghd3.f cgglse.f cggqrf.f cggrqf.f
+   cggglm.f cgghrd.f cgghd3.f cgglse.f cggqrf.f cggrqf.f cggqrcs.f
    cggsvd3.f cggsvp3.f
    cgtcon.f cgtrfs.f cgtsv.f  cgtsvx.f cgttrf.f cgttrs.f cgtts2.f chbev.f
    chbevd.f chbevx.f chbgst.f chbgv.f  chbgvd.f chbgvx.f chbtrd.f
@@ -213,7 +213,7 @@ set(CLASRC
    claqsp.f claqsy.f clar1v.f clar2v.f ilaclr.f ilaclc.f
    clarf.f  clarfb.f clarfb_gett.f clarfg.f clarfgp.f clarft.f
    clarfx.f clarfy.f clargv.f clarnv.f clarrv.f clartg.f90 clartv.f
-   clarz.f  clarzb.f clarzt.f clascl.f claset.f clasr.f  classq.f90
+   clarz.f  clarzb.f clarzt.f clascl.f claset.f clasr.f clasrtr.f classq.f90
    claswp.f clasyf.f clasyf_rook.f clasyf_rk.f clasyf_aa.f
    clatbs.f clatdf.f clatps.f clatrd.f clatrs.f clatrz.f
    clauu2.f clauum.f cpbcon.f cpbequ.f cpbrfs.f cpbstf.f cpbsv.f
@@ -284,7 +284,7 @@ set(DLASRC
    dgetrf.f dgetrf2.f dgetri.f
    dgetrs.f dggbak.f dggbal.f
    dgges.f  dgges3.f dggesx.f dggev.f  dggev3.f dggevx.f
-   dggglm.f dgghrd.f dgghd3.f dgglse.f dggqrf.f
+   dggglm.f dgghrd.f dgghd3.f dgglse.f dggqrf.f dggqrcs.f
    dggrqf.f dggsvd3.f dggsvp3.f dgtcon.f dgtrfs.f dgtsv.f
    dgtsvx.f dgttrf.f dgttrs.f dgtts2.f dhgeqz.f
    dlaqz0.f dlaqz1.f dlaqz2.f dlaqz3.f dlaqz4.f
@@ -375,7 +375,7 @@ set(ZLASRC
    zgetri.f zgetrs.f
    zggbak.f zggbal.f
    zgges.f  zgges3.f zggesx.f zggev.f  zggev3.f zggevx.f
-   zggglm.f zgghrd.f zgghd3.f zgglse.f zggqrf.f zggrqf.f
+   zggglm.f zgghrd.f zgghd3.f zgglse.f zggqrf.f zggrqf.f zggqrcs.f
    zggsvd3.f zggsvp3.f
    zgtcon.f zgtrfs.f zgtsv.f  zgtsvx.f zgttrf.f zgttrs.f zgtts2.f zhbev.f
    zhbevd.f zhbevx.f zhbgst.f zhbgv.f  zhbgvd.f zhbgvx.f zhbtrd.f
@@ -410,7 +410,7 @@ set(ZLASRC
    zlarcm.f zlarf.f  zlarfb.f zlarfb_gett.f
    zlarfg.f zlarfgp.f zlarft.f
    zlarfx.f zlarfy.f zlargv.f zlarnv.f zlarrv.f zlartg.f90 zlartv.f
-   zlarz.f  zlarzb.f zlarzt.f zlascl.f zlaset.f zlasr.f
+   zlarz.f  zlarzb.f zlarzt.f zlascl.f zlaset.f zlasr.f zlasrtr.f
    zlassq.f90 zlaswp.f zlasyf.f zlasyf_rook.f zlasyf_rk.f zlasyf_aa.f
    zlatbs.f zlatdf.f zlatps.f zlatrd.f zlatrs.f zlatrz.f zlauu2.f
    zlauum.f zpbcon.f zpbequ.f zpbrfs.f zpbstf.f zpbsv.f
diff --git a/SRC/Makefile b/SRC/Makefile
index 527fb086db..ba1050ab7e 100644
--- a/SRC/Makefile
+++ b/SRC/Makefile
@@ -124,7 +124,7 @@ SLASRC = \
    sgetc2.o sgetf2.o sgetri.o \
    sggbak.o sggbal.o sgges.o  sgges3.o sggesx.o \
    sggev.o  sggev3.o sggevx.o \
-   sggglm.o sgghrd.o sgghd3.o sgglse.o sggqrf.o \
+   sggglm.o sgghrd.o sgghd3.o sgglse.o sggqrcs.o sggqrf.o \
    sggrqf.o sggsvd3.o sggsvp3.o sgtcon.o sgtrfs.o sgtsv.o \
    sgtsvx.o sgttrf.o sgttrs.o sgtts2.o shgeqz.o \
    slaqz0.o slaqz1.o slaqz2.o slaqz3.o slaqz4.o \
@@ -218,7 +218,7 @@ CLASRC = \
    cgesvx.o cgetc2.o cgetf2.o cgetri.o \
    cggbak.o cggbal.o cgges.o  cgges3.o cggesx.o \
    cggev.o  cggev3.o cggevx.o cggglm.o \
-   cgghrd.o cgghd3.o cgglse.o cggqrf.o cggrqf.o \
+   cgghrd.o cgghd3.o cgglse.o cggqrcs.o cggqrf.o cggrqf.o \
    cggsvd3.o cggsvp3.o \
    cgtcon.o cgtrfs.o cgtsv.o  cgtsvx.o cgttrf.o cgttrs.o cgtts2.o chbev.o \
    chbevd.o chbevx.o chbgst.o chbgv.o  chbgvd.o chbgvx.o chbtrd.o \
@@ -326,7 +326,7 @@ DLASRC = \
    dgetc2.o dgetf2.o dgetrf.o dgetri.o \
    dgetrs.o dggbak.o dggbal.o dgges.o  dgges3.o dggesx.o \
    dggev.o  dggev3.o dggevx.o \
-   dggglm.o dgghrd.o dgghd3.o dgglse.o dggqrf.o \
+   dggglm.o dgghrd.o dgghd3.o dgglse.o dggqrcs.o dggqrf.o \
    dggrqf.o dggsvd3.o dggsvp3.o dgtcon.o dgtrfs.o dgtsv.o \
    dgtsvx.o dgttrf.o dgttrs.o dgtts2.o dhgeqz.o \
    dlaqz0.o dlaqz1.o dlaqz2.o dlaqz3.o dlaqz4.o \
@@ -420,7 +420,7 @@ ZLASRC = \
    zgetri.o zgetrs.o \
    zggbak.o zggbal.o zgges.o  zgges3.o zggesx.o \
    zggev.o  zggev3.o zggevx.o zggglm.o \
-   zgghrd.o zgghd3.o zgglse.o zggqrf.o zggrqf.o \
+   zgghrd.o zgghd3.o zgglse.o zggqrcs.o zggqrf.o zggrqf.o \
    zggsvd3.o zggsvp3.o \
    zgtcon.o zgtrfs.o zgtsv.o  zgtsvx.o zgttrf.o zgttrs.o zgtts2.o zhbev.o \
    zhbevd.o zhbevx.o zhbgst.o zhbgv.o  zhbgvd.o zhbgvx.o zhbtrd.o \
diff --git a/SRC/cggqrcs.f b/SRC/cggqrcs.f
new file mode 100644
index 0000000000..f51ead2947
--- /dev/null
+++ b/SRC/cggqrcs.f
@@ -0,0 +1,714 @@
+*> \brief <b> CGGQRCS computes the singular value decomposition (SVD) for OTHER matrices</b>
+*
+*  =========== DOCUMENTATION ===========
+*
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
+*
+*> \htmlonly
+*> Download CGGQRCS + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/cggqrcs.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/cggqrcs.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/cggqrcs.f">
+*> [TXT]</a>
+*> \endhtmlonly
+*
+*  Definition:
+*  ===========
+*
+*       SUBROUTINE CGGQRCS( JOBU1, JOBU2, JOBX, M, N, P, L, SWAPPED,
+*                           A, LDA, B, LDB,
+*                           ALPHA, BETA,
+*                           U1, LDU1, U2, LDU2
+*                           WORK, LWORK, RWORK, LRWORK, IWORK, INFO )
+*
+*       .. Scalar Arguments ..
+*       CHARACTER          JOBU1, JOB2, JOBX
+*       INTEGER            INFO, LDA, LDB, LDU1, LDU2, M, N, P, L,
+*                          LWORK, LRWORK
+*       ..
+*       .. Array Arguments ..
+*       INTEGER            IWORK( * )
+*       REAL               ALPHA( N ), BETA( N ), RWORK( * )
+*       COMPLEX            A( LDA, * ), B( LDB, * ),
+*      $                   U1( LDU1, * ), U2( LDU2, * ),
+*      $                   WORK( * )
+*       ..
+*
+*
+*> \par Purpose:
+*  =============
+*>
+*> \verbatim
+*>
+*> CGGQRCS computes the generalized singular value decomposition (GSVD)
+*> of an M-by-N complex matrix A and P-by-N complex matrix B:
+*>
+*>       A = U1 * D1 * X,           B = U2 * D2 * X
+*>
+*> where U1 and U2 are unitary matrices. CGGQRCS uses the QR
+*> factorization with column pivoting and the 2-by-1 CS decomposition to
+*> compute the GSVD.
+*>
+*> Let L be the effective numerical rank of the matrix (A**H,B**H)**H,
+*> then X is a L-by-N nonsingular matrix, D1 and D2 are M-by-L and
+*> P-by-L "diagonal" matrices. If SWAPPED is false, then D1 and D2 are
+*> of the of the following structures, respectively:
+*>
+*>                 K1  K
+*>            K1 [ I   0   0 ]
+*>       D1 = K  [ 0   C   0 ]
+*>               [ 0   0   0 ]
+*>
+*>                     K   K2
+*>               [ 0   0   0 ]
+*>       D2 = K  [ 0   S   0 ]
+*>            K2 [ 0   0   I ]
+*>
+*> where
+*>
+*>   K  = MIN(M, P, L, M + P - L),
+*>   K1 = MAX(L - P, 0),
+*>   K2 = MAX(L - M, 0),
+*>   C  = diag( ALPHA(1), ..., ALPHA(K) ),
+*>   S  = diag( BETA(1), ..., BETA(K) ), and
+*>   C^2 + S^2 = I.
+*>
+*> If SWAPPED is true, then D1 and D2 are of the of the following
+*> structures, respectively:
+*>
+*>                     K   K1
+*>               [ 0   0   0 ]
+*>       D1 = K  [ 0   S   0 ]
+*>            K1 [ 0   0   I ]
+*>
+*>                 K2  K
+*>            K2 [ I   0   0 ]
+*>       D2 = K  [ 0   C   0 ]
+*>               [ 0   0   0 ]
+*>
+*> where
+*>
+*>   S  = diag( ALPHA(1), ..., ALPHA(K) ),
+*>   C  = diag( BETA(1), ..., BETA(K) ), and
+*>   C^2 + S^2 = I.
+*>
+*> The routine computes C, S and optionally the matrices U1, U2, and X.
+*> On exit, X is stored in WORK( 2:L*N+1 ).
+*>
+*> If B is an N-by-N nonsingular matrix, then the GSVD of the matrix
+*> pair (A, B) implicitly gives the SVD of A*inv(B):
+*>
+*>       A*inv(B) = U1*(D1*inv(D2))*U2**H.
+*>
+*> If (A**H,B**H)**H  has orthonormal columns, then the GSVD of A and B
+*> is also equal to the CS decomposition of A and B. Furthermore, the
+*> GSVD can be used to derive the solution of the eigenvalue problem:
+*>
+*>       A**H*A x = lambda * B**H*B x.
+*>
+*> In some literature, the GSVD of A and B is presented in the form
+*>
+*>       A = U1*D1*( 0 R )*Q**H,    B = U2*D2*( 0 R )*Q**H
+*>
+*> where U1, U2, and Q are unitary matrices. This latter GSVD form is
+*> computed directly by DGGSVD3. It is possible to convert between the
+*> two representations by calculating the RQ decomposition of X but this
+*> is not recommended for reasons of numerical stability.
+*>
+*> \endverbatim
+*
+*  Arguments:
+*  ==========
+*
+*> \param[in] JOBU1
+*> \verbatim
+*>          JOBU1 is CHARACTER*1
+*>          = 'Y':  Orthogonal matrix U1 is computed;
+*>          = 'N':  U1 is not computed.
+*> \endverbatim
+*>
+*> \param[in] JOBU2
+*> \verbatim
+*>          JOBU2 is CHARACTER*1
+*>          = 'Y':  Orthogonal matrix U2 is computed;
+*>          = 'N':  U2 is not computed.
+*> \endverbatim
+*>
+*> \param[in] JOBX
+*> \verbatim
+*>          JOBX is CHARACTER*1
+*>          = 'Y':  Matrix X is computed;
+*>          = 'N':  X is not computed.
+*> \endverbatim
+*>
+*> \param[in] M
+*> \verbatim
+*>          M is INTEGER
+*>          The number of rows of the matrix A.  M >= 1.
+*> \endverbatim
+*>
+*> \param[in] N
+*> \verbatim
+*>          N is INTEGER
+*>          The number of columns of the matrices A and B.  N >= 1.
+*> \endverbatim
+*>
+*> \param[in] P
+*> \verbatim
+*>          P is INTEGER
+*>          The number of rows of the matrix B.  P >= 1.
+*> \endverbatim
+*>
+*> \param[out] L
+*> \verbatim
+*>          L is INTEGER
+*>          On exit, the effective numerical rank of the matrix
+*>          (A**H, B**H)**H.
+*> \endverbatim
+*>
+*> \param[out] SWAPPED
+*> \verbatim
+*>          L is LOGICAL
+*>          On exit, SWAPPED is true if CGGQRCS swapped the input
+*>          matrices A, B and computed the GSVD of (B, A); false
+*>          otherwise.
+*> \endverbatim
+*>
+*> \param[in,out] A
+*> \verbatim
+*>          A is COMPLEX array, dimension (LDA,N)
+*>          On entry, the M-by-N matrix A.
+*> \endverbatim
+*>
+*> \param[in] LDA
+*> \verbatim
+*>          LDA is INTEGER
+*>          The leading dimension of the array A. LDA >= max(1,M).
+*> \endverbatim
+*>
+*> \param[in,out] B
+*> \verbatim
+*>          B is COMPLEX array, dimension (LDB,N)
+*>          On entry, the P-by-N matrix B.
+*> \endverbatim
+*>
+*> \param[in] LDB
+*> \verbatim
+*>          LDB is INTEGER
+*>          The leading dimension of the array B. LDB >= max(1,P).
+*> \endverbatim
+*>
+*> \param[out] ALPHA
+*> \verbatim
+*>          ALPHA is REAL array, dimension (N)
+*> \endverbatim
+*>
+*> \param[out] BETA
+*> \verbatim
+*>          BETA is REAL array, dimension (N)
+*>
+*>          On exit, ALPHA and BETA contain the K generalized singular
+*>          value pairs of A and B.
+*> \endverbatim
+*>
+*> \param[out] U1
+*> \verbatim
+*>          U1 is COMPLEX array, dimension (LDU1,M)
+*>          If JOBU1 = 'Y', U1 contains the M-by-M unitary matrix U1.
+*>          If JOBU1 = 'N', U1 is not referenced.
+*> \endverbatim
+*>
+*> \param[in] LDU1
+*> \verbatim
+*>          LDU1 is INTEGER
+*>          The leading dimension of the array U1. LDU1 >= max(1,M) if
+*>          JOBU1 = 'Y'; LDU1 >= 1 otherwise.
+*> \endverbatim
+*>
+*> \param[out] U2
+*> \verbatim
+*>          U2 is COMPLEX array, dimension (LDU2,P)
+*>          If JOBU2 = 'Y', U2 contains the P-by-P unitary matrix U2.
+*>          If JOBU2 = 'N', U2 is not referenced.
+*> \endverbatim
+*>
+*> \param[in] LDU2
+*> \verbatim
+*>          LDU2 is INTEGER
+*>          The leading dimension of the array U2. LDU2 >= max(1,P) if
+*>          JOBU2 = 'Y'; LDU2 >= 1 otherwise.
+*> \endverbatim
+*>
+*> \param[out] WORK
+*> \verbatim
+*>          WORK is COMPLEX array, dimension (MAX(1,LWORK))
+*>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*> \endverbatim
+*>
+*> \param[in] LWORK
+*> \verbatim
+*>          LWORK is INTEGER
+*>          The dimension of the array WORK.
+*>
+*>          If LWORK = -1, then a workspace query is assumed; the
+*>          routine only calculates the optimal size of the WORK array,
+*>          returns this value as the first entry of the WORK array, and
+*>          no error message related to LWORK is issued by XERBLA.
+*> \endverbatim
+*>
+*> \param[out] RWORK
+*> \verbatim
+*>          RWORK is REAL array, dimension (MAX(1,LRWORK))
+*> \endverbatim
+*>
+*> \param[in] LRWORK
+*> \verbatim
+*>          LRWORK is INTEGER
+*>          The dimension of the array RWORK.
+*>
+*>          If LRWORK = -1, then a workspace query is assumed; the routine
+*>          only calculates the optimal size of the RWORK array, returns
+*>          this value as the first entry of the work array, and no error
+*>          message related to LRWORK is issued by XERBLA.
+*> \endverbatim
+*>
+*> \param[out] IWORK
+*> \verbatim
+*>          IWORK is INTEGER array, dimension (M + N + P)
+*> \endverbatim
+*>
+*> \param[out] INFO
+*> \verbatim
+*>          INFO is INTEGER
+*>          = 0:  successful exit.
+*>          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*>          > 0:  CBBCSD did not converge. For further details, see
+*>                subroutine CUNCSDBY1.
+*> \endverbatim
+*
+*> \par Internal Parameters:
+*  =========================
+*>
+*> \verbatim
+*>  W       REAL
+*>          W is a radix power chosen such that the Frobenius norm of A
+*>          and W*B are with SQRT(RADIX) and 1/SQRT(RADIX) of each
+*>          other.
+*>
+*>  TOL     REAL
+*>          Let G = (A**H,B**H)**H. TOL is the threshold to determine
+*>          the effective rank of G. Generally, it is set to
+*>                   TOL = MAX( M + P, N ) * norm(G) * MACHEPS,
+*>          where norm(G) is the Frobenius norm of G.
+*>          The size of TOL may affect the size of backward error of the
+*>          decomposition.
+*> \endverbatim
+*
+*  Authors:
+*  ========
+*
+*> \author Christoph Conrads (https://christoph-conrads.name)
+*
+*> \ingroup realGEsing
+*
+*> \par Contributors:
+*  ==================
+*>
+*>     Christoph Conrads (https://christoph-conrads.name)
+*>
+*
+*> \par Further Details:
+*  =====================
+*>
+*>  CGGQRCS should be significantly faster than CGGSVD3 for large
+*>  matrices because the matrices A and B are reduced to a pair of
+*>  well-conditioned bidiagonal matrices instead of pairs of upper
+*>  triangular matrices. On the downside, CGGQRCS requires a much larger
+*>  workspace whose dimension must be queried at run-time. CGGQRCS also
+*>  offers no guarantees which of the two possible diagonal matrices
+*>  is used for the matrix factorization.
+*>
+*  =====================================================================
+      RECURSIVE SUBROUTINE CGGQRCS( JOBU1, JOBU2, JOBX, M, N, P, L,
+     $                              SWAPPED,
+     $                              A, LDA, B, LDB,
+     $                              ALPHA, BETA,
+     $                              U1, LDU1, U2, LDU2,
+     $                              WORK, LWORK, RWORK, LRWORK, IWORK,
+     $                              INFO )
+*
+*  -- LAPACK driver routine --
+*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
+*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd. --
+*
+      IMPLICIT NONE
+*     .. Scalar Arguments ..
+      LOGICAL            SWAPPED
+      CHARACTER          JOBU1, JOBU2, JOBX
+      INTEGER            INFO, LDA, LDB, LDU1, LDU2, L, M, N, P, LWORK,
+     $                   LRWKOPT, LRWORK, LRWORK2BY1
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      REAL               ALPHA( N ), BETA( N ), RWORK( * )
+      COMPLEX            A( LDA, * ), B( LDB, * ),
+     $                   U1( LDU1, * ), U2( LDU2, * ),
+     $                   WORK( * )
+*     ..
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX            CONE, CZERO
+      PARAMETER          ( CONE = ( 1.0E0, 0.0E0 ),
+     $                   CZERO = ( 0.0E0, 0.0E0 ) )
+*     .. Local Scalars ..
+      LOGICAL            WANTU1, WANTU2, WANTX, LQUERY
+      INTEGER            I, J, K, K1, LMAX, IG, IG11, IG21, IG22,
+     $                   IVT, IVT12, LDG, LDX, LDVT, LWKMIN, LWKOPT
+      REAL               BASE, NORMA, NORMB, NORMG, TOL, ULP, UNFL,
+     $                   THETA, IOTA, W
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLAMCH, CLANGE
+      EXTERNAL           LSAME, SLAMCH, CLANGE
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           CGEMM, CGEQP3, CLACPY, CLAPMT, CLASCL,
+     $                   CLASET, CUNGQR, CUNCSD2BY1, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          COS, MAX, MIN, SIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode and test the input parameters
+*
+      WANTU1 = LSAME( JOBU1, 'Y' )
+      WANTU2 = LSAME( JOBU2, 'Y' )
+      WANTX = LSAME( JOBX, 'Y' )
+      LQUERY = LWORK.EQ.-1 .OR. LRWORK.EQ.-1
+*
+*     Test the input arguments
+*
+      INFO = 0
+      IF( .NOT.( WANTU1 .OR. LSAME( JOBU1, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( WANTU2 .OR. LSAME( JOBU2, 'N' ) ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.( WANTX .OR. LSAME( JOBX, 'N' ) ) ) THEN
+         INFO = -3
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( P.LT.0 ) THEN
+         INFO = -6
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -10
+      ELSE IF( LDB.LT.MAX( 1, P ) ) THEN
+         INFO = -12
+      ELSE IF( LDU1.LT.1 .OR. ( WANTU1 .AND. LDU1.LT.M ) ) THEN
+         INFO = -16
+      ELSE IF( LDU2.LT.1 .OR. ( WANTU2 .AND. LDU2.LT.P ) ) THEN
+         INFO = -18
+      ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN
+         INFO = -20
+      ELSE IF( LRWORK.LT.1 .AND. .NOT.LQUERY ) THEN
+         INFO = -22
+      END IF
+*
+*     Make sure A is the matrix smaller in norm
+*
+      IF( INFO.EQ.0 ) THEN
+         NORMA = CLANGE( 'F', M, N, A, LDA, RWORK )
+         NORMB = CLANGE( 'F', P, N, B, LDB, RWORK )
+*
+         IF( NORMA.GT.SQRT( 2.0E0 ) * NORMB ) THEN
+            CALL CGGQRCS( JOBU2, JOBU1, JOBX, P, N, M, L,
+     $                    SWAPPED,
+     $                    B, LDB, A, LDA,
+     $                    BETA, ALPHA,
+     $                    U2, LDU2, U1, LDU1,
+     $                    WORK, LWORK, RWORK, LRWORK, IWORK, INFO )
+            SWAPPED = .TRUE.
+            RETURN
+         ENDIF
+*
+*     Past this point, we know that
+*     * NORMA <= NORMB (almost)
+*     * W >= 1
+*     * ALPHA will contain cosine values at the end
+*     * BETA will contain sine values at the end
+*
+      END IF
+*
+*     Initialize variables
+*
+*
+      SWAPPED = .FALSE.
+      L = 0
+*     The leading dimension must never be zero
+      LDG = MAX( M + P, 1 )
+      LDVT = N
+      LMAX = MIN( M + P, N )
+      IG = 1
+      IG11 = 1
+      IG21 = M + 1
+      IG22 = LDG * M + M + 1
+      IVT = LDG * N + 2
+      IVT12 = IVT + LDVT * M
+      THETA = -1
+      IOTA = -1
+      W = -1
+*
+*     Compute workspace
+*
+      IF( INFO.EQ.0 ) THEN
+         LWKMIN = 0
+         LWKOPT = 0
+*
+         CALL CGEQP3( M + P, N, WORK( IG ), LDG, IWORK, WORK, WORK, -1,
+     $                RWORK, INFO )
+         LWKMIN = MAX( LWKMIN, N + 1 )
+         LWKOPT = MAX( LWKOPT, INT( WORK( 1 ) ) )
+*
+         CALL CUNGQR( M + P, LMAX, LMAX, WORK( IG ), LDG, WORK, WORK,
+     $                -1, INFO )
+         LWKMIN = MAX( LWKMIN, LMAX )
+         LWKOPT = MAX( LWKOPT, INT( WORK( 1 ) ) )
+*
+         CALL CUNCSD2BY1( JOBU1, JOBU2, JOBX, M + P, M, LMAX,
+     $                    WORK( IG ), LDG, WORK( IG ), LDG,
+     $                    ALPHA,
+     $                    U1, LDU1, U2, LDU2, WORK( IVT ), LDVT,
+     $                    WORK, -1, RWORK, LRWORK, IWORK, INFO )
+         LWKMIN = MAX( LWKMIN, INT( WORK( 1 ) ) )
+         LWKOPT = MAX( LWKOPT, INT( WORK( 1 ) ) )
+*        The matrix (A, B) must be stored sequentially for CUNGQR
+         LWKMIN = LWKMIN + IVT
+         LWKOPT = LWKOPT + IVT
+*        2-by-1 CSD matrix V1 must be stored
+         IF( WANTX ) THEN
+            LWKMIN = LWKMIN + LDVT*N
+            LWKOPT = LWKOPT + LDVT*N
+         END IF
+*        Adjust CUNCSD2BY1 LRWORK for case with maximum memory
+*        consumption
+         LRWORK2BY1 = INT( RWORK(1) )
+*        Select safe xUNCSD2BY1 IBBCSD value
+     $                - 9 * MAX( 0, MIN( M, P, N, M+P-N-1 ) )
+     $                + 9 * MAX( 1, MIN( M, P, N ) )
+*        Select safe xUNCSD2BY1 LBBCSD value
+     $                - 8 * MAX( 0, MIN( M, P, N, M+P-N ) )
+     $                + 8 * MIN( M, P, N )
+         LRWKOPT = MAX( 2*N, LRWORK2BY1 )
+*        Check workspace size
+         IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) THEN
+            INFO = -20
+         END IF
+         IF( LRWORK.LT.LRWKOPT .AND. .NOT.LQUERY ) THEN
+            INFO = -22
+         END IF
+*
+         WORK( 1 ) = CMPLX( REAL( LWKOPT ), 0.0E0 )
+         RWORK( 1 ) = REAL( LRWKOPT )
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CGGQRCS', -INFO )
+         RETURN
+      END IF
+      IF( LQUERY ) THEN
+         RETURN
+      ENDIF
+*     Finish initialization
+      IF( .NOT.WANTX ) THEN
+         LDVT = 0
+      END IF
+*
+*     Scale matrix A such that norm(A) \approx norm(B)
+*
+      IF( NORMA.EQ.0.0E0 ) THEN
+         W = 1.0E0
+      ELSE
+         BASE = SLAMCH( 'B' )
+         W = BASE ** INT( LOG( NORMB / NORMA ) / LOG( BASE ) )
+*
+         CALL CLASCL( 'G', -1, -1, 1.0E0, W, M, N, A, LDA, INFO )
+         IF ( INFO.NE.0 ) THEN
+            RETURN
+         END IF
+      END IF
+*
+*     Copy matrices A, B into the (M+P) x N matrix G
+*
+      CALL CLACPY( 'A', M, N, A, LDA, WORK( IG11 ), LDG )
+      CALL CLACPY( 'A', P, N, B, LDB, WORK( IG21 ), LDG )
+*
+*     Compute the Frobenius norm of matrix G
+*
+      NORMG = NORMB * SQRT( 1.0E0 + ( ( W * NORMA ) / NORMB )**2 )
+*
+*     Get machine precision and set up threshold for determining
+*     the effective numerical rank of the matrix G.
+*
+      ULP = SLAMCH( 'Precision' )
+      UNFL = SLAMCH( 'Safe Minimum' )
+      TOL = MAX( M + P, N ) * MAX( NORMG, UNFL ) * ULP
+*
+*     IWORK stores the column permutations computed by CGEQP3.
+*     Columns J where IWORK( J ) is non-zero are permuted to the front
+*     so we set the all entries to zero here.
+*
+      IWORK( 1:N ) = 0
+*
+*     Compute the QR factorization with column pivoting GΠ = Q1 R1
+*
+      CALL CGEQP3( M + P, N, WORK( IG ), LDG, IWORK, WORK( IVT ),
+     $             WORK( IVT + LMAX ), LWORK - IVT - LMAX + 1, RWORK,
+     $             INFO )
+      IF( INFO.NE.0 ) THEN
+         RETURN
+      END IF
+*
+*     Determine the rank of G
+*
+      DO I = 1, MIN( M + P, N )
+         IF( ABS( WORK( (I-1) * LDG + I ) ).LE.TOL ) THEN
+            EXIT
+         END IF
+         L = L + 1
+      END DO
+*
+*     Handle rank=0 case
+*
+      IF( L.EQ.0 ) THEN
+         IF( WANTU1 ) THEN
+            CALL CLASET( 'A', M, M, CZERO, CONE, U1, LDU1 )
+         END IF
+         IF( WANTU2 ) THEN
+            CALL CLASET( 'A', P, P, CZERO, CONE, U2, LDU2 )
+         END IF
+*
+         WORK( 1 ) = CMPLX( REAL ( LWKOPT ), 0.0E0 )
+         RWORK( 1 ) = REAL( LRWKOPT )
+         RETURN
+      END IF
+*
+*     Copy R1( 1:L, : ) into A, B and set lower triangular part to zero
+*
+      IF( WANTX ) THEN
+         IF( L.LE.M ) THEN
+             CALL CLACPY( 'U', L, N, WORK( IG ), LDG, A, LDA )
+             CALL CLASET( 'L', L - 1, N, CZERO, CZERO, A( 2, 1 ), LDA )
+         ELSE
+             CALL CLACPY( 'U', M, N, WORK( IG ), LDG, A, LDA )
+             CALL CLACPY( 'U', L - M, N - M, WORK( IG22 ), LDG, B, LDB )
+*
+             CALL CLASET( 'L', M - 1, N, CZERO, CZERO, A( 2, 1 ), LDA )
+             CALL CLASET( 'L', L-M-1, N, CZERO, CZERO, B( 2, 1 ), LDB )
+         END IF
+      END IF
+*
+*     Explicitly form Q1 so that we can compute the CS decomposition
+*
+      CALL CUNGQR( M + P, L, L, WORK( IG ), LDG, WORK( IVT ),
+     $             WORK( IVT + L ), LWORK - IVT - L + 1, INFO )
+      IF ( INFO.NE.0 ) THEN
+         RETURN
+      END IF
+*
+*     Compute the CS decomposition of Q1( :, 1:L )
+*
+      K = MIN( M, P, L, M + P - L )
+      K1 = MAX( L - P, 0 )
+      CALL CUNCSD2BY1( JOBU1, JOBU2, JOBX, M + P, M, L,
+     $                 WORK( IG11 ), LDG, WORK( IG21 ), LDG,
+     $                 ALPHA,
+     $                 U1, LDU1, U2, LDU2, WORK( IVT ), LDVT,
+     $                 WORK( IVT + LDVT*N ), LWORK - IVT - LDVT*N + 1,
+     $                 RWORK, LRWORK,
+     $                 IWORK( N + 1 ), INFO )
+      IF( INFO.NE.0 ) THEN
+         RETURN
+      END IF
+*
+*     Compute X = V^T R1( 1:L, : ) and adjust for matrix scaling
+*
+      IF( WANTX ) THEN
+         LDX = L
+         IF ( L.LE.M ) THEN
+            CALL CGEMM( 'N', 'N', L, N, L,
+     $                  CONE, WORK( IVT ), LDVT, A, LDA,
+     $                  CZERO, WORK( 2 ), LDX )
+         ELSE
+            CALL CGEMM( 'N', 'N', L, N, M,
+     $                  CONE, WORK( IVT ), LDVT, A, LDA,
+     $                  CZERO, WORK( 2 ), LDX )
+            CALL CGEMM( 'N', 'N', L, N - M, L - M,
+     $                  CONE, WORK( IVT12 ), LDVT, B, LDB,
+     $                  CONE, WORK( L*M + 2 ), LDX )
+         END IF
+*        Revert column permutation Π by permuting the columns of X
+         CALL CLAPMT( .FALSE., L, N, WORK( 2 ), LDX, IWORK )
+      END IF
+*
+*     Adjust generalized singular values for matrix scaling
+*     Compute sine, cosine values
+*     Prepare row scaling of X
+*
+      DO I = 1, K
+         THETA = ALPHA( I )
+*        Do not adjust singular value if THETA is greater
+*        than pi/2 (infinite singular values won't change)
+         IF( COS( THETA ).LE.0.0E0 ) THEN
+            ALPHA( I ) = 0.0E0
+            BETA( I ) = 1.0E0
+            IF( WANTX ) THEN
+               RWORK( I ) = 1.0E0
+            END IF
+         ELSE
+*           iota comes in the greek alphabet after theta
+            IOTA = ATAN( W * TAN( THETA ) )
+*           ensure sine, cosine divisor is far away from zero
+*           w is a power of two and will cause no trouble
+            IF( SIN( IOTA ) .GE. COS( IOTA ) ) THEN
+               ALPHA( I ) =  ( SIN( IOTA ) / TAN( THETA ) ) / W
+               BETA( I ) = SIN( IOTA )
+               IF( WANTX ) THEN
+                  RWORK( I ) = SIN( THETA ) / SIN( IOTA )
+               END IF
+            ELSE
+               ALPHA( I ) = COS( IOTA )
+               BETA( I ) = SIN( IOTA )
+               IF( WANTX ) THEN
+                  RWORK( I ) = COS( THETA ) / COS( IOTA ) / W
+               END IF
+            END IF
+         END IF
+      END DO
+*     Adjust rows of X for matrix scaling
+      IF( WANTX ) THEN
+         DO J = 0, N-1
+            DO I = 1, K1
+               WORK( LDX*J + I + 1 ) = WORK( LDX*J + I + 1 ) / W
+            END DO
+            DO I = 1, K
+               WORK( LDX*J + I + K1 + 1 ) =
+     $         WORK( LDX*J + I + K1 + 1 ) * RWORK( I )
+            END DO
+         END DO
+      END IF
+*
+      WORK( 1 ) = CMPLX( REAL( LWKOPT ), 0.0E0 )
+      RWORK( 1 ) = REAL( LRWKOPT )
+      RETURN
+*
+*     End of CGGQRCS
+*
+      END
diff --git a/SRC/clasrtr.f b/SRC/clasrtr.f
new file mode 100644
index 0000000000..b2450965d9
--- /dev/null
+++ b/SRC/clasrtr.f
@@ -0,0 +1,191 @@
+*> \brief \b CLASRTR sorts array indices based on the referenced numbers
+*
+*  =========== DOCUMENTATION ===========
+*
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
+*
+*> \htmlonly
+*> Download CLASRTR + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/clasrtr.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/clasrtr.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/clasrtr.f">
+*> [TXT]</a>
+*> \endhtmlonly
+*
+*  Definition:
+*  ===========
+*
+*       SUBROUTINE CLASRTR( ID, M, N, A, LDA, IPVT, RWORK, INFO )
+*
+*       .. Scalar Arguments ..
+*       CHARACTER          ID
+*       INTEGER            INFO, M, N, LDA
+*       ..
+*       .. Array Arguments ..
+*       COMPLEX            A( LDA, * )
+*       REAL               RWORK( * )
+*       INTEGER            IPVT( * )
+*       ..
+*
+*
+*> \par Purpose:
+*  =============
+*>
+*> \verbatim
+*> Apply row sorting based on the maximum norm of the rows of A. The
+*> rows can be sorted in increasing (if ID = 'I') or decreasing order
+*> (if ID = 'D').
+*> \endverbatim
+*
+*  Arguments:
+*  ==========
+*
+*> \param[in] ID
+*> \verbatim
+*>          ID is CHARACTER*1
+*>          = 'I': sort the rows of A by increasing row norm;
+*>          = 'D': sort the rows of A by decreasing row norm.
+*> \endverbatim
+*>
+*> \param[in] M
+*> \verbatim
+*>          M is INTEGER
+*>          The number of rows of the matrix A. M >= 0.
+*> \endverbatim
+*>
+*> \param[in] N
+*> \verbatim
+*>          N is INTEGER
+*>          The number of columns of the matrix A. N >= 0.
+*> \endverbatim
+*>
+*> \param[in,out] A
+*> \verbatim
+*>          A is COMPLEX array, dimension (LDA,N)
+*>          On entry, the M by N matrix A.
+*>          On exit, A contains the row-permuted matrix A.
+*> \endverbatim
+*>
+*> \param[in] LDA
+*> \verbatim
+*>          LDA is INTEGER
+*>          The leading dimension of the array A. LDA >= max(1,M).
+*> \endverbatim
+*>
+*> \param[out] IPVT
+*> \verbatim
+*>          IPVT is INTEGER array, dimension (M)
+*>          On exit, if IPVT(J)=K, then the J-th row of A*P was the
+*>          the K-th row of A.
+*> \endverbatim
+*>
+*> \param[out] RWORK
+*> \verbatim
+*>          RWORK is REAL array, dimension (M).
+*>          On exit, RWORK contains the maximum norms of the rows of A.
+*> \endverbatim
+*>
+*> \param[out] INFO
+*> \verbatim
+*>          INFO is INTEGER
+*>          = 0:  successful exit
+*>          < 0:  if INFO = -i, the i-th argument had an illegal value
+*> \endverbatim
+*
+*  Authors:
+*  ========
+*
+*> \author Christoph Conrads (https://christoph-conrads.name)
+*
+*> \ingroup auxOTHERcomputational
+*
+*  =====================================================================
+      SUBROUTINE CLASRTR( ID, M, N, A, LDA, IPVT, RWORK, INFO )
+*
+*  -- LAPACK computational routine --
+*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
+*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
+*
+*     .. Scalar Arguments ..
+      CHARACTER          ID
+      INTEGER            INFO, M, N, LDA
+*     ..
+*     .. Array Arguments ..
+      COMPLEX            A( LDA, * )
+      REAL               RWORK( * )
+      INTEGER            IPVT( * )
+*     ..
+*
+*  =====================================================================
+*
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, CLAPMR, SLASRTI
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( .NOT.LSAME( ID, 'I' ) .AND. .NOT.LSAME( ID, 'D' ) ) THEN
+         INFO = -1
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'CLASRTR', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.LE.1 .OR. N.LT.1 )
+     $   RETURN
+*
+*     Compute maximum norm of each row
+*
+      DO I = 1, M
+         RWORK( I ) = ABS( A( I, 1 ) )
+      END DO
+*
+      DO I = 1, M
+         DO J = 2, N
+            RWORK( I ) = MAX( RWORK( I ), ABS( A( I, J ) ) )
+         END DO
+      END DO
+*
+*     Sort row indices
+*
+      DO I = 1, M
+         IPVT( I ) = I
+      END DO
+*
+      CALL SLASRTI( ID, M, RWORK, IPVT, INFO )
+      IF( INFO.NE.0 ) THEN
+         RETURN
+      END IF
+*
+*     Sort rows
+*
+      CALL CLAPMR( .TRUE., M, N, A, LDA, IPVT )
+*
+*     End of CLASRTR
+*
+      END
diff --git a/SRC/dggqrcs.f b/SRC/dggqrcs.f
new file mode 100644
index 0000000000..fc56c2e6c1
--- /dev/null
+++ b/SRC/dggqrcs.f
@@ -0,0 +1,670 @@
+*> \brief <b> DGGQRCS computes the singular value decomposition (SVD) for OTHER matrices</b>
+*
+*  =========== DOCUMENTATION ===========
+*
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
+*
+*> \htmlonly
+*> Download DGGQRCS + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dggqrcs.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dggqrcs.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dggqrcs.f">
+*> [TXT]</a>
+*> \endhtmlonly
+*
+*  Definition:
+*  ===========
+*
+*       SUBROUTINE DGGQRCS( JOBU1, JOBU2, JOBX, M, N, P, L, SWAPPED,
+*                           A, LDA, B, LDB,
+*                           ALPHA, BETA,
+*                           U1, LDU1, U2, LDU2
+*                           WORK, LWORK, IWORK, INFO )
+*
+*       .. Scalar Arguments ..
+*       CHARACTER          JOBU1, JOB2, JOBX
+*       INTEGER            INFO, LDA, LDB, LDU1, LDU2, M, N, P, L, LWORK
+*       ..
+*       .. Array Arguments ..
+*       INTEGER            IWORK( * )
+*       DOUBLE PRECISION   A( LDA, * ), B( LDB, * ),
+*      $                   ALPHA( N ), BETA( N ),
+*      $                   U1( LDU1, * ), U2( LDU2, * ),
+*      $                   WORK( * )
+*       ..
+*
+*
+*> \par Purpose:
+*  =============
+*>
+*> \verbatim
+*>
+*> DGGQRCS computes the generalized singular value decomposition (GSVD)
+*> of an M-by-N real matrix A and P-by-N real matrix B:
+*>
+*>       A = U1 * D1 * X,           B = U2 * D2 * X
+*>
+*> where U1 and U2 are orthogonal matrices. DGGQRCS uses the QR
+*> factorization with column pivoting and the 2-by-1 CS decomposition to
+*> compute the GSVD.
+*>
+*> Let L be the effective numerical rank of the matrix (A**T,B**T)**T,
+*> then X is a L-by-N nonsingular matrix, D1 and D2 are M-by-L and
+*> P-by-L "diagonal" matrices. If SWAPPED is false, then D1 and D2 are
+*> of the of the following structures, respectively:
+*>
+*>                 K1  K
+*>            K1 [ I   0   0 ]
+*>       D1 = K  [ 0   C   0 ]
+*>               [ 0   0   0 ]
+*>
+*>                     K   K2
+*>               [ 0   0   0 ]
+*>       D2 = K  [ 0   S   0 ]
+*>            K2 [ 0   0   I ]
+*>
+*> where
+*>
+*>   K  = MIN(M, P, L, M + P - L),
+*>   K1 = MAX(L - P, 0),
+*>   K2 = MAX(L - M, 0),
+*>   C  = diag( ALPHA(1), ..., ALPHA(K) ),
+*>   S  = diag( BETA(1), ..., BETA(K) ), and
+*>   C^2 + S^2 = I.
+*>
+*> If SWAPPED is true, then D1 and D2 are of the of the following
+*> structures, respectively:
+*>
+*>                     K   K1
+*>               [ 0   0   0 ]
+*>       D1 = K  [ 0   S   0 ]
+*>            K1 [ 0   0   I ]
+*>
+*>                 K2  K
+*>            K2 [ I   0   0 ]
+*>       D2 = K  [ 0   C   0 ]
+*>               [ 0   0   0 ]
+*>
+*> where
+*>
+*>   S  = diag( ALPHA(1), ..., ALPHA(K) ),
+*>   C  = diag( BETA(1), ..., BETA(K) ), and
+*>   C^2 + S^2 = I.
+*>
+*> The routine computes C, S and optionally the matrices U1, U2, and X.
+*> On exit, X is stored in WORK( 2:L*N+1 ).
+*>
+*> If B is an N-by-N nonsingular matrix, then the GSVD of the matrix
+*> pair (A, B) implicitly gives the SVD of A*inv(B):
+*>
+*>       A*inv(B) = U1*(D1*inv(D2))*U2**T.
+*>
+*> If (A**T,B**T)**T  has orthonormal columns, then the GSVD of A and B
+*> is also equal to the CS decomposition of A and B. Furthermore, the
+*> GSVD can be used to derive the solution of the eigenvalue problem:
+*>
+*>       A**T*A x = lambda * B**T*B x.
+*>
+*> In some literature, the GSVD of A and B is presented in the form
+*>
+*>       A = U1*D1*( 0 R )*Q**T,    B = U2*D2*( 0 R )*Q**T
+*>
+*> where U1, U2, and Q are orthogonal matrices. This latter GSVD form is
+*> computed directly by DGGSVD3. It is possible to convert between the
+*> two representations by calculating the RQ decomposition of X but this
+*> is not recommended for reasons of numerical stability.
+*>
+*> \endverbatim
+*
+*  Arguments:
+*  ==========
+*
+*> \param[in] JOBU1
+*> \verbatim
+*>          JOBU1 is CHARACTER*1
+*>          = 'Y':  Orthogonal matrix U1 is computed;
+*>          = 'N':  U1 is not computed.
+*> \endverbatim
+*>
+*> \param[in] JOBU2
+*> \verbatim
+*>          JOBU2 is CHARACTER*1
+*>          = 'Y':  Orthogonal matrix U2 is computed;
+*>          = 'N':  U2 is not computed.
+*> \endverbatim
+*>
+*> \param[in] JOBX
+*> \verbatim
+*>          JOBX is CHARACTER*1
+*>          = 'Y':  Matrix X is computed;
+*>          = 'N':  X is not computed.
+*> \endverbatim
+*>
+*> \param[in] M
+*> \verbatim
+*>          M is INTEGER
+*>          The number of rows of the matrix A.  M >= 1.
+*> \endverbatim
+*>
+*> \param[in] N
+*> \verbatim
+*>          N is INTEGER
+*>          The number of columns of the matrices A and B.  N >= 1.
+*> \endverbatim
+*>
+*> \param[in] P
+*> \verbatim
+*>          P is INTEGER
+*>          The number of rows of the matrix B.  P >= 1.
+*> \endverbatim
+*>
+*> \param[out] L
+*> \verbatim
+*>          L is INTEGER
+*>          On exit, the effective numerical rank of the matrix
+*>          (A**T, B**T)**T.
+*> \endverbatim
+*>
+*> \param[out] SWAPPED
+*> \verbatim
+*>          L is LOGICAL
+*>          On exit, SWAPPED is true if DGGQRCS swapped the input
+*>          matrices A, B and computed the GSVD of (B, A); false
+*>          otherwise.
+*> \endverbatim
+*>
+*> \param[in,out] A
+*> \verbatim
+*>          A is DOUBLE PRECISION array, dimension (LDA,N)
+*>          On entry, the M-by-N matrix A.
+*> \endverbatim
+*>
+*> \param[in] LDA
+*> \verbatim
+*>          LDA is INTEGER
+*>          The leading dimension of the array A. LDA >= max(1,M).
+*> \endverbatim
+*>
+*> \param[in,out] B
+*> \verbatim
+*>          B is DOUBLE PRECISION array, dimension (LDB,N)
+*>          On entry, the P-by-N matrix B.
+*> \endverbatim
+*>
+*> \param[in] LDB
+*> \verbatim
+*>          LDB is INTEGER
+*>          The leading dimension of the array B. LDB >= max(1,P).
+*> \endverbatim
+*>
+*> \param[out] ALPHA
+*> \verbatim
+*>          ALPHA is DOUBLE PRECISION array, dimension (N)
+*> \endverbatim
+*>
+*> \param[out] BETA
+*> \verbatim
+*>          BETA is DOUBLE PRECISION array, dimension (N)
+*>
+*>          On exit, ALPHA and BETA contain the K generalized singular
+*>          value pairs of A and B.
+*> \endverbatim
+*>
+*> \param[out] U1
+*> \verbatim
+*>          U1 is DOUBLE PRECISION array, dimension (LDU1,M)
+*>          If JOBU1 = 'Y', U1 contains the M-by-M orthogonal matrix U1.
+*>          If JOBU1 = 'N', U1 is not referenced.
+*> \endverbatim
+*>
+*> \param[in] LDU1
+*> \verbatim
+*>          LDU1 is INTEGER
+*>          The leading dimension of the array U1. LDU1 >= max(1,M) if
+*>          JOBU1 = 'Y'; LDU1 >= 1 otherwise.
+*> \endverbatim
+*>
+*> \param[out] U2
+*> \verbatim
+*>          U2 is DOUBLE PRECISION array, dimension (LDU2,P)
+*>          If JOBU2 = 'Y', U2 contains the P-by-P orthogonal matrix U2.
+*>          If JOBU2 = 'N', U2 is not referenced.
+*> \endverbatim
+*>
+*> \param[in] LDU2
+*> \verbatim
+*>          LDU2 is INTEGER
+*>          The leading dimension of the array U2. LDU2 >= max(1,P) if
+*>          JOBU2 = 'Y'; LDU2 >= 1 otherwise.
+*> \endverbatim
+*>
+*> \param[out] WORK
+*> \verbatim
+*>          WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK))
+*>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*> \endverbatim
+*>
+*> \param[in] LWORK
+*> \verbatim
+*>          LWORK is INTEGER
+*>          The dimension of the array WORK.
+*>
+*>          If LWORK = -1, then a workspace query is assumed; the
+*>          routine only calculates the optimal size of the WORK array,
+*>          returns this value as the first entry of the WORK array, and
+*>          no error message related to LWORK is issued by XERBLA.
+*> \endverbatim
+*>
+*> \param[out] IWORK
+*> \verbatim
+*>          IWORK is INTEGER array, dimension (M + N + P)
+*> \endverbatim
+*>
+*> \param[out] INFO
+*> \verbatim
+*>          INFO is INTEGER
+*>          = 0:  successful exit.
+*>          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*>          > 0:  DBBCSD did not converge. For further details, see
+*>                subroutine DORCSDBY1.
+*> \endverbatim
+*
+*> \par Internal Parameters:
+*  =========================
+*>
+*> \verbatim
+*>  W       DOUBLE PRECISION
+*>          W is a radix power chosen such that the Frobenius norm of A
+*>          and W*B are with SQRT(RADIX) and 1/SQRT(RADIX) of each
+*>          other.
+*>
+*>  TOL     DOUBLE PRECISION
+*>          Let G = (A**T,B**T)**T. TOL is the threshold to determine
+*>          the effective rank of G. Generally, it is set to
+*>                   TOL = MAX( M + P, N ) * norm(G) * MACHEPS,
+*>          where norm(G) is the Frobenius norm of G.
+*>          The size of TOL may affect the size of backward error of the
+*>          decomposition.
+*> \endverbatim
+*
+*  Authors:
+*  ========
+*
+*> \author Christoph Conrads (https://christoph-conrads.name)
+*
+*> \ingroup doubleGEsing
+*
+*> \par Contributors:
+*  ==================
+*>
+*>     Christoph Conrads (https://christoph-conrads.name)
+*>
+*
+*> \par Further Details:
+*  =====================
+*>
+*>  DGGQRCS should be significantly faster than DGGSVD3 for large
+*>  matrices because the matrices A and B are reduced to a pair of
+*>  well-conditioned bidiagonal matrices instead of pairs of upper
+*>  triangular matrices. On the downside, DGGQRCS requires a much larger
+*>  workspace whose dimension must be queried at run-time. DGGQRCS also
+*>  offers no guarantees which of the two possible diagonal matrices
+*>  is used for the matrix factorization.
+*>
+*  =====================================================================
+      RECURSIVE SUBROUTINE DGGQRCS( JOBU1, JOBU2, JOBX, M, N, P, L,
+     $                              SWAPPED,
+     $                              A, LDA, B, LDB,
+     $                              ALPHA, BETA,
+     $                              U1, LDU1, U2, LDU2,
+     $                              WORK, LWORK, IWORK, INFO )
+*
+*  -- LAPACK driver routine --
+*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
+*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd. --
+*
+      IMPLICIT NONE
+*     .. Scalar Arguments ..
+      LOGICAL            SWAPPED
+      CHARACTER          JOBU1, JOBU2, JOBX
+      INTEGER            INFO, LDA, LDB, LDU1, LDU2, L, M, N, P, LWORK
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      DOUBLE PRECISION   A( LDA, * ), B( LDB, * ),
+     $                   ALPHA( N ), BETA( N ),
+     $                   U1( LDU1, * ), U2( LDU2, * ),
+     $                   WORK( * )
+*     ..
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            WANTU1, WANTU2, WANTX, LQUERY
+      INTEGER            I, J, K, K1, LMAX, IG, IG11, IG21, IG22,
+     $                   IVT, IVT12, LDG, LDX, LDVT, LWKMIN, LWKOPT
+      DOUBLE PRECISION   BASE, NORMA, NORMB, NORMG, TOL, ULP, UNFL,
+     $                   THETA, IOTA, W
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH, DLANGE
+      EXTERNAL           LSAME, DLAMCH, DLANGE
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           DGEMM, DGEQP3, DLACPY, DLAPMT, DLASCL,
+     $                   DLASET, DORGQR, DORCSD2BY1, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          COS, MAX, MIN, SIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode and test the input parameters
+*
+      WANTU1 = LSAME( JOBU1, 'Y' )
+      WANTU2 = LSAME( JOBU2, 'Y' )
+      WANTX = LSAME( JOBX, 'Y' )
+      LQUERY = ( LWORK.EQ.-1 )
+*
+*     Test the input arguments
+*
+      INFO = 0
+      IF( .NOT.( WANTU1 .OR. LSAME( JOBU1, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( WANTU2 .OR. LSAME( JOBU2, 'N' ) ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.( WANTX .OR. LSAME( JOBX, 'N' ) ) ) THEN
+         INFO = -3
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( P.LT.0 ) THEN
+         INFO = -6
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -10
+      ELSE IF( LDB.LT.MAX( 1, P ) ) THEN
+         INFO = -12
+      ELSE IF( LDU1.LT.1 .OR. ( WANTU1 .AND. LDU1.LT.M ) ) THEN
+         INFO = -16
+      ELSE IF( LDU2.LT.1 .OR. ( WANTU2 .AND. LDU2.LT.P ) ) THEN
+         INFO = -18
+      ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN
+         INFO = -20
+      END IF
+*
+*     Make sure A is the matrix smaller in norm
+*
+      IF( INFO.EQ.0 ) THEN
+         NORMA = DLANGE( 'F', M, N, A, LDA, WORK )
+         NORMB = DLANGE( 'F', P, N, B, LDB, WORK )
+*
+         IF( NORMA.GT.SQRT( 2.0D0 ) * NORMB ) THEN
+            CALL DGGQRCS( JOBU2, JOBU1, JOBX, P, N, M, L,
+     $                    SWAPPED,
+     $                    B, LDB, A, LDA,
+     $                    BETA, ALPHA,
+     $                    U2, LDU2, U1, LDU1,
+     $                    WORK, LWORK, IWORK, INFO )
+            SWAPPED = .TRUE.
+            RETURN
+         ENDIF
+*
+*     Past this point, we know that
+*     * NORMA <= NORMB (almost)
+*     * W >= 1
+*     * ALPHA will contain cosine values at the end
+*     * BETA will contain sine values at the end
+*
+      END IF
+*
+*     Initialize variables
+*
+      SWAPPED = .FALSE.
+      L = 0
+*     The leading dimension must never be zero
+      LDG = MAX( M + P, 1 )
+      LDVT = N
+      LMAX = MIN( M + P, N )
+      IG = 1
+      IG11 = 1
+      IG21 = M + 1
+      IG22 = LDG * M + M + 1
+      IVT = LDG * N + 2
+      IVT12 = IVT + LDVT * M
+      THETA = -1
+      IOTA = -1
+      W = -1
+*
+*     Compute workspace
+*
+      IF( INFO.EQ.0 ) THEN
+         LWKMIN = 0
+         LWKOPT = 0
+*
+         CALL DGEQP3( M + P, N, WORK( IG ), LDG, IWORK, ALPHA, WORK, -1,
+     $                INFO )
+         LWKMIN = MAX( LWKMIN, 3 * N + 1 )
+         LWKOPT = MAX( LWKOPT, INT( WORK( 1 ) ) )
+*
+         CALL DORGQR( M + P, LMAX, LMAX, WORK( IG ), LDG, ALPHA, WORK,
+     $                -1, INFO )
+         LWKMIN = MAX( LWKMIN, LMAX )
+         LWKOPT = MAX( LWKOPT, INT( WORK( 1 ) ) )
+*
+         CALL DORCSD2BY1( JOBU1, JOBU2, JOBX, M + P, M, LMAX,
+     $                    WORK( IG ), LDG, WORK( IG ), LDG,
+     $                    ALPHA,
+     $                    U1, LDU1, U2, LDU2, WORK( IVT ), LDVT,
+     $                    WORK, -1, IWORK, INFO )
+         LWKMIN = MAX( LWKMIN, INT( WORK( 1 ) ) )
+         LWKOPT = MAX( LWKOPT, INT( WORK( 1 ) ) )
+*        The matrix (A, B) must be stored sequentially for DORGQR
+         LWKMIN = LWKMIN + IVT
+         LWKOPT = LWKOPT + IVT
+*        2-by-1 CSD matrix V1 must be stored
+         IF( WANTX ) THEN
+            LWKMIN = LWKMIN + LDVT*N
+            LWKOPT = LWKOPT + LDVT*N
+         END IF
+*        Check for minimum workspace size
+         IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) THEN
+            INFO = -20
+         END IF
+*
+         WORK( 1 ) = DBLE( LWKOPT )
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DGGQRCS', -INFO )
+         RETURN
+      END IF
+      IF( LQUERY ) THEN
+         RETURN
+      ENDIF
+*     Finish initialization
+      IF( .NOT.WANTX ) THEN
+         LDVT = 0
+      END IF
+*
+*     Scale matrix A such that norm(A) \approx norm(B)
+*
+      IF( NORMA.EQ.0.0D0 ) THEN
+         W = 1.0D0
+      ELSE
+         BASE = DLAMCH( 'B' )
+         W = BASE ** INT( LOG( NORMB / NORMA ) / LOG( BASE ) )
+*
+         CALL DLASCL( 'G', -1, -1, 1.0D0, W, M, N, A, LDA, INFO )
+         IF ( INFO.NE.0 ) THEN
+            RETURN
+         END IF
+      END IF
+*
+*     Copy matrices A, B into the (M+P) x N matrix G
+*
+      CALL DLACPY( 'A', M, N, A, LDA, WORK( IG11 ), LDG )
+      CALL DLACPY( 'A', P, N, B, LDB, WORK( IG21 ), LDG )
+*
+*     Compute the Frobenius norm of matrix G
+*
+      NORMG = NORMB * SQRT( 1.0D0 + ( ( W * NORMA ) / NORMB )**2 )
+*
+*     Get machine precision and set up threshold for determining
+*     the effective numerical rank of the matrix G.
+*
+      ULP = DLAMCH( 'Precision' )
+      UNFL = DLAMCH( 'Safe Minimum' )
+      TOL = MAX( M + P, N ) * MAX( NORMG, UNFL ) * ULP
+*
+*     IWORK stores the column permutations computed by DGEQP3.
+*     Columns J where IWORK( J ) is non-zero are permuted to the front
+*     so we set the all entries to zero here.
+*
+      IWORK( 1:N ) = 0
+*
+*     Compute the QR factorization with column pivoting GΠ = Q1 R1
+*
+      CALL DGEQP3( M + P, N, WORK( IG ), LDG, IWORK, ALPHA,
+     $             WORK( IVT ), LWORK - IVT + 1, INFO )
+      IF( INFO.NE.0 ) THEN
+         RETURN
+      END IF
+*
+*     Determine the rank of G
+*
+      DO I = 1, MIN( M + P, N )
+         IF( ABS( WORK( (I-1) * LDG + I ) ).LE.TOL ) THEN
+            EXIT
+         END IF
+         L = L + 1
+      END DO
+*
+*     Handle rank=0 case
+*
+      IF( L.EQ.0 ) THEN
+         IF( WANTU1 ) THEN
+            CALL DLASET( 'A', M, M, 0.0D0, 1.0D0, U1, LDU1 )
+         END IF
+         IF( WANTU2 ) THEN
+            CALL DLASET( 'A', P, P, 0.0D0, 1.0D0, U2, LDU2 )
+         END IF
+*
+         WORK( 1 ) = DBLE( LWKOPT )
+         RETURN
+      END IF
+*
+*     Copy R1( 1:L, : ) into A, B and set lower triangular part to zero
+*
+      IF( WANTX ) THEN
+         IF( L.LE.M ) THEN
+             CALL DLACPY( 'U', L, N, WORK( IG ), LDG, A, LDA )
+             CALL DLASET( 'L', L - 1, N, 0.0D0, 0.0D0, A( 2, 1 ), LDA )
+         ELSE
+             CALL DLACPY( 'U', M, N, WORK( IG ), LDG, A, LDA )
+             CALL DLACPY( 'U', L - M, N - M, WORK( IG22 ), LDG, B, LDB )
+*
+             CALL DLASET( 'L', M - 1, N, 0.0D0, 0.0D0, A( 2, 1 ), LDA )
+             CALL DLASET( 'L', L-M-1, N, 0.0D0, 0.0D0, B( 2, 1 ), LDB )
+         END IF
+      END IF
+*
+*     Explicitly form Q1 so that we can compute the CS decomposition
+*
+      CALL DORGQR( M + P, L, L, WORK( IG ), LDG, ALPHA,
+     $             WORK( IVT ), LWORK - IVT + 1, INFO )
+      IF ( INFO.NE.0 ) THEN
+         RETURN
+      END IF
+*
+*     Compute the CS decomposition of Q1( :, 1:L )
+*
+      K = MIN( M, P, L, M + P - L )
+      K1 = MAX( L - P, 0 )
+      CALL DORCSD2BY1( JOBU1, JOBU2, JOBX, M + P, M, L,
+     $                 WORK( IG11 ), LDG, WORK( IG21 ), LDG,
+     $                 ALPHA,
+     $                 U1, LDU1, U2, LDU2, WORK( IVT ), LDVT,
+     $                 WORK( IVT + LDVT*N ), LWORK - IVT - LDVT*N + 1,
+     $                 IWORK( N + 1 ), INFO )
+      IF( INFO.NE.0 ) THEN
+         RETURN
+      END IF
+*
+*     Compute X = V^T R1( 1:L, : ) and adjust for matrix scaling
+*
+      IF( WANTX ) THEN
+         LDX = L
+         IF ( L.LE.M ) THEN
+            CALL DGEMM( 'N', 'N', L, N, L,
+     $                  1.0D0, WORK( IVT ), LDVT, A, LDA,
+     $                  0.0D0, WORK( 2 ), LDX )
+         ELSE
+            CALL DGEMM( 'N', 'N', L, N, M,
+     $                  1.0D0, WORK( IVT ), LDVT, A, LDA,
+     $                  0.0D0, WORK( 2 ), LDX )
+            CALL DGEMM( 'N', 'N', L, N - M, L - M,
+     $                  1.0D0, WORK( IVT12 ), LDVT, B, LDB,
+     $                  1.0D0, WORK( L*M + 2 ), LDX )
+         END IF
+*        Revert column permutation Π by permuting the columns of X
+         CALL DLAPMT( .FALSE., L, N, WORK( 2 ), LDX, IWORK )
+      END IF
+*
+*     Adjust generalized singular values for matrix scaling
+*     Compute sine, cosine values
+*     Prepare row scaling of X
+*
+      DO I = 1, K
+         THETA = ALPHA( I )
+*        Do not adjust singular value if THETA is greater
+*        than pi/2 (infinite singular values won't change)
+         IF( COS( THETA ).LE.0.0D0 ) THEN
+            ALPHA( I ) = 0.0D0
+            BETA( I ) = 1.0D0
+            IF( WANTX ) THEN
+               WORK( IVT + I ) = 1.0D0
+            END IF
+         ELSE
+*           iota comes in the greek alphabet after theta
+            IOTA = ATAN( W * TAN( THETA ) )
+*           ensure sine, cosine divisor is far away from zero
+*           w is a power of two and will cause no trouble
+            IF( SIN( IOTA ) .GE. COS( IOTA ) ) THEN
+               ALPHA( I ) =  ( SIN( IOTA ) / TAN( THETA ) ) / W
+               BETA( I ) = SIN( IOTA )
+               IF( WANTX ) THEN
+                  WORK( IVT + I ) = SIN( THETA ) / SIN( IOTA )
+               END IF
+            ELSE
+               ALPHA( I ) = COS( IOTA )
+               BETA( I ) = SIN( IOTA )
+               IF( WANTX ) THEN
+                  WORK( IVT + I ) = COS( THETA ) / COS( IOTA ) / W
+               END IF
+            END IF
+         END IF
+      END DO
+*     Adjust rows of X for matrix scaling
+      IF( WANTX ) THEN
+         DO J = 0, N-1
+            DO I = 1, K1
+               WORK( LDX*J + I + 1 ) = WORK( LDX*J + I + 1 ) / W
+            END DO
+            DO I = 1, K
+               WORK( LDX*J + I + K1 + 1 ) =
+     $         WORK( LDX*J + I + K1 + 1 ) * WORK( IVT + I )
+            END DO
+         END DO
+      END IF
+*
+      WORK( 1 ) = DBLE( LWKOPT )
+      RETURN
+*
+*     End of DGGQRCS
+*
+      END
diff --git a/SRC/dlasrti.f b/SRC/dlasrti.f
new file mode 100644
index 0000000000..23e8299b8a
--- /dev/null
+++ b/SRC/dlasrti.f
@@ -0,0 +1,311 @@
+*> \brief \b DLASRTI sorts array indices based on the referenced numbers
+*
+*  =========== DOCUMENTATION ===========
+*
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
+*
+*> \htmlonly
+*> Download DLASRTI + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlasrti.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlasrti.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlasrti.f">
+*> [TXT]</a>
+*> \endhtmlonly
+*
+*  Definition:
+*  ===========
+*
+*       SUBROUTINE DLASRTI( ID, N, X, INDICES, INFO )
+*
+*       .. Scalar Arguments ..
+*       CHARACTER          ID
+*       INTEGER            INFO, N
+*       ..
+*       .. Array Arguments ..
+*       DOUBLE PRECISION   X( * )
+*       INTEGER            INDICES( * )
+*       ..
+*
+*
+*> \par Purpose:
+*  =============
+*>
+*> \verbatim
+*>
+*> Sort the numbers in X indirectly in increasing order (if ID = 'I') or
+*> in decreasing order (if ID = 'D' ) using the array of INDICES.
+*>
+*> Use Quick Sort, reverting to Insertion sort on arrays of
+*> size <= 20. Dimension of STACK limits N to about 2**32.
+*> \endverbatim
+*
+*  Arguments:
+*  ==========
+*
+*> \param[in] ID
+*> \verbatim
+*>          ID is CHARACTER*1
+*>          = 'I': sort X in increasing order;
+*>          = 'D': sort X in decreasing order.
+*> \endverbatim
+*>
+*> \param[in] N
+*> \verbatim
+*>          N is INTEGER
+*>          The length of the array X and INDICES.
+*> \endverbatim
+*>
+*> \param[in] X
+*> \verbatim
+*>          X is DOUBLE PRECISION array, dimension (N)
+*>          The values to be sorted.
+*> \endverbatim
+*>
+*> \param[in,out] INDICES
+*> \verbatim
+*>          INDICES is INTEGER array, dimension (N)
+*>          On entry, the indices of values in X to be sorted.
+*>          On exit, the indices have been sorted such that
+*>          X( INDICES(1) ) <= ... <= X( INDICES(N) )
+*>          or decreasing order such that
+*>          X( INDICES(1) ) >= ... >= X( INDICES(N) )
+*>          depending on ID.
+*> \endverbatim
+*>
+*> \param[out] INFO
+*> \verbatim
+*>          INFO is INTEGER
+*>          = 0:  successful exit
+*>          < 0:  if INFO = -i, the i-th argument had an illegal value
+*> \endverbatim
+*
+*  Authors:
+*  ========
+*
+*> \author Christoph Conrads (https://christoph-conrads.name)
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
+*
+*> \ingroup auxOTHERcomputational
+*
+*  =====================================================================
+      SUBROUTINE DLASRTI( ID, N, X, INDICES, INFO )
+*
+*  -- LAPACK computational routine --
+*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
+*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
+*
+*     .. Scalar Arguments ..
+      CHARACTER          ID
+      INTEGER            INFO, N
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   X( * )
+      INTEGER            INDICES( * )
+*     ..
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            SELECT
+      PARAMETER          ( SELECT = 20 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            DIR, ENDD, I, J, START, STKPNT, TMP
+      DOUBLE PRECISION   P1, P2, P3, PIVOT
+*     ..
+*     .. Local Arrays ..
+      INTEGER            STACK( 2, 32 )
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      DIR = -1
+      IF( LSAME( ID, 'D' ) ) THEN
+         DIR = 0
+      ELSE IF( LSAME( ID, 'I' ) ) THEN
+         DIR = 1
+      END IF
+      IF( DIR.EQ.-1 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DLASRTI', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.LE.1 )
+     $   RETURN
+*
+      STKPNT = 1
+      STACK( 1, 1 ) = 1
+      STACK( 2, 1 ) = N
+   10 CONTINUE
+      START = STACK( 1, STKPNT )
+      ENDD = STACK( 2, STKPNT )
+      STKPNT = STKPNT - 1
+      IF( ENDD-START.LE.SELECT .AND. ENDD-START.GT.0 ) THEN
+*
+*        Do Insertion sort on X( START:ENDD )
+*
+         IF( DIR.EQ.0 ) THEN
+*
+*           Sort into decreasing order
+*
+            DO 30 I = START + 1, ENDD
+               DO 20 J = I, START + 1, -1
+                  IF( X( INDICES(J) ).GT.X( INDICES(J-1) ) ) THEN
+                     TMP = INDICES( J )
+                     INDICES( J ) = INDICES( J-1 )
+                     INDICES( J-1 ) = TMP
+                  ELSE
+                     GO TO 30
+                  END IF
+   20          CONTINUE
+   30       CONTINUE
+*
+         ELSE
+*
+*           Sort into increasing order
+*
+            DO 50 I = START + 1, ENDD
+               DO 40 J = I, START + 1, -1
+                  IF( X( INDICES(J) ).LT.X( INDICES(J-1) ) ) THEN
+                     TMP = INDICES( J )
+                     INDICES( J ) = INDICES( J-1 )
+                     INDICES( J-1 ) = TMP
+                  ELSE
+                     GO TO 50
+                  END IF
+   40          CONTINUE
+   50       CONTINUE
+*
+         END IF
+*
+      ELSE IF( ENDD-START.GT.SELECT ) THEN
+*
+*        Partition X( START:ENDD ) and stack parts, largest one first
+*
+*        Choose partition entry as median of 3
+*
+         P1 = X( INDICES(START) )
+         P2 = X( INDICES(ENDD) )
+         I = ( START+ENDD ) / 2
+         P3 = X( INDICES(I) )
+         IF( P1.LT.P2 ) THEN
+            IF( P3.LT.P1 ) THEN
+               PIVOT = P1
+            ELSE IF( P3.LT.P2 ) THEN
+               PIVOT = P3
+            ELSE
+               PIVOT = P2
+            END IF
+         ELSE
+            IF( P3.LT.P2 ) THEN
+               PIVOT = P2
+            ELSE IF( P3.LT.P1 ) THEN
+               PIVOT = P3
+            ELSE
+               PIVOT = P1
+            END IF
+         END IF
+*
+         IF( DIR.EQ.0 ) THEN
+*
+*           Sort into decreasing order
+*
+            I = START - 1
+            J = ENDD + 1
+   60       CONTINUE
+   70       CONTINUE
+            J = J - 1
+            IF( X( INDICES(J) ).LT.PIVOT )
+     $         GO TO 70
+   80       CONTINUE
+            I = I + 1
+            IF( X( INDICES(I) ).GT.PIVOT )
+     $         GO TO 80
+            IF( I.LT.J ) THEN
+               TMP = INDICES( I )
+               INDICES( I ) = INDICES( J )
+               INDICES( J ) = TMP
+               GO TO 60
+            END IF
+            IF( J-START.GT.ENDD-J-1 ) THEN
+               STKPNT = STKPNT + 1
+               STACK( 1, STKPNT ) = START
+               STACK( 2, STKPNT ) = J
+               STKPNT = STKPNT + 1
+               STACK( 1, STKPNT ) = J + 1
+               STACK( 2, STKPNT ) = ENDD
+            ELSE
+               STKPNT = STKPNT + 1
+               STACK( 1, STKPNT ) = J + 1
+               STACK( 2, STKPNT ) = ENDD
+               STKPNT = STKPNT + 1
+               STACK( 1, STKPNT ) = START
+               STACK( 2, STKPNT ) = J
+            END IF
+         ELSE
+*
+*           Sort into increasing order
+*
+            I = START - 1
+            J = ENDD + 1
+   90       CONTINUE
+  100       CONTINUE
+            J = J - 1
+            IF( X( INDICES(J) ).GT.PIVOT )
+     $         GO TO 100
+  110       CONTINUE
+            I = I + 1
+            IF( X( INDICES(I) ).LT.PIVOT )
+     $         GO TO 110
+            IF( I.LT.J ) THEN
+               TMP = INDICES( I )
+               INDICES( I ) = INDICES( J )
+               INDICES( J ) = TMP
+               GO TO 90
+            END IF
+            IF( J-START.GT.ENDD-J-1 ) THEN
+               STKPNT = STKPNT + 1
+               STACK( 1, STKPNT ) = START
+               STACK( 2, STKPNT ) = J
+               STKPNT = STKPNT + 1
+               STACK( 1, STKPNT ) = J + 1
+               STACK( 2, STKPNT ) = ENDD
+            ELSE
+               STKPNT = STKPNT + 1
+               STACK( 1, STKPNT ) = J + 1
+               STACK( 2, STKPNT ) = ENDD
+               STKPNT = STKPNT + 1
+               STACK( 1, STKPNT ) = START
+               STACK( 2, STKPNT ) = J
+            END IF
+         END IF
+      END IF
+      IF( STKPNT.GT.0 )
+     $   GO TO 10
+      RETURN
+*
+*     End of DLASRTI
+*
+      END
diff --git a/SRC/dlasrtr.f b/SRC/dlasrtr.f
new file mode 100644
index 0000000000..91b1bf91f8
--- /dev/null
+++ b/SRC/dlasrtr.f
@@ -0,0 +1,189 @@
+*> \brief \b DLASRTR sorts array indices based on the referenced numbers
+*
+*  =========== DOCUMENTATION ===========
+*
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
+*
+*> \htmlonly
+*> Download DLASRTR + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlasrtr.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlasrtr.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlasrtr.f">
+*> [TXT]</a>
+*> \endhtmlonly
+*
+*  Definition:
+*  ===========
+*
+*       SUBROUTINE DLASRTR( ID, M, N, A, LDA, IPVT, WORK, INFO )
+*
+*       .. Scalar Arguments ..
+*       CHARACTER          ID
+*       INTEGER            INFO, M, N, LDA
+*       ..
+*       .. Array Arguments ..
+*       DOUBLE PRECISION   A( LDA, * ), WORK( * )
+*       INTEGER            IPVT( * )
+*       ..
+*
+*
+*> \par Purpose:
+*  =============
+*>
+*> \verbatim
+*> Apply row sorting based on the maximum norm of the rows of A. The
+*> rows can be sorted in increasing (if ID = 'I') or decreasing order
+*> (if ID = 'D').
+*> \endverbatim
+*
+*  Arguments:
+*  ==========
+*
+*> \param[in] ID
+*> \verbatim
+*>          ID is CHARACTER*1
+*>          = 'I': sort the rows of A by increasing row norm;
+*>          = 'D': sort the rows of A by decreasing row norm.
+*> \endverbatim
+*>
+*> \param[in] M
+*> \verbatim
+*>          M is INTEGER
+*>          The number of rows of the matrix A. M >= 0.
+*> \endverbatim
+*>
+*> \param[in] N
+*> \verbatim
+*>          N is INTEGER
+*>          The number of columns of the matrix A. N >= 0.
+*> \endverbatim
+*>
+*> \param[in,out] A
+*> \verbatim
+*>          A is DOUBLE PRECISION array, dimension (LDA,N)
+*>          On entry, the M by N matrix A.
+*>          On exit, A contains the row-permuted matrix A.
+*> \endverbatim
+*>
+*> \param[in] LDA
+*> \verbatim
+*>          LDA is INTEGER
+*>          The leading dimension of the array A. LDA >= max(1,M).
+*> \endverbatim
+*>
+*> \param[out] IPVT
+*> \verbatim
+*>          IPVT is INTEGER array, dimension (M)
+*>          On exit, if IPVT(J)=K, then the J-th row of A*P was the
+*>          the K-th row of A.
+*> \endverbatim
+*>
+*> \param[out] WORK
+*> \verbatim
+*>          WORK is DOUBLE PRECISION array, dimension (M).
+*>          On exit, WORK contains the maximum norms of the rows of A.
+*> \endverbatim
+*>
+*> \param[out] INFO
+*> \verbatim
+*>          INFO is INTEGER
+*>          = 0:  successful exit
+*>          < 0:  if INFO = -i, the i-th argument had an illegal value
+*> \endverbatim
+*
+*  Authors:
+*  ========
+*
+*> \author Christoph Conrads (https://christoph-conrads.name)
+*
+*> \ingroup auxOTHERcomputational
+*
+*  =====================================================================
+      SUBROUTINE DLASRTR( ID, M, N, A, LDA, IPVT, WORK, INFO )
+*
+*  -- LAPACK computational routine --
+*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
+*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
+*
+*     .. Scalar Arguments ..
+      CHARACTER          ID
+      INTEGER            INFO, M, N, LDA
+*     ..
+*     .. Array Arguments ..
+      DOUBLE PRECISION   A( LDA, * ), WORK( * )
+      INTEGER            IPVT( * )
+*     ..
+*
+*  =====================================================================
+*
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, DLAPMR, DLASRTI
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( .NOT.LSAME( ID, 'I' ) .AND. .NOT.LSAME( ID, 'D' ) ) THEN
+         INFO = -1
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'DLASRTR', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.LE.1 .OR. N.LT.1 )
+     $   RETURN
+*
+*     Compute maximum norm of each row
+*
+      DO I = 1, M
+         WORK( I ) = ABS( A( I, 1 ) )
+      END DO
+*
+      DO I = 1, M
+         DO J = 2, N
+            WORK( I ) = MAX( WORK( I ), ABS( A( I, J ) ) )
+         END DO
+      END DO
+*
+*     Sort row indices
+*
+      DO I = 1, M
+         IPVT( I ) = I
+      END DO
+*
+      CALL DLASRTI( ID, M, WORK, IPVT, INFO )
+      IF( INFO.NE.0 ) THEN
+         RETURN
+      END IF
+*
+*     Sort rows
+*
+      CALL DLAPMR( .TRUE., M, N, A, LDA, IPVT )
+*
+*     End of DLASRTR
+*
+      END
diff --git a/SRC/sggqrcs.f b/SRC/sggqrcs.f
new file mode 100644
index 0000000000..2303447f85
--- /dev/null
+++ b/SRC/sggqrcs.f
@@ -0,0 +1,670 @@
+*> \brief <b> SGGQRCS computes the singular value decomposition (SVD) for OTHER matrices</b>
+*
+*  =========== DOCUMENTATION ===========
+*
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
+*
+*> \htmlonly
+*> Download SGGQRCS + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sggqrcs.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sggqrcs.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sggqrcs.f">
+*> [TXT]</a>
+*> \endhtmlonly
+*
+*  Definition:
+*  ===========
+*
+*       SUBROUTINE SGGQRCS( JOBU1, JOBU2, JOBX, M, N, P, L, SWAPPED,
+*                           A, LDA, B, LDB,
+*                           ALPHA, BETA,
+*                           U1, LDU1, U2, LDU2
+*                           WORK, LWORK, IWORK, INFO )
+*
+*       .. Scalar Arguments ..
+*       CHARACTER          JOBU1, JOB2, JOBX
+*       INTEGER            INFO, LDA, LDB, LDU1, LDU2, M, N, P, L, LWORK
+*       ..
+*       .. Array Arguments ..
+*       INTEGER            IWORK( * )
+*       REAL               A( LDA, * ), B( LDB, * ),
+*      $                   ALPHA( N ), BETA( N ),
+*      $                   U1( LDU1, * ), U2( LDU2, * ),
+*      $                   WORK( * )
+*       ..
+*
+*
+*> \par Purpose:
+*  =============
+*>
+*> \verbatim
+*>
+*> SGGQRCS computes the generalized singular value decomposition (GSVD)
+*> of an M-by-N real matrix A and P-by-N real matrix B:
+*>
+*>       A = U1 * D1 * X,           B = U2 * D2 * X
+*>
+*> where U1 and U2 are orthogonal matrices. SGGQRCS uses the QR
+*> factorization with column pivoting and the 2-by-1 CS decomposition to
+*> compute the GSVD.
+*>
+*> Let L be the effective numerical rank of the matrix (A**T,B**T)**T,
+*> then X is a L-by-N nonsingular matrix, D1 and D2 are M-by-L and
+*> P-by-L "diagonal" matrices. If SWAPPED is false, then D1 and D2 are
+*> of the of the following structures, respectively:
+*>
+*>                 K1  K
+*>            K1 [ I   0   0 ]
+*>       D1 = K  [ 0   C   0 ]
+*>               [ 0   0   0 ]
+*>
+*>                     K   K2
+*>               [ 0   0   0 ]
+*>       D2 = K  [ 0   S   0 ]
+*>            K2 [ 0   0   I ]
+*>
+*> where
+*>
+*>   K  = MIN(M, P, L, M + P - L),
+*>   K1 = MAX(L - P, 0),
+*>   K2 = MAX(L - M, 0),
+*>   C  = diag( ALPHA(1), ..., ALPHA(K) ),
+*>   S  = diag( BETA(1), ..., BETA(K) ), and
+*>   C^2 + S^2 = I.
+*>
+*> If SWAPPED is true, then D1 and D2 are of the of the following
+*> structures, respectively:
+*>
+*>                     K   K1
+*>               [ 0   0   0 ]
+*>       D1 = K  [ 0   S   0 ]
+*>            K1 [ 0   0   I ]
+*>
+*>                 K2  K
+*>            K2 [ I   0   0 ]
+*>       D2 = K  [ 0   C   0 ]
+*>               [ 0   0   0 ]
+*>
+*> where
+*>
+*>   S  = diag( ALPHA(1), ..., ALPHA(K) ),
+*>   C  = diag( BETA(1), ..., BETA(K) ), and
+*>   C^2 + S^2 = I.
+*>
+*> The routine computes C, S and optionally the matrices U1, U2, and X.
+*> On exit, X is stored in WORK( 2:L*N+1 ).
+*>
+*> If B is an N-by-N nonsingular matrix, then the GSVD of the matrix
+*> pair (A, B) implicitly gives the SVD of A*inv(B):
+*>
+*>       A*inv(B) = U1*(D1*inv(D2))*U2**T.
+*>
+*> If (A**T,B**T)**T  has orthonormal columns, then the GSVD of A and B
+*> is also equal to the CS decomposition of A and B. Furthermore, the
+*> GSVD can be used to derive the solution of the eigenvalue problem:
+*>
+*>       A**T*A x = lambda * B**T*B x.
+*>
+*> In some literature, the GSVD of A and B is presented in the form
+*>
+*>       A = U1*D1*( 0 R )*Q**T,    B = U2*D2*( 0 R )*Q**T
+*>
+*> where U1, U2, and Q are orthogonal matrices. This latter GSVD form is
+*> computed directly by DGGSVD3. It is possible to convert between the
+*> two representations by calculating the RQ decomposition of X but this
+*> is not recommended for reasons of numerical stability.
+*>
+*> \endverbatim
+*
+*  Arguments:
+*  ==========
+*
+*> \param[in] JOBU1
+*> \verbatim
+*>          JOBU1 is CHARACTER*1
+*>          = 'Y':  Orthogonal matrix U1 is computed;
+*>          = 'N':  U1 is not computed.
+*> \endverbatim
+*>
+*> \param[in] JOBU2
+*> \verbatim
+*>          JOBU2 is CHARACTER*1
+*>          = 'Y':  Orthogonal matrix U2 is computed;
+*>          = 'N':  U2 is not computed.
+*> \endverbatim
+*>
+*> \param[in] JOBX
+*> \verbatim
+*>          JOBX is CHARACTER*1
+*>          = 'Y':  Matrix X is computed;
+*>          = 'N':  X is not computed.
+*> \endverbatim
+*>
+*> \param[in] M
+*> \verbatim
+*>          M is INTEGER
+*>          The number of rows of the matrix A.  M >= 1.
+*> \endverbatim
+*>
+*> \param[in] N
+*> \verbatim
+*>          N is INTEGER
+*>          The number of columns of the matrices A and B.  N >= 1.
+*> \endverbatim
+*>
+*> \param[in] P
+*> \verbatim
+*>          P is INTEGER
+*>          The number of rows of the matrix B.  P >= 1.
+*> \endverbatim
+*>
+*> \param[out] L
+*> \verbatim
+*>          L is INTEGER
+*>          On exit, the effective numerical rank of the matrix
+*>          (A**T, B**T)**T.
+*> \endverbatim
+*>
+*> \param[out] SWAPPED
+*> \verbatim
+*>          L is LOGICAL
+*>          On exit, SWAPPED is true if SGGQRCS swapped the input
+*>          matrices A, B and computed the GSVD of (B, A); false
+*>          otherwise.
+*> \endverbatim
+*>
+*> \param[in,out] A
+*> \verbatim
+*>          A is REAL array, dimension (LDA,N)
+*>          On entry, the M-by-N matrix A.
+*> \endverbatim
+*>
+*> \param[in] LDA
+*> \verbatim
+*>          LDA is INTEGER
+*>          The leading dimension of the array A. LDA >= max(1,M).
+*> \endverbatim
+*>
+*> \param[in,out] B
+*> \verbatim
+*>          B is REAL array, dimension (LDB,N)
+*>          On entry, the P-by-N matrix B.
+*> \endverbatim
+*>
+*> \param[in] LDB
+*> \verbatim
+*>          LDB is INTEGER
+*>          The leading dimension of the array B. LDB >= max(1,P).
+*> \endverbatim
+*>
+*> \param[out] ALPHA
+*> \verbatim
+*>          ALPHA is REAL array, dimension (N)
+*> \endverbatim
+*>
+*> \param[out] BETA
+*> \verbatim
+*>          BETA is REAL array, dimension (N)
+*>
+*>          On exit, ALPHA and BETA contain the K generalized singular
+*>          value pairs of A and B.
+*> \endverbatim
+*>
+*> \param[out] U1
+*> \verbatim
+*>          U1 is REAL array, dimension (LDU1,M)
+*>          If JOBU1 = 'Y', U1 contains the M-by-M orthogonal matrix U1.
+*>          If JOBU1 = 'N', U1 is not referenced.
+*> \endverbatim
+*>
+*> \param[in] LDU1
+*> \verbatim
+*>          LDU1 is INTEGER
+*>          The leading dimension of the array U1. LDU1 >= max(1,M) if
+*>          JOBU1 = 'Y'; LDU1 >= 1 otherwise.
+*> \endverbatim
+*>
+*> \param[out] U2
+*> \verbatim
+*>          U2 is REAL array, dimension (LDU2,P)
+*>          If JOBU2 = 'Y', U2 contains the P-by-P orthogonal matrix U2.
+*>          If JOBU2 = 'N', U2 is not referenced.
+*> \endverbatim
+*>
+*> \param[in] LDU2
+*> \verbatim
+*>          LDU2 is INTEGER
+*>          The leading dimension of the array U2. LDU2 >= max(1,P) if
+*>          JOBU2 = 'Y'; LDU2 >= 1 otherwise.
+*> \endverbatim
+*>
+*> \param[out] WORK
+*> \verbatim
+*>          WORK is REAL array, dimension (MAX(1,LWORK))
+*>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*> \endverbatim
+*>
+*> \param[in] LWORK
+*> \verbatim
+*>          LWORK is INTEGER
+*>          The dimension of the array WORK.
+*>
+*>          If LWORK = -1, then a workspace query is assumed; the
+*>          routine only calculates the optimal size of the WORK array,
+*>          returns this value as the first entry of the WORK array, and
+*>          no error message related to LWORK is issued by XERBLA.
+*> \endverbatim
+*>
+*> \param[out] IWORK
+*> \verbatim
+*>          IWORK is INTEGER array, dimension (M + N + P)
+*> \endverbatim
+*>
+*> \param[out] INFO
+*> \verbatim
+*>          INFO is INTEGER
+*>          = 0:  successful exit.
+*>          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*>          > 0:  SBBCSD did not converge. For further details, see
+*>                subroutine SORCSDBY1.
+*> \endverbatim
+*
+*> \par Internal Parameters:
+*  =========================
+*>
+*> \verbatim
+*>  W       REAL
+*>          W is a radix power chosen such that the Frobenius norm of A
+*>          and W*B are with SQRT(RADIX) and 1/SQRT(RADIX) of each
+*>          other.
+*>
+*>  TOL     REAL
+*>          Let G = (A**T,B**T)**T. TOL is the threshold to determine
+*>          the effective rank of G. Generally, it is set to
+*>                   TOL = MAX( M + P, N ) * norm(G) * MACHEPS,
+*>          where norm(G) is the Frobenius norm of G.
+*>          The size of TOL may affect the size of backward error of the
+*>          decomposition.
+*> \endverbatim
+*
+*  Authors:
+*  ========
+*
+*> \author Christoph Conrads (https://christoph-conrads.name)
+*
+*> \ingroup realGEsing
+*
+*> \par Contributors:
+*  ==================
+*>
+*>     Christoph Conrads (https://christoph-conrads.name)
+*>
+*
+*> \par Further Details:
+*  =====================
+*>
+*>  SGGQRCS should be significantly faster than SGGSVD3 for large
+*>  matrices because the matrices A and B are reduced to a pair of
+*>  well-conditioned bidiagonal matrices instead of pairs of upper
+*>  triangular matrices. On the downside, SGGQRCS requires a much larger
+*>  workspace whose dimension must be queried at run-time. SGGQRCS also
+*>  offers no guarantees which of the two possible diagonal matrices
+*>  is used for the matrix factorization.
+*>
+*  =====================================================================
+      RECURSIVE SUBROUTINE SGGQRCS( JOBU1, JOBU2, JOBX, M, N, P, L,
+     $                              SWAPPED,
+     $                              A, LDA, B, LDB,
+     $                              ALPHA, BETA,
+     $                              U1, LDU1, U2, LDU2,
+     $                              WORK, LWORK, IWORK, INFO )
+*
+*  -- LAPACK driver routine --
+*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
+*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd. --
+*
+      IMPLICIT NONE
+*     .. Scalar Arguments ..
+      LOGICAL            SWAPPED
+      CHARACTER          JOBU1, JOBU2, JOBX
+      INTEGER            INFO, LDA, LDB, LDU1, LDU2, L, M, N, P, LWORK
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      REAL               A( LDA, * ), B( LDB, * ),
+     $                   ALPHA( N ), BETA( N ),
+     $                   U1( LDU1, * ), U2( LDU2, * ),
+     $                   WORK( * )
+*     ..
+*
+*  =====================================================================
+*
+*     .. Local Scalars ..
+      LOGICAL            WANTU1, WANTU2, WANTX, LQUERY
+      INTEGER            I, J, K, K1, LMAX, IG, IG11, IG21, IG22,
+     $                   IVT, IVT12, LDG, LDX, LDVT, LWKMIN, LWKOPT
+      REAL               BASE, NORMA, NORMB, NORMG, TOL, ULP, UNFL,
+     $                   THETA, IOTA, W
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      REAL               SLAMCH, SLANGE
+      EXTERNAL           LSAME, SLAMCH, SLANGE
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           SGEMM, SGEQP3, SLACPY, SLAPMT, SLASCL,
+     $                   SLASET, SORGQR, SORCSD2BY1, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          COS, MAX, MIN, SIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode and test the input parameters
+*
+      WANTU1 = LSAME( JOBU1, 'Y' )
+      WANTU2 = LSAME( JOBU2, 'Y' )
+      WANTX = LSAME( JOBX, 'Y' )
+      LQUERY = ( LWORK.EQ.-1 )
+*
+*     Test the input arguments
+*
+      INFO = 0
+      IF( .NOT.( WANTU1 .OR. LSAME( JOBU1, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( WANTU2 .OR. LSAME( JOBU2, 'N' ) ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.( WANTX .OR. LSAME( JOBX, 'N' ) ) ) THEN
+         INFO = -3
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( P.LT.0 ) THEN
+         INFO = -6
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -10
+      ELSE IF( LDB.LT.MAX( 1, P ) ) THEN
+         INFO = -12
+      ELSE IF( LDU1.LT.1 .OR. ( WANTU1 .AND. LDU1.LT.M ) ) THEN
+         INFO = -16
+      ELSE IF( LDU2.LT.1 .OR. ( WANTU2 .AND. LDU2.LT.P ) ) THEN
+         INFO = -18
+      ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN
+         INFO = -20
+      END IF
+*
+*     Make sure A is the matrix smaller in norm
+*
+      IF( INFO.EQ.0 ) THEN
+         NORMA = SLANGE( 'F', M, N, A, LDA, WORK )
+         NORMB = SLANGE( 'F', P, N, B, LDB, WORK )
+*
+         IF( NORMA.GT.SQRT( 2.0E0 ) * NORMB ) THEN
+            CALL SGGQRCS( JOBU2, JOBU1, JOBX, P, N, M, L,
+     $                    SWAPPED,
+     $                    B, LDB, A, LDA,
+     $                    BETA, ALPHA,
+     $                    U2, LDU2, U1, LDU1,
+     $                    WORK, LWORK, IWORK, INFO )
+            SWAPPED = .TRUE.
+            RETURN
+         ENDIF
+*
+*     Past this point, we know that
+*     * NORMA <= NORMB (almost)
+*     * W >= 1
+*     * ALPHA will contain cosine values at the end
+*     * BETA will contain sine values at the end
+*
+      END IF
+*
+*     Initialize variables
+*
+      SWAPPED = .FALSE.
+      L = 0
+*     The leading dimension must never be zero
+      LDG = MAX( M + P, 1 )
+      LDVT = N
+      LMAX = MIN( M + P, N )
+      IG = 1
+      IG11 = 1
+      IG21 = M + 1
+      IG22 = LDG * M + M + 1
+      IVT = LDG * N + 2
+      IVT12 = IVT + LDVT * M
+      THETA = -1
+      IOTA = -1
+      W = -1
+*
+*     Compute workspace
+*
+      IF( INFO.EQ.0 ) THEN
+         LWKMIN = 0
+         LWKOPT = 0
+*
+         CALL SGEQP3( M + P, N, WORK( IG ), LDG, IWORK, ALPHA, WORK, -1,
+     $                INFO )
+         LWKMIN = MAX( LWKMIN, 3 * N + 1 )
+         LWKOPT = MAX( LWKOPT, INT( WORK( 1 ) ) )
+*
+         CALL SORGQR( M + P, LMAX, LMAX, WORK( IG ), LDG, ALPHA, WORK,
+     $                -1, INFO )
+         LWKMIN = MAX( LWKMIN, LMAX )
+         LWKOPT = MAX( LWKOPT, INT( WORK( 1 ) ) )
+*
+         CALL SORCSD2BY1( JOBU1, JOBU2, JOBX, M + P, M, LMAX,
+     $                    WORK( IG ), LDG, WORK( IG ), LDG,
+     $                    ALPHA,
+     $                    U1, LDU1, U2, LDU2, WORK( IVT ), LDVT,
+     $                    WORK, -1, IWORK, INFO )
+         LWKMIN = MAX( LWKMIN, INT( WORK( 1 ) ) )
+         LWKOPT = MAX( LWKOPT, INT( WORK( 1 ) ) )
+*        The matrix (A, B) must be stored sequentially for SORGQR
+         LWKMIN = LWKMIN + IVT
+         LWKOPT = LWKOPT + IVT
+*        2-by-1 CSD matrix V1 must be stored
+         IF( WANTX ) THEN
+            LWKMIN = LWKMIN + LDVT*N
+            LWKOPT = LWKOPT + LDVT*N
+         END IF
+*        Check workspace size
+         IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) THEN
+            INFO = -20
+         END IF
+*
+         WORK( 1 ) = REAL( LWKOPT )
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SGGQRCS', -INFO )
+         RETURN
+      END IF
+      IF( LQUERY ) THEN
+         RETURN
+      ENDIF
+*     Finish initialization
+      IF( .NOT.WANTX ) THEN
+         LDVT = 0
+      END IF
+*
+*     Scale matrix A such that norm(A) \approx norm(B)
+*
+      IF( NORMA.EQ.0.0E0 ) THEN
+         W = 1.0E0
+      ELSE
+         BASE = SLAMCH( 'B' )
+         W = BASE ** INT( LOG( NORMB / NORMA ) / LOG( BASE ) )
+*
+         CALL SLASCL( 'G', -1, -1, 1.0E0, W, M, N, A, LDA, INFO )
+         IF ( INFO.NE.0 ) THEN
+            RETURN
+         END IF
+      END IF
+*
+*     Copy matrices A, B into the (M+P) x N matrix G
+*
+      CALL SLACPY( 'A', M, N, A, LDA, WORK( IG11 ), LDG )
+      CALL SLACPY( 'A', P, N, B, LDB, WORK( IG21 ), LDG )
+*
+*     Compute the Frobenius norm of matrix G
+*
+      NORMG = NORMB * SQRT( 1.0E0 + ( ( W * NORMA ) / NORMB )**2 )
+*
+*     Get machine precision and set up threshold for determining
+*     the effective numerical rank of the matrix G.
+*
+      ULP = SLAMCH( 'Precision' )
+      UNFL = SLAMCH( 'Safe Minimum' )
+      TOL = MAX( M + P, N ) * MAX( NORMG, UNFL ) * ULP
+*
+*     IWORK stores the column permutations computed by SGEQP3.
+*     Columns J where IWORK( J ) is non-zero are permuted to the front
+*     so we set the all entries to zero here.
+*
+      IWORK( 1:N ) = 0
+*
+*     Compute the QR factorization with column pivoting GΠ = Q1 R1
+*
+      CALL SGEQP3( M + P, N, WORK( IG ), LDG, IWORK, ALPHA,
+     $             WORK( IVT ), LWORK - IVT + 1, INFO )
+      IF( INFO.NE.0 ) THEN
+         RETURN
+      END IF
+*
+*     Determine the rank of G
+*
+      DO I = 1, MIN( M + P, N )
+         IF( ABS( WORK( (I-1) * LDG + I ) ).LE.TOL ) THEN
+            EXIT
+         END IF
+         L = L + 1
+      END DO
+*
+*     Handle rank=0 case
+*
+      IF( L.EQ.0 ) THEN
+         IF( WANTU1 ) THEN
+            CALL SLASET( 'A', M, M, 0.0E0, 1.0E0, U1, LDU1 )
+         END IF
+         IF( WANTU2 ) THEN
+            CALL SLASET( 'A', P, P, 0.0E0, 1.0E0, U2, LDU2 )
+         END IF
+*
+         WORK( 1 ) = REAL( LWKOPT )
+         RETURN
+      END IF
+*
+*     Copy R1( 1:L, : ) into A, B and set lower triangular part to zero
+*
+      IF( WANTX ) THEN
+         IF( L.LE.M ) THEN
+             CALL SLACPY( 'U', L, N, WORK( IG ), LDG, A, LDA )
+             CALL SLASET( 'L', L - 1, N, 0.0E0, 0.0E0, A( 2, 1 ), LDA )
+         ELSE
+             CALL SLACPY( 'U', M, N, WORK( IG ), LDG, A, LDA )
+             CALL SLACPY( 'U', L - M, N - M, WORK( IG22 ), LDG, B, LDB )
+*
+             CALL SLASET( 'L', M - 1, N, 0.0E0, 0.0E0, A( 2, 1 ), LDA )
+             CALL SLASET( 'L', L-M-1, N, 0.0E0, 0.0E0, B( 2, 1 ), LDB )
+         END IF
+      END IF
+*
+*     Explicitly form Q1 so that we can compute the CS decomposition
+*
+      CALL SORGQR( M + P, L, L, WORK( IG ), LDG, ALPHA,
+     $             WORK( IVT ), LWORK - IVT + 1, INFO )
+      IF ( INFO.NE.0 ) THEN
+         RETURN
+      END IF
+*
+*     Compute the CS decomposition of Q1( :, 1:L )
+*
+      K = MIN( M, P, L, M + P - L )
+      K1 = MAX( L - P, 0 )
+      CALL SORCSD2BY1( JOBU1, JOBU2, JOBX, M + P, M, L,
+     $                 WORK( IG11 ), LDG, WORK( IG21 ), LDG,
+     $                 ALPHA,
+     $                 U1, LDU1, U2, LDU2, WORK( IVT ), LDVT,
+     $                 WORK( IVT + LDVT*N ), LWORK - IVT - LDVT*N + 1,
+     $                 IWORK( N + 1 ), INFO )
+      IF( INFO.NE.0 ) THEN
+         RETURN
+      END IF
+*
+*     Compute X = V^T R1( 1:L, : ) and adjust for matrix scaling
+*
+      IF( WANTX ) THEN
+         LDX = L
+         IF ( L.LE.M ) THEN
+            CALL SGEMM( 'N', 'N', L, N, L,
+     $                  1.0E0, WORK( IVT ), LDVT, A, LDA,
+     $                  0.0E0, WORK( 2 ), LDX )
+         ELSE
+            CALL SGEMM( 'N', 'N', L, N, M,
+     $                  1.0E0, WORK( IVT ), LDVT, A, LDA,
+     $                  0.0E0, WORK( 2 ), LDX )
+            CALL SGEMM( 'N', 'N', L, N - M, L - M,
+     $                  1.0E0, WORK( IVT12 ), LDVT, B, LDB,
+     $                  1.0E0, WORK( L*M + 2 ), LDX )
+         END IF
+*        Revert column permutation Π by permuting the columns of X
+         CALL SLAPMT( .FALSE., L, N, WORK( 2 ), LDX, IWORK )
+      END IF
+*
+*     Adjust generalized singular values for matrix scaling
+*     Compute sine, cosine values
+*     Prepare row scaling of X
+*
+      DO I = 1, K
+         THETA = ALPHA( I )
+*        Do not adjust singular value if THETA is greater
+*        than pi/2 (infinite singular values won't change)
+         IF( COS( THETA ).LE.0.0E0 ) THEN
+            ALPHA( I ) = 0.0E0
+            BETA( I ) = 1.0E0
+            IF( WANTX ) THEN
+               WORK( IVT + I ) = 1.0E0
+            END IF
+         ELSE
+*           iota comes in the greek alphabet after theta
+            IOTA = ATAN( W * TAN( THETA ) )
+*           ensure sine, cosine divisor is far away from zero
+*           w is a power of two and will cause no trouble
+            IF( SIN( IOTA ) .GE. COS( IOTA ) ) THEN
+               ALPHA( I ) =  ( SIN( IOTA ) / TAN( THETA ) ) / W
+               BETA( I ) = SIN( IOTA )
+               IF( WANTX ) THEN
+                  WORK( IVT + I ) = SIN( THETA ) / SIN( IOTA )
+               END IF
+            ELSE
+               ALPHA( I ) = COS( IOTA )
+               BETA( I ) = SIN( IOTA )
+               IF( WANTX ) THEN
+                  WORK( IVT + I ) = COS( THETA ) / COS( IOTA ) / W
+               END IF
+            END IF
+         END IF
+      END DO
+*     Adjust rows of X for matrix scaling
+      IF( WANTX ) THEN
+         DO J = 0, N-1
+            DO I = 1, K1
+               WORK( LDX*J + I + 1 ) = WORK( LDX*J + I + 1 ) / W
+            END DO
+            DO I = 1, K
+               WORK( LDX*J + I + K1 + 1 ) =
+     $         WORK( LDX*J + I + K1 + 1 ) * WORK( IVT + I )
+            END DO
+         END DO
+      END IF
+*
+      WORK( 1 ) = REAL( LWKOPT )
+      RETURN
+*
+*     End of SGGQRCS
+*
+      END
diff --git a/SRC/slasrti.f b/SRC/slasrti.f
new file mode 100644
index 0000000000..36bf402680
--- /dev/null
+++ b/SRC/slasrti.f
@@ -0,0 +1,311 @@
+*> \brief \b SLASRTI sorts array indices based on the referenced numbers
+*
+*  =========== DOCUMENTATION ===========
+*
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
+*
+*> \htmlonly
+*> Download SLASRTI + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/slasrti.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/slasrti.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/slasrti.f">
+*> [TXT]</a>
+*> \endhtmlonly
+*
+*  Definition:
+*  ===========
+*
+*       SUBROUTINE SLASRTI( ID, N, X, INDICES, INFO )
+*
+*       .. Scalar Arguments ..
+*       CHARACTER          ID
+*       INTEGER            INFO, N
+*       ..
+*       .. Array Arguments ..
+*       REAL               X( * )
+*       INTEGER            INDICES( * )
+*       ..
+*
+*
+*> \par Purpose:
+*  =============
+*>
+*> \verbatim
+*>
+*> Sort the numbers in X indirectly in increasing order (if ID = 'I') or
+*> in decreasing order (if ID = 'D' ) using the array of INDICES.
+*>
+*> Use Quick Sort, reverting to Insertion sort on arrays of
+*> size <= 20. Dimension of STACK limits N to about 2**32.
+*> \endverbatim
+*
+*  Arguments:
+*  ==========
+*
+*> \param[in] ID
+*> \verbatim
+*>          ID is CHARACTER*1
+*>          = 'I': sort X in increasing order;
+*>          = 'D': sort X in decreasing order.
+*> \endverbatim
+*>
+*> \param[in] N
+*> \verbatim
+*>          N is INTEGER
+*>          The length of the array X and INDICES.
+*> \endverbatim
+*>
+*> \param[in] X
+*> \verbatim
+*>          X is REAL array, dimension (N)
+*>          The values to be sorted.
+*> \endverbatim
+*>
+*> \param[in,out] INDICES
+*> \verbatim
+*>          INDICES is INTEGER array, dimension (N)
+*>          On entry, the indices of values in X to be sorted.
+*>          On exit, the indices have been sorted such that
+*>          X( INDICES(1) ) <= ... <= X( INDICES(N) )
+*>          or decreasing order such that
+*>          X( INDICES(1) ) >= ... >= X( INDICES(N) )
+*>          depending on ID.
+*> \endverbatim
+*>
+*> \param[out] INFO
+*> \verbatim
+*>          INFO is INTEGER
+*>          = 0:  successful exit
+*>          < 0:  if INFO = -i, the i-th argument had an illegal value
+*> \endverbatim
+*
+*  Authors:
+*  ========
+*
+*> \author Christoph Conrads (https://christoph-conrads.name)
+*> \author Univ. of Tennessee
+*> \author Univ. of California Berkeley
+*> \author Univ. of Colorado Denver
+*> \author NAG Ltd.
+*
+*> \ingroup auxOTHERcomputational
+*
+*  =====================================================================
+      SUBROUTINE SLASRTI( ID, N, X, INDICES, INFO )
+*
+*  -- LAPACK computational routine --
+*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
+*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
+*
+*     .. Scalar Arguments ..
+      CHARACTER          ID
+      INTEGER            INFO, N
+*     ..
+*     .. Array Arguments ..
+      REAL               X( * )
+      INTEGER            INDICES( * )
+*     ..
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      INTEGER            SELECT
+      PARAMETER          ( SELECT = 20 )
+*     ..
+*     .. Local Scalars ..
+      INTEGER            DIR, ENDD, I, J, START, STKPNT, TMP
+      REAL               P1, P2, P3, PIVOT
+*     ..
+*     .. Local Arrays ..
+      INTEGER            STACK( 2, 32 )
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      DIR = -1
+      IF( LSAME( ID, 'D' ) ) THEN
+         DIR = 0
+      ELSE IF( LSAME( ID, 'I' ) ) THEN
+         DIR = 1
+      END IF
+      IF( DIR.EQ.-1 ) THEN
+         INFO = -1
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -2
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SLASRTI', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( N.LE.1 )
+     $   RETURN
+*
+      STKPNT = 1
+      STACK( 1, 1 ) = 1
+      STACK( 2, 1 ) = N
+   10 CONTINUE
+      START = STACK( 1, STKPNT )
+      ENDD = STACK( 2, STKPNT )
+      STKPNT = STKPNT - 1
+      IF( ENDD-START.LE.SELECT .AND. ENDD-START.GT.0 ) THEN
+*
+*        Do Insertion sort on X( START:ENDD )
+*
+         IF( DIR.EQ.0 ) THEN
+*
+*           Sort into decreasing order
+*
+            DO 30 I = START + 1, ENDD
+               DO 20 J = I, START + 1, -1
+                  IF( X( INDICES(J) ).GT.X( INDICES(J-1) ) ) THEN
+                     TMP = INDICES( J )
+                     INDICES( J ) = INDICES( J-1 )
+                     INDICES( J-1 ) = TMP
+                  ELSE
+                     GO TO 30
+                  END IF
+   20          CONTINUE
+   30       CONTINUE
+*
+         ELSE
+*
+*           Sort into increasing order
+*
+            DO 50 I = START + 1, ENDD
+               DO 40 J = I, START + 1, -1
+                  IF( X( INDICES(J) ).LT.X( INDICES(J-1) ) ) THEN
+                     TMP = INDICES( J )
+                     INDICES( J ) = INDICES( J-1 )
+                     INDICES( J-1 ) = TMP
+                  ELSE
+                     GO TO 50
+                  END IF
+   40          CONTINUE
+   50       CONTINUE
+*
+         END IF
+*
+      ELSE IF( ENDD-START.GT.SELECT ) THEN
+*
+*        Partition X( START:ENDD ) and stack parts, largest one first
+*
+*        Choose partition entry as median of 3
+*
+         P1 = X( INDICES(START) )
+         P2 = X( INDICES(ENDD) )
+         I = ( START+ENDD ) / 2
+         P3 = X( INDICES(I) )
+         IF( P1.LT.P2 ) THEN
+            IF( P3.LT.P1 ) THEN
+               PIVOT = P1
+            ELSE IF( P3.LT.P2 ) THEN
+               PIVOT = P3
+            ELSE
+               PIVOT = P2
+            END IF
+         ELSE
+            IF( P3.LT.P2 ) THEN
+               PIVOT = P2
+            ELSE IF( P3.LT.P1 ) THEN
+               PIVOT = P3
+            ELSE
+               PIVOT = P1
+            END IF
+         END IF
+*
+         IF( DIR.EQ.0 ) THEN
+*
+*           Sort into decreasing order
+*
+            I = START - 1
+            J = ENDD + 1
+   60       CONTINUE
+   70       CONTINUE
+            J = J - 1
+            IF( X( INDICES(J) ).LT.PIVOT )
+     $         GO TO 70
+   80       CONTINUE
+            I = I + 1
+            IF( X( INDICES(I) ).GT.PIVOT )
+     $         GO TO 80
+            IF( I.LT.J ) THEN
+               TMP = INDICES( I )
+               INDICES( I ) = INDICES( J )
+               INDICES( J ) = TMP
+               GO TO 60
+            END IF
+            IF( J-START.GT.ENDD-J-1 ) THEN
+               STKPNT = STKPNT + 1
+               STACK( 1, STKPNT ) = START
+               STACK( 2, STKPNT ) = J
+               STKPNT = STKPNT + 1
+               STACK( 1, STKPNT ) = J + 1
+               STACK( 2, STKPNT ) = ENDD
+            ELSE
+               STKPNT = STKPNT + 1
+               STACK( 1, STKPNT ) = J + 1
+               STACK( 2, STKPNT ) = ENDD
+               STKPNT = STKPNT + 1
+               STACK( 1, STKPNT ) = START
+               STACK( 2, STKPNT ) = J
+            END IF
+         ELSE
+*
+*           Sort into increasing order
+*
+            I = START - 1
+            J = ENDD + 1
+   90       CONTINUE
+  100       CONTINUE
+            J = J - 1
+            IF( X( INDICES(J) ).GT.PIVOT )
+     $         GO TO 100
+  110       CONTINUE
+            I = I + 1
+            IF( X( INDICES(I) ).LT.PIVOT )
+     $         GO TO 110
+            IF( I.LT.J ) THEN
+               TMP = INDICES( I )
+               INDICES( I ) = INDICES( J )
+               INDICES( J ) = TMP
+               GO TO 90
+            END IF
+            IF( J-START.GT.ENDD-J-1 ) THEN
+               STKPNT = STKPNT + 1
+               STACK( 1, STKPNT ) = START
+               STACK( 2, STKPNT ) = J
+               STKPNT = STKPNT + 1
+               STACK( 1, STKPNT ) = J + 1
+               STACK( 2, STKPNT ) = ENDD
+            ELSE
+               STKPNT = STKPNT + 1
+               STACK( 1, STKPNT ) = J + 1
+               STACK( 2, STKPNT ) = ENDD
+               STKPNT = STKPNT + 1
+               STACK( 1, STKPNT ) = START
+               STACK( 2, STKPNT ) = J
+            END IF
+         END IF
+      END IF
+      IF( STKPNT.GT.0 )
+     $   GO TO 10
+      RETURN
+*
+*     End of SLASRTI
+*
+      END
diff --git a/SRC/slasrtr.f b/SRC/slasrtr.f
new file mode 100644
index 0000000000..9896f5134c
--- /dev/null
+++ b/SRC/slasrtr.f
@@ -0,0 +1,189 @@
+*> \brief \b SLASRTR sorts array indices based on the referenced numbers
+*
+*  =========== DOCUMENTATION ===========
+*
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
+*
+*> \htmlonly
+*> Download SLASRTR + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/slasrtr.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/slasrtr.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/slasrtr.f">
+*> [TXT]</a>
+*> \endhtmlonly
+*
+*  Definition:
+*  ===========
+*
+*       SUBROUTINE SLASRTR( ID, M, N, A, LDA, IPVT, WORK, INFO )
+*
+*       .. Scalar Arguments ..
+*       CHARACTER          ID
+*       INTEGER            INFO, M, N, LDA
+*       ..
+*       .. Array Arguments ..
+*       REAL               A( LDA, * ), WORK( * )
+*       INTEGER            IPVT( * )
+*       ..
+*
+*
+*> \par Purpose:
+*  =============
+*>
+*> \verbatim
+*> Apply row sorting based on the maximum norm of the rows of A. The
+*> rows can be sorted in increasing (if ID = 'I') or decreasing order
+*> (if ID = 'D').
+*> \endverbatim
+*
+*  Arguments:
+*  ==========
+*
+*> \param[in] ID
+*> \verbatim
+*>          ID is CHARACTER*1
+*>          = 'I': sort the rows of A by increasing row norm;
+*>          = 'D': sort the rows of A by decreasing row norm.
+*> \endverbatim
+*>
+*> \param[in] M
+*> \verbatim
+*>          M is INTEGER
+*>          The number of rows of the matrix A. M >= 0.
+*> \endverbatim
+*>
+*> \param[in] N
+*> \verbatim
+*>          N is INTEGER
+*>          The number of columns of the matrix A. N >= 0.
+*> \endverbatim
+*>
+*> \param[in,out] A
+*> \verbatim
+*>          A is REAL array, dimension (LDA,N)
+*>          On entry, the M by N matrix A.
+*>          On exit, A contains the row-permuted matrix A.
+*> \endverbatim
+*>
+*> \param[in] LDA
+*> \verbatim
+*>          LDA is INTEGER
+*>          The leading dimension of the array A. LDA >= max(1,M).
+*> \endverbatim
+*>
+*> \param[out] IPVT
+*> \verbatim
+*>          IPVT is INTEGER array, dimension (M)
+*>          On exit, if IPVT(J)=K, then the J-th row of A*P was the
+*>          the K-th row of A.
+*> \endverbatim
+*>
+*> \param[out] WORK
+*> \verbatim
+*>          WORK is REAL array, dimension (M).
+*>          On exit, WORK contains the maximum norms of the rows of A.
+*> \endverbatim
+*>
+*> \param[out] INFO
+*> \verbatim
+*>          INFO is INTEGER
+*>          = 0:  successful exit
+*>          < 0:  if INFO = -i, the i-th argument had an illegal value
+*> \endverbatim
+*
+*  Authors:
+*  ========
+*
+*> \author Christoph Conrads (https://christoph-conrads.name)
+*
+*> \ingroup auxOTHERcomputational
+*
+*  =====================================================================
+      SUBROUTINE SLASRTR( ID, M, N, A, LDA, IPVT, WORK, INFO )
+*
+*  -- LAPACK computational routine --
+*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
+*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
+*
+*     .. Scalar Arguments ..
+      CHARACTER          ID
+      INTEGER            INFO, M, N, LDA
+*     ..
+*     .. Array Arguments ..
+      REAL               A( LDA, * ), WORK( * )
+      INTEGER            IPVT( * )
+*     ..
+*
+*  =====================================================================
+*
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, SLAPMR, SLASRTI
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( .NOT.LSAME( ID, 'I' ) .AND. .NOT.LSAME( ID, 'D' ) ) THEN
+         INFO = -1
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'SLASRTR', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.LE.1 .OR. N.LT.1 )
+     $   RETURN
+*
+*     Compute maximum norm of each row
+*
+      DO I = 1, M
+         WORK( I ) = ABS( A( I, 1 ) )
+      END DO
+*
+      DO I = 1, M
+         DO J = 2, N
+            WORK( I ) = MAX( WORK( I ), ABS( A( I, J ) ) )
+         END DO
+      END DO
+*
+*     Sort row indices
+*
+      DO I = 1, M
+         IPVT( I ) = I
+      END DO
+*
+      CALL SLASRTI( ID, M, WORK, IPVT, INFO )
+      IF( INFO.NE.0 ) THEN
+         RETURN
+      END IF
+*
+*     Sort rows
+*
+      CALL SLAPMR( .TRUE., M, N, A, LDA, IPVT )
+*
+*     End of SLASRTR
+*
+      END
diff --git a/SRC/sorcsd2by1.f b/SRC/sorcsd2by1.f
index 25c317f6f6..1521347f96 100644
--- a/SRC/sorcsd2by1.f
+++ b/SRC/sorcsd2by1.f
@@ -517,10 +517,12 @@ SUBROUTINE SORCSD2BY1( JOBU1, JOBU2, JOBV1T, M, P, Q, X11, LDX11,
                V1T(1,J) = ZERO
                V1T(J,1) = ZERO
             END DO
-            CALL SLACPY( 'U', Q-1, Q-1, X21(1,2), LDX21, V1T(2,2),
+            IF( Q.GT.1 ) THEN
+               CALL SLACPY( 'U', Q-1, Q-1, X21(1,2), LDX21, V1T(2,2),
      $                   LDV1T )
-            CALL SORGLQ( Q-1, Q-1, Q-1, V1T(2,2), LDV1T, WORK(ITAUQ1),
+               CALL SORGLQ( Q-1, Q-1, Q-1, V1T(2,2), LDV1T,WORK(ITAUQ1),
      $                   WORK(IORGLQ), LORGLQ, CHILDINFO )
+            ENDIF
          END IF
 *
 *        Simultaneously diagonalize X11 and X21.
diff --git a/SRC/zggqrcs.f b/SRC/zggqrcs.f
new file mode 100644
index 0000000000..de3295e54d
--- /dev/null
+++ b/SRC/zggqrcs.f
@@ -0,0 +1,713 @@
+*> \brief <b> ZGGQRCS computes the singular value decomposition (SVD) for OTHER matrices</b>
+*
+*  =========== DOCUMENTATION ===========
+*
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
+*
+*> \htmlonly
+*> Download ZGGQRCS + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zggqrcs.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zggqrcs.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zggqrcs.f">
+*> [TXT]</a>
+*> \endhtmlonly
+*
+*  Definition:
+*  ===========
+*
+*       SUBROUTINE ZGGQRCS( JOBU1, JOBU2, JOBX, M, N, P, L, SWAPPED,
+*                           A, LDA, B, LDB,
+*                           ALPHA, BETA,
+*                           U1, LDU1, U2, LDU2
+*                           WORK, LWORK, RWORK, LRWORK, IWORK, INFO )
+*
+*       .. Scalar Arguments ..
+*       CHARACTER          JOBU1, JOB2, JOBX
+*       INTEGER            INFO, LDA, LDB, LDU1, LDU2, M, N, P, L,
+*                          LWORK, LRWORK
+*       ..
+*       .. Array Arguments ..
+*       INTEGER            IWORK( * )
+*       DOUBLE PRECISION   ALPHA( N ), BETA( N ), RWORK( * )
+*       COMPLEX*16         A( LDA, * ), B( LDB, * ),
+*      $                   U1( LDU1, * ), U2( LDU2, * ),
+*      $                   WORK( * )
+*       ..
+*
+*
+*> \par Purpose:
+*  =============
+*>
+*> \verbatim
+*>
+*> ZGGQRCS computes the generalized singular value decomposition (GSVD)
+*> of an M-by-N complex matrix A and P-by-N complex matrix B:
+*>
+*>       A = U1 * D1 * X,           B = U2 * D2 * X
+*>
+*> where U1 and U2 are unitary matrices. ZGGQRCS uses the QR
+*> factorization with column pivoting and the 2-by-1 CS decomposition to
+*> compute the GSVD.
+*>
+*> Let L be the effective numerical rank of the matrix (A**H,B**H)**H,
+*> then X is a L-by-N nonsingular matrix, D1 and D2 are M-by-L and
+*> P-by-L "diagonal" matrices. If SWAPPED is false, then D1 and D2 are
+*> of the of the following structures, respectively:
+*>
+*>                 K1  K
+*>            K1 [ I   0   0 ]
+*>       D1 = K  [ 0   C   0 ]
+*>               [ 0   0   0 ]
+*>
+*>                     K   K2
+*>               [ 0   0   0 ]
+*>       D2 = K  [ 0   S   0 ]
+*>            K2 [ 0   0   I ]
+*>
+*> where
+*>
+*>   K  = MIN(M, P, L, M + P - L),
+*>   K1 = MAX(L - P, 0),
+*>   K2 = MAX(L - M, 0),
+*>   C  = diag( ALPHA(1), ..., ALPHA(K) ),
+*>   S  = diag( BETA(1), ..., BETA(K) ), and
+*>   C^2 + S^2 = I.
+*>
+*> If SWAPPED is true, then D1 and D2 are of the of the following
+*> structures, respectively:
+*>
+*>                     K   K1
+*>               [ 0   0   0 ]
+*>       D1 = K  [ 0   S   0 ]
+*>            K1 [ 0   0   I ]
+*>
+*>                 K2  K
+*>            K2 [ I   0   0 ]
+*>       D2 = K  [ 0   C   0 ]
+*>               [ 0   0   0 ]
+*>
+*> where
+*>
+*>   S  = diag( ALPHA(1), ..., ALPHA(K) ),
+*>   C  = diag( BETA(1), ..., BETA(K) ), and
+*>   C^2 + S^2 = I.
+*>
+*> The routine computes C, S and optionally the matrices U1, U2, and X.
+*> On exit, X is stored in WORK( 2:L*N+1 ).
+*>
+*> If B is an N-by-N nonsingular matrix, then the GSVD of the matrix
+*> pair (A, B) implicitly gives the SVD of A*inv(B):
+*>
+*>       A*inv(B) = U1*(D1*inv(D2))*U2**H.
+*>
+*> If (A**H,B**H)**H  has orthonormal columns, then the GSVD of A and B
+*> is also equal to the CS decomposition of A and B. Furthermore, the
+*> GSVD can be used to derive the solution of the eigenvalue problem:
+*>
+*>       A**H*A x = lambda * B**H*B x.
+*>
+*> In some literature, the GSVD of A and B is presented in the form
+*>
+*>       A = U1*D1*( 0 R )*Q**H,    B = U2*D2*( 0 R )*Q**H
+*>
+*> where U1, U2, and Q are unitary matrices. This latter GSVD form is
+*> computed directly by DGGSVD3. It is possible to convert between the
+*> two representations by calculating the RQ decomposition of X but this
+*> is not recommended for reasons of numerical stability.
+*>
+*> \endverbatim
+*
+*  Arguments:
+*  ==========
+*
+*> \param[in] JOBU1
+*> \verbatim
+*>          JOBU1 is CHARACTER*1
+*>          = 'Y':  Orthogonal matrix U1 is computed;
+*>          = 'N':  U1 is not computed.
+*> \endverbatim
+*>
+*> \param[in] JOBU2
+*> \verbatim
+*>          JOBU2 is CHARACTER*1
+*>          = 'Y':  Orthogonal matrix U2 is computed;
+*>          = 'N':  U2 is not computed.
+*> \endverbatim
+*>
+*> \param[in] JOBX
+*> \verbatim
+*>          JOBX is CHARACTER*1
+*>          = 'Y':  Matrix X is computed;
+*>          = 'N':  X is not computed.
+*> \endverbatim
+*>
+*> \param[in] M
+*> \verbatim
+*>          M is INTEGER
+*>          The number of rows of the matrix A.  M >= 1.
+*> \endverbatim
+*>
+*> \param[in] N
+*> \verbatim
+*>          N is INTEGER
+*>          The number of columns of the matrices A and B.  N >= 1.
+*> \endverbatim
+*>
+*> \param[in] P
+*> \verbatim
+*>          P is INTEGER
+*>          The number of rows of the matrix B.  P >= 1.
+*> \endverbatim
+*>
+*> \param[out] L
+*> \verbatim
+*>          L is INTEGER
+*>          On exit, the effective numerical rank of the matrix
+*>          (A**H, B**H)**H.
+*> \endverbatim
+*>
+*> \param[out] SWAPPED
+*> \verbatim
+*>          L is LOGICAL
+*>          On exit, SWAPPED is true if ZGGQRCS swapped the input
+*>          matrices A, B and computed the GSVD of (B, A); false
+*>          otherwise.
+*> \endverbatim
+*>
+*> \param[in,out] A
+*> \verbatim
+*>          A is COMPLEX*16 array, dimension (LDA,N)
+*>          On entry, the M-by-N matrix A.
+*> \endverbatim
+*>
+*> \param[in] LDA
+*> \verbatim
+*>          LDA is INTEGER
+*>          The leading dimension of the array A. LDA >= max(1,M).
+*> \endverbatim
+*>
+*> \param[in,out] B
+*> \verbatim
+*>          B is COMPLEX*16 array, dimension (LDB,N)
+*>          On entry, the P-by-N matrix B.
+*> \endverbatim
+*>
+*> \param[in] LDB
+*> \verbatim
+*>          LDB is INTEGER
+*>          The leading dimension of the array B. LDB >= max(1,P).
+*> \endverbatim
+*>
+*> \param[out] ALPHA
+*> \verbatim
+*>          ALPHA is DOUBLE PRECISIONarray, dimension (N)
+*> \endverbatim
+*>
+*> \param[out] BETA
+*> \verbatim
+*>          BETA is DOUBLE PRECISIONarray, dimension (N)
+*>
+*>          On exit, ALPHA and BETA contain the K generalized singular
+*>          value pairs of A and B.
+*> \endverbatim
+*>
+*> \param[out] U1
+*> \verbatim
+*>          U1 is COMPLEX*16 array, dimension (LDU1,M)
+*>          If JOBU1 = 'Y', U1 contains the M-by-M unitary matrix U1.
+*>          If JOBU1 = 'N', U1 is not referenced.
+*> \endverbatim
+*>
+*> \param[in] LDU1
+*> \verbatim
+*>          LDU1 is INTEGER
+*>          The leading dimension of the array U1. LDU1 >= max(1,M) if
+*>          JOBU1 = 'Y'; LDU1 >= 1 otherwise.
+*> \endverbatim
+*>
+*> \param[out] U2
+*> \verbatim
+*>          U2 is COMPLEX*16 array, dimension (LDU2,P)
+*>          If JOBU2 = 'Y', U2 contains the P-by-P unitary matrix U2.
+*>          If JOBU2 = 'N', U2 is not referenced.
+*> \endverbatim
+*>
+*> \param[in] LDU2
+*> \verbatim
+*>          LDU2 is INTEGER
+*>          The leading dimension of the array U2. LDU2 >= max(1,P) if
+*>          JOBU2 = 'Y'; LDU2 >= 1 otherwise.
+*> \endverbatim
+*>
+*> \param[out] WORK
+*> \verbatim
+*>          WORK is COMPLEX*16 array, dimension (MAX(1,LWORK))
+*>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
+*> \endverbatim
+*>
+*> \param[in] LWORK
+*> \verbatim
+*>          LWORK is INTEGER
+*>          The dimension of the array WORK.
+*>
+*>          If LWORK = -1, then a workspace query is assumed; the
+*>          routine only calculates the optimal size of the WORK array,
+*>          returns this value as the first entry of the WORK array, and
+*>          no error message related to LWORK is issued by XERBLA.
+*> \endverbatim
+*>
+*> \param[out] RWORK
+*> \verbatim
+*>          RWORK is DOUBLE PRECISIONarray, dimension (MAX(1,LRWORK))
+*> \endverbatim
+*>
+*> \param[in] LRWORK
+*> \verbatim
+*>          LRWORK is INTEGER
+*>          The dimension of the array RWORK.
+*>
+*>          If LRWORK = -1, then a workspace query is assumed; the routine
+*>          only calculates the optimal size of the RWORK array, returns
+*>          this value as the first entry of the work array, and no error
+*>          message related to LRWORK is issued by XERBLA.
+*> \endverbatim
+*>
+*> \param[out] IWORK
+*> \verbatim
+*>          IWORK is INTEGER array, dimension (M + N + P)
+*> \endverbatim
+*>
+*> \param[out] INFO
+*> \verbatim
+*>          INFO is INTEGER
+*>          = 0:  successful exit.
+*>          < 0:  if INFO = -i, the i-th argument had an illegal value.
+*>          > 0:  ZBBCSD did not converge. For further details, see
+*>                subroutine ZUNCSDBY1.
+*> \endverbatim
+*
+*> \par Internal Parameters:
+*  =========================
+*>
+*> \verbatim
+*>  W       DOUBLE PRECISION
+*>          W is a radix power chosen such that the Frobenius norm of A
+*>          and W*B are with SQRT(RADIX) and 1/SQRT(RADIX) of each
+*>          other.
+*>
+*>  TOL     DOUBLE PRECISION
+*>          Let G = (A**H,B**H)**H. TOL is the threshold to determine
+*>          the effective rank of G. Generally, it is set to
+*>                   TOL = MAX( M + P, N ) * norm(G) * MACHEPS,
+*>          where norm(G) is the Frobenius norm of G.
+*>          The size of TOL may affect the size of backward error of the
+*>          decomposition.
+*> \endverbatim
+*
+*  Authors:
+*  ========
+*
+*> \author Christoph Conrads (https://christoph-conrads.name)
+*
+*> \ingroup realGEsing
+*
+*> \par Contributors:
+*  ==================
+*>
+*>     Christoph Conrads (https://christoph-conrads.name)
+*>
+*
+*> \par Further Details:
+*  =====================
+*>
+*>  ZGGQRCS should be significantly faster than ZGGSVD3 for large
+*>  matrices because the matrices A and B are reduced to a pair of
+*>  well-conditioned bidiagonal matrices instead of pairs of upper
+*>  triangular matrices. On the downside, ZGGQRCS requires a much larger
+*>  workspace whose dimension must be queried at run-time. ZGGQRCS also
+*>  offers no guarantees which of the two possible diagonal matrices
+*>  is used for the matrix factorization.
+*>
+*  =====================================================================
+      RECURSIVE SUBROUTINE ZGGQRCS( JOBU1, JOBU2, JOBX, M, N, P, L,
+     $                              SWAPPED,
+     $                              A, LDA, B, LDB,
+     $                              ALPHA, BETA,
+     $                              U1, LDU1, U2, LDU2,
+     $                              WORK, LWORK, RWORK, LRWORK, IWORK,
+     $                              INFO )
+*
+*  -- LAPACK driver routine --
+*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
+*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd. --
+*
+      IMPLICIT NONE
+*     .. Scalar Arguments ..
+      LOGICAL            SWAPPED
+      CHARACTER          JOBU1, JOBU2, JOBX
+      INTEGER            INFO, LDA, LDB, LDU1, LDU2, L, M, N, P, LWORK,
+     $                   LRWKOPT, LRWORK, LRWORK2BY1
+*     ..
+*     .. Array Arguments ..
+      INTEGER            IWORK( * )
+      DOUBLE PRECISION   ALPHA( N ), BETA( N ), RWORK( * )
+      COMPLEX*16         A( LDA, * ), B( LDB, * ),
+     $                   U1( LDU1, * ), U2( LDU2, * ),
+     $                   WORK( * )
+*     ..
+*
+*  =====================================================================
+*
+*     .. Parameters ..
+      COMPLEX*16         CONE, CZERO
+      PARAMETER          ( CONE = ( 1.0D0, 0.0D0 ),
+     $                   CZERO = ( 0.0D0, 0.0D0 ) )
+*     .. Local Scalars ..
+      LOGICAL            WANTU1, WANTU2, WANTX, LQUERY
+      INTEGER            I, J, K, K1, LMAX, IG, IG11, IG21, IG22,
+     $                   IVT, IVT12, LDG, LDX, LDVT, LWKMIN, LWKOPT
+      DOUBLE PRECISION   BASE, NORMA, NORMB, NORMG, TOL, ULP, UNFL,
+     $                   THETA, IOTA, W
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      DOUBLE PRECISION   DLAMCH, ZLANGE
+      EXTERNAL           LSAME, DLAMCH, ZLANGE
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           ZGEMM, ZGEQP3, ZLACPY, ZLAPMT, ZLASCL,
+     $                   ZLASET, ZUNGQR, ZUNCSD2BY1, XERBLA
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          COS, MAX, MIN, SIN, SQRT
+*     ..
+*     .. Executable Statements ..
+*
+*     Decode and test the input parameters
+*
+      WANTU1 = LSAME( JOBU1, 'Y' )
+      WANTU2 = LSAME( JOBU2, 'Y' )
+      WANTX = LSAME( JOBX, 'Y' )
+      LQUERY = LWORK.EQ.-1 .OR. LRWORK.EQ.-1
+*
+*     Test the input arguments
+*
+      INFO = 0
+      IF( .NOT.( WANTU1 .OR. LSAME( JOBU1, 'N' ) ) ) THEN
+         INFO = -1
+      ELSE IF( .NOT.( WANTU2 .OR. LSAME( JOBU2, 'N' ) ) ) THEN
+         INFO = -2
+      ELSE IF( .NOT.( WANTX .OR. LSAME( JOBX, 'N' ) ) ) THEN
+         INFO = -3
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -4
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -5
+      ELSE IF( P.LT.0 ) THEN
+         INFO = -6
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -10
+      ELSE IF( LDB.LT.MAX( 1, P ) ) THEN
+         INFO = -12
+      ELSE IF( LDU1.LT.1 .OR. ( WANTU1 .AND. LDU1.LT.M ) ) THEN
+         INFO = -16
+      ELSE IF( LDU2.LT.1 .OR. ( WANTU2 .AND. LDU2.LT.P ) ) THEN
+         INFO = -18
+      ELSE IF( LWORK.LT.1 .AND. .NOT.LQUERY ) THEN
+         INFO = -20
+      ELSE IF( LRWORK.LT.1 .AND. .NOT.LQUERY ) THEN
+         INFO = -22
+      END IF
+*
+*     Make sure A is the matrix smaller in norm
+*
+      IF( INFO.EQ.0 ) THEN
+         NORMA = ZLANGE( 'F', M, N, A, LDA, RWORK )
+         NORMB = ZLANGE( 'F', P, N, B, LDB, RWORK )
+*
+         IF( NORMA.GT.SQRT( 2.0D0 ) * NORMB ) THEN
+            CALL ZGGQRCS( JOBU2, JOBU1, JOBX, P, N, M, L,
+     $                    SWAPPED,
+     $                    B, LDB, A, LDA,
+     $                    BETA, ALPHA,
+     $                    U2, LDU2, U1, LDU1,
+     $                    WORK, LWORK, RWORK, LRWORK, IWORK, INFO )
+            SWAPPED = .TRUE.
+            RETURN
+         ENDIF
+*
+*     Past this point, we know that
+*     * NORMA <= NORMB (almost)
+*     * W >= 1
+*     * ALPHA will contain cosine values at the end
+*     * BETA will contain sine values at the end
+*
+      END IF
+*
+*     Initialize variables
+*
+      SWAPPED = .FALSE.
+      L = 0
+*     The leading dimension must never be zero
+      LDG = MAX( M + P, 1 )
+      LDVT = N
+      LMAX = MIN( M + P, N )
+      IG = 1
+      IG11 = 1
+      IG21 = M + 1
+      IG22 = LDG * M + M + 1
+      IVT = LDG * N + 2
+      IVT12 = IVT + LDVT * M
+      THETA = -1
+      IOTA = -1
+      W = -1
+*
+*     Compute workspace
+*
+      IF( INFO.EQ.0 ) THEN
+         LWKMIN = 0
+         LWKOPT = 0
+*
+         CALL ZGEQP3( M + P, N, WORK( IG ), LDG, IWORK, WORK, WORK, -1,
+     $                RWORK, INFO )
+         LWKMIN = MAX( LWKMIN, N + 1 )
+         LWKOPT = MAX( LWKOPT, INT( WORK( 1 ) ) )
+*
+         CALL ZUNGQR( M + P, LMAX, LMAX, WORK( IG ), LDG, WORK, WORK,
+     $                -1, INFO )
+         LWKMIN = MAX( LWKMIN, LMAX )
+         LWKOPT = MAX( LWKOPT, INT( WORK( 1 ) ) )
+*
+         CALL ZUNCSD2BY1( JOBU1, JOBU2, JOBX, M + P, M, LMAX,
+     $                    WORK( IG ), LDG, WORK( IG ), LDG,
+     $                    ALPHA,
+     $                    U1, LDU1, U2, LDU2, WORK( IVT ), LDVT,
+     $                    WORK, -1, RWORK, LRWORK, IWORK, INFO )
+         LWKMIN = MAX( LWKMIN, INT( WORK( 1 ) ) )
+         LWKOPT = MAX( LWKOPT, INT( WORK( 1 ) ) )
+*        The matrix (A, B) must be stored sequentially for ZUNGQR
+         LWKMIN = LWKMIN + IVT
+         LWKOPT = LWKOPT + IVT
+*        2-by-1 CSD matrix V1 must be stored
+         IF( WANTX ) THEN
+            LWKMIN = LWKMIN + LDVT*N
+            LWKOPT = LWKOPT + LDVT*N
+         END IF
+*        Adjust ZUNCSD2BY1 LRWORK for case with maximum memory
+*        consumption
+         LRWORK2BY1 = INT( RWORK(1) )
+*        Select safe xUNCSD2BY1 IBBCSD value
+     $                - 9 * MAX( 0, MIN( M, P, N, M+P-N-1 ) )
+     $                + 9 * MAX( 1, MIN( M, P, N ) )
+*        Select safe xUNCSD2BY1 LBBCSD value
+     $                - 8 * MAX( 0, MIN( M, P, N, M+P-N ) )
+     $                + 8 * MIN( M, P, N )
+         LRWKOPT = MAX( 2*N, LRWORK2BY1 )
+*        Check workspace size
+         IF( LWORK.LT.LWKMIN .AND. .NOT.LQUERY ) THEN
+            INFO = -20
+         END IF
+         IF( LRWORK.LT.LRWKOPT .AND. .NOT.LQUERY ) THEN
+            INFO = -22
+         END IF
+*
+         WORK( 1 ) = DCMPLX( DBLE( LWKOPT ), 0.0D0 )
+         RWORK( 1 ) = DBLE( LRWKOPT )
+      END IF
+*
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZGGQRCS', -INFO )
+         RETURN
+      END IF
+      IF( LQUERY ) THEN
+         RETURN
+      ENDIF
+*     Finish initialization
+      IF( .NOT.WANTX ) THEN
+         LDVT = 0
+      END IF
+*
+*     Scale matrix A such that norm(A) \approx norm(B)
+*
+      IF( NORMA.EQ.0.0D0 ) THEN
+         W = 1.0D0
+      ELSE
+         BASE = DLAMCH( 'B' )
+         W = BASE ** INT( LOG( NORMB / NORMA ) / LOG( BASE ) )
+*
+         CALL ZLASCL( 'G', -1, -1, 1.0D0, W, M, N, A, LDA, INFO )
+         IF ( INFO.NE.0 ) THEN
+            RETURN
+         END IF
+      END IF
+*
+*     Copy matrices A, B into the (M+P) x N matrix G
+*
+      CALL ZLACPY( 'A', M, N, A, LDA, WORK( IG11 ), LDG )
+      CALL ZLACPY( 'A', P, N, B, LDB, WORK( IG21 ), LDG )
+*
+*     Compute the Frobenius norm of matrix G
+*
+      NORMG = NORMB * SQRT( 1.0D0 + ( ( W * NORMA ) / NORMB )**2 )
+*
+*     Get machine precision and set up threshold for determining
+*     the effective numerical rank of the matrix G.
+*
+      ULP = DLAMCH( 'Precision' )
+      UNFL = DLAMCH( 'Safe Minimum' )
+      TOL = MAX( M + P, N ) * MAX( NORMG, UNFL ) * ULP
+*
+*     IWORK stores the column permutations computed by ZGEQP3.
+*     Columns J where IWORK( J ) is non-zero are permuted to the front
+*     so we set the all entries to zero here.
+*
+      IWORK( 1:N ) = 0
+*
+*     Compute the QR factorization with column pivoting GΠ = Q1 R1
+*
+      CALL ZGEQP3( M + P, N, WORK( IG ), LDG, IWORK, WORK( IVT ),
+     $             WORK( IVT + LMAX ), LWORK - IVT - LMAX + 1, RWORK,
+     $             INFO )
+      IF( INFO.NE.0 ) THEN
+         RETURN
+      END IF
+*
+*     Determine the rank of G
+*
+      DO I = 1, MIN( M + P, N )
+         IF( ABS( WORK( (I-1) * LDG + I ) ).LE.TOL ) THEN
+            EXIT
+         END IF
+         L = L + 1
+      END DO
+*
+*     Handle rank=0 case
+*
+      IF( L.EQ.0 ) THEN
+         IF( WANTU1 ) THEN
+            CALL ZLASET( 'A', M, M, CZERO, CONE, U1, LDU1 )
+         END IF
+         IF( WANTU2 ) THEN
+            CALL ZLASET( 'A', P, P, CZERO, CONE, U2, LDU2 )
+         END IF
+*
+         WORK( 1 ) = DCMPLX( DBLE( LWKOPT ), 0.0D0 )
+         RWORK( 1 ) = DBLE( LRWKOPT )
+         RETURN
+      END IF
+*
+*     Copy R1( 1:L, : ) into A, B and set lower triangular part to zero
+*
+      IF( WANTX ) THEN
+         IF( L.LE.M ) THEN
+             CALL ZLACPY( 'U', L, N, WORK( IG ), LDG, A, LDA )
+             CALL ZLASET( 'L', L - 1, N, CZERO, CZERO, A( 2, 1 ), LDA )
+         ELSE
+             CALL ZLACPY( 'U', M, N, WORK( IG ), LDG, A, LDA )
+             CALL ZLACPY( 'U', L - M, N - M, WORK( IG22 ), LDG, B, LDB )
+*
+             CALL ZLASET( 'L', M - 1, N, CZERO, CZERO, A( 2, 1 ), LDA )
+             CALL ZLASET( 'L', L-M-1, N, CZERO, CZERO, B( 2, 1 ), LDB )
+         END IF
+      END IF
+*
+*     Explicitly form Q1 so that we can compute the CS decomposition
+*
+      CALL ZUNGQR( M + P, L, L, WORK( IG ), LDG, WORK( IVT ),
+     $             WORK( IVT + L ), LWORK - IVT - L + 1, INFO )
+      IF ( INFO.NE.0 ) THEN
+         RETURN
+      END IF
+*
+*     Compute the CS decomposition of Q1( :, 1:L )
+*
+      K = MIN( M, P, L, M + P - L )
+      K1 = MAX( L - P, 0 )
+      CALL ZUNCSD2BY1( JOBU1, JOBU2, JOBX, M + P, M, L,
+     $                 WORK( IG11 ), LDG, WORK( IG21 ), LDG,
+     $                 ALPHA,
+     $                 U1, LDU1, U2, LDU2, WORK( IVT ), LDVT,
+     $                 WORK( IVT + LDVT*N ), LWORK - IVT - LDVT*N + 1,
+     $                 RWORK, LRWORK,
+     $                 IWORK( N + 1 ), INFO )
+      IF( INFO.NE.0 ) THEN
+         RETURN
+      END IF
+*
+*     Compute X = V^T R1( 1:L, : ) and adjust for matrix scaling
+*
+      IF( WANTX ) THEN
+         LDX = L
+         IF ( L.LE.M ) THEN
+            CALL ZGEMM( 'N', 'N', L, N, L,
+     $                  CONE, WORK( IVT ), LDVT, A, LDA,
+     $                  CZERO, WORK( 2 ), LDX )
+         ELSE
+            CALL ZGEMM( 'N', 'N', L, N, M,
+     $                  CONE, WORK( IVT ), LDVT, A, LDA,
+     $                  CZERO, WORK( 2 ), LDX )
+            CALL ZGEMM( 'N', 'N', L, N - M, L - M,
+     $                  CONE, WORK( IVT12 ), LDVT, B, LDB,
+     $                  CONE, WORK( L*M + 2 ), LDX )
+         END IF
+*        Revert column permutation Π by permuting the columns of X
+         CALL ZLAPMT( .FALSE., L, N, WORK( 2 ), LDX, IWORK )
+      END IF
+*
+*     Adjust generalized singular values for matrix scaling
+*     Compute sine, cosine values
+*     Prepare row scaling of X
+*
+      DO I = 1, K
+         THETA = ALPHA( I )
+*        Do not adjust singular value if THETA is greater
+*        than pi/2 (infinite singular values won't change)
+         IF( COS( THETA ).LE.0.0D0 ) THEN
+            ALPHA( I ) = 0.0D0
+            BETA( I ) = 1.0D0
+            IF( WANTX ) THEN
+               RWORK( I ) = 1.0D0
+            END IF
+         ELSE
+*           iota comes in the greek alphabet after theta
+            IOTA = ATAN( W * TAN( THETA ) )
+*           ensure sine, cosine divisor is far away from zero
+*           w is a power of two and will cause no trouble
+            IF( SIN( IOTA ) .GE. COS( IOTA ) ) THEN
+               ALPHA( I ) =  ( SIN( IOTA ) / TAN( THETA ) ) / W
+               BETA( I ) = SIN( IOTA )
+               IF( WANTX ) THEN
+                  RWORK( I ) = SIN( THETA ) / SIN( IOTA )
+               END IF
+            ELSE
+               ALPHA( I ) = COS( IOTA )
+               BETA( I ) = SIN( IOTA )
+               IF( WANTX ) THEN
+                  RWORK( I ) = COS( THETA ) / COS( IOTA ) / W
+               END IF
+            END IF
+         END IF
+      END DO
+*     Adjust rows of X for matrix scaling
+      IF( WANTX ) THEN
+         DO J = 0, N-1
+            DO I = 1, K1
+               WORK( LDX*J + I + 1 ) = WORK( LDX*J + I + 1 ) / W
+            END DO
+            DO I = 1, K
+               WORK( LDX*J + I + K1 + 1 ) =
+     $         WORK( LDX*J + I + K1 + 1 ) * RWORK( I )
+            END DO
+         END DO
+      END IF
+*
+      WORK( 1 ) = DCMPLX( DBLE( LWKOPT ), 0.0D0 )
+      RWORK( 1 ) = DBLE( LRWKOPT )
+      RETURN
+*
+*     End of ZGGQRCS
+*
+      END
diff --git a/SRC/zlasrtr.f b/SRC/zlasrtr.f
new file mode 100644
index 0000000000..22a7f3b021
--- /dev/null
+++ b/SRC/zlasrtr.f
@@ -0,0 +1,191 @@
+*> \brief \b ZLASRTR sorts array indices based on the referenced numbers
+*
+*  =========== DOCUMENTATION ===========
+*
+* Online html documentation available at
+*            http://www.netlib.org/lapack/explore-html/
+*
+*> \htmlonly
+*> Download ZLASRTR + dependencies
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zlasrtr.f">
+*> [TGZ]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zlasrtr.f">
+*> [ZIP]</a>
+*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zlasrtr.f">
+*> [TXT]</a>
+*> \endhtmlonly
+*
+*  Definition:
+*  ===========
+*
+*       SUBROUTINE ZLASRTR( ID, M, N, A, LDA, IPVT, RWORK, INFO )
+*
+*       .. Scalar Arguments ..
+*       CHARACTER          ID
+*       INTEGER            INFO, M, N, LDA
+*       ..
+*       .. Array Arguments ..
+*       COMPLEX*16         A( LDA, * )
+*       DOUBLE PRECISION   RWORK( * )
+*       INTEGER            IPVT( * )
+*       ..
+*
+*
+*> \par Purpose:
+*  =============
+*>
+*> \verbatim
+*> Apply row sorting based on the maximum norm of the rows of A. The
+*> rows can be sorted in increasing (if ID = 'I') or decreasing order
+*> (if ID = 'D').
+*> \endverbatim
+*
+*  Arguments:
+*  ==========
+*
+*> \param[in] ID
+*> \verbatim
+*>          ID is CHARACTER*1
+*>          = 'I': sort the rows of A by increasing row norm;
+*>          = 'D': sort the rows of A by decreasing row norm.
+*> \endverbatim
+*>
+*> \param[in] M
+*> \verbatim
+*>          M is INTEGER
+*>          The number of rows of the matrix A. M >= 0.
+*> \endverbatim
+*>
+*> \param[in] N
+*> \verbatim
+*>          N is INTEGER
+*>          The number of columns of the matrix A. N >= 0.
+*> \endverbatim
+*>
+*> \param[in,out] A
+*> \verbatim
+*>          A is COMPLEX*16 array, dimension (LDA,N)
+*>          On entry, the M by N matrix A.
+*>          On exit, A contains the row-permuted matrix A.
+*> \endverbatim
+*>
+*> \param[in] LDA
+*> \verbatim
+*>          LDA is INTEGER
+*>          The leading dimension of the array A. LDA >= max(1,M).
+*> \endverbatim
+*>
+*> \param[out] IPVT
+*> \verbatim
+*>          IPVT is INTEGER array, dimension (M)
+*>          On exit, if IPVT(J)=K, then the J-th row of A*P was the
+*>          the K-th row of A.
+*> \endverbatim
+*>
+*> \param[out] RWORK
+*> \verbatim
+*>          RWORK is DOUBLE PRECISION array, dimension (M).
+*>          On exit, RWORK contains the maximum norms of the rows of A.
+*> \endverbatim
+*>
+*> \param[out] INFO
+*> \verbatim
+*>          INFO is INTEGER
+*>          = 0:  successful exit
+*>          < 0:  if INFO = -i, the i-th argument had an illegal value
+*> \endverbatim
+*
+*  Authors:
+*  ========
+*
+*> \author Christoph Conrads (https://christoph-conrads.name)
+*
+*> \ingroup auxOTHERcomputational
+*
+*  =====================================================================
+      SUBROUTINE ZLASRTR( ID, M, N, A, LDA, IPVT, RWORK, INFO )
+*
+*  -- LAPACK computational routine --
+*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
+*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
+*
+*     .. Scalar Arguments ..
+      CHARACTER          ID
+      INTEGER            INFO, M, N, LDA
+*     ..
+*     .. Array Arguments ..
+      COMPLEX*16         A( LDA, * )
+      DOUBLE PRECISION   RWORK( * )
+      INTEGER            IPVT( * )
+*     ..
+*
+*  =====================================================================
+*
+*     ..
+*     .. Local Scalars ..
+      INTEGER            I, J
+*     ..
+*     .. External Functions ..
+      LOGICAL            LSAME
+      EXTERNAL           LSAME
+*     ..
+*     .. External Subroutines ..
+      EXTERNAL           XERBLA, ZLAPMR, DLASRTI
+*     ..
+*     .. Intrinsic Functions ..
+      INTRINSIC          ABS, MAX
+*     ..
+*     .. Executable Statements ..
+*
+*     Test the input parameters.
+*
+      INFO = 0
+      IF( .NOT.LSAME( ID, 'I' ) .AND. .NOT.LSAME( ID, 'D' ) ) THEN
+         INFO = -1
+      ELSE IF( M.LT.0 ) THEN
+         INFO = -2
+      ELSE IF( N.LT.0 ) THEN
+         INFO = -3
+      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
+         INFO = -5
+      END IF
+      IF( INFO.NE.0 ) THEN
+         CALL XERBLA( 'ZLASRTR', -INFO )
+         RETURN
+      END IF
+*
+*     Quick return if possible
+*
+      IF( M.LE.1 .OR. N.LT.1 )
+     $   RETURN
+*
+*     Compute maximum norm of each row
+*
+      DO I = 1, M
+         RWORK( I ) = ABS( A( I, 1 ) )
+      END DO
+*
+      DO I = 1, M
+         DO J = 2, N
+            RWORK( I ) = MAX( RWORK( I ), ABS( A( I, J ) ) )
+         END DO
+      END DO
+*
+*     Sort row indices
+*
+      DO I = 1, M
+         IPVT( I ) = I
+      END DO
+*
+      CALL DLASRTI( ID, M, RWORK, IPVT, INFO )
+      IF( INFO.NE.0 ) THEN
+         RETURN
+      END IF
+*
+*     Sort rows
+*
+      CALL ZLAPMR( .TRUE., M, N, A, LDA, IPVT )
+*
+*     End of ZLASRTR
+*
+      END
diff --git a/TESTING/EIG/cerrgg.f b/TESTING/EIG/cerrgg.f
index 3839b5f2c3..0a4ca7248a 100644
--- a/TESTING/EIG/cerrgg.f
+++ b/TESTING/EIG/cerrgg.f
@@ -22,7 +22,7 @@
 *> \verbatim
 *>
 *> CERRGG tests the error exits for CGGES, CGGESX, CGGEV, CGGEVX,
-*> CGGES3, CGGEV3, CGGGLM, CGGHRD, CGGLSE, CGGQRF, CGGRQF,
+*> CGGES3, CGGEV3, CGGGLM, CGGHRD, CGGLSE, CGGQRCS, CGGQRF, CGGRQF,
 *> CGGSVD3, CGGSVP3, CHGEQZ, CTGEVC, CTGEXC, CTGSEN, CTGSJA,
 *> CTGSNA, CTGSYL, and CUNCSD.
 *> \endverbatim
@@ -94,7 +94,7 @@ SUBROUTINE CERRGG( PATH, NUNIT )
 *     ..
 *     .. External Subroutines ..
       EXTERNAL           CGGES, CGGESX, CGGEV, CGGEVX, CGGGLM, CGGHRD,
-     $                   CGGLSE, CGGQRF, CGGRQF, CHGEQZ,
+     $                   CGGLSE, CGGQRCS, CGGQRF, CGGRQF, CHGEQZ,
      $                   CHKXER, CTGEVC, CTGEXC, CTGSEN, CTGSJA, CTGSNA,
      $                   CTGSYL, CUNCSD, CGGES3, CGGEV3, CGGHD3,
      $                   CGGSVD3, CGGSVP3, XLAENV
@@ -637,6 +637,71 @@ SUBROUTINE CERRGG( PATH, NUNIT )
 *
       ELSE IF( LSAMEN( 3, PATH, 'GQR' ) ) THEN
 *
+*        CGGQRCS
+*
+         SRNAMT = 'CGGQRCS'
+         INFOT = 1
+         CALL CGGQRCS( '/', 'N', 'N', 0, 0, 0, I, SWAPPED,
+     $                A, 1, B, 1, R1, R2, U, 1, V, 1,
+     $                W, LW, RW, LW, IW, INFO )
+         CALL CHKXER( 'CGGQRCS', INFOT, NOUT, LERR, OK )
+         INFOT = 2
+         CALL CGGQRCS( 'N', '/', 'N', 0, 0, 0, I, SWAPPED,
+     $                A, 1, B, 1, R1, R2, U, 1, V, 1,
+     $                W, LW, RW, LW, IW, INFO )
+         CALL CHKXER( 'CGGQRCS', INFOT, NOUT, LERR, OK )
+         INFOT = 3
+         CALL CGGQRCS( 'N', 'N', '/', 0, 0, 0, I, SWAPPED,
+     $                A, 1, B, 1, R1, R2, U, 1, V, 1,
+     $                W, LW, RW, LW, IW, INFO )
+         CALL CHKXER( 'CGGQRCS', INFOT, NOUT, LERR, OK )
+         INFOT = 4
+         CALL CGGQRCS( 'N', 'N', 'N', -1, 0, 0, I, SWAPPED,
+     $                A, 1, B, 1, R1, R2, U, 1, V, 1,
+     $                W, LW, RW, LW, IW, INFO )
+         CALL CHKXER( 'CGGQRCS', INFOT, NOUT, LERR, OK )
+         INFOT = 5
+         CALL CGGQRCS( 'N', 'N', 'N', 0, -1, 0, I, SWAPPED,
+     $                A, 1, B, 1, R1, R2, U, 1, V, 1,
+     $                W, LW, RW, LW, IW, INFO )
+         CALL CHKXER( 'CGGQRCS', INFOT, NOUT, LERR, OK )
+         INFOT = 6
+         CALL CGGQRCS( 'N', 'N', 'N', 0, 0, -1, I, SWAPPED,
+     $                A, 1, B, 1, R1, R2, U, 1, V, 1,
+     $                W, LW, RW, LW, IW, INFO )
+         CALL CHKXER( 'CGGQRCS', INFOT, NOUT, LERR, OK )
+         INFOT = 10
+         CALL CGGQRCS( 'N', 'N', 'N', 0, 0, 0, I, SWAPPED,
+     $                A, 0, B, 1, R1, R2, U, 1, V, 1,
+     $                W, LW, RW, LW, IW, INFO )
+         CALL CHKXER( 'CGGQRCS', INFOT, NOUT, LERR, OK )
+         INFOT = 12
+         CALL CGGQRCS( 'N', 'N', 'N', 0, 0, 0, I, SWAPPED,
+     $                A, 1, B, 0, R1, R2, U, 1, V, 1,
+     $                W, LW, RW, LW, IW, INFO )
+         CALL CHKXER( 'CGGQRCS', INFOT, NOUT, LERR, OK )
+         INFOT = 16
+         CALL CGGQRCS( 'N', 'N', 'N', 0, 0, 0, I, SWAPPED,
+     $                A, 1, B, 1, R1, R2, U, 0, V, 1,
+     $                W, LW, RW, LW, IW, INFO )
+         CALL CHKXER( 'CGGQRCS', INFOT, NOUT, LERR, OK )
+         INFOT = 18
+         CALL CGGQRCS( 'N', 'N', 'N', 0, 0, 0, I, SWAPPED,
+     $                A, 1, B, 1, R1, R2, U, 1, V, 0,
+     $                W, LW, RW, LW, IW, INFO )
+         CALL CHKXER( 'CGGQRCS', INFOT, NOUT, LERR, OK )
+         INFOT = 20
+         CALL CGGQRCS( 'N', 'N', 'N', 0, 0, 0, I, SWAPPED,
+     $                A, 1, B, 1, R1, R2, U, 1, V, 1,
+     $                W, 0, RW, LW, IW, INFO )
+         CALL CHKXER( 'CGGQRCS', INFOT, NOUT, LERR, OK )
+         INFOT = 22
+         CALL CGGQRCS( 'N', 'N', 'N', 0, 0, 0, I, SWAPPED,
+     $                A, 1, B, 1, R1, R2, U, 1, V, 1,
+     $                W, LW, RW, 0, IW, INFO )
+         CALL CHKXER( 'CGGQRCS', INFOT, NOUT, LERR, OK )
+         NT = NT + 12
+*
 *        CGGQRF
 *
          SRNAMT = 'CGGQRF'
diff --git a/TESTING/EIG/derrgg.f b/TESTING/EIG/derrgg.f
index 6fc61b8727..0ef495f00e 100644
--- a/TESTING/EIG/derrgg.f
+++ b/TESTING/EIG/derrgg.f
@@ -22,7 +22,7 @@
 *> \verbatim
 *>
 *> DERRGG tests the error exits for DGGES, DGGESX, DGGEV,  DGGEVX,
-*> DGGGLM, DGGHRD, DGGLSE, DGGQRF, DGGRQF, DGGSVD3,
+*> DGGGLM, DGGHRD, DGGLSE, DGGQRCS, DGGQRF, DGGRQF, DGGSVD3,
 *> DGGSVP3, DHGEQZ, DORCSD, DTGEVC, DTGEXC, DTGSEN, DTGSJA, DTGSNA,
 *> DGGES3, DGGEV3, and DTGSYL.
 *> \endverbatim
@@ -93,7 +93,7 @@ SUBROUTINE DERRGG( PATH, NUNIT )
 *     ..
 *     .. External Subroutines ..
       EXTERNAL           CHKXER, DGGES, DGGESX, DGGEV, DGGEVX, DGGGLM,
-     $                   DGGHRD, DGGLSE, DGGQRF, DGGRQF,
+     $                   DGGHRD, DGGLSE, DGGQRCS, DGGQRF, DGGRQF,
      $                   DHGEQZ, DORCSD, DTGEVC, DTGEXC, DTGSEN, DTGSJA,
      $                   DTGSNA, DTGSYL, DGGHD3, DGGES3, DGGEV3,
      $                   DGGSVD3, DGGSVP3, XLAENV
@@ -609,6 +609,55 @@ SUBROUTINE DERRGG( PATH, NUNIT )
 *
       ELSE IF( LSAMEN( 3, PATH, 'GQR' ) ) THEN
 *
+*        DGGQRCS
+*
+         SRNAMT = 'DGGQRCS'
+         INFOT = 1
+         CALL DGGQRCS( '/', 'N', 'N', 0, 0, 0, I, SWAPPED,
+     $                A, 1, B, 1, R1, R2, U, 1, V, 1, W, LW, IW, INFO )
+         CALL CHKXER( 'DGGQRCS', INFOT, NOUT, LERR, OK )
+         INFOT = 2
+         CALL DGGQRCS( 'N', '/', 'N', 0, 0, 0, I, SWAPPED,
+     $                A, 1, B, 1, R1, R2, U, 1, V, 1, W, LW, IW, INFO )
+         CALL CHKXER( 'DGGQRCS', INFOT, NOUT, LERR, OK )
+         INFOT = 3
+         CALL DGGQRCS( 'N', 'N', '/', 0, 0, 0, I, SWAPPED,
+     $                A, 1, B, 1, R1, R2, U, 1, V, 1, W, LW, IW, INFO )
+         CALL CHKXER( 'DGGQRCS', INFOT, NOUT, LERR, OK )
+         INFOT = 4
+         CALL DGGQRCS( 'N', 'N', 'N', -1, 0, 0, I, SWAPPED,
+     $                A, 1, B, 1, R1, R2, U, 1, V, 1, W, LW, IW, INFO )
+         CALL CHKXER( 'DGGQRCS', INFOT, NOUT, LERR, OK )
+         INFOT = 5
+         CALL DGGQRCS( 'N', 'N', 'N', 0, -1, 0, I, SWAPPED,
+     $                A, 1, B, 1, R1, R2, U, 1, V, 1, W, LW, IW, INFO )
+         CALL CHKXER( 'DGGQRCS', INFOT, NOUT, LERR, OK )
+         INFOT = 6
+         CALL DGGQRCS( 'N', 'N', 'N', 0, 0, -1, I, SWAPPED,
+     $                A, 1, B, 1, R1, R2, U, 1, V, 1, W, LW, IW, INFO )
+         CALL CHKXER( 'DGGQRCS', INFOT, NOUT, LERR, OK )
+         INFOT = 10
+         CALL DGGQRCS( 'N', 'N', 'N', 0, 0, 0, I, SWAPPED,
+     $                A, 0, B, 1, R1, R2, U, 1, V, 1, W, LW, IW, INFO )
+         CALL CHKXER( 'DGGQRCS', INFOT, NOUT, LERR, OK )
+         INFOT = 12
+         CALL DGGQRCS( 'N', 'N', 'N', 0, 0, 0, I, SWAPPED,
+     $                A, 1, B, 0, R1, R2, U, 1, V, 1, W, LW, IW, INFO )
+         CALL CHKXER( 'DGGQRCS', INFOT, NOUT, LERR, OK )
+         INFOT = 16
+         CALL DGGQRCS( 'N', 'N', 'N', 0, 0, 0, I, SWAPPED,
+     $                A, 1, B, 1, R1, R2, U, 0, V, 1, W, LW, IW, INFO )
+         CALL CHKXER( 'DGGQRCS', INFOT, NOUT, LERR, OK )
+         INFOT = 18
+         CALL DGGQRCS( 'N', 'N', 'N', 0, 0, 0, I, SWAPPED,
+     $                A, 1, B, 1, R1, R2, U, 1, V, 0, W, LW, IW, INFO )
+         CALL CHKXER( 'DGGQRCS', INFOT, NOUT, LERR, OK )
+         INFOT = 20
+         CALL DGGQRCS( 'N', 'N', 'N', 0, 0, 0, I, SWAPPED,
+     $                A, 1, B, 1, R1, R2, U, 1, V, 1, W, 0, IW, INFO )
+         CALL CHKXER( 'DGGQRCS', INFOT, NOUT, LERR, OK )
+         NT = NT + 11
+*
 *        DGGQRF
 *
          SRNAMT = 'DGGQRF'
diff --git a/TESTING/EIG/serrgg.f b/TESTING/EIG/serrgg.f
index 7824f8618d..9b3b2f2d42 100644
--- a/TESTING/EIG/serrgg.f
+++ b/TESTING/EIG/serrgg.f
@@ -22,7 +22,7 @@
 *> \verbatim
 *>
 *> SERRGG tests the error exits for SGGES, SGGESX, SGGEV, SGGEVX,
-*> SGGES3, SGGEV3, SGGGLM, SGGHRD, SGGLSE, SGGQRF, SGGRQF,
+*> SGGES3, SGGEV3, SGGGLM, SGGHRD, SGGLSE, SGGQRCS, SGGQRF, SGGRQF,
 *> SGGSVD3, SGGSVP3, SHGEQZ, SORCSD, STGEVC, STGEXC, STGSEN,
 *> STGSJA, STGSNA, and STGSYL.
 *> \endverbatim
@@ -73,6 +73,7 @@ SUBROUTINE SERRGG( PATH, NUNIT )
       PARAMETER          ( ONE = 1.0E+0, ZERO = 0.0E+0 )
 *     ..
 *     .. Local Scalars ..
+      LOGICAL            SWAPPED
       CHARACTER*2        C2
       INTEGER            DUMMYK, DUMMYL, I, IFST, ILO, IHI, ILST, INFO,
      $                   J, M, NCYCLE, NT, SDIM, LWORK
@@ -93,7 +94,7 @@ SUBROUTINE SERRGG( PATH, NUNIT )
 *     ..
 *     .. External Subroutines ..
       EXTERNAL           CHKXER, SGGES, SGGESX, SGGEV, SGGEVX, SGGGLM,
-     $                   SGGHRD, SGGLSE, SGGQRF, SGGRQF,
+     $                   SGGHRD, SGGLSE, SGGQRCS, SGGQRF, SGGRQF,
      $                   SHGEQZ, SORCSD, STGEVC, STGEXC, STGSEN, STGSJA,
      $                   STGSNA, STGSYL, SGGES3, SGGEV3, SGGHD3,
      $                   SGGSVD3, SGGSVP3, XLAENV
@@ -655,6 +656,55 @@ SUBROUTINE SERRGG( PATH, NUNIT )
          CALL CHKXER( 'SGGRQF', INFOT, NOUT, LERR, OK )
          NT = NT + 6
 *
+*        SGGQRCS
+*
+         SRNAMT = 'SGGQRCS'
+         INFOT = 1
+         CALL SGGQRCS( '/', 'N', 'N', 0, 0, 0, I, SWAPPED,
+     $                A, 1, B, 1, R1, R2, U, 1, V, 1, W, LW, IW, INFO )
+         CALL CHKXER( 'SGGQRCS', INFOT, NOUT, LERR, OK )
+         INFOT = 2
+         CALL SGGQRCS( 'N', '/', 'N', 0, 0, 0, I, SWAPPED,
+     $                A, 1, B, 1, R1, R2, U, 1, V, 1, W, LW, IW, INFO )
+         CALL CHKXER( 'SGGQRCS', INFOT, NOUT, LERR, OK )
+         INFOT = 3
+         CALL SGGQRCS( 'N', 'N', '/', 0, 0, 0, I, SWAPPED,
+     $                A, 1, B, 1, R1, R2, U, 1, V, 1, W, LW, IW, INFO )
+         CALL CHKXER( 'SGGQRCS', INFOT, NOUT, LERR, OK )
+         INFOT = 4
+         CALL SGGQRCS( 'N', 'N', 'N', -1, 0, 0, I, SWAPPED,
+     $                A, 1, B, 1, R1, R2, U, 1, V, 1, W, LW, IW, INFO )
+         CALL CHKXER( 'SGGQRCS', INFOT, NOUT, LERR, OK )
+         INFOT = 5
+         CALL SGGQRCS( 'N', 'N', 'N', 0, -1, 0, I, SWAPPED,
+     $                A, 1, B, 1, R1, R2, U, 1, V, 1, W, LW, IW, INFO )
+         CALL CHKXER( 'SGGQRCS', INFOT, NOUT, LERR, OK )
+         INFOT = 6
+         CALL SGGQRCS( 'N', 'N', 'N', 0, 0, -1, I, SWAPPED,
+     $                A, 1, B, 1, R1, R2, U, 1, V, 1, W, LW, IW, INFO )
+         CALL CHKXER( 'SGGQRCS', INFOT, NOUT, LERR, OK )
+         INFOT = 10
+         CALL SGGQRCS( 'N', 'N', 'N', 0, 0, 0, I, SWAPPED,
+     $                A, 0, B, 1, R1, R2, U, 1, V, 1, W, LW, IW, INFO )
+         CALL CHKXER( 'SGGQRCS', INFOT, NOUT, LERR, OK )
+         INFOT = 12
+         CALL SGGQRCS( 'N', 'N', 'N', 0, 0, 0, I, SWAPPED,
+     $                A, 1, B, 0, R1, R2, U, 1, V, 1, W, LW, IW, INFO )
+         CALL CHKXER( 'SGGQRCS', INFOT, NOUT, LERR, OK )
+         INFOT = 16
+         CALL SGGQRCS( 'N', 'N', 'N', 0, 0, 0, I, SWAPPED,
+     $                A, 1, B, 1, R1, R2, U, 0, V, 1, W, LW, IW, INFO )
+         CALL CHKXER( 'SGGQRCS', INFOT, NOUT, LERR, OK )
+         INFOT = 18
+         CALL SGGQRCS( 'N', 'N', 'N', 0, 0, 0, I, SWAPPED,
+     $                A, 1, B, 1, R1, R2, U, 1, V, 0, W, LW, IW, INFO )
+         CALL CHKXER( 'SGGQRCS', INFOT, NOUT, LERR, OK )
+         INFOT = 20
+         CALL SGGQRCS( 'N', 'N', 'N', 0, 0, 0, I, SWAPPED,
+     $                A, 1, B, 1, R1, R2, U, 1, V, 1, W, 0, IW, INFO )
+         CALL CHKXER( 'SGGQRCS', INFOT, NOUT, LERR, OK )
+         NT = NT + 11
+*
 *     Test error exits for the SGS, SGV, SGX, and SXV paths.
 *
       ELSE IF( LSAMEN( 3, PATH, 'SGS' ) .OR.
diff --git a/TESTING/EIG/zerrgg.f b/TESTING/EIG/zerrgg.f
index 26e8df9834..d8f84f64da 100644
--- a/TESTING/EIG/zerrgg.f
+++ b/TESTING/EIG/zerrgg.f
@@ -22,7 +22,7 @@
 *> \verbatim
 *>
 *> ZERRGG tests the error exits for ZGGES, ZGGESX, ZGGEV, ZGGEVX,
-*> ZGGES3, ZGGEV3, ZGGGLM, ZGGHRD, ZGGLSE, ZGGQRF, ZGGRQF,
+*> ZGGES3, ZGGEV3, ZGGGLM, ZGGHRD, ZGGLSE, ZGGQRCS, ZGGQRF, ZGGRQF,
 *> ZGGSVD3, ZGGSVP3, ZHGEQZ, ZTGEVC, ZTGEXC, ZTGSEN, ZTGSJA,
 *> ZTGSNA, ZTGSYL, and ZUNCSD.
 *> \endverbatim
@@ -94,7 +94,7 @@ SUBROUTINE ZERRGG( PATH, NUNIT )
 *     ..
 *     .. External Subroutines ..
       EXTERNAL           CHKXER, ZGGES,  ZGGESX, ZGGEV,  ZGGEVX, ZGGGLM,
-     $                   ZGGHRD, ZGGLSE, ZGGQRF, ZGGRQF,
+     $                   ZGGHRD, ZGGLSE, ZGGQRCS, ZGGQRF, ZGGRQF,
      $                   ZHGEQZ, ZTGEVC, ZTGEXC, ZTGSEN, ZTGSJA, ZTGSNA,
      $                   ZTGSYL, ZUNCSD, ZGGES3, ZGGEV3, ZGGHD3,
      $                   ZGGSVD3, ZGGSVP3, XLAENV
@@ -637,6 +637,71 @@ SUBROUTINE ZERRGG( PATH, NUNIT )
 *
       ELSE IF( LSAMEN( 3, PATH, 'GQR' ) ) THEN
 *
+*        ZGGQRCS
+*
+         SRNAMT = 'ZGGQRCS'
+         INFOT = 1
+         CALL ZGGQRCS( '/', 'N', 'N', 0, 0, 0, I, SWAPPED,
+     $                A, 1, B, 1, R1, R2, U, 1, V, 1,
+     $                W, LW, RW, LW, IW, INFO )
+         CALL CHKXER( 'ZGGQRCS', INFOT, NOUT, LERR, OK )
+         INFOT = 2
+         CALL ZGGQRCS( 'N', '/', 'N', 0, 0, 0, I, SWAPPED,
+     $                A, 1, B, 1, R1, R2, U, 1, V, 1,
+     $                W, LW, RW, LW, IW, INFO )
+         CALL CHKXER( 'ZGGQRCS', INFOT, NOUT, LERR, OK )
+         INFOT = 3
+         CALL ZGGQRCS( 'N', 'N', '/', 0, 0, 0, I, SWAPPED,
+     $                A, 1, B, 1, R1, R2, U, 1, V, 1,
+     $                W, LW, RW, LW, IW, INFO )
+         CALL CHKXER( 'ZGGQRCS', INFOT, NOUT, LERR, OK )
+         INFOT = 4
+         CALL ZGGQRCS( 'N', 'N', 'N', -1, 0, 0, I, SWAPPED,
+     $                A, 1, B, 1, R1, R2, U, 1, V, 1,
+     $                W, LW, RW, LW, IW, INFO )
+         CALL CHKXER( 'ZGGQRCS', INFOT, NOUT, LERR, OK )
+         INFOT = 5
+         CALL ZGGQRCS( 'N', 'N', 'N', 0, -1, 0, I, SWAPPED,
+     $                A, 1, B, 1, R1, R2, U, 1, V, 1,
+     $                W, LW, RW, LW, IW, INFO )
+         CALL CHKXER( 'ZGGQRCS', INFOT, NOUT, LERR, OK )
+         INFOT = 6
+         CALL ZGGQRCS( 'N', 'N', 'N', 0, 0, -1, I, SWAPPED,
+     $                A, 1, B, 1, R1, R2, U, 1, V, 1,
+     $                W, LW, RW, LW, IW, INFO )
+         CALL CHKXER( 'ZGGQRCS', INFOT, NOUT, LERR, OK )
+         INFOT = 10
+         CALL ZGGQRCS( 'N', 'N', 'N', 0, 0, 0, I, SWAPPED,
+     $                A, 0, B, 1, R1, R2, U, 1, V, 1,
+     $                W, LW, RW, LW, IW, INFO )
+         CALL CHKXER( 'ZGGQRCS', INFOT, NOUT, LERR, OK )
+         INFOT = 12
+         CALL ZGGQRCS( 'N', 'N', 'N', 0, 0, 0, I, SWAPPED,
+     $                A, 1, B, 0, R1, R2, U, 1, V, 1,
+     $                W, LW, RW, LW, IW, INFO )
+         CALL CHKXER( 'ZGGQRCS', INFOT, NOUT, LERR, OK )
+         INFOT = 16
+         CALL ZGGQRCS( 'N', 'N', 'N', 0, 0, 0, I, SWAPPED,
+     $                A, 1, B, 1, R1, R2, U, 0, V, 1,
+     $                W, LW, RW, LW, IW, INFO )
+         CALL CHKXER( 'ZGGQRCS', INFOT, NOUT, LERR, OK )
+         INFOT = 18
+         CALL ZGGQRCS( 'N', 'N', 'N', 0, 0, 0, I, SWAPPED,
+     $                A, 1, B, 1, R1, R2, U, 1, V, 0,
+     $                W, LW, RW, LW, IW, INFO )
+         CALL CHKXER( 'ZGGQRCS', INFOT, NOUT, LERR, OK )
+         INFOT = 20
+         CALL ZGGQRCS( 'N', 'N', 'N', 0, 0, 0, I, SWAPPED,
+     $                A, 1, B, 1, R1, R2, U, 1, V, 1,
+     $                W, 0, RW, LW, IW, INFO )
+         CALL CHKXER( 'ZGGQRCS', INFOT, NOUT, LERR, OK )
+         INFOT = 22
+         CALL ZGGQRCS( 'N', 'N', 'N', 0, 0, 0, I, SWAPPED,
+     $                A, 1, B, 1, R1, R2, U, 1, V, 1,
+     $                W, LW, RW, 0, IW, INFO )
+         CALL CHKXER( 'ZGGQRCS', INFOT, NOUT, LERR, OK )
+         NT = NT + 12
+*
 *        ZGGQRF
 *
          SRNAMT = 'ZGGQRF'