-
Notifications
You must be signed in to change notification settings - Fork 480
Description
Description
The functions multiplying elementary reflectors with matrices called
- {S,D}ORMQR,
- {S,D}ORMRQ,
- {S,D}ORMLQ,
- {S,D}ORMQL,
- {C,Z}UNMQR,
- {C,Z}UNMRQ,
- {C,Z}UNMLQ,
- {C,Z}UNMQL
return at least 4096 for the optimal workspace size even for dimensions as tiny as zero or one. My expectation is that the optimal workspace size is less than 4096 for tiny numbers of columns and rows. The behavior occurs for SORMQR because the optimal workspace size is computed as
LWKOPT = NW*NB + TSIZEwhere TSIZE = LDT * NBMAX = 64 * 65 = 4160.
Relevant initialization code:
Lines 185 to 187 in 2dafa3d
| INTEGER NBMAX, LDT, TSIZE | |
| PARAMETER ( NBMAX = 64, LDT = NBMAX+1, | |
| $ TSIZE = LDT*NBMAX ) |
Workspace computation:
Lines 241 to 249 in 2dafa3d
| IF( INFO.EQ.0 ) THEN | |
| * | |
| * Compute the workspace requirements | |
| * | |
| NB = MIN( NBMAX, ILAENV( 1, 'SORMQR', SIDE // TRANS, M, N, K, | |
| $ -1 ) ) | |
| LWKOPT = NW*NB + TSIZE | |
| WORK( 1 ) = LWKOPT | |
| END IF |
Demo program in C++11 (compile with c++ -Wextra -Wall -pedantic -std=c++11 sormqr-workspace.cpp -llapack -L/path/to/lapack-build/lib, run with env LD_LIBRARY_PATH=/tmp/path/to/lapack-build/lib ./a.out):
#include <cassert>
#include <complex>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <algorithm>
#include <limits>
#define lapack_complex_float float _Complex
#define lapack_complex_double double _Complex
extern "C" {
typedef int32_t lapack_int;
void dormqr_(char const* side, char const* trans, lapack_int const* m,
lapack_int const* n, lapack_int const* k, double const* A,
lapack_int const* lda, double const* tau, double* C, lapack_int const* ldc,
double* work, lapack_int const* lwork, lapack_int* info, size_t side_len,
size_t trans_len);
void sormqr_(char const* side, char const* trans, lapack_int const* m,
lapack_int const* n, lapack_int const* k, float const* A,
lapack_int const* lda, float const* tau, float* C, lapack_int const* ldc,
float* work, lapack_int const* lwork, lapack_int* info, size_t side_len,
size_t trans_len);
void cunmqr_(char const* side, char const* trans, lapack_int const* m,
lapack_int const* n, lapack_int const* k, lapack_complex_float const* A,
lapack_int const* lda, lapack_complex_float const* tau,
lapack_complex_float* C, lapack_int const* ldc, lapack_complex_float* work,
lapack_int const* lwork, lapack_int* info, size_t side_len,
size_t trans_len);
void zunmqr_(char const* side, char const* trans, lapack_int const* m,
lapack_int const* n, lapack_int const* k, lapack_complex_double const* A,
lapack_int const* lda, lapack_complex_double const* tau,
lapack_complex_double* C, lapack_int const* ldc,
lapack_complex_double* work, lapack_int const* lwork, lapack_int* info,
size_t side_len, size_t trans_len);
}
int main()
{
constexpr auto nan_s = std::numeric_limits<float>::quiet_NaN();
constexpr auto nan_d = std::numeric_limits<double>::quiet_NaN();
auto side = 'L';
auto trans = 'N';
auto a_s = float{ 1.0 };
auto a_d = double{ 1.0 };
auto a_c = std::complex<float>{ 1.0, 2.0 };
auto a_z = std::complex<double>{ 1.0, 2.0 };
auto tau_s = nan_s;
auto tau_d = nan_d;
auto tau_c = std::complex<float>{ 1.0, 2.0 };
auto tau_z = std::complex<double>{ 1.0, 2.0 };
auto c_s = float{ 1.0 };
auto c_d = double{ 1.0 };
auto c_c = std::complex<float>{ 1.0, 2.0 };
auto c_z = std::complex<double>{ 1.0, 2.0 };
auto work_s = nan_s;
auto work_d = nan_d;
auto work_c = std::complex<float>{ nan_s, nan_s };
auto work_z = std::complex<double>{ nan_s, nan_s };
auto lwork = lapack_int{ -1 };
auto info = lapack_int{ -1 };
auto side_len = std::size_t{ 1 };
auto trans_len = std::size_t{ 1 };
for (auto m = lapack_int{ 0 }; m < 3; ++m) {
auto lda = std::max(m, lapack_int{ 1 });
auto ldc = std::max(m, lapack_int{ 1 });
for (auto n = lapack_int{ 0 }; n < 3; ++n) {
auto k_max = side == 'L' ? m : n;
for (auto k = lapack_int{ 0 }; k <= k_max; ++k) {
sormqr_(&side, &trans, &m, &n, &k, &a_s, &lda, &tau_s, &c_s,
&ldc, &work_s, &lwork, &info, side_len, trans_len);
auto opt_s = work_s;
if (info) {
fprintf(stderr, "sormqr m=%d n=%d k=%d returned info=%d\n",
m, n, k, info);
} else {
printf("sormqr m=%d n=%d k=%d work=%g\n", m, n, k, work_s);
}
dormqr_(&side, &trans, &m, &n, &k, &a_d, &lda, &tau_d, &c_d,
&ldc, &work_d, &lwork, &info, side_len, trans_len);
if (info) {
fprintf(stderr, "dormqr m=%d n=%d k=%d returned info=%d\n",
m, n, k, info);
} else if (opt_s != work_d) {
printf("dormqr m=%d n=%d k=%d work=%g\n", m, n, k, work_d);
}
cunmqr_(&side, &trans, &m, &n, &k, (lapack_complex_float*)&a_c,
&lda, (lapack_complex_float*)&tau_c,
(lapack_complex_float*)&c_c, &ldc,
(lapack_complex_float*)&work_c, &lwork, &info, side_len,
trans_len);
if (info) {
fprintf(stderr, "cunmqr m=%d n=%d k=%d returned info=%d\n",
m, n, k, info);
} else if(opt_s != std::real(work_c)) {
printf("cunmqr m=%d n=%d k=%d work=%g\n", m, n, k,
std::real(work_c));
}
zunmqr_(&side, &trans, &m, &n, &k, (lapack_complex_double*)&a_z,
&lda, (lapack_complex_double*)&tau_z,
(lapack_complex_double*)&c_z, &ldc,
(lapack_complex_double*)&work_z, &lwork, &info, side_len,
trans_len);
if (info) {
fprintf(stderr, "zunmqr m=%d n=%d k=%d returned info=%d\n",
m, n, k, info);
} else if(opt_s != std::real(work_z)) {
printf("zunmqr m=%d n=%d k=%d work=%g\n", m, n, k,
std::real(work_z));
}
}
}
}
}SORMRQ, SORMQL return 1 as the optimal workspace size if m == 0 or n == 0. Not all parameter combinations were tested.
Checklist
- I've included a minimal example to reproduce the issue
- I'd be willing to make a PR to solve this issue [Sure -- if I knew where to start. Strictly speaking the code is working fine.]