summaryrefslogtreecommitdiff
path: root/packages/base/src/Internal/C/lapack-aux.c
diff options
context:
space:
mode:
authorAlberto Ruiz <aruiz@um.es>2015-06-04 12:39:33 +0200
committerAlberto Ruiz <aruiz@um.es>2015-06-04 12:39:33 +0200
commitb7e57952112eae61054fc9171ddb311fbaca0841 (patch)
treeef72da1ad7d661a3bc0a2f72fd65a2074ccf1a73 /packages/base/src/Internal/C/lapack-aux.c
parent27334e233edd3d891d2837330289a26e54d05fd1 (diff)
move c sources
Diffstat (limited to 'packages/base/src/Internal/C/lapack-aux.c')
-rw-r--r--packages/base/src/Internal/C/lapack-aux.c1686
1 files changed, 1686 insertions, 0 deletions
diff --git a/packages/base/src/Internal/C/lapack-aux.c b/packages/base/src/Internal/C/lapack-aux.c
new file mode 100644
index 0000000..1402050
--- /dev/null
+++ b/packages/base/src/Internal/C/lapack-aux.c
@@ -0,0 +1,1686 @@
1#include <stdio.h>
2#include <stdlib.h>
3#include <string.h>
4#include <math.h>
5#include <time.h>
6#include "lapack-aux.h"
7
8#define MACRO(B) do {B} while (0)
9#define ERROR(CODE) MACRO(return CODE;)
10#define REQUIRES(COND, CODE) MACRO(if(!(COND)) {ERROR(CODE);})
11
12#define MIN(A,B) ((A)<(B)?(A):(B))
13#define MAX(A,B) ((A)>(B)?(A):(B))
14
15// #define DBGL
16
17#ifdef DBGL
18#define DEBUGMSG(M) printf("\nLAPACK "M"\n");
19#else
20#define DEBUGMSG(M)
21#endif
22
23#define OK return 0;
24
25// #ifdef DBGL
26// #define DEBUGMSG(M) printf("LAPACK Wrapper "M"\n: "); size_t t0 = time(NULL);
27// #define OK MACRO(printf("%ld s\n",time(0)-t0); return 0;);
28// #else
29// #define DEBUGMSG(M)
30// #define OK return 0;
31// #endif
32
33#define TRACEMAT(M) {int q; printf(" %d x %d: ",M##r,M##c); \
34 for(q=0;q<M##r*M##c;q++) printf("%.1f ",M##p[q]); printf("\n");}
35
36#define CHECK(RES,CODE) MACRO(if(RES) return CODE;)
37
38#define BAD_SIZE 2000
39#define BAD_CODE 2001
40#define MEM 2002
41#define BAD_FILE 2003
42#define SINGULAR 2004
43#define NOCONVER 2005
44#define NODEFPOS 2006
45#define NOSPRTD 2007
46
47//---------------------------------------
48void asm_finit() {
49#ifdef i386
50
51// asm("finit");
52
53 static unsigned char buf[108];
54 asm("FSAVE %0":"=m" (buf));
55
56 #if FPUDEBUG
57 if(buf[8]!=255 || buf[9]!=255) { // print warning in red
58 printf("%c[;31mWarning: FPU TAG = %x %x\%c[0m\n",0x1B,buf[8],buf[9],0x1B);
59 }
60 #endif
61
62 #if NANDEBUG
63 asm("FRSTOR %0":"=m" (buf));
64 #endif
65
66#endif
67}
68
69//---------------------------------------
70
71#if NANDEBUG
72
73#define CHECKNANR(M,msg) \
74{ int k; \
75for(k=0; k<(M##r * M##c); k++) { \
76 if(M##p[k] != M##p[k]) { \
77 printf(msg); \
78 TRACEMAT(M) \
79 /*exit(1);*/ \
80 } \
81} \
82}
83
84#define CHECKNANC(M,msg) \
85{ int k; \
86for(k=0; k<(M##r * M##c); k++) { \
87 if( M##p[k].r != M##p[k].r \
88 || M##p[k].i != M##p[k].i) { \
89 printf(msg); \
90 /*exit(1);*/ \
91 } \
92} \
93}
94
95#else
96#define CHECKNANC(M,msg)
97#define CHECKNANR(M,msg)
98#endif
99
100//---------------------------------------
101
102//////////////////// real svd ////////////////////////////////////
103
104/* Subroutine */ int dgesvd_(char *jobu, char *jobvt, integer *m, integer *n,
105 doublereal *a, integer *lda, doublereal *s, doublereal *u, integer *
106 ldu, doublereal *vt, integer *ldvt, doublereal *work, integer *lwork,
107 integer *info);
108
109int svd_l_R(KDMAT(a),DMAT(u), DVEC(s),DMAT(v)) {
110 integer m = ar;
111 integer n = ac;
112 integer q = MIN(m,n);
113 REQUIRES(sn==q,BAD_SIZE);
114 REQUIRES(up==NULL || (ur==m && (uc==m || uc==q)),BAD_SIZE);
115 char* jobu = "A";
116 if (up==NULL) {
117 jobu = "N";
118 } else {
119 if (uc==q) {
120 jobu = "S";
121 }
122 }
123 REQUIRES(vp==NULL || (vc==n && (vr==n || vr==q)),BAD_SIZE);
124 char* jobvt = "A";
125 integer ldvt = n;
126 if (vp==NULL) {
127 jobvt = "N";
128 } else {
129 if (vr==q) {
130 jobvt = "S";
131 ldvt = q;
132 }
133 }
134 DEBUGMSG("svd_l_R");
135 double *B = (double*)malloc(m*n*sizeof(double));
136 CHECK(!B,MEM);
137 memcpy(B,ap,m*n*sizeof(double));
138 integer lwork = -1;
139 integer res;
140 // ask for optimal lwork
141 double ans;
142 dgesvd_ (jobu,jobvt,
143 &m,&n,B,&m,
144 sp,
145 up,&m,
146 vp,&ldvt,
147 &ans, &lwork,
148 &res);
149 lwork = ceil(ans);
150 double * work = (double*)malloc(lwork*sizeof(double));
151 CHECK(!work,MEM);
152 dgesvd_ (jobu,jobvt,
153 &m,&n,B,&m,
154 sp,
155 up,&m,
156 vp,&ldvt,
157 work, &lwork,
158 &res);
159 CHECK(res,res);
160 free(work);
161 free(B);
162 OK
163}
164
165// (alternative version)
166
167/* Subroutine */ int dgesdd_(char *jobz, integer *m, integer *n, doublereal *
168 a, integer *lda, doublereal *s, doublereal *u, integer *ldu,
169 doublereal *vt, integer *ldvt, doublereal *work, integer *lwork,
170 integer *iwork, integer *info);
171
172int svd_l_Rdd(KDMAT(a),DMAT(u), DVEC(s),DMAT(v)) {
173 integer m = ar;
174 integer n = ac;
175 integer q = MIN(m,n);
176 REQUIRES(sn==q,BAD_SIZE);
177 REQUIRES((up == NULL && vp == NULL)
178 || (ur==m && vc==n
179 && ((uc == q && vr == q)
180 || (uc == m && vc==n))),BAD_SIZE);
181 char* jobz = "A";
182 integer ldvt = n;
183 if (up==NULL) {
184 jobz = "N";
185 } else {
186 if (uc==q && vr == q) {
187 jobz = "S";
188 ldvt = q;
189 }
190 }
191 DEBUGMSG("svd_l_Rdd");
192 double *B = (double*)malloc(m*n*sizeof(double));
193 CHECK(!B,MEM);
194 memcpy(B,ap,m*n*sizeof(double));
195 integer* iwk = (integer*) malloc(8*q*sizeof(integer));
196 CHECK(!iwk,MEM);
197 integer lwk = -1;
198 integer res;
199 // ask for optimal lwk
200 double ans;
201 dgesdd_ (jobz,&m,&n,B,&m,sp,up,&m,vp,&ldvt,&ans,&lwk,iwk,&res);
202 lwk = ans;
203 double * workv = (double*)malloc(lwk*sizeof(double));
204 CHECK(!workv,MEM);
205 dgesdd_ (jobz,&m,&n,B,&m,sp,up,&m,vp,&ldvt,workv,&lwk,iwk,&res);
206 CHECK(res,res);
207 free(iwk);
208 free(workv);
209 free(B);
210 OK
211}
212
213//////////////////// complex svd ////////////////////////////////////
214
215// not in clapack.h
216
217int zgesvd_(char *jobu, char *jobvt, integer *m, integer *n,
218 doublecomplex *a, integer *lda, doublereal *s, doublecomplex *u,
219 integer *ldu, doublecomplex *vt, integer *ldvt, doublecomplex *work,
220 integer *lwork, doublereal *rwork, integer *info);
221
222int svd_l_C(KCMAT(a),CMAT(u), DVEC(s),CMAT(v)) {
223 integer m = ar;
224 integer n = ac;
225 integer q = MIN(m,n);
226 REQUIRES(sn==q,BAD_SIZE);
227 REQUIRES(up==NULL || (ur==m && (uc==m || uc==q)),BAD_SIZE);
228 char* jobu = "A";
229 if (up==NULL) {
230 jobu = "N";
231 } else {
232 if (uc==q) {
233 jobu = "S";
234 }
235 }
236 REQUIRES(vp==NULL || (vc==n && (vr==n || vr==q)),BAD_SIZE);
237 char* jobvt = "A";
238 integer ldvt = n;
239 if (vp==NULL) {
240 jobvt = "N";
241 } else {
242 if (vr==q) {
243 jobvt = "S";
244 ldvt = q;
245 }
246 }DEBUGMSG("svd_l_C");
247 doublecomplex *B = (doublecomplex*)malloc(m*n*sizeof(doublecomplex));
248 CHECK(!B,MEM);
249 memcpy(B,ap,m*n*sizeof(doublecomplex));
250
251 double *rwork = (double*) malloc(5*q*sizeof(double));
252 CHECK(!rwork,MEM);
253 integer lwork = -1;
254 integer res;
255 // ask for optimal lwork
256 doublecomplex ans;
257 zgesvd_ (jobu,jobvt,
258 &m,&n,B,&m,
259 sp,
260 up,&m,
261 vp,&ldvt,
262 &ans, &lwork,
263 rwork,
264 &res);
265 lwork = ceil(ans.r);
266 doublecomplex * work = (doublecomplex*)malloc(lwork*sizeof(doublecomplex));
267 CHECK(!work,MEM);
268 zgesvd_ (jobu,jobvt,
269 &m,&n,B,&m,
270 sp,
271 up,&m,
272 vp,&ldvt,
273 work, &lwork,
274 rwork,
275 &res);
276 CHECK(res,res);
277 free(work);
278 free(rwork);
279 free(B);
280 OK
281}
282
283int zgesdd_ (char *jobz, integer *m, integer *n,
284 doublecomplex *a, integer *lda, doublereal *s, doublecomplex *u,
285 integer *ldu, doublecomplex *vt, integer *ldvt, doublecomplex *work,
286 integer *lwork, doublereal *rwork, integer* iwork, integer *info);
287
288int svd_l_Cdd(KCMAT(a),CMAT(u), DVEC(s),CMAT(v)) {
289 //printf("entro\n");
290 integer m = ar;
291 integer n = ac;
292 integer q = MIN(m,n);
293 REQUIRES(sn==q,BAD_SIZE);
294 REQUIRES((up == NULL && vp == NULL)
295 || (ur==m && vc==n
296 && ((uc == q && vr == q)
297 || (uc == m && vc==n))),BAD_SIZE);
298 char* jobz = "A";
299 integer ldvt = n;
300 if (up==NULL) {
301 jobz = "N";
302 } else {
303 if (uc==q && vr == q) {
304 jobz = "S";
305 ldvt = q;
306 }
307 }
308 DEBUGMSG("svd_l_Cdd");
309 doublecomplex *B = (doublecomplex*)malloc(m*n*sizeof(doublecomplex));
310 CHECK(!B,MEM);
311 memcpy(B,ap,m*n*sizeof(doublecomplex));
312 integer* iwk = (integer*) malloc(8*q*sizeof(integer));
313 CHECK(!iwk,MEM);
314 int lrwk;
315 if (0 && *jobz == 'N') {
316 lrwk = 5*q; // does not work, crash at free below
317 } else {
318 lrwk = 5*q*q + 7*q;
319 }
320 double *rwk = (double*)malloc(lrwk*sizeof(double));;
321 CHECK(!rwk,MEM);
322 //printf("%s %ld %d\n",jobz,q,lrwk);
323 integer lwk = -1;
324 integer res;
325 // ask for optimal lwk
326 doublecomplex ans;
327 zgesdd_ (jobz,&m,&n,B,&m,sp,up,&m,vp,&ldvt,&ans,&lwk,rwk,iwk,&res);
328 lwk = ans.r;
329 //printf("lwk = %ld\n",lwk);
330 doublecomplex * workv = (doublecomplex*)malloc(lwk*sizeof(doublecomplex));
331 CHECK(!workv,MEM);
332 zgesdd_ (jobz,&m,&n,B,&m,sp,up,&m,vp,&ldvt,workv,&lwk,rwk,iwk,&res);
333 //printf("res = %ld\n",res);
334 CHECK(res,res);
335 free(workv); // printf("freed workv\n");
336 free(rwk); // printf("freed rwk\n");
337 free(iwk); // printf("freed iwk\n");
338 free(B); // printf("freed B, salgo\n");
339 OK
340}
341
342//////////////////// general complex eigensystem ////////////
343
344/* Subroutine */ int zgeev_(char *jobvl, char *jobvr, integer *n,
345 doublecomplex *a, integer *lda, doublecomplex *w, doublecomplex *vl,
346 integer *ldvl, doublecomplex *vr, integer *ldvr, doublecomplex *work,
347 integer *lwork, doublereal *rwork, integer *info);
348
349int eig_l_C(KCMAT(a), CMAT(u), CVEC(s),CMAT(v)) {
350 integer n = ar;
351 REQUIRES(ac==n && sn==n, BAD_SIZE);
352 REQUIRES(up==NULL || (ur==n && uc==n), BAD_SIZE);
353 char jobvl = up==NULL?'N':'V';
354 REQUIRES(vp==NULL || (vr==n && vc==n), BAD_SIZE);
355 char jobvr = vp==NULL?'N':'V';
356 DEBUGMSG("eig_l_C");
357 doublecomplex *B = (doublecomplex*)malloc(n*n*sizeof(doublecomplex));
358 CHECK(!B,MEM);
359 memcpy(B,ap,n*n*sizeof(doublecomplex));
360 double *rwork = (double*) malloc(2*n*sizeof(double));
361 CHECK(!rwork,MEM);
362 integer lwork = -1;
363 integer res;
364 // ask for optimal lwork
365 doublecomplex ans;
366 //printf("ask zgeev\n");
367 zgeev_ (&jobvl,&jobvr,
368 &n,B,&n,
369 sp,
370 up,&n,
371 vp,&n,
372 &ans, &lwork,
373 rwork,
374 &res);
375 lwork = ceil(ans.r);
376 //printf("ans = %d\n",lwork);
377 doublecomplex * work = (doublecomplex*)malloc(lwork*sizeof(doublecomplex));
378 CHECK(!work,MEM);
379 //printf("zgeev\n");
380 zgeev_ (&jobvl,&jobvr,
381 &n,B,&n,
382 sp,
383 up,&n,
384 vp,&n,
385 work, &lwork,
386 rwork,
387 &res);
388 CHECK(res,res);
389 free(work);
390 free(rwork);
391 free(B);
392 OK
393}
394
395
396
397//////////////////// general real eigensystem ////////////
398
399/* Subroutine */ int dgeev_(char *jobvl, char *jobvr, integer *n, doublereal *
400 a, integer *lda, doublereal *wr, doublereal *wi, doublereal *vl,
401 integer *ldvl, doublereal *vr, integer *ldvr, doublereal *work,
402 integer *lwork, integer *info);
403
404int eig_l_R(KDMAT(a),DMAT(u), CVEC(s),DMAT(v)) {
405 integer n = ar;
406 REQUIRES(ac==n && sn==n, BAD_SIZE);
407 REQUIRES(up==NULL || (ur==n && uc==n), BAD_SIZE);
408 char jobvl = up==NULL?'N':'V';
409 REQUIRES(vp==NULL || (vr==n && vc==n), BAD_SIZE);
410 char jobvr = vp==NULL?'N':'V';
411 DEBUGMSG("eig_l_R");
412 double *B = (double*)malloc(n*n*sizeof(double));
413 CHECK(!B,MEM);
414 memcpy(B,ap,n*n*sizeof(double));
415 integer lwork = -1;
416 integer res;
417 // ask for optimal lwork
418 double ans;
419 //printf("ask dgeev\n");
420 dgeev_ (&jobvl,&jobvr,
421 &n,B,&n,
422 (double*)sp, (double*)sp+n,
423 up,&n,
424 vp,&n,
425 &ans, &lwork,
426 &res);
427 lwork = ceil(ans);
428 //printf("ans = %d\n",lwork);
429 double * work = (double*)malloc(lwork*sizeof(double));
430 CHECK(!work,MEM);
431 //printf("dgeev\n");
432 dgeev_ (&jobvl,&jobvr,
433 &n,B,&n,
434 (double*)sp, (double*)sp+n,
435 up,&n,
436 vp,&n,
437 work, &lwork,
438 &res);
439 CHECK(res,res);
440 free(work);
441 free(B);
442 OK
443}
444
445
446//////////////////// symmetric real eigensystem ////////////
447
448/* Subroutine */ int dsyev_(char *jobz, char *uplo, integer *n, doublereal *a,
449 integer *lda, doublereal *w, doublereal *work, integer *lwork,
450 integer *info);
451
452int eig_l_S(int wantV,KDMAT(a),DVEC(s),DMAT(v)) {
453 integer n = ar;
454 REQUIRES(ac==n && sn==n, BAD_SIZE);
455 REQUIRES(vr==n && vc==n, BAD_SIZE);
456 char jobz = wantV?'V':'N';
457 DEBUGMSG("eig_l_S");
458 memcpy(vp,ap,n*n*sizeof(double));
459 integer lwork = -1;
460 char uplo = 'U';
461 integer res;
462 // ask for optimal lwork
463 double ans;
464 //printf("ask dsyev\n");
465 dsyev_ (&jobz,&uplo,
466 &n,vp,&n,
467 sp,
468 &ans, &lwork,
469 &res);
470 lwork = ceil(ans);
471 //printf("ans = %d\n",lwork);
472 double * work = (double*)malloc(lwork*sizeof(double));
473 CHECK(!work,MEM);
474 dsyev_ (&jobz,&uplo,
475 &n,vp,&n,
476 sp,
477 work, &lwork,
478 &res);
479 CHECK(res,res);
480 free(work);
481 OK
482}
483
484//////////////////// hermitian complex eigensystem ////////////
485
486/* Subroutine */ int zheev_(char *jobz, char *uplo, integer *n, doublecomplex
487 *a, integer *lda, doublereal *w, doublecomplex *work, integer *lwork,
488 doublereal *rwork, integer *info);
489
490int eig_l_H(int wantV,KCMAT(a),DVEC(s),CMAT(v)) {
491 integer n = ar;
492 REQUIRES(ac==n && sn==n, BAD_SIZE);
493 REQUIRES(vr==n && vc==n, BAD_SIZE);
494 char jobz = wantV?'V':'N';
495 DEBUGMSG("eig_l_H");
496 memcpy(vp,ap,2*n*n*sizeof(double));
497 double *rwork = (double*) malloc((3*n-2)*sizeof(double));
498 CHECK(!rwork,MEM);
499 integer lwork = -1;
500 char uplo = 'U';
501 integer res;
502 // ask for optimal lwork
503 doublecomplex ans;
504 //printf("ask zheev\n");
505 zheev_ (&jobz,&uplo,
506 &n,vp,&n,
507 sp,
508 &ans, &lwork,
509 rwork,
510 &res);
511 lwork = ceil(ans.r);
512 //printf("ans = %d\n",lwork);
513 doublecomplex * work = (doublecomplex*)malloc(lwork*sizeof(doublecomplex));
514 CHECK(!work,MEM);
515 zheev_ (&jobz,&uplo,
516 &n,vp,&n,
517 sp,
518 work, &lwork,
519 rwork,
520 &res);
521 CHECK(res,res);
522 free(work);
523 free(rwork);
524 OK
525}
526
527//////////////////// general real linear system ////////////
528
529/* Subroutine */ int dgesv_(integer *n, integer *nrhs, doublereal *a, integer
530 *lda, integer *ipiv, doublereal *b, integer *ldb, integer *info);
531
532int linearSolveR_l(KDMAT(a),KDMAT(b),DMAT(x)) {
533 integer n = ar;
534 integer nhrs = bc;
535 REQUIRES(n>=1 && ar==ac && ar==br,BAD_SIZE);
536 DEBUGMSG("linearSolveR_l");
537 double*AC = (double*)malloc(n*n*sizeof(double));
538 memcpy(AC,ap,n*n*sizeof(double));
539 memcpy(xp,bp,n*nhrs*sizeof(double));
540 integer * ipiv = (integer*)malloc(n*sizeof(integer));
541 integer res;
542 dgesv_ (&n,&nhrs,
543 AC, &n,
544 ipiv,
545 xp, &n,
546 &res);
547 if(res>0) {
548 return SINGULAR;
549 }
550 CHECK(res,res);
551 free(ipiv);
552 free(AC);
553 OK
554}
555
556//////////////////// general complex linear system ////////////
557
558/* Subroutine */ int zgesv_(integer *n, integer *nrhs, doublecomplex *a,
559 integer *lda, integer *ipiv, doublecomplex *b, integer *ldb, integer *
560 info);
561
562int linearSolveC_l(KCMAT(a),KCMAT(b),CMAT(x)) {
563 integer n = ar;
564 integer nhrs = bc;
565 REQUIRES(n>=1 && ar==ac && ar==br,BAD_SIZE);
566 DEBUGMSG("linearSolveC_l");
567 doublecomplex*AC = (doublecomplex*)malloc(n*n*sizeof(doublecomplex));
568 memcpy(AC,ap,n*n*sizeof(doublecomplex));
569 memcpy(xp,bp,n*nhrs*sizeof(doublecomplex));
570 integer * ipiv = (integer*)malloc(n*sizeof(integer));
571 integer res;
572 zgesv_ (&n,&nhrs,
573 AC, &n,
574 ipiv,
575 xp, &n,
576 &res);
577 if(res>0) {
578 return SINGULAR;
579 }
580 CHECK(res,res);
581 free(ipiv);
582 free(AC);
583 OK
584}
585
586//////// symmetric positive definite real linear system using Cholesky ////////////
587
588/* Subroutine */ int dpotrs_(char *uplo, integer *n, integer *nrhs,
589 doublereal *a, integer *lda, doublereal *b, integer *ldb, integer *
590 info);
591
592int cholSolveR_l(KDMAT(a),KDMAT(b),DMAT(x)) {
593 integer n = ar;
594 integer nhrs = bc;
595 REQUIRES(n>=1 && ar==ac && ar==br,BAD_SIZE);
596 DEBUGMSG("cholSolveR_l");
597 memcpy(xp,bp,n*nhrs*sizeof(double));
598 integer res;
599 dpotrs_ ("U",
600 &n,&nhrs,
601 (double*)ap, &n,
602 xp, &n,
603 &res);
604 CHECK(res,res);
605 OK
606}
607
608//////// Hermitian positive definite real linear system using Cholesky ////////////
609
610/* Subroutine */ int zpotrs_(char *uplo, integer *n, integer *nrhs,
611 doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb,
612 integer *info);
613
614int cholSolveC_l(KCMAT(a),KCMAT(b),CMAT(x)) {
615 integer n = ar;
616 integer nhrs = bc;
617 REQUIRES(n>=1 && ar==ac && ar==br,BAD_SIZE);
618 DEBUGMSG("cholSolveC_l");
619 memcpy(xp,bp,n*nhrs*sizeof(doublecomplex));
620 integer res;
621 zpotrs_ ("U",
622 &n,&nhrs,
623 (doublecomplex*)ap, &n,
624 xp, &n,
625 &res);
626 CHECK(res,res);
627 OK
628}
629
630//////////////////// least squares real linear system ////////////
631
632/* Subroutine */ int dgels_(char *trans, integer *m, integer *n, integer *
633 nrhs, doublereal *a, integer *lda, doublereal *b, integer *ldb,
634 doublereal *work, integer *lwork, integer *info);
635
636int linearSolveLSR_l(KDMAT(a),KDMAT(b),DMAT(x)) {
637 integer m = ar;
638 integer n = ac;
639 integer nrhs = bc;
640 integer ldb = xr;
641 REQUIRES(m>=1 && n>=1 && ar==br && xr==MAX(m,n) && xc == bc, BAD_SIZE);
642 DEBUGMSG("linearSolveLSR_l");
643 double*AC = (double*)malloc(m*n*sizeof(double));
644 memcpy(AC,ap,m*n*sizeof(double));
645 if (m>=n) {
646 memcpy(xp,bp,m*nrhs*sizeof(double));
647 } else {
648 int k;
649 for(k = 0; k<nrhs; k++) {
650 memcpy(xp+ldb*k,bp+m*k,m*sizeof(double));
651 }
652 }
653 integer res;
654 integer lwork = -1;
655 double ans;
656 dgels_ ("N",&m,&n,&nrhs,
657 AC,&m,
658 xp,&ldb,
659 &ans,&lwork,
660 &res);
661 lwork = ceil(ans);
662 //printf("ans = %d\n",lwork);
663 double * work = (double*)malloc(lwork*sizeof(double));
664 dgels_ ("N",&m,&n,&nrhs,
665 AC,&m,
666 xp,&ldb,
667 work,&lwork,
668 &res);
669 if(res>0) {
670 return SINGULAR;
671 }
672 CHECK(res,res);
673 free(work);
674 free(AC);
675 OK
676}
677
678//////////////////// least squares complex linear system ////////////
679
680/* Subroutine */ int zgels_(char *trans, integer *m, integer *n, integer *
681 nrhs, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb,
682 doublecomplex *work, integer *lwork, integer *info);
683
684int linearSolveLSC_l(KCMAT(a),KCMAT(b),CMAT(x)) {
685 integer m = ar;
686 integer n = ac;
687 integer nrhs = bc;
688 integer ldb = xr;
689 REQUIRES(m>=1 && n>=1 && ar==br && xr==MAX(m,n) && xc == bc, BAD_SIZE);
690 DEBUGMSG("linearSolveLSC_l");
691 doublecomplex*AC = (doublecomplex*)malloc(m*n*sizeof(doublecomplex));
692 memcpy(AC,ap,m*n*sizeof(doublecomplex));
693 if (m>=n) {
694 memcpy(xp,bp,m*nrhs*sizeof(doublecomplex));
695 } else {
696 int k;
697 for(k = 0; k<nrhs; k++) {
698 memcpy(xp+ldb*k,bp+m*k,m*sizeof(doublecomplex));
699 }
700 }
701 integer res;
702 integer lwork = -1;
703 doublecomplex ans;
704 zgels_ ("N",&m,&n,&nrhs,
705 AC,&m,
706 xp,&ldb,
707 &ans,&lwork,
708 &res);
709 lwork = ceil(ans.r);
710 //printf("ans = %d\n",lwork);
711 doublecomplex * work = (doublecomplex*)malloc(lwork*sizeof(doublecomplex));
712 zgels_ ("N",&m,&n,&nrhs,
713 AC,&m,
714 xp,&ldb,
715 work,&lwork,
716 &res);
717 if(res>0) {
718 return SINGULAR;
719 }
720 CHECK(res,res);
721 free(work);
722 free(AC);
723 OK
724}
725
726//////////////////// least squares real linear system using SVD ////////////
727
728/* Subroutine */ int dgelss_(integer *m, integer *n, integer *nrhs,
729 doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *
730 s, doublereal *rcond, integer *rank, doublereal *work, integer *lwork,
731 integer *info);
732
733int linearSolveSVDR_l(double rcond,KDMAT(a),KDMAT(b),DMAT(x)) {
734 integer m = ar;
735 integer n = ac;
736 integer nrhs = bc;
737 integer ldb = xr;
738 REQUIRES(m>=1 && n>=1 && ar==br && xr==MAX(m,n) && xc == bc, BAD_SIZE);
739 DEBUGMSG("linearSolveSVDR_l");
740 double*AC = (double*)malloc(m*n*sizeof(double));
741 double*S = (double*)malloc(MIN(m,n)*sizeof(double));
742 memcpy(AC,ap,m*n*sizeof(double));
743 if (m>=n) {
744 memcpy(xp,bp,m*nrhs*sizeof(double));
745 } else {
746 int k;
747 for(k = 0; k<nrhs; k++) {
748 memcpy(xp+ldb*k,bp+m*k,m*sizeof(double));
749 }
750 }
751 integer res;
752 integer lwork = -1;
753 integer rank;
754 double ans;
755 dgelss_ (&m,&n,&nrhs,
756 AC,&m,
757 xp,&ldb,
758 S,
759 &rcond,&rank,
760 &ans,&lwork,
761 &res);
762 lwork = ceil(ans);
763 //printf("ans = %d\n",lwork);
764 double * work = (double*)malloc(lwork*sizeof(double));
765 dgelss_ (&m,&n,&nrhs,
766 AC,&m,
767 xp,&ldb,
768 S,
769 &rcond,&rank,
770 work,&lwork,
771 &res);
772 if(res>0) {
773 return NOCONVER;
774 }
775 CHECK(res,res);
776 free(work);
777 free(S);
778 free(AC);
779 OK
780}
781
782//////////////////// least squares complex linear system using SVD ////////////
783
784// not in clapack.h
785
786int zgelss_(integer *m, integer *n, integer *nhrs,
787 doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, doublereal *s,
788 doublereal *rcond, integer* rank,
789 doublecomplex *work, integer* lwork, doublereal* rwork,
790 integer *info);
791
792int linearSolveSVDC_l(double rcond, KCMAT(a),KCMAT(b),CMAT(x)) {
793 integer m = ar;
794 integer n = ac;
795 integer nrhs = bc;
796 integer ldb = xr;
797 REQUIRES(m>=1 && n>=1 && ar==br && xr==MAX(m,n) && xc == bc, BAD_SIZE);
798 DEBUGMSG("linearSolveSVDC_l");
799 doublecomplex*AC = (doublecomplex*)malloc(m*n*sizeof(doublecomplex));
800 double*S = (double*)malloc(MIN(m,n)*sizeof(double));
801 double*RWORK = (double*)malloc(5*MIN(m,n)*sizeof(double));
802 memcpy(AC,ap,m*n*sizeof(doublecomplex));
803 if (m>=n) {
804 memcpy(xp,bp,m*nrhs*sizeof(doublecomplex));
805 } else {
806 int k;
807 for(k = 0; k<nrhs; k++) {
808 memcpy(xp+ldb*k,bp+m*k,m*sizeof(doublecomplex));
809 }
810 }
811 integer res;
812 integer lwork = -1;
813 integer rank;
814 doublecomplex ans;
815 zgelss_ (&m,&n,&nrhs,
816 AC,&m,
817 xp,&ldb,
818 S,
819 &rcond,&rank,
820 &ans,&lwork,
821 RWORK,
822 &res);
823 lwork = ceil(ans.r);
824 //printf("ans = %d\n",lwork);
825 doublecomplex * work = (doublecomplex*)malloc(lwork*sizeof(doublecomplex));
826 zgelss_ (&m,&n,&nrhs,
827 AC,&m,
828 xp,&ldb,
829 S,
830 &rcond,&rank,
831 work,&lwork,
832 RWORK,
833 &res);
834 if(res>0) {
835 return NOCONVER;
836 }
837 CHECK(res,res);
838 free(work);
839 free(RWORK);
840 free(S);
841 free(AC);
842 OK
843}
844
845//////////////////// Cholesky factorization /////////////////////////
846
847/* Subroutine */ int zpotrf_(char *uplo, integer *n, doublecomplex *a,
848 integer *lda, integer *info);
849
850int chol_l_H(KCMAT(a),CMAT(l)) {
851 integer n = ar;
852 REQUIRES(n>=1 && ac == n && lr==n && lc==n,BAD_SIZE);
853 DEBUGMSG("chol_l_H");
854 memcpy(lp,ap,n*n*sizeof(doublecomplex));
855 char uplo = 'U';
856 integer res;
857 zpotrf_ (&uplo,&n,lp,&n,&res);
858 CHECK(res>0,NODEFPOS);
859 CHECK(res,res);
860 doublecomplex zero = {0.,0.};
861 int r,c;
862 for (r=0; r<lr-1; r++) {
863 for(c=r+1; c<lc; c++) {
864 lp[r*lc+c] = zero;
865 }
866 }
867 OK
868}
869
870
871/* Subroutine */ int dpotrf_(char *uplo, integer *n, doublereal *a, integer *
872 lda, integer *info);
873
874int chol_l_S(KDMAT(a),DMAT(l)) {
875 integer n = ar;
876 REQUIRES(n>=1 && ac == n && lr==n && lc==n,BAD_SIZE);
877 DEBUGMSG("chol_l_S");
878 memcpy(lp,ap,n*n*sizeof(double));
879 char uplo = 'U';
880 integer res;
881 dpotrf_ (&uplo,&n,lp,&n,&res);
882 CHECK(res>0,NODEFPOS);
883 CHECK(res,res);
884 int r,c;
885 for (r=0; r<lr-1; r++) {
886 for(c=r+1; c<lc; c++) {
887 lp[r*lc+c] = 0.;
888 }
889 }
890 OK
891}
892
893//////////////////// QR factorization /////////////////////////
894
895/* Subroutine */ int dgeqr2_(integer *m, integer *n, doublereal *a, integer *
896 lda, doublereal *tau, doublereal *work, integer *info);
897
898int qr_l_R(KDMAT(a), DVEC(tau), DMAT(r)) {
899 integer m = ar;
900 integer n = ac;
901 integer mn = MIN(m,n);
902 REQUIRES(m>=1 && n >=1 && rr== m && rc == n && taun == mn, BAD_SIZE);
903 DEBUGMSG("qr_l_R");
904 double *WORK = (double*)malloc(n*sizeof(double));
905 CHECK(!WORK,MEM);
906 memcpy(rp,ap,m*n*sizeof(double));
907 integer res;
908 dgeqr2_ (&m,&n,rp,&m,taup,WORK,&res);
909 CHECK(res,res);
910 free(WORK);
911 OK
912}
913
914/* Subroutine */ int zgeqr2_(integer *m, integer *n, doublecomplex *a,
915 integer *lda, doublecomplex *tau, doublecomplex *work, integer *info);
916
917int qr_l_C(KCMAT(a), CVEC(tau), CMAT(r)) {
918 integer m = ar;
919 integer n = ac;
920 integer mn = MIN(m,n);
921 REQUIRES(m>=1 && n >=1 && rr== m && rc == n && taun == mn, BAD_SIZE);
922 DEBUGMSG("qr_l_C");
923 doublecomplex *WORK = (doublecomplex*)malloc(n*sizeof(doublecomplex));
924 CHECK(!WORK,MEM);
925 memcpy(rp,ap,m*n*sizeof(doublecomplex));
926 integer res;
927 zgeqr2_ (&m,&n,rp,&m,taup,WORK,&res);
928 CHECK(res,res);
929 free(WORK);
930 OK
931}
932
933/* Subroutine */ int dorgqr_(integer *m, integer *n, integer *k, doublereal *
934 a, integer *lda, doublereal *tau, doublereal *work, integer *lwork,
935 integer *info);
936
937int c_dorgqr(KDMAT(a), KDVEC(tau), DMAT(r)) {
938 integer m = ar;
939 integer n = MIN(ac,ar);
940 integer k = taun;
941 DEBUGMSG("c_dorgqr");
942 integer lwork = 8*n; // FIXME
943 double *WORK = (double*)malloc(lwork*sizeof(double));
944 CHECK(!WORK,MEM);
945 memcpy(rp,ap,m*k*sizeof(double));
946 integer res;
947 dorgqr_ (&m,&n,&k,rp,&m,(double*)taup,WORK,&lwork,&res);
948 CHECK(res,res);
949 free(WORK);
950 OK
951}
952
953/* Subroutine */ int zungqr_(integer *m, integer *n, integer *k,
954 doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *
955 work, integer *lwork, integer *info);
956
957int c_zungqr(KCMAT(a), KCVEC(tau), CMAT(r)) {
958 integer m = ar;
959 integer n = MIN(ac,ar);
960 integer k = taun;
961 DEBUGMSG("z_ungqr");
962 integer lwork = 8*n; // FIXME
963 doublecomplex *WORK = (doublecomplex*)malloc(lwork*sizeof(doublecomplex));
964 CHECK(!WORK,MEM);
965 memcpy(rp,ap,m*k*sizeof(doublecomplex));
966 integer res;
967 zungqr_ (&m,&n,&k,rp,&m,(doublecomplex*)taup,WORK,&lwork,&res);
968 CHECK(res,res);
969 free(WORK);
970 OK
971}
972
973
974//////////////////// Hessenberg factorization /////////////////////////
975
976/* Subroutine */ int dgehrd_(integer *n, integer *ilo, integer *ihi,
977 doublereal *a, integer *lda, doublereal *tau, doublereal *work,
978 integer *lwork, integer *info);
979
980int hess_l_R(KDMAT(a), DVEC(tau), DMAT(r)) {
981 integer m = ar;
982 integer n = ac;
983 integer mn = MIN(m,n);
984 REQUIRES(m>=1 && n == m && rr== m && rc == n && taun == mn-1, BAD_SIZE);
985 DEBUGMSG("hess_l_R");
986 integer lwork = 5*n; // fixme
987 double *WORK = (double*)malloc(lwork*sizeof(double));
988 CHECK(!WORK,MEM);
989 memcpy(rp,ap,m*n*sizeof(double));
990 integer res;
991 integer one = 1;
992 dgehrd_ (&n,&one,&n,rp,&n,taup,WORK,&lwork,&res);
993 CHECK(res,res);
994 free(WORK);
995 OK
996}
997
998
999/* Subroutine */ int zgehrd_(integer *n, integer *ilo, integer *ihi,
1000 doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *
1001 work, integer *lwork, integer *info);
1002
1003int hess_l_C(KCMAT(a), CVEC(tau), CMAT(r)) {
1004 integer m = ar;
1005 integer n = ac;
1006 integer mn = MIN(m,n);
1007 REQUIRES(m>=1 && n == m && rr== m && rc == n && taun == mn-1, BAD_SIZE);
1008 DEBUGMSG("hess_l_C");
1009 integer lwork = 5*n; // fixme
1010 doublecomplex *WORK = (doublecomplex*)malloc(lwork*sizeof(doublecomplex));
1011 CHECK(!WORK,MEM);
1012 memcpy(rp,ap,m*n*sizeof(doublecomplex));
1013 integer res;
1014 integer one = 1;
1015 zgehrd_ (&n,&one,&n,rp,&n,taup,WORK,&lwork,&res);
1016 CHECK(res,res);
1017 free(WORK);
1018 OK
1019}
1020
1021//////////////////// Schur factorization /////////////////////////
1022
1023/* Subroutine */ int dgees_(char *jobvs, char *sort, L_fp select, integer *n,
1024 doublereal *a, integer *lda, integer *sdim, doublereal *wr,
1025 doublereal *wi, doublereal *vs, integer *ldvs, doublereal *work,
1026 integer *lwork, logical *bwork, integer *info);
1027
1028int schur_l_R(KDMAT(a), DMAT(u), DMAT(s)) {
1029 integer m = ar;
1030 integer n = ac;
1031 REQUIRES(m>=1 && n==m && ur==n && uc==n && sr==n && sc==n, BAD_SIZE);
1032 DEBUGMSG("schur_l_R");
1033 //int k;
1034 //printf("---------------------------\n");
1035 //printf("%p: ",ap); for(k=0;k<n*n;k++) printf("%f ",ap[k]); printf("\n");
1036 //printf("%p: ",up); for(k=0;k<n*n;k++) printf("%f ",up[k]); printf("\n");
1037 //printf("%p: ",sp); for(k=0;k<n*n;k++) printf("%f ",sp[k]); printf("\n");
1038 memcpy(sp,ap,n*n*sizeof(double));
1039 integer lwork = 6*n; // fixme
1040 double *WORK = (double*)malloc(lwork*sizeof(double));
1041 double *WR = (double*)malloc(n*sizeof(double));
1042 double *WI = (double*)malloc(n*sizeof(double));
1043 // WR and WI not really required in this call
1044 logical *BWORK = (logical*)malloc(n*sizeof(logical));
1045 integer res;
1046 integer sdim;
1047 dgees_ ("V","N",NULL,&n,sp,&n,&sdim,WR,WI,up,&n,WORK,&lwork,BWORK,&res);
1048 //printf("%p: ",ap); for(k=0;k<n*n;k++) printf("%f ",ap[k]); printf("\n");
1049 //printf("%p: ",up); for(k=0;k<n*n;k++) printf("%f ",up[k]); printf("\n");
1050 //printf("%p: ",sp); for(k=0;k<n*n;k++) printf("%f ",sp[k]); printf("\n");
1051 if(res>0) {
1052 return NOCONVER;
1053 }
1054 CHECK(res,res);
1055 free(WR);
1056 free(WI);
1057 free(BWORK);
1058 free(WORK);
1059 OK
1060}
1061
1062
1063/* Subroutine */ int zgees_(char *jobvs, char *sort, L_fp select, integer *n,
1064 doublecomplex *a, integer *lda, integer *sdim, doublecomplex *w,
1065 doublecomplex *vs, integer *ldvs, doublecomplex *work, integer *lwork,
1066 doublereal *rwork, logical *bwork, integer *info);
1067
1068int schur_l_C(KCMAT(a), CMAT(u), CMAT(s)) {
1069 integer m = ar;
1070 integer n = ac;
1071 REQUIRES(m>=1 && n==m && ur==n && uc==n && sr==n && sc==n, BAD_SIZE);
1072 DEBUGMSG("schur_l_C");
1073 memcpy(sp,ap,n*n*sizeof(doublecomplex));
1074 integer lwork = 6*n; // fixme
1075 doublecomplex *WORK = (doublecomplex*)malloc(lwork*sizeof(doublecomplex));
1076 doublecomplex *W = (doublecomplex*)malloc(n*sizeof(doublecomplex));
1077 // W not really required in this call
1078 logical *BWORK = (logical*)malloc(n*sizeof(logical));
1079 double *RWORK = (double*)malloc(n*sizeof(double));
1080 integer res;
1081 integer sdim;
1082 zgees_ ("V","N",NULL,&n,sp,&n,&sdim,W,
1083 up,&n,
1084 WORK,&lwork,RWORK,BWORK,&res);
1085 if(res>0) {
1086 return NOCONVER;
1087 }
1088 CHECK(res,res);
1089 free(W);
1090 free(BWORK);
1091 free(WORK);
1092 OK
1093}
1094
1095//////////////////// LU factorization /////////////////////////
1096
1097/* Subroutine */ int dgetrf_(integer *m, integer *n, doublereal *a, integer *
1098 lda, integer *ipiv, integer *info);
1099
1100int lu_l_R(KDMAT(a), DVEC(ipiv), DMAT(r)) {
1101 integer m = ar;
1102 integer n = ac;
1103 integer mn = MIN(m,n);
1104 REQUIRES(m>=1 && n >=1 && ipivn == mn, BAD_SIZE);
1105 DEBUGMSG("lu_l_R");
1106 integer* auxipiv = (integer*)malloc(mn*sizeof(integer));
1107 memcpy(rp,ap,m*n*sizeof(double));
1108 integer res;
1109 dgetrf_ (&m,&n,rp,&m,auxipiv,&res);
1110 if(res>0) {
1111 res = 0; // fixme
1112 }
1113 CHECK(res,res);
1114 int k;
1115 for (k=0; k<mn; k++) {
1116 ipivp[k] = auxipiv[k];
1117 }
1118 free(auxipiv);
1119 OK
1120}
1121
1122
1123/* Subroutine */ int zgetrf_(integer *m, integer *n, doublecomplex *a,
1124 integer *lda, integer *ipiv, integer *info);
1125
1126int lu_l_C(KCMAT(a), DVEC(ipiv), CMAT(r)) {
1127 integer m = ar;
1128 integer n = ac;
1129 integer mn = MIN(m,n);
1130 REQUIRES(m>=1 && n >=1 && ipivn == mn, BAD_SIZE);
1131 DEBUGMSG("lu_l_C");
1132 integer* auxipiv = (integer*)malloc(mn*sizeof(integer));
1133 memcpy(rp,ap,m*n*sizeof(doublecomplex));
1134 integer res;
1135 zgetrf_ (&m,&n,rp,&m,auxipiv,&res);
1136 if(res>0) {
1137 res = 0; // fixme
1138 }
1139 CHECK(res,res);
1140 int k;
1141 for (k=0; k<mn; k++) {
1142 ipivp[k] = auxipiv[k];
1143 }
1144 free(auxipiv);
1145 OK
1146}
1147
1148
1149//////////////////// LU substitution /////////////////////////
1150
1151/* Subroutine */ int dgetrs_(char *trans, integer *n, integer *nrhs,
1152 doublereal *a, integer *lda, integer *ipiv, doublereal *b, integer *
1153 ldb, integer *info);
1154
1155int luS_l_R(KDMAT(a), KDVEC(ipiv), KDMAT(b), DMAT(x)) {
1156 integer m = ar;
1157 integer n = ac;
1158 integer mrhs = br;
1159 integer nrhs = bc;
1160
1161 REQUIRES(m==n && m==mrhs && m==ipivn,BAD_SIZE);
1162 integer* auxipiv = (integer*)malloc(n*sizeof(integer));
1163 int k;
1164 for (k=0; k<n; k++) {
1165 auxipiv[k] = (integer)ipivp[k];
1166 }
1167 integer res;
1168 memcpy(xp,bp,mrhs*nrhs*sizeof(double));
1169 dgetrs_ ("N",&n,&nrhs,(/*no const (!?)*/ double*)ap,&m,auxipiv,xp,&mrhs,&res);
1170 CHECK(res,res);
1171 free(auxipiv);
1172 OK
1173}
1174
1175
1176/* Subroutine */ int zgetrs_(char *trans, integer *n, integer *nrhs,
1177 doublecomplex *a, integer *lda, integer *ipiv, doublecomplex *b,
1178 integer *ldb, integer *info);
1179
1180int luS_l_C(KCMAT(a), KDVEC(ipiv), KCMAT(b), CMAT(x)) {
1181 integer m = ar;
1182 integer n = ac;
1183 integer mrhs = br;
1184 integer nrhs = bc;
1185
1186 REQUIRES(m==n && m==mrhs && m==ipivn,BAD_SIZE);
1187 integer* auxipiv = (integer*)malloc(n*sizeof(integer));
1188 int k;
1189 for (k=0; k<n; k++) {
1190 auxipiv[k] = (integer)ipivp[k];
1191 }
1192 integer res;
1193 memcpy(xp,bp,mrhs*nrhs*sizeof(doublecomplex));
1194 zgetrs_ ("N",&n,&nrhs,(doublecomplex*)ap,&m,auxipiv,xp,&mrhs,&res);
1195 CHECK(res,res);
1196 free(auxipiv);
1197 OK
1198}
1199
1200//////////////////// Matrix Product /////////////////////////
1201
1202void dgemm_(char *, char *, integer *, integer *, integer *,
1203 double *, const double *, integer *, const double *,
1204 integer *, double *, double *, integer *);
1205
1206int multiplyR(int ta, int tb, KDMAT(a),KDMAT(b),DMAT(r)) {
1207 //REQUIRES(ac==br && ar==rr && bc==rc,BAD_SIZE);
1208 DEBUGMSG("dgemm_");
1209 CHECKNANR(a,"NaN multR Input\n")
1210 CHECKNANR(b,"NaN multR Input\n")
1211 integer m = ta?ac:ar;
1212 integer n = tb?br:bc;
1213 integer k = ta?ar:ac;
1214 integer lda = ar;
1215 integer ldb = br;
1216 integer ldc = rr;
1217 double alpha = 1;
1218 double beta = 0;
1219 dgemm_(ta?"T":"N",tb?"T":"N",&m,&n,&k,&alpha,ap,&lda,bp,&ldb,&beta,rp,&ldc);
1220 CHECKNANR(r,"NaN multR Output\n")
1221 OK
1222}
1223
1224void zgemm_(char *, char *, integer *, integer *, integer *,
1225 doublecomplex *, const doublecomplex *, integer *, const doublecomplex *,
1226 integer *, doublecomplex *, doublecomplex *, integer *);
1227
1228int multiplyC(int ta, int tb, KCMAT(a),KCMAT(b),CMAT(r)) {
1229 //REQUIRES(ac==br && ar==rr && bc==rc,BAD_SIZE);
1230 DEBUGMSG("zgemm_");
1231 CHECKNANC(a,"NaN multC Input\n")
1232 CHECKNANC(b,"NaN multC Input\n")
1233 integer m = ta?ac:ar;
1234 integer n = tb?br:bc;
1235 integer k = ta?ar:ac;
1236 integer lda = ar;
1237 integer ldb = br;
1238 integer ldc = rr;
1239 doublecomplex alpha = {1,0};
1240 doublecomplex beta = {0,0};
1241 zgemm_(ta?"T":"N",tb?"T":"N",&m,&n,&k,&alpha,
1242 ap,&lda,
1243 bp,&ldb,&beta,
1244 rp,&ldc);
1245 CHECKNANC(r,"NaN multC Output\n")
1246 OK
1247}
1248
1249void sgemm_(char *, char *, integer *, integer *, integer *,
1250 float *, const float *, integer *, const float *,
1251 integer *, float *, float *, integer *);
1252
1253int multiplyF(int ta, int tb, KFMAT(a),KFMAT(b),FMAT(r)) {
1254 //REQUIRES(ac==br && ar==rr && bc==rc,BAD_SIZE);
1255 DEBUGMSG("sgemm_");
1256 integer m = ta?ac:ar;
1257 integer n = tb?br:bc;
1258 integer k = ta?ar:ac;
1259 integer lda = ar;
1260 integer ldb = br;
1261 integer ldc = rr;
1262 float alpha = 1;
1263 float beta = 0;
1264 sgemm_(ta?"T":"N",tb?"T":"N",&m,&n,&k,&alpha,ap,&lda,bp,&ldb,&beta,rp,&ldc);
1265 OK
1266}
1267
1268void cgemm_(char *, char *, integer *, integer *, integer *,
1269 complex *, const complex *, integer *, const complex *,
1270 integer *, complex *, complex *, integer *);
1271
1272int multiplyQ(int ta, int tb, KQMAT(a),KQMAT(b),QMAT(r)) {
1273 //REQUIRES(ac==br && ar==rr && bc==rc,BAD_SIZE);
1274 DEBUGMSG("cgemm_");
1275 integer m = ta?ac:ar;
1276 integer n = tb?br:bc;
1277 integer k = ta?ar:ac;
1278 integer lda = ar;
1279 integer ldb = br;
1280 integer ldc = rr;
1281 complex alpha = {1,0};
1282 complex beta = {0,0};
1283 cgemm_(ta?"T":"N",tb?"T":"N",&m,&n,&k,&alpha,
1284 ap,&lda,
1285 bp,&ldb,&beta,
1286 rp,&ldc);
1287 OK
1288}
1289
1290
1291int multiplyI(KOIMAT(a), KOIMAT(b), OIMAT(r)) {
1292 { TRAV(r,i,j) {
1293 int k;
1294 AT(r,i,j) = 0;
1295 for (k=0;k<ac;k++) {
1296 AT(r,i,j) += AT(a,i,k) * AT(b,k,j);
1297 }
1298 }
1299 }
1300 OK
1301}
1302
1303//////////////////// transpose /////////////////////////
1304
1305int transF(KFMAT(x),FMAT(t)) {
1306 REQUIRES(xr==tc && xc==tr,BAD_SIZE);
1307 DEBUGMSG("transF");
1308 int i,j;
1309 for (i=0; i<tr; i++) {
1310 for (j=0; j<tc; j++) {
1311 tp[i*tc+j] = xp[j*xc+i];
1312 }
1313 }
1314 OK
1315}
1316
1317int transR(KDMAT(x),DMAT(t)) {
1318 REQUIRES(xr==tc && xc==tr,BAD_SIZE);
1319 DEBUGMSG("transR");
1320 int i,j;
1321 for (i=0; i<tr; i++) {
1322 for (j=0; j<tc; j++) {
1323 tp[i*tc+j] = xp[j*xc+i];
1324 }
1325 }
1326 OK
1327}
1328
1329int transQ(KQMAT(x),QMAT(t)) {
1330 REQUIRES(xr==tc && xc==tr,BAD_SIZE);
1331 DEBUGMSG("transQ");
1332 int i,j;
1333 for (i=0; i<tr; i++) {
1334 for (j=0; j<tc; j++) {
1335 tp[i*tc+j] = xp[j*xc+i];
1336 }
1337 }
1338 OK
1339}
1340
1341int transC(KCMAT(x),CMAT(t)) {
1342 REQUIRES(xr==tc && xc==tr,BAD_SIZE);
1343 DEBUGMSG("transC");
1344 int i,j;
1345 for (i=0; i<tr; i++) {
1346 for (j=0; j<tc; j++) {
1347 tp[i*tc+j] = xp[j*xc+i];
1348 }
1349 }
1350 OK
1351}
1352
1353int transP(KPMAT(x), PMAT(t)) {
1354 REQUIRES(xr==tc && xc==tr,BAD_SIZE);
1355 REQUIRES(xs==ts,NOCONVER);
1356 DEBUGMSG("transP");
1357 int i,j;
1358 for (i=0; i<tr; i++) {
1359 for (j=0; j<tc; j++) {
1360 memcpy(tp+(i*tc+j)*xs,xp +(j*xc+i)*xs,xs);
1361 }
1362 }
1363 OK
1364}
1365
1366int transI(KIMAT(x),IMAT(t)) {
1367 REQUIRES(xr==tc && xc==tr,BAD_SIZE);
1368 DEBUGMSG("transI");
1369 int i,j;
1370 for (i=0; i<tr; i++) {
1371 for (j=0; j<tc; j++) {
1372 tp[i*tc+j] = xp[j*xc+i];
1373 }
1374 }
1375 OK
1376}
1377
1378
1379//////////////////// constant /////////////////////////
1380
1381int constantF(float * pval, FVEC(r)) {
1382 DEBUGMSG("constantF")
1383 int k;
1384 double val = *pval;
1385 for(k=0;k<rn;k++) {
1386 rp[k]=val;
1387 }
1388 OK
1389}
1390
1391int constantR(double * pval, DVEC(r)) {
1392 DEBUGMSG("constantR")
1393 int k;
1394 double val = *pval;
1395 for(k=0;k<rn;k++) {
1396 rp[k]=val;
1397 }
1398 OK
1399}
1400
1401int constantQ(complex* pval, QVEC(r)) {
1402 DEBUGMSG("constantQ")
1403 int k;
1404 complex val = *pval;
1405 for(k=0;k<rn;k++) {
1406 rp[k]=val;
1407 }
1408 OK
1409}
1410
1411int constantC(doublecomplex* pval, CVEC(r)) {
1412 DEBUGMSG("constantC")
1413 int k;
1414 doublecomplex val = *pval;
1415 for(k=0;k<rn;k++) {
1416 rp[k]=val;
1417 }
1418 OK
1419}
1420
1421int constantP(void* pval, PVEC(r)) {
1422 DEBUGMSG("constantP")
1423 int k;
1424 for(k=0;k<rn;k++) {
1425 memcpy(rp+k*rs,pval,rs);
1426 }
1427 OK
1428}
1429
1430
1431int constantI(int * pval, IVEC(r)) {
1432 DEBUGMSG("constantI")
1433 int k;
1434 int val = *pval;
1435 for(k=0;k<rn;k++) {
1436 rp[k]=val;
1437 }
1438 OK
1439}
1440
1441
1442//////////////////// float-double conversion /////////////////////////
1443
1444int float2double(FVEC(x),DVEC(y)) {
1445 DEBUGMSG("float2double")
1446 int k;
1447 for(k=0;k<xn;k++) {
1448 yp[k]=xp[k];
1449 }
1450 OK
1451}
1452
1453int float2int(KFVEC(x),IVEC(y)) {
1454 DEBUGMSG("float2int")
1455 int k;
1456 for(k=0;k<xn;k++) {
1457 yp[k]=xp[k];
1458 }
1459 OK
1460}
1461
1462
1463int double2float(DVEC(x),FVEC(y)) {
1464 DEBUGMSG("double2float")
1465 int k;
1466 for(k=0;k<xn;k++) {
1467 yp[k]=xp[k];
1468 }
1469 OK
1470}
1471
1472
1473int double2int(KDVEC(x),IVEC(y)) {
1474 DEBUGMSG("double2int")
1475 int k;
1476 for(k=0;k<xn;k++) {
1477 yp[k]=xp[k];
1478 }
1479 OK
1480}
1481
1482
1483int int2float(KIVEC(x),FVEC(y)) {
1484 DEBUGMSG("int2float")
1485 int k;
1486 for(k=0;k<xn;k++) {
1487 yp[k]=xp[k];
1488 }
1489 OK
1490}
1491
1492
1493int int2double(KIVEC(x),DVEC(y)) {
1494 DEBUGMSG("int2double")
1495 int k;
1496 for(k=0;k<xn;k++) {
1497 yp[k]=xp[k];
1498 }
1499 OK
1500}
1501
1502
1503//////////////////// conjugate /////////////////////////
1504
1505int conjugateQ(KQVEC(x),QVEC(t)) {
1506 REQUIRES(xn==tn,BAD_SIZE);
1507 DEBUGMSG("conjugateQ");
1508 int k;
1509 for(k=0;k<xn;k++) {
1510 tp[k].r = xp[k].r;
1511 tp[k].i = -xp[k].i;
1512 }
1513 OK
1514}
1515
1516int conjugateC(KCVEC(x),CVEC(t)) {
1517 REQUIRES(xn==tn,BAD_SIZE);
1518 DEBUGMSG("conjugateC");
1519 int k;
1520 for(k=0;k<xn;k++) {
1521 tp[k].r = xp[k].r;
1522 tp[k].i = -xp[k].i;
1523 }
1524 OK
1525}
1526
1527//////////////////// step /////////////////////////
1528
1529#define STEP_IMP \
1530 int k; \
1531 for(k=0;k<xn;k++) { \
1532 yp[k]=xp[k]>0; \
1533 } \
1534 OK
1535
1536int stepF(KFVEC(x),FVEC(y)) {
1537 STEP_IMP
1538}
1539
1540int stepD(KDVEC(x),DVEC(y)) {
1541 STEP_IMP
1542}
1543
1544int stepI(KIVEC(x),IVEC(y)) {
1545 STEP_IMP
1546}
1547
1548//////////////////// cond /////////////////////////
1549
1550#define COMPARE_IMP \
1551 REQUIRES(xn==yn && xn==rn ,BAD_SIZE); \
1552 int k; \
1553 for(k=0;k<xn;k++) { \
1554 rp[k] = xp[k]<yp[k]?-1:(xp[k]>yp[k]?1:0); \
1555 } \
1556 OK
1557
1558
1559int compareF(KFVEC(x),KFVEC(y),IVEC(r)) {
1560 COMPARE_IMP
1561}
1562
1563int compareD(KDVEC(x),KDVEC(y),IVEC(r)) {
1564 COMPARE_IMP
1565}
1566
1567int compareI(KIVEC(x),KIVEC(y),IVEC(r)) {
1568 COMPARE_IMP
1569}
1570
1571
1572#define COND_IMP \
1573 REQUIRES(xn==yn && xn==ltn && xn==eqn && xn==gtn && xn==rn ,BAD_SIZE); \
1574 int k; \
1575 for(k=0;k<xn;k++) { \
1576 rp[k] = xp[k]<yp[k]?ltp[k]:(xp[k]>yp[k]?gtp[k]:eqp[k]); \
1577 } \
1578 OK
1579
1580int condF(FVEC(x),FVEC(y),FVEC(lt),FVEC(eq),FVEC(gt),FVEC(r)) {
1581 COND_IMP
1582}
1583
1584int condD(DVEC(x),DVEC(y),DVEC(lt),DVEC(eq),DVEC(gt),DVEC(r)) {
1585 COND_IMP
1586}
1587
1588int condI(KIVEC(x),KIVEC(y),KIVEC(lt),KIVEC(eq),KIVEC(gt),IVEC(r)) {
1589 COND_IMP
1590}
1591
1592
1593#define CHOOSE_IMP \
1594 REQUIRES(condn==ltn && ltn==eqn && ltn==gtn && ltn==rn ,BAD_SIZE); \
1595 int k; \
1596 for(k=0;k<condn;k++) { \
1597 rp[k] = condp[k]<0?ltp[k]:(condp[k]>0?gtp[k]:eqp[k]); \
1598 } \
1599 OK
1600
1601int chooseF(KIVEC(cond),KFVEC(lt),KFVEC(eq),KFVEC(gt),FVEC(r)) {
1602 CHOOSE_IMP
1603}
1604
1605int chooseD(KIVEC(cond),KDVEC(lt),KDVEC(eq),KDVEC(gt),DVEC(r)) {
1606 CHOOSE_IMP
1607}
1608
1609int chooseI(KIVEC(cond),KIVEC(lt),KIVEC(eq),KIVEC(gt),IVEC(r)) {
1610 CHOOSE_IMP
1611}
1612
1613int chooseC(KIVEC(cond),KCVEC(lt),KCVEC(eq),KCVEC(gt),CVEC(r)) {
1614 CHOOSE_IMP
1615}
1616
1617int chooseQ(KIVEC(cond),KQVEC(lt),KQVEC(eq),KQVEC(gt),QVEC(r)) {
1618 CHOOSE_IMP
1619}
1620
1621//////////////////////// extract /////////////////////////////////
1622
1623#define EXTRACT_IMP \
1624 int i,j,si,sj,ni,nj; \
1625 ni = modei ? in : ip[1]-ip[0]+1; \
1626 nj = modej ? jn : jp[1]-jp[0]+1; \
1627 \
1628 for (i=0; i<ni; i++) { \
1629 si = modei ? ip[i] : i+ip[0]; \
1630 \
1631 for (j=0; j<nj; j++) { \
1632 sj = modej ? jp[j] : j+jp[0]; \
1633 \
1634 AT(r,i,j) = AT(m,si,sj); \
1635 } \
1636 } \
1637 OK
1638
1639int extractD(int modei, int modej, KIVEC(i), KIVEC(j), KODMAT(m), ODMAT(r)) {
1640 EXTRACT_IMP
1641}
1642
1643int extractF(int modei, int modej, KIVEC(i), KIVEC(j), KOFMAT(m), OFMAT(r)) {
1644 EXTRACT_IMP
1645}
1646
1647int extractC(int modei, int modej, KIVEC(i), KIVEC(j), KOCMAT(m), OCMAT(r)) {
1648 EXTRACT_IMP
1649}
1650
1651int extractQ(int modei, int modej, KIVEC(i), KIVEC(j), KOQMAT(m), OQMAT(r)) {
1652 EXTRACT_IMP
1653}
1654
1655int extractI(int modei, int modej, KIVEC(i), KIVEC(j), KOIMAT(m), OIMAT(r)) {
1656 EXTRACT_IMP
1657}
1658
1659//////////////////////// remap /////////////////////////////////
1660
1661#define REMAP_IMP \
1662 REQUIRES(ir==jr && ic==jc && ir==rr && ic==rc ,BAD_SIZE); \
1663 { TRAV(r,a,b) { AT(r,a,b) = AT(m,AT(i,a,b),AT(j,a,b)); } \
1664 } \
1665 OK
1666
1667int remapD(KOIMAT(i), KOIMAT(j), KODMAT(m), ODMAT(r)) {
1668 REMAP_IMP
1669}
1670
1671int remapF(KOIMAT(i), KOIMAT(j), KOFMAT(m), OFMAT(r)) {
1672 REMAP_IMP
1673}
1674
1675int remapI(KOIMAT(i), KOIMAT(j), KOIMAT(m), OIMAT(r)) {
1676 REMAP_IMP
1677}
1678
1679int remapC(KOIMAT(i), KOIMAT(j), KOCMAT(m), OCMAT(r)) {
1680 REMAP_IMP
1681}
1682
1683int remapQ(KOIMAT(i), KOIMAT(j), KOQMAT(m), OQMAT(r)) {
1684 REMAP_IMP
1685}
1686