diff options
Diffstat (limited to 'packages/base/src/Internal/C')
-rw-r--r-- | packages/base/src/Internal/C/lapack-aux.c | 59 | ||||
-rw-r--r-- | packages/base/src/Internal/C/lapack-aux.h | 1 |
2 files changed, 60 insertions, 0 deletions
diff --git a/packages/base/src/Internal/C/lapack-aux.c b/packages/base/src/Internal/C/lapack-aux.c index 2843ab5..4d48594 100644 --- a/packages/base/src/Internal/C/lapack-aux.c +++ b/packages/base/src/Internal/C/lapack-aux.c | |||
@@ -1398,6 +1398,65 @@ ROWOP(int64_t) | |||
1398 | ROWOP_MOD(int32_t,mod) | 1398 | ROWOP_MOD(int32_t,mod) |
1399 | ROWOP_MOD(int64_t,mod_l) | 1399 | ROWOP_MOD(int64_t,mod_l) |
1400 | 1400 | ||
1401 | /////////////////////////////// inplace GEMM //////////////////////////////// | ||
1402 | |||
1403 | #define GEMM(T) int gemm_##T(VECG(T,c),VECG(int,p),MATG(T,a),MATG(T,b),MATG(T,r)) { \ | ||
1404 | T a = cp[0], b = cp[1]; \ | ||
1405 | int r1a = pp[0], c1a = pp[2], c2a = pp[3] ; \ | ||
1406 | int r1b = pp[4], c1b = pp[6] ; \ | ||
1407 | int r1r = pp[8], r2r = pp[9], c1r = pp[10], c2r = pp[11]; \ | ||
1408 | int dra = r1a - r1r; \ | ||
1409 | int dcb = c1b-c1r; \ | ||
1410 | int nk = c2a-c1a+1; \ | ||
1411 | int i,j,k; \ | ||
1412 | T t; \ | ||
1413 | for (i=r1r; i<=r2r; i++) { \ | ||
1414 | for (j=c1r; j<=c2r; j++) { \ | ||
1415 | t = 0; \ | ||
1416 | for(k=0; k<nk; k++) { \ | ||
1417 | t += AT(a,i+dra,k+c1a) * AT(b,k+r1b,j+dcb); \ | ||
1418 | } \ | ||
1419 | AT(r,i,j) = b*AT(r,i,j) + a*t; \ | ||
1420 | } \ | ||
1421 | } \ | ||
1422 | OK \ | ||
1423 | } | ||
1424 | |||
1425 | GEMM(double) | ||
1426 | GEMM(float) | ||
1427 | GEMM(TCD) | ||
1428 | GEMM(TCF) | ||
1429 | GEMM(int32_t) | ||
1430 | GEMM(int64_t) | ||
1431 | |||
1432 | #define GEMM_MOD(T,M) int gemm_mod_##T(T m, VECG(T,c),VECG(int,p),MATG(T,a),MATG(T,b),MATG(T,r)) { \ | ||
1433 | T a = cp[0], b = cp[1]; \ | ||
1434 | int r1a = pp[0], c1a = pp[2], c2a = pp[3] ; \ | ||
1435 | int r1b = pp[4], c1b = pp[6] ; \ | ||
1436 | int r1r = pp[8], r2r = pp[9], c1r = pp[10], c2r = pp[11]; \ | ||
1437 | int dra = r1a - r1r; \ | ||
1438 | int dcb = c1b-c1r; \ | ||
1439 | int nk = c2a-c1a+1; \ | ||
1440 | int i,j,k; \ | ||
1441 | T t; \ | ||
1442 | for (i=r1r; i<=r2r; i++) { \ | ||
1443 | for (j=c1r; j<=c2r; j++) { \ | ||
1444 | t = 0; \ | ||
1445 | for(k=0; k<nk; k++) { \ | ||
1446 | t = M(t+M(AT(a,i+dra,k+c1a) * AT(b,k+r1b,j+dcb))); \ | ||
1447 | } \ | ||
1448 | AT(r,i,j) = M(M(b*AT(r,i,j)) + M(a*t)); \ | ||
1449 | } \ | ||
1450 | } \ | ||
1451 | OK \ | ||
1452 | } | ||
1453 | |||
1454 | #define MOD32(X) mod(X,m) | ||
1455 | #define MOD64(X) mod_l(X,m) | ||
1456 | |||
1457 | GEMM_MOD(int32_t,MOD32) | ||
1458 | GEMM_MOD(int64_t,MOD64) | ||
1459 | |||
1401 | ////////////////// sparse matrix-product /////////////////////////////////////// | 1460 | ////////////////// sparse matrix-product /////////////////////////////////////// |
1402 | 1461 | ||
1403 | 1462 | ||
diff --git a/packages/base/src/Internal/C/lapack-aux.h b/packages/base/src/Internal/C/lapack-aux.h index e4d95bc..bf8c5e9 100644 --- a/packages/base/src/Internal/C/lapack-aux.h +++ b/packages/base/src/Internal/C/lapack-aux.h | |||
@@ -59,6 +59,7 @@ typedef short ftnlen; | |||
59 | #define OQMAT(A) int A##r, int A##c, int A##Xr, int A##Xc, complex* A##p | 59 | #define OQMAT(A) int A##r, int A##c, int A##Xr, int A##Xc, complex* A##p |
60 | #define OCMAT(A) int A##r, int A##c, int A##Xr, int A##Xc, doublecomplex* A##p | 60 | #define OCMAT(A) int A##r, int A##c, int A##Xr, int A##Xc, doublecomplex* A##p |
61 | 61 | ||
62 | #define VECG(T,A) int A##n, T* A##p | ||
62 | #define MATG(T,A) int A##r, int A##c, int A##Xr, int A##Xc, T* A##p | 63 | #define MATG(T,A) int A##r, int A##c, int A##Xr, int A##Xc, T* A##p |
63 | 64 | ||
64 | #define KIVEC(A) int A##n, const int*A##p | 65 | #define KIVEC(A) int A##n, const int*A##p |