summaryrefslogtreecommitdiff
path: root/packages/base/src/Internal/C/lapack-aux.c
diff options
context:
space:
mode:
authorAlberto Ruiz <aruiz@um.es>2015-06-17 13:02:40 +0200
committerAlberto Ruiz <aruiz@um.es>2015-06-17 13:02:40 +0200
commit61d90ff66af8bfe53ef8cdda8dfe1e70463c213c (patch)
tree79ca6e2024731708a4378ff691b5317d8da11d11 /packages/base/src/Internal/C/lapack-aux.c
parente7d2916f78b5c140738fc4f4f95c9b13c1768293 (diff)
gemmm
Diffstat (limited to 'packages/base/src/Internal/C/lapack-aux.c')
-rw-r--r--packages/base/src/Internal/C/lapack-aux.c59
1 files changed, 59 insertions, 0 deletions
diff --git a/packages/base/src/Internal/C/lapack-aux.c b/packages/base/src/Internal/C/lapack-aux.c
index 2843ab5..4d48594 100644
--- a/packages/base/src/Internal/C/lapack-aux.c
+++ b/packages/base/src/Internal/C/lapack-aux.c
@@ -1398,6 +1398,65 @@ ROWOP(int64_t)
1398ROWOP_MOD(int32_t,mod) 1398ROWOP_MOD(int32_t,mod)
1399ROWOP_MOD(int64_t,mod_l) 1399ROWOP_MOD(int64_t,mod_l)
1400 1400
1401/////////////////////////////// inplace GEMM ////////////////////////////////
1402
1403#define GEMM(T) int gemm_##T(VECG(T,c),VECG(int,p),MATG(T,a),MATG(T,b),MATG(T,r)) { \
1404 T a = cp[0], b = cp[1]; \
1405 int r1a = pp[0], c1a = pp[2], c2a = pp[3] ; \
1406 int r1b = pp[4], c1b = pp[6] ; \
1407 int r1r = pp[8], r2r = pp[9], c1r = pp[10], c2r = pp[11]; \
1408 int dra = r1a - r1r; \
1409 int dcb = c1b-c1r; \
1410 int nk = c2a-c1a+1; \
1411 int i,j,k; \
1412 T t; \
1413 for (i=r1r; i<=r2r; i++) { \
1414 for (j=c1r; j<=c2r; j++) { \
1415 t = 0; \
1416 for(k=0; k<nk; k++) { \
1417 t += AT(a,i+dra,k+c1a) * AT(b,k+r1b,j+dcb); \
1418 } \
1419 AT(r,i,j) = b*AT(r,i,j) + a*t; \
1420 } \
1421 } \
1422 OK \
1423}
1424
1425GEMM(double)
1426GEMM(float)
1427GEMM(TCD)
1428GEMM(TCF)
1429GEMM(int32_t)
1430GEMM(int64_t)
1431
1432#define GEMM_MOD(T,M) int gemm_mod_##T(T m, VECG(T,c),VECG(int,p),MATG(T,a),MATG(T,b),MATG(T,r)) { \
1433 T a = cp[0], b = cp[1]; \
1434 int r1a = pp[0], c1a = pp[2], c2a = pp[3] ; \
1435 int r1b = pp[4], c1b = pp[6] ; \
1436 int r1r = pp[8], r2r = pp[9], c1r = pp[10], c2r = pp[11]; \
1437 int dra = r1a - r1r; \
1438 int dcb = c1b-c1r; \
1439 int nk = c2a-c1a+1; \
1440 int i,j,k; \
1441 T t; \
1442 for (i=r1r; i<=r2r; i++) { \
1443 for (j=c1r; j<=c2r; j++) { \
1444 t = 0; \
1445 for(k=0; k<nk; k++) { \
1446 t = M(t+M(AT(a,i+dra,k+c1a) * AT(b,k+r1b,j+dcb))); \
1447 } \
1448 AT(r,i,j) = M(M(b*AT(r,i,j)) + M(a*t)); \
1449 } \
1450 } \
1451 OK \
1452}
1453
1454#define MOD32(X) mod(X,m)
1455#define MOD64(X) mod_l(X,m)
1456
1457GEMM_MOD(int32_t,MOD32)
1458GEMM_MOD(int64_t,MOD64)
1459
1401////////////////// sparse matrix-product /////////////////////////////////////// 1460////////////////// sparse matrix-product ///////////////////////////////////////
1402 1461
1403 1462