13 files changed, 3617 insertions, 0 deletions
diff --git a/packages/base/src/C/lapack-aux.c b/packages/base/src/C/lapack-aux.c
new file mode 100644
index 0000000..e5e45ef
--- /dev/null
+++ b/packages/base/src/C/lapack-aux.c
@@ -0,0 +1,1489 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include <time.h>
+#include "lapack-aux.h"
+#define MACRO(B) do {B} while (0)
+#define ERROR(CODE) MACRO(return CODE;)
+#define REQUIRES(COND, CODE) MACRO(if(!(COND)) {ERROR(CODE);})
+#define MIN(A,B) ((A)<(B)?(A):(B))
+#define MAX(A,B) ((A)>(B)?(A):(B))
+// #define DBGL
+#ifdef DBGL
+#define DEBUGMSG(M) printf("\nLAPACK "M"\n");
+#else
+#define DEBUGMSG(M)
+#endif
+#define OK return 0;
+// #ifdef DBGL
+// #define DEBUGMSG(M) printf("LAPACK Wrapper "M"\n: "); size_t t0 = time(NULL);
+// #define OK MACRO(printf("%ld s\n",time(0)-t0); return 0;);
+// #else
+// #define DEBUGMSG(M)
+// #define OK return 0;
+// #endif
+#define TRACEMAT(M) {int q; printf(" %d x %d: ",M##r,M##c); \
+                     for(q=0;q<M##r*M##c;q++) printf("%.1f ",M##p[q]); printf("\n");}
+#define CHECK(RES,CODE) MACRO(if(RES) return CODE;)
+#define BAD_SIZE 2000
+#define BAD_CODE 2001
+#define MEM      2002
+#define BAD_FILE 2003
+#define SINGULAR 2004
+#define NOCONVER 2005
+#define NODEFPOS 2006
+#define NOSPRTD  2007
+//---------------------------------------
+void asm_finit() {
+#ifdef i386
+//  asm("finit");
+    static unsigned char buf[108];
+    asm("FSAVE %0":"=m" (buf));
+    #if FPUDEBUG
+    if(buf[8]!=255 || buf[9]!=255) {  // print warning in red
+        printf("%c[;31mWarning: FPU TAG = %x %x\%c[0m\n",0x1B,buf[8],buf[9],0x1B);
+    }
+    #endif
+    #if NANDEBUG
+    asm("FRSTOR %0":"=m" (buf));
+    #endif
+#endif
+}
+//---------------------------------------
+#if NANDEBUG
+#define CHECKNANR(M,msg)                     \
+{ int k;                                     \
+for(k=0; k<(M##r * M##c); k++) {             \
+    if(M##p[k] != M##p[k]) {                 \
+        printf(msg);                         \
+        TRACEMAT(M)                          \
+        /*exit(1);*/                         \
+    }                                        \
+}                                            \
+}
+#define CHECKNANC(M,msg)                     \
+{ int k;                                     \
+for(k=0; k<(M##r * M##c); k++) {             \
+    if(  M##p[k].r != M##p[k].r              \
+      || M##p[k].i != M##p[k].i) {           \
+        printf(msg);                         \
+        /*exit(1);*/                         \
+    }                                        \
+}                                            \
+}
+#else
+#define CHECKNANC(M,msg)
+#define CHECKNANR(M,msg)
+#endif
+//---------------------------------------
+//////////////////// real svd ////////////////////////////////////
+/* Subroutine */ int dgesvd_(char *jobu, char *jobvt, integer *m, integer *n,
+        doublereal *a, integer *lda, doublereal *s, doublereal *u, integer *
+        ldu, doublereal *vt, integer *ldvt, doublereal *work, integer *lwork,
+        integer *info);
+int svd_l_R(KDMAT(a),DMAT(u), DVEC(s),DMAT(v)) {
+    integer m = ar;
+    integer n = ac;
+    integer q = MIN(m,n);
+    REQUIRES(sn==q,BAD_SIZE);
+    REQUIRES(up==NULL || (ur==m && (uc==m || uc==q)),BAD_SIZE);
+    char* jobu  = "A";
+    if (up==NULL) {
+        jobu = "N";
+    } else {
+        if (uc==q) {
+            jobu = "S";
+        }
+    }
+    REQUIRES(vp==NULL || (vc==n && (vr==n || vr==q)),BAD_SIZE);
+    char* jobvt  = "A";
+    integer ldvt = n;
+    if (vp==NULL) {
+        jobvt = "N";
+    } else {
+        if (vr==q) {
+            jobvt = "S";
+            ldvt = q;
+        }
+    }
+    DEBUGMSG("svd_l_R");
+    double *B = (double*)malloc(m*n*sizeof(double));
+    CHECK(!B,MEM);
+    memcpy(B,ap,m*n*sizeof(double));
+    integer lwork = -1;
+    integer res;
+    // ask for optimal lwork
+    double ans;
+    dgesvd_ (jobu,jobvt,
+             &m,&n,B,&m,
+             sp,
+             up,&m,
+             vp,&ldvt,
+             &ans, &lwork,
+             &res);
+    lwork = ceil(ans);
+    double * work = (double*)malloc(lwork*sizeof(double));
+    CHECK(!work,MEM);
+    dgesvd_ (jobu,jobvt,
+             &m,&n,B,&m,
+             sp,
+             up,&m,
+             vp,&ldvt,
+             work, &lwork,
+             &res);
+    CHECK(res,res);
+    free(work);
+    free(B);
+    OK
+}
+// (alternative version)
+/* Subroutine */ int dgesdd_(char *jobz, integer *m, integer *n, doublereal *
+        a, integer *lda, doublereal *s, doublereal *u, integer *ldu,
+        doublereal *vt, integer *ldvt, doublereal *work, integer *lwork,
+        integer *iwork, integer *info);
+int svd_l_Rdd(KDMAT(a),DMAT(u), DVEC(s),DMAT(v)) {
+    integer m = ar;
+    integer n = ac;
+    integer q = MIN(m,n);
+    REQUIRES(sn==q,BAD_SIZE);
+    REQUIRES((up == NULL && vp == NULL)
+             || (ur==m && vc==n
+                &&   ((uc == q && vr == q)
+                   || (uc == m && vc==n))),BAD_SIZE);
+    char* jobz  = "A";
+    integer ldvt = n;
+    if (up==NULL) {
+        jobz = "N";
+    } else {
+        if (uc==q && vr == q) {
+            jobz = "S";
+            ldvt = q;
+        }
+    }
+    DEBUGMSG("svd_l_Rdd");
+    double *B = (double*)malloc(m*n*sizeof(double));
+    CHECK(!B,MEM);
+    memcpy(B,ap,m*n*sizeof(double));
+    integer* iwk = (integer*) malloc(8*q*sizeof(integer));
+    CHECK(!iwk,MEM);
+    integer lwk = -1;
+    integer res;
+    // ask for optimal lwk
+    double ans;
+    dgesdd_ (jobz,&m,&n,B,&m,sp,up,&m,vp,&ldvt,&ans,&lwk,iwk,&res);
+    lwk = ans;
+    double * workv = (double*)malloc(lwk*sizeof(double));
+    CHECK(!workv,MEM);
+    dgesdd_ (jobz,&m,&n,B,&m,sp,up,&m,vp,&ldvt,workv,&lwk,iwk,&res);
+    CHECK(res,res);
+    free(iwk);
+    free(workv);
+    free(B);
+    OK
+}
+//////////////////// complex svd ////////////////////////////////////
+// not in clapack.h
+int zgesvd_(char *jobu, char *jobvt, integer *m, integer *n,
+    doublecomplex *a, integer *lda, doublereal *s, doublecomplex *u,
+    integer *ldu, doublecomplex *vt, integer *ldvt, doublecomplex *work,
+    integer *lwork, doublereal *rwork, integer *info);
+int svd_l_C(KCMAT(a),CMAT(u), DVEC(s),CMAT(v)) {
+    integer m = ar;
+    integer n = ac;
+    integer q = MIN(m,n);
+    REQUIRES(sn==q,BAD_SIZE);
+    REQUIRES(up==NULL || (ur==m && (uc==m || uc==q)),BAD_SIZE);
+    char* jobu  = "A";
+    if (up==NULL) {
+        jobu = "N";
+    } else {
+        if (uc==q) {
+            jobu = "S";
+        }
+    }
+    REQUIRES(vp==NULL || (vc==n && (vr==n || vr==q)),BAD_SIZE);
+    char* jobvt  = "A";
+    integer ldvt = n;
+    if (vp==NULL) {
+        jobvt = "N";
+    } else {
+        if (vr==q) {
+            jobvt = "S";
+            ldvt = q;
+        }
+    }DEBUGMSG("svd_l_C");
+    doublecomplex *B = (doublecomplex*)malloc(m*n*sizeof(doublecomplex));
+    CHECK(!B,MEM);
+    memcpy(B,ap,m*n*sizeof(doublecomplex));
+    double *rwork = (double*) malloc(5*q*sizeof(double));
+    CHECK(!rwork,MEM);
+    integer lwork = -1;
+    integer res;
+    // ask for optimal lwork
+    doublecomplex ans;
+    zgesvd_ (jobu,jobvt,
+             &m,&n,B,&m,
+             sp,
+             up,&m,
+             vp,&ldvt,
+             &ans, &lwork,
+             rwork,
+             &res);
+    lwork = ceil(ans.r);
+    doublecomplex * work = (doublecomplex*)malloc(lwork*sizeof(doublecomplex));
+    CHECK(!work,MEM);
+    zgesvd_ (jobu,jobvt,
+             &m,&n,B,&m,
+             sp,
+             up,&m,
+             vp,&ldvt,
+             work, &lwork,
+             rwork,
+             &res);
+    CHECK(res,res);
+    free(work);
+    free(rwork);
+    free(B);
+    OK
+}
+int zgesdd_ (char *jobz, integer *m, integer *n,
+    doublecomplex *a, integer *lda, doublereal *s, doublecomplex *u,
+    integer *ldu, doublecomplex *vt, integer *ldvt, doublecomplex *work,
+    integer *lwork, doublereal *rwork, integer* iwork, integer *info);
+int svd_l_Cdd(KCMAT(a),CMAT(u), DVEC(s),CMAT(v)) {
+    //printf("entro\n");
+    integer m = ar;
+    integer n = ac;
+    integer q = MIN(m,n);
+    REQUIRES(sn==q,BAD_SIZE);
+    REQUIRES((up == NULL && vp == NULL)
+             || (ur==m && vc==n
+                &&   ((uc == q && vr == q)
+                   || (uc == m && vc==n))),BAD_SIZE);
+    char* jobz  = "A";
+    integer ldvt = n;
+    if (up==NULL) {
+        jobz = "N";
+    } else {
+        if (uc==q && vr == q) {
+            jobz = "S";
+            ldvt = q;
+        }
+    }
+    DEBUGMSG("svd_l_Cdd");
+    doublecomplex *B = (doublecomplex*)malloc(m*n*sizeof(doublecomplex));
+    CHECK(!B,MEM);
+    memcpy(B,ap,m*n*sizeof(doublecomplex));
+    integer* iwk = (integer*) malloc(8*q*sizeof(integer));
+    CHECK(!iwk,MEM);
+    int lrwk;
+    if (0 && *jobz == 'N') {
+        lrwk = 5*q; // does not work, crash at free below
+    } else {
+        lrwk = 5*q*q + 7*q;
+    }
+    double *rwk = (double*)malloc(lrwk*sizeof(double));;
+    CHECK(!rwk,MEM);
+    //printf("%s %ld %d\n",jobz,q,lrwk);
+    integer lwk = -1;
+    integer res;
+    // ask for optimal lwk
+    doublecomplex ans;
+    zgesdd_ (jobz,&m,&n,B,&m,sp,up,&m,vp,&ldvt,&ans,&lwk,rwk,iwk,&res);
+    lwk = ans.r;
+    //printf("lwk = %ld\n",lwk);
+    doublecomplex * workv = (doublecomplex*)malloc(lwk*sizeof(doublecomplex));
+    CHECK(!workv,MEM);
+    zgesdd_ (jobz,&m,&n,B,&m,sp,up,&m,vp,&ldvt,workv,&lwk,rwk,iwk,&res);
+    //printf("res = %ld\n",res);
+    CHECK(res,res);
+    free(workv); // printf("freed workv\n");
+    free(rwk);   // printf("freed rwk\n");
+    free(iwk);   // printf("freed iwk\n");
+    free(B);     // printf("freed B, salgo\n");
+    OK
+}
+//////////////////// general complex eigensystem ////////////
+/* Subroutine */ int zgeev_(char *jobvl, char *jobvr, integer *n,
+        doublecomplex *a, integer *lda, doublecomplex *w, doublecomplex *vl,
+        integer *ldvl, doublecomplex *vr, integer *ldvr, doublecomplex *work,
+        integer *lwork, doublereal *rwork, integer *info);
+int eig_l_C(KCMAT(a), CMAT(u), CVEC(s),CMAT(v)) {
+    integer n = ar;
+    REQUIRES(ac==n && sn==n, BAD_SIZE);
+    REQUIRES(up==NULL || (ur==n && uc==n), BAD_SIZE);
+    char jobvl = up==NULL?'N':'V';
+    REQUIRES(vp==NULL || (vr==n && vc==n), BAD_SIZE);
+    char jobvr = vp==NULL?'N':'V';
+    DEBUGMSG("eig_l_C");
+    doublecomplex *B = (doublecomplex*)malloc(n*n*sizeof(doublecomplex));
+    CHECK(!B,MEM);
+    memcpy(B,ap,n*n*sizeof(doublecomplex));
+    double *rwork = (double*) malloc(2*n*sizeof(double));
+    CHECK(!rwork,MEM);
+    integer lwork = -1;
+    integer res;
+    // ask for optimal lwork
+    doublecomplex ans;
+    //printf("ask zgeev\n");
+    zgeev_  (&jobvl,&jobvr,
+             &n,B,&n,
+             sp,
+             up,&n,
+             vp,&n,
+             &ans, &lwork,
+             rwork,
+             &res);
+    lwork = ceil(ans.r);
+    //printf("ans = %d\n",lwork);
+    doublecomplex * work = (doublecomplex*)malloc(lwork*sizeof(doublecomplex));
+    CHECK(!work,MEM);
+    //printf("zgeev\n");
+    zgeev_  (&jobvl,&jobvr,
+             &n,B,&n,
+             sp,
+             up,&n,
+             vp,&n,
+             work, &lwork,
+             rwork,
+             &res);
+    CHECK(res,res);
+    free(work);
+    free(rwork);
+    free(B);
+    OK
+}
+//////////////////// general real eigensystem ////////////
+/* Subroutine */ int dgeev_(char *jobvl, char *jobvr, integer *n, doublereal *
+        a, integer *lda, doublereal *wr, doublereal *wi, doublereal *vl,
+        integer *ldvl, doublereal *vr, integer *ldvr, doublereal *work,
+        integer *lwork, integer *info);
+int eig_l_R(KDMAT(a),DMAT(u), CVEC(s),DMAT(v)) {
+    integer n = ar;
+    REQUIRES(ac==n && sn==n, BAD_SIZE);
+    REQUIRES(up==NULL || (ur==n && uc==n), BAD_SIZE);
+    char jobvl = up==NULL?'N':'V';
+    REQUIRES(vp==NULL || (vr==n && vc==n), BAD_SIZE);
+    char jobvr = vp==NULL?'N':'V';
+    DEBUGMSG("eig_l_R");
+    double *B = (double*)malloc(n*n*sizeof(double));
+    CHECK(!B,MEM);
+    memcpy(B,ap,n*n*sizeof(double));
+    integer lwork = -1;
+    integer res;
+    // ask for optimal lwork
+    double ans;
+    //printf("ask dgeev\n");
+    dgeev_  (&jobvl,&jobvr,
+             &n,B,&n,
+             (double*)sp, (double*)sp+n,
+             up,&n,
+             vp,&n,
+             &ans, &lwork,
+             &res);
+    lwork = ceil(ans);
+    //printf("ans = %d\n",lwork);
+    double * work = (double*)malloc(lwork*sizeof(double));
+    CHECK(!work,MEM);
+    //printf("dgeev\n");
+    dgeev_  (&jobvl,&jobvr,
+             &n,B,&n,
+             (double*)sp, (double*)sp+n,
+             up,&n,
+             vp,&n,
+             work, &lwork,
+             &res);
+    CHECK(res,res);
+    free(work);
+    free(B);
+    OK
+}
+//////////////////// symmetric real eigensystem ////////////
+/* Subroutine */ int dsyev_(char *jobz, char *uplo, integer *n, doublereal *a,
+         integer *lda, doublereal *w, doublereal *work, integer *lwork,
+        integer *info);
+int eig_l_S(int wantV,KDMAT(a),DVEC(s),DMAT(v)) {
+    integer n = ar;
+    REQUIRES(ac==n && sn==n, BAD_SIZE);
+    REQUIRES(vr==n && vc==n, BAD_SIZE);
+    char jobz = wantV?'V':'N';
+    DEBUGMSG("eig_l_S");
+    memcpy(vp,ap,n*n*sizeof(double));
+    integer lwork = -1;
+    char uplo = 'U';
+    integer res;
+    // ask for optimal lwork
+    double ans;
+    //printf("ask dsyev\n");
+    dsyev_  (&jobz,&uplo,
+             &n,vp,&n,
+             sp,
+             &ans, &lwork,
+             &res);
+    lwork = ceil(ans);
+    //printf("ans = %d\n",lwork);
+    double * work = (double*)malloc(lwork*sizeof(double));
+    CHECK(!work,MEM);
+    dsyev_  (&jobz,&uplo,
+             &n,vp,&n,
+             sp,
+             work, &lwork,
+             &res);
+    CHECK(res,res);
+    free(work);
+    OK
+}
+//////////////////// hermitian complex eigensystem ////////////
+/* Subroutine */ int zheev_(char *jobz, char *uplo, integer *n, doublecomplex
+        *a, integer *lda, doublereal *w, doublecomplex *work, integer *lwork,
+        doublereal *rwork, integer *info);
+int eig_l_H(int wantV,KCMAT(a),DVEC(s),CMAT(v)) {
+    integer n = ar;
+    REQUIRES(ac==n && sn==n, BAD_SIZE);
+    REQUIRES(vr==n && vc==n, BAD_SIZE);
+    char jobz = wantV?'V':'N';
+    DEBUGMSG("eig_l_H");
+    memcpy(vp,ap,2*n*n*sizeof(double));
+    double *rwork = (double*) malloc((3*n-2)*sizeof(double));
+    CHECK(!rwork,MEM);
+    integer lwork = -1;
+    char uplo = 'U';
+    integer res;
+    // ask for optimal lwork
+    doublecomplex ans;
+    //printf("ask zheev\n");
+    zheev_  (&jobz,&uplo,
+             &n,vp,&n,
+             sp,
+             &ans, &lwork,
+             rwork,
+             &res);
+    lwork = ceil(ans.r);
+    //printf("ans = %d\n",lwork);
+    doublecomplex * work = (doublecomplex*)malloc(lwork*sizeof(doublecomplex));
+    CHECK(!work,MEM);
+    zheev_  (&jobz,&uplo,
+             &n,vp,&n,
+             sp,
+             work, &lwork,
+             rwork,
+             &res);
+    CHECK(res,res);
+    free(work);
+    free(rwork);
+    OK
+}
+//////////////////// general real linear system ////////////
+/* Subroutine */ int dgesv_(integer *n, integer *nrhs, doublereal *a, integer
+        *lda, integer *ipiv, doublereal *b, integer *ldb, integer *info);
+int linearSolveR_l(KDMAT(a),KDMAT(b),DMAT(x)) {
+    integer n = ar;
+    integer nhrs = bc;
+    REQUIRES(n>=1 && ar==ac && ar==br,BAD_SIZE);
+    DEBUGMSG("linearSolveR_l");
+    double*AC = (double*)malloc(n*n*sizeof(double));
+    memcpy(AC,ap,n*n*sizeof(double));
+    memcpy(xp,bp,n*nhrs*sizeof(double));
+    integer * ipiv = (integer*)malloc(n*sizeof(integer));
+    integer res;
+    dgesv_  (&n,&nhrs,
+             AC, &n,
+             ipiv,
+             xp, &n,
+             &res);
+    if(res>0) {
+        return SINGULAR;
+    }
+    CHECK(res,res);
+    free(ipiv);
+    free(AC);
+    OK
+}
+//////////////////// general complex linear system ////////////
+/* Subroutine */ int zgesv_(integer *n, integer *nrhs, doublecomplex *a,
+        integer *lda, integer *ipiv, doublecomplex *b, integer *ldb, integer *
+        info);
+int linearSolveC_l(KCMAT(a),KCMAT(b),CMAT(x)) {
+    integer n = ar;
+    integer nhrs = bc;
+    REQUIRES(n>=1 && ar==ac && ar==br,BAD_SIZE);
+    DEBUGMSG("linearSolveC_l");
+    doublecomplex*AC = (doublecomplex*)malloc(n*n*sizeof(doublecomplex));
+    memcpy(AC,ap,n*n*sizeof(doublecomplex));
+    memcpy(xp,bp,n*nhrs*sizeof(doublecomplex));
+    integer * ipiv = (integer*)malloc(n*sizeof(integer));
+    integer res;
+    zgesv_  (&n,&nhrs,
+             AC, &n,
+             ipiv,
+             xp, &n,
+             &res);
+    if(res>0) {
+        return SINGULAR;
+    }
+    CHECK(res,res);
+    free(ipiv);
+    free(AC);
+    OK
+}
+//////// symmetric positive definite real linear system using Cholesky ////////////
+/* Subroutine */ int dpotrs_(char *uplo, integer *n, integer *nrhs,
+        doublereal *a, integer *lda, doublereal *b, integer *ldb, integer *
+        info);
+int cholSolveR_l(KDMAT(a),KDMAT(b),DMAT(x)) {
+    integer n = ar;
+    integer nhrs = bc;
+    REQUIRES(n>=1 && ar==ac && ar==br,BAD_SIZE);
+    DEBUGMSG("cholSolveR_l");
+    memcpy(xp,bp,n*nhrs*sizeof(double));
+    integer res;
+    dpotrs_ ("U",
+             &n,&nhrs,
+             (double*)ap, &n,
+             xp, &n,
+             &res);
+    CHECK(res,res);
+    OK
+}
+//////// Hermitian positive definite real linear system using Cholesky ////////////
+/* Subroutine */ int zpotrs_(char *uplo, integer *n, integer *nrhs,
+        doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb,
+        integer *info);
+int cholSolveC_l(KCMAT(a),KCMAT(b),CMAT(x)) {
+    integer n = ar;
+    integer nhrs = bc;
+    REQUIRES(n>=1 && ar==ac && ar==br,BAD_SIZE);
+    DEBUGMSG("cholSolveC_l");
+    memcpy(xp,bp,n*nhrs*sizeof(doublecomplex));
+    integer res;
+    zpotrs_  ("U",
+             &n,&nhrs,
+             (doublecomplex*)ap, &n,
+             xp, &n,
+             &res);
+    CHECK(res,res);
+    OK
+}
+//////////////////// least squares real linear system ////////////
+/* Subroutine */ int dgels_(char *trans, integer *m, integer *n, integer *
+        nrhs, doublereal *a, integer *lda, doublereal *b, integer *ldb,
+        doublereal *work, integer *lwork, integer *info);
+int linearSolveLSR_l(KDMAT(a),KDMAT(b),DMAT(x)) {
+    integer m = ar;
+    integer n = ac;
+    integer nrhs = bc;
+    integer ldb = xr;
+    REQUIRES(m>=1 && n>=1 && ar==br && xr==MAX(m,n) && xc == bc, BAD_SIZE);
+    DEBUGMSG("linearSolveLSR_l");
+    double*AC = (double*)malloc(m*n*sizeof(double));
+    memcpy(AC,ap,m*n*sizeof(double));
+    if (m>=n) {
+        memcpy(xp,bp,m*nrhs*sizeof(double));
+    } else {
+        int k;
+        for(k = 0; k<nrhs; k++) {
+            memcpy(xp+ldb*k,bp+m*k,m*sizeof(double));
+        }
+    }
+    integer res;
+    integer lwork = -1;
+    double ans;
+    dgels_  ("N",&m,&n,&nrhs,
+             AC,&m,
+             xp,&ldb,
+             &ans,&lwork,
+             &res);
+    lwork = ceil(ans);
+    //printf("ans = %d\n",lwork);
+    double * work = (double*)malloc(lwork*sizeof(double));
+    dgels_  ("N",&m,&n,&nrhs,
+             AC,&m,
+             xp,&ldb,
+             work,&lwork,
+             &res);
+    if(res>0) {
+        return SINGULAR;
+    }
+    CHECK(res,res);
+    free(work);
+    free(AC);
+    OK
+}
+//////////////////// least squares complex linear system ////////////
+/* Subroutine */ int zgels_(char *trans, integer *m, integer *n, integer *
+        nrhs, doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb,
+        doublecomplex *work, integer *lwork, integer *info);
+int linearSolveLSC_l(KCMAT(a),KCMAT(b),CMAT(x)) {
+    integer m = ar;
+    integer n = ac;
+    integer nrhs = bc;
+    integer ldb = xr;
+    REQUIRES(m>=1 && n>=1 && ar==br && xr==MAX(m,n) && xc == bc, BAD_SIZE);
+    DEBUGMSG("linearSolveLSC_l");
+    doublecomplex*AC = (doublecomplex*)malloc(m*n*sizeof(doublecomplex));
+    memcpy(AC,ap,m*n*sizeof(doublecomplex));
+    if (m>=n) {
+        memcpy(xp,bp,m*nrhs*sizeof(doublecomplex));
+    } else {
+        int k;
+        for(k = 0; k<nrhs; k++) {
+            memcpy(xp+ldb*k,bp+m*k,m*sizeof(doublecomplex));
+        }
+    }
+    integer res;
+    integer lwork = -1;
+    doublecomplex ans;
+    zgels_  ("N",&m,&n,&nrhs,
+             AC,&m,
+             xp,&ldb,
+             &ans,&lwork,
+             &res);
+    lwork = ceil(ans.r);
+    //printf("ans = %d\n",lwork);
+    doublecomplex * work = (doublecomplex*)malloc(lwork*sizeof(doublecomplex));
+    zgels_  ("N",&m,&n,&nrhs,
+             AC,&m,
+             xp,&ldb,
+             work,&lwork,
+             &res);
+    if(res>0) {
+        return SINGULAR;
+    }
+    CHECK(res,res);
+    free(work);
+    free(AC);
+    OK
+}
+//////////////////// least squares real linear system using SVD ////////////
+/* Subroutine */ int dgelss_(integer *m, integer *n, integer *nrhs,
+        doublereal *a, integer *lda, doublereal *b, integer *ldb, doublereal *
+        s, doublereal *rcond, integer *rank, doublereal *work, integer *lwork,
+         integer *info);
+int linearSolveSVDR_l(double rcond,KDMAT(a),KDMAT(b),DMAT(x)) {
+    integer m = ar;
+    integer n = ac;
+    integer nrhs = bc;
+    integer ldb = xr;
+    REQUIRES(m>=1 && n>=1 && ar==br && xr==MAX(m,n) && xc == bc, BAD_SIZE);
+    DEBUGMSG("linearSolveSVDR_l");
+    double*AC = (double*)malloc(m*n*sizeof(double));
+    double*S = (double*)malloc(MIN(m,n)*sizeof(double));
+    memcpy(AC,ap,m*n*sizeof(double));
+    if (m>=n) {
+        memcpy(xp,bp,m*nrhs*sizeof(double));
+    } else {
+        int k;
+        for(k = 0; k<nrhs; k++) {
+            memcpy(xp+ldb*k,bp+m*k,m*sizeof(double));
+        }
+    }
+    integer res;
+    integer lwork = -1;
+    integer rank;
+    double ans;
+    dgelss_  (&m,&n,&nrhs,
+             AC,&m,
+             xp,&ldb,
+             S,
+             &rcond,&rank,
+             &ans,&lwork,
+             &res);
+    lwork = ceil(ans);
+    //printf("ans = %d\n",lwork);
+    double * work = (double*)malloc(lwork*sizeof(double));
+    dgelss_  (&m,&n,&nrhs,
+             AC,&m,
+             xp,&ldb,
+             S,
+             &rcond,&rank,
+             work,&lwork,
+             &res);
+    if(res>0) {
+        return NOCONVER;
+    }
+    CHECK(res,res);
+    free(work);
+    free(S);
+    free(AC);
+    OK
+}
+//////////////////// least squares complex linear system using SVD ////////////
+// not in clapack.h
+int zgelss_(integer *m, integer *n, integer *nhrs,
+    doublecomplex *a, integer *lda, doublecomplex *b, integer *ldb, doublereal *s,
+    doublereal *rcond, integer* rank,
+    doublecomplex *work, integer* lwork, doublereal* rwork,
+    integer *info);
+int linearSolveSVDC_l(double rcond, KCMAT(a),KCMAT(b),CMAT(x)) {
+    integer m = ar;
+    integer n = ac;
+    integer nrhs = bc;
+    integer ldb = xr;
+    REQUIRES(m>=1 && n>=1 && ar==br && xr==MAX(m,n) && xc == bc, BAD_SIZE);
+    DEBUGMSG("linearSolveSVDC_l");
+    doublecomplex*AC = (doublecomplex*)malloc(m*n*sizeof(doublecomplex));
+    double*S = (double*)malloc(MIN(m,n)*sizeof(double));
+    double*RWORK = (double*)malloc(5*MIN(m,n)*sizeof(double));
+    memcpy(AC,ap,m*n*sizeof(doublecomplex));
+    if (m>=n) {
+        memcpy(xp,bp,m*nrhs*sizeof(doublecomplex));
+    } else {
+        int k;
+        for(k = 0; k<nrhs; k++) {
+            memcpy(xp+ldb*k,bp+m*k,m*sizeof(doublecomplex));
+        }
+    }
+    integer res;
+    integer lwork = -1;
+    integer rank;
+    doublecomplex ans;
+    zgelss_  (&m,&n,&nrhs,
+             AC,&m,
+             xp,&ldb,
+             S,
+             &rcond,&rank,
+             &ans,&lwork,
+             RWORK,
+             &res);
+    lwork = ceil(ans.r);
+    //printf("ans = %d\n",lwork);
+    doublecomplex * work = (doublecomplex*)malloc(lwork*sizeof(doublecomplex));
+    zgelss_  (&m,&n,&nrhs,
+             AC,&m,
+             xp,&ldb,
+             S,
+             &rcond,&rank,
+             work,&lwork,
+             RWORK,
+             &res);
+    if(res>0) {
+        return NOCONVER;
+    }
+    CHECK(res,res);
+    free(work);
+    free(RWORK);
+    free(S);
+    free(AC);
+    OK
+}
+//////////////////// Cholesky factorization /////////////////////////
+/* Subroutine */ int zpotrf_(char *uplo, integer *n, doublecomplex *a,
+        integer *lda, integer *info);
+int chol_l_H(KCMAT(a),CMAT(l)) {
+    integer n = ar;
+    REQUIRES(n>=1 && ac == n && lr==n && lc==n,BAD_SIZE);
+    DEBUGMSG("chol_l_H");
+    memcpy(lp,ap,n*n*sizeof(doublecomplex));
+    char uplo = 'U';
+    integer res;
+    zpotrf_ (&uplo,&n,lp,&n,&res);
+    CHECK(res>0,NODEFPOS);
+    CHECK(res,res);
+    doublecomplex zero = {0.,0.};
+    int r,c;
+    for (r=0; r<lr-1; r++) {
+        for(c=r+1; c<lc; c++) {
+            lp[r*lc+c] = zero;
+        }
+    }
+    OK
+}
+/* Subroutine */ int dpotrf_(char *uplo, integer *n, doublereal *a, integer *
+        lda, integer *info);
+int chol_l_S(KDMAT(a),DMAT(l)) {
+    integer n = ar;
+    REQUIRES(n>=1 && ac == n && lr==n && lc==n,BAD_SIZE);
+    DEBUGMSG("chol_l_S");
+    memcpy(lp,ap,n*n*sizeof(double));
+    char uplo = 'U';
+    integer res;
+    dpotrf_ (&uplo,&n,lp,&n,&res);
+    CHECK(res>0,NODEFPOS);
+    CHECK(res,res);
+    int r,c;
+    for (r=0; r<lr-1; r++) {
+        for(c=r+1; c<lc; c++) {
+            lp[r*lc+c] = 0.;
+        }
+    }
+    OK
+}
+//////////////////// QR factorization /////////////////////////
+/* Subroutine */ int dgeqr2_(integer *m, integer *n, doublereal *a, integer *
+        lda, doublereal *tau, doublereal *work, integer *info);
+int qr_l_R(KDMAT(a), DVEC(tau), DMAT(r)) {
+    integer m = ar;
+    integer n = ac;
+    integer mn = MIN(m,n);
+    REQUIRES(m>=1 && n >=1 && rr== m && rc == n && taun == mn, BAD_SIZE);
+    DEBUGMSG("qr_l_R");
+    double *WORK = (double*)malloc(n*sizeof(double));
+    CHECK(!WORK,MEM);
+    memcpy(rp,ap,m*n*sizeof(double));
+    integer res;
+    dgeqr2_ (&m,&n,rp,&m,taup,WORK,&res);
+    CHECK(res,res);
+    free(WORK);
+    OK
+}
+/* Subroutine */ int zgeqr2_(integer *m, integer *n, doublecomplex *a,
+        integer *lda, doublecomplex *tau, doublecomplex *work, integer *info);
+int qr_l_C(KCMAT(a), CVEC(tau), CMAT(r)) {
+    integer m = ar;
+    integer n = ac;
+    integer mn = MIN(m,n);
+    REQUIRES(m>=1 && n >=1 && rr== m && rc == n && taun == mn, BAD_SIZE);
+    DEBUGMSG("qr_l_C");
+    doublecomplex *WORK = (doublecomplex*)malloc(n*sizeof(doublecomplex));
+    CHECK(!WORK,MEM);
+    memcpy(rp,ap,m*n*sizeof(doublecomplex));
+    integer res;
+    zgeqr2_ (&m,&n,rp,&m,taup,WORK,&res);
+    CHECK(res,res);
+    free(WORK);
+    OK
+}
+/* Subroutine */ int dorgqr_(integer *m, integer *n, integer *k, doublereal *
+        a, integer *lda, doublereal *tau, doublereal *work, integer *lwork,
+        integer *info);
+int c_dorgqr(KDMAT(a), KDVEC(tau), DMAT(r)) {
+    integer m = ar;
+    integer n = MIN(ac,ar);
+    integer k = taun;
+    DEBUGMSG("c_dorgqr");
+    integer lwork = 8*n; // FIXME
+    double *WORK = (double*)malloc(lwork*sizeof(double));
+    CHECK(!WORK,MEM);
+    memcpy(rp,ap,m*k*sizeof(double));
+    integer res;
+    dorgqr_ (&m,&n,&k,rp,&m,(double*)taup,WORK,&lwork,&res);
+    CHECK(res,res);
+    free(WORK);
+    OK
+}
+/* Subroutine */ int zungqr_(integer *m, integer *n, integer *k,
+        doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *
+        work, integer *lwork, integer *info);
+int c_zungqr(KCMAT(a), KCVEC(tau), CMAT(r)) {
+    integer m = ar;
+    integer n = MIN(ac,ar);
+    integer k = taun;
+    DEBUGMSG("z_ungqr");
+    integer lwork = 8*n; // FIXME
+    doublecomplex *WORK = (doublecomplex*)malloc(lwork*sizeof(doublecomplex));
+    CHECK(!WORK,MEM);
+    memcpy(rp,ap,m*k*sizeof(doublecomplex));
+    integer res;
+    zungqr_ (&m,&n,&k,rp,&m,(doublecomplex*)taup,WORK,&lwork,&res);
+    CHECK(res,res);
+    free(WORK);
+    OK
+}
+//////////////////// Hessenberg factorization /////////////////////////
+/* Subroutine */ int dgehrd_(integer *n, integer *ilo, integer *ihi,
+        doublereal *a, integer *lda, doublereal *tau, doublereal *work,
+        integer *lwork, integer *info);
+int hess_l_R(KDMAT(a), DVEC(tau), DMAT(r)) {
+    integer m = ar;
+    integer n = ac;
+    integer mn = MIN(m,n);
+    REQUIRES(m>=1 && n == m && rr== m && rc == n && taun == mn-1, BAD_SIZE);
+    DEBUGMSG("hess_l_R");
+    integer lwork = 5*n; // fixme
+    double *WORK = (double*)malloc(lwork*sizeof(double));
+    CHECK(!WORK,MEM);
+    memcpy(rp,ap,m*n*sizeof(double));
+    integer res;
+    integer one = 1;
+    dgehrd_ (&n,&one,&n,rp,&n,taup,WORK,&lwork,&res);
+    CHECK(res,res);
+    free(WORK);
+    OK
+}
+/* Subroutine */ int zgehrd_(integer *n, integer *ilo, integer *ihi,
+        doublecomplex *a, integer *lda, doublecomplex *tau, doublecomplex *
+        work, integer *lwork, integer *info);
+int hess_l_C(KCMAT(a), CVEC(tau), CMAT(r)) {
+    integer m = ar;
+    integer n = ac;
+    integer mn = MIN(m,n);
+    REQUIRES(m>=1 && n == m && rr== m && rc == n && taun == mn-1, BAD_SIZE);
+    DEBUGMSG("hess_l_C");
+    integer lwork = 5*n; // fixme
+    doublecomplex *WORK = (doublecomplex*)malloc(lwork*sizeof(doublecomplex));
+    CHECK(!WORK,MEM);
+    memcpy(rp,ap,m*n*sizeof(doublecomplex));
+    integer res;
+    integer one = 1;
+    zgehrd_ (&n,&one,&n,rp,&n,taup,WORK,&lwork,&res);
+    CHECK(res,res);
+    free(WORK);
+    OK
+}
+//////////////////// Schur factorization /////////////////////////
+/* Subroutine */ int dgees_(char *jobvs, char *sort, L_fp select, integer *n,
+        doublereal *a, integer *lda, integer *sdim, doublereal *wr,
+        doublereal *wi, doublereal *vs, integer *ldvs, doublereal *work,
+        integer *lwork, logical *bwork, integer *info);
+int schur_l_R(KDMAT(a), DMAT(u), DMAT(s)) {
+    integer m = ar;
+    integer n = ac;
+    REQUIRES(m>=1 && n==m && ur==n && uc==n && sr==n && sc==n, BAD_SIZE);
+    DEBUGMSG("schur_l_R");
+    //int k;
+    //printf("---------------------------\n");
+    //printf("%p: ",ap); for(k=0;k<n*n;k++) printf("%f ",ap[k]); printf("\n");
+    //printf("%p: ",up); for(k=0;k<n*n;k++) printf("%f ",up[k]); printf("\n");
+    //printf("%p: ",sp); for(k=0;k<n*n;k++) printf("%f ",sp[k]); printf("\n");
+    memcpy(sp,ap,n*n*sizeof(double));
+    integer lwork = 6*n; // fixme
+    double *WORK = (double*)malloc(lwork*sizeof(double));
+    double *WR = (double*)malloc(n*sizeof(double));
+    double *WI = (double*)malloc(n*sizeof(double));
+    // WR and WI not really required in this call
+    logical *BWORK = (logical*)malloc(n*sizeof(logical));
+    integer res;
+    integer sdim;
+    dgees_ ("V","N",NULL,&n,sp,&n,&sdim,WR,WI,up,&n,WORK,&lwork,BWORK,&res);
+    //printf("%p: ",ap); for(k=0;k<n*n;k++) printf("%f ",ap[k]); printf("\n");
+    //printf("%p: ",up); for(k=0;k<n*n;k++) printf("%f ",up[k]); printf("\n");
+    //printf("%p: ",sp); for(k=0;k<n*n;k++) printf("%f ",sp[k]); printf("\n");
+    if(res>0) {
+        return NOCONVER;
+    }
+    CHECK(res,res);
+    free(WR);
+    free(WI);
+    free(BWORK);
+    free(WORK);
+    OK
+}
+/* Subroutine */ int zgees_(char *jobvs, char *sort, L_fp select, integer *n,
+        doublecomplex *a, integer *lda, integer *sdim, doublecomplex *w,
+        doublecomplex *vs, integer *ldvs, doublecomplex *work, integer *lwork,
+         doublereal *rwork, logical *bwork, integer *info);
+int schur_l_C(KCMAT(a), CMAT(u), CMAT(s)) {
+    integer m = ar;
+    integer n = ac;
+    REQUIRES(m>=1 && n==m && ur==n && uc==n && sr==n && sc==n, BAD_SIZE);
+    DEBUGMSG("schur_l_C");
+    memcpy(sp,ap,n*n*sizeof(doublecomplex));
+    integer lwork = 6*n; // fixme
+    doublecomplex *WORK = (doublecomplex*)malloc(lwork*sizeof(doublecomplex));
+    doublecomplex *W = (doublecomplex*)malloc(n*sizeof(doublecomplex));
+    // W not really required in this call
+    logical *BWORK = (logical*)malloc(n*sizeof(logical));
+    double *RWORK = (double*)malloc(n*sizeof(double));
+    integer res;
+    integer sdim;
+    zgees_ ("V","N",NULL,&n,sp,&n,&sdim,W,
+                            up,&n,
+                            WORK,&lwork,RWORK,BWORK,&res);
+    if(res>0) {
+        return NOCONVER;
+    }
+    CHECK(res,res);
+    free(W);
+    free(BWORK);
+    free(WORK);
+    OK
+}
+//////////////////// LU factorization /////////////////////////
+/* Subroutine */ int dgetrf_(integer *m, integer *n, doublereal *a, integer *
+        lda, integer *ipiv, integer *info);
+int lu_l_R(KDMAT(a), DVEC(ipiv), DMAT(r)) {
+    integer m = ar;
+    integer n = ac;
+    integer mn = MIN(m,n);
+    REQUIRES(m>=1 && n >=1 && ipivn == mn, BAD_SIZE);
+    DEBUGMSG("lu_l_R");
+    integer* auxipiv = (integer*)malloc(mn*sizeof(integer));
+    memcpy(rp,ap,m*n*sizeof(double));
+    integer res;
+    dgetrf_ (&m,&n,rp,&m,auxipiv,&res);
+    if(res>0) {
+        res = 0; // fixme
+    }
+    CHECK(res,res);
+    int k;
+    for (k=0; k<mn; k++) {
+        ipivp[k] = auxipiv[k];
+    }
+    free(auxipiv);
+    OK
+}
+/* Subroutine */ int zgetrf_(integer *m, integer *n, doublecomplex *a,
+        integer *lda, integer *ipiv, integer *info);
+int lu_l_C(KCMAT(a), DVEC(ipiv), CMAT(r)) {
+    integer m = ar;
+    integer n = ac;
+    integer mn = MIN(m,n);
+    REQUIRES(m>=1 && n >=1 && ipivn == mn, BAD_SIZE);
+    DEBUGMSG("lu_l_C");
+    integer* auxipiv = (integer*)malloc(mn*sizeof(integer));
+    memcpy(rp,ap,m*n*sizeof(doublecomplex));
+    integer res;
+    zgetrf_ (&m,&n,rp,&m,auxipiv,&res);
+    if(res>0) {
+        res = 0; // fixme
+    }
+    CHECK(res,res);
+    int k;
+    for (k=0; k<mn; k++) {
+        ipivp[k] = auxipiv[k];
+    }
+    free(auxipiv);
+    OK
+}
+//////////////////// LU substitution /////////////////////////
+/* Subroutine */ int dgetrs_(char *trans, integer *n, integer *nrhs,
+        doublereal *a, integer *lda, integer *ipiv, doublereal *b, integer *
+        ldb, integer *info);
+int luS_l_R(KDMAT(a), KDVEC(ipiv), KDMAT(b), DMAT(x)) {
+  integer m = ar;
+  integer n = ac;
+  integer mrhs = br;
+  integer nrhs = bc;
+  REQUIRES(m==n && m==mrhs && m==ipivn,BAD_SIZE);
+  integer* auxipiv = (integer*)malloc(n*sizeof(integer));
+  int k;
+  for (k=0; k<n; k++) {
+    auxipiv[k] = (integer)ipivp[k];
+  }
+  integer res;
+  memcpy(xp,bp,mrhs*nrhs*sizeof(double));
+  dgetrs_ ("N",&n,&nrhs,(/*no const (!?)*/ double*)ap,&m,auxipiv,xp,&mrhs,&res);
+  CHECK(res,res);
+  free(auxipiv);
+  OK
+}
+/* Subroutine */ int zgetrs_(char *trans, integer *n, integer *nrhs,
+        doublecomplex *a, integer *lda, integer *ipiv, doublecomplex *b,
+        integer *ldb, integer *info);
+int luS_l_C(KCMAT(a), KDVEC(ipiv), KCMAT(b), CMAT(x)) {
+    integer m = ar;
+    integer n = ac;
+    integer mrhs = br;
+    integer nrhs = bc;
+    REQUIRES(m==n && m==mrhs && m==ipivn,BAD_SIZE);
+    integer* auxipiv = (integer*)malloc(n*sizeof(integer));
+    int k;
+    for (k=0; k<n; k++) {
+        auxipiv[k] = (integer)ipivp[k];
+    }
+    integer res;
+    memcpy(xp,bp,mrhs*nrhs*sizeof(doublecomplex));
+    zgetrs_ ("N",&n,&nrhs,(doublecomplex*)ap,&m,auxipiv,xp,&mrhs,&res);
+    CHECK(res,res);
+    free(auxipiv);
+    OK
+}
+//////////////////// Matrix Product /////////////////////////
+void dgemm_(char *, char *, integer *, integer *, integer *,
+           double *, const double *, integer *, const double *,
+           integer *, double *, double *, integer *);
+int multiplyR(int ta, int tb, KDMAT(a),KDMAT(b),DMAT(r)) {
+    //REQUIRES(ac==br && ar==rr && bc==rc,BAD_SIZE);
+    DEBUGMSG("dgemm_");
+    CHECKNANR(a,"NaN multR Input\n")
+    CHECKNANR(b,"NaN multR Input\n")
+    integer m = ta?ac:ar;
+    integer n = tb?br:bc;
+    integer k = ta?ar:ac;
+    integer lda = ar;
+    integer ldb = br;
+    integer ldc = rr;
+    double alpha = 1;
+    double beta = 0;
+    dgemm_(ta?"T":"N",tb?"T":"N",&m,&n,&k,&alpha,ap,&lda,bp,&ldb,&beta,rp,&ldc);
+    CHECKNANR(r,"NaN multR Output\n")
+    OK
+}
+void zgemm_(char *, char *, integer *, integer *, integer *,
+           doublecomplex *, const doublecomplex *, integer *, const doublecomplex *,
+           integer *, doublecomplex *, doublecomplex *, integer *);
+int multiplyC(int ta, int tb, KCMAT(a),KCMAT(b),CMAT(r)) {
+    //REQUIRES(ac==br && ar==rr && bc==rc,BAD_SIZE);
+    DEBUGMSG("zgemm_");
+    CHECKNANC(a,"NaN multC Input\n")
+    CHECKNANC(b,"NaN multC Input\n")
+    integer m = ta?ac:ar;
+    integer n = tb?br:bc;
+    integer k = ta?ar:ac;
+    integer lda = ar;
+    integer ldb = br;
+    integer ldc = rr;
+    doublecomplex alpha = {1,0};
+    doublecomplex beta = {0,0};
+    zgemm_(ta?"T":"N",tb?"T":"N",&m,&n,&k,&alpha,
+           ap,&lda,
+           bp,&ldb,&beta,
+           rp,&ldc);
+    CHECKNANC(r,"NaN multC Output\n")
+    OK
+}
+void sgemm_(char *, char *, integer *, integer *, integer *,
+            float *, const float *, integer *, const float *,
+           integer *, float *, float *, integer *);
+int multiplyF(int ta, int tb, KFMAT(a),KFMAT(b),FMAT(r)) {
+    //REQUIRES(ac==br && ar==rr && bc==rc,BAD_SIZE);
+    DEBUGMSG("sgemm_");
+    integer m = ta?ac:ar;
+    integer n = tb?br:bc;
+    integer k = ta?ar:ac;
+    integer lda = ar;
+    integer ldb = br;
+    integer ldc = rr;
+    float alpha = 1;
+    float beta = 0;
+    sgemm_(ta?"T":"N",tb?"T":"N",&m,&n,&k,&alpha,ap,&lda,bp,&ldb,&beta,rp,&ldc);
+    OK
+}
+void cgemm_(char *, char *, integer *, integer *, integer *,
+           complex *, const complex *, integer *, const complex *,
+           integer *, complex *, complex *, integer *);
+int multiplyQ(int ta, int tb, KQMAT(a),KQMAT(b),QMAT(r)) {
+    //REQUIRES(ac==br && ar==rr && bc==rc,BAD_SIZE);
+    DEBUGMSG("cgemm_");
+    integer m = ta?ac:ar;
+    integer n = tb?br:bc;
+    integer k = ta?ar:ac;
+    integer lda = ar;
+    integer ldb = br;
+    integer ldc = rr;
+    complex alpha = {1,0};
+    complex beta = {0,0};
+    cgemm_(ta?"T":"N",tb?"T":"N",&m,&n,&k,&alpha,
+           ap,&lda,
+           bp,&ldb,&beta,
+           rp,&ldc);
+    OK
+}
+//////////////////// transpose /////////////////////////
+int transF(KFMAT(x),FMAT(t)) {
+    REQUIRES(xr==tc && xc==tr,BAD_SIZE);
+    DEBUGMSG("transF");
+    int i,j;
+    for (i=0; i<tr; i++) {
+        for (j=0; j<tc; j++) {
+        tp[i*tc+j] = xp[j*xc+i];
+        }
+    }
+    OK
+}
+int transR(KDMAT(x),DMAT(t)) {
+    REQUIRES(xr==tc && xc==tr,BAD_SIZE);
+    DEBUGMSG("transR");
+    int i,j;
+    for (i=0; i<tr; i++) {
+        for (j=0; j<tc; j++) {
+        tp[i*tc+j] = xp[j*xc+i];
+        }
+    }
+    OK
+}
+int transQ(KQMAT(x),QMAT(t)) {
+    REQUIRES(xr==tc && xc==tr,BAD_SIZE);
+    DEBUGMSG("transQ");
+    int i,j;
+    for (i=0; i<tr; i++) {
+        for (j=0; j<tc; j++) {
+        tp[i*tc+j] = xp[j*xc+i];
+        }
+    }
+    OK
+}
+int transC(KCMAT(x),CMAT(t)) {
+    REQUIRES(xr==tc && xc==tr,BAD_SIZE);
+    DEBUGMSG("transC");
+    int i,j;
+    for (i=0; i<tr; i++) {
+        for (j=0; j<tc; j++) {
+        tp[i*tc+j] = xp[j*xc+i];
+        }
+    }
+    OK
+}
+int transP(KPMAT(x), PMAT(t)) {
+    REQUIRES(xr==tc && xc==tr,BAD_SIZE);
+    REQUIRES(xs==ts,NOCONVER);
+    DEBUGMSG("transP");
+    int i,j;
+    for (i=0; i<tr; i++) {
+        for (j=0; j<tc; j++) {
+          memcpy(tp+(i*tc+j)*xs,xp +(j*xc+i)*xs,xs);
+        }
+    }
+    OK
+}
+//////////////////// constant /////////////////////////
+int constantF(float * pval, FVEC(r)) {
+    DEBUGMSG("constantF")
+    int k;
+    double val = *pval;
+    for(k=0;k<rn;k++) {
+        rp[k]=val;
+    }
+    OK
+}
+int constantR(double * pval, DVEC(r)) {
+    DEBUGMSG("constantR")
+    int k;
+    double val = *pval;
+    for(k=0;k<rn;k++) {
+        rp[k]=val;
+    }
+    OK
+}
+int constantQ(complex* pval, QVEC(r)) {
+    DEBUGMSG("constantQ")
+    int k;
+    complex val = *pval;
+    for(k=0;k<rn;k++) {
+        rp[k]=val;
+    }
+    OK
+}
+int constantC(doublecomplex* pval, CVEC(r)) {
+    DEBUGMSG("constantC")
+    int k;
+    doublecomplex val = *pval;
+    for(k=0;k<rn;k++) {
+        rp[k]=val;
+    }
+    OK
+}
+int constantP(void* pval, PVEC(r)) {
+    DEBUGMSG("constantP")
+    int k;
+    for(k=0;k<rn;k++) {
+      memcpy(rp+k*rs,pval,rs);
+    }
+    OK
+}
+//////////////////// float-double conversion /////////////////////////
+int float2double(FVEC(x),DVEC(y)) {
+    DEBUGMSG("float2double")
+    int k;
+    for(k=0;k<xn;k++) {
+        yp[k]=xp[k];
+    }
+    OK
+}
+int double2float(DVEC(x),FVEC(y)) {
+    DEBUGMSG("double2float")
+    int k;
+    for(k=0;k<xn;k++) {
+        yp[k]=xp[k];
+    }
+    OK
+}
+//////////////////// conjugate /////////////////////////
+int conjugateQ(KQVEC(x),QVEC(t)) {
+    REQUIRES(xn==tn,BAD_SIZE);
+    DEBUGMSG("conjugateQ");
+    int k;
+    for(k=0;k<xn;k++) {
+        tp[k].r =  xp[k].r;
+        tp[k].i = -xp[k].i;
+    }
+    OK
+}
+int conjugateC(KCVEC(x),CVEC(t)) {
+    REQUIRES(xn==tn,BAD_SIZE);
+    DEBUGMSG("conjugateC");
+    int k;
+    for(k=0;k<xn;k++) {
+        tp[k].r =  xp[k].r;
+        tp[k].i = -xp[k].i;
+    }
+    OK
+}
+//////////////////// step /////////////////////////
+int stepF(FVEC(x),FVEC(y)) {
+    DEBUGMSG("stepF")
+    int k;
+    for(k=0;k<xn;k++) {
+        yp[k]=xp[k]>0;
+    }
+    OK
+}
+int stepD(DVEC(x),DVEC(y)) {
+    DEBUGMSG("stepD")
+    int k;
+    for(k=0;k<xn;k++) {
+        yp[k]=xp[k]>0;
+    }
+    OK
+}
+//////////////////// cond /////////////////////////
+int condF(FVEC(x),FVEC(y),FVEC(lt),FVEC(eq),FVEC(gt),FVEC(r)) {
+    REQUIRES(xn==yn && xn==ltn && xn==eqn && xn==gtn && xn==rn ,BAD_SIZE);
+    DEBUGMSG("condF")
+    int k;
+    for(k=0;k<xn;k++) {
+        rp[k] = xp[k]<yp[k]?ltp[k]:(xp[k]>yp[k]?gtp[k]:eqp[k]);
+    }
+    OK
+}
+int condD(DVEC(x),DVEC(y),DVEC(lt),DVEC(eq),DVEC(gt),DVEC(r)) {
+    REQUIRES(xn==yn && xn==ltn && xn==eqn && xn==gtn && xn==rn ,BAD_SIZE);
+    DEBUGMSG("condD")
+    int k;
+    for(k=0;k<xn;k++) {
+        rp[k] = xp[k]<yp[k]?ltp[k]:(xp[k]>yp[k]?gtp[k]:eqp[k]);
+    }
+    OK
+}
diff --git a/packages/base/src/C/lapack-aux.h b/packages/base/src/C/lapack-aux.h
new file mode 100644
index 0000000..a3f1899
--- /dev/null
+++ b/packages/base/src/C/lapack-aux.h
@@ -0,0 +1,60 @@
+/*
+ * We have copied the definitions in f2c.h required
+ * to compile clapack.h, modified to support both
+ * 32 and 64 bit
+      http://opengrok.creo.hu/dragonfly/xref/src/contrib/gcc-3.4/libf2c/readme.netlib
+      http://www.ibm.com/developerworks/library/l-port64.html
+ */
+#ifdef _LP64
+typedef int integer;
+typedef unsigned int uinteger;
+typedef int logical;
+typedef long longint;           /* system-dependent */
+typedef unsigned long ulongint; /* system-dependent */
+#else
+typedef long int integer;
+typedef unsigned long int uinteger;
+typedef long int logical;
+typedef long long longint;              /* system-dependent */
+typedef unsigned long long ulongint;    /* system-dependent */
+#endif
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;
+typedef struct { doublereal r, i; } doublecomplex;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+typedef logical (*L_fp)();
+typedef short ftnlen;
+/********************************************************/
+#define FVEC(A) int A##n, float*A##p
+#define DVEC(A) int A##n, double*A##p
+#define QVEC(A) int A##n, complex*A##p
+#define CVEC(A) int A##n, doublecomplex*A##p
+#define PVEC(A) int A##n, void* A##p, int A##s
+#define FMAT(A) int A##r, int A##c, float* A##p
+#define DMAT(A) int A##r, int A##c, double* A##p
+#define QMAT(A) int A##r, int A##c, complex* A##p
+#define CMAT(A) int A##r, int A##c, doublecomplex* A##p
+#define PMAT(A) int A##r, int A##c, void* A##p, int A##s
+#define KFVEC(A) int A##n, const float*A##p
+#define KDVEC(A) int A##n, const double*A##p
+#define KQVEC(A) int A##n, const complex*A##p
+#define KCVEC(A) int A##n, const doublecomplex*A##p
+#define KPVEC(A) int A##n, const void* A##p, int A##s
+#define KFMAT(A) int A##r, int A##c, const float* A##p
+#define KDMAT(A) int A##r, int A##c, const double* A##p
+#define KQMAT(A) int A##r, int A##c, const complex* A##p
+#define KCMAT(A) int A##r, int A##c, const doublecomplex* A##p
+#define KPMAT(A) int A##r, int A##c, const void* A##p, int A##s
diff --git a/packages/base/src/Data/Packed.hs b/packages/base/src/Data/Packed.hs
new file mode 100644
index 0000000..c66718a
--- /dev/null
+++ b/packages/base/src/Data/Packed.hs
@@ -0,0 +1,25 @@
+-----------------------------------------------------------------------------
+{- |
+Module      :  Data.Packed
+Copyright   :  (c) Alberto Ruiz 2006-2014
+License     :  BSD3
+Maintainer  :  Alberto Ruiz
+Stability   :  provisional
+Types for dense 'Vector' and 'Matrix' of 'Storable' elements.
+-}
+-----------------------------------------------------------------------------
+module Data.Packed (
+    -- * Vector
+    --
+    -- | Vectors are @Data.Vector.Storable.Vector@ from the \"vector\" package.
+    module Data.Packed.Vector,
+    -- * Matrix
+    module Data.Packed.Matrix,
+) where
+import Data.Packed.Vector
+import Data.Packed.Matrix
diff --git a/packages/base/src/Data/Packed/Development.hs b/packages/base/src/Data/Packed/Development.hs
new file mode 100644
index 0000000..777b6c5
--- /dev/null
+++ b/packages/base/src/Data/Packed/Development.hs
@@ -0,0 +1,31 @@
+-----------------------------------------------------------------------------
+-- |
+-- Module      :  Data.Packed.Development
+-- Copyright   :  (c) Alberto Ruiz 2009
+-- License     :  BSD3
+-- Maintainer  :  Alberto Ruiz
+-- Stability   :  provisional
+-- Portability :  portable
+--
+-- The library can be easily extended with additional foreign functions
+-- using the tools in this module. Illustrative usage examples can be found
+-- in the @examples\/devel@ folder included in the package.
+--
+-----------------------------------------------------------------------------
+module Data.Packed.Development (
+    createVector, createMatrix,
+    vec, mat,
+    app1, app2, app3, app4,
+    app5, app6, app7, app8, app9, app10,
+    MatrixOrder(..), orderOf, cmat, fmat,
+    matrixFromVector,
+    unsafeFromForeignPtr,
+    unsafeToForeignPtr,
+    check, (//),
+    at', atM'
+) where
+import Data.Packed.Internal
diff --git a/packages/base/src/Data/Packed/Foreign.hs b/packages/base/src/Data/Packed/Foreign.hs
new file mode 100644
index 0000000..efa51ca
--- /dev/null
+++ b/packages/base/src/Data/Packed/Foreign.hs
@@ -0,0 +1,99 @@
+{-# LANGUAGE MagicHash, UnboxedTuples #-}
+-- | FFI and hmatrix helpers.
+--
+-- Sample usage, to upload a perspective matrix to a shader.
+--
+-- @ glUniformMatrix4fv 0 1 (fromIntegral gl_TRUE) \`appMatrix\` perspective 0.01 100 (pi\/2) (4\/3) 
+-- @
+--
+module Data.Packed.Foreign 
+    ( app
+    , appVector, appVectorLen
+    , appMatrix, appMatrixLen, appMatrixRaw, appMatrixRawLen
+    , unsafeMatrixToVector, unsafeMatrixToForeignPtr
+    ) where
+import Data.Packed.Internal
+import qualified Data.Vector.Storable as S
+import Foreign (Ptr, ForeignPtr, Storable)
+import Foreign.C.Types (CInt)
+import GHC.Base (IO(..), realWorld#)
+{-# INLINE unsafeInlinePerformIO #-}
+-- | If we use unsafePerformIO, it may not get inlined, so in a function that returns IO (which are all safe uses of app* in this module), there would be
+-- unecessary calls to unsafePerformIO or its internals.
+unsafeInlinePerformIO :: IO a -> a
+unsafeInlinePerformIO (IO f) = case f realWorld# of
+    (# _, x #) -> x
+{-# INLINE app #-}
+-- | Only useful since it is left associated with a precedence of 1, unlike 'Prelude.$', which is right associative.
+-- e.g.
+--
+-- @
+-- someFunction
+--     \`appMatrixLen\` m
+--     \`appVectorLen\` v
+--     \`app\` other
+--     \`app\` arguments
+--     \`app\` go here
+-- @
+--
+-- One could also write:
+--
+-- @
+-- (someFunction 
+--     \`appMatrixLen\` m
+--     \`appVectorLen\` v) 
+--     other 
+--     arguments 
+--     (go here)
+-- @
+--
+app :: (a -> b) -> a -> b
+app f = f
+{-# INLINE appVector #-}
+appVector :: Storable a => (Ptr a -> b) -> Vector a -> b
+appVector f x = unsafeInlinePerformIO (S.unsafeWith x (return . f))
+{-# INLINE appVectorLen #-}
+appVectorLen :: Storable a => (CInt -> Ptr a -> b) -> Vector a -> b
+appVectorLen f x = unsafeInlinePerformIO (S.unsafeWith x (return . f (fromIntegral (S.length x))))
+{-# INLINE appMatrix #-}
+appMatrix :: Element a => (Ptr a -> b) -> Matrix a -> b
+appMatrix f x = unsafeInlinePerformIO (S.unsafeWith (flatten x) (return . f))
+{-# INLINE appMatrixLen #-}
+appMatrixLen :: Element a => (CInt -> CInt -> Ptr a -> b) -> Matrix a -> b
+appMatrixLen f x = unsafeInlinePerformIO (S.unsafeWith (flatten x) (return . f r c))
+  where
+    r = fromIntegral (rows x)
+    c = fromIntegral (cols x)
+{-# INLINE appMatrixRaw #-}
+appMatrixRaw :: Storable a => (Ptr a -> b) -> Matrix a -> b
+appMatrixRaw f x = unsafeInlinePerformIO (S.unsafeWith (xdat x) (return . f))
+{-# INLINE appMatrixRawLen #-}
+appMatrixRawLen :: Element a => (CInt -> CInt -> Ptr a -> b) -> Matrix a -> b
+appMatrixRawLen f x = unsafeInlinePerformIO (S.unsafeWith (xdat x) (return . f r c))
+  where
+    r = fromIntegral (rows x)
+    c = fromIntegral (cols x)
+infixl 1 `app`
+infixl 1 `appVector`
+infixl 1 `appMatrix`
+infixl 1 `appMatrixRaw`
+{-# INLINE unsafeMatrixToVector #-}
+-- | This will disregard the order of the matrix, and simply return it as-is. 
+-- If the order of the matrix is RowMajor, this function is identical to 'flatten'.
+unsafeMatrixToVector :: Matrix a -> Vector a
+unsafeMatrixToVector = xdat
+{-# INLINE unsafeMatrixToForeignPtr #-}
+unsafeMatrixToForeignPtr :: Storable a => Matrix a -> (ForeignPtr a, Int)
+unsafeMatrixToForeignPtr m = S.unsafeToForeignPtr0 (xdat m)
diff --git a/packages/base/src/Data/Packed/Internal.hs b/packages/base/src/Data/Packed/Internal.hs
new file mode 100644
index 0000000..537e51e
--- /dev/null
+++ b/packages/base/src/Data/Packed/Internal.hs
@@ -0,0 +1,26 @@
+-----------------------------------------------------------------------------
+-- |
+-- Module      :  Data.Packed.Internal
+-- Copyright   :  (c) Alberto Ruiz 2007
+-- License     :  GPL-style
+--
+-- Maintainer  :  Alberto Ruiz <aruiz@um.es>
+-- Stability   :  provisional
+-- Portability :  portable
+--
+-- Reexports all internal modules
+--
+-----------------------------------------------------------------------------
+-- #hide
+module Data.Packed.Internal (
+    module Data.Packed.Internal.Common,
+    module Data.Packed.Internal.Signatures,
+    module Data.Packed.Internal.Vector,
+    module Data.Packed.Internal.Matrix,
+) where
+import Data.Packed.Internal.Common
+import Data.Packed.Internal.Signatures
+import Data.Packed.Internal.Vector
+import Data.Packed.Internal.Matrix
diff --git a/packages/base/src/Data/Packed/Internal/Common.hs b/packages/base/src/Data/Packed/Internal/Common.hs
new file mode 100644
index 0000000..615bbdf
--- /dev/null
+++ b/packages/base/src/Data/Packed/Internal/Common.hs
@@ -0,0 +1,160 @@
+{-# LANGUAGE CPP #-}
+-- |
+-- Module      :  Data.Packed.Internal.Common
+-- Copyright   :  (c) Alberto Ruiz 2007
+-- License     :  BSD3
+-- Maintainer  :  Alberto Ruiz
+-- Stability   :  provisional
+--
+--
+-- Development utilities.
+--
+module Data.Packed.Internal.Common(
+  Adapt,
+  app1, app2, app3, app4,
+  app5, app6, app7, app8, app9, app10,
+  (//), check, mbCatch,
+  splitEvery, common, compatdim,
+  fi,
+  table,
+  finit
+) where
+import Control.Monad(when)
+import Foreign.C.Types
+import Foreign.Storable.Complex()
+import Data.List(transpose,intersperse)
+import Control.Exception as E
+-- | @splitEvery 3 [1..9] == [[1,2,3],[4,5,6],[7,8,9]]@
+splitEvery :: Int -> [a] -> [[a]]
+splitEvery _ [] = []
+splitEvery k l = take k l : splitEvery k (drop k l)
+-- | obtains the common value of a property of a list
+common :: (Eq a) => (b->a) -> [b] -> Maybe a
+common f = commonval . map f where
+    commonval :: (Eq a) => [a] -> Maybe a
+    commonval [] = Nothing
+    commonval [a] = Just a
+    commonval (a:b:xs) = if a==b then commonval (b:xs) else Nothing
+-- | common value with \"adaptable\" 1
+compatdim :: [Int] -> Maybe Int
+compatdim [] = Nothing
+compatdim [a] = Just a
+compatdim (a:b:xs)
+    | a==b = compatdim (b:xs)
+    | a==1 = compatdim (b:xs)
+    | b==1 = compatdim (a:xs)
+    | otherwise = Nothing
+-- | Formatting tool
+table :: String -> [[String]] -> String
+table sep as = unlines . map unwords' $ transpose mtp where 
+    mt = transpose as
+    longs = map (maximum . map length) mt
+    mtp = zipWith (\a b -> map (pad a) b) longs mt
+    pad n str = replicate (n - length str) ' ' ++ str
+    unwords' = concat . intersperse sep
+-- | postfix function application (@flip ($)@)
+(//) :: x -> (x -> y) -> y
+infixl 0 //
+(//) = flip ($)
+-- | specialized fromIntegral
+fi :: Int -> CInt
+fi = fromIntegral
+-- hmm..
+ww2 w1 o1 w2 o2 f = w1 o1 $ w2 o2 . f
+ww3 w1 o1 w2 o2 w3 o3 f = w1 o1 $ ww2 w2 o2 w3 o3 . f
+ww4 w1 o1 w2 o2 w3 o3 w4 o4 f = w1 o1 $ ww3 w2 o2 w3 o3 w4 o4 . f
+ww5 w1 o1 w2 o2 w3 o3 w4 o4 w5 o5 f = w1 o1 $ ww4 w2 o2 w3 o3 w4 o4 w5 o5 . f
+ww6 w1 o1 w2 o2 w3 o3 w4 o4 w5 o5 w6 o6 f = w1 o1 $ ww5 w2 o2 w3 o3 w4 o4 w5 o5 w6 o6 . f
+ww7 w1 o1 w2 o2 w3 o3 w4 o4 w5 o5 w6 o6 w7 o7 f = w1 o1 $ ww6 w2 o2 w3 o3 w4 o4 w5 o5 w6 o6 w7 o7 . f
+ww8 w1 o1 w2 o2 w3 o3 w4 o4 w5 o5 w6 o6 w7 o7 w8 o8 f = w1 o1 $ ww7 w2 o2 w3 o3 w4 o4 w5 o5 w6 o6 w7 o7 w8 o8 . f
+ww9 w1 o1 w2 o2 w3 o3 w4 o4 w5 o5 w6 o6 w7 o7 w8 o8 w9 o9 f = w1 o1 $ ww8 w2 o2 w3 o3 w4 o4 w5 o5 w6 o6 w7 o7 w8 o8 w9 o9 . f
+ww10 w1 o1 w2 o2 w3 o3 w4 o4 w5 o5 w6 o6 w7 o7 w8 o8 w9 o9 w10 o10 f = w1 o1 $ ww9 w2 o2 w3 o3 w4 o4 w5 o5 w6 o6 w7 o7 w8 o8 w9 o9 w10 o10 . f
+type Adapt f t r = t -> ((f -> r) -> IO()) -> IO()
+type Adapt1 f t1 = Adapt f t1 (IO CInt) -> t1 -> String -> IO()
+type Adapt2 f t1 r1 t2 = Adapt f t1 r1 -> t1 -> Adapt1 r1 t2
+type Adapt3 f t1 r1 t2 r2 t3 = Adapt f t1 r1 -> t1 -> Adapt2 r1 t2 r2 t3
+type Adapt4 f t1 r1 t2 r2 t3 r3 t4 = Adapt f t1 r1 -> t1 -> Adapt3 r1 t2 r2 t3 r3 t4
+type Adapt5 f t1 r1 t2 r2 t3 r3 t4 r4 t5 = Adapt f t1 r1 -> t1 -> Adapt4 r1 t2 r2 t3 r3 t4 r4 t5
+type Adapt6 f t1 r1 t2 r2 t3 r3 t4 r4 t5 r5 t6 = Adapt f t1 r1 -> t1 -> Adapt5 r1 t2 r2 t3 r3 t4 r4 t5 r5 t6
+type Adapt7 f t1 r1 t2 r2 t3 r3 t4 r4 t5 r5 t6 r6 t7 = Adapt f t1 r1 -> t1 -> Adapt6 r1 t2 r2 t3 r3 t4 r4 t5 r5 t6 r6 t7
+type Adapt8 f t1 r1 t2 r2 t3 r3 t4 r4 t5 r5 t6 r6 t7 r7 t8 = Adapt f t1 r1 -> t1 -> Adapt7 r1 t2 r2 t3 r3 t4 r4 t5 r5 t6 r6 t7 r7 t8
+type Adapt9 f t1 r1 t2 r2 t3 r3 t4 r4 t5 r5 t6 r6 t7 r7 t8 r8 t9 = Adapt f t1 r1 -> t1 -> Adapt8 r1 t2 r2 t3 r3 t4 r4 t5 r5 t6 r6 t7 r7 t8 r8 t9
+type Adapt10 f t1 r1 t2 r2 t3 r3 t4 r4 t5 r5 t6 r6 t7 r7 t8 r8 t9 r9 t10 = Adapt f t1 r1 -> t1 -> Adapt9 r1 t2 r2 t3 r3 t4 r4 t5 r5 t6 r6 t7 r7 t8 r8 t9 r9 t10
+app1 :: f -> Adapt1 f t1
+app2 :: f -> Adapt2 f t1 r1 t2
+app3 :: f -> Adapt3 f t1 r1 t2 r2 t3
+app4 :: f -> Adapt4 f t1 r1 t2 r2 t3 r3 t4
+app5 :: f -> Adapt5 f t1 r1 t2 r2 t3 r3 t4 r4 t5
+app6 :: f -> Adapt6 f t1 r1 t2 r2 t3 r3 t4 r4 t5 r5 t6
+app7 :: f -> Adapt7 f t1 r1 t2 r2 t3 r3 t4 r4 t5 r5 t6 r6 t7
+app8 :: f -> Adapt8 f t1 r1 t2 r2 t3 r3 t4 r4 t5 r5 t6 r6 t7 r7 t8
+app9 :: f -> Adapt9 f t1 r1 t2 r2 t3 r3 t4 r4 t5 r5 t6 r6 t7 r7 t8 r8 t9
+app10 :: f -> Adapt10 f t1 r1 t2 r2 t3 r3 t4 r4 t5 r5 t6 r6 t7 r7 t8 r8 t9 r9 t10
+app1 f w1 o1 s = w1 o1 $ \a1 -> f // a1 // check s
+app2 f w1 o1 w2 o2 s = ww2 w1 o1 w2 o2 $ \a1 a2 -> f // a1 // a2 // check s
+app3 f w1 o1 w2 o2 w3 o3 s = ww3 w1 o1 w2 o2 w3 o3 $
+     \a1 a2 a3 -> f // a1 // a2 // a3 // check s
+app4 f w1 o1 w2 o2 w3 o3 w4 o4 s = ww4 w1 o1 w2 o2 w3 o3 w4 o4 $
+     \a1 a2 a3 a4 -> f // a1 // a2 // a3 // a4 // check s
+app5 f w1 o1 w2 o2 w3 o3 w4 o4 w5 o5 s = ww5 w1 o1 w2 o2 w3 o3 w4 o4 w5 o5 $
+     \a1 a2 a3 a4 a5 -> f // a1 // a2 // a3 // a4 // a5 // check s
+app6 f w1 o1 w2 o2 w3 o3 w4 o4 w5 o5 w6 o6 s = ww6 w1 o1 w2 o2 w3 o3 w4 o4 w5 o5 w6 o6 $
+     \a1 a2 a3 a4 a5 a6 -> f // a1 // a2 // a3 // a4 // a5 // a6 // check s
+app7 f w1 o1 w2 o2 w3 o3 w4 o4 w5 o5 w6 o6 w7 o7 s = ww7 w1 o1 w2 o2 w3 o3 w4 o4 w5 o5 w6 o6 w7 o7 $
+     \a1 a2 a3 a4 a5 a6 a7 -> f // a1 // a2 // a3 // a4 // a5 // a6 // a7 // check s
+app8 f w1 o1 w2 o2 w3 o3 w4 o4 w5 o5 w6 o6 w7 o7 w8 o8 s = ww8 w1 o1 w2 o2 w3 o3 w4 o4 w5 o5 w6 o6 w7 o7 w8 o8 $
+     \a1 a2 a3 a4 a5 a6 a7 a8 -> f // a1 // a2 // a3 // a4 // a5 // a6 // a7 // a8 // check s
+app9 f w1 o1 w2 o2 w3 o3 w4 o4 w5 o5 w6 o6 w7 o7 w8 o8 w9 o9 s = ww9 w1 o1 w2 o2 w3 o3 w4 o4 w5 o5 w6 o6 w7 o7 w8 o8 w9 o9 $
+     \a1 a2 a3 a4 a5 a6 a7 a8 a9 -> f // a1 // a2 // a3 // a4 // a5 // a6 // a7 // a8 // a9 // check s
+app10 f w1 o1 w2 o2 w3 o3 w4 o4 w5 o5 w6 o6 w7 o7 w8 o8 w9 o9 w10 o10 s = ww10 w1 o1 w2 o2 w3 o3 w4 o4 w5 o5 w6 o6 w7 o7 w8 o8 w9 o9 w10 o10 $
+     \a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 -> f // a1 // a2 // a3 // a4 // a5 // a6 // a7 // a8 // a9 // a10 // check s
+-- GSL error codes are <= 1024
+-- | error codes for the auxiliary functions required by the wrappers
+errorCode :: CInt -> String
+errorCode 2000 = "bad size"
+errorCode 2001 = "bad function code"
+errorCode 2002 = "memory problem"
+errorCode 2003 = "bad file"
+errorCode 2004 = "singular"
+errorCode 2005 = "didn't converge"
+errorCode 2006 = "the input matrix is not positive definite"
+errorCode 2007 = "not yet supported in this OS"
+errorCode n    = "code "++show n
+-- | clear the fpu
+foreign import ccall unsafe "asm_finit" finit :: IO ()
+-- | check the error code
+check :: String -> IO CInt -> IO ()
+check msg f = do
+#if FINIT
+    finit
+#endif
+    err <- f
+    when (err/=0) $ error (msg++": "++errorCode err)
+    return ()
+-- | Error capture and conversion to Maybe
+mbCatch :: IO x -> IO (Maybe x)
+mbCatch act = E.catch (Just `fmap` act) f
+    where f :: SomeException -> IO (Maybe x)
+          f _ = return Nothing
diff --git a/packages/base/src/Data/Packed/Internal/Matrix.hs b/packages/base/src/Data/Packed/Internal/Matrix.hs
new file mode 100644
index 0000000..9b831cc
--- /dev/null
+++ b/packages/base/src/Data/Packed/Internal/Matrix.hs
@@ -0,0 +1,422 @@
+{-# LANGUAGE ForeignFunctionInterface #-}
+{-# LANGUAGE FlexibleContexts         #-}
+{-# LANGUAGE FlexibleInstances        #-}
+{-# LANGUAGE BangPatterns             #-}
+-- |
+-- Module      :  Data.Packed.Internal.Matrix
+-- Copyright   :  (c) Alberto Ruiz 2007
+-- License     :  BSD3
+-- Maintainer  :  Alberto Ruiz
+-- Stability   :  provisional
+--
+-- Internal matrix representation
+--
+module Data.Packed.Internal.Matrix(
+    Matrix(..), rows, cols, cdat, fdat,
+    MatrixOrder(..), orderOf,
+    createMatrix, mat,
+    cmat, fmat,
+    toLists, flatten, reshape,
+    Element(..),
+    trans,
+    fromRows, toRows, fromColumns, toColumns,
+    matrixFromVector,
+    subMatrix,
+    liftMatrix, liftMatrix2,
+    (@@>), atM',
+    singleton,
+    emptyM,
+    size, shSize, conformVs, conformMs, conformVTo, conformMTo
+) where
+import Data.Packed.Internal.Common
+import Data.Packed.Internal.Signatures
+import Data.Packed.Internal.Vector
+import Foreign.Marshal.Alloc(alloca, free)
+import Foreign.Marshal.Array(newArray)
+import Foreign.Ptr(Ptr, castPtr)
+import Foreign.Storable(Storable, peekElemOff, pokeElemOff, poke, sizeOf)
+import Data.Complex(Complex)
+import Foreign.C.Types
+import System.IO.Unsafe(unsafePerformIO)
+import Control.DeepSeq
+-----------------------------------------------------------------
+{- Design considerations for the Matrix Type
+   -----------------------------------------
+- we must easily handle both row major and column major order,
+  for bindings to LAPACK and GSL/C
+- we'd like to simplify redundant matrix transposes:
+   - Some of them arise from the order requirements of some functions
+   - some functions (matrix product) admit transposed arguments
+- maybe we don't really need this kind of simplification:
+   - more complex code
+   - some computational overhead
+   - only appreciable gain in code with a lot of redundant transpositions
+     and cheap matrix computations
+- we could carry both the matrix and its (lazily computed) transpose.
+  This may save some transpositions, but it is necessary to keep track of the
+  data which is actually computed to be used by functions like the matrix product
+  which admit both orders.
+- but if we need the transposed data and it is not in the structure, we must make
+  sure that we touch the same foreignptr that is used in the computation.
+- a reasonable solution is using two constructors for a matrix. Transposition just
+  "flips" the constructor. Actual data transposition is not done if followed by a
+  matrix product or another transpose.
+-}
+data MatrixOrder = RowMajor | ColumnMajor deriving (Show,Eq)
+transOrder RowMajor = ColumnMajor
+transOrder ColumnMajor = RowMajor
+{- | Matrix representation suitable for GSL and LAPACK computations.
+The elements are stored in a continuous memory array.
+-}
+data Matrix t = Matrix { irows :: {-# UNPACK #-} !Int
+                       , icols :: {-# UNPACK #-} !Int
+                       , xdat :: {-# UNPACK #-} !(Vector t)
+                       , order :: !MatrixOrder }
+-- RowMajor: preferred by C, fdat may require a transposition
+-- ColumnMajor: preferred by LAPACK, cdat may require a transposition
+cdat = xdat
+fdat = xdat
+rows :: Matrix t -> Int
+rows = irows
+cols :: Matrix t -> Int
+cols = icols
+orderOf :: Matrix t -> MatrixOrder
+orderOf = order
+-- | Matrix transpose.
+trans :: Matrix t -> Matrix t
+trans Matrix {irows = r, icols = c, xdat = d, order = o } = Matrix { irows = c, icols = r, xdat = d, order = transOrder o}
+cmat :: (Element t) => Matrix t -> Matrix t
+cmat m@Matrix{order = RowMajor} = m
+cmat Matrix {irows = r, icols = c, xdat = d, order = ColumnMajor } = Matrix { irows = r, icols = c, xdat = transdata r d c, order = RowMajor}
+fmat :: (Element t) => Matrix t -> Matrix t
+fmat m@Matrix{order = ColumnMajor} = m
+fmat Matrix {irows = r, icols = c, xdat = d, order = RowMajor } = Matrix { irows = r, icols = c, xdat = transdata c d r, order = ColumnMajor}
+-- C-Haskell matrix adapter
+-- mat :: Adapt (CInt -> CInt -> Ptr t -> r) (Matrix t) r
+mat :: (Storable t) => Matrix t -> (((CInt -> CInt -> Ptr t -> t1) -> t1) -> IO b) -> IO b
+mat a f =
+    unsafeWith (xdat a) $ \p -> do
+        let m g = do
+            g (fi (rows a)) (fi (cols a)) p
+        f m
+{- | Creates a vector by concatenation of rows. If the matrix is ColumnMajor, this operation requires a transpose.
+>>> flatten (ident 3)
+fromList [1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0]
+-}
+flatten :: Element t => Matrix t -> Vector t
+flatten = xdat . cmat
+{-
+type Mt t s = Int -> Int -> Ptr t -> s
+infixr 6 ::>
+type t ::> s = Mt t s
+-}
+-- | the inverse of 'Data.Packed.Matrix.fromLists'
+toLists :: (Element t) => Matrix t -> [[t]]
+toLists m = splitEvery (cols m) . toList . flatten $ m
+-- | Create a matrix from a list of vectors.
+-- All vectors must have the same dimension,
+-- or dimension 1, which is are automatically expanded.
+fromRows :: Element t => [Vector t] -> Matrix t
+fromRows [] = emptyM 0 0
+fromRows vs = case compatdim (map dim vs) of
+    Nothing -> error $ "fromRows expects vectors with equal sizes (or singletons), given: " ++ show (map dim vs)
+    Just 0  -> emptyM r 0
+    Just c  -> matrixFromVector RowMajor r c . vjoin . map (adapt c) $ vs
+  where
+    r = length vs
+    adapt c v
+        | c == 0 = fromList[]
+        | dim v == c = v
+        | otherwise = constantD (v@>0) c
+-- | extracts the rows of a matrix as a list of vectors
+toRows :: Element t => Matrix t -> [Vector t]
+toRows m
+    | c == 0    = replicate r (fromList[])
+    | otherwise = toRows' 0
+  where
+    v = flatten m
+    r = rows m
+    c = cols m
+    toRows' k | k == r*c  = []
+              | otherwise = subVector k c v : toRows' (k+c)
+-- | Creates a matrix from a list of vectors, as columns
+fromColumns :: Element t => [Vector t] -> Matrix t
+fromColumns m = trans . fromRows $ m
+-- | Creates a list of vectors from the columns of a matrix
+toColumns :: Element t => Matrix t -> [Vector t]
+toColumns m = toRows . trans $ m
+-- | Reads a matrix position.
+(@@>) :: Storable t => Matrix t -> (Int,Int) -> t
+infixl 9 @@>
+m@Matrix {irows = r, icols = c} @@> (i,j)
+    | safe      = if i<0 || i>=r || j<0 || j>=c
+                    then error "matrix indexing out of range"
+                    else atM' m i j
+    | otherwise = atM' m i j
+{-# INLINE (@@>) #-}
+--  Unsafe matrix access without range checking
+atM' Matrix {icols = c, xdat = v, order = RowMajor} i j = v `at'` (i*c+j)
+atM' Matrix {irows = r, xdat = v, order = ColumnMajor} i j = v `at'` (j*r+i)
+{-# INLINE atM' #-}
+------------------------------------------------------------------
+matrixFromVector o r c v
+    | r * c == dim v = m
+    | otherwise = error $ "can't reshape vector dim = "++ show (dim v)++" to matrix " ++ shSize m
+  where
+    m = Matrix { irows = r, icols = c, xdat = v, order = o }
+-- allocates memory for a new matrix
+createMatrix :: (Storable a) => MatrixOrder -> Int -> Int -> IO (Matrix a)
+createMatrix ord r c = do
+    p <- createVector (r*c)
+    return (matrixFromVector ord r c p)
+{- | Creates a matrix from a vector by grouping the elements in rows with the desired number of columns. (GNU-Octave groups by columns. To do it you can define @reshapeF r = trans . reshape r@
+where r is the desired number of rows.)
+>>> reshape 4 (fromList [1..12])
+(3><4)
+ [ 1.0,  2.0,  3.0,  4.0
+ , 5.0,  6.0,  7.0,  8.0
+ , 9.0, 10.0, 11.0, 12.0 ]
+-}
+reshape :: Storable t => Int -> Vector t -> Matrix t
+reshape 0 v = matrixFromVector RowMajor 0 0 v
+reshape c v = matrixFromVector RowMajor (dim v `div` c) c v
+singleton x = reshape 1 (fromList [x])
+-- | application of a vector function on the flattened matrix elements
+liftMatrix :: (Storable a, Storable b) => (Vector a -> Vector b) -> Matrix a -> Matrix b
+liftMatrix f Matrix { irows = r, icols = c, xdat = d, order = o } = matrixFromVector o r c (f d)
+-- | application of a vector function on the flattened matrices elements
+liftMatrix2 :: (Element t, Element a, Element b) => (Vector a -> Vector b -> Vector t) -> Matrix a -> Matrix b -> Matrix t
+liftMatrix2 f m1 m2
+    | not (compat m1 m2) = error "nonconformant matrices in liftMatrix2"
+    | otherwise = case orderOf m1 of
+        RowMajor    -> matrixFromVector RowMajor    (rows m1) (cols m1) (f (xdat m1) (flatten m2))
+        ColumnMajor -> matrixFromVector ColumnMajor (rows m1) (cols m1) (f (xdat m1) ((xdat.fmat) m2))
+compat :: Matrix a -> Matrix b -> Bool
+compat m1 m2 = rows m1 == rows m2 && cols m1 == cols m2
+------------------------------------------------------------------
+{- | Supported matrix elements.
+    This class provides optimized internal
+    operations for selected element types.
+    It provides unoptimised defaults for any 'Storable' type,
+    so you can create instances simply as:
+    @instance Element Foo@.
+-}
+class (Storable a) => Element a where
+    subMatrixD :: (Int,Int) -- ^ (r0,c0) starting position 
+               -> (Int,Int) -- ^ (rt,ct) dimensions of submatrix
+               -> Matrix a -> Matrix a
+    subMatrixD = subMatrix'
+    transdata :: Int -> Vector a -> Int -> Vector a
+    transdata = transdataP -- transdata'
+    constantD  :: a -> Int -> Vector a
+    constantD = constantP -- constant'
+instance Element Float where
+    transdata  = transdataAux ctransF
+    constantD  = constantAux cconstantF
+instance Element Double where
+    transdata  = transdataAux ctransR
+    constantD  = constantAux cconstantR
+instance Element (Complex Float) where
+    transdata  = transdataAux ctransQ
+    constantD  = constantAux cconstantQ
+instance Element (Complex Double) where
+    transdata  = transdataAux ctransC
+    constantD  = constantAux cconstantC
+-------------------------------------------------------------------
+transdataAux fun c1 d c2 =
+    if noneed
+        then d
+        else unsafePerformIO $ do
+            v <- createVector (dim d)
+            unsafeWith d $ \pd ->
+                unsafeWith v $ \pv ->
+                    fun (fi r1) (fi c1) pd (fi r2) (fi c2) pv // check "transdataAux"
+            return v
+  where r1 = dim d `div` c1
+        r2 = dim d `div` c2
+        noneed = dim d == 0 || r1 == 1 || c1 == 1
+transdataP :: Storable a => Int -> Vector a -> Int -> Vector a
+transdataP c1 d c2 =
+    if noneed
+       then d
+       else unsafePerformIO $ do
+          v <- createVector (dim d)
+          unsafeWith d $ \pd ->
+              unsafeWith v $ \pv ->
+                  ctransP (fi r1) (fi c1) (castPtr pd) (fi sz) (fi r2) (fi c2) (castPtr pv) (fi sz) // check "transdataP"
+          return v
+   where r1 = dim d `div` c1
+         r2 = dim d `div` c2
+         sz = sizeOf (d @> 0)
+         noneed = dim d == 0 || r1 == 1 || c1 == 1
+foreign import ccall unsafe "transF" ctransF :: TFMFM
+foreign import ccall unsafe "transR" ctransR :: TMM
+foreign import ccall unsafe "transQ" ctransQ :: TQMQM
+foreign import ccall unsafe "transC" ctransC :: TCMCM
+foreign import ccall unsafe "transP" ctransP :: CInt -> CInt -> Ptr () -> CInt -> CInt -> CInt -> Ptr () -> CInt -> IO CInt
+----------------------------------------------------------------------
+constantAux fun x n = unsafePerformIO $ do
+    v <- createVector n
+    px <- newArray [x]
+    app1 (fun px) vec v "constantAux"
+    free px
+    return v
+foreign import ccall unsafe "constantF" cconstantF :: Ptr Float -> TF
+foreign import ccall unsafe "constantR" cconstantR :: Ptr Double -> TV
+foreign import ccall unsafe "constantQ" cconstantQ :: Ptr (Complex Float) -> TQV
+foreign import ccall unsafe "constantC" cconstantC :: Ptr (Complex Double) -> TCV
+constantP :: Storable a => a -> Int -> Vector a
+constantP a n = unsafePerformIO $ do
+    let sz = sizeOf a
+    v <- createVector n
+    unsafeWith v $ \p -> do
+       alloca $ \k -> do
+                      poke k a
+                      cconstantP (castPtr k) (fi n) (castPtr p) (fi sz) // check "constantP"
+    return v
+foreign import ccall unsafe "constantP" cconstantP :: Ptr () -> CInt -> Ptr () -> CInt -> IO CInt
+----------------------------------------------------------------------
+-- | Extracts a submatrix from a matrix.
+subMatrix :: Element a
+          => (Int,Int) -- ^ (r0,c0) starting position 
+          -> (Int,Int) -- ^ (rt,ct) dimensions of submatrix
+          -> Matrix a -- ^ input matrix
+          -> Matrix a -- ^ result
+subMatrix (r0,c0) (rt,ct) m
+    | 0 <= r0 && 0 <= rt && r0+rt <= (rows m) &&
+      0 <= c0 && 0 <= ct && c0+ct <= (cols m) = subMatrixD (r0,c0) (rt,ct) m
+    | otherwise = error $ "wrong subMatrix "++
+                          show ((r0,c0),(rt,ct))++" of "++show(rows m)++"x"++ show (cols m)
+subMatrix'' (r0,c0) (rt,ct) c v = unsafePerformIO $ do
+    w <- createVector (rt*ct)
+    unsafeWith v $ \p ->
+        unsafeWith w $ \q -> do
+            let go (-1) _ = return ()
+                go !i (-1) = go (i-1) (ct-1)
+                go !i !j = do x <- peekElemOff p ((i+r0)*c+j+c0)
+                              pokeElemOff      q (i*ct+j) x
+                              go i (j-1)
+            go (rt-1) (ct-1)
+    return w
+subMatrix' (r0,c0) (rt,ct) (Matrix { icols = c, xdat = v, order = RowMajor}) = Matrix rt ct (subMatrix'' (r0,c0) (rt,ct) c v) RowMajor
+subMatrix' (r0,c0) (rt,ct) m = trans $ subMatrix' (c0,r0) (ct,rt) (trans m)
+--------------------------------------------------------------------------
+maxZ xs = if minimum xs == 0 then 0 else maximum xs
+conformMs ms = map (conformMTo (r,c)) ms
+  where
+    r = maxZ (map rows ms)
+    c = maxZ (map cols ms)
+    
+conformVs vs = map (conformVTo n) vs
+  where
+    n = maxZ (map dim vs)
+conformMTo (r,c) m
+    | size m == (r,c) = m
+    | size m == (1,1) = matrixFromVector RowMajor r c (constantD (m@@>(0,0)) (r*c))
+    | size m == (r,1) = repCols c m
+    | size m == (1,c) = repRows r m
+    | otherwise = error $ "matrix " ++ shSize m ++ " cannot be expanded to (" ++ show r ++ "><"++ show c ++")"
+conformVTo n v
+    | dim v == n = v
+    | dim v == 1 = constantD (v@>0) n
+    | otherwise = error $ "vector of dim=" ++ show (dim v) ++ " cannot be expanded to dim=" ++ show n
+repRows n x = fromRows (replicate n (flatten x))
+repCols n x = fromColumns (replicate n (flatten x))
+size m = (rows m, cols m)
+shSize m = "(" ++ show (rows m) ++"><"++ show (cols m)++")"
+emptyM r c = matrixFromVector RowMajor r c (fromList[])
+----------------------------------------------------------------------
+instance (Storable t, NFData t) => NFData (Matrix t)
+  where
+    rnf m | d > 0     = rnf (v @> 0)
+          | otherwise = ()
+      where
+        d = dim v
+        v = xdat m
diff --git a/packages/base/src/Data/Packed/Internal/Signatures.hs b/packages/base/src/Data/Packed/Internal/Signatures.hs
new file mode 100644
index 0000000..acc3070
--- /dev/null
+++ b/packages/base/src/Data/Packed/Internal/Signatures.hs
@@ -0,0 +1,70 @@
+-- |
+-- Module      :  Data.Packed.Internal.Signatures
+-- Copyright   :  (c) Alberto Ruiz 2009
+-- License     :  BSD3
+-- Maintainer  :  Alberto Ruiz
+-- Stability   :  provisional
+--
+-- Signatures of the C functions.
+--
+module Data.Packed.Internal.Signatures where
+import Foreign.Ptr(Ptr)
+import Data.Complex(Complex)
+import Foreign.C.Types(CInt)
+type PF = Ptr Float                             --
+type PD = Ptr Double                            --
+type PQ = Ptr (Complex Float)                   --
+type PC = Ptr (Complex Double)                  --
+type TF = CInt -> PF -> IO CInt                 --
+type TFF = CInt -> PF -> TF                     --
+type TFV = CInt -> PF -> TV                     --
+type TVF = CInt -> PD -> TF                     --
+type TFFF = CInt -> PF -> TFF                   --
+type TV = CInt -> PD -> IO CInt                 --
+type TVV = CInt -> PD -> TV                     --
+type TVVV = CInt -> PD -> TVV                   --
+type TFM = CInt -> CInt -> PF -> IO CInt        --
+type TFMFM =  CInt -> CInt -> PF -> TFM         --
+type TFMFMFM =  CInt -> CInt -> PF -> TFMFM     --
+type TM = CInt -> CInt -> PD -> IO CInt         --
+type TMM =  CInt -> CInt -> PD -> TM            --
+type TVMM = CInt -> PD -> TMM                   --
+type TMVMM = CInt -> CInt -> PD -> TVMM         --
+type TMMM =  CInt -> CInt -> PD -> TMM          --
+type TVM = CInt -> PD -> TM                     --
+type TVVM = CInt -> PD -> TVM                   --
+type TMV = CInt -> CInt -> PD -> TV             --
+type TMMV = CInt -> CInt -> PD -> TMV           --
+type TMVM = CInt -> CInt -> PD -> TVM           --
+type TMMVM = CInt -> CInt -> PD -> TMVM         --
+type TCM = CInt -> CInt -> PC -> IO CInt        --
+type TCVCM = CInt -> PC -> TCM                  --
+type TCMCVCM = CInt -> CInt -> PC -> TCVCM      --
+type TMCMCVCM = CInt -> CInt -> PD -> TCMCVCM   --
+type TCMCMCVCM = CInt -> CInt -> PC -> TCMCVCM  --
+type TCMCM = CInt -> CInt -> PC -> TCM          --
+type TVCM = CInt -> PD -> TCM                   --
+type TCMVCM = CInt -> CInt -> PC -> TVCM        --
+type TCMCMVCM = CInt -> CInt -> PC -> TCMVCM    --
+type TCMCMCM = CInt -> CInt -> PC -> TCMCM      --
+type TCV = CInt -> PC -> IO CInt                --
+type TCVCV = CInt -> PC -> TCV                  --
+type TCVCVCV = CInt -> PC -> TCVCV              --
+type TCVV = CInt -> PC -> TV                    --
+type TQV = CInt -> PQ -> IO CInt                --
+type TQVQV = CInt -> PQ -> TQV                  --
+type TQVQVQV = CInt -> PQ -> TQVQV              --
+type TQVF = CInt -> PQ -> TF                    --
+type TQM = CInt -> CInt -> PQ -> IO CInt        --
+type TQMQM = CInt -> CInt -> PQ -> TQM          --
+type TQMQMQM = CInt -> CInt -> PQ -> TQMQM      --
+type TCMCV = CInt -> CInt -> PC -> TCV          --
+type TVCV = CInt -> PD -> TCV                   --
+type TCVM = CInt -> PC -> TM                    --
+type TMCVM = CInt -> CInt -> PD -> TCVM         --
+type TMMCVM = CInt -> CInt -> PD -> TMCVM       --
diff --git a/packages/base/src/Data/Packed/Internal/Vector.hs b/packages/base/src/Data/Packed/Internal/Vector.hs
new file mode 100644
index 0000000..d0bc143
--- /dev/null
+++ b/packages/base/src/Data/Packed/Internal/Vector.hs
@@ -0,0 +1,471 @@
+{-# LANGUAGE MagicHash, CPP, UnboxedTuples, BangPatterns, FlexibleContexts #-}
+-- |
+-- Module      :  Data.Packed.Internal.Vector
+-- Copyright   :  (c) Alberto Ruiz 2007
+-- License     :  BSD3
+-- Maintainer  :  Alberto Ruiz
+-- Stability   :  provisional
+--
+-- Vector implementation
+--
+--------------------------------------------------------------------------------
+module Data.Packed.Internal.Vector (
+    Vector, dim,
+    fromList, toList, (|>),
+    vjoin, (@>), safe, at, at', subVector, takesV,
+    mapVector, mapVectorWithIndex, zipVectorWith, unzipVectorWith,
+    mapVectorM, mapVectorM_, mapVectorWithIndexM, mapVectorWithIndexM_,
+    foldVector, foldVectorG, foldLoop, foldVectorWithIndex,
+    createVector, vec,
+    asComplex, asReal, float2DoubleV, double2FloatV,
+    stepF, stepD, condF, condD,
+    conjugateQ, conjugateC,
+    cloneVector,
+    unsafeToForeignPtr,
+    unsafeFromForeignPtr,
+    unsafeWith
+) where
+import Data.Packed.Internal.Common
+import Data.Packed.Internal.Signatures
+import Foreign.Marshal.Array(peekArray, copyArray, advancePtr)
+import Foreign.ForeignPtr(ForeignPtr, castForeignPtr)
+import Foreign.Ptr(Ptr)
+import Foreign.Storable(Storable, peekElemOff, pokeElemOff, sizeOf)
+import Foreign.C.Types
+import Data.Complex
+import Control.Monad(when)
+import System.IO.Unsafe(unsafePerformIO)
+#if __GLASGOW_HASKELL__ >= 605
+import GHC.ForeignPtr           (mallocPlainForeignPtrBytes)
+#else
+import Foreign.ForeignPtr       (mallocForeignPtrBytes)
+#endif
+import GHC.Base
+#if __GLASGOW_HASKELL__ < 612
+import GHC.IOBase hiding (liftIO)
+#endif
+import qualified Data.Vector.Storable as Vector
+import Data.Vector.Storable(Vector,
+                            fromList,
+                            unsafeToForeignPtr,
+                            unsafeFromForeignPtr,
+                            unsafeWith)
+-- | Number of elements
+dim :: (Storable t) => Vector t -> Int
+dim = Vector.length
+-- C-Haskell vector adapter
+-- vec :: Adapt (CInt -> Ptr t -> r) (Vector t) r
+vec :: (Storable t) => Vector t -> (((CInt -> Ptr t -> t1) -> t1) -> IO b) -> IO b
+vec x f = unsafeWith x $ \p -> do
+    let v g = do
+        g (fi $ dim x) p
+    f v
+{-# INLINE vec #-}
+-- allocates memory for a new vector
+createVector :: Storable a => Int -> IO (Vector a)
+createVector n = do
+    when (n < 0) $ error ("trying to createVector of negative dim: "++show n)
+    fp <- doMalloc undefined
+    return $ unsafeFromForeignPtr fp 0 n
+  where
+    --
+    -- Use the much cheaper Haskell heap allocated storage
+    -- for foreign pointer space we control
+    --
+    doMalloc :: Storable b => b -> IO (ForeignPtr b)
+    doMalloc dummy = do
+#if __GLASGOW_HASKELL__ >= 605
+        mallocPlainForeignPtrBytes (n * sizeOf dummy)
+#else
+        mallocForeignPtrBytes      (n * sizeOf dummy)
+#endif
+{- | creates a Vector from a list:
+@> fromList [2,3,5,7]
+4 |> [2.0,3.0,5.0,7.0]@
+-}
+safeRead v = inlinePerformIO . unsafeWith v
+{-# INLINE safeRead #-}
+inlinePerformIO :: IO a -> a
+inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r
+{-# INLINE inlinePerformIO #-}
+{- | extracts the Vector elements to a list
+>>> toList (linspace 5 (1,10))
+[1.0,3.25,5.5,7.75,10.0]
+-}
+toList :: Storable a => Vector a -> [a]
+toList v = safeRead v $ peekArray (dim v)
+{- | Create a vector from a list of elements and explicit dimension. The input
+     list is explicitly truncated if it is too long, so it may safely
+     be used, for instance, with infinite lists.
+>>> 5 |> [1..]
+fromList [1.0,2.0,3.0,4.0,5.0]
+-}
+(|>) :: (Storable a) => Int -> [a] -> Vector a
+infixl 9 |>
+n |> l = if length l' == n
+            then fromList l'
+            else error "list too short for |>"
+  where l' = take n l
+-- | access to Vector elements without range checking
+at' :: Storable a => Vector a -> Int -> a
+at' v n = safeRead v $ flip peekElemOff n
+{-# INLINE at' #-}
+--
+-- turn off bounds checking with -funsafe at configure time.
+-- ghc will optimise away the salways true case at compile time.
+--
+#if defined(UNSAFE)
+safe :: Bool
+safe = False
+#else
+safe = True
+#endif
+-- | access to Vector elements with range checking.
+at :: Storable a => Vector a -> Int -> a
+at v n
+    | safe      = if n >= 0 && n < dim v
+                    then at' v n
+                    else error "vector index out of range"
+    | otherwise = at' v n
+{-# INLINE at #-}
+{- | takes a number of consecutive elements from a Vector
+>>> subVector 2 3 (fromList [1..10])
+fromList [3.0,4.0,5.0]
+-}
+subVector :: Storable t => Int       -- ^ index of the starting element
+                        -> Int       -- ^ number of elements to extract
+                        -> Vector t  -- ^ source
+                        -> Vector t  -- ^ result
+subVector = Vector.slice
+{- | Reads a vector position:
+>>> fromList [0..9] @> 7
+7.0
+-}
+(@>) :: Storable t => Vector t -> Int -> t
+infixl 9 @>
+(@>) = at
+{- | concatenate a list of vectors
+>>> vjoin [fromList [1..5::Double], konst 1 3]
+fromList [1.0,2.0,3.0,4.0,5.0,1.0,1.0,1.0]
+-}
+vjoin :: Storable t => [Vector t] -> Vector t
+vjoin [] = fromList []
+vjoin [v] = v
+vjoin as = unsafePerformIO $ do
+    let tot = sum (map dim as)
+    r <- createVector tot
+    unsafeWith r $ \ptr ->
+        joiner as tot ptr
+    return r
+  where joiner [] _ _ = return ()
+        joiner (v:cs) _ p = do
+            let n = dim v
+            unsafeWith v $ \pb -> copyArray p pb n
+            joiner cs 0 (advancePtr p n)
+{- | Extract consecutive subvectors of the given sizes.
+>>> takesV [3,4] (linspace 10 (1,10::Double))
+[fromList [1.0,2.0,3.0],fromList [4.0,5.0,6.0,7.0]]
+-}
+takesV :: Storable t => [Int] -> Vector t -> [Vector t]
+takesV ms w | sum ms > dim w = error $ "takesV " ++ show ms ++ " on dim = " ++ (show $ dim w)
+            | otherwise = go ms w
+    where go [] _ = []
+          go (n:ns) v = subVector 0 n v
+                      : go ns (subVector n (dim v - n) v)
+---------------------------------------------------------------
+-- | transforms a complex vector into a real vector with alternating real and imaginary parts 
+asReal :: (RealFloat a, Storable a) => Vector (Complex a) -> Vector a
+asReal v = unsafeFromForeignPtr (castForeignPtr fp) (2*i) (2*n)
+    where (fp,i,n) = unsafeToForeignPtr v
+-- | transforms a real vector into a complex vector with alternating real and imaginary parts
+asComplex :: (RealFloat a, Storable a) => Vector a -> Vector (Complex a)
+asComplex v = unsafeFromForeignPtr (castForeignPtr fp) (i `div` 2) (n `div` 2)
+    where (fp,i,n) = unsafeToForeignPtr v
+---------------------------------------------------------------
+float2DoubleV :: Vector Float -> Vector Double
+float2DoubleV v = unsafePerformIO $ do
+    r <- createVector (dim v)
+    app2 c_float2double vec v vec r "float2double"
+    return r
+double2FloatV :: Vector Double -> Vector Float
+double2FloatV v = unsafePerformIO $ do
+    r <- createVector (dim v)
+    app2 c_double2float vec v vec r "double2float2"
+    return r
+foreign import ccall unsafe "float2double" c_float2double:: TFV
+foreign import ccall unsafe "double2float" c_double2float:: TVF
+---------------------------------------------------------------
+stepF :: Vector Float -> Vector Float
+stepF v = unsafePerformIO $ do
+    r <- createVector (dim v)
+    app2 c_stepF vec v vec r "stepF"
+    return r
+stepD :: Vector Double -> Vector Double
+stepD v = unsafePerformIO $ do
+    r <- createVector (dim v)
+    app2 c_stepD vec v vec r "stepD"
+    return r
+foreign import ccall unsafe "stepF" c_stepF :: TFF
+foreign import ccall unsafe "stepD" c_stepD :: TVV
+---------------------------------------------------------------
+condF :: Vector Float -> Vector Float -> Vector Float -> Vector Float -> Vector Float -> Vector Float
+condF x y l e g = unsafePerformIO $ do
+    r <- createVector (dim x)
+    app6 c_condF vec x vec y vec l vec e vec g vec r "condF"
+    return r
+condD :: Vector Double -> Vector Double -> Vector Double -> Vector Double -> Vector Double -> Vector Double
+condD x y l e g = unsafePerformIO $ do
+    r <- createVector (dim x)
+    app6 c_condD vec x vec y vec l vec e vec g vec r "condD"
+    return r
+foreign import ccall unsafe "condF" c_condF :: CInt -> PF -> CInt -> PF -> CInt -> PF -> TFFF
+foreign import ccall unsafe "condD" c_condD :: CInt -> PD -> CInt -> PD -> CInt -> PD -> TVVV
+--------------------------------------------------------------------------------
+conjugateAux fun x = unsafePerformIO $ do
+    v <- createVector (dim x)
+    app2 fun vec x vec v "conjugateAux"
+    return v
+conjugateQ :: Vector (Complex Float) -> Vector (Complex Float)
+conjugateQ = conjugateAux c_conjugateQ
+foreign import ccall unsafe "conjugateQ" c_conjugateQ :: TQVQV
+conjugateC :: Vector (Complex Double) -> Vector (Complex Double)
+conjugateC = conjugateAux c_conjugateC
+foreign import ccall unsafe "conjugateC" c_conjugateC :: TCVCV
+--------------------------------------------------------------------------------
+cloneVector :: Storable t => Vector t -> IO (Vector t)
+cloneVector v = do
+        let n = dim v
+        r <- createVector n
+        let f _ s _ d =  copyArray d s n >> return 0
+        app2 f vec v vec r "cloneVector"
+        return r
+------------------------------------------------------------------
+-- | map on Vectors
+mapVector :: (Storable a, Storable b) => (a-> b) -> Vector a -> Vector b
+mapVector f v = unsafePerformIO $ do
+    w <- createVector (dim v)
+    unsafeWith v $ \p ->
+        unsafeWith w $ \q -> do
+            let go (-1) = return ()
+                go !k = do x <- peekElemOff p k
+                           pokeElemOff      q k (f x)
+                           go (k-1)
+            go (dim v -1)
+    return w
+{-# INLINE mapVector #-}
+-- | zipWith for Vectors
+zipVectorWith :: (Storable a, Storable b, Storable c) => (a-> b -> c) -> Vector a -> Vector b -> Vector c
+zipVectorWith f u v = unsafePerformIO $ do
+    let n = min (dim u) (dim v)
+    w <- createVector n
+    unsafeWith u $ \pu ->
+        unsafeWith v $ \pv ->
+            unsafeWith w $ \pw -> do
+                let go (-1) = return ()
+                    go !k = do x <- peekElemOff pu k
+                               y <- peekElemOff pv k
+                               pokeElemOff      pw k (f x y)
+                               go (k-1)
+                go (n -1)
+    return w
+{-# INLINE zipVectorWith #-}
+-- | unzipWith for Vectors
+unzipVectorWith :: (Storable (a,b), Storable c, Storable d) 
+                   => ((a,b) -> (c,d)) -> Vector (a,b) -> (Vector c,Vector d)
+unzipVectorWith f u = unsafePerformIO $ do
+      let n = dim u
+      v <- createVector n
+      w <- createVector n
+      unsafeWith u $ \pu ->
+          unsafeWith v $ \pv ->
+              unsafeWith w $ \pw -> do
+                  let go (-1) = return ()
+                      go !k   = do z <- peekElemOff pu k
+                                   let (x,y) = f z 
+                                   pokeElemOff      pv k x
+                                   pokeElemOff      pw k y
+                                   go (k-1)
+                  go (n-1)
+      return (v,w)
+{-# INLINE unzipVectorWith #-}
+foldVector :: Storable a => (a -> b -> b) -> b -> Vector a -> b
+foldVector f x v = unsafePerformIO $
+    unsafeWith v $ \p -> do
+        let go (-1) s = return s
+            go !k !s = do y <- peekElemOff p k
+                          go (k-1::Int) (f y s)
+        go (dim v -1) x
+{-# INLINE foldVector #-}
+-- the zero-indexed index is passed to the folding function
+foldVectorWithIndex :: Storable a => (Int -> a -> b -> b) -> b -> Vector a -> b
+foldVectorWithIndex f x v = unsafePerformIO $
+    unsafeWith v $ \p -> do
+        let go (-1) s = return s
+            go !k !s = do y <- peekElemOff p k
+                          go (k-1::Int) (f k y s)
+        go (dim v -1) x
+{-# INLINE foldVectorWithIndex #-}
+foldLoop f s0 d = go (d - 1) s0
+     where
+       go 0 s = f (0::Int) s
+       go !j !s = go (j - 1) (f j s)
+foldVectorG f s0 v = foldLoop g s0 (dim v)
+    where g !k !s = f k (at' v) s
+          {-# INLINE g #-} -- Thanks to Ryan Ingram (http://permalink.gmane.org/gmane.comp.lang.haskell.cafe/46479)
+{-# INLINE foldVectorG #-}
+-------------------------------------------------------------------
+-- | monadic map over Vectors
+--    the monad @m@ must be strict
+mapVectorM :: (Storable a, Storable b, Monad m) => (a -> m b) -> Vector a -> m (Vector b)
+mapVectorM f v = do
+    w <- return $! unsafePerformIO $! createVector (dim v)
+    mapVectorM' w 0 (dim v -1)
+    return w
+    where mapVectorM' w' !k !t
+              | k == t               = do
+                                       x <- return $! inlinePerformIO $! unsafeWith v $! \p -> peekElemOff p k 
+                                       y <- f x
+                                       return $! inlinePerformIO $! unsafeWith w' $! \q -> pokeElemOff q k y
+              | otherwise            = do
+                                       x <- return $! inlinePerformIO $! unsafeWith v $! \p -> peekElemOff p k 
+                                       y <- f x
+                                       _ <- return $! inlinePerformIO $! unsafeWith w' $! \q -> pokeElemOff q k y
+                                       mapVectorM' w' (k+1) t
+{-# INLINE mapVectorM #-}
+-- | monadic map over Vectors
+mapVectorM_ :: (Storable a, Monad m) => (a -> m ()) -> Vector a -> m ()
+mapVectorM_ f v = do
+    mapVectorM' 0 (dim v -1)
+    where mapVectorM' !k !t
+              | k == t            = do
+                                    x <- return $! inlinePerformIO $! unsafeWith v $! \p -> peekElemOff p k
+                                    f x
+              | otherwise         = do
+                                    x <- return $! inlinePerformIO $! unsafeWith v $! \p -> peekElemOff p k 
+                                    _ <- f x
+                                    mapVectorM' (k+1) t
+{-# INLINE mapVectorM_ #-}
+-- | monadic map over Vectors with the zero-indexed index passed to the mapping function
+--    the monad @m@ must be strict
+mapVectorWithIndexM :: (Storable a, Storable b, Monad m) => (Int -> a -> m b) -> Vector a -> m (Vector b)
+mapVectorWithIndexM f v = do
+    w <- return $! unsafePerformIO $! createVector (dim v)
+    mapVectorM' w 0 (dim v -1)
+    return w
+    where mapVectorM' w' !k !t
+              | k == t               = do
+                                       x <- return $! inlinePerformIO $! unsafeWith v $! \p -> peekElemOff p k 
+                                       y <- f k x
+                                       return $! inlinePerformIO $! unsafeWith w' $! \q -> pokeElemOff q k y
+              | otherwise            = do
+                                       x <- return $! inlinePerformIO $! unsafeWith v $! \p -> peekElemOff p k 
+                                       y <- f k x
+                                       _ <- return $! inlinePerformIO $! unsafeWith w' $! \q -> pokeElemOff q k y
+                                       mapVectorM' w' (k+1) t
+{-# INLINE mapVectorWithIndexM #-}
+-- | monadic map over Vectors with the zero-indexed index passed to the mapping function
+mapVectorWithIndexM_ :: (Storable a, Monad m) => (Int -> a -> m ()) -> Vector a -> m ()
+mapVectorWithIndexM_ f v = do
+    mapVectorM' 0 (dim v -1)
+    where mapVectorM' !k !t
+              | k == t            = do
+                                    x <- return $! inlinePerformIO $! unsafeWith v $! \p -> peekElemOff p k
+                                    f k x
+              | otherwise         = do
+                                    x <- return $! inlinePerformIO $! unsafeWith v $! \p -> peekElemOff p k 
+                                    _ <- f k x
+                                    mapVectorM' (k+1) t
+{-# INLINE mapVectorWithIndexM_ #-}
+mapVectorWithIndex :: (Storable a, Storable b) => (Int -> a -> b) -> Vector a -> Vector b
+--mapVectorWithIndex g = head . mapVectorWithIndexM (\a b -> [g a b])
+mapVectorWithIndex f v = unsafePerformIO $ do
+    w <- createVector (dim v)
+    unsafeWith v $ \p ->
+        unsafeWith w $ \q -> do
+            let go (-1) = return ()
+                go !k = do x <- peekElemOff p k
+                           pokeElemOff      q k (f k x)
+                           go (k-1)
+            go (dim v -1)
+    return w
+{-# INLINE mapVectorWithIndex #-}
diff --git a/packages/base/src/Data/Packed/Matrix.hs b/packages/base/src/Data/Packed/Matrix.hs
new file mode 100644
index 0000000..d94d167
--- /dev/null
+++ b/packages/base/src/Data/Packed/Matrix.hs
@@ -0,0 +1,490 @@
+{-# LANGUAGE TypeFamilies #-}
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE FlexibleInstances #-}
+{-# LANGUAGE MultiParamTypeClasses #-}
+{-# LANGUAGE CPP #-}
+-----------------------------------------------------------------------------
+-- |
+-- Module      :  Data.Packed.Matrix
+-- Copyright   :  (c) Alberto Ruiz 2007-10
+-- License     :  GPL
+--
+-- Maintainer  :  Alberto Ruiz <aruiz@um.es>
+-- Stability   :  provisional
+--
+-- A Matrix representation suitable for numerical computations using LAPACK and GSL.
+--
+-- This module provides basic functions for manipulation of structure.
+-----------------------------------------------------------------------------
+{-# OPTIONS_HADDOCK hide #-}
+module Data.Packed.Matrix (
+    Matrix,
+    Element,
+    rows,cols,
+    (><),
+    trans,
+    reshape, flatten,
+    fromLists, toLists, buildMatrix,
+    (@@>),
+    asRow, asColumn,
+    fromRows, toRows, fromColumns, toColumns,
+    fromBlocks, diagBlock, toBlocks, toBlocksEvery,
+    repmat,
+    flipud, fliprl,
+    subMatrix, takeRows, dropRows, takeColumns, dropColumns,
+    extractRows, extractColumns,
+    diagRect, takeDiag,
+    mapMatrix, mapMatrixWithIndex, mapMatrixWithIndexM, mapMatrixWithIndexM_,
+    liftMatrix, liftMatrix2, liftMatrix2Auto,fromArray2D
+) where
+import Data.Packed.Internal
+import qualified Data.Packed.ST as ST
+import Data.Array
+import Data.List(transpose,intersperse)
+import Foreign.Storable(Storable)
+import Control.Monad(liftM)
+-------------------------------------------------------------------
+#ifdef BINARY
+import Data.Binary
+import Control.Monad(replicateM)
+instance (Binary a, Element a, Storable a) => Binary (Matrix a) where
+    put m = do
+            let r = rows m
+            let c = cols m
+            put r
+            put c
+            mapM_ (\i -> mapM_ (\j -> put $ m @@> (i,j)) [0..(c-1)]) [0..(r-1)]
+    get = do
+          r <- get
+          c <- get
+          xs <- replicateM r $ replicateM c get
+          return $ fromLists xs
+#endif
+-------------------------------------------------------------------
+instance (Show a, Element a) => (Show (Matrix a)) where
+    show m | rows m == 0 || cols m == 0 = sizes m ++" []"
+    show m = (sizes m++) . dsp . map (map show) . toLists $ m
+sizes m = "("++show (rows m)++"><"++show (cols m)++")\n"
+dsp as = (++" ]") . (" ["++) . init . drop 2 . unlines . map (" , "++) . map unwords' $ transpose mtp
+    where
+        mt = transpose as
+        longs = map (maximum . map length) mt
+        mtp = zipWith (\a b -> map (pad a) b) longs mt
+        pad n str = replicate (n - length str) ' ' ++ str
+        unwords' = concat . intersperse ", "
+------------------------------------------------------------------
+instance (Element a, Read a) => Read (Matrix a) where
+    readsPrec _ s = [((rs><cs) . read $ listnums, rest)]
+        where (thing,rest) = breakAt ']' s
+              (dims,listnums) = breakAt ')' thing
+              cs = read . init . fst. breakAt ')' . snd . breakAt '<' $ dims
+              rs = read . snd . breakAt '(' .init . fst . breakAt '>' $ dims
+breakAt c l = (a++[c],tail b) where
+    (a,b) = break (==c) l
+------------------------------------------------------------------
+-- | creates a matrix from a vertical list of matrices
+joinVert :: Element t => [Matrix t] -> Matrix t
+joinVert [] = emptyM 0 0
+joinVert ms = case common cols ms of
+    Nothing -> error "(impossible) joinVert on matrices with different number of columns"
+    Just c  -> matrixFromVector RowMajor (sum (map rows ms)) c $ vjoin (map flatten ms)
+-- | creates a matrix from a horizontal list of matrices
+joinHoriz :: Element t => [Matrix t] -> Matrix t
+joinHoriz ms = trans. joinVert . map trans $ ms
+{- | Create a matrix from blocks given as a list of lists of matrices.
+Single row-column components are automatically expanded to match the
+corresponding common row and column:
+@
+disp = putStr . dispf 2
+@
+>>> disp $ fromBlocks [[ident 5, 7, row[10,20]], [3, diagl[1,2,3], 0]]
+8x10
+1  0  0  0  0  7  7  7  10  20
+0  1  0  0  0  7  7  7  10  20
+0  0  1  0  0  7  7  7  10  20
+0  0  0  1  0  7  7  7  10  20
+0  0  0  0  1  7  7  7  10  20
+3  3  3  3  3  1  0  0   0   0
+3  3  3  3  3  0  2  0   0   0
+3  3  3  3  3  0  0  3   0   0
+-}
+fromBlocks :: Element t => [[Matrix t]] -> Matrix t
+fromBlocks = fromBlocksRaw . adaptBlocks
+fromBlocksRaw mms = joinVert . map joinHoriz $ mms
+adaptBlocks ms = ms' where
+    bc = case common length ms of
+          Just c -> c
+          Nothing -> error "fromBlocks requires rectangular [[Matrix]]"
+    rs = map (compatdim . map rows) ms
+    cs = map (compatdim . map cols) (transpose ms)
+    szs = sequence [rs,cs]
+    ms' = splitEvery bc $ zipWith g szs (concat ms)
+    g [Just nr,Just nc] m
+                | nr == r && nc == c = m
+                | r == 1 && c == 1 = matrixFromVector RowMajor nr nc (constantD x (nr*nc))
+                | r == 1 = fromRows (replicate nr (flatten m))
+                | otherwise = fromColumns (replicate nc (flatten m))
+      where
+        r = rows m
+        c = cols m
+        x = m@@>(0,0)
+    g _ _ = error "inconsistent dimensions in fromBlocks"
+--------------------------------------------------------------------------------
+{- | create a block diagonal matrix
+>>>  disp 2 $ diagBlock [konst 1 (2,2), konst 2 (3,5), col [5,7]]
+7x8
+1  1  0  0  0  0  0  0
+1  1  0  0  0  0  0  0
+0  0  2  2  2  2  2  0
+0  0  2  2  2  2  2  0
+0  0  2  2  2  2  2  0
+0  0  0  0  0  0  0  5
+0  0  0  0  0  0  0  7
+>>> diagBlock [(0><4)[], konst 2 (2,3)]  :: Matrix Double
+(2><7)
+ [ 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0
+ , 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0 ]
+-}
+diagBlock :: (Element t, Num t) => [Matrix t] -> Matrix t
+diagBlock ms = fromBlocks $ zipWith f ms [0..]
+  where
+    f m k = take n $ replicate k z ++ m : repeat z
+    n = length ms
+    z = (1><1) [0]
+--------------------------------------------------------------------------------
+-- | Reverse rows
+flipud :: Element t => Matrix t -> Matrix t
+flipud m = extractRows [r-1,r-2 .. 0] $ m
+  where
+    r = rows m
+-- | Reverse columns
+fliprl :: Element t => Matrix t -> Matrix t
+fliprl m = extractColumns [c-1,c-2 .. 0] $ m
+  where
+    c = cols m
+------------------------------------------------------------
+{- | creates a rectangular diagonal matrix:
+>>> diagRect 7 (fromList [10,20,30]) 4 5 :: Matrix Double
+(4><5)
+ [ 10.0,  7.0,  7.0, 7.0, 7.0
+ ,  7.0, 20.0,  7.0, 7.0, 7.0
+ ,  7.0,  7.0, 30.0, 7.0, 7.0
+ ,  7.0,  7.0,  7.0, 7.0, 7.0 ]
+-}
+diagRect :: (Storable t) => t -> Vector t -> Int -> Int -> Matrix t
+diagRect z v r c = ST.runSTMatrix $ do
+        m <- ST.newMatrix z r c
+        let d = min r c `min` (dim v)
+        mapM_ (\k -> ST.writeMatrix m k k (v@>k)) [0..d-1]
+        return m
+-- | extracts the diagonal from a rectangular matrix
+takeDiag :: (Element t) => Matrix t -> Vector t
+takeDiag m = fromList [flatten m `at` (k*cols m+k) | k <- [0 .. min (rows m) (cols m) -1]]
+------------------------------------------------------------
+{- | An easy way to create a matrix:
+>>> (2><3)[2,4,7,-3,11,0]
+(2><3)
+ [  2.0,  4.0, 7.0
+ , -3.0, 11.0, 0.0 ]
+This is the format produced by the instances of Show (Matrix a), which
+can also be used for input.
+The input list is explicitly truncated, so that it can
+safely be used with lists that are too long (like infinite lists).
+>>> (2><3)[1..]
+(2><3)
+ [ 1.0, 2.0, 3.0
+ , 4.0, 5.0, 6.0 ]
+-}
+(><) :: (Storable a) => Int -> Int -> [a] -> Matrix a
+r >< c = f where
+    f l | dim v == r*c = matrixFromVector RowMajor r c v
+        | otherwise    = error $ "inconsistent list size = "
+                                 ++show (dim v) ++" in ("++show r++"><"++show c++")"
+        where v = fromList $ take (r*c) l
+----------------------------------------------------------------
+-- | Creates a matrix with the first n rows of another matrix
+takeRows :: Element t => Int -> Matrix t -> Matrix t
+takeRows n mt = subMatrix (0,0) (n, cols mt) mt
+-- | Creates a copy of a matrix without the first n rows
+dropRows :: Element t => Int -> Matrix t -> Matrix t
+dropRows n mt = subMatrix (n,0) (rows mt - n, cols mt) mt
+-- |Creates a matrix with the first n columns of another matrix
+takeColumns :: Element t => Int -> Matrix t -> Matrix t
+takeColumns n mt = subMatrix (0,0) (rows mt, n) mt
+-- | Creates a copy of a matrix without the first n columns
+dropColumns :: Element t => Int -> Matrix t -> Matrix t
+dropColumns n mt = subMatrix (0,n) (rows mt, cols mt - n) mt
+----------------------------------------------------------------
+{- | Creates a 'Matrix' from a list of lists (considered as rows).
+>>> fromLists [[1,2],[3,4],[5,6]]
+(3><2)
+ [ 1.0, 2.0
+ , 3.0, 4.0
+ , 5.0, 6.0 ]
+-}
+fromLists :: Element t => [[t]] -> Matrix t
+fromLists = fromRows . map fromList
+-- | creates a 1-row matrix from a vector
+--
+-- >>> asRow (fromList [1..5])
+--  (1><5)
+--   [ 1.0, 2.0, 3.0, 4.0, 5.0 ]
+--
+asRow :: Storable a => Vector a -> Matrix a
+asRow v = reshape (dim v) v
+-- | creates a 1-column matrix from a vector
+--
+-- >>> asColumn (fromList [1..5])
+-- (5><1)
+--  [ 1.0
+--  , 2.0
+--  , 3.0
+--  , 4.0
+--  , 5.0 ]
+--
+asColumn :: Storable a => Vector a -> Matrix a
+asColumn = trans . asRow
+{- | creates a Matrix of the specified size using the supplied function to
+     to map the row\/column position to the value at that row\/column position.
+@> buildMatrix 3 4 (\\(r,c) -> fromIntegral r * fromIntegral c)
+(3><4)
+ [ 0.0, 0.0, 0.0, 0.0, 0.0
+ , 0.0, 1.0, 2.0, 3.0, 4.0
+ , 0.0, 2.0, 4.0, 6.0, 8.0]@
+Hilbert matrix of order N:
+@hilb n = buildMatrix n n (\\(i,j)->1/(fromIntegral i + fromIntegral j +1))@
+-}
+buildMatrix :: Element a => Int -> Int -> ((Int, Int) -> a) -> Matrix a
+buildMatrix rc cc f =
+    fromLists $ map (map f)
+        $ map (\ ri -> map (\ ci -> (ri, ci)) [0 .. (cc - 1)]) [0 .. (rc - 1)]
+-----------------------------------------------------
+fromArray2D :: (Storable e) => Array (Int, Int) e -> Matrix e
+fromArray2D m = (r><c) (elems m)
+    where ((r0,c0),(r1,c1)) = bounds m
+          r = r1-r0+1
+          c = c1-c0+1
+-- | rearranges the rows of a matrix according to the order given in a list of integers.
+extractRows :: Element t => [Int] -> Matrix t -> Matrix t
+extractRows [] m = emptyM 0 (cols m)
+extractRows l m = fromRows $ extract (toRows m) l
+  where
+    extract l' is = [l'!!i | i<- map verify is]
+    verify k
+        | k >= 0 && k < rows m = k
+        | otherwise = error $ "can't extract row "
+                           ++show k++" in list " ++ show l ++ " from matrix " ++ shSize m
+-- | rearranges the rows of a matrix according to the order given in a list of integers.
+extractColumns :: Element t => [Int] -> Matrix t -> Matrix t
+extractColumns l m = trans . extractRows (map verify l) . trans $ m
+  where
+    verify k
+        | k >= 0 && k < cols m = k
+        | otherwise = error $ "can't extract column "
+                           ++show k++" in list " ++ show l ++ " from matrix " ++ shSize m
+{- | creates matrix by repetition of a matrix a given number of rows and columns
+>>> repmat (ident 2) 2 3
+(4><6)
+ [ 1.0, 0.0, 1.0, 0.0, 1.0, 0.0
+ , 0.0, 1.0, 0.0, 1.0, 0.0, 1.0
+ , 1.0, 0.0, 1.0, 0.0, 1.0, 0.0
+ , 0.0, 1.0, 0.0, 1.0, 0.0, 1.0 ]
+-}
+repmat :: (Element t) => Matrix t -> Int -> Int -> Matrix t
+repmat m r c
+    | r == 0 || c == 0 = emptyM (r*rows m) (c*cols m)
+    | otherwise = fromBlocks $ replicate r $ replicate c $ m
+-- | A version of 'liftMatrix2' which automatically adapt matrices with a single row or column to match the dimensions of the other matrix.
+liftMatrix2Auto :: (Element t, Element a, Element b) => (Vector a -> Vector b -> Vector t) -> Matrix a -> Matrix b -> Matrix t
+liftMatrix2Auto f m1 m2
+    | compat' m1 m2 = lM f m1  m2
+    | ok            = lM f m1' m2'
+    | otherwise = error $ "nonconformable matrices in liftMatrix2Auto: " ++ shSize m1 ++ ", " ++ shSize m2
+  where
+    (r1,c1) = size m1
+    (r2,c2) = size m2
+    r = max r1 r2
+    c = max c1 c2
+    r0 = min r1 r2
+    c0 = min c1 c2
+    ok = r0 == 1 || r1 == r2 && c0 == 1 || c1 == c2
+    m1' = conformMTo (r,c) m1
+    m2' = conformMTo (r,c) m2
+-- FIXME do not flatten if equal order
+lM f m1 m2 = matrixFromVector
+                RowMajor
+                (max (rows m1) (rows m2))
+                (max (cols m1) (cols m2))
+                (f (flatten m1) (flatten m2))
+compat' :: Matrix a -> Matrix b -> Bool
+compat' m1 m2 = s1 == (1,1) || s2 == (1,1) || s1 == s2
+  where
+    s1 = size m1
+    s2 = size m2
+------------------------------------------------------------
+toBlockRows [r] m | r == rows m = [m]
+toBlockRows rs m = map (reshape (cols m)) (takesV szs (flatten m))
+    where szs = map (* cols m) rs
+toBlockCols [c] m | c == cols m = [m]
+toBlockCols cs m = map trans . toBlockRows cs . trans $ m
+-- | Partition a matrix into blocks with the given numbers of rows and columns.
+-- The remaining rows and columns are discarded.
+toBlocks :: (Element t) => [Int] -> [Int] -> Matrix t -> [[Matrix t]]
+toBlocks rs cs m = map (toBlockCols cs) . toBlockRows rs $ m
+-- | Fully partition a matrix into blocks of the same size. If the dimensions are not
+-- a multiple of the given size the last blocks will be smaller.
+toBlocksEvery :: (Element t) => Int -> Int -> Matrix t -> [[Matrix t]]
+toBlocksEvery r c m
+    | r < 1 || c < 1 = error $ "toBlocksEvery expects block sizes > 0, given "++show r++" and "++ show c
+    | otherwise = toBlocks rs cs m
+  where
+    (qr,rr) = rows m `divMod` r
+    (qc,rc) = cols m `divMod` c
+    rs = replicate qr r ++ if rr > 0 then [rr] else []
+    cs = replicate qc c ++ if rc > 0 then [rc] else []
+-------------------------------------------------------------------
+-- Given a column number and a function taking matrix indexes, returns
+-- a function which takes vector indexes (that can be used on the
+-- flattened matrix).
+mk :: Int -> ((Int, Int) -> t) -> (Int -> t)
+mk c g = \k -> g (divMod k c)
+{- |
+>>> mapMatrixWithIndexM_ (\(i,j) v -> printf "m[%d,%d] = %.f\n" i j v :: IO()) ((2><3)[1 :: Double ..])
+m[0,0] = 1
+m[0,1] = 2
+m[0,2] = 3
+m[1,0] = 4
+m[1,1] = 5
+m[1,2] = 6
+-}
+mapMatrixWithIndexM_
+  :: (Element a, Num a, Monad m) =>
+      ((Int, Int) -> a -> m ()) -> Matrix a -> m ()
+mapMatrixWithIndexM_ g m = mapVectorWithIndexM_ (mk c g) . flatten $ m
+  where
+    c = cols m
+{- |
+>>> mapMatrixWithIndexM (\(i,j) v -> Just $ 100*v + 10*fromIntegral i + fromIntegral j) (ident 3:: Matrix Double)
+Just (3><3)
+ [ 100.0,   1.0,   2.0
+ ,  10.0, 111.0,  12.0
+ ,  20.0,  21.0, 122.0 ]
+-}
+mapMatrixWithIndexM
+  :: (Element a, Storable b, Monad m) =>
+      ((Int, Int) -> a -> m b) -> Matrix a -> m (Matrix b)
+mapMatrixWithIndexM g m = liftM (reshape c) . mapVectorWithIndexM (mk c g) . flatten $ m 
+    where
+      c = cols m
+{- |
+>>> mapMatrixWithIndex (\\(i,j) v -> 100*v + 10*fromIntegral i + fromIntegral j) (ident 3:: Matrix Double)
+(3><3)
+ [ 100.0,   1.0,   2.0
+ ,  10.0, 111.0,  12.0
+ ,  20.0,  21.0, 122.0 ]
+ -}
+mapMatrixWithIndex
+  :: (Element a, Storable b) =>
+      ((Int, Int) -> a -> b) -> Matrix a -> Matrix b
+mapMatrixWithIndex g m = reshape c . mapVectorWithIndex (mk c g) . flatten $ m
+    where
+      c = cols m
+mapMatrix :: (Storable a, Storable b) => (a -> b) -> Matrix a -> Matrix b
+mapMatrix f = liftMatrix (mapVector f)
diff --git a/packages/base/src/Data/Packed/ST.hs b/packages/base/src/Data/Packed/ST.hs
new file mode 100644
index 0000000..dae457c
--- /dev/null
+++ b/packages/base/src/Data/Packed/ST.hs
@@ -0,0 +1,178 @@
+{-# LANGUAGE CPP #-}
+{-# LANGUAGE TypeOperators #-}
+{-# LANGUAGE Rank2Types    #-}
+{-# LANGUAGE BangPatterns  #-}
+-----------------------------------------------------------------------------
+-- |
+-- Module      :  Data.Packed.ST
+-- Copyright   :  (c) Alberto Ruiz 2008
+-- License     :  BSD3
+-- Maintainer  :  Alberto Ruiz
+-- Stability   :  provisional
+-- Portability :  portable
+--
+-- In-place manipulation inside the ST monad.
+-- See examples/inplace.hs in the distribution.
+--
+-----------------------------------------------------------------------------
+module Data.Packed.ST (
+    -- * Mutable Vectors
+    STVector, newVector, thawVector, freezeVector, runSTVector,
+    readVector, writeVector, modifyVector, liftSTVector,
+    -- * Mutable Matrices
+    STMatrix, newMatrix, thawMatrix, freezeMatrix, runSTMatrix,
+    readMatrix, writeMatrix, modifyMatrix, liftSTMatrix,
+    -- * Unsafe functions
+    newUndefinedVector,
+    unsafeReadVector, unsafeWriteVector,
+    unsafeThawVector, unsafeFreezeVector,
+    newUndefinedMatrix,
+    unsafeReadMatrix, unsafeWriteMatrix,
+    unsafeThawMatrix, unsafeFreezeMatrix
+) where
+import Data.Packed.Internal
+import Control.Monad.ST(ST, runST)
+import Foreign.Storable(Storable, peekElemOff, pokeElemOff)
+#if MIN_VERSION_base(4,4,0)
+import Control.Monad.ST.Unsafe(unsafeIOToST)
+#else
+import Control.Monad.ST(unsafeIOToST)
+#endif
+{-# INLINE ioReadV #-}
+ioReadV :: Storable t => Vector t -> Int -> IO t
+ioReadV v k = unsafeWith v $ \s -> peekElemOff s k
+{-# INLINE ioWriteV #-}
+ioWriteV :: Storable t => Vector t -> Int -> t -> IO ()
+ioWriteV v k x = unsafeWith v $ \s -> pokeElemOff s k x
+newtype STVector s t = STVector (Vector t)
+thawVector :: Storable t => Vector t -> ST s (STVector s t)
+thawVector = unsafeIOToST . fmap STVector . cloneVector
+unsafeThawVector :: Storable t => Vector t -> ST s (STVector s t)
+unsafeThawVector = unsafeIOToST . return . STVector
+runSTVector :: Storable t => (forall s . ST s (STVector s t)) -> Vector t
+runSTVector st = runST (st >>= unsafeFreezeVector)
+{-# INLINE unsafeReadVector #-}
+unsafeReadVector :: Storable t => STVector s t -> Int -> ST s t
+unsafeReadVector   (STVector x) = unsafeIOToST . ioReadV x
+{-# INLINE unsafeWriteVector #-}
+unsafeWriteVector :: Storable t => STVector s t -> Int -> t -> ST s ()
+unsafeWriteVector  (STVector x) k = unsafeIOToST . ioWriteV x k
+{-# INLINE modifyVector #-}
+modifyVector :: (Storable t) => STVector s t -> Int -> (t -> t) -> ST s ()
+modifyVector x k f = readVector x k >>= return . f >>= unsafeWriteVector x k
+liftSTVector :: (Storable t) => (Vector t -> a) -> STVector s1 t -> ST s2 a
+liftSTVector f (STVector x) = unsafeIOToST . fmap f . cloneVector $ x
+freezeVector :: (Storable t) => STVector s1 t -> ST s2 (Vector t)
+freezeVector v = liftSTVector id v
+unsafeFreezeVector :: (Storable t) => STVector s1 t -> ST s2 (Vector t)
+unsafeFreezeVector (STVector x) = unsafeIOToST . return $ x
+{-# INLINE safeIndexV #-}
+safeIndexV f (STVector v) k
+    | k < 0 || k>= dim v = error $ "out of range error in vector (dim="
+                                   ++show (dim v)++", pos="++show k++")"
+    | otherwise = f (STVector v) k
+{-# INLINE readVector #-}
+readVector :: Storable t => STVector s t -> Int -> ST s t
+readVector = safeIndexV unsafeReadVector
+{-# INLINE writeVector #-}
+writeVector :: Storable t => STVector s t -> Int -> t -> ST s ()
+writeVector = safeIndexV unsafeWriteVector
+newUndefinedVector :: Storable t => Int -> ST s (STVector s t)
+newUndefinedVector = unsafeIOToST . fmap STVector . createVector
+{-# INLINE newVector #-}
+newVector :: Storable t => t -> Int -> ST s (STVector s t)
+newVector x n = do
+    v <- newUndefinedVector n
+    let go (-1) = return v
+        go !k = unsafeWriteVector v k x >> go (k-1 :: Int)
+    go (n-1)
+-------------------------------------------------------------------------
+{-# INLINE ioReadM #-}
+ioReadM :: Storable t => Matrix t -> Int -> Int -> IO t
+ioReadM (Matrix _ nc cv RowMajor) r c = ioReadV cv (r*nc+c)
+ioReadM (Matrix nr _ fv ColumnMajor) r c = ioReadV fv (c*nr+r)
+{-# INLINE ioWriteM #-}
+ioWriteM :: Storable t => Matrix t -> Int -> Int -> t -> IO ()
+ioWriteM (Matrix _ nc cv RowMajor) r c val = ioWriteV cv (r*nc+c) val
+ioWriteM (Matrix nr _ fv ColumnMajor) r c val = ioWriteV fv (c*nr+r) val
+newtype STMatrix s t = STMatrix (Matrix t)
+thawMatrix :: Storable t => Matrix t -> ST s (STMatrix s t)
+thawMatrix = unsafeIOToST . fmap STMatrix . cloneMatrix
+unsafeThawMatrix :: Storable t => Matrix t -> ST s (STMatrix s t)
+unsafeThawMatrix = unsafeIOToST . return . STMatrix
+runSTMatrix :: Storable t => (forall s . ST s (STMatrix s t)) -> Matrix t
+runSTMatrix st = runST (st >>= unsafeFreezeMatrix)
+{-# INLINE unsafeReadMatrix #-}
+unsafeReadMatrix :: Storable t => STMatrix s t -> Int -> Int -> ST s t
+unsafeReadMatrix   (STMatrix x) r = unsafeIOToST . ioReadM x r
+{-# INLINE unsafeWriteMatrix #-}
+unsafeWriteMatrix :: Storable t => STMatrix s t -> Int -> Int -> t -> ST s ()
+unsafeWriteMatrix  (STMatrix x) r c = unsafeIOToST . ioWriteM x r c
+{-# INLINE modifyMatrix #-}
+modifyMatrix :: (Storable t) => STMatrix s t -> Int -> Int -> (t -> t) -> ST s ()
+modifyMatrix x r c f = readMatrix x r c >>= return . f >>= unsafeWriteMatrix x r c
+liftSTMatrix :: (Storable t) => (Matrix t -> a) -> STMatrix s1 t -> ST s2 a
+liftSTMatrix f (STMatrix x) = unsafeIOToST . fmap f . cloneMatrix $ x
+unsafeFreezeMatrix :: (Storable t) => STMatrix s1 t -> ST s2 (Matrix t)
+unsafeFreezeMatrix (STMatrix x) = unsafeIOToST . return $ x
+freezeMatrix :: (Storable t) => STMatrix s1 t -> ST s2 (Matrix t)
+freezeMatrix m = liftSTMatrix id m
+cloneMatrix (Matrix r c d o) = cloneVector d >>= return . (\d' -> Matrix r c d' o)
+{-# INLINE safeIndexM #-}
+safeIndexM f (STMatrix m) r c
+    | r<0 || r>=rows m ||
+      c<0 || c>=cols m = error $ "out of range error in matrix (size="
+                                 ++show (rows m,cols m)++", pos="++show (r,c)++")"
+    | otherwise = f (STMatrix m) r c
+{-# INLINE readMatrix #-}
+readMatrix :: Storable t => STMatrix s t -> Int -> Int -> ST s t
+readMatrix = safeIndexM unsafeReadMatrix
+{-# INLINE writeMatrix #-}
+writeMatrix :: Storable t => STMatrix s t -> Int -> Int -> t -> ST s ()
+writeMatrix = safeIndexM unsafeWriteMatrix
+newUndefinedMatrix :: Storable t => MatrixOrder -> Int -> Int -> ST s (STMatrix s t)
+newUndefinedMatrix ord r c = unsafeIOToST $ fmap STMatrix $ createMatrix ord r c
+{-# NOINLINE newMatrix #-}
+newMatrix :: Storable t => t -> Int -> Int -> ST s (STMatrix s t)
+newMatrix v r c = unsafeThawMatrix $ reshape c $ runSTVector $ newVector v (r*c)
diff --git a/packages/base/src/Data/Packed/Vector.hs b/packages/base/src/Data/Packed/Vector.hs
new file mode 100644
index 0000000..b5a4318
--- /dev/null
+++ b/packages/base/src/Data/Packed/Vector.hs
@@ -0,0 +1,96 @@
+{-# LANGUAGE FlexibleContexts #-}
+{-# LANGUAGE CPP #-}
+-----------------------------------------------------------------------------
+-- |
+-- Module      :  Data.Packed.Vector
+-- Copyright   :  (c) Alberto Ruiz 2007-10
+-- License     :  GPL
+--
+-- Maintainer  :  Alberto Ruiz <aruiz@um.es>
+-- Stability   :  provisional
+--
+-- 1D arrays suitable for numeric computations using external libraries.
+--
+-- This module provides basic functions for manipulation of structure.
+--
+-----------------------------------------------------------------------------
+{-# OPTIONS_HADDOCK hide #-}
+module Data.Packed.Vector (
+    Vector,
+    fromList, (|>), toList, buildVector,
+    dim, (@>),
+    subVector, takesV, vjoin, join,
+    mapVector, mapVectorWithIndex, zipVector, zipVectorWith, unzipVector, unzipVectorWith,
+    mapVectorM, mapVectorM_, mapVectorWithIndexM, mapVectorWithIndexM_,
+    foldLoop, foldVector, foldVectorG, foldVectorWithIndex
+) where
+import Data.Packed.Internal.Vector
+import Foreign.Storable
+-------------------------------------------------------------------
+#ifdef BINARY
+import Data.Binary
+import Control.Monad(replicateM)
+-- a 64K cache, with a Double taking 13 bytes in Bytestring,
+-- implies a chunk size of 5041
+chunk :: Int
+chunk = 5000
+chunks :: Int -> [Int]
+chunks d = let c = d `div` chunk
+               m = d `mod` chunk
+           in if m /= 0 then reverse (m:(replicate c chunk)) else (replicate c chunk)  
+putVector v = do
+              let d = dim v
+              mapM_ (\i -> put $ v @> i) [0..(d-1)]
+getVector d = do
+              xs <- replicateM d get
+              return $! fromList xs
+instance (Binary a, Storable a) => Binary (Vector a) where
+    put v = do
+            let d = dim v
+            put d
+            mapM_ putVector $! takesV (chunks d) v
+    get = do
+          d <- get
+          vs <- mapM getVector $ chunks d
+          return $! vjoin vs
+#endif
+-------------------------------------------------------------------
+{- | creates a Vector of the specified length using the supplied function to
+     to map the index to the value at that index.
+@> buildVector 4 fromIntegral
+4 |> [0.0,1.0,2.0,3.0]@
+-}
+buildVector :: Storable a => Int -> (Int -> a) -> Vector a
+buildVector len f =
+    fromList $ map f [0 .. (len - 1)]
+-- | zip for Vectors
+zipVector :: (Storable a, Storable b, Storable (a,b)) => Vector a -> Vector b -> Vector (a,b)
+zipVector = zipVectorWith (,)
+-- | unzip for Vectors
+unzipVector :: (Storable a, Storable b, Storable (a,b)) => Vector (a,b) -> (Vector a,Vector b)
+unzipVector = unzipVectorWith id
+-------------------------------------------------------------------
+{-# DEPRECATED join "use vjoin or Data.Vector.concat" #-}
+join ::  Storable t => [Vector t] -> Vector t
+join = vjoin