summaryrefslogtreecommitdiff
path: root/packages/hmatrix/examples/pca1.hs
diff options
context:
space:
mode:
Diffstat (limited to 'packages/hmatrix/examples/pca1.hs')
-rw-r--r--packages/hmatrix/examples/pca1.hs46
1 files changed, 46 insertions, 0 deletions
diff --git a/packages/hmatrix/examples/pca1.hs b/packages/hmatrix/examples/pca1.hs
new file mode 100644
index 0000000..a11eba9
--- /dev/null
+++ b/packages/hmatrix/examples/pca1.hs
@@ -0,0 +1,46 @@
1-- Principal component analysis
2
3import Numeric.LinearAlgebra
4import System.Directory(doesFileExist)
5import System.Process(system)
6import Control.Monad(when)
7
8type Vec = Vector Double
9type Mat = Matrix Double
10
11
12-- Vector with the mean value of the columns of a matrix
13mean a = constant (recip . fromIntegral . rows $ a) (rows a) <> a
14
15-- covariance matrix of a list of observations stored as rows
16cov x = (trans xc <> xc) / fromIntegral (rows x - 1)
17 where xc = x - asRow (mean x)
18
19
20-- creates the compression and decompression functions from the desired number of components
21pca :: Int -> Mat -> (Vec -> Vec , Vec -> Vec)
22pca n dataSet = (encode,decode)
23 where
24 encode x = vp <> (x - m)
25 decode x = x <> vp + m
26 m = mean dataSet
27 c = cov dataSet
28 (_,v) = eigSH' c
29 vp = takeRows n (trans v)
30
31norm = pnorm PNorm2
32
33main = do
34 ok <- doesFileExist ("mnist.txt")
35 when (not ok) $ do
36 putStrLn "\nTrying to download test datafile..."
37 system("wget -nv http://dis.um.es/~alberto/material/sp/mnist.txt.gz")
38 system("gunzip mnist.txt.gz")
39 return ()
40 m <- loadMatrix "mnist.txt" -- fromFile "mnist.txt" (5000,785)
41 let xs = takeColumns (cols m -1) m -- the last column is the digit type (class label)
42 let x = toRows xs !! 4 -- an arbitrary test Vec
43 let (pe,pd) = pca 10 xs
44 let y = pe x
45 print y -- compressed version
46 print $ norm (x - pd y) / norm x --reconstruction quality