diff options
Diffstat (limited to 'examples/pca1.hs')
-rw-r--r-- | examples/pca1.hs | 45 |
1 files changed, 45 insertions, 0 deletions
diff --git a/examples/pca1.hs b/examples/pca1.hs new file mode 100644 index 0000000..2c5074d --- /dev/null +++ b/examples/pca1.hs | |||
@@ -0,0 +1,45 @@ | |||
1 | -- Principal component analysis | ||
2 | |||
3 | import LinearAlgebra | ||
4 | import System.Directory(doesFileExist) | ||
5 | import System(system) | ||
6 | import Control.Monad(when) | ||
7 | |||
8 | type Vec = Vector Double | ||
9 | type Mat = Matrix Double | ||
10 | |||
11 | sumColumns m = constant 1 (rows m) <> m | ||
12 | |||
13 | -- Vec with the mean value of the columns of a Mat | ||
14 | mean x = sumColumns x / fromIntegral (rows x) | ||
15 | |||
16 | -- covariance Mat of a list of observations as rows of a Mat | ||
17 | cov x = (trans xc <> xc) / fromIntegral (rows x -1) | ||
18 | where xc = center x | ||
19 | center m = m - constant 1 (rows m) `outer` mean m | ||
20 | |||
21 | -- creates the compression and decompression functions from the desired number of components | ||
22 | pca :: Int -> Mat -> (Vec -> Vec , Vec -> Vec) | ||
23 | pca n dataSet = (encode,decode) | ||
24 | where | ||
25 | encode x = vp <> (x - m) | ||
26 | decode x = x <> vp + m | ||
27 | m = mean dataSet | ||
28 | c = cov dataSet | ||
29 | (_,v) = eigS c | ||
30 | vp = takeRows n (trans v) | ||
31 | |||
32 | main = do | ||
33 | ok <- doesFileExist ("mnist.txt") | ||
34 | when (not ok) $ do | ||
35 | putStrLn "\nTrying to download test datafile..." | ||
36 | system("wget -nv http://dis.um.es/~alberto/material/sp/mnist.txt.gz") | ||
37 | system("gunzip mnist.txt.gz") | ||
38 | return () | ||
39 | m <- fromFile "mnist.txt" (5000,785) | ||
40 | let xs = takeColumns (cols m -1) m -- the last column is the digit type (class label) | ||
41 | let x = toRows xs !! 4 -- an arbitrary test Vec | ||
42 | let (pe,pd) = pca 10 xs | ||
43 | let y = pe x | ||
44 | print y -- compressed version | ||
45 | print $ norm (x - pd y) / norm x --reconstruction quality | ||