diff options
Diffstat (limited to 'packages/hmatrix/examples/pca1.hs')
-rw-r--r-- | packages/hmatrix/examples/pca1.hs | 46 |
1 files changed, 46 insertions, 0 deletions
diff --git a/packages/hmatrix/examples/pca1.hs b/packages/hmatrix/examples/pca1.hs new file mode 100644 index 0000000..a11eba9 --- /dev/null +++ b/packages/hmatrix/examples/pca1.hs | |||
@@ -0,0 +1,46 @@ | |||
1 | -- Principal component analysis | ||
2 | |||
3 | import Numeric.LinearAlgebra | ||
4 | import System.Directory(doesFileExist) | ||
5 | import System.Process(system) | ||
6 | import Control.Monad(when) | ||
7 | |||
8 | type Vec = Vector Double | ||
9 | type Mat = Matrix Double | ||
10 | |||
11 | |||
12 | -- Vector with the mean value of the columns of a matrix | ||
13 | mean a = constant (recip . fromIntegral . rows $ a) (rows a) <> a | ||
14 | |||
15 | -- covariance matrix of a list of observations stored as rows | ||
16 | cov x = (trans xc <> xc) / fromIntegral (rows x - 1) | ||
17 | where xc = x - asRow (mean x) | ||
18 | |||
19 | |||
20 | -- creates the compression and decompression functions from the desired number of components | ||
21 | pca :: Int -> Mat -> (Vec -> Vec , Vec -> Vec) | ||
22 | pca n dataSet = (encode,decode) | ||
23 | where | ||
24 | encode x = vp <> (x - m) | ||
25 | decode x = x <> vp + m | ||
26 | m = mean dataSet | ||
27 | c = cov dataSet | ||
28 | (_,v) = eigSH' c | ||
29 | vp = takeRows n (trans v) | ||
30 | |||
31 | norm = pnorm PNorm2 | ||
32 | |||
33 | main = do | ||
34 | ok <- doesFileExist ("mnist.txt") | ||
35 | when (not ok) $ do | ||
36 | putStrLn "\nTrying to download test datafile..." | ||
37 | system("wget -nv http://dis.um.es/~alberto/material/sp/mnist.txt.gz") | ||
38 | system("gunzip mnist.txt.gz") | ||
39 | return () | ||
40 | m <- loadMatrix "mnist.txt" -- fromFile "mnist.txt" (5000,785) | ||
41 | let xs = takeColumns (cols m -1) m -- the last column is the digit type (class label) | ||
42 | let x = toRows xs !! 4 -- an arbitrary test Vec | ||
43 | let (pe,pd) = pca 10 xs | ||
44 | let y = pe x | ||
45 | print y -- compressed version | ||
46 | print $ norm (x - pd y) / norm x --reconstruction quality | ||