diff options
author | Alberto Ruiz <aruiz@um.es> | 2014-05-21 10:30:55 +0200 |
---|---|---|
committer | Alberto Ruiz <aruiz@um.es> | 2014-05-21 10:30:55 +0200 |
commit | 197e88c3b56d28840217010a2871c6ea3a4dd1a4 (patch) | |
tree | 825be9d6c9d87d23f7e5497c0133d11d52c63535 /examples/pca2.hs | |
parent | e07c3dee7235496b71a89233106d93f6cc94ada1 (diff) |
update dependencies, move examples etc
Diffstat (limited to 'examples/pca2.hs')
-rw-r--r-- | examples/pca2.hs | 65 |
1 files changed, 65 insertions, 0 deletions
diff --git a/examples/pca2.hs b/examples/pca2.hs new file mode 100644 index 0000000..e7ea95f --- /dev/null +++ b/examples/pca2.hs | |||
@@ -0,0 +1,65 @@ | |||
1 | -- Improved PCA, including illustrative graphics | ||
2 | |||
3 | import Numeric.LinearAlgebra | ||
4 | import Graphics.Plot | ||
5 | import System.Directory(doesFileExist) | ||
6 | import System.Process(system) | ||
7 | import Control.Monad(when) | ||
8 | |||
9 | type Vec = Vector Double | ||
10 | type Mat = Matrix Double | ||
11 | |||
12 | -- Vector with the mean value of the columns of a matrix | ||
13 | mean a = constant (recip . fromIntegral . rows $ a) (rows a) <> a | ||
14 | |||
15 | -- covariance matrix of a list of observations stored as rows | ||
16 | cov x = (trans xc <> xc) / fromIntegral (rows x - 1) | ||
17 | where xc = x - asRow (mean x) | ||
18 | |||
19 | |||
20 | type Stat = (Vec, [Double], Mat) | ||
21 | -- 1st and 2nd order statistics of a dataset (mean, eigenvalues and eigenvectors of cov) | ||
22 | stat :: Mat -> Stat | ||
23 | stat x = (m, toList s, trans v) where | ||
24 | m = mean x | ||
25 | (s,v) = eigSH' (cov x) | ||
26 | |||
27 | -- creates the compression and decompression functions from the desired reconstruction | ||
28 | -- quality and the statistics of a data set | ||
29 | pca :: Double -> Stat -> (Vec -> Vec , Vec -> Vec) | ||
30 | pca prec (m,s,v) = (encode,decode) | ||
31 | where | ||
32 | encode x = vp <> (x - m) | ||
33 | decode x = x <> vp + m | ||
34 | vp = takeRows n v | ||
35 | n = 1 + (length $ fst $ span (< (prec'*sum s)) $ cumSum s) | ||
36 | cumSum = tail . scanl (+) 0.0 | ||
37 | prec' = if prec <=0.0 || prec >= 1.0 | ||
38 | then error "the precision in pca must be 0<prec<1" | ||
39 | else prec | ||
40 | |||
41 | shdigit :: Vec -> IO () | ||
42 | shdigit v = imshow (reshape 28 (-v)) | ||
43 | |||
44 | -- shows the effect of a given reconstruction quality on a test vector | ||
45 | test :: Stat -> Double -> Vec -> IO () | ||
46 | test st prec x = do | ||
47 | let (pe,pd) = pca prec st | ||
48 | let y = pe x | ||
49 | print $ dim y | ||
50 | shdigit (pd y) | ||
51 | |||
52 | main = do | ||
53 | ok <- doesFileExist ("mnist.txt") | ||
54 | when (not ok) $ do | ||
55 | putStrLn "\nTrying to download test datafile..." | ||
56 | system("wget -nv http://dis.um.es/~alberto/material/sp/mnist.txt.gz") | ||
57 | system("gunzip mnist.txt.gz") | ||
58 | return () | ||
59 | m <- loadMatrix "mnist.txt" | ||
60 | let xs = takeColumns (cols m -1) m | ||
61 | let x = toRows xs !! 4 -- an arbitrary test vector | ||
62 | shdigit x | ||
63 | let st = stat xs | ||
64 | test st 0.90 x | ||
65 | test st 0.50 x | ||