diff options
author | Alberto Ruiz <aruiz@um.es> | 2007-09-21 18:28:08 +0000 |
---|---|---|
committer | Alberto Ruiz <aruiz@um.es> | 2007-09-21 18:28:08 +0000 |
commit | 0198366bba7a5f2d67338633f9eb90889ffc31b2 (patch) | |
tree | 4897d90233b333ee2092e63a4b74c7bcb2d22577 /examples/pca2.hs | |
parent | d4cb2692f9dae748da23371057a983deca4b2f80 (diff) |
add examples
Diffstat (limited to 'examples/pca2.hs')
-rw-r--r-- | examples/pca2.hs | 67 |
1 files changed, 67 insertions, 0 deletions
diff --git a/examples/pca2.hs b/examples/pca2.hs new file mode 100644 index 0000000..bd498e4 --- /dev/null +++ b/examples/pca2.hs | |||
@@ -0,0 +1,67 @@ | |||
1 | -- Improved PCA, including illustrative graphics | ||
2 | |||
3 | import LinearAlgebra | ||
4 | import Graphics.Plot | ||
5 | import System.Directory(doesFileExist) | ||
6 | import System(system) | ||
7 | import Control.Monad(when) | ||
8 | |||
9 | type Vec = Vector Double | ||
10 | type Mat = Matrix Double | ||
11 | |||
12 | sumColumns m = constant 1 (rows m) <> m | ||
13 | |||
14 | -- Vector with the mean value of the columns of a Mat | ||
15 | mean x = sumColumns x / fromIntegral (rows x) | ||
16 | |||
17 | -- covariance matrix of a list of observations as rows of a matrix | ||
18 | cov x = (trans xc <> xc) / fromIntegral (rows x -1) | ||
19 | where xc = center x | ||
20 | center m = m - constant 1 (rows m) `outer` mean m | ||
21 | |||
22 | type Stat = (Vec, [Double], Mat) | ||
23 | -- 1st and 2nd order statistics of a dataset (mean, eigenvalues and eigenvectors of cov) | ||
24 | stat :: Mat -> Stat | ||
25 | stat x = (m, toList s, trans v) where | ||
26 | m = mean x | ||
27 | (s,v) = eigS (cov x) | ||
28 | |||
29 | -- creates the compression and decompression functions from the desired reconstruction | ||
30 | -- quality and the statistics of a data set | ||
31 | pca :: Double -> Stat -> (Vec -> Vec , Vec -> Vec) | ||
32 | pca prec (m,s,v) = (encode,decode) | ||
33 | where | ||
34 | encode x = vp <> (x - m) | ||
35 | decode x = x <> vp + m | ||
36 | vp = takeRows n v | ||
37 | n = 1 + (length $ fst $ span (< (prec'*sum s)) $ cumSum s) | ||
38 | cumSum = tail . scanl (+) 0.0 | ||
39 | prec' = if prec <=0.0 || prec >= 1.0 | ||
40 | then error "the precision in pca must be 0<prec<1" | ||
41 | else prec | ||
42 | |||
43 | shdigit :: Vec -> IO () | ||
44 | shdigit v = imshow (reshape 28 (-v)) | ||
45 | |||
46 | -- shows the effect of a given reconstruction quality on a test vector | ||
47 | test :: Stat -> Double -> Vec -> IO () | ||
48 | test st prec x = do | ||
49 | let (pe,pd) = pca prec st | ||
50 | let y = pe x | ||
51 | print $ dim y | ||
52 | shdigit (pd y) | ||
53 | |||
54 | main = do | ||
55 | ok <- doesFileExist ("mnist.txt") | ||
56 | when (not ok) $ do | ||
57 | putStrLn "\nTrying to download test datafile..." | ||
58 | system("wget -nv http://dis.um.es/~alberto/material/sp/mnist.txt.gz") | ||
59 | system("gunzip mnist.txt.gz") | ||
60 | return () | ||
61 | m <- fromFile "mnist.txt" (5000,785) | ||
62 | let xs = takeColumns (cols m -1) m | ||
63 | let x = toRows xs !! 4 -- an arbitrary test vector | ||
64 | shdigit x | ||
65 | let st = stat xs | ||
66 | test st 0.90 x | ||
67 | test st 0.50 x | ||