summaryrefslogtreecommitdiff
path: root/examples/pca2.hs
diff options
context:
space:
mode:
authorAlberto Ruiz <aruiz@um.es>2007-09-21 18:28:08 +0000
committerAlberto Ruiz <aruiz@um.es>2007-09-21 18:28:08 +0000
commit0198366bba7a5f2d67338633f9eb90889ffc31b2 (patch)
tree4897d90233b333ee2092e63a4b74c7bcb2d22577 /examples/pca2.hs
parentd4cb2692f9dae748da23371057a983deca4b2f80 (diff)
add examples
Diffstat (limited to 'examples/pca2.hs')
-rw-r--r--examples/pca2.hs67
1 files changed, 67 insertions, 0 deletions
diff --git a/examples/pca2.hs b/examples/pca2.hs
new file mode 100644
index 0000000..bd498e4
--- /dev/null
+++ b/examples/pca2.hs
@@ -0,0 +1,67 @@
1-- Improved PCA, including illustrative graphics
2
3import LinearAlgebra
4import Graphics.Plot
5import System.Directory(doesFileExist)
6import System(system)
7import Control.Monad(when)
8
9type Vec = Vector Double
10type Mat = Matrix Double
11
12sumColumns m = constant 1 (rows m) <> m
13
14-- Vector with the mean value of the columns of a Mat
15mean x = sumColumns x / fromIntegral (rows x)
16
17-- covariance matrix of a list of observations as rows of a matrix
18cov x = (trans xc <> xc) / fromIntegral (rows x -1)
19 where xc = center x
20 center m = m - constant 1 (rows m) `outer` mean m
21
22type Stat = (Vec, [Double], Mat)
23-- 1st and 2nd order statistics of a dataset (mean, eigenvalues and eigenvectors of cov)
24stat :: Mat -> Stat
25stat x = (m, toList s, trans v) where
26 m = mean x
27 (s,v) = eigS (cov x)
28
29-- creates the compression and decompression functions from the desired reconstruction
30-- quality and the statistics of a data set
31pca :: Double -> Stat -> (Vec -> Vec , Vec -> Vec)
32pca prec (m,s,v) = (encode,decode)
33 where
34 encode x = vp <> (x - m)
35 decode x = x <> vp + m
36 vp = takeRows n v
37 n = 1 + (length $ fst $ span (< (prec'*sum s)) $ cumSum s)
38 cumSum = tail . scanl (+) 0.0
39 prec' = if prec <=0.0 || prec >= 1.0
40 then error "the precision in pca must be 0<prec<1"
41 else prec
42
43shdigit :: Vec -> IO ()
44shdigit v = imshow (reshape 28 (-v))
45
46-- shows the effect of a given reconstruction quality on a test vector
47test :: Stat -> Double -> Vec -> IO ()
48test st prec x = do
49 let (pe,pd) = pca prec st
50 let y = pe x
51 print $ dim y
52 shdigit (pd y)
53
54main = do
55 ok <- doesFileExist ("mnist.txt")
56 when (not ok) $ do
57 putStrLn "\nTrying to download test datafile..."
58 system("wget -nv http://dis.um.es/~alberto/material/sp/mnist.txt.gz")
59 system("gunzip mnist.txt.gz")
60 return ()
61 m <- fromFile "mnist.txt" (5000,785)
62 let xs = takeColumns (cols m -1) m
63 let x = toRows xs !! 4 -- an arbitrary test vector
64 shdigit x
65 let st = stat xs
66 test st 0.90 x
67 test st 0.50 x