summaryrefslogtreecommitdiff
path: root/examples/pca2.hs
diff options
context:
space:
mode:
authorAlberto Ruiz <aruiz@um.es>2014-05-21 10:30:55 +0200
committerAlberto Ruiz <aruiz@um.es>2014-05-21 10:30:55 +0200
commit197e88c3b56d28840217010a2871c6ea3a4dd1a4 (patch)
tree825be9d6c9d87d23f7e5497c0133d11d52c63535 /examples/pca2.hs
parente07c3dee7235496b71a89233106d93f6cc94ada1 (diff)
update dependencies, move examples etc
Diffstat (limited to 'examples/pca2.hs')
-rw-r--r--examples/pca2.hs65
1 files changed, 65 insertions, 0 deletions
diff --git a/examples/pca2.hs b/examples/pca2.hs
new file mode 100644
index 0000000..e7ea95f
--- /dev/null
+++ b/examples/pca2.hs
@@ -0,0 +1,65 @@
1-- Improved PCA, including illustrative graphics
2
3import Numeric.LinearAlgebra
4import Graphics.Plot
5import System.Directory(doesFileExist)
6import System.Process(system)
7import Control.Monad(when)
8
9type Vec = Vector Double
10type Mat = Matrix Double
11
12-- Vector with the mean value of the columns of a matrix
13mean a = constant (recip . fromIntegral . rows $ a) (rows a) <> a
14
15-- covariance matrix of a list of observations stored as rows
16cov x = (trans xc <> xc) / fromIntegral (rows x - 1)
17 where xc = x - asRow (mean x)
18
19
20type Stat = (Vec, [Double], Mat)
21-- 1st and 2nd order statistics of a dataset (mean, eigenvalues and eigenvectors of cov)
22stat :: Mat -> Stat
23stat x = (m, toList s, trans v) where
24 m = mean x
25 (s,v) = eigSH' (cov x)
26
27-- creates the compression and decompression functions from the desired reconstruction
28-- quality and the statistics of a data set
29pca :: Double -> Stat -> (Vec -> Vec , Vec -> Vec)
30pca prec (m,s,v) = (encode,decode)
31 where
32 encode x = vp <> (x - m)
33 decode x = x <> vp + m
34 vp = takeRows n v
35 n = 1 + (length $ fst $ span (< (prec'*sum s)) $ cumSum s)
36 cumSum = tail . scanl (+) 0.0
37 prec' = if prec <=0.0 || prec >= 1.0
38 then error "the precision in pca must be 0<prec<1"
39 else prec
40
41shdigit :: Vec -> IO ()
42shdigit v = imshow (reshape 28 (-v))
43
44-- shows the effect of a given reconstruction quality on a test vector
45test :: Stat -> Double -> Vec -> IO ()
46test st prec x = do
47 let (pe,pd) = pca prec st
48 let y = pe x
49 print $ dim y
50 shdigit (pd y)
51
52main = do
53 ok <- doesFileExist ("mnist.txt")
54 when (not ok) $ do
55 putStrLn "\nTrying to download test datafile..."
56 system("wget -nv http://dis.um.es/~alberto/material/sp/mnist.txt.gz")
57 system("gunzip mnist.txt.gz")
58 return ()
59 m <- loadMatrix "mnist.txt"
60 let xs = takeColumns (cols m -1) m
61 let x = toRows xs !! 4 -- an arbitrary test vector
62 shdigit x
63 let st = stat xs
64 test st 0.90 x
65 test st 0.50 x