summaryrefslogtreecommitdiff
path: root/examples/pca1.hs
diff options
context:
space:
mode:
authorAlberto Ruiz <aruiz@um.es>2007-09-21 18:28:08 +0000
committerAlberto Ruiz <aruiz@um.es>2007-09-21 18:28:08 +0000
commit0198366bba7a5f2d67338633f9eb90889ffc31b2 (patch)
tree4897d90233b333ee2092e63a4b74c7bcb2d22577 /examples/pca1.hs
parentd4cb2692f9dae748da23371057a983deca4b2f80 (diff)
add examples
Diffstat (limited to 'examples/pca1.hs')
-rw-r--r--examples/pca1.hs45
1 files changed, 45 insertions, 0 deletions
diff --git a/examples/pca1.hs b/examples/pca1.hs
new file mode 100644
index 0000000..2c5074d
--- /dev/null
+++ b/examples/pca1.hs
@@ -0,0 +1,45 @@
1-- Principal component analysis
2
3import LinearAlgebra
4import System.Directory(doesFileExist)
5import System(system)
6import Control.Monad(when)
7
8type Vec = Vector Double
9type Mat = Matrix Double
10
11sumColumns m = constant 1 (rows m) <> m
12
13-- Vec with the mean value of the columns of a Mat
14mean x = sumColumns x / fromIntegral (rows x)
15
16-- covariance Mat of a list of observations as rows of a Mat
17cov x = (trans xc <> xc) / fromIntegral (rows x -1)
18 where xc = center x
19 center m = m - constant 1 (rows m) `outer` mean m
20
21-- creates the compression and decompression functions from the desired number of components
22pca :: Int -> Mat -> (Vec -> Vec , Vec -> Vec)
23pca n dataSet = (encode,decode)
24 where
25 encode x = vp <> (x - m)
26 decode x = x <> vp + m
27 m = mean dataSet
28 c = cov dataSet
29 (_,v) = eigS c
30 vp = takeRows n (trans v)
31
32main = do
33 ok <- doesFileExist ("mnist.txt")
34 when (not ok) $ do
35 putStrLn "\nTrying to download test datafile..."
36 system("wget -nv http://dis.um.es/~alberto/material/sp/mnist.txt.gz")
37 system("gunzip mnist.txt.gz")
38 return ()
39 m <- fromFile "mnist.txt" (5000,785)
40 let xs = takeColumns (cols m -1) m -- the last column is the digit type (class label)
41 let x = toRows xs !! 4 -- an arbitrary test Vec
42 let (pe,pd) = pca 10 xs
43 let y = pe x
44 print y -- compressed version
45 print $ norm (x - pd y) / norm x --reconstruction quality