データ構造

マップ

import qualified Data.Map as Map

kv :: Map.Map Integer String
kv = Map.fromList [(1, "a"), (2, "b")]

lkup :: Integer -> String -> String
lkup key def =
  case Map.lookup key kv of
    Just val -> val
    Nothing  -> def

import Data.Tree

{-

   A
  / \
 B   C
    / \
   D   E

-}

tree :: Tree String
tree = Node "A" [Node "B" [], Node "C" [Node "D" [], Node "E" []]]

postorder :: Tree a -> [a]
postorder (Node a ts) = elts ++ [a]
  where elts = concat (map postorder ts)

preorder :: Tree a -> [a]
preorder (Node a ts) = a : elts
  where elts = concat (map preorder ts)

ex1 = drawTree tree
ex2 = drawForest (subForest tree)
ex3 = flatten tree
ex4 = levels tree
ex5 = preorder tree
ex6 = postorder tree

Set

import qualified Data.Set as Set

set :: Set.Set Integer
set = Set.fromList [1..1000]

memtest :: Integer -> Bool
memtest elt = Set.member elt set

Vector

Vectors are high performance single dimensional arrays that come come in six variants, two for each of the following types of a mutable and an immutable variant.

  • Data.Vector
  • Data.Vector.Storable
  • Data.Vector.Unboxed

The most notable feature of vectors is constant time memory access with ((!)) as well as variety of efficient map, fold and scan operations on top of a fusion framework that generates surprisingly optimal code.

fromList :: [a] -> Vector a
toList :: Vector a -> [a]
(!) :: Vector a -> Int -> a
map :: (a -> b) -> Vector a -> Vector b
foldl :: (a -> b -> a) -> a -> Vector b -> a
scanl :: (a -> b -> a) -> a -> Vector b -> Vector a
zipWith :: (a -> b -> c) -> Vector a -> Vector b -> Vector c
iterateN :: Int -> (a -> a) -> a -> Vector a
import Data.Vector.Unboxed as V

norm ::  Vector Double -> Double
norm = sqrt . V.sum . V.map (\x -> x*x)

example1 :: Double
example1 = norm $ V.iterateN 100000000 (+1) 0.0

See: Numerical Haskell: A Vector Tutorial

可変Vector

freeze :: MVector (PrimState m) a -> m (Vector a)
thaw :: Vector a -> MVector (PrimState m) a

Within the IO monad we can perform arbitrary read and writes on the mutable vector with constant time reads and writes. When needed a static Vector can be created to/from the MVector using the freeze/thaw functions.

import GHC.Prim
import Control.Monad
import Control.Monad.ST
import Control.Monad.Primitive

import Data.Vector.Unboxed (freeze)
import Data.Vector.Unboxed.Mutable
import qualified Data.Vector.Unboxed as V

example :: PrimMonad m => m (V.Vector Int)
example = do
  v <- new 10
  forM_ [0..9] $ \i ->
     write v i (2*i)
  freeze v

-- vector computation in IO
vecIO :: IO (V.Vector Int)
vecIO = example

-- vector computation in ST
vecST :: ST s (V.Vector Int)
vecST = example


main :: IO ()
main = do
  vecIO >>= print
  print $ runST vecST

非整序コンテナ

fromList :: (Eq k, Hashable k) => [(k, v)] -> HashMap k v
lookup :: (Eq k, Hashable k) => k -> HashMap k v -> Maybe v
insert :: (Eq k, Hashable k) => k -> v -> HashMap k v -> HashMap k v

Both the HashMap and HashSet are purely functional data structures that are drop in replacements for the containers equivalents but with more efficient space and time performance. Additionally all stored elements must have a Hashable instance.

import qualified Data.HashSet as S
import qualified Data.HashMap.Lazy as M

example1 :: M.HashMap Int Char
example1 = M.fromList $ zip [1..10] ['a'..]

example2 :: S.HashSet Int
example2 = S.fromList [1..10]

See: Johan Tibell: Announcing Unordered Containers

ハッシュテーブル

Hashtables provides hashtables with efficient lookup within the ST or IO monad.

import Prelude hiding (lookup)

import Control.Monad.ST
import Data.HashTable.ST.Basic

-- Hashtable parameterized by ST "thread"
type HT s = HashTable s String String

set :: ST s (HT s)
set = do
  ht <- new
  insert ht "key" "value1"
  return ht

get :: HT s -> ST s (Maybe String)
get ht = do
  val <- lookup ht "key"
  return val

example :: Maybe String
example = runST (set >>= get)
new :: ST s (HashTable s k v)
insert :: (Eq k, Hashable k) => HashTable s k v -> k -> v -> ST s ()
lookup :: (Eq k, Hashable k) => HashTable s k v -> k -> ST s (Maybe v)

グラフ

The Graph module in the containers library is a somewhat antiquated API for working with directed graphs. A little bit of data wrapping makes it a little more straightforward to use. The library is not necessarily well-suited for large graph-theoretic operations but is perfectly fine for example, to use in a typechecker which need to resolve strongly connected components of the module definition graph.

import Data.Tree
import Data.Graph

data Grph node key = Grph
  { _graph :: Graph
  , _vertices :: Vertex -> (node, key, [key])
  }

fromList :: Ord key => [(node, key, [key])] -> Grph node key
fromList = uncurry Grph . graphFromEdges'

vertexLabels :: Functor f => Grph b t -> (f Vertex) -> f b
vertexLabels g = fmap (vertexLabel g)

vertexLabel :: Grph b t -> Vertex -> b
vertexLabel g = (\(vi, _, _) -> vi) . (_vertices g)

-- Topologically sort graph
topo' :: Grph node key -> [node]
topo' g = vertexLabels g $ topSort (_graph g)

-- Strongly connected components of graph
scc' :: Grph node key -> [[node]]
scc' g = fmap (vertexLabels g . flatten) $ scc (_graph g)

So for example we can construct a simple graph:

ex1 :: [(String, String, [String])]
ex1 = [
    ("a","a",["b"]),
    ("b","b",["c"]),
    ("c","c",["a"])
  ]

ts1 :: [String]
ts1 = topo' (fromList ex1)
-- ["a","b","c"]

sc1 :: [[String]]
sc1 = scc' (fromList ex1)
-- [["a","b","c"]]

Or with two strongly connected subgraphs:

ex2 :: [(String, String, [String])]
ex2 = [
    ("a","a",["b"]),
    ("b","b",["c"]),
    ("c","c",["a"]),

    ("d","d",["e"]),
    ("e","e",["f", "e"]),
    ("f","f",["d", "e"])
  ]


ts2 :: [String]
ts2 = topo' (fromList ex2)
-- ["d","e","f","a","b","c"]

sc2 :: [[String]]
sc2 = scc' (fromList ex2)
-- [["d","e","f"],["a","b","c"]]

See: GraphSCC

グラフ理論

The fgl library provides are more efficient graph structure and a wide variety of common graph-theoretic operations. For example calculating the dominance frontier of a graph shows up quite frequently in control flow analysis for compiler design.

import qualified Data.Graph.Inductive as G

cyc3 :: G.Gr Char String
cyc3 = G.buildGr
       [([("ca",3)],1,'a',[("ab",2)]),
                ([],2,'b',[("bc",3)]),
                ([],3,'c',[])]

-- Loop query
ex1 :: Bool
ex1 = G.hasLoop x

-- Dominators
ex2 :: [(G.Node, [G.Node])]
ex2 = G.dom x 0
x :: G.Gr Int ()
x = G.insEdges edges gr
  where
  gr = G.insNodes nodes G.empty
  edges = [(0,1,()), (0,2,()), (2,1,()), (2,3,())]
  nodes = zip [0,1 ..] [2,3,4,1]

DList

A dlist is a list-like structure that is optimized for O(1) append operations, internally it uses a Church encoding of the list structure. It is specifically suited for operations which are append-only and need only access it when manifesting the entire structure. It is particularly well-suited for use in the Writer monad.

import Data.DList
import Control.Monad
import Control.Monad.Writer

logger :: Writer (DList Int) ()
logger = replicateM_ 100000 $ tell (singleton 0)

シーケンス

The sequence data structure behaves structurally similar to list but is optimized for append/prepend operations and traversal.

import Data.Sequence

a :: Seq Int
a = fromList [1,2,3]

a0 :: Seq Int
a0 = a |> 4
-- [1,2,3,4]

a1 :: Seq Int
a1 = 0 <| a
-- [0,1,2,3]

行列とHBlas

Just as in C when working with n-dimensional matrices we'll typically overlay the high-level matrix structure onto an unboxed contiguous block of memory with index functions which perform the coordinate translations to calculate offsets. The two most common layouts are:

  • Row-major order
  • Column-major order

Which are best illustrated.

The calculations have a particularly nice implementation in Haskell in terms of scans over indices.

import qualified Data.Vector as V

data Order = RowMajor | ColMajor

rowMajor :: [Int] -> [Int]
rowMajor = scanr (*) 1 . tail

colMajor :: [Int] -> [Int]
colMajor = init . scanl (*) 1

data Matrix a = Matrix
  { _dims  :: [Int]
  , _elts  :: V.Vector a
  , _order :: Order
  }

fromList :: [Int] -> Order -> [a] -> Matrix a
fromList sh order elts =
  if product sh == length elts
  then Matrix sh (V.fromList elts) order
  else error "dimensions don't match"

indexTo :: [Int] -> Matrix a -> a
indexTo ix mat = boundsCheck offset
  where
    boundsCheck n =
      if 0 <= n && n < V.length (_elts mat)
      then V.unsafeIndex (_elts mat) offset
      else error "out of bounds"
    ordering = case _order mat of
      RowMajor -> rowMajor
      ColMajor -> colMajor
    offset = sum $ zipWith (*) ix (ordering (_dims mat))

matrix :: Order -> Matrix Int
matrix order = fromList [4,4] order [1..16]

ex1 :: [Int]
ex1 = rowMajor [1,2,3,4]
-- [24,12,4,1]

ex2 :: [Int]
ex2 = colMajor [1,2,3,4]
-- [1,1,2,6]

ex3 :: Int
ex3 = indexTo [1,3] (matrix RowMajor)
-- 8

ex4 :: Int
ex4 = indexTo [1,3] (matrix ColMajor)
-- 14

Unboxed matrices of this type can also be passed to C or Fortran libraries such BLAS or LAPACK linear algebra libraries. The hblas package wraps many of these routines and forms the low-level wrappers for higher level-libraries that need access to these foreign routines.

For example the dgemm routine takes two pointers to a sequence of double values of two matrices of size (m × k) and (k × n) and performs efficient matrix multiplication writing the resulting data through a pointer to a (m × n) matrix.

import Foreign.Storable
import Numerical.HBLAS.BLAS
import Numerical.HBLAS.MatrixTypes

-- Generate the constant mutable square matrix of the given type and dimensions.
constMatrix :: Storable a => Int -> a -> IO (IODenseMatrix Row a)
constMatrix n k = generateMutableDenseMatrix SRow (n,n) (const k)

example_dgemm :: IO ()
example_dgemm = do
    left  <- constMatrix 2 (2 :: Double)
    right <- constMatrix 2 (3 :: Double)
    out   <- constMatrix 2 (0 :: Double)

    dgemm NoTranspose NoTranspose 1.0 1.0 left right out

    resulting <- mutableVectorToList $ _bufferDenMutMat out
    print resulting

See: hblas

results matching ""

    No results matching ""