{-# LANGUAGE FlexibleContexts #-}
-- | Control Dependence Graphs for the LLVM IR
--
-- This module follows the definition of control dependence of Cytron et al
-- (http://dl.acm.org/citation.cfm?doid=115372.115320):
--
-- Let X and Y be nodes in the CFG.  If X appears on every path from Y
-- to Exit, then X postdominates Y.  If X postdominates Y but X != Y,
-- then X strictly postdominates Y.
--
-- A CFG node Y is control dependent on a CFG node X if both:
--
--  * There is a non-null path p from X->Y such that Y postdominates
--    every node *after* X on p.
--
--  * The node Y does not strictly postdominate the node X.
--
-- This CDG formulation does not insert a dummy Start node to link
-- together all of the top-level nodes.  This just means that the set
-- of control dependencies can be empty if code will be executed
-- unconditionally.
module LLVM.Analysis.CDG (
  -- * Types
  CDG,
  HasCDG(..),
  -- * Constructor
  controlDependenceGraph,
  -- * Queries
  directControlDependencies,
  controlDependencies,
  controlDependentOn,
  -- * Visualization
  cdgGraphvizRepr
  ) where

import Data.GraphViz
import Data.HashMap.Strict ( HashMap )
import qualified Data.HashMap.Strict as M
import Data.HashSet ( HashSet )
import qualified Data.HashSet as S
import Data.List ( foldl' )
import Data.Maybe ( fromMaybe )

import Data.Graph.Interface
import Data.Graph.MutableDigraph
import Data.Graph.Algorithms.DFS

import LLVM.Analysis
import LLVM.Analysis.CFG
import LLVM.Analysis.Dominance

-- | The internal representation of the CDG.  Instructions are
-- control-dependent on other instructions, so they are the nodes in
-- the graph.
type CDGType = DenseDigraph Instruction ()
type LEdgeType = Edge CDGType

-- | A control depenence graph
data CDG = CDG { cdgGraph :: CDGType
               , cdgCFG :: CFG
               }

class HasCDG a where
  getCDG :: a -> CDG

instance HasCFG CDG where
  getCFG = cdgCFG

-- | Return True if @n@ is control dependent on @m@.
--
-- > controlDependentOn cdg m n
controlDependentOn :: CDG -> Instruction -> Instruction -> Bool
controlDependentOn cdg m n = m `elem` controlDependencies cdg n

-- | Get the list of instructions that @i@ is control dependent upon.
-- This list does not include @i@.  As noted above, the list will be
-- empty if @i@ is executed unconditionally.
--
-- > controlDependences cdg i
controlDependencies :: CDG -> Instruction -> [Instruction]
controlDependencies (CDG g _) i =
  filter (/= i) deps
  where
    deps = map (safeLab "LLVM.Analysis.CDG.controlDependnecies.deps" g) $ dfs [instructionUniqueId i] g

safeLab :: (InspectableGraph gr)
           => String -> gr -> Vertex -> VertexLabel gr
safeLab loc g n = fromMaybe errMsg (lab g n)
  where
    errMsg = error (loc ++ ": missing label for CDG node " ++ show n)

-- | Get the list of instructions that @i@ is directly control
-- dependent upon.
directControlDependencies :: CDG -> Instruction -> [Instruction]
directControlDependencies (CDG g _) i =
  map (safeLab "LLVM.Analysis.CDG.directControlDependencies" g) $ suc g (instructionUniqueId i)

-- | Construct the control dependence graph for a function (from its
-- CFG).  This follows the construction from chapter 9 of the
-- Munchnick Compiler Design and Implementation book.
--
-- For an input function F:
--
-- 1) Construct the CFG G for F
--
-- 2) Construct the postdominator tree PT for F
--
-- 3) Let S be the set of edges m->n in G such that n does not
--    postdominate m
--
-- 4) For each edge m->n in S, find the lowest common ancestor l of m
--    and n in the postdominator tree.  All nodes on the path from
--    l->n (not including l) in PT are control dependent on m.
--
-- Note: the typical construction augments the CFG with a fake start
-- node.  Doing that here would be a bit complicated, so the graph
-- just isn't connected by a fake Start node.
controlDependenceGraph :: CFG -> CDG
controlDependenceGraph cfg = CDG (mkGraph ns es) cfg
  where
    ns = map (\(n, l) -> (n, l)) $ labeledVertices g
    es = M.foldlWithKey' toEdge [] controlDeps

    g = cfgGraph cfg
    pdt = postdominatorTree (reverseCFG cfg)
    eloc = "LLVM.Analysis.CDG.controlDependenceGraph.cfgEdges"
    cfgEdges = map (\(Edge src dst _) -> (safeLab eloc g src, safeLab eloc g dst)) (edges g)
    -- cfgEdges = map ((safeLab $__LOCATION__ g) *** (safeLab $__LOCATION__ g)) (edges g)
    -- | All of the edges in the CFG m->n such that n does not
    -- postdominate m
    s = filter (isNotPostdomEdge pdt) cfgEdges
    controlDeps = foldr (extractDeps pdt) M.empty s

-- | Determine if an edge belongs in the set S
isNotPostdomEdge :: PostdominatorTree -> (Instruction, Instruction) -> Bool
isNotPostdomEdge pdt (m, n) = instructionIsTerminator m && not (postdominates pdt n m)

-- | Add an edge from @dependent@ to each @m@ it is control dependent on
toEdge :: [LEdgeType] -> Instruction -> HashSet Instruction -> [LEdgeType]
toEdge acc dependent = S.foldr (toE dependent) acc
  where
    toE n m a = Edge (instructionUniqueId n) (instructionUniqueId m) () : a

-- | A private type to describe what instructions the keys of the map
-- are control dependent upon.
type DepMap = HashMap Instruction (HashSet Instruction)

-- | Record control dependencies into a map (based on edges in S and
-- the postdominator tree).  The map is from instructions to the
-- nearest instruction that they are control dependent on.
extractDeps :: PostdominatorTree
               -> (Instruction, Instruction)
               -> DepMap
               -> DepMap
extractDeps pdt (m, n) cdeps =
  foldl' (addDep m) cdeps dependOnM
  where
    l = nearestCommonPostdominator pdt m n
    npdoms = instructionPostdominators pdt n
    -- All of the nodes from n to l in the postdominator tree,
    -- ignoring l.  If there was no common ancestor (e.g., there were
    -- multiple exit instructions), take all of the postdominators of
    -- n.
    dependOnM = case l of
      Just l' -> takeWhile (/=l') npdoms
      Nothing -> npdoms

-- | Multiple predecessors *ARE* allowed.  Consider
--
-- > void acl_create_entry(int *other_p, int *entry_p) {
-- >   if(!other_p || !entry_p) {
-- >     if(entry_p)
-- >       *entry_p = 5;
--
-- >     return;
-- >   }
--
-- >   *entry_p = 6;
-- > }
--
-- The second comparison of entry_p against NULL directly depends on
-- both conditions above it.  If !other_p is true, that is the
-- immediate dependency.  Otherwise, if !entry_p is true (but !other_p
-- is false), it is also a direct dependency.
addDep :: Instruction -> DepMap -> Instruction -> DepMap
addDep m deps n = M.insertWith S.union n (S.singleton m) deps



-- Visualization

cdgGraphvizParams :: GraphvizParams n Instruction el BasicBlock Instruction
cdgGraphvizParams =
  defaultParams { fmtNode = \(_,l) -> [ toLabel (toValue l) ]
                , clusterID = Int . basicBlockUniqueId
                , clusterBy = nodeCluster
                , fmtCluster = formatCluster
                }
  where
    nodeCluster l@(_, i) =
      let Just bb = instructionBasicBlock i
      in C bb (N l)
    formatCluster bb = [GraphAttrs [toLabel (show (basicBlockName bb))]]

cdgGraphvizRepr :: CDG -> DotGraph Vertex
cdgGraphvizRepr cdg = graphElemsToDot cdgGraphvizParams ns es
  where
    g = cdgGraph cdg
    ns = labeledVertices g
    es = map (\(Edge s d l) -> (s, d, l)) (edges g)