src/LLVM/Analysis/Dataflow.hs

{-# LANGUAGE MultiParamTypeClasses, BangPatterns, NoMonomorphismRestriction #-}
{-# LANGUAGE ScopedTypeVariables #-}
-- | This module defines an interface for intra-procedural dataflow
-- analysis (forward and backward).
--
-- The user simply defines a type to represent the state of their
-- dataflow analysis as an instance of 'DataflowAnalysis'.  This class
-- adds one function, 'transfer', to the semilattices defined in the
-- lattices package.
--
-- To use this dataflow analysis framework, pass it an initial
-- analysis state and either a control flow graph or a function.  The
-- analysis then returns a function that maps instructions to the
-- dataflow value at that instruction.  For example,
--
-- > let initialState = ...
-- >     cfg = mkCFG f
-- >     results = forwardDataflow initialState cfg
-- > in results (cfgFinalValue cfg)
--
-- gives the dataflow value for the return instruction in function
-- @f@.  Any instruction in @f@ can be used as an argument to the
-- @result@ function.
module LLVM.Analysis.Dataflow (
  -- * Dataflow analysis
  DataflowAnalysis(..),
  MeetSemiLattice(..),
  BoundedMeetSemiLattice(..),
  meets,
  forwardDataflow,
  backwardDataflow,
  -- * Dataflow results
  DataflowResult,
  dataflowResult
  ) where

import Algebra.Lattice
import Control.DeepSeq
import Control.Monad ( foldM )
import Data.HashMap.Strict ( HashMap )
import qualified Data.HashMap.Strict as M
import Data.HashSet ( HashSet )
import qualified Data.HashSet as S
import Data.List ( sort )
import Data.Maybe ( fromMaybe )
import Text.Printf

import LLVM.Analysis
import LLVM.Analysis.CFG

-- | A class defining the interface to a dataflow analysis.  The
-- analysis object itself that is passed to one of the dataflow
-- functions acts as the initial state.
--
-- Note that the second type parameter, @c@, can be used to pass
-- information that is global and constant for the analysis of a
-- function.  This can save space by not requiring the information to
-- be stored redundantly in every dataflow fact.  If this parameter is
-- not needed, it can safely be instantiated as () (and subsequently
-- ignored).
class (BoundedMeetSemiLattice a, Monad m) => DataflowAnalysis m a where
  transfer :: a -- ^ The incoming analysis state
              -> Instruction -- ^ The instruction being analyzed
              -> m a
  -- ^ The transfer function of this analysis.  It is given any global
  -- constant data, the current set of facts, the current instruction,
  -- and a list of incoming edges.
  phiTransfer ::  [Instruction] -- ^ The instruction being analyzed
                 -> [(BasicBlock, a, CFGEdge)] -- ^ Incoming CFG edges
                 -> m a
  phiTransfer _ = return . meets . map (\(_, e, _) -> e)
  -- ^ The transfer function that will process all Phi nodes in
  -- parallel.

  -- | This is a hook to perform some analysis on an _edge_ before the
  -- normal transfer functions run on the next instruction.
  --
  -- This is a convenient separation of edge and instruction
  -- processing, but it also improves precision in cases like:
  --
  -- > define void @f(i8* %p) nounwind uwtable {
  -- >   %1 = icmp ne i8* %p, null
  -- >   br i1 %1, label %2, label %3
  -- >
  -- > ; <label>:2                                       ; preds = %0
  -- >   call void @free(i8* %p) nounwind
  -- >   br label %3
  -- >
  -- > ; <label>:3                                       ; preds = %2, %0
  -- >   ret void
  -- > }
  --
  -- Without separate edge processing, the join at the return
  -- statement executes before we can use information about %p being
  -- NULL on one edge of the branch.
  --
  -- NOTE: This transfer function is only appleid to edges between
  -- basic blocks.  Within a basic block, the edge is always an
  -- unconditional edge and there is no merging anyway.
  edgeTransfer :: a -> CFGEdge -> m a
  edgeTransfer e _ = return e


-- | The opaque result of a dataflow analysis
data DataflowResult a =
  DataflowResult (HashMap Instruction a)

instance (Eq a) => Eq (DataflowResult a) where
  (DataflowResult m1) == (DataflowResult m2) = m1 == m2

instance (NFData a) => NFData (DataflowResult a) where
  rnf (DataflowResult m) = m `deepseq` ()

-- | Get the result of a dataflow analysis at the given instruction.
-- Will throw an error if the instruction is not in the function for
-- which this analysis was run.
--
-- FIXME: The block result here needs to be able to reverse the instruction
-- list if doing a backwards analysis
dataflowResult :: DataflowResult a -> Instruction -> a
dataflowResult (DataflowResult m) i =
  fromMaybe errMsg $ M.lookup i m
  where
    errMsg = error ("LLVM.Analysis.Dataflow.dataflowResult: Instruction " ++ show i ++ " has no dataflow result")

firstInst :: Instruction -> Bool
firstInst i = firstBlock bb
  where
    Just bb = instructionBasicBlock i

forwardDataflow :: (Eq a, HasCFG b, DataflowAnalysis m a)
                   => a -> b -> m (DataflowResult a)
forwardDataflow analysis f =
  dataflowAnalysis basicBlockTerminatorInstruction id
    (basicBlockLabeledPredecessors cfg)
    (basicBlockLabeledSuccessors cfg)
    analysis cfg
  where
    cfg = getCFG f

-- | FIXME: This is currently broken.  When should the phi
-- instructions be processed in parallel? Before or after the
-- instructions in the block?  It also needs to look up the output
-- fact of the *first* instruction of the block instead of the last
backwardDataflow :: (Eq a, HasCFG b, DataflowAnalysis m a)
                    => a -> b -> m (DataflowResult a)
backwardDataflow analysis f =
  dataflowAnalysis firstNonPhiInstruction reverse
    (basicBlockLabeledSuccessors cfg)
    (basicBlockLabeledPredecessors cfg)
    analysis cfg
  where
    cfg = getCFG f

dataflowAnalysis :: forall a m . (Eq a, DataflowAnalysis m a)
                    => (BasicBlock -> Instruction)
                    -> ([Instruction] -> [Instruction])
                    -> (BasicBlock -> [(BasicBlock, CFGEdge)])
                    -> (BasicBlock -> [(BasicBlock, CFGEdge)])
                    -> a -> CFG -> m (DataflowResult a)
dataflowAnalysis lastInstruction orderedBlockInsts blockPreds blockSuccs fact0 cfg = do
  let blocks = functionBody func
      insts = concatMap basicBlockInstructions blocks
      initialStates = M.fromList $ zip insts (repeat top)
      s0 = (initialStates, S.empty)
  res <- dataflow blocks s0
  return (DataflowResult res)
  where
    func = cfgFunction cfg

    -- | If there is nothing left in the worklist, return the facts
    -- associated with the exit node.
    --
    -- FIXME: When turning the workset into a worklist, see about a
    -- better sort order to visit instructions in.  Currently it
    -- should actually be automatically close to topological sort
    -- order since instruction IDs are assigned sequentially and the
    -- IR is in SSA form (so definitions dominate uses -- topological
    -- order).
    dataflow !work !factsAndWork = do
      (facts', nextWork') <- foldM processBlock factsAndWork work
      case S.null nextWork' of
        True -> return facts'
        False -> dataflow (sort (S.toList nextWork')) (facts', S.empty)

    -- | FIXME the ordered block instructions should probably exclude
    -- phi nodes here since we don't track facts for phi nodes.  This
    -- will cause problems for a backwards analysis.
    lookupBlockFact :: (DataflowAnalysis m a) => HashMap Instruction a -> BasicBlock -> a
    lookupBlockFact facts block =
      fromMaybe errMsg $ M.lookup (lastInstruction block) facts
      where
        errMsg = error $ printf "LLVM.Analysis.Dataflow.dataflowAnalysis.lookupBlockFact: No facts for block %s" (show (toValue block))

    processBlock :: (DataflowAnalysis m a)
                    => (HashMap Instruction a, HashSet BasicBlock)
                    -> BasicBlock
                    -> m (HashMap Instruction a, HashSet BasicBlock)
    processBlock (outputFacts, nextWork) block = do

      -- First, match up the output facts for each incoming basic
      -- block with the associated CFG edge.
      let predsWithFacts = map (\(b, e) -> (b, lookupBlockFact outputFacts b, e)) preds
          applyEdgeTransfer (b, f, e) = do
            f' <- edgeTransfer f e
            return (b, f', e)

      -- Now apply the edge transfer function to each CFG edge.  This
      -- lets us generate facts before meeting all of the incoming
      -- edges.
      predsWithEdgeFacts <- mapM applyEdgeTransfer predsWithFacts

      -- Separate out Phi nodes so that we can process them all in parallel before
      -- touching the other instructions.
      let (phiNodes, otherInsts) = basicBlockSplitPhiNodes block

      -- Determine the input fact.  If there are no predecessors and
      -- this is the first block, use the initial analysis fact
      -- (fact0).  If there are no predecessors and this is not the
      -- first block, this block is not reachable and we use top as
      -- its input fact.
      --
      -- Otherwise, we just generate an initial input fact for the
      -- first instruction of the block (either through meeting all
      -- incoming inputs OR while processing the Phi nodes.
      inputFact <- case null preds of
        True -> case firstBlock block of
          True -> return fact0
          False -> return top
        False -> case null phiNodes of
          True -> return $! meets (map (\(_, f, _) -> f) predsWithEdgeFacts)
          False -> phiTransfer phiNodes  predsWithEdgeFacts

      -- Apply the transfer function to each instruction in the block.
      -- The fold chains output facts to input facts.  This step also
      -- (purely) updates the output fact map.
      let insts = orderedBlockInsts otherInsts
      (_, outputFacts') <- foldM processNode (inputFact, outputFacts) insts

      -- If the output fact of the block did not change, we don't need
      -- to update the successors at all and so we don't modify the
      -- worklist.
      --
      -- Since the facts for some instructions inside this block may
      -- have changed, we update the output facts either way (it
      -- doesn't really cost any extra).
      case lookupBlockFact outputFacts' block == lastOutputFact of
        True -> return (outputFacts', nextWork)
        False ->
          -- Update the worklist with the successors of this block and the facts
          let succBlocks = map fst $ blockSuccs block
              nextWork' = nextWork `S.union` S.fromList succBlocks
          in return (outputFacts', nextWork')
      where
        lastOutputFact = lookupBlockFact outputFacts block
        preds = blockPreds block

    -- | Apply the transfer function to each instruction in this
    -- block.  If the result is different than the current fact, add
    -- all successors to the worklist.
    processNode (inputFact, outputFacts) inst = do
      outFact <- transfer inputFact inst
      let outputFacts' = M.insert inst outFact outputFacts
      return (outFact, outputFacts')

firstBlock :: BasicBlock -> Bool
firstBlock bb = bb == fb
  where
    f = basicBlockFunction bb
    fb : _ = functionBody f

{-# ANN module "HLint: ignore Use if" #-}