From 2ccf715aef508f7315f0a7438868de2220ce4f72 Mon Sep 17 00:00:00 2001 From: Aman Khalid Date: Wed, 5 Feb 2025 21:30:22 +0000 Subject: [PATCH] JIT: Enable reusing profile-aware DFS trees between phases (#112198) Revival of #112105. Add a switch to FlowGraphDfsTree that indicates if its traversal used edge likelihoods to determine the order in which successors were visited. This switch can then be used by debug checks that verify the DFS's correctness to check both types of trees. This functionality means we can frequently reuse the DFS tree annotations computed by LSRA throughout block layout. --- src/coreclr/jit/compiler.h | 11 ++++++++++- src/coreclr/jit/fgdiagnostic.cpp | 27 +++++++++++++++++++++------ src/coreclr/jit/fgopt.cpp | 16 +++++++++------- src/coreclr/jit/flowgraph.cpp | 2 +- src/coreclr/jit/lsra.cpp | 10 +++++++--- 5 files changed, 48 insertions(+), 18 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index c3bf108a19a92b..2bcbe0ee2bd31d 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -1938,12 +1938,16 @@ class FlowGraphDfsTree // Whether the DFS that produced the tree found any backedges. bool m_hasCycle; + // Whether the DFS that produced the tree used edge likelihoods to influence successor visitation order. + bool m_profileAware; + public: - FlowGraphDfsTree(Compiler* comp, BasicBlock** postOrder, unsigned postOrderCount, bool hasCycle) + FlowGraphDfsTree(Compiler* comp, BasicBlock** postOrder, unsigned postOrderCount, bool hasCycle, bool profileAware) : m_comp(comp) , m_postOrder(postOrder) , m_postOrderCount(postOrderCount) , m_hasCycle(hasCycle) + , m_profileAware(profileAware) { } @@ -1978,6 +1982,11 @@ class FlowGraphDfsTree return m_hasCycle; } + bool IsProfileAware() const + { + return m_profileAware; + } + #ifdef DEBUG void Dump() const; #endif // DEBUG diff --git a/src/coreclr/jit/fgdiagnostic.cpp b/src/coreclr/jit/fgdiagnostic.cpp index 1794c6b57954c7..824c974067184a 100644 --- a/src/coreclr/jit/fgdiagnostic.cpp +++ b/src/coreclr/jit/fgdiagnostic.cpp @@ -4742,15 +4742,30 @@ void Compiler::fgDebugCheckFlowGraphAnnotations() return; } - unsigned count = fgRunDfs( - [](BasicBlock* block, unsigned preorderNum) { + auto visitPreorder = [](BasicBlock* block, unsigned preorderNum) { assert(block->bbPreorderNum == preorderNum); - }, - [=](BasicBlock* block, unsigned postorderNum) { + }; + + auto visitPostorder = [=](BasicBlock* block, unsigned postorderNum) { assert(block->bbPostorderNum == postorderNum); assert(m_dfsTree->GetPostOrder(postorderNum) == block); - }, - [](BasicBlock* block, BasicBlock* succ) {}); + }; + + auto visitEdge = [](BasicBlock* block, BasicBlock* succ) {}; + + unsigned count; + if (m_dfsTree->IsProfileAware()) + { + count = fgRunDfs(visitPreorder, + visitPostorder, + visitEdge); + } + else + { + count = fgRunDfs(visitPreorder, + visitPostorder, + visitEdge); + } assert(m_dfsTree->GetPostOrderCount() == count); diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index 8c8376694281f6..064c94d6af4107 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -4663,10 +4663,6 @@ void Compiler::fgDoReversePostOrderLayout() } #endif // DEBUG - // Compute DFS of all blocks in the method, using profile data to determine the order successors are visited in. - // - m_dfsTree = fgComputeDfs(); - // If LSRA didn't create any new blocks, we can reuse its loop-aware RPO traversal, // which is cached in Compiler::fgBBs. // If the cache isn't available, we need to recompute the loop-aware RPO. @@ -4675,15 +4671,21 @@ void Compiler::fgDoReversePostOrderLayout() if (rpoSequence == nullptr) { - rpoSequence = new (this, CMK_BasicBlock) BasicBlock*[m_dfsTree->GetPostOrderCount()]; + assert(m_dfsTree == nullptr); + m_dfsTree = fgComputeDfs(); FlowGraphNaturalLoops* const loops = FlowGraphNaturalLoops::Find(m_dfsTree); - unsigned index = 0; - auto addToSequence = [rpoSequence, &index](BasicBlock* block) { + rpoSequence = new (this, CMK_BasicBlock) BasicBlock*[m_dfsTree->GetPostOrderCount()]; + unsigned index = 0; + auto addToSequence = [rpoSequence, &index](BasicBlock* block) { rpoSequence[index++] = block; }; fgVisitBlocksInLoopAwareRPO(m_dfsTree, loops, addToSequence); } + else + { + assert(m_dfsTree != nullptr); + } // Fast path: We don't have any EH regions, so just reorder the blocks // diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index eaed687a57eee2..369b7b7a30241c 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -4353,7 +4353,7 @@ FlowGraphDfsTree* Compiler::fgComputeDfs() fgRunDfs(visitPreorder, visitPostorder, visitEdge); - return new (this, CMK_DepthFirstSearch) FlowGraphDfsTree(this, postOrder, numBlocks, hasCycle); + return new (this, CMK_DepthFirstSearch) FlowGraphDfsTree(this, postOrder, numBlocks, hasCycle, useProfile); } // Add explicit instantiations. diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index c764a7259fba38..0f7397012f2d14 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -947,13 +947,16 @@ void LinearScan::setBlockSequence() bbVisitedSet = BitVecOps::MakeEmpty(traits); assert((blockSequence == nullptr) && (bbSeqCount == 0)); - FlowGraphDfsTree* const dfsTree = compiler->fgComputeDfs(); + + compiler->m_dfsTree = compiler->fgComputeDfs(); + FlowGraphDfsTree* const dfsTree = compiler->m_dfsTree; blockSequence = new (compiler, CMK_LSRA) BasicBlock*[compiler->fgBBcount]; if (compiler->opts.OptimizationEnabled() && dfsTree->HasCycle()) { - // Ensure loop bodies are compact in the visitation order - FlowGraphNaturalLoops* const loops = FlowGraphNaturalLoops::Find(dfsTree); + // Ensure loop bodies are compact in the visitation order. + compiler->m_loops = FlowGraphNaturalLoops::Find(dfsTree); + FlowGraphNaturalLoops* const loops = compiler->m_loops; unsigned index = 0; auto addToSequence = [this, &index](BasicBlock* block) { @@ -1319,6 +1322,7 @@ PhaseStatus LinearScan::doLinearScan() { assert(compiler->fgBBcount > bbSeqCount); compiler->fgBBs = nullptr; + compiler->fgInvalidateDfsTree(); } return PhaseStatus::MODIFIED_EVERYTHING;