@@ -7,6 +7,7 @@
 import torch
 from nvfuser_direct import (
     FusionDefinition,
+    IdMappingMode,
     ParallelType,
     TensorView,
     Merge,
@@ -15,8 +16,9 @@
     SqueezeOp,
     ReshapeOp,
 )
+from nvfuser_direct import idm
 
-verbose_ = False
+verbose_ = True
 
 
 def test_tutorial_memcpy():
@@ -508,3 +510,85 @@ def test_tutorial_reshape():
     # Note that all the transformations of squeeze_output are scheduling
     # transformations, thus it should not have a root domain
     assert not squeeze_output.has_root()
+
+
+def test_tutorial_id_model_reshape_analysis():
+    """
+    Demonstrate how to use IdModel to analyze the equivalence of reshape ops.
+    """
+    with FusionDefinition() as fd:
+        # Use static reshapes to avoid reshape concretization.
+        tv0 = fd.define_tensor(shape=[10, 20])
+        tv1 = fd.define_tensor(shape=[10, 20])
+
+        # While the two reshapes are equivalent, we do not know whether the
+        # two inputs are the same: no operation in the fusion, e.g. tv0 + tv1,
+        # allows us to infer their equivalence.
+        tv2 = fd.ops.reshape(tv0, [20, 10])
+        tv3 = fd.ops.reshape(tv1, [20, 10])
+        fd.add_output(tv2)
+        fd.add_output(tv3)
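+
+        # If the fusion did relate the two inputs, e.g. with
+        #   tv4 = fd.ops.add(tv0, tv1)
+        # the exact graph built below would be able to map them on its own.
+        # Since no such op exists here, we add the mapping manually below
+        # with map_vals().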
+
+    id_model = idm.IdModel(fd.fusion)
+    exact_graph = id_model.maybe_build_graph(IdMappingMode.exact)
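+
+    # The exact graph partitions the IterDomains of the fusion into disjoint
+    # sets. IterDomains that land in the same set are exactly mapped, i.e.,
+    # they are guaranteed to have the same extent.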
+
+    if verbose_:
+        print(id_model)
+        print(exact_graph)
+        print(exact_graph.disjoint_val_sets())
+
+    # As mentioned above, we do not know of any relationship between tv0 and
+    # tv1, so they should not be mapped in the exact graph.
+    assert len(tv0.get_logical_domain()) == len(tv1.get_logical_domain())
+    for tv0_id, tv1_id in zip(tv0.get_logical_domain(), tv1.get_logical_domain()):
+        assert not exact_graph.disjoint_val_sets().strict_are_mapped(tv0_id, tv1_id)
+
+    # Thus, the outputs of the reshape ops are not mapped either.
+    assert len(tv2.get_loop_domain()) == len(tv3.get_loop_domain())
+    for tv2_id, tv3_id in zip(tv2.get_loop_domain(), tv3.get_loop_domain()):
+        assert not exact_graph.disjoint_val_sets().strict_are_mapped(tv2_id, tv3_id)
+
+    # Now, suppose we know that the two inputs are exactly mapped. We can
+    # manually add the mappings:
+    for tv0_id, tv1_id in zip(tv0.get_logical_domain(), tv1.get_logical_domain()):
+        exact_graph.map_vals(tv0_id, tv1_id)
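+
+    # map_vals puts the given IterDomains into the same disjoint set, and the
+    # graph propagates the new mapping through the downstream expressions:
+    # mapped inputs transformed by matching merge/split exprs yield mapped
+    # outputs.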
+
+    # Now tv2 and tv3 should be fully mapped, including their root,
+    # intermediate, and loop domains.
+
+    # Check the root domains.
+    assert len(tv2.get_root_domain()) == len(tv3.get_root_domain())
+    for tv2_id, tv3_id in zip(tv2.get_root_domain(), tv3.get_root_domain()):
+        assert exact_graph.disjoint_val_sets().strict_are_mapped(tv2_id, tv3_id)
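+
+    # For tv2 and tv3, the root domain is the pre-reshape [10, 20] view,
+    # while the loop domain is the reshaped [20, 10] view.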
+
+    # Each reshape consists of a merge followed by a split. The outputs of
+    # the merges should be mapped as well.
+    assert exact_graph.disjoint_val_sets().strict_are_mapped(
+        tv2.get_root_domain()[0].uses()[0].output(0),
+        tv3.get_root_domain()[0].uses()[0].output(0),
+    )
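+    # Here, uses()[0] is the merge expression consuming the first root
+    # IterDomain, and output(0) is the merged IterDomain it produces.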
+
+    # The next operation is the split. Its outputs, which are the loop
+    # domains, should be mapped too.
+    for tv2_id, tv3_id in zip(tv2.get_loop_domain(), tv3.get_loop_domain()):
+        assert exact_graph.disjoint_val_sets().strict_are_mapped(tv2_id, tv3_id)