Remove the python hooks to the old codegen. This exposes all the python problems in the tests.

Eliminate the expansion of python enumerate(). Remove use of empty labels for all measurements. This eliminates a loop, a data structure, and the invalid mixing of quantum and classical data values in classical memory. Fix bugs in AST bridge. Fix NVIDIA#2538 - measurement register name cannot be empty.

Signed-off-by: Eric Schweitz <[email protected]>
schweitzpgi · Feb 5, 2025 · 2acc144 · 2acc144
1 parent eaae1c6
commit 2acc144
Show file tree

Hide file tree

Showing 10 changed files with 183 additions and 96 deletions.
diff --git a/include/cudaq/Optimizer/CodeGen/Pipelines.h b/include/cudaq/Optimizer/CodeGen/Pipelines.h
@@ -30,35 +30,18 @@ void commonPipelineConvertToQIR(mlir::PassManager &pm,
                                 mlir::StringRef codeGenFor = "qir",
                                 mlir::StringRef passConfigAs = "qir");
 
-/// \deprecated{Only for Python, since it can't use the new QIR codegen.}
-void commonPipelineConvertToQIR_PythonWorkaround(
-    mlir::PassManager &pm, const std::optional<mlir::StringRef> &convertTo);
-
 /// \brief Pipeline builder to convert Quake to QIR.
 /// Does not specify a particular QIR profile.
 inline void addPipelineConvertToQIR(mlir::PassManager &pm) {
   commonPipelineConvertToQIR(pm);
 }
 
-/// \deprecated{Only for Python, since it can't use the new QIR codegen.}
-inline void addPipelineConvertToQIR_PythonWorkaround(mlir::PassManager &pm) {
-  commonPipelineConvertToQIR_PythonWorkaround(pm, std::nullopt);
-}
-
 /// \brief Pipeline builder to convert Quake to QIR.
 /// Specifies a particular QIR profile in \p convertTo.
 /// \p pm Pass manager to append passes to
 /// \p convertTo name of QIR profile (e.g., `qir-base`, `qir-adaptive`, ...)
 void addPipelineConvertToQIR(mlir::PassManager &pm, mlir::StringRef convertTo);
 
-/// \deprecated{Only for Python, since it can't use the new QIR codegen.}
-inline void
-addPipelineConvertToQIR_PythonWorkaround(mlir::PassManager &pm,
-                                         mlir::StringRef convertTo) {
-  commonPipelineConvertToQIR_PythonWorkaround(pm, convertTo);
-  addQIRProfilePipeline(pm, convertTo);
-}
-
 void addLowerToCCPipeline(mlir::OpPassManager &pm);
 
 void addPipelineTranslateToOpenQASM(mlir::PassManager &pm);

diff --git a/lib/Optimizer/CodeGen/ConvertToQIRAPI.cpp b/lib/Optimizer/CodeGen/ConvertToQIRAPI.cpp
@@ -1091,7 +1091,7 @@ struct QuantumGatePattern : public OpConversionPattern<OP> {
 
     // Process the controls, sorting them by type.
     for (auto pr : llvm::zip(op.getControls(), adaptor.getControls())) {
-      if (isa<quake::VeqType>(std::get<0>(pr).getType())) {
+      if (isaVeqArgument(std::get<0>(pr).getType())) {
         numArrayCtrls++;
         auto sizeCall = rewriter.create<func::CallOp>(
             loc, i64Ty, cudaq::opt::QIRArrayGetSize,
@@ -1154,6 +1154,18 @@ struct QuantumGatePattern : public OpConversionPattern<OP> {
     return forwardOrEraseOp();
   }
 
+  /// Returns true if \p ty is to be handled as an array-of-qubits control.
+  /// That is the case when \p ty is a `quake::VeqType`, or when it is a `cc`
+  /// pointer whose element type is an identified LLVM struct named "Array".
+  static bool isaVeqArgument(Type ty) {
+    // TODO: Need a way to identify arrays when using the opaque pointer
+    // variant. (In Python, the arguments may already be converted.)
+    // Matches the pointer-to-struct form purely by the struct's name.
+    auto alreadyConverted = [](Type ty) {
+      if (auto ptrTy = dyn_cast<cudaq::cc::PointerType>(ty))
+        if (auto strTy = dyn_cast<LLVM::LLVMStructType>(ptrTy.getElementType()))
+          return strTy.isIdentified() && strTy.getName() == "Array";
+      return false;
+    };
+    return isa<quake::VeqType>(ty) || alreadyConverted(ty);
+  }
+
   static bool conformsToIntendedCall(std::size_t numControls, Value ctrl, OP op,
                                      StringRef qirFunctionName) {
     if (numControls != 1)
@@ -1818,9 +1830,7 @@ struct QuakeToQIRAPIPrepPass
   }
 
   void guaranteeMzIsLabeled(quake::MzOp mz, int &counter, OpBuilder &builder) {
-    if (mz.getRegisterNameAttr() &&
-        /* FIXME: issue 2538: the name should never be empty. */
-        !mz.getRegisterNameAttr().getValue().empty()) {
+    if (mz.getRegisterNameAttr()) {
       mz->setAttr(cudaq::opt::MzAssignedNameAttrName, builder.getUnitAttr());
       return;
     }

diff --git a/lib/Optimizer/CodeGen/Pipelines.cpp b/lib/Optimizer/CodeGen/Pipelines.cpp
@@ -51,37 +51,6 @@ void cudaq::opt::commonPipelineConvertToQIR(PassManager &pm,
   pm.addPass(createCCToLLVM());
 }
 
-void cudaq::opt::commonPipelineConvertToQIR_PythonWorkaround(
-    PassManager &pm, const std::optional<StringRef> &convertTo) {
-  pm.addNestedPass<func::FuncOp>(createApplyControlNegations());
-  addAggressiveEarlyInlining(pm);
-  pm.addNestedPass<func::FuncOp>(createCanonicalizerPass());
-  pm.addNestedPass<func::FuncOp>(createUnwindLoweringPass());
-  pm.addNestedPass<func::FuncOp>(createCanonicalizerPass());
-  pm.addPass(createApplyOpSpecializationPass());
-  pm.addNestedPass<func::FuncOp>(createExpandMeasurementsPass());
-  pm.addNestedPass<func::FuncOp>(createClassicalMemToReg());
-  pm.addNestedPass<func::FuncOp>(createCanonicalizerPass());
-  pm.addNestedPass<func::FuncOp>(createCSEPass());
-  pm.addNestedPass<func::FuncOp>(createQuakeAddDeallocs());
-  pm.addNestedPass<func::FuncOp>(createQuakeAddMetadata());
-  pm.addNestedPass<func::FuncOp>(createLoopNormalize());
-  LoopUnrollOptions luo;
-  luo.allowBreak = convertTo && (*convertTo == "qir-adaptive");
-  pm.addNestedPass<func::FuncOp>(createLoopUnroll(luo));
-  pm.addNestedPass<func::FuncOp>(createCanonicalizerPass());
-  pm.addNestedPass<func::FuncOp>(createCSEPass());
-  pm.addNestedPass<func::FuncOp>(createLowerToCFGPass());
-  pm.addNestedPass<func::FuncOp>(createCombineQuantumAllocations());
-  pm.addNestedPass<func::FuncOp>(createCanonicalizerPass());
-  pm.addNestedPass<func::FuncOp>(createCSEPass());
-  if (convertTo && (*convertTo == "qir-base"))
-    pm.addNestedPass<func::FuncOp>(createDelayMeasurementsPass());
-  pm.addPass(createConvertMathToFuncs());
-  pm.addPass(createSymbolDCEPass());
-  pm.addPass(createConvertToQIR());
-}
-
 void cudaq::opt::addPipelineTranslateToOpenQASM(PassManager &pm) {
   pm.addNestedPass<func::FuncOp>(createCanonicalizerPass());
   pm.addNestedPass<func::FuncOp>(createCSEPass());

diff --git a/lib/Optimizer/Dialect/Quake/QuakeOps.cpp b/lib/Optimizer/Dialect/Quake/QuakeOps.cpp
@@ -517,38 +517,41 @@ void quake::WrapOp::getCanonicalizationPatterns(RewritePatternSet &patterns,
 //===----------------------------------------------------------------------===//
 
 // Common verification for measurement operations.
+//
+// `MEAS` is the concrete measurement op class; the body relies on it
+// providing `getMeasOut()`, `getRegisterName()` and `emitOpError()`.
+//
+// Fails if `verifyWireResultsAreLinear(op)` fails, if the result type does not
+// match the targets (a `cc::StdvecType` is required when there is more than
+// one target or the single target is a `quake::VeqType`; a
+// `quake::MeasureType` is required otherwise), or if a register name is
+// present but empty.
+//
+// NOTE(review): `bitsType` is not referenced in this body; the result type is
+// read from `op.getMeasOut()` instead.
-static LogicalResult verifyMeasurements(Operation *const op,
-                                        TypeRange targetsType,
-                                        const Type bitsType) {
+template <typename MEAS>
+LogicalResult verifyMeasurements(MEAS op, TypeRange targetsType,
+                                 const Type bitsType) {
   if (failed(verifyWireResultsAreLinear(op)))
     return failure();
   bool mustBeStdvec =
       targetsType.size() > 1 ||
       (targetsType.size() == 1 && isa<quake::VeqType>(targetsType[0]));
   if (mustBeStdvec) {
-    if (!isa<cudaq::cc::StdvecType>(op->getResult(0).getType()))
-      return op->emitOpError("must return `!cc.stdvec<!quake.measure>`, when "
-                             "measuring a qreg, a series of qubits, or both");
+    if (!isa<cudaq::cc::StdvecType>(op.getMeasOut().getType()))
+      return op.emitOpError("must return `!cc.stdvec<!quake.measure>`, when "
+                            "measuring a qreg, a series of qubits, or both");
   } else {
-    if (!isa<quake::MeasureType>(op->getResult(0).getType()))
+    if (!isa<quake::MeasureType>(op.getMeasOut().getType()))
       return op->emitOpError(
           "must return `!quake.measure` when measuring exactly one qubit");
   }
+  // The register name is optional, but when it is given it may not be empty.
+  if (op.getRegisterName())
+    if (op.getRegisterName()->empty())
+      return op->emitError("quake measurement name cannot be empty.");
   return success();
 }
 
 LogicalResult quake::MxOp::verify() {
+  // Defer to the verification shared by the Mx, My and Mz operations.
-  return verifyMeasurements(getOperation(), getTargets().getType(),
+  return verifyMeasurements(*this, getTargets().getType(),
                             getMeasOut().getType());
 }
 
 LogicalResult quake::MyOp::verify() {
+  // Defer to the verification shared by the Mx, My and Mz operations.
-  return verifyMeasurements(getOperation(), getTargets().getType(),
+  return verifyMeasurements(*this, getTargets().getType(),
                             getMeasOut().getType());
 }
 
 LogicalResult quake::MzOp::verify() {
+  // Defer to the verification shared by the Mx, My and Mz operations.
-  return verifyMeasurements(getOperation(), getTargets().getType(),
+  return verifyMeasurements(*this, getTargets().getType(),
                             getMeasOut().getType());
 }
 

diff --git a/lib/Optimizer/Transforms/GlobalizeArrayValues.cpp b/lib/Optimizer/Transforms/GlobalizeArrayValues.cpp
@@ -87,6 +87,23 @@ convertArrayAttrToGlobalConstant(MLIRContext *ctx, Location loc,
 }
 
 namespace {
+
+// This pattern replaces a cc.const_array with a global constant. It can
+// recognize a couple of usage patterns and will generate efficient IR in those
+// cases.
+//
+// Pattern 1: The entire constant array is stored to a stack variable(s). Here
+// we can eliminate the stack allocation and use the global constant.
+//
+// Pattern 2: Individual elements at dynamic offsets are extracted from the
+// constant array and used. This can be replaced with a compute pointer
+// operation using the global constant and a load of the element at the computed
+// offset.
+//
+// Default: If the usage is not recognized, the constant array value is replaced
+// with a load of the entire global variable. In this case, LLVM's optimizations
+// are counted on to help demote the (large?) sequence value to primitive memory
+// address arithmetic.
 struct ConstantArrayPattern
     : public OpRewritePattern<cudaq::cc::ConstantArrayOp> {
   explicit ConstantArrayPattern(MLIRContext *ctx, ModuleOp module,
@@ -95,21 +112,31 @@ struct ConstantArrayPattern
 
   LogicalResult matchAndRewrite(cudaq::cc::ConstantArrayOp conarr,
                                 PatternRewriter &rewriter) const override {
+    auto func = conarr->getParentOfType<func::FuncOp>();
+    if (!func)
+      return failure();
+
     SmallVector<cudaq::cc::AllocaOp> allocas;
     SmallVector<cudaq::cc::StoreOp> stores;
+    SmallVector<cudaq::cc::ExtractValueOp> extracts;
+    bool loadAsValue = false;
     for (auto *usr : conarr->getUsers()) {
       auto store = dyn_cast<cudaq::cc::StoreOp>(usr);
-      if (!store)
-        return failure();
-      auto alloca = store.getPtrvalue().getDefiningOp<cudaq::cc::AllocaOp>();
-      if (!alloca)
-        return failure();
-      stores.push_back(store);
-      allocas.push_back(alloca);
+      auto extract = dyn_cast<cudaq::cc::ExtractValueOp>(usr);
+      // Classify each user of the constant array. Every user must end up in
+      // exactly one bucket (store-to-alloca, extract, or "load as value");
+      // otherwise the constant array would keep a live use while this pattern
+      // still reports success.
+      if (store) {
+        // Pattern 1: the array is stored to a stack variable.
+        if (auto alloca =
+                store.getPtrvalue().getDefiningOp<cudaq::cc::AllocaOp>()) {
+          stores.push_back(store);
+          allocas.push_back(alloca);
+          continue;
+        }
+        // A store to anything other than a cc.alloca is not a recognized
+        // usage. Fall through to the default so the stored value is replaced
+        // with a load of the global.
+      } else if (extract) {
+        // Pattern 2: an element is extracted from the array.
+        extracts.push_back(extract);
+        continue;
+      }
+      // Default: replace the constant array value with a load of the global.
+      loadAsValue = true;
     }
-    auto func = conarr->getParentOfType<func::FuncOp>();
-    if (!func)
-      return failure();
     std::string globalName =
         func.getName().str() + ".rodata_" + std::to_string(counter++);
     auto *ctx = rewriter.getContext();
@@ -118,12 +145,39 @@ struct ConstantArrayPattern
     if (failed(convertArrayAttrToGlobalConstant(ctx, conarr.getLoc(), valueAttr,
                                                 module, globalName, eleTy)))
       return failure();
-    for (auto alloca : allocas)
-      rewriter.replaceOpWithNewOp<cudaq::cc::AddressOfOp>(
-          alloca, alloca.getType(), globalName);
-    for (auto store : stores)
-      rewriter.eraseOp(store);
-    rewriter.eraseOp(conarr);
+    auto loc = conarr.getLoc();
+    if (!extracts.empty()) {
+      auto base = rewriter.create<cudaq::cc::AddressOfOp>(
+          loc, cudaq::cc::PointerType::get(conarr.getType()), globalName);
+      auto elePtrTy = cudaq::cc::PointerType::get(eleTy);
+      for (auto extract : extracts) {
+        SmallVector<cudaq::cc::ComputePtrArg> args;
+        unsigned i = 0;
+        for (auto arg : extract.getRawConstantIndices()) {
+          if (arg == cudaq::cc::ExtractValueOp::getDynamicIndexValue())
+            args.push_back(extract.getDynamicIndices()[i++]);
+          else
+            args.push_back(arg);
+        }
+        OpBuilder::InsertionGuard guard(rewriter);
+        rewriter.setInsertionPoint(extract);
+        auto addrVal =
+            rewriter.create<cudaq::cc::ComputePtrOp>(loc, elePtrTy, base, args);
+        rewriter.replaceOpWithNewOp<cudaq::cc::LoadOp>(extract, addrVal);
+      }
+    }
+    if (!stores.empty()) {
+      for (auto alloca : allocas)
+        rewriter.replaceOpWithNewOp<cudaq::cc::AddressOfOp>(
+            alloca, alloca.getType(), globalName);
+      for (auto store : stores)
+        rewriter.eraseOp(store);
+    }
+    if (loadAsValue) {
+      auto base = rewriter.create<cudaq::cc::AddressOfOp>(
+          loc, cudaq::cc::PointerType::get(conarr.getType()), globalName);
+      rewriter.replaceOpWithNewOp<cudaq::cc::LoadOp>(conarr, base);
+    }
     return success();
   }
 

diff --git a/python/cudaq/kernel/ast_bridge.py b/python/cudaq/kernel/ast_bridge.py
@@ -1749,9 +1749,11 @@ def bodyBuilder(iterVal):
                     self.ctx) if len(qubits) == 1 and quake.RefType.isinstance(
                         qubits[0].type) else cc.StdvecType.get(
                             self.ctx, quake.MeasureType.get(self.ctx))
-                measureResult = opCtor(measTy, [],
-                                       qubits,
-                                       registerName=registerName).result
+                label = registerName
+                if not label:
+                    label = None
+                measureResult = opCtor(measTy, [], qubits,
+                                       registerName=label).result
                 if pushResultToStack:
                     self.pushValue(
                         quake.DiscriminateOp(resTy, measureResult).result)
@@ -3152,6 +3154,73 @@ def bodyBuilder(iterVar):
                                             isDecrementing=isDecrementing)
                 return
 
+        # We can simplify `for i,j in enumerate(L)` MLIR code immensely
+        # by just building a for loop over the iterable object L and using
+        # the index into that iterable and the element.
+        if isinstance(node.iter, ast.Call):
+            if node.iter.func.id == 'enumerate':
+                [self.visit(arg) for arg in node.iter.args]
+                if len(self.valueStack) == 2:
+                    iterable = self.popValue()
+                    self.popValue()
+                else:
+                    assert len(self.valueStack) == 1
+                    iterable = self.popValue()
+                iterable = self.ifPointerThenLoad(iterable)
+                totalSize = None
+                extractFunctor = None
+                varNames = []
+                for elt in node.target.elts:
+                    varNames.append(elt.id)
+
+                beEfficient = False
+                if quake.VeqType.isinstance(iterable.type):
+                    totalSize = quake.VeqSizeOp(self.getIntegerType(),
+                                                iterable).result
+
+                    def functor(seq, idx):
+                        q = quake.ExtractRefOp(self.getRefType(),
+                                               seq,
+                                               -1,
+                                               index=idx).result
+                        return [idx, q]
+
+                    extractFunctor = functor
+                    beEfficient = True
+                elif cc.StdvecType.isinstance(iterable.type):
+                    totalSize = cc.StdvecSizeOp(self.getIntegerType(),
+                                                iterable).result
+
+                    def functor(seq, idx):
+                        vecTy = cc.StdvecType.getElementType(seq.type)
+                        dataTy = cc.PointerType.get(self.ctx, vecTy)
+                        arrTy = vecTy
+                        if not cc.ArrayType.isinstance(arrTy):
+                            arrTy = cc.ArrayType.get(self.ctx, vecTy)
+                        dataArrTy = cc.PointerType.get(self.ctx, arrTy)
+                        data = cc.StdvecDataOp(dataArrTy, seq).result
+                        v = cc.ComputePtrOp(
+                            dataTy, data, [idx],
+                            DenseI32ArrayAttr.get([kDynamicPtrIndex],
+                                                  context=self.ctx)).result
+                        return [idx, v]
+
+                    extractFunctor = functor
+                    beEfficient = True
+
+                if beEfficient:
+
+                    def bodyBuilder(iterVar):
+                        self.symbolTable.pushScope()
+                        values = extractFunctor(iterable, iterVar)
+                        for i, v in enumerate(values):
+                            self.symbolTable[varNames[i]] = v
+                        [self.visit(b) for b in node.body]
+                        self.symbolTable.popScope()
+
+                    self.createInvariantForLoop(totalSize, bodyBuilder)
+                    return
+
         self.visit(node.iter)
         assert len(self.valueStack) > 0 and len(self.valueStack) < 3
 

diff --git a/python/cudaq/kernel/kernel_builder.py b/python/cudaq/kernel/kernel_builder.py
@@ -1076,10 +1076,12 @@ def mz(self, target, regName=None):
             if quake.VeqType.isinstance(target.mlirValue.type):
                 retTy = stdvecTy
                 measTy = cc.StdvecType.get(self.ctx, measTy)
-            res = quake.MzOp(
-                measTy, [], [target.mlirValue],
-                registerName=StringAttr.get(regName, context=self.ctx)
-                if regName is not None else '')
+            if regName is not None:
+                res = quake.MzOp(measTy, [], [target.mlirValue],
+                                 registerName=StringAttr.get(regName,
+                                                             context=self.ctx))
+            else:
+                res = quake.MzOp(measTy, [], [target.mlirValue])
             disc = quake.DiscriminateOp(retTy, res)
             return self.__createQuakeValue(disc.result)