Skip to content

Commit 9a507f7

Browse files
WIP
Signed-off-by: Pradnya Khalate <pkhalate@nvidia.com>
1 parent f001b79 commit 9a507f7

29 files changed

Lines changed: 1201 additions & 42 deletions

File tree

cudaq/include/cudaq/Optimizer/CodeGen/QIRFunctionNames.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ static constexpr const char QIRMeasureBody[] = "__quantum__qis__mz__body";
1919
static constexpr const char QIRMeasure[] = "__quantum__qis__mz";
2020
static constexpr const char QIRMeasureToRegister[] =
2121
"__quantum__qis__mz__to__register";
22+
static constexpr const char QIRMeasureHandleToRegister[] =
23+
"__quantum__qis__mz_handle__to__register";
2224
static constexpr const char QIRResetBody[] = "__quantum__qis__reset__body";
2325
static constexpr const char QIRReset[] = "__quantum__qis__reset";
2426

cudaq/lib/Optimizer/Builder/Intrinsics.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -631,6 +631,8 @@ static constexpr IntrinsicCode intrinsicTable[] = {
631631
func.func private @__quantum__qis__reset(!qir_qubit)
632632
func.func private @__quantum__qis__mz(!qir_qubit) -> !qir_result
633633
func.func private @__quantum__qis__mz__to__register(!qir_qubit, !qir_charptr) -> !qir_result
634+
func.func private @__quantum__qis__mz_handle__to__register(!qir_qubit, !qir_charptr) -> i64
635+
func.func private @__quantum__qis__read_result__body(!qir_result) -> i1
634636
func.func private @__quantum__qis__swap(!qir_qubit, !qir_qubit)
635637
func.func private @__quantum__qis__rx(f64, !qir_qubit)
636638
func.func private @__quantum__qis__phased_rx(f64, f64, !qir_qubit)

cudaq/lib/Optimizer/CodeGen/ConvertToQIRAPI.cpp

Lines changed: 58 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -820,6 +820,25 @@ struct DiscriminateOpRewrite
820820
ConversionPatternRewriter &rewriter) const override {
821821
auto loc = disc.getLoc();
822822
Value m = adaptor.getMeasurement();
823+
// Handle-form: the operand is the `i64` chronological measurement
824+
// index produced by `mz_handle__to__register`. Round-trip it through
825+
// `Result*` and call `read_result__body`, which looks the bit up in
826+
// `measRes2Val` under the inttoptr-encoded `Result*` key populated
827+
// by `mz_handle__to__register`.
828+
if (isa<IntegerType>(m.getType())) {
829+
auto ctx = rewriter.getContext();
830+
auto resultPtrTy = cudaq::cc::PointerType::get(
831+
LLVM::LLVMStructType::getOpaque("Result", ctx));
832+
auto resAsPtr = cudaq::cc::CastOp::create(rewriter, loc, resultPtrTy, m);
833+
rewriter.replaceOpWithNewOp<func::CallOp>(
834+
disc, rewriter.getI1Type(), cudaq::opt::qir0_1::ReadResultBody,
835+
ValueRange{resAsPtr});
836+
return success();
837+
}
838+
// Non-handle path: legacy `Result* -> ptr<i1>; load` pattern. Safe
839+
// because the only producer of `Result*` outside the handle path is
840+
// the sentinel-returning `mz` / `mz__to__register`, where
841+
// `Result = bool` and the pointer is dereferenceable.
823842
auto i1PtrTy = cudaq::cc::PointerType::get(rewriter.getI1Type());
824843
auto cast = cudaq::cc::CastOp::create(rewriter, loc, i1PtrTy, m);
825844
rewriter.replaceOpWithNewOp<cudaq::cc::LoadOp>(disc, cast);
@@ -844,12 +863,22 @@ struct DiscriminateOpToCallRewrite
844863
// expected by the QIR read-result functions.
845864
SmallVector<Value> operands{adaptor.getOperands().begin(),
846865
adaptor.getOperands().end()};
847-
if (operands.size() == 1 && isa<IntegerType>(operands.front().getType())) {
866+
const bool operandIsHandle =
867+
operands.size() == 1 && isa<IntegerType>(operands.front().getType());
868+
if (operandIsHandle) {
848869
auto resultTy = M::getResultType(rewriter.getContext());
849870
operands.front() =
850871
cudaq::cc::CastOp::create(rewriter, loc, resultTy, operands.front());
851872
}
852-
if constexpr (M::discriminateToClassical) {
873+
// For handle-form callers, the i64 payload is the chronological
874+
// measurement index produced by `mz_handle__to__register`. Loading
875+
// through `Result*` as if it were `bool*` (the legacy bitcast+load
876+
// pattern below) would dereference an integer-encoded pointer and
877+
// segfault — the read-result runtime call is the QIR-spec way to
878+
// recover the bit. The `mz_handle__to__register` adapter populates
879+
// `measRes2Val` keyed by the index-encoded `Result*`, so the lookup
880+
// resolves.
881+
if (operandIsHandle || M::discriminateToClassical) {
853882
if constexpr (M::qirVersion == QirVersion::version_1_0) {
854883
rewriter.replaceOpWithNewOp<func::CallOp>(
855884
disc, rewriter.getI1Type(), cudaq::opt::qir1_0::ReadResult,
@@ -1490,20 +1519,38 @@ struct MeasurementOpPattern : public OpConversionPattern<cudaq::quake::MzOp> {
14901519
adaptor.getTargets().end()};
14911520
auto functionName = M::getQIRMeasure();
14921521

1493-
// Handle-form measurements produce a `!cc.measure_handle` SSA value
1494-
// whose converted type is `i64`. The QIR measurement function still
1495-
// returns `Result*`, so we bridge the call's `Result*` result to the
1496-
// converted `i64` payload via `cc.cast`.
1522+
// Handle-form measurements produce a `!cc.measure_handle` SSA value whose
1523+
// converted type is `i64`. Route handle-form callers to the sibling runtime
1524+
// entry
1525+
// `__quantum__qis__mz_handle__to__register` which returns the chronological
1526+
// measurement index directly as `i64` (the QIR Base/ Adaptive Profile
1527+
// convention that the integer encoded in `Result*` identifies the
1528+
// measurement, see
1529+
// https://github.com/qir-alliance/qir-spec/blob/1.0/specification/profiles/Base_Profile.md).
14971530
const bool measOutIsHandle =
14981531
isa<cudaq::cc::MeasureHandleType>(mz.getMeasOut().getType());
14991532

15001533
// Are we using the measurement that returns a result?
15011534
if constexpr (M::mzReturnsResultType) {
1502-
// Yes, the measurement results the result, so we can use a
1503-
// straightforward codegen pattern. Use either the mz or the
1504-
// mz_to_register call (with the name as an extra argument) and forward
1505-
// the result of the call as the result.
1535+
// Handle-form gets its own runtime entry that returns `i64` directly.
1536+
if (measOutIsHandle) {
1537+
auto cstringGlobal =
1538+
createGlobalCString(mz, loc, rewriter, regNameAttr.getValue());
1539+
args.push_back(cstringGlobal);
1540+
auto i64Ty = rewriter.getI64Type();
1541+
auto call = func::CallOp::create(
1542+
rewriter, loc, i64Ty, cudaq::opt::QIRMeasureHandleToRegister, args);
1543+
call->setAttr(cudaq::opt::QIRRegisterNameAttr, regNameAttr);
1544+
SmallVector<Value> replaceVals;
1545+
replaceVals.push_back(call.getResult(0));
1546+
auto assundry = filterArgs(mz, adaptor.getTargets());
1547+
replaceVals.append(assundry.begin(), assundry.end());
1548+
rewriter.replaceOp(mz, replaceVals);
1549+
return success();
1550+
}
15061551

1552+
// Non-handle path: use the standard mz / mz__to__register call and
1553+
// forward its `Result*` result unchanged.
15071554
if (mz->getAttr(cudaq::opt::MzAssignedNameAttrName)) {
15081555
functionName = cudaq::opt::QIRMeasureToRegister;
15091556
auto cstringGlobal =
@@ -1515,13 +1562,7 @@ struct MeasurementOpPattern : public OpConversionPattern<cudaq::quake::MzOp> {
15151562
func::CallOp::create(rewriter, loc, resultTy, functionName, args);
15161563
auto assundry = filterArgs(mz, adaptor.getTargets());
15171564
SmallVector<Value> replaceVals;
1518-
if (measOutIsHandle) {
1519-
auto i64Ty = rewriter.getI64Type();
1520-
replaceVals.push_back(
1521-
cudaq::cc::CastOp::create(rewriter, loc, i64Ty, call.getResult(0)));
1522-
} else {
1523-
replaceVals.append(call.getResults().begin(), call.getResults().end());
1524-
}
1565+
replaceVals.append(call.getResults().begin(), call.getResults().end());
15251566
replaceVals.append(assundry.begin(), assundry.end());
15261567
rewriter.replaceOp(mz, replaceVals);
15271568
call->setAttr(cudaq::opt::QIRRegisterNameAttr, regNameAttr);

cudaq/test/Transforms/qir_api_measure_handle.qke

Lines changed: 7 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -27,16 +27,8 @@ func.func @scalar_handle() -> i1 attributes {"cudaq-entrypoint", "cudaq-kernel"}
2727

2828
// CHECK-LABEL: func.func @scalar_handle() -> i1
2929
// CHECK: %[[VAL_Q:.*]] = call @__quantum__rt__qubit_allocate()
30-
// CHECK: %[[VAL_R:.*]] = call @__quantum__qis__mz__to__register(%[[VAL_Q]], {{%.*}}) {{.*}} -> !cc.ptr<!llvm.struct<"Result", opaque>>
31-
// `MeasurementOpPattern` casts the QIR call's `Result*` to the converted
32-
// `i64` payload, and `DiscriminateOpToCallRewrite` casts back to
33-
// `!cc.ptr<i1>`. The two pointer endpoints (`Result*` and `ptr<i1>`) are
34-
// joined by an i64 round-trip whose only purpose was the handle ABI; the
35-
// `FuseCastCascade` `ptr -> int -> ptr` rule collapses the chain into a
36-
// single pointer cast that lowers to `llvm.bitcast`. This avoids the
37-
// `llvm.ptrtoint` that the NVQIR profile verifier rejects.
38-
// CHECK: %[[VAL_P:.*]] = cc.cast %[[VAL_R]] : (!cc.ptr<!llvm.struct<"Result", opaque>>) -> !cc.ptr<i1>
39-
// CHECK: %[[VAL_B:.*]] = cc.load %[[VAL_P]] : !cc.ptr<i1>
30+
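// CHECK: %[[VAL_H:.*]] = call @__quantum__qis__mz_handle__to__register(%[[VAL_Q]], {{%.*}}) {{.*}} -> i64
// CHECK: %[[VAL_R:.*]] = cc.cast %[[VAL_H]] : (i64) -> !cc.ptr<!llvm.struct<"Result", opaque>>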
31+
// CHECK: %[[VAL_B:.*]] = call @__quantum__qis__read_result__body(%[[VAL_R]]) : (!cc.ptr<!llvm.struct<"Result", opaque>>) -> i1
4032
// CHECK: return %[[VAL_B]] : i1
4133

4234
// -----
@@ -180,9 +172,7 @@ func.func @handle_stdvec_consume(%v: !cc.stdvec<!cc.measure_handle>) -> i1 attri
180172

181173
// -----
182174
// End-to-end guard: a follow-up canonicalize pass after convert-to-qir-api
183-
// must preserve the same Result* -> ptr<i1> collapse. The narrow integer-hop
184-
// non-fold case is already covered in cast_fold.qke.
185-
175+
// must preserve the handle-form lowering.
186176
func.func @scalar_handle_e2e() -> i1 attributes {"cudaq-entrypoint", "cudaq-kernel"} {
187177
%0 = quake.alloca !quake.ref
188178
%m = quake.mz %0 name "h" : (!quake.ref) -> !cc.measure_handle
@@ -192,12 +182,10 @@ func.func @scalar_handle_e2e() -> i1 attributes {"cudaq-entrypoint", "cudaq-kern
192182

193183
// CHECK-E2E-LABEL: func.func @scalar_handle_e2e() -> i1
194184
// CHECK-E2E: %[[VAL_Q:.*]] = call @__quantum__rt__qubit_allocate()
195-
// CHECK-E2E: %[[VAL_R:.*]] = call @__quantum__qis__mz__to__register({{.*}})
196-
// CHECK-E2E-SAME: -> !cc.ptr<!llvm.struct<"Result", opaque>>
197-
// CHECK-E2E: %[[VAL_P:.*]] = cc.cast %[[VAL_R]] : (!cc.ptr<!llvm.struct<"Result", opaque>>) -> !cc.ptr<i1>
198-
// CHECK-E2E-NOT: cc.cast {{.*}} : (!cc.ptr<!llvm.struct<"Result", opaque>>) -> i64
199-
// CHECK-E2E-NOT: cc.cast {{.*}} : (i64) -> !cc.ptr<i1>
200-
// CHECK-E2E: %[[VAL_B:.*]] = cc.load %[[VAL_P]] : !cc.ptr<i1>
185+
// CHECK-E2E: %[[VAL_H:.*]] = call @__quantum__qis__mz_handle__to__register({{.*}}) {{.*}} -> i64
186+
// CHECK-E2E: %[[VAL_R:.*]] = cc.cast %[[VAL_H]] : (i64) -> !cc.ptr<!llvm.struct<"Result", opaque>>
187+
// CHECK-E2E: %[[VAL_B:.*]] = call @__quantum__qis__read_result__body(%[[VAL_R]]) : (!cc.ptr<!llvm.struct<"Result", opaque>>) -> i1
188+
// CHECK-E2E-NOT: cc.load
201189
// CHECK-E2E: return %[[VAL_B]] : i1
202190

203191
}

python/cudaq/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,7 @@ def _isinstance(other, _cls=cls, _isinst=py_isinstance):
187187
from .runtime.draw import draw
188188
from .runtime.unitary import get_unitary
189189
from .runtime.resource_count import estimate_resources
190+
from .runtime.dem import dem_from_kernel
190191
from .runtime.vqe import vqe # Removed! Use VQE from CUDA-QX
191192
from .kernel.register_op import register_operation
192193
from .mlir._mlir_libs._quakeDialects import cudaq_runtime

python/cudaq/runtime/dem.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
# ============================================================================ #
2+
# Copyright (c) 2025 - 2026 NVIDIA Corporation & Affiliates. #
3+
# All rights reserved. #
4+
# #
5+
# This source code and the accompanying materials are made available under #
6+
# the terms of the Apache License 2.0 which accompanies this distribution. #
7+
# ============================================================================ #
8+
9+
from cudaq.mlir._mlir_libs._quakeDialects import cudaq_runtime
10+
from cudaq.kernel.kernel_decorator import (mk_decorator, isa_kernel_decorator)
11+
from cudaq.util import trace
12+
13+
14+
@trace.traced
15+
def dem_from_kernel(kernel, *args, noise_model=None):
16+
"""Generate a detector error model (DEM) from a CUDA-Q kernel.
17+
18+
Runs `kernel` under the internal `"dem"` execution context, captures
19+
the recorded circuit from the backend, and returns Stim's standard
20+
`.dem` text via `stim::DetectorErrorModel::str()`. The active CUDA-Q
21+
target is unaffected; the analysis simulator is an internal,
22+
thread-local override.
23+
24+
Args:
25+
kernel (:class:`Kernel`): The :class:`Kernel` to analyze.
26+
*args: Concrete argument values forwarded to the kernel invocation.
27+
noise_model (:class:`NoiseModel`, optional): Noise model layered on
28+
top of any `apply_noise` ops already present in the kernel.
29+
30+
Returns:
31+
UTF-8 string in Stim's standard `.dem` file format. Consumers
32+
that need a structured DEM can parse it with
33+
`stim.DetectorErrorModel(text)`.
34+
"""
35+
if isa_kernel_decorator(kernel):
36+
decorator = kernel
37+
else:
38+
decorator = mk_decorator(kernel)
39+
processedArgs, module = decorator.prepare_call(*args)
40+
return cudaq_runtime.dem_from_kernel_impl(decorator.uniqName, module,
41+
noise_model, *processedArgs)

python/extension/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ declare_mlir_python_extension(CUDAQuantumPythonSources.Extension
107107
../runtime/cudaq/algorithms/py_sample_async.cpp
108108
../runtime/cudaq/algorithms/py_sample_ptsbe.cpp
109109
../runtime/cudaq/algorithms/py_resource_count.cpp
110+
../runtime/cudaq/analysis/py_dem.cpp
110111
../runtime/cudaq/algorithms/py_run.cpp
111112
../../runtime/cudaq/algorithms/run.cpp # Common Python and C++ implementation of run
112113
../runtime/cudaq/algorithms/py_state.cpp
@@ -170,6 +171,7 @@ target_include_directories(CUDAQuantumPythonSources.Extension INTERFACE
170171
)
171172
target_link_libraries(CUDAQuantumPythonSources.Extension INTERFACE
172173
cudaq
174+
cudaq-analysis
173175
cudaq-logger
174176
cudaq-common
175177
cudaq-em-default

python/extension/CUDAQuantumExtension.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#include "runtime/cudaq/algorithms/py_translate.h"
2929
#include "runtime/cudaq/algorithms/py_unitary.h"
3030
#include "runtime/cudaq/algorithms/py_utils.h"
31+
#include "runtime/cudaq/analysis/py_dem.h"
3132
#include "runtime/cudaq/operators/py_boson_op.h"
3233
#include "runtime/cudaq/operators/py_fermion_op.h"
3334
#include "runtime/cudaq/operators/py_handlers.h"
@@ -134,6 +135,7 @@ NB_MODULE(_quakeDialects, m) {
134135
bindPyRunAsync(cudaqRuntime);
135136
bindPyTranslate(cudaqRuntime);
136137
bindCountResources(cudaqRuntime);
138+
bindDemFromKernel(cudaqRuntime);
137139
bindSampleAsync(cudaqRuntime);
138140
bindSamplePTSBE(cudaqRuntime);
139141
bindObserveAsync(cudaqRuntime);
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
/*******************************************************************************
2+
* Copyright (c) 2022 - 2026 NVIDIA Corporation & Affiliates. *
3+
* All rights reserved. *
4+
* *
5+
* This source code and the accompanying materials are made available under *
6+
* the terms of the Apache License 2.0 which accompanies this distribution. *
7+
******************************************************************************/
8+
9+
#include "py_dem.h"
10+
#include "common/NoiseModel.h"
11+
#include "runtime/cudaq/platform/py_alt_launch_kernel.h"
12+
#include "cudaq/algorithms/dem.h"
13+
#include "cudaq/platform.h"
14+
#include "mlir/Bindings/Python/NanobindAdaptors.h"
15+
#include <nanobind/stl/optional.h>
16+
#include <nanobind/stl/string.h>
17+
#include <optional>
18+
#include <string>
19+
20+
using namespace cudaq;
21+
22+
/// Implementation behind the `dem_from_kernel_impl` Python binding.
///
/// Validates the Python arguments with `simplifiedValidateInputArguments`,
/// then hands `cudaq::details::runDemFromKernel` a callback that launches the
/// kernel module, and returns the string that helper produces.
///
/// \param kernelName Kernel name; forwarded to both `runDemFromKernel` and
///        `marshal_and_launch_module`.
/// \param kernelMod  MLIR module passed to `marshal_and_launch_module`.
/// \param noise      Optional noise model; an empty optional is forwarded as a
///        null pointer.
/// \param args       Python positional arguments for the kernel invocation.
/// \return The string returned by `runDemFromKernel` (presumably the DEM
///         text; confirm against that helper's contract).
static std::string dem_from_kernel_impl(const std::string &kernelName,
23+
MlirModule kernelMod,
24+
std::optional<noise_model> noise,
25+
nanobind::args args) {
26+
auto &platform = cudaq::get_platform();
27+
// Replace the incoming arguments with their validated form before launch.
args = simplifiedValidateInputArguments(args);
28+
29+
// `noise` is a by-value parameter, so this pointer stays valid for the whole
// call; a missing noise model is passed on as nullptr.
const cudaq::noise_model *noisePtr = noise ? &(*noise) : nullptr;
30+
return cudaq::details::runDemFromKernel(
31+
// The callback captures locals by reference.
// NOTE(review): assumes `runDemFromKernel` invokes it before returning;
// confirm.
kernelName, platform, noisePtr, [&]() {
32+
// The launch result is deliberately discarded.
[[maybe_unused]] auto result =
33+
cudaq::marshal_and_launch_module(kernelName, kernelMod, args);
34+
});
35+
}
36+
37+
/// Registers `dem_from_kernel_impl` on the given nanobind module under the
/// Python-visible name "dem_from_kernel_impl".
///
/// Four unnamed argument annotations are supplied; only the third carries
/// `.none()`, so Python `None` is accepted in that position (the optional
/// noise model in `dem_from_kernel_impl`'s signature).
void cudaq::bindDemFromKernel(nanobind::module_ &mod) {
38+
mod.def("dem_from_kernel_impl", dem_from_kernel_impl, nanobind::arg(),
39+
nanobind::arg(), nanobind::arg().none(), nanobind::arg(),
40+
"See python documentation for dem_from_kernel.");
41+
}
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
/****************************************************************-*- C++ -*-****
2+
* Copyright (c) 2022 - 2026 NVIDIA Corporation & Affiliates. *
3+
* All rights reserved. *
4+
* *
5+
* This source code and the accompanying materials are made available under *
6+
* the terms of the Apache License 2.0 which accompanies this distribution. *
7+
******************************************************************************/
8+
9+
#pragma once
10+
11+
#include <nanobind/nanobind.h>
12+
13+
namespace cudaq {
14+
/// Adds the detector-error-model binding (`dem_from_kernel_impl`) to the
/// nanobind module `mod`.
void bindDemFromKernel(nanobind::module_ &mod);
15+
}

0 commit comments

Comments
 (0)