From 9d220f8690b6b565ae49d3b0a3222646c2c051fa Mon Sep 17 00:00:00 2001
From: LegionAtol
Date: Fri, 18 Oct 2024 20:23:26 +0200
Subject: [PATCH] notebooks to demonstrate how the _RL.py module works with
 single and multiple qubits

---
 .../optimal-control/CNOT_qubits_with_rl.md  | 132 +++++++++++++
 .../optimal-control/single_qubit_with_rl.md | 177 ++++++++++++++++++
 2 files changed, 309 insertions(+)
 create mode 100644 tutorials-v5/optimal-control/CNOT_qubits_with_rl.md
 create mode 100644 tutorials-v5/optimal-control/single_qubit_with_rl.md

diff --git a/tutorials-v5/optimal-control/CNOT_qubits_with_rl.md b/tutorials-v5/optimal-control/CNOT_qubits_with_rl.md
new file mode 100644
index 00000000..b3eaab07
--- /dev/null
+++ b/tutorials-v5/optimal-control/CNOT_qubits_with_rl.md
@@ -0,0 +1,132 @@
---
jupyter:
  jupytext:
    text_representation:
      extension: .md
      format_name: markdown
      format_version: '1.3'
    jupytext_version: 1.16.4
  kernelspec:
    display_name: Python 3 (ipykernel)
    language: python
    name: python3
---

# Quantum Optimal Control with Reinforcement Learning

In this notebook, we demonstrate how to use the `_RL` module to solve a quantum optimal control problem with reinforcement learning (RL).
The goal is to use two qubits to realize a CNOT gate. In practice there is a control qubit and a target qubit: if the control qubit is in the state |0⟩ the target qubit remains unchanged, while if the control qubit is in the state |1⟩ the CNOT gate flips the state of the target qubit.


### Setup and Import Required Libraries

```python
# If you are running this in an environment where some packages are missing, use this cell to install them:
# !pip install qutip stable-baselines3 gymnasium

import matplotlib.pyplot as plt
import numpy as np
import qutip as qt
from qutip_qoc import Objective, optimize_pulses
from stable_baselines3 import PPO
```

### Define the Quantum Control Problem

The system starts from the identity operator on two qubits, and the target is the CNOT gate. To accomplish this, control operators based on the Pauli matrices are defined, each acting on one of the two qubits. In addition, a drift Hamiltonian accounts for the qubit energies, their interaction and dissipation, so that the dynamics of an open quantum system are modeled.

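As a quick, optional sanity check of the description above (not part of the original tutorial; it only uses `qutip` functions already imported in the setup cell), we can apply `qt.gates.cnot()` to the four computational basis states and verify the truth table before building the open-system objective:

```python
# Optional sanity check: the CNOT truth table on the computational basis.
cnot = qt.gates.cnot()
for c in (0, 1):
    for t in (0, 1):
        ket_in = qt.tensor(qt.basis(2, c), qt.basis(2, t))
        ket_out = cnot * ket_in
        # index of the single non-zero amplitude of the output basis state
        out_index = int(np.argmax(np.abs(ket_out.full())))
        print(f"|{c}{t}> -> |{out_index:02b}>")
```
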
```python
# Define the initial and target states
initial = qt.tensor(qt.qeye(2), qt.qeye(2))
target = qt.gates.cnot()

# convert to superoperator (for open system)
initial = qt.sprepost(initial, initial.dag())
target = qt.sprepost(target, target.dag())

# Pauli matrices and identity
sx, sy, sz = qt.sigmax(), qt.sigmay(), qt.sigmaz()
identity = qt.qeye(2)

# single-qubit operators embedded in the two-qubit space
i_sx, sx_i = qt.tensor(sx, identity), qt.tensor(identity, sx)
i_sy, sy_i = qt.tensor(sy, identity), qt.tensor(identity, sy)
i_sz, sz_i = qt.tensor(sz, identity), qt.tensor(identity, sz)

# Define the control Hamiltonians
Hc = [i_sx, i_sy, i_sz, sx_i, sy_i, sz_i]
Hc = [qt.liouvillian(H) for H in Hc]

# drift and noise terms for the two-qubit system
omega, delta, gamma = 0.1, 1.0, 0.1
i_sm, sm_i = qt.tensor(qt.sigmam(), identity), qt.tensor(identity, qt.sigmam())

# energy levels and interaction
Hd = omega * (i_sz + sz_i) + delta * i_sz * sz_i
Hd = qt.liouvillian(H=Hd, c_ops=[gamma * (i_sm + sm_i)])

# combined operator list: drift first, then the controls
H = [Hd, Hc[0], Hc[1], Hc[2], Hc[3], Hc[4], Hc[5]]

# Define the objective
objectives = [Objective(initial, H, target)]

# Define the control parameters with bounds
control_parameters = {"p": {"bounds": [(-30, 30)]}}

# Define the time interval
tlist = np.linspace(0, np.pi, 100)

# Define algorithm-specific settings
algorithm_kwargs = {
    "fid_err_targ": 0.01,
    "alg": "RL",
    "max_iter": 400,
    "shorter_pulses": False,
}
optimizer_kwargs = {}
```

Note that `max_iter` defines the maximum number of training episodes, while the 100 points in `tlist` set the maximum number of steps per episode.
If `shorter_pulses` is True, training takes longer because, in addition to checking whether the target infidelity is reached, the algorithm tries to solve the problem using as few time steps as possible.
If it is False, the algorithm takes less time and stops as soon as it finds an episode with infidelity <= the target infidelity.


### Initialize and Train the RL Environment

Now we call the `optimize_pulses()` function, passing it the control problem defined above.
It creates an instance of the `_RL` class, which sets up the reinforcement learning environment and starts the training.
Finally, it returns the optimization results as an object of the `Result` class.

```python
# Initialize the RL environment and start training
rl_result = optimize_pulses(
    objectives, control_parameters, tlist, algorithm_kwargs, optimizer_kwargs
)
```

### Analyze the Results

After the training is complete, we can analyze the results obtained by the RL agent.
In the training log shown above, you can observe how the number of steps per episode (`ep_len_mean`) decreases and the average episode reward (`ep_rew_mean`) increases during training.

We can now inspect the fields of the `Result` object, which include the final infidelity, the optimized control parameters and more.

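In addition to the summary printed below, a rough consistency check can be made directly on the final propagator stored in `_final_states` (a minimal sketch: the quantity below is a plain normalized Hilbert–Schmidt overlap between superoperators, not the PSU infidelity that `qutip-qoc` computes internally):

```python
# Rough consistency check: normalized Hilbert-Schmidt overlap between the
# superoperator reached by the agent and the target CNOT superoperator
# (`target` is the superoperator defined in the cell above).
final_prop = rl_result._final_states[0]
overlap = abs((target.dag() * final_prop).tr()) / abs((target.dag() * target).tr())
print(f"Normalized overlap with the target superoperator: {overlap:.4f}")
```
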
```python
print(rl_result)
```

```python
# We can show the Hinton diagram of the final superoperator
fig, ax = qt.hinton(rl_result._final_states[0])
ax.set_title("hinton")
```

diff --git a/tutorials-v5/optimal-control/single_qubit_with_rl.md b/tutorials-v5/optimal-control/single_qubit_with_rl.md
new file mode 100644
index 00000000..be7c64f5
--- /dev/null
+++ b/tutorials-v5/optimal-control/single_qubit_with_rl.md
@@ -0,0 +1,177 @@
---
jupyter:
  jupytext:
    text_representation:
      extension: .md
      format_name: markdown
      format_version: '1.3'
    jupytext_version: 1.16.0
  kernelspec:
    display_name: Python 3 (ipykernel)
    language: python
    name: python3
---

# Quantum Optimal Control with Reinforcement Learning

In this notebook, we demonstrate how to use the `_RL` module to solve a quantum optimal control problem with reinforcement learning (RL). We define a simple state-transfer problem for a single qubit, in which the goal is to drive the system from one state to another, and we use an RL agent to optimize the control pulses that accomplish this task.
Afterwards, we address the same kind of problem formulated in terms of unitary operators.


## State to State Transfer


### Setup and Import Required Libraries

```python
# If you are running this in an environment where some packages are missing, use this cell to install them:
# !pip install qutip stable-baselines3 gymnasium

import matplotlib.pyplot as plt
import numpy as np
import qutip as qt
from qutip_qoc import Objective, optimize_pulses
from stable_baselines3 import PPO
```

### Define the Quantum Control Problem

We define the problem of transferring the qubit from the initial state |0⟩ to the target state |+⟩. The system is steered by three control Hamiltonians corresponding to the Pauli matrices, plus a drift Hamiltonian that describes the natural evolution of the qubit.

```python
# Define the initial and target states
initial_state = qt.basis(2, 0)  # |0⟩
target_state = (qt.basis(2, 0) + qt.basis(2, 1)).unit()  # |+⟩
# target_state = qt.basis(2, 1)  # |1⟩

# Define the control Hamiltonians (Pauli matrices)
H_c = [qt.sigmax(), qt.sigmay(), qt.sigmaz()]

# Define the drift Hamiltonian
w, d = 0.1, 1.0
H_d = 1 / 2 * (w * qt.sigmaz() + d * qt.sigmax())

# Combine the Hamiltonians into a single list
H = [H_d] + H_c

# Define the objective
objectives = [Objective(initial=initial_state, H=H, target=target_state)]

# Define the control parameters with bounds
control_parameters = {
    "p": {"bounds": [(-13, 13)]},
}

# Define the time interval
tlist = np.linspace(0, 10, 100)

# Define algorithm-specific settings
algorithm_kwargs = {
    "fid_err_targ": 0.01,
    "alg": "RL",
    "max_iter": 23000,
    "shorter_pulses": True,
}
optimizer_kwargs = {}
```

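Before training, a quick optional check (not part of the original workflow; it only uses the objects defined in the cell above) shows that the drift Hamiltonian alone does not accomplish the transfer, which is why control pulses are needed:

```python
# Evolve |0> under the drift Hamiltonian only (no control pulses) and record
# the best fidelity with the target state |+> reached along the way.
drift_evolution = qt.sesolve(H_d, initial_state, tlist)
best_fidelity = max(qt.fidelity(state, target_state) for state in drift_evolution.states)
print(f"Best fidelity with the target under drift alone: {best_fidelity:.3f}")
```
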
Note that `max_iter` defines the maximum number of episodes the algorithm can execute, while the 100 points in `tlist` set the maximum number of steps per episode.
If `shorter_pulses` is True, training takes longer because, in addition to checking whether the target infidelity is reached, the algorithm tries to solve the problem using as few time steps as possible.
If it is False, the algorithm takes less time and stops as soon as it finds an episode with infidelity <= the target infidelity.


### Initialize and Train the RL Environment

Now we call the `optimize_pulses()` function, passing it the control problem defined above.
It creates an instance of the `_RL` class, which sets up the reinforcement learning environment and starts the training.
Finally, it returns the optimization results as an object of the `Result` class.

```python
# Initialize the RL environment and start training
rl_result = optimize_pulses(
    objectives, control_parameters, tlist, algorithm_kwargs, optimizer_kwargs
)
```

### Analyze the Results

After the training is complete, we can analyze the results obtained by the RL agent.
In the training log shown above, you can observe how the number of steps per episode (`ep_len_mean`) decreases and the average episode reward (`ep_rew_mean`) increases during training.

We can now inspect the fields of the `Result` object, which include the final infidelity, the optimized control parameters and more.

```python
print(rl_result)
```

```python
# We can also visualize the initial, target and final states on the Bloch sphere
bloch_sp = qt.Bloch()
bloch_sp.add_states(initial_state)  # green
bloch_sp.add_states(target_state)  # orange
bloch_sp.add_states(rl_result._final_states[0])  # blue
bloch_sp.show()
```

## Unitary Operators

Now we show how to tackle a problem similar to the previous one, but this time, instead of reaching a specific target state, the goal is to start from the identity operator and evolve it in a controlled way until a specific unitary operator is obtained, such as the Hadamard gate.

The control problem is similar to the previous one: we only need to change the initial state and the target (which are now operators) and update the objective.
We can also change the number of episodes allowed for this task through `max_iter`.
By setting `shorter_pulses` to False, the algorithm stops as soon as it finds an episode that satisfies the target infidelity.

```python
initial = qt.qeye(2)  # Identity
target = qt.gates.hadamard_transform()

objectives = [Objective(initial, H, target)]

# Define the control parameters with bounds
control_parameters = {
    "p": {"bounds": [(-13, 13)]},
}

algorithm_kwargs = {
    "fid_err_targ": 0.01,
    "alg": "RL",
    "max_iter": 300,
    "shorter_pulses": False,
}
```

```python
# Initialize the RL environment and start training
rl_result = optimize_pulses(
    objectives, control_parameters, tlist, algorithm_kwargs, optimizer_kwargs
)
```

```python
print(rl_result)
```

```python
# In this case we can show the Hinton diagram of the final operator
fig, ax = qt.hinton(rl_result._final_states[0])
ax.set_title("hinton")
```

```python
# Hinton diagram of the ideal Hadamard gate, for comparison
U = qt.gates.hadamard_transform()
fig, ax = qt.hinton(U)
```

We are using the PSU norm in the infidelity calculation, so the transformation found is correct independently of a global phase.

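As a small illustration of what phase insensitivity means, the overlap below (a sketch using the `_final_states` field shown above; the expression is the standard |Tr(U_target† U)| / d overlap, not necessarily the exact formula used internally by `qutip-qoc`) equals 1 whenever the two operators agree up to a global phase:

```python
# Phase-insensitive overlap between the operator reached by the agent and the
# ideal Hadamard gate defined in the previous cell.
U_final = rl_result._final_states[0]
dim = U_final.shape[0]
phase_insensitive_overlap = abs((U.dag() * U_final).tr()) / dim
print(f"Phase-insensitive overlap with the Hadamard gate: {phase_insensitive_overlap:.4f}")
```
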