Implemented training with quantum pipeline.

IChowdhury01 · IChowdhury01 · commit 7f7bb0026937 · 2022-06-17T10:29:50.000-04:00
Added new dataset.
diff --git a/app/src/main/classical_user_input.py b/app/src/main/classical_user_input.py
@@ -1,5 +1,4 @@
-from app.src.main.pipelines.classical import classical_compute
-
+from app.src.main.pipelines.classical import send_into_classical_pipeline
 
 test_sentence = input("Enter a sentence: ")
-classical_compute(test_sentence)
+send_into_classical_pipeline(test_sentence)
diff --git a/app/src/main/constants/sample_sentences.py b/app/src/main/constants/sample_sentences.py
@@ -7,4 +7,3 @@
 ADS_ISSUED = "Initial ADS-Amount is equal to 6000000 ."
 
 NON_ADR_WALKING = "John walks in the park ."
-
diff --git a/app/src/main/pipelines/classical.py b/app/src/main/pipelines/classical.py
@@ -1,10 +1,8 @@
 from discopy import grammar, Dim
 from lambeq import BobcatParser, Rewriter, AtomicType, MPSAnsatz
 
-from app.src.main.constants import sample_sentences
 
-
-def classical_compute(sentence):
+def send_into_classical_pipeline(sentence):
     # Convert to string diagram
     parser = BobcatParser(verbose='text')
     diagram = parser.sentence2diagram(sentence)  # syntax-based, not bag-of-words
@@ -13,7 +11,6 @@ def classical_compute(sentence):
     # Rewrite string diagram, to reduce performance costs / training time
     rewriter = Rewriter(['prepositional_phrase', 'determiner'])  # lower tensor count on prepositions
     prep_reduced_diagram = rewriter(diagram).normal_form()
-    prep_reduced_diagram.draw(figsize=(9, 4), fontsize=13)
 
     curry_functor = Rewriter(['curry'])  # reduce number of cups
     curried_diagram = curry_functor(prep_reduced_diagram).normal_form()
@@ -32,8 +29,3 @@ def classical_compute(sentence):
     mps_diagram.draw(figsize=(13, 7), fontsize=13)
 
     # Todo: Training
-
-if __name__ == "__main__":
-    test_sentence = sample_sentences.ADS_ISSUED
-    print(f"Input string: {test_sentence}")
-    classical_compute(test_sentence)
diff --git a/app/src/main/pipelines/quantum.py b/app/src/main/pipelines/quantum.py
@@ -1,17 +1,22 @@
 import os
+import warnings
 import webbrowser
 
+import numpy as np
 from discopy import grammar
-from lambeq import BobcatParser, Rewriter, AtomicType, IQPAnsatz
+from lambeq import BobcatParser, Rewriter, AtomicType, IQPAnsatz, remove_cups, TketModel, QuantumTrainer, SPSAOptimizer, \
+    Dataset
 from matplotlib import pyplot
 from pytket.circuit.display import render_circuit_as_html
-from pytket.extensions.qiskit import tk_to_qiskit
+from pytket.extensions.qiskit import tk_to_qiskit, AerBackend
 
-from app.src.main.constants import sample_sentences
-from settings import GEN_PATH
+from settings import GEN_PATH, PROJECT_ROOT_PATH
 
+PATH_TO_TRAINING = os.path.join(PROJECT_ROOT_PATH, 'data', 'rp_train_data.txt')
+PATH_TO_TESTING = os.path.join(PROJECT_ROOT_PATH, 'data', 'rp_test_data.txt')
 
-def quantum_compute(sentence):
+
+def send_into_quantum_pipeline(sentence):
     # Convert to string diagram
     parser = BobcatParser(verbose='text')
     diagram = parser.sentence2diagram(sentence)  # syntax-based, not bag-of-words
@@ -20,7 +25,6 @@ def quantum_compute(sentence):
     # Rewrite string diagram, to reduce performance costs / training time
     rewriter = Rewriter(['prepositional_phrase', 'determiner'])  # lower tensor count on prepositions
     prep_reduced_diagram = rewriter(diagram).normal_form()
-    prep_reduced_diagram.draw(figsize=(9, 4), fontsize=13)
 
     curry_functor = Rewriter(['curry'])  # reduce number of cups
     curried_diagram = curry_functor(prep_reduced_diagram).normal_form()
@@ -33,7 +37,7 @@ def quantum_compute(sentence):
     C = AtomicType.CONJUNCTION
     ansatz = IQPAnsatz({N: 1, S: 1, P: 1, C: 1}, n_layers=4)
 
-    discopy_circuit = ansatz(diagram)   # Quantum circuit, DisCoPy format
+    discopy_circuit = ansatz(diagram)  # Quantum circuit, DisCoPy format
     discopy_circuit.draw(figsize=(15, 10))
 
     tket_circuit = discopy_circuit.to_tk()  # Quantum circuit, pytket format
@@ -46,9 +50,105 @@ def quantum_compute(sentence):
     qiskit_circuit.draw(output='mpl')
     pyplot.show()
 
-    # Todo: Training
 
-if __name__ == "__main__":
-    test_sentence = sample_sentences.NON_ADR_WALKING
-    print(f"Input string: {test_sentence}")
-    quantum_compute(test_sentence)
+def read_data(filename):
+    """Load labelled sentences from a text file.
+
+    Every non-blank line is expected to start with a single-digit binary
+    label (``0`` or ``1``) followed by the sentence text.
+
+    Args:
+        filename: Path of the data file to read.
+
+    Returns:
+        A ``(labels, sentences)`` tuple of equal-length lists. Each label is
+        the two-element list ``[t, 1 - t]`` built from the leading digit
+        ``t``; each sentence is the remainder of the line with surrounding
+        whitespace stripped.
+
+    Raises:
+        ValueError: If a non-blank line does not start with a digit.
+    """
+    labels, sentences = [], []
+    # Open the path the caller passed in, so that separate training and
+    # testing files really yield separate data sets.
+    with open(filename, encoding='utf-8') as f:
+        for line in f:
+            entry = line.strip()
+            if not entry:
+                continue  # tolerate blank lines, e.g. a trailing newline at EOF
+            t = int(entry[0])
+            labels.append([t, 1 - t])
+            sentences.append(entry[1:].strip())
+    return labels, sentences
+
+
+def train_data():
+    """Train a quantum sentence classifier and print its test accuracy.
+
+    Reads the labelled sentences at PATH_TO_TRAINING and PATH_TO_TESTING,
+    parses them with BobcatParser, turns the diagrams into IQPAnsatz
+    circuits, trains a TketModel with SPSAOptimizer on an AerBackend and
+    finally prints the accuracy measured on the test circuits.
+
+    Side effects: silences all warnings, sets the TOKENIZERS_PARALLELISM
+    environment variable and writes progress/results to stdout.
+    """
+    warnings.filterwarnings('ignore')
+    os.environ['TOKENIZERS_PARALLELISM'] = 'true'
+
+    BATCH_SIZE = 30
+    EPOCHS = 200
+    SEED = 0  # seed handed to the trainer
+
+    # Local names deliberately differ from this function's own name.
+    train_labels, train_sentences = read_data(PATH_TO_TRAINING)
+    val_labels, val_sentences = read_data(PATH_TO_TESTING)
+
+    parser = BobcatParser(root_cats=('NP', 'N'), verbose='text')
+    raw_train_diagrams = parser.sentences2diagrams(train_sentences, suppress_exceptions=True)
+    raw_val_diagrams = parser.sentences2diagrams(val_sentences, suppress_exceptions=True)
+
+    # Entries may be None (suppress_exceptions=True): drop them here ...
+    train_diagrams = [
+        diagram.normal_form()
+        for diagram in raw_train_diagrams if diagram is not None
+    ]
+    val_diagrams = [
+        diagram.normal_form()
+        for diagram in raw_val_diagrams if diagram is not None
+    ]
+
+    # ... and drop the matching labels so diagrams and labels stay aligned.
+    train_labels = [
+        label for (diagram, label)
+        in zip(raw_train_diagrams, train_labels)
+        if diagram is not None
+    ]
+    val_labels = [
+        label for (diagram, label)
+        in zip(raw_val_diagrams, val_labels)
+        if diagram is not None
+    ]
+
+    ansatz = IQPAnsatz({AtomicType.NOUN: 1, AtomicType.SENTENCE: 0},
+                       n_layers=1, n_single_qubit_params=3)
+
+    train_circuits = [ansatz(remove_cups(diagram)) for diagram in train_diagrams]
+    test_circuits = [ansatz(remove_cups(diagram)) for diagram in val_diagrams]
+    # The model is built from every circuit, training and test alike.
+    all_circuits = train_circuits + test_circuits
+
+    backend = AerBackend()
+    backend_config = {
+        'backend': backend,
+        'compilation': backend.default_compilation_pass(2),
+        'shots': 8192
+    }
+
+    model = TketModel.from_diagrams(all_circuits, backend_config=backend_config)
+
+    def loss(y_hat, y):
+        # Binary cross-entropy. Predictions are clipped away from zero so an
+        # outcome with probability 0 gives a large finite loss, not inf/nan.
+        return -np.sum(y * np.log(np.clip(y_hat, 1e-9, None))) / len(y)
+
+    def acc(y_hat, y):
+        # Halved because each label is a two-element list and both entries
+        # are compared (double counting).
+        return np.sum(np.round(y_hat) == y) / len(y) / 2
+
+    eval_metrics = {"acc": acc}
+
+    trainer = QuantumTrainer(
+        model,
+        loss_function=loss,
+        epochs=EPOCHS,
+        optimizer=SPSAOptimizer,
+        optim_hyperparams={'a': 0.05, 'c': 0.06, 'A': 0.01 * EPOCHS},
+        evaluate_functions=eval_metrics,
+        evaluate_on_train=True,
+        verbose='text',
+        seed=SEED
+    )
+
+    train_dataset = Dataset(
+        train_circuits,
+        train_labels,
+        batch_size=BATCH_SIZE)
+
+    test_dataset = Dataset(test_circuits, val_labels, shuffle=False)
+
+    trainer.fit(train_dataset, test_dataset, evaluation_step=1, logging_step=20)  # Train
+
+    # Plotting accuracy & loss for training/testing sets (disabled).
+    # NOTE: this reads the trainer's recorded costs/results, so it has to stay
+    # after fit(); presumably a pyplot.show() is also needed when re-enabling.
+    # fig, ((ax_tl, ax_tr), (ax_bl, ax_br)) = pyplot.subplots(2, 2, sharex=True, sharey='row', figsize=(10, 6))
+    # ax_tl.set_title('Training set')
+    # ax_tr.set_title('Development set')
+    # ax_bl.set_xlabel('Iterations')
+    # ax_br.set_xlabel('Iterations')
+    # ax_bl.set_ylabel('Accuracy')
+    # ax_tl.set_ylabel('Loss')
+    #
+    # colours = iter(pyplot.rcParams['axes.prop_cycle'].by_key()['color'])
+    # ax_tl.plot(trainer.train_epoch_costs[::10], color=next(colours))
+    # ax_bl.plot(trainer.train_results['acc'][::10], color=next(colours))
+    # ax_tr.plot(trainer.val_costs[::10], color=next(colours))
+    # ax_br.plot(trainer.val_results['acc'][::10], color=next(colours))
+
+    test_acc = acc(model(test_circuits), val_labels)  # Record accuracy
+    print('Test accuracy:', test_acc.item())
diff --git a/app/src/main/quantum_training.py b/app/src/main/quantum_training.py
@@ -0,0 +1,3 @@
+from app.src.main.pipelines.quantum import train_data
+
+train_data()
diff --git a/app/src/main/quantum_user_input.py b/app/src/main/quantum_user_input.py
@@ -1,5 +1,4 @@
-from app.src.main.pipelines.quantum import quantum_compute
-
+from app.src.main.pipelines.quantum import send_into_quantum_pipeline
 
 test_sentence = input("Enter a sentence: ")
-quantum_compute(test_sentence)
+send_into_quantum_pipeline(test_sentence)
diff --git a/data/classify_adr_train_data.txt b/data/classify_adr_train_data.txt
@@ -72,4 +72,6 @@
 1  Advance amount is equal to 4000 .
 1  Fixed amount is equal to 3000 .
 1  Upfront amount is equal to 1000 .
-1  Contract renews 90 days before termination .
+1  Contract renews 90 days before termination .
+1  Minimum-Balance is 95 percent .
+1  Minimum-Balance is equal to 300000 .
diff --git a/data/rp_test_data.txt b/data/rp_test_data.txt
@@ -0,0 +1,31 @@
+1 organization that fleet destroy .
+1 person that teacher teach .
+1 device that air enter .
+1 device that water enter .
+1 device that astronomer use .
+1 document that student submit .
+1 document that government sell .
+1 player that pitcher face .
+1 building that monk build .
+1 quality that artist achieve .
+1 quality that species share .
+1 quality that vehicle increase .
+1 room that ship have .
+1 room that train feature .
+1 activity that festival feature .
+1 mammal that police have .
+1 material that police use .
+1 material that excavation remove .
+1 material that water have .
+0 organization that have team .
+0 building that attract sailor .
+0 device that show time .
+0 player that hit run .
+0 quality that win election .
+0 vehicle that replace horse .
+0 scientist that discover species .
+0 phenomenon that hit island .
+0 scientist that discover star .
+0 vehicle that destroy vessel .
+0 vehicle that cross river .
+0 mammal that attack ship .
diff --git a/data/rp_train_data.txt b/data/rp_train_data.txt
@@ -0,0 +1,74 @@
+1 organization that church establish .
+1 organization that team join .
+1 organization that company sell .
+1 organization that soldier serve .
+1 organization that sailor join .
+1 organization that vessel serve .
+1 organization that church represent .
+1 person that school serve .
+1 building that astronomer build .
+1 building that astronomer own .
+1 building that archaeologist discover .
+1 building that archaeologist study .
+1 player that batsman face .
+1 building that audience fill .
+1 device that shepherd play .
+1 document that company publish .
+1 device that people wear .
+1 document that election use .
+1 document that government offer .
+1 document that person submit .
+1 player that batter face .
+1 player that pitcher strike .
+1 person that train carry .
+1 quality that election reflect .
+1 organization that player join .
+1 quality that church teach .
+1 quality that vehicle offer .
+1 room that vessel contain .
+1 room that church have .
+1 woman that child love .
+1 material that fuel contain .
+1 woman that soldier use .
+1 woman that husband have .
+1 woman that husband love .
+1 vehicle that family own .
+1 material that ship strike .
+1 mammal that shepherd use .
+1 material that officer carry .
+1 phenomenon that engine lose .
+1 room that archaeologist discover .
+1 room that school include .
+1 room that student enter .
+1 vehicle that train have .
+1 vehicle that horse pull .
+1 vehicle that island have .
+1 material that engine require .
+0 organization that establish church .
+0 organization that support child .
+0 organization that use train .
+0 person that join movement .
+0 person that lose family .
+0 building that hold festival .
+0 device that carry water .
+0 device that keep time .
+0 player that strike batter .
+0 player that allow run .
+0 building that house monk .
+0 person that take ship .
+0 room that control movement .
+0 room that hold engine .
+0 woman that have child .
+0 woman that raise child .
+0 woman that carry pitcher .
+0 woman that have husband .
+0 woman that leave husband .
+0 activity that fill air .
+0 phenomenon that raise river .
+0 scientist that visit island .
+0 material that attract farmer .
+0 phenomenon that require fuel .
+0 vehicle that enter port .
+0 vehicle that transport horse .
+0 vehicle that haul material .
+0 activity that build school .

Original file line number	Diff line number	Diff line change
`@@ -7,4 +7,3 @@`
`7`	`7`	`ADS_ISSUED = "Initial ADS-Amount is equal to 6000000 ."`
`8`	`8`
`9`	`9`	`NON_ADR_WALKING = "John walks in the park ."`
`10`		`-`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+from app.src.main.pipelines.quantum import train_data`
	`2`	`+`
	`3`	`+train_data()`