tensorflow
diff --git a/‎lingvo/core/BUILD
Lines changed: 2 additions & 0 deletions b/‎lingvo/core/BUILD
Lines changed: 2 additions & 0 deletions
diff --git a/‎lingvo/core/py_utils_test.py
Lines changed: 63 additions & 0 deletions b/‎lingvo/core/py_utils_test.py
Lines changed: 63 additions & 0 deletions
diff --git a/‎lingvo/core/testdata/BUILD
Lines changed: 7 additions & 0 deletions b/‎lingvo/core/testdata/BUILD
Lines changed: 7 additions & 0 deletions
diff --git a/‎lingvo/core/testdata/qat_test_4bit_weights.npy
252 KB b/‎lingvo/core/testdata/qat_test_4bit_weights.npy
252 KB
diff --git a/‎lingvo/core/testdata/qat_test_8bit_weights.npy
252 KB b/‎lingvo/core/testdata/qat_test_8bit_weights.npy
252 KB
diff --git a/‎lingvo/core/testdata/qat_test_bias.npy
8 KB b/‎lingvo/core/testdata/qat_test_bias.npy
8 KB
diff --git a/‎lingvo/core/testdata/qat_test_inputs.npy
16.1 KB b/‎lingvo/core/testdata/qat_test_inputs.npy
16.1 KB
diff --git a/‎lingvo/core/testdata/qat_test_output_4bit_weight_qat_false.npy
4.06 KB b/‎lingvo/core/testdata/qat_test_output_4bit_weight_qat_false.npy
4.06 KB
diff --git a/‎lingvo/core/testdata/qat_test_output_4bit_weight_qat_true.npy
4.06 KB b/‎lingvo/core/testdata/qat_test_output_4bit_weight_qat_true.npy
4.06 KB
diff --git a/‎lingvo/core/testdata/qat_test_output_8bit_weight_qat_false.npy
4.06 KB b/‎lingvo/core/testdata/qat_test_output_8bit_weight_qat_false.npy
4.06 KB
diff --git a/‎lingvo/core/testdata/qat_test_output_8bit_weight_qat_true.npy
4.06 KB b/‎lingvo/core/testdata/qat_test_output_8bit_weight_qat_true.npy
4.06 KB
@@ -1475,6 +1475,7 @@ pytype_strict_test(
     size = "medium",
     srcs = ["py_utils_test.py"],
     args = ["--noenable_eager_execution"],
+    data = ["//lingvo/core/testdata:quantization_test_data"],
     deps = [
         ":py_utils_test_lib",
         # Implicit freezegun dependency.
@@ -1487,6 +1488,7 @@ pytype_strict_test(
     name = "py_utils_eager_test",
     srcs = ["py_utils_test.py"],
     args = ["--enable_eager_execution"],
+    data = ["//lingvo/core/testdata:quantization_test_data"],
     main = "py_utils_test.py",
     deps = [
         ":py_utils_test_lib",
 
@@ -1352,6 +1352,69 @@ def testQAT(self, qat_output, expected):
       )
       self.assertAllClose(self.evaluate(x), expected)
 
+  @parameterized.named_parameters(
+      (
+          '4bit_weight_qat_output_false',
+          False,
+          'core/testdata/qat_test_4bit_weights.npy',
+          'core/testdata/qat_test_output_4bit_weight_qat_false.npy',
+      ),
+      (
+          '4bit_weight_qat_output_true',
+          True,
+          'core/testdata/qat_test_4bit_weights.npy',
+          'core/testdata/qat_test_output_4bit_weight_qat_true.npy',
+      ),
+      (
+          '8bit_weight_qat_output_false',
+          False,
+          'core/testdata/qat_test_8bit_weights.npy',
+          'core/testdata/qat_test_output_8bit_weight_qat_false.npy',
+      ),
+      (
+          '8bit_weight_qat_output_true',
+          True,
+          'core/testdata/qat_test_8bit_weights.npy',
+          'core/testdata/qat_test_output_8bit_weight_qat_true.npy',
+      ),
+  )
+  def testEinsumQuantization(self, qat_output, weights_path, expected):
+    """Compares MultiTaskProjection output against golden .npy fixtures."""
+    weights_path = test_helper.test_src_dir_path(weights_path)
+    weights = tf.convert_to_tensor(np.load(weights_path), tf.float32)
+    bias_path = test_helper.test_src_dir_path('core/testdata/qat_test_bias.npy')
+    bias = tf.convert_to_tensor(np.load(bias_path), tf.float32)
+    inputs_path = test_helper.test_src_dir_path(
+        'core/testdata/qat_test_inputs.npy'
+    )
+    inputs = tf.convert_to_tensor(np.load(inputs_path), tf.float32)
+    output_path = test_helper.test_src_dir_path(expected)
+    output = tf.convert_to_tensor(np.load(output_path), tf.float32)
+
+    quant_layer_p = layers.MultitaskProjectionEinsumLayer.Params()
+    quant_layer_p.name = 'testQAT'
+    quant_layer_p.input_dim = 256
+    quant_layer_p.output_dim = 126
+    quant_layer_p.num_tasks = 8
+
+    with self.session(use_gpu=False):
+      x = self.evaluate(
+          py_utils.MultiTaskProjection(
+              weights=weights,
+              biases=bias,
+              inputs=inputs,
+              tasks=1,
+              einsum_order='select_and_multiply',
+              quant_layer=layers.MultitaskProjectionEinsumLayer(quant_layer_p),
+              w_q_name='w',
+              w_q_domain='default',
+              qat_output=qat_output,
+          )
+      )
+      # Different server CPUs produce slightly different results; atol=2.5e-3
+      # is a safe margin since outputs are on the order of 1e+4.
+      self.assertAllClose(x, output, atol=2.5e-3)
+
   def testShardedFilePatternToGlob(self):
     file_pattern = '/some/path/to/file@8'
     self.assertEqual('/some/path/to/file-?????-of-00008',
 
@@ -15,3 +15,10 @@ filegroup(
         "en-1k.spm.*",
     ]),
 )
+
+filegroup(
+    name = "quantization_test_data",
+    data = glob([
+        "qat_test_*",  # .npy fixtures loaded by py_utils_test.py.
+    ]),
+)