Reorder weight quantization and task selection so that quantization is applied after the select step; this prevents accidentally accumulating size from unused weights.

lingvo-bot · copybara-github · commit d0bd5515f7af · 2024-07-22T16:24:45.000-07:00
PiperOrigin-RevId: 654933747
diff --git a/lingvo/core/py_utils.py b/lingvo/core/py_utils.py
@@ -7036,14 +7036,16 @@ def MultiTaskProjection(
   # o - output_dim
 
   if einsum_order == 'select_and_multiply':
-    # Weights quantization:
-    weights = quant_layer.QWeight(weights, domain=w_q_domain)
-    weights = quant_layer.ToAqtWeight(w_q_name, weights, feature_axis=-1)
     # select..
     # [{batch,} {time,} input_dim, output_dim]
     selected_weights = tf.einsum(
         f'{b_task}{t_task}k,kio->{b_task}{t_task}io', tasks_onehot, weights
     )
+    # Weights quantization:
+    selected_weights = quant_layer.QWeight(selected_weights, domain=w_q_domain)
+    selected_weights = quant_layer.ToAqtWeight(
+        w_q_name, selected_weights, feature_axis=-1
+    )
     if qat_output:
       # .. and multiply
       # [batch, {time,} output_dim]