@@ -2716,6 +2716,14 @@ bool canDoCSEForSendPayloadCopies(
27162716 std::map<G4_INST *, std::vector<std::pair<G4_INST *, G4_INST *>>>
27172717 &sendUnRemovedPayLoadCopyMap,
27182718 IR_Builder &builder) {
2719+ if (prevInst->getMaskOffset () != succInst->getMaskOffset ()) {
2720+ return false ;
2721+ }
2722+
2723+ if (prevInst->getExecSize () != succInst->getExecSize ()) {
2724+ return false ;
2725+ }
2726+
27192727 // Same SFID
27202728 G4_SendgDesc *prevSendDesc =
27212729 static_cast <G4_SendgDesc *>(prevInst->getMsgDesc ());
@@ -2752,6 +2760,16 @@ bool canDoCSEForSendPayloadCopies(
27522760 G4_Operand *dst1 = sendPayLoadCopyMap[prevInst][i]->getDst ();
27532761 G4_Operand *dst2 = sendPayLoadCopyMap[succInst][i]->getDst ();
27542762
2763+ if (sendPayLoadCopyMap[prevInst][i]->getMaskOffset () !=
2764+ sendPayLoadCopyMap[succInst][i]->getMaskOffset ()) {
2765+ return false ;
2766+ }
2767+
2768+ if (sendPayLoadCopyMap[prevInst][i]->getExecSize () !=
2769+ sendPayLoadCopyMap[succInst][i]->getExecSize ()) {
2770+ return false ;
2771+ }
2772+
27552773 if ((dst1->asDstRegRegion ()->getRegOff () !=
27562774 dst2->asDstRegRegion ()->getRegOff ()) ||
27572775 (dst1->asDstRegRegion ()->getSubRegOff () !=
@@ -2764,13 +2782,13 @@ bool canDoCSEForSendPayloadCopies(
27642782 }
27652783 }
27662784
2767- // If using sendg, 8 indexes can supported per-mov. So, we use 1/8 as
2768- // threshold for mov instruction reduction here.
2769- if (!((sendPayLoadCopyMap[prevInst]. size () <= 8 ) &&
2770- (sendPayLoadCopyMap[prevInst]. size () > 1 ) &&
2771- (diffCopies. size () <= 1 )) &&
2772- ((( float )diffCopies. size () / sendPayLoadCopyMap[prevInst]. size ()) >
2773- 0.125 ) ) {
2785+ // Reject when more than one payload copy differs (diffCopies has > 1 entry).
2786+ if (diffCopies. size () > 1 ) {
2787+ return false ;
2788+ }
2789+
2790+ // Reject when the differing copies exceed 15% of prevInst's payload copies.
2791+ if ((( float )diffCopies. size () / sendPayLoadCopyMap[prevInst]. size ()) > 0.15 ) {
27742792 return false ;
27752793 }
27762794
0 commit comments