-
Notifications
You must be signed in to change notification settings - Fork 15.6k
[RISCV] Update Andes45 vector load/stores scheduling info #173806
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
tclin914
wants to merge
1
commit into
llvm:main
Choose a base branch
from
tclin914:andes45-sched-rvv-vlsu
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
+1,947
−1,792
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This PR adds latency/throughput for all RVV load/stores to the Andes45 series scheduling model.
Member
|
@llvm/pr-subscribers-backend-risc-v Author: Jim Lin (tclin914) Changes: This PR adds latency/throughput for all RVV loads/stores to the Andes45 series scheduling model. Patch is 884.84 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/173806.diff 5 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVSchedAndes45.td b/llvm/lib/Target/RISCV/RISCVSchedAndes45.td
index d5f523711100a..8e4b96a205edb 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedAndes45.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedAndes45.td
@@ -8,6 +8,27 @@
//===----------------------------------------------------------------------===//
+defvar Andes45VLEN = 512;
+defvar Andes45DLEN = 512;
+defvar Andes45VLEN_DLEN_RATIO = !div(Andes45VLEN, Andes45DLEN);
+
+assert !or(!eq(Andes45VLEN_DLEN_RATIO, 1), !eq(Andes45VLEN_DLEN_RATIO, 2)),
+ "Andes45VLEN / Andes45DLEN should be 1 or 2";
+
+defvar Andes45BIU_DATA_WIDTH = 512;
+defvar Andes45DLEN_BIU_DATA_WIDTH_RATIO = !div(Andes45DLEN, Andes45BIU_DATA_WIDTH);
+
+assert !or(!eq(Andes45DLEN_BIU_DATA_WIDTH_RATIO, 1), !eq(Andes45DLEN_BIU_DATA_WIDTH_RATIO, 2)),
+ "Andes45DLEN / Andes45BIU_DATA_WIDTH should be 1 or 2";
+
+// HVM region: VLSU_MEM_DW equals DLEN
+// Cachable/Non-cachable region: VLSU_MEM_DW equals BIU_DATA_WIDTH
+defvar Andes45VLSU_MEM_DW = Andes45BIU_DATA_WIDTH;
+defvar Andes45VLEN_VLSU_MEM_DW_RATIO = !div(Andes45VLEN, Andes45VLSU_MEM_DW);
+
+// There are various latencies depending on the memory type and status.
+defvar VLSU_MEM_LATENCY = 13;
+
// The worst case LMUL is the largest LMUL.
class Andes45IsWorstCaseMX<string mx, list<string> MxList> {
defvar LLMUL = LargestLMUL<MxList>.r;
@@ -22,6 +43,45 @@ class Andes45IsWorstCaseMXSEW<string mx, int sew, list<string> MxList,
bit c = !and(!eq(mx, LLMUL), !eq(sew, SSEW));
}
+// (VLEN/VLSU_MEM_DW)*EMUL
+class Andes45GetCyclesLoadStore<string mx> {
+ int c = !cond(
+ !eq(mx, "M1") : !mul(Andes45VLEN_VLSU_MEM_DW_RATIO, 1),
+ !eq(mx, "M2") : !mul(Andes45VLEN_VLSU_MEM_DW_RATIO, 2),
+ !eq(mx, "M4") : !mul(Andes45VLEN_VLSU_MEM_DW_RATIO, 4),
+ !eq(mx, "M8") : !mul(Andes45VLEN_VLSU_MEM_DW_RATIO, 8),
+ !eq(mx, "MF2") : !mul(Andes45VLEN_VLSU_MEM_DW_RATIO, 1),
+ !eq(mx, "MF4") : !mul(Andes45VLEN_VLSU_MEM_DW_RATIO, 1),
+ !eq(mx, "MF8") : !mul(Andes45VLEN_VLSU_MEM_DW_RATIO, 1)
+ );
+}
+
+class Andes45GetCyclesOnePerElement<string mx, int sew> {
+ defvar VL = !div(Andes45VLEN, sew);
+ int c = !cond(
+ !eq(mx, "M1") : VL,
+ !eq(mx, "M2") : !mul(VL, 2),
+ !eq(mx, "M4") : !mul(VL, 4),
+ !eq(mx, "M8") : !mul(VL, 8),
+ !eq(mx, "MF2") : !div(VL, 2),
+ !eq(mx, "MF4") : !div(VL, 4),
+ !eq(mx, "MF8") : !div(VL, 8)
+ );
+}
+
+// When fractional LMUL is used, the LMUL used in calculation is 1.
+class Andes45GetLMULValue<string mx> {
+ int c = !cond(
+ !eq(mx, "M1") : 1,
+ !eq(mx, "M2") : 2,
+ !eq(mx, "M4") : 4,
+ !eq(mx, "M8") : 8,
+ !eq(mx, "MF2") : 1,
+ !eq(mx, "MF4") : 1,
+ !eq(mx, "MF8") : 1
+ );
+}
+
def Andes45Model : SchedMachineModel {
let MicroOpBufferSize = 0; // Andes45 is in-order processor
let IssueWidth = 2; // 2 micro-ops dispatched per cycle
@@ -372,58 +432,153 @@ def : WriteRes<WriteVSETIVLI, [Andes45CSR]>;
def : WriteRes<WriteVSETVL, [Andes45CSR]>;
// 7. Vector Loads and Stores
+
+// Unit-stride loads and stores
+
+// The latency for loads is (4+VLSU_MEM_LATENCY).
+// The throughput for loads and stores is (VLEN/VLSU_MEM_DW)*EMUL.
foreach mx = SchedMxList in {
+ defvar Cycles = Andes45GetCyclesLoadStore<mx>.c;
defvar IsWorstCase = Andes45IsWorstCaseMX<mx, SchedMxList>.c;
- // Unit-stride loads and stores
- defm "" : LMULWriteResMX<"WriteVLDE", [Andes45VLSU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVLDFF", [Andes45VLSU], mx, IsWorstCase>;
+ let Latency = !add(4, VLSU_MEM_LATENCY), ReleaseAtCycles = [Cycles] in {
+ defm "" : LMULWriteResMX<"WriteVLDE", [Andes45VLSU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLDFF", [Andes45VLSU], mx, IsWorstCase>;
+ }
+ let ReleaseAtCycles = [Cycles] in
defm "" : LMULWriteResMX<"WriteVSTE", [Andes45VLSU], mx, IsWorstCase>;
// Mask loads and stores
+ let Latency = !add(4, VLSU_MEM_LATENCY), ReleaseAtCycles = [Cycles] in
defm "" : LMULWriteResMX<"WriteVLDM", [Andes45VLSU], mx, IsWorstCase=!eq(mx, "M1")>;
+ let ReleaseAtCycles = [Cycles] in
defm "" : LMULWriteResMX<"WriteVSTM", [Andes45VLSU], mx, IsWorstCase=!eq(mx, "M1")>;
+}
+
+// Strided loads and stores.
+
+// Strided loads and stores operate at one element per cycle.
+// We use the SEW to compute the number of elements for throughput.
+// The latency for loads is (4+VLSU_MEM_LATENCY+(DLEN/EEW)).
+// The throughput for loads and stores is VL.
+foreach mx = SchedMxList in {
+ defvar Cycles = Andes45GetCyclesOnePerElement<mx, 8>.c;
+ defvar IsWorstCase = Andes45IsWorstCaseMX<mx, SchedMxList>.c;
- // Strided and indexed loads and stores
foreach eew = [8, 16, 32, 64] in {
- defm "" : LMULWriteResMX<"WriteVLDS" # eew, [Andes45VLSU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVLDUX" # eew, [Andes45VLSU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVLDOX" # eew, [Andes45VLSU], mx, IsWorstCase>;
+ let Latency = !add(4, !add(VLSU_MEM_LATENCY, !div(Andes45DLEN, eew))),
+ ReleaseAtCycles = [Cycles] in
+ defm "" : LMULWriteResMX<"WriteVLDS" # eew, [Andes45VLSU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSTS" # eew, [Andes45VLSU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSTUX" # eew, [Andes45VLSU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSTOX" # eew, [Andes45VLSU], mx, IsWorstCase>;
+ let ReleaseAtCycles = [Cycles] in
+ defm "" : LMULWriteResMX<"WriteVSTS" # eew, [Andes45VLSU], mx, IsWorstCase>;
}
}
-// Segmented loads and stores
+// Indexed loads and stores
+
+// Indexed loads and stores operate at one element per cycle.
+// We use the SEW to compute the number of elements for throughput.
+// The latency for loads is (5+VLSU_MEM_LATENCY+(DLEN/EEW)).
+// The throughput for loads and stores is (VL+EMUL-1).
+foreach mx = SchedMxList in {
+ defvar Cycles = Andes45GetCyclesOnePerElement<mx, 8>.c;
+ defvar IsWorstCase = Andes45IsWorstCaseMX<mx, SchedMxList>.c;
+
+ foreach eew = [8, 16, 32, 64] in {
+ let Latency = !add(5, !add(VLSU_MEM_LATENCY, !div(Andes45DLEN, eew))),
+ ReleaseAtCycles = [!add(Cycles, !sub(Andes45GetLMULValue<mx>.c, 1))] in {
+ defm "" : LMULWriteResMX<"WriteVLDUX" # eew, [Andes45VLSU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLDOX" # eew, [Andes45VLSU], mx, IsWorstCase>;
+ }
+
+ let ReleaseAtCycles = [!add(Cycles, !sub(Andes45GetLMULValue<mx>.c, 1))] in {
+ defm "" : LMULWriteResMX<"WriteVSTUX" # eew, [Andes45VLSU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSTOX" # eew, [Andes45VLSU], mx, IsWorstCase>;
+ }
+ }
+}
+
+// Unit-Stride Segmented Loads and Stores
+
+// The latency for loads is (4+VLSU_MEM_LATENCY+EMUL*NFIELDS+2).
+// The throughput for loads and stores is (VLEN/VLSU_MEM_DW)*EMUL*NFIELDS.
+foreach mx = SchedMxList in {
+ foreach nf=2-8 in {
+ foreach eew = [8, 16, 32, 64] in {
+ defvar Cycles = Andes45GetCyclesLoadStore<mx>.c;
+ defvar IsWorstCase = Andes45IsWorstCaseMX<mx, SchedMxList>.c;
+ defvar Size = !mul(Andes45GetLMULValue<mx>.c, nf);
+
+ let Latency = !add(4, !add(VLSU_MEM_LATENCY, !add(Size, 2))),
+ ReleaseAtCycles = [!mul(Cycles, nf)] in {
+ defm "" : LMULWriteResMX<"WriteVLSEG" # nf # "e" # eew,
+ [Andes45VLSU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLSEGFF" # nf # "e" # eew,
+ [Andes45VLSU], mx, IsWorstCase>;
+ }
+ // TODO
+ let ReleaseAtCycles = [Cycles] in
+ defm "" : LMULWriteResMX<"WriteVSSEG" # nf # "e" # eew,
+ [Andes45VLSU], mx, IsWorstCase>;
+ }
+ }
+}
+
+// Strided Segmented Loads and Stores
+
+// The latency for loads is (5+VLSU_MEM_LATENCY+(DLEN/EEW))
+// The throughput for loads and stores is VL.
foreach mx = SchedMxList in {
foreach nf=2-8 in {
foreach eew = [8, 16, 32, 64] in {
+ defvar Cycles = Andes45GetCyclesOnePerElement<mx, eew>.c;
defvar IsWorstCase = Andes45IsWorstCaseMX<mx, SchedMxList>.c;
- // Unit-stride segmented
- defm "" : LMULWriteResMX<"WriteVLSEG" # nf # "e" #eew, [Andes45VLSU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVLSEGFF" # nf # "e" #eew, [Andes45VLSU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSSEG" # nf # "e" #eew, [Andes45VLSU], mx, IsWorstCase>;
+ let Latency = !add(5, !add(VLSU_MEM_LATENCY, !div(Andes45DLEN, eew))),
+ ReleaseAtCycles = [Cycles] in
+ defm "" : LMULWriteResMX<"WriteVLSSEG" # nf # "e" # eew,
+ [Andes45VLSU], mx, IsWorstCase>;
+ let ReleaseAtCycles = [Cycles] in
+ defm "" : LMULWriteResMX<"WriteVSSSEG" # nf # "e" # eew,
+ [Andes45VLSU], mx, IsWorstCase>;
+ }
+ }
+}
+
+// Indexed Segmented Loads and Stores
- // Strided/indexed segmented
- defm "" : LMULWriteResMX<"WriteVLSSEG" # nf # "e" #eew, [Andes45VLSU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSSSEG" # nf # "e" #eew, [Andes45VLSU], mx, IsWorstCase>;
+// The latency for loads is (6+VLSU_MEM_LATENCY+(DLEN/EEW))
+// The throughput for loads and stores is (VL+EMUL-1).
+foreach mx = SchedMxList in {
+ foreach nf=2-8 in {
+ foreach eew = [8, 16, 32, 64] in {
+ defvar Cycles = Andes45GetCyclesOnePerElement<mx, eew>.c;
+ defvar IsWorstCase = Andes45IsWorstCaseMX<mx, SchedMxList>.c;
- // Indexed segmented
- defm "" : LMULWriteResMX<"WriteVLOXSEG" # nf # "e" #eew, [Andes45VLSU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVLUXSEG" # nf # "e" #eew, [Andes45VLSU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSUXSEG" # nf # "e" #eew, [Andes45VLSU], mx, IsWorstCase>;
- defm "" : LMULWriteResMX<"WriteVSOXSEG" # nf # "e" #eew, [Andes45VLSU], mx, IsWorstCase>;
+ let Latency = !add(6, !add(VLSU_MEM_LATENCY, !div(Andes45DLEN, eew))),
+ ReleaseAtCycles = [!add(Cycles, !sub(Andes45GetLMULValue<mx>.c, 1))] in {
+ defm "" : LMULWriteResMX<"WriteVLUXSEG" # nf # "e" # eew,
+ [Andes45VLSU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVLOXSEG" # nf # "e" # eew,
+ [Andes45VLSU], mx, IsWorstCase>;
+ }
+ let ReleaseAtCycles = [!add(Cycles, !sub(Andes45GetLMULValue<mx>.c, 1))] in {
+ defm "" : LMULWriteResMX<"WriteVSUXSEG" # nf # "e" # eew,
+ [Andes45VLSU], mx, IsWorstCase>;
+ defm "" : LMULWriteResMX<"WriteVSOXSEG" # nf # "e" # eew,
+ [Andes45VLSU], mx, IsWorstCase>;
+ }
}
}
}
// Whole register move/load/store
foreach LMul = [1, 2, 4, 8] in {
- def : WriteRes<!cast<SchedWrite>("WriteVLD" # LMul # "R"), [Andes45VLSU]>;
- def : WriteRes<!cast<SchedWrite>("WriteVST" # LMul # "R"), [Andes45VLSU]>;
+ let Latency = 6, ReleaseAtCycles = [!mul(LMul, 2)] in
+ def : WriteRes<!cast<SchedWrite>("WriteVLD" # LMul # "R"), [Andes45VLSU]>;
+ let ReleaseAtCycles = [!mul(LMul, 2)] in
+ def : WriteRes<!cast<SchedWrite>("WriteVST" # LMul # "R"), [Andes45VLSU]>;
def : WriteRes<!cast<SchedWrite>("WriteVMov" # LMul # "V"), [Andes45VPERMUT]>;
}
diff --git a/llvm/test/tools/llvm-mca/RISCV/Andes45/rvv-vle-vse-vlm.s b/llvm/test/tools/llvm-mca/RISCV/Andes45/rvv-vle-vse-vlm.s
index 375c615c0cc61..70f95ebe690f2 100644
--- a/llvm/test/tools/llvm-mca/RISCV/Andes45/rvv-vle-vse-vlm.s
+++ b/llvm/test/tools/llvm-mca/RISCV/Andes45/rvv-vle-vse-vlm.s
@@ -210,49 +210,49 @@ vle64ff.v v8, (a0)
# CHECK: [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
# CHECK-NEXT: 1 1 1.00 U 1 Andes45CSR VSETVLI vsetvli zero, zero, e8, mf8, ta, ma
-# CHECK-NEXT: 1 1 1.00 * 1 Andes45VLSU VLE8_V vle8.v v8, (a0)
+# CHECK-NEXT: 1 17 1.00 * 17 Andes45VLSU VLE8_V vle8.v v8, (a0)
# CHECK-NEXT: 1 1 1.00 U 1 Andes45CSR VSETVLI vsetvli zero, zero, e8, mf4, ta, ma
-# CHECK-NEXT: 1 1 1.00 * 1 Andes45VLSU VLE8_V vle8.v v8, (a0)
+# CHECK-NEXT: 1 17 1.00 * 17 Andes45VLSU VLE8_V vle8.v v8, (a0)
# CHECK-NEXT: 1 1 1.00 U 1 Andes45CSR VSETVLI vsetvli zero, zero, e8, mf2, ta, ma
-# CHECK-NEXT: 1 1 1.00 * 1 Andes45VLSU VLE8_V vle8.v v8, (a0)
+# CHECK-NEXT: 1 17 1.00 * 17 Andes45VLSU VLE8_V vle8.v v8, (a0)
# CHECK-NEXT: 1 1 1.00 U 1 Andes45CSR VSETVLI vsetvli zero, zero, e8, m1, ta, ma
-# CHECK-NEXT: 1 1 1.00 * 1 Andes45VLSU VLE8_V vle8.v v8, (a0)
+# CHECK-NEXT: 1 17 1.00 * 17 Andes45VLSU VLE8_V vle8.v v8, (a0)
# CHECK-NEXT: 1 1 1.00 U 1 Andes45CSR VSETVLI vsetvli zero, zero, e8, m2, ta, ma
-# CHECK-NEXT: 1 1 1.00 * 1 Andes45VLSU VLE8_V vle8.v v8, (a0)
+# CHECK-NEXT: 1 17 2.00 * 17 Andes45VLSU[2] VLE8_V vle8.v v8, (a0)
# CHECK-NEXT: 1 1 1.00 U 1 Andes45CSR VSETVLI vsetvli zero, zero, e8, m4, ta, ma
-# CHECK-NEXT: 1 1 1.00 * 1 Andes45VLSU VLE8_V vle8.v v8, (a0)
+# CHECK-NEXT: 1 17 4.00 * 17 Andes45VLSU[4] VLE8_V vle8.v v8, (a0)
# CHECK-NEXT: 1 1 1.00 U 1 Andes45CSR VSETVLI vsetvli zero, zero, e8, m8, ta, ma
-# CHECK-NEXT: 1 1 1.00 * 1 Andes45VLSU VLE8_V vle8.v v8, (a0)
+# CHECK-NEXT: 1 17 8.00 * 17 Andes45VLSU[8] VLE8_V vle8.v v8, (a0)
# CHECK-NEXT: 1 1 1.00 U 1 Andes45CSR VSETVLI vsetvli zero, zero, e16, mf4, ta, ma
-# CHECK-NEXT: 1 1 1.00 * 1 Andes45VLSU VLE16_V vle16.v v8, (a0)
+# CHECK-NEXT: 1 17 1.00 * 17 Andes45VLSU VLE16_V vle16.v v8, (a0)
# CHECK-NEXT: 1 1 1.00 U 1 Andes45CSR VSETVLI vsetvli zero, zero, e16, mf2, ta, ma
-# CHECK-NEXT: 1 1 1.00 * 1 Andes45VLSU VLE16_V vle16.v v8, (a0)
+# CHECK-NEXT: 1 17 1.00 * 17 Andes45VLSU VLE16_V vle16.v v8, (a0)
# CHECK-NEXT: 1 1 1.00 U 1 Andes45CSR VSETVLI vsetvli zero, zero, e16, m1, ta, ma
-# CHECK-NEXT: 1 1 1.00 * 1 Andes45VLSU VLE16_V vle16.v v8, (a0)
+# CHECK-NEXT: 1 17 1.00 * 17 Andes45VLSU VLE16_V vle16.v v8, (a0)
# CHECK-NEXT: 1 1 1.00 U 1 Andes45CSR VSETVLI vsetvli zero, zero, e16, m2, ta, ma
-# CHECK-NEXT: 1 1 1.00 * 1 Andes45VLSU VLE16_V vle16.v v8, (a0)
+# CHECK-NEXT: 1 17 2.00 * 17 Andes45VLSU[2] VLE16_V vle16.v v8, (a0)
# CHECK-NEXT: 1 1 1.00 U 1 Andes45CSR VSETVLI vsetvli zero, zero, e16, m4, ta, ma
-# CHECK-NEXT: 1 1 1.00 * 1 Andes45VLSU VLE16_V vle16.v v8, (a0)
+# CHECK-NEXT: 1 17 4.00 * 17 Andes45VLSU[4] VLE16_V vle16.v v8, (a0)
# CHECK-NEXT: 1 1 1.00 U 1 Andes45CSR VSETVLI vsetvli zero, zero, e16, m8, ta, ma
-# CHECK-NEXT: 1 1 1.00 * 1 Andes45VLSU VLE16_V vle16.v v8, (a0)
+# CHECK-NEXT: 1 17 8.00 * 17 Andes45VLSU[8] VLE16_V vle16.v v8, (a0)
# CHECK-NEXT: 1 1 1.00 U 1 Andes45CSR VSETVLI vsetvli zero, zero, e32, mf2, ta, ma
-# CHECK-NEXT: 1 1 1.00 * 1 Andes45VLSU VLE32_V vle32.v v8, (a0)
+# CHECK-NEXT: 1 17 1.00 * 17 Andes45VLSU VLE32_V vle32.v v8, (a0)
# CHECK-NEXT: 1 1 1.00 U 1 Andes45CSR VSETVLI vsetvli zero, zero, e32, m1, ta, ma
-# CHECK-NEXT: 1 1 1.00 * 1 Andes45VLSU VLE32_V vle32.v v8, (a0)
+# CHECK-NEXT: 1 17 1.00 * 17 Andes45VLSU VLE32_V vle32.v v8, (a0)
# CHECK-NEXT: 1 1 1.00 U 1 Andes45CSR VSETVLI vsetvli zero, zero, e32, m2, ta, ma
-# CHECK-NEXT: 1 1 1.00 * 1 Andes45VLSU VLE32_V vle32.v v8, (a0)
+# CHECK-NEXT: 1 17 2.00 * 17 Andes45VLSU[2] VLE32_V vle32.v v8, (a0)
# CHECK-NEXT: 1 1 1.00 U 1 Andes45CSR VSETVLI vsetvli zero, zero, e32, m4, ta, ma
-# CHECK-NEXT: 1 1 1.00 * 1 Andes45VLSU VLE32_V vle32.v v8, (a0)
+# CHECK-NEXT: 1 17 4.00 * 17 Andes45VLSU[4] VLE32_V vle32.v v8, (a0)
# CHECK-NEXT: 1 1 1.00 U 1 Andes45CSR VSETVLI vsetvli zero, zero, e32, m8, ta, ma
-# CHECK-NEXT: 1 1 1.00 * 1 Andes45VLSU VLE32_V vle32.v v8, (a0)
+# CHECK-NEXT: 1 17 8.00 * 17 Andes45VLSU[8] VLE32_V vle32.v v8, (a0)
# CHECK-NEXT: 1 1 1.00 U 1 Andes45CSR VSETVLI vsetvli zero, zero, e64, m1, ta, ma
-# CHECK-NEXT: 1 1 1.00 *...
[truncated]
|
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
This PR adds latency/throughput for all RVV load/stores to the Andes45 series scheduling model.