Skip to content
This repository was archived by the owner on Sep 2, 2018. It is now read-only.

Commit b01d8d2

Browse files
committed
[AMDGPU] Refactor FLAT TD instructions
Differential revision: https://reviews.llvm.org/D24072
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@280655 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent af86df2 commit b01d8d2

File tree

6 files changed

+525
-438
lines changed

6 files changed

+525
-438
lines changed

lib/Target/AMDGPU/AMDGPUInstructions.td

Lines changed: 0 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -380,12 +380,6 @@ class global_binary_atomic_op<SDNode atomic_op> : PatFrag<
380380
[{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]
381381
>;
382382

383-
class flat_binary_atomic_op<SDNode atomic_op> : PatFrag<
384-
(ops node:$ptr, node:$value),
385-
(atomic_op node:$ptr, node:$value),
386-
[{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;}]
387-
>;
388-
389383
def atomic_swap_global : global_binary_atomic_op<atomic_swap>;
390384
def atomic_add_global : global_binary_atomic_op<atomic_load_add>;
391385
def atomic_and_global : global_binary_atomic_op<atomic_load_and>;
@@ -404,19 +398,6 @@ def atomic_cmp_swap_global_nortn : PatFrag<
404398
[{ return SDValue(N, 0).use_empty(); }]
405399
>;
406400

407-
def atomic_swap_flat : flat_binary_atomic_op<atomic_swap>;
408-
def atomic_add_flat : flat_binary_atomic_op<atomic_load_add>;
409-
def atomic_and_flat : flat_binary_atomic_op<atomic_load_and>;
410-
def atomic_max_flat : flat_binary_atomic_op<atomic_load_max>;
411-
def atomic_min_flat : flat_binary_atomic_op<atomic_load_min>;
412-
def atomic_or_flat : flat_binary_atomic_op<atomic_load_or>;
413-
def atomic_sub_flat : flat_binary_atomic_op<atomic_load_sub>;
414-
def atomic_umax_flat : flat_binary_atomic_op<atomic_load_umax>;
415-
def atomic_umin_flat : flat_binary_atomic_op<atomic_load_umin>;
416-
def atomic_xor_flat : flat_binary_atomic_op<atomic_load_xor>;
417-
418-
def atomic_cmp_swap_flat : flat_binary_atomic_op<AMDGPUatomic_cmp_swap>;
419-
420401
//===----------------------------------------------------------------------===//
421402
// Misc Pattern Fragments
422403
//===----------------------------------------------------------------------===//

lib/Target/AMDGPU/CIInstructions.td

Lines changed: 0 additions & 240 deletions
Original file line numberDiff line numberDiff line change
@@ -81,244 +81,4 @@ defm BUFFER_WBINVL1_VOL : MUBUF_Invalidate <mubuf<0x70, 0x3f>,
8181
>;
8282
}
8383

84-
//===----------------------------------------------------------------------===//
85-
// Flat Instructions
86-
//===----------------------------------------------------------------------===//
87-
88-
defm FLAT_LOAD_UBYTE : FLAT_Load_Helper <
89-
flat<0x8, 0x10>, "flat_load_ubyte", VGPR_32
90-
>;
91-
defm FLAT_LOAD_SBYTE : FLAT_Load_Helper <
92-
flat<0x9, 0x11>, "flat_load_sbyte", VGPR_32
93-
>;
94-
defm FLAT_LOAD_USHORT : FLAT_Load_Helper <
95-
flat<0xa, 0x12>, "flat_load_ushort", VGPR_32
96-
>;
97-
defm FLAT_LOAD_SSHORT : FLAT_Load_Helper <
98-
flat<0xb, 0x13>, "flat_load_sshort", VGPR_32>
99-
;
100-
defm FLAT_LOAD_DWORD : FLAT_Load_Helper <
101-
flat<0xc, 0x14>, "flat_load_dword", VGPR_32
102-
>;
103-
defm FLAT_LOAD_DWORDX2 : FLAT_Load_Helper <
104-
flat<0xd, 0x15>, "flat_load_dwordx2", VReg_64
105-
>;
106-
defm FLAT_LOAD_DWORDX4 : FLAT_Load_Helper <
107-
flat<0xe, 0x17>, "flat_load_dwordx4", VReg_128
108-
>;
109-
defm FLAT_LOAD_DWORDX3 : FLAT_Load_Helper <
110-
flat<0xf, 0x16>, "flat_load_dwordx3", VReg_96
111-
>;
112-
defm FLAT_STORE_BYTE : FLAT_Store_Helper <
113-
flat<0x18>, "flat_store_byte", VGPR_32
114-
>;
115-
defm FLAT_STORE_SHORT : FLAT_Store_Helper <
116-
flat <0x1a>, "flat_store_short", VGPR_32
117-
>;
118-
defm FLAT_STORE_DWORD : FLAT_Store_Helper <
119-
flat<0x1c>, "flat_store_dword", VGPR_32
120-
>;
121-
defm FLAT_STORE_DWORDX2 : FLAT_Store_Helper <
122-
flat<0x1d>, "flat_store_dwordx2", VReg_64
123-
>;
124-
defm FLAT_STORE_DWORDX4 : FLAT_Store_Helper <
125-
flat<0x1e, 0x1f>, "flat_store_dwordx4", VReg_128
126-
>;
127-
defm FLAT_STORE_DWORDX3 : FLAT_Store_Helper <
128-
flat<0x1f, 0x1e>, "flat_store_dwordx3", VReg_96
129-
>;
130-
defm FLAT_ATOMIC_SWAP : FLAT_ATOMIC <
131-
flat<0x30, 0x40>, "flat_atomic_swap", VGPR_32, i32, atomic_swap_flat
132-
>;
133-
defm FLAT_ATOMIC_CMPSWAP : FLAT_ATOMIC <
134-
flat<0x31, 0x41>, "flat_atomic_cmpswap", VGPR_32, i32,
135-
atomic_cmp_swap_flat, v2i32, VReg_64
136-
>;
137-
defm FLAT_ATOMIC_ADD : FLAT_ATOMIC <
138-
flat<0x32, 0x42>, "flat_atomic_add", VGPR_32, i32, atomic_add_flat
139-
>;
140-
defm FLAT_ATOMIC_SUB : FLAT_ATOMIC <
141-
flat<0x33, 0x43>, "flat_atomic_sub", VGPR_32, i32, atomic_sub_flat
142-
>;
143-
defm FLAT_ATOMIC_SMIN : FLAT_ATOMIC <
144-
flat<0x35, 0x44>, "flat_atomic_smin", VGPR_32, i32, atomic_min_flat
145-
>;
146-
defm FLAT_ATOMIC_UMIN : FLAT_ATOMIC <
147-
flat<0x36, 0x45>, "flat_atomic_umin", VGPR_32, i32, atomic_umin_flat
148-
>;
149-
defm FLAT_ATOMIC_SMAX : FLAT_ATOMIC <
150-
flat<0x37, 0x46>, "flat_atomic_smax", VGPR_32, i32, atomic_max_flat
151-
>;
152-
defm FLAT_ATOMIC_UMAX : FLAT_ATOMIC <
153-
flat<0x38, 0x47>, "flat_atomic_umax", VGPR_32, i32, atomic_umax_flat
154-
>;
155-
defm FLAT_ATOMIC_AND : FLAT_ATOMIC <
156-
flat<0x39, 0x48>, "flat_atomic_and", VGPR_32, i32, atomic_and_flat
157-
>;
158-
defm FLAT_ATOMIC_OR : FLAT_ATOMIC <
159-
flat<0x3a, 0x49>, "flat_atomic_or", VGPR_32, i32, atomic_or_flat
160-
>;
161-
defm FLAT_ATOMIC_XOR : FLAT_ATOMIC <
162-
flat<0x3b, 0x4a>, "flat_atomic_xor", VGPR_32, i32, atomic_xor_flat
163-
>;
164-
defm FLAT_ATOMIC_INC : FLAT_ATOMIC <
165-
flat<0x3c, 0x4b>, "flat_atomic_inc", VGPR_32, i32, atomic_inc_flat
166-
>;
167-
defm FLAT_ATOMIC_DEC : FLAT_ATOMIC <
168-
flat<0x3d, 0x4c>, "flat_atomic_dec", VGPR_32, i32, atomic_dec_flat
169-
>;
170-
defm FLAT_ATOMIC_SWAP_X2 : FLAT_ATOMIC <
171-
flat<0x50, 0x60>, "flat_atomic_swap_x2", VReg_64, i64, atomic_swap_flat
172-
>;
173-
defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_ATOMIC <
174-
flat<0x51, 0x61>, "flat_atomic_cmpswap_x2", VReg_64, i64,
175-
atomic_cmp_swap_flat, v2i64, VReg_128
176-
>;
177-
defm FLAT_ATOMIC_ADD_X2 : FLAT_ATOMIC <
178-
flat<0x52, 0x62>, "flat_atomic_add_x2", VReg_64, i64, atomic_add_flat
179-
>;
180-
defm FLAT_ATOMIC_SUB_X2 : FLAT_ATOMIC <
181-
flat<0x53, 0x63>, "flat_atomic_sub_x2", VReg_64, i64, atomic_sub_flat
182-
>;
183-
defm FLAT_ATOMIC_SMIN_X2 : FLAT_ATOMIC <
184-
flat<0x55, 0x64>, "flat_atomic_smin_x2", VReg_64, i64, atomic_min_flat
185-
>;
186-
defm FLAT_ATOMIC_UMIN_X2 : FLAT_ATOMIC <
187-
flat<0x56, 0x65>, "flat_atomic_umin_x2", VReg_64, i64, atomic_umin_flat
188-
>;
189-
defm FLAT_ATOMIC_SMAX_X2 : FLAT_ATOMIC <
190-
flat<0x57, 0x66>, "flat_atomic_smax_x2", VReg_64, i64, atomic_max_flat
191-
>;
192-
defm FLAT_ATOMIC_UMAX_X2 : FLAT_ATOMIC <
193-
flat<0x58, 0x67>, "flat_atomic_umax_x2", VReg_64, i64, atomic_umax_flat
194-
>;
195-
defm FLAT_ATOMIC_AND_X2 : FLAT_ATOMIC <
196-
flat<0x59, 0x68>, "flat_atomic_and_x2", VReg_64, i64, atomic_and_flat
197-
>;
198-
defm FLAT_ATOMIC_OR_X2 : FLAT_ATOMIC <
199-
flat<0x5a, 0x69>, "flat_atomic_or_x2", VReg_64, i64, atomic_or_flat
200-
>;
201-
defm FLAT_ATOMIC_XOR_X2 : FLAT_ATOMIC <
202-
flat<0x5b, 0x6a>, "flat_atomic_xor_x2", VReg_64, i64, atomic_xor_flat
203-
>;
204-
defm FLAT_ATOMIC_INC_X2 : FLAT_ATOMIC <
205-
flat<0x5c, 0x6b>, "flat_atomic_inc_x2", VReg_64, i64, atomic_inc_flat
206-
>;
207-
defm FLAT_ATOMIC_DEC_X2 : FLAT_ATOMIC <
208-
flat<0x5d, 0x6c>, "flat_atomic_dec_x2", VReg_64, i64, atomic_dec_flat
209-
>;
210-
21184
} // End SubtargetPredicate = isCIVI
212-
213-
// CI Only flat instructions
214-
215-
let SubtargetPredicate = isCI, VIAssemblerPredicate = DisableInst, DisableVIDecoder = 1 in {
216-
217-
defm FLAT_ATOMIC_FCMPSWAP : FLAT_ATOMIC <
218-
flat<0x3e>, "flat_atomic_fcmpswap", VGPR_32, f32,
219-
null_frag, v2f32, VReg_64
220-
>;
221-
defm FLAT_ATOMIC_FMIN : FLAT_ATOMIC <
222-
flat<0x3f>, "flat_atomic_fmin", VGPR_32, f32
223-
>;
224-
defm FLAT_ATOMIC_FMAX : FLAT_ATOMIC <
225-
flat<0x40>, "flat_atomic_fmax", VGPR_32, f32
226-
>;
227-
defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_ATOMIC <
228-
flat<0x5e>, "flat_atomic_fcmpswap_x2", VReg_64, f64,
229-
null_frag, v2f64, VReg_128
230-
>;
231-
defm FLAT_ATOMIC_FMIN_X2 : FLAT_ATOMIC <
232-
flat<0x5f>, "flat_atomic_fmin_x2", VReg_64, f64
233-
>;
234-
defm FLAT_ATOMIC_FMAX_X2 : FLAT_ATOMIC <
235-
flat<0x60>, "flat_atomic_fmax_x2", VReg_64, f64
236-
>;
237-
238-
} // End SubtargetPredicate = isCI, VIAssemblerPredicate = DisableInst, DisableVIDecoder = 1
239-
240-
//===----------------------------------------------------------------------===//
241-
// Flat Patterns
242-
//===----------------------------------------------------------------------===//
243-
244-
let Predicates = [isCIVI] in {
245-
246-
// Patterns for global loads with no offset.
247-
class FlatLoadPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
248-
(vt (node i64:$addr)),
249-
(inst $addr, 0, 0, 0)
250-
>;
251-
252-
class FlatLoadAtomicPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
253-
(vt (node i64:$addr)),
254-
(inst $addr, 1, 0, 0)
255-
>;
256-
257-
def : FlatLoadPat <FLAT_LOAD_UBYTE, flat_az_extloadi8, i32>;
258-
def : FlatLoadPat <FLAT_LOAD_SBYTE, flat_sextloadi8, i32>;
259-
def : FlatLoadPat <FLAT_LOAD_USHORT, flat_az_extloadi16, i32>;
260-
def : FlatLoadPat <FLAT_LOAD_SSHORT, flat_sextloadi16, i32>;
261-
def : FlatLoadPat <FLAT_LOAD_DWORD, flat_load, i32>;
262-
def : FlatLoadPat <FLAT_LOAD_DWORDX2, flat_load, v2i32>;
263-
def : FlatLoadPat <FLAT_LOAD_DWORDX4, flat_load, v4i32>;
264-
265-
def : FlatLoadAtomicPat <FLAT_LOAD_DWORD, atomic_flat_load, i32>;
266-
def : FlatLoadAtomicPat <FLAT_LOAD_DWORDX2, atomic_flat_load, i64>;
267-
268-
269-
class FlatStorePat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
270-
(node vt:$data, i64:$addr),
271-
(inst $addr, $data, 0, 0, 0)
272-
>;
273-
274-
class FlatStoreAtomicPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
275-
// atomic store follows atomic binop convention so the address comes
276-
// first.
277-
(node i64:$addr, vt:$data),
278-
(inst $addr, $data, 1, 0, 0)
279-
>;
280-
281-
def : FlatStorePat <FLAT_STORE_BYTE, flat_truncstorei8, i32>;
282-
def : FlatStorePat <FLAT_STORE_SHORT, flat_truncstorei16, i32>;
283-
def : FlatStorePat <FLAT_STORE_DWORD, flat_store, i32>;
284-
def : FlatStorePat <FLAT_STORE_DWORDX2, flat_store, v2i32>;
285-
def : FlatStorePat <FLAT_STORE_DWORDX4, flat_store, v4i32>;
286-
287-
def : FlatStoreAtomicPat <FLAT_STORE_DWORD, atomic_flat_store, i32>;
288-
def : FlatStoreAtomicPat <FLAT_STORE_DWORDX2, atomic_flat_store, i64>;
289-
290-
class FlatAtomicPat <FLAT inst, SDPatternOperator node, ValueType vt,
291-
ValueType data_vt = vt> : Pat <
292-
(vt (node i64:$addr, data_vt:$data)),
293-
(inst $addr, $data, 0, 0)
294-
>;
295-
296-
def : FlatAtomicPat <FLAT_ATOMIC_ADD_RTN, atomic_add_global, i32>;
297-
def : FlatAtomicPat <FLAT_ATOMIC_SUB_RTN, atomic_sub_global, i32>;
298-
def : FlatAtomicPat <FLAT_ATOMIC_INC_RTN, atomic_inc_global, i32>;
299-
def : FlatAtomicPat <FLAT_ATOMIC_DEC_RTN, atomic_dec_global, i32>;
300-
def : FlatAtomicPat <FLAT_ATOMIC_AND_RTN, atomic_and_global, i32>;
301-
def : FlatAtomicPat <FLAT_ATOMIC_SMAX_RTN, atomic_max_global, i32>;
302-
def : FlatAtomicPat <FLAT_ATOMIC_UMAX_RTN, atomic_umax_global, i32>;
303-
def : FlatAtomicPat <FLAT_ATOMIC_SMIN_RTN, atomic_min_global, i32>;
304-
def : FlatAtomicPat <FLAT_ATOMIC_UMIN_RTN, atomic_umin_global, i32>;
305-
def : FlatAtomicPat <FLAT_ATOMIC_OR_RTN, atomic_or_global, i32>;
306-
def : FlatAtomicPat <FLAT_ATOMIC_SWAP_RTN, atomic_swap_global, i32>;
307-
def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_RTN, atomic_cmp_swap_global, i32, v2i32>;
308-
def : FlatAtomicPat <FLAT_ATOMIC_XOR_RTN, atomic_xor_global, i32>;
309-
310-
def : FlatAtomicPat <FLAT_ATOMIC_ADD_X2_RTN, atomic_add_global, i64>;
311-
def : FlatAtomicPat <FLAT_ATOMIC_SUB_X2_RTN, atomic_sub_global, i64>;
312-
def : FlatAtomicPat <FLAT_ATOMIC_INC_X2_RTN, atomic_inc_global, i64>;
313-
def : FlatAtomicPat <FLAT_ATOMIC_DEC_X2_RTN, atomic_dec_global, i64>;
314-
def : FlatAtomicPat <FLAT_ATOMIC_AND_X2_RTN, atomic_and_global, i64>;
315-
def : FlatAtomicPat <FLAT_ATOMIC_SMAX_X2_RTN, atomic_max_global, i64>;
316-
def : FlatAtomicPat <FLAT_ATOMIC_UMAX_X2_RTN, atomic_umax_global, i64>;
317-
def : FlatAtomicPat <FLAT_ATOMIC_SMIN_X2_RTN, atomic_min_global, i64>;
318-
def : FlatAtomicPat <FLAT_ATOMIC_UMIN_X2_RTN, atomic_umin_global, i64>;
319-
def : FlatAtomicPat <FLAT_ATOMIC_OR_X2_RTN, atomic_or_global, i64>;
320-
def : FlatAtomicPat <FLAT_ATOMIC_SWAP_X2_RTN, atomic_swap_global, i64>;
321-
def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_X2_RTN, atomic_cmp_swap_global, i64, v2i64>;
322-
def : FlatAtomicPat <FLAT_ATOMIC_XOR_X2_RTN, atomic_xor_global, i64>;
323-
324-
} // End Predicates = [isCIVI]

0 commit comments

Comments
 (0)