
Commit 756c7be

[CIR][AMDGPU] Adds lowering for amdgcn image load/store builtins
1 parent 4916f0e commit 756c7be

File tree

4 files changed: +348 -6 lines

clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp

Lines changed: 100 additions & 6 deletions
@@ -54,6 +54,59 @@ static llvm::StringRef getIntrinsicNameforWaveReduction(unsigned BuiltinID) {
   }
 }
 
+static mlir::Value
+emitAMDGCNImageOverloadedReturnType(CIRGenFunction &CGF, const CallExpr *E,
+                                    llvm::StringRef IntrinsicName, bool IsImageStore) {
+  auto &Builder = CGF.getBuilder();
+
+  auto findTextureDescIndex = [&CGF](const CallExpr *E) -> unsigned {
+    QualType TexQT = CGF.getContext().AMDGPUTextureTy;
+    for (unsigned I = 0, N = E->getNumArgs(); I < N; ++I) {
+      QualType ArgTy = E->getArg(I)->getType();
+      if (ArgTy == TexQT ||
+          ArgTy.getCanonicalType() == TexQT.getCanonicalType()) {
+        return I;
+      }
+    }
+    return ~0U;
+  };
+
+  unsigned RsrcIndex = findTextureDescIndex(E);
+  if (RsrcIndex == ~0U) {
+    llvm::report_fatal_error("Invalid argument count for image builtin");
+  }
+
+  cir::VectorType Vec8I32Ty =
+      cir::VectorType::get(Builder.getSInt32Ty(), 8);
+
+  llvm::SmallVector<mlir::Value, 10> Args;
+  for (unsigned I = 0, N = E->getNumArgs(); I < N; ++I) {
+    mlir::Value V = CGF.emitScalarExpr(E->getArg(I));
+
+    if (I == RsrcIndex) {
+      mlir::Type VTy = V.getType();
+      if (mlir::isa<cir::PointerType>(VTy)) {
+        V = Builder.createAlignedLoad(CGF.getLoc(E->getExprLoc()), Vec8I32Ty, V,
+                                      CharUnits::fromQuantity(32));
+      }
+    }
+    Args.push_back(V);
+  }
+
+  mlir::Type RetTy;
+  if (IsImageStore) {
+    RetTy = cir::VoidType::get(Builder.getContext());
+  } else {
+    RetTy = CGF.convertType(E->getType());
+  }
+
+  auto CallOp = LLVMIntrinsicCallOp::create(
+      Builder, CGF.getLoc(E->getExprLoc()),
+      Builder.getStringAttr(IntrinsicName), RetTy, Args);
+
+  return CallOp.getResult();
+}
+
 mlir::Value CIRGenFunction::emitAMDGPUBuiltinExpr(unsigned builtinId,
                                                   const CallExpr *expr) {
   switch (builtinId) {
@@ -359,61 +412,102 @@ mlir::Value CIRGenFunction::emitAMDGPUBuiltinExpr(unsigned builtinId,
   }
   case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_load_1d_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(*this, expr, "amdgcn.image.load.1d", false);
   case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_load_1darray_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(*this, expr, "amdgcn.image.load.1darray",
+                                               false);
   case AMDGPU::BI__builtin_amdgcn_image_load_2d_f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_load_2d_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(*this, expr, "amdgcn.image.load.2d", false);
   case AMDGPU::BI__builtin_amdgcn_image_load_2darray_f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_load_2darray_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(*this, expr, "amdgcn.image.load.2darray",
+                                               false);
   case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_load_3d_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(*this, expr, "amdgcn.image.load.3d", false);
   case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_load_cube_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(*this, expr, "amdgcn.image.load.cube",
+                                               false);
   case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_load_mip_1d_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(*this, expr, "amdgcn.image.load.mip.1d",
+                                               false);
+  case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f32_i32:
+  case AMDGPU::BI__builtin_amdgcn_image_load_mip_1darray_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(*this, expr,
+                                               "amdgcn.image.load.mip.1darray", false);
+  case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_load_mip_2d_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(*this, expr, "amdgcn.image.load.mip.2d",
+                                               false);
   case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_load_mip_2darray_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(*this, expr,
+                                               "amdgcn.image.load.mip.2darray", false);
   case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_load_mip_3d_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(*this, expr, "amdgcn.image.load.mip.3d",
+                                               false);
   case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f32_i32:
-  case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32: {
-    llvm_unreachable("image_load_* NYI");
-  }
+  case AMDGPU::BI__builtin_amdgcn_image_load_mip_cube_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(*this, expr, "amdgcn.image.load.mip.cube",
+                                               false);
   case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_store_1d_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(*this, expr, "amdgcn.image.store.1d", true);
   case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_store_1darray_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(*this, expr, "amdgcn.image.store.1darray",
+                                               true);
   case AMDGPU::BI__builtin_amdgcn_image_store_2d_f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_store_2d_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(*this, expr, "amdgcn.image.store.2d", true);
   case AMDGPU::BI__builtin_amdgcn_image_store_2darray_f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_store_2darray_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(*this, expr, "amdgcn.image.store.2darray",
+                                               true);
   case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_store_3d_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(*this, expr, "amdgcn.image.store.3d", true);
   case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_store_cube_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(*this, expr, "amdgcn.image.store.cube",
+                                               true);
   case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_store_mip_1d_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(*this, expr, "amdgcn.image.store.mip.1d",
+                                               true);
   case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_store_mip_1darray_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(*this, expr,
+                                               "amdgcn.image.store.mip.1darray", true);
   case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_store_mip_2d_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(*this, expr, "amdgcn.image.store.mip.2d",
+                                               true);
   case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_store_mip_2darray_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(*this, expr,
+                                               "amdgcn.image.store.mip.2darray", true);
   case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f32_i32:
   case AMDGPU::BI__builtin_amdgcn_image_store_mip_3d_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(*this, expr, "amdgcn.image.store.mip.3d",
+                                               true);
   case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f32_i32:
-  case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f16_i32: {
-    llvm_unreachable("image_store_* NYI");
-  }
+  case AMDGPU::BI__builtin_amdgcn_image_store_mip_cube_v4f16_i32:
+    return emitAMDGCNImageOverloadedReturnType(*this, expr, "amdgcn.image.store.mip.cube",
+                                               true);
   case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f32_f32:
   case AMDGPU::BI__builtin_amdgcn_image_sample_1d_v4f16_f32:
   case AMDGPU::BI__builtin_amdgcn_image_sample_1darray_v4f32_f32:
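
For orientation, here is a minimal HIP-side sketch of one of the 1D variants that now routes through the shared helper instead of hitting llvm_unreachable. The argument order (dmask, coordinate, texture handle, texfailctrl, cachepolicy) is an assumption based on the 2D builtins exercised in the new test further down and on the corresponding llvm.amdgcn.image.* intrinsics; the function name is illustrative only.

// Illustrative sketch, not part of this commit. Assumes the same test harness
// (../Inputs/cuda.h for __device__) and that the 1D builtins take
// (dmask, x, rsrc, texfailctrl, cachepolicy), mirroring the 2D forms below.
typedef float float4 __attribute__((ext_vector_type(4)));

__device__ void sketch_image_copy_1d(int x, __amdgpu_texture_t rsrc) {
  // Lowered via emitAMDGCNImageOverloadedReturnType(..., "amdgcn.image.load.1d", false).
  float4 texel = __builtin_amdgcn_image_load_1d_v4f32_i32(15, x, rsrc, 0, 0);
  // Lowered via emitAMDGCNImageOverloadedReturnType(..., "amdgcn.image.store.1d", true).
  __builtin_amdgcn_image_store_1d_v4f32_i32(texel, 15, x, rsrc, 0, 0);
}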

clang/lib/CIR/CodeGen/CIRGenTypes.cpp

Lines changed: 7 additions & 0 deletions
@@ -587,6 +587,13 @@ mlir::Type CIRGenTypes::convertType(QualType T) {
     llvm_unreachable("NYI"); \
   } break;
 #include "clang/Basic/WebAssemblyReferenceTypes.def"
+#define AMDGPU_OPAQUE_PTR_TYPE(Name, Id, SingletonId, Width, Align, AS) \
+  case BuiltinType::Id: \
+    ResultType = Builder.getPointerTo(CGM.VoidTy); \
+    break;
+#define AMDGPU_NAMED_BARRIER_TYPE(Name, Id, SingletonId, Width, Align, Scope) \
+  case BuiltinType::Id: \
+    llvm_unreachable("NYI");
 #define AMDGPU_TYPE(Name, Id, SingletonId, Width, Align) \
   case BuiltinType::Id: \
     llvm_unreachable("NYI");
(new test file)

Lines changed: 122 additions & 0 deletions
@@ -0,0 +1,122 @@
#include "../Inputs/cuda.h"

// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir\
// RUN:   -target-cpu gfx1100 -fcuda-is-device -emit-cir %s -o %t.cir
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s

// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11 -fclangir\
// RUN:   -target-cpu gfx1100 -fcuda-is-device -emit-llvm %s -o %t.ll
// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s

// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -x hip -std=c++11\
// RUN:   -target-cpu gfx1100 -fcuda-is-device -emit-llvm %s -o %t.ll
// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s

//===----------------------------------------------------------------------===//
// Test AMDGPU image load/store builtins in HIP
//===----------------------------------------------------------------------===//

typedef float float4 __attribute__((ext_vector_type(4)));
typedef _Float16 half;
typedef half half4 __attribute__((ext_vector_type(4)));

// CIR-LABEL: @_Z22test_image_load_2d_f32
// CIR: cir.llvm.intrinsic "amdgcn.image.load.2d" {{.*}} : (!s32i, !s32i, !s32i, !cir.vector<!s32i x 8>, !s32i, !s32i) -> !cir.float
// LLVM: define{{.*}} void @_Z22test_image_load_2d_f32Pfiiu18__amdgpu_texture_t(
// LLVM: call {{.*}}float @llvm.amdgcn.image.load.2d.f32.i32.v8i32(i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
// OGCG: define{{.*}} void @_Z22test_image_load_2d_f32Pfiiu18__amdgpu_texture_t(
// OGCG: call {{.*}}float @llvm.amdgcn.image.load.2d.f32.i32.v8i32(i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
__device__ void test_image_load_2d_f32(float* out, int x, int y, __amdgpu_texture_t rsrc) {
  *out = __builtin_amdgcn_image_load_2d_f32_i32(15, x, y, rsrc, 0, 0);
}

// CIR-LABEL: @_Z24test_image_load_2d_v4f32
// CIR: cir.llvm.intrinsic "amdgcn.image.load.2d" {{.*}} : (!s32i, !s32i, !s32i, !cir.vector<!s32i x 8>, !s32i, !s32i) -> !cir.vector<!cir.float x 4>
// LLVM: define{{.*}} void @_Z24test_image_load_2d_v4f32PDv4_fiiu18__amdgpu_texture_t(
// LLVM: call {{.*}}<4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32.v8i32(i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
// OGCG: define{{.*}} void @_Z24test_image_load_2d_v4f32PDv4_fiiu18__amdgpu_texture_t(
// OGCG: call {{.*}}<4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32.v8i32(i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
__device__ void test_image_load_2d_v4f32(float4* out, int x, int y, __amdgpu_texture_t rsrc) {
  *out = __builtin_amdgcn_image_load_2d_v4f32_i32(15, x, y, rsrc, 0, 0);
}

// CIR-LABEL: @_Z24test_image_load_2d_v4f16
// CIR: cir.llvm.intrinsic "amdgcn.image.load.2d" {{.*}} : (!s32i, !s32i, !s32i, !cir.vector<!s32i x 8>, !s32i, !s32i) -> !cir.vector<!cir.f16 x 4>
// LLVM: define{{.*}} void @_Z24test_image_load_2d_v4f16PDv4_DF16_iiu18__amdgpu_texture_t(
// LLVM: call {{.*}}<4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32.v8i32(i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
// OGCG: define{{.*}} void @_Z24test_image_load_2d_v4f16PDv4_DF16_iiu18__amdgpu_texture_t(
// OGCG: call {{.*}}<4 x half> @llvm.amdgcn.image.load.2d.v4f16.i32.v8i32(i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
__device__ void test_image_load_2d_v4f16(half4* out, int x, int y, __amdgpu_texture_t rsrc) {
  *out = __builtin_amdgcn_image_load_2d_v4f16_i32(15, x, y, rsrc, 0, 0);
}

// CIR-LABEL: @_Z23test_image_store_2d_f32
// CIR: cir.llvm.intrinsic "amdgcn.image.store.2d" {{.*}} : (!cir.float, !s32i, !s32i, !s32i, !cir.vector<!s32i x 8>, !s32i, !s32i) -> !void
// LLVM: define{{.*}} void @_Z23test_image_store_2d_f32fiiu18__amdgpu_texture_t(
// LLVM: call void @llvm.amdgcn.image.store.2d.f32.i32.v8i32(float {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
// OGCG: define{{.*}} void @_Z23test_image_store_2d_f32fiiu18__amdgpu_texture_t(
// OGCG: call void @llvm.amdgcn.image.store.2d.f32.i32.v8i32(float {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
__device__ void test_image_store_2d_f32(float val, int x, int y, __amdgpu_texture_t rsrc) {
  __builtin_amdgcn_image_store_2d_f32_i32(val, 15, x, y, rsrc, 0, 0);
}

// CIR-LABEL: @_Z25test_image_store_2d_v4f32
// CIR: cir.llvm.intrinsic "amdgcn.image.store.2d" {{.*}} : (!cir.vector<!cir.float x 4>, !s32i, !s32i, !s32i, !cir.vector<!s32i x 8>, !s32i, !s32i) -> !void
// LLVM: define{{.*}} void @_Z25test_image_store_2d_v4f32Dv4_fiiu18__amdgpu_texture_t(
// LLVM: call void @llvm.amdgcn.image.store.2d.v4f32.i32.v8i32(<4 x float> {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
// OGCG: define{{.*}} void @_Z25test_image_store_2d_v4f32Dv4_fiiu18__amdgpu_texture_t(
// OGCG: call void @llvm.amdgcn.image.store.2d.v4f32.i32.v8i32(<4 x float> {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
__device__ void test_image_store_2d_v4f32(float4 val, int x, int y, __amdgpu_texture_t rsrc) {
  __builtin_amdgcn_image_store_2d_v4f32_i32(val, 15, x, y, rsrc, 0, 0);
}

// CIR-LABEL: @_Z25test_image_store_2d_v4f16
// CIR: cir.llvm.intrinsic "amdgcn.image.store.2d" {{.*}} : (!cir.vector<!cir.f16 x 4>, !s32i, !s32i, !s32i, !cir.vector<!s32i x 8>, !s32i, !s32i) -> !void
// LLVM: define{{.*}} void @_Z25test_image_store_2d_v4f16Dv4_DF16_iiu18__amdgpu_texture_t(
// LLVM: call void @llvm.amdgcn.image.store.2d.v4f16.i32.v8i32(<4 x half> {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
// OGCG: define{{.*}} void @_Z25test_image_store_2d_v4f16Dv4_DF16_iiu18__amdgpu_texture_t(
// OGCG: call void @llvm.amdgcn.image.store.2d.v4f16.i32.v8i32(<4 x half> {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
__device__ void test_image_store_2d_v4f16(half4 val, int x, int y, __amdgpu_texture_t rsrc) {
  __builtin_amdgcn_image_store_2d_v4f16_i32(val, 15, x, y, rsrc, 0, 0);
}

// CIR-LABEL: @_Z27test_image_load_2darray_f32
// CIR: cir.llvm.intrinsic "amdgcn.image.load.2darray" {{.*}} : (!s32i, !s32i, !s32i, !s32i, !cir.vector<!s32i x 8>, !s32i, !s32i) -> !cir.float
// LLVM: define{{.*}} void @_Z27test_image_load_2darray_f32Pfiiiu18__amdgpu_texture_t(
// LLVM: call {{.*}}float @llvm.amdgcn.image.load.2darray.f32.i32.v8i32(i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
// OGCG: define{{.*}} void @_Z27test_image_load_2darray_f32Pfiiiu18__amdgpu_texture_t(
// OGCG: call {{.*}}float @llvm.amdgcn.image.load.2darray.f32.i32.v8i32(i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
__device__ void test_image_load_2darray_f32(float* out, int x, int y, int slice, __amdgpu_texture_t rsrc) {
  *out = __builtin_amdgcn_image_load_2darray_f32_i32(15, x, y, slice, rsrc, 0, 0);
}

// CIR-LABEL: @_Z29test_image_load_2darray_v4f32
// CIR: cir.llvm.intrinsic "amdgcn.image.load.2darray" {{.*}} : (!s32i, !s32i, !s32i, !s32i, !cir.vector<!s32i x 8>, !s32i, !s32i) -> !cir.vector<!cir.float x 4>
// LLVM: define{{.*}} void @_Z29test_image_load_2darray_v4f32PDv4_fiiiu18__amdgpu_texture_t(
// LLVM: call {{.*}}<4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32.v8i32(i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
// OGCG: define{{.*}} void @_Z29test_image_load_2darray_v4f32PDv4_fiiiu18__amdgpu_texture_t(
// OGCG: call {{.*}}<4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32.v8i32(i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
__device__ void test_image_load_2darray_v4f32(float4* out, int x, int y, int slice, __amdgpu_texture_t rsrc) {
  *out = __builtin_amdgcn_image_load_2darray_v4f32_i32(15, x, y, slice, rsrc, 0, 0);
}

// CIR-LABEL: @_Z28test_image_store_2darray_f32
// CIR: cir.llvm.intrinsic "amdgcn.image.store.2darray" {{.*}} : (!cir.float, !s32i, !s32i, !s32i, !s32i, !cir.vector<!s32i x 8>, !s32i, !s32i) -> !void
// LLVM: define{{.*}} void @_Z28test_image_store_2darray_f32fiiiu18__amdgpu_texture_t(
// LLVM: call void @llvm.amdgcn.image.store.2darray.f32.i32.v8i32(float {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
// OGCG: define{{.*}} void @_Z28test_image_store_2darray_f32fiiiu18__amdgpu_texture_t(
// OGCG: call void @llvm.amdgcn.image.store.2darray.f32.i32.v8i32(float {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
__device__ void test_image_store_2darray_f32(float val, int x, int y, int slice, __amdgpu_texture_t rsrc) {
  __builtin_amdgcn_image_store_2darray_f32_i32(val, 15, x, y, slice, rsrc, 0, 0);
}

// CIR-LABEL: @_Z30test_image_store_2darray_v4f32
// CIR: cir.llvm.intrinsic "amdgcn.image.store.2darray" {{.*}} : (!cir.vector<!cir.float x 4>, !s32i, !s32i, !s32i, !s32i, !cir.vector<!s32i x 8>, !s32i, !s32i) -> !void
// LLVM: define{{.*}} void @_Z30test_image_store_2darray_v4f32Dv4_fiiiu18__amdgpu_texture_t(
// LLVM: call void @llvm.amdgcn.image.store.2darray.v4f32.i32.v8i32(<4 x float> {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
// OGCG: define{{.*}} void @_Z30test_image_store_2darray_v4f32Dv4_fiiiu18__amdgpu_texture_t(
// OGCG: call void @llvm.amdgcn.image.store.2darray.v4f32.i32.v8i32(<4 x float> {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, i32 {{.*}}, <8 x i32> {{.*}}, i32 {{.*}}, i32 {{.*}})
__device__ void test_image_store_2darray_v4f32(float4 val, int x, int y, int slice, __amdgpu_texture_t rsrc) {
  __builtin_amdgcn_image_store_2darray_v4f32_i32(val, 15, x, y, slice, rsrc, 0, 0);
}
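
The committed checks cover the 2D and 2D-array loads and stores. As a further usage sketch (not part of the test file), a mip variant would follow the same pattern; the placement of the mip-level operand here is an assumption based on the llvm.amdgcn.image.load.mip.2d intrinsic signature (dmask, x, y, mip, rsrc, texfailctrl, cachepolicy).

// Illustrative sketch only, not in the committed test. Reuses the float4
// typedef and __device__ macro from the harness above; argument order assumed.
__device__ void sketch_image_load_mip_2d(float4* out, int x, int y, int mip,
                                          __amdgpu_texture_t rsrc) {
  // Expected to lower to cir.llvm.intrinsic "amdgcn.image.load.mip.2d" in CIR
  // and then to llvm.amdgcn.image.load.mip.2d.v4f32.i32.v8i32 in LLVM IR.
  *out = __builtin_amdgcn_image_load_mip_2d_v4f32_i32(15, x, y, mip, rsrc, 0, 0);
}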
