[CIR][AMDGPU] Add lowering for amdgcn trig preop builtins

skc7 · skc7 · commit 1e6dff6fa606 · 2025-12-09T11:50:22.000+05:30
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp
@@ -54,6 +54,19 @@ static llvm::StringRef getIntrinsicNameforWaveReduction(unsigned BuiltinID) {
   }
 }
 
+// Emits an LLVM intrinsic call taking a float or double (call argument 0) and
+// an integer (call argument 1); the result type is that of the first operand.
+static mlir::Value emitFPIntBuiltin(CIRGenFunction &CGF, const CallExpr *E,
+                                    llvm::StringRef intrinsicName) {
+  mlir::Value fpOperand = CGF.emitScalarExpr(E->getArg(0));
+  mlir::Value intOperand = CGF.emitScalarExpr(E->getArg(1));
+  return LLVMIntrinsicCallOp::create(
+             CGF.getBuilder(), CGF.getLoc(E->getExprLoc()),
+             CGF.getBuilder().getStringAttr(intrinsicName),
+             fpOperand.getType(), {fpOperand, intOperand})
+      .getResult();
+}
+
 mlir::Value CIRGenFunction::emitAMDGPUBuiltinExpr(unsigned builtinId,
                                                   const CallExpr *expr) {
   switch (builtinId) {
@@ -163,7 +176,7 @@ mlir::Value CIRGenFunction::emitAMDGPUBuiltinExpr(unsigned builtinId,
   }
   case AMDGPU::BI__builtin_amdgcn_trig_preop:
   case AMDGPU::BI__builtin_amdgcn_trig_preopf: {
-    llvm_unreachable("trig_preop_* NYI");
+    return emitFPIntBuiltin(*this, expr, "amdgcn.trig.preop");
   }
   case AMDGPU::BI__builtin_amdgcn_rcp:
   case AMDGPU::BI__builtin_amdgcn_rcpf:
diff --git a/clang/test/CIR/CodeGen/HIP/builtins-amdgcn.hip b/clang/test/CIR/CodeGen/HIP/builtins-amdgcn.hip
@@ -305,3 +305,23 @@ __device__ void test_readlane(int* out, int a, int b) {
 __device__ void test_readfirstlane(int* out, int a) {
   *out = __builtin_amdgcn_readfirstlane(a);
 }
+
+// CIR-LABEL: @_Z19test_trig_preop_f32Pffi
+// CIR: cir.llvm.intrinsic "amdgcn.trig.preop" {{.*}} : (!cir.float, !s32i) -> !cir.float
+// LLVM: define{{.*}} void @_Z19test_trig_preop_f32Pffi
+// LLVM: call{{.*}} float @llvm.amdgcn.trig.preop.f32(float %{{.+}}, i32 %{{.*}})
+// OGCG: define{{.*}} void @_Z19test_trig_preop_f32Pffi
+// OGCG: call{{.*}} float @llvm.amdgcn.trig.preop.f32(float %{{.+}}, i32 %{{.*}})
+__device__ void test_trig_preop_f32(float* out, float a, int b) {
+  *out = __builtin_amdgcn_trig_preopf(a, b); // float operand, int operand
+}
+
+// CIR-LABEL: @_Z19test_trig_preop_f64Pddi
+// CIR: cir.llvm.intrinsic "amdgcn.trig.preop" {{.*}} : (!cir.double, !s32i) -> !cir.double
+// LLVM: define{{.*}} void @_Z19test_trig_preop_f64Pddi
+// LLVM: call{{.*}} double @llvm.amdgcn.trig.preop.f64(double %{{.+}}, i32 %{{.*}})
+// OGCG: define{{.*}} void @_Z19test_trig_preop_f64Pddi
+// OGCG: call{{.*}} double @llvm.amdgcn.trig.preop.f64(double %{{.+}}, i32 %{{.*}})
+__device__ void test_trig_preop_f64(double* out, double a, int b) {
+  *out = __builtin_amdgcn_trig_preop(a, b); // double operand, int operand
+}
diff --git a/clang/test/CIR/CodeGen/OpenCL/builtins_amdgcn.cl b/clang/test/CIR/CodeGen/OpenCL/builtins_amdgcn.cl
@@ -318,3 +318,23 @@ void test_readlane(global int* out, int a, int b) {
 void test_readfirstlane(global int* out, int a) {
   *out = __builtin_amdgcn_readfirstlane(a);
 }
+
+// CIR-LABEL: @test_trig_preop_f32
+// CIR: cir.llvm.intrinsic "amdgcn.trig.preop" {{.*}} : (!cir.float, !s32i) -> !cir.float
+// LLVM: define{{.*}} void @test_trig_preop_f32
+// LLVM: call{{.*}} float @llvm.amdgcn.trig.preop.f32(float %{{.+}}, i32 %{{.*}})
+// OGCG: define{{.*}} void @test_trig_preop_f32
+// OGCG: call{{.*}} float @llvm.amdgcn.trig.preop.f32(float %{{.+}}, i32 %{{.*}})
+void test_trig_preop_f32(global float* out, float a, int b) {
+  *out = __builtin_amdgcn_trig_preopf(a, b); // float operand, int operand
+}
+
+// CIR-LABEL: @test_trig_preop_f64
+// CIR: cir.llvm.intrinsic "amdgcn.trig.preop" {{.*}} : (!cir.double, !s32i) -> !cir.double
+// LLVM: define{{.*}} void @test_trig_preop_f64
+// LLVM: call{{.*}} double @llvm.amdgcn.trig.preop.f64(double %{{.+}}, i32 %{{.*}})
+// OGCG: define{{.*}} void @test_trig_preop_f64
+// OGCG: call{{.*}} double @llvm.amdgcn.trig.preop.f64(double %{{.+}}, i32 %{{.*}})
+void test_trig_preop_f64(global double* out, double a, int b) {
+  *out = __builtin_amdgcn_trig_preop(a, b); // double operand, int operand
+}