diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h
index fe0b80f5c1af..11ac01468cbf 100644
--- a/clang/include/clang/CIR/MissingFeatures.h
+++ b/clang/include/clang/CIR/MissingFeatures.h
@@ -74,6 +74,7 @@ struct MissingFeatures {
   // GNU vectors are done, but other kinds of vectors haven't been implemented.
   static bool scalableVectors() { return false; }
   static bool vectorConstants() { return false; }
+  static bool vectorToX86AmxCasting() { return false; }
 
   // Address space related
   static bool addressSpace() { return false; }
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
index 975a8f10b15d..de6d27ff2ce7 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
@@ -532,6 +532,38 @@ decodeFixedType(ArrayRef<llvm::Intrinsic::IITDescriptor> &infos,
 }
 
 // llvm::Intrinsics accepts only LLVMContext. We need to reimplement it here.
+/// Helper function to correct integer signedness for intrinsic arguments.
+/// IIT always returns signed integers, but the actual intrinsic may expect
+/// unsigned integers based on the AST FunctionDecl parameter types.
+static mlir::Type getIntrinsicArgumentTypeFromAST(mlir::Type iitType,
+                                                  const CallExpr *E,
+                                                  unsigned argIndex,
+                                                  mlir::MLIRContext *context) {
+  // If it's not an integer type, return as-is.
+  auto intTy = mlir::dyn_cast<cir::IntType>(iitType);
+  if (!intTy)
+    return iitType;
+
+  // Get the FunctionDecl from the CallExpr.
+  const FunctionDecl *FD = nullptr;
+  if (const auto *DRE =
+          dyn_cast<DeclRefExpr>(E->getCallee()->IgnoreImpCasts())) {
+    FD = dyn_cast<FunctionDecl>(DRE->getDecl());
+  }
+
+  // If we have a FunctionDecl and this argument exists, check its signedness.
+  if (FD && argIndex < FD->getNumParams()) {
+    QualType paramType = FD->getParamDecl(argIndex)->getType();
+    if (paramType->isUnsignedIntegerType()) {
+      // Create the unsigned version of the type.
+      return cir::IntType::get(context, intTy.getWidth(), /*isSigned=*/false);
+    }
+  }
+
+  // Default: keep the IIT type (signed).
+  return iitType;
+}
+
 static cir::FuncType getIntrinsicType(mlir::MLIRContext *context,
                                       llvm::Intrinsic::ID id) {
   using namespace llvm::Intrinsic;
@@ -2730,12 +2762,20 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
 
 
     SmallVector<mlir::Value> args;
     for (unsigned i = 0; i < E->getNumArgs(); i++) {
-      mlir::Value arg = emitScalarOrConstFoldImmArg(iceArguments, i, E);
-      mlir::Type argType = arg.getType();
-      if (argType != intrinsicType.getInput(i))
+      mlir::Value argValue = emitScalarOrConstFoldImmArg(iceArguments, i, E);
+      // If the intrinsic arg type is different from the builtin arg type
+      // we need to do a bit cast.
+      mlir::Type argType = argValue.getType();
+      mlir::Type expectedTy = intrinsicType.getInput(i);
+
+      // Use the helper to get the correct integer type based on AST signedness.
+      mlir::Type correctedExpectedTy =
+          getIntrinsicArgumentTypeFromAST(expectedTy, E, i, &getMLIRContext());
+
+      if (argType != correctedExpectedTy)
         llvm_unreachable("NYI");
-      args.push_back(arg);
+      args.push_back(argValue);
     }
 
     auto intrinsicCall = builder.create<cir::LLVMIntrinsicCallOp>(
diff --git a/clang/test/CIR/CodeGen/CUDA/builtins-nvptx-ptx60.cu b/clang/test/CIR/CodeGen/CUDA/builtins-nvptx-ptx60.cu
new file mode 100644
index 000000000000..2f195d547804
--- /dev/null
+++ b/clang/test/CIR/CodeGen/CUDA/builtins-nvptx-ptx60.cu
@@ -0,0 +1,55 @@
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir -target-cpu sm_70 \
+// RUN:   -fcuda-is-device -target-feature +ptx60 \
+// RUN:   -emit-cir -o - -x cuda %s \
+// RUN:   | FileCheck -check-prefix=CIR %s
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir -target-cpu sm_80 \
+// RUN:   -fcuda-is-device -target-feature +ptx65 \
+// RUN:   -emit-cir -o - -x cuda %s \
+// RUN:   | FileCheck -check-prefix=CIR %s
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir -target-cpu sm_80 \
+// RUN:   -fcuda-is-device -target-feature +ptx70 \
+// RUN:   -emit-cir -o - -x cuda %s \
+// RUN:   | FileCheck -check-prefix=CIR %s
+
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir -target-cpu sm_70 \
+// RUN:   -fcuda-is-device -target-feature +ptx60 \
+// RUN:   -emit-llvm -o - -x cuda %s \
+// RUN:   | FileCheck -check-prefix=LLVM %s
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir -target-cpu sm_80 \
+// RUN:   -fcuda-is-device -target-feature +ptx65 \
+// RUN:   -emit-llvm -o - -x cuda %s \
+// RUN:   | FileCheck -check-prefix=LLVM %s
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir -target-cpu sm_80 \
+// RUN:   -fcuda-is-device -target-feature +ptx70 \
+// RUN:   -emit-llvm -o - -x cuda %s \
+// RUN:   | FileCheck -check-prefix=LLVM %s
+
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -target-cpu sm_70 \
+// RUN:   -fcuda-is-device -target-feature +ptx60 \
+// RUN:   -emit-llvm -o - -x cuda %s \
+// RUN:   | FileCheck -check-prefix=OGCHECK %s
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -target-cpu sm_80 \
+// RUN:   -fcuda-is-device -target-feature +ptx65 \
+// RUN:   -emit-llvm -o - -x cuda %s \
+// RUN:   | FileCheck -check-prefix=OGCHECK %s
+// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -target-cpu sm_80 \
+// RUN:   -fcuda-is-device -target-feature +ptx70 \
+// RUN:   -emit-llvm -o - -x cuda %s \
+// RUN:   | FileCheck -check-prefix=OGCHECK %s
+
+#define __device__ __attribute__((device))
+#define __global__ __attribute__((global))
+#define __shared__ __attribute__((shared))
+#define __constant__ __attribute__((constant))
+
+typedef unsigned long long uint64_t;
+
+__device__ void nvvm_sync(unsigned mask, int i, float f, int a, int b,
+                          bool pred, uint64_t i64) {
+
+  // CIR: cir.llvm.intrinsic "nvvm.bar.warp.sync" {{.*}} : (!u32i)
+  // LLVM: call void @llvm.nvvm.bar.warp.sync(i32
+  // OGCHECK: call void @llvm.nvvm.bar.warp.sync(i32
+  __nvvm_bar_warp_sync(mask);
+
+}