Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions clang/include/clang/CIR/MissingFeatures.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ struct MissingFeatures {
// GNU vectors are done, but other kinds of vectors haven't been implemented.
static bool scalableVectors() { return false; }
static bool vectorConstants() { return false; }
static bool vectorToX86AmxCasting() { return false; }

// Address space related
static bool addressSpace() { return false; }
Expand Down
48 changes: 44 additions & 4 deletions clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -532,6 +532,38 @@ decodeFixedType(ArrayRef<llvm::Intrinsic::IITDescriptor> &infos,
}

// llvm::Intrinsics accepts only LLVMContext. We need to reimplement it here.
/// Helper function to correct integer signedness for intrinsic arguments.
/// IIT always returns signed integers, but the actual intrinsic may expect
/// unsigned integers based on the AST FunctionDecl parameter types.
static mlir::Type getIntrinsicArgumentTypeFromAST(mlir::Type iitType,
const CallExpr *E,
unsigned argIndex,
mlir::MLIRContext *context) {
// If it's not an integer type, return as-is
auto intTy = dyn_cast<cir::IntType>(iitType);
if (!intTy)
return iitType;

// Get the FunctionDecl from the CallExpr
const FunctionDecl *FD = nullptr;
if (const auto *DRE =
dyn_cast<DeclRefExpr>(E->getCallee()->IgnoreImpCasts())) {
FD = dyn_cast<FunctionDecl>(DRE->getDecl());
}

// If we have FunctionDecl and this argument exists, check its signedness
if (FD && argIndex < FD->getNumParams()) {
QualType paramType = FD->getParamDecl(argIndex)->getType();
if (paramType->isUnsignedIntegerType()) {
// Create unsigned version of the type
return IntType::get(context, intTy.getWidth(), /*isSigned=*/false);
}
}

// Default: keep IIT type (signed)
return iitType;
}

static cir::FuncType getIntrinsicType(mlir::MLIRContext *context,
llvm::Intrinsic::ID id) {
using namespace llvm::Intrinsic;
Expand Down Expand Up @@ -2730,12 +2762,20 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,

SmallVector<mlir::Value> args;
for (unsigned i = 0; i < E->getNumArgs(); i++) {
mlir::Value arg = emitScalarOrConstFoldImmArg(iceArguments, i, E);
mlir::Type argType = arg.getType();
if (argType != intrinsicType.getInput(i))
mlir::Value argValue = emitScalarOrConstFoldImmArg(iceArguments, i, E);
// If the intrinsic arg type is different from the builtin arg type
// we need to do a bit cast.
mlir::Type argType = argValue.getType();
mlir::Type expectedTy = intrinsicType.getInput(i);

// Use helper to get the correct integer type based on AST signedness
mlir::Type correctedExpectedTy =
getIntrinsicArgumentTypeFromAST(expectedTy, E, i, &getMLIRContext());

if (argType != correctedExpectedTy)
llvm_unreachable("NYI");

args.push_back(arg);
args.push_back(argValue);
}

auto intrinsicCall = builder.create<cir::LLVMIntrinsicCallOp>(
Expand Down
55 changes: 55 additions & 0 deletions clang/test/CIR/CodeGen/CUDA/builtins-nvptx-ptx60.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir -target-cpu sm_70 \
// RUN: -fcuda-is-device -target-feature +ptx60 \
// RUN: -emit-cir -o - -x cuda %s \
// RUN: | FileCheck -check-prefix=CIR %s
// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir -target-cpu sm_80 \
// RUN: -fcuda-is-device -target-feature +ptx65 \
// RUN: -emit-cir -o - -x cuda %s \
// RUN: | FileCheck -check-prefix=CIR %s
// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir -target-cpu sm_80 \
// RUN: -fcuda-is-device -target-feature +ptx70 \
// RUN: -emit-cir -o - -x cuda %s \
// RUN: | FileCheck -check-prefix=CIR %s

// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir -target-cpu sm_70 \
// RUN: -fcuda-is-device -target-feature +ptx60 \
// RUN: -emit-llvm -o - -x cuda %s \
// RUN: | FileCheck -check-prefix=LLVM %s
// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir -target-cpu sm_80 \
// RUN: -fcuda-is-device -target-feature +ptx65 \
// RUN: -emit-llvm -o - -x cuda %s \
// RUN: | FileCheck -check-prefix=LLVM %s
// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fclangir -target-cpu sm_80 \
// RUN: -fcuda-is-device -target-feature +ptx70 \
// RUN: -emit-llvm -o - -x cuda %s \
// RUN: | FileCheck -check-prefix=LLVM %s

// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -target-cpu sm_70 \
// RUN: -fcuda-is-device -target-feature +ptx60 \
// RUN: -emit-llvm -o - -x cuda %s \
// RUN: | FileCheck -check-prefix=OGCHECK %s
// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -target-cpu sm_80 \
// RUN: -fcuda-is-device -target-feature +ptx65 \
// RUN: -emit-llvm -o - -x cuda %s \
// RUN: | FileCheck -check-prefix=OGCHECK %s
// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -target-cpu sm_80 \
// RUN: -fcuda-is-device -target-feature +ptx70 \
// RUN: -emit-llvm -o - -x cuda %s \
// RUN: | FileCheck -check-prefix=OGCHECK %s

#define __device__ __attribute__((device))
#define __global__ __attribute__((global))
#define __shared__ __attribute__((shared))
#define __constant__ __attribute__((constant))

typedef unsigned long long uint64_t;

__device__ void nvvm_sync(unsigned mask, int i, float f, int a, int b,
bool pred, uint64_t i64) {

// CIR: cir.llvm.intrinsic "nvvm.bar.warp.sync" {{.*}} : (!u32i)
// LLVM: call void @llvm.nvvm.bar.warp.sync(i32
// OGCHECK: call void @llvm.nvvm.bar.warp.sync(i32
__nvvm_bar_warp_sync(mask);

}
Loading