diff --git a/.gitignore b/.gitignore index 380b80fb8..64679fef5 100644 --- a/.gitignore +++ b/.gitignore @@ -19,6 +19,7 @@ /tests/simple-client/simple-client /tests/simple-multi-alloc/simple-multi-alloc /tests/sloppy-dumptypes/sloppy-dumptypes +/tests/cxx-new/cxx-new /tools/allocsites /tools/dumptypes /tools/ifacetypes @@ -135,7 +136,11 @@ Makefile.in /tools/dwarftypes /tools/extrasyms /tools/frametypes +/tools/frametypes2 /tools/metavector +/tools/dumpsyscalls +/tools/noopgen +/tools/objdumpallocs /tests/unit-tests/metavec /tests/unit-tests/metavec-debug @@ -199,3 +204,34 @@ oopsla*.ps /buildtest/*/Dockerfile /buildtest/*/Dockerfile.env + +# clangd +.cache +compile_commands.json + +.vscode + +# allocsld build artifacts +*.os +/allocsld/*.map + +# LLVM bitcode +*.bc + +# liballocs-generated type info +*.usedtypes.c + +# standalone allocstubs assembly (not matched by *.allocstubs.s above) +allocstubs.s + +# compiled tools +/tools/dwarf-machine-self-test +/tools/lang/c++/bin/clang-ast-parser + +# symlinks generated by the build (point into contrib/liballocstool/include) +/include/uniqtype-defs.h +/include/uniqtype.h +/include/malloc-meta.h +/include/pageindex.h + +/allocsld/allocsld.h diff --git a/allocsld/allocsld.h b/allocsld/allocsld.h new file mode 100644 index 000000000..46870578b --- /dev/null +++ b/allocsld/allocsld.h @@ -0,0 +1,6 @@ +#ifndef LIBALLOCS_ALLOCSLD_H_ +#define LIBALLOCS_ALLOCSLD_H_ +#define DT_ALLOCS_BOOTSTRAP_RELA 0x6ffffa01 +#define DT_ALLOCS_BOOTSTRAP_RELASZ 0x6ffffa02 +#define DT_ALLOCS_BOOTSTRAP_RELAENT 0x6ffffa03 +#endif diff --git a/include/malloc-meta.h b/include/malloc-meta.h index b1bece442..17b87273d 100644 --- a/include/malloc-meta.h +++ b/include/malloc-meta.h @@ -54,24 +54,30 @@ #error "Variable size lifetime policies not fully supported yet" #endif +struct insert_initial { + unsigned char always_0:1; + unsigned long alloc_site:47; + unsigned short unused:12; + unsigned char lifetime_policies:4; // should
never be zero (0000 => already freed) +}; + +struct insert_with_type { + unsigned char always_1:1; + signed short alloc_site_id:15; /* may be zero; -1 means "no/unknown alloc site" */ + unsigned long uniqtype_shifted:44; /* uniqtype ptrs are 8-byte-aligned and have top bit 0 => this field is ((unsigned long) u)>>3 */ + unsigned char lifetime_policies:4; // should never be zero (0000 => already freed) +}; + +struct insert_common { + unsigned long _ignore:60; + unsigned char lifetime_policies:4; +}; + struct insert { union { - struct insert_initial { - unsigned char always_0:1; - unsigned long alloc_site:47; - unsigned short unused:12; - unsigned char lifetime_policies:4; // should never be zero (0000 => already freed) - } initial; - struct insert_with_type { - unsigned char always_1:1; - signed short alloc_site_id:15; /* may be zero; -1 means "no/unknown alloc site" */ - unsigned long uniqtype_shifted:44; /* uniqtype ptrs are 8-byte-aligned and have top bit 0 => this field is ((unsigned long) u)>>3 */ - unsigned char lifetime_policies:4; // should never be zero (0000 => already freed) - } with_type; - struct insert_common { - unsigned long _ignore:60; - unsigned char lifetime_policies:4; - } common; + struct insert_initial initial; + struct insert_with_type with_type; + struct insert_common common; }; } __attribute((packed)); diff --git a/include/pageindex.h b/include/pageindex.h index a851156f5..fa8c8f471 100644 --- a/include/pageindex.h +++ b/include/pageindex.h @@ -82,6 +82,13 @@ extern bigalloc_num_t *pageindex __attribute__((weak)); #if defined(__PIC__) || defined(__code_model_large__) extern bigalloc_num_t *__liballocs_pageindex __attribute__((weak)); #endif +/* Forward-declare the enum so C++ sees a complete-enough type for the return type. + * The enum is defined in the liballocs DSO; we only need the declaration here. 
*/ +#ifdef __cplusplus +enum object_memory_kind : int; +#else +enum object_memory_kind; /* GCC extension: incomplete enum forward declaration */ +#endif enum object_memory_kind __liballocs_get_memory_kind(const void *obj) __attribute__((visibility("protected"))); void __liballocs_print_l0_to_stream_err(void); diff --git a/tests/Makefile b/tests/Makefile index 88fa108cf..6fc7447ef 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -227,10 +227,12 @@ default: # generic clean rule that we can run from test dirs too (with $(MAKE) -f ../Makefile) clean: # (delete anything whose name is a prefix of a .c file's and doesn't contain a dot) - rm -f $(filter-out .,$(foreach pat,% %.o %.allocstubs.o %.allocstubs.c %.linked.o %.s %.i %.cil.s %.cil.i,$(patsubst %.c,$(pat),$(shell find -name '*.c')))) + rm -f $(filter-out .,$(foreach pat,% %.o %.ii %.usedtypes.c %.allocstubs.o %.allocstubs.c %.linked.o %.s %.i %.cil.s %.cil.i,$(patsubst %.c,$(pat),$(shell find -name '*.c')))) + rm -f $(filter-out .,$(foreach pat,% %.o %.ii %.usedtypes.c %.allocstubs.o %.allocstubs.c %.linked.o %.s %.i %.cil.s %.cil.i,$(patsubst %.cpp,$(pat),$(shell find -name '*.cpp')))) find -name '*.cil.c' -o \ -name '*.allocs' -o -name '*.so' -o -name '*.ltrans.out' -o \ - -name '*.allocstubs.c' -o -name '*.fixuplog' | xargs rm -f + -name '*.allocstubs.c' -o -name '*.fixuplog' -o \ + -name 'allocstubs.s' -o -name '*.bc' | xargs rm -f .PHONY: unit-tests unit-tests: diff --git a/tests/cxx-new/cxx-new.cpp b/tests/cxx-new/cxx-new.cpp new file mode 100644 index 000000000..f4ee16aa9 --- /dev/null +++ b/tests/cxx-new/cxx-new.cpp @@ -0,0 +1,19 @@ +#include <stdio.h> +#include <allocs.h> + +struct Point { int x, y; }; + +int main() { + Point *p = new Point; + Point *ps = new Point[10]; + int *n = new int(42); + + printf("p type: %s\n", UNIQTYPE_NAME(__liballocs_get_alloc_type(p))); + printf("ps type: %s\n", UNIQTYPE_NAME(__liballocs_get_alloc_type(ps))); + printf("n type: %s\n", UNIQTYPE_NAME(__liballocs_get_alloc_type(n))); + + delete p;
+ delete[] ps; + delete n; + return 0; +} diff --git a/tests/cxx-new/mk.inc b/tests/cxx-new/mk.inc new file mode 100644 index 000000000..c0d480177 --- /dev/null +++ b/tests/cxx-new/mk.inc @@ -0,0 +1,11 @@ +CXX := $(LIBALLOCS)/tools/lang/c++/bin/allocsc++ +export CXX + +CXXFLAGS := $(filter-out -std=c99,$(CFLAGS)) -std=c++17 +export CXXFLAGS + +LDLIBS += -lallocs + +cxx-new: cxx-new.cpp + $(CXX) $(CXXFLAGS) $(CPPFLAGS) -o $@ $< $(LDFLAGS) $(LDLIBS) + diff --git a/tools/compilerwrapper.py b/tools/compilerwrapper.py index 2bb3a0fb8..5bf8956fa 100644 --- a/tools/compilerwrapper.py +++ b/tools/compilerwrapper.py @@ -201,6 +201,8 @@ def printErrors(self, errfile): # also the items themselves -- could be strings or SourceFiles phaseItems = [[] for n in range(Phase.DRIVER, 1+Phase.LINK)] + + phases = [] # keep a map of all source files (not items; excluding linker inputs), indexed by position allSourceFiles = dict({}) diff --git a/tools/gather-srcallocs.sh b/tools/gather-srcallocs.sh index 2f71344e1..3828f9726 100755 --- a/tools/gather-srcallocs.sh +++ b/tools/gather-srcallocs.sh @@ -79,6 +79,12 @@ cat "$all_obj_allocs_file" | cut -f1 | sort | uniq | while read obj rest; do (1|2|12|29) # DW_LANG_C89, DW_LANG_C, DW_LANG_C99, DW_LANG_C11 $(dirname "$0")/lang/c/bin/c-"$our_name_rewritten" "$cu_sourcepath" "$obj" "$cu_fname" "$cu_compdir" ;; + (4|25|26|33|42|43) # DW_LANG_C_plus_plus, DW_LANG_C_plus_plus_03, + # DW_LANG_C_plus_plus_11, DW_LANG_C_plus_plus_14 + # DW_LANG_C_plus_plus_17, DW_LANG_C_plus_plus_20 + $(dirname "$0")/lang/c++/bin/c++-"$our_name_rewritten" \ + "$cu_sourcepath" "$obj" "$cu_fname" "$cu_compdir" + ;; (*) # unknown echo "Warning: could not gather source-level allocs for unknown language: $cu_language_fullstr ($cu_language_num, $( echo -n "$cu_language_fullstr" | hd ))" 1>&2 ;; diff --git a/tools/lang/c++/bin/c++-gather-srcallocs b/tools/lang/c++/bin/c++-gather-srcallocs new file mode 100755 index 000000000..912001fec --- /dev/null +++
b/tools/lang/c++/bin/c++-gather-srcallocs @@ -0,0 +1,22 @@ +#!/bin/bash + +cu_sourcepath="$1" +obj="$2" +cu_fname="$3" +cu_compdir="$4" + +# Map foo.cpp / foo.cc / foo.cxx / foo.C / foo.c++ -> foo.i.allocs +# (every recognised C++ source suffix is rewritten to the same .i.allocs suffix) +cu_allocspath="$( echo "$cu_sourcepath" | \ + sed 's/\.\(cpp\|cc\|cxx\|C\|c++\)$/.i.allocs/' )" + +if [[ "$cu_allocspath" == "$cu_sourcepath" ]]; then + echo "Warning: unrecognised C++ source extension in $cu_sourcepath" 1>&2 + exit 0 +fi + +if [[ ! -r "$cu_allocspath" ]]; then + echo "Warning: missing expected allocs file ($cu_allocspath)" 1>&2 +else + cat "$cu_allocspath" +fi diff --git a/tools/lang/c++/clang-ast-parser/Makefile b/tools/lang/c++/clang-ast-parser/Makefile new file mode 100644 index 000000000..e8adb97d1 --- /dev/null +++ b/tools/lang/c++/clang-ast-parser/Makefile @@ -0,0 +1,53 @@ +CXX = g++ + +LLVM_CXXFLAGS = $(shell llvm-config-20 --cxxflags) +LLVM_LDFLAGS = $(shell llvm-config-20 --ldflags) +LLVM_LIBS = $(shell llvm-config-20 --libs --system-libs --link-static) + + +# 1. Get the full paths +# 2. Use 'basename' to get just the filename (e.g., libclangBasic.a) +# 3.
Use 'sed' to strip the 'lib' prefix and '.a' suffix +CLANG_LIBS = $(shell ls /usr/lib/llvm-20/lib/libclang*.a | xargs -n1 basename | sed 's/^lib\(.*\)\.a/-l\1/') + +CXXFLAGS = -std=c++17 -g $(LLVM_CXXFLAGS) -Iinclude + +BIN = ../bin +SRC = src +BUILD = build +TARGET = $(BIN)/clang-ast-parser + +SRCS = $(shell find $(SRC) -name '*.cpp') +OBJS = $(subst $(SRC)/,$(BUILD)/,$(addsuffix .o,$(basename $(SRCS)))) + +default: $(TARGET) + +$(BUILD)/%.o: $(SRC)/%.cpp + mkdir -p $(dir $@) + ${CXX} ${CXXFLAGS} -c $< -o $@ + +$(TARGET): $(OBJS) + mkdir -p $(dir $@) + ${CXX} ${CXXFLAGS} $(OBJS) -o $@ ${LLVM_LDFLAGS} \ + -Wl,--start-group ${CLANG_LIBS} ${LLVM_LIBS} -Wl,--end-group + +clang-ast-parser.o: clang-ast-parser.cpp + ${CXX} ${CXXFLAGS} -c clang-ast-parser.cpp -o clang-ast-parser.o + +# Group is used to tell linker to search repeatedly within the group +clang-ast-parser: clang-ast-parser.o + ${CXX} ${CXXFLAGS} clang-ast-parser.o -o clang-ast-parser ${LLVM_LDFLAGS} \ + -Wl,--start-group ${CLANG_LIBS} ${LLVM_LIBS} -Wl,--end-group + +clean: + rm -rf $(BUILD) + rm -f $(TARGET) + +dep: + wget https://apt.llvm.org/llvm.sh + chmod +x llvm.sh + sudo ./llvm.sh 20 + rm llvm.sh + sudo apt update + sudo apt install libclang-20-dev llvm-20-dev libpolly-20-dev libisl-dev libzstd-dev + diff --git a/tools/lang/c++/clang-ast-parser/src/clang-ast-parser.cpp b/tools/lang/c++/clang-ast-parser/src/clang-ast-parser.cpp new file mode 100644 index 000000000..253ec39c3 --- /dev/null +++ b/tools/lang/c++/clang-ast-parser/src/clang-ast-parser.cpp @@ -0,0 +1,77 @@ +#include "clang/AST/ASTConsumer.h" +#include "clang/AST/RecursiveASTVisitor.h" +#include "clang/Frontend/CompilerInstance.h" +#include "clang/Frontend/FrontendAction.h" +#include "clang/Tooling/Tooling.h" +#include "clang/Tooling/CommonOptionsParser.h" +#include "llvm/Support/CommandLine.h" +#include "uniqtype-name.h" + +using namespace clang; +using namespace clang::tooling; +using namespace llvm; + +static cl::OptionCategory
MyToolCategory("my-tool-options"); + +class NewDetectorVisitor : public RecursiveASTVisitor<NewDetectorVisitor> { +public: + explicit NewDetectorVisitor(ASTContext *Context, std::shared_ptr<raw_fd_ostream> outStream) : Context(Context), OutStream(outStream) {} + + bool VisitCXXNewExpr(CXXNewExpr *E) { + // skip placement operator + if(E->getNumPlacementArgs() > 0) return true; + + FullSourceLoc FullLocation = Context->getFullLoc(E->getBeginLoc()); + if (FullLocation.isValid() && FullLocation.getFileEntry()) { + std::string TypeName = E->getAllocatedType().getAsString(); + *OutStream << FullLocation.getFileEntry()->tryGetRealPathName() << "\t" + << FullLocation.getSpellingLineNumber() << "\t" + << FullLocation.getSpellingColumnNumber() << "\t" + << "new" << "\t" + << uniqtypeNameFromClangType(E->getAllocatedType(), Context) << "\t" + << (E->isArray() ? "1": "0") << "\n"; + } + return true; + } + +private: + ASTContext *Context; + std::shared_ptr<raw_fd_ostream> OutStream; +}; + +class NewDetectorConsumer : public ASTConsumer { +public: + explicit NewDetectorConsumer(ASTContext *Context, std::shared_ptr<raw_fd_ostream> outStream) : Visitor(Context, outStream) {} + void HandleTranslationUnit(ASTContext &Context) override { + Visitor.TraverseDecl(Context.getTranslationUnitDecl()); + } +private: + NewDetectorVisitor Visitor; +}; + +class NewDetectorAction : public ASTFrontendAction { +public: + std::unique_ptr<ASTConsumer> CreateASTConsumer(CompilerInstance &CI, StringRef file) override { + // *.cpp -> *.i.allocs + SmallString<256> outPath(file); + sys::path::replace_extension(outPath, ""); + std::string outputPath = std::string(outPath) + ".i.allocs"; + std::error_code ec; + auto outStream = std::make_shared<raw_fd_ostream>(outputPath, ec); + return std::make_unique<NewDetectorConsumer>(&CI.getASTContext(), std::move(outStream)); + } +}; + +int main(int argc, const char **argv) { + auto ExpectedParser = CommonOptionsParser::create(argc, argv, MyToolCategory); + + if (!ExpectedParser) { + errs() << ExpectedParser.takeError(); + return 1; + } + + CommonOptionsParser& OptionsParser = ExpectedParser.get(); + ClangTool
Tool(OptionsParser.getCompilations(), OptionsParser.getSourcePathList()); + + return Tool.run(newFrontendActionFactory<NewDetectorAction>().get()); +} diff --git a/tools/lang/c++/clang-ast-parser/src/uniqtype-name.cpp b/tools/lang/c++/clang-ast-parser/src/uniqtype-name.cpp new file mode 100644 index 000000000..a2ab01080 --- /dev/null +++ b/tools/lang/c++/clang-ast-parser/src/uniqtype-name.cpp @@ -0,0 +1,33 @@ +#include "clang/AST/ASTContext.h" +#include "clang/AST/Decl.h" +#include "clang/AST/Type.h" + +using namespace clang; + +std::string uniqtypeNameFromClangType(QualType qt, ASTContext *ctx) { + const Type *T = qt.getTypePtr(); + + // For records (struct/class): use tag name + if (const RecordType *RT = T->getAs<RecordType>()) { + std::string name = RT->getDecl()->getNameAsString(); + if (!name.empty()) return "__uniqtype__" + name; + } + + // For built-in types: use canonical name + bit width + if (const BuiltinType *BT = T->getAs<BuiltinType>()) { + uint64_t bits = ctx->getTypeSize(qt); + std::string canonName = BT->getName(ctx->getPrintingPolicy()).str(); + // Map to DWARF canonical name (e.g.
"int" → "int", "char" → "signed char") + return "__uniqtype__" + canonName + "$$" + std::to_string(bits); + } + + // Pointer types + if (T->isPointerType()) { + QualType pointee = T->getPointeeType(); + return "__uniqtype____PTR_" + uniqtypeNameFromClangType(pointee, ctx); + } + + // Unknown / too complex — fall back to uninterpreted byte + return "__uniqtype____uninterpreted_byte"; +} + diff --git a/tools/lang/c++/clang-ast-parser/src/uniqtype-name.h b/tools/lang/c++/clang-ast-parser/src/uniqtype-name.h new file mode 100644 index 000000000..e8263413b --- /dev/null +++ b/tools/lang/c++/clang-ast-parser/src/uniqtype-name.h @@ -0,0 +1,11 @@ +#ifndef UNIQTYPE_NAME_H + +#define UNIQTYPE_NAME_H + +#include "clang/AST/Type.h" +#include + +std::string uniqtypeNameFromClangType(clang::QualType qt, clang::ASTContext *ctx); + +#endif + diff --git a/tools/lang/c++/lib/allocscxx.py b/tools/lang/c++/lib/allocscxx.py index 64583b7a0..dab7a83ac 100755 --- a/tools/lang/c++/lib/allocscxx.py +++ b/tools/lang/c++/lib/allocscxx.py @@ -9,14 +9,24 @@ sys.path.append(liballocs_base + "tools/lang/c++/lib") from allocscompilerwrapper import * +if os.environ.get('ALLOCS_DEBUGPY'): + import debugpy + debugpy.listen(int(os.environ['ALLOCS_DEBUGPY'])) + debugpy.wait_for_client() + class AllocsCxx(AllocsCompilerWrapper): - # FIXME: also new, delete et al def defaultL1AllocFns(self): - return ["malloc(Z)p", "calloc(zZ)p", "realloc(pZ)p", "memalign(zZ)p"] + return ["malloc(Z)p", "calloc(zZ)p", "realloc(pZ)p", "memalign(zZ)p", + "_Znwm(Z)p", # new(size_t) + "_Znam(Z)p", # new[](size_t) + ] def defaultFreeFns(self): - return ["free(P)"] - + return ["free(P)", + "_ZdlPv(P)", # delete(void*) + "_ZdaPv(P)" # delete[](void*) + ] + def makeObjectFileName(self, sourceFile): nameStem, nameExtension = os.path.splitext(sourceFile) if (nameExtension == ".cpp" or nameExtension == ".cc" or nameExtension == ".C"): @@ -27,6 +37,56 @@ def makeObjectFileName(self, sourceFile): self.debugMsg("Making a secret output 
file (from unknown source) " + outputFilename + "\n") return outputFilename + def getCompilationFlags(self): + """Return flags needed for clang-ast-parser to parse the source with the same + include paths, defines, and language standard as the real compiler invocation.""" + opts = self.optionsForPhases({Phase.PREPROCESS, Phase.COMPILE}) + return self.flatOptions(opts) + + def getSystemCxxIncludes(self): + """Query the real C++ compiler for its system include search paths and return + them as a list of -isystem flags for clang-ast-parser.""" + cxx = os.environ.get("ALLOCSCXX_CXX", "c++") + try: + result = subprocess.run( + [cxx, "-v", "-x", "c++", "/dev/null", "-fsyntax-only"], + stderr=subprocess.PIPE, stdout=subprocess.DEVNULL, text=True + ) + lines = result.stderr.split("\n") + paths = [] + in_list = False + for line in lines: + if "#include <" in line and "search starts here" in line: + in_list = True + continue + if "End of search list" in line: + break + if in_list and line.startswith(" "): + paths.append(line.strip()) + return [item for p in paths for item in ("-isystem", p)] + except Exception: + return [] + + def runAllocsParser(self, sourceFile): + parser = os.path.join( + self.getLibAllocsBaseDir(), + "tools/lang/c++/bin/clang-ast-parser" + ) + if not os.path.exists(parser): + self.debugMsg("clang-ast-parser not found, skipping C++ allocs dump\n") + return + compile_flags = self.getCompilationFlags() + self.getSystemCxxIncludes() + cmd = [parser, str(sourceFile), "--"] + compile_flags + self.debugMsg("Running clang-ast-parser: " + " ".join(cmd) + "\n") + subprocess.call(cmd) + + def runPhasesBeforeLink(self): + ret = super().runPhasesBeforeLink() + if ret == 0 and not self.onlyPreprocessing() and Phase.ASSEMBLE in self.enabledPhases: + for src in self.getSourceInputFiles(): + self.runAllocsParser(src) + return ret + def getBasicCompilerCommand(self): return [os.environ.get("ALLOCSCXX_CXX", "c++")] @@ -34,4 +94,3 @@ def getBasicCompilerCommand(self): 
wrapper = AllocsCxx() ret = wrapper.main() exit(ret) - diff --git a/tools/objdumpallocs b/tools/objdumpallocs index 2cbbb4b26..767ed3cfc 100755 --- a/tools/objdumpallocs +++ b/tools/objdumpallocs @@ -66,7 +66,7 @@ make_match_any_line_floating_eregexp () { } line_regexp='^([0-9a-f]*) <([^>]*)>' -alloc_descriptors="${LIBALLOCS_ALLOC_FNS} malloc(Z)p calloc(zZ)p realloc(pZ)p memalign(zZ)p alloca(Z)p" +alloc_descriptors="${LIBALLOCS_ALLOC_FNS} malloc(Z)p calloc(zZ)p realloc(pZ)p memalign(zZ)p alloca(Z)p _Znwm(Z)p _Znam(Z)p" suballoc_descriptors="${LIBALLOCS_SUBALLOC_FNS:-}" allocsz_descriptors="${LIBALLOCS_ALLOCSZ_FNS:-}" all_alloc_descriptors="${alloc_descriptors}${suballoc_descriptors:+ ${suballoc_descriptors}}${allocsz_descriptors:+ ${allocsz_descriptors}}"