From 1030297d77ae5110b7873530f645aeec3a4264ba Mon Sep 17 00:00:00 2001 From: Yuanfang Chen Date: Mon, 27 Jun 2022 11:33:45 -0700 Subject: [PATCH 1/2] [ubsan] Using metadata instead of prologue data for function sanitizer Information in the function `Prologue Data` is intentionally opaque. When a function with `Prologue Data` is duplicated. The self (global value) references inside `Prologue Data` is still pointing to the original function. This may cause errors like `fatal error: error in backend: Cannot represent a difference across sections`. This patch detaches the information from function `Prologue Data` and attaches it to a function metadata node. This and D116130 fix https://github.com/llvm/llvm-project/issues/49689. Reviewed By: pcc Differential Revision: https://reviews.llvm.org/D115844 (cherry picked from commit 6678f8e505b19069a9dbdc3e3ee088d543752412) --- llvm/docs/LangRef.rst | 22 ++++++++++++++++++- llvm/include/llvm/IR/FixedMetadataKinds.def | 1 + llvm/include/llvm/IR/MDBuilder.h | 4 ++++ llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 18 +++++++++++++++ llvm/lib/IR/MDBuilder.cpp | 8 +++++++ .../Instrumentation/AddressSanitizer.cpp | 10 ++++----- llvm/test/CodeGen/X86/func-sanitizer.ll | 18 +++++++++++++++ 7 files changed, 75 insertions(+), 6 deletions(-) create mode 100644 llvm/test/CodeGen/X86/func-sanitizer.ll diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 920834b10897..a5a84a2f297b 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -5194,7 +5194,7 @@ multiple metadata attachments with the same identifier. A transformation is required to drop any metadata attachment that it does not know or know it can't preserve. Currently there is an exception for metadata -attachment to globals for ``!type`` and ``!absolute_symbol`` which can't be +attachment to globals for ``!func_sanitize``, ``!type`` and ``!absolute_symbol`` which can't be unconditionally dropped unless the global is itself deleted. Metadata attached to a module using named metadata may not be dropped, with @@ -7080,6 +7080,26 @@ Example: %a.addr = alloca float*, align 8, !annotation !0 !0 = !{!"auto-init"} +'``func_sanitize``' Metadata +^^^^^^^^^^^^^^^^^^^^^^^^^ + +The ``func_sanitize`` metadata is used to attach two values for the function +sanitizer instrumentation. The first value is the ubsan function signature. +The second value is the address of the proxy variable which stores the address +of the RTTI descriptor. If :ref:`prologue ` and '``func_sanitize``' +are used at the same time, :ref:`prologue ` is emitted before +'``func_sanitize``' in the output. + +Example: + +.. code-block:: text + + @__llvm_rtti_proxy = private unnamed_addr constant i8* bitcast ({ i8*, i8* }* @_ZTIFvvE to i8*) + define void @_Z3funv() !func_sanitize !0 { + return void + } + !0 = !{i32 846595819, i8** @__llvm_rtti_proxy} + Module Flags Metadata ===================== diff --git a/llvm/include/llvm/IR/FixedMetadataKinds.def b/llvm/include/llvm/IR/FixedMetadataKinds.def index 31979cd2f9db..baa7a91cbf56 100644 --- a/llvm/include/llvm/IR/FixedMetadataKinds.def +++ b/llvm/include/llvm/IR/FixedMetadataKinds.def @@ -42,3 +42,4 @@ LLVM_FIXED_MD_KIND(MD_preserve_access_index, "llvm.preserve.access.index", 27) LLVM_FIXED_MD_KIND(MD_vcall_visibility, "vcall_visibility", 28) LLVM_FIXED_MD_KIND(MD_noundef, "noundef", 29) LLVM_FIXED_MD_KIND(MD_annotation, "annotation", 30) +LLVM_FIXED_MD_KIND(MD_func_sanitize, "func_sanitize", 31) diff --git a/llvm/include/llvm/IR/MDBuilder.h b/llvm/include/llvm/IR/MDBuilder.h index 42829388b79a..21d7b8b6da71 100644 --- a/llvm/include/llvm/IR/MDBuilder.h +++ b/llvm/include/llvm/IR/MDBuilder.h @@ -108,6 +108,10 @@ public: /// Merge the new callback encoding \p NewCB into \p ExistingCallbacks. MDNode *mergeCallbackEncodings(MDNode *ExistingCallbacks, MDNode *NewCB); + /// Return metadata feeding to the CodeGen about how to generate a function + /// prologue for the "function" santizier. + MDNode *createRTTIPointerPrologue(Constant *PrologueSig, Constant *RTTI); + //===------------------------------------------------------------------===// // AA metadata. //===------------------------------------------------------------------===// diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 3e8e190eecc3..091f16567a36 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -844,6 +844,24 @@ void AsmPrinter::emitFunctionHeader() { // Emit the prologue data. if (F.hasPrologueData()) emitGlobalConstant(F.getParent()->getDataLayout(), F.getPrologueData()); + + // Emit the function prologue data for the indirect call sanitizer. + if (const MDNode *MD = F.getMetadata(LLVMContext::MD_func_sanitize)) { + assert(TM.getTargetTriple().getArch() == Triple::x86 || + TM.getTargetTriple().getArch() == Triple::x86_64); + assert(MD->getNumOperands() == 2); + + auto *PrologueSig = mdconst::extract(MD->getOperand(0)); + auto *FTRTTIProxy = mdconst::extract(MD->getOperand(1)); + assert(PrologueSig && FTRTTIProxy); + emitGlobalConstant(F.getParent()->getDataLayout(), PrologueSig); + + const MCExpr *Proxy = lowerConstant(FTRTTIProxy); + const MCExpr *FnExp = MCSymbolRefExpr::create(CurrentFnSym, OutContext); + const MCExpr *PCRel = MCBinaryExpr::createSub(Proxy, FnExp, OutContext); + // Use 32 bit since only small code model is supported. + OutStreamer->emitValue(PCRel, 4u); + } } /// EmitFunctionEntryLabel - Emit the label that is the entrypoint for the diff --git a/llvm/lib/IR/MDBuilder.cpp b/llvm/lib/IR/MDBuilder.cpp index 35af8490287b..fc59fda9fe22 100644 --- a/llvm/lib/IR/MDBuilder.cpp +++ b/llvm/lib/IR/MDBuilder.cpp @@ -150,6 +150,14 @@ MDNode *MDBuilder::mergeCallbackEncodings(MDNode *ExistingCallbacks, return MDNode::get(Context, Ops); } +MDNode *MDBuilder::createRTTIPointerPrologue(Constant *PrologueSig, + Constant *RTTI) { + SmallVector Ops; + Ops.push_back(createConstant(PrologueSig)); + Ops.push_back(createConstant(RTTI)); + return MDNode::get(Context, Ops); +} + MDNode *MDBuilder::createAnonymousAARoot(StringRef Name, MDNode *Extra) { SmallVector Args(1, nullptr); if (Extra) diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 8f94172a6402..9d11ef26c1f0 100644 --- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -1405,7 +1405,11 @@ static GlobalVariable *createPrivateGlobalForSourceLoc(Module &M, /// Check if \p G has been created by a trusted compiler pass. static bool GlobalWasGeneratedByCompiler(GlobalVariable *G) { // Do not instrument @llvm.global_ctors, @llvm.used, etc. - if (G->getName().startswith("llvm.")) + if (G->getName().startswith("llvm.") || + // Do not instrument gcov counter arrays. + G->getName().startswith("__llvm_gcov_ctr") || + // Do not instrument rtti proxy symbols for function sanitizer. + G->getName().startswith("__llvm_rtti_proxy")) return true; // Do not instrument asan globals. @@ -1414,10 +1418,6 @@ static bool GlobalWasGeneratedByCompiler(GlobalVariable *G) { G->getName().startswith(kODRGenPrefix)) return true; - // Do not instrument gcov counter arrays. - if (G->getName() == "__llvm_gcov_ctr") - return true; - return false; } diff --git a/llvm/test/CodeGen/X86/func-sanitizer.ll b/llvm/test/CodeGen/X86/func-sanitizer.ll new file mode 100644 index 000000000000..b8d96a346d0c --- /dev/null +++ b/llvm/test/CodeGen/X86/func-sanitizer.ll @@ -0,0 +1,18 @@ +; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s + +; CHECK: _Z3funv: +; CHECK: .cfi_startproc +; CHECK: .long 846595819 +; CHECK: .long .L__llvm_rtti_proxy-_Z3funv +; CHECK: .L__llvm_rtti_proxy: +; CHECK: .quad i +; CHECK: .size .L__llvm_rtti_proxy, 8 + +@i = linkonce_odr constant i32 1 +@__llvm_rtti_proxy = private unnamed_addr constant i32* @i + +define dso_local void @_Z3funv() !func_sanitize !0 { + ret void +} + +!0 = !{i32 846595819, i32** @__llvm_rtti_proxy} From 640a28cc0fe12741daa4c22f34b618ad57195528 Mon Sep 17 00:00:00 2001 From: Yuanfang Chen Date: Mon, 27 Jun 2022 11:36:32 -0700 Subject: [PATCH 2/2] [Coroutine] Remove the '!func_sanitize' metadata for split functions There is no proper RTTI for these split functions. So just delete the metadata. Fixes https://github.com/llvm/llvm-project/issues/49689. Reviewed By: rjmccall Differential Revision: https://reviews.llvm.org/D116130 (cherry picked from commit e2e9e708e5c2c3d5357b4bb355285ef55cd060d9) --- llvm/lib/Transforms/Coroutines/CoroSplit.cpp | 6 ++++++ llvm/test/Transforms/Coroutines/coro-split-00.ll | 12 +++++++----- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp index b5129809c6a6..78134fd297da 100644 --- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp @@ -923,6 +923,12 @@ void CoroCloner::create() { NewF->setVisibility(savedVisibility); NewF->setUnnamedAddr(savedUnnamedAddr); NewF->setDLLStorageClass(savedDLLStorageClass); + // The function sanitizer metadata needs to match the signature of the + // function it is being attached to. However this does not hold for split + // functions here. Thus remove the metadata for split functions. + if (Shape.ABI == coro::ABI::Switch && + NewF->hasMetadata(LLVMContext::MD_func_sanitize)) + NewF->eraseMetadata(LLVMContext::MD_func_sanitize); // Replace the attributes of the new function: auto OrigAttrs = NewF->getAttributes(); diff --git a/llvm/test/Transforms/Coroutines/coro-split-00.ll b/llvm/test/Transforms/Coroutines/coro-split-00.ll index d5c6279a6a87..cf2dc10549e7 100644 --- a/llvm/test/Transforms/Coroutines/coro-split-00.ll +++ b/llvm/test/Transforms/Coroutines/coro-split-00.ll @@ -1,7 +1,7 @@ ; Tests that coro-split pass splits the coroutine into f, f.resume and f.destroy ; RUN: opt < %s -passes='cgscc(coro-split),simplifycfg,early-cse' -S | FileCheck %s -define i8* @f() "coroutine.presplit"="1" { +define i8* @f() "coroutine.presplit"="1" !func_sanitize !0 { entry: %id = call token @llvm.coro.id(i32 0, i8* null, i8* null, i8* null) %need.alloc = call i1 @llvm.coro.alloc(token %id) @@ -32,7 +32,7 @@ suspend: ret i8* %hdl } -; CHECK-LABEL: @f( +; CHECK-LABEL: @f() !func_sanitize !0 { ; CHECK: call i8* @malloc ; CHECK: @llvm.coro.begin(token %id, i8* %phi) ; CHECK: store void (%f.Frame*)* @f.resume, void (%f.Frame*)** %resume.addr @@ -43,7 +43,7 @@ suspend: ; CHECK-NOT: call void @free( ; CHECK: ret i8* %hdl -; CHECK-LABEL: @f.resume( +; CHECK-LABEL: @f.resume({{.*}}) { ; CHECK-NOT: call i8* @malloc ; CHECK-NOT: call void @print(i32 0) ; CHECK: call void @print(i32 1) @@ -51,13 +51,13 @@ suspend: ; CHECK: call void @free( ; CHECK: ret void -; CHECK-LABEL: @f.destroy( +; CHECK-LABEL: @f.destroy({{.*}}) { ; CHECK-NOT: call i8* @malloc ; CHECK-NOT: call void @print( ; CHECK: call void @free( ; CHECK: ret void -; CHECK-LABEL: @f.cleanup( +; CHECK-LABEL: @f.cleanup({{.*}}) { ; CHECK-NOT: call i8* @malloc ; CHECK-NOT: call void @print( ; CHECK-NOT: call void @free( @@ -77,3 +77,5 @@ declare i1 @llvm.coro.end(i8*, i1) declare noalias i8* @malloc(i32) declare void @print(i32) declare void @free(i8*) willreturn + +!0 = !{i32 846595819, i8** null}