From 3eb6d072ea2aa7f39e85c036f98201df1e65c56c Mon Sep 17 00:00:00 2001 From: Vedant Kumar <vsk@apple.com> Date: Thu, 13 Jul 2017 20:55:26 +0000 Subject: [PATCH] [ubsan] Teach the pointer overflow check that "p - <unsigned> <= p" (PR33430) The pointer overflow check gives false negatives when dealing with expressions in which an unsigned value is subtracted from a pointer. This is summarized in PR33430 [1]: ubsan permits the result of the subtraction to be greater than "p", but it should not. To fix the issue, we should track whether or not the pointer expression is a subtraction. If it is, and the indices are unsigned, we know to expect "p - <unsigned> <= p". I've tested this by running check-{llvm,clang} with a stage2 ubsan-enabled build. I've also added some tests to compiler-rt, which are in D34122. [1] https://bugs.llvm.org/show_bug.cgi?id=33430 Differential Revision: https://reviews.llvm.org/D34121 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@307955 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/CGExpr.cpp | 4 ++- lib/CodeGen/CGExprScalar.cpp | 38 ++++++++++++++++----------- lib/CodeGen/CodeGenFunction.h | 7 +++++ test/CodeGen/ubsan-pointer-overflow.m | 15 ++++++++--- 4 files changed, 44 insertions(+), 20 deletions(-) diff --git a/lib/CodeGen/CGExpr.cpp b/lib/CodeGen/CGExpr.cpp index 9f40ee5a00a..0ae725532dc 100644 --- a/lib/CodeGen/CGExpr.cpp +++ b/lib/CodeGen/CGExpr.cpp @@ -3052,7 +3052,9 @@ static llvm::Value *emitArraySubscriptGEP(CodeGenFunction &CGF, SourceLocation loc, const llvm::Twine &name = "arrayidx") { if (inbounds) { - return CGF.EmitCheckedInBoundsGEP(ptr, indices, signedIndices, loc, name); + return CGF.EmitCheckedInBoundsGEP(ptr, indices, signedIndices, + CodeGenFunction::NotSubtraction, loc, + name); } else { return CGF.Builder.CreateGEP(ptr, indices, name); } diff --git a/lib/CodeGen/CGExprScalar.cpp b/lib/CodeGen/CGExprScalar.cpp index 43c86495f3d..1170b014ec7 100644 --- a/lib/CodeGen/CGExprScalar.cpp +++ b/lib/CodeGen/CGExprScalar.cpp @@ -1851,7 +1851,7 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, llvm::Value *input; int amount = (isInc ? 1 : -1); - bool signedIndex = !isInc; + bool isSubtraction = !isInc; if (const AtomicType *atomicTy = type->getAs<AtomicType>()) { type = atomicTy->getValueType(); @@ -1941,8 +1941,9 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, if (CGF.getLangOpts().isSignedOverflowDefined()) value = Builder.CreateGEP(value, numElts, "vla.inc"); else - value = CGF.EmitCheckedInBoundsGEP(value, numElts, signedIndex, - E->getExprLoc(), "vla.inc"); + value = CGF.EmitCheckedInBoundsGEP( + value, numElts, /*SignedIndices=*/false, isSubtraction, + E->getExprLoc(), "vla.inc"); // Arithmetic on function pointers (!) is just +-1. } else if (type->isFunctionType()) { @@ -1952,8 +1953,9 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, if (CGF.getLangOpts().isSignedOverflowDefined()) value = Builder.CreateGEP(value, amt, "incdec.funcptr"); else - value = CGF.EmitCheckedInBoundsGEP(value, amt, signedIndex, - E->getExprLoc(), "incdec.funcptr"); + value = CGF.EmitCheckedInBoundsGEP(value, amt, /*SignedIndices=*/false, + isSubtraction, E->getExprLoc(), + "incdec.funcptr"); value = Builder.CreateBitCast(value, input->getType()); // For everything else, we can just do a simple increment. @@ -1962,8 +1964,9 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, if (CGF.getLangOpts().isSignedOverflowDefined()) value = Builder.CreateGEP(value, amt, "incdec.ptr"); else - value = CGF.EmitCheckedInBoundsGEP(value, amt, signedIndex, - E->getExprLoc(), "incdec.ptr"); + value = CGF.EmitCheckedInBoundsGEP(value, amt, /*SignedIndices=*/false, + isSubtraction, E->getExprLoc(), + "incdec.ptr"); } // Vector increment/decrement. @@ -2044,7 +2047,8 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, if (CGF.getLangOpts().isSignedOverflowDefined()) value = Builder.CreateGEP(value, sizeValue, "incdec.objptr"); else - value = CGF.EmitCheckedInBoundsGEP(value, sizeValue, signedIndex, + value = CGF.EmitCheckedInBoundsGEP(value, sizeValue, + /*SignedIndices=*/false, isSubtraction, E->getExprLoc(), "incdec.objptr"); value = Builder.CreateBitCast(value, input->getType()); } @@ -2663,7 +2667,6 @@ static Value *emitPointerArithmetic(CodeGenFunction &CGF, } bool isSigned = indexOperand->getType()->isSignedIntegerOrEnumerationType(); - bool mayHaveNegativeGEPIndex = isSigned || isSubtraction; unsigned width = cast<llvm::IntegerType>(index->getType())->getBitWidth(); auto &DL = CGF.CGM.getDataLayout(); @@ -2715,7 +2718,7 @@ static Value *emitPointerArithmetic(CodeGenFunction &CGF, } else { index = CGF.Builder.CreateNSWMul(index, numElements, "vla.index"); pointer = - CGF.EmitCheckedInBoundsGEP(pointer, index, mayHaveNegativeGEPIndex, + CGF.EmitCheckedInBoundsGEP(pointer, index, isSigned, isSubtraction, op.E->getExprLoc(), "add.ptr"); } return pointer; @@ -2733,7 +2736,7 @@ static Value *emitPointerArithmetic(CodeGenFunction &CGF, if (CGF.getLangOpts().isSignedOverflowDefined()) return CGF.Builder.CreateGEP(pointer, index, "add.ptr"); - return CGF.EmitCheckedInBoundsGEP(pointer, index, mayHaveNegativeGEPIndex, + return CGF.EmitCheckedInBoundsGEP(pointer, index, isSigned, isSubtraction, op.E->getExprLoc(), "add.ptr"); } @@ -2807,7 +2810,7 @@ static Value* tryEmitFMulAdd(const BinOpInfo &op, Value *ScalarExprEmitter::EmitAdd(const BinOpInfo &op) { if (op.LHS->getType()->isPointerTy() || op.RHS->getType()->isPointerTy()) - return emitPointerArithmetic(CGF, op, /*subtraction*/ false); + return emitPointerArithmetic(CGF, op, CodeGenFunction::NotSubtraction); if (op.Ty->isSignedIntegerOrEnumerationType()) { switch (CGF.getLangOpts().getSignedOverflowBehavior()) { @@ -2878,7 +2881,7 @@ Value *ScalarExprEmitter::EmitSub(const BinOpInfo &op) { // If the RHS is not a pointer, then we have normal pointer // arithmetic. if (!op.RHS->getType()->isPointerTy()) - return emitPointerArithmetic(CGF, op, /*subtraction*/ true); + return emitPointerArithmetic(CGF, op, CodeGenFunction::IsSubtraction); // Otherwise, this is a pointer subtraction. @@ -3853,6 +3856,7 @@ LValue CodeGenFunction::EmitCompoundAssignmentLValue( Value *CodeGenFunction::EmitCheckedInBoundsGEP(Value *Ptr, ArrayRef<Value *> IdxList, bool SignedIndices, + bool IsSubtraction, SourceLocation Loc, const Twine &Name) { Value *GEPVal = Builder.CreateInBoundsGEP(Ptr, IdxList, Name); @@ -3958,15 +3962,19 @@ Value *CodeGenFunction::EmitCheckedInBoundsGEP(Value *Ptr, // pointer matches the sign of the total offset. llvm::Value *ValidGEP; auto *NoOffsetOverflow = Builder.CreateNot(OffsetOverflows); - auto *PosOrZeroValid = Builder.CreateICmpUGE(ComputedGEP, IntPtr); if (SignedIndices) { + auto *PosOrZeroValid = Builder.CreateICmpUGE(ComputedGEP, IntPtr); auto *PosOrZeroOffset = Builder.CreateICmpSGE(TotalOffset, Zero); llvm::Value *NegValid = Builder.CreateICmpULT(ComputedGEP, IntPtr); ValidGEP = Builder.CreateAnd( Builder.CreateSelect(PosOrZeroOffset, PosOrZeroValid, NegValid), NoOffsetOverflow); - } else { + } else if (!SignedIndices && !IsSubtraction) { + auto *PosOrZeroValid = Builder.CreateICmpUGE(ComputedGEP, IntPtr); ValidGEP = Builder.CreateAnd(PosOrZeroValid, NoOffsetOverflow); + } else { + auto *NegOrZeroValid = Builder.CreateICmpULE(ComputedGEP, IntPtr); + ValidGEP = Builder.CreateAnd(NegOrZeroValid, NoOffsetOverflow); } llvm::Constant *StaticArgs[] = {EmitCheckSourceLocation(Loc)}; diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h index 5933e029be8..753dd92f307 100644 --- a/lib/CodeGen/CodeGenFunction.h +++ b/lib/CodeGen/CodeGenFunction.h @@ -3589,12 +3589,19 @@ public: /// nonnull, if \p LHS is marked _Nonnull. void EmitNullabilityCheck(LValue LHS, llvm::Value *RHS, SourceLocation Loc); + /// An enumeration which makes it easier to specify whether or not an + /// operation is a subtraction. + enum { NotSubtraction = false, IsSubtraction = true }; + /// Same as IRBuilder::CreateInBoundsGEP, but additionally emits a check to /// detect undefined behavior when the pointer overflow sanitizer is enabled. /// \p SignedIndices indicates whether any of the GEP indices are signed. + /// \p IsSubtraction indicates whether the expression used to form the GEP + /// is a subtraction. llvm::Value *EmitCheckedInBoundsGEP(llvm::Value *Ptr, ArrayRef<llvm::Value *> IdxList, bool SignedIndices, + bool IsSubtraction, SourceLocation Loc, const Twine &Name = ""); diff --git a/test/CodeGen/ubsan-pointer-overflow.m b/test/CodeGen/ubsan-pointer-overflow.m index da622355fdb..977e2458384 100644 --- a/test/CodeGen/ubsan-pointer-overflow.m +++ b/test/CodeGen/ubsan-pointer-overflow.m @@ -10,16 +10,20 @@ void unary_arith(char *p) { ++p; // CHECK: ptrtoint i8* {{.*}} to i64, !nosanitize - // CHECK-NEXT: add i64 {{.*}}, -1, !nosanitize - // CHECK: select i1 false{{.*}}, !nosanitize + // CHECK-NEXT: [[COMPGEP:%.*]] = add i64 {{.*}}, -1, !nosanitize + // CHECK: [[NEGVALID:%.*]] = icmp ule i64 [[COMPGEP]], {{.*}}, !nosanitize + // CHECK-NOT: select + // CHECK: br i1 [[NEGVALID]]{{.*}}, !nosanitize // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}} --p; + // CHECK: icmp uge i64 // CHECK-NOT: select // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}} p++; - // CHECK: select + // CHECK: icmp ule i64 + // CHECK-NOT: select // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}} p--; } @@ -64,7 +68,8 @@ void binary_arith_unsigned(char *p, unsigned i) { // CHECK: [[OFFSET:%.*]] = sub i64 0, {{.*}} // CHECK-NEXT: getelementptr inbounds {{.*}} [[OFFSET]] - // CHECK: select + // CHECK: icmp ule i64 + // CHECK-NOT: select // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}} p - i; } @@ -121,8 +126,10 @@ void pointer_array(int **arr, int k) { // CHECK-LABEL: define void @pointer_array_unsigned_indices void pointer_array_unsigned_indices(int **arr, unsigned k) { + // CHECK: icmp uge // CHECK-NOT: select // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}} + // CHECK: icmp uge // CHECK-NOT: select // CHECK: call void @__ubsan_handle_pointer_overflow{{.*}} arr[k][k]; -- GitLab