diff --git a/include/clang/Analysis/Analyses/PrintfFormatString.h b/include/clang/Analysis/Analyses/PrintfFormatString.h
new file mode 100644
index 0000000000000000000000000000000000000000..978486d271aeaa82ce73d3da4ef5920d3e0ed4d9
--- /dev/null
+++ b/include/clang/Analysis/Analyses/PrintfFormatString.h
@@ -0,0 +1,228 @@
+//==- PrintfFormatString.h - Analysis of printf format strings ---*- C++ -*-==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Handling of format strings in printf and friends.  The structure of format
+// strings for fprintf() is described in C99 7.19.6.1.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_FPRINTF_FORMAT_H
+#define LLVM_CLANG_FPRINTF_FORMAT_H
+
+#include <cassert>
+
+namespace clang {
+namespace printf {
+
+/// Identifies the conversion specifier character that ends a printf
+/// directive (C99 7.19.6.1p8), e.g. the 'd' in "%5.2ld".
+class ConversionSpecifier {
+public:
+  enum Kind {
+    InvalidSpecifier = 0,
+    // Signed integer conversions.
+    dArg, // 'd'
+    iArg, // 'i'
+    // Unsigned integer conversions.
+    oArg, // 'o'
+    uArg, // 'u'
+    xArg, // 'x'
+    XArg, // 'X'
+    // Floating-point conversions.
+    fArg, // 'f'
+    FArg, // 'F'
+    eArg, // 'e'
+    EArg, // 'E'
+    gArg, // 'g'
+    GArg, // 'G'
+    aArg, // 'a'
+    AArg, // 'A'
+    // Character, string, pointer, count and literal-percent conversions.
+    IntAsCharArg, // 'c'
+    CStrArg,      // 's'
+    VoidPtrArg,   // 'p'
+    OutIntPtrArg, // 'n'
+    PercentArg,   // '%'
+    // Inclusive range markers used by the is*Arg() predicates below.  The
+    // enumerators above must stay grouped for these ranges to remain valid.
+    IntArgBeg = dArg,
+    IntArgEnd = iArg,
+    UIntArgBeg = oArg,
+    UIntArgEnd = XArg,
+    DoubleArgBeg = fArg,
+    DoubleArgEnd = AArg
+  };
+
+  // Intentionally not 'explicit': FormatSpecifier relies on the implicit
+  // conversion from Kind.
+  ConversionSpecifier(Kind k) : kind(k) {}
+
+  /// True for the signed integer conversions 'd' and 'i'.
+  bool isIntArg() const { return kind >= IntArgBeg && kind <= IntArgEnd; }
+  /// True for the unsigned integer conversions 'o', 'u', 'x' and 'X'.
+  bool isUIntArg() const { return kind >= UIntArgBeg && kind <= UIntArgEnd; }
+  /// True for the floating-point conversions 'f' through 'A'.
+  bool isDoubleArg() const {
+    return kind >= DoubleArgBeg && kind <= DoubleArgEnd;
+  }
+  Kind getKind() const { return kind; }
+
+private:
+  const Kind kind;
+};
+
+/// The optional length modifier that precedes the conversion specifier
+/// (C99 7.19.6.1p7).
+enum LengthModifier {
+  None,
+  AsChar,      // 'hh'
+  AsShort,     // 'h'
+  AsLong,      // 'l'
+  AsLongLong,  // 'll'
+  AsIntMax,    // 'j'
+  AsSizeT,     // 'z'
+  AsPtrDiff,   // 't'
+  AsLongDouble // 'L'
+};
+
+/// Bit mask values for the flag characters of a directive (C99 7.19.6.1p6).
+/// Each value must be a distinct single bit, and all of them must fit in the
+/// 5-bit 'flags' field of FormatSpecifier.
+enum Flags {
+  LeftJustified = 0x1,   // '-'
+  PlusPrefix = 0x2,      // '+'
+  SpacePrefix = 0x4,     // ' '
+  AlternativeForm = 0x8, // '#'
+  LeadingZeroes = 0x10   // '0'
+};
+
+/// A field width or precision.  It is either absent, a decimal constant
+/// written in the format string, or '*' (supplied by an argument).
+class OptionalAmount {
+public:
+  enum HowSpecified { NotSpecified, Constant, Arg };
+
+  OptionalAmount(HowSpecified h = NotSpecified) : hs(h), amt(0) {}
+  OptionalAmount(unsigned i) : hs(Constant), amt(i) {}
+
+  HowSpecified getHowSpecified() const { return hs; }
+
+  /// The literal value.  Only valid when the amount is a Constant.
+  unsigned getConstantAmount() const {
+    assert(hs == Constant);
+    return amt;
+  }
+
+  /// Number of call arguments this amount consumes: 1 for '*', otherwise 0.
+  unsigned getArgumentsConsumed() const {
+    return hs == Arg ? 1 : 0;
+  }
+
+private:
+  HowSpecified hs;
+  unsigned amt;
+};
+
+/// A parsed printf directive: flags, field width, precision, length modifier
+/// and conversion specifier.
+class FormatSpecifier {
+  unsigned conversionSpecifier : 6; // Holds a ConversionSpecifier::Kind.
+  unsigned lengthModifier : 5;      // Holds a LengthModifier.
+  unsigned flags : 5;               // Bitwise OR of Flags values.
+  OptionalAmount FieldWidth;
+  OptionalAmount Precision;
+public:
+  FormatSpecifier() : conversionSpecifier(0), lengthModifier(0), flags(0) {}
+
+  // NOTE: declared here, but no definition is part of this patch.
+  static FormatSpecifier Parse(const char *beg, const char *end);
+
+  // Methods for incrementally constructing the FormatSpecifier.
+  void setConversionSpecifier(ConversionSpecifier cs) {
+    conversionSpecifier = static_cast<unsigned>(cs.getKind());
+  }
+  void setLengthModifier(LengthModifier lm) {
+    lengthModifier = static_cast<unsigned>(lm);
+  }
+  void setIsLeftJustified() { flags |= LeftJustified; }
+  void setHasPlusPrefix() { flags |= PlusPrefix; }
+  void setHasSpacePrefix() { flags |= SpacePrefix; }
+  void setHasAlternativeForm() { flags |= AlternativeForm; }
+  void setHasLeadingZeros() { flags |= LeadingZeroes; }
+
+  void setFieldWidth(const OptionalAmount &Amt) {
+    FieldWidth = Amt;
+  }
+
+  void setPrecision(const OptionalAmount &Amt) {
+    Precision = Amt;
+  }
+
+  // Methods for querying the format specifier.
+
+  ConversionSpecifier getConversionSpecifier() const {
+    return static_cast<ConversionSpecifier::Kind>(conversionSpecifier);
+  }
+
+  LengthModifier getLengthModifier() const {
+    return static_cast<LengthModifier>(lengthModifier);
+  }
+
+  const OptionalAmount &getFieldWidth() const { return FieldWidth; }
+  const OptionalAmount &getPrecision() const { return Precision; }
+
+  bool isLeftJustified() const { return (flags & LeftJustified) != 0; }
+  bool hasPlusPrefix() const { return (flags & PlusPrefix) != 0; }
+  bool hasSpacePrefix() const { return (flags & SpacePrefix) != 0; }
+  bool hasAlternativeForm() const { return (flags & AlternativeForm) != 0; }
+  bool hasLeadingZeros() const { return (flags & LeadingZeroes) != 0; }
+};
+
+/// Callback interface through which ParseFormatString() reports what it
+/// finds.  Every callback has an empty default body, so clients override
+/// only the events they care about.  All pointers refer to positions in the
+/// buffer passed to ParseFormatString().
+class FormatStringHandler {
+public:
+  FormatStringHandler() {}
+  virtual ~FormatStringHandler();
+
+  /// The buffer ended at \p endSpecifier before the directive that starts
+  /// with the '%' at \p startSpecifier was complete.
+  virtual void HandleIncompleteFormatSpecifier(const char *startSpecifier,
+                                               const char *endSpecifier) {}
+
+  /// A '\0' character was found at \p nullCharacter inside the buffer.
+  virtual void HandleNullChar(const char *nullCharacter) {}
+
+  /// The buffer ended right after the '.' at \p periodChar, or right after
+  /// the precision that follows it.
+  virtual void HandleIncompletePrecision(const char *periodChar) {}
+
+  /// The character at \p conversionChar is not a conversion specifier that
+  /// the parser recognizes.
+  virtual void HandleInvalidConversionSpecifier(const char *conversionChar) {}
+
+  /// A complete directive was parsed; it spans
+  /// [\p startSpecifier, \p endSpecifier).
+  virtual void HandleFormatSpecifier(const FormatSpecifier &FS,
+                                     const char *startSpecifier,
+                                     const char *endSpecifier) {}
+};
+
+/// Scans the format string [\p beg, \p end) and reports each directive, or
+/// the first problem found, to \p H.
+/// \returns true if a problem was reported (scanning stops there), false if
+/// the whole string was scanned.
+bool ParseFormatString(FormatStringHandler &H,
+                       const char *beg, const char *end);
+
+} // end printf namespace
+} // end clang namespace
+#endif
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt
index 0cadca5dc5393064d0f5414671a3e884df6f83c3..4f8259e4493927717adb13533137c0d115046041 100644
--- a/lib/Analysis/CMakeLists.txt
+++ b/lib/Analysis/CMakeLists.txt
@@ -4,6 +4,7 @@ add_clang_library(clangAnalysis
   AnalysisContext.cpp
   CFG.cpp
   LiveVariables.cpp
+  PrintfFormatString.cpp
   UninitializedValues.cpp
   )
 
diff --git a/lib/Analysis/PrintfFormatString.cpp b/lib/Analysis/PrintfFormatString.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b2adeeb5f9e41e5d48bb66ae841eafd19bd8821e
--- /dev/null
+++ b/lib/Analysis/PrintfFormatString.cpp
@@ -0,0 +1,264 @@
+//= PrintfFormatString.cpp - Analysis of printf format strings --*- C++ -*-==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Handling of format strings in printf and friends.  The structure of format
+// strings for fprintf() is described in C99 7.19.6.1.
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Analysis/Analyses/PrintfFormatString.h"
+
+using namespace clang;
+using namespace printf;
+
+namespace {
+/// Outcome of looking for one directive.  It is in exactly one of three
+/// states:
+///  - error:    hasError() is true; the handler has already been notified.
+///  - no value: the rest of the string contained no directive.
+///  - value:    getValue() and getStart() describe the parsed directive.
+class FormatSpecifierResult {
+  FormatSpecifier FS;
+  const char *Start;
+  bool HasError;
+public:
+  FormatSpecifierResult(bool err = false)
+    : Start(0), HasError(err) {}
+  FormatSpecifierResult(const char *start, const FormatSpecifier &fs)
+    : FS(fs), Start(start), HasError(false) {}
+
+  const char *getStart() const { return Start; }
+  bool hasError() const { return HasError; }
+  bool hasValue() const { return Start != 0; }
+  const FormatSpecifier &getValue() const {
+    assert(hasValue());
+    return FS;
+  }
+};
+
+/// Copies \c ValueToCopy into \c ValueToUpdate when destroyed.  The parsing
+/// functions below use it to write their local cursor back to the caller's
+/// cursor on every return path.  Both referenced objects must outlive it.
+template <typename T>
+class UpdateOnReturn {
+  T &ValueToUpdate;
+  const T &ValueToCopy;
+public:
+  UpdateOnReturn(T &valueToUpdate, const T &valueToCopy)
+    : ValueToUpdate(valueToUpdate), ValueToCopy(valueToCopy) {}
+
+  ~UpdateOnReturn() {
+    ValueToUpdate = ValueToCopy;
+  }
+};
+} // end anonymous namespace
+
+/// Parses an optional field width or precision starting at \p Beg: either
+/// '*' or a run of decimal digits.  \p Beg is advanced past whatever was
+/// consumed; it is left untouched if no amount is present.  Values too large
+/// for 'unsigned' wrap around.
+static OptionalAmount ParseAmount(const char *&Beg, const char *E) {
+  const char *I = Beg;
+  UpdateOnReturn<const char*> UpdateBeg(Beg, I);
+
+  if (I == E)
+    return OptionalAmount();
+
+  if (*I == '*') {
+    ++I;  // Consume the '*' so that the caller resumes after it.
+    return OptionalAmount(OptionalAmount::Arg);
+  }
+
+  bool foundDigits = false;
+  unsigned accumulator = 0;
+  for ( ; I != E && *I >= '0' && *I <= '9'; ++I) {
+    foundDigits = true;
+    accumulator = accumulator * 10 + static_cast<unsigned>(*I - '0');
+  }
+
+  return foundDigits ? OptionalAmount(accumulator) : OptionalAmount();
+}
+
+/// Finds and parses the next directive in [\p Beg, \p E), reporting any
+/// problem to \p H.  On return \p Beg is where scanning stopped: one past
+/// the conversion specifier on success, \p E if no directive was found.
+static FormatSpecifierResult ParseFormatSpecifier(FormatStringHandler &H,
+                                                  const char *&Beg,
+                                                  const char *E) {
+  const char *I = Beg;
+  const char *Start = 0;
+  UpdateOnReturn<const char*> UpdateBeg(Beg, I);
+
+  // Look for a '%' character that indicates the start of a format specifier.
+  for ( ; I != E; ++I) {
+    char c = *I;
+    if (c == '\0') {
+      // Detect spurious null characters, which are likely errors.
+      H.HandleNullChar(I);
+      return FormatSpecifierResult(true);
+    }
+    if (c == '%') {
+      Start = I++;  // Record the start of the format specifier.
+      break;
+    }
+  }
+
+  // No format specifier found?
+  if (!Start)
+    return FormatSpecifierResult(false);
+
+  if (I == E) {
+    // No more characters left?
+    H.HandleIncompleteFormatSpecifier(Start, E);
+    return FormatSpecifierResult(true);
+  }
+
+  FormatSpecifier FS;
+
+  // Look for flags (if any).
+  bool hasMore = true;
+  for ( ; I != E; ++I) {
+    switch (*I) {
+      default: hasMore = false; break;
+      case '-': FS.setIsLeftJustified(); break;
+      case '+': FS.setHasPlusPrefix(); break;
+      case ' ': FS.setHasSpacePrefix(); break;
+      case '#': FS.setHasAlternativeForm(); break;
+      case '0': FS.setHasLeadingZeros(); break;
+    }
+    if (!hasMore)
+      break;
+  }
+
+  if (I == E) {
+    // No more characters left?
+    H.HandleIncompleteFormatSpecifier(Start, E);
+    return FormatSpecifierResult(true);
+  }
+
+  // Look for the field width (if any).
+  FS.setFieldWidth(ParseAmount(I, E));
+
+  if (I == E) {
+    // No more characters left?
+    H.HandleIncompleteFormatSpecifier(Start, E);
+    return FormatSpecifierResult(true);
+  }
+
+  // Look for the precision (if any).
+  if (*I == '.') {
+    const char *startPrecision = I++;
+    if (I == E) {
+      H.HandleIncompletePrecision(startPrecision);
+      return FormatSpecifierResult(true);
+    }
+
+    FS.setPrecision(ParseAmount(I, E));
+
+    if (I == E) {
+      // No more characters left?
+      H.HandleIncompletePrecision(startPrecision);
+      return FormatSpecifierResult(true);
+    }
+  }
+
+  // Look for the length modifier.
+  LengthModifier lm = None;
+  switch (*I) {
+    default:
+      break;
+    case 'h':
+      ++I;
+      if (I != E && *I == 'h') {
+        ++I;
+        lm = AsChar;
+      } else {
+        lm = AsShort;
+      }
+      break;
+    case 'l':
+      ++I;
+      if (I != E && *I == 'l') {
+        ++I;
+        lm = AsLongLong;
+      } else {
+        lm = AsLong;
+      }
+      break;
+    case 'j': lm = AsIntMax;     ++I; break;
+    case 'z': lm = AsSizeT;      ++I; break;
+    case 't': lm = AsPtrDiff;    ++I; break;
+    case 'L': lm = AsLongDouble; ++I; break;
+  }
+  FS.setLengthModifier(lm);
+
+  if (I == E) {
+    // No more characters left?
+    H.HandleIncompleteFormatSpecifier(Start, E);
+    return FormatSpecifierResult(true);
+  }
+
+  // Finally, look for the conversion specifier.
+  ConversionSpecifier::Kind cs;
+  switch (*I) {
+    default:
+      H.HandleInvalidConversionSpecifier(I);
+      return FormatSpecifierResult(true);
+    case 'd': cs = ConversionSpecifier::dArg; break;
+    case 'i': cs = ConversionSpecifier::iArg; break;
+    case 'o': cs = ConversionSpecifier::oArg; break;
+    case 'u': cs = ConversionSpecifier::uArg; break;
+    case 'x': cs = ConversionSpecifier::xArg; break;
+    case 'X': cs = ConversionSpecifier::XArg; break;
+    case 'f': cs = ConversionSpecifier::fArg; break;
+    case 'F': cs = ConversionSpecifier::FArg; break;
+    case 'e': cs = ConversionSpecifier::eArg; break;
+    case 'E': cs = ConversionSpecifier::EArg; break;
+    case 'g': cs = ConversionSpecifier::gArg; break;
+    case 'G': cs = ConversionSpecifier::GArg; break;
+    case 'a': cs = ConversionSpecifier::aArg; break;
+    case 'A': cs = ConversionSpecifier::AArg; break;
+    case 'c': cs = ConversionSpecifier::IntAsCharArg; break;
+    case 's': cs = ConversionSpecifier::CStrArg; break;
+    case 'p': cs = ConversionSpecifier::VoidPtrArg; break;
+    case 'n': cs = ConversionSpecifier::OutIntPtrArg; break;
+    case '%': cs = ConversionSpecifier::PercentArg; break;
+  }
+  FS.setConversionSpecifier(cs);
+
+  // Step past the conversion specifier so that the reported end is one past
+  // the directive and the next scan does not revisit this character (which
+  // matters for "%%", whose specifier is itself a '%').
+  ++I;
+  return FormatSpecifierResult(Start, FS);
+}
+
+/// See the declaration in PrintfFormatString.h.  Defined with a qualified
+/// name because an unqualified definition at this scope would introduce a new
+/// global function instead of defining clang::printf::ParseFormatString.
+bool clang::printf::ParseFormatString(FormatStringHandler &H,
+                                      const char *I, const char *E) {
+  // Keep looking for a format specifier until we have exhausted the string.
+  while (I != E) {
+    const FormatSpecifierResult FSR = ParseFormatSpecifier(H, I, E);
+    // Did an error of any kind occur when parsing the specifier?  If so,
+    // don't do any more processing.
+    if (FSR.hasError())
+      return true;
+    // Done processing the string?
+    if (!FSR.hasValue())
+      break;
+    // We have a format specifier.  Pass it to the callback.
+    H.HandleFormatSpecifier(FSR.getValue(), FSR.getStart(), I);
+  }
+  assert(I == E && "Format string not exhausted");
+  return false;
+}
+
+FormatStringHandler::~FormatStringHandler() {}