From c77ca07725884bbec4ba5940e731dc36f8f71777 Mon Sep 17 00:00:00 2001 From: Johannes Altmanninger <aclopte@gmail.com> Date: Sun, 20 Aug 2017 12:09:07 +0000 Subject: [PATCH] [clang-diff] Fix similarity computation Summary: Add separate tests for the top-down and the bottom-up phase, as well as one for the optimal matching. Reviewers: arphaman Subscribers: klimek Differential Revision: https://reviews.llvm.org/D36185 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@311284 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/clang/Tooling/ASTDiff/ASTDiff.h | 4 ++- lib/Tooling/ASTDiff/ASTDiff.cpp | 35 +++++++++++------- test/Tooling/clang-diff-bottomup.cpp | 39 ++++++++++++++++++++ test/Tooling/clang-diff-opt.cpp | 44 +++++++++++++++++++++++ test/Tooling/clang-diff-topdown.cpp | 48 +++++++++++++++++++++++++ tools/clang-diff/ClangDiff.cpp | 13 +++++++ 6 files changed, 169 insertions(+), 14 deletions(-) create mode 100644 test/Tooling/clang-diff-bottomup.cpp create mode 100644 test/Tooling/clang-diff-opt.cpp create mode 100644 test/Tooling/clang-diff-topdown.cpp diff --git a/include/clang/Tooling/ASTDiff/ASTDiff.h b/include/clang/Tooling/ASTDiff/ASTDiff.h index ac33dc42d81..3260e2f057c 100644 --- a/include/clang/Tooling/ASTDiff/ASTDiff.h +++ b/include/clang/Tooling/ASTDiff/ASTDiff.h @@ -105,12 +105,14 @@ struct ComparisonOptions { /// During bottom-up matching, match only nodes with at least this value as /// the ratio of their common descendants. - double MinSimilarity = 0.2; + double MinSimilarity = 0.5; /// Whenever two subtrees are matched in the bottom-up phase, the optimal /// mapping is computed, unless the size of either subtrees exceeds this. int MaxSize = 100; + bool StopAfterTopDown = false; + /// Returns false if the nodes should never be matched. bool isMatchingAllowed(const Node &N1, const Node &N2) const { return N1.getType().isSame(N2.getType()); diff --git a/lib/Tooling/ASTDiff/ASTDiff.cpp b/lib/Tooling/ASTDiff/ASTDiff.cpp index ada7cbcfc55..22f3a4e4f06 100644 --- a/lib/Tooling/ASTDiff/ASTDiff.cpp +++ b/lib/Tooling/ASTDiff/ASTDiff.cpp @@ -92,7 +92,7 @@ private: // Computes the ratio of common descendants between the two nodes. // Descendants are only considered to be equal when they are mapped in M. - double getSimilarity(const Mapping &M, NodeId Id1, NodeId Id2) const; + double getJaccardSimilarity(const Mapping &M, NodeId Id1, NodeId Id2) const; // Returns the node that has the highest degree of similarity. NodeId findCandidate(const Mapping &M, NodeId Id1) const; @@ -714,8 +714,8 @@ bool ASTDiff::Impl::haveSameParents(const Mapping &M, NodeId Id1, void ASTDiff::Impl::addOptimalMapping(Mapping &M, NodeId Id1, NodeId Id2) const { - if (std::max(T1.getNumberOfDescendants(Id1), - T2.getNumberOfDescendants(Id2)) >= Options.MaxSize) + if (std::max(T1.getNumberOfDescendants(Id1), T2.getNumberOfDescendants(Id2)) > + Options.MaxSize) return; ZhangShashaMatcher Matcher(*this, T1, T2, Id1, Id2); std::vector<std::pair<NodeId, NodeId>> R = Matcher.getMatchingNodes(); @@ -727,16 +727,23 @@ void ASTDiff::Impl::addOptimalMapping(Mapping &M, NodeId Id1, } } -double ASTDiff::Impl::getSimilarity(const Mapping &M, NodeId Id1, - NodeId Id2) const { - if (Id1.isInvalid() || Id2.isInvalid()) - return 0.0; +double ASTDiff::Impl::getJaccardSimilarity(const Mapping &M, NodeId Id1, + NodeId Id2) const { int CommonDescendants = 0; const Node &N1 = T1.getNode(Id1); - for (NodeId Id = Id1 + 1; Id <= N1.RightMostDescendant; ++Id) - CommonDescendants += int(T2.isInSubtree(M.getDst(Id), Id2)); - return 2.0 * CommonDescendants / - (T1.getNumberOfDescendants(Id1) + T2.getNumberOfDescendants(Id2)); + // Count the common descendants, excluding the subtree root. + for (NodeId Src = Id1 + 1; Src <= N1.RightMostDescendant; ++Src) { + NodeId Dst = M.getDst(Src); + CommonDescendants += int(Dst.isValid() && T2.isInSubtree(Dst, Id2)); + } + // We need to subtract 1 to get the number of descendants excluding the root. + double Denominator = T1.getNumberOfDescendants(Id1) - 1 + + T2.getNumberOfDescendants(Id2) - 1 - CommonDescendants; + // CommonDescendants is less than the size of one subtree. + assert(Denominator >= 0 && "Expected non-negative denominator."); + if (Denominator == 0) + return 0; + return CommonDescendants / Denominator; } NodeId ASTDiff::Impl::findCandidate(const Mapping &M, NodeId Id1) const { @@ -747,7 +754,7 @@ NodeId ASTDiff::Impl::findCandidate(const Mapping &M, NodeId Id1) const { continue; if (M.hasDst(Id2)) continue; - double Similarity = getSimilarity(M, Id1, Id2); + double Similarity = getJaccardSimilarity(M, Id1, Id2); if (Similarity >= Options.MinSimilarity && Similarity > HighestSimilarity) { HighestSimilarity = Similarity; Candidate = Id2; @@ -767,8 +774,8 @@ void ASTDiff::Impl::matchBottomUp(Mapping &M) const { } break; } - const Node &N1 = T1.getNode(Id1); bool Matched = M.hasSrc(Id1); + const Node &N1 = T1.getNode(Id1); bool MatchedChildren = std::any_of(N1.Children.begin(), N1.Children.end(), [&](NodeId Child) { return M.hasSrc(Child); }); @@ -836,6 +843,8 @@ ASTDiff::Impl::Impl(SyntaxTree::Impl &T1, SyntaxTree::Impl &T2, void ASTDiff::Impl::computeMapping() { TheMapping = matchTopDown(); + if (Options.StopAfterTopDown) + return; matchBottomUp(TheMapping); } diff --git a/test/Tooling/clang-diff-bottomup.cpp b/test/Tooling/clang-diff-bottomup.cpp new file mode 100644 index 00000000000..8193ad7bbd6 --- /dev/null +++ b/test/Tooling/clang-diff-bottomup.cpp @@ -0,0 +1,39 @@ +// RUN: %clang_cc1 -E %s > %t.src.cpp +// RUN: %clang_cc1 -E %s > %t.dst.cpp -DDEST +// RUN: clang-diff -dump-matches -s=0 %t.src.cpp %t.dst.cpp -- | FileCheck %s +// +// Test the bottom-up matching, with maxsize set to 0, so that the optimal matching will never be applied. + +#ifndef DEST + +void f1() { ; {{;}} } +void f2() { ;; {{;}} } + +#else + +// Jaccard similarity threshold is 0.5. + +void f1() { +// CompoundStmt: 3 matched descendants, subtree sizes 4 and 5 +// Jaccard similarity = 3 / (4 + 5 - 3) = 3 / 6 >= 0.5 +// CHECK: Match FunctionDecl: f1(void (void))(1) to FunctionDecl: f1(void (void))(1) +// CHECK: Match CompoundStmt(2) to CompoundStmt(2) +// CHECK: Match CompoundStmt(4) to CompoundStmt(3) +// CHECK: Match CompoundStmt(5) to CompoundStmt(4) +// CHECK: Match NullStmt(6) to NullStmt(5) + {{;}} ;; +} + +void f2() { +// CompoundStmt: 3 matched descendants, subtree sizes 4 and 5 +// Jaccard similarity = 3 / (5 + 6 - 3) = 3 / 8 < 0.5 +// CHECK-NOT: Match FunctionDecl(9) +// CHECK-NOT: Match CompoundStmt(10) +// CHECK: Match CompoundStmt(11) to CompoundStmt(10) +// CHECK: Match CompoundStmt(12) to CompoundStmt(11) +// CHECK: Match NullStmt(13) to NullStmt(12) +// CHECK-NOT: Match NullStmt(13) + {{;}} ;;; +} + +#endif diff --git a/test/Tooling/clang-diff-opt.cpp b/test/Tooling/clang-diff-opt.cpp new file mode 100644 index 00000000000..567483de1f2 --- /dev/null +++ b/test/Tooling/clang-diff-opt.cpp @@ -0,0 +1,44 @@ +// RUN: %clang_cc1 -E %s > %t.src.cpp +// RUN: %clang_cc1 -E %s > %t.dst.cpp -DDEST +// RUN: clang-diff -dump-matches -s=10 %t.src.cpp %t.dst.cpp -- | FileCheck %s +// +// Test the behaviour of the matching according to the optimal tree edit +// distance, implemented with Zhang and Shasha's algorithm. +// Just for testing we use a tiny value of 10 for maxsize. Subtrees bigger than +// this size will not be processed by the optimal algorithm. + +#ifndef DEST + +void f1() { {;} {{;}} } + +void f2() { {;} {{;;;;;}} } + +void f3() { {;} {{;;;;;;}} } + +#else + +void f1() { +// Jaccard similarity = 3 / (5 + 4 - 3) = 3 / 6 >= 0.5 +// The optimal matching algorithm should move the ; into the outer block +// CHECK: Match CompoundStmt(2) to CompoundStmt(2) +// CHECK-NOT: Match CompoundStmt(3) +// CHECK-NEXT: Match NullStmt(4) to NullStmt(3) + ; {{;}} +} + +void f2() { + // Jaccard similarity = 7 / (10 + 10 - 7) >= 0.5 + // As none of the subtrees is bigger than 10 nodes, the optimal algorithm + // will be run. + // CHECK: Match NullStmt(11) to NullStmt(9) + ;; {{;;;;;}} +} + +void f3() { + // Jaccard similarity = 8 / (11 + 11 - 8) >= 0.5 + // As the subtrees are bigger than 10 nodes, the optimal algorithm will not + // be run. + // CHECK: Delete NullStmt(22) + ;; {{;;;;;;}} +} +#endif diff --git a/test/Tooling/clang-diff-topdown.cpp b/test/Tooling/clang-diff-topdown.cpp new file mode 100644 index 00000000000..55553899392 --- /dev/null +++ b/test/Tooling/clang-diff-topdown.cpp @@ -0,0 +1,48 @@ +// RUN: %clang_cc1 -E %s > %t.src.cpp +// RUN: %clang_cc1 -E %s > %t.dst.cpp -DDEST +// RUN: clang-diff -dump-matches -stop-after=topdown %t.src.cpp %t.dst.cpp -- | FileCheck %s +// +// Test the top-down matching of identical subtrees only. + +#ifndef DEST + +void f1() +{ + // Match some subtree of height greater than 2. + // CHECK: Match CompoundStmt(3) to CompoundStmt(3) + // CHECK: Match CompoundStmt(4) to CompoundStmt(4) + // CHECK: Match NullStmt(5) to NullStmt(5) + {{;}} + + // Don't match subtrees that are smaller. + // CHECK-NOT: Match CompoundStmt(6) + // CHECK-NOT: Match NullStmt(7) + {;} + + // Greedy approach - use the first matching subtree when there are multiple + // identical subtrees. + // CHECK: Match CompoundStmt(8) to CompoundStmt(8) + // CHECK: Match CompoundStmt(9) to CompoundStmt(9) + // CHECK: Match NullStmt(10) to NullStmt(10) + {{;;}} +} + +#else + +void f1() { + + {{;}} + + {;} + + {{;;}} + // CHECK-NOT: Match {{.*}} to CompoundStmt(11) + // CHECK-NOT: Match {{.*}} to CompoundStmt(12) + // CHECK-NOT: Match {{.*}} to NullStmt(13) + {{;;}} + + // CHECK-NOT: Match {{.*}} to NullStmt(14) + ; +} + +#endif diff --git a/tools/clang-diff/ClangDiff.cpp b/tools/clang-diff/ClangDiff.cpp index 7d6d9c81966..0bfa4558fe1 100644 --- a/tools/clang-diff/ClangDiff.cpp +++ b/tools/clang-diff/ClangDiff.cpp @@ -50,6 +50,11 @@ static cl::opt<std::string> DestinationPath(cl::Positional, cl::Optional, cl::cat(ClangDiffCategory)); +static cl::opt<std::string> StopAfter("stop-after", + cl::desc("<topdown|bottomup>"), + cl::Optional, cl::init(""), + cl::cat(ClangDiffCategory)); + static cl::opt<int> MaxSize("s", cl::desc("<maxsize>"), cl::Optional, cl::init(-1), cl::cat(ClangDiffCategory)); @@ -425,6 +430,14 @@ int main(int argc, const char **argv) { diff::ComparisonOptions Options; if (MaxSize != -1) Options.MaxSize = MaxSize; + if (!StopAfter.empty()) { + if (StopAfter == "topdown") + Options.StopAfterTopDown = true; + else if (StopAfter != "bottomup") { + llvm::errs() << "Error: Invalid argument for -stop-after\n"; + return 1; + } + } diff::SyntaxTree SrcTree(Src->getASTContext()); diff::SyntaxTree DstTree(Dst->getASTContext()); diff::ASTDiff Diff(SrcTree, DstTree, Options); -- GitLab