Kaydet (Commit) a18d92bc authored tarafından Noel Grandin's avatar Noel Grandin

new loplugin inlineablemethods

look for methods that are:

(*) non-virtual
(*) only called once
(*) only called from inside their own class
(*) small i.e. < 40 characters

which are candidates for just having their code inlined

Change-Id: I0e9e8125d140282cdcdd2a77374059b17b2fcd7d
üst 296f8a57
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* This file is part of the LibreOffice project.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
#include <cassert>
#include <string>
#include <iostream>
#include <fstream>
#include <set>
#include <unordered_map>
#include "clang/AST/Attr.h"
#include "plugin.hxx"
#include "compat.hxx"
/**
Methods that are only called from inside their own class, and are only called from one spot
TODO if a method has only one call-site, and that call site is inside a constructor
then it's probably worth inlining, since it's probably an old method that was intended to be shared amongst
multiple constructors
*/
namespace {
struct MyFuncInfo
{
std::string access;
std::string returnType;
std::string nameAndParams;
std::string sourceLocation;
};
bool operator < (const MyFuncInfo &lhs, const MyFuncInfo &rhs)
{
return std::tie(lhs.returnType, lhs.nameAndParams)
< std::tie(rhs.returnType, rhs.nameAndParams);
}
// try to limit the voluminous output a little
static std::unordered_map<std::string, MyFuncInfo> calledFromMap;
static std::set<MyFuncInfo> definitionSet;
static std::set<MyFuncInfo> calledFromOutsideSet;
static std::set<MyFuncInfo> largeFunctionSet;
static std::set<MyFuncInfo> addressOfSet;
class InlineableMethods:
public RecursiveASTVisitor<InlineableMethods>, public loplugin::Plugin
{
public:
explicit InlineableMethods(InstantiationData const & data): Plugin(data) {}
virtual void run() override
{
TraverseDecl(compiler.getASTContext().getTranslationUnitDecl());
// dump all our output in one write call - this is to try and limit IO "crosstalk" between multiple processes
// writing to the same logfile
std::string output;
for (const MyFuncInfo & s : definitionSet)
output += "definition:\t" + s.access + "\t" + s.returnType + "\t" + s.nameAndParams + "\t" + s.sourceLocation + "\n";
for (const MyFuncInfo & s : calledFromOutsideSet)
output += "outside:\t" + s.returnType + "\t" + s.nameAndParams + "\n";
for (const std::pair<std::string,MyFuncInfo> & s : calledFromMap)
output += "calledFrom:\t" + s.first
+ "\t" + s.second.returnType + "\t" + s.second.nameAndParams + "\n";
for (const MyFuncInfo & s : largeFunctionSet)
output += "large:\t" + s.returnType + "\t" + s.nameAndParams + "\n";
for (const MyFuncInfo & s : addressOfSet)
output += "addrof:\t" + s.returnType + "\t" + s.nameAndParams + "\n";
ofstream myfile;
myfile.open( SRCDIR "/loplugin.inlineablemethods.log", ios::app | ios::out);
myfile << output;
myfile.close();
}
bool shouldVisitTemplateInstantiations () const { return true; }
bool shouldVisitImplicitCode() const { return true; }
bool VisitFunctionDecl( const FunctionDecl* );
bool VisitDeclRefExpr( const DeclRefExpr* );
bool VisitMemberExpr( const MemberExpr* );
// interception methods for FunctionDecl and all its subclasses
bool TraverseFunctionDecl( FunctionDecl* );
bool TraverseCXXMethodDecl( CXXMethodDecl* );
bool TraverseCXXConstructorDecl( CXXConstructorDecl* );
bool TraverseCXXConversionDecl( CXXConversionDecl* );
bool TraverseCXXDestructorDecl( CXXDestructorDecl* );
private:
MyFuncInfo niceName(const FunctionDecl* functionDecl);
std::string toString(SourceLocation loc);
void functionTouchedFromExpr( const FunctionDecl* calleeFunctionDecl, const Expr* expr );
bool isCalleeFunctionInteresting( const FunctionDecl* );
void logCalledFrom(SourceLocation calleeSite, const FunctionDecl* functionDecl);
// I use traverse and a member variable because I cannot find a reliable way of walking back up the AST tree using the parentStmt() stuff
// TODO doesn't cope with nested functions
const FunctionDecl* mpTraversingFunction = nullptr;
};
MyFuncInfo InlineableMethods::niceName(const FunctionDecl* functionDecl)
{
if (functionDecl->getInstantiatedFromMemberFunction())
functionDecl = functionDecl->getInstantiatedFromMemberFunction();
else if (functionDecl->getClassScopeSpecializationPattern())
functionDecl = functionDecl->getClassScopeSpecializationPattern();
// workaround clang-3.5 issue
#if CLANG_VERSION >= 30600
else if (functionDecl->getTemplateInstantiationPattern())
functionDecl = functionDecl->getTemplateInstantiationPattern();
#endif
MyFuncInfo aInfo;
switch (functionDecl->getAccess())
{
case AS_public: aInfo.access = "public"; break;
case AS_private: aInfo.access = "private"; break;
case AS_protected: aInfo.access = "protected"; break;
default: aInfo.access = "unknown"; break;
}
if (!isa<CXXConstructorDecl>(functionDecl)) {
aInfo.returnType = compat::getReturnType(*functionDecl).getCanonicalType().getAsString();
} else {
aInfo.returnType = "";
}
if (isa<CXXMethodDecl>(functionDecl)) {
const CXXRecordDecl* recordDecl = dyn_cast<CXXMethodDecl>(functionDecl)->getParent();
aInfo.nameAndParams += recordDecl->getQualifiedNameAsString();
aInfo.nameAndParams += "::";
}
aInfo.nameAndParams += functionDecl->getNameAsString() + "(";
bool bFirst = true;
for (const ParmVarDecl *pParmVarDecl : compat::parameters(*functionDecl)) {
if (bFirst)
bFirst = false;
else
aInfo.nameAndParams += ",";
aInfo.nameAndParams += pParmVarDecl->getType().getCanonicalType().getAsString();
}
aInfo.nameAndParams += ")";
if (isa<CXXMethodDecl>(functionDecl) && dyn_cast<CXXMethodDecl>(functionDecl)->isConst()) {
aInfo.nameAndParams += " const";
}
aInfo.sourceLocation = toString( functionDecl->getLocation() );
return aInfo;
}
std::string InlineableMethods::toString(SourceLocation loc)
{
SourceLocation expansionLoc = compiler.getSourceManager().getExpansionLoc( loc );
StringRef name = compiler.getSourceManager().getFilename(expansionLoc);
std::string sourceLocation = std::string(name.substr(strlen(SRCDIR)+1)) + ":" + std::to_string(compiler.getSourceManager().getSpellingLineNumber(expansionLoc));
normalizeDotDotInFilePath(sourceLocation);
return sourceLocation;
}
bool InlineableMethods::VisitFunctionDecl( const FunctionDecl* functionDecl )
{
const FunctionDecl* canonicalFunctionDecl = functionDecl->getCanonicalDecl();
if (!isCalleeFunctionInteresting(canonicalFunctionDecl)) {
return true;
}
definitionSet.insert(niceName(canonicalFunctionDecl));
if (functionDecl->doesThisDeclarationHaveABody()) {
bool bLargeFunction = false;
if (const CompoundStmt* compoundStmt = dyn_cast<CompoundStmt>(functionDecl->getBody())) {
if (compoundStmt->size() > 1) {
bLargeFunction = true;
}
if (!bLargeFunction) {
auto s1 = compiler.getSourceManager().getCharacterData(compoundStmt->getLBracLoc());
auto s2 = compiler.getSourceManager().getCharacterData(compoundStmt->getRBracLoc());
bLargeFunction = (s2 - s1) > 40;
// any function that uses a parameter more than once
if (!bLargeFunction) {
StringRef bodyText(s1, s2-s1);
for (const ParmVarDecl* param : compat::parameters(*functionDecl)) {
StringRef name = param->getName();
if (name.empty())
continue;
size_t idx = bodyText.find(name);
if (idx != StringRef::npos && bodyText.find(name, idx+1) != StringRef::npos) {
bLargeFunction = true;
break;
}
}
}
}
}
if (bLargeFunction) {
largeFunctionSet.insert(niceName(canonicalFunctionDecl));
}
}
return true;
}
bool InlineableMethods::TraverseFunctionDecl( FunctionDecl* p )
{
mpTraversingFunction = p;
bool ret = RecursiveASTVisitor::TraverseFunctionDecl(p);
mpTraversingFunction = nullptr;
return ret;
}
bool InlineableMethods::TraverseCXXMethodDecl( CXXMethodDecl* p )
{
mpTraversingFunction = p;
bool ret = RecursiveASTVisitor::TraverseCXXMethodDecl(p);
mpTraversingFunction = nullptr;
return ret;
}
bool InlineableMethods::TraverseCXXConstructorDecl( CXXConstructorDecl* p )
{
mpTraversingFunction = p;
bool ret = RecursiveASTVisitor::TraverseCXXConstructorDecl(p);
mpTraversingFunction = nullptr;
return ret;
}
bool InlineableMethods::TraverseCXXConversionDecl( CXXConversionDecl* p )
{
mpTraversingFunction = p;
bool ret = RecursiveASTVisitor::TraverseCXXConversionDecl(p);
mpTraversingFunction = nullptr;
return ret;
}
bool InlineableMethods::TraverseCXXDestructorDecl( CXXDestructorDecl* p )
{
mpTraversingFunction = p;
bool ret = RecursiveASTVisitor::TraverseCXXDestructorDecl(p);
mpTraversingFunction = nullptr;
return ret;
}
bool InlineableMethods::VisitMemberExpr( const MemberExpr* memberExpr )
{
const FunctionDecl* functionDecl = dyn_cast<FunctionDecl>(memberExpr->getMemberDecl());
if (functionDecl) {
functionTouchedFromExpr(functionDecl, memberExpr);
}
return true;
}
bool InlineableMethods::VisitDeclRefExpr( const DeclRefExpr* declRefExpr )
{
const FunctionDecl* functionDecl = dyn_cast<FunctionDecl>(declRefExpr->getDecl());
if (functionDecl) {
functionTouchedFromExpr(functionDecl, declRefExpr);
}
return true;
}
void InlineableMethods::functionTouchedFromExpr( const FunctionDecl* calleeFunctionDecl, const Expr* expr )
{
if (!mpTraversingFunction) {
return;
}
const FunctionDecl* canonicalFunctionDecl = calleeFunctionDecl->getCanonicalDecl();
if (!isCalleeFunctionInteresting(canonicalFunctionDecl)) {
return;
}
logCalledFrom(expr->getLocStart(), canonicalFunctionDecl);
if (const UnaryOperator* unaryOp = dyn_cast_or_null<UnaryOperator>(parentStmt(expr))) {
if (unaryOp->getOpcode() == UO_AddrOf) {
addressOfSet.insert(niceName(canonicalFunctionDecl));
}
}
const CXXMethodDecl* calleeMethodDecl = dyn_cast<CXXMethodDecl>(calleeFunctionDecl);
const CXXMethodDecl* callsiteParentMethodDecl = dyn_cast<CXXMethodDecl>(mpTraversingFunction);
if (!callsiteParentMethodDecl
|| calleeMethodDecl->getParent() != callsiteParentMethodDecl->getParent())
{
calledFromOutsideSet.insert(niceName(canonicalFunctionDecl));
}
}
void InlineableMethods::logCalledFrom(SourceLocation calleeLoc, const FunctionDecl* functionDecl)
{
functionDecl = functionDecl->getCanonicalDecl();
while (functionDecl->getTemplateInstantiationPattern())
functionDecl = functionDecl->getTemplateInstantiationPattern();
calledFromMap.emplace(toString(calleeLoc), niceName(functionDecl));
}
bool InlineableMethods::isCalleeFunctionInteresting(const FunctionDecl* functionDecl)
{
// ignore stuff that forms part of the stable URE interface
if (isInUnoIncludeFile(compiler.getSourceManager().getSpellingLoc(
functionDecl->getNameInfo().getLoc()))) {
return false;
}
if (isa<CXXDestructorDecl>(functionDecl)) {
return false;
}
if (functionDecl->isDeleted() || functionDecl->isDefaulted()) {
return false;
}
if (isa<CXXConstructorDecl>(functionDecl) && dyn_cast<CXXConstructorDecl>(functionDecl)->isCopyConstructor()) {
return false;
}
if (!functionDecl->getLocation().isValid() || ignoreLocation(functionDecl)) {
return false;
}
const CXXMethodDecl* methodDecl = dyn_cast<CXXMethodDecl>(functionDecl);
if (!methodDecl || methodDecl->isVirtual()) {
return false;
}
return true;
}
loplugin::Plugin::Registration< InlineableMethods > X("inlineablemethods", true);
}
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
#!/usr/bin/python
import sys
import re
import io
# --------------------------------------------------------------------------------------------
# globals
# --------------------------------------------------------------------------------------------
definitionSet = set() # set of tuple(return_type, name_and_params)
definitionToSourceLocationMap = dict()
calledFromDict = dict()
calledFromOutsideSet = set()
largeFunctionSet = set() # set of tuple(return_type, name_and_params)
addressOfSet = set() # set of tuple(return_type, name_and_params)
# clang does not always use exactly the same numbers in the type-parameter vars it generates
# so I need to substitute them to ensure we can match correctly.
normalizeTypeParamsRegex = re.compile(r"type-parameter-\d+-\d+")
def normalizeTypeParams( line ):
return normalizeTypeParamsRegex.sub("type-parameter-?-?", line)
# --------------------------------------------------------------------------------------------
# primary input loop
# --------------------------------------------------------------------------------------------
# The parsing here is designed to avoid grabbing stuff which is mixed in from gbuild.
# I have not yet found a way of suppressing the gbuild output.
with io.open("loplugin.inlineablemethods.log", "rb", buffering=1024*1024) as txt:
for line in txt:
tokens = line.strip().split("\t")
if tokens[0] == "definition:":
access = tokens[1]
returnType = tokens[2]
nameAndParams = tokens[3]
sourceLocation = tokens[4]
funcInfo = (normalizeTypeParams(returnType), normalizeTypeParams(nameAndParams))
definitionSet.add(funcInfo)
definitionToSourceLocationMap[funcInfo] = sourceLocation
elif tokens[0] == "calledFrom:":
calleeLocation = tokens[1]
returnType = tokens[2]
nameAndParams = tokens[3]
funcInfo = (normalizeTypeParams(returnType), normalizeTypeParams(nameAndParams))
if not funcInfo in calledFromDict:
calledFromDict[funcInfo] = set()
calledFromDict[funcInfo].add(calleeLocation)
elif tokens[0] == "outside:":
returnType = tokens[1]
nameAndParams = tokens[2]
calledFromOutsideSet.add((normalizeTypeParams(returnType), normalizeTypeParams(nameAndParams)))
elif tokens[0] == "large:":
returnType = tokens[1]
nameAndParams = tokens[2]
largeFunctionSet.add((normalizeTypeParams(returnType), normalizeTypeParams(nameAndParams)))
elif tokens[0] == "addrof:":
returnType = tokens[1]
nameAndParams = tokens[2]
addressOfSet.add((normalizeTypeParams(returnType), normalizeTypeParams(nameAndParams)))
else:
print( "unknown line: " + line)
# Invert the definitionToSourceLocationMap.
# If we see more than one method at the same sourceLocation, it's being autogenerated as part of a template
# and we should just ignore it.
sourceLocationToDefinitionMap = {}
for k, v in definitionToSourceLocationMap.iteritems():
sourceLocationToDefinitionMap[v] = sourceLocationToDefinitionMap.get(v, [])
sourceLocationToDefinitionMap[v].append(k)
for k, definitions in sourceLocationToDefinitionMap.iteritems():
if len(definitions) > 1:
for d in definitions:
definitionSet.remove(d)
def isOtherConstness( d, callSet ):
method = d[0] + " " + d[1]
# if this method is const, and there is a non-const variant of it, and the non-const variant is in use, then leave it alone
if d[0].startswith("const ") and d[1].endswith(" const"):
if ((d[0][6:],d[1][:-6]) in callSet):
return True
elif method.endswith(" const"):
method2 = method[:len(method)-6] # strip off " const"
if ((d[0],method2) in callSet):
return True
if method.endswith(" const") and ("::iterator" in method):
method2 = method[:len(method)-6] # strip off " const"
method2 = method2.replace("::const_iterator", "::iterator")
if ((d[0],method2) in callSet):
return True
# if this method is non-const, and there is a const variant of it, and the const variant is in use, then leave it alone
if (not method.endswith(" const")) and ((d[0],"const " + method + " const") in callSet):
return True
if (not method.endswith(" const")) and ("::iterator" in method):
method2 = method.replace("::iterator", "::const_iterator") + " const"
if ((d[0],method2) in callSet):
return True
return False
# sort the results using a "natural order" so sequences like [item1,item2,item10] sort nicely
def natural_sort_key(s, _nsre=re.compile('([0-9]+)')):
return [int(text) if text.isdigit() else text.lower()
for text in re.split(_nsre, s)]
def sort_set_by_natural_key(s):
return sorted(s, key=lambda v: natural_sort_key(v[1]))
# --------------------------------------------------------------------------------------------
# Methods that are only called from inside their own class, and are only called from one spot
# --------------------------------------------------------------------------------------------
tmp4set = set()
for d in definitionSet:
if d in calledFromOutsideSet:
continue
if isOtherConstness(d, calledFromOutsideSet):
continue
if d not in definitionToSourceLocationMap:
print("warning, method has no location: " + d[0] + " " + d[1])
continue
# ignore external code
if definitionToSourceLocationMap[d].startswith("external/"):
continue
# ignore constructors, calledFromOutsideSet does not provide accurate info for them
tokens = d[1].split("(")[0].split("::")
if len(tokens)>1 and tokens[-2] == tokens[-1]:
continue
# ignore large methods, which make the code clearer by being out of line
if d in largeFunctionSet:
continue
# ignore methods whose address we take
if d in addressOfSet:
continue
# ignore unused methods, leave them to the dedicated analysis
if d not in calledFromDict:
continue
# ignore methods called from more than one site
if len(calledFromDict[d]) > 1:
continue
method = d[0] + " " + d[1]
tmp4set.add((method, definitionToSourceLocationMap[d]))
# print output, sorted by name and line number
with open("loplugin.inlineablemethods.report", "wt") as f:
for t in sort_set_by_natural_key(tmp4set):
f.write(t[1] + "\n")
f.write(" " + t[0] + "\n")
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment