Skip to content

Commit

Permalink
Add new function lookupUniqueTypeSymbolByName (#1781)
Browse files Browse the repository at this point in the history
* Add new function `lookupUniqueTypeSymbolByName`

This adds two new functions `ScopeManager.lookupUniqueTypeSymbolByName` and `Reference.doesReferToType`. This harmonizes a lot of boilerplate code in type resolver, cxx extra pass and resolve ambuigity pass, which were all trying to achieve the same thing.

Fixes #1766

* Fixed issue with Go test, more robust handling of wrapped references

* Addressed code review
  • Loading branch information
oxisto authored Oct 6, 2024
1 parent 1c4e93d commit 344d366
Show file tree
Hide file tree
Showing 7 changed files with 84 additions and 84 deletions.
25 changes: 25 additions & 0 deletions cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/ScopeManager.kt
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import de.fraunhofer.aisec.cpg.graph.declarations.*
import de.fraunhofer.aisec.cpg.graph.scopes.*
import de.fraunhofer.aisec.cpg.graph.statements.*
import de.fraunhofer.aisec.cpg.graph.statements.expressions.*
import de.fraunhofer.aisec.cpg.graph.types.DeclaresType
import de.fraunhofer.aisec.cpg.graph.types.FunctionPointerType
import de.fraunhofer.aisec.cpg.graph.types.IncompleteType
import de.fraunhofer.aisec.cpg.graph.types.Type
Expand Down Expand Up @@ -1009,6 +1010,30 @@ class ScopeManager : ScopeProvider {

return list
}

/**
* This function tries to look up the symbol contained in [name] (using [lookupSymbolByName])
* and returns a [DeclaresType] node, if this name resolved to something which declares a type.
*
* It is important to know that the lookup needs to be unique, so if multiple declarations match
* this symbol, a warning is triggered and null is returned.
*/
fun lookupUniqueTypeSymbolByName(name: Name, startScope: Scope?): DeclaresType? {
var symbols =
lookupSymbolByName(name = name, startScope = startScope) { it is DeclaresType }
.filterIsInstance<DeclaresType>()

// We need to have a single match, otherwise we have an ambiguous type, and we cannot
// normalize it.
if (symbols.size > 1) {
LOGGER.warn(
"Lookup of type {} returned more than one symbol which declares a type, this is an ambiguity and the following analysis might not be correct.",
name
)
}

return symbols.singleOrNull()
}
}

/**
Expand Down
29 changes: 29 additions & 0 deletions cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/TypeManager.kt
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,10 @@ import de.fraunhofer.aisec.cpg.graph.declarations.RecordDeclaration
import de.fraunhofer.aisec.cpg.graph.declarations.TemplateDeclaration
import de.fraunhofer.aisec.cpg.graph.scopes.Scope
import de.fraunhofer.aisec.cpg.graph.scopes.TemplateScope
import de.fraunhofer.aisec.cpg.graph.statements.expressions.Reference
import de.fraunhofer.aisec.cpg.graph.types.*
import de.fraunhofer.aisec.cpg.passes.Pass
import de.fraunhofer.aisec.cpg.passes.ResolveCallExpressionAmbiguityPass
import java.util.*
import java.util.concurrent.ConcurrentHashMap
import org.slf4j.Logger
Expand Down Expand Up @@ -353,3 +356,29 @@ val Collection<Type>.commonType: Type?
// root node is 0) and re-wrap the final common type back into the original wrap state
return commonAncestors.minByOrNull(Type.Ancestor::depth)?.type?.let { typeOp.apply(it) }
}

/**
* A utility function that checks whether our [Reference] refers to a [Type]. This is used by many
* passes that replace certain [Reference] nodes with other nodes, e.g., the
* [ResolveCallExpressionAmbiguityPass].
*
* Note: This involves some symbol lookup (using [ScopeManager.lookupUniqueTypeSymbolByName]), so
* this can only be used in passes.
*/
context(Pass<*>)
fun Reference.nameIsType(): Type? {
// First, check if it is a simple type
var type = language?.getSimpleTypeOf(name)
if (type != null) {
return type
}

// This could also be a typedef
type = scopeManager.typedefFor(name, scope)
if (type != null) {
return type
}

// Lastly, check if the reference contains a symbol that points to type (declaration)
return scopeManager.lookupUniqueTypeSymbolByName(name, scope)?.declaredType
}
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ import de.fraunhofer.aisec.cpg.graph.types.ObjectType
import de.fraunhofer.aisec.cpg.graph.types.Type
import de.fraunhofer.aisec.cpg.helpers.SubgraphWalker
import de.fraunhofer.aisec.cpg.helpers.replace
import de.fraunhofer.aisec.cpg.nameIsType
import de.fraunhofer.aisec.cpg.passes.configuration.DependsOn
import de.fraunhofer.aisec.cpg.passes.configuration.ExecuteBefore
import de.fraunhofer.aisec.cpg.passes.configuration.RequiresLanguageTrait
Expand Down Expand Up @@ -85,27 +86,20 @@ class ResolveCallExpressionAmbiguityPass(ctx: TranslationContext) : TranslationU
var callee = call.callee
val language = callee.language

// We need to "unwrap" some references because they might be nested in unary operations such
// as pointers. We are interested in the references in the "core". We can skip all
// references that are member expressions
val ref = callee.unwrapReference()
if (ref == null || ref is MemberExpression) {
return
}

// Check, if this is cast is really a construct expression (if the language supports
// functional-constructs)
if (language is HasFunctionStyleConstruction) {
// Make sure, we do not accidentally "construct" primitive types
if (language.builtInTypes.contains(callee.name.toString()) == true) {
return
}

val fqn =
if (callee.name.parent == null) {
scopeManager.currentNamespace.fqn(
callee.name.localName,
delimiter = callee.name.delimiter
)
} else {
callee.name
}

// Check for our type. We are only interested in object types
val type = typeManager.lookupResolvedType(fqn)
if (type is ObjectType) {
var type = ref.nameIsType()
if (type is ObjectType && !type.isPrimitive) {
walker.replaceCallWithConstruct(type, parent, call)
}
}
Expand All @@ -114,34 +108,10 @@ class ResolveCallExpressionAmbiguityPass(ctx: TranslationContext) : TranslationU
// cast expression. And this is only really necessary, if the function call has a single
// argument.
if (language is HasFunctionStyleCasts && call.arguments.size == 1) {
var pointer = false
// If the argument is a UnaryOperator, unwrap them
if (callee is UnaryOperator && callee.operatorCode == "*") {
pointer = true
callee = callee.input
}

// First, check if this is a built-in type
var builtInType = language.getSimpleTypeOf(callee.name)
if (builtInType != null) {
walker.replaceCallWithCast(builtInType, parent, call, false)
} else {
// If not, then this could still refer to an existing type. We need to make sure
// that we take the current namespace into account
val fqn =
if (callee.name.parent == null) {
scopeManager.currentNamespace.fqn(
callee.name.localName,
delimiter = callee.name.delimiter
)
} else {
callee.name
}

val type = typeManager.lookupResolvedType(fqn)
if (type != null) {
walker.replaceCallWithCast(type, parent, call, pointer)
}
// Check if it is type and replace the call
var type = ref.nameIsType()
if (type != null) {
walker.replaceCallWithCast(type, parent, call, false)
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -104,20 +104,7 @@ open class TypeResolver(ctx: TranslationContext) : ComponentPass(ctx) {
// filter for nodes that implement DeclaresType, because otherwise we will get a lot of
// constructor declarations and such with the same name. It seems this is ok since most
// languages will prefer structs/classes over functions when resolving types.
var symbols =
scopeManager
.lookupSymbolByName(type.name, startScope = type.scope) { it is DeclaresType }
.filterIsInstance<DeclaresType>()

// We need to have a single match, otherwise we have an ambiguous type, and we cannot
// normalize it.
if (symbols.size > 1) {
log.warn(
"Lookup of type {} returned more than one symbol which declares a type, this is an ambiguity and the following analysis might not be correct.",
name
)
}
var declares = symbols.singleOrNull()
var declares = scopeManager.lookupUniqueTypeSymbolByName(type.name, type.scope)

// If we did not find any declaration, we can try to infer a record declaration for it
if (declares == null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ import de.fraunhofer.aisec.cpg.graph.statements.expressions.*
import de.fraunhofer.aisec.cpg.graph.types.recordDeclaration
import de.fraunhofer.aisec.cpg.helpers.SubgraphWalker
import de.fraunhofer.aisec.cpg.helpers.replace
import de.fraunhofer.aisec.cpg.nameIsType
import de.fraunhofer.aisec.cpg.passes.configuration.DependsOn
import de.fraunhofer.aisec.cpg.passes.configuration.ExecuteBefore

Expand Down Expand Up @@ -77,7 +78,8 @@ class CXXExtraPass(ctx: TranslationContext) : ComponentPass(ctx) {
* the graph.
*/
private fun removeBracketOperators(node: UnaryOperator, parent: Node?) {
if (node.operatorCode == "()" && !typeManager.typeExists(node.input.name)) {
val input = node.input
if (node.operatorCode == "()" && input is Reference && input.nameIsType() == null) {
// It was really just parenthesis around an identifier, but we can only make this
// distinction now.
//
Expand All @@ -101,24 +103,25 @@ class CXXExtraPass(ctx: TranslationContext) : ComponentPass(ctx) {
private fun convertOperators(binOp: BinaryOperator, parent: Node?) {
val fakeUnaryOp = binOp.lhs
val language = fakeUnaryOp.language as? CLanguage

// We need to check, if the expression in parentheses is really referring to a type or
// not. A common example is code like `(long) &addr`. We could end up parsing this as a
// binary operator with the left-hand side of `(long)`, an operator code `&` and a rhs
// of `addr`.
if (
language != null &&
fakeUnaryOp is UnaryOperator &&
fakeUnaryOp.operatorCode == "()" &&
typeManager.typeExists(fakeUnaryOp.input.name)
) {
// If the name (`long` in the example) is a type, then the unary operator (`(long)`)
// is really a cast and our binary operator is really a unary operator `&addr`.
if (language == null || fakeUnaryOp !is UnaryOperator || fakeUnaryOp.operatorCode != "()") {
return
}

// If the name (`long` in the example) is a type, then the unary operator (`(long)`)
// is really a cast and our binary operator is really a unary operator `&addr`.
var type = (fakeUnaryOp.input as? Reference)?.nameIsType()
if (type != null) {
// We need to perform the following steps:
// * create a cast expression out of the ()-unary operator, with the type that is
// referred to in the op.
val cast = newCastExpression().codeAndLocationFrom(fakeUnaryOp)
cast.language = language
cast.castType = fakeUnaryOp.objectType(fakeUnaryOp.input.name)
cast.castType = type

// * create a unary operator with the rhs of the binary operator (and the same
// operator code).
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,6 @@ class CXXExpressionTest {
// We should have two calls (int and myint64)
val casts = tu.casts
assertEquals(2, casts.size)
assertEquals(listOf("int", "myint64"), casts.map { it.name.localName })
assertEquals(listOf("int", "long long int"), casts.map { it.name.localName })
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -81,20 +81,6 @@ import de.fraunhofer.aisec.cpg.passes.inference.startInference
* In the frontend we only do the assignment, therefore we need to create a new
* [VariableDeclaration] for `b` and inject a [DeclarationStatement].
*
* ## Converting Call Expressions into Cast Expressions
*
* In Go, it is possible to convert compatible types by "calling" the type name as a function, such
* as
*
* ```go
* var i = int(2.0)
* ```
*
* This is also possible with more complex types, such as interfaces or aliased types, as long as
* they are compatible. Because types in the same package can be defined in multiple files, we
* cannot decide during the frontend run. Therefore, we need to execute this pass before the
* [SymbolResolver] and convert certain [CallExpression] nodes into a [CastExpression].
*
* ## Adjust Names of Keys in Key Value Expressions to FQN
*
* This pass also adjusts the names of keys in a [KeyValueExpression], which is part of an
Expand Down

0 comments on commit 344d366

Please sign in to comment.