From c27b13d7543374ec709a54702e02b98193e85ab0 Mon Sep 17 00:00:00 2001 From: Christian Banse Date: Fri, 4 Oct 2024 15:07:24 +0200 Subject: [PATCH] Cleanup of `SymbolResolver` (#1777) --- .../fraunhofer/aisec/cpg/graph/types/Type.kt | 5 + .../aisec/cpg/passes/SymbolResolver.kt | 396 ++++-------------- .../aisec/cpg/passes/TypeResolver.kt | 134 +++--- .../aisec/cpg/passes/inference/PassHelper.kt | 332 +++++++++++++++ .../cpg/passes/PythonAddDeclarationsPass.kt | 32 +- .../frontends/python/PythonFrontendTest.kt | 6 +- .../TypescriptLanguageFrontendTest.kt | 2 +- 7 files changed, 507 insertions(+), 400 deletions(-) create mode 100644 cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/inference/PassHelper.kt diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/types/Type.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/types/Type.kt index ad167ae643..1c38508df0 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/types/Type.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/graph/types/Type.kt @@ -304,7 +304,12 @@ var Type.recordDeclaration: RecordDeclaration? } } +/** + * This interface specifies that this node (most likely a [Declaration]) declares a type. This is + * used by [TypeResolver.resolveType] to find appropriate symbols and declarations. + */ interface DeclaresType { + /** The [Type] that is being declared. 
*/ val declaredType: Type } diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/SymbolResolver.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/SymbolResolver.kt index c1572a84dd..d8e455d636 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/SymbolResolver.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/SymbolResolver.kt @@ -31,7 +31,6 @@ import de.fraunhofer.aisec.cpg.frontends.* import de.fraunhofer.aisec.cpg.graph.* import de.fraunhofer.aisec.cpg.graph.declarations.* import de.fraunhofer.aisec.cpg.graph.scopes.NameScope -import de.fraunhofer.aisec.cpg.graph.scopes.RecordScope import de.fraunhofer.aisec.cpg.graph.scopes.Symbol import de.fraunhofer.aisec.cpg.graph.statements.expressions.* import de.fraunhofer.aisec.cpg.graph.types.* @@ -39,10 +38,10 @@ import de.fraunhofer.aisec.cpg.helpers.SubgraphWalker.ScopedWalker import de.fraunhofer.aisec.cpg.helpers.Util import de.fraunhofer.aisec.cpg.helpers.replace import de.fraunhofer.aisec.cpg.passes.configuration.DependsOn -import de.fraunhofer.aisec.cpg.passes.inference.Inference.TypeInferenceObserver -import de.fraunhofer.aisec.cpg.passes.inference.inferFunction -import de.fraunhofer.aisec.cpg.passes.inference.inferMethod import de.fraunhofer.aisec.cpg.passes.inference.startInference +import de.fraunhofer.aisec.cpg.passes.inference.tryFieldInference +import de.fraunhofer.aisec.cpg.passes.inference.tryFunctionInference +import de.fraunhofer.aisec.cpg.passes.inference.tryVariableInference import de.fraunhofer.aisec.cpg.processing.strategy.Strategy import org.slf4j.Logger import org.slf4j.LoggerFactory @@ -162,28 +161,53 @@ open class SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) { return target } - protected fun handleReference(currentClass: RecordDeclaration?, current: Node?) 
{ - val language = current?.language - - if (current !is Reference || current is MemberExpression) return + /** + * This function handles symbol resolving for a [Reference]. After a successful lookup of the + * symbol contained in [Reference.name], the property [Reference.refersTo] is set to the best + * (or only) candidate. + * + * On a high-level, it performs the following steps: + * - Use [ScopeManager.lookupSymbolByName] to retrieve [Declaration] candidates based on the + * [Reference.name]. This can either result in an "unqualified" or "qualified" lookup, + * depending on the name. + * - The results of the lookup are stored in [Reference.candidates]. The purpose of this is + * two-fold. First, it is a good way to debug potential symbol resolution errors. Second, it + * is used by other functions, for example [handleCallExpression], which then picks the best + * viable option out of the candidates (if the reference is part of the + * [CallExpression.callee]). + * - In the next step, we need to decide whether we are resolving a standalone reference (which + * most likely points to a [VariableDeclaration]) or if we are part of a + * [CallExpression.callee]. In the first case, we can directly assign [Reference.refersTo] + * based on the candidates (at the moment we only assign it if we have exactly one candidate). + * In the second case, we are finished and let [handleCallExpression] take care of the rest + * once the EOG reaches the appropriate [CallExpression] (which should actually just be the + * next EOG node). 
+ */ + protected fun handleReference(currentClass: RecordDeclaration?, ref: Reference) { + val language = ref.language // Ignore references to anonymous identifiers, if the language supports it (e.g., the _ // identifier in Go) if ( - language is HasAnonymousIdentifier && - current.name.localName == language.anonymousIdentifier + language is HasAnonymousIdentifier && ref.name.localName == language.anonymousIdentifier ) { return } + // Ignore references to "super" if the language has super expressions, because they will be + // handled separately in handleMemberExpression + if (language is HasSuperClasses && ref.name.localName == language.superClassKeyword) { + return + } + // Find a list of candidate symbols. Currently, this is only used the in the "next-gen" call // resolution, but in future this will also be used in resolving regular references. - current.candidates = scopeManager.lookupSymbolByName(current.name, current.location).toSet() + ref.candidates = scopeManager.lookupSymbolByName(ref.name, ref.location).toSet() // Preparation for a future without legacy call resolving. Taking the first candidate is not // ideal since we are running into an issue with function pointers here (see workaround // below). - var wouldResolveTo = current.candidates.singleOrNull() + var wouldResolveTo = ref.candidates.singleOrNull() // For now, we need to ignore reference expressions that are directly embedded into call // expressions, because they are the "callee" property. In the future, we will use this @@ -194,7 +218,7 @@ open class SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) { // of this call expression back to its original variable declaration. In the future, we want // to extend this particular code to resolve all callee references to their declarations, // i.e., their function definitions and get rid of the separate CallResolver. 
- if (current.resolutionHelper is CallExpression) { + if (ref.resolutionHelper is CallExpression) { // Peek into the declaration, and if it is only one declaration and a variable, we can // proceed normally, as we are running into the special case explained above. Otherwise, // we abort here (for now). @@ -208,7 +232,7 @@ open class SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) { // percentage of references now. if (wouldResolveTo is FunctionDeclaration) { // We need to invoke the legacy resolver, just to be sure - var legacy = scopeManager.resolveReference(current) + var legacy = scopeManager.resolveReference(ref) // This is just for us to catch these differences in symbol resolving in the future. The // difference is pretty much only that the legacy system takes parameters of the @@ -225,92 +249,27 @@ open class SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) { // Only consider resolving, if the language frontend did not specify a resolution. If we // already have populated the wouldResolveTo variable, we can re-use this instead of // resolving again - var refersTo = current.refersTo ?: wouldResolveTo + var refersTo = ref.refersTo ?: wouldResolveTo var recordDeclType: Type? 
= null if (currentClass != null) { recordDeclType = currentClass.toType() } - val helperType = current.resolutionHelper?.type + val helperType = ref.resolutionHelper?.type if (helperType is FunctionPointerType && refersTo == null) { - refersTo = resolveMethodFunctionPointer(current, helperType) - } - - // only add new nodes for non-static unknown - if ( - refersTo == null && - !current.isStaticAccess && - recordDeclType != null && - recordDeclType.recordDeclaration != null - ) { - // Maybe we are referring to a field instead of a local var - val field = resolveMember(recordDeclType, current) - if (field != null) { - refersTo = field - } - } - - // TODO: we need to do proper scoping (and merge it with the code above), but for now - // this just enables CXX static fields - if (refersTo == null && language != null && current.name.isQualified()) { - recordDeclType = getEnclosingTypeOf(current) - val field = resolveMember(recordDeclType, current) - if (field != null) { - refersTo = field - } + refersTo = resolveMethodFunctionPointer(ref, helperType) } + // If we did not resolve the reference up to this point, we can try to infer the declaration if (refersTo == null) { - // We can try to infer a possible global variable, if the language supports this - refersTo = tryGlobalVariableInference(current) + refersTo = tryVariableInference(ref) } if (refersTo != null) { - current.refersTo = refersTo + ref.refersTo = refersTo } else { - Util.warnWithFileLocation( - current, - log, - "Did not find a declaration for ${current.name}" - ) - } - } - - /** - * Tries to infer a global variable from an unresolved [Reference]. This will return `null`, if - * inference was not possible, or if it was turned off in the [InferenceConfiguration]. - */ - private fun tryGlobalVariableInference(ref: Reference): Declaration? 
{ - if (ref.language !is HasGlobalVariables) { - return null - } - - // For now, we only infer globals at the top-most global level, i.e., no globals in - // namespaces - if (ref.name.isQualified()) { - return null - } - - // Forward this to our inference system. This will also check whether and how inference is - // configured. - return scopeManager.globalScope?.astNode?.startInference(ctx)?.inferVariableDeclaration(ref) - } - - /** - * We get the type of the "scope" this node is in. (e.g. for a field, we drop the field's name - * and have the class) - */ - protected fun getEnclosingTypeOf(current: Node): Type { - val language = current.language - - return if (language != null && language.namespaceDelimiter.isNotEmpty()) { - val parentName = (current.name.parent ?: current.name).toString() - var type = current.objectType(parentName) - TypeResolver.resolveType(type) - type - } else { - current.unknownType() + Util.warnWithFileLocation(ref, log, "Did not find a declaration for ${ref.name}") } } @@ -356,17 +315,22 @@ open class SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) { } val baseType = base.type.root - current.refersTo = resolveMember(baseType, current) + if (baseType is ObjectType) { + current.refersTo = resolveMember(baseType, current) + } } - protected fun resolveMember(containingClass: Type, reference: Reference): ValueDeclaration? { + protected fun resolveMember( + containingClass: ObjectType, + reference: Reference + ): ValueDeclaration? { if (isSuperclassReference(reference)) { // if we have a "super" on the member side, this is a member call. We need to resolve // this in the call resolver instead return null } var member: ValueDeclaration? 
= null - var type = containingClass + var type: Type = containingClass // Check for a possible overloaded operator-> if ( @@ -390,6 +354,7 @@ open class SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) { val record = type.recordDeclaration if (record != null) { + // TODO(oxisto): This should use symbols rather than the AST fields member = record.fields .filter { it.name.lastPartsMatch(reference.name) } @@ -404,87 +369,16 @@ open class SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) { .map { it.definition } .firstOrNull() } + if (member == null && record is EnumDeclaration) { member = record.entries[reference.name.localName] } - if (member != null) { - return member - } - - // This is a little bit of a workaround, but at least this makes sure we are not inferring a - // record, where a namespace already exist - val (scope, _) = scopeManager.extractScope(reference, null) - return if (scope == null) { - handleUnknownField(containingClass, reference) - } else { - // Workaround needed for Java. If we already have a record scope, use the "old" - // inference function - when (scope) { - is RecordScope -> handleUnknownField(containingClass, reference) - is NameScope -> { - log.warn( - "We should infer a namespace variable ${reference.name} at this point, but this is not yet implemented." - ) - null - } - else -> { - log.warn( - "We should infer a variable ${reference.name} in ${scope}, but this is not yet implemented." - ) - null - } - } - } - } - - // TODO(oxisto): Move to inference class - protected fun handleUnknownField(base: Type, ref: Reference): FieldDeclaration? { - val name = ref.name - - // unwrap a potential pointer-type - if (base is PointerType) { - return handleUnknownField(base.elementType, ref) - } - - var record = base.recordDeclaration - if (record == null) { - // We access an unknown field of an unknown record. 
so we need to handle that along the - // way as well - record = ctx.tryRecordInference(base, locationHint = ref) - } - - if (record == null) { - log.error( - "There is no matching record in the record map. Can't identify which field is used." - ) - return null + if (member == null) { + member = tryFieldInference(reference, containingClass) } - val target = record.fields.firstOrNull { it.name.lastPartsMatch(name) } - - return if (target != null) { - target - } else { - val declaration = - newFieldDeclaration( - name.localName, - // we will set the type later through the type inference observer - record.unknownType(), - listOf(), - null, - false, - ) - record.addField(declaration) - declaration.language = record.language - declaration.isInferred = true - - // We might be able to resolve the type later (or better), if a type is - // assigned to our reference later - ref.registerTypeObserver(TypeInferenceObserver(declaration)) - - declaration - } + return member } protected fun handle(node: Node?, currClass: RecordDeclaration?) { @@ -699,36 +593,6 @@ open class SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) { return candidates } - /** - * Creates an inferred element for each RecordDeclaration - * - * @param possibleContainingTypes - * @param call - */ - protected fun createMethodDummies( - possibleContainingTypes: Set, - bestGuess: Type?, - call: CallExpression - ): List { - var records = - possibleContainingTypes.mapNotNull { - val root = it.root as? ObjectType - root?.recordDeclaration - } - - // We access an unknown method of an unknown record. so we need to handle that - // along the way as well. 
We prefer the base type - if (records.isEmpty()) { - records = - listOfNotNull( - ctx.tryRecordInference(bestGuess?.root ?: unknownType(), locationHint = call) - ) - } - records = records.distinct() - - return records.mapNotNull { record -> record.inferMethod(call, ctx = ctx) } - } - protected fun handleConstructExpression(constructExpression: ConstructExpression) { if (constructExpression.instantiates != null && constructExpression.constructor != null) return @@ -810,32 +674,6 @@ open class SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) { return resolveWithArguments(candidates, op.operatorArguments, op as Expression) } - /** - * Returns a set of types in which the callee of our [call] could reside in. More concretely, it - * returns a [Pair], where the first element is the set of types and the second is our best - * guess. - */ - protected fun getPossibleContainingTypes(call: CallExpression): Pair, Type?> { - val possibleTypes = mutableSetOf() - var bestGuess: Type? = null - if (call is MemberCallExpression) { - call.base?.let { base -> - bestGuess = base.type - possibleTypes.add(base.type) - possibleTypes.addAll(base.assignedTypes) - } - } else { - // This could be a C++ member call with an implicit this (which we do not create), so - // let's add the current class to the possible list - scopeManager.currentRecord?.toType()?.let { - bestGuess = it - possibleTypes.add(it) - } - } - - return Pair(possibleTypes, bestGuess) - } - protected fun getInvocationCandidatesFromParents( name: Symbol, possibleTypes: Set, @@ -909,48 +747,6 @@ open class SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) { ?.createInferredConstructor(constructExpression.signature) } - fun tryFunctionInference( - call: CallExpression, - result: CallResolutionResult, - ): List { - // We need to see, whether we have any suitable base (e.g. 
a class) or not; There are two - // main cases - // a) we have a member expression -> easy - // b) we have a call expression -> not so easy. This could be a member call with an implicit - // this (in which case we want to explore the base type). But that is only possible if - // the callee is not qualified, because otherwise we are in a static call like - // MyClass::doSomething() or in a namespace call (in case we do not want to explore the - // base type here yet). This will change in a future PR. - val (suitableBases, bestGuess) = - if (call.callee is MemberExpression || !call.callee.name.isQualified()) { - getPossibleContainingTypes(call) - } else { - Pair(setOf(), null) - } - - return if (suitableBases.isEmpty()) { - // Resolution has provided no result, we can forward this to the inference system, - // if we want. While this is definitely a function, it could still be a function - // inside a namespace. We therefore have two possible start points, a namespace - // declaration or a translation unit. Nothing else is allowed (fow now). We can - // re-use the information in the ResolutionResult, since this already contains the - // actual start scope (e.g. in case the callee has an FQN). - var scope = result.actualStartScope - if (scope !is NameScope) { - scope = scopeManager.globalScope - } - val func = - when (val start = scope?.astNode) { - is TranslationUnitDeclaration -> start.inferFunction(call, ctx = ctx) - is NamespaceDeclaration -> start.inferFunction(call, ctx = ctx) - else -> null - } - listOfNotNull(func) - } else { - createMethodDummies(suitableBases, bestGuess, call) - } - } - companion object { val LOGGER: Logger = LoggerFactory.getLogger(SymbolResolver::class.java) @@ -976,67 +772,27 @@ open class SymbolResolver(ctx: TranslationContext) : ComponentPass(ctx) { } } -fun TranslationContext.tryNamespaceInference( - name: Name, - locationHint: Node? -): NamespaceDeclaration? 
{ - return scopeManager.globalScope - ?.astNode - ?.startInference(this) - ?.inferNamespaceDeclaration(name, null, locationHint) -} - /** - * Tries to infer a [RecordDeclaration] from an unresolved [Type]. This will return `null`, if - * inference was not possible, or if it was turned off in the [InferenceConfiguration]. + * Returns a set of types in which the callee of our [call] could reside in. More concretely, it + * returns a [Pair], where the first element is the set of types and the second is our best guess. */ -fun TranslationContext.tryRecordInference( - type: Type, - locationHint: Node? = null -): RecordDeclaration? { - val kind = - if (type.language is HasStructs) { - "struct" - } else { - "class" +internal fun Pass<*>.getPossibleContainingTypes(call: CallExpression): Pair, Type?> { + val possibleTypes = mutableSetOf() + var bestGuess: Type? = null + if (call is MemberCallExpression) { + call.base?.let { base -> + bestGuess = base.type + possibleTypes.add(base.type) + possibleTypes.addAll(base.assignedTypes) + } + } else if (call.language is HasImplicitReceiver) { + // This could be a member call with an implicit receiver, so let's add the current class + // to the possible list + scopeManager.currentRecord?.toType()?.let { + bestGuess = it + possibleTypes.add(it) } - // Determine the scope where we want to start our inference - var (scope, _) = scopeManager.extractScope(type) - - if (scope !is NameScope) { - scope = null - } - - var holder = scope?.astNode - - // If we could not find a scope, but we have an FQN, we can try to infer a namespace (or a - // parent record) - var parentName = type.name.parent - if (scope == null && parentName != null) { - // At this point, we need to check whether we have any type reference to our parent - // name. If we have (e.g. 
it is used in a function parameter, variable, etc.), then we - // have a high chance that this is actually a parent record and not a namespace - var parentType = typeManager.lookupResolvedType(parentName) - holder = - if (parentType != null) { - tryRecordInference(parentType, locationHint = locationHint) - } else { - tryNamespaceInference(parentName, locationHint = locationHint) - } - } - - val record = - (holder ?: this.scopeManager.globalScope?.astNode) - ?.startInference(this) - ?.inferRecordDeclaration(type, kind, locationHint) - - // update the type's record. Because types are only unique per scope, we potentially need to - // update multiple type nodes, i.e., all type nodes whose FQN match the inferred record - if (record != null) { - typeManager.firstOrderTypes - .filter { it.name == record.name } - .forEach { it.recordDeclaration = record } } - return record + return Pair(possibleTypes, bestGuess) } diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/TypeResolver.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/TypeResolver.kt index b27c5dd7d9..7ceab7561a 100644 --- a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/TypeResolver.kt +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/TypeResolver.kt @@ -25,7 +25,9 @@ */ package de.fraunhofer.aisec.cpg.passes +import de.fraunhofer.aisec.cpg.ScopeManager import de.fraunhofer.aisec.cpg.TranslationContext +import de.fraunhofer.aisec.cpg.TypeManager import de.fraunhofer.aisec.cpg.graph.* import de.fraunhofer.aisec.cpg.graph.declarations.RecordDeclaration import de.fraunhofer.aisec.cpg.graph.types.DeclaresType @@ -34,6 +36,7 @@ import de.fraunhofer.aisec.cpg.graph.types.Type import de.fraunhofer.aisec.cpg.graph.types.recordDeclaration import de.fraunhofer.aisec.cpg.helpers.SubgraphWalker import de.fraunhofer.aisec.cpg.passes.configuration.DependsOn +import de.fraunhofer.aisec.cpg.passes.inference.tryRecordInference /** * The purpose of this [Pass] is to establish a 
relationship between [Type] nodes (more specifically @@ -59,65 +62,87 @@ open class TypeResolver(ctx: TranslationContext) : ComponentPass(ctx) { } } - companion object { - context(ContextProvider) - fun resolveType(type: Type): Boolean { - // Let's start by looking up the type according to their name and scope. We exclusively - // filter for nodes that implement DeclaresType, because otherwise we will get a lot of - // constructor declarations and such with the same name. It seems this is ok since most - // languages will prefer structs/classes over functions when resolving types. - var symbols = - ctx?.scopeManager?.lookupSymbolByName(type.name, startScope = type.scope) { - it is DeclaresType - } ?: listOf() - - // We need to have a single match, otherwise we have an ambiguous type and we cannot - // normalize it. - // TODO: Maybe we should have a warning in this case? - var declares = symbols.filterIsInstance().singleOrNull() - - // Check for a possible typedef - var target = ctx?.scopeManager?.typedefFor(type.name, type.scope) - if (target != null) { - if (target.typeOrigin == Type.Origin.UNRESOLVED && type != target) { - // Make sure our typedef target is resolved - resolveType(target) - } - - var originDeclares = target.recordDeclaration - var name = target.name - log.debug("Aliasing type {} in {} scope to {}", type.name, type.scope, name) - type.declaredFrom = originDeclares - type.recordDeclaration = originDeclares - type.typeOrigin = Type.Origin.RESOLVED - return true + /** + * This function tries to "resolve" a [Type] back to the original declaration that declared it + * (see [DeclaresType]). More specifically, it harmonises the type's name to the FQN of the + * declared type and sets the [Type.declaredFrom] (and [ObjectType.recordDeclaration]) property. + * It also sets [Type.typeOrigin] to [Type.Origin.RESOLVED] to mark it as resolved. 
+ * + * The high-level approach looks like the following: + * - First, we check if this type refers to a typedef (see [ScopeManager.typedefFor]). If yes, + * we need to make sure that the target type is resolved and then resolve the type to the + * target type's declaration. + * - If no typedef is used, [ScopeManager.lookupSymbolByName] is used to look up declarations by + * the type's name, starting at its [Type.scope]. Depending on the type, this can be + * unqualified or qualified. We filter exclusively for declarations that implement + * [DeclaresType]. + * - If this yields no declaration, we try to infer a record declaration using + * [tryRecordInference]. + * - Finally, we set the type's name to the resolved type, set [Type.declaredFrom], + * [ObjectType.recordDeclaration], sync [Type.superTypes] with the declaration and set + * [Type.typeOrigin] to [Type.Origin.RESOLVED]. + */ + fun resolveType(type: Type): Boolean { + // Check for a possible typedef + var target = scopeManager.typedefFor(type.name, type.scope) + if (target != null) { + if (target.typeOrigin == Type.Origin.UNRESOLVED && type != target) { + // Make sure our typedef target is resolved + resolveType(target) } - if (declares == null) { - declares = ctx?.tryRecordInference(type, locationHint = type) - } + var originDeclares = target.recordDeclaration + var name = target.name + log.debug("Aliasing type {} in {} scope to {}", type.name, type.scope, name) + type.declaredFrom = originDeclares + type.recordDeclaration = originDeclares + type.typeOrigin = Type.Origin.RESOLVED + return true + } - // If we found the "real" declared type, we can normalize the name of our scoped type - // and - // set the name to the declared type. - if (declares != null) { - var declaredType = declares.declaredType - log.debug( - "Resolving type {} in {} scope to {}", - type.name, - type.scope, - declaredType.name - ) - type.name = declaredType.name - type.declaredFrom = declares - type.recordDeclaration = declares as? 
RecordDeclaration - type.typeOrigin = Type.Origin.RESOLVED - type.superTypes.addAll(declaredType.superTypes) - return true - } + // Let's start by looking up the type according to their name and scope. We exclusively + // filter for nodes that implement DeclaresType, because otherwise we will get a lot of + // constructor declarations and such with the same name. It seems this is ok since most + // languages will prefer structs/classes over functions when resolving types. + var symbols = + scopeManager + .lookupSymbolByName(type.name, startScope = type.scope) { it is DeclaresType } + .filterIsInstance() + + // We need to have a single match, otherwise we have an ambiguous type, and we cannot + // normalize it. + if (symbols.size > 1) { + log.warn( + "Lookup of type {} returned more than one symbol which declares a type, this is an ambiguity and the following analysis might not be correct.", + name + ) + } + var declares = symbols.singleOrNull() + + // If we did not find any declaration, we can try to infer a record declaration for it + if (declares == null) { + declares = tryRecordInference(type, locationHint = type) + } - return false + // If we found the "real" declared type, we can normalize the name of our scoped type + // and set the name to the declared type. + if (declares != null) { + var declaredType = declares.declaredType + log.debug( + "Resolving type {} in {} scope to {}", + type.name, + type.scope, + declaredType.name + ) + type.name = declaredType.name + type.declaredFrom = declares + type.recordDeclaration = declares as? RecordDeclaration + type.typeOrigin = Type.Origin.RESOLVED + type.superTypes.addAll(declaredType.superTypes) + return true } + + return false } private fun handleNode(node: Node?) { @@ -135,6 +160,7 @@ open class TypeResolver(ctx: TranslationContext) : ComponentPass(ctx) { // Nothing to do } + /** Resolves all types in [TypeManager.firstOrderTypes] using [resolveType]. 
*/ fun resolveFirstOrderTypes() { for (type in typeManager.firstOrderTypes.sortedBy { it.name }) { if (type is ObjectType && type.typeOrigin == Type.Origin.UNRESOLVED) { diff --git a/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/inference/PassHelper.kt b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/inference/PassHelper.kt new file mode 100644 index 0000000000..da7184e8f5 --- /dev/null +++ b/cpg-core/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/inference/PassHelper.kt @@ -0,0 +1,332 @@ +/* + * Copyright (c) 2024, Fraunhofer AISEC. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * $$$$$$\ $$$$$$$\ $$$$$$\ + * $$ __$$\ $$ __$$\ $$ __$$\ + * $$ / \__|$$ | $$ |$$ / \__| + * $$ | $$$$$$$ |$$ |$$$$\ + * $$ | $$ ____/ $$ |\_$$ | + * $$ | $$\ $$ | $$ | $$ | + * \$$$$$ |$$ | \$$$$$ | + * \______/ \__| \______/ + * + */ +package de.fraunhofer.aisec.cpg.passes.inference + +import de.fraunhofer.aisec.cpg.CallResolutionResult +import de.fraunhofer.aisec.cpg.InferenceConfiguration +import de.fraunhofer.aisec.cpg.frontends.HasGlobalVariables +import de.fraunhofer.aisec.cpg.frontends.HasImplicitReceiver +import de.fraunhofer.aisec.cpg.frontends.HasStructs +import de.fraunhofer.aisec.cpg.frontends.Language +import de.fraunhofer.aisec.cpg.graph.Name +import de.fraunhofer.aisec.cpg.graph.Node +import de.fraunhofer.aisec.cpg.graph.declarations.* +import de.fraunhofer.aisec.cpg.graph.newFieldDeclaration +import de.fraunhofer.aisec.cpg.graph.scopes.GlobalScope +import de.fraunhofer.aisec.cpg.graph.scopes.NameScope +import de.fraunhofer.aisec.cpg.graph.scopes.RecordScope +import de.fraunhofer.aisec.cpg.graph.statements.expressions.CallExpression +import de.fraunhofer.aisec.cpg.graph.statements.expressions.MemberExpression +import de.fraunhofer.aisec.cpg.graph.statements.expressions.Reference +import de.fraunhofer.aisec.cpg.graph.types.ObjectType +import de.fraunhofer.aisec.cpg.graph.types.Type +import de.fraunhofer.aisec.cpg.graph.types.recordDeclaration +import de.fraunhofer.aisec.cpg.graph.unknownType +import de.fraunhofer.aisec.cpg.passes.Pass +import de.fraunhofer.aisec.cpg.passes.Pass.Companion.log +import de.fraunhofer.aisec.cpg.passes.TypeResolver +import de.fraunhofer.aisec.cpg.passes.getPossibleContainingTypes +import de.fraunhofer.aisec.cpg.passes.inference.Inference.TypeInferenceObserver +import kotlin.collections.forEach + +/** + * Tries to infer a [NamespaceDeclaration] from a [Name]. This will return `null`, if inference was + * not possible, or if it was turned off in the [InferenceConfiguration]. 
+ */ +internal fun Pass<*>.tryNamespaceInference(name: Name, locationHint: Node?): NamespaceDeclaration? { + return scopeManager.globalScope + ?.astNode + ?.startInference(this.ctx) + ?.inferNamespaceDeclaration(name, null, locationHint) +} + +/** + * Tries to infer a [RecordDeclaration] from an unresolved [Type]. This will return `null`, if + * inference was not possible, or if it was turned off in the [InferenceConfiguration]. + */ +internal fun Pass<*>.tryRecordInference( + type: Type, + locationHint: Node? = null, +): RecordDeclaration? { + val kind = + if (type.language is HasStructs) { + "struct" + } else { + "class" + } + // Determine the scope where we want to start our inference + var (scope, _) = scopeManager.extractScope(type) + + if (scope !is NameScope) { + scope = null + } + + var holder = scope?.astNode + + // If we could not find a scope, but we have an FQN, we can try to infer a namespace (or a + // parent record) + var parentName = type.name.parent + if (scope == null && parentName != null) { + // At this point, we need to check whether we have any type reference to our parent + // name. If we have (e.g. it is used in a function parameter, variable, etc.), then we + // have a high chance that this is actually a parent record and not a namespace + var parentType = typeManager.lookupResolvedType(parentName) + holder = + if (parentType != null) { + tryRecordInference(parentType, locationHint = locationHint) + } else { + tryNamespaceInference(parentName, locationHint = locationHint) + } + } + + val record = + (holder ?: this.scopeManager.globalScope?.astNode) + ?.startInference(this.ctx) + ?.inferRecordDeclaration(type, kind, locationHint) + + // Update the type's record. Because types are only unique per scope, we potentially need to + // update multiple type nodes, i.e., all type nodes whose FQN match the inferred record. 
We only + // need to do this if we are NOT in the type resolver + if (this !is TypeResolver && record != null) { + typeManager.firstOrderTypes + .filter { it.name == record.name } + .forEach { it.recordDeclaration = record } + } + + return record +} + +/** + * Tries to infer a [VariableDeclaration] (or [FieldDeclaration]) out of a [Reference]. This will + * return `null`, if inference was not possible, or if it was turned off in the + * [InferenceConfiguration]. + * + * We mainly try to infer global variables and fields here, since these are possibly parts of the + * code we do not "see". We do not try to infer local variables, because we are under the assumption + * that even with incomplete code, we at least have the complete current function code. We can + * therefore differentiate between four scenarios: + * - Inference of a [FieldDeclaration] if we have a language that allows implicit receivers, are + * inside a function and the ref is not qualified. This is then forwarded to [tryFieldInference]. + * - Inference of a top-level [VariableDeclaration] on a namespace level (this is not yet + * implemented) + * - Inference of a global [VariableDeclaration] in the [GlobalScope]. + * - No inference in any other case, since this would mean that we would infer a local variable. + * This is something we do not want to do (see above). + */ +internal fun Pass<*>.tryVariableInference( + ref: Reference, +): VariableDeclaration? { + var currentRecordType = scopeManager.currentRecord?.toType() as? 
ObjectType + return if ( + ref.language is HasImplicitReceiver && + !ref.name.isQualified() && + !ref.isStaticAccess && + currentRecordType != null + ) { + // This could potentially be a reference to a field with an implicit receiver call + tryFieldInference(ref, currentRecordType) + } else if (ref.name.isQualified()) { + // For now, we only infer globals at the top-most global level, i.e., no globals in + // namespaces + val (scope, _) = scopeManager.extractScope(ref, null) + when (scope) { + is NameScope -> { + log.warn( + "We should infer a namespace variable ${ref.name} at this point, but this is not yet implemented." + ) + null + } + else -> { + log.warn( + "We should infer a variable ${ref.name} in ${scope}, but this is not yet implemented." + ) + null + } + } + } else if (ref.language is HasGlobalVariables) { + // We can try to infer a possible global variable (at top-level), if the language + // supports this + scopeManager.globalScope?.astNode?.startInference(this.ctx)?.inferVariableDeclaration(ref) + } else { + // Nothing to infer + null + } +} + +/** + * Tries to infer a [FieldDeclaration] from an unresolved [MemberExpression] or [Reference] (if the + * language has [HasImplicitReceiver]). This will return `null`, if inference was not possible, or + * if it was turned off in the [InferenceConfiguration]. + * + * It will also try to infer a [RecordDeclaration], if [targetType] does not have a declaration. + * However, this is a very special corner-case that will most likely not be triggered, since the + * majority of types will have their declaration inferred in the [TypeResolver] already before we + * reach this step here. This should actually only happen in one case: If we try to infer a field of + * a type that is registered in [Language.builtInTypes] (e.g. `std::string` for C++). 
In this case, + * the record for this type is NOT inferred in the type resolver, because we intentionally wait + * until the symbol resolver, in case we really "see" the record, e.g., if we parse the std headers. + * If we did not "see" its declaration, we can infer it now. + */ +internal fun Pass<*>.tryFieldInference( + ref: Reference, + targetType: ObjectType +): VariableDeclaration? { + // We only want to infer fields here, this can either happen if we have a reference with an + // implicit receiver or if we have a scoped reference and the scope points to a record + val (scope, _) = scopeManager.extractScope(ref) + if (scope != null && scope !is RecordScope) { + return null + } + + var record = targetType.recordDeclaration + // We access an unknown field of an unknown record, so we need to handle that along the + // way as well. + if (record == null) { + record = tryRecordInference(targetType, locationHint = ref) + } + + if (record == null) { + log.error( + "There is no matching record in the record map. Can't identify which field is used." + ) + return null + } + + val declaration = + ref.newFieldDeclaration( + ref.name.localName, + // we will set the type later through the type inference observer + record.unknownType(), + listOf(), + null, + false, + ) + record.addField(declaration) + declaration.language = record.language + declaration.isInferred = true + + // We might be able to resolve the type later (or better), if a type is + // assigned to our reference later + ref.registerTypeObserver(TypeInferenceObserver(declaration)) + + return declaration +} + +/** + * Tries to infer a [FunctionDeclaration] or a [MethodDeclaration] from a [CallExpression]. This + * will return an empty list, if inference was not possible, or if it was turned off in the + * [InferenceConfiguration]. 
+ * + * Depending on several factors, e.g., whether the callee has an FQN, was a [MemberExpression] or + * whether the language supports [HasImplicitReceiver] we either infer + * - a global [FunctionDeclaration] + * - a [FunctionDeclaration] in a namespace + * - a [MethodDeclaration] in a record using [tryMethodInference] + * + * Since potentially multiple suitable bases exist for the inference of methods (derived by + * [getPossibleContainingTypes]), we infer a method for all of them and return a list. + */ +internal fun Pass<*>.tryFunctionInference( + call: CallExpression, + result: CallResolutionResult, +): List<FunctionDeclaration> { + // We need to see, whether we have any suitable base (e.g. a class) or not; There are two + // main cases + // a) we have a member expression -> easy + // b) we have a call expression -> not so easy. This could be a member call with an implicit + // this (in which case we want to explore the base type). But that is only possible if + // the callee is not qualified, because otherwise we are in a static call like + // MyClass::doSomething() or in a namespace call (in case we do not want to explore the + // base type here yet). This will change in a future PR. + val (suitableBases, bestGuess) = + if ( + call.callee is MemberExpression || + !call.callee.name.isQualified() && call.language is HasImplicitReceiver + ) { + getPossibleContainingTypes(call) + } else { + Pair(setOf(), null) + } + + return if (suitableBases.isEmpty()) { + // While this is definitely a function, it could still be a function + // inside a namespace. We therefore have two possible start points, a namespace + // declaration or a translation unit. Nothing else is allowed (for now). We can + // re-use the information in the ResolutionResult, since this already contains the + // actual start scope (e.g. in case the callee has an FQN). 
+ var scope = result.actualStartScope + if (scope !is NameScope) { + scope = scopeManager.globalScope + } + val func = + when (val start = scope?.astNode) { + is TranslationUnitDeclaration -> start.inferFunction(call, ctx = this.ctx) + is NamespaceDeclaration -> start.inferFunction(call, ctx = this.ctx) + else -> null + } + listOfNotNull(func) + } else { + tryMethodInference(call, suitableBases, bestGuess) + } +} + +/** + * Tries to infer a [MethodDeclaration] from a [CallExpression]. This will return an empty list, if + * inference was not possible, or if it was turned off in the [InferenceConfiguration]. + * + * Since potentially multiple suitable bases exist for the inference of methods (specified in + * [possibleContainingTypes]), we infer a method for all of them and return a list. + * + * Should we encounter that none of our types in [possibleContainingTypes] have a resolved + * declaration, we are inferring one (using [bestGuess]). This should normally not happen as missing + * type declarations are already inferred in the [TypeResolver]. However, there is a special + * corner-case involving types in [Language.builtInTypes] (see [tryFieldInference] for more + * details). + */ +internal fun Pass<*>.tryMethodInference( + call: CallExpression, + possibleContainingTypes: Set<Type>, + bestGuess: Type?, +): List<FunctionDeclaration> { + var records = + possibleContainingTypes.mapNotNull { + val root = it.root as? ObjectType + root?.recordDeclaration + } + + // We access an unknown method of an unknown record, so we need to handle that along the way as + // well. We prefer the base type. This should only happen on types that are "built-in", as all + // other type declarations are already inferred by the type resolver at this stage. 
+ if (records.isEmpty()) { + records = + listOfNotNull( + tryRecordInference(bestGuess?.root ?: call.unknownType(), locationHint = call) + ) + } + records = records.distinct() + + return records.mapNotNull { record -> record.inferMethod(call, ctx = this.ctx) } +} diff --git a/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/PythonAddDeclarationsPass.kt b/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/PythonAddDeclarationsPass.kt index 100791c74a..9d3992557f 100644 --- a/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/PythonAddDeclarationsPass.kt +++ b/cpg-language-python/src/main/kotlin/de/fraunhofer/aisec/cpg/passes/PythonAddDeclarationsPass.kt @@ -32,6 +32,7 @@ import de.fraunhofer.aisec.cpg.graph.declarations.Declaration import de.fraunhofer.aisec.cpg.graph.declarations.FieldDeclaration import de.fraunhofer.aisec.cpg.graph.declarations.MethodDeclaration import de.fraunhofer.aisec.cpg.graph.declarations.VariableDeclaration +import de.fraunhofer.aisec.cpg.graph.scopes.RecordScope import de.fraunhofer.aisec.cpg.graph.statements.ForEachStatement import de.fraunhofer.aisec.cpg.graph.statements.expressions.AssignExpression import de.fraunhofer.aisec.cpg.graph.statements.expressions.CallExpression @@ -102,22 +103,17 @@ class PythonAddDeclarationsPass(ctx: TranslationContext) : ComponentPass(ctx) { (scopeManager.currentFunction as? MethodDeclaration)?.receiver?.name ) { // We need to temporarily jump into the scope of the current record to - // add the field - val field = - scopeManager.withScope(scopeManager.currentRecord?.scope) { - newFieldDeclaration(node.name) - } - field + // add the field. 
These are instance attributes + scopeManager.withScope( + scopeManager.firstScopeIsInstanceOrNull<RecordScope>() + ) { + newFieldDeclaration(node.name) + } } else { - val v = newVariableDeclaration(node.name) - v + newVariableDeclaration(node.name) } } else { - val field = - scopeManager.withScope(scopeManager.currentRecord?.scope) { - newFieldDeclaration(node.name) - } - field + newFieldDeclaration(node.name) } } else { newVariableDeclaration(node.name) @@ -127,14 +123,8 @@ class PythonAddDeclarationsPass(ctx: TranslationContext) : ComponentPass(ctx) { decl.location = node.location decl.isImplicit = true - if (decl is FieldDeclaration) { - scopeManager.currentRecord?.addField(decl) - scopeManager.withScope(scopeManager.currentRecord?.scope) { - scopeManager.addDeclaration(decl) - } - } else { - scopeManager.addDeclaration(decl) - } + scopeManager.withScope(decl.scope) { scopeManager.addDeclaration(decl) } + return decl } diff --git a/cpg-language-python/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/python/PythonFrontendTest.kt b/cpg-language-python/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/python/PythonFrontendTest.kt index 17f8b0855d..f68284a5de 100644 --- a/cpg-language-python/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/python/PythonFrontendTest.kt +++ b/cpg-language-python/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/python/PythonFrontendTest.kt @@ -1336,10 +1336,8 @@ class PythonFrontendTest : BaseTest() { it.registerLanguage<PythonLanguage>() } assertNotNull(result) - assertEquals(2, result.variables.size) - // Note, that "pi" is incorrectly inferred as a field declaration. This is a known bug in - // the inference system (and not in the python module) and will be handled separately. 
- assertEquals(listOf("mypi", "pi"), result.variables.map { it.name.localName }) + assertEquals(1, result.variables.size) + assertEquals(listOf("mypi"), result.variables.map { it.name.localName }) } @Test diff --git a/cpg-language-typescript/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/typescript/TypescriptLanguageFrontendTest.kt b/cpg-language-typescript/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/typescript/TypescriptLanguageFrontendTest.kt index 0ac64f07ca..b553cd7f9b 100644 --- a/cpg-language-typescript/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/typescript/TypescriptLanguageFrontendTest.kt +++ b/cpg-language-typescript/src/test/kotlin/de/fraunhofer/aisec/cpg/frontends/typescript/TypescriptLanguageFrontendTest.kt @@ -262,7 +262,7 @@ class TypeScriptLanguageFrontendTest { assertLocalName("Users", usersComponent) assertEquals(1, usersComponent.constructors.size) assertEquals(2, usersComponent.methods.size) - assertEquals(/*0*/ 2 /* because of dummy nodes */, usersComponent.fields.size) + assertEquals(0, usersComponent.fields.size) val render = usersComponent.methods["render"] assertNotNull(render)