diff --git a/framework/.projen/tasks.json b/framework/.projen/tasks.json index 8484ab91c..5872f1f45 100644 --- a/framework/.projen/tasks.json +++ b/framework/.projen/tasks.json @@ -75,6 +75,12 @@ { "exec": "rsync -avr --exclude '*.ts' --exclude '*.js' src/processing/lib/spark-runtime/emr-containers/resources lib/processing/lib/spark-runtime/emr-containers" }, + { + "exec": "rsync -avr --exclude '*.ts' --exclude '*.js' src/governance/lib/resources lib/governance/lib" + }, + { + "exec": "rsync -avr --exclude '*.ts' --exclude '*.js' src/governance/lib/datazone/resources lib/governance/lib/datazone" + }, { "exec": "rsync -avr --exclude '*.ts' --exclude '*.js' src/consumption/lib/redshift/resources lib/consumption/lib/redshift" }, diff --git a/framework/API.md b/framework/API.md index 15ce5b6b3..ece474d76 100644 --- a/framework/API.md +++ b/framework/API.md @@ -3067,7 +3067,7 @@ Create service linked role for the indicated service if it doesn't exists. ```typescript const slr = new dsf.utils.CreateServiceLinkedRole(this, 'CreateSLR') -slr.create(ServiceLinkedRoleService.REDSHIFT) +slr.create(dsf.utils.ServiceLinkedRoleService.REDSHIFT) ``` @@ -4197,29 +4197,36 @@ The security group for Client VPN Endpoint. --- -### KafkaApi +### DataZoneCustomAssetTypeFactory -A construct to create a Kafka API admin client. +Factory construct providing resources to create a DataZone custom asset type. -> [https://awslabs.github.io/data-solutions-framework-on-aws/](https://awslabs.github.io/data-solutions-framework-on-aws/) +*Example* -#### Initializers +```typescript +new dsf.governance.DataZoneCustomAssetTypeFactory(this, 'CustomAssetTypeFactory', { + domainId: 'aba_dc999t9ime9sss', +}); +``` + + +#### Initializers ```typescript -import { streaming } from '@cdklabs/aws-data-solutions-framework' +import { governance } from '@cdklabs/aws-data-solutions-framework' -new streaming.KafkaApi(scope: Construct, id: string, props: KafkaApiProps) +new governance.DataZoneCustomAssetTypeFactory(scope: Construct, id: string, props: DataZoneCustomAssetTypeFactoryProps) ``` | **Name** | **Type** | **Description** | | --- | --- | --- | -| scope | constructs.Construct | the Scope of the CDK Construct. | -| id | string | the ID of the CDK Construct. | -| props | @cdklabs/aws-data-solutions-framework.streaming.KafkaApiProps | *No description.* | +| scope | constructs.Construct | the Scope of the CDK Construct. | +| id | string | the ID of the CDK Construct. | +| props | @cdklabs/aws-data-solutions-framework.governance.DataZoneCustomAssetTypeFactoryProps | The DataZoneCustomAssetTypeFactory properties. | --- -##### `scope`Required +##### `scope`Required - *Type:* constructs.Construct @@ -4227,7 +4234,7 @@ the Scope of the CDK Construct. --- -##### `id`Required +##### `id`Required - *Type:* string @@ -4235,9 +4242,11 @@ the ID of the CDK Construct. --- -##### `props`Required +##### `props`Required -- *Type:* @cdklabs/aws-data-solutions-framework.streaming.KafkaApiProps +- *Type:* @cdklabs/aws-data-solutions-framework.governance.DataZoneCustomAssetTypeFactoryProps + +The DataZoneCustomAssetTypeFactory properties. --- @@ -4245,16 +4254,13 @@ the ID of the CDK Construct. | **Name** | **Description** | | --- | --- | -| toString | Returns a string representation of this construct. | -| grantConsume | Grant a principal permissions to consume from a topic. | -| grantProduce | Grant a principal permissions to produce to a topic. | -| retrieveVersion | Retrieve DSF package.json version. | -| setAcl | Creates a ACL in the MSK Cluster. 
| -| setTopic | Creates a topic in the MSK Cluster. | +| toString | Returns a string representation of this construct. | +| createCustomAssetType | Creates a DataZone custom asset type based on the provided properties. | +| retrieveVersion | Retrieve DSF package.json version. | --- -##### `toString` +##### `toString` ```typescript public toString(): string @@ -4262,252 +4268,291 @@ public toString(): string Returns a string representation of this construct. -##### `grantConsume` +##### `createCustomAssetType` ```typescript -public grantConsume(id: string, topicName: string, clientAuthentication: Authentication, principal: string | IPrincipal, host?: string, removalPolicy?: RemovalPolicy, customResourceAuthentication?: Authentication): CustomResource +public createCustomAssetType(id: string, customAssetType: DataZoneCustomAssetTypeProps): CustomAssetType ``` -Grant a principal permissions to consume from a topic. +Creates a DataZone custom asset type based on the provided properties. -###### `id`Required +###### `id`Required - *Type:* string -the CDK resource ID. +the ID of the CDK Construct. --- -###### `topicName`Required +###### `customAssetType`Required -- *Type:* string +- *Type:* @cdklabs/aws-data-solutions-framework.governance.DataZoneCustomAssetTypeProps -the target topic to grant consume permissions on. +the properties of the custom asset type. --- -###### `clientAuthentication`Required +##### `retrieveVersion` -- *Type:* @cdklabs/aws-data-solutions-framework.streaming.Authentication +```typescript +public retrieveVersion(): any +``` -The authentication mode of the consumer. +Retrieve DSF package.json version. ---- +#### Static Functions -###### `principal`Required +| **Name** | **Description** | +| --- | --- | +| isConstruct | Checks if `x` is a construct. | -- *Type:* string | aws-cdk-lib.aws_iam.IPrincipal +--- -the principal receiveing grant consume permissions. +##### `isConstruct` ---- +```typescript +import { governance } from '@cdklabs/aws-data-solutions-framework' -###### `host`Optional +governance.DataZoneCustomAssetTypeFactory.isConstruct(x: any) +``` -- *Type:* string +Checks if `x` is a construct. -the host of the consumer. +Use this method instead of `instanceof` to properly detect `Construct` +instances, even when the construct library is symlinked. ---- +Explanation: in JavaScript, multiple copies of the `constructs` library on +disk are seen as independent, completely different libraries. As a +consequence, the class `Construct` in each copy of the `constructs` library +is seen as a different class, and an instance of one class will not test as +`instanceof` the other class. `npm install` will not create installations +like this, but users may manually symlink construct libraries together or +use a monorepo tool: in those cases, multiple copies of the `constructs` +library can be accidentally installed, and `instanceof` will behave +unpredictably. It is safest to avoid using `instanceof`, and using +this type-testing method instead. -###### `removalPolicy`Optional +###### `x`Required -- *Type:* aws-cdk-lib.RemovalPolicy +- *Type:* any -the removal policy to apply to the grant. +Any object. --- -###### `customResourceAuthentication`Optional - -- *Type:* @cdklabs/aws-data-solutions-framework.streaming.Authentication +#### Properties -The authentication used by the Kafka API admin client to create the ACL. +| **Name** | **Type** | **Description** | +| --- | --- | --- | +| node | constructs.Node | The tree node. 
| +| createFunction | aws-cdk-lib.aws_lambda.IFunction | The Lambda Function for the DataZone custom asset type creation. | +| createLogGroup | aws-cdk-lib.aws_logs.ILogGroup | The CloudWatch Logs Log Group for the DataZone custom asset type creation. | +| createRole | aws-cdk-lib.aws_iam.IRole | The IAM Role for the DataZone custom asset type creation. | +| handlerRole | aws-cdk-lib.aws_iam.IRole | The role used by the custom resource. | +| serviceToken | string | The service token for the custom resource. | --- -##### `grantProduce` +##### `node`Required ```typescript -public grantProduce(id: string, topicName: string, clientAuthentication: Authentication, principal: string | IPrincipal, host?: string, removalPolicy?: RemovalPolicy, customResourceAuthentication?: Authentication): CustomResource +public readonly node: Node; ``` -Grant a principal permissions to produce to a topic. - -###### `id`Required - -- *Type:* string +- *Type:* constructs.Node -the CDK resource ID. +The tree node. --- -###### `topicName`Required - -- *Type:* string - -the target topic to grant produce permissions on. - ---- +##### `createFunction`Required -###### `clientAuthentication`Required +```typescript +public readonly createFunction: IFunction; +``` -- *Type:* @cdklabs/aws-data-solutions-framework.streaming.Authentication +- *Type:* aws-cdk-lib.aws_lambda.IFunction -The authentication mode of the producer. +The Lambda Function for the DataZone custom asset type creation. --- -###### `principal`Required - -- *Type:* string | aws-cdk-lib.aws_iam.IPrincipal - -the principal receiving grant produce permissions. - ---- +##### `createLogGroup`Required -###### `host`Optional +```typescript +public readonly createLogGroup: ILogGroup; +``` -- *Type:* string +- *Type:* aws-cdk-lib.aws_logs.ILogGroup -the host of the producer. +The CloudWatch Logs Log Group for the DataZone custom asset type creation. --- -###### `removalPolicy`Optional - -- *Type:* aws-cdk-lib.RemovalPolicy - -the removal policy to apply to the grant. - ---- +##### `createRole`Required -###### `customResourceAuthentication`Optional +```typescript +public readonly createRole: IRole; +``` -- *Type:* @cdklabs/aws-data-solutions-framework.streaming.Authentication +- *Type:* aws-cdk-lib.aws_iam.IRole -The authentication used by the Kafka API admin client to create the ACL. +The IAM Role for the DataZone custom asset type creation. --- -##### `retrieveVersion` +##### `handlerRole`Required ```typescript -public retrieveVersion(): any +public readonly handlerRole: IRole; ``` -Retrieve DSF package.json version. +- *Type:* aws-cdk-lib.aws_iam.IRole -##### `setAcl` +The role used by the custom resource. -```typescript -public setAcl(id: string, aclDefinition: Acl, removalPolicy?: RemovalPolicy, clientAuthentication?: Authentication): CustomResource -``` +--- -Creates a ACL in the MSK Cluster. +##### `serviceToken`Required -###### `id`Required +```typescript +public readonly serviceToken: string; +``` - *Type:* string -the CDK ID of the ACL. +The service token for the custom resource. --- -###### `aclDefinition`Required - -- *Type:* @cdklabs/aws-data-solutions-framework.streaming.Acl +#### Constants -the Kafka ACL definition. 
+| **Name** | **Type** | **Description** | +| --- | --- | --- | +| DSF_OWNED_TAG | string | *No description.* | +| DSF_TRACKING_CODE | string | *No description.* | --- -###### `removalPolicy`Optional +##### `DSF_OWNED_TAG`Required -- *Type:* aws-cdk-lib.RemovalPolicy +```typescript +public readonly DSF_OWNED_TAG: string; +``` -Wether to keep the ACL or delete it when removing the resource from the Stack. +- *Type:* string --- -###### `clientAuthentication`Optional +##### `DSF_TRACKING_CODE`Required -- *Type:* @cdklabs/aws-data-solutions-framework.streaming.Authentication +```typescript +public readonly DSF_TRACKING_CODE: string; +``` -The authentication used by the Kafka API admin client to create the ACL. +- *Type:* string --- -##### `setTopic` +### DataZoneGsrMskDataSource + +A DataZone custom data source for MSK (Managed Streaming for Kafka) with integration for Glue Schema Registry. + +*Example* ```typescript -public setTopic(id: string, clientAuthentication: Authentication, topicDefinition: MskTopic, removalPolicy?: RemovalPolicy, waitForLeaders?: boolean, timeout?: number): CustomResource +import { Schedule } from 'aws-cdk-lib/aws-events'; + +new dsf.governance.DataZoneGsrMskDataSource(this, 'MskDatasource', { + domainId: 'aba_dc999t9ime9sss', + projectId: '999999b3m5cpz', + registryName: 'MyRegistry', + clusterName: 'MyCluster', + runSchedule: Schedule.cron({ minute: '0', hour: '12' }), // Trigger daily at noon + enableSchemaRegistryEvent: true, // Enable events for Glue Schema Registry changes +}); ``` -Creates a topic in the MSK Cluster. -###### `id`Required +#### Initializers -- *Type:* string +```typescript +import { governance } from '@cdklabs/aws-data-solutions-framework' -the CDK ID for Topic. +new governance.DataZoneGsrMskDataSource(scope: Construct, id: string, props: DataZoneGsrMskDataSourceProps) +``` + +| **Name** | **Type** | **Description** | +| --- | --- | --- | +| scope | constructs.Construct | the Scope of the CDK Construct. | +| id | string | the ID of the CDK Construct. | +| props | @cdklabs/aws-data-solutions-framework.governance.DataZoneGsrMskDataSourceProps | The DataZoneGsrMskDataSourceProps properties. | --- -###### `clientAuthentication`Required +##### `scope`Required -- *Type:* @cdklabs/aws-data-solutions-framework.streaming.Authentication +- *Type:* constructs.Construct -The authentication used by the Kafka API admin client to create the topic. +the Scope of the CDK Construct. --- -###### `topicDefinition`Required +##### `id`Required -- *Type:* @cdklabs/aws-data-solutions-framework.streaming.MskTopic +- *Type:* string -the Kafka topic definition. +the ID of the CDK Construct. --- -###### `removalPolicy`Optional +##### `props`Required -- *Type:* aws-cdk-lib.RemovalPolicy +- *Type:* @cdklabs/aws-data-solutions-framework.governance.DataZoneGsrMskDataSourceProps -Wether to keep the topic or delete it when removing the resource from the Stack. +The DataZoneGsrMskDataSourceProps properties. --- -###### `waitForLeaders`Optional - -- *Type:* boolean +#### Methods -If set to true, waits until metadata for the new topics doesn't throw LEADER_NOT_AVAILABLE. +| **Name** | **Description** | +| --- | --- | +| toString | Returns a string representation of this construct. | +| retrieveVersion | Retrieve DSF package.json version. | --- -###### `timeout`Optional +##### `toString` -- *Type:* number +```typescript +public toString(): string +``` -The time in ms to wait for a topic to be completely created on the controller node. 
+Returns a string representation of this construct. ---- +##### `retrieveVersion` + +```typescript +public retrieveVersion(): any +``` + +Retrieve DSF package.json version. #### Static Functions | **Name** | **Description** | | --- | --- | -| isConstruct | Checks if `x` is a construct. | +| isConstruct | Checks if `x` is a construct. | --- -##### `isConstruct` +##### `isConstruct` ```typescript -import { streaming } from '@cdklabs/aws-data-solutions-framework' +import { governance } from '@cdklabs/aws-data-solutions-framework' -streaming.KafkaApi.isConstruct(x: any) +governance.DataZoneGsrMskDataSource.isConstruct(x: any) ``` Checks if `x` is a construct. @@ -4526,7 +4571,7 @@ library can be accidentally installed, and `instanceof` will behave unpredictably. It is safest to avoid using `instanceof`, and using this type-testing method instead. -###### `x`Required +###### `x`Required - *Type:* any @@ -4538,19 +4583,16 @@ Any object. | **Name** | **Type** | **Description** | | --- | --- | --- | -| node | constructs.Node | The tree node. | -| mskAclFunction | aws-cdk-lib.aws_lambda.IFunction | The Lambda function used by the Custom Resource provider when MSK is using mTLS authentication. | -| mskAclLogGroup | aws-cdk-lib.aws_logs.ILogGroup | The Cloudwatch Log Group used by the Custom Resource provider when MSK is using mTLS authentication. | -| mskAclRole | aws-cdk-lib.aws_iam.IRole | The IAM Role used by the Custom Resource provider when MSK is using mTLS authentication. | -| mskAclSecurityGroup | aws-cdk-lib.aws_ec2.ISecurityGroup[] | The Security Group used by the Custom Resource provider when MSK is using mTLS authentication. | -| mskIamFunction | aws-cdk-lib.aws_lambda.IFunction | The Lambda function used by the Custom Resource provider when MSK is using IAM authentication. | -| mskIamLogGroup | aws-cdk-lib.aws_logs.ILogGroup | The Cloudwatch Log Group used by the Custom Resource provider when MSK is using IAM authentication. | -| mskIamRole | aws-cdk-lib.aws_iam.IRole | The IAM Role used by the Custom Resource provider when MSK is using IAM authentication. | -| mskIamSecurityGroup | aws-cdk-lib.aws_ec2.ISecurityGroup[] | The Security Group used by the Custom Resource provider when MSK is using IAM authentication. | +| node | constructs.Node | The tree node. | +| datasourceLambdaRole | aws-cdk-lib.aws_iam.Role | The IAM Role of the Lambda Function interacting with DataZone API. | +| dataZoneMembership | aws-cdk-lib.aws_datazone.CfnProjectMembership | The membership of the Lambda Role on the DataZone Project. | +| createUpdateEventRule | aws-cdk-lib.aws_events.Rule | The Event Bridge Rule for schema creation and update. | +| deleteEventRule | aws-cdk-lib.aws_events.Rule | The Event Bridge Rule for schema deletion. | +| scheduleRule | aws-cdk-lib.aws_events.Rule | The Event Bridge Rule for trigger the data source execution. | --- -##### `node`Required +##### `node`Required ```typescript public readonly node: Node; @@ -4562,99 +4604,63 @@ The tree node. --- -##### `mskAclFunction`Optional +##### `datasourceLambdaRole`Required ```typescript -public readonly mskAclFunction: IFunction; +public readonly datasourceLambdaRole: Role; ``` -- *Type:* aws-cdk-lib.aws_lambda.IFunction +- *Type:* aws-cdk-lib.aws_iam.Role -The Lambda function used by the Custom Resource provider when MSK is using mTLS authentication. +The IAM Role of the Lambda Function interacting with DataZone API. 
--- -##### `mskAclLogGroup`Optional +##### `dataZoneMembership`Required ```typescript -public readonly mskAclLogGroup: ILogGroup; +public readonly dataZoneMembership: CfnProjectMembership; ``` -- *Type:* aws-cdk-lib.aws_logs.ILogGroup +- *Type:* aws-cdk-lib.aws_datazone.CfnProjectMembership -The Cloudwatch Log Group used by the Custom Resource provider when MSK is using mTLS authentication. +The membership of the Lambda Role on the DataZone Project. --- -##### `mskAclRole`Optional +##### `createUpdateEventRule`Optional ```typescript -public readonly mskAclRole: IRole; +public readonly createUpdateEventRule: Rule; ``` -- *Type:* aws-cdk-lib.aws_iam.IRole +- *Type:* aws-cdk-lib.aws_events.Rule -The IAM Role used by the Custom Resource provider when MSK is using mTLS authentication. +The Event Bridge Rule for schema creation and update. --- -##### `mskAclSecurityGroup`Optional +##### `deleteEventRule`Optional ```typescript -public readonly mskAclSecurityGroup: ISecurityGroup[]; +public readonly deleteEventRule: Rule; ``` -- *Type:* aws-cdk-lib.aws_ec2.ISecurityGroup[] +- *Type:* aws-cdk-lib.aws_events.Rule -The Security Group used by the Custom Resource provider when MSK is using mTLS authentication. +The Event Bridge Rule for schema deletion. --- -##### `mskIamFunction`Optional - -```typescript -public readonly mskIamFunction: IFunction; -``` - -- *Type:* aws-cdk-lib.aws_lambda.IFunction - -The Lambda function used by the Custom Resource provider when MSK is using IAM authentication. - ---- - -##### `mskIamLogGroup`Optional - -```typescript -public readonly mskIamLogGroup: ILogGroup; -``` - -- *Type:* aws-cdk-lib.aws_logs.ILogGroup - -The Cloudwatch Log Group used by the Custom Resource provider when MSK is using IAM authentication. - ---- - -##### `mskIamRole`Optional - -```typescript -public readonly mskIamRole: IRole; -``` - -- *Type:* aws-cdk-lib.aws_iam.IRole - -The IAM Role used by the Custom Resource provider when MSK is using IAM authentication. - ---- - -##### `mskIamSecurityGroup`Optional +##### `scheduleRule`Optional ```typescript -public readonly mskIamSecurityGroup: ISecurityGroup[]; +public readonly scheduleRule: Rule; ``` -- *Type:* aws-cdk-lib.aws_ec2.ISecurityGroup[] +- *Type:* aws-cdk-lib.aws_events.Rule -The Security Group used by the Custom Resource provider when MSK is using IAM authentication. +The Event Bridge Rule for trigger the data source execution. --- @@ -4662,12 +4668,12 @@ The Security Group used by the Custom Resource provider when MSK is using IAM au | **Name** | **Type** | **Description** | | --- | --- | --- | -| DSF_OWNED_TAG | string | *No description.* | -| DSF_TRACKING_CODE | string | *No description.* | +| DSF_OWNED_TAG | string | *No description.* | +| DSF_TRACKING_CODE | string | *No description.* | --- -##### `DSF_OWNED_TAG`Required +##### `DSF_OWNED_TAG`Required ```typescript public readonly DSF_OWNED_TAG: string; @@ -4677,7 +4683,7 @@ public readonly DSF_OWNED_TAG: string; --- -##### `DSF_TRACKING_CODE`Required +##### `DSF_TRACKING_CODE`Required ```typescript public readonly DSF_TRACKING_CODE: string; @@ -4687,36 +4693,37 @@ public readonly DSF_TRACKING_CODE: string; --- -### MskProvisioned - -A construct to create an MSK Provisioned cluster. +### DataZoneMskAssetType -> [https://awslabs.github.io/data-solutions-framework-on-aws/](https://awslabs.github.io/data-solutions-framework-on-aws/) +A DataZone custom asset type representing an MSK topic. 
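The asset type can be paired with a data source that populates assets of this type in a project. The snippet below is a hypothetical pairing with the `DataZoneGsrMskDataSource` from this module; all IDs and names are placeholders.

```typescript
// Create the MSK topic asset type in the DataZone domain
new dsf.governance.DataZoneMskAssetType(this, 'MskAssetType', {
  domainId: 'aba_dc999t9ime9sss',
  projectId: '999999b3m5cpz',
});

// Hypothetical pairing: a data source that creates assets from
// the schemas registered in the Glue Schema Registry
new dsf.governance.DataZoneGsrMskDataSource(this, 'MskDatasource', {
  domainId: 'aba_dc999t9ime9sss',
  projectId: '999999b3m5cpz',
  registryName: 'MyRegistry',
  clusterName: 'MyCluster',
});
```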
*Example* ```typescript -const msk = new dsf.streaming.MskProvisioned(this, 'cluster'); +new dsf.governance.DataZoneMskAssetType(this, 'MskAssetType', { + domainId: 'aba_dc999t9ime9sss', + projectId: '999999b3m5cpz', +}); ``` -#### Initializers +#### Initializers ```typescript -import { streaming } from '@cdklabs/aws-data-solutions-framework' +import { governance } from '@cdklabs/aws-data-solutions-framework' -new streaming.MskProvisioned(scope: Construct, id: string, props?: MskProvisionedProps) +new governance.DataZoneMskAssetType(scope: Construct, id: string, props: DataZoneMskAssetTypeProps) ``` | **Name** | **Type** | **Description** | | --- | --- | --- | -| scope | constructs.Construct | the Scope of the CDK Construct. | -| id | string | the ID of the CDK Construct. | -| props | @cdklabs/aws-data-solutions-framework.streaming.MskProvisionedProps | *No description.* | +| scope | constructs.Construct | the Scope of the CDK Construct. | +| id | string | the ID of the CDK Construct. | +| props | @cdklabs/aws-data-solutions-framework.governance.DataZoneMskAssetTypeProps | The DataZoneMskAssetTypeProps properties. | --- -##### `scope`Required +##### `scope`Required - *Type:* constructs.Construct @@ -4724,7 +4731,7 @@ the Scope of the CDK Construct. --- -##### `id`Required +##### `id`Required - *Type:* string @@ -4732,9 +4739,11 @@ the ID of the CDK Construct. --- -##### `props`Optional +##### `props`Required -- *Type:* @cdklabs/aws-data-solutions-framework.streaming.MskProvisionedProps +- *Type:* @cdklabs/aws-data-solutions-framework.governance.DataZoneMskAssetTypeProps + +The DataZoneMskAssetTypeProps properties. --- @@ -4742,20 +4751,12 @@ the ID of the CDK Construct. | **Name** | **Description** | | --- | --- | -| toString | Returns a string representation of this construct. | -| addClusterPolicy | Add a cluster policy. | -| deleteClusterPolicy | *No description.* | -| getBootstrapBrokers | Method to get bootstrap broker connection string based on the authentication mode. | -| grantConsume | Grant a principal permissions to consume from a topic. | -| grantProduce | Grant a principal permissions to produce to a topic. | -| putClusterPolicy | *No description.* | -| retrieveVersion | Retrieve DSF package.json version. | -| setAcl | Creates ACL in the Msk Cluster. | -| setTopic | Creates a topic in the Msk Cluster. | +| toString | Returns a string representation of this construct. | +| retrieveVersion | Retrieve DSF package.json version. | --- -##### `toString` +##### `toString` ```typescript public toString(): string @@ -4763,205 +4764,224 @@ public toString(): string Returns a string representation of this construct. -##### `addClusterPolicy` +##### `retrieveVersion` ```typescript -public addClusterPolicy(policy: PolicyDocument, id: string): CfnClusterPolicy +public retrieveVersion(): any ``` -Add a cluster policy. - -###### `policy`Required +Retrieve DSF package.json version. -- *Type:* aws-cdk-lib.aws_iam.PolicyDocument +#### Static Functions -the IAM principal to grand the consume action. +| **Name** | **Description** | +| --- | --- | +| isConstruct | Checks if `x` is a construct. | --- -###### `id`Required +##### `isConstruct` -- *Type:* string +```typescript +import { governance } from '@cdklabs/aws-data-solutions-framework' -the CDK id for the Cluster Policy. +governance.DataZoneMskAssetType.isConstruct(x: any) +``` ---- +Checks if `x` is a construct. 
-##### `deleteClusterPolicy` +Use this method instead of `instanceof` to properly detect `Construct` +instances, even when the construct library is symlinked. -```typescript -public deleteClusterPolicy(): void -``` +Explanation: in JavaScript, multiple copies of the `constructs` library on +disk are seen as independent, completely different libraries. As a +consequence, the class `Construct` in each copy of the `constructs` library +is seen as a different class, and an instance of one class will not test as +`instanceof` the other class. `npm install` will not create installations +like this, but users may manually symlink construct libraries together or +use a monorepo tool: in those cases, multiple copies of the `constructs` +library can be accidentally installed, and `instanceof` will behave +unpredictably. It is safest to avoid using `instanceof`, and using +this type-testing method instead. -##### `getBootstrapBrokers` +###### `x`Required -```typescript -public getBootstrapBrokers(authentication: Authentication): string -``` +- *Type:* any -Method to get bootstrap broker connection string based on the authentication mode. +Any object. -###### `authentication`Required +--- -- *Type:* @cdklabs/aws-data-solutions-framework.streaming.Authentication +#### Properties -the authentication mode. +| **Name** | **Type** | **Description** | +| --- | --- | --- | +| node | constructs.Node | The tree node. | +| mskCustomAssetType | @cdklabs/aws-data-solutions-framework.governance.CustomAssetType | The custom asset type for MSK. | +| owningProject | aws-cdk-lib.aws_datazone.CfnProject | The project owning the MSK asset type. | --- -##### `grantConsume` +##### `node`Required ```typescript -public grantConsume(id: string, topicName: string, clientAuthentication: Authentication, principal: string | IPrincipal, host?: string, removalPolicy?: RemovalPolicy, customResourceAuthentication?: Authentication): CustomResource +public readonly node: Node; ``` -Grant a principal permissions to consume from a topic. - -###### `id`Required - -- *Type:* string +- *Type:* constructs.Node -the CDK resource ID. +The tree node. --- -###### `topicName`Required - -- *Type:* string - -the target topic to grant consume permissions on. - ---- +##### `mskCustomAssetType`Required -###### `clientAuthentication`Required +```typescript +public readonly mskCustomAssetType: CustomAssetType; +``` -- *Type:* @cdklabs/aws-data-solutions-framework.streaming.Authentication +- *Type:* @cdklabs/aws-data-solutions-framework.governance.CustomAssetType -The authentication mode of the consumer. +The custom asset type for MSK. --- -###### `principal`Required - -- *Type:* string | aws-cdk-lib.aws_iam.IPrincipal - -the principal receiveing grant consume permissions. - ---- +##### `owningProject`Optional -###### `host`Optional +```typescript +public readonly owningProject: CfnProject; +``` -- *Type:* string +- *Type:* aws-cdk-lib.aws_datazone.CfnProject -the host of the consumer. +The project owning the MSK asset type. --- -###### `removalPolicy`Optional - -- *Type:* aws-cdk-lib.RemovalPolicy +#### Constants -the removal policy to apply to the grant. 
+| **Name** | **Type** | **Description** | +| --- | --- | --- | +| DSF_OWNED_TAG | string | *No description.* | +| DSF_TRACKING_CODE | string | *No description.* | --- -###### `customResourceAuthentication`Optional +##### `DSF_OWNED_TAG`Required -- *Type:* @cdklabs/aws-data-solutions-framework.streaming.Authentication +```typescript +public readonly DSF_OWNED_TAG: string; +``` -The authentication used by the Kafka API admin client to create the ACL. +- *Type:* string --- -##### `grantProduce` +##### `DSF_TRACKING_CODE`Required ```typescript -public grantProduce(id: string, topicName: string, clientAuthentication: Authentication, principal: string | IPrincipal, host?: string, removalPolicy?: RemovalPolicy, customResourceAuthentication?: Authentication): CustomResource +public readonly DSF_TRACKING_CODE: string; ``` -Grant a principal permissions to produce to a topic. +- *Type:* string -###### `id`Required +--- -- *Type:* string +### DataZoneMskCentralAuthorizer -the CDK resource ID. +A central authorizer workflow for granting read access to Kafka topics. ---- +The workflow is triggered by an event sent to the DataZone event bus. +First, it collects metadata from DataZone about the Kafka topics. +Then, it grants access to the relevant IAM roles. +Finally acknowledge the subscription grant in DataZone. -###### `topicName`Required +*Example* -- *Type:* string +```typescript +new dsf.governance.DataZoneMskCentralAuthorizer(this, 'MskAuthorizer', { + domainId: 'aba_dc999t9ime9sss', +}); +``` -the target topic to grant produce permissions on. ---- +#### Initializers -###### `clientAuthentication`Required +```typescript +import { governance } from '@cdklabs/aws-data-solutions-framework' -- *Type:* @cdklabs/aws-data-solutions-framework.streaming.Authentication +new governance.DataZoneMskCentralAuthorizer(scope: Construct, id: string, props: DataZoneMskCentralAuthorizerProps) +``` -The authentication mode of the producer. +| **Name** | **Type** | **Description** | +| --- | --- | --- | +| scope | constructs.Construct | the Scope of the CDK Construct. | +| id | string | the ID of the CDK Construct. | +| props | @cdklabs/aws-data-solutions-framework.governance.DataZoneMskCentralAuthorizerProps | The DataZoneMskCentralAuthorizer properties. | --- -###### `principal`Required +##### `scope`Required -- *Type:* string | aws-cdk-lib.aws_iam.IPrincipal +- *Type:* constructs.Construct -the principal receiving grant produce permissions. +the Scope of the CDK Construct. --- -###### `host`Optional +##### `id`Required - *Type:* string -the host of the producer. +the ID of the CDK Construct. --- -###### `removalPolicy`Optional +##### `props`Required -- *Type:* aws-cdk-lib.RemovalPolicy +- *Type:* @cdklabs/aws-data-solutions-framework.governance.DataZoneMskCentralAuthorizerProps -the removal policy to apply to the grant. +The DataZoneMskCentralAuthorizer properties. --- -###### `customResourceAuthentication`Optional - -- *Type:* @cdklabs/aws-data-solutions-framework.streaming.Authentication +#### Methods -The authentication used by the Kafka API admin client to create the ACL. +| **Name** | **Description** | +| --- | --- | +| toString | Returns a string representation of this construct. | +| registerAccount | Connect the central authorizer workflow with environment authorizer workflows in other accounts. | +| retrieveVersion | Retrieve DSF package.json version. 
| --- -##### `putClusterPolicy` +##### `toString` ```typescript -public putClusterPolicy(policy: string, id: string, currentVersion?: string): void +public toString(): string ``` -###### `policy`Required - -- *Type:* string +Returns a string representation of this construct. ---- +##### `registerAccount` -###### `id`Required +```typescript +public registerAccount(accountId: string): CfnEventBusPolicy +``` -- *Type:* string +Connect the central authorizer workflow with environment authorizer workflows in other accounts. ---- +This method grants the environment workflow to send events in the default Event Bridge bus for orchestration. -###### `currentVersion`Optional +###### `accountId`Required - *Type:* string +The account ID to register the authorizer with. + --- -##### `retrieveVersion` +##### `retrieveVersion` ```typescript public retrieveVersion(): any @@ -4969,120 +4989,23 @@ public retrieveVersion(): any Retrieve DSF package.json version. -##### `setAcl` - -```typescript -public setAcl(id: string, aclDefinition: Acl, removalPolicy?: RemovalPolicy, clientAuthentication?: Authentication): CustomResource -``` - -Creates ACL in the Msk Cluster. - -###### `id`Required - -- *Type:* string +#### Static Functions -the CDK ID of the ACL. +| **Name** | **Description** | +| --- | --- | +| isConstruct | Checks if `x` is a construct. | --- -###### `aclDefinition`Required +##### `isConstruct` -- *Type:* @cdklabs/aws-data-solutions-framework.streaming.Acl +```typescript +import { governance } from '@cdklabs/aws-data-solutions-framework' -the Kafka Acl definition. +governance.DataZoneMskCentralAuthorizer.isConstruct(x: any) +``` ---- - -###### `removalPolicy`Optional - -- *Type:* aws-cdk-lib.RemovalPolicy - -Wether to keep the ACL or delete it when removing the resource from the Stack {@default RemovalPolicy.RETAIN}. - ---- - -###### `clientAuthentication`Optional - -- *Type:* @cdklabs/aws-data-solutions-framework.streaming.Authentication - -The authentication used by the Kafka API admin client to create the ACL. - ---- - -##### `setTopic` - -```typescript -public setTopic(id: string, clientAuthentication: Authentication, topicDefinition: MskTopic, removalPolicy?: RemovalPolicy, waitForLeaders?: boolean, timeout?: number): CustomResource -``` - -Creates a topic in the Msk Cluster. - -###### `id`Required - -- *Type:* string - -the CDK ID of the Topic. - ---- - -###### `clientAuthentication`Required - -- *Type:* @cdklabs/aws-data-solutions-framework.streaming.Authentication - -The authentication used by the Kafka API admin client to create the topic. - ---- - -###### `topicDefinition`Required - -- *Type:* @cdklabs/aws-data-solutions-framework.streaming.MskTopic - -the Kafka topic definition. - ---- - -###### `removalPolicy`Optional - -- *Type:* aws-cdk-lib.RemovalPolicy - -Wether to keep the topic or delete it when removing the resource from the Stack {@default RemovalPolicy.RETAIN}. - ---- - -###### `waitForLeaders`Optional - -- *Type:* boolean - -If this is true it will wait until metadata for the new topics doesn't throw LEADER_NOT_AVAILABLE. - ---- - -###### `timeout`Optional - -- *Type:* number - -The time in ms to wait for a topic to be completely created on the controller node. - ---- - -#### Static Functions - -| **Name** | **Description** | -| --- | --- | -| isConstruct | Checks if `x` is a construct. 
| -| createClusterConfiguration | *No description.* | - ---- - -##### `isConstruct` - -```typescript -import { streaming } from '@cdklabs/aws-data-solutions-framework' - -streaming.MskProvisioned.isConstruct(x: any) -``` - -Checks if `x` is a construct. +Checks if `x` is a construct. Use this method instead of `instanceof` to properly detect `Construct` instances, even when the construct library is symlinked. @@ -5098,7 +5021,7 @@ library can be accidentally installed, and `instanceof` will behave unpredictably. It is safest to avoid using `instanceof`, and using this type-testing method instead. -###### `x`Required +###### `x`Required - *Type:* any @@ -5106,91 +5029,26 @@ Any object. --- -##### `createClusterConfiguration` - -```typescript -import { streaming } from '@cdklabs/aws-data-solutions-framework' - -streaming.MskProvisioned.createClusterConfiguration(scope: Construct, id: string, name: string, serverPropertiesFilePath: string, kafkaVersions?: KafkaVersion[], configurationDescription?: string, latestRevision?: LatestRevisionProperty) -``` - -###### `scope`Required - -- *Type:* constructs.Construct - ---- - -###### `id`Required - -- *Type:* string - ---- - -###### `name`Required - -- *Type:* string - ---- - -###### `serverPropertiesFilePath`Required - -- *Type:* string - ---- - -###### `kafkaVersions`Optional - -- *Type:* @cdklabs/aws-data-solutions-framework.streaming.KafkaVersion[] - ---- - -###### `configurationDescription`Optional - -- *Type:* string - ---- - -###### `latestRevision`Optional - -- *Type:* aws-cdk-lib.aws_msk.CfnConfiguration.LatestRevisionProperty - ---- - #### Properties | **Name** | **Type** | **Description** | | --- | --- | --- | -| node | constructs.Node | The tree node. | -| cluster | aws-cdk-lib.aws_msk.CfnCluster | The MSK cluster created by the construct. | -| encryptionAtRestKey | aws-cdk-lib.aws_kms.IKey | The KMS CMK key for encrypting data within the cluster. | -| vpc | aws-cdk-lib.aws_ec2.IVpc | The VPC where the MSK cluster is deployed. | -| applyConfigurationFunction | aws-cdk-lib.aws_lambda.IFunction | The Lambda function responsible for applying MSK configuration. | -| applyConfigurationLogGroup | aws-cdk-lib.aws_logs.ILogGroup | The CloudWatch Log Group used by the Lambda responsible for applying MSK configuration. | -| applyConfigurationRole | aws-cdk-lib.aws_iam.IRole | The IAM Role used by the Lambda responsible for applying MSK configuration. | -| applyConfigurationSecurityGroup | aws-cdk-lib.aws_ec2.ISecurityGroup[] | The Security Group used by the Lambda responsible for applying MSK configuration. | -| brokerLogGroup | aws-cdk-lib.aws_logs.ILogGroup | The CloudWatch log group associated with brokers activity. | -| brokerSecurityGroup | aws-cdk-lib.aws_ec2.ISecurityGroup | The security group associated with the MSK brokers. | -| clusterConfiguration | aws-cdk-lib.aws_msk.CfnConfiguration | The MSK cluster configuration. | -| iamCrudAdminFunction | aws-cdk-lib.aws_lambda.IFunction | The Lambda function responsible for CRUD operations via IAM authentication. | -| iamCrudAdminLogGroup | aws-cdk-lib.aws_logs.ILogGroup | The CloudWatch Log Group used by the Lambda responsible for CRUD operations via IAM authentication. | -| iamCrudAdminRole | aws-cdk-lib.aws_iam.IRole | The IAM role used by the Lambda responsible for CRUD operations via IAM authentication. | -| iamCrudAdminSecurityGroup | aws-cdk-lib.aws_ec2.ISecurityGroup[] | The Security Group used by the Lambda responsible for CRUD operations via IAM authentication. 
| -| inClusterAclFunction | aws-cdk-lib.aws_lambda.IFunction | The Lambda function responsible for CRUD operations via mTLS authentication. | -| inClusterAclLogGroup | aws-cdk-lib.aws_logs.ILogGroup | The CloudWatch Log Group used by the Lambda responsible for CRUD operations via mTLS authentication. | -| inClusterAclRole | aws-cdk-lib.aws_iam.IRole | The IAM role used by the Lambda responsible for CRUD operations via mTLS authentication. | -| inClusterAclSecurityGroup | aws-cdk-lib.aws_ec2.ISecurityGroup[] | The Security Group used by the Lambda responsible for CRUD operations via mTLS authentication. | -| updateConnectivityFunction | aws-cdk-lib.aws_lambda.IFunction | The Lambda function responsible for updating MSK Connectivity. | -| updateConnectivityLogGroup | aws-cdk-lib.aws_logs.ILogGroup | The CloudWatch Log Group used by the Lambda responsible for updating MSK Connectivity. | -| updateConnectivityRole | aws-cdk-lib.aws_iam.IRole | The IAM Role used by the Lambda responsible for updating MSK Connectivity. | -| updateConnectivitySecurityGroup | aws-cdk-lib.aws_ec2.ISecurityGroup[] | The Security Group used by the Lambda responsible for updating MSK Connectivity. | -| updateZookepeerFunction | aws-cdk-lib.aws_lambda.IFunction | The Lambda function responsible for updating Zookeeper. | -| updateZookepeerLogGroup | aws-cdk-lib.aws_logs.ILogGroup | The CloudWatch Log Group used by the Lambda responsible for updating Zookeeper. | -| updateZookepeerRole | aws-cdk-lib.aws_iam.IRole | The IAM Role used by the Lambda responsible for updating Zookeeper. | -| updateZookepeerSecurityGroup | aws-cdk-lib.aws_ec2.ISecurityGroup | THe Security Group associated to the Lambda responsible for updating Zookeeper. | +| node | constructs.Node | The tree node. | +| callbackEventRule | aws-cdk-lib.aws_events.IRule | The event rule used to listen for producer and subscriber grants callback. | +| callbackFunction | aws-cdk-lib.aws_lambda.IFunction | The Lambda function used to handle producer and subscriber grants callback. | +| callbackRole | aws-cdk-lib.aws_iam.IRole | The role used by the Lambda function handling producer and subscriber grants callback. | +| datazoneCallbackFunction | aws-cdk-lib.aws_lambda.IFunction | The Lambda function used to acknowledge the subscription grant in DataZone. | +| datazoneCallbackRole | aws-cdk-lib.aws_iam.IRole | The role used to acknowledge the subscription grant in DataZone. | +| datazoneEventRole | aws-cdk-lib.aws_iam.IRole | The role used by the DataZone event to trigger the authorizer workflow. | +| datazoneEventRule | aws-cdk-lib.aws_events.IRule | The event rule used to trigger the authorizer workflow. | +| deadLetterQueue | aws-cdk-lib.aws_sqs.IQueue | The dead letter queue for the authorizer workflow. | +| metadataCollectorFunction | aws-cdk-lib.aws_lambda.IFunction | The Lambda function used to collect metadata from DataZone. | +| metadataCollectorRole | aws-cdk-lib.aws_iam.IRole | The role used to collect metadata from DataZone. | +| stateMachine | aws-cdk-lib.aws_stepfunctions.StateMachine | The state machine used to orchestrate the authorizer workflow. | --- -##### `node`Required +##### `node`Required ```typescript public readonly node: Node; @@ -5202,315 +5060,423 @@ The tree node. 
--- -##### `cluster`Required +##### `callbackEventRule`Required ```typescript -public readonly cluster: CfnCluster; +public readonly callbackEventRule: IRule; ``` -- *Type:* aws-cdk-lib.aws_msk.CfnCluster +- *Type:* aws-cdk-lib.aws_events.IRule -The MSK cluster created by the construct. +The event rule used to listen for producer and subscriber grants callback. --- -##### `encryptionAtRestKey`Required +##### `callbackFunction`Required ```typescript -public readonly encryptionAtRestKey: IKey; +public readonly callbackFunction: IFunction; ``` -- *Type:* aws-cdk-lib.aws_kms.IKey +- *Type:* aws-cdk-lib.aws_lambda.IFunction -The KMS CMK key for encrypting data within the cluster. +The Lambda function used to handle producer and subscriber grants callback. --- -##### `vpc`Required +##### `callbackRole`Required ```typescript -public readonly vpc: IVpc; +public readonly callbackRole: IRole; ``` -- *Type:* aws-cdk-lib.aws_ec2.IVpc +- *Type:* aws-cdk-lib.aws_iam.IRole -The VPC where the MSK cluster is deployed. +The role used by the Lambda function handling producer and subscriber grants callback. --- -##### `applyConfigurationFunction`Optional +##### `datazoneCallbackFunction`Required ```typescript -public readonly applyConfigurationFunction: IFunction; +public readonly datazoneCallbackFunction: IFunction; ``` - *Type:* aws-cdk-lib.aws_lambda.IFunction -The Lambda function responsible for applying MSK configuration. +The Lambda function used to acknowledge the subscription grant in DataZone. --- -##### `applyConfigurationLogGroup`Optional +##### `datazoneCallbackRole`Required ```typescript -public readonly applyConfigurationLogGroup: ILogGroup; +public readonly datazoneCallbackRole: IRole; ``` -- *Type:* aws-cdk-lib.aws_logs.ILogGroup +- *Type:* aws-cdk-lib.aws_iam.IRole -The CloudWatch Log Group used by the Lambda responsible for applying MSK configuration. +The role used to acknowledge the subscription grant in DataZone. --- -##### `applyConfigurationRole`Optional +##### `datazoneEventRole`Required ```typescript -public readonly applyConfigurationRole: IRole; +public readonly datazoneEventRole: IRole; ``` - *Type:* aws-cdk-lib.aws_iam.IRole -The IAM Role used by the Lambda responsible for applying MSK configuration. +The role used by the DataZone event to trigger the authorizer workflow. --- -##### `applyConfigurationSecurityGroup`Optional +##### `datazoneEventRule`Required ```typescript -public readonly applyConfigurationSecurityGroup: ISecurityGroup[]; +public readonly datazoneEventRule: IRule; ``` -- *Type:* aws-cdk-lib.aws_ec2.ISecurityGroup[] +- *Type:* aws-cdk-lib.aws_events.IRule -The Security Group used by the Lambda responsible for applying MSK configuration. +The event rule used to trigger the authorizer workflow. --- -##### `brokerLogGroup`Optional +##### `deadLetterQueue`Required ```typescript -public readonly brokerLogGroup: ILogGroup; +public readonly deadLetterQueue: IQueue; ``` -- *Type:* aws-cdk-lib.aws_logs.ILogGroup +- *Type:* aws-cdk-lib.aws_sqs.IQueue -The CloudWatch log group associated with brokers activity. +The dead letter queue for the authorizer workflow. --- -##### `brokerSecurityGroup`Optional +##### `metadataCollectorFunction`Required ```typescript -public readonly brokerSecurityGroup: ISecurityGroup; +public readonly metadataCollectorFunction: IFunction; ``` -- *Type:* aws-cdk-lib.aws_ec2.ISecurityGroup +- *Type:* aws-cdk-lib.aws_lambda.IFunction -The security group associated with the MSK brokers. +The Lambda function used to collect metadata from DataZone. 
---

##### `metadataCollectorRole`Required

```typescript
public readonly metadataCollectorRole: IRole;
```

- *Type:* aws-cdk-lib.aws_iam.IRole

The role used to collect metadata from DataZone.

---

##### `stateMachine`Required

```typescript
public readonly stateMachine: StateMachine;
```

- *Type:* aws-cdk-lib.aws_stepfunctions.StateMachine

The state machine used to orchestrate the authorizer workflow.

---

#### Constants

| **Name** | **Type** | **Description** |
| --- | --- | --- |
| AUTHORIZER_NAME | string | The name of the authorizer. |
| DSF_OWNED_TAG | string | *No description.* |
| DSF_TRACKING_CODE | string | *No description.* |
| MSK_ASSET_TYPE | string | The asset type for the DataZone custom asset type. |

---

##### `AUTHORIZER_NAME`Required

```typescript
public readonly AUTHORIZER_NAME: string;
```

- *Type:* string

The name of the authorizer.

---

##### `DSF_OWNED_TAG`Required

```typescript
public readonly DSF_OWNED_TAG: string;
```

- *Type:* string

---

##### `DSF_TRACKING_CODE`Required

```typescript
public readonly DSF_TRACKING_CODE: string;
```

- *Type:* string

---

##### `MSK_ASSET_TYPE`Required

```typescript
public readonly MSK_ASSET_TYPE: string;
```

- *Type:* string

The asset type for the DataZone custom asset type.

---

### DataZoneMskEnvironmentAuthorizer

An environment authorizer workflow for granting read access to Kafka topics.

The workflow is triggered by an event sent by the central authorizer construct.
It creates the IAM policies required for the Kafka clients to access the relevant topics.
It supports both MSK Provisioned and MSK Serverless clusters, in single-account and cross-account deployments, and handles both grant and revoke requests.
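In cross-account deployments, the account hosting the environment authorizer must also be registered on the `DataZoneMskCentralAuthorizer` so the two workflows can exchange events. The sketch below is illustrative only, assuming the central authorizer runs in the DataZone domain account; the domain ID and account ID are placeholders.

```typescript
// In the DataZone domain (governance) account:
const centralAuthorizer = new dsf.governance.DataZoneMskCentralAuthorizer(this, 'CentralAuthorizer', {
  domainId: 'aba_dc999t9ime9sss',
});
// Allow the consumer account to exchange events with the central workflow
centralAuthorizer.registerAccount('123456789012');

// In the consumer account:
new dsf.governance.DataZoneMskEnvironmentAuthorizer(this, 'EnvironmentAuthorizer', {
  domainId: 'aba_dc999t9ime9sss',
});
```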
+ +*Example* + +```typescript +new dsf.governance.DataZoneMskEnvironmentAuthorizer(this, 'MskAuthorizer', { + domainId: 'aba_dc999t9ime9sss', +}); ``` -- *Type:* aws-cdk-lib.aws_iam.IRole -The IAM role used by the Lambda responsible for CRUD operations via mTLS authentication. +#### Initializers + +```typescript +import { governance } from '@cdklabs/aws-data-solutions-framework' + +new governance.DataZoneMskEnvironmentAuthorizer(scope: Construct, id: string, props: DataZoneMskEnvironmentAuthorizerProps) +``` + +| **Name** | **Type** | **Description** | +| --- | --- | --- | +| scope | constructs.Construct | The CDK Construct scope. | +| id | string | The CDK Construct id. | +| props | @cdklabs/aws-data-solutions-framework.governance.DataZoneMskEnvironmentAuthorizerProps | The props for the DataZoneMskEnvironmentAuthorizer construct. | --- -##### `inClusterAclSecurityGroup`Optional +##### `scope`Required + +- *Type:* constructs.Construct + +The CDK Construct scope. + +--- + +##### `id`Required + +- *Type:* string + +The CDK Construct id. + +--- + +##### `props`Required + +- *Type:* @cdklabs/aws-data-solutions-framework.governance.DataZoneMskEnvironmentAuthorizerProps + +The props for the DataZoneMskEnvironmentAuthorizer construct. + +--- + +#### Methods + +| **Name** | **Description** | +| --- | --- | +| toString | Returns a string representation of this construct. | +| retrieveVersion | Retrieve DSF package.json version. | + +--- + +##### `toString` ```typescript -public readonly inClusterAclSecurityGroup: ISecurityGroup[]; +public toString(): string ``` -- *Type:* aws-cdk-lib.aws_ec2.ISecurityGroup[] +Returns a string representation of this construct. -The Security Group used by the Lambda responsible for CRUD operations via mTLS authentication. +##### `retrieveVersion` + +```typescript +public retrieveVersion(): any +``` + +Retrieve DSF package.json version. + +#### Static Functions + +| **Name** | **Description** | +| --- | --- | +| isConstruct | Checks if `x` is a construct. | --- -##### `updateConnectivityFunction`Optional +##### `isConstruct` ```typescript -public readonly updateConnectivityFunction: IFunction; +import { governance } from '@cdklabs/aws-data-solutions-framework' + +governance.DataZoneMskEnvironmentAuthorizer.isConstruct(x: any) ``` -- *Type:* aws-cdk-lib.aws_lambda.IFunction +Checks if `x` is a construct. -The Lambda function responsible for updating MSK Connectivity. +Use this method instead of `instanceof` to properly detect `Construct` +instances, even when the construct library is symlinked. + +Explanation: in JavaScript, multiple copies of the `constructs` library on +disk are seen as independent, completely different libraries. As a +consequence, the class `Construct` in each copy of the `constructs` library +is seen as a different class, and an instance of one class will not test as +`instanceof` the other class. `npm install` will not create installations +like this, but users may manually symlink construct libraries together or +use a monorepo tool: in those cases, multiple copies of the `constructs` +library can be accidentally installed, and `instanceof` will behave +unpredictably. It is safest to avoid using `instanceof`, and using +this type-testing method instead. + +###### `x`Required + +- *Type:* any + +Any object. --- -##### `updateConnectivityLogGroup`Optional +#### Properties + +| **Name** | **Type** | **Description** | +| --- | --- | --- | +| node | constructs.Node | The tree node. | +| deadLetterQueue | any | The dead letter queue for the events. 
| +| eventRole | aws-cdk-lib.aws_iam.IRole | The role used by the events to trigger the authorizer workflow. | +| eventRule | aws-cdk-lib.aws_events.IRule | The event rule used to trigger the authorizer workflow. | +| grantFunction | aws-cdk-lib.aws_lambda.IFunction | The lambda function used to grant access to Kafka topics. | +| grantRole | aws-cdk-lib.aws_iam.IRole | The IAM role used to grant access to Kafka topics. | +| stateMachine | aws-cdk-lib.aws_stepfunctions.IStateMachine | The state machine used to orchestrate the authorizer workflow. | +| eventBusPolicy | aws-cdk-lib.aws_events.CfnEventBusPolicy | The event bus policy used to receive events from the central authorizer. | + +--- + +##### `node`Required ```typescript -public readonly updateConnectivityLogGroup: ILogGroup; +public readonly node: Node; ``` -- *Type:* aws-cdk-lib.aws_logs.ILogGroup +- *Type:* constructs.Node -The CloudWatch Log Group used by the Lambda responsible for updating MSK Connectivity. +The tree node. --- -##### `updateConnectivityRole`Optional +##### `deadLetterQueue`Required ```typescript -public readonly updateConnectivityRole: IRole; +public readonly deadLetterQueue: any; +``` + +- *Type:* any + +The dead letter queue for the events. + +--- + +##### `eventRole`Required + +```typescript +public readonly eventRole: IRole; ``` - *Type:* aws-cdk-lib.aws_iam.IRole -The IAM Role used by the Lambda responsible for updating MSK Connectivity. +The role used by the events to trigger the authorizer workflow. --- -##### `updateConnectivitySecurityGroup`Optional +##### `eventRule`Required ```typescript -public readonly updateConnectivitySecurityGroup: ISecurityGroup[]; +public readonly eventRule: IRule; ``` -- *Type:* aws-cdk-lib.aws_ec2.ISecurityGroup[] +- *Type:* aws-cdk-lib.aws_events.IRule -The Security Group used by the Lambda responsible for updating MSK Connectivity. +The event rule used to trigger the authorizer workflow. --- -##### `updateZookepeerFunction`Optional +##### `grantFunction`Required ```typescript -public readonly updateZookepeerFunction: IFunction; +public readonly grantFunction: IFunction; ``` - *Type:* aws-cdk-lib.aws_lambda.IFunction -The Lambda function responsible for updating Zookeeper. +The lambda function used to grant access to Kafka topics. --- -##### `updateZookepeerLogGroup`Optional +##### `grantRole`Required ```typescript -public readonly updateZookepeerLogGroup: ILogGroup; +public readonly grantRole: IRole; ``` -- *Type:* aws-cdk-lib.aws_logs.ILogGroup +- *Type:* aws-cdk-lib.aws_iam.IRole -The CloudWatch Log Group used by the Lambda responsible for updating Zookeeper. +The IAM role used to grant access to Kafka topics. --- -##### `updateZookepeerRole`Optional +##### `stateMachine`Required ```typescript -public readonly updateZookepeerRole: IRole; +public readonly stateMachine: IStateMachine; ``` -- *Type:* aws-cdk-lib.aws_iam.IRole +- *Type:* aws-cdk-lib.aws_stepfunctions.IStateMachine -The IAM Role used by the Lambda responsible for updating Zookeeper. +The state machine used to orchestrate the authorizer workflow. --- -##### `updateZookepeerSecurityGroup`Optional +##### `eventBusPolicy`Optional ```typescript -public readonly updateZookepeerSecurityGroup: ISecurityGroup; +public readonly eventBusPolicy: CfnEventBusPolicy; ``` -- *Type:* aws-cdk-lib.aws_ec2.ISecurityGroup +- *Type:* aws-cdk-lib.aws_events.CfnEventBusPolicy -THe Security Group associated to the Lambda responsible for updating Zookeeper. +The event bus policy used to receive events from the central authorizer. 
--- @@ -5518,13 +5484,12 @@ THe Security Group associated to the Lambda responsible for updating Zookeeper. | **Name** | **Type** | **Description** | | --- | --- | --- | -| DSF_OWNED_TAG | string | *No description.* | -| DSF_TRACKING_CODE | string | *No description.* | -| MSK_DEFAULT_VERSION | @cdklabs/aws-data-solutions-framework.streaming.KafkaVersion | *No description.* | +| DSF_OWNED_TAG | string | *No description.* | +| DSF_TRACKING_CODE | string | *No description.* | --- -##### `DSF_OWNED_TAG`Required +##### `DSF_OWNED_TAG`Required ```typescript public readonly DSF_OWNED_TAG: string; @@ -5534,7 +5499,7 @@ public readonly DSF_OWNED_TAG: string; --- -##### `DSF_TRACKING_CODE`Required +##### `DSF_TRACKING_CODE`Required ```typescript public readonly DSF_TRACKING_CODE: string; @@ -5544,46 +5509,29 @@ public readonly DSF_TRACKING_CODE: string; --- -##### `MSK_DEFAULT_VERSION`Required - -```typescript -public readonly MSK_DEFAULT_VERSION: KafkaVersion; -``` - -- *Type:* @cdklabs/aws-data-solutions-framework.streaming.KafkaVersion - ---- - -### MskServerless +### KafkaApi -A construct to create an MSK Serverless cluster. +A construct to create a Kafka API admin client. > [https://awslabs.github.io/data-solutions-framework-on-aws/](https://awslabs.github.io/data-solutions-framework-on-aws/) -*Example* - -```typescript -const msk = new dsf.streaming.MskServerless(this, 'cluster'); -``` - - -#### Initializers +#### Initializers ```typescript import { streaming } from '@cdklabs/aws-data-solutions-framework' -new streaming.MskServerless(scope: Construct, id: string, props?: MskServerlessProps) +new streaming.KafkaApi(scope: Construct, id: string, props: KafkaApiProps) ``` | **Name** | **Type** | **Description** | | --- | --- | --- | -| scope | constructs.Construct | the Scope of the CDK Construct. | -| id | string | the ID of the CDK Construct. | -| props | @cdklabs/aws-data-solutions-framework.streaming.MskServerlessProps | *No description.* | +| scope | constructs.Construct | the Scope of the CDK Construct. | +| id | string | the ID of the CDK Construct. | +| props | @cdklabs/aws-data-solutions-framework.streaming.KafkaApiProps | *No description.* | --- -##### `scope`Required +##### `scope`Required - *Type:* constructs.Construct @@ -5591,7 +5539,7 @@ the Scope of the CDK Construct. --- -##### `id`Required +##### `id`Required - *Type:* string @@ -5599,9 +5547,9 @@ the ID of the CDK Construct. --- -##### `props`Optional +##### `props`Required -- *Type:* @cdklabs/aws-data-solutions-framework.streaming.MskServerlessProps +- *Type:* @cdklabs/aws-data-solutions-framework.streaming.KafkaApiProps --- @@ -5609,16 +5557,16 @@ the ID of the CDK Construct. | **Name** | **Description** | | --- | --- | -| toString | Returns a string representation of this construct. | -| addClusterPolicy | Add a cluster policy. | -| addTopic | Creates a topic in the MSK Serverless. | -| grantConsume | Grant a principal the right to consume data from a topic. | -| grantProduce | Grant a principal to produce data to a topic. | -| retrieveVersion | Retrieve DSF package.json version. | +| toString | Returns a string representation of this construct. | +| grantConsume | Grant a principal permissions to consume from a topic. | +| grantProduce | Grant a principal permissions to produce to a topic. | +| retrieveVersion | Retrieve DSF package.json version. | +| setAcl | Creates a ACL in the MSK Cluster. | +| setTopic | Creates a topic in the MSK Cluster. 
| --- -##### `toString` +##### `toString` ```typescript public toString(): string @@ -5626,127 +5574,135 @@ public toString(): string Returns a string representation of this construct. -##### `addClusterPolicy` +##### `grantConsume` ```typescript -public addClusterPolicy(policy: PolicyDocument, id: string): CfnClusterPolicy +public grantConsume(id: string, topicName: string, clientAuthentication: Authentication, principal: string | IPrincipal, host?: string, removalPolicy?: RemovalPolicy, customResourceAuthentication?: Authentication): CustomResource ``` -Add a cluster policy. +Grant a principal permissions to consume from a topic. -###### `policy`Required +###### `id`Required -- *Type:* aws-cdk-lib.aws_iam.PolicyDocument +- *Type:* string -the IAM principal to grand the consume action. +the CDK resource ID. --- -###### `id`Required +###### `topicName`Required - *Type:* string -the CDK id for the Cluster Policy. +the target topic to grant consume permissions on. --- -##### `addTopic` - -```typescript -public addTopic(id: string, topicDefinition: MskTopic, removalPolicy?: RemovalPolicy, waitForLeaders?: boolean, timeout?: number): CustomResource -``` - -Creates a topic in the MSK Serverless. - -###### `id`Required +###### `clientAuthentication`Required -- *Type:* string +- *Type:* @cdklabs/aws-data-solutions-framework.streaming.Authentication -the CDK id for the topic. +The authentication mode of the consumer. --- -###### `topicDefinition`Required +###### `principal`Required -- *Type:* @cdklabs/aws-data-solutions-framework.streaming.MskTopic +- *Type:* string | aws-cdk-lib.aws_iam.IPrincipal -the Kafka topic definition. +the principal receiveing grant consume permissions. --- -###### `removalPolicy`Optional +###### `host`Optional -- *Type:* aws-cdk-lib.RemovalPolicy +- *Type:* string -Wether to keep the topic or delete it when removing the resource from the Stack. +the host of the consumer. --- -###### `waitForLeaders`Optional +###### `removalPolicy`Optional -- *Type:* boolean +- *Type:* aws-cdk-lib.RemovalPolicy -Wait until metadata for the new topics doesn't throw LEADER_NOT_AVAILABLE. +the removal policy to apply to the grant. --- -###### `timeout`Optional +###### `customResourceAuthentication`Optional -- *Type:* number +- *Type:* @cdklabs/aws-data-solutions-framework.streaming.Authentication -The time in ms to wait for a topic to be completely created on the controller node. +The authentication used by the Kafka API admin client to create the ACL. --- -##### `grantConsume` +##### `grantProduce` ```typescript -public grantConsume(topicName: string, principal: IPrincipal): CustomResource +public grantProduce(id: string, topicName: string, clientAuthentication: Authentication, principal: string | IPrincipal, host?: string, removalPolicy?: RemovalPolicy, customResourceAuthentication?: Authentication): CustomResource ``` -Grant a principal the right to consume data from a topic. +Grant a principal permissions to produce to a topic. -###### `topicName`Required +###### `id`Required - *Type:* string -the topic to which the principal can consume data from. +the CDK resource ID. --- -###### `principal`Required +###### `topicName`Required -- *Type:* aws-cdk-lib.aws_iam.IPrincipal +- *Type:* string -the IAM principal to grand the consume action. +the target topic to grant produce permissions on. 
--- -##### `grantProduce` +###### `clientAuthentication`Required -```typescript -public grantProduce(topicName: string, principal: IPrincipal): CustomResource -``` +- *Type:* @cdklabs/aws-data-solutions-framework.streaming.Authentication -Grant a principal to produce data to a topic. +The authentication mode of the producer. -###### `topicName`Required +--- + +###### `principal`Required + +- *Type:* string | aws-cdk-lib.aws_iam.IPrincipal + +the principal receiving grant produce permissions. + +--- + +###### `host`Optional - *Type:* string -the name of the topic to grant producer permissions. +the host of the producer. --- -###### `principal`Required +###### `removalPolicy`Optional -- *Type:* aws-cdk-lib.aws_iam.IPrincipal +- *Type:* aws-cdk-lib.RemovalPolicy -the IAM principal to grand producer permissions. +the removal policy to apply to the grant. --- -##### `retrieveVersion` +###### `customResourceAuthentication`Optional + +- *Type:* @cdklabs/aws-data-solutions-framework.streaming.Authentication + +The authentication used by the Kafka API admin client to create the ACL. + +--- + +##### `retrieveVersion` ```typescript public retrieveVersion(): any @@ -5754,20 +5710,116 @@ public retrieveVersion(): any Retrieve DSF package.json version. +##### `setAcl` + +```typescript +public setAcl(id: string, aclDefinition: Acl, removalPolicy?: RemovalPolicy, clientAuthentication?: Authentication): CustomResource +``` + +Creates a ACL in the MSK Cluster. + +###### `id`Required + +- *Type:* string + +the CDK ID of the ACL. + +--- + +###### `aclDefinition`Required + +- *Type:* @cdklabs/aws-data-solutions-framework.streaming.Acl + +the Kafka ACL definition. + +--- + +###### `removalPolicy`Optional + +- *Type:* aws-cdk-lib.RemovalPolicy + +Wether to keep the ACL or delete it when removing the resource from the Stack. + +--- + +###### `clientAuthentication`Optional + +- *Type:* @cdklabs/aws-data-solutions-framework.streaming.Authentication + +The authentication used by the Kafka API admin client to create the ACL. + +--- + +##### `setTopic` + +```typescript +public setTopic(id: string, clientAuthentication: Authentication, topicDefinition: MskTopic, removalPolicy?: RemovalPolicy, waitForLeaders?: boolean, timeout?: number): CustomResource +``` + +Creates a topic in the MSK Cluster. + +###### `id`Required + +- *Type:* string + +the CDK ID for Topic. + +--- + +###### `clientAuthentication`Required + +- *Type:* @cdklabs/aws-data-solutions-framework.streaming.Authentication + +The authentication used by the Kafka API admin client to create the topic. + +--- + +###### `topicDefinition`Required + +- *Type:* @cdklabs/aws-data-solutions-framework.streaming.MskTopic + +the Kafka topic definition. + +--- + +###### `removalPolicy`Optional + +- *Type:* aws-cdk-lib.RemovalPolicy + +Wether to keep the topic or delete it when removing the resource from the Stack. + +--- + +###### `waitForLeaders`Optional + +- *Type:* boolean + +If set to true, waits until metadata for the new topics doesn't throw LEADER_NOT_AVAILABLE. + +--- + +###### `timeout`Optional + +- *Type:* number + +The time in ms to wait for a topic to be completely created on the controller node. + +--- + #### Static Functions | **Name** | **Description** | | --- | --- | -| isConstruct | Checks if `x` is a construct. | +| isConstruct | Checks if `x` is a construct. 
| --- -##### `isConstruct` +##### `isConstruct` ```typescript import { streaming } from '@cdklabs/aws-data-solutions-framework' -streaming.MskServerless.isConstruct(x: any) +streaming.KafkaApi.isConstruct(x: any) ``` Checks if `x` is a construct. @@ -5786,7 +5838,7 @@ library can be accidentally installed, and `instanceof` will behave unpredictably. It is safest to avoid using `instanceof`, and using this type-testing method instead. -###### `x`Required +###### `x`Required - *Type:* any @@ -5798,16 +5850,19 @@ Any object. | **Name** | **Type** | **Description** | | --- | --- | --- | -| node | constructs.Node | The tree node. | -| cluster | aws-cdk-lib.aws_msk.CfnServerlessCluster | *No description.* | -| clusterName | string | *No description.* | -| lambdaSecurityGroup | aws-cdk-lib.aws_ec2.ISecurityGroup | *No description.* | -| vpc | aws-cdk-lib.aws_ec2.IVpc | *No description.* | -| brokerSecurityGroup | aws-cdk-lib.aws_ec2.ISecurityGroup | *No description.* | +| node | constructs.Node | The tree node. | +| mskAclFunction | aws-cdk-lib.aws_lambda.IFunction | The Lambda function used by the Custom Resource provider when MSK is using mTLS authentication. | +| mskAclLogGroup | aws-cdk-lib.aws_logs.ILogGroup | The Cloudwatch Log Group used by the Custom Resource provider when MSK is using mTLS authentication. | +| mskAclRole | aws-cdk-lib.aws_iam.IRole | The IAM Role used by the Custom Resource provider when MSK is using mTLS authentication. | +| mskAclSecurityGroup | aws-cdk-lib.aws_ec2.ISecurityGroup[] | The Security Group used by the Custom Resource provider when MSK is using mTLS authentication. | +| mskIamFunction | aws-cdk-lib.aws_lambda.IFunction | The Lambda function used by the Custom Resource provider when MSK is using IAM authentication. | +| mskIamLogGroup | aws-cdk-lib.aws_logs.ILogGroup | The Cloudwatch Log Group used by the Custom Resource provider when MSK is using IAM authentication. | +| mskIamRole | aws-cdk-lib.aws_iam.IRole | The IAM Role used by the Custom Resource provider when MSK is using IAM authentication. | +| mskIamSecurityGroup | aws-cdk-lib.aws_ec2.ISecurityGroup[] | The Security Group used by the Custom Resource provider when MSK is using IAM authentication. | --- -##### `node`Required +##### `node`Required ```typescript public readonly node: Node; @@ -5819,53 +5874,99 @@ The tree node. --- -##### `cluster`Required +##### `mskAclFunction`Optional ```typescript -public readonly cluster: CfnServerlessCluster; +public readonly mskAclFunction: IFunction; ``` -- *Type:* aws-cdk-lib.aws_msk.CfnServerlessCluster +- *Type:* aws-cdk-lib.aws_lambda.IFunction + +The Lambda function used by the Custom Resource provider when MSK is using mTLS authentication. --- -##### `clusterName`Required +##### `mskAclLogGroup`Optional ```typescript -public readonly clusterName: string; +public readonly mskAclLogGroup: ILogGroup; ``` -- *Type:* string +- *Type:* aws-cdk-lib.aws_logs.ILogGroup + +The Cloudwatch Log Group used by the Custom Resource provider when MSK is using mTLS authentication. --- -##### `lambdaSecurityGroup`Required +##### `mskAclRole`Optional ```typescript -public readonly lambdaSecurityGroup: ISecurityGroup; +public readonly mskAclRole: IRole; ``` -- *Type:* aws-cdk-lib.aws_ec2.ISecurityGroup +- *Type:* aws-cdk-lib.aws_iam.IRole + +The IAM Role used by the Custom Resource provider when MSK is using mTLS authentication. 
--- -##### `vpc`Required +##### `mskAclSecurityGroup`Optional ```typescript -public readonly vpc: IVpc; +public readonly mskAclSecurityGroup: ISecurityGroup[]; ``` -- *Type:* aws-cdk-lib.aws_ec2.IVpc +- *Type:* aws-cdk-lib.aws_ec2.ISecurityGroup[] + +The Security Group used by the Custom Resource provider when MSK is using mTLS authentication. --- -##### `brokerSecurityGroup`Optional +##### `mskIamFunction`Optional ```typescript -public readonly brokerSecurityGroup: ISecurityGroup; +public readonly mskIamFunction: IFunction; ``` -- *Type:* aws-cdk-lib.aws_ec2.ISecurityGroup +- *Type:* aws-cdk-lib.aws_lambda.IFunction + +The Lambda function used by the Custom Resource provider when MSK is using IAM authentication. + +--- + +##### `mskIamLogGroup`Optional + +```typescript +public readonly mskIamLogGroup: ILogGroup; +``` + +- *Type:* aws-cdk-lib.aws_logs.ILogGroup + +The Cloudwatch Log Group used by the Custom Resource provider when MSK is using IAM authentication. + +--- + +##### `mskIamRole`Optional + +```typescript +public readonly mskIamRole: IRole; +``` + +- *Type:* aws-cdk-lib.aws_iam.IRole + +The IAM Role used by the Custom Resource provider when MSK is using IAM authentication. + +--- + +##### `mskIamSecurityGroup`Optional + +```typescript +public readonly mskIamSecurityGroup: ISecurityGroup[]; +``` + +- *Type:* aws-cdk-lib.aws_ec2.ISecurityGroup[] + +The Security Group used by the Custom Resource provider when MSK is using IAM authentication. --- @@ -5873,12 +5974,12 @@ public readonly brokerSecurityGroup: ISecurityGroup; | **Name** | **Type** | **Description** | | --- | --- | --- | -| DSF_OWNED_TAG | string | *No description.* | -| DSF_TRACKING_CODE | string | *No description.* | +| DSF_OWNED_TAG | string | *No description.* | +| DSF_TRACKING_CODE | string | *No description.* | --- -##### `DSF_OWNED_TAG`Required +##### `DSF_OWNED_TAG`Required ```typescript public readonly DSF_OWNED_TAG: string; @@ -5888,7 +5989,7 @@ public readonly DSF_OWNED_TAG: string; --- -##### `DSF_TRACKING_CODE`Required +##### `DSF_TRACKING_CODE`Required ```typescript public readonly DSF_TRACKING_CODE: string; @@ -5898,68 +5999,54 @@ public readonly DSF_TRACKING_CODE: string; --- -### OpenSearchCluster +### MskProvisioned -A construct to provision Amazon OpenSearch Cluster and OpenSearch Dashboards. +A construct to create an MSK Provisioned cluster. -Uses IAM Identity Center SAML authentication. -If OpenSearch cluster is deployed in vpc created using DataVpc construct, -ClientVPNEndpoint will be provisioned automatically for secure access to OpenSearch Dashboards. 
+> [https://awslabs.github.io/data-solutions-framework-on-aws/](https://awslabs.github.io/data-solutions-framework-on-aws/) *Example* ```typescript - const osCluster = new dsf.consumption.OpenSearchCluster(this, 'MyOpenSearchCluster',{ - domainName:"mycluster1", - samlEntityId:'', - samlMetadataContent:'', - samlMasterBackendRole:'', - deployInVpc:true, - removalPolicy:cdk.RemovalPolicy.DESTROY - }); - - osCluster.addRoleMapping('DashBoardUser', 'dashboards_user',''); - osCluster.addRoleMapping('ReadAllRole', 'readall',''); +const msk = new dsf.streaming.MskProvisioned(this, 'cluster'); ``` -#### Initializers +#### Initializers ```typescript -import { consumption } from '@cdklabs/aws-data-solutions-framework' +import { streaming } from '@cdklabs/aws-data-solutions-framework' -new consumption.OpenSearchCluster(scope: Construct, id: string, props: OpenSearchClusterProps) +new streaming.MskProvisioned(scope: Construct, id: string, props?: MskProvisionedProps) ``` | **Name** | **Type** | **Description** | | --- | --- | --- | -| scope | constructs.Construct | the Scope of the AWS CDK Construct. | -| id | string | the ID of the AWS CDK Construct. | -| props | @cdklabs/aws-data-solutions-framework.consumption.OpenSearchClusterProps | the OpenSearchCluster [properties]{@link OpenSearchClusterProps}. | +| scope | constructs.Construct | the Scope of the CDK Construct. | +| id | string | the ID of the CDK Construct. | +| props | @cdklabs/aws-data-solutions-framework.streaming.MskProvisionedProps | *No description.* | --- -##### `scope`Required +##### `scope`Required - *Type:* constructs.Construct -the Scope of the AWS CDK Construct. +the Scope of the CDK Construct. --- -##### `id`Required +##### `id`Required - *Type:* string -the ID of the AWS CDK Construct. +the ID of the CDK Construct. --- -##### `props`Required - -- *Type:* @cdklabs/aws-data-solutions-framework.consumption.OpenSearchClusterProps +##### `props`Optional -the OpenSearchCluster [properties]{@link OpenSearchClusterProps}. +- *Type:* @cdklabs/aws-data-solutions-framework.streaming.MskProvisionedProps --- @@ -5967,14 +6054,20 @@ the OpenSearchCluster [properties]{@link OpenSearchClusterProps}. | **Name** | **Description** | | --- | --- | -| toString | Returns a string representation of this construct. | -| addRoleMapping | *No description.* | -| callOpenSearchApi | Calls OpenSearch API using custom resource. | -| retrieveVersion | Retrieve DSF package.json version. | +| toString | Returns a string representation of this construct. | +| addClusterPolicy | Add a cluster policy. | +| deleteClusterPolicy | *No description.* | +| getBootstrapBrokers | Method to get bootstrap broker connection string based on the authentication mode. | +| grantConsume | Grant a principal permissions to consume from a topic. | +| grantProduce | Grant a principal permissions to produce to a topic. | +| putClusterPolicy | *No description.* | +| retrieveVersion | Retrieve DSF package.json version. | +| setAcl | Creates ACL in the Msk Cluster. | +| setTopic | Creates a topic in the Msk Cluster. | --- -##### `toString` +##### `toString` ```typescript public toString(): string @@ -5982,349 +6075,323 @@ public toString(): string Returns a string representation of this construct. 
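The client administration methods documented below (`setTopic`, `getBootstrapBrokers`) can be combined to create a topic and expose the broker connection string. The following is a minimal sketch only: the `Authentication.IAM` member and an `MskTopic` shape with `topic`, `numPartitions` and `replicationFactor` fields are assumptions to verify against the `Authentication` and `MskTopic` definitions.

```typescript
import * as cdk from 'aws-cdk-lib';
import * as dsf from '@cdklabs/aws-data-solutions-framework';

const msk = new dsf.streaming.MskProvisioned(this, 'cluster');

// Create a topic via the Kafka admin client custom resource.
// Assumption: MskTopic accepts a topic name, partition count and replication factor.
msk.setTopic('SalesTopic', dsf.streaming.Authentication.IAM, {
  topic: 'sales',
  numPartitions: 3,
  replicationFactor: 2,
});

// Expose the IAM bootstrap broker connection string for client applications.
new cdk.CfnOutput(this, 'BootstrapBrokers', {
  value: msk.getBootstrapBrokers(dsf.streaming.Authentication.IAM),
});
```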
-##### `addRoleMapping` +##### `addClusterPolicy` ```typescript -public addRoleMapping(id: string, name: string, role: string, persist?: boolean): CustomResource +public addClusterPolicy(policy: PolicyDocument, id: string): CfnClusterPolicy ``` -> [https://opensearch.org/docs/2.9/security/access-control/users-roles/#predefined-roles](https://opensearch.org/docs/2.9/security/access-control/users-roles/#predefined-roles) +Add a cluster policy. -###### `id`Required +###### `policy`Required -- *Type:* string +- *Type:* aws-cdk-lib.aws_iam.PolicyDocument -The CDK resource ID. +the IAM principal to grand the consume action. --- -###### `name`Required +###### `id`Required - *Type:* string -OpenSearch role name. +the CDK id for the Cluster Policy. --- -###### `role`Required - -- *Type:* string +##### `deleteClusterPolicy` -list of IAM roles. +```typescript +public deleteClusterPolicy(): void +``` -For IAM Identity center provide SAML group Id as a role +##### `getBootstrapBrokers` ---- +```typescript +public getBootstrapBrokers(authentication: Authentication): string +``` -###### `persist`Optional +Method to get bootstrap broker connection string based on the authentication mode. -- *Type:* boolean +###### `authentication`Required -Set to true if you want to prevent the roles to be ovewritten by subsequent PUT API calls. +- *Type:* @cdklabs/aws-data-solutions-framework.streaming.Authentication -Default false. +the authentication mode. --- -##### `callOpenSearchApi` +##### `grantConsume` ```typescript -public callOpenSearchApi(id: string, apiPath: string, body: any, method?: string): CustomResource +public grantConsume(id: string, topicName: string, clientAuthentication: Authentication, principal: string | IPrincipal, host?: string, removalPolicy?: RemovalPolicy, customResourceAuthentication?: Authentication): CustomResource ``` -Calls OpenSearch API using custom resource. +Grant a principal permissions to consume from a topic. -###### `id`Required +###### `id`Required - *Type:* string -The CDK resource ID. +the CDK resource ID. --- -###### `apiPath`Required +###### `topicName`Required - *Type:* string -OpenSearch API path. +the target topic to grant consume permissions on. --- -###### `body`Required +###### `clientAuthentication`Required -- *Type:* any +- *Type:* @cdklabs/aws-data-solutions-framework.streaming.Authentication -OpenSearch API request body. +The authentication mode of the consumer. --- -###### `method`Optional +###### `principal`Required -- *Type:* string +- *Type:* string | aws-cdk-lib.aws_iam.IPrincipal -Opensearch API method,. +the principal receiveing grant consume permissions. --- -##### `retrieveVersion` +###### `host`Optional -```typescript -public retrieveVersion(): any -``` +- *Type:* string -Retrieve DSF package.json version. +the host of the consumer. -#### Static Functions +--- -| **Name** | **Description** | -| --- | --- | -| isConstruct | Checks if `x` is a construct. | +###### `removalPolicy`Optional + +- *Type:* aws-cdk-lib.RemovalPolicy + +the removal policy to apply to the grant. --- -##### `isConstruct` +###### `customResourceAuthentication`Optional -```typescript -import { consumption } from '@cdklabs/aws-data-solutions-framework' +- *Type:* @cdklabs/aws-data-solutions-framework.streaming.Authentication -consumption.OpenSearchCluster.isConstruct(x: any) -``` +The authentication used by the Kafka API admin client to create the ACL. -Checks if `x` is a construct. 
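For example, a consumer application's IAM role can be granted read access on a topic using the signature above (a sketch: the role, topic name and the `Authentication.IAM` member name are illustrative assumptions):

```typescript
import { aws_iam as iam } from 'aws-cdk-lib';
import * as dsf from '@cdklabs/aws-data-solutions-framework';

const msk = new dsf.streaming.MskProvisioned(this, 'cluster');

// Illustrative consumer role; any IPrincipal (or an mTLS principal string) can be passed.
const consumerRole = new iam.Role(this, 'ConsumerRole', {
  assumedBy: new iam.ServicePrincipal('lambda.amazonaws.com'),
});

// Grant read access on the 'sales' topic to the consumer role.
msk.grantConsume('SalesConsumeGrant', 'sales', dsf.streaming.Authentication.IAM, consumerRole);
```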
+--- -Use this method instead of `instanceof` to properly detect `Construct` -instances, even when the construct library is symlinked. +##### `grantProduce` -Explanation: in JavaScript, multiple copies of the `constructs` library on -disk are seen as independent, completely different libraries. As a -consequence, the class `Construct` in each copy of the `constructs` library -is seen as a different class, and an instance of one class will not test as -`instanceof` the other class. `npm install` will not create installations -like this, but users may manually symlink construct libraries together or -use a monorepo tool: in those cases, multiple copies of the `constructs` -library can be accidentally installed, and `instanceof` will behave -unpredictably. It is safest to avoid using `instanceof`, and using -this type-testing method instead. +```typescript +public grantProduce(id: string, topicName: string, clientAuthentication: Authentication, principal: string | IPrincipal, host?: string, removalPolicy?: RemovalPolicy, customResourceAuthentication?: Authentication): CustomResource +``` -###### `x`Required +Grant a principal permissions to produce to a topic. -- *Type:* any +###### `id`Required -Any object. +- *Type:* string + +the CDK resource ID. --- -#### Properties +###### `topicName`Required -| **Name** | **Type** | **Description** | -| --- | --- | --- | -| node | constructs.Node | The tree node. | -| domain | aws-cdk-lib.aws_opensearchservice.IDomain | OpenSearchCluster domain. | -| encryptionKey | aws-cdk-lib.aws_kms.IKey | The KMS Key used to encrypt data and logs. | -| logGroup | aws-cdk-lib.aws_logs.ILogGroup | CloudWatch Logs Log Group to store OpenSearch cluster logs. | -| masterRole | aws-cdk-lib.aws_iam.IRole | IAM Role used to provision and configure OpenSearch domain. | -| vpc | aws-cdk-lib.aws_ec2.IVpc | VPC OpenSearch cluster is provisioned in. | +- *Type:* string ---- +the target topic to grant produce permissions on. -##### `node`Required +--- -```typescript -public readonly node: Node; -``` +###### `clientAuthentication`Required -- *Type:* constructs.Node +- *Type:* @cdklabs/aws-data-solutions-framework.streaming.Authentication -The tree node. +The authentication mode of the producer. --- -##### `domain`Required - -```typescript -public readonly domain: IDomain; -``` +###### `principal`Required -- *Type:* aws-cdk-lib.aws_opensearchservice.IDomain +- *Type:* string | aws-cdk-lib.aws_iam.IPrincipal -OpenSearchCluster domain. +the principal receiving grant produce permissions. --- -##### `encryptionKey`Required - -```typescript -public readonly encryptionKey: IKey; -``` +###### `host`Optional -- *Type:* aws-cdk-lib.aws_kms.IKey +- *Type:* string -The KMS Key used to encrypt data and logs. +the host of the producer. --- -##### `logGroup`Required - -```typescript -public readonly logGroup: ILogGroup; -``` +###### `removalPolicy`Optional -- *Type:* aws-cdk-lib.aws_logs.ILogGroup +- *Type:* aws-cdk-lib.RemovalPolicy -CloudWatch Logs Log Group to store OpenSearch cluster logs. +the removal policy to apply to the grant. --- -##### `masterRole`Required - -```typescript -public readonly masterRole: IRole; -``` +###### `customResourceAuthentication`Optional -- *Type:* aws-cdk-lib.aws_iam.IRole +- *Type:* @cdklabs/aws-data-solutions-framework.streaming.Authentication -IAM Role used to provision and configure OpenSearch domain. +The authentication used by the Kafka API admin client to create the ACL. 
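With mTLS authentication the principal is passed as a string rather than an `IPrincipal`. The sketch below assumes an `Authentication.MTLS` member and the `User:CN=...` distinguished-name convention used by Kafka ACLs; both are assumptions to check against the `Authentication` enum:

```typescript
import * as dsf from '@cdklabs/aws-data-solutions-framework';

const msk = new dsf.streaming.MskProvisioned(this, 'cluster');

// Grant produce permissions to a TLS principal identified by its certificate CN.
// 'User:CN=producer-app' is an assumed principal format, not confirmed by this reference.
msk.grantProduce('SalesProduceGrant', 'sales', dsf.streaming.Authentication.MTLS, 'User:CN=producer-app');
```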
---

##### `putClusterPolicy` 

```typescript
public putClusterPolicy(policy: string, id: string, currentVersion?: string): void
```

###### `policy`Required 

- *Type:* string

---

###### `id`Required 

- *Type:* string

---

###### `currentVersion`Optional 

- *Type:* string

---

##### `retrieveVersion` 

```typescript
public retrieveVersion(): any
```

Retrieve DSF package.json version.

##### `setAcl` 

```typescript
public setAcl(id: string, aclDefinition: Acl, removalPolicy?: RemovalPolicy, clientAuthentication?: Authentication): CustomResource
```

Creates an ACL in the MSK Cluster.

###### `id`Required 

- *Type:* string

the CDK ID of the ACL.

---

###### `aclDefinition`Required 

- *Type:* @cdklabs/aws-data-solutions-framework.streaming.Acl

the Kafka ACL definition.

---

###### `removalPolicy`Optional 

- *Type:* aws-cdk-lib.RemovalPolicy

Whether to keep the ACL or delete it when removing the resource from the Stack (default: `RemovalPolicy.RETAIN`).

---

###### `clientAuthentication`Optional 

- *Type:* @cdklabs/aws-data-solutions-framework.streaming.Authentication

The authentication used by the Kafka API admin client to create the ACL.

---

##### `setTopic` 

```typescript
public setTopic(id: string, clientAuthentication: Authentication, topicDefinition: MskTopic, removalPolicy?: RemovalPolicy, waitForLeaders?: boolean, timeout?: number): CustomResource
```

Creates a topic in the MSK Cluster.

###### `id`Required 

- *Type:* string

the CDK ID of the Topic.
--- -##### `props`Required +###### `clientAuthentication`Required -- *Type:* @cdklabs/aws-data-solutions-framework.processing.PySparkApplicationPackageProps +- *Type:* @cdklabs/aws-data-solutions-framework.streaming.Authentication -{@link PySparkApplicationPackageProps}. +The authentication used by the Kafka API admin client to create the topic. --- -#### Methods +###### `topicDefinition`Required -| **Name** | **Description** | -| --- | --- | -| toString | Returns a string representation of this construct. | -| retrieveVersion | Retrieve DSF package.json version. | +- *Type:* @cdklabs/aws-data-solutions-framework.streaming.MskTopic + +the Kafka topic definition. --- -##### `toString` +###### `removalPolicy`Optional -```typescript -public toString(): string -``` +- *Type:* aws-cdk-lib.RemovalPolicy -Returns a string representation of this construct. +Wether to keep the topic or delete it when removing the resource from the Stack {@default RemovalPolicy.RETAIN}. -##### `retrieveVersion` +--- -```typescript -public retrieveVersion(): any -``` +###### `waitForLeaders`Optional -Retrieve DSF package.json version. +- *Type:* boolean + +If this is true it will wait until metadata for the new topics doesn't throw LEADER_NOT_AVAILABLE. + +--- + +###### `timeout`Optional + +- *Type:* number + +The time in ms to wait for a topic to be completely created on the controller node. + +--- #### Static Functions | **Name** | **Description** | | --- | --- | -| isConstruct | Checks if `x` is a construct. | +| isConstruct | Checks if `x` is a construct. | +| createClusterConfiguration | *No description.* | --- -##### `isConstruct` +##### `isConstruct` ```typescript -import { processing } from '@cdklabs/aws-data-solutions-framework' +import { streaming } from '@cdklabs/aws-data-solutions-framework' -processing.PySparkApplicationPackage.isConstruct(x: any) +streaming.MskProvisioned.isConstruct(x: any) ``` Checks if `x` is a construct. @@ -6343,7 +6410,7 @@ library can be accidentally installed, and `instanceof` will behave unpredictably. It is safest to avoid using `instanceof`, and using this type-testing method instead. -###### `x`Required +###### `x`Required - *Type:* any @@ -6351,22 +6418,91 @@ Any object. --- +##### `createClusterConfiguration` + +```typescript +import { streaming } from '@cdklabs/aws-data-solutions-framework' + +streaming.MskProvisioned.createClusterConfiguration(scope: Construct, id: string, name: string, serverPropertiesFilePath: string, kafkaVersions?: KafkaVersion[], configurationDescription?: string, latestRevision?: LatestRevisionProperty) +``` + +###### `scope`Required + +- *Type:* constructs.Construct + +--- + +###### `id`Required + +- *Type:* string + +--- + +###### `name`Required + +- *Type:* string + +--- + +###### `serverPropertiesFilePath`Required + +- *Type:* string + +--- + +###### `kafkaVersions`Optional + +- *Type:* @cdklabs/aws-data-solutions-framework.streaming.KafkaVersion[] + +--- + +###### `configurationDescription`Optional + +- *Type:* string + +--- + +###### `latestRevision`Optional + +- *Type:* aws-cdk-lib.aws_msk.CfnConfiguration.LatestRevisionProperty + +--- + #### Properties | **Name** | **Type** | **Description** | | --- | --- | --- | -| node | constructs.Node | The tree node. | -| artifactsBucket | aws-cdk-lib.aws_s3.IBucket | The S3 Bucket for storing the artifacts (entrypoint and virtual environment archive). | -| assetUploadManagedPolicy | aws-cdk-lib.aws_iam.IManagedPolicy | The IAM Managed Policy used by the custom resource for the assets deployment. 
| -| assetUploadRole | aws-cdk-lib.aws_iam.IRole | The IAM Role used by the BucketDeployment to upload the artifacts to an s3 bucket. | -| entrypointUri | string | The location (generally it's an S3 URI) where the entry point is saved. | -| artifactsAccessLogsBucket | @cdklabs/aws-data-solutions-framework.storage.AccessLogsBucket | The access logs bucket to log accesses on the artifacts bucket. | -| sparkVenvConf | string | The Spark Config containing the configuration of virtual environment archive with all dependencies. | -| venvArchiveUri | string | The location (generally an S3 URI) where the archive of the Python virtual environment with all dependencies is stored. | +| node | constructs.Node | The tree node. | +| cluster | aws-cdk-lib.aws_msk.CfnCluster | The MSK cluster created by the construct. | +| encryptionAtRestKey | aws-cdk-lib.aws_kms.IKey | The KMS CMK key for encrypting data within the cluster. | +| vpc | aws-cdk-lib.aws_ec2.IVpc | The VPC where the MSK cluster is deployed. | +| applyConfigurationFunction | aws-cdk-lib.aws_lambda.IFunction | The Lambda function responsible for applying MSK configuration. | +| applyConfigurationLogGroup | aws-cdk-lib.aws_logs.ILogGroup | The CloudWatch Log Group used by the Lambda responsible for applying MSK configuration. | +| applyConfigurationRole | aws-cdk-lib.aws_iam.IRole | The IAM Role used by the Lambda responsible for applying MSK configuration. | +| applyConfigurationSecurityGroup | aws-cdk-lib.aws_ec2.ISecurityGroup[] | The Security Group used by the Lambda responsible for applying MSK configuration. | +| brokerLogGroup | aws-cdk-lib.aws_logs.ILogGroup | The CloudWatch log group associated with brokers activity. | +| brokerSecurityGroup | aws-cdk-lib.aws_ec2.ISecurityGroup | The security group associated with the MSK brokers. | +| clusterConfiguration | aws-cdk-lib.aws_msk.CfnConfiguration | The MSK cluster configuration. | +| iamCrudAdminFunction | aws-cdk-lib.aws_lambda.IFunction | The Lambda function responsible for CRUD operations via IAM authentication. | +| iamCrudAdminLogGroup | aws-cdk-lib.aws_logs.ILogGroup | The CloudWatch Log Group used by the Lambda responsible for CRUD operations via IAM authentication. | +| iamCrudAdminRole | aws-cdk-lib.aws_iam.IRole | The IAM role used by the Lambda responsible for CRUD operations via IAM authentication. | +| iamCrudAdminSecurityGroup | aws-cdk-lib.aws_ec2.ISecurityGroup[] | The Security Group used by the Lambda responsible for CRUD operations via IAM authentication. | +| inClusterAclFunction | aws-cdk-lib.aws_lambda.IFunction | The Lambda function responsible for CRUD operations via mTLS authentication. | +| inClusterAclLogGroup | aws-cdk-lib.aws_logs.ILogGroup | The CloudWatch Log Group used by the Lambda responsible for CRUD operations via mTLS authentication. | +| inClusterAclRole | aws-cdk-lib.aws_iam.IRole | The IAM role used by the Lambda responsible for CRUD operations via mTLS authentication. | +| inClusterAclSecurityGroup | aws-cdk-lib.aws_ec2.ISecurityGroup[] | The Security Group used by the Lambda responsible for CRUD operations via mTLS authentication. | +| updateConnectivityFunction | aws-cdk-lib.aws_lambda.IFunction | The Lambda function responsible for updating MSK Connectivity. | +| updateConnectivityLogGroup | aws-cdk-lib.aws_logs.ILogGroup | The CloudWatch Log Group used by the Lambda responsible for updating MSK Connectivity. | +| updateConnectivityRole | aws-cdk-lib.aws_iam.IRole | The IAM Role used by the Lambda responsible for updating MSK Connectivity. 
| +| updateConnectivitySecurityGroup | aws-cdk-lib.aws_ec2.ISecurityGroup[] | The Security Group used by the Lambda responsible for updating MSK Connectivity. | +| updateZookepeerFunction | aws-cdk-lib.aws_lambda.IFunction | The Lambda function responsible for updating Zookeeper. | +| updateZookepeerLogGroup | aws-cdk-lib.aws_logs.ILogGroup | The CloudWatch Log Group used by the Lambda responsible for updating Zookeeper. | +| updateZookepeerRole | aws-cdk-lib.aws_iam.IRole | The IAM Role used by the Lambda responsible for updating Zookeeper. | +| updateZookepeerSecurityGroup | aws-cdk-lib.aws_ec2.ISecurityGroup | THe Security Group associated to the Lambda responsible for updating Zookeeper. | --- -##### `node`Required +##### `node`Required ```typescript public readonly node: Node; @@ -6378,596 +6514,572 @@ The tree node. --- -##### `artifactsBucket`Required +##### `cluster`Required ```typescript -public readonly artifactsBucket: IBucket; +public readonly cluster: CfnCluster; ``` -- *Type:* aws-cdk-lib.aws_s3.IBucket +- *Type:* aws-cdk-lib.aws_msk.CfnCluster -The S3 Bucket for storing the artifacts (entrypoint and virtual environment archive). +The MSK cluster created by the construct. --- -##### `assetUploadManagedPolicy`Required +##### `encryptionAtRestKey`Required ```typescript -public readonly assetUploadManagedPolicy: IManagedPolicy; +public readonly encryptionAtRestKey: IKey; ``` -- *Type:* aws-cdk-lib.aws_iam.IManagedPolicy +- *Type:* aws-cdk-lib.aws_kms.IKey -The IAM Managed Policy used by the custom resource for the assets deployment. +The KMS CMK key for encrypting data within the cluster. --- -##### `assetUploadRole`Required +##### `vpc`Required ```typescript -public readonly assetUploadRole: IRole; +public readonly vpc: IVpc; ``` -- *Type:* aws-cdk-lib.aws_iam.IRole - -The IAM Role used by the BucketDeployment to upload the artifacts to an s3 bucket. +- *Type:* aws-cdk-lib.aws_ec2.IVpc -In case you provide your own S3 Bucket for storing the artifacts (entrypoint and virtual environment archive), -you must provide S3 write access to this role to upload the artifacts. +The VPC where the MSK cluster is deployed. --- -##### `entrypointUri`Required +##### `applyConfigurationFunction`Optional ```typescript -public readonly entrypointUri: string; +public readonly applyConfigurationFunction: IFunction; ``` -- *Type:* string - -The location (generally it's an S3 URI) where the entry point is saved. +- *Type:* aws-cdk-lib.aws_lambda.IFunction -You can pass this location to your Spark job. +The Lambda function responsible for applying MSK configuration. --- -##### `artifactsAccessLogsBucket`Optional +##### `applyConfigurationLogGroup`Optional ```typescript -public readonly artifactsAccessLogsBucket: AccessLogsBucket; +public readonly applyConfigurationLogGroup: ILogGroup; ``` -- *Type:* @cdklabs/aws-data-solutions-framework.storage.AccessLogsBucket +- *Type:* aws-cdk-lib.aws_logs.ILogGroup -The access logs bucket to log accesses on the artifacts bucket. +The CloudWatch Log Group used by the Lambda responsible for applying MSK configuration. --- -##### `sparkVenvConf`Optional +##### `applyConfigurationRole`Optional ```typescript -public readonly sparkVenvConf: string; +public readonly applyConfigurationRole: IRole; ``` -- *Type:* string +- *Type:* aws-cdk-lib.aws_iam.IRole -The Spark Config containing the configuration of virtual environment archive with all dependencies. +The IAM Role used by the Lambda responsible for applying MSK configuration. 
--- -##### `venvArchiveUri`Optional +##### `applyConfigurationSecurityGroup`Optional ```typescript -public readonly venvArchiveUri: string; +public readonly applyConfigurationSecurityGroup: ISecurityGroup[]; ``` -- *Type:* string - -The location (generally an S3 URI) where the archive of the Python virtual environment with all dependencies is stored. +- *Type:* aws-cdk-lib.aws_ec2.ISecurityGroup[] -You can pass this location to your Spark job. +The Security Group used by the Lambda responsible for applying MSK configuration. --- -#### Constants +##### `brokerLogGroup`Optional -| **Name** | **Type** | **Description** | -| --- | --- | --- | -| ARTIFACTS_PREFIX | string | The prefix used to store artifacts on the artifact bucket. | -| DSF_OWNED_TAG | string | *No description.* | -| DSF_TRACKING_CODE | string | *No description.* | +```typescript +public readonly brokerLogGroup: ILogGroup; +``` + +- *Type:* aws-cdk-lib.aws_logs.ILogGroup + +The CloudWatch log group associated with brokers activity. --- -##### `ARTIFACTS_PREFIX`Required +##### `brokerSecurityGroup`Optional ```typescript -public readonly ARTIFACTS_PREFIX: string; +public readonly brokerSecurityGroup: ISecurityGroup; ``` -- *Type:* string +- *Type:* aws-cdk-lib.aws_ec2.ISecurityGroup -The prefix used to store artifacts on the artifact bucket. +The security group associated with the MSK brokers. --- -##### `DSF_OWNED_TAG`Required +##### `clusterConfiguration`Optional ```typescript -public readonly DSF_OWNED_TAG: string; +public readonly clusterConfiguration: CfnConfiguration; ``` -- *Type:* string +- *Type:* aws-cdk-lib.aws_msk.CfnConfiguration + +The MSK cluster configuration. --- -##### `DSF_TRACKING_CODE`Required +##### `iamCrudAdminFunction`Optional ```typescript -public readonly DSF_TRACKING_CODE: string; +public readonly iamCrudAdminFunction: IFunction; ``` -- *Type:* string +- *Type:* aws-cdk-lib.aws_lambda.IFunction + +The Lambda function responsible for CRUD operations via IAM authentication. --- -### RedshiftData +##### `iamCrudAdminLogGroup`Optional -Creates an asynchronous custom resource that handles the execution of SQL using Redshift's Data API. +```typescript +public readonly iamCrudAdminLogGroup: ILogGroup; +``` -If `vpc` and `vpcSubnets` are passed, this construct would also create the Redshift Data Interface VPC endpoint and configure the custom resource in the same VPC subnet. +- *Type:* aws-cdk-lib.aws_logs.ILogGroup -*Example* +The CloudWatch Log Group used by the Lambda responsible for CRUD operations via IAM authentication. -```typescript -const namespace = new dsf.consumption.RedshiftServerlessNamespace(this, 'RedshiftNamespace', { - name: "default", - dbName: 'defaultdb', -}); +--- -const workgroup = new dsf.consumption.RedshiftServerlessWorkgroup(this, "RedshiftWorkgroup", { - name: "redshift-workgroup", - namespace: namespace, -}); +##### `iamCrudAdminRole`Optional -const rsData = workgroup.accessData('DataApi'); -rsData.createDbRole("EngineeringRole", "defaultdb", "engineering"); +```typescript +public readonly iamCrudAdminRole: IRole; ``` +- *Type:* aws-cdk-lib.aws_iam.IRole + +The IAM role used by the Lambda responsible for CRUD operations via IAM authentication. 
-#### Initializers +--- -```typescript -import { consumption } from '@cdklabs/aws-data-solutions-framework' +##### `iamCrudAdminSecurityGroup`Optional -new consumption.RedshiftData(scope: Construct, id: string, props: RedshiftDataProps) +```typescript +public readonly iamCrudAdminSecurityGroup: ISecurityGroup[]; ``` -| **Name** | **Type** | **Description** | -| --- | --- | --- | -| scope | constructs.Construct | *No description.* | -| id | string | *No description.* | -| props | @cdklabs/aws-data-solutions-framework.consumption.RedshiftDataProps | *No description.* | +- *Type:* aws-cdk-lib.aws_ec2.ISecurityGroup[] ---- +The Security Group used by the Lambda responsible for CRUD operations via IAM authentication. -##### `scope`Required +--- -- *Type:* constructs.Construct +##### `inClusterAclFunction`Optional ---- +```typescript +public readonly inClusterAclFunction: IFunction; +``` -##### `id`Required +- *Type:* aws-cdk-lib.aws_lambda.IFunction -- *Type:* string +The Lambda function responsible for CRUD operations via mTLS authentication. --- -##### `props`Required - -- *Type:* @cdklabs/aws-data-solutions-framework.consumption.RedshiftDataProps +##### `inClusterAclLogGroup`Optional ---- +```typescript +public readonly inClusterAclLogGroup: ILogGroup; +``` -#### Methods +- *Type:* aws-cdk-lib.aws_logs.ILogGroup -| **Name** | **Description** | -| --- | --- | -| toString | Returns a string representation of this construct. | -| retrieveVersion | Retrieve DSF package.json version. | -| assignDbRolesToIAMRole | Assigns Redshift DB roles to IAM role vs the `RedshiftDbRoles` tag. | -| createDbRole | Creates a new DB role. | -| grantDbAllPrivilegesToRole | Grants both read and write permissions on all the tables in the `schema` to the DB role. | -| grantDbSchemaToRole | Grants access to the schema to the DB role. | -| grantSchemaReadToRole | Grants read permission on all the tables in the `schema` to the DB role. | -| ingestData | Ingest data from S3 into a Redshift table. | -| mergeToTargetTable | Run the `MERGE` query using simplified mode. | -| runCustomSQL | Runs a custom SQL. | +The CloudWatch Log Group used by the Lambda responsible for CRUD operations via mTLS authentication. --- -##### `toString` +##### `inClusterAclRole`Optional ```typescript -public toString(): string +public readonly inClusterAclRole: IRole; ``` -Returns a string representation of this construct. +- *Type:* aws-cdk-lib.aws_iam.IRole -##### `retrieveVersion` +The IAM role used by the Lambda responsible for CRUD operations via mTLS authentication. + +--- + +##### `inClusterAclSecurityGroup`Optional ```typescript -public retrieveVersion(): any +public readonly inClusterAclSecurityGroup: ISecurityGroup[]; ``` -Retrieve DSF package.json version. +- *Type:* aws-cdk-lib.aws_ec2.ISecurityGroup[] -##### `assignDbRolesToIAMRole` +The Security Group used by the Lambda responsible for CRUD operations via mTLS authentication. + +--- + +##### `updateConnectivityFunction`Optional ```typescript -public assignDbRolesToIAMRole(dbRoles: string[], targetRole: IRole): void +public readonly updateConnectivityFunction: IFunction; ``` -Assigns Redshift DB roles to IAM role vs the `RedshiftDbRoles` tag. +- *Type:* aws-cdk-lib.aws_lambda.IFunction -###### `dbRoles`Required +The Lambda function responsible for updating MSK Connectivity. -- *Type:* string[] +--- -List of Redshift DB roles to assign to IAM role. 
+##### `updateConnectivityLogGroup`Optional + +```typescript +public readonly updateConnectivityLogGroup: ILogGroup; +``` + +- *Type:* aws-cdk-lib.aws_logs.ILogGroup + +The CloudWatch Log Group used by the Lambda responsible for updating MSK Connectivity. --- -###### `targetRole`Required +##### `updateConnectivityRole`Optional + +```typescript +public readonly updateConnectivityRole: IRole; +``` - *Type:* aws-cdk-lib.aws_iam.IRole -The IAM role to assign the Redshift DB roles to. +The IAM Role used by the Lambda responsible for updating MSK Connectivity. --- -##### `createDbRole` +##### `updateConnectivitySecurityGroup`Optional ```typescript -public createDbRole(id: string, databaseName: string, roleName: string): CustomResource +public readonly updateConnectivitySecurityGroup: ISecurityGroup[]; ``` -Creates a new DB role. - -###### `id`Required - -- *Type:* string +- *Type:* aws-cdk-lib.aws_ec2.ISecurityGroup[] -The CDK Construct ID. +The Security Group used by the Lambda responsible for updating MSK Connectivity. --- -###### `databaseName`Required +##### `updateZookepeerFunction`Optional -- *Type:* string +```typescript +public readonly updateZookepeerFunction: IFunction; +``` -The name of the database to run this command. +- *Type:* aws-cdk-lib.aws_lambda.IFunction + +The Lambda function responsible for updating Zookeeper. --- -###### `roleName`Required +##### `updateZookepeerLogGroup`Optional -- *Type:* string +```typescript +public readonly updateZookepeerLogGroup: ILogGroup; +``` -The name of the role to create. +- *Type:* aws-cdk-lib.aws_logs.ILogGroup + +The CloudWatch Log Group used by the Lambda responsible for updating Zookeeper. --- -##### `grantDbAllPrivilegesToRole` +##### `updateZookepeerRole`Optional ```typescript -public grantDbAllPrivilegesToRole(id: string, databaseName: string, schema: string, roleName: string): CustomResource +public readonly updateZookepeerRole: IRole; ``` -Grants both read and write permissions on all the tables in the `schema` to the DB role. - -###### `id`Required - -- *Type:* string +- *Type:* aws-cdk-lib.aws_iam.IRole -The CDK Construct ID. +The IAM Role used by the Lambda responsible for updating Zookeeper. --- -###### `databaseName`Required +##### `updateZookepeerSecurityGroup`Optional -- *Type:* string +```typescript +public readonly updateZookepeerSecurityGroup: ISecurityGroup; +``` -The name of the database to run this command. +- *Type:* aws-cdk-lib.aws_ec2.ISecurityGroup ---- +THe Security Group associated to the Lambda responsible for updating Zookeeper. -###### `schema`Required +--- -- *Type:* string +#### Constants -The schema where the tables are located in. +| **Name** | **Type** | **Description** | +| --- | --- | --- | +| DSF_OWNED_TAG | string | *No description.* | +| DSF_TRACKING_CODE | string | *No description.* | +| MSK_DEFAULT_VERSION | @cdklabs/aws-data-solutions-framework.streaming.KafkaVersion | *No description.* | --- -###### `roleName`Required +##### `DSF_OWNED_TAG`Required -- *Type:* string +```typescript +public readonly DSF_OWNED_TAG: string; +``` -The DB role to grant the permissions to. +- *Type:* string --- -##### `grantDbSchemaToRole` +##### `DSF_TRACKING_CODE`Required ```typescript -public grantDbSchemaToRole(id: string, databaseName: string, schema: string, roleName: string): CustomResource +public readonly DSF_TRACKING_CODE: string; ``` -Grants access to the schema to the DB role. - -###### `id`Required - - *Type:* string -The CDK Construct ID. 
- --- -###### `databaseName`Required +##### `MSK_DEFAULT_VERSION`Required -- *Type:* string +```typescript +public readonly MSK_DEFAULT_VERSION: KafkaVersion; +``` -The name of the database to run this command. +- *Type:* @cdklabs/aws-data-solutions-framework.streaming.KafkaVersion --- -###### `schema`Required - -- *Type:* string - -The schema where the tables are located in. +### MskServerless ---- +A construct to create an MSK Serverless cluster. -###### `roleName`Required +> [https://awslabs.github.io/data-solutions-framework-on-aws/](https://awslabs.github.io/data-solutions-framework-on-aws/) -- *Type:* string +*Example* -The DB role to grant the permissions to. +```typescript +const msk = new dsf.streaming.MskServerless(this, 'cluster'); +``` ---- -##### `grantSchemaReadToRole` +#### Initializers ```typescript -public grantSchemaReadToRole(id: string, databaseName: string, schema: string, roleName: string): CustomResource -``` - -Grants read permission on all the tables in the `schema` to the DB role. +import { streaming } from '@cdklabs/aws-data-solutions-framework' -###### `id`Required +new streaming.MskServerless(scope: Construct, id: string, props?: MskServerlessProps) +``` -- *Type:* string +| **Name** | **Type** | **Description** | +| --- | --- | --- | +| scope | constructs.Construct | the Scope of the CDK Construct. | +| id | string | the ID of the CDK Construct. | +| props | @cdklabs/aws-data-solutions-framework.streaming.MskServerlessProps | *No description.* | --- -###### `databaseName`Required +##### `scope`Required -- *Type:* string +- *Type:* constructs.Construct -The name of the database to run this command. +the Scope of the CDK Construct. --- -###### `schema`Required +##### `id`Required - *Type:* string -The schema where the tables are located in. +the ID of the CDK Construct. --- -###### `roleName`Required +##### `props`Optional -- *Type:* string +- *Type:* @cdklabs/aws-data-solutions-framework.streaming.MskServerlessProps -The DB role to grant the permissions to. +--- + +#### Methods + +| **Name** | **Description** | +| --- | --- | +| toString | Returns a string representation of this construct. | +| addClusterPolicy | Add a cluster policy. | +| addTopic | Creates a topic in the MSK Serverless. | +| grantConsume | Grant a principal the right to consume data from a topic. | +| grantProduce | Grant a principal to produce data to a topic. | +| retrieveVersion | Retrieve DSF package.json version. | --- -##### `ingestData` +##### `toString` ```typescript -public ingestData(id: string, databaseName: string, targetTable: string, sourceBucket: IBucket, sourcePrefix: string, ingestAdditionalOptions?: string, role?: IRole): CustomResource +public toString(): string ``` -Ingest data from S3 into a Redshift table. - -###### `id`Required +Returns a string representation of this construct. -- *Type:* string +##### `addClusterPolicy` -The CDK Construct ID. +```typescript +public addClusterPolicy(policy: PolicyDocument, id: string): CfnClusterPolicy +``` ---- +Add a cluster policy. -###### `databaseName`Required +###### `policy`Required -- *Type:* string +- *Type:* aws-cdk-lib.aws_iam.PolicyDocument -The name of the database to run this command. +the IAM principal to grand the consume action. --- -###### `targetTable`Required +###### `id`Required - *Type:* string -The target table to load the data into. - ---- - -###### `sourceBucket`Required - -- *Type:* aws-cdk-lib.aws_s3.IBucket - -The bucket where the source data would be coming from. +the CDK id for the Cluster Policy. 
--- -###### `sourcePrefix`Required - -- *Type:* string +##### `addTopic` -The location inside the bucket where the data would be ingested from. +```typescript +public addTopic(id: string, topicDefinition: MskTopic, removalPolicy?: RemovalPolicy, waitForLeaders?: boolean, timeout?: number): CustomResource +``` ---- +Creates a topic in the MSK Serverless. -###### `ingestAdditionalOptions`Optional +###### `id`Required - *Type:* string -Optional. - -Additional options to pass to the `COPY` command. For example, `delimiter '|'` or `ignoreheader 1` +the CDK id for the topic. --- -###### `role`Optional - -- *Type:* aws-cdk-lib.aws_iam.IRole +###### `topicDefinition`Required -Optional. +- *Type:* @cdklabs/aws-data-solutions-framework.streaming.MskTopic -The IAM Role to use to access the data in S3. If not provided, it would use the default IAM role configured in the Redshift Namespace +the Kafka topic definition. --- -##### `mergeToTargetTable` - -```typescript -public mergeToTargetTable(id: string, databaseName: string, sourceTable: string, targetTable: string, sourceColumnId?: string, targetColumnId?: string): CustomResource -``` - -Run the `MERGE` query using simplified mode. - -This command would do an upsert into the target table. - -###### `id`Required +###### `removalPolicy`Optional -- *Type:* string +- *Type:* aws-cdk-lib.RemovalPolicy -The CDK Construct ID. +Wether to keep the topic or delete it when removing the resource from the Stack. --- -###### `databaseName`Required +###### `waitForLeaders`Optional -- *Type:* string +- *Type:* boolean -The name of the database to run this command. +Wait until metadata for the new topics doesn't throw LEADER_NOT_AVAILABLE. --- -###### `sourceTable`Required - -- *Type:* string +###### `timeout`Optional -The source table name. +- *Type:* number -Schema can also be included using the following format: `schemaName.tableName` +The time in ms to wait for a topic to be completely created on the controller node. --- -###### `targetTable`Required - -- *Type:* string - -The target table name. +##### `grantConsume` -Schema can also be included using the following format: `schemaName.tableName` +```typescript +public grantConsume(topicName: string, principal: IPrincipal): CustomResource +``` ---- +Grant a principal the right to consume data from a topic. -###### `sourceColumnId`Optional +###### `topicName`Required - *Type:* string -The column in the source table that's used to determine whether the rows in the `sourceTable` can be matched with rows in the `targetTable`. - -Default is `id` +the topic to which the principal can consume data from. --- -###### `targetColumnId`Optional - -- *Type:* string +###### `principal`Required -The column in the target table that's used to determine whether the rows in the `sourceTable` can be matched with rows in the `targetTable`. +- *Type:* aws-cdk-lib.aws_iam.IPrincipal -Default is `id` +the IAM principal to grand the consume action. --- -##### `runCustomSQL` +##### `grantProduce` ```typescript -public runCustomSQL(id: string, databaseName: string, sql: string, deleteSql?: string): CustomResource +public grantProduce(topicName: string, principal: IPrincipal): CustomResource ``` -Runs a custom SQL. - -Once the custom resource finishes execution, the attribute `Data` contains an attribute `execId` which contains the Redshift Data API execution ID. You can then use this to retrieve execution results via the `GetStatementResult` API. - -###### `id`Required - -- *Type:* string - -The CDK Construct ID. 
- ---- +Grant a principal to produce data to a topic. -###### `databaseName`Required +###### `topicName`Required - *Type:* string -The name of the database to run this command. +the name of the topic to grant producer permissions. --- -###### `sql`Required +###### `principal`Required -- *Type:* string +- *Type:* aws-cdk-lib.aws_iam.IPrincipal -The sql to run. +the IAM principal to grand producer permissions. --- -###### `deleteSql`Optional - -- *Type:* string - -Optional. +##### `retrieveVersion` -The sql to run when this resource gets deleted +```typescript +public retrieveVersion(): any +``` ---- +Retrieve DSF package.json version. #### Static Functions | **Name** | **Description** | | --- | --- | -| isConstruct | Checks if `x` is a construct. | +| isConstruct | Checks if `x` is a construct. | --- -##### `isConstruct` +##### `isConstruct` ```typescript -import { consumption } from '@cdklabs/aws-data-solutions-framework' +import { streaming } from '@cdklabs/aws-data-solutions-framework' -consumption.RedshiftData.isConstruct(x: any) +streaming.MskServerless.isConstruct(x: any) ``` Checks if `x` is a construct. @@ -6986,7 +7098,7 @@ library can be accidentally installed, and `instanceof` will behave unpredictably. It is safest to avoid using `instanceof`, and using this type-testing method instead. -###### `x`Required +###### `x`Required - *Type:* any @@ -6998,24 +7110,16 @@ Any object. | **Name** | **Type** | **Description** | | --- | --- | --- | -| node | constructs.Node | The tree node. | -| customResourceSecurityGroup | aws-cdk-lib.aws_ec2.ISecurityGroup | The Security Group used by the Custom Resource when deployed in a VPC. | -| vpcEndpoint | aws-cdk-lib.aws_ec2.IInterfaceVpcEndpoint | The created Redshift Data API interface vpc endpoint when deployed in a VPC. | -| vpcEndpointSecurityGroup | aws-cdk-lib.aws_ec2.ISecurityGroup | The Security Group used by the VPC Endpoint when deployed in a VPC. | -| dataAccessTargetProps | @cdklabs/aws-data-solutions-framework.consumption.RedshiftDataAccessTargetProps | Contains normalized details of the target Redshift cluster/workgroup for data access. | -| executionRole | aws-cdk-lib.aws_iam.IRole | The IAM Role for the Redshift Data API execution. | -| statusFunction | aws-cdk-lib.aws_lambda.IFunction | The Lambda Function for the Redshift Data API status checks. | -| statusLogGroup | aws-cdk-lib.aws_logs.ILogGroup | The CloudWatch Log Group for the Redshift Data API status checks. | -| submitFunction | aws-cdk-lib.aws_lambda.IFunction | The Lambda Function for the Redshift Data submission. | -| submitLogGroup | aws-cdk-lib.aws_logs.ILogGroup | The CloudWatch Log Group for the Redshift Data API submission. | -| taggingManagedPolicy | aws-cdk-lib.aws_iam.IManagedPolicy | The managed IAM policy allowing IAM Role to retrieve tag information. | -| cleanUpFunction | aws-cdk-lib.aws_lambda.IFunction | The Lambda function for the S3 data copy cleaning up lambda. | -| cleanUpLogGroup | aws-cdk-lib.aws_logs.ILogGroup | The CloudWatch Log Group for the Redshift Data cleaning up lambda. | -| cleanUpRole | aws-cdk-lib.aws_iam.IRole | The IAM Role for the the S3 data copy cleaning up lambda. | +| node | constructs.Node | The tree node. 
| +| cluster | aws-cdk-lib.aws_msk.CfnServerlessCluster | *No description.* | +| clusterName | string | *No description.* | +| lambdaSecurityGroup | aws-cdk-lib.aws_ec2.ISecurityGroup | *No description.* | +| vpc | aws-cdk-lib.aws_ec2.IVpc | *No description.* | +| brokerSecurityGroup | aws-cdk-lib.aws_ec2.ISecurityGroup | *No description.* | --- -##### `node`Required +##### `node`Required ```typescript public readonly node: Node; @@ -7027,427 +7131,275 @@ The tree node. --- -##### `customResourceSecurityGroup`Optional +##### `cluster`Required ```typescript -public readonly customResourceSecurityGroup: ISecurityGroup; +public readonly cluster: CfnServerlessCluster; ``` -- *Type:* aws-cdk-lib.aws_ec2.ISecurityGroup - -The Security Group used by the Custom Resource when deployed in a VPC. +- *Type:* aws-cdk-lib.aws_msk.CfnServerlessCluster --- -##### `vpcEndpoint`Optional +##### `clusterName`Required ```typescript -public readonly vpcEndpoint: IInterfaceVpcEndpoint; +public readonly clusterName: string; ``` -- *Type:* aws-cdk-lib.aws_ec2.IInterfaceVpcEndpoint - -The created Redshift Data API interface vpc endpoint when deployed in a VPC. +- *Type:* string --- -##### `vpcEndpointSecurityGroup`Optional +##### `lambdaSecurityGroup`Required ```typescript -public readonly vpcEndpointSecurityGroup: ISecurityGroup; +public readonly lambdaSecurityGroup: ISecurityGroup; ``` - *Type:* aws-cdk-lib.aws_ec2.ISecurityGroup -The Security Group used by the VPC Endpoint when deployed in a VPC. - --- -##### `dataAccessTargetProps`Required +##### `vpc`Required ```typescript -public readonly dataAccessTargetProps: RedshiftDataAccessTargetProps; +public readonly vpc: IVpc; ``` -- *Type:* @cdklabs/aws-data-solutions-framework.consumption.RedshiftDataAccessTargetProps - -Contains normalized details of the target Redshift cluster/workgroup for data access. +- *Type:* aws-cdk-lib.aws_ec2.IVpc --- -##### `executionRole`Required +##### `brokerSecurityGroup`Optional ```typescript -public readonly executionRole: IRole; +public readonly brokerSecurityGroup: ISecurityGroup; ``` -- *Type:* aws-cdk-lib.aws_iam.IRole - -The IAM Role for the Redshift Data API execution. +- *Type:* aws-cdk-lib.aws_ec2.ISecurityGroup --- -##### `statusFunction`Required - -```typescript -public readonly statusFunction: IFunction; -``` - -- *Type:* aws-cdk-lib.aws_lambda.IFunction +#### Constants -The Lambda Function for the Redshift Data API status checks. +| **Name** | **Type** | **Description** | +| --- | --- | --- | +| DSF_OWNED_TAG | string | *No description.* | +| DSF_TRACKING_CODE | string | *No description.* | --- -##### `statusLogGroup`Required +##### `DSF_OWNED_TAG`Required ```typescript -public readonly statusLogGroup: ILogGroup; +public readonly DSF_OWNED_TAG: string; ``` -- *Type:* aws-cdk-lib.aws_logs.ILogGroup - -The CloudWatch Log Group for the Redshift Data API status checks. +- *Type:* string --- -##### `submitFunction`Required +##### `DSF_TRACKING_CODE`Required ```typescript -public readonly submitFunction: IFunction; +public readonly DSF_TRACKING_CODE: string; ``` -- *Type:* aws-cdk-lib.aws_lambda.IFunction - -The Lambda Function for the Redshift Data submission. +- *Type:* string --- -##### `submitLogGroup`Required +### OpenSearchCluster -```typescript -public readonly submitLogGroup: ILogGroup; -``` +A construct to provision Amazon OpenSearch Cluster and OpenSearch Dashboards. -- *Type:* aws-cdk-lib.aws_logs.ILogGroup +Uses IAM Identity Center SAML authentication. 
+If OpenSearch cluster is deployed in vpc created using DataVpc construct, +ClientVPNEndpoint will be provisioned automatically for secure access to OpenSearch Dashboards. -The CloudWatch Log Group for the Redshift Data API submission. +*Example* ---- +```typescript + const osCluster = new dsf.consumption.OpenSearchCluster(this, 'MyOpenSearchCluster',{ + domainName:"mycluster1", + samlEntityId:'', + samlMetadataContent:'', + samlMasterBackendRole:'', + deployInVpc:true, + removalPolicy:cdk.RemovalPolicy.DESTROY + }); -##### `taggingManagedPolicy`Required + osCluster.addRoleMapping('DashBoardUser', 'dashboards_user',''); + osCluster.addRoleMapping('ReadAllRole', 'readall',''); +``` + + +#### Initializers ```typescript -public readonly taggingManagedPolicy: IManagedPolicy; -``` +import { consumption } from '@cdklabs/aws-data-solutions-framework' -- *Type:* aws-cdk-lib.aws_iam.IManagedPolicy +new consumption.OpenSearchCluster(scope: Construct, id: string, props: OpenSearchClusterProps) +``` -The managed IAM policy allowing IAM Role to retrieve tag information. +| **Name** | **Type** | **Description** | +| --- | --- | --- | +| scope | constructs.Construct | the Scope of the AWS CDK Construct. | +| id | string | the ID of the AWS CDK Construct. | +| props | @cdklabs/aws-data-solutions-framework.consumption.OpenSearchClusterProps | the OpenSearchCluster [properties]{@link OpenSearchClusterProps}. | --- -##### `cleanUpFunction`Optional - -```typescript -public readonly cleanUpFunction: IFunction; -``` +##### `scope`Required -- *Type:* aws-cdk-lib.aws_lambda.IFunction +- *Type:* constructs.Construct -The Lambda function for the S3 data copy cleaning up lambda. +the Scope of the AWS CDK Construct. --- -##### `cleanUpLogGroup`Optional - -```typescript -public readonly cleanUpLogGroup: ILogGroup; -``` +##### `id`Required -- *Type:* aws-cdk-lib.aws_logs.ILogGroup +- *Type:* string -The CloudWatch Log Group for the Redshift Data cleaning up lambda. +the ID of the AWS CDK Construct. --- -##### `cleanUpRole`Optional - -```typescript -public readonly cleanUpRole: IRole; -``` +##### `props`Required -- *Type:* aws-cdk-lib.aws_iam.IRole +- *Type:* @cdklabs/aws-data-solutions-framework.consumption.OpenSearchClusterProps -The IAM Role for the the S3 data copy cleaning up lambda. +the OpenSearchCluster [properties]{@link OpenSearchClusterProps}. --- -#### Constants +#### Methods -| **Name** | **Type** | **Description** | -| --- | --- | --- | -| DSF_OWNED_TAG | string | *No description.* | -| DSF_TRACKING_CODE | string | *No description.* | +| **Name** | **Description** | +| --- | --- | +| toString | Returns a string representation of this construct. | +| addRoleMapping | *No description.* | +| callOpenSearchApi | Calls OpenSearch API using custom resource. | +| retrieveVersion | Retrieve DSF package.json version. | --- -##### `DSF_OWNED_TAG`Required +##### `toString` ```typescript -public readonly DSF_OWNED_TAG: string; +public toString(): string ``` -- *Type:* string - ---- +Returns a string representation of this construct. -##### `DSF_TRACKING_CODE`Required +##### `addRoleMapping` ```typescript -public readonly DSF_TRACKING_CODE: string; +public addRoleMapping(id: string, name: string, role: string, persist?: boolean): CustomResource ``` +> [https://opensearch.org/docs/2.9/security/access-control/users-roles/#predefined-roles](https://opensearch.org/docs/2.9/security/access-control/users-roles/#predefined-roles) + +###### `id`Required + - *Type:* string +The CDK resource ID. 
+ --- -### RedshiftDataSharing +###### `name`Required -Creates an asynchronous custom resource to manage the data sharing lifecycle for both data producers and data consumers. +- *Type:* string -This also covers both same account and cross account access. +OpenSearch role name. -*Example* +--- -```typescript -const redshiftAdminSecret = Secret.fromSecretPartialArn(this, 'RedshiftAdminCredentials', 'arn:aws:secretsmanager:us-east-1:XXXXXXXX:secret:YYYYYYYY'); +###### `role`Required -const redshiftVpc = Vpc.fromLookup(this, 'RedshiftVpc', { - vpcId: 'XXXXXXXX', -}); +- *Type:* string -const dataAccess = new dsf.consumption.RedshiftData(this, 'RedshiftDataAccess', { - workgroupId: 'XXXXXXXXXXXXXXX', - secret: redshiftAdminSecret, - vpc: redshiftVpc, - subnets: redshiftVpc.selectSubnets({ - subnetGroupName: 'YYYYYYYY' - }), - createInterfaceVpcEndpoint: true, - executionTimeout: Duration.minutes(10), -}); +list of IAM roles. -const dataShare = new dsf.consumption.RedshiftDataSharing(this, 'RedshiftDataShare', { - redshiftData: dataAccess, - workgroupId: 'XXXXXXXXXXXXXXX', - secret: redshiftAdminSecret, - vpc: redshiftVpc, - subnets: redshiftVpc.selectSubnets({ - subnetGroupName: 'YYYYYYYY' - }), - createInterfaceVpcEndpoint: true, - executionTimeout: Duration.minutes(10), -}); +For IAM Identity center provide SAML group Id as a role - const share = dataShare.createShare('ProducerShare', 'default', 'example_share', 'public', ['public.customers']); +--- - const grantToConsumer = dataShare.grant('GrantToConsumer', { - dataShareName: 'example_share', - databaseName: 'default', - autoAuthorized: true, - accountId: "", - dataShareArn: '', - }); +###### `persist`Optional -dataShare.createDatabaseFromShare('ProducerShare', { - consumerNamespaceArn: '', - newDatabaseName: 'db_from_share', - databaseName: 'default', - dataShareName: 'example_share', - dataShareArn: '', - accountId: "", -}); -``` +- *Type:* boolean +Set to true if you want to prevent the roles to be ovewritten by subsequent PUT API calls. -#### Initializers +Default false. -```typescript -import { consumption } from '@cdklabs/aws-data-solutions-framework' +--- -new consumption.RedshiftDataSharing(scope: Construct, id: string, props: RedshiftDataSharingProps) +##### `callOpenSearchApi` + +```typescript +public callOpenSearchApi(id: string, apiPath: string, body: any, method?: string): CustomResource ``` -| **Name** | **Type** | **Description** | -| --- | --- | --- | -| scope | constructs.Construct | *No description.* | -| id | string | *No description.* | -| props | @cdklabs/aws-data-solutions-framework.consumption.RedshiftDataSharingProps | *No description.* | +Calls OpenSearch API using custom resource. ---- +###### `id`Required -##### `scope`Required +- *Type:* string -- *Type:* constructs.Construct +The CDK resource ID. --- -##### `id`Required +###### `apiPath`Required - *Type:* string +OpenSearch API path. + --- -##### `props`Required +###### `body`Required -- *Type:* @cdklabs/aws-data-solutions-framework.consumption.RedshiftDataSharingProps +- *Type:* any + +OpenSearch API request body. --- -#### Methods - -| **Name** | **Description** | -| --- | --- | -| toString | Returns a string representation of this construct. | -| retrieveVersion | Retrieve DSF package.json version. | -| createDatabaseFromShare | Consume datashare by creating a new database pointing to the share. | -| createShare | Create a new datashare. | -| grant | Create a datashare grant to a namespace if it's in the same account, or to another account. 
| - ---- - -##### `toString` - -```typescript -public toString(): string -``` - -Returns a string representation of this construct. - -##### `retrieveVersion` - -```typescript -public retrieveVersion(): any -``` - -Retrieve DSF package.json version. - -##### `createDatabaseFromShare` - -```typescript -public createDatabaseFromShare(id: string, props: RedshiftDataSharingCreateDbProps): RedshiftDataSharingCreateDbFromShareProps -``` - -Consume datashare by creating a new database pointing to the share. - -If datashare is coming from a different account, setting `autoAssociate` to true -automatically associates the datashare to the cluster before the new database is created. - -###### `id`Required - -- *Type:* string - -the CDK ID of the resource. - ---- - -###### `props`Required - -- *Type:* @cdklabs/aws-data-solutions-framework.consumption.RedshiftDataSharingCreateDbProps - -`RedshiftDataSharingCreateDbProps`. - ---- - -##### `createShare` - -```typescript -public createShare(id: string, databaseName: string, dataShareName: string, schema: string, tables: string[]): RedshiftNewShareProps -``` - -Create a new datashare. - -###### `id`Required - -- *Type:* string - -the CDK ID of the resource. - ---- - -###### `databaseName`Required - -- *Type:* string - -The name of the database to connect to. - ---- - -###### `dataShareName`Required - -- *Type:* string - -The name of the datashare. - ---- - -###### `schema`Required +###### `method`Optional - *Type:* string -The schema to add in the datashare. - ---- - -###### `tables`Required - -- *Type:* string[] - -The list of tables that would be included in the datashare. - -This must follow the format: `.` +Opensearch API method,. --- -##### `grant` +##### `retrieveVersion` ```typescript -public grant(id: string, props: RedshiftDataSharingGrantProps): RedshiftDataSharingGrantedProps +public retrieveVersion(): any ``` -Create a datashare grant to a namespace if it's in the same account, or to another account. - -###### `id`Required - -- *Type:* string - -the CDK ID of the resource. - ---- - -###### `props`Required - -- *Type:* @cdklabs/aws-data-solutions-framework.consumption.RedshiftDataSharingGrantProps - -`RedshiftDataSharingGrantProps`. - ---- +Retrieve DSF package.json version. #### Static Functions | **Name** | **Description** | | --- | --- | -| isConstruct | Checks if `x` is a construct. | +| isConstruct | Checks if `x` is a construct. | --- -##### `isConstruct` +##### `isConstruct` ```typescript import { consumption } from '@cdklabs/aws-data-solutions-framework' -consumption.RedshiftDataSharing.isConstruct(x: any) +consumption.OpenSearchCluster.isConstruct(x: any) ``` Checks if `x` is a construct. @@ -7466,7 +7418,7 @@ library can be accidentally installed, and `instanceof` will behave unpredictably. It is safest to avoid using `instanceof`, and using this type-testing method instead. -###### `x`Required +###### `x`Required - *Type:* any @@ -7478,23 +7430,16 @@ Any object. | **Name** | **Type** | **Description** | | --- | --- | --- | -| node | constructs.Node | The tree node. | -| customResourceSecurityGroup | aws-cdk-lib.aws_ec2.ISecurityGroup | The Security Group used by the Custom Resource when deployed in a VPC. | -| vpcEndpoint | aws-cdk-lib.aws_ec2.IInterfaceVpcEndpoint | The created Redshift Data API interface vpc endpoint when deployed in a VPC. | -| vpcEndpointSecurityGroup | aws-cdk-lib.aws_ec2.ISecurityGroup | The Security Group used by the VPC Endpoint when deployed in a VPC. 
| -| dataAccessTargetProps | @cdklabs/aws-data-solutions-framework.consumption.RedshiftDataAccessTargetProps | Contains normalized details of the target Redshift cluster/workgroup for data access. | -| executionRole | aws-cdk-lib.aws_iam.IRole | The IAM Role for the Redshift Data API execution. | -| statusFunction | aws-cdk-lib.aws_lambda.IFunction | The Lambda Function for the Redshift Data Sharing status checks. | -| statusLogGroup | aws-cdk-lib.aws_logs.ILogGroup | The CloudWatch Log Group for the Redshift Data Sharing status checks. | -| submitFunction | aws-cdk-lib.aws_lambda.IFunction | The Lambda Function for the Redshift Data Sharing submission. | -| submitLogGroup | aws-cdk-lib.aws_logs.ILogGroup | The CloudWatch Log Group for the Redshift Data Sharing submission. | -| cleanUpFunction | aws-cdk-lib.aws_lambda.IFunction | The Lambda function for the cleaning up lambda. | -| cleanUpLogGroup | aws-cdk-lib.aws_logs.ILogGroup | The CloudWatch Log Group for the Redshift Data Sharing cleaning up lambda. | -| cleanUpRole | aws-cdk-lib.aws_iam.IRole | The IAM Role for the the cleaning up lambda. | +| node | constructs.Node | The tree node. | +| domain | aws-cdk-lib.aws_opensearchservice.IDomain | OpenSearchCluster domain. | +| encryptionKey | aws-cdk-lib.aws_kms.IKey | The KMS Key used to encrypt data and logs. | +| logGroup | aws-cdk-lib.aws_logs.ILogGroup | CloudWatch Logs Log Group to store OpenSearch cluster logs. | +| masterRole | aws-cdk-lib.aws_iam.IRole | IAM Role used to provision and configure OpenSearch domain. | +| vpc | aws-cdk-lib.aws_ec2.IVpc | VPC OpenSearch cluster is provisioned in. | --- -##### `node`Required +##### `node`Required ```typescript public readonly node: Node; @@ -7506,224 +7451,150 @@ The tree node. --- -##### `customResourceSecurityGroup`Optional +##### `domain`Required ```typescript -public readonly customResourceSecurityGroup: ISecurityGroup; +public readonly domain: IDomain; ``` -- *Type:* aws-cdk-lib.aws_ec2.ISecurityGroup +- *Type:* aws-cdk-lib.aws_opensearchservice.IDomain -The Security Group used by the Custom Resource when deployed in a VPC. +OpenSearchCluster domain. --- -##### `vpcEndpoint`Optional +##### `encryptionKey`Required ```typescript -public readonly vpcEndpoint: IInterfaceVpcEndpoint; +public readonly encryptionKey: IKey; ``` -- *Type:* aws-cdk-lib.aws_ec2.IInterfaceVpcEndpoint +- *Type:* aws-cdk-lib.aws_kms.IKey -The created Redshift Data API interface vpc endpoint when deployed in a VPC. +The KMS Key used to encrypt data and logs. --- -##### `vpcEndpointSecurityGroup`Optional +##### `logGroup`Required ```typescript -public readonly vpcEndpointSecurityGroup: ISecurityGroup; +public readonly logGroup: ILogGroup; ``` -- *Type:* aws-cdk-lib.aws_ec2.ISecurityGroup +- *Type:* aws-cdk-lib.aws_logs.ILogGroup -The Security Group used by the VPC Endpoint when deployed in a VPC. +CloudWatch Logs Log Group to store OpenSearch cluster logs. --- -##### `dataAccessTargetProps`Required +##### `masterRole`Required ```typescript -public readonly dataAccessTargetProps: RedshiftDataAccessTargetProps; +public readonly masterRole: IRole; ``` -- *Type:* @cdklabs/aws-data-solutions-framework.consumption.RedshiftDataAccessTargetProps +- *Type:* aws-cdk-lib.aws_iam.IRole -Contains normalized details of the target Redshift cluster/workgroup for data access. +IAM Role used to provision and configure OpenSearch domain. 
--- -##### `executionRole`Required +##### `vpc`Optional ```typescript -public readonly executionRole: IRole; +public readonly vpc: IVpc; ``` -- *Type:* aws-cdk-lib.aws_iam.IRole +- *Type:* aws-cdk-lib.aws_ec2.IVpc -The IAM Role for the Redshift Data API execution. +VPC OpenSearch cluster is provisioned in. --- -##### `statusFunction`Required - -```typescript -public readonly statusFunction: IFunction; -``` - -- *Type:* aws-cdk-lib.aws_lambda.IFunction +#### Constants -The Lambda Function for the Redshift Data Sharing status checks. +| **Name** | **Type** | **Description** | +| --- | --- | --- | +| DSF_OWNED_TAG | string | *No description.* | +| DSF_TRACKING_CODE | string | *No description.* | --- -##### `statusLogGroup`Required +##### `DSF_OWNED_TAG`Required ```typescript -public readonly statusLogGroup: ILogGroup; +public readonly DSF_OWNED_TAG: string; ``` -- *Type:* aws-cdk-lib.aws_logs.ILogGroup - -The CloudWatch Log Group for the Redshift Data Sharing status checks. +- *Type:* string --- -##### `submitFunction`Required +##### `DSF_TRACKING_CODE`Required ```typescript -public readonly submitFunction: IFunction; +public readonly DSF_TRACKING_CODE: string; ``` -- *Type:* aws-cdk-lib.aws_lambda.IFunction - -The Lambda Function for the Redshift Data Sharing submission. +- *Type:* string --- -##### `submitLogGroup`Required +### PySparkApplicationPackage -```typescript -public readonly submitLogGroup: ILogGroup; -``` +A construct that takes your PySpark application, packages its virtual environment and uploads it along its entrypoint to an Amazon S3 bucket This construct requires Docker daemon installed locally to run. -- *Type:* aws-cdk-lib.aws_logs.ILogGroup +> [https://awslabs.github.io/data-solutions-framework-on-aws/docs/constructs/library/Processing/pyspark-application-package](https://awslabs.github.io/data-solutions-framework-on-aws/docs/constructs/library/Processing/pyspark-application-package) -The CloudWatch Log Group for the Redshift Data Sharing submission. +*Example* ---- +```typescript +let pysparkPacker = new dsf.processing.PySparkApplicationPackage (this, 'pysparkPacker', { + applicationName: 'my-pyspark', + entrypointPath: '/Users/my-user/my-spark-job/app/app-pyspark.py', + dependenciesFolder: '/Users/my-user/my-spark-job/app', + removalPolicy: cdk.RemovalPolicy.DESTROY, +}); +``` -##### `cleanUpFunction`Optional + +#### Initializers ```typescript -public readonly cleanUpFunction: IFunction; -``` +import { processing } from '@cdklabs/aws-data-solutions-framework' -- *Type:* aws-cdk-lib.aws_lambda.IFunction +new processing.PySparkApplicationPackage(scope: Construct, id: string, props: PySparkApplicationPackageProps) +``` -The Lambda function for the cleaning up lambda. +| **Name** | **Type** | **Description** | +| --- | --- | --- | +| scope | constructs.Construct | the Scope of the CDK Construct. | +| id | string | the ID of the CDK Construct. | +| props | @cdklabs/aws-data-solutions-framework.processing.PySparkApplicationPackageProps | {@link PySparkApplicationPackageProps}. | --- -##### `cleanUpLogGroup`Optional +##### `scope`Required -```typescript -public readonly cleanUpLogGroup: ILogGroup; -``` - -- *Type:* aws-cdk-lib.aws_logs.ILogGroup - -The CloudWatch Log Group for the Redshift Data Sharing cleaning up lambda. - ---- - -##### `cleanUpRole`Optional - -```typescript -public readonly cleanUpRole: IRole; -``` - -- *Type:* aws-cdk-lib.aws_iam.IRole - -The IAM Role for the the cleaning up lambda. 
- ---- - -#### Constants - -| **Name** | **Type** | **Description** | -| --- | --- | --- | -| DSF_OWNED_TAG | string | *No description.* | -| DSF_TRACKING_CODE | string | *No description.* | - ---- - -##### `DSF_OWNED_TAG`Required - -```typescript -public readonly DSF_OWNED_TAG: string; -``` +- *Type:* constructs.Construct -- *Type:* string +the Scope of the CDK Construct. --- -##### `DSF_TRACKING_CODE`Required - -```typescript -public readonly DSF_TRACKING_CODE: string; -``` +##### `id`Required - *Type:* string ---- - -### RedshiftServerlessNamespace - -Create a Redshift Serverless Namespace with the admin credentials stored in Secrets Manager. - -*Example* - -```typescript -const namespace = new dsf.consumption.RedshiftServerlessNamespace(this, 'DefaultServerlessNamespace', { - dbName: 'defaultdb', - name: 'default' -}); -``` - - -#### Initializers - -```typescript -import { consumption } from '@cdklabs/aws-data-solutions-framework' - -new consumption.RedshiftServerlessNamespace(scope: Construct, id: string, props: RedshiftServerlessNamespaceProps) -``` - -| **Name** | **Type** | **Description** | -| --- | --- | --- | -| scope | constructs.Construct | *No description.* | -| id | string | *No description.* | -| props | @cdklabs/aws-data-solutions-framework.consumption.RedshiftServerlessNamespaceProps | *No description.* | - ---- - -##### `scope`Required - -- *Type:* constructs.Construct +the ID of the CDK Construct. --- -##### `id`Required - -- *Type:* string - ---- +##### `props`Required -##### `props`Required +- *Type:* @cdklabs/aws-data-solutions-framework.processing.PySparkApplicationPackageProps -- *Type:* @cdklabs/aws-data-solutions-framework.consumption.RedshiftServerlessNamespaceProps +{@link PySparkApplicationPackageProps}. --- @@ -7731,12 +7602,12 @@ new consumption.RedshiftServerlessNamespace(scope: Construct, id: string, props: | **Name** | **Description** | | --- | --- | -| toString | Returns a string representation of this construct. | -| retrieveVersion | Retrieve DSF package.json version. | +| toString | Returns a string representation of this construct. | +| retrieveVersion | Retrieve DSF package.json version. | --- -##### `toString` +##### `toString` ```typescript public toString(): string @@ -7744,7 +7615,7 @@ public toString(): string Returns a string representation of this construct. -##### `retrieveVersion` +##### `retrieveVersion` ```typescript public retrieveVersion(): any @@ -7756,16 +7627,16 @@ Retrieve DSF package.json version. | **Name** | **Description** | | --- | --- | -| isConstruct | Checks if `x` is a construct. | +| isConstruct | Checks if `x` is a construct. | --- -##### `isConstruct` +##### `isConstruct` ```typescript -import { consumption } from '@cdklabs/aws-data-solutions-framework' +import { processing } from '@cdklabs/aws-data-solutions-framework' -consumption.RedshiftServerlessNamespace.isConstruct(x: any) +processing.PySparkApplicationPackage.isConstruct(x: any) ``` Checks if `x` is a construct. @@ -7784,7 +7655,7 @@ library can be accidentally installed, and `instanceof` will behave unpredictably. It is safest to avoid using `instanceof`, and using this type-testing method instead. -###### `x`Required +###### `x`Required - *Type:* any @@ -7796,26 +7667,18 @@ Any object. | **Name** | **Type** | **Description** | | --- | --- | --- | -| node | constructs.Node | The tree node. | -| adminSecret | aws-cdk-lib.aws_secretsmanager.ISecret | The created Secrets Manager secret containing the admin credentials. 
| -| adminSecretKey | aws-cdk-lib.aws_kms.IKey | The KMS Key used to encrypt the admin credentials secret. | -| createFunction | aws-cdk-lib.aws_lambda.IFunction | The Lambda Function for the Redshift Serverless creation. | -| createLogGroup | aws-cdk-lib.aws_logs.ILogGroup | The CloudWatch Logs Log Group for the Redshift Serverless creation. | -| createRole | aws-cdk-lib.aws_iam.IRole | The IAM Role for the Redshift Serverless creation. | -| customResource | aws-cdk-lib.CustomResource | The custom resource that creates the Namespace. | -| dataKey | aws-cdk-lib.aws_kms.Key | KMS key used by the namespace to encrypt the data. | -| dbName | string | The name of the database. | -| namespaceArn | string | The ARN of the created namespace. | -| namespaceId | string | The ID of the created namespace. | -| namespaceName | string | The name of the created namespace. | -| roles | {[ key: string ]: aws-cdk-lib.aws_iam.IRole} | The roles attached to the namespace in the form of `{RoleArn: IRole}`. | -| statusFunction | aws-cdk-lib.aws_lambda.IFunction | The Lambda Function for the creation status check. | -| statusLogGroup | aws-cdk-lib.aws_logs.ILogGroup | The CloudWatch Logs Log Group for the creation status check. | -| statusRole | aws-cdk-lib.aws_iam.IRole | The IAM Role for the creation status check. | +| node | constructs.Node | The tree node. | +| artifactsBucket | aws-cdk-lib.aws_s3.IBucket | The S3 Bucket for storing the artifacts (entrypoint and virtual environment archive). | +| assetUploadManagedPolicy | aws-cdk-lib.aws_iam.IManagedPolicy | The IAM Managed Policy used by the custom resource for the assets deployment. | +| assetUploadRole | aws-cdk-lib.aws_iam.IRole | The IAM Role used by the BucketDeployment to upload the artifacts to an s3 bucket. | +| entrypointUri | string | The location (generally it's an S3 URI) where the entry point is saved. | +| artifactsAccessLogsBucket | @cdklabs/aws-data-solutions-framework.storage.AccessLogsBucket | The access logs bucket to log accesses on the artifacts bucket. | +| sparkVenvConf | string | The Spark Config containing the configuration of virtual environment archive with all dependencies. | +| venvArchiveUri | string | The location (generally an S3 URI) where the archive of the Python virtual environment with all dependencies is stored. | --- -##### `node`Required +##### `node`Required ```typescript public readonly node: Node; @@ -7827,271 +7690,194 @@ The tree node. --- -##### `adminSecret`Required +##### `artifactsBucket`Required ```typescript -public readonly adminSecret: ISecret; +public readonly artifactsBucket: IBucket; ``` -- *Type:* aws-cdk-lib.aws_secretsmanager.ISecret +- *Type:* aws-cdk-lib.aws_s3.IBucket -The created Secrets Manager secret containing the admin credentials. +The S3 Bucket for storing the artifacts (entrypoint and virtual environment archive). --- -##### `adminSecretKey`Required +##### `assetUploadManagedPolicy`Required ```typescript -public readonly adminSecretKey: IKey; +public readonly assetUploadManagedPolicy: IManagedPolicy; ``` -- *Type:* aws-cdk-lib.aws_kms.IKey +- *Type:* aws-cdk-lib.aws_iam.IManagedPolicy -The KMS Key used to encrypt the admin credentials secret. +The IAM Managed Policy used by the custom resource for the assets deployment. 
--- -##### `createFunction`Required +##### `assetUploadRole`Required ```typescript -public readonly createFunction: IFunction; +public readonly assetUploadRole: IRole; ``` -- *Type:* aws-cdk-lib.aws_lambda.IFunction +- *Type:* aws-cdk-lib.aws_iam.IRole -The Lambda Function for the Redshift Serverless creation. +The IAM Role used by the BucketDeployment to upload the artifacts to an s3 bucket. + +In case you provide your own S3 Bucket for storing the artifacts (entrypoint and virtual environment archive), +you must provide S3 write access to this role to upload the artifacts. --- -##### `createLogGroup`Required +##### `entrypointUri`Required ```typescript -public readonly createLogGroup: ILogGroup; +public readonly entrypointUri: string; ``` -- *Type:* aws-cdk-lib.aws_logs.ILogGroup +- *Type:* string -The CloudWatch Logs Log Group for the Redshift Serverless creation. +The location (generally it's an S3 URI) where the entry point is saved. + +You can pass this location to your Spark job. --- -##### `createRole`Required +##### `artifactsAccessLogsBucket`Optional ```typescript -public readonly createRole: IRole; +public readonly artifactsAccessLogsBucket: AccessLogsBucket; ``` -- *Type:* aws-cdk-lib.aws_iam.IRole +- *Type:* @cdklabs/aws-data-solutions-framework.storage.AccessLogsBucket -The IAM Role for the Redshift Serverless creation. +The access logs bucket to log accesses on the artifacts bucket. --- -##### `customResource`Required +##### `sparkVenvConf`Optional ```typescript -public readonly customResource: CustomResource; +public readonly sparkVenvConf: string; ``` -- *Type:* aws-cdk-lib.CustomResource +- *Type:* string -The custom resource that creates the Namespace. +The Spark Config containing the configuration of virtual environment archive with all dependencies. --- -##### `dataKey`Required +##### `venvArchiveUri`Optional ```typescript -public readonly dataKey: Key; +public readonly venvArchiveUri: string; ``` -- *Type:* aws-cdk-lib.aws_kms.Key - -KMS key used by the namespace to encrypt the data. +- *Type:* string ---- +The location (generally an S3 URI) where the archive of the Python virtual environment with all dependencies is stored. -##### `dbName`Required +You can pass this location to your Spark job. -```typescript -public readonly dbName: string; -``` +--- -- *Type:* string +#### Constants -The name of the database. +| **Name** | **Type** | **Description** | +| --- | --- | --- | +| ARTIFACTS_PREFIX | string | The prefix used to store artifacts on the artifact bucket. | +| DSF_OWNED_TAG | string | *No description.* | +| DSF_TRACKING_CODE | string | *No description.* | --- -##### `namespaceArn`Required +##### `ARTIFACTS_PREFIX`Required ```typescript -public readonly namespaceArn: string; +public readonly ARTIFACTS_PREFIX: string; ``` - *Type:* string -The ARN of the created namespace. +The prefix used to store artifacts on the artifact bucket. --- -##### `namespaceId`Required +##### `DSF_OWNED_TAG`Required ```typescript -public readonly namespaceId: string; +public readonly DSF_OWNED_TAG: string; ``` - *Type:* string -The ID of the created namespace. - --- -##### `namespaceName`Required +##### `DSF_TRACKING_CODE`Required ```typescript -public readonly namespaceName: string; +public readonly DSF_TRACKING_CODE: string; ``` - *Type:* string -The name of the created namespace. 
- --- -##### `roles`Required +### RedshiftData -```typescript -public readonly roles: {[ key: string ]: IRole}; -``` +Creates an asynchronous custom resource that handles the execution of SQL using Redshift's Data API. -- *Type:* {[ key: string ]: aws-cdk-lib.aws_iam.IRole} +If `vpc` and `vpcSubnets` are passed, this construct would also create the Redshift Data Interface VPC endpoint and configure the custom resource in the same VPC subnet. -The roles attached to the namespace in the form of `{RoleArn: IRole}`. +*Example* -These roles are used to access other AWS services for ingestion, federated query, and data catalog access. +```typescript +const namespace = new dsf.consumption.RedshiftServerlessNamespace(this, 'RedshiftNamespace', { + name: "default", + dbName: 'defaultdb', +}); -> [https://docs.aws.amazon.com/redshift/latest/mgmt/redshift-iam-authentication-access-control.html](https://docs.aws.amazon.com/redshift/latest/mgmt/redshift-iam-authentication-access-control.html) +const workgroup = new dsf.consumption.RedshiftServerlessWorkgroup(this, "RedshiftWorkgroup", { + name: "redshift-workgroup", + namespace: namespace, +}); ---- +const rsData = workgroup.accessData('DataApi'); +rsData.createDbRole("EngineeringRole", "defaultdb", "engineering"); +``` -##### `statusFunction`Required -```typescript -public readonly statusFunction: IFunction; -``` - -- *Type:* aws-cdk-lib.aws_lambda.IFunction - -The Lambda Function for the creation status check. - ---- - -##### `statusLogGroup`Required - -```typescript -public readonly statusLogGroup: ILogGroup; -``` - -- *Type:* aws-cdk-lib.aws_logs.ILogGroup - -The CloudWatch Logs Log Group for the creation status check. - ---- - -##### `statusRole`Required - -```typescript -public readonly statusRole: IRole; -``` - -- *Type:* aws-cdk-lib.aws_iam.IRole - -The IAM Role for the creation status check. - ---- - -#### Constants - -| **Name** | **Type** | **Description** | -| --- | --- | --- | -| DSF_OWNED_TAG | string | *No description.* | -| DSF_TRACKING_CODE | string | *No description.* | - ---- - -##### `DSF_OWNED_TAG`Required - -```typescript -public readonly DSF_OWNED_TAG: string; -``` - -- *Type:* string - ---- - -##### `DSF_TRACKING_CODE`Required - -```typescript -public readonly DSF_TRACKING_CODE: string; -``` - -- *Type:* string - ---- - -### RedshiftServerlessWorkgroup - -- *Implements:* aws-cdk-lib.aws_ec2.IConnectable - -Create a Redshift Serverless Workgroup. - -A default namespace would be created if none is provided. 
- -*Example* - -```typescript -const workgroup = new dsf.consumption.RedshiftServerlessWorkgroup(this, "RedshiftWorkgroup", { - name: "example-workgroup", - namespace: new dsf.consumption.RedshiftServerlessNamespace(this, "RedshiftNamespace", { - name: 'example-namespace', - dbName: 'defaultdb', - }) -}); -``` - - -#### Initializers +#### Initializers ```typescript import { consumption } from '@cdklabs/aws-data-solutions-framework' -new consumption.RedshiftServerlessWorkgroup(scope: Construct, id: string, props: RedshiftServerlessWorkgroupProps) +new consumption.RedshiftData(scope: Construct, id: string, props: RedshiftDataProps) ``` | **Name** | **Type** | **Description** | | --- | --- | --- | -| scope | constructs.Construct | *No description.* | -| id | string | *No description.* | -| props | @cdklabs/aws-data-solutions-framework.consumption.RedshiftServerlessWorkgroupProps | *No description.* | +| scope | constructs.Construct | *No description.* | +| id | string | *No description.* | +| props | @cdklabs/aws-data-solutions-framework.consumption.RedshiftDataProps | *No description.* | --- -##### `scope`Required +##### `scope`Required - *Type:* constructs.Construct --- -##### `id`Required +##### `id`Required - *Type:* string --- -##### `props`Required +##### `props`Required -- *Type:* @cdklabs/aws-data-solutions-framework.consumption.RedshiftServerlessWorkgroupProps +- *Type:* @cdklabs/aws-data-solutions-framework.consumption.RedshiftDataProps --- @@ -8099,25 +7885,20 @@ new consumption.RedshiftServerlessWorkgroup(scope: Construct, id: string, props: | **Name** | **Description** | | --- | --- | -| toString | Returns a string representation of this construct. | -| accessData | Creates an instance of `RedshiftData` to send custom SQLs to the workgroup. | -| assignDbRolesToIAMRole | Assigns Redshift DB roles to IAM role vs the `RedshiftDbRoles` tag. | -| catalogTables | Creates a new Glue data catalog database with a crawler using JDBC target type to connect to the Redshift Workgroup. | -| createDatabaseFromShare | Consume datashare by creating a new database pointing to the share. | -| createDbRole | Creates a new DB role. | -| createShare | Create a new datashare. | -| grantAccessToShare | Create a datashare grant to a namespace if it's in the same account, or to another account. | -| grantDbAllPrivilegesToRole | Grants both read and write permissions on all the tables in the `schema` to the DB role. | -| grantDbSchemaToRole | Grants access to the schema to the DB role. | -| grantSchemaReadToRole | Grants read permission on all the tables in the `schema` to the DB role. | -| ingestData | Ingest data from S3 into a Redshift table. | -| mergeToTargetTable | Run the `MERGE` query using simplified mode. | -| retrieveVersion | Retrieve DSF package.json version. | -| runCustomSQL | Runs a custom SQL. | +| toString | Returns a string representation of this construct. | +| retrieveVersion | Retrieve DSF package.json version. | +| assignDbRolesToIAMRole | Assigns Redshift DB roles to IAM role vs the `RedshiftDbRoles` tag. | +| createDbRole | Creates a new DB role. | +| grantDbAllPrivilegesToRole | Grants both read and write permissions on all the tables in the `schema` to the DB role. | +| grantDbSchemaToRole | Grants access to the schema to the DB role. | +| grantSchemaReadToRole | Grants read permission on all the tables in the `schema` to the DB role. | +| ingestData | Ingest data from S3 into a Redshift table. | +| mergeToTargetTable | Run the `MERGE` query using simplified mode. 
| +| runCustomSQL | Runs a custom SQL. | --- -##### `toString` +##### `toString` ```typescript public toString(): string @@ -8125,39 +7906,15 @@ public toString(): string Returns a string representation of this construct. -##### ~~`accessData`~~ +##### `retrieveVersion` ```typescript -public accessData(id: string, createVpcEndpoint?: boolean, existingInterfaceVPCEndpoint?: IInterfaceVpcEndpoint): RedshiftData +public retrieveVersion(): any ``` -Creates an instance of `RedshiftData` to send custom SQLs to the workgroup. - -###### `id`Required - -- *Type:* string - -The CDK ID of the resource. - ---- - -###### `createVpcEndpoint`Optional - -- *Type:* boolean - -if set to true, create interface VPC endpoint for Redshift Data API. - ---- - -###### `existingInterfaceVPCEndpoint`Optional - -- *Type:* aws-cdk-lib.aws_ec2.IInterfaceVpcEndpoint - -if `createVpcEndpoint` is false, and if this is populated, then the Lambda function's security group would be added in the existing VPC endpoint's security group. - ---- +Retrieve DSF package.json version. -##### `assignDbRolesToIAMRole` +##### `assignDbRolesToIAMRole` ```typescript public assignDbRolesToIAMRole(dbRoles: string[], targetRole: IRole): void @@ -8165,7 +7922,7 @@ public assignDbRolesToIAMRole(dbRoles: string[], targetRole: IRole): void Assigns Redshift DB roles to IAM role vs the `RedshiftDbRoles` tag. -###### `dbRoles`Required +###### `dbRoles`Required - *Type:* string[] @@ -8173,7 +7930,7 @@ List of Redshift DB roles to assign to IAM role. --- -###### `targetRole`Required +###### `targetRole`Required - *Type:* aws-cdk-lib.aws_iam.IRole @@ -8181,100 +7938,87 @@ The IAM role to assign the Redshift DB roles to. --- -##### `catalogTables` +##### `createDbRole` ```typescript -public catalogTables(id: string, catalogDbName: string, pathToCrawl?: string): DataCatalogDatabase +public createDbRole(id: string, databaseName: string, roleName: string): CustomResource ``` -Creates a new Glue data catalog database with a crawler using JDBC target type to connect to the Redshift Workgroup. +Creates a new DB role. -###### `id`Required +###### `id`Required - *Type:* string -The CDK ID of the resource. +The CDK Construct ID. --- -###### `catalogDbName`Required +###### `databaseName`Required - *Type:* string -The name of the Glue Database to create. +The name of the database to run this command. --- -###### `pathToCrawl`Optional +###### `roleName`Required - *Type:* string -The path of Redshift tables to crawl. +The name of the role to create. --- -##### `createDatabaseFromShare` +##### `grantDbAllPrivilegesToRole` ```typescript -public createDatabaseFromShare(id: string, newDatabaseName: string, producerDataShareName: string, producerNamespaceId?: string, producerAccountId?: string): RedshiftDataSharingCreateDbFromShareProps +public grantDbAllPrivilegesToRole(id: string, databaseName: string, schema: string, roleName: string): CustomResource ``` -Consume datashare by creating a new database pointing to the share. - -If datashare is coming from a different account, setting `autoAssociate` to true -automatically associates the datashare to the cluster before the new database is created. - -###### `id`Required - -- *Type:* string - -The CDK ID of the resource. - ---- +Grants both read and write permissions on all the tables in the `schema` to the DB role. -###### `newDatabaseName`Required +###### `id`Required - *Type:* string -The name of the database that would be created from the data share. +The CDK Construct ID. 
--- -###### `producerDataShareName`Required +###### `databaseName`Required - *Type:* string -The name of the data share from producer. +The name of the database to run this command. --- -###### `producerNamespaceId`Optional +###### `schema`Required - *Type:* string -The producer cluster namespace. +The schema where the tables are located in. --- -###### `producerAccountId`Optional +###### `roleName`Required - *Type:* string -The producer account ID. - -Required for cross account shares. +The DB role to grant the permissions to. --- -##### `createDbRole` +##### `grantDbSchemaToRole` ```typescript -public createDbRole(id: string, databaseName: string, roleName: string): CustomResource +public grantDbSchemaToRole(id: string, databaseName: string, schema: string, roleName: string): CustomResource ``` -Creates a new DB role. +Grants access to the schema to the DB role. -###### `id`Required +###### `id`Required - *Type:* string @@ -8282,7 +8026,7 @@ The CDK Construct ID. --- -###### `databaseName`Required +###### `databaseName`Required - *Type:* string @@ -8290,217 +8034,45 @@ The name of the database to run this command. --- -###### `roleName`Required +###### `schema`Required - *Type:* string -The name of the role to create. +The schema where the tables are located in. --- -##### `createShare` +###### `roleName`Required + +- *Type:* string + +The DB role to grant the permissions to. + +--- + +##### `grantSchemaReadToRole` ```typescript -public createShare(id: string, databaseName: string, dataShareName: string, schema: string, tables: string[]): RedshiftNewShareProps +public grantSchemaReadToRole(id: string, databaseName: string, schema: string, roleName: string): CustomResource ``` -Create a new datashare. +Grants read permission on all the tables in the `schema` to the DB role. -###### `id`Required +###### `id`Required - *Type:* string -The CDK ID of the resource. - --- -###### `databaseName`Required +###### `databaseName`Required - *Type:* string -The name of the database to connect to. +The name of the database to run this command. --- -###### `dataShareName`Required - -- *Type:* string - -The name of the datashare. - ---- - -###### `schema`Required - -- *Type:* string - -The schema to add in the datashare. - ---- - -###### `tables`Required - -- *Type:* string[] - -The list of tables that would be included in the datashare. - -This must follow the format: `.` - ---- - -##### `grantAccessToShare` - -```typescript -public grantAccessToShare(id: string, dataShareDetails: RedshiftNewShareProps, consumerNamespaceId?: string, consumerAccountId?: string, autoAuthorized?: boolean): RedshiftDataSharingGrantedProps -``` - -Create a datashare grant to a namespace if it's in the same account, or to another account. - -###### `id`Required - -- *Type:* string - -The CDK ID of the resource. - ---- - -###### `dataShareDetails`Required - -- *Type:* @cdklabs/aws-data-solutions-framework.consumption.RedshiftNewShareProps - -The details of the datashare. - ---- - -###### `consumerNamespaceId`Optional - -- *Type:* string - -The namespace of the consumer that you're sharing to. - -Either namespace or account Id must be provided. - ---- - -###### `consumerAccountId`Optional - -- *Type:* string - -The account ID of the consumer that you're sharing to. - -Either namespace or account Id must be provided. 
- ---- - -###### `autoAuthorized`Optional - -- *Type:* boolean - ---- - -##### `grantDbAllPrivilegesToRole` - -```typescript -public grantDbAllPrivilegesToRole(id: string, databaseName: string, schema: string, roleName: string): CustomResource -``` - -Grants both read and write permissions on all the tables in the `schema` to the DB role. - -###### `id`Required - -- *Type:* string - -The CDK Construct ID. - ---- - -###### `databaseName`Required - -- *Type:* string - -The name of the database to run this command. - ---- - -###### `schema`Required - -- *Type:* string - -The schema where the tables are located in. - ---- - -###### `roleName`Required - -- *Type:* string - -The DB role to grant the permissions to. - ---- - -##### `grantDbSchemaToRole` - -```typescript -public grantDbSchemaToRole(id: string, databaseName: string, schema: string, roleName: string): CustomResource -``` - -Grants access to the schema to the DB role. - -###### `id`Required - -- *Type:* string - -The CDK Construct ID. - ---- - -###### `databaseName`Required - -- *Type:* string - -The name of the database to run this command. - ---- - -###### `schema`Required - -- *Type:* string - -The schema where the tables are located in. - ---- - -###### `roleName`Required - -- *Type:* string - -The DB role to grant the permissions to. - ---- - -##### `grantSchemaReadToRole` - -```typescript -public grantSchemaReadToRole(id: string, databaseName: string, schema: string, roleName: string): CustomResource -``` - -Grants read permission on all the tables in the `schema` to the DB role. - -###### `id`Required - -- *Type:* string - ---- - -###### `databaseName`Required - -- *Type:* string - -The name of the database to run this command. - ---- - -###### `schema`Required +###### `schema`Required - *Type:* string @@ -8508,7 +8080,7 @@ The schema where the tables are located in. --- -###### `roleName`Required +###### `roleName`Required - *Type:* string @@ -8516,7 +8088,7 @@ The DB role to grant the permissions to. --- -##### `ingestData` +##### `ingestData` ```typescript public ingestData(id: string, databaseName: string, targetTable: string, sourceBucket: IBucket, sourcePrefix: string, ingestAdditionalOptions?: string, role?: IRole): CustomResource @@ -8524,7 +8096,7 @@ public ingestData(id: string, databaseName: string, targetTable: string, sourceB Ingest data from S3 into a Redshift table. -###### `id`Required +###### `id`Required - *Type:* string @@ -8532,7 +8104,7 @@ The CDK Construct ID. --- -###### `databaseName`Required +###### `databaseName`Required - *Type:* string @@ -8540,7 +8112,7 @@ The name of the database to run this command. --- -###### `targetTable`Required +###### `targetTable`Required - *Type:* string @@ -8548,7 +8120,7 @@ The target table to load the data into. --- -###### `sourceBucket`Required +###### `sourceBucket`Required - *Type:* aws-cdk-lib.aws_s3.IBucket @@ -8556,7 +8128,7 @@ The bucket where the source data would be coming from. --- -###### `sourcePrefix`Required +###### `sourcePrefix`Required - *Type:* string @@ -8564,7 +8136,7 @@ The location inside the bucket where the data would be ingested from. --- -###### `ingestAdditionalOptions`Optional +###### `ingestAdditionalOptions`Optional - *Type:* string @@ -8574,7 +8146,7 @@ Additional options to pass to the `COPY` command. For example, `delimiter '|'` o --- -###### `role`Optional +###### `role`Optional - *Type:* aws-cdk-lib.aws_iam.IRole @@ -8584,7 +8156,7 @@ The IAM Role to use to access the data in S3. 
If not provided, it would use the --- -##### `mergeToTargetTable` +##### `mergeToTargetTable` ```typescript public mergeToTargetTable(id: string, databaseName: string, sourceTable: string, targetTable: string, sourceColumnId?: string, targetColumnId?: string): CustomResource @@ -8594,7 +8166,7 @@ Run the `MERGE` query using simplified mode. This command would do an upsert into the target table. -###### `id`Required +###### `id`Required - *Type:* string @@ -8602,7 +8174,7 @@ The CDK Construct ID. --- -###### `databaseName`Required +###### `databaseName`Required - *Type:* string @@ -8610,7 +8182,7 @@ The name of the database to run this command. --- -###### `sourceTable`Required +###### `sourceTable`Required - *Type:* string @@ -8620,7 +8192,7 @@ Schema can also be included using the following format: `schemaName.tableName` --- -###### `targetTable`Required +###### `targetTable`Required - *Type:* string @@ -8630,7 +8202,7 @@ Schema can also be included using the following format: `schemaName.tableName` --- -###### `sourceColumnId`Optional +###### `sourceColumnId`Optional - *Type:* string @@ -8640,7 +8212,7 @@ Default is `id` --- -###### `targetColumnId`Optional +###### `targetColumnId`Optional - *Type:* string @@ -8650,15 +8222,7 @@ Default is `id` --- -##### `retrieveVersion` - -```typescript -public retrieveVersion(): any -``` - -Retrieve DSF package.json version. - -##### `runCustomSQL` +##### `runCustomSQL` ```typescript public runCustomSQL(id: string, databaseName: string, sql: string, deleteSql?: string): CustomResource @@ -8668,7 +8232,7 @@ Runs a custom SQL. Once the custom resource finishes execution, the attribute `Data` contains an attribute `execId` which contains the Redshift Data API execution ID. You can then use this to retrieve execution results via the `GetStatementResult` API. -###### `id`Required +###### `id`Required - *Type:* string @@ -8676,7 +8240,7 @@ The CDK Construct ID. --- -###### `databaseName`Required +###### `databaseName`Required - *Type:* string @@ -8684,7 +8248,7 @@ The name of the database to run this command. --- -###### `sql`Required +###### `sql`Required - *Type:* string @@ -8692,7 +8256,7 @@ The sql to run. --- -###### `deleteSql`Optional +###### `deleteSql`Optional - *Type:* string @@ -8706,16 +8270,16 @@ The sql to run when this resource gets deleted | **Name** | **Description** | | --- | --- | -| isConstruct | Checks if `x` is a construct. | +| isConstruct | Checks if `x` is a construct. | --- -##### `isConstruct` +##### `isConstruct` ```typescript import { consumption } from '@cdklabs/aws-data-solutions-framework' -consumption.RedshiftServerlessWorkgroup.isConstruct(x: any) +consumption.RedshiftData.isConstruct(x: any) ``` Checks if `x` is a construct. @@ -8734,7 +8298,7 @@ library can be accidentally installed, and `instanceof` will behave unpredictably. It is safest to avoid using `instanceof`, and using this type-testing method instead. -###### `x`Required +###### `x`Required - *Type:* any @@ -8746,127 +8310,188 @@ Any object. | **Name** | **Type** | **Description** | | --- | --- | --- | -| node | constructs.Node | The tree node. | -| cfnResource | aws-cdk-lib.aws_redshiftserverless.CfnWorkgroup | The created Redshift Serverless Workgroup. | -| connections | aws-cdk-lib.aws_ec2.Connections | Connections used by Workgroup security group. | -| existingShares | {[ key: string ]: @cdklabs/aws-data-solutions-framework.consumption.RedshiftNewShareProps} | Index of existing shares. 
| -| glueConnection | aws-cdk-lib.aws_glue.CfnConnection | The Glue Connection associated with the workgroup. | -| namespace | @cdklabs/aws-data-solutions-framework.consumption.RedshiftServerlessNamespace | The associated Redshift Serverless Namespace. | -| primarySecurityGroup | aws-cdk-lib.aws_ec2.ISecurityGroup | The primary EC2 Security Group associated with the Redshift Serverless Workgroup. | -| selectedSubnets | aws-cdk-lib.aws_ec2.SelectedSubnets | The subnets where the Redshift Serverless Workgroup is deployed. | -| vpc | aws-cdk-lib.aws_ec2.IVpc | The VPC where the Redshift Serverless Workgroup is deployed. | - ---- - -##### `node`Required - -```typescript -public readonly node: Node; -``` - -- *Type:* constructs.Node - -The tree node. +| node | constructs.Node | The tree node. | +| customResourceSecurityGroup | aws-cdk-lib.aws_ec2.ISecurityGroup | The Security Group used by the Custom Resource when deployed in a VPC. | +| vpcEndpoint | aws-cdk-lib.aws_ec2.IInterfaceVpcEndpoint | The created Redshift Data API interface vpc endpoint when deployed in a VPC. | +| vpcEndpointSecurityGroup | aws-cdk-lib.aws_ec2.ISecurityGroup | The Security Group used by the VPC Endpoint when deployed in a VPC. | +| dataAccessTargetProps | @cdklabs/aws-data-solutions-framework.consumption.RedshiftDataAccessTargetProps | Contains normalized details of the target Redshift cluster/workgroup for data access. | +| executionRole | aws-cdk-lib.aws_iam.IRole | The IAM Role for the Redshift Data API execution. | +| statusFunction | aws-cdk-lib.aws_lambda.IFunction | The Lambda Function for the Redshift Data API status checks. | +| statusLogGroup | aws-cdk-lib.aws_logs.ILogGroup | The CloudWatch Log Group for the Redshift Data API status checks. | +| submitFunction | aws-cdk-lib.aws_lambda.IFunction | The Lambda Function for the Redshift Data submission. | +| submitLogGroup | aws-cdk-lib.aws_logs.ILogGroup | The CloudWatch Log Group for the Redshift Data API submission. | +| taggingManagedPolicy | aws-cdk-lib.aws_iam.IManagedPolicy | The managed IAM policy allowing IAM Role to retrieve tag information. | +| cleanUpFunction | aws-cdk-lib.aws_lambda.IFunction | The Lambda function for the S3 data copy cleaning up lambda. | +| cleanUpLogGroup | aws-cdk-lib.aws_logs.ILogGroup | The CloudWatch Log Group for the Redshift Data cleaning up lambda. | +| cleanUpRole | aws-cdk-lib.aws_iam.IRole | The IAM Role for the the S3 data copy cleaning up lambda. | --- -##### `cfnResource`Required +##### `node`Required ```typescript -public readonly cfnResource: CfnWorkgroup; +public readonly node: Node; ``` -- *Type:* aws-cdk-lib.aws_redshiftserverless.CfnWorkgroup +- *Type:* constructs.Node -The created Redshift Serverless Workgroup. +The tree node. --- -##### `connections`Required +##### `customResourceSecurityGroup`Optional ```typescript -public readonly connections: Connections; +public readonly customResourceSecurityGroup: ISecurityGroup; ``` -- *Type:* aws-cdk-lib.aws_ec2.Connections +- *Type:* aws-cdk-lib.aws_ec2.ISecurityGroup -Connections used by Workgroup security group. +The Security Group used by the Custom Resource when deployed in a VPC. -Used this to enable access from clients connecting to the workgroup +--- + +##### `vpcEndpoint`Optional + +```typescript +public readonly vpcEndpoint: IInterfaceVpcEndpoint; +``` + +- *Type:* aws-cdk-lib.aws_ec2.IInterfaceVpcEndpoint + +The created Redshift Data API interface vpc endpoint when deployed in a VPC. 
--- -##### `existingShares`Required +##### `vpcEndpointSecurityGroup`Optional ```typescript -public readonly existingShares: {[ key: string ]: RedshiftNewShareProps}; +public readonly vpcEndpointSecurityGroup: ISecurityGroup; ``` -- *Type:* {[ key: string ]: @cdklabs/aws-data-solutions-framework.consumption.RedshiftNewShareProps} +- *Type:* aws-cdk-lib.aws_ec2.ISecurityGroup -Index of existing shares. +The Security Group used by the VPC Endpoint when deployed in a VPC. --- -##### `glueConnection`Required +##### `dataAccessTargetProps`Required ```typescript -public readonly glueConnection: CfnConnection; +public readonly dataAccessTargetProps: RedshiftDataAccessTargetProps; ``` -- *Type:* aws-cdk-lib.aws_glue.CfnConnection +- *Type:* @cdklabs/aws-data-solutions-framework.consumption.RedshiftDataAccessTargetProps -The Glue Connection associated with the workgroup. +Contains normalized details of the target Redshift cluster/workgroup for data access. -This can be used by Glue ETL Jobs to read/write data from/to Redshift workgroup +--- + +##### `executionRole`Required + +```typescript +public readonly executionRole: IRole; +``` + +- *Type:* aws-cdk-lib.aws_iam.IRole + +The IAM Role for the Redshift Data API execution. --- -##### `namespace`Required +##### `statusFunction`Required ```typescript -public readonly namespace: RedshiftServerlessNamespace; +public readonly statusFunction: IFunction; ``` -- *Type:* @cdklabs/aws-data-solutions-framework.consumption.RedshiftServerlessNamespace +- *Type:* aws-cdk-lib.aws_lambda.IFunction -The associated Redshift Serverless Namespace. +The Lambda Function for the Redshift Data API status checks. --- -##### `primarySecurityGroup`Required +##### `statusLogGroup`Required ```typescript -public readonly primarySecurityGroup: ISecurityGroup; +public readonly statusLogGroup: ILogGroup; ``` -- *Type:* aws-cdk-lib.aws_ec2.ISecurityGroup +- *Type:* aws-cdk-lib.aws_logs.ILogGroup -The primary EC2 Security Group associated with the Redshift Serverless Workgroup. +The CloudWatch Log Group for the Redshift Data API status checks. --- -##### `selectedSubnets`Required +##### `submitFunction`Required ```typescript -public readonly selectedSubnets: SelectedSubnets; +public readonly submitFunction: IFunction; ``` -- *Type:* aws-cdk-lib.aws_ec2.SelectedSubnets +- *Type:* aws-cdk-lib.aws_lambda.IFunction -The subnets where the Redshift Serverless Workgroup is deployed. +The Lambda Function for the Redshift Data submission. --- -##### `vpc`Required +##### `submitLogGroup`Required ```typescript -public readonly vpc: IVpc; +public readonly submitLogGroup: ILogGroup; ``` -- *Type:* aws-cdk-lib.aws_ec2.IVpc +- *Type:* aws-cdk-lib.aws_logs.ILogGroup -The VPC where the Redshift Serverless Workgroup is deployed. +The CloudWatch Log Group for the Redshift Data API submission. + +--- + +##### `taggingManagedPolicy`Required + +```typescript +public readonly taggingManagedPolicy: IManagedPolicy; +``` + +- *Type:* aws-cdk-lib.aws_iam.IManagedPolicy + +The managed IAM policy allowing IAM Role to retrieve tag information. + +--- + +##### `cleanUpFunction`Optional + +```typescript +public readonly cleanUpFunction: IFunction; +``` + +- *Type:* aws-cdk-lib.aws_lambda.IFunction + +The Lambda function for the S3 data copy cleaning up lambda. + +--- + +##### `cleanUpLogGroup`Optional + +```typescript +public readonly cleanUpLogGroup: ILogGroup; +``` + +- *Type:* aws-cdk-lib.aws_logs.ILogGroup + +The CloudWatch Log Group for the Redshift Data cleaning up lambda. 
+
+---
+
+##### `cleanUpRole`Optional
+
+```typescript
+public readonly cleanUpRole: IRole;
+```
+
+- *Type:* aws-cdk-lib.aws_iam.IRole
+
+The IAM Role for the S3 data copy cleaning up lambda.

---

@@ -8874,12 +8499,12 @@ The VPC where the Redshift Serverless Workgroup is deployed.

| **Name** | **Type** | **Description** |
| --- | --- | --- |
-| DSF_OWNED_TAG | string | *No description.* |
-| DSF_TRACKING_CODE | string | *No description.* |
+| DSF_OWNED_TAG | string | *No description.* |
+| DSF_TRACKING_CODE | string | *No description.* |

---

-##### `DSF_OWNED_TAG`Required
+##### `DSF_OWNED_TAG`Required

```typescript
public readonly DSF_OWNED_TAG: string;
```

@@ -8889,7 +8514,7 @@ public readonly DSF_OWNED_TAG: string;

---

-##### `DSF_TRACKING_CODE`Required
+##### `DSF_TRACKING_CODE`Required

```typescript
public readonly DSF_TRACKING_CODE: string;
```

@@ -8899,62 +8524,96 @@ public readonly DSF_TRACKING_CODE: string;

---

-### S3DataCopy
+### RedshiftDataSharing

-Copy data from one S3 bucket to another.
+Creates an asynchronous custom resource to manage the data sharing lifecycle for both data producers and data consumers.

-> [https://awslabs.github.io/data-solutions-framework-on-aws/docs/constructs/library/Utils/s3-data-copy](https://awslabs.github.io/data-solutions-framework-on-aws/docs/constructs/library/Utils/s3-data-copy)
+This also covers both same account and cross account access.

*Example*

```typescript
-import { Bucket } from 'aws-cdk-lib/aws-s3';
+const redshiftAdminSecret = Secret.fromSecretPartialArn(this, 'RedshiftAdminCredentials', 'arn:aws:secretsmanager:us-east-1:XXXXXXXX:secret:YYYYYYYY');

-const sourceBucket = Bucket.fromBucketName(this, 'SourceBucket', 'nyc-tlc');
-const bucketName = `test-${this.region}-${this.account}-${dsf.utils.Utils.generateUniqueHash(this, 'TargetBucket')}`;
+const redshiftVpc = Vpc.fromLookup(this, 'RedshiftVpc', {
+  vpcId: 'XXXXXXXX',
+});

-const targetBucket = new Bucket(this, 'TargetBucket');
+const dataAccess = new dsf.consumption.RedshiftData(this, 'RedshiftDataAccess', {
+  workgroupId: 'XXXXXXXXXXXXXXX',
+  secret: redshiftAdminSecret,
+  vpc: redshiftVpc,
+  subnets: redshiftVpc.selectSubnets({
+    subnetGroupName: 'YYYYYYYY'
+  }),
+  createInterfaceVpcEndpoint: true,
+  executionTimeout: Duration.minutes(10),
+});

-new dsf.utils.S3DataCopy(this, 'S3DataCopy', {
-  sourceBucket,
-  sourceBucketPrefix: 'trip data/',
-  sourceBucketRegion: 'us-east-1',
-  targetBucket,
+const dataShare = new dsf.consumption.RedshiftDataSharing(this, 'RedshiftDataShare', {
+  redshiftData: dataAccess,
+  workgroupId: 'XXXXXXXXXXXXXXX',
+  secret: redshiftAdminSecret,
+  vpc: redshiftVpc,
+  subnets: redshiftVpc.selectSubnets({
+    subnetGroupName: 'YYYYYYYY'
+  }),
+  createInterfaceVpcEndpoint: true,
+  executionTimeout: Duration.minutes(10),
+});
+
+const share = dataShare.createShare('ProducerShare', 'default', 'example_share', 'public', ['public.customers']);
+
+const grantToConsumer = dataShare.grant('GrantToConsumer', {
+  dataShareName: 'example_share',
+  databaseName: 'default',
+  autoAuthorized: true,
+  accountId: "",
+  dataShareArn: '',
+});
+
+dataShare.createDatabaseFromShare('DatabaseFromShare', {
+  consumerNamespaceArn: '',
+  newDatabaseName: 'db_from_share',
+  databaseName: 'default',
+  dataShareName: 'example_share',
+  dataShareArn: '',
+  accountId: "",
 });
 ```

-#### Initializers
+#### Initializers

```typescript
-import { utils } from '@cdklabs/aws-data-solutions-framework'
+import { consumption } from '@cdklabs/aws-data-solutions-framework'

-new utils.S3DataCopy(scope: 
Construct, id: string, props: S3DataCopyProps) +new consumption.RedshiftDataSharing(scope: Construct, id: string, props: RedshiftDataSharingProps) ``` | **Name** | **Type** | **Description** | | --- | --- | --- | -| scope | constructs.Construct | *No description.* | -| id | string | *No description.* | -| props | @cdklabs/aws-data-solutions-framework.utils.S3DataCopyProps | *No description.* | +| scope | constructs.Construct | *No description.* | +| id | string | *No description.* | +| props | @cdklabs/aws-data-solutions-framework.consumption.RedshiftDataSharingProps | *No description.* | --- -##### `scope`Required +##### `scope`Required - *Type:* constructs.Construct --- -##### `id`Required +##### `id`Required - *Type:* string --- -##### `props`Required +##### `props`Required -- *Type:* @cdklabs/aws-data-solutions-framework.utils.S3DataCopyProps +- *Type:* @cdklabs/aws-data-solutions-framework.consumption.RedshiftDataSharingProps --- @@ -8962,12 +8621,15 @@ new utils.S3DataCopy(scope: Construct, id: string, props: S3DataCopyProps) | **Name** | **Description** | | --- | --- | -| toString | Returns a string representation of this construct. | -| retrieveVersion | Retrieve DSF package.json version. | +| toString | Returns a string representation of this construct. | +| retrieveVersion | Retrieve DSF package.json version. | +| createDatabaseFromShare | Consume datashare by creating a new database pointing to the share. | +| createShare | Create a new datashare. | +| grant | Create a datashare grant to a namespace if it's in the same account, or to another account. | --- -##### `toString` +##### `toString` ```typescript public toString(): string @@ -8975,7 +8637,7 @@ public toString(): string Returns a string representation of this construct. -##### `retrieveVersion` +##### `retrieveVersion` ```typescript public retrieveVersion(): any @@ -8983,20 +8645,121 @@ public retrieveVersion(): any Retrieve DSF package.json version. -#### Static Functions +##### `createDatabaseFromShare` -| **Name** | **Description** | -| --- | --- | -| isConstruct | Checks if `x` is a construct. | +```typescript +public createDatabaseFromShare(id: string, props: RedshiftDataSharingCreateDbProps): RedshiftDataSharingCreateDbFromShareProps +``` ---- +Consume datashare by creating a new database pointing to the share. -##### `isConstruct` +If datashare is coming from a different account, setting `autoAssociate` to true +automatically associates the datashare to the cluster before the new database is created. + +###### `id`Required + +- *Type:* string + +the CDK ID of the resource. + +--- + +###### `props`Required + +- *Type:* @cdklabs/aws-data-solutions-framework.consumption.RedshiftDataSharingCreateDbProps + +`RedshiftDataSharingCreateDbProps`. + +--- + +##### `createShare` ```typescript -import { utils } from '@cdklabs/aws-data-solutions-framework' +public createShare(id: string, databaseName: string, dataShareName: string, schema: string, tables: string[]): RedshiftNewShareProps +``` -utils.S3DataCopy.isConstruct(x: any) +Create a new datashare. + +###### `id`Required + +- *Type:* string + +the CDK ID of the resource. + +--- + +###### `databaseName`Required + +- *Type:* string + +The name of the database to connect to. + +--- + +###### `dataShareName`Required + +- *Type:* string + +The name of the datashare. + +--- + +###### `schema`Required + +- *Type:* string + +The schema to add in the datashare. + +--- + +###### `tables`Required + +- *Type:* string[] + +The list of tables that would be included in the datashare. 
+
+This must follow the format: `<schema>.<table>`
+
+---
+
+##### `grant`
+
+```typescript
+public grant(id: string, props: RedshiftDataSharingGrantProps): RedshiftDataSharingGrantedProps
+```
+
+Create a datashare grant to a namespace if it's in the same account, or to another account.
+
+###### `id`Required
+
+- *Type:* string
+
+the CDK ID of the resource.
+
+---
+
+###### `props`Required
+
+- *Type:* @cdklabs/aws-data-solutions-framework.consumption.RedshiftDataSharingGrantProps
+
+`RedshiftDataSharingGrantProps`.
+
+---
+
+#### Static Functions

| **Name** | **Description** |
| --- | --- |
-| isConstruct | Checks if `x` is a construct. |
+| isConstruct | Checks if `x` is a construct. |

---

-##### `isConstruct`
+##### `isConstruct`

```typescript
-import { utils } from '@cdklabs/aws-data-solutions-framework'
+import { consumption } from '@cdklabs/aws-data-solutions-framework'

-utils.S3DataCopy.isConstruct(x: any)
+consumption.RedshiftDataSharing.isConstruct(x: any)
```

Checks if `x` is a construct.

Use this method instead of `instanceof` to properly detect `Construct`
instances, even when the construct library is symlinked.

Explanation: in JavaScript, multiple copies of the `constructs` library on
disk are seen as independent, completely different libraries. As a
consequence, the class `Construct` in each copy of the `constructs` library
is seen as a different class, and an instance of one class will not test as
`instanceof` the other class. `npm install` will not create installations
like this, but users may manually symlink construct libraries together or
use a monorepo tool: in those cases, multiple copies of the `constructs`
library can be accidentally installed, and `instanceof` will behave
unpredictably. It is safest to avoid using `instanceof`, and using
this type-testing method instead.

-###### `x`Required
+###### `x`Required

- *Type:* any

Any object.

---

#### Properties

| **Name** | **Type** | **Description** |
| --- | --- | --- |
-| node | constructs.Node | The tree node. |
-| copyFunction | aws-cdk-lib.aws_lambda.IFunction | The Lambda Function for the copy. |
-| copyLogGroup | aws-cdk-lib.aws_logs.ILogGroup | The CloudWatch Log Group for the S3 data copy. |
-| copyRole | aws-cdk-lib.aws_iam.IRole | The IAM Role for the copy Lambba Function. |
-| cleanUpFunction | aws-cdk-lib.aws_lambda.IFunction | The Lambda function for the S3 data copy cleaning up lambda. |
-| cleanUpLogGroup | aws-cdk-lib.aws_logs.ILogGroup | The CloudWatch Log Group for the S3 data copy cleaning up lambda. |
-| cleanUpRole | aws-cdk-lib.aws_iam.IRole | The IAM Role for the the S3 data copy cleaning up lambda. |
-| securityGroups | aws-cdk-lib.aws_ec2.ISecurityGroup[] | The list of EC2 Security Groups used by the Lambda Functions. |
+| node | constructs.Node | The tree node. |
+| customResourceSecurityGroup | aws-cdk-lib.aws_ec2.ISecurityGroup | The Security Group used by the Custom Resource when deployed in a VPC. |
+| vpcEndpoint | aws-cdk-lib.aws_ec2.IInterfaceVpcEndpoint | The created Redshift Data API interface VPC endpoint when deployed in a VPC. |
+| vpcEndpointSecurityGroup | aws-cdk-lib.aws_ec2.ISecurityGroup | The Security Group used by the VPC Endpoint when deployed in a VPC. |
+| dataAccessTargetProps | @cdklabs/aws-data-solutions-framework.consumption.RedshiftDataAccessTargetProps | Contains normalized details of the target Redshift cluster/workgroup for data access. |
+| executionRole | aws-cdk-lib.aws_iam.IRole | The IAM Role for the Redshift Data API execution. |
+| statusFunction | aws-cdk-lib.aws_lambda.IFunction | The Lambda Function for the Redshift Data Sharing status checks. |
+| statusLogGroup | aws-cdk-lib.aws_logs.ILogGroup | The CloudWatch Log Group for the Redshift Data Sharing status checks. |
+| submitFunction | aws-cdk-lib.aws_lambda.IFunction | The Lambda Function for the Redshift Data Sharing submission. |
+| submitLogGroup | aws-cdk-lib.aws_logs.ILogGroup | The CloudWatch Log Group for the Redshift Data Sharing submission. |
+| cleanUpFunction | aws-cdk-lib.aws_lambda.IFunction | The Lambda function for the cleaning up lambda. |
+| cleanUpLogGroup | aws-cdk-lib.aws_logs.ILogGroup | The CloudWatch Log Group for the Redshift Data Sharing cleaning up lambda. |
+| cleanUpRole | aws-cdk-lib.aws_iam.IRole | The IAM Role for the cleaning up lambda. 
|

---

-##### `node`Required
+##### `node`Required

```typescript
public readonly node: Node;
```

- *Type:* constructs.Node

The tree node.

---

-##### `copyFunction`Required
+##### `customResourceSecurityGroup`Optional

```typescript
-public readonly copyFunction: IFunction;
+public readonly customResourceSecurityGroup: ISecurityGroup;
```

-- *Type:* aws-cdk-lib.aws_lambda.IFunction
+- *Type:* aws-cdk-lib.aws_ec2.ISecurityGroup

-The Lambda Function for the copy.
+The Security Group used by the Custom Resource when deployed in a VPC.

---

-##### `copyLogGroup`Required
+##### `vpcEndpoint`Optional

```typescript
-public readonly copyLogGroup: ILogGroup;
+public readonly vpcEndpoint: IInterfaceVpcEndpoint;
```

-- *Type:* aws-cdk-lib.aws_logs.ILogGroup
+- *Type:* aws-cdk-lib.aws_ec2.IInterfaceVpcEndpoint

-The CloudWatch Log Group for the S3 data copy.
+The created Redshift Data API interface VPC endpoint when deployed in a VPC.

---

-##### `copyRole`Required
+##### `vpcEndpointSecurityGroup`Optional

```typescript
-public readonly copyRole: IRole;
+public readonly vpcEndpointSecurityGroup: ISecurityGroup;
+```
+
+- *Type:* aws-cdk-lib.aws_ec2.ISecurityGroup
+
+The Security Group used by the VPC Endpoint when deployed in a VPC.
+
+---
+
+##### `dataAccessTargetProps`Required
+
+```typescript
+public readonly dataAccessTargetProps: RedshiftDataAccessTargetProps;
+```
+
+- *Type:* @cdklabs/aws-data-solutions-framework.consumption.RedshiftDataAccessTargetProps
+
+Contains normalized details of the target Redshift cluster/workgroup for data access.
+
+---
+
+##### `executionRole`Required
+
+```typescript
+public readonly executionRole: IRole;
```

- *Type:* aws-cdk-lib.aws_iam.IRole

-The IAM Role for the copy Lambba Function.
+The IAM Role for the Redshift Data API execution.

---

-##### `cleanUpFunction`Optional
+##### `statusFunction`Required

```typescript
-public readonly cleanUpFunction: IFunction;
+public readonly statusFunction: IFunction;
```

- *Type:* aws-cdk-lib.aws_lambda.IFunction

-The Lambda function for the S3 data copy cleaning up lambda.
+The Lambda Function for the Redshift Data Sharing status checks.

---

-##### `cleanUpLogGroup`Optional
+##### `statusLogGroup`Required

```typescript
-public readonly cleanUpLogGroup: ILogGroup;
+public readonly statusLogGroup: ILogGroup;
```

- *Type:* aws-cdk-lib.aws_logs.ILogGroup

-The CloudWatch Log Group for the S3 data copy cleaning up lambda.
+The CloudWatch Log Group for the Redshift Data Sharing status checks.

---

-##### `cleanUpRole`Optional
+##### `submitFunction`Required

```typescript
-public readonly cleanUpRole: IRole;
+public readonly submitFunction: IFunction;
```

-- *Type:* aws-cdk-lib.aws_iam.IRole
+- *Type:* aws-cdk-lib.aws_lambda.IFunction

-The IAM Role for the the S3 data copy cleaning up lambda.
+The Lambda Function for the Redshift Data Sharing submission.

---

-##### `securityGroups`Optional
+##### `submitLogGroup`Required

```typescript
-public readonly securityGroups: ISecurityGroup[];
+public readonly submitLogGroup: ILogGroup;
```

-- *Type:* aws-cdk-lib.aws_ec2.ISecurityGroup[]
+- *Type:* aws-cdk-lib.aws_logs.ILogGroup

-The list of EC2 Security Groups used by the Lambda Functions.
+The CloudWatch Log Group for the Redshift Data Sharing submission.
+
+---
+
+##### `cleanUpFunction`Optional
+
+```typescript
+public readonly cleanUpFunction: IFunction;
+```
+
+- *Type:* aws-cdk-lib.aws_lambda.IFunction
+
+The Lambda function for the cleaning up lambda. 
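
On the producer side, `createShare` and `grant` are typically chained. A minimal sketch reusing the `dataShare` instance from the example above, assuming the `RedshiftNewShareProps` returned by `createShare` exposes the ARN of the created datashare (the `dataShareArn` field name is an assumption, and the consumer account ID is a placeholder):

```typescript
// Create the datashare on the producer, then authorize a consumer account.
const newShare = dataShare.createShare('NewDataShare', 'default', 'example_share', 'public', ['public.customers']);

dataShare.grant('GrantToConsumer', {
  dataShareName: 'example_share',
  databaseName: 'default',
  autoAuthorized: true,
  accountId: '111122223333',           // hypothetical consumer account ID
  dataShareArn: newShare.dataShareArn, // assumed field on RedshiftNewShareProps
});
```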
+
+---
+
+##### `cleanUpLogGroup`Optional
+
+```typescript
+public readonly cleanUpLogGroup: ILogGroup;
+```
+
+- *Type:* aws-cdk-lib.aws_logs.ILogGroup
+
+The CloudWatch Log Group for the Redshift Data Sharing cleaning up lambda.
+
+---
+
+##### `cleanUpRole`Optional
+
+```typescript
+public readonly cleanUpRole: IRole;
+```
+
+- *Type:* aws-cdk-lib.aws_iam.IRole
+
+The IAM Role for the cleaning up lambda.

---

@@ -9138,12 +8966,12 @@ The list of EC2 Security Groups used by the Lambda Functions.

| **Name** | **Type** | **Description** |
| --- | --- | --- |
-| DSF_OWNED_TAG | string | *No description.* |
-| DSF_TRACKING_CODE | string | *No description.* |
+| DSF_OWNED_TAG | string | *No description.* |
+| DSF_TRACKING_CODE | string | *No description.* |

---

-##### `DSF_OWNED_TAG`Required
+##### `DSF_OWNED_TAG`Required

```typescript
public readonly DSF_OWNED_TAG: string;
```

@@ -9153,7 +8981,7 @@ public readonly DSF_OWNED_TAG: string;

---

-##### `DSF_TRACKING_CODE`Required
+##### `DSF_TRACKING_CODE`Required

```typescript
public readonly DSF_TRACKING_CODE: string;
```

@@ -9163,99 +8991,51 @@ public readonly DSF_TRACKING_CODE: string;

---

-### SparkEmrCICDPipeline
+### RedshiftServerlessNamespace

-A CICD Pipeline to test and deploy a Spark application on Amazon EMR in cross-account environments using CDK Pipelines.
-
-> [https://awslabs.github.io/data-solutions-framework-on-aws/docs/constructs/library/Processing/spark-cicd-pipeline](https://awslabs.github.io/data-solutions-framework-on-aws/docs/constructs/library/Processing/spark-cicd-pipeline)
+Create a Redshift Serverless Namespace with the admin credentials stored in Secrets Manager.

*Example*

```typescript
-import { Bucket } from 'aws-cdk-lib/aws-s3';
-import { CodePipelineSource } from 'aws-cdk-lib/pipelines';
-
-interface MyApplicationStackProps extends cdk.StackProps {
-  readonly stage: dsf.utils.CICDStage;
-}
-
-class MyApplicationStack extends cdk.Stack {
-  constructor(scope: cdk.Stack, props?: MyApplicationStackProps) {
-    super(scope, 'MyApplicationStack');
-    const bucket = new Bucket(this, 'TestBucket', {
-      autoDeleteObjects: true,
-      removalPolicy: cdk.RemovalPolicy.DESTROY,
-    });
-    new cdk.CfnOutput(this, 'BucketName', { value: bucket.bucketName });
-  }
-}
-
-class MyStackFactory implements dsf.utils.ApplicationStackFactory {
-  createStack(scope: cdk.Stack, stage: dsf.utils.CICDStage): cdk.Stack {
-    return new MyApplicationStack(scope, { stage });
-  }
-}
-
-class MyCICDStack extends cdk.Stack {
-  constructor(scope: Construct, id: string) {
-    super(scope, id);
-
-    new dsf.processing.SparkEmrCICDPipeline(this, 'TestConstruct', {
-      sparkApplicationName: 'test',
-      applicationStackFactory: new MyStackFactory(),
-      cdkApplicationPath: 'cdk/',
-      sparkApplicationPath: 'spark/',
-      sparkImage: dsf.processing.SparkImage.EMR_6_12,
-      integTestScript: 'cdk/integ-test.sh',
-      integTestEnv: {
-        TEST_BUCKET: 'BucketName',
-      },
-      source: CodePipelineSource.connection('owner/weekly-job', 'mainline', {
-        connectionArn: 'arn:aws:codeconnections:eu-west-1:123456789012:connection/aEXAMPLE-8aad-4d5d-8878-dfcab0bc441f'
-      }),
-    });
-}
-}
+const namespace = new dsf.consumption.RedshiftServerlessNamespace(this, 'DefaultServerlessNamespace', {
+  dbName: 'defaultdb',
+  name: 'default'
+});
```

-#### Initializers
+#### Initializers

```typescript
-import { processing } from '@cdklabs/aws-data-solutions-framework'
+import { consumption } from '@cdklabs/aws-data-solutions-framework'

-new processing.SparkEmrCICDPipeline(scope: Construct, id: string, props: 
SparkEmrCICDPipelineProps) +new consumption.RedshiftServerlessNamespace(scope: Construct, id: string, props: RedshiftServerlessNamespaceProps) ``` | **Name** | **Type** | **Description** | | --- | --- | --- | -| scope | constructs.Construct | the Scope of the CDK Construct. | -| id | string | the ID of the CDK Construct. | -| props | @cdklabs/aws-data-solutions-framework.processing.SparkEmrCICDPipelineProps | the SparkCICDPipelineProps properties. | +| scope | constructs.Construct | *No description.* | +| id | string | *No description.* | +| props | @cdklabs/aws-data-solutions-framework.consumption.RedshiftServerlessNamespaceProps | *No description.* | --- -##### `scope`Required +##### `scope`Required - *Type:* constructs.Construct -the Scope of the CDK Construct. - --- -##### `id`Required +##### `id`Required - *Type:* string -the ID of the CDK Construct. - --- -##### `props`Required - -- *Type:* @cdklabs/aws-data-solutions-framework.processing.SparkEmrCICDPipelineProps +##### `props`Required -the SparkCICDPipelineProps properties. +- *Type:* @cdklabs/aws-data-solutions-framework.consumption.RedshiftServerlessNamespaceProps --- @@ -9263,12 +9043,12 @@ the SparkCICDPipelineProps properties. | **Name** | **Description** | | --- | --- | -| toString | Returns a string representation of this construct. | -| retrieveVersion | Retrieve DSF package.json version. | +| toString | Returns a string representation of this construct. | +| retrieveVersion | Retrieve DSF package.json version. | --- -##### `toString` +##### `toString` ```typescript public toString(): string @@ -9276,7 +9056,7 @@ public toString(): string Returns a string representation of this construct. -##### `retrieveVersion` +##### `retrieveVersion` ```typescript public retrieveVersion(): any @@ -9288,16 +9068,16 @@ Retrieve DSF package.json version. | **Name** | **Description** | | --- | --- | -| isConstruct | Checks if `x` is a construct. | +| isConstruct | Checks if `x` is a construct. | --- -##### `isConstruct` +##### `isConstruct` ```typescript -import { processing } from '@cdklabs/aws-data-solutions-framework' +import { consumption } from '@cdklabs/aws-data-solutions-framework' -processing.SparkEmrCICDPipeline.isConstruct(x: any) +consumption.RedshiftServerlessNamespace.isConstruct(x: any) ``` Checks if `x` is a construct. @@ -9316,7 +9096,7 @@ library can be accidentally installed, and `instanceof` will behave unpredictably. It is safest to avoid using `instanceof`, and using this type-testing method instead. -###### `x`Required +###### `x`Required - *Type:* any @@ -9328,16 +9108,26 @@ Any object. | **Name** | **Type** | **Description** | | --- | --- | --- | -| node | constructs.Node | The tree node. | -| artifactAccessLogsBucket | aws-cdk-lib.aws_s3.IBucket | The S3 Bucket for storing the access logs on the artifact S3 Bucket. | -| artifactBucket | aws-cdk-lib.aws_s3.IBucket | The S3 Bucket for storing the artifacts. | -| pipeline | aws-cdk-lib.pipelines.CodePipeline | The CodePipeline created as part of the Spark CICD Pipeline. | -| pipelineLogGroup | aws-cdk-lib.aws_logs.ILogGroup | The CloudWatch Log Group for storing the CodePipeline logs. | -| integrationTestStage | aws-cdk-lib.pipelines.CodeBuildStep | The CodeBuild Step for the staging stage. | +| node | constructs.Node | The tree node. | +| adminSecret | aws-cdk-lib.aws_secretsmanager.ISecret | The created Secrets Manager secret containing the admin credentials. | +| adminSecretKey | aws-cdk-lib.aws_kms.IKey | The KMS Key used to encrypt the admin credentials secret. 
| +| createFunction | aws-cdk-lib.aws_lambda.IFunction | The Lambda Function for the Redshift Serverless creation. | +| createLogGroup | aws-cdk-lib.aws_logs.ILogGroup | The CloudWatch Logs Log Group for the Redshift Serverless creation. | +| createRole | aws-cdk-lib.aws_iam.IRole | The IAM Role for the Redshift Serverless creation. | +| customResource | aws-cdk-lib.CustomResource | The custom resource that creates the Namespace. | +| dataKey | aws-cdk-lib.aws_kms.Key | KMS key used by the namespace to encrypt the data. | +| dbName | string | The name of the database. | +| namespaceArn | string | The ARN of the created namespace. | +| namespaceId | string | The ID of the created namespace. | +| namespaceName | string | The name of the created namespace. | +| roles | {[ key: string ]: aws-cdk-lib.aws_iam.IRole} | The roles attached to the namespace in the form of `{RoleArn: IRole}`. | +| statusFunction | aws-cdk-lib.aws_lambda.IFunction | The Lambda Function for the creation status check. | +| statusLogGroup | aws-cdk-lib.aws_logs.ILogGroup | The CloudWatch Logs Log Group for the creation status check. | +| statusRole | aws-cdk-lib.aws_iam.IRole | The IAM Role for the creation status check. | --- -##### `node`Required +##### `node`Required ```typescript public readonly node: Node; @@ -9349,360 +9139,297 @@ The tree node. --- -##### `artifactAccessLogsBucket`Required +##### `adminSecret`Required ```typescript -public readonly artifactAccessLogsBucket: IBucket; +public readonly adminSecret: ISecret; ``` -- *Type:* aws-cdk-lib.aws_s3.IBucket +- *Type:* aws-cdk-lib.aws_secretsmanager.ISecret -The S3 Bucket for storing the access logs on the artifact S3 Bucket. +The created Secrets Manager secret containing the admin credentials. --- -##### `artifactBucket`Required +##### `adminSecretKey`Required ```typescript -public readonly artifactBucket: IBucket; +public readonly adminSecretKey: IKey; ``` -- *Type:* aws-cdk-lib.aws_s3.IBucket +- *Type:* aws-cdk-lib.aws_kms.IKey -The S3 Bucket for storing the artifacts. +The KMS Key used to encrypt the admin credentials secret. --- -##### `pipeline`Required +##### `createFunction`Required ```typescript -public readonly pipeline: CodePipeline; +public readonly createFunction: IFunction; ``` -- *Type:* aws-cdk-lib.pipelines.CodePipeline +- *Type:* aws-cdk-lib.aws_lambda.IFunction -The CodePipeline created as part of the Spark CICD Pipeline. +The Lambda Function for the Redshift Serverless creation. --- -##### `pipelineLogGroup`Required +##### `createLogGroup`Required ```typescript -public readonly pipelineLogGroup: ILogGroup; +public readonly createLogGroup: ILogGroup; ``` - *Type:* aws-cdk-lib.aws_logs.ILogGroup -The CloudWatch Log Group for storing the CodePipeline logs. +The CloudWatch Logs Log Group for the Redshift Serverless creation. --- -##### `integrationTestStage`Optional +##### `createRole`Required ```typescript -public readonly integrationTestStage: CodeBuildStep; +public readonly createRole: IRole; ``` -- *Type:* aws-cdk-lib.pipelines.CodeBuildStep +- *Type:* aws-cdk-lib.aws_iam.IRole -The CodeBuild Step for the staging stage. +The IAM Role for the Redshift Serverless creation. --- -#### Constants +##### `customResource`Required -| **Name** | **Type** | **Description** | -| --- | --- | --- | -| DSF_OWNED_TAG | string | *No description.* | -| DSF_TRACKING_CODE | string | *No description.* | +```typescript +public readonly customResource: CustomResource; +``` + +- *Type:* aws-cdk-lib.CustomResource + +The custom resource that creates the Namespace. 
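
Because the admin credentials secret is encrypted with a dedicated KMS key, a consumer needs both secret-read and key-decrypt permissions. A minimal sketch, assuming `namespace` is the `RedshiftServerlessNamespace` from the example above; the Glue role is hypothetical:

```typescript
import { Role, ServicePrincipal } from 'aws-cdk-lib/aws-iam';

// Hypothetical role for a job that logs in with the namespace admin credentials.
const etlRole = new Role(this, 'EtlRole', {
  assumedBy: new ServicePrincipal('glue.amazonaws.com'),
});

// Grant read on the secret and decrypt on the key that protects it.
namespace.adminSecret.grantRead(etlRole);
namespace.adminSecretKey.grantDecrypt(etlRole);
```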
--- -##### `DSF_OWNED_TAG`Required +##### `dataKey`Required ```typescript -public readonly DSF_OWNED_TAG: string; +public readonly dataKey: Key; ``` -- *Type:* string +- *Type:* aws-cdk-lib.aws_kms.Key + +KMS key used by the namespace to encrypt the data. --- -##### `DSF_TRACKING_CODE`Required +##### `dbName`Required ```typescript -public readonly DSF_TRACKING_CODE: string; +public readonly dbName: string; ``` - *Type:* string +The name of the database. + --- -### SparkEmrContainersJob +##### `namespaceArn`Required -A construct to run Spark Jobs using EMR Container runtime (EMR on EKS). +```typescript +public readonly namespaceArn: string; +``` -It creates a Step Functions State Machine that orchestrates the Spark Job. +- *Type:* string -> [https://awslabs.github.io/data-solutions-framework-on-aws/docs/constructs/library/Processing/spark-emr-serverless-job](https://awslabs.github.io/data-solutions-framework-on-aws/docs/constructs/library/Processing/spark-emr-serverless-job) +The ARN of the created namespace. -*Example* +--- + +##### `namespaceId`Required ```typescript -import { JsonPath } from 'aws-cdk-lib/aws-stepfunctions'; +public readonly namespaceId: string; +``` -const job = new dsf.processing.SparkEmrContainersJob(this, 'SparkJob', { - jobConfig:{ - "Name": JsonPath.format('ge_profile-{}', JsonPath.uuid()), - "VirtualClusterId": "virtualClusterId", - "ExecutionRoleArn": "ROLE-ARN", - "JobDriver": { - "SparkSubmit": { - "EntryPoint": "s3://S3-BUCKET/pi.py", - "EntryPointArguments": [], - "SparkSubmitParameters": "--conf spark.executor.instances=2 --conf spark.executor.memory=2G --conf spark.driver.memory=2G --conf spark.executor.cores=4" - }, - } - } -} as dsf.processing.SparkEmrContainersJobApiProps); +- *Type:* string -new cdk.CfnOutput(this, 'SparkJobStateMachine', { - value: job.stateMachine!.stateMachineArn, -}); -``` +The ID of the created namespace. +--- -#### Initializers +##### `namespaceName`Required ```typescript -import { processing } from '@cdklabs/aws-data-solutions-framework' - -new processing.SparkEmrContainersJob(scope: Construct, id: string, props: SparkEmrContainersJobProps | SparkEmrContainersJobApiProps) +public readonly namespaceName: string; ``` -| **Name** | **Type** | **Description** | -| --- | --- | --- | -| scope | constructs.Construct | *No description.* | -| id | string | *No description.* | -| props | @cdklabs/aws-data-solutions-framework.processing.SparkEmrContainersJobProps \| @cdklabs/aws-data-solutions-framework.processing.SparkEmrContainersJobApiProps | *No description.* | +- *Type:* string + +The name of the created namespace. --- -##### `scope`Required +##### `roles`Required -- *Type:* constructs.Construct +```typescript +public readonly roles: {[ key: string ]: IRole}; +``` ---- +- *Type:* {[ key: string ]: aws-cdk-lib.aws_iam.IRole} -##### `id`Required +The roles attached to the namespace in the form of `{RoleArn: IRole}`. -- *Type:* string +These roles are used to access other AWS services for ingestion, federated query, and data catalog access. 
+ +> [https://docs.aws.amazon.com/redshift/latest/mgmt/redshift-iam-authentication-access-control.html](https://docs.aws.amazon.com/redshift/latest/mgmt/redshift-iam-authentication-access-control.html) --- -##### `props`Required +##### `statusFunction`Required -- *Type:* @cdklabs/aws-data-solutions-framework.processing.SparkEmrContainersJobProps | @cdklabs/aws-data-solutions-framework.processing.SparkEmrContainersJobApiProps +```typescript +public readonly statusFunction: IFunction; +``` ---- +- *Type:* aws-cdk-lib.aws_lambda.IFunction -#### Methods - -| **Name** | **Description** | -| --- | --- | -| toString | Returns a string representation of this construct. | -| retrieveVersion | Retrieve DSF package.json version. | +The Lambda Function for the creation status check. --- -##### `toString` - -```typescript -public toString(): string -``` - -Returns a string representation of this construct. - -##### `retrieveVersion` +##### `statusLogGroup`Required ```typescript -public retrieveVersion(): any +public readonly statusLogGroup: ILogGroup; ``` -Retrieve DSF package.json version. - -#### Static Functions +- *Type:* aws-cdk-lib.aws_logs.ILogGroup -| **Name** | **Description** | -| --- | --- | -| isConstruct | Checks if `x` is a construct. | +The CloudWatch Logs Log Group for the creation status check. --- -##### `isConstruct` +##### `statusRole`Required ```typescript -import { processing } from '@cdklabs/aws-data-solutions-framework' - -processing.SparkEmrContainersJob.isConstruct(x: any) +public readonly statusRole: IRole; ``` -Checks if `x` is a construct. - -Use this method instead of `instanceof` to properly detect `Construct` -instances, even when the construct library is symlinked. - -Explanation: in JavaScript, multiple copies of the `constructs` library on -disk are seen as independent, completely different libraries. As a -consequence, the class `Construct` in each copy of the `constructs` library -is seen as a different class, and an instance of one class will not test as -`instanceof` the other class. `npm install` will not create installations -like this, but users may manually symlink construct libraries together or -use a monorepo tool: in those cases, multiple copies of the `constructs` -library can be accidentally installed, and `instanceof` will behave -unpredictably. It is safest to avoid using `instanceof`, and using -this type-testing method instead. - -###### `x`Required - -- *Type:* any +- *Type:* aws-cdk-lib.aws_iam.IRole -Any object. +The IAM Role for the creation status check. --- -#### Properties +#### Constants | **Name** | **Type** | **Description** | | --- | --- | --- | -| node | constructs.Node | The tree node. | -| stateMachine | aws-cdk-lib.aws_stepfunctions.StateMachine | The Step Functions State Machine created to orchestrate the Spark Job. | -| stateMachineLogGroup | aws-cdk-lib.aws_logs.ILogGroup | The CloudWatch Log Group used by the State Machine. | +| DSF_OWNED_TAG | string | *No description.* | +| DSF_TRACKING_CODE | string | *No description.* | --- -##### `node`Required +##### `DSF_OWNED_TAG`Required ```typescript -public readonly node: Node; +public readonly DSF_OWNED_TAG: string; ``` -- *Type:* constructs.Node - -The tree node. +- *Type:* string --- -##### `stateMachine`Optional +##### `DSF_TRACKING_CODE`Required ```typescript -public readonly stateMachine: StateMachine; +public readonly DSF_TRACKING_CODE: string; ``` -- *Type:* aws-cdk-lib.aws_stepfunctions.StateMachine - -The Step Functions State Machine created to orchestrate the Spark Job. 
+- *Type:* string --- -##### `stateMachineLogGroup`Optional - -```typescript -public readonly stateMachineLogGroup: ILogGroup; -``` - -- *Type:* aws-cdk-lib.aws_logs.ILogGroup - -The CloudWatch Log Group used by the State Machine. - ---- +### RedshiftServerlessWorkgroup -#### Constants +- *Implements:* aws-cdk-lib.aws_ec2.IConnectable -| **Name** | **Type** | **Description** | -| --- | --- | --- | -| DSF_OWNED_TAG | string | *No description.* | -| DSF_TRACKING_CODE | string | *No description.* | +Create a Redshift Serverless Workgroup. ---- +A default namespace would be created if none is provided. -##### `DSF_OWNED_TAG`Required +*Example* ```typescript -public readonly DSF_OWNED_TAG: string; +const workgroup = new dsf.consumption.RedshiftServerlessWorkgroup(this, "RedshiftWorkgroup", { + name: "example-workgroup", + namespace: new dsf.consumption.RedshiftServerlessNamespace(this, "RedshiftNamespace", { + name: 'example-namespace', + dbName: 'defaultdb', + }) +}); ``` -- *Type:* string - ---- -##### `DSF_TRACKING_CODE`Required +#### Initializers ```typescript -public readonly DSF_TRACKING_CODE: string; +import { consumption } from '@cdklabs/aws-data-solutions-framework' + +new consumption.RedshiftServerlessWorkgroup(scope: Construct, id: string, props: RedshiftServerlessWorkgroupProps) ``` -- *Type:* string +| **Name** | **Type** | **Description** | +| --- | --- | --- | +| scope | constructs.Construct | *No description.* | +| id | string | *No description.* | +| props | @cdklabs/aws-data-solutions-framework.consumption.RedshiftServerlessWorkgroupProps | *No description.* | --- -### SparkEmrContainersRuntime - -A construct to create an EKS cluster, configure it and enable it with EMR on EKS. - -> [https://awslabs.github.io/data-solutions-framework-on-aws/docs/constructs/library/Processing/spark-emr-containers-runtime](https://awslabs.github.io/data-solutions-framework-on-aws/docs/constructs/library/Processing/spark-emr-containers-runtime) - -*Example* +##### `scope`Required -```typescript -import { ManagedPolicy, PolicyDocument, PolicyStatement } from 'aws-cdk-lib/aws-iam'; -import { KubectlV30Layer } from '@aws-cdk/lambda-layer-kubectl-v30'; +- *Type:* constructs.Construct -const kubectlLayer = new KubectlV30Layer(this, 'kubectlLayer'); +--- -const emrEksCluster = dsf.processing.SparkEmrContainersRuntime.getOrCreate(this, { - publicAccessCIDRs: ['10.0.0.0/16'], - kubectlLambdaLayer: kubectlLayer, -}); +##### `id`Required -const virtualCluster = emrEksCluster.addEmrVirtualCluster(this, { - name: 'example', - createNamespace: true, - eksNamespace: 'example', -}); +- *Type:* string -const s3Read = new PolicyDocument({ - statements: [new PolicyStatement({ - actions: [ - 's3:GetObject', - ], - resources: ['arn:aws:s3:::aws-data-analytics-workshop'], - })], -}); +--- -const s3ReadPolicy = new ManagedPolicy(this, 's3ReadPolicy', { - document: s3Read, -}); +##### `props`Required -const execRole = emrEksCluster.createExecutionRole(this, 'ExecRole', s3ReadPolicy, 'example', 's3ReadExecRole'); -``` +- *Type:* @cdklabs/aws-data-solutions-framework.consumption.RedshiftServerlessWorkgroupProps +--- #### Methods | **Name** | **Description** | | --- | --- | -| toString | Returns a string representation of this construct. | -| addEmrVirtualCluster | Add a new Amazon EMR Virtual Cluster linked to Amazon EKS Cluster. | -| addInteractiveEndpoint | Creates a new Amazon EMR managed endpoint to be used with Amazon EMR Virtual Cluster . 
| -| addKarpenterNodePoolAndNodeClass | Apply the provided manifest and add the CDK dependency on EKS cluster. | -| createExecutionRole | Create and configure a new Amazon IAM Role usable as an execution role. | -| retrieveVersion | Retrieve DSF package.json version. | -| uploadPodTemplate | Upload podTemplates to the Amazon S3 location used by the cluster. | +| toString | Returns a string representation of this construct. | +| accessData | Creates an instance of `RedshiftData` to send custom SQLs to the workgroup. | +| assignDbRolesToIAMRole | Assigns Redshift DB roles to IAM role vs the `RedshiftDbRoles` tag. | +| catalogTables | Creates a new Glue data catalog database with a crawler using JDBC target type to connect to the Redshift Workgroup. | +| createDatabaseFromShare | Consume datashare by creating a new database pointing to the share. | +| createDbRole | Creates a new DB role. | +| createShare | Create a new datashare. | +| grantAccessToShare | Create a datashare grant to a namespace if it's in the same account, or to another account. | +| grantDbAllPrivilegesToRole | Grants both read and write permissions on all the tables in the `schema` to the DB role. | +| grantDbSchemaToRole | Grants access to the schema to the DB role. | +| grantSchemaReadToRole | Grants read permission on all the tables in the `schema` to the DB role. | +| ingestData | Ingest data from S3 into a Redshift table. | +| mergeToTargetTable | Run the `MERGE` query using simplified mode. | +| retrieveVersion | Retrieve DSF package.json version. | +| runCustomSQL | Runs a custom SQL. | --- -##### `toString` +##### `toString` ```typescript public toString(): string @@ -9710,908 +9437,748 @@ public toString(): string Returns a string representation of this construct. -##### `addEmrVirtualCluster` +##### ~~`accessData`~~ ```typescript -public addEmrVirtualCluster(scope: Construct, options: EmrVirtualClusterProps): CfnVirtualCluster +public accessData(id: string, createVpcEndpoint?: boolean, existingInterfaceVPCEndpoint?: IInterfaceVpcEndpoint): RedshiftData ``` -Add a new Amazon EMR Virtual Cluster linked to Amazon EKS Cluster. +Creates an instance of `RedshiftData` to send custom SQLs to the workgroup. -###### `scope`Required +###### `id`Required -- *Type:* constructs.Construct +- *Type:* string -of the stack where virtual cluster is deployed. +The CDK ID of the resource. --- -###### `options`Required +###### `createVpcEndpoint`Optional -- *Type:* @cdklabs/aws-data-solutions-framework.processing.EmrVirtualClusterProps +- *Type:* boolean -the EmrVirtualClusterProps [properties]{@link EmrVirtualClusterProps}. +if set to true, create interface VPC endpoint for Redshift Data API. --- -##### `addInteractiveEndpoint` - -```typescript -public addInteractiveEndpoint(scope: Construct, id: string, interactiveSessionOptions: SparkEmrContainersRuntimeInteractiveSessionProps): CustomResource -``` +###### `existingInterfaceVPCEndpoint`Optional -Creates a new Amazon EMR managed endpoint to be used with Amazon EMR Virtual Cluster . +- *Type:* aws-cdk-lib.aws_ec2.IInterfaceVpcEndpoint -CfnOutput can be customized. +if `createVpcEndpoint` is false, and if this is populated, then the Lambda function's security group would be added in the existing VPC endpoint's security group. -###### `scope`Required +--- -- *Type:* constructs.Construct +##### `assignDbRolesToIAMRole` -the scope of the stack where managed endpoint is deployed. 
+```typescript +public assignDbRolesToIAMRole(dbRoles: string[], targetRole: IRole): void +``` ---- +Assigns Redshift DB roles to IAM role vs the `RedshiftDbRoles` tag. -###### `id`Required +###### `dbRoles`Required -- *Type:* string +- *Type:* string[] -the CDK id for endpoint. +List of Redshift DB roles to assign to IAM role. --- -###### `interactiveSessionOptions`Required +###### `targetRole`Required -- *Type:* @cdklabs/aws-data-solutions-framework.processing.SparkEmrContainersRuntimeInteractiveSessionProps +- *Type:* aws-cdk-lib.aws_iam.IRole -the EmrManagedEndpointOptions to configure the Amazon EMR managed endpoint. +The IAM role to assign the Redshift DB roles to. --- -##### `addKarpenterNodePoolAndNodeClass` +##### `catalogTables` ```typescript -public addKarpenterNodePoolAndNodeClass(id: string, manifest: any): any +public catalogTables(id: string, catalogDbName: string, pathToCrawl?: string): DataCatalogDatabase ``` -Apply the provided manifest and add the CDK dependency on EKS cluster. +Creates a new Glue data catalog database with a crawler using JDBC target type to connect to the Redshift Workgroup. -###### `id`Required +###### `id`Required - *Type:* string -the unique ID of the CDK resource. +The CDK ID of the resource. --- -###### `manifest`Required +###### `catalogDbName`Required -- *Type:* any +- *Type:* string -The manifest to apply. +The name of the Glue Database to create. -You can use the Utils class that offers method to read yaml file and load it as a manifest +--- + +###### `pathToCrawl`Optional + +- *Type:* string + +The path of Redshift tables to crawl. --- -##### `createExecutionRole` +##### `createDatabaseFromShare` ```typescript -public createExecutionRole(scope: Construct, id: string, policy: IManagedPolicy, eksNamespace: string, name: string): Role +public createDatabaseFromShare(id: string, newDatabaseName: string, producerDataShareName: string, producerNamespaceId?: string, producerAccountId?: string): RedshiftDataSharingCreateDbFromShareProps ``` -Create and configure a new Amazon IAM Role usable as an execution role. +Consume datashare by creating a new database pointing to the share. -This method makes the created role assumed by the Amazon EKS cluster Open ID Connect provider. +If datashare is coming from a different account, setting `autoAssociate` to true +automatically associates the datashare to the cluster before the new database is created. -###### `scope`Required +###### `id`Required -- *Type:* constructs.Construct +- *Type:* string -of the IAM role. +The CDK ID of the resource. --- -###### `id`Required +###### `newDatabaseName`Required - *Type:* string -of the CDK resource to be created, it should be unique across the stack. +The name of the database that would be created from the data share. --- -###### `policy`Required +###### `producerDataShareName`Required -- *Type:* aws-cdk-lib.aws_iam.IManagedPolicy +- *Type:* string -the execution policy to attach to the role. +The name of the data share from producer. --- -###### `eksNamespace`Required +###### `producerNamespaceId`Optional - *Type:* string -The namespace from which the role is going to be used. - -MUST be the same as the namespace of the Virtual Cluster from which the job is submitted +The producer cluster namespace. --- -###### `name`Required +###### `producerAccountId`Optional - *Type:* string -Name to use for the role, required and is used to scope the iam role. - ---- - -##### `retrieveVersion` +The producer account ID. 
-```typescript -public retrieveVersion(): any -``` +Required for cross account shares. -Retrieve DSF package.json version. +--- -##### `uploadPodTemplate` +##### `createDbRole` ```typescript -public uploadPodTemplate(id: string, filePath: string): void +public createDbRole(id: string, databaseName: string, roleName: string): CustomResource ``` -Upload podTemplates to the Amazon S3 location used by the cluster. +Creates a new DB role. -###### `id`Required +###### `id`Required - *Type:* string -the unique ID of the CDK resource. +The CDK Construct ID. --- -###### `filePath`Required +###### `databaseName`Required - *Type:* string -The local path of the yaml podTemplate files to upload. +The name of the database to run this command. --- -#### Static Functions +###### `roleName`Required -| **Name** | **Description** | -| --- | --- | -| isConstruct | Checks if `x` is a construct. | -| getOrCreate | Get an existing EmrEksCluster based on the cluster name property or create a new one only one EKS cluster can exist per stack. | -| grantStartJobExecution | A static method granting the right to start and monitor a job to an IAM Role. | +- *Type:* string + +The name of the role to create. --- -##### `isConstruct` +##### `createShare` ```typescript -import { processing } from '@cdklabs/aws-data-solutions-framework' - -processing.SparkEmrContainersRuntime.isConstruct(x: any) +public createShare(id: string, databaseName: string, dataShareName: string, schema: string, tables: string[]): RedshiftNewShareProps ``` -Checks if `x` is a construct. +Create a new datashare. -Use this method instead of `instanceof` to properly detect `Construct` -instances, even when the construct library is symlinked. +###### `id`Required -Explanation: in JavaScript, multiple copies of the `constructs` library on -disk are seen as independent, completely different libraries. As a -consequence, the class `Construct` in each copy of the `constructs` library -is seen as a different class, and an instance of one class will not test as -`instanceof` the other class. `npm install` will not create installations -like this, but users may manually symlink construct libraries together or -use a monorepo tool: in those cases, multiple copies of the `constructs` -library can be accidentally installed, and `instanceof` will behave -unpredictably. It is safest to avoid using `instanceof`, and using -this type-testing method instead. +- *Type:* string -###### `x`Required +The CDK ID of the resource. -- *Type:* any +--- -Any object. +###### `databaseName`Required + +- *Type:* string + +The name of the database to connect to. --- -##### `getOrCreate` +###### `dataShareName`Required -```typescript -import { processing } from '@cdklabs/aws-data-solutions-framework' +- *Type:* string -processing.SparkEmrContainersRuntime.getOrCreate(scope: Construct, props: SparkEmrContainersRuntimeProps) -``` +The name of the datashare. -Get an existing EmrEksCluster based on the cluster name property or create a new one only one EKS cluster can exist per stack. +--- -###### `scope`Required +###### `schema`Required -- *Type:* constructs.Construct +- *Type:* string -the CDK scope used to search or create the cluster. +The schema to add in the datashare. --- -###### `props`Required +###### `tables`Required -- *Type:* @cdklabs/aws-data-solutions-framework.processing.SparkEmrContainersRuntimeProps +- *Type:* string[] -the EmrEksClusterProps [properties]{@link EmrEksClusterProps } if created. +The list of tables that would be included in the datashare. 
+ +This must follow the format: `.` --- -##### `grantStartJobExecution` +##### `grantAccessToShare` ```typescript -import { processing } from '@cdklabs/aws-data-solutions-framework' - -processing.SparkEmrContainersRuntime.grantStartJobExecution(startJobRole: IRole, executionRoleArn: string[], virtualClusterArn: string) +public grantAccessToShare(id: string, dataShareDetails: RedshiftNewShareProps, consumerNamespaceId?: string, consumerAccountId?: string, autoAuthorized?: boolean): RedshiftDataSharingGrantedProps ``` -A static method granting the right to start and monitor a job to an IAM Role. - -The method will scope the following actions `DescribeJobRun`, `TagResource` and `ListJobRuns` to the provided virtual cluster. -It will also scope `StartJobRun` as defined in the -[EMR on EKS official documentation](https://docs.aws.amazon.com/emr/latest/EMR-on-EKS-DevelopmentGuide/iam-execution-role.html) +Create a datashare grant to a namespace if it's in the same account, or to another account. -###### `startJobRole`Required +###### `id`Required -- *Type:* aws-cdk-lib.aws_iam.IRole +- *Type:* string -the role that will call the start job api and which needs to have the iam:PassRole permission. +The CDK ID of the resource. --- -###### `executionRoleArn`Required +###### `dataShareDetails`Required -- *Type:* string[] +- *Type:* @cdklabs/aws-data-solutions-framework.consumption.RedshiftNewShareProps -the role used by EMR on EKS to access resources during the job execution. +The details of the datashare. --- -###### `virtualClusterArn`Required +###### `consumerNamespaceId`Optional - *Type:* string -the EMR Virtual Cluster ARN to which the job is submitted. +The namespace of the consumer that you're sharing to. + +Either namespace or account Id must be provided. --- -#### Properties +###### `consumerAccountId`Optional -| **Name** | **Type** | **Description** | -| --- | --- | --- | -| node | constructs.Node | The tree node. | -| ec2InstanceNodeGroupRole | aws-cdk-lib.aws_iam.IRole | The IAM role used by the tooling managed nodegroup hosting core Kubernetes controllers like EBS CSI driver, core dns. | -| eksCluster | aws-cdk-lib.aws_eks.Cluster | The EKS cluster created by the construct if it is not provided. | -| vpc | aws-cdk-lib.aws_ec2.IVpc | The VPC used by the EKS cluster. | -| assetBucket | aws-cdk-lib.aws_s3.IBucket | The bucket holding podtemplates referenced in the configuration override for the job. | -| assetUploadBucketRole | aws-cdk-lib.aws_iam.IRole | The IAM role used to upload assets (pod templates) on S3. | -| awsNodeRole | aws-cdk-lib.aws_iam.IRole | The IAM Role used by IRSA for the aws-node daemonset. | -| criticalDefaultConfig | string | The configuration override for the spark application to use with the default nodes for criticale jobs. | -| csiDriverIrsaRole | aws-cdk-lib.aws_iam.IRole | The IAM Role created for the EBS CSI controller. | -| eksSecretKmsKey | aws-cdk-lib.aws_kms.IKey | The KMS key used for storing EKS secrets. | -| emrServiceRole | aws-cdk-lib.aws_iam.CfnServiceLinkedRole | The Service Linked role created for EMR. | -| flowLogGroup | aws-cdk-lib.aws_logs.ILogGroup | The CloudWatch Log Group for the VPC flow log when the VPC is created. | -| flowLogKey | aws-cdk-lib.aws_kms.IKey | The KMS Key used for the VPC flow logs when the VPC is created. | -| flowLogRole | aws-cdk-lib.aws_iam.IRole | The IAM Role used for the VPC flow logs when the VPC is created. 
| -| karpenterEventRules | aws-cdk-lib.aws_events.IRule[] | The rules used by Karpenter to track node health, rules are defined in the cloudformation below https://raw.githubusercontent.com/aws/karpenter/"${KARPENTER_VERSION}"/website/content/en/preview/getting-started/getting-started-with-karpenter/cloudformation.yaml. | -| karpenterIrsaRole | aws-cdk-lib.aws_iam.IRole | The IAM role created for the Karpenter controller. | -| karpenterQueue | aws-cdk-lib.aws_sqs.IQueue | The SQS queue used by Karpenter to receive critical events from AWS services which may affect your nodes. | -| karpenterSecurityGroup | aws-cdk-lib.aws_ec2.ISecurityGroup | The security group used by the EC2NodeClass of the default nodes. | -| notebookDefaultConfig | any | The configuration override for the spark application to use with the default nodes dedicated for notebooks. | -| podTemplateS3LocationCriticalDriver | string | The S3 location holding the driver pod tempalte for critical nodes. | -| podTemplateS3LocationCriticalExecutor | string | The S3 location holding the executor pod tempalte for critical nodes. | -| podTemplateS3LocationDriverShared | string | The S3 location holding the driver pod tempalte for shared nodes. | -| podTemplateS3LocationExecutorShared | string | The S3 location holding the executor pod tempalte for shared nodes. | -| podTemplateS3LocationNotebookDriver | string | The S3 location holding the driver pod tempalte for interactive sessions. | -| podTemplateS3LocationNotebookExecutor | string | The S3 location holding the executor pod tempalte for interactive sessions. | -| s3VpcEndpoint | aws-cdk-lib.aws_ec2.IGatewayVpcEndpoint | The S3 VPC endpoint attached to the private subnets of the VPC when VPC is created. | -| sharedDefaultConfig | string | The configuration override for the spark application to use with the default nodes for none criticale jobs. | +- *Type:* string ---- +The account ID of the consumer that you're sharing to. -##### `node`Required +Either namespace or account Id must be provided. -```typescript -public readonly node: Node; -``` +--- -- *Type:* constructs.Node +###### `autoAuthorized`Optional -The tree node. +- *Type:* boolean --- -##### `ec2InstanceNodeGroupRole`Required +##### `grantDbAllPrivilegesToRole` ```typescript -public readonly ec2InstanceNodeGroupRole: IRole; +public grantDbAllPrivilegesToRole(id: string, databaseName: string, schema: string, roleName: string): CustomResource ``` -- *Type:* aws-cdk-lib.aws_iam.IRole +Grants both read and write permissions on all the tables in the `schema` to the DB role. -The IAM role used by the tooling managed nodegroup hosting core Kubernetes controllers like EBS CSI driver, core dns. +###### `id`Required ---- +- *Type:* string -##### `eksCluster`Required +The CDK Construct ID. -```typescript -public readonly eksCluster: Cluster; -``` +--- -- *Type:* aws-cdk-lib.aws_eks.Cluster +###### `databaseName`Required -The EKS cluster created by the construct if it is not provided. +- *Type:* string ---- +The name of the database to run this command. -##### `vpc`Required +--- -```typescript -public readonly vpc: IVpc; -``` +###### `schema`Required -- *Type:* aws-cdk-lib.aws_ec2.IVpc +- *Type:* string -The VPC used by the EKS cluster. +The schema where the tables are located in. 
--- -##### `assetBucket`Optional - -```typescript -public readonly assetBucket: IBucket; -``` +###### `roleName`Required -- *Type:* aws-cdk-lib.aws_s3.IBucket +- *Type:* string -The bucket holding podtemplates referenced in the configuration override for the job. +The DB role to grant the permissions to. --- -##### `assetUploadBucketRole`Optional +##### `grantDbSchemaToRole` ```typescript -public readonly assetUploadBucketRole: IRole; +public grantDbSchemaToRole(id: string, databaseName: string, schema: string, roleName: string): CustomResource ``` -- *Type:* aws-cdk-lib.aws_iam.IRole +Grants access to the schema to the DB role. -The IAM role used to upload assets (pod templates) on S3. +###### `id`Required ---- +- *Type:* string -##### `awsNodeRole`Optional +The CDK Construct ID. -```typescript -public readonly awsNodeRole: IRole; -``` +--- -- *Type:* aws-cdk-lib.aws_iam.IRole +###### `databaseName`Required -The IAM Role used by IRSA for the aws-node daemonset. +- *Type:* string ---- +The name of the database to run this command. -##### `criticalDefaultConfig`Optional +--- -```typescript -public readonly criticalDefaultConfig: string; -``` +###### `schema`Required - *Type:* string -The configuration override for the spark application to use with the default nodes for criticale jobs. +The schema where the tables are located in. --- -##### `csiDriverIrsaRole`Optional - -```typescript -public readonly csiDriverIrsaRole: IRole; -``` +###### `roleName`Required -- *Type:* aws-cdk-lib.aws_iam.IRole +- *Type:* string -The IAM Role created for the EBS CSI controller. +The DB role to grant the permissions to. --- -##### `eksSecretKmsKey`Optional +##### `grantSchemaReadToRole` ```typescript -public readonly eksSecretKmsKey: IKey; +public grantSchemaReadToRole(id: string, databaseName: string, schema: string, roleName: string): CustomResource ``` -- *Type:* aws-cdk-lib.aws_kms.IKey +Grants read permission on all the tables in the `schema` to the DB role. -The KMS key used for storing EKS secrets. +###### `id`Required ---- +- *Type:* string -##### `emrServiceRole`Optional +--- -```typescript -public readonly emrServiceRole: CfnServiceLinkedRole; -``` +###### `databaseName`Required -- *Type:* aws-cdk-lib.aws_iam.CfnServiceLinkedRole +- *Type:* string -The Service Linked role created for EMR. +The name of the database to run this command. --- -##### `flowLogGroup`Optional - -```typescript -public readonly flowLogGroup: ILogGroup; -``` +###### `schema`Required -- *Type:* aws-cdk-lib.aws_logs.ILogGroup +- *Type:* string -The CloudWatch Log Group for the VPC flow log when the VPC is created. +The schema where the tables are located in. --- -##### `flowLogKey`Optional - -```typescript -public readonly flowLogKey: IKey; -``` +###### `roleName`Required -- *Type:* aws-cdk-lib.aws_kms.IKey +- *Type:* string -The KMS Key used for the VPC flow logs when the VPC is created. +The DB role to grant the permissions to. --- -##### `flowLogRole`Optional +##### `ingestData` ```typescript -public readonly flowLogRole: IRole; +public ingestData(id: string, databaseName: string, targetTable: string, sourceBucket: IBucket, sourcePrefix: string, ingestAdditionalOptions?: string, role?: IRole): CustomResource ``` -- *Type:* aws-cdk-lib.aws_iam.IRole +Ingest data from S3 into a Redshift table. -The IAM Role used for the VPC flow logs when the VPC is created. +###### `id`Required ---- +- *Type:* string -##### `karpenterEventRules`Optional +The CDK Construct ID. 
-```typescript -public readonly karpenterEventRules: IRule[]; -``` +--- -- *Type:* aws-cdk-lib.aws_events.IRule[] +###### `databaseName`Required -The rules used by Karpenter to track node health, rules are defined in the cloudformation below https://raw.githubusercontent.com/aws/karpenter/"${KARPENTER_VERSION}"/website/content/en/preview/getting-started/getting-started-with-karpenter/cloudformation.yaml. +- *Type:* string ---- +The name of the database to run this command. -##### `karpenterIrsaRole`Optional +--- -```typescript -public readonly karpenterIrsaRole: IRole; -``` +###### `targetTable`Required -- *Type:* aws-cdk-lib.aws_iam.IRole +- *Type:* string -The IAM role created for the Karpenter controller. +The target table to load the data into. --- -##### `karpenterQueue`Optional +###### `sourceBucket`Required -```typescript -public readonly karpenterQueue: IQueue; -``` +- *Type:* aws-cdk-lib.aws_s3.IBucket -- *Type:* aws-cdk-lib.aws_sqs.IQueue +The bucket where the source data would be coming from. -The SQS queue used by Karpenter to receive critical events from AWS services which may affect your nodes. +--- + +###### `sourcePrefix`Required + +- *Type:* string + +The location inside the bucket where the data would be ingested from. --- -##### `karpenterSecurityGroup`Optional +###### `ingestAdditionalOptions`Optional -```typescript -public readonly karpenterSecurityGroup: ISecurityGroup; -``` +- *Type:* string -- *Type:* aws-cdk-lib.aws_ec2.ISecurityGroup +Optional. -The security group used by the EC2NodeClass of the default nodes. +Additional options to pass to the `COPY` command. For example, `delimiter '|'` or `ignoreheader 1` --- -##### `notebookDefaultConfig`Optional +###### `role`Optional -```typescript -public readonly notebookDefaultConfig: any; -``` +- *Type:* aws-cdk-lib.aws_iam.IRole -- *Type:* any +Optional. -The configuration override for the spark application to use with the default nodes dedicated for notebooks. +The IAM Role to use to access the data in S3. If not provided, it would use the default IAM role configured in the Redshift Namespace --- -##### `podTemplateS3LocationCriticalDriver`Optional +##### `mergeToTargetTable` ```typescript -public readonly podTemplateS3LocationCriticalDriver: string; +public mergeToTargetTable(id: string, databaseName: string, sourceTable: string, targetTable: string, sourceColumnId?: string, targetColumnId?: string): CustomResource ``` +Run the `MERGE` query using simplified mode. + +This command would do an upsert into the target table. + +###### `id`Required + - *Type:* string -The S3 location holding the driver pod tempalte for critical nodes. +The CDK Construct ID. --- -##### `podTemplateS3LocationCriticalExecutor`Optional - -```typescript -public readonly podTemplateS3LocationCriticalExecutor: string; -``` +###### `databaseName`Required - *Type:* string -The S3 location holding the executor pod tempalte for critical nodes. +The name of the database to run this command. --- -##### `podTemplateS3LocationDriverShared`Optional - -```typescript -public readonly podTemplateS3LocationDriverShared: string; -``` +###### `sourceTable`Required - *Type:* string -The S3 location holding the driver pod tempalte for shared nodes. +The source table name. 
---- +Schema can also be included using the following format: `schemaName.tableName`. -##### `podTemplateS3LocationExecutorShared`<sup>Optional</sup> +--- -```typescript -public readonly podTemplateS3LocationExecutorShared: string; -``` +###### `targetTable`<sup>Required</sup> - *Type:* string -The S3 location holding the executor pod tempalte for shared nodes. +The target table name. ---- +Schema can also be included using the following format: `schemaName.tableName`. -##### `podTemplateS3LocationNotebookDriver`<sup>Optional</sup> +--- -```typescript -public readonly podTemplateS3LocationNotebookDriver: string; -``` +###### `sourceColumnId`<sup>Optional</sup> - *Type:* string -The S3 location holding the driver pod tempalte for interactive sessions. +The column in the source table that's used to determine whether the rows in the `sourceTable` can be matched with rows in the `targetTable`. ---- +Default is `id`. -##### `podTemplateS3LocationNotebookExecutor`<sup>Optional</sup> +--- -```typescript -public readonly podTemplateS3LocationNotebookExecutor: string; -``` +###### `targetColumnId`<sup>Optional</sup> - *Type:* string -The S3 location holding the executor pod tempalte for interactive sessions. +The column in the target table that's used to determine whether the rows in the `sourceTable` can be matched with rows in the `targetTable`. + +Default is `id`. --- -##### `s3VpcEndpoint`<sup>Optional</sup> +##### `retrieveVersion` ```typescript -public readonly s3VpcEndpoint: IGatewayVpcEndpoint; +public retrieveVersion(): any ``` -- *Type:* aws-cdk-lib.aws_ec2.IGatewayVpcEndpoint - -The S3 VPC endpoint attached to the private subnets of the VPC when VPC is created. - ---- +Retrieve DSF package.json version. -##### `sharedDefaultConfig`<sup>Optional</sup> +##### `runCustomSQL` ```typescript -public readonly sharedDefaultConfig: string; +public runCustomSQL(id: string, databaseName: string, sql: string, deleteSql?: string): CustomResource ``` +Runs a custom SQL statement. + +Once the custom resource finishes execution, the attribute `Data` contains an attribute `execId` which contains the Redshift Data API execution ID. You can then use this to retrieve execution results via the `GetStatementResult` API. + +###### `id`<sup>Required</sup> + - *Type:* string -The configuration override for the spark application to use with the default nodes for none criticale jobs. +The CDK Construct ID. --- -#### Constants +###### `databaseName`<sup>Required</sup> -| **Name** | **Type** | **Description** | -| --- | --- | --- | -| DEFAULT_CLUSTER_NAME | string | The default name of the EKS cluster. | -| DEFAULT_EKS_VERSION | aws-cdk-lib.aws_eks.KubernetesVersion | The default EKS version. | -| DEFAULT_EMR_EKS_VERSION | @cdklabs/aws-data-solutions-framework.processing.EmrContainersRuntimeVersion | The default EMR on EKS version. | -| DEFAULT_VPC_CIDR | string | The default CIDR when the VPC is created. | -| DSF_OWNED_TAG | string | *No description.* | -| DSF_TRACKING_CODE | string | *No description.* | +- *Type:* string --- +The name of the database to run this command. -##### `DEFAULT_CLUSTER_NAME`<sup>Required</sup> --- -```typescript -public readonly DEFAULT_CLUSTER_NAME: string; -``` +###### `sql`<sup>Required</sup> - *Type:* string -The default name of the EKS cluster. +The SQL to run. --- -##### `DEFAULT_EKS_VERSION`<sup>Required</sup> +###### `deleteSql`<sup>Optional</sup> -```typescript -public readonly DEFAULT_EKS_VERSION: KubernetesVersion; -``` +- *Type:* string -- *Type:* aws-cdk-lib.aws_eks.KubernetesVersion +Optional. -The default EKS version.
+
+The SQL to run when this resource is deleted. --- -##### `DEFAULT_EMR_EKS_VERSION`<sup>Required</sup> +#### Static Functions -```typescript -public readonly DEFAULT_EMR_EKS_VERSION: EmrContainersRuntimeVersion; -``` - -- *Type:* @cdklabs/aws-data-solutions-framework.processing.EmrContainersRuntimeVersion - -The default EMR on EKS version. +| **Name** | **Description** | +| --- | --- | +| isConstruct | Checks if `x` is a construct. | --- -##### `DEFAULT_VPC_CIDR`<sup>Required</sup> +##### `isConstruct` ```typescript -public readonly DEFAULT_VPC_CIDR: string; +import { consumption } from '@cdklabs/aws-data-solutions-framework' + +consumption.RedshiftServerlessWorkgroup.isConstruct(x: any) ``` -- *Type:* string +Checks if `x` is a construct. -The default CIDR when the VPC is created. +Use this method instead of `instanceof` to properly detect `Construct` +instances, even when the construct library is symlinked. ---- +Explanation: in JavaScript, multiple copies of the `constructs` library on +disk are seen as independent, completely different libraries. As a +consequence, the class `Construct` in each copy of the `constructs` library +is seen as a different class, and an instance of one class will not test as +`instanceof` the other class. `npm install` will not create installations +like this, but users may manually symlink construct libraries together or +use a monorepo tool: in those cases, multiple copies of the `constructs` +library can be accidentally installed, and `instanceof` will behave +unpredictably. It is safest to avoid using `instanceof`, and using +this type-testing method instead. -##### `DSF_OWNED_TAG`<sup>Required</sup> +###### `x`<sup>Required</sup> -```typescript -public readonly DSF_OWNED_TAG: string; -``` +- *Type:* any -- *Type:* string +Any object. --- -##### `DSF_TRACKING_CODE`<sup>Required</sup> - -```typescript -public readonly DSF_TRACKING_CODE: string; -``` +#### Properties -- *Type:* string +| **Name** | **Type** | **Description** | +| --- | --- | --- | +| node | constructs.Node | The tree node. | +| cfnResource | aws-cdk-lib.aws_redshiftserverless.CfnWorkgroup | The created Redshift Serverless Workgroup. | +| connections | aws-cdk-lib.aws_ec2.Connections | Connections used by the Workgroup security group. | +| existingShares | {[ key: string ]: @cdklabs/aws-data-solutions-framework.consumption.RedshiftNewShareProps} | Index of existing shares. | +| glueConnection | aws-cdk-lib.aws_glue.CfnConnection | The Glue Connection associated with the workgroup. | +| namespace | @cdklabs/aws-data-solutions-framework.consumption.RedshiftServerlessNamespace | The associated Redshift Serverless Namespace. | +| primarySecurityGroup | aws-cdk-lib.aws_ec2.ISecurityGroup | The primary EC2 Security Group associated with the Redshift Serverless Workgroup. | +| selectedSubnets | aws-cdk-lib.aws_ec2.SelectedSubnets | The subnets where the Redshift Serverless Workgroup is deployed. | +| vpc | aws-cdk-lib.aws_ec2.IVpc | The VPC where the Redshift Serverless Workgroup is deployed. | --- -### SparkEmrServerlessJob - -A construct to run Spark Jobs using EMR Serverless. - -Creates a State Machine that orchestrates the Spark Job.
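*Editor's note:* The `runCustomSQL` method documented above pairs naturally with the role-grant methods (`grantDbSchemaToRole`, `grantSchemaReadToRole`). A minimal sketch, assuming an existing `RedshiftServerlessWorkgroup` instance named `workgroup`, a database called `defaultdb`, and an `analysts` DB role (all hypothetical names); the signatures follow the method documentation above:

```typescript
declare const workgroup: dsf.consumption.RedshiftServerlessWorkgroup; // assumed existing construct

// Create a DB role with custom SQL; the second statement cleans it up on resource deletion
const dbRole = workgroup.runCustomSQL('CreateAnalystsRole', 'defaultdb',
  'CREATE ROLE analysts', 'DROP ROLE analysts');

// Grant schema access, then read permission on all tables in the schema
const schemaGrant = workgroup.grantDbSchemaToRole('AnalystsSchema', 'defaultdb', 'public', 'analysts');
const readGrant = workgroup.grantSchemaReadToRole('AnalystsRead', 'defaultdb', 'public', 'analysts');

// The role must exist before the grants run
schemaGrant.node.addDependency(dbRole);
readGrant.node.addDependency(dbRole);
```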
- -> [https://awslabs.github.io/data-solutions-framework-on-aws/docs/constructs/library/Processing/spark-emr-serverless-job](https://awslabs.github.io/data-solutions-framework-on-aws/docs/constructs/library/Processing/spark-emr-serverless-job) -*Example* +##### `node`<sup>Required</sup> ```typescript -import { PolicyDocument, PolicyStatement } from 'aws-cdk-lib/aws-iam'; -import { JsonPath } from 'aws-cdk-lib/aws-stepfunctions'; - -const myExecutionRole = dsf.processing.SparkEmrServerlessRuntime.createExecutionRole(this, 'execRole1'); -const job = new dsf.processing.SparkEmrServerlessJob(this, 'SparkJob', { -  jobConfig:{ -    "Name": JsonPath.format('ge_profile-{}', JsonPath.uuid()), -    "ApplicationId": "APPLICATION_ID", -    "ExecutionRoleArn": myExecutionRole.roleArn, -    "JobDriver": { -      "SparkSubmit": { -        "EntryPoint": "s3://S3-BUCKET/pi.py", -        "EntryPointArguments": [], -        "SparkSubmitParameters": "--conf spark.executor.instances=2 --conf spark.executor.memory=2G --conf spark.driver.memory=2G --conf spark.executor.cores=4" -      }, -    } -  } -} as dsf.processing.SparkEmrServerlessJobApiProps); +public readonly node: Node; ``` +- *Type:* constructs.Node -#### Initializers +The tree node. -```typescript -import { processing } from '@cdklabs/aws-data-solutions-framework' --- -new processing.SparkEmrServerlessJob(scope: Construct, id: string, props: SparkEmrServerlessJobProps | SparkEmrServerlessJobApiProps) -``` +##### `cfnResource`<sup>Required</sup> -| **Name** | **Type** | **Description** | -| --- | --- | --- | -| scope | constructs.Construct | *No description.* | -| id | string | *No description.* | -| props | @cdklabs/aws-data-solutions-framework.processing.SparkEmrServerlessJobProps \| @cdklabs/aws-data-solutions-framework.processing.SparkEmrServerlessJobApiProps | *No description.* | +```typescript +public readonly cfnResource: CfnWorkgroup; +``` --- -##### `scope`<sup>Required</sup> - -- *Type:* constructs.Construct +- *Type:* aws-cdk-lib.aws_redshiftserverless.CfnWorkgroup ---- +The created Redshift Serverless Workgroup. -##### `id`<sup>Required</sup> --- -- *Type:* string +##### `connections`<sup>Required</sup> --- +```typescript +public readonly connections: Connections; +``` -##### `props`<sup>Required</sup> -- *Type:* @cdklabs/aws-data-solutions-framework.processing.SparkEmrServerlessJobProps | @cdklabs/aws-data-solutions-framework.processing.SparkEmrServerlessJobApiProps +- *Type:* aws-cdk-lib.aws_ec2.Connections --- +Connections used by the Workgroup security group. -#### Methods +Use this to enable access from clients connecting to the workgroup -| **Name** | **Description** | -| --- | --- | -| toString | Returns a string representation of this construct. | -| retrieveVersion | Retrieve DSF package.json version. | --- -##### `toString` - -```typescript -public toString(): string -``` - -Returns a string representation of this construct. - -##### `retrieveVersion` +##### `existingShares`<sup>Required</sup> ```typescript -public retrieveVersion(): any +public readonly existingShares: {[ key: string ]: RedshiftNewShareProps}; ``` -Retrieve DSF package.json version. - -#### Static Functions +- *Type:* {[ key: string ]: @cdklabs/aws-data-solutions-framework.consumption.RedshiftNewShareProps} -| **Name** | **Description** | -| --- | --- | -| isConstruct | Checks if `x` is a construct. | +Index of existing shares.
--- -##### `isConstruct` +##### `glueConnection`<sup>Required</sup> ```typescript -import { processing } from '@cdklabs/aws-data-solutions-framework' - -processing.SparkEmrServerlessJob.isConstruct(x: any) +public readonly glueConnection: CfnConnection; ``` -Checks if `x` is a construct. - -Use this method instead of `instanceof` to properly detect `Construct` -instances, even when the construct library is symlinked. - -Explanation: in JavaScript, multiple copies of the `constructs` library on -disk are seen as independent, completely different libraries. As a -consequence, the class `Construct` in each copy of the `constructs` library -is seen as a different class, and an instance of one class will not test as -`instanceof` the other class. `npm install` will not create installations -like this, but users may manually symlink construct libraries together or -use a monorepo tool: in those cases, multiple copies of the `constructs` -library can be accidentally installed, and `instanceof` will behave -unpredictably. It is safest to avoid using `instanceof`, and using -this type-testing method instead. - -###### `x`<sup>Required</sup> - -- *Type:* any - -Any object. - ---- +- *Type:* aws-cdk-lib.aws_glue.CfnConnection -#### Properties +The Glue Connection associated with the workgroup. -| **Name** | **Type** | **Description** | -| --- | --- | --- | -| node | constructs.Node | The tree node. | -| stateMachine | aws-cdk-lib.aws_stepfunctions.StateMachine | The Step Functions State Machine created to orchestrate the Spark Job. | -| stateMachineLogGroup | aws-cdk-lib.aws_logs.ILogGroup | The CloudWatch Log Group used by the State Machine. | -| sparkJobExecutionRole | aws-cdk-lib.aws_iam.IRole | The Spark job execution role. | +This can be used by Glue ETL Jobs to read/write data from/to the Redshift workgroup. --- -##### `node`<sup>Required</sup> +##### `namespace`<sup>Required</sup> ```typescript -public readonly node: Node; +public readonly namespace: RedshiftServerlessNamespace; ``` -- *Type:* constructs.Node +- *Type:* @cdklabs/aws-data-solutions-framework.consumption.RedshiftServerlessNamespace -The tree node. +The associated Redshift Serverless Namespace. --- -##### `stateMachine`<sup>Optional</sup> +##### `primarySecurityGroup`<sup>Required</sup> ```typescript -public readonly stateMachine: StateMachine; +public readonly primarySecurityGroup: ISecurityGroup; ``` -- *Type:* aws-cdk-lib.aws_stepfunctions.StateMachine +- *Type:* aws-cdk-lib.aws_ec2.ISecurityGroup -The Step Functions State Machine created to orchestrate the Spark Job. +The primary EC2 Security Group associated with the Redshift Serverless Workgroup. --- -##### `stateMachineLogGroup`<sup>Optional</sup> +##### `selectedSubnets`<sup>Required</sup> ```typescript -public readonly stateMachineLogGroup: ILogGroup; +public readonly selectedSubnets: SelectedSubnets; ``` -- *Type:* aws-cdk-lib.aws_logs.ILogGroup +- *Type:* aws-cdk-lib.aws_ec2.SelectedSubnets -The CloudWatch Log Group used by the State Machine. +The subnets where the Redshift Serverless Workgroup is deployed. --- -##### `sparkJobExecutionRole`<sup>Optional</sup> +##### `vpc`<sup>Required</sup> ```typescript -public readonly sparkJobExecutionRole: IRole; +public readonly vpc: IVpc; ``` -- *Type:* aws-cdk-lib.aws_iam.IRole +- *Type:* aws-cdk-lib.aws_ec2.IVpc -The Spark job execution role. +The VPC where the Redshift Serverless Workgroup is deployed. -Use this property to add additional IAM permissions if necessary. --- @@ -10619,12 +10186,12 @@ Use this property to add additional IAM permissions if necessary.
| **Name** | **Type** | **Description** | | --- | --- | --- | -| DSF_OWNED_TAG | string | *No description.* | -| DSF_TRACKING_CODE | string | *No description.* | +| DSF_OWNED_TAG | string | *No description.* | +| DSF_TRACKING_CODE | string | *No description.* | --- -##### `DSF_OWNED_TAG`<sup>Required</sup> +##### `DSF_OWNED_TAG`<sup>Required</sup> ```typescript public readonly DSF_OWNED_TAG: string; @@ -10634,7 +10201,7 @@ public readonly DSF_OWNED_TAG: string; --- -##### `DSF_TRACKING_CODE`<sup>Required</sup> +##### `DSF_TRACKING_CODE`<sup>Required</sup> ```typescript public readonly DSF_TRACKING_CODE: string; @@ -10644,68 +10211,62 @@ public readonly DSF_TRACKING_CODE: string; --- -### SparkEmrServerlessRuntime +### S3DataCopy -A construct to create a Spark EMR Serverless Application, along with methods to create IAM roles having the least privilege. +Copy data from one S3 bucket to another. -> [https://awslabs.github.io/data-solutions-framework-on-aws/docs/constructs/library/Processing/spark-emr-serverless-runtime](https://awslabs.github.io/data-solutions-framework-on-aws/docs/constructs/library/Processing/spark-emr-serverless-runtime) +> [https://awslabs.github.io/data-solutions-framework-on-aws/docs/constructs/library/Utils/s3-data-copy](https://awslabs.github.io/data-solutions-framework-on-aws/docs/constructs/library/Utils/s3-data-copy) *Example* ```typescript -import { Role, AccountRootPrincipal } from 'aws-cdk-lib/aws-iam'; +import { Bucket } from 'aws-cdk-lib/aws-s3'; -const serverlessRuntime = new dsf.processing.SparkEmrServerlessRuntime(this, 'EmrApp', { -  name: 'SparkRuntimeServerless', -}); +const sourceBucket = Bucket.fromBucketName(this, 'SourceBucket', 'nyc-tlc'); +const bucketName = `test-${this.region}-${this.account}-${dsf.utils.Utils.generateUniqueHash(this, 'TargetBucket')}`; -const executionRole = dsf.processing.SparkEmrServerlessRuntime.createExecutionRole(this, 'ExecutionRole') +const targetBucket = new Bucket(this, 'TargetBucket', { bucketName }); -const submitterRole = new Role (this, 'SubmitterRole', { -  assumedBy: new AccountRootPrincipal(), +new dsf.utils.S3DataCopy(this, 'S3DataCopy', { +  sourceBucket, +  sourceBucketPrefix: 'trip data/', +  sourceBucketRegion: 'us-east-1', +  targetBucket, }); - -dsf.processing.SparkEmrServerlessRuntime.grantStartJobExecution(submitterRole, [executionRole.roleArn], ['EMR-serverless-app-ID']); ``` -#### Initializers +#### Initializers ```typescript -import { processing } from '@cdklabs/aws-data-solutions-framework' +import { utils } from '@cdklabs/aws-data-solutions-framework' -new processing.SparkEmrServerlessRuntime(scope: Construct, id: string, props: SparkEmrServerlessRuntimeProps) +new utils.S3DataCopy(scope: Construct, id: string, props: S3DataCopyProps) ``` | **Name** | **Type** | **Description** | | --- | --- | --- | -| scope | constructs.Construct | the Scope of the CDK Construct. | -| id | string | the ID of the CDK Construct. | -| props | @cdklabs/aws-data-solutions-framework.processing.SparkEmrServerlessRuntimeProps | {@link SparkEmrServerlessRuntimeProps}. | +| scope | constructs.Construct | *No description.* | +| id | string | *No description.* | +| props | @cdklabs/aws-data-solutions-framework.utils.S3DataCopyProps | *No description.* | --- -##### `scope`<sup>Required</sup> +##### `scope`<sup>Required</sup> - *Type:* constructs.Construct -the Scope of the CDK Construct. - --- -##### `id`<sup>Required</sup> +##### `id`<sup>Required</sup> - *Type:* string -the ID of the CDK Construct.
- --- -##### `props`<sup>Required</sup> +##### `props`<sup>Required</sup> -- *Type:* @cdklabs/aws-data-solutions-framework.processing.SparkEmrServerlessRuntimeProps +- *Type:* @cdklabs/aws-data-solutions-framework.utils.S3DataCopyProps -{@link SparkEmrServerlessRuntimeProps}. --- @@ -10713,13 +10274,12 @@ the ID of the CDK Construct. | **Name** | **Description** | | --- | --- | -| toString | Returns a string representation of this construct. | -| grantStartExecution | A method which will grant an IAM Role the right to start and monitor a job. | -| retrieveVersion | Retrieve DSF package.json version. | +| toString | Returns a string representation of this construct. | +| retrieveVersion | Retrieve DSF package.json version. | --- -##### `toString` +##### `toString` ```typescript public toString(): string @@ -10727,34 +10287,7 @@ public toString(): string Returns a string representation of this construct. -##### `grantStartExecution` - -```typescript -public grantStartExecution(startJobRole: IRole, executionRoleArn: string): void -``` - -A method which will grant an IAM Role the right to start and monitor a job. - -The method will also attach an iam:PassRole permission to limited to the IAM Job Execution roles passed. -The excution role will be able to submit job to the EMR Serverless application created by the construct. - -###### `startJobRole`<sup>Required</sup> - -- *Type:* aws-cdk-lib.aws_iam.IRole - -the role that will call the start job api and which need to have the iam:PassRole permission. - ---- - -###### `executionRoleArn`<sup>Required</sup> - -- *Type:* string - -the role use by EMR Serverless to access resources during the job execution. - ---- - -##### `retrieveVersion` +##### `retrieveVersion` ```typescript public retrieveVersion(): any @@ -10766,18 +10299,16 @@ Retrieve DSF package.json version. | **Name** | **Description** | | --- | --- | -| isConstruct | Checks if `x` is a construct. | -| createExecutionRole | A static method creating an execution IAM role that can be assumed by EMR Serverless The method returns the role it creates. | -| grantStartJobExecution | A static method granting the right to start and monitor a job to an IAM Role. | +| isConstruct | Checks if `x` is a construct. | --- -##### `isConstruct` +##### `isConstruct` ```typescript -import { processing } from '@cdklabs/aws-data-solutions-framework' +import { utils } from '@cdklabs/aws-data-solutions-framework' -processing.SparkEmrServerlessRuntime.isConstruct(x: any) +utils.S3DataCopy.isConstruct(x: any) ``` Checks if `x` is a construct. @@ -10796,7 +10327,7 @@ library can be accidentally installed, and `instanceof` will behave unpredictably. It is safest to avoid using `instanceof`, and using this type-testing method instead. -###### `x`<sup>Required</sup> +###### `x`<sup>Required</sup> - *Type:* any @@ -10804,1163 +10335,3678 @@ Any object. --- -##### `createExecutionRole` +#### Properties + +| **Name** | **Type** | **Description** | +| --- | --- | --- | +| node | constructs.Node | The tree node. | +| copyFunction | aws-cdk-lib.aws_lambda.IFunction | The Lambda Function for the copy. | +| copyLogGroup | aws-cdk-lib.aws_logs.ILogGroup | The CloudWatch Log Group for the S3 data copy. | +| copyRole | aws-cdk-lib.aws_iam.IRole | The IAM Role for the copy Lambda Function. | +| cleanUpFunction | aws-cdk-lib.aws_lambda.IFunction | The Lambda Function for cleaning up the S3 data copy. | +| cleanUpLogGroup | aws-cdk-lib.aws_logs.ILogGroup | The CloudWatch Log Group for the S3 data copy clean-up Lambda Function.
| +| cleanUpRole | aws-cdk-lib.aws_iam.IRole | The IAM Role for the S3 data copy clean-up Lambda Function. | +| securityGroups | aws-cdk-lib.aws_ec2.ISecurityGroup[] | The list of EC2 Security Groups used by the Lambda Functions. |

---

##### `node`<sup>Required</sup>

```typescript
public readonly node: Node;
```

- *Type:* constructs.Node

The tree node.

---

##### `copyFunction`<sup>Required</sup>

```typescript
public readonly copyFunction: IFunction;
```

- *Type:* aws-cdk-lib.aws_lambda.IFunction

The Lambda Function for the copy.

---

##### `copyLogGroup`<sup>Required</sup>

```typescript
public readonly copyLogGroup: ILogGroup;
```

- *Type:* aws-cdk-lib.aws_logs.ILogGroup

The CloudWatch Log Group for the S3 data copy.

---

##### `copyRole`<sup>Required</sup>

```typescript
public readonly copyRole: IRole;
```

- *Type:* aws-cdk-lib.aws_iam.IRole

The IAM Role for the copy Lambda Function.

---

##### `cleanUpFunction`<sup>Optional</sup>

```typescript
public readonly cleanUpFunction: IFunction;
```

- *Type:* aws-cdk-lib.aws_lambda.IFunction

The Lambda Function for cleaning up the S3 data copy.

---

##### `cleanUpLogGroup`<sup>Optional</sup>

```typescript
public readonly cleanUpLogGroup: ILogGroup;
```

- *Type:* aws-cdk-lib.aws_logs.ILogGroup

The CloudWatch Log Group for the S3 data copy clean-up Lambda Function.

---

##### `cleanUpRole`<sup>Optional</sup>

```typescript
public readonly cleanUpRole: IRole;
```

- *Type:* aws-cdk-lib.aws_iam.IRole

The IAM Role for the S3 data copy clean-up Lambda Function.

---

##### `securityGroups`<sup>Optional</sup>

```typescript
public readonly securityGroups: ISecurityGroup[];
```

- *Type:* aws-cdk-lib.aws_ec2.ISecurityGroup[]

The list of EC2 Security Groups used by the Lambda Functions.

---

#### Constants

| **Name** | **Type** | **Description** |
| --- | --- | --- |
| DSF_OWNED_TAG | string | *No description.* |
| DSF_TRACKING_CODE | string | *No description.* |

---

##### `DSF_OWNED_TAG`<sup>Required</sup>

```typescript
public readonly DSF_OWNED_TAG: string;
```

- *Type:* string

---

##### `DSF_TRACKING_CODE`<sup>Required</sup>

```typescript
public readonly DSF_TRACKING_CODE: string;
```

- *Type:* string

---

### SparkEmrCICDPipeline

A CICD Pipeline to test and deploy a Spark application on Amazon EMR in cross-account environments using CDK Pipelines.
+

> [https://awslabs.github.io/data-solutions-framework-on-aws/docs/constructs/library/Processing/spark-cicd-pipeline](https://awslabs.github.io/data-solutions-framework-on-aws/docs/constructs/library/Processing/spark-cicd-pipeline)

*Example*

```typescript
import { Bucket } from 'aws-cdk-lib/aws-s3';
import { CodePipelineSource } from 'aws-cdk-lib/pipelines';

interface MyApplicationStackProps extends cdk.StackProps {
  readonly stage: dsf.utils.CICDStage;
}

class MyApplicationStack extends cdk.Stack {
  constructor(scope: cdk.Stack, props?: MyApplicationStackProps) {
    super(scope, 'MyApplicationStack');
    const bucket = new Bucket(this, 'TestBucket', {
      autoDeleteObjects: true,
      removalPolicy: cdk.RemovalPolicy.DESTROY,
    });
    new cdk.CfnOutput(this, 'BucketName', { value: bucket.bucketName });
  }
}

class MyStackFactory implements dsf.utils.ApplicationStackFactory {
  createStack(scope: cdk.Stack, stage: dsf.utils.CICDStage): cdk.Stack {
    return new MyApplicationStack(scope, { stage });
  }
}

class MyCICDStack extends cdk.Stack {
  constructor(scope: Construct, id: string) {
    super(scope, id);

    new dsf.processing.SparkEmrCICDPipeline(this, 'TestConstruct', {
      sparkApplicationName: 'test',
      applicationStackFactory: new MyStackFactory(),
      cdkApplicationPath: 'cdk/',
      sparkApplicationPath: 'spark/',
      sparkImage: dsf.processing.SparkImage.EMR_6_12,
      integTestScript: 'cdk/integ-test.sh',
      integTestEnv: {
        TEST_BUCKET: 'BucketName',
      },
      source: CodePipelineSource.connection('owner/weekly-job', 'mainline', {
        connectionArn: 'arn:aws:codeconnections:eu-west-1:123456789012:connection/aEXAMPLE-8aad-4d5d-8878-dfcab0bc441f'
      }),
    });
  }
}
```


#### Initializers

```typescript
import { processing } from '@cdklabs/aws-data-solutions-framework'

new processing.SparkEmrCICDPipeline(scope: Construct, id: string, props: SparkEmrCICDPipelineProps)
```

| **Name** | **Type** | **Description** |
| --- | --- | --- |
| scope | constructs.Construct | the Scope of the CDK Construct. |
| id | string | the ID of the CDK Construct. |
| props | @cdklabs/aws-data-solutions-framework.processing.SparkEmrCICDPipelineProps | the SparkEmrCICDPipelineProps properties. |

---

##### `scope`<sup>Required</sup>

- *Type:* constructs.Construct

the Scope of the CDK Construct.

---

##### `id`<sup>Required</sup>

- *Type:* string

the ID of the CDK Construct.

---

##### `props`<sup>Required</sup>

- *Type:* @cdklabs/aws-data-solutions-framework.processing.SparkEmrCICDPipelineProps

the SparkEmrCICDPipelineProps properties.

---

#### Methods

| **Name** | **Description** |
| --- | --- |
| toString | Returns a string representation of this construct. |
| retrieveVersion | Retrieve DSF package.json version. |

---

##### `toString`

```typescript
public toString(): string
```

Returns a string representation of this construct.

##### `retrieveVersion`

```typescript
public retrieveVersion(): any
```

Retrieve DSF package.json version.

#### Static Functions

| **Name** | **Description** |
| --- | --- |
| isConstruct | Checks if `x` is a construct. |

---

##### `isConstruct`

```typescript
import { processing } from '@cdklabs/aws-data-solutions-framework'

processing.SparkEmrCICDPipeline.isConstruct(x: any)
```

Checks if `x` is a construct.

Use this method instead of `instanceof` to properly detect `Construct`
instances, even when the construct library is symlinked.
+ +Explanation: in JavaScript, multiple copies of the `constructs` library on +disk are seen as independent, completely different libraries. As a +consequence, the class `Construct` in each copy of the `constructs` library +is seen as a different class, and an instance of one class will not test as +`instanceof` the other class. `npm install` will not create installations +like this, but users may manually symlink construct libraries together or +use a monorepo tool: in those cases, multiple copies of the `constructs` +library can be accidentally installed, and `instanceof` will behave +unpredictably. It is safest to avoid using `instanceof`, and using +this type-testing method instead. + +###### `x`Required + +- *Type:* any + +Any object. + +--- + +#### Properties + +| **Name** | **Type** | **Description** | +| --- | --- | --- | +| node | constructs.Node | The tree node. | +| artifactAccessLogsBucket | aws-cdk-lib.aws_s3.IBucket | The S3 Bucket for storing the access logs on the artifact S3 Bucket. | +| artifactBucket | aws-cdk-lib.aws_s3.IBucket | The S3 Bucket for storing the artifacts. | +| pipeline | aws-cdk-lib.pipelines.CodePipeline | The CodePipeline created as part of the Spark CICD Pipeline. | +| pipelineLogGroup | aws-cdk-lib.aws_logs.ILogGroup | The CloudWatch Log Group for storing the CodePipeline logs. | +| integrationTestStage | aws-cdk-lib.pipelines.CodeBuildStep | The CodeBuild Step for the staging stage. | + +--- + +##### `node`Required + +```typescript +public readonly node: Node; +``` + +- *Type:* constructs.Node + +The tree node. + +--- + +##### `artifactAccessLogsBucket`Required + +```typescript +public readonly artifactAccessLogsBucket: IBucket; +``` + +- *Type:* aws-cdk-lib.aws_s3.IBucket + +The S3 Bucket for storing the access logs on the artifact S3 Bucket. + +--- + +##### `artifactBucket`Required + +```typescript +public readonly artifactBucket: IBucket; +``` + +- *Type:* aws-cdk-lib.aws_s3.IBucket + +The S3 Bucket for storing the artifacts. + +--- + +##### `pipeline`Required + +```typescript +public readonly pipeline: CodePipeline; +``` + +- *Type:* aws-cdk-lib.pipelines.CodePipeline + +The CodePipeline created as part of the Spark CICD Pipeline. + +--- + +##### `pipelineLogGroup`Required + +```typescript +public readonly pipelineLogGroup: ILogGroup; +``` + +- *Type:* aws-cdk-lib.aws_logs.ILogGroup + +The CloudWatch Log Group for storing the CodePipeline logs. + +--- + +##### `integrationTestStage`Optional + +```typescript +public readonly integrationTestStage: CodeBuildStep; +``` + +- *Type:* aws-cdk-lib.pipelines.CodeBuildStep + +The CodeBuild Step for the staging stage. + +--- + +#### Constants + +| **Name** | **Type** | **Description** | +| --- | --- | --- | +| DSF_OWNED_TAG | string | *No description.* | +| DSF_TRACKING_CODE | string | *No description.* | + +--- + +##### `DSF_OWNED_TAG`Required + +```typescript +public readonly DSF_OWNED_TAG: string; +``` + +- *Type:* string + +--- + +##### `DSF_TRACKING_CODE`Required + +```typescript +public readonly DSF_TRACKING_CODE: string; +``` + +- *Type:* string + +--- + +### SparkEmrContainersJob + +A construct to run Spark Jobs using EMR Container runtime (EMR on EKS). + +It creates a Step Functions State Machine that orchestrates the Spark Job. 
+ +> [https://awslabs.github.io/data-solutions-framework-on-aws/docs/constructs/library/Processing/spark-emr-serverless-job](https://awslabs.github.io/data-solutions-framework-on-aws/docs/constructs/library/Processing/spark-emr-serverless-job) + +*Example* + +```typescript +import { JsonPath } from 'aws-cdk-lib/aws-stepfunctions'; + +const job = new dsf.processing.SparkEmrContainersJob(this, 'SparkJob', { + jobConfig:{ + "Name": JsonPath.format('ge_profile-{}', JsonPath.uuid()), + "VirtualClusterId": "virtualClusterId", + "ExecutionRoleArn": "ROLE-ARN", + "JobDriver": { + "SparkSubmit": { + "EntryPoint": "s3://S3-BUCKET/pi.py", + "EntryPointArguments": [], + "SparkSubmitParameters": "--conf spark.executor.instances=2 --conf spark.executor.memory=2G --conf spark.driver.memory=2G --conf spark.executor.cores=4" + }, + } + } +} as dsf.processing.SparkEmrContainersJobApiProps); + +new cdk.CfnOutput(this, 'SparkJobStateMachine', { + value: job.stateMachine!.stateMachineArn, +}); +``` + + +#### Initializers + +```typescript +import { processing } from '@cdklabs/aws-data-solutions-framework' + +new processing.SparkEmrContainersJob(scope: Construct, id: string, props: SparkEmrContainersJobProps | SparkEmrContainersJobApiProps) +``` + +| **Name** | **Type** | **Description** | +| --- | --- | --- | +| scope | constructs.Construct | *No description.* | +| id | string | *No description.* | +| props | @cdklabs/aws-data-solutions-framework.processing.SparkEmrContainersJobProps \| @cdklabs/aws-data-solutions-framework.processing.SparkEmrContainersJobApiProps | *No description.* | + +--- + +##### `scope`Required + +- *Type:* constructs.Construct + +--- + +##### `id`Required + +- *Type:* string + +--- + +##### `props`Required + +- *Type:* @cdklabs/aws-data-solutions-framework.processing.SparkEmrContainersJobProps | @cdklabs/aws-data-solutions-framework.processing.SparkEmrContainersJobApiProps + +--- + +#### Methods + +| **Name** | **Description** | +| --- | --- | +| toString | Returns a string representation of this construct. | +| retrieveVersion | Retrieve DSF package.json version. | + +--- + +##### `toString` + +```typescript +public toString(): string +``` + +Returns a string representation of this construct. + +##### `retrieveVersion` + +```typescript +public retrieveVersion(): any +``` + +Retrieve DSF package.json version. + +#### Static Functions + +| **Name** | **Description** | +| --- | --- | +| isConstruct | Checks if `x` is a construct. | + +--- + +##### `isConstruct` + +```typescript +import { processing } from '@cdklabs/aws-data-solutions-framework' + +processing.SparkEmrContainersJob.isConstruct(x: any) +``` + +Checks if `x` is a construct. + +Use this method instead of `instanceof` to properly detect `Construct` +instances, even when the construct library is symlinked. + +Explanation: in JavaScript, multiple copies of the `constructs` library on +disk are seen as independent, completely different libraries. As a +consequence, the class `Construct` in each copy of the `constructs` library +is seen as a different class, and an instance of one class will not test as +`instanceof` the other class. `npm install` will not create installations +like this, but users may manually symlink construct libraries together or +use a monorepo tool: in those cases, multiple copies of the `constructs` +library can be accidentally installed, and `instanceof` will behave +unpredictably. It is safest to avoid using `instanceof`, and using +this type-testing method instead. 
+

###### `x`<sup>Required</sup>

- *Type:* any

Any object.

---

#### Properties

| **Name** | **Type** | **Description** |
| --- | --- | --- |
| node | constructs.Node | The tree node. |
| stateMachine | aws-cdk-lib.aws_stepfunctions.StateMachine | The Step Functions State Machine created to orchestrate the Spark Job. |
| stateMachineLogGroup | aws-cdk-lib.aws_logs.ILogGroup | The CloudWatch Log Group used by the State Machine. |

---

##### `node`<sup>Required</sup>

```typescript
public readonly node: Node;
```

- *Type:* constructs.Node

The tree node.

---

##### `stateMachine`<sup>Optional</sup>

```typescript
public readonly stateMachine: StateMachine;
```

- *Type:* aws-cdk-lib.aws_stepfunctions.StateMachine

The Step Functions State Machine created to orchestrate the Spark Job.

---

##### `stateMachineLogGroup`<sup>Optional</sup>

```typescript
public readonly stateMachineLogGroup: ILogGroup;
```

- *Type:* aws-cdk-lib.aws_logs.ILogGroup

The CloudWatch Log Group used by the State Machine.

---

#### Constants

| **Name** | **Type** | **Description** |
| --- | --- | --- |
| DSF_OWNED_TAG | string | *No description.* |
| DSF_TRACKING_CODE | string | *No description.* |

---

##### `DSF_OWNED_TAG`<sup>Required</sup>

```typescript
public readonly DSF_OWNED_TAG: string;
```

- *Type:* string

---

##### `DSF_TRACKING_CODE`<sup>Required</sup>

```typescript
public readonly DSF_TRACKING_CODE: string;
```

- *Type:* string

---

### SparkEmrContainersRuntime

A construct to create an EKS cluster, configure it and enable it with EMR on EKS.

> [https://awslabs.github.io/data-solutions-framework-on-aws/docs/constructs/library/Processing/spark-emr-containers-runtime](https://awslabs.github.io/data-solutions-framework-on-aws/docs/constructs/library/Processing/spark-emr-containers-runtime)

*Example*

```typescript
import { ManagedPolicy, PolicyDocument, PolicyStatement } from 'aws-cdk-lib/aws-iam';
import { KubectlV30Layer } from '@aws-cdk/lambda-layer-kubectl-v30';

const kubectlLayer = new KubectlV30Layer(this, 'kubectlLayer');

const emrEksCluster = dsf.processing.SparkEmrContainersRuntime.getOrCreate(this, {
  publicAccessCIDRs: ['10.0.0.0/16'],
  kubectlLambdaLayer: kubectlLayer,
});

const virtualCluster = emrEksCluster.addEmrVirtualCluster(this, {
  name: 'example',
  createNamespace: true,
  eksNamespace: 'example',
});

const s3Read = new PolicyDocument({
  statements: [new PolicyStatement({
    actions: [
      's3:GetObject',
    ],
    resources: ['arn:aws:s3:::aws-data-analytics-workshop'],
  })],
});

const s3ReadPolicy = new ManagedPolicy(this, 's3ReadPolicy', {
  document: s3Read,
});

const execRole = emrEksCluster.createExecutionRole(this, 'ExecRole', s3ReadPolicy, 'example', 's3ReadExecRole');
```


#### Methods

| **Name** | **Description** |
| --- | --- |
| toString | Returns a string representation of this construct. |
| addEmrVirtualCluster | Add a new Amazon EMR Virtual Cluster linked to Amazon EKS Cluster. |
| addInteractiveEndpoint | Creates a new Amazon EMR managed endpoint to be used with an Amazon EMR Virtual Cluster. |
| addKarpenterNodePoolAndNodeClass | Apply the provided manifest and add the CDK dependency on the EKS cluster. |
| createExecutionRole | Create and configure a new Amazon IAM Role usable as an execution role. |
| retrieveVersion | Retrieve DSF package.json version. |
| uploadPodTemplate | Upload podTemplates to the Amazon S3 location used by the cluster.
|

---

##### `toString`

```typescript
public toString(): string
```

Returns a string representation of this construct.

##### `addEmrVirtualCluster`

```typescript
public addEmrVirtualCluster(scope: Construct, options: EmrVirtualClusterProps): CfnVirtualCluster
```

Add a new Amazon EMR Virtual Cluster linked to Amazon EKS Cluster.

###### `scope`<sup>Required</sup>

- *Type:* constructs.Construct

the stack where the virtual cluster is deployed.

---

###### `options`<sup>Required</sup>

- *Type:* @cdklabs/aws-data-solutions-framework.processing.EmrVirtualClusterProps

the EmrVirtualClusterProps [properties]{@link EmrVirtualClusterProps}.

---

##### `addInteractiveEndpoint`

```typescript
public addInteractiveEndpoint(scope: Construct, id: string, interactiveSessionOptions: SparkEmrContainersRuntimeInteractiveSessionProps): CustomResource
```

Creates a new Amazon EMR managed endpoint to be used with an Amazon EMR Virtual Cluster.

CfnOutput can be customized.

###### `scope`<sup>Required</sup>

- *Type:* constructs.Construct

the scope of the stack where the managed endpoint is deployed.

---

###### `id`<sup>Required</sup>

- *Type:* string

the CDK ID for the endpoint.

---

###### `interactiveSessionOptions`<sup>Required</sup>

- *Type:* @cdklabs/aws-data-solutions-framework.processing.SparkEmrContainersRuntimeInteractiveSessionProps

the EmrManagedEndpointOptions to configure the Amazon EMR managed endpoint.

---

##### `addKarpenterNodePoolAndNodeClass`

```typescript
public addKarpenterNodePoolAndNodeClass(id: string, manifest: any): any
```

Apply the provided manifest and add the CDK dependency on the EKS cluster.

###### `id`<sup>Required</sup>

- *Type:* string

the unique ID of the CDK resource.

---

###### `manifest`<sup>Required</sup>

- *Type:* any

The manifest to apply.

You can use the Utils class, which offers methods to read a YAML file and load it as a manifest.

---

##### `createExecutionRole`

```typescript
public createExecutionRole(scope: Construct, id: string, policy: IManagedPolicy, eksNamespace: string, name: string): Role
```

Create and configure a new Amazon IAM Role usable as an execution role.

This method makes the created role assumable by the Amazon EKS cluster Open ID Connect provider.

###### `scope`<sup>Required</sup>

- *Type:* constructs.Construct

the scope of the IAM role.

---

###### `id`<sup>Required</sup>

- *Type:* string

the ID of the CDK resource to be created; it should be unique across the stack.

---

###### `policy`<sup>Required</sup>

- *Type:* aws-cdk-lib.aws_iam.IManagedPolicy

the execution policy to attach to the role.

---

###### `eksNamespace`<sup>Required</sup>

- *Type:* string

The namespace from which the role is going to be used.

MUST be the same as the namespace of the Virtual Cluster from which the job is submitted.

---

###### `name`<sup>Required</sup>

- *Type:* string

The name to use for the role; it is required and used to scope the IAM role.

---

##### `retrieveVersion`

```typescript
public retrieveVersion(): any
```

Retrieve DSF package.json version.

##### `uploadPodTemplate`

```typescript
public uploadPodTemplate(id: string, filePath: string): void
```

Upload podTemplates to the Amazon S3 location used by the cluster.

###### `id`<sup>Required</sup>

- *Type:* string

the unique ID of the CDK resource.

---

###### `filePath`<sup>Required</sup>

- *Type:* string

The local path of the YAML podTemplate files to upload.
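*Editor's note:* A hedged sketch tying `uploadPodTemplate` and `addInteractiveEndpoint` to the class example above (reusing its `emrEksCluster`, `virtualCluster` and `execRole` names). The pod template path and endpoint name are illustrative, and the interactive session property names are assumptions about the `SparkEmrContainersRuntimeInteractiveSessionProps` shape, which is not spelled out here:

```typescript
// Upload a custom driver pod template to the cluster's asset bucket (path is hypothetical)
emrEksCluster.uploadPodTemplate('CustomDriverPodTemplate', './pod-templates/custom-driver.yaml');

// Expose an EMR managed endpoint on the virtual cluster (property names are assumptions)
const managedEndpoint = emrEksCluster.addInteractiveEndpoint(this, 'InteractiveSession', {
  managedEndpointName: 'interactive-session',
  virtualClusterId: virtualCluster.attrId,
  executionRole: execRole,
});
```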
+

---

#### Static Functions

| **Name** | **Description** |
| --- | --- |
| isConstruct | Checks if `x` is a construct. |
| getOrCreate | Get an existing EmrEksCluster based on the cluster name property, or create a new one. Only one EKS cluster can exist per stack. |
| grantStartJobExecution | A static method granting the right to start and monitor a job to an IAM Role. |

---

##### `isConstruct`

```typescript
import { processing } from '@cdklabs/aws-data-solutions-framework'

processing.SparkEmrContainersRuntime.isConstruct(x: any)
```

Checks if `x` is a construct.

Use this method instead of `instanceof` to properly detect `Construct`
instances, even when the construct library is symlinked.

Explanation: in JavaScript, multiple copies of the `constructs` library on
disk are seen as independent, completely different libraries. As a
consequence, the class `Construct` in each copy of the `constructs` library
is seen as a different class, and an instance of one class will not test as
`instanceof` the other class. `npm install` will not create installations
like this, but users may manually symlink construct libraries together or
use a monorepo tool: in those cases, multiple copies of the `constructs`
library can be accidentally installed, and `instanceof` will behave
unpredictably. It is safest to avoid using `instanceof`, and using
this type-testing method instead.

###### `x`<sup>Required</sup>

- *Type:* any

Any object.

---

##### `getOrCreate`

```typescript
import { processing } from '@cdklabs/aws-data-solutions-framework'

processing.SparkEmrContainersRuntime.getOrCreate(scope: Construct, props: SparkEmrContainersRuntimeProps)
```

Get an existing EmrEksCluster based on the cluster name property, or create a new one. Only one EKS cluster can exist per stack.

###### `scope`<sup>Required</sup>

- *Type:* constructs.Construct

the CDK scope used to search or create the cluster.

---

###### `props`<sup>Required</sup>

- *Type:* @cdklabs/aws-data-solutions-framework.processing.SparkEmrContainersRuntimeProps

the EmrEksClusterProps [properties]{@link EmrEksClusterProps} if created.

---

##### `grantStartJobExecution`

```typescript
import { processing } from '@cdklabs/aws-data-solutions-framework'

processing.SparkEmrContainersRuntime.grantStartJobExecution(startJobRole: IRole, executionRoleArn: string[], virtualClusterArn: string)
```

A static method granting the right to start and monitor a job to an IAM Role.

The method will scope the following actions `DescribeJobRun`, `TagResource` and `ListJobRuns` to the provided virtual cluster.
It will also scope `StartJobRun` as defined in the
[EMR on EKS official documentation](https://docs.aws.amazon.com/emr/latest/EMR-on-EKS-DevelopmentGuide/iam-execution-role.html).

###### `startJobRole`<sup>Required</sup>

- *Type:* aws-cdk-lib.aws_iam.IRole

the role that will call the start job API and which needs to have the iam:PassRole permission.

---

###### `executionRoleArn`<sup>Required</sup>

- *Type:* string[]

the role used by EMR on EKS to access resources during the job execution.

---

###### `virtualClusterArn`<sup>Required</sup>

- *Type:* string

the EMR Virtual Cluster ARN to which the job is submitted.

---

#### Properties

| **Name** | **Type** | **Description** |
| --- | --- | --- |
| node | constructs.Node | The tree node.
| |
| ec2InstanceNodeGroupRole | aws-cdk-lib.aws_iam.IRole | The IAM role used by the tooling managed nodegroup hosting core Kubernetes controllers like the EBS CSI driver and CoreDNS. |
| eksCluster | aws-cdk-lib.aws_eks.Cluster | The EKS cluster created by the construct if it is not provided. |
| vpc | aws-cdk-lib.aws_ec2.IVpc | The VPC used by the EKS cluster. |
| assetBucket | aws-cdk-lib.aws_s3.IBucket | The bucket holding pod templates referenced in the configuration override for the job. |
| assetUploadBucketRole | aws-cdk-lib.aws_iam.IRole | The IAM role used to upload assets (pod templates) on S3. |
| awsNodeRole | aws-cdk-lib.aws_iam.IRole | The IAM Role used by IRSA for the aws-node daemonset. |
| criticalDefaultConfig | string | The configuration override for the Spark application to use with the default nodes for critical jobs. |
| csiDriverIrsaRole | aws-cdk-lib.aws_iam.IRole | The IAM Role created for the EBS CSI controller. |
| eksSecretKmsKey | aws-cdk-lib.aws_kms.IKey | The KMS key used for storing EKS secrets. |
| emrServiceRole | aws-cdk-lib.aws_iam.CfnServiceLinkedRole | The Service Linked role created for EMR. |
| flowLogGroup | aws-cdk-lib.aws_logs.ILogGroup | The CloudWatch Log Group for the VPC flow log when the VPC is created. |
| flowLogKey | aws-cdk-lib.aws_kms.IKey | The KMS Key used for the VPC flow logs when the VPC is created. |
| flowLogRole | aws-cdk-lib.aws_iam.IRole | The IAM Role used for the VPC flow logs when the VPC is created. |
| karpenterEventRules | aws-cdk-lib.aws_events.IRule[] | The rules used by Karpenter to track node health; the rules are defined in the CloudFormation template at https://raw.githubusercontent.com/aws/karpenter/"${KARPENTER_VERSION}"/website/content/en/preview/getting-started/getting-started-with-karpenter/cloudformation.yaml. |
| karpenterIrsaRole | aws-cdk-lib.aws_iam.IRole | The IAM role created for the Karpenter controller. |
| karpenterQueue | aws-cdk-lib.aws_sqs.IQueue | The SQS queue used by Karpenter to receive critical events from AWS services which may affect your nodes. |
| karpenterSecurityGroup | aws-cdk-lib.aws_ec2.ISecurityGroup | The security group used by the EC2NodeClass of the default nodes. |
| notebookDefaultConfig | any | The configuration override for the Spark application to use with the default nodes dedicated for notebooks. |
| podTemplateS3LocationCriticalDriver | string | The S3 location holding the driver pod template for critical nodes. |
| podTemplateS3LocationCriticalExecutor | string | The S3 location holding the executor pod template for critical nodes. |
| podTemplateS3LocationDriverShared | string | The S3 location holding the driver pod template for shared nodes. |
| podTemplateS3LocationExecutorShared | string | The S3 location holding the executor pod template for shared nodes. |
| podTemplateS3LocationNotebookDriver | string | The S3 location holding the driver pod template for interactive sessions. |
| podTemplateS3LocationNotebookExecutor | string | The S3 location holding the executor pod template for interactive sessions. |
| s3VpcEndpoint | aws-cdk-lib.aws_ec2.IGatewayVpcEndpoint | The S3 VPC endpoint attached to the private subnets of the VPC when VPC is created. |
| sharedDefaultConfig | string | The configuration override for the Spark application to use with the default nodes for non-critical jobs. |

---

##### `node`<sup>Required</sup>

```typescript
public readonly node: Node;
```

- *Type:* constructs.Node

The tree node.
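*Editor's note:* A minimal sketch of the static `grantStartJobExecution` method documented above, reusing `virtualCluster` and `execRole` from the class example; the submitter role stands in for any principal (for example a CI/CD pipeline role) that needs to submit jobs:

```typescript
import { processing } from '@cdklabs/aws-data-solutions-framework'
import { IRole } from 'aws-cdk-lib/aws-iam';

declare const submitterRole: IRole; // hypothetical job-submitting principal

processing.SparkEmrContainersRuntime.grantStartJobExecution(
  submitterRole,
  [execRole.roleArn],
  virtualCluster.attrArn,
);
```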
+

---

##### `ec2InstanceNodeGroupRole`<sup>Required</sup>

```typescript
public readonly ec2InstanceNodeGroupRole: IRole;
```

- *Type:* aws-cdk-lib.aws_iam.IRole

The IAM role used by the tooling managed nodegroup hosting core Kubernetes controllers like the EBS CSI driver and CoreDNS.

---

##### `eksCluster`<sup>Required</sup>

```typescript
public readonly eksCluster: Cluster;
```

- *Type:* aws-cdk-lib.aws_eks.Cluster

The EKS cluster created by the construct if it is not provided.

---

##### `vpc`<sup>Required</sup>

```typescript
public readonly vpc: IVpc;
```

- *Type:* aws-cdk-lib.aws_ec2.IVpc

The VPC used by the EKS cluster.

---

##### `assetBucket`<sup>Optional</sup>

```typescript
public readonly assetBucket: IBucket;
```

- *Type:* aws-cdk-lib.aws_s3.IBucket

The bucket holding pod templates referenced in the configuration override for the job.

---

##### `assetUploadBucketRole`<sup>Optional</sup>

```typescript
public readonly assetUploadBucketRole: IRole;
```

- *Type:* aws-cdk-lib.aws_iam.IRole

The IAM role used to upload assets (pod templates) on S3.

---

##### `awsNodeRole`<sup>Optional</sup>

```typescript
public readonly awsNodeRole: IRole;
```

- *Type:* aws-cdk-lib.aws_iam.IRole

The IAM Role used by IRSA for the aws-node daemonset.

---

##### `criticalDefaultConfig`<sup>Optional</sup>

```typescript
public readonly criticalDefaultConfig: string;
```

- *Type:* string

The configuration override for the Spark application to use with the default nodes for critical jobs.

---

##### `csiDriverIrsaRole`<sup>Optional</sup>

```typescript
public readonly csiDriverIrsaRole: IRole;
```

- *Type:* aws-cdk-lib.aws_iam.IRole

The IAM Role created for the EBS CSI controller.

---

##### `eksSecretKmsKey`<sup>Optional</sup>

```typescript
public readonly eksSecretKmsKey: IKey;
```

- *Type:* aws-cdk-lib.aws_kms.IKey

The KMS key used for storing EKS secrets.

---

##### `emrServiceRole`<sup>Optional</sup>

```typescript
public readonly emrServiceRole: CfnServiceLinkedRole;
```

- *Type:* aws-cdk-lib.aws_iam.CfnServiceLinkedRole

The Service Linked role created for EMR.

---

##### `flowLogGroup`<sup>Optional</sup>

```typescript
public readonly flowLogGroup: ILogGroup;
```

- *Type:* aws-cdk-lib.aws_logs.ILogGroup

The CloudWatch Log Group for the VPC flow log when the VPC is created.

---

##### `flowLogKey`<sup>Optional</sup>

```typescript
public readonly flowLogKey: IKey;
```

- *Type:* aws-cdk-lib.aws_kms.IKey

The KMS Key used for the VPC flow logs when the VPC is created.

---

##### `flowLogRole`<sup>Optional</sup>

```typescript
public readonly flowLogRole: IRole;
```

- *Type:* aws-cdk-lib.aws_iam.IRole

The IAM Role used for the VPC flow logs when the VPC is created.

---

##### `karpenterEventRules`<sup>Optional</sup>

```typescript
public readonly karpenterEventRules: IRule[];
```

- *Type:* aws-cdk-lib.aws_events.IRule[]

The rules used by Karpenter to track node health; the rules are defined in the CloudFormation template at https://raw.githubusercontent.com/aws/karpenter/"${KARPENTER_VERSION}"/website/content/en/preview/getting-started/getting-started-with-karpenter/cloudformation.yaml.

---

##### `karpenterIrsaRole`<sup>Optional</sup>

```typescript
public readonly karpenterIrsaRole: IRole;
```

- *Type:* aws-cdk-lib.aws_iam.IRole

The IAM role created for the Karpenter controller.
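*Editor's note:* The Karpenter resources above are typically extended through the `addKarpenterNodePoolAndNodeClass` method documented earlier. A hedged sketch, assuming the `emrEksCluster` from the class example and that the DSF `Utils` class exposes YAML helpers as the method description suggests; the helper names `readYamlDocument` and `loadYaml`, and the manifest path, are assumptions:

```typescript
// Read a local NodePool/EC2NodeClass manifest and apply it to the EKS cluster
const manifest = dsf.utils.Utils.readYamlDocument('./karpenter/critical-nodepool.yaml'); // assumed helper
const objects = manifest.split('---').map((doc: string) => dsf.utils.Utils.loadYaml(doc)); // assumed helper

emrEksCluster.addKarpenterNodePoolAndNodeClass('CriticalNodePool', objects);
```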
+

---

##### `karpenterQueue`<sup>Optional</sup>

```typescript
public readonly karpenterQueue: IQueue;
```

- *Type:* aws-cdk-lib.aws_sqs.IQueue

The SQS queue used by Karpenter to receive critical events from AWS services which may affect your nodes.

---

##### `karpenterSecurityGroup`<sup>Optional</sup>

```typescript
public readonly karpenterSecurityGroup: ISecurityGroup;
```

- *Type:* aws-cdk-lib.aws_ec2.ISecurityGroup

The security group used by the EC2NodeClass of the default nodes.

---

##### `notebookDefaultConfig`<sup>Optional</sup>

```typescript
public readonly notebookDefaultConfig: any;
```

- *Type:* any

The configuration override for the Spark application to use with the default nodes dedicated for notebooks.

---

##### `podTemplateS3LocationCriticalDriver`<sup>Optional</sup>

```typescript
public readonly podTemplateS3LocationCriticalDriver: string;
```

- *Type:* string

The S3 location holding the driver pod template for critical nodes.

---

##### `podTemplateS3LocationCriticalExecutor`<sup>Optional</sup>

```typescript
public readonly podTemplateS3LocationCriticalExecutor: string;
```

- *Type:* string

The S3 location holding the executor pod template for critical nodes.

---

##### `podTemplateS3LocationDriverShared`<sup>Optional</sup>

```typescript
public readonly podTemplateS3LocationDriverShared: string;
```

- *Type:* string

The S3 location holding the driver pod template for shared nodes.

---

##### `podTemplateS3LocationExecutorShared`<sup>Optional</sup>

```typescript
public readonly podTemplateS3LocationExecutorShared: string;
```

- *Type:* string

The S3 location holding the executor pod template for shared nodes.

---

##### `podTemplateS3LocationNotebookDriver`<sup>Optional</sup>

```typescript
public readonly podTemplateS3LocationNotebookDriver: string;
```

- *Type:* string

The S3 location holding the driver pod template for interactive sessions.

---

##### `podTemplateS3LocationNotebookExecutor`<sup>Optional</sup>

```typescript
public readonly podTemplateS3LocationNotebookExecutor: string;
```

- *Type:* string

The S3 location holding the executor pod template for interactive sessions.

---

##### `s3VpcEndpoint`<sup>Optional</sup>

```typescript
public readonly s3VpcEndpoint: IGatewayVpcEndpoint;
```

- *Type:* aws-cdk-lib.aws_ec2.IGatewayVpcEndpoint

The S3 VPC endpoint attached to the private subnets of the VPC when VPC is created.

---

##### `sharedDefaultConfig`<sup>Optional</sup>

```typescript
public readonly sharedDefaultConfig: string;
```

- *Type:* string

The configuration override for the Spark application to use with the default nodes for non-critical jobs.

---

#### Constants

| **Name** | **Type** | **Description** |
| --- | --- | --- |
| DEFAULT_CLUSTER_NAME | string | The default name of the EKS cluster. |
| DEFAULT_EKS_VERSION | aws-cdk-lib.aws_eks.KubernetesVersion | The default EKS version. |
| DEFAULT_EMR_EKS_VERSION | @cdklabs/aws-data-solutions-framework.processing.EmrContainersRuntimeVersion | The default EMR on EKS version. |
| DEFAULT_VPC_CIDR | string | The default CIDR when the VPC is created. |
| DSF_OWNED_TAG | string | *No description.* |
| DSF_TRACKING_CODE | string | *No description.* |

---

##### `DEFAULT_CLUSTER_NAME`<sup>Required</sup>

```typescript
public readonly DEFAULT_CLUSTER_NAME: string;
```

- *Type:* string

The default name of the EKS cluster.
+ +--- + +##### `DEFAULT_EKS_VERSION`Required + +```typescript +public readonly DEFAULT_EKS_VERSION: KubernetesVersion; +``` + +- *Type:* aws-cdk-lib.aws_eks.KubernetesVersion + +The default EKS version. + +--- + +##### `DEFAULT_EMR_EKS_VERSION`Required + +```typescript +public readonly DEFAULT_EMR_EKS_VERSION: EmrContainersRuntimeVersion; +``` + +- *Type:* @cdklabs/aws-data-solutions-framework.processing.EmrContainersRuntimeVersion + +The default EMR on EKS version. + +--- + +##### `DEFAULT_VPC_CIDR`Required + +```typescript +public readonly DEFAULT_VPC_CIDR: string; +``` + +- *Type:* string + +The default CIDR when the VPC is created. + +--- + +##### `DSF_OWNED_TAG`Required + +```typescript +public readonly DSF_OWNED_TAG: string; +``` + +- *Type:* string + +--- + +##### `DSF_TRACKING_CODE`Required + +```typescript +public readonly DSF_TRACKING_CODE: string; +``` + +- *Type:* string + +--- + +### SparkEmrServerlessJob + +A construct to run Spark Jobs using EMR Serverless. + +Creates a State Machine that orchestrates the Spark Job. + +> [https://awslabs.github.io/data-solutions-framework-on-aws/docs/constructs/library/Processing/spark-emr-serverless-job](https://awslabs.github.io/data-solutions-framework-on-aws/docs/constructs/library/Processing/spark-emr-serverless-job) + +*Example* + +```typescript +import { PolicyDocument, PolicyStatement } from 'aws-cdk-lib/aws-iam'; +import { JsonPath } from 'aws-cdk-lib/aws-stepfunctions'; + +const myExecutionRole = dsf.processing.SparkEmrServerlessRuntime.createExecutionRole(this, 'execRole1'); +const job = new dsf.processing.SparkEmrServerlessJob(this, 'SparkJob', { + jobConfig:{ + "Name": JsonPath.format('ge_profile-{}', JsonPath.uuid()), + "ApplicationId": "APPLICATION_ID", + "ExecutionRoleArn": myExecutionRole.roleArn, + "JobDriver": { + "SparkSubmit": { + "EntryPoint": "s3://S3-BUCKET/pi.py", + "EntryPointArguments": [], + "SparkSubmitParameters": "--conf spark.executor.instances=2 --conf spark.executor.memory=2G --conf spark.driver.memory=2G --conf spark.executor.cores=4" + }, + } + } +} as dsf.processing.SparkEmrServerlessJobApiProps); +``` + + +#### Initializers + +```typescript +import { processing } from '@cdklabs/aws-data-solutions-framework' + +new processing.SparkEmrServerlessJob(scope: Construct, id: string, props: SparkEmrServerlessJobProps | SparkEmrServerlessJobApiProps) +``` + +| **Name** | **Type** | **Description** | +| --- | --- | --- | +| scope | constructs.Construct | *No description.* | +| id | string | *No description.* | +| props | @cdklabs/aws-data-solutions-framework.processing.SparkEmrServerlessJobProps \| @cdklabs/aws-data-solutions-framework.processing.SparkEmrServerlessJobApiProps | *No description.* | + +--- + +##### `scope`Required + +- *Type:* constructs.Construct + +--- + +##### `id`Required + +- *Type:* string + +--- + +##### `props`Required + +- *Type:* @cdklabs/aws-data-solutions-framework.processing.SparkEmrServerlessJobProps | @cdklabs/aws-data-solutions-framework.processing.SparkEmrServerlessJobApiProps + +--- + +#### Methods + +| **Name** | **Description** | +| --- | --- | +| toString | Returns a string representation of this construct. | +| retrieveVersion | Retrieve DSF package.json version. | + +--- + +##### `toString` + +```typescript +public toString(): string +``` + +Returns a string representation of this construct. + +##### `retrieveVersion` + +```typescript +public retrieveVersion(): any +``` + +Retrieve DSF package.json version. 
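*Editor's note:* The `sparkJobExecutionRole` property listed below is the hook for extending the job's permissions. A minimal sketch, assuming the `job` instance from the example above and a placeholder bucket ARN:

```typescript
import { PolicyStatement } from 'aws-cdk-lib/aws-iam';

// Grant the Spark job read access to an additional bucket (ARN is a placeholder)
job.sparkJobExecutionRole?.addToPrincipalPolicy(new PolicyStatement({
  actions: ['s3:GetObject'],
  resources: ['arn:aws:s3:::MY-DATA-BUCKET/*'],
}));
```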
+ +#### Static Functions + +| **Name** | **Description** | +| --- | --- | +| isConstruct | Checks if `x` is a construct. | + +--- + +##### `isConstruct` + +```typescript +import { processing } from '@cdklabs/aws-data-solutions-framework' + +processing.SparkEmrServerlessJob.isConstruct(x: any) +``` + +Checks if `x` is a construct. + +Use this method instead of `instanceof` to properly detect `Construct` +instances, even when the construct library is symlinked. + +Explanation: in JavaScript, multiple copies of the `constructs` library on +disk are seen as independent, completely different libraries. As a +consequence, the class `Construct` in each copy of the `constructs` library +is seen as a different class, and an instance of one class will not test as +`instanceof` the other class. `npm install` will not create installations +like this, but users may manually symlink construct libraries together or +use a monorepo tool: in those cases, multiple copies of the `constructs` +library can be accidentally installed, and `instanceof` will behave +unpredictably. It is safest to avoid using `instanceof`, and using +this type-testing method instead. + +###### `x`Required + +- *Type:* any + +Any object. + +--- + +#### Properties + +| **Name** | **Type** | **Description** | +| --- | --- | --- | +| node | constructs.Node | The tree node. | +| stateMachine | aws-cdk-lib.aws_stepfunctions.StateMachine | The Step Functions State Machine created to orchestrate the Spark Job. | +| stateMachineLogGroup | aws-cdk-lib.aws_logs.ILogGroup | The CloudWatch Log Group used by the State Machine. | +| sparkJobExecutionRole | aws-cdk-lib.aws_iam.IRole | The Spark job execution role. | + +--- + +##### `node`Required + +```typescript +public readonly node: Node; +``` + +- *Type:* constructs.Node + +The tree node. + +--- + +##### `stateMachine`Optional + +```typescript +public readonly stateMachine: StateMachine; +``` + +- *Type:* aws-cdk-lib.aws_stepfunctions.StateMachine + +The Step Functions State Machine created to orchestrate the Spark Job. + +--- + +##### `stateMachineLogGroup`Optional + +```typescript +public readonly stateMachineLogGroup: ILogGroup; +``` + +- *Type:* aws-cdk-lib.aws_logs.ILogGroup + +The CloudWatch Log Group used by the State Machine. + +--- + +##### `sparkJobExecutionRole`Optional + +```typescript +public readonly sparkJobExecutionRole: IRole; +``` + +- *Type:* aws-cdk-lib.aws_iam.IRole + +The Spark job execution role. + +Use this property to add additional IAM permissions if necessary. + +--- + +#### Constants + +| **Name** | **Type** | **Description** | +| --- | --- | --- | +| DSF_OWNED_TAG | string | *No description.* | +| DSF_TRACKING_CODE | string | *No description.* | + +--- + +##### `DSF_OWNED_TAG`Required + +```typescript +public readonly DSF_OWNED_TAG: string; +``` + +- *Type:* string + +--- + +##### `DSF_TRACKING_CODE`Required + +```typescript +public readonly DSF_TRACKING_CODE: string; +``` + +- *Type:* string + +--- + +### SparkEmrServerlessRuntime + +A construct to create a Spark EMR Serverless Application, along with methods to create IAM roles having the least privilege. 
+
+> [https://awslabs.github.io/data-solutions-framework-on-aws/docs/constructs/library/Processing/spark-emr-serverless-runtime](https://awslabs.github.io/data-solutions-framework-on-aws/docs/constructs/library/Processing/spark-emr-serverless-runtime)
+
+*Example*
+
+```typescript
+import { Role, AccountRootPrincipal } from 'aws-cdk-lib/aws-iam';
+
+const serverlessRuntime = new dsf.processing.SparkEmrServerlessRuntime(this, 'EmrApp', {
+  name: 'SparkRuntimeServerless',
+});
+
+const executionRole = dsf.processing.SparkEmrServerlessRuntime.createExecutionRole(this, 'ExecutionRole');
+
+const submitterRole = new Role(this, 'SubmitterRole', {
+  assumedBy: new AccountRootPrincipal(),
+});
+
+dsf.processing.SparkEmrServerlessRuntime.grantStartJobExecution(submitterRole, [executionRole.roleArn], ['EMR-serverless-app-ID']);
+```
+
+
+#### Initializers
+
+```typescript
+import { processing } from '@cdklabs/aws-data-solutions-framework'
+
+new processing.SparkEmrServerlessRuntime(scope: Construct, id: string, props: SparkEmrServerlessRuntimeProps)
+```
+
+| **Name** | **Type** | **Description** |
+| --- | --- | --- |
+| scope | constructs.Construct | the Scope of the CDK Construct. |
+| id | string | the ID of the CDK Construct. |
+| props | @cdklabs/aws-data-solutions-framework.processing.SparkEmrServerlessRuntimeProps | {@link SparkEmrServerlessRuntimeProps}. |
+
+---
+
+##### `scope`<sup>Required</sup>
+
+- *Type:* constructs.Construct
+
+the Scope of the CDK Construct.
+
+---
+
+##### `id`<sup>Required</sup>
+
+- *Type:* string
+
+the ID of the CDK Construct.
+
+---
+
+##### `props`<sup>Required</sup>
+
+- *Type:* @cdklabs/aws-data-solutions-framework.processing.SparkEmrServerlessRuntimeProps
+
+{@link SparkEmrServerlessRuntimeProps}.
+
+---
+
+#### Methods
+
+| **Name** | **Description** |
+| --- | --- |
+| toString | Returns a string representation of this construct. |
+| grantStartExecution | A method which will grant an IAM Role the right to start and monitor a job. |
+| retrieveVersion | Retrieve DSF package.json version. |
+
+---
+
+##### `toString`
+
+```typescript
+public toString(): string
+```
+
+Returns a string representation of this construct.
+
+##### `grantStartExecution`
+
+```typescript
+public grantStartExecution(startJobRole: IRole, executionRoleArn: string): void
+```
+
+A method which will grant an IAM Role the right to start and monitor a job.
+
+The method will also attach an iam:PassRole permission limited to the IAM job execution roles passed.
+The execution role will be able to submit jobs to the EMR Serverless application created by the construct.
+
+###### `startJobRole`<sup>Required</sup>
+
+- *Type:* aws-cdk-lib.aws_iam.IRole
+
+the role that will call the start job API and which needs to have the iam:PassRole permission.
+
+---
+
+###### `executionRoleArn`<sup>Required</sup>
+
+- *Type:* string
+
+the role used by EMR Serverless to access resources during the job execution.
+
+---
+
+##### `retrieveVersion`
+
+```typescript
+public retrieveVersion(): any
+```
+
+Retrieve DSF package.json version.
+
+#### Static Functions
+
+| **Name** | **Description** |
+| --- | --- |
+| isConstruct | Checks if `x` is a construct. |
+| createExecutionRole | A static method creating an execution IAM role that can be assumed by EMR Serverless. The method returns the role it creates. |
+| grantStartJobExecution | A static method granting the right to start and monitor a job to an IAM Role. 
|
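+
+A sketch combining `createExecutionRole` (documented below) with an inline policy document (the policy contents and bucket ARN are illustrative placeholders):
+
+```typescript
+import { PolicyDocument, PolicyStatement } from 'aws-cdk-lib/aws-iam';
+
+// Grant the job read access to its input data (hypothetical bucket)
+const jobPolicy = new PolicyDocument({
+  statements: [
+    new PolicyStatement({
+      actions: ['s3:GetObject', 's3:ListBucket'],
+      resources: ['arn:aws:s3:::MY-DATA-BUCKET', 'arn:aws:s3:::MY-DATA-BUCKET/*'],
+    }),
+  ],
+});
+
+const execRole = dsf.processing.SparkEmrServerlessRuntime.createExecutionRole(this, 'ExecRole', jobPolicy);
+```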
+
+---
+
+##### `isConstruct`
+
+```typescript
+import { processing } from '@cdklabs/aws-data-solutions-framework'
+
+processing.SparkEmrServerlessRuntime.isConstruct(x: any)
+```
+
+Checks if `x` is a construct.
+
+Use this method instead of `instanceof` to properly detect `Construct`
+instances, even when the construct library is symlinked.
+
+Explanation: in JavaScript, multiple copies of the `constructs` library on
+disk are seen as independent, completely different libraries. As a
+consequence, the class `Construct` in each copy of the `constructs` library
+is seen as a different class, and an instance of one class will not test as
+`instanceof` the other class. `npm install` will not create installations
+like this, but users may manually symlink construct libraries together or
+use a monorepo tool: in those cases, multiple copies of the `constructs`
+library can be accidentally installed, and `instanceof` will behave
+unpredictably. It is safest to avoid using `instanceof`, and using
+this type-testing method instead.
+
+###### `x`<sup>Required</sup>
+
+- *Type:* any
+
+Any object.
+
+---
+
+##### `createExecutionRole`
+
+```typescript
+import { processing } from '@cdklabs/aws-data-solutions-framework'
+
+processing.SparkEmrServerlessRuntime.createExecutionRole(scope: Construct, id: string, executionRolePolicyDocument?: PolicyDocument, iamPolicyName?: string)
+```
+
+A static method creating an execution IAM role that can be assumed by EMR Serverless. The method returns the role it creates.
+
+If no `executionRolePolicyDocument` or `iamPolicyName` is provided,
+the method will return a role with only a trust policy to the EMR Serverless service principal.
+You can then use this role to grant access to any resources you control.
+
+###### `scope`<sup>Required</sup>
+
+- *Type:* constructs.Construct
+
+the scope in which to create the role.
+
+---
+
+###### `id`<sup>Required</sup>
+
+- *Type:* string
+
+passed to the IAM Role construct object.
+
+---
+
+###### `executionRolePolicyDocument`<sup>Optional</sup>
+
+- *Type:* aws-cdk-lib.aws_iam.PolicyDocument
+
+the inline policy document to attach to the role.
+
+These are IAM policies needed by the job.
+This parameter is mutually exclusive with iamPolicyName.
+
+---
+
+###### `iamPolicyName`<sup>Optional</sup>
+
+- *Type:* string
+
+the IAM policy name to attach to the role; this is mutually exclusive with executionRolePolicyDocument.
+
+---
+
+##### `grantStartJobExecution`
+
+```typescript
+import { processing } from '@cdklabs/aws-data-solutions-framework'
+
+processing.SparkEmrServerlessRuntime.grantStartJobExecution(startJobRole: IRole, executionRoleArn: string[], applicationArns: string[])
+```
+
+A static method granting the right to start and monitor a job to an IAM Role.
+
+The method will also attach an iam:PassRole permission limited to the IAM job execution roles passed.
+
+###### `startJobRole`<sup>Required</sup>
+
+- *Type:* aws-cdk-lib.aws_iam.IRole
+
+the role that will call the start job API and which needs to have the iam:PassRole permission.
+
+---
+
+###### `executionRoleArn`<sup>Required</sup>
+
+- *Type:* string[]
+
+the roles used by EMR Serverless to access resources during the job execution.
+
+---
+
+###### `applicationArns`<sup>Required</sup>
+
+- *Type:* string[]
+
+the EMR Serverless application ARNs; these are used by the method to limit the EMR Serverless applications the role can submit jobs to.
+
+---
+
+#### Properties
+
+| **Name** | **Type** | **Description** |
+| --- | --- | --- |
+| node | constructs.Node | The tree node. 
|
+| application | aws-cdk-lib.aws_emrserverless.CfnApplication | The EMR Serverless application. |
+| emrApplicationSecurityGroup | aws-cdk-lib.aws_ec2.ISecurityGroup | If no VPC is provided, one is created by default along with a security group attached to the EMR Serverless Application. This attribute is used to expose the security group; if you provide your own security group through the {@link SparkEmrServerlessRuntimeProps}, the attribute will be `undefined`. |
+| flowLogGroup | aws-cdk-lib.aws_logs.ILogGroup | The CloudWatch Log Group for the VPC flow log when the VPC is created. |
+| flowLogKey | aws-cdk-lib.aws_kms.IKey | The KMS Key used for the VPC flow log when the VPC is created. |
+| flowLogRole | aws-cdk-lib.aws_iam.IRole | The IAM Role used for the VPC flow log when the VPC is created. |
+| s3VpcEndpoint | aws-cdk-lib.aws_ec2.IGatewayVpcEndpoint | *No description.* |
+| vpc | aws-cdk-lib.aws_ec2.IVpc | The VPC used by the EMR Serverless application. |
+
+---
+
+##### `node`<sup>Required</sup>
+
+```typescript
+public readonly node: Node;
+```
+
+- *Type:* constructs.Node
+
+The tree node.
+
+---
+
+##### `application`<sup>Required</sup>
+
+```typescript
+public readonly application: CfnApplication;
+```
+
+- *Type:* aws-cdk-lib.aws_emrserverless.CfnApplication
+
+The EMR Serverless application.
+
+---
+
+##### `emrApplicationSecurityGroup`<sup>Optional</sup>
+
+```typescript
+public readonly emrApplicationSecurityGroup: ISecurityGroup;
+```
+
+- *Type:* aws-cdk-lib.aws_ec2.ISecurityGroup
+
+If no VPC is provided, one is created by default along with a security group attached to the EMR Serverless Application. This attribute is used to expose the security group; if you provide your own security group through the {@link SparkEmrServerlessRuntimeProps}, the attribute will be `undefined`.
+
+---
+
+##### `flowLogGroup`<sup>Optional</sup>
+
+```typescript
+public readonly flowLogGroup: ILogGroup;
+```
+
+- *Type:* aws-cdk-lib.aws_logs.ILogGroup
+
+The CloudWatch Log Group for the VPC flow log when the VPC is created.
+
+---
+
+##### `flowLogKey`<sup>Optional</sup>
+
+```typescript
+public readonly flowLogKey: IKey;
+```
+
+- *Type:* aws-cdk-lib.aws_kms.IKey
+
+The KMS Key used for the VPC flow log when the VPC is created.
+
+---
+
+##### `flowLogRole`<sup>Optional</sup>
+
+```typescript
+public readonly flowLogRole: IRole;
+```
+
+- *Type:* aws-cdk-lib.aws_iam.IRole
+
+The IAM Role used for the VPC flow log when the VPC is created.
+
+---
+
+##### `s3VpcEndpoint`<sup>Optional</sup>
+
+```typescript
+public readonly s3VpcEndpoint: IGatewayVpcEndpoint;
+```
+
+- *Type:* aws-cdk-lib.aws_ec2.IGatewayVpcEndpoint
+
+---
+
+##### `vpc`<sup>Optional</sup>
+
+```typescript
+public readonly vpc: IVpc;
+```
+
+- *Type:* aws-cdk-lib.aws_ec2.IVpc
+
+The VPC used by the EMR Serverless application.
+
+---
+
+#### Constants
+
+| **Name** | **Type** | **Description** |
+| --- | --- | --- |
+| DSF_OWNED_TAG | string | *No description.* |
+| DSF_TRACKING_CODE | string | *No description.* |
+
+---
+
+##### `DSF_OWNED_TAG`<sup>Required</sup>
+
+```typescript
+public readonly DSF_OWNED_TAG: string;
+```
+
+- *Type:* string
+
+---
+
+##### `DSF_TRACKING_CODE`<sup>Required</sup>
+
+```typescript
+public readonly DSF_TRACKING_CODE: string;
+```
+
+- *Type:* string
+
+---
+
+### SparkJob
+
+A base construct to run Spark Jobs.
+
+Creates an AWS Step Functions State Machine that orchestrates the Spark Job. 
+
+> [https://awslabs.github.io/data-solutions-framework-on-aws/docs/constructs/library/Processing/spark-emr-serverless-job](https://awslabs.github.io/data-solutions-framework-on-aws/docs/constructs/library/Processing/spark-emr-serverless-job)
+
+Available implementations:
+* {@link SparkEmrServerlessJob } for EMR Serverless implementation
+* {@link SparkEmrEksJob } for EMR on EKS implementation
+
+#### Initializers
+
+```typescript
+import { processing } from '@cdklabs/aws-data-solutions-framework'
+
+new processing.SparkJob(scope: Construct, id: string, trackingTag: string, props: SparkJobProps)
+```
+
+| **Name** | **Type** | **Description** |
+| --- | --- | --- |
+| scope | constructs.Construct | the Scope of the CDK Construct. |
+| id | string | the ID of the CDK Construct. |
+| trackingTag | string | *No description.* |
+| props | @cdklabs/aws-data-solutions-framework.processing.SparkJobProps | the SparkJobProps properties. |
+
+---
+
+##### `scope`<sup>Required</sup>
+
+- *Type:* constructs.Construct
+
+the Scope of the CDK Construct.
+
+---
+
+##### `id`<sup>Required</sup>
+
+- *Type:* string
+
+the ID of the CDK Construct.
+
+---
+
+##### `trackingTag`<sup>Required</sup>
+
+- *Type:* string
+
+---
+
+##### `props`<sup>Required</sup>
+
+- *Type:* @cdklabs/aws-data-solutions-framework.processing.SparkJobProps
+
+the SparkJobProps properties.
+
+---
+
+#### Methods
+
+| **Name** | **Description** |
+| --- | --- |
+| toString | Returns a string representation of this construct. |
+| retrieveVersion | Retrieve DSF package.json version. |
+
+---
+
+##### `toString`
+
+```typescript
+public toString(): string
+```
+
+Returns a string representation of this construct.
+
+##### `retrieveVersion`
+
+```typescript
+public retrieveVersion(): any
+```
+
+Retrieve DSF package.json version.
+
+#### Static Functions
+
+| **Name** | **Description** |
+| --- | --- |
+| isConstruct | Checks if `x` is a construct. |
+
+---
+
+##### `isConstruct`
+
+```typescript
+import { processing } from '@cdklabs/aws-data-solutions-framework'
+
+processing.SparkJob.isConstruct(x: any)
+```
+
+Checks if `x` is a construct.
+
+Use this method instead of `instanceof` to properly detect `Construct`
+instances, even when the construct library is symlinked.
+
+Explanation: in JavaScript, multiple copies of the `constructs` library on
+disk are seen as independent, completely different libraries. As a
+consequence, the class `Construct` in each copy of the `constructs` library
+is seen as a different class, and an instance of one class will not test as
+`instanceof` the other class. `npm install` will not create installations
+like this, but users may manually symlink construct libraries together or
+use a monorepo tool: in those cases, multiple copies of the `constructs`
+library can be accidentally installed, and `instanceof` will behave
+unpredictably. It is safest to avoid using `instanceof`, and using
+this type-testing method instead.
+
+###### `x`<sup>Required</sup>
+
+- *Type:* any
+
+Any object.
+
+---
+
+#### Properties
+
+| **Name** | **Type** | **Description** |
+| --- | --- | --- |
+| node | constructs.Node | The tree node. |
+| stateMachine | aws-cdk-lib.aws_stepfunctions.StateMachine | The Step Functions State Machine created to orchestrate the Spark Job. |
+| stateMachineLogGroup | aws-cdk-lib.aws_logs.ILogGroup | The CloudWatch Log Group used by the State Machine. 
|
+
+---
+
+##### `node`<sup>Required</sup>
+
+```typescript
+public readonly node: Node;
+```
+
+- *Type:* constructs.Node
+
+The tree node.
+
+---
+
+##### `stateMachine`<sup>Optional</sup>
+
+```typescript
+public readonly stateMachine: StateMachine;
+```
+
+- *Type:* aws-cdk-lib.aws_stepfunctions.StateMachine
+
+The Step Functions State Machine created to orchestrate the Spark Job.
+
+---
+
+##### `stateMachineLogGroup`<sup>Optional</sup>
+
+```typescript
+public readonly stateMachineLogGroup: ILogGroup;
+```
+
+- *Type:* aws-cdk-lib.aws_logs.ILogGroup
+
+The CloudWatch Log Group used by the State Machine.
+
+---
+
+#### Constants
+
+| **Name** | **Type** | **Description** |
+| --- | --- | --- |
+| DSF_OWNED_TAG | string | *No description.* |
+| DSF_TRACKING_CODE | string | *No description.* |
+
+---
+
+##### `DSF_OWNED_TAG`<sup>Required</sup>
+
+```typescript
+public readonly DSF_OWNED_TAG: string;
+```
+
+- *Type:* string
+
+---
+
+##### `DSF_TRACKING_CODE`<sup>Required</sup>
+
+```typescript
+public readonly DSF_TRACKING_CODE: string;
+```
+
+- *Type:* string
+
+---
+
+## Structs
+
+### Acl
+
+Kafka ACL. This is similar to the object used by `kafkajs`; for more information see this [link](https://kafka.js.org/docs/admin#create-acl).
+
+#### Initializer
+
+```typescript
+import { streaming } from '@cdklabs/aws-data-solutions-framework'
+
+const acl: streaming.Acl = { ... }
+```
+
+#### Properties
+
+| **Name** | **Type** | **Description** |
+| --- | --- | --- |
+| host | string | *No description.* |
+| operation | @cdklabs/aws-data-solutions-framework.streaming.AclOperationTypes | *No description.* |
+| permissionType | @cdklabs/aws-data-solutions-framework.streaming.AclPermissionTypes | *No description.* |
+| principal | string | *No description.* |
+| resourceName | string | *No description.* |
+| resourcePatternType | @cdklabs/aws-data-solutions-framework.streaming.ResourcePatternTypes | *No description.* |
+| resourceType | @cdklabs/aws-data-solutions-framework.streaming.AclResourceTypes | *No description.* |
+
+---
+
+##### `host`<sup>Required</sup>
+
+```typescript
+public readonly host: string;
+```
+
+- *Type:* string
+
+---
+
+##### `operation`<sup>Required</sup>
+
+```typescript
+public readonly operation: AclOperationTypes;
+```
+
+- *Type:* @cdklabs/aws-data-solutions-framework.streaming.AclOperationTypes
+
+---
+
+##### `permissionType`<sup>Required</sup>
+
+```typescript
+public readonly permissionType: AclPermissionTypes;
+```
+
+- *Type:* @cdklabs/aws-data-solutions-framework.streaming.AclPermissionTypes
+
+---
+
+##### `principal`<sup>Required</sup>
+
+```typescript
+public readonly principal: string;
+```
+
+- *Type:* string
+
+---
+
+##### `resourceName`<sup>Required</sup>
+
+```typescript
+public readonly resourceName: string;
+```
+
+- *Type:* string
+
+---
+
+##### `resourcePatternType`<sup>Required</sup>
+
+```typescript
+public readonly resourcePatternType: ResourcePatternTypes;
+```
+
+- *Type:* @cdklabs/aws-data-solutions-framework.streaming.ResourcePatternTypes
+
+---
+
+##### `resourceType`<sup>Required</sup>
+
+```typescript
+public readonly resourceType: AclResourceTypes;
+```
+
+- *Type:* @cdklabs/aws-data-solutions-framework.streaming.AclResourceTypes
+
+---
+
+### AclAdminProps
+
+These props allow you to define the principals that will be administrators, as well as the principal that will be used by the CDK custom resources to set ACLs and Topics.
+
+#### Initializer
+
+```typescript
+import { streaming } from '@cdklabs/aws-data-solutions-framework'
+
+const aclAdminProps: streaming.AclAdminProps = { ... 
}
+```
+
+#### Properties
+
+| **Name** | **Type** | **Description** |
+| --- | --- | --- |
+| aclAdminPrincipal | string | This Principal will be used by the CDK custom resource to set ACLs and Topics. |
+| adminPrincipal | string | The Principal that will have administrator privilege in MSK. The MSK construct does not have access to this principal. Keep this principal in secure storage; it should only be used in case you put an ACL that locks MSK access. |
+| secretCertificate | aws-cdk-lib.aws_secretsmanager.ISecret | This is the TLS certificate of the Principal that is used by the CDK custom resource which sets ACLs and Topics. |
+
+---
+
+##### `aclAdminPrincipal`<sup>Required</sup>
+
+```typescript
+public readonly aclAdminPrincipal: string;
+```
+
+- *Type:* string
+
+This Principal will be used by the CDK custom resource to set ACLs and Topics.
+
+---
+
+##### `adminPrincipal`<sup>Required</sup>
+
+```typescript
+public readonly adminPrincipal: string;
+```
+
+- *Type:* string
+
+The Principal that will have administrator privilege in MSK. The MSK construct does not have access to this principal. Keep this principal in secure storage; it should only be used in case you put an ACL that locks MSK access.
+
+---
+
+##### `secretCertificate`<sup>Required</sup>
+
+```typescript
+public readonly secretCertificate: ISecret;
+```
+
+- *Type:* aws-cdk-lib.aws_secretsmanager.ISecret
+
+This is the TLS certificate of the Principal that is used by the CDK custom resource which sets ACLs and Topics.
+
+The secret in AWS Secrets Manager must be a JSON in the following format:
+{
+  "key" : "PRIVATE-KEY",
+  "cert" : "CERTIFICATE"
+}
+
+You can use the following utility to generate the certificates:
+https://github.com/aws-samples/amazon-msk-client-authentication
+
+---
+
+### AnalyticsBucketProps
+
+Properties for the `AnalyticsBucket` construct.
+
+#### Initializer
+
+```typescript
+import { storage } from '@cdklabs/aws-data-solutions-framework'
+
+const analyticsBucketProps: storage.AnalyticsBucketProps = { ... }
+```
+
+#### Properties
+
+| **Name** | **Type** | **Description** |
+| --- | --- | --- |
+| encryptionKey | aws-cdk-lib.aws_kms.IKey | External KMS Key to use for the S3 Bucket encryption. |
+| accessControl | aws-cdk-lib.aws_s3.BucketAccessControl | Specifies a canned ACL that grants predefined permissions to the bucket. |
+| autoDeleteObjects | boolean | Whether all objects should be automatically deleted when the S3 Bucket is removed from the stack or when the stack is deleted. |
+| blockPublicAccess | aws-cdk-lib.aws_s3.BlockPublicAccess | The block public access configuration of this bucket. |
+| bucketKeyEnabled | boolean | Whether Amazon S3 should use its own intermediary key to generate data keys. Only relevant when using KMS for encryption. |
+| bucketName | string | The physical name of this S3 Bucket. |
+| cors | aws-cdk-lib.aws_s3.CorsRule[] | The CORS configuration of this bucket. |
+| enforceSSL | boolean | Enforces SSL for requests. |
+| eventBridgeEnabled | boolean | Whether this S3 Bucket should send notifications to Amazon EventBridge or not. |
+| intelligentTieringConfigurations | aws-cdk-lib.aws_s3.IntelligentTieringConfiguration[] | Intelligent Tiering Configurations. |
+| inventories | aws-cdk-lib.aws_s3.Inventory[] | The inventory configuration of the S3 Bucket. |
+| lifecycleRules | aws-cdk-lib.aws_s3.LifecycleRule[] | Rules that define how Amazon S3 manages objects during their lifetime. |
+| metrics | aws-cdk-lib.aws_s3.BucketMetrics[] | The metrics configuration of this bucket. 
| +| notificationsHandlerRole | aws-cdk-lib.aws_iam.IRole | The IAM Role to be used by the notifications handler. | +| objectLockDefaultRetention | aws-cdk-lib.aws_s3.ObjectLockRetention | The default retention mode and rules for S3 Object Lock. | +| objectLockEnabled | boolean | Enable object lock on the S3 Bucket. | +| objectOwnership | aws-cdk-lib.aws_s3.ObjectOwnership | The objectOwnership of the S3 Bucket. | +| publicReadAccess | boolean | Grants public read access to all objects in the S3 Bucket. | +| removalPolicy | aws-cdk-lib.RemovalPolicy | The removal policy when deleting the CDK resource. | +| serverAccessLogsBucket | aws-cdk-lib.aws_s3.IBucket | S3 Bucket destination for the server access logs. | +| serverAccessLogsPrefix | string | Optional log file prefix to use for the S3 Bucket's access logs. | +| transferAcceleration | boolean | Whether this S3 Bucket should have transfer acceleration turned on or not. | +| versioned | boolean | Whether this S3 Bucket should have versioning turned on or not. | + +--- + +##### `encryptionKey`Required + +```typescript +public readonly encryptionKey: IKey; +``` + +- *Type:* aws-cdk-lib.aws_kms.IKey +- *Default:* If `encryption` is set to `KMS` and this property is undefined, a new KMS key will be created and associated with this bucket. + +External KMS Key to use for the S3 Bucket encryption. + +The `encryption` property must be either not specified or set to `KMS` or `DSSE`. +An error will be emitted if `encryption` is set to `UNENCRYPTED` or `S3_MANAGED`. + +--- + +##### `accessControl`Optional + +```typescript +public readonly accessControl: BucketAccessControl; +``` + +- *Type:* aws-cdk-lib.aws_s3.BucketAccessControl +- *Default:* BucketAccessControl.PRIVATE + +Specifies a canned ACL that grants predefined permissions to the bucket. + +--- + +##### `autoDeleteObjects`Optional + +```typescript +public readonly autoDeleteObjects: boolean; +``` + +- *Type:* boolean +- *Default:* False + +Whether all objects should be automatically deleted when the S3 Bucket is removed from the stack or when the stack is deleted. + +Requires the `removalPolicy` to be set to `RemovalPolicy.DESTROY`. + +--- + +##### `blockPublicAccess`Optional + +```typescript +public readonly blockPublicAccess: BlockPublicAccess; +``` + +- *Type:* aws-cdk-lib.aws_s3.BlockPublicAccess +- *Default:* CloudFormation defaults will apply. New buckets and objects don't allow public access, but users can modify bucket policies or object permissions to allow public access + +The block public access configuration of this bucket. + +--- + +##### `bucketKeyEnabled`Optional + +```typescript +public readonly bucketKeyEnabled: boolean; +``` + +- *Type:* boolean +- *Default:* False + +Whether Amazon S3 should use its own intermediary key to generate data keys. Only relevant when using KMS for encryption. + +If not enabled, every object GET and PUT will cause an API call to KMS (with the + attendant cost implications of that). +- If enabled, S3 will use its own time-limited key instead. + +Only relevant, when Encryption is set to `BucketEncryption.KMS` or `BucketEncryption.KMS_MANAGED`. + +--- + +##### `bucketName`Optional + +```typescript +public readonly bucketName: string; +``` + +- *Type:* string +- *Default:* `analytics---` + +The physical name of this S3 Bucket. + +--- + +##### `cors`Optional + +```typescript +public readonly cors: CorsRule[]; +``` + +- *Type:* aws-cdk-lib.aws_s3.CorsRule[] +- *Default:* No CORS configuration. + +The CORS configuration of this bucket. 
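+
+A CORS rule sketch for the property above (the origin and methods are placeholders):
+
+```typescript
+import { HttpMethods } from 'aws-cdk-lib/aws-s3';
+
+// Allow simple GET access from a single web origin
+const cors = [{
+  allowedMethods: [HttpMethods.GET],
+  allowedOrigins: ['https://example.com'],
+  allowedHeaders: ['*'],
+}];
+```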
--- -##### `grantStartJobExecution` +##### `enforceSSL`Optional ```typescript -import { processing } from '@cdklabs/aws-data-solutions-framework' - -processing.SparkEmrServerlessRuntime.grantStartJobExecution(startJobRole: IRole, executionRoleArn: string[], applicationArns: string[]) +public readonly enforceSSL: boolean; ``` -A static method granting the right to start and monitor a job to an IAM Role. - -The method will also attach an iam:PassRole permission limited to the IAM Job Execution roles passed - -###### `startJobRole`Required +- *Type:* boolean +- *Default:* False -- *Type:* aws-cdk-lib.aws_iam.IRole +Enforces SSL for requests. -the role that will call the start job api and which needs to have the iam:PassRole permission. +S3.5 of the AWS Foundational Security Best Practices Regarding S3. --- -###### `executionRoleArn`Required +##### `eventBridgeEnabled`Optional -- *Type:* string[] +```typescript +public readonly eventBridgeEnabled: boolean; +``` -the role used by EMR Serverless to access resources during the job execution. +- *Type:* boolean +- *Default:* False + +Whether this S3 Bucket should send notifications to Amazon EventBridge or not. --- -###### `applicationArns`Required +##### `intelligentTieringConfigurations`Optional -- *Type:* string[] +```typescript +public readonly intelligentTieringConfigurations: IntelligentTieringConfiguration[]; +``` -the EMR Serverless aplication ARN, this is used by the method to limit the EMR Serverless applications the role can submit job to. +- *Type:* aws-cdk-lib.aws_s3.IntelligentTieringConfiguration[] +- *Default:* No Intelligent Tiiering Configurations. + +Intelligent Tiering Configurations. --- -#### Properties +##### `inventories`Optional -| **Name** | **Type** | **Description** | -| --- | --- | --- | -| node | constructs.Node | The tree node. | -| application | aws-cdk-lib.aws_emrserverless.CfnApplication | The EMR Serverless application. | -| emrApplicationSecurityGroup | aws-cdk-lib.aws_ec2.ISecurityGroup | If no VPC is provided, one is created by default along with a security group attached to the EMR Serverless Application This attribute is used to expose the security group, if you provide your own security group through the {@link SparkEmrServerlessRuntimeProps} the attribute will be `undefined`. | -| flowLogGroup | aws-cdk-lib.aws_logs.ILogGroup | The CloudWatch Log Group for the VPC flow log when the VPC is created. | -| flowLogKey | aws-cdk-lib.aws_kms.IKey | The KMS Key used for the VPC flow log when the VPC is created. | -| flowLogRole | aws-cdk-lib.aws_iam.IRole | The IAM Role used for the VPC flow log when the VPC is created. | -| s3VpcEndpoint | aws-cdk-lib.aws_ec2.IGatewayVpcEndpoint | *No description.* | -| vpc | aws-cdk-lib.aws_ec2.IVpc | The VPC used by the EKS cluster. | +```typescript +public readonly inventories: Inventory[]; +``` + +- *Type:* aws-cdk-lib.aws_s3.Inventory[] +- *Default:* No inventory configuration + +The inventory configuration of the S3 Bucket. --- -##### `node`Required +##### `lifecycleRules`Optional ```typescript -public readonly node: Node; +public readonly lifecycleRules: LifecycleRule[]; ``` -- *Type:* constructs.Node +- *Type:* aws-cdk-lib.aws_s3.LifecycleRule[] +- *Default:* No lifecycle rules. -The tree node. +Rules that define how Amazon S3 manages objects during their lifetime. 
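+
+A lifecycle rule sketch for the property above (the transition window is illustrative):
+
+```typescript
+import { Duration } from 'aws-cdk-lib';
+import { StorageClass } from 'aws-cdk-lib/aws-s3';
+
+// Move objects to Infrequent Access storage after 30 days
+const lifecycleRules = [{
+  transitions: [{
+    storageClass: StorageClass.INFREQUENT_ACCESS,
+    transitionAfter: Duration.days(30),
+  }],
+}];
+```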
--- -##### `application`Required +##### `metrics`Optional ```typescript -public readonly application: CfnApplication; +public readonly metrics: BucketMetrics[]; ``` -- *Type:* aws-cdk-lib.aws_emrserverless.CfnApplication +- *Type:* aws-cdk-lib.aws_s3.BucketMetrics[] +- *Default:* No metrics configuration. -The EMR Serverless application. +The metrics configuration of this bucket. --- -##### `emrApplicationSecurityGroup`Optional +##### `notificationsHandlerRole`Optional ```typescript -public readonly emrApplicationSecurityGroup: ISecurityGroup; +public readonly notificationsHandlerRole: IRole; ``` -- *Type:* aws-cdk-lib.aws_ec2.ISecurityGroup +- *Type:* aws-cdk-lib.aws_iam.IRole +- *Default:* A new IAM Role will be created. -If no VPC is provided, one is created by default along with a security group attached to the EMR Serverless Application This attribute is used to expose the security group, if you provide your own security group through the {@link SparkEmrServerlessRuntimeProps} the attribute will be `undefined`. +The IAM Role to be used by the notifications handler. --- -##### `flowLogGroup`Optional +##### `objectLockDefaultRetention`Optional ```typescript -public readonly flowLogGroup: ILogGroup; +public readonly objectLockDefaultRetention: ObjectLockRetention; ``` -- *Type:* aws-cdk-lib.aws_logs.ILogGroup +- *Type:* aws-cdk-lib.aws_s3.ObjectLockRetention +- *Default:* No default retention period -The CloudWatch Log Group for the VPC flow log when the VPC is created. +The default retention mode and rules for S3 Object Lock. + +Default retention can be configured after a bucket is created if the bucket already +has object lock enabled. Enabling object lock for existing buckets is not supported. --- -##### `flowLogKey`Optional +##### `objectLockEnabled`Optional ```typescript -public readonly flowLogKey: IKey; +public readonly objectLockEnabled: boolean; ``` -- *Type:* aws-cdk-lib.aws_kms.IKey +- *Type:* boolean +- *Default:* False, unless objectLockDefaultRetention is set (then, true) -The KMS Key used for the VPC flow log when the VPC is created. +Enable object lock on the S3 Bucket. + +Enabling object lock for existing buckets is not supported. Object lock must be enabled when the bucket is created. --- -##### `flowLogRole`Optional +##### `objectOwnership`Optional ```typescript -public readonly flowLogRole: IRole; +public readonly objectOwnership: ObjectOwnership; ``` -- *Type:* aws-cdk-lib.aws_iam.IRole +- *Type:* aws-cdk-lib.aws_s3.ObjectOwnership +- *Default:* No ObjectOwnership configuration, uploading account will own the object. -The IAM Role used for the VPC flow log when the VPC is created. +The objectOwnership of the S3 Bucket. --- -##### `s3VpcEndpoint`Optional +##### `publicReadAccess`Optional ```typescript -public readonly s3VpcEndpoint: IGatewayVpcEndpoint; +public readonly publicReadAccess: boolean; ``` -- *Type:* aws-cdk-lib.aws_ec2.IGatewayVpcEndpoint +- *Type:* boolean +- *Default:* False + +Grants public read access to all objects in the S3 Bucket. + +Similar to calling `bucket.grantPublicAccess()` --- -##### `vpc`Optional +##### `removalPolicy`Optional ```typescript -public readonly vpc: IVpc; +public readonly removalPolicy: RemovalPolicy; ``` -- *Type:* aws-cdk-lib.aws_ec2.IVpc +- *Type:* aws-cdk-lib.RemovalPolicy +- *Default:* The resources are not deleted (`RemovalPolicy.RETAIN`). -The VPC used by the EKS cluster. +The removal policy when deleting the CDK resource. 
+ +If DESTROY is selected, context value `@data-solutions-framework-on-aws/removeDataOnDestroy` needs to be set to true. +Otherwise the removalPolicy is reverted to RETAIN. --- -#### Constants +##### `serverAccessLogsBucket`Optional -| **Name** | **Type** | **Description** | -| --- | --- | --- | -| DSF_OWNED_TAG | string | *No description.* | -| DSF_TRACKING_CODE | string | *No description.* | +```typescript +public readonly serverAccessLogsBucket: IBucket; +``` + +- *Type:* aws-cdk-lib.aws_s3.IBucket +- *Default:* If "serverAccessLogsPrefix" undefined - access logs disabled, otherwise - log to current bucket. + +S3 Bucket destination for the server access logs. --- -##### `DSF_OWNED_TAG`Required +##### `serverAccessLogsPrefix`Optional ```typescript -public readonly DSF_OWNED_TAG: string; +public readonly serverAccessLogsPrefix: string; ``` - *Type:* string +- *Default:* No log file prefix + +Optional log file prefix to use for the S3 Bucket's access logs. + +If defined without "serverAccessLogsBucket", enables access logs to current S3 Bucket with this prefix. --- -##### `DSF_TRACKING_CODE`Required +##### `transferAcceleration`Optional ```typescript -public readonly DSF_TRACKING_CODE: string; +public readonly transferAcceleration: boolean; ``` -- *Type:* string +- *Type:* boolean +- *Default:* False + +Whether this S3 Bucket should have transfer acceleration turned on or not. --- -### SparkJob +##### `versioned`Optional -A base construct to run Spark Jobs. +```typescript +public readonly versioned: boolean; +``` -Creates an AWS Step Functions State Machine that orchestrates the Spark Job. +- *Type:* boolean +- *Default:* False (unless object lock is enabled, then true) -> [https://awslabs.github.io/data-solutions-framework-on-aws/docs/constructs/library/Processing/spark-emr-serverless-job +Whether this S3 Bucket should have versioning turned on or not. -Available implementations: -* {@link SparkEmrServerlessJob } for Emr Serverless implementation -* {@link SparkEmrEksJob } for EMR On EKS implementation](https://awslabs.github.io/data-solutions-framework-on-aws/docs/constructs/library/Processing/spark-emr-serverless-job +--- -Available implementations: -* {@link SparkEmrServerlessJob } for Emr Serverless implementation -* {@link SparkEmrEksJob } for EMR On EKS implementation) +### ApplicationStageProps -#### Initializers +Properties for the `ApplicationStage` class. + +#### Initializer ```typescript -import { processing } from '@cdklabs/aws-data-solutions-framework' +import { utils } from '@cdklabs/aws-data-solutions-framework' -new processing.SparkJob(scope: Construct, id: string, trackingTag: string, props: SparkJobProps) +const applicationStageProps: utils.ApplicationStageProps = { ... } ``` +#### Properties + | **Name** | **Type** | **Description** | | --- | --- | --- | -| scope | constructs.Construct | the Scope of the CDK Construct. | -| id | string | the ID of the CDK Construct. | -| trackingTag | string | *No description.* | -| props | @cdklabs/aws-data-solutions-framework.processing.SparkJobProps | the SparkJobProps properties. | +| env | aws-cdk-lib.Environment | Default AWS environment (account/region) for `Stack`s in this `Stage`. | +| outdir | string | The output directory into which to emit synthesized artifacts. | +| permissionsBoundary | aws-cdk-lib.PermissionsBoundary | Options for applying a permissions boundary to all IAM Roles and Users created within this Stage. 
| +| policyValidationBeta1 | aws-cdk-lib.IPolicyValidationPluginBeta1[] | Validation plugins to run during synthesis. | +| stageName | string | Name of this stage. | +| applicationStackFactory | @cdklabs/aws-data-solutions-framework.utils.ApplicationStackFactory | The application CDK Stack Factory used to create application Stacks. | +| stage | @cdklabs/aws-data-solutions-framework.utils.CICDStage | The Stage to deploy the application CDK Stack in. | +| outputsEnv | {[ key: string ]: string} | The list of values to create CfnOutputs. | --- -##### `scope`Required +##### `env`Optional -- *Type:* constructs.Construct +```typescript +public readonly env: Environment; +``` -the Scope of the CDK Construct. +- *Type:* aws-cdk-lib.Environment +- *Default:* The environments should be configured on the `Stack`s. ---- +Default AWS environment (account/region) for `Stack`s in this `Stage`. -##### `id`Required +Stacks defined inside this `Stage` with either `region` or `account` missing +from its env will use the corresponding field given here. -- *Type:* string +If either `region` or `account`is is not configured for `Stack` (either on +the `Stack` itself or on the containing `Stage`), the Stack will be +*environment-agnostic*. -the ID of the CDK Construct. +Environment-agnostic stacks can be deployed to any environment, may not be +able to take advantage of all features of the CDK. For example, they will +not be able to use environmental context lookups, will not automatically +translate Service Principals to the right format based on the environment's +AWS partition, and other such enhancements. --- -##### `trackingTag`Required +*Example* -- *Type:* string +```typescript +// Use a concrete account and region to deploy this Stage to +new Stage(app, 'Stage1', { + env: { account: '123456789012', region: 'us-east-1' }, +}); ---- +// Use the CLI's current credentials to determine the target environment +new Stage(app, 'Stage2', { + env: { account: process.env.CDK_DEFAULT_ACCOUNT, region: process.env.CDK_DEFAULT_REGION }, +}); +``` -##### `props`Required -- *Type:* @cdklabs/aws-data-solutions-framework.processing.SparkJobProps +##### `outdir`Optional -the SparkJobProps properties. +```typescript +public readonly outdir: string; +``` ---- +- *Type:* string +- *Default:* for nested stages, outdir will be determined as a relative directory to the outdir of the app. For apps, if outdir is not specified, a temporary directory will be created. -#### Methods +The output directory into which to emit synthesized artifacts. -| **Name** | **Description** | -| --- | --- | -| toString | Returns a string representation of this construct. | -| retrieveVersion | Retrieve DSF package.json version. | +Can only be specified if this stage is the root stage (the app). If this is +specified and this stage is nested within another stage, an error will be +thrown. --- -##### `toString` +##### `permissionsBoundary`Optional ```typescript -public toString(): string +public readonly permissionsBoundary: PermissionsBoundary; ``` -Returns a string representation of this construct. +- *Type:* aws-cdk-lib.PermissionsBoundary +- *Default:* no permissions boundary is applied -##### `retrieveVersion` +Options for applying a permissions boundary to all IAM Roles and Users created within this Stage. + +--- + +##### `policyValidationBeta1`Optional ```typescript -public retrieveVersion(): any +public readonly policyValidationBeta1: IPolicyValidationPluginBeta1[]; ``` -Retrieve DSF package.json version. 
+- *Type:* aws-cdk-lib.IPolicyValidationPluginBeta1[] +- *Default:* no validation plugins are used -#### Static Functions +Validation plugins to run during synthesis. -| **Name** | **Description** | -| --- | --- | -| isConstruct | Checks if `x` is a construct. | +If any plugin reports any violation, +synthesis will be interrupted and the report displayed to the user. --- -##### `isConstruct` +##### `stageName`Optional ```typescript -import { processing } from '@cdklabs/aws-data-solutions-framework' - -processing.SparkJob.isConstruct(x: any) +public readonly stageName: string; ``` -Checks if `x` is a construct. - -Use this method instead of `instanceof` to properly detect `Construct` -instances, even when the construct library is symlinked. - -Explanation: in JavaScript, multiple copies of the `constructs` library on -disk are seen as independent, completely different libraries. As a -consequence, the class `Construct` in each copy of the `constructs` library -is seen as a different class, and an instance of one class will not test as -`instanceof` the other class. `npm install` will not create installations -like this, but users may manually symlink construct libraries together or -use a monorepo tool: in those cases, multiple copies of the `constructs` -library can be accidentally installed, and `instanceof` will behave -unpredictably. It is safest to avoid using `instanceof`, and using -this type-testing method instead. +- *Type:* string +- *Default:* Derived from the id. -###### `x`Required +Name of this stage. -- *Type:* any +--- -Any object. +##### `applicationStackFactory`Required ---- +```typescript +public readonly applicationStackFactory: ApplicationStackFactory; +``` -#### Properties +- *Type:* @cdklabs/aws-data-solutions-framework.utils.ApplicationStackFactory -| **Name** | **Type** | **Description** | -| --- | --- | --- | -| node | constructs.Node | The tree node. | -| stateMachine | aws-cdk-lib.aws_stepfunctions.StateMachine | The Step Functions State Machine created to orchestrate the Spark Job. | -| stateMachineLogGroup | aws-cdk-lib.aws_logs.ILogGroup | The CloudWatch Log Group used by the State Machine. | +The application CDK Stack Factory used to create application Stacks. --- -##### `node`Required +##### `stage`Required ```typescript -public readonly node: Node; +public readonly stage: CICDStage; ``` -- *Type:* constructs.Node +- *Type:* @cdklabs/aws-data-solutions-framework.utils.CICDStage +- *Default:* No stage is passed to the application stack -The tree node. +The Stage to deploy the application CDK Stack in. --- -##### `stateMachine`Optional +##### `outputsEnv`Optional ```typescript -public readonly stateMachine: StateMachine; +public readonly outputsEnv: {[ key: string ]: string}; ``` -- *Type:* aws-cdk-lib.aws_stepfunctions.StateMachine +- *Type:* {[ key: string ]: string} +- *Default:* No CfnOutputs are created -The Step Functions State Machine created to orchestrate the Spark Job. +The list of values to create CfnOutputs. --- -##### `stateMachineLogGroup`Optional +### AthenaWorkgroupProps -```typescript -public readonly stateMachineLogGroup: ILogGroup; -``` +Properties for the AthenaWorkgroup Construct. -- *Type:* aws-cdk-lib.aws_logs.ILogGroup +#### Initializer -The CloudWatch Log Group used by the State Machine. +```typescript +import { consumption } from '@cdklabs/aws-data-solutions-framework' ---- +const athenaWorkgroupProps: consumption.AthenaWorkgroupProps = { ... 
} +``` -#### Constants +#### Properties | **Name** | **Type** | **Description** | | --- | --- | --- | -| DSF_OWNED_TAG | string | *No description.* | -| DSF_TRACKING_CODE | string | *No description.* | +| name | string | Name of the Workgroup. | +| resultLocationPrefix | string | Specifies the location in Amazon S3 where query results are stored. | +| bytesScannedCutoffPerQuery | number | Indicates the number of days after creation when objects are deleted from the Result bucket. | +| enforceWorkGroupConfiguration | boolean | If set to "true", the settings for the workgroup override client-side settings. | +| engineVersion | @cdklabs/aws-data-solutions-framework.consumption.EngineVersion | The engine version on which the query runs. | +| executionRole | aws-cdk-lib.aws_iam.IRole | Role used to access user resources in an Athena for Apache Spark session. | +| publishCloudWatchMetricsEnabled | boolean | Indicates that the Amazon CloudWatch metrics are enabled for the workgroup. | +| recursiveDeleteOption | boolean | The option to delete a workgroup and its contents even if the workgroup contains any named queries. | +| removalPolicy | aws-cdk-lib.RemovalPolicy | The removal policy when deleting the CDK resource. | +| requesterPaysEnabled | boolean | Allows members assigned to a workgroup to reference Amazon S3 Requester Pays buckets in queries. | +| resultBucket | aws-cdk-lib.aws_s3.IBucket | Amazon S3 Bucket where query results are stored. | +| resultBucketName | string | Name for the S3 Bucket in case it should be created. | +| resultsEncryptionKey | aws-cdk-lib.aws_kms.IKey | Encryption key used to encrypt query results. | +| resultsRetentionPeriod | aws-cdk-lib.Duration | Indicates the number of days after creation when objects are deleted from the Result bucket. | +| state | @cdklabs/aws-data-solutions-framework.consumption.State | The state of the Workgroup. | --- -##### `DSF_OWNED_TAG`Required +##### `name`Required ```typescript -public readonly DSF_OWNED_TAG: string; +public readonly name: string; ``` - *Type:* string +Name of the Workgroup. + --- -##### `DSF_TRACKING_CODE`Required +##### `resultLocationPrefix`Required ```typescript -public readonly DSF_TRACKING_CODE: string; +public readonly resultLocationPrefix: string; ``` - *Type:* string ---- - -## Structs - -### Acl +Specifies the location in Amazon S3 where query results are stored. -Kakfa ACL This is similar to the object used by `kafkajs`, for more information see this [link](https://kafka.js.org/docs/admin#create-acl). +--- -#### Initializer +##### `bytesScannedCutoffPerQuery`Optional ```typescript -import { streaming } from '@cdklabs/aws-data-solutions-framework' - -const acl: streaming.Acl = { ... } +public readonly bytesScannedCutoffPerQuery: number; ``` -#### Properties +- *Type:* number -| **Name** | **Type** | **Description** | -| --- | --- | --- | -| host | string | *No description.* | -| operation | @cdklabs/aws-data-solutions-framework.streaming.AclOperationTypes | *No description.* | -| permissionType | @cdklabs/aws-data-solutions-framework.streaming.AclPermissionTypes | *No description.* | -| principal | string | *No description.* | -| resourceName | string | *No description.* | -| resourcePatternType | @cdklabs/aws-data-solutions-framework.streaming.ResourcePatternTypes | *No description.* | -| resourceType | @cdklabs/aws-data-solutions-framework.streaming.AclResourceTypes | *No description.* | +Indicates the number of days after creation when objects are deleted from the Result bucket. 
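+
+For reference, Athena's `BytesScannedCutoffPerQuery` setting caps how much data a single query in the workgroup may scan, expressed in bytes. An illustrative sketch (names and values are placeholders):
+
+```typescript
+const workgroupProps = {
+  name: 'analytics-workgroup',
+  resultLocationPrefix: 'results/',
+  bytesScannedCutoffPerQuery: 10 * 1024 * 1024 * 1024, // cap each query at ~10 GB scanned
+};
+```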
--- -##### `host`Required +##### `enforceWorkGroupConfiguration`Optional ```typescript -public readonly host: string; +public readonly enforceWorkGroupConfiguration: boolean; ``` -- *Type:* string +- *Type:* boolean +- *Default:* True. + +If set to "true", the settings for the workgroup override client-side settings. --- -##### `operation`Required +##### `engineVersion`Optional ```typescript -public readonly operation: AclOperationTypes; +public readonly engineVersion: EngineVersion; ``` -- *Type:* @cdklabs/aws-data-solutions-framework.streaming.AclOperationTypes +- *Type:* @cdklabs/aws-data-solutions-framework.consumption.EngineVersion +- *Default:* AUTO. + +The engine version on which the query runs. --- -##### `permissionType`Required +##### `executionRole`Optional ```typescript -public readonly permissionType: AclPermissionTypes; +public readonly executionRole: IRole; ``` -- *Type:* @cdklabs/aws-data-solutions-framework.streaming.AclPermissionTypes +- *Type:* aws-cdk-lib.aws_iam.IRole +- *Default:* The role is created if PySpark engine version is selected and no role is provided. + +Role used to access user resources in an Athena for Apache Spark session. --- -##### `principal`Required +##### `publishCloudWatchMetricsEnabled`Optional ```typescript -public readonly principal: string; +public readonly publishCloudWatchMetricsEnabled: boolean; ``` -- *Type:* string +- *Type:* boolean +- *Default:* True. + +Indicates that the Amazon CloudWatch metrics are enabled for the workgroup. --- -##### `resourceName`Required +##### `recursiveDeleteOption`Optional ```typescript -public readonly resourceName: string; +public readonly recursiveDeleteOption: boolean; ``` -- *Type:* string +- *Type:* boolean +- *Default:* Workgroup is retained. + +The option to delete a workgroup and its contents even if the workgroup contains any named queries. --- -##### `resourcePatternType`Required +##### `removalPolicy`Optional ```typescript -public readonly resourcePatternType: ResourcePatternTypes; +public readonly removalPolicy: RemovalPolicy; ``` -- *Type:* @cdklabs/aws-data-solutions-framework.streaming.ResourcePatternTypes +- *Type:* aws-cdk-lib.RemovalPolicy +- *Default:* The resources are not deleted (`RemovalPolicy.RETAIN`). + +The removal policy when deleting the CDK resource. + +If DESTROY is selected, context value `@data-solutions-framework-on-aws/removeDataOnDestroy` needs to be set to true. +Otherwise the removalPolicy is reverted to RETAIN. --- -##### `resourceType`Required +##### `requesterPaysEnabled`Optional ```typescript -public readonly resourceType: AclResourceTypes; +public readonly requesterPaysEnabled: boolean; ``` -- *Type:* @cdklabs/aws-data-solutions-framework.streaming.AclResourceTypes - ---- +- *Type:* boolean +- *Default:* False. -### AclAdminProps +Allows members assigned to a workgroup to reference Amazon S3 Requester Pays buckets in queries. -This Props allow you to define the principals that will be adminstartor as well as the principal that will be used by the CDK Custom resources to. +--- -#### Initializer +##### `resultBucket`Optional ```typescript -import { streaming } from '@cdklabs/aws-data-solutions-framework' - -const aclAdminProps: streaming.AclAdminProps = { ... } +public readonly resultBucket: IBucket; ``` -#### Properties +- *Type:* aws-cdk-lib.aws_s3.IBucket +- *Default:* Create a new bucket with SSE encryption using AnalyticsBucket if not provided. 
-| **Name** | **Type** | **Description** | -| --- | --- | --- | -| aclAdminPrincipal | string | This Principal will be used by the CDK custom resource to set ACLs and Topics. | -| adminPrincipal | string | The Principal that will have administrator privilege in MSK The MSK construct does not have access to this principal Keep this principal in a secure storage and should be only used in case you put an ACL that lock MSK access. | -| secretCertificate | aws-cdk-lib.aws_secretsmanager.ISecret | This is the TLS certificate of the Principal that is used by the CDK custom resource which set ACLs and Topics. | +Amazon S3 Bucket where query results are stored. --- -##### `aclAdminPrincipal`Required +##### `resultBucketName`Optional ```typescript -public readonly aclAdminPrincipal: string; +public readonly resultBucketName: string; ``` - *Type:* string +- *Default:* Name will be provided. -This Principal will be used by the CDK custom resource to set ACLs and Topics. +Name for the S3 Bucket in case it should be created. --- -##### `adminPrincipal`Required +##### `resultsEncryptionKey`Optional ```typescript -public readonly adminPrincipal: string; +public readonly resultsEncryptionKey: IKey; ``` -- *Type:* string +- *Type:* aws-cdk-lib.aws_kms.IKey +- *Default:* The key is created if Result Bucket is not provided. -The Principal that will have administrator privilege in MSK The MSK construct does not have access to this principal Keep this principal in a secure storage and should be only used in case you put an ACL that lock MSK access. +Encryption key used to encrypt query results. + +Has to be provided if Result bucket is provided. +User needs to grant access to it for AthenaWorkGroup's executionRole (if Spark engine) or for +principals that were granted to run queries using AthenaWorkGroup's grantRunQueries. --- -##### `secretCertificate`Required +##### `resultsRetentionPeriod`Optional ```typescript -public readonly secretCertificate: ISecret; +public readonly resultsRetentionPeriod: Duration; ``` -- *Type:* aws-cdk-lib.aws_secretsmanager.ISecret +- *Type:* aws-cdk-lib.Duration -This is the TLS certificate of the Principal that is used by the CDK custom resource which set ACLs and Topics. +Indicates the number of days after creation when objects are deleted from the Result bucket. -The secret in AWS secrets manager must be a JSON in the following format -{ - "key" : "PRIVATE-KEY", - "cert" : "CERTIFICATE" -} +--- -You can use the following utility to generate the certificates -https://github.com/aws-samples/amazon-msk-client-authentication +##### `state`Optional + +```typescript +public readonly state: State; +``` + +- *Type:* @cdklabs/aws-data-solutions-framework.consumption.State +- *Default:* ENABLED. + +The state of the Workgroup. --- -### AnalyticsBucketProps +### AuthorizerCentralWorflow -Properties for the `AnalyticsBucket` construct. +Interface for the authorizer central workflow. -#### Initializer +#### Initializer ```typescript -import { storage } from '@cdklabs/aws-data-solutions-framework' +import { governance } from '@cdklabs/aws-data-solutions-framework' -const analyticsBucketProps: storage.AnalyticsBucketProps = { ... } +const authorizerCentralWorflow: governance.AuthorizerCentralWorflow = { ... } ``` #### Properties | **Name** | **Type** | **Description** | | --- | --- | --- | -| encryptionKey | aws-cdk-lib.aws_kms.IKey | External KMS Key to use for the S3 Bucket encryption. 
| -| accessControl | aws-cdk-lib.aws_s3.BucketAccessControl | Specifies a canned ACL that grants predefined permissions to the bucket. | -| autoDeleteObjects | boolean | Whether all objects should be automatically deleted when the S3 Bucket is removed from the stack or when the stack is deleted. | -| blockPublicAccess | aws-cdk-lib.aws_s3.BlockPublicAccess | The block public access configuration of this bucket. | -| bucketKeyEnabled | boolean | Whether Amazon S3 should use its own intermediary key to generate data keys. Only relevant when using KMS for encryption. | -| bucketName | string | The physical name of this S3 Bucket. | -| cors | aws-cdk-lib.aws_s3.CorsRule[] | The CORS configuration of this bucket. | -| enforceSSL | boolean | Enforces SSL for requests. | -| eventBridgeEnabled | boolean | Whether this S3 Bucket should send notifications to Amazon EventBridge or not. | -| intelligentTieringConfigurations | aws-cdk-lib.aws_s3.IntelligentTieringConfiguration[] | Intelligent Tiering Configurations. | -| inventories | aws-cdk-lib.aws_s3.Inventory[] | The inventory configuration of the S3 Bucket. | -| lifecycleRules | aws-cdk-lib.aws_s3.LifecycleRule[] | Rules that define how Amazon S3 manages objects during their lifetime. | -| metrics | aws-cdk-lib.aws_s3.BucketMetrics[] | The metrics configuration of this bucket. | -| notificationsHandlerRole | aws-cdk-lib.aws_iam.IRole | The IAM Role to be used by the notifications handler. | -| objectLockDefaultRetention | aws-cdk-lib.aws_s3.ObjectLockRetention | The default retention mode and rules for S3 Object Lock. | -| objectLockEnabled | boolean | Enable object lock on the S3 Bucket. | -| objectOwnership | aws-cdk-lib.aws_s3.ObjectOwnership | The objectOwnership of the S3 Bucket. | -| publicReadAccess | boolean | Grants public read access to all objects in the S3 Bucket. | -| removalPolicy | aws-cdk-lib.RemovalPolicy | The removal policy when deleting the CDK resource. | -| serverAccessLogsBucket | aws-cdk-lib.aws_s3.IBucket | S3 Bucket destination for the server access logs. | -| serverAccessLogsPrefix | string | Optional log file prefix to use for the S3 Bucket's access logs. | -| transferAcceleration | boolean | Whether this S3 Bucket should have transfer acceleration turned on or not. | -| versioned | boolean | Whether this S3 Bucket should have versioning turned on or not. | +| authorizerEventRole | aws-cdk-lib.aws_iam.IRole | The authorizer event role for allowing events to invoke the workflow. | +| authorizerEventRule | aws-cdk-lib.aws_events.IRule | The authorizer event rule for triggering the workflow. | +| callbackEventRule | aws-cdk-lib.aws_events.IRule | The callback event rule for listening to producer and subscriber grants callback. | +| callbackFunction | aws-cdk-lib.aws_lambda.IFunction | The Lambda function for handling producer and subscriber grants callback. | +| callbackRole | aws-cdk-lib.aws_iam.IRole | The role for the Lambda function handling producer and subscriber grants callback. | +| deadLetterQueue | aws-cdk-lib.aws_sqs.IQueue | The authorizer dead letter queue for failed events. | +| stateMachine | aws-cdk-lib.aws_stepfunctions.StateMachine | The authorizer Step Functions state machine. 
| --- -##### `encryptionKey`Required +##### `authorizerEventRole`Required ```typescript -public readonly encryptionKey: IKey; +public readonly authorizerEventRole: IRole; ``` -- *Type:* aws-cdk-lib.aws_kms.IKey -- *Default:* If `encryption` is set to `KMS` and this property is undefined, a new KMS key will be created and associated with this bucket. - -External KMS Key to use for the S3 Bucket encryption. +- *Type:* aws-cdk-lib.aws_iam.IRole -The `encryption` property must be either not specified or set to `KMS` or `DSSE`. -An error will be emitted if `encryption` is set to `UNENCRYPTED` or `S3_MANAGED`. +The authorizer event role for allowing events to invoke the workflow. --- -##### `accessControl`Optional +##### `authorizerEventRule`Required ```typescript -public readonly accessControl: BucketAccessControl; +public readonly authorizerEventRule: IRule; ``` -- *Type:* aws-cdk-lib.aws_s3.BucketAccessControl -- *Default:* BucketAccessControl.PRIVATE +- *Type:* aws-cdk-lib.aws_events.IRule -Specifies a canned ACL that grants predefined permissions to the bucket. +The authorizer event rule for triggering the workflow. --- -##### `autoDeleteObjects`Optional +##### `callbackEventRule`Required ```typescript -public readonly autoDeleteObjects: boolean; +public readonly callbackEventRule: IRule; ``` -- *Type:* boolean -- *Default:* False - -Whether all objects should be automatically deleted when the S3 Bucket is removed from the stack or when the stack is deleted. +- *Type:* aws-cdk-lib.aws_events.IRule -Requires the `removalPolicy` to be set to `RemovalPolicy.DESTROY`. +The callback event rule for listening to producer and subscriber grants callback. --- -##### `blockPublicAccess`Optional +##### `callbackFunction`Required ```typescript -public readonly blockPublicAccess: BlockPublicAccess; +public readonly callbackFunction: IFunction; ``` -- *Type:* aws-cdk-lib.aws_s3.BlockPublicAccess -- *Default:* CloudFormation defaults will apply. New buckets and objects don't allow public access, but users can modify bucket policies or object permissions to allow public access +- *Type:* aws-cdk-lib.aws_lambda.IFunction -The block public access configuration of this bucket. +The Lambda function for handling producer and subscriber grants callback. --- -##### `bucketKeyEnabled`Optional +##### `callbackRole`Required ```typescript -public readonly bucketKeyEnabled: boolean; +public readonly callbackRole: IRole; ``` -- *Type:* boolean -- *Default:* False - -Whether Amazon S3 should use its own intermediary key to generate data keys. Only relevant when using KMS for encryption. - -If not enabled, every object GET and PUT will cause an API call to KMS (with the - attendant cost implications of that). -- If enabled, S3 will use its own time-limited key instead. +- *Type:* aws-cdk-lib.aws_iam.IRole -Only relevant, when Encryption is set to `BucketEncryption.KMS` or `BucketEncryption.KMS_MANAGED`. +The role for the Lambda function handling producer and subscriber grants callback. --- -##### `bucketName`Optional +##### `deadLetterQueue`Required ```typescript -public readonly bucketName: string; +public readonly deadLetterQueue: IQueue; ``` -- *Type:* string -- *Default:* `analytics---` +- *Type:* aws-cdk-lib.aws_sqs.IQueue -The physical name of this S3 Bucket. +The authorizer dead letter queue for failed events. 
---

-##### `cors`Optional
+##### `stateMachine`Required

```typescript
-public readonly cors: CorsRule[];
+public readonly stateMachine: StateMachine;
```

-- *Type:* aws-cdk-lib.aws_s3.CorsRule[]
-- *Default:* No CORS configuration.
+- *Type:* aws-cdk-lib.aws_stepfunctions.StateMachine

-The CORS configuration of this bucket.
+The authorizer Step Functions state machine.

---

-##### `enforceSSL`Optional
+### AuthorizerEnvironmentWorflow
+
+The interface representing the environment custom authorizer workflow.
+
+#### Initializer

```typescript
-public readonly enforceSSL: boolean;
-```
+import { governance } from '@cdklabs/aws-data-solutions-framework'

-- *Type:* boolean
-- *Default:* False
+const authorizerEnvironmentWorflow: governance.AuthorizerEnvironmentWorflow = { ... }
+```

-Enforces SSL for requests.
+#### Properties

-S3.5 of the AWS Foundational Security Best Practices Regarding S3.
+| **Name** | **Type** | **Description** |
+| --- | --- | --- |
+| deadLetterQueue | aws-cdk-lib.aws_sqs.IQueue | The dead letter queue for failed events. |
+| eventRole | aws-cdk-lib.aws_iam.IRole | The role used by the event rule to trigger the Step Functions state machine. |
+| eventRule | aws-cdk-lib.aws_events.IRule | The event rule that triggers the workflow. |
+| stateMachine | aws-cdk-lib.aws_stepfunctions.IStateMachine | The state machine that orchestrates the workflow. |
+| eventBusPolicy | aws-cdk-lib.aws_events.CfnEventBusPolicy | The optional event bus policy for cross-account workflows. |

---

-##### `eventBridgeEnabled`Optional
+##### `deadLetterQueue`Required

```typescript
-public readonly eventBridgeEnabled: boolean;
+public readonly deadLetterQueue: IQueue;
```

-- *Type:* boolean
-- *Default:* False
+- *Type:* aws-cdk-lib.aws_sqs.IQueue

-Whether this S3 Bucket should send notifications to Amazon EventBridge or not.
+The dead letter queue for failed events.

---

-##### `intelligentTieringConfigurations`Optional
+##### `eventRole`Required

```typescript
-public readonly intelligentTieringConfigurations: IntelligentTieringConfiguration[];
+public readonly eventRole: IRole;
```

-- *Type:* aws-cdk-lib.aws_s3.IntelligentTieringConfiguration[]
-- *Default:* No Intelligent Tiiering Configurations.
+- *Type:* aws-cdk-lib.aws_iam.IRole

-Intelligent Tiering Configurations.
+The role used by the event rule to trigger the Step Functions state machine.

---

-##### `inventories`Optional
+##### `eventRule`Required

```typescript
-public readonly inventories: Inventory[];
+public readonly eventRule: IRule;
```

-- *Type:* aws-cdk-lib.aws_s3.Inventory[]
-- *Default:* No inventory configuration
+- *Type:* aws-cdk-lib.aws_events.IRule

-The inventory configuration of the S3 Bucket.
+The event rule that triggers the workflow.

---

-##### `lifecycleRules`Optional
+##### `stateMachine`Required

```typescript
-public readonly lifecycleRules: LifecycleRule[];
+public readonly stateMachine: IStateMachine;
```

-- *Type:* aws-cdk-lib.aws_s3.LifecycleRule[]
-- *Default:* No lifecycle rules.
+- *Type:* aws-cdk-lib.aws_stepfunctions.IStateMachine

-Rules that define how Amazon S3 manages objects during their lifetime.
+The state machine that orchestrates the workflow.

---

-##### `metrics`Optional
+##### `eventBusPolicy`Optional

```typescript
-public readonly metrics: BucketMetrics[];
+public readonly eventBusPolicy: CfnEventBusPolicy;
```

-- *Type:* aws-cdk-lib.aws_s3.BucketMetrics[]
-- *Default:* No metrics configuration.
+- *Type:* aws-cdk-lib.aws_events.CfnEventBusPolicy

-The metrics configuration of this bucket.
+The optional event bus policy for cross-account workflows. --- -##### `notificationsHandlerRole`Optional +### BaseRedshiftDataSharingAccessProps + +The base interface for the different data sharing lifecycle properties. + +#### Initializer ```typescript -public readonly notificationsHandlerRole: IRole; +import { consumption } from '@cdklabs/aws-data-solutions-framework' + +const baseRedshiftDataSharingAccessProps: consumption.BaseRedshiftDataSharingAccessProps = { ... } ``` -- *Type:* aws-cdk-lib.aws_iam.IRole -- *Default:* A new IAM Role will be created. +#### Properties -The IAM Role to be used by the notifications handler. +| **Name** | **Type** | **Description** | +| --- | --- | --- | +| databaseName | string | The name of the Redshift database used in the data sharing. | +| dataShareName | string | The name of the data share. | +| accountId | string | For cross-account grants, this is the consumer account ID. | +| dataShareArn | string | The ARN of the datashare. | +| namespaceId | string | For single account grants, this is the consumer namespace ID. For cross-account grants, `namespaceId` is ignored. | --- -##### `objectLockDefaultRetention`Optional +##### `databaseName`Required ```typescript -public readonly objectLockDefaultRetention: ObjectLockRetention; +public readonly databaseName: string; ``` -- *Type:* aws-cdk-lib.aws_s3.ObjectLockRetention -- *Default:* No default retention period - -The default retention mode and rules for S3 Object Lock. +- *Type:* string -Default retention can be configured after a bucket is created if the bucket already -has object lock enabled. Enabling object lock for existing buckets is not supported. +The name of the Redshift database used in the data sharing. --- -##### `objectLockEnabled`Optional +##### `dataShareName`Required ```typescript -public readonly objectLockEnabled: boolean; +public readonly dataShareName: string; ``` -- *Type:* boolean -- *Default:* False, unless objectLockDefaultRetention is set (then, true) - -Enable object lock on the S3 Bucket. +- *Type:* string -Enabling object lock for existing buckets is not supported. Object lock must be enabled when the bucket is created. +The name of the data share. --- -##### `objectOwnership`Optional +##### `accountId`Optional ```typescript -public readonly objectOwnership: ObjectOwnership; +public readonly accountId: string; ``` -- *Type:* aws-cdk-lib.aws_s3.ObjectOwnership -- *Default:* No ObjectOwnership configuration, uploading account will own the object. +- *Type:* string +- *Default:* No account ID is used. -The objectOwnership of the S3 Bucket. +For cross-account grants, this is the consumer account ID. + +For cross-account consumers, this is the producer account ID. --- -##### `publicReadAccess`Optional +##### `dataShareArn`Optional ```typescript -public readonly publicReadAccess: boolean; +public readonly dataShareArn: string; ``` -- *Type:* boolean -- *Default:* False +- *Type:* string +- *Default:* No data share ARN is used. -Grants public read access to all objects in the S3 Bucket. +The ARN of the datashare. -Similar to calling `bucket.grantPublicAccess()` +This is required for any action that is cross account. --- -##### `removalPolicy`Optional +##### `namespaceId`Optional ```typescript -public readonly removalPolicy: RemovalPolicy; +public readonly namespaceId: string; ``` -- *Type:* aws-cdk-lib.RemovalPolicy -- *Default:* The resources are not deleted (`RemovalPolicy.RETAIN`). +- *Type:* string +- *Default:* No namespace ID is used. 
-The removal policy when deleting the CDK resource.
+For single account grants, this is the consumer namespace ID. For cross-account grants, `namespaceId` is ignored.

-If DESTROY is selected, context value `@data-solutions-framework-on-aws/removeDataOnDestroy` needs to be set to true.
-Otherwise the removalPolicy is reverted to RETAIN.
+For consumers, this is the producer namespace ID. It is required for both single and cross-account data sharing.

---

-##### `serverAccessLogsBucket`Optional
+### BrokerLogging
+
+Configuration details related to broker logs.
+
+#### Initializer
+
+```typescript
+import { streaming } from '@cdklabs/aws-data-solutions-framework'

-```typescript
-public readonly serverAccessLogsBucket: IBucket;
+const brokerLogging: streaming.BrokerLogging = { ... }
```

-- *Type:* aws-cdk-lib.aws_s3.IBucket
-- *Default:* If "serverAccessLogsPrefix" undefined - access logs disabled, otherwise - log to current bucket.
+#### Properties

-S3 Bucket destination for the server access logs.
+| **Name** | **Type** | **Description** |
+| --- | --- | --- |
+| cloudwatchLogGroup | aws-cdk-lib.aws_logs.ILogGroup | The CloudWatch Logs group that is the destination for broker logs. |
+| firehoseDeliveryStreamName | string | The Kinesis Data Firehose delivery stream that is the destination for broker logs. |
+| s3 | @cdklabs/aws-data-solutions-framework.streaming.S3LoggingConfiguration | Details of the Amazon S3 destination for broker logs. |

---

-##### `serverAccessLogsPrefix`Optional
+##### `cloudwatchLogGroup`Optional

```typescript
-public readonly serverAccessLogsPrefix: string;
+public readonly cloudwatchLogGroup: ILogGroup;
```

-- *Type:* string
-- *Default:* No log file prefix
-
-Optional log file prefix to use for the S3 Bucket's access logs.
+- *Type:* aws-cdk-lib.aws_logs.ILogGroup
+- *Default:* disabled

-If defined without "serverAccessLogsBucket", enables access logs to current S3 Bucket with this prefix.
+The CloudWatch Logs group that is the destination for broker logs.

---

-##### `transferAcceleration`Optional
+##### `firehoseDeliveryStreamName`Optional

```typescript
-public readonly transferAcceleration: boolean;
+public readonly firehoseDeliveryStreamName: string;
```

-- *Type:* boolean
-- *Default:* False
+- *Type:* string
+- *Default:* disabled

-Whether this S3 Bucket should have transfer acceleration turned on or not.
+The Kinesis Data Firehose delivery stream that is the destination for broker logs.

---

-##### `versioned`Optional
+##### `s3`Optional

```typescript
-public readonly versioned: boolean;
+public readonly s3: S3LoggingConfiguration;
```

-- *Type:* boolean
-- *Default:* False (unless object lock is enabled, then true)
+- *Type:* @cdklabs/aws-data-solutions-framework.streaming.S3LoggingConfiguration
+- *Default:* disabled

-Whether this S3 Bucket should have versioning turned on or not.
+Details of the Amazon S3 destination for broker logs.

---

-### ApplicationStageProps
+### ClusterConfigurationInfo

-Properties for the `ApplicationStage` class.
+The Amazon MSK configuration to use for the cluster.

-#### Initializer
+Note: There is currently no CloudFormation resource to create a Configuration.

+#### Initializer

```typescript
-import { utils } from '@cdklabs/aws-data-solutions-framework'
+import { streaming } from '@cdklabs/aws-data-solutions-framework'

-const applicationStageProps: utils.ApplicationStageProps = { ... }
+const clusterConfigurationInfo: streaming.ClusterConfigurationInfo = { ...
} ``` #### Properties | **Name** | **Type** | **Description** | | --- | --- | --- | -| env | aws-cdk-lib.Environment | Default AWS environment (account/region) for `Stack`s in this `Stage`. | -| outdir | string | The output directory into which to emit synthesized artifacts. | -| permissionsBoundary | aws-cdk-lib.PermissionsBoundary | Options for applying a permissions boundary to all IAM Roles and Users created within this Stage. | -| policyValidationBeta1 | aws-cdk-lib.IPolicyValidationPluginBeta1[] | Validation plugins to run during synthesis. | -| stageName | string | Name of this stage. | -| applicationStackFactory | @cdklabs/aws-data-solutions-framework.utils.ApplicationStackFactory | The application CDK Stack Factory used to create application Stacks. | -| stage | @cdklabs/aws-data-solutions-framework.utils.CICDStage | The Stage to deploy the application CDK Stack in. | -| outputsEnv | {[ key: string ]: string} | The list of values to create CfnOutputs. | +| arn | string | The Amazon Resource Name (ARN) of the MSK configuration to use. | +| revision | number | The revision of the Amazon MSK configuration to use. | --- -##### `env`Optional +##### `arn`Required ```typescript -public readonly env: Environment; +public readonly arn: string; ``` -- *Type:* aws-cdk-lib.Environment -- *Default:* The environments should be configured on the `Stack`s. - -Default AWS environment (account/region) for `Stack`s in this `Stage`. - -Stacks defined inside this `Stage` with either `region` or `account` missing -from its env will use the corresponding field given here. +- *Type:* string -If either `region` or `account`is is not configured for `Stack` (either on -the `Stack` itself or on the containing `Stage`), the Stack will be -*environment-agnostic*. +The Amazon Resource Name (ARN) of the MSK configuration to use. -Environment-agnostic stacks can be deployed to any environment, may not be -able to take advantage of all features of the CDK. For example, they will -not be able to use environmental context lookups, will not automatically -translate Service Principals to the right format based on the environment's -AWS partition, and other such enhancements. +For example, arn:aws:kafka:us-east-1:123456789012:configuration/example-configuration-name/abcdabcd-1234-abcd-1234-abcd123e8e8e-1. --- -*Example* +##### `revision`Required ```typescript -// Use a concrete account and region to deploy this Stage to -new Stage(app, 'Stage1', { - env: { account: '123456789012', region: 'us-east-1' }, -}); - -// Use the CLI's current credentials to determine the target environment -new Stage(app, 'Stage2', { - env: { account: process.env.CDK_DEFAULT_ACCOUNT, region: process.env.CDK_DEFAULT_REGION }, -}); +public readonly revision: number; ``` +- *Type:* number -##### `outdir`Optional +The revision of the Amazon MSK configuration to use. + +--- + +### CreateServiceLinkedRoleProps + +The properties of the `CreateServiceLinkedRole` construct. + +#### Initializer ```typescript -public readonly outdir: string; -``` +import { utils } from '@cdklabs/aws-data-solutions-framework' -- *Type:* string -- *Default:* for nested stages, outdir will be determined as a relative directory to the outdir of the app. For apps, if outdir is not specified, a temporary directory will be created. +const createServiceLinkedRoleProps: utils.CreateServiceLinkedRoleProps = { ... } +``` -The output directory into which to emit synthesized artifacts. +#### Properties -Can only be specified if this stage is the root stage (the app). 
If this is
specified and this stage is nested within another stage, an error will be
thrown.
+| **Name** | **Type** | **Description** |
+| --- | --- | --- |
+| removalPolicy | aws-cdk-lib.RemovalPolicy | The removal policy when deleting the CDK resource. |

---

-##### `permissionsBoundary`Optional
+##### `removalPolicy`Optional

```typescript
-public readonly permissionsBoundary: PermissionsBoundary;
+public readonly removalPolicy: RemovalPolicy;
```

-- *Type:* aws-cdk-lib.PermissionsBoundary
-- *Default:* no permissions boundary is applied
+- *Type:* aws-cdk-lib.RemovalPolicy
+- *Default:* The resources are not deleted (`RemovalPolicy.RETAIN`).

-Options for applying a permissions boundary to all IAM Roles and Users created within this Stage.
+The removal policy when deleting the CDK resource.
+
+If DESTROY is selected, context value `@data-solutions-framework-on-aws/removeDataOnDestroy` needs to be set to true.
+Otherwise, the removalPolicy is reverted to RETAIN.

---

-##### `policyValidationBeta1`Optional
+### CustomAssetType
+
+Interface representing a DataZone custom asset type.
+
+#### Initializer

```typescript
-public readonly policyValidationBeta1: IPolicyValidationPluginBeta1[];
-```
+import { governance } from '@cdklabs/aws-data-solutions-framework'

-- *Type:* aws-cdk-lib.IPolicyValidationPluginBeta1[]
-- *Default:* no validation plugins are used
+const customAssetType: governance.CustomAssetType = { ... }
+```

-Validation plugins to run during synthesis.
+#### Properties

-If any plugin reports any violation,
-synthesis will be interrupted and the report displayed to the user.
+| **Name** | **Type** | **Description** |
+| --- | --- | --- |
+| domainIdentifier | string | The domain identifier of the custom asset type. |
+| name | string | The name of the custom asset type. |
+| projectIdentifier | string | The identifier of the project owning the custom asset type. |
+| revision | string | The revision of the custom asset type. |

---

-##### `stageName`Optional
+##### `domainIdentifier`Required

```typescript
-public readonly stageName: string;
+public readonly domainIdentifier: string;
```

- *Type:* string
-- *Default:* Derived from the id.

-Name of this stage.
+The domain identifier of the custom asset type.

---

-##### `applicationStackFactory`Required
+##### `name`Required

```typescript
-public readonly applicationStackFactory: ApplicationStackFactory;
+public readonly name: string;
```

-- *Type:* @cdklabs/aws-data-solutions-framework.utils.ApplicationStackFactory
+- *Type:* string

-The application CDK Stack Factory used to create application Stacks.
+The name of the custom asset type.

---

-##### `stage`Required
+##### `projectIdentifier`Required

```typescript
-public readonly stage: CICDStage;
+public readonly projectIdentifier: string;
```

-- *Type:* @cdklabs/aws-data-solutions-framework.utils.CICDStage
-- *Default:* No stage is passed to the application stack
+- *Type:* string

-The Stage to deploy the application CDK Stack in.
+The identifier of the project owning the custom asset type.

---

-##### `outputsEnv`Optional
+##### `revision`Required

```typescript
-public readonly outputsEnv: {[ key: string ]: string};
+public readonly revision: string;
```

-- *Type:* {[ key: string ]: string}
-- *Default:* No CfnOutputs are created
+- *Type:* string

-The list of values to create CfnOutputs.
+The revision of the custom asset type.

---

-### AthenaWorkgroupProps
+### DataCatalogDatabaseProps

-Properties for the AthenaWorkgroup Construct.
+Properties for the `DataCatalogDatabase` construct.
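+
+*Example*
+
+A minimal, illustrative sketch only: the database name and S3 prefix below are hypothetical, and `bucket` is assumed to be an existing `aws_s3.IBucket` in scope.
+
+```typescript
+new dsf.governance.DataCatalogDatabase(this, 'ExampleDatabase', {
+  name: 'example_database',
+  locationBucket: bucket, // hypothetical existing bucket
+  locationPrefix: '/example_database',
+});
+```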
-#### Initializer
+#### Initializer

```typescript
-import { consumption } from '@cdklabs/aws-data-solutions-framework'
+import { governance } from '@cdklabs/aws-data-solutions-framework'

-const athenaWorkgroupProps: consumption.AthenaWorkgroupProps = { ... }
+const dataCatalogDatabaseProps: governance.DataCatalogDatabaseProps = { ... }
```

#### Properties

| **Name** | **Type** | **Description** |
| --- | --- | --- |
-| name | string | Name of the Workgroup. |
-| resultLocationPrefix | string | Specifies the location in Amazon S3 where query results are stored. |
-| bytesScannedCutoffPerQuery | number | Indicates the number of days after creation when objects are deleted from the Result bucket. |
-| enforceWorkGroupConfiguration | boolean | If set to "true", the settings for the workgroup override client-side settings. |
-| engineVersion | @cdklabs/aws-data-solutions-framework.consumption.EngineVersion | The engine version on which the query runs. |
-| executionRole | aws-cdk-lib.aws_iam.IRole | Role used to access user resources in an Athena for Apache Spark session. |
-| publishCloudWatchMetricsEnabled | boolean | Indicates that the Amazon CloudWatch metrics are enabled for the workgroup. |
-| recursiveDeleteOption | boolean | The option to delete a workgroup and its contents even if the workgroup contains any named queries. |
-| removalPolicy | aws-cdk-lib.RemovalPolicy | The removal policy when deleting the CDK resource. |
-| requesterPaysEnabled | boolean | Allows members assigned to a workgroup to reference Amazon S3 Requester Pays buckets in queries. |
-| resultBucket | aws-cdk-lib.aws_s3.IBucket | Amazon S3 Bucket where query results are stored. |
-| resultBucketName | string | Name for the S3 Bucket in case it should be created. |
-| resultsEncryptionKey | aws-cdk-lib.aws_kms.IKey | Encryption key used to encrypt query results. |
-| resultsRetentionPeriod | aws-cdk-lib.Duration | Indicates the number of days after creation when objects are deleted from the Result bucket. |
-| state | @cdklabs/aws-data-solutions-framework.consumption.State | The state of the Workgroup. |
+| name | string | Database name. |
+| autoCrawl | boolean | When enabled, this automatically creates a top level Glue Crawler that runs on the schedule defined in the `autoCrawlSchedule` parameter. |
+| autoCrawlSchedule | aws-cdk-lib.aws_glue.CfnCrawler.ScheduleProperty | The schedule to run the Glue Crawler. |
+| crawlerLogEncryptionKey | aws-cdk-lib.aws_kms.IKey | KMS encryption Key used for the Glue Crawler logs. |
+| crawlerRole | aws-cdk-lib.aws_iam.IRole | The IAM Role used by the Glue Crawler when `autoCrawl` is set to `True`. |
+| crawlerTableLevelDepth | number | Directory depth where the table folders are located. |
+| glueConnectionName | string | The connection used by the crawler. |
+| jdbcPath | string | The JDBC path included by the crawler. |
+| jdbcSecret | aws-cdk-lib.aws_secretsmanager.ISecret | The secret associated with the JDBC connection. |
+| jdbcSecretKMSKey | aws-cdk-lib.aws_kms.IKey | The KMS key used by the JDBC secret. |
+| locationBucket | aws-cdk-lib.aws_s3.IBucket | S3 bucket where data is stored. |
+| locationPrefix | string | Top level location where table data is stored. |
+| removalPolicy | aws-cdk-lib.RemovalPolicy | The removal policy when deleting the CDK resource. |
---

-##### `name`Required
+##### `name`Required

```typescript
public readonly name: string;
@@ -11968,420 +14014,426 @@ public readonly name: string;

- *Type:* string

-Name of the Workgroup.
+Database name.
+
+The construct adds a randomized suffix to the name to prevent name collisions.

---

-##### `resultLocationPrefix`Required
+##### `autoCrawl`Optional

```typescript
-public readonly resultLocationPrefix: string;
+public readonly autoCrawl: boolean;
```

-- *Type:* string
+- *Type:* boolean
+- *Default:* True

-Specifies the location in Amazon S3 where query results are stored.
+When enabled, this automatically creates a top level Glue Crawler that runs on the schedule defined in the `autoCrawlSchedule` parameter.

---

-##### `bytesScannedCutoffPerQuery`Optional
+##### `autoCrawlSchedule`Optional

```typescript
-public readonly bytesScannedCutoffPerQuery: number;
+public readonly autoCrawlSchedule: ScheduleProperty;
```

-- *Type:* number
+- *Type:* aws-cdk-lib.aws_glue.CfnCrawler.ScheduleProperty
+- *Default:* `cron(1 0 * * ? *)`

-Indicates the number of days after creation when objects are deleted from the Result bucket.
+The schedule to run the Glue Crawler.
+
+Default is once a day at 00:01h.

---

-##### `enforceWorkGroupConfiguration`Optional
+##### `crawlerLogEncryptionKey`Optional

```typescript
-public readonly enforceWorkGroupConfiguration: boolean;
+public readonly crawlerLogEncryptionKey: IKey;
```

-- *Type:* boolean
-- *Default:* True.
+- *Type:* aws-cdk-lib.aws_kms.IKey
+- *Default:* Create a new key if none is provided

-If set to "true", the settings for the workgroup override client-side settings.
+KMS encryption Key used for the Glue Crawler logs.

---

-##### `engineVersion`Optional
+##### `crawlerRole`Optional

```typescript
-public readonly engineVersion: EngineVersion;
+public readonly crawlerRole: IRole;
```

-- *Type:* @cdklabs/aws-data-solutions-framework.consumption.EngineVersion
-- *Default:* AUTO.
+- *Type:* aws-cdk-lib.aws_iam.IRole
+- *Default:* When `autoCrawl` is enabled, a new role is created with least privilege permissions to run the crawler
+
+The IAM Role used by the Glue Crawler when `autoCrawl` is set to `True`.

-The engine version on which the query runs.
+Additional permissions are granted to this role such as S3 Bucket read only permissions and KMS encrypt/decrypt on the key used by the Glue Crawler logging to CloudWatch Logs.

---

-##### `executionRole`Optional
+##### `crawlerTableLevelDepth`Optional

```typescript
-public readonly executionRole: IRole;
+public readonly crawlerTableLevelDepth: number;
```

-- *Type:* aws-cdk-lib.aws_iam.IRole
-- *Default:* The role is created if PySpark engine version is selected and no role is provided.
+- *Type:* number
+- *Default:* calculated based on `locationPrefix`

-Role used to access user resources in an Athena for Apache Spark session.
+Directory depth where the table folders are located.
+
+This helps the Glue Crawler understand the layout of the folders in S3.

---

-##### `publishCloudWatchMetricsEnabled`Optional
+##### `glueConnectionName`Optional

```typescript
-public readonly publishCloudWatchMetricsEnabled: boolean;
+public readonly glueConnectionName: string;
```

-- *Type:* boolean
-- *Default:* True.
+- *Type:* string

-Indicates that the Amazon CloudWatch metrics are enabled for the workgroup.
+The connection used by the crawler.
---

-##### `recursiveDeleteOption`Optional
+##### `jdbcPath`Optional

```typescript
-public readonly recursiveDeleteOption: boolean;
+public readonly jdbcPath: string;
```

-- *Type:* boolean
-- *Default:* Workgroup is retained.
+- *Type:* string

-The option to delete a workgroup and its contents even if the workgroup contains any named queries.
+The JDBC path included by the crawler.

---

-##### `removalPolicy`Optional
+##### `jdbcSecret`Optional

```typescript
-public readonly removalPolicy: RemovalPolicy;
+public readonly jdbcSecret: ISecret;
```

-- *Type:* aws-cdk-lib.RemovalPolicy
-- *Default:* The resources are not deleted (`RemovalPolicy.RETAIN`).
-
-The removal policy when deleting the CDK resource.
+- *Type:* aws-cdk-lib.aws_secretsmanager.ISecret

-If DESTROY is selected, context value `@data-solutions-framework-on-aws/removeDataOnDestroy` needs to be set to true.
-Otherwise the removalPolicy is reverted to RETAIN.
+The secret associated with the JDBC connection.

---

-##### `requesterPaysEnabled`Optional
+##### `jdbcSecretKMSKey`Optional

```typescript
-public readonly requesterPaysEnabled: boolean;
+public readonly jdbcSecretKMSKey: IKey;
```

-- *Type:* boolean
-- *Default:* False.
+- *Type:* aws-cdk-lib.aws_kms.IKey

-Allows members assigned to a workgroup to reference Amazon S3 Requester Pays buckets in queries.
+The KMS key used by the JDBC secret.

---

-##### `resultBucket`Optional
+##### `locationBucket`Optional

```typescript
-public readonly resultBucket: IBucket;
+public readonly locationBucket: IBucket;
```

- *Type:* aws-cdk-lib.aws_s3.IBucket
-- *Default:* Create a new bucket with SSE encryption using AnalyticsBucket if not provided.

-Amazon S3 Bucket where query results are stored.
+S3 bucket where data is stored.

---

-##### `resultBucketName`Optional
+##### `locationPrefix`Optional

```typescript
-public readonly resultBucketName: string;
+public readonly locationPrefix: string;
```

- *Type:* string
-- *Default:* Name will be provided.

-Name for the S3 Bucket in case it should be created.
+Top level location where table data is stored.
+
+The location prefix cannot be empty if the `locationBucket` is set.
+The minimal configuration is `/` for the root level in the Bucket.

---

-##### `resultsEncryptionKey`Optional
+##### `removalPolicy`Optional

```typescript
-public readonly resultsEncryptionKey: IKey;
+public readonly removalPolicy: RemovalPolicy;
```

-- *Type:* aws-cdk-lib.aws_kms.IKey
-- *Default:* The key is created if Result Bucket is not provided.
+- *Type:* aws-cdk-lib.RemovalPolicy
+- *Default:* The resources are not deleted (`RemovalPolicy.RETAIN`).

-Encryption key used to encrypt query results.
+The removal policy when deleting the CDK resource.

-Has to be provided if Result bucket is provided.
-User needs to grant access to it for AthenaWorkGroup's executionRole (if Spark engine) or for
-principals that were granted to run queries using AthenaWorkGroup's grantRunQueries.
+If DESTROY is selected, context value `@data-solutions-framework-on-aws/removeDataOnDestroy` needs to be set to true.
+Otherwise the removalPolicy is reverted to RETAIN.

---

-##### `resultsRetentionPeriod`Optional
+### DataLakeCatalogProps

-```typescript
-public readonly resultsRetentionPeriod: Duration;
+Properties for the `DataLakeCatalog` Construct.
+
+#### Initializer

+```typescript
+import { governance } from '@cdklabs/aws-data-solutions-framework'
+
+const dataLakeCatalogProps: governance.DataLakeCatalogProps = { ... }
```
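+
+*Example*
+
+A minimal, illustrative sketch: `DataLakeCatalog` is the construct consuming these properties, the construct IDs are hypothetical, and only the required `dataLakeStorage` property is set.
+
+```typescript
+// Create the data lake storage layer first, then build the catalog on top of it.
+const storage = new dsf.storage.DataLakeStorage(this, 'ExampleStorage');
+
+new dsf.governance.DataLakeCatalog(this, 'ExampleCatalog', {
+  dataLakeStorage: storage,
+});
+```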
#### Properties

| **Name** | **Type** | **Description** |
| --- | --- | --- |
| dataLakeStorage | @cdklabs/aws-data-solutions-framework.storage.DataLakeStorage | The DataLakeStorage object to create the data catalog on. |
| autoCrawl | boolean | When enabled, creates a top level Glue Crawler that runs on the schedule defined in the `autoCrawlSchedule` parameter. |
| autoCrawlSchedule | aws-cdk-lib.aws_glue.CfnCrawler.ScheduleProperty | The schedule when the Glue Crawler runs, if enabled. |
| crawlerLogEncryptionKey | aws-cdk-lib.aws_kms.IKey | The KMS encryption Key used for the Glue Crawler logs. |
| crawlerTableLevelDepth | number | Directory depth where the table folders are located. |
| databaseName | string | The suffix of the Glue Data Catalog Database. |
| removalPolicy | aws-cdk-lib.RemovalPolicy | The removal policy when deleting the CDK resource. |

---

##### `dataLakeStorage`Required

```typescript
public readonly dataLakeStorage: DataLakeStorage;
```

- *Type:* @cdklabs/aws-data-solutions-framework.storage.DataLakeStorage

The DataLakeStorage object to create the data catalog on.

---

##### `autoCrawl`Optional

```typescript
public readonly autoCrawl: boolean;
```

- *Type:* boolean
- *Default:* True

When enabled, creates a top level Glue Crawler that runs on the schedule defined in the `autoCrawlSchedule` parameter.

---

##### `autoCrawlSchedule`Optional

```typescript
public readonly autoCrawlSchedule: ScheduleProperty;
```

- *Type:* aws-cdk-lib.aws_glue.CfnCrawler.ScheduleProperty
- *Default:* `cron(1 0 * * ? *)`

The schedule when the Glue Crawler runs, if enabled.

Default is once a day at 00:01h.

---

##### `crawlerLogEncryptionKey`Optional

```typescript
public readonly crawlerLogEncryptionKey: IKey;
```

- *Type:* aws-cdk-lib.aws_kms.IKey
- *Default:* Create a new KMS Key if none is provided

The KMS encryption Key used for the Glue Crawler logs.

---

##### `crawlerTableLevelDepth`Optional

```typescript
public readonly crawlerTableLevelDepth: number;
```

-- *Type:* string
-- *Default:* No account ID is used.
+- *Type:* number +- *Default:* calculated based on `locationPrefix` -For cross-account grants, this is the consumer account ID. +Directory depth where the table folders are located. -For cross-account consumers, this is the producer account ID. +This helps the Glue Crawler understand the layout of the folders in S3. --- -##### `dataShareArn`Optional +##### `databaseName`Optional ```typescript -public readonly dataShareArn: string; +public readonly databaseName: string; ``` - *Type:* string -- *Default:* No data share ARN is used. +- *Default:* Use the bucket name as the database name and as the S3 location -The ARN of the datashare. +The suffix of the Glue Data Catalog Database. -This is required for any action that is cross account. +The name of the Glue Database is composed of the S3 Bucket name and this suffix. +The suffix is also added to the S3 location inside the data lake S3 Buckets. --- -##### `namespaceId`Optional +##### `removalPolicy`Optional ```typescript -public readonly namespaceId: string; +public readonly removalPolicy: RemovalPolicy; ``` -- *Type:* string -- *Default:* No namespace ID is used. +- *Type:* aws-cdk-lib.RemovalPolicy +- *Default:* The resources are not deleted (`RemovalPolicy.RETAIN`). -For single account grants, this is the consumer namespace ID. For cross-account grants, `namespaceId` is ignored. +The removal policy when deleting the CDK resource. -For consumers, this is the producer namespace ID. It is required for both single and cross account data sharing. +If DESTROY is selected, context value `@data-solutions-framework-on-aws/removeDataOnDestroy` needs to be set to true. +Otherwise the removalPolicy is reverted to RETAIN. --- -### BrokerLogging +### DataLakeStorageProps -Configuration details related to broker logs. +Properties for the DataLakeStorage Construct. -#### Initializer +#### Initializer ```typescript -import { streaming } from '@cdklabs/aws-data-solutions-framework' +import { storage } from '@cdklabs/aws-data-solutions-framework' -const brokerLogging: streaming.BrokerLogging = { ... } +const dataLakeStorageProps: storage.DataLakeStorageProps = { ... } ``` #### Properties | **Name** | **Type** | **Description** | | --- | --- | --- | -| cloudwatchLogGroup | aws-cdk-lib.aws_logs.ILogGroup | The CloudWatch Logs group that is the destination for broker logs. | -| firehoseDeliveryStreamName | string | The Kinesis Data Firehose delivery stream that is the destination for broker logs. | -| s3 | @cdklabs/aws-data-solutions-framework.streaming.S3LoggingConfiguration | Details of the Amazon S3 destination for broker logs. | +| bronzeBucketArchiveDelay | number | Delay (in days) before archiving BRONZE data to frozen storage (Glacier storage class). | +| bronzeBucketInfrequentAccessDelay | number | Delay (in days) before moving BRONZE data to cold storage (Infrequent Access storage class). | +| bronzeBucketName | string | Name of the Bronze bucket. | +| dataLakeKey | aws-cdk-lib.aws_kms.IKey | The KMS Key used to encrypt all DataLakeStorage S3 buckets. | +| goldBucketArchiveDelay | number | Delay (in days) before archiving GOLD data to frozen storage (Glacier storage class). | +| goldBucketInfrequentAccessDelay | number | Delay (in days) before moving GOLD data to cold storage (Infrequent Access storage class). | +| goldBucketName | string | Name of the Gold bucket. | +| removalPolicy | aws-cdk-lib.RemovalPolicy | The removal policy when deleting the CDK resource. 
| +| silverBucketArchiveDelay | number | Delay (in days) before archiving SILVER data to frozen storage (Glacier storage class). | +| silverBucketInfrequentAccessDelay | number | Delay (in days) before moving SILVER data to cold storage (Infrequent Access storage class). | +| silverBucketName | string | Name of the Silver bucket. | --- -##### `cloudwatchLogGroup`Optional +##### `bronzeBucketArchiveDelay`Optional ```typescript -public readonly cloudwatchLogGroup: ILogGroup; +public readonly bronzeBucketArchiveDelay: number; ``` -- *Type:* aws-cdk-lib.aws_logs.ILogGroup -- *Default:* disabled +- *Type:* number +- *Default:* Move objects to Glacier after 90 days. -The CloudWatch Logs group that is the destination for broker logs. +Delay (in days) before archiving BRONZE data to frozen storage (Glacier storage class). --- -##### `firehoseDeliveryStreamName`Optional +##### `bronzeBucketInfrequentAccessDelay`Optional ```typescript -public readonly firehoseDeliveryStreamName: string; +public readonly bronzeBucketInfrequentAccessDelay: number; ``` -- *Type:* string -- *Default:* disabled +- *Type:* number +- *Default:* Move objects to Infrequent Access after 30 days. -The Kinesis Data Firehose delivery stream that is the destination for broker logs. +Delay (in days) before moving BRONZE data to cold storage (Infrequent Access storage class). --- -##### `s3`Optional +##### `bronzeBucketName`Optional ```typescript -public readonly s3: S3LoggingConfiguration; +public readonly bronzeBucketName: string; ``` -- *Type:* @cdklabs/aws-data-solutions-framework.streaming.S3LoggingConfiguration -- *Default:* disabled - -Details of the Amazon S3 destination for broker logs. - ---- +- *Type:* string +- *Default:* `bronze---` will be used. -### ClusterConfigurationInfo +Name of the Bronze bucket. -The Amazon MSK configuration to use for the cluster. +Use `BucketUtils.generateUniqueBucketName()` to generate a unique name (recommended). -Note: There is currently no Cloudformation Resource to create a Configuration +--- -#### Initializer +##### `dataLakeKey`Optional ```typescript -import { streaming } from '@cdklabs/aws-data-solutions-framework' - -const clusterConfigurationInfo: streaming.ClusterConfigurationInfo = { ... } +public readonly dataLakeKey: IKey; ``` -#### Properties +- *Type:* aws-cdk-lib.aws_kms.IKey +- *Default:* A single KMS customer key is created. -| **Name** | **Type** | **Description** | -| --- | --- | --- | -| arn | string | The Amazon Resource Name (ARN) of the MSK configuration to use. | -| revision | number | The revision of the Amazon MSK configuration to use. | +The KMS Key used to encrypt all DataLakeStorage S3 buckets. --- -##### `arn`Required +##### `goldBucketArchiveDelay`Optional ```typescript -public readonly arn: string; +public readonly goldBucketArchiveDelay: number; ``` -- *Type:* string - -The Amazon Resource Name (ARN) of the MSK configuration to use. +- *Type:* number +- *Default:* Objects are not archived to Glacier. -For example, arn:aws:kafka:us-east-1:123456789012:configuration/example-configuration-name/abcdabcd-1234-abcd-1234-abcd123e8e8e-1. +Delay (in days) before archiving GOLD data to frozen storage (Glacier storage class). --- -##### `revision`Required +##### `goldBucketInfrequentAccessDelay`Optional ```typescript -public readonly revision: number; +public readonly goldBucketInfrequentAccessDelay: number; ``` - *Type:* number +- *Default:* Move objects to Infrequent Access after 90 days. -The revision of the Amazon MSK configuration to use. 
+Delay (in days) before moving GOLD data to cold storage (Infrequent Access storage class). --- -### CreateServiceLinkedRoleProps - -The properties of the `CreateServiceLinkedRole` construct. - -#### Initializer +##### `goldBucketName`Optional ```typescript -import { utils } from '@cdklabs/aws-data-solutions-framework' - -const createServiceLinkedRoleProps: utils.CreateServiceLinkedRoleProps = { ... } +public readonly goldBucketName: string; ``` -#### Properties +- *Type:* string +- *Default:* `gold---` will be used. -| **Name** | **Type** | **Description** | -| --- | --- | --- | -| removalPolicy | aws-cdk-lib.RemovalPolicy | The removal policy when deleting the CDK resource. | +Name of the Gold bucket. + +Use `BucketUtils.generateUniqueBucketName()` to generate a unique name (recommended). --- -##### `removalPolicy`Optional +##### `removalPolicy`Optional ```typescript public readonly removalPolicy: RemovalPolicy; @@ -12397,325 +14449,346 @@ Otherwise, the removalPolicy is reverted to RETAIN. --- -### DataCatalogDatabaseProps +##### `silverBucketArchiveDelay`Optional -Properties for the `DataCatalogDatabase` construct. +```typescript +public readonly silverBucketArchiveDelay: number; +``` -#### Initializer +- *Type:* number +- *Default:* Objects are not archived to Glacier. -```typescript -import { governance } from '@cdklabs/aws-data-solutions-framework' +Delay (in days) before archiving SILVER data to frozen storage (Glacier storage class). -const dataCatalogDatabaseProps: governance.DataCatalogDatabaseProps = { ... } +--- + +##### `silverBucketInfrequentAccessDelay`Optional + +```typescript +public readonly silverBucketInfrequentAccessDelay: number; ``` -#### Properties +- *Type:* number +- *Default:* Move objects to Infrequent Access after 90 days. -| **Name** | **Type** | **Description** | -| --- | --- | --- | -| name | string | Database name. | -| autoCrawl | boolean | When enabled, this automatically creates a top level Glue Crawler that would run based on the defined schedule in the `autoCrawlSchedule` parameter. | -| autoCrawlSchedule | aws-cdk-lib.aws_glue.CfnCrawler.ScheduleProperty | The schedule to run the Glue Crawler. | -| crawlerLogEncryptionKey | aws-cdk-lib.aws_kms.IKey | KMS encryption Key used for the Glue Crawler logs. | -| crawlerRole | aws-cdk-lib.aws_iam.IRole | The IAM Role used by the Glue Crawler when `autoCrawl` is set to `True`. | -| crawlerTableLevelDepth | number | Directory depth where the table folders are located. | -| glueConnectionName | string | The connection that would be used by the crawler. | -| jdbcPath | string | The JDBC path that would be included by the crawler. | -| jdbcSecret | aws-cdk-lib.aws_secretsmanager.ISecret | The secret associated with the JDBC connection. | -| jdbcSecretKMSKey | aws-cdk-lib.aws_kms.IKey | The KMS key used by the JDBC secret. | -| locationBucket | aws-cdk-lib.aws_s3.IBucket | S3 bucket where data is stored. | -| locationPrefix | string | Top level location where table data is stored. | -| removalPolicy | aws-cdk-lib.RemovalPolicy | The removal policy when deleting the CDK resource. | +Delay (in days) before moving SILVER data to cold storage (Infrequent Access storage class). --- -##### `name`Required +##### `silverBucketName`Optional ```typescript -public readonly name: string; +public readonly silverBucketName: string; ``` - *Type:* string +- *Default:* `silver---` will be used. -Database name. +Name of the Silver bucket. -Construct would add a randomize suffix as part of the name to prevent name collisions. 
+Use `BucketUtils.generateUniqueBucketName()` to generate a unique name (recommended).

---

### DataVpcClientVpnEndpointProps

The properties for the Client VPN Endpoint in the `DataVpc` construct.

#### Initializer

```typescript
import { utils } from '@cdklabs/aws-data-solutions-framework'

const dataVpcClientVpnEndpointProps: utils.DataVpcClientVpnEndpointProps = { ... }
```

#### Properties

| **Name** | **Type** | **Description** |
| --- | --- | --- |
| samlMetadataDocument | string | An XML document generated by an identity provider (IdP) that supports SAML 2.0. The document includes the issuer's name, expiration information, and keys that can be used to validate the SAML authentication response (assertions) that are received from the IdP. You must generate the metadata document using the identity management software that is used as your organization's IdP. |
| serverCertificateArn | string | The ARN of the server certificate. |
| authorizeAllUsersToVpcCidr | boolean | Whether to authorize all users to the VPC CIDR. |
| dnsServers | string[] | Information about the DNS servers to be used for DNS resolution. |
| logging | boolean | Whether to enable connection logging to CloudWatch Logs. |
| logGroup | aws-cdk-lib.aws_logs.ILogGroup | A CloudWatch Logs log group for connection logging. |
| port | aws-cdk-lib.aws_ec2.VpnPort | The port number to assign to the Client VPN endpoint for TCP and UDP traffic. |
| securityGroups | aws-cdk-lib.aws_ec2.ISecurityGroup[] | The security groups to apply to the target network. |
| selfServicePortal | boolean | Specify whether to enable the self-service portal for the Client VPN endpoint. |
| sessionTimeout | aws-cdk-lib.aws_ec2.ClientVpnSessionTimeout | The maximum VPN session duration time. |
| splitTunnel | boolean | Indicates whether split-tunnel is enabled on the AWS Client VPN endpoint. |
| transportProtocol | aws-cdk-lib.aws_ec2.TransportProtocol | The transport protocol to be used by the VPN session. |

---

##### `samlMetadataDocument`Required

```typescript
public readonly samlMetadataDocument: string;
```

- *Type:* string

An XML document generated by an identity provider (IdP) that supports SAML 2.0. The document includes the issuer's name, expiration information, and keys that can be used to validate the SAML authentication response (assertions) that are received from the IdP. You must generate the metadata document using the identity management software that is used as your organization's IdP.

---

##### `serverCertificateArn`Required

```typescript
public readonly serverCertificateArn: string;
```

- *Type:* string

The ARN of the server certificate.
---

##### `authorizeAllUsersToVpcCidr`Optional

```typescript
public readonly authorizeAllUsersToVpcCidr: boolean;
```

- *Type:* boolean
- *Default:* true

Whether to authorize all users to the VPC CIDR.

---

##### `dnsServers`Optional

```typescript
public readonly dnsServers: string[];
```

- *Type:* string[]
- *Default:* DNS server in VPC, e.g. 10.0.0.2

Information about the DNS servers to be used for DNS resolution.

---

##### `logging`Optional

```typescript
public readonly logging: boolean;
```

- *Type:* boolean
- *Default:* true

Whether to enable connection logging to CloudWatch Logs.

---

##### `logGroup`Optional

```typescript
public readonly logGroup: ILogGroup;
```

- *Type:* aws-cdk-lib.aws_logs.ILogGroup
- *Default:* a new LogGroup is created

A CloudWatch Logs log group for connection logging.

---

##### `port`Optional

```typescript
public readonly port: VpnPort;
```

- *Type:* aws-cdk-lib.aws_ec2.VpnPort
- *Default:* VpnPort.HTTPS

The port number to assign to the Client VPN endpoint for TCP and UDP traffic.

---

##### `securityGroups`Optional

```typescript
public readonly securityGroups: ISecurityGroup[];
```

- *Type:* aws-cdk-lib.aws_ec2.ISecurityGroup[]
- *Default:* a new Security Group is created, allowing incoming connections on port 443

The security groups to apply to the target network.

---

##### `selfServicePortal`Optional

```typescript
public readonly selfServicePortal: boolean;
```

- *Type:* boolean
- *Default:* true

Specify whether to enable the self-service portal for the Client VPN endpoint.

---

##### `sessionTimeout`Optional

```typescript
public readonly sessionTimeout: ClientVpnSessionTimeout;
```

- *Type:* aws-cdk-lib.aws_ec2.ClientVpnSessionTimeout
- *Default:* 480 minutes
+The maximum VPN session duration time.

---

-##### `removalPolicy`Optional
+##### `splitTunnel`Optional

```typescript
-public readonly removalPolicy: RemovalPolicy;
+public readonly splitTunnel: boolean;
```

-- *Type:* aws-cdk-lib.RemovalPolicy
-- *Default:* The resources are not deleted (`RemovalPolicy.RETAIN`).
+- *Type:* boolean
+- *Default:* true

-The removal policy when deleting the CDK resource.
+Indicates whether split-tunnel is enabled on the AWS Client VPN endpoint.

-If DESTROY is selected, context value `@data-solutions-framework-on-aws/removeDataOnDestroy` needs to be set to true.
-Otherwise the removalPolicy is reverted to RETAIN.
+---
+
+##### `transportProtocol`Optional
+
+```typescript
+public readonly transportProtocol: TransportProtocol;
+```
+
+- *Type:* aws-cdk-lib.aws_ec2.TransportProtocol
+- *Default:* TCP
+
+The transport protocol to be used by the VPN session.

---

-### DataLakeCatalogProps
+### DataVpcProps

-Properties for the `DataLakeCatalog` Construct.
+The properties for the `DataVpc` construct.

-#### Initializer
+#### Initializer

```typescript
-import { governance } from '@cdklabs/aws-data-solutions-framework'
+import { utils } from '@cdklabs/aws-data-solutions-framework'

-const dataLakeCatalogProps: governance.DataLakeCatalogProps = { ... }
+const dataVpcProps: utils.DataVpcProps = { ... }
```

#### Properties

| **Name** | **Type** | **Description** |
| --- | --- | --- |
-| dataLakeStorage | @cdklabs/aws-data-solutions-framework.storage.DataLakeStorage | The DataLakeStorage object to create the data catalog on. |
-| autoCrawl | boolean | When enabled, creates a top level Glue Crawler that would run based on the defined schedule in the `autoCrawlSchedule` parameter. |
-| autoCrawlSchedule | aws-cdk-lib.aws_glue.CfnCrawler.ScheduleProperty | The schedule when the Glue Crawler runs, if enabled. |
-| crawlerLogEncryptionKey | aws-cdk-lib.aws_kms.IKey | The KMS encryption Key used for the Glue Crawler logs. |
-| crawlerTableLevelDepth | number | Directory depth where the table folders are located. |
-| databaseName | string | The suffix of the Glue Data Catalog Database. |
-| removalPolicy | aws-cdk-lib.RemovalPolicy | The removal policy when deleting the CDK resource. |
+| vpcCidr | string | The CIDR to use to create the subnets in the VPC. |
+| clientVpnEndpointProps | @cdklabs/aws-data-solutions-framework.utils.DataVpcClientVpnEndpointProps | ClientVpnEndpoint properties. |
+| flowLogKey | aws-cdk-lib.aws_kms.IKey | The KMS key used to encrypt the VPC Flow Logs in the CloudWatch Log Group. |
+| flowLogRetention | aws-cdk-lib.aws_logs.RetentionDays | The retention period to apply to VPC Flow Logs. |
+| flowLogRole | aws-cdk-lib.aws_iam.IRole | The IAM Role used to send the VPC Flow Logs in CloudWatch. |
+| natGateways | number | Number of NAT Gateways. |
+| removalPolicy | aws-cdk-lib.RemovalPolicy | The removal policy when deleting the CDK resource. |

---

-##### `dataLakeStorage`Required
+##### `vpcCidr`Required

```typescript
-public readonly dataLakeStorage: DataLakeStorage;
+public readonly vpcCidr: string;
```

-- *Type:* @cdklabs/aws-data-solutions-framework.storage.DataLakeStorage
+- *Type:* string

-The DataLakeStorage object to create the data catalog on.
+The CIDR to use to create the subnets in the VPC.
---

-##### `autoCrawl`Optional
+##### `clientVpnEndpointProps`Optional

```typescript
-public readonly autoCrawl: boolean;
+public readonly clientVpnEndpointProps: DataVpcClientVpnEndpointProps;
```

-- *Type:* boolean
-- *Default:* True
+- *Type:* @cdklabs/aws-data-solutions-framework.utils.DataVpcClientVpnEndpointProps
+- *Default:* None

-When enabled, creates a top level Glue Crawler that would run based on the defined schedule in the `autoCrawlSchedule` parameter.
+ClientVpnEndpoint properties.
+
+Required if a Client VPN Endpoint is needed.

---

-##### `autoCrawlSchedule`Optional
+##### `flowLogKey`Optional

```typescript
-public readonly autoCrawlSchedule: ScheduleProperty;
+public readonly flowLogKey: IKey;
```

-- *Type:* aws-cdk-lib.aws_glue.CfnCrawler.ScheduleProperty
-- *Default:* `cron(1 0 * * ? *)`
+- *Type:* aws-cdk-lib.aws_kms.IKey
+- *Default:* A new KMS key is created

-The schedule when the Glue Crawler runs, if enabled.
+The KMS key used to encrypt the VPC Flow Logs in the CloudWatch Log Group.

-Default is once a day at 00:01h.
+The resource policy of the key must be configured according to the AWS documentation.
+
+> [https://docs.aws.amazon.com/AmazonCloudWatch/latest/logs/encrypt-log-data-kms.html](https://docs.aws.amazon.com/AmazonCloudWatch/latest/logs/encrypt-log-data-kms.html)

---

-##### `crawlerLogEncryptionKey`Optional
+##### `flowLogRetention`Optional

```typescript
-public readonly crawlerLogEncryptionKey: IKey;
+public readonly flowLogRetention: RetentionDays;
```

-- *Type:* aws-cdk-lib.aws_kms.IKey
-- *Default:* Create a new KMS Key if none is provided
+- *Type:* aws-cdk-lib.aws_logs.RetentionDays
+- *Default:* One week retention

-The KMS encryption Key used for the Glue Crawler logs.
+The retention period to apply to VPC Flow Logs.

---

-##### `crawlerTableLevelDepth`Optional
+##### `flowLogRole`Optional

```typescript
-public readonly crawlerTableLevelDepth: number;
+public readonly flowLogRole: IRole;
```

-- *Type:* number
-- *Default:* calculated based on `locationPrefix`
+- *Type:* aws-cdk-lib.aws_iam.IRole
+- *Default:* A new IAM role is created

-Directory depth where the table folders are located.
+The IAM Role used to send the VPC Flow Logs in CloudWatch.

-This helps the Glue Crawler understand the layout of the folders in S3.
+The role must be configured as described in the AWS VPC Flow Log documentation.
+
+> [https://docs.aws.amazon.com/vpc/latest/userguide/flow-logs-cwl.html#flow-logs-iam-role](https://docs.aws.amazon.com/vpc/latest/userguide/flow-logs-cwl.html#flow-logs-iam-role)

---

-##### `databaseName`Optional
+##### `natGateways`Optional

```typescript
-public readonly databaseName: string;
+public readonly natGateways: number;
```

-- *Type:* string
-- *Default:* Use the bucket name as the database name and as the S3 location
+- *Type:* number
+- *Default:* 3, or the number of AZs defined in the context

-The suffix of the Glue Data Catalog Database.
+Number of NAT Gateways.

-The name of the Glue Database is composed of the S3 Bucket name and this suffix.
-The suffix is also added to the S3 location inside the data lake S3 Buckets.
+---

-##### `removalPolicy`Optional
+##### `removalPolicy`Optional

```typescript
public readonly removalPolicy: RemovalPolicy;
@@ -12727,503 +14800,552 @@ public readonly removalPolicy: RemovalPolicy;
```

- *Type:* aws-cdk-lib.RemovalPolicy
- *Default:* The resources are not deleted (`RemovalPolicy.RETAIN`).

The removal policy when deleting the CDK resource.

If DESTROY is selected, context value `@data-solutions-framework-on-aws/removeDataOnDestroy` needs to be set to true.
-Otherwise the removalPolicy is reverted to RETAIN.
+Otherwise, the removalPolicy is reverted to RETAIN. --- -### DataLakeStorageProps +### DataZoneCustomAssetTypeFactoryProps -Properties for the DataLakeStorage Construct. +Properties for the DataZoneCustomAssetTypeFactory construct. -#### Initializer +#### Initializer ```typescript -import { storage } from '@cdklabs/aws-data-solutions-framework' +import { governance } from '@cdklabs/aws-data-solutions-framework' -const dataLakeStorageProps: storage.DataLakeStorageProps = { ... } +const dataZoneCustomAssetTypeFactoryProps: governance.DataZoneCustomAssetTypeFactoryProps = { ... } ``` #### Properties | **Name** | **Type** | **Description** | | --- | --- | --- | -| bronzeBucketArchiveDelay | number | Delay (in days) before archiving BRONZE data to frozen storage (Glacier storage class). | -| bronzeBucketInfrequentAccessDelay | number | Delay (in days) before moving BRONZE data to cold storage (Infrequent Access storage class). | -| bronzeBucketName | string | Name of the Bronze bucket. | -| dataLakeKey | aws-cdk-lib.aws_kms.IKey | The KMS Key used to encrypt all DataLakeStorage S3 buckets. | -| goldBucketArchiveDelay | number | Delay (in days) before archiving GOLD data to frozen storage (Glacier storage class). | -| goldBucketInfrequentAccessDelay | number | Delay (in days) before moving GOLD data to cold storage (Infrequent Access storage class). | -| goldBucketName | string | Name of the Gold bucket. | -| removalPolicy | aws-cdk-lib.RemovalPolicy | The removal policy when deleting the CDK resource. | -| silverBucketArchiveDelay | number | Delay (in days) before archiving SILVER data to frozen storage (Glacier storage class). | -| silverBucketInfrequentAccessDelay | number | Delay (in days) before moving SILVER data to cold storage (Infrequent Access storage class). | -| silverBucketName | string | Name of the Silver bucket. | +| domainId | string | The DataZone domain identifier. | +| removalPolicy | aws-cdk-lib.RemovalPolicy | The removal policy for the custom resource. | --- -##### `bronzeBucketArchiveDelay`Optional +##### `domainId`Required ```typescript -public readonly bronzeBucketArchiveDelay: number; +public readonly domainId: string; ``` -- *Type:* number -- *Default:* Move objects to Glacier after 90 days. +- *Type:* string -Delay (in days) before archiving BRONZE data to frozen storage (Glacier storage class). +The DataZone domain identifier. --- -##### `bronzeBucketInfrequentAccessDelay`Optional +##### `removalPolicy`Optional ```typescript -public readonly bronzeBucketInfrequentAccessDelay: number; +public readonly removalPolicy: RemovalPolicy; ``` -- *Type:* number -- *Default:* Move objects to Infrequent Access after 30 days. +- *Type:* aws-cdk-lib.RemovalPolicy +- *Default:* RemovalPolicy.RETAIN -Delay (in days) before moving BRONZE data to cold storage (Infrequent Access storage class). +The removal policy for the custom resource. --- -##### `bronzeBucketName`Optional +### DataZoneCustomAssetTypeProps + +Properties for the DataZoneCustomAssetType construct. + +#### Initializer ```typescript -public readonly bronzeBucketName: string; -``` +import { governance } from '@cdklabs/aws-data-solutions-framework' -- *Type:* string -- *Default:* `bronze---` will be used. +const dataZoneCustomAssetTypeProps: governance.DataZoneCustomAssetTypeProps = { ... } +``` -Name of the Bronze bucket. +#### Properties -Use `BucketUtils.generateUniqueBucketName()` to generate a unique name (recommended). 
+| **Name** | **Type** | **Description** |
+| --- | --- | --- |
+| assetTypeName | string | The name of the custom asset type. |
+| formTypes | @cdklabs/aws-data-solutions-framework.governance.DataZoneFormType[] | The form types of the custom asset type. |
+| projectId | string | The identifier of the project owning the custom asset type. |
+| assetTypeDescription | string | The description of the custom asset type. |

---

-##### `dataLakeKey`Optional
+##### `assetTypeName`Required

```typescript
-public readonly dataLakeKey: IKey;
+public readonly assetTypeName: string;
```

-- *Type:* aws-cdk-lib.aws_kms.IKey
-- *Default:* A single KMS customer key is created.
+- *Type:* string

-The KMS Key used to encrypt all DataLakeStorage S3 buckets.
+The name of the custom asset type.

---

-##### `goldBucketArchiveDelay`Optional
+##### `formTypes`Required

```typescript
-public readonly goldBucketArchiveDelay: number;
+public readonly formTypes: DataZoneFormType[];
```

-- *Type:* number
-- *Default:* Objects are not archived to Glacier.
+- *Type:* @cdklabs/aws-data-solutions-framework.governance.DataZoneFormType[]

-Delay (in days) before archiving GOLD data to frozen storage (Glacier storage class).
+The form types of the custom asset type.

---

-##### `goldBucketInfrequentAccessDelay`Optional
+*Example*

```typescript
-public readonly goldBucketInfrequentAccessDelay: number;
+[{ name: 'userForm', model: [{ name: 'firstName', type: 'String', required: true }] }]
```

-- *Type:* number
-- *Default:* Move objects to Infrequent Access after 90 days.

-Delay (in days) before moving GOLD data to cold storage (Infrequent Access storage class).
+##### `projectId`Required
+
+```typescript
+public readonly projectId: string;
+```
+
+- *Type:* string
+
+The identifier of the project owning the custom asset type.

---

-##### `goldBucketName`Optional
+##### `assetTypeDescription`Optional

```typescript
-public readonly goldBucketName: string;
+public readonly assetTypeDescription: string;
```

- *Type:* string
-- *Default:* `gold---` will be used.
-
-Name of the Gold bucket.
+- *Default:* No description provided

-Use `BucketUtils.generateUniqueBucketName()` to generate a unique name (recommended).
+The description of the custom asset type.

---

-##### `removalPolicy`Optional
+### DataZoneFormType
+
+Interface representing a DataZoneFormType.
+
+#### Initializer

```typescript
-public readonly removalPolicy: RemovalPolicy;
-```
+import { governance } from '@cdklabs/aws-data-solutions-framework'

-- *Type:* aws-cdk-lib.RemovalPolicy
-- *Default:* The resources are not deleted (`RemovalPolicy.RETAIN`).
+const dataZoneFormType: governance.DataZoneFormType = { ... }
+```

-The removal policy when deleting the CDK resource.
+#### Properties

-If DESTROY is selected, context value `@data-solutions-framework-on-aws/removeDataOnDestroy` needs to be set to true.
-Otherwise, the removalPolicy is reverted to RETAIN.
+| **Name** | **Type** | **Description** |
+| --- | --- | --- |
+| name | string | The name of the form. |
+| model | @cdklabs/aws-data-solutions-framework.governance.DataZoneFormTypeField[] | The fields of the form. |
+| required | boolean | Whether the form is required. |

---

-##### `silverBucketArchiveDelay`Optional
+##### `name`Required

```typescript
-public readonly silverBucketArchiveDelay: number;
+public readonly name: string;
```

-- *Type:* number
-- *Default:* Objects are not archived to Glacier.
+- *Type:* string

-Delay (in days) before archiving SILVER data to frozen storage (Glacier storage class).
+The name of the form.
--- -##### `silverBucketInfrequentAccessDelay`Optional +##### `model`Optional ```typescript -public readonly silverBucketInfrequentAccessDelay: number; +public readonly model: DataZoneFormTypeField[]; ``` -- *Type:* number -- *Default:* Move objects to Infrequent Access after 90 days. +- *Type:* @cdklabs/aws-data-solutions-framework.governance.DataZoneFormTypeField[] +- *Default:* No model is required. The form is already configured in DataZone. -Delay (in days) before moving SILVER data to cold storage (Infrequent Access storage class). +The fields of the form. --- -##### `silverBucketName`Optional +*Example* ```typescript -public readonly silverBucketName: string; +[{ name: 'firstName', type: 'String', required: true }] ``` -- *Type:* string -- *Default:* `silver---` will be used. -Name of the Silver bucket. +##### `required`Optional -Use `BucketUtils.generateUniqueBucketName()` to generate a unique name (recommended). +```typescript +public readonly required: boolean; +``` + +- *Type:* boolean +- *Default:* false + +Whether the form is required. --- -### DataVpcClientVpnEndpointProps +### DataZoneFormTypeField -The properties for the ClientVPnEndpoint in DataVpc construct. +Interface representing a DataZoneFormTypeField. -#### Initializer +#### Initializer ```typescript -import { utils } from '@cdklabs/aws-data-solutions-framework' +import { governance } from '@cdklabs/aws-data-solutions-framework' -const dataVpcClientVpnEndpointProps: utils.DataVpcClientVpnEndpointProps = { ... } +const dataZoneFormTypeField: governance.DataZoneFormTypeField = { ... } ``` #### Properties | **Name** | **Type** | **Description** | | --- | --- | --- | -| samlMetadataDocument | string | An XML document generated by an identity provider (IdP) that supports SAML 2.0. The document includes the issuer's name, expiration information, and keys that can be used to validate the SAML authentication response (assertions) that are received from the IdP. You must generate the metadata document using the identity management software that is used as your organization's IdP. | -| serverCertificateArn | string | The ARN of the server certificate. | -| authorizeAllUsersToVpcCidr | boolean | Whether to authorize all users to the VPC CIDR. | -| dnsServers | string[] | Information about the DNS servers to be used for DNS resolution. | -| logging | boolean | A CloudWatch Logs log group for connection logging. | -| logGroup | aws-cdk-lib.aws_logs.ILogGroup | A CloudWatch Logs log group for connection logging. | -| port | aws-cdk-lib.aws_ec2.VpnPort | The port number to assign to the Client VPN endpoint for TCP and UDP traffic. | -| securityGroups | aws-cdk-lib.aws_ec2.ISecurityGroup[] | The security groups to apply to the target network. | -| selfServicePortal | boolean | Specify whether to enable the self-service portal for the Client VPN endpoint. | -| sessionTimeout | aws-cdk-lib.aws_ec2.ClientVpnSessionTimeout | The maximum VPN session duration time. | -| splitTunnel | boolean | Indicates whether split-tunnel is enabled on the AWS Client VPN endpoint. | -| transportProtocol | aws-cdk-lib.aws_ec2.TransportProtocol | The transport protocol to be used by the VPN session. | +| name | string | The name of the field. | +| type | string | The type of the field. | +| required | boolean | Whether the field is required. | + +--- + +##### `name`Required + +```typescript +public readonly name: string; +``` + +- *Type:* string + +The name of the field. 
+
+---
+
+##### `type`Required
+
+```typescript
+public readonly type: string;
+```
+
+- *Type:* string
+
+The type of the field.
+
+---
+
+##### `required`Optional
+
+```typescript
+public readonly required: boolean;
+```
+
+- *Type:* boolean
+- *Default:* false
+
+Whether the field is required.

---

-##### `samlMetadataDocument`Required
+### DataZoneGsrMskDataSourceProps
+
+Properties for configuring a DataZone GSR MSK datasource.
+
+#### Initializer

```typescript
-public readonly samlMetadataDocument: string;
+import { governance } from '@cdklabs/aws-data-solutions-framework'
+
+const dataZoneGsrMskDataSourceProps: governance.DataZoneGsrMskDataSourceProps = { ... }
```

-- *Type:* string
+#### Properties

-An XML document generated by an identity provider (IdP) that supports SAML 2.0. The document includes the issuer's name, expiration information, and keys that can be used to validate the SAML authentication response (assertions) that are received from the IdP. You must generate the metadata document using the identity management software that is used as your organization's IdP.
+| **Name** | **Type** | **Description** |
+| --- | --- | --- |
+| clusterName | string | The name of the MSK (Managed Streaming for Apache Kafka) cluster to use. |
+| domainId | string | The unique identifier for the DataZone domain where the datasource resides. |
+| projectId | string | The unique identifier for the project associated with this datasource. |
+| registryName | string | The name of the registry for schema management. |
+| enableSchemaRegistryEvent | boolean | A flag to enable or disable the EventBridge listener for schema registry changes. |
+| runSchedule | aws-cdk-lib.aws_events.Schedule | The schedule for the EventBridge events, specified using cron expressions. |

---

-##### `serverCertificateArn`Required
+##### `clusterName`Required

```typescript
-public readonly serverCertificateArn: string;
+public readonly clusterName: string;
```

- *Type:* string

-The ARN of the server certificate.
+The name of the MSK (Managed Streaming for Apache Kafka) cluster to use.

---

-##### `authorizeAllUsersToVpcCidr`Optional
+##### `domainId`Required

```typescript
-public readonly authorizeAllUsersToVpcCidr: boolean;
+public readonly domainId: string;
```

-- *Type:* boolean
-- *Default:* true
+- *Type:* string

-Whether to authorize all users to the VPC CIDR.
+The unique identifier for the DataZone domain where the datasource resides.

---

-##### `dnsServers`Optional
+##### `projectId`Required

```typescript
-public readonly dnsServers: string[];
+public readonly projectId: string;
```

-- *Type:* string[]
-- *Default:* DNS server in VPC, e.g. 10.0.0.2
+- *Type:* string

-Information about the DNS servers to be used for DNS resolution.
+The unique identifier for the project associated with this datasource.

---

-##### `logging`Optional
+##### `registryName`Required

```typescript
-public readonly logging: boolean;
+public readonly registryName: string;
```

-- *Type:* boolean
-- *Default:* true
+- *Type:* string

-A CloudWatch Logs log group for connection logging.
+The name of the registry for schema management.

---

-##### `logGroup`Optional
+##### `enableSchemaRegistryEvent`Optional

```typescript
-public readonly logGroup: ILogGroup;
+public readonly enableSchemaRegistryEvent: boolean;
```

-- *Type:* aws-cdk-lib.aws_logs.ILogGroup
-- *Default:* new LogGroup is created
+- *Type:* boolean
+- *Default:* false, meaning the EventBridge listener for schema changes is disabled.

-A CloudWatch Logs log group for connection logging.
+A flag to enable or disable the EventBridge listener for schema registry changes.
---

-##### `port`Optional
+##### `runSchedule`Optional

```typescript
-public readonly port: VpnPort;
+public readonly runSchedule: Schedule;
```

-- *Type:* aws-cdk-lib.aws_ec2.VpnPort
-- *Default:* true
+- *Type:* aws-cdk-lib.aws_events.Schedule
+- *Default:* `cron(1 0 * * ? *)` if `enableSchemaRegistryEvent` is false or undefined, otherwise no schedule.

-The port number to assign to the Client VPN endpoint for TCP and UDP traffic.
+Defines the schedule for the EventBridge events, specified using cron expressions.

---

-##### `securityGroups`Optional
+### DataZoneMskAssetTypeProps
+
+The properties for the DataZoneMskAssetType construct.
+
+#### Initializer

```typescript
-public readonly securityGroups: ISecurityGroup[];
+import { governance } from '@cdklabs/aws-data-solutions-framework'
+
+const dataZoneMskAssetTypeProps: governance.DataZoneMskAssetTypeProps = { ... }
```

-- *Type:* aws-cdk-lib.aws_ec2.ISecurityGroup[]
-- *Default:* new Securoty Group is created, allowing the incoming connections on port 443
+#### Properties

-The security groups to apply to the target network.
+| **Name** | **Type** | **Description** |
+| --- | --- | --- |
+| domainId | string | The DataZone domain identifier. |
+| dzCustomAssetTypeFactory | @cdklabs/aws-data-solutions-framework.governance.DataZoneCustomAssetTypeFactory | The factory to create the custom asset type. |
+| projectId | string | The identifier of the project owning the custom asset type. |
+| removalPolicy | aws-cdk-lib.RemovalPolicy | The removal policy to apply to the asset type. |

---

-##### `selfServicePortal`Optional
+##### `domainId`Required

```typescript
-public readonly selfServicePortal: boolean;
+public readonly domainId: string;
```

-- *Type:* boolean
-- *Default:* true
+- *Type:* string

-Specify whether to enable the self-service portal for the Client VPN endpoint.
+The DataZone domain identifier.

---

-##### `sessionTimeout`Optional
+##### `dzCustomAssetTypeFactory`Optional

```typescript
-public readonly sessionTimeout: ClientVpnSessionTimeout;
+public readonly dzCustomAssetTypeFactory: DataZoneCustomAssetTypeFactory;
```

-- *Type:* aws-cdk-lib.aws_ec2.ClientVpnSessionTimeout
-- *Default:* 480 minutes
+- *Type:* @cdklabs/aws-data-solutions-framework.governance.DataZoneCustomAssetTypeFactory
+- *Default:* A new factory is created

-The maximum VPN session duration time.
+The factory to create the custom asset type.

---

-##### `splitTunnel`Optional
+##### `projectId`Optional

```typescript
-public readonly splitTunnel: boolean;
+public readonly projectId: string;
```

-- *Type:* boolean
-- *Default:* true
+- *Type:* string
+- *Default:* A new project called MskGovernance is created

-Indicates whether split-tunnel is enabled on the AWS Client VPN endpoint.
+The identifier of the project owning the custom asset type.

---

-##### `transportProtocol`Optional
+##### `removalPolicy`Optional

```typescript
-public readonly transportProtocol: TransportProtocol;
+public readonly removalPolicy: RemovalPolicy;
```

-- *Type:* aws-cdk-lib.aws_ec2.TransportProtocol
-- *Default:* TCP
+- *Type:* aws-cdk-lib.RemovalPolicy
+- *Default:* RemovalPolicy.RETAIN

-The transport protocol to be used by the VPN session.
+The removal policy to apply to the asset type.

---

-### DataVpcProps
+### DataZoneMskCentralAuthorizerProps

-The properties for the `DataVpc` construct.
+The properties for the DataZoneMskCentralAuthorizer construct.
-#### Initializer
+#### Initializer

```typescript
-import { utils } from '@cdklabs/aws-data-solutions-framework'
+import { governance } from '@cdklabs/aws-data-solutions-framework'

-const dataVpcProps: utils.DataVpcProps = { ... }
+const dataZoneMskCentralAuthorizerProps: governance.DataZoneMskCentralAuthorizerProps = { ... }
```

#### Properties

| **Name** | **Type** | **Description** |
| --- | --- | --- |
-| vpcCidr | string | The CIDR to use to create the subnets in the VPC. |
-| clientVpnEndpointProps | @cdklabs/aws-data-solutions-framework.utils.DataVpcClientVpnEndpointProps | ClientVpnEndpoint propertioes. |
-| flowLogKey | aws-cdk-lib.aws_kms.IKey | The KMS key used to encrypt the VPC Flow Logs in the CloudWatch Log Group. |
-| flowLogRetention | aws-cdk-lib.aws_logs.RetentionDays | The retention period to apply to VPC Flow Logs. |
-| flowLogRole | aws-cdk-lib.aws_iam.IRole | The IAM Role used to send the VPC Flow Logs in CloudWatch. |
-| natGateways | number | Number of NAT Gateways. |
-| removalPolicy | aws-cdk-lib.RemovalPolicy | The removal policy when deleting the CDK resource. |
+| domainId | string | The DataZone Domain ID. |
+| removalPolicy | aws-cdk-lib.RemovalPolicy | The removal policy to apply to the asset type. |

---

-##### `vpcCidr`Required
+##### `domainId`Required

```typescript
-public readonly vpcCidr: string;
+public readonly domainId: string;
```

- *Type:* string

-The CIDR to use to create the subnets in the VPC.
+The DataZone Domain ID.

---

-##### `clientVpnEndpointProps`Optional
+##### `removalPolicy`Optional

```typescript
-public readonly clientVpnEndpointProps: DataVpcClientVpnEndpointProps;
+public readonly removalPolicy: RemovalPolicy;
```

-- *Type:* @cdklabs/aws-data-solutions-framework.utils.DataVpcClientVpnEndpointProps
-- *Default:* None
+- *Type:* aws-cdk-lib.RemovalPolicy
+- *Default:* RemovalPolicy.RETAIN

-ClientVpnEndpoint propertioes.
+The removal policy to apply to the asset type.

-Required if client vpn endpoint is needed
+---

-##### `flowLogKey`Optional
+### DataZoneMskEnvironmentAuthorizerProps

-```typescript
-public readonly flowLogKey: IKey;
-```
+#### Initializer

-- *Type:* aws-cdk-lib.aws_kms.IKey
-- *Default:* A new KMS key is created
+```typescript
+import { governance } from '@cdklabs/aws-data-solutions-framework'

-The KMS key used to encrypt the VPC Flow Logs in the CloudWatch Log Group.
+const dataZoneMskEnvironmentAuthorizerProps: governance.DataZoneMskEnvironmentAuthorizerProps = { ... }
+```

-The resource policy of the key must be configured according to the AWS documentation.
+#### Properties

-> [https://docs.aws.amazon.com/AmazonCloudWatch/latest/logs/encrypt-log-data-kms.html](https://docs.aws.amazon.com/AmazonCloudWatch/latest/logs/encrypt-log-data-kms.html)
+| **Name** | **Type** | **Description** |
+| --- | --- | --- |
+| domainId | string | The DataZone Domain ID. |
+| centralAccountId | string | The central account ID. |
+| grantMskManagedVpc | boolean | Whether the authorizer grants MSK managed VPC permissions. |
+| removalPolicy | aws-cdk-lib.RemovalPolicy | The removal policy to apply to the asset type. |

---

-##### `flowLogRetention`Optional
+##### `domainId`Required

```typescript
-public readonly flowLogRetention: RetentionDays;
+public readonly domainId: string;
```

-- *Type:* aws-cdk-lib.aws_logs.RetentionDays
-- *Default:* One week retention
+- *Type:* string

-The retention period to apply to VPC Flow Logs.
+The DataZone Domain ID.
---

-##### `flowLogRole`Optional
+##### `centralAccountId`Optional

```typescript
-public readonly flowLogRole: IRole;
+public readonly centralAccountId: string;
```

-- *Type:* aws-cdk-lib.aws_iam.IRole
-- *Default:* A new IAM role is created
-
-The IAM Role used to send the VPC Flow Logs in CloudWatch.
-
-The role must be configured as described in the AWS VPC Flow Log documentation.
+- *Type:* string

-> [https://docs.aws.amazon.com/vpc/latest/userguide/flow-logs-cwl.html#flow-logs-iam-role](https://docs.aws.amazon.com/vpc/latest/userguide/flow-logs-cwl.html#flow-logs-iam-role)
+The central account ID.

---

-##### `natGateways`Optional
+##### `grantMskManagedVpc`Optional

```typescript
-public readonly natGateways: number;
+public readonly grantMskManagedVpc: boolean;
```

-- *Type:* number
-- *Default:* 3 or the AZs defined in the context
+- *Type:* boolean
+- *Default:* false

-Number of NAT Gateways.
+Whether the authorizer grants MSK managed VPC permissions.

---

-##### `removalPolicy`Optional
+##### `removalPolicy`Optional

```typescript
public readonly removalPolicy: RemovalPolicy;
```

- *Type:* aws-cdk-lib.RemovalPolicy
-- *Default:* The resources are not deleted (`RemovalPolicy.RETAIN`).
-
-The removal policy when deleting the CDK resource.
+- *Default:* RemovalPolicy.RETAIN

-If DESTROY is selected, context value `@data-solutions-framework-on-aws/removeDataOnDestroy` needs to be set to true.
-Otherwise, the removalPolicy is reverted to RETAIN.
+The removal policy to apply to the asset type.

---

@@ -17921,6 +20043,134 @@ properties for TLS authentication.

---

+### DataZoneHelpers
+
+#### Initializers
+
+```typescript
+import { governance } from '@cdklabs/aws-data-solutions-framework'
+
+new governance.DataZoneHelpers()
+```
+
+| **Name** | **Type** | **Description** |
+| --- | --- | --- |
+
+---
+
+
+#### Static Functions
+
+| **Name** | **Description** |
+| --- | --- |
+| buildModelString | Build a Smithy model string from model fields. |
+| createSubscriptionTarget | Creates a DataZone subscription target for a custom asset type. |
+
+---
+
+##### `buildModelString`
+
+```typescript
+import { governance } from '@cdklabs/aws-data-solutions-framework'
+
+governance.DataZoneHelpers.buildModelString(formType: DataZoneFormType)
+```
+
+Build a Smithy model string from model fields.
+
+###### `formType`Required
+
+- *Type:* @cdklabs/aws-data-solutions-framework.governance.DataZoneFormType
+
+The form type containing the model fields.
+
+---
+
+##### `createSubscriptionTarget`
+
+```typescript
+import { governance } from '@cdklabs/aws-data-solutions-framework'
+
+governance.DataZoneHelpers.createSubscriptionTarget(scope: Construct, id: string, customAssetType: CustomAssetType, name: string, provider: string, environmentId: string, authorizedPrincipals: IRole[], manageAccessRole: IRole)
+```
+
+Creates a DataZone subscription target for a custom asset type.
+
+Subscription targets are used to automatically add assets to environments when a custom asset is subscribed by a project.
+
+*Example*
+
+```typescript
+// Illustrative sketch based on the signature above: `customAssetType`,
+// `consumerRole` and `manageAccessRole` are assumed to exist in scope.
+dsf.governance.DataZoneHelpers.createSubscriptionTarget(this, 'SubscriptionTarget',
+  customAssetType,
+  'MskTopicsTarget',
+  'dsf',
+  'environment-id',
+  [consumerRole],
+  manageAccessRole,
+);
+```
+
+
+###### `scope`Required
+
+- *Type:* constructs.Construct
+
+The scope of the construct.
+
+---
+
+###### `id`Required
+
+- *Type:* string
+
+The id of the construct.
+
+---
+
+###### `customAssetType`Required
+
+- *Type:* @cdklabs/aws-data-solutions-framework.governance.CustomAssetType
+
+The custom asset type that can be added to the environment.
+
+---
+
+###### `name`Required
+
+- *Type:* string
+
+The name of the subscription target.
+
+---
+
+###### `provider`Required
+
+- *Type:* string
+
+The provider of the subscription target.
+
+---
+
+###### `environmentId`Required
+
+- *Type:* string
+
+The DataZone environment identifier.
+
+---
+
+###### `authorizedPrincipals`Required
+
+- *Type:* aws-cdk-lib.aws_iam.IRole[]
+
+The authorized principals to be granted when assets are subscribed.
+
+---
+
+###### `manageAccessRole`Required
+
+- *Type:* aws-cdk-lib.aws_iam.IRole
+
+The IAM role creating the subscription target.
+
+---
+
+
+
### KafkaVersion

Kafka cluster version.

diff --git a/framework/package.json b/framework/package.json
index 5b529b14b..84b78fd24 100644
--- a/framework/package.json
+++ b/framework/package.json
@@ -62,7 +62,7 @@
     "jsii-pacmak": "^1.96.0",
     "jsii-rosetta": "~5.3.0",
     "node-libcurl": "^4.0.0",
-    "projen": "^0.83.1",
+    "projen": "^0.87.2",
     "rosetta": "^1.1.0",
     "standard-version": "^9",
     "sync-request-curl": "^3.0.0",
diff --git a/framework/src/governance/README.md b/framework/src/governance/README.md
index b00aa74a1..2b5f155d4 100644
--- a/framework/src/governance/README.md
+++ b/framework/src/governance/README.md
@@ -61,3 +61,136 @@ You can change the default configuration of the AWS Glue Crawlers associated wit
The parameters apply to the three databases, if you need fine-grained configuration per database, you can use the [DataCatalogDatabase](#datacatalogdatabase) construct.

[example crawler configuration](./examples/data-lake-catalog-crawler.lit.ts)
+
+[//]: # (governance.datazone-msk-authorizer)
+# DataZoneMskAuthorizer
+
+Custom DataZone MSK authorizer for granting access to MSK topics via DataZone asset subscription workflow.
+
+## Overview
+
+The DataZone MSK Authorizer is a custom process integrated with DataZone that implements the [Subscription Grant](https://docs.aws.amazon.com/datazone/latest/userguide/grant-access-to-unmanaged-asset.html) concept for Kafka topics hosted on Amazon MSK (Provisioned and Serverless),
+secured by IAM policies, and registered in DataZone using the `DataZoneMskAssetType`.
+It supports:
+- cross-account access with MSK Provisioned clusters.
+- MSK managed VPC connectivity permissions with MSK Provisioned clusters.
+- Glue Schema Registry permissions when sharing in the same account.
+
+The authorizer is composed of two constructs:
+- the `DataZoneMskCentralAuthorizer` is responsible for collecting metadata on the Subscription Grant, orchestrating the workflow and acknowledging the Subscription Grant creation. This construct must be deployed in the AWS root account of the DataZone Domain.
+- the `DataZoneMskEnvironmentAuthorizer` is responsible for managing the permissions on the producer and consumer side. This construct must be deployed once per account associated with the DataZone Domain.
+
+The cross-account synchronization is exclusively done via an EventBridge bus to restrict cross-account permissions to the minimum.
+
+![DataZoneMskAuthorizer](../../../website/static/img/datazone-msk-authorizer.png)
+
+## DataZoneMskCentralAuthorizer
+
+The `DataZoneMskCentralAuthorizer` is the central component that receives all the Subscription Grant Requests from DataZone for the `MskTopicAssetType` and orchestrates the end-to-end workflow.
+The workflow is a Step Functions State Machine that is triggered by [events emitted by DataZone](https://docs.aws.amazon.com/datazone/latest/userguide/working-with-events-and-notifications.html) and contains the following steps:
+1. 
Metadata collection: a Lambda Function collects additional information from DataZone on the producer and the subscriber, and updates the status of the Subscription Grant to `IN_PROGRESS`.
+2. Producer grant trigger: an event is sent to the producer account to request the creation of the grant on the producer MSK cluster (implemented in the `DataZoneMskEnvironmentAuthorizer`). This step is an asynchronous state using a callback mechanism from the `DataZoneMskEnvironmentAuthorizer`.
+3. Consumer grant trigger: an event is sent to the consumer account to request the creation of the grant on the IAM consumer Role (implemented in the `DataZoneMskEnvironmentAuthorizer`). This step is an asynchronous state using a callback mechanism from the `DataZoneMskEnvironmentAuthorizer`.
+4. DataZone Subscription Grant callback: a Lambda Function updates the status of the Subscription Grant in DataZone to `GRANTED` or `REVOKED` based on the initial request.
+
+If any failure happens during the process, the Step Functions State Machine catches the exception and updates the status of the Subscription Grant to `GRANT_FAILED` or `REVOKE_FAILED`.
+
+:::info Permission grant failure
+If the grant fails for the consumer, the grant already done for the producer is not reverted but the user is notified within DataZone because the failure is propagated.
+The authorizer process is idempotent so it's safe to replay the workflow and all the permissions will be deduplicated. If it's not replayed, the producer grant needs to be manually cleaned up.
+:::
+
+### Usage
+
+[example default usage](./examples/datazone-msk-central-authorizer-default.lit.ts)
+
+### Register producer and consumer accounts
+
+The `DataZoneMskCentralAuthorizer` construct works in collaboration with the `DataZoneMskEnvironmentAuthorizer` construct, which is deployed into the producer and consumer accounts.
+To enable the integration, register accounts using the `registerAccount()` method on the `DataZoneMskCentralAuthorizer` object.
+It will grant the required permissions so the central account and the environment accounts can communicate via EventBridge events.
+
+[example register account](./examples/datazone-msk-central-authorizer-register.lit.ts)
+
+## DataZoneMskEnvironmentAuthorizer
+
+The `DataZoneMskEnvironmentAuthorizer` is responsible for managing the permissions required to grant access to MSK Topics (and associated Glue Schema Registry) via IAM policies.
+The workflow is a Step Functions State Machine that is triggered by events emitted by the `DataZoneMskCentralAuthorizer` and contains the following steps:
+1. Grant the producer or consumer based on the request. If the event is a cross-account producer grant, a Lambda function adds an IAM policy statement to the MSK Cluster policy granting read access to the IAM consumer Role (see the illustrative statement after this list). Optionally, it can also grant the use of MSK Managed VPC.
+2. Callback the `DataZoneMskCentralAuthorizer`: an EventBridge event is sent on the central EventBridge Bus to continue the workflow on the central account using the callback mechanism of Step Functions.
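+
+The snippet below is a minimal sketch of the kind of read-only statement step 1 could attach to the MSK cluster policy for a cross-account consumer. It is illustrative only: the cluster, topic and consumer role ARNs are placeholders, and the exact actions and resource scoping applied by the authorizer may differ (for example, consumer group permissions may also be granted).
+
+```typescript
+import { ArnPrincipal, Effect, PolicyStatement } from 'aws-cdk-lib/aws-iam';
+
+// Placeholder values: the authorizer resolves the real ARNs from the DataZone asset metadata.
+const clusterArn = 'arn:aws:kafka:us-east-1:111111111111:cluster/msk-cluster/11111111-1111-1111-1111-111111111111-1';
+const topicArn = 'arn:aws:kafka:us-east-1:111111111111:topic/msk-cluster/11111111-1111-1111-1111-111111111111-1/my-topic';
+const consumerRoleArn = 'arn:aws:iam::222222222222:role/consumer-role';
+
+// Read-only access for the consumer role on the cluster and the subscribed topic,
+// added as a statement in the MSK cluster resource policy
+const readAccess = new PolicyStatement({
+  effect: Effect.ALLOW,
+  principals: [new ArnPrincipal(consumerRoleArn)],
+  actions: [
+    'kafka-cluster:Connect',       // connect to the cluster
+    'kafka-cluster:DescribeTopic', // describe the subscribed topic
+    'kafka-cluster:ReadData',      // consume records from the topic
+  ],
+  resources: [clusterArn, topicArn],
+});
+```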
+
+### Usage
+
+[example default usage](./examples/datazone-msk-environment-authorizer-default.lit.ts)
+
+### Cross-account workflow
+
+If the `DataZoneMskEnvironmentAuthorizer` is deployed in a different account than the DataZone root account where the `DataZoneMskCentralAuthorizer` is deployed, you need to configure the central account ID to authorize cross-account communication:
+
+[example cross account](./examples/datazone-msk-environment-authorizer-cross-account.lit.ts)
+
+### Granting MSK Managed VPC connectivity
+
+For easier cross-account Kafka consumption, MSK Provisioned clusters can use the [multi-VPC private connectivity](https://docs.aws.amazon.com/msk/latest/developerguide/aws-access-mult-vpc.html) feature which is a managed solution that simplifies the networking infrastructure for multi-VPC and cross-account connectivity.
+
+By default, the multi-VPC private connectivity permissions are not configured. You can enable them using the construct properties:
+
+[example vpc](./examples/datazone-msk-environment-authorizer-vpc.lit.ts)
+
+[//]: # (governance.datazone-msk-asset-type)
+# DataZoneMskAssetType
+
+DataZone custom asset type for MSK topics.
+
+## Overview
+
+`DataZoneMskAssetType` is a custom asset type implementation for Kafka topics hosted in MSK clusters. MSK clusters can be provisioned or serverless. Topics can be linked to a Glue Schema Registry.
+The construct is a CDK custom resource that creates the corresponding DataZone Form Types and Asset Type required to store metadata related to MSK Topics. It includes:
+- An MSK Source Reference Form Type containing metadata about the MSK Cluster including the cluster ARN and type.
+- A Kafka Schema Form Type containing metadata about the topic including the topic name, schema version, Glue Schema Registry ARN and Glue Schema ARN.
+
+![DataZone MSK asset type](../../../website/static/img/datazone-msk-asset-type.png)
+
+## Usage
+
+[example default usage](./examples/datazone-msk-asset-type-default.lit.ts)
+
+## Reusing an existing owner project
+
+The `DataZoneMskAssetType` requires a DataZone project to own the custom asset type. By default, it will create a `MskGovernance` project within the domain, but you can pass an existing project.
+The construct will make the IAM custom resource Role a member of the project to be able to create the asset type and the form types.
+
+[example project](./examples/datazone-msk-asset-type-project.lit.ts)
+
+## Reusing a Custom Asset Type Factory
+
+By default, the `DataZoneMskAssetType` creates its own factory resources required to connect to DataZone and create the custom asset type. But it's possible to reuse a Factory across multiple Custom Asset Types to limit the number of custom resource providers and DataZone project memberships:
+
+[example factory configuration](./examples/datazone-msk-asset-type-factory.lit.ts)
+
+[//]: # (governance.datazone-gsr-msk-datasource)
+# DataZoneGsrMskDataSource
+
+DataZone Data Source for MSK Topic assets backed by Glue Schema Registry.
+
+## Overview
+
+`DataZoneGsrMskDataSource` is a custom data source for DataZone that can create, update and delete MSK topic assets in DataZone based on a Glue Schema Registry definition. The custom data source can be triggered by a schedule or based on events from the Glue Schema Registry. The construct implements:
+- EventBridge Rules triggered either on a schedule or on events.
+- A Lambda Function triggered from the EventBridge Rules and responsible for collecting metadata from the Glue Schema Registry and updating MSK Topic assets in DataZone.
+- SSM Parameter Store Parameters to store required metadata.
+
+## Usage
+
+[example default usage](./examples/datazone-gsr-msk-datasource-default.lit.ts)
+
+## Data Source trigger modes
+
+The custom data source process can be triggered in two different ways. By default, if no schedule is provided and events are not enabled, the construct creates a schedule that runs every hour.
+- Based on a Schedule
+
+[example schedule](./examples/datazone-gsr-msk-datasource-cron.lit.ts)
+
+- Based on events received from the Glue Schema Registry
+
+[example events](./examples/datazone-gsr-msk-datasource-event.lit.ts)
\ No newline at end of file
diff --git a/framework/src/governance/examples/datazone-gsr-msk-datasource-cron.lit.ts b/framework/src/governance/examples/datazone-gsr-msk-datasource-cron.lit.ts
new file mode 100644
index 000000000..db9f89dbd
--- /dev/null
+++ b/framework/src/governance/examples/datazone-gsr-msk-datasource-cron.lit.ts
@@ -0,0 +1,28 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+import * as cdk from 'aws-cdk-lib';
+import { Construct } from 'constructs';
+import * as dsf from '../../index';
+import * as events from 'aws-cdk-lib/aws-events';
+
+
+class ExampleDefaultDataZoneGsrMskDataSourceStack extends cdk.Stack {
+  constructor(scope: Construct, id: string) {
+    super(scope, id);
+
+    /// !show
+    new dsf.governance.DataZoneGsrMskDataSource(this, 'DataZoneGsrMskDataSource', {
+      domainId: 'aba_dc999t9ime9sss',
+      registryName: 'schema-registry',
+      projectId: '999a99aa9aaaaa',
+      clusterName: 'msk-cluster',
+      // EventBridge requires '?' in either the day-of-month or day-of-week field
+      runSchedule: events.Schedule.expression('cron(0 * * * ? *)'),
+    });
+    /// !hide
+
+  }
+}
+
+const app = new cdk.App();
+new ExampleDefaultDataZoneGsrMskDataSourceStack(app, 'ExampleDefaultDataZoneGsrMskDataSourceStack');
\ No newline at end of file
diff --git a/framework/src/governance/examples/datazone-gsr-msk-datasource-default.lit.ts b/framework/src/governance/examples/datazone-gsr-msk-datasource-default.lit.ts
new file mode 100644
index 000000000..7efe19d4b
--- /dev/null
+++ b/framework/src/governance/examples/datazone-gsr-msk-datasource-default.lit.ts
@@ -0,0 +1,26 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+import * as cdk from 'aws-cdk-lib';
+import { Construct } from 'constructs';
+import * as dsf from '../../index';
+
+
+class ExampleDefaultDataZoneGsrMskDataSourceStack extends cdk.Stack {
+  constructor(scope: Construct, id: string) {
+    super(scope, id);
+
+    /// !show
+    new dsf.governance.DataZoneGsrMskDataSource(this, 'DataZoneGsrMskDataSource', {
+      domainId: 'aba_dc999t9ime9sss',
+      registryName: 'schema-registry',
+      projectId: '999a99aa9aaaaa',
+      clusterName: 'msk-cluster'
+    });
+    /// !hide
+
+  }
+}
+
+const app = new cdk.App();
+new ExampleDefaultDataZoneGsrMskDataSourceStack(app, 'ExampleDefaultDataZoneGsrMskDataSourceStack');
\ No newline at end of file
diff --git a/framework/src/governance/examples/datazone-gsr-msk-datasource-event.lit.ts b/framework/src/governance/examples/datazone-gsr-msk-datasource-event.lit.ts
new file mode 100644
index 000000000..d7185d36a
--- /dev/null
+++ b/framework/src/governance/examples/datazone-gsr-msk-datasource-event.lit.ts
@@ -0,0 +1,27 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0 + +import * as cdk from 'aws-cdk-lib'; +import { Construct } from 'constructs'; +import * as dsf from '../../index'; + + +class ExampleDefaultDataZoneGsrMskDataSourceStack extends cdk.Stack { + constructor(scope: Construct, id: string) { + super(scope, id); + + /// !show + new dsf.governance.DataZoneGsrMskDataSource(this, 'DataZoneGsrMskDataSource', { + domainId: 'aba_dc999t9ime9sss', + registryName: 'schema-registry', + projectId: '999a99aa9aaaaa', + clusterName: 'msk-cluster', + enableSchemaRegistryEvent: true, + }); + /// !hide + + } +} + +const app = new cdk.App(); +new ExampleDefaultDataZoneGsrMskDataSourceStack(app, 'ExampleDefaultDataZoneGsrMskDataSourceStack'); \ No newline at end of file diff --git a/framework/src/governance/examples/datazone-msk-asset-type-default.lit.ts b/framework/src/governance/examples/datazone-msk-asset-type-default.lit.ts new file mode 100644 index 000000000..df2d77f65 --- /dev/null +++ b/framework/src/governance/examples/datazone-msk-asset-type-default.lit.ts @@ -0,0 +1,23 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +import * as cdk from 'aws-cdk-lib'; +import { Construct } from 'constructs'; +import * as dsf from '../../index'; + + +class ExampleDefaultDataZoneMskAssetTypeStack extends cdk.Stack { + constructor(scope: Construct, id: string) { + super(scope, id); + + /// !show + new dsf.governance.DataZoneMskAssetType(this, 'DataZoneMskAssetType', { + domainId: 'aba_dc999t9ime9sss', + }); + /// !hide + + } +} + +const app = new cdk.App(); +new ExampleDefaultDataZoneMskAssetTypeStack(app, 'ExampleDefaultDataZoneMskAssetTypeStack'); \ No newline at end of file diff --git a/framework/src/governance/examples/datazone-msk-asset-type-factory.lit.ts b/framework/src/governance/examples/datazone-msk-asset-type-factory.lit.ts new file mode 100644 index 000000000..4f7c55a9d --- /dev/null +++ b/framework/src/governance/examples/datazone-msk-asset-type-factory.lit.ts @@ -0,0 +1,29 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +import * as cdk from 'aws-cdk-lib'; +import { Construct } from 'constructs'; +import * as dsf from '../../index'; + + +class ExampleFactoryDataZoneMskAssetTypeStack extends cdk.Stack { + constructor(scope: Construct, id: string) { + super(scope, id); + + /// !show + const dataZoneAssetFactory = new dsf.governance.DataZoneCustomAssetTypeFactory(this, 'DataZoneCustomAssetTypeFactory', { + domainId: 'aba_dc999t9ime9sss', + }); + + new dsf.governance.DataZoneMskAssetType(this, 'DataZoneMskAssetType', { + domainId: 'aba_dc999t9ime9sss', + projectId: 'xxxxxxxxxxx', + dzCustomAssetTypeFactory: dataZoneAssetFactory + }); + /// !hide + + } +} + +const app = new cdk.App(); +new ExampleFactoryDataZoneMskAssetTypeStack(app, 'ExampleFactoryDataZoneMskAssetTypeStack'); \ No newline at end of file diff --git a/framework/src/governance/examples/datazone-msk-asset-type-project.lit.ts b/framework/src/governance/examples/datazone-msk-asset-type-project.lit.ts new file mode 100644 index 000000000..e6d0612ce --- /dev/null +++ b/framework/src/governance/examples/datazone-msk-asset-type-project.lit.ts @@ -0,0 +1,24 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +import * as cdk from 'aws-cdk-lib'; +import { Construct } from 'constructs'; +import * as dsf from '../../index'; + + +class ExampleProjectDataZoneMskAssetTypeStack extends cdk.Stack { + constructor(scope: Construct, id: string) { + super(scope, id); + + /// !show + new dsf.governance.DataZoneMskAssetType(this, 'DataZoneMskAssetType', { + domainId: 'aba_dc999t9ime9sss', + projectId: 'xxxxxxxxxxx' + }); + /// !hide + + } +} + +const app = new cdk.App(); +new ExampleProjectDataZoneMskAssetTypeStack(app, 'ExampleProjectDataZoneMskAssetTypeStack'); \ No newline at end of file diff --git a/framework/src/governance/examples/datazone-msk-central-authorizer-default.lit.ts b/framework/src/governance/examples/datazone-msk-central-authorizer-default.lit.ts new file mode 100644 index 000000000..7aeacd185 --- /dev/null +++ b/framework/src/governance/examples/datazone-msk-central-authorizer-default.lit.ts @@ -0,0 +1,23 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +import * as cdk from 'aws-cdk-lib'; +import { Construct } from 'constructs'; +import * as dsf from '../../index'; + + +class ExampleDefaultDataZoneMskCentralAuthorizerStack extends cdk.Stack { + constructor(scope: Construct, id: string) { + super(scope, id); + + /// !show + new dsf.governance.DataZoneMskCentralAuthorizer(this, 'MskAuthorizer', { + domainId: 'aba_dc999t9ime9sss', + }); + /// !hide + + } +} + +const app = new cdk.App(); +new ExampleDefaultDataZoneMskCentralAuthorizerStack(app, 'ExampleDefaultDataZoneMskCentralAuthorizerStack'); \ No newline at end of file diff --git a/framework/src/governance/examples/datazone-msk-central-authorizer-register.lit.ts b/framework/src/governance/examples/datazone-msk-central-authorizer-register.lit.ts new file mode 100644 index 000000000..0009377d0 --- /dev/null +++ b/framework/src/governance/examples/datazone-msk-central-authorizer-register.lit.ts @@ -0,0 +1,26 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +import * as cdk from 'aws-cdk-lib'; +import { Construct } from 'constructs'; +import * as dsf from '../../index'; + + +class ExampleRegisterDataZoneMskCentralAuthorizerStack extends cdk.Stack { + constructor(scope: Construct, id: string) { + super(scope, id); + + /// !show + const centralAuthorizer = new dsf.governance.DataZoneMskCentralAuthorizer(this, 'MskAuthorizer', { + domainId: 'aba_dc999t9ime9sss', + }); + + // Add an account that is associated with the DataZone Domain + centralAuthorizer.registerAccount('123456789012'); + /// !hide + + } +} + +const app = new cdk.App(); +new ExampleRegisterDataZoneMskCentralAuthorizerStack(app, 'ExampleRegisterDataZoneMskCentralAuthorizerStack'); \ No newline at end of file diff --git a/framework/src/governance/examples/datazone-msk-environment-authorizer-cross-account.lit.ts b/framework/src/governance/examples/datazone-msk-environment-authorizer-cross-account.lit.ts new file mode 100644 index 000000000..9d64110c0 --- /dev/null +++ b/framework/src/governance/examples/datazone-msk-environment-authorizer-cross-account.lit.ts @@ -0,0 +1,24 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +import * as cdk from 'aws-cdk-lib'; +import { Construct } from 'constructs'; +import * as dsf from '../../index'; + + +class ExampleCrossAccountDataZoneMskEnvironmentAuthorizerStack extends cdk.Stack { + constructor(scope: Construct, id: string) { + super(scope, id); + + /// !show + new dsf.governance.DataZoneMskEnvironmentAuthorizer(this, 'MskAuthorizer', { + domainId: 'aba_dc999t9ime9sss', + centralAccountId: '123456789012' + }); + /// !hide + + } +} + +const app = new cdk.App(); +new ExampleCrossAccountDataZoneMskEnvironmentAuthorizerStack(app, 'ExampleCrossAccountDataZoneMskEnvironmentAuthorizerStack'); \ No newline at end of file diff --git a/framework/src/governance/examples/datazone-msk-environment-authorizer-default.lit.ts b/framework/src/governance/examples/datazone-msk-environment-authorizer-default.lit.ts new file mode 100644 index 000000000..77d05b60a --- /dev/null +++ b/framework/src/governance/examples/datazone-msk-environment-authorizer-default.lit.ts @@ -0,0 +1,23 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +import * as cdk from 'aws-cdk-lib'; +import { Construct } from 'constructs'; +import * as dsf from '../../index'; + + +class ExampleDefaultDataZoneMskEnvironmentAuthorizerStack extends cdk.Stack { + constructor(scope: Construct, id: string) { + super(scope, id); + + /// !show + new dsf.governance.DataZoneMskEnvironmentAuthorizer(this, 'MskAuthorizer', { + domainId: 'aba_dc999t9ime9sss', + }); + /// !hide + + } +} + +const app = new cdk.App(); +new ExampleDefaultDataZoneMskEnvironmentAuthorizerStack(app, 'ExampleDefaultDataZoneMskEnvironmentAuthorizerStack'); \ No newline at end of file diff --git a/framework/src/governance/examples/datazone-msk-environment-authorizer-vpc.lit.ts b/framework/src/governance/examples/datazone-msk-environment-authorizer-vpc.lit.ts new file mode 100644 index 000000000..59e4b4e2e --- /dev/null +++ b/framework/src/governance/examples/datazone-msk-environment-authorizer-vpc.lit.ts @@ -0,0 +1,25 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +import * as cdk from 'aws-cdk-lib'; +import { Construct } from 'constructs'; +import * as dsf from '../../index'; + + +class ExampleVpcDataZoneMskEnvironmentAuthorizerStack extends cdk.Stack { + constructor(scope: Construct, id: string) { + super(scope, id); + + /// !show + new dsf.governance.DataZoneMskEnvironmentAuthorizer(this, 'MskAuthorizer', { + domainId: 'aba_dc999t9ime9sss', + centralAccountId: '123456789012', + grantMskManagedVpc: true, + }); + /// !hide + + } +} + +const app = new cdk.App(); +new ExampleVpcDataZoneMskEnvironmentAuthorizerStack(app, 'ExampleVpcDataZoneMskEnvironmentAuthorizerStack'); \ No newline at end of file diff --git a/framework/src/governance/lib/custom-authorizer-central-helpers.ts b/framework/src/governance/lib/custom-authorizer-central-helpers.ts new file mode 100644 index 000000000..e43fccc36 --- /dev/null +++ b/framework/src/governance/lib/custom-authorizer-central-helpers.ts @@ -0,0 +1,275 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +import { Duration, RemovalPolicy, Stack } from 'aws-cdk-lib'; +import { CfnEventBusPolicy, EventBus, EventPattern, IRule, Rule } from 'aws-cdk-lib/aws-events'; +import { LambdaFunction, SfnStateMachine } from 'aws-cdk-lib/aws-events-targets'; +import { IRole, ManagedPolicy, PolicyStatement, Role, ServicePrincipal } from 'aws-cdk-lib/aws-iam'; +import { IFunction, Function, Code, Runtime } from 'aws-cdk-lib/aws-lambda'; +import { IQueue, Queue } from 'aws-cdk-lib/aws-sqs'; +import { DefinitionBody, Fail, IntegrationPattern, JsonPath, StateMachine, TaskInput, Timeout } from 'aws-cdk-lib/aws-stepfunctions'; +import { CallAwsService, LambdaInvoke } from 'aws-cdk-lib/aws-stepfunctions-tasks'; +import { Construct } from 'constructs'; +import { Utils } from '../../utils'; + +/** + * Interface for the authorizer central workflow + */ +export interface AuthorizerCentralWorflow{ + /** + * The authorizer Step Functions state machine + */ + readonly stateMachine: StateMachine; + /** + * The authorizer dead letter queue for failed events + */ + readonly deadLetterQueue: IQueue; + /** + * The authorizer event rule for triggering the workflow + */ + readonly authorizerEventRule: IRule; + /** + * The authorizer event role for allowing events to invoke the workflow + */ + readonly authorizerEventRole: IRole; + /** + * The callback event rule for listening to producer and subscriber grants callback + */ + readonly callbackEventRule: IRule; + /** + * The Lambda function for handling producer and subscriber grants callback + */ + readonly callbackFunction: IFunction; + /** + * The role for the Lambda function handling producer and subscriber grants callback + */ + readonly callbackRole: IRole; +} +/** + * Grant type for the authorizer workflow + */ +enum GrantType{ + CONSUMER='consumerGrant', + PRODUCER='producerGrant' +} + +/** + * Create the resources used by a central authorizer workflow. + * @param scope The scope creating the resources + * @param authorizerName The name of the authorizer + * @param metadataCollectorFunction The Lambda function collecting metadata from the governance tool + * @param governanceCallbackFunction The Lambda function acknowledging the grant in the governance tool + * @param eventPattern The event pattern for triggering the authorizer workflow + * @param workflowTimeout The timeout for the authorizer workflow. @default - 5 minutes + * @param retryAttempts The number of retry attempts for the authorizer workflow. @default - No retry + * @param removalPolicy The removal policy for the created resources. 
@default - RemovalPolicy.RETAIN
+ * @returns The created AuthorizerCentralWorflow
+ */
+export function authorizerCentralWorkflowSetup(
+  scope: Construct,
+  authorizerName: string,
+  metadataCollectorFunction: IFunction,
+  governanceCallbackFunction: IFunction,
+  eventPattern: EventPattern,
+  workflowTimeout?: Duration,
+  retryAttempts?: number,
+  removalPolicy?: RemovalPolicy): AuthorizerCentralWorflow {
+
+  const DEFAULT_TIMEOUT = Duration.minutes(5);
+  const DEFAULT_RETRY_ATTEMPTS = 0;
+
+  const authorizerEventRule = new Rule(scope, 'AuthorizerEventRule', {
+    eventPattern,
+  });
+
+  const authorizerEventRole = new Role(scope, 'SourceEventRole', {
+    assumedBy: new ServicePrincipal('events.amazonaws.com'),
+  });
+
+  const callbackEventRule = new Rule(scope, 'CallbackEventRule', {
+    eventPattern: {
+      source: [authorizerName],
+      detailType: ['callback'],
+    },
+  });
+
+  const metadataCollector = new LambdaInvoke(scope, 'MetadataCollector', {
+    lambdaFunction: metadataCollectorFunction,
+    resultSelector: { 'Metadata.$': '$.Payload' },
+    taskTimeout: Timeout.duration(Duration.minutes(2)),
+  });
+
+  const invokeProducerGrant = invokeGrant(scope, 'ProducerGrant', authorizerName, GrantType.PRODUCER);
+
+  const invokeConsumerGrant = invokeGrant(scope, 'ConsumerGrant', authorizerName, GrantType.CONSUMER);
+
+  const governanceSuccessCallback = new LambdaInvoke(scope, 'GovernanceSuccessCallback', {
+    lambdaFunction: governanceCallbackFunction,
+    taskTimeout: Timeout.duration(Duration.minutes(1)),
+    payload: TaskInput.fromObject({
+      Status: 'success',
+      Metadata: JsonPath.stringAt('$.Metadata'),
+    }),
+  });
+
+  const governanceFailureCallback = new LambdaInvoke(scope, 'GovernanceFailureCallback', {
+    lambdaFunction: governanceCallbackFunction,
+    taskTimeout: Timeout.duration(Duration.minutes(1)),
+    payload: TaskInput.fromObject({
+      Status: 'failure',
+      Metadata: JsonPath.stringAt('$.Metadata'),
+      Error: JsonPath.stringAt('$.ErrorInfo.Error'),
+      Cause: JsonPath.stringAt('$.ErrorInfo.Cause'),
+    }),
+  });
+
+  const failure = governanceFailureCallback.next(new Fail(scope, 'CentralWorkflowFailure', {
+    errorPath: '$.ErrorInfo',
+  }));
+
+  metadataCollector.addCatch(failure, {
+    errors: ['States.TaskFailed'],
+    resultPath: '$.ErrorInfo',
+  });
+
+  invokeProducerGrant.addCatch(failure, {
+    errors: ['States.TaskFailed'],
+    resultPath: '$.ErrorInfo',
+  });
+
+  invokeConsumerGrant.addCatch(failure, {
+    errors: ['States.TaskFailed'],
+    resultPath: '$.ErrorInfo',
+  });
+
+  const stateMachineDefinition = metadataCollector
+    .next(invokeProducerGrant)
+    .next(invokeConsumerGrant)
+    .next(governanceSuccessCallback);
+
+  const stateMachine = new StateMachine(scope, 'StateMachine', {
+    definitionBody: DefinitionBody.fromChainable(stateMachineDefinition),
+    timeout: workflowTimeout || DEFAULT_TIMEOUT,
+    removalPolicy: removalPolicy || RemovalPolicy.RETAIN,
+  });
+
+  const deadLetterQueue = new Queue(scope, 'Queue', {
+    enforceSSL: true,
+    removalPolicy: removalPolicy || RemovalPolicy.RETAIN,
+  });
+
+  stateMachine.grantStartExecution(authorizerEventRole);
+
+  authorizerEventRule.addTarget(new SfnStateMachine(stateMachine, {
+    deadLetterQueue,
+    role: authorizerEventRole,
+    retryAttempts: retryAttempts || DEFAULT_RETRY_ATTEMPTS,
+  }));
+
+  const callbackRole = new Role(scope, 'LambdaCallbackRole', {
+    assumedBy: new ServicePrincipal('lambda.amazonaws.com'),
+    managedPolicies: [
+      ManagedPolicy.fromAwsManagedPolicyName('service-role/AWSLambdaBasicExecutionRole'),
+    ],
+  });
+
+  callbackRole.addToPolicy(
+    new 
PolicyStatement({
+      actions: ['states:SendTaskSuccess', 'states:SendTaskFailure'],
+      resources: [stateMachine.stateMachineArn],
+    }),
+  );
+
+  const callbackFunction = new Function(scope, 'CallbackFunction', {
+    runtime: Runtime.NODEJS_20_X,
+    handler: 'index.handler',
+    code: Code.fromAsset(__dirname + '/resources/custom-authorizer-callback/'),
+    role: callbackRole,
+    timeout: Duration.seconds(5),
+  });
+
+  stateMachine.grantTaskResponse(callbackRole);
+
+  callbackEventRule.addTarget(new LambdaFunction(callbackFunction, {
+    deadLetterQueue,
+    maxEventAge: Duration.hours(1),
+    retryAttempts: 10,
+  }));
+
+  return { stateMachine, deadLetterQueue, authorizerEventRule, authorizerEventRole, callbackEventRule, callbackFunction, callbackRole };
+}
+
+/**
+ * Grant a role to put events in the default EventBridge bus of an account.
+ * This method adds an IAM role policy but doesn't modify the EventBridge bus resource policy.
+ * @param scope The scope creating the resources
+ * @param accountId The account ID of the default EventBridge bus
+ * @param role The role to grant access to
+ */
+export function grantPutEvents(scope: Construct, accountId: string, role: IRole) {
+
+  const targetEventBus = EventBus.fromEventBusArn(
+    scope,
+    `${accountId}CentralEventBus`,
+    `arn:${Stack.of(scope).partition}:events:${Stack.of(scope).region}:${accountId}:event-bus/default`,
+  );
+
+  targetEventBus.grantPutEventsTo(role);
+}
+
+/**
+ * Register an account into a central authorizer workflow to allow cross-account communication.
+ * @param scope The scope of created resources
+ * @param id The id of the created resources
+ * @param accountId The account ID to register with the central authorizer
+ * @param role The role to grant access to
+ * @returns The CfnEventBusPolicy created to grant the account
+ */
+export function registerAccount(scope: Construct, id: string, accountId: string, role: IRole): CfnEventBusPolicy {
+
+  grantPutEvents(scope, accountId, role);
+
+  return new CfnEventBusPolicy(scope, `${accountId}${id}CentralEventBusPolicy`, {
+    statementId: Utils.stringSanitizer(accountId + id),
+    action: 'events:PutEvents',
+    principal: accountId,
+    eventBusName: 'default',
+  });
+};
+
+/**
+ * Creates a Step Functions task to trigger a producer or consumer grant via EventBridge events
+ * @param scope The scope of created resources
+ * @param id The id of the created resources
+ * @param authorizerName The name of the authorizer
+ * @param grantType The grant type (PRODUCER or CONSUMER)
+ * @returns The CallAwsService task to use in the Step Functions state machine
+ */
+function invokeGrant(scope: Construct, id: string, authorizerName: string, grantType: GrantType): CallAwsService {
+
+  const eventBusName = grantType === GrantType.CONSUMER ?
+    JsonPath.format('arn:aws:events:{}:{}:event-bus/default', JsonPath.stringAt('$.Metadata.Consumer.Region'), JsonPath.stringAt('$.Metadata.Consumer.Account')) :
+    JsonPath.format('arn:aws:events:{}:{}:event-bus/default', JsonPath.stringAt('$.Metadata.Producer.Region'), JsonPath.stringAt('$.Metadata.Producer.Account'));
+
+  return new CallAwsService(scope, `${id}EventBridgePutEvents`, {
+    service: 'eventbridge',
+    action: 'putEvents',
+    parameters: {
+      Entries: [{
+        Detail: TaskInput.fromObject({
+          TaskToken: JsonPath.taskToken,
+          Metadata: JsonPath.objectAt('$.Metadata'),
+        }),
+        DetailType: grantType,
+        Source: authorizerName,
+        EventBusName: eventBusName,
+      }],
+    },
+    // The target account is only known at runtime, so allow the default bus of any account in the region
+    iamResources: [`arn:aws:events:${Stack.of(scope).region}:*:event-bus/default`],
+    integrationPattern: IntegrationPattern.WAIT_FOR_TASK_TOKEN,
+    taskTimeout: Timeout.duration(Duration.minutes(5)),
+    resultPath: JsonPath.DISCARD,
+  });
+}
\ No newline at end of file
diff --git a/framework/src/governance/lib/custom-authorizer-environment-helpers.ts b/framework/src/governance/lib/custom-authorizer-environment-helpers.ts
new file mode 100644
index 000000000..7fdfe011f
--- /dev/null
+++ b/framework/src/governance/lib/custom-authorizer-environment-helpers.ts
@@ -0,0 +1,153 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+import { Duration, RemovalPolicy, Stack } from 'aws-cdk-lib';
+import { CfnEventBusPolicy, EventBus, IRule, Rule } from 'aws-cdk-lib/aws-events';
+import { SfnStateMachine } from 'aws-cdk-lib/aws-events-targets';
+import { IRole, Role, ServicePrincipal } from 'aws-cdk-lib/aws-iam';
+import { IFunction } from 'aws-cdk-lib/aws-lambda';
+import { IQueue, Queue } from 'aws-cdk-lib/aws-sqs';
+import { DefinitionBody, IStateMachine, JsonPath, StateMachine, TaskInput, Timeout } from 'aws-cdk-lib/aws-stepfunctions';
+import { EventBridgePutEvents, LambdaInvoke } from 'aws-cdk-lib/aws-stepfunctions-tasks';
+import { Construct } from 'constructs';
+
+/**
+ * The interface representing the environment custom authorizer workflow.
+ */
+export interface AuthorizerEnvironmentWorflow{
+  /**
+   * The state machine that orchestrates the workflow.
+   */
+  readonly stateMachine: IStateMachine;
+  /**
+   * The event rule that triggers the workflow.
+   */
+  readonly eventRule: IRule;
+  /**
+   * The role used by the event rule to trigger the Step Functions state machine.
+   */
+  readonly eventRole: IRole;
+  /**
+   * The dead letter queue for failed events.
+   */
+  readonly deadLetterQueue: IQueue;
+  /**
+   * The optional event bus policy for cross-account workflows.
+   */
+  readonly eventBusPolicy?: CfnEventBusPolicy;
+}
+
+/**
+ * Create the resources used by an environment authorizer workflow.
+ * @param scope The scope of the resources created
+ * @param authorizerName The name of the authorizer
+ * @param grantFunction The Lambda function creating the grants
+ * @param centralAccount The central account ID hosting the central authorizer workflow
+ * @param workflowTimeout The timeout for the authorizer workflow. @default - 5 minutes
+ * @param retryAttempts The number of retry attempts for the authorizer workflow. @default - No retry
+ * @param removalPolicy The removal policy for the created resources. 
@default - RemovalPolicy.RETAIN + * @returns The created AuthorizerEnvironmentWorflow + */ +export function authorizerEnvironmentWorkflowSetup( + scope: Construct, + authorizerName: string, + grantFunction: IFunction, + centralAccount?: string, + workflowTimeout?: Duration, + retryAttempts?: number, + removalPolicy?: RemovalPolicy): AuthorizerEnvironmentWorflow { + + const DEFAULT_TIMEOUT = Duration.minutes(5); + const DEFAULT_RETRY_ATTEMPTS = 0; + + const eventRule = new Rule(scope, 'CentralEventRule', { + eventPattern: { + source: [authorizerName], + detailType: ['producerGrant', 'consumerGrant'], + }, + }); + + const grant = new LambdaInvoke(scope, 'GrantInvoke', { + lambdaFunction: grantFunction, + resultPath: '$.GrantResult', + taskTimeout: Timeout.duration(Duration.minutes(2)), + }); + + let eventBusPolicy: CfnEventBusPolicy | undefined = undefined; + + if (centralAccount !== undefined) { + eventBusPolicy = new CfnEventBusPolicy(scope, `${centralAccount}EnvEventBusPolicy`, { + statementId: centralAccount, + action: 'events:PutEvents', + principal: centralAccount, + eventBusName: 'default', + }); + } + const eventBusAccount = centralAccount || Stack.of(scope).account; + + const centralEventBus = EventBus.fromEventBusArn( + scope, + 'CentralEventBus', + `arn:${Stack.of(scope).partition}:events:${Stack.of(scope).region}:${eventBusAccount}:event-bus/default`, + ); + + const authorizerFailureCallbackEvent = new EventBridgePutEvents(scope, 'FailureCallback', { + entries: [{ + detail: TaskInput.fromObject({ + TaskToken: JsonPath.stringAt('$.detail.value.TaskToken'), + Status: 'failure', + Error: JsonPath.stringAt('$.ErrorInfo.Error'), + Cause: JsonPath.stringAt('$.ErrorInfo.Cause'), + }), + eventBus: centralEventBus, + detailType: 'callback', + source: authorizerName, + }], + }); + + grant.addCatch(authorizerFailureCallbackEvent, { + errors: ['States.TaskFailed'], + resultPath: '$.ErrorInfo', + }); + + const authorizerSuccessCallbackEvent = new EventBridgePutEvents(scope, 'SuccessCallback', { + entries: [{ + detail: TaskInput.fromObject({ + TaskToken: JsonPath.stringAt('$.detail.value.TaskToken'), + Status: 'success', + }), + eventBus: centralEventBus, + detailType: 'callback', + source: authorizerName, + }], + }); + + const stateMachineDefinition = grant + .next(authorizerSuccessCallbackEvent); + + const stateMachine = new StateMachine(scope, 'StateMachine', { + definitionBody: DefinitionBody.fromChainable(stateMachineDefinition), + timeout: workflowTimeout || DEFAULT_TIMEOUT, + removalPolicy: removalPolicy || RemovalPolicy.RETAIN, + }); + + centralEventBus.grantPutEventsTo(stateMachine.role); + + const deadLetterQueue = new Queue(scope, 'Queue', { + enforceSSL: true, + removalPolicy: removalPolicy || RemovalPolicy.RETAIN, + }); + + const eventRole = new Role(scope, 'CentralEventRole', { + assumedBy: new ServicePrincipal('events.amazonaws.com'), + }); + stateMachine.grantStartExecution(eventRole); + + eventRule.addTarget(new SfnStateMachine(stateMachine, { + deadLetterQueue, + role: eventRole, + retryAttempts: retryAttempts || DEFAULT_RETRY_ATTEMPTS, + })); + + return { stateMachine, eventRule, eventRole, deadLetterQueue, eventBusPolicy }; +} \ No newline at end of file diff --git a/framework/src/governance/lib/datazone/datazone-custom-asset-type-factory.ts b/framework/src/governance/lib/datazone/datazone-custom-asset-type-factory.ts new file mode 100644 index 000000000..978322a19 --- /dev/null +++ b/framework/src/governance/lib/datazone/datazone-custom-asset-type-factory.ts @@ -0,0 +1,189 
@@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { CustomResource, Duration, RemovalPolicy, Stack } from 'aws-cdk-lib'; +import { CfnProjectMembership } from 'aws-cdk-lib/aws-datazone'; +import { Effect, IRole, ManagedPolicy, PolicyDocument, PolicyStatement, Role, ServicePrincipal } from 'aws-cdk-lib/aws-iam'; +import { IFunction } from 'aws-cdk-lib/aws-lambda'; +import { ILogGroup } from 'aws-cdk-lib/aws-logs'; +import { Construct } from 'constructs'; +import { DataZoneCustomAssetTypeProps } from './datazone-custom-asset-type-props'; +import { DataZoneHelpers } from './datazone-helpers'; +import { Context, TrackedConstruct, TrackedConstructProps } from '../../../utils'; +import { DsfProvider } from '../../../utils/lib/dsf-provider'; + +/** + * Properties for the DataZoneCustomAssetTypeFactory construct + */ +export interface DataZoneCustomAssetTypeFactoryProps { + /** + * The DataZone domain identifier + */ + readonly domainId: string; + /** + * The removal policy for the custom resource + * @default RemovalPolicy.RETAIN + */ + readonly removalPolicy?: RemovalPolicy; +} + +/** + * Interface representing a DataZone custom asset type + */ +export interface CustomAssetType { + /** + * The domain identifier of the custom asset type + */ + readonly domainIdentifier: string; + /** + * The name of the custom asset type + */ + readonly name: string; + /** + * The project identifier owner of the custom asset type + */ + readonly projectIdentifier: string; + /** + * The revision of the custom asset type + */ + readonly revision: string; +} + +/** + * Factory construct providing resources to create a DataZone custom asset type. + * + * @example + * new dsf.governance.DataZoneCustomAssetTypeFactory(this, 'CustomAssetTypeFactory', { + * domainId: 'aba_dc999t9ime9sss', + * }); + * + */ +export class DataZoneCustomAssetTypeFactory extends TrackedConstruct { + /** + * The CloudWatch Logs Log Group for the DataZone custom asset type creation + */ + readonly createLogGroup: ILogGroup; + /** + * The Lambda Function for the DataZone custom asset type creation + */ + readonly createFunction: IFunction; + /** + * The IAM Role for the DataZone custom asset type creation + */ + readonly createRole: IRole; + /** + * The service token for the custom resource + */ + readonly serviceToken: string; + /** + * The role used by the custom resource + */ + readonly handlerRole: IRole; + + private readonly domainId: string; + private readonly removalPolicy: RemovalPolicy; + + /** + * Constructs a new instance of DataZoneCustomAssetTypeFactory + * @param scope the Scope of the CDK Construct + * @param id the ID of the CDK Construct + * @param props The DataZoneCustomAssetTypeFactory properties + */ + constructor(scope: Construct, id: string, props: DataZoneCustomAssetTypeFactoryProps) { + const trackedConstructProps: TrackedConstructProps = { + trackingTag: DataZoneCustomAssetTypeFactory.name, + }; + + super(scope, id, trackedConstructProps); + + const stack = Stack.of(this); + + this.domainId = props.domainId; + + this.removalPolicy = Context.revertRemovalPolicy(scope, props.removalPolicy); + + this.handlerRole = new Role(this, 'HandlerRole', { + assumedBy: new ServicePrincipal('lambda.amazonaws.com'), + managedPolicies: [ + ManagedPolicy.fromAwsManagedPolicyName('service-role/AWSLambdaBasicExecutionRole'), + ], + inlinePolicies: { + DataZonePermission: new PolicyDocument({ + statements: [ + new PolicyStatement({ + effect: Effect.ALLOW, + actions: [ 
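+ // permissions to manage the custom asset types and form types, scoped to the DataZone domain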
+ 'datazone:CreateFormType', + 'datazone:CreateAssetType', + 'datazone:DeleteAssetType', + 'datazone:DeleteFormType', + 'datazone:GetFormType', + ], + resources: [`arn:${stack.partition}:datazone:${stack.region}:${stack.account}:domain/${this.domainId}`], + }), + ], + }), + }, + }); + + const provider = new DsfProvider(this, 'Provider', { + providerName: 'DataZoneCustomAssetType', + onEventHandlerDefinition: { + depsLockFilePath: __dirname+'/resources/datazone-custom-asset-type/package-lock.json', + entryFile: __dirname+'/resources/datazone-custom-asset-type/index.mjs', + handler: 'index.handler', + iamRole: this.handlerRole, + timeout: Duration.minutes(2), + }, + removalPolicy: this.removalPolicy, + }); + + this.createLogGroup = provider.onEventHandlerLogGroup; + this.createFunction = provider.onEventHandlerFunction; + this.createRole = provider.onEventHandlerRole; + this.serviceToken = provider.serviceToken; + } + + /** + * Creates a DataZone custom asset type based on the provided properties + * @param id the ID of the CDK Construct + * @param customAssetType the properties of the custom asset type + * @returns the custom asset type + */ + public createCustomAssetType(id: string, customAssetType: DataZoneCustomAssetTypeProps): CustomAssetType { + + // create a project membership for the custom resource role so it can create custom asset types in this project + const projMembership = new CfnProjectMembership(this, `${id}ProjectMembership`, { + designation: 'PROJECT_OWNER', + domainIdentifier: this.domainId, + member: { + userIdentifier: this.handlerRole.roleArn, + }, + projectIdentifier: customAssetType.projectId, + }); + + // The custom resource creating the custom asset type + const crResp = new CustomResource(this, id, { + serviceToken: this.serviceToken, + removalPolicy: this.removalPolicy, + properties: { + domainId: this.domainId, + projectId: customAssetType.projectId, + // build the Smithy model string from the TypeScript form type props + formTypes: customAssetType.formTypes.map(formType => { return { ...formType, model: DataZoneHelpers.buildModelString(formType) }; }), + assetTypeName: customAssetType.assetTypeName, + assetTypeDescription: customAssetType.assetTypeDescription, + }, + resourceType: 'Custom::DataZoneCustomAssetType', + }); + + crResp.node.addDependency(projMembership); + + return { + domainIdentifier: crResp.getAttString('domainId'), + name: crResp.getAttString('name'), + projectIdentifier: crResp.getAttString('owningProjectId'), + revision: crResp.getAttString('revision'), + }; + } +} \ No newline at end of file diff --git a/framework/src/governance/lib/datazone/datazone-custom-asset-type-props.ts b/framework/src/governance/lib/datazone/datazone-custom-asset-type-props.ts new file mode 100644 index 000000000..443d3314c --- /dev/null +++ b/framework/src/governance/lib/datazone/datazone-custom-asset-type-props.ts @@ -0,0 +1,66 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0 + +/** + * Interface representing a DataZoneFormTypeField + */ +export interface DataZoneFormTypeField { + /** + * The name of the field + */ + readonly name: string; + /** + * The type of the field + */ + readonly type: string; + /** + * Whether the field is required + * @default false + */ + readonly required?: boolean; +} + +/** + * Interface representing a DataZoneFormType + */ +export interface DataZoneFormType { + /** + * The name of the form + */ + readonly name: string; + /** + * The fields of the form + * @example [{ name: 'firstName', type: 'String', required: true }] + * @default - No model is required. The form is already configured in DataZone. + */ + readonly model?: DataZoneFormTypeField[]; + /** + * Whether the form is required + * @default false + */ + readonly required?: boolean; +} + +/** + * Properties for the DataZoneCustomAssetType construct + */ +export interface DataZoneCustomAssetTypeProps { + /** + * The project identifier owner of the custom asset type + */ + readonly projectId: string; + /** + * The form types of the custom asset type + * @example [{ name: 'userForm', model: [{ name: 'firstName', type: 'String', required: true }] }] + */ + readonly formTypes: DataZoneFormType[]; + /** + * The name of the custom asset type + */ + readonly assetTypeName: string; + /** + * The description of the custom asset type + * @default - No description provided + */ + readonly assetTypeDescription?: string; +} \ No newline at end of file diff --git a/framework/src/governance/lib/datazone/datazone-gsr-msk-datasource-props.ts b/framework/src/governance/lib/datazone/datazone-gsr-msk-datasource-props.ts new file mode 100644 index 000000000..e66236e7c --- /dev/null +++ b/framework/src/governance/lib/datazone/datazone-gsr-msk-datasource-props.ts @@ -0,0 +1,41 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { Schedule } from 'aws-cdk-lib/aws-events'; + +/** + * Properties for configuring a DataZone GSR MSK datasource. + */ +export interface DataZoneGsrMskDataSourceProps { + /** + * The name of the MSK (Managed Streaming for Apache Kafka) cluster to use. + */ + readonly clusterName: string; + + /** + * The unique identifier for the DataZone domain where the datasource resides. + */ + readonly domainId: string; + + /** + * The unique identifier for the project associated with this datasource. + */ + readonly projectId: string; + + /** + * The name of the registry for schema management. + */ + readonly registryName: string; + + /** + * Optional. Defines the schedule for EventBridge events, specified using cron expressions. + * @default - `cron(1 0 * * ? *)` if `enableSchemaRegistryEvent` is false or undefined, otherwise no schedule. + */ + readonly runSchedule?: Schedule; + + /** + * Optional. A flag to enable or disable the EventBridge listener for schema registry changes. + * @default - false, meaning the EventBridge listener for schema changes is disabled. + */ + readonly enableSchemaRegistryEvent?: boolean; +} diff --git a/framework/src/governance/lib/datazone/datazone-gsr-msk-datasource.ts b/framework/src/governance/lib/datazone/datazone-gsr-msk-datasource.ts new file mode 100644 index 000000000..abc6d06b8 --- /dev/null +++ b/framework/src/governance/lib/datazone/datazone-gsr-msk-datasource.ts @@ -0,0 +1,232 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0 + +import { Duration, Stack } from 'aws-cdk-lib'; +import { CfnProjectMembership } from 'aws-cdk-lib/aws-datazone'; +import { Rule, RuleTargetInput, Schedule } from 'aws-cdk-lib/aws-events'; +import { LambdaFunction } from 'aws-cdk-lib/aws-events-targets'; +import { + Effect, + ManagedPolicy, + PolicyDocument, + PolicyStatement, + Role, + ServicePrincipal, +} from 'aws-cdk-lib/aws-iam'; +import { Function, Runtime, Code } from 'aws-cdk-lib/aws-lambda'; +import { Construct } from 'constructs'; +import { DataZoneGsrMskDataSourceProps } from './datazone-gsr-msk-datasource-props'; +import { TrackedConstruct, TrackedConstructProps } from '../../../utils'; + +/** + * A DataZone custom data source for MSK (Managed Streaming for Apache Kafka) with integration for Glue Schema Registry. + * + * @example + * import { Schedule } from 'aws-cdk-lib/aws-events'; + * + * new dsf.governance.DataZoneGsrMskDataSource(this, 'MskDatasource', { + * domainId: 'aba_dc999t9ime9sss', + * projectId: '999999b3m5cpz', + * registryName: 'MyRegistry', + * clusterName: 'MyCluster', + * runSchedule: Schedule.cron({ minute: '0', hour: '12' }), // Trigger daily at noon + * enableSchemaRegistryEvent: true, // Enable events for Glue Schema Registry changes + * }); + */ +export class DataZoneGsrMskDataSource extends TrackedConstruct { + + /** + * The IAM Role of the Lambda Function interacting with the DataZone API + */ + public readonly datasourceLambdaRole: Role; + /** + * The membership of the Lambda Role on the DataZone Project + */ + public readonly dataZoneMembership: CfnProjectMembership; + /** + * The Event Bridge Rule for schema creation and update + */ + public readonly createUpdateEventRule?: Rule; + /** + * The Event Bridge Rule triggering the data source execution + */ + public readonly scheduleRule?: Rule; + /** + * The Event Bridge Rule for schema deletion + */ + public readonly deleteEventRule?: Rule; + + /** + * Build an instance of the DataZoneGsrMskDataSource + * @param scope the Scope of the CDK Construct + * @param id the ID of the CDK Construct + * @param props The DataZoneGsrMskDataSourceProps properties + */ + constructor(scope: Construct, id: string, props: DataZoneGsrMskDataSourceProps) { + const trackedConstructProps: TrackedConstructProps = { + trackingTag: DataZoneGsrMskDataSource.name, + }; + + super(scope, id, trackedConstructProps); + + const stack = Stack.of(this); + const accountId = stack.account; + const region = stack.region; + const partition = stack.partition; + + const clusterArn = `arn:${partition}:kafka:${region}:${accountId}:cluster/${props.clusterName}/*`; + const listClustersArn = `arn:${partition}:kafka:${region}:${accountId}:/api/v2/clusters`; + const glueRegistryArn = `arn:${partition}:glue:${region}:${accountId}:registry/${props.registryName}`; + const glueRegistrySchemasArn = `arn:${partition}:glue:${region}:${accountId}:schema/${props.registryName}/*`; + + // Define SSM Parameter paths to store asset information + const parameterPrefix = `/datazone/${props.domainId}/${props.registryName}/asset/`; + + this.datasourceLambdaRole = new Role(this, 'HandlerRole', { + assumedBy: new ServicePrincipal('lambda.amazonaws.com'), + managedPolicies: [ + ManagedPolicy.fromAwsManagedPolicyName('service-role/AWSLambdaBasicExecutionRole'), + ], + inlinePolicies: { + DataZonePermission: new PolicyDocument({ + statements: [ + new PolicyStatement({ + effect: Effect.ALLOW, + actions: [ + 'datazone:CreateAsset', + 'datazone:CreateAssetType', + 'datazone:CreateFormType',
+ 'datazone:GetAssetType', + 'datazone:GetFormType', + 'datazone:GetAsset', + 'datazone:CreateAssetRevision', + 'datazone:DeleteAsset', + ], + resources: [ + `arn:${partition}:datazone:${region}:${accountId}:domain/${props.domainId}`, + `arn:${partition}:datazone:${region}:${accountId}:project/${props.projectId}`, + ], + }), + new PolicyStatement({ + effect: Effect.ALLOW, + actions: [ + 'glue:GetSchemaVersion', + 'glue:ListSchemas', + 'glue:ListSchemaVersions', + ], + resources: [glueRegistryArn, glueRegistrySchemasArn], + }), + new PolicyStatement({ + effect: Effect.ALLOW, + actions: [ + 'kafka:DescribeClusterV2', + ], + resources: [clusterArn], + }), + new PolicyStatement({ + effect: Effect.ALLOW, + actions: [ + 'kafka:ListClustersV2', + ], + resources: [listClustersArn], + }), + new PolicyStatement({ + effect: Effect.ALLOW, + actions: [ + 'ssm:GetParameter', + 'ssm:PutParameter', + 'ssm:DeleteParameter', + 'ssm:GetParametersByPath', + ], + resources: [ + `arn:${partition}:ssm:${region}:${accountId}:parameter${parameterPrefix}*`, + ], + }), + ], + }), + }, + }); + + this.dataZoneMembership = new CfnProjectMembership(this, 'ProjectMembership', { + designation: 'PROJECT_CONTRIBUTOR', + domainIdentifier: props.domainId, + projectIdentifier: props.projectId, + member: { + userIdentifier: this.datasourceLambdaRole.roleArn, + }, + }); + + const lambdaCrawler = new Function(this, 'DataZoneGsrMskDataSource', { + runtime: Runtime.NODEJS_20_X, + handler: 'index.handler', + role: this.datasourceLambdaRole, + timeout: Duration.minutes(5), + code: Code.fromAsset(__dirname + '/resources/datazone-gsr-msk-datasource/'), + environment: { + DOMAIN_ID: props.domainId, + PROJECT_ID: props.projectId, + CLUSTER_NAME: props.clusterName, + REGION: region, + REGISTRY_NAME: props.registryName, + ACCOUNT_ID: accountId, + PARAMETER_PREFIX: parameterPrefix, + PARTITION: partition, + }, + }); + + lambdaCrawler.node.addDependency(this.dataZoneMembership); + + // Add EventBridge Rule for cron schedule (if provided) + // the default schedule applies unless schema registry events are explicitly enabled, matching the documented default + if (props.runSchedule || !props.enableSchemaRegistryEvent) { + this.scheduleRule = new Rule(this, 'ScheduledRule', { + schedule: props.runSchedule || Schedule.expression('cron(1 0 * * ?
*)'), + targets: [new LambdaFunction(lambdaCrawler)], + }); + } + + // Add EventBridge Rule for Glue Schema Registry changes (if enabled) + if (props.enableSchemaRegistryEvent) { + this.createUpdateEventRule = new Rule(this, 'SchemaRegistryEventRule', { + eventPattern: { + source: ['aws.glue'], + detail: { + eventSource: ['glue.amazonaws.com'], + eventName: ['CreateSchema', 'RegisterSchemaVersion'], + responseElements: { + registryName: [props.registryName], + }, + }, + }, + targets: [ + new LambdaFunction(lambdaCrawler, { + event: RuleTargetInput.fromObject({ registryName: props.registryName }), + }), + ], + }); + + // Rule for DeleteSchema + this.deleteEventRule = new Rule(this, 'DeleteSchemaRule', { + ruleName: 'DeleteSchemaRule', + eventPattern: { + source: ['aws.glue'], + detail: { + eventSource: ['glue.amazonaws.com'], + eventName: ['DeleteSchema'], + requestParameters: { + schemaId: { + schemaArn: [{ + prefix: `arn:${partition}:glue:${region}:${accountId}:schema/${props.registryName}/*`, + }], + }, + }, + }, + }, + targets: [ + new LambdaFunction(lambdaCrawler, { + event: RuleTargetInput.fromObject({ registryName: props.registryName }), + }), + ], + }); + } + } +} diff --git a/framework/src/governance/lib/datazone/datazone-helpers.ts b/framework/src/governance/lib/datazone/datazone-helpers.ts new file mode 100644 index 000000000..c33598ff9 --- /dev/null +++ b/framework/src/governance/lib/datazone/datazone-helpers.ts @@ -0,0 +1,95 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { CfnSubscriptionTarget } from 'aws-cdk-lib/aws-datazone'; +import { IRole } from 'aws-cdk-lib/aws-iam'; +import { Construct } from 'constructs'; +import { CustomAssetType } from './datazone-custom-asset-type-factory'; +import { DataZoneFormType } from './datazone-custom-asset-type-props'; + +export class DataZoneHelpers { + + /** + * Build a Smithy model string from model fields. + * @param formType The form type containing the model fields. + * @returns The Smithy model string. + */ + static buildModelString(formType: DataZoneFormType): string|undefined { + + if (formType.model !== undefined) { + const fieldStrings = formType.model.map(field => { + const requiredIndicator = field.required ? '@required' : ''; + // Check if field.type is a valid Smithy type + if (DataZoneHelpers.isValidSmithyType(field.type)) { + const uppercasedType = field.type.charAt(0).toUpperCase() + field.type.toLowerCase().slice(1); + return `${requiredIndicator}\n${field.name}: ${uppercasedType}`; + } else { + throw new Error(`Invalid field type: ${field.type}`); + } + }); + + return ` + structure ${formType.name} { + ${fieldStrings.join('\n')} + } + `; + } else { + return undefined; + } + } + + /** + * Creates a DataZone subscription target for a custom asset type. + * Subscription targets are used to automatically add assets to environments when a custom asset is subscribed to by a project. + * @param scope The scope of the construct. + * @param id The id of the construct. + * @param customAssetType The custom asset type that can be added to the environment. + * @param name The name of the subscription target. + * @param provider The provider of the subscription target (for example `dsf`). + * @param environmentId The DataZone environment identifier. + * @param authorizedPrincipals The authorized principals to be granted when assets are subscribed. + * @param manageAccessRole The IAM role creating the subscription target. + * @returns The DataZone subscription target.
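+ * @example + * // Hypothetical usage sketch: mskAssetType, myEnvironmentId, consumerRole and accessRole are assumed to exist in the surrounding scope. + * DataZoneHelpers.createSubscriptionTarget(this, 'MskTarget', mskAssetType.mskCustomAssetType, 'MskTopicsTarget', 'dsf', myEnvironmentId, [consumerRole], accessRole);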
+ */ + static createSubscriptionTarget( + scope: Construct, + id: string, + customAssetType: CustomAssetType, + name: string, + provider: string, + environmentId: string, + authorizedPrincipals: IRole[], + manageAccessRole: IRole) { + + return new CfnSubscriptionTarget( + scope, + `${id}SubscriptionTarget`, + { + applicableAssetTypes: [customAssetType.name], + authorizedPrincipals: authorizedPrincipals.map(r => r.roleArn), + domainIdentifier: customAssetType.domainIdentifier, + environmentIdentifier: environmentId, + manageAccessRole: manageAccessRole.roleArn, + name, + provider, + subscriptionTargetConfig: [], + type: 'BaseSubscriptionTargetType', + }, + ); + } + + // The list of valid types for specifying Smithy models in TypeScript + private static readonly VALID_SMITHY_TYPES = [ + 'string', 'boolean', 'byte', 'short', 'integer', 'long', 'float', 'double', + 'bigInteger', 'bigDecimal', 'blob', 'document', 'timestamp', 'enum', 'intEnum', + ]; + + /** + * Check if a type is a valid Smithy type. + * @param type The Smithy type to check. + * @returns True if the type is a valid Smithy type, false otherwise. + */ + private static isValidSmithyType(type: string): boolean { + return DataZoneHelpers.VALID_SMITHY_TYPES.includes(type.toLowerCase()); + } +} \ No newline at end of file diff --git a/framework/src/governance/lib/datazone/datazone-msk-asset-type-props.ts b/framework/src/governance/lib/datazone/datazone-msk-asset-type-props.ts new file mode 100644 index 000000000..a759d27f2 --- /dev/null +++ b/framework/src/governance/lib/datazone/datazone-msk-asset-type-props.ts @@ -0,0 +1,30 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { RemovalPolicy } from 'aws-cdk-lib'; +import { DataZoneCustomAssetTypeFactory } from './datazone-custom-asset-type-factory'; + +/** + * The properties for the DataZoneMskAssetType construct + */ +export interface DataZoneMskAssetTypeProps { + /** + * The DataZone domain identifier + */ + readonly domainId: string; + /** + * The project identifier owner of the custom asset type + * @default - A new project called MskGovernance is created + */ + readonly projectId?: string; + /** + * The factory to create the custom asset type + * @default - A new factory is created + */ + readonly dzCustomAssetTypeFactory?: DataZoneCustomAssetTypeFactory; + /** + * The removal policy to apply to the asset type + * @default - RemovalPolicy.RETAIN + */ + readonly removalPolicy?: RemovalPolicy; +} \ No newline at end of file diff --git a/framework/src/governance/lib/datazone/datazone-msk-asset-type.ts b/framework/src/governance/lib/datazone/datazone-msk-asset-type.ts new file mode 100644 index 000000000..11167e39b --- /dev/null +++ b/framework/src/governance/lib/datazone/datazone-msk-asset-type.ts @@ -0,0 +1,111 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { RemovalPolicy } from 'aws-cdk-lib'; +import { CfnProject } from 'aws-cdk-lib/aws-datazone'; +import { Construct } from 'constructs'; +import { CustomAssetType, DataZoneCustomAssetTypeFactory } from './datazone-custom-asset-type-factory'; +import { DataZoneMskAssetTypeProps } from './datazone-msk-asset-type-props'; +import { Context, TrackedConstruct, TrackedConstructProps } from '../../../utils'; + + +/** + * A DataZone custom asset type representing an MSK topic.
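+ * It bundles the native amazon.datazone.RelationalTableFormType with the custom MskSourceReferenceFormType and KafkaSchemaFormType forms.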
+ * + * @example + * new dsf.governance.DataZoneMskAssetType(this, 'MskAssetType', { + * domainId: 'aba_dc999t9ime9sss', + * projectId: '999999b3m5cpz', + * }); + */ +export class DataZoneMskAssetType extends TrackedConstruct { + /** + * The custom asset type for MSK + */ + readonly mskCustomAssetType: CustomAssetType; + /** + * The project owning the MSK asset type + */ + readonly owningProject?: CfnProject; + + + private readonly removalPolicy: RemovalPolicy; + + /** + * Construct an instance of the DataZoneMskAssetType + * @param scope the Scope of the CDK Construct + * @param id the ID of the CDK Construct + * @param props The DataZoneMskAssetTypeProps properties + */ + constructor(scope: Construct, id: string, props: DataZoneMskAssetTypeProps) { + const trackedConstructProps: TrackedConstructProps = { + trackingTag: DataZoneMskAssetType.name, + }; + + super(scope, id, trackedConstructProps); + this.removalPolicy = Context.revertRemovalPolicy(this, props.removalPolicy); + + const dzCustomAssetTypeFactory: DataZoneCustomAssetTypeFactory = props.dzCustomAssetTypeFactory || new DataZoneCustomAssetTypeFactory(this, 'DZCustomAssetTypeHandler', { + domainId: props.domainId, + removalPolicy: this.removalPolicy, + }); + + if (props.projectId === undefined) { + this.owningProject = new CfnProject(this, 'MskAssetTypeProjectOwner', { + name: 'MskGovernance', + domainIdentifier: props.domainId, + }); + } + + this.mskCustomAssetType = dzCustomAssetTypeFactory.createCustomAssetType('MskCustomAssetType', { + assetTypeName: 'MskTopicAssetType', + assetTypeDescription: 'Custom asset type to support MSK topic asset', + projectId: props.projectId || this.owningProject!.attrId, + formTypes: [ + { + name: 'amazon.datazone.RelationalTableFormType', + required: true, + }, + { + name: 'MskSourceReferenceFormType', + model: [ + { + name: 'cluster_arn', + type: 'String', + required: true, + }, + { + name: 'cluster_type', + type: 'String', + required: true, + }, + ], + required: true, + }, + { + name: 'KafkaSchemaFormType', + model: [ + { + name: 'kafka_topic', + type: 'String', + required: true, + }, + { + name: 'schema_version', + type: 'Integer', + }, + { + name: 'schema_arn', + type: 'String', + }, + { + name: 'registry_arn', + type: 'String', + }, + ], + required: true, + }, + ], + }); + } +} \ No newline at end of file diff --git a/framework/src/governance/lib/datazone/datazone-msk-central-authorizer-props.ts b/framework/src/governance/lib/datazone/datazone-msk-central-authorizer-props.ts new file mode 100644 index 000000000..d82066427 --- /dev/null +++ b/framework/src/governance/lib/datazone/datazone-msk-central-authorizer-props.ts @@ -0,0 +1,20 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +import { RemovalPolicy } from 'aws-cdk-lib'; + + +/** + * The properties for the DataZoneMskCentralAuthorizer construct + */ +export interface DataZoneMskCentralAuthorizerProps { + /** + * The DataZone Domain ID + */ + readonly domainId: string; + /** + * The removal policy to apply to the created resources + * @default - RemovalPolicy.RETAIN + */ + readonly removalPolicy?: RemovalPolicy; +} \ No newline at end of file diff --git a/framework/src/governance/lib/datazone/datazone-msk-central-authorizer.ts b/framework/src/governance/lib/datazone/datazone-msk-central-authorizer.ts new file mode 100644 index 000000000..74b38ef61 --- /dev/null +++ b/framework/src/governance/lib/datazone/datazone-msk-central-authorizer.ts @@ -0,0 +1,207 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { Duration, RemovalPolicy, Stack } from 'aws-cdk-lib'; +import { CfnEventBusPolicy, IRule } from 'aws-cdk-lib/aws-events'; +import { Effect, IRole, ManagedPolicy, PolicyDocument, PolicyStatement, Role, ServicePrincipal } from 'aws-cdk-lib/aws-iam'; +import { Code, Function, IFunction, Runtime } from 'aws-cdk-lib/aws-lambda'; +import { IQueue } from 'aws-cdk-lib/aws-sqs'; +import { StateMachine } from 'aws-cdk-lib/aws-stepfunctions'; +import { Construct } from 'constructs'; +import { DataZoneMskCentralAuthorizerProps } from './datazone-msk-central-authorizer-props'; +import { Context, TrackedConstruct, TrackedConstructProps } from '../../../utils'; +import { authorizerCentralWorkflowSetup, registerAccount } from '../custom-authorizer-central-helpers'; + + +/** + * A central authorizer workflow for granting read access to Kafka topics. + * The workflow is triggered by an event sent to the DataZone event bus. + * First, it collects metadata from DataZone about the Kafka topics. + * Then, it grants access to the relevant IAM roles. + * Finally, it acknowledges the subscription grant in DataZone.
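+ * The workflow is orchestrated by an AWS Step Functions state machine and exchanges events with the environment authorizers via the default EventBridge event bus.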
+ * + * @example + * new dsf.governance.DataZoneMskCentralAuthorizer(this, 'MskAuthorizer', { + * domainId: 'aba_dc999t9ime9sss', + * }); + */ +export class DataZoneMskCentralAuthorizer extends TrackedConstruct { + /** + * The name of the authorizer + */ + public static readonly AUTHORIZER_NAME = 'dsf.MskTopicAuthorizer'; + /** + * The name of the DataZone custom asset type for MSK topics + */ + public static readonly MSK_ASSET_TYPE = 'MskTopicAssetType'; + /** + * The role used to collect metadata from DataZone + */ + public readonly metadataCollectorRole: IRole; + /** + * The Lambda function used to collect metadata from DataZone + */ + public readonly metadataCollectorFunction: IFunction; + /** + * The role used to acknowledge the subscription grant in DataZone + */ + public readonly datazoneCallbackRole: IRole; + /** + * The Lambda function used to acknowledge the subscription grant in DataZone + */ + public readonly datazoneCallbackFunction: IFunction; + /** + * The dead letter queue for the authorizer workflow + */ + public readonly deadLetterQueue: IQueue; + /** + * The role used by the DataZone event to trigger the authorizer workflow + */ + public readonly datazoneEventRole: IRole; + /** + * The event rule used to trigger the authorizer workflow + */ + public readonly datazoneEventRule: IRule; + /** + * The state machine used to orchestrate the authorizer workflow + */ + public readonly stateMachine: StateMachine; + /** + * The event rule used to listen for producer and subscriber grant callbacks + */ + public readonly callbackEventRule: IRule; + /** + * The Lambda function used to handle producer and subscriber grant callbacks + */ + public readonly callbackFunction: IFunction; + /** + * The role used by the Lambda function handling producer and subscriber grant callbacks + */ + public readonly callbackRole: IRole; + + private readonly removalPolicy: RemovalPolicy; + + /** + * Construct an instance of the DataZoneMskCentralAuthorizer + * @param scope the Scope of the CDK Construct + * @param id the ID of the CDK Construct + * @param props The DataZoneMskCentralAuthorizer properties + */ + constructor(scope: Construct, id: string, props: DataZoneMskCentralAuthorizerProps) { + const trackedConstructProps: TrackedConstructProps = { + trackingTag: DataZoneMskCentralAuthorizer.name, + }; + + super(scope, id, trackedConstructProps); + + const stack = Stack.of(this); + + this.removalPolicy = Context.revertRemovalPolicy(this, props.removalPolicy); + + this.metadataCollectorRole = new Role(this, 'MetadataCollectorHandlerRole', { + assumedBy: new ServicePrincipal('lambda.amazonaws.com'), + managedPolicies: [ + ManagedPolicy.fromAwsManagedPolicyName('service-role/AWSLambdaBasicExecutionRole'), + ], + inlinePolicies: { + DataZonePermissions: new PolicyDocument({ + statements: [ + new PolicyStatement({ + effect: Effect.ALLOW, + actions: [ + 'datazone:GetListing', + 'datazone:GetEnvironment', + 'datazone:GetSubscriptionTarget', + 'datazone:UpdateSubscriptionGrantStatus', + ], + resources: [`arn:${stack.partition}:datazone:${stack.region}:${stack.account}:domain/${props.domainId}`], + }), + ], + }), + }, + }); + + this.metadataCollectorFunction = new Function(this, 'MetadataCollectorHandler', { + runtime: Runtime.NODEJS_20_X, + handler: 'index.handler', + code: Code.fromAsset(__dirname + '/resources/datazone-msk-authorizer-metadata-collector/'), + role: this.metadataCollectorRole, + timeout: Duration.seconds(30), + }); + + this.datazoneCallbackRole = new Role(this, 'CallbackHandlerRole', {
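+ // role assumed by the Lambda function reporting the grant status back to DataZone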
assumedBy: new ServicePrincipal('lambda.amazonaws.com'), + managedPolicies: [ + ManagedPolicy.fromAwsManagedPolicyName('service-role/AWSLambdaBasicExecutionRole'), + ], + inlinePolicies: { + DataZonePermissions: new PolicyDocument({ + statements: [ + new PolicyStatement({ + effect: Effect.ALLOW, + actions: [ + 'datazone:UpdateSubscriptionGrantStatus', + ], + resources: [`arn:${stack.partition}:datazone:${stack.region}:${stack.account}:domain/${props.domainId}`], + }), + ], + }), + }, + }); + + this.datazoneCallbackFunction = new Function(this, 'CallbackHandler', { + runtime: Runtime.NODEJS_20_X, + handler: 'index.handler', + code: Code.fromAsset(__dirname+'/resources/datazone-msk-authorizer-callback/'), + role: this.datazoneCallbackRole, + timeout: Duration.seconds(30), + }); + + const datazonePattern = { + 'source': ['aws.datazone'], + 'detail-type': [ + 'Subscription Grant Requested', + 'Subscription Grant Revoke Requested', + ], + 'detail': { + metadata: { + domain: [props.domainId], + }, + data: { + asset: { + typeName: [DataZoneMskCentralAuthorizer.MSK_ASSET_TYPE], + }, + }, + }, + }; + + const customAuthorizer = authorizerCentralWorkflowSetup(this, + DataZoneMskCentralAuthorizer.AUTHORIZER_NAME, + this.metadataCollectorFunction, + this.datazoneCallbackFunction, + datazonePattern, + Duration.minutes(5), + 0, + this.removalPolicy, + ); + + this.deadLetterQueue = customAuthorizer.deadLetterQueue; + this.datazoneEventRole = customAuthorizer.authorizerEventRole; + this.datazoneEventRule = customAuthorizer.authorizerEventRule; + this.stateMachine = customAuthorizer.stateMachine; + this.callbackEventRule = customAuthorizer.callbackEventRule; + this.callbackFunction = customAuthorizer.callbackFunction; + this.callbackRole = customAuthorizer.callbackRole; + } + + + /** + * Connect the central authorizer workflow with environment authorizer workflows in other accounts. + * This method grants the environment workflow permissions to send events to the default Event Bridge bus for orchestration. + * @param accountId The account ID to register the authorizer with + * @returns The CfnEventBusPolicy created to grant the account + */ + public registerAccount(accountId: string): CfnEventBusPolicy { + return registerAccount(this, DataZoneMskCentralAuthorizer.AUTHORIZER_NAME, accountId, this.stateMachine.role); + }; +} \ No newline at end of file diff --git a/framework/src/governance/lib/datazone/datazone-msk-environment-authorizer-props.ts b/framework/src/governance/lib/datazone/datazone-msk-environment-authorizer-props.ts new file mode 100644 index 000000000..75cec4698 --- /dev/null +++ b/framework/src/governance/lib/datazone/datazone-msk-environment-authorizer-props.ts @@ -0,0 +1,26 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0 + +import { RemovalPolicy } from 'aws-cdk-lib'; + + +/** + * The properties for the DataZoneMskEnvironmentAuthorizer construct + */ +export interface DataZoneMskEnvironmentAuthorizerProps { + /** + * The DataZone Domain ID + */ + readonly domainId: string; + /** + * The central account ID hosting the central authorizer workflow + * @default - The current account is used as the central account + */ + readonly centralAccountId?: string; + /** + * Whether the authorizer grants MSK managed VPC permissions + * @default - false + */ + readonly grantMskManagedVpc?: boolean; + /** + * The removal policy to apply to the created resources + * @default - RemovalPolicy.RETAIN + */ + readonly removalPolicy?: RemovalPolicy; +} \ No newline at end of file diff --git a/framework/src/governance/lib/datazone/datazone-msk-environment-authorizer.ts b/framework/src/governance/lib/datazone/datazone-msk-environment-authorizer.ts new file mode 100644 index 000000000..8202572e3 --- /dev/null +++ b/framework/src/governance/lib/datazone/datazone-msk-environment-authorizer.ts @@ -0,0 +1,130 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { Duration, RemovalPolicy } from 'aws-cdk-lib'; +import { CfnEventBusPolicy, IRule } from 'aws-cdk-lib/aws-events'; +import { IRole, Role, ServicePrincipal, ManagedPolicy, PolicyDocument, PolicyStatement, Effect } from 'aws-cdk-lib/aws-iam'; +import { IFunction, Function, Runtime, Code } from 'aws-cdk-lib/aws-lambda'; +import { IQueue } from 'aws-cdk-lib/aws-sqs'; +import { IStateMachine } from 'aws-cdk-lib/aws-stepfunctions'; +import { Construct } from 'constructs'; +import { DataZoneMskCentralAuthorizer } from './datazone-msk-central-authorizer'; +import { DataZoneMskEnvironmentAuthorizerProps } from './datazone-msk-environment-authorizer-props'; +import { Context, TrackedConstruct, TrackedConstructProps } from '../../../utils'; +import { authorizerEnvironmentWorkflowSetup } from '../custom-authorizer-environment-helpers'; + +/** + * An environment authorizer workflow for granting read access to Kafka topics. + * The workflow is triggered by an event sent by the central authorizer construct. + * It creates IAM policies required for the Kafka client to access the relevant topics. + * It supports MSK provisioned and serverless, in single and cross accounts, and grant/revoke requests.
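+ * For cross-account setups, set centralAccountId to the account hosting the DataZoneMskCentralAuthorizer so its grant events are accepted on the default event bus.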
+ * + * @example + * new dsf.governance.DataZoneMskEnvironmentAuthorizer(this, 'MskAuthorizer', { + * domainId: 'aba_dc999t9ime9sss', + * }); + */ +export class DataZoneMskEnvironmentAuthorizer extends TrackedConstruct { + + /** + * The IAM role used to grant access to Kafka topics + */ + public readonly grantRole: IRole; + /** + * The Lambda function used to grant access to Kafka topics + */ + public readonly grantFunction: IFunction; + /** + * The event bus policy used to receive events from the central authorizer + */ + public readonly eventBusPolicy?: CfnEventBusPolicy; + /** + * The dead letter queue for the events + */ + public readonly deadLetterQueue: IQueue; + /** + * The role used by the events to trigger the authorizer workflow + */ + public readonly eventRole: IRole; + /** + * The event rule used to trigger the authorizer workflow + */ + public readonly eventRule: IRule; + /** + * The state machine used to orchestrate the authorizer workflow + */ + public readonly stateMachine: IStateMachine; + + private readonly removalPolicy: RemovalPolicy; + + /** + * Create an instance of the DataZoneMskEnvironmentAuthorizer construct + * @param scope The CDK Construct scope + * @param id The CDK Construct id + * @param props The props for the DataZoneMskEnvironmentAuthorizer construct + */ + constructor(scope: Construct, id: string, props: DataZoneMskEnvironmentAuthorizerProps) { + const trackedConstructProps: TrackedConstructProps = { + trackingTag: DataZoneMskEnvironmentAuthorizer.name, + }; + + super(scope, id, trackedConstructProps); + + this.removalPolicy = Context.revertRemovalPolicy(this, props.removalPolicy); + + this.grantRole = new Role(this, 'GrantRole', { + assumedBy: new ServicePrincipal('lambda.amazonaws.com'), + managedPolicies: [ + ManagedPolicy.fromAwsManagedPolicyName('service-role/AWSLambdaBasicExecutionRole'), + ], + inlinePolicies: { + IamPermissions: new PolicyDocument({ + statements: [ + new PolicyStatement({ + effect: Effect.ALLOW, + actions: [ + 'iam:PutRolePolicy', + 'iam:DeleteRolePolicy', + ], + resources: ['*'], + }), + new PolicyStatement({ + effect: Effect.ALLOW, + actions: [ + 'kafka:GetClusterPolicy', + 'kafka:PutClusterPolicy', + ], + resources: ['*'], + }), + ], + }), + }, + }); + + this.grantFunction = new Function(this, 'GrantFunction', { + runtime: Runtime.NODEJS_20_X, + handler: 'index.handler', + code: Code.fromAsset(__dirname + '/resources/datazone-msk-authorizer-grant/'), + role: this.grantRole, + timeout: Duration.seconds(60), + environment: { + GRANT_VPC: props.grantMskManagedVpc ? 'true' : 'false', + }, + }); + + const customAuthorizer = authorizerEnvironmentWorkflowSetup(this, + DataZoneMskCentralAuthorizer.AUTHORIZER_NAME, + this.grantFunction, + props.centralAccountId, + Duration.minutes(2), + 0, + this.removalPolicy, + ); + + this.eventBusPolicy = customAuthorizer.eventBusPolicy; + this.deadLetterQueue = customAuthorizer.deadLetterQueue; + this.eventRole = customAuthorizer.eventRole; + this.eventRule = customAuthorizer.eventRule; + this.stateMachine = customAuthorizer.stateMachine; + } +} \ No newline at end of file diff --git a/framework/src/governance/lib/datazone/index.ts b/framework/src/governance/lib/datazone/index.ts new file mode 100644 index 000000000..ec336c804 --- /dev/null +++ b/framework/src/governance/lib/datazone/index.ts @@ -0,0 +1,14 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0 + +export * from './datazone-msk-environment-authorizer'; +export * from './datazone-msk-central-authorizer'; +export * from './datazone-msk-environment-authorizer-props'; +export * from './datazone-msk-central-authorizer-props'; +export * from './datazone-msk-asset-type'; +export * from './datazone-msk-asset-type-props'; +export * from './datazone-custom-asset-type-factory'; +export * from './datazone-custom-asset-type-props'; +export * from './datazone-gsr-msk-datasource-props'; +export * from './datazone-gsr-msk-datasource'; +export * from './datazone-helpers'; diff --git a/framework/src/governance/lib/datazone/resources/datazone-custom-asset-type/index.mjs b/framework/src/governance/lib/datazone/resources/datazone-custom-asset-type/index.mjs new file mode 100644 index 000000000..2239e13c0 --- /dev/null +++ b/framework/src/governance/lib/datazone/resources/datazone-custom-asset-type/index.mjs @@ -0,0 +1,121 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { DataZoneClient, GetFormTypeCommand, CreateFormTypeCommand, CreateAssetTypeCommand, DeleteAssetTypeCommand, DeleteFormTypeCommand, ResourceNotFoundException } from "@aws-sdk/client-datazone"; + +const client = new DataZoneClient() + +export const handler = async(event) => { + + console.log(`event received: ${JSON.stringify({ event }, null, 2)}`) + + const properties = event["ResourceProperties"] + const domainId = properties["domainId"] + const projectId = properties["projectId"] + const formTypes = properties["formTypes"] + const assetTypeName = properties["assetTypeName"]; + + if (["Create", "Update"].includes(event["RequestType"])) { + const formsInput = {} + + // iterate on creating or updating form types required by the custom asset type + for (let formType of formTypes) { + + let crFormTypeResp; + // if the form type has a model, that means we are creating a custom form type + if(formType.model !== undefined) { + crFormTypeResp = await client.send(new CreateFormTypeCommand({ + domainIdentifier: domainId, + name: formType.name, + model: { + smithy: formType.model + }, + owningProjectIdentifier: projectId, + status: "ENABLED" + })) + + console.log(`${formType.name} form type created`) + + } else { + // if there is no model attached, that means we are reusing an existing form type + // and need to get the latest revision + crFormTypeResp = await client.send(new GetFormTypeCommand({ + domainIdentifier: domainId, + formTypeIdentifier: formType.name, + })) + console.log(`${formType.name} form type already exists... 
reusing it`) + } + + const {revision} = crFormTypeResp + // extract the last part of the formType name + // mandatory for native formTypes like amazon.datazone.GlueTableFormType + formsInput[formType.name.split('.').slice(-1)[0]] = { + typeIdentifier: formType.name, + typeRevision: revision, + required: formType.required + } + } + + const crAssetTypeResp = await client.send(new CreateAssetTypeCommand({ + domainIdentifier: domainId, + name: assetTypeName, + description: properties["assetTypeDescription"], + formsInput, + owningProjectIdentifier: projectId + })) + + console.log(`${properties["assetTypeName"]} asset type created`) + + return { + "Data": crAssetTypeResp + } + } else if (event["RequestType"] === "Delete") { + + try { + await client.send(new DeleteAssetTypeCommand({ + domainIdentifier: domainId, + identifier: assetTypeName + })) + } catch (e) { + if (e instanceof ResourceNotFoundException) { + console.log(`${assetTypeName} asset type doesn't exist`) + } else throw new Error(`${assetTypeName} failed to delete: ${JSON.stringify(e)}`); + } + + console.log(`${assetTypeName} asset type deleted`) + + // cleanup the form types created with the custom asset type + for (let formType of formTypes) { + // We only delete form types having models, the others are shared across multiple asset types + if(formType.model !== undefined) { + + try { + // disable the form type first + await client.send(new CreateFormTypeCommand({ + domainIdentifier: domainId, + name: formType.name, + model: { + smithy: formType.model + }, + owningProjectIdentifier: projectId, + status: "DISABLED" + })) + + console.log(`${formType.name} form type disabled`) + + // then delete the form type + await client.send(new DeleteFormTypeCommand({ + domainIdentifier: domainId, + formTypeIdentifier: formType.name + })) + + console.log(`${formType.name} form type deleted`) + } catch (e) { + if (e instanceof ResourceNotFoundException) { + console.log(`${formType.name} form type doesn't exist`) + } else throw new Error(`${formType.name} failed to delete: ${JSON.stringify(e)}`); + } + } + } + } +} \ No newline at end of file diff --git a/framework/src/governance/lib/datazone/resources/datazone-custom-asset-type/package-lock.json b/framework/src/governance/lib/datazone/resources/datazone-custom-asset-type/package-lock.json new file mode 100644 index 000000000..d84da98bb --- /dev/null +++ b/framework/src/governance/lib/datazone/resources/datazone-custom-asset-type/package-lock.json @@ -0,0 +1,12 @@ +{ + "name": "datazone-custom-asset-type", + "version": "0.1.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "datazone-custom-asset-type", + "version": "0.1.0" + } + } +} diff --git a/framework/src/governance/lib/datazone/resources/datazone-custom-asset-type/package.json b/framework/src/governance/lib/datazone/resources/datazone-custom-asset-type/package.json new file mode 100644 index 000000000..1956b3f83 --- /dev/null +++ b/framework/src/governance/lib/datazone/resources/datazone-custom-asset-type/package.json @@ -0,0 +1,4 @@ +{ + "name": "datazone-custom-asset-type", + "version": "0.1.0" +} \ No newline at end of file diff --git a/framework/src/governance/lib/datazone/resources/datazone-gsr-msk-datasource/index.mjs b/framework/src/governance/lib/datazone/resources/datazone-gsr-msk-datasource/index.mjs new file mode 100644 index 000000000..4569d5137 --- /dev/null +++ b/framework/src/governance/lib/datazone/resources/datazone-gsr-msk-datasource/index.mjs @@ -0,0 +1,280 @@ +// Copyright Amazon.com,
Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { DataZoneClient, GetAssetCommand, CreateAssetCommand, CreateAssetRevisionCommand, DeleteAssetCommand } from "@aws-sdk/client-datazone"; +import { GlueClient, ListSchemasCommand, GetSchemaVersionCommand } from "@aws-sdk/client-glue"; +import { KafkaClient, ListClustersV2Command, DescribeClusterV2Command } from "@aws-sdk/client-kafka"; +import { SSMClient, GetParametersByPathCommand, DeleteParameterCommand, PutParameterCommand } from "@aws-sdk/client-ssm"; + + +// Initialize AWS SDK clients +const ssmClient = new SSMClient(); +const dataZoneClient = new DataZoneClient(); +const glueClient = new GlueClient(); +const kafkaClient = new KafkaClient(); + +export const handler = async () => { + const clusterName = process.env.CLUSTER_NAME; + const region = process.env.REGION; + const registryName = process.env.REGISTRY_NAME; + const domainId = process.env.DOMAIN_ID; + const accountId = process.env.ACCOUNT_ID; + const projectId = process.env.PROJECT_ID; + const partition = process.env.PARTITION; + + if (!clusterName || !region || !registryName || !domainId || !accountId || !projectId) { + throw new Error('Missing required environment variables.'); + } + + const registryArn = `arn:${partition}:glue:${region}:${accountId}:registry/${registryName}`; + + + let clusterArn; + let clusterType; + + try { + // Step 1: Retrieve existing parameters + const existingParametersResponse = await ssmClient.send(new GetParametersByPathCommand({ + Path: `/datazone/${domainId}/${registryName}/asset/`, + Recursive: true, + WithDecryption: false + })); + const existingParameters = existingParametersResponse.Parameters || []; + const assetMap = new Map(); // Map to hold assetName and assetId + + for (const param of existingParameters) { + const assetName = param.Name.split('/').pop(); + if (assetName && param.Value) { + assetMap.set(assetName, param.Value); + } + } + console.log(assetMap); + + // Step 2: List all Kafka clusters and find the ARN for the specified cluster + try { + const listClustersCommand = new ListClustersV2Command({}); + const listClustersResponse = await kafkaClient.send(listClustersCommand); + const cluster = listClustersResponse.ClusterInfoList.find(c => c.ClusterName === clusterName); + + if (!cluster) { + throw new Error(`Cluster with name "${clusterName}" not found.`); + } + + clusterArn = cluster.ClusterArn; + console.log(`Cluster ARN for ${clusterName} found: ${clusterArn}`); + + // Describe the Kafka cluster to determine its type + const describeClusterCommand = new DescribeClusterV2Command({ ClusterArn: clusterArn }); + const describeClusterResponse = await kafkaClient.send(describeClusterCommand); + clusterType = describeClusterResponse.ClusterInfo?.ClusterType; + + if (!clusterType) { + throw new Error(`Failed to determine the cluster type for cluster: ${clusterName}`); + } + + console.log(`Cluster type for ${clusterName} is ${clusterType}`); + + } catch (err) { + console.error('Error handling Kafka cluster:', err); + throw new Error('Failed to handle Kafka cluster.'); + } + + // Step 3: List all schemas in the registry + const listSchemasCommand = new ListSchemasCommand({ + RegistryId: { RegistryName: registryName } + }); + const schemaListResponse = await glueClient.send(listSchemasCommand); + const schemas = schemaListResponse.Schemas || []; + + console.log(`Found ${schemas.length} schemas in the registry.`); + + // Step 4: Process each schema + for (const schema of schemas) { + const 
schemaArn = schema.SchemaArn; + const schemaName = schema.SchemaName; + const parameterName = `/datazone/${domainId}/${registryName}/asset/${schemaName}`; + let schemaDefinition = ''; + let versionNumber = 1; + + // Retrieve schema definition + try { + const getSchemaVersionCommand = new GetSchemaVersionCommand({ + SchemaId: { SchemaArn: schemaArn }, + SchemaVersionNumber: { LatestVersion: true } + }); + const schemaVersionResponse = await glueClient.send(getSchemaVersionCommand); + schemaDefinition = schemaVersionResponse.SchemaDefinition; + versionNumber = schemaVersionResponse.VersionNumber; + console.log('Retrieved schema definition.'); + } catch (err) { + console.error('Error retrieving schema definition:', err); + continue; // Skip to the next schema if there is an issue + } + + // Build the source identifier + const sourceIdentifier = `kafka://${clusterName}/${schemaName}`; + const formsInput = [ + { + formName: 'MskSourceReferenceFormType', + typeIdentifier: 'MskSourceReferenceFormType', + content: JSON.stringify({ + cluster_arn: clusterArn, + cluster_type: clusterType // Ensure clusterType is correctly included + }), + }, + { + formName: 'AssetCommonDetailsForm', + typeIdentifier: 'default', + content: JSON.stringify({ + sourceIdentifier: sourceIdentifier, + schemaDefinition: schemaDefinition || '' + }), + }, + { + formName: 'KafkaSchemaFormType', + typeIdentifier: 'KafkaSchemaFormType', + content: JSON.stringify({ + kafka_topic: schemaName, + schema_version: versionNumber, + schema_arn: schemaArn, + registry_arn: registryArn, + }), + }, + { + formName: 'RelationalTableFormType', + typeIdentifier: 'amazon.datazone.RelationalTableFormType', + content: JSON.stringify({ + tableName: schemaName, + columns: parseSchemaDefinition(schemaDefinition), + }), + } + ]; + + console.log(formsInput); + + // Check if the asset already exists + const assetId = assetMap.get(schemaName); + if (assetId) { + console.log(`Asset ${schemaName} already exists. 
Creating or updating revision.`); + + try { + // Check if asset exists in DataZone + await dataZoneClient.send(new GetAssetCommand({ + domainIdentifier: domainId, + identifier: assetId + })); + + // Create Asset Revision + await dataZoneClient.send(new CreateAssetRevisionCommand({ + name: schemaName, + domainIdentifier: domainId, + identifier: assetId, + description: 'Updating asset with new schema or forms', + formsInput, + externalIdentifier: buildMskTopicArn(region, accountId, clusterName, schemaName, partition), + })); + + console.log(`Asset revision for ${schemaName} updated.`); + } catch (error) { + if (error.$metadata?.httpStatusCode === 404) { + // If asset does not exist, it may have been deleted, so remove its parameter + console.log(`Asset ${schemaName} not found in DataZone, removing parameter.`); + await ssmClient.send(new DeleteParameterCommand({ Name: parameterName })); + assetMap.delete(schemaName); + } else { + console.error('Error creating asset revision:', error); + } + } + } else { + // Create new asset and store its ID + console.log(`Creating new asset ${schemaName}.`); + try { + const createResponse = await dataZoneClient.send(new CreateAssetCommand({ + domainIdentifier: domainId, + owningProjectIdentifier: projectId, + name: schemaName, + typeIdentifier: 'MskTopicAssetType', + formsInput, + externalIdentifier: buildMskTopicArn(region, accountId, clusterName, schemaName, partition), + })); + + const newAssetId = createResponse.id; + + // Store the new asset ID in SSM Parameter Store + const putParameterCommand = new PutParameterCommand({ + Name: parameterName, + Value: newAssetId, + Type: 'String', + Overwrite: true + }); + + await ssmClient.send(putParameterCommand); + console.log(`Stored asset ID ${newAssetId} in SSM Parameter Store under ${parameterName}`); + } catch (error) { + console.error('Error creating MSK asset:', error); + } + } + } + + // Step 5: Clean up old assets if necessary + for (const [assetName, assetId] of assetMap) { + if (!schemas.some(schema => schema.SchemaName === assetName)) { + // If the schema is not in the current list, delete the asset and its parameter + console.log(`Deleting stale asset ${assetName}.`); + try { + await dataZoneClient.send(new DeleteAssetCommand({ + domainIdentifier: domainId, + identifier: assetId + })); + + await ssmClient.send(new DeleteParameterCommand({ Name: `/datazone/${domainId}/${registryName}/asset/${assetName}` })); + console.log(`Deleted asset ${assetName} and its parameter.`); + } catch (error) { + console.error('Error deleting asset or parameter:', error); + } + } + } + + } catch (error) { + console.error('Error processing request:', error); + return { + statusCode: 500, + body: JSON.stringify({ + message: 'Failed to process request', + error: error.message + }), + }; + } +}; + +// Utility functions +function buildMskTopicArn(region, accountId, clusterName, topicName, partition) { + return `arn:${partition}:kafka:${region}:${accountId}:topic/${clusterName}/${topicName}`; +} + +function parseSchemaDefinition(schemaDefinition) { + try { + const schemaJson = JSON.parse(schemaDefinition); + const columns = []; + + if (schemaJson.fields) { + for (const field of schemaJson.fields) { + let columnType = field.type; + + if (typeof columnType === 'object' && columnType.type) { + columnType = columnType.type; + } + + columns.push({ + columnName: field.name, + dataType: columnType + }); + } + } + + return columns; + } catch (err) { + console.error('Error parsing schema definition:', err); + return []; + } +} diff 
--git a/framework/src/governance/lib/datazone/resources/datazone-gsr-msk-datasource/package-lock.json b/framework/src/governance/lib/datazone/resources/datazone-gsr-msk-datasource/package-lock.json new file mode 100644 index 000000000..b12d4047d --- /dev/null +++ b/framework/src/governance/lib/datazone/resources/datazone-gsr-msk-datasource/package-lock.json @@ -0,0 +1,12 @@ +{ + "name": "datazone-gsr-msk-datasource", + "version": "0.1.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "datazone-gsr-msk-datasource", + "version": "0.1.0" + } + } +} diff --git a/framework/src/governance/lib/datazone/resources/datazone-gsr-msk-datasource/package.json b/framework/src/governance/lib/datazone/resources/datazone-gsr-msk-datasource/package.json new file mode 100644 index 000000000..b3bb9a6a0 --- /dev/null +++ b/framework/src/governance/lib/datazone/resources/datazone-gsr-msk-datasource/package.json @@ -0,0 +1,4 @@ +{ + "name": "datazone-gsr-msk-datasource", + "version": "0.1.0" +} \ No newline at end of file diff --git a/framework/src/governance/lib/datazone/resources/datazone-msk-authorizer-callback/index.mjs b/framework/src/governance/lib/datazone/resources/datazone-msk-authorizer-callback/index.mjs new file mode 100644 index 000000000..7507b3d21 --- /dev/null +++ b/framework/src/governance/lib/datazone/resources/datazone-msk-authorizer-callback/index.mjs @@ -0,0 +1,49 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { DataZoneClient, UpdateSubscriptionGrantStatusCommand, SubscriptionGrantStatus } from "@aws-sdk/client-datazone"; + + +const client = new DataZoneClient() + +export const handler = async(event) => { + + console.log(`event received: ${JSON.stringify({ event }, null, 2)}`); + + const status = event.Status; + const requestType = event.Metadata.RequestType; + + if (status === 'failure') { + + const results = await client.send(new UpdateSubscriptionGrantStatusCommand({ + domainIdentifier: event.Metadata.DomainId, + identifier: event.Metadata.SubscriptionGrantId, + assetIdentifier: event.Metadata.AssetId, + status: requestType === 'GRANT' ? SubscriptionGrantStatus.GRANT_FAILED : SubscriptionGrantStatus.REVOKE_FAILED, + failureCause: { + message: event.Cause + } + })) + console.log(`failure callback results: ${JSON.stringify({ results }, null, 2)}`); + + return {} + + } else if (status === 'success') { + + const results = await client.send(new UpdateSubscriptionGrantStatusCommand({ + domainIdentifier: event.Metadata.DomainId, + identifier: event.Metadata.SubscriptionGrantId, + assetIdentifier: event.Metadata.AssetId, + status: requestType === 'GRANT' ? 
SubscriptionGrantStatus.GRANTED : SubscriptionGrantStatus.REVOKED, + })) + + console.log(`success callback results: ${JSON.stringify({ results }, null, 2)}`); + + return {} + + } else { + + throw new Error('Invalid status') + + } +} \ No newline at end of file diff --git a/framework/src/governance/lib/datazone/resources/datazone-msk-authorizer-callback/package-lock.json b/framework/src/governance/lib/datazone/resources/datazone-msk-authorizer-callback/package-lock.json new file mode 100644 index 000000000..96d19f562 --- /dev/null +++ b/framework/src/governance/lib/datazone/resources/datazone-msk-authorizer-callback/package-lock.json @@ -0,0 +1,12 @@ +{ + "name": "datazone-msk-authorizer-callback", + "version": "0.1.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "datazone-msk-authorizer-callback", + "version": "0.1.0" + } + } +} diff --git a/framework/src/governance/lib/datazone/resources/datazone-msk-authorizer-callback/package.json b/framework/src/governance/lib/datazone/resources/datazone-msk-authorizer-callback/package.json new file mode 100644 index 000000000..008655b26 --- /dev/null +++ b/framework/src/governance/lib/datazone/resources/datazone-msk-authorizer-callback/package.json @@ -0,0 +1,4 @@ +{ + "name": "datazone-msk-authorizer-callback", + "version": "0.1.0" +} \ No newline at end of file diff --git a/framework/src/governance/lib/datazone/resources/datazone-msk-authorizer-grant/index.mjs b/framework/src/governance/lib/datazone/resources/datazone-msk-authorizer-grant/index.mjs new file mode 100644 index 000000000..60ca6c66e --- /dev/null +++ b/framework/src/governance/lib/datazone/resources/datazone-msk-authorizer-grant/index.mjs @@ -0,0 +1,259 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
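+//
+// Grant handler for the MSK topic authorizer workflow. It consumes the
+// producerGrant / consumerGrant events emitted by the central authorizer state
+// machine. Illustrative event shape, inferred from the fields read below
+// (values are placeholders, not real identifiers):
+//
+// {
+//   "detail-type": "producerGrant",
+//   "detail": { "value": { "Metadata": {
+//     "Producer": { "TopicArn": "...", "ClusterArn": "...", "ClusterType": "PROVISIONED", "Account": "111111111111" },
+//     "Consumer": { "Account": "222222222222", "RolesArn": ["arn:aws:iam::222222222222:role/consumer"] },
+//     "SubscriptionGrantId": "...", "AssetId": "...", "RequestType": "GRANT"
+//   } } }
+// }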
+// SPDX-License-Identifier: Apache-2.0
+
+import { KafkaClient, PutClusterPolicyCommand, GetClusterPolicyCommand, BadRequestException } from "@aws-sdk/client-kafka";
+import { IAMClient, PutRolePolicyCommand, DeleteRolePolicyCommand, NoSuchEntityException } from "@aws-sdk/client-iam";
+
+
+// Retry mechanism with exponential backoff configuration
+const MAX_RETRIES = 20; // Maximum number of retries
+const INITIAL_DELAY_MS = 100; // Initial delay in milliseconds
+const MAX_DELAY_MS = 30000; // Maximum delay in milliseconds
+
+function getMskIamResources(topicArn, clusterArn) {
+  return [
+    topicArn,
+    clusterArn,
+    getGroupArn(clusterArn),
+  ];
+}
+
+function getGroupArn(clusterArn) {
+  const assetArnParts = clusterArn.split(":");
+  const partition = assetArnParts[1];
+  const account = assetArnParts[4];
+  const region = assetArnParts[3];
+  const clusterParts = assetArnParts[5].split('/');
+  const cluster = `${clusterParts[1]}/${clusterParts[2]}`;
+
+  return `arn:${partition}:kafka:${region}:${account}:group/${cluster}/*`;
+}
+
+function calculateExponentialBackoff(retryCount, initialDelay, maxDelay) {
+  const delay = initialDelay * Math.pow(2, retryCount);
+  return Math.min(delay, maxDelay);
+}
+
+function delay(ms) {
+  return new Promise((resolve) => setTimeout(resolve, ms));
+}
+
+async function updateClusterPolicyWithRetry(client, grantStatement, requestType, clusterArn, retryCount = 0) {
+  try {
+
+    // Read the current policy first (this assumes a cluster policy already exists)
+    // so the update can be applied under MVCC using CurrentVersion
+    const result = await client.send(
+      new GetClusterPolicyCommand({
+        ClusterArn: clusterArn,
+      })
+    );
+    console.log(`Current policy: ${result.Policy}`);
+    let policy = JSON.parse(result.Policy);
+
+    if (requestType === 'GRANT') {
+      // Append the grant statement to the existing policy
+      policy.Statement.push(grantStatement);
+    } else if (requestType === 'REVOKE') {
+      // Remove the grant statement from the existing policy
+      policy.Statement = policy.Statement.filter(statement => JSON.stringify(statement) !== JSON.stringify(grantStatement));
+    } else {
+      throw new Error(`Invalid request type: ${requestType}`);
+    }
+    console.log(`New policy: ${JSON.stringify({ policy }, null, 2)}`);
+
+    // push the new policy with MVCC
+    const putResult = await client.send(
+      new PutClusterPolicyCommand({
+        ClusterArn: clusterArn,
+        Policy: JSON.stringify(policy),
+        CurrentVersion: result.CurrentVersion,
+      })
+    );
+    console.log(`Policy updated: ${JSON.stringify({ putResult }, null, 2)}`);
+
+  } catch (error) {
+    // MVCC retry mechanism: another writer updated the policy between our read and write
+    if (error instanceof BadRequestException && error.message.includes("The version of the cluster policy isn't current")) {
+
+      if (retryCount < MAX_RETRIES) {
+
+        const delayMs = calculateExponentialBackoff(retryCount, INITIAL_DELAY_MS, MAX_DELAY_MS);
+        console.log(`Retrying in ${delayMs} ms...`);
+        await delay(delayMs);
+        await updateClusterPolicyWithRetry(client, grantStatement, requestType, clusterArn, retryCount + 1);
+
+      } else {
+        throw new Error("Error updating MSK cluster policy: concurrent modifications failure and maximum retries exceeded.");
+      }
+    } else if (error instanceof BadRequestException && error.message.includes("The Statement Ids in the policy are not unique")) {
+      console.log("Statement already exists in the cluster policy, skipping...");
+    } else {
+      throw error;
+    }
+  }
+}
+
+const mskReadActions = [
+  'kafka-cluster:Connect',
+  'kafka-cluster:DescribeTopic',
+  'kafka-cluster:DescribeGroup',
+  'kafka-cluster:AlterGroup',
+  'kafka-cluster:ReadData'
+];
+
+const mskVpcConsumerActions = [
+  "kafka:CreateVpcConnection",
+  "ec2:CreateTags",
+  "ec2:CreateVPCEndpoint"
+];
+
+const mskVpcClusterActions = [
+  "kafka:CreateVpcConnection",
+ 
"kafka:GetBootstrapBrokers", + "kafka:DescribeCluster", + "kafka:DescribeClusterV2" +]; + +const gsrReadActions = [ + "glue:GetRegistry", + "glue:ListRegistries", + "glue:GetSchema", + "glue:ListSchemas", + "glue:GetSchemaByDefinition", + "glue:GetSchemaVersion", + "glue:ListSchemaVersions", + "glue:GetSchemaVersionsDiff", + "glue:CheckSchemaVersionValidity", + "glue:QuerySchemaVersionMetadata", + "glue:GetTags" +]; + +export const handler = async(event) => { + + console.log(`event received: ${JSON.stringify({ event }, null, 2)}`); + const grantManagedVpc = process.env.GRANT_VPC; + + const topicArn = event.detail.value.Metadata.Producer.TopicArn; + const clusterArn = event.detail.value.Metadata.Producer.ClusterArn; + const clusterType = event.detail.value.Metadata.Producer.ClusterType; + const producerAccount = event.detail.value.Metadata.Producer.Account; + const consumerAccount = event.detail.value.Metadata.Consumer.Account; + const consumerRolesArn = event.detail.value.Metadata.Consumer.RolesArn; + const subscriptionGrantId = event.detail.value.Metadata.SubscriptionGrantId; + const assetId = event.detail.value.Metadata.AssetId; + const requestType = event.detail.value.Metadata.RequestType; + + const iamMskResources = getMskIamResources(topicArn, clusterArn); + + if (event['detail-type'] === "producerGrant") { + + if (consumerAccount !== producerAccount) { + + if (clusterType === 'PROVISIONED') { + + const grantStatement = { + "Sid": `${subscriptionGrantId}DSF${assetId}`, + "Effect": "Allow", + "Principal": { + "AWS": consumerRolesArn, + }, + "Action": mskReadActions.concat(mskVpcClusterActions), + "Resource": iamMskResources, + }; + const client = new KafkaClient(); + + await updateClusterPolicyWithRetry(client, grantStatement, requestType, clusterArn); + + } else if (clusterType === 'SERVERLESS') { + throw new Error("Cross account access is not supported for Serverless cluster"); + } else { + throw new Error("Unsupported cluster type") + } + + } else { + console.log("Producer and consumer are in the same account, skipping cluster policy") + } + } else if (event['detail-type'] === 'consumerGrant') { + + let iamActions = mskReadActions; + let iamResources = iamMskResources; + // Test if we need to grant permissions on the Glue Schema Registry + const schemaArn = event.detail.value.Metadata.Producer.SchemaArn; + if ( schemaArn !== undefined && producerAccount === consumerAccount) { + iamActions = mskReadActions.concat(gsrReadActions); + iamResources = iamMskResources.concat([schemaArn, event.detail.value.Metadata.Producer.RegistryArn]); + } + + let statements = [ + { + "Effect": "Allow", + "Action": iamActions, + "Resource": iamResources, + } + ] + + if (consumerAccount !== producerAccount) { + + if (clusterType === 'PROVISIONED') { + + statements = statements.concat([ + { + "Effect": "Allow", + "Action": mskVpcConsumerActions, + "Resource": "*", + } + ]); + + } else if (clusterType === 'SERVERLESS') { + throw new Error("Cross account access is not supported for Serverless cluster"); + } else { + throw new Error("Unsupported cluster type") + } + } + + const iamRolePolicy = JSON.stringify({ + "Version": "2012-10-17", + "Statement": statements + }, null, 2); + + const client = new IAMClient(); + + for (var role of consumerRolesArn) { + console.log(`Processing role: ${role}`); + + const roleName = role.split(':')[5].split('/')[1]; + const policyName = `${subscriptionGrantId}_${assetId}`; + + if (requestType === 'GRANT') { + + const result = await client.send(new PutRolePolicyCommand({ + 
RoleName: roleName, + PolicyName: policyName, + PolicyDocument: iamRolePolicy + })); + console.log(`PutRolePolicy result: ${JSON.stringify({ result }, null, 2)}`); + + } else if (requestType === 'REVOKE') { + + try { + const result = await client.send(new DeleteRolePolicyCommand({ + RoleName: roleName, + PolicyName: policyName + })); + console.log(`DeleteRolePolicy result: ${JSON.stringify({ result }, null, 2)}`); + } catch (error) { + if (error instanceof NoSuchEntityException) { + console.log(`Policy ${policyName} doesn't exist... passing`); + } else { + throw error; + } + } + + } else { + throw new Error(`Invalid request type: ${requestType}`); + } + } + } else { + throw new Error("Unsupported grant action") + } + return {} +} diff --git a/framework/src/governance/lib/datazone/resources/datazone-msk-authorizer-grant/package-lock.json b/framework/src/governance/lib/datazone/resources/datazone-msk-authorizer-grant/package-lock.json new file mode 100644 index 000000000..c9890f3df --- /dev/null +++ b/framework/src/governance/lib/datazone/resources/datazone-msk-authorizer-grant/package-lock.json @@ -0,0 +1,12 @@ +{ + "name": "datazone-msk-authorizer-grant", + "version": "0.1.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "datazone-msk-authorizer-grant", + "version": "0.1.0" + } + } +} diff --git a/framework/src/governance/lib/datazone/resources/datazone-msk-authorizer-grant/package.json b/framework/src/governance/lib/datazone/resources/datazone-msk-authorizer-grant/package.json new file mode 100644 index 000000000..7d549fa28 --- /dev/null +++ b/framework/src/governance/lib/datazone/resources/datazone-msk-authorizer-grant/package.json @@ -0,0 +1,4 @@ +{ + "name": "datazone-msk-authorizer-grant", + "version": "0.1.0" +} \ No newline at end of file diff --git a/framework/src/governance/lib/datazone/resources/datazone-msk-authorizer-metadata-collector/index.mjs b/framework/src/governance/lib/datazone/resources/datazone-msk-authorizer-metadata-collector/index.mjs new file mode 100644 index 000000000..18f3bd53d --- /dev/null +++ b/framework/src/governance/lib/datazone/resources/datazone-msk-authorizer-metadata-collector/index.mjs @@ -0,0 +1,125 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { + DataZoneClient, + GetEnvironmentCommand, + GetListingCommand, + UpdateSubscriptionGrantStatusCommand, + SubscriptionGrantStatus , + GetSubscriptionTargetCommand +} from "@aws-sdk/client-datazone"; + + +export const handler = async(event) => { + + console.log(`event received: ${JSON.stringify({ event }, null, 2)}`); + + const client = new DataZoneClient() + + const domainId = event.detail.metadata.domain; + const listingId = event.detail.data.asset.listingId; + const listingVersion = event.detail.data.asset.listingVersion; + const targetEnvId = event.detail.data.subscriptionTarget.environmentId; + const subscriptionTargetId = event.detail.data.subscriptionTarget.id; + const detailType = event['detail-type']; + const subscriptionGrantId = event.detail.metadata.id; + + // test if it's a GRANT or REVOKE request + const requestType = detailType.includes('Revoke') ? 
'REVOKE' : 'GRANT'; + + //get asset information + const asset = await client.send(new GetListingCommand({ + domainIdentifier: domainId, + identifier: listingId, + listingRevision: listingVersion, + })) + + console.log(`GetListing result: ${JSON.stringify({ asset }, null, 2)}`); + + // Update the status in DataZone + const updateStatus = await client.send(new UpdateSubscriptionGrantStatusCommand({ + domainIdentifier: domainId, + identifier: subscriptionGrantId, + assetIdentifier: asset.item.assetListing.assetId, + status: requestType === 'GRANT' ? SubscriptionGrantStatus.GRANT_IN_PROGRESS : SubscriptionGrantStatus.REVOKE_IN_PROGRESS, + })) + + console.log(`UpdateSubscriptionGrant result: ${JSON.stringify({ updateStatus }, null, 2)}`); + + // Get the cluster ARN from the MskSourceReferenceFormType + const forms = JSON.parse(asset.item.assetListing.forms); + const clusterArn = forms.MskSourceReferenceFormType.cluster_arn; + const clusterType = forms.MskSourceReferenceFormType.cluster_type; + const topicName = forms.KafkaSchemaFormType.kafka_topic; + + let registryArn='', schemaArn=''; + try { + registryArn = forms.KafkaSchemaFormType.registry_arn; + schemaArn = forms.KafkaSchemaFormType.schema_arn; + } catch (error) { + if (error instanceof TypeError && error.message.includes("Cannot read properties of undefined")) { + console.log("RegistryArn and SchemaArn not found in forms, skipping..."); + } else { + throw error; + } + } + const assetArnParts = clusterArn.split(":"); + const partition = assetArnParts[1]; + const producerAccountId = assetArnParts[4]; + const producerRegion = assetArnParts[3]; + const clusterParts = assetArnParts[5].split('/'); + const cluster = `${clusterParts[1]}/${clusterParts[2]}`; + + const topicArn = `arn:${partition}:kafka:${producerRegion}:${producerAccountId}:topic/${cluster}/${topicName}`; + + // get target environment information + const targetEnv = await client.send(new GetEnvironmentCommand({ + domainIdentifier: domainId, + identifier: targetEnvId + })); + + console.log(`GetEnvironment result: ${JSON.stringify({ targetEnv }, null, 2)}`); + + // const targetEnvResources = targetEnv.provisionedResources; + // const userRole = targetEnvResources.find((element) => element.name === "userRoleArn"); + const consumerAccountId = targetEnv.awsAccountId; + const consumerRegion = targetEnv.awsAccountRegion; + + const targetSubscription = await client.send(new GetSubscriptionTargetCommand({ + domainIdentifier: domainId, + environmentIdentifier: targetEnvId, + identifier: subscriptionTargetId, + })); + + console.log(`GetSubscriptionTarget result: ${JSON.stringify({ targetSubscription }, null, 2)}`); + + const consumerRolesArn = targetSubscription.authorizedPrincipals; + + const results = { + DomainId: domainId, + SubscriptionGrantId: subscriptionGrantId, + AssetId: asset.item.assetListing.assetId, + RequestType: requestType, + //TODO field version + Producer: { + ClusterArn: clusterArn, + ClusterType: clusterType, + TopicArn: topicArn, + RegistryArn: registryArn, + SchemaArn: schemaArn, + Partition: partition, + Region: producerRegion, + Account: producerAccountId, + }, + Consumer: { + Region: consumerRegion, + Account: consumerAccountId, + RolesArn: consumerRolesArn, + } + }; + + console.log(`Metadata collection results: ${JSON.stringify({ results }, null, 2)}`); + + return results; +} \ No newline at end of file diff --git a/framework/src/governance/lib/datazone/resources/datazone-msk-authorizer-metadata-collector/package-lock.json 
b/framework/src/governance/lib/datazone/resources/datazone-msk-authorizer-metadata-collector/package-lock.json new file mode 100644 index 000000000..a456d8c4e --- /dev/null +++ b/framework/src/governance/lib/datazone/resources/datazone-msk-authorizer-metadata-collector/package-lock.json @@ -0,0 +1,12 @@ +{ + "name": "datazone-msk-authorizer-metadata-collector", + "version": "0.1.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "datazone-msk-authorizer-metadata-collector", + "version": "0.1.0" + } + } +} diff --git a/framework/src/governance/lib/datazone/resources/datazone-msk-authorizer-metadata-collector/package.json b/framework/src/governance/lib/datazone/resources/datazone-msk-authorizer-metadata-collector/package.json new file mode 100644 index 000000000..111ceb80f --- /dev/null +++ b/framework/src/governance/lib/datazone/resources/datazone-msk-authorizer-metadata-collector/package.json @@ -0,0 +1,4 @@ +{ + "name": "datazone-msk-authorizer-metadata-collector", + "version": "0.1.0" +} \ No newline at end of file diff --git a/framework/src/governance/lib/datazone/resources/glue-schema-version/index.mjs b/framework/src/governance/lib/datazone/resources/glue-schema-version/index.mjs new file mode 100644 index 000000000..09ac62401 --- /dev/null +++ b/framework/src/governance/lib/datazone/resources/glue-schema-version/index.mjs @@ -0,0 +1,81 @@ +import { GlueClient, GetSchemaVersionCommand } from "@aws-sdk/client-glue"; + +export const handler = async (event) => { + const client = new GlueClient(); + const properties = event["ResourceProperties"]; + + console.log("Event", event) + + // Log the received properties + console.log("Received properties:", JSON.stringify(properties)); + + const schemaArn = properties["schemaArn"]; + const registryName = properties["registryName"]; + const schemaName = properties["schemaName"]; + const schemaVersionNumber = properties["schemaVersionNumber"]; + const latestVersion = properties["latestVersion"]; + + // Validate and determine the SchemaId + const schemaId = {}; + if (schemaArn) { + // If schemaArn is provided, ensure no other fields are set + schemaId.SchemaArn = schemaArn; + if (registryName || schemaName) { + throw new Error("If schemaArn is provided, registryName and schemaName must not be provided."); + } + } else if (registryName && schemaName) { + // If registryName and schemaName are provided, ensure schemaArn is not set + schemaId.RegistryName = registryName; + schemaId.SchemaName = schemaName; + } else { + throw new Error("Either schemaArn or both registryName and schemaName must be provided."); + } + + // Log the determined SchemaId + console.log("Determined SchemaId:", JSON.stringify(schemaId)); + + // Validate and determine the SchemaVersionNumber + const schemaVersionNumberParam = {}; + if (latestVersion !== undefined) { + schemaVersionNumberParam.LatestVersion = latestVersion; + } else if (schemaVersionNumber !== undefined) { + schemaVersionNumberParam.VersionNumber = schemaVersionNumber; + } else { + throw new Error("Either schemaVersionNumber or latestVersion must be provided."); + } + + // Log the determined SchemaVersionNumber + console.log("Determined SchemaVersionNumber:", JSON.stringify(schemaVersionNumberParam)); + + const input = { + SchemaId: schemaId, + SchemaVersionNumber: schemaVersionNumberParam + }; + + // Log the input to be sent to AWS Glue + console.log("Input to GetSchemaVersionCommand:", JSON.stringify(input)); + + try { + const command = new GetSchemaVersionCommand(input); + const 
response = await client.send(command); + + // Log the successful response + console.log("Successful response:", JSON.stringify(response)); + + return { + "Data": response + }; + + } catch (error) { + // Log the error + console.error('Error retrieving schema version:', error); + + return { + statusCode: 500, + body: JSON.stringify({ + message: 'Failed to retrieve schema version', + error: error.message + }) + }; + } +}; diff --git a/framework/src/governance/lib/datazone/resources/glue-schema-version/package-lock.json b/framework/src/governance/lib/datazone/resources/glue-schema-version/package-lock.json new file mode 100644 index 000000000..c9dda8096 --- /dev/null +++ b/framework/src/governance/lib/datazone/resources/glue-schema-version/package-lock.json @@ -0,0 +1,12 @@ +{ + "name": "glue-schema-version", + "version": "0.1.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "glue-schema-version", + "version": "0.1.0" + } + } +} diff --git a/framework/src/governance/lib/datazone/resources/glue-schema-version/package.json b/framework/src/governance/lib/datazone/resources/glue-schema-version/package.json new file mode 100644 index 000000000..816b894d5 --- /dev/null +++ b/framework/src/governance/lib/datazone/resources/glue-schema-version/package.json @@ -0,0 +1,4 @@ +{ + "name": "glue-schema-version", + "version": "0.1.0" +} \ No newline at end of file diff --git a/framework/src/governance/lib/index.ts b/framework/src/governance/lib/index.ts index 1d306e745..d025764e4 100644 --- a/framework/src/governance/lib/index.ts +++ b/framework/src/governance/lib/index.ts @@ -5,3 +5,6 @@ export * from './data-catalog-database'; export * from './data-lake-catalog'; export * from './data-catalog-database-props'; export * from './data-lake-catalog-props'; +export * from './custom-authorizer-environment-helpers'; +export * from './custom-authorizer-central-helpers'; +export * from './datazone'; \ No newline at end of file diff --git a/framework/src/governance/lib/resources/custom-authorizer-callback/index.mjs b/framework/src/governance/lib/resources/custom-authorizer-callback/index.mjs new file mode 100644 index 000000000..23bab0568 --- /dev/null +++ b/framework/src/governance/lib/resources/custom-authorizer-callback/index.mjs @@ -0,0 +1,34 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
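+//
+// Closes the Step Functions task-token loop: grant workflows emit a callback
+// event carrying the task token, and this handler reports success or failure
+// back to the waiting state machine. Illustrative event shape, inferred from
+// the fields read below (values are placeholders):
+//
+// { "detail": { "Status": "success" | "failure", "TaskToken": "...",
+//     "Error": "...", "Cause": "..." } }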
+// SPDX-License-Identifier: Apache-2.0 + +import { SFNClient, SendTaskSuccessCommand, SendTaskFailureCommand} from '@aws-sdk/client-sfn'; + + +export const handler = async(event) => { + + console.log(JSON.stringify({ event }, null, 2)); + + const status = event.detail.Status; + const client = new SFNClient(); + const taskToken = event.detail.TaskToken; + + if (status === 'success') { + const taskSuccessResponse = await client.send(new SendTaskSuccessCommand({ + taskToken, + output: JSON.stringify({ Status: 'success' }), + })); + + console.log(JSON.stringify({ taskSuccessResponse }, null, 2)); + + } else if (status === 'failure') { + + const taskFailureResponse = await client.send(new SendTaskFailureCommand({ + taskToken, + cause: `${event.detail.Error}: ${event.detail.Cause}`, + error: 'grant failed', + })); + + console.log(JSON.stringify({ taskFailureResponse }, null, 2)); + } + return {} +} \ No newline at end of file diff --git a/framework/src/governance/lib/resources/custom-authorizer-callback/package-lock.json b/framework/src/governance/lib/resources/custom-authorizer-callback/package-lock.json new file mode 100644 index 000000000..03f4454c2 --- /dev/null +++ b/framework/src/governance/lib/resources/custom-authorizer-callback/package-lock.json @@ -0,0 +1,12 @@ +{ + "name": "custom-authorizer-callback", + "version": "0.1.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "custom-authorizer-callback", + "version": "0.1.0" + } + } +} diff --git a/framework/src/governance/lib/resources/custom-authorizer-callback/package.json b/framework/src/governance/lib/resources/custom-authorizer-callback/package.json new file mode 100644 index 000000000..a1333011f --- /dev/null +++ b/framework/src/governance/lib/resources/custom-authorizer-callback/package.json @@ -0,0 +1,4 @@ +{ + "name": "custom-authorizer-callback", + "version": "0.1.0" +} \ No newline at end of file diff --git a/framework/src/utils/lib/create-service-linked-role.ts b/framework/src/utils/lib/create-service-linked-role.ts index a9f75471e..857bbf508 100644 --- a/framework/src/utils/lib/create-service-linked-role.ts +++ b/framework/src/utils/lib/create-service-linked-role.ts @@ -14,7 +14,7 @@ import { ServiceLinkedRoleService } from './service-linked-role-service'; * * @example * const slr = new dsf.utils.CreateServiceLinkedRole(this, 'CreateSLR') - * slr.create(ServiceLinkedRoleService.REDSHIFT) + * slr.create(dsf.utils.ServiceLinkedRoleService.REDSHIFT) */ export class CreateServiceLinkedRole extends Construct { diff --git a/framework/test/e2e/datazone-msk.e2e.test.ts b/framework/test/e2e/datazone-msk.e2e.test.ts new file mode 100644 index 000000000..c7981b9b6 --- /dev/null +++ b/framework/test/e2e/datazone-msk.e2e.test.ts @@ -0,0 +1,91 @@ +/** + * Testing DataZone MSK constructs + * + * @group e2e/governance/datazone-msk + */ + +import * as cdk from 'aws-cdk-lib'; +import { CfnDomain } from 'aws-cdk-lib/aws-datazone'; +import { Schedule } from 'aws-cdk-lib/aws-events'; +import { TestStack } from './test-stack'; +import { DataZoneGsrMskDataSource, DataZoneMskAssetType, DataZoneMskCentralAuthorizer, DataZoneMskEnvironmentAuthorizer } from '../../src/governance/index'; + +jest.setTimeout(10000000); + +// GIVEN +const app = new cdk.App(); +const testStack = new TestStack('E2eTestStack', app); +const { stack } = testStack; + +stack.node.setContext('@data-solutions-framework-on-aws/removeDataOnDestroy', true); + +const cfnDomain = new CfnDomain(stack, 'CfnDomain', { + domainExecutionRole: 
'arn:aws:iam::145388625860:role/service-role/AmazonDataZoneDomainExecution',
+  name: 'dsfE2eTest',
+});
+
+// const consumerRole = new Role(stack, 'ConsumerRole', {
+//   assumedBy: new ServicePrincipal('lambda.amazonaws.com'),
+// });
+
+const mskCentralAuthorizer = new DataZoneMskCentralAuthorizer(testStack.stack, 'MskAuthorizer', {
+  domainId: cfnDomain.attrId,
+  removalPolicy: cdk.RemovalPolicy.DESTROY,
+});
+
+new DataZoneMskEnvironmentAuthorizer(stack, 'MskEnvAuthorizer', {
+  domainId: cfnDomain.attrId,
+  removalPolicy: cdk.RemovalPolicy.DESTROY,
+});
+
+mskCentralAuthorizer.registerAccount('123456789012');
+
+const mskAssetType = new DataZoneMskAssetType(stack, 'MskAssetType', {
+  domainId: cfnDomain.attrId,
+  removalPolicy: cdk.RemovalPolicy.DESTROY,
+});
+
+const gsrMskDataSource = new DataZoneGsrMskDataSource(stack, 'GsrMskDataSource', {
+  domainId: cfnDomain.attrId,
+  projectId: mskAssetType.owningProject!.attrId,
+  registryName: 'testRegistry',
+  clusterName: 'testCluster',
+  runSchedule: Schedule.cron({ minute: '0', hour: '12' }),
+  enableSchemaRegistryEvent: true,
+});
+
+// createSubscriptionTarget(stack, 'Consumer',
+//   mskAssetType.mskCustomAssetType,
+//   'testSubscription',
+//   'dsf',
+//   CONSUMER_ENV_ID,
+//   [consumerRole],
+//   assetFactory.createRole,
+// );
+
+new cdk.CfnOutput(stack, 'MskAssetTypeName', {
+  value: mskAssetType.mskCustomAssetType.name,
+});
+
+new cdk.CfnOutput(stack, 'GsrMskDataSourceOutput', {
+  value: gsrMskDataSource.registryName,
+});
+
+let deployResult: Record<string, string>;
+
+
+beforeAll(async() => {
+  // WHEN
+  deployResult = await testStack.deploy();
+
+}, 10000000);
+
+it('MskTopicAssetType and DataZoneMskAuthorizers created successfully', async () => {
+  // THEN
+  expect(deployResult.MskAssetTypeName).toContain('MskTopicAssetType');
+  expect(deployResult.GsrMskDataSourceOutput).toContain('testRegistry');
+});
+
+afterAll(async () => {
+  await testStack.destroy();
+}, 10000000);
diff --git a/framework/test/unit/governance/datazone-custom-asset-type-factory.test.ts b/framework/test/unit/governance/datazone-custom-asset-type-factory.test.ts
new file mode 100644
index 000000000..2020c57eb
--- /dev/null
+++ b/framework/test/unit/governance/datazone-custom-asset-type-factory.test.ts
@@ -0,0 +1,156 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
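+//
+// Assuming the repository's jest-runner-groups setup (see the @group tag in
+// the docblock below), this suite can be targeted with, for example:
+//   npx jest --group=unit/datazone/datazone-custom-asset-type-factory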
+// SPDX-License-Identifier: Apache-2.0 + + +/** + * Tests DataZoneCustomAssetTypeFactory construct + * + * @group unit/datazone/datazone-custom-asset-type-factory + */ + +import { App, RemovalPolicy, Stack } from 'aws-cdk-lib'; +import { Match, Template } from 'aws-cdk-lib/assertions'; +import { DataZoneCustomAssetTypeFactory } from '../../../src/governance'; + + +describe ('Creating a DataZoneCustomAssetTypeFactory with default configuration', () => { + const app = new App(); + const stack = new Stack(app, 'Stack'); + const DOMAIN_ID = 'aba_dc999t9ime9sss'; + + new DataZoneCustomAssetTypeFactory(stack, 'DataZoneCustomAssetType', { + domainId: DOMAIN_ID, + }); + + const template = Template.fromStack(stack); + // console.log(JSON.stringify(template.toJSON(), null, 2)); + + + test('should create an IAM role for the custom resource creating asset types', () => { + template.hasResourceProperties('AWS::IAM::Role', + Match.objectLike({ + AssumeRolePolicyDocument: Match.objectLike({ + Statement: [ + { + Action: 'sts:AssumeRole', + Effect: 'Allow', + Principal: { + Service: 'lambda.amazonaws.com', + }, + }, + ], + }), + ManagedPolicyArns: [ + { + 'Fn::Join': Match.arrayWith([ + Match.arrayWith([ + { + Ref: 'AWS::Partition', + }, + ':iam::aws:policy/service-role/AWSLambdaBasicExecutionRole', + ]), + ]), + }, + ], + Policies: [ + { + PolicyDocument: Match.objectLike({ + Statement: [ + { + Action: [ + 'datazone:CreateFormType', + 'datazone:CreateAssetType', + 'datazone:DeleteAssetType', + 'datazone:DeleteFormType', + 'datazone:GetFormType', + ], + Effect: 'Allow', + Resource: { + 'Fn::Join': Match.arrayWith([ + Match.arrayWith([ + { + Ref: 'AWS::Partition', + }, + ':datazone:', + { + Ref: 'AWS::Region', + }, + ':', + { + Ref: 'AWS::AccountId', + }, + `:domain/${DOMAIN_ID}`, + ]), + ]), + }, + }, + ], + }), + PolicyName: 'DataZonePermission', + }, + ], + }), + ); + }); + + test('should create a Lambda function for the metadata collector', () => { + template.hasResourceProperties('AWS::Lambda::Function', + Match.objectLike({ + Role: { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('DataZoneCustomAssetTypeHandlerRole.*'), + 'Arn', + ], + }, + Runtime: 'nodejs20.x', + Timeout: 120, + }), + ); + }); +}); + +describe ('Creating a DataZoneCustomAssetTypeFactory with DELETE removal but without global data removal', () => { + const app = new App(); + const stack = new Stack(app, 'Stack'); + const DOMAIN_ID = 'aba_dc999t9ime9sss'; + + new DataZoneCustomAssetTypeFactory(stack, 'DataZoneCustomAssetType', { + domainId: DOMAIN_ID, + removalPolicy: RemovalPolicy.DESTROY, + }); + + const template = Template.fromStack(stack); + + test('should create CloudWatch Log Groups with RETAIN removal policy', () => { + template.hasResource('AWS::Logs::LogGroup', + Match.objectLike({ + UpdateReplacePolicy: 'Retain', + DeletionPolicy: 'Retain', + }), + ); + }); +}); + +describe ('Creating a DataZoneCustomAssetTypeFactory with DELETE removal but without global data removal', () => { + const app = new App(); + const stack = new Stack(app, 'Stack'); + const DOMAIN_ID = 'aba_dc999t9ime9sss'; + + stack.node.setContext('@data-solutions-framework-on-aws/removeDataOnDestroy', true); + + new DataZoneCustomAssetTypeFactory(stack, 'DataZoneCustomAssetType', { + domainId: DOMAIN_ID, + removalPolicy: RemovalPolicy.DESTROY, + }); + + const template = Template.fromStack(stack); + + test('should create CloudWatch Log Groups with RETAIN removal policy', () => { + template.hasResource('AWS::Logs::LogGroup', + Match.objectLike({ + 
UpdateReplacePolicy: 'Delete', + DeletionPolicy: 'Delete', + }), + ); + }); +}); \ No newline at end of file diff --git a/framework/test/unit/governance/datazone-gsr-msk-datasource.test.ts b/framework/test/unit/governance/datazone-gsr-msk-datasource.test.ts new file mode 100644 index 000000000..4ddc7e4ac --- /dev/null +++ b/framework/test/unit/governance/datazone-gsr-msk-datasource.test.ts @@ -0,0 +1,480 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +/** + * Tests DataZoneGsrMskDataSource construct + * + * @group unit/datazone/datazone-gsr-msk-datasource + */ + +import { App, Stack } from 'aws-cdk-lib'; + +import { Match, Template } from 'aws-cdk-lib/assertions'; + +import { Schedule } from 'aws-cdk-lib/aws-events'; +import { DataZoneGsrMskDataSource } from '../../../src/governance'; + +describe('Creating a DataZone-GSR-MSK-Datasource with default configuration', () => { + const app = new App(); + const stack = new Stack(app, 'Stack'); + const DOMAIN_ID = 'aba_dc999t9ime9sss'; + const REGISTRY_NAME = 'schema-registry'; + const CLUSTER_NAME = 'msk-cluster'; + const PROJECT_ID = '999a99aa9aaaaa'; + const PARAMETER_PREFIX = `/datazone/${DOMAIN_ID}/${REGISTRY_NAME}/asset/`; + + new DataZoneGsrMskDataSource(stack, 'DataZoneGsrMskDataSource', { + domainId: DOMAIN_ID, + projectId: PROJECT_ID, + registryName: REGISTRY_NAME, + clusterName: CLUSTER_NAME, + }); + + const template = Template.fromStack(stack); + // console.log(JSON.stringify(template.toJSON(), null, 2)); + + test('should create a the following resources', () => { + template.resourceCountIs('AWS::Lambda::Function', 1); + template.resourceCountIs('AWS::IAM::Role', 1); + template.resourceCountIs('AWS::DataZone::ProjectMembership', 1); + + }); + + test('should create Lambda IAM Role', () => { + // Validate the IAM Role properties + template.hasResourceProperties('AWS::IAM::Role', + Match.objectLike({ + AssumeRolePolicyDocument: { + Statement: [ + { + Action: 'sts:AssumeRole', + Effect: 'Allow', + Principal: { + Service: 'lambda.amazonaws.com', + }, + }, + ], + Version: '2012-10-17', + }, + ManagedPolicyArns: [ + { + 'Fn::Join': [ + '', + [ + 'arn:', + { Ref: 'AWS::Partition' }, + ':iam::aws:policy/service-role/AWSLambdaBasicExecutionRole', + ], + ], + }, + ], + Policies: Match.arrayWith([ + Match.objectLike({ + PolicyName: 'DataZonePermission', + PolicyDocument: { + Statement: Match.arrayWith([ + Match.objectLike({ + Action: [ + 'datazone:CreateAsset', + 'datazone:CreateAssetType', + 'datazone:CreateFormType', + 'datazone:GetAssetType', + 'datazone:GetFormType', + 'datazone:GetAsset', + 'datazone:CreateAssetRevision', + 'datazone:DeleteAsset', + ], + Effect: 'Allow', + Resource: [ + { + 'Fn::Join': [ + '', + [ + 'arn:', + { Ref: 'AWS::Partition' }, + ':datazone:', + { Ref: 'AWS::Region' }, + ':', + { Ref: 'AWS::AccountId' }, + `:domain/${DOMAIN_ID}`, + ], + ], + }, + { + 'Fn::Join': [ + '', + [ + 'arn:', + { Ref: 'AWS::Partition' }, + ':datazone:', + { Ref: 'AWS::Region' }, + ':', + { Ref: 'AWS::AccountId' }, + `:project/${PROJECT_ID}`, + ], + ], + }, + ], + }), + Match.objectLike({ + Action: [ + 'glue:GetSchemaVersion', + 'glue:ListSchemas', + 'glue:ListSchemaVersions', + ], + Effect: 'Allow', + Resource: [ + { + 'Fn::Join': [ + '', + [ + 'arn:', + { Ref: 'AWS::Partition' }, + ':glue:', + { Ref: 'AWS::Region' }, + ':', + { Ref: 'AWS::AccountId' }, + `:registry/${REGISTRY_NAME}`, + ], + ], + }, + { + 'Fn::Join': [ + '', + [ + 'arn:', + { Ref: 'AWS::Partition' }, + 
':glue:', + { Ref: 'AWS::Region' }, + ':', + { Ref: 'AWS::AccountId' }, + `:schema/${REGISTRY_NAME}/*`, + ], + ], + }, + ], + }), + Match.objectLike({ + Action: 'kafka:DescribeClusterV2', + Effect: 'Allow', + Resource: { + 'Fn::Join': [ + '', + [ + 'arn:', + { Ref: 'AWS::Partition' }, + ':kafka:', + { Ref: 'AWS::Region' }, + ':', + { Ref: 'AWS::AccountId' }, + `:cluster/${CLUSTER_NAME}/*`, + ], + ], + }, + }), + Match.objectLike({ + Action: 'kafka:ListClustersV2', + Effect: 'Allow', + Resource: { + 'Fn::Join': [ + '', + [ + 'arn:', + { Ref: 'AWS::Partition' }, + ':kafka:', + { Ref: 'AWS::Region' }, + ':', + { Ref: 'AWS::AccountId' }, + ':/api/v2/clusters', + ], + ], + }, + }), + Match.objectLike({ + Action: [ + 'ssm:GetParameter', + 'ssm:PutParameter', + 'ssm:DeleteParameter', + 'ssm:GetParametersByPath', + ], + Effect: 'Allow', + Resource: { + 'Fn::Join': [ + '', + [ + 'arn:', + { Ref: 'AWS::Partition' }, + ':ssm:', + { Ref: 'AWS::Region' }, + ':', + { Ref: 'AWS::AccountId' }, + `:parameter${PARAMETER_PREFIX}*`, + ], + ], + }, + }), + ]), + Version: '2012-10-17', + }, + }), + ]), + }), + ); + }); + + test('should create a default DataZone project membership', () => { + template.hasResourceProperties('AWS::DataZone::ProjectMembership', + Match.objectLike({ + Designation: 'PROJECT_CONTRIBUTOR', + DomainIdentifier: DOMAIN_ID, + Member: { + UserIdentifier: { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('DataZoneGsrMskDataSourceHandlerRole.*'), + 'Arn', + ], + }, + }, + ProjectIdentifier: PROJECT_ID, + }), + ); + }); + + test('should create Lambda Function with correct properties', () => { + template.hasResourceProperties('AWS::Lambda::Function', + Match.objectLike({ + Environment: { + Variables: { + DOMAIN_ID: DOMAIN_ID, + PROJECT_ID: PROJECT_ID, + CLUSTER_NAME: CLUSTER_NAME, + REGION: { Ref: 'AWS::Region' }, + REGISTRY_NAME: REGISTRY_NAME, + ACCOUNT_ID: { Ref: 'AWS::AccountId' }, + PARAMETER_PREFIX: PARAMETER_PREFIX, + }, + }, + Handler: 'index.handler', + Role: { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('DataZoneGsrMskDataSourceHandlerRole.*'), + 'Arn', + ], + }, + Runtime: 'nodejs20.x', + Timeout: 300, + }), + ); + }); + +}); + +describe('Creating a DataZone-GSR-MSK-Datasource with GSR Events and Scheduled configuration', () => { + const app = new App(); + const stack = new Stack(app, 'Stack'); + const DOMAIN_ID = 'aba_dc999t9ime9sss'; + const REGISTRY_NAME = 'schema-registry'; + const CLUSTER_NAME = 'msk-cluster'; + const PROJECT_ID = '999a99aa9aaaaa'; + + new DataZoneGsrMskDataSource(stack, 'DataZoneGsrMskDataSource', { + domainId: DOMAIN_ID, + projectId: PROJECT_ID, + registryName: REGISTRY_NAME, + clusterName: CLUSTER_NAME, + enableSchemaRegistryEvent: true, + runSchedule: Schedule.cron({ minute: '0', hour: '12' }), + }); + + const template = Template.fromStack(stack); + // console.log(JSON.stringify(template.toJSON(), null, 2)); + + test('should create a the following resources', () => { + template.resourceCountIs('AWS::Lambda::Function', 1); + template.resourceCountIs('AWS::IAM::Role', 1); + template.resourceCountIs('AWS::DataZone::ProjectMembership', 1); + template.resourceCountIs('AWS::Events::Rule', 3); + template.resourceCountIs('AWS::Lambda::Permission', 3); + }); + + test('should create EventBridge Rule with correct properties', () => { + template.hasResourceProperties('AWS::Events::Rule', + Match.objectLike({ + ScheduleExpression: 'cron(0 12 * * ? 
*)', + State: 'ENABLED', + Targets: Match.arrayWith([ + Match.objectLike({ + Arn: { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('DataZoneGsrMskDataSource.*'), + 'Arn', + ], + }, + Id: Match.stringLikeRegexp('Target.*'), + }), + ]), + }), + ); + }); + + test('should create EventBridge Rule for Glue Schema Registry with correct properties', () => { + template.hasResourceProperties('AWS::Events::Rule', + Match.objectLike({ + EventPattern: { + source: [ + 'aws.glue', + ], + detail: { + eventSource: [ + 'glue.amazonaws.com', + ], + eventName: [ + 'CreateSchema', + 'RegisterSchemaVersion', + ], + responseElements: { + registryName: [ + REGISTRY_NAME, + ], + }, + }, + }, + State: 'ENABLED', + Targets: Match.arrayWith([ + Match.objectLike({ + Arn: { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('DataZoneGsrMskDataSource.*'), + 'Arn', + ], + }, + Id: Match.stringLikeRegexp('Target.*'), + Input: `{"registryName":"${REGISTRY_NAME}"}`, // Correct escaping and format + }), + ]), + }), + ); + }); + + test('should create Lambda Permission for EventBridge SchemaRegistryEventRule with correct properties', () => { + template.hasResourceProperties('AWS::Lambda::Permission', + Match.objectLike({ + Action: 'lambda:InvokeFunction', + FunctionName: { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('DataZoneGsrMskDataSource.*'), + 'Arn', + ], + }, + Principal: 'events.amazonaws.com', + SourceArn: { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('DataZoneGsrMskDataSourceSchemaRegistryEventRule.*'), + 'Arn', + ], + }, + }), + ); + }); + + test('should create Lambda Permission for EventBridge RegisterSchemaVersionRule with correct properties', () => { + template.hasResourceProperties('AWS::Lambda::Permission', + Match.objectLike({ + Action: 'lambda:InvokeFunction', + FunctionName: { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('DataZoneGsrMskDataSource.*'), + 'Arn', + ], + }, + Principal: 'events.amazonaws.com', + SourceArn: { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('DataZoneGsrMskDataSource.*'), + 'Arn', + ], + }, + }), + ); + }); + + + test('should create EventBridge Rule for Glue DeleteSchema with correct properties', () => { + template.hasResourceProperties('AWS::Events::Rule', + Match.objectLike({ + EventPattern: { + source: [ + 'aws.glue', + ], + detail: { + eventSource: [ + 'glue.amazonaws.com', + ], + eventName: [ + 'DeleteSchema', + ], + requestParameters: { + schemaId: { + schemaArn: [ + { + prefix: { + 'Fn::Join': [ + '', + [ + 'arn:', + { Ref: 'AWS::Partition' }, + ':glue:', + { Ref: 'AWS::Region' }, + ':', + { Ref: 'AWS::AccountId' }, + `:schema/${REGISTRY_NAME}/*`, + ], + ], + }, + }, + ], + }, + }, + }, + }, + Name: 'DeleteSchemaRule', + State: 'ENABLED', + Targets: Match.arrayWith([ + Match.objectLike({ + Arn: { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('DataZoneGsrMskDataSource.*'), + 'Arn', + ], + }, + Id: Match.stringLikeRegexp('Target.*'), + Input: `{"registryName":"${REGISTRY_NAME}"}`, + }), + ]), + }), + ); + }); + + test('should create Lambda Permission for EventBridge to invoke function with correct properties', () => { + template.hasResourceProperties('AWS::Lambda::Permission', + Match.objectLike({ + Action: 'lambda:InvokeFunction', + FunctionName: { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('DataZoneGsrMskDataSource.*'), + 'Arn', + ], + }, + Principal: 'events.amazonaws.com', + SourceArn: { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('DataZoneGsrMskDataSourceDeleteSchemaRule.*'), + 'Arn', + ], + }, + }), + ); + }); + + +}); \ No newline at end of file diff --git 
a/framework/test/unit/governance/datazone-msk-asset-type.test.ts b/framework/test/unit/governance/datazone-msk-asset-type.test.ts new file mode 100644 index 000000000..2d83beace --- /dev/null +++ b/framework/test/unit/governance/datazone-msk-asset-type.test.ts @@ -0,0 +1,216 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + + +/** + * Tests DataZoneMskAssetType construct + * + * @group unit/datazone/datazone-msk-asset-type + */ + +import { App, RemovalPolicy, Stack } from 'aws-cdk-lib'; +import { Match, Template } from 'aws-cdk-lib/assertions'; +import { DataZoneMskAssetType } from '../../../src/governance'; + + +describe ('Creating a DataZoneMskAssetType with default configuration', () => { + const app = new App(); + const stack = new Stack(app, 'Stack'); + const DOMAIN_ID = 'aba_dc999t9ime9sss'; + + new DataZoneMskAssetType(stack, 'DataZoneMskAssetType', { + domainId: DOMAIN_ID, + }); + + const template = Template.fromStack(stack); + // console.log(JSON.stringify(template.toJSON(), null, 2)); + + + test('should create a default DataZoneCustomAssetFactory', () => { + template.resourceCountIs('AWS::Lambda::Function', 3); + template.resourceCountIs('AWS::IAM::Role', 3); + template.resourceCountIs('AWS::IAM::Policy', 3); + template.resourceCountIs('AWS::Logs::LogGroup', 1); + template.resourceCountIs('AWS::Lambda::Permission', 1); + }); + + test('should create a default DataZone project', () => { + template.hasResourceProperties('AWS::DataZone::Project', + Match.objectLike({ + DomainIdentifier: DOMAIN_ID, + Name: 'MskGovernance', + }), + ); + }); + + test('should create a default DataZone project membership', () => { + template.hasResourceProperties('AWS::DataZone::ProjectMembership', + Match.objectLike({ + Designation: 'PROJECT_OWNER', + DomainIdentifier: DOMAIN_ID, + Member: { + UserIdentifier: { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('DataZoneMskAssetTypeDZCustomAssetTypeHandlerHandlerRole.*'), + 'Arn', + ], + }, + }, + ProjectIdentifier: { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('DataZoneMskAssetTypeMskAssetTypeProjectOwner.*'), + 'Id', + ], + }, + }), + ); + }); + + test('should create a custom resource for the MSK topic asset type', () => { + template.hasResourceProperties('Custom::DataZoneCustomAssetType', + Match.objectLike({ + ServiceToken: { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('DataZoneMskAssetTypeDZCustomAssetTypeHandlerProviderCustomResourceProviderframeworkonEvent.*'), + 'Arn', + ], + }, + domainId: DOMAIN_ID, + projectId: { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('DataZoneMskAssetTypeMskAssetTypeProjectOwner.*'), + 'Id', + ], + }, + formTypes: [ + { + name: 'amazon.datazone.RelationalTableFormType', + required: true, + }, + { + name: 'MskSourceReferenceFormType', + model: '\n structure MskSourceReferenceFormType {\n @required\ncluster_arn: String\n@required\ncluster_type: String\n }\n ', + required: true, + }, + { + name: 'KafkaSchemaFormType', + model: '\n structure KafkaSchemaFormType {\n @required\nkafka_topic: String\n\nschema_version: Integer\n\nschema_arn: String\n\nregistry_arn: String\n }\n ', + required: true, + }, + ], + assetTypeName: 'MskTopicAssetType', + assetTypeDescription: 'Custom asset type to support MSK topic asset', + }), + ); + }); +}); + +describe ('Creating a DataZoneMskAssetType with default configuration', () => { + const app = new App(); + const stack = new Stack(app, 'Stack'); + const DOMAIN_ID = 'aba_dc999t9ime9sss'; + const GOVERNANCE_PROJECT_ID = '999a99aa9aaaaa'; + 
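+// This suite provides an existing governance project via projectId, so the
+// construct should reuse it rather than create the default MskGovernance project.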
+
+new DataZoneMskAssetType(stack, 'DataZoneMskAssetType', {
+  domainId: DOMAIN_ID,
+  projectId: GOVERNANCE_PROJECT_ID,
+});
+
+const template = Template.fromStack(stack);
+// console.log(JSON.stringify(template.toJSON(), null, 2));
+
+
+test('should not create a default DataZone project', () => {
+  template.resourceCountIs('AWS::DataZone::Project', 0);
+});
+
+test('should create a default DataZone project membership', () => {
+  template.hasResourceProperties('AWS::DataZone::ProjectMembership',
+    Match.objectLike({
+      Designation: 'PROJECT_OWNER',
+      DomainIdentifier: DOMAIN_ID,
+      Member: {
+        UserIdentifier: {
+          'Fn::GetAtt': [
+            Match.stringLikeRegexp('DataZoneMskAssetTypeDZCustomAssetTypeHandlerHandlerRole.*'),
+            'Arn',
+          ],
+        },
+      },
+      ProjectIdentifier: GOVERNANCE_PROJECT_ID,
+    }),
+  );
+});
+
+test('should attach the custom asset type to the provided project', () => {
+  template.hasResourceProperties('Custom::DataZoneCustomAssetType',
+    Match.objectLike({
+      projectId: GOVERNANCE_PROJECT_ID,
+    }),
+  );
+});
+});
+
+describe ('Creating a DataZoneMskAssetType with DELETE removal but without global data removal', () => {
+  const app = new App();
+  const stack = new Stack(app, 'Stack');
+  const DOMAIN_ID = 'aba_dc999t9ime9sss';
+
+  new DataZoneMskAssetType(stack, 'DataZoneMskAssetType', {
+    domainId: DOMAIN_ID,
+    removalPolicy: RemovalPolicy.DESTROY,
+  });
+
+  const template = Template.fromStack(stack);
+
+  test('should create CloudWatch Log Groups with RETAIN removal policy', () => {
+    template.hasResource('AWS::Logs::LogGroup',
+      Match.objectLike({
+        UpdateReplacePolicy: 'Retain',
+        DeletionPolicy: 'Retain',
+      }),
+    );
+  });
+
+  test('should create custom resources with RETAIN removal policy', () => {
+    template.hasResource('Custom::DataZoneCustomAssetType',
+      Match.objectLike({
+        UpdateReplacePolicy: 'Retain',
+        DeletionPolicy: 'Retain',
+      }),
+    );
+  });
+});
+
+describe ('Creating a DataZoneMskAssetType with DELETE removal and global data removal', () => {
+  const app = new App();
+  const stack = new Stack(app, 'Stack');
+  const DOMAIN_ID = 'aba_dc999t9ime9sss';
+
+  stack.node.setContext('@data-solutions-framework-on-aws/removeDataOnDestroy', true);
+
+  new DataZoneMskAssetType(stack, 'DataZoneMskAssetType', {
+    domainId: DOMAIN_ID,
+    removalPolicy: RemovalPolicy.DESTROY,
+  });
+
+  const template = Template.fromStack(stack);
+
+  test('should create CloudWatch Log Groups with DELETE removal policy', () => {
+    template.hasResource('AWS::Logs::LogGroup',
+      Match.objectLike({
+        UpdateReplacePolicy: 'Delete',
+        DeletionPolicy: 'Delete',
+      }),
+    );
+  });
+
+  test('should create custom resources with DELETE removal policy', () => {
+    template.hasResource('Custom::DataZoneCustomAssetType',
+      Match.objectLike({
+        UpdateReplacePolicy: 'Delete',
+        DeletionPolicy: 'Delete',
+      }),
+    );
+  });
+});
\ No newline at end of file
diff --git a/framework/test/unit/governance/datazone-msk-central-authorizer.test.ts b/framework/test/unit/governance/datazone-msk-central-authorizer.test.ts
new file mode 100644
index 000000000..610782934
--- /dev/null
+++ b/framework/test/unit/governance/datazone-msk-central-authorizer.test.ts
@@ -0,0 +1,774 @@
+// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
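+//
+// The suites below exercise the central authorizer at the template level:
+// the metadata collector and callback Lambdas with their DataZone permissions,
+// the EventBridge rules that trigger the Step Functions state machine, and the
+// SQS dead letter queue wiring.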
+// SPDX-License-Identifier: Apache-2.0 + + +/** + * Tests DataZoneMskCentralAuthorizer construct + * + * @group unit/datazone/datazone-msk-central-authorizer + */ + +import { App, RemovalPolicy, Stack } from 'aws-cdk-lib'; +import { Match, Template } from 'aws-cdk-lib/assertions'; +import { DataZoneMskCentralAuthorizer } from '../../../src/governance'; + + +describe ('Creating a DataZoneMskCentralAuthorizer with default configuration', () => { + const app = new App(); + const stack = new Stack(app, 'Stack'); + const DOMAIN_ID = 'aba_dc999t9ime9sss'; + + const centralAuthorizer = new DataZoneMskCentralAuthorizer(stack, 'MskAuthorizer', { + domainId: DOMAIN_ID, + }); + + centralAuthorizer.registerAccount('999999999999'); + + const template = Template.fromStack(stack); + // console.log(JSON.stringify(template.toJSON(), null, 2)); + + + test('should create an IAM role for the metadata collector function with proper DataZone permissions ', () => { + template.hasResourceProperties('AWS::IAM::Role', + Match.objectLike({ + AssumeRolePolicyDocument: Match.objectLike({ + Statement: [ + { + Action: 'sts:AssumeRole', + Effect: 'Allow', + Principal: { + Service: 'lambda.amazonaws.com', + }, + }, + ], + }), + ManagedPolicyArns: [ + { + 'Fn::Join': Match.arrayWith([ + Match.arrayWith([ + { + Ref: 'AWS::Partition', + }, + ':iam::aws:policy/service-role/AWSLambdaBasicExecutionRole', + ]), + ]), + }, + ], + Policies: [ + { + PolicyDocument: Match.objectLike({ + Statement: [ + { + Action: [ + 'datazone:GetListing', + 'datazone:GetEnvironment', + 'datazone:GetSubscriptionTarget', + 'datazone:UpdateSubscriptionGrantStatus', + ], + Effect: 'Allow', + Resource: { + 'Fn::Join': Match.arrayWith([ + Match.arrayWith([ + { + Ref: 'AWS::Partition', + }, + ':datazone:', + { + Ref: 'AWS::Region', + }, + ':', + { + Ref: 'AWS::AccountId', + }, + `:domain/${DOMAIN_ID}`, + ]), + ]), + }, + }, + ], + }), + PolicyName: 'DataZonePermissions', + }, + ], + }), + ); + }); + + test('should create a Lambda function for the metadata collector', () => { + template.hasResourceProperties('AWS::Lambda::Function', + Match.objectLike({ + Role: { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('MskAuthorizerMetadataCollectorHandlerRole.*'), + 'Arn', + ], + }, + Runtime: 'nodejs20.x', + Timeout: 30, + }), + ); + }); + + test('should create an IAM role for the callback function with proper datazone permissions ', () => { + template.hasResourceProperties('AWS::IAM::Role', + Match.objectLike({ + AssumeRolePolicyDocument: Match.objectLike({ + Statement: [ + { + Action: 'sts:AssumeRole', + Effect: 'Allow', + Principal: { + Service: 'lambda.amazonaws.com', + }, + }, + ], + }), + ManagedPolicyArns: [ + { + 'Fn::Join': Match.arrayWith([ + Match.arrayWith([ + { + Ref: 'AWS::Partition', + }, + ':iam::aws:policy/service-role/AWSLambdaBasicExecutionRole', + ]), + ]), + }, + ], + Policies: [ + { + PolicyDocument: Match.objectLike({ + Statement: [ + { + Action: 'datazone:UpdateSubscriptionGrantStatus', + Effect: 'Allow', + Resource: { + 'Fn::Join': Match.arrayWith([ + Match.arrayWith([ + { + Ref: 'AWS::Partition', + }, + ':datazone:', + { + Ref: 'AWS::Region', + }, + ':', + { + Ref: 'AWS::AccountId', + }, + `:domain/${DOMAIN_ID}`, + ]), + ]), + }, + }, + ], + }), + PolicyName: 'DataZonePermissions', + }, + ], + }), + ); + }); + + test('should create a Lambda function for the DataZone callback function', () => { + template.hasResourceProperties('AWS::Lambda::Function', + Match.objectLike({ + Role: { + 'Fn::GetAtt': [ + 
Match.stringLikeRegexp('MskAuthorizerCallbackHandlerRole.*'), + 'Arn', + ], + }, + Runtime: 'nodejs20.x', + Timeout: 30, + }), + ); + }); + + test('should create an Event Bridge event rule for DataZone events', () => { + template.hasResourceProperties('AWS::Events::Rule', + Match.objectLike({ + EventPattern: { + 'source': [ + 'aws.datazone', + ], + 'detail-type': [ + 'Subscription Grant Requested', + 'Subscription Grant Revoke Requested', + ], + 'detail': { + metadata: { + domain: [ + DOMAIN_ID, + ], + }, + data: { + asset: { + typeName: [ + 'MskTopicAssetType', + ], + }, + }, + }, + }, + State: 'ENABLED', + Targets: [ + Match.objectLike({ + Arn: { + Ref: Match.stringLikeRegexp('MskAuthorizerStateMachine.*'), + }, + DeadLetterConfig: { + Arn: { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('MskAuthorizerQueue.*'), + 'Arn', + ], + }, + }, + RetryPolicy: { + MaximumRetryAttempts: 0, + }, + RoleArn: { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('MskAuthorizerSourceEventRole.*'), + 'Arn', + ], + }, + }), + ], + }), + ); + }); + + test('should create an IAM role for triggering the authorizer Step Functions state machine', () => { + template.hasResourceProperties('AWS::IAM::Role', + Match.objectLike({ + AssumeRolePolicyDocument: Match.objectLike({ + Statement: [ + { + Action: 'sts:AssumeRole', + Effect: 'Allow', + Principal: { + Service: 'events.amazonaws.com', + }, + }, + ], + }), + }), + ); + }); + + test('should attach proper permissions to the event rule role to trigger the state machine', () => { + template.hasResourceProperties('AWS::IAM::Policy', + Match.objectLike({ + PolicyDocument: Match.objectLike({ + Statement: [ + { + Action: 'states:StartExecution', + Effect: 'Allow', + Resource: { + Ref: Match.stringLikeRegexp('MskAuthorizerStateMachine.*'), + }, + }, + ], + }), + PolicyName: Match.stringLikeRegexp('MskAuthorizerSourceEventRoleDefaultPolicy.*'), + Roles: [ + { + Ref: Match.stringLikeRegexp('MskAuthorizerSourceEventRole.*'), + }, + ], + }), + ); + }); + + test('should create an Event Bridge event rule for the authorizer callback events', () => { + template.hasResourceProperties('AWS::Events::Rule', + Match.objectLike({ + EventPattern: { + 'source': [ + 'dsf.MskTopicAuthorizer', + ], + 'detail-type': [ + 'callback', + ], + }, + State: 'ENABLED', + Targets: [ + Match.objectLike({ + Arn: { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('MskAuthorizerCallbackFunction.*'), + 'Arn', + ], + }, + DeadLetterConfig: { + Arn: { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('MskAuthorizerQueue.*'), + 'Arn', + ], + }, + }, + RetryPolicy: { + MaximumEventAgeInSeconds: 3600, + MaximumRetryAttempts: 10, + }, + }), + ], + }), + ); + }); + + test('should create Lambda function permissions for calling back', () => { + template.hasResourceProperties('AWS::Lambda::Permission', + Match.objectLike({ + Action: 'lambda:InvokeFunction', + FunctionName: { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('MskAuthorizerCallbackFunction.*'), + 'Arn', + ], + }, + Principal: 'events.amazonaws.com', + SourceArn: { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('MskAuthorizerCallbackEventRule.*'), + 'Arn', + ], + }, + }), + ); + }); + + test('should create an IAM role for the Step Functions state machine', () => { + template.hasResourceProperties('AWS::IAM::Role', + Match.objectLike({ + AssumeRolePolicyDocument: Match.objectLike({ + Statement: [ + { + Action: 'sts:AssumeRole', + Effect: 'Allow', + Principal: { + Service: { + 'Fn::FindInMap': [ + 'ServiceprincipalMap', + { + Ref: 'AWS::Region', + }, + 'states', + ], + }, + }, + }, + ], 
+ }), + }), + ); + }); + + test('should attach proper permissions to the state machine role', () => { + template.hasResourceProperties('AWS::IAM::Policy', + Match.objectLike({ + PolicyDocument: Match.objectLike({ + Statement: [ + { + Action: 'lambda:InvokeFunction', + Effect: 'Allow', + Resource: [ + { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('MskAuthorizerMetadataCollectorHandler.*'), + 'Arn', + ], + }, + { + 'Fn::Join': [ + '', + [ + { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('MskAuthorizerMetadataCollectorHandler.*'), + 'Arn', + ], + }, + ':*', + ], + ], + }, + ], + }, + { + Action: 'eventbridge:putEvents', + Effect: 'Allow', + Resource: { + 'Fn::Join': [ + '', + [ + 'arn:aws:events:', + { + Ref: 'AWS::Region', + }, + ':', + { + Ref: 'AWS::Region', + }, + ':event-bus/default', + ], + ], + }, + }, + { + Action: 'lambda:InvokeFunction', + Effect: 'Allow', + Resource: [ + { + 'Fn::GetAtt': [ + 'MskAuthorizerCallbackHandler948D9927', + 'Arn', + ], + }, + { + 'Fn::Join': [ + '', + [ + { + 'Fn::GetAtt': [ + 'MskAuthorizerCallbackHandler948D9927', + 'Arn', + ], + }, + ':*', + ], + ], + }, + ], + }, + { + Action: 'events:PutEvents', + Effect: 'Allow', + Resource: { + 'Fn::Join': [ + '', + [ + 'arn:', + { + Ref: 'AWS::Partition', + }, + ':events:', + { + Ref: 'AWS::Region', + }, + ':999999999999:event-bus/default', + ], + ], + }, + }, + ], + }), + PolicyName: Match.stringLikeRegexp('MskAuthorizerStateMachineRoleDefaultPolicy.*'), + Roles: [ + { + Ref: Match.stringLikeRegexp('MskAuthorizerStateMachineRole.*'), + }, + ], + }), + ); + }); + + test('should create the Step Functions state machine', () => { + template.hasResourceProperties('AWS::StepFunctions::StateMachine', + Match.objectLike({ + DefinitionString: { + 'Fn::Join': [ + '', + [ + '{"StartAt":"MetadataCollector","States":{"MetadataCollector":{"Next":"ProducerGrantEventBridgePutEvents","Retry":[{"ErrorEquals":["Lambda.ClientExecutionTimeoutException","Lambda.ServiceException","Lambda.AWSLambdaException","Lambda.SdkClientException"],"IntervalSeconds":2,"MaxAttempts":6,"BackoffRate":2}],"Catch":[{"ErrorEquals":["States.TaskFailed"],"ResultPath":"$.ErrorInfo","Next":"GovernanceFailureCallback"}],"Type":"Task","TimeoutSeconds":120,"ResultSelector":{"Metadata.$":"$.Payload"},"Resource":"arn:', + { + Ref: 'AWS::Partition', + }, + ':states:::lambda:invoke","Parameters":{"FunctionName":"', + { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('MskAuthorizerMetadataCollectorHandler.*'), + 'Arn', + ], + }, + '","Payload.$":"$"}},"ProducerGrantEventBridgePutEvents":{"Next":"ConsumerGrantEventBridgePutEvents","Catch":[{"ErrorEquals":["States.TaskFailed"],"ResultPath":"$.ErrorInfo","Next":"GovernanceFailureCallback"}],"Type":"Task","TimeoutSeconds":300,"ResultPath":null,"Resource":"arn:', + { + Ref: 'AWS::Partition', + }, + ":states:::aws-sdk:eventbridge:putEvents.waitForTaskToken\",\"Parameters\":{\"Entries\":[{\"Detail\":{\"type\":1,\"value\":{\"TaskToken.$\":\"$$.Task.Token\",\"Metadata.$\":\"$.Metadata\"}},\"DetailType\":\"producerGrant\",\"Source\":\"dsf.MskTopicAuthorizer\",\"EventBusName.$\":\"States.Format('arn:aws:events:{}:{}:event-bus/default', $.Metadata.Producer.Region, $.Metadata.Producer.Account)\"}]}},\"ConsumerGrantEventBridgePutEvents\":{\"Next\":\"GovernanceSuccessCallback\",\"Catch\":[{\"ErrorEquals\":[\"States.TaskFailed\"],\"ResultPath\":\"$.ErrorInfo\",\"Next\":\"GovernanceFailureCallback\"}],\"Type\":\"Task\",\"TimeoutSeconds\":300,\"ResultPath\":null,\"Resource\":\"arn:", + { + Ref: 'AWS::Partition', + }, + 
":states:::aws-sdk:eventbridge:putEvents.waitForTaskToken\",\"Parameters\":{\"Entries\":[{\"Detail\":{\"type\":1,\"value\":{\"TaskToken.$\":\"$$.Task.Token\",\"Metadata.$\":\"$.Metadata\"}},\"DetailType\":\"consumerGrant\",\"Source\":\"dsf.MskTopicAuthorizer\",\"EventBusName.$\":\"States.Format('arn:aws:events:{}:{}:event-bus/default', $.Metadata.Consumer.Region, $.Metadata.Consumer.Account)\"}]}},\"GovernanceSuccessCallback\":{\"End\":true,\"Retry\":[{\"ErrorEquals\":[\"Lambda.ClientExecutionTimeoutException\",\"Lambda.ServiceException\",\"Lambda.AWSLambdaException\",\"Lambda.SdkClientException\"],\"IntervalSeconds\":2,\"MaxAttempts\":6,\"BackoffRate\":2}],\"Type\":\"Task\",\"TimeoutSeconds\":60,\"Resource\":\"arn:", + { + Ref: 'AWS::Partition', + }, + ':states:::lambda:invoke","Parameters":{"FunctionName":"', + { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('MskAuthorizerCallbackHandler.*'), + 'Arn', + ], + }, + '","Payload":{"Status":"success","Metadata.$":"$.Metadata"}}},"GovernanceFailureCallback":{"Next":"CentralWorfklowFailure","Retry":[{"ErrorEquals":["Lambda.ClientExecutionTimeoutException","Lambda.ServiceException","Lambda.AWSLambdaException","Lambda.SdkClientException"],"IntervalSeconds":2,"MaxAttempts":6,"BackoffRate":2}],"Type":"Task","TimeoutSeconds":60,"Resource":"arn:', + { + Ref: 'AWS::Partition', + }, + ':states:::lambda:invoke","Parameters":{"FunctionName":"', + { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('MskAuthorizerCallbackHandler.*'), + 'Arn', + ], + }, + '","Payload":{"Status":"failure","Metadata.$":"$.Metadata","Error.$":"$.ErrorInfo.Error","Cause.$":"$.ErrorInfo.Cause"}}},"CentralWorfklowFailure":{"Type":"Fail","ErrorPath":"$.ErrorInfo"}},"TimeoutSeconds":300}', + ], + ], + }, + RoleArn: { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('MskAuthorizerStateMachineRole.*'), + 'Arn', + ], + }, + }), + ); + }); + + test('should create an SAS queue as a dead letter queue for events', () => { + template.resourceCountIs('AWS::SQS::Queue', 1); + }); + + test('should create proper IAM policy for the dead letter queue ', () => { + template.hasResourceProperties('AWS::SQS::QueuePolicy', + Match.objectLike({ + PolicyDocument: Match.objectLike({ + Statement: [ + { + Action: 'sqs:*', + Condition: { + Bool: { + 'aws:SecureTransport': 'false', + }, + }, + Effect: 'Deny', + Principal: { + AWS: '*', + }, + Resource: { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('MskAuthorizerQueue.*'), + 'Arn', + ], + }, + }, + Match.objectLike({ + Action: 'sqs:SendMessage', + Condition: { + ArnEquals: { + 'aws:SourceArn': { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('MskAuthorizerAuthorizerEventRule.*'), + 'Arn', + ], + }, + }, + }, + Effect: 'Allow', + Principal: { + Service: 'events.amazonaws.com', + }, + Resource: { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('MskAuthorizerQueue.*'), + 'Arn', + ], + }, + }), + Match.objectLike({ + Action: 'sqs:SendMessage', + Condition: { + ArnEquals: { + 'aws:SourceArn': { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('MskAuthorizerCallbackEventRule.*'), + 'Arn', + ], + }, + }, + }, + Effect: 'Allow', + Principal: { + Service: 'events.amazonaws.com', + }, + Resource: { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('MskAuthorizerQueue.*'), + 'Arn', + ], + }, + }), + ], + }), + Queues: [ + { + Ref: Match.stringLikeRegexp('MskAuthorizerQueue.*'), + }, + ], + }), + ); + }); + + test('should create an IAM Role for the callback Lambda function', () => { + template.hasResourceProperties('AWS::IAM::Role', + Match.objectLike({ + AssumeRolePolicyDocument: Match.objectLike({ + 
Statement: [ + { + Action: 'sts:AssumeRole', + Effect: 'Allow', + Principal: { + Service: 'lambda.amazonaws.com', + }, + }, + ], + }), + ManagedPolicyArns: [ + { + 'Fn::Join': [ + '', + [ + 'arn:', + { + Ref: 'AWS::Partition', + }, + ':iam::aws:policy/service-role/AWSLambdaBasicExecutionRole', + ], + ], + }, + ], + }), + ); + }); + + test('should create an IAM policy for the callback Lambda function', () => { + template.hasResourceProperties('AWS::IAM::Policy', + Match.objectLike({ + PolicyDocument: Match.objectLike({ + Statement: [ + { + Action: [ + 'states:SendTaskSuccess', + 'states:SendTaskFailure', + ], + Effect: 'Allow', + Resource: { + Ref: Match.stringLikeRegexp('MskAuthorizerStateMachine.*'), + }, + }, + { + Action: [ + 'states:SendTaskSuccess', + 'states:SendTaskFailure', + 'states:SendTaskHeartbeat', + ], + Effect: 'Allow', + Resource: { + Ref: Match.stringLikeRegexp('MskAuthorizerStateMachine.*'), + }, + }, + ], + }), + PolicyName: Match.stringLikeRegexp('MskAuthorizerLambdaCallbackRoleDefaultPolicy.*'), + Roles: [ + { + Ref: Match.stringLikeRegexp('MskAuthorizerLambdaCallbackRole.*'), + }, + ], + }), + ); + }); + + test('should create a callback Lambda function', () => { + template.hasResourceProperties('AWS::Lambda::Function', + Match.objectLike({ + Role: { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('MskAuthorizerLambdaCallbackRole.*'), + 'Arn', + ], + }, + Runtime: 'nodejs20.x', + Timeout: 5, + }), + ); + }); + +}); + +describe ('Creating a DataZoneMskCentralAuthorizer with DELETE removal but without global data removal', () => { + const app = new App(); + const stack = new Stack(app, 'Stack'); + const DOMAIN_ID = 'aba_dc999t9ime9sss'; + + new DataZoneMskCentralAuthorizer(stack, 'MskAuthorizer', { + domainId: DOMAIN_ID, + removalPolicy: RemovalPolicy.DESTROY, + }); + + const template = Template.fromStack(stack); + + test('should create a Step Functions state machine with RETAIN removal policy', () => { + template.hasResource('AWS::StepFunctions::StateMachine', + Match.objectLike({ + UpdateReplacePolicy: 'Retain', + DeletionPolicy: 'Retain', + }), + ); + }); + + test('should create an SQS Queue with RETAIN removal policy', () => { + template.hasResource('AWS::SQS::Queue', + Match.objectLike({ + UpdateReplacePolicy: 'Retain', + DeletionPolicy: 'Retain', + }), + ); + }); +}); + +describe ('Creating a DataZoneMskCentralAuthorizer with DELETE removal but without global data removal', () => { + const app = new App(); + const stack = new Stack(app, 'Stack'); + const DOMAIN_ID = 'aba_dc999t9ime9sss'; + + stack.node.setContext('@data-solutions-framework-on-aws/removeDataOnDestroy', true); + + new DataZoneMskCentralAuthorizer(stack, 'MskAuthorizer', { + domainId: DOMAIN_ID, + removalPolicy: RemovalPolicy.DESTROY, + }); + + const template = Template.fromStack(stack); + + test('should create a Step Functions state machine with RETAIN removal policy', () => { + template.hasResource('AWS::StepFunctions::StateMachine', + Match.objectLike({ + UpdateReplacePolicy: 'Delete', + DeletionPolicy: 'Delete', + }), + ); + }); + + test('should create an SQS Queue with RETAIN removal policy', () => { + template.hasResource('AWS::SQS::Queue', + Match.objectLike({ + UpdateReplacePolicy: 'Delete', + DeletionPolicy: 'Delete', + }), + ); + }); +}); \ No newline at end of file diff --git a/framework/test/unit/governance/datazone-msk-environment-authorizer.test.ts b/framework/test/unit/governance/datazone-msk-environment-authorizer.test.ts new file mode 100644 index 000000000..c6aad4020 --- /dev/null +++ 
b/framework/test/unit/governance/datazone-msk-environment-authorizer.test.ts @@ -0,0 +1,465 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + + +/** + * Tests DataZoneMskEnvironmentAuthorizer construct + * + * @group unit/datazone/datazone-msk-environment-authorizer + */ + +import { App, RemovalPolicy, Stack } from 'aws-cdk-lib'; +import { Match, Template } from 'aws-cdk-lib/assertions'; +import { DataZoneMskEnvironmentAuthorizer } from '../../../src/governance'; + + +describe ('Creating a DataZoneMskEnvironmentAuthorizer with default configuration', () => { + const app = new App(); + const stack = new Stack(app, 'Stack'); + const DOMAIN_ID = 'aba_dc999t9ime9sss'; + + new DataZoneMskEnvironmentAuthorizer(stack, 'MskAuthorizer', { + domainId: DOMAIN_ID, + }); + + const template = Template.fromStack(stack); + // console.log(JSON.stringify(template.toJSON(), null, 2)); + + + test('should create an IAM role for the Lambda function creating the grants', () => { + template.hasResourceProperties('AWS::IAM::Role', + Match.objectLike({ + AssumeRolePolicyDocument: Match.objectLike({ + Statement: [ + { + Action: 'sts:AssumeRole', + Effect: 'Allow', + Principal: { + Service: 'lambda.amazonaws.com', + }, + }, + ], + }), + ManagedPolicyArns: [ + { + 'Fn::Join': Match.arrayWith([ + Match.arrayWith([ + { + Ref: 'AWS::Partition', + }, + ':iam::aws:policy/service-role/AWSLambdaBasicExecutionRole', + ]), + ]), + }, + ], + Policies: [ + { + PolicyDocument: Match.objectLike({ + Statement: [ + { + Action: [ + 'iam:PutRolePolicy', + 'iam:DeleteRolePolicy', + ], + Effect: 'Allow', + Resource: '*', + }, + { + Action: [ + 'kafka:GetClusterPolicy', + 'kafka:PutClusterPolicy', + ], + Effect: 'Allow', + Resource: '*', + }, + ], + }), + PolicyName: 'IamPermissions', + }, + ], + }), + ); + }); + + test('should create a Lambda function for creating the grants', () => { + template.hasResourceProperties('AWS::Lambda::Function', + Match.objectLike({ + Role: { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('MskAuthorizerGrantRole.*'), + 'Arn', + ], + }, + Runtime: 'nodejs20.x', + Timeout: 60, + }), + ); + }); + + + test('should create an Event Bridge event rule for central authorizer events', () => { + template.hasResourceProperties('AWS::Events::Rule', + Match.objectLike({ + EventPattern: { + 'source': [ + 'dsf.MskTopicAuthorizer', + ], + 'detail-type': [ + 'producerGrant', + 'consumerGrant', + ], + }, + State: 'ENABLED', + Targets: [ + Match.objectLike({ + Arn: { + Ref: Match.stringLikeRegexp('MskAuthorizerStateMachine.*'), + }, + DeadLetterConfig: { + Arn: { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('MskAuthorizerQueue.*'), + 'Arn', + ], + }, + }, + RetryPolicy: { + MaximumRetryAttempts: 0, + }, + RoleArn: { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('MskAuthorizerCentralEventRole.*'), + 'Arn', + ], + }, + }), + ], + }), + ); + }); + + test('should create an IAM role for triggering the authorizer Step Functions state machine', () => { + template.hasResourceProperties('AWS::IAM::Role', + Match.objectLike({ + AssumeRolePolicyDocument: Match.objectLike({ + Statement: [ + { + Action: 'sts:AssumeRole', + Effect: 'Allow', + Principal: { + Service: 'events.amazonaws.com', + }, + }, + ], + }), + }), + ); + }); + + test('should attach proper permissions to the event rule role to trigger the state machine', () => { + template.hasResourceProperties('AWS::IAM::Policy', + Match.objectLike({ + PolicyDocument: Match.objectLike({ + Statement: [ + { + Action: 
'states:StartExecution', + Effect: 'Allow', + Resource: { + Ref: Match.stringLikeRegexp('MskAuthorizerStateMachine.*'), + }, + }, + ], + }), + PolicyName: Match.stringLikeRegexp('MskAuthorizerCentralEventRoleDefaultPolicy.*'), + Roles: [ + { + Ref: Match.stringLikeRegexp('MskAuthorizerCentralEventRole.*'), + }, + ], + }), + ); + }); + + test('should create an IAM role for the Step Functions state machine', () => { + template.hasResourceProperties('AWS::IAM::Role', + Match.objectLike({ + AssumeRolePolicyDocument: Match.objectLike({ + Statement: [ + { + Action: 'sts:AssumeRole', + Effect: 'Allow', + Principal: { + Service: { + 'Fn::FindInMap': [ + 'ServiceprincipalMap', + { + Ref: 'AWS::Region', + }, + 'states', + ], + }, + }, + }, + ], + }), + }), + ); + }); + + test('should create proper IAM policy for the Step Functions state machine', () => { + template.hasResourceProperties('AWS::IAM::Policy', + Match.objectLike({ + PolicyDocument: Match.objectLike({ + Statement: [ + { + Action: 'lambda:InvokeFunction', + Effect: 'Allow', + Resource: [ + { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('MskAuthorizerGrantFunction.*'), + 'Arn', + ], + }, + { + 'Fn::Join': Match.arrayWith([ + [ + { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('MskAuthorizerGrantFunction.*'), + 'Arn', + ], + }, + ':*', + ], + ]), + }, + ], + }, + { + Action: 'events:PutEvents', + Effect: 'Allow', + Resource: { + 'Fn::Join': [ + '', + [ + 'arn:', + { + Ref: 'AWS::Partition', + }, + ':events:', + { + Ref: 'AWS::Region', + }, + ':', + { + Ref: 'AWS::AccountId', + }, + ':event-bus/default', + ], + ], + }, + }, + ], + }), + PolicyName: Match.stringLikeRegexp('MskAuthorizerStateMachineRoleDefaultPolicy.*'), + Roles: [ + { + Ref: Match.stringLikeRegexp('MskAuthorizerStateMachineRole.*'), + }, + ], + }), + ); + }); + + test('should create the Step Functions state machine', () => { + template.hasResourceProperties('AWS::StepFunctions::StateMachine', + Match.objectLike({ + DefinitionString: { + 'Fn::Join': [ + '', + [ + '{"StartAt":"GrantInvoke","States":{"GrantInvoke":{"Next":"SuccessCallback","Retry":[{"ErrorEquals":["Lambda.ClientExecutionTimeoutException","Lambda.ServiceException","Lambda.AWSLambdaException","Lambda.SdkClientException"],"IntervalSeconds":2,"MaxAttempts":6,"BackoffRate":2}],"Catch":[{"ErrorEquals":["States.TaskFailed"],"ResultPath":"$.ErrorInfo","Next":"FailureCallback"}],"Type":"Task","TimeoutSeconds":120,"ResultPath":"$.GrantResult","Resource":"arn:', + { + Ref: 'AWS::Partition', + }, + ':states:::lambda:invoke","Parameters":{"FunctionName":"', + { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('MskAuthorizerGrantFunction.*'), + 'Arn', + ], + }, + '","Payload.$":"$"}},"SuccessCallback":{"End":true,"Type":"Task","Resource":"arn:', + { + Ref: 'AWS::Partition', + }, + ':states:::events:putEvents","Parameters":{"Entries":[{"Detail":{"TaskToken.$":"$.detail.value.TaskToken","Status":"success"},"DetailType":"callback","EventBusName":"arn:', + { + Ref: 'AWS::Partition', + }, + ':events:', + { + Ref: 'AWS::Region', + }, + ':', + { + Ref: 'AWS::AccountId', + }, + ':event-bus/default","Source":"dsf.MskTopicAuthorizer"}]}},"FailureCallback":{"End":true,"Type":"Task","Resource":"arn:', + { + Ref: 'AWS::Partition', + }, + ':states:::events:putEvents","Parameters":{"Entries":[{"Detail":{"TaskToken.$":"$.detail.value.TaskToken","Status":"failure","Error.$":"$.ErrorInfo.Error","Cause.$":"$.ErrorInfo.Cause"},"DetailType":"callback","EventBusName":"arn:', + { + Ref: 'AWS::Partition', + }, + ':events:', + { + Ref: 'AWS::Region', + }, + 
':', + { + Ref: 'AWS::AccountId', + }, + ':event-bus/default","Source":"dsf.MskTopicAuthorizer"}]}}},"TimeoutSeconds":120}', + ], + ], + }, + RoleArn: { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('MskAuthorizerStateMachineRole.*'), + 'Arn', + ], + }, + }), + ); + }); + + test('should create an SAS queue as a dead letter queue for events', () => { + template.resourceCountIs('AWS::SQS::Queue', 1); + }); + + test('should create proper IAM policy for the dead letter queue ', () => { + template.hasResourceProperties('AWS::SQS::QueuePolicy', + Match.objectLike({ + PolicyDocument: Match.objectLike({ + Statement: [ + { + Action: 'sqs:*', + Condition: { + Bool: { + 'aws:SecureTransport': 'false', + }, + }, + Effect: 'Deny', + Principal: { + AWS: '*', + }, + Resource: { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('MskAuthorizerQueue.*'), + 'Arn', + ], + }, + }, + Match.objectLike({ + Action: 'sqs:SendMessage', + Condition: { + ArnEquals: { + 'aws:SourceArn': { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('MskAuthorizerCentralEventRule.*'), + 'Arn', + ], + }, + }, + }, + Effect: 'Allow', + Principal: { + Service: 'events.amazonaws.com', + }, + Resource: { + 'Fn::GetAtt': [ + Match.stringLikeRegexp('MskAuthorizerQueue.*'), + 'Arn', + ], + }, + }), + ], + }), + Queues: [ + { + Ref: Match.stringLikeRegexp('MskAuthorizerQueue.*'), + }, + ], + }), + ); + }); + + +}); + +describe ('Creating a DataZoneMskEnvironmentAuthorizer with DELETE removal but without global data removal', () => { + const app = new App(); + const stack = new Stack(app, 'Stack'); + const DOMAIN_ID = 'aba_dc999t9ime9sss'; + + new DataZoneMskEnvironmentAuthorizer(stack, 'MskAuthorizer', { + domainId: DOMAIN_ID, + removalPolicy: RemovalPolicy.DESTROY, + }); + + const template = Template.fromStack(stack); + + test('should create a Step Functions state machine with RETAIN removal policy', () => { + template.hasResource('AWS::StepFunctions::StateMachine', + Match.objectLike({ + UpdateReplacePolicy: 'Retain', + DeletionPolicy: 'Retain', + }), + ); + }); + + test('should create an SQS Queue with RETAIN removal policy', () => { + template.hasResource('AWS::SQS::Queue', + Match.objectLike({ + UpdateReplacePolicy: 'Retain', + DeletionPolicy: 'Retain', + }), + ); + }); +}); + +describe ('Creating a DataZoneMskEnvironmentAuthorizer with DELETE removal but without global data removal', () => { + const app = new App(); + const stack = new Stack(app, 'Stack'); + const DOMAIN_ID = 'aba_dc999t9ime9sss'; + + stack.node.setContext('@data-solutions-framework-on-aws/removeDataOnDestroy', true); + + new DataZoneMskEnvironmentAuthorizer(stack, 'MskAuthorizer', { + domainId: DOMAIN_ID, + removalPolicy: RemovalPolicy.DESTROY, + }); + + const template = Template.fromStack(stack); + + test('should create a Step Functions state machine with RETAIN removal policy', () => { + template.hasResource('AWS::StepFunctions::StateMachine', + Match.objectLike({ + UpdateReplacePolicy: 'Delete', + DeletionPolicy: 'Delete', + }), + ); + }); + + test('should create an SQS Queue with RETAIN removal policy', () => { + template.hasResource('AWS::SQS::Queue', + Match.objectLike({ + UpdateReplacePolicy: 'Delete', + DeletionPolicy: 'Delete', + }), + ); + }); +}); \ No newline at end of file diff --git a/framework/test/unit/nag/governance/nag-datazone-custom-asset-type-factory.test.ts b/framework/test/unit/nag/governance/nag-datazone-custom-asset-type-factory.test.ts new file mode 100644 index 000000000..ceeb07e34 --- /dev/null +++ 
b/framework/test/unit/nag/governance/nag-datazone-custom-asset-type-factory.test.ts @@ -0,0 +1,56 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + + +/** +* Tests DataZoneCustomAssetTypeFactory +* +* @group unit/best-practice/datazone-custom-asset-type-factory +*/ + + +import { App, Aspects, Stack } from 'aws-cdk-lib'; +import { Annotations, Match } from 'aws-cdk-lib/assertions'; +import { AwsSolutionsChecks, NagSuppressions } from 'cdk-nag'; +import { DataZoneCustomAssetTypeFactory } from '../../../../src/governance'; + +const app = new App(); +const stack = new Stack(app, 'Stack'); +const DOMAIN_ID = 'aba_dc999t9ime9sss'; + +new DataZoneCustomAssetTypeFactory(stack, 'DataZoneCustomAssetTypeFactory', { + domainId: DOMAIN_ID, +}); + +Aspects.of(stack).add(new AwsSolutionsChecks()); + +NagSuppressions.addResourceSuppressionsByPath(stack, [ + 'Stack/LogRetentionaae0aa3c5b4d4f87b02d85b201efdd8a', + 'Stack/DataZoneCustomAssetTypeFactory/Provider', +], +[ + { id: 'AwsSolutions-IAM4', reason: 'Inherited from DsfProvider construct, not in the scope of this test' }, + { id: 'AwsSolutions-IAM5', reason: 'Inherited from DsfProvider construct, not in the scope of this test' }, + { id: 'AwsSolutions-L1', reason: 'Inherited from DsfProvider construct, not in the scope of this test' }, +], +true); + +NagSuppressions.addResourceSuppressionsByPath(stack, [ + 'Stack/DataZoneCustomAssetTypeFactory/HandlerRole/Resource', +], +[ + { id: 'AwsSolutions-IAM4', reason: 'Recommended baseline policy for AWS Lambda Functions' }, +]); + + +test('No unsuppressed Warnings', () => { + const warnings = Annotations.fromStack(stack).findWarning('*', Match.stringLikeRegexp('AwsSolutions-.*')); + console.log(warnings); + expect(warnings).toHaveLength(0); +}); + +test('No unsuppressed Errors', () => { + const errors = Annotations.fromStack(stack).findError('*', Match.stringLikeRegexp('AwsSolutions-.*')); + console.log(errors); + expect(errors).toHaveLength(0); +}); \ No newline at end of file diff --git a/framework/test/unit/nag/governance/nag-datazone-gsr-msk-datasource.test.ts b/framework/test/unit/nag/governance/nag-datazone-gsr-msk-datasource.test.ts new file mode 100644 index 000000000..169391184 --- /dev/null +++ b/framework/test/unit/nag/governance/nag-datazone-gsr-msk-datasource.test.ts @@ -0,0 +1,53 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + + +/** +* Tests DataZoneGsrMskDataSource +* +* @group unit/best-practice/datazone-gsr-msk-data-source +*/ + + +import { App, Aspects, Stack } from 'aws-cdk-lib'; +import { Annotations, Match } from 'aws-cdk-lib/assertions'; +import { AwsSolutionsChecks, NagSuppressions } from 'cdk-nag'; +import { DataZoneGsrMskDataSource } from '../../../../src/governance'; + +const app = new App(); +const stack = new Stack(app, 'Stack'); +const DOMAIN_ID = 'aba_dc999t9ime9sss'; +const REGISTRY_NAME = 'schema-registry'; +const CLUSTER_NAME = 'msk-cluster'; +const PROJECT_ID = '999a99aa9aaaaa'; + +new DataZoneGsrMskDataSource(stack, 'DataZoneGsrMskDataSource', { + domainId: DOMAIN_ID, + projectId: PROJECT_ID, + registryName: REGISTRY_NAME, + clusterName: CLUSTER_NAME, +}); + +Aspects.of(stack).add(new AwsSolutionsChecks()); + +NagSuppressions.addResourceSuppressionsByPath(stack, [ + 'Stack/DataZoneGsrMskDataSource/HandlerRole/Resource', +], +[ + { id: 'AwsSolutions-IAM4', reason: 'Recommended baseline policy for AWS Lambda Functions' }, + { id: 'AwsSolutions-IAM5', reason: 'Schemas and cluster ARNs are unknow and discovered during execution' }, + { id: 'AwsSolutions-IAM5', reason: 'SSM parameter ID is based on the schema name and discovered during execution' }, +]); + + +test('No unsuppressed Warnings', () => { + const warnings = Annotations.fromStack(stack).findWarning('*', Match.stringLikeRegexp('AwsSolutions-.*')); + console.log(warnings); + expect(warnings).toHaveLength(0); +}); + +test('No unsuppressed Errors', () => { + const errors = Annotations.fromStack(stack).findError('*', Match.stringLikeRegexp('AwsSolutions-.*')); + console.log(errors); + expect(errors).toHaveLength(0); +}); \ No newline at end of file diff --git a/framework/test/unit/nag/governance/nag-datazone-msk-asset-type.test.ts b/framework/test/unit/nag/governance/nag-datazone-msk-asset-type.test.ts new file mode 100644 index 000000000..584acb70b --- /dev/null +++ b/framework/test/unit/nag/governance/nag-datazone-msk-asset-type.test.ts @@ -0,0 +1,56 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + + +/** +* Tests DataZoneMskAssetType +* +* @group unit/best-practice/datazone-msk-asset-type +*/ + + +import { App, Aspects, Stack } from 'aws-cdk-lib'; +import { Annotations, Match } from 'aws-cdk-lib/assertions'; +import { AwsSolutionsChecks, NagSuppressions } from 'cdk-nag'; +import { DataZoneMskAssetType } from '../../../../src/governance'; + +const app = new App(); +const stack = new Stack(app, 'Stack'); +const DOMAIN_ID = 'aba_dc999t9ime9sss'; + +new DataZoneMskAssetType(stack, 'MskAssetType', { + domainId: DOMAIN_ID, +}); + +Aspects.of(stack).add(new AwsSolutionsChecks()); + +NagSuppressions.addResourceSuppressionsByPath(stack, [ + 'Stack/LogRetentionaae0aa3c5b4d4f87b02d85b201efdd8a', + 'Stack/MskAssetType/DZCustomAssetTypeHandler/Provider', +], +[ + { id: 'AwsSolutions-IAM4', reason: 'Inherited from DsfProvider construct, not in the scope of this test' }, + { id: 'AwsSolutions-IAM5', reason: 'Inherited from DsfProvider construct, not in the scope of this test' }, + { id: 'AwsSolutions-L1', reason: 'Inherited from DsfProvider construct, not in the scope of this test' }, +], +true); + +NagSuppressions.addResourceSuppressionsByPath(stack, [ + 'Stack/MskAssetType/DZCustomAssetTypeHandler/HandlerRole/Resource', +], +[ + { id: 'AwsSolutions-IAM4', reason: 'Inherited from the DataZoneCustomAssetTypeFactory construct, not in the scope of this test' }, +]); + + +test('No unsuppressed Warnings', () => { + const warnings = Annotations.fromStack(stack).findWarning('*', Match.stringLikeRegexp('AwsSolutions-.*')); + console.log(warnings); + expect(warnings).toHaveLength(0); +}); + +test('No unsuppressed Errors', () => { + const errors = Annotations.fromStack(stack).findError('*', Match.stringLikeRegexp('AwsSolutions-.*')); + console.log(errors); + expect(errors).toHaveLength(0); +}); \ No newline at end of file diff --git a/framework/test/unit/nag/governance/nag-datazone-msk-central-authorizer.test.ts b/framework/test/unit/nag/governance/nag-datazone-msk-central-authorizer.test.ts new file mode 100644 index 000000000..303a81701 --- /dev/null +++ b/framework/test/unit/nag/governance/nag-datazone-msk-central-authorizer.test.ts @@ -0,0 +1,68 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + + +/** +* Tests DataZoneMskCentralAuthorizer +* +* @group unit/best-practice/datazone-msk-central-authorizer +*/ + + +import { App, Aspects, Stack } from 'aws-cdk-lib'; +import { Annotations, Match } from 'aws-cdk-lib/assertions'; +import { AwsSolutionsChecks, NagSuppressions } from 'cdk-nag'; +import { DataZoneMskCentralAuthorizer } from '../../../../src/governance'; + +const app = new App(); +const stack = new Stack(app, 'Stack'); +const DOMAIN_ID = 'aba_dc999t9ime9sss'; + +new DataZoneMskCentralAuthorizer(stack, 'MskAuthorizer', { + domainId: DOMAIN_ID, +}); + +Aspects.of(stack).add(new AwsSolutionsChecks()); + +NagSuppressions.addResourceSuppressionsByPath(stack, [ + 'Stack/MskAuthorizer/MetadataCollectorHandlerRole/Resource', + 'Stack/MskAuthorizer/CallbackHandlerRole/Resource', + 'Stack/MskAuthorizer/LambdaCallbackRole/Resource', +], +[ + { id: 'AwsSolutions-IAM4', reason: 'Recommended baseline policy for AWS Lambda Functions' }, +]); + +NagSuppressions.addResourceSuppressionsByPath(stack, [ + 'Stack/MskAuthorizer/StateMachine/Role/DefaultPolicy/Resource', +], +[ + { id: 'AwsSolutions-IAM5', reason: 'Wildcard created automatically by CDK for the Step Functions role to trigger the Lambda Functions versions' }, +]); + +NagSuppressions.addResourceSuppressionsByPath(stack, [ + 'Stack/MskAuthorizer/Queue/Resource', +], +[ + { id: 'AwsSolutions-SQS3', reason: 'The SQS queue is used as a Dead Letter Queue' }, +]); + +NagSuppressions.addResourceSuppressionsByPath(stack, [ + 'Stack/MskAuthorizer/StateMachine/Resource', +], +[ + { id: 'AwsSolutions-SF1', reason: 'The state machine doesn\'t log ALL events to optimize costs and because Lambda Functions already log the business logic' }, + { id: 'AwsSolutions-SF2', reason: 'X-ray not required in the state machine, logging and tracing happen in the Lambda Functions' }, +]); + +test('No unsuppressed Warnings', () => { + const warnings = Annotations.fromStack(stack).findWarning('*', Match.stringLikeRegexp('AwsSolutions-.*')); + console.log(warnings); + expect(warnings).toHaveLength(0); +}); + +test('No unsuppressed Errors', () => { + const errors = Annotations.fromStack(stack).findError('*', Match.stringLikeRegexp('AwsSolutions-.*')); + console.log(errors); + expect(errors).toHaveLength(0); +}); \ No newline at end of file diff --git a/framework/test/unit/nag/governance/nag-datazone-msk-environment-authorizer.test.ts b/framework/test/unit/nag/governance/nag-datazone-msk-environment-authorizer.test.ts new file mode 100644 index 000000000..7968f9f24 --- /dev/null +++ b/framework/test/unit/nag/governance/nag-datazone-msk-environment-authorizer.test.ts @@ -0,0 +1,67 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + + +/** +* Tests DataZoneMskEnvironmentAuthorizer +* +* @group unit/best-practice/datazone-msk-environment-authorizer +*/ + + +import { App, Aspects, Stack } from 'aws-cdk-lib'; +import { Annotations, Match } from 'aws-cdk-lib/assertions'; +import { AwsSolutionsChecks, NagSuppressions } from 'cdk-nag'; +import { DataZoneMskEnvironmentAuthorizer } from '../../../../src/governance'; + +const app = new App(); +const stack = new Stack(app, 'Stack'); +const DOMAIN_ID = 'aba_dc999t9ime9sss'; + +new DataZoneMskEnvironmentAuthorizer(stack, 'MskAuthorizer', { + domainId: DOMAIN_ID, +}); + +Aspects.of(stack).add(new AwsSolutionsChecks()); + +NagSuppressions.addResourceSuppressionsByPath(stack, [ + 'Stack/MskAuthorizer/GrantRole/Resource', +], +[ + { id: 'AwsSolutions-IAM4', reason: 'Recommended baseline policy for AWS Lambda Functions' }, + { id: 'AwsSolutions-IAM5', reason: 'Wildcard required because the MSK clusters and the IAM Roles are not known' }, +]); + +NagSuppressions.addResourceSuppressionsByPath(stack, [ + 'Stack/MskAuthorizer/Queue/Resource', +], +[ + { id: 'AwsSolutions-SQS3', reason: 'The SQS queue is used as a Dead Letter Queue' }, +]); + +NagSuppressions.addResourceSuppressionsByPath(stack, [ + 'Stack/MskAuthorizer/StateMachine/Resource', +], +[ + { id: 'AwsSolutions-SF1', reason: 'The state machine doesn\'t log ALL events to optimize costs and because Lambda Functions already log the business logic' }, + { id: 'AwsSolutions-SF2', reason: 'X-ray not required in the state machine, logging and tracing happen in the Lambda Functions' }, +]); + +NagSuppressions.addResourceSuppressionsByPath(stack, [ + 'Stack/MskAuthorizer/StateMachine/Role/DefaultPolicy/Resource', +], +[ + { id: 'AwsSolutions-IAM5', reason: 'Wildcard created automatically by CDK for the Step Functions role to trigger the Lambda Functions versions' }, +]); + +test('No unsuppressed Warnings', () => { + const warnings = Annotations.fromStack(stack).findWarning('*', Match.stringLikeRegexp('AwsSolutions-.*')); + console.log(warnings); + expect(warnings).toHaveLength(0); +}); + +test('No unsuppressed Errors', () => { + const errors = Annotations.fromStack(stack).findError('*', Match.stringLikeRegexp('AwsSolutions-.*')); + console.log(errors); + expect(errors).toHaveLength(0); +}); \ No newline at end of file diff --git a/framework/yarn.lock b/framework/yarn.lock index fce9ff36f..e8fdc31fd 100644 --- a/framework/yarn.lock +++ b/framework/yarn.lock @@ -3119,11 +3119,16 @@ iconv-lite@^0.6.2: dependencies: safer-buffer ">= 2.1.2 < 3.0.0" -ignore@^5.2.0, ignore@^5.2.4, ignore@^5.3.1: +ignore@^5.2.0, ignore@^5.3.1: version "5.3.1" resolved "https://registry.yarnpkg.com/ignore/-/ignore-5.3.1.tgz#5073e554cd42c5b33b394375f538b8593e34d4ef" integrity sha512-5Fytz/IraMjqpwfd34ke28PTVMjZjJG2MPn5t7OE4eUCUNf8BAa7b5WUS9/Qvr6mwOQS7Mk6vdsMno5he+T8Xw== +ignore@^5.2.4: + version "5.3.2" + resolved "https://registry.yarnpkg.com/ignore/-/ignore-5.3.2.tgz#3cd40e729f3643fd87cb04e50bf0eb722bc596f5" + integrity sha512-hsBTNUqQTDwkWtcdYI2i06Y/nUBEsNEDJKjWdigLvegy8kDuJAS8uRlpkkcQpyEXL0Z/pjDy5HBmMjRCJ2gq+g== + import-fresh@^3.2.1: version "3.3.0" resolved "https://registry.yarnpkg.com/import-fresh/-/import-fresh-3.3.0.tgz#37162c25fcb9ebaa2e6e53d5b4d88ce17d9e0c2b" @@ -4815,10 +4820,10 @@ process-nextick-args@~2.0.0: resolved "https://registry.yarnpkg.com/process-nextick-args/-/process-nextick-args-2.0.1.tgz#7820d9b16120cc55ca9ae7792680ae7dba6d7fe2" integrity 
sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag== -projen@^0.83.1: - version "0.83.1" - resolved "https://registry.yarnpkg.com/projen/-/projen-0.83.1.tgz#0bc644b1f95be898a565526c4eb649c225101163" - integrity sha512-agobaTIvfOxHllRa5pNHxFTRjH7Zxq2uus7FWQ0f+6OTLkH/hODtnV3LEA5pxVk5e0cYAD+ZZuS/mKKicAtkng== +projen@^0.87.2: + version "0.87.2" + resolved "https://registry.yarnpkg.com/projen/-/projen-0.87.2.tgz#0e91c139233fc8d36101e193ddcce75a21930226" + integrity sha512-O9qglXmlfuWd58xl1iTRvwIRzb5kNU/DS1kTYYIdQQlwcmlMG6+q0HbdhkkSnkwdqorqflQK1VpVeKF1vC//zg== dependencies: "@iarna/toml" "^2.2.5" case "^1.6.3" @@ -4829,7 +4834,7 @@ projen@^0.83.1: fast-json-patch "^3.1.1" glob "^8" ini "^2.0.0" - semver "^7.6.2" + semver "^7.6.3" shx "^0.3.4" xmlbuilder2 "^3.1.1" yaml "^2.2.2" @@ -5377,16 +5382,7 @@ string-length@^4.0.1: char-regex "^1.0.2" strip-ansi "^6.0.0" -"string-width-cjs@npm:string-width@^4.2.0": - version "4.2.3" - resolved "https://registry.yarnpkg.com/string-width/-/string-width-4.2.3.tgz#269c7117d27b05ad2e536830a8ec895ef9c6d010" - integrity sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g== - dependencies: - emoji-regex "^8.0.0" - is-fullwidth-code-point "^3.0.0" - strip-ansi "^6.0.1" - -"string-width@^1.0.2 || 2 || 3 || 4", string-width@^4.1.0, string-width@^4.2.0, string-width@^4.2.3: +"string-width-cjs@npm:string-width@^4.2.0", "string-width@^1.0.2 || 2 || 3 || 4", string-width@^4.1.0, string-width@^4.2.0, string-width@^4.2.3: version "4.2.3" resolved "https://registry.yarnpkg.com/string-width/-/string-width-4.2.3.tgz#269c7117d27b05ad2e536830a8ec895ef9c6d010" integrity sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g== @@ -5459,14 +5455,7 @@ stringify-package@^1.0.1: resolved "https://registry.yarnpkg.com/stringify-package/-/stringify-package-1.0.1.tgz#e5aa3643e7f74d0f28628b72f3dad5cecfc3ba85" integrity sha512-sa4DUQsYciMP1xhKWGuFM04fB0LG/9DlluZoSVywUMRNvzid6XucHK0/90xGxRoHrAaROrcHK1aPKaijCtSrhg== -"strip-ansi-cjs@npm:strip-ansi@^6.0.1": - version "6.0.1" - resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-6.0.1.tgz#9e26c63d30f53443e9489495b2105d37b67a85d9" - integrity sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A== - dependencies: - ansi-regex "^5.0.1" - -strip-ansi@^6.0.0, strip-ansi@^6.0.1: +"strip-ansi-cjs@npm:strip-ansi@^6.0.1", strip-ansi@^6.0.0, strip-ansi@^6.0.1: version "6.0.1" resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-6.0.1.tgz#9e26c63d30f53443e9489495b2105d37b67a85d9" integrity sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A== @@ -5943,16 +5932,7 @@ workerpool@^6.5.1: resolved "https://registry.yarnpkg.com/workerpool/-/workerpool-6.5.1.tgz#060f73b39d0caf97c6db64da004cd01b4c099544" integrity sha512-Fs4dNYcsdpYSAfVxhnl1L5zTksjvOJxtC5hzMNl+1t9B8hTJTdKDyZ5ju7ztgPy+ft9tBFXoOlDNiOT9WUXZlA== -"wrap-ansi-cjs@npm:wrap-ansi@^7.0.0": - version "7.0.0" - resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-7.0.0.tgz#67e145cff510a6a6984bdf1152911d69d2eb9e43" - integrity sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q== - dependencies: - ansi-styles "^4.0.0" - string-width "^4.1.0" - strip-ansi "^6.0.0" - -wrap-ansi@^7.0.0: +"wrap-ansi-cjs@npm:wrap-ansi@^7.0.0", wrap-ansi@^7.0.0: version "7.0.0" resolved 
"https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-7.0.0.tgz#67e145cff510a6a6984bdf1152911d69d2eb9e43" integrity sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q== @@ -6029,9 +6009,9 @@ yaml@1.10.2: integrity sha512-r3vXyErRCYJ7wg28yvBY5VSoAF8ZvlcW9/BwUzEtUsjvX/DKs24dIkuwjtuprwJJHsbyUbLApepYTR1BN4uHrg== yaml@^2.2.2: - version "2.4.5" - resolved "https://registry.yarnpkg.com/yaml/-/yaml-2.4.5.tgz#60630b206dd6d84df97003d33fc1ddf6296cca5e" - integrity sha512-aBx2bnqDzVOyNKfsysjA2ms5ZlnjSAW2eG3/L5G/CSujfjLJTJsEw1bGw8kCf04KodQWk1pxlGnZ56CRxiawmg== + version "2.5.1" + resolved "https://registry.yarnpkg.com/yaml/-/yaml-2.5.1.tgz#c9772aacf62cb7494a95b0c4f1fb065b563db130" + integrity sha512-bLQOjaX/ADgQ20isPJRvF0iRUHIxVhYvr53Of7wGcWlO2jvtUlH5m87DsmulFVxRpNLOnI4tB6p/oh8D7kpn9Q== yargs-parser@^20.2.2, yargs-parser@^20.2.3: version "20.2.9" diff --git a/website/docs/constructs/library/04-Governance/01-data-catalog-database.mdx b/website/docs/constructs/library/04-Governance/01-data-catalog-database.mdx index 440287ee3..bb86516db 100644 --- a/website/docs/constructs/library/04-Governance/01-data-catalog-database.mdx +++ b/website/docs/constructs/library/04-Governance/01-data-catalog-database.mdx @@ -1,5 +1,5 @@ --- -sidebar_position: 4 +sidebar_position: 1 sidebar_label: Data catalog database --- diff --git a/website/docs/constructs/library/04-Governance/02-data-lake-catalog.mdx b/website/docs/constructs/library/04-Governance/02-data-lake-catalog.mdx index 5118b76cb..d51a0e2b6 100644 --- a/website/docs/constructs/library/04-Governance/02-data-lake-catalog.mdx +++ b/website/docs/constructs/library/04-Governance/02-data-lake-catalog.mdx @@ -1,5 +1,5 @@ --- -sidebar_position: 4 +sidebar_position: 2 sidebar_label: Data lake catalog --- import GeneratedCode from '../generated/_governance-data-lake-catalog.mdx' diff --git a/website/docs/constructs/library/04-Governance/03-datazone-msk-asset-type.mdx b/website/docs/constructs/library/04-Governance/03-datazone-msk-asset-type.mdx new file mode 100644 index 000000000..47b2b1a41 --- /dev/null +++ b/website/docs/constructs/library/04-Governance/03-datazone-msk-asset-type.mdx @@ -0,0 +1,8 @@ +--- +sidebar_position: 3 +sidebar_label: DataZone MSK Asset Type +--- +import GeneratedCode from '../generated/_governance-datazone-msk-asset-type.mdx' + +# DataZoneMskAssetType + diff --git a/website/docs/constructs/library/04-Governance/04-datazone-msk-authorizer.mdx b/website/docs/constructs/library/04-Governance/04-datazone-msk-authorizer.mdx new file mode 100644 index 000000000..7205839bf --- /dev/null +++ b/website/docs/constructs/library/04-Governance/04-datazone-msk-authorizer.mdx @@ -0,0 +1,8 @@ +--- +sidebar_position: 4 +sidebar_label: DataZone MSK authorizer +--- +import GeneratedCode from '../generated/_governance-datazone-msk-authorizer.mdx' + +# DataZoneMskAuthorizer + diff --git a/website/docs/constructs/library/04-Governance/05-datazone-msk-data-source.mdx b/website/docs/constructs/library/04-Governance/05-datazone-msk-data-source.mdx new file mode 100644 index 000000000..6528eba28 --- /dev/null +++ b/website/docs/constructs/library/04-Governance/05-datazone-msk-data-source.mdx @@ -0,0 +1,8 @@ +--- +sidebar_position: 5 +sidebar_label: DataZone MSK Glue Schema Registry data source +--- +import GeneratedCode from '../generated/_governance-datazone-gsr-msk-datasource.mdx' + +# DataZoneGsrMskDataSource + diff --git a/website/docs/constructs/library/generated/_governance-datazone-gsr-msk-datasource.mdx 
b/website/docs/constructs/library/generated/_governance-datazone-gsr-msk-datasource.mdx new file mode 100644 index 000000000..dba2616e7 --- /dev/null +++ b/website/docs/constructs/library/generated/_governance-datazone-gsr-msk-datasource.mdx @@ -0,0 +1,109 @@ + + + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +DataZone Data Source for MSK Topics assets backed by Glue Schema Registry. + +## Overview + +`DataZoneGsrMskDataSource` is custom data source for DataZone that can create/update/delete MSK topics assets in DataZone based on a Glue Schema Registry definition. The custom data source can be triggered by a schedule or based on events from the Glue Schema Registry. The constructs implement: +- EventBridge Rules triggered either on a schedule or event based. +- A Lambda Function triggered from the EventBridge Rules and responsible for collecting metadata from The Glue Schema Registry and updating MSK Topic assets in DataZone. +- SSM Parameter Store Parameters to store required metadata + +## Usage + + + + + ```typescript +new dsf.governance.DataZoneGsrMskDataSource(this, 'DataZoneGsrMskDataSource', { + domainId: 'aba_dc999t9ime9sss', + registryName: 'schema-registry', + projectId: '999a99aa9aaaaa', + clusterName: 'msk-cluster' +}); + ``` + + + + + ```python +dsf.governance.DataZoneGsrMskDataSource(self, "DataZoneGsrMskDataSource", + domain_id="aba_dc999t9ime9sss", + registry_name="schema-registry", + project_id="999a99aa9aaaaa", + cluster_name="msk-cluster" +) + ``` + + + + +## Data Source trigger modes + +The custom data source process can be triggered in two different ways. By default, if no schedule and events are not enabled, the construct creates a schedule every one hour. +- Based on a Schedule + + + + + ```typescript +new dsf.governance.DataZoneGsrMskDataSource(this, 'DataZoneGsrMskDataSource', { + domainId: 'aba_dc999t9ime9sss', + registryName: 'schema-registry', + projectId: '999a99aa9aaaaa', + clusterName: 'msk-cluster', + runSchedule: events.Schedule.expression('cron(0 * * * * *)'), +}); + ``` + + + + + ```python +dsf.governance.DataZoneGsrMskDataSource(self, "DataZoneGsrMskDataSource", + domain_id="aba_dc999t9ime9sss", + registry_name="schema-registry", + project_id="999a99aa9aaaaa", + cluster_name="msk-cluster", + run_schedule=events.Schedule.expression("cron(0 * * * * *)") +) + ``` + + + + +- Based on events received from the Glue Schema Registry + + + + + ```typescript +new dsf.governance.DataZoneGsrMskDataSource(this, 'DataZoneGsrMskDataSource', { + domainId: 'aba_dc999t9ime9sss', + registryName: 'schema-registry', + projectId: '999a99aa9aaaaa', + clusterName: 'msk-cluster', + enableSchemaRegistryEvent: true, +}); + ``` + + + + + ```python +dsf.governance.DataZoneGsrMskDataSource(self, "DataZoneGsrMskDataSource", + domain_id="aba_dc999t9ime9sss", + registry_name="schema-registry", + project_id="999a99aa9aaaaa", + cluster_name="msk-cluster", + enable_schema_registry_event=True +) + ``` + + + + diff --git a/website/docs/constructs/library/generated/_governance-datazone-msk-asset-type.mdx b/website/docs/constructs/library/generated/_governance-datazone-msk-asset-type.mdx new file mode 100644 index 000000000..b08ff6c1e --- /dev/null +++ b/website/docs/constructs/library/generated/_governance-datazone-msk-asset-type.mdx @@ -0,0 +1,105 @@ + + + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +DataZone custom asset type for MSK topics. 
+ +## Overview + +`DataZoneMskAssetType` is a custom asset type implementation for Kafka topics hosted in MSK clusters. MSK clusters can be provisioned or serverless. Topics can be linked to a Glue Schema Registry. +The construct is a CDK custom resource that creates the corresponding DataZone Form Types and Asset Type required to store metadata related to MSK Topics. It includes: +- A MSK Source Reference Form Type containing metadata about the MSK Cluster including the cluster ARN and type. +- A Kafka Schema For Type containing metadata about the topic including the topic name, schema version, Glue Schema Registry ARN and Glue Schema ARN. + +![DataZone MSK asset type](../../../../static/img/datazone-msk-asset-type.png) + +## Usage + + + + + ```typescript +new dsf.governance.DataZoneMskAssetType(this, 'DataZoneMskAssetType', { + domainId: 'aba_dc999t9ime9sss', +}); + ``` + + + + + ```python +dsf.governance.DataZoneMskAssetType(self, "DataZoneMskAssetType", + domain_id="aba_dc999t9ime9sss" +) + ``` + + + + +## Reusing an existing owner project + +The `DataZoneMskAssetType` requires a DataZone project to own the custom asset type. By default, it will create a `MskGovernance` project within the domain but you pass an existing project. +The construct will make the IAM custom resource Role a member of the projectto be able to create the asset type and the form types. + + + + + ```typescript +new dsf.governance.DataZoneMskAssetType(this, 'DataZoneMskAssetType', { + domainId: 'aba_dc999t9ime9sss', + projectId: 'xxxxxxxxxxx' +}); + ``` + + + + + ```python +dsf.governance.DataZoneMskAssetType(self, "DataZoneMskAssetType", + domain_id="aba_dc999t9ime9sss", + project_id="xxxxxxxxxxx" +) + ``` + + + + +## Reusing a Custom Asset Type Factory + +By default, the `DataZoneMskAssetType` creates its own factory resources required to connect to DataZone and create the custom asset type. But it's possible to reuse a Factory across multiple Custom Asset Types to limit the number of custom resource providers and DataZone project membership: + + + + + ```typescript + const dataZoneAssetFactory = new dsf.governance.DataZoneCustomAssetTypeFactory(this, 'DataZoneCustomAssetTypeFactory', { + domainId: 'aba_dc999t9ime9sss', + }); + + new dsf.governance.DataZoneMskAssetType(this, 'DataZoneMskAssetType', { + domainId: 'aba_dc999t9ime9sss', + projectId: 'xxxxxxxxxxx', + dzCustomAssetTypeFactory: dataZoneAssetFactory + }); + ``` + + + + + ```python +data_zone_asset_factory = dsf.governance.DataZoneCustomAssetTypeFactory(self, "DataZoneCustomAssetTypeFactory", + domain_id="aba_dc999t9ime9sss" +) + +dsf.governance.DataZoneMskAssetType(self, "DataZoneMskAssetType", + domain_id="aba_dc999t9ime9sss", + project_id="xxxxxxxxxxx", + dz_custom_asset_type_factory=data_zone_asset_factory +) + ``` + + + + diff --git a/website/docs/constructs/library/generated/_governance-datazone-msk-authorizer.mdx b/website/docs/constructs/library/generated/_governance-datazone-msk-authorizer.mdx new file mode 100644 index 000000000..eae550144 --- /dev/null +++ b/website/docs/constructs/library/generated/_governance-datazone-msk-authorizer.mdx @@ -0,0 +1,185 @@ + + + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +Custom DataZone MSK authorizer for granting access to MSK topics via DataZone asset subscription workflow. 
+ +## Overview + +The DataZone MSK Authorizer is a custom process integrated with DataZone that implements the [Subscription Grant](https://docs.aws.amazon.com/datazone/latest/userguide/grant-access-to-unmanaged-asset.html) concept for Kafka topics hosted on Amazon MSK (provisioned and Serverless), +secured by IAM policies, and registered in DataZone using the `DataZoneMskAssetType`. +It supports: +- cross account access with MSK Provisioned clusters. +- MSK managed VPC connectivity permissions with MSK Provisioned clusters +- Glue Schema Registry permissions when sharing in the same account + +The authorizer is composed of 2 constructs: +- the `DataZoneMskCentralAuthorizer` is responsible for collecting metadata on the Subscription Grant, orchestrating the workflow and acknowledging the Subscription Grant creation. This construct must be deployed in the AWS root account of the DataZone Domain. +- the `DataZoneMskEnvironmentAuthorizer` is responsible for managing the permissions on the producer and consumer side. This construct must be deployed once per account associated with the DataZone Domain. + +The cross-account synchronization is exclusively done via EventBridge bus to restrict cross account permissions to the minimum. + +![DataZoneMskAuthorizer](../../../../static/img/datazone-msk-authorizer.png) + +## DataZoneMskCentralAuthorizer + +The `DataZoneMskCentralAuthorizer` is the central component that receives all the Subscription Grant Requests from DataZone for the `MskTopicAssetType` and orchestrate the end-to-end workflow. +The workflow is a Step Functions State Machine that is triggered by [events emmitted by DataZone](https://docs.aws.amazon.com/datazone/latest/userguide/working-with-events-and-notifications.html) and contains the following steps: +1. Metadata collection: a Lambda Function collect additional information from DataZone on the producer, the subscriber and update the status of the Subscription Grant to `IN_PROGESS`. +2. Producer grant trigger: an event is sent to the producer account to request the creation of the grant on the producer MSK cluster (implemented in the `DataZoneMskEnvironmentAuthorizer`). This step is an asynchronous state using a callback mechanism from the `DataZoneMskEnvironmentAuthorizer`. +3. Consumer grant trigger: an event is sent to the consumer account to request the creation of the grant on the IAM consumer Role (implemented in the `DataZoneMskEnvironmentAuthorizer`). This step is an asynchronous state using a callback mechanism from the `DataZoneMskEnvironmentAuthorizer`. +4. DataZone Subscription Grant callback: a Lambda Function updates the status of the Subscription Grant in DataZone to `GRANTED` or `REVOKE` based on the initial request. + +If any failure happens during the process, the Step Functions catch the exceptions and updates the status of the Subscription Grant to `GRANT_FAILED` or `REVOKE_FAILED`. + +:::info Permission grant failure +If the grant fails for the consumer, the grant already done for the producer is not reverted but the user is notified within DataZone because the failure is propagated. +The authorizer process is idempotent so it's safe to replay the workflow and all the permissions will be deduplicated. If it's not replayed, the producer grant needs to be manually cleaned up. 
+::: + +### Usage + + + + + ```typescript +new dsf.governance.DataZoneMskCentralAuthorizer(this, 'MskAuthorizer', { + domainId: 'aba_dc999t9ime9sss', +}); + ``` + + + + + ```python +dsf.governance.DataZoneMskCentralAuthorizer(self, "MskAuthorizer", + domain_id="aba_dc999t9ime9sss" +) + ``` + + + + +### Register producer and consumer accounts + +The `DataZoneMskCentralAuthorizer` construct work in collaboration with the `DataZoneMskEnvironmentAuthorizer` construct which is deployed into the producers and consumers accounts. +To enable the integration, register accounts using the `registerAccount()` method on the `DataZoneMskCentralAuthorizer` object. +It will grant the required permissions so the central account and the environment accounts can communicate via EventBridge events. + + + + + ```typescript + const centralAuthorizer = new dsf.governance.DataZoneMskCentralAuthorizer(this, 'MskAuthorizer', { + domainId: 'aba_dc999t9ime9sss', + }); + + // Add an account that is associated with the DataZone Domain + centralAuthorizer.registerAccount('123456789012'); + ``` + + + + + ```python +central_authorizer = dsf.governance.DataZoneMskCentralAuthorizer(self, "MskAuthorizer", + domain_id="aba_dc999t9ime9sss" +) + +# Add an account that is associated with the DataZone Domain +central_authorizer.register_account("123456789012") + ``` + + + + +## DataZoneMskEnvironmentAuthorizer + +The `DataZoneMskEnvironmentAuthorizer` is responsible from managing the permissions required to grant access on MSK Topics (and associated Glue Schema Registry) via IAM policies. +The workflow is a Step Functions State Machine that is triggered by events emitted by the `DataZoneMskCentralAuthorizer` and contains the following steps: +1. Grant the producer or consumer based on the request. If the event is a cross-account producer grant, a Lambda function adds an IAM policy statement to the MSK Cluster policy granting read access to the IAM consumer Role. Optionally, it can also grant the use of MSK Managed VPC. +2. Callback the `DataZoneMskCentralAuthorizer`: an EventBridge event is sent on the central EventBridge Bus to continue the workflow on the central account using the callback mechanism of Step Functions. + +### Usage + + + + + ```typescript +new dsf.governance.DataZoneMskEnvironmentAuthorizer(this, 'MskAuthorizer', { + domainId: 'aba_dc999t9ime9sss', +}); + ``` + + + + + ```python +dsf.governance.DataZoneMskEnvironmentAuthorizer(self, "MskAuthorizer", + domain_id="aba_dc999t9ime9sss" +) + ``` + + + + +### Cross account workflow + +If the `DataZoneMskEnvironmentAuthorizer` is deployed in a different account than the DataZone root account where the `DataZoneMskCentralAuthorizer` is deployed, you need to configure the central account ID to authorize cross-account communication: + + + + + ```typescript +new dsf.governance.DataZoneMskEnvironmentAuthorizer(this, 'MskAuthorizer', { + domainId: 'aba_dc999t9ime9sss', + centralAccountId: '123456789012' +}); + ``` + + + + + ```python +dsf.governance.DataZoneMskEnvironmentAuthorizer(self, "MskAuthorizer", + domain_id="aba_dc999t9ime9sss", + central_account_id="123456789012" +) + ``` + + + + +### Granting MSK Managed VPC connectivity + +For easier cross-account Kafka consumption, MSK Provisioned clusters can use the [multi-VPC private connectivity](https://docs.aws.amazon.com/msk/latest/developerguide/aws-access-mult-vpc.html) feature which is a managed solution that simplifies the networking infrastructure for multi-VPC and cross-account connectivity. 
+ +By default, the multi-VPC private connectivity permissions are not configured. You can enable it using the construct properties: + + + + + ```typescript +new dsf.governance.DataZoneMskEnvironmentAuthorizer(this, 'MskAuthorizer', { + domainId: 'aba_dc999t9ime9sss', + centralAccountId: '123456789012', + grantMskManagedVpc: true, +}); + ``` + + + + + ```python +dsf.governance.DataZoneMskEnvironmentAuthorizer(self, "MskAuthorizer", + domain_id="aba_dc999t9ime9sss", + central_account_id="123456789012", + grant_msk_managed_vpc=True +) + ``` + + + + diff --git a/website/static/img/adsf-diagrams.drawio b/website/static/img/adsf-diagrams.drawio index 35f0dc1fc..0b3e9dc6d 100644 --- a/website/static/img/adsf-diagrams.drawio +++ b/website/static/img/adsf-diagrams.drawio @@ -1,4 +1,4 @@ - + @@ -105,7 +105,7 @@ - + @@ -239,7 +239,7 @@ - + @@ -375,7 +375,7 @@ - + @@ -759,7 +759,7 @@ - + @@ -882,82 +882,82 @@ - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -1020,4 +1020,475 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/website/static/img/datazone-msk-asset-type.png b/website/static/img/datazone-msk-asset-type.png new file mode 100644 index 000000000..277fdf0dd Binary files /dev/null and b/website/static/img/datazone-msk-asset-type.png differ diff --git a/website/static/img/datazone-msk-authorizer.png b/website/static/img/datazone-msk-authorizer.png new file mode 100644 index 000000000..fb5600002 Binary files /dev/null and b/website/static/img/datazone-msk-authorizer.png differ diff --git a/website/static/img/datazone-msk-data-source.png b/website/static/img/datazone-msk-data-source.png new file mode 100644 index 000000000..79af6a6c2 Binary files /dev/null and b/website/static/img/datazone-msk-data-source.png differ diff --git a/website/static/img/datazone-msk-governance-example.png b/website/static/img/datazone-msk-governance-example.png new file mode 100644 index 000000000..0c12f34eb Binary files /dev/null and b/website/static/img/datazone-msk-governance-example.png differ