Skip to content

Commit

Permalink
Fix setup dependencies for ADX deployment (#1151)
Browse files Browse the repository at this point in the history
  • Loading branch information
MSBrett authored Nov 23, 2024
1 parent 71c8724 commit 018b059
Show file tree
Hide file tree
Showing 9 changed files with 215 additions and 69 deletions.
9 changes: 9 additions & 0 deletions src/templates/finops-hub/createUiDefinition.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@
"Microsoft.KeyVault/vaults",
"Microsoft.Kusto/clusters",
"Microsoft.ManagedIdentity/userAssignedIdentities",
"Microsoft.Network/privateDnsZones",
"Microsoft.Network/privateDnsZones/virtualNetworkLinks",
"Microsoft.Network/privateEndpoints",
"Microsoft.Resources/deploymentScripts",
"Microsoft.Storage/storageAccounts"
]
Expand All @@ -24,6 +27,9 @@
"Microsoft.KeyVault/vaults",
"Microsoft.Kusto/clusters",
"Microsoft.ManagedIdentity/userAssignedIdentities",
"Microsoft.Network/privateDnsZones",
"Microsoft.Network/privateDnsZones/virtualNetworkLinks",
"Microsoft.Network/privateEndpoints",
"Microsoft.Resources/deploymentScripts",
"Microsoft.Storage/storageAccounts"
],
Expand Down Expand Up @@ -572,6 +578,9 @@
"Microsoft.KeyVault/vaults",
"Microsoft.Kusto/clusters",
"Microsoft.ManagedIdentity/userAssignedIdentities",
"Microsoft.Network/privateDnsZones",
"Microsoft.Network/privateDnsZones/virtualNetworkLinks",
"Microsoft.Network/privateEndpoints",
"Microsoft.Resources/deploymentScripts",
"Microsoft.Storage/storageAccounts"
]
Expand Down
131 changes: 98 additions & 33 deletions src/templates/finops-hub/modules/dataExplorer.bicep
Original file line number Diff line number Diff line change
Expand Up @@ -108,11 +108,8 @@ param dataFactoryName string
@description('Optional. Number of days of data to retain in the Data Explorer *_raw tables. Default: 0.')
param rawRetentionInDays int = 0

// @description('Required. Name of the storage account to use for data ingestion.')
// param storageAccountName string

// @description('Required. Name of storage container to monitor for data ingestion.')
// param storageContainerName string
@description('Required. Name of the storage account to use for data ingestion.')
param storageAccountName string

@description('Required. Resource ID of the virtual network for private endpoints.')
param virtualNetworkId string
Expand All @@ -132,6 +129,74 @@ var ftkVersion = contains(ftkver, '-') ? split(ftkver, '-')[0] : ftkver
var ftkBranch = contains(ftkver, '-') ? split(ftkver, '-')[1] : ''
// Name of the private DNS zone for the cluster: 'privatelink.<location>.' followed by the
// environment's storage suffix with 'core' replaced by 'kusto'. The outer replace() collapses
// any '..' in the result into a single '.'.
var dataExplorerPrivateDnsZoneName = replace('privatelink.${location}.${replace(environment().suffixes.storage, 'core', 'kusto')}', '..', '.')

// Ingestion capacity lookup keyed by cluster SKU name.
// Read by the clusterIngestionCapacity output, which falls back to 1 for SKUs not listed here.
// Actual = Minimum(ClusterMaximumConcurrentOperations, Number of nodes in cluster * Maximum(1, Core count per node * CoreUtilizationCoefficient))
// NOTE(review): the values below look like per-SKU core counts, i.e. a simplification of the formula above -- confirm.
var ingestionCapacity = {
'Dev(No SLA)_Standard_E2a_v4': 1
'Dev(No SLA)_Standard_D11_v2': 1
Standard_D11_v2: 2
Standard_D12_v2: 4
Standard_D13_v2: 8
Standard_D14_v2: 16
Standard_D16d_v5: 16
Standard_D32d_v4: 32
Standard_D32d_v5: 32
'Standard_DS13_v2+1TB_PS': 8
'Standard_DS13_v2+2TB_PS': 8
'Standard_DS14_v2+3TB_PS': 16
'Standard_DS14_v2+4TB_PS': 16
Standard_E2a_v4: 2
Standard_E2ads_v5: 2
Standard_E2d_v4: 2
Standard_E2d_v5: 2
Standard_E4a_v4: 4
Standard_E4ads_v5: 4
Standard_E4d_v4: 4
Standard_E4d_v5: 4
Standard_E8a_v4: 8
Standard_E8ads_v5: 8
'Standard_E8as_v4+1TB_PS': 8
'Standard_E8as_v4+2TB_PS': 8
'Standard_E8as_v5+1TB_PS': 8
'Standard_E8as_v5+2TB_PS': 8
Standard_E8d_v4: 8
Standard_E8d_v5: 8
'Standard_E8s_v4+1TB_PS': 8
'Standard_E8s_v4+2TB_PS': 8
'Standard_E8s_v5+1TB_PS': 8
'Standard_E8s_v5+2TB_PS': 8
Standard_E16a_v4: 16
Standard_E16ads_v5: 16
'Standard_E16as_v4+3TB_PS': 16
'Standard_E16as_v4+4TB_PS': 16
'Standard_E16as_v5+3TB_PS': 16
'Standard_E16as_v5+4TB_PS': 16
Standard_E16d_v4: 16
Standard_E16d_v5: 16
'Standard_E16s_v4+3TB_PS': 16
'Standard_E16s_v4+4TB_PS': 16
'Standard_E16s_v5+3TB_PS': 16
'Standard_E16s_v5+4TB_PS': 16
Standard_E64i_v3: 64
Standard_E80ids_v4: 80
Standard_EC8ads_v5: 8
'Standard_EC8as_v5+1TB_PS': 8
'Standard_EC8as_v5+2TB_PS': 8
Standard_EC16ads_v5: 16
'Standard_EC16as_v5+3TB_PS': 16
'Standard_EC16as_v5+4TB_PS': 16
Standard_L4s: 4
Standard_L8as_v3: 8
Standard_L8s: 8
Standard_L8s_v2: 8
Standard_L8s_v3: 8
Standard_L16as_v3: 16
Standard_L16s: 16
Standard_L16s_v2: 16
Standard_L16s_v3: 16
Standard_L32as_v3: 32
Standard_L32s_v3: 32
}

//==============================================================================
// Resources
//==============================================================================
Expand All @@ -157,17 +222,9 @@ resource tablePrivateDnsZone 'Microsoft.Network/privateDnsZones@2024-06-01' exis
name: 'privatelink.table.${environment().suffixes.storage}'
}

// resource storage 'Microsoft.Storage/storageAccounts@2022-09-01' existing = {
// name: storageAccountName

// resource blobServices 'blobServices' = {
// name: 'default'

// resource landingContainer 'containers' = {
// name: storageContainerName
// }
// }
// }
// Reference to the already-deployed storage account identified by storageAccountName.
// Used as the scope of the clusterStorageAccess role assignment.
resource storage 'Microsoft.Storage/storageAccounts@2022-09-01' existing = {
name: storageAccountName
}

//------------------------------------------------------------------------------
// Cluster + databases
Expand Down Expand Up @@ -238,7 +295,7 @@ resource cluster 'Microsoft.Kusto/clusters@2023-08-15' = {
location: location
kind: 'ReadWrite'
dependsOn: [
ingestionDb
ingestionDb::setupScript
]

resource commonScript 'scripts' = {
Expand All @@ -253,7 +310,6 @@ resource cluster 'Microsoft.Kusto/clusters@2023-08-15' = {
resource setupScript 'scripts' = {
name: 'SetupScript'
dependsOn: [
ingestionDb::setupScript
hubDb::commonScript
]
properties: {
Expand All @@ -267,26 +323,27 @@ resource cluster 'Microsoft.Kusto/clusters@2023-08-15' = {
}
}

// // Authorize Kusto Cluster to read storage
// resource clusterStorageAccess 'Microsoft.Authorization/roleAssignments@2022-04-01' = {
// name: guid(cluster.name, storageContainerName, 'Storage Blob Data Contributor')
// scope: storage::blobServices
// properties: {
// description: 'Give "Storage Blob Data Contributor" to the cluster'
// principalId: cluster.identity.principalId
// // Required in case principal not ready when deploying the assignment
// principalType: 'ServicePrincipal'
// roleDefinitionId: subscriptionResourceId(
// 'Microsoft.Authorization/roleDefinitions',
// 'ba92f5b4-2d11-453d-a403-e96b0029c9fe' // Storage Blob Data Contributor -- https://learn.microsoft.com/azure/role-based-access-control/built-in-roles#storage
// )
// }
// }
// Authorize the Kusto cluster's managed identity to access blob data in the storage account.
// Grants the "Storage Blob Data Contributor" built-in role, scoped to the whole storage account.
resource clusterStorageAccess 'Microsoft.Authorization/roleAssignments@2022-04-01' = {
// Role assignment names must be GUIDs; this one is derived from the cluster name and subscription ID.
name: guid(cluster.name, subscription().id, 'Storage Blob Data Contributor')
scope: storage
properties: {
description: 'Give "Storage Blob Data Contributor" to the cluster'
principalId: cluster.identity.principalId
// Required in case principal not ready when deploying the assignment
principalType: 'ServicePrincipal'
roleDefinitionId: subscriptionResourceId(
'Microsoft.Authorization/roleDefinitions',
'ba92f5b4-2d11-453d-a403-e96b0029c9fe' // Storage Blob Data Contributor -- https://learn.microsoft.com/azure/role-based-access-control/built-in-roles#storage
)
}
}

// Private DNS zone named by dataExplorerPrivateDnsZoneName.
resource dataExplorerPrivateDnsZone 'Microsoft.Network/privateDnsZones@2024-06-01' = {
name: dataExplorerPrivateDnsZoneName
location: 'global'
// Shared tags merged with the per-resource-type tags for Microsoft.Network/privateDnsZones, when tagsByResource has such an entry.
tags: union(tags, contains(tagsByResource, 'Microsoft.Network/privateDnsZones') ? tagsByResource['Microsoft.Network/privateDnsZones'] : {})
properties: {}
}

Expand All @@ -295,6 +352,7 @@ resource dataExplorerPrivateDnsZoneLink 'Microsoft.Network/privateDnsZones/virtu
name: '${replace(dataExplorerPrivateDnsZone.name, '.', '-')}-link'
location: 'global'
parent: dataExplorerPrivateDnsZone
tags: union(tags, contains(tagsByResource, 'Microsoft.Network/privateDnsZones/virtualNetworkLinks') ? tagsByResource['Microsoft.Network/privateDnsZones/virtualNetworkLinks'] : {})
properties: {
virtualNetwork: {
id: virtualNetworkId
Expand All @@ -307,6 +365,7 @@ resource dataExplorerPrivateDnsZoneLink 'Microsoft.Network/privateDnsZones/virtu
resource dataExplorerEndpoint 'Microsoft.Network/privateEndpoints@2023-11-01' = {
name: '${cluster.name}-ep'
location: location
tags: union(tags, contains(tagsByResource, 'Microsoft.Network/privateEndpoints') ? tagsByResource['Microsoft.Network/privateEndpoints'] : {})
properties: {
subnet: {
id: privateEndpointSubnetId
Expand Down Expand Up @@ -364,6 +423,9 @@ resource dataExplorerPrivateDnsZoneGroup 'Microsoft.Network/privateEndpoints/pri
@description('The resource ID of the cluster.')
output clusterId string = cluster.id

@description('The ID of the cluster system assigned managed identity.')
output principalId string = cluster.identity.principalId

@description('The name of the cluster.')
output clusterName string = cluster.name

Expand All @@ -375,3 +437,6 @@ output ingestionDbName string = cluster::ingestionDb.name

@description('The name of the database for queries.')
output hubDbName string = cluster::hubDb.name

// Looks up clusterSku in the ingestionCapacity map; the safe-dereference combined with ?? yields 1
// for any SKU that has no entry in the map.
@description('Max ingestion capacity of the cluster.')
output clusterIngestionCapacity int = ingestionCapacity[?clusterSku] ?? 1
64 changes: 53 additions & 11 deletions src/templates/finops-hub/modules/dataFactory.bicep
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,18 @@ param dataExplorerName string = ''
@description('Optional. Resource ID of the Azure Data Explorer cluster to use for advanced analytics, if applicable.')
param dataExplorerId string = ''

@description('Optional. ID of the Azure Data Explorer cluster system assigned managed identity, if applicable.')
param dataExplorerPrincipalId string = ''

@description('Optional. URI of the Azure Data Explorer cluster to use for advanced analytics, if applicable.')
param dataExplorerUri string = ''

// The documented default must match the actual default value exactly, including letter case.
@description('Optional. Name of the Azure Data Explorer ingestion database. Default: "Ingestion".')
param dataExplorerIngestionDatabase string = 'Ingestion'

// Used as the ForEach batchCount (concurrency limit) in the ExecuteETL ingestion pipeline.
// NOTE(review): Azure Data Factory documents a maximum batchCount of 50 -- confirm callers never pass
// a larger value (e.g. a capacity derived from a large cluster SKU).
@description('Optional. Azure Data Explorer ingestion capacity. Increase for non-dev SKUs. Default: 1')
param dataExplorerIngestionCapacity int = 1

@description('Optional. The location to use for the managed identity and deployment script to auto-start triggers. Default = (resource group location).')
param location string = resourceGroup().location

Expand Down Expand Up @@ -170,13 +176,13 @@ resource managedIntegrationRuntime 'Microsoft.DataFactory/factories/integrationR
customProperties: []
}
copyComputeScaleProperties: {
dataIntegrationUnit: 256
dataIntegrationUnit: 16
timeToLive: 30
}
pipelineExternalComputeScaleProperties: {
timeToLive: 30
numberOfPipelineNodes: 10
numberOfExternalNodes: 10
numberOfPipelineNodes: 1
numberOfExternalNodes: 1
}
}
}
Expand Down Expand Up @@ -853,7 +859,7 @@ resource trigger_IngestionManifestAdded 'Microsoft.DataFactory/factories/trigger
typeProperties: {
blobPathBeginsWith: '/${ingestionContainerName}/blobs/'
blobPathEndsWith: 'manifest.json'
ignoreEmptyBlobs: false
ignoreEmptyBlobs: true
scope: storageAccount.id
events: [
'Microsoft.Storage.BlobCreated'
Expand Down Expand Up @@ -1164,10 +1170,44 @@ resource pipeline_InitializeHub 'Microsoft.DataFactory/factories/pipelines@2018-
}
]
ifTrueActivities: [
{ // Save ingestion policy in ADX
name: 'Set ingestion policy in ADX'
type: 'AzureDataExplorerCommand'
dependsOn: []
policy: {
timeout: '0.12:00:00'
retry: 0
retryIntervalInSeconds: 30
secureOutput: false
secureInput: false
}
userProperties: []
typeProperties: {
command: {
value: '.alter-merge database ${dataExplorerIngestionDatabase} policy managed_identity "[ { \'ObjectId\' : \'${dataExplorerPrincipalId}\', \'AllowedUsages\' : \'NativeIngestion\' }]"'
type: 'Expression'
}
commandTimeout: '00:20:00'
}
linkedServiceName: {
referenceName: linkedService_dataExplorer.name
type: 'LinkedServiceReference'
parameters: {
database: dataExplorerIngestionDatabase
}
}
}
{ // Save Hub Settings in ADX
name: 'Save Hub Settings in ADX'
type: 'AzureDataExplorerCommand'
dependsOn: []
dependsOn: [
{
activity: 'Set ingestion policy in ADX'
dependencyConditions: [
'Succeeded'
]
}
]
policy: {
timeout: '0.12:00:00'
retry: 0
Expand Down Expand Up @@ -2638,7 +2678,7 @@ resource pipeline_ExecuteExportsETL 'Microsoft.DataFactory/factories/pipelines@2
typeProperties: {
variableName: 'hasNoRows'
value: {
value: '@or(equals(activity(\'Read Manifest\').output.firstRow.dataRowCount, null), equals(activity(\'Read Manifest\').output.firstRow.dataRowCount, 0))'
value: '@or(equals(activity(\'Read Manifest\').output.firstRow.blobCount, null), equals(activity(\'Read Manifest\').output.firstRow.blobCount, 0))'
type: 'Expression'
}
}
Expand Down Expand Up @@ -3202,6 +3242,7 @@ resource pipeline_ExecuteExportsETL 'Microsoft.DataFactory/factories/pipelines@2
value: '@if(variables(\'hasNoRows\'), json(\'[]\'), activity(\'Read Manifest\').output.firstRow.blobs)'
type: 'Expression'
}
batchCount: enablePublicAccess ? 30 : 4 // so we don't overload the managed runtime
isSequential: false
activities: [
{ // Execute
Expand Down Expand Up @@ -4007,7 +4048,6 @@ resource pipeline_ToDataExplorer 'Microsoft.DataFactory/factories/pipelines@2018
name: '${safeIngestionContainerName}_ETL_dataExplorer'
parent: dataFactory
properties: {
// concurrency: 8 // sanity check
activities: [
{ // Read Hub Config
name: 'Read Hub Config'
Expand Down Expand Up @@ -4199,15 +4239,15 @@ resource pipeline_ToDataExplorer 'Microsoft.DataFactory/factories/pipelines@2018
]
policy: {
timeout: '0.12:00:00'
retry: 0
retryIntervalInSeconds: 30
retry: 3
retryIntervalInSeconds: 120
secureOutput: false
secureInput: false
}
userProperties: []
typeProperties: {
command: {
value: '@concat(\'.ingest into table \', pipeline().parameters.table, \' ("${storageAccount.properties.primaryEndpoints.dfs}/${ingestionContainerName}/\', pipeline().parameters.folderPath, \'/\', pipeline().parameters.fileName, \'") with (format="parquet", ingestionMappingReference="\', pipeline().parameters.table, \'_mapping", tags="[\\"drop-by:\', pipeline().parameters.ingestionId, \'\\", \\"drop-by:\', pipeline().parameters.folderPath, \'/\', pipeline().parameters.originalFileName, \'\\", \\"drop-by:ftk-version-${ftkVersion}\\"]")\')'
value: '@concat(\'.ingest into table \', pipeline().parameters.table, \' ("abfss://${ingestionContainerName}@${storageAccount.name}.dfs.${environment().suffixes.storage}/\', pipeline().parameters.folderPath, \'/\', pipeline().parameters.fileName, \';managed_identity=system") with (format="parquet", ingestionMappingReference="\', pipeline().parameters.table, \'_mapping", tags="[\\"drop-by:\', pipeline().parameters.ingestionId, \'\\", \\"drop-by:\', pipeline().parameters.folderPath, \'/\', pipeline().parameters.originalFileName, \'\\", \\"drop-by:ftk-version-${ftkVersion}\\"]"); print Success = assert(iff(toscalar($command_results | project-keep HasErrors) == false, true, false), "Ingestion Failed")\')'
type: 'Expression'
}
commandTimeout: '01:00:00'
Expand Down Expand Up @@ -4450,6 +4490,7 @@ resource pipeline_ExecuteIngestionETL 'Microsoft.DataFactory/factories/pipelines
name: '${safeIngestionContainerName}_ExecuteETL'
parent: dataFactory
properties: {
concurrency: 1
activities: [
{ // Wait
name: 'Wait'
Expand Down Expand Up @@ -4593,6 +4634,7 @@ resource pipeline_ExecuteIngestionETL 'Microsoft.DataFactory/factories/pipelines
]
userProperties: []
typeProperties: {
batchCount: dataExplorerIngestionCapacity // Concurrency limit
items: {
value: '@activity(\'Filter Out Folders\').output.Value'
type: 'Expression'
Expand All @@ -4612,7 +4654,7 @@ resource pipeline_ExecuteIngestionETL 'Microsoft.DataFactory/factories/pipelines
referenceName: pipeline_ToDataExplorer.name
type: 'PipelineReference'
}
waitOnCompletion: false
waitOnCompletion: true
parameters: {
folderPath: {
value: '@variables(\'containerFolderPath\')'
Expand Down
Loading

0 comments on commit 018b059

Please sign in to comment.