-
Notifications
You must be signed in to change notification settings - Fork 2.9k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added Data Quality Metrics aspect to emit data quality metrics metada…
…ta into Datahub
- Loading branch information
1 parent
e569fbc
commit c95e365
Showing
6 changed files
with
186 additions
and
0 deletions.
There are no files selected for viewing
25 changes: 25 additions & 0 deletions
25
metadata-models/src/main/pegasus/com/linkedin/DataQuality.pdl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
namespace com.linkedin.dataforge.dataquality | ||
|
||
import com.linkedin.common.ChangeAuditStamps | ||
|
||
/** | ||
* Data Quality aspect. This aspect will be attached to the Dataset entity. | ||
* Registered under the aspect name "dataQuality" and includes ChangeAuditStamps. | ||
* NOTE(review): the namespace is com.linkedin.dataforge.dataquality while the file sits under | ||
* com/linkedin/ - Pegasus normally expects the path to mirror the namespace; confirm. | ||
*/ | ||
@Aspect = { | ||
"name": "dataQuality" | ||
} | ||
record DataQuality includes ChangeAuditStamps { | ||
|
||
/** | ||
* datasetDimensionInfo holds a single DataQualityDimensionInfo record. | ||
* Optional field | ||
*/ | ||
datasetDimensionInfo: optional DataQualityDimensionInfo | ||
|
||
/** | ||
* schemaFieldDimensionInfos is an array of SchemaFieldQualityDimensionInfo records. | ||
* Optional field | ||
*/ | ||
schemaFieldDimensionInfos: optional array[SchemaFieldQualityDimensionInfo] | ||
|
||
} |
32 changes: 32 additions & 0 deletions
32
metadata-models/src/main/pegasus/com/linkedin/DataQualityDimensionInfo.pdl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
namespace com.linkedin.dataforge.dataquality | ||
|
||
/** | ||
* Record to capture Dataset/Field level Data Quality Metrics | ||
*/ | ||
record DataQualityDimensionInfo { | ||
|
||
/** | ||
* dimensions attribute to capture the per-dimension scores. It is an array of DimensionScore | ||
* Required if this record is initialized | ||
*/ | ||
dimensions: array[DimensionScore] | ||
|
||
/** | ||
* toolName attribute to capture the tool that has been used to generate the quality metrics. | ||
* Optional field | ||
*/ | ||
toolName: optional string | ||
|
||
/** | ||
* recordCount attribute to capture the number of data records that have been used to capture the quality metrics | ||
* Optional field | ||
*/ | ||
recordCount: optional long | ||
|
||
/** | ||
* note attribute to capture a comment or any other information about the result | ||
* Optional field | ||
*/ | ||
note: optional string | ||
|
||
} |
46 changes: 46 additions & 0 deletions
46
metadata-models/src/main/pegasus/com/linkedin/DimensionScore.pdl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
namespace com.linkedin.dataforge.dataquality | ||
|
||
import com.linkedin.common.Urn | ||
|
||
/** | ||
* Record to capture Dimension Score information | ||
*/ | ||
record DimensionScore { | ||
/** | ||
* dimensionUrn attribute to capture the URN of the dimension being scored. | ||
* Declared as a "dimensionNameType" relationship to entities of type "dimensionNameType". | ||
* Required field if this record is initialized | ||
*/ | ||
@Relationship = { | ||
"name": "dimensionNameType", | ||
"entityTypes": [ "dimensionNameType" ] | ||
} | ||
dimensionUrn: Urn | ||
|
||
/** | ||
* currentScore attribute to capture current score for a specific dimension | ||
* Required field if this record is initialized | ||
* NOTE(review): typed as string; presumably interpreted according to scoreType - confirm. | ||
*/ | ||
currentScore: string | ||
|
||
/** | ||
* historicalWeightedScore attribute to capture historical weighted score for a specific dimension | ||
* Optional field. Value will be populated, if available | ||
*/ | ||
historicalWeightedScore: optional string | ||
|
||
/** | ||
* scoreType attribute to capture score type for a specific score | ||
* One of PERCENTAGE or NUMERICAL_VALUE. | ||
* Required field if this record is initialized | ||
*/ | ||
scoreType: enum ScoreType { | ||
PERCENTAGE | ||
NUMERICAL_VALUE | ||
} | ||
|
||
/** | ||
* note attribute to capture note for a specific dimension | ||
* Optional field | ||
*/ | ||
note: optional string | ||
|
||
} |
57 changes: 57 additions & 0 deletions
57
metadata-models/src/main/pegasus/com/linkedin/DimensionTypeInfo.pdl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
namespace com.linkedin.dataforge.dataquality | ||
|
||
import com.linkedin.common.AuditStamp | ||
|
||
/** | ||
* Information about a Dimension type. | ||
* Registered under the aspect name "dimensionTypeInfo". | ||
*/ | ||
@Aspect = { | ||
"name": "dimensionTypeInfo" | ||
} | ||
record DimensionTypeInfo { | ||
|
||
/** | ||
* Display name of the Dimension Type | ||
* Required field. Indexed as a WORD_GRAM search field with autocomplete enabled. | ||
*/ | ||
@Searchable = { | ||
"fieldType": "WORD_GRAM", | ||
"enableAutocomplete": true, | ||
"boostScore": 10.0 | ||
} | ||
name: string | ||
|
||
/** | ||
* Description of the Dimension Type | ||
* Optional field | ||
*/ | ||
description: optional string | ||
|
||
/** | ||
* Audit stamp capturing the time and actor who created the Dimension Type. | ||
* Indexed for search as "createdAt" (time) and "createdBy" (actor). | ||
*/ | ||
@Searchable = { | ||
"/time": { | ||
"fieldType": "DATETIME", | ||
"fieldName": "createdAt" | ||
}, | ||
"/actor": { | ||
"fieldType": "URN", | ||
"fieldName": "createdBy" | ||
} | ||
} | ||
created: AuditStamp | ||
|
||
/** | ||
* Audit stamp capturing the time and actor who last modified the Dimension Type. | ||
* Indexed for search as "lastModifiedAt" (time) and "lastModifiedBy" (actor). | ||
*/ | ||
@Searchable = { | ||
"/time": { | ||
"fieldType": "DATETIME", | ||
"fieldName": "lastModifiedAt" | ||
}, | ||
"/actor": { | ||
"fieldType": "URN", | ||
"fieldName": "lastModifiedBy" | ||
} | ||
} | ||
lastModified: AuditStamp | ||
} |
25 changes: 25 additions & 0 deletions
25
metadata-models/src/main/pegasus/com/linkedin/SchemaFieldQualityDimensionInfo.pdl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
namespace com.linkedin.dataforge.dataquality | ||
|
||
import com.linkedin.common.ChangeAuditStamps | ||
import com.linkedin.common.Urn | ||
|
||
/** | ||
* Record to capture Data Quality Metrics for a single schema field. | ||
* Pairs a schema field URN with its DataQualityDimensionInfo and includes ChangeAuditStamps. | ||
*/ | ||
record SchemaFieldQualityDimensionInfo includes ChangeAuditStamps { | ||
/** | ||
* schemaFieldURN attribute to capture schema field URN | ||
* Declared as an "IsAssociatedWith" relationship to entities of type "schemaField". | ||
* Required field, if this record is initialized | ||
*/ | ||
@Relationship = { | ||
"name": "IsAssociatedWith", | ||
"entityTypes": [ "schemaField" ] | ||
} | ||
schemaFieldURN: Urn | ||
|
||
/** | ||
* schemaFieldDimensionInfo attribute to capture the quality dimension info for this schema field | ||
* Required field, if this record is initialized | ||
*/ | ||
schemaFieldDimensionInfo: DataQualityDimensionInfo | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters