You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@atlas.apache.org by sa...@apache.org on 2019/05/20 17:24:03 UTC
[atlas] branch branch-2.0 updated: ATLAS-3214: Create Spark models in atlas to support spark lineage
This is an automated email from the ASF dual-hosted git repository.
sarath pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/atlas.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new 30ef558 ATLAS-3214: Create Spark models in atlas to support spark lineage
30ef558 is described below
commit 30ef558b9429d4ca1c047d6b1c662ad9b2da9500
Author: Aadarsh Jajodia <aa...@gmail.com>
AuthorDate: Mon May 20 10:15:41 2019 -0700
ATLAS-3214: Create Spark models in atlas to support spark lineage
Signed-off-by: Sarath Subramanian <ss...@cloudera.com>
(cherry picked from commit fce87e338ad3d7a9425f41c09f73968308e8ce83)
---
addons/models/1000-Hadoop/1100-spark_model.json | 477 ++++++++++++++++++++++++
1 file changed, 477 insertions(+)
diff --git a/addons/models/1000-Hadoop/1100-spark_model.json b/addons/models/1000-Hadoop/1100-spark_model.json
new file mode 100644
index 0000000..125fbf5
--- /dev/null
+++ b/addons/models/1000-Hadoop/1100-spark_model.json
@@ -0,0 +1,477 @@
+{
+ "enumDefs": [],
+ "structDefs": [],
+ "classificationDefs": [],
+ "entityDefs": [
+ {
+ "name": "spark_db",
+ "superTypes": [
+ "DataSet"
+ ],
+ "serviceType": "spark",
+ "typeVersion": "1.0",
+ "attributeDefs": [
+ {
+ "name": "location",
+ "typeName": "string",
+ "isOptional": true,
+ "cardinality": "SINGLE",
+ "isUnique": false,
+ "isIndexable": false,
+ "searchWeight": 5
+ },
+ {
+ "name": "clusterName",
+ "typeName": "string",
+ "isOptional": true,
+ "cardinality": "SINGLE",
+ "isUnique": false,
+ "isIndexable": false,
+ "searchWeight": 8
+ },
+ {
+ "name": "parameters",
+ "typeName": "map<string,string>",
+ "isOptional": true,
+ "cardinality": "SINGLE",
+ "isUnique": false,
+ "isIndexable": false
+ },
+ {
+ "name": "ownerType",
+ "typeName": "string",
+ "isOptional": true,
+ "cardinality": "SINGLE",
+ "isUnique": false,
+ "isIndexable": false
+ }
+ ]
+ },
+ {
+ "name": "spark_table",
+ "superTypes": [
+ "DataSet"
+ ],
+ "serviceType": "spark",
+ "typeVersion": "1.0",
+ "attributeDefs": [
+ {
+ "name": "tableType",
+ "typeName": "string",
+ "isOptional": true,
+ "cardinality": "SINGLE",
+ "isUnique": false,
+ "isIndexable": false
+ },
+ {
+ "name": "provider",
+ "typeName": "string",
+ "isOptional": true,
+ "cardinality": "SINGLE",
+ "isUnique": false,
+ "isIndexable": false,
+ "searchWeight": 5
+ },
+ {
+ "name": "partitionColumnNames",
+ "typeName": "array<string>",
+ "isOptional": true,
+ "cardinality": "SINGLE",
+ "isUnique": false,
+ "isIndexable": false
+ },
+ {
+ "name": "bucketSpec",
+ "typeName": "map<string,string>",
+ "isOptional": true,
+ "cardinality": "SINGLE",
+ "isUnique": false,
+ "isIndexable": false
+ },
+ {
+ "name": "ownerType",
+ "typeName": "string",
+ "isOptional": true,
+ "cardinality": "SINGLE",
+ "isUnique": false,
+ "isIndexable": false
+ },
+ {
+ "name": "createTime",
+ "typeName": "date",
+ "isOptional": true,
+ "cardinality": "SINGLE",
+ "isUnique": false,
+ "isIndexable": false
+ },
+ {
+ "name": "parameters",
+ "typeName": "map<string,string>",
+ "isOptional": true,
+ "cardinality": "SINGLE",
+ "isUnique": false,
+ "isIndexable": false
+ },
+ {
+ "name": "comment",
+ "typeName": "string",
+ "isOptional": true,
+ "cardinality": "SINGLE",
+ "isUnique": false,
+ "isIndexable": false,
+ "searchWeight": 9
+ },
+ {
+ "name": "unsupportedFeatures",
+ "typeName": "array<string>",
+ "isOptional": true,
+ "cardinality": "SINGLE",
+ "isUnique": false,
+ "isIndexable": false
+ },
+ {
+ "name": "viewOriginalText",
+ "typeName": "string",
+ "isOptional": true,
+ "cardinality": "SINGLE",
+ "isUnique": false,
+ "isIndexable": false,
+ "searchWeight": 9
+ },
+ {
+ "name": "schemaDesc",
+ "typeName": "string",
+ "isOptional": true,
+ "cardinality": "SINGLE",
+ "isUnique": false,
+ "isIndexable": false,
+ "searchWeight": 5
+ },
+ {
+ "name": "partitionProvider",
+ "typeName": "string",
+ "isOptional": true,
+ "cardinality": "SINGLE",
+ "isUnique": false,
+ "isIndexable": false
+ }
+ ]
+ },
+ {
+ "name": "spark_column",
+ "superTypes": [
+ "DataSet"
+ ],
+ "serviceType": "spark",
+ "typeVersion": "1.0",
+ "attributeDefs": [
+ {
+ "name": "type",
+ "typeName": "string",
+ "isOptional": false,
+ "cardinality": "SINGLE",
+ "isUnique": false,
+ "isIndexable": true
+ },
+ {
+ "name": "nullable",
+ "typeName": "boolean",
+ "isOptional": true,
+ "cardinality": "SINGLE",
+ "isUnique": false,
+ "isIndexable": false
+ },
+ {
+ "name": "metadata",
+ "typeName": "string",
+ "isOptional": true,
+ "cardinality": "SINGLE",
+ "isUnique": false,
+ "isIndexable": false
+ },
+ {
+ "name": "comment",
+ "typeName": "string",
+ "isOptional": true,
+ "cardinality": "SINGLE",
+ "isUnique": false,
+ "isIndexable": false,
+ "searchWeight": 9
+ }
+ ]
+ },
+ {
+ "name": "spark_ml_directory",
+ "superTypes": [
+ "DataSet"
+ ],
+ "serviceType": "spark",
+ "typeVersion": "1.0",
+ "attributeDefs": [
+ {
+ "name": "uri",
+ "typeName": "string",
+ "isOptional": false,
+ "cardinality": "SINGLE",
+ "isUnique": false,
+ "isIndexable": true,
+ "searchWeight": 10
+ },
+ {
+ "name": "directory",
+ "typeName": "string",
+ "isOptional": false,
+ "cardinality": "SINGLE",
+ "isUnique": false,
+ "isIndexable": true,
+ "searchWeight": 10
+ }
+ ]
+ },
+ {
+ "name": "spark_storagedesc",
+ "superTypes": [
+ "Referenceable"
+ ],
+ "serviceType": "spark",
+ "typeVersion": "1.0",
+ "attributeDefs": [
+ {
+ "name": "location",
+ "typeName": "string",
+ "isOptional": true,
+ "cardinality": "SINGLE",
+ "isUnique": false,
+ "isIndexable": false,
+ "searchWeight": 10
+ },
+ {
+ "name": "inputFormat",
+ "typeName": "string",
+ "isOptional": true,
+ "cardinality": "SINGLE",
+ "isUnique": false,
+ "isIndexable": false
+ },
+ {
+ "name": "outputFormat",
+ "typeName": "string",
+ "isOptional": true,
+ "cardinality": "SINGLE",
+ "isUnique": false,
+ "isIndexable": false
+ },
+ {
+ "name": "serde",
+ "typeName": "string",
+ "isOptional": true,
+ "cardinality": "SINGLE",
+ "isUnique": false,
+ "isIndexable": false
+ },
+ {
+ "name": "compressed",
+ "typeName": "boolean",
+ "isOptional": false,
+ "cardinality": "SINGLE",
+ "isUnique": false,
+ "isIndexable": true
+ },
+ {
+ "name": "parameters",
+ "typeName": "map<string,string>",
+ "isOptional": true,
+ "cardinality": "SINGLE",
+ "isUnique": false,
+ "isIndexable": false
+ }
+ ]
+ },
+ {
+ "name": "spark_ml_model",
+ "superTypes": [
+ "DataSet"
+ ],
+ "serviceType": "spark",
+ "typeVersion": "1.0",
+ "attributeDefs": [
+ {
+ "name": "extra",
+ "typeName": "string",
+ "isOptional": true,
+ "cardinality": "SINGLE",
+ "isUnique": false,
+ "isIndexable": false
+ }
+ ]
+ },
+ {
+ "name": "spark_process",
+ "superTypes": [
+ "Process"
+ ],
+ "serviceType": "spark",
+ "typeVersion": "1.0",
+ "attributeDefs": [
+ {
+ "name": "executionId",
+ "typeName": "long",
+ "isOptional": true,
+ "cardinality": "SINGLE",
+ "isUnique": false,
+ "isIndexable": false,
+ "searchWeight": 10
+ },
+ {
+ "name": "currUser",
+ "typeName": "string",
+ "isOptional": true,
+ "cardinality": "SINGLE",
+ "isUnique": false,
+ "isIndexable": false,
+ "searchWeight": 10
+ },
+ {
+ "name": "remoteUser",
+ "typeName": "string",
+ "isOptional": true,
+ "cardinality": "SINGLE",
+ "isUnique": false,
+ "isIndexable": false,
+ "searchWeight": 10
+ },
+ {
+ "name": "details",
+ "typeName": "string",
+ "isOptional": true,
+ "cardinality": "SINGLE",
+ "isUnique": false,
+ "isIndexable": false
+ },
+ {
+ "name": "sparkPlanDescription",
+ "typeName": "string",
+ "isOptional": true,
+ "cardinality": "SINGLE",
+ "isUnique": false,
+ "isIndexable": false
+ }
+ ]
+ },
+ {
+ "name": "spark_ml_pipeline",
+ "superTypes": [
+ "DataSet"
+ ],
+ "serviceType": "spark",
+ "typeVersion": "1.0",
+ "attributeDefs": [
+ {
+ "name": "extra",
+ "typeName": "string",
+ "isOptional": true,
+ "cardinality": "SINGLE",
+ "isUnique": false,
+ "isIndexable": false
+ }
+ ]
+ }
+ ],
+ "relationshipDefs": [
+ {
+ "name": "spark_table_db",
+ "serviceType": "spark",
+ "typeVersion": "1.0",
+ "relationshipCategory": "AGGREGATION",
+ "endDef1": {
+ "type": "spark_table",
+ "name": "db",
+ "isContainer": false,
+ "cardinality": "SINGLE"
+ },
+ "endDef2": {
+ "type": "spark_db",
+ "name": "tables",
+ "isContainer": true,
+ "cardinality": "SET"
+ },
+ "propagateTags": "NONE"
+ },
+ {
+ "name": "spark_table_columns",
+ "serviceType": "spark",
+ "typeVersion": "1.0",
+ "relationshipCategory": "COMPOSITION",
+ "endDef1": {
+ "type": "spark_table",
+ "name": "columns",
+ "isContainer": true,
+ "cardinality": "SET"
+ },
+ "endDef2": {
+ "type": "spark_column",
+ "name": "table",
+ "isContainer": false,
+ "cardinality": "SINGLE"
+ },
+ "propagateTags": "NONE"
+ },
+ {
+ "name": "spark_table_storagedesc",
+ "serviceType": "spark",
+ "typeVersion": "1.0",
+ "relationshipCategory": "COMPOSITION",
+ "endDef1": {
+ "type": "spark_table",
+ "name": "sd",
+ "isContainer": true,
+ "cardinality": "SINGLE"
+ },
+ "endDef2": {
+ "type": "spark_storagedesc",
+ "name": "table",
+ "isContainer": false,
+ "cardinality": "SINGLE"
+ },
+ "propagateTags": "NONE"
+ },
+ {
+ "name": "spark_ml_model_ml_directory",
+ "serviceType": "spark",
+ "typeVersion": "1.0",
+ "relationshipCategory": "COMPOSITION",
+ "endDef1": {
+ "type": "spark_ml_model",
+ "name": "directory",
+ "isContainer": true,
+ "cardinality": "SINGLE"
+ },
+ "endDef2": {
+ "type": "spark_ml_directory",
+ "name": "model",
+ "isContainer": false,
+ "cardinality": "SINGLE"
+ },
+ "propagateTags": "NONE"
+ },
+ {
+ "name": "spark_ml_pipeline_ml_directory",
+ "serviceType": "spark",
+ "typeVersion": "1.0",
+ "relationshipCategory": "COMPOSITION",
+ "endDef1": {
+ "type": "spark_ml_pipeline",
+ "name": "directory",
+ "isContainer": true,
+ "cardinality": "SINGLE"
+ },
+ "endDef2": {
+ "type": "spark_ml_directory",
+ "name": "pipeline",
+ "isContainer": false,
+ "cardinality": "SINGLE"
+ },
+ "propagateTags": "NONE"
+ }
+ ]
+}
\ No newline at end of file