You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@atlas.apache.org by sa...@apache.org on 2019/05/20 17:24:03 UTC

[atlas] branch branch-2.0 updated: ATLAS-3214: Create Spark models in atlas to support spark lineage

This is an automated email from the ASF dual-hosted git repository.

sarath pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/atlas.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new 30ef558  ATLAS-3214: Create Spark models in atlas to support spark lineage
30ef558 is described below

commit 30ef558b9429d4ca1c047d6b1c662ad9b2da9500
Author: Aadarsh Jajodia <aa...@gmail.com>
AuthorDate: Mon May 20 10:15:41 2019 -0700

    ATLAS-3214: Create Spark models in atlas to support spark lineage
    
    Signed-off-by: Sarath Subramanian <ss...@cloudera.com>
    (cherry picked from commit fce87e338ad3d7a9425f41c09f73968308e8ce83)
---
 addons/models/1000-Hadoop/1100-spark_model.json | 477 ++++++++++++++++++++++++
 1 file changed, 477 insertions(+)

diff --git a/addons/models/1000-Hadoop/1100-spark_model.json b/addons/models/1000-Hadoop/1100-spark_model.json
new file mode 100644
index 0000000..125fbf5
--- /dev/null
+++ b/addons/models/1000-Hadoop/1100-spark_model.json
@@ -0,0 +1,477 @@
+{
+  "enumDefs": [],
+  "structDefs": [],
+  "classificationDefs": [],
+  "entityDefs": [
+    {
+      "name": "spark_db",
+      "superTypes": [
+        "DataSet"
+      ],
+      "serviceType": "spark",
+      "typeVersion": "1.0",
+      "attributeDefs": [
+        {
+          "name": "location",
+          "typeName": "string",
+          "isOptional": true,
+          "cardinality": "SINGLE",
+          "isUnique": false,
+          "isIndexable": false,
+          "searchWeight": 5
+        },
+        {
+          "name": "clusterName",
+          "typeName": "string",
+          "isOptional": true,
+          "cardinality": "SINGLE",
+          "isUnique": false,
+          "isIndexable": false,
+          "searchWeight": 8
+        },
+        {
+          "name": "parameters",
+          "typeName": "map<string,string>",
+          "isOptional": true,
+          "cardinality": "SINGLE",
+          "isUnique": false,
+          "isIndexable": false
+        },
+        {
+          "name": "ownerType",
+          "typeName": "string",
+          "isOptional": true,
+          "cardinality": "SINGLE",
+          "isUnique": false,
+          "isIndexable": false
+        }
+      ]
+    },
+    {
+      "name": "spark_table",
+      "superTypes": [
+        "DataSet"
+      ],
+      "serviceType": "spark",
+      "typeVersion": "1.0",
+      "attributeDefs": [
+        {
+          "name": "tableType",
+          "typeName": "string",
+          "isOptional": true,
+          "cardinality": "SINGLE",
+          "isUnique": false,
+          "isIndexable": false
+        },
+        {
+          "name": "provider",
+          "typeName": "string",
+          "isOptional": true,
+          "cardinality": "SINGLE",
+          "isUnique": false,
+          "isIndexable": false,
+          "searchWeight": 5
+        },
+        {
+          "name": "partitionColumnNames",
+          "typeName": "array<string>",
+          "isOptional": true,
+          "cardinality": "SINGLE",
+          "isUnique": false,
+          "isIndexable": false
+        },
+        {
+          "name": "bucketSpec",
+          "typeName": "map<string,string>",
+          "isOptional": true,
+          "cardinality": "SINGLE",
+          "isUnique": false,
+          "isIndexable": false
+        },
+        {
+          "name": "ownerType",
+          "typeName": "string",
+          "isOptional": true,
+          "cardinality": "SINGLE",
+          "isUnique": false,
+          "isIndexable": false
+        },
+        {
+          "name": "createTime",
+          "typeName": "date",
+          "isOptional": true,
+          "cardinality": "SINGLE",
+          "isUnique": false,
+          "isIndexable": false
+        },
+        {
+          "name": "parameters",
+          "typeName": "map<string,string>",
+          "isOptional": true,
+          "cardinality": "SINGLE",
+          "isUnique": false,
+          "isIndexable": false
+        },
+        {
+          "name": "comment",
+          "typeName": "string",
+          "isOptional": true,
+          "cardinality": "SINGLE",
+          "isUnique": false,
+          "isIndexable": false,
+          "searchWeight": 9
+        },
+        {
+          "name": "unsupportedFeatures",
+          "typeName": "array<string>",
+          "isOptional": true,
+          "cardinality": "SINGLE",
+          "isUnique": false,
+          "isIndexable": false
+        },
+        {
+          "name": "viewOriginalText",
+          "typeName": "string",
+          "isOptional": true,
+          "cardinality": "SINGLE",
+          "isUnique": false,
+          "isIndexable": false,
+          "searchWeight": 9
+        },
+        {
+          "name": "schemaDesc",
+          "typeName": "string",
+          "isOptional": true,
+          "cardinality": "SINGLE",
+          "isUnique": false,
+          "isIndexable": false,
+          "searchWeight": 5
+        },
+        {
+          "name": "partitionProvider",
+          "typeName": "string",
+          "isOptional": true,
+          "cardinality": "SINGLE",
+          "isUnique": false,
+          "isIndexable": false
+        }
+      ]
+    },
+    {
+      "name": "spark_column",
+      "superTypes": [
+        "DataSet"
+      ],
+      "serviceType": "spark",
+      "typeVersion": "1.0",
+      "attributeDefs": [
+        {
+          "name": "type",
+          "typeName": "string",
+          "isOptional": false,
+          "cardinality": "SINGLE",
+          "isUnique": false,
+          "isIndexable": true
+        },
+        {
+          "name": "nullable",
+          "typeName": "boolean",
+          "isOptional": true,
+          "cardinality": "SINGLE",
+          "isUnique": false,
+          "isIndexable": false
+        },
+        {
+          "name": "metadata",
+          "typeName": "string",
+          "isOptional": true,
+          "cardinality": "SINGLE",
+          "isUnique": false,
+          "isIndexable": false
+        },
+        {
+          "name": "comment",
+          "typeName": "string",
+          "isOptional": true,
+          "cardinality": "SINGLE",
+          "isUnique": false,
+          "isIndexable": false,
+          "searchWeight": 9
+        }
+      ]
+    },
+    {
+      "name": "spark_ml_directory",
+      "superTypes": [
+        "DataSet"
+      ],
+      "serviceType": "spark",
+      "typeVersion": "1.0",
+      "attributeDefs": [
+        {
+          "name": "uri",
+          "typeName": "string",
+          "isOptional": false,
+          "cardinality": "SINGLE",
+          "isUnique": false,
+          "isIndexable": true,
+          "searchWeight": 10
+        },
+        {
+          "name": "directory",
+          "typeName": "string",
+          "isOptional": false,
+          "cardinality": "SINGLE",
+          "isUnique": false,
+          "isIndexable": true,
+          "searchWeight": 10
+        }
+      ]
+    },
+    {
+      "name": "spark_storagedesc",
+      "superTypes": [
+        "Referenceable"
+      ],
+      "serviceType": "spark",
+      "typeVersion": "1.0",
+      "attributeDefs": [
+        {
+          "name": "location",
+          "typeName": "string",
+          "isOptional": true,
+          "cardinality": "SINGLE",
+          "isUnique": false,
+          "isIndexable": false,
+          "searchWeight": 10
+        },
+        {
+          "name": "inputFormat",
+          "typeName": "string",
+          "isOptional": true,
+          "cardinality": "SINGLE",
+          "isUnique": false,
+          "isIndexable": false
+        },
+        {
+          "name": "outputFormat",
+          "typeName": "string",
+          "isOptional": true,
+          "cardinality": "SINGLE",
+          "isUnique": false,
+          "isIndexable": false
+        },
+        {
+          "name": "serde",
+          "typeName": "string",
+          "isOptional": true,
+          "cardinality": "SINGLE",
+          "isUnique": false,
+          "isIndexable": false
+        },
+        {
+          "name": "compressed",
+          "typeName": "boolean",
+          "isOptional": false,
+          "cardinality": "SINGLE",
+          "isUnique": false,
+          "isIndexable": true
+        },
+        {
+          "name": "parameters",
+          "typeName": "map<string,string>",
+          "isOptional": true,
+          "cardinality": "SINGLE",
+          "isUnique": false,
+          "isIndexable": false
+        }
+      ]
+    },
+    {
+      "name": "spark_ml_model",
+      "superTypes": [
+        "DataSet"
+      ],
+      "serviceType": "spark",
+      "typeVersion": "1.0",
+      "attributeDefs": [
+        {
+          "name": "extra",
+          "typeName": "string",
+          "isOptional": true,
+          "cardinality": "SINGLE",
+          "isUnique": false,
+          "isIndexable": false
+        }
+      ]
+    },
+    {
+      "name": "spark_process",
+      "superTypes": [
+        "Process"
+      ],
+      "serviceType": "spark",
+      "typeVersion": "1.0",
+      "attributeDefs": [
+        {
+          "name": "executionId",
+          "typeName": "long",
+          "isOptional": true,
+          "cardinality": "SINGLE",
+          "isUnique": false,
+          "isIndexable": false,
+          "searchWeight": 10
+        },
+        {
+          "name": "currUser",
+          "typeName": "string",
+          "isOptional": true,
+          "cardinality": "SINGLE",
+          "isUnique": false,
+          "isIndexable": false,
+          "searchWeight": 10
+        },
+        {
+          "name": "remoteUser",
+          "typeName": "string",
+          "isOptional": true,
+          "cardinality": "SINGLE",
+          "isUnique": false,
+          "isIndexable": false,
+          "searchWeight": 10
+        },
+        {
+          "name": "details",
+          "typeName": "string",
+          "isOptional": true,
+          "cardinality": "SINGLE",
+          "isUnique": false,
+          "isIndexable": false
+        },
+        {
+          "name": "sparkPlanDescription",
+          "typeName": "string",
+          "isOptional": true,
+          "cardinality": "SINGLE",
+          "isUnique": false,
+          "isIndexable": false
+        }
+      ]
+    },
+    {
+      "name": "spark_ml_pipeline",
+      "superTypes": [
+        "DataSet"
+      ],
+      "serviceType": "spark",
+      "typeVersion": "1.0",
+      "attributeDefs": [
+        {
+          "name": "extra",
+          "typeName": "string",
+          "isOptional": true,
+          "cardinality": "SINGLE",
+          "isUnique": false,
+          "isIndexable": false
+        }
+      ]
+    }
+  ],
+  "relationshipDefs": [
+    {
+      "name": "spark_table_db",
+      "serviceType": "spark",
+      "typeVersion": "1.0",
+      "relationshipCategory": "AGGREGATION",
+      "endDef1": {
+        "type": "spark_table",
+        "name": "db",
+        "isContainer": false,
+        "cardinality": "SINGLE"
+      },
+      "endDef2": {
+        "type": "spark_db",
+        "name": "tables",
+        "isContainer": true,
+        "cardinality": "SET"
+      },
+      "propagateTags": "NONE"
+    },
+    {
+      "name": "spark_table_columns",
+      "serviceType": "spark",
+      "typeVersion": "1.0",
+      "relationshipCategory": "COMPOSITION",
+      "endDef1": {
+        "type": "spark_table",
+        "name": "columns",
+        "isContainer": true,
+        "cardinality": "SET"
+      },
+      "endDef2": {
+        "type": "spark_column",
+        "name": "table",
+        "isContainer": false,
+        "cardinality": "SINGLE"
+      },
+      "propagateTags": "NONE"
+    },
+    {
+      "name": "spark_table_storagedesc",
+      "serviceType": "spark",
+      "typeVersion": "1.0",
+      "relationshipCategory": "COMPOSITION",
+      "endDef1": {
+        "type": "spark_table",
+        "name": "sd",
+        "isContainer": true,
+        "cardinality": "SINGLE"
+      },
+      "endDef2": {
+        "type": "spark_storagedesc",
+        "name": "table",
+        "isContainer": false,
+        "cardinality": "SINGLE"
+      },
+      "propagateTags": "NONE"
+    },
+    {
+      "name": "spark_ml_model_ml_directory",
+      "serviceType": "spark",
+      "typeVersion": "1.0",
+      "relationshipCategory": "COMPOSITION",
+      "endDef1": {
+        "type": "spark_ml_model",
+        "name": "directory",
+        "isContainer": true,
+        "cardinality": "SINGLE"
+      },
+      "endDef2": {
+        "type": "spark_ml_directory",
+        "name": "model",
+        "isContainer": false,
+        "cardinality": "SINGLE"
+      },
+      "propagateTags": "NONE"
+    },
+    {
+      "name": "spark_ml_pipeline_ml_directory",
+      "serviceType": "spark",
+      "typeVersion": "1.0",
+      "relationshipCategory": "COMPOSITION",
+      "endDef1": {
+        "type": "spark_ml_pipeline",
+        "name": "directory",
+        "isContainer": true,
+        "cardinality": "SINGLE"
+      },
+      "endDef2": {
+        "type": "spark_ml_directory",
+        "name": "pipeline",
+        "isContainer": false,
+        "cardinality": "SINGLE"
+      },
+      "propagateTags": "NONE"
+    }
+  ]
+}
\ No newline at end of file