You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jx...@apache.org on 2015/07/02 20:26:19 UTC
[6/8] hive git commit: HIVE-11139: Emit more lineage information
(Jimmy, reviewed by Szehon)
http://git-wip-us.apache.org/repos/asf/hive/blob/cdd1c7bf/ql/src/test/results/clientpositive/lineage2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/lineage2.q.out b/ql/src/test/results/clientpositive/lineage2.q.out
new file mode 100644
index 0000000..669be97
--- /dev/null
+++ b/ql/src/test/results/clientpositive/lineage2.q.out
@@ -0,0 +1,2905 @@
+PREHOOK: query: drop table if exists src2
+PREHOOK: type: DROPTABLE
+PREHOOK: query: create table src2 as select key key2, value value2 from src1
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src1
+PREHOOK: Output: database:default
+PREHOOK: Output: default@src2
+POSTHOOK: LINEAGE: {
+ "version": "1.0",
+ "engine": "mr",
+ "hash": "3a39d46286e4c2cd2139c9bb248f7b4f",
+ "queryText": "create table src2 as select key key2, value value2 from src1",
+ "edges": [
+ {
+ "sources": [
+ 2
+ ],
+ "targets": [
+ 0
+ ],
+ "edgeType": "PROJECTION"
+ },
+ {
+ "sources": [
+ 3
+ ],
+ "targets": [
+ 1
+ ],
+ "edgeType": "PROJECTION"
+ }
+ ],
+ "vertices": [
+ {
+ "id": 0,
+ "vertexType": "COLUMN",
+ "vertexId": "default.src2.key2"
+ },
+ {
+ "id": 1,
+ "vertexType": "COLUMN",
+ "vertexId": "default.src2.value2"
+ },
+ {
+ "id": 2,
+ "vertexType": "COLUMN",
+ "vertexId": "default.src1.key"
+ },
+ {
+ "id": 3,
+ "vertexType": "COLUMN",
+ "vertexId": "default.src1.value"
+ }
+ ]
+}
+PREHOOK: query: select * from src1 where key is not null and value is not null limit 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: LINEAGE: {
+ "version": "1.0",
+ "engine": "mr",
+ "hash": "b5b224847b2333e790a2c229434a04c8",
+ "queryText": "select * from src1 where key is not null and value is not null limit 3",
+ "edges": [
+ {
+ "sources": [
+ 2
+ ],
+ "targets": [
+ 0
+ ],
+ "edgeType": "PROJECTION"
+ },
+ {
+ "sources": [
+ 3
+ ],
+ "targets": [
+ 1
+ ],
+ "edgeType": "PROJECTION"
+ },
+ {
+ "sources": [
+ 2,
+ 3
+ ],
+ "targets": [
+ 0,
+ 1
+ ],
+ "expression": "(src1.key is not null and src1.value is not null)",
+ "edgeType": "PREDICATE"
+ }
+ ],
+ "vertices": [
+ {
+ "id": 0,
+ "vertexType": "COLUMN",
+ "vertexId": "src1.key"
+ },
+ {
+ "id": 1,
+ "vertexType": "COLUMN",
+ "vertexId": "src1.value"
+ },
+ {
+ "id": 2,
+ "vertexType": "COLUMN",
+ "vertexId": "default.src1.key"
+ },
+ {
+ "id": 3,
+ "vertexType": "COLUMN",
+ "vertexId": "default.src1.value"
+ }
+ ]
+}
+238 val_238
+
+311 val_311
+PREHOOK: query: select * from src1 where key > 10 and value > 'val' order by key limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: LINEAGE: {
+ "version": "1.0",
+ "engine": "mr",
+ "hash": "773d9d0ea92e797eae292ae1eeea11ab",
+ "queryText": "select * from src1 where key > 10 and value > 'val' order by key limit 5",
+ "edges": [
+ {
+ "sources": [
+ 2
+ ],
+ "targets": [
+ 0
+ ],
+ "edgeType": "PROJECTION"
+ },
+ {
+ "sources": [
+ 3
+ ],
+ "targets": [
+ 1
+ ],
+ "edgeType": "PROJECTION"
+ },
+ {
+ "sources": [
+ 2,
+ 3
+ ],
+ "targets": [
+ 0,
+ 1
+ ],
+ "expression": "((UDFToDouble(src1.key) > UDFToDouble(10)) and (src1.value > 'val'))",
+ "edgeType": "PREDICATE"
+ }
+ ],
+ "vertices": [
+ {
+ "id": 0,
+ "vertexType": "COLUMN",
+ "vertexId": "src1.key"
+ },
+ {
+ "id": 1,
+ "vertexType": "COLUMN",
+ "vertexId": "src1.value"
+ },
+ {
+ "id": 2,
+ "vertexType": "COLUMN",
+ "vertexId": "default.src1.key"
+ },
+ {
+ "id": 3,
+ "vertexType": "COLUMN",
+ "vertexId": "default.src1.value"
+ }
+ ]
+}
+146 val_146
+150 val_150
+213 val_213
+238 val_238
+255 val_255
+PREHOOK: query: drop table if exists dest1
+PREHOOK: type: DROPTABLE
+PREHOOK: query: create table dest1 as select * from src1
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src1
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest1
+POSTHOOK: LINEAGE: {
+ "version": "1.0",
+ "engine": "mr",
+ "hash": "712fe958c357bcfc978b95c43eb19084",
+ "queryText": "create table dest1 as select * from src1",
+ "edges": [
+ {
+ "sources": [
+ 2
+ ],
+ "targets": [
+ 0
+ ],
+ "edgeType": "PROJECTION"
+ },
+ {
+ "sources": [
+ 3
+ ],
+ "targets": [
+ 1
+ ],
+ "edgeType": "PROJECTION"
+ }
+ ],
+ "vertices": [
+ {
+ "id": 0,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest1.key"
+ },
+ {
+ "id": 1,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest1.value"
+ },
+ {
+ "id": 2,
+ "vertexType": "COLUMN",
+ "vertexId": "default.src1.key"
+ },
+ {
+ "id": 3,
+ "vertexType": "COLUMN",
+ "vertexId": "default.src1.value"
+ }
+ ]
+}
+PREHOOK: query: insert into table dest1 select * from src2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src2
+PREHOOK: Output: default@dest1
+POSTHOOK: LINEAGE: {
+ "version": "1.0",
+ "engine": "mr",
+ "hash": "ecc718a966d8887b18084a55dd96f0bc",
+ "queryText": "insert into table dest1 select * from src2",
+ "edges": [
+ {
+ "sources": [
+ 2
+ ],
+ "targets": [
+ 0
+ ],
+ "edgeType": "PROJECTION"
+ },
+ {
+ "sources": [
+ 3
+ ],
+ "targets": [
+ 1
+ ],
+ "edgeType": "PROJECTION"
+ }
+ ],
+ "vertices": [
+ {
+ "id": 0,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest1.key"
+ },
+ {
+ "id": 1,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest1.value"
+ },
+ {
+ "id": 2,
+ "vertexType": "COLUMN",
+ "vertexId": "default.src2.key2"
+ },
+ {
+ "id": 3,
+ "vertexType": "COLUMN",
+ "vertexId": "default.src2.value2"
+ }
+ ]
+}
+PREHOOK: query: select key k, dest1.value from dest1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: LINEAGE: {
+ "version": "1.0",
+ "engine": "mr",
+ "hash": "416b6f4cd63edd4f9d8213d2d7819d21",
+ "queryText": "select key k, dest1.value from dest1",
+ "edges": [
+ {
+ "sources": [
+ 2
+ ],
+ "targets": [
+ 0
+ ],
+ "edgeType": "PROJECTION"
+ },
+ {
+ "sources": [
+ 3
+ ],
+ "targets": [
+ 1
+ ],
+ "edgeType": "PROJECTION"
+ }
+ ],
+ "vertices": [
+ {
+ "id": 0,
+ "vertexType": "COLUMN",
+ "vertexId": "k"
+ },
+ {
+ "id": 1,
+ "vertexType": "COLUMN",
+ "vertexId": "dest1.value"
+ },
+ {
+ "id": 2,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest1.key"
+ },
+ {
+ "id": 3,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest1.value"
+ }
+ ]
+}
+238 val_238
+
+311 val_311
+ val_27
+ val_165
+ val_409
+255 val_255
+278 val_278
+98 val_98
+ val_484
+ val_265
+ val_193
+401 val_401
+150 val_150
+273 val_273
+224
+369
+66 val_66
+128
+213 val_213
+146 val_146
+406 val_406
+
+
+
+238 val_238
+
+311 val_311
+ val_27
+ val_165
+ val_409
+255 val_255
+278 val_278
+98 val_98
+ val_484
+ val_265
+ val_193
+401 val_401
+150 val_150
+273 val_273
+224
+369
+66 val_66
+128
+213 val_213
+146 val_146
+406 val_406
+
+
+
+PREHOOK: query: select key from src1 union select key2 from src2 order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+PREHOOK: Input: default@src2
+#### A masked pattern was here ####
+POSTHOOK: LINEAGE: {
+ "version": "1.0",
+ "engine": "mr",
+ "hash": "50fa3d1074b3fda37ce11dc6ec92ebf3",
+ "queryText": "select key from src1 union select key2 from src2 order by key",
+ "edges": [
+ {
+ "sources": [
+ 1,
+ 2
+ ],
+ "targets": [
+ 0
+ ],
+ "expression": "key",
+ "edgeType": "PROJECTION"
+ }
+ ],
+ "vertices": [
+ {
+ "id": 0,
+ "vertexType": "COLUMN",
+ "vertexId": "u2.key"
+ },
+ {
+ "id": 1,
+ "vertexType": "COLUMN",
+ "vertexId": "default.src1.key"
+ },
+ {
+ "id": 2,
+ "vertexType": "COLUMN",
+ "vertexId": "default.src2.key2"
+ }
+ ]
+}
+
+128
+146
+150
+213
+224
+238
+255
+273
+278
+311
+369
+401
+406
+66
+98
+PREHOOK: query: select key k from src1 union select key2 from src2 order by k
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+PREHOOK: Input: default@src2
+#### A masked pattern was here ####
+POSTHOOK: LINEAGE: {
+ "version": "1.0",
+ "engine": "mr",
+ "hash": "a739460bd79c8c91ec35e22c97329769",
+ "queryText": "select key k from src1 union select key2 from src2 order by k",
+ "edges": [
+ {
+ "sources": [
+ 1,
+ 2
+ ],
+ "targets": [
+ 0
+ ],
+ "expression": "key",
+ "edgeType": "PROJECTION"
+ }
+ ],
+ "vertices": [
+ {
+ "id": 0,
+ "vertexType": "COLUMN",
+ "vertexId": "u2.k"
+ },
+ {
+ "id": 1,
+ "vertexType": "COLUMN",
+ "vertexId": "default.src1.key"
+ },
+ {
+ "id": 2,
+ "vertexType": "COLUMN",
+ "vertexId": "default.src2.key2"
+ }
+ ]
+}
+
+128
+146
+150
+213
+224
+238
+255
+273
+278
+311
+369
+401
+406
+66
+98
+PREHOOK: query: select key, count(1) a from dest1 group by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: LINEAGE: {
+ "version": "1.0",
+ "engine": "mr",
+ "hash": "3901b5e3a164064736b3234355046340",
+ "queryText": "select key, count(1) a from dest1 group by key",
+ "edges": [
+ {
+ "sources": [
+ 2
+ ],
+ "targets": [
+ 0
+ ],
+ "edgeType": "PROJECTION"
+ },
+ {
+ "sources": [
+ 3
+ ],
+ "targets": [
+ 1
+ ],
+ "expression": "count(1)",
+ "edgeType": "PROJECTION"
+ }
+ ],
+ "vertices": [
+ {
+ "id": 0,
+ "vertexType": "COLUMN",
+ "vertexId": "key"
+ },
+ {
+ "id": 1,
+ "vertexType": "COLUMN",
+ "vertexId": "a"
+ },
+ {
+ "id": 2,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest1.key"
+ },
+ {
+ "id": 3,
+ "vertexType": "TABLE",
+ "vertexId": "default.dest1"
+ }
+ ]
+}
+ 20
+128 2
+146 2
+150 2
+213 2
+224 2
+238 2
+255 2
+273 2
+278 2
+311 2
+369 2
+401 2
+406 2
+66 2
+98 2
+PREHOOK: query: select key k, count(*) from dest1 group by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: LINEAGE: {
+ "version": "1.0",
+ "engine": "mr",
+ "hash": "0d5a212f10847aeaab31e8c31121e6d4",
+ "queryText": "select key k, count(*) from dest1 group by key",
+ "edges": [
+ {
+ "sources": [
+ 2
+ ],
+ "targets": [
+ 0
+ ],
+ "edgeType": "PROJECTION"
+ },
+ {
+ "sources": [
+ 3
+ ],
+ "targets": [
+ 1
+ ],
+ "expression": "count(*)",
+ "edgeType": "PROJECTION"
+ }
+ ],
+ "vertices": [
+ {
+ "id": 0,
+ "vertexType": "COLUMN",
+ "vertexId": "k"
+ },
+ {
+ "id": 1,
+ "vertexType": "COLUMN",
+ "vertexId": "c1"
+ },
+ {
+ "id": 2,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest1.key"
+ },
+ {
+ "id": 3,
+ "vertexType": "TABLE",
+ "vertexId": "default.dest1"
+ }
+ ]
+}
+ 20
+128 2
+146 2
+150 2
+213 2
+224 2
+238 2
+255 2
+273 2
+278 2
+311 2
+369 2
+401 2
+406 2
+66 2
+98 2
+PREHOOK: query: select key k, count(value) from dest1 group by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: LINEAGE: {
+ "version": "1.0",
+ "engine": "mr",
+ "hash": "56429eccb04ded722f5bd9d9d8cf7260",
+ "queryText": "select key k, count(value) from dest1 group by key",
+ "edges": [
+ {
+ "sources": [
+ 2
+ ],
+ "targets": [
+ 0
+ ],
+ "edgeType": "PROJECTION"
+ },
+ {
+ "sources": [
+ 3
+ ],
+ "targets": [
+ 1
+ ],
+ "expression": "count(default.dest1.value)",
+ "edgeType": "PROJECTION"
+ }
+ ],
+ "vertices": [
+ {
+ "id": 0,
+ "vertexType": "COLUMN",
+ "vertexId": "k"
+ },
+ {
+ "id": 1,
+ "vertexType": "COLUMN",
+ "vertexId": "c1"
+ },
+ {
+ "id": 2,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest1.key"
+ },
+ {
+ "id": 3,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest1.value"
+ }
+ ]
+}
+ 20
+128 2
+146 2
+150 2
+213 2
+224 2
+238 2
+255 2
+273 2
+278 2
+311 2
+369 2
+401 2
+406 2
+66 2
+98 2
+PREHOOK: query: select value, max(length(key)) from dest1 group by value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: LINEAGE: {
+ "version": "1.0",
+ "engine": "mr",
+ "hash": "7e1cfc3dece85b41b6f7c46365580cde",
+ "queryText": "select value, max(length(key)) from dest1 group by value",
+ "edges": [
+ {
+ "sources": [
+ 2
+ ],
+ "targets": [
+ 0
+ ],
+ "edgeType": "PROJECTION"
+ },
+ {
+ "sources": [
+ 3
+ ],
+ "targets": [
+ 1
+ ],
+ "expression": "max(length(dest1.key))",
+ "edgeType": "PROJECTION"
+ }
+ ],
+ "vertices": [
+ {
+ "id": 0,
+ "vertexType": "COLUMN",
+ "vertexId": "value"
+ },
+ {
+ "id": 1,
+ "vertexType": "COLUMN",
+ "vertexId": "c1"
+ },
+ {
+ "id": 2,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest1.value"
+ },
+ {
+ "id": 3,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest1.key"
+ }
+ ]
+}
+ 3
+val_146 3
+val_150 3
+val_165 0
+val_193 0
+val_213 3
+val_238 3
+val_255 3
+val_265 0
+val_27 0
+val_273 3
+val_278 3
+val_311 3
+val_401 3
+val_406 3
+val_409 0
+val_484 0
+val_66 2
+val_98 2
+PREHOOK: query: select value, max(length(key)) from dest1 group by value order by value limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: LINEAGE: {
+ "version": "1.0",
+ "engine": "mr",
+ "hash": "c6578ce1dd72498c4af33f20f164e483",
+ "queryText": "select value, max(length(key)) from dest1 group by value order by value limit 5",
+ "edges": [
+ {
+ "sources": [
+ 2
+ ],
+ "targets": [
+ 0
+ ],
+ "edgeType": "PROJECTION"
+ },
+ {
+ "sources": [
+ 3
+ ],
+ "targets": [
+ 1
+ ],
+ "expression": "max(length(dest1.key))",
+ "edgeType": "PROJECTION"
+ }
+ ],
+ "vertices": [
+ {
+ "id": 0,
+ "vertexType": "COLUMN",
+ "vertexId": "value"
+ },
+ {
+ "id": 1,
+ "vertexType": "COLUMN",
+ "vertexId": "c1"
+ },
+ {
+ "id": 2,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest1.value"
+ },
+ {
+ "id": 3,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest1.key"
+ }
+ ]
+}
+ 3
+val_146 3
+val_150 3
+val_165 0
+val_193 0
+PREHOOK: query: select key, length(value) from dest1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: LINEAGE: {
+ "version": "1.0",
+ "engine": "mr",
+ "hash": "91fbcea5cb34362071555cd93e8d0abe",
+ "queryText": "select key, length(value) from dest1",
+ "edges": [
+ {
+ "sources": [
+ 2
+ ],
+ "targets": [
+ 0
+ ],
+ "edgeType": "PROJECTION"
+ },
+ {
+ "sources": [
+ 3
+ ],
+ "targets": [
+ 1
+ ],
+ "expression": "length(dest1.value)",
+ "edgeType": "PROJECTION"
+ }
+ ],
+ "vertices": [
+ {
+ "id": 0,
+ "vertexType": "COLUMN",
+ "vertexId": "key"
+ },
+ {
+ "id": 1,
+ "vertexType": "COLUMN",
+ "vertexId": "c1"
+ },
+ {
+ "id": 2,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest1.key"
+ },
+ {
+ "id": 3,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest1.value"
+ }
+ ]
+}
+238 7
+ 0
+311 7
+ 6
+ 7
+ 7
+255 7
+278 7
+98 6
+ 7
+ 7
+ 7
+401 7
+150 7
+273 7
+224 0
+369 0
+66 6
+128 0
+213 7
+146 7
+406 7
+ 0
+ 0
+ 0
+238 7
+ 0
+311 7
+ 6
+ 7
+ 7
+255 7
+278 7
+98 6
+ 7
+ 7
+ 7
+401 7
+150 7
+273 7
+224 0
+369 0
+66 6
+128 0
+213 7
+146 7
+406 7
+ 0
+ 0
+ 0
+PREHOOK: query: select length(value) + 3 from dest1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: LINEAGE: {
+ "version": "1.0",
+ "engine": "mr",
+ "hash": "3d8a347cc9052111cb328938d37b9b03",
+ "queryText": "select length(value) + 3 from dest1",
+ "edges": [
+ {
+ "sources": [
+ 1
+ ],
+ "targets": [
+ 0
+ ],
+ "expression": "(length(dest1.value) + 3)",
+ "edgeType": "PROJECTION"
+ }
+ ],
+ "vertices": [
+ {
+ "id": 0,
+ "vertexType": "COLUMN",
+ "vertexId": "c0"
+ },
+ {
+ "id": 1,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest1.value"
+ }
+ ]
+}
+10
+3
+10
+9
+10
+10
+10
+10
+9
+10
+10
+10
+10
+10
+10
+3
+3
+9
+3
+10
+10
+10
+3
+3
+3
+10
+3
+10
+9
+10
+10
+10
+10
+9
+10
+10
+10
+10
+10
+10
+3
+3
+9
+3
+10
+10
+10
+3
+3
+3
+PREHOOK: query: select 5 from dest1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: LINEAGE: {
+ "version": "1.0",
+ "engine": "mr",
+ "hash": "bae960bf4376ec00e37258469b17360d",
+ "queryText": "select 5 from dest1",
+ "edges": [
+ {
+ "sources": [],
+ "targets": [
+ 0
+ ],
+ "expression": "5",
+ "edgeType": "PROJECTION"
+ }
+ ],
+ "vertices": [
+ {
+ "id": 0,
+ "vertexType": "COLUMN",
+ "vertexId": "c0"
+ }
+ ]
+}
+5
+5
+5
+5
+5
+5
+5
+5
+5
+5
+5
+5
+5
+5
+5
+5
+5
+5
+5
+5
+5
+5
+5
+5
+5
+5
+5
+5
+5
+5
+5
+5
+5
+5
+5
+5
+5
+5
+5
+5
+5
+5
+5
+5
+5
+5
+5
+5
+5
+5
+PREHOOK: query: select 3 * 5 from dest1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: LINEAGE: {
+ "version": "1.0",
+ "engine": "mr",
+ "hash": "753abad4d55afd3df34fdc73abfcd44d",
+ "queryText": "select 3 * 5 from dest1",
+ "edges": [
+ {
+ "sources": [],
+ "targets": [
+ 0
+ ],
+ "expression": "(3 * 5)",
+ "edgeType": "PROJECTION"
+ }
+ ],
+ "vertices": [
+ {
+ "id": 0,
+ "vertexType": "COLUMN",
+ "vertexId": "c0"
+ }
+ ]
+}
+15
+15
+15
+15
+15
+15
+15
+15
+15
+15
+15
+15
+15
+15
+15
+15
+15
+15
+15
+15
+15
+15
+15
+15
+15
+15
+15
+15
+15
+15
+15
+15
+15
+15
+15
+15
+15
+15
+15
+15
+15
+15
+15
+15
+15
+15
+15
+15
+15
+15
+PREHOOK: query: drop table if exists dest2
+PREHOOK: type: DROPTABLE
+PREHOOK: query: create table dest2 as select * from src1 JOIN src2 ON src1.key = src2.key2
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src1
+PREHOOK: Input: default@src2
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest2
+POSTHOOK: LINEAGE: {
+ "version": "1.0",
+ "engine": "mr",
+ "hash": "386791c174a4999fc916e300b5e76bf2",
+ "queryText": "create table dest2 as select * from src1 JOIN src2 ON src1.key = src2.key2",
+ "edges": [
+ {
+ "sources": [
+ 4
+ ],
+ "targets": [
+ 0
+ ],
+ "edgeType": "PROJECTION"
+ },
+ {
+ "sources": [
+ 5
+ ],
+ "targets": [
+ 1
+ ],
+ "edgeType": "PROJECTION"
+ },
+ {
+ "sources": [
+ 6
+ ],
+ "targets": [
+ 2
+ ],
+ "edgeType": "PROJECTION"
+ },
+ {
+ "sources": [
+ 7
+ ],
+ "targets": [
+ 3
+ ],
+ "edgeType": "PROJECTION"
+ },
+ {
+ "sources": [
+ 4,
+ 6
+ ],
+ "targets": [
+ 0,
+ 1,
+ 2,
+ 3
+ ],
+ "expression": "(src1.key = src2.key2)",
+ "edgeType": "PREDICATE"
+ }
+ ],
+ "vertices": [
+ {
+ "id": 0,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest2.key"
+ },
+ {
+ "id": 1,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest2.value"
+ },
+ {
+ "id": 2,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest2.key2"
+ },
+ {
+ "id": 3,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest2.value2"
+ },
+ {
+ "id": 4,
+ "vertexType": "COLUMN",
+ "vertexId": "default.src1.key"
+ },
+ {
+ "id": 5,
+ "vertexType": "COLUMN",
+ "vertexId": "default.src1.value"
+ },
+ {
+ "id": 6,
+ "vertexType": "COLUMN",
+ "vertexId": "default.src2.key2"
+ },
+ {
+ "id": 7,
+ "vertexType": "COLUMN",
+ "vertexId": "default.src2.value2"
+ }
+ ]
+}
+PREHOOK: query: insert overwrite table dest2 select * from src1 JOIN src2 ON src1.key = src2.key2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+PREHOOK: Input: default@src2
+PREHOOK: Output: default@dest2
+POSTHOOK: LINEAGE: {
+ "version": "1.0",
+ "engine": "mr",
+ "hash": "e494b771d94800dc3430bf5d0810cd9f",
+ "queryText": "insert overwrite table dest2 select * from src1 JOIN src2 ON src1.key = src2.key2",
+ "edges": [
+ {
+ "sources": [
+ 4
+ ],
+ "targets": [
+ 0
+ ],
+ "edgeType": "PROJECTION"
+ },
+ {
+ "sources": [
+ 5
+ ],
+ "targets": [
+ 1
+ ],
+ "edgeType": "PROJECTION"
+ },
+ {
+ "sources": [
+ 6
+ ],
+ "targets": [
+ 2
+ ],
+ "edgeType": "PROJECTION"
+ },
+ {
+ "sources": [
+ 7
+ ],
+ "targets": [
+ 3
+ ],
+ "edgeType": "PROJECTION"
+ },
+ {
+ "sources": [
+ 4,
+ 6
+ ],
+ "targets": [
+ 0,
+ 1,
+ 2,
+ 3
+ ],
+ "expression": "(src1.key = src2.key2)",
+ "edgeType": "PREDICATE"
+ }
+ ],
+ "vertices": [
+ {
+ "id": 0,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest2.key"
+ },
+ {
+ "id": 1,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest2.value"
+ },
+ {
+ "id": 2,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest2.key2"
+ },
+ {
+ "id": 3,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest2.value2"
+ },
+ {
+ "id": 4,
+ "vertexType": "COLUMN",
+ "vertexId": "default.src1.key"
+ },
+ {
+ "id": 5,
+ "vertexType": "COLUMN",
+ "vertexId": "default.src1.value"
+ },
+ {
+ "id": 6,
+ "vertexType": "COLUMN",
+ "vertexId": "default.src2.key2"
+ },
+ {
+ "id": 7,
+ "vertexType": "COLUMN",
+ "vertexId": "default.src2.value2"
+ }
+ ]
+}
+PREHOOK: query: insert into table dest2 select * from src1 JOIN src2 ON src1.key = src2.key2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+PREHOOK: Input: default@src2
+PREHOOK: Output: default@dest2
+POSTHOOK: LINEAGE: {
+ "version": "1.0",
+ "engine": "mr",
+ "hash": "efeaddd0d36105b1013b414627850dc2",
+ "queryText": "insert into table dest2 select * from src1 JOIN src2 ON src1.key = src2.key2",
+ "edges": [
+ {
+ "sources": [
+ 4
+ ],
+ "targets": [
+ 0
+ ],
+ "edgeType": "PROJECTION"
+ },
+ {
+ "sources": [
+ 5
+ ],
+ "targets": [
+ 1
+ ],
+ "edgeType": "PROJECTION"
+ },
+ {
+ "sources": [
+ 6
+ ],
+ "targets": [
+ 2
+ ],
+ "edgeType": "PROJECTION"
+ },
+ {
+ "sources": [
+ 7
+ ],
+ "targets": [
+ 3
+ ],
+ "edgeType": "PROJECTION"
+ },
+ {
+ "sources": [
+ 4,
+ 6
+ ],
+ "targets": [
+ 0,
+ 1,
+ 2,
+ 3
+ ],
+ "expression": "(src1.key = src2.key2)",
+ "edgeType": "PREDICATE"
+ }
+ ],
+ "vertices": [
+ {
+ "id": 0,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest2.key"
+ },
+ {
+ "id": 1,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest2.value"
+ },
+ {
+ "id": 2,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest2.key2"
+ },
+ {
+ "id": 3,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest2.value2"
+ },
+ {
+ "id": 4,
+ "vertexType": "COLUMN",
+ "vertexId": "default.src1.key"
+ },
+ {
+ "id": 5,
+ "vertexType": "COLUMN",
+ "vertexId": "default.src1.value"
+ },
+ {
+ "id": 6,
+ "vertexType": "COLUMN",
+ "vertexId": "default.src2.key2"
+ },
+ {
+ "id": 7,
+ "vertexType": "COLUMN",
+ "vertexId": "default.src2.value2"
+ }
+ ]
+}
+PREHOOK: query: insert into table dest2
+ select * from src1 JOIN src2 ON length(src1.value) = length(src2.value2) + 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+PREHOOK: Input: default@src2
+PREHOOK: Output: default@dest2
+POSTHOOK: LINEAGE: {
+ "version": "1.0",
+ "engine": "mr",
+ "hash": "e9450a56b3d103642e06bef0e4f0d482",
+ "queryText": "insert into table dest2\n select * from src1 JOIN src2 ON length(src1.value) = length(src2.value2) + 1",
+ "edges": [
+ {
+ "sources": [
+ 4
+ ],
+ "targets": [
+ 0
+ ],
+ "edgeType": "PROJECTION"
+ },
+ {
+ "sources": [
+ 5
+ ],
+ "targets": [
+ 1
+ ],
+ "edgeType": "PROJECTION"
+ },
+ {
+ "sources": [
+ 6
+ ],
+ "targets": [
+ 2
+ ],
+ "edgeType": "PROJECTION"
+ },
+ {
+ "sources": [
+ 7
+ ],
+ "targets": [
+ 3
+ ],
+ "edgeType": "PROJECTION"
+ },
+ {
+ "sources": [
+ 5,
+ 7
+ ],
+ "targets": [
+ 0,
+ 1,
+ 2,
+ 3
+ ],
+ "expression": "(length(src1.value) = (length(src2.value2) + 1))",
+ "edgeType": "PREDICATE"
+ }
+ ],
+ "vertices": [
+ {
+ "id": 0,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest2.key"
+ },
+ {
+ "id": 1,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest2.value"
+ },
+ {
+ "id": 2,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest2.key2"
+ },
+ {
+ "id": 3,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest2.value2"
+ },
+ {
+ "id": 4,
+ "vertexType": "COLUMN",
+ "vertexId": "default.src1.key"
+ },
+ {
+ "id": 5,
+ "vertexType": "COLUMN",
+ "vertexId": "default.src1.value"
+ },
+ {
+ "id": 6,
+ "vertexType": "COLUMN",
+ "vertexId": "default.src2.key2"
+ },
+ {
+ "id": 7,
+ "vertexType": "COLUMN",
+ "vertexId": "default.src2.value2"
+ }
+ ]
+}
+PREHOOK: query: select * from src1 where length(key) > 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: LINEAGE: {
+ "version": "1.0",
+ "engine": "mr",
+ "hash": "4028c94d222d5dd221f651d414386972",
+ "queryText": "select * from src1 where length(key) > 2",
+ "edges": [
+ {
+ "sources": [
+ 2
+ ],
+ "targets": [
+ 0
+ ],
+ "edgeType": "PROJECTION"
+ },
+ {
+ "sources": [
+ 3
+ ],
+ "targets": [
+ 1
+ ],
+ "edgeType": "PROJECTION"
+ },
+ {
+ "sources": [
+ 2
+ ],
+ "targets": [
+ 0,
+ 1
+ ],
+ "expression": "(length(src1.key) > 2)",
+ "edgeType": "PREDICATE"
+ }
+ ],
+ "vertices": [
+ {
+ "id": 0,
+ "vertexType": "COLUMN",
+ "vertexId": "src1.key"
+ },
+ {
+ "id": 1,
+ "vertexType": "COLUMN",
+ "vertexId": "src1.value"
+ },
+ {
+ "id": 2,
+ "vertexType": "COLUMN",
+ "vertexId": "default.src1.key"
+ },
+ {
+ "id": 3,
+ "vertexType": "COLUMN",
+ "vertexId": "default.src1.value"
+ }
+ ]
+}
+238 val_238
+311 val_311
+255 val_255
+278 val_278
+401 val_401
+150 val_150
+273 val_273
+224
+369
+128
+213 val_213
+146 val_146
+406 val_406
+PREHOOK: query: select * from src1 where length(key) > 2 and value > 'a'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: LINEAGE: {
+ "version": "1.0",
+ "engine": "mr",
+ "hash": "5727531f7743cfcd60d634d8c835515f",
+ "queryText": "select * from src1 where length(key) > 2 and value > 'a'",
+ "edges": [
+ {
+ "sources": [
+ 2
+ ],
+ "targets": [
+ 0
+ ],
+ "edgeType": "PROJECTION"
+ },
+ {
+ "sources": [
+ 3
+ ],
+ "targets": [
+ 1
+ ],
+ "edgeType": "PROJECTION"
+ },
+ {
+ "sources": [
+ 2,
+ 3
+ ],
+ "targets": [
+ 0,
+ 1
+ ],
+ "expression": "((length(src1.key) > 2) and (src1.value > 'a'))",
+ "edgeType": "PREDICATE"
+ }
+ ],
+ "vertices": [
+ {
+ "id": 0,
+ "vertexType": "COLUMN",
+ "vertexId": "src1.key"
+ },
+ {
+ "id": 1,
+ "vertexType": "COLUMN",
+ "vertexId": "src1.value"
+ },
+ {
+ "id": 2,
+ "vertexType": "COLUMN",
+ "vertexId": "default.src1.key"
+ },
+ {
+ "id": 3,
+ "vertexType": "COLUMN",
+ "vertexId": "default.src1.value"
+ }
+ ]
+}
+238 val_238
+311 val_311
+255 val_255
+278 val_278
+401 val_401
+150 val_150
+273 val_273
+213 val_213
+146 val_146
+406 val_406
+PREHOOK: query: drop table if exists dest3
+PREHOOK: type: DROPTABLE
+PREHOOK: query: create table dest3 as
+ select * from src1 JOIN src2 ON src1.key = src2.key2 WHERE length(key) > 1
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src1
+PREHOOK: Input: default@src2
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest3
+POSTHOOK: LINEAGE: {
+ "version": "1.0",
+ "engine": "mr",
+ "hash": "a2c4e9a3ec678039814f5d84b1e38ce4",
+ "queryText": "create table dest3 as\n select * from src1 JOIN src2 ON src1.key = src2.key2 WHERE length(key) > 1",
+ "edges": [
+ {
+ "sources": [
+ 4
+ ],
+ "targets": [
+ 0
+ ],
+ "edgeType": "PROJECTION"
+ },
+ {
+ "sources": [
+ 5
+ ],
+ "targets": [
+ 1
+ ],
+ "edgeType": "PROJECTION"
+ },
+ {
+ "sources": [
+ 6
+ ],
+ "targets": [
+ 2
+ ],
+ "edgeType": "PROJECTION"
+ },
+ {
+ "sources": [
+ 7
+ ],
+ "targets": [
+ 3
+ ],
+ "edgeType": "PROJECTION"
+ },
+ {
+ "sources": [
+ 4
+ ],
+ "targets": [
+ 0,
+ 1,
+ 2,
+ 3
+ ],
+ "expression": "(length(src1.key) > 1)",
+ "edgeType": "PREDICATE"
+ },
+ {
+ "sources": [
+ 4,
+ 6
+ ],
+ "targets": [
+ 0,
+ 1,
+ 2,
+ 3
+ ],
+ "expression": "(src1.key = src2.key2)",
+ "edgeType": "PREDICATE"
+ }
+ ],
+ "vertices": [
+ {
+ "id": 0,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest3.key"
+ },
+ {
+ "id": 1,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest3.value"
+ },
+ {
+ "id": 2,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest3.key2"
+ },
+ {
+ "id": 3,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest3.value2"
+ },
+ {
+ "id": 4,
+ "vertexType": "COLUMN",
+ "vertexId": "default.src1.key"
+ },
+ {
+ "id": 5,
+ "vertexType": "COLUMN",
+ "vertexId": "default.src1.value"
+ },
+ {
+ "id": 6,
+ "vertexType": "COLUMN",
+ "vertexId": "default.src2.key2"
+ },
+ {
+ "id": 7,
+ "vertexType": "COLUMN",
+ "vertexId": "default.src2.value2"
+ }
+ ]
+}
+PREHOOK: query: insert overwrite table dest2
+ select * from src1 JOIN src2 ON src1.key = src2.key2 WHERE length(key) > 3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+PREHOOK: Input: default@src2
+PREHOOK: Output: default@dest2
+POSTHOOK: LINEAGE: {
+ "version": "1.0",
+ "engine": "mr",
+ "hash": "76d84512204ddc576ad4d93f252e4358",
+ "queryText": "insert overwrite table dest2\n select * from src1 JOIN src2 ON src1.key = src2.key2 WHERE length(key) > 3",
+ "edges": [
+ {
+ "sources": [
+ 4
+ ],
+ "targets": [
+ 0
+ ],
+ "edgeType": "PROJECTION"
+ },
+ {
+ "sources": [
+ 5
+ ],
+ "targets": [
+ 1
+ ],
+ "edgeType": "PROJECTION"
+ },
+ {
+ "sources": [
+ 6
+ ],
+ "targets": [
+ 2
+ ],
+ "edgeType": "PROJECTION"
+ },
+ {
+ "sources": [
+ 7
+ ],
+ "targets": [
+ 3
+ ],
+ "edgeType": "PROJECTION"
+ },
+ {
+ "sources": [
+ 4
+ ],
+ "targets": [
+ 0,
+ 1,
+ 2,
+ 3
+ ],
+ "expression": "(length(src1.key) > 3)",
+ "edgeType": "PREDICATE"
+ },
+ {
+ "sources": [
+ 4,
+ 6
+ ],
+ "targets": [
+ 0,
+ 1,
+ 2,
+ 3
+ ],
+ "expression": "(src1.key = src2.key2)",
+ "edgeType": "PREDICATE"
+ }
+ ],
+ "vertices": [
+ {
+ "id": 0,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest2.key"
+ },
+ {
+ "id": 1,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest2.value"
+ },
+ {
+ "id": 2,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest2.key2"
+ },
+ {
+ "id": 3,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest2.value2"
+ },
+ {
+ "id": 4,
+ "vertexType": "COLUMN",
+ "vertexId": "default.src1.key"
+ },
+ {
+ "id": 5,
+ "vertexType": "COLUMN",
+ "vertexId": "default.src1.value"
+ },
+ {
+ "id": 6,
+ "vertexType": "COLUMN",
+ "vertexId": "default.src2.key2"
+ },
+ {
+ "id": 7,
+ "vertexType": "COLUMN",
+ "vertexId": "default.src2.value2"
+ }
+ ]
+}
+PREHOOK: query: drop table if exists dest_l1
+PREHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE dest_l1(key INT, value STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest_l1
+PREHOOK: query: INSERT OVERWRITE TABLE dest_l1
+SELECT j.*
+FROM (SELECT t1.key, p1.value
+ FROM src1 t1
+ LEFT OUTER JOIN src p1
+ ON (t1.key = p1.key)
+ UNION ALL
+ SELECT t2.key, p2.value
+ FROM src1 t2
+ LEFT OUTER JOIN src p2
+ ON (t2.key = p2.key)) j
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@src1
+PREHOOK: Output: default@dest_l1
+POSTHOOK: LINEAGE: {
+ "version": "1.0",
+ "engine": "mr",
+ "hash": "60b589744e2527dd235a6c8168d6a653",
+ "queryText": "INSERT OVERWRITE TABLE dest_l1\nSELECT j.*\nFROM (SELECT t1.key, p1.value\n FROM src1 t1\n LEFT OUTER JOIN src p1\n ON (t1.key = p1.key)\n UNION ALL\n SELECT t2.key, p2.value\n FROM src1 t2\n LEFT OUTER JOIN src p2\n ON (t2.key = p2.key)) j",
+ "edges": [
+ {
+ "sources": [
+ 2
+ ],
+ "targets": [
+ 0
+ ],
+ "expression": "UDFToInteger(j.key)",
+ "edgeType": "PROJECTION"
+ },
+ {
+ "sources": [
+ 3
+ ],
+ "targets": [
+ 1
+ ],
+ "expression": "j.value",
+ "edgeType": "PROJECTION"
+ },
+ {
+ "sources": [
+ 4,
+ 2
+ ],
+ "targets": [
+ 0,
+ 1
+ ],
+ "expression": "(p1.key = t1.key)",
+ "edgeType": "PREDICATE"
+ }
+ ],
+ "vertices": [
+ {
+ "id": 0,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest_l1.key"
+ },
+ {
+ "id": 1,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest_l1.value"
+ },
+ {
+ "id": 2,
+ "vertexType": "COLUMN",
+ "vertexId": "default.src1.key"
+ },
+ {
+ "id": 3,
+ "vertexType": "COLUMN",
+ "vertexId": "default.src.value"
+ },
+ {
+ "id": 4,
+ "vertexType": "COLUMN",
+ "vertexId": "default.src.key"
+ }
+ ]
+}
+PREHOOK: query: drop table if exists emp
+PREHOOK: type: DROPTABLE
+PREHOOK: query: drop table if exists dept
+PREHOOK: type: DROPTABLE
+PREHOOK: query: drop table if exists project
+PREHOOK: type: DROPTABLE
+PREHOOK: query: drop table if exists tgt
+PREHOOK: type: DROPTABLE
+PREHOOK: query: create table emp(emp_id int, name string, mgr_id int, dept_id int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@emp
+PREHOOK: query: create table dept(dept_id int, dept_name string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dept
+PREHOOK: query: create table project(project_id int, project_name string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@project
+PREHOOK: query: create table tgt(dept_name string, name string,
+ emp_id int, mgr_id int, proj_id int, proj_name string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tgt
+PREHOOK: query: INSERT INTO TABLE tgt
+SELECT emd.dept_name, emd.name, emd.emp_id, emd.mgr_id, p.project_id, p.project_name
+FROM (
+ SELECT d.dept_name, em.name, em.emp_id, em.mgr_id, em.dept_id
+ FROM (
+ SELECT e.name, e.dept_id, e.emp_id emp_id, m.emp_id mgr_id
+ FROM emp e JOIN emp m ON e.emp_id = m.emp_id
+ ) em
+ JOIN dept d ON d.dept_id = em.dept_id
+ ) emd JOIN project p ON emd.dept_id = p.project_id
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dept
+PREHOOK: Input: default@emp
+PREHOOK: Input: default@project
+PREHOOK: Output: default@tgt
+POSTHOOK: LINEAGE: {
+ "version": "1.0",
+ "engine": "mr",
+ "hash": "f59797e0422d2e51515063374dfac361",
+ "queryText": "INSERT INTO TABLE tgt\nSELECT emd.dept_name, emd.name, emd.emp_id, emd.mgr_id, p.project_id, p.project_name\nFROM (\n SELECT d.dept_name, em.name, em.emp_id, em.mgr_id, em.dept_id\n FROM (\n SELECT e.name, e.dept_id, e.emp_id emp_id, m.emp_id mgr_id\n FROM emp e JOIN emp m ON e.emp_id = m.emp_id\n ) em\n JOIN dept d ON d.dept_id = em.dept_id\n ) emd JOIN project p ON emd.dept_id = p.project_id",
+ "edges": [
+ {
+ "sources": [
+ 6
+ ],
+ "targets": [
+ 0
+ ],
+ "edgeType": "PROJECTION"
+ },
+ {
+ "sources": [
+ 7
+ ],
+ "targets": [
+ 1
+ ],
+ "expression": "emd.name",
+ "edgeType": "PROJECTION"
+ },
+ {
+ "sources": [
+ 8
+ ],
+ "targets": [
+ 2
+ ],
+ "expression": "emd.emp_id",
+ "edgeType": "PROJECTION"
+ },
+ {
+ "sources": [
+ 8
+ ],
+ "targets": [
+ 3
+ ],
+ "expression": "emd.mgr_id",
+ "edgeType": "PROJECTION"
+ },
+ {
+ "sources": [
+ 9
+ ],
+ "targets": [
+ 4
+ ],
+ "edgeType": "PROJECTION"
+ },
+ {
+ "sources": [
+ 10
+ ],
+ "targets": [
+ 5
+ ],
+ "edgeType": "PROJECTION"
+ },
+ {
+ "sources": [
+ 8
+ ],
+ "targets": [
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5
+ ],
+ "expression": "(e.emp_id = m.emp_id)",
+ "edgeType": "PREDICATE"
+ },
+ {
+ "sources": [
+ 11,
+ 12
+ ],
+ "targets": [
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5
+ ],
+ "expression": "(em._col1 = d.dept_id)",
+ "edgeType": "PREDICATE"
+ },
+ {
+ "sources": [
+ 11,
+ 9
+ ],
+ "targets": [
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5
+ ],
+ "expression": "(emd._col4 = p.project_id)",
+ "edgeType": "PREDICATE"
+ }
+ ],
+ "vertices": [
+ {
+ "id": 0,
+ "vertexType": "COLUMN",
+ "vertexId": "default.tgt.dept_name"
+ },
+ {
+ "id": 1,
+ "vertexType": "COLUMN",
+ "vertexId": "default.tgt.name"
+ },
+ {
+ "id": 2,
+ "vertexType": "COLUMN",
+ "vertexId": "default.tgt.emp_id"
+ },
+ {
+ "id": 3,
+ "vertexType": "COLUMN",
+ "vertexId": "default.tgt.mgr_id"
+ },
+ {
+ "id": 4,
+ "vertexType": "COLUMN",
+ "vertexId": "default.tgt.proj_id"
+ },
+ {
+ "id": 5,
+ "vertexType": "COLUMN",
+ "vertexId": "default.tgt.proj_name"
+ },
+ {
+ "id": 6,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dept.dept_name"
+ },
+ {
+ "id": 7,
+ "vertexType": "COLUMN",
+ "vertexId": "default.emp.name"
+ },
+ {
+ "id": 8,
+ "vertexType": "COLUMN",
+ "vertexId": "default.emp.emp_id"
+ },
+ {
+ "id": 9,
+ "vertexType": "COLUMN",
+ "vertexId": "default.project.project_id"
+ },
+ {
+ "id": 10,
+ "vertexType": "COLUMN",
+ "vertexId": "default.project.project_name"
+ },
+ {
+ "id": 11,
+ "vertexType": "COLUMN",
+ "vertexId": "default.emp.dept_id"
+ },
+ {
+ "id": 12,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dept.dept_id"
+ }
+ ]
+}
+PREHOOK: query: drop table if exists dest_l2
+PREHOOK: type: DROPTABLE
+PREHOOK: query: create table dest_l2 (id int, c1 tinyint, c2 int, c3 bigint) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest_l2
+PREHOOK: query: insert into dest_l2 values(0, 1, 100, 10000)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@dest_l2
+POSTHOOK: LINEAGE: {
+ "version": "1.0",
+ "engine": "mr",
+ "hash": "e001334e3f8384806b0f25a7c303045f",
+ "queryText": "insert into dest_l2 values(0, 1, 100, 10000)",
+ "edges": [
+ {
+ "sources": [],
+ "targets": [
+ 0
+ ],
+ "expression": "UDFToInteger(values__tmp__table__1.tmp_values_col1)",
+ "edgeType": "PROJECTION"
+ },
+ {
+ "sources": [],
+ "targets": [
+ 1
+ ],
+ "expression": "UDFToByte(values__tmp__table__1.tmp_values_col2)",
+ "edgeType": "PROJECTION"
+ },
+ {
+ "sources": [],
+ "targets": [
+ 2
+ ],
+ "expression": "UDFToInteger(values__tmp__table__1.tmp_values_col3)",
+ "edgeType": "PROJECTION"
+ },
+ {
+ "sources": [],
+ "targets": [
+ 3
+ ],
+ "expression": "UDFToLong(values__tmp__table__1.tmp_values_col4)",
+ "edgeType": "PROJECTION"
+ }
+ ],
+ "vertices": [
+ {
+ "id": 0,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest_l2.id"
+ },
+ {
+ "id": 1,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest_l2.c1"
+ },
+ {
+ "id": 2,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest_l2.c2"
+ },
+ {
+ "id": 3,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest_l2.c3"
+ }
+ ]
+}
+PREHOOK: query: select * from (
+ select c1 + c2 x from dest_l2
+ union all
+ select sum(c3) y from (select c3 from dest_l2) v1) v2 order by x
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_l2
+#### A masked pattern was here ####
+POSTHOOK: LINEAGE: {
+ "version": "1.0",
+ "engine": "mr",
+ "hash": "a2c96a96be9d315ede966be5b45ef20e",
+ "queryText": "select * from (\n select c1 + c2 x from dest_l2\n union all\n select sum(c3) y from (select c3 from dest_l2) v1) v2 order by x",
+ "edges": [
+ {
+ "sources": [
+ 1,
+ 2,
+ 3
+ ],
+ "targets": [
+ 0
+ ],
+ "expression": "v2.x",
+ "edgeType": "PROJECTION"
+ }
+ ],
+ "vertices": [
+ {
+ "id": 0,
+ "vertexType": "COLUMN",
+ "vertexId": "v2.x"
+ },
+ {
+ "id": 1,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest_l2.c1"
+ },
+ {
+ "id": 2,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest_l2.c2"
+ },
+ {
+ "id": 3,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest_l2.c3"
+ }
+ ]
+}
+101
+10000
+PREHOOK: query: drop table if exists dest_l3
+PREHOOK: type: DROPTABLE
+PREHOOK: query: create table dest_l3 (id int, c1 string, c2 string, c3 int) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest_l3
+PREHOOK: query: insert into dest_l3 values(0, "s1", "s2", 15)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__2
+PREHOOK: Output: default@dest_l3
+POSTHOOK: LINEAGE: {
+ "version": "1.0",
+ "engine": "mr",
+ "hash": "09df51ba6ba2d07f2304523ee505f094",
+ "queryText": "insert into dest_l3 values(0, \"s1\", \"s2\", 15)",
+ "edges": [
+ {
+ "sources": [],
+ "targets": [
+ 0
+ ],
+ "expression": "UDFToInteger(values__tmp__table__2.tmp_values_col1)",
+ "edgeType": "PROJECTION"
+ },
+ {
+ "sources": [],
+ "targets": [
+ 1,
+ 2
+ ],
+ "edgeType": "PROJECTION"
+ },
+ {
+ "sources": [],
+ "targets": [
+ 3
+ ],
+ "expression": "UDFToInteger(values__tmp__table__2.tmp_values_col4)",
+ "edgeType": "PROJECTION"
+ }
+ ],
+ "vertices": [
+ {
+ "id": 0,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest_l3.id"
+ },
+ {
+ "id": 1,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest_l3.c1"
+ },
+ {
+ "id": 2,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest_l3.c2"
+ },
+ {
+ "id": 3,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest_l3.c3"
+ }
+ ]
+}
+PREHOOK: query: select sum(a.c1) over (partition by a.c1 order by a.id)
+from dest_l2 a
+where a.c2 != 10
+group by a.c1, a.c2, a.id
+having count(a.c2) > 0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_l2
+#### A masked pattern was here ####
+POSTHOOK: LINEAGE: {
+ "version": "1.0",
+ "engine": "mr",
+ "hash": "0ae7aa4a0cbd1283210fa79e8a19104a",
+ "queryText": "select sum(a.c1) over (partition by a.c1 order by a.id)\nfrom dest_l2 a\nwhere a.c2 != 10\ngroup by a.c1, a.c2, a.id\nhaving count(a.c2) > 0",
+ "edges": [
+ {
+ "sources": [
+ 1,
+ 2,
+ 3
+ ],
+ "targets": [
+ 0
+ ],
+ "expression": "(tok_function sum (. (tok_table_or_col $hdt$_0) $f0) (tok_windowspec (tok_partitioningspec (tok_distributeby (. (tok_table_or_col $hdt$_0) $f0)) (tok_orderby (tok_tabsortcolnameasc (. (tok_table_or_col $hdt$_0) $f2)))) (tok_windowvalues (preceding 2147483647) current)))",
+ "edgeType": "PROJECTION"
+ },
+ {
+ "sources": [
+ 2
+ ],
+ "targets": [
+ 0
+ ],
+ "expression": "(a.c2 <> 10)",
+ "edgeType": "PREDICATE"
+ },
+ {
+ "sources": [
+ 2
+ ],
+ "targets": [
+ 0
+ ],
+ "expression": "(count(default.dest_l2.c2) > 0)",
+ "edgeType": "PREDICATE"
+ }
+ ],
+ "vertices": [
+ {
+ "id": 0,
+ "vertexType": "COLUMN",
+ "vertexId": "c0"
+ },
+ {
+ "id": 1,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest_l2.c1"
+ },
+ {
+ "id": 2,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest_l2.c2"
+ },
+ {
+ "id": 3,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest_l2.id"
+ }
+ ]
+}
+1
+PREHOOK: query: select sum(a.c1), count(b.c1), b.c2, b.c3
+from dest_l2 a join dest_l3 b on (a.id = b.id)
+where a.c2 != 10 and b.c3 > 0
+group by a.c1, a.c2, a.id, b.c1, b.c2, b.c3
+having count(a.c2) > 0
+order by b.c3 limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_l2
+PREHOOK: Input: default@dest_l3
+#### A masked pattern was here ####
+POSTHOOK: LINEAGE: {
+ "version": "1.0",
+ "engine": "mr",
+ "hash": "01879c619517509d9f5b6ead998bb4bb",
+ "queryText": "select sum(a.c1), count(b.c1), b.c2, b.c3\nfrom dest_l2 a join dest_l3 b on (a.id = b.id)\nwhere a.c2 != 10 and b.c3 > 0\ngroup by a.c1, a.c2, a.id, b.c1, b.c2, b.c3\nhaving count(a.c2) > 0\norder by b.c3 limit 5",
+ "edges": [
+ {
+ "sources": [
+ 4
+ ],
+ "targets": [
+ 0
+ ],
+ "expression": "sum(default.dest_l2.c1)",
+ "edgeType": "PROJECTION"
+ },
+ {
+ "sources": [
+ 5
+ ],
+ "targets": [
+ 1
+ ],
+ "expression": "count(default.dest_l3.c1)",
+ "edgeType": "PROJECTION"
+ },
+ {
+ "sources": [
+ 6
+ ],
+ "targets": [
+ 2
+ ],
+ "edgeType": "PROJECTION"
+ },
+ {
+ "sources": [
+ 7
+ ],
+ "targets": [
+ 3
+ ],
+ "edgeType": "PROJECTION"
+ },
+ {
+ "sources": [
+ 8,
+ 7
+ ],
+ "targets": [
+ 0,
+ 1,
+ 2,
+ 3
+ ],
+ "expression": "((a.c2 <> 10) and (b.c3 > 0))",
+ "edgeType": "PREDICATE"
+ },
+ {
+ "sources": [
+ 8
+ ],
+ "targets": [
+ 0,
+ 1,
+ 2,
+ 3
+ ],
+ "expression": "(count(default.dest_l2.c2) > 0)",
+ "edgeType": "PREDICATE"
+ },
+ {
+ "sources": [
+ 9,
+ 10
+ ],
+ "targets": [
+ 0,
+ 1,
+ 2,
+ 3
+ ],
+ "expression": "(a.id = b.id)",
+ "edgeType": "PREDICATE"
+ }
+ ],
+ "vertices": [
+ {
+ "id": 0,
+ "vertexType": "COLUMN",
+ "vertexId": "_c0"
+ },
+ {
+ "id": 1,
+ "vertexType": "COLUMN",
+ "vertexId": "_c1"
+ },
+ {
+ "id": 2,
+ "vertexType": "COLUMN",
+ "vertexId": "b.c2"
+ },
+ {
+ "id": 3,
+ "vertexType": "COLUMN",
+ "vertexId": "b.c3"
+ },
+ {
+ "id": 4,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest_l2.c1"
+ },
+ {
+ "id": 5,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest_l3.c1"
+ },
+ {
+ "id": 6,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest_l3.c2"
+ },
+ {
+ "id": 7,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest_l3.c3"
+ },
+ {
+ "id": 8,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest_l2.c2"
+ },
+ {
+ "id": 9,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest_l2.id"
+ },
+ {
+ "id": 10,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest_l3.id"
+ }
+ ]
+}
+1 1 s2 15
+PREHOOK: query: drop table if exists t
+PREHOOK: type: DROPTABLE
+PREHOOK: query: create table t as
+select distinct a.c2, a.c3 from dest_l2 a
+inner join dest_l3 b on (a.id = b.id)
+where a.id > 0 and b.c3 = 15
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@dest_l2
+PREHOOK: Input: default@dest_l3
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t
+POSTHOOK: LINEAGE: {
+ "version": "1.0",
+ "engine": "mr",
+ "hash": "0d2f15b494111ffe236d5be42a76fa28",
+ "queryText": "create table t as\nselect distinct a.c2, a.c3 from dest_l2 a\ninner join dest_l3 b on (a.id = b.id)\nwhere a.id > 0 and b.c3 = 15",
+ "edges": [
+ {
+ "sources": [
+ 2
+ ],
+ "targets": [
+ 0
+ ],
+ "edgeType": "PROJECTION"
+ },
+ {
+ "sources": [
+ 3
+ ],
+ "targets": [
+ 1
+ ],
+ "edgeType": "PROJECTION"
+ },
+ {
+ "sources": [
+ 4,
+ 5
+ ],
+ "targets": [
+ 0,
+ 1
+ ],
+ "expression": "((a.id > 0) and (b.c3 = 15))",
+ "edgeType": "PREDICATE"
+ },
+ {
+ "sources": [
+ 4,
+ 6
+ ],
+ "targets": [
+ 0,
+ 1
+ ],
+ "expression": "(a.id = b.id)",
+ "edgeType": "PREDICATE"
+ }
+ ],
+ "vertices": [
+ {
+ "id": 0,
+ "vertexType": "COLUMN",
+ "vertexId": "default.t.c2"
+ },
+ {
+ "id": 1,
+ "vertexType": "COLUMN",
+ "vertexId": "default.t.c3"
+ },
+ {
+ "id": 2,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest_l2.c2"
+ },
+ {
+ "id": 3,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest_l2.c3"
+ },
+ {
+ "id": 4,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest_l2.id"
+ },
+ {
+ "id": 5,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest_l3.c3"
+ },
+ {
+ "id": 6,
+ "vertexType": "COLUMN",
+ "vertexId": "default.dest_l3.id"
+ }
+ ]
+}
+PREHOOK: query: SELECT substr(src1.key,1,1), count(DISTINCT substr(src1.value,5)),
+concat(substr(src1.key,1,1),sum(substr(src1.value,5)))
+from src1
+GROUP BY substr(src1.key,1,1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: LINEAGE: {
+ "version": "1.0",
+ "engine": "mr",
+ "hash": "5b1022708124ee2b80f9e2e8a0dcb15c",
+ "queryText": "SELECT substr(src1.key,1,1), count(DISTINCT substr(src1.value,5)),\nconcat(substr(src1.key,1,1),sum(substr(src1.value,5)))\nfrom src1\nGROUP BY substr(src1.key,1,1)",
+ "edges": [
+ {
+ "sources": [
+ 3
+ ],
+ "targets": [
+ 0
+ ],
+ "expression": "substr(src1.key, 1, 1)",
+ "edgeType": "PROJECTION"
+ },
+ {
+ "sources": [
+ 4
+ ],
+ "targets": [
+ 1
+ ],
+ "expression": "count(DISTINCT substr(src1.value, 5))",
+ "edgeType": "PROJECTION"
+ },
+ {
+ "sources": [
+ 3,
+ 4
+ ],
+ "targets": [
+ 2
+ ],
+ "expression": "concat(substr(src1.key, 1, 1), sum(substr(src1.value, 5)))",
+ "edgeType": "PROJECTION"
+ }
+ ],
+ "vertices": [
+ {
+ "id": 0,
+ "vertexType": "COLUMN",
+ "vertexId": "c0"
+ },
+ {
+ "id": 1,
+ "vertexType": "COLUMN",
+ "vertexId": "c1"
+ },
+ {
+ "id": 2,
+ "vertexType": "COLUMN",
+ "vertexId": "c2"
+ },
+ {
+ "id": 3,
+ "vertexType": "COLUMN",
+ "vertexId": "default.src1.key"
+ },
+ {
+ "id": 4,
+ "vertexType": "COLUMN",
+ "vertexId": "default.src1.value"
+ }
+ ]
+}
+ 7 1543.0
+1 3 1296.0
+2 6 21257.0
+3 2 3311.0
+4 2 4807.0
+6 1 666.0
+9 1 998.0