You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@hive.apache.org by Andrew Sherman via Review Board <no...@reviews.apache.org> on 2017/11/30 01:22:46 UTC

Review Request 64193: HIVE-18054: Make Lineage work with concurrent queries on a Session

-----------------------------------------------------------
This is an automatically generated e-mail. To reply, visit:
https://reviews.apache.org/r/64193/
-----------------------------------------------------------

Review request for hive.


Repository: hive-git


Description
-------

A Hive Session can contain multiple concurrent sql Operations.
Lineage is currently tracked in SessionState and is cleared when a query
completes. This results in Lineage for other running queries being lost.

To fix this, move LineageState from SessionState to QueryState.
In MoveTask/MoveWork use the LineageState from the MoveTask's QueryState
rather than trying to use it from MoveWork.
Add a test which runs multiple jdbc queries in a thread pool
against the same connection and show that Vertices are not lost from Lineage.
As part of this test, add ReadableHook, an ExecuteWithHookContext that stores
HookContexts in memory and makes them available for reading.
Make LineageLogger methods static so they can be used elsewhere.

Sometimes a running query (originating in a Driver) will instantiate
another Driver to run or compile another query. Because these Drivers
shared a Session, the child Driver would accumulate Lineage information
along with that of the parent Driver. For consistency a LineageState is
passed to these child Drivers and stored in the new Driver's QueryState.


Diffs
-----

  itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java f5ed735c1ec14dfee338e56020fa2629b168389d 
  ql/src/java/org/apache/hadoop/hive/ql/Driver.java af9f193dc94e2e05caa88d965a34f4483c9d7069 
  ql/src/java/org/apache/hadoop/hive/ql/QueryState.java 7d5aa8b179e536e25c41a8946e667f8dd5669e0f 
  ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java e7af5e004fb560b574b82f6d1b60517511802f37 
  ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java e2f8c1f8012ad25114e279747e821b291c7f4ca6 
  ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java 1f0487f4f72ab18bcf876f45ad5758d83a7f001b 
  ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadPartitions.java 262225fc202d4627652acfd77350e44b0284b3da 
  ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadTable.java bb1f4e50509e57a9d0b9e6793c1fc08baa4d2981 
  ql/src/java/org/apache/hadoop/hive/ql/hooks/HookContext.java 7b617309f6b0d8a7ce0dea80ab1f790c2651b147 
  ql/src/java/org/apache/hadoop/hive/ql/hooks/LineageLogger.java 2f764f8a29a9d41a7db013a949ffe3a8a9417d32 
  ql/src/java/org/apache/hadoop/hive/ql/hooks/ReadableHook.java PRE-CREATION 
  ql/src/java/org/apache/hadoop/hive/ql/index/AggregateIndexHandler.java 68709b4d3baf15d78e60e948ccdef3df84f28cec 
  ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexHandler.java 1e577da82343a1b7361467fb662661f9c6642ec0 
  ql/src/java/org/apache/hadoop/hive/ql/index/TableBasedIndexHandler.java 29886ae7f97f8dae7116f4fc9a2417ab8f9dac0a 
  ql/src/java/org/apache/hadoop/hive/ql/index/bitmap/BitmapIndexHandler.java 7b067a0d45e33bc3347c43b050af933c296a9227 
  ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java 504b0623142a6fa6cdb45a26b49f146e12ec2d7a 
  ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java d7a83f775abca39b219f71aff88173a14ffaee9f 
  ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRProcContext.java 4387c4297fee48d4c03e95d5a2fcb822ab480eeb 
  ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java 67739a1db9fc52a67f4f5ea7dba80fe0e95750c8 
  ql/src/java/org/apache/hadoop/hive/ql/optimizer/IndexUtils.java 338c1856672f09bb7da35d2336ebb5b6f3fdc5a6 
  ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/Generator.java e6c07713b24df719315d804f006151106eea9aed 
  ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java 1fd634c928a5384b09d97322c3ea785f518d73fe 
  ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java 065c7e50986872cd35386feee712f3452597d643 
  ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezProcContext.java 0c160acf46eb1eb07c5f04091099c1024e166638 
  ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java b6f1139fe1a78283277bf4d0c5224ab1d718c634 
  ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java cd75130d7c5f0b402f1b4331c57edc611eb4b2ed 
  ql/src/java/org/apache/hadoop/hive/ql/parse/IndexUpdater.java f31775ed942160da73344c4dca707da7b8c658a6 
  ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java 238fbd60572ee5f7f8f6c4d5b2abce8f66c7e495 
  ql/src/java/org/apache/hadoop/hive/ql/parse/MapReduceCompiler.java d7a56e5846d5754dec5070d8c44443543a3695e4 
  ql/src/java/org/apache/hadoop/hive/ql/parse/ReplicationSemanticAnalyzer.java 498b6741c3f40b92ce3fb218e91e7809a17383f0 
  ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b66817f65f65b6aaf8dbc339a969b8e9e0565e9e 
  ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java 7b2937032ab8dd57f8923e0a9e7aab4a92de55ee 
  ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java be33f380030ea8b416a4549c3947d767bba66356 
  ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkProcContext.java 4d2bcfa285dc08811106f3c234346efff22afd99 
  ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java 604c8aee151a45cf942852a3644b5e79f779f353 
  ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java 965044d9253585eeaeef50d7fe4fc4d818042df8 
  ql/src/java/org/apache/hadoop/hive/ql/plan/MoveWork.java 28a33740b30b7be0057ce91de55a0407dd2f2cbf 
  ql/src/java/org/apache/hadoop/hive/ql/session/LineageState.java 056d6141d6239816699ed5f730cbd14e48d8d9bb 
  ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java bb6ddc6fa4667ac0e30994d0f9ee8b969542383c 
  ql/src/test/org/apache/hadoop/hive/ql/optimizer/TestGenMapRedUtilsCreateConditionalTask.java 340689255c738ea497bcd269463b8b8bc38cf34c 
  ql/src/test/org/apache/hadoop/hive/ql/parse/TestGenTezWork.java 2c28c398ca49ba661df460c9f3e6d578c785d3ce 


Diff: https://reviews.apache.org/r/64193/diff/1/


Testing
-------


Thanks,

Andrew Sherman


Re: Review Request 64193: HIVE-18054: Make Lineage work with concurrent queries on a Session

Posted by Andrew Sherman via Review Board <no...@reviews.apache.org>.

> On Dec. 2, 2017, 12:22 a.m., Sahil Takiar wrote:
> > Since we touch the `LoadSemanticAnalyzer` could we add a q-test (could be added to one of the existing `lineage*.q` files) for `LOAD` statements. Same for import / export statements (as far as I can tell there are no existing ones, correct me if I am wrong).
> > 
> > If you have time, it would be great to run some of the lineage tests for HoS too, but since thats a bit orthogonal to this JIRA, it can be done in a follow up JIRA.

I will addsome more tests...


> On Dec. 2, 2017, 12:22 a.m., Sahil Takiar wrote:
> > ql/src/java/org/apache/hadoop/hive/ql/Driver.java
> > Lines 365 (patched)
> > <https://reviews.apache.org/r/64193/diff/1/?file=1904283#file1904283line365>
> >
> >     Sounds good. Just curious, is there any way to know for sure where code run by a `Driver`, creates another `Driver`? How did you determine when this is necessary?

I reviewed all code that creates a Driver.


> On Dec. 2, 2017, 12:22 a.m., Sahil Takiar wrote:
> > ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
> > Line 401 (original), 401 (patched)
> > <https://reviews.apache.org/r/64193/diff/1/?file=1904286#file1904286line401>
> >
> >     Ok, but do we need to do `if (queryState.getLineageState() != null)` to ensure an NPE isn't thrown? That seems to be what the old code is doing.

I don't think we need to do that. There is always an initial lineageState inside queryState so for it to be null someone would have had to call setLineageState(null)


> On Dec. 2, 2017, 12:22 a.m., Sahil Takiar wrote:
> > ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java
> > Lines 111 (patched)
> > <https://reviews.apache.org/r/64193/diff/1/?file=1904313#file1904313line111>
> >
> >     Doesn't a `TaskCompiler` already have a `QueryState` object? Why do we need to explicitly pass in a `LineageState`?

Good catch, I will fix


- Andrew


-----------------------------------------------------------
This is an automatically generated e-mail. To reply, visit:
https://reviews.apache.org/r/64193/#review192601
-----------------------------------------------------------


On Nov. 30, 2017, 1:22 a.m., Andrew Sherman wrote:
> 
> -----------------------------------------------------------
> This is an automatically generated e-mail. To reply, visit:
> https://reviews.apache.org/r/64193/
> -----------------------------------------------------------
> 
> (Updated Nov. 30, 2017, 1:22 a.m.)
> 
> 
> Review request for hive.
> 
> 
> Repository: hive-git
> 
> 
> Description
> -------
> 
> A Hive Session can contain multiple concurrent sql Operations.
> Lineage is currently tracked in SessionState and is cleared when a query
> completes. This results in Lineage for other running queries being lost.
> 
> To fix this, move LineageState from SessionState to QueryState.
> In MoveTask/MoveWork use the LineageState from the MoveTask's QueryState
> rather than trying to use it from MoveWork.
> Add a test which runs multiple jdbc queries in a thread pool
> against the same connection and show that Vertices are not lost from Lineage.
> As part of this test, add ReadableHook, an ExecuteWithHookContext that stores
> HookContexts in memory and makes them available for reading.
> Make LineageLogger methods static so they can be used elsewhere.
> 
> Sometimes a running query (originating in a Driver) will instantiate
> another Driver to run or compile another query. Because these Drivers
> shared a Session, the child Driver would accumulate Lineage information
> along with that of the parent Driver. For consistency a LineageState is
> passed to these child Drivers and stored in the new Driver's QueryState.
> 
> 
> Diffs
> -----
> 
>   itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java f5ed735c1ec14dfee338e56020fa2629b168389d 
>   ql/src/java/org/apache/hadoop/hive/ql/Driver.java af9f193dc94e2e05caa88d965a34f4483c9d7069 
>   ql/src/java/org/apache/hadoop/hive/ql/QueryState.java 7d5aa8b179e536e25c41a8946e667f8dd5669e0f 
>   ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java e7af5e004fb560b574b82f6d1b60517511802f37 
>   ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java e2f8c1f8012ad25114e279747e821b291c7f4ca6 
>   ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java 1f0487f4f72ab18bcf876f45ad5758d83a7f001b 
>   ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadPartitions.java 262225fc202d4627652acfd77350e44b0284b3da 
>   ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadTable.java bb1f4e50509e57a9d0b9e6793c1fc08baa4d2981 
>   ql/src/java/org/apache/hadoop/hive/ql/hooks/HookContext.java 7b617309f6b0d8a7ce0dea80ab1f790c2651b147 
>   ql/src/java/org/apache/hadoop/hive/ql/hooks/LineageLogger.java 2f764f8a29a9d41a7db013a949ffe3a8a9417d32 
>   ql/src/java/org/apache/hadoop/hive/ql/hooks/ReadableHook.java PRE-CREATION 
>   ql/src/java/org/apache/hadoop/hive/ql/index/AggregateIndexHandler.java 68709b4d3baf15d78e60e948ccdef3df84f28cec 
>   ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexHandler.java 1e577da82343a1b7361467fb662661f9c6642ec0 
>   ql/src/java/org/apache/hadoop/hive/ql/index/TableBasedIndexHandler.java 29886ae7f97f8dae7116f4fc9a2417ab8f9dac0a 
>   ql/src/java/org/apache/hadoop/hive/ql/index/bitmap/BitmapIndexHandler.java 7b067a0d45e33bc3347c43b050af933c296a9227 
>   ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java 504b0623142a6fa6cdb45a26b49f146e12ec2d7a 
>   ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java d7a83f775abca39b219f71aff88173a14ffaee9f 
>   ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRProcContext.java 4387c4297fee48d4c03e95d5a2fcb822ab480eeb 
>   ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java 67739a1db9fc52a67f4f5ea7dba80fe0e95750c8 
>   ql/src/java/org/apache/hadoop/hive/ql/optimizer/IndexUtils.java 338c1856672f09bb7da35d2336ebb5b6f3fdc5a6 
>   ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/Generator.java e6c07713b24df719315d804f006151106eea9aed 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java 1fd634c928a5384b09d97322c3ea785f518d73fe 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java 065c7e50986872cd35386feee712f3452597d643 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezProcContext.java 0c160acf46eb1eb07c5f04091099c1024e166638 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java b6f1139fe1a78283277bf4d0c5224ab1d718c634 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java cd75130d7c5f0b402f1b4331c57edc611eb4b2ed 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/IndexUpdater.java f31775ed942160da73344c4dca707da7b8c658a6 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java 238fbd60572ee5f7f8f6c4d5b2abce8f66c7e495 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/MapReduceCompiler.java d7a56e5846d5754dec5070d8c44443543a3695e4 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/ReplicationSemanticAnalyzer.java 498b6741c3f40b92ce3fb218e91e7809a17383f0 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b66817f65f65b6aaf8dbc339a969b8e9e0565e9e 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java 7b2937032ab8dd57f8923e0a9e7aab4a92de55ee 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java be33f380030ea8b416a4549c3947d767bba66356 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkProcContext.java 4d2bcfa285dc08811106f3c234346efff22afd99 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java 604c8aee151a45cf942852a3644b5e79f779f353 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java 965044d9253585eeaeef50d7fe4fc4d818042df8 
>   ql/src/java/org/apache/hadoop/hive/ql/plan/MoveWork.java 28a33740b30b7be0057ce91de55a0407dd2f2cbf 
>   ql/src/java/org/apache/hadoop/hive/ql/session/LineageState.java 056d6141d6239816699ed5f730cbd14e48d8d9bb 
>   ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java bb6ddc6fa4667ac0e30994d0f9ee8b969542383c 
>   ql/src/test/org/apache/hadoop/hive/ql/optimizer/TestGenMapRedUtilsCreateConditionalTask.java 340689255c738ea497bcd269463b8b8bc38cf34c 
>   ql/src/test/org/apache/hadoop/hive/ql/parse/TestGenTezWork.java 2c28c398ca49ba661df460c9f3e6d578c785d3ce 
> 
> 
> Diff: https://reviews.apache.org/r/64193/diff/1/
> 
> 
> Testing
> -------
> 
> 
> Thanks,
> 
> Andrew Sherman
> 
>


Re: Review Request 64193: HIVE-18054: Make Lineage work with concurrent queries on a Session

Posted by Andrew Sherman via Review Board <no...@reviews.apache.org>.

> On Dec. 2, 2017, 12:22 a.m., Sahil Takiar wrote:
> > Since we touch the `LoadSemanticAnalyzer` could we add a q-test (could be added to one of the existing `lineage*.q` files) for `LOAD` statements. Same for import / export statements (as far as I can tell there are no existing ones, correct me if I am wrong).
> > 
> > If you have time, it would be great to run some of the lineage tests for HoS too, but since thats a bit orthogonal to this JIRA, it can be done in a follow up JIRA.
> 
> Andrew Sherman wrote:
>     I will addsome more tests...

Update: I looked into his some more. It turns out that Lineage info is printed by default by the PostExecutePrinter.
So any .q tests should show their lineage in the output.
HoS tests do print some lineage so I think that part is covered.
But currently no lineage is printed for LOAD/IMPORT/EXPORT.
The MoveTask does call getLineageState().setLineage() to set lineage into the LineageState but it does not call getLineageState().mapDirToOp().
Possibly this is because LOAD statements don't have a org.apache.hadoop.hive.ql.exec.Operator
And IMPORT statements behave similarly.
IMO the whole Lineage stuff is not clearly specified. 
Do you think it is worth doing follow-up work to try to document /test) it better?


- Andrew


-----------------------------------------------------------
This is an automatically generated e-mail. To reply, visit:
https://reviews.apache.org/r/64193/#review192601
-----------------------------------------------------------


On Nov. 30, 2017, 1:22 a.m., Andrew Sherman wrote:
> 
> -----------------------------------------------------------
> This is an automatically generated e-mail. To reply, visit:
> https://reviews.apache.org/r/64193/
> -----------------------------------------------------------
> 
> (Updated Nov. 30, 2017, 1:22 a.m.)
> 
> 
> Review request for hive.
> 
> 
> Repository: hive-git
> 
> 
> Description
> -------
> 
> A Hive Session can contain multiple concurrent sql Operations.
> Lineage is currently tracked in SessionState and is cleared when a query
> completes. This results in Lineage for other running queries being lost.
> 
> To fix this, move LineageState from SessionState to QueryState.
> In MoveTask/MoveWork use the LineageState from the MoveTask's QueryState
> rather than trying to use it from MoveWork.
> Add a test which runs multiple jdbc queries in a thread pool
> against the same connection and show that Vertices are not lost from Lineage.
> As part of this test, add ReadableHook, an ExecuteWithHookContext that stores
> HookContexts in memory and makes them available for reading.
> Make LineageLogger methods static so they can be used elsewhere.
> 
> Sometimes a running query (originating in a Driver) will instantiate
> another Driver to run or compile another query. Because these Drivers
> shared a Session, the child Driver would accumulate Lineage information
> along with that of the parent Driver. For consistency a LineageState is
> passed to these child Drivers and stored in the new Driver's QueryState.
> 
> 
> Diffs
> -----
> 
>   itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java f5ed735c1ec14dfee338e56020fa2629b168389d 
>   ql/src/java/org/apache/hadoop/hive/ql/Driver.java af9f193dc94e2e05caa88d965a34f4483c9d7069 
>   ql/src/java/org/apache/hadoop/hive/ql/QueryState.java 7d5aa8b179e536e25c41a8946e667f8dd5669e0f 
>   ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java e7af5e004fb560b574b82f6d1b60517511802f37 
>   ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java e2f8c1f8012ad25114e279747e821b291c7f4ca6 
>   ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java 1f0487f4f72ab18bcf876f45ad5758d83a7f001b 
>   ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadPartitions.java 262225fc202d4627652acfd77350e44b0284b3da 
>   ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadTable.java bb1f4e50509e57a9d0b9e6793c1fc08baa4d2981 
>   ql/src/java/org/apache/hadoop/hive/ql/hooks/HookContext.java 7b617309f6b0d8a7ce0dea80ab1f790c2651b147 
>   ql/src/java/org/apache/hadoop/hive/ql/hooks/LineageLogger.java 2f764f8a29a9d41a7db013a949ffe3a8a9417d32 
>   ql/src/java/org/apache/hadoop/hive/ql/hooks/ReadableHook.java PRE-CREATION 
>   ql/src/java/org/apache/hadoop/hive/ql/index/AggregateIndexHandler.java 68709b4d3baf15d78e60e948ccdef3df84f28cec 
>   ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexHandler.java 1e577da82343a1b7361467fb662661f9c6642ec0 
>   ql/src/java/org/apache/hadoop/hive/ql/index/TableBasedIndexHandler.java 29886ae7f97f8dae7116f4fc9a2417ab8f9dac0a 
>   ql/src/java/org/apache/hadoop/hive/ql/index/bitmap/BitmapIndexHandler.java 7b067a0d45e33bc3347c43b050af933c296a9227 
>   ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java 504b0623142a6fa6cdb45a26b49f146e12ec2d7a 
>   ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java d7a83f775abca39b219f71aff88173a14ffaee9f 
>   ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRProcContext.java 4387c4297fee48d4c03e95d5a2fcb822ab480eeb 
>   ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java 67739a1db9fc52a67f4f5ea7dba80fe0e95750c8 
>   ql/src/java/org/apache/hadoop/hive/ql/optimizer/IndexUtils.java 338c1856672f09bb7da35d2336ebb5b6f3fdc5a6 
>   ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/Generator.java e6c07713b24df719315d804f006151106eea9aed 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java 1fd634c928a5384b09d97322c3ea785f518d73fe 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java 065c7e50986872cd35386feee712f3452597d643 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezProcContext.java 0c160acf46eb1eb07c5f04091099c1024e166638 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java b6f1139fe1a78283277bf4d0c5224ab1d718c634 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java cd75130d7c5f0b402f1b4331c57edc611eb4b2ed 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/IndexUpdater.java f31775ed942160da73344c4dca707da7b8c658a6 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java 238fbd60572ee5f7f8f6c4d5b2abce8f66c7e495 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/MapReduceCompiler.java d7a56e5846d5754dec5070d8c44443543a3695e4 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/ReplicationSemanticAnalyzer.java 498b6741c3f40b92ce3fb218e91e7809a17383f0 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b66817f65f65b6aaf8dbc339a969b8e9e0565e9e 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java 7b2937032ab8dd57f8923e0a9e7aab4a92de55ee 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java be33f380030ea8b416a4549c3947d767bba66356 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkProcContext.java 4d2bcfa285dc08811106f3c234346efff22afd99 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java 604c8aee151a45cf942852a3644b5e79f779f353 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java 965044d9253585eeaeef50d7fe4fc4d818042df8 
>   ql/src/java/org/apache/hadoop/hive/ql/plan/MoveWork.java 28a33740b30b7be0057ce91de55a0407dd2f2cbf 
>   ql/src/java/org/apache/hadoop/hive/ql/session/LineageState.java 056d6141d6239816699ed5f730cbd14e48d8d9bb 
>   ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java bb6ddc6fa4667ac0e30994d0f9ee8b969542383c 
>   ql/src/test/org/apache/hadoop/hive/ql/optimizer/TestGenMapRedUtilsCreateConditionalTask.java 340689255c738ea497bcd269463b8b8bc38cf34c 
>   ql/src/test/org/apache/hadoop/hive/ql/parse/TestGenTezWork.java 2c28c398ca49ba661df460c9f3e6d578c785d3ce 
> 
> 
> Diff: https://reviews.apache.org/r/64193/diff/1/
> 
> 
> Testing
> -------
> 
> 
> Thanks,
> 
> Andrew Sherman
> 
>


Re: Review Request 64193: HIVE-18054: Make Lineage work with concurrent queries on a Session

Posted by Sahil Takiar <ta...@gmail.com>.
-----------------------------------------------------------
This is an automatically generated e-mail. To reply, visit:
https://reviews.apache.org/r/64193/#review192601
-----------------------------------------------------------



Since we touch the `LoadSemanticAnalyzer` could we add a q-test (could be added to one of the existing `lineage*.q` files) for `LOAD` statements. Same for import / export statements (as far as I can tell there are no existing ones, correct me if I am wrong).

If you have time, it would be great to run some of the lineage tests for HoS too, but since thats a bit orthogonal to this JIRA, it can be done in a follow up JIRA.


ql/src/java/org/apache/hadoop/hive/ql/Driver.java
Lines 365 (patched)
<https://reviews.apache.org/r/64193/#comment270856>

    Sounds good. Just curious, is there any way to know for sure where code run by a `Driver`, creates another `Driver`? How did you determine when this is necessary?



ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
Line 401 (original), 401 (patched)
<https://reviews.apache.org/r/64193/#comment270854>

    Ok, but do we need to do `if (queryState.getLineageState() != null)` to ensure an NPE isn't thrown? That seems to be what the old code is doing.



ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java
Lines 111 (patched)
<https://reviews.apache.org/r/64193/#comment270857>

    Doesn't a `TaskCompiler` already have a `QueryState` object? Why do we need to explicitly pass in a `LineageState`?


- Sahil Takiar


On Nov. 30, 2017, 1:22 a.m., Andrew Sherman wrote:
> 
> -----------------------------------------------------------
> This is an automatically generated e-mail. To reply, visit:
> https://reviews.apache.org/r/64193/
> -----------------------------------------------------------
> 
> (Updated Nov. 30, 2017, 1:22 a.m.)
> 
> 
> Review request for hive.
> 
> 
> Repository: hive-git
> 
> 
> Description
> -------
> 
> A Hive Session can contain multiple concurrent sql Operations.
> Lineage is currently tracked in SessionState and is cleared when a query
> completes. This results in Lineage for other running queries being lost.
> 
> To fix this, move LineageState from SessionState to QueryState.
> In MoveTask/MoveWork use the LineageState from the MoveTask's QueryState
> rather than trying to use it from MoveWork.
> Add a test which runs multiple jdbc queries in a thread pool
> against the same connection and show that Vertices are not lost from Lineage.
> As part of this test, add ReadableHook, an ExecuteWithHookContext that stores
> HookContexts in memory and makes them available for reading.
> Make LineageLogger methods static so they can be used elsewhere.
> 
> Sometimes a running query (originating in a Driver) will instantiate
> another Driver to run or compile another query. Because these Drivers
> shared a Session, the child Driver would accumulate Lineage information
> along with that of the parent Driver. For consistency a LineageState is
> passed to these child Drivers and stored in the new Driver's QueryState.
> 
> 
> Diffs
> -----
> 
>   itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java f5ed735c1ec14dfee338e56020fa2629b168389d 
>   ql/src/java/org/apache/hadoop/hive/ql/Driver.java af9f193dc94e2e05caa88d965a34f4483c9d7069 
>   ql/src/java/org/apache/hadoop/hive/ql/QueryState.java 7d5aa8b179e536e25c41a8946e667f8dd5669e0f 
>   ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java e7af5e004fb560b574b82f6d1b60517511802f37 
>   ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java e2f8c1f8012ad25114e279747e821b291c7f4ca6 
>   ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java 1f0487f4f72ab18bcf876f45ad5758d83a7f001b 
>   ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadPartitions.java 262225fc202d4627652acfd77350e44b0284b3da 
>   ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadTable.java bb1f4e50509e57a9d0b9e6793c1fc08baa4d2981 
>   ql/src/java/org/apache/hadoop/hive/ql/hooks/HookContext.java 7b617309f6b0d8a7ce0dea80ab1f790c2651b147 
>   ql/src/java/org/apache/hadoop/hive/ql/hooks/LineageLogger.java 2f764f8a29a9d41a7db013a949ffe3a8a9417d32 
>   ql/src/java/org/apache/hadoop/hive/ql/hooks/ReadableHook.java PRE-CREATION 
>   ql/src/java/org/apache/hadoop/hive/ql/index/AggregateIndexHandler.java 68709b4d3baf15d78e60e948ccdef3df84f28cec 
>   ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexHandler.java 1e577da82343a1b7361467fb662661f9c6642ec0 
>   ql/src/java/org/apache/hadoop/hive/ql/index/TableBasedIndexHandler.java 29886ae7f97f8dae7116f4fc9a2417ab8f9dac0a 
>   ql/src/java/org/apache/hadoop/hive/ql/index/bitmap/BitmapIndexHandler.java 7b067a0d45e33bc3347c43b050af933c296a9227 
>   ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java 504b0623142a6fa6cdb45a26b49f146e12ec2d7a 
>   ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java d7a83f775abca39b219f71aff88173a14ffaee9f 
>   ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRProcContext.java 4387c4297fee48d4c03e95d5a2fcb822ab480eeb 
>   ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java 67739a1db9fc52a67f4f5ea7dba80fe0e95750c8 
>   ql/src/java/org/apache/hadoop/hive/ql/optimizer/IndexUtils.java 338c1856672f09bb7da35d2336ebb5b6f3fdc5a6 
>   ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/Generator.java e6c07713b24df719315d804f006151106eea9aed 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java 1fd634c928a5384b09d97322c3ea785f518d73fe 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java 065c7e50986872cd35386feee712f3452597d643 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezProcContext.java 0c160acf46eb1eb07c5f04091099c1024e166638 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java b6f1139fe1a78283277bf4d0c5224ab1d718c634 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java cd75130d7c5f0b402f1b4331c57edc611eb4b2ed 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/IndexUpdater.java f31775ed942160da73344c4dca707da7b8c658a6 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java 238fbd60572ee5f7f8f6c4d5b2abce8f66c7e495 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/MapReduceCompiler.java d7a56e5846d5754dec5070d8c44443543a3695e4 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/ReplicationSemanticAnalyzer.java 498b6741c3f40b92ce3fb218e91e7809a17383f0 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b66817f65f65b6aaf8dbc339a969b8e9e0565e9e 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java 7b2937032ab8dd57f8923e0a9e7aab4a92de55ee 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java be33f380030ea8b416a4549c3947d767bba66356 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkProcContext.java 4d2bcfa285dc08811106f3c234346efff22afd99 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java 604c8aee151a45cf942852a3644b5e79f779f353 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java 965044d9253585eeaeef50d7fe4fc4d818042df8 
>   ql/src/java/org/apache/hadoop/hive/ql/plan/MoveWork.java 28a33740b30b7be0057ce91de55a0407dd2f2cbf 
>   ql/src/java/org/apache/hadoop/hive/ql/session/LineageState.java 056d6141d6239816699ed5f730cbd14e48d8d9bb 
>   ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java bb6ddc6fa4667ac0e30994d0f9ee8b969542383c 
>   ql/src/test/org/apache/hadoop/hive/ql/optimizer/TestGenMapRedUtilsCreateConditionalTask.java 340689255c738ea497bcd269463b8b8bc38cf34c 
>   ql/src/test/org/apache/hadoop/hive/ql/parse/TestGenTezWork.java 2c28c398ca49ba661df460c9f3e6d578c785d3ce 
> 
> 
> Diff: https://reviews.apache.org/r/64193/diff/1/
> 
> 
> Testing
> -------
> 
> 
> Thanks,
> 
> Andrew Sherman
> 
>


Re: Review Request 64193: HIVE-18054: Make Lineage work with concurrent queries on a Session

Posted by Andrew Sherman via Review Board <no...@reviews.apache.org>.

> On Dec. 1, 2017, 5:37 p.m., Sahil Takiar wrote:
> > ql/src/java/org/apache/hadoop/hive/ql/Driver.java
> > Lines 365 (patched)
> > <https://reviews.apache.org/r/64193/diff/1/?file=1904283#file1904283line365>
> >
> >     Why do we need constructors to explicitly pass in a `LineageState` object. I thought that a `LineageState` is part of a `QueryState` oject now. Is this what you are referring to as passing the `LineageState` from a parent `Driver` to a child `Driver`?

Yes, we need to pass lineageState when a driver instantiates another Driver to run or compile another query. I've added a comment to the new constructors to explain this.


> On Dec. 1, 2017, 5:37 p.m., Sahil Takiar wrote:
> > ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
> > Line 401 (original), 401 (patched)
> > <https://reviews.apache.org/r/64193/diff/1/?file=1904286#file1904286line401>
> >
> >     Why is this necessary?

You are asking about work.getLineagState() != null ? This was some comparatively recent code which put the LineageState in the *Work classes.


> On Dec. 1, 2017, 5:37 p.m., Sahil Takiar wrote:
> > ql/src/java/org/apache/hadoop/hive/ql/hooks/ReadableHook.java
> > Lines 27 (patched)
> > <https://reviews.apache.org/r/64193/diff/1/?file=1904292#file1904292line27>
> >
> >     If this is just used in tests, why not make it a test-specific class.

At one point I needed to have the hook in the HS2 classpath, that is no longer true so I moved it to test, thanks.
.


- Andrew


-----------------------------------------------------------
This is an automatically generated e-mail. To reply, visit:
https://reviews.apache.org/r/64193/#review192514
-----------------------------------------------------------


On Nov. 30, 2017, 1:22 a.m., Andrew Sherman wrote:
> 
> -----------------------------------------------------------
> This is an automatically generated e-mail. To reply, visit:
> https://reviews.apache.org/r/64193/
> -----------------------------------------------------------
> 
> (Updated Nov. 30, 2017, 1:22 a.m.)
> 
> 
> Review request for hive.
> 
> 
> Repository: hive-git
> 
> 
> Description
> -------
> 
> A Hive Session can contain multiple concurrent sql Operations.
> Lineage is currently tracked in SessionState and is cleared when a query
> completes. This results in Lineage for other running queries being lost.
> 
> To fix this, move LineageState from SessionState to QueryState.
> In MoveTask/MoveWork use the LineageState from the MoveTask's QueryState
> rather than trying to use it from MoveWork.
> Add a test which runs multiple jdbc queries in a thread pool
> against the same connection and show that Vertices are not lost from Lineage.
> As part of this test, add ReadableHook, an ExecuteWithHookContext that stores
> HookContexts in memory and makes them available for reading.
> Make LineageLogger methods static so they can be used elsewhere.
> 
> Sometimes a running query (originating in a Driver) will instantiate
> another Driver to run or compile another query. Because these Drivers
> shared a Session, the child Driver would accumulate Lineage information
> along with that of the parent Driver. For consistency a LineageState is
> passed to these child Drivers and stored in the new Driver's QueryState.
> 
> 
> Diffs
> -----
> 
>   itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java f5ed735c1ec14dfee338e56020fa2629b168389d 
>   ql/src/java/org/apache/hadoop/hive/ql/Driver.java af9f193dc94e2e05caa88d965a34f4483c9d7069 
>   ql/src/java/org/apache/hadoop/hive/ql/QueryState.java 7d5aa8b179e536e25c41a8946e667f8dd5669e0f 
>   ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java e7af5e004fb560b574b82f6d1b60517511802f37 
>   ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java e2f8c1f8012ad25114e279747e821b291c7f4ca6 
>   ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java 1f0487f4f72ab18bcf876f45ad5758d83a7f001b 
>   ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadPartitions.java 262225fc202d4627652acfd77350e44b0284b3da 
>   ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadTable.java bb1f4e50509e57a9d0b9e6793c1fc08baa4d2981 
>   ql/src/java/org/apache/hadoop/hive/ql/hooks/HookContext.java 7b617309f6b0d8a7ce0dea80ab1f790c2651b147 
>   ql/src/java/org/apache/hadoop/hive/ql/hooks/LineageLogger.java 2f764f8a29a9d41a7db013a949ffe3a8a9417d32 
>   ql/src/java/org/apache/hadoop/hive/ql/hooks/ReadableHook.java PRE-CREATION 
>   ql/src/java/org/apache/hadoop/hive/ql/index/AggregateIndexHandler.java 68709b4d3baf15d78e60e948ccdef3df84f28cec 
>   ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexHandler.java 1e577da82343a1b7361467fb662661f9c6642ec0 
>   ql/src/java/org/apache/hadoop/hive/ql/index/TableBasedIndexHandler.java 29886ae7f97f8dae7116f4fc9a2417ab8f9dac0a 
>   ql/src/java/org/apache/hadoop/hive/ql/index/bitmap/BitmapIndexHandler.java 7b067a0d45e33bc3347c43b050af933c296a9227 
>   ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java 504b0623142a6fa6cdb45a26b49f146e12ec2d7a 
>   ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java d7a83f775abca39b219f71aff88173a14ffaee9f 
>   ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRProcContext.java 4387c4297fee48d4c03e95d5a2fcb822ab480eeb 
>   ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java 67739a1db9fc52a67f4f5ea7dba80fe0e95750c8 
>   ql/src/java/org/apache/hadoop/hive/ql/optimizer/IndexUtils.java 338c1856672f09bb7da35d2336ebb5b6f3fdc5a6 
>   ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/Generator.java e6c07713b24df719315d804f006151106eea9aed 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java 1fd634c928a5384b09d97322c3ea785f518d73fe 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java 065c7e50986872cd35386feee712f3452597d643 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezProcContext.java 0c160acf46eb1eb07c5f04091099c1024e166638 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java b6f1139fe1a78283277bf4d0c5224ab1d718c634 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java cd75130d7c5f0b402f1b4331c57edc611eb4b2ed 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/IndexUpdater.java f31775ed942160da73344c4dca707da7b8c658a6 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java 238fbd60572ee5f7f8f6c4d5b2abce8f66c7e495 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/MapReduceCompiler.java d7a56e5846d5754dec5070d8c44443543a3695e4 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/ReplicationSemanticAnalyzer.java 498b6741c3f40b92ce3fb218e91e7809a17383f0 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b66817f65f65b6aaf8dbc339a969b8e9e0565e9e 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java 7b2937032ab8dd57f8923e0a9e7aab4a92de55ee 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java be33f380030ea8b416a4549c3947d767bba66356 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkProcContext.java 4d2bcfa285dc08811106f3c234346efff22afd99 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java 604c8aee151a45cf942852a3644b5e79f779f353 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java 965044d9253585eeaeef50d7fe4fc4d818042df8 
>   ql/src/java/org/apache/hadoop/hive/ql/plan/MoveWork.java 28a33740b30b7be0057ce91de55a0407dd2f2cbf 
>   ql/src/java/org/apache/hadoop/hive/ql/session/LineageState.java 056d6141d6239816699ed5f730cbd14e48d8d9bb 
>   ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java bb6ddc6fa4667ac0e30994d0f9ee8b969542383c 
>   ql/src/test/org/apache/hadoop/hive/ql/optimizer/TestGenMapRedUtilsCreateConditionalTask.java 340689255c738ea497bcd269463b8b8bc38cf34c 
>   ql/src/test/org/apache/hadoop/hive/ql/parse/TestGenTezWork.java 2c28c398ca49ba661df460c9f3e6d578c785d3ce 
> 
> 
> Diff: https://reviews.apache.org/r/64193/diff/1/
> 
> 
> Testing
> -------
> 
> 
> Thanks,
> 
> Andrew Sherman
> 
>


Re: Review Request 64193: HIVE-18054: Make Lineage work with concurrent queries on a Session

Posted by Sahil Takiar <ta...@gmail.com>.
-----------------------------------------------------------
This is an automatically generated e-mail. To reply, visit:
https://reviews.apache.org/r/64193/#review192514
-----------------------------------------------------------




ql/src/java/org/apache/hadoop/hive/ql/Driver.java
Lines 365 (patched)
<https://reviews.apache.org/r/64193/#comment270711>

    Why do we need constructors to explicitly pass in a `LineageState` object. I thought that a `LineageState` is part of a `QueryState` oject now. Is this what you are referring to as passing the `LineageState` from a parent `Driver` to a child `Driver`?



ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
Line 401 (original), 401 (patched)
<https://reviews.apache.org/r/64193/#comment270707>

    Why is this necessary?



ql/src/java/org/apache/hadoop/hive/ql/hooks/ReadableHook.java
Lines 27 (patched)
<https://reviews.apache.org/r/64193/#comment270704>

    If this is just used in tests, why not make it a test-specific class.


- Sahil Takiar


On Nov. 30, 2017, 1:22 a.m., Andrew Sherman wrote:
> 
> -----------------------------------------------------------
> This is an automatically generated e-mail. To reply, visit:
> https://reviews.apache.org/r/64193/
> -----------------------------------------------------------
> 
> (Updated Nov. 30, 2017, 1:22 a.m.)
> 
> 
> Review request for hive.
> 
> 
> Repository: hive-git
> 
> 
> Description
> -------
> 
> A Hive Session can contain multiple concurrent sql Operations.
> Lineage is currently tracked in SessionState and is cleared when a query
> completes. This results in Lineage for other running queries being lost.
> 
> To fix this, move LineageState from SessionState to QueryState.
> In MoveTask/MoveWork use the LineageState from the MoveTask's QueryState
> rather than trying to use it from MoveWork.
> Add a test which runs multiple jdbc queries in a thread pool
> against the same connection and show that Vertices are not lost from Lineage.
> As part of this test, add ReadableHook, an ExecuteWithHookContext that stores
> HookContexts in memory and makes them available for reading.
> Make LineageLogger methods static so they can be used elsewhere.
> 
> Sometimes a running query (originating in a Driver) will instantiate
> another Driver to run or compile another query. Because these Drivers
> shared a Session, the child Driver would accumulate Lineage information
> along with that of the parent Driver. For consistency a LineageState is
> passed to these child Drivers and stored in the new Driver's QueryState.
> 
> 
> Diffs
> -----
> 
>   itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java f5ed735c1ec14dfee338e56020fa2629b168389d 
>   ql/src/java/org/apache/hadoop/hive/ql/Driver.java af9f193dc94e2e05caa88d965a34f4483c9d7069 
>   ql/src/java/org/apache/hadoop/hive/ql/QueryState.java 7d5aa8b179e536e25c41a8946e667f8dd5669e0f 
>   ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java e7af5e004fb560b574b82f6d1b60517511802f37 
>   ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java e2f8c1f8012ad25114e279747e821b291c7f4ca6 
>   ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java 1f0487f4f72ab18bcf876f45ad5758d83a7f001b 
>   ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadPartitions.java 262225fc202d4627652acfd77350e44b0284b3da 
>   ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadTable.java bb1f4e50509e57a9d0b9e6793c1fc08baa4d2981 
>   ql/src/java/org/apache/hadoop/hive/ql/hooks/HookContext.java 7b617309f6b0d8a7ce0dea80ab1f790c2651b147 
>   ql/src/java/org/apache/hadoop/hive/ql/hooks/LineageLogger.java 2f764f8a29a9d41a7db013a949ffe3a8a9417d32 
>   ql/src/java/org/apache/hadoop/hive/ql/hooks/ReadableHook.java PRE-CREATION 
>   ql/src/java/org/apache/hadoop/hive/ql/index/AggregateIndexHandler.java 68709b4d3baf15d78e60e948ccdef3df84f28cec 
>   ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexHandler.java 1e577da82343a1b7361467fb662661f9c6642ec0 
>   ql/src/java/org/apache/hadoop/hive/ql/index/TableBasedIndexHandler.java 29886ae7f97f8dae7116f4fc9a2417ab8f9dac0a 
>   ql/src/java/org/apache/hadoop/hive/ql/index/bitmap/BitmapIndexHandler.java 7b067a0d45e33bc3347c43b050af933c296a9227 
>   ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java 504b0623142a6fa6cdb45a26b49f146e12ec2d7a 
>   ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java d7a83f775abca39b219f71aff88173a14ffaee9f 
>   ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRProcContext.java 4387c4297fee48d4c03e95d5a2fcb822ab480eeb 
>   ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java 67739a1db9fc52a67f4f5ea7dba80fe0e95750c8 
>   ql/src/java/org/apache/hadoop/hive/ql/optimizer/IndexUtils.java 338c1856672f09bb7da35d2336ebb5b6f3fdc5a6 
>   ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/Generator.java e6c07713b24df719315d804f006151106eea9aed 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java 1fd634c928a5384b09d97322c3ea785f518d73fe 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java 065c7e50986872cd35386feee712f3452597d643 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezProcContext.java 0c160acf46eb1eb07c5f04091099c1024e166638 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java b6f1139fe1a78283277bf4d0c5224ab1d718c634 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java cd75130d7c5f0b402f1b4331c57edc611eb4b2ed 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/IndexUpdater.java f31775ed942160da73344c4dca707da7b8c658a6 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java 238fbd60572ee5f7f8f6c4d5b2abce8f66c7e495 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/MapReduceCompiler.java d7a56e5846d5754dec5070d8c44443543a3695e4 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/ReplicationSemanticAnalyzer.java 498b6741c3f40b92ce3fb218e91e7809a17383f0 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b66817f65f65b6aaf8dbc339a969b8e9e0565e9e 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java 7b2937032ab8dd57f8923e0a9e7aab4a92de55ee 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java be33f380030ea8b416a4549c3947d767bba66356 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkProcContext.java 4d2bcfa285dc08811106f3c234346efff22afd99 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java 604c8aee151a45cf942852a3644b5e79f779f353 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java 965044d9253585eeaeef50d7fe4fc4d818042df8 
>   ql/src/java/org/apache/hadoop/hive/ql/plan/MoveWork.java 28a33740b30b7be0057ce91de55a0407dd2f2cbf 
>   ql/src/java/org/apache/hadoop/hive/ql/session/LineageState.java 056d6141d6239816699ed5f730cbd14e48d8d9bb 
>   ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java bb6ddc6fa4667ac0e30994d0f9ee8b969542383c 
>   ql/src/test/org/apache/hadoop/hive/ql/optimizer/TestGenMapRedUtilsCreateConditionalTask.java 340689255c738ea497bcd269463b8b8bc38cf34c 
>   ql/src/test/org/apache/hadoop/hive/ql/parse/TestGenTezWork.java 2c28c398ca49ba661df460c9f3e6d578c785d3ce 
> 
> 
> Diff: https://reviews.apache.org/r/64193/diff/1/
> 
> 
> Testing
> -------
> 
> 
> Thanks,
> 
> Andrew Sherman
> 
>


Re: Review Request 64193: HIVE-18054: Make Lineage work with concurrent queries on a Session

Posted by Andrew Sherman via Review Board <no...@reviews.apache.org>.

> On Dec. 7, 2017, 8:12 p.m., Sahil Takiar wrote:
> > ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRProcContext.java
> > Line 201 (original), 208 (patched)
> > <https://reviews.apache.org/r/64193/diff/2/?file=1910683#file1910683line209>
> >
> >     Can we use `parseContext.getQueryState()`?

yes, fixed


> On Dec. 7, 2017, 8:12 p.m., Sahil Takiar wrote:
> > ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkProcContext.java
> > Lines 164 (patched)
> > <https://reviews.apache.org/r/64193/diff/2/?file=1910699#file1910699line164>
> >
> >     Think u can just use `parseContext.getQueryState()`

yes, fixed


> On Dec. 7, 2017, 8:12 p.m., Sahil Takiar wrote:
> > ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java
> > Lines 351 (patched)
> > <https://reviews.apache.org/r/64193/diff/2/?file=1910701#file1910701line351>
> >
> >     I think `pCtx#getQueryState` could be used for all the changes to the `TaskCompiler`s?

yes, fixed


- Andrew


-----------------------------------------------------------
This is an automatically generated e-mail. To reply, visit:
https://reviews.apache.org/r/64193/#review193155
-----------------------------------------------------------


On Dec. 7, 2017, 7:12 p.m., Andrew Sherman wrote:
> 
> -----------------------------------------------------------
> This is an automatically generated e-mail. To reply, visit:
> https://reviews.apache.org/r/64193/
> -----------------------------------------------------------
> 
> (Updated Dec. 7, 2017, 7:12 p.m.)
> 
> 
> Review request for hive.
> 
> 
> Repository: hive-git
> 
> 
> Description
> -------
> 
> A Hive Session can contain multiple concurrent sql Operations.
> Lineage is currently tracked in SessionState and is cleared when a query
> completes. This results in Lineage for other running queries being lost.
> 
> To fix this, move LineageState from SessionState to QueryState.
> In MoveTask/MoveWork use the LineageState from the MoveTask's QueryState
> rather than trying to use it from MoveWork.
> Add a test which runs multiple jdbc queries in a thread pool
> against the same connection and show that Vertices are not lost from Lineage.
> As part of this test, add ReadableHook, an ExecuteWithHookContext that stores
> HookContexts in memory and makes them available for reading.
> Make LineageLogger methods static so they can be used elsewhere.
> 
> Sometimes a running query (originating in a Driver) will instantiate
> another Driver to run or compile another query. Because these Drivers
> shared a Session, the child Driver would accumulate Lineage information
> along with that of the parent Driver. For consistency a LineageState is
> passed to these child Drivers and stored in the new Driver's QueryState.
> 
> 
> Diffs
> -----
> 
>   itests/hive-unit/src/test/java/org/apache/hive/jdbc/ReadableHook.java PRE-CREATION 
>   itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java 70bd29c5178456c683652cf2377206059b735514 
>   ql/src/java/org/apache/hadoop/hive/ql/Driver.java d3df015288fe1963d2b548e32db53cfc2310af21 
>   ql/src/java/org/apache/hadoop/hive/ql/QueryState.java f3a46dbcaf151706521c735654f377a2f2f76a81 
>   ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java 55ef8de9a5c7144931d0a6ff13224765ee737fea 
>   ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java f5a5e713bb0e081591a53a30caf56f97750c3f8e 
>   ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java 1f0487f4f72ab18bcf876f45ad5758d83a7f001b 
>   ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadPartitions.java 262225fc202d4627652acfd77350e44b0284b3da 
>   ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadTable.java 545b7a8b7e9f1370b767fc777cb10fa59bd81917 
>   ql/src/java/org/apache/hadoop/hive/ql/hooks/HookContext.java 7b617309f6b0d8a7ce0dea80ab1f790c2651b147 
>   ql/src/java/org/apache/hadoop/hive/ql/hooks/LineageLogger.java 2f764f8a29a9d41a7db013a949ffe3a8a9417d32 
>   ql/src/java/org/apache/hadoop/hive/ql/index/AggregateIndexHandler.java 68709b4d3baf15d78e60e948ccdef3df84f28cec 
>   ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexHandler.java 1e577da82343a1b7361467fb662661f9c6642ec0 
>   ql/src/java/org/apache/hadoop/hive/ql/index/TableBasedIndexHandler.java 29886ae7f97f8dae7116f4fc9a2417ab8f9dac0a 
>   ql/src/java/org/apache/hadoop/hive/ql/index/bitmap/BitmapIndexHandler.java 7b067a0d45e33bc3347c43b050af933c296a9227 
>   ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java 504b0623142a6fa6cdb45a26b49f146e12ec2d7a 
>   ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java d7a83f775abca39b219f71aff88173a14ffaee9f 
>   ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRProcContext.java 4387c4297fee48d4c03e95d5a2fcb822ab480eeb 
>   ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java bdaf105697fd2c2074885fa3a35548043167c7e7 
>   ql/src/java/org/apache/hadoop/hive/ql/optimizer/IndexUtils.java 338c1856672f09bb7da35d2336ebb5b6f3fdc5a6 
>   ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/Generator.java e6c07713b24df719315d804f006151106eea9aed 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java a09b7961c2dbc26b4d2fa912d0be7037885f63e4 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java 065c7e50986872cd35386feee712f3452597d643 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezProcContext.java 0c160acf46eb1eb07c5f04091099c1024e166638 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java b6f1139fe1a78283277bf4d0c5224ab1d718c634 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java 83d53bc157f35b4b57fc37bb24b6c400ac58d8ca 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/IndexUpdater.java f31775ed942160da73344c4dca707da7b8c658a6 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java cc956da57567114aa29ee0552566ca62c68f6be7 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/MapReduceCompiler.java d7a56e5846d5754dec5070d8c44443543a3695e4 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/ReplicationSemanticAnalyzer.java 498b6741c3f40b92ce3fb218e91e7809a17383f0 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java 28e3621d3264f4f704da0d775b396f7b7764fdb6 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java 7b2937032ab8dd57f8923e0a9e7aab4a92de55ee 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java be33f380030ea8b416a4549c3947d767bba66356 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkProcContext.java 4d2bcfa285dc08811106f3c234346efff22afd99 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java 604c8aee151a45cf942852a3644b5e79f779f353 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java 965044d9253585eeaeef50d7fe4fc4d818042df8 
>   ql/src/java/org/apache/hadoop/hive/ql/plan/MoveWork.java 28a33740b30b7be0057ce91de55a0407dd2f2cbf 
>   ql/src/java/org/apache/hadoop/hive/ql/session/LineageState.java 056d6141d6239816699ed5f730cbd14e48d8d9bb 
>   ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java bb6ddc6fa4667ac0e30994d0f9ee8b969542383c 
>   ql/src/test/org/apache/hadoop/hive/ql/optimizer/TestGenMapRedUtilsCreateConditionalTask.java 340689255c738ea497bcd269463b8b8bc38cf34c 
>   ql/src/test/org/apache/hadoop/hive/ql/parse/TestGenTezWork.java 2c28c398ca49ba661df460c9f3e6d578c785d3ce 
> 
> 
> Diff: https://reviews.apache.org/r/64193/diff/2/
> 
> 
> Testing
> -------
> 
> 
> Thanks,
> 
> Andrew Sherman
> 
>


Re: Review Request 64193: HIVE-18054: Make Lineage work with concurrent queries on a Session

Posted by Sahil Takiar <ta...@gmail.com>.
-----------------------------------------------------------
This is an automatically generated e-mail. To reply, visit:
https://reviews.apache.org/r/64193/#review193155
-----------------------------------------------------------




ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRProcContext.java
Line 201 (original), 208 (patched)
<https://reviews.apache.org/r/64193/#comment271675>

    Can we use `parseContext.getQueryState()`?



ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkProcContext.java
Lines 164 (patched)
<https://reviews.apache.org/r/64193/#comment271674>

    Think u can just use `parseContext.getQueryState()`



ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java
Lines 351 (patched)
<https://reviews.apache.org/r/64193/#comment271673>

    I think `pCtx#getQueryState` could be used for all the changes to the `TaskCompiler`s?


- Sahil Takiar


On Dec. 7, 2017, 7:12 p.m., Andrew Sherman wrote:
> 
> -----------------------------------------------------------
> This is an automatically generated e-mail. To reply, visit:
> https://reviews.apache.org/r/64193/
> -----------------------------------------------------------
> 
> (Updated Dec. 7, 2017, 7:12 p.m.)
> 
> 
> Review request for hive.
> 
> 
> Repository: hive-git
> 
> 
> Description
> -------
> 
> A Hive Session can contain multiple concurrent sql Operations.
> Lineage is currently tracked in SessionState and is cleared when a query
> completes. This results in Lineage for other running queries being lost.
> 
> To fix this, move LineageState from SessionState to QueryState.
> In MoveTask/MoveWork use the LineageState from the MoveTask's QueryState
> rather than trying to use it from MoveWork.
> Add a test which runs multiple jdbc queries in a thread pool
> against the same connection and show that Vertices are not lost from Lineage.
> As part of this test, add ReadableHook, an ExecuteWithHookContext that stores
> HookContexts in memory and makes them available for reading.
> Make LineageLogger methods static so they can be used elsewhere.
> 
> Sometimes a running query (originating in a Driver) will instantiate
> another Driver to run or compile another query. Because these Drivers
> shared a Session, the child Driver would accumulate Lineage information
> along with that of the parent Driver. For consistency a LineageState is
> passed to these child Drivers and stored in the new Driver's QueryState.
> 
> 
> Diffs
> -----
> 
>   itests/hive-unit/src/test/java/org/apache/hive/jdbc/ReadableHook.java PRE-CREATION 
>   itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java 70bd29c5178456c683652cf2377206059b735514 
>   ql/src/java/org/apache/hadoop/hive/ql/Driver.java d3df015288fe1963d2b548e32db53cfc2310af21 
>   ql/src/java/org/apache/hadoop/hive/ql/QueryState.java f3a46dbcaf151706521c735654f377a2f2f76a81 
>   ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java 55ef8de9a5c7144931d0a6ff13224765ee737fea 
>   ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java f5a5e713bb0e081591a53a30caf56f97750c3f8e 
>   ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java 1f0487f4f72ab18bcf876f45ad5758d83a7f001b 
>   ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadPartitions.java 262225fc202d4627652acfd77350e44b0284b3da 
>   ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadTable.java 545b7a8b7e9f1370b767fc777cb10fa59bd81917 
>   ql/src/java/org/apache/hadoop/hive/ql/hooks/HookContext.java 7b617309f6b0d8a7ce0dea80ab1f790c2651b147 
>   ql/src/java/org/apache/hadoop/hive/ql/hooks/LineageLogger.java 2f764f8a29a9d41a7db013a949ffe3a8a9417d32 
>   ql/src/java/org/apache/hadoop/hive/ql/index/AggregateIndexHandler.java 68709b4d3baf15d78e60e948ccdef3df84f28cec 
>   ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexHandler.java 1e577da82343a1b7361467fb662661f9c6642ec0 
>   ql/src/java/org/apache/hadoop/hive/ql/index/TableBasedIndexHandler.java 29886ae7f97f8dae7116f4fc9a2417ab8f9dac0a 
>   ql/src/java/org/apache/hadoop/hive/ql/index/bitmap/BitmapIndexHandler.java 7b067a0d45e33bc3347c43b050af933c296a9227 
>   ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java 504b0623142a6fa6cdb45a26b49f146e12ec2d7a 
>   ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java d7a83f775abca39b219f71aff88173a14ffaee9f 
>   ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRProcContext.java 4387c4297fee48d4c03e95d5a2fcb822ab480eeb 
>   ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java bdaf105697fd2c2074885fa3a35548043167c7e7 
>   ql/src/java/org/apache/hadoop/hive/ql/optimizer/IndexUtils.java 338c1856672f09bb7da35d2336ebb5b6f3fdc5a6 
>   ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/Generator.java e6c07713b24df719315d804f006151106eea9aed 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java a09b7961c2dbc26b4d2fa912d0be7037885f63e4 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java 065c7e50986872cd35386feee712f3452597d643 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezProcContext.java 0c160acf46eb1eb07c5f04091099c1024e166638 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java b6f1139fe1a78283277bf4d0c5224ab1d718c634 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java 83d53bc157f35b4b57fc37bb24b6c400ac58d8ca 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/IndexUpdater.java f31775ed942160da73344c4dca707da7b8c658a6 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java cc956da57567114aa29ee0552566ca62c68f6be7 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/MapReduceCompiler.java d7a56e5846d5754dec5070d8c44443543a3695e4 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/ReplicationSemanticAnalyzer.java 498b6741c3f40b92ce3fb218e91e7809a17383f0 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java 28e3621d3264f4f704da0d775b396f7b7764fdb6 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java 7b2937032ab8dd57f8923e0a9e7aab4a92de55ee 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java be33f380030ea8b416a4549c3947d767bba66356 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkProcContext.java 4d2bcfa285dc08811106f3c234346efff22afd99 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java 604c8aee151a45cf942852a3644b5e79f779f353 
>   ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java 965044d9253585eeaeef50d7fe4fc4d818042df8 
>   ql/src/java/org/apache/hadoop/hive/ql/plan/MoveWork.java 28a33740b30b7be0057ce91de55a0407dd2f2cbf 
>   ql/src/java/org/apache/hadoop/hive/ql/session/LineageState.java 056d6141d6239816699ed5f730cbd14e48d8d9bb 
>   ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java bb6ddc6fa4667ac0e30994d0f9ee8b969542383c 
>   ql/src/test/org/apache/hadoop/hive/ql/optimizer/TestGenMapRedUtilsCreateConditionalTask.java 340689255c738ea497bcd269463b8b8bc38cf34c 
>   ql/src/test/org/apache/hadoop/hive/ql/parse/TestGenTezWork.java 2c28c398ca49ba661df460c9f3e6d578c785d3ce 
> 
> 
> Diff: https://reviews.apache.org/r/64193/diff/2/
> 
> 
> Testing
> -------
> 
> 
> Thanks,
> 
> Andrew Sherman
> 
>


Re: Review Request 64193: HIVE-18054: Make Lineage work with concurrent queries on a Session

Posted by Andrew Sherman via Review Board <no...@reviews.apache.org>.
-----------------------------------------------------------
This is an automatically generated e-mail. To reply, visit:
https://reviews.apache.org/r/64193/
-----------------------------------------------------------

(Updated Dec. 7, 2017, 7:12 p.m.)


Review request for hive.


Repository: hive-git


Description
-------

A Hive Session can contain multiple concurrent sql Operations.
Lineage is currently tracked in SessionState and is cleared when a query
completes. This results in Lineage for other running queries being lost.

To fix this, move LineageState from SessionState to QueryState.
In MoveTask/MoveWork use the LineageState from the MoveTask's QueryState
rather than trying to use it from MoveWork.
Add a test which runs multiple jdbc queries in a thread pool
against the same connection and show that Vertices are not lost from Lineage.
As part of this test, add ReadableHook, an ExecuteWithHookContext that stores
HookContexts in memory and makes them available for reading.
Make LineageLogger methods static so they can be used elsewhere.

Sometimes a running query (originating in a Driver) will instantiate
another Driver to run or compile another query. Because these Drivers
shared a Session, the child Driver would accumulate Lineage information
along with that of the parent Driver. For consistency a LineageState is
passed to these child Drivers and stored in the new Driver's QueryState.


Diffs (updated)
-----

  itests/hive-unit/src/test/java/org/apache/hive/jdbc/ReadableHook.java PRE-CREATION 
  itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java 70bd29c5178456c683652cf2377206059b735514 
  ql/src/java/org/apache/hadoop/hive/ql/Driver.java d3df015288fe1963d2b548e32db53cfc2310af21 
  ql/src/java/org/apache/hadoop/hive/ql/QueryState.java f3a46dbcaf151706521c735654f377a2f2f76a81 
  ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java 55ef8de9a5c7144931d0a6ff13224765ee737fea 
  ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java f5a5e713bb0e081591a53a30caf56f97750c3f8e 
  ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java 1f0487f4f72ab18bcf876f45ad5758d83a7f001b 
  ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadPartitions.java 262225fc202d4627652acfd77350e44b0284b3da 
  ql/src/java/org/apache/hadoop/hive/ql/exec/repl/bootstrap/load/table/LoadTable.java 545b7a8b7e9f1370b767fc777cb10fa59bd81917 
  ql/src/java/org/apache/hadoop/hive/ql/hooks/HookContext.java 7b617309f6b0d8a7ce0dea80ab1f790c2651b147 
  ql/src/java/org/apache/hadoop/hive/ql/hooks/LineageLogger.java 2f764f8a29a9d41a7db013a949ffe3a8a9417d32 
  ql/src/java/org/apache/hadoop/hive/ql/index/AggregateIndexHandler.java 68709b4d3baf15d78e60e948ccdef3df84f28cec 
  ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexHandler.java 1e577da82343a1b7361467fb662661f9c6642ec0 
  ql/src/java/org/apache/hadoop/hive/ql/index/TableBasedIndexHandler.java 29886ae7f97f8dae7116f4fc9a2417ab8f9dac0a 
  ql/src/java/org/apache/hadoop/hive/ql/index/bitmap/BitmapIndexHandler.java 7b067a0d45e33bc3347c43b050af933c296a9227 
  ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java 504b0623142a6fa6cdb45a26b49f146e12ec2d7a 
  ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRFileSink1.java d7a83f775abca39b219f71aff88173a14ffaee9f 
  ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMRProcContext.java 4387c4297fee48d4c03e95d5a2fcb822ab480eeb 
  ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java bdaf105697fd2c2074885fa3a35548043167c7e7 
  ql/src/java/org/apache/hadoop/hive/ql/optimizer/IndexUtils.java 338c1856672f09bb7da35d2336ebb5b6f3fdc5a6 
  ql/src/java/org/apache/hadoop/hive/ql/optimizer/lineage/Generator.java e6c07713b24df719315d804f006151106eea9aed 
  ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java a09b7961c2dbc26b4d2fa912d0be7037885f63e4 
  ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java 065c7e50986872cd35386feee712f3452597d643 
  ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezProcContext.java 0c160acf46eb1eb07c5f04091099c1024e166638 
  ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java b6f1139fe1a78283277bf4d0c5224ab1d718c634 
  ql/src/java/org/apache/hadoop/hive/ql/parse/ImportSemanticAnalyzer.java 83d53bc157f35b4b57fc37bb24b6c400ac58d8ca 
  ql/src/java/org/apache/hadoop/hive/ql/parse/IndexUpdater.java f31775ed942160da73344c4dca707da7b8c658a6 
  ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java cc956da57567114aa29ee0552566ca62c68f6be7 
  ql/src/java/org/apache/hadoop/hive/ql/parse/MapReduceCompiler.java d7a56e5846d5754dec5070d8c44443543a3695e4 
  ql/src/java/org/apache/hadoop/hive/ql/parse/ReplicationSemanticAnalyzer.java 498b6741c3f40b92ce3fb218e91e7809a17383f0 
  ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java 28e3621d3264f4f704da0d775b396f7b7764fdb6 
  ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java 7b2937032ab8dd57f8923e0a9e7aab4a92de55ee 
  ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java be33f380030ea8b416a4549c3947d767bba66356 
  ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkProcContext.java 4d2bcfa285dc08811106f3c234346efff22afd99 
  ql/src/java/org/apache/hadoop/hive/ql/parse/spark/GenSparkUtils.java 604c8aee151a45cf942852a3644b5e79f779f353 
  ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java 965044d9253585eeaeef50d7fe4fc4d818042df8 
  ql/src/java/org/apache/hadoop/hive/ql/plan/MoveWork.java 28a33740b30b7be0057ce91de55a0407dd2f2cbf 
  ql/src/java/org/apache/hadoop/hive/ql/session/LineageState.java 056d6141d6239816699ed5f730cbd14e48d8d9bb 
  ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java bb6ddc6fa4667ac0e30994d0f9ee8b969542383c 
  ql/src/test/org/apache/hadoop/hive/ql/optimizer/TestGenMapRedUtilsCreateConditionalTask.java 340689255c738ea497bcd269463b8b8bc38cf34c 
  ql/src/test/org/apache/hadoop/hive/ql/parse/TestGenTezWork.java 2c28c398ca49ba661df460c9f3e6d578c785d3ce 


Diff: https://reviews.apache.org/r/64193/diff/2/

Changes: https://reviews.apache.org/r/64193/diff/1-2/


Testing
-------


Thanks,

Andrew Sherman