You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@hive.apache.org by "Stamatis Zampetakis (Jira)" <ji...@apache.org> on 2020/09/01 11:29:00 UTC

[jira] [Created] (HIVE-24104) NPE due to null key columns in ReduceSink after deduplication

Stamatis Zampetakis created HIVE-24104:
------------------------------------------

             Summary: NPE due to null key columns in ReduceSink after deduplication
                 Key: HIVE-24104
                 URL: https://issues.apache.org/jira/browse/HIVE-24104
             Project: Hive
          Issue Type: Bug
          Components: Physical Optimizer
            Reporter: Stamatis Zampetakis
            Assignee: Stamatis Zampetakis


In some cases the {{ReduceSinkDeDuplication}} optimization creates ReduceSink operators where the key columns are null. This can lead to NPE in various places in the code. 

The following stracktrace shows an example where NPE is raised due to key columns being null.

{noformat}
java.lang.NullPointerException
	at org.apache.hadoop.hive.ql.plan.ExprNodeDesc$ExprNodeDescEqualityWrapper.equals(ExprNodeDesc.java:141)
	at java.util.AbstractList.equals(AbstractList.java:523)
	at org.apache.hadoop.hive.ql.optimizer.SetReducerParallelism.process(SetReducerParallelism.java:101)
	at org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher.dispatch(DefaultRuleDispatcher.java:90)
	at org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.dispatchAndReturn(DefaultGraphWalker.java:105)
	at org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.dispatch(DefaultGraphWalker.java:89)
	at org.apache.hadoop.hive.ql.lib.ForwardWalker.walk(ForwardWalker.java:74)
	at org.apache.hadoop.hive.ql.lib.DefaultGraphWalker.startWalking(DefaultGraphWalker.java:120)
	at org.apache.hadoop.hive.ql.parse.TezCompiler.runStatsDependentOptimizations(TezCompiler.java:492)
	at org.apache.hadoop.hive.ql.parse.TezCompiler.optimizeOperatorPlan(TezCompiler.java:226)
	at org.apache.hadoop.hive.ql.parse.TaskCompiler.compile(TaskCompiler.java:161)
	at org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.analyzeInternal(SemanticAnalyzer.java:12643)
	at org.apache.hadoop.hive.ql.parse.CalcitePlanner.analyzeInternal(CalcitePlanner.java:443)
	at org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:301)
	at org.apache.hadoop.hive.ql.parse.ExplainSemanticAnalyzer.analyzeInternal(ExplainSemanticAnalyzer.java:171)
	at org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.analyze(BaseSemanticAnalyzer.java:301)
	at org.apache.hadoop.hive.ql.Compiler.analyze(Compiler.java:220)
	at org.apache.hadoop.hive.ql.Compiler.compile(Compiler.java:104)
	at org.apache.hadoop.hive.ql.Driver.compile(Driver.java:173)
	at org.apache.hadoop.hive.ql.Driver.compileInternal(Driver.java:414)
	at org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:363)
	at org.apache.hadoop.hive.ql.Driver.compileAndRespond(Driver.java:357)
	at org.apache.hadoop.hive.ql.reexec.ReExecDriver.compileAndRespond(ReExecDriver.java:129)
	at org.apache.hadoop.hive.ql.reexec.ReExecDriver.run(ReExecDriver.java:231)
	at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:258)
	at org.apache.hadoop.hive.cli.CliDriver.processCmd1(CliDriver.java:203)
	at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:129)
	at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:424)
	at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:355)
	at org.apache.hadoop.hive.ql.QTestUtil.executeClientInternal(QTestUtil.java:740)
	at org.apache.hadoop.hive.ql.QTestUtil.executeClient(QTestUtil.java:710)
	at org.apache.hadoop.hive.cli.control.CoreCliDriver.runTest(CoreCliDriver.java:170)
	at org.apache.hadoop.hive.cli.control.CliAdapter.runTest(CliAdapter.java:157)
	at org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver(TestMiniLlapLocalCliDriver.java:62)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:59)
	at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)
	at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:56)
	at org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17)
	at org.apache.hadoop.hive.cli.control.CliAdapter$2$1.evaluate(CliAdapter.java:135)
	at org.junit.runners.ParentRunner$3.evaluate(ParentRunner.java:306)
	at org.junit.runners.BlockJUnit4ClassRunner$1.evaluate(BlockJUnit4ClassRunner.java:100)
	at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:366)
	at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:103)
	at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:63)
	at org.junit.runners.ParentRunner$4.run(ParentRunner.java:331)
	at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:79)
	at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:329)
	at org.junit.runners.ParentRunner.access$100(ParentRunner.java:66)
	at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:293)
	at org.junit.runners.ParentRunner.run(ParentRunner.java:413)
	at org.junit.runners.Suite.runChild(Suite.java:128)
	at org.junit.runners.Suite.runChild(Suite.java:27)
	at org.junit.runners.ParentRunner$4.run(ParentRunner.java:331)
	at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:79)
	at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:329)
	at org.junit.runners.ParentRunner.access$100(ParentRunner.java:66)
	at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:293)
	at org.apache.hadoop.hive.cli.control.CliAdapter$1$1.evaluate(CliAdapter.java:95)
	at org.junit.rules.RunRules.evaluate(RunRules.java:20)
	at org.junit.runners.ParentRunner$3.evaluate(ParentRunner.java:306)
	at org.junit.runners.ParentRunner.run(ParentRunner.java:413)
	at org.apache.maven.surefire.junit4.JUnit4Provider.execute(JUnit4Provider.java:365)
	at org.apache.maven.surefire.junit4.JUnit4Provider.executeWithRerun(JUnit4Provider.java:273)
	at org.apache.maven.surefire.junit4.JUnit4Provider.executeTestSet(JUnit4Provider.java:238)
	at org.apache.maven.surefire.junit4.JUnit4Provider.invoke(JUnit4Provider.java:159)
	at org.apache.maven.surefire.booter.ForkedBooter.runSuitesInProcess(ForkedBooter.java:377)
	at org.apache.maven.surefire.booter.ForkedBooter.execute(ForkedBooter.java:138)
	at org.apache.maven.surefire.booter.ForkedBooter.run(ForkedBooter.java:465)
	at org.apache.maven.surefire.booter.ForkedBooter.main(ForkedBooter.java:451)
{noformat}




--
This message was sent by Atlassian Jira
(v8.3.4#803005)