You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@hive.apache.org by "Eugene Koifman (JIRA)" <ji...@apache.org> on 2014/09/24 22:58:37 UTC
[jira] [Created] (HIVE-8247) Pig cursor written to Hive via HCat
doesn't NULL-fill missing columns
Eugene Koifman created HIVE-8247:
------------------------------------
Summary: Pig cursor written to Hive via HCat doesn't NULL-fill missing columns
Key: HIVE-8247
URL: https://issues.apache.org/jira/browse/HIVE-8247
Project: Hive
Issue Type: Bug
Components: HCatalog
Affects Versions: 0.13.1
Reporter: Eugene Koifman
This started out as BUG-15650 but in BUG-15650 it's no longer clear what the real issue is so I'm filing a new ticket.
Suppose a Hive table has columns (a,b,c,d)
If a Pig script writing to this table produces schema (a,b,c) it works: 'd' will be NULL.
If a Pig script writing to this table produces schema (a,b,d) it fails with error below.
This is an old issue. There is nothing in HCatalog documentation that indicates whether this should work.
{noformat}
Running org.apache.hive.hcatalog.pig.TestOrcHCatStorer
Tests run: 1, Failures: 0, Errors: 1, Skipped: 0, Time elapsed: 30.113 sec <<< FAILURE! - in org.apache.hive.hcatalog.pig.TestOrcHCatStorer
partialSchemaSepcification(org.apache.hive.hcatalog.pig.TestOrcHCatStorer) Time elapsed: 29.886 sec <<< ERROR!
org.apache.pig.impl.logicalLayer.FrontendException: Unable to store alias ABD
at org.apache.pig.PigServer$Graph.registerQuery(PigServer.java:1635)
at org.apache.pig.PigServer.registerQuery(PigServer.java:575)
at org.apache.hive.hcatalog.mapreduce.HCatBaseTest.logAndRegister(HCatBaseTest.java:92)
at org.apache.hive.hcatalog.pig.TestHCatStorer.partialSchemaSepcification(TestHCatStorer.java:1035)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
at java.lang.reflect.Method.invoke(Method.java:597)
at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:45)
at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:15)
at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:42)
at org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:20)
at org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:28)
at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:263)
at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:68)
at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:47)
at org.junit.runners.ParentRunner$3.run(ParentRunner.java:231)
at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:60)
at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:229)
at org.junit.runners.ParentRunner.access$000(ParentRunner.java:50)
at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:222)
at org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:28)
at org.junit.runners.ParentRunner.run(ParentRunner.java:300)
at org.apache.maven.surefire.junit4.JUnit4Provider.execute(JUnit4Provider.java:254)
at org.apache.maven.surefire.junit4.JUnit4Provider.executeTestSet(JUnit4Provider.java:149)
at org.apache.maven.surefire.junit4.JUnit4Provider.invoke(JUnit4Provider.java:124)
at org.apache.maven.surefire.booter.ForkedBooter.invokeProviderInSameClassLoader(ForkedBooter.java:200)
at org.apache.maven.surefire.booter.ForkedBooter.runSuitesInProcess(ForkedBooter.java:153)
at org.apache.maven.surefire.booter.ForkedBooter.main(ForkedBooter.java:103)
Caused by: org.apache.pig.impl.plan.VisitorException:
<line 7, column 0> Output Location Validation Failed for: 'T More info to follow:
org.apache.hive.hcatalog.common.HCatException : 2007 : Invalid column position in partition schema : Expected column <c> at position 3, found column <d>
at org.apache.pig.newplan.logical.rules.InputOutputFileValidator$InputOutputFileVisitor.visit(InputOutputFileValidator.java:75)
at org.apache.pig.newplan.logical.relational.LOStore.accept(LOStore.java:66)
at org.apache.pig.newplan.DepthFirstWalker.depthFirst(DepthFirstWalker.java:64)
at org.apache.pig.newplan.DepthFirstWalker.depthFirst(DepthFirstWalker.java:66)
at org.apache.pig.newplan.DepthFirstWalker.depthFirst(DepthFirstWalker.java:66)
at org.apache.pig.newplan.DepthFirstWalker.depthFirst(DepthFirstWalker.java:66)
at org.apache.pig.newplan.DepthFirstWalker.depthFirst(DepthFirstWalker.java:66)
at org.apache.pig.newplan.DepthFirstWalker.walk(DepthFirstWalker.java:53)
at org.apache.pig.newplan.PlanVisitor.visit(PlanVisitor.java:52)
at org.apache.pig.newplan.logical.rules.InputOutputFileValidator.validate(InputOutputFileValidator.java:45)
at org.apache.pig.backend.hadoop.executionengine.HExecutionEngine.compile(HExecutionEngine.java:303)
at org.apache.pig.PigServer.compilePp(PigServer.java:1380)
at org.apache.pig.PigServer.executeCompiledLogicalPlan(PigServer.java:1305)
at org.apache.pig.PigServer.execute(PigServer.java:1297)
at org.apache.pig.PigServer.access$400(PigServer.java:122)
at org.apache.pig.PigServer$Graph.registerQuery(PigServer.java:1630)
at org.apache.pig.PigServer.registerQuery(PigServer.java:575)
at org.apache.hive.hcatalog.mapreduce.HCatBaseTest.logAndRegister(HCatBaseTest.java:92)
at org.apache.hive.hcatalog.pig.TestHCatStorer.partialSchemaSepcification(TestHCatStorer.java:1035)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
at java.lang.reflect.Method.invoke(Method.java:597)
at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:45)
at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:15)
at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:42)
at org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:20)
at org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:28)
at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:263)
at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:68)
at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:47)
at org.junit.runners.ParentRunner$3.run(ParentRunner.java:231)
at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:60)
at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:229)
at org.junit.runners.ParentRunner.access$000(ParentRunner.java:50)
at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:222)
at org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:28)
at org.junit.runners.ParentRunner.run(ParentRunner.java:300)
at org.apache.maven.surefire.junit4.JUnit4Provider.execute(JUnit4Provider.java:254)
at org.apache.maven.surefire.junit4.JUnit4Provider.executeTestSet(JUnit4Provider.java:149)
at org.apache.maven.surefire.junit4.JUnit4Provider.invoke(JUnit4Provider.java:124)
at org.apache.maven.surefire.booter.ForkedBooter.invokeProviderInSameClassLoader(ForkedBooter.java:200)
at org.apache.maven.surefire.booter.ForkedBooter.runSuitesInProcess(ForkedBooter.java:153)
at org.apache.maven.surefire.booter.ForkedBooter.main(ForkedBooter.java:103)
Caused by: org.apache.hive.hcatalog.common.HCatException: org.apache.hive.hcatalog.common.HCatException : 2007 : Invalid column position in partition schema : Expected column <c> at position 3, found column <d>
at org.apache.hive.hcatalog.common.HCatUtil.validatePartitionSchema(HCatUtil.java:258)
at org.apache.hive.hcatalog.mapreduce.HCatBaseOutputFormat.setPartDetails(HCatBaseOutputFormat.java:231)
at org.apache.hive.hcatalog.mapreduce.HCatOutputFormat.setSchema(HCatOutputFormat.java:244)
at org.apache.hive.hcatalog.mapreduce.HCatOutputFormat.setSchema(HCatOutputFormat.java:231)
at org.apache.hive.hcatalog.pig.HCatStorer.setStoreLocation(HCatStorer.java:206)
at org.apache.pig.newplan.logical.rules.InputOutputFileValidator$InputOutputFileVisitor.visit(InputOutputFileValidator.java:68)
at org.apache.pig.newplan.logical.relational.LOStore.accept(LOStore.java:66)
at org.apache.pig.newplan.DepthFirstWalker.depthFirst(DepthFirstWalker.java:64)
at org.apache.pig.newplan.DepthFirstWalker.depthFirst(DepthFirstWalker.java:66)
at org.apache.pig.newplan.DepthFirstWalker.depthFirst(DepthFirstWalker.java:66)
at org.apache.pig.newplan.DepthFirstWalker.depthFirst(DepthFirstWalker.java:66)
at org.apache.pig.newplan.DepthFirstWalker.depthFirst(DepthFirstWalker.java:66)
at org.apache.pig.newplan.DepthFirstWalker.walk(DepthFirstWalker.java:53)
at org.apache.pig.newplan.PlanVisitor.visit(PlanVisitor.java:52)
at org.apache.pig.newplan.logical.rules.InputOutputFileValidator.validate(InputOutputFileValidator.java:45)
at org.apache.pig.backend.hadoop.executionengine.HExecutionEngine.compile(HExecutionEngine.java:303)
at org.apache.pig.PigServer.compilePp(PigServer.java:1380)
at org.apache.pig.PigServer.executeCompiledLogicalPlan(PigServer.java:1305)
at org.apache.pig.PigServer.execute(PigServer.java:1297)
at org.apache.pig.PigServer.access$400(PigServer.java:122)
at org.apache.pig.PigServer$Graph.registerQuery(PigServer.java:1630)
at org.apache.pig.PigServer.registerQuery(PigServer.java:575)
at org.apache.hive.hcatalog.mapreduce.HCatBaseTest.logAndRegister(HCatBaseTest.java:92)
at org.apache.hive.hcatalog.pig.TestHCatStorer.partialSchemaSepcification(TestHCatStorer.java:1035)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
at java.lang.reflect.Method.invoke(Method.java:597)
at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:45)
at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:15)
at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:42)
at org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:20)
at org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:28)
at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:263)
at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:68)
at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:47)
at org.junit.runners.ParentRunner$3.run(ParentRunner.java:231)
at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:60)
at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:229)
at org.junit.runners.ParentRunner.access$000(ParentRunner.java:50)
at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:222)
at org.junit.internal.runners.statements.RunBefores.evaluate(RunBefores.java:28)
at org.junit.runners.ParentRunner.run(ParentRunner.java:300)
at org.apache.maven.surefire.junit4.JUnit4Provider.execute(JUnit4Provider.java:254)
at org.apache.maven.surefire.junit4.JUnit4Provider.executeTestSet(JUnit4Provider.java:149)
at org.apache.maven.surefire.junit4.JUnit4Provider.invoke(JUnit4Provider.java:124)
at org.apache.maven.surefire.booter.ForkedBooter.invokeProviderInSameClassLoader(ForkedBooter.java:200)
at org.apache.maven.surefire.booter.ForkedBooter.runSuitesInProcess(ForkedBooter.java:153)
at org.apache.maven.surefire.booter.ForkedBooter.main(ForkedBooter.java:103)
Results :
Tests in error:
TestOrcHCatStorer>TestHCatStorer.partialSchemaSepcification:1035->HCatBaseTest.logAndRegister:92 ? Frontend
{noformat}
Reproducer (which can be added to org.apache.hive.hcatalog.pig.TestHCatStorer)
{noformat}
@Test
public void partialSchemaSepcification() throws Exception {
driver.run("drop table if exists T");
String createTable = "create table T(a int, b int, c string, d string) stored as " + getStorageFormat();
int retCode = driver.run(createTable).getResponseCode();
if (retCode != 0) {
throw new IOException("Failed to create table.");
}
String[] inputData = {"1\t20\tstr1\tstr20", "2\t30\tstr2\tstr30", "3\t40\tstr3\tstr40", "4\t50\tstr4\tstr40"};
HcatTestUtils.createTestDataFile(INPUT_FILE_NAME, inputData);
int lineNumber = 1;
PigServer ps = createPigServer(true);
logAndRegister(ps, "A1 = LOAD '" + INPUT_FILE_NAME + "' USING PigStorage() AS (a:int,b:int,c:chararray,d:chararray);", lineNumber++);
logAndRegister(ps, "ROW1 = FILTER A1 BY a == 1;", lineNumber++);
logAndRegister(ps, "ABC = FOREACH ROW1 GENERATE a,b,c;", lineNumber++);
logAndRegister(ps, "STORE ABC INTO 'T' USING " + HCatStorer.class.getName() + "();", lineNumber++);
logAndRegister(ps, "ROW2 = FILTER A1 BY a == 2;", lineNumber++);
logAndRegister(ps, "ABD = FOREACH ROW2 GENERATE a,b,d;", lineNumber++);
logAndRegister(ps, "STORE ABD INTO 'T' USING " + HCatStorer.class.getName() + "();", lineNumber);
driver.run("select * from T");
ArrayList<String> results = new ArrayList<String>();
driver.getResults(results);
Assert.assertEquals(2, results.size());
driver.run("drop table T");
}
{noformat}
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)