svn commit: r1585754 - in /hive/trunk/ql/src:
java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java
test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
Author: omalley
Date: Tue Apr 8 15:10:01 2014
New Revision: 1585754
URL: http://svn.apache.org/r1585754
Log:
HIVE-6787. Only add ACID OrcInputSplits when the partition is actually ACID
format. (omalley)
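
For background on the log message: an ACID table partition holds base_N/ and delta_x_y/ subdirectories, while a non-ACID ORC partition holds only plain bucket files, so the presence of delta directories is what makes a partition "actually ACID format". Below is a minimal sketch of that classification, assuming the standard "delta_" directory prefix; the helper is hypothetical and not part of this patch, which instead reuses the delta list already computed during split generation.

import java.io.IOException;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

final class PartitionKindSketch {
  // Treat a partition as ACID iff it contains delta_* subdirectories.
  static boolean looksAcid(FileSystem fs, Path partition) throws IOException {
    for (FileStatus stat : fs.listStatus(partition)) {
      if (stat.isDirectory() &&
          stat.getPath().getName().startsWith("delta_")) {
        return true;
      }
    }
    return false;
  }
}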
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java
hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java?rev=1585754&r1=1585753&r2=1585754&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java Tue Apr 8 15:10:01 2014
@@ -536,10 +536,12 @@ public class OrcInputFormat implements
       // Generate a split for any buckets that weren't covered.
       // This happens in the case where a bucket just has deltas and no
       // base.
-      for(int b=0; b < context.numBuckets; ++b) {
-        if (!covered[b]) {
-          context.splits.add(new OrcSplit(dir, b, 0, new String[0], null,
-              false, false, deltas));
+      if (!deltas.isEmpty()) {
+        for (int b = 0; b < context.numBuckets; ++b) {
+          if (!covered[b]) {
+            context.splits.add(new OrcSplit(dir, b, 0, new String[0], null,
+                false, false, deltas));
+          }
         }
       }
     } catch (Throwable th) {
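
The guard works because deltas is non-empty only when delta directories were found in the partition; a plain ORC partition yields an empty list, so the uncovered-bucket loop is skipped and no delta-only ACID splits are emitted. Here is a self-contained sketch of the same pattern, using simplified stand-ins for OrcSplit and the context fields.

import java.util.ArrayList;
import java.util.List;

public class DeltaOnlySplitSketch {
  // Simplified stand-in for OrcSplit: records only the bucket and deltas.
  static final class Split {
    final int bucket;
    final List<String> deltas;
    Split(int bucket, List<String> deltas) {
      this.bucket = bucket;
      this.deltas = deltas;
    }
  }

  // Emit a split for each bucket that has no base file, but only when the
  // partition actually has deltas, i.e. is ACID (the HIVE-6787 guard).
  static List<Split> uncoveredBucketSplits(int numBuckets, boolean[] covered,
                                           List<String> deltas) {
    List<Split> splits = new ArrayList<Split>();
    if (!deltas.isEmpty()) {
      for (int b = 0; b < numBuckets; ++b) {
        if (!covered[b]) {
          splits.add(new Split(b, deltas));
        }
      }
    }
    return splits;
  }

  public static void main(String[] args) {
    // Non-ACID partition: no deltas, so bucket 2 stays uncovered and no
    // spurious split is produced.
    boolean[] covered = {true, true, false};
    List<String> noDeltas = new ArrayList<String>();
    System.out.println(uncoveredBucketSplits(3, covered, noDeltas).size()); // 0
  }
}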
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java?rev=1585754&r1=1585753&r2=1585754&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java Tue Apr 8 15:10:01 2014
@@ -175,9 +175,11 @@ public class OrcRecordUpdater implements
       FSDataOutputStream strm = fs.create(new Path(path, ACID_FORMAT), false);
       strm.writeInt(ORC_ACID_VERSION);
       strm.close();
-      LOG.info("Created " + path + "/" + ACID_FORMAT);
     } catch (IOException ioe) {
-      // we just need one task to write this file
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Failed to create " + path + "/" + ACID_FORMAT + " with " +
+            ioe);
+      }
     }
     if (options.getMinimumTransactionId() != options.getMaximumTransactionId()
         && !options.isWritingBase()){
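
The surrounding code is a first-writer-wins convention: every task attempts fs.create(..., false) (no overwrite) on the same marker file, exactly one creation succeeds, and the IOException thrown to the rest is expected rather than an error, which is why the patch demotes the message to debug level instead of logging at info on every task. A minimal sketch of the pattern against the Hadoop FileSystem API follows; the helper name is hypothetical, while the marker-file idea mirrors the ACID_FORMAT / ORC_ACID_VERSION code in the diff.

import java.io.IOException;

import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

final class MarkerFileSketch {
  // Attempt to create a marker file; only one concurrent caller wins.
  static void writeVersionFile(FileSystem fs, Path dir, String name,
                               int version) {
    try {
      // overwrite=false: a second creator gets an IOException instead of
      // clobbering the file, which is the behavior the convention relies on.
      FSDataOutputStream out = fs.create(new Path(dir, name), false);
      out.writeInt(version);
      out.close();
    } catch (IOException ioe) {
      // Expected for every task but the first; safe to ignore.
    }
  }
}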
Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java?rev=1585754&r1=1585753&r2=1585754&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java Tue Apr 8 15:10:01 2014
@@ -48,6 +48,7 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.exec.mr.ExecMapper;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
@@ -1078,6 +1079,54 @@ public class TestInputOutputFormat {
     assertEquals(false, reader.next(key, value));
   }
 
+  /**
+   * Test vectorization with buckets, non-acid, non-combine.
+   * @throws Exception
+   */
+  @Test
+  public void testVectorizationWithBuckets() throws Exception {
+    // get the object inspector for MyRow
+    StructObjectInspector inspector;
+    synchronized (TestOrcFile.class) {
+      inspector = (StructObjectInspector)
+          ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class,
+              ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+    }
+    JobConf conf = createMockExecutionEnvironment(workDir, new Path("mock:///"),
+        inspector, true);
+
+    // write the orc file to the mock file system
+    Writer writer =
+        OrcFile.createWriter(new Path(conf.get("mapred.input.dir") + "/0_0"),
+            OrcFile.writerOptions(conf).blockPadding(false)
+                .bufferSize(1024).inspector(inspector));
+    for(int i=0; i < 10; ++i) {
+      writer.addRow(new MyRow(i, 2*i));
+    }
+    writer.close();
+    ((MockOutputStream) ((WriterImpl) writer).getStream())
+        .setBlocks(new MockBlock("host0", "host1"));
+
+    // call getsplits
+    conf.setInt(hive_metastoreConstants.BUCKET_COUNT, 3);
+    HiveInputFormat<?,?> inputFormat =
+        new HiveInputFormat<WritableComparable, Writable>();
+    InputSplit[] splits = inputFormat.getSplits(conf, 10);
+    assertEquals(1, splits.length);
+
+    org.apache.hadoop.mapred.RecordReader<NullWritable, VectorizedRowBatch>
+        reader = inputFormat.getRecordReader(splits[0], conf, Reporter.NULL);
+    NullWritable key = reader.createKey();
+    VectorizedRowBatch value = reader.createValue();
+    assertEquals(true, reader.next(key, value));
+    assertEquals(10, value.count());
+    LongColumnVector col0 = (LongColumnVector) value.cols[0];
+    for(int i=0; i < 10; i++) {
+      assertEquals("checking " + i, i, col0.vector[i]);
+    }
+    assertEquals(false, reader.next(key, value));
+  }
+
   // test acid with vectorization, no combine
   @Test
   public void testVectorizationWithAcid() throws Exception {
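
The new testVectorizationWithBuckets pins down the regression: a non-ACID table declared with bucket_count 3 but containing a single bucket file should yield exactly one split, whereas the pre-fix code would also have emitted a delta-only ACID split for each of the two uncovered buckets. Here is a back-of-the-envelope sketch of the split count being asserted, under the simplifying assumption that each bucket file maps to exactly one split; the helper is illustrative only.

final class SplitCountSketch {
  // Expected getSplits() result for one partition directory: with deltas,
  // every bucket is represented (file-backed or delta-only); without
  // deltas, only the real files count (the HIVE-6787 behavior).
  static int expectedSplits(int bucketFiles, int numBuckets, boolean hasDeltas) {
    return hasDeltas ? numBuckets : bucketFiles;
  }

  public static void main(String[] args) {
    System.out.println(expectedSplits(1, 3, false)); // 1, as the test asserts
    System.out.println(expectedSplits(1, 3, true));  // 3, delta-only splits added
  }
}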