Posted to commits@hive.apache.org by om...@apache.org on 2014/04/08 17:10:01 UTC

svn commit: r1585754 - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java

Author: omalley
Date: Tue Apr  8 15:10:01 2014
New Revision: 1585754

URL: http://svn.apache.org/r1585754
Log:
HIVE-6787. Only add ACID OrcInputSplits when the partition is actually ACID
format. (omalley)
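
In short: before this change, OrcInputFormat generated a synthetic ACID
OrcSplit for every bucket that had no base file, even when the partition
directory contained no delta files at all, i.e. plain non-ACID ORC data.
The loop is now guarded on the presence of deltas (see the sketch after
the first hunk below), the logging in OrcRecordUpdater around the
ACID_FORMAT flag file is quieted to debug level, and a new test covers
the bucketed, non-ACID case.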

Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java?rev=1585754&r1=1585753&r2=1585754&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java Tue Apr  8 15:10:01 2014
@@ -536,10 +536,12 @@ public class OrcInputFormat  implements 
         // Generate a split for any buckets that weren't covered.
         // This happens in the case where a bucket just has deltas and no
         // base.
-        for(int b=0; b < context.numBuckets; ++b) {
-          if (!covered[b]) {
-            context.splits.add(new OrcSplit(dir, b, 0, new String[0], null,
-                               false, false, deltas));
+        if (!deltas.isEmpty()) {
+          for (int b = 0; b < context.numBuckets; ++b) {
+            if (!covered[b]) {
+              context.splits.add(new OrcSplit(dir, b, 0, new String[0], null,
+                  false, false, deltas));
+            }
           }
         }
       } catch (Throwable th) {
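
To spell out the logic this hunk encodes, here is a minimal,
self-contained sketch. The class and method names are hypothetical,
written only for illustration; the patch itself simply wraps the
existing loop in the deltas check.

    import java.util.List;

    /**
     * Hypothetical illustration of the HIVE-6787 guard: a bucket with no
     * base file only needs a synthetic ACID split when delta directories
     * are actually present. With no deltas the directory holds plain ORC
     * data, and the ordinary file-based splits already cover it.
     */
    class BucketSplitGuard {
      static boolean needsSyntheticSplit(boolean bucketCovered,
                                         List<?> deltas) {
        // Pre-patch behavior was equivalent to skipping the deltas check,
        // which produced one spurious ACID split per uncovered bucket of
        // a non-ACID table.
        return !bucketCovered && !deltas.isEmpty();
      }
    }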

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java?rev=1585754&r1=1585753&r2=1585754&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRecordUpdater.java Tue Apr  8 15:10:01 2014
@@ -175,9 +175,11 @@ public class OrcRecordUpdater implements
       FSDataOutputStream strm = fs.create(new Path(path, ACID_FORMAT), false);
       strm.writeInt(ORC_ACID_VERSION);
       strm.close();
-      LOG.info("Created " + path + "/" + ACID_FORMAT);
     } catch (IOException ioe) {
-      // we just need one task to write this file
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Failed to create " + path + "/" + ACID_FORMAT + " with " +
+            ioe);
+      }
     }
     if (options.getMinimumTransactionId() != options.getMaximumTransactionId()
         && !options.isWritingBase()){
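
Context for this hunk: the surrounding code creates the ACID_FORMAT flag
file with overwrite=false, so when several tasks race, exactly one
create() succeeds and the others get an IOException, which is expected.
The patch therefore drops the per-task "Created ..." info message and
reports the expected create failure only at debug level. A self-contained
sketch of the idiom follows; the helper class and method names are
hypothetical, for illustration only.

    import java.io.IOException;
    import org.apache.hadoop.fs.FSDataOutputStream;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    class FlagFileSketch {
      /**
       * First task wins: create with overwrite=false so exactly one of
       * the concurrent writers succeeds; the losers' IOExceptions are
       * expected and worth no more than a debug message.
       */
      static void writeFlagFile(FileSystem fs, Path dir, String name,
                                int version) {
        try (FSDataOutputStream out =
                 fs.create(new Path(dir, name), false)) {
          out.writeInt(version);
        } catch (IOException expected) {
          // Another task already created the file; nothing to do.
        }
      }
    }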

Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java?rev=1585754&r1=1585753&r2=1585754&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java Tue Apr  8 15:10:01 2014
@@ -48,6 +48,7 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.exec.mr.ExecMapper;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
@@ -1078,6 +1079,54 @@ public class TestInputOutputFormat {
     assertEquals(false, reader.next(key, value));
   }
 
+  /**
+   * Test vectorization with buckets, non-acid, non-combine.
+   * @throws Exception
+   */
+  @Test
+  public void testVectorizationWithBuckets() throws Exception {
+    // get the object inspector for MyRow
+    StructObjectInspector inspector;
+    synchronized (TestOrcFile.class) {
+      inspector = (StructObjectInspector)
+          ObjectInspectorFactory.getReflectionObjectInspector(MyRow.class,
+              ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+    }
+    JobConf conf = createMockExecutionEnvironment(workDir, new Path("mock:///"),
+        inspector, true);
+
+    // write the orc file to the mock file system
+    Writer writer =
+        OrcFile.createWriter(new Path(conf.get("mapred.input.dir") + "/0_0"),
+            OrcFile.writerOptions(conf).blockPadding(false)
+                .bufferSize(1024).inspector(inspector));
+    for(int i=0; i < 10; ++i) {
+      writer.addRow(new MyRow(i, 2*i));
+    }
+    writer.close();
+    ((MockOutputStream) ((WriterImpl) writer).getStream())
+        .setBlocks(new MockBlock("host0", "host1"));
+
+    // call getsplits
+    conf.setInt(hive_metastoreConstants.BUCKET_COUNT, 3);
+    HiveInputFormat<?,?> inputFormat =
+        new HiveInputFormat<WritableComparable, Writable>();
+    InputSplit[] splits = inputFormat.getSplits(conf, 10);
+    assertEquals(1, splits.length);
+
+    org.apache.hadoop.mapred.RecordReader<NullWritable, VectorizedRowBatch>
+        reader = inputFormat.getRecordReader(splits[0], conf, Reporter.NULL);
+    NullWritable key = reader.createKey();
+    VectorizedRowBatch value = reader.createValue();
+    assertEquals(true, reader.next(key, value));
+    assertEquals(10, value.count());
+    LongColumnVector col0 = (LongColumnVector) value.cols[0];
+    for(int i=0; i < 10; i++) {
+      assertEquals("checking " + i, i, col0.vector[i]);
+    }
+    assertEquals(false, reader.next(key, value));
+  }
+
   // test acid with vectorization, no combine
   @Test
   public void testVectorizationWithAcid() throws Exception {