Posted to commits@hive.apache.org by gu...@apache.org on 2013/11/13 21:54:51 UTC

svn commit: r1541706 - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/io/orc/ java/org/apache/hadoop/hive/ql/io/sarg/ test/org/apache/hadoop/hive/ql/io/orc/ test/queries/clientpositive/ test/results/clientpositive/

Author: gunther
Date: Wed Nov 13 20:54:51 2013
New Revision: 1541706

URL: http://svn.apache.org/r1541706
Log:
HIVE-5632: Eliminate splits based on SARGs using stripe statistics in ORC (Patch by Prasanth J, reviewed by Eric Hanson and Gunther Hagleitner)
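
In short: when a serialized filter expression is present in the job conf, the ORC split
generator now reads the per-stripe column statistics from the file metadata and drops any
stripe whose min/max range cannot satisfy the predicate, so no input split is created for
it. A condensed sketch of the per-stripe check, assuming sarg, stripeStats, filterColumns,
idx and the getMin/getMax helpers are set up as in the OrcInputFormat change below (an
illustration of the idea, not the exact committed loop):

    // Evaluate every SARG leaf against the stripe's min/max statistics and keep the
    // stripe only when the combined result says it may contain qualifying rows.
    StripeStatistics ss = stripeStats.get(idx);
    List<PredicateLeaf> leaves = sarg.getLeaves();
    TruthValue[] truthValues = new TruthValue[leaves.size()];
    for (int pred = 0; pred < truthValues.length; pred++) {
      if (filterColumns[pred] != -1) {
        // +1 because the statistics entry at index 0 holds only the row count
        ColumnStatistics stats = ss.getColumnStatistics()[filterColumns[pred] + 1];
        truthValues[pred] = RecordReaderImpl.evaluatePredicateRange(
            leaves.get(pred), getMin(stats), getMax(stats));
      }
    }
    boolean keepStripe = sarg.evaluate(truthValues).isNeeded();  // false => no split for this stripe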

Added:
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcSplitElimination.java
    hive/trunk/ql/src/test/queries/clientpositive/orc_split_elimination.q
    hive/trunk/ql/src/test/results/clientpositive/orc_split_elimination.q.out
Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java?rev=1541706&r1=1541705&r2=1541706&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java Wed Nov 13 20:54:51 2013
@@ -41,8 +41,11 @@ import org.apache.hadoop.hive.ql.exec.Ut
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedInputFormatInterface;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.hadoop.hive.ql.io.InputFormatChecker;
+import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
+import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
 import org.apache.hadoop.hive.ql.plan.TableScanDesc;
+import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
 import org.apache.hadoop.hive.shims.HadoopShims;
 import org.apache.hadoop.hive.shims.ShimLoader;
@@ -165,7 +168,8 @@ public class OrcInputFormat  implements 
   public static SearchArgument createSarg(List<OrcProto.Type> types, Configuration conf) {
     String serializedPushdown = conf.get(TableScanDesc.FILTER_EXPR_CONF_STR);
     if (serializedPushdown == null
-        || conf.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR) == null) {
+        || (conf.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR) == null
+        && conf.get(serdeConstants.LIST_COLUMNS) == null)) {
       LOG.info("No ORC pushdown predicate");
       return null;
     }
@@ -531,9 +535,51 @@ public class OrcInputFormat  implements 
     public void run() {
       try {
         Reader orcReader = OrcFile.createReader(fs, file.getPath());
+        Configuration conf = context.conf;
+        List<OrcProto.Type> types = orcReader.getTypes();
+        SearchArgument sarg = createSarg(types, conf);
+        List<StripeStatistics> stripeStats = null;
+        int[] filterColumns = null;
+        if (sarg != null) {
+          List<PredicateLeaf> sargLeaves = null;
+          // LIST_COLUMNS may be unset when only the read column names are present,
+          // so fall back before splitting (String.split never returns null)
+          String colNames = conf.get(serdeConstants.LIST_COLUMNS,
+              conf.get(ColumnProjectionUtils.READ_COLUMN_NAMES_CONF_STR));
+          String[] columnNames = colNames.split(",");
+          sargLeaves = sarg.getLeaves();
+          filterColumns = new int[sargLeaves.size()];
+          for (int i = 0; i < filterColumns.length; ++i) {
+            String colName = sargLeaves.get(i).getColumnName();
+            filterColumns[i] = RecordReaderImpl.findColumns(columnNames, colName);
+          }
+
+          Metadata metadata = orcReader.getMetadata();
+          stripeStats = metadata.getStripeStatistics();
+        }
+
         long currentOffset = -1;
         long currentLength = 0;
+        int idx = -1;
         for(StripeInformation stripe: orcReader.getStripes()) {
+          idx++;
+
+          // eliminate stripes that don't satisfy the predicate condition
+          if (sarg != null && !isStripeSatisfyPredicate(stripeStats.get(idx), sarg, filterColumns)) {
+
+            // if a stripe doesn't satisfy the predicate condition then skip it
+            if (LOG.isDebugEnabled()) {
+              LOG.debug("Eliminating ORC stripe-" + idx + " of file '" + file.getPath()
+                  + "'  as it did not satisfy predicate condition.");
+            }
+
+            // create split for the previous unfinished stripe
+            if (currentOffset != -1) {
+              createSplit(currentOffset, currentLength);
+              currentOffset = -1;
+            }
+            continue;
+          }
+
           // if we are working on a stripe, over the min stripe size, and
           // crossed a block boundary, cut the input split here.
           if (currentOffset != -1 && currentLength > context.minSize &&
@@ -562,6 +608,56 @@ public class OrcInputFormat  implements 
         }
       }
     }
+
+    private boolean isStripeSatisfyPredicate(StripeStatistics stripeStatistics,
+        SearchArgument sarg, int[] filterColumns) {
+      if (sarg != null && filterColumns != null) {
+        List<PredicateLeaf> predLeaves = sarg.getLeaves();
+        TruthValue[] truthValues = new TruthValue[predLeaves.size()];
+        for (int pred = 0; pred < truthValues.length; pred++) {
+          if (filterColumns[pred] != -1) {
+
+            // the column statistics entry at index 0 contains only the number of rows
+            ColumnStatistics stats = stripeStatistics.getColumnStatistics()[filterColumns[pred] + 1];
+            Object minValue = getMin(stats);
+            Object maxValue = getMax(stats);
+            truthValues[pred] = RecordReaderImpl.evaluatePredicateRange(predLeaves.get(pred),
+                minValue, maxValue);
+          }
+        }
+        return sarg.evaluate(truthValues).isNeeded();
+      }
+      return true;
+    }
+
+    private Object getMax(ColumnStatistics index) {
+      if (index instanceof IntegerColumnStatistics) {
+        return ((IntegerColumnStatistics) index).getMaximum();
+      } else if (index instanceof DoubleColumnStatistics) {
+        return ((DoubleColumnStatistics) index).getMaximum();
+      } else if (index instanceof StringColumnStatistics) {
+        return ((StringColumnStatistics) index).getMaximum();
+      } else if (index instanceof DateColumnStatistics) {
+        return ((DateColumnStatistics) index).getMaximum();
+      } else {
+        return null;
+      }
+    }
+
+    private Object getMin(ColumnStatistics index) {
+      if (index instanceof IntegerColumnStatistics) {
+        return ((IntegerColumnStatistics) index).getMinimum();
+      } else if (index instanceof DoubleColumnStatistics) {
+        return ((DoubleColumnStatistics) index).getMinimum();
+      } else if (index instanceof StringColumnStatistics) {
+        return ((StringColumnStatistics) index).getMinimum();
+      } else if (index instanceof DateColumnStatistics) {
+        return ((DateColumnStatistics) index).getMinimum();
+      } else {
+        return null;
+      }
+    }
+
   }
 
   @Override

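Nothing changes for callers: the split generator consumes the same conf entries the record
reader already used. A minimal sketch of driving it directly, mirroring the new
TestOrcSplitElimination below (filterExpr stands in for any ExprNodeGenericFuncDesc the
caller has built, and orcFilePath for the ORC file under test):

    JobConf conf = new JobConf();
    conf.set("columns", "userid,string1,subtype,decimal1,ts");           // serdeConstants.LIST_COLUMNS
    conf.set("columns.types", "bigint,string,double,decimal,timestamp");
    conf.set("hive.io.filter.expr.serialized",
        Utilities.serializeExpression(filterExpr));                      // TableScanDesc.FILTER_EXPR_CONF_STR
    FileInputFormat.setInputPaths(conf, orcFilePath.toString());
    InputSplit[] splits = new OrcInputFormat().getSplits(conf, 1);       // eliminated stripes yield no splits
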
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java?rev=1541706&r1=1541705&r2=1541706&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java Wed Nov 13 20:54:51 2013
@@ -133,7 +133,7 @@ class RecordReaderImpl implements Record
     advanceToNextRow(0L);
   }
 
-  private static int findColumns(String[] columnNames,
+  static int findColumns(String[] columnNames,
                                  String columnName) {
     for(int i=0; i < columnNames.length; ++i) {
       if (columnName.equals(columnNames[i])) {
@@ -2034,6 +2034,11 @@ class RecordReaderImpl implements Record
       }
     }
     Object maxValue = getMax(index);
+    return evaluatePredicateRange(predicate, minValue, maxValue);
+  }
+
+  static TruthValue evaluatePredicateRange(PredicateLeaf predicate, Object minValue,
+      Object maxValue) {
     Location loc;
     switch (predicate.getOperator()) {
       case NULL_SAFE_EQUALS:
@@ -2154,7 +2159,7 @@ class RecordReaderImpl implements Record
           leafValues[pred] = TruthValue.YES_NO_NULL;
         }
       }
-      result[rowGroup] = sarg.evaluate(leafValues).isNotNeeded();
+      result[rowGroup] = sarg.evaluate(leafValues).isNeeded();
       if (LOG.isDebugEnabled()) {
         LOG.debug("Row group " + (rowIndexStride * rowGroup) + " to " +
             (rowIndexStride * (rowGroup+1) - 1) + " is " +

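The refactoring above extracts the min/max range check from row-group (row index)
elimination so the new stripe elimination can reuse it. For the tests in this patch the
interesting case is a "column <= literal" leaf; a simplified illustration of how such a
leaf maps onto a TruthValue (the committed method handles all operators by locating the
literal relative to [min, max], and uses the *_NULL variants because these statistics do
not say whether the column contains nulls):

    static <T extends Comparable<T>> TruthValue lessThanEquals(T literal, T min, T max) {
      if (literal.compareTo(min) < 0) {
        return TruthValue.NO_NULL;       // literal below the minimum: no row can qualify
      } else if (literal.compareTo(max) >= 0) {
        return TruthValue.YES_NULL;      // literal at or above the maximum: every non-null row qualifies
      } else {
        return TruthValue.YES_NO_NULL;   // literal inside the range: statistics alone cannot decide
      }
    }
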
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java?rev=1541706&r1=1541705&r2=1541706&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgument.java Wed Nov 13 20:54:51 2013
@@ -139,7 +139,7 @@ public interface SearchArgument {
      * Does the RecordReader need to include this set of records?
      * @return true unless none of the rows qualify
      */
-    public boolean isNotNeeded() {
+    public boolean isNeeded() {
       switch (this) {
         case NO:
         case NULL:

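The rename fixes an inverted method name: the value answers "does the reader need this set
of rows?" and, per the doc comment, is true unless none of the rows can qualify. Both the
existing row-group elimination and the new stripe elimination use it the same way:

    // Skip a row group or stripe only when the SARG proves no row in it can qualify.
    if (!sarg.evaluate(truthValues).isNeeded()) {
      // nothing to read here
    }
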
Added: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcSplitElimination.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcSplitElimination.java?rev=1541706&view=auto
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcSplitElimination.java (added)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestOrcSplitElimination.java Wed Nov 13 20:54:51 2013
@@ -0,0 +1,385 @@
+package org.apache.hadoop.hive.ql.io.orc;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.File;
+import java.io.IOException;
+import java.sql.Timestamp;
+import java.util.List;
+
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPAnd;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.InputFormat;
+import org.apache.hadoop.mapred.InputSplit;
+import org.apache.hadoop.mapred.JobConf;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.TestName;
+
+import com.google.common.collect.Lists;
+
+public class TestOrcSplitElimination {
+
+  public static class AllTypesRow {
+    Long userid;
+    Text string1;
+    Double subtype;
+    HiveDecimal decimal1;
+    Timestamp ts;
+
+    AllTypesRow(Long uid, String s1, Double d1, HiveDecimal decimal, Timestamp ts) {
+      this.userid = uid;
+      this.string1 = new Text(s1);
+      this.subtype = d1;
+      this.decimal1 = decimal;
+      this.ts = ts;
+    }
+  }
+
+  Path workDir = new Path(System.getProperty("test.tmp.dir",
+      "target" + File.separator + "test" + File.separator + "tmp"));
+
+  JobConf conf;
+  FileSystem fs;
+  Path testFilePath;
+
+  @Rule
+  public TestName testCaseName = new TestName();
+
+  @Before
+  public void openFileSystem() throws Exception {
+    conf = new JobConf();
+    conf.set("columns", "userid,string1,subtype,decimal1,ts");
+    conf.set("columns.types", "bigint,string,double,decimal,timestamp");
+    fs = FileSystem.getLocal(conf);
+    testFilePath = new Path(workDir, "TestOrcFile." +
+        testCaseName.getMethodName() + ".orc");
+    fs.delete(testFilePath, false);
+  }
+
+  @Test
+  public void testSplitEliminationSmallMaxSplit() throws Exception {
+    ObjectInspector inspector;
+    synchronized (TestOrcFile.class) {
+      inspector = ObjectInspectorFactory
+          .getReflectionObjectInspector(AllTypesRow.class,
+              ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+    }
+    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
+        100000, CompressionKind.NONE, 10000, 10000);
+    writeData(writer);
+    writer.close();
+    conf.set("mapred.min.split.size", "1000");
+    conf.set("mapred.max.split.size", "5000");
+    InputFormat<?, ?> in = new OrcInputFormat();
+    FileInputFormat.setInputPaths(conf, testFilePath.toString());
+
+    GenericUDF udf = new GenericUDFOPEqualOrLessThan();
+    List<ExprNodeDesc> childExpr = Lists.newArrayList();
+    ExprNodeColumnDesc col = new ExprNodeColumnDesc(Long.class, "userid", "T", false);
+    ExprNodeConstantDesc con = new ExprNodeConstantDesc(100);
+    childExpr.add(col);
+    childExpr.add(con);
+    ExprNodeGenericFuncDesc en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
+    String sargStr = Utilities.serializeExpression(en);
+    conf.set("hive.io.filter.expr.serialized", sargStr);
+    InputSplit[] splits = in.getSplits(conf, 1);
+    assertEquals(5, splits.length);
+
+    con = new ExprNodeConstantDesc(1);
+    childExpr.set(1, con);
+    en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
+    sargStr = Utilities.serializeExpression(en);
+    conf.set("hive.io.filter.expr.serialized", sargStr);
+    splits = in.getSplits(conf, 1);
+    assertEquals(0, splits.length);
+
+    con = new ExprNodeConstantDesc(2);
+    childExpr.set(1, con);
+    en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
+    sargStr = Utilities.serializeExpression(en);
+    conf.set("hive.io.filter.expr.serialized", sargStr);
+    splits = in.getSplits(conf, 1);
+    assertEquals(1, splits.length);
+
+    con = new ExprNodeConstantDesc(5);
+    childExpr.set(1, con);
+    en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
+    sargStr = Utilities.serializeExpression(en);
+    conf.set("hive.io.filter.expr.serialized", sargStr);
+    splits = in.getSplits(conf, 1);
+    assertEquals(2, splits.length);
+
+    con = new ExprNodeConstantDesc(13);
+    childExpr.set(1, con);
+    en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
+    sargStr = Utilities.serializeExpression(en);
+    conf.set("hive.io.filter.expr.serialized", sargStr);
+    splits = in.getSplits(conf, 1);
+    assertEquals(3, splits.length);
+
+    con = new ExprNodeConstantDesc(29);
+    childExpr.set(1, con);
+    en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
+    sargStr = Utilities.serializeExpression(en);
+    conf.set("hive.io.filter.expr.serialized", sargStr);
+    splits = in.getSplits(conf, 1);
+    assertEquals(4, splits.length);
+
+    con = new ExprNodeConstantDesc(70);
+    childExpr.set(1, con);
+    en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
+    sargStr = Utilities.serializeExpression(en);
+    conf.set("hive.io.filter.expr.serialized", sargStr);
+    splits = in.getSplits(conf, 1);
+    assertEquals(5, splits.length);
+  }
+
+  @Test
+  public void testSplitEliminationLargeMaxSplit() throws Exception {
+    ObjectInspector inspector;
+    synchronized (TestOrcFile.class) {
+      inspector = ObjectInspectorFactory
+          .getReflectionObjectInspector(AllTypesRow.class,
+              ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+    }
+    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
+        100000, CompressionKind.NONE, 10000, 10000);
+    writeData(writer);
+    writer.close();
+    conf.set("mapred.min.split.size", "1000");
+    conf.set("mapred.max.split.size", "150000");
+    InputFormat<?, ?> in = new OrcInputFormat();
+    FileInputFormat.setInputPaths(conf, testFilePath.toString());
+
+    GenericUDF udf = new GenericUDFOPEqualOrLessThan();
+    List<ExprNodeDesc> childExpr = Lists.newArrayList();
+    ExprNodeColumnDesc col = new ExprNodeColumnDesc(Long.class, "userid", "T", false);
+    ExprNodeConstantDesc con = new ExprNodeConstantDesc(100);
+    childExpr.add(col);
+    childExpr.add(con);
+    ExprNodeGenericFuncDesc en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
+    String sargStr = Utilities.serializeExpression(en);
+    conf.set("hive.io.filter.expr.serialized", sargStr);
+    InputSplit[] splits = in.getSplits(conf, 1);
+    assertEquals(2, splits.length);
+
+    con = new ExprNodeConstantDesc(0);
+    childExpr.set(1, con);
+    en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
+    sargStr = Utilities.serializeExpression(en);
+    conf.set("hive.io.filter.expr.serialized", sargStr);
+    splits = in.getSplits(conf, 1);
+    // no stripe satisfies the condition
+    assertEquals(0, splits.length);
+
+    con = new ExprNodeConstantDesc(2);
+    childExpr.set(1, con);
+    en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
+    sargStr = Utilities.serializeExpression(en);
+    conf.set("hive.io.filter.expr.serialized", sargStr);
+    splits = in.getSplits(conf, 1);
+    // only the first stripe will satisfy the condition, hence a single split
+    assertEquals(1, splits.length);
+
+    con = new ExprNodeConstantDesc(5);
+    childExpr.set(1, con);
+    en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
+    sargStr = Utilities.serializeExpression(en);
+    conf.set("hive.io.filter.expr.serialized", sargStr);
+    splits = in.getSplits(conf, 1);
+    // first stripe will satisfy the predicate and will be a single split, last stripe will be a
+    // separate split
+    assertEquals(2, splits.length);
+
+    con = new ExprNodeConstantDesc(13);
+    childExpr.set(1, con);
+    en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
+    sargStr = Utilities.serializeExpression(en);
+    conf.set("hive.io.filter.expr.serialized", sargStr);
+    splits = in.getSplits(conf, 1);
+    // first 2 stripes will satisfy the predicate and merged to single split, last stripe will be a
+    // separate split
+    assertEquals(2, splits.length);
+
+    con = new ExprNodeConstantDesc(29);
+    childExpr.set(1, con);
+    en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
+    sargStr = Utilities.serializeExpression(en);
+    conf.set("hive.io.filter.expr.serialized", sargStr);
+    splits = in.getSplits(conf, 1);
+    // first 3 stripes will satisfy the predicate and merged to single split, last stripe will be a
+    // separate split
+    assertEquals(2, splits.length);
+
+    con = new ExprNodeConstantDesc(70);
+    childExpr.set(1, con);
+    en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
+    sargStr = Utilities.serializeExpression(en);
+    conf.set("hive.io.filter.expr.serialized", sargStr);
+    splits = in.getSplits(conf, 1);
+    // all 5 stripes satisfy userid<=70 (per-stripe minimums are 2, 13, 29, 70 and 5), so with
+    // this max split size they merge into two splits, the same as with no predicate
+    assertEquals(2, splits.length);
+  }
+
+
+  @Test
+  public void testSplitEliminationComplexExpr() throws Exception {
+    ObjectInspector inspector;
+    synchronized (TestOrcFile.class) {
+      inspector = ObjectInspectorFactory
+          .getReflectionObjectInspector(AllTypesRow.class,
+              ObjectInspectorFactory.ObjectInspectorOptions.JAVA);
+    }
+    Writer writer = OrcFile.createWriter(fs, testFilePath, conf, inspector,
+        100000, CompressionKind.NONE, 10000, 10000);
+    writeData(writer);
+    writer.close();
+    conf.set("mapred.min.split.size", "1000");
+    conf.set("mapred.max.split.size", "150000");
+    InputFormat<?, ?> in = new OrcInputFormat();
+    FileInputFormat.setInputPaths(conf, testFilePath.toString());
+
+    // predicate expression: userid <= 100 and subtype <= 1000.0
+    GenericUDF udf = new GenericUDFOPEqualOrLessThan();
+    List<ExprNodeDesc> childExpr = Lists.newArrayList();
+    ExprNodeColumnDesc col = new ExprNodeColumnDesc(Long.class, "userid", "T", false);
+    ExprNodeConstantDesc con = new ExprNodeConstantDesc(100);
+    childExpr.add(col);
+    childExpr.add(con);
+    ExprNodeGenericFuncDesc en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
+
+    GenericUDF udf1 = new GenericUDFOPEqualOrLessThan();
+    List<ExprNodeDesc> childExpr1 = Lists.newArrayList();
+    ExprNodeColumnDesc col1 = new ExprNodeColumnDesc(Double.class, "subtype", "T", false);
+    ExprNodeConstantDesc con1 = new ExprNodeConstantDesc(1000.0);
+    childExpr1.add(col1);
+    childExpr1.add(con1);
+    ExprNodeGenericFuncDesc en1 = new ExprNodeGenericFuncDesc(inspector, udf1, childExpr1);
+
+    GenericUDF udf2 = new GenericUDFOPAnd();
+    List<ExprNodeDesc> childExpr2 = Lists.newArrayList();
+    childExpr2.add(en);
+    childExpr2.add(en1);
+    ExprNodeGenericFuncDesc en2 = new ExprNodeGenericFuncDesc(inspector, udf2, childExpr2);
+
+    String sargStr = Utilities.serializeExpression(en2);
+    conf.set("hive.io.filter.expr.serialized", sargStr);
+    InputSplit[] splits = in.getSplits(conf, 1);
+    assertEquals(2, splits.length);
+
+    con = new ExprNodeConstantDesc(2);
+    childExpr.set(1, con);
+    en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
+
+    con1 = new ExprNodeConstantDesc(0.0);
+    childExpr1.set(1, con1);
+    en1 = new ExprNodeGenericFuncDesc(inspector, udf1, childExpr1);
+
+    childExpr2.set(0, en);
+    childExpr2.set(1, en1);
+    en2 = new ExprNodeGenericFuncDesc(inspector, udf2, childExpr2);
+
+    sargStr = Utilities.serializeExpression(en2);
+    conf.set("hive.io.filter.expr.serialized", sargStr);
+    splits = in.getSplits(conf, 1);
+    // no stripe will satisfy the predicate
+    assertEquals(0, splits.length);
+
+    con = new ExprNodeConstantDesc(2);
+    childExpr.set(1, con);
+    en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
+
+    con1 = new ExprNodeConstantDesc(1.0);
+    childExpr1.set(1, con1);
+    en1 = new ExprNodeGenericFuncDesc(inspector, udf1, childExpr1);
+
+    childExpr2.set(0, en);
+    childExpr2.set(1, en1);
+    en2 = new ExprNodeGenericFuncDesc(inspector, udf2, childExpr2);
+
+    sargStr = Utilities.serializeExpression(en2);
+    conf.set("hive.io.filter.expr.serialized", sargStr);
+    splits = in.getSplits(conf, 1);
+    // only the first stripe will satisfy the condition, hence a single split
+    assertEquals(1, splits.length);
+
+    udf = new GenericUDFOPEqual();
+    con = new ExprNodeConstantDesc(13);
+    childExpr.set(1, con);
+    en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
+
+    con1 = new ExprNodeConstantDesc(80.0);
+    childExpr1.set(1, con1);
+    en1 = new ExprNodeGenericFuncDesc(inspector, udf1, childExpr1);
+
+    childExpr2.set(0, en);
+    childExpr2.set(1, en1);
+    en2 = new ExprNodeGenericFuncDesc(inspector, udf2, childExpr2);
+
+    sargStr = Utilities.serializeExpression(en2);
+    conf.set("hive.io.filter.expr.serialized", sargStr);
+    splits = in.getSplits(conf, 1);
+    // stripes 1, 2 and 5 can satisfy the condition; 1 and 2 merge into one split, stripe 5 is another
+    assertEquals(2, splits.length);
+
+    udf = new GenericUDFOPEqual();
+    con = new ExprNodeConstantDesc(13);
+    childExpr.set(1, con);
+    en = new ExprNodeGenericFuncDesc(inspector, udf, childExpr);
+
+    udf1 = new GenericUDFOPEqual();
+    con1 = new ExprNodeConstantDesc(80.0);
+    childExpr1.set(1, con1);
+    en1 = new ExprNodeGenericFuncDesc(inspector, udf1, childExpr1);
+
+    childExpr2.set(0, en);
+    childExpr2.set(1, en1);
+    en2 = new ExprNodeGenericFuncDesc(inspector, udf2, childExpr2);
+
+    sargStr = Utilities.serializeExpression(en2);
+    conf.set("hive.io.filter.expr.serialized", sargStr);
+    splits = in.getSplits(conf, 1);
+    // only the second stripe can satisfy the condition, hence a single split
+    assertEquals(1, splits.length);
+  }
+
+  private void writeData(Writer writer) throws IOException {
+    for (int i = 0; i < 25000; i++) {
+      if (i == 0) {
+        writer.addRow(new AllTypesRow(2L, "foo", 0.8, HiveDecimal.create("1.2"), new Timestamp(0)));
+      } else if (i == 5000) {
+        writer.addRow(new AllTypesRow(13L, "bar", 80.0, HiveDecimal.create("2.2"), new Timestamp(
+            5000)));
+      } else if (i == 10000) {
+        writer.addRow(new AllTypesRow(29L, "cat", 8.0, HiveDecimal.create("3.3"), new Timestamp(
+            10000)));
+      } else if (i == 15000) {
+        writer.addRow(new AllTypesRow(70L, "dog", 1.8, HiveDecimal.create("4.4"), new Timestamp(
+            15000)));
+      } else if (i == 20000) {
+        writer.addRow(new AllTypesRow(5L, "eat", 0.8, HiveDecimal.create("5.5"), new Timestamp(
+            20000)));
+      } else {
+        writer.addRow(new AllTypesRow(100L, "zebra", 8.0, HiveDecimal.create("0.0"), new Timestamp(
+            250000)));
+      }
+    }
+  }
+}

Added: hive/trunk/ql/src/test/queries/clientpositive/orc_split_elimination.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/orc_split_elimination.q?rev=1541706&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/orc_split_elimination.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/orc_split_elimination.q Wed Nov 13 20:54:51 2013
@@ -0,0 +1,101 @@
+create table orc_split_elim (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) stored as orc;
+
+load data local inpath '../../data/files/orc_split_elim.orc' into table orc_split_elim;
+
+SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+SET mapred.min.split.size=1000;
+SET mapred.max.split.size=5000;
+SET hive.optimize.index.filter=false;
+
+-- The above table will have 5 splits with the following stats
+--  Stripe 1:
+--    Column 0: count: 5000
+--    Column 1: count: 5000 min: 2 max: 100 sum: 499902
+--    Column 2: count: 5000 min: foo max: zebra sum: 24998
+--    Column 3: count: 5000 min: 0.8 max: 8.0 sum: 39992.8
+--    Column 4: count: 5000 min: 0 max: 1.2 sum: 1.2
+--    Column 5: count: 5000
+--  Stripe 2:
+--    Column 0: count: 5000
+--    Column 1: count: 5000 min: 13 max: 100 sum: 499913
+--    Column 2: count: 5000 min: bar max: zebra sum: 24998
+--    Column 3: count: 5000 min: 8.0 max: 80.0 sum: 40072.0
+--    Column 4: count: 5000 min: 0 max: 2.2 sum: 2.2
+--    Column 5: count: 5000
+--  Stripe 3:
+--    Column 0: count: 5000
+--    Column 1: count: 5000 min: 29 max: 100 sum: 499929
+--    Column 2: count: 5000 min: cat max: zebra sum: 24998
+--    Column 3: count: 5000 min: 8.0 max: 8.0 sum: 40000.0
+--    Column 4: count: 5000 min: 0 max: 3.3 sum: 3.3
+--    Column 5: count: 5000
+--  Stripe 4:
+--    Column 0: count: 5000
+--    Column 1: count: 5000 min: 70 max: 100 sum: 499970
+--    Column 2: count: 5000 min: dog max: zebra sum: 24998
+--    Column 3: count: 5000 min: 1.8 max: 8.0 sum: 39993.8
+--    Column 4: count: 5000 min: 0 max: 4.4 sum: 4.4
+--    Column 5: count: 5000
+--  Stripe 5:
+--    Column 0: count: 5000
+--    Column 1: count: 5000 min: 5 max: 100 sum: 499905
+--    Column 2: count: 5000 min: eat max: zebra sum: 24998
+--    Column 3: count: 5000 min: 0.8 max: 8.0 sum: 39992.8
+--    Column 4: count: 5000 min: 0 max: 5.5 sum: 5.5
+--    Column 5: count: 5000
+
+-- 5 mappers
+select userid,string1,subtype,decimal1,ts from orc_split_elim where userid<=0;
+
+SET hive.optimize.index.filter=true;
+-- 0 mapper
+select userid,string1,subtype,decimal1,ts from orc_split_elim where userid<=0;
+SET hive.optimize.index.filter=false;
+
+-- 5 mappers. count should be 0
+select count(*) from orc_split_elim where userid<=0;
+
+SET hive.optimize.index.filter=true;
+-- 0 mapper
+select count(*) from orc_split_elim where userid<=0;
+SET hive.optimize.index.filter=false;
+
+-- 5 mappers
+select userid,string1,subtype,decimal1,ts from orc_split_elim where userid<=2 order by userid;
+
+SET hive.optimize.index.filter=true;
+-- 1 mapper
+select userid,string1,subtype,decimal1,ts from orc_split_elim where userid<=2 order by userid;
+SET hive.optimize.index.filter=false;
+
+-- 5 mappers
+select userid,string1,subtype,decimal1,ts from orc_split_elim where userid<=5 order by userid;
+
+SET hive.optimize.index.filter=true;
+-- 2 mappers
+select userid,string1,subtype,decimal1,ts from orc_split_elim where userid<=5 order by userid;
+SET hive.optimize.index.filter=false;
+
+-- 5 mappers
+select userid,string1,subtype,decimal1,ts from orc_split_elim where userid<=13 order by userid;
+
+SET hive.optimize.index.filter=true;
+-- 3 mappers
+select userid,string1,subtype,decimal1,ts from orc_split_elim where userid<=13 order by userid;
+SET hive.optimize.index.filter=false;
+
+-- 5 mappers
+select userid,string1,subtype,decimal1,ts from orc_split_elim where userid<=29 order by userid;
+
+SET hive.optimize.index.filter=true;
+-- 4 mappers
+select userid,string1,subtype,decimal1,ts from orc_split_elim where userid<=29 order by userid;
+SET hive.optimize.index.filter=false;
+
+-- 5 mappers
+select userid,string1,subtype,decimal1,ts from orc_split_elim where userid<=70 order by userid;
+
+SET hive.optimize.index.filter=true;
+-- 5 mappers
+select userid,string1,subtype,decimal1,ts from orc_split_elim where userid<=70 order by userid;
+SET hive.optimize.index.filter=false;

Added: hive/trunk/ql/src/test/results/clientpositive/orc_split_elimination.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/orc_split_elimination.q.out?rev=1541706&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/orc_split_elimination.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/orc_split_elimination.q.out Wed Nov 13 20:54:51 2013
@@ -0,0 +1,257 @@
+PREHOOK: query: create table orc_split_elim (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) stored as orc
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table orc_split_elim (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@orc_split_elim
+PREHOOK: query: load data local inpath '../../data/files/orc_split_elim.orc' into table orc_split_elim
+PREHOOK: type: LOAD
+PREHOOK: Output: default@orc_split_elim
+POSTHOOK: query: load data local inpath '../../data/files/orc_split_elim.orc' into table orc_split_elim
+POSTHOOK: type: LOAD
+POSTHOOK: Output: default@orc_split_elim
+PREHOOK: query: -- The above table will have 5 splits with the following stats
+--  Stripe 1:
+--    Column 0: count: 5000
+--    Column 1: count: 5000 min: 2 max: 100 sum: 499902
+--    Column 2: count: 5000 min: foo max: zebra sum: 24998
+--    Column 3: count: 5000 min: 0.8 max: 8.0 sum: 39992.8
+--    Column 4: count: 5000 min: 0 max: 1.2 sum: 1.2
+--    Column 5: count: 5000
+--  Stripe 2:
+--    Column 0: count: 5000
+--    Column 1: count: 5000 min: 13 max: 100 sum: 499913
+--    Column 2: count: 5000 min: bar max: zebra sum: 24998
+--    Column 3: count: 5000 min: 8.0 max: 80.0 sum: 40072.0
+--    Column 4: count: 5000 min: 0 max: 2.2 sum: 2.2
+--    Column 5: count: 5000
+--  Stripe 3:
+--    Column 0: count: 5000
+--    Column 1: count: 5000 min: 29 max: 100 sum: 499929
+--    Column 2: count: 5000 min: cat max: zebra sum: 24998
+--    Column 3: count: 5000 min: 8.0 max: 8.0 sum: 40000.0
+--    Column 4: count: 5000 min: 0 max: 3.3 sum: 3.3
+--    Column 5: count: 5000
+--  Stripe 4:
+--    Column 0: count: 5000
+--    Column 1: count: 5000 min: 70 max: 100 sum: 499970
+--    Column 2: count: 5000 min: dog max: zebra sum: 24998
+--    Column 3: count: 5000 min: 1.8 max: 8.0 sum: 39993.8
+--    Column 4: count: 5000 min: 0 max: 4.4 sum: 4.4
+--    Column 5: count: 5000
+--  Stripe 5:
+--    Column 0: count: 5000
+--    Column 1: count: 5000 min: 5 max: 100 sum: 499905
+--    Column 2: count: 5000 min: eat max: zebra sum: 24998
+--    Column 3: count: 5000 min: 0.8 max: 8.0 sum: 39992.8
+--    Column 4: count: 5000 min: 0 max: 5.5 sum: 5.5
+--    Column 5: count: 5000
+
+-- 5 mappers
+select userid,string1,subtype,decimal1,ts from orc_split_elim where userid<=0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_split_elim
+#### A masked pattern was here ####
+POSTHOOK: query: -- The above table will have 5 splits with the following stats
+--  Stripe 1:
+--    Column 0: count: 5000
+--    Column 1: count: 5000 min: 2 max: 100 sum: 499902
+--    Column 2: count: 5000 min: foo max: zebra sum: 24998
+--    Column 3: count: 5000 min: 0.8 max: 8.0 sum: 39992.8
+--    Column 4: count: 5000 min: 0 max: 1.2 sum: 1.2
+--    Column 5: count: 5000
+--  Stripe 2:
+--    Column 0: count: 5000
+--    Column 1: count: 5000 min: 13 max: 100 sum: 499913
+--    Column 2: count: 5000 min: bar max: zebra sum: 24998
+--    Column 3: count: 5000 min: 8.0 max: 80.0 sum: 40072.0
+--    Column 4: count: 5000 min: 0 max: 2.2 sum: 2.2
+--    Column 5: count: 5000
+--  Stripe 3:
+--    Column 0: count: 5000
+--    Column 1: count: 5000 min: 29 max: 100 sum: 499929
+--    Column 2: count: 5000 min: cat max: zebra sum: 24998
+--    Column 3: count: 5000 min: 8.0 max: 8.0 sum: 40000.0
+--    Column 4: count: 5000 min: 0 max: 3.3 sum: 3.3
+--    Column 5: count: 5000
+--  Stripe 4:
+--    Column 0: count: 5000
+--    Column 1: count: 5000 min: 70 max: 100 sum: 499970
+--    Column 2: count: 5000 min: dog max: zebra sum: 24998
+--    Column 3: count: 5000 min: 1.8 max: 8.0 sum: 39993.8
+--    Column 4: count: 5000 min: 0 max: 4.4 sum: 4.4
+--    Column 5: count: 5000
+--  Stripe 5:
+--    Column 0: count: 5000
+--    Column 1: count: 5000 min: 5 max: 100 sum: 499905
+--    Column 2: count: 5000 min: eat max: zebra sum: 24998
+--    Column 3: count: 5000 min: 0.8 max: 8.0 sum: 39992.8
+--    Column 4: count: 5000 min: 0 max: 5.5 sum: 5.5
+--    Column 5: count: 5000
+
+-- 5 mappers
+select userid,string1,subtype,decimal1,ts from orc_split_elim where userid<=0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_split_elim
+#### A masked pattern was here ####
+PREHOOK: query: -- 0 mapper
+select userid,string1,subtype,decimal1,ts from orc_split_elim where userid<=0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_split_elim
+#### A masked pattern was here ####
+POSTHOOK: query: -- 0 mapper
+select userid,string1,subtype,decimal1,ts from orc_split_elim where userid<=0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_split_elim
+#### A masked pattern was here ####
+PREHOOK: query: -- 5 mappers. count should be 0
+select count(*) from orc_split_elim where userid<=0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_split_elim
+#### A masked pattern was here ####
+POSTHOOK: query: -- 5 mappers. count should be 0
+select count(*) from orc_split_elim where userid<=0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_split_elim
+#### A masked pattern was here ####
+0
+PREHOOK: query: -- 0 mapper
+select count(*) from orc_split_elim where userid<=0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_split_elim
+#### A masked pattern was here ####
+POSTHOOK: query: -- 0 mapper
+select count(*) from orc_split_elim where userid<=0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_split_elim
+#### A masked pattern was here ####
+0
+PREHOOK: query: -- 5 mappers
+select userid,string1,subtype,decimal1,ts from orc_split_elim where userid<=2 order by userid
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_split_elim
+#### A masked pattern was here ####
+POSTHOOK: query: -- 5 mappers
+select userid,string1,subtype,decimal1,ts from orc_split_elim where userid<=2 order by userid
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_split_elim
+#### A masked pattern was here ####
+2	foo	0.8	1	1969-12-31 16:00:00
+PREHOOK: query: -- 1 mapper
+select userid,string1,subtype,decimal1,ts from orc_split_elim where userid<=2 order by userid
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_split_elim
+#### A masked pattern was here ####
+POSTHOOK: query: -- 1 mapper
+select userid,string1,subtype,decimal1,ts from orc_split_elim where userid<=2 order by userid
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_split_elim
+#### A masked pattern was here ####
+2	foo	0.8	1	1969-12-31 16:00:00
+PREHOOK: query: -- 5 mappers
+select userid,string1,subtype,decimal1,ts from orc_split_elim where userid<=5 order by userid
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_split_elim
+#### A masked pattern was here ####
+POSTHOOK: query: -- 5 mappers
+select userid,string1,subtype,decimal1,ts from orc_split_elim where userid<=5 order by userid
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_split_elim
+#### A masked pattern was here ####
+2	foo	0.8	1	1969-12-31 16:00:00
+5	eat	0.8	6	1969-12-31 16:00:20
+PREHOOK: query: -- 2 mappers
+select userid,string1,subtype,decimal1,ts from orc_split_elim where userid<=5 order by userid
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_split_elim
+#### A masked pattern was here ####
+POSTHOOK: query: -- 2 mappers
+select userid,string1,subtype,decimal1,ts from orc_split_elim where userid<=5 order by userid
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_split_elim
+#### A masked pattern was here ####
+2	foo	0.8	1	1969-12-31 16:00:00
+5	eat	0.8	6	1969-12-31 16:00:20
+PREHOOK: query: -- 5 mappers
+select userid,string1,subtype,decimal1,ts from orc_split_elim where userid<=13 order by userid
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_split_elim
+#### A masked pattern was here ####
+POSTHOOK: query: -- 5 mappers
+select userid,string1,subtype,decimal1,ts from orc_split_elim where userid<=13 order by userid
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_split_elim
+#### A masked pattern was here ####
+2	foo	0.8	1	1969-12-31 16:00:00
+5	eat	0.8	6	1969-12-31 16:00:20
+13	bar	80.0	2	1969-12-31 16:00:05
+PREHOOK: query: -- 3 mappers
+select userid,string1,subtype,decimal1,ts from orc_split_elim where userid<=13 order by userid
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_split_elim
+#### A masked pattern was here ####
+POSTHOOK: query: -- 3 mappers
+select userid,string1,subtype,decimal1,ts from orc_split_elim where userid<=13 order by userid
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_split_elim
+#### A masked pattern was here ####
+2	foo	0.8	1	1969-12-31 16:00:00
+5	eat	0.8	6	1969-12-31 16:00:20
+13	bar	80.0	2	1969-12-31 16:00:05
+PREHOOK: query: -- 5 mappers
+select userid,string1,subtype,decimal1,ts from orc_split_elim where userid<=29 order by userid
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_split_elim
+#### A masked pattern was here ####
+POSTHOOK: query: -- 5 mappers
+select userid,string1,subtype,decimal1,ts from orc_split_elim where userid<=29 order by userid
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_split_elim
+#### A masked pattern was here ####
+2	foo	0.8	1	1969-12-31 16:00:00
+5	eat	0.8	6	1969-12-31 16:00:20
+13	bar	80.0	2	1969-12-31 16:00:05
+29	cat	8.0	3	1969-12-31 16:00:10
+PREHOOK: query: -- 4 mappers
+select userid,string1,subtype,decimal1,ts from orc_split_elim where userid<=29 order by userid
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_split_elim
+#### A masked pattern was here ####
+POSTHOOK: query: -- 4 mappers
+select userid,string1,subtype,decimal1,ts from orc_split_elim where userid<=29 order by userid
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_split_elim
+#### A masked pattern was here ####
+2	foo	0.8	1	1969-12-31 16:00:00
+5	eat	0.8	6	1969-12-31 16:00:20
+13	bar	80.0	2	1969-12-31 16:00:05
+29	cat	8.0	3	1969-12-31 16:00:10
+PREHOOK: query: -- 5 mappers
+select userid,string1,subtype,decimal1,ts from orc_split_elim where userid<=70 order by userid
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_split_elim
+#### A masked pattern was here ####
+POSTHOOK: query: -- 5 mappers
+select userid,string1,subtype,decimal1,ts from orc_split_elim where userid<=70 order by userid
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_split_elim
+#### A masked pattern was here ####
+2	foo	0.8	1	1969-12-31 16:00:00
+5	eat	0.8	6	1969-12-31 16:00:20
+13	bar	80.0	2	1969-12-31 16:00:05
+29	cat	8.0	3	1969-12-31 16:00:10
+70	dog	1.8	4	1969-12-31 16:00:15
+PREHOOK: query: -- 5 mappers
+select userid,string1,subtype,decimal1,ts from orc_split_elim where userid<=70 order by userid
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_split_elim
+#### A masked pattern was here ####
+POSTHOOK: query: -- 5 mappers
+select userid,string1,subtype,decimal1,ts from orc_split_elim where userid<=70 order by userid
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_split_elim
+#### A masked pattern was here ####
+2	foo	0.8	1	1969-12-31 16:00:00
+5	eat	0.8	6	1969-12-31 16:00:20
+13	bar	80.0	2	1969-12-31 16:00:05
+29	cat	8.0	3	1969-12-31 16:00:10
+70	dog	1.8	4	1969-12-31 16:00:15