You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tajo.apache.org by hy...@apache.org on 2014/03/25 04:20:15 UTC

[3/3] git commit: TAJO-682: RangePartitionAlgorithm should be improved to handle empty texts. (Alvin Henrick via hyunsik)

TAJO-682: RangePartitionAlgorithm should be improved to handle empty texts. (Alvin Henrick via hyunsik)


Project: http://git-wip-us.apache.org/repos/asf/tajo/repo
Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/e12e038b
Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/e12e038b
Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/e12e038b

Branch: refs/heads/master
Commit: e12e038b5d50db015e6b9cdf8de478591b0712be
Parents: 37fef79
Author: Hyunsik Choi <hy...@apache.org>
Authored: Tue Mar 25 12:19:34 2014 +0900
Committer: Hyunsik Choi <hy...@apache.org>
Committed: Tue Mar 25 12:19:34 2014 +0900

----------------------------------------------------------------------
 CHANGES.txt                                     |  3 +
 .../engine/planner/RangePartitionAlgorithm.java |  7 +-
 .../engine/planner/UniformRangePartition.java   |  5 +-
 .../planner/TestUniformRangePartition.java      | 74 ++++++++++++++++++++
 4 files changed, 85 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tajo/blob/e12e038b/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index a935225..7d9862e 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -826,6 +826,9 @@ Release 0.2.0 - released
 
   BUG FIXES
 
+    TAJO-682: RangePartitionAlgorithm should be improved to handle empty
+    texts. (Alvin Henrick via hyunsik)
+
     TAJO-511: Sometimes, a query progress becomes higher than 100%. (jihoon)
 
     TAJO-281: 'mvn package -Pdist' generates duplicate Tajo jar files.

http://git-wip-us.apache.org/repos/asf/tajo/blob/e12e038b/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/planner/RangePartitionAlgorithm.java
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/planner/RangePartitionAlgorithm.java b/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/planner/RangePartitionAlgorithm.java
index 500001f..c4b1ae1 100644
--- a/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/planner/RangePartitionAlgorithm.java
+++ b/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/planner/RangePartitionAlgorithm.java
@@ -22,6 +22,7 @@ import org.apache.tajo.catalog.Column;
 import org.apache.tajo.catalog.SortSpec;
 import org.apache.tajo.common.TajoDataTypes.DataType;
 import org.apache.tajo.datum.Datum;
+import org.apache.tajo.datum.NullDatum;
 import org.apache.tajo.storage.Tuple;
 import org.apache.tajo.storage.TupleRange;
 
@@ -113,10 +114,12 @@ public abstract class RangePartitionAlgorithm {
         }
         break;
       case TEXT:
+        final char textStart =  start instanceof NullDatum ? '0' : start.asChars().charAt(0);
+        final char textEnd = end instanceof NullDatum ? '0' : end.asChars().charAt(0);
         if (isAscending) {
-          columnCard = new BigDecimal(end.asChars().charAt(0) - start.asChars().charAt(0));
+          columnCard = new BigDecimal(textEnd - textStart);
         } else {
-          columnCard = new BigDecimal(start.asChars().charAt(0) - end.asChars().charAt(0));
+          columnCard = new BigDecimal(textStart - textEnd);
         }
         break;
       case DATE:

http://git-wip-us.apache.org/repos/asf/tajo/blob/e12e038b/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/planner/UniformRangePartition.java
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/planner/UniformRangePartition.java b/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/planner/UniformRangePartition.java
index 948b19e..a7df3e8 100644
--- a/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/planner/UniformRangePartition.java
+++ b/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/planner/UniformRangePartition.java
@@ -24,6 +24,7 @@ import org.apache.tajo.catalog.Column;
 import org.apache.tajo.catalog.SortSpec;
 import org.apache.tajo.datum.Datum;
 import org.apache.tajo.datum.DatumFactory;
+import org.apache.tajo.datum.NullDatum;
 import org.apache.tajo.engine.exception.RangeOverflowException;
 import org.apache.tajo.storage.Tuple;
 import org.apache.tajo.storage.TupleRange;
@@ -194,7 +195,7 @@ public class UniformRangePartition extends RangePartitionAlgorithm {
       }
       case TEXT: {
         if (sortSpecs[colId].isAscending()) {
-          candidate = inc.add(new BigDecimal((int)(last.asChars().charAt(0))));
+          candidate = inc.add(new BigDecimal((int)(last instanceof NullDatum ? '0' : last.asChars().charAt(0))));
           return new BigDecimal(range.getEnd().get(colId).asChars().charAt(0)).compareTo(candidate) < 0;
         } else {
           candidate = new BigDecimal((int)(last.asChars().charAt(0))).subtract(inc);
@@ -381,7 +382,7 @@ public class UniformRangePartition extends RangePartitionAlgorithm {
                 + incs[i].longValue())) + ""));
           } else {
             end.put(i, DatumFactory.createText(
-                ((char) (last.get(i).asChars().charAt(0) + incs[i].longValue())) + ""));
+                ((char) ((last.get(i) instanceof NullDatum ? '0': last.get(i).asChars().charAt(0)) + incs[i].longValue())) + ""));
           }
           break;
         case DATE:

http://git-wip-us.apache.org/repos/asf/tajo/blob/e12e038b/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/planner/TestUniformRangePartition.java
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/planner/TestUniformRangePartition.java b/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/planner/TestUniformRangePartition.java
index 3d5cdf2..dc5108a 100644
--- a/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/planner/TestUniformRangePartition.java
+++ b/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/planner/TestUniformRangePartition.java
@@ -304,4 +304,78 @@ public class TestUniformRangePartition {
 
     assertEquals(expected, ranges[0]);
   }
+
+  @Test
+  public void testPartitionForOnePartNumWithOneOfTheValueNull() { // One-partition case where each boundary tuple carries a NULL in one TEXT column.
+    Schema schema = new Schema()
+            .addColumn("l_returnflag", Type.TEXT)
+            .addColumn("l_linestatus", Type.TEXT);
+
+    SortSpec [] sortSpecs = PlannerUtil.schemaToSortSpecs(schema); // sort specs derived from the two TEXT columns
+
+    Tuple s = new VTuple(2);
+    s.put(0, DatumFactory.createNullDatum()); // start tuple: NULL in column 0
+    s.put(1, DatumFactory.createText("F"));
+    Tuple e = new VTuple(2);
+    e.put(0, DatumFactory.createText("R"));
+    e.put(1, DatumFactory.createNullDatum()); // end tuple: NULL in column 1
+    TupleRange expected = new TupleRange(sortSpecs, s, e);
+    RangePartitionAlgorithm partitioner =
+            new UniformRangePartition(expected, sortSpecs, true);
+    TupleRange [] ranges = partitioner.partition(1); // request a single partition
+
+    assertEquals(expected, ranges[0]); // the first returned range must equal the input range
+  }
+
+  @Test
+  public void testPartitionForOnePartNumWithBothValueNull() { // One-partition case where every value of both boundary tuples is NULL.
+    Schema schema = new Schema()
+            .addColumn("l_returnflag", Type.TEXT)
+            .addColumn("l_linestatus", Type.TEXT);
+
+    SortSpec [] sortSpecs = PlannerUtil.schemaToSortSpecs(schema); // sort specs derived from the two TEXT columns
+
+    Tuple s = new VTuple(2);
+    s.put(0, DatumFactory.createNullDatum()); // start tuple: NULL in both columns
+    s.put(1, DatumFactory.createNullDatum());
+    Tuple e = new VTuple(2);
+    e.put(0, DatumFactory.createNullDatum()); // end tuple: NULL in both columns
+    e.put(1, DatumFactory.createNullDatum());
+    TupleRange expected = new TupleRange(sortSpecs, s, e);
+    RangePartitionAlgorithm partitioner =
+            new UniformRangePartition(expected, sortSpecs, true);
+    TupleRange [] ranges = partitioner.partition(1); // request a single partition
+
+    assertEquals(expected, ranges[0]); // the first returned range must equal the input range
+  }
+
+    @Test
+    public void testPartitionWithNull() { // Multi-partition case: column 0 is NULL at both ends, column 1 spans "F" to "O".
+        Schema schema = new Schema();
+        schema.addColumn("l_returnflag", Type.TEXT);
+        schema.addColumn("l_linestatus", Type.TEXT);
+
+        SortSpec [] sortSpecs = PlannerUtil.schemaToSortSpecs(schema); // sort specs derived from the two TEXT columns
+
+        Tuple s = new VTuple(2);
+        s.put(0, DatumFactory.createNullDatum()); // start tuple: NULL in column 0
+        s.put(1, DatumFactory.createText("F"));
+        Tuple e = new VTuple(2);
+        e.put(0, DatumFactory.createNullDatum()); // end tuple: NULL in column 0 as well
+        e.put(1, DatumFactory.createText("O"));
+        TupleRange expected = new TupleRange(sortSpecs, s, e);
+        RangePartitionAlgorithm partitioner
+                = new UniformRangePartition(expected, sortSpecs, true);
+        TupleRange [] ranges = partitioner.partition(10); // request ten partitions
+
+
+        TupleRange prev = null;
+        for (TupleRange r : ranges) {
+            if (prev == null) {
+                prev = r; // remember the first range only
+            } else {
+                assertTrue(prev.compareTo(r) > 0); // NOTE(review): prev is never advanced, so each later range is compared with the first one - confirm intended
+            }
+        }
+    }
 }