You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by da...@apache.org on 2014/09/25 23:30:11 UTC

svn commit: r1627672 - in /pig/trunk: CHANGES.txt src/org/apache/pig/impl/util/orc/OrcUtils.java test/org/apache/pig/builtin/TestOrcStorage.java test/org/apache/pig/builtin/TestOrcStoragePushdown.java test/org/apache/pig/builtin/orc/charvarchar.orc

Author: daijy
Date: Thu Sep 25 21:30:10 2014
New Revision: 1627672

URL: http://svn.apache.org/r1627672
Log:
PIG-4195: Support loading char/varchar data in OrcStorage

Added:
    pig/trunk/test/org/apache/pig/builtin/orc/charvarchar.orc   (with props)
Modified:
    pig/trunk/CHANGES.txt
    pig/trunk/src/org/apache/pig/impl/util/orc/OrcUtils.java
    pig/trunk/test/org/apache/pig/builtin/TestOrcStorage.java
    pig/trunk/test/org/apache/pig/builtin/TestOrcStoragePushdown.java

Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1627672&r1=1627671&r2=1627672&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Thu Sep 25 21:30:10 2014
@@ -80,6 +80,8 @@ OPTIMIZATIONS
  
 BUG FIXES
 
+PIG-4195: Support loading char/varchar data in OrcStorage (daijy)
+
 PIG-4201: Native e2e tests fail when run against old version of pig (rohini)
 
 PIG-4197: Fix typo in Job Stats header: MinMapTIme => MinMapTime (jmartell7 via daijy)

Modified: pig/trunk/src/org/apache/pig/impl/util/orc/OrcUtils.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/impl/util/orc/OrcUtils.java?rev=1627672&r1=1627671&r2=1627672&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/impl/util/orc/OrcUtils.java (original)
+++ pig/trunk/src/org/apache/pig/impl/util/orc/OrcUtils.java Thu Sep 25 21:30:10 2014
@@ -27,7 +27,9 @@ import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 
+import org.apache.hadoop.hive.common.type.HiveChar;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.common.type.HiveVarchar;
 import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
 import org.apache.hadoop.hive.serde2.io.TimestampWritable;
 import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
@@ -125,6 +127,12 @@ public class OrcUtils {
         case STRING:
             result = poi.getPrimitiveJavaObject(obj);
             break;
+        case CHAR:
+            result = ((HiveChar)poi.getPrimitiveJavaObject(obj)).getValue();
+            break;
+        case VARCHAR:
+            result = ((HiveVarchar)poi.getPrimitiveJavaObject(obj)).getValue();
+            break;
         case BYTE:
             result = (int)(Byte)poi.getPrimitiveJavaObject(obj);
             break;
@@ -222,6 +230,12 @@ public class OrcUtils {
             case STRING:
                 fieldSchema.setType(DataType.CHARARRAY);
                 break;
+            case VARCHAR:
+                fieldSchema.setType(DataType.CHARARRAY);
+                break;
+            case CHAR:
+                fieldSchema.setType(DataType.CHARARRAY);
+                break;
             case TIMESTAMP:
                 fieldSchema.setType(DataType.DATETIME);
                 break;

Modified: pig/trunk/test/org/apache/pig/builtin/TestOrcStorage.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/builtin/TestOrcStorage.java?rev=1627672&r1=1627671&r2=1627672&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/builtin/TestOrcStorage.java (original)
+++ pig/trunk/test/org/apache/pig/builtin/TestOrcStorage.java Thu Sep 25 21:30:10 2014
@@ -57,6 +57,7 @@ import org.apache.pig.backend.executione
 import org.apache.pig.backend.hadoop.datastorage.ConfigurationUtil;
 import org.apache.pig.data.BinSedesTuple;
 import org.apache.pig.data.DataByteArray;
+import org.apache.pig.data.DataType;
 import org.apache.pig.data.DefaultDataBag;
 import org.apache.pig.data.Tuple;
 import org.apache.pig.impl.logicalLayer.schema.Schema;
@@ -153,6 +154,28 @@ public class TestOrcStorage {
     }
 
     @Test
+    // See PIG-4195
+    public void testCharVarchar() throws Exception {
+        pigServer.registerQuery("A = load '" + basedir + "charvarchar.orc'" + " using OrcStorage();" );
+        Schema schema = pigServer.dumpSchema("A");
+        assertEquals(schema.size(), 2);
+        assertEquals(schema.getField(0).type, DataType.CHARARRAY);
+        assertEquals(schema.getField(1).type, DataType.CHARARRAY);
+        Iterator<Tuple> iter = pigServer.openIterator("A");
+        int count=0;
+        Tuple t=null;
+        while (iter.hasNext()) {
+            t = iter.next();
+            assertEquals(t.size(), 2);
+            assertTrue(t.get(0) instanceof String);
+            assertTrue(t.get(1) instanceof String);
+            assertEquals(((String)t.get(1)).length(), 20);
+            count++;
+        }
+        assertEquals(count, 10000);
+    }
+
+    @Test
     public void testSimpleStore() throws Exception {
         pigServer.registerQuery("A = load '" + INPUT1 + "' as (a0:int, a1:chararray);");
         pigServer.store("A", OUTPUT1, "OrcStorage");

Modified: pig/trunk/test/org/apache/pig/builtin/TestOrcStoragePushdown.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/builtin/TestOrcStoragePushdown.java?rev=1627672&r1=1627671&r2=1627672&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/builtin/TestOrcStoragePushdown.java (original)
+++ pig/trunk/test/org/apache/pig/builtin/TestOrcStoragePushdown.java Thu Sep 25 21:30:10 2014
@@ -86,6 +86,7 @@ public class TestOrcStoragePushdown {
     public static void oneTimeSetup() throws Exception{
         cluster = MiniGenericCluster.buildCluster();
         Util.copyFromLocalToCluster(cluster, basedir + "orc-file-11-format.orc", basedir + "orc-file-11-format.orc");
+        Util.copyFromLocalToCluster(cluster, basedir + "charvarchar.orc", basedir + "charvarchar.orc");
         createInputData();
 
         if(Util.WINDOWS){
@@ -312,32 +313,32 @@ public class TestOrcStoragePushdown {
 
     @Test
     public void testPredicatePushdownBoolean() throws Exception {
-        testPredicatePushdown("f1 == true", 2500, 1200000);
+        testPredicatePushdown(INPUT, "f1 == true", 2500, 1200000);
     }
 
     @Test
     public void testPredicatePushdownByteShort() throws Exception {
-        testPredicatePushdown("f2 != 5 or f3 == 100", 3500, 1200000);
+        testPredicatePushdown(INPUT, "f2 != 5 or f3 == 100", 3500, 1200000);
     }
 
     @Test
     public void testPredicatePushdownIntLongString() throws Exception {
-        testPredicatePushdown("f4 >= 980 and f4 < 1010 and (f5 == 100 or f9 is not null)", 20, 1200000);
+        testPredicatePushdown(INPUT, "f4 >= 980 and f4 < 1010 and (f5 == 100 or f9 is not null)", 20, 1200000);
     }
 
     @Test
     public void testPredicatePushdownFloatDouble() throws Exception {
-        testPredicatePushdown("f6 == 100.0 and f7 > 2000.00000001", 167, 1600000);
+        testPredicatePushdown(INPUT, "f6 == 100.0 and f7 > 2000.00000001", 167, 1600000);
     }
 
     @Test
     public void testPredicatePushdownBigDecimal() throws Exception {
-        testPredicatePushdown("f11 < (bigdecimal)'1000000000';", 2500, 1600000);
+        testPredicatePushdown(INPUT, "f11 < (bigdecimal)'1000000000';", 2500, 1600000);
     }
 
     @Test
     public void testPredicatePushdownTimestamp() throws Exception {
-        testPredicatePushdown("f10 >= ToDate('20100101', 'yyyyMMdd', 'UTC')", 3000, 400000);
+        testPredicatePushdown(INPUT, "f10 >= ToDate('20100101', 'yyyyMMdd', 'UTC')", 3000, 400000);
     }
 
     private Expression getExpressionForTest(String query, List<String> predicateCols) throws Exception {
@@ -369,7 +370,7 @@ public class TestOrcStoragePushdown {
         Util.checkQueryOutputs(pigServer_disabledRule.openIterator("C"), pigServer.openIterator("E"), expectedRows);
     }
 
-    private void testPredicatePushdown(String filterStmt, int expectedRows, int expectedBytesReadDiff) throws IOException {
+    private void testPredicatePushdown(String inputFile, String filterStmt, int expectedRows, int expectedBytesReadDiff) throws IOException {
 
         Util.resetStateForExecModeSwitch();
         // Minicluster is required to get hdfs bytes read counter value
@@ -381,7 +382,7 @@ public class TestOrcStoragePushdown {
         disabledOptimizerRules.add("PredicatePushdownOptimizer");
         pigServer_disabledRule.getPigContext().getProperties().setProperty(PigImplConstants.PIG_OPTIMIZER_RULES_KEY,
                 ObjectSerializer.serialize(disabledOptimizerRules));
-        pigServer_disabledRule.registerQuery("B = load '" + INPUT + "' using OrcStorage();");
+        pigServer_disabledRule.registerQuery("B = load '" + inputFile + "' using OrcStorage();");
         pigServer_disabledRule.registerQuery("C = filter B by " + filterStmt + ";");
         ExecJob job = pigServer_disabledRule.store("C", OUTPUT3);
         //Util.copyFromClusterToLocal(cluster, OUTPUT3 + "/part-m-00000", OUTPUT3);
@@ -390,7 +391,7 @@ public class TestOrcStoragePushdown {
         long bytesWithoutPushdown = stats.getHdfsBytesRead();
 
         // Test with PredicatePushdownOptimizer enabled. Only 2 blocks should be read
-        pigServer.registerQuery("D = load '" + INPUT + "' using OrcStorage();");
+        pigServer.registerQuery("D = load '" + inputFile + "' using OrcStorage();");
         pigServer.registerQuery("E = filter D by " + filterStmt + ";");
         job = pigServer.store("E", OUTPUT4);
         //Util.copyFromClusterToLocal(cluster, OUTPUT4 + "/part-m-00000", OUTPUT4);
@@ -409,6 +410,13 @@ public class TestOrcStoragePushdown {
 
     }
 
+    @Test
+    public void testPredicatePushdownChar() throws Exception {
+        testPredicatePushdown(basedir + "charvarchar.orc", "$0 == 'ulysses thompson'", 18, 18000);
+    }
 
-
+    @Test
+    public void testPredicatePushdownVarchar() throws Exception {
+        testPredicatePushdown(basedir + "charvarchar.orc", "$1 == 'alice allen         '", 19, 18000);
+    }
 }

Added: pig/trunk/test/org/apache/pig/builtin/orc/charvarchar.orc
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/builtin/orc/charvarchar.orc?rev=1627672&view=auto
==============================================================================
Binary file - no diff available.

Propchange: pig/trunk/test/org/apache/pig/builtin/orc/charvarchar.orc
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream