You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by da...@apache.org on 2014/09/25 23:30:11 UTC
svn commit: r1627672 - in /pig/trunk: CHANGES.txt
src/org/apache/pig/impl/util/orc/OrcUtils.java
test/org/apache/pig/builtin/TestOrcStorage.java
test/org/apache/pig/builtin/TestOrcStoragePushdown.java
test/org/apache/pig/builtin/orc/charvarchar.orc
Author: daijy
Date: Thu Sep 25 21:30:10 2014
New Revision: 1627672
URL: http://svn.apache.org/r1627672
Log:
PIG-4195: Support loading char/varchar data in OrcStorage
Added:
pig/trunk/test/org/apache/pig/builtin/orc/charvarchar.orc (with props)
Modified:
pig/trunk/CHANGES.txt
pig/trunk/src/org/apache/pig/impl/util/orc/OrcUtils.java
pig/trunk/test/org/apache/pig/builtin/TestOrcStorage.java
pig/trunk/test/org/apache/pig/builtin/TestOrcStoragePushdown.java
Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1627672&r1=1627671&r2=1627672&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Thu Sep 25 21:30:10 2014
@@ -80,6 +80,8 @@ OPTIMIZATIONS
BUG FIXES
+PIG-4195: Support loading char/varchar data in OrcStorage (daijy)
+
PIG-4201: Native e2e tests fail when run against old version of pig (rohini)
PIG-4197: Fix typo in Job Stats header: MinMapTIme => MinMapTime (jmartell7 via daijy)
Modified: pig/trunk/src/org/apache/pig/impl/util/orc/OrcUtils.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/impl/util/orc/OrcUtils.java?rev=1627672&r1=1627671&r2=1627672&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/impl/util/orc/OrcUtils.java (original)
+++ pig/trunk/src/org/apache/pig/impl/util/orc/OrcUtils.java Thu Sep 25 21:30:10 2014
@@ -27,7 +27,9 @@ import java.util.Iterator;
import java.util.List;
import java.util.Map;
+import org.apache.hadoop.hive.common.type.HiveChar;
import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.common.type.HiveVarchar;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
import org.apache.hadoop.hive.serde2.io.TimestampWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
@@ -125,6 +127,12 @@ public class OrcUtils {
case STRING:
result = poi.getPrimitiveJavaObject(obj);
break;
+ case CHAR:
+ result = ((HiveChar)poi.getPrimitiveJavaObject(obj)).getValue();
+ break;
+ case VARCHAR:
+ result = ((HiveVarchar)poi.getPrimitiveJavaObject(obj)).getValue();
+ break;
case BYTE:
result = (int)(Byte)poi.getPrimitiveJavaObject(obj);
break;
@@ -222,6 +230,12 @@ public class OrcUtils {
case STRING:
fieldSchema.setType(DataType.CHARARRAY);
break;
+ case VARCHAR:
+ fieldSchema.setType(DataType.CHARARRAY);
+ break;
+ case CHAR:
+ fieldSchema.setType(DataType.CHARARRAY);
+ break;
case TIMESTAMP:
fieldSchema.setType(DataType.DATETIME);
break;
Modified: pig/trunk/test/org/apache/pig/builtin/TestOrcStorage.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/builtin/TestOrcStorage.java?rev=1627672&r1=1627671&r2=1627672&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/builtin/TestOrcStorage.java (original)
+++ pig/trunk/test/org/apache/pig/builtin/TestOrcStorage.java Thu Sep 25 21:30:10 2014
@@ -57,6 +57,7 @@ import org.apache.pig.backend.executione
import org.apache.pig.backend.hadoop.datastorage.ConfigurationUtil;
import org.apache.pig.data.BinSedesTuple;
import org.apache.pig.data.DataByteArray;
+import org.apache.pig.data.DataType;
import org.apache.pig.data.DefaultDataBag;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.logicalLayer.schema.Schema;
@@ -153,6 +154,28 @@ public class TestOrcStorage {
}
@Test
+ // See PIG-4195
+ public void testCharVarchar() throws Exception {
+ pigServer.registerQuery("A = load '" + basedir + "charvarchar.orc'" + " using OrcStorage();" );
+ Schema schema = pigServer.dumpSchema("A");
+ assertEquals(schema.size(), 2);
+ assertEquals(schema.getField(0).type, DataType.CHARARRAY);
+ assertEquals(schema.getField(1).type, DataType.CHARARRAY);
+ Iterator<Tuple> iter = pigServer.openIterator("A");
+ int count=0;
+ Tuple t=null;
+ while (iter.hasNext()) {
+ t = iter.next();
+ assertEquals(t.size(), 2);
+ assertTrue(t.get(0) instanceof String);
+ assertTrue(t.get(1) instanceof String);
+ assertEquals(((String)t.get(1)).length(), 20);
+ count++;
+ }
+ assertEquals(count, 10000);
+ }
+
+ @Test
public void testSimpleStore() throws Exception {
pigServer.registerQuery("A = load '" + INPUT1 + "' as (a0:int, a1:chararray);");
pigServer.store("A", OUTPUT1, "OrcStorage");
Modified: pig/trunk/test/org/apache/pig/builtin/TestOrcStoragePushdown.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/builtin/TestOrcStoragePushdown.java?rev=1627672&r1=1627671&r2=1627672&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/builtin/TestOrcStoragePushdown.java (original)
+++ pig/trunk/test/org/apache/pig/builtin/TestOrcStoragePushdown.java Thu Sep 25 21:30:10 2014
@@ -86,6 +86,7 @@ public class TestOrcStoragePushdown {
public static void oneTimeSetup() throws Exception{
cluster = MiniGenericCluster.buildCluster();
Util.copyFromLocalToCluster(cluster, basedir + "orc-file-11-format.orc", basedir + "orc-file-11-format.orc");
+ Util.copyFromLocalToCluster(cluster, basedir + "charvarchar.orc", basedir + "charvarchar.orc");
createInputData();
if(Util.WINDOWS){
@@ -312,32 +313,32 @@ public class TestOrcStoragePushdown {
@Test
public void testPredicatePushdownBoolean() throws Exception {
- testPredicatePushdown("f1 == true", 2500, 1200000);
+ testPredicatePushdown(INPUT, "f1 == true", 2500, 1200000);
}
@Test
public void testPredicatePushdownByteShort() throws Exception {
- testPredicatePushdown("f2 != 5 or f3 == 100", 3500, 1200000);
+ testPredicatePushdown(INPUT, "f2 != 5 or f3 == 100", 3500, 1200000);
}
@Test
public void testPredicatePushdownIntLongString() throws Exception {
- testPredicatePushdown("f4 >= 980 and f4 < 1010 and (f5 == 100 or f9 is not null)", 20, 1200000);
+ testPredicatePushdown(INPUT, "f4 >= 980 and f4 < 1010 and (f5 == 100 or f9 is not null)", 20, 1200000);
}
@Test
public void testPredicatePushdownFloatDouble() throws Exception {
- testPredicatePushdown("f6 == 100.0 and f7 > 2000.00000001", 167, 1600000);
+ testPredicatePushdown(INPUT, "f6 == 100.0 and f7 > 2000.00000001", 167, 1600000);
}
@Test
public void testPredicatePushdownBigDecimal() throws Exception {
- testPredicatePushdown("f11 < (bigdecimal)'1000000000';", 2500, 1600000);
+ testPredicatePushdown(INPUT, "f11 < (bigdecimal)'1000000000';", 2500, 1600000);
}
@Test
public void testPredicatePushdownTimestamp() throws Exception {
- testPredicatePushdown("f10 >= ToDate('20100101', 'yyyyMMdd', 'UTC')", 3000, 400000);
+ testPredicatePushdown(INPUT, "f10 >= ToDate('20100101', 'yyyyMMdd', 'UTC')", 3000, 400000);
}
private Expression getExpressionForTest(String query, List<String> predicateCols) throws Exception {
@@ -369,7 +370,7 @@ public class TestOrcStoragePushdown {
Util.checkQueryOutputs(pigServer_disabledRule.openIterator("C"), pigServer.openIterator("E"), expectedRows);
}
- private void testPredicatePushdown(String filterStmt, int expectedRows, int expectedBytesReadDiff) throws IOException {
+ private void testPredicatePushdown(String inputFile, String filterStmt, int expectedRows, int expectedBytesReadDiff) throws IOException {
Util.resetStateForExecModeSwitch();
// Minicluster is required to get hdfs bytes read counter value
@@ -381,7 +382,7 @@ public class TestOrcStoragePushdown {
disabledOptimizerRules.add("PredicatePushdownOptimizer");
pigServer_disabledRule.getPigContext().getProperties().setProperty(PigImplConstants.PIG_OPTIMIZER_RULES_KEY,
ObjectSerializer.serialize(disabledOptimizerRules));
- pigServer_disabledRule.registerQuery("B = load '" + INPUT + "' using OrcStorage();");
+ pigServer_disabledRule.registerQuery("B = load '" + inputFile + "' using OrcStorage();");
pigServer_disabledRule.registerQuery("C = filter B by " + filterStmt + ";");
ExecJob job = pigServer_disabledRule.store("C", OUTPUT3);
//Util.copyFromClusterToLocal(cluster, OUTPUT3 + "/part-m-00000", OUTPUT3);
@@ -390,7 +391,7 @@ public class TestOrcStoragePushdown {
long bytesWithoutPushdown = stats.getHdfsBytesRead();
// Test with PredicatePushdownOptimizer enabled. Only 2 blocks should be read
- pigServer.registerQuery("D = load '" + INPUT + "' using OrcStorage();");
+ pigServer.registerQuery("D = load '" + inputFile + "' using OrcStorage();");
pigServer.registerQuery("E = filter D by " + filterStmt + ";");
job = pigServer.store("E", OUTPUT4);
//Util.copyFromClusterToLocal(cluster, OUTPUT4 + "/part-m-00000", OUTPUT4);
@@ -409,6 +410,13 @@ public class TestOrcStoragePushdown {
}
+ @Test
+ public void testPredicatePushdownChar() throws Exception {
+ testPredicatePushdown(basedir + "charvarchar.orc", "$0 == 'ulysses thompson'", 18, 18000);
+ }
-
+ @Test
+ public void testPredicatePushdownVarchar() throws Exception {
+ testPredicatePushdown(basedir + "charvarchar.orc", "$1 == 'alice allen         '", 19, 18000);
+ }
}
Added: pig/trunk/test/org/apache/pig/builtin/orc/charvarchar.orc
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/builtin/orc/charvarchar.orc?rev=1627672&view=auto
==============================================================================
Binary file - no diff available.
Propchange: pig/trunk/test/org/apache/pig/builtin/orc/charvarchar.orc
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream