You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by pr...@apache.org on 2010/01/08 19:17:12 UTC
svn commit: r897283 [1/5] - in /hadoop/pig/branches/load-store-redesign: ./
contrib/piggybank/java/ contrib/zebra/
contrib/zebra/src/java/org/apache/hadoop/zebra/pig/
contrib/zebra/src/java/org/apache/hadoop/zebra/types/
contrib/zebra/src/test/e2e/merg...
Author: pradeepkth
Date: Fri Jan 8 18:17:07 2010
New Revision: 897283
URL: http://svn.apache.org/viewvc?rev=897283&view=rev
Log:
svn merge -r892972:896951 http://svn.apache.org/repos/asf/hadoop/pig/trunk
Added:
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/1.txt
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/2.txt
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/bool.pig
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/bool.txt
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/leftBigMerge.pig
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/mergeBytes.pig
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/mergeDouble.pig
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/mergeEmpty.pig
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/mergeFloat.pig
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/mergeInt.pig
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/mergeLong.pig
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/mergeNotCommonKey.pig
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/mergeString.pig
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/mergeab.pig
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/mergeabc.pig
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/mergeac.pig
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/readme
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/reverse.txt
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/multi-query/
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/multi-query/filter.pig
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/multi-query/filterandgroup.pig
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/multi-query/filterandgroup_wrong.pig
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/multi-query/group.pig
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/multi-query/group_wrong.pig
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/orderPreserveUnion/
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/orderPreserveUnion/1.txt
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/orderPreserveUnion/2.txt
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/orderPreserveUnion/readme
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/orderPreserveUnion/unionBytes.pig
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/orderPreserveUnion/unionDouble.pig
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/orderPreserveUnion/unionFloat.pig
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/orderPreserveUnion/unionInt.pig
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/orderPreserveUnion/unionLong.pig
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/orderPreserveUnion/unionString.pig
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/pruning/readme
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/streaming/
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/streaming/1.txt
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/streaming/complex.txt
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/streaming/load.pig
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/streaming/map.txt
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/streaming/nested_map.txt
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/streaming/readme
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/streaming/simple.txt
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/streaming/table-creator-complex.pig
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/streaming/table-creator-simple.pig
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/streaming/table-creator.pig
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/streaming/table-mapper.pl
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/streaming/tuple.txt
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestColumnName.java
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/ToolTestComparator.java
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestGlobTableLoader.java
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestOrderPreserveMultiTableGlob.java
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/types/TestStorageRecord2.java
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/bad_join.pig
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/collecion4.pig
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/config
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/join.pig
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/join2.pig
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/join_after_union.pig
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/join_after_union10k.pig
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/join_after_union2.pig
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/join_after_union3.pig
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/join_jira.pig
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/join_jira1.pig
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/readme
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/sortSimpleString.pig
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_join_01.pig
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_join_02.pig
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_join_03.pig
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_join_04.pig
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_join_05.pig
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_load_store_00.pig
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_load_store_01.pig
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_load_store_02.pig
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_load_store_03.pig
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_load_store_04.pig
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_load_store_05.pig
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_sort_01.pig
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_sort_01_save.pig
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_sort_02.pig
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_sort_03.pig
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_union_01.pig
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_union_02.pig
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_union_02_2.pig
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_union_03.pig
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/stress_union_04.pig
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/testjoing1.pig
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/union1.pig
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/union2.pig
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/union3.pig
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/stress/union4.pig
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/PigCounters.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/pen/LocalLogToPhyTranslationVisitor.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/pen/physicalOperators/
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/pen/physicalOperators/POCogroup.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/pen/physicalOperators/POCounter.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/pen/physicalOperators/POCross.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/pen/physicalOperators/POSplit.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/pen/physicalOperators/POSplitOutput.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/pen/physicalOperators/POStreamLocal.java
hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/utils/LocalSeekableInputStream.java
Removed:
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/local/datastorage/LocalDataStorage.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/local/datastorage/LocalDir.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/local/datastorage/LocalFile.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/local/datastorage/LocalPath.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/local/datastorage/LocalSeekableInputStream.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/local/executionengine/LocalExecutionEngine.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/local/executionengine/LocalJob.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/local/executionengine/LocalPOStoreImpl.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/local/executionengine/LocalPigLauncher.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/local/executionengine/LocalResult.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/local/executionengine/physicalLayer/LocalLogToPhyTranslationVisitor.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/local/executionengine/physicalLayer/counters/POCounter.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/local/executionengine/physicalLayer/relationalOperators/POCogroup.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/local/executionengine/physicalLayer/relationalOperators/POCross.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/local/executionengine/physicalLayer/relationalOperators/POSplit.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/local/executionengine/physicalLayer/relationalOperators/POSplitOutput.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/local/executionengine/physicalLayer/relationalOperators/POStreamLocal.java
hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestLocalJobSubmission.java
Modified:
hadoop/pig/branches/load-store-redesign/CHANGES.txt
hadoop/pig/branches/load-store-redesign/build.xml
hadoop/pig/branches/load-store-redesign/contrib/piggybank/java/build.xml
hadoop/pig/branches/load-store-redesign/contrib/zebra/CHANGES.txt
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/java/org/apache/hadoop/zebra/pig/TableLoader.java
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/java/org/apache/hadoop/zebra/types/Partition.java
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/java/org/apache/hadoop/zebra/types/TableStorageParser.jjt
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/pruning/orderby2.pig
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestNegative.java
hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestOrderPreserveProjectionNegative.java
hadoop/pig/branches/load-store-redesign/src/docs/src/documentation/content/xdocs/piglatin_reference.xml
hadoop/pig/branches/load-store-redesign/src/docs/src/documentation/content/xdocs/piglatin_users.xml
hadoop/pig/branches/load-store-redesign/src/docs/src/documentation/content/xdocs/zebra_mapreduce.xml
hadoop/pig/branches/load-store-redesign/src/docs/src/documentation/content/xdocs/zebra_pig.xml
hadoop/pig/branches/load-store-redesign/src/docs/src/documentation/content/xdocs/zebra_reference.xml
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/JobControlCompiler.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MapReduceOper.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PhyPlanSetter.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/SampleOptimizer.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/plans/POPackageAnnotator.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/plans/PhyPlanVisitor.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/data/DefaultAbstractBag.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/data/DefaultDataBag.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/data/DistinctDataBag.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/data/InternalCachedBag.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/data/InternalDistinctBag.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/data/InternalSortedBag.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/data/SortedDataBag.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/experimental/JsonMetadata.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/PigContext.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/logicalLayer/ColumnPruner.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/logicalLayer/LOForEach.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/logicalLayer/LOUnion.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/logicalLayer/RelationalOperator.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/logicalLayer/optimizer/LogicalOptimizer.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/logicalLayer/optimizer/PruneColumns.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/logicalLayer/optimizer/PushDownForeachFlatten.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/pen/DerivedDataVisitor.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/pen/ExampleGenerator.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/tools/pigstats/PigStats.java
hadoop/pig/branches/load-store-redesign/test/findbugsExcludeFile.xml
hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestBZip.java
hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestEvalPipeline2.java
hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestLoadFunc.java
hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestLocalPOSplit.java
hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestMultiQuery.java
hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestPOCogroup.java
hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestPOCross.java
hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestPruneColumn.java
hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestPushDownForeachFlatten.java
hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestStore.java
Modified: hadoop/pig/branches/load-store-redesign/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/CHANGES.txt?rev=897283&r1=897282&r2=897283&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/CHANGES.txt (original)
+++ hadoop/pig/branches/load-store-redesign/CHANGES.txt Fri Jan 8 18:17:07 2010
@@ -51,6 +51,12 @@
IMPROVEMENTS
+PIG-1177: Pig 0.6 Docs - Zebra docs (chandec via olgan)
+
+PIG-1175: Pig 0.6 Docs - Store v. Dump (chandec via olgan)
+
+PIG-1102: Collect number of spills per job (sriranjan via olgan)
+
PIG-1149: Allow instantiation of SampleLoaders with parametrized LoadFuncs
(dvryaboy via pradeepkth)
@@ -91,10 +97,14 @@
PIG-1085: Pass JobConf and UDF specific configuration information to UDFs
(gates)
+PIG-1173: pig cannot be built without an internet connection (jmhodges via daijy)
+
OPTIMIZATIONS
BUG FIXES
+PIG-1171: Top-N queries produce incorrect results when followed by a cross statement (rding via olgan)
+
PIG-1159: merge join right side table does not support comma seperated paths
(rding via olgan)
@@ -151,6 +161,11 @@
PIG-1086: Nested sort by * throw exception (rding via daijy)
+PIG-1146: Inconsistent column pruning in LOUnion (daijy)
+
+PIG-1176: Column Pruner issues in union of loader with and without schema
+(daijy)
+
Release 0.6.0 - Unreleased
INCOMPATIBLE CHANGES
@@ -344,6 +359,16 @@
PIG-1144: set default_parallelism construct does not set the number of
reducers correctly (daijy)
+PIG-1165: Signature of loader does not set correctly for order by (daijy)
+
+PIG-761: ERROR 2086 on simple JOIN (daijy)
+
+PIG-1172: PushDownForeachFlatten shall not push ForEach below Join if the
+flattened fields is used in Join (daijy)
+
+PIG-1180: Piggybank should compile even if we only have
+"pig-withouthadoop.jar" but no "pig.jar" in the pig home directory (daijy)
+
Release 0.5.0
INCOMPATIBLE CHANGES
Modified: hadoop/pig/branches/load-store-redesign/build.xml
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/build.xml?rev=897283&r1=897282&r2=897283&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/build.xml (original)
+++ hadoop/pig/branches/load-store-redesign/build.xml Fri Jan 8 18:17:07 2010
@@ -869,7 +869,7 @@
</condition>
</target>
- <target name="ivy-download" description="To download ivy">
+ <target name="ivy-download" description="To download ivy" unless="offline">
<get src="${ivy_repo_url}" dest="${ivy.jar}" usetimestamp="true"/>
</target>
Modified: hadoop/pig/branches/load-store-redesign/contrib/piggybank/java/build.xml
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/piggybank/java/build.xml?rev=897283&r1=897282&r2=897283&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/piggybank/java/build.xml (original)
+++ hadoop/pig/branches/load-store-redesign/contrib/piggybank/java/build.xml Fri Jan 8 18:17:07 2010
@@ -32,6 +32,8 @@
<property name="build.docs" value="${build.dir}/docs" />
<property name="build.javadoc" value="${build.docs}/api" />
<property name="pigjar" value="../../../pig.jar" />
+ <property name="pigjar-withouthadoop" value="../../../pig-withouthadoop.jar" />
+ <property name="hadoopjar" value="../../../lib/hadoop20.jar" />
<property name="pigtest" value="../../../build/test/classes" />
<property name="udfjar" value="piggybank.jar" />
<property name="src.dir" value="src/main/java/org/apache/pig/piggybank" />
@@ -51,6 +53,8 @@
<path id="pigudf.classpath">
<pathelement location="${build.classes}"/>
<pathelement location="${pigjar}"/>
+ <pathelement location="${pigjar-withouthadoop}"/>
+ <pathelement location="${hadoopjar}"/>
<pathelement location="${pigtest}"/>
</path>
Modified: hadoop/pig/branches/load-store-redesign/contrib/zebra/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/CHANGES.txt?rev=897283&r1=897282&r2=897283&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/CHANGES.txt (original)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/CHANGES.txt Fri Jan 8 18:17:07 2010
@@ -12,6 +12,10 @@
IMPROVEMENTS
+ PIG-1170 new end-to-end and stress test cases (jing1234 via yanz)
+
+ PIG-1136 Support of map split on hash keys with leading underscore (xuefuz via yanz)
+
PIG-1125 Map/Reduce API Changes (Chao Wang via yanz)
PIG-1104 Streaming Support (Chao Wang via yanz)
@@ -48,6 +52,10 @@
BUG FIXES
+ PIG-1167: Hadoop file glob support (yanz)
+
+ PIG-1153: Record split exception fix (yanz)
+
PIG-1145: Merge Join on Large Table throws an EOF exception (yanz)
PIG-1095: Schema support of anonymous fields in COLECTION fails (yanz via
Modified: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/java/org/apache/hadoop/zebra/pig/TableLoader.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/java/org/apache/hadoop/zebra/pig/TableLoader.java?rev=897283&r1=897282&r2=897283&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/java/org/apache/hadoop/zebra/pig/TableLoader.java (original)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/java/org/apache/hadoop/zebra/pig/TableLoader.java Fri Jan 8 18:17:07 2010
@@ -226,7 +226,7 @@
FileSystem fs = p.getFileSystem(jobConf);
FileStatus[] matches = fs.globStatus(p);
if (matches == null) {
- LOG.warn("Input path does not exist: " + p);
+ throw new IOException("Input path does not exist: " + p);
}
else if (matches.length == 0) {
LOG.warn("Input Pattern " + p + " matches 0 files");
@@ -293,33 +293,14 @@
Projection projection;
- if (!fileName.contains(",")) { // one table;
- org.apache.hadoop.zebra.schema.Schema tschema = BasicTable.Reader.getSchema(new Path(fileName), jobConf);
- try {
- projection = new org.apache.hadoop.zebra.types.Projection(tschema, TableInputFormat.getProjection(jobConf));
- projectionSchema = projection.getProjectionSchema();
- } catch (ParseException e) {
- throw new IOException("Schema parsing failed : "+e.getMessage());
- }
- } else { // table union;
- org.apache.hadoop.zebra.schema.Schema unionSchema = new org.apache.hadoop.zebra.schema.Schema();
- for (Path p : paths) {
- org.apache.hadoop.zebra.schema.Schema schema = BasicTable.Reader.getSchema(p, jobConf);
- try {
- unionSchema.unionSchema(schema);
- } catch (ParseException e) {
- throw new IOException(e.getMessage());
- }
- }
-
- try {
- projection = new org.apache.hadoop.zebra.types.Projection(unionSchema, TableInputFormat.getProjection(jobConf));
- projectionSchema = projection.getProjectionSchema();
- } catch (ParseException e) {
- throw new IOException("Schema parsing failed : "+e.getMessage());
- }
- }
-
+ org.apache.hadoop.zebra.schema.Schema tschema = TableInputFormat.getSchema(jobConf);
+ try {
+ projection = new org.apache.hadoop.zebra.types.Projection(tschema, TableInputFormat.getProjection(jobConf));
+ projectionSchema = projection.getProjectionSchema();
+ } catch (ParseException e) {
+ throw new IOException("Schema parsing failed : "+e.getMessage());
+ }
+
if (projectionSchema == null) {
throw new IOException("Cannot determine table projection schema");
}
Modified: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/java/org/apache/hadoop/zebra/types/Partition.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/java/org/apache/hadoop/zebra/types/Partition.java?rev=897283&r1=897282&r2=897283&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/java/org/apache/hadoop/zebra/types/Partition.java (original)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/java/org/apache/hadoop/zebra/types/Partition.java Fri Jan 8 18:17:07 2010
@@ -149,12 +149,11 @@
class PartitionFieldInfo {
private HashSet<PartitionInfo.ColumnMappingEntry> mSplitMaps =
new HashSet<ColumnMappingEntry>();
- private HashSet<String> mSplitColumns = new HashSet<String>();
private ColumnMappingEntry mCGIndex = null;
private String mCGName = null; // fully qualified name
private HashSet<String> keySet = null;
private SplitType stype = SplitType.NONE;
- private boolean splitChild;
+ private HashSet<String> splitChildren = new HashSet<String>();
/**
* set a MAP key split (sub)column
@@ -165,7 +164,6 @@
new Partition.PartitionInfo.ColumnMappingEntry( ri, fi, fs);
mSplitMaps.add(cme);
// multiple map splits on one MAP column is allowed!
- mSplitColumns.add(name);
if (keySet == null)
keySet = new HashSet<String>();
return cme.addKeys(keys, keySet);
@@ -196,22 +194,19 @@
if (st == stype)
{
// multiple MAP splits of a field and its children on different keys are ok
- if (st == SplitType.MAP || cst == SplitType.MAP || splitChild == this.splitChild)
+ if (st == SplitType.MAP || cst == SplitType.MAP)
return;
}
- if (stype != SplitType.NONE) {
- if (childName != null)
- name = name + "." + childName;
- throw new ParseException("Different Split Types Set on the same field: " + name);
+ if (splitChild)
+ {
+ if (stype != SplitType.NONE && splitChildren.isEmpty())
+ throw new ParseException("Split on "+name+" is set at different levels.");
+ splitChildren.add(childName);
+ } else {
+ if (splitChildren.contains(childName))
+ throw new ParseException("Split on "+name+" is set at different levels.");
}
stype = st;
- this.splitChild = splitChild;
- if (mSplitColumns.contains(name)) {
- if (childName != null)
- name = name + "." + childName;
- throw new ParseException("Split on "+name+" are set more than once");
- }
- mSplitColumns.add(name);
}
/*
Modified: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/java/org/apache/hadoop/zebra/types/TableStorageParser.jjt
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/java/org/apache/hadoop/zebra/types/TableStorageParser.jjt?rev=897283&r1=897282&r2=897283&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/java/org/apache/hadoop/zebra/types/TableStorageParser.jjt (original)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/java/org/apache/hadoop/zebra/types/TableStorageParser.jjt Fri Jan 8 18:17:07 2010
@@ -74,7 +74,8 @@
| <#SPECIALCHAR : ["_"] >
| <#FSSPECIALCHAR: ["-", ":", "/"]>
| <#SCOPEOP : "::">
-| <IDENTIFIER: ( <LETTER> )+ ( <DIGIT> | <LETTER> | <SPECIALCHAR> )* ( <SCOPEOP> ( <LETTER> )+ ( <DIGIT> | <LETTER> | <SPECIALCHAR> )*)* >
+| <IDENTIFIER: ( <LETTER> )+ ( <DIGIT> | <LETTER> | <SPECIALCHAR> )* ( <SCOPEOP> ( <LETTER> )+ ( <DIGIT> | <LETTER> | <SPECIALCHAR> )*)* >
+| <MAPKEYIDENTIFIER: ( <LETTER> | <SPECIALCHAR> )+ ( <DIGIT> | <LETTER> | <SPECIALCHAR> )* ( <SCOPEOP> ( <LETTER> )+ ( <DIGIT> | <LETTER> | <SPECIALCHAR> )*)* >
| <SHORT : (<OCTAL>){3} >
}
@@ -299,9 +300,9 @@
}
{
(
- LOOKAHEAD(SchemaRecord()) fs = SchemaRecord(mSchema, name, colIndex)
-| LOOKAHEAD(SchemaMap()) fs = SchemaMap(mSchema, name, colIndex)
-| LOOKAHEAD(AtomSchema()) fs = AtomSchema(mSchema, name, colIndex)
+ LOOKAHEAD(2) fs = SchemaRecord(mSchema, name, colIndex)
+| LOOKAHEAD(2) fs = SchemaMap(mSchema, name, colIndex)
+| LOOKAHEAD(2) fs = AtomSchema(mSchema, name, colIndex)
)
{
return fs;
@@ -517,9 +518,18 @@
HashSet<String> result = new HashSet<String>();
}
{
- t = <IDENTIFIER> { result.add(t.image); }
- ("|" t = <IDENTIFIER> { result.add(t.image); })*
+ t = hashKey() { result.add(t.image); }
+ ("|" t = hashKey() { result.add(t.image); })*
{
return result;
}
}
+
+Token hashKey() :
+{
+ Token t;
+}
+{
+ ( t = <MAPKEYIDENTIFIER> | t = <IDENTIFIER> )
+ { return t; }
+}
Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/1.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/1.txt?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/1.txt (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/1.txt Fri Jan 8 18:17:07 2010
@@ -0,0 +1,4 @@
+100 100.1 100 50e+2 something someting (0,0) [a#0,b#0,c#0]
+1 1.1 11 1.1 some some (1,1) [a#1,b#1,c#1]
+-100 -100.1 -100 -50e+2 so so (2,2) [a#2,b#2,c#2]
+3 1.1 11 1.1 some some (3,3) [a#3,b#3,c#3]
Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/2.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/2.txt?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/2.txt (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/2.txt Fri Jan 8 18:17:07 2010
@@ -0,0 +1,4 @@
+100 100.1 100 50e+2 something someting (0,0) [b#0,c#0,a#0]
+2 1.2 12 1.2 somee somee (1,1) [a#1,b#1,c#1]
+-99 -99.1 -99 -40e+2 soo soo (2,2) [a#2,b#2,c#2]
+3 1.1 11 1.1 some some (3,3) [b#3,c#3,a#3]
Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/bool.pig
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/bool.pig?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/bool.pig (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/bool.pig Fri Jan 8 18:17:07 2010
@@ -0,0 +1,5 @@
+register /grid/0/dev/hadoopqa/jars/zebra.jar;
+a1 = load 'bool.txt' as (b1:Boolean, b2:Boolean);
+dump a1;
+
+
Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/bool.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/bool.txt?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/bool.txt (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/bool.txt Fri Jan 8 18:17:07 2010
@@ -0,0 +1,2 @@
+true true
+false false
Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/leftBigMerge.pig
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/leftBigMerge.pig?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/leftBigMerge.pig (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/leftBigMerge.pig Fri Jan 8 18:17:07 2010
@@ -0,0 +1,18 @@
+register /grid/0/dev/hadoopqa/jars/zebra.jar;
+a1 = load '1.txt' as (a:int, b:float,c:long,d:double,e:chararray,f:bytearray,r1(f1:chararray,f2:chararray),m1:map[]);
+
+a2 = load 'reverse.txt' as (m1:map[],r1(f1:chararray,f2:chararray),f:bytearray,e:chararray,d:double,c:long,b:float,a:int);
+--dump a1;
+
+a1order = order a1 by e;
+a2order = order a2 by e;
+
+
+store a1order into 'new11' using org.apache.hadoop.zebra.pig.TableStorer('[a,b,c];[d,e,f,r1,m1]');
+store a2order into 'new22' using org.apache.hadoop.zebra.pig.TableStorer('[a,b,c];[d,e,f,r1,m1]');
+
+rec1 = load 'new11' using org.apache.hadoop.zebra.pig.TableLoader();
+rec2 = load 'new22' using org.apache.hadoop.zebra.pig.TableLoader();
+joina = join rec1 by (e), rec2 by (e) using "merge" ;
+dump joina;
+
Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/mergeBytes.pig
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/mergeBytes.pig?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/mergeBytes.pig (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/mergeBytes.pig Fri Jan 8 18:17:07 2010
@@ -0,0 +1,19 @@
+register /grid/0/dev/hadoopqa/jars/zebra.jar;
+a1 = load '1.txt' as (a:int, b:float,c:long,d:double,e:chararray,f:bytearray,r1(f1:chararray,f2:chararray),m1:map[]);
+
+a = load '2.txt' as (a:int, b:float,c:long,d:double,e:chararray,f:bytearray,r1(f1:chararray,f2:chararray),m1:map[]);
+--dump a1;
+
+a1order = order a1 by f;
+aorder = order a by f;
+dump a1order;
+
+store a1order into 'f1' using org.apache.hadoop.zebra.pig.TableStorer('[a,b,c];[d,e,f,r1,m1]');
+store aorder into 'f2' using org.apache.hadoop.zebra.pig.TableStorer('[a,b,c];[d,e,f,r1,m1]');
+
+rec1 = load 'f1' using org.apache.hadoop.zebra.pig.TableLoader();
+rec2 = load 'f2' using org.apache.hadoop.zebra.pig.TableLoader();
+--records1 = LOAD 'f1,f2' USING org.apache.hadoop.zebra.pig.TableLoader ('source_table,f,m1,a,b,c,d,e', 'sorted');
+joina = join rec1 by f, rec2 by f using "merge" ;
+dump joina;
+
Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/mergeDouble.pig
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/mergeDouble.pig?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/mergeDouble.pig (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/mergeDouble.pig Fri Jan 8 18:17:07 2010
@@ -0,0 +1,20 @@
+register /grid/0/dev/hadoopqa/jars/zebra.jar;
+a1 = load '1.txt' as (a:int, b:float,c:long,d:double,e:chararray,f:bytearray,r1(f1:chararray,f2:chararray),m1:map[]);
+
+a = load '2.txt' as (a:int, b:float,c:long,d:double,e:chararray,f:bytearray,r1(f1:chararray,f2:chararray),m1:map[]);
+--dump a;
+
+a1order = order a1 by d;
+aorder = order a by d;
+
+
+store a1order into 'd1' using org.apache.hadoop.zebra.pig.TableStorer('[a,b,c];[d,e,f,r1,m1]');
+store aorder into 'd2' using org.apache.hadoop.zebra.pig.TableStorer('[a,b,c];[d,e,f,r1,m1]');
+
+rec1 = load 'd1' using org.apache.hadoop.zebra.pig.TableLoader();
+rec2 = load 'd2' using org.apache.hadoop.zebra.pig.TableLoader();
+--records1 = LOAD 'd1,d' USING org.apache.hadoop.zebra.pig.TableLoader ('d,e,f,m1,a,b,c', 'sorted');
+joina = join rec1 by d, rec2 by d using "merge" ;
+--dump records1;
+dump joina;
+
Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/mergeEmpty.pig
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/mergeEmpty.pig?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/mergeEmpty.pig (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/mergeEmpty.pig Fri Jan 8 18:17:07 2010
@@ -0,0 +1,18 @@
+register /grid/0/dev/hadoopqa/jars/zebra.jar;
+a1 = load '1.txt' as (a:int, b:float,c:long,d:double,e:chararray,f:bytearray,r1(f1:chararray,f2:chararray),m1:map[]);
+
+a2 = load 'empty.txt' as (a:int, b:float,c:long,d:double,e:chararray,f:bytearray,r1(f1:chararray,f2:chararray),m1:map[]);
+--dump a1;
+
+a1order = order a1 by a;
+a2order = order a2 by a;
+
+
+store a1order into 'a1' using org.apache.hadoop.zebra.pig.TableStorer('[a,b,c];[d,e,f,r1,m1]');
+store a2order into 'empty' using org.apache.hadoop.zebra.pig.TableStorer('[a,b,c];[d,e,f,r1,m1]');
+
+rec1 = load 'a1' using org.apache.hadoop.zebra.pig.TableLoader();
+rec2 = load 'empty' using org.apache.hadoop.zebra.pig.TableLoader();
+joina = join rec1 by a, rec2 by a using "merge" ;
+dump joina;
+
Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/mergeFloat.pig
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/mergeFloat.pig?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/mergeFloat.pig (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/mergeFloat.pig Fri Jan 8 18:17:07 2010
@@ -0,0 +1,20 @@
+register /grid/0/dev/hadoopqa/jars/zebra.jar;
+a1 = load '1.txt' as (a:int, b:float,c:long,d:double,e:chararray,f:bytearray,r1(f1:chararray,f2:chararray),m1:map[]);
+
+a = load '2.txt' as (a:int, b:float,c:long,d:double,e:chararray,f:bytearray,r1(f1:chararray,f2:chararray),m1:map[]);
+--dump a;
+
+a1order = order a1 by b;
+aorder = order a by b;
+
+
+store a1order into 'b1' using org.apache.hadoop.zebra.pig.TableStorer('[a,b,c];[d,e,f,r1,m1]');
+store aorder into 'b2' using org.apache.hadoop.zebra.pig.TableStorer('[a,b,c];[d,e,f,r1,m1]');
+
+rec1 = load 'b1' using org.apache.hadoop.zebra.pig.TableLoader();
+rec2 = load 'b2' using org.apache.hadoop.zebra.pig.TableLoader();
+--records1 = LOAD 'b1,b2' USING org.apache.hadoop.zebra.pig.TableLoader ('b,c,d,e,f,r1,m1,a', 'sorted');
+joina = join rec1 by b, rec2 by b using "merge" ;
+--dump records1;
+dump joina;
+
Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/mergeInt.pig
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/mergeInt.pig?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/mergeInt.pig (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/mergeInt.pig Fri Jan 8 18:17:07 2010
@@ -0,0 +1,20 @@
+register /grid/0/dev/hadoopqa/jars/zebra.jar;
+a1 = load '1.txt' as (a:int, b:float,c:long,d:double,e:chararray,f:bytearray,r1(f1:chararray,f2:chararray),m1:map[]);
+
+a = load '2.txt' as (a:int, b:float,c:long,d:double,e:chararray,f:bytearray,r1(f1:chararray,f2:chararray),m1:map[]);
+--dump a;
+
+a1order = order a1 by a;
+aorder = order a by a;
+
+
+store a1order into 'a1' using org.apache.hadoop.zebra.pig.TableStorer('[a,b,c];[d,e,f,r1,m1]');
+store aorder into 'a2' using org.apache.hadoop.zebra.pig.TableStorer('[a,b,c];[d,e,f,r1,m1]');
+
+rec1 = load 'a1' using org.apache.hadoop.zebra.pig.TableLoader();
+rec2 = load 'a2' using org.apache.hadoop.zebra.pig.TableLoader();
+--records1 = LOAD 'a1,a2' USING org.apache.hadoop.zebra.pig.TableLoader ('a,b,c,d,e,f,r1,m1', 'sorted');
+joina = join rec1 by a, rec2 by a using "merge" ;
+--dump records1;
+dump joina;
+
Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/mergeLong.pig
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/mergeLong.pig?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/mergeLong.pig (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/mergeLong.pig Fri Jan 8 18:17:07 2010
@@ -0,0 +1,20 @@
+register /grid/0/dev/hadoopqa/jars/zebra.jar;
+a1 = load '1.txt' as (a:int, b:float,c:long,d:double,e:chararray,f:bytearray,r1(f1:chararray,f2:chararray),m1:map[]);
+
+a = load '2.txt' as (a:int, b:float,c:long,d:double,e:chararray,f:bytearray,r1(f1:chararray,f2:chararray),m1:map[]);
+--dump a;
+
+a1order = order a1 by c;
+aorder = order a by c;
+
+
+store a1order into 'c1' using org.apache.hadoop.zebra.pig.TableStorer('[a,b,c];[d,e,f,r1,m1]');
+store aorder into 'c2' using org.apache.hadoop.zebra.pig.TableStorer('[a,b,c];[d,e,f,r1,m1]');
+
+rec1 = load 'c1' using org.apache.hadoop.zebra.pig.TableLoader();
+rec2 = load 'c2' using org.apache.hadoop.zebra.pig.TableLoader();
+--records1 = LOAD 'c1,c2' USING org.apache.hadoop.zebra.pig.TableLoader ('c,d,e,f,r1,m1,a,b', 'sorted');
+joina = join rec1 by c, rec2 by c using "merge" ;
+--dump records1;
+dump joina;
+
Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/mergeNotCommonKey.pig
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/mergeNotCommonKey.pig?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/mergeNotCommonKey.pig (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/mergeNotCommonKey.pig Fri Jan 8 18:17:07 2010
@@ -0,0 +1,20 @@
+register /grid/0/dev/hadoopqa/jars/zebra.jar;
+a1 = load '3.txt' as (e:chararray,f:chararray,r1:chararray);
+
+a2 = load '2.txt' as (a:int, b:float,c:long,d:double,e:chararray,f:bytearray,r1(f1:chararray,f2:chararray),m1:map[]);
+--dump a1;
+
+a1order = order a1 by e;
+a2order = order a2 by a;
+
+
+--store a1order into 'notCommonSortKey1' using org.apache.hadoop.zebra.pig.TableStorer('[e,f,r1]');
+store a2order into 'notCommonSortKey2' using org.apache.hadoop.zebra.pig.TableStorer('[a,b,c];[d,e,f,r1,m1]');
+
+rec1 = load 'notCommonSortKey1' using org.apache.hadoop.zebra.pig.TableLoader();
+rec2 = load 'notCommonSortKey2' using org.apache.hadoop.zebra.pig.TableLoader();
+joina = join rec1 by e, rec2 by a using "merge" ;
+dump joina;
+
+--ERROR 1107: Cannot merge join keys, incompatible types
+
Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/mergeString.pig
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/mergeString.pig?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/mergeString.pig (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/mergeString.pig Fri Jan 8 18:17:07 2010
@@ -0,0 +1,20 @@
+register /grid/0/dev/hadoopqa/jars/zebra.jar;
+a1 = load '1.txt' as (a:int, b:float,c:long,d:double,e:chararray,f:bytearray,r1(f1:chararray,f2:chararray),m1:map[]);
+
+a = load '2.txt' as (a:int, b:float,c:long,d:double,e:chararray,f:bytearray,r1(f1:chararray,f2:chararray),m1:map[]);
+--dump a;
+
+a1order = order a1 by e;
+aorder = order a by e;
+
+
+store a1order into 'e1' using org.apache.hadoop.zebra.pig.TableStorer('[a,b,c];[d,e,f,r1,m1]');
+store aorder into 'e2' using org.apache.hadoop.zebra.pig.TableStorer('[a,b,c];[d,e,f,r1,m1]');
+
+rec1 = load 'e1' using org.apache.hadoop.zebra.pig.TableLoader();
+rec2 = load 'e2' using org.apache.hadoop.zebra.pig.TableLoader();
+--records1 = LOAD 'e1,e2' USING org.apache.hadoop.zebra.pig.TableLoader ('e,f,r1,m1,a,b,c,d', 'sorted');
+joina = join rec1 by e, rec2 by e using "merge" ;
+--dump records1;
+dump joina;
+
Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/mergeab.pig
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/mergeab.pig?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/mergeab.pig (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/mergeab.pig Fri Jan 8 18:17:07 2010
@@ -0,0 +1,18 @@
+register /grid/0/dev/hadoopqa/jars/zebra.jar;
+a1 = load '1.txt' as (a:int, b:float,c:long,d:double,e:chararray,f:bytearray,r1(f1:chararray,f2:chararray),m1:map[]);
+
+a2 = load '2.txt' as (a:int, b:float,c:long,d:double,e:chararray,f:bytearray,r1(f1:chararray,f2:chararray),m1:map[]);
+--dump a1;
+
+a1order = order a1 by a,b;
+a2order = order a2 by a,b;
+
+
+store a1order into 'ab1' using org.apache.hadoop.zebra.pig.TableStorer('[a,b,c];[d,e,f,r1,m1]');
+store a2order into 'ab2' using org.apache.hadoop.zebra.pig.TableStorer('[a,b,c];[d,e,f,r1,m1]');
+
+rec1 = load 'ab1' using org.apache.hadoop.zebra.pig.TableLoader();
+rec2 = load 'ab2' using org.apache.hadoop.zebra.pig.TableLoader();
+joina = join rec1 by (a,b), rec2 by (a,b) using "merge" ;
+dump joina;
+
Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/mergeabc.pig
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/mergeabc.pig?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/mergeabc.pig (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/mergeabc.pig Fri Jan 8 18:17:07 2010
@@ -0,0 +1,20 @@
+register /grid/0/dev/hadoopqa/jars/zebra.jar;
+a1 = load '1.txt' as (a:int, b:float,c:long,d:double,e:chararray,f:bytearray,r1(f1:chararray,f2:chararray),m1:map[]);
+
+a2 = load '2.txt' as (a:int, b:float,c:long,d:double,e:chararray,f:bytearray,r1(f1:chararray,f2:chararray),m1:map[]);
+--dump a1;
+
+a1order = order a1 by a,b,c;
+a2order = order a2 by a,b,c;
+
+
+store a1order into 'abc1' using org.apache.hadoop.zebra.pig.TableStorer('[a,b,c];[d,e,f,r1,m1]');
+store a2order into 'abc2' using org.apache.hadoop.zebra.pig.TableStorer('[a,b,c];[d,e,f,r1,m1]');
+
+rec1 = load 'abc1' using org.apache.hadoop.zebra.pig.TableLoader();
+rec2 = load 'abc2' using org.apache.hadoop.zebra.pig.TableLoader();
+joina = join rec1 by (a,b,c), rec2 by (a,b,c) using "merge" ;
+dump joina;
+
+
+--store rec1 into 'merge-table' using org.apache.hadoop.zebra.pig.TableStorer('[a,b,c];[d,e,f,r1,m1]');
Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/mergeac.pig
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/mergeac.pig?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/mergeac.pig (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/mergeac.pig Fri Jan 8 18:17:07 2010
@@ -0,0 +1,18 @@
+register /grid/0/dev/hadoopqa/jars/zebra.jar;
+a1 = load '1.txt' as (a:int, b:float,c:long,d:double,e:chararray,f:bytearray,r1(f1:chararray,f2:chararray),m1:map[]);
+
+a2 = load '2.txt' as (a:int, b:float,c:long,d:double,e:chararray,f:bytearray,r1(f1:chararray,f2:chararray),m1:map[]);
+--dump a1;
+
+a1order = order a1 by a,c;
+a2order = order a2 by a,c;
+
+
+store a1order into 'ac1' using org.apache.hadoop.zebra.pig.TableStorer('[a,b,c];[d,e,f,r1,m1]');
+store a2order into 'ac2' using org.apache.hadoop.zebra.pig.TableStorer('[a,b,c];[d,e,f,r1,m1]');
+
+rec1 = load 'ac1' using org.apache.hadoop.zebra.pig.TableLoader();
+rec2 = load 'ac2' using org.apache.hadoop.zebra.pig.TableLoader();
+joina = join rec1 by (a,c), rec2 by (a,c) using "merge" ;
+dump joina;
+
Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/readme
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/readme?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/readme (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/readme Fri Jan 8 18:17:07 2010
@@ -0,0 +1,6 @@
+1. put zebra.jar to /grid/0/dev/hadoopqa/jars
+2. put 1.txt 2.txt empty.txt reverse.txt bool.txt to hdfs /users/hadoopqa/.
+3. run each pig script.
+java -cp /grid/0/dev/hadoopqa/jing1234/conf:/grid/0/dev/hadoopqa/jars/pig.jar:/grid/0/dev/hadoopqa/jars/zebra.jar org.apache.pig.Main -M <my.pig>
+
+
Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/reverse.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/reverse.txt?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/reverse.txt (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/mergeJoin/reverse.txt Fri Jan 8 18:17:07 2010
@@ -0,0 +1,4 @@
+[a#-1,b#-1,c#-1] (-1,-1) somet someth 5e+2 10 10.1 10
+[a#0,b#0,c#0] (0,0) som som 1.0 11 1.0 0
+[a#-2,b#-22,c#-2] (-1,-1) s s -510e+2 -1010 -1010.1 -1100
+[a#2,b#2,c#2] (0,0) som som 1.0 10 1.0 0
Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/multi-query/filter.pig
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/multi-query/filter.pig?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/multi-query/filter.pig (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/multi-query/filter.pig Fri Jan 8 18:17:07 2010
@@ -0,0 +1,11 @@
+register /grid/0/dev/hadoopqa/hadoop/lib/zebra.jar;
+A = load 'filter.txt' as (name:chararray, age:int);
+
+B = filter A by age < 20;
+--dump B;
+store B into 'filter1' using org.apache.hadoop.zebra.pig.TableStorer('[name];[age]');
+
+C = filter A by age >= 20;
+--dump C;
+Store C into 'filter2' using org.apache.hadoop.zebra.pig.TableStorer('[name];[age]');
+
Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/multi-query/filterandgroup.pig
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/multi-query/filterandgroup.pig?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/multi-query/filterandgroup.pig (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/multi-query/filterandgroup.pig Fri Jan 8 18:17:07 2010
@@ -0,0 +1,15 @@
+register /grid/0/dev/hadoopqa/hadoop/lib/zebra.jar;
+A = load 'filter.txt' as (name:chararray, age:int);
+
+B = filter A by age < 20;
+--dump B;
+store B into 'filter1' using org.apache.hadoop.zebra.pig.TableStorer('[name];[age]');
+
+C = filter A by age >= 20;
+--dump C;
+Store C into 'filter2' using org.apache.hadoop.zebra.pig.TableStorer('[name];[age]');
+
+D = group A by age;
+E = foreach D generate group as group1, COUNT(A.age) as myage;
+--E = foreach D generate group;
+Store E into 'filterandgroup' using org.apache.hadoop.zebra.pig.TableStorer('[group1];[myage]');
Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/multi-query/filterandgroup_wrong.pig
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/multi-query/filterandgroup_wrong.pig?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/multi-query/filterandgroup_wrong.pig (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/multi-query/filterandgroup_wrong.pig Fri Jan 8 18:17:07 2010
@@ -0,0 +1,15 @@
+register /grid/0/dev/hadoopqa/hadoop/lib/zebra.jar;
+A = load 'filter.txt' as (name:chararray, age:int);
+
+B = filter A by age < 20;
+--dump B;
+store B into 'filter1' using org.apache.hadoop.zebra.pig.TableStorer('[name];[age]');
+
+C = filter A by age >= 20;
+--dump C;
+Store C into 'filter2' using org.apache.hadoop.zebra.pig.TableStorer('[name];[age]');
+
+D = group A by age;
+E = foreach D generate group as group1, COUNT(A.age) as myage;
+--E = foreach D generate group;
+Store E into 'filterandgroup' using org.apache.hadoop.zebra.pig.TableStorer('[group1];[myage]');
Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/multi-query/group.pig
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/multi-query/group.pig?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/multi-query/group.pig (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/multi-query/group.pig Fri Jan 8 18:17:07 2010
@@ -0,0 +1,10 @@
+register /grid/0/dev/hadoopqa/hadoop/lib/zebra.jar;
+A = load 'filter.txt' as (name:chararray, age:int);
+
+B = group A by name;
+C = foreach B generate group as group1, COUNT(A.name) as myname;
+Store C into 'group1' using org.apache.hadoop.zebra.pig.TableStorer('[group1];[myname]');
+D = group A by age;
+E = foreach D generate group as group2, COUNT(A.age) as myage;
+Store E into 'group2' using org.apache.hadoop.zebra.pig.TableStorer('[group2];[myage]');
+
Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/multi-query/group_wrong.pig
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/multi-query/group_wrong.pig?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/multi-query/group_wrong.pig (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/multi-query/group_wrong.pig Fri Jan 8 18:17:07 2010
@@ -0,0 +1,5 @@
+register /grid/0/dev/hadoopqa/jars/zebra-111.jar;
+A = load 'filter.txt' as (name:chararray, age:int);
+B = group A by name;
+C = foreach B generate group, COUNT(A.name) as cnt;
+Store C into 'group1' using org.apache.hadoop.zebra.pig.TableStorer('[group];[cnt]');
Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/orderPreserveUnion/1.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/orderPreserveUnion/1.txt?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/orderPreserveUnion/1.txt (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/orderPreserveUnion/1.txt Fri Jan 8 18:17:07 2010
@@ -0,0 +1,4 @@
+100 100.1 100 50e+2 something someting (0,0) [a#0,b#0,c#0]
+1 1.1 11 1.1 some some (1,1) [a#1,b#1,c#1]
+-100 -100.1 -100 -50e+2 so so (2,2) [a#2,b#2,c#2]
+3 1.1 11 1.1 some some (3,3) [a#3,b#3,c#3]
Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/orderPreserveUnion/2.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/orderPreserveUnion/2.txt?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/orderPreserveUnion/2.txt (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/orderPreserveUnion/2.txt Fri Jan 8 18:17:07 2010
@@ -0,0 +1,4 @@
+100 100.1 100 50e+2 something someting (0,0) [b#0,c#0,a#0]
+2 1.2 12 1.2 somee somee (1,1) [a#1,b#1,c#1]
+-99 -99.1 -99 -40e+2 soo soo (2,2) [a#2,b#2,c#2]
+3 1.1 11 1.1 some some (3,3) [b#3,c#3,a#3]
Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/orderPreserveUnion/readme
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/orderPreserveUnion/readme?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/orderPreserveUnion/readme (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/orderPreserveUnion/readme Fri Jan 8 18:17:07 2010
@@ -0,0 +1,6 @@
+1. put zebra.jar to /grid/0/dev/hadoopqa/jars
+2. put 1.txt 2.txt to hdfs /users/hadoopqa/.
+3. run each pig script.
+java -cp /grid/0/dev/hadoopqa/jing1234/conf:/grid/0/dev/hadoopqa/jars/pig.jar:/grid/0/dev/hadoopqa/jars/zebra.jar org.apache.pig.Main -M <my.pig>
+
+
Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/orderPreserveUnion/unionBytes.pig
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/orderPreserveUnion/unionBytes.pig?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/orderPreserveUnion/unionBytes.pig (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/orderPreserveUnion/unionBytes.pig Fri Jan 8 18:17:07 2010
@@ -0,0 +1,19 @@
+register /grid/0/dev/hadoopqa/jars/zebra.jar;
+a1 = load '1.txt' as (a:int, b:float,c:long,d:double,e:chararray,f:bytearray,r1(f1:chararray,f2:chararray),m1:map[]);
+
+a = load '2.txt' as (a:int, b:float,c:long,d:double,e:chararray,f:bytearray,r1(f1:chararray,f2:chararray),m1:map[]);
+--dump a1;
+
+a1order = order a1 by f;
+aorder = order a by f;
+--dump a1order;
+
+store a1order into 'f1' using org.apache.hadoop.zebra.pig.TableStorer('[a,b,c];[d,e,f,r1,m1]');
+store aorder into 'f2' using org.apache.hadoop.zebra.pig.TableStorer('[a,b,c];[d,e,f,r1,m1]');
+
+rec1 = load 'f1' using org.apache.hadoop.zebra.pig.TableLoader();
+rec2 = load 'f2' using org.apache.hadoop.zebra.pig.TableLoader();
+records1 = LOAD 'f1,f2' USING org.apache.hadoop.zebra.pig.TableLoader ('source_table,f,m1,a,b,c,d,e', 'sorted');
+joina = join rec1 by a, rec2 by a using "merge" ;
+dump records1;
+
Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/orderPreserveUnion/unionDouble.pig
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/orderPreserveUnion/unionDouble.pig?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/orderPreserveUnion/unionDouble.pig (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/orderPreserveUnion/unionDouble.pig Fri Jan 8 18:17:07 2010
@@ -0,0 +1,19 @@
+register /grid/0/dev/hadoopqa/jars/zebra.jar;
+a1 = load '1.txt' as (a:int, b:float,c:long,d:double,e:chararray,f:bytearray,r1(f1:chararray,f2:chararray),m1:map[]);
+
+a = load '2.txt' as (a:int, b:float,c:long,d:double,e:chararray,f:bytearray,r1(f1:chararray,f2:chararray),m1:map[]);
+--dump a;
+
+a1order = order a1 by d;
+aorder = order a by d;
+
+
+store a1order into 'd1' using org.apache.hadoop.zebra.pig.TableStorer('[a,b,c];[d,e,f,r1,m1]');
+store aorder into 'd' using org.apache.hadoop.zebra.pig.TableStorer('[a,b,c];[d,e,f,r1,m1]');
+
+rec1 = load 'd1' using org.apache.hadoop.zebra.pig.TableLoader();
+rec2 = load 'd' using org.apache.hadoop.zebra.pig.TableLoader();
+records1 = LOAD 'd1,d' USING org.apache.hadoop.zebra.pig.TableLoader ('d,e,f,m1,a,b,c', 'sorted');
+--joina = join rec1 by a, rec2 by a using "merge" ;
+dump records1;
+
Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/orderPreserveUnion/unionFloat.pig
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/orderPreserveUnion/unionFloat.pig?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/orderPreserveUnion/unionFloat.pig (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/orderPreserveUnion/unionFloat.pig Fri Jan 8 18:17:07 2010
@@ -0,0 +1,19 @@
+register /grid/0/dev/hadoopqa/jars/zebra.jar;
+a1 = load '1.txt' as (a:int, b:float,c:long,d:double,e:chararray,f:bytearray,r1(f1:chararray,f2:chararray),m1:map[]);
+
+a = load '2.txt' as (a:int, b:float,c:long,d:double,e:chararray,f:bytearray,r1(f1:chararray,f2:chararray),m1:map[]);
+--dump a;
+
+a1order = order a1 by b;
+aorder = order a by b;
+
+
+store a1order into 'b1' using org.apache.hadoop.zebra.pig.TableStorer('[a,b,c];[d,e,f,r1,m1]');
+store aorder into 'b2' using org.apache.hadoop.zebra.pig.TableStorer('[a,b,c];[d,e,f,r1,m1]');
+
+rec1 = load 'b1' using org.apache.hadoop.zebra.pig.TableLoader();
+rec2 = load 'b2' using org.apache.hadoop.zebra.pig.TableLoader();
+records1 = LOAD 'b1,b2' USING org.apache.hadoop.zebra.pig.TableLoader ('b,c,d,e,f,r1,m1,a', 'sorted');
+--joina = join rec1 by a, rec2 by a2 using "merge" ;
+dump records1;
+
Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/orderPreserveUnion/unionInt.pig
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/orderPreserveUnion/unionInt.pig?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/orderPreserveUnion/unionInt.pig (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/orderPreserveUnion/unionInt.pig Fri Jan 8 18:17:07 2010
@@ -0,0 +1,19 @@
+register /grid/0/dev/hadoopqa/jars/zebra.jar;
+a1 = load '1.txt' as (a:int, b:float,c:long,d:double,e:chararray,f:bytearray,r1(f1:chararray,f2:chararray),m1:map[]);
+
+a = load '2.txt' as (a:int, b:float,c:long,d:double,e:chararray,f:bytearray,r1(f1:chararray,f2:chararray),m1:map[]);
+dump a;
+
+a1order = order a1 by a;
+aorder = order a by a;
+
+
+store a1order into 'a1' using org.apache.hadoop.zebra.pig.TableStorer('[a,b,c];[d,e,f,r1,m1]');
+store aorder into 'a2' using org.apache.hadoop.zebra.pig.TableStorer('[a,b,c];[d,e,f,r1,m1]');
+
+rec1 = load 'a1' using org.apache.hadoop.zebra.pig.TableLoader();
+rec2 = load 'a2' using org.apache.hadoop.zebra.pig.TableLoader();
+records1 = LOAD 'a1,a2' USING org.apache.hadoop.zebra.pig.TableLoader ('a,b,c,d,e,f,r1,m1', 'sorted');
+--joina = join rec1 by a, rec2 by a2 using "merge" ;
+dump records1;
+
Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/orderPreserveUnion/unionLong.pig
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/orderPreserveUnion/unionLong.pig?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/orderPreserveUnion/unionLong.pig (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/orderPreserveUnion/unionLong.pig Fri Jan 8 18:17:07 2010
@@ -0,0 +1,19 @@
+register /grid/0/dev/hadoopqa/jars/zebra.jar;
+a1 = load '1.txt' as (a:int, b:float,c:long,d:double,e:chararray,f:bytearray,r1(f1:chararray,f2:chararray),m1:map[]);
+
+a = load '2.txt' as (a:int, b:float,c:long,d:double,e:chararray,f:bytearray,r1(f1:chararray,f2:chararray),m1:map[]);
+dump a;
+
+a1order = order a1 by c;
+aorder = order a by c;
+
+
+store a1order into 'c1' using org.apache.hadoop.zebra.pig.TableStorer('[a,b,c];[d,e,f,r1,m1]');
+store aorder into 'c2' using org.apache.hadoop.zebra.pig.TableStorer('[a,b,c];[d,e,f,r1,m1]');
+
+rec1 = load 'c1' using org.apache.hadoop.zebra.pig.TableLoader();
+rec2 = load 'c2' using org.apache.hadoop.zebra.pig.TableLoader();
+records1 = LOAD 'c1,c2' USING org.apache.hadoop.zebra.pig.TableLoader ('c,d,e,f,r1,m1,a,b', 'sorted');
+--joina = join rec1 by a, rec2 by a2 using "merge" ;
+dump records1;
+
Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/orderPreserveUnion/unionString.pig
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/orderPreserveUnion/unionString.pig?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/orderPreserveUnion/unionString.pig (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/orderPreserveUnion/unionString.pig Fri Jan 8 18:17:07 2010
@@ -0,0 +1,19 @@
+register /grid/0/dev/hadoopqa/jars/zebra.jar;
+a1 = load '1.txt' as (a:int, b:float,c:long,d:double,e:chararray,f:bytearray,r1(f1:chararray,f2:chararray),m1:map[]);
+
+a = load '2.txt' as (a:int, b:float,c:long,d:double,e:chararray,f:bytearray,r1(f1:chararray,f2:chararray),m1:map[]);
+--dump a;
+
+a1order = order a1 by e;
+aorder = order a by e;
+
+
+store a1order into 'e1' using org.apache.hadoop.zebra.pig.TableStorer('[a,b,c];[d,e,f,r1,m1]');
+store aorder into 'e2' using org.apache.hadoop.zebra.pig.TableStorer('[a,b,c];[d,e,f,r1,m1]');
+
+rec1 = load 'e1' using org.apache.hadoop.zebra.pig.TableLoader();
+rec2 = load 'e2' using org.apache.hadoop.zebra.pig.TableLoader();
+records1 = LOAD 'e1,e2' USING org.apache.hadoop.zebra.pig.TableLoader ('e,f,r1,m1,a,b,c,d', 'sorted');
+--joina = join rec1 by a, rec2 by a2 using "merge" ;
+dump records1;
+
Modified: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/pruning/orderby2.pig
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/pruning/orderby2.pig?rev=897283&r1=897282&r2=897283&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/pruning/orderby2.pig (original)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/pruning/orderby2.pig Fri Jan 8 18:17:07 2010
@@ -1,10 +1,9 @@
register /grid/0/dev/hadoopqa/jars/zebra.jar;
a = load '1.txt' as (a:int, b:float,c:long,d:double,e:chararray,f:bytearray,r1(f1:chararray,f2:chararray),m1:map[]);
---b = order a by m1#'a',m1#'b';
b = order a by m1#'a';
---c = foreach b generate a as a, m1#'a' as ms1;
---describe c;
+c = foreach b generate a as a, m1#'a' as ms1;
+describe c;
--dump c;
--There should be 2 columns in orderby1 table
---store c into 'orderby2' using org.apache.hadoop.zebra.pig.TableStorer('');
+store c into 'orderby2' using org.apache.hadoop.zebra.pig.TableStorer('');
Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/pruning/readme
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/pruning/readme?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/pruning/readme (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/pruning/readme Fri Jan 8 18:17:07 2010
@@ -0,0 +1,5 @@
+1. put 1.txt and 2.txt to hdfs /user/hadoopqa/
+2.run each pig script
+java -cp /grid/0/dev/hadoopqa/jing1234/conf:/grid/0/dev/hadoopqa/jars/pig.jar:/grid/0/dev/hadoopqa/jars/tfile.jar:/grid/0/dev/hadoopqa/jars/zebra.jar org.apache.pig.Main -M <my.pig>
+3. check output table
+$HADOOP_HOME/bin/hadoop fs -cat <table>/.btschema
Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/streaming/1.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/streaming/1.txt?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/streaming/1.txt (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/streaming/1.txt Fri Jan 8 18:17:07 2010
@@ -0,0 +1,4 @@
+100 100.1 100 50e+2 something someting (0,0) [a#0,b#0,c#0]
+1 1.1 11 1.1 some some (1,1) [a#1,b#1,c#1]
+-100 -100.1 -100 -50e+2 so so (2,2) [a#2,b#2,c#2]
+3 1.1 11 1.1 some some (3,3) [a#3,b#3,c#3]
Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/streaming/complex.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/streaming/complex.txt?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/streaming/complex.txt (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/streaming/complex.txt Fri Jan 8 18:17:07 2010
@@ -0,0 +1,4 @@
+(0,0 [a#0,b#0,c#0]
+(1,1) [a#1,b#1,c#1]
+(2,2) [a#2,b#2,c#2]
+(3,3) [a#3,b#3,c#3]
Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/streaming/load.pig
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/streaming/load.pig?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/streaming/load.pig (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/streaming/load.pig Fri Jan 8 18:17:07 2010
@@ -0,0 +1,11 @@
+-- The script converts simple.txt to simple table
+
+register /grid/0/dev/hadoopqa/jars/zebra.jar;
+a1 = load 'simple.txt' as (a:int, b:float,c:long,d:double,e:chararray,f:bytearray);
+
+dump a1;
+
+store a1 into 'simple-table' using org.apache.hadoop.zebra.pig.TableStorer('[a,b,c];[d,e,f]');
+a2 = load 'simple-table' using org.apache.hadoop.zebra.pig.TableLoader();
+dump a2;
+
Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/streaming/map.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/streaming/map.txt?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/streaming/map.txt (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/streaming/map.txt Fri Jan 8 18:17:07 2010
@@ -0,0 +1,2 @@
+[aaa#100]
+[bbb#200]
Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/streaming/nested_map.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/streaming/nested_map.txt?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/streaming/nested_map.txt (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/streaming/nested_map.txt Fri Jan 8 18:17:07 2010
@@ -0,0 +1,2 @@
+[aaa#[aaa#100]]
+[bbb#[bbb#200]]
Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/streaming/readme
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/streaming/readme?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/streaming/readme (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/streaming/readme Fri Jan 8 18:17:07 2010
@@ -0,0 +1,12 @@
+1. simple.txt (this txt does not have boolean type, because pig doesn't support bool type)
+
+2. run table-creator-simple.pig
+
+Use the already generated simple table (IO layer, TestSchema), located at /homes/hadoopqa/jing1234/simple-table
+3.
+$HADOOP_HOME/bin/hadoop jar /grid/0/dev/hadoopqa/hadoop/hadoop-streaming.jar -libjars /grid/0/dev/hadoopqa/jars/pig.jar,/grid/0/dev/hadoopqa/jars/zebra.jar -D mapred.lib.table.input.projection="s1,s2,s3,s4,s5,s6" -input simple-table -output simple-stream-all-fields -mapper 'cat' -inputformat org.apache.hadoop.zebra.mapred.TableInputFormat
+4.
+$HADOOP_HOME/bin/hadoop fs -tail simple-stream-all-fields/part-00000
+
+For details, please refer to
+http://twiki.corp.yahoo.com/pub/Grid/Release2TestPlan/zebra_streaming_test.html
Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/streaming/simple.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/streaming/simple.txt?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/streaming/simple.txt (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/streaming/simple.txt Fri Jan 8 18:17:07 2010
@@ -0,0 +1,4 @@
+100 100.1 100 50e+2 something someting
+1 1.1 11 1.1 some some
+-100 -100.1 -100 -50e+2 so so
+3 1.1 11 1.1 some some
Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/streaming/table-creator-complex.pig
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/streaming/table-creator-complex.pig?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/streaming/table-creator-complex.pig (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/streaming/table-creator-complex.pig Fri Jan 8 18:17:07 2010
@@ -0,0 +1,10 @@
+-- The script converts complex.txt to complex table
+
+register /grid/0/dev/hadoopqa/jars/zebra.jar;
+a1 = load 'complex.txt' as (r1(f1:chararray,f2:chararray),m1:map[]);
+
+dump a1;
+
+store a1 into 'complex-table' using org.apache.hadoop.zebra.pig.TableStorer('[r1];[m1]');
+
+
Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/streaming/table-creator-simple.pig
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/streaming/table-creator-simple.pig?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/streaming/table-creator-simple.pig (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/streaming/table-creator-simple.pig Fri Jan 8 18:17:07 2010
@@ -0,0 +1,10 @@
+-- The script converts simple.txt to simple table
+
+register /grid/0/dev/hadoopqa/jars/zebra.jar;
+a1 = load 'simple.txt' as (a2:int, b2:float,c2:long,d2:double,e2:chararray,f2:bytearray);
+
+dump a1;
+
+store a1 into 'simple-table2' using org.apache.hadoop.zebra.pig.TableStorer('[a2,b2,c2];[d2,e2,f2]');
+
+
Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/streaming/table-creator.pig
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/streaming/table-creator.pig?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/streaming/table-creator.pig (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/streaming/table-creator.pig Fri Jan 8 18:17:07 2010
@@ -0,0 +1,10 @@
+-- The script converts wikipedia pagecounts file into a
+-- zebra table
+
+register /grid/0/dev/hadoopqa/jars/zebra.jar;
+pagecounts = LOAD 'pagecounts-20090922-100000' USING PigStorage(' ') AS (project, page, count, size);
+
+-- convert to a zebra table
+
+STORE pagecounts INTO 'pagecounts-table' USING org.apache.hadoop.zebra.pig.TableStorer('');
+
Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/streaming/table-mapper.pl
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/streaming/table-mapper.pl?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/streaming/table-mapper.pl (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/streaming/table-mapper.pl Fri Jan 8 18:17:07 2010
@@ -0,0 +1,9 @@
+#!/usr/bin/env perl
+
+#print total line number of the file
+$n=0;
+while ($line = <>) {
+$n++;
+print "$n). $line>\n";
+}
+print "There are ($n) lines in the file\n";
Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/streaming/tuple.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/streaming/tuple.txt?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/streaming/tuple.txt (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/e2e/streaming/tuple.txt Fri Jan 8 18:17:07 2010
@@ -0,0 +1,3 @@
+(3,8,9) (4,5,[aaa#100])
+(1,4,7) (3,7,[bbb#200])
+(2,5,8) (9,5,[ccc#1255])
Added: hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestColumnName.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestColumnName.java?rev=897283&view=auto
==============================================================================
--- hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestColumnName.java (added)
+++ hadoop/pig/branches/load-store-redesign/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestColumnName.java Fri Jan 8 18:17:07 2010
@@ -0,0 +1,208 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.hadoop.zebra.io;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import junit.framework.Assert;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocalFileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RawLocalFileSystem;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.zebra.io.BasicTable;
+import org.apache.hadoop.zebra.io.TableInserter;
+import org.apache.hadoop.zebra.io.TableScanner;
+import org.apache.hadoop.zebra.io.BasicTable.Reader.RangeSplit;
+import org.apache.hadoop.zebra.parser.ParseException;
+import org.apache.hadoop.zebra.schema.Schema;
+import org.apache.hadoop.zebra.types.TypesUtils;
+import org.apache.pig.data.DataBag;
+import org.apache.pig.data.DataByteArray;
+import org.apache.pig.data.Tuple;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/**
+ *
+ * Test conventions for column names. Specifically, '_' is allowed as leading character for map keys,
+ * but it's disallowed for other fields.
+ *
+ */
+public class TestColumnName {
+ final static String STR_SCHEMA =
+ "f1:bool, r:record(f11:int, f12:long), m:map(string), c:collection(f13:double, f14:float, f15:bytes)";
+ final static String STR_STORAGE = "[r.f12, f1, m#{b}]; [m#{_a}, r.f11]";
+
+ final static String INVALID_STR_SCHEMA =
+ "_f1:bool, _r:record(f11:int, _f12:long), _m:map(string), _c:collection(_f13:double, _f14:float, _f15:bytes)";
+ final static String INVALID_STR_STORAGE = "[_r.f12, _f1, _m#{b}]; [_m#{_a}, _r.f11]";
+
+ private static Configuration conf = new Configuration();
+ private static FileSystem fs = new LocalFileSystem( new RawLocalFileSystem() );
+ private static Path path = new Path( fs.getWorkingDirectory(), "TestColumnName" );
+ static {
+ conf.setInt("table.output.tfile.minBlock.size", 64 * 1024);
+ conf.setInt("table.input.split.minSize", 64 * 1024);
+ conf.set("table.output.tfile.compression", "none");
+ }
+
+ @BeforeClass
+ public static void setUp() throws IOException {
+ // drop any previous tables
+ BasicTable.drop( path, conf );
+
+ BasicTable.Writer writer = new BasicTable.Writer( path, STR_SCHEMA, STR_STORAGE, conf );
+ writer.finish();
+
+ Schema schema = writer.getSchema();
+ Tuple tuple = TypesUtils.createTuple( schema );
+
+ BasicTable.Writer writer1 = new BasicTable.Writer(path, conf);
+ int part = 0;
+ TableInserter inserter = writer1.getInserter("part" + part, true);
+ TypesUtils.resetTuple(tuple);
+
+ tuple.set(0, true);
+
+ Tuple tupRecord;
+ try {
+ tupRecord = TypesUtils.createTuple(schema.getColumnSchema("r")
+ .getSchema());
+ } catch (ParseException e) {
+ e.printStackTrace();
+ throw new IOException(e);
+ }
+ tupRecord.set(0, 1);
+ tupRecord.set(1, 1001L);
+ tuple.set(1, tupRecord);
+
+ Map<String, String> map = new HashMap<String, String>();
+ map.put("_a", "x");
+ map.put("b", "y");
+ map.put("c", "z");
+ tuple.set(2, map);
+
+ DataBag bagColl = TypesUtils.createBag();
+ Schema schColl = schema.getColumn(3).getSchema();
+ Tuple tupColl1 = TypesUtils.createTuple(schColl);
+ Tuple tupColl2 = TypesUtils.createTuple(schColl);
+ byte[] abs1 = new byte[3];
+ byte[] abs2 = new byte[4];
+ tupColl1.set(0, 3.1415926);
+ tupColl1.set(1, 1.6);
+ abs1[0] = 11;
+ abs1[1] = 12;
+ abs1[2] = 13;
+ tupColl1.set(2, new DataByteArray(abs1));
+ bagColl.add(tupColl1);
+ tupColl2.set(0, 123.456789);
+ tupColl2.set(1, 100);
+ abs2[0] = 21;
+ abs2[1] = 22;
+ abs2[2] = 23;
+ abs2[3] = 24;
+ tupColl2.set(2, new DataByteArray(abs2));
+ bagColl.add(tupColl2);
+ tuple.set(3, bagColl);
+
+ int row = 0;
+ inserter.insert(new BytesWritable(String.format("k%d%d", part + 1, row + 1)
+ .getBytes()), tuple);
+ inserter.close();
+ writer1.finish();
+
+ writer.close();
+ }
+
+ @AfterClass
+ public static void tearDownOnce() throws IOException {
+ }
+
+ @Test
+ public void testInvalidCase() throws IOException {
+ Path p = new Path( fs.getWorkingDirectory(), "TestColumnNameInvalid" );
+ BasicTable.drop( p, conf );
+
+ try {
+ BasicTable.Writer writer = new BasicTable.Writer( p, INVALID_STR_SCHEMA, INVALID_STR_STORAGE, conf );
+ writer.finish();
+ } catch(IOException ex) {
+ // Do nothing. This is expected.
+ return;
+ }
+
+ Assert.assertTrue( false ); // Test failure.
+ }
+
+ @Test
+ public void testRead() throws IOException, ParseException {
+ String projection = new String("f1, m#{_a|b}, r, m#{c}");
+ BasicTable.Reader reader = new BasicTable.Reader(path, conf);
+ reader.setProjection(projection);
+ // long totalBytes = reader.getStatus().getSize();
+
+ List<RangeSplit> splits = reader.rangeSplit(1);
+ reader.close();
+ reader = new BasicTable.Reader(path, conf);
+ reader.setProjection(projection);
+ TableScanner scanner = reader.getScanner(splits.get(0), true);
+ BytesWritable key = new BytesWritable();
+ Tuple value = TypesUtils.createTuple(scanner.getSchema());
+
+ scanner.getKey(key);
+ Assert.assertEquals(key, new BytesWritable("k11".getBytes()));
+ scanner.getValue(value);
+
+ Tuple recordTuple = (Tuple) value.get(2);
+ Assert.assertEquals(1, recordTuple.get(0));
+ Assert.assertEquals(1001L, recordTuple.get(1));
+ Assert.assertEquals(true, value.get(0));
+
+ HashMap<String, Object> mapval = (HashMap<String, Object>) value.get(1);
+ Assert.assertEquals("x", mapval.get("_a"));
+ Assert.assertEquals("y", mapval.get("b"));
+ Assert.assertEquals(null, mapval.get("c"));
+ mapval = (HashMap<String, Object>) value.get(3);
+ Assert.assertEquals("z", mapval.get("c"));
+ Assert.assertEquals(null, mapval.get("_a"));
+ Assert.assertEquals(null, mapval.get("b"));
+ reader.close();
+ }
+
+ @Test
+ public void testProjectionParsing() throws IOException, ParseException {
+ String projection = new String( "f1, m#{_a}, _r, m#{c}, m" );
+ BasicTable.Reader reader = new BasicTable.Reader( path, conf );
+ try {
+ reader.setProjection( projection );
+ reader.close();
+ } catch(ParseException ex) {
+ // Expected.
+ return;
+ }
+
+ Assert.assertTrue( false );
+ }
+
+}