You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by ch...@apache.org on 2014/02/24 22:41:41 UTC

svn commit: r1571454 [1/5] - in /pig/branches/tez: ./ conf/ contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/ contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/string/ contrib/piggybank/java/src/main/java/...

Author: cheolsoo
Date: Mon Feb 24 21:41:38 2014
New Revision: 1571454

URL: http://svn.apache.org/r1571454
Log:
Merge latest trunk changes

Added:
    pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/string/Stuff.java
      - copied unchanged from r1571421, pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/string/Stuff.java
    pig/branches/tez/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/string/TestStuff.java
      - copied unchanged from r1571421, pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/string/TestStuff.java
    pig/branches/tez/src/org/apache/pig/OverwritableStoreFunc.java
      - copied unchanged from r1571421, pig/trunk/src/org/apache/pig/OverwritableStoreFunc.java
    pig/branches/tez/src/org/apache/pig/backend/hadoop/accumulo/
      - copied from r1571421, pig/trunk/src/org/apache/pig/backend/hadoop/accumulo/
    pig/branches/tez/src/org/apache/pig/backend/hadoop/accumulo/AbstractAccumuloStorage.java
      - copied unchanged from r1571421, pig/trunk/src/org/apache/pig/backend/hadoop/accumulo/AbstractAccumuloStorage.java
    pig/branches/tez/src/org/apache/pig/backend/hadoop/accumulo/AccumuloBinaryConverter.java
      - copied unchanged from r1571421, pig/trunk/src/org/apache/pig/backend/hadoop/accumulo/AccumuloBinaryConverter.java
    pig/branches/tez/src/org/apache/pig/backend/hadoop/accumulo/AccumuloStorage.java
      - copied unchanged from r1571421, pig/trunk/src/org/apache/pig/backend/hadoop/accumulo/AccumuloStorage.java
    pig/branches/tez/src/org/apache/pig/backend/hadoop/accumulo/AccumuloStorageOptions.java
      - copied unchanged from r1571421, pig/trunk/src/org/apache/pig/backend/hadoop/accumulo/AccumuloStorageOptions.java
    pig/branches/tez/src/org/apache/pig/backend/hadoop/accumulo/Column.java
      - copied unchanged from r1571421, pig/trunk/src/org/apache/pig/backend/hadoop/accumulo/Column.java
    pig/branches/tez/src/org/apache/pig/backend/hadoop/accumulo/FixedByteArrayOutputStream.java
      - copied unchanged from r1571421, pig/trunk/src/org/apache/pig/backend/hadoop/accumulo/FixedByteArrayOutputStream.java
    pig/branches/tez/src/org/apache/pig/backend/hadoop/accumulo/Utils.java
      - copied unchanged from r1571421, pig/trunk/src/org/apache/pig/backend/hadoop/accumulo/Utils.java
    pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/fetch/
      - copied from r1571421, pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/fetch/
    pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/fetch/FetchLauncher.java
      - copied unchanged from r1571421, pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/fetch/FetchLauncher.java
    pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/fetch/FetchOptimizer.java
      - copied, changed from r1571421, pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/fetch/FetchOptimizer.java
    pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/fetch/FetchPOStoreImpl.java
      - copied unchanged from r1571421, pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/fetch/FetchPOStoreImpl.java
    pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/fetch/FetchProgressableReporter.java
      - copied unchanged from r1571421, pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/fetch/FetchProgressableReporter.java
    pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/tez/ReadScalarsTez.java
    pig/branches/tez/src/org/apache/pig/newplan/logical/visitor/ResetProjectionAttachedRelationalOpVisitor.java
      - copied unchanged from r1571421, pig/trunk/src/org/apache/pig/newplan/logical/visitor/ResetProjectionAttachedRelationalOpVisitor.java
    pig/branches/tez/src/org/apache/pig/tools/pigstats/SimpleFetchPigStats.java
      - copied unchanged from r1571421, pig/trunk/src/org/apache/pig/tools/pigstats/SimpleFetchPigStats.java
    pig/branches/tez/test/org/apache/pig/backend/hadoop/accumulo/
      - copied from r1571421, pig/trunk/test/org/apache/pig/backend/hadoop/accumulo/
    pig/branches/tez/test/org/apache/pig/backend/hadoop/accumulo/TestAbstractAccumuloStorage.java
      - copied unchanged from r1571421, pig/trunk/test/org/apache/pig/backend/hadoop/accumulo/TestAbstractAccumuloStorage.java
    pig/branches/tez/test/org/apache/pig/backend/hadoop/accumulo/TestAccumuloBinaryConverter.java
      - copied unchanged from r1571421, pig/trunk/test/org/apache/pig/backend/hadoop/accumulo/TestAccumuloBinaryConverter.java
    pig/branches/tez/test/org/apache/pig/backend/hadoop/accumulo/TestAccumuloColumns.java
      - copied unchanged from r1571421, pig/trunk/test/org/apache/pig/backend/hadoop/accumulo/TestAccumuloColumns.java
    pig/branches/tez/test/org/apache/pig/backend/hadoop/accumulo/TestAccumuloPigCluster.java
      - copied unchanged from r1571421, pig/trunk/test/org/apache/pig/backend/hadoop/accumulo/TestAccumuloPigCluster.java
    pig/branches/tez/test/org/apache/pig/backend/hadoop/accumulo/TestAccumuloStorage.java
      - copied unchanged from r1571421, pig/trunk/test/org/apache/pig/backend/hadoop/accumulo/TestAccumuloStorage.java
    pig/branches/tez/test/org/apache/pig/backend/hadoop/accumulo/TestAccumuloStorageConfiguration.java
      - copied unchanged from r1571421, pig/trunk/test/org/apache/pig/backend/hadoop/accumulo/TestAccumuloStorageConfiguration.java
    pig/branches/tez/test/org/apache/pig/backend/hadoop/accumulo/TestAccumuloStorageOptions.java
      - copied unchanged from r1571421, pig/trunk/test/org/apache/pig/backend/hadoop/accumulo/TestAccumuloStorageOptions.java
    pig/branches/tez/test/org/apache/pig/test/TestAutoLocalMode.java
      - copied unchanged from r1571421, pig/trunk/test/org/apache/pig/test/TestAutoLocalMode.java
    pig/branches/tez/test/org/apache/pig/test/TestFetch.java
      - copied unchanged from r1571421, pig/trunk/test/org/apache/pig/test/TestFetch.java
    pig/branches/tez/test/org/apache/pig/test/TestPigContextClassCache.java
      - copied unchanged from r1571421, pig/trunk/test/org/apache/pig/test/TestPigContextClassCache.java
    pig/branches/tez/test/org/apache/pig/test/TestPredeployedJar.java
      - copied unchanged from r1571421, pig/trunk/test/org/apache/pig/test/TestPredeployedJar.java
Removed:
    pig/branches/tez/src/org/apache/pig/impl/builtin/ReadScalarsTez.java
Modified:
    pig/branches/tez/   (props changed)
    pig/branches/tez/CHANGES.txt
    pig/branches/tez/build.xml
    pig/branches/tez/conf/pig.properties
    pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/Over.java
    pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/xml/XPath.java
    pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/CSVExcelStorage.java
    pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/FixedWidthLoader.java
    pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/SequenceFileLoader.java
    pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/AvroStorage.java
    pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/AvroStorageUtils.java
    pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/PigAvroInputFormat.java
    pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/PigAvroRecordReader.java
    pig/branches/tez/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/evaluation/TestOver.java
    pig/branches/tez/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestSequenceFileLoader.java
    pig/branches/tez/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/avro/TestAvroStorage.java
    pig/branches/tez/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/avro/TestAvroStorageUtils.java
    pig/branches/tez/ivy.xml
    pig/branches/tez/ivy/libraries.properties
    pig/branches/tez/shims/src/hadoop20/org/apache/pig/backend/hadoop/executionengine/shims/HadoopShims.java
    pig/branches/tez/shims/src/hadoop23/org/apache/pig/backend/hadoop/executionengine/shims/HadoopShims.java
    pig/branches/tez/src/docs/src/documentation/content/xdocs/basic.xml
    pig/branches/tez/src/docs/src/documentation/content/xdocs/func.xml
    pig/branches/tez/src/docs/src/documentation/content/xdocs/perf.xml
    pig/branches/tez/src/org/apache/pig/ExecTypeProvider.java
    pig/branches/tez/src/org/apache/pig/LoadFunc.java
    pig/branches/tez/src/org/apache/pig/Main.java
    pig/branches/tez/src/org/apache/pig/PigConfiguration.java
    pig/branches/tez/src/org/apache/pig/PigServer.java
    pig/branches/tez/src/org/apache/pig/PigWarning.java
    pig/branches/tez/src/org/apache/pig/StoreFunc.java
    pig/branches/tez/src/org/apache/pig/backend/hadoop/datastorage/ConfigurationUtil.java
    pig/branches/tez/src/org/apache/pig/backend/hadoop/datastorage/HPath.java
    pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/HExecutionEngine.java
    pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/JobControlCompiler.java
    pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java
    pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MapReduceLauncher.java
    pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MapReduceOper.java
    pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigHadoopLogger.java
    pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigInputFormat.java
    pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigOutputFormat.java
    pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/POUserFunc.java
    pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POPartialAgg.java
    pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POStream.java
    pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/tez/PigProcessor.java
    pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/tez/TezCompiler.java
    pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/tez/TezDagBuilder.java
    pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/tez/TezLauncher.java
    pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/util/MapRedUtil.java
    pig/branches/tez/src/org/apache/pig/backend/hadoop/hbase/HBaseStorage.java
    pig/branches/tez/src/org/apache/pig/builtin/BinStorage.java
    pig/branches/tez/src/org/apache/pig/builtin/JsonStorage.java
    pig/branches/tez/src/org/apache/pig/builtin/PigStorage.java
    pig/branches/tez/src/org/apache/pig/data/SelfSpillBag.java
    pig/branches/tez/src/org/apache/pig/impl/PigContext.java
    pig/branches/tez/src/org/apache/pig/impl/PigImplConstants.java
    pig/branches/tez/src/org/apache/pig/impl/io/FileLocalizer.java
    pig/branches/tez/src/org/apache/pig/impl/io/SequenceFileInterStorage.java
    pig/branches/tez/src/org/apache/pig/impl/plan/CompilationMessageCollector.java
    pig/branches/tez/src/org/apache/pig/impl/util/JarManager.java
    pig/branches/tez/src/org/apache/pig/impl/util/ObjectSerializer.java
    pig/branches/tez/src/org/apache/pig/impl/util/PropertiesUtil.java
    pig/branches/tez/src/org/apache/pig/impl/util/Utils.java
    pig/branches/tez/src/org/apache/pig/newplan/FilterExtractor.java
    pig/branches/tez/src/org/apache/pig/newplan/logical/expression/ExpToPhyTranslationVisitor.java
    pig/branches/tez/src/org/apache/pig/newplan/logical/relational/LOForEach.java
    pig/branches/tez/src/org/apache/pig/newplan/logical/relational/LOGenerate.java
    pig/branches/tez/src/org/apache/pig/newplan/logical/relational/LOSort.java
    pig/branches/tez/src/org/apache/pig/newplan/logical/rules/InputOutputFileValidator.java
    pig/branches/tez/src/org/apache/pig/newplan/logical/visitor/CastLineageSetter.java
    pig/branches/tez/src/org/apache/pig/parser/AliasMasker.g
    pig/branches/tez/src/org/apache/pig/parser/AstPrinter.g
    pig/branches/tez/src/org/apache/pig/parser/AstValidator.g
    pig/branches/tez/src/org/apache/pig/parser/LogicalPlanBuilder.java
    pig/branches/tez/src/org/apache/pig/parser/LogicalPlanGenerator.g
    pig/branches/tez/src/org/apache/pig/parser/QueryParser.g
    pig/branches/tez/src/org/apache/pig/tools/pigscript/parser/PigScriptParser.jj
    pig/branches/tez/src/org/apache/pig/tools/pigstats/PigStatsUtil.java
    pig/branches/tez/src/pig-default.properties   (props changed)
    pig/branches/tez/test/e2e/pig/tests/cmdline.conf
    pig/branches/tez/test/e2e/pig/tests/negative.conf
    pig/branches/tez/test/e2e/pig/tests/nightly.conf
    pig/branches/tez/test/org/apache/pig/test/TestAssert.java
    pig/branches/tez/test/org/apache/pig/test/TestEvalPipeline2.java
    pig/branches/tez/test/org/apache/pig/test/TestJsonLoaderStorage.java
    pig/branches/tez/test/org/apache/pig/test/TestMultiQueryLocal.java
    pig/branches/tez/test/org/apache/pig/test/TestNewPartitionFilterPushDown.java
    pig/branches/tez/test/org/apache/pig/test/TestNewPlanFilterRule.java
    pig/branches/tez/test/org/apache/pig/test/TestPOPartialAgg.java
    pig/branches/tez/test/org/apache/pig/test/TestPigRunner.java
    pig/branches/tez/test/org/apache/pig/test/TestPigServer.java
    pig/branches/tez/test/org/apache/pig/test/TestPigStorage.java
    pig/branches/tez/test/org/apache/pig/test/TestPruneColumn.java
    pig/branches/tez/test/org/apache/pig/test/TestRank3.java
    pig/branches/tez/test/org/apache/pig/test/TestRegisteredJarVisibility.java
    pig/branches/tez/test/org/apache/pig/test/TestStore.java
    pig/branches/tez/test/org/apache/pig/test/data/bzipdir1.bz2/bzipdir2.bz2/recordLossblockHeaderEndsAt136500.txt.bz2   (props changed)
    pig/branches/tez/test/perf/pigmix/src/java/org/apache/pig/test/pigmix/udf/PigPerformanceLoader.java

Propchange: pig/branches/tez/
------------------------------------------------------------------------------
  Merged /pig/trunk:r1554090-1571421

Modified: pig/branches/tez/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/branches/tez/CHANGES.txt?rev=1571454&r1=1571453&r2=1571454&view=diff
==============================================================================
--- pig/branches/tez/CHANGES.txt (original)
+++ pig/branches/tez/CHANGES.txt Mon Feb 24 21:41:38 2014
@@ -26,8 +26,35 @@ PIG-3485: Remove CastUtils.bytesToMap(by
 
 PIG-3419: Pluggable Execution Engine (achalsoni81 via cheolsoo)
 
+PIG-2207: Support custom counters for aggregating warnings from different udfs (aniket486)
+
 IMPROVEMENTS
 
+PIG-3675: Documentation for AccumuloStorage (elserj via daijy)
+
+PIG-3648: Make the sample size for RandomSampleLoader configurable (cheolsoo)
+
+PIG-259: allow store to overwrite existing directory (nezihyigitbasi via daijy)
+
+PIG-2672: Optimize the use of DistributedCache (aniket486)
+
+PIG-3238: Pig current releases lack a UDF Stuff(). This UDF deletes a specified length of characters
+ and inserts another set of characters at a specified starting point (nezihyigitbasi via daijy)
+
+PIG-3299: Provide support for LazyOutputFormat to avoid creating empty files (lbendig via daijy)
+
+PIG-3642: Direct HDFS access for small jobs (fetch) (lbendig via cheolsoo)
+
+PIG-3730: Performance issue in SelfSpillBag (rajesh.balamohan via rohini)
+
+PIG-3654: Add class cache to PigContext (tmwoodruff via daijy)
+
+PIG-3463: Pig should use hadoop local mode for small jobs (aniket486)
+
+PIG-3573: Provide StoreFunc and LoadFunc for Accumulo (elserj via daijy)
+
+PIG-3653: Add support for pre-deployed jars (tmwoodruff via daijy)
+
 PIG-3645: Move FileLocalizer.setR() calls to unit tests (cheolsoo)
 
 PIG-3637: PigCombiner creating log spam (rohini)
@@ -54,8 +81,6 @@ PIG-3295: Casting from bytearray failing
 
 PIG-3444: CONCAT with 2+ input parameters fail (lbendig via daijy)
 
-PIG-3529: Upgrade HBase dependency from 0.95-SNAPSHOT to 0.96 (jarcec via daijy)
-
 PIG-3117: A debug mode in which pig does not delete temporary files (ihadanny via cheolsoo)
 
 PIG-3484: Make the size of pig.script property configurable (cheolsoo)
@@ -64,6 +89,55 @@ OPTIMIZATIONS
  
 BUG FIXES
 
+PIG-3776: Conflicting versions of jline is present in trunk (cheolsoo)
+
+PIG-3674: Fix TestAccumuloPigCluster on Hadoop 2 (elserj via daijy)
+
+PIG-3740: Document direct fetch optimization (lbendig via cheolsoo)
+
+PIG-3746: NPE is thrown if Pig fails before PigStats is initialized (cheolsoo)
+
+PIG-3747: Update skewed join documentation (cheolsoo)
+
+PIG-3755: auto local mode selection does not check lower bound for size (aniket486)
+
+PIG-3753: LOGenerate generates null schema (daijy)
+
+PIG-3447: Compiler warning message dropped for CastLineageSetter and others with no enum kind (knoguchi via cheolsoo)
+
+PIG-3627: Json storage: Doesn't work in cases where other Store Functions (like PigStorage / AvroStorage)
+ do work (ssvinarchukhorton via daijy)
+
+PIG-3606: Pig script throws error when searching for hcatalog jars in latest hive (deepesh via daijy)
+
+PIG-3623: HBaseStorage: setting loadKey and noWAL to false doesn't have any effect (nezihyigitbasi via rohini)
+
+PIG-3744: SequenceFileLoader does not support BytesWritable (rohini)
+
+PIG-3726: Ranking empty records leads to NullPointerException (jarcec via daijy)
+
+PIG-3652: Pigmix parser (PigPerformanceLoader) deletes chars during parsing (keren3000 via daijy)
+
+PIG-3722: Udf deserialization for registered classes fails in local_mode (aniket486)
+
+PIG-3641: Split "otherwise" producing incorrect output when combined with ColumnPruning (knoguchi)
+
+PIG-3682: mvn-inst target does not install pig-h2.jar into local .m2 (raluri via aniket486)
+
+PIG-3661: Piggybank AvroStorage fails if used in more than one load or store statement (rohini)
+
+PIG-3511: Security: Pig temporary directories might have world readable permissions (rohini)
+
+PIG-3664: Piggy Bank XPath UDF can't be called (nezihyigitbasi via daijy)
+
+PIG-3662: Static loadcaster in BinStorage can cause exception (lbendig via rohini)
+
+PIG-3617: problem with temp file deletion in MAPREDUCE operator (nezihyigitbasi via cheolsoo)
+
+PIG-3649: POPartialAgg incorrectly calculates size reduction when multiple values aggregated (tmwoodruff via daijy)
+
+PIG-3650: Fix for PIG-3100 breaks column pruning (tmwoodruff via daijy)
+
 PIG-3643: Nested Foreach with UDF and bincond is broken (cheolsoo)
 
 PIG-3616: TestBuiltIn.testURIwithCurlyBrace() silently fails (lbendig via cheolsoo)
@@ -141,6 +215,8 @@ Release 0.12.1 (unreleased changes)
 
 IMPROVEMENTS
 
+PIG-3529: Upgrade HBase dependency from 0.95-SNAPSHOT to 0.96 (jarcec via daijy)
+
 PIG-3552: UriUtil used by reducer estimator should support viewfs (amatsukawa via aniket486)
 
 PIG-3549: Print hadoop jobids for failed, killed job (aniket486)
@@ -151,6 +227,18 @@ PIG-3480: TFile-based tmpfile compressio
 
 BUG FIXES
 
+PIG-3774: Piggybank Over UDF get wrong result (daijy)
+
+PIG-3657: New partition filter extractor fails with NPE (cheolsoo)
+
+PIG-3347: Store invocation brings side effect (daijy)
+
+PIG-3670: Fix assert in Pig script (daijy)
+
+PIG-3741: Utils.setTmpFileCompressionOnConf can cause side effect for SequenceFileInterStorage (aniket486)
+
+PIG-3677: ConfigurationUtil.getLocalFSProperties can return an inconsistent property set (rohini)
+
 PIG-3621: Python Avro library can't read Avros made with builtin AvroStorage (rusell.jurney via cheolsoo)
 
 PIG-3592: Should not try to create success file for non-fs schemes like hbase (rohini)

Modified: pig/branches/tez/build.xml
URL: http://svn.apache.org/viewvc/pig/branches/tez/build.xml?rev=1571454&r1=1571453&r2=1571454&view=diff
==============================================================================
--- pig/branches/tez/build.xml (original)
+++ pig/branches/tez/build.xml Mon Feb 24 21:41:38 2014
@@ -182,13 +182,6 @@
         <equals arg1="${hadoopversion}" arg2="23"/>
     </condition>
 
-    <!-- Tez needs guava 15.0 whereas Hadoop 1.x depends on guava 11 -->
-    <script language="javascript">
-        if (project.getProperty('hadoopversion').equals('23')) {
-            project.setProperty('guava.version', project.getProperty('guava-hadoop2.version'));
-        }
-    </script>
-
     <!--
       HBase master version
       Denotes how the HBase dependencies are layout. Value "94" denotes older
@@ -1157,6 +1150,7 @@
                <pom refid="pig"/>
            <attach file="${output.jarfile.sources}" classifier="sources" />
            <attach file="${output.jarfile.javadoc}" classifier="javadoc" />
+           <attach file="${output.jarfile.core-h2}" classifier="h2" />
           </artifact:install>
          <artifact:pom file="${pigunit.pom}" id="pigunit"/>
           <artifact:install file="${pigunit.jarfile}">

Modified: pig/branches/tez/conf/pig.properties
URL: http://svn.apache.org/viewvc/pig/branches/tez/conf/pig.properties?rev=1571454&r1=1571453&r2=1571454&view=diff
==============================================================================
--- pig/branches/tez/conf/pig.properties (original)
+++ pig/branches/tez/conf/pig.properties Mon Feb 24 21:41:38 2014
@@ -62,6 +62,7 @@
 #pig.skewedjoin.reduce.memusage=0.3
 #pig.exec.nocombiner=false
 #opt.multiquery=true
+#opt.fetch=true
 
 #Following parameters are for configuring intermediate storage format
 #Supported storage types are seqfile and tfile
@@ -233,3 +234,28 @@ pig.location.check.strict=false
 # purpose. By default, it is set to true.
 # To inspect, use a = load '<path_to_tmp_file>' using org.apache.pig.impl.io.TFileStorage();
 # pig.delete.temp.files=true
+
+# Set this option to true to convert jobs with input data size smaller than
+# pig.auto.local.input.maxbytes bytes and number of reducers <=1 to run in local mode
+# By default, this is set to false.
+# pig.auto.local.enabled=true
+
+# Set value in long as a threshold number of bytes to convert
+# jobs with smaller input data size to run in local mode
+# pig.auto.local.input.maxbytes=100000000
+
+# Set this option to overwrite the sample size of RandomSampleLoader for
+# order-by. The default value is 100 rows per task.
+# pig.random.sampler.sample.size=100
+
+# When enabled, jobs won't create empty part files if no output is written. In this case
+# PigOutputFormat will be wrapped with org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat.
+# pig.output.lazy=true
+
+# Set this option to turn on additional jar caching for the user
+# pig.user.cache.enabled=true
+
+# This option defines location where additional jars are cached for the user.
+# Additional jar will be cached under PIG_USER_CACHE_LOCATION/${user.name}/.pigcache
+# and will be re-used across the jobs run by the user if the jar has not changed.
+# pig.user.cache.location=/tmp

Modified: pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/Over.java
URL: http://svn.apache.org/viewvc/pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/Over.java?rev=1571454&r1=1571453&r2=1571454&view=diff
==============================================================================
--- pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/Over.java (original)
+++ pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/Over.java Mon Feb 24 21:41:38 2014
@@ -349,7 +349,8 @@ public class Over extends EvalFunc<DataB
         } else if ("percent_rank".equalsIgnoreCase(agg)) {
             func = new PercentRank(udfArgs);
         } else if ("cume_dist".equalsIgnoreCase(agg)) {
-            func = new CumeDist(udfArgs);
+            //func = new CumeDist(udfArgs);
+            func = new CumeDist();
         } else if ("debug".equalsIgnoreCase(agg)) {
             func = new Debug();
         } else {
@@ -755,6 +756,7 @@ public class Over extends EvalFunc<DataB
         }
     }
 
+    /*
     private static class CumeDist extends BaseRank<Double> {
         CumeDist(Object[] args) throws IOException {
             super(args);
@@ -768,6 +770,20 @@ public class Over extends EvalFunc<DataB
             return ((double)lastRankUsed) / (double)iter.tuples.size();
         }
     }
+    */
+
+    private static class CumeDist extends ResetableEvalFunc<Double> {
+
+        @Override
+        public Double exec(Tuple input) throws IOException {
+            DataBag inbag = (DataBag)input.get(0);
+            OverBag.OverBagIterator iter =
+                (OverBag.OverBagIterator)inbag.iterator();
+
+            return ((double)++currentRow)/(double)iter.tuples.size();
+        }
+    }
+
 
 
 

Modified: pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/xml/XPath.java
URL: http://svn.apache.org/viewvc/pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/xml/XPath.java?rev=1571454&r1=1571453&r2=1571454&view=diff
==============================================================================
--- pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/xml/XPath.java (original)
+++ pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/xml/XPath.java Mon Feb 24 21:41:38 2014
@@ -29,6 +29,7 @@ import org.apache.pig.data.DataType;
 import org.apache.pig.data.Tuple;
 import org.apache.pig.impl.logicalLayer.FrontendException;
 import org.apache.pig.impl.logicalLayer.schema.Schema;
+import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema;
 import org.w3c.dom.Document;
 import org.xml.sax.InputSource;
 
@@ -115,15 +116,32 @@ public class XPath extends EvalFunc<Stri
         }
     }
 
-    @Override
-    public List<FuncSpec> getArgToFuncMapping() throws FrontendException {
-        
-        final List<FuncSpec> funcList = new ArrayList<FuncSpec>();
-        
-        funcList.add(new FuncSpec(this.getClass().getName(), new Schema(new Schema.FieldSchema(null, DataType.CHARARRAY))));
-        
-        return funcList;
-    }
+	@Override
+	public List<FuncSpec> getArgToFuncMapping() throws FrontendException {
+
+		final List<FuncSpec> funcList = new ArrayList<FuncSpec>();
+
+		/*either two chararray arguments*/
+		List<FieldSchema> fields = new ArrayList<FieldSchema>();
+		fields.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
+		fields.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
+
+		Schema twoArgInSchema = new Schema(fields);
+
+		funcList.add(new FuncSpec(this.getClass().getName(), twoArgInSchema));
+
+		/*or two chararray and a boolean argument*/
+		fields = new ArrayList<FieldSchema>();
+		fields.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
+		fields.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
+		fields.add(new Schema.FieldSchema(null, DataType.BOOLEAN));
+
+		Schema threeArgInSchema = new Schema(fields);
+
+		funcList.add(new FuncSpec(this.getClass().getName(), threeArgInSchema));
+
+		return funcList;
+	}
 
 }
 

Modified: pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/CSVExcelStorage.java
URL: http://svn.apache.org/viewvc/pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/CSVExcelStorage.java?rev=1571454&r1=1571453&r2=1571454&view=diff
==============================================================================
--- pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/CSVExcelStorage.java (original)
+++ pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/CSVExcelStorage.java Mon Feb 24 21:41:38 2014
@@ -367,7 +367,8 @@ public class CSVExcelStorage extends Pig
         // further records to it. If they are the same (this would 
         // happen if multiple small files each with a header were combined
         // into one split), we know to skip the duplicate header record as well.
-        if (loadingFirstRecord && headerTreatment == Headers.SKIP_INPUT_HEADER && splitIndex == 0) {
+        if (loadingFirstRecord && headerTreatment == Headers.SKIP_INPUT_HEADER &&
+                (splitIndex == 0 || splitIndex == -1)) {
             try {
                 if (!in.nextKeyValue())
                     return null;

Modified: pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/FixedWidthLoader.java
URL: http://svn.apache.org/viewvc/pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/FixedWidthLoader.java?rev=1571454&r1=1571453&r2=1571454&view=diff
==============================================================================
--- pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/FixedWidthLoader.java (original)
+++ pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/FixedWidthLoader.java Mon Feb 24 21:41:38 2014
@@ -295,7 +295,7 @@ public class FixedWidthLoader extends Lo
     
     @Override
     public Tuple getNext() throws IOException {
-        if (loadingFirstRecord && skipHeader && splitIndex == 0) {
+        if (loadingFirstRecord && skipHeader && (splitIndex == 0 || splitIndex == -1)) {
             try {
                 if (!reader.nextKeyValue()) 
                     return null;

Modified: pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/SequenceFileLoader.java
URL: http://svn.apache.org/viewvc/pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/SequenceFileLoader.java?rev=1571454&r1=1571453&r2=1571454&view=diff
==============================================================================
--- pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/SequenceFileLoader.java (original)
+++ pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/SequenceFileLoader.java Mon Feb 24 21:41:38 2014
@@ -21,12 +21,11 @@ import java.io.IOException;
 import java.lang.reflect.Type;
 import java.util.ArrayList;
 
-import org.joda.time.DateTime;
-
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.io.BooleanWritable;
 import org.apache.hadoop.io.ByteWritable;
+import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.io.DoubleWritable;
 import org.apache.hadoop.io.FloatWritable;
 import org.apache.hadoop.io.IntWritable;
@@ -56,42 +55,42 @@ import org.apache.pig.data.TupleFactory;
  **/
 
 public class SequenceFileLoader extends FileInputLoadFunc {
-  
+
   private SequenceFileRecordReader<Writable, Writable> reader;
- 
+
   private Writable key;
   private Writable value;
   private ArrayList<Object> mProtoTuple = null;
-  
+
   protected static final Log LOG = LogFactory.getLog(SequenceFileLoader.class);
   protected TupleFactory mTupleFactory = TupleFactory.getInstance();
   protected SerializationFactory serializationFactory;
 
   protected byte keyType = DataType.UNKNOWN;
   protected byte valType = DataType.UNKNOWN;
-    
+
   public SequenceFileLoader() {
     mProtoTuple = new ArrayList<Object>(2);
   }
- 
+
   protected void setKeyType(Class<?> keyClass) throws BackendException {
     this.keyType |= inferPigDataType(keyClass);
-    if (keyType == DataType.ERROR) { 
+    if (keyType == DataType.ERROR) {
       LOG.warn("Unable to translate key "+key.getClass()+" to a Pig datatype");
       throw new BackendException("Unable to translate "+key.getClass()+" to a Pig datatype");
-    } 
+    }
   }
-  
+
   protected void setValueType(Class<?> valueClass) throws BackendException {
     this.valType |= inferPigDataType(valueClass);
-    if (keyType == DataType.ERROR) { 
+    if (keyType == DataType.ERROR) {
       LOG.warn("Unable to translate key "+key.getClass()+" to a Pig datatype");
       throw new BackendException("Unable to translate "+key.getClass()+" to a Pig datatype");
-    } 
+    }
   }
-    
+
   protected byte inferPigDataType(Type t) {
-    if (t == DataByteArray.class) return DataType.BYTEARRAY;
+    if (t == BytesWritable.class) return DataType.BYTEARRAY;
     else if (t == Text.class) return DataType.CHARARRAY;
     else if (t == IntWritable.class) return DataType.INTEGER;
     else if (t == LongWritable.class) return DataType.LONG;
@@ -103,11 +102,14 @@ public class SequenceFileLoader extends 
     // not doing maps or other complex types for now
     else return DataType.ERROR;
   }
-  
+
   protected Object translateWritableToPigDataType(Writable w, byte dataType) {
     switch(dataType) {
       case DataType.CHARARRAY: return ((Text) w).toString();
-      case DataType.BYTEARRAY: return((DataByteArray) w).get();
+      case DataType.BYTEARRAY:
+            BytesWritable bw = (BytesWritable) w;
+            // Make a copy
+            return new DataByteArray(bw.getBytes(), 0, bw.getLength());
       case DataType.BOOLEAN: return ((BooleanWritable) w).get();
       case DataType.INTEGER: return ((IntWritable) w).get();
       case DataType.LONG: return ((LongWritable) w).get();
@@ -116,10 +118,10 @@ public class SequenceFileLoader extends 
       case DataType.BYTE: return ((ByteWritable) w).get();
       case DataType.DATETIME: return ((DateTimeWritable) w).get();
     }
-    
+
     return null;
   }
-  
+
   @Override
   public Tuple getNext() throws IOException {
     boolean next = false;
@@ -128,19 +130,19 @@ public class SequenceFileLoader extends 
     } catch (InterruptedException e) {
       throw new IOException(e);
     }
-    
+
     if (!next) return null;
-    
+
     key = reader.getCurrentKey();
     value = reader.getCurrentValue();
-    
+
     if (keyType == DataType.UNKNOWN && key != null) {
         setKeyType(key.getClass());
     }
     if (valType == DataType.UNKNOWN && value != null) {
         setValueType(value.getClass());
     }
-    
+
     mProtoTuple.add(translateWritableToPigDataType(key, keyType));
     mProtoTuple.add(translateWritableToPigDataType(value, valType));
     Tuple t =  mTupleFactory.newTuple(mProtoTuple);
@@ -163,6 +165,6 @@ public class SequenceFileLoader extends 
 
   @Override
   public void setLocation(String location, Job job) throws IOException {
-    FileInputFormat.setInputPaths(job, location);    
+    FileInputFormat.setInputPaths(job, location);
   }
 }

Modified: pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/AvroStorage.java
URL: http://svn.apache.org/viewvc/pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/AvroStorage.java?rev=1571454&r1=1571453&r2=1571454&view=diff
==============================================================================
--- pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/AvroStorage.java (original)
+++ pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/AvroStorage.java Mon Feb 24 21:41:38 2014
@@ -21,16 +21,14 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.util.ArrayList;
 import java.util.HashMap;
+import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
 import java.util.Properties;
 import java.util.Set;
-import java.util.HashSet;
-import java.net.URI;
 import org.apache.avro.Schema;
 import org.apache.avro.Schema.Field;
-import org.apache.avro.file.DataFileStream;
 import org.apache.avro.generic.GenericDatumReader;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
@@ -105,6 +103,7 @@ public class AvroStorage extends FileInp
 
     private boolean checkSchema = true; /*whether check schema of input directories*/
     private boolean ignoreBadFiles = false; /* whether ignore corrupted files during load */
+    private String contextSignature = null;
 
     /**
      * Empty constructor. Output schema is derived from pig schema.
@@ -151,11 +150,15 @@ public class AvroStorage extends FileInp
         if (inputAvroSchema != null) {
             return;
         }
-        Set<Path> paths = new HashSet<Path>();
+
         Configuration conf = job.getConfiguration();
-        if (AvroStorageUtils.getAllSubDirs(new Path(location), conf, paths)) {
+        Set<Path> paths = AvroStorageUtils.getPaths(location, conf, true);
+        if (!paths.isEmpty()) {
+            // Set top level directories in input format. Adding all files will
+            // bloat configuration size
+            FileInputFormat.setInputPaths(job, paths.toArray(new Path[paths.size()]));
+            // Scan all directories including sub directories for schema
             setInputAvroSchema(paths, conf);
-            FileInputFormat.setInputPaths(job, paths.toArray(new Path[0]));
         } else {
             throw new IOException("Input path \'" + location + "\' is not found");
         }
@@ -193,9 +196,17 @@ public class AvroStorage extends FileInp
         if (paths == null || paths.isEmpty()) {
             return null;
         }
-        Path path = paths.iterator().next();
-        FileSystem fs = FileSystem.get(path.toUri(), conf);
-        return getAvroSchema(path, fs);
+        Iterator<Path> iterator = paths.iterator();
+        Schema schema = null;
+        while (iterator.hasNext()) {
+            Path path = iterator.next();
+            FileSystem fs = FileSystem.get(path.toUri(), conf);
+            schema = getAvroSchema(path, fs);
+            if (schema != null) {
+                break;
+            }
+        }
+        return schema;
     }
 
     /**
@@ -237,7 +248,7 @@ public class AvroStorage extends FileInp
                         System.out.println("Do not check schema; use schema of " + s.getPath());
                         return schema;
                     }
-                } else if (!schema.equals(newSchema)) {
+                } else if (newSchema != null && !schema.equals(newSchema)) {
                     throw new IOException( "Input path is " + path + ". Sub-direcotry " + s.getPath()
                                          + " contains different schema " + newSchema + " than " + schema);
                 }
@@ -254,19 +265,23 @@ public class AvroStorage extends FileInp
      * Merge multiple input avro schemas into one. Note that we can't merge arbitrary schemas.
      * Please see AvroStorageUtils.mergeSchema() for what's allowed and what's not allowed.
      *
-     * @param paths  set of input files
+     * @param basePaths  set of input dir or files
      * @param conf  configuration
      * @return avro schema
      * @throws IOException
      */
-    protected Schema getMergedSchema(Set<Path> paths, Configuration conf) throws IOException {
+    protected Schema getMergedSchema(Set<Path> basePaths, Configuration conf) throws IOException {
         Schema result = null;
         Map<Path, Schema> mergedFiles = new HashMap<Path, Schema>();
+
+        Set<Path> paths = AvroStorageUtils.getAllFilesRecursively(basePaths, conf);
         for (Path path : paths) {
             FileSystem fs = FileSystem.get(path.toUri(), conf);
             Schema schema = getSchema(path, fs);
-            result = AvroStorageUtils.mergeSchema(result, schema);
-            mergedFiles.put(path, schema);
+            if (schema != null) {
+                result = AvroStorageUtils.mergeSchema(result, schema);
+                mergedFiles.put(path, schema);
+            }
         }
         // schemaToMergedSchemaMap is only needed when merging multiple records.
         if (mergedFiles.size() > 1 && result.getType().equals(Schema.Type.RECORD)) {
@@ -302,6 +317,9 @@ public class AvroStorage extends FileInp
     protected Schema getSchemaFromFile(Path path, FileSystem fs) throws IOException {
         /* get path of the last file */
         Path lastFile = AvroStorageUtils.getLast(path, fs);
+        if (lastFile == null) {
+            return null;
+        }
 
         /* read in file and obtain schema */
         GenericDatumReader<Object> avroReader = new GenericDatumReader<Object>();
@@ -360,9 +378,14 @@ public class AvroStorage extends FileInp
         /* get avro schema */
         AvroStorageLog.funcCall("getSchema");
         if (inputAvroSchema == null) {
-            Set<Path> paths = new HashSet<Path>();
             Configuration conf = job.getConfiguration();
-            if (AvroStorageUtils.getAllSubDirs(new Path(location), conf, paths)) {
+            // If within a script, you store to one location and read from same
+            // location using AvroStorage getPaths will be empty. Since
+            // getSchema is called during script parsing we don't want to fail
+            // here if path not found
+
+            Set<Path> paths = AvroStorageUtils.getPaths(location, conf, false);
+            if (!paths.isEmpty()) {
                 setInputAvroSchema(paths, conf);
             }
         }
@@ -617,8 +640,7 @@ public class AvroStorage extends FileInp
     @Override
     public void checkSchema(ResourceSchema s) throws IOException {
         AvroStorageLog.funcCall("Check schema");
-        UDFContext context = UDFContext.getUDFContext();
-        Properties property = context.getUDFProperties(ResourceSchema.class);
+        Properties property = getUDFProperties();
         String prevSchemaStr = property.getProperty(AVRO_OUTPUT_SCHEMA_PROPERTY);
         AvroStorageLog.details("Previously defined schemas=" + prevSchemaStr);
 
@@ -651,6 +673,14 @@ public class AvroStorage extends FileInp
         AvroStorageLog.details("New schemas=" + newSchemaStr);
     }
 
+    /**
+     * Returns UDFProperties based on <code>contextSignature</code>.
+     */
+    private Properties getUDFProperties() {
+        return UDFContext.getUDFContext()
+            .getUDFProperties(this.getClass(), new String[] {contextSignature});
+    }
+
     private String getSchemaKey() {
         return Integer.toString(storeFuncIndex);
     }
@@ -678,8 +708,7 @@ public class AvroStorage extends FileInp
     public OutputFormat getOutputFormat() throws IOException {
         AvroStorageLog.funcCall("getOutputFormat");
 
-        UDFContext context = UDFContext.getUDFContext();
-        Properties property = context.getUDFProperties(ResourceSchema.class);
+        Properties property = getUDFProperties();
         String allSchemaStr = property.getProperty(AVRO_OUTPUT_SCHEMA_PROPERTY);
         Map<String, String> map = (allSchemaStr != null)  ? parseSchemaMap(allSchemaStr) : null;
 
@@ -701,7 +730,7 @@ public class AvroStorage extends FileInp
 
     @Override
     public void setStoreFuncUDFContextSignature(String signature) {
-        // Nothing to do
+        this.contextSignature = signature;
     }
 
     @Override

Modified: pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/AvroStorageUtils.java
URL: http://svn.apache.org/viewvc/pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/AvroStorageUtils.java?rev=1571454&r1=1571453&r2=1571454&view=diff
==============================================================================
--- pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/AvroStorageUtils.java (original)
+++ pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/AvroStorageUtils.java Mon Feb 24 21:41:38 2014
@@ -17,12 +17,10 @@
 
 package org.apache.pig.piggybank.storage.avro;
 
-import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.InputStream;
 import java.util.ArrayList;
 import java.util.Arrays;
-import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.LinkedHashMap;
@@ -30,7 +28,6 @@ import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
 import java.util.Set;
-import java.net.URI;
 import org.apache.avro.Schema;
 import org.apache.avro.Schema.Field;
 import org.apache.avro.generic.GenericDatumReader;
@@ -40,14 +37,10 @@ import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.PathFilter;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
 import org.apache.pig.LoadFunc;
 import org.apache.pig.ResourceSchema;
 import org.apache.pig.ResourceSchema.ResourceFieldSchema;
 import org.apache.pig.data.DataType;
-import org.apache.pig.piggybank.storage.avro.AvroStorageLog;
-
 import org.codehaus.jackson.JsonNode;
 /**
  * This is utility class for this package
@@ -100,51 +93,59 @@ public class AvroStorageUtils {
     }
 
     /**
-     * get input paths to job config
-     */
-    public static boolean addInputPaths(String pathString, Job job) throws IOException {
-      Configuration conf = job.getConfiguration();
-      FileSystem fs = FileSystem.get(conf);
-      HashSet<Path> paths = new  HashSet<Path>();
-      if (getAllSubDirs(new Path(pathString), conf, paths)) {
-        paths.addAll(Arrays.asList(FileInputFormat.getInputPaths(job)));
-        FileInputFormat.setInputPaths(job, paths.toArray(new Path[0]));
-        return true;
-      }
-      return false;
-    }
-
-    /**
-     * Adds all non-hidden directories and subdirectories to set param
-     * it supports comma-separated input paths and glob style path
+     * Gets the list of paths from the pathString specified which may contain
+     * comma-separated paths and glob style path
      *
      * @throws IOException
      */
-    public static boolean getAllSubDirs(Path path, Configuration conf,
-            Set<Path> paths) throws IOException {
-        String[] pathStrs = LoadFunc.getPathStrings(path.toString());
+    public static Set<Path> getPaths(String pathString, Configuration conf, boolean failIfNotFound)
+            throws IOException {
+        Set<Path> paths = new HashSet<Path>();
+        String[] pathStrs = LoadFunc.getPathStrings(pathString);
         for (String pathStr : pathStrs) {
             FileSystem fs = FileSystem.get(new Path(pathStr).toUri(), conf);
             FileStatus[] matchedFiles = fs.globStatus(new Path(pathStr), PATH_FILTER);
             if (matchedFiles == null || matchedFiles.length == 0) {
-                return false;
+                if (failIfNotFound) {
+                    throw new IOException("Input Pattern " + pathStr + " matches 0 files");
+                } else {
+                    continue;
+                }
             }
             for (FileStatus file : matchedFiles) {
-                getAllSubDirsInternal(file, conf, paths, fs);
+                paths.add(file.getPath());
             }
         }
-        return true;
+        return paths;
+    }
+
+    /**
+     * Returns all non-hidden files recursively inside the base paths given
+     *
+     * @throws IOException
+     */
+    public static Set<Path> getAllFilesRecursively(Set<Path> basePaths, Configuration conf) throws IOException {
+        Set<Path> paths = new HashSet<Path>();
+        for (Path path : basePaths) {
+            FileSystem fs = FileSystem.get(path.toUri(), conf);
+            FileStatus f = fs.getFileStatus(path);
+            if (f.isDir()) {
+                getAllFilesInternal(f, conf, paths, fs);
+            } else {
+                paths.add(path);
+            }
+        }
+        return paths;
     }
 
-    private static void getAllSubDirsInternal(FileStatus file, Configuration conf,
+    private static void getAllFilesInternal(FileStatus file, Configuration conf,
             Set<Path> paths, FileSystem fs) throws IOException {
-        if (file.isDir()) {
-            for (FileStatus sub : fs.listStatus(file.getPath())) {
-                getAllSubDirsInternal(sub, conf, paths, fs);
+        for (FileStatus f : fs.listStatus(file.getPath(), PATH_FILTER)) {
+            if (f.isDir()) {
+                getAllFilesInternal(f, conf, paths, fs);
+            } else {
+                paths.add(f.getPath());
             }
-        } else {
-            AvroStorageLog.details("Add input file:" + file);
-            paths.add(file.getPath());
         }
     }
 
@@ -160,22 +161,24 @@ public class AvroStorageUtils {
     /** get last file of a hdfs path if it is  a directory;
      *   or return the file itself if path is a file
      */
-    public static Path getLast(String path, FileSystem fs) throws IOException {
-        return getLast(new Path(path), fs);
-    }
-
-    /** get last file of a hdfs path if it is  a directory;
-     *   or return the file itself if path is a file
-     */
     public static Path getLast(Path path, FileSystem fs) throws IOException {
 
+        FileStatus status = fs.getFileStatus(path);
+        if (!status.isDir()) {
+            return path;
+        }
         FileStatus[] statuses = fs.listStatus(path, PATH_FILTER);
 
         if (statuses.length == 0) {
-            return path;
+            return null;
         } else {
             Arrays.sort(statuses);
-            return statuses[statuses.length - 1].getPath();
+            for (int i = statuses.length - 1; i >= 0; i--) {
+                if (!statuses[i].isDir()) {
+                    return statuses[i].getPath();
+                }
+            }
+            return null;
         }
     }
 
@@ -705,6 +708,9 @@ public class AvroStorageUtils {
     public static Schema getSchema(Path path, FileSystem fs) throws IOException {
         /* get path of the last file */
         Path lastFile = AvroStorageUtils.getLast(path, fs);
+        if (lastFile == null) {
+            return null;
+        }
 
         /* read in file and obtain schema */
         GenericDatumReader<Object> avroReader = new GenericDatumReader<Object>();

Modified: pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/PigAvroInputFormat.java
URL: http://svn.apache.org/viewvc/pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/PigAvroInputFormat.java?rev=1571454&r1=1571453&r2=1571454&view=diff
==============================================================================
--- pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/PigAvroInputFormat.java (original)
+++ pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/PigAvroInputFormat.java Mon Feb 24 21:41:38 2014
@@ -5,9 +5,9 @@
  * licenses this file to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
- * 
+ *
  * http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
@@ -18,9 +18,9 @@
 package org.apache.pig.piggybank.storage.avro;
 
 import java.io.IOException;
-import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
+
 import org.apache.avro.Schema;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.Path;
@@ -32,6 +32,7 @@ import org.apache.hadoop.mapreduce.Recor
 import org.apache.hadoop.mapreduce.TaskAttemptContext;
 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
 import org.apache.hadoop.mapreduce.lib.input.FileSplit;
+import org.apache.pig.backend.hadoop.executionengine.util.MapRedUtil;
 
 /**
  * The InputFormat for avro data.
@@ -86,4 +87,14 @@ public class PigAvroInputFormat extends 
                 ignoreBadFiles, schemaToMergedSchemaMap, useMultipleSchemas);
     }
 
+    /*
+     * This is to support multi-level/recursive directory listing until
+     * MAPREDUCE-1577 is fixed.
+     */
+    @Override
+    protected List<FileStatus> listStatus(JobContext job) throws IOException {
+        return MapRedUtil.getAllFileRecursively(super.listStatus(job),
+                job.getConfiguration());
+    }
+
 }

Modified: pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/PigAvroRecordReader.java
URL: http://svn.apache.org/viewvc/pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/PigAvroRecordReader.java?rev=1571454&r1=1571453&r2=1571454&view=diff
==============================================================================
--- pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/PigAvroRecordReader.java (original)
+++ pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/PigAvroRecordReader.java Mon Feb 24 21:41:38 2014
@@ -5,9 +5,9 @@
  * licenses this file to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
- * 
+ *
  * http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
@@ -127,6 +127,15 @@ public class PigAvroRecordReader extends
                 JsonNode defValue = subFields.get(i).defaultValue();
                 if (defValue != null) {
                     Schema.Type type = subFields.get(i).schema().getType();
+                    if (type.equals(Schema.Type.UNION)) {
+                        List<Schema> schemas = subFields.get(i).schema().getTypes();
+                        for (Schema schema : schemas) {
+                            if (!schema.getType().equals(Schema.Type.NULL)) {
+                                type = schema.getType();
+                                break;
+                            }
+                        }
+                    }
                     switch (type) {
                         case BOOLEAN:
                             mProtoTuple.add(i, defValue.getBooleanValue());

Modified: pig/branches/tez/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/evaluation/TestOver.java
URL: http://svn.apache.org/viewvc/pig/branches/tez/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/evaluation/TestOver.java?rev=1571454&r1=1571453&r2=1571454&view=diff
==============================================================================
--- pig/branches/tez/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/evaluation/TestOver.java (original)
+++ pig/branches/tez/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/evaluation/TestOver.java Mon Feb 24 21:41:38 2014
@@ -1610,6 +1610,13 @@ public class TestOver {
         t.set(4, 0);
         DataBag outbag = func.exec(t);
         assertEquals(7, outbag.size());
+        int count = 1;
+        for (Tuple to : outbag) {
+            assertEquals(1, to.size());
+            assertEquals(count/7.0, to.get(0));
+            count++;
+        }
+        /*
         Iterator<Tuple> iter = outbag.iterator();
         t = iter.next();
         assertEquals(0.14285714285714285, t.get(0));
@@ -1625,5 +1632,6 @@ public class TestOver {
         assertEquals(0.5714285714285714, t.get(0));
         t = iter.next();
         assertEquals(1.0, t.get(0));
+        */
     }
 }

Modified: pig/branches/tez/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestSequenceFileLoader.java
URL: http://svn.apache.org/viewvc/pig/branches/tez/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestSequenceFileLoader.java?rev=1571454&r1=1571453&r2=1571454&view=diff
==============================================================================
--- pig/branches/tez/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestSequenceFileLoader.java (original)
+++ pig/branches/tez/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestSequenceFileLoader.java Mon Feb 24 21:41:38 2014
@@ -18,21 +18,27 @@
 package org.apache.pig.piggybank.test.storage;
 
 import static org.apache.pig.ExecType.LOCAL;
+import static org.apache.pig.builtin.mock.Storage.resetData;
+import static org.apache.pig.builtin.mock.Storage.tuple;
 
 import java.io.File;
 import java.io.IOException;
 import java.util.Iterator;
 
 import junit.framework.TestCase;
+
 import org.junit.Test;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.SequenceFile;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.pig.PigServer;
+import org.apache.pig.builtin.mock.Storage.Data;
+import org.apache.pig.data.DataByteArray;
 import org.apache.pig.data.Tuple;
 //import org.apache.pig.test.PigExecTestCase;
 import org.apache.pig.test.Util;
@@ -43,15 +49,15 @@ import org.apache.pig.test.Util;
     "one, two, buckle my shoe",
     "three, four, shut the door",
     "five, six, something else" };
-  
+
   private static final String[][] EXPECTED = {
     {"0", "one, two, buckle my shoe"},
     {"1", "three, four, shut the door"},
     {"2", "five, six, something else"}
   };
-  
+
   private String tmpFileName;
-  
+
   private PigServer pigServer;
   @Override
   public void setUp() throws Exception {
@@ -62,12 +68,12 @@ import org.apache.pig.test.Util;
     Path path = new Path("file:///"+tmpFileName);
     JobConf conf = new JobConf();
     FileSystem fs = FileSystem.get(path.toUri(), conf);
-    
+
     IntWritable key = new IntWritable();
     Text value = new Text();
     SequenceFile.Writer writer = null;
     try {
-      writer = SequenceFile.createWriter(fs, conf, path, 
+      writer = SequenceFile.createWriter(fs, conf, path,
                                          key.getClass(), value.getClass());
       for (int i=0; i < DATA.length; i++) {
         key.set(i);
@@ -78,10 +84,10 @@ import org.apache.pig.test.Util;
       IOUtils.closeStream(writer);
     }
   }
-  
+
   @Test
   public void testReadsNocast() throws IOException {
-    pigServer.registerQuery("A = LOAD '" + Util.encodeEscape(tmpFileName) + 
+    pigServer.registerQuery("A = LOAD '" + Util.encodeEscape(tmpFileName) +
     "' USING org.apache.pig.piggybank.storage.SequenceFileLoader() AS (key, val);");
     Iterator<?> it = pigServer.openIterator("A");
     int tupleCount = 0;
@@ -98,10 +104,10 @@ import org.apache.pig.test.Util;
     }
     assertEquals(DATA.length, tupleCount);
   }
-  
+
   @Test
   public void testReadsStringCast() throws IOException {
-    pigServer.registerQuery("A = LOAD '" + Util.encodeEscape(tmpFileName) + 
+    pigServer.registerQuery("A = LOAD '" + Util.encodeEscape(tmpFileName) +
     "' USING org.apache.pig.piggybank.storage.SequenceFileLoader() AS (key:long, val);");
     Iterator<?> it = pigServer.openIterator("A");
     int tupleCount = 0;
@@ -117,4 +123,41 @@ import org.apache.pig.test.Util;
     }
     assertEquals(DATA.length, tupleCount);
   }
+
+    @Test
+    public void testReadBytesWritable() throws IOException {
+        File inputFile = File.createTempFile("test", ".txt");
+        System.err.println("fileName: " + inputFile.getAbsolutePath());
+        Path path = new Path("file:///" + inputFile.getAbsolutePath());
+        JobConf conf = new JobConf();
+        FileSystem fs = FileSystem.get(path.toUri(), conf);
+
+        IntWritable key = new IntWritable();
+        SequenceFile.Writer writer = null;
+        try {
+            writer = SequenceFile.createWriter(fs, conf, path, key.getClass(), BytesWritable.class);
+            int numRecords = 3;
+            for (int i = 0; i < numRecords; i++) {
+                key.set(i);
+                String val = "" + Math.pow(10, (numRecords - i));
+                writer.append(key, new BytesWritable(val.getBytes()));
+            }
+        } finally {
+            IOUtils.closeStream(writer);
+        }
+
+        Data data = resetData(pigServer);
+        data.set("expected",
+                tuple(0L, new DataByteArray("1000.0")),
+                tuple(1L, new DataByteArray("100.0")),
+                tuple(2L, new DataByteArray("10.0")));
+
+        pigServer.registerQuery(
+                "A = LOAD '" + Util.encodeEscape(inputFile.getAbsolutePath()) +
+                "' USING org.apache.pig.piggybank.storage.SequenceFileLoader() AS (key:long, val);");
+        pigServer.registerQuery("STORE A into 'actual' USING mock.Storage();");
+
+        assertEquals(data.get("expected"), data.get("actual"));
+
+    }
 }

Modified: pig/branches/tez/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/avro/TestAvroStorage.java
URL: http://svn.apache.org/viewvc/pig/branches/tez/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/avro/TestAvroStorage.java?rev=1571454&r1=1571453&r2=1571454&view=diff
==============================================================================
--- pig/branches/tez/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/avro/TestAvroStorage.java (original)
+++ pig/branches/tez/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/avro/TestAvroStorage.java Mon Feb 24 21:41:38 2014
@@ -19,6 +19,16 @@ package org.apache.pig.piggybank.test.st
 import static org.apache.pig.builtin.mock.Storage.resetData;
 import static org.apache.pig.builtin.mock.Storage.schema;
 import static org.apache.pig.builtin.mock.Storage.tuple;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Properties;
+import java.util.Set;
 
 import org.apache.avro.file.DataFileStream;
 import org.apache.avro.generic.GenericDatumReader;
@@ -29,9 +39,9 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.FileUtil;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.PathFilter;
-import org.apache.hadoop.mapreduce.Job;
 import org.apache.pig.ExecType;
 import org.apache.pig.LoadFunc;
+import org.apache.pig.PigConfiguration;
 import org.apache.pig.PigServer;
 import org.apache.pig.backend.executionengine.ExecException;
 import org.apache.pig.backend.executionengine.ExecJob;
@@ -41,26 +51,13 @@ import org.apache.pig.builtin.mock.Stora
 import org.apache.pig.data.Tuple;
 import org.apache.pig.impl.io.FileLocalizer;
 import org.apache.pig.impl.logicalLayer.FrontendException;
-import org.apache.pig.piggybank.storage.avro.AvroStorage;
 import org.apache.pig.piggybank.storage.avro.PigSchema2Avro;
-import org.apache.pig.test.MiniCluster;
 import org.apache.pig.test.Util;
 import org.junit.AfterClass;
 import org.junit.Assert;
 import org.junit.BeforeClass;
 import org.junit.Test;
 
-import java.io.File;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Properties;
-import java.util.Set;
-
-import static junit.framework.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-
 public class TestAvroStorage {
 
     protected static final Log LOG = LogFactory.getLog(TestAvroStorage.class);
@@ -194,7 +191,7 @@ public class TestAvroStorage {
     public static void setup() throws ExecException, IOException {
         pigServerLocal = new PigServer(ExecType.LOCAL);
         String TMP_DIR = System.getProperty("user.dir") + "/build/test/tmp/";
-        pigServerLocal.getPigContext().getProperties().setProperty("pig.temp.dir", TMP_DIR);
+        pigServerLocal.getPigContext().getProperties().setProperty(PigConfiguration.PIG_TEMP_DIR, TMP_DIR);
         outbasedir = FileLocalizer.getTemporaryPath(pigServerLocal.getPigContext()).toString() + "/TestAvroStorage/";
         deleteDirectory(new File(outbasedir));
     }
@@ -594,6 +591,7 @@ public class TestAvroStorage {
 
     @Test
     public void testUserDefinedLoadSchema() throws IOException {
+        PigSchema2Avro.setTupleIndex(2);
         // Verify that user specified schema correctly maps to input schemas
         // Input Avro files have the following schemas:
         //   name:"string", address:[customField1:"int", addressLine:"string"]
@@ -604,7 +602,7 @@ public class TestAvroStorage {
         // dropping, adding, and reordering fields where needed.
         String output= outbasedir + "testUserDefinedLoadSchema";
         String expected = basedir + "expected_testUserDefinedLoadSchema.avro";
-        String customSchema = 
+        String customSchema =
                     "{\"type\": \"record\", \"name\": \"employee\", \"fields\": [ "
                         +"{ \"default\": \"***\", \"type\": \"string\", \"name\": \"name\" }, "
                         +"{ \"name\": \"address\", \"type\": { "
@@ -621,7 +619,7 @@ public class TestAvroStorage {
             " in = LOAD '" + testUserDefinedLoadSchemaFile
                 + "' USING org.apache.pig.piggybank.storage.avro.AvroStorage ('schema', '" + customSchema + "');",
             " o = ORDER in BY name;",
-            " STORE o INTO '" + output + "' USING org.apache.pig.piggybank.storage.avro.AvroStorage ();" 
+            " STORE o INTO '" + output + "' USING org.apache.pig.piggybank.storage.avro.AvroStorage ();"
            };
         testAvroStorage(queries);
         verifyResults(output, expected);
@@ -1212,30 +1210,65 @@ public class TestAvroStorage {
     // {"name":"age","type":["null","int"],"doc":"autogenerated from Pig Field Schema"},
     // {"name":"gpa","type":["null","double"],"doc":"autogenerated from Pig Field Schema"}]}]
     public void testLoadwithNullValues() throws IOException {
-    //Input is supposed to have empty tuples
-    PigSchema2Avro.setTupleIndex(0);
-    Data data = resetData(pigServerLocal);
-    String output = outbasedir + "testLoadwithNulls";
-    deleteDirectory(new File(output));
-    String [] queries = {
-       " A = load '" +  testLoadwithNullValues + "' USING " +
-          " org.apache.pig.piggybank.storage.avro.AvroStorage(); ",
-       " B = order A by name;",
-       " store B into '" +  output +"' USING mock.Storage();"
-       };
-    testAvroStorage(queries);
-    List<Tuple> out = data.get(output);
-    assertEquals(out + " size", 4, out.size());
-
-    assertEquals(schema("name:chararray,age:int,gpa:double"), data.getSchema(output));
-
-    // sorted data ordered by name
-    assertEquals(tuple((String)null),out.get(0));
-    assertEquals(tuple((String)null),out.get(1));
-    assertEquals(tuple("calvin ellison", 24, 0.71), out.get(2));
-    assertEquals(tuple("wendy johnson", 60, 0.07), out.get(3));
+        //Input is supposed to have empty tuples
+        PigSchema2Avro.setTupleIndex(0);
+        Data data = resetData(pigServerLocal);
+        String output = outbasedir + "testLoadwithNulls";
+        deleteDirectory(new File(output));
+        String [] queries = {
+           " A = load '" +  testLoadwithNullValues + "' USING " +
+              " org.apache.pig.piggybank.storage.avro.AvroStorage(); ",
+           " B = order A by name;",
+           " store B into '" +  output +"' USING mock.Storage();"
+           };
+        testAvroStorage(queries);
+        List<Tuple> out = data.get(output);
+        assertEquals(out + " size", 4, out.size());
 
-   }
+        assertEquals(schema("name:chararray,age:int,gpa:double"), data.getSchema(output));
+
+        // sorted data ordered by name
+        assertEquals(tuple((String)null),out.get(0));
+        assertEquals(tuple((String)null),out.get(1));
+        assertEquals(tuple("calvin ellison", 24, 0.71), out.get(2));
+        assertEquals(tuple("wendy johnson", 60, 0.07), out.get(3));
+
+    }
+
+    @Test
+    public void testMultipleLoadStore() throws Exception {
+        PigSchema2Avro.setTupleIndex(0);
+        Data data = resetData(pigServerLocal);
+        data.set("foo",
+                tuple(1, 2, 3),
+                tuple(4, 5, 6),
+                tuple(7, 8, 9));
+        data.set("bar",
+                tuple("a", "b", "c"),
+                tuple("d", "e", "f"),
+                tuple("g", "h", "i"));
+        String output = outbasedir + "testMultipleLoadStore";
+        deleteDirectory(new File(output));
+        String[] storeQuery = {
+                "A = LOAD 'foo' USING " + "mock.Storage() as (a1:int, a2:int, a3:int);",
+                "B = LOAD 'bar' USING " + "mock.Storage() as (b1:chararray, b2:chararray, b3:chararray);",
+                "STORE A into '"+ output +"/A' USING " + "org.apache.pig.piggybank.storage.avro.AvroStorage();",
+                "STORE B into '"+ output +"/B' USING " + "org.apache.pig.piggybank.storage.avro.AvroStorage();"
+                };
+        testAvroStorage(storeQuery);
+        String[] loadQuery = {
+                "C = LOAD '"+ output +"/A' USING " + "org.apache.pig.piggybank.storage.avro.AvroStorage();",
+                "D = LOAD '"+ output +"/B' USING " + "org.apache.pig.piggybank.storage.avro.AvroStorage();",
+                "STORE C into 'foo-actual' USING mock.Storage();",
+                "STORE D into 'bar-actual' USING mock.Storage();"
+                };
+        testAvroStorage(loadQuery);
+
+        assertEquals(data.get("foo"), data.get("foo-actual"));
+        assertEquals(data.get("bar"), data.get("bar-actual"));
+        assertEquals("{a1: int,a2: int,a3: int}", data.getSchema("foo-actual").toString());
+        assertEquals("{b1: chararray,b2: chararray,b3: chararray}", data.getSchema("bar-actual").toString());
+    }
 
     private static void deleteDirectory (File path) {
         if ( path.exists()) {

Modified: pig/branches/tez/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/avro/TestAvroStorageUtils.java
URL: http://svn.apache.org/viewvc/pig/branches/tez/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/avro/TestAvroStorageUtils.java?rev=1571454&r1=1571453&r2=1571454&view=diff
==============================================================================
--- pig/branches/tez/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/avro/TestAvroStorageUtils.java (original)
+++ pig/branches/tez/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/avro/TestAvroStorageUtils.java Mon Feb 24 21:41:38 2014
@@ -19,15 +19,12 @@ package org.apache.pig.piggybank.test.st
 import org.apache.avro.Schema;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapreduce.Job;
 import org.apache.pig.piggybank.storage.avro.AvroStorageUtils;
-
 import org.junit.Assert;
 import org.junit.Test;
 
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
 
@@ -93,37 +90,45 @@ public class TestAvroStorageUtils {
     }
 
     @Test
-    public void testGetAllSubDirs() throws IOException {
-        final String basedir = System.getProperty("user.dir");
+    public void testGetPaths() throws IOException {
+        final String basedir = "file://" + System.getProperty("user.dir");
         final String tempdir = Long.toString(System.currentTimeMillis());
         final String nonexistentpath = basedir + "/" + tempdir + "/this_path_does_not_exist";
 
         String locationStr = null;
-        Set<Path> paths = new HashSet<Path>();
+        Set<Path> paths;
         Configuration conf = new Configuration();
 
         // existent path
         locationStr = basedir;
-        assertTrue(AvroStorageUtils.getAllSubDirs(new Path(locationStr), conf, paths));
+        paths = AvroStorageUtils.getPaths(locationStr, conf, true);
         assertFalse(paths.isEmpty());
-        paths.clear();
 
         // non-existent path
         locationStr = nonexistentpath;
-        assertFalse(AvroStorageUtils.getAllSubDirs(new Path(locationStr), conf, paths));
-        assertTrue(paths.isEmpty());
-        paths.clear();
+        try {
+            paths = AvroStorageUtils.getPaths(locationStr, conf, true);
+            fail();
+        } catch (IOException e) {
+            assertTrue(e.getMessage().contains("matches 0 files"));
+        }
 
         // empty glob pattern
         locationStr = basedir + "/{}";
-        assertFalse(AvroStorageUtils.getAllSubDirs(new Path(locationStr), conf, paths));
+        try {
+            paths = AvroStorageUtils.getPaths(locationStr, conf, true);
+            fail();
+        } catch (IOException e) {
+            assertTrue(e.getMessage().contains("matches 0 files"));
+        }
+
+        paths = AvroStorageUtils.getPaths(locationStr, conf, false);
         assertTrue(paths.isEmpty());
-        paths.clear();
 
         // bad glob pattern
         locationStr = basedir + "/{1,";
         try {
-            AvroStorageUtils.getAllSubDirs(new Path(locationStr), conf, paths);
+            AvroStorageUtils.getPaths(locationStr, conf, true);
             Assert.fail("Negative test to test illegal file pattern. Should not be succeeding!");
         } catch (IOException e) {
             // The message of the exception for illegal file pattern is rather long,

Modified: pig/branches/tez/ivy.xml
URL: http://svn.apache.org/viewvc/pig/branches/tez/ivy.xml?rev=1571454&r1=1571453&r2=1571454&view=diff
==============================================================================
--- pig/branches/tez/ivy.xml (original)
+++ pig/branches/tez/ivy.xml Mon Feb 24 21:41:38 2014
@@ -358,6 +358,40 @@
     <dependency org="com.yammer.metrics" name="metrics-core" rev="${metrics-core.version}"
        conf="test->default"/>
 
+    <!-- Dependency for Accumulo{Input,Output}Format -->
+    <dependency org="org.apache.accumulo" name="accumulo-core" rev="${accumulo15.version}" conf="compile->default">
+      <exclude org="com.google.guava" module="guava"/>
+      <exclude org="commons-codec" module="commons-codec"/>
+      <exclude org="commons-collections" module="commons-collections"/>
+      <exclude org="commons-configuration" module="commons-configuration"/>
+      <exclude org="commons-io" module="commons-io"/>
+      <exclude org="commons-lang" module="commons-lang"/>
+      <exclude org="commons-logging" module="commons-logging"/>
+      <exclude org="jline" module="jline"/>
+      <exclude org="log4j" module="log4j"/>
+      <exclude org="org.apache.hadoop" module="hadoop-client"/>
+      <exclude org="org.apache.zookeeper" module="zookeeper"/>
+      <exclude org="org.slf4j" module="slf4j-api"/>
+      <exclude org="org.slf4j" module="slf4j-log4j12"/>
+    </dependency>
+
+    <!-- Used for 'functional' Accumulo tests -->
+    <dependency org="org.apache.accumulo" name="accumulo-minicluster" rev="${accumulo15.version}" conf="compile->default">
+      <exclude org="com.google.guava" module="guava"/>
+      <exclude org="commons-codec" module="commons-codec"/>
+      <exclude org="commons-collections" module="commons-collections"/>
+      <exclude org="commons-configuration" module="commons-configuration"/>
+      <exclude org="commons-io" module="commons-io"/>
+      <exclude org="commons-lang" module="commons-lang"/>
+      <exclude org="commons-logging" module="commons-logging"/>
+      <exclude org="jline" module="jline"/>
+      <exclude org="log4j" module="log4j"/>
+      <exclude org="org.apache.hadoop" module="hadoop-client"/>
+      <exclude org="org.apache.zookeeper" module="zookeeper"/>
+      <exclude org="org.slf4j" module="slf4j-api"/>
+      <exclude org="org.slf4j" module="slf4j-log4j12"/>
+    </dependency>
+
     <!-- for piggybank -->
     <dependency org="hsqldb" name="hsqldb" rev="${hsqldb.version}"
       conf="test->default" />

Modified: pig/branches/tez/ivy/libraries.properties
URL: http://svn.apache.org/viewvc/pig/branches/tez/ivy/libraries.properties?rev=1571454&r1=1571453&r2=1571454&view=diff
==============================================================================
--- pig/branches/tez/ivy/libraries.properties (original)
+++ pig/branches/tez/ivy/libraries.properties Mon Feb 24 21:41:38 2014
@@ -14,6 +14,7 @@
 #It drives ivy and the generation of a maven POM
 
 #These are the versions of our dependencies (in alphabetical order)
+accumulo15.version=1.5.0
 apacheant.version=1.7.1
 apacherat.version=0.8
 automaton.version=1.11-8
@@ -36,10 +37,9 @@ ivy.version=2.2.0
 jasper.version=6.1.14
 groovy.version=1.8.6
 guava.version=11.0
-guava-hadoop2.version=15.0
 jersey-core.version=1.8
-hadoop-core.version=1.0.0
-hadoop-test.version=1.0.0
+hadoop-core.version=1.0.4
+hadoop-test.version=1.0.4
 hadoop-common.version=2.2.0
 hadoop-hdfs.version=2.2.0
 hadoop-mapreduce.version=2.2.0
@@ -59,7 +59,7 @@ jdiff.version=1.0.9
 jettison.version=1.3.4
 jetty.version=6.1.26
 jetty-util.version=6.1.26
-jline.version=0.9.94
+jline.version=1.0
 joda-time.version=2.1
 jopt.version=4.1
 json-simple.version=1.1
@@ -77,7 +77,7 @@ slf4j-log4j12.version=1.6.1
 xerces.version=2.10.0
 xalan.version=2.7.1
 wagon-http.version=1.0-beta-2
-zookeeper.version=3.4.4
+zookeeper.version=3.4.5
 servlet.version=4.0.6
 servlet-api.version=2.5
 protobuf-java.version=2.5.0

Modified: pig/branches/tez/shims/src/hadoop20/org/apache/pig/backend/hadoop/executionengine/shims/HadoopShims.java
URL: http://svn.apache.org/viewvc/pig/branches/tez/shims/src/hadoop20/org/apache/pig/backend/hadoop/executionengine/shims/HadoopShims.java?rev=1571454&r1=1571453&r2=1571454&view=diff
==============================================================================
--- pig/branches/tez/shims/src/hadoop20/org/apache/pig/backend/hadoop/executionengine/shims/HadoopShims.java (original)
+++ pig/branches/tez/shims/src/hadoop20/org/apache/pig/backend/hadoop/executionengine/shims/HadoopShims.java Mon Feb 24 21:41:38 2014
@@ -97,7 +97,7 @@ public class HadoopShims {
     public static JobControl newJobControl(String groupName, int timeToSleep) {
       return new PigJobControl(groupName, timeToSleep);
     }
-    
+
     public static long getDefaultBlockSize(FileSystem fs, Path path) {
         return fs.getDefaultBlockSize();
     }
@@ -115,4 +115,8 @@ public class HadoopShims {
         // for Hadoop 0.20
         return report.getProgress() != successfulProgress;
     }
+
+    public static void unsetConf(Configuration conf, String key) {
+        // Not supported in Hadoop 0.20/1.x
+    }
 }

Modified: pig/branches/tez/shims/src/hadoop23/org/apache/pig/backend/hadoop/executionengine/shims/HadoopShims.java
URL: http://svn.apache.org/viewvc/pig/branches/tez/shims/src/hadoop23/org/apache/pig/backend/hadoop/executionengine/shims/HadoopShims.java?rev=1571454&r1=1571453&r2=1571454&view=diff
==============================================================================
--- pig/branches/tez/shims/src/hadoop23/org/apache/pig/backend/hadoop/executionengine/shims/HadoopShims.java (original)
+++ pig/branches/tez/shims/src/hadoop23/org/apache/pig/backend/hadoop/executionengine/shims/HadoopShims.java Mon Feb 24 21:41:38 2014
@@ -115,4 +115,8 @@ public class HadoopShims {
     public static boolean isJobFailed(TaskReport report) {
         return report.getCurrentStatus()==TIPStatus.FAILED;
     }
+    
+    public static void unsetConf(Configuration conf, String key) {
+        conf.unset(key);
+    }
 }

Modified: pig/branches/tez/src/docs/src/documentation/content/xdocs/basic.xml
URL: http://svn.apache.org/viewvc/pig/branches/tez/src/docs/src/documentation/content/xdocs/basic.xml?rev=1571454&r1=1571453&r2=1571454&view=diff
==============================================================================
--- pig/branches/tez/src/docs/src/documentation/content/xdocs/basic.xml (original)
+++ pig/branches/tez/src/docs/src/documentation/content/xdocs/basic.xml Mon Feb 24 21:41:38 2014
@@ -5655,7 +5655,7 @@ B = FOREACH A GENERATE -x, y;
    <table>
        <tr>
             <td>
-               <p>ASSERT alias BY expression [message];</p>
+               <p>ASSERT alias BY expression [, message];</p>
             </td>
         </tr>
    </table>
@@ -5721,7 +5721,7 @@ DUMP A;
 
 <p>Now, you can assert that a0 column in your data is >0, fail if otherwise</p>
 <source>
-ASSERT A by a0 > 0 'a0 should be greater than 0';
+ASSERT A by a0 > 0, 'a0 should be greater than 0';
 </source>
 
 </section></section>