You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by ch...@apache.org on 2014/02/24 22:41:41 UTC
svn commit: r1571454 [1/5] - in /pig/branches/tez: ./ conf/
contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/
contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/string/
contrib/piggybank/java/src/main/java/...
Author: cheolsoo
Date: Mon Feb 24 21:41:38 2014
New Revision: 1571454
URL: http://svn.apache.org/r1571454
Log:
Merge latest trunk changes
Added:
pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/string/Stuff.java
- copied unchanged from r1571421, pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/string/Stuff.java
pig/branches/tez/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/string/TestStuff.java
- copied unchanged from r1571421, pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/string/TestStuff.java
pig/branches/tez/src/org/apache/pig/OverwritableStoreFunc.java
- copied unchanged from r1571421, pig/trunk/src/org/apache/pig/OverwritableStoreFunc.java
pig/branches/tez/src/org/apache/pig/backend/hadoop/accumulo/
- copied from r1571421, pig/trunk/src/org/apache/pig/backend/hadoop/accumulo/
pig/branches/tez/src/org/apache/pig/backend/hadoop/accumulo/AbstractAccumuloStorage.java
- copied unchanged from r1571421, pig/trunk/src/org/apache/pig/backend/hadoop/accumulo/AbstractAccumuloStorage.java
pig/branches/tez/src/org/apache/pig/backend/hadoop/accumulo/AccumuloBinaryConverter.java
- copied unchanged from r1571421, pig/trunk/src/org/apache/pig/backend/hadoop/accumulo/AccumuloBinaryConverter.java
pig/branches/tez/src/org/apache/pig/backend/hadoop/accumulo/AccumuloStorage.java
- copied unchanged from r1571421, pig/trunk/src/org/apache/pig/backend/hadoop/accumulo/AccumuloStorage.java
pig/branches/tez/src/org/apache/pig/backend/hadoop/accumulo/AccumuloStorageOptions.java
- copied unchanged from r1571421, pig/trunk/src/org/apache/pig/backend/hadoop/accumulo/AccumuloStorageOptions.java
pig/branches/tez/src/org/apache/pig/backend/hadoop/accumulo/Column.java
- copied unchanged from r1571421, pig/trunk/src/org/apache/pig/backend/hadoop/accumulo/Column.java
pig/branches/tez/src/org/apache/pig/backend/hadoop/accumulo/FixedByteArrayOutputStream.java
- copied unchanged from r1571421, pig/trunk/src/org/apache/pig/backend/hadoop/accumulo/FixedByteArrayOutputStream.java
pig/branches/tez/src/org/apache/pig/backend/hadoop/accumulo/Utils.java
- copied unchanged from r1571421, pig/trunk/src/org/apache/pig/backend/hadoop/accumulo/Utils.java
pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/fetch/
- copied from r1571421, pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/fetch/
pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/fetch/FetchLauncher.java
- copied unchanged from r1571421, pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/fetch/FetchLauncher.java
pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/fetch/FetchOptimizer.java
- copied, changed from r1571421, pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/fetch/FetchOptimizer.java
pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/fetch/FetchPOStoreImpl.java
- copied unchanged from r1571421, pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/fetch/FetchPOStoreImpl.java
pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/fetch/FetchProgressableReporter.java
- copied unchanged from r1571421, pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/fetch/FetchProgressableReporter.java
pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/tez/ReadScalarsTez.java
pig/branches/tez/src/org/apache/pig/newplan/logical/visitor/ResetProjectionAttachedRelationalOpVisitor.java
- copied unchanged from r1571421, pig/trunk/src/org/apache/pig/newplan/logical/visitor/ResetProjectionAttachedRelationalOpVisitor.java
pig/branches/tez/src/org/apache/pig/tools/pigstats/SimpleFetchPigStats.java
- copied unchanged from r1571421, pig/trunk/src/org/apache/pig/tools/pigstats/SimpleFetchPigStats.java
pig/branches/tez/test/org/apache/pig/backend/hadoop/accumulo/
- copied from r1571421, pig/trunk/test/org/apache/pig/backend/hadoop/accumulo/
pig/branches/tez/test/org/apache/pig/backend/hadoop/accumulo/TestAbstractAccumuloStorage.java
- copied unchanged from r1571421, pig/trunk/test/org/apache/pig/backend/hadoop/accumulo/TestAbstractAccumuloStorage.java
pig/branches/tez/test/org/apache/pig/backend/hadoop/accumulo/TestAccumuloBinaryConverter.java
- copied unchanged from r1571421, pig/trunk/test/org/apache/pig/backend/hadoop/accumulo/TestAccumuloBinaryConverter.java
pig/branches/tez/test/org/apache/pig/backend/hadoop/accumulo/TestAccumuloColumns.java
- copied unchanged from r1571421, pig/trunk/test/org/apache/pig/backend/hadoop/accumulo/TestAccumuloColumns.java
pig/branches/tez/test/org/apache/pig/backend/hadoop/accumulo/TestAccumuloPigCluster.java
- copied unchanged from r1571421, pig/trunk/test/org/apache/pig/backend/hadoop/accumulo/TestAccumuloPigCluster.java
pig/branches/tez/test/org/apache/pig/backend/hadoop/accumulo/TestAccumuloStorage.java
- copied unchanged from r1571421, pig/trunk/test/org/apache/pig/backend/hadoop/accumulo/TestAccumuloStorage.java
pig/branches/tez/test/org/apache/pig/backend/hadoop/accumulo/TestAccumuloStorageConfiguration.java
- copied unchanged from r1571421, pig/trunk/test/org/apache/pig/backend/hadoop/accumulo/TestAccumuloStorageConfiguration.java
pig/branches/tez/test/org/apache/pig/backend/hadoop/accumulo/TestAccumuloStorageOptions.java
- copied unchanged from r1571421, pig/trunk/test/org/apache/pig/backend/hadoop/accumulo/TestAccumuloStorageOptions.java
pig/branches/tez/test/org/apache/pig/test/TestAutoLocalMode.java
- copied unchanged from r1571421, pig/trunk/test/org/apache/pig/test/TestAutoLocalMode.java
pig/branches/tez/test/org/apache/pig/test/TestFetch.java
- copied unchanged from r1571421, pig/trunk/test/org/apache/pig/test/TestFetch.java
pig/branches/tez/test/org/apache/pig/test/TestPigContextClassCache.java
- copied unchanged from r1571421, pig/trunk/test/org/apache/pig/test/TestPigContextClassCache.java
pig/branches/tez/test/org/apache/pig/test/TestPredeployedJar.java
- copied unchanged from r1571421, pig/trunk/test/org/apache/pig/test/TestPredeployedJar.java
Removed:
pig/branches/tez/src/org/apache/pig/impl/builtin/ReadScalarsTez.java
Modified:
pig/branches/tez/ (props changed)
pig/branches/tez/CHANGES.txt
pig/branches/tez/build.xml
pig/branches/tez/conf/pig.properties
pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/Over.java
pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/xml/XPath.java
pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/CSVExcelStorage.java
pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/FixedWidthLoader.java
pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/SequenceFileLoader.java
pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/AvroStorage.java
pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/AvroStorageUtils.java
pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/PigAvroInputFormat.java
pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/PigAvroRecordReader.java
pig/branches/tez/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/evaluation/TestOver.java
pig/branches/tez/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestSequenceFileLoader.java
pig/branches/tez/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/avro/TestAvroStorage.java
pig/branches/tez/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/avro/TestAvroStorageUtils.java
pig/branches/tez/ivy.xml
pig/branches/tez/ivy/libraries.properties
pig/branches/tez/shims/src/hadoop20/org/apache/pig/backend/hadoop/executionengine/shims/HadoopShims.java
pig/branches/tez/shims/src/hadoop23/org/apache/pig/backend/hadoop/executionengine/shims/HadoopShims.java
pig/branches/tez/src/docs/src/documentation/content/xdocs/basic.xml
pig/branches/tez/src/docs/src/documentation/content/xdocs/func.xml
pig/branches/tez/src/docs/src/documentation/content/xdocs/perf.xml
pig/branches/tez/src/org/apache/pig/ExecTypeProvider.java
pig/branches/tez/src/org/apache/pig/LoadFunc.java
pig/branches/tez/src/org/apache/pig/Main.java
pig/branches/tez/src/org/apache/pig/PigConfiguration.java
pig/branches/tez/src/org/apache/pig/PigServer.java
pig/branches/tez/src/org/apache/pig/PigWarning.java
pig/branches/tez/src/org/apache/pig/StoreFunc.java
pig/branches/tez/src/org/apache/pig/backend/hadoop/datastorage/ConfigurationUtil.java
pig/branches/tez/src/org/apache/pig/backend/hadoop/datastorage/HPath.java
pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/HExecutionEngine.java
pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/JobControlCompiler.java
pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java
pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MapReduceLauncher.java
pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MapReduceOper.java
pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigHadoopLogger.java
pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigInputFormat.java
pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigOutputFormat.java
pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/POUserFunc.java
pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POPartialAgg.java
pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POStream.java
pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/tez/PigProcessor.java
pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/tez/TezCompiler.java
pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/tez/TezDagBuilder.java
pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/tez/TezLauncher.java
pig/branches/tez/src/org/apache/pig/backend/hadoop/executionengine/util/MapRedUtil.java
pig/branches/tez/src/org/apache/pig/backend/hadoop/hbase/HBaseStorage.java
pig/branches/tez/src/org/apache/pig/builtin/BinStorage.java
pig/branches/tez/src/org/apache/pig/builtin/JsonStorage.java
pig/branches/tez/src/org/apache/pig/builtin/PigStorage.java
pig/branches/tez/src/org/apache/pig/data/SelfSpillBag.java
pig/branches/tez/src/org/apache/pig/impl/PigContext.java
pig/branches/tez/src/org/apache/pig/impl/PigImplConstants.java
pig/branches/tez/src/org/apache/pig/impl/io/FileLocalizer.java
pig/branches/tez/src/org/apache/pig/impl/io/SequenceFileInterStorage.java
pig/branches/tez/src/org/apache/pig/impl/plan/CompilationMessageCollector.java
pig/branches/tez/src/org/apache/pig/impl/util/JarManager.java
pig/branches/tez/src/org/apache/pig/impl/util/ObjectSerializer.java
pig/branches/tez/src/org/apache/pig/impl/util/PropertiesUtil.java
pig/branches/tez/src/org/apache/pig/impl/util/Utils.java
pig/branches/tez/src/org/apache/pig/newplan/FilterExtractor.java
pig/branches/tez/src/org/apache/pig/newplan/logical/expression/ExpToPhyTranslationVisitor.java
pig/branches/tez/src/org/apache/pig/newplan/logical/relational/LOForEach.java
pig/branches/tez/src/org/apache/pig/newplan/logical/relational/LOGenerate.java
pig/branches/tez/src/org/apache/pig/newplan/logical/relational/LOSort.java
pig/branches/tez/src/org/apache/pig/newplan/logical/rules/InputOutputFileValidator.java
pig/branches/tez/src/org/apache/pig/newplan/logical/visitor/CastLineageSetter.java
pig/branches/tez/src/org/apache/pig/parser/AliasMasker.g
pig/branches/tez/src/org/apache/pig/parser/AstPrinter.g
pig/branches/tez/src/org/apache/pig/parser/AstValidator.g
pig/branches/tez/src/org/apache/pig/parser/LogicalPlanBuilder.java
pig/branches/tez/src/org/apache/pig/parser/LogicalPlanGenerator.g
pig/branches/tez/src/org/apache/pig/parser/QueryParser.g
pig/branches/tez/src/org/apache/pig/tools/pigscript/parser/PigScriptParser.jj
pig/branches/tez/src/org/apache/pig/tools/pigstats/PigStatsUtil.java
pig/branches/tez/src/pig-default.properties (props changed)
pig/branches/tez/test/e2e/pig/tests/cmdline.conf
pig/branches/tez/test/e2e/pig/tests/negative.conf
pig/branches/tez/test/e2e/pig/tests/nightly.conf
pig/branches/tez/test/org/apache/pig/test/TestAssert.java
pig/branches/tez/test/org/apache/pig/test/TestEvalPipeline2.java
pig/branches/tez/test/org/apache/pig/test/TestJsonLoaderStorage.java
pig/branches/tez/test/org/apache/pig/test/TestMultiQueryLocal.java
pig/branches/tez/test/org/apache/pig/test/TestNewPartitionFilterPushDown.java
pig/branches/tez/test/org/apache/pig/test/TestNewPlanFilterRule.java
pig/branches/tez/test/org/apache/pig/test/TestPOPartialAgg.java
pig/branches/tez/test/org/apache/pig/test/TestPigRunner.java
pig/branches/tez/test/org/apache/pig/test/TestPigServer.java
pig/branches/tez/test/org/apache/pig/test/TestPigStorage.java
pig/branches/tez/test/org/apache/pig/test/TestPruneColumn.java
pig/branches/tez/test/org/apache/pig/test/TestRank3.java
pig/branches/tez/test/org/apache/pig/test/TestRegisteredJarVisibility.java
pig/branches/tez/test/org/apache/pig/test/TestStore.java
pig/branches/tez/test/org/apache/pig/test/data/bzipdir1.bz2/bzipdir2.bz2/recordLossblockHeaderEndsAt136500.txt.bz2 (props changed)
pig/branches/tez/test/perf/pigmix/src/java/org/apache/pig/test/pigmix/udf/PigPerformanceLoader.java
Propchange: pig/branches/tez/
------------------------------------------------------------------------------
Merged /pig/trunk:r1554090-1571421
Modified: pig/branches/tez/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/branches/tez/CHANGES.txt?rev=1571454&r1=1571453&r2=1571454&view=diff
==============================================================================
--- pig/branches/tez/CHANGES.txt (original)
+++ pig/branches/tez/CHANGES.txt Mon Feb 24 21:41:38 2014
@@ -26,8 +26,35 @@ PIG-3485: Remove CastUtils.bytesToMap(by
PIG-3419: Pluggable Execution Engine (achalsoni81 via cheolsoo)
+PIG-2207: Support custom counters for aggregating warnings from different udfs (aniket486)
+
IMPROVEMENTS
+PIG-3675: Documentation for AccumuloStorage (elserj via daijy)
+
+PIG-3648: Make the sample size for RandomSampleLoader configurable (cheolsoo)
+
+PIG-259: allow store to overwrite existing directory (nezihyigitbasi via daijy)
+
+PIG-2672: Optimize the use of DistributedCache (aniket486)
+
+PIG-3238: Pig current releases lack a UDF Stuff(). This UDF deletes a specified length of characters
+ and inserts another set of characters at a specified starting point (nezihyigitbasi via daijy)
+
+PIG-3299: Provide support for LazyOutputFormat to avoid creating empty files (lbendig via daijy)
+
+PIG-3642: Direct HDFS access for small jobs (fetch) (lbendig via cheolsoo)
+
+PIG-3730: Performance issue in SelfSpillBag (rajesh.balamohan via rohini)
+
+PIG-3654: Add class cache to PigContext (tmwoodruff via daijy)
+
+PIG-3463: Pig should use hadoop local mode for small jobs (aniket486)
+
+PIG-3573: Provide StoreFunc and LoadFunc for Accumulo (elserj via daijy)
+
+PIG-3653: Add support for pre-deployed jars (tmwoodruff via daijy)
+
PIG-3645: Move FileLocalizer.setR() calls to unit tests (cheolsoo)
PIG-3637: PigCombiner creating log spam (rohini)
@@ -54,8 +81,6 @@ PIG-3295: Casting from bytearray failing
PIG-3444: CONCAT with 2+ input parameters fail (lbendig via daijy)
-PIG-3529: Upgrade HBase dependency from 0.95-SNAPSHOT to 0.96 (jarcec via daijy)
-
PIG-3117: A debug mode in which pig does not delete temporary files (ihadanny via cheolsoo)
PIG-3484: Make the size of pig.script property configurable (cheolsoo)
@@ -64,6 +89,55 @@ OPTIMIZATIONS
BUG FIXES
+PIG-3776: Conflicting versions of jline is present in trunk (cheolsoo)
+
+PIG-3674: Fix TestAccumuloPigCluster on Hadoop 2 (elserj via daijy)
+
+PIG-3740: Document direct fetch optimization (lbendig via cheolsoo)
+
+PIG-3746: NPE is thrown if Pig fails before PigStats is initialized (cheolsoo)
+
+PIG-3747: Update skewed join documentation (cheolsoo)
+
+PIG-3755: auto local mode selection does not check lower bound for size (aniket486)
+
+PIG-3753: LOGenerate generates null schema (daijy)
+
+PIG-3447: Compiler warning message dropped for CastLineageSetter and others with no enum kind (knoguchi via cheolsoo)
+
+PIG-3627: Json storage: Doesn't work in cases where other Store Functions (like PigStorage / AvroStorage)
+ do work (ssvinarchukhorton via daijy)
+
+PIG-3606: Pig script throws error when searching for hcatalog jars in latest hive (deepesh via daijy)
+
+PIG-3623: HBaseStorage: setting loadKey and noWAL to false doesn't have any effect (nezihyigitbasi via rohini)
+
+PIG-3744: SequenceFileLoader does not support BytesWritable (rohini)
+
+PIG-3726: Ranking empty records leads to NullPointerException (jarcec via daijy)
+
+PIG-3652: Pigmix parser (PigPerformanceLoader) deletes chars during parsing (keren3000 via daijy)
+
+PIG-3722: Udf deserialization for registered classes fails in local_mode (aniket486)
+
+PIG-3641: Split "otherwise" producing incorrect output when combined with ColumnPruning (knoguchi)
+
+PIG-3682: mvn-inst target does not install pig-h2.jar into local .m2 (raluri via aniket486)
+
+PIG-3661: Piggybank AvroStorage fails if used in more than one load or store statement (rohini)
+
+PIG-3511: Security: Pig temporary directories might have world readable permissions (rohini)
+
+PIG-3664: Piggy Bank XPath UDF can't be called (nezihyigitbasi via daijy)
+
+PIG-3662: Static loadcaster in BinStorage can cause exception (lbendig via rohini)
+
+PIG-3617: problem with temp file deletion in MAPREDUCE operator (nezihyigitbasi via cheolsoo)
+
+PIG-3649: POPartialAgg incorrectly calculates size reduction when multiple values aggregated (tmwoodruff via daijy)
+
+PIG-3650: Fix for PIG-3100 breaks column pruning (tmwoodruff via daijy)
+
PIG-3643: Nested Foreach with UDF and bincond is broken (cheolsoo)
PIG-3616: TestBuiltIn.testURIwithCurlyBrace() silently fails (lbendig via cheolsoo)
@@ -141,6 +215,8 @@ Release 0.12.1 (unreleased changes)
IMPROVEMENTS
+PIG-3529: Upgrade HBase dependency from 0.95-SNAPSHOT to 0.96 (jarcec via daijy)
+
PIG-3552: UriUtil used by reducer estimator should support viewfs (amatsukawa via aniket486)
PIG-3549: Print hadoop jobids for failed, killed job (aniket486)
@@ -151,6 +227,18 @@ PIG-3480: TFile-based tmpfile compressio
BUG FIXES
+PIG-3774: Piggybank Over UDF get wrong result (daijy)
+
+PIG-3657: New partition filter extractor fails with NPE (cheolsoo)
+
+PIG-3347: Store invocation brings side effect (daijy)
+
+PIG-3670: Fix assert in Pig script (daijy)
+
+PIG-3741: Utils.setTmpFileCompressionOnConf can cause side effect for SequenceFileInterStorage (aniket486)
+
+PIG-3677: ConfigurationUtil.getLocalFSProperties can return an inconsistent property set (rohini)
+
PIG-3621: Python Avro library can't read Avros made with builtin AvroStorage (russell.jurney via cheolsoo)
PIG-3592: Should not try to create success file for non-fs schemes like hbase (rohini)
Modified: pig/branches/tez/build.xml
URL: http://svn.apache.org/viewvc/pig/branches/tez/build.xml?rev=1571454&r1=1571453&r2=1571454&view=diff
==============================================================================
--- pig/branches/tez/build.xml (original)
+++ pig/branches/tez/build.xml Mon Feb 24 21:41:38 2014
@@ -182,13 +182,6 @@
<equals arg1="${hadoopversion}" arg2="23"/>
</condition>
- <!-- Tez needs guava 15.0 whereas Hadoop 1.x depends on guava 11 -->
- <script language="javascript">
- if (project.getProperty('hadoopversion').equals('23')) {
- project.setProperty('guava.version', project.getProperty('guava-hadoop2.version'));
- }
- </script>
-
<!--
HBase master version
Denotes how the HBase dependencies are layout. Value "94" denotes older
@@ -1157,6 +1150,7 @@
<pom refid="pig"/>
<attach file="${output.jarfile.sources}" classifier="sources" />
<attach file="${output.jarfile.javadoc}" classifier="javadoc" />
+ <attach file="${output.jarfile.core-h2}" classifier="h2" />
</artifact:install>
<artifact:pom file="${pigunit.pom}" id="pigunit"/>
<artifact:install file="${pigunit.jarfile}">
Modified: pig/branches/tez/conf/pig.properties
URL: http://svn.apache.org/viewvc/pig/branches/tez/conf/pig.properties?rev=1571454&r1=1571453&r2=1571454&view=diff
==============================================================================
--- pig/branches/tez/conf/pig.properties (original)
+++ pig/branches/tez/conf/pig.properties Mon Feb 24 21:41:38 2014
@@ -62,6 +62,7 @@
#pig.skewedjoin.reduce.memusage=0.3
#pig.exec.nocombiner=false
#opt.multiquery=true
+#opt.fetch=true
#Following parameters are for configuring intermediate storage format
#Supported storage types are seqfile and tfile
@@ -233,3 +234,28 @@ pig.location.check.strict=false
# purpose. By default, it is set to true.
# To inspect, use a = load '<path_to_tmp_file>' using org.apache.pig.impl.io.TFileStorage();
# pig.delete.temp.files=true
+
+# Set this option to true to convert jobs with input data size smaller than
+# pig.auto.local.input.maxbytes bytes and number of reducers <=1 to run in local mode
+# By default, this is set to false.
+# pig.auto.local.enabled=true
+
+# Set value in long as a threshold number of bytes to convert
+# jobs with smaller input data size to run in local mode
+# pig.auto.local.input.maxbytes=100000000
+
+# Set this option to override the sample size of RandomSampleLoader for
+# order-by. The default value is 100 rows per task.
+# pig.random.sampler.sample.size=100
+
+# When enabled, jobs won't create empty part files if no output is written. In this case
+# PigOutputFormat will be wrapped with org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat.
+# pig.output.lazy=true
+
+# Set this option to turn on additional jar caching for the user
+# pig.user.cache.enabled=true
+
+# This option defines location where additional jars are cached for the user.
+# Additional jar will be cached under PIG_USER_CACHE_LOCATION/${user.name}/.pigcache
+# and will be re-used across the jobs run by the user if the jar has not changed.
+# pig.user.cache.location=/tmp
Modified: pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/Over.java
URL: http://svn.apache.org/viewvc/pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/Over.java?rev=1571454&r1=1571453&r2=1571454&view=diff
==============================================================================
--- pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/Over.java (original)
+++ pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/Over.java Mon Feb 24 21:41:38 2014
@@ -349,7 +349,8 @@ public class Over extends EvalFunc<DataB
} else if ("percent_rank".equalsIgnoreCase(agg)) {
func = new PercentRank(udfArgs);
} else if ("cume_dist".equalsIgnoreCase(agg)) {
- func = new CumeDist(udfArgs);
+ //func = new CumeDist(udfArgs);
+ func = new CumeDist();
} else if ("debug".equalsIgnoreCase(agg)) {
func = new Debug();
} else {
@@ -755,6 +756,7 @@ public class Over extends EvalFunc<DataB
}
}
+ /*
private static class CumeDist extends BaseRank<Double> {
CumeDist(Object[] args) throws IOException {
super(args);
@@ -768,6 +770,20 @@ public class Over extends EvalFunc<DataB
return ((double)lastRankUsed) / (double)iter.tuples.size();
}
}
+ */
+
+ private static class CumeDist extends ResetableEvalFunc<Double> {
+
+ @Override
+ public Double exec(Tuple input) throws IOException {
+ DataBag inbag = (DataBag)input.get(0);
+ OverBag.OverBagIterator iter =
+ (OverBag.OverBagIterator)inbag.iterator();
+
+ return ((double)++currentRow)/(double)iter.tuples.size();
+ }
+ }
+
Modified: pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/xml/XPath.java
URL: http://svn.apache.org/viewvc/pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/xml/XPath.java?rev=1571454&r1=1571453&r2=1571454&view=diff
==============================================================================
--- pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/xml/XPath.java (original)
+++ pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/xml/XPath.java Mon Feb 24 21:41:38 2014
@@ -29,6 +29,7 @@ import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.logicalLayer.schema.Schema;
+import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema;
import org.w3c.dom.Document;
import org.xml.sax.InputSource;
@@ -115,15 +116,32 @@ public class XPath extends EvalFunc<Stri
}
}
- @Override
- public List<FuncSpec> getArgToFuncMapping() throws FrontendException {
-
- final List<FuncSpec> funcList = new ArrayList<FuncSpec>();
-
- funcList.add(new FuncSpec(this.getClass().getName(), new Schema(new Schema.FieldSchema(null, DataType.CHARARRAY))));
-
- return funcList;
- }
+ @Override
+ public List<FuncSpec> getArgToFuncMapping() throws FrontendException {
+
+ final List<FuncSpec> funcList = new ArrayList<FuncSpec>();
+
+ /*either two chararray arguments*/
+ List<FieldSchema> fields = new ArrayList<FieldSchema>();
+ fields.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
+ fields.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
+
+ Schema twoArgInSchema = new Schema(fields);
+
+ funcList.add(new FuncSpec(this.getClass().getName(), twoArgInSchema));
+
+ /*or two chararray and a boolean argument*/
+ fields = new ArrayList<FieldSchema>();
+ fields.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
+ fields.add(new Schema.FieldSchema(null, DataType.CHARARRAY));
+ fields.add(new Schema.FieldSchema(null, DataType.BOOLEAN));
+
+ Schema threeArgInSchema = new Schema(fields);
+
+ funcList.add(new FuncSpec(this.getClass().getName(), threeArgInSchema));
+
+ return funcList;
+ }
}
Modified: pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/CSVExcelStorage.java
URL: http://svn.apache.org/viewvc/pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/CSVExcelStorage.java?rev=1571454&r1=1571453&r2=1571454&view=diff
==============================================================================
--- pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/CSVExcelStorage.java (original)
+++ pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/CSVExcelStorage.java Mon Feb 24 21:41:38 2014
@@ -367,7 +367,8 @@ public class CSVExcelStorage extends Pig
// further records to it. If they are the same (this would
// happen if multiple small files each with a header were combined
// into one split), we know to skip the duplicate header record as well.
- if (loadingFirstRecord && headerTreatment == Headers.SKIP_INPUT_HEADER && splitIndex == 0) {
+ if (loadingFirstRecord && headerTreatment == Headers.SKIP_INPUT_HEADER &&
+ (splitIndex == 0 || splitIndex == -1)) {
try {
if (!in.nextKeyValue())
return null;
Modified: pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/FixedWidthLoader.java
URL: http://svn.apache.org/viewvc/pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/FixedWidthLoader.java?rev=1571454&r1=1571453&r2=1571454&view=diff
==============================================================================
--- pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/FixedWidthLoader.java (original)
+++ pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/FixedWidthLoader.java Mon Feb 24 21:41:38 2014
@@ -295,7 +295,7 @@ public class FixedWidthLoader extends Lo
@Override
public Tuple getNext() throws IOException {
- if (loadingFirstRecord && skipHeader && splitIndex == 0) {
+ if (loadingFirstRecord && skipHeader && (splitIndex == 0 || splitIndex == -1)) {
try {
if (!reader.nextKeyValue())
return null;
Modified: pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/SequenceFileLoader.java
URL: http://svn.apache.org/viewvc/pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/SequenceFileLoader.java?rev=1571454&r1=1571453&r2=1571454&view=diff
==============================================================================
--- pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/SequenceFileLoader.java (original)
+++ pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/SequenceFileLoader.java Mon Feb 24 21:41:38 2014
@@ -21,12 +21,11 @@ import java.io.IOException;
import java.lang.reflect.Type;
import java.util.ArrayList;
-import org.joda.time.DateTime;
-
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.io.BooleanWritable;
import org.apache.hadoop.io.ByteWritable;
+import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
@@ -56,42 +55,42 @@ import org.apache.pig.data.TupleFactory;
**/
public class SequenceFileLoader extends FileInputLoadFunc {
-
+
private SequenceFileRecordReader<Writable, Writable> reader;
-
+
private Writable key;
private Writable value;
private ArrayList<Object> mProtoTuple = null;
-
+
protected static final Log LOG = LogFactory.getLog(SequenceFileLoader.class);
protected TupleFactory mTupleFactory = TupleFactory.getInstance();
protected SerializationFactory serializationFactory;
protected byte keyType = DataType.UNKNOWN;
protected byte valType = DataType.UNKNOWN;
-
+
public SequenceFileLoader() {
mProtoTuple = new ArrayList<Object>(2);
}
-
+
protected void setKeyType(Class<?> keyClass) throws BackendException {
this.keyType |= inferPigDataType(keyClass);
- if (keyType == DataType.ERROR) {
+ if (keyType == DataType.ERROR) {
LOG.warn("Unable to translate key "+key.getClass()+" to a Pig datatype");
throw new BackendException("Unable to translate "+key.getClass()+" to a Pig datatype");
- }
+ }
}
-
+
protected void setValueType(Class<?> valueClass) throws BackendException {
this.valType |= inferPigDataType(valueClass);
- if (keyType == DataType.ERROR) {
+ if (keyType == DataType.ERROR) {
LOG.warn("Unable to translate key "+key.getClass()+" to a Pig datatype");
throw new BackendException("Unable to translate "+key.getClass()+" to a Pig datatype");
- }
+ }
}
-
+
protected byte inferPigDataType(Type t) {
- if (t == DataByteArray.class) return DataType.BYTEARRAY;
+ if (t == BytesWritable.class) return DataType.BYTEARRAY;
else if (t == Text.class) return DataType.CHARARRAY;
else if (t == IntWritable.class) return DataType.INTEGER;
else if (t == LongWritable.class) return DataType.LONG;
@@ -103,11 +102,14 @@ public class SequenceFileLoader extends
// not doing maps or other complex types for now
else return DataType.ERROR;
}
-
+
protected Object translateWritableToPigDataType(Writable w, byte dataType) {
switch(dataType) {
case DataType.CHARARRAY: return ((Text) w).toString();
- case DataType.BYTEARRAY: return((DataByteArray) w).get();
+ case DataType.BYTEARRAY:
+ BytesWritable bw = (BytesWritable) w;
+ // Make a copy
+ return new DataByteArray(bw.getBytes(), 0, bw.getLength());
case DataType.BOOLEAN: return ((BooleanWritable) w).get();
case DataType.INTEGER: return ((IntWritable) w).get();
case DataType.LONG: return ((LongWritable) w).get();
@@ -116,10 +118,10 @@ public class SequenceFileLoader extends
case DataType.BYTE: return ((ByteWritable) w).get();
case DataType.DATETIME: return ((DateTimeWritable) w).get();
}
-
+
return null;
}
-
+
@Override
public Tuple getNext() throws IOException {
boolean next = false;
@@ -128,19 +130,19 @@ public class SequenceFileLoader extends
} catch (InterruptedException e) {
throw new IOException(e);
}
-
+
if (!next) return null;
-
+
key = reader.getCurrentKey();
value = reader.getCurrentValue();
-
+
if (keyType == DataType.UNKNOWN && key != null) {
setKeyType(key.getClass());
}
if (valType == DataType.UNKNOWN && value != null) {
setValueType(value.getClass());
}
-
+
mProtoTuple.add(translateWritableToPigDataType(key, keyType));
mProtoTuple.add(translateWritableToPigDataType(value, valType));
Tuple t = mTupleFactory.newTuple(mProtoTuple);
@@ -163,6 +165,6 @@ public class SequenceFileLoader extends
@Override
public void setLocation(String location, Job job) throws IOException {
- FileInputFormat.setInputPaths(job, location);
+ FileInputFormat.setInputPaths(job, location);
}
}
Modified: pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/AvroStorage.java
URL: http://svn.apache.org/viewvc/pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/AvroStorage.java?rev=1571454&r1=1571453&r2=1571454&view=diff
==============================================================================
--- pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/AvroStorage.java (original)
+++ pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/AvroStorage.java Mon Feb 24 21:41:38 2014
@@ -21,16 +21,14 @@ import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
+import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Properties;
import java.util.Set;
-import java.util.HashSet;
-import java.net.URI;
import org.apache.avro.Schema;
import org.apache.avro.Schema.Field;
-import org.apache.avro.file.DataFileStream;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
@@ -105,6 +103,7 @@ public class AvroStorage extends FileInp
private boolean checkSchema = true; /*whether check schema of input directories*/
private boolean ignoreBadFiles = false; /* whether ignore corrupted files during load */
+ private String contextSignature = null;
/**
* Empty constructor. Output schema is derived from pig schema.
@@ -151,11 +150,15 @@ public class AvroStorage extends FileInp
if (inputAvroSchema != null) {
return;
}
- Set<Path> paths = new HashSet<Path>();
+
Configuration conf = job.getConfiguration();
- if (AvroStorageUtils.getAllSubDirs(new Path(location), conf, paths)) {
+ Set<Path> paths = AvroStorageUtils.getPaths(location, conf, true);
+ if (!paths.isEmpty()) {
+ // Set top level directories in input format. Adding all files will
+ // bloat configuration size
+ FileInputFormat.setInputPaths(job, paths.toArray(new Path[paths.size()]));
+ // Scan all directories including sub directories for schema
setInputAvroSchema(paths, conf);
- FileInputFormat.setInputPaths(job, paths.toArray(new Path[0]));
} else {
throw new IOException("Input path \'" + location + "\' is not found");
}
@@ -193,9 +196,17 @@ public class AvroStorage extends FileInp
if (paths == null || paths.isEmpty()) {
return null;
}
- Path path = paths.iterator().next();
- FileSystem fs = FileSystem.get(path.toUri(), conf);
- return getAvroSchema(path, fs);
+ Iterator<Path> iterator = paths.iterator();
+ Schema schema = null;
+ while (iterator.hasNext()) {
+ Path path = iterator.next();
+ FileSystem fs = FileSystem.get(path.toUri(), conf);
+ schema = getAvroSchema(path, fs);
+ if (schema != null) {
+ break;
+ }
+ }
+ return schema;
}
/**
@@ -237,7 +248,7 @@ public class AvroStorage extends FileInp
System.out.println("Do not check schema; use schema of " + s.getPath());
return schema;
}
- } else if (!schema.equals(newSchema)) {
+ } else if (newSchema != null && !schema.equals(newSchema)) {
throw new IOException( "Input path is " + path + ". Sub-direcotry " + s.getPath()
+ " contains different schema " + newSchema + " than " + schema);
}
@@ -254,19 +265,23 @@ public class AvroStorage extends FileInp
* Merge multiple input avro schemas into one. Note that we can't merge arbitrary schemas.
* Please see AvroStorageUtils.mergeSchema() for what's allowed and what's not allowed.
*
- * @param paths set of input files
+ * @param basePaths set of input dirs or files
* @param conf configuration
* @return avro schema
* @throws IOException
*/
- protected Schema getMergedSchema(Set<Path> paths, Configuration conf) throws IOException {
+ protected Schema getMergedSchema(Set<Path> basePaths, Configuration conf) throws IOException {
Schema result = null;
Map<Path, Schema> mergedFiles = new HashMap<Path, Schema>();
+
+ Set<Path> paths = AvroStorageUtils.getAllFilesRecursively(basePaths, conf);
for (Path path : paths) {
FileSystem fs = FileSystem.get(path.toUri(), conf);
Schema schema = getSchema(path, fs);
- result = AvroStorageUtils.mergeSchema(result, schema);
- mergedFiles.put(path, schema);
+ if (schema != null) {
+ result = AvroStorageUtils.mergeSchema(result, schema);
+ mergedFiles.put(path, schema);
+ }
}
// schemaToMergedSchemaMap is only needed when merging multiple records.
if (mergedFiles.size() > 1 && result.getType().equals(Schema.Type.RECORD)) {
@@ -302,6 +317,9 @@ public class AvroStorage extends FileInp
protected Schema getSchemaFromFile(Path path, FileSystem fs) throws IOException {
/* get path of the last file */
Path lastFile = AvroStorageUtils.getLast(path, fs);
+ if (lastFile == null) {
+ return null;
+ }
/* read in file and obtain schema */
GenericDatumReader<Object> avroReader = new GenericDatumReader<Object>();
@@ -360,9 +378,14 @@ public class AvroStorage extends FileInp
/* get avro schema */
AvroStorageLog.funcCall("getSchema");
if (inputAvroSchema == null) {
- Set<Path> paths = new HashSet<Path>();
Configuration conf = job.getConfiguration();
- if (AvroStorageUtils.getAllSubDirs(new Path(location), conf, paths)) {
+ // If, within a script, you store to one location and read from the
+ // same location using AvroStorage, getPaths will be empty. Since
+ // getSchema is called during script parsing, we don't want to fail
+ // here if the path is not found.
+
+ Set<Path> paths = AvroStorageUtils.getPaths(location, conf, false);
+ if (!paths.isEmpty()) {
setInputAvroSchema(paths, conf);
}
}
@@ -617,8 +640,7 @@ public class AvroStorage extends FileInp
@Override
public void checkSchema(ResourceSchema s) throws IOException {
AvroStorageLog.funcCall("Check schema");
- UDFContext context = UDFContext.getUDFContext();
- Properties property = context.getUDFProperties(ResourceSchema.class);
+ Properties property = getUDFProperties();
String prevSchemaStr = property.getProperty(AVRO_OUTPUT_SCHEMA_PROPERTY);
AvroStorageLog.details("Previously defined schemas=" + prevSchemaStr);
@@ -651,6 +673,14 @@ public class AvroStorage extends FileInp
AvroStorageLog.details("New schemas=" + newSchemaStr);
}
+ /**
+ * Returns UDFProperties based on <code>contextSignature</code>.
+ */
+ private Properties getUDFProperties() {
+ return UDFContext.getUDFContext()
+ .getUDFProperties(this.getClass(), new String[] {contextSignature});
+ }
+
private String getSchemaKey() {
return Integer.toString(storeFuncIndex);
}
@@ -678,8 +708,7 @@ public class AvroStorage extends FileInp
public OutputFormat getOutputFormat() throws IOException {
AvroStorageLog.funcCall("getOutputFormat");
- UDFContext context = UDFContext.getUDFContext();
- Properties property = context.getUDFProperties(ResourceSchema.class);
+ Properties property = getUDFProperties();
String allSchemaStr = property.getProperty(AVRO_OUTPUT_SCHEMA_PROPERTY);
Map<String, String> map = (allSchemaStr != null) ? parseSchemaMap(allSchemaStr) : null;
@@ -701,7 +730,7 @@ public class AvroStorage extends FileInp
@Override
public void setStoreFuncUDFContextSignature(String signature) {
- // Nothing to do
+ this.contextSignature = signature;
}
@Override
Modified: pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/AvroStorageUtils.java
URL: http://svn.apache.org/viewvc/pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/AvroStorageUtils.java?rev=1571454&r1=1571453&r2=1571454&view=diff
==============================================================================
--- pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/AvroStorageUtils.java (original)
+++ pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/AvroStorageUtils.java Mon Feb 24 21:41:38 2014
@@ -17,12 +17,10 @@
package org.apache.pig.piggybank.storage.avro;
-import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
-import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
@@ -30,7 +28,6 @@ import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
-import java.net.URI;
import org.apache.avro.Schema;
import org.apache.avro.Schema.Field;
import org.apache.avro.generic.GenericDatumReader;
@@ -40,14 +37,10 @@ import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.pig.LoadFunc;
import org.apache.pig.ResourceSchema;
import org.apache.pig.ResourceSchema.ResourceFieldSchema;
import org.apache.pig.data.DataType;
-import org.apache.pig.piggybank.storage.avro.AvroStorageLog;
-
import org.codehaus.jackson.JsonNode;
/**
* This is utility class for this package
@@ -100,51 +93,59 @@ public class AvroStorageUtils {
}
/**
- * get input paths to job config
- */
- public static boolean addInputPaths(String pathString, Job job) throws IOException {
- Configuration conf = job.getConfiguration();
- FileSystem fs = FileSystem.get(conf);
- HashSet<Path> paths = new HashSet<Path>();
- if (getAllSubDirs(new Path(pathString), conf, paths)) {
- paths.addAll(Arrays.asList(FileInputFormat.getInputPaths(job)));
- FileInputFormat.setInputPaths(job, paths.toArray(new Path[0]));
- return true;
- }
- return false;
- }
-
- /**
- * Adds all non-hidden directories and subdirectories to set param
- * it supports comma-separated input paths and glob style path
+ * Gets the list of paths from the pathString specified, which may contain
+ * comma-separated paths and glob style paths
*
* @throws IOException
*/
- public static boolean getAllSubDirs(Path path, Configuration conf,
- Set<Path> paths) throws IOException {
- String[] pathStrs = LoadFunc.getPathStrings(path.toString());
+ public static Set<Path> getPaths(String pathString, Configuration conf, boolean failIfNotFound)
+ throws IOException {
+ Set<Path> paths = new HashSet<Path>();
+ String[] pathStrs = LoadFunc.getPathStrings(pathString);
for (String pathStr : pathStrs) {
FileSystem fs = FileSystem.get(new Path(pathStr).toUri(), conf);
FileStatus[] matchedFiles = fs.globStatus(new Path(pathStr), PATH_FILTER);
if (matchedFiles == null || matchedFiles.length == 0) {
- return false;
+ if (failIfNotFound) {
+ throw new IOException("Input Pattern " + pathStr + " matches 0 files");
+ } else {
+ continue;
+ }
}
for (FileStatus file : matchedFiles) {
- getAllSubDirsInternal(file, conf, paths, fs);
+ paths.add(file.getPath());
}
}
- return true;
+ return paths;
+ }
+
+ /**
+ * Returns all non-hidden files recursively inside the base paths given
+ *
+ * @throws IOException
+ */
+ public static Set<Path> getAllFilesRecursively(Set<Path> basePaths, Configuration conf) throws IOException {
+ Set<Path> paths = new HashSet<Path>();
+ for (Path path : basePaths) {
+ FileSystem fs = FileSystem.get(path.toUri(), conf);
+ FileStatus f = fs.getFileStatus(path);
+ if (f.isDir()) {
+ getAllFilesInternal(f, conf, paths, fs);
+ } else {
+ paths.add(path);
+ }
+ }
+ return paths;
}
- private static void getAllSubDirsInternal(FileStatus file, Configuration conf,
+ private static void getAllFilesInternal(FileStatus file, Configuration conf,
Set<Path> paths, FileSystem fs) throws IOException {
- if (file.isDir()) {
- for (FileStatus sub : fs.listStatus(file.getPath())) {
- getAllSubDirsInternal(sub, conf, paths, fs);
+ for (FileStatus f : fs.listStatus(file.getPath(), PATH_FILTER)) {
+ if (f.isDir()) {
+ getAllFilesInternal(f, conf, paths, fs);
+ } else {
+ paths.add(f.getPath());
}
- } else {
- AvroStorageLog.details("Add input file:" + file);
- paths.add(file.getPath());
}
}
@@ -160,22 +161,24 @@ public class AvroStorageUtils {
/** get last file of a hdfs path if it is a directory;
* or return the file itself if path is a file
*/
- public static Path getLast(String path, FileSystem fs) throws IOException {
- return getLast(new Path(path), fs);
- }
-
- /** get last file of a hdfs path if it is a directory;
- * or return the file itself if path is a file
- */
public static Path getLast(Path path, FileSystem fs) throws IOException {
+ FileStatus status = fs.getFileStatus(path);
+ if (!status.isDir()) {
+ return path;
+ }
FileStatus[] statuses = fs.listStatus(path, PATH_FILTER);
if (statuses.length == 0) {
- return path;
+ return null;
} else {
Arrays.sort(statuses);
- return statuses[statuses.length - 1].getPath();
+ for (int i = statuses.length - 1; i >= 0; i--) {
+ if (!statuses[i].isDir()) {
+ return statuses[i].getPath();
+ }
+ }
+ return null;
}
}
@@ -705,6 +708,9 @@ public class AvroStorageUtils {
public static Schema getSchema(Path path, FileSystem fs) throws IOException {
/* get path of the last file */
Path lastFile = AvroStorageUtils.getLast(path, fs);
+ if (lastFile == null) {
+ return null;
+ }
/* read in file and obtain schema */
GenericDatumReader<Object> avroReader = new GenericDatumReader<Object>();
Modified: pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/PigAvroInputFormat.java
URL: http://svn.apache.org/viewvc/pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/PigAvroInputFormat.java?rev=1571454&r1=1571453&r2=1571454&view=diff
==============================================================================
--- pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/PigAvroInputFormat.java (original)
+++ pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/PigAvroInputFormat.java Mon Feb 24 21:41:38 2014
@@ -5,9 +5,9 @@
* licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
@@ -18,9 +18,9 @@
package org.apache.pig.piggybank.storage.avro;
import java.io.IOException;
-import java.util.ArrayList;
import java.util.List;
import java.util.Map;
+
import org.apache.avro.Schema;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
@@ -32,6 +32,7 @@ import org.apache.hadoop.mapreduce.Recor
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
+import org.apache.pig.backend.hadoop.executionengine.util.MapRedUtil;
/**
* The InputFormat for avro data.
@@ -86,4 +87,14 @@ public class PigAvroInputFormat extends
ignoreBadFiles, schemaToMergedSchemaMap, useMultipleSchemas);
}
+ /*
+ * This is to support multi-level/recursive directory listing until
+ * MAPREDUCE-1577 is fixed.
+ */
+ @Override
+ protected List<FileStatus> listStatus(JobContext job) throws IOException {
+ return MapRedUtil.getAllFileRecursively(super.listStatus(job),
+ job.getConfiguration());
+ }
+
}
Modified: pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/PigAvroRecordReader.java
URL: http://svn.apache.org/viewvc/pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/PigAvroRecordReader.java?rev=1571454&r1=1571453&r2=1571454&view=diff
==============================================================================
--- pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/PigAvroRecordReader.java (original)
+++ pig/branches/tez/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/avro/PigAvroRecordReader.java Mon Feb 24 21:41:38 2014
@@ -5,9 +5,9 @@
* licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
@@ -127,6 +127,15 @@ public class PigAvroRecordReader extends
JsonNode defValue = subFields.get(i).defaultValue();
if (defValue != null) {
Schema.Type type = subFields.get(i).schema().getType();
+ if (type.equals(Schema.Type.UNION)) {
+ List<Schema> schemas = subFields.get(i).schema().getTypes();
+ for (Schema schema : schemas) {
+ if (!schema.getType().equals(Schema.Type.NULL)) {
+ type = schema.getType();
+ break;
+ }
+ }
+ }
switch (type) {
case BOOLEAN:
mProtoTuple.add(i, defValue.getBooleanValue());
Modified: pig/branches/tez/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/evaluation/TestOver.java
URL: http://svn.apache.org/viewvc/pig/branches/tez/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/evaluation/TestOver.java?rev=1571454&r1=1571453&r2=1571454&view=diff
==============================================================================
--- pig/branches/tez/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/evaluation/TestOver.java (original)
+++ pig/branches/tez/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/evaluation/TestOver.java Mon Feb 24 21:41:38 2014
@@ -1610,6 +1610,13 @@ public class TestOver {
t.set(4, 0);
DataBag outbag = func.exec(t);
assertEquals(7, outbag.size());
+ int count = 1;
+ for (Tuple to : outbag) {
+ assertEquals(1, to.size());
+ assertEquals(count/7.0, to.get(0));
+ count++;
+ }
+ /*
Iterator<Tuple> iter = outbag.iterator();
t = iter.next();
assertEquals(0.14285714285714285, t.get(0));
@@ -1625,5 +1632,6 @@ public class TestOver {
assertEquals(0.5714285714285714, t.get(0));
t = iter.next();
assertEquals(1.0, t.get(0));
+ */
}
}
Modified: pig/branches/tez/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestSequenceFileLoader.java
URL: http://svn.apache.org/viewvc/pig/branches/tez/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestSequenceFileLoader.java?rev=1571454&r1=1571453&r2=1571454&view=diff
==============================================================================
--- pig/branches/tez/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestSequenceFileLoader.java (original)
+++ pig/branches/tez/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestSequenceFileLoader.java Mon Feb 24 21:41:38 2014
@@ -18,21 +18,27 @@
package org.apache.pig.piggybank.test.storage;
import static org.apache.pig.ExecType.LOCAL;
+import static org.apache.pig.builtin.mock.Storage.resetData;
+import static org.apache.pig.builtin.mock.Storage.tuple;
import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import junit.framework.TestCase;
+
import org.junit.Test;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.pig.PigServer;
+import org.apache.pig.builtin.mock.Storage.Data;
+import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
//import org.apache.pig.test.PigExecTestCase;
import org.apache.pig.test.Util;
@@ -43,15 +49,15 @@ import org.apache.pig.test.Util;
"one, two, buckle my shoe",
"three, four, shut the door",
"five, six, something else" };
-
+
private static final String[][] EXPECTED = {
{"0", "one, two, buckle my shoe"},
{"1", "three, four, shut the door"},
{"2", "five, six, something else"}
};
-
+
private String tmpFileName;
-
+
private PigServer pigServer;
@Override
public void setUp() throws Exception {
@@ -62,12 +68,12 @@ import org.apache.pig.test.Util;
Path path = new Path("file:///"+tmpFileName);
JobConf conf = new JobConf();
FileSystem fs = FileSystem.get(path.toUri(), conf);
-
+
IntWritable key = new IntWritable();
Text value = new Text();
SequenceFile.Writer writer = null;
try {
- writer = SequenceFile.createWriter(fs, conf, path,
+ writer = SequenceFile.createWriter(fs, conf, path,
key.getClass(), value.getClass());
for (int i=0; i < DATA.length; i++) {
key.set(i);
@@ -78,10 +84,10 @@ import org.apache.pig.test.Util;
IOUtils.closeStream(writer);
}
}
-
+
@Test
public void testReadsNocast() throws IOException {
- pigServer.registerQuery("A = LOAD '" + Util.encodeEscape(tmpFileName) +
+ pigServer.registerQuery("A = LOAD '" + Util.encodeEscape(tmpFileName) +
"' USING org.apache.pig.piggybank.storage.SequenceFileLoader() AS (key, val);");
Iterator<?> it = pigServer.openIterator("A");
int tupleCount = 0;
@@ -98,10 +104,10 @@ import org.apache.pig.test.Util;
}
assertEquals(DATA.length, tupleCount);
}
-
+
@Test
public void testReadsStringCast() throws IOException {
- pigServer.registerQuery("A = LOAD '" + Util.encodeEscape(tmpFileName) +
+ pigServer.registerQuery("A = LOAD '" + Util.encodeEscape(tmpFileName) +
"' USING org.apache.pig.piggybank.storage.SequenceFileLoader() AS (key:long, val);");
Iterator<?> it = pigServer.openIterator("A");
int tupleCount = 0;
@@ -117,4 +123,41 @@ import org.apache.pig.test.Util;
}
assertEquals(DATA.length, tupleCount);
}
+
+ @Test
+ public void testReadBytesWritable() throws IOException {
+ File inputFile = File.createTempFile("test", ".txt");
+ System.err.println("fileName: " + inputFile.getAbsolutePath());
+ Path path = new Path("file:///" + inputFile.getAbsolutePath());
+ JobConf conf = new JobConf();
+ FileSystem fs = FileSystem.get(path.toUri(), conf);
+
+ IntWritable key = new IntWritable();
+ SequenceFile.Writer writer = null;
+ try {
+ writer = SequenceFile.createWriter(fs, conf, path, key.getClass(), BytesWritable.class);
+ int numRecords = 3;
+ for (int i = 0; i < numRecords; i++) {
+ key.set(i);
+ String val = "" + Math.pow(10, (numRecords - i));
+ writer.append(key, new BytesWritable(val.getBytes()));
+ }
+ } finally {
+ IOUtils.closeStream(writer);
+ }
+
+ Data data = resetData(pigServer);
+ data.set("expected",
+ tuple(0L, new DataByteArray("1000.0")),
+ tuple(1L, new DataByteArray("100.0")),
+ tuple(2L, new DataByteArray("10.0")));
+
+ pigServer.registerQuery(
+ "A = LOAD '" + Util.encodeEscape(inputFile.getAbsolutePath()) +
+ "' USING org.apache.pig.piggybank.storage.SequenceFileLoader() AS (key:long, val);");
+ pigServer.registerQuery("STORE A into 'actual' USING mock.Storage();");
+
+ assertEquals(data.get("expected"), data.get("actual"));
+
+ }
}
Modified: pig/branches/tez/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/avro/TestAvroStorage.java
URL: http://svn.apache.org/viewvc/pig/branches/tez/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/avro/TestAvroStorage.java?rev=1571454&r1=1571453&r2=1571454&view=diff
==============================================================================
--- pig/branches/tez/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/avro/TestAvroStorage.java (original)
+++ pig/branches/tez/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/avro/TestAvroStorage.java Mon Feb 24 21:41:38 2014
@@ -19,6 +19,16 @@ package org.apache.pig.piggybank.test.st
import static org.apache.pig.builtin.mock.Storage.resetData;
import static org.apache.pig.builtin.mock.Storage.schema;
import static org.apache.pig.builtin.mock.Storage.tuple;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Properties;
+import java.util.Set;
import org.apache.avro.file.DataFileStream;
import org.apache.avro.generic.GenericDatumReader;
@@ -29,9 +39,9 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
-import org.apache.hadoop.mapreduce.Job;
import org.apache.pig.ExecType;
import org.apache.pig.LoadFunc;
+import org.apache.pig.PigConfiguration;
import org.apache.pig.PigServer;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.backend.executionengine.ExecJob;
@@ -41,26 +51,13 @@ import org.apache.pig.builtin.mock.Stora
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.io.FileLocalizer;
import org.apache.pig.impl.logicalLayer.FrontendException;
-import org.apache.pig.piggybank.storage.avro.AvroStorage;
import org.apache.pig.piggybank.storage.avro.PigSchema2Avro;
-import org.apache.pig.test.MiniCluster;
import org.apache.pig.test.Util;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;
-import java.io.File;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Properties;
-import java.util.Set;
-
-import static junit.framework.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-
public class TestAvroStorage {
protected static final Log LOG = LogFactory.getLog(TestAvroStorage.class);
@@ -194,7 +191,7 @@ public class TestAvroStorage {
public static void setup() throws ExecException, IOException {
pigServerLocal = new PigServer(ExecType.LOCAL);
String TMP_DIR = System.getProperty("user.dir") + "/build/test/tmp/";
- pigServerLocal.getPigContext().getProperties().setProperty("pig.temp.dir", TMP_DIR);
+ pigServerLocal.getPigContext().getProperties().setProperty(PigConfiguration.PIG_TEMP_DIR, TMP_DIR);
outbasedir = FileLocalizer.getTemporaryPath(pigServerLocal.getPigContext()).toString() + "/TestAvroStorage/";
deleteDirectory(new File(outbasedir));
}
@@ -594,6 +591,7 @@ public class TestAvroStorage {
@Test
public void testUserDefinedLoadSchema() throws IOException {
+ PigSchema2Avro.setTupleIndex(2);
// Verify that user specified schema correctly maps to input schemas
// Input Avro files have the following schemas:
// name:"string", address:[customField1:"int", addressLine:"string"]
@@ -604,7 +602,7 @@ public class TestAvroStorage {
// dropping, adding, and reordering fields where needed.
String output= outbasedir + "testUserDefinedLoadSchema";
String expected = basedir + "expected_testUserDefinedLoadSchema.avro";
- String customSchema =
+ String customSchema =
"{\"type\": \"record\", \"name\": \"employee\", \"fields\": [ "
+"{ \"default\": \"***\", \"type\": \"string\", \"name\": \"name\" }, "
+"{ \"name\": \"address\", \"type\": { "
@@ -621,7 +619,7 @@ public class TestAvroStorage {
" in = LOAD '" + testUserDefinedLoadSchemaFile
+ "' USING org.apache.pig.piggybank.storage.avro.AvroStorage ('schema', '" + customSchema + "');",
" o = ORDER in BY name;",
- " STORE o INTO '" + output + "' USING org.apache.pig.piggybank.storage.avro.AvroStorage ();"
+ " STORE o INTO '" + output + "' USING org.apache.pig.piggybank.storage.avro.AvroStorage ();"
};
testAvroStorage(queries);
verifyResults(output, expected);
@@ -1212,30 +1210,65 @@ public class TestAvroStorage {
// {"name":"age","type":["null","int"],"doc":"autogenerated from Pig Field Schema"},
// {"name":"gpa","type":["null","double"],"doc":"autogenerated from Pig Field Schema"}]}]
public void testLoadwithNullValues() throws IOException {
- //Input is supposed to have empty tuples
- PigSchema2Avro.setTupleIndex(0);
- Data data = resetData(pigServerLocal);
- String output = outbasedir + "testLoadwithNulls";
- deleteDirectory(new File(output));
- String [] queries = {
- " A = load '" + testLoadwithNullValues + "' USING " +
- " org.apache.pig.piggybank.storage.avro.AvroStorage(); ",
- " B = order A by name;",
- " store B into '" + output +"' USING mock.Storage();"
- };
- testAvroStorage(queries);
- List<Tuple> out = data.get(output);
- assertEquals(out + " size", 4, out.size());
-
- assertEquals(schema("name:chararray,age:int,gpa:double"), data.getSchema(output));
-
- // sorted data ordered by name
- assertEquals(tuple((String)null),out.get(0));
- assertEquals(tuple((String)null),out.get(1));
- assertEquals(tuple("calvin ellison", 24, 0.71), out.get(2));
- assertEquals(tuple("wendy johnson", 60, 0.07), out.get(3));
+ //Input is supposed to have empty tuples
+ PigSchema2Avro.setTupleIndex(0);
+ Data data = resetData(pigServerLocal);
+ String output = outbasedir + "testLoadwithNulls";
+ deleteDirectory(new File(output));
+ String [] queries = {
+ " A = load '" + testLoadwithNullValues + "' USING " +
+ " org.apache.pig.piggybank.storage.avro.AvroStorage(); ",
+ " B = order A by name;",
+ " store B into '" + output +"' USING mock.Storage();"
+ };
+ testAvroStorage(queries);
+ List<Tuple> out = data.get(output);
+ assertEquals(out + " size", 4, out.size());
- }
+ assertEquals(schema("name:chararray,age:int,gpa:double"), data.getSchema(output));
+
+ // sorted data ordered by name
+ assertEquals(tuple((String)null),out.get(0));
+ assertEquals(tuple((String)null),out.get(1));
+ assertEquals(tuple("calvin ellison", 24, 0.71), out.get(2));
+ assertEquals(tuple("wendy johnson", 60, 0.07), out.get(3));
+
+ }
+
+ @Test
+ public void testMultipleLoadStore() throws Exception {
+ PigSchema2Avro.setTupleIndex(0);
+ Data data = resetData(pigServerLocal);
+ data.set("foo",
+ tuple(1, 2, 3),
+ tuple(4, 5, 6),
+ tuple(7, 8, 9));
+ data.set("bar",
+ tuple("a", "b", "c"),
+ tuple("d", "e", "f"),
+ tuple("g", "h", "i"));
+ String output = outbasedir + "testMultipleLoadStore";
+ deleteDirectory(new File(output));
+ String[] storeQuery = {
+ "A = LOAD 'foo' USING " + "mock.Storage() as (a1:int, a2:int, a3:int);",
+ "B = LOAD 'bar' USING " + "mock.Storage() as (b1:chararray, b2:chararray, b3:chararray);",
+ "STORE A into '"+ output +"/A' USING " + "org.apache.pig.piggybank.storage.avro.AvroStorage();",
+ "STORE B into '"+ output +"/B' USING " + "org.apache.pig.piggybank.storage.avro.AvroStorage();"
+ };
+ testAvroStorage(storeQuery);
+ String[] loadQuery = {
+ "C = LOAD '"+ output +"/A' USING " + "org.apache.pig.piggybank.storage.avro.AvroStorage();",
+ "D = LOAD '"+ output +"/B' USING " + "org.apache.pig.piggybank.storage.avro.AvroStorage();",
+ "STORE C into 'foo-actual' USING mock.Storage();",
+ "STORE D into 'bar-actual' USING mock.Storage();"
+ };
+ testAvroStorage(loadQuery);
+
+ assertEquals(data.get("foo"), data.get("foo-actual"));
+ assertEquals(data.get("bar"), data.get("bar-actual"));
+ assertEquals("{a1: int,a2: int,a3: int}", data.getSchema("foo-actual").toString());
+ assertEquals("{b1: chararray,b2: chararray,b3: chararray}", data.getSchema("bar-actual").toString());
+ }
private static void deleteDirectory (File path) {
if ( path.exists()) {
Modified: pig/branches/tez/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/avro/TestAvroStorageUtils.java
URL: http://svn.apache.org/viewvc/pig/branches/tez/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/avro/TestAvroStorageUtils.java?rev=1571454&r1=1571453&r2=1571454&view=diff
==============================================================================
--- pig/branches/tez/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/avro/TestAvroStorageUtils.java (original)
+++ pig/branches/tez/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/avro/TestAvroStorageUtils.java Mon Feb 24 21:41:38 2014
@@ -19,15 +19,12 @@ package org.apache.pig.piggybank.test.st
import org.apache.avro.Schema;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapreduce.Job;
import org.apache.pig.piggybank.storage.avro.AvroStorageUtils;
-
import org.junit.Assert;
import org.junit.Test;
import java.io.IOException;
import java.util.ArrayList;
-import java.util.HashSet;
import java.util.List;
import java.util.Set;
@@ -93,37 +90,45 @@ public class TestAvroStorageUtils {
}
@Test
- public void testGetAllSubDirs() throws IOException {
- final String basedir = System.getProperty("user.dir");
+ public void testGetPaths() throws IOException {
+ final String basedir = "file://" + System.getProperty("user.dir");
final String tempdir = Long.toString(System.currentTimeMillis());
final String nonexistentpath = basedir + "/" + tempdir + "/this_path_does_not_exist";
String locationStr = null;
- Set<Path> paths = new HashSet<Path>();
+ Set<Path> paths;
Configuration conf = new Configuration();
// existent path
locationStr = basedir;
- assertTrue(AvroStorageUtils.getAllSubDirs(new Path(locationStr), conf, paths));
+ paths = AvroStorageUtils.getPaths(locationStr, conf, true);
assertFalse(paths.isEmpty());
- paths.clear();
// non-existent path
locationStr = nonexistentpath;
- assertFalse(AvroStorageUtils.getAllSubDirs(new Path(locationStr), conf, paths));
- assertTrue(paths.isEmpty());
- paths.clear();
+ try {
+ paths = AvroStorageUtils.getPaths(locationStr, conf, true);
+ fail();
+ } catch (IOException e) {
+ assertTrue(e.getMessage().contains("matches 0 files"));
+ }
// empty glob pattern
locationStr = basedir + "/{}";
- assertFalse(AvroStorageUtils.getAllSubDirs(new Path(locationStr), conf, paths));
+ try {
+ paths = AvroStorageUtils.getPaths(locationStr, conf, true);
+ fail();
+ } catch (IOException e) {
+ assertTrue(e.getMessage().contains("matches 0 files"));
+ }
+
+ paths = AvroStorageUtils.getPaths(locationStr, conf, false);
assertTrue(paths.isEmpty());
- paths.clear();
// bad glob pattern
locationStr = basedir + "/{1,";
try {
- AvroStorageUtils.getAllSubDirs(new Path(locationStr), conf, paths);
+ AvroStorageUtils.getPaths(locationStr, conf, true);
Assert.fail("Negative test to test illegal file pattern. Should not be succeeding!");
} catch (IOException e) {
// The message of the exception for illegal file pattern is rather long,
Modified: pig/branches/tez/ivy.xml
URL: http://svn.apache.org/viewvc/pig/branches/tez/ivy.xml?rev=1571454&r1=1571453&r2=1571454&view=diff
==============================================================================
--- pig/branches/tez/ivy.xml (original)
+++ pig/branches/tez/ivy.xml Mon Feb 24 21:41:38 2014
@@ -358,6 +358,40 @@
<dependency org="com.yammer.metrics" name="metrics-core" rev="${metrics-core.version}"
conf="test->default"/>
+ <!-- Dependency for Accumulo{Input,Output}Format -->
+ <dependency org="org.apache.accumulo" name="accumulo-core" rev="${accumulo15.version}" conf="compile->default">
+ <exclude org="com.google.guava" module="guava"/>
+ <exclude org="commons-codec" module="commons-codec"/>
+ <exclude org="commons-collections" module="commons-collections"/>
+ <exclude org="commons-configuration" module="commons-configuration"/>
+ <exclude org="commons-io" module="commons-io"/>
+ <exclude org="commons-lang" module="commons-lang"/>
+ <exclude org="commons-logging" module="commons-logging"/>
+ <exclude org="jline" module="jline"/>
+ <exclude org="log4j" module="log4j"/>
+ <exclude org="org.apache.hadoop" module="hadoop-client"/>
+ <exclude org="org.apache.zookeeper" module="zookeeper"/>
+ <exclude org="org.slf4j" module="slf4j-api"/>
+ <exclude org="org.slf4j" module="slf4j-log4j12"/>
+ </dependency>
+
+ <!-- Used for 'functional' Accumulo tests -->
+ <dependency org="org.apache.accumulo" name="accumulo-minicluster" rev="${accumulo15.version}" conf="compile->default">
+ <exclude org="com.google.guava" module="guava"/>
+ <exclude org="commons-codec" module="commons-codec"/>
+ <exclude org="commons-collections" module="commons-collections"/>
+ <exclude org="commons-configuration" module="commons-configuration"/>
+ <exclude org="commons-io" module="commons-io"/>
+ <exclude org="commons-lang" module="commons-lang"/>
+ <exclude org="commons-logging" module="commons-logging"/>
+ <exclude org="jline" module="jline"/>
+ <exclude org="log4j" module="log4j"/>
+ <exclude org="org.apache.hadoop" module="hadoop-client"/>
+ <exclude org="org.apache.zookeeper" module="zookeeper"/>
+ <exclude org="org.slf4j" module="slf4j-api"/>
+ <exclude org="org.slf4j" module="slf4j-log4j12"/>
+ </dependency>
+
<!-- for piggybank -->
<dependency org="hsqldb" name="hsqldb" rev="${hsqldb.version}"
conf="test->default" />
Modified: pig/branches/tez/ivy/libraries.properties
URL: http://svn.apache.org/viewvc/pig/branches/tez/ivy/libraries.properties?rev=1571454&r1=1571453&r2=1571454&view=diff
==============================================================================
--- pig/branches/tez/ivy/libraries.properties (original)
+++ pig/branches/tez/ivy/libraries.properties Mon Feb 24 21:41:38 2014
@@ -14,6 +14,7 @@
#It drives ivy and the generation of a maven POM
#These are the versions of our dependencies (in alphabetical order)
+accumulo15.version=1.5.0
apacheant.version=1.7.1
apacherat.version=0.8
automaton.version=1.11-8
@@ -36,10 +37,9 @@ ivy.version=2.2.0
jasper.version=6.1.14
groovy.version=1.8.6
guava.version=11.0
-guava-hadoop2.version=15.0
jersey-core.version=1.8
-hadoop-core.version=1.0.0
-hadoop-test.version=1.0.0
+hadoop-core.version=1.0.4
+hadoop-test.version=1.0.4
hadoop-common.version=2.2.0
hadoop-hdfs.version=2.2.0
hadoop-mapreduce.version=2.2.0
@@ -59,7 +59,7 @@ jdiff.version=1.0.9
jettison.version=1.3.4
jetty.version=6.1.26
jetty-util.version=6.1.26
-jline.version=0.9.94
+jline.version=1.0
joda-time.version=2.1
jopt.version=4.1
json-simple.version=1.1
@@ -77,7 +77,7 @@ slf4j-log4j12.version=1.6.1
xerces.version=2.10.0
xalan.version=2.7.1
wagon-http.version=1.0-beta-2
-zookeeper.version=3.4.4
+zookeeper.version=3.4.5
servlet.version=4.0.6
servlet-api.version=2.5
protobuf-java.version=2.5.0
Modified: pig/branches/tez/shims/src/hadoop20/org/apache/pig/backend/hadoop/executionengine/shims/HadoopShims.java
URL: http://svn.apache.org/viewvc/pig/branches/tez/shims/src/hadoop20/org/apache/pig/backend/hadoop/executionengine/shims/HadoopShims.java?rev=1571454&r1=1571453&r2=1571454&view=diff
==============================================================================
--- pig/branches/tez/shims/src/hadoop20/org/apache/pig/backend/hadoop/executionengine/shims/HadoopShims.java (original)
+++ pig/branches/tez/shims/src/hadoop20/org/apache/pig/backend/hadoop/executionengine/shims/HadoopShims.java Mon Feb 24 21:41:38 2014
@@ -97,7 +97,7 @@ public class HadoopShims {
public static JobControl newJobControl(String groupName, int timeToSleep) {
return new PigJobControl(groupName, timeToSleep);
}
-
+
public static long getDefaultBlockSize(FileSystem fs, Path path) {
return fs.getDefaultBlockSize();
}
@@ -115,4 +115,8 @@ public class HadoopShims {
// for Hadoop 0.20
return report.getProgress() != successfulProgress;
}
+
+ public static void unsetConf(Configuration conf, String key) {
+ // Not supported in Hadoop 0.20/1.x
+ }
}
Modified: pig/branches/tez/shims/src/hadoop23/org/apache/pig/backend/hadoop/executionengine/shims/HadoopShims.java
URL: http://svn.apache.org/viewvc/pig/branches/tez/shims/src/hadoop23/org/apache/pig/backend/hadoop/executionengine/shims/HadoopShims.java?rev=1571454&r1=1571453&r2=1571454&view=diff
==============================================================================
--- pig/branches/tez/shims/src/hadoop23/org/apache/pig/backend/hadoop/executionengine/shims/HadoopShims.java (original)
+++ pig/branches/tez/shims/src/hadoop23/org/apache/pig/backend/hadoop/executionengine/shims/HadoopShims.java Mon Feb 24 21:41:38 2014
@@ -115,4 +115,8 @@ public class HadoopShims {
public static boolean isJobFailed(TaskReport report) {
return report.getCurrentStatus()==TIPStatus.FAILED;
}
+
+ public static void unsetConf(Configuration conf, String key) {
+ conf.unset(key);
+ }
}
Modified: pig/branches/tez/src/docs/src/documentation/content/xdocs/basic.xml
URL: http://svn.apache.org/viewvc/pig/branches/tez/src/docs/src/documentation/content/xdocs/basic.xml?rev=1571454&r1=1571453&r2=1571454&view=diff
==============================================================================
--- pig/branches/tez/src/docs/src/documentation/content/xdocs/basic.xml (original)
+++ pig/branches/tez/src/docs/src/documentation/content/xdocs/basic.xml Mon Feb 24 21:41:38 2014
@@ -5655,7 +5655,7 @@ B = FOREACH A GENERATE -x, y;
<table>
<tr>
<td>
- <p>ASSERT alias BY expression [message];</p>
+ <p>ASSERT alias BY expression [, message];</p>
</td>
</tr>
</table>
@@ -5721,7 +5721,7 @@ DUMP A;
<p>Now, you can assert that a0 column in your data is >0, fail if otherwise</p>
<source>
-ASSERT A by a0 > 0 'a0 should be greater than 0';
+ASSERT A by a0 > 0, 'a0 should be greater than 0';
</source>
</section></section>