You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@pig.apache.org by Pallavi Rao <pa...@inmobi.com> on 2016/04/04 07:20:02 UTC

Review Request 45667: Support Pig On Spark

-----------------------------------------------------------
This is an automatically generated e-mail. To reply, visit:
https://reviews.apache.org/r/45667/
-----------------------------------------------------------

Review request for pig, Daniel Dai and Rohini Palaniswamy.


Bugs: PIG-4059 and PIG-4854
    https://issues.apache.org/jira/browse/PIG-4059
    https://issues.apache.org/jira/browse/PIG-4854


Repository: pig-git


Description
-------

The patch contains all the work done in the spark branch, so far.


Diffs
-----

  bin/pig 81f1426 
  build.xml 8db1a80 
  ivy.xml dd9878e 
  ivy/libraries.properties 55d9aed 
  shims/test/hadoop20/org/apache/pig/test/SparkMiniCluster.java PRE-CREATION 
  shims/test/hadoop23/org/apache/pig/test/SparkMiniCluster.java PRE-CREATION 
  src/META-INF/services/org.apache.pig.ExecType 5c034c8 
  src/docs/src/documentation/content/xdocs/start.xml 36f9952 
  src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/PhysicalOperator.java 1ff1abd 
  src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/POUserFunc.java ecf780c 
  src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/plans/PhysicalPlan.java 2376d03 
  src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POCollectedGroup.java bcbfe2b 
  src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POFRJoin.java 894cda7 
  src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POForEach.java 21b75f1 
  src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POGlobalRearrange.java 52cfb73 
  src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POMergeJoin.java 6adfa91 
  src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POSort.java c3a82c3 
  src/org/apache/pig/backend/hadoop/executionengine/spark/JobGraphBuilder.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/JobMetricsListener.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/KryoSerializer.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/MapReducePartitionerWrapper.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/SparkExecType.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/SparkExecutionEngine.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLocalExecType.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/SparkUtil.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/UDFJarsFinder.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/CollectedGroupConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/CounterConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/DistinctConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/FRJoinConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/FilterConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/ForEachConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/GlobalRearrangeConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/IndexedKey.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/IteratorTransform.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/LimitConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/LoadConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/LocalRearrangeConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/MergeCogroupConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/MergeJoinConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/OutputConsumerIterator.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/PackageConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/PigSecondaryKeyComparatorSpark.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/RDDConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/RankConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/ReduceByConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/SkewedJoinConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/SortConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/SplitConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/StoreConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/StreamConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/UnionConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/operator/NativeSparkOperator.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/operator/POGlobalRearrangeSpark.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/operator/POReduceBySpark.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/AccumulatorOptimizer.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/CombinerOptimizer.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/MultiQueryOptimizerSpark.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/NoopFilterRemover.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/ParallelismSetter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/SecondaryKeyOptimizerSpark.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/plan/DotSparkPrinter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkCompiler.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkCompilerException.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkOpPlanVisitor.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkOperPlan.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkOperator.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkPOPackageAnnotator.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkPrinter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/running/PigInputFormatSpark.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/util/AccumulatorOptimizerUtil.java c4b44ad 
  src/org/apache/pig/backend/hadoop/executionengine/util/CombinerOptimizerUtil.java 889c01b 
  src/org/apache/pig/backend/hadoop/executionengine/util/SecondaryKeyOptimizerUtil.java 0b59c9c 
  src/org/apache/pig/backend/hadoop/hbase/HBaseStorage.java e0581d9 
  src/org/apache/pig/data/SelfSpillBag.java d17f0a8 
  src/org/apache/pig/impl/PigContext.java d43949f 
  src/org/apache/pig/impl/plan/OperatorPlan.java 8b2e2e7 
  src/org/apache/pig/tools/pigstats/PigStatsUtil.java 542cc2e 
  src/org/apache/pig/tools/pigstats/spark/SparkCounter.java PRE-CREATION 
  src/org/apache/pig/tools/pigstats/spark/SparkCounterGroup.java PRE-CREATION 
  src/org/apache/pig/tools/pigstats/spark/SparkCounters.java PRE-CREATION 
  src/org/apache/pig/tools/pigstats/spark/SparkJobStats.java PRE-CREATION 
  src/org/apache/pig/tools/pigstats/spark/SparkPigStats.java PRE-CREATION 
  src/org/apache/pig/tools/pigstats/spark/SparkPigStatusReporter.java PRE-CREATION 
  src/org/apache/pig/tools/pigstats/spark/SparkScriptState.java PRE-CREATION 
  src/org/apache/pig/tools/pigstats/spark/SparkStatsUtil.java PRE-CREATION 
  test/e2e/pig/build.xml f7c38ba 
  test/e2e/pig/conf/spark.conf PRE-CREATION 
  test/e2e/pig/drivers/TestDriverPig.pm bf9c302 
  test/e2e/pig/tests/streaming.conf 18f2fb2 
  test/excluded-tests-spark PRE-CREATION 
  test/org/apache/pig/newplan/logical/relational/TestLocationInPhysicalPlan.java 94b34b3 
  test/org/apache/pig/spark/TestIndexedKey.java PRE-CREATION 
  test/org/apache/pig/spark/TestSecondarySortSpark.java PRE-CREATION 
  test/org/apache/pig/test/MiniGenericCluster.java 9347269 
  test/org/apache/pig/test/TestAssert.java 6d4b5c6 
  test/org/apache/pig/test/TestBuiltin.java 44b4d09 
  test/org/apache/pig/test/TestCase.java c9bb2fa 
  test/org/apache/pig/test/TestCollectedGroup.java a958d33 
  test/org/apache/pig/test/TestCombiner.java df44293 
  test/org/apache/pig/test/TestCubeOperator.java de96e6c 
  test/org/apache/pig/test/TestEvalPipeline.java 9efde13 
  test/org/apache/pig/test/TestEvalPipeline2.java c8f51d7 
  test/org/apache/pig/test/TestEvalPipelineLocal.java c12d595 
  test/org/apache/pig/test/TestFinish.java f18c103 
  test/org/apache/pig/test/TestForEachNestedPlanLocal.java b0aa3a8 
  test/org/apache/pig/test/TestGrunt.java ef121a3 
  test/org/apache/pig/test/TestHBaseStorage.java 8d2ad85 
  test/org/apache/pig/test/TestLimitVariable.java 53b9dae 
  test/org/apache/pig/test/TestMapSideCogroup.java 2c78b4a 
  test/org/apache/pig/test/TestMergeJoin.java f1a9608 
  test/org/apache/pig/test/TestMergeJoinOuter.java 81aee55 
  test/org/apache/pig/test/TestMultiQuery.java 40684b4 
  test/org/apache/pig/test/TestMultiQueryLocal.java b9ac035 
  test/org/apache/pig/test/TestNativeMapReduce.java c4f6573 
  test/org/apache/pig/test/TestNullConstant.java 3ea4509 
  test/org/apache/pig/test/TestPigRunner.java fde8609 
  test/org/apache/pig/test/TestPigServerLocal.java fbabd03 
  test/org/apache/pig/test/TestProjectRange.java 2e3e7b8 
  test/org/apache/pig/test/TestPruneColumn.java 3936332 
  test/org/apache/pig/test/TestRank1.java 9e4ef62 
  test/org/apache/pig/test/TestRank2.java fc802a9 
  test/org/apache/pig/test/TestRank3.java 43af10d 
  test/org/apache/pig/test/TestSecondarySort.java 8991010 
  test/org/apache/pig/test/TestSkewedJoin.java dba2241 
  test/org/apache/pig/test/TestStoreBase.java eb3b253 
  test/org/apache/pig/test/Util.java 8dae247 
  test/spark-tests PRE-CREATION 

Diff: https://reviews.apache.org/r/45667/diff/


Testing
-------

New UTs were added where required and ensure old UTs pass -> https://builds.apache.org/job/Pig-spark/


Thanks,

Pallavi Rao


Re: Review Request 45667: Support Pig On Spark

Posted by kelly zhang <li...@intel.com>.

> On May 22, 2016, 9:57 p.m., Rohini Palaniswamy wrote:
> > src/org/apache/pig/backend/hadoop/hbase/HBaseStorage.java, line 313
> > <https://reviews.apache.org/r/45667/diff/1/?file=1323847#file1323847line313>
> >
> >     Just change the code to use UDFContext.getUDFContext().getJobConf() which should not be null instead of getClientSystemProps(). Not sure why it is using getClientSystemProps() in the first place.
> 
> kelly zhang wrote:
>     Here if we change to UDFContext.getUDFContext().getJobConf(), problem still exists.
>     
>     
>     The reason we need to check whether UDFContext.getUDFContext().getJobConf() is null is that the Spark executor first initializes all the objects and only then calls UDFContext.deserialize(). The HBaseStorage constructor is therefore called before UDFContext.deserialize(), so we need to check here whether UDFContext.getUDFContext().getJobConf() is null; otherwise an NPE will be thrown.

Updated in PigOnSpark_3.patch. After PIG-4920, we store UDFContext#getClientSystemProps and UDFContext#getUdfConfs in SparkEngineConf, so HBaseStorage is no longer modified.


> On May 22, 2016, 9:57 p.m., Rohini Palaniswamy wrote:
> > src/org/apache/pig/impl/PigContext.java, line 924
> > <https://reviews.apache.org/r/45667/diff/1/?file=1323849#file1323849line924>
> >
> >     This can be reverted. PigContext need not be serialized to the backend. See PIG-4866
> 
> kelly zhang wrote:
>     PIG-4866 is about not serializing PigContext in the configuration, whereas here we override PigContext#writeObject and PigContext#readObject to serialize and deserialize only one attribute (packageImportList) in spark mode.

Updated in PigOnSpark_3.patch. After PIG-4920, we store UDFContext#getClientSystemProps and UDFContext#getUdfConfs in SparkEngineConf, so PigContext is no longer modified.


> On May 22, 2016, 9:57 p.m., Rohini Palaniswamy wrote:
> > test/org/apache/pig/test/TestBuiltin.java, line 3255
> > <https://reviews.apache.org/r/45667/diff/1/?file=1323870#file1323870line3255>
> >
> >     This testcase is broken if you have 0-0 repeating twice. It is not UniqueID anymore.
> 
> kelly zhang wrote:
>     0-0 repeating twice is because we use TaskID in UniqueID#exec:
>     public String exec(Tuple input) throws IOException {
>         String taskIndex = PigMapReduce.sJobConfInternal.get().get(PigConstants.TASK_INDEX);
>         String sequenceId = taskIndex + "-" + Long.toString(sequence);
>         sequence++;
>         return sequenceId;
>     }
>     In MR, we initialize PigConstants.TASK_INDEX in org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigGenericMapReduce.Reduce#setup:
>     protected void setup(Context context) throws IOException, InterruptedException {
>        ...
>         context.getConfiguration().set(PigConstants.TASK_INDEX, Integer.toString(context.getTaskAttemptID().getTaskID().getId()));
>     ...
>     }
>     
>     But Spark does not provide a function like PigGenericMapReduce.Reduce#setup to initialize PigConstants.TASK_INDEX when a job starts.
>     I suggest filing a new JIRA (Initialize PigConstants.TASK_INDEX when spark job starts) and skipping this unit test until that JIRA is resolved.

Updated in PigOnSpark_3.patch. I have created PIG-5051 and added a comment on TestBuiltin#testUniqueID (the behavior in spark mode will be the same as in MR until PIG-5051 is fixed).


- kelly


-----------------------------------------------------------
This is an automatically generated e-mail. To reply, visit:
https://reviews.apache.org/r/45667/#review134255
-----------------------------------------------------------


On July 11, 2016, 4:32 a.m., Pallavi Rao wrote:
> 
> -----------------------------------------------------------
> This is an automatically generated e-mail. To reply, visit:
> https://reviews.apache.org/r/45667/
> -----------------------------------------------------------
> 
> (Updated July 11, 2016, 4:32 a.m.)
> 
> 
> Review request for pig, Daniel Dai and Rohini Palaniswamy.
> 
> 
> Bugs: PIG-4059 and PIG-4854
>     https://issues.apache.org/jira/browse/PIG-4059
>     https://issues.apache.org/jira/browse/PIG-4854
> 
> 
> Repository: pig-git
> 
> 
> Description
> -------
> 
> The patch contains all the work done in the spark branch, so far.
> 
> 
> Diffs
> -----
> 
>   bin/pig 81f1426 
>   build.xml 99ba1f4 
>   ivy.xml dd9878e 
>   ivy/libraries.properties 3a819a5 
>   shims/test/hadoop20/org/apache/pig/test/SparkMiniCluster.java PRE-CREATION 
>   shims/test/hadoop23/org/apache/pig/test/SparkMiniCluster.java PRE-CREATION 
>   shims/test/hadoop23/org/apache/pig/test/TezMiniCluster.java 792a1bd 
>   shims/test/hadoop23/org/apache/pig/test/YarnMiniCluster.java PRE-CREATION 
>   src/META-INF/services/org.apache.pig.ExecType 5c034c8 
>   src/docs/src/documentation/content/xdocs/start.xml 36f9952 
>   src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/PhysicalOperator.java 1ff1abd 
>   src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/POUserFunc.java ecf780c 
>   src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/plans/PhysicalPlan.java 2376d03 
>   src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POCollectedGroup.java bcbfe2b 
>   src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POFRJoin.java d80951a 
>   src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POForEach.java 21b75f1 
>   src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POGlobalRearrange.java 52cfb73 
>   src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POMergeJoin.java 13f70c0 
>   src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POSort.java c3a82c3 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/JobGraphBuilder.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/JobMetricsListener.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/KryoSerializer.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/MapReducePartitionerWrapper.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/SparkExecType.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/SparkExecutionEngine.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLocalExecType.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/SparkUtil.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/UDFJarsFinder.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/CollectedGroupConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/CounterConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/DistinctConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/FRJoinConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/FilterConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/ForEachConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/GlobalRearrangeConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/IndexedKey.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/IteratorTransform.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/LimitConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/LoadConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/LocalRearrangeConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/MergeCogroupConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/MergeJoinConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/OutputConsumerIterator.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/PackageConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/PigSecondaryKeyComparatorSpark.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/RDDConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/RankConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/ReduceByConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/SkewedJoinConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/SortConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/SplitConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/StoreConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/StreamConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/UnionConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/operator/NativeSparkOperator.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/operator/POGlobalRearrangeSpark.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/operator/POReduceBySpark.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/AccumulatorOptimizer.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/CombinerOptimizer.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/MultiQueryOptimizerSpark.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/NoopFilterRemover.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/ParallelismSetter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/SecondaryKeyOptimizerSpark.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/plan/DotSparkPrinter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkCompiler.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkCompilerException.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkOpPlanVisitor.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkOperPlan.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkOperator.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkPOPackageAnnotator.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkPrinter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/running/PigInputFormatSpark.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/util/AccumulatorOptimizerUtil.java c4b44ad 
>   src/org/apache/pig/backend/hadoop/executionengine/util/CombinerOptimizerUtil.java 889c01b 
>   src/org/apache/pig/backend/hadoop/executionengine/util/SecondaryKeyOptimizerUtil.java 0b59c9c 
>   src/org/apache/pig/backend/hadoop/hbase/HBaseStorage.java e0581d9 
>   src/org/apache/pig/data/SelfSpillBag.java d17f0a8 
>   src/org/apache/pig/impl/PigContext.java d43949f 
>   src/org/apache/pig/impl/plan/OperatorPlan.java 8b2e2e7 
>   src/org/apache/pig/tools/pigstats/PigStatsUtil.java 542cc2e 
>   src/org/apache/pig/tools/pigstats/spark/SparkCounter.java PRE-CREATION 
>   src/org/apache/pig/tools/pigstats/spark/SparkCounterGroup.java PRE-CREATION 
>   src/org/apache/pig/tools/pigstats/spark/SparkCounters.java PRE-CREATION 
>   src/org/apache/pig/tools/pigstats/spark/SparkJobStats.java PRE-CREATION 
>   src/org/apache/pig/tools/pigstats/spark/SparkPigStats.java PRE-CREATION 
>   src/org/apache/pig/tools/pigstats/spark/SparkPigStatusReporter.java PRE-CREATION 
>   src/org/apache/pig/tools/pigstats/spark/SparkScriptState.java PRE-CREATION 
>   src/org/apache/pig/tools/pigstats/spark/SparkStatsUtil.java PRE-CREATION 
>   test/e2e/pig/build.xml f7c38ba 
>   test/e2e/pig/conf/spark.conf PRE-CREATION 
>   test/e2e/pig/drivers/TestDriverPig.pm bf9c302 
>   test/e2e/pig/tests/streaming.conf 18f2fb2 
>   test/excluded-tests-spark PRE-CREATION 
>   test/org/apache/pig/newplan/logical/relational/TestLocationInPhysicalPlan.java 94b34b3 
>   test/org/apache/pig/spark/TestIndexedKey.java PRE-CREATION 
>   test/org/apache/pig/spark/TestSecondarySortSpark.java PRE-CREATION 
>   test/org/apache/pig/test/MiniGenericCluster.java 9347269 
>   test/org/apache/pig/test/TestAssert.java 6d4b5c6 
>   test/org/apache/pig/test/TestBuiltin.java fbc3f1e 
>   test/org/apache/pig/test/TestCase.java c9bb2fa 
>   test/org/apache/pig/test/TestCollectedGroup.java a958d33 
>   test/org/apache/pig/test/TestCombiner.java df44293 
>   test/org/apache/pig/test/TestCubeOperator.java de96e6c 
>   test/org/apache/pig/test/TestEvalPipeline.java 9efde13 
>   test/org/apache/pig/test/TestEvalPipeline2.java c8f51d7 
>   test/org/apache/pig/test/TestEvalPipelineLocal.java c12d595 
>   test/org/apache/pig/test/TestFinish.java f18c103 
>   test/org/apache/pig/test/TestForEachNestedPlanLocal.java b0aa3a8 
>   test/org/apache/pig/test/TestGrunt.java 9eaf298 
>   test/org/apache/pig/test/TestHBaseStorage.java 8d2ad85 
>   test/org/apache/pig/test/TestLimitVariable.java 53b9dae 
>   test/org/apache/pig/test/TestMapSideCogroup.java 2c78b4a 
>   test/org/apache/pig/test/TestMergeJoin.java f1a9608 
>   test/org/apache/pig/test/TestMergeJoinOuter.java 81aee55 
>   test/org/apache/pig/test/TestMultiQuery.java c32eab7 
>   test/org/apache/pig/test/TestMultiQueryLocal.java b9ac035 
>   test/org/apache/pig/test/TestNativeMapReduce.java c4f6573 
>   test/org/apache/pig/test/TestNullConstant.java 3ea4509 
>   test/org/apache/pig/test/TestPigRunner.java fde8609 
>   test/org/apache/pig/test/TestPigServerLocal.java fbabd03 
>   test/org/apache/pig/test/TestProjectRange.java 2e3e7b8 
>   test/org/apache/pig/test/TestPruneColumn.java 3936332 
>   test/org/apache/pig/test/TestRank1.java 9e4ef62 
>   test/org/apache/pig/test/TestRank2.java fc802a9 
>   test/org/apache/pig/test/TestRank3.java 43af10d 
>   test/org/apache/pig/test/TestSecondarySort.java 8991010 
>   test/org/apache/pig/test/TestSkewedJoin.java dba2241 
>   test/org/apache/pig/test/TestStoreBase.java eb3b253 
>   test/org/apache/pig/test/Util.java 36d01e8 
>   test/spark-tests PRE-CREATION 
> 
> Diff: https://reviews.apache.org/r/45667/diff/
> 
> 
> Testing
> -------
> 
> New UTs were added where required and ensure old UTs pass -> https://builds.apache.org/job/Pig-spark/
> 
> 
> Thanks,
> 
> Pallavi Rao
> 
>


RE: Review Request 45667: Support Pig On Spark

Posted by "Zhang, Liyun" <li...@intel.com>.
Pallavi created this review request, so I do not have the privilege to upload a new patch to it. I have sent the new patch to Pallavi, who will upload it later.

-----Original Message-----
From: kelly zhang [mailto:noreply@reviews.apache.org] On Behalf Of kelly zhang
Sent: Tuesday, June 14, 2016 11:25 AM
To: Rohini Palaniswamy; Daniel Dai
Cc: Zhang, Liyun; Pallavi Rao; pig
Subject: Re: Review Request 45667: Support Pig On Spark



> On May 22, 2016, 9:57 p.m., Rohini Palaniswamy wrote:
> > test/org/apache/pig/test/TestBuiltin.java, line 3255 
> > <https://reviews.apache.org/r/45667/diff/1/?file=1323870#file1323870
> > line3255>
> >
> >     This testcase is broken if you have 0-0 repeating twice. It is not UniqueID anymore.

0-0 repeating twice is because we use TaskID in UniqueID#exec:
public String exec(Tuple input) throws IOException {
    String taskIndex = PigMapReduce.sJobConfInternal.get().get(PigConstants.TASK_INDEX);
    String sequenceId = taskIndex + "-" + Long.toString(sequence);
    sequence++;
    return sequenceId;
}
In MR, we initialize PigConstants.TASK_INDEX in org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigGenericMapReduce.Reduce#setup:
protected void setup(Context context) throws IOException, InterruptedException {
   ...
    context.getConfiguration().set(PigConstants.TASK_INDEX, Integer.toString(context.getTaskAttemptID().getTaskID().getId()));
...
}

But Spark does not provide a function like PigGenericMapReduce.Reduce#setup to initialize PigConstants.TASK_INDEX when a job starts.
I suggest filing a new JIRA (Initialize PigConstants.TASK_INDEX when a Spark job starts) and skipping this unit test until that JIRA is resolved.


> On May 22, 2016, 9:57 p.m., Rohini Palaniswamy wrote:
> > src/org/apache/pig/data/SelfSpillBag.java, line 32 
> > <https://reviews.apache.org/r/45667/diff/1/?file=1323848#file1323848line32>
> >
> >     Why is bag even being serialized by Spark?

SelfSpillBag is used in TestHBaseStorage; if it is not marked transient, a NotSerializableException is thrown.


> On May 22, 2016, 9:57 p.m., Rohini Palaniswamy wrote:
> > test/org/apache/pig/newplan/logical/relational/TestLocationInPhysicalPlan.java, line 66
> > <https://reviews.apache.org/r/45667/diff/1/?file=1323865#file1323865line66>
> >
> >     Why does A[3,4] repeat?

The pig script is like:

            LOAD '" + Util.encodeEscape(input.getAbsolutePath()) + "' using PigStorage();\n"
               "B = GROUP A BY $0;\n"
                "A = FOREACH B GENERATE COUNT(A);\n"
               "STORE A INTO '" + Util.encodeEscape(output.getAbsolutePath()) + "';");

The spark plan is :
A: Store(/tmp/pig_junit_tmp1755582848/test6087259092054964214output:org.apache.pig.builtin.PigStorage) - scope-9
|
|---A: New For Each(false)[tuple] - scope-13
    |   |
    |   Project[bag][1] - scope-11
    |   
    |   POUserFunc(org.apache.pig.builtin.COUNT$Final)[long] - scope-12
    |   |
    |   |---Project[bag][1] - scope-28
    |
    |---Reduce By(false,false)[tuple] - scope-18
        |   |
        |   Project[bytearray][0] - scope-19
        |   |
        |   POUserFunc(org.apache.pig.builtin.COUNT$Intermediate)[tuple] - scope-20
        |   |
        |   |---Project[bag][1] - scope-21
        |
        |---B: Local Rearrange[tuple]{bytearray}(false) - scope-24
            |   |
            |   Project[bytearray][0] - scope-26
            |
            |---A: New For Each(false,false)[bag] - scope-14
                |   |
                |   Project[bytearray][0] - scope-15
                |   |
                |   POUserFunc(org.apache.pig.builtin.COUNT$Initial)[tuple] - scope-16
                |   |
                |   |---Project[bag][1] - scope-17
                |
                |---Pre Combiner Local Rearrange[tuple]{Unknown} - scope-27
                    |
                    |---A: Load(/tmp/pig_junit_tmp1755582848/test7108242581632795697input:PigStorage) - scope-0--------

There are two ForEach operators (scope-13 and scope-14) in the Spark plan, so A[3,4] appears twice.

Comparing with MR plan:
#--------------------------------------------------
# Map Reduce Plan                                 
#--------------------------------------------------
MapReduce node scope-10
Map Plan
B: Local Rearrange[tuple]{bytearray}(false) - scope-22
|   |
|   Project[bytearray][0] - scope-24
|
|---A: New For Each(false,false)[bag] - scope-11
    |   |
    |   Project[bytearray][0] - scope-12
    |   |
    |   POUserFunc(org.apache.pig.builtin.COUNT$Initial)[tuple] - scope-13
    |   |
    |   |---Project[bag][1] - scope-14
    |
    |---Pre Combiner Local Rearrange[tuple]{Unknown} - scope-25
        |
        |---A: Load(/tmp/pig_junit_tmp910232853/test2548400580131197161input:PigStorage) - scope-0--------
Combine Plan
B: Local Rearrange[tuple]{bytearray}(false) - scope-26
|   |
|   Project[bytearray][0] - scope-28
|
|---A: New For Each(false,false)[bag] - scope-15
    |   |
    |   Project[bytearray][0] - scope-16
    |   |
    |   POUserFunc(org.apache.pig.builtin.COUNT$Intermediate)[tuple] - scope-17
    |   |
    |   |---Project[bag][1] - scope-18
    |
    |---B: Package(CombinerPackager)[tuple]{bytearray} - scope-21--------
Reduce Plan
A: Store(/tmp/pig_junit_tmp910232853/test9096852332434708302output:org.apache.pig.builtin.PigStorage) - scope-9
|
|---A: New For Each(false)[bag] - scope-8
    |   |
    |   POUserFunc(org.apache.pig.builtin.COUNT$Final)[long] - scope-6
    |   |
    |   |---Project[bag][1] - scope-19
    |
    |---B: Package(CombinerPackager)[tuple]{bytearray} - scope-2--------
Global sort: false
----------------

There are two ForEach operators for A (scope-11 in the Map Plan and scope-8 in the Reduce Plan), so A[3,4] appears twice in the result (M: A[1,4],A[3,4],B[2,4] C: A[3,4],B[2,4] R: A[3,4]).


> On May 22, 2016, 9:57 p.m., Rohini Palaniswamy wrote:
> > test/org/apache/pig/test/TestMultiQuery.java, line 116 
> > <https://reviews.apache.org/r/45667/diff/1/?file=1323886#file1323886line116>
> >
> >     Why are we using checkQueryOutputsAfterSortRecursive in many places when checkQueryOutputsAfterSort would do? It will unnecessarily increase the test execution time. Can they all be changed? Or am I missing something and checkQueryOutputsAfterSort cannot be used for some reason?

The difference between Util.checkQueryOutputsAfterSortRecursive and Util.checkQueryOutputsAfterSort is that we can pass a schema (LogicalSchema) to Util.checkQueryOutputsAfterSortRecursive, so the function converts expectedResArray (String[]) to expectedRes (ArrayList<Tuple>) with the proper schema (int, string or other types):

           static public void checkQueryOutputsAfterSortRecursive(Iterator<Tuple> actualResultsIt,
            String[] expectedResArray, LogicalSchema schema) throws IOException {
            ...
            }


> On May 22, 2016, 9:57 p.m., Rohini Palaniswamy wrote:
> > ivy.xml, line 451
> > <https://reviews.apache.org/r/45667/diff/1/?file=1323775#file1323775line451>
> >
> >     What does 2.10 in spark-core_2.10 and spark-yarn_2.10 signify?

2.10 is the Scala version; it is hard-coded in the artifact name when we write the ivy dependency (http://mvnrepository.com/artifact/org.apache.spark).


- kelly


-----------------------------------------------------------
This is an automatically generated e-mail. To reply, visit:
https://reviews.apache.org/r/45667/#review134255
-----------------------------------------------------------


On April 4, 2016, 5:19 a.m., Pallavi Rao wrote:
> 
> -----------------------------------------------------------
> This is an automatically generated e-mail. To reply, visit:
> https://reviews.apache.org/r/45667/
> -----------------------------------------------------------
> 
> (Updated April 4, 2016, 5:19 a.m.)
> 
> 
> Review request for pig, Daniel Dai and Rohini Palaniswamy.
> 
> 
> Bugs: PIG-4059 and PIG-4854
>     https://issues.apache.org/jira/browse/PIG-4059
>     https://issues.apache.org/jira/browse/PIG-4854
> 
> 
> Repository: pig-git
> 
> 
> Description
> -------
> 
> The patch contains all the work done in the spark branch, so far.
> 
> 
> Diffs
> -----
> 
>   bin/pig 81f1426 
>   build.xml 8db1a80 
>   ivy.xml dd9878e 
>   ivy/libraries.properties 55d9aed 
>   shims/test/hadoop20/org/apache/pig/test/SparkMiniCluster.java PRE-CREATION 
>   shims/test/hadoop23/org/apache/pig/test/SparkMiniCluster.java PRE-CREATION 
>   src/META-INF/services/org.apache.pig.ExecType 5c034c8 
>   src/docs/src/documentation/content/xdocs/start.xml 36f9952 
>   src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/PhysicalOperator.java 1ff1abd 
>   src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/POUserFunc.java ecf780c 
>   src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/plans/PhysicalPlan.java 2376d03 
>   src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POCollectedGroup.java bcbfe2b 
>   src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POFRJoin.java 894cda7 
>   src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POForEach.java 21b75f1 
>   src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POGlobalRearrange.java 52cfb73 
>   src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POMergeJoin.java 6adfa91 
>   src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POSort.java c3a82c3 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/JobGraphBuilder.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/JobMetricsListener.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/KryoSerializer.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/MapReducePartitionerWrapper.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/SparkExecType.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/SparkExecutionEngine.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLocalExecType.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/SparkUtil.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/UDFJarsFinder.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/CollectedGroupConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/CounterConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/DistinctConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/FRJoinConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/FilterConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/ForEachConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/GlobalRearrangeConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/IndexedKey.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/IteratorTransform.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/LimitConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/LoadConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/LocalRearrangeConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/MergeCogroupConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/MergeJoinConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/OutputConsumerIterator.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/PackageConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/PigSecondaryKeyComparatorSpark.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/RDDConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/RankConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/ReduceByConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/SkewedJoinConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/SortConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/SplitConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/StoreConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/StreamConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/UnionConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/operator/NativeSparkOperator.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/operator/POGlobalRearrangeSpark.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/operator/POReduceBySpark.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/AccumulatorOptimizer.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/CombinerOptimizer.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/MultiQueryOptimizerSpark.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/NoopFilterRemover.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/ParallelismSetter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/SecondaryKeyOptimizerSpark.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/plan/DotSparkPrinter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkCompiler.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkCompilerException.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkOpPlanVisitor.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkOperPlan.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkOperator.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkPOPackageAnnotator.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkPrinter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/running/PigInputFormatSpark.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/util/AccumulatorOptimizerUtil.java c4b44ad 
>   src/org/apache/pig/backend/hadoop/executionengine/util/CombinerOptimizerUtil.java 889c01b 
>   src/org/apache/pig/backend/hadoop/executionengine/util/SecondaryKeyOptimizerUtil.java 0b59c9c 
>   src/org/apache/pig/backend/hadoop/hbase/HBaseStorage.java e0581d9 
>   src/org/apache/pig/data/SelfSpillBag.java d17f0a8 
>   src/org/apache/pig/impl/PigContext.java d43949f 
>   src/org/apache/pig/impl/plan/OperatorPlan.java 8b2e2e7 
>   src/org/apache/pig/tools/pigstats/PigStatsUtil.java 542cc2e 
>   src/org/apache/pig/tools/pigstats/spark/SparkCounter.java PRE-CREATION 
>   src/org/apache/pig/tools/pigstats/spark/SparkCounterGroup.java PRE-CREATION 
>   src/org/apache/pig/tools/pigstats/spark/SparkCounters.java PRE-CREATION 
>   src/org/apache/pig/tools/pigstats/spark/SparkJobStats.java PRE-CREATION 
>   src/org/apache/pig/tools/pigstats/spark/SparkPigStats.java PRE-CREATION 
>   src/org/apache/pig/tools/pigstats/spark/SparkPigStatusReporter.java PRE-CREATION 
>   src/org/apache/pig/tools/pigstats/spark/SparkScriptState.java PRE-CREATION 
>   src/org/apache/pig/tools/pigstats/spark/SparkStatsUtil.java PRE-CREATION 
>   test/e2e/pig/build.xml f7c38ba 
>   test/e2e/pig/conf/spark.conf PRE-CREATION 
>   test/e2e/pig/drivers/TestDriverPig.pm bf9c302 
>   test/e2e/pig/tests/streaming.conf 18f2fb2 
>   test/excluded-tests-spark PRE-CREATION 
>   test/org/apache/pig/newplan/logical/relational/TestLocationInPhysicalPlan.java 94b34b3 
>   test/org/apache/pig/spark/TestIndexedKey.java PRE-CREATION 
>   test/org/apache/pig/spark/TestSecondarySortSpark.java PRE-CREATION 
>   test/org/apache/pig/test/MiniGenericCluster.java 9347269 
>   test/org/apache/pig/test/TestAssert.java 6d4b5c6 
>   test/org/apache/pig/test/TestBuiltin.java 44b4d09 
>   test/org/apache/pig/test/TestCase.java c9bb2fa 
>   test/org/apache/pig/test/TestCollectedGroup.java a958d33 
>   test/org/apache/pig/test/TestCombiner.java df44293 
>   test/org/apache/pig/test/TestCubeOperator.java de96e6c 
>   test/org/apache/pig/test/TestEvalPipeline.java 9efde13 
>   test/org/apache/pig/test/TestEvalPipeline2.java c8f51d7 
>   test/org/apache/pig/test/TestEvalPipelineLocal.java c12d595 
>   test/org/apache/pig/test/TestFinish.java f18c103 
>   test/org/apache/pig/test/TestForEachNestedPlanLocal.java b0aa3a8 
>   test/org/apache/pig/test/TestGrunt.java ef121a3 
>   test/org/apache/pig/test/TestHBaseStorage.java 8d2ad85 
>   test/org/apache/pig/test/TestLimitVariable.java 53b9dae 
>   test/org/apache/pig/test/TestMapSideCogroup.java 2c78b4a 
>   test/org/apache/pig/test/TestMergeJoin.java f1a9608 
>   test/org/apache/pig/test/TestMergeJoinOuter.java 81aee55 
>   test/org/apache/pig/test/TestMultiQuery.java 40684b4 
>   test/org/apache/pig/test/TestMultiQueryLocal.java b9ac035 
>   test/org/apache/pig/test/TestNativeMapReduce.java c4f6573 
>   test/org/apache/pig/test/TestNullConstant.java 3ea4509 
>   test/org/apache/pig/test/TestPigRunner.java fde8609 
>   test/org/apache/pig/test/TestPigServerLocal.java fbabd03 
>   test/org/apache/pig/test/TestProjectRange.java 2e3e7b8 
>   test/org/apache/pig/test/TestPruneColumn.java 3936332 
>   test/org/apache/pig/test/TestRank1.java 9e4ef62 
>   test/org/apache/pig/test/TestRank2.java fc802a9 
>   test/org/apache/pig/test/TestRank3.java 43af10d 
>   test/org/apache/pig/test/TestSecondarySort.java 8991010 
>   test/org/apache/pig/test/TestSkewedJoin.java dba2241 
>   test/org/apache/pig/test/TestStoreBase.java eb3b253 
>   test/org/apache/pig/test/Util.java 8dae247 
>   test/spark-tests PRE-CREATION
> 
> Diff: https://reviews.apache.org/r/45667/diff/
> 
> 
> Testing
> -------
> 
> New UTs were added where required and ensure old UTs pass -> 
> https://builds.apache.org/job/Pig-spark/
> 
> 
> Thanks,
> 
> Pallavi Rao
> 
>


Re: Review Request 45667: Support Pig On Spark

Posted by kelly zhang <li...@intel.com>.

> On May 22, 2016, 9:57 p.m., Rohini Palaniswamy wrote:
> > test/org/apache/pig/test/TestBuiltin.java, line 3255
> > <https://reviews.apache.org/r/45667/diff/1/?file=1323870#file1323870line3255>
> >
> >     This testcase is broken if you have 0-0 repeating twice. It is not UniqueID anymore.

0-0 repeating twice is because we use TaskID in UniqueID#exec:
public String exec(Tuple input) throws IOException {
    String taskIndex = PigMapReduce.sJobConfInternal.get().get(PigConstants.TASK_INDEX);
    String sequenceId = taskIndex + "-" + Long.toString(sequence);
    sequence++;
    return sequenceId;
}
In MR, we initialize PigConstants.TASK_INDEX in org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigGenericMapReduce.Reduce#setup:
protected void setup(Context context) throws IOException, InterruptedException {
   ...
    context.getConfiguration().set(PigConstants.TASK_INDEX, Integer.toString(context.getTaskAttemptID().getTaskID().getId()));
...
}

But Spark does not provide a function like PigGenericMapReduce.Reduce#setup to initialize PigConstants.TASK_INDEX when a job starts.
I suggest filing a new JIRA (Initialize PigConstants.TASK_INDEX when a Spark job starts) and skipping this unit test until that JIRA is resolved.


> On May 22, 2016, 9:57 p.m., Rohini Palaniswamy wrote:
> > src/org/apache/pig/data/SelfSpillBag.java, line 32
> > <https://reviews.apache.org/r/45667/diff/1/?file=1323848#file1323848line32>
> >
> >     Why is bag even being serialized by Spark?

SelfSpillBag is used in TestHBaseStorage; if it is not marked transient, a NotSerializableException is thrown.


> On May 22, 2016, 9:57 p.m., Rohini Palaniswamy wrote:
> > test/org/apache/pig/newplan/logical/relational/TestLocationInPhysicalPlan.java, line 66
> > <https://reviews.apache.org/r/45667/diff/1/?file=1323865#file1323865line66>
> >
> >     Why does A[3,4] repeat?

The pig script is like:

            LOAD '" + Util.encodeEscape(input.getAbsolutePath()) + "' using PigStorage();\n"
               "B = GROUP A BY $0;\n"
                "A = FOREACH B GENERATE COUNT(A);\n"
               "STORE A INTO '" + Util.encodeEscape(output.getAbsolutePath()) + "';");

The spark plan is :
A: Store(/tmp/pig_junit_tmp1755582848/test6087259092054964214output:org.apache.pig.builtin.PigStorage) - scope-9
|
|---A: New For Each(false)[tuple] - scope-13
    |   |
    |   Project[bag][1] - scope-11
    |   
    |   POUserFunc(org.apache.pig.builtin.COUNT$Final)[long] - scope-12
    |   |
    |   |---Project[bag][1] - scope-28
    |
    |---Reduce By(false,false)[tuple] - scope-18
        |   |
        |   Project[bytearray][0] - scope-19
        |   |
        |   POUserFunc(org.apache.pig.builtin.COUNT$Intermediate)[tuple] - scope-20
        |   |
        |   |---Project[bag][1] - scope-21
        |
        |---B: Local Rearrange[tuple]{bytearray}(false) - scope-24
            |   |
            |   Project[bytearray][0] - scope-26
            |
            |---A: New For Each(false,false)[bag] - scope-14
                |   |
                |   Project[bytearray][0] - scope-15
                |   |
                |   POUserFunc(org.apache.pig.builtin.COUNT$Initial)[tuple] - scope-16
                |   |
                |   |---Project[bag][1] - scope-17
                |
                |---Pre Combiner Local Rearrange[tuple]{Unknown} - scope-27
                    |
                    |---A: Load(/tmp/pig_junit_tmp1755582848/test7108242581632795697input:PigStorage) - scope-0--------

There are two ForEach operators (scope-13 and scope-14) in the Spark plan, so A[3,4] appears twice.

Comparing with MR plan:
#--------------------------------------------------
# Map Reduce Plan                                 
#--------------------------------------------------
MapReduce node scope-10
Map Plan
B: Local Rearrange[tuple]{bytearray}(false) - scope-22
|   |
|   Project[bytearray][0] - scope-24
|
|---A: New For Each(false,false)[bag] - scope-11
    |   |
    |   Project[bytearray][0] - scope-12
    |   |
    |   POUserFunc(org.apache.pig.builtin.COUNT$Initial)[tuple] - scope-13
    |   |
    |   |---Project[bag][1] - scope-14
    |
    |---Pre Combiner Local Rearrange[tuple]{Unknown} - scope-25
        |
        |---A: Load(/tmp/pig_junit_tmp910232853/test2548400580131197161input:PigStorage) - scope-0--------
Combine Plan
B: Local Rearrange[tuple]{bytearray}(false) - scope-26
|   |
|   Project[bytearray][0] - scope-28
|
|---A: New For Each(false,false)[bag] - scope-15
    |   |
    |   Project[bytearray][0] - scope-16
    |   |
    |   POUserFunc(org.apache.pig.builtin.COUNT$Intermediate)[tuple] - scope-17
    |   |
    |   |---Project[bag][1] - scope-18
    |
    |---B: Package(CombinerPackager)[tuple]{bytearray} - scope-21--------
Reduce Plan
A: Store(/tmp/pig_junit_tmp910232853/test9096852332434708302output:org.apache.pig.builtin.PigStorage) - scope-9
|
|---A: New For Each(false)[bag] - scope-8
    |   |
    |   POUserFunc(org.apache.pig.builtin.COUNT$Final)[long] - scope-6
    |   |
    |   |---Project[bag][1] - scope-19
    |
    |---B: Package(CombinerPackager)[tuple]{bytearray} - scope-2--------
Global sort: false
----------------

There are two ForEach operators for A (scope-11 in the Map Plan and scope-8 in the Reduce Plan), so A[3,4] appears twice in the result (M: A[1,4],A[3,4],B[2,4] C: A[3,4],B[2,4] R: A[3,4]).


> On May 22, 2016, 9:57 p.m., Rohini Palaniswamy wrote:
> > test/org/apache/pig/test/TestMultiQuery.java, line 116
> > <https://reviews.apache.org/r/45667/diff/1/?file=1323886#file1323886line116>
> >
> >     Why are we using checkQueryOutputsAfterSortRecursive in many places when checkQueryOutputsAfterSort would do? It will unnecessarily increase the test execution time. Can they all be changed? Or am I missing something and checkQueryOutputsAfterSort cannot be used for some reason?

The difference between Util.checkQueryOutputsAfterSortRecursive and Util.checkQueryOutputsAfterSort is that we can pass a schema (LogicalSchema) to Util.checkQueryOutputsAfterSortRecursive, so the function converts expectedResArray (String[]) to expectedRes (ArrayList<Tuple>) with the proper schema (int, string or other types):

           static public void checkQueryOutputsAfterSortRecursive(Iterator<Tuple> actualResultsIt,
            String[] expectedResArray, LogicalSchema schema) throws IOException {
            ...
            }


> On May 22, 2016, 9:57 p.m., Rohini Palaniswamy wrote:
> > ivy.xml, line 451
> > <https://reviews.apache.org/r/45667/diff/1/?file=1323775#file1323775line451>
> >
> >     What does 2.10 in spark-core_2.10 and spark-yarn_2.10 signify?

2.10 is the Scala version; it is hard-coded in the artifact name when we write the ivy dependency (http://mvnrepository.com/artifact/org.apache.spark).


- kelly


-----------------------------------------------------------
This is an automatically generated e-mail. To reply, visit:
https://reviews.apache.org/r/45667/#review134255
-----------------------------------------------------------


On April 4, 2016, 5:19 a.m., Pallavi Rao wrote:
> 
> -----------------------------------------------------------
> This is an automatically generated e-mail. To reply, visit:
> https://reviews.apache.org/r/45667/
> -----------------------------------------------------------
> 
> (Updated April 4, 2016, 5:19 a.m.)
> 
> 
> Review request for pig, Daniel Dai and Rohini Palaniswamy.
> 
> 
> Bugs: PIG-4059 and PIG-4854
>     https://issues.apache.org/jira/browse/PIG-4059
>     https://issues.apache.org/jira/browse/PIG-4854
> 
> 
> Repository: pig-git
> 
> 
> Description
> -------
> 
> The patch contains all the work done in the spark branch, so far.
> 
> 
> Diffs
> -----
> 
>   bin/pig 81f1426 
>   build.xml 8db1a80 
>   ivy.xml dd9878e 
>   ivy/libraries.properties 55d9aed 
>   shims/test/hadoop20/org/apache/pig/test/SparkMiniCluster.java PRE-CREATION 
>   shims/test/hadoop23/org/apache/pig/test/SparkMiniCluster.java PRE-CREATION 
>   src/META-INF/services/org.apache.pig.ExecType 5c034c8 
>   src/docs/src/documentation/content/xdocs/start.xml 36f9952 
>   src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/PhysicalOperator.java 1ff1abd 
>   src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/POUserFunc.java ecf780c 
>   src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/plans/PhysicalPlan.java 2376d03 
>   src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POCollectedGroup.java bcbfe2b 
>   src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POFRJoin.java 894cda7 
>   src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POForEach.java 21b75f1 
>   src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POGlobalRearrange.java 52cfb73 
>   src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POMergeJoin.java 6adfa91 
>   src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POSort.java c3a82c3 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/JobGraphBuilder.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/JobMetricsListener.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/KryoSerializer.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/MapReducePartitionerWrapper.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/SparkExecType.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/SparkExecutionEngine.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLocalExecType.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/SparkUtil.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/UDFJarsFinder.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/CollectedGroupConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/CounterConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/DistinctConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/FRJoinConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/FilterConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/ForEachConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/GlobalRearrangeConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/IndexedKey.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/IteratorTransform.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/LimitConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/LoadConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/LocalRearrangeConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/MergeCogroupConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/MergeJoinConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/OutputConsumerIterator.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/PackageConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/PigSecondaryKeyComparatorSpark.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/RDDConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/RankConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/ReduceByConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/SkewedJoinConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/SortConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/SplitConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/StoreConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/StreamConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/UnionConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/operator/NativeSparkOperator.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/operator/POGlobalRearrangeSpark.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/operator/POReduceBySpark.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/AccumulatorOptimizer.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/CombinerOptimizer.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/MultiQueryOptimizerSpark.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/NoopFilterRemover.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/ParallelismSetter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/SecondaryKeyOptimizerSpark.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/plan/DotSparkPrinter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkCompiler.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkCompilerException.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkOpPlanVisitor.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkOperPlan.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkOperator.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkPOPackageAnnotator.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkPrinter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/running/PigInputFormatSpark.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/util/AccumulatorOptimizerUtil.java c4b44ad 
>   src/org/apache/pig/backend/hadoop/executionengine/util/CombinerOptimizerUtil.java 889c01b 
>   src/org/apache/pig/backend/hadoop/executionengine/util/SecondaryKeyOptimizerUtil.java 0b59c9c 
>   src/org/apache/pig/backend/hadoop/hbase/HBaseStorage.java e0581d9 
>   src/org/apache/pig/data/SelfSpillBag.java d17f0a8 
>   src/org/apache/pig/impl/PigContext.java d43949f 
>   src/org/apache/pig/impl/plan/OperatorPlan.java 8b2e2e7 
>   src/org/apache/pig/tools/pigstats/PigStatsUtil.java 542cc2e 
>   src/org/apache/pig/tools/pigstats/spark/SparkCounter.java PRE-CREATION 
>   src/org/apache/pig/tools/pigstats/spark/SparkCounterGroup.java PRE-CREATION 
>   src/org/apache/pig/tools/pigstats/spark/SparkCounters.java PRE-CREATION 
>   src/org/apache/pig/tools/pigstats/spark/SparkJobStats.java PRE-CREATION 
>   src/org/apache/pig/tools/pigstats/spark/SparkPigStats.java PRE-CREATION 
>   src/org/apache/pig/tools/pigstats/spark/SparkPigStatusReporter.java PRE-CREATION 
>   src/org/apache/pig/tools/pigstats/spark/SparkScriptState.java PRE-CREATION 
>   src/org/apache/pig/tools/pigstats/spark/SparkStatsUtil.java PRE-CREATION 
>   test/e2e/pig/build.xml f7c38ba 
>   test/e2e/pig/conf/spark.conf PRE-CREATION 
>   test/e2e/pig/drivers/TestDriverPig.pm bf9c302 
>   test/e2e/pig/tests/streaming.conf 18f2fb2 
>   test/excluded-tests-spark PRE-CREATION 
>   test/org/apache/pig/newplan/logical/relational/TestLocationInPhysicalPlan.java 94b34b3 
>   test/org/apache/pig/spark/TestIndexedKey.java PRE-CREATION 
>   test/org/apache/pig/spark/TestSecondarySortSpark.java PRE-CREATION 
>   test/org/apache/pig/test/MiniGenericCluster.java 9347269 
>   test/org/apache/pig/test/TestAssert.java 6d4b5c6 
>   test/org/apache/pig/test/TestBuiltin.java 44b4d09 
>   test/org/apache/pig/test/TestCase.java c9bb2fa 
>   test/org/apache/pig/test/TestCollectedGroup.java a958d33 
>   test/org/apache/pig/test/TestCombiner.java df44293 
>   test/org/apache/pig/test/TestCubeOperator.java de96e6c 
>   test/org/apache/pig/test/TestEvalPipeline.java 9efde13 
>   test/org/apache/pig/test/TestEvalPipeline2.java c8f51d7 
>   test/org/apache/pig/test/TestEvalPipelineLocal.java c12d595 
>   test/org/apache/pig/test/TestFinish.java f18c103 
>   test/org/apache/pig/test/TestForEachNestedPlanLocal.java b0aa3a8 
>   test/org/apache/pig/test/TestGrunt.java ef121a3 
>   test/org/apache/pig/test/TestHBaseStorage.java 8d2ad85 
>   test/org/apache/pig/test/TestLimitVariable.java 53b9dae 
>   test/org/apache/pig/test/TestMapSideCogroup.java 2c78b4a 
>   test/org/apache/pig/test/TestMergeJoin.java f1a9608 
>   test/org/apache/pig/test/TestMergeJoinOuter.java 81aee55 
>   test/org/apache/pig/test/TestMultiQuery.java 40684b4 
>   test/org/apache/pig/test/TestMultiQueryLocal.java b9ac035 
>   test/org/apache/pig/test/TestNativeMapReduce.java c4f6573 
>   test/org/apache/pig/test/TestNullConstant.java 3ea4509 
>   test/org/apache/pig/test/TestPigRunner.java fde8609 
>   test/org/apache/pig/test/TestPigServerLocal.java fbabd03 
>   test/org/apache/pig/test/TestProjectRange.java 2e3e7b8 
>   test/org/apache/pig/test/TestPruneColumn.java 3936332 
>   test/org/apache/pig/test/TestRank1.java 9e4ef62 
>   test/org/apache/pig/test/TestRank2.java fc802a9 
>   test/org/apache/pig/test/TestRank3.java 43af10d 
>   test/org/apache/pig/test/TestSecondarySort.java 8991010 
>   test/org/apache/pig/test/TestSkewedJoin.java dba2241 
>   test/org/apache/pig/test/TestStoreBase.java eb3b253 
>   test/org/apache/pig/test/Util.java 8dae247 
>   test/spark-tests PRE-CREATION 
> 
> Diff: https://reviews.apache.org/r/45667/diff/
> 
> 
> Testing
> -------
> 
> New UTs were added where required and ensure old UTs pass -> https://builds.apache.org/job/Pig-spark/
> 
> 
> Thanks,
> 
> Pallavi Rao
> 
>


Re: Review Request 45667: Support Pig On Spark

Posted by kelly zhang <li...@intel.com>.

> On May 22, 2016, 9:57 p.m., Rohini Palaniswamy wrote:
> > test/org/apache/pig/test/TestCombiner.java, lines 189-191
> > <https://reviews.apache.org/r/45667/diff/1/?file=1323873#file1323873line189>
> >
> >     Can be removed with the change to checkCombinerUsed

We cannot change checkCombinerUsed(pigServer, "c", true) to assertTrue(baos.toString().matches("(?si).*combine plan.*")). If we use the original code, this test will fail in spark mode because there is no combine plan in spark mode; instead, POReduceBySpark implements the combine function.


> On May 22, 2016, 9:57 p.m., Rohini Palaniswamy wrote:
> > test/org/apache/pig/test/TestCombiner.java, lines 239-241
> > <https://reviews.apache.org/r/45667/diff/1/?file=1323873#file1323873line239>
> >
> >     Can be removed with the change to checkCombinerUsed

We cannot change checkCombinerUsed(pigServer, "c", true) to assertTrue(baos.toString().matches("(?si).*combine plan.*")). If we use the original code, this test will fail in spark mode because there is no combine plan in spark mode; instead, POReduceBySpark implements the combine function.


> On May 22, 2016, 9:57 p.m., Rohini Palaniswamy wrote:
> > src/org/apache/pig/backend/hadoop/hbase/HBaseStorage.java, line 313
> > <https://reviews.apache.org/r/45667/diff/1/?file=1323847#file1323847line313>
> >
> >     Just change the code to use UDFContext.getUDFContext().getJobConf() which should not be null instead of getClientSystemProps(). Not sure why it is using getClientSystemProps() in the first place.

Even if we change this to UDFContext.getUDFContext().getJobConf(), the problem still exists.


The reason we need to check whether UDFContext.getUDFContext().getJobConf() is null is that the spark executor first initializes all the objects and only then calls UDFContext.deserialize(). The HBaseStorage constructor is therefore called before UDFContext.deserialize(), so we need to check whether UDFContext.getUDFContext().getJobConf() is null here; otherwise an NPE will be thrown.


> On May 22, 2016, 9:57 p.m., Rohini Palaniswamy wrote:
> > src/org/apache/pig/backend/hadoop/executionengine/util/AccumulatorOptimizerUtil.java, line 294
> > <https://reviews.apache.org/r/45667/diff/1/?file=1323844#file1323844line294>
> >
> >     The utility class is for common code between mapreduce, tez and spark. Please move all Spark specific code to the spark AccumulatorOptimizer

OK, I will move AccumulatorOptimizerUtil#addAccumulatorSpark to the spark AccumulatorOptimizer, but I need to make AccumulatorOptimizerUtil#check public because it will be used in the spark AccumulatorOptimizer.


> On May 22, 2016, 9:57 p.m., Rohini Palaniswamy wrote:
> > src/org/apache/pig/impl/PigContext.java, line 924
> > <https://reviews.apache.org/r/45667/diff/1/?file=1323849#file1323849line924>
> >
> >     This can be reverted. PigContext need not be serialized to the backend. See PIG-4866

PIG-4866 is about not serializing PigContext in the configuration, while here we override PigContext#writeObject and PigContext#readObject to serialize and deserialize only one attribute (packageImportList) in spark mode.


> On May 22, 2016, 9:57 p.m., Rohini Palaniswamy wrote:
> > test/org/apache/pig/test/TestPigRunner.java, lines 333-334
> > <https://reviews.apache.org/r/45667/diff/1/?file=1323890#file1323890line333>
> >
> >     1 job for 1 POStore can impact performance. It is not exactly multi-query. Is there a jira to optimize this better and use 1 job?

Yes, 1 job for 1 POStore can impact performance. PIG-4863 will implement it.  I have added a TODO in the comment of code:
  //TODO: 1 job for 1 POStore can impact performance. PIG-4863 will fix this problem
 assertTrue(stats.getJobGraph().size() == 2);


> On May 22, 2016, 9:57 p.m., Rohini Palaniswamy wrote:
> > test/org/apache/pig/test/TestStoreBase.java, line 155
> > <https://reviews.apache.org/r/45667/diff/1/?file=1323899#file1323899line155>
> >
> >     Second store should not succeed.

Yes, the second store should not succeed because the first one fails.

I will change the comment from
 A = load xx;
             store A into '1.out' using DummyStore('true','1');   -- first job should fail
             store A into '2.out' using DummyStore('false','1');  -- second job should success
             After multiQuery optimization the spark plan will be:
           Split - scope-14
            |   |
            |   a: Store(hdfs://1.out:myudfs.DummyStore('true','1')) - scope-4
            |   |
            |   a: Store(hdfs://2.out:myudfs.DummyStore('false','1')) - scope-7
            |
            |---a: Load(hdfs://zly2.sh.intel.com:8020/user/root/multiStore.txt:org.apache.pig.builtin.PigStorage) - scope-0------
            In current code base, once the first job fails, the second job will not be executed.
            the FILE_SETUPJOB_CALLED of second job will not exist.
            I explain more detail in PIG-4243

to
          
A = load xx;
             store A into '1.out' using DummyStore('true','1');   -- first store fails
             store A into '2.out' using DummyStore('false','1');  -- second store will not be executed after first store fails
             After multiQuery optimization the spark plan will be:
           Split - scope-14
            |   |
            |   a: Store(hdfs://1.out:myudfs.DummyStore('true','1')) - scope-4
            |   |
            |   a: Store(hdfs://2.out:myudfs.DummyStore('false','1')) - scope-7
            |
            |---a: Load(hdfs://zly2.sh.intel.com:8020/user/root/multiStore.txt:org.apache.pig.builtin.PigStorage) - scope-0------
            In current code base, once the first job fails, the second job will not be executed.
            the FILE_SETUPJOB_CALLED of second job will not exist.
            I explain more detail in PIG-4243


> On May 22, 2016, 9:57 p.m., Rohini Palaniswamy wrote:
> > test/org/apache/pig/test/TestStoreBase.java, line 165
> > <https://reviews.apache.org/r/45667/diff/1/?file=1323899#file1323899line165>
> >
> >     Setup job file of second one should also be there. Same with setup task as well. They are supposed to happen before putNext of both the stores is called.

In mr mode, one MapReduceOper corresponds to one job. If the first store in the job fails, the second store will not be executed. But 
the FILE_SETUPJOB_CALLED and FILE_SETUPTASK_CALLED files of both the first and second job exist because setupTask() and setupJob() happen before putNext() of the first store.
 In the current implementation of spark mode, this SparkOperator is related to two stores. Each store corresponds to one job, so there are two jobs. Once one job fails, the other jobs will not be executed.
 The FILE_SETUPJOB_CALLED and FILE_SETUPTASK_CALLED files of the second job will not exist because setupJob and setupTask of the second job happen after putNext of the first store.


> On May 22, 2016, 9:57 p.m., Rohini Palaniswamy wrote:
> > test/org/apache/pig/test/TestMapSideCogroup.java, line 340
> > <https://reviews.apache.org/r/45667/diff/1/?file=1323883#file1323883line340>
> >
> >     Why is output ordering different with merge join? The assumption with merge join is that files are sorted and join is done based on that. So ordering should be same.

The script of this unit test is like following:

REGISTER myudfs.jar;
A = load './multiSplits1.txt,./multiSplits3.txt' using myudfs.DummyCollectableLoader() as (c1:chararray,c2:int);
B = load './multiSplits2.txt' using myudfs.DummyIndexableLoader() as (c1:chararray,c2:int);
C = cogroup A by c1, B by c1 using 'merge';
store C into './multiSplits.out';

 cat bin/multiSplits1.txt ?
1     1
1     2
1     3
2     1
2     2
2     3
3     1
3     2
3     3
cat bin/multiSplits3.txt ?
4     1
4     2
4     3
5     1
5     2
5     3
6     1
6     2
6     3
7     1
7     2
7     3
8     1
8     2
8     3
9     1
9     2
9     3
 cat bin/multiSplits2.txt 
3	1
3	2
3	3
4	1
4	2
4	3
5	1
5	2
5	3

It is interesting that "load './multiSplits1.txt,./multiSplits3.txt'" behaves differently in spark and mr mode.
In mr, multiple files are loaded according to the size of their splits (bigger splits go first), so it first loads multiSplits3.txt and then multiSplits1.txt. So the result of the script in mr will be:


4,{(4,1),(4,2),(4,3)},{(4,1),(4,2),(4,3)}
5,{(5,2),(5,1),(5,3)},{(5,1),(5,2),(5,3)}
6,{(6,1),(6,2),(6,3)},{}
7,{(7,1),(7,2),(7,3)},{}
8,{(8,1),(8,2),(8,3)},{}
9,{(9,1),(9,2),(9,3)},{}
1,{(1,1),(1,2),(1,3)},{}
2,{(2,1),(2,2),(2,3)},{}
3,{(3,3),(3,2),(3,1)},{(3,1),(3,2),(3,3)}

In spark, it loads multiple files in sequence. So the result of the script in spark will be:

1,{(1,1),(1,2),(1,3)},{}
2,{(2,1),(2,2),(2,3)},{}
3,{(3,3),(3,2),(3,1)},{(3,1),(3,2),(3,3)}
4,{(4,1),(4,2),(4,3)},{(4,1),(4,2),(4,3)}
5,{(5,2),(5,1),(5,3)},{(5,1),(5,2),(5,3)}
6,{(6,1),(6,2),(6,3)},{}
7,{(7,1),(7,2),(7,3)},{}
8,{(8,1),(8,2),(8,3)},{}
9,{(9,1),(9,2),(9,3)},{}

The reason for the difference when loading multiple splits in mr and spark mode is that org.apache.hadoop.mapreduce.JobSubmitter#writeNewSplits sorts the splits by size (bigger split first) in mr mode.


> On May 22, 2016, 9:57 p.m., Rohini Palaniswamy wrote:
> > build.xml, line 366
> > <https://reviews.apache.org/r/45667/diff/1/?file=1323774#file1323774line366>
> >
> >     Can you confirm that with all the spark changes running with hadoop 1.x (ant test -Dhadoopversion=20) is good?

Pig will drop hadoop 1 support starting with 0.17 (PIG-4923), so should we support hadoop 1.x in pig on spark?


> On May 22, 2016, 9:57 p.m., Rohini Palaniswamy wrote:
> > src/org/apache/pig/backend/hadoop/executionengine/util/SecondaryKeyOptimizerUtil.java, line 59
> > <https://reviews.apache.org/r/45667/diff/1/?file=1323846#file1323846line59>
> >
> >     It is not a good idea to use a static variable to keep state in this static class. Multi-threaded use of Pig will have issues if different modes are used.
> >     
> >     This utility class is for code common to mapreduce, tez and spark. Please move the spark specific code to spark's SecondaryKeyOptimizer. This is also necessary if we ever complete the mavenization and move tez and spark specific code to separate modules.

OK, I will move the spark specific code to spark's SecondaryKeyOptimizer, but I need to make the following functions static so they can be reused in spark's SecondaryKeyOptimizer:
org.apache.pig.backend.hadoop.executionengine.util.SecondaryKeyOptimizerUtil#getSortKeyInfo
org.apache.pig.backend.hadoop.executionengine.util.SecondaryKeyOptimizerUtil#setSecondaryPlan
org.apache.pig.backend.hadoop.executionengine.util.SecondaryKeyOptimizerUtil#collectColumnChain


> On May 22, 2016, 9:57 p.m., Rohini Palaniswamy wrote:
> > test/org/apache/pig/test/TestEvalPipelineLocal.java, lines 1135-1136
> > <https://reviews.apache.org/r/45667/diff/1/?file=1323877#file1323877line1135>
> >
> >     Code should be taking care of this.

I have moved the code that resets the job conf (UDFContext.getUDFContext().addJobConf(null)) to SparkLauncher.java


> On May 22, 2016, 9:57 p.m., Rohini Palaniswamy wrote:
> > test/org/apache/pig/test/TestMultiQuery.java, line 116
> > <https://reviews.apache.org/r/45667/diff/1/?file=1323886#file1323886line116>
> >
> >     Why are we using checkQueryOutputsAfterSortRecursive in many places when checkQueryOutputsAfterSort would do? It will unnecessarily increase the test execution time. Can they all be changed? Or am I missing something and checkQueryOutputsAfterSort cannot be used for some reason?
> 
> kelly zhang wrote:
>     The difference between Util.checkQueryOutputsAfterSortRecursive and Util.checkQueryOutputsAfterSort: we can send schema:LogicalSchema to Util.checkQueryOutputsAfterSortRecursive, so function will help change expectedResArray:String[] to expectedRes:ArrayList<Tuple> with proper schema(int,string or other type) 
>     
>                static public void checkQueryOutputsAfterSortRecursive(Iterator<Tuple> actualResultsIt,
>                 String[] expectedResArray, LogicalSchema schema) throws IOException {
>                 ...
>                 }

The output of group, join and distinct differs between mr and spark mode: the output is ordered in mr because a sort is done in the shuffle process, while there is no such operation in spark. So I added the following 
functions in org.apache.pig.test.Util to only sort the output when in mr mode

public static void checkQueryOutputs(Iterator<Tuple> actualResults, List<Tuple> expectedResults, boolean checkAfterSort) 

public static void checkQueryOutputs(Iterator<Tuple> actualResults, String[] expectedResults,LogicalSchema logicalSchema, boolean checkAfterSort) throws IOException


> On May 22, 2016, 9:57 p.m., Rohini Palaniswamy wrote:
> > test/org/apache/pig/test/TestPigRunner.java, line 1155
> > <https://reviews.apache.org/r/45667/diff/1/?file=1323890#file1323890line1155>
> >
> >     Is there a jira to fix the number of records?

In spark mode, if pig.disable.counter = true, the number of input records is not calculated.  I have added some comments to explain this.


> On May 22, 2016, 9:57 p.m., Rohini Palaniswamy wrote:
> > test/org/apache/pig/test/TestSecondarySort.java, line 520
> > <https://reviews.apache.org/r/45667/diff/1/?file=1323897#file1323897line520>
> >
> >     Use Assume.assumeTrue

TestSecondarySort#testCustomPartitionerWithSort passes in spark mode now, so there is no need to skip this test.


- kelly


-----------------------------------------------------------
This is an automatically generated e-mail. To reply, visit:
https://reviews.apache.org/r/45667/#review134255
-----------------------------------------------------------


On June 14, 2016, 4:39 a.m., Pallavi Rao wrote:
> 
> -----------------------------------------------------------
> This is an automatically generated e-mail. To reply, visit:
> https://reviews.apache.org/r/45667/
> -----------------------------------------------------------
> 
> (Updated June 14, 2016, 4:39 a.m.)
> 
> 
> Review request for pig, Daniel Dai and Rohini Palaniswamy.
> 
> 
> Bugs: PIG-4059 and PIG-4854
>     https://issues.apache.org/jira/browse/PIG-4059
>     https://issues.apache.org/jira/browse/PIG-4854
> 
> 
> Repository: pig-git
> 
> 
> Description
> -------
> 
> The patch contains all the work done in the spark branch, so far.
> 
> 
> Diffs
> -----
> 
>   bin/pig 81f1426 
>   build.xml 99ba1f4 
>   ivy.xml dd9878e 
>   ivy/libraries.properties 3a819a5 
>   shims/test/hadoop20/org/apache/pig/test/SparkMiniCluster.java PRE-CREATION 
>   shims/test/hadoop23/org/apache/pig/test/SparkMiniCluster.java PRE-CREATION 
>   shims/test/hadoop23/org/apache/pig/test/TezMiniCluster.java 792a1bd 
>   shims/test/hadoop23/org/apache/pig/test/YarnMiniCluster.java PRE-CREATION 
>   src/META-INF/services/org.apache.pig.ExecType 5c034c8 
>   src/docs/src/documentation/content/xdocs/start.xml 36f9952 
>   src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/PhysicalOperator.java 1ff1abd 
>   src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/POUserFunc.java ecf780c 
>   src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/plans/PhysicalPlan.java 2376d03 
>   src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POCollectedGroup.java bcbfe2b 
>   src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POFRJoin.java d80951a 
>   src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POForEach.java 21b75f1 
>   src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POGlobalRearrange.java 52cfb73 
>   src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POMergeJoin.java 13f70c0 
>   src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POSort.java c3a82c3 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/JobGraphBuilder.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/JobMetricsListener.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/KryoSerializer.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/MapReducePartitionerWrapper.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/SparkExecType.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/SparkExecutionEngine.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLocalExecType.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/SparkUtil.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/UDFJarsFinder.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/CollectedGroupConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/CounterConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/DistinctConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/FRJoinConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/FilterConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/ForEachConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/GlobalRearrangeConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/IndexedKey.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/IteratorTransform.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/LimitConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/LoadConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/LocalRearrangeConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/MergeCogroupConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/MergeJoinConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/OutputConsumerIterator.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/PackageConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/PigSecondaryKeyComparatorSpark.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/RDDConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/RankConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/ReduceByConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/SkewedJoinConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/SortConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/SplitConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/StoreConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/StreamConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/UnionConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/operator/NativeSparkOperator.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/operator/POGlobalRearrangeSpark.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/operator/POReduceBySpark.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/AccumulatorOptimizer.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/CombinerOptimizer.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/MultiQueryOptimizerSpark.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/NoopFilterRemover.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/ParallelismSetter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/SecondaryKeyOptimizerSpark.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/plan/DotSparkPrinter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkCompiler.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkCompilerException.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkOpPlanVisitor.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkOperPlan.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkOperator.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkPOPackageAnnotator.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkPrinter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/running/PigInputFormatSpark.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/util/AccumulatorOptimizerUtil.java c4b44ad 
>   src/org/apache/pig/backend/hadoop/executionengine/util/CombinerOptimizerUtil.java 889c01b 
>   src/org/apache/pig/backend/hadoop/executionengine/util/SecondaryKeyOptimizerUtil.java 0b59c9c 
>   src/org/apache/pig/backend/hadoop/hbase/HBaseStorage.java e0581d9 
>   src/org/apache/pig/data/SelfSpillBag.java d17f0a8 
>   src/org/apache/pig/impl/PigContext.java d43949f 
>   src/org/apache/pig/impl/plan/OperatorPlan.java 8b2e2e7 
>   src/org/apache/pig/tools/pigstats/PigStatsUtil.java 542cc2e 
>   src/org/apache/pig/tools/pigstats/spark/SparkCounter.java PRE-CREATION 
>   src/org/apache/pig/tools/pigstats/spark/SparkCounterGroup.java PRE-CREATION 
>   src/org/apache/pig/tools/pigstats/spark/SparkCounters.java PRE-CREATION 
>   src/org/apache/pig/tools/pigstats/spark/SparkJobStats.java PRE-CREATION 
>   src/org/apache/pig/tools/pigstats/spark/SparkPigStats.java PRE-CREATION 
>   src/org/apache/pig/tools/pigstats/spark/SparkPigStatusReporter.java PRE-CREATION 
>   src/org/apache/pig/tools/pigstats/spark/SparkScriptState.java PRE-CREATION 
>   src/org/apache/pig/tools/pigstats/spark/SparkStatsUtil.java PRE-CREATION 
>   test/e2e/pig/build.xml f7c38ba 
>   test/e2e/pig/conf/spark.conf PRE-CREATION 
>   test/e2e/pig/drivers/TestDriverPig.pm bf9c302 
>   test/e2e/pig/tests/streaming.conf 18f2fb2 
>   test/excluded-tests-spark PRE-CREATION 
>   test/org/apache/pig/newplan/logical/relational/TestLocationInPhysicalPlan.java 94b34b3 
>   test/org/apache/pig/spark/TestIndexedKey.java PRE-CREATION 
>   test/org/apache/pig/spark/TestSecondarySortSpark.java PRE-CREATION 
>   test/org/apache/pig/test/MiniGenericCluster.java 9347269 
>   test/org/apache/pig/test/TestAssert.java 6d4b5c6 
>   test/org/apache/pig/test/TestBuiltin.java fbc3f1e 
>   test/org/apache/pig/test/TestCase.java c9bb2fa 
>   test/org/apache/pig/test/TestCollectedGroup.java a958d33 
>   test/org/apache/pig/test/TestCombiner.java df44293 
>   test/org/apache/pig/test/TestCubeOperator.java de96e6c 
>   test/org/apache/pig/test/TestEvalPipeline.java 9efde13 
>   test/org/apache/pig/test/TestEvalPipeline2.java c8f51d7 
>   test/org/apache/pig/test/TestEvalPipelineLocal.java c12d595 
>   test/org/apache/pig/test/TestFinish.java f18c103 
>   test/org/apache/pig/test/TestForEachNestedPlanLocal.java b0aa3a8 
>   test/org/apache/pig/test/TestGrunt.java 9eaf298 
>   test/org/apache/pig/test/TestHBaseStorage.java 8d2ad85 
>   test/org/apache/pig/test/TestLimitVariable.java 53b9dae 
>   test/org/apache/pig/test/TestMapSideCogroup.java 2c78b4a 
>   test/org/apache/pig/test/TestMergeJoin.java f1a9608 
>   test/org/apache/pig/test/TestMergeJoinOuter.java 81aee55 
>   test/org/apache/pig/test/TestMultiQuery.java c32eab7 
>   test/org/apache/pig/test/TestMultiQueryLocal.java b9ac035 
>   test/org/apache/pig/test/TestNativeMapReduce.java c4f6573 
>   test/org/apache/pig/test/TestNullConstant.java 3ea4509 
>   test/org/apache/pig/test/TestPigRunner.java fde8609 
>   test/org/apache/pig/test/TestPigServerLocal.java fbabd03 
>   test/org/apache/pig/test/TestProjectRange.java 2e3e7b8 
>   test/org/apache/pig/test/TestPruneColumn.java 3936332 
>   test/org/apache/pig/test/TestRank1.java 9e4ef62 
>   test/org/apache/pig/test/TestRank2.java fc802a9 
>   test/org/apache/pig/test/TestRank3.java 43af10d 
>   test/org/apache/pig/test/TestSecondarySort.java 8991010 
>   test/org/apache/pig/test/TestSkewedJoin.java dba2241 
>   test/org/apache/pig/test/TestStoreBase.java eb3b253 
>   test/org/apache/pig/test/Util.java 36d01e8 
>   test/spark-tests PRE-CREATION 
> 
> Diff: https://reviews.apache.org/r/45667/diff/
> 
> 
> Testing
> -------
> 
> New UTs were added where required and ensure old UTs pass -> https://builds.apache.org/job/Pig-spark/
> 
> 
> Thanks,
> 
> Pallavi Rao
> 
>


Re: Review Request 45667: Support Pig On Spark

Posted by Rohini Palaniswamy <ro...@gmail.com>.
-----------------------------------------------------------
This is an automatically generated e-mail. To reply, visit:
https://reviews.apache.org/r/45667/#review134255
-----------------------------------------------------------



Review comments for all non-spark classes.


bin/pig (line 365)
<https://reviews.apache.org/r/45667/#comment198955>

    This is not a good idea. If I remember correctly, spark-assembly.jar is 128MB+. If you are copying all the individual jars that it is made up of to distcache for every job, it will suffer bad performance as copy to hdfs and localization by NM will be very costly. 
    
    Like Tez you can have users copy the assembly jar to hdfs and specify the hdfs location. This will ensure there is only one copy in hdfs and localization is done only once per node by node manager.



bin/pig (line 426)
<https://reviews.apache.org/r/45667/#comment198956>

    Why is this extra variable needed? This will be a problem if users are programmatically calling PigServer instead of using bin/pig



build.xml (line 366)
<https://reviews.apache.org/r/45667/#comment198949>

    Can you confirm that with all the spark changes running with hadoop 1.x (ant test -Dhadoopversion=20) is good?



build.xml (lines 401 - 415)
<https://reviews.apache.org/r/45667/#comment198950>

    Please remove this. This target is supposed to contain the minimum jars required for just Pig without any hadoop, tez or spark dependencies.



build.xml (line 771)
<https://reviews.apache.org/r/45667/#comment198951>

    Directory path will be build/ivy/lib/spark/Pig. You can just do build/ivy/lib/spark.



build.xml (line 1052)
<https://reviews.apache.org/r/45667/#comment198952>

    spark mode



ivy.xml (line 451)
<https://reviews.apache.org/r/45667/#comment198954>

    What does 2.10 in spark-core_2.10 and spark-yarn_2.10 signify?



ivy/libraries.properties (line 80)
<https://reviews.apache.org/r/45667/#comment198953>

    Can we move to 1.6.1?



shims/test/hadoop20/org/apache/pig/test/SparkMiniCluster.java (line 68)
<https://reviews.apache.org/r/45667/#comment198958>

    You will have to implement getLauncher method. Else will end up with infinite loop. Refer PIG-4530



shims/test/hadoop23/org/apache/pig/test/SparkMiniCluster.java (line 38)
<https://reviews.apache.org/r/45667/#comment198961>

    You should move common code between TezMiniCluster and this into a YarnMiniCluster class to avoid duplication of code. TezMiniCluster and SparkMiniCluster can then extend that class and override specific methods.



shims/test/hadoop23/org/apache/pig/test/SparkMiniCluster.java (line 52)
<https://reviews.apache.org/r/45667/#comment198959>

    Formatting nitpick. New line before the constructor.



src/META-INF/services/org.apache.pig.ExecType (lines 16 - 17)
<https://reviews.apache.org/r/45667/#comment198962>

    These lines need to be uncommented again.



src/docs/src/documentation/content/xdocs/start.xml (line 99)
<https://reviews.apache.org/r/45667/#comment198963>

    This file and content needs rebasing with trunk to include latest changes. Spark Local Mode should be after Tez Local Mode and Spark Mode after Tez Mode so that local modes are together as it is in the current trunk version.



src/docs/src/documentation/content/xdocs/start.xml (line 132)
<https://reviews.apache.org/r/45667/#comment198964>

    six execution modes



src/docs/src/documentation/content/xdocs/start.xml (line 143)
<https://reviews.apache.org/r/45667/#comment198965>

    1) Can you add a section for Spark Local Mode as well? Please also do this in all other places of the documentation wherever tez_local is mentioned.
    2) Can we add a bulleted list of the different options for env::SPARK_MASTER, for easy readability?



src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/POUserFunc.java (line 123)
<https://reviews.apache.org/r/45667/#comment198966>

    Formatting nitpick. Space between () and {



src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/plans/PhysicalPlan.java (line 153)
<https://reviews.apache.org/r/45667/#comment198967>

    Can this additional method be avoided? What is the special case that this is required for?



src/org/apache/pig/backend/hadoop/executionengine/spark/JobGraphBuilder.java (line 72)
<https://reviews.apache.org/r/45667/#comment198968>

    Formatting nitpick. Break into multiple lines.



src/org/apache/pig/backend/hadoop/executionengine/util/AccumulatorOptimizerUtil.java (line 294)
<https://reviews.apache.org/r/45667/#comment199079>

    The utility class is for common code between mapreduce, tez and spark. Please move all Spark specific code to the spark AccumulatorOptimizer



src/org/apache/pig/backend/hadoop/executionengine/util/SecondaryKeyOptimizerUtil.java (line 59)
<https://reviews.apache.org/r/45667/#comment199080>

    It is not a good idea to use a static variable to keep state in this static class. Multi-threaded use of Pig will have issues if different modes are used.
    
    This utility class is for code common to mapreduce, tez and spark. Please move the spark specific code to spark's SecondaryKeyOptimizer. This is also necessary if we ever complete the mavenization and move tez and spark specific code to separate modules.



src/org/apache/pig/backend/hadoop/hbase/HBaseStorage.java (line 313)
<https://reviews.apache.org/r/45667/#comment199081>

    Just change the code to use UDFContext.getUDFContext().getJobConf() which should not be null instead of getClientSystemProps(). Not sure why it is using getClientSystemProps() in the first place.



src/org/apache/pig/data/SelfSpillBag.java (line 32)
<https://reviews.apache.org/r/45667/#comment199082>

    Why is bag even being serialized by Spark?



src/org/apache/pig/impl/PigContext.java (line 924)
<https://reviews.apache.org/r/45667/#comment199083>

    This can be reverted. PigContext need not be serialized to the backend. See PIG-4866



test/e2e/pig/conf/spark.conf (line 58)
<https://reviews.apache.org/r/45667/#comment199031>

    Why local instead of mapreduce?



test/org/apache/pig/newplan/logical/relational/TestLocationInPhysicalPlan.java (line 66)
<https://reviews.apache.org/r/45667/#comment199032>

    Why does A[3,4] repeat?



test/org/apache/pig/spark/TestIndexedKey.java (lines 2 - 5)
<https://reviews.apache.org/r/45667/#comment199036>

    Keep license formatting same in all the files. Might cause issue with RAT report.



test/org/apache/pig/test/TestBuiltin.java (line 3248)
<https://reviews.apache.org/r/45667/#comment199039>

    Jira number does not seem to be the right one.



test/org/apache/pig/test/TestBuiltin.java (line 3255)
<https://reviews.apache.org/r/45667/#comment199040>

    This testcase is broken if you have 0-0 repeating twice. It is not UniqueID anymore.



test/org/apache/pig/test/TestCollectedGroup.java (line 297)
<https://reviews.apache.org/r/45667/#comment199041>

    Please use Assume.assumeTrue to skip tests for spark instead of if conditions. The test will also show up as Skipped instead of Passed.
    
    Assume.assumeTrue("Skip this test for Spark. See <jira>", !Util.isSparkExecType(cluster.getExecType()));



test/org/apache/pig/test/TestCombiner.java (lines 189 - 191)
<https://reviews.apache.org/r/45667/#comment199043>

    Can be removed with the change to checkCombinerUsed



test/org/apache/pig/test/TestCombiner.java (lines 239 - 241)
<https://reviews.apache.org/r/45667/#comment199044>

    Can be removed with the change to checkCombinerUsed



test/org/apache/pig/test/TestCombiner.java (line 409)
<https://reviews.apache.org/r/45667/#comment199046>

    Can we use the name "alias" instead of "variable" here?



test/org/apache/pig/test/TestCombiner.java (line 416)
<https://reviews.apache.org/r/45667/#comment199042>

    Util.isSparkExecType



test/org/apache/pig/test/TestEvalPipeline.java (lines 431 - 436)
<https://reviews.apache.org/r/45667/#comment199053>

    Can be extracted to a utility method as it is repeated multiple times.



test/org/apache/pig/test/TestEvalPipeline2.java (line 770)
<https://reviews.apache.org/r/45667/#comment199055>

    Can be simplified as
    Util.checkQueryOutputs(iter, expectedResults, Util.isSparkExecType());
    by adding another Util method. Please replace in other tests as well where it has been changed to Util.checkQueryOutputsAfterSort instead of Util.checkQueryOutputs.
    
    Util.java
    
    public static void checkQueryOutputs(Iterator<Tuple> actualResults, Tuple[] expectedResults, boolean checkAfterSort)  {
       if (checkAfterSort) {
           checkQueryOutputsAfterSort(actualResults, Arrays.asList(expectedResults));
       } else {
           checkQueryOutputs(actualResults, Arrays.asList(expectedResults));
       }
    }



test/org/apache/pig/test/TestEvalPipelineLocal.java (lines 1117 - 1118)
<https://reviews.apache.org/r/45667/#comment199056>

    Code should be taking care of this.



test/org/apache/pig/test/TestFinish.java (line 48)
<https://reviews.apache.org/r/45667/#comment199058>

    private static



test/org/apache/pig/test/TestFinish.java (line 72)
<https://reviews.apache.org/r/45667/#comment199057>

    This should not be done. The cluster object is not something a user UDF will have access to.



test/org/apache/pig/test/TestFinish.java (line 144)
<https://reviews.apache.org/r/45667/#comment199059>

    To be reverted



test/org/apache/pig/test/TestFinish.java (line 163)
<https://reviews.apache.org/r/45667/#comment199060>

    To be reverted



test/org/apache/pig/test/TestGrunt.java (line 937)
<https://reviews.apache.org/r/45667/#comment199061>

    Assume.assumeTrue



test/org/apache/pig/test/TestLimitVariable.java (line 93)
<https://reviews.apache.org/r/45667/#comment199062>

    Please revert the type change to int. This testcase is testing bytearray casting.



test/org/apache/pig/test/TestLimitVariable.java (line 117)
<https://reviews.apache.org/r/45667/#comment199063>

    Please revert the type change to int. This testcase is testing bytearray casting.



test/org/apache/pig/test/TestMapSideCogroup.java (line 339)
<https://reviews.apache.org/r/45667/#comment199066>

    Why is output ordering different with merge join? The assumption with merge join is that files are sorted and join is done based on that. So ordering should be same.



test/org/apache/pig/test/TestMergeJoin.java (line 641)
<https://reviews.apache.org/r/45667/#comment199064>

    Assume.assumeTrue



test/org/apache/pig/test/TestMergeJoin.java (line 690)
<https://reviews.apache.org/r/45667/#comment199065>

    Assume.assumeTrue



test/org/apache/pig/test/TestMergeJoinOuter.java (line 173)
<https://reviews.apache.org/r/45667/#comment199068>

    Assume.assumeTrue for all tests in this class



test/org/apache/pig/test/TestMultiQuery.java (line 115)
<https://reviews.apache.org/r/45667/#comment199070>

    Why are we using checkQueryOutputsAfterSortRecursive in many places when checkQueryOutputsAfterSort would do? It will unnecessarily increase the test execution time. Can they all be changed? Or am I missing something and checkQueryOutputsAfterSort cannot be used for some reason?



test/org/apache/pig/test/TestPigRunner.java (lines 333 - 334)
<https://reviews.apache.org/r/45667/#comment199074>

    1 job for 1 POStore can impact performance. It is not exactly multi-query. Is there a jira to optimize this better and use 1 job?



test/org/apache/pig/test/TestPigRunner.java (line 1155)
<https://reviews.apache.org/r/45667/#comment199075>

    Is there a jira to fix the number of records?



test/org/apache/pig/test/TestPigServerLocal.java (line 265)
<https://reviews.apache.org/r/45667/#comment199076>

    Can you add _testParseBatchWithScripting as well?



test/org/apache/pig/test/TestSecondarySort.java (line 364)
<https://reviews.apache.org/r/45667/#comment199052>

    It is not right to check output after sorting here. This is an order by query. Output should already be in the expected order.



test/org/apache/pig/test/TestSecondarySort.java (line 512)
<https://reviews.apache.org/r/45667/#comment199051>

    Use Assume.assumeTrue



test/org/apache/pig/test/TestSkewedJoin.java (line 290)
<https://reviews.apache.org/r/45667/#comment199050>

    Use Assume.assumeTrue



test/org/apache/pig/test/TestStoreBase.java (line 155)
<https://reviews.apache.org/r/45667/#comment199047>

    Second store should not succeed.



test/org/apache/pig/test/TestStoreBase.java (line 165)
<https://reviews.apache.org/r/45667/#comment199049>

    Setup job file of second one should also be there. Same with setup task as well. They are supposed to happen before putNext of both the stores is called.


- Rohini Palaniswamy


On April 4, 2016, 5:19 a.m., Pallavi Rao wrote:
> 
> -----------------------------------------------------------
> This is an automatically generated e-mail. To reply, visit:
> https://reviews.apache.org/r/45667/
> -----------------------------------------------------------
> 
> (Updated April 4, 2016, 5:19 a.m.)
> 
> 
> Review request for pig, Daniel Dai and Rohini Palaniswamy.
> 
> 
> Bugs: PIG-4059 and PIG-4854
>     https://issues.apache.org/jira/browse/PIG-4059
>     https://issues.apache.org/jira/browse/PIG-4854
> 
> 
> Repository: pig-git
> 
> 
> Description
> -------
> 
> The patch contains all the work done in the spark branch, so far.
> 
> 
> Diffs
> -----
> 
>   bin/pig 81f1426 
>   build.xml 8db1a80 
>   ivy.xml dd9878e 
>   ivy/libraries.properties 55d9aed 
>   shims/test/hadoop20/org/apache/pig/test/SparkMiniCluster.java PRE-CREATION 
>   shims/test/hadoop23/org/apache/pig/test/SparkMiniCluster.java PRE-CREATION 
>   src/META-INF/services/org.apache.pig.ExecType 5c034c8 
>   src/docs/src/documentation/content/xdocs/start.xml 36f9952 
>   src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/PhysicalOperator.java 1ff1abd 
>   src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/POUserFunc.java ecf780c 
>   src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/plans/PhysicalPlan.java 2376d03 
>   src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POCollectedGroup.java bcbfe2b 
>   src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POFRJoin.java 894cda7 
>   src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POForEach.java 21b75f1 
>   src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POGlobalRearrange.java 52cfb73 
>   src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POMergeJoin.java 6adfa91 
>   src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POSort.java c3a82c3 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/JobGraphBuilder.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/JobMetricsListener.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/KryoSerializer.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/MapReducePartitionerWrapper.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/SparkExecType.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/SparkExecutionEngine.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLocalExecType.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/SparkUtil.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/UDFJarsFinder.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/CollectedGroupConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/CounterConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/DistinctConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/FRJoinConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/FilterConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/ForEachConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/GlobalRearrangeConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/IndexedKey.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/IteratorTransform.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/LimitConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/LoadConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/LocalRearrangeConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/MergeCogroupConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/MergeJoinConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/OutputConsumerIterator.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/PackageConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/PigSecondaryKeyComparatorSpark.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/RDDConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/RankConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/ReduceByConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/SkewedJoinConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/SortConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/SplitConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/StoreConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/StreamConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/converter/UnionConverter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/operator/NativeSparkOperator.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/operator/POGlobalRearrangeSpark.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/operator/POReduceBySpark.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/AccumulatorOptimizer.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/CombinerOptimizer.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/MultiQueryOptimizerSpark.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/NoopFilterRemover.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/ParallelismSetter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/SecondaryKeyOptimizerSpark.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/plan/DotSparkPrinter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkCompiler.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkCompilerException.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkOpPlanVisitor.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkOperPlan.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkOperator.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkPOPackageAnnotator.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkPrinter.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/spark/running/PigInputFormatSpark.java PRE-CREATION 
>   src/org/apache/pig/backend/hadoop/executionengine/util/AccumulatorOptimizerUtil.java c4b44ad 
>   src/org/apache/pig/backend/hadoop/executionengine/util/CombinerOptimizerUtil.java 889c01b 
>   src/org/apache/pig/backend/hadoop/executionengine/util/SecondaryKeyOptimizerUtil.java 0b59c9c 
>   src/org/apache/pig/backend/hadoop/hbase/HBaseStorage.java e0581d9 
>   src/org/apache/pig/data/SelfSpillBag.java d17f0a8 
>   src/org/apache/pig/impl/PigContext.java d43949f 
>   src/org/apache/pig/impl/plan/OperatorPlan.java 8b2e2e7 
>   src/org/apache/pig/tools/pigstats/PigStatsUtil.java 542cc2e 
>   src/org/apache/pig/tools/pigstats/spark/SparkCounter.java PRE-CREATION 
>   src/org/apache/pig/tools/pigstats/spark/SparkCounterGroup.java PRE-CREATION 
>   src/org/apache/pig/tools/pigstats/spark/SparkCounters.java PRE-CREATION 
>   src/org/apache/pig/tools/pigstats/spark/SparkJobStats.java PRE-CREATION 
>   src/org/apache/pig/tools/pigstats/spark/SparkPigStats.java PRE-CREATION 
>   src/org/apache/pig/tools/pigstats/spark/SparkPigStatusReporter.java PRE-CREATION 
>   src/org/apache/pig/tools/pigstats/spark/SparkScriptState.java PRE-CREATION 
>   src/org/apache/pig/tools/pigstats/spark/SparkStatsUtil.java PRE-CREATION 
>   test/e2e/pig/build.xml f7c38ba 
>   test/e2e/pig/conf/spark.conf PRE-CREATION 
>   test/e2e/pig/drivers/TestDriverPig.pm bf9c302 
>   test/e2e/pig/tests/streaming.conf 18f2fb2 
>   test/excluded-tests-spark PRE-CREATION 
>   test/org/apache/pig/newplan/logical/relational/TestLocationInPhysicalPlan.java 94b34b3 
>   test/org/apache/pig/spark/TestIndexedKey.java PRE-CREATION 
>   test/org/apache/pig/spark/TestSecondarySortSpark.java PRE-CREATION 
>   test/org/apache/pig/test/MiniGenericCluster.java 9347269 
>   test/org/apache/pig/test/TestAssert.java 6d4b5c6 
>   test/org/apache/pig/test/TestBuiltin.java 44b4d09 
>   test/org/apache/pig/test/TestCase.java c9bb2fa 
>   test/org/apache/pig/test/TestCollectedGroup.java a958d33 
>   test/org/apache/pig/test/TestCombiner.java df44293 
>   test/org/apache/pig/test/TestCubeOperator.java de96e6c 
>   test/org/apache/pig/test/TestEvalPipeline.java 9efde13 
>   test/org/apache/pig/test/TestEvalPipeline2.java c8f51d7 
>   test/org/apache/pig/test/TestEvalPipelineLocal.java c12d595 
>   test/org/apache/pig/test/TestFinish.java f18c103 
>   test/org/apache/pig/test/TestForEachNestedPlanLocal.java b0aa3a8 
>   test/org/apache/pig/test/TestGrunt.java ef121a3 
>   test/org/apache/pig/test/TestHBaseStorage.java 8d2ad85 
>   test/org/apache/pig/test/TestLimitVariable.java 53b9dae 
>   test/org/apache/pig/test/TestMapSideCogroup.java 2c78b4a 
>   test/org/apache/pig/test/TestMergeJoin.java f1a9608 
>   test/org/apache/pig/test/TestMergeJoinOuter.java 81aee55 
>   test/org/apache/pig/test/TestMultiQuery.java 40684b4 
>   test/org/apache/pig/test/TestMultiQueryLocal.java b9ac035 
>   test/org/apache/pig/test/TestNativeMapReduce.java c4f6573 
>   test/org/apache/pig/test/TestNullConstant.java 3ea4509 
>   test/org/apache/pig/test/TestPigRunner.java fde8609 
>   test/org/apache/pig/test/TestPigServerLocal.java fbabd03 
>   test/org/apache/pig/test/TestProjectRange.java 2e3e7b8 
>   test/org/apache/pig/test/TestPruneColumn.java 3936332 
>   test/org/apache/pig/test/TestRank1.java 9e4ef62 
>   test/org/apache/pig/test/TestRank2.java fc802a9 
>   test/org/apache/pig/test/TestRank3.java 43af10d 
>   test/org/apache/pig/test/TestSecondarySort.java 8991010 
>   test/org/apache/pig/test/TestSkewedJoin.java dba2241 
>   test/org/apache/pig/test/TestStoreBase.java eb3b253 
>   test/org/apache/pig/test/Util.java 8dae247 
>   test/spark-tests PRE-CREATION 
> 
> Diff: https://reviews.apache.org/r/45667/diff/
> 
> 
> Testing
> -------
> 
> New UTs were added where required and ensure old UTs pass -> https://builds.apache.org/job/Pig-spark/
> 
> 
> Thanks,
> 
> Pallavi Rao
> 
>


Re: Review Request 45667: Support Pig On Spark

Posted by Pallavi Rao <pa...@inmobi.com>.
-----------------------------------------------------------
This is an automatically generated e-mail. To reply, visit:
https://reviews.apache.org/r/45667/
-----------------------------------------------------------

(Updated Oct. 27, 2016, 3:17 a.m.)


Review request for pig, Daniel Dai and Rohini Palaniswamy.


Bugs: PIG-4059 and PIG-4854
    https://issues.apache.org/jira/browse/PIG-4059
    https://issues.apache.org/jira/browse/PIG-4854


Repository: pig-git


Description
-------

The patch contains all the work done in the spark branch, so far.


Diffs (updated)
-----

  bin/pig 81f1426 
  build.xml 99ba1f4 
  ivy.xml dd9878e 
  ivy/libraries.properties 3a819a5 
  shims/test/hadoop20/org/apache/pig/test/SparkMiniCluster.java PRE-CREATION 
  shims/test/hadoop23/org/apache/pig/test/SparkMiniCluster.java PRE-CREATION 
  shims/test/hadoop23/org/apache/pig/test/TezMiniCluster.java 792a1bd 
  shims/test/hadoop23/org/apache/pig/test/YarnMiniCluster.java PRE-CREATION 
  src/META-INF/services/org.apache.pig.ExecType 5c034c8 
  src/docs/src/documentation/content/xdocs/start.xml 36f9952 
  src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/PhysicalOperator.java 1ff1abd 
  src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/POUserFunc.java ecf780c 
  src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/plans/PhysicalPlan.java 2376d03 
  src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POCollectedGroup.java bcbfe2b 
  src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POFRJoin.java d80951a 
  src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POForEach.java 21b75f1 
  src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POGlobalRearrange.java 52cfb73 
  src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POMergeJoin.java 13f70c0 
  src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POSort.java c3a82c3 
  src/org/apache/pig/backend/hadoop/executionengine/spark/JobGraphBuilder.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/JobMetricsListener.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/KryoSerializer.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/MapReducePartitionerWrapper.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/SparkEngineConf.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/SparkExecType.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/SparkExecutionEngine.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLocalExecType.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/SparkUtil.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/UDFJarsFinder.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/CollectedGroupConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/CounterConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/DistinctConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/FRJoinConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/FilterConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/ForEachConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/GlobalRearrangeConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/IndexedKey.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/IteratorTransform.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/LimitConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/LoadConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/LocalRearrangeConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/MergeCogroupConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/MergeJoinConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/OutputConsumerIterator.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/PackageConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/PigSecondaryKeyComparatorSpark.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/RDDConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/RankConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/ReduceByConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/SkewedJoinConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/SortConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/SplitConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/StoreConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/StreamConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/UnionConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/operator/NativeSparkOperator.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/operator/POGlobalRearrangeSpark.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/operator/POReduceBySpark.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/AccumulatorOptimizer.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/CombinerOptimizer.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/MultiQueryOptimizerSpark.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/NoopFilterRemover.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/ParallelismSetter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/SecondaryKeyOptimizerSpark.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/plan/DotSparkPrinter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkCompiler.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkCompilerException.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkOpPlanVisitor.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkOperPlan.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkOperator.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkPOPackageAnnotator.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkPrinter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/running/PigInputFormatSpark.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/util/AccumulatorOptimizerUtil.java c4b44ad 
  src/org/apache/pig/backend/hadoop/executionengine/util/CombinerOptimizerUtil.java 889c01b 
  src/org/apache/pig/backend/hadoop/executionengine/util/SecondaryKeyOptimizerUtil.java 0b59c9c 
  src/org/apache/pig/data/SelfSpillBag.java d17f0a8 
  src/org/apache/pig/impl/plan/OperatorPlan.java 8b2e2e7 
  src/org/apache/pig/impl/util/UDFContext.java 09afc0a 
  src/org/apache/pig/tools/pigstats/PigStatsUtil.java 542cc2e 
  src/org/apache/pig/tools/pigstats/spark/SparkCounter.java PRE-CREATION 
  src/org/apache/pig/tools/pigstats/spark/SparkCounterGroup.java PRE-CREATION 
  src/org/apache/pig/tools/pigstats/spark/SparkCounters.java PRE-CREATION 
  src/org/apache/pig/tools/pigstats/spark/SparkJobStats.java PRE-CREATION 
  src/org/apache/pig/tools/pigstats/spark/SparkPigStats.java PRE-CREATION 
  src/org/apache/pig/tools/pigstats/spark/SparkPigStatusReporter.java PRE-CREATION 
  src/org/apache/pig/tools/pigstats/spark/SparkScriptState.java PRE-CREATION 
  src/org/apache/pig/tools/pigstats/spark/SparkStatsUtil.java PRE-CREATION 
  test/e2e/pig/build.xml f7c38ba 
  test/e2e/pig/conf/spark.conf PRE-CREATION 
  test/e2e/pig/drivers/TestDriverPig.pm bf9c302 
  test/e2e/pig/tests/streaming.conf 18f2fb2 
  test/excluded-tests-spark PRE-CREATION 
  test/org/apache/pig/newplan/logical/relational/TestLocationInPhysicalPlan.java 94b34b3 
  test/org/apache/pig/spark/TestIndexedKey.java PRE-CREATION 
  test/org/apache/pig/spark/TestSecondarySortSpark.java PRE-CREATION 
  test/org/apache/pig/test/MiniGenericCluster.java 9347269 
  test/org/apache/pig/test/TestAssert.java 6d4b5c6 
  test/org/apache/pig/test/TestBuiltin.java fbc3f1e 
  test/org/apache/pig/test/TestCase.java c9bb2fa 
  test/org/apache/pig/test/TestCollectedGroup.java a958d33 
  test/org/apache/pig/test/TestCombiner.java df44293 
  test/org/apache/pig/test/TestCubeOperator.java de96e6c 
  test/org/apache/pig/test/TestEvalPipeline.java 9efde13 
  test/org/apache/pig/test/TestEvalPipeline2.java c8f51d7 
  test/org/apache/pig/test/TestEvalPipelineLocal.java c12d595 
  test/org/apache/pig/test/TestFinish.java f18c103 
  test/org/apache/pig/test/TestForEachNestedPlanLocal.java b0aa3a8 
  test/org/apache/pig/test/TestGrunt.java 9eaf298 
  test/org/apache/pig/test/TestHBaseStorage.java 8d2ad85 
  test/org/apache/pig/test/TestLimitVariable.java 53b9dae 
  test/org/apache/pig/test/TestMapSideCogroup.java 2c78b4a 
  test/org/apache/pig/test/TestMergeJoin.java f1a9608 
  test/org/apache/pig/test/TestMergeJoinOuter.java 81aee55 
  test/org/apache/pig/test/TestMultiQuery.java c32eab7 
  test/org/apache/pig/test/TestMultiQueryLocal.java b9ac035 
  test/org/apache/pig/test/TestNativeMapReduce.java c4f6573 
  test/org/apache/pig/test/TestNullConstant.java 3ea4509 
  test/org/apache/pig/test/TestPigRunner.java fde8609 
  test/org/apache/pig/test/TestPigServerLocal.java fbabd03 
  test/org/apache/pig/test/TestProjectRange.java 2e3e7b8 
  test/org/apache/pig/test/TestPruneColumn.java 3936332 
  test/org/apache/pig/test/TestRank1.java 9e4ef62 
  test/org/apache/pig/test/TestRank2.java fc802a9 
  test/org/apache/pig/test/TestRank3.java 43af10d 
  test/org/apache/pig/test/TestSecondarySort.java 8991010 
  test/org/apache/pig/test/TestSkewedJoin.java dba2241 
  test/org/apache/pig/test/TestStoreBase.java eb3b253 
  test/org/apache/pig/test/Util.java 36d01e8 
  test/spark-tests PRE-CREATION 

Diff: https://reviews.apache.org/r/45667/diff/


Testing
-------

New UTs were added where required, and we ensured that the old UTs still pass -> https://builds.apache.org/job/Pig-spark/


Thanks,

Pallavi Rao


Re: Review Request 45667: Support Pig On Spark

Posted by Pallavi Rao <pa...@inmobi.com>.
-----------------------------------------------------------
This is an automatically generated e-mail. To reply, visit:
https://reviews.apache.org/r/45667/
-----------------------------------------------------------

(Updated July 11, 2016, 4:32 a.m.)


Review request for pig, Daniel Dai and Rohini Palaniswamy.


Changes
-------

Addressed some more review comments.


Bugs: PIG-4059 and PIG-4854
    https://issues.apache.org/jira/browse/PIG-4059
    https://issues.apache.org/jira/browse/PIG-4854


Repository: pig-git


Description
-------

The patch contains all the work done in the spark branch so far.


Diffs (updated)
-----

  bin/pig 81f1426 
  build.xml 99ba1f4 
  ivy.xml dd9878e 
  ivy/libraries.properties 3a819a5 
  shims/test/hadoop20/org/apache/pig/test/SparkMiniCluster.java PRE-CREATION 
  shims/test/hadoop23/org/apache/pig/test/SparkMiniCluster.java PRE-CREATION 
  shims/test/hadoop23/org/apache/pig/test/TezMiniCluster.java 792a1bd 
  shims/test/hadoop23/org/apache/pig/test/YarnMiniCluster.java PRE-CREATION 
  src/META-INF/services/org.apache.pig.ExecType 5c034c8 
  src/docs/src/documentation/content/xdocs/start.xml 36f9952 
  src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/PhysicalOperator.java 1ff1abd 
  src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/POUserFunc.java ecf780c 
  src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/plans/PhysicalPlan.java 2376d03 
  src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POCollectedGroup.java bcbfe2b 
  src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POFRJoin.java d80951a 
  src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POForEach.java 21b75f1 
  src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POGlobalRearrange.java 52cfb73 
  src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POMergeJoin.java 13f70c0 
  src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POSort.java c3a82c3 
  src/org/apache/pig/backend/hadoop/executionengine/spark/JobGraphBuilder.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/JobMetricsListener.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/KryoSerializer.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/MapReducePartitionerWrapper.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/SparkExecType.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/SparkExecutionEngine.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLocalExecType.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/SparkUtil.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/UDFJarsFinder.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/CollectedGroupConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/CounterConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/DistinctConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/FRJoinConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/FilterConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/ForEachConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/GlobalRearrangeConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/IndexedKey.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/IteratorTransform.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/LimitConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/LoadConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/LocalRearrangeConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/MergeCogroupConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/MergeJoinConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/OutputConsumerIterator.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/PackageConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/PigSecondaryKeyComparatorSpark.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/RDDConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/RankConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/ReduceByConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/SkewedJoinConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/SortConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/SplitConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/StoreConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/StreamConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/UnionConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/operator/NativeSparkOperator.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/operator/POGlobalRearrangeSpark.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/operator/POReduceBySpark.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/AccumulatorOptimizer.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/CombinerOptimizer.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/MultiQueryOptimizerSpark.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/NoopFilterRemover.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/ParallelismSetter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/SecondaryKeyOptimizerSpark.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/plan/DotSparkPrinter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkCompiler.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkCompilerException.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkOpPlanVisitor.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkOperPlan.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkOperator.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkPOPackageAnnotator.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkPrinter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/running/PigInputFormatSpark.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/util/AccumulatorOptimizerUtil.java c4b44ad 
  src/org/apache/pig/backend/hadoop/executionengine/util/CombinerOptimizerUtil.java 889c01b 
  src/org/apache/pig/backend/hadoop/executionengine/util/SecondaryKeyOptimizerUtil.java 0b59c9c 
  src/org/apache/pig/backend/hadoop/hbase/HBaseStorage.java e0581d9 
  src/org/apache/pig/data/SelfSpillBag.java d17f0a8 
  src/org/apache/pig/impl/PigContext.java d43949f 
  src/org/apache/pig/impl/plan/OperatorPlan.java 8b2e2e7 
  src/org/apache/pig/tools/pigstats/PigStatsUtil.java 542cc2e 
  src/org/apache/pig/tools/pigstats/spark/SparkCounter.java PRE-CREATION 
  src/org/apache/pig/tools/pigstats/spark/SparkCounterGroup.java PRE-CREATION 
  src/org/apache/pig/tools/pigstats/spark/SparkCounters.java PRE-CREATION 
  src/org/apache/pig/tools/pigstats/spark/SparkJobStats.java PRE-CREATION 
  src/org/apache/pig/tools/pigstats/spark/SparkPigStats.java PRE-CREATION 
  src/org/apache/pig/tools/pigstats/spark/SparkPigStatusReporter.java PRE-CREATION 
  src/org/apache/pig/tools/pigstats/spark/SparkScriptState.java PRE-CREATION 
  src/org/apache/pig/tools/pigstats/spark/SparkStatsUtil.java PRE-CREATION 
  test/e2e/pig/build.xml f7c38ba 
  test/e2e/pig/conf/spark.conf PRE-CREATION 
  test/e2e/pig/drivers/TestDriverPig.pm bf9c302 
  test/e2e/pig/tests/streaming.conf 18f2fb2 
  test/excluded-tests-spark PRE-CREATION 
  test/org/apache/pig/newplan/logical/relational/TestLocationInPhysicalPlan.java 94b34b3 
  test/org/apache/pig/spark/TestIndexedKey.java PRE-CREATION 
  test/org/apache/pig/spark/TestSecondarySortSpark.java PRE-CREATION 
  test/org/apache/pig/test/MiniGenericCluster.java 9347269 
  test/org/apache/pig/test/TestAssert.java 6d4b5c6 
  test/org/apache/pig/test/TestBuiltin.java fbc3f1e 
  test/org/apache/pig/test/TestCase.java c9bb2fa 
  test/org/apache/pig/test/TestCollectedGroup.java a958d33 
  test/org/apache/pig/test/TestCombiner.java df44293 
  test/org/apache/pig/test/TestCubeOperator.java de96e6c 
  test/org/apache/pig/test/TestEvalPipeline.java 9efde13 
  test/org/apache/pig/test/TestEvalPipeline2.java c8f51d7 
  test/org/apache/pig/test/TestEvalPipelineLocal.java c12d595 
  test/org/apache/pig/test/TestFinish.java f18c103 
  test/org/apache/pig/test/TestForEachNestedPlanLocal.java b0aa3a8 
  test/org/apache/pig/test/TestGrunt.java 9eaf298 
  test/org/apache/pig/test/TestHBaseStorage.java 8d2ad85 
  test/org/apache/pig/test/TestLimitVariable.java 53b9dae 
  test/org/apache/pig/test/TestMapSideCogroup.java 2c78b4a 
  test/org/apache/pig/test/TestMergeJoin.java f1a9608 
  test/org/apache/pig/test/TestMergeJoinOuter.java 81aee55 
  test/org/apache/pig/test/TestMultiQuery.java c32eab7 
  test/org/apache/pig/test/TestMultiQueryLocal.java b9ac035 
  test/org/apache/pig/test/TestNativeMapReduce.java c4f6573 
  test/org/apache/pig/test/TestNullConstant.java 3ea4509 
  test/org/apache/pig/test/TestPigRunner.java fde8609 
  test/org/apache/pig/test/TestPigServerLocal.java fbabd03 
  test/org/apache/pig/test/TestProjectRange.java 2e3e7b8 
  test/org/apache/pig/test/TestPruneColumn.java 3936332 
  test/org/apache/pig/test/TestRank1.java 9e4ef62 
  test/org/apache/pig/test/TestRank2.java fc802a9 
  test/org/apache/pig/test/TestRank3.java 43af10d 
  test/org/apache/pig/test/TestSecondarySort.java 8991010 
  test/org/apache/pig/test/TestSkewedJoin.java dba2241 
  test/org/apache/pig/test/TestStoreBase.java eb3b253 
  test/org/apache/pig/test/Util.java 36d01e8 
  test/spark-tests PRE-CREATION 

Diff: https://reviews.apache.org/r/45667/diff/


Testing
-------

New UTs were added where required, and we ensured that the old UTs still pass -> https://builds.apache.org/job/Pig-spark/


Thanks,

Pallavi Rao


Re: Review Request 45667: Support Pig On Spark

Posted by Pallavi Rao <pa...@inmobi.com>.
-----------------------------------------------------------
This is an automatically generated e-mail. To reply, visit:
https://reviews.apache.org/r/45667/
-----------------------------------------------------------

(Updated June 14, 2016, 4:39 a.m.)


Review request for pig, Daniel Dai and Rohini Palaniswamy.


Changes
-------

Uploading patch on behalf of Kelly: the new patch has ~70% of the review comments addressed.


Bugs: PIG-4059 and PIG-4854
    https://issues.apache.org/jira/browse/PIG-4059
    https://issues.apache.org/jira/browse/PIG-4854


Repository: pig-git


Description
-------

The patch contains all the work done in the spark branch so far.


Diffs (updated)
-----

  bin/pig 81f1426 
  build.xml 99ba1f4 
  ivy.xml dd9878e 
  ivy/libraries.properties 3a819a5 
  shims/test/hadoop20/org/apache/pig/test/SparkMiniCluster.java PRE-CREATION 
  shims/test/hadoop23/org/apache/pig/test/SparkMiniCluster.java PRE-CREATION 
  shims/test/hadoop23/org/apache/pig/test/TezMiniCluster.java 792a1bd 
  shims/test/hadoop23/org/apache/pig/test/YarnMiniCluster.java PRE-CREATION 
  src/META-INF/services/org.apache.pig.ExecType 5c034c8 
  src/docs/src/documentation/content/xdocs/start.xml 36f9952 
  src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/PhysicalOperator.java 1ff1abd 
  src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/POUserFunc.java ecf780c 
  src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/plans/PhysicalPlan.java 2376d03 
  src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POCollectedGroup.java bcbfe2b 
  src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POFRJoin.java d80951a 
  src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POForEach.java 21b75f1 
  src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POGlobalRearrange.java 52cfb73 
  src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POMergeJoin.java 13f70c0 
  src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POSort.java c3a82c3 
  src/org/apache/pig/backend/hadoop/executionengine/spark/JobGraphBuilder.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/JobMetricsListener.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/KryoSerializer.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/MapReducePartitionerWrapper.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/SparkExecType.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/SparkExecutionEngine.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLauncher.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/SparkLocalExecType.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/SparkUtil.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/UDFJarsFinder.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/CollectedGroupConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/CounterConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/DistinctConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/FRJoinConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/FilterConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/ForEachConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/GlobalRearrangeConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/IndexedKey.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/IteratorTransform.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/LimitConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/LoadConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/LocalRearrangeConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/MergeCogroupConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/MergeJoinConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/OutputConsumerIterator.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/PackageConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/PigSecondaryKeyComparatorSpark.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/RDDConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/RankConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/ReduceByConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/SkewedJoinConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/SortConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/SplitConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/StoreConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/StreamConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/converter/UnionConverter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/operator/NativeSparkOperator.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/operator/POGlobalRearrangeSpark.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/operator/POReduceBySpark.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/AccumulatorOptimizer.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/CombinerOptimizer.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/MultiQueryOptimizerSpark.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/NoopFilterRemover.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/ParallelismSetter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/optimizer/SecondaryKeyOptimizerSpark.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/plan/DotSparkPrinter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkCompiler.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkCompilerException.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkOpPlanVisitor.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkOperPlan.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkOperator.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkPOPackageAnnotator.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/plan/SparkPrinter.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/spark/running/PigInputFormatSpark.java PRE-CREATION 
  src/org/apache/pig/backend/hadoop/executionengine/util/AccumulatorOptimizerUtil.java c4b44ad 
  src/org/apache/pig/backend/hadoop/executionengine/util/CombinerOptimizerUtil.java 889c01b 
  src/org/apache/pig/backend/hadoop/executionengine/util/SecondaryKeyOptimizerUtil.java 0b59c9c 
  src/org/apache/pig/backend/hadoop/hbase/HBaseStorage.java e0581d9 
  src/org/apache/pig/data/SelfSpillBag.java d17f0a8 
  src/org/apache/pig/impl/PigContext.java d43949f 
  src/org/apache/pig/impl/plan/OperatorPlan.java 8b2e2e7 
  src/org/apache/pig/tools/pigstats/PigStatsUtil.java 542cc2e 
  src/org/apache/pig/tools/pigstats/spark/SparkCounter.java PRE-CREATION 
  src/org/apache/pig/tools/pigstats/spark/SparkCounterGroup.java PRE-CREATION 
  src/org/apache/pig/tools/pigstats/spark/SparkCounters.java PRE-CREATION 
  src/org/apache/pig/tools/pigstats/spark/SparkJobStats.java PRE-CREATION 
  src/org/apache/pig/tools/pigstats/spark/SparkPigStats.java PRE-CREATION 
  src/org/apache/pig/tools/pigstats/spark/SparkPigStatusReporter.java PRE-CREATION 
  src/org/apache/pig/tools/pigstats/spark/SparkScriptState.java PRE-CREATION 
  src/org/apache/pig/tools/pigstats/spark/SparkStatsUtil.java PRE-CREATION 
  test/e2e/pig/build.xml f7c38ba 
  test/e2e/pig/conf/spark.conf PRE-CREATION 
  test/e2e/pig/drivers/TestDriverPig.pm bf9c302 
  test/e2e/pig/tests/streaming.conf 18f2fb2 
  test/excluded-tests-spark PRE-CREATION 
  test/org/apache/pig/newplan/logical/relational/TestLocationInPhysicalPlan.java 94b34b3 
  test/org/apache/pig/spark/TestIndexedKey.java PRE-CREATION 
  test/org/apache/pig/spark/TestSecondarySortSpark.java PRE-CREATION 
  test/org/apache/pig/test/MiniGenericCluster.java 9347269 
  test/org/apache/pig/test/TestAssert.java 6d4b5c6 
  test/org/apache/pig/test/TestBuiltin.java fbc3f1e 
  test/org/apache/pig/test/TestCase.java c9bb2fa 
  test/org/apache/pig/test/TestCollectedGroup.java a958d33 
  test/org/apache/pig/test/TestCombiner.java df44293 
  test/org/apache/pig/test/TestCubeOperator.java de96e6c 
  test/org/apache/pig/test/TestEvalPipeline.java 9efde13 
  test/org/apache/pig/test/TestEvalPipeline2.java c8f51d7 
  test/org/apache/pig/test/TestEvalPipelineLocal.java c12d595 
  test/org/apache/pig/test/TestFinish.java f18c103 
  test/org/apache/pig/test/TestForEachNestedPlanLocal.java b0aa3a8 
  test/org/apache/pig/test/TestGrunt.java 9eaf298 
  test/org/apache/pig/test/TestHBaseStorage.java 8d2ad85 
  test/org/apache/pig/test/TestLimitVariable.java 53b9dae 
  test/org/apache/pig/test/TestMapSideCogroup.java 2c78b4a 
  test/org/apache/pig/test/TestMergeJoin.java f1a9608 
  test/org/apache/pig/test/TestMergeJoinOuter.java 81aee55 
  test/org/apache/pig/test/TestMultiQuery.java c32eab7 
  test/org/apache/pig/test/TestMultiQueryLocal.java b9ac035 
  test/org/apache/pig/test/TestNativeMapReduce.java c4f6573 
  test/org/apache/pig/test/TestNullConstant.java 3ea4509 
  test/org/apache/pig/test/TestPigRunner.java fde8609 
  test/org/apache/pig/test/TestPigServerLocal.java fbabd03 
  test/org/apache/pig/test/TestProjectRange.java 2e3e7b8 
  test/org/apache/pig/test/TestPruneColumn.java 3936332 
  test/org/apache/pig/test/TestRank1.java 9e4ef62 
  test/org/apache/pig/test/TestRank2.java fc802a9 
  test/org/apache/pig/test/TestRank3.java 43af10d 
  test/org/apache/pig/test/TestSecondarySort.java 8991010 
  test/org/apache/pig/test/TestSkewedJoin.java dba2241 
  test/org/apache/pig/test/TestStoreBase.java eb3b253 
  test/org/apache/pig/test/Util.java 36d01e8 
  test/spark-tests PRE-CREATION 

Diff: https://reviews.apache.org/r/45667/diff/


Testing
-------

New UTs were added where required, and we ensured that the old UTs still pass -> https://builds.apache.org/job/Pig-spark/


Thanks,

Pallavi Rao