Posted to commits@pig.apache.org by zl...@apache.org on 2017/02/22 09:43:46 UTC

svn commit: r1783988 [4/24] - in /pig/branches/spark: ./ bin/ conf/ contrib/piggybank/java/ contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/ contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/util/apachelo...

Modified: pig/branches/spark/bin/pig
URL: http://svn.apache.org/viewvc/pig/branches/spark/bin/pig?rev=1783988&r1=1783987&r2=1783988&view=diff
==============================================================================
--- pig/branches/spark/bin/pig (original)
+++ pig/branches/spark/bin/pig Wed Feb 22 09:43:41 2017
@@ -330,7 +330,8 @@ HADOOP_CORE_JAR=`echo ${HADOOP_HOME}/had
 if [ -z "$HADOOP_CORE_JAR" ]; then
     HADOOP_VERSION=2
 else
-    HADOOP_VERSION=1
+    echo "Pig requires Hadoop 2 to be present in HADOOP_HOME (currently: $HADOOP_HOME). Please install Hadoop 2.x"
+    exit 1
 fi
 
 # if using HBase, likely want to include HBase jars and config
@@ -439,11 +440,7 @@ if [ -n "$HADOOP_BIN" ]; then
     if [ -n "$PIG_JAR" ]; then
         CLASSPATH=${CLASSPATH}:$PIG_JAR
     else
-        if [ "$HADOOP_VERSION" == "1" ]; then
-            echo "Cannot locate pig-core-h${HADOOP_VERSION}.jar. do 'ant jar', and try again"
-        else
-            echo "Cannot locate pig-core-h${HADOOP_VERSION}.jar. do 'ant -Dhadoopversion=23 jar', and try again"
-        fi
+        echo "Cannot locate pig-core-h${HADOOP_VERSION}.jar. do 'ant jar', and try again"
         exit 1
     fi
 
@@ -464,8 +461,8 @@ if [ -n "$HADOOP_BIN" ]; then
         exec "$HADOOP_BIN" jar "$PIG_JAR" "${remaining[@]}"
     fi
 else
-    # use hadoop-core.jar to run local mode
-    PIG_JAR=`echo $PIG_HOME/pig*-core-h1.jar`
+    # use bundled hadoop to run local mode
+    PIG_JAR=`echo $PIG_HOME/pig*-core-h2.jar`
 
     if [ -n "$PIG_JAR" ]; then
         CLASSPATH="${CLASSPATH}:$PIG_JAR"
@@ -474,12 +471,12 @@ else
         exit 1
     fi
 
-    for f in $PIG_HOME/lib/h1/*.jar; do
+    for f in $PIG_HOME/lib/h2/*.jar; do
         CLASSPATH=${CLASSPATH}:$f;
     done
 
-    # Add bundled hadoop-core.jar
-    for f in $PIG_HOME/lib/hadoop1-runtime/*.jar; do
+    # Add bundled hadoop jars
+    for f in $PIG_HOME/lib/hadoop2-runtime/*.jar; do
         CLASSPATH=${CLASSPATH}:$f;
     done
 

Modified: pig/branches/spark/bin/pig.py
URL: http://svn.apache.org/viewvc/pig/branches/spark/bin/pig.py?rev=1783988&r1=1783987&r2=1783988&view=diff
==============================================================================
--- pig/branches/spark/bin/pig.py (original)
+++ pig/branches/spark/bin/pig.py Wed Feb 22 09:43:41 2017
@@ -338,7 +338,7 @@ hadoopCoreJars = glob.glob(os.path.join(
 if len(hadoopCoreJars) == 0:
   hadoopVersion = 2
 else:
-  hadoopVersion = 1
+  sys.exit("Cannot locate Hadoop 2 binaries, please install Hadoop 2.x and try again.")
 
 if hadoopBin != "":
   if debug == True:
@@ -361,10 +361,7 @@ if hadoopBin != "":
       if len(pigJars) == 1:
         pigJar = pigJars[0]
       else:
-        if hadoopVersion == 1:
-          sys.exit("Cannot locate pig-core-h1.jar do 'ant jar', and try again")
-        else:
-          sys.exit("Cannot locate pig-core-h2.jar do 'ant -Dhadoopversion=23 jar', and try again")
+        sys.exit("Cannot locate pig-core-h2.jar do 'ant jar', and try again")
 
   pigLibJars = glob.glob(os.path.join(os.environ['PIG_HOME']+"/lib", "h" + str(hadoopVersion), "*.jar"))
   for jar in pigLibJars:
@@ -393,13 +390,13 @@ if hadoopBin != "":
 else:
   # fall back to use fat pig.jar
   if debug == True:
-    print "Cannot find local hadoop installation, using bundled hadoop 1"
-    
-  if os.path.exists(os.path.join(os.environ['PIG_HOME'], "pig-core-h1.jar")):
-    pigJar = os.path.join(os.environ['PIG_HOME'], "pig-core-h1.jar")
+    print "Cannot find local hadoop installation, using bundled hadoop 2"
+
+  if os.path.exists(os.path.join(os.environ['PIG_HOME'], "pig-core-h2.jar")):
+    pigJar = os.path.join(os.environ['PIG_HOME'], "pig-core-h2.jar")
 
   else:
-    pigJars = glob.glob(os.path.join(os.environ['PIG_HOME'], "pig-*-core-h1.jar"))
+    pigJars = glob.glob(os.path.join(os.environ['PIG_HOME'], "pig-*-core-h2.jar"))
 
     if len(pigJars) == 1:
       pigJar = pigJars[0]
@@ -407,15 +404,15 @@ else:
     elif len(pigJars) > 1:
       print "Ambiguity with pig jars found the following jars"
       print pigJars
-      sys.exit("Please remove irrelavant jars from %s" % os.path.join(os.environ['PIG_HOME'], "pig-core-h1.jar"))
+      sys.exit("Please remove irrelavant jars from %s" % os.path.join(os.environ['PIG_HOME'], "pig-core-h2.jar"))
     else:
-      sys.exit("Cannot locate pig-core-h1.jar. do 'ant jar' and try again")
+      sys.exit("Cannot locate pig-core-h2.jar. do 'ant jar' and try again")
 
-  pigLibJars = glob.glob(os.path.join(os.environ['PIG_HOME']+"/lib", "h1", "*.jar"))
+  pigLibJars = glob.glob(os.path.join(os.environ['PIG_HOME']+"/lib", "h2", "*.jar"))
   for jar in pigLibJars:
     classpath += os.pathsep + jar
 
-  pigLibJars = glob.glob(os.path.join(os.environ['PIG_HOME']+"/lib", "hadoop1-runtime", "*.jar"))
+  pigLibJars = glob.glob(os.path.join(os.environ['PIG_HOME']+"/lib", "hadoop2-runtime", "*.jar"))
   for jar in pigLibJars:
     classpath += os.pathsep + jar
 
@@ -423,7 +420,7 @@ else:
   pigClass = "org.apache.pig.Main"
   if debug == True:
     print "dry runXXX:"
-    print "%s %s %s -classpath %s %s %s" % (java, javaHeapMax, pigOpts, classpath, pigClass, ' '.join(restArgs)) 
+    print "%s %s %s -classpath %s %s %s" % (java, javaHeapMax, pigOpts, classpath, pigClass, ' '.join(restArgs))
   else:
     cmdLine = java + ' ' + javaHeapMax + ' ' + pigOpts
     cmdLine += ' ' + '-classpath ' + classpath + ' ' + pigClass +  ' ' + ' '.join(restArgs)

Modified: pig/branches/spark/build.xml
URL: http://svn.apache.org/viewvc/pig/branches/spark/build.xml?rev=1783988&r1=1783987&r2=1783988&view=diff
==============================================================================
--- pig/branches/spark/build.xml (original)
+++ pig/branches/spark/build.xml Wed Feb 22 09:43:41 2017
@@ -20,6 +20,13 @@
          xmlns:ivy="antlib:org.apache.ivy.ant">
     <!-- Load all the default properties, and any the user wants    -->
     <!-- to contribute (without having to type -D or edit this file -->
+
+    <taskdef resource="net/sf/antcontrib/antcontrib.properties">
+        <classpath>
+            <pathelement location="${basedir}/ivy/ant-contrib-1.0b3.jar"/>
+        </classpath>
+    </taskdef>
+
     <property file="${user.home}/build.properties" />
     <property file="${basedir}/build.properties" />
 
@@ -31,11 +38,11 @@
     <property name="pigsmoke.pom" value="${basedir}/ivy/pigsmoke.pom" />
     <property name="pigunit.pom" value="${basedir}/ivy/pigunit.pom" />
     <property name="piggybank.pom" value="${basedir}/ivy/piggybank.pom" />
-    <property name="pig.version" value="0.16.0" />
+    <property name="pig.version" value="0.17.0" />
     <property name="pig.version.suffix" value="-SNAPSHOT" />
     <property name="version" value="${pig.version}${pig.version.suffix}" />
     <property name="final.name" value="${name}-${version}" />
-    <property name="year" value="2007-2012" />
+    <property name="year" value="2007-2016" />
 
     <!-- source properties -->
     <property name="lib.dir" value="${basedir}/lib" />
@@ -70,7 +77,6 @@
 
     <!-- artifact jar file names -->
     <property name="artifact.pig.jar" value="${final.name}.jar"/>
-    <property name="artifact.pig-h1.jar" value="${final.name}-h1.jar"/>
     <property name="artifact.pig-h2.jar" value="${final.name}-h2.jar"/>
     <property name="artifact.pig-sources.jar" value="${final.name}-sources.jar"/>
     <property name="artifact.pig-javadoc.jar" value="${final.name}-javadoc.jar"/>
@@ -78,15 +84,12 @@
 
     <!-- jar names. TODO we might want to use the svn reversion name in the name in case it is a dev version -->
     <property name="output.jarfile.withouthadoop" value="${build.dir}/${final.name}-withouthadoop.jar" />
-    <property name="output.jarfile.withouthadoop-h1" value="${legacy.dir}/${final.name}-withouthadoop-h1.jar" />
     <property name="output.jarfile.withouthadoop-h2" value="${legacy.dir}/${final.name}-withouthadoop-h2.jar" />
     <property name="output.jarfile.core" value="${build.dir}/${artifact.pig.jar}" />
-    <property name="output.jarfile.core-h1" value="${build.dir}/${artifact.pig-h1.jar}" />
     <property name="output.jarfile.core-h2" value="${build.dir}/${artifact.pig-h2.jar}" />
     <property name="output.jarfile.sources" value="${build.dir}/${artifact.pig-sources.jar}" />
     <property name="output.jarfile.javadoc" value="${build.dir}/${artifact.pig-javadoc.jar}" />
     <!-- Maintain old pig.jar in top level directory. -->
-    <property name="output.jarfile.backcompat-core-h1" value="${basedir}/${final.name}-core-h1.jar" />
     <property name="output.jarfile.backcompat-core-h2" value="${basedir}/${final.name}-core-h2.jar" />
 
     <!-- test properties -->
@@ -107,8 +110,6 @@
     <property name="test.spark.file" value="${test.src.dir}/spark-tests"/>
     <property name="test.spark_local.file" value="${test.src.dir}/spark-local-tests"/>
     <property name="test.exclude.file" value="${test.src.dir}/excluded-tests"/>
-    <property name="test.exclude.file.20" value="${test.src.dir}/excluded-tests-20"/>
-    <property name="test.exclude.file.23" value="${test.src.dir}/excluded-tests-23"/>
     <property name="test.exclude.file.mr" value="${test.src.dir}/excluded-tests-mr"/>
     <property name="test.exclude.file.tez" value="${test.src.dir}/excluded-tests-tez"/>
     <property name="test.exclude.file.spark" value="${test.src.dir}/excluded-tests-spark"/>
@@ -155,9 +156,8 @@
 	
     <target name="setTezEnv">
         <propertyreset name="test.timeout" value="900000" />
-        <propertyreset name="hadoopversion" value="23" />
-        <propertyreset name="isHadoop23" value="true" />
-        <propertyreset name="hbase.hadoop.version" value="hadoop2" />
+        <propertyreset name="hadoopversion" value="2" />
+        <propertyreset name="isHadoop2" value="true" />
         <propertyreset name="src.shims.dir" value="${basedir}/shims/src/hadoop${hadoopversion}" />
         <propertyreset name="src.shims.test.dir" value="${basedir}/shims/test/hadoop${hadoopversion}" />
         <propertyreset name="src.exclude.dir" value="" />
@@ -209,40 +209,42 @@
     <property name="loglevel" value="quiet" />
     <loadproperties srcfile="${ivy.dir}/libraries.properties"/>
 
-    <property name="hadoopversion" value="20" />
 
-    <condition property="isHadoop23">
+    <!--
+      Hadoop master version
+      (Value 23 is translated for backward compatibility in old build scripts)
+    -->
+    <if>
         <equals arg1="${hadoopversion}" arg2="23"/>
-    </condition>
+        <then>
+            <echo>Property setting hadoopversion=23 is deprecated. Overwriting to hadoopversion=2</echo>
+            <var name="hadoopversion" unset="true"/>
+            <property name="hadoopversion" value="2" />
+        </then>
+    </if>
+    <property name="hadoopversion" value="2" />
 
-    <condition property="hbase.hadoop.version" value="hadoop1" else="hadoop2">
-        <not>
-            <equals arg1="${hadoopversion}" arg2="23"/>
-        </not>
+    <condition property="isHadoop2">
+        <equals arg1="${hadoopversion}" arg2="2"/>
     </condition>
 
     <!--
       HBase master version
-      Denotes how the HBase dependencies are layout. Value "94" denotes older
-      format where all HBase code is present in one single jar, which is the
-      way HBase is available up to version 0.94. Value "95" denotes new format
-      where HBase is cut into multiple dependencies per each major subsystem,
-      e.g. "client", "server", ... . Only values "94" and "95" are supported
-      at the moment.
+      (Value 95 is translated for backward compatibility in old build scripts)
     -->
-    <property name="hbaseversion" value="95" />
-
-    <!-- exclude tez code if not hadoop20 -->
-    <condition property="src.exclude.dir" value="**/tez/**" else="">
-        <not>
-            <equals arg1="${hadoopversion}" arg2="23"/>
-        </not>
-    </condition>
+    <if>
+        <equals arg1="${hbaseversion}" arg2="95"/>
+        <then>
+            <echo>Property setting hbaseversion=95 is deprecated. Overwriting to hbaseversion=1</echo>
+            <var name="hbaseversion" unset="true"/>
+            <property name="hbaseversion" value="1" />
+        </then>
+    </if>
+    <property name="hbaseversion" value="1" />
 
     <property name="src.shims.dir" value="${basedir}/shims/src/hadoop${hadoopversion}" />
     <property name="src.shims.test.dir" value="${basedir}/shims/test/hadoop${hadoopversion}" />
 
-    <property name="hadoop.jar" value="hadoop-core-${hadoop-core.version}.jar" />
     <property name="asfrepo" value="https://repository.apache.org"/>
     <property name="asfsnapshotrepo" value="${asfrepo}/content/repositories/snapshots"/>
     <property name="mvnrepo" value="http://repo2.maven.org/maven2"/>
@@ -284,7 +286,7 @@
     <property name="xerces.jar" value="${ivy.lib.dir}/xercesImpl-${xerces.version}.jar"/>
     <property name="jdiff.build.dir" value="${build.docs}/jdiff"/>
     <property name="jdiff.xml.dir" value="${docs.dir}/jdiff"/>
-    <property name="jdiff.stable" value="0.15.0"/>
+    <property name="jdiff.stable" value="0.16.0"/>
     <property name="jdiff.stable.javadoc" value="http://hadoop.apache.org/${name}/docs/r${jdiff.stable}/api/"/>
 
     <!-- Packaging properties -->
@@ -392,12 +394,6 @@
             <include name="joda-time-${joda-time.version}.jar"/>
             <include name="automaton-${automaton.version}.jar"/>
             <include name="jansi-${jansi.version}.jar"/>
-            <include name="jackson-mapper-asl-${jackson.version}.jar" unless="isHadoop23"/>
-            <include name="jackson-core-asl-${jackson.version}.jar" unless="isHadoop23"/>
-            <include name="guava-${guava.version}.jar" unless="isHadoop23"/>
-            <include name="snappy-java-${snappy.version}.jar" unless="isHadoop23"/>
-            <include name="asm-${asm.version}.jar" unless="isHadoop23"/>
-
             <include name="scala*.jar"/>
             <include name="akka*.jar"/>
             <include name="jcl-over-slf4j*.jar"/>
@@ -574,6 +570,7 @@
         <echo>*** Building Main Sources ***</echo>
         <echo>*** To compile with all warnings enabled, supply -Dall.warnings=1 on command line ***</echo>
         <echo>*** Else, you will only be warned about deprecations ***</echo>
+        <echo>*** Hadoop version used: ${hadoopversion} ; HBase version used: ${hbaseversion} ***</echo>
         <compileSources sources="${src.dir};${src.gen.dir};${src.lib.dir}/bzip2;${src.shims.dir}"
             excludes="${src.exclude.dir}" dist="${build.classes}" cp="classpath" warnings="${javac.args.warnings}" />
         <copy todir="${build.classes}/META-INF">
@@ -703,23 +700,6 @@
     </target>
 
     <!-- ================================================================== -->
-    <!-- Facede to build pig.jar for both Hadoop 1 and Hadoop 2             -->
-    <!-- ================================================================== -->
-    <target name="jar-h12" description="Create pig for both Hadoop 1 and Hadoop 2">
-        <propertyreset name="hadoopversion" value="20" />
-        <propertyreset name="src.shims.dir" value="${basedir}/shims/src/hadoop${hadoopversion}" />
-        <antcall target="clean" inheritRefs="true" inheritall="true"/>
-        <antcall target="jar" inheritRefs="true" inheritall="true"/>
-        <antcall target="copyHadoop1LocalRuntimeDependencies"/>
-        <delete dir="${build.dir}" />
-        <propertyreset name="hadoopversion" value="23" />
-        <propertyreset name="hbase.hadoop.version" value="hadoop2" />
-        <propertyreset name="src.shims.dir" value="${basedir}/shims/src/hadoop${hadoopversion}" />
-        <propertyreset name="src.exclude.dir" value="" />
-        <antcall target="jar" inheritRefs="true" inheritall="true"/>
-    </target>
-
-    <!-- ================================================================== -->
     <!-- Make pig.jar                                                       -->
     <!-- ================================================================== -->
     <target name="jar" depends="compile,ivy-buildJar" description="Create pig core jar">
@@ -727,8 +707,8 @@
         <buildJar svnString="${svn.revision}" outputFile="${output.jarfile.withouthadoop}" includedJars="runtime.dependencies-withouthadoop.jar"/>
         <antcall target="copyCommonDependencies"/>
         <antcall target="copySparkDependencies"/>
-        <antcall target="copyh1Dependencies"/>
         <antcall target="copyh2Dependencies"/>
+        <antcall target="copyHadoop2LocalRuntimeDependencies" />
     </target>
 
     <target name="copyCommonDependencies">
@@ -752,7 +732,9 @@
             <fileset dir="${ivy.lib.dir}" includes="jruby-*.jar"/>
             <fileset dir="${ivy.lib.dir}" includes="groovy-*.jar"/>
             <fileset dir="${ivy.lib.dir}" includes="js-*.jar"/>
-            <fileset dir="${ivy.lib.dir}" includes="hbase-*.jar" excludes="hbase-*tests.jar,hbase-*hadoop*.jar"/>
+            <fileset dir="${ivy.lib.dir}" includes="htrace-core*incubating.jar"/>
+            <fileset dir="${ivy.lib.dir}" includes="metrics-core-*.jar"/>
+            <fileset dir="${ivy.lib.dir}" includes="hbase-*.jar" excludes="hbase-*tests.jar,hbase-*hadoop2*.jar"/>
             <fileset dir="${ivy.lib.dir}" includes="hive-*.jar" excludes="hive-shims-0.*.jar"/>
             <fileset dir="${ivy.lib.dir}" includes="protobuf-java-*.jar"/>
             <fileset dir="${ivy.lib.dir}" includes="zookeeper-*.jar"/>
@@ -772,24 +754,12 @@
         </copy>
     </target>
 
-    <target name="copyh1Dependencies" unless="isHadoop23">
-        <mkdir dir="${lib.dir}/h1" />
-        <copy todir="${lib.dir}/h1">
-            <fileset dir="${ivy.lib.dir}" includes="avro-mapred-*.jar"/>
-            <fileset dir="${ivy.lib.dir}" includes="hive-shims-0.*.jar"/>
-            <fileset dir="${ivy.lib.dir}" includes="hbase-*hadoop1.jar"/>
-        </copy>
-        <copy file="${output.jarfile.core}" tofile="${output.jarfile.backcompat-core-h1}"/>
-        <mkdir dir="${legacy.dir}" />
-        <move file="${output.jarfile.withouthadoop}" tofile="${output.jarfile.withouthadoop-h1}"/>
-    </target>
-
-    <target name="copyh2Dependencies" if="isHadoop23">
+    <target name="copyh2Dependencies" if="isHadoop2">
         <mkdir dir="${lib.dir}/h2" />
         <copy todir="${lib.dir}/h2">
             <fileset dir="${ivy.lib.dir}" includes="avro-mapred-*.jar"/>
             <fileset dir="${ivy.lib.dir}" includes="hive-shims-0.*.jar"/>
-            <fileset dir="${ivy.lib.dir}" includes="hbase-*hadoop2.jar"/>
+            <fileset dir="${ivy.lib.dir}" includes="hbase-hadoop2*.jar"/>
             <fileset dir="${ivy.lib.dir}" includes="tez-*.jar"/>
             <fileset dir="${ivy.lib.dir}" includes="commons-collections4-*.jar"/>
         </copy>
@@ -798,18 +768,21 @@
         <move file="${output.jarfile.withouthadoop}" tofile="${output.jarfile.withouthadoop-h2}"/>
     </target>
 
-    <target name="copyHadoop1LocalRuntimeDependencies">
-        <mkdir dir="${lib.dir}/hadoop1-runtime" />
-        <copy todir="${lib.dir}/hadoop1-runtime">
-            <fileset dir="${ivy.lib.dir}" includes="hadoop-core-*.jar"/>
+    <target name="copyHadoop2LocalRuntimeDependencies">
+        <mkdir dir="${lib.dir}/hadoop2-runtime" />
+        <copy todir="${lib.dir}/hadoop2-runtime">
+            <fileset dir="${ivy.lib.dir}" includes="hadoop-*.jar"/>
             <fileset dir="${ivy.lib.dir}" includes="commons-cli-*.jar"/>
             <fileset dir="${ivy.lib.dir}" includes="commons-configuration-*.jar"/>
+            <fileset dir="${ivy.lib.dir}" includes="commons-collections-*.jar"/>
             <fileset dir="${ivy.lib.dir}" includes="commons-lang-*.jar"/>
             <fileset dir="${ivy.lib.dir}" includes="commons-codec-*.jar"/>
             <fileset dir="${ivy.lib.dir}" includes="commons-io-*.jar"/>
             <fileset dir="${ivy.lib.dir}" includes="commons-logging-*.jar"/>
-            <fileset dir="${ivy.lib.dir}" includes="commons-httpclient-*.jar"/>
+            <fileset dir="${ivy.lib.dir}" includes="httpclient-*.jar"/>
+            <fileset dir="${ivy.lib.dir}" includes="httpcore-*.jar"/>
             <fileset dir="${ivy.lib.dir}" includes="log4j-*.jar"/>
+            <fileset dir="${ivy.lib.dir}" includes="slf4j-*.jar"/>
         </copy>
     </target>
 
@@ -955,6 +928,9 @@
             <sysproperty key="test.exec.type" value="${test.exec.type}" />
             <sysproperty key="ssh.gateway" value="${ssh.gateway}" />
             <sysproperty key="hod.server" value="${hod.server}" />
+            <sysproperty key="build.classes" value="${build.classes}" />
+            <sysproperty key="test.build.classes" value="${test.build.classes}" />
+            <sysproperty key="ivy.lib.dir" value="${ivy.lib.dir}" />
             <sysproperty key="java.io.tmpdir" value="${junit.tmp.dir}" />
             <sysproperty key="hadoop.log.dir" value="${test.log.dir}"/>
             <jvmarg line="-XX:+CMSClassUnloadingEnabled -XX:MaxPermSize=128M ${debugArgs} -Djava.library.path=${hadoop.root}\bin"/>
@@ -980,8 +956,6 @@
                     <patternset>
                        <includesfile name="@{test.file}"/>
                        <excludesfile name="${test.exclude.file}" if="test.exclude.file"/>
-                       <excludesfile name="${test.exclude.file.20}" unless="isHadoop23"/>
-                       <excludesfile name="${test.exclude.file.23}" if="isHadoop23"/>
                        <excludesfile name="${test.exclude.file.for.exectype}"/>
                     </patternset>
                     <exclude name="**/${exclude.testcase}.java" if="exclude.testcase" />
@@ -990,7 +964,9 @@
                 </fileset>
             </batchtest>
             <batchtest fork="yes" todir="${test.log.dir}" if="testcase">
-                <fileset dir="test" includes="**/${testcase}.java"/>
+                <fileset dir="test" includes="**/${testcase}.java">
+                    <exclude name="e2e/**/*.java"/>
+                </fileset>
             </batchtest>
 
             <assertions>
@@ -1008,10 +984,10 @@
 
     <target name="test-core-mrtez" description="run core tests on both mr and tez mode"
             depends="setWindowsPath,setLinuxPath,compile-test,jar,debugger.check,jackson-pig-3039-test-download">
-        <fail message="hadoopversion must be set to 23 when invoking test-core-mrtez">
+        <fail message="hadoopversion must be set to 2 when invoking test-core-mrtez">
           <condition>
             <not>
-              <equals arg1="${hadoopversion}" arg2="23" />
+              <equals arg1="${hadoopversion}" arg2="2" />
             </not>
           </condition>
         </fail>
@@ -1021,6 +997,9 @@
         <propertyreset name="test.exec.type" value="mr" />
         <propertyreset name="test.log.dir" value="${test.build.dir}/logs/${test.exec.type}" />
         <macro-test-runner test.file="${test.all.file}" tests.failed="test.mr.failed"/>
+        <delete>
+            <fileset dir="${build.classes}" includes="*.xml" />
+        </delete>
         <echo />
         <echo message="=======================" />
         <echo message="Running Tez tests" />
@@ -1099,10 +1078,7 @@
     <!-- ================================================================== -->
     <!-- Distribution                                                       -->
     <!-- ================================================================== -->
-    <target name="package-h12" depends="jar-h12, docs, api-report, piggybank" description="Create a Pig tar release">
-        <package-base/>
-    </target>
-	
+
     <target name="package" depends="jar, docs, api-report, piggybank" description="Create a Pig tar release">
         <package-base/>
     </target>
@@ -1122,7 +1098,6 @@
             <fileset dir="${lib.dir}"/>
         </copy>
 
-        <copy file="${output.jarfile.backcompat-core-h1}" tofile="${tar.dist.dir}/${final.name}-core-h1.jar" failonerror="false"/>
         <copy file="${output.jarfile.backcompat-core-h2}" tofile="${tar.dist.dir}/${final.name}-core-h2.jar" failonerror="false"/>
 
         <copy todir="${tar.dist.dir}/lib" file="contrib/piggybank/java/piggybank.jar"/>
@@ -1200,10 +1175,6 @@
         <tar-base/>
     </target>
 
-    <target name="tar-h12" depends="package-h12" description="Source distribution">
-        <tar-base/>
-    </target>
-
     <macrodef name="tar-base">
       <sequential>
         <tar compression="gzip" longfile="gnu" destfile="${build.dir}/${artifact.pig.tar}">
@@ -1289,15 +1260,13 @@
           uri="urn:maven-artifact-ant"
           classpathref="mvn-ant-task.classpath"/>
     </target>
-    <target name="mvn-install" depends="mvn-taskdef,jar-h12, set-version, source-jar,
-      javadoc-jar, pigunit-jar, smoketests-jar, piggybank"
+    <target name="mvn-install" depends="mvn-taskdef, mvn-build, set-version"
          description="To install pig to local filesystem's m2 cache">
          <artifact:pom file="${pig.pom}" id="pig"/>
-          <artifact:install file="${output.jarfile.core-h1}">
+          <artifact:install file="${output.jarfile.core-h2}">
                <pom refid="pig"/>
            <attach file="${output.jarfile.sources}" classifier="sources" />
            <attach file="${output.jarfile.javadoc}" classifier="javadoc" />
-           <attach file="${output.jarfile.core-h2}" classifier="h2" />
           </artifact:install>
          <artifact:pom file="${pigunit.pom}" id="pigunit"/>
           <artifact:install file="${pigunit.jarfile}">
@@ -1313,10 +1282,9 @@
          </artifact:install>
     </target>
 
-    <target name="mvn-build" depends="jar-h12, source-jar,
+    <target name="mvn-build" depends="jar, source-jar,
                                       javadoc-jar, smoketests-jar, pigunit-jar, piggybank"
          description="To build the pig jar artifacts to be deployed to apache maven repository">
-        <move file="${output.jarfile.backcompat-core-h1}" tofile="${output.jarfile.core}"/>
         <move file="${output.jarfile.backcompat-core-h2}" tofile="${output.jarfile.core-h2}"/>
     </target>
 
@@ -1338,8 +1306,6 @@
            <pom refid="pig"/>
            <attach file="${output.jarfile.core}.asc" type="jar.asc"/>
            <attach file="${pig.pom}.asc" type="pom.asc"/>
-           <attach file="${output.jarfile.core-h2}.asc" type="jar.asc" classifier="h2"/>
-           <attach file="${output.jarfile.core-h2}" classifier="h2" />
            <attach file="${output.jarfile.sources}.asc" type="jar.asc" classifier="sources"/>
            <attach file="${output.jarfile.sources}" classifier="sources" />
            <attach file="${output.jarfile.javadoc}.asc" type="jar.asc"  classifier="javadoc"/>
@@ -1374,7 +1340,6 @@
       <artifact:deploy file="${output.jarfile.core}">
               <remoteRepository id="${snapshots_repo_id}" url="${asfsnapshotrepo}"/>
               <pom refid="pig"/>
-              <attach file="${output.jarfile.core-h2}" classifier="h2" />
               <attach file="${output.jarfile.sources}" classifier="sources" />
               <attach file="${output.jarfile.javadoc}" classifier="javadoc" />
       </artifact:deploy>
@@ -1418,8 +1383,6 @@
        </macrodef>
        <sign-artifact input.file="${output.jarfile.core}"
         output.file="${output.jarfile.core}.asc" gpg.passphrase="${gpg.passphrase}"/>
-       <sign-artifact input.file="${output.jarfile.core-h2}"
-        output.file="${output.jarfile.core-h2}.asc" gpg.passphrase="${gpg.passphrase}"/>
        <sign-artifact input.file="${output.jarfile.sources}"
         output.file="${output.jarfile.sources}.asc" gpg.passphrase="${gpg.passphrase}"/>
        <sign-artifact input.file="${output.jarfile.javadoc}"
@@ -1707,7 +1670,9 @@
 
      <target name="ivy-resolve" depends="ivy-init" unless="ivy.resolved" description="Resolve Ivy dependencies">
        <property name="ivy.resolved" value="true"/>
+       <echo>*** Ivy resolve with Hadoop ${hadoopversion} and HBase ${hbaseversion} ***</echo>
        <ivy:resolve log="${loglevel}" settingsRef="${ant.project.name}.ivy.settings" conf="compile"/>
+       <ivy:report toDir="build/ivy/report"/>
      </target>
 
      <target name="ivy-compile" depends="ivy-resolve" description="Retrieve Ivy-managed artifacts for compile configuration">

Modified: pig/branches/spark/conf/pig.properties
URL: http://svn.apache.org/viewvc/pig/branches/spark/conf/pig.properties?rev=1783988&r1=1783987&r2=1783988&view=diff
==============================================================================
--- pig/branches/spark/conf/pig.properties (original)
+++ pig/branches/spark/conf/pig.properties Wed Feb 22 09:43:41 2017
@@ -557,6 +557,9 @@ pig.location.check.strict=false
 #
 hcat.bin=/usr/local/hcat/bin/hcat
 
+# Enable ATS hook to log the Pig specific ATS entry, disable only when ATS server is not deployed
+pig.ats.enabled=true
+
 ###########################################################################
 #
 # Overrides for extreme environments
@@ -611,13 +614,13 @@ hcat.bin=/usr/local/hcat/bin/hcat
 # If you want Pig to allow certain errors before failing you can set this property.
 # If the propery is set to true and the StoreFunc implements ErrorHandling if will allow configurable errors 
 # based on the OutputErrorHandler implementation  
-# pig.allow.store.errors = false
+# pig.error-handling.enabled = false
 #
 # Controls the minimum number of errors for store
-# pig.errors.min.records = 0
+# pig.error-handling.min.error.records = 0
 #
 # Set the threshold for percentage of errors
-# pig.error.threshold.percent = 0.0f
+# pig.error-handling.error.threshold = 0.0f
 
 ###########################################################################
 #
@@ -675,3 +678,6 @@ hcat.bin=/usr/local/hcat/bin/hcat
      
 pig.sort.readonce.loadfuncs=org.apache.pig.backend.hadoop.hbase.HBaseStorage,org.apache.pig.backend.hadoop.accumulo.AccumuloStorage
 
+# If set, Pig will override tez.am.launch.cmd-opts and tez.am.resource.memory.mb to optimal
+# even they are set to a different value. Default value is true.
+#pig.tez.configure.am.memory=false

Modified: pig/branches/spark/contrib/piggybank/java/build.xml
URL: http://svn.apache.org/viewvc/pig/branches/spark/contrib/piggybank/java/build.xml?rev=1783988&r1=1783987&r2=1783988&view=diff
==============================================================================
--- pig/branches/spark/contrib/piggybank/java/build.xml (original)
+++ pig/branches/spark/contrib/piggybank/java/build.xml Wed Feb 22 09:43:41 2017
@@ -16,13 +16,20 @@
 -->
 
 <project basedir="." default="jar" name="pigudf">
+
+    <taskdef resource="net/sf/antcontrib/antcontrib.properties">
+        <classpath>
+            <pathelement location="../../../ivy/ant-contrib-1.0b3.jar"/>
+        </classpath>
+    </taskdef>
+
     <property file="../../../build.properties" />
     <!-- javac properties -->
     <property name="javac.debug" value="on" />
     <property name="javac.level" value="source,lines,vars"/>
     <property name="javac.optimize" value="on" />
     <property name="javac.deprecation" value="off" />
-    <property name="javac.version" value="1.6" />
+    <property name="javac.version" value="1.7" />
     <property name="javac.args" value="" />
     <!-- TODO we should use warning...   <property name="javac.args.warnings" value="-Xlint:unchecked" /> -->
     <property name="javac.args.warnings" value="" />
@@ -38,16 +45,22 @@
     <property name="src.dir" value="src/main/java/org/apache/pig/piggybank" />
     <property name="hsqldb.jar" value="../../../build/ivy/lib/Pig/hsqldb-1.8.0.10.jar"/>
 
-    <!-- JobHistoryLoader currently does not support 0.23 -->
-    <condition property="build.classes.excludes" value="**/HadoopJobHistoryLoader.java" else="">
-        <equals arg1="${hadoopversion}" arg2="23"/>
-    </condition>
-    <condition property="test.classes.excludes" value="**/TestHadoopJobHistoryLoader.java" else="">
+    <!--
+      Hadoop master version
+      (Value 23 is translated for backward compatibility in old build scripts)
+    -->
+    <if>
         <equals arg1="${hadoopversion}" arg2="23"/>
-    </condition>
+        <then>
+            <echo>Property setting hadoopversion=23 is deprecated. Overwriting to hadoopversion=2</echo>
+            <var name="hadoopversion" unset="true"/>
+            <property name="hadoopversion" value="2" />
+        </then>
+    </if>
+    <property name="hadoopversion" value="2" />
 
-    <condition property="hadoopsuffix" value="2" else="1">
-        <equals arg1="${hadoopversion}" arg2="23"/>
+    <condition property="hadoopsuffix" value="2" else="">
+        <equals arg1="${hadoopversion}" arg2="2"/>
     </condition>
 
     <!-- jar properties -->

Modified: pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/MaxTupleBy1stField.java
URL: http://svn.apache.org/viewvc/pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/MaxTupleBy1stField.java?rev=1783988&r1=1783987&r2=1783988&view=diff
==============================================================================
--- pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/MaxTupleBy1stField.java (original)
+++ pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/MaxTupleBy1stField.java Wed Feb 22 09:43:41 2017
@@ -21,6 +21,7 @@ package org.apache.pig.piggybank.evaluat
 import java.io.IOException;
 import java.util.Iterator;
 
+import org.apache.pig.Accumulator;
 import org.apache.pig.Algebraic;
 import org.apache.pig.EvalFunc;
 import org.apache.pig.backend.executionengine.ExecException;
@@ -43,7 +44,7 @@ import org.apache.pig.impl.logicalLayer.
  * 
  * @author Vadim Zaliva <lo...@codemindes.com>
  */
-public class MaxTupleBy1stField extends EvalFunc<Tuple> implements Algebraic
+public class MaxTupleBy1stField extends EvalFunc<Tuple> implements Algebraic, Accumulator<Tuple>
 {
     /**
      * Indicates once for how many items progress hartbeat should be sent.
@@ -131,6 +132,11 @@ public class MaxTupleBy1stField extends
     protected static Tuple max(Tuple input, PigProgressable reporter) throws ExecException
     {
         DataBag values = (DataBag) input.get(0);
+        return max(values,reporter);
+    }
+
+    protected static Tuple max(DataBag values, PigProgressable reporter) throws ExecException
+    {
 
         // if we were handed an empty bag, return NULL
         // this is in compliance with SQL standard
@@ -183,4 +189,44 @@ public class MaxTupleBy1stField extends
         return Final.class.getName();
     }
 
+
+    /**
+     * Accumulator implementation
+     */
+
+    private Tuple intermediate = null;
+
+    /**
+     * Accumulate implementation - calls max() on the incoming tuple set including intermediate tuple if already exists
+     * @param b A tuple containing a single field, which is a bag.  The bag will contain the set
+     * @throws IOException
+     */
+    @Override
+    public void accumulate(Tuple b) throws IOException {
+        try{
+            DataBag values = BagFactory.getInstance().newDefaultBag();
+            values.addAll((DataBag) b.get(0));
+
+            if (intermediate != null) {
+                values.add(intermediate);
+            }
+            intermediate = max(values,reporter);
+
+        }catch (ExecException ee){
+            IOException oughtToBeEE = new IOException();
+            oughtToBeEE.initCause(ee);
+            throw oughtToBeEE;
+        }
+    }
+
+    @Override
+    public Tuple getValue() {
+        return intermediate;
+    }
+
+    @Override
+    public void cleanup() {
+        intermediate = null;
+    }
+
 }
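
With the Accumulator interface added above, Pig can feed the grouped bag to MaxTupleBy1stField in chunks instead of materializing it all at once, while the existing exec/Algebraic paths are unchanged. A minimal Pig Latin sketch of the usual invocation; field names, types and the input path are illustrative:

    REGISTER piggybank.jar;
    A = LOAD 'data' AS (score:long, url:chararray);
    B = GROUP A ALL;
    -- with this patch the same call can also run in accumulative mode
    C = FOREACH B GENERATE org.apache.pig.piggybank.evaluation.MaxTupleBy1stField(A);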

Modified: pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/Over.java
URL: http://svn.apache.org/viewvc/pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/Over.java?rev=1783988&r1=1783987&r2=1783988&view=diff
==============================================================================
--- pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/Over.java (original)
+++ pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/Over.java Wed Feb 22 09:43:41 2017
@@ -23,10 +23,13 @@ import java.util.Iterator;
 import java.util.List;
 
 import org.apache.pig.EvalFunc;
-import org.apache.pig.FuncSpec;
 import org.apache.pig.PigException;
 import org.apache.pig.backend.executionengine.ExecException;
 import org.apache.pig.builtin.AVG;
+import org.apache.pig.builtin.BigDecimalAvg;
+import org.apache.pig.builtin.BigDecimalMax;
+import org.apache.pig.builtin.BigDecimalMin;
+import org.apache.pig.builtin.BigDecimalSum;
 import org.apache.pig.builtin.COUNT;
 import org.apache.pig.builtin.DoubleAvg;
 import org.apache.pig.builtin.DoubleMax;
@@ -54,6 +57,7 @@ import org.apache.pig.data.Tuple;
 import org.apache.pig.data.TupleFactory;
 import org.apache.pig.impl.logicalLayer.FrontendException;
 import org.apache.pig.impl.logicalLayer.schema.Schema;
+import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema;
 
 /**
  * Given an aggregate function, a bag, and possibly a window definition,
@@ -73,23 +77,27 @@ import org.apache.pig.impl.logicalLayer.
  *           <li>sum(int)</li>
  *           <li>sum(long)</li>
  *           <li>sum(bytearray)</li>
+ *           <li>sum(bigdecimal)</li>
  *           <li>avg(double)</li>
  *           <li>avg(float)</li>
  *           <li>avg(long)</li>
  *           <li>avg(int)</li>
  *           <li>avg(bytearray)</li>
+ *           <li>avg(bigdecimal)</li>
  *           <li>min(double)</li>
  *           <li>min(float)</li>
  *           <li>min(long)</li>
  *           <li>min(int)</li>
  *           <li>min(chararray)</li>
  *           <li>min(bytearray)</li>
+ *           <li>min(bigdecimal)</li>
  *           <li>max(double)</li>
  *           <li>max(float)</li>
  *           <li>max(long)</li>
  *           <li>max(int)</li>
  *           <li>max(chararray)</li>
  *           <li>max(bytearray)</li>
+ *           <li>max(bigdecimal)</li>
  *           <li>row_number</li>
  *           <li>first_value</li>
  *           <li>last_value</li>
@@ -153,7 +161,8 @@ import org.apache.pig.impl.logicalLayer.
  * current row and 3 following) over T;</tt>
  *
  * <p>Over accepts a constructor argument specifying the name and type,
- * colon-separated, of its return schema.</p>
+ * colon-separated, of its return schema. If the argument option is 'true' use the inner-search,
+ * take the name and type of bag and return a schema with alias+'_over' and the same type</p>
  *
  * <p><pre>
  * DEFINE IOver org.apache.pig.piggybank.evaluation.Over('state_rk:int');
@@ -188,12 +197,14 @@ public class Over extends EvalFunc<DataB
     private Object[] udfArgs;
     private byte   returnType;
     private String returnName;
+    private boolean searchInnerType;
 
     public Over() {
         initialized = false;
         udfArgs = null;
         func = null;
         returnType = DataType.UNKNOWN;
+        searchInnerType = false;
     }
 
     public Over(String typespec) {
@@ -202,12 +213,16 @@ public class Over extends EvalFunc<DataB
             String[] fn_tn = typespec.split(":", 2);
             this.returnName = fn_tn[0];
             this.returnType = DataType.findTypeByName(fn_tn[1]);
-        } else {
+        } else if(Boolean.parseBoolean(typespec)) {
+            searchInnerType = Boolean.parseBoolean(typespec);
+        }else{
             this.returnName = "result";
             this.returnType = DataType.findTypeByName(typespec);
-        }
+        }       
     }
 
+
+
     @Override
     public DataBag exec(Tuple input) throws IOException {
         if (input == null || input.size() < 2) {
@@ -255,19 +270,42 @@ public class Over extends EvalFunc<DataB
     @Override
     public Schema outputSchema(Schema inputSch) {
         try {
-            if (returnType == DataType.UNKNOWN) {
+            FieldSchema field;
+
+            if (searchInnerType) {
+                field = new FieldSchema(inputSch.getField(0));
+                while (searchInnerType) {
+                    if (field.schema != null
+                            && field.schema.getFields().size() > 1) {
+                        searchInnerType = false;
+                    } else {
+                        if (field.type == DataType.TUPLE
+                                || field.type == DataType.BAG) {
+                            field = new FieldSchema(field.schema.getField(0));
+                        } else {
+                            field.alias = field.alias + "_over";
+                            searchInnerType = false;
+                        }
+                    }
+                }
+
+                searchInnerType = true;
+            } else if (returnType == DataType.UNKNOWN) {
                 return Schema.generateNestedSchema(DataType.BAG, DataType.NULL);
             } else {
-                Schema outputTupleSchema = new Schema(new Schema.FieldSchema(returnName, returnType));
-                return new Schema(new Schema.FieldSchema(
-                        getSchemaName(this.getClass().getName().toLowerCase(), inputSch),
-                            outputTupleSchema, 
-                            DataType.BAG));
+                field = new Schema.FieldSchema(returnName, returnType);
             }
+
+            Schema outputTupleSchema = new Schema(field);
+            return new Schema(new Schema.FieldSchema(getSchemaName(this
+                    .getClass().getName().toLowerCase(), inputSch),
+                    outputTupleSchema, DataType.BAG));
+
         } catch (FrontendException fe) {
             throw new RuntimeException("Unable to create nested schema", fe);
         }
     }
+    
 
     private void init(Tuple input) throws IOException {
         initialized = true;
@@ -329,6 +367,8 @@ public class Over extends EvalFunc<DataB
             func = new LongSum();
         } else if ("sum(bytearray)".equalsIgnoreCase(agg)) {
             func = new SUM();
+        } else if ("sum(bigdecimal)".equalsIgnoreCase(agg)) {
+            func = new BigDecimalSum();
         } else if ("avg(double)".equalsIgnoreCase(agg)) {
             func = new DoubleAvg();
         } else if ("avg(float)".equalsIgnoreCase(agg)) {
@@ -339,6 +379,8 @@ public class Over extends EvalFunc<DataB
             func = new IntAvg();
         } else if ("avg(bytearray)".equalsIgnoreCase(agg)) {
             func = new AVG();
+        } else if ("avg(bigdecimal)".equalsIgnoreCase(agg)) {
+            func = new BigDecimalAvg();
         } else if ("min(double)".equalsIgnoreCase(agg)) {
             func = new DoubleMin();
         } else if ("min(float)".equalsIgnoreCase(agg)) {
@@ -351,6 +393,8 @@ public class Over extends EvalFunc<DataB
             func = new StringMin();
         } else if ("min(bytearray)".equalsIgnoreCase(agg)) {
             func = new MIN();
+        } else if ("min(bigdecimal)".equalsIgnoreCase(agg)) {
+            func = new BigDecimalMin();
         } else if ("max(double)".equalsIgnoreCase(agg)) {
             func = new DoubleMax();
         } else if ("max(float)".equalsIgnoreCase(agg)) {
@@ -363,6 +407,8 @@ public class Over extends EvalFunc<DataB
             func = new StringMax();
         } else if ("max(bytearray)".equalsIgnoreCase(agg)) {
             func = new MAX();
+        } else if ("max(bigdecimal)".equalsIgnoreCase(agg)) {
+            func = new BigDecimalMax();
         } else if ("row_number".equalsIgnoreCase(agg)) {
             func = new RowNumber();
         } else if ("first_value".equalsIgnoreCase(agg)) {

Modified: pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/util/apachelogparser/SearchEngineExtractor.java
URL: http://svn.apache.org/viewvc/pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/util/apachelogparser/SearchEngineExtractor.java?rev=1783988&r1=1783987&r2=1783988&view=diff
==============================================================================
--- pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/util/apachelogparser/SearchEngineExtractor.java (original)
+++ pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/util/apachelogparser/SearchEngineExtractor.java Wed Feb 22 09:43:41 2017
@@ -363,6 +363,15 @@ public class SearchEngineExtractor exten
         searchEngines.put("search.lycos.com", "Lycos");
         searchEngines.put("search.msn.co.uk", "MSN UK");
         searchEngines.put("search.msn.com", "MSN");
+        searchEngines.put("bing.com", "Bing");
+        searchEngines.put("ssl.bing.com", "Bing");
+        searchEngines.put("cn.bing.com", "Bing China");
+        searchEngines.put("br.bing.com", "Bing Brazil");
+        searchEngines.put("it.bing.com", "Bing Italy");
+        searchEngines.put("be.bing.com", "Bing Netherlands");
+        searchEngines.put("uk.bing.com", "Bing UK");
+        searchEngines.put("hk.bing.com", "Bing Hong Kong");
+        searchEngines.put("nz.bing.com", "Bing New Zeland");
         searchEngines.put("search.myway.com", "MyWay");
         searchEngines.put("search.mywebsearch.com", "My Web Search");
         searchEngines.put("search.ntlworld.com", "NTLWorld");

Modified: pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/xml/XPath.java
URL: http://svn.apache.org/viewvc/pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/xml/XPath.java?rev=1783988&r1=1783987&r2=1783988&view=diff
==============================================================================
--- pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/xml/XPath.java (original)
+++ pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/xml/XPath.java Wed Feb 22 09:43:41 2017
@@ -16,8 +16,11 @@ package org.apache.pig.piggybank.evaluat
 import java.io.IOException;
 import java.io.StringReader;
 import java.util.ArrayList;
+import java.util.Iterator;
 import java.util.List;
 
+import javax.xml.XMLConstants;
+import javax.xml.namespace.NamespaceContext;
 import javax.xml.parsers.DocumentBuilder;
 import javax.xml.parsers.DocumentBuilderFactory;
 import javax.xml.xpath.XPathFactory;
@@ -49,8 +52,7 @@ public class XPath extends EvalFunc<Stri
     
     private static boolean cache = true;
     private static boolean ignoreNamespace = true;
-    public static final String EMPTY_STRING = "";
-    
+
     /**
      * input should contain: 1) xml 2) xpath 
      *                       3) optional cache xml doc flag 
@@ -95,8 +97,13 @@ public class XPath extends EvalFunc<Stri
                 return null;
             }
             
-            if(input.size() > 2)
+            if(input.size() > 2) {
                 cache = (Boolean) input.get(2);
+            }
+
+            if (input.size() > 3) {
+                ignoreNamespace = (Boolean) input.get(3);
+            }
 
             if (!cache || xpath == null || !xml.equals(this.xml)) {
                 final InputSource source = new InputSource(new StringReader(xml));
@@ -104,6 +111,7 @@ public class XPath extends EvalFunc<Stri
                 this.xml = xml; // track the xml for subsequent calls to this udf
 
                 final DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
+                dbf.setNamespaceAware(!ignoreNamespace);
                 final DocumentBuilder db = dbf.newDocumentBuilder();
 
                 this.document = db.parse(source);
@@ -112,14 +120,32 @@ public class XPath extends EvalFunc<Stri
 
                 this.xpath = xpathFactory.newXPath();
 
+                if (!ignoreNamespace) {
+                    xpath.setNamespaceContext(new NamespaceContext() {
+                        @Override
+                        public String getNamespaceURI(String prefix) {
+                            if (prefix.equals(XMLConstants.DEFAULT_NS_PREFIX)) {
+                                return document.lookupNamespaceURI(null);
+                            } else {
+                                return document.lookupNamespaceURI(prefix);
+                            }
+                        }
+
+                        @Override
+                        public String getPrefix(String namespaceURI) {
+                            return document.lookupPrefix(namespaceURI);
+                        }
+
+                        @Override
+                        public Iterator getPrefixes(String namespaceURI) {
+                            return null;
+                        }
+                    });
+                }
             }
 
             String xpathString = (String) input.get(1);
 
-            if (ignoreNamespace) {
-                xpathString = createNameSpaceIgnoreXpathString(xpathString);
-            }
-
             final String value = xpath.evaluate(xpathString, document);
 
             return value;
@@ -165,34 +191,6 @@ public class XPath extends EvalFunc<Stri
         }
         return true;
     }
-    
-    
-    /**
-     * Returns a new the xPathString by adding additional parameters 
-     * in the existing xPathString for ignoring the namespace during compilation.
-     * 
-     * @param String xpathString
-     * @return String modified xpathString
-     */
-    private String createNameSpaceIgnoreXpathString(final String xpathString) {
-        final String QUERY_PREFIX = "//*";
-        final String LOCAL_PREFIX = "[local-name()='";
-        final String LOCAL_POSTFIX = "']";
-        final String SPLITTER = "/";
-
-        try {
-            String xpathStringWithLocalName = EMPTY_STRING;
-            String[] individualNodes = xpathString.split(SPLITTER);
-
-            for (String node : individualNodes) {
-                xpathStringWithLocalName = xpathStringWithLocalName.concat(QUERY_PREFIX + LOCAL_PREFIX + node
-                        + LOCAL_POSTFIX);
-            }
-            return xpathStringWithLocalName;
-        } catch (Exception ex) {
-            return xpathString;
-        }
-    }
 
     /**
      * Returns argument schemas of the UDF.
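
With the changes above, XPath accepts an optional fourth boolean argument: the third still toggles caching of the parsed document, and the fourth controls whether namespaces are ignored (the old local-name() rewriting is removed in favor of a real NamespaceContext when the fourth argument is false). A hedged Pig Latin sketch, assuming the XML document itself declares the 'bk' prefix; paths and expressions are illustrative:

    DEFINE XPath org.apache.pig.piggybank.evaluation.xml.XPath();
    A = LOAD 'xml_data' AS (x:chararray);
    -- default behaviour: namespaces ignored, parsed document cached
    B = FOREACH A GENERATE XPath(x, 'book/author');
    -- new: cache the document (true) and make the lookup namespace-aware (false = do not ignore)
    C = FOREACH A GENERATE XPath(x, 'bk:book/bk:author', true, false);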

Modified: pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/CSVExcelStorage.java
URL: http://svn.apache.org/viewvc/pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/CSVExcelStorage.java?rev=1783988&r1=1783987&r2=1783988&view=diff
==============================================================================
--- pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/CSVExcelStorage.java (original)
+++ pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/CSVExcelStorage.java Wed Feb 22 09:43:41 2017
@@ -580,7 +580,7 @@ public class CSVExcelStorage extends Pig
                 }
             } else if (b == DOUBLE_QUOTE) {
                 // Does a double quote immediately follow?                  
-                if ((i < recordLen-1) && (buf[i+1] == DOUBLE_QUOTE)) {
+                if ((i < recordLen-1) && (buf[i+1] == DOUBLE_QUOTE) && (fieldBuffer.position() != 0)) {
                     fieldBuffer.put(b);
                     nextTupleSkipChar = true;
                     continue;

Modified: pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/DBStorage.java
URL: http://svn.apache.org/viewvc/pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/DBStorage.java?rev=1783988&r1=1783987&r2=1783988&view=diff
==============================================================================
--- pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/DBStorage.java (original)
+++ pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/DBStorage.java Wed Feb 22 09:43:41 2017
@@ -91,6 +91,7 @@ public class DBStorage extends StoreFunc
   /**
    * Write the tuple to Database directly here.
    */
+  @Override
   public void putNext(Tuple tuple) throws IOException {
     int sqlPos = 1;
     try {
@@ -373,4 +374,9 @@ public class DBStorage extends StoreFunc
       p.setProperty(SCHEMA_SIGNATURE, s.toString());
   }
 
+  @Override
+  public Boolean supportsParallelWriteToStoreLocation() {
+    return false;
+  }
+
 }

Modified: pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/IndexedStorage.java
URL: http://svn.apache.org/viewvc/pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/IndexedStorage.java?rev=1783988&r1=1783987&r2=1783988&view=diff
==============================================================================
--- pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/IndexedStorage.java (original)
+++ pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/IndexedStorage.java Wed Feb 22 09:43:41 2017
@@ -60,7 +60,6 @@ import org.apache.pig.data.TupleFactory;
 import org.apache.pig.impl.util.StorageUtil;
 import org.apache.pig.data.DataType;
 import org.apache.pig.data.DataByteArray;
-import org.apache.pig.backend.hadoop.executionengine.shims.HadoopShims;
 
 /**
  * <code>IndexedStorage</code> is a form of <code>PigStorage</code> that supports a

Modified: pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/MultiStorage.java
URL: http://svn.apache.org/viewvc/pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/MultiStorage.java?rev=1783988&r1=1783987&r2=1783988&view=diff
==============================================================================
--- pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/MultiStorage.java (original)
+++ pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/MultiStorage.java Wed Feb 22 09:43:41 2017
@@ -16,7 +16,9 @@ import java.io.ByteArrayOutputStream;
 import java.io.DataOutputStream;
 import java.io.IOException;
 import java.text.NumberFormat;
+import java.util.ArrayList;
 import java.util.HashMap;
+import java.util.List;
 import java.util.Map;
 
 import org.apache.hadoop.conf.Configuration;
@@ -42,6 +44,9 @@ import org.apache.pig.backend.executione
 import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MRConfiguration;
 import org.apache.pig.data.Tuple;
 import org.apache.pig.impl.util.StorageUtil;
+import org.apache.xml.utils.StringBufferPool;
+
+import com.google.common.base.Strings;
 
 /**
  * The UDF is useful for splitting the output data into a bunch of directories
@@ -73,13 +78,21 @@ import org.apache.pig.impl.util.StorageU
  * If the output is compressed,then the sub directories and the output files will
  * be having the extension. Say for example in the above case if bz2 is used one file 
  * will look like ;/my/home/output.bz2/a1.bz2/a1-0000.bz2
+ *
+ * Key field can also be a comma separated list of indices e.g. '0,1' - in this case
+ * storage will be multi-level:
+ * /my/home/output/a1/b1/a1-b1-0000
+ * /my/home/output/a1/b2/a1-b2-0000
+ * There is also an option to leave key values out of storage, see isRemoveKeys.
  */
 public class MultiStorage extends StoreFunc {
 
+  private static final String KEYFIELD_DELIMETER = ",";
   private Path outputPath; // User specified output Path
-  private int splitFieldIndex = -1; // Index of the key field
+  private final List<Integer> splitFieldIndices= new ArrayList<Integer>(); // Indices of the key fields
   private String fieldDel; // delimiter of the output record.
   private Compression comp; // Compression type of output data.
+  private boolean isRemoveKeys = false;
   
   // Compression types supported by this store
   enum Compression {
@@ -95,9 +108,14 @@ public class MultiStorage extends StoreF
     this(parentPathStr, splitFieldIndex, compression, "\\t");
   }
 
+  public MultiStorage(String parentPathStr, String splitFieldIndex,
+      String compression, String fieldDel) {
+    this(parentPathStr, splitFieldIndex, compression, fieldDel, "false");
+  }
+
   /**
    * Constructor
-   * 
+   *
    * @param parentPathStr
    *          Parent output dir path (this will be specified in store statement,
    *            so MultiStorage don't use this parameter in reality. However, we don't
@@ -108,18 +126,26 @@ public class MultiStorage extends StoreF
    *          'bz2', 'bz', 'gz' or 'none'
    * @param fieldDel
    *          Output record field delimiter.
+   * @param isRemoveKeys
+   *          If 'true', the key columns are removed from the records during write.
    */
   public MultiStorage(String parentPathStr, String splitFieldIndex,
-      String compression, String fieldDel) {
+                      String compression, String fieldDel, String isRemoveKeys) {
+    this.isRemoveKeys = Boolean.parseBoolean(isRemoveKeys);
     this.outputPath = new Path(parentPathStr);
-    this.splitFieldIndex = Integer.parseInt(splitFieldIndex);
+
+    String[] splitFieldIndices = splitFieldIndex.split(KEYFIELD_DELIMITER);
+    for (String splitFieldIndexString : splitFieldIndices){
+      this.splitFieldIndices.add(Integer.parseInt(splitFieldIndexString));
+    }
+
     this.fieldDel = fieldDel;
     try {
       this.comp = (compression == null) ? Compression.none : Compression
-        .valueOf(compression.toLowerCase());
+              .valueOf(compression.toLowerCase());
     } catch (IllegalArgumentException e) {
       System.err.println("Exception when converting compression string: "
-          + compression + " to enum. No compression will be used");
+              + compression + " to enum. No compression will be used");
       this.comp = Compression.none;
     }
   }
@@ -127,22 +153,26 @@ public class MultiStorage extends StoreF
   //--------------------------------------------------------------------------
   // Implementation of StoreFunc
 
-  private RecordWriter<String, Tuple> writer;
+  private RecordWriter<List<String>, Tuple> writer;
   
   @Override
   public void putNext(Tuple tuple) throws IOException {
-    if (tuple.size() <= splitFieldIndex) {
-      throw new IOException("split field index:" + this.splitFieldIndex
-          + " >= tuple size:" + tuple.size());
+    for (int splitFieldIndex : this.splitFieldIndices) {
+      if (tuple.size() <= splitFieldIndex) {
+        throw new IOException("split field index:" + splitFieldIndex
+                + " >= tuple size:" + tuple.size());
+      }
     }
-    Object field = null;
-    try {
-      field = tuple.get(splitFieldIndex);
-    } catch (ExecException exec) {
-      throw new IOException(exec);
+    List<String> fields = new ArrayList<String>();
+    for (int splitFieldIndex : this.splitFieldIndices){
+      try {
+        fields.add(String.valueOf(tuple.get(splitFieldIndex)));
+      } catch (ExecException exec) {
+        throw new IOException(exec);
+      }
     }
     try {
-      writer.write(String.valueOf(field), tuple);
+      writer.write(fields, tuple);
     } catch (InterruptedException e) {
       throw new IOException(e);
     }
@@ -153,6 +183,9 @@ public class MultiStorage extends StoreF
   public OutputFormat getOutputFormat() throws IOException {
       MultiStorageOutputFormat format = new MultiStorageOutputFormat();
       format.setKeyValueSeparator(fieldDel);
+      if (this.isRemoveKeys){
+        format.setSkipIndices(this.splitFieldIndices);
+      }
       return format;
   }
     
@@ -174,27 +207,33 @@ public class MultiStorage extends StoreF
       FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
     }
   }
- 
+
+  @Override
+  public Boolean supportsParallelWriteToStoreLocation() {
+    return false;
+  }
+
   //--------------------------------------------------------------------------
   // Implementation of OutputFormat
   
   public static class MultiStorageOutputFormat extends
-  TextOutputFormat<String, Tuple> {
+  TextOutputFormat<List<String>, Tuple> {
 
     private String keyValueSeparator = "\\t";
     private byte fieldDel = '\t';
-  
+    private List<Integer> skipIndices = null;
+
     @Override
-    public RecordWriter<String, Tuple> 
+    public RecordWriter<List<String>, Tuple>
     getRecordWriter(TaskAttemptContext context
                 ) throws IOException, InterruptedException {
     
       final TaskAttemptContext ctx = context;
         
-      return new RecordWriter<String, Tuple>() {
+      return new RecordWriter<List<String>, Tuple>() {
 
-        private Map<String, MyLineRecordWriter> storeMap = 
-              new HashMap<String, MyLineRecordWriter>();
+        private Map<List<String>, MyLineRecordWriter> storeMap =
+              new HashMap<List<String>, MyLineRecordWriter>();
           
         private static final int BUFFER_SIZE = 1024;
           
@@ -202,7 +241,7 @@ public class MultiStorage extends StoreF
               new ByteArrayOutputStream(BUFFER_SIZE);
                            
         @Override
-        public void write(String key, Tuple val) throws IOException {                
+        public void write(List<String> key, Tuple val) throws IOException {
           int sz = val.size();
           for (int i = 0; i < sz; i++) {
             Object field;
@@ -212,9 +251,13 @@ public class MultiStorage extends StoreF
               throw ee;
             }
 
-            StorageUtil.putField(mOut, field);
+            boolean skipCurrentField = skipIndices != null && skipIndices.contains(i);
 
-            if (i != sz - 1) {
+            if (!skipCurrentField) {
+              StorageUtil.putField(mOut, field);
+            }
+
+            if (i != sz - 1 && !skipCurrentField) {
               mOut.write(fieldDel);
             }
           }
@@ -231,17 +274,17 @@ public class MultiStorage extends StoreF
           }
         }
       
-        private MyLineRecordWriter getStore(String fieldValue) throws IOException {
-          MyLineRecordWriter store = storeMap.get(fieldValue);
+        private MyLineRecordWriter getStore(List<String> fieldValues) throws IOException {
+          MyLineRecordWriter store = storeMap.get(fieldValues);
           if (store == null) {                  
-            DataOutputStream os = createOutputStream(fieldValue);
+            DataOutputStream os = createOutputStream(fieldValues);
             store = new MyLineRecordWriter(os, keyValueSeparator);
-            storeMap.put(fieldValue, store);
+            storeMap.put(fieldValues, store);
           }
           return store;
         }
           
-        private DataOutputStream createOutputStream(String fieldValue) throws IOException {
+        private DataOutputStream createOutputStream(List<String> fieldValues) throws IOException {
           Configuration conf = ctx.getConfiguration();
           TaskID taskId = ctx.getTaskAttemptID().getTaskID();
           
@@ -259,7 +302,21 @@ public class MultiStorage extends StoreF
           NumberFormat nf = NumberFormat.getInstance();
           nf.setMinimumIntegerDigits(4);
 
-          Path path = new Path(fieldValue+extension, fieldValue + '-'
+          StringBuffer pathStringBuffer = new StringBuffer();
+          for (String fieldValue : fieldValues){
+            String safeFieldValue = fieldValue.replaceAll("\\/","-");
+            pathStringBuffer.append(safeFieldValue);
+            pathStringBuffer.append("/");
+          }
+          pathStringBuffer.deleteCharAt(pathStringBuffer.length()-1);
+          String pathString = pathStringBuffer.toString();
+          String idString = pathString.replaceAll("\\/","-");
+
+          if (!Strings.isNullOrEmpty(extension)){
+            pathString = pathString.replaceAll("\\/",extension+"\\/");
+          }
+
+          Path path = new Path(pathString+extension, idString + '-'
                 + nf.format(taskId.getId())+extension);
           Path workOutputPath = ((FileOutputCommitter)getOutputCommitter(ctx)).getWorkPath();
           Path file = new Path(workOutputPath, path);
@@ -279,8 +336,12 @@ public class MultiStorage extends StoreF
       keyValueSeparator = sep;
       fieldDel = StorageUtil.parseFieldDel(keyValueSeparator);  
     }
-  
-  //------------------------------------------------------------------------
+
+    public void setSkipIndices(List<Integer> skipIndices) {
+      this.skipIndices = skipIndices;
+    }
+
+    //------------------------------------------------------------------------
   //
   
     protected static class MyLineRecordWriter

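As a quick illustration of the multi-level key support described in the MultiStorage javadoc above, a Pig Latin sketch of the new five-argument form might look as follows; the load path, schema and relation name are invented for the example, and the final 'true' corresponds to the new isRemoveKeys flag.

    -- minimal sketch, assuming the key columns sit in positions 0 and 1
    A = LOAD 'input' USING PigStorage('\t')
          AS (category:chararray, subcategory:chararray, name:chararray, price:double);
    -- split on fields 0 and 1; 'true' drops the key columns from the written records
    STORE A INTO '/my/home/output' USING org.apache.pig.piggybank.storage.MultiStorage(
          '/my/home/output', '0,1', 'none', '\\t', 'true');

Passing 'false' (or using the existing four-argument constructor) keeps the key columns in the output, preserving the previous behaviour.
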
Modified: pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/evaluation/TestOver.java
URL: http://svn.apache.org/viewvc/pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/evaluation/TestOver.java?rev=1783988&r1=1783987&r2=1783988&view=diff
==============================================================================
--- pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/evaluation/TestOver.java (original)
+++ pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/evaluation/TestOver.java Wed Feb 22 09:43:41 2017
@@ -18,12 +18,11 @@
 package org.apache.pig.piggybank.evaluation;
 
 import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertTrue;
 
+import java.math.BigDecimal;
 import java.util.Iterator;
-import java.util.List;
 import java.util.Random;
 
 import org.apache.pig.backend.executionengine.ExecException;
@@ -34,8 +33,6 @@ import org.apache.pig.data.DataType;
 import org.apache.pig.data.Tuple;
 import org.apache.pig.data.TupleFactory;
 import org.apache.pig.impl.logicalLayer.schema.Schema;
-
-import org.junit.Before;
 import org.junit.Test;
 
 public class TestOver {
@@ -66,11 +63,25 @@ public class TestOver {
         out = func.outputSchema(in);
         assertEquals("{org.apache.pig.piggybank.evaluation.over_3: {result: double}}", out.toString());
 
+        // bigdecimal
+        func = new Over("BIGDECIMAL");
+        in = Schema.generateNestedSchema(DataType.BAG, DataType.INTEGER);
+        out = func.outputSchema(in);
+        assertEquals("{org.apache.pig.piggybank.evaluation.over_4: {result: bigdecimal}}", out.toString());
+        
         // named 
         func = new Over("bob:chararray");
         in = Schema.generateNestedSchema(DataType.BAG, DataType.INTEGER);
         out = func.outputSchema(in);
-        assertEquals("{org.apache.pig.piggybank.evaluation.over_4: {bob: chararray}}", out.toString());
+        assertEquals("{org.apache.pig.piggybank.evaluation.over_5: {bob: chararray}}", out.toString());
+        
+        
+        // Search inner alias and type
+        func = new Over("true");
+        in = Schema.generateNestedSchema(DataType.BAG, DataType.BIGDECIMAL);
+        in.getField(0).schema.getField(0).alias="test";
+        out = func.outputSchema(in);
+        assertEquals("{org.apache.pig.piggybank.evaluation.over_6: {test_over: bigdecimal}}", out.toString());
     }
 
     @Test
@@ -397,6 +408,28 @@ public class TestOver {
             assertEquals(new Long(10), to.get(0));
         }
     }
+    
+    @Test
+    public void testSumBigDecimal() throws Exception {
+        Over func = new Over();
+        DataBag inbag = BagFactory.getInstance().newDefaultBag();
+        for (int i = 0; i < 10; i++) {
+            Tuple t = TupleFactory.getInstance().newTuple(1);
+            t.set(0, new BigDecimal(1));
+            inbag.add(t);
+        }
+        Tuple t = TupleFactory.getInstance().newTuple(4);
+        t.set(0, inbag);
+        t.set(1, "sum(bigdecimal)");
+        t.set(2, -1);
+        t.set(3, -1);
+        DataBag outbag = func.exec(t);
+        assertEquals(10, outbag.size());
+        for (Tuple to : outbag) {
+            assertEquals(1, to.size());
+            assertEquals(new BigDecimal(10), to.get(0));
+        }
+    }
 
     @Test
     public void testAvgDouble() throws Exception {
@@ -509,6 +542,29 @@ public class TestOver {
     }
     
     @Test
+    public void testAvgBigDecimal() throws Exception {
+        Over func = new Over();
+        DataBag inbag = BagFactory.getInstance().newDefaultBag();
+        for (int i = 0; i < 10; i++) {
+            Tuple t = TupleFactory.getInstance().newTuple(1);
+            t.set(0, new BigDecimal(i));
+            inbag.add(t);
+        }
+        Tuple t = TupleFactory.getInstance().newTuple(4);
+        t.set(0, inbag);
+        t.set(1, "avg(bigdecimal)");
+        t.set(2, -1);
+        t.set(3, -1);
+        DataBag outbag = func.exec(t);
+        assertEquals(10, outbag.size());
+        for (Tuple to : outbag) {
+            assertEquals(1, to.size());
+            assertEquals(new BigDecimal(4.5), to.get(0));
+        }
+    }
+    
+    
+    @Test
     public void testMinDouble() throws Exception {
         Over func = new Over();
         DataBag inbag = BagFactory.getInstance().newDefaultBag();
@@ -627,6 +683,26 @@ public class TestOver {
             assertEquals("0", to.get(0));
         }
     }
+    
+    @Test
+    public void testMinBigDecimal() throws Exception {
+        Over func = new Over();
+        DataBag inbag = BagFactory.getInstance().newDefaultBag();
+        for (int i = 0; i < 10; i++) {
+            Tuple t = TupleFactory.getInstance().newTuple(1);
+            t.set(0,  new BigDecimal(i));
+            inbag.add(t);
+        }
+        Tuple t = TupleFactory.getInstance().newTuple(2);
+        t.set(0, inbag);
+        t.set(1, "min(bigdecimal)");
+        DataBag outbag = func.exec(t);
+        assertEquals(10, outbag.size());
+        for (Tuple to : outbag) {
+            assertEquals(1, to.size());
+            assertEquals(new BigDecimal(0), to.get(0));
+        }
+    }
 
     @Test
     public void testMaxDouble() throws Exception {
@@ -754,6 +830,28 @@ public class TestOver {
             assertEquals("9", to.get(0));
         }
     }
+    
+    @Test
+    public void testMaxBigDecimal() throws Exception {
+        Over func = new Over();
+        DataBag inbag = BagFactory.getInstance().newDefaultBag();
+        for (int i = 0; i < 10; i++) {
+            Tuple t = TupleFactory.getInstance().newTuple(1);
+            t.set(0, new BigDecimal(i));
+            inbag.add(t);
+        }
+        Tuple t = TupleFactory.getInstance().newTuple(2);
+        t.set(0, inbag);
+        t.set(1, "max(bigdecimal)");
+        DataBag outbag = func.exec(t);
+        assertEquals(10, outbag.size());
+        int count = 0;
+        for (Tuple to : outbag) {
+            assertEquals(1, to.size());
+            assertEquals(new BigDecimal(count++), to.get(0));
+        }
+    }
+
 
     @Test
     public void testRowNumber() throws Exception {

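The BigDecimal cases added above call Over through its raw tuple interface (bag, aggregate name, start, end). A rough, hedged sketch of the corresponding Pig Latin, following the usual Over/Stitch idiom with invented relation and field names, might be:

    DEFINE Over   org.apache.pig.piggybank.evaluation.Over('BIGDECIMAL');
    DEFINE Stitch org.apache.pig.piggybank.evaluation.Stitch();
    A = LOAD 'trades' AS (sym:chararray, ts:long, amount:bigdecimal);
    B = GROUP A BY sym;
    -- running sum of the bigdecimal column within each group, ordered by ts
    C = FOREACH B {
          sorted = ORDER A BY ts;
          GENERATE FLATTEN(Stitch(sorted, Over(sorted.amount, 'sum(bigdecimal)')));
    };
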
Added: pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/TestMaxTupleBy1stField.java
URL: http://svn.apache.org/viewvc/pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/TestMaxTupleBy1stField.java?rev=1783988&view=auto
==============================================================================
--- pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/TestMaxTupleBy1stField.java (added)
+++ pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/TestMaxTupleBy1stField.java Wed Feb 22 09:43:41 2017
@@ -0,0 +1,95 @@
+package org.apache.pig.piggybank.test.evaluation;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import com.google.common.collect.Lists;
+import org.apache.pig.data.BagFactory;
+import org.apache.pig.data.DataBag;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.data.TupleFactory;
+import org.apache.pig.piggybank.evaluation.MaxTupleBy1stField;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import java.util.ArrayList;
+import java.util.List;
+
+public class TestMaxTupleBy1stField {
+
+    private static List<Tuple> inputTuples = new ArrayList<>();
+    private static final TupleFactory TUPLE_FACTORY = TupleFactory.getInstance();
+    private static final BagFactory BAG_FACTORY = BagFactory.getInstance();
+
+    @BeforeClass
+    public static void setup() throws Exception {
+        inputTuples.add(TUPLE_FACTORY.newTuple(Lists.newArrayList(0L, "Fruit", "orange", 21F)));
+        inputTuples.add(TUPLE_FACTORY.newTuple(Lists.newArrayList(1L, "Fruit", "apple", 9.9F)));
+        inputTuples.add(TUPLE_FACTORY.newTuple(Lists.newArrayList(2L, "Vegetable", "paprika", 30F)));
+        inputTuples.add(TUPLE_FACTORY.newTuple(Lists.newArrayList(3L, "Fruit", "blueberry", 40F)));
+        inputTuples.add(TUPLE_FACTORY.newTuple(Lists.newArrayList(4L, "Vegetable", "carrot", 50F)));
+        inputTuples.add(TUPLE_FACTORY.newTuple(Lists.newArrayList(5L, "Fruit", "blueberry", 41F)));
+        inputTuples.add(TUPLE_FACTORY.newTuple(Lists.newArrayList(6L, "Vegetable", "paprika", 31F)));
+        inputTuples.add(TUPLE_FACTORY.newTuple(Lists.newArrayList(7L, "Fruit", "orange", 20.5F)));
+        inputTuples.add(TUPLE_FACTORY.newTuple(Lists.newArrayList(8L, "Fruit", "apple", 10.1F)));
+        inputTuples.add(TUPLE_FACTORY.newTuple(Lists.newArrayList(9L, "Fruit", "apple", 10.2F)));
+    }
+
+
+    @Test
+    public void testExecFunc() throws Exception {
+        MaxTupleBy1stField udf = new MaxTupleBy1stField();
+        Tuple inputTuple = createTupleFromInputList(0,inputTuples.size());
+
+        Tuple result = udf.exec(inputTuple);
+        Assert.assertEquals("apple", result.get(2));
+        Assert.assertEquals(10.2F, (Float) result.get(3), 1E-8);
+    }
+
+    @Test
+    public void testAccumulator() throws Exception {
+        MaxTupleBy1stField udf = new MaxTupleBy1stField();
+
+        Tuple inputTuple = createTupleFromInputList(0, 3);
+        udf.accumulate(inputTuple);
+        Tuple result = udf.getValue();
+        Assert.assertEquals("paprika", result.get(2));
+        Assert.assertEquals(30F, (Float) result.get(3), 1E-6);
+
+        inputTuple = createTupleFromInputList(3, 6);
+        udf.accumulate(inputTuple);
+        result = udf.getValue();
+        Assert.assertEquals("apple", result.get(2));
+        Assert.assertEquals(10.1F, (Float) result.get(3), 1E-6);
+
+        udf.cleanup();
+        Assert.assertEquals(null,udf.getValue());
+    }
+
+    private static Tuple createTupleFromInputList(int offset, int length) {
+        DataBag inputBag = BAG_FACTORY.newDefaultBag();
+        for (int i = offset; i < offset+length; ++i) {
+            inputBag.add(inputTuples.get(i));
+        }
+        Tuple inputTuple = TUPLE_FACTORY.newTuple();
+        inputTuple.append(inputBag);
+        return inputTuple;
+    }
+
+}

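The test above drives MaxTupleBy1stField through both exec and the Accumulator interface. As a hedged usage sketch, with an invented schema whose first field is the value being compared:

    DEFINE MaxTupleBy1stField org.apache.pig.piggybank.evaluation.MaxTupleBy1stField();
    items   = LOAD 'items' AS (id:long, kind:chararray, name:chararray, price:float);
    grouped = GROUP items ALL;
    -- returns the complete tuple whose first field (id) is largest
    top     = FOREACH grouped GENERATE MaxTupleBy1stField(items);
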
Modified: pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/xml/XPathTest.java
URL: http://svn.apache.org/viewvc/pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/xml/XPathTest.java?rev=1783988&r1=1783987&r2=1783988&view=diff
==============================================================================
--- pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/xml/XPathTest.java (original)
+++ pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/xml/XPathTest.java Wed Feb 22 09:43:41 2017
@@ -151,6 +151,27 @@ public class XPathTest {
     }
 
     @Test
+    public void testExecTupleWithDontIgnoreNamespace() throws Exception {
+
+        final XPath xpath = new XPath();
+
+        final Tuple tuple = mock(Tuple.class);
+
+        when(tuple.get(0)).thenReturn("<?xml version='1.0'?>\n" +
+                "<foo:document xmlns:foo=\"http://apache.org/foo\" xmlns:bar=\"http://apache.org/bar\">" +
+                "<bar:element>MyBar</bar:element>" +
+                "</foo:document>");
+
+        when(tuple.size()).thenReturn(4);
+        when(tuple.get(2)).thenReturn(true);
+        when(tuple.get(3)).thenReturn(false);
+
+        when(tuple.get(1)).thenReturn("/foo:document/bar:element");
+        assertEquals("MyBar", xpath.exec(tuple));
+
+    }
+
+    @Test
     public void testExecTupleWithElementNodeWithComplexNameSpace() throws Exception {
 
         final XPath xpath = new XPath();
@@ -210,7 +231,31 @@
         assertEquals("4 stars3.5 stars4 stars4.2 stars3.5 stars", xpath.exec(tuple));
 
     }
-    
+
+    @Test
+    public void testFunctionInXPath() throws Exception {
+
+        final XPath xpath = new XPath();
+
+        final Tuple tuple = mock(Tuple.class);
+
+        when(tuple.get(0)).thenReturn("<Aa name=\"test1\">" +
+                "<Bb Cc=\"1\"/>" +
+                "<Bb Cc=\"1\"/>" +
+                "<Bb Cc=\"1\"/>" +
+                "<Bb Cc=\"1\"/>" +
+                "<Dd>test2</Dd>" +
+                "</Aa>");
+
+        when(tuple.size()).thenReturn(4);
+        when(tuple.get(1)).thenReturn("sum(Aa/Bb/@Cc)");
+        when(tuple.get(2)).thenReturn(true);
+        when(tuple.get(3)).thenReturn(true);
+
+        assertEquals("4", xpath.exec(tuple));
+
+    }
+
     @Ignore //--optional test
     @Test 
     public void testCacheBenefit() throws Exception{

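The XPath tests added above pass the XPath expression followed by two boolean flags. A hedged Pig Latin sketch of the same call shape, with made-up relation and file names and the flag values copied from the namespace test, would be:

    DEFINE XPath org.apache.pig.piggybank.evaluation.xml.XPath();
    docs = LOAD 'docs.txt' AS (xml:chararray);
    -- same arguments as testExecTupleWithDontIgnoreNamespace: expression plus the two booleans (true, false)
    vals = FOREACH docs GENERATE XPath(xml, '/foo:document/bar:element', true, false);
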
Modified: pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestCSVExcelStorage.java
URL: http://svn.apache.org/viewvc/pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestCSVExcelStorage.java?rev=1783988&r1=1783987&r2=1783988&view=diff
==============================================================================
--- pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestCSVExcelStorage.java (original)
+++ pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestCSVExcelStorage.java Wed Feb 22 09:43:41 2017
@@ -218,7 +218,7 @@ public class TestCSVExcelStorage  {
         Util.registerMultiLineQuery(pig, script);
         Iterator<Tuple> it = pig.openIterator("a");
         Assert.assertEquals(Util.createTuple(new String[] {"foo,\"bar\",baz"}), it.next());
-        Assert.assertEquals(Util.createTuple(new String[] {"\"\"\"\""}), it.next());
+        Assert.assertEquals(Util.createTuple(new String[] {"\"\"\""}), it.next());
     }
 
     // Handle newlines in fields

Modified: pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestLogFormatLoader.java
URL: http://svn.apache.org/viewvc/pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestLogFormatLoader.java?rev=1783988&r1=1783987&r2=1783988&view=diff
==============================================================================
--- pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestLogFormatLoader.java (original)
+++ pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestLogFormatLoader.java Wed Feb 22 09:43:41 2017
@@ -109,7 +109,7 @@ public class TestLogFormatLoader {
         Tuple actual = out.get(0);
         Tuple expected = tuple(
             "2001:980:91c0:1:8d31:a232:25e5:85d",
-            "[05/Sep/2010:11:27:50 +0200]",
+            "05/Sep/2010:11:27:50 +0200",
             "koken-pannen_303_hs-koken-pannen-afj-120601_B3_product_1_9200000002876066",
             map(
                 "promo"       , "koken-pannen_303_hs-koken-pannen-afj-120601_B3_product_1_9200000002876066",