Posted to commits@pig.apache.org by zl...@apache.org on 2017/02/22 09:43:46 UTC
svn commit: r1783988 [4/24] - in /pig/branches/spark: ./ bin/ conf/ contrib/piggybank/java/ contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/ contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/util/apachelo...
Modified: pig/branches/spark/bin/pig
URL: http://svn.apache.org/viewvc/pig/branches/spark/bin/pig?rev=1783988&r1=1783987&r2=1783988&view=diff
==============================================================================
--- pig/branches/spark/bin/pig (original)
+++ pig/branches/spark/bin/pig Wed Feb 22 09:43:41 2017
@@ -330,7 +330,8 @@ HADOOP_CORE_JAR=`echo ${HADOOP_HOME}/had
if [ -z "$HADOOP_CORE_JAR" ]; then
HADOOP_VERSION=2
else
- HADOOP_VERSION=1
+ echo "Pig requires Hadoop 2 to be present in HADOOP_HOME (currently: $HADOOP_HOME). Please install Hadoop 2.x"
+ exit 1
fi
# if using HBase, likely want to include HBase jars and config
@@ -439,11 +440,7 @@ if [ -n "$HADOOP_BIN" ]; then
if [ -n "$PIG_JAR" ]; then
CLASSPATH=${CLASSPATH}:$PIG_JAR
else
- if [ "$HADOOP_VERSION" == "1" ]; then
- echo "Cannot locate pig-core-h${HADOOP_VERSION}.jar. do 'ant jar', and try again"
- else
- echo "Cannot locate pig-core-h${HADOOP_VERSION}.jar. do 'ant -Dhadoopversion=23 jar', and try again"
- fi
+ echo "Cannot locate pig-core-h${HADOOP_VERSION}.jar. do 'ant jar', and try again"
exit 1
fi
@@ -464,8 +461,8 @@ if [ -n "$HADOOP_BIN" ]; then
exec "$HADOOP_BIN" jar "$PIG_JAR" "${remaining[@]}"
fi
else
- # use hadoop-core.jar to run local mode
- PIG_JAR=`echo $PIG_HOME/pig*-core-h1.jar`
+ # use bundled hadoop to run local mode
+ PIG_JAR=`echo $PIG_HOME/pig*-core-h2.jar`
if [ -n "$PIG_JAR" ]; then
CLASSPATH="${CLASSPATH}:$PIG_JAR"
@@ -474,12 +471,12 @@ else
exit 1
fi
- for f in $PIG_HOME/lib/h1/*.jar; do
+ for f in $PIG_HOME/lib/h2/*.jar; do
CLASSPATH=${CLASSPATH}:$f;
done
- # Add bundled hadoop-core.jar
- for f in $PIG_HOME/lib/hadoop1-runtime/*.jar; do
+ # Add bundled hadoop jars
+ for f in $PIG_HOME/lib/hadoop2-runtime/*.jar; do
CLASSPATH=${CLASSPATH}:$f;
done
Modified: pig/branches/spark/bin/pig.py
URL: http://svn.apache.org/viewvc/pig/branches/spark/bin/pig.py?rev=1783988&r1=1783987&r2=1783988&view=diff
==============================================================================
--- pig/branches/spark/bin/pig.py (original)
+++ pig/branches/spark/bin/pig.py Wed Feb 22 09:43:41 2017
@@ -338,7 +338,7 @@ hadoopCoreJars = glob.glob(os.path.join(
if len(hadoopCoreJars) == 0:
hadoopVersion = 2
else:
- hadoopVersion = 1
+ sys.exit("Cannot locate Hadoop 2 binaries, please install Hadoop 2.x and try again.")
if hadoopBin != "":
if debug == True:
@@ -361,10 +361,7 @@ if hadoopBin != "":
if len(pigJars) == 1:
pigJar = pigJars[0]
else:
- if hadoopVersion == 1:
- sys.exit("Cannot locate pig-core-h1.jar do 'ant jar', and try again")
- else:
- sys.exit("Cannot locate pig-core-h2.jar do 'ant -Dhadoopversion=23 jar', and try again")
+ sys.exit("Cannot locate pig-core-h2.jar do 'ant jar', and try again")
pigLibJars = glob.glob(os.path.join(os.environ['PIG_HOME']+"/lib", "h" + str(hadoopVersion), "*.jar"))
for jar in pigLibJars:
@@ -393,13 +390,13 @@ if hadoopBin != "":
else:
# fall back to use fat pig.jar
if debug == True:
- print "Cannot find local hadoop installation, using bundled hadoop 1"
-
- if os.path.exists(os.path.join(os.environ['PIG_HOME'], "pig-core-h1.jar")):
- pigJar = os.path.join(os.environ['PIG_HOME'], "pig-core-h1.jar")
+ print "Cannot find local hadoop installation, using bundled hadoop 2"
+
+ if os.path.exists(os.path.join(os.environ['PIG_HOME'], "pig-core-h2.jar")):
+ pigJar = os.path.join(os.environ['PIG_HOME'], "pig-core-h2.jar")
else:
- pigJars = glob.glob(os.path.join(os.environ['PIG_HOME'], "pig-*-core-h1.jar"))
+ pigJars = glob.glob(os.path.join(os.environ['PIG_HOME'], "pig-*-core-h2.jar"))
if len(pigJars) == 1:
pigJar = pigJars[0]
@@ -407,15 +404,15 @@ else:
elif len(pigJars) > 1:
print "Ambiguity with pig jars found the following jars"
print pigJars
- sys.exit("Please remove irrelavant jars from %s" % os.path.join(os.environ['PIG_HOME'], "pig-core-h1.jar"))
+ sys.exit("Please remove irrelavant jars from %s" % os.path.join(os.environ['PIG_HOME'], "pig-core-h2.jar"))
else:
- sys.exit("Cannot locate pig-core-h1.jar. do 'ant jar' and try again")
+ sys.exit("Cannot locate pig-core-h2.jar. do 'ant jar' and try again")
- pigLibJars = glob.glob(os.path.join(os.environ['PIG_HOME']+"/lib", "h1", "*.jar"))
+ pigLibJars = glob.glob(os.path.join(os.environ['PIG_HOME']+"/lib", "h2", "*.jar"))
for jar in pigLibJars:
classpath += os.pathsep + jar
- pigLibJars = glob.glob(os.path.join(os.environ['PIG_HOME']+"/lib", "hadoop1-runtime", "*.jar"))
+ pigLibJars = glob.glob(os.path.join(os.environ['PIG_HOME']+"/lib", "hadoop2-runtime", "*.jar"))
for jar in pigLibJars:
classpath += os.pathsep + jar
@@ -423,7 +420,7 @@ else:
pigClass = "org.apache.pig.Main"
if debug == True:
print "dry runXXX:"
- print "%s %s %s -classpath %s %s %s" % (java, javaHeapMax, pigOpts, classpath, pigClass, ' '.join(restArgs))
+ print "%s %s %s -classpath %s %s %s" % (java, javaHeapMax, pigOpts, classpath, pigClass, ' '.join(restArgs))
else:
cmdLine = java + ' ' + javaHeapMax + ' ' + pigOpts
cmdLine += ' ' + '-classpath ' + classpath + ' ' + pigClass + ' ' + ' '.join(restArgs)
Modified: pig/branches/spark/build.xml
URL: http://svn.apache.org/viewvc/pig/branches/spark/build.xml?rev=1783988&r1=1783987&r2=1783988&view=diff
==============================================================================
--- pig/branches/spark/build.xml (original)
+++ pig/branches/spark/build.xml Wed Feb 22 09:43:41 2017
@@ -20,6 +20,13 @@
xmlns:ivy="antlib:org.apache.ivy.ant">
<!-- Load all the default properties, and any the user wants -->
<!-- to contribute (without having to type -D or edit this file -->
+
+ <taskdef resource="net/sf/antcontrib/antcontrib.properties">
+ <classpath>
+ <pathelement location="${basedir}/ivy/ant-contrib-1.0b3.jar"/>
+ </classpath>
+ </taskdef>
+
<property file="${user.home}/build.properties" />
<property file="${basedir}/build.properties" />
@@ -31,11 +38,11 @@
<property name="pigsmoke.pom" value="${basedir}/ivy/pigsmoke.pom" />
<property name="pigunit.pom" value="${basedir}/ivy/pigunit.pom" />
<property name="piggybank.pom" value="${basedir}/ivy/piggybank.pom" />
- <property name="pig.version" value="0.16.0" />
+ <property name="pig.version" value="0.17.0" />
<property name="pig.version.suffix" value="-SNAPSHOT" />
<property name="version" value="${pig.version}${pig.version.suffix}" />
<property name="final.name" value="${name}-${version}" />
- <property name="year" value="2007-2012" />
+ <property name="year" value="2007-2016" />
<!-- source properties -->
<property name="lib.dir" value="${basedir}/lib" />
@@ -70,7 +77,6 @@
<!-- artifact jar file names -->
<property name="artifact.pig.jar" value="${final.name}.jar"/>
- <property name="artifact.pig-h1.jar" value="${final.name}-h1.jar"/>
<property name="artifact.pig-h2.jar" value="${final.name}-h2.jar"/>
<property name="artifact.pig-sources.jar" value="${final.name}-sources.jar"/>
<property name="artifact.pig-javadoc.jar" value="${final.name}-javadoc.jar"/>
@@ -78,15 +84,12 @@
<!-- jar names. TODO we might want to use the svn reversion name in the name in case it is a dev version -->
<property name="output.jarfile.withouthadoop" value="${build.dir}/${final.name}-withouthadoop.jar" />
- <property name="output.jarfile.withouthadoop-h1" value="${legacy.dir}/${final.name}-withouthadoop-h1.jar" />
<property name="output.jarfile.withouthadoop-h2" value="${legacy.dir}/${final.name}-withouthadoop-h2.jar" />
<property name="output.jarfile.core" value="${build.dir}/${artifact.pig.jar}" />
- <property name="output.jarfile.core-h1" value="${build.dir}/${artifact.pig-h1.jar}" />
<property name="output.jarfile.core-h2" value="${build.dir}/${artifact.pig-h2.jar}" />
<property name="output.jarfile.sources" value="${build.dir}/${artifact.pig-sources.jar}" />
<property name="output.jarfile.javadoc" value="${build.dir}/${artifact.pig-javadoc.jar}" />
<!-- Maintain old pig.jar in top level directory. -->
- <property name="output.jarfile.backcompat-core-h1" value="${basedir}/${final.name}-core-h1.jar" />
<property name="output.jarfile.backcompat-core-h2" value="${basedir}/${final.name}-core-h2.jar" />
<!-- test properties -->
@@ -107,8 +110,6 @@
<property name="test.spark.file" value="${test.src.dir}/spark-tests"/>
<property name="test.spark_local.file" value="${test.src.dir}/spark-local-tests"/>
<property name="test.exclude.file" value="${test.src.dir}/excluded-tests"/>
- <property name="test.exclude.file.20" value="${test.src.dir}/excluded-tests-20"/>
- <property name="test.exclude.file.23" value="${test.src.dir}/excluded-tests-23"/>
<property name="test.exclude.file.mr" value="${test.src.dir}/excluded-tests-mr"/>
<property name="test.exclude.file.tez" value="${test.src.dir}/excluded-tests-tez"/>
<property name="test.exclude.file.spark" value="${test.src.dir}/excluded-tests-spark"/>
@@ -155,9 +156,8 @@
<target name="setTezEnv">
<propertyreset name="test.timeout" value="900000" />
- <propertyreset name="hadoopversion" value="23" />
- <propertyreset name="isHadoop23" value="true" />
- <propertyreset name="hbase.hadoop.version" value="hadoop2" />
+ <propertyreset name="hadoopversion" value="2" />
+ <propertyreset name="isHadoop2" value="true" />
<propertyreset name="src.shims.dir" value="${basedir}/shims/src/hadoop${hadoopversion}" />
<propertyreset name="src.shims.test.dir" value="${basedir}/shims/test/hadoop${hadoopversion}" />
<propertyreset name="src.exclude.dir" value="" />
@@ -209,40 +209,42 @@
<property name="loglevel" value="quiet" />
<loadproperties srcfile="${ivy.dir}/libraries.properties"/>
- <property name="hadoopversion" value="20" />
- <condition property="isHadoop23">
+ <!--
+ Hadoop master version
+ (Value 23 is translated for backward compatibility in old build scripts)
+ -->
+ <if>
<equals arg1="${hadoopversion}" arg2="23"/>
- </condition>
+ <then>
+ <echo>Property setting hadoopversion=23 is deprecated. Overwriting to hadoopversion=2</echo>
+ <var name="hadoopversion" unset="true"/>
+ <property name="hadoopversion" value="2" />
+ </then>
+ </if>
+ <property name="hadoopversion" value="2" />
- <condition property="hbase.hadoop.version" value="hadoop1" else="hadoop2">
- <not>
- <equals arg1="${hadoopversion}" arg2="23"/>
- </not>
+ <condition property="isHadoop2">
+ <equals arg1="${hadoopversion}" arg2="2"/>
</condition>
<!--
HBase master version
- Denotes how the HBase dependencies are layout. Value "94" denotes older
- format where all HBase code is present in one single jar, which is the
- way HBase is available up to version 0.94. Value "95" denotes new format
- where HBase is cut into multiple dependencies per each major subsystem,
- e.g. "client", "server", ... . Only values "94" and "95" are supported
- at the moment.
+ (Value 95 is translated for backward compatibility in old build scripts)
-->
- <property name="hbaseversion" value="95" />
-
- <!-- exclude tez code if not hadoop20 -->
- <condition property="src.exclude.dir" value="**/tez/**" else="">
- <not>
- <equals arg1="${hadoopversion}" arg2="23"/>
- </not>
- </condition>
+ <if>
+ <equals arg1="${hbaseversion}" arg2="95"/>
+ <then>
+ <echo>Property setting hbaseversion=95 is deprecated. Overwriting to hbaseversion=1</echo>
+ <var name="hbaseversion" unset="true"/>
+ <property name="hbaseversion" value="1" />
+ </then>
+ </if>
+ <property name="hbaseversion" value="1" />
<property name="src.shims.dir" value="${basedir}/shims/src/hadoop${hadoopversion}" />
<property name="src.shims.test.dir" value="${basedir}/shims/test/hadoop${hadoopversion}" />
- <property name="hadoop.jar" value="hadoop-core-${hadoop-core.version}.jar" />
<property name="asfrepo" value="https://repository.apache.org"/>
<property name="asfsnapshotrepo" value="${asfrepo}/content/repositories/snapshots"/>
<property name="mvnrepo" value="http://repo2.maven.org/maven2"/>
@@ -284,7 +286,7 @@
<property name="xerces.jar" value="${ivy.lib.dir}/xercesImpl-${xerces.version}.jar"/>
<property name="jdiff.build.dir" value="${build.docs}/jdiff"/>
<property name="jdiff.xml.dir" value="${docs.dir}/jdiff"/>
- <property name="jdiff.stable" value="0.15.0"/>
+ <property name="jdiff.stable" value="0.16.0"/>
<property name="jdiff.stable.javadoc" value="http://hadoop.apache.org/${name}/docs/r${jdiff.stable}/api/"/>
<!-- Packaging properties -->
@@ -392,12 +394,6 @@
<include name="joda-time-${joda-time.version}.jar"/>
<include name="automaton-${automaton.version}.jar"/>
<include name="jansi-${jansi.version}.jar"/>
- <include name="jackson-mapper-asl-${jackson.version}.jar" unless="isHadoop23"/>
- <include name="jackson-core-asl-${jackson.version}.jar" unless="isHadoop23"/>
- <include name="guava-${guava.version}.jar" unless="isHadoop23"/>
- <include name="snappy-java-${snappy.version}.jar" unless="isHadoop23"/>
- <include name="asm-${asm.version}.jar" unless="isHadoop23"/>
-
<include name="scala*.jar"/>
<include name="akka*.jar"/>
<include name="jcl-over-slf4j*.jar"/>
@@ -574,6 +570,7 @@
<echo>*** Building Main Sources ***</echo>
<echo>*** To compile with all warnings enabled, supply -Dall.warnings=1 on command line ***</echo>
<echo>*** Else, you will only be warned about deprecations ***</echo>
+ <echo>*** Hadoop version used: ${hadoopversion} ; HBase version used: ${hbaseversion} ***</echo>
<compileSources sources="${src.dir};${src.gen.dir};${src.lib.dir}/bzip2;${src.shims.dir}"
excludes="${src.exclude.dir}" dist="${build.classes}" cp="classpath" warnings="${javac.args.warnings}" />
<copy todir="${build.classes}/META-INF">
@@ -703,23 +700,6 @@
</target>
<!-- ================================================================== -->
- <!-- Facede to build pig.jar for both Hadoop 1 and Hadoop 2 -->
- <!-- ================================================================== -->
- <target name="jar-h12" description="Create pig for both Hadoop 1 and Hadoop 2">
- <propertyreset name="hadoopversion" value="20" />
- <propertyreset name="src.shims.dir" value="${basedir}/shims/src/hadoop${hadoopversion}" />
- <antcall target="clean" inheritRefs="true" inheritall="true"/>
- <antcall target="jar" inheritRefs="true" inheritall="true"/>
- <antcall target="copyHadoop1LocalRuntimeDependencies"/>
- <delete dir="${build.dir}" />
- <propertyreset name="hadoopversion" value="23" />
- <propertyreset name="hbase.hadoop.version" value="hadoop2" />
- <propertyreset name="src.shims.dir" value="${basedir}/shims/src/hadoop${hadoopversion}" />
- <propertyreset name="src.exclude.dir" value="" />
- <antcall target="jar" inheritRefs="true" inheritall="true"/>
- </target>
-
- <!-- ================================================================== -->
<!-- Make pig.jar -->
<!-- ================================================================== -->
<target name="jar" depends="compile,ivy-buildJar" description="Create pig core jar">
@@ -727,8 +707,8 @@
<buildJar svnString="${svn.revision}" outputFile="${output.jarfile.withouthadoop}" includedJars="runtime.dependencies-withouthadoop.jar"/>
<antcall target="copyCommonDependencies"/>
<antcall target="copySparkDependencies"/>
- <antcall target="copyh1Dependencies"/>
<antcall target="copyh2Dependencies"/>
+ <antcall target="copyHadoop2LocalRuntimeDependencies" />
</target>
<target name="copyCommonDependencies">
@@ -752,7 +732,9 @@
<fileset dir="${ivy.lib.dir}" includes="jruby-*.jar"/>
<fileset dir="${ivy.lib.dir}" includes="groovy-*.jar"/>
<fileset dir="${ivy.lib.dir}" includes="js-*.jar"/>
- <fileset dir="${ivy.lib.dir}" includes="hbase-*.jar" excludes="hbase-*tests.jar,hbase-*hadoop*.jar"/>
+ <fileset dir="${ivy.lib.dir}" includes="htrace-core*incubating.jar"/>
+ <fileset dir="${ivy.lib.dir}" includes="metrics-core-*.jar"/>
+ <fileset dir="${ivy.lib.dir}" includes="hbase-*.jar" excludes="hbase-*tests.jar,hbase-*hadoop2*.jar"/>
<fileset dir="${ivy.lib.dir}" includes="hive-*.jar" excludes="hive-shims-0.*.jar"/>
<fileset dir="${ivy.lib.dir}" includes="protobuf-java-*.jar"/>
<fileset dir="${ivy.lib.dir}" includes="zookeeper-*.jar"/>
@@ -772,24 +754,12 @@
</copy>
</target>
- <target name="copyh1Dependencies" unless="isHadoop23">
- <mkdir dir="${lib.dir}/h1" />
- <copy todir="${lib.dir}/h1">
- <fileset dir="${ivy.lib.dir}" includes="avro-mapred-*.jar"/>
- <fileset dir="${ivy.lib.dir}" includes="hive-shims-0.*.jar"/>
- <fileset dir="${ivy.lib.dir}" includes="hbase-*hadoop1.jar"/>
- </copy>
- <copy file="${output.jarfile.core}" tofile="${output.jarfile.backcompat-core-h1}"/>
- <mkdir dir="${legacy.dir}" />
- <move file="${output.jarfile.withouthadoop}" tofile="${output.jarfile.withouthadoop-h1}"/>
- </target>
-
- <target name="copyh2Dependencies" if="isHadoop23">
+ <target name="copyh2Dependencies" if="isHadoop2">
<mkdir dir="${lib.dir}/h2" />
<copy todir="${lib.dir}/h2">
<fileset dir="${ivy.lib.dir}" includes="avro-mapred-*.jar"/>
<fileset dir="${ivy.lib.dir}" includes="hive-shims-0.*.jar"/>
- <fileset dir="${ivy.lib.dir}" includes="hbase-*hadoop2.jar"/>
+ <fileset dir="${ivy.lib.dir}" includes="hbase-hadoop2*.jar"/>
<fileset dir="${ivy.lib.dir}" includes="tez-*.jar"/>
<fileset dir="${ivy.lib.dir}" includes="commons-collections4-*.jar"/>
</copy>
@@ -798,18 +768,21 @@
<move file="${output.jarfile.withouthadoop}" tofile="${output.jarfile.withouthadoop-h2}"/>
</target>
- <target name="copyHadoop1LocalRuntimeDependencies">
- <mkdir dir="${lib.dir}/hadoop1-runtime" />
- <copy todir="${lib.dir}/hadoop1-runtime">
- <fileset dir="${ivy.lib.dir}" includes="hadoop-core-*.jar"/>
+ <target name="copyHadoop2LocalRuntimeDependencies">
+ <mkdir dir="${lib.dir}/hadoop2-runtime" />
+ <copy todir="${lib.dir}/hadoop2-runtime">
+ <fileset dir="${ivy.lib.dir}" includes="hadoop-*.jar"/>
<fileset dir="${ivy.lib.dir}" includes="commons-cli-*.jar"/>
<fileset dir="${ivy.lib.dir}" includes="commons-configuration-*.jar"/>
+ <fileset dir="${ivy.lib.dir}" includes="commons-collections-*.jar"/>
<fileset dir="${ivy.lib.dir}" includes="commons-lang-*.jar"/>
<fileset dir="${ivy.lib.dir}" includes="commons-codec-*.jar"/>
<fileset dir="${ivy.lib.dir}" includes="commons-io-*.jar"/>
<fileset dir="${ivy.lib.dir}" includes="commons-logging-*.jar"/>
- <fileset dir="${ivy.lib.dir}" includes="commons-httpclient-*.jar"/>
+ <fileset dir="${ivy.lib.dir}" includes="httpclient-*.jar"/>
+ <fileset dir="${ivy.lib.dir}" includes="httpcore-*.jar"/>
<fileset dir="${ivy.lib.dir}" includes="log4j-*.jar"/>
+ <fileset dir="${ivy.lib.dir}" includes="slf4j-*.jar"/>
</copy>
</target>
@@ -955,6 +928,9 @@
<sysproperty key="test.exec.type" value="${test.exec.type}" />
<sysproperty key="ssh.gateway" value="${ssh.gateway}" />
<sysproperty key="hod.server" value="${hod.server}" />
+ <sysproperty key="build.classes" value="${build.classes}" />
+ <sysproperty key="test.build.classes" value="${test.build.classes}" />
+ <sysproperty key="ivy.lib.dir" value="${ivy.lib.dir}" />
<sysproperty key="java.io.tmpdir" value="${junit.tmp.dir}" />
<sysproperty key="hadoop.log.dir" value="${test.log.dir}"/>
<jvmarg line="-XX:+CMSClassUnloadingEnabled -XX:MaxPermSize=128M ${debugArgs} -Djava.library.path=${hadoop.root}\bin"/>
@@ -980,8 +956,6 @@
<patternset>
<includesfile name="@{test.file}"/>
<excludesfile name="${test.exclude.file}" if="test.exclude.file"/>
- <excludesfile name="${test.exclude.file.20}" unless="isHadoop23"/>
- <excludesfile name="${test.exclude.file.23}" if="isHadoop23"/>
<excludesfile name="${test.exclude.file.for.exectype}"/>
</patternset>
<exclude name="**/${exclude.testcase}.java" if="exclude.testcase" />
@@ -990,7 +964,9 @@
</fileset>
</batchtest>
<batchtest fork="yes" todir="${test.log.dir}" if="testcase">
- <fileset dir="test" includes="**/${testcase}.java"/>
+ <fileset dir="test" includes="**/${testcase}.java">
+ <exclude name="e2e/**/*.java"/>
+ </fileset>
</batchtest>
<assertions>
@@ -1008,10 +984,10 @@
<target name="test-core-mrtez" description="run core tests on both mr and tez mode"
depends="setWindowsPath,setLinuxPath,compile-test,jar,debugger.check,jackson-pig-3039-test-download">
- <fail message="hadoopversion must be set to 23 when invoking test-core-mrtez">
+ <fail message="hadoopversion must be set to 2 when invoking test-core-mrtez">
<condition>
<not>
- <equals arg1="${hadoopversion}" arg2="23" />
+ <equals arg1="${hadoopversion}" arg2="2" />
</not>
</condition>
</fail>
@@ -1021,6 +997,9 @@
<propertyreset name="test.exec.type" value="mr" />
<propertyreset name="test.log.dir" value="${test.build.dir}/logs/${test.exec.type}" />
<macro-test-runner test.file="${test.all.file}" tests.failed="test.mr.failed"/>
+ <delete>
+ <fileset dir="${build.classes}" includes="*.xml" />
+ </delete>
<echo />
<echo message="=======================" />
<echo message="Running Tez tests" />
@@ -1099,10 +1078,7 @@
<!-- ================================================================== -->
<!-- Distribution -->
<!-- ================================================================== -->
- <target name="package-h12" depends="jar-h12, docs, api-report, piggybank" description="Create a Pig tar release">
- <package-base/>
- </target>
-
+
<target name="package" depends="jar, docs, api-report, piggybank" description="Create a Pig tar release">
<package-base/>
</target>
@@ -1122,7 +1098,6 @@
<fileset dir="${lib.dir}"/>
</copy>
- <copy file="${output.jarfile.backcompat-core-h1}" tofile="${tar.dist.dir}/${final.name}-core-h1.jar" failonerror="false"/>
<copy file="${output.jarfile.backcompat-core-h2}" tofile="${tar.dist.dir}/${final.name}-core-h2.jar" failonerror="false"/>
<copy todir="${tar.dist.dir}/lib" file="contrib/piggybank/java/piggybank.jar"/>
@@ -1200,10 +1175,6 @@
<tar-base/>
</target>
- <target name="tar-h12" depends="package-h12" description="Source distribution">
- <tar-base/>
- </target>
-
<macrodef name="tar-base">
<sequential>
<tar compression="gzip" longfile="gnu" destfile="${build.dir}/${artifact.pig.tar}">
@@ -1289,15 +1260,13 @@
uri="urn:maven-artifact-ant"
classpathref="mvn-ant-task.classpath"/>
</target>
- <target name="mvn-install" depends="mvn-taskdef,jar-h12, set-version, source-jar,
- javadoc-jar, pigunit-jar, smoketests-jar, piggybank"
+ <target name="mvn-install" depends="mvn-taskdef, mvn-build, set-version"
description="To install pig to local filesystem's m2 cache">
<artifact:pom file="${pig.pom}" id="pig"/>
- <artifact:install file="${output.jarfile.core-h1}">
+ <artifact:install file="${output.jarfile.core-h2}">
<pom refid="pig"/>
<attach file="${output.jarfile.sources}" classifier="sources" />
<attach file="${output.jarfile.javadoc}" classifier="javadoc" />
- <attach file="${output.jarfile.core-h2}" classifier="h2" />
</artifact:install>
<artifact:pom file="${pigunit.pom}" id="pigunit"/>
<artifact:install file="${pigunit.jarfile}">
@@ -1313,10 +1282,9 @@
</artifact:install>
</target>
- <target name="mvn-build" depends="jar-h12, source-jar,
+ <target name="mvn-build" depends="jar, source-jar,
javadoc-jar, smoketests-jar, pigunit-jar, piggybank"
description="To build the pig jar artifacts to be deployed to apache maven repository">
- <move file="${output.jarfile.backcompat-core-h1}" tofile="${output.jarfile.core}"/>
<move file="${output.jarfile.backcompat-core-h2}" tofile="${output.jarfile.core-h2}"/>
</target>
@@ -1338,8 +1306,6 @@
<pom refid="pig"/>
<attach file="${output.jarfile.core}.asc" type="jar.asc"/>
<attach file="${pig.pom}.asc" type="pom.asc"/>
- <attach file="${output.jarfile.core-h2}.asc" type="jar.asc" classifier="h2"/>
- <attach file="${output.jarfile.core-h2}" classifier="h2" />
<attach file="${output.jarfile.sources}.asc" type="jar.asc" classifier="sources"/>
<attach file="${output.jarfile.sources}" classifier="sources" />
<attach file="${output.jarfile.javadoc}.asc" type="jar.asc" classifier="javadoc"/>
@@ -1374,7 +1340,6 @@
<artifact:deploy file="${output.jarfile.core}">
<remoteRepository id="${snapshots_repo_id}" url="${asfsnapshotrepo}"/>
<pom refid="pig"/>
- <attach file="${output.jarfile.core-h2}" classifier="h2" />
<attach file="${output.jarfile.sources}" classifier="sources" />
<attach file="${output.jarfile.javadoc}" classifier="javadoc" />
</artifact:deploy>
@@ -1418,8 +1383,6 @@
</macrodef>
<sign-artifact input.file="${output.jarfile.core}"
output.file="${output.jarfile.core}.asc" gpg.passphrase="${gpg.passphrase}"/>
- <sign-artifact input.file="${output.jarfile.core-h2}"
- output.file="${output.jarfile.core-h2}.asc" gpg.passphrase="${gpg.passphrase}"/>
<sign-artifact input.file="${output.jarfile.sources}"
output.file="${output.jarfile.sources}.asc" gpg.passphrase="${gpg.passphrase}"/>
<sign-artifact input.file="${output.jarfile.javadoc}"
@@ -1707,7 +1670,9 @@
<target name="ivy-resolve" depends="ivy-init" unless="ivy.resolved" description="Resolve Ivy dependencies">
<property name="ivy.resolved" value="true"/>
+ <echo>*** Ivy resolve with Hadoop ${hadoopversion} and HBase ${hbaseversion} ***</echo>
<ivy:resolve log="${loglevel}" settingsRef="${ant.project.name}.ivy.settings" conf="compile"/>
+ <ivy:report toDir="build/ivy/report"/>
</target>
<target name="ivy-compile" depends="ivy-resolve" description="Retrieve Ivy-managed artifacts for compile configuration">
Modified: pig/branches/spark/conf/pig.properties
URL: http://svn.apache.org/viewvc/pig/branches/spark/conf/pig.properties?rev=1783988&r1=1783987&r2=1783988&view=diff
==============================================================================
--- pig/branches/spark/conf/pig.properties (original)
+++ pig/branches/spark/conf/pig.properties Wed Feb 22 09:43:41 2017
@@ -557,6 +557,9 @@ pig.location.check.strict=false
#
hcat.bin=/usr/local/hcat/bin/hcat
+# Enable the ATS hook to log Pig-specific ATS entries; disable only when no ATS server is deployed
+pig.ats.enabled=true
+
###########################################################################
#
# Overrides for extreme environments
@@ -611,13 +614,13 @@ hcat.bin=/usr/local/hcat/bin/hcat
# If you want Pig to allow certain errors before failing you can set this property.
# If the property is set to true and the StoreFunc implements ErrorHandling, it will allow configurable errors
# based on the OutputErrorHandler implementation
-# pig.allow.store.errors = false
+# pig.error-handling.enabled = false
#
# Controls the minimum number of errors for store
-# pig.errors.min.records = 0
+# pig.error-handling.min.error.records = 0
#
# Set the threshold for percentage of errors
-# pig.error.threshold.percent = 0.0f
+# pig.error-handling.error.threshold = 0.0f
###########################################################################
#
@@ -675,3 +678,6 @@ hcat.bin=/usr/local/hcat/bin/hcat
pig.sort.readonce.loadfuncs=org.apache.pig.backend.hadoop.hbase.HBaseStorage,org.apache.pig.backend.hadoop.accumulo.AccumuloStorage
+# If set, Pig will override tez.am.launch.cmd-opts and tez.am.resource.memory.mb to optimal values
+# even if they are set to a different value. Default value is true.
+#pig.tez.configure.am.memory=false
Modified: pig/branches/spark/contrib/piggybank/java/build.xml
URL: http://svn.apache.org/viewvc/pig/branches/spark/contrib/piggybank/java/build.xml?rev=1783988&r1=1783987&r2=1783988&view=diff
==============================================================================
--- pig/branches/spark/contrib/piggybank/java/build.xml (original)
+++ pig/branches/spark/contrib/piggybank/java/build.xml Wed Feb 22 09:43:41 2017
@@ -16,13 +16,20 @@
-->
<project basedir="." default="jar" name="pigudf">
+
+ <taskdef resource="net/sf/antcontrib/antcontrib.properties">
+ <classpath>
+ <pathelement location="../../../ivy/ant-contrib-1.0b3.jar"/>
+ </classpath>
+ </taskdef>
+
<property file="../../../build.properties" />
<!-- javac properties -->
<property name="javac.debug" value="on" />
<property name="javac.level" value="source,lines,vars"/>
<property name="javac.optimize" value="on" />
<property name="javac.deprecation" value="off" />
- <property name="javac.version" value="1.6" />
+ <property name="javac.version" value="1.7" />
<property name="javac.args" value="" />
<!-- TODO we should use warning... <property name="javac.args.warnings" value="-Xlint:unchecked" /> -->
<property name="javac.args.warnings" value="" />
@@ -38,16 +45,22 @@
<property name="src.dir" value="src/main/java/org/apache/pig/piggybank" />
<property name="hsqldb.jar" value="../../../build/ivy/lib/Pig/hsqldb-1.8.0.10.jar"/>
- <!-- JobHistoryLoader currently does not support 0.23 -->
- <condition property="build.classes.excludes" value="**/HadoopJobHistoryLoader.java" else="">
- <equals arg1="${hadoopversion}" arg2="23"/>
- </condition>
- <condition property="test.classes.excludes" value="**/TestHadoopJobHistoryLoader.java" else="">
+ <!--
+ Hadoop master version
+ (Value 23 is translated for backward compatibility in old build scripts)
+ -->
+ <if>
<equals arg1="${hadoopversion}" arg2="23"/>
- </condition>
+ <then>
+ <echo>Property setting hadoopversion=23 is deprecated. Overwriting to hadoopversion=2</echo>
+ <var name="hadoopversion" unset="true"/>
+ <property name="hadoopversion" value="2" />
+ </then>
+ </if>
+ <property name="hadoopversion" value="2" />
- <condition property="hadoopsuffix" value="2" else="1">
- <equals arg1="${hadoopversion}" arg2="23"/>
+ <condition property="hadoopsuffix" value="2" else="">
+ <equals arg1="${hadoopversion}" arg2="2"/>
</condition>
<!-- jar properties -->
Modified: pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/MaxTupleBy1stField.java
URL: http://svn.apache.org/viewvc/pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/MaxTupleBy1stField.java?rev=1783988&r1=1783987&r2=1783988&view=diff
==============================================================================
--- pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/MaxTupleBy1stField.java (original)
+++ pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/MaxTupleBy1stField.java Wed Feb 22 09:43:41 2017
@@ -21,6 +21,7 @@ package org.apache.pig.piggybank.evaluat
import java.io.IOException;
import java.util.Iterator;
+import org.apache.pig.Accumulator;
import org.apache.pig.Algebraic;
import org.apache.pig.EvalFunc;
import org.apache.pig.backend.executionengine.ExecException;
@@ -43,7 +44,7 @@ import org.apache.pig.impl.logicalLayer.
*
* @author Vadim Zaliva <lo...@codemindes.com>
*/
-public class MaxTupleBy1stField extends EvalFunc<Tuple> implements Algebraic
+public class MaxTupleBy1stField extends EvalFunc<Tuple> implements Algebraic, Accumulator<Tuple>
{
/**
* Indicates once for how many items a progress heartbeat should be sent.
@@ -131,6 +132,11 @@ public class MaxTupleBy1stField extends
protected static Tuple max(Tuple input, PigProgressable reporter) throws ExecException
{
DataBag values = (DataBag) input.get(0);
+ return max(values,reporter);
+ }
+
+ protected static Tuple max(DataBag values, PigProgressable reporter) throws ExecException
+ {
// if we were handed an empty bag, return NULL
// this is in compliance with SQL standard
@@ -183,4 +189,44 @@ public class MaxTupleBy1stField extends
return Final.class.getName();
}
+
+ /**
+ * Accumulator implementation
+ */
+
+ private Tuple intermediate = null;
+
+ /**
+ * Accumulator implementation: calls max() on the incoming set of tuples, including the intermediate tuple if one already exists
+ * @param b A tuple containing a single field, which is a bag. The bag contains the set of tuples to accumulate
+ * @throws IOException
+ */
+ @Override
+ public void accumulate(Tuple b) throws IOException {
+ try{
+ DataBag values = BagFactory.getInstance().newDefaultBag();
+ values.addAll((DataBag) b.get(0));
+
+ if (intermediate != null) {
+ values.add(intermediate);
+ }
+ intermediate = max(values,reporter);
+
+ }catch (ExecException ee){
+ IOException oughtToBeEE = new IOException();
+ oughtToBeEE.initCause(ee);
+ throw oughtToBeEE;
+ }
+ }
+
+ @Override
+ public Tuple getValue() {
+ return intermediate;
+ }
+
+ @Override
+ public void cleanup() {
+ intermediate = null;
+ }
+
}
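
For illustration, a minimal hand-rolled driver for the Accumulator contract added above: Pig itself calls accumulate() once per batch of input tuples, then getValue() for the group result and cleanup() between groups. This is a sketch, not the UDF's real execution path; the batch sizes and field values are invented, and it assumes, as the existing unit-test pattern does, that the UDF tolerates running without a progress reporter.

    // Hedged sketch: exercising MaxTupleBy1stField as an Accumulator.
    import org.apache.pig.data.BagFactory;
    import org.apache.pig.data.DataBag;
    import org.apache.pig.data.Tuple;
    import org.apache.pig.data.TupleFactory;
    import org.apache.pig.piggybank.evaluation.MaxTupleBy1stField;

    public class AccumulatorSketch {
        public static void main(String[] args) throws Exception {
            TupleFactory tf = TupleFactory.getInstance();
            BagFactory bf = BagFactory.getInstance();
            MaxTupleBy1stField udf = new MaxTupleBy1stField();

            for (int batch = 0; batch < 2; batch++) {
                DataBag bag = bf.newDefaultBag();
                for (int i = 0; i < 3; i++) {
                    Tuple t = tf.newTuple(2);
                    t.set(0, (double) (batch * 3 + i));   // first field drives max()
                    t.set(1, "row-" + batch + "-" + i);
                    bag.add(t);
                }
                Tuple wrapper = tf.newTuple(1);
                wrapper.set(0, bag);         // accumulate() expects the bag in field 0
                udf.accumulate(wrapper);     // intermediate max carried across batches
            }
            System.out.println(udf.getValue());   // tuple with the largest first field
            udf.cleanup();                        // reset state before the next group
        }
    }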
Modified: pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/Over.java
URL: http://svn.apache.org/viewvc/pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/Over.java?rev=1783988&r1=1783987&r2=1783988&view=diff
==============================================================================
--- pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/Over.java (original)
+++ pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/Over.java Wed Feb 22 09:43:41 2017
@@ -23,10 +23,13 @@ import java.util.Iterator;
import java.util.List;
import org.apache.pig.EvalFunc;
-import org.apache.pig.FuncSpec;
import org.apache.pig.PigException;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.builtin.AVG;
+import org.apache.pig.builtin.BigDecimalAvg;
+import org.apache.pig.builtin.BigDecimalMax;
+import org.apache.pig.builtin.BigDecimalMin;
+import org.apache.pig.builtin.BigDecimalSum;
import org.apache.pig.builtin.COUNT;
import org.apache.pig.builtin.DoubleAvg;
import org.apache.pig.builtin.DoubleMax;
@@ -54,6 +57,7 @@ import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.logicalLayer.schema.Schema;
+import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema;
/**
* Given an aggregate function, a bag, and possibly a window definition,
@@ -73,23 +77,27 @@ import org.apache.pig.impl.logicalLayer.
* <li>sum(int)</li>
* <li>sum(long)</li>
* <li>sum(bytearray)</li>
+ * <li>sum(bigdecimal)</li>
* <li>avg(double)</li>
* <li>avg(float)</li>
* <li>avg(long)</li>
* <li>avg(int)</li>
* <li>avg(bytearray)</li>
+ * <li>avg(bigdecimal)</li>
* <li>min(double)</li>
* <li>min(float)</li>
* <li>min(long)</li>
* <li>min(int)</li>
* <li>min(chararray)</li>
* <li>min(bytearray)</li>
+ * <li>min(bigdecimal)</li>
* <li>max(double)</li>
* <li>max(float)</li>
* <li>max(long)</li>
* <li>max(int)</li>
* <li>max(chararray)</li>
* <li>max(bytearray)</li>
+ * <li>max(bigdecimal)</li>
* <li>row_number</li>
* <li>first_value</li>
* <li>last_value</li>
@@ -153,7 +161,8 @@ import org.apache.pig.impl.logicalLayer.
* current row and 3 following) over T;</tt>
*
* <p>Over accepts a constructor argument specifying the name and type,
- * colon-separated, of its return schema.</p>
+ * colon-separated, of its return schema. If the argument is 'true', the inner schema is
+ * searched: the name and type of the bag's inner field are used, and a schema with
+ * alias+'_over' and the same type is returned.</p>
*
* <p><pre>
* DEFINE IOver org.apache.pig.piggybank.evaluation.Over('state_rk:int');
@@ -188,12 +197,14 @@ public class Over extends EvalFunc<DataB
private Object[] udfArgs;
private byte returnType;
private String returnName;
+ private boolean searchInnerType;
public Over() {
initialized = false;
udfArgs = null;
func = null;
returnType = DataType.UNKNOWN;
+ searchInnerType = false;
}
public Over(String typespec) {
@@ -202,12 +213,16 @@ public class Over extends EvalFunc<DataB
String[] fn_tn = typespec.split(":", 2);
this.returnName = fn_tn[0];
this.returnType = DataType.findTypeByName(fn_tn[1]);
- } else {
+ } else if(Boolean.parseBoolean(typespec)) {
+ searchInnerType = Boolean.parseBoolean(typespec);
+ }else{
this.returnName = "result";
this.returnType = DataType.findTypeByName(typespec);
- }
+ }
}
+
+
@Override
public DataBag exec(Tuple input) throws IOException {
if (input == null || input.size() < 2) {
@@ -255,19 +270,42 @@ public class Over extends EvalFunc<DataB
@Override
public Schema outputSchema(Schema inputSch) {
try {
- if (returnType == DataType.UNKNOWN) {
+ FieldSchema field;
+
+ if (searchInnerType) {
+ field = new FieldSchema(inputSch.getField(0));
+ while (searchInnerType) {
+ if (field.schema != null
+ && field.schema.getFields().size() > 1) {
+ searchInnerType = false;
+ } else {
+ if (field.type == DataType.TUPLE
+ || field.type == DataType.BAG) {
+ field = new FieldSchema(field.schema.getField(0));
+ } else {
+ field.alias = field.alias + "_over";
+ searchInnerType = false;
+ }
+ }
+ }
+
+ searchInnerType = true;
+ } else if (returnType == DataType.UNKNOWN) {
return Schema.generateNestedSchema(DataType.BAG, DataType.NULL);
} else {
- Schema outputTupleSchema = new Schema(new Schema.FieldSchema(returnName, returnType));
- return new Schema(new Schema.FieldSchema(
- getSchemaName(this.getClass().getName().toLowerCase(), inputSch),
- outputTupleSchema,
- DataType.BAG));
+ field = new Schema.FieldSchema(returnName, returnType);
}
+
+ Schema outputTupleSchema = new Schema(field);
+ return new Schema(new Schema.FieldSchema(getSchemaName(this
+ .getClass().getName().toLowerCase(), inputSch),
+ outputTupleSchema, DataType.BAG));
+
} catch (FrontendException fe) {
throw new RuntimeException("Unable to create nested schema", fe);
}
}
+
private void init(Tuple input) throws IOException {
initialized = true;
@@ -329,6 +367,8 @@ public class Over extends EvalFunc<DataB
func = new LongSum();
} else if ("sum(bytearray)".equalsIgnoreCase(agg)) {
func = new SUM();
+ } else if ("sum(bigdecimal)".equalsIgnoreCase(agg)) {
+ func = new BigDecimalSum();
} else if ("avg(double)".equalsIgnoreCase(agg)) {
func = new DoubleAvg();
} else if ("avg(float)".equalsIgnoreCase(agg)) {
@@ -339,6 +379,8 @@ public class Over extends EvalFunc<DataB
func = new IntAvg();
} else if ("avg(bytearray)".equalsIgnoreCase(agg)) {
func = new AVG();
+ } else if ("avg(bigdecimal)".equalsIgnoreCase(agg)) {
+ func = new BigDecimalAvg();
} else if ("min(double)".equalsIgnoreCase(agg)) {
func = new DoubleMin();
} else if ("min(float)".equalsIgnoreCase(agg)) {
@@ -351,6 +393,8 @@ public class Over extends EvalFunc<DataB
func = new StringMin();
} else if ("min(bytearray)".equalsIgnoreCase(agg)) {
func = new MIN();
+ } else if ("min(bigdecimal)".equalsIgnoreCase(agg)) {
+ func = new BigDecimalMin();
} else if ("max(double)".equalsIgnoreCase(agg)) {
func = new DoubleMax();
} else if ("max(float)".equalsIgnoreCase(agg)) {
@@ -363,6 +407,8 @@ public class Over extends EvalFunc<DataB
func = new StringMax();
} else if ("max(bytearray)".equalsIgnoreCase(agg)) {
func = new MAX();
+ } else if ("max(bigdecimal)".equalsIgnoreCase(agg)) {
+ func = new BigDecimalMax();
} else if ("row_number".equalsIgnoreCase(agg)) {
func = new RowNumber();
} else if ("first_value".equalsIgnoreCase(agg)) {
Modified: pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/util/apachelogparser/SearchEngineExtractor.java
URL: http://svn.apache.org/viewvc/pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/util/apachelogparser/SearchEngineExtractor.java?rev=1783988&r1=1783987&r2=1783988&view=diff
==============================================================================
--- pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/util/apachelogparser/SearchEngineExtractor.java (original)
+++ pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/util/apachelogparser/SearchEngineExtractor.java Wed Feb 22 09:43:41 2017
@@ -363,6 +363,15 @@ public class SearchEngineExtractor exten
searchEngines.put("search.lycos.com", "Lycos");
searchEngines.put("search.msn.co.uk", "MSN UK");
searchEngines.put("search.msn.com", "MSN");
+ searchEngines.put("bing.com", "Bing");
+ searchEngines.put("ssl.bing.com", "Bing");
+ searchEngines.put("cn.bing.com", "Bing China");
+ searchEngines.put("br.bing.com", "Bing Brazil");
+ searchEngines.put("it.bing.com", "Bing Italy");
+ searchEngines.put("be.bing.com", "Bing Netherlands");
+ searchEngines.put("uk.bing.com", "Bing UK");
+ searchEngines.put("hk.bing.com", "Bing Hong Kong");
+ searchEngines.put("nz.bing.com", "Bing New Zeland");
searchEngines.put("search.myway.com", "MyWay");
searchEngines.put("search.mywebsearch.com", "My Web Search");
searchEngines.put("search.ntlworld.com", "NTLWorld");
Modified: pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/xml/XPath.java
URL: http://svn.apache.org/viewvc/pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/xml/XPath.java?rev=1783988&r1=1783987&r2=1783988&view=diff
==============================================================================
--- pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/xml/XPath.java (original)
+++ pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/xml/XPath.java Wed Feb 22 09:43:41 2017
@@ -16,8 +16,11 @@ package org.apache.pig.piggybank.evaluat
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
+import java.util.Iterator;
import java.util.List;
+import javax.xml.XMLConstants;
+import javax.xml.namespace.NamespaceContext;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.xpath.XPathFactory;
@@ -49,8 +52,7 @@ public class XPath extends EvalFunc<Stri
private static boolean cache = true;
private static boolean ignoreNamespace = true;
- public static final String EMPTY_STRING = "";
-
+
/**
* input should contain: 1) xml 2) xpath
* 3) optional cache xml doc flag
@@ -95,8 +97,13 @@ public class XPath extends EvalFunc<Stri
return null;
}
- if(input.size() > 2)
+ if(input.size() > 2) {
cache = (Boolean) input.get(2);
+ }
+
+ if (input.size() > 3) {
+ ignoreNamespace = (Boolean) input.get(3);
+ }
if (!cache || xpath == null || !xml.equals(this.xml)) {
final InputSource source = new InputSource(new StringReader(xml));
@@ -104,6 +111,7 @@ public class XPath extends EvalFunc<Stri
this.xml = xml; // track the xml for subsequent calls to this udf
final DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
+ dbf.setNamespaceAware(!ignoreNamespace);
final DocumentBuilder db = dbf.newDocumentBuilder();
this.document = db.parse(source);
@@ -112,14 +120,32 @@ public class XPath extends EvalFunc<Stri
this.xpath = xpathFactory.newXPath();
+ if (!ignoreNamespace) {
+ xpath.setNamespaceContext(new NamespaceContext() {
+ @Override
+ public String getNamespaceURI(String prefix) {
+ if (prefix.equals(XMLConstants.DEFAULT_NS_PREFIX)) {
+ return document.lookupNamespaceURI(null);
+ } else {
+ return document.lookupNamespaceURI(prefix);
+ }
+ }
+
+ @Override
+ public String getPrefix(String namespaceURI) {
+ return document.lookupPrefix(namespaceURI);
+ }
+
+ @Override
+ public Iterator getPrefixes(String namespaceURI) {
+ return null;
+ }
+ });
+ }
}
String xpathString = (String) input.get(1);
- if (ignoreNamespace) {
- xpathString = createNameSpaceIgnoreXpathString(xpathString);
- }
-
final String value = xpath.evaluate(xpathString, document);
return value;
@@ -165,34 +191,6 @@ public class XPath extends EvalFunc<Stri
}
return true;
}
-
-
- /**
- * Returns a new the xPathString by adding additional parameters
- * in the existing xPathString for ignoring the namespace during compilation.
- *
- * @param String xpathString
- * @return String modified xpathString
- */
- private String createNameSpaceIgnoreXpathString(final String xpathString) {
- final String QUERY_PREFIX = "//*";
- final String LOCAL_PREFIX = "[local-name()='";
- final String LOCAL_POSTFIX = "']";
- final String SPLITTER = "/";
-
- try {
- String xpathStringWithLocalName = EMPTY_STRING;
- String[] individualNodes = xpathString.split(SPLITTER);
-
- for (String node : individualNodes) {
- xpathStringWithLocalName = xpathStringWithLocalName.concat(QUERY_PREFIX + LOCAL_PREFIX + node
- + LOCAL_POSTFIX);
- }
- return xpathStringWithLocalName;
- } catch (Exception ex) {
- return xpathString;
- }
- }
/**
* Returns argument schemas of the UDF.
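
Why the change needs a NamespaceContext: with a namespace-aware parse, XPath 1.0 resolves prefixed steps through the context, while unprefixed steps match only no-namespace nodes (which is what the removed local-name() rewriting worked around). A minimal, self-contained JDK sketch, with an invented urn:demo namespace and prefix "d", of the resolution the UDF now wires up from the parsed document:

    // Hedged sketch: namespace-aware parsing plus a NamespaceContext,
    // the same plumbing the patched UDF builds from the parsed document.
    import java.io.StringReader;
    import java.util.Iterator;
    import javax.xml.XMLConstants;
    import javax.xml.namespace.NamespaceContext;
    import javax.xml.parsers.DocumentBuilderFactory;
    import javax.xml.xpath.XPathFactory;
    import org.w3c.dom.Document;
    import org.xml.sax.InputSource;

    public class XPathNamespaceSketch {
        public static void main(String[] args) throws Exception {
            String xml = "<a xmlns=\"urn:demo\"><b>42</b></a>";
            DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
            dbf.setNamespaceAware(true);  // mirrors dbf.setNamespaceAware(!ignoreNamespace)
            Document doc = dbf.newDocumentBuilder()
                    .parse(new InputSource(new StringReader(xml)));

            javax.xml.xpath.XPath xp = XPathFactory.newInstance().newXPath();
            xp.setNamespaceContext(new NamespaceContext() {
                public String getNamespaceURI(String prefix) {
                    // Bind the invented prefix "d" to the document's default namespace
                    return "d".equals(prefix) ? "urn:demo" : XMLConstants.NULL_NS_URI;
                }
                public String getPrefix(String uri) { return null; }
                public Iterator getPrefixes(String uri) { return null; }
            });
            System.out.println(xp.evaluate("/d:a/d:b", doc));   // prints 42
        }
    }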
Modified: pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/CSVExcelStorage.java
URL: http://svn.apache.org/viewvc/pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/CSVExcelStorage.java?rev=1783988&r1=1783987&r2=1783988&view=diff
==============================================================================
--- pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/CSVExcelStorage.java (original)
+++ pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/CSVExcelStorage.java Wed Feb 22 09:43:41 2017
@@ -580,7 +580,7 @@ public class CSVExcelStorage extends Pig
}
} else if (b == DOUBLE_QUOTE) {
// Does a double quote immediately follow?
- if ((i < recordLen-1) && (buf[i+1] == DOUBLE_QUOTE)) {
+ if ((i < recordLen-1) && (buf[i+1] == DOUBLE_QUOTE) && (fieldBuffer.position() != 0)) {
fieldBuffer.put(b);
nextTupleSkipChar = true;
continue;
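
The extra fieldBuffer.position() != 0 test narrows when a quote pair counts as an escaped, literal quote: only mid-field, not at the very start of a field. A standalone re-implementation of just that rule, with an invented unquote() helper rather than the storer's actual byte-level reader:

    // Hedged sketch of the quote rule the guard enforces: inside a field,
    // "" becomes a literal quote; at the start of a field it opens and
    // closes an empty quoted section instead.
    public class CsvQuoteRuleSketch {
        static String unquote(String field) {
            StringBuilder out = new StringBuilder();
            boolean inQuotes = false;
            for (int i = 0; i < field.length(); i++) {
                char c = field.charAt(i);
                if (c == '"') {
                    if (inQuotes && i + 1 < field.length()
                            && field.charAt(i + 1) == '"'
                            && out.length() != 0) {   // the position() != 0 guard
                        out.append('"');
                        i++;                          // skip the second quote
                    } else {
                        inQuotes = !inQuotes;         // open/close quoted section
                    }
                } else {
                    out.append(c);
                }
            }
            return out.toString();
        }

        public static void main(String[] args) {
            System.out.println(unquote("\"a\"\"b\""));  // a"b : escaped quote mid-field
            System.out.println(unquote("\"\""));        // empty: leading "" is not an escape
        }
    }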
Modified: pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/DBStorage.java
URL: http://svn.apache.org/viewvc/pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/DBStorage.java?rev=1783988&r1=1783987&r2=1783988&view=diff
==============================================================================
--- pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/DBStorage.java (original)
+++ pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/DBStorage.java Wed Feb 22 09:43:41 2017
@@ -91,6 +91,7 @@ public class DBStorage extends StoreFunc
/**
* Write the tuple to Database directly here.
*/
+ @Override
public void putNext(Tuple tuple) throws IOException {
int sqlPos = 1;
try {
@@ -373,4 +374,9 @@ public class DBStorage extends StoreFunc
p.setProperty(SCHEMA_SIGNATURE, s.toString());
}
+ @Override
+ public Boolean supportsParallelWriteToStoreLocation() {
+ return false;
+ }
+
}
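
The new override opts DBStorage out of parallel writes to a single store location. A sketch of the same opt-out in a custom StoreFunc, assuming the Boolean tri-state convention in this branch (null meaning "unknown", so overriding is only needed to answer definitively):

    // Hedged sketch: a JDBC-style sink declining parallel writes to one
    // store location, as DBStorage now does.
    import org.apache.pig.StoreFunc;

    public abstract class SingleWriterStoreFunc extends StoreFunc {
        @Override
        public Boolean supportsParallelWriteToStoreLocation() {
            // One writer per location: each task holds its own connection,
            // and concurrent batches to the same table are not coordinated.
            return false;
        }
    }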
Modified: pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/IndexedStorage.java
URL: http://svn.apache.org/viewvc/pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/IndexedStorage.java?rev=1783988&r1=1783987&r2=1783988&view=diff
==============================================================================
--- pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/IndexedStorage.java (original)
+++ pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/IndexedStorage.java Wed Feb 22 09:43:41 2017
@@ -60,7 +60,6 @@ import org.apache.pig.data.TupleFactory;
import org.apache.pig.impl.util.StorageUtil;
import org.apache.pig.data.DataType;
import org.apache.pig.data.DataByteArray;
-import org.apache.pig.backend.hadoop.executionengine.shims.HadoopShims;
/**
* <code>IndexedStorage</code> is a form of <code>PigStorage</code> that supports a
Modified: pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/MultiStorage.java
URL: http://svn.apache.org/viewvc/pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/MultiStorage.java?rev=1783988&r1=1783987&r2=1783988&view=diff
==============================================================================
--- pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/MultiStorage.java (original)
+++ pig/branches/spark/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/MultiStorage.java Wed Feb 22 09:43:41 2017
@@ -16,7 +16,9 @@ import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.text.NumberFormat;
+import java.util.ArrayList;
import java.util.HashMap;
+import java.util.List;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
@@ -42,6 +44,9 @@ import org.apache.pig.backend.executione
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MRConfiguration;
import org.apache.pig.data.Tuple;
import org.apache.pig.impl.util.StorageUtil;
+import org.apache.xml.utils.StringBufferPool;
+
+import com.google.common.base.Strings;
/**
* The UDF is useful for splitting the output data into a bunch of directories
@@ -73,13 +78,21 @@ import org.apache.pig.impl.util.StorageU
* If the output is compressed,then the sub directories and the output files will
* be having the extension. Say for example in the above case if bz2 is used one file
* will look like ;/my/home/output.bz2/a1.bz2/a1-0000.bz2
+ *
+ * Key field can also be a comma separated list of indices e.g. '0,1' - in this case
+ * storage will be multi-level:
+ * /my/home/output/a1/b1/a1-b1-0000
+ * /my/home/output/a1/b2/a1-b2-0000
+ * There is also an option to leave key values out of the stored output; see isRemoveKeys.
*/
public class MultiStorage extends StoreFunc {
+ private static final String KEYFIELD_DELIMETER = ",";
private Path outputPath; // User specified output Path
- private int splitFieldIndex = -1; // Index of the key field
+ private final List<Integer> splitFieldIndices= new ArrayList<Integer>(); // Indices of the key fields
private String fieldDel; // delimiter of the output record.
private Compression comp; // Compression type of output data.
+ private boolean isRemoveKeys = false;
// Compression types supported by this store
enum Compression {
@@ -95,9 +108,14 @@ public class MultiStorage extends StoreF
this(parentPathStr, splitFieldIndex, compression, "\\t");
}
+ public MultiStorage(String parentPathStr, String splitFieldIndex,
+ String compression, String fieldDel) {
+ this(parentPathStr, splitFieldIndex, compression, fieldDel, "false");
+ }
+
/**
* Constructor
- *
+ *
* @param parentPathStr
* Parent output dir path (this will be specified in store statement,
* so MultiStorage don't use this parameter in reality. However, we don't
@@ -108,18 +126,26 @@ public class MultiStorage extends StoreF
* 'bz2', 'bz', 'gz' or 'none'
* @param fieldDel
* Output record field delimiter.
+ * @param isRemoveKeys
+ * Removes key columns from result during write.
*/
public MultiStorage(String parentPathStr, String splitFieldIndex,
- String compression, String fieldDel) {
+ String compression, String fieldDel, String isRemoveKeys) {
+ this.isRemoveKeys = Boolean.parseBoolean(isRemoveKeys);
this.outputPath = new Path(parentPathStr);
- this.splitFieldIndex = Integer.parseInt(splitFieldIndex);
+
+ String[] splitFieldIndices = splitFieldIndex.split(KEYFIELD_DELIMETER);
+ for (String splitFieldIndexString : splitFieldIndices){
+ this.splitFieldIndices.add(Integer.parseInt(splitFieldIndexString));
+ }
+
this.fieldDel = fieldDel;
try {
this.comp = (compression == null) ? Compression.none : Compression
- .valueOf(compression.toLowerCase());
+ .valueOf(compression.toLowerCase());
} catch (IllegalArgumentException e) {
System.err.println("Exception when converting compression string: "
- + compression + " to enum. No compression will be used");
+ + compression + " to enum. No compression will be used");
this.comp = Compression.none;
}
}
@@ -127,22 +153,26 @@ public class MultiStorage extends StoreF
//--------------------------------------------------------------------------
// Implementation of StoreFunc
- private RecordWriter<String, Tuple> writer;
+ private RecordWriter<List<String>, Tuple> writer;
@Override
public void putNext(Tuple tuple) throws IOException {
- if (tuple.size() <= splitFieldIndex) {
- throw new IOException("split field index:" + this.splitFieldIndex
- + " >= tuple size:" + tuple.size());
+ for (int splitFieldIndex : this.splitFieldIndices) {
+ if (tuple.size() <= splitFieldIndex) {
+ throw new IOException("split field index:" + splitFieldIndex
+ + " >= tuple size:" + tuple.size());
+ }
}
- Object field = null;
- try {
- field = tuple.get(splitFieldIndex);
- } catch (ExecException exec) {
- throw new IOException(exec);
+ List<String> fields = new ArrayList<String>();
+ for (int splitFieldIndex : this.splitFieldIndices){
+ try {
+ fields.add(String.valueOf(tuple.get(splitFieldIndex)));
+ } catch (ExecException exec) {
+ throw new IOException(exec);
+ }
}
try {
- writer.write(String.valueOf(field), tuple);
+ writer.write(fields, tuple);
} catch (InterruptedException e) {
throw new IOException(e);
}
@@ -153,6 +183,9 @@ public class MultiStorage extends StoreF
public OutputFormat getOutputFormat() throws IOException {
MultiStorageOutputFormat format = new MultiStorageOutputFormat();
format.setKeyValueSeparator(fieldDel);
+ if (this.isRemoveKeys){
+ format.setSkipIndices(this.splitFieldIndices);
+ }
return format;
}
@@ -174,27 +207,33 @@ public class MultiStorage extends StoreF
FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
}
}
-
+
+ @Override
+ public Boolean supportsParallelWriteToStoreLocation() {
+ return false;
+ }
+
//--------------------------------------------------------------------------
// Implementation of OutputFormat
public static class MultiStorageOutputFormat extends
- TextOutputFormat<String, Tuple> {
+ TextOutputFormat<List<String>, Tuple> {
private String keyValueSeparator = "\\t";
private byte fieldDel = '\t';
-
+ private List<Integer> skipIndices = null;
+
@Override
- public RecordWriter<String, Tuple>
+ public RecordWriter<List<String>, Tuple>
getRecordWriter(TaskAttemptContext context
) throws IOException, InterruptedException {
final TaskAttemptContext ctx = context;
- return new RecordWriter<String, Tuple>() {
+ return new RecordWriter<List<String>, Tuple>() {
- private Map<String, MyLineRecordWriter> storeMap =
- new HashMap<String, MyLineRecordWriter>();
+ private Map<List<String>, MyLineRecordWriter> storeMap =
+ new HashMap<List<String>, MyLineRecordWriter>();
private static final int BUFFER_SIZE = 1024;
@@ -202,7 +241,7 @@ public class MultiStorage extends StoreF
new ByteArrayOutputStream(BUFFER_SIZE);
@Override
- public void write(String key, Tuple val) throws IOException {
+ public void write(List<String> key, Tuple val) throws IOException {
int sz = val.size();
for (int i = 0; i < sz; i++) {
Object field;
@@ -212,9 +251,13 @@ public class MultiStorage extends StoreF
throw ee;
}
- StorageUtil.putField(mOut, field);
+ boolean skipCurrentField = skipIndices != null && skipIndices.contains(i);
- if (i != sz - 1) {
+ if (!skipCurrentField) {
+ StorageUtil.putField(mOut, field);
+ }
+
+ if (i != sz - 1 && !skipCurrentField) {
mOut.write(fieldDel);
}
}
@@ -231,17 +274,17 @@ public class MultiStorage extends StoreF
}
}
- private MyLineRecordWriter getStore(String fieldValue) throws IOException {
- MyLineRecordWriter store = storeMap.get(fieldValue);
+ private MyLineRecordWriter getStore(List<String> fieldValues) throws IOException {
+ MyLineRecordWriter store = storeMap.get(fieldValues);
if (store == null) {
- DataOutputStream os = createOutputStream(fieldValue);
+ DataOutputStream os = createOutputStream(fieldValues);
store = new MyLineRecordWriter(os, keyValueSeparator);
- storeMap.put(fieldValue, store);
+ storeMap.put(fieldValues, store);
}
return store;
}
- private DataOutputStream createOutputStream(String fieldValue) throws IOException {
+ private DataOutputStream createOutputStream(List<String> fieldValues) throws IOException {
Configuration conf = ctx.getConfiguration();
TaskID taskId = ctx.getTaskAttemptID().getTaskID();
@@ -259,7 +302,21 @@ public class MultiStorage extends StoreF
NumberFormat nf = NumberFormat.getInstance();
nf.setMinimumIntegerDigits(4);
- Path path = new Path(fieldValue+extension, fieldValue + '-'
+ StringBuffer pathStringBuffer = new StringBuffer();
+ for (String fieldValue : fieldValues){
+ String safeFieldValue = fieldValue.replaceAll("\\/","-");
+ pathStringBuffer.append(safeFieldValue);
+ pathStringBuffer.append("/");
+ }
+ pathStringBuffer.deleteCharAt(pathStringBuffer.length()-1);
+ String pathString = pathStringBuffer.toString();
+ String idString = pathString.replaceAll("\\/","-");
+
+ if (!Strings.isNullOrEmpty(extension)){
+ pathString = pathString.replaceAll("\\/",extension+"\\/");
+ }
+
+ Path path = new Path(pathString+extension, idString + '-'
+ nf.format(taskId.getId())+extension);
Path workOutputPath = ((FileOutputCommitter)getOutputCommitter(ctx)).getWorkPath();
Path file = new Path(workOutputPath, path);
@@ -279,8 +336,12 @@ public class MultiStorage extends StoreF
keyValueSeparator = sep;
fieldDel = StorageUtil.parseFieldDel(keyValueSeparator);
}
-
- //------------------------------------------------------------------------
+
+ public void setSkipIndices(List<Integer> skipIndices) {
+ this.skipIndices = skipIndices;
+ }
+
+ //------------------------------------------------------------------------
//
protected static class MyLineRecordWriter
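
For the multi-level keys documented in the class Javadoc above, the path construction reduces to a small amount of string work. A minimal sketch with invented key values "a1" and "b2" (and no compression extension), mirroring the logic in createOutputStream:

    // Hedged sketch: comma-separated key fields become nested directories
    // plus a hyphen-joined file prefix, with '/' in values sanitized to '-'.
    import java.util.Arrays;
    import java.util.List;

    public class MultiKeyPathSketch {
        public static void main(String[] args) {
            List<String> keyValues = Arrays.asList("a1", "b2");
            StringBuilder dir = new StringBuilder();
            for (String v : keyValues) {
                dir.append(v.replaceAll("/", "-")).append('/');
            }
            dir.deleteCharAt(dir.length() - 1);
            String pathString = dir.toString();              // a1/b2
            String idString = pathString.replace('/', '-');  // a1-b2
            // Matches the layout in the class Javadoc:
            //   /my/home/output/a1/b2/a1-b2-0000
            System.out.println(pathString + "/" + idString + "-0000");
        }
    }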
Modified: pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/evaluation/TestOver.java
URL: http://svn.apache.org/viewvc/pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/evaluation/TestOver.java?rev=1783988&r1=1783987&r2=1783988&view=diff
==============================================================================
--- pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/evaluation/TestOver.java (original)
+++ pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/evaluation/TestOver.java Wed Feb 22 09:43:41 2017
@@ -18,12 +18,11 @@
package org.apache.pig.piggybank.evaluation;
import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
+import java.math.BigDecimal;
import java.util.Iterator;
-import java.util.List;
import java.util.Random;
import org.apache.pig.backend.executionengine.ExecException;
@@ -34,8 +33,6 @@ import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.apache.pig.impl.logicalLayer.schema.Schema;
-
-import org.junit.Before;
import org.junit.Test;
public class TestOver {
@@ -66,11 +63,25 @@ public class TestOver {
out = func.outputSchema(in);
assertEquals("{org.apache.pig.piggybank.evaluation.over_3: {result: double}}", out.toString());
+ // bigdecimal
+ func = new Over("BIGDECIMAL");
+ in = Schema.generateNestedSchema(DataType.BAG, DataType.INTEGER);
+ out = func.outputSchema(in);
+ assertEquals("{org.apache.pig.piggybank.evaluation.over_4: {result: bigdecimal}}", out.toString());
+
// named
func = new Over("bob:chararray");
in = Schema.generateNestedSchema(DataType.BAG, DataType.INTEGER);
out = func.outputSchema(in);
- assertEquals("{org.apache.pig.piggybank.evaluation.over_4: {bob: chararray}}", out.toString());
+ assertEquals("{org.apache.pig.piggybank.evaluation.over_5: {bob: chararray}}", out.toString());
+
+
+ // Search inner alias and type
+ func = new Over("true");
+ in = Schema.generateNestedSchema(DataType.BAG, DataType.BIGDECIMAL);
+ in.getField(0).schema.getField(0).alias="test";
+ out = func.outputSchema(in);
+ assertEquals("{org.apache.pig.piggybank.evaluation.over_6: {test_over: bigdecimal}}", out.toString());
}
@Test
@@ -397,6 +408,28 @@ public class TestOver {
assertEquals(new Long(10), to.get(0));
}
}
+
+ @Test
+ public void testSumBigDecimal() throws Exception {
+ Over func = new Over();
+ DataBag inbag = BagFactory.getInstance().newDefaultBag();
+ for (int i = 0; i < 10; i++) {
+ Tuple t = TupleFactory.getInstance().newTuple(1);
+ t.set(0, new BigDecimal(1));
+ inbag.add(t);
+ }
+ Tuple t = TupleFactory.getInstance().newTuple(4);
+ t.set(0, inbag);
+ t.set(1, "sum(bigdecimal)");
+ t.set(2, -1);
+ t.set(3, -1);
+ DataBag outbag = func.exec(t);
+ assertEquals(10, outbag.size());
+ for (Tuple to : outbag) {
+ assertEquals(1, to.size());
+ assertEquals(new BigDecimal(10), to.get(0));
+ }
+ }
@Test
public void testAvgDouble() throws Exception {
@@ -509,6 +542,29 @@ public class TestOver {
}
@Test
+ public void testAvgBigDecimal() throws Exception {
+ Over func = new Over();
+ DataBag inbag = BagFactory.getInstance().newDefaultBag();
+ for (int i = 0; i < 10; i++) {
+ Tuple t = TupleFactory.getInstance().newTuple(1);
+ t.set(0, new BigDecimal(i));
+ inbag.add(t);
+ }
+ Tuple t = TupleFactory.getInstance().newTuple(4);
+ t.set(0, inbag);
+ t.set(1, "avg(bigdecimal)");
+ t.set(2, -1);
+ t.set(3, -1);
+ DataBag outbag = func.exec(t);
+ assertEquals(10, outbag.size());
+ for (Tuple to : outbag) {
+ assertEquals(1, to.size());
+ assertEquals(new BigDecimal(4.5), to.get(0));
+ }
+ }
+
+
+ @Test
public void testMinDouble() throws Exception {
Over func = new Over();
DataBag inbag = BagFactory.getInstance().newDefaultBag();
@@ -627,6 +683,26 @@ public class TestOver {
assertEquals("0", to.get(0));
}
}
+
+ @Test
+ public void testMinBigDecimal() throws Exception {
+ Over func = new Over();
+ DataBag inbag = BagFactory.getInstance().newDefaultBag();
+ for (int i = 0; i < 10; i++) {
+ Tuple t = TupleFactory.getInstance().newTuple(1);
+ t.set(0, new BigDecimal(i));
+ inbag.add(t);
+ }
+ Tuple t = TupleFactory.getInstance().newTuple(2);
+ t.set(0, inbag);
+ t.set(1, "min(bigdecimal)");
+ DataBag outbag = func.exec(t);
+ assertEquals(10, outbag.size());
+ for (Tuple to : outbag) {
+ assertEquals(1, to.size());
+ assertEquals(new BigDecimal(0), to.get(0));
+ }
+ }
@Test
public void testMaxDouble() throws Exception {
@@ -754,6 +830,28 @@ public class TestOver {
assertEquals("9", to.get(0));
}
}
+
+ @Test
+ public void testMaxBigDecimal() throws Exception {
+ Over func = new Over();
+ DataBag inbag = BagFactory.getInstance().newDefaultBag();
+ for (int i = 0; i < 10; i++) {
+ Tuple t = TupleFactory.getInstance().newTuple(1);
+ t.set(0, new BigDecimal(i));
+ inbag.add(t);
+ }
+ Tuple t = TupleFactory.getInstance().newTuple(2);
+ t.set(0, inbag);
+ t.set(1, "max(bigdecimal)");
+ DataBag outbag = func.exec(t);
+ assertEquals(10, outbag.size());
+ int count = 0;
+ for (Tuple to : outbag) {
+ assertEquals(1, to.size());
+ assertEquals(new BigDecimal(count++), to.get(0));
+ }
+ }
+
@Test
public void testRowNumber() throws Exception {
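Note that assertEquals on BigDecimal delegates to BigDecimal.equals, which compares scale as well as numeric value, so assertions like assertEquals(new BigDecimal(4.5), to.get(0)) implicitly pin the scale that Over produces. A quick illustration of the distinction:

    import java.math.BigDecimal;

    public class BigDecimalEqualsDemo {
        public static void main(String[] args) {
            // equals() is scale-sensitive; compareTo() is not.
            System.out.println(new BigDecimal("4.5").equals(new BigDecimal("4.50")));         // false
            System.out.println(new BigDecimal("4.5").compareTo(new BigDecimal("4.50")) == 0); // true
        }
    }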
Added: pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/TestMaxTupleBy1stField.java
URL: http://svn.apache.org/viewvc/pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/TestMaxTupleBy1stField.java?rev=1783988&view=auto
==============================================================================
--- pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/TestMaxTupleBy1stField.java (added)
+++ pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/TestMaxTupleBy1stField.java Wed Feb 22 09:43:41 2017
@@ -0,0 +1,95 @@
+package org.apache.pig.piggybank.test.evaluation;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import com.google.common.collect.Lists;
+import org.apache.pig.data.BagFactory;
+import org.apache.pig.data.DataBag;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.data.TupleFactory;
+import org.apache.pig.piggybank.evaluation.MaxTupleBy1stField;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import java.util.ArrayList;
+import java.util.List;
+
+public class TestMaxTupleBy1stField {
+
+ private static List<Tuple> inputTuples = new ArrayList<>();
+ private static final TupleFactory TUPLE_FACTORY = TupleFactory.getInstance();
+ private static final BagFactory BAG_FACTORY = BagFactory.getInstance();
+
+ @BeforeClass
+ public static void setup() throws Exception {
+ inputTuples.add(TUPLE_FACTORY.newTuple(Lists.newArrayList(0L, "Fruit", "orange", 21F)));
+ inputTuples.add(TUPLE_FACTORY.newTuple(Lists.newArrayList(1L, "Fruit", "apple", 9.9F)));
+ inputTuples.add(TUPLE_FACTORY.newTuple(Lists.newArrayList(2L, "Vegetable", "paprika", 30F)));
+ inputTuples.add(TUPLE_FACTORY.newTuple(Lists.newArrayList(3L, "Fruit", "blueberry", 40F)));
+ inputTuples.add(TUPLE_FACTORY.newTuple(Lists.newArrayList(4L, "Vegetable", "carrot", 50F)));
+ inputTuples.add(TUPLE_FACTORY.newTuple(Lists.newArrayList(5L, "Fruit", "blueberry", 41F)));
+ inputTuples.add(TUPLE_FACTORY.newTuple(Lists.newArrayList(6L, "Vegetable", "paprika", 31F)));
+ inputTuples.add(TUPLE_FACTORY.newTuple(Lists.newArrayList(7L, "Fruit", "orange", 20.5F)));
+ inputTuples.add(TUPLE_FACTORY.newTuple(Lists.newArrayList(8L, "Fruit", "apple", 10.1F)));
+ inputTuples.add(TUPLE_FACTORY.newTuple(Lists.newArrayList(9L, "Fruit", "apple", 10.2F)));
+ }
+
+
+ @Test
+ public void testExecFunc() throws Exception {
+ MaxTupleBy1stField udf = new MaxTupleBy1stField();
+ Tuple inputTuple = createTupleFromInputList(0,inputTuples.size());
+
+ Tuple result = udf.exec(inputTuple);
+ Assert.assertEquals("apple", result.get(2));
+ Assert.assertEquals(10.2F, (Float) result.get(3), 1E-8);
+ }
+
+ @Test
+ public void testAccumulator() throws Exception {
+ MaxTupleBy1stField udf = new MaxTupleBy1stField();
+
+ Tuple inputTuple = createTupleFromInputList(0, 3);
+ udf.accumulate(inputTuple);
+ Tuple result = udf.getValue();
+ Assert.assertEquals("paprika", result.get(2));
+ Assert.assertEquals(30F, (Float) result.get(3), 1E-6);
+
+ inputTuple = createTupleFromInputList(3, 6);
+ udf.accumulate(inputTuple);
+ result = udf.getValue();
+ Assert.assertEquals("apple", result.get(2));
+ Assert.assertEquals(10.1F, (Float) result.get(3), 1E-6);
+
+ udf.cleanup();
+ Assert.assertEquals(null,udf.getValue());
+ }
+
+ private static Tuple createTupleFromInputList(int offset, int length) {
+ DataBag inputBag = BAG_FACTORY.newDefaultBag();
+ for (int i = offset; i < offset+length; ++i) {
+ inputBag.add(inputTuples.get(i));
+ }
+ Tuple inputTuple = TUPLE_FACTORY.newTuple();
+ inputTuple.append(inputBag);
+ return inputTuple;
+ }
+
+}
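For reference, a minimal sketch (not the piggybank implementation) of the comparison the test above exercises — scan the bag and keep the tuple whose first field is largest, assuming the first fields are mutually Comparable:

    import org.apache.pig.data.DataBag;
    import org.apache.pig.data.Tuple;

    public class MaxBy1stFieldSketch {
        @SuppressWarnings("unchecked")
        public static Tuple maxBy1stField(DataBag bag) throws Exception {
            Tuple max = null;
            for (Tuple t : bag) {
                // keep the tuple with the largest first field
                if (max == null
                        || ((Comparable<Object>) t.get(0)).compareTo(max.get(0)) > 0) {
                    max = t;
                }
            }
            return max;
        }
    }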
Modified: pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/xml/XPathTest.java
URL: http://svn.apache.org/viewvc/pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/xml/XPathTest.java?rev=1783988&r1=1783987&r2=1783988&view=diff
==============================================================================
--- pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/xml/XPathTest.java (original)
+++ pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/xml/XPathTest.java Wed Feb 22 09:43:41 2017
@@ -151,6 +151,95 @@ public class XPathTest {
}
@Test
+ public void testExecTupleWithDontIgnoreNamespace() throws Exception {
+
+ final XPath xpath = new XPath();
+
+ final Tuple tuple = mock(Tuple.class);
+
+ when(tuple.get(0)).thenReturn("<?xml version='1.0'?>\n" +
+ "<foo:document xmlns:foo=\"http://apache.org/foo\" xmlns:bar=\"http://apache.org/bar\">" +
+ "<bar:element>MyBar</bar:element>" +
+ "</foo:document>");
+
+ when(tuple.size()).thenReturn(4);
+ when(tuple.get(2)).thenReturn(true);
+ when(tuple.get(3)).thenReturn(false);
+
+ when(tuple.get(1)).thenReturn("/foo:document/bar:element");
+ assertEquals("MyBar", xpath.exec(tuple));
+
+ }
+
+
+
+ @Test
+ public void testFunctionInXPath() throws Exception {
+
+ final XPath xpath = new XPath();
+
+ final Tuple tuple = mock(Tuple.class);
+
+ when(tuple.get(0)).thenReturn("<Aa name=\"test1\">" +
+ "<Bb Cc=\"1\"/>" +
+ "<Bb Cc=\"1\"/>" +
+ "<Bb Cc=\"1\"/>" +
+ "<Bb Cc=\"1\"/>" +
+ "<Dd>test2</Dd>" +
+ "</Aa>");
+
+ when(tuple.size()).thenReturn(4);
+ when(tuple.get(1)).thenReturn("sum(Aa/Bb/@Cc)");
+ when(tuple.get(2)).thenReturn(true);
+ when(tuple.get(3)).thenReturn(true);
+
+ assertEquals("4", xpath.exec(tuple));
+
+ }
+
+
+ @Test
public void testExecTupleWithElementNodeWithComplexNameSpace() throws Exception {
final XPath xpath = new XPath();
@@ -210,7 +299,31 @@ public class XPathTest {
assertEquals("4 stars3.5 stars4 stars4.2 stars3.5 stars", xpath.exec(tuple));
}
-
+
@Ignore //--optional test
@Test
public void testCacheBenefit() throws Exception{
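The new tests all use the four-argument form of the UDF: the XML document, the XPath expression, and two boolean flags which — judging from the test names and the testCacheBenefit test above — toggle expression caching and namespace ignoring respectively. A hedged sketch of a direct invocation with a real tuple instead of a mock, under those assumptions:

    import org.apache.pig.data.Tuple;
    import org.apache.pig.data.TupleFactory;
    import org.apache.pig.piggybank.evaluation.xml.XPath;

    public class XPathUdfExample {
        public static void main(String[] args) throws Exception {
            // (xml, xpath, cache-compiled-xpath?, ignore-namespaces?)
            Tuple t = TupleFactory.getInstance().newTuple(4);
            t.set(0, "<Aa><Bb Cc=\"1\"/><Bb Cc=\"1\"/></Aa>");
            t.set(1, "sum(Aa/Bb/@Cc)");
            t.set(2, true);   // cache
            t.set(3, true);   // ignore namespaces
            System.out.println(new XPath().exec(t)); // expected: "2"
        }
    }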
Modified: pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestCSVExcelStorage.java
URL: http://svn.apache.org/viewvc/pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestCSVExcelStorage.java?rev=1783988&r1=1783987&r2=1783988&view=diff
==============================================================================
--- pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestCSVExcelStorage.java (original)
+++ pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestCSVExcelStorage.java Wed Feb 22 09:43:41 2017
@@ -218,7 +218,7 @@ public class TestCSVExcelStorage {
Util.registerMultiLineQuery(pig, script);
Iterator<Tuple> it = pig.openIterator("a");
Assert.assertEquals(Util.createTuple(new String[] {"foo,\"bar\",baz"}), it.next());
- Assert.assertEquals(Util.createTuple(new String[] {"\"\"\"\""}), it.next());
+ Assert.assertEquals(Util.createTuple(new String[] {"\"\"\""}), it.next());
}
// Handle newlines in fields
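The corrected expectation follows RFC 4180-style quoting, where a doubled quote inside a quoted field decodes to a single literal quote. The test's raw input line is not shown in this hunk, so the sketch below only demonstrates the decoding rule itself on an assumed fully quoted field:

    public class CsvQuoteDemo {
        // RFC 4180: strip the surrounding quotes, then collapse each "" to ".
        static String decodeQuotedField(String raw) {
            String inner = raw.substring(1, raw.length() - 1);
            return inner.replace("\"\"", "\"");
        }

        public static void main(String[] args) {
            // eight raw quote characters decode to three
            System.out.println(decodeQuotedField("\"\"\"\"\"\"\"\"")); // prints """
        }
    }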
Modified: pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestLogFormatLoader.java
URL: http://svn.apache.org/viewvc/pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestLogFormatLoader.java?rev=1783988&r1=1783987&r2=1783988&view=diff
==============================================================================
--- pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestLogFormatLoader.java (original)
+++ pig/branches/spark/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestLogFormatLoader.java Wed Feb 22 09:43:41 2017
@@ -109,7 +109,7 @@ public class TestLogFormatLoader {
Tuple actual = out.get(0);
Tuple expected = tuple(
"2001:980:91c0:1:8d31:a232:25e5:85d",
- "[05/Sep/2010:11:27:50 +0200]",
+ "05/Sep/2010:11:27:50 +0200",
"koken-pannen_303_hs-koken-pannen-afj-120601_B3_product_1_9200000002876066",
map(
"promo" , "koken-pannen_303_hs-koken-pannen-afj-120601_B3_product_1_9200000002876066",